From 7d642373db4c6c1c6ed8af02492047896beb4a47 Mon Sep 17 00:00:00 2001 From: Matthias Maennich Date: Thu, 29 Aug 2019 12:34:52 +0100 Subject: [PATCH 0001/3715] ANDROID: refactor build.config files to remove duplication The build.config.* files largely contain duplicate information by their nature. Reorganize them reduce duplication and to allow adding new configurations without copying the definitions again. Bug: 140224784 Change-Id: I6a3810a125b0ed48591690ca33bb5c02be58218a Signed-off-by: Matthias Maennich --- build.config.aarch64 | 11 +++++++++++ build.config.common | 9 +++++++++ build.config.cuttlefish.aarch64 | 17 +++-------------- build.config.cuttlefish.x86_64 | 17 +++-------------- build.config.x86_64 | 11 +++++++++++ 5 files changed, 37 insertions(+), 28 deletions(-) create mode 100644 build.config.aarch64 create mode 100644 build.config.common create mode 100644 build.config.x86_64 diff --git a/build.config.aarch64 b/build.config.aarch64 new file mode 100644 index 000000000000..523bbc0449f7 --- /dev/null +++ b/build.config.aarch64 @@ -0,0 +1,11 @@ +ARCH=arm64 + +CLANG_TRIPLE=aarch64-linux-gnu- +CROSS_COMPILE=aarch64-linux-androidkernel- +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/aarch64/aarch64-linux-android-4.9/bin + +FILES=" +arch/arm64/boot/Image.gz +vmlinux +System.map +" diff --git a/build.config.common b/build.config.common new file mode 100644 index 000000000000..93d85a5d6f59 --- /dev/null +++ b/build.config.common @@ -0,0 +1,9 @@ +BRANCH=android-4.14-r +KERNEL_DIR=common + +CC=clang +CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r353983c/bin +BUILDTOOLS_PREBUILT_BIN=build/build-tools/path/linux-x86 + +EXTRA_CMDS='' +STOP_SHIP_TRACEPRINTK=1 diff --git a/build.config.cuttlefish.aarch64 b/build.config.cuttlefish.aarch64 index 2cbcdf340485..0cb601958972 100644 --- a/build.config.cuttlefish.aarch64 +++ b/build.config.cuttlefish.aarch64 @@ -1,16 +1,5 @@ -ARCH=arm64 -BRANCH=android-4.14 -CLANG_TRIPLE=aarch64-linux-gnu- -CROSS_COMPILE=aarch64-linux-androidkernel- +. ${ROOT_DIR}/common/build.config.common +. ${ROOT_DIR}/common/build.config.aarch64 + DEFCONFIG=cuttlefish_defconfig -EXTRA_CMDS='' -KERNEL_DIR=common POST_DEFCONFIG_CMDS="check_defconfig" -CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r353983c/bin -LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/aarch64/aarch64-linux-android-4.9/bin -FILES=" -arch/arm64/boot/Image.gz -vmlinux -System.map -" -STOP_SHIP_TRACEPRINTK=1 diff --git a/build.config.cuttlefish.x86_64 b/build.config.cuttlefish.x86_64 index 0b1a15b1fe3b..fed773ccc64a 100644 --- a/build.config.cuttlefish.x86_64 +++ b/build.config.cuttlefish.x86_64 @@ -1,16 +1,5 @@ -ARCH=x86_64 -BRANCH=android-4.14 -CLANG_TRIPLE=x86_64-linux-gnu- -CROSS_COMPILE=x86_64-linux-androidkernel- +. ${ROOT_DIR}/common/build.config.common +. ${ROOT_DIR}/common/build.config.x86_64 + DEFCONFIG=x86_64_cuttlefish_defconfig -EXTRA_CMDS='' -KERNEL_DIR=common POST_DEFCONFIG_CMDS="check_defconfig" -CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r353983c/bin -LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/x86/x86_64-linux-android-4.9/bin -FILES=" -arch/x86/boot/bzImage -vmlinux -System.map -" -STOP_SHIP_TRACEPRINTK=1 diff --git a/build.config.x86_64 b/build.config.x86_64 new file mode 100644 index 000000000000..df73a47e7220 --- /dev/null +++ b/build.config.x86_64 @@ -0,0 +1,11 @@ +ARCH=x86_64 + +CLANG_TRIPLE=x86_64-linux-gnu- +CROSS_COMPILE=x86_64-linux-androidkernel- +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/x86/x86_64-linux-android-4.9/bin + +FILES=" +arch/x86/boot/bzImage +vmlinux +System.map +" From 45c90e52b70455742d50545f50951db365c379a3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 22 Feb 2019 15:42:23 +0100 Subject: [PATCH 0002/3715] UPSTREAM: arm64: avoid clang warning about self-assignment (Upstream commit 366e37e4da23f9df498cc9577cadcb354f7bd431). Building a preprocessed source file for arm64 now always produces a warning with clang because of the page_to_virt() macro assigning a variable to itself. Adding a new temporary variable avoids this issue. Fixes: 2813b9c02962 ("kasan, mm, arm64: tag non slab memory allocated via pagealloc") Reviewed-by: Andrey Konovalov Signed-off-by: Arnd Bergmann Signed-off-by: Catalin Marinas Signed-off-by: Andrey Konovalov Bug: 135692346 Change-Id: I5efe330c68219a32286e3afb6c69019650d51631 --- arch/arm64/include/asm/memory.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index ebdd9ce5db1d..9d44bb9dd0dc 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -316,8 +316,9 @@ static inline void *phys_to_virt(phys_addr_t x) #define page_to_virt(page) ({ \ unsigned long __addr = \ ((__page_to_voff(page)) | PAGE_OFFSET); \ - __addr = __tag_set(__addr, page_kasan_tag(page)); \ - ((void *)__addr); \ + unsigned long __addr_tag = \ + __tag_set(__addr, page_kasan_tag(page)); \ + ((void *)__addr_tag); \ }) #define virt_to_page(vaddr) ((struct page *)((__virt_to_pgoff(vaddr)) | VMEMMAP_START)) From 5d999ff2e0002380ab19e2c475be921e942f57bc Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 1 Aug 2019 10:47:05 -0400 Subject: [PATCH 0003/3715] UPSTREAM: arm64/mm: fix variable 'tag' set but not used (Upstream commit 7732d20a160c76006c7fe7bca5178aea6af1d2e8). When CONFIG_KASAN_SW_TAGS=n, set_tag() is compiled away. GCC throws a warning, mm/kasan/common.c: In function '__kasan_kmalloc': mm/kasan/common.c:464:5: warning: variable 'tag' set but not used [-Wunused-but-set-variable] u8 tag = 0xff; ^~~ Fix it by making __tag_set() a static inline function the same as arch_kasan_set_tag() in mm/kasan/kasan.h for consistency because there is a macro in arch/arm64/include/asm/kasan.h, #define arch_kasan_set_tag(addr, tag) __tag_set(addr, tag) However, when CONFIG_DEBUG_VIRTUAL=n and CONFIG_SPARSEMEM_VMEMMAP=y, page_to_virt() will call __tag_set() with incorrect type of a parameter, so fix that as well. Also, still let page_to_virt() return "void *" instead of "const void *", so will not need to add a similar cast in lowmem_page_address(). Signed-off-by: Qian Cai Signed-off-by: Will Deacon Signed-off-by: Andrey Konovalov Bug: 135692346 Change-Id: Iec54d8b8f0c986d7767b700c31be44066243c68f --- arch/arm64/include/asm/memory.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 9d44bb9dd0dc..c6afbaa7ef74 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -225,7 +225,11 @@ static inline unsigned long kaslr_offset(void) #define __tag_reset(addr) untagged_addr(addr) #define __tag_get(addr) (__u8)((u64)(addr) >> 56) #else -#define __tag_set(addr, tag) (addr) +static inline const void *__tag_set(const void *addr, u8 tag) +{ + return addr; +} + #define __tag_reset(addr) (addr) #define __tag_get(addr) 0 #endif @@ -316,8 +320,8 @@ static inline void *phys_to_virt(phys_addr_t x) #define page_to_virt(page) ({ \ unsigned long __addr = \ ((__page_to_voff(page)) | PAGE_OFFSET); \ - unsigned long __addr_tag = \ - __tag_set(__addr, page_kasan_tag(page)); \ + const void *__addr_tag = \ + __tag_set((void *)__addr, page_kasan_tag(page)); \ ((void *)__addr_tag); \ }) From f56c262878e3d77518b2fd6fa6041a8f43a4debe Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 13 Aug 2019 17:01:05 +0100 Subject: [PATCH 0004/3715] UPSTREAM: arm64: memory: Implement __tag_set() as common function (Upstream commit 6bbd497f027332b14cf2a6792c418c32286b66c2). There's no need for __tag_set() to be a complicated macro when CONFIG_KASAN_SW_TAGS=y and a simple static inline otherwise. Rewrite the thing as a common static inline function. Tested-by: Steve Capper Reviewed-by: Steve Capper Tested-by: Geert Uytterhoeven Reviewed-by: Catalin Marinas Reviewed-by: Mark Rutland Signed-off-by: Will Deacon Signed-off-by: Andrey Konovalov Bug: 135692346 Change-Id: If1389aebf185a187e1008cd7f4b21308ce3c5790 --- arch/arm64/include/asm/memory.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index c6afbaa7ef74..a8a267ccfefb 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -220,20 +220,20 @@ static inline unsigned long kaslr_offset(void) #ifdef CONFIG_KASAN_SW_TAGS #define __tag_shifted(tag) ((u64)(tag) << 56) -#define __tag_set(addr, tag) (__typeof__(addr))( \ - ((u64)(addr) & ~__tag_shifted(0xff)) | __tag_shifted(tag)) #define __tag_reset(addr) untagged_addr(addr) #define __tag_get(addr) (__u8)((u64)(addr) >> 56) #else -static inline const void *__tag_set(const void *addr, u8 tag) -{ - return addr; -} - +#define __tag_shifted(tag) 0UL #define __tag_reset(addr) (addr) #define __tag_get(addr) 0 #endif +static inline const void *__tag_set(const void *addr, u8 tag) +{ + u64 __addr = (u64)addr & ~__tag_shifted(0xff); + return (const void *)(__addr | __tag_shifted(tag)); +} + /* * Physical vs virtual RAM address space conversion. These are * private definitions which should NOT be used outside memory.h From 7ceb2ea79dbb835a8f938f3eee5792b0e4676eb7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 15 Oct 2019 21:04:18 -0700 Subject: [PATCH 0005/3715] BACKPORT: arm64: tags: Preserve tags for addresses translated via TTBR1 (Upstream commit 597399d0cb91d049fcb78fb45c7694771b583bb7). Sign-extending TTBR1 addresses when converting to an untagged address breaks the documented POSIX semantics for mlock() in some obscure error cases where we end up returning -EINVAL instead of -ENOMEM as a direct result of rewriting the upper address bits. Rework the untagged_addr() macro to preserve the upper address bits for TTBR1 addresses and only clear the tag bits for user addresses. This matches the behaviour of the 'clear_address_tag' assembly macro, so rename that and align the implementations at the same time so that they use the same instruction sequences for the tag manipulation. Link: https://lore.kernel.org/stable/20191014162651.GF19200@arrakis.emea.arm.com/ Reported-by: Jan Stancek Tested-by: Jan Stancek Reviewed-by: Catalin Marinas Tested-by: Catalin Marinas Reviewed-by: Vincenzo Frascino Tested-by: Vincenzo Frascino Reviewed-by: Andrey Konovalov Signed-off-by: Will Deacon Change-Id: Iadecb8f15c12984d228c6d6261edac5abadc3f36 Signed-off-by: Andrey Konovalov Bug: 135692346 --- arch/arm64/include/asm/asm-uaccess.h | 7 +++---- arch/arm64/include/asm/memory.h | 10 ++++++++-- arch/arm64/kernel/entry.S | 4 ++-- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h index dd49c3567f20..f9af19fe4a02 100644 --- a/arch/arm64/include/asm/asm-uaccess.h +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -78,10 +78,9 @@ alternative_else_nop_endif /* * Remove the address tag from a virtual address, if present. */ - .macro clear_address_tag, dst, addr - tst \addr, #(1 << 55) - bic \dst, \addr, #(0xff << 56) - csel \dst, \dst, \addr, eq + .macro untagged_addr, dst, addr + sbfx \dst, \addr, #0, #56 + and \dst, \dst, \addr .endm #endif diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index a8a267ccfefb..da96146c2e56 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -215,12 +215,18 @@ static inline unsigned long kaslr_offset(void) * up with a tagged userland pointer. Clear the tag to get a sane pointer to * pass on to access_ok(), for instance. */ -#define untagged_addr(addr) \ +#define __untagged_addr(addr) \ ((__force __typeof__(addr))sign_extend64((__force u64)(addr), 55)) +#define untagged_addr(addr) ({ \ + u64 __addr = (__force u64)addr; \ + __addr &= __untagged_addr(__addr); \ + (__force __typeof__(addr))__addr; \ +}) + #ifdef CONFIG_KASAN_SW_TAGS #define __tag_shifted(tag) ((u64)(tag) << 56) -#define __tag_reset(addr) untagged_addr(addr) +#define __tag_reset(addr) __untagged_addr(addr) #define __tag_get(addr) (__u8)((u64)(addr) >> 56) #else #define __tag_shifted(tag) 0UL diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index c1ffa95c0ad2..31ebefa68bba 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -580,7 +580,7 @@ el1_da: tbnz x23, #7, 1f // PSR_I_BIT enable_irq 1: - clear_address_tag x0, x3 + untagged_addr x0, x3 mov x2, sp // struct pt_regs bl do_mem_abort @@ -744,7 +744,7 @@ el0_da: // enable interrupts before calling the main handler enable_dbg_and_irq ct_user_exit - clear_address_tag x0, x26 + untagged_addr x0, x26 mov x1, x25 mov x2, sp bl do_mem_abort From 8cc417bd3828ed9662e97a3432f868959be6acaa Mon Sep 17 00:00:00 2001 From: DongJoo Kim Date: Mon, 21 Oct 2019 11:48:00 +0900 Subject: [PATCH 0006/3715] ANDROID: move up spin_unlock_bh() ahead of remove_proc_entry() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It causes BUG because remove_proc_entry may sleep while holding spinlock. BUG: scheduling while atomic: ip6tables-resto/887/0x00000202 [] (wait_for_completion) from [] (proc_entry_rundown+0x74/0xd0) [] (proc_entry_rundown) from [] (remove_proc_entry+0xc0/0x18c) [] (remove_proc_entry) from [] (quota_mt2_destroy+0x88/0xa8) [] (quota_mt2_destroy) from [] (cleanup_entry+0x6c/0xf0) [] (cleanup_entry) from [] (do_replace.constprop.2+0x314/0x438) [] (do_replace.constprop.2) from [] (do_ip6t_set_ctl+0x11c/0x238) [] (do_ip6t_set_ctl) from [] (nf_setsockopt+0xd4/0xf0) [] (nf_setsockopt) from [] (ipv6_setsockopt+0x90/0xb8) [] (ipv6_setsockopt) from [] (rawv6_setsockopt+0x54/0x22c) [] (rawv6_setsockopt) from [] (sock_common_setsockopt+0x28/0x30) [] (sock_common_setsockopt) from [] (SyS_setsockopt+0xb8/0x110) [] (SyS_setsockopt) from [] (ret_fast_syscall+0x0/0x48) This is a fix for an Android specific feature which was imported from unofficial upstream (xtables-addons), which also has the same issue: https://sourceforge.net/p/xtables-addons/xtables-addons/ci/master/tree/extensions/xt_quota2.c#l235 After this change the proc entry may now be removed later, when we're already adding another one, potentially with the same name, this will simply fail during creation, see error path for this at: https://sourceforge.net/p/xtables-addons/xtables-addons/ci/master/tree/extensions/xt_quota2.c#l179 Bug: 143092160 Signed-off-by: Maciej Żenczykowski Signed-off-by: DongJoo Kim Change-Id: I3ff3883738353785f5792c5f06bf6b72985c4c68 --- net/netfilter/xt_quota2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/xt_quota2.c b/net/netfilter/xt_quota2.c index 24b774263aa6..c42724469759 100644 --- a/net/netfilter/xt_quota2.c +++ b/net/netfilter/xt_quota2.c @@ -296,8 +296,8 @@ static void quota_mt2_destroy(const struct xt_mtdtor_param *par) } list_del(&e->list); - remove_proc_entry(e->name, proc_xt_quota); spin_unlock_bh(&counter_list_lock); + remove_proc_entry(e->name, proc_xt_quota); kfree(e); } From fb1827d6f2b804e9efb9cbc95820a6a5d7232923 Mon Sep 17 00:00:00 2001 From: Ram Muthiah Date: Mon, 21 Oct 2019 18:05:14 -0700 Subject: [PATCH 0007/3715] ANDROID: Move from clang r353983c to r365631c Bug: 139440459 Test: make ARCH=arm64 cuttlefish_defconfig && make ARCH=arm64 Test: make ARCH=x86_64 x86_64_cuttlefish_defconfig && make ARCH=x86_64 Change-Id: I7ee5d9a687843c09bbc5edb591d3337bd5bfbcbc Signed-off-by: Ram Muthiah --- build.config.common | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build.config.common b/build.config.common index 93d85a5d6f59..57ebdcd6e8be 100644 --- a/build.config.common +++ b/build.config.common @@ -1,8 +1,8 @@ -BRANCH=android-4.14-r +BRANCH=android-4.14 KERNEL_DIR=common CC=clang -CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r353983c/bin +CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r365631c/bin BUILDTOOLS_PREBUILT_BIN=build/build-tools/path/linux-x86 EXTRA_CMDS='' From f61c00dc09ccbcf2f196b01b2ec4c3859311416e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 9 Oct 2019 14:35:48 -0700 Subject: [PATCH 0008/3715] ANDROID: fscrypt: add key removal notifier chain Add a notifier chain so that sdcardfs can evict its dentries when an fscrypt key is about to be removed. This is needed for the FS_IOC_REMOVE_ENCRYPTION_KEY ioctl to properly "lock" the encrypted files underneath sdcardfs when an Android user is stopped. This is meant to be a temporary patch carried as part of the sdcardfs patchset until either we stop using sdcardfs, we get sdcardfs upstream, or we find a way to provide what sdcardfs needs while also benefitting a user upstream. Bug: 120446149 Bug: 142275883 Test: see I83b451a2bc40c72fcd01d24aa5c34ad8de427534 Change-Id: Iec79775a71057d05a371d77da4a6541cb8e09cb7 Signed-off-by: Eric Biggers --- fs/crypto/keyring.c | 22 ++++++++++++++++++++++ include/linux/fscrypt.h | 14 ++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c index 7f43ca5d30ae..58822246e229 100644 --- a/fs/crypto/keyring.c +++ b/fs/crypto/keyring.c @@ -704,12 +704,34 @@ static int check_for_busy_inodes(struct super_block *sb, return -EBUSY; } +static BLOCKING_NOTIFIER_HEAD(fscrypt_key_removal_notifiers); + +/* + * Register a function to be executed when the FS_IOC_REMOVE_ENCRYPTION_KEY + * ioctl has removed a key and is about to try evicting inodes. + */ +int fscrypt_register_key_removal_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&fscrypt_key_removal_notifiers, + nb); +} +EXPORT_SYMBOL_GPL(fscrypt_register_key_removal_notifier); + +int fscrypt_unregister_key_removal_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&fscrypt_key_removal_notifiers, + nb); +} +EXPORT_SYMBOL_GPL(fscrypt_unregister_key_removal_notifier); + static int try_to_lock_encrypted_files(struct super_block *sb, struct fscrypt_master_key *mk) { int err1; int err2; + blocking_notifier_call_chain(&fscrypt_key_removal_notifiers, 0, NULL); + /* * An inode can't be evicted while it is dirty or has dirty pages. * Thus, we first have to clean the inodes in ->mk_decrypted_inodes. diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 72ea24ce52ab..2aad2f949b9e 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -146,6 +146,8 @@ extern int fscrypt_ioctl_remove_key(struct file *filp, void __user *arg); extern int fscrypt_ioctl_remove_key_all_users(struct file *filp, void __user *arg); extern int fscrypt_ioctl_get_key_status(struct file *filp, void __user *arg); +extern int fscrypt_register_key_removal_notifier(struct notifier_block *nb); +extern int fscrypt_unregister_key_removal_notifier(struct notifier_block *nb); /* keysetup.c */ extern int fscrypt_get_encryption_info(struct inode *); @@ -405,6 +407,18 @@ static inline int fscrypt_ioctl_get_key_status(struct file *filp, return -EOPNOTSUPP; } +static inline int fscrypt_register_key_removal_notifier( + struct notifier_block *nb) +{ + return 0; +} + +static inline int fscrypt_unregister_key_removal_notifier( + struct notifier_block *nb) +{ + return 0; +} + /* keysetup.c */ static inline int fscrypt_get_encryption_info(struct inode *inode) { From 3e5dc4ed2d3ff835dbf945efa9270a76a51a4fd5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 9 Oct 2019 14:35:49 -0700 Subject: [PATCH 0009/3715] ANDROID: sdcardfs: evict dentries on fscrypt key removal Use the fscrypt key removal notifier chain to make sdcardfs evict its dentries when an fscrypt key is about to be removed. This is needed for the FS_IOC_REMOVE_ENCRYPTION_KEY ioctl to properly "lock" the encrypted files underneath sdcardfs when an Android user is stopped. Test: pm create-user 10 am start-user 10 find /data/media/10/ # filenames are in plaintext form am stop-user 10 find /data/media/10/ # filenames are in ciphertext form (But currently the kernel and vold still warn about other files still being open, due to b/140762419) Bug: 120446149 Bug: 142275883 Change-Id: I83b451a2bc40c72fcd01d24aa5c34ad8de427534 Signed-off-by: Eric Biggers --- fs/sdcardfs/main.c | 7 +++++++ fs/sdcardfs/sdcardfs.h | 3 +++ fs/sdcardfs/super.c | 17 +++++++++++++++++ 3 files changed, 27 insertions(+) diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c index d890c5711907..19312ee881cc 100644 --- a/fs/sdcardfs/main.c +++ b/fs/sdcardfs/main.c @@ -19,6 +19,7 @@ */ #include "sdcardfs.h" +#include #include #include #include @@ -374,6 +375,9 @@ static int sdcardfs_read_super(struct vfsmount *mnt, struct super_block *sb, list_add(&sb_info->list, &sdcardfs_super_list); mutex_unlock(&sdcardfs_super_list_lock); + sb_info->fscrypt_nb.notifier_call = sdcardfs_on_fscrypt_key_removed; + fscrypt_register_key_removal_notifier(&sb_info->fscrypt_nb); + if (!silent) pr_info("sdcardfs: mounted on top of %s type %s\n", dev_name, lower_sb->s_type->name); @@ -444,6 +448,9 @@ void sdcardfs_kill_sb(struct super_block *sb) if (sb->s_magic == SDCARDFS_SUPER_MAGIC && sb->s_fs_info) { sbi = SDCARDFS_SB(sb); + + fscrypt_unregister_key_removal_notifier(&sbi->fscrypt_nb); + mutex_lock(&sdcardfs_super_list_lock); list_del(&sbi->list); mutex_unlock(&sdcardfs_super_list_lock); diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h index 6219771ed71c..c9d855da629f 100644 --- a/fs/sdcardfs/sdcardfs.h +++ b/fs/sdcardfs/sdcardfs.h @@ -150,6 +150,8 @@ extern struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode, userid_t id); extern int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb, struct path *lower_path, userid_t id); +extern int sdcardfs_on_fscrypt_key_removed(struct notifier_block *nb, + unsigned long action, void *data); /* file private data */ struct sdcardfs_file_info { @@ -223,6 +225,7 @@ struct sdcardfs_sb_info { struct path obbpath; void *pkgl_id; struct list_head list; + struct notifier_block fscrypt_nb; }; /* diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c index bd2eb0257020..5e7ceae0f9dd 100644 --- a/fs/sdcardfs/super.c +++ b/fs/sdcardfs/super.c @@ -319,6 +319,23 @@ static int sdcardfs_show_options(struct vfsmount *mnt, struct seq_file *m, return 0; }; +int sdcardfs_on_fscrypt_key_removed(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct sdcardfs_sb_info *sbi = container_of(nb, struct sdcardfs_sb_info, + fscrypt_nb); + + /* + * Evict any unused sdcardfs dentries (and hence any unused sdcardfs + * inodes, since sdcardfs doesn't cache unpinned inodes by themselves) + * so that the lower filesystem's encrypted inodes can be evicted. + * This is needed to make the FS_IOC_REMOVE_ENCRYPTION_KEY ioctl + * properly "lock" the files underneath the sdcardfs mount. + */ + shrink_dcache_sb(sbi->sb); + return NOTIFY_OK; +} + const struct super_operations sdcardfs_sops = { .put_super = sdcardfs_put_super, .statfs = sdcardfs_statfs, From 4039d072b0dcd83287b2b2764bdb5b40a1284110 Mon Sep 17 00:00:00 2001 From: Jason Macnak Date: Fri, 25 Oct 2019 13:40:37 -0700 Subject: [PATCH 0010/3715] ANDROID: Allow DRM_IOCTL_MODE_*_DUMB for render clients. Minigbm uses dumb ioctls when the virtio gpu is in 2D mode. This changes allows those calls to pass the permission checks in drm_ioctl_permit(). Bug: b/123764798 Test: booted cuttlefish on drm stack w/o 3d Change-Id: I872ba8f6d0a284127178dd60f8a2048e5e7397fb Signed-off-by: Jason Macnak Signed-off-by: Alistair Delva --- drivers/gpu/drm/drm_ioctl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 53f319369de5..318ebf3763ba 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -640,9 +640,9 @@ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_IOCTL_DEF(DRM_IOCTL_MODE_RMFB, drm_mode_rmfb, DRM_CONTROL_ALLOW|DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_MODE_PAGE_FLIP, drm_mode_page_flip_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_MODE_DIRTYFB, drm_mode_dirtyfb_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_DUMB, drm_mode_create_dumb_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_MAP_DUMB, drm_mode_mmap_dumb_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROY_DUMB, drm_mode_destroy_dumb_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_DUMB, drm_mode_create_dumb_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_MAP_DUMB, drm_mode_mmap_dumb_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROY_DUMB, drm_mode_destroy_dumb_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_GETPROPERTIES, drm_mode_obj_get_properties_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_SETPROPERTY, drm_mode_obj_set_property_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR2, drm_mode_cursor2_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), From 2d0e4c21fcee807d9ae6e6fbe5f107ecf4684902 Mon Sep 17 00:00:00 2001 From: Connor O'Brien Date: Tue, 19 Feb 2019 17:36:53 -0800 Subject: [PATCH 0011/3715] ANDROID: cpufreq: create dummy cpufreq driver /proc/uid_time_in_state has no data on cuttlefish because its cpu frequency tables are empty. Because time in state & concurrent time accounting are intertwined this causes the /proc/uid_concurrent_{policy,active}_time files to also not contain any data. Add a minimal, fake cpufreq driver that creates a freq table with 2 frequencies per policy, to allow testing time in state functionality. Test: all 3 proc files show reasonable data on cuttlefish Test: log shows no errors from bad /proc/uid_time_in_state format Bug: 139763108 Bug: 140796321 Bug: 141206930 Change-Id: I8c7fe1007a80c21a9bcba9455bf837947cf42963 Signed-off-by: Connor O'Brien --- arch/arm64/configs/cuttlefish_defconfig | 1 + arch/x86/configs/x86_64_cuttlefish_defconfig | 1 + drivers/cpufreq/Kconfig | 9 ++++ drivers/cpufreq/Makefile | 2 + drivers/cpufreq/dummy-cpufreq.c | 53 ++++++++++++++++++++ 5 files changed, 66 insertions(+) create mode 100644 drivers/cpufreq/dummy-cpufreq.c diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index 31c41288dd24..9ee2a6014efb 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -75,6 +75,7 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y CONFIG_CPU_FREQ_GOV_POWERSAVE=y CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y CONFIG_CPUFREQ_DT=y +CONFIG_CPUFREQ_DUMMY=y CONFIG_ARM_BIG_LITTLE_CPUFREQ=y CONFIG_ARM_DT_BL_CPUFREQ=y CONFIG_ARM_SCPI_CPUFREQ=y diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 3d8854b6aa48..4ec35bad32d4 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -75,6 +75,7 @@ CONFIG_ACPI_PROCFS_POWER=y # CONFIG_X86_PM_TIMER is not set CONFIG_CPU_FREQ_TIMES=y CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_CPUFREQ_DUMMY=y CONFIG_X86_ACPI_CPUFREQ=y CONFIG_PCI_MMCONFIG=y CONFIG_PCI_MSI=y diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index b374515f9813..2f66de0304e4 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -234,6 +234,15 @@ config CPUFREQ_DT_PLATDEV If in doubt, say N. +config CPUFREQ_DUMMY + tristate "Dummy CPU frequency driver" + help + This option adds a generic dummy CPUfreq driver, which sets a fake + 2-frequency table when initializing each policy and otherwise does + nothing. + + If in doubt, say N + if X86 source "drivers/cpufreq/Kconfig.x86" endif diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 3ad8aeb687ef..60e87604eb47 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -20,6 +20,8 @@ obj-$(CONFIG_CPU_FREQ_GOV_ATTR_SET) += cpufreq_governor_attr_set.o obj-$(CONFIG_CPUFREQ_DT) += cpufreq-dt.o obj-$(CONFIG_CPUFREQ_DT_PLATDEV) += cpufreq-dt-platdev.o +obj-$(CONFIG_CPUFREQ_DUMMY) += dummy-cpufreq.o + ################################################################################## # x86 drivers. # Link order matters. K8 is preferred to ACPI because of firmware bugs in early diff --git a/drivers/cpufreq/dummy-cpufreq.c b/drivers/cpufreq/dummy-cpufreq.c new file mode 100644 index 000000000000..203c859e2a7b --- /dev/null +++ b/drivers/cpufreq/dummy-cpufreq.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Google, Inc. + */ +#include +#include + +static struct cpufreq_frequency_table freq_table[] = { + { .frequency = 1 }, + { .frequency = 2 }, + { .frequency = CPUFREQ_TABLE_END }, +}; + +static int dummy_cpufreq_target_index(struct cpufreq_policy *policy, + unsigned int index) +{ + return 0; +} + +static int dummy_cpufreq_driver_init(struct cpufreq_policy *policy) +{ + return cpufreq_table_validate_and_show(policy, freq_table); +} + +static int dummy_cpufreq_verify(struct cpufreq_policy *policy) +{ + return 0; +} + +static struct cpufreq_driver dummy_cpufreq_driver = { + .name = "dummy", + .target_index = dummy_cpufreq_target_index, + .init = dummy_cpufreq_driver_init, + .verify = dummy_cpufreq_verify, + .attr = cpufreq_generic_attr, +}; + +static int __init dummy_cpufreq_init(void) +{ + return cpufreq_register_driver(&dummy_cpufreq_driver); +} + +static void __exit dummy_cpufreq_exit(void) +{ + cpufreq_unregister_driver(&dummy_cpufreq_driver); +} + +module_init(dummy_cpufreq_init); +module_exit(dummy_cpufreq_exit); + +MODULE_AUTHOR("Connor O'Brien "); +MODULE_DESCRIPTION("dummy cpufreq driver"); +MODULE_LICENSE("GPL"); From 0b383e2946f55d64cfd2afeaf1586ef4b1f8df89 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 18 Oct 2019 07:43:21 -0400 Subject: [PATCH 0012/3715] rtlwifi: Fix potential overflow on P2P code commit 8c55dedb795be8ec0cf488f98c03a1c2176f7fb1 upstream. Nicolas Waisman noticed that even though noa_len is checked for a compatible length it's still possible to overrun the buffers of p2pinfo since there's no check on the upper bound of noa_num. Bound noa_num against P2P_MAX_NOA_NUM. Bug: 142967706 Reported-by: Nicolas Waisman Signed-off-by: Laura Abbott Acked-by: Ping-Ke Shih Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman Change-Id: I90a9b285feb50b6b5c30e242756d47848902b634 --- drivers/net/wireless/realtek/rtlwifi/ps.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wireless/realtek/rtlwifi/ps.c b/drivers/net/wireless/realtek/rtlwifi/ps.c index f6d00613c53d..e1297809535f 100644 --- a/drivers/net/wireless/realtek/rtlwifi/ps.c +++ b/drivers/net/wireless/realtek/rtlwifi/ps.c @@ -774,6 +774,9 @@ static void rtl_p2p_noa_ie(struct ieee80211_hw *hw, void *data, return; } else { noa_num = (noa_len - 2) / 13; + if (noa_num > P2P_MAX_NOA_NUM) + noa_num = P2P_MAX_NOA_NUM; + } noa_index = ie[3]; if (rtlpriv->psc.p2p_ps_info.p2p_ps_mode == @@ -868,6 +871,9 @@ static void rtl_p2p_action_ie(struct ieee80211_hw *hw, void *data, return; } else { noa_num = (noa_len - 2) / 13; + if (noa_num > P2P_MAX_NOA_NUM) + noa_num = P2P_MAX_NOA_NUM; + } noa_index = ie[3]; if (rtlpriv->psc.p2p_ps_info.p2p_ps_mode == From c9a4502888e7226a43c5ca2f252c45767f3981fe Mon Sep 17 00:00:00 2001 From: Alistair Delva Date: Mon, 28 Oct 2019 15:07:10 -0700 Subject: [PATCH 0013/3715] ANDROID: dummy_cpufreq: Implement get() Fixes panic in time_cpufreq_notifier+0x94 caused by notifying the x86 TSC synchronization code that a CPU frequency change has occurred, but not properly updating policy->cur beforehand, causing a division by zero. Link: https://github.com/ClangBuiltLinux/linux/issues/756 Change-Id: I687093bbdb402a13341762bf9d91dd7f9daffe8b Signed-off-by: Alistair Delva --- drivers/cpufreq/dummy-cpufreq.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/cpufreq/dummy-cpufreq.c b/drivers/cpufreq/dummy-cpufreq.c index 203c859e2a7b..e614a27af11c 100644 --- a/drivers/cpufreq/dummy-cpufreq.c +++ b/drivers/cpufreq/dummy-cpufreq.c @@ -22,6 +22,11 @@ static int dummy_cpufreq_driver_init(struct cpufreq_policy *policy) return cpufreq_table_validate_and_show(policy, freq_table); } +static unsigned int dummy_cpufreq_get(unsigned int cpu) +{ + return 1; +} + static int dummy_cpufreq_verify(struct cpufreq_policy *policy) { return 0; @@ -31,6 +36,7 @@ static struct cpufreq_driver dummy_cpufreq_driver = { .name = "dummy", .target_index = dummy_cpufreq_target_index, .init = dummy_cpufreq_driver_init, + .get = dummy_cpufreq_get, .verify = dummy_cpufreq_verify, .attr = cpufreq_generic_attr, }; From 59eb65b24c35120ac21574a5971a760ed3a83877 Mon Sep 17 00:00:00 2001 From: Roman Kiryanov Date: Mon, 28 Oct 2019 14:34:48 -0700 Subject: [PATCH 0014/3715] ANDROID: virtio: virtio_input: Set the amount of multitouch slots in virtio input Virtio input was missing the the amount of multitouch slots and kernel was filtering out ABS_MT_SLOT events for a screen is touched in more than one place. Bug: 143488374 Signed-off-by: Roman Kiryanov Change-Id: I617099435af311e6b0ee127b76eafe13834ea8f8 (cherry picked from commit af5cf146e4adae1dda52b006a86bab9d26e49a0e) --- drivers/virtio/virtio_input.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c index 3a0468f2ceb0..89e14b01959b 100644 --- a/drivers/virtio/virtio_input.c +++ b/drivers/virtio/virtio_input.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -163,6 +164,12 @@ static void virtinput_cfg_abs(struct virtio_input *vi, int abs) virtio_cread(vi->vdev, struct virtio_input_config, u.abs.flat, &fl); input_set_abs_params(vi->idev, abs, mi, ma, fu, fl); input_abs_set_res(vi->idev, abs, re); + if (abs == ABS_MT_TRACKING_ID) + input_mt_init_slots(vi->idev, + ma, /* input max finger */ + INPUT_MT_DIRECT + | INPUT_MT_DROP_UNUSED + | INPUT_MT_TRACK); } static int virtinput_init_vqs(struct virtio_input *vi) From 7a5ba8ee796d5149299e070cc8a05313621e70b5 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Wed, 18 Sep 2019 12:20:38 +0800 Subject: [PATCH 0015/3715] scsi: ufs: skip shutdown if hba is not powered [ Upstream commit f51913eef23f74c3bd07899dc7f1ed6df9e521d8 ] In some cases, hba may go through shutdown flow without successful initialization and then make system hang. For example, if ufshcd_change_power_mode() gets error and leads to ufshcd_hba_exit() to release resources of the host, future shutdown flow may hang the system since the host register will be accessed in unpowered state. To solve this issue, simply add checking to skip shutdown for above kind of situation. Link: https://lore.kernel.org/r/1568780438-28753-1-git-send-email-stanley.chu@mediatek.com Signed-off-by: Stanley Chu Acked-by: Bean Huo Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/ufs/ufshcd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 60c9184bad3b..07cae5ea608c 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -7755,6 +7755,9 @@ int ufshcd_shutdown(struct ufs_hba *hba) { int ret = 0; + if (!hba->is_powered) + goto out; + if (ufshcd_is_ufs_dev_poweroff(hba) && ufshcd_is_link_off(hba)) goto out; From 00cee7e535fd30e537cfcf2b9340b6bfcf716488 Mon Sep 17 00:00:00 2001 From: Xiang Chen Date: Sat, 7 Sep 2019 09:07:30 +0800 Subject: [PATCH 0016/3715] scsi: megaraid: disable device when probe failed after enabled device [ Upstream commit 70054aa39a013fa52eff432f2223b8bd5c0048f8 ] For pci device, need to disable device when probe failed after enabled device. Link: https://lore.kernel.org/r/1567818450-173315-1-git-send-email-chenxiang66@hisilicon.com Signed-off-by: Xiang Chen Reviewed-by: John Garry Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/megaraid.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c index 9b6f5d024dba..f5c09bbf9374 100644 --- a/drivers/scsi/megaraid.c +++ b/drivers/scsi/megaraid.c @@ -4221,11 +4221,11 @@ megaraid_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) */ if (pdev->subsystem_vendor == PCI_VENDOR_ID_COMPAQ && pdev->subsystem_device == 0xC000) - return -ENODEV; + goto out_disable_device; /* Now check the magic signature byte */ pci_read_config_word(pdev, PCI_CONF_AMISIG, &magic); if (magic != HBA_SIGNATURE_471 && magic != HBA_SIGNATURE) - return -ENODEV; + goto out_disable_device; /* Ok it is probably a megaraid */ } From 1c9b9e5b6b29b48d46c380d78322ecc51c2b367d Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 12 Sep 2019 11:09:06 -0700 Subject: [PATCH 0017/3715] scsi: qla2xxx: Fix unbound sleep in fcport delete path. [ Upstream commit c3b6a1d397420a0fdd97af2f06abfb78adc370df ] There are instances, though rare, where a LOGO request cannot be sent out and the thread in free session done can wait indefinitely. Fix this by putting an upper bound to sleep. Link: https://lore.kernel.org/r/20190912180918.6436-3-hmadhani@marvell.com Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_target.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 87e04c4a4982..11753ed3433c 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -996,6 +996,7 @@ static void qlt_free_session_done(struct work_struct *work) if (logout_started) { bool traced = false; + u16 cnt = 0; while (!ACCESS_ONCE(sess->logout_completed)) { if (!traced) { @@ -1005,6 +1006,9 @@ static void qlt_free_session_done(struct work_struct *work) traced = true; } msleep(100); + cnt++; + if (cnt > 200) + break; } ql_dbg(ql_dbg_disc, vha, 0xf087, From c800252b3a2b46144c0f09f0d42f09cf5e237629 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 24 Sep 2019 09:25:52 -0700 Subject: [PATCH 0018/3715] ARM: OMAP2+: Fix missing reset done flag for am3 and am43 [ Upstream commit 8ad8041b98c665b6147e607b749586d6e20ba73a ] For ti,sysc-omap4 compatible devices with no sysstatus register, we do have reset done status available in the SOFTRESET bit that clears when the reset is done. This is documented for example in am437x TRM for DMTIMER_TIOCP_CFG register. The am335x TRM just says that SOFTRESET bit value 1 means reset is ongoing, but it behaves the same way clearing after reset is done. With the ti-sysc driver handling this automatically based on no sysstatus register defined, we see warnings if SYSC_HAS_RESET_STATUS is missing in the legacy platform data: ti-sysc 48042000.target-module: sysc_flags 00000222 != 00000022 ti-sysc 48044000.target-module: sysc_flags 00000222 != 00000022 ti-sysc 48046000.target-module: sysc_flags 00000222 != 00000022 ... Let's fix these warnings by adding SYSC_HAS_RESET_STATUS. Let's also remove the useless parentheses while at it. If it turns out we do have ti,sysc-omap4 compatible devices without a working SOFTRESET bit we can set up additional quirk handling for it. Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c b/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c index de06a1d5ffab..e61c14f59063 100644 --- a/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c @@ -966,7 +966,8 @@ static struct omap_hwmod_class_sysconfig am33xx_timer_sysc = { .rev_offs = 0x0000, .sysc_offs = 0x0010, .syss_offs = 0x0014, - .sysc_flags = (SYSC_HAS_SIDLEMODE | SYSC_HAS_SOFTRESET), + .sysc_flags = SYSC_HAS_SIDLEMODE | SYSC_HAS_SOFTRESET | + SYSC_HAS_RESET_STATUS, .idlemodes = (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART | SIDLE_SMART_WKUP), .sysc_fields = &omap_hwmod_sysc_type2, From 0cafebaf5719dc84361e39f3f3874721ec95d1af Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Tue, 17 Sep 2019 17:47:12 -0500 Subject: [PATCH 0019/3715] ieee802154: ca8210: prevent memory leak [ Upstream commit 6402939ec86eaf226c8b8ae00ed983936b164908 ] In ca8210_probe the allocated pdata needs to be assigned to spi_device->dev.platform_data before calling ca8210_get_platform_data. Othrwise when ca8210_get_platform_data fails pdata cannot be released. Signed-off-by: Navid Emamdoost Link: https://lore.kernel.org/r/20190917224713.26371-1-navid.emamdoost@gmail.com Signed-off-by: Stefan Schmidt Signed-off-by: Sasha Levin --- drivers/net/ieee802154/ca8210.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index dcd10dba08c7..3a58962babd4 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -3153,12 +3153,12 @@ static int ca8210_probe(struct spi_device *spi_device) goto error; } + priv->spi->dev.platform_data = pdata; ret = ca8210_get_platform_data(priv->spi, pdata); if (ret) { dev_crit(&spi_device->dev, "ca8210_get_platform_data failed\n"); goto error; } - priv->spi->dev.platform_data = pdata; ret = ca8210_dev_com_init(priv); if (ret) { From 5e55d6c5a15cd70f1ececb6c5546a91273aad683 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Mon, 30 Sep 2019 11:54:50 +0300 Subject: [PATCH 0020/3715] ARM: dts: am4372: Set memory bandwidth limit for DISPC [ Upstream commit f90ec6cdf674248dcad85bf9af6e064bf472b841 ] Set memory bandwidth limit to filter out resolutions above 720p@60Hz to avoid underflow errors due to the bandwidth needs of higher resolutions. am43xx can not provide enough bandwidth to DISPC to correctly handle 'high' resolutions. Signed-off-by: Peter Ujfalusi Signed-off-by: Tomi Valkeinen Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/boot/dts/am4372.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi index 4714a59fd86d..345c117bd5ef 100644 --- a/arch/arm/boot/dts/am4372.dtsi +++ b/arch/arm/boot/dts/am4372.dtsi @@ -1118,6 +1118,8 @@ ti,hwmods = "dss_dispc"; clocks = <&disp_clk>; clock-names = "fck"; + + max-memory-bandwidth = <230000000>; }; rfbi: rfbi@4832a800 { From 2a21025504e7494932f0b3c926b1bafb0eb75ba3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Vok=C3=A1=C4=8D?= Date: Thu, 26 Sep 2019 10:59:17 +0200 Subject: [PATCH 0021/3715] net: dsa: qca8k: Use up to 7 ports for all operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7ae6d93c8f052b7a77ba56ed0f654e22a2876739 ] The QCA8K family supports up to 7 ports. So use the existing QCA8K_NUM_PORTS define to allocate the switch structure and limit all operations with the switch ports. This was not an issue until commit 0394a63acfe2 ("net: dsa: enable and disable all ports") disabled all unused ports. Since the unused ports 7-11 are outside of the correct register range on this switch some registers were rewritten with invalid content. Fixes: 6b93fb46480a ("net-next: dsa: add new driver for qca8xxx family") Fixes: a0c02161ecfc ("net: dsa: variable number of ports") Fixes: 0394a63acfe2 ("net: dsa: enable and disable all ports") Signed-off-by: Michal Vokáč Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/dsa/qca8k.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index c3c9d7e33bd6..8e49974ffa0e 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -551,7 +551,7 @@ qca8k_setup(struct dsa_switch *ds) BIT(0) << QCA8K_GLOBAL_FW_CTRL1_UC_DP_S); /* Setup connection between CPU port & user ports */ - for (i = 0; i < DSA_MAX_PORTS; i++) { + for (i = 0; i < QCA8K_NUM_PORTS; i++) { /* CPU port gets connected to all user ports of the switch */ if (dsa_is_cpu_port(ds, i)) { qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(QCA8K_CPU_PORT), @@ -900,7 +900,7 @@ qca8k_sw_probe(struct mdio_device *mdiodev) if (id != QCA8K_ID_QCA8337) return -ENODEV; - priv->ds = dsa_switch_alloc(&mdiodev->dev, DSA_MAX_PORTS); + priv->ds = dsa_switch_alloc(&mdiodev->dev, QCA8K_NUM_PORTS); if (!priv->ds) return -ENOMEM; From 2f327a23a93c5d5da73f80ae6f5131868d509b97 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 30 Sep 2019 11:39:52 +0200 Subject: [PATCH 0022/3715] MIPS: dts: ar9331: fix interrupt-controller size [ Upstream commit 0889d07f3e4b171c453b2aaf2b257f9074cdf624 ] It is two registers each of 4 byte. Signed-off-by: Oleksij Rempel Signed-off-by: Paul Burton Cc: Rob Herring Cc: Mark Rutland Cc: Pengutronix Kernel Team Cc: Ralf Baechle Cc: James Hogan Cc: devicetree@vger.kernel.org Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Sasha Levin --- arch/mips/boot/dts/qca/ar9331.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/boot/dts/qca/ar9331.dtsi b/arch/mips/boot/dts/qca/ar9331.dtsi index efd5f0722206..39b6269610d4 100644 --- a/arch/mips/boot/dts/qca/ar9331.dtsi +++ b/arch/mips/boot/dts/qca/ar9331.dtsi @@ -99,7 +99,7 @@ miscintc: interrupt-controller@18060010 { compatible = "qca,ar7240-misc-intc"; - reg = <0x18060010 0x4>; + reg = <0x18060010 0x8>; interrupt-parent = <&cpuintc>; interrupts = <6>; From cfb7eab6cab9f96f888c82ebed019163de53b872 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Fri, 27 Sep 2019 16:49:20 +0100 Subject: [PATCH 0023/3715] xen/efi: Set nonblocking callbacks [ Upstream commit df359f0d09dc029829b66322707a2f558cb720f7 ] Other parts of the kernel expect these nonblocking EFI callbacks to exist and crash when running under Xen. Since the implementations of xen_efi_set_variable() and xen_efi_query_variable_info() do not take any locks, use them for the nonblocking callbacks too. Signed-off-by: Ross Lagerwall Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- arch/arm/xen/efi.c | 2 ++ arch/x86/xen/efi.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/arm/xen/efi.c b/arch/arm/xen/efi.c index b4d78959cadf..bc9a37b3cecd 100644 --- a/arch/arm/xen/efi.c +++ b/arch/arm/xen/efi.c @@ -31,7 +31,9 @@ void __init xen_efi_runtime_setup(void) efi.get_variable = xen_efi_get_variable; efi.get_next_variable = xen_efi_get_next_variable; efi.set_variable = xen_efi_set_variable; + efi.set_variable_nonblocking = xen_efi_set_variable; efi.query_variable_info = xen_efi_query_variable_info; + efi.query_variable_info_nonblocking = xen_efi_query_variable_info; efi.update_capsule = xen_efi_update_capsule; efi.query_capsule_caps = xen_efi_query_capsule_caps; efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count; diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c index a18703be9ead..4769a069d5bd 100644 --- a/arch/x86/xen/efi.c +++ b/arch/x86/xen/efi.c @@ -77,7 +77,9 @@ static efi_system_table_t __init *xen_efi_probe(void) efi.get_variable = xen_efi_get_variable; efi.get_next_variable = xen_efi_get_next_variable; efi.set_variable = xen_efi_set_variable; + efi.set_variable_nonblocking = xen_efi_set_variable; efi.query_variable_info = xen_efi_query_variable_info; + efi.query_variable_info_nonblocking = xen_efi_query_variable_info; efi.update_capsule = xen_efi_update_capsule; efi.query_capsule_caps = xen_efi_query_capsule_caps; efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count; From 5ccdcbeb3107e9c00180e8ddbbbcb9147f157adf Mon Sep 17 00:00:00 2001 From: Miaoqing Pan Date: Thu, 26 Sep 2019 16:16:50 +0800 Subject: [PATCH 0024/3715] nl80211: fix null pointer dereference [ Upstream commit b501426cf86e70649c983c52f4c823b3c40d72a3 ] If the interface is not in MESH mode, the command 'iw wlanx mpath del' will cause kernel panic. The root cause is null pointer access in mpp_flush_by_proxy(), as the pointer 'sdata->u.mesh.mpp_paths' is NULL for non MESH interface. Unable to handle kernel NULL pointer dereference at virtual address 00000068 [...] PC is at _raw_spin_lock_bh+0x20/0x5c LR is at mesh_path_del+0x1c/0x17c [mac80211] [...] Process iw (pid: 4537, stack limit = 0xd83e0238) [...] [] (_raw_spin_lock_bh) from [] (mesh_path_del+0x1c/0x17c [mac80211]) [] (mesh_path_del [mac80211]) from [] (extack_doit+0x20/0x68 [compat]) [] (extack_doit [compat]) from [] (genl_rcv_msg+0x274/0x30c) [] (genl_rcv_msg) from [] (netlink_rcv_skb+0x58/0xac) [] (netlink_rcv_skb) from [] (genl_rcv+0x20/0x34) [] (genl_rcv) from [] (netlink_unicast+0x11c/0x204) [] (netlink_unicast) from [] (netlink_sendmsg+0x30c/0x370) [] (netlink_sendmsg) from [] (sock_sendmsg+0x70/0x84) [] (sock_sendmsg) from [] (___sys_sendmsg.part.3+0x188/0x228) [] (___sys_sendmsg.part.3) from [] (__sys_sendmsg+0x4c/0x70) [] (__sys_sendmsg) from [] (ret_fast_syscall+0x0/0x44) Code: e2822c02 e2822001 e5832004 f590f000 (e1902f9f) ---[ end trace bbd717600f8f884d ]--- Signed-off-by: Miaoqing Pan Link: https://lore.kernel.org/r/1569485810-761-1-git-send-email-miaoqing@codeaurora.org [trim useless data from commit message] Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/nl80211.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ec504c4a397b..ff31feeee8e3 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5504,6 +5504,9 @@ static int nl80211_del_mpath(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->del_mpath) return -EOPNOTSUPP; + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) + return -EOPNOTSUPP; + return rdev_del_mpath(rdev, dev, dst); } From 2f431407aba68d77b6baa5dab044b074fb6cfc4a Mon Sep 17 00:00:00 2001 From: Miaoqing Pan Date: Fri, 27 Sep 2019 10:03:16 +0800 Subject: [PATCH 0025/3715] mac80211: fix txq null pointer dereference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8ed31a264065ae92058ce54aa3cc8da8d81dc6d7 ] If the interface type is P2P_DEVICE or NAN, read the file of '/sys/kernel/debug/ieee80211/phyx/netdev:wlanx/aqm' will get a NULL pointer dereference. As for those interface type, the pointer sdata->vif.txq is NULL. Unable to handle kernel NULL pointer dereference at virtual address 00000011 CPU: 1 PID: 30936 Comm: cat Not tainted 4.14.104 #1 task: ffffffc0337e4880 task.stack: ffffff800cd20000 PC is at ieee80211_if_fmt_aqm+0x34/0xa0 [mac80211] LR is at ieee80211_if_fmt_aqm+0x34/0xa0 [mac80211] [...] Process cat (pid: 30936, stack limit = 0xffffff800cd20000) [...] [] ieee80211_if_fmt_aqm+0x34/0xa0 [mac80211] [] ieee80211_if_read+0x60/0xbc [mac80211] [] ieee80211_if_read_aqm+0x28/0x30 [mac80211] [] full_proxy_read+0x2c/0x48 [] __vfs_read+0x2c/0xd4 [] vfs_read+0x8c/0x108 [] SyS_read+0x40/0x7c Signed-off-by: Miaoqing Pan Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/1569549796-8223-1-git-send-email-miaoqing@codeaurora.org [trim useless data from commit message] Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/debugfs_netdev.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index c813207bb123..928b6b0464b8 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -490,9 +490,14 @@ static ssize_t ieee80211_if_fmt_aqm( const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) { struct ieee80211_local *local = sdata->local; - struct txq_info *txqi = to_txq_info(sdata->vif.txq); + struct txq_info *txqi; int len; + if (!sdata->vif.txq) + return 0; + + txqi = to_txq_info(sdata->vif.txq); + spin_lock_bh(&local->fq.lock); rcu_read_lock(); @@ -659,7 +664,9 @@ static void add_common_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(rc_rateidx_vht_mcs_mask_5ghz); DEBUGFS_ADD(hw_queues); - if (sdata->local->ops->wake_tx_queue) + if (sdata->local->ops->wake_tx_queue && + sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && + sdata->vif.type != NL80211_IFTYPE_NAN) DEBUGFS_ADD(aqm); } From 4f6ba331e558372ede14b96c34f845b434fc3126 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 10 Sep 2019 05:59:07 +0200 Subject: [PATCH 0026/3715] mips: Loongson: Fix the link time qualifier of 'serial_exit()' [ Upstream commit 25b69a889b638b0b7e51e2c4fe717a66bec0e566 ] 'exit' functions should be marked as __exit, not __init. Fixes: 85cc028817ef ("mips: make loongsoon serial driver explicitly modular") Signed-off-by: Christophe JAILLET Signed-off-by: Paul Burton Cc: chenhc@lemote.com Cc: ralf@linux-mips.org Cc: jhogan@kernel.org Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: kernel-janitors@vger.kernel.org Signed-off-by: Sasha Levin --- arch/mips/loongson64/common/serial.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/loongson64/common/serial.c b/arch/mips/loongson64/common/serial.c index ffefc1cb2612..98c3a7feb10f 100644 --- a/arch/mips/loongson64/common/serial.c +++ b/arch/mips/loongson64/common/serial.c @@ -110,7 +110,7 @@ static int __init serial_init(void) } module_init(serial_init); -static void __init serial_exit(void) +static void __exit serial_exit(void) { platform_device_unregister(&uart8250_device); } From 875adfc28894262bead3e1c230c1dabcac0b63b5 Mon Sep 17 00:00:00 2001 From: Yizhuo Date: Tue, 1 Oct 2019 13:24:39 -0700 Subject: [PATCH 0027/3715] net: hisilicon: Fix usage of uninitialized variable in function mdio_sc_cfg_reg_write() [ Upstream commit 53de429f4e88f538f7a8ec2b18be8c0cd9b2c8e1 ] In function mdio_sc_cfg_reg_write(), variable "reg_value" could be uninitialized if regmap_read() fails. However, "reg_value" is used to decide the control flow later in the if statement, which is potentially unsafe. Signed-off-by: Yizhuo Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns_mdio.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns_mdio.c b/drivers/net/ethernet/hisilicon/hns_mdio.c index baf5cc251f32..9a3bc0994a1d 100644 --- a/drivers/net/ethernet/hisilicon/hns_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns_mdio.c @@ -156,11 +156,15 @@ static int mdio_sc_cfg_reg_write(struct hns_mdio_device *mdio_dev, { u32 time_cnt; u32 reg_value; + int ret; regmap_write(mdio_dev->subctrl_vbase, cfg_reg, set_val); for (time_cnt = MDIO_TIMEOUT; time_cnt; time_cnt--) { - regmap_read(mdio_dev->subctrl_vbase, st_reg, ®_value); + ret = regmap_read(mdio_dev->subctrl_vbase, st_reg, ®_value); + if (ret) + return ret; + reg_value &= st_msk; if ((!!check_st) == (!!reg_value)) break; From e74a4dc8f2dbcf7819a0c3209db9147a63d82e99 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 4 Oct 2019 20:51:04 +0800 Subject: [PATCH 0028/3715] r8152: Set macpassthru in reset_resume callback [ Upstream commit a54cdeeb04fc719e4c7f19d6e28dba7ea86cee5b ] r8152 may fail to establish network connection after resume from system suspend. If the USB port connects to r8152 lost its power during system suspend, the MAC address was written before is lost. The reason is that The MAC address doesn't get written again in its reset_resume callback. So let's set MAC address again in reset_resume callback. Also remove unnecessary lock as no other locking attempt will happen during reset_resume. Signed-off-by: Kai-Heng Feng Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/r8152.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 455eec3c4694..c0964281ab98 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -4465,10 +4465,9 @@ static int rtl8152_reset_resume(struct usb_interface *intf) struct r8152 *tp = usb_get_intfdata(intf); clear_bit(SELECTIVE_SUSPEND, &tp->flags); - mutex_lock(&tp->control); tp->rtl_ops.init(tp); queue_delayed_work(system_long_wq, &tp->hw_phy_work, 0); - mutex_unlock(&tp->control); + set_ethernet_addr(tp); return rtl8152_resume(intf); } From bf08b7689097e8aea1769d2e4679bbf2d333b76a Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 27 Sep 2019 16:30:27 -0700 Subject: [PATCH 0029/3715] namespace: fix namespace.pl script to support relative paths [ Upstream commit 82fdd12b95727640c9a8233c09d602e4518e71f7 ] The namespace.pl script does not work properly if objtree is not set to an absolute path. The do_nm function is run from within the find function, which changes directories. Because of this, appending objtree, $File::Find::dir, and $source, will return a path which is not valid from the current directory. This used to work when objtree was set to an absolute path when using "make namespacecheck". It appears to have not worked when calling ./scripts/namespace.pl directly. This behavior was changed in 7e1c04779efd ("kbuild: Use relative path for $(objtree)", 2014-05-14) Rather than fixing the Makefile to set objtree to an absolute path, just fix namespace.pl to work when srctree and objtree are relative. Also fix the script to use an absolute path for these by default. Use the File::Spec module for this purpose. It's been part of perl 5 since 5.005. The curdir() function is used to get the current directory when the objtree and srctree aren't set in the environment. rel2abs() is used to convert possibly relative objtree and srctree environment variables to absolute paths. Finally, the catfile() function is used instead of string appending paths together, since this is more robust when joining paths together. Signed-off-by: Jacob Keller Acked-by: Randy Dunlap Tested-by: Randy Dunlap Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/namespace.pl | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/scripts/namespace.pl b/scripts/namespace.pl index 729c547fc9e1..30c43e639db8 100755 --- a/scripts/namespace.pl +++ b/scripts/namespace.pl @@ -65,13 +65,14 @@ use warnings; use strict; use File::Find; +use File::Spec; my $nm = ($ENV{'NM'} || "nm") . " -p"; my $objdump = ($ENV{'OBJDUMP'} || "objdump") . " -s -j .comment"; -my $srctree = ""; -my $objtree = ""; -$srctree = "$ENV{'srctree'}/" if (exists($ENV{'srctree'})); -$objtree = "$ENV{'objtree'}/" if (exists($ENV{'objtree'})); +my $srctree = File::Spec->curdir(); +my $objtree = File::Spec->curdir(); +$srctree = File::Spec->rel2abs($ENV{'srctree'}) if (exists($ENV{'srctree'})); +$objtree = File::Spec->rel2abs($ENV{'objtree'}) if (exists($ENV{'objtree'})); if ($#ARGV != -1) { print STDERR "usage: $0 takes no parameters\n"; @@ -231,9 +232,9 @@ sub do_nm } ($source = $basename) =~ s/\.o$//; if (-e "$source.c" || -e "$source.S") { - $source = "$objtree$File::Find::dir/$source"; + $source = File::Spec->catfile($objtree, $File::Find::dir, $source) } else { - $source = "$srctree$File::Find::dir/$source"; + $source = File::Spec->catfile($srctree, $File::Find::dir, $source) } if (! -e "$source.c" && ! -e "$source.S") { # No obvious source, exclude the object if it is conglomerate From 5f58cb078e2ecf7c1cac0b9ef4f1cb78f6c6a515 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 14 Oct 2019 16:58:35 -0700 Subject: [PATCH 0030/3715] md/raid0: fix warning message for parameter default_layout [ Upstream commit 3874d73e06c9b9dc15de0b7382fc223986d75571 ] The message should match the parameter, i.e. raid0.default_layout. Fixes: c84a1372df92 ("md/raid0: avoid RAID0 data corruption due to layout confusion.") Cc: NeilBrown Reported-by: Ivan Topolsky Signed-off-by: Song Liu Signed-off-by: Sasha Levin --- drivers/md/raid0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 28fb71721770..449c4dd060fc 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -158,7 +158,7 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) } else { pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n", mdname(mddev)); - pr_err("md/raid0: please set raid.default_layout to 1 or 2\n"); + pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n"); err = -ENOTSUPP; goto abort; } From 2e18e22063986658f0ebfb90f742ab1f6e378f33 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 9 Oct 2019 13:12:37 -0500 Subject: [PATCH 0031/3715] Revert "drm/radeon: Fix EEH during kexec" [ Upstream commit 8d13c187c42e110625d60094668a8f778c092879 ] This reverts commit 6f7fe9a93e6c09bf988c5059403f5f88e17e21e6. This breaks some boards. Maybe just enable this on PPC for now? Bug: https://bugzilla.kernel.org/show_bug.cgi?id=205147 Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/radeon_drv.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 54d97dd5780a..f4becad0a78c 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -368,19 +368,11 @@ radeon_pci_remove(struct pci_dev *pdev) static void radeon_pci_shutdown(struct pci_dev *pdev) { - struct drm_device *ddev = pci_get_drvdata(pdev); - /* if we are running in a VM, make sure the device * torn down properly on reboot/shutdown */ if (radeon_device_is_virtual()) radeon_pci_remove(pdev); - - /* Some adapters need to be suspended before a - * shutdown occurs in order to prevent an error - * during kexec. - */ - radeon_suspend_kms(ddev, true, true, false); } static int radeon_pmops_suspend(struct device *dev) From 777b3745558a8a308d23719bfa9ec124a75b649b Mon Sep 17 00:00:00 2001 From: Yi Li Date: Fri, 18 Oct 2019 20:20:08 -0700 Subject: [PATCH 0032/3715] ocfs2: fix panic due to ocfs2_wq is null commit b918c43021baaa3648de09e19a4a3dd555a45f40 upstream. mount.ocfs2 failed when reading ocfs2 filesystem superblock encounters an error. ocfs2_initialize_super() returns before allocating ocfs2_wq. ocfs2_dismount_volume() triggers the following panic. Oct 15 16:09:27 cnwarekv-205120 kernel: On-disk corruption discovered.Please run fsck.ocfs2 once the filesystem is unmounted. Oct 15 16:09:27 cnwarekv-205120 kernel: (mount.ocfs2,22804,44): ocfs2_read_locked_inode:537 ERROR: status = -30 Oct 15 16:09:27 cnwarekv-205120 kernel: (mount.ocfs2,22804,44): ocfs2_init_global_system_inodes:458 ERROR: status = -30 Oct 15 16:09:27 cnwarekv-205120 kernel: (mount.ocfs2,22804,44): ocfs2_init_global_system_inodes:491 ERROR: status = -30 Oct 15 16:09:27 cnwarekv-205120 kernel: (mount.ocfs2,22804,44): ocfs2_initialize_super:2313 ERROR: status = -30 Oct 15 16:09:27 cnwarekv-205120 kernel: (mount.ocfs2,22804,44): ocfs2_fill_super:1033 ERROR: status = -30 ------------[ cut here ]------------ Oops: 0002 [#1] SMP NOPTI CPU: 1 PID: 11753 Comm: mount.ocfs2 Tainted: G E 4.14.148-200.ckv.x86_64 #1 Hardware name: Sugon H320-G30/35N16-US, BIOS 0SSDX017 12/21/2018 task: ffff967af0520000 task.stack: ffffa5f05484000 RIP: 0010:mutex_lock+0x19/0x20 Call Trace: flush_workqueue+0x81/0x460 ocfs2_shutdown_local_alloc+0x47/0x440 [ocfs2] ocfs2_dismount_volume+0x84/0x400 [ocfs2] ocfs2_fill_super+0xa4/0x1270 [ocfs2] ? ocfs2_initialize_super.isa.211+0xf20/0xf20 [ocfs2] mount_bdev+0x17f/0x1c0 mount_fs+0x3a/0x160 Link: http://lkml.kernel.org/r/1571139611-24107-1-git-send-email-yili@winhong.com Signed-off-by: Yi Li Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/journal.c | 3 ++- fs/ocfs2/localalloc.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index e5dcea6cee5f..f7fba58618ef 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -231,7 +231,8 @@ void ocfs2_recovery_exit(struct ocfs2_super *osb) /* At this point, we know that no more recovery threads can be * launched, so wait for any recovery completion work to * complete. */ - flush_workqueue(osb->ocfs2_wq); + if (osb->ocfs2_wq) + flush_workqueue(osb->ocfs2_wq); /* * Now that recovery is shut down, and the osb is about to be diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 5d53d0d63d19..ea38677daa06 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -391,7 +391,8 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb) struct ocfs2_dinode *alloc = NULL; cancel_delayed_work(&osb->la_enable_wq); - flush_workqueue(osb->ocfs2_wq); + if (osb->ocfs2_wq) + flush_workqueue(osb->ocfs2_wq); if (osb->local_alloc_state == OCFS2_LA_UNUSED) goto out; From 41e506d842f8edcec933fddfc6c49e08aea77ab9 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Wed, 16 Oct 2019 20:52:09 +0200 Subject: [PATCH 0033/3715] ipv4: Return -ENETUNREACH if we can't create route but saddr is valid [ Upstream commit 595e0651d0296bad2491a4a29a7a43eae6328b02 ] ...instead of -EINVAL. An issue was found with older kernel versions while unplugging a NFS client with pending RPCs, and the wrong error code here prevented it from recovering once link is back up with a configured address. Incidentally, this is not an issue anymore since commit 4f8943f80883 ("SUNRPC: Replace direct task wakeups from softirq context"), included in 5.2-rc7, had the effect of decoupling the forwarding of this error by using SO_ERROR in xs_wake_error(), as pointed out by Benjamin Coddington. To the best of my knowledge, this isn't currently causing any further issue, but the error code doesn't look appropriate anyway, and we might hit this in other paths as well. In detail, as analysed by Gonzalo Siero, once the route is deleted because the interface is down, and can't be resolved and we return -EINVAL here, this ends up, courtesy of inet_sk_rebuild_header(), as the socket error seen by tcp_write_err(), called by tcp_retransmit_timer(). In turn, tcp_write_err() indirectly calls xs_error_report(), which wakes up the RPC pending tasks with a status of -EINVAL. This is then seen by call_status() in the SUN RPC implementation, which aborts the RPC call calling rpc_exit(), instead of handling this as a potentially temporary condition, i.e. as a timeout. Return -EINVAL only if the input parameters passed to ip_route_output_key_hash_rcu() are actually invalid (this is the case if the specified source address is multicast, limited broadcast or all zeroes), but return -ENETUNREACH in all cases where, at the given moment, the given source address doesn't allow resolving the route. While at it, drop the initialisation of err to -ENETUNREACH, which was added to __ip_route_output_key() back then by commit 0315e3827048 ("net: Fix behaviour of unreachable, blackhole and prohibit routes"), but actually had no effect, as it was, and is, overwritten by the fib_lookup() return code assignment, and anyway ignored in all other branches, including the if (fl4->saddr) one: I find this rather confusing, as it would look like -ENETUNREACH is the "default" error, while that statement has no effect. Also note that after commit fc75fc8339e7 ("ipv4: dont create routes on down devices"), we would get -ENETUNREACH if the device is down, but -EINVAL if the source address is specified and we can't resolve the route, and this appears to be rather inconsistent. Reported-by: Stefan Walter Analysed-by: Benjamin Coddington Analysed-by: Gonzalo Siero Signed-off-by: Stefano Brivio Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/route.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 5a1cffb769fd..de7f955ffd0a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2351,14 +2351,17 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4, int orig_oif = fl4->flowi4_oif; unsigned int flags = 0; struct rtable *rth; - int err = -ENETUNREACH; + int err; if (fl4->saddr) { - rth = ERR_PTR(-EINVAL); if (ipv4_is_multicast(fl4->saddr) || ipv4_is_lbcast(fl4->saddr) || - ipv4_is_zeronet(fl4->saddr)) + ipv4_is_zeronet(fl4->saddr)) { + rth = ERR_PTR(-EINVAL); goto out; + } + + rth = ERR_PTR(-ENETUNREACH); /* I removed check for oif == dev_out->oif here. It was wrong for two reasons: From 46ecabab8d82a851971e3b9ccf1fe3f629248ad3 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 15 Oct 2019 10:45:47 -0700 Subject: [PATCH 0034/3715] net: bcmgenet: Fix RGMII_MODE_EN value for GENET v1/2/3 [ Upstream commit efb86fede98cdc70b674692ff617b1162f642c49 ] The RGMII_MODE_EN bit value was 0 for GENET versions 1 through 3, and became 6 for GENET v4 and above, account for that difference. Fixes: aa09677cba42 ("net: bcmgenet: add MDIO routines") Signed-off-by: Florian Fainelli Acked-by: Doug Berger Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/genet/bcmgenet.h | 1 + drivers/net/ethernet/broadcom/genet/bcmmii.c | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 9d499c5c8f8a..f176a0307f39 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -368,6 +368,7 @@ struct bcmgenet_mib_counters { #define EXT_PWR_DOWN_PHY_EN (1 << 20) #define EXT_RGMII_OOB_CTRL 0x0C +#define RGMII_MODE_EN_V123 (1 << 0) #define RGMII_LINK (1 << 4) #define OOB_DISABLE (1 << 5) #define RGMII_MODE_EN (1 << 6) diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index c421e2753c8c..0d970e5cb9fd 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -277,7 +277,11 @@ int bcmgenet_mii_config(struct net_device *dev, bool init) */ if (priv->ext_phy) { reg = bcmgenet_ext_readl(priv, EXT_RGMII_OOB_CTRL); - reg |= RGMII_MODE_EN | id_mode_dis; + reg |= id_mode_dis; + if (GENET_IS_V1(priv) || GENET_IS_V2(priv) || GENET_IS_V3(priv)) + reg |= RGMII_MODE_EN_V123; + else + reg |= RGMII_MODE_EN; bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL); } From 0f99c6bbe277bfb6836d9345630a8f23d3aeac9e Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 11 Oct 2019 12:53:49 -0700 Subject: [PATCH 0035/3715] net: bcmgenet: Set phydev->dev_flags only for internal PHYs [ Upstream commit 92696286f3bb37ba50e4bd8d1beb24afb759a799 ] phydev->dev_flags is entirely dependent on the PHY device driver which is going to be used, setting the internal GENET PHY revision in those bits only makes sense when drivers/net/phy/bcm7xxx.c is the PHY driver being used. Fixes: 487320c54143 ("net: bcmgenet: communicate integrated PHY revision to PHY driver") Signed-off-by: Florian Fainelli Acked-by: Doug Berger Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/genet/bcmmii.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 0d970e5cb9fd..fca9da1b1363 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -296,11 +296,12 @@ int bcmgenet_mii_probe(struct net_device *dev) struct bcmgenet_priv *priv = netdev_priv(dev); struct device_node *dn = priv->pdev->dev.of_node; struct phy_device *phydev; - u32 phy_flags; + u32 phy_flags = 0; int ret; /* Communicate the integrated PHY revision */ - phy_flags = priv->gphy_rev; + if (priv->internal_phy) + phy_flags = priv->gphy_rev; /* Initialize link state variables that bcmgenet_mii_setup() uses */ priv->old_link = -1; From e354aa381a670805996481b748d2f86106f9bbe4 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Tue, 15 Oct 2019 16:42:45 +0200 Subject: [PATCH 0036/3715] net: i82596: fix dma_alloc_attr for sni_82596 [ Upstream commit 61c1d33daf7b5146f44d4363b3322f8cda6a6c43 ] Commit 7f683b920479 ("i825xx: switch to switch to dma_alloc_attrs") switched dma allocation over to dma_alloc_attr, but didn't convert the SNI part to request consistent DMA memory. This broke sni_82596 since driver doesn't do dma_cache_sync for performance reasons. Fix this by using different DMA_ATTRs for lasi_82596 and sni_82596. Fixes: 7f683b920479 ("i825xx: switch to switch to dma_alloc_attrs") Signed-off-by: Thomas Bogendoerfer Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/i825xx/lasi_82596.c | 4 +++- drivers/net/ethernet/i825xx/lib82596.c | 4 ++-- drivers/net/ethernet/i825xx/sni_82596.c | 4 +++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/i825xx/lasi_82596.c b/drivers/net/ethernet/i825xx/lasi_82596.c index b69c622ba8b2..6f0e4019adef 100644 --- a/drivers/net/ethernet/i825xx/lasi_82596.c +++ b/drivers/net/ethernet/i825xx/lasi_82596.c @@ -96,6 +96,8 @@ #define OPT_SWAP_PORT 0x0001 /* Need to wordswp on the MPU port */ +#define LIB82596_DMA_ATTR DMA_ATTR_NON_CONSISTENT + #define DMA_WBACK(ndev, addr, len) \ do { dma_cache_sync((ndev)->dev.parent, (void *)addr, len, DMA_TO_DEVICE); } while (0) @@ -199,7 +201,7 @@ static int __exit lan_remove_chip(struct parisc_device *pdev) unregister_netdev (dev); dma_free_attrs(&pdev->dev, sizeof(struct i596_private), lp->dma, - lp->dma_addr, DMA_ATTR_NON_CONSISTENT); + lp->dma_addr, LIB82596_DMA_ATTR); free_netdev (dev); return 0; } diff --git a/drivers/net/ethernet/i825xx/lib82596.c b/drivers/net/ethernet/i825xx/lib82596.c index f00a1dc2128c..da3758fdf025 100644 --- a/drivers/net/ethernet/i825xx/lib82596.c +++ b/drivers/net/ethernet/i825xx/lib82596.c @@ -1065,7 +1065,7 @@ static int i82596_probe(struct net_device *dev) dma = dma_alloc_attrs(dev->dev.parent, sizeof(struct i596_dma), &lp->dma_addr, GFP_KERNEL, - DMA_ATTR_NON_CONSISTENT); + LIB82596_DMA_ATTR); if (!dma) { printk(KERN_ERR "%s: Couldn't get shared memory\n", __FILE__); return -ENOMEM; @@ -1087,7 +1087,7 @@ static int i82596_probe(struct net_device *dev) i = register_netdev(dev); if (i) { dma_free_attrs(dev->dev.parent, sizeof(struct i596_dma), - dma, lp->dma_addr, DMA_ATTR_NON_CONSISTENT); + dma, lp->dma_addr, LIB82596_DMA_ATTR); return i; } diff --git a/drivers/net/ethernet/i825xx/sni_82596.c b/drivers/net/ethernet/i825xx/sni_82596.c index b2c04a789744..43c1fd18670b 100644 --- a/drivers/net/ethernet/i825xx/sni_82596.c +++ b/drivers/net/ethernet/i825xx/sni_82596.c @@ -23,6 +23,8 @@ static const char sni_82596_string[] = "snirm_82596"; +#define LIB82596_DMA_ATTR 0 + #define DMA_WBACK(priv, addr, len) do { } while (0) #define DMA_INV(priv, addr, len) do { } while (0) #define DMA_WBACK_INV(priv, addr, len) do { } while (0) @@ -151,7 +153,7 @@ static int sni_82596_driver_remove(struct platform_device *pdev) unregister_netdev(dev); dma_free_attrs(dev->dev.parent, sizeof(struct i596_private), lp->dma, - lp->dma_addr, DMA_ATTR_NON_CONSISTENT); + lp->dma_addr, LIB82596_DMA_ATTR); iounmap(lp->ca); iounmap(lp->mpu_port); free_netdev (dev); From 80a0e5d378f8ad8cb086d4018389ebec75f45fb2 Mon Sep 17 00:00:00 2001 From: Biao Huang Date: Tue, 15 Oct 2019 11:24:44 +0800 Subject: [PATCH 0037/3715] net: stmmac: disable/enable ptp_ref_clk in suspend/resume flow [ Upstream commit e497c20e203680aba9ccf7bb475959595908ca7e ] disable ptp_ref_clk in suspend flow, and enable it in resume flow. Fixes: f573c0b9c4e0 ("stmmac: move stmmac_clk, pclk, clk_ptp_ref and stmmac_rst to platform structure") Signed-off-by: Biao Huang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index f4df9ab0aed5..612773b94ae3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4402,8 +4402,10 @@ int stmmac_suspend(struct device *dev) priv->hw->mac->set_mac(priv->ioaddr, false); pinctrl_pm_select_sleep_state(priv->device); /* Disable clock in case of PWM is off */ - clk_disable(priv->plat->pclk); - clk_disable(priv->plat->stmmac_clk); + if (priv->plat->clk_ptp_ref) + clk_disable_unprepare(priv->plat->clk_ptp_ref); + clk_disable_unprepare(priv->plat->pclk); + clk_disable_unprepare(priv->plat->stmmac_clk); } spin_unlock_irqrestore(&priv->lock, flags); @@ -4468,8 +4470,10 @@ int stmmac_resume(struct device *dev) } else { pinctrl_pm_select_default_state(priv->device); /* enable the clk previously disabled */ - clk_enable(priv->plat->stmmac_clk); - clk_enable(priv->plat->pclk); + clk_prepare_enable(priv->plat->stmmac_clk); + clk_prepare_enable(priv->plat->pclk); + if (priv->plat->clk_ptp_ref) + clk_prepare_enable(priv->plat->clk_ptp_ref); /* reset the phy so that it's ready */ if (priv->mii) stmmac_mdio_reset(priv->mii); From dce4350a1dd4a71ec52e463043aca2f446064507 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 15 Oct 2019 15:24:38 +0800 Subject: [PATCH 0038/3715] sctp: change sctp_prot .no_autobind with true [ Upstream commit 63dfb7938b13fa2c2fbcb45f34d065769eb09414 ] syzbot reported a memory leak: BUG: memory leak, unreferenced object 0xffff888120b3d380 (size 64): backtrace: [...] slab_alloc mm/slab.c:3319 [inline] [...] kmem_cache_alloc+0x13f/0x2c0 mm/slab.c:3483 [...] sctp_bucket_create net/sctp/socket.c:8523 [inline] [...] sctp_get_port_local+0x189/0x5a0 net/sctp/socket.c:8270 [...] sctp_do_bind+0xcc/0x200 net/sctp/socket.c:402 [...] sctp_bindx_add+0x4b/0xd0 net/sctp/socket.c:497 [...] sctp_setsockopt_bindx+0x156/0x1b0 net/sctp/socket.c:1022 [...] sctp_setsockopt net/sctp/socket.c:4641 [inline] [...] sctp_setsockopt+0xaea/0x2dc0 net/sctp/socket.c:4611 [...] sock_common_setsockopt+0x38/0x50 net/core/sock.c:3147 [...] __sys_setsockopt+0x10f/0x220 net/socket.c:2084 [...] __do_sys_setsockopt net/socket.c:2100 [inline] It was caused by when sending msgs without binding a port, in the path: inet_sendmsg() -> inet_send_prepare() -> inet_autobind() -> .get_port/sctp_get_port(), sp->bind_hash will be set while bp->port is not. Later when binding another port by sctp_setsockopt_bindx(), a new bucket will be created as bp->port is not set. sctp's autobind is supposed to call sctp_autobind() where it does all things including setting bp->port. Since sctp_autobind() is called in sctp_sendmsg() if the sk is not yet bound, it should have skipped the auto bind. THis patch is to avoid calling inet_autobind() in inet_send_prepare() by changing sctp_prot .no_autobind with true, also remove the unused .get_port. Reported-by: syzbot+d44f7bbebdea49dbc84a@syzkaller.appspotmail.com Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6a2532370545..a18e9be77216 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -8313,7 +8313,7 @@ struct proto sctp_prot = { .backlog_rcv = sctp_backlog_rcv, .hash = sctp_hash, .unhash = sctp_unhash, - .get_port = sctp_get_port, + .no_autobind = true, .obj_size = sizeof(struct sctp_sock), .sysctl_mem = sysctl_sctp_mem, .sysctl_rmem = sysctl_sctp_rmem, @@ -8352,7 +8352,7 @@ struct proto sctpv6_prot = { .backlog_rcv = sctp_backlog_rcv, .hash = sctp_hash, .unhash = sctp_unhash, - .get_port = sctp_get_port, + .no_autobind = true, .obj_size = sizeof(struct sctp6_sock), .sysctl_mem = sysctl_sctp_mem, .sysctl_rmem = sysctl_sctp_rmem, From eccfa2109a545a16f0feace4b60da881b3a23082 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Oct 2019 11:22:30 -0700 Subject: [PATCH 0039/3715] net: avoid potential infinite loop in tc_ctl_action() [ Upstream commit 39f13ea2f61b439ebe0060393e9c39925c9ee28c ] tc_ctl_action() has the ability to loop forever if tcf_action_add() returns -EAGAIN. This special case has been done in case a module needed to be loaded, but it turns out that tcf_add_notify() could also return -EAGAIN if the socket sk_rcvbuf limit is hit. We need to separate the two cases, and only loop for the module loading case. While we are at it, add a limit of 10 attempts since unbounded loops are always scary. syzbot repro was something like : socket(PF_NETLINK, SOCK_RAW|SOCK_NONBLOCK, NETLINK_ROUTE) = 3 write(3, ..., 38) = 38 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [0], 4) = 0 sendmsg(3, {msg_name(0)=NULL, msg_iov(1)=[{..., 388}], msg_controllen=0, msg_flags=0x10}, ...) NMI backtrace for cpu 0 CPU: 0 PID: 1054 Comm: khungtaskd Not tainted 5.4.0-rc1+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 nmi_cpu_backtrace.cold+0x70/0xb2 lib/nmi_backtrace.c:101 nmi_trigger_cpumask_backtrace+0x23b/0x28b lib/nmi_backtrace.c:62 arch_trigger_cpumask_backtrace+0x14/0x20 arch/x86/kernel/apic/hw_nmi.c:38 trigger_all_cpu_backtrace include/linux/nmi.h:146 [inline] check_hung_uninterruptible_tasks kernel/hung_task.c:205 [inline] watchdog+0x9d0/0xef0 kernel/hung_task.c:289 kthread+0x361/0x430 kernel/kthread.c:255 ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352 Sending NMI from CPU 0 to CPUs 1: NMI backtrace for cpu 1 CPU: 1 PID: 8859 Comm: syz-executor910 Not tainted 5.4.0-rc1+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:arch_local_save_flags arch/x86/include/asm/paravirt.h:751 [inline] RIP: 0010:lockdep_hardirqs_off+0x1df/0x2e0 kernel/locking/lockdep.c:3453 Code: 5c 08 00 00 5b 41 5c 41 5d 5d c3 48 c7 c0 58 1d f3 88 48 ba 00 00 00 00 00 fc ff df 48 c1 e8 03 80 3c 10 00 0f 85 d3 00 00 00 <48> 83 3d 21 9e 99 07 00 0f 84 b9 00 00 00 9c 58 0f 1f 44 00 00 f6 RSP: 0018:ffff8880a6f3f1b8 EFLAGS: 00000046 RAX: 1ffffffff11e63ab RBX: ffff88808c9c6080 RCX: 0000000000000000 RDX: dffffc0000000000 RSI: 0000000000000000 RDI: ffff88808c9c6914 RBP: ffff8880a6f3f1d0 R08: ffff88808c9c6080 R09: fffffbfff16be5d1 R10: fffffbfff16be5d0 R11: 0000000000000003 R12: ffffffff8746591f R13: ffff88808c9c6080 R14: ffffffff8746591f R15: 0000000000000003 FS: 00000000011e4880(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffff600400 CR3: 00000000a8920000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: trace_hardirqs_off+0x62/0x240 kernel/trace/trace_preemptirq.c:45 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:108 [inline] _raw_spin_lock_irqsave+0x6f/0xcd kernel/locking/spinlock.c:159 __wake_up_common_lock+0xc8/0x150 kernel/sched/wait.c:122 __wake_up+0xe/0x10 kernel/sched/wait.c:142 netlink_unlock_table net/netlink/af_netlink.c:466 [inline] netlink_unlock_table net/netlink/af_netlink.c:463 [inline] netlink_broadcast_filtered+0x705/0xb80 net/netlink/af_netlink.c:1514 netlink_broadcast+0x3a/0x50 net/netlink/af_netlink.c:1534 rtnetlink_send+0xdd/0x110 net/core/rtnetlink.c:714 tcf_add_notify net/sched/act_api.c:1343 [inline] tcf_action_add+0x243/0x370 net/sched/act_api.c:1362 tc_ctl_action+0x3b5/0x4bc net/sched/act_api.c:1410 rtnetlink_rcv_msg+0x463/0xb00 net/core/rtnetlink.c:5386 netlink_rcv_skb+0x177/0x450 net/netlink/af_netlink.c:2477 rtnetlink_rcv+0x1d/0x30 net/core/rtnetlink.c:5404 netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] netlink_unicast+0x531/0x710 net/netlink/af_netlink.c:1328 netlink_sendmsg+0x8a5/0xd60 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:637 [inline] sock_sendmsg+0xd7/0x130 net/socket.c:657 ___sys_sendmsg+0x803/0x920 net/socket.c:2311 __sys_sendmsg+0x105/0x1d0 net/socket.c:2356 __do_sys_sendmsg net/socket.c:2365 [inline] __se_sys_sendmsg net/socket.c:2363 [inline] __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2363 do_syscall_64+0xfa/0x760 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x440939 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: syzbot+cf0adbb9c28c8866c788@syzkaller.appspotmail.com Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/act_api.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 4444d7e755e6..8ae0addb7657 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1072,10 +1072,16 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, static int tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, u32 portid, int ovr) { - int ret = 0; + int loop, ret; LIST_HEAD(actions); - ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions); + for (loop = 0; loop < 10; loop++) { + ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, + &actions); + if (ret != -EAGAIN) + break; + } + if (ret) return ret; @@ -1122,10 +1128,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, */ if (n->nlmsg_flags & NLM_F_REPLACE) ovr = 1; -replay: ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr); - if (ret == -EAGAIN) - goto replay; break; case RTM_DELACTION: ret = tca_action_gd(net, tca[TCA_ACT_TAB], n, From 48a8f3c2081e83fcaa0ff7c4340f955eb9c55409 Mon Sep 17 00:00:00 2001 From: Alessio Balsini Date: Wed, 23 Oct 2019 18:17:36 +0100 Subject: [PATCH 0040/3715] loop: Add LOOP_SET_DIRECT_IO to compat ioctl [ Upstream commit fdbe4eeeb1aac219b14f10c0ed31ae5d1123e9b8 ] Enabling Direct I/O with loop devices helps reducing memory usage by avoiding double caching. 32 bit applications running on 64 bits systems are currently not able to request direct I/O because is missing from the lo_compat_ioctl. This patch fixes the compatibility issue mentioned above by exporting LOOP_SET_DIRECT_IO as additional lo_compat_ioctl() entry. The input argument for this ioctl is a single long converted to a 1-bit boolean, so compatibility is preserved. Cc: Jens Axboe Signed-off-by: Alessio Balsini Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/loop.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 87d7c42affbc..ec61dd873c93 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1605,6 +1605,7 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, arg = (unsigned long) compat_ptr(arg); case LOOP_SET_FD: case LOOP_CHANGE_FD: + case LOOP_SET_DIRECT_IO: err = lo_ioctl(bdev, mode, cmd, arg); break; default: From 391d4ee568b546c9900cc058b82d290e2f71a99c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 25 Oct 2019 09:58:35 -0700 Subject: [PATCH 0041/3715] memfd: Fix locking when tagging pins The RCU lock is insufficient to protect the radix tree iteration as a deletion from the tree can occur before we take the spinlock to tag the entry. In 4.19, this has manifested as a bug with the following trace: kernel BUG at lib/radix-tree.c:1429! invalid opcode: 0000 [#1] SMP KASAN PTI CPU: 7 PID: 6935 Comm: syz-executor.2 Not tainted 4.19.36 #25 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 RIP: 0010:radix_tree_tag_set+0x200/0x2f0 lib/radix-tree.c:1429 Code: 00 00 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 89 44 24 10 e8 a3 29 7e fe 48 8b 44 24 10 48 0f ab 03 e9 d2 fe ff ff e8 90 29 7e fe <0f> 0b 48 c7 c7 e0 5a 87 84 e8 f0 e7 08 ff 4c 89 ef e8 4a ff ac fe RSP: 0018:ffff88837b13fb60 EFLAGS: 00010016 RAX: 0000000000040000 RBX: ffff8883c5515d58 RCX: ffffffff82cb2ef0 RDX: 0000000000000b72 RSI: ffffc90004cf2000 RDI: ffff8883c5515d98 RBP: ffff88837b13fb98 R08: ffffed106f627f7e R09: ffffed106f627f7e R10: 0000000000000001 R11: ffffed106f627f7d R12: 0000000000000004 R13: ffffea000d7fea80 R14: 1ffff1106f627f6f R15: 0000000000000002 FS: 00007fa1b8df2700(0000) GS:ffff8883e2fc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fa1b8df1db8 CR3: 000000037d4d2001 CR4: 0000000000160ee0 Call Trace: memfd_tag_pins mm/memfd.c:51 [inline] memfd_wait_for_pins+0x2c5/0x12d0 mm/memfd.c:81 memfd_add_seals mm/memfd.c:215 [inline] memfd_fcntl+0x33d/0x4a0 mm/memfd.c:247 do_fcntl+0x589/0xeb0 fs/fcntl.c:421 __do_sys_fcntl fs/fcntl.c:463 [inline] __se_sys_fcntl fs/fcntl.c:448 [inline] __x64_sys_fcntl+0x12d/0x180 fs/fcntl.c:448 do_syscall_64+0xc8/0x580 arch/x86/entry/common.c:293 The problem does not occur in mainline due to the XArray rewrite which changed the locking to exclude modification of the tree during iteration. At the time, nobody realised this was a bugfix. Backport the locking changes to stable. Cc: stable@vger.kernel.org Reported-by: zhong jiang Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Sasha Levin --- mm/shmem.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 037e2ee9ccac..5b2cc9f9b1f1 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2657,11 +2657,12 @@ static void shmem_tag_pins(struct address_space *mapping) void **slot; pgoff_t start; struct page *page; + unsigned int tagged = 0; lru_add_drain(); start = 0; - rcu_read_lock(); + spin_lock_irq(&mapping->tree_lock); radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { page = radix_tree_deref_slot(slot); if (!page || radix_tree_exception(page)) { @@ -2670,18 +2671,19 @@ static void shmem_tag_pins(struct address_space *mapping) continue; } } else if (page_count(page) - page_mapcount(page) > 1) { - spin_lock_irq(&mapping->tree_lock); radix_tree_tag_set(&mapping->page_tree, iter.index, SHMEM_TAG_PINNED); - spin_unlock_irq(&mapping->tree_lock); } - if (need_resched()) { - slot = radix_tree_iter_resume(slot, &iter); - cond_resched_rcu(); - } + if (++tagged % 1024) + continue; + + slot = radix_tree_iter_resume(slot, &iter); + spin_unlock_irq(&mapping->tree_lock); + cond_resched(); + spin_lock_irq(&mapping->tree_lock); } - rcu_read_unlock(); + spin_unlock_irq(&mapping->tree_lock); } /* From ddc87ec6918baf53962077123875acb015d8c41c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 10 Oct 2019 14:58:35 +0200 Subject: [PATCH 0042/3715] USB: legousbtower: fix memleak on disconnect commit b6c03e5f7b463efcafd1ce141bd5a8fc4e583ae2 upstream. If disconnect() races with release() after a process has been interrupted, release() could end up returning early and the driver would fail to free its driver data. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191010125835.27031-3-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/legousbtower.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/usb/misc/legousbtower.c b/drivers/usb/misc/legousbtower.c index 155615aadc9c..378a565ec989 100644 --- a/drivers/usb/misc/legousbtower.c +++ b/drivers/usb/misc/legousbtower.c @@ -423,10 +423,7 @@ static int tower_release (struct inode *inode, struct file *file) goto exit; } - if (mutex_lock_interruptible(&dev->lock)) { - retval = -ERESTARTSYS; - goto exit; - } + mutex_lock(&dev->lock); if (dev->open_count != 1) { dev_dbg(&dev->udev->dev, "%s: device not opened exactly once\n", From ffe87d720053202713f9f2a02bb888e8d97015bc Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Thu, 2 May 2019 16:03:26 +0800 Subject: [PATCH 0043/3715] ALSA: hda/realtek - Add support for ALC711 commit 83629532ce45ef9df1f297b419b9ea112045685d upstream. Support new codec ALC711. Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ab7bc7ebb721..5412952557f7 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -359,6 +359,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0700: case 0x10ec0701: case 0x10ec0703: + case 0x10ec0711: alc_update_coef_idx(codec, 0x10, 1<<15, 0); break; case 0x10ec0662: @@ -7272,6 +7273,7 @@ static int patch_alc269(struct hda_codec *codec) case 0x10ec0700: case 0x10ec0701: case 0x10ec0703: + case 0x10ec0711: spec->codec_variant = ALC269_TYPE_ALC700; spec->gen.mixer_nid = 0; /* ALC700 does not have any loopback mixer path */ alc_update_coef_idx(codec, 0x4a, 1 << 15, 0); /* Combo jack auto trigger control */ @@ -8365,6 +8367,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = { HDA_CODEC_ENTRY(0x10ec0700, "ALC700", patch_alc269), HDA_CODEC_ENTRY(0x10ec0701, "ALC701", patch_alc269), HDA_CODEC_ENTRY(0x10ec0703, "ALC703", patch_alc269), + HDA_CODEC_ENTRY(0x10ec0711, "ALC711", patch_alc269), HDA_CODEC_ENTRY(0x10ec0867, "ALC891", patch_alc662), HDA_CODEC_ENTRY(0x10ec0880, "ALC880", patch_alc880), HDA_CODEC_ENTRY(0x10ec0882, "ALC882", patch_alc882), From e90c15a897d29d8482b53f9cede6da7b42cb7d32 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 14 Oct 2019 14:18:30 -0500 Subject: [PATCH 0044/3715] usb: udc: lpc32xx: fix bad bit shift operation commit b987b66ac3a2bc2f7b03a0ba48a07dc553100c07 upstream. It seems that the right variable to use in this case is *i*, instead of *n*, otherwise there is an undefined behavior when right shifiting by more than 31 bits when multiplying n by 8; notice that *n* can take values equal or greater than 4 (4, 8, 16, ...). Also, notice that under the current conditions (bl = 3), we are skiping the handling of bytes 3, 7, 31... So, fix this by updating this logic and limit *bl* up to 4 instead of up to 3. This fix is based on function udc_stuff_fifo(). Addresses-Coverity-ID: 1454834 ("Bad bit shift operation") Fixes: 24a28e428351 ("USB: gadget driver for LPC32xx") Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20191014191830.GA10721@embeddedor Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/lpc32xx_udc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/udc/lpc32xx_udc.c b/drivers/usb/gadget/udc/lpc32xx_udc.c index 6df1aded4503..ac2aa04ca657 100644 --- a/drivers/usb/gadget/udc/lpc32xx_udc.c +++ b/drivers/usb/gadget/udc/lpc32xx_udc.c @@ -1178,11 +1178,11 @@ static void udc_pop_fifo(struct lpc32xx_udc *udc, u8 *data, u32 bytes) tmp = readl(USBD_RXDATA(udc->udp_baseaddr)); bl = bytes - n; - if (bl > 3) - bl = 3; + if (bl > 4) + bl = 4; for (i = 0; i < bl; i++) - data[n + i] = (u8) ((tmp >> (n * 8)) & 0xFF); + data[n + i] = (u8) ((tmp >> (i * 8)) & 0xFF); } break; From a1f27405201eb6268f82bf9f00631a3350ae2786 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 11 Oct 2019 11:57:35 +0200 Subject: [PATCH 0045/3715] USB: serial: ti_usb_3410_5052: fix port-close races commit 6f1d1dc8d540a9aa6e39b9cb86d3a67bbc1c8d8d upstream. Fix races between closing a port and opening or closing another port on the same device which could lead to a failure to start or stop the shared interrupt URB. The latter could potentially cause a use-after-free or worse in the completion handler on driver unbind. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ti_usb_3410_5052.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index 57e9f6617084..98c22ace784a 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ b/drivers/usb/serial/ti_usb_3410_5052.c @@ -780,7 +780,6 @@ static void ti_close(struct usb_serial_port *port) struct ti_port *tport; int port_number; int status; - int do_unlock; unsigned long flags; tdev = usb_get_serial_data(port->serial); @@ -804,16 +803,13 @@ static void ti_close(struct usb_serial_port *port) "%s - cannot send close port command, %d\n" , __func__, status); - /* if mutex_lock is interrupted, continue anyway */ - do_unlock = !mutex_lock_interruptible(&tdev->td_open_close_lock); + mutex_lock(&tdev->td_open_close_lock); --tport->tp_tdev->td_open_port_count; - if (tport->tp_tdev->td_open_port_count <= 0) { + if (tport->tp_tdev->td_open_port_count == 0) { /* last port is closed, shut down interrupt urb */ usb_kill_urb(port->serial->port[0]->interrupt_in_urb); - tport->tp_tdev->td_open_port_count = 0; } - if (do_unlock) - mutex_unlock(&tdev->td_open_close_lock); + mutex_unlock(&tdev->td_open_close_lock); } From 8a927cbaf361e57671ea822b44048adf251d62a9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 10 Oct 2019 14:58:34 +0200 Subject: [PATCH 0046/3715] USB: ldusb: fix memleak on disconnect commit b14a39048c1156cfee76228bf449852da2f14df8 upstream. If disconnect() races with release() after a process has been interrupted, release() could end up returning early and the driver would fail to free its driver data. Fixes: 2824bd250f0b ("[PATCH] USB: add ldusb driver") Cc: stable # 2.6.13 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191010125835.27031-2-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/ldusb.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/usb/misc/ldusb.c b/drivers/usb/misc/ldusb.c index fa5cf349ae19..c9bda7e8ef7a 100644 --- a/drivers/usb/misc/ldusb.c +++ b/drivers/usb/misc/ldusb.c @@ -383,10 +383,7 @@ static int ld_usb_release(struct inode *inode, struct file *file) goto exit; } - if (mutex_lock_interruptible(&dev->mutex)) { - retval = -ERESTARTSYS; - goto exit; - } + mutex_lock(&dev->mutex); if (dev->open_count != 1) { retval = -ENODEV; From 05e3ff801c1b0ed86721661dd87c7171a29d5376 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 15 Oct 2019 19:55:22 +0200 Subject: [PATCH 0047/3715] USB: usblp: fix use-after-free on disconnect commit 7a759197974894213621aa65f0571b51904733d6 upstream. A recent commit addressing a runtime PM use-count regression, introduced a use-after-free by not making sure we held a reference to the struct usb_interface for the lifetime of the driver data. Fixes: 9a31535859bf ("USB: usblp: fix runtime PM after driver unbind") Cc: stable Reported-by: syzbot+cd24df4d075c319ebfc5@syzkaller.appspotmail.com Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191015175522.18490-1-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usblp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c index 50836f79f908..5e456a83779d 100644 --- a/drivers/usb/class/usblp.c +++ b/drivers/usb/class/usblp.c @@ -458,6 +458,7 @@ static void usblp_cleanup(struct usblp *usblp) kfree(usblp->readbuf); kfree(usblp->device_id_string); kfree(usblp->statusbuf); + usb_put_intf(usblp->intf); kfree(usblp); } @@ -1120,7 +1121,7 @@ static int usblp_probe(struct usb_interface *intf, init_waitqueue_head(&usblp->wwait); init_usb_anchor(&usblp->urbs); usblp->ifnum = intf->cur_altsetting->desc.bInterfaceNumber; - usblp->intf = intf; + usblp->intf = usb_get_intf(intf); /* Malloc device ID string buffer to the largest expected length, * since we can re-query it on an ioctl and a dynamic string @@ -1209,6 +1210,7 @@ abort: kfree(usblp->readbuf); kfree(usblp->statusbuf); kfree(usblp->device_id_string); + usb_put_intf(usblp->intf); kfree(usblp); abort_ret: return retval; From 312ab599be611fbd8995fbf0f9746e9b0bb686de Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 18 Oct 2019 17:19:54 +0200 Subject: [PATCH 0048/3715] USB: ldusb: fix read info leaks commit 7a6f22d7479b7a0b68eadd308a997dd64dda7dae upstream. Fix broken read implementation, which could be used to trigger slab info leaks. The driver failed to check if the custom ring buffer was still empty when waking up after having waited for more data. This would happen on every interrupt-in completion, even if no data had been added to the ring buffer (e.g. on disconnect events). Due to missing sanity checks and uninitialised (kmalloced) ring-buffer entries, this meant that huge slab info leaks could easily be triggered. Note that the empty-buffer check after wakeup is enough to fix the info leak on disconnect, but let's clear the buffer on allocation and add a sanity check to read() to prevent further leaks. Fixes: 2824bd250f0b ("[PATCH] USB: add ldusb driver") Cc: stable # 2.6.13 Reported-by: syzbot+6fe95b826644f7f12b0b@syzkaller.appspotmail.com Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191018151955.25135-2-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/ldusb.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/usb/misc/ldusb.c b/drivers/usb/misc/ldusb.c index c9bda7e8ef7a..cd92ae1231bc 100644 --- a/drivers/usb/misc/ldusb.c +++ b/drivers/usb/misc/ldusb.c @@ -467,7 +467,7 @@ static ssize_t ld_usb_read(struct file *file, char __user *buffer, size_t count, /* wait for data */ spin_lock_irq(&dev->rbsl); - if (dev->ring_head == dev->ring_tail) { + while (dev->ring_head == dev->ring_tail) { dev->interrupt_in_done = 0; spin_unlock_irq(&dev->rbsl); if (file->f_flags & O_NONBLOCK) { @@ -477,12 +477,17 @@ static ssize_t ld_usb_read(struct file *file, char __user *buffer, size_t count, retval = wait_event_interruptible(dev->read_wait, dev->interrupt_in_done); if (retval < 0) goto unlock_exit; - } else { - spin_unlock_irq(&dev->rbsl); + + spin_lock_irq(&dev->rbsl); } + spin_unlock_irq(&dev->rbsl); /* actual_buffer contains actual_length + interrupt_in_buffer */ actual_buffer = (size_t *)(dev->ring_buffer + dev->ring_tail * (sizeof(size_t)+dev->interrupt_in_endpoint_size)); + if (*actual_buffer > dev->interrupt_in_endpoint_size) { + retval = -EIO; + goto unlock_exit; + } bytes_to_read = min(count, *actual_buffer); if (bytes_to_read < *actual_buffer) dev_warn(&dev->intf->dev, "Read buffer overflow, %zd bytes dropped\n", @@ -693,7 +698,9 @@ static int ld_usb_probe(struct usb_interface *intf, const struct usb_device_id * dev_warn(&intf->dev, "Interrupt out endpoint not found (using control endpoint instead)\n"); dev->interrupt_in_endpoint_size = usb_endpoint_maxp(dev->interrupt_in_endpoint); - dev->ring_buffer = kmalloc(ring_buffer_size*(sizeof(size_t)+dev->interrupt_in_endpoint_size), GFP_KERNEL); + dev->ring_buffer = kcalloc(ring_buffer_size, + sizeof(size_t) + dev->interrupt_in_endpoint_size, + GFP_KERNEL); if (!dev->ring_buffer) goto error; dev->interrupt_in_buffer = kmalloc(dev->interrupt_in_endpoint_size, GFP_KERNEL); From ec347012bbecf256764bafaa1a38931d6549a2ad Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 24 Oct 2019 14:47:46 +0200 Subject: [PATCH 0049/3715] arm64: sysreg: Move to use definitions for all the SCTLR bits [ Upstream commit 7a00d68ebe5f07cb1db17e7fedfd031f0d87e8bb ] __cpu_setup() configures SCTLR_EL1 using some hard coded hex masks, and el2_setup() duplicates some this when setting RES1 bits. Lets make this the same as KVM's hyp_init, which uses named bits. First, we add definitions for all the SCTLR_EL{1,2} bits, the RES{1,0} bits, and those we want to set or clear. Add a build_bug checks to ensures all bits are either set or clear. This means we don't need to preserve endian-ness configuration generated elsewhere. Finally, move the head.S and proc.S users of these hard-coded masks over to the macro versions. Signed-off-by: James Morse Signed-off-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/sysreg.h | 69 +++++++++++++++++++++++++++++++-- arch/arm64/kernel/head.S | 13 ++----- arch/arm64/mm/proc.S | 24 +----------- 3 files changed, 69 insertions(+), 37 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index ede80d47d0ef..cd32a968ff5b 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -20,6 +20,7 @@ #ifndef __ASM_SYSREG_H #define __ASM_SYSREG_H +#include #include /* @@ -297,25 +298,81 @@ /* Common SCTLR_ELx flags. */ #define SCTLR_ELx_EE (1 << 25) +#define SCTLR_ELx_WXN (1 << 19) #define SCTLR_ELx_I (1 << 12) #define SCTLR_ELx_SA (1 << 3) #define SCTLR_ELx_C (1 << 2) #define SCTLR_ELx_A (1 << 1) #define SCTLR_ELx_M 1 -#define SCTLR_EL2_RES1 ((1 << 4) | (1 << 5) | (1 << 11) | (1 << 16) | \ - (1 << 18) | (1 << 22) | (1 << 23) | (1 << 28) | \ - (1 << 29)) - #define SCTLR_ELx_FLAGS (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \ SCTLR_ELx_SA | SCTLR_ELx_I) +/* SCTLR_EL2 specific flags. */ +#define SCTLR_EL2_RES1 ((1 << 4) | (1 << 5) | (1 << 11) | (1 << 16) | \ + (1 << 18) | (1 << 22) | (1 << 23) | (1 << 28) | \ + (1 << 29)) +#define SCTLR_EL2_RES0 ((1 << 6) | (1 << 7) | (1 << 8) | (1 << 9) | \ + (1 << 10) | (1 << 13) | (1 << 14) | (1 << 15) | \ + (1 << 17) | (1 << 20) | (1 << 21) | (1 << 24) | \ + (1 << 26) | (1 << 27) | (1 << 30) | (1 << 31)) + +#ifdef CONFIG_CPU_BIG_ENDIAN +#define ENDIAN_SET_EL2 SCTLR_ELx_EE +#define ENDIAN_CLEAR_EL2 0 +#else +#define ENDIAN_SET_EL2 0 +#define ENDIAN_CLEAR_EL2 SCTLR_ELx_EE +#endif + +/* SCTLR_EL2 value used for the hyp-stub */ +#define SCTLR_EL2_SET (ENDIAN_SET_EL2 | SCTLR_EL2_RES1) +#define SCTLR_EL2_CLEAR (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \ + SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_WXN | \ + ENDIAN_CLEAR_EL2 | SCTLR_EL2_RES0) + +/* Check all the bits are accounted for */ +#define SCTLR_EL2_BUILD_BUG_ON_MISSING_BITS BUILD_BUG_ON((SCTLR_EL2_SET ^ SCTLR_EL2_CLEAR) != ~0) + + /* SCTLR_EL1 specific flags. */ #define SCTLR_EL1_UCI (1 << 26) +#define SCTLR_EL1_E0E (1 << 24) #define SCTLR_EL1_SPAN (1 << 23) +#define SCTLR_EL1_NTWE (1 << 18) +#define SCTLR_EL1_NTWI (1 << 16) #define SCTLR_EL1_UCT (1 << 15) +#define SCTLR_EL1_DZE (1 << 14) +#define SCTLR_EL1_UMA (1 << 9) #define SCTLR_EL1_SED (1 << 8) +#define SCTLR_EL1_ITD (1 << 7) #define SCTLR_EL1_CP15BEN (1 << 5) +#define SCTLR_EL1_SA0 (1 << 4) + +#define SCTLR_EL1_RES1 ((1 << 11) | (1 << 20) | (1 << 22) | (1 << 28) | \ + (1 << 29)) +#define SCTLR_EL1_RES0 ((1 << 6) | (1 << 10) | (1 << 13) | (1 << 17) | \ + (1 << 21) | (1 << 27) | (1 << 30) | (1 << 31)) + +#ifdef CONFIG_CPU_BIG_ENDIAN +#define ENDIAN_SET_EL1 (SCTLR_EL1_E0E | SCTLR_ELx_EE) +#define ENDIAN_CLEAR_EL1 0 +#else +#define ENDIAN_SET_EL1 0 +#define ENDIAN_CLEAR_EL1 (SCTLR_EL1_E0E | SCTLR_ELx_EE) +#endif + +#define SCTLR_EL1_SET (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA |\ + SCTLR_EL1_SA0 | SCTLR_EL1_SED | SCTLR_ELx_I |\ + SCTLR_EL1_DZE | SCTLR_EL1_UCT | SCTLR_EL1_NTWI |\ + SCTLR_EL1_NTWE | SCTLR_EL1_SPAN | ENDIAN_SET_EL1 |\ + SCTLR_EL1_UCI | SCTLR_EL1_RES1) +#define SCTLR_EL1_CLEAR (SCTLR_ELx_A | SCTLR_EL1_CP15BEN | SCTLR_EL1_ITD |\ + SCTLR_EL1_UMA | SCTLR_ELx_WXN | ENDIAN_CLEAR_EL1 |\ + SCTLR_EL1_RES0) + +/* Check all the bits are accounted for */ +#define SCTLR_EL1_BUILD_BUG_ON_MISSING_BITS BUILD_BUG_ON((SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != ~0) /* id_aa64isar0 */ #define ID_AA64ISAR0_RDM_SHIFT 28 @@ -463,6 +520,7 @@ #else +#include #include asm( @@ -519,6 +577,9 @@ static inline void config_sctlr_el1(u32 clear, u32 set) { u32 val; + SCTLR_EL2_BUILD_BUG_ON_MISSING_BITS; + SCTLR_EL1_BUILD_BUG_ON_MISSING_BITS; + val = read_sysreg(sctlr_el1); val &= ~clear; val |= set; diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 1371542de0d3..92cc7b51f100 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -388,17 +388,13 @@ ENTRY(el2_setup) mrs x0, CurrentEL cmp x0, #CurrentEL_EL2 b.eq 1f - mrs x0, sctlr_el1 -CPU_BE( orr x0, x0, #(3 << 24) ) // Set the EE and E0E bits for EL1 -CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1 + mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1) msr sctlr_el1, x0 mov w0, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1 isb ret -1: mrs x0, sctlr_el2 -CPU_BE( orr x0, x0, #(1 << 25) ) // Set the EE bit for EL2 -CPU_LE( bic x0, x0, #(1 << 25) ) // Clear the EE bit for EL2 +1: mov_q x0, (SCTLR_EL2_RES1 | ENDIAN_SET_EL2) msr sctlr_el2, x0 #ifdef CONFIG_ARM64_VHE @@ -505,10 +501,7 @@ install_el2_stub: * requires no configuration, and all non-hyp-specific EL2 setup * will be done via the _EL1 system register aliases in __cpu_setup. */ - /* sctlr_el1 */ - mov x0, #0x0800 // Set/clear RES{1,0} bits -CPU_BE( movk x0, #0x33d0, lsl #16 ) // Set EE and E0E on BE systems -CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems + mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1) msr sctlr_el1, x0 /* Coprocessor traps. */ diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 65b040152184..ecbc060807d2 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -430,11 +430,7 @@ ENTRY(__cpu_setup) /* * Prepare SCTLR */ - adr x5, crval - ldp w5, w6, [x5] - mrs x0, sctlr_el1 - bic x0, x0, x5 // clear bits - orr x0, x0, x6 // set bits + mov_q x0, SCTLR_EL1_SET /* * Set/prepare TCR and TTBR. We use 512GB (39-bit) address range for * both user and kernel. @@ -470,21 +466,3 @@ ENTRY(__cpu_setup) msr tcr_el1, x10 ret // return to head.S ENDPROC(__cpu_setup) - - /* - * We set the desired value explicitly, including those of the - * reserved bits. The values of bits EE & E0E were set early in - * el2_setup, which are left untouched below. - * - * n n T - * U E WT T UD US IHBS - * CE0 XWHW CZ ME TEEA S - * .... .IEE .... NEAI TE.I ..AD DEN0 ACAM - * 0011 0... 1101 ..0. ..0. 10.. .0.. .... < hardware reserved - * .... .1.. .... 01.1 11.1 ..01 0.01 1101 < software settings - */ - .type crval, #object -crval: - .word 0xfcffffff // clear - .word 0x34d5d91d // set - .popsection From 5f005c7e4d37213a76a1d64ed558fe7e1ff138b6 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 24 Oct 2019 14:47:47 +0200 Subject: [PATCH 0050/3715] arm64: Expose support for optional ARMv8-A features [ Upstream commit f5e035f8694c3bdddc66ea46ecda965ee6853718 ] ARMv8-A adds a few optional features for ARMv8.2 and ARMv8.3. Expose them to the userspace via HWCAPs and mrs emulation. SHA2-512 - Instruction support for SHA512 Hash algorithm (e.g SHA512H, SHA512H2, SHA512U0, SHA512SU1) SHA3 - SHA3 crypto instructions (EOR3, RAX1, XAR, BCAX). SM3 - Instruction support for Chinese cryptography algorithm SM3 SM4 - Instruction support for Chinese cryptography algorithm SM4 DP - Dot Product instructions (UDOT, SDOT). Cc: Will Deacon Cc: Mark Rutland Cc: Dave Martin Cc: Marc Zyngier Reviewed-by: Catalin Marinas Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- Documentation/arm64/cpu-feature-registers.txt | 12 +++++++++++- arch/arm64/include/asm/sysreg.h | 4 ++++ arch/arm64/include/uapi/asm/hwcap.h | 5 +++++ arch/arm64/kernel/cpufeature.c | 9 +++++++++ arch/arm64/kernel/cpuinfo.c | 5 +++++ 5 files changed, 34 insertions(+), 1 deletion(-) diff --git a/Documentation/arm64/cpu-feature-registers.txt b/Documentation/arm64/cpu-feature-registers.txt index dad411d635d8..011ddfc1e570 100644 --- a/Documentation/arm64/cpu-feature-registers.txt +++ b/Documentation/arm64/cpu-feature-registers.txt @@ -110,10 +110,20 @@ infrastructure: x--------------------------------------------------x | Name | bits | visible | |--------------------------------------------------| - | RES0 | [63-32] | n | + | RES0 | [63-48] | n | + |--------------------------------------------------| + | DP | [47-44] | y | + |--------------------------------------------------| + | SM4 | [43-40] | y | + |--------------------------------------------------| + | SM3 | [39-36] | y | + |--------------------------------------------------| + | SHA3 | [35-32] | y | |--------------------------------------------------| | RDM | [31-28] | y | |--------------------------------------------------| + | RES0 | [27-24] | n | + |--------------------------------------------------| | ATOMICS | [23-20] | y | |--------------------------------------------------| | CRC32 | [19-16] | y | diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index cd32a968ff5b..883999ce0bc7 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -375,6 +375,10 @@ #define SCTLR_EL1_BUILD_BUG_ON_MISSING_BITS BUILD_BUG_ON((SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != ~0) /* id_aa64isar0 */ +#define ID_AA64ISAR0_DP_SHIFT 44 +#define ID_AA64ISAR0_SM4_SHIFT 40 +#define ID_AA64ISAR0_SM3_SHIFT 36 +#define ID_AA64ISAR0_SHA3_SHIFT 32 #define ID_AA64ISAR0_RDM_SHIFT 28 #define ID_AA64ISAR0_ATOMICS_SHIFT 20 #define ID_AA64ISAR0_CRC32_SHIFT 16 diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index b3fdeee739ea..f243c57d1670 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -37,5 +37,10 @@ #define HWCAP_FCMA (1 << 14) #define HWCAP_LRCPC (1 << 15) #define HWCAP_DCPOP (1 << 16) +#define HWCAP_SHA3 (1 << 17) +#define HWCAP_SM3 (1 << 18) +#define HWCAP_SM4 (1 << 19) +#define HWCAP_ASIMDDP (1 << 20) +#define HWCAP_SHA512 (1 << 21) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 57ec681a8f11..b7d00216b775 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -107,6 +107,10 @@ cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused) * sync with the documentation of the CPU feature register ABI. */ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_DP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_SM4_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_SM3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_SHA3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_ATOMICS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_CRC32_SHIFT, 4, 0), @@ -1040,9 +1044,14 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_AES), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA1), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA2), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_SHA512), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_CRC32), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ATOMICS), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDRDM), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA3), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM3), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM4), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDDP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD), diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 311885962830..1ff1c5a67081 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -69,6 +69,11 @@ static const char *const hwcap_str[] = { "fcma", "lrcpc", "dcpop", + "sha3", + "sm3", + "sm4", + "asimddp", + "sha512", NULL }; From edcad64ed659810345a080e2aa16a16062a5e3ac Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 24 Oct 2019 14:47:48 +0200 Subject: [PATCH 0051/3715] arm64: Fix the feature type for ID register fields [ Upstream commit 5bdecb7971572a1aef828df507558e7a4dfe25ec ] Now that the ARM ARM clearly specifies the rules for inferring the values of the ID register fields, fix the types of the feature bits we have in the kernel. As per ARM ARM DDI0487B.b, section D10.1.4 "Principles of the ID scheme for fields in ID registers" lists the registers to which the scheme applies along with the exceptions. This patch changes the relevant feature bits from FTR_EXACT to FTR_LOWER_SAFE to select the safer value. This will enable an older kernel running on a new CPU detect the safer option rather than completely disabling the feature. Cc: Catalin Marinas Cc: Dave Martin Cc: Mark Rutland Cc: Will Deacon Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpufeature.c | 102 ++++++++++++++++----------------- 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index b7d00216b775..50d6c9e1a654 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -107,11 +107,11 @@ cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused) * sync with the documentation of the CPU feature register ABI. */ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_DP_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_SM4_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_SM3_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_SHA3_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_DP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM4_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_RDM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_ATOMICS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_CRC32_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA2_SHIFT, 4, 0), @@ -121,36 +121,36 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_LRCPC_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_FCMA_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_DPB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_LRCPC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FCMA_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_DPB_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV2_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_GIC_SHIFT, 4, 0), S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI), S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI), /* Linux doesn't care about the EL3 */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64PFR0_EL3_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL2_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { - S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI), - S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0), + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI), + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0), /* Linux shouldn't care about secure memory */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_SNSMEM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_ASID_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_SNSMEM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_BIGENDEL_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ASID_SHIFT, 4, 0), /* * Differing PARange is fine as long as all peripherals and memory are mapped * within the minimum PARange of all CPUs @@ -161,20 +161,20 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_LOR_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HPD_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VHE_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VMIDBITS_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HADBS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_LOR_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_HPD_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_VHE_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_VMIDBITS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_HADBS_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LVA_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_IESB_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LSM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_UAO_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_CNP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LSM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_UAO_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_CNP_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -201,14 +201,14 @@ struct arm64_ftr_reg arm64_ftr_reg_ctrel0 = { }; static const struct arm64_ftr_bits ftr_id_mmfr0[] = { - S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 28, 4, 0xf), /* InnerShr */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 24, 4, 0), /* FCSE */ + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0xf), /* InnerShr */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0), /* FCSE */ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0), /* AuxReg */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 16, 4, 0), /* TCM */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 12, 4, 0), /* ShareLvl */ - S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 8, 4, 0xf), /* OuterShr */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 4, 4, 0), /* PMSA */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 0, 4, 0), /* VMSA */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0), /* TCM */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), /* ShareLvl */ + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0xf), /* OuterShr */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* PMSA */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* VMSA */ ARM64_FTR_END, }; @@ -229,8 +229,8 @@ static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = { }; static const struct arm64_ftr_bits ftr_mvfr2[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 4, 4, 0), /* FPMisc */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 0, 4, 0), /* SIMDMisc */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* FPMisc */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* SIMDMisc */ ARM64_FTR_END, }; @@ -242,25 +242,25 @@ static const struct arm64_ftr_bits ftr_dczid[] = { static const struct arm64_ftr_bits ftr_id_isar5[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_RDM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_CRC32_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA2_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA1_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_AES_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_SEVL_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_RDM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_CRC32_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_SHA2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_SHA1_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_AES_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_SEVL_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_mmfr4[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 4, 4, 0), /* ac2 */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* ac2 */ ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_pfr0[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 12, 4, 0), /* State3 */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 8, 4, 0), /* State2 */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 4, 4, 0), /* State1 */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 0, 4, 0), /* State0 */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), /* State3 */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0), /* State2 */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* State1 */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* State0 */ ARM64_FTR_END, }; From 1caa4f72dfc4a0401ec7fad210cfb0ed73d06b4d Mon Sep 17 00:00:00 2001 From: Dongjiu Geng Date: Thu, 24 Oct 2019 14:47:49 +0200 Subject: [PATCH 0052/3715] arm64: v8.4: Support for new floating point multiplication instructions [ Upstream commit 3b3b681097fae73b7f5dcdd42db6cfdf32943d4c ] ARM v8.4 extensions add new neon instructions for performing a multiplication of each FP16 element of one vector with the corresponding FP16 element of a second vector, and to add or subtract this without an intermediate rounding to the corresponding FP32 element in a third vector. This patch detects this feature and let the userspace know about it via a HWCAP bit and MRS emulation. Cc: Dave Martin Reviewed-by: Suzuki K Poulose Signed-off-by: Dongjiu Geng Reviewed-by: Dave Martin Signed-off-by: Catalin Marinas [ardb: fix up for missing SVE in context] Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- Documentation/arm64/cpu-feature-registers.txt | 4 +++- arch/arm64/include/asm/sysreg.h | 1 + arch/arm64/include/uapi/asm/hwcap.h | 2 ++ arch/arm64/kernel/cpufeature.c | 2 ++ arch/arm64/kernel/cpuinfo.c | 2 ++ 5 files changed, 10 insertions(+), 1 deletion(-) diff --git a/Documentation/arm64/cpu-feature-registers.txt b/Documentation/arm64/cpu-feature-registers.txt index 011ddfc1e570..ddd566fea3f2 100644 --- a/Documentation/arm64/cpu-feature-registers.txt +++ b/Documentation/arm64/cpu-feature-registers.txt @@ -110,7 +110,9 @@ infrastructure: x--------------------------------------------------x | Name | bits | visible | |--------------------------------------------------| - | RES0 | [63-48] | n | + | RES0 | [63-52] | n | + |--------------------------------------------------| + | FHM | [51-48] | y | |--------------------------------------------------| | DP | [47-44] | y | |--------------------------------------------------| diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 883999ce0bc7..ee4b7935155b 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -375,6 +375,7 @@ #define SCTLR_EL1_BUILD_BUG_ON_MISSING_BITS BUILD_BUG_ON((SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != ~0) /* id_aa64isar0 */ +#define ID_AA64ISAR0_FHM_SHIFT 48 #define ID_AA64ISAR0_DP_SHIFT 44 #define ID_AA64ISAR0_SM4_SHIFT 40 #define ID_AA64ISAR0_SM3_SHIFT 36 diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index f243c57d1670..f018c3deea3b 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -42,5 +42,7 @@ #define HWCAP_SM4 (1 << 19) #define HWCAP_ASIMDDP (1 << 20) #define HWCAP_SHA512 (1 << 21) +#define HWCAP_SVE (1 << 22) +#define HWCAP_ASIMDFHM (1 << 23) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 50d6c9e1a654..258921542b00 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -107,6 +107,7 @@ cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused) * sync with the documentation of the CPU feature register ABI. */ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_FHM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_DP_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM4_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM3_SHIFT, 4, 0), @@ -1052,6 +1053,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM3), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM4), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDDP), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDFHM), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD), diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 1ff1c5a67081..67afe69efb61 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -74,6 +74,8 @@ static const char *const hwcap_str[] = { "sm4", "asimddp", "sha512", + "sve", + "asimdfhm", NULL }; From eb952c6bce53b5389c41ffc3698594cb43b5aeb9 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 24 Oct 2019 14:47:50 +0200 Subject: [PATCH 0053/3715] arm64: Documentation: cpu-feature-registers: Remove RES0 fields [ Upstream commit 847ecd3fa311cde0f10a1b66c572abb136742b1d ] Remove the invisible RES0 field entries from the table, listing fields in CPU ID feature registers, as : 1) We are only interested in the user visible fields. 2) The field description may not be up-to-date, as the field could be assigned a new meaning. 3) We already explain the rules of the fields which are not visible. Cc: Catalin Marinas Cc: Will Deacon Acked-by: Mark Rutland Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon [ardb: fix up for missing SVE in context] Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- Documentation/arm64/cpu-feature-registers.txt | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/Documentation/arm64/cpu-feature-registers.txt b/Documentation/arm64/cpu-feature-registers.txt index ddd566fea3f2..22cfb86143ee 100644 --- a/Documentation/arm64/cpu-feature-registers.txt +++ b/Documentation/arm64/cpu-feature-registers.txt @@ -110,7 +110,6 @@ infrastructure: x--------------------------------------------------x | Name | bits | visible | |--------------------------------------------------| - | RES0 | [63-52] | n | |--------------------------------------------------| | FHM | [51-48] | y | |--------------------------------------------------| @@ -124,8 +123,6 @@ infrastructure: |--------------------------------------------------| | RDM | [31-28] | y | |--------------------------------------------------| - | RES0 | [27-24] | n | - |--------------------------------------------------| | ATOMICS | [23-20] | y | |--------------------------------------------------| | CRC32 | [19-16] | y | @@ -135,8 +132,6 @@ infrastructure: | SHA1 | [11-8] | y | |--------------------------------------------------| | AES | [7-4] | y | - |--------------------------------------------------| - | RES0 | [3-0] | n | x--------------------------------------------------x @@ -144,7 +139,8 @@ infrastructure: x--------------------------------------------------x | Name | bits | visible | |--------------------------------------------------| - | RES0 | [63-28] | n | + |--------------------------------------------------| + | SVE | [35-32] | y | |--------------------------------------------------| | GIC | [27-24] | n | |--------------------------------------------------| From 053cdffad3dd5e4a9330433ba2b400956f33bd2b Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 24 Oct 2019 14:47:51 +0200 Subject: [PATCH 0054/3715] arm64: Expose Arm v8.4 features [ Upstream commit 7206dc93a58fb76421c4411eefa3c003337bcb2d ] Expose the new features introduced by Arm v8.4 extensions to Arm v8-A profile. These include : 1) Data indpendent timing of instructions. (DIT, exposed as HWCAP_DIT) 2) Unaligned atomic instructions and Single-copy atomicity of loads and stores. (AT, expose as HWCAP_USCAT) 3) LDAPR and STLR instructions with immediate offsets (extension to LRCPC, exposed as HWCAP_ILRCPC) 4) Flag manipulation instructions (TS, exposed as HWCAP_FLAGM). Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon [ardb: fix up context for missing SVE] Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- Documentation/arm64/cpu-feature-registers.txt | 10 ++++++++++ arch/arm64/include/asm/sysreg.h | 3 +++ arch/arm64/include/uapi/asm/hwcap.h | 4 ++++ arch/arm64/kernel/cpufeature.c | 7 +++++++ arch/arm64/kernel/cpuinfo.c | 4 ++++ 5 files changed, 28 insertions(+) diff --git a/Documentation/arm64/cpu-feature-registers.txt b/Documentation/arm64/cpu-feature-registers.txt index 22cfb86143ee..7964f03846b1 100644 --- a/Documentation/arm64/cpu-feature-registers.txt +++ b/Documentation/arm64/cpu-feature-registers.txt @@ -110,6 +110,7 @@ infrastructure: x--------------------------------------------------x | Name | bits | visible | |--------------------------------------------------| + | TS | [55-52] | y | |--------------------------------------------------| | FHM | [51-48] | y | |--------------------------------------------------| @@ -139,6 +140,7 @@ infrastructure: x--------------------------------------------------x | Name | bits | visible | |--------------------------------------------------| + | DIT | [51-48] | y | |--------------------------------------------------| | SVE | [35-32] | y | |--------------------------------------------------| @@ -191,6 +193,14 @@ infrastructure: | DPB | [3-0] | y | x--------------------------------------------------x + 5) ID_AA64MMFR2_EL1 - Memory model feature register 2 + + x--------------------------------------------------x + | Name | bits | visible | + |--------------------------------------------------| + | AT | [35-32] | y | + x--------------------------------------------------x + Appendix I: Example --------------------------- diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index ee4b7935155b..eab67c2e2bb3 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -375,6 +375,7 @@ #define SCTLR_EL1_BUILD_BUG_ON_MISSING_BITS BUILD_BUG_ON((SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != ~0) /* id_aa64isar0 */ +#define ID_AA64ISAR0_TS_SHIFT 52 #define ID_AA64ISAR0_FHM_SHIFT 48 #define ID_AA64ISAR0_DP_SHIFT 44 #define ID_AA64ISAR0_SM4_SHIFT 40 @@ -396,6 +397,7 @@ /* id_aa64pfr0 */ #define ID_AA64PFR0_CSV3_SHIFT 60 #define ID_AA64PFR0_CSV2_SHIFT 56 +#define ID_AA64PFR0_DIT_SHIFT 48 #define ID_AA64PFR0_GIC_SHIFT 24 #define ID_AA64PFR0_ASIMD_SHIFT 20 #define ID_AA64PFR0_FP_SHIFT 16 @@ -441,6 +443,7 @@ #define ID_AA64MMFR1_VMIDBITS_16 2 /* id_aa64mmfr2 */ +#define ID_AA64MMFR2_AT_SHIFT 32 #define ID_AA64MMFR2_LVA_SHIFT 16 #define ID_AA64MMFR2_IESB_SHIFT 12 #define ID_AA64MMFR2_LSM_SHIFT 8 diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index f018c3deea3b..17c65c8f33cb 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -44,5 +44,9 @@ #define HWCAP_SHA512 (1 << 21) #define HWCAP_SVE (1 << 22) #define HWCAP_ASIMDFHM (1 << 23) +#define HWCAP_DIT (1 << 24) +#define HWCAP_USCAT (1 << 25) +#define HWCAP_ILRCPC (1 << 26) +#define HWCAP_FLAGM (1 << 27) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 258921542b00..d3a93e23a87c 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -107,6 +107,7 @@ cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused) * sync with the documentation of the CPU feature register ABI. */ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_TS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_FHM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_DP_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM4_SHIFT, 4, 0), @@ -132,6 +133,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_DIT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_GIC_SHIFT, 4, 0), S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI), S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI), @@ -171,6 +173,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_AT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LSM_SHIFT, 4, 0), @@ -1054,14 +1057,18 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM4), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDDP), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDFHM), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FLAGM), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_DIT), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_DCPOP), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_JSCVT), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FCMA), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_LRCPC), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ILRCPC), + HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_USCAT), {}, }; diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 67afe69efb61..2188db11b654 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -76,6 +76,10 @@ static const char *const hwcap_str[] = { "sha512", "sve", "asimdfhm", + "dit", + "uscat", + "ilrcpc", + "flagm", NULL }; From 31693d2f4efaf31f67410b6b3ed5afe6e46f7679 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 24 Oct 2019 14:47:52 +0200 Subject: [PATCH 0055/3715] arm64: move SCTLR_EL{1,2} assertions to [ Upstream commit 1c312e84c2d71da4101754fa6118f703f7473e01 ] Currently we assert that the SCTLR_EL{1,2}_{SET,CLEAR} bits are self-consistent with an assertion in config_sctlr_el1(). This is a bit unusual, since config_sctlr_el1() doesn't make use of these definitions, and is far away from the definitions themselves. We can use the CPP #error directive to have equivalent assertions in , next to the definitions of the set/clear bits, which is a bit clearer and simpler. At the same time, lets fill in the upper 32 bits for both registers in their respective RES0 definitions. This could be a little nicer with GENMASK_ULL(63, 32), but this currently lives in , which cannot safely be included from assembly, as can. Note the when the preprocessor evaluates an expression for an #if directive, all signed or unsigned values are treated as intmax_t or uintmax_t respectively. To avoid ambiguity, we define explicitly define the mask of all 64 bits. Signed-off-by: Mark Rutland Acked-by: Catalin Marinas Cc: Dave Martin Cc: James Morse Cc: Will Deacon Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/sysreg.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index eab67c2e2bb3..f0ce6ea6c6d8 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -315,7 +315,8 @@ #define SCTLR_EL2_RES0 ((1 << 6) | (1 << 7) | (1 << 8) | (1 << 9) | \ (1 << 10) | (1 << 13) | (1 << 14) | (1 << 15) | \ (1 << 17) | (1 << 20) | (1 << 21) | (1 << 24) | \ - (1 << 26) | (1 << 27) | (1 << 30) | (1 << 31)) + (1 << 26) | (1 << 27) | (1 << 30) | (1 << 31) | \ + (0xffffffffUL << 32)) #ifdef CONFIG_CPU_BIG_ENDIAN #define ENDIAN_SET_EL2 SCTLR_ELx_EE @@ -331,9 +332,9 @@ SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_WXN | \ ENDIAN_CLEAR_EL2 | SCTLR_EL2_RES0) -/* Check all the bits are accounted for */ -#define SCTLR_EL2_BUILD_BUG_ON_MISSING_BITS BUILD_BUG_ON((SCTLR_EL2_SET ^ SCTLR_EL2_CLEAR) != ~0) - +#if (SCTLR_EL2_SET ^ SCTLR_EL2_CLEAR) != 0xffffffffffffffff +#error "Inconsistent SCTLR_EL2 set/clear bits" +#endif /* SCTLR_EL1 specific flags. */ #define SCTLR_EL1_UCI (1 << 26) @@ -352,7 +353,8 @@ #define SCTLR_EL1_RES1 ((1 << 11) | (1 << 20) | (1 << 22) | (1 << 28) | \ (1 << 29)) #define SCTLR_EL1_RES0 ((1 << 6) | (1 << 10) | (1 << 13) | (1 << 17) | \ - (1 << 21) | (1 << 27) | (1 << 30) | (1 << 31)) + (1 << 21) | (1 << 27) | (1 << 30) | (1 << 31) | \ + (0xffffffffUL << 32)) #ifdef CONFIG_CPU_BIG_ENDIAN #define ENDIAN_SET_EL1 (SCTLR_EL1_E0E | SCTLR_ELx_EE) @@ -371,8 +373,9 @@ SCTLR_EL1_UMA | SCTLR_ELx_WXN | ENDIAN_CLEAR_EL1 |\ SCTLR_EL1_RES0) -/* Check all the bits are accounted for */ -#define SCTLR_EL1_BUILD_BUG_ON_MISSING_BITS BUILD_BUG_ON((SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != ~0) +#if (SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != 0xffffffffffffffff +#error "Inconsistent SCTLR_EL1 set/clear bits" +#endif /* id_aa64isar0 */ #define ID_AA64ISAR0_TS_SHIFT 52 @@ -585,9 +588,6 @@ static inline void config_sctlr_el1(u32 clear, u32 set) { u32 val; - SCTLR_EL2_BUILD_BUG_ON_MISSING_BITS; - SCTLR_EL1_BUILD_BUG_ON_MISSING_BITS; - val = read_sysreg(sctlr_el1); val &= ~clear; val |= set; From 2fa1fc1f20b1e66cdefc567cf08b2b6ae900cc24 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 24 Oct 2019 14:47:53 +0200 Subject: [PATCH 0056/3715] arm64: add PSR_AA32_* definitions [ Upstream commit 25086263425641c74123f9387426c23072b299ea ] The AArch32 CPSR/SPSR format is *almost* identical to the AArch64 SPSR_ELx format for exceptions taken from AArch32, but the two have diverged with the addition of DIT, and we need to treat the two as logically distinct. This patch adds new definitions for the SPSR_ELx format for exceptions taken from AArch32, with a consistent PSR_AA32_ prefix. The existing COMPAT_PSR_ definitions will be used for the PSR format as seen from AArch32. Definitions of DIT are provided for both, and inline functions are provided to map between the two formats. Note that for SPSR_ELx, the (RES0) J bit has been re-allocated as the DIT bit. Once users of the COMPAT_PSR definitions have been migrated over to the PSR_AA32 definitions, the (majority of) the former will be removed, so no efforts is made to avoid duplication until then. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Christoffer Dall Cc: Marc Zyngier Cc: Suzuki Poulose Cc: Will Deacon Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/ptrace.h | 57 ++++++++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 6069d66e0bc2..1b2a253de6a1 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -35,7 +35,37 @@ #define COMPAT_PTRACE_GETHBPREGS 29 #define COMPAT_PTRACE_SETHBPREGS 30 -/* AArch32 CPSR bits */ +/* SPSR_ELx bits for exceptions taken from AArch32 */ +#define PSR_AA32_MODE_MASK 0x0000001f +#define PSR_AA32_MODE_USR 0x00000010 +#define PSR_AA32_MODE_FIQ 0x00000011 +#define PSR_AA32_MODE_IRQ 0x00000012 +#define PSR_AA32_MODE_SVC 0x00000013 +#define PSR_AA32_MODE_ABT 0x00000017 +#define PSR_AA32_MODE_HYP 0x0000001a +#define PSR_AA32_MODE_UND 0x0000001b +#define PSR_AA32_MODE_SYS 0x0000001f +#define PSR_AA32_T_BIT 0x00000020 +#define PSR_AA32_F_BIT 0x00000040 +#define PSR_AA32_I_BIT 0x00000080 +#define PSR_AA32_A_BIT 0x00000100 +#define PSR_AA32_E_BIT 0x00000200 +#define PSR_AA32_DIT_BIT 0x01000000 +#define PSR_AA32_Q_BIT 0x08000000 +#define PSR_AA32_V_BIT 0x10000000 +#define PSR_AA32_C_BIT 0x20000000 +#define PSR_AA32_Z_BIT 0x40000000 +#define PSR_AA32_N_BIT 0x80000000 +#define PSR_AA32_IT_MASK 0x0600fc00 /* If-Then execution state mask */ +#define PSR_AA32_GE_MASK 0x000f0000 + +#ifdef CONFIG_CPU_BIG_ENDIAN +#define PSR_AA32_ENDSTATE PSR_AA32_E_BIT +#else +#define PSR_AA32_ENDSTATE 0 +#endif + +/* AArch32 CPSR bits, as seen in AArch32 */ #define COMPAT_PSR_MODE_MASK 0x0000001f #define COMPAT_PSR_MODE_USR 0x00000010 #define COMPAT_PSR_MODE_FIQ 0x00000011 @@ -50,6 +80,7 @@ #define COMPAT_PSR_I_BIT 0x00000080 #define COMPAT_PSR_A_BIT 0x00000100 #define COMPAT_PSR_E_BIT 0x00000200 +#define COMPAT_PSR_DIT_BIT 0x00200000 #define COMPAT_PSR_J_BIT 0x01000000 #define COMPAT_PSR_Q_BIT 0x08000000 #define COMPAT_PSR_V_BIT 0x10000000 @@ -111,6 +142,30 @@ #define compat_sp_fiq regs[29] #define compat_lr_fiq regs[30] +static inline unsigned long compat_psr_to_pstate(const unsigned long psr) +{ + unsigned long pstate; + + pstate = psr & ~COMPAT_PSR_DIT_BIT; + + if (psr & COMPAT_PSR_DIT_BIT) + pstate |= PSR_AA32_DIT_BIT; + + return pstate; +} + +static inline unsigned long pstate_to_compat_psr(const unsigned long pstate) +{ + unsigned long psr; + + psr = pstate & ~PSR_AA32_DIT_BIT; + + if (pstate & PSR_AA32_DIT_BIT) + psr |= COMPAT_PSR_DIT_BIT; + + return psr; +} + /* * This struct defines the way the registers are stored on the stack during an * exception. Note that sizeof(struct pt_regs) has to be a multiple of 16 (for From 1d0ee40ec243f516d8be09ae0d901566eff3ad8d Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 24 Oct 2019 14:47:54 +0200 Subject: [PATCH 0057/3715] arm64: Introduce sysreg_clear_set() [ Upstream commit 6ebdf4db8fa564a150f46d32178af0873eb5abbb ] Currently we have a couple of helpers to manipulate bits in particular sysregs: * config_sctlr_el1(u32 clear, u32 set) * change_cpacr(u64 val, u64 mask) The parameters of these differ in naming convention, order, and size, which is unfortunate. They also differ slightly in behaviour, as change_cpacr() skips the sysreg write if the bits are unchanged, which is a useful optimization when sysreg writes are expensive. Before we gain yet another sysreg manipulation function, let's unify these with a common helper, providing a consistent order for clear/set operands, and the write skipping behaviour from change_cpacr(). Code will be migrated to the new helper in subsequent patches. Signed-off-by: Mark Rutland Reviewed-by: Dave Martin Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/sysreg.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index f0ce6ea6c6d8..5f391630d0f4 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -584,6 +584,17 @@ asm( asm volatile("msr_s " __stringify(r) ", %x0" : : "rZ" (__val)); \ } while (0) +/* + * Modify bits in a sysreg. Bits in the clear mask are zeroed, then bits in the + * set mask are set. Other bits are left as-is. + */ +#define sysreg_clear_set(sysreg, clear, set) do { \ + u64 __scs_val = read_sysreg(sysreg); \ + u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set); \ + if (__scs_new != __scs_val) \ + write_sysreg(__scs_new, sysreg); \ +} while (0) + static inline void config_sctlr_el1(u32 clear, u32 set) { u32 val; From 0e606f018d76973b8c8d2b682eddf2633d825e25 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 24 Oct 2019 14:47:55 +0200 Subject: [PATCH 0058/3715] arm64: capabilities: Update prototype for enable call back [ Upstream commit c0cda3b8ee6b4b6851b2fd8b6db91fd7b0e2524a ] We issue the enable() call back for all CPU hwcaps capabilities available on the system, on all the CPUs. So far we have ignored the argument passed to the call back, which had a prototype to accept a "void *" for use with on_each_cpu() and later with stop_machine(). However, with commit 0a0d111d40fd1 ("arm64: cpufeature: Pass capability structure to ->enable callback"), there are some users of the argument who wants the matching capability struct pointer where there are multiple matching criteria for a single capability. Clean up the declaration of the call back to make it clear. 1) Renamed to cpu_enable(), to imply taking necessary actions on the called CPU for the entry. 2) Pass const pointer to the capability, to allow the call back to check the entry. (e.,g to check if any action is needed on the CPU) 3) We don't care about the result of the call back, turning this to a void. Cc: Will Deacon Cc: Catalin Marinas Cc: Mark Rutland Cc: Andre Przywara Cc: James Morse Acked-by: Robin Murphy Reviewed-by: Julien Thierry Signed-off-by: Dave Martin [suzuki: convert more users, rename call back and drop results] Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/cpufeature.h | 7 +++- arch/arm64/include/asm/processor.h | 5 +-- arch/arm64/kernel/cpu_errata.c | 55 ++++++++++++++--------------- arch/arm64/kernel/cpufeature.c | 34 +++++++++++------- arch/arm64/kernel/fpsimd.c | 1 + arch/arm64/kernel/traps.c | 4 +-- arch/arm64/mm/fault.c | 3 +- 7 files changed, 60 insertions(+), 49 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 5048c7a55eef..bff4d95db039 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -96,7 +96,12 @@ struct arm64_cpu_capabilities { u16 capability; int def_scope; /* default scope */ bool (*matches)(const struct arm64_cpu_capabilities *caps, int scope); - int (*enable)(void *); /* Called on all active CPUs */ + /* + * Take the appropriate actions to enable this capability for this CPU. + * For each successfully booted CPU, this method is called for each + * globally detected capability. + */ + void (*cpu_enable)(const struct arm64_cpu_capabilities *cap); union { struct { /* To be used for erratum handling only */ u32 midr_model; diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 91bb97d8bdbf..9b6ac522a71a 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -37,6 +37,7 @@ #include #include +#include #include #include #include @@ -222,8 +223,8 @@ static inline void spin_lock_prefetch(const void *ptr) #endif -int cpu_enable_pan(void *__unused); -int cpu_enable_cache_maint_trap(void *__unused); +void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused); +void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused); #endif /* __ASSEMBLY__ */ #endif /* __ASM_PROCESSOR_H */ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 3d6d7fae45de..3c2a68d766a2 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -61,11 +61,11 @@ has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry, (arm64_ftr_reg_ctrel0.sys_val & mask); } -static int cpu_enable_trap_ctr_access(void *__unused) +static void +cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *__unused) { /* Clear SCTLR_EL1.UCT */ config_sctlr_el1(SCTLR_EL1_UCT, 0); - return 0; } #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR @@ -169,25 +169,25 @@ static void call_hvc_arch_workaround_1(void) arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); } -static int enable_smccc_arch_workaround_1(void *data) +static void +enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry) { - const struct arm64_cpu_capabilities *entry = data; bp_hardening_cb_t cb; void *smccc_start, *smccc_end; struct arm_smccc_res res; if (!entry->matches(entry, SCOPE_LOCAL_CPU)) - return 0; + return; if (psci_ops.smccc_version == SMCCC_VERSION_1_0) - return 0; + return; switch (psci_ops.conduit) { case PSCI_CONDUIT_HVC: arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); if ((int)res.a0 < 0) - return 0; + return; cb = call_hvc_arch_workaround_1; smccc_start = __smccc_workaround_1_hvc_start; smccc_end = __smccc_workaround_1_hvc_end; @@ -197,19 +197,19 @@ static int enable_smccc_arch_workaround_1(void *data) arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); if ((int)res.a0 < 0) - return 0; + return; cb = call_smc_arch_workaround_1; smccc_start = __smccc_workaround_1_smc_start; smccc_end = __smccc_workaround_1_smc_end; break; default: - return 0; + return; } install_bp_hardening_cb(entry, cb, smccc_start, smccc_end); - return 0; + return; } static void qcom_link_stack_sanitization(void) @@ -224,15 +224,12 @@ static void qcom_link_stack_sanitization(void) : "=&r" (tmp)); } -static int qcom_enable_link_stack_sanitization(void *data) +static void +qcom_enable_link_stack_sanitization(const struct arm64_cpu_capabilities *entry) { - const struct arm64_cpu_capabilities *entry = data; - install_bp_hardening_cb(entry, qcom_link_stack_sanitization, __qcom_hyp_sanitize_link_stack_start, __qcom_hyp_sanitize_link_stack_end); - - return 0; } #endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ @@ -431,7 +428,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .desc = "ARM errata 826319, 827319, 824069", .capability = ARM64_WORKAROUND_CLEAN_CACHE, MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x02), - .enable = cpu_enable_cache_maint_trap, + .cpu_enable = cpu_enable_cache_maint_trap, }, #endif #ifdef CONFIG_ARM64_ERRATUM_819472 @@ -440,7 +437,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .desc = "ARM errata 819472", .capability = ARM64_WORKAROUND_CLEAN_CACHE, MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x01), - .enable = cpu_enable_cache_maint_trap, + .cpu_enable = cpu_enable_cache_maint_trap, }, #endif #ifdef CONFIG_ARM64_ERRATUM_832075 @@ -521,14 +518,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .capability = ARM64_MISMATCHED_CACHE_LINE_SIZE, .matches = has_mismatched_cache_type, .def_scope = SCOPE_LOCAL_CPU, - .enable = cpu_enable_trap_ctr_access, + .cpu_enable = cpu_enable_trap_ctr_access, }, { .desc = "Mismatched cache type", .capability = ARM64_MISMATCHED_CACHE_TYPE, .matches = has_mismatched_cache_type, .def_scope = SCOPE_LOCAL_CPU, - .enable = cpu_enable_trap_ctr_access, + .cpu_enable = cpu_enable_trap_ctr_access, }, #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003 { @@ -567,27 +564,27 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), - .enable = enable_smccc_arch_workaround_1, + .cpu_enable = enable_smccc_arch_workaround_1, }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), - .enable = enable_smccc_arch_workaround_1, + .cpu_enable = enable_smccc_arch_workaround_1, }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), - .enable = enable_smccc_arch_workaround_1, + .cpu_enable = enable_smccc_arch_workaround_1, }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), - .enable = enable_smccc_arch_workaround_1, + .cpu_enable = enable_smccc_arch_workaround_1, }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), - .enable = qcom_enable_link_stack_sanitization, + .cpu_enable = qcom_enable_link_stack_sanitization, }, { .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT, @@ -596,7 +593,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), - .enable = qcom_enable_link_stack_sanitization, + .cpu_enable = qcom_enable_link_stack_sanitization, }, { .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT, @@ -605,12 +602,12 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), - .enable = enable_smccc_arch_workaround_1, + .cpu_enable = enable_smccc_arch_workaround_1, }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), - .enable = enable_smccc_arch_workaround_1, + .cpu_enable = enable_smccc_arch_workaround_1, }, #endif #ifdef CONFIG_ARM64_SSBD @@ -636,8 +633,8 @@ void verify_local_cpu_errata_workarounds(void) for (; caps->matches; caps++) { if (cpus_have_cap(caps->capability)) { - if (caps->enable) - caps->enable((void *)caps); + if (caps->cpu_enable) + caps->cpu_enable(caps); } else if (caps->matches(caps, SCOPE_LOCAL_CPU)) { pr_crit("CPU%d: Requires work around for %s, not detected" " at boot time\n", diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index d3a93e23a87c..17aa34d70771 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -859,7 +859,8 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, ID_AA64PFR0_CSV3_SHIFT); } -static int kpti_install_ng_mappings(void *__unused) +static void +kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) { typedef void (kpti_remap_fn)(int, int, phys_addr_t); extern kpti_remap_fn idmap_kpti_install_ng_mappings; @@ -869,7 +870,7 @@ static int kpti_install_ng_mappings(void *__unused) int cpu = smp_processor_id(); if (kpti_applied) - return 0; + return; remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings); @@ -880,7 +881,7 @@ static int kpti_install_ng_mappings(void *__unused) if (!cpu) kpti_applied = true; - return 0; + return; } static int __init parse_kpti(char *str) @@ -897,7 +898,7 @@ static int __init parse_kpti(char *str) early_param("kpti", parse_kpti); #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ -static int cpu_copy_el2regs(void *__unused) +static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused) { /* * Copy register values that aren't redirected by hardware. @@ -909,8 +910,6 @@ static int cpu_copy_el2regs(void *__unused) */ if (!alternatives_applied) write_sysreg(read_sysreg(tpidr_el1), tpidr_el2); - - return 0; } static const struct arm64_cpu_capabilities arm64_features[] = { @@ -934,7 +933,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64MMFR1_PAN_SHIFT, .sign = FTR_UNSIGNED, .min_field_value = 1, - .enable = cpu_enable_pan, + .cpu_enable = cpu_enable_pan, }, #endif /* CONFIG_ARM64_PAN */ #if defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS) @@ -982,7 +981,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HAS_VIRT_HOST_EXTN, .def_scope = SCOPE_SYSTEM, .matches = runs_at_el2, - .enable = cpu_copy_el2regs, + .cpu_enable = cpu_copy_el2regs, }, { .desc = "32-bit EL0 Support", @@ -1006,7 +1005,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_UNMAP_KERNEL_AT_EL0, .def_scope = SCOPE_SYSTEM, .matches = unmap_kernel_at_el0, - .enable = kpti_install_ng_mappings, + .cpu_enable = kpti_install_ng_mappings, }, #endif { @@ -1169,6 +1168,14 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, } } +static int __enable_cpu_capability(void *arg) +{ + const struct arm64_cpu_capabilities *cap = arg; + + cap->cpu_enable(cap); + return 0; +} + /* * Run through the enabled capabilities and enable() it on all active * CPUs @@ -1184,14 +1191,15 @@ void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) /* Ensure cpus_have_const_cap(num) works */ static_branch_enable(&cpu_hwcap_keys[num]); - if (caps->enable) { + if (caps->cpu_enable) { /* * Use stop_machine() as it schedules the work allowing * us to modify PSTATE, instead of on_each_cpu() which * uses an IPI, giving us a PSTATE that disappears when * we return. */ - stop_machine(caps->enable, (void *)caps, cpu_online_mask); + stop_machine(__enable_cpu_capability, (void *)caps, + cpu_online_mask); } } } @@ -1249,8 +1257,8 @@ verify_local_cpu_features(const struct arm64_cpu_capabilities *caps_list) smp_processor_id(), caps->desc); cpu_die_early(); } - if (caps->enable) - caps->enable((void *)caps); + if (caps->cpu_enable) + caps->cpu_enable(caps); } } diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 5d547deb6996..f4fdf6420ac5 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -28,6 +28,7 @@ #include #include +#include #include #include diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 74259ae9c7f2..a4e49e947684 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -38,6 +38,7 @@ #include #include +#include #include #include #include @@ -436,10 +437,9 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0); } -int cpu_enable_cache_maint_trap(void *__unused) +void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) { config_sctlr_el1(SCTLR_EL1_UCI, 0); - return 0; } #define __user_cache_maint(insn, address, res) \ diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 465b90d7abf2..bf7c285d0c82 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -875,7 +875,7 @@ asmlinkage int __exception do_debug_exception(unsigned long addr_if_watchpoint, NOKPROBE_SYMBOL(do_debug_exception); #ifdef CONFIG_ARM64_PAN -int cpu_enable_pan(void *__unused) +void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) { /* * We modify PSTATE. This won't work from irq context as the PSTATE @@ -885,6 +885,5 @@ int cpu_enable_pan(void *__unused) config_sctlr_el1(SCTLR_EL1_SPAN, 0); asm(SET_PSTATE_PAN(1)); - return 0; } #endif /* CONFIG_ARM64_PAN */ From d56e7aa4116720bc4de761110f1bc9da6e5394c9 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 24 Oct 2019 14:47:56 +0200 Subject: [PATCH 0059/3715] arm64: capabilities: Move errata work around check on boot CPU [ Upstream commit 5e91107b06811f0ca147cebbedce53626c9c4443 ] We trigger CPU errata work around check on the boot CPU from smp_prepare_boot_cpu() to make sure that we run the checks only after the CPU feature infrastructure is initialised. While this is correct, we can also do this from init_cpu_features() which initilises the infrastructure, and is called only on the Boot CPU. This helps to consolidate the CPU capability handling to cpufeature.c. No functional changes. Cc: Will Deacon Cc: Catalin Marinas Cc: Mark Rutland Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpufeature.c | 5 +++++ arch/arm64/kernel/smp.c | 6 ------ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 17aa34d70771..1269d496db0a 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -521,6 +521,11 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2); } + /* + * Run the errata work around checks on the boot CPU, once we have + * initialised the cpu feature infrastructure. + */ + update_cpu_errata_workarounds(); } static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index b7ad41d7b6ee..e9b8395e24a7 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -449,12 +449,6 @@ void __init smp_prepare_boot_cpu(void) jump_label_init(); cpuinfo_store_boot_cpu(); save_boot_cpu_run_el(); - /* - * Run the errata work around checks on the boot CPU, once we have - * initialised the cpu feature infrastructure from - * cpuinfo_store_boot_cpu() above. - */ - update_cpu_errata_workarounds(); } static u64 __init of_get_cpu_mpidr(struct device_node *dn) From e89e2a26f9969d1c8e422c7a288ee76c27d20a53 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 24 Oct 2019 14:47:57 +0200 Subject: [PATCH 0060/3715] arm64: capabilities: Move errata processing code [ Upstream commit 1e89baed5d50d2b8d9fd420830902570270703f1 ] We have errata work around processing code in cpu_errata.c, which calls back into helpers defined in cpufeature.c. Now that we are going to make the handling of capabilities generic, by adding the information to each capability, move the errata work around specific processing code. No functional changes. Cc: Will Deacon Cc: Marc Zyngier Cc: Mark Rutland Cc: Andre Przywara Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/cpufeature.h | 7 ----- arch/arm64/kernel/cpu_errata.c | 33 ---------------------- arch/arm64/kernel/cpufeature.c | 43 +++++++++++++++++++++++++++-- 3 files changed, 40 insertions(+), 43 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index bff4d95db039..22ebede86100 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -230,15 +230,8 @@ static inline bool id_aa64pfr0_32bit_el0(u64 pfr0) } void __init setup_cpu_features(void); - -void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, - const char *info); -void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps); void check_local_cpu_capabilities(void); -void update_cpu_errata_workarounds(void); -void __init enable_errata_workarounds(void); -void verify_local_cpu_errata_workarounds(void); u64 read_sanitised_ftr_reg(u32 id); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 3c2a68d766a2..f1885beb2588 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -621,36 +621,3 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { } }; - -/* - * The CPU Errata work arounds are detected and applied at boot time - * and the related information is freed soon after. If the new CPU requires - * an errata not detected at boot, fail this CPU. - */ -void verify_local_cpu_errata_workarounds(void) -{ - const struct arm64_cpu_capabilities *caps = arm64_errata; - - for (; caps->matches; caps++) { - if (cpus_have_cap(caps->capability)) { - if (caps->cpu_enable) - caps->cpu_enable(caps); - } else if (caps->matches(caps, SCOPE_LOCAL_CPU)) { - pr_crit("CPU%d: Requires work around for %s, not detected" - " at boot time\n", - smp_processor_id(), - caps->desc ? : "an erratum"); - cpu_die_early(); - } - } -} - -void update_cpu_errata_workarounds(void) -{ - update_cpu_capabilities(arm64_errata, "enabling workaround for"); -} - -void __init enable_errata_workarounds(void) -{ - enable_cpu_capabilities(arm64_errata); -} diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 1269d496db0a..353464b82d61 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -484,6 +484,9 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new) reg->user_mask = user_mask; } +extern const struct arm64_cpu_capabilities arm64_errata[]; +static void update_cpu_errata_workarounds(void); + void __init init_cpu_features(struct cpuinfo_arm64 *info) { /* Before we start using the tables, make sure it is sorted */ @@ -1160,8 +1163,8 @@ static bool __this_cpu_has_cap(const struct arm64_cpu_capabilities *cap_array, return false; } -void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, - const char *info) +static void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, + const char *info) { for (; caps->matches; caps++) { if (!caps->matches(caps, caps->def_scope)) @@ -1185,7 +1188,8 @@ static int __enable_cpu_capability(void *arg) * Run through the enabled capabilities and enable() it on all active * CPUs */ -void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) +static void __init +enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) { for (; caps->matches; caps++) { unsigned int num = caps->capability; @@ -1267,6 +1271,39 @@ verify_local_cpu_features(const struct arm64_cpu_capabilities *caps_list) } } +/* + * The CPU Errata work arounds are detected and applied at boot time + * and the related information is freed soon after. If the new CPU requires + * an errata not detected at boot, fail this CPU. + */ +static void verify_local_cpu_errata_workarounds(void) +{ + const struct arm64_cpu_capabilities *caps = arm64_errata; + + for (; caps->matches; caps++) { + if (cpus_have_cap(caps->capability)) { + if (caps->cpu_enable) + caps->cpu_enable(caps); + } else if (caps->matches(caps, SCOPE_LOCAL_CPU)) { + pr_crit("CPU%d: Requires work around for %s, not detected" + " at boot time\n", + smp_processor_id(), + caps->desc ? : "an erratum"); + cpu_die_early(); + } + } +} + +static void update_cpu_errata_workarounds(void) +{ + update_cpu_capabilities(arm64_errata, "enabling workaround for"); +} + +static void __init enable_errata_workarounds(void) +{ + enable_cpu_capabilities(arm64_errata); +} + /* * Run through the enabled system capabilities and enable() it on this CPU. * The capabilities were decided based on the available CPUs at the boot time. From 6c21fc25e9b0d86f5f4ac5d3d549d6b611dd3541 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 24 Oct 2019 14:47:58 +0200 Subject: [PATCH 0061/3715] arm64: capabilities: Prepare for fine grained capabilities [ Upstream commit 143ba05d867af34827faf99e0eed4de27106c7cb ] We use arm64_cpu_capabilities to represent CPU ELF HWCAPs exposed to the userspace and the CPU hwcaps used by the kernel, which include cpu features and CPU errata work arounds. Capabilities have some properties that decide how they should be treated : 1) Detection, i.e scope : A cap could be "detected" either : - if it is present on at least one CPU (SCOPE_LOCAL_CPU) Or - if it is present on all the CPUs (SCOPE_SYSTEM) 2) When is it enabled ? - A cap is treated as "enabled" when the system takes some action based on whether the capability is detected or not. e.g, setting some control register, patching the kernel code. Right now, we treat all caps are enabled at boot-time, after all the CPUs are brought up by the kernel. But there are certain caps, which are enabled early during the boot (e.g, VHE, GIC_CPUIF for NMI) and kernel starts using them, even before the secondary CPUs are brought up. We would need a way to describe this for each capability. 3) Conflict on a late CPU - When a CPU is brought up, it is checked against the caps that are known to be enabled on the system (via verify_local_cpu_capabilities()). Based on the state of the capability on the CPU vs. that of System we could have the following combinations of conflict. x-----------------------------x | Type | System | Late CPU | ------------------------------| | a | y | n | ------------------------------| | b | n | y | x-----------------------------x Case (a) is not permitted for caps which are system features, which the system expects all the CPUs to have (e.g VHE). While (a) is ignored for all errata work arounds. However, there could be exceptions to the plain filtering approach. e.g, KPTI is an optional feature for a late CPU as long as the system already enables it. Case (b) is not permitted for errata work arounds which requires some work around, which cannot be delayed. And we ignore (b) for features. Here, yet again, KPTI is an exception, where if a late CPU needs KPTI we are too late to enable it (because we change the allocation of ASIDs etc). So this calls for a lot more fine grained behavior for each capability. And if we define all the attributes to control their behavior properly, we may be able to use a single table for the CPU hwcaps (which cover errata and features, not the ELF HWCAPs). This is a prepartory step to get there. More bits would be added for the properties listed above. We are going to use a bit-mask to encode all the properties of a capabilities. This patch encodes the "SCOPE" of the capability. As such there is no change in how the capabilities are treated. Cc: Mark Rutland Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/cpufeature.h | 105 ++++++++++++++++++++++++++-- arch/arm64/kernel/cpu_errata.c | 12 ++-- arch/arm64/kernel/cpufeature.c | 34 ++++----- 3 files changed, 122 insertions(+), 29 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 22ebede86100..909e005f9612 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -85,16 +85,104 @@ struct arm64_ftr_reg { extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; -/* scope of capability check */ -enum { - SCOPE_SYSTEM, - SCOPE_LOCAL_CPU, -}; +/* + * CPU capabilities: + * + * We use arm64_cpu_capabilities to represent system features, errata work + * arounds (both used internally by kernel and tracked in cpu_hwcaps) and + * ELF HWCAPs (which are exposed to user). + * + * To support systems with heterogeneous CPUs, we need to make sure that we + * detect the capabilities correctly on the system and take appropriate + * measures to ensure there are no incompatibilities. + * + * This comment tries to explain how we treat the capabilities. + * Each capability has the following list of attributes : + * + * 1) Scope of Detection : The system detects a given capability by + * performing some checks at runtime. This could be, e.g, checking the + * value of a field in CPU ID feature register or checking the cpu + * model. The capability provides a call back ( @matches() ) to + * perform the check. Scope defines how the checks should be performed. + * There are two cases: + * + * a) SCOPE_LOCAL_CPU: check all the CPUs and "detect" if at least one + * matches. This implies, we have to run the check on all the + * booting CPUs, until the system decides that state of the + * capability is finalised. (See section 2 below) + * Or + * b) SCOPE_SYSTEM: check all the CPUs and "detect" if all the CPUs + * matches. This implies, we run the check only once, when the + * system decides to finalise the state of the capability. If the + * capability relies on a field in one of the CPU ID feature + * registers, we use the sanitised value of the register from the + * CPU feature infrastructure to make the decision. + * + * The process of detection is usually denoted by "update" capability + * state in the code. + * + * 2) Finalise the state : The kernel should finalise the state of a + * capability at some point during its execution and take necessary + * actions if any. Usually, this is done, after all the boot-time + * enabled CPUs are brought up by the kernel, so that it can make + * better decision based on the available set of CPUs. However, there + * are some special cases, where the action is taken during the early + * boot by the primary boot CPU. (e.g, running the kernel at EL2 with + * Virtualisation Host Extensions). The kernel usually disallows any + * changes to the state of a capability once it finalises the capability + * and takes any action, as it may be impossible to execute the actions + * safely. A CPU brought up after a capability is "finalised" is + * referred to as "Late CPU" w.r.t the capability. e.g, all secondary + * CPUs are treated "late CPUs" for capabilities determined by the boot + * CPU. + * + * 3) Verification: When a CPU is brought online (e.g, by user or by the + * kernel), the kernel should make sure that it is safe to use the CPU, + * by verifying that the CPU is compliant with the state of the + * capabilities finalised already. This happens via : + * + * secondary_start_kernel()-> check_local_cpu_capabilities() + * + * As explained in (2) above, capabilities could be finalised at + * different points in the execution. Each CPU is verified against the + * "finalised" capabilities and if there is a conflict, the kernel takes + * an action, based on the severity (e.g, a CPU could be prevented from + * booting or cause a kernel panic). The CPU is allowed to "affect" the + * state of the capability, if it has not been finalised already. + * + * 4) Action: As mentioned in (2), the kernel can take an action for each + * detected capability, on all CPUs on the system. Appropriate actions + * include, turning on an architectural feature, modifying the control + * registers (e.g, SCTLR, TCR etc.) or patching the kernel via + * alternatives. The kernel patching is batched and performed at later + * point. The actions are always initiated only after the capability + * is finalised. This is usally denoted by "enabling" the capability. + * The actions are initiated as follows : + * a) Action is triggered on all online CPUs, after the capability is + * finalised, invoked within the stop_machine() context from + * enable_cpu_capabilitie(). + * + * b) Any late CPU, brought up after (1), the action is triggered via: + * + * check_local_cpu_capabilities() -> verify_local_cpu_capabilities() + * + */ + + +/* Decide how the capability is detected. On a local CPU vs System wide */ +#define ARM64_CPUCAP_SCOPE_LOCAL_CPU ((u16)BIT(0)) +#define ARM64_CPUCAP_SCOPE_SYSTEM ((u16)BIT(1)) +#define ARM64_CPUCAP_SCOPE_MASK \ + (ARM64_CPUCAP_SCOPE_SYSTEM | \ + ARM64_CPUCAP_SCOPE_LOCAL_CPU) + +#define SCOPE_SYSTEM ARM64_CPUCAP_SCOPE_SYSTEM +#define SCOPE_LOCAL_CPU ARM64_CPUCAP_SCOPE_LOCAL_CPU struct arm64_cpu_capabilities { const char *desc; u16 capability; - int def_scope; /* default scope */ + u16 type; bool (*matches)(const struct arm64_cpu_capabilities *caps, int scope); /* * Take the appropriate actions to enable this capability for this CPU. @@ -119,6 +207,11 @@ struct arm64_cpu_capabilities { }; }; +static inline int cpucap_default_scope(const struct arm64_cpu_capabilities *cap) +{ + return cap->type & ARM64_CPUCAP_SCOPE_MASK; +} + extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS]; extern struct static_key_false arm64_const_caps_ready; diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index f1885beb2588..72f701da24c9 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -406,14 +406,14 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, #endif /* CONFIG_ARM64_SSBD */ #define MIDR_RANGE(model, min, max) \ - .def_scope = SCOPE_LOCAL_CPU, \ + .type = ARM64_CPUCAP_SCOPE_LOCAL_CPU, \ .matches = is_affected_midr_range, \ .midr_model = model, \ .midr_range_min = min, \ .midr_range_max = max #define MIDR_ALL_VERSIONS(model) \ - .def_scope = SCOPE_LOCAL_CPU, \ + .type = ARM64_CPUCAP_SCOPE_LOCAL_CPU, \ .matches = is_affected_midr_range, \ .midr_model = model, \ .midr_range_min = 0, \ @@ -517,14 +517,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .desc = "Mismatched cache line size", .capability = ARM64_MISMATCHED_CACHE_LINE_SIZE, .matches = has_mismatched_cache_type, - .def_scope = SCOPE_LOCAL_CPU, + .type = ARM64_CPUCAP_SCOPE_LOCAL_CPU, .cpu_enable = cpu_enable_trap_ctr_access, }, { .desc = "Mismatched cache type", .capability = ARM64_MISMATCHED_CACHE_TYPE, .matches = has_mismatched_cache_type, - .def_scope = SCOPE_LOCAL_CPU, + .type = ARM64_CPUCAP_SCOPE_LOCAL_CPU, .cpu_enable = cpu_enable_trap_ctr_access, }, #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003 @@ -538,7 +538,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .desc = "Qualcomm Technologies Kryo erratum 1003", .capability = ARM64_WORKAROUND_QCOM_FALKOR_E1003, - .def_scope = SCOPE_LOCAL_CPU, + .type = ARM64_CPUCAP_SCOPE_LOCAL_CPU, .midr_model = MIDR_QCOM_KRYO, .matches = is_kryo_midr, }, @@ -613,7 +613,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_SSBD { .desc = "Speculative Store Bypass Disable", - .def_scope = SCOPE_LOCAL_CPU, + .type = ARM64_CPUCAP_SCOPE_LOCAL_CPU, .capability = ARM64_SSBD, .matches = has_ssbd_mitigation, }, diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 353464b82d61..5a3becce5a3e 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -924,7 +924,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", .capability = ARM64_HAS_SYSREG_GIC_CPUIF, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SCOPE_SYSTEM, .matches = has_useable_gicv3_cpuif, .sys_reg = SYS_ID_AA64PFR0_EL1, .field_pos = ID_AA64PFR0_GIC_SHIFT, @@ -935,7 +935,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Privileged Access Never", .capability = ARM64_HAS_PAN, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SCOPE_SYSTEM, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR1_EL1, .field_pos = ID_AA64MMFR1_PAN_SHIFT, @@ -948,7 +948,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "LSE atomic instructions", .capability = ARM64_HAS_LSE_ATOMICS, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SCOPE_SYSTEM, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR0_EL1, .field_pos = ID_AA64ISAR0_ATOMICS_SHIFT, @@ -959,14 +959,14 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Software prefetching using PRFM", .capability = ARM64_HAS_NO_HW_PREFETCH, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SCOPE_SYSTEM, .matches = has_no_hw_prefetch, }, #ifdef CONFIG_ARM64_UAO { .desc = "User Access Override", .capability = ARM64_HAS_UAO, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SCOPE_SYSTEM, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR2_EL1, .field_pos = ID_AA64MMFR2_UAO_SHIFT, @@ -980,21 +980,21 @@ static const struct arm64_cpu_capabilities arm64_features[] = { #ifdef CONFIG_ARM64_PAN { .capability = ARM64_ALT_PAN_NOT_UAO, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SCOPE_SYSTEM, .matches = cpufeature_pan_not_uao, }, #endif /* CONFIG_ARM64_PAN */ { .desc = "Virtualization Host Extensions", .capability = ARM64_HAS_VIRT_HOST_EXTN, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SCOPE_SYSTEM, .matches = runs_at_el2, .cpu_enable = cpu_copy_el2regs, }, { .desc = "32-bit EL0 Support", .capability = ARM64_HAS_32BIT_EL0, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SCOPE_SYSTEM, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, @@ -1004,14 +1004,14 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Reduced HYP mapping offset", .capability = ARM64_HYP_OFFSET_LOW, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SCOPE_SYSTEM, .matches = hyp_offset_low, }, #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 { .desc = "Kernel page table isolation (KPTI)", .capability = ARM64_UNMAP_KERNEL_AT_EL0, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SCOPE_SYSTEM, .matches = unmap_kernel_at_el0, .cpu_enable = kpti_install_ng_mappings, }, @@ -1019,7 +1019,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { /* FP/SIMD is not implemented */ .capability = ARM64_HAS_NO_FPSIMD, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SCOPE_SYSTEM, .min_field_value = 0, .matches = has_no_fpsimd, }, @@ -1027,7 +1027,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Data cache clean to Point of Persistence", .capability = ARM64_HAS_DCPOP, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SCOPE_SYSTEM, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR1_EL1, .field_pos = ID_AA64ISAR1_DPB_SHIFT, @@ -1037,16 +1037,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = { {}, }; -#define HWCAP_CAP(reg, field, s, min_value, type, cap) \ +#define HWCAP_CAP(reg, field, s, min_value, cap_type, cap) \ { \ .desc = #cap, \ - .def_scope = SCOPE_SYSTEM, \ + .type = ARM64_CPUCAP_SCOPE_SYSTEM, \ .matches = has_cpuid_feature, \ .sys_reg = reg, \ .field_pos = field, \ .sign = s, \ .min_field_value = min_value, \ - .hwcap_type = type, \ + .hwcap_type = cap_type, \ .hwcap = cap, \ } @@ -1140,7 +1140,7 @@ static void __init setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps) /* We support emulation of accesses to CPU ID feature registers */ elf_hwcap |= HWCAP_CPUID; for (; hwcaps->matches; hwcaps++) - if (hwcaps->matches(hwcaps, hwcaps->def_scope)) + if (hwcaps->matches(hwcaps, cpucap_default_scope(hwcaps))) cap_set_elf_hwcap(hwcaps); } @@ -1167,7 +1167,7 @@ static void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, const char *info) { for (; caps->matches; caps++) { - if (!caps->matches(caps, caps->def_scope)) + if (!caps->matches(caps, cpucap_default_scope(caps))) continue; if (!cpus_have_cap(caps->capability) && caps->desc) From 185b632259e87823a949373324ba1555b5e2b955 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 24 Oct 2019 14:47:59 +0200 Subject: [PATCH 0062/3715] arm64: capabilities: Add flags to handle the conflicts on late CPU [ Upstream commit 5b4747c5dce7a873e1e7fe1608835825f714267a ] When a CPU is brought up, it is checked against the caps that are known to be enabled on the system (via verify_local_cpu_capabilities()). Based on the state of the capability on the CPU vs. that of System we could have the following combinations of conflict. x-----------------------------x | Type | System | Late CPU | |-----------------------------| | a | y | n | |-----------------------------| | b | n | y | x-----------------------------x Case (a) is not permitted for caps which are system features, which the system expects all the CPUs to have (e.g VHE). While (a) is ignored for all errata work arounds. However, there could be exceptions to the plain filtering approach. e.g, KPTI is an optional feature for a late CPU as long as the system already enables it. Case (b) is not permitted for errata work arounds that cannot be activated after the kernel has finished booting.And we ignore (b) for features. Here, yet again, KPTI is an exception, where if a late CPU needs KPTI we are too late to enable it (because we change the allocation of ASIDs etc). Add two different flags to indicate how the conflict should be handled. ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU - CPUs may have the capability ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU - CPUs may not have the cappability. Now that we have the flags to describe the behavior of the errata and the features, as we treat them, define types for ERRATUM and FEATURE. Cc: Will Deacon Cc: Mark Rutland Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/cpufeature.h | 68 +++++++++++++++++++++++++++++ arch/arm64/kernel/cpu_errata.c | 12 ++--- arch/arm64/kernel/cpufeature.c | 26 +++++------ 3 files changed, 87 insertions(+), 19 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 909e005f9612..89aeeeaf81bb 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -149,6 +149,7 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; * an action, based on the severity (e.g, a CPU could be prevented from * booting or cause a kernel panic). The CPU is allowed to "affect" the * state of the capability, if it has not been finalised already. + * See section 5 for more details on conflicts. * * 4) Action: As mentioned in (2), the kernel can take an action for each * detected capability, on all CPUs on the system. Appropriate actions @@ -166,6 +167,34 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; * * check_local_cpu_capabilities() -> verify_local_cpu_capabilities() * + * 5) Conflicts: Based on the state of the capability on a late CPU vs. + * the system state, we could have the following combinations : + * + * x-----------------------------x + * | Type | System | Late CPU | + * |-----------------------------| + * | a | y | n | + * |-----------------------------| + * | b | n | y | + * x-----------------------------x + * + * Two separate flag bits are defined to indicate whether each kind of + * conflict can be allowed: + * ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU - Case(a) is allowed + * ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU - Case(b) is allowed + * + * Case (a) is not permitted for a capability that the system requires + * all CPUs to have in order for the capability to be enabled. This is + * typical for capabilities that represent enhanced functionality. + * + * Case (b) is not permitted for a capability that must be enabled + * during boot if any CPU in the system requires it in order to run + * safely. This is typical for erratum work arounds that cannot be + * enabled after the corresponding capability is finalised. + * + * In some non-typical cases either both (a) and (b), or neither, + * should be permitted. This can be described by including neither + * or both flags in the capability's type field. */ @@ -179,6 +208,33 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; #define SCOPE_SYSTEM ARM64_CPUCAP_SCOPE_SYSTEM #define SCOPE_LOCAL_CPU ARM64_CPUCAP_SCOPE_LOCAL_CPU +/* + * Is it permitted for a late CPU to have this capability when system + * hasn't already enabled it ? + */ +#define ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU ((u16)BIT(4)) +/* Is it safe for a late CPU to miss this capability when system has it */ +#define ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU ((u16)BIT(5)) + +/* + * CPU errata workarounds that need to be enabled at boot time if one or + * more CPUs in the system requires it. When one of these capabilities + * has been enabled, it is safe to allow any CPU to boot that doesn't + * require the workaround. However, it is not safe if a "late" CPU + * requires a workaround and the system hasn't enabled it already. + */ +#define ARM64_CPUCAP_LOCAL_CPU_ERRATUM \ + (ARM64_CPUCAP_SCOPE_LOCAL_CPU | ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU) +/* + * CPU feature detected at boot time based on system-wide value of a + * feature. It is safe for a late CPU to have this feature even though + * the system hasn't enabled it, although the featuer will not be used + * by Linux in this case. If the system has enabled this feature already, + * then every late CPU must have it. + */ +#define ARM64_CPUCAP_SYSTEM_FEATURE \ + (ARM64_CPUCAP_SCOPE_SYSTEM | ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU) + struct arm64_cpu_capabilities { const char *desc; u16 capability; @@ -212,6 +268,18 @@ static inline int cpucap_default_scope(const struct arm64_cpu_capabilities *cap) return cap->type & ARM64_CPUCAP_SCOPE_MASK; } +static inline bool +cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap) +{ + return !!(cap->type & ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU); +} + +static inline bool +cpucap_late_cpu_permitted(const struct arm64_cpu_capabilities *cap) +{ + return !!(cap->type & ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU); +} + extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS]; extern struct static_key_false arm64_const_caps_ready; diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 72f701da24c9..588b994b7120 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -406,14 +406,14 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, #endif /* CONFIG_ARM64_SSBD */ #define MIDR_RANGE(model, min, max) \ - .type = ARM64_CPUCAP_SCOPE_LOCAL_CPU, \ + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \ .matches = is_affected_midr_range, \ .midr_model = model, \ .midr_range_min = min, \ .midr_range_max = max #define MIDR_ALL_VERSIONS(model) \ - .type = ARM64_CPUCAP_SCOPE_LOCAL_CPU, \ + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \ .matches = is_affected_midr_range, \ .midr_model = model, \ .midr_range_min = 0, \ @@ -517,14 +517,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .desc = "Mismatched cache line size", .capability = ARM64_MISMATCHED_CACHE_LINE_SIZE, .matches = has_mismatched_cache_type, - .type = ARM64_CPUCAP_SCOPE_LOCAL_CPU, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .cpu_enable = cpu_enable_trap_ctr_access, }, { .desc = "Mismatched cache type", .capability = ARM64_MISMATCHED_CACHE_TYPE, .matches = has_mismatched_cache_type, - .type = ARM64_CPUCAP_SCOPE_LOCAL_CPU, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .cpu_enable = cpu_enable_trap_ctr_access, }, #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003 @@ -538,7 +538,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .desc = "Qualcomm Technologies Kryo erratum 1003", .capability = ARM64_WORKAROUND_QCOM_FALKOR_E1003, - .type = ARM64_CPUCAP_SCOPE_LOCAL_CPU, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .midr_model = MIDR_QCOM_KRYO, .matches = is_kryo_midr, }, @@ -613,7 +613,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_SSBD { .desc = "Speculative Store Bypass Disable", - .type = ARM64_CPUCAP_SCOPE_LOCAL_CPU, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .capability = ARM64_SSBD, .matches = has_ssbd_mitigation, }, diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 5a3becce5a3e..70b504d84683 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -924,7 +924,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", .capability = ARM64_HAS_SYSREG_GIC_CPUIF, - .type = ARM64_CPUCAP_SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_useable_gicv3_cpuif, .sys_reg = SYS_ID_AA64PFR0_EL1, .field_pos = ID_AA64PFR0_GIC_SHIFT, @@ -935,7 +935,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Privileged Access Never", .capability = ARM64_HAS_PAN, - .type = ARM64_CPUCAP_SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR1_EL1, .field_pos = ID_AA64MMFR1_PAN_SHIFT, @@ -948,7 +948,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "LSE atomic instructions", .capability = ARM64_HAS_LSE_ATOMICS, - .type = ARM64_CPUCAP_SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR0_EL1, .field_pos = ID_AA64ISAR0_ATOMICS_SHIFT, @@ -959,14 +959,14 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Software prefetching using PRFM", .capability = ARM64_HAS_NO_HW_PREFETCH, - .type = ARM64_CPUCAP_SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_no_hw_prefetch, }, #ifdef CONFIG_ARM64_UAO { .desc = "User Access Override", .capability = ARM64_HAS_UAO, - .type = ARM64_CPUCAP_SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR2_EL1, .field_pos = ID_AA64MMFR2_UAO_SHIFT, @@ -980,21 +980,21 @@ static const struct arm64_cpu_capabilities arm64_features[] = { #ifdef CONFIG_ARM64_PAN { .capability = ARM64_ALT_PAN_NOT_UAO, - .type = ARM64_CPUCAP_SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = cpufeature_pan_not_uao, }, #endif /* CONFIG_ARM64_PAN */ { .desc = "Virtualization Host Extensions", .capability = ARM64_HAS_VIRT_HOST_EXTN, - .type = ARM64_CPUCAP_SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = runs_at_el2, .cpu_enable = cpu_copy_el2regs, }, { .desc = "32-bit EL0 Support", .capability = ARM64_HAS_32BIT_EL0, - .type = ARM64_CPUCAP_SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, @@ -1004,14 +1004,14 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Reduced HYP mapping offset", .capability = ARM64_HYP_OFFSET_LOW, - .type = ARM64_CPUCAP_SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = hyp_offset_low, }, #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 { .desc = "Kernel page table isolation (KPTI)", .capability = ARM64_UNMAP_KERNEL_AT_EL0, - .type = ARM64_CPUCAP_SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = unmap_kernel_at_el0, .cpu_enable = kpti_install_ng_mappings, }, @@ -1019,7 +1019,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { /* FP/SIMD is not implemented */ .capability = ARM64_HAS_NO_FPSIMD, - .type = ARM64_CPUCAP_SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .min_field_value = 0, .matches = has_no_fpsimd, }, @@ -1027,7 +1027,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Data cache clean to Point of Persistence", .capability = ARM64_HAS_DCPOP, - .type = ARM64_CPUCAP_SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR1_EL1, .field_pos = ID_AA64ISAR1_DPB_SHIFT, @@ -1040,7 +1040,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { #define HWCAP_CAP(reg, field, s, min_value, cap_type, cap) \ { \ .desc = #cap, \ - .type = ARM64_CPUCAP_SCOPE_SYSTEM, \ + .type = ARM64_CPUCAP_SYSTEM_FEATURE, \ .matches = has_cpuid_feature, \ .sys_reg = reg, \ .field_pos = field, \ From 2a531333099332495a6222952dc2604f76bc0ba4 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 24 Oct 2019 14:48:00 +0200 Subject: [PATCH 0063/3715] arm64: capabilities: Unify the verification [ Upstream commit eaac4d83daa50fc1b9b7850346e9a62adfd4647e ] Now that each capability describes how to treat the conflicts of CPU cap state vs System wide cap state, we can unify the verification logic to a single place. Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpufeature.c | 91 ++++++++++++++++++++++------------ 1 file changed, 58 insertions(+), 33 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 70b504d84683..e73fae0c0ca7 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1228,6 +1228,58 @@ static inline void set_sys_caps_initialised(void) sys_caps_initialised = true; } +/* + * Run through the list of capabilities to check for conflicts. + * If the system has already detected a capability, take necessary + * action on this CPU. + * + * Returns "false" on conflicts. + */ +static bool +__verify_local_cpu_caps(const struct arm64_cpu_capabilities *caps_list) +{ + bool cpu_has_cap, system_has_cap; + const struct arm64_cpu_capabilities *caps; + + for (caps = caps_list; caps->matches; caps++) { + cpu_has_cap = __this_cpu_has_cap(caps_list, caps->capability); + system_has_cap = cpus_have_cap(caps->capability); + + if (system_has_cap) { + /* + * Check if the new CPU misses an advertised feature, + * which is not safe to miss. + */ + if (!cpu_has_cap && !cpucap_late_cpu_optional(caps)) + break; + /* + * We have to issue cpu_enable() irrespective of + * whether the CPU has it or not, as it is enabeld + * system wide. It is upto the call back to take + * appropriate action on this CPU. + */ + if (caps->cpu_enable) + caps->cpu_enable(caps); + } else { + /* + * Check if the CPU has this capability if it isn't + * safe to have when the system doesn't. + */ + if (cpu_has_cap && !cpucap_late_cpu_permitted(caps)) + break; + } + } + + if (caps->matches) { + pr_crit("CPU%d: Detected conflict for capability %d (%s), System: %d, CPU: %d\n", + smp_processor_id(), caps->capability, + caps->desc, system_has_cap, cpu_has_cap); + return false; + } + + return true; +} + /* * Check for CPU features that are used in early boot * based on the Boot CPU value. @@ -1250,25 +1302,10 @@ verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps) } } -static void -verify_local_cpu_features(const struct arm64_cpu_capabilities *caps_list) +static void verify_local_cpu_features(void) { - const struct arm64_cpu_capabilities *caps = caps_list; - for (; caps->matches; caps++) { - if (!cpus_have_cap(caps->capability)) - continue; - /* - * If the new CPU misses an advertised feature, we cannot proceed - * further, park the cpu. - */ - if (!__this_cpu_has_cap(caps_list, caps->capability)) { - pr_crit("CPU%d: missing feature: %s\n", - smp_processor_id(), caps->desc); - cpu_die_early(); - } - if (caps->cpu_enable) - caps->cpu_enable(caps); - } + if (!__verify_local_cpu_caps(arm64_features)) + cpu_die_early(); } /* @@ -1278,20 +1315,8 @@ verify_local_cpu_features(const struct arm64_cpu_capabilities *caps_list) */ static void verify_local_cpu_errata_workarounds(void) { - const struct arm64_cpu_capabilities *caps = arm64_errata; - - for (; caps->matches; caps++) { - if (cpus_have_cap(caps->capability)) { - if (caps->cpu_enable) - caps->cpu_enable(caps); - } else if (caps->matches(caps, SCOPE_LOCAL_CPU)) { - pr_crit("CPU%d: Requires work around for %s, not detected" - " at boot time\n", - smp_processor_id(), - caps->desc ? : "an erratum"); - cpu_die_early(); - } - } + if (!__verify_local_cpu_caps(arm64_errata)) + cpu_die_early(); } static void update_cpu_errata_workarounds(void) @@ -1315,7 +1340,7 @@ static void __init enable_errata_workarounds(void) static void verify_local_cpu_capabilities(void) { verify_local_cpu_errata_workarounds(); - verify_local_cpu_features(arm64_features); + verify_local_cpu_features(); verify_local_elf_hwcaps(arm64_elf_hwcaps); if (system_supports_32bit_el0()) verify_local_elf_hwcaps(compat_elf_hwcaps); From 9e3fa8a15596061620b2e1cb203ef403e77fa8da Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 24 Oct 2019 14:48:01 +0200 Subject: [PATCH 0064/3715] arm64: capabilities: Filter the entries based on a given mask [ Upstream commit cce360b54ce6ca1bcf4b0a870ec076d83606775e ] While processing the list of capabilities, it is useful to filter out some of the entries based on the given mask for the scope of the capabilities to allow better control. This can be used later for handling LOCAL vs SYSTEM wide capabilities and more. All capabilities should have their scope set to either LOCAL_CPU or SYSTEM. No functional/flow change. Cc: Will Deacon Cc: Mark Rutland Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/cpufeature.h | 1 + arch/arm64/kernel/cpufeature.c | 33 +++++++++++++++++++---------- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 89aeeeaf81bb..b19dd89bcce9 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -207,6 +207,7 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; #define SCOPE_SYSTEM ARM64_CPUCAP_SCOPE_SYSTEM #define SCOPE_LOCAL_CPU ARM64_CPUCAP_SCOPE_LOCAL_CPU +#define SCOPE_ALL ARM64_CPUCAP_SCOPE_MASK /* * Is it permitted for a late CPU to have this capability when system diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index e73fae0c0ca7..b88871d5f179 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1164,10 +1164,12 @@ static bool __this_cpu_has_cap(const struct arm64_cpu_capabilities *cap_array, } static void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, - const char *info) + u16 scope_mask, const char *info) { + scope_mask &= ARM64_CPUCAP_SCOPE_MASK; for (; caps->matches; caps++) { - if (!caps->matches(caps, cpucap_default_scope(caps))) + if (!(caps->type & scope_mask) || + !caps->matches(caps, cpucap_default_scope(caps))) continue; if (!cpus_have_cap(caps->capability) && caps->desc) @@ -1189,12 +1191,14 @@ static int __enable_cpu_capability(void *arg) * CPUs */ static void __init -enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) +enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps, + u16 scope_mask) { + scope_mask &= ARM64_CPUCAP_SCOPE_MASK; for (; caps->matches; caps++) { unsigned int num = caps->capability; - if (!cpus_have_cap(num)) + if (!(caps->type & scope_mask) || !cpus_have_cap(num)) continue; /* Ensure cpus_have_const_cap(num) works */ @@ -1236,12 +1240,18 @@ static inline void set_sys_caps_initialised(void) * Returns "false" on conflicts. */ static bool -__verify_local_cpu_caps(const struct arm64_cpu_capabilities *caps_list) +__verify_local_cpu_caps(const struct arm64_cpu_capabilities *caps_list, + u16 scope_mask) { bool cpu_has_cap, system_has_cap; const struct arm64_cpu_capabilities *caps; + scope_mask &= ARM64_CPUCAP_SCOPE_MASK; + for (caps = caps_list; caps->matches; caps++) { + if (!(caps->type & scope_mask)) + continue; + cpu_has_cap = __this_cpu_has_cap(caps_list, caps->capability); system_has_cap = cpus_have_cap(caps->capability); @@ -1304,7 +1314,7 @@ verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps) static void verify_local_cpu_features(void) { - if (!__verify_local_cpu_caps(arm64_features)) + if (!__verify_local_cpu_caps(arm64_features, SCOPE_ALL)) cpu_die_early(); } @@ -1315,18 +1325,19 @@ static void verify_local_cpu_features(void) */ static void verify_local_cpu_errata_workarounds(void) { - if (!__verify_local_cpu_caps(arm64_errata)) + if (!__verify_local_cpu_caps(arm64_errata, SCOPE_ALL)) cpu_die_early(); } static void update_cpu_errata_workarounds(void) { - update_cpu_capabilities(arm64_errata, "enabling workaround for"); + update_cpu_capabilities(arm64_errata, SCOPE_ALL, + "enabling workaround for"); } static void __init enable_errata_workarounds(void) { - enable_cpu_capabilities(arm64_errata); + enable_cpu_capabilities(arm64_errata, SCOPE_ALL); } /* @@ -1368,8 +1379,8 @@ void check_local_cpu_capabilities(void) static void __init setup_feature_capabilities(void) { - update_cpu_capabilities(arm64_features, "detected feature:"); - enable_cpu_capabilities(arm64_features); + update_cpu_capabilities(arm64_features, SCOPE_ALL, "detected:"); + enable_cpu_capabilities(arm64_features, SCOPE_ALL); } DEFINE_STATIC_KEY_FALSE(arm64_const_caps_ready); From 59118c737b4716cca063a9f35b4f4131774eaac5 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 24 Oct 2019 14:48:02 +0200 Subject: [PATCH 0065/3715] arm64: capabilities: Prepare for grouping features and errata work arounds [ Upstream commit 600b9c919c2f4d07a7bf67864086aa3432224674 ] We are about to group the handling of all capabilities (features and errata workarounds). This patch open codes the wrapper routines to make it easier to merge the handling. Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpufeature.c | 58 +++++++++++----------------------- 1 file changed, 18 insertions(+), 40 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index b88871d5f179..bc76597abe7b 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -485,7 +485,8 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new) } extern const struct arm64_cpu_capabilities arm64_errata[]; -static void update_cpu_errata_workarounds(void); +static void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, + u16 scope_mask, const char *info); void __init init_cpu_features(struct cpuinfo_arm64 *info) { @@ -528,7 +529,8 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) * Run the errata work around checks on the boot CPU, once we have * initialised the cpu feature infrastructure. */ - update_cpu_errata_workarounds(); + update_cpu_capabilities(arm64_errata, SCOPE_ALL, + "enabling workaround for"); } static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new) @@ -1312,33 +1314,6 @@ verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps) } } -static void verify_local_cpu_features(void) -{ - if (!__verify_local_cpu_caps(arm64_features, SCOPE_ALL)) - cpu_die_early(); -} - -/* - * The CPU Errata work arounds are detected and applied at boot time - * and the related information is freed soon after. If the new CPU requires - * an errata not detected at boot, fail this CPU. - */ -static void verify_local_cpu_errata_workarounds(void) -{ - if (!__verify_local_cpu_caps(arm64_errata, SCOPE_ALL)) - cpu_die_early(); -} - -static void update_cpu_errata_workarounds(void) -{ - update_cpu_capabilities(arm64_errata, SCOPE_ALL, - "enabling workaround for"); -} - -static void __init enable_errata_workarounds(void) -{ - enable_cpu_capabilities(arm64_errata, SCOPE_ALL); -} /* * Run through the enabled system capabilities and enable() it on this CPU. @@ -1350,8 +1325,15 @@ static void __init enable_errata_workarounds(void) */ static void verify_local_cpu_capabilities(void) { - verify_local_cpu_errata_workarounds(); - verify_local_cpu_features(); + /* + * The CPU Errata work arounds are detected and applied at boot time + * and the related information is freed soon after. If the new CPU + * requires an errata not detected at boot, fail this CPU. + */ + if (!__verify_local_cpu_caps(arm64_errata, SCOPE_ALL)) + cpu_die_early(); + if (!__verify_local_cpu_caps(arm64_features, SCOPE_ALL)) + cpu_die_early(); verify_local_elf_hwcaps(arm64_elf_hwcaps); if (system_supports_32bit_el0()) verify_local_elf_hwcaps(compat_elf_hwcaps); @@ -1372,17 +1354,12 @@ void check_local_cpu_capabilities(void) * advertised capabilities. */ if (!sys_caps_initialised) - update_cpu_errata_workarounds(); + update_cpu_capabilities(arm64_errata, SCOPE_ALL, + "enabling workaround for"); else verify_local_cpu_capabilities(); } -static void __init setup_feature_capabilities(void) -{ - update_cpu_capabilities(arm64_features, SCOPE_ALL, "detected:"); - enable_cpu_capabilities(arm64_features, SCOPE_ALL); -} - DEFINE_STATIC_KEY_FALSE(arm64_const_caps_ready); EXPORT_SYMBOL(arm64_const_caps_ready); @@ -1405,8 +1382,9 @@ void __init setup_cpu_features(void) int cls; /* Set the CPU feature capabilies */ - setup_feature_capabilities(); - enable_errata_workarounds(); + update_cpu_capabilities(arm64_features, SCOPE_ALL, "detected:"); + enable_cpu_capabilities(arm64_features, SCOPE_ALL); + enable_cpu_capabilities(arm64_errata, SCOPE_ALL); mark_const_caps_ready(); setup_elf_hwcaps(arm64_elf_hwcaps); From 0a599aa7daca84b95335667162f530292e91e3d1 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 24 Oct 2019 14:48:03 +0200 Subject: [PATCH 0066/3715] arm64: capabilities: Split the processing of errata work arounds [ Upstream commit d69fe9a7e7214d49fe157ec20889892388d0fe23 ] Right now we run through the errata workarounds check on all boot active CPUs, with SCOPE_ALL. This wouldn't help for detecting erratum workarounds with a SYSTEM_SCOPE. There are none yet, but we plan to introduce some: let us clean this up so that such workarounds can be detected and enabled correctly. So, we run the checks with SCOPE_LOCAL_CPU on all CPUs and SCOPE_SYSTEM checks are run only once after all the boot time CPUs are active. Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpufeature.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index bc76597abe7b..291f8899b37f 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -529,7 +529,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) * Run the errata work around checks on the boot CPU, once we have * initialised the cpu feature infrastructure. */ - update_cpu_capabilities(arm64_errata, SCOPE_ALL, + update_cpu_capabilities(arm64_errata, SCOPE_LOCAL_CPU, "enabling workaround for"); } @@ -1354,7 +1354,7 @@ void check_local_cpu_capabilities(void) * advertised capabilities. */ if (!sys_caps_initialised) - update_cpu_capabilities(arm64_errata, SCOPE_ALL, + update_cpu_capabilities(arm64_errata, SCOPE_LOCAL_CPU, "enabling workaround for"); else verify_local_cpu_capabilities(); @@ -1383,6 +1383,8 @@ void __init setup_cpu_features(void) /* Set the CPU feature capabilies */ update_cpu_capabilities(arm64_features, SCOPE_ALL, "detected:"); + update_cpu_capabilities(arm64_errata, SCOPE_SYSTEM, + "enabling workaround for"); enable_cpu_capabilities(arm64_features, SCOPE_ALL); enable_cpu_capabilities(arm64_errata, SCOPE_ALL); mark_const_caps_ready(); From 33236e444f1c84b23ac265687578d2176640b2ba Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 24 Oct 2019 14:48:04 +0200 Subject: [PATCH 0067/3715] arm64: capabilities: Allow features based on local CPU scope [ Upstream commit fbd890b9b8497bab04c1d338bd97579a7bc53fab ] So far we have treated the feature capabilities as system wide and this wouldn't help with features that could be detected locally on one or more CPUs (e.g, KPTI, Software prefetch). This patch splits the feature detection to two phases : 1) Local CPU features are checked on all boot time active CPUs. 2) System wide features are checked only once after all CPUs are active. Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpufeature.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 291f8899b37f..dda06f3436c2 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -485,6 +485,7 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new) } extern const struct arm64_cpu_capabilities arm64_errata[]; +static const struct arm64_cpu_capabilities arm64_features[]; static void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, u16 scope_mask, const char *info); @@ -526,11 +527,12 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) } /* - * Run the errata work around checks on the boot CPU, once we have - * initialised the cpu feature infrastructure. + * Run the errata work around and local feature checks on the + * boot CPU, once we have initialised the cpu feature infrastructure. */ update_cpu_capabilities(arm64_errata, SCOPE_LOCAL_CPU, "enabling workaround for"); + update_cpu_capabilities(arm64_features, SCOPE_LOCAL_CPU, "detected:"); } static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new) @@ -1349,15 +1351,18 @@ void check_local_cpu_capabilities(void) /* * If we haven't finalised the system capabilities, this CPU gets - * a chance to update the errata work arounds. + * a chance to update the errata work arounds and local features. * Otherwise, this CPU should verify that it has all the system * advertised capabilities. */ - if (!sys_caps_initialised) + if (!sys_caps_initialised) { update_cpu_capabilities(arm64_errata, SCOPE_LOCAL_CPU, "enabling workaround for"); - else + update_cpu_capabilities(arm64_features, SCOPE_LOCAL_CPU, + "detected:"); + } else { verify_local_cpu_capabilities(); + } } DEFINE_STATIC_KEY_FALSE(arm64_const_caps_ready); @@ -1382,7 +1387,7 @@ void __init setup_cpu_features(void) int cls; /* Set the CPU feature capabilies */ - update_cpu_capabilities(arm64_features, SCOPE_ALL, "detected:"); + update_cpu_capabilities(arm64_features, SCOPE_SYSTEM, "detected:"); update_cpu_capabilities(arm64_errata, SCOPE_SYSTEM, "enabling workaround for"); enable_cpu_capabilities(arm64_features, SCOPE_ALL); From f1696036165b7369ef73f96953c15d495ff60497 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 24 Oct 2019 14:48:05 +0200 Subject: [PATCH 0068/3715] arm64: capabilities: Group handling of features and errata workarounds [ Upstream commit ed478b3f9e4ac97fdbe07007fb2662415de8fe25 ] Now that the features and errata workarounds have the same rules and flow, group the handling of the tables. Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpufeature.c | 73 +++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 31 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index dda06f3436c2..f4d640dc7f8b 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -485,9 +485,7 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new) } extern const struct arm64_cpu_capabilities arm64_errata[]; -static const struct arm64_cpu_capabilities arm64_features[]; -static void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, - u16 scope_mask, const char *info); +static void update_cpu_capabilities(u16 scope_mask); void __init init_cpu_features(struct cpuinfo_arm64 *info) { @@ -530,9 +528,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) * Run the errata work around and local feature checks on the * boot CPU, once we have initialised the cpu feature infrastructure. */ - update_cpu_capabilities(arm64_errata, SCOPE_LOCAL_CPU, - "enabling workaround for"); - update_cpu_capabilities(arm64_features, SCOPE_LOCAL_CPU, "detected:"); + update_cpu_capabilities(SCOPE_LOCAL_CPU); } static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new) @@ -1167,8 +1163,8 @@ static bool __this_cpu_has_cap(const struct arm64_cpu_capabilities *cap_array, return false; } -static void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, - u16 scope_mask, const char *info) +static void __update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, + u16 scope_mask, const char *info) { scope_mask &= ARM64_CPUCAP_SCOPE_MASK; for (; caps->matches; caps++) { @@ -1182,6 +1178,13 @@ static void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, } } +static void update_cpu_capabilities(u16 scope_mask) +{ + __update_cpu_capabilities(arm64_features, scope_mask, "detected:"); + __update_cpu_capabilities(arm64_errata, scope_mask, + "enabling workaround for"); +} + static int __enable_cpu_capability(void *arg) { const struct arm64_cpu_capabilities *cap = arg; @@ -1195,8 +1198,8 @@ static int __enable_cpu_capability(void *arg) * CPUs */ static void __init -enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps, - u16 scope_mask) +__enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps, + u16 scope_mask) { scope_mask &= ARM64_CPUCAP_SCOPE_MASK; for (; caps->matches; caps++) { @@ -1221,6 +1224,12 @@ enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps, } } +static void __init enable_cpu_capabilities(u16 scope_mask) +{ + __enable_cpu_capabilities(arm64_features, scope_mask); + __enable_cpu_capabilities(arm64_errata, scope_mask); +} + /* * Flag to indicate if we have computed the system wide * capabilities based on the boot time active CPUs. This @@ -1294,6 +1303,12 @@ __verify_local_cpu_caps(const struct arm64_cpu_capabilities *caps_list, return true; } +static bool verify_local_cpu_caps(u16 scope_mask) +{ + return __verify_local_cpu_caps(arm64_errata, scope_mask) && + __verify_local_cpu_caps(arm64_features, scope_mask); +} + /* * Check for CPU features that are used in early boot * based on the Boot CPU value. @@ -1327,15 +1342,9 @@ verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps) */ static void verify_local_cpu_capabilities(void) { - /* - * The CPU Errata work arounds are detected and applied at boot time - * and the related information is freed soon after. If the new CPU - * requires an errata not detected at boot, fail this CPU. - */ - if (!__verify_local_cpu_caps(arm64_errata, SCOPE_ALL)) - cpu_die_early(); - if (!__verify_local_cpu_caps(arm64_features, SCOPE_ALL)) + if (!verify_local_cpu_caps(SCOPE_ALL)) cpu_die_early(); + verify_local_elf_hwcaps(arm64_elf_hwcaps); if (system_supports_32bit_el0()) verify_local_elf_hwcaps(compat_elf_hwcaps); @@ -1355,14 +1364,10 @@ void check_local_cpu_capabilities(void) * Otherwise, this CPU should verify that it has all the system * advertised capabilities. */ - if (!sys_caps_initialised) { - update_cpu_capabilities(arm64_errata, SCOPE_LOCAL_CPU, - "enabling workaround for"); - update_cpu_capabilities(arm64_features, SCOPE_LOCAL_CPU, - "detected:"); - } else { + if (!sys_caps_initialised) + update_cpu_capabilities(SCOPE_LOCAL_CPU); + else verify_local_cpu_capabilities(); - } } DEFINE_STATIC_KEY_FALSE(arm64_const_caps_ready); @@ -1381,17 +1386,23 @@ bool this_cpu_has_cap(unsigned int cap) __this_cpu_has_cap(arm64_errata, cap)); } +static void __init setup_system_capabilities(void) +{ + /* + * We have finalised the system-wide safe feature + * registers, finalise the capabilities that depend + * on it. Also enable all the available capabilities. + */ + update_cpu_capabilities(SCOPE_SYSTEM); + enable_cpu_capabilities(SCOPE_ALL); +} + void __init setup_cpu_features(void) { u32 cwg; int cls; - /* Set the CPU feature capabilies */ - update_cpu_capabilities(arm64_features, SCOPE_SYSTEM, "detected:"); - update_cpu_capabilities(arm64_errata, SCOPE_SYSTEM, - "enabling workaround for"); - enable_cpu_capabilities(arm64_features, SCOPE_ALL); - enable_cpu_capabilities(arm64_errata, SCOPE_ALL); + setup_system_capabilities(); mark_const_caps_ready(); setup_elf_hwcaps(arm64_elf_hwcaps); From 32354dd01c29c97b68b00e9816617d50b7f3659f Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 24 Oct 2019 14:48:06 +0200 Subject: [PATCH 0069/3715] arm64: capabilities: Introduce weak features based on local CPU [ Upstream commit 5c137714dd8cae464dbd5f028c07af149e6d09fc ] Now that we have the flexibility of defining system features based on individual CPUs, introduce CPU feature type that can be detected on a local SCOPE and ignores the conflict on late CPUs. This is applicable for ARM64_HAS_NO_HW_PREFETCH, where it is fine for the system to have CPUs without hardware prefetch turning up later. We only suffer a performance penalty, nothing fatal. Cc: Will Deacon Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/cpufeature.h | 8 ++++++++ arch/arm64/kernel/cpufeature.c | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index b19dd89bcce9..09825b667af0 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -235,6 +235,14 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; */ #define ARM64_CPUCAP_SYSTEM_FEATURE \ (ARM64_CPUCAP_SCOPE_SYSTEM | ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU) +/* + * CPU feature detected at boot time based on feature of one or more CPUs. + * All possible conflicts for a late CPU are ignored. + */ +#define ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE \ + (ARM64_CPUCAP_SCOPE_LOCAL_CPU | \ + ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU | \ + ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU) struct arm64_cpu_capabilities { const char *desc; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index f4d640dc7f8b..439cdca71024 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -959,7 +959,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Software prefetching using PRFM", .capability = ARM64_HAS_NO_HW_PREFETCH, - .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, .matches = has_no_hw_prefetch, }, #ifdef CONFIG_ARM64_UAO From 808ab828e638efdc5bfe62b8481aef0882f35eab Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 24 Oct 2019 14:48:07 +0200 Subject: [PATCH 0070/3715] arm64: capabilities: Restrict KPTI detection to boot-time CPUs [ Upstream commit d3aec8a28be3b88bf75442e7c24fd9da8d69a6df ] KPTI is treated as a system wide feature and is only detected if all the CPUs in the sysetm needs the defense, unless it is forced via kernel command line. This leaves a system with a mix of CPUs with and without the defense vulnerable. Also, if a late CPU needs KPTI but KPTI was not activated at boot time, the CPU is currently allowed to boot, which is a potential security vulnerability. This patch ensures that the KPTI is turned on if at least one CPU detects the capability (i.e, change scope to SCOPE_LOCAL_CPU). Also rejetcs a late CPU, if it requires the defense, when the system hasn't enabled it, Cc: Will Deacon Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/cpufeature.h | 9 +++++++++ arch/arm64/kernel/cpufeature.c | 16 +++++++++++----- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 09825b667af0..96c99b201b2f 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -244,6 +244,15 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU | \ ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU) +/* + * CPU feature detected at boot time, on one or more CPUs. A late CPU + * is not allowed to have the capability when the system doesn't have it. + * It is Ok for a late CPU to miss the feature. + */ +#define ARM64_CPUCAP_BOOT_RESTRICTED_CPU_LOCAL_FEATURE \ + (ARM64_CPUCAP_SCOPE_LOCAL_CPU | \ + ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU) + struct arm64_cpu_capabilities { const char *desc; u16 capability; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 439cdca71024..b3ebbc56bebb 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -824,10 +824,9 @@ static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unus static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, - int __unused) + int scope) { char const *str = "command line option"; - u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); /* * For reasons that aren't entirely clear, enabling KPTI on Cavium @@ -863,8 +862,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, } /* Defer to CPU feature registers */ - return !cpuid_feature_extract_unsigned_field(pfr0, - ID_AA64PFR0_CSV3_SHIFT); + return !has_cpuid_feature(entry, scope); } static void @@ -1011,7 +1009,15 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Kernel page table isolation (KPTI)", .capability = ARM64_UNMAP_KERNEL_AT_EL0, - .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .type = ARM64_CPUCAP_BOOT_RESTRICTED_CPU_LOCAL_FEATURE, + /* + * The ID feature fields below are used to indicate that + * the CPU doesn't need KPTI. See unmap_kernel_at_el0 for + * more details. + */ + .sys_reg = SYS_ID_AA64PFR0_EL1, + .field_pos = ID_AA64PFR0_CSV3_SHIFT, + .min_field_value = 1, .matches = unmap_kernel_at_el0, .cpu_enable = kpti_install_ng_mappings, }, From 6527925caa7fe97a3a78e51a0681d15c9bc00d17 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 24 Oct 2019 14:48:08 +0200 Subject: [PATCH 0071/3715] arm64: capabilities: Add support for features enabled early [ Upstream commit fd9d63da17daf09c0099e3d5e3f0c0f03d9b251b ] The kernel detects and uses some of the features based on the boot CPU and expects that all the following CPUs conform to it. e.g, with VHE and the boot CPU running at EL2, the kernel decides to keep the kernel running at EL2. If another CPU is brought up without this capability, we use custom hooks (via check_early_cpu_features()) to handle it. To handle such capabilities add support for detecting and enabling capabilities based on the boot CPU. A bit is added to indicate if the capability should be detected early on the boot CPU. The infrastructure then ensures that such capabilities are probed and "enabled" early on in the boot CPU and, enabled on the subsequent CPUs. Cc: Julien Thierry Cc: Will Deacon Cc: Mark Rutland Cc: Marc Zyngier Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/cpufeature.h | 48 +++++++++++++++++++----- arch/arm64/kernel/cpufeature.c | 57 ++++++++++++++++++++++------- 2 files changed, 83 insertions(+), 22 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 96c99b201b2f..793e5fd4c583 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -104,7 +104,7 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; * value of a field in CPU ID feature register or checking the cpu * model. The capability provides a call back ( @matches() ) to * perform the check. Scope defines how the checks should be performed. - * There are two cases: + * There are three cases: * * a) SCOPE_LOCAL_CPU: check all the CPUs and "detect" if at least one * matches. This implies, we have to run the check on all the @@ -117,6 +117,11 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; * capability relies on a field in one of the CPU ID feature * registers, we use the sanitised value of the register from the * CPU feature infrastructure to make the decision. + * Or + * c) SCOPE_BOOT_CPU: Check only on the primary boot CPU to detect the + * feature. This category is for features that are "finalised" + * (or used) by the kernel very early even before the SMP cpus + * are brought up. * * The process of detection is usually denoted by "update" capability * state in the code. @@ -136,6 +141,11 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; * CPUs are treated "late CPUs" for capabilities determined by the boot * CPU. * + * At the moment there are two passes of finalising the capabilities. + * a) Boot CPU scope capabilities - Finalised by primary boot CPU via + * setup_boot_cpu_capabilities(). + * b) Everything except (a) - Run via setup_system_capabilities(). + * * 3) Verification: When a CPU is brought online (e.g, by user or by the * kernel), the kernel should make sure that it is safe to use the CPU, * by verifying that the CPU is compliant with the state of the @@ -144,12 +154,21 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; * secondary_start_kernel()-> check_local_cpu_capabilities() * * As explained in (2) above, capabilities could be finalised at - * different points in the execution. Each CPU is verified against the - * "finalised" capabilities and if there is a conflict, the kernel takes - * an action, based on the severity (e.g, a CPU could be prevented from - * booting or cause a kernel panic). The CPU is allowed to "affect" the - * state of the capability, if it has not been finalised already. - * See section 5 for more details on conflicts. + * different points in the execution. Each newly booted CPU is verified + * against the capabilities that have been finalised by the time it + * boots. + * + * a) SCOPE_BOOT_CPU : All CPUs are verified against the capability + * except for the primary boot CPU. + * + * b) SCOPE_LOCAL_CPU, SCOPE_SYSTEM: All CPUs hotplugged on by the + * user after the kernel boot are verified against the capability. + * + * If there is a conflict, the kernel takes an action, based on the + * severity (e.g, a CPU could be prevented from booting or cause a + * kernel panic). The CPU is allowed to "affect" the state of the + * capability, if it has not been finalised already. See section 5 + * for more details on conflicts. * * 4) Action: As mentioned in (2), the kernel can take an action for each * detected capability, on all CPUs on the system. Appropriate actions @@ -198,15 +217,26 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; */ -/* Decide how the capability is detected. On a local CPU vs System wide */ +/* + * Decide how the capability is detected. + * On any local CPU vs System wide vs the primary boot CPU + */ #define ARM64_CPUCAP_SCOPE_LOCAL_CPU ((u16)BIT(0)) #define ARM64_CPUCAP_SCOPE_SYSTEM ((u16)BIT(1)) +/* + * The capabilitiy is detected on the Boot CPU and is used by kernel + * during early boot. i.e, the capability should be "detected" and + * "enabled" as early as possibly on all booting CPUs. + */ +#define ARM64_CPUCAP_SCOPE_BOOT_CPU ((u16)BIT(2)) #define ARM64_CPUCAP_SCOPE_MASK \ (ARM64_CPUCAP_SCOPE_SYSTEM | \ - ARM64_CPUCAP_SCOPE_LOCAL_CPU) + ARM64_CPUCAP_SCOPE_LOCAL_CPU | \ + ARM64_CPUCAP_SCOPE_BOOT_CPU) #define SCOPE_SYSTEM ARM64_CPUCAP_SCOPE_SYSTEM #define SCOPE_LOCAL_CPU ARM64_CPUCAP_SCOPE_LOCAL_CPU +#define SCOPE_BOOT_CPU ARM64_CPUCAP_SCOPE_BOOT_CPU #define SCOPE_ALL ARM64_CPUCAP_SCOPE_MASK /* diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index b3ebbc56bebb..1a1eb3b85e82 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -485,7 +485,7 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new) } extern const struct arm64_cpu_capabilities arm64_errata[]; -static void update_cpu_capabilities(u16 scope_mask); +static void __init setup_boot_cpu_capabilities(void); void __init init_cpu_features(struct cpuinfo_arm64 *info) { @@ -525,10 +525,10 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) } /* - * Run the errata work around and local feature checks on the - * boot CPU, once we have initialised the cpu feature infrastructure. + * Detect and enable early CPU capabilities based on the boot CPU, + * after we have initialised the CPU feature infrastructure. */ - update_cpu_capabilities(SCOPE_LOCAL_CPU); + setup_boot_cpu_capabilities(); } static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new) @@ -1219,13 +1219,24 @@ __enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps, if (caps->cpu_enable) { /* - * Use stop_machine() as it schedules the work allowing - * us to modify PSTATE, instead of on_each_cpu() which - * uses an IPI, giving us a PSTATE that disappears when - * we return. + * Capabilities with SCOPE_BOOT_CPU scope are finalised + * before any secondary CPU boots. Thus, each secondary + * will enable the capability as appropriate via + * check_local_cpu_capabilities(). The only exception is + * the boot CPU, for which the capability must be + * enabled here. This approach avoids costly + * stop_machine() calls for this case. + * + * Otherwise, use stop_machine() as it schedules the + * work allowing us to modify PSTATE, instead of + * on_each_cpu() which uses an IPI, giving us a PSTATE + * that disappears when we return. */ - stop_machine(__enable_cpu_capability, (void *)caps, - cpu_online_mask); + if (scope_mask & SCOPE_BOOT_CPU) + caps->cpu_enable(caps); + else + stop_machine(__enable_cpu_capability, + (void *)caps, cpu_online_mask); } } } @@ -1323,6 +1334,12 @@ static void check_early_cpu_features(void) { verify_cpu_run_el(); verify_cpu_asid_bits(); + /* + * Early features are used by the kernel already. If there + * is a conflict, we cannot proceed further. + */ + if (!verify_local_cpu_caps(SCOPE_BOOT_CPU)) + cpu_panic_kernel(); } static void @@ -1348,7 +1365,12 @@ verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps) */ static void verify_local_cpu_capabilities(void) { - if (!verify_local_cpu_caps(SCOPE_ALL)) + /* + * The capabilities with SCOPE_BOOT_CPU are checked from + * check_early_cpu_features(), as they need to be verified + * on all secondary CPUs. + */ + if (!verify_local_cpu_caps(SCOPE_ALL & ~SCOPE_BOOT_CPU)) cpu_die_early(); verify_local_elf_hwcaps(arm64_elf_hwcaps); @@ -1376,6 +1398,14 @@ void check_local_cpu_capabilities(void) verify_local_cpu_capabilities(); } +static void __init setup_boot_cpu_capabilities(void) +{ + /* Detect capabilities with either SCOPE_BOOT_CPU or SCOPE_LOCAL_CPU */ + update_cpu_capabilities(SCOPE_BOOT_CPU | SCOPE_LOCAL_CPU); + /* Enable the SCOPE_BOOT_CPU capabilities alone right away */ + enable_cpu_capabilities(SCOPE_BOOT_CPU); +} + DEFINE_STATIC_KEY_FALSE(arm64_const_caps_ready); EXPORT_SYMBOL(arm64_const_caps_ready); @@ -1397,10 +1427,11 @@ static void __init setup_system_capabilities(void) /* * We have finalised the system-wide safe feature * registers, finalise the capabilities that depend - * on it. Also enable all the available capabilities. + * on it. Also enable all the available capabilities, + * that are not enabled already. */ update_cpu_capabilities(SCOPE_SYSTEM); - enable_cpu_capabilities(SCOPE_ALL); + enable_cpu_capabilities(SCOPE_ALL & ~SCOPE_BOOT_CPU); } void __init setup_cpu_features(void) From ee0ccd259b4c4e362a75e382dae26b69560429a1 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 24 Oct 2019 14:48:09 +0200 Subject: [PATCH 0072/3715] arm64: capabilities: Change scope of VHE to Boot CPU feature [ Upstream commit 830dcc9f9a7cd26a812522a26efaacf7df6fc365 ] We expect all CPUs to be running at the same EL inside the kernel with or without VHE enabled and we have strict checks to ensure that any mismatch triggers a kernel panic. If VHE is enabled, we use the feature based on the boot CPU and all other CPUs should follow. This makes it a perfect candidate for a capability based on the boot CPU, which should be matched by all the CPUs (both when is ON and OFF). This saves us some not-so-pretty hooks and special code, just for verifying the conflict. The patch also makes the VHE capability entry depend on CONFIG_ARM64_VHE. Cc: Marc Zyngier Cc: Will Deacon Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/cpufeature.h | 6 +++++ arch/arm64/include/asm/virt.h | 6 ----- arch/arm64/kernel/cpufeature.c | 5 ++-- arch/arm64/kernel/smp.c | 38 ----------------------------- 4 files changed, 9 insertions(+), 46 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 793e5fd4c583..839aaa1505a3 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -283,6 +283,12 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; (ARM64_CPUCAP_SCOPE_LOCAL_CPU | \ ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU) +/* + * CPU feature used early in the boot based on the boot CPU. All secondary + * CPUs must match the state of the capability as detected by the boot CPU. + */ +#define ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE ARM64_CPUCAP_SCOPE_BOOT_CPU + struct arm64_cpu_capabilities { const char *desc; u16 capability; diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index c5f89442785c..9d1e24e030b3 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -102,12 +102,6 @@ static inline bool has_vhe(void) return false; } -#ifdef CONFIG_ARM64_VHE -extern void verify_cpu_run_el(void); -#else -static inline void verify_cpu_run_el(void) {} -#endif - #endif /* __ASSEMBLY__ */ #endif /* ! __ASM__VIRT_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 1a1eb3b85e82..d1897d8f40a2 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -982,13 +982,15 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = cpufeature_pan_not_uao, }, #endif /* CONFIG_ARM64_PAN */ +#ifdef CONFIG_ARM64_VHE { .desc = "Virtualization Host Extensions", .capability = ARM64_HAS_VIRT_HOST_EXTN, - .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, .matches = runs_at_el2, .cpu_enable = cpu_copy_el2regs, }, +#endif /* CONFIG_ARM64_VHE */ { .desc = "32-bit EL0 Support", .capability = ARM64_HAS_32BIT_EL0, @@ -1332,7 +1334,6 @@ static bool verify_local_cpu_caps(u16 scope_mask) */ static void check_early_cpu_features(void) { - verify_cpu_run_el(); verify_cpu_asid_bits(); /* * Early features are used by the kernel already. If there diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index e9b8395e24a7..a683cd499515 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -83,43 +83,6 @@ enum ipi_msg_type { IPI_WAKEUP }; -#ifdef CONFIG_ARM64_VHE - -/* Whether the boot CPU is running in HYP mode or not*/ -static bool boot_cpu_hyp_mode; - -static inline void save_boot_cpu_run_el(void) -{ - boot_cpu_hyp_mode = is_kernel_in_hyp_mode(); -} - -static inline bool is_boot_cpu_in_hyp_mode(void) -{ - return boot_cpu_hyp_mode; -} - -/* - * Verify that a secondary CPU is running the kernel at the same - * EL as that of the boot CPU. - */ -void verify_cpu_run_el(void) -{ - bool in_el2 = is_kernel_in_hyp_mode(); - bool boot_cpu_el2 = is_boot_cpu_in_hyp_mode(); - - if (in_el2 ^ boot_cpu_el2) { - pr_crit("CPU%d: mismatched Exception Level(EL%d) with boot CPU(EL%d)\n", - smp_processor_id(), - in_el2 ? 2 : 1, - boot_cpu_el2 ? 2 : 1); - cpu_panic_kernel(); - } -} - -#else -static inline void save_boot_cpu_run_el(void) {} -#endif - #ifdef CONFIG_HOTPLUG_CPU static int op_cpu_kill(unsigned int cpu); #else @@ -448,7 +411,6 @@ void __init smp_prepare_boot_cpu(void) */ jump_label_init(); cpuinfo_store_boot_cpu(); - save_boot_cpu_run_el(); } static u64 __init of_get_cpu_mpidr(struct device_node *dn) From 41b3073644e3b694439ff737e92decb670aceda2 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 24 Oct 2019 14:48:10 +0200 Subject: [PATCH 0073/3715] arm64: capabilities: Clean up midr range helpers [ Upstream commit 5e7951ce19abf4113645ae789c033917356ee96f ] We are about to introduce generic MIDR range helpers. Clean up the existing helpers in erratum handling, preparing them to use generic version. Cc: Will Deacon Cc: Mark Rutland Cc: Ard Biesheuvel Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpu_errata.c | 107 +++++++++++++++++++-------------- 1 file changed, 61 insertions(+), 46 deletions(-) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 588b994b7120..1e87e1427cc3 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -405,20 +405,38 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, } #endif /* CONFIG_ARM64_SSBD */ -#define MIDR_RANGE(model, min, max) \ - .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \ - .matches = is_affected_midr_range, \ - .midr_model = model, \ - .midr_range_min = min, \ - .midr_range_max = max +#define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) \ + .matches = is_affected_midr_range, \ + .midr_model = model, \ + .midr_range_min = MIDR_CPU_VAR_REV(v_min, r_min), \ + .midr_range_max = MIDR_CPU_VAR_REV(v_max, r_max) -#define MIDR_ALL_VERSIONS(model) \ - .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \ - .matches = is_affected_midr_range, \ - .midr_model = model, \ - .midr_range_min = 0, \ +#define CAP_MIDR_ALL_VERSIONS(model) \ + .matches = is_affected_midr_range, \ + .midr_model = model, \ + .midr_range_min = MIDR_CPU_VAR_REV(0, 0), \ .midr_range_max = (MIDR_VARIANT_MASK | MIDR_REVISION_MASK) +#define MIDR_FIXED(rev, revidr_mask) \ + .fixed_revs = (struct arm64_midr_revidr[]){{ (rev), (revidr_mask) }, {}} + +#define ERRATA_MIDR_RANGE(model, v_min, r_min, v_max, r_max) \ + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \ + CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) + +/* Errata affecting a range of revisions of given model variant */ +#define ERRATA_MIDR_REV_RANGE(m, var, r_min, r_max) \ + ERRATA_MIDR_RANGE(m, var, r_min, var, r_max) + +/* Errata affecting a single variant/revision of a model */ +#define ERRATA_MIDR_REV(model, var, rev) \ + ERRATA_MIDR_RANGE(model, var, rev, var, rev) + +/* Errata affecting all variants/revisions of a given a model */ +#define ERRATA_MIDR_ALL_VERSIONS(model) \ + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \ + CAP_MIDR_ALL_VERSIONS(model) + const struct arm64_cpu_capabilities arm64_errata[] = { #if defined(CONFIG_ARM64_ERRATUM_826319) || \ defined(CONFIG_ARM64_ERRATUM_827319) || \ @@ -427,7 +445,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cortex-A53 r0p[012] */ .desc = "ARM errata 826319, 827319, 824069", .capability = ARM64_WORKAROUND_CLEAN_CACHE, - MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x02), + ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 2), .cpu_enable = cpu_enable_cache_maint_trap, }, #endif @@ -436,7 +454,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cortex-A53 r0p[01] */ .desc = "ARM errata 819472", .capability = ARM64_WORKAROUND_CLEAN_CACHE, - MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x01), + ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 1), .cpu_enable = cpu_enable_cache_maint_trap, }, #endif @@ -445,9 +463,9 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cortex-A57 r0p0 - r1p2 */ .desc = "ARM erratum 832075", .capability = ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE, - MIDR_RANGE(MIDR_CORTEX_A57, - MIDR_CPU_VAR_REV(0, 0), - MIDR_CPU_VAR_REV(1, 2)), + ERRATA_MIDR_RANGE(MIDR_CORTEX_A57, + 0, 0, + 1, 2), }, #endif #ifdef CONFIG_ARM64_ERRATUM_834220 @@ -455,9 +473,9 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cortex-A57 r0p0 - r1p2 */ .desc = "ARM erratum 834220", .capability = ARM64_WORKAROUND_834220, - MIDR_RANGE(MIDR_CORTEX_A57, - MIDR_CPU_VAR_REV(0, 0), - MIDR_CPU_VAR_REV(1, 2)), + ERRATA_MIDR_RANGE(MIDR_CORTEX_A57, + 0, 0, + 1, 2), }, #endif #ifdef CONFIG_ARM64_ERRATUM_845719 @@ -465,7 +483,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cortex-A53 r0p[01234] */ .desc = "ARM erratum 845719", .capability = ARM64_WORKAROUND_845719, - MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x04), + ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4), }, #endif #ifdef CONFIG_CAVIUM_ERRATUM_23154 @@ -473,7 +491,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cavium ThunderX, pass 1.x */ .desc = "Cavium erratum 23154", .capability = ARM64_WORKAROUND_CAVIUM_23154, - MIDR_RANGE(MIDR_THUNDERX, 0x00, 0x01), + ERRATA_MIDR_REV_RANGE(MIDR_THUNDERX, 0, 0, 1), }, #endif #ifdef CONFIG_CAVIUM_ERRATUM_27456 @@ -481,15 +499,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cavium ThunderX, T88 pass 1.x - 2.1 */ .desc = "Cavium erratum 27456", .capability = ARM64_WORKAROUND_CAVIUM_27456, - MIDR_RANGE(MIDR_THUNDERX, - MIDR_CPU_VAR_REV(0, 0), - MIDR_CPU_VAR_REV(1, 1)), + ERRATA_MIDR_RANGE(MIDR_THUNDERX, + 0, 0, + 1, 1), }, { /* Cavium ThunderX, T81 pass 1.0 */ .desc = "Cavium erratum 27456", .capability = ARM64_WORKAROUND_CAVIUM_27456, - MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00), + ERRATA_MIDR_REV(MIDR_THUNDERX_81XX, 0, 0), }, #endif #ifdef CONFIG_CAVIUM_ERRATUM_30115 @@ -497,20 +515,21 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cavium ThunderX, T88 pass 1.x - 2.2 */ .desc = "Cavium erratum 30115", .capability = ARM64_WORKAROUND_CAVIUM_30115, - MIDR_RANGE(MIDR_THUNDERX, 0x00, - (1 << MIDR_VARIANT_SHIFT) | 2), + ERRATA_MIDR_RANGE(MIDR_THUNDERX, + 0, 0, + 1, 2), }, { /* Cavium ThunderX, T81 pass 1.0 - 1.2 */ .desc = "Cavium erratum 30115", .capability = ARM64_WORKAROUND_CAVIUM_30115, - MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x02), + ERRATA_MIDR_REV_RANGE(MIDR_THUNDERX_81XX, 0, 0, 2), }, { /* Cavium ThunderX, T83 pass 1.0 */ .desc = "Cavium erratum 30115", .capability = ARM64_WORKAROUND_CAVIUM_30115, - MIDR_RANGE(MIDR_THUNDERX_83XX, 0x00, 0x00), + ERRATA_MIDR_REV(MIDR_THUNDERX_83XX, 0, 0), }, #endif { @@ -531,9 +550,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .desc = "Qualcomm Technologies Falkor erratum 1003", .capability = ARM64_WORKAROUND_QCOM_FALKOR_E1003, - MIDR_RANGE(MIDR_QCOM_FALKOR_V1, - MIDR_CPU_VAR_REV(0, 0), - MIDR_CPU_VAR_REV(0, 0)), + ERRATA_MIDR_REV(MIDR_QCOM_FALKOR_V1, 0, 0), }, { .desc = "Qualcomm Technologies Kryo erratum 1003", @@ -547,9 +564,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .desc = "Qualcomm Technologies Falkor erratum 1009", .capability = ARM64_WORKAROUND_REPEAT_TLBI, - MIDR_RANGE(MIDR_QCOM_FALKOR_V1, - MIDR_CPU_VAR_REV(0, 0), - MIDR_CPU_VAR_REV(0, 0)), + ERRATA_MIDR_REV(MIDR_QCOM_FALKOR_V1, 0, 0), }, #endif #ifdef CONFIG_ARM64_ERRATUM_858921 @@ -557,56 +572,56 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cortex-A73 all versions */ .desc = "ARM erratum 858921", .capability = ARM64_WORKAROUND_858921, - MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), + ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), }, #endif #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), + ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), .cpu_enable = enable_smccc_arch_workaround_1, }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), + ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), .cpu_enable = enable_smccc_arch_workaround_1, }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), + ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), .cpu_enable = enable_smccc_arch_workaround_1, }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), + ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), .cpu_enable = enable_smccc_arch_workaround_1, }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), + ERRATA_MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), .cpu_enable = qcom_enable_link_stack_sanitization, }, { .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT, - MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), + ERRATA_MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), + ERRATA_MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), .cpu_enable = qcom_enable_link_stack_sanitization, }, { .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT, - MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), + ERRATA_MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), + ERRATA_MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), .cpu_enable = enable_smccc_arch_workaround_1, }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), + ERRATA_MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), .cpu_enable = enable_smccc_arch_workaround_1, }, #endif From 5a5e2f938e2e4c7b89d1f64f436ecdad547d122e Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 24 Oct 2019 14:48:11 +0200 Subject: [PATCH 0074/3715] arm64: Add helpers for checking CPU MIDR against a range [ Upstream commit 1df310505d6d544802016f6bae49aab836ae8510 ] Add helpers for checking if the given CPU midr falls in a range of variants/revisions for a given model. Cc: Will Deacon Cc: Mark Rutland Cc: Ard Biesheuvel Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/cpufeature.h | 4 ++-- arch/arm64/include/asm/cputype.h | 30 +++++++++++++++++++++++++++++ arch/arm64/kernel/cpu_errata.c | 18 +++++++---------- 3 files changed, 39 insertions(+), 13 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 839aaa1505a3..ade058ada2b0 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -10,6 +10,7 @@ #define __ASM_CPUFEATURE_H #include +#include #include #include @@ -302,8 +303,7 @@ struct arm64_cpu_capabilities { void (*cpu_enable)(const struct arm64_cpu_capabilities *cap); union { struct { /* To be used for erratum handling only */ - u32 midr_model; - u32 midr_range_min, midr_range_max; + struct midr_range midr_range; }; struct { /* Feature register checking */ diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 04569aa267fd..c60eb29ea261 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -125,6 +125,36 @@ #define read_cpuid(reg) read_sysreg_s(SYS_ ## reg) +/* + * Represent a range of MIDR values for a given CPU model and a + * range of variant/revision values. + * + * @model - CPU model as defined by MIDR_CPU_MODEL + * @rv_min - Minimum value for the revision/variant as defined by + * MIDR_CPU_VAR_REV + * @rv_max - Maximum value for the variant/revision for the range. + */ +struct midr_range { + u32 model; + u32 rv_min; + u32 rv_max; +}; + +#define MIDR_RANGE(m, v_min, r_min, v_max, r_max) \ + { \ + .model = m, \ + .rv_min = MIDR_CPU_VAR_REV(v_min, r_min), \ + .rv_max = MIDR_CPU_VAR_REV(v_max, r_max), \ + } + +#define MIDR_ALL_VERSIONS(m) MIDR_RANGE(m, 0, 0, 0xf, 0xf) + +static inline bool is_midr_in_range(u32 midr, struct midr_range const *range) +{ + return MIDR_IS_CPU_MODEL_RANGE(midr, range->model, + range->rv_min, range->rv_max); +} + /* * The CPU ID never changes at run time, so we might as well tell the * compiler that it's constant. Use this function to read the CPU ID diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 1e87e1427cc3..a3675d279b90 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -26,10 +26,10 @@ static bool __maybe_unused is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope) { + u32 midr = read_cpuid_id(); + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); - return MIDR_IS_CPU_MODEL_RANGE(read_cpuid_id(), entry->midr_model, - entry->midr_range_min, - entry->midr_range_max); + return is_midr_in_range(midr, &entry->midr_range); } static bool __maybe_unused @@ -43,7 +43,7 @@ is_kryo_midr(const struct arm64_cpu_capabilities *entry, int scope) model &= MIDR_IMPLEMENTOR_MASK | (0xf00 << MIDR_PARTNUM_SHIFT) | MIDR_ARCHITECTURE_MASK; - return model == entry->midr_model; + return model == entry->midr_range.model; } static bool @@ -407,15 +407,11 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, #define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) \ .matches = is_affected_midr_range, \ - .midr_model = model, \ - .midr_range_min = MIDR_CPU_VAR_REV(v_min, r_min), \ - .midr_range_max = MIDR_CPU_VAR_REV(v_max, r_max) + .midr_range = MIDR_RANGE(model, v_min, r_min, v_max, r_max) #define CAP_MIDR_ALL_VERSIONS(model) \ .matches = is_affected_midr_range, \ - .midr_model = model, \ - .midr_range_min = MIDR_CPU_VAR_REV(0, 0), \ - .midr_range_max = (MIDR_VARIANT_MASK | MIDR_REVISION_MASK) + .midr_range = MIDR_ALL_VERSIONS(model) #define MIDR_FIXED(rev, revidr_mask) \ .fixed_revs = (struct arm64_midr_revidr[]){{ (rev), (revidr_mask) }, {}} @@ -556,7 +552,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .desc = "Qualcomm Technologies Kryo erratum 1003", .capability = ARM64_WORKAROUND_QCOM_FALKOR_E1003, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, - .midr_model = MIDR_QCOM_KRYO, + .midr_range.model = MIDR_QCOM_KRYO, .matches = is_kryo_midr, }, #endif From 8a82aee7bdfd5bfcabdc741b1051dae98b576f9c Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 24 Oct 2019 14:48:12 +0200 Subject: [PATCH 0075/3715] arm64: Add MIDR encoding for Arm Cortex-A55 and Cortex-A35 [ Upstream commit 6e616864f21160d8d503523b60a53a29cecc6f24 ] Update the MIDR encodings for the Cortex-A55 and Cortex-A35 Cc: Mark Rutland Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/cputype.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index c60eb29ea261..ef03243beaae 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -85,6 +85,8 @@ #define ARM_CPU_PART_CORTEX_A53 0xD03 #define ARM_CPU_PART_CORTEX_A73 0xD09 #define ARM_CPU_PART_CORTEX_A75 0xD0A +#define ARM_CPU_PART_CORTEX_A35 0xD04 +#define ARM_CPU_PART_CORTEX_A55 0xD05 #define APM_CPU_PART_POTENZA 0x000 @@ -108,6 +110,8 @@ #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) #define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73) #define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75) +#define MIDR_CORTEX_A35 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A35) +#define MIDR_CORTEX_A55 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A55) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) From 197a83aaa821bb51f64a1d97a7a1146341801bb5 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 24 Oct 2019 14:48:13 +0200 Subject: [PATCH 0076/3715] arm64: capabilities: Add support for checks based on a list of MIDRs [ Upstream commit be5b299830c63ed76e0357473c4218c85fb388b3 ] Add helpers for detecting an errata on list of midr ranges of affected CPUs, with the same work around. Cc: Will Deacon Cc: Mark Rutland Cc: Ard Biesheuvel Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon [ardb: add Cortex-A35 to kpti_safe_list[] as well] Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/cpufeature.h | 1 + arch/arm64/include/asm/cputype.h | 9 ++++ arch/arm64/kernel/cpu_errata.c | 81 ++++++++++++++++------------- arch/arm64/kernel/cpufeature.c | 21 ++++---- 4 files changed, 66 insertions(+), 46 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index ade058ada2b0..9776c19d03d4 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -306,6 +306,7 @@ struct arm64_cpu_capabilities { struct midr_range midr_range; }; + const struct midr_range *midr_range_list; struct { /* Feature register checking */ u32 sys_reg; u8 field_pos; diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index ef03243beaae..b23456035eac 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -159,6 +159,15 @@ static inline bool is_midr_in_range(u32 midr, struct midr_range const *range) range->rv_min, range->rv_max); } +static inline bool +is_midr_in_range_list(u32 midr, struct midr_range const *ranges) +{ + while (ranges->model) + if (is_midr_in_range(midr, ranges++)) + return true; + return false; +} + /* * The CPU ID never changes at run time, so we might as well tell the * compiler that it's constant. Use this function to read the CPU ID diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index a3675d279b90..096a679510ad 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -32,6 +32,14 @@ is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope) return is_midr_in_range(midr, &entry->midr_range); } +static bool __maybe_unused +is_affected_midr_range_list(const struct arm64_cpu_capabilities *entry, + int scope) +{ + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + return is_midr_in_range_list(read_cpuid_id(), entry->midr_range_list); +} + static bool __maybe_unused is_kryo_midr(const struct arm64_cpu_capabilities *entry, int scope) { @@ -420,6 +428,10 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \ CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) +#define CAP_MIDR_RANGE_LIST(list) \ + .matches = is_affected_midr_range_list, \ + .midr_range_list = list + /* Errata affecting a range of revisions of given model variant */ #define ERRATA_MIDR_REV_RANGE(m, var, r_min, r_max) \ ERRATA_MIDR_RANGE(m, var, r_min, var, r_max) @@ -433,6 +445,35 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \ CAP_MIDR_ALL_VERSIONS(model) +/* Errata affecting a list of midr ranges, with same work around */ +#define ERRATA_MIDR_RANGE_LIST(midr_list) \ + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \ + CAP_MIDR_RANGE_LIST(midr_list) + +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR + +/* + * List of CPUs where we need to issue a psci call to + * harden the branch predictor. + */ +static const struct midr_range arm64_bp_harden_smccc_cpus[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), + MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), + MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), + {}, +}; + +static const struct midr_range qcom_bp_harden_cpus[] = { + MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), + MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), + {}, +}; + +#endif + const struct arm64_cpu_capabilities arm64_errata[] = { #if defined(CONFIG_ARM64_ERRATUM_826319) || \ defined(CONFIG_ARM64_ERRATUM_827319) || \ @@ -574,51 +615,17 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), + ERRATA_MIDR_RANGE_LIST(arm64_bp_harden_smccc_cpus), .cpu_enable = enable_smccc_arch_workaround_1, }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), - .cpu_enable = enable_smccc_arch_workaround_1, - }, - { - .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), - .cpu_enable = enable_smccc_arch_workaround_1, - }, - { - .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), - .cpu_enable = enable_smccc_arch_workaround_1, - }, - { - .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - ERRATA_MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), + ERRATA_MIDR_RANGE_LIST(qcom_bp_harden_cpus), .cpu_enable = qcom_enable_link_stack_sanitization, }, { .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT, - ERRATA_MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), - }, - { - .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - ERRATA_MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), - .cpu_enable = qcom_enable_link_stack_sanitization, - }, - { - .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT, - ERRATA_MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), - }, - { - .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - ERRATA_MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), - .cpu_enable = enable_smccc_arch_workaround_1, - }, - { - .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - ERRATA_MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), - .cpu_enable = enable_smccc_arch_workaround_1, + ERRATA_MIDR_RANGE_LIST(qcom_bp_harden_cpus), }, #endif #ifdef CONFIG_ARM64_SSBD diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index d1897d8f40a2..ebc9fd869577 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -826,6 +826,17 @@ static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, int scope) { + /* List of CPUs that are not vulnerable and don't need KPTI */ + static const struct midr_range kpti_safe_list[] = { + MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), + MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), + }; char const *str = "command line option"; /* @@ -850,16 +861,8 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, return true; /* Don't force KPTI for CPUs that are not vulnerable */ - switch (read_cpuid_id() & MIDR_CPU_MODEL_MASK) { - case MIDR_CAVIUM_THUNDERX2: - case MIDR_BRCM_VULCAN: - case MIDR_CORTEX_A53: - case MIDR_CORTEX_A55: - case MIDR_CORTEX_A57: - case MIDR_CORTEX_A72: - case MIDR_CORTEX_A73: + if (is_midr_in_range_list(read_cpuid_id(), kpti_safe_list)) return false; - } /* Defer to CPU feature registers */ return !has_cpuid_feature(entry, scope); From b65b0eb466bc157e0d3d50cfb77d97dba0076201 Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Thu, 24 Oct 2019 14:48:14 +0200 Subject: [PATCH 0077/3715] arm64: KVM: Use SMCCC_ARCH_WORKAROUND_1 for Falkor BP hardening [ Upstream commit 4bc352ffb39e4eec253e70f8c076f2f48a6c1926 ] The function SMCCC_ARCH_WORKAROUND_1 was introduced as part of SMC V1.1 Calling Convention to mitigate CVE-2017-5715. This patch uses the standard call SMCCC_ARCH_WORKAROUND_1 for Falkor chips instead of Silicon provider service ID 0xC2001700. Cc: # 4.14+ Signed-off-by: Shanker Donthineni [maz: reworked errata framework integration] Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/cpucaps.h | 7 ++--- arch/arm64/include/asm/kvm_asm.h | 2 -- arch/arm64/kernel/bpi.S | 7 ----- arch/arm64/kernel/cpu_errata.c | 54 ++++++++++---------------------- arch/arm64/kvm/hyp/entry.S | 12 ------- arch/arm64/kvm/hyp/switch.c | 10 ------ 6 files changed, 20 insertions(+), 72 deletions(-) diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 7d6425d426ac..0ed9f7951097 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -42,10 +42,9 @@ #define ARM64_HAS_DCPOP 21 #define ARM64_UNMAP_KERNEL_AT_EL0 23 #define ARM64_HARDEN_BRANCH_PREDICTOR 24 -#define ARM64_HARDEN_BP_POST_GUEST_EXIT 25 -#define ARM64_SSBD 26 -#define ARM64_MISMATCHED_CACHE_TYPE 27 +#define ARM64_SSBD 25 +#define ARM64_MISMATCHED_CACHE_TYPE 26 -#define ARM64_NCAPS 28 +#define ARM64_NCAPS 27 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 1a6d02350fc6..c59e81b65132 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -70,8 +70,6 @@ extern u32 __kvm_get_mdcr_el2(void); extern u32 __init_stage2_translation(void); -extern void __qcom_hyp_sanitize_btac_predictors(void); - /* Home-grown __this_cpu_{ptr,read} variants that always work at HYP */ #define __hyp_this_cpu_ptr(sym) \ ({ \ diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S index e5de33513b5d..0af46cfdbbf3 100644 --- a/arch/arm64/kernel/bpi.S +++ b/arch/arm64/kernel/bpi.S @@ -55,13 +55,6 @@ ENTRY(__bp_harden_hyp_vecs_start) .endr ENTRY(__bp_harden_hyp_vecs_end) -ENTRY(__qcom_hyp_sanitize_link_stack_start) - stp x29, x30, [sp, #-16]! - .rept 16 - bl . + 4 - .endr - ldp x29, x30, [sp], #16 -ENTRY(__qcom_hyp_sanitize_link_stack_end) .macro smccc_workaround_1 inst sub sp, sp, #(8 * 4) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 096a679510ad..4204b668df7a 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -83,8 +83,6 @@ cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *__unused) DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); #ifdef CONFIG_KVM -extern char __qcom_hyp_sanitize_link_stack_start[]; -extern char __qcom_hyp_sanitize_link_stack_end[]; extern char __smccc_workaround_1_smc_start[]; extern char __smccc_workaround_1_smc_end[]; extern char __smccc_workaround_1_hvc_start[]; @@ -131,8 +129,6 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, spin_unlock(&bp_lock); } #else -#define __qcom_hyp_sanitize_link_stack_start NULL -#define __qcom_hyp_sanitize_link_stack_end NULL #define __smccc_workaround_1_smc_start NULL #define __smccc_workaround_1_smc_end NULL #define __smccc_workaround_1_hvc_start NULL @@ -177,12 +173,25 @@ static void call_hvc_arch_workaround_1(void) arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); } +static void qcom_link_stack_sanitization(void) +{ + u64 tmp; + + asm volatile("mov %0, x30 \n" + ".rept 16 \n" + "bl . + 4 \n" + ".endr \n" + "mov x30, %0 \n" + : "=&r" (tmp)); +} + static void enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry) { bp_hardening_cb_t cb; void *smccc_start, *smccc_end; struct arm_smccc_res res; + u32 midr = read_cpuid_id(); if (!entry->matches(entry, SCOPE_LOCAL_CPU)) return; @@ -215,30 +224,14 @@ enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry) return; } + if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) || + ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)) + cb = qcom_link_stack_sanitization; + install_bp_hardening_cb(entry, cb, smccc_start, smccc_end); return; } - -static void qcom_link_stack_sanitization(void) -{ - u64 tmp; - - asm volatile("mov %0, x30 \n" - ".rept 16 \n" - "bl . + 4 \n" - ".endr \n" - "mov x30, %0 \n" - : "=&r" (tmp)); -} - -static void -qcom_enable_link_stack_sanitization(const struct arm64_cpu_capabilities *entry) -{ - install_bp_hardening_cb(entry, qcom_link_stack_sanitization, - __qcom_hyp_sanitize_link_stack_start, - __qcom_hyp_sanitize_link_stack_end); -} #endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ #ifdef CONFIG_ARM64_SSBD @@ -463,10 +456,6 @@ static const struct midr_range arm64_bp_harden_smccc_cpus[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), - {}, -}; - -static const struct midr_range qcom_bp_harden_cpus[] = { MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), {}, @@ -618,15 +607,6 @@ const struct arm64_cpu_capabilities arm64_errata[] = { ERRATA_MIDR_RANGE_LIST(arm64_bp_harden_smccc_cpus), .cpu_enable = enable_smccc_arch_workaround_1, }, - { - .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - ERRATA_MIDR_RANGE_LIST(qcom_bp_harden_cpus), - .cpu_enable = qcom_enable_link_stack_sanitization, - }, - { - .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT, - ERRATA_MIDR_RANGE_LIST(qcom_bp_harden_cpus), - }, #endif #ifdef CONFIG_ARM64_SSBD { diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index a7b3c198d4de..a360ac6e89e9 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -196,15 +196,3 @@ alternative_endif eret ENDPROC(__fpsimd_guest_restore) - -ENTRY(__qcom_hyp_sanitize_btac_predictors) - /** - * Call SMC64 with Silicon provider serviceID 23<<8 (0xc2001700) - * 0xC2000000-0xC200FFFF: assigned to SiP Service Calls - * b15-b0: contains SiP functionID - */ - movz x0, #0x1700 - movk x0, #0xc200, lsl #16 - smc #0 - ret -ENDPROC(__qcom_hyp_sanitize_btac_predictors) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 44845996b554..4a8fdbb29286 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -405,16 +405,6 @@ again: __set_host_arch_workaround_state(vcpu); - if (cpus_have_const_cap(ARM64_HARDEN_BP_POST_GUEST_EXIT)) { - u32 midr = read_cpuid_id(); - - /* Apply BTAC predictors mitigation to all Falkor chips */ - if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) || - ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)) { - __qcom_hyp_sanitize_btac_predictors(); - } - } - fp_enabled = __fpsimd_enabled(); __sysreg_save_guest_state(guest_ctxt); From 31ee977f709d28be1b62f47295e15cc464bab808 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 24 Oct 2019 14:48:15 +0200 Subject: [PATCH 0078/3715] arm64: don't zero DIT on signal return [ Upstream commit 1265132127b63502d34e0f58c8bdef3a4dc927c2 ] Currently valid_user_regs() treats SPSR_ELx.DIT as a RES0 bit, causing it to be zeroed upon exception return, rather than preserved. Thus, code relying on DIT will not function as expected, and may expose an unexpected timing sidechannel. Let's remove DIT from the set of RES0 bits, such that it is preserved. At the same time, the related comment is updated to better describe the situation, and to take into account the most recent documentation of SPSR_ELx, in ARM DDI 0487C.a. Signed-off-by: Mark Rutland Fixes: 7206dc93a58fb764 ("arm64: Expose Arm v8.4 features") Cc: Catalin Marinas Cc: Suzuki K Poulose Cc: Will Deacon Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/ptrace.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 34d915b6974b..d8ff8f26db6d 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1402,15 +1402,19 @@ asmlinkage void syscall_trace_exit(struct pt_regs *regs) } /* - * Bits which are always architecturally RES0 per ARM DDI 0487A.h + * SPSR_ELx bits which are always architecturally RES0 per ARM DDI 0487C.a + * We also take into account DIT (bit 24), which is not yet documented, and + * treat PAN and UAO as RES0 bits, as they are meaningless at EL0, and may be + * allocated an EL0 meaning in future. * Userspace cannot use these until they have an architectural meaning. + * Note that this follows the SPSR_ELx format, not the AArch32 PSR format. * We also reserve IL for the kernel; SS is handled dynamically. */ #define SPSR_EL1_AARCH64_RES0_BITS \ - (GENMASK_ULL(63,32) | GENMASK_ULL(27, 22) | GENMASK_ULL(20, 10) | \ - GENMASK_ULL(5, 5)) + (GENMASK_ULL(63,32) | GENMASK_ULL(27, 25) | GENMASK_ULL(23, 22) | \ + GENMASK_ULL(20, 10) | GENMASK_ULL(5, 5)) #define SPSR_EL1_AARCH32_RES0_BITS \ - (GENMASK_ULL(63,32) | GENMASK_ULL(24, 22) | GENMASK_ULL(20,20)) + (GENMASK_ULL(63,32) | GENMASK_ULL(23, 22) | GENMASK_ULL(20,20)) static int valid_compat_regs(struct user_pt_regs *regs) { From 36e5ae4d22973ea8534aaff7f75d42eae343bb60 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Oct 2019 14:48:16 +0200 Subject: [PATCH 0079/3715] arm64: Get rid of __smccc_workaround_1_hvc_* [ Upstream commit 22765f30dbaf1118c6ff0fcb8b99c9f2b4d396d5 ] The very existence of __smccc_workaround_1_hvc_* is a thinko, as KVM will never use a HVC call to perform the branch prediction invalidation. Even as a nested hypervisor, it would use an SMC instruction. Let's get rid of it. Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/bpi.S | 12 ++---------- arch/arm64/kernel/cpu_errata.c | 9 +++------ 2 files changed, 5 insertions(+), 16 deletions(-) diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S index 0af46cfdbbf3..4cae34e5a24e 100644 --- a/arch/arm64/kernel/bpi.S +++ b/arch/arm64/kernel/bpi.S @@ -56,21 +56,13 @@ ENTRY(__bp_harden_hyp_vecs_start) ENTRY(__bp_harden_hyp_vecs_end) -.macro smccc_workaround_1 inst +ENTRY(__smccc_workaround_1_smc_start) sub sp, sp, #(8 * 4) stp x2, x3, [sp, #(8 * 0)] stp x0, x1, [sp, #(8 * 2)] mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 - \inst #0 + smc #0 ldp x2, x3, [sp, #(8 * 0)] ldp x0, x1, [sp, #(8 * 2)] add sp, sp, #(8 * 4) -.endm - -ENTRY(__smccc_workaround_1_smc_start) - smccc_workaround_1 smc ENTRY(__smccc_workaround_1_smc_end) - -ENTRY(__smccc_workaround_1_hvc_start) - smccc_workaround_1 hvc -ENTRY(__smccc_workaround_1_hvc_end) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 4204b668df7a..6e565d8d4f71 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -85,8 +85,6 @@ DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); #ifdef CONFIG_KVM extern char __smccc_workaround_1_smc_start[]; extern char __smccc_workaround_1_smc_end[]; -extern char __smccc_workaround_1_hvc_start[]; -extern char __smccc_workaround_1_hvc_end[]; static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, const char *hyp_vecs_end) @@ -131,8 +129,6 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, #else #define __smccc_workaround_1_smc_start NULL #define __smccc_workaround_1_smc_end NULL -#define __smccc_workaround_1_hvc_start NULL -#define __smccc_workaround_1_hvc_end NULL static void __install_bp_hardening_cb(bp_hardening_cb_t fn, const char *hyp_vecs_start, @@ -206,8 +202,9 @@ enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry) if ((int)res.a0 < 0) return; cb = call_hvc_arch_workaround_1; - smccc_start = __smccc_workaround_1_hvc_start; - smccc_end = __smccc_workaround_1_hvc_end; + /* This is a guest, no need to patch KVM vectors */ + smccc_start = NULL; + smccc_end = NULL; break; case PSCI_CONDUIT_SMC: From 7ec258d023de4f118c942fe57647acbb89c8d863 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 24 Oct 2019 14:48:17 +0200 Subject: [PATCH 0080/3715] arm64: cpufeature: Detect SSBS and advertise to userspace [ Upstream commit d71be2b6c0e19180b5f80a6d42039cc074a693a2 ] Armv8.5 introduces a new PSTATE bit known as Speculative Store Bypass Safe (SSBS) which can be used as a mitigation against Spectre variant 4. Additionally, a CPU may provide instructions to manipulate PSTATE.SSBS directly, so that userspace can toggle the SSBS control without trapping to the kernel. This patch probes for the existence of SSBS and advertise the new instructions to userspace if they exist. Reviewed-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/cpucaps.h | 3 ++- arch/arm64/include/asm/sysreg.h | 16 ++++++++++++---- arch/arm64/include/uapi/asm/hwcap.h | 1 + arch/arm64/kernel/cpufeature.c | 19 +++++++++++++++++-- arch/arm64/kernel/cpuinfo.c | 1 + 5 files changed, 33 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 0ed9f7951097..2f8bd0388905 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -44,7 +44,8 @@ #define ARM64_HARDEN_BRANCH_PREDICTOR 24 #define ARM64_SSBD 25 #define ARM64_MISMATCHED_CACHE_TYPE 26 +#define ARM64_SSBS 27 -#define ARM64_NCAPS 27 +#define ARM64_NCAPS 28 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 5f391630d0f4..4724909642e7 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -297,6 +297,7 @@ #define SYS_ICH_LR15_EL2 __SYS__LR8_EL2(7) /* Common SCTLR_ELx flags. */ +#define SCTLR_ELx_DSSBS (1UL << 44) #define SCTLR_ELx_EE (1 << 25) #define SCTLR_ELx_WXN (1 << 19) #define SCTLR_ELx_I (1 << 12) @@ -316,7 +317,7 @@ (1 << 10) | (1 << 13) | (1 << 14) | (1 << 15) | \ (1 << 17) | (1 << 20) | (1 << 21) | (1 << 24) | \ (1 << 26) | (1 << 27) | (1 << 30) | (1 << 31) | \ - (0xffffffffUL << 32)) + (0xffffefffUL << 32)) #ifdef CONFIG_CPU_BIG_ENDIAN #define ENDIAN_SET_EL2 SCTLR_ELx_EE @@ -330,7 +331,7 @@ #define SCTLR_EL2_SET (ENDIAN_SET_EL2 | SCTLR_EL2_RES1) #define SCTLR_EL2_CLEAR (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \ SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_WXN | \ - ENDIAN_CLEAR_EL2 | SCTLR_EL2_RES0) + SCTLR_ELx_DSSBS | ENDIAN_CLEAR_EL2 | SCTLR_EL2_RES0) #if (SCTLR_EL2_SET ^ SCTLR_EL2_CLEAR) != 0xffffffffffffffff #error "Inconsistent SCTLR_EL2 set/clear bits" @@ -354,7 +355,7 @@ (1 << 29)) #define SCTLR_EL1_RES0 ((1 << 6) | (1 << 10) | (1 << 13) | (1 << 17) | \ (1 << 21) | (1 << 27) | (1 << 30) | (1 << 31) | \ - (0xffffffffUL << 32)) + (0xffffefffUL << 32)) #ifdef CONFIG_CPU_BIG_ENDIAN #define ENDIAN_SET_EL1 (SCTLR_EL1_E0E | SCTLR_ELx_EE) @@ -371,7 +372,7 @@ SCTLR_EL1_UCI | SCTLR_EL1_RES1) #define SCTLR_EL1_CLEAR (SCTLR_ELx_A | SCTLR_EL1_CP15BEN | SCTLR_EL1_ITD |\ SCTLR_EL1_UMA | SCTLR_ELx_WXN | ENDIAN_CLEAR_EL1 |\ - SCTLR_EL1_RES0) + SCTLR_ELx_DSSBS | SCTLR_EL1_RES0) #if (SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != 0xffffffffffffffff #error "Inconsistent SCTLR_EL1 set/clear bits" @@ -417,6 +418,13 @@ #define ID_AA64PFR0_EL0_64BIT_ONLY 0x1 #define ID_AA64PFR0_EL0_32BIT_64BIT 0x2 +/* id_aa64pfr1 */ +#define ID_AA64PFR1_SSBS_SHIFT 4 + +#define ID_AA64PFR1_SSBS_PSTATE_NI 0 +#define ID_AA64PFR1_SSBS_PSTATE_ONLY 1 +#define ID_AA64PFR1_SSBS_PSTATE_INSNS 2 + /* id_aa64mmfr0 */ #define ID_AA64MMFR0_TGRAN4_SHIFT 28 #define ID_AA64MMFR0_TGRAN64_SHIFT 24 diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 17c65c8f33cb..2bcd6e4f3474 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -48,5 +48,6 @@ #define HWCAP_USCAT (1 << 25) #define HWCAP_ILRCPC (1 << 26) #define HWCAP_FLAGM (1 << 27) +#define HWCAP_SSBS (1 << 28) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index ebc9fd869577..2a0d76698f34 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -145,6 +145,11 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SSBS_SHIFT, 4, ID_AA64PFR1_SSBS_PSTATE_NI), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI), S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI), @@ -345,7 +350,7 @@ static const struct __ftr_reg_entry { /* Op1 = 0, CRn = 0, CRm = 4 */ ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0), - ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_raz), + ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1), /* Op1 = 0, CRn = 0, CRm = 5 */ ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0), @@ -625,7 +630,6 @@ void update_cpu_features(int cpu, /* * EL3 is not our concern. - * ID_AA64PFR1 is currently RES0. */ taint |= check_update_ftr_reg(SYS_ID_AA64PFR0_EL1, cpu, info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0); @@ -1045,6 +1049,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .min_field_value = 1, }, #endif + { + .desc = "Speculative Store Bypassing Safe (SSBS)", + .capability = ARM64_SSBS, + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64PFR1_EL1, + .field_pos = ID_AA64PFR1_SSBS_SHIFT, + .sign = FTR_UNSIGNED, + .min_field_value = ID_AA64PFR1_SSBS_PSTATE_ONLY, + }, {}, }; @@ -1087,6 +1101,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_LRCPC), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ILRCPC), HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_USCAT), + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, HWCAP_SSBS), {}, }; diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 2188db11b654..9ff64e04e63d 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -80,6 +80,7 @@ static const char *const hwcap_str[] = { "uscat", "ilrcpc", "flagm", + "ssbs", NULL }; From 8a235006cd422ad1c8ad1e3c2cfde1281ac52e48 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 24 Oct 2019 14:48:18 +0200 Subject: [PATCH 0081/3715] arm64: ssbd: Add support for PSTATE.SSBS rather than trapping to EL3 [ Upstream commit 8f04e8e6e29c93421a95b61cad62e3918425eac7 ] On CPUs with support for PSTATE.SSBS, the kernel can toggle the SSBD state without needing to call into firmware. This patch hooks into the existing SSBD infrastructure so that SSBS is used on CPUs that support it, but it's all made horribly complicated by the very real possibility of big/little systems that don't uniformly provide the new capability. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas [ardb: add #include of asm/compat.h] Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/processor.h | 7 +++++ arch/arm64/include/asm/ptrace.h | 1 + arch/arm64/include/asm/sysreg.h | 3 ++ arch/arm64/include/uapi/asm/ptrace.h | 1 + arch/arm64/kernel/cpu_errata.c | 26 ++++++++++++++-- arch/arm64/kernel/cpufeature.c | 45 ++++++++++++++++++++++++++++ arch/arm64/kernel/process.c | 4 +++ arch/arm64/kernel/ssbd.c | 22 ++++++++++++++ 8 files changed, 107 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 9b6ac522a71a..ec1725c6df21 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -153,6 +153,10 @@ static inline void start_thread(struct pt_regs *regs, unsigned long pc, { start_thread_common(regs, pc); regs->pstate = PSR_MODE_EL0t; + + if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE) + regs->pstate |= PSR_SSBS_BIT; + regs->sp = sp; } @@ -169,6 +173,9 @@ static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc, regs->pstate |= COMPAT_PSR_E_BIT; #endif + if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE) + regs->pstate |= PSR_AA32_SSBS_BIT; + regs->compat_sp = sp; } #endif diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 1b2a253de6a1..b466d763a90d 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -50,6 +50,7 @@ #define PSR_AA32_I_BIT 0x00000080 #define PSR_AA32_A_BIT 0x00000100 #define PSR_AA32_E_BIT 0x00000200 +#define PSR_AA32_SSBS_BIT 0x00800000 #define PSR_AA32_DIT_BIT 0x01000000 #define PSR_AA32_Q_BIT 0x08000000 #define PSR_AA32_V_BIT 0x10000000 diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 4724909642e7..50a89bcf9072 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -86,11 +86,14 @@ #define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4) #define REG_PSTATE_UAO_IMM sys_reg(0, 0, 4, 0, 3) +#define REG_PSTATE_SSBS_IMM sys_reg(0, 3, 4, 0, 1) #define SET_PSTATE_PAN(x) __emit_inst(0xd5000000 | REG_PSTATE_PAN_IMM | \ (!!x)<<8 | 0x1f) #define SET_PSTATE_UAO(x) __emit_inst(0xd5000000 | REG_PSTATE_UAO_IMM | \ (!!x)<<8 | 0x1f) +#define SET_PSTATE_SSBS(x) __emit_inst(0xd5000000 | REG_PSTATE_SSBS_IMM | \ + (!!x)<<8 | 0x1f) #define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2) #define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2) diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 67d4c33974e8..eea58f8ec355 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -45,6 +45,7 @@ #define PSR_I_BIT 0x00000080 #define PSR_A_BIT 0x00000100 #define PSR_D_BIT 0x00000200 +#define PSR_SSBS_BIT 0x00001000 #define PSR_PAN_BIT 0x00400000 #define PSR_UAO_BIT 0x00800000 #define PSR_Q_BIT 0x08000000 diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 6e565d8d4f71..105741487a86 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -304,6 +304,14 @@ void __init arm64_enable_wa2_handling(struct alt_instr *alt, void arm64_set_ssbd_mitigation(bool state) { + if (this_cpu_has_cap(ARM64_SSBS)) { + if (state) + asm volatile(SET_PSTATE_SSBS(0)); + else + asm volatile(SET_PSTATE_SSBS(1)); + return; + } + switch (psci_ops.conduit) { case PSCI_CONDUIT_HVC: arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL); @@ -328,6 +336,11 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + if (this_cpu_has_cap(ARM64_SSBS)) { + required = false; + goto out_printmsg; + } + if (psci_ops.smccc_version == SMCCC_VERSION_1_0) { ssbd_state = ARM64_SSBD_UNKNOWN; return false; @@ -376,7 +389,6 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, switch (ssbd_state) { case ARM64_SSBD_FORCE_DISABLE: - pr_info_once("%s disabled from command-line\n", entry->desc); arm64_set_ssbd_mitigation(false); required = false; break; @@ -389,7 +401,6 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, break; case ARM64_SSBD_FORCE_ENABLE: - pr_info_once("%s forced from command-line\n", entry->desc); arm64_set_ssbd_mitigation(true); required = true; break; @@ -399,6 +410,17 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, break; } +out_printmsg: + switch (ssbd_state) { + case ARM64_SSBD_FORCE_DISABLE: + pr_info_once("%s disabled from command-line\n", entry->desc); + break; + + case ARM64_SSBD_FORCE_ENABLE: + pr_info_once("%s forced from command-line\n", entry->desc); + break; + } + return required; } #endif /* CONFIG_ARM64_SSBD */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 2a0d76698f34..d8e89b5d99ee 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -925,6 +925,48 @@ static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused) write_sysreg(read_sysreg(tpidr_el1), tpidr_el2); } +#ifdef CONFIG_ARM64_SSBD +static int ssbs_emulation_handler(struct pt_regs *regs, u32 instr) +{ + if (user_mode(regs)) + return 1; + + if (instr & BIT(CRm_shift)) + regs->pstate |= PSR_SSBS_BIT; + else + regs->pstate &= ~PSR_SSBS_BIT; + + arm64_skip_faulting_instruction(regs, 4); + return 0; +} + +static struct undef_hook ssbs_emulation_hook = { + .instr_mask = ~(1U << CRm_shift), + .instr_val = 0xd500001f | REG_PSTATE_SSBS_IMM, + .fn = ssbs_emulation_handler, +}; + +static void cpu_enable_ssbs(const struct arm64_cpu_capabilities *__unused) +{ + static bool undef_hook_registered = false; + static DEFINE_SPINLOCK(hook_lock); + + spin_lock(&hook_lock); + if (!undef_hook_registered) { + register_undef_hook(&ssbs_emulation_hook); + undef_hook_registered = true; + } + spin_unlock(&hook_lock); + + if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { + sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS); + arm64_set_ssbd_mitigation(false); + } else { + arm64_set_ssbd_mitigation(true); + } +} +#endif /* CONFIG_ARM64_SSBD */ + static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", @@ -1049,6 +1091,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .min_field_value = 1, }, #endif +#ifdef CONFIG_ARM64_SSBD { .desc = "Speculative Store Bypassing Safe (SSBS)", .capability = ARM64_SSBS, @@ -1058,7 +1101,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64PFR1_SSBS_SHIFT, .sign = FTR_UNSIGNED, .min_field_value = ID_AA64PFR1_SSBS_PSTATE_ONLY, + .cpu_enable = cpu_enable_ssbs, }, +#endif {}, }; diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 9e773732520c..532ad6be9c2b 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -296,6 +296,10 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, if (IS_ENABLED(CONFIG_ARM64_UAO) && cpus_have_const_cap(ARM64_HAS_UAO)) childregs->pstate |= PSR_UAO_BIT; + + if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) + childregs->pstate |= PSR_SSBS_BIT; + p->thread.cpu_context.x19 = stack_start; p->thread.cpu_context.x20 = stk_sz; } diff --git a/arch/arm64/kernel/ssbd.c b/arch/arm64/kernel/ssbd.c index 0560738c1d5c..58de005cd756 100644 --- a/arch/arm64/kernel/ssbd.c +++ b/arch/arm64/kernel/ssbd.c @@ -3,13 +3,32 @@ * Copyright (C) 2018 ARM Ltd, All Rights Reserved. */ +#include #include #include #include +#include #include +#include #include +static void ssbd_ssbs_enable(struct task_struct *task) +{ + u64 val = is_compat_thread(task_thread_info(task)) ? + PSR_AA32_SSBS_BIT : PSR_SSBS_BIT; + + task_pt_regs(task)->pstate |= val; +} + +static void ssbd_ssbs_disable(struct task_struct *task) +{ + u64 val = is_compat_thread(task_thread_info(task)) ? + PSR_AA32_SSBS_BIT : PSR_SSBS_BIT; + + task_pt_regs(task)->pstate &= ~val; +} + /* * prctl interface for SSBD */ @@ -45,12 +64,14 @@ static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl) return -EPERM; task_clear_spec_ssb_disable(task); clear_tsk_thread_flag(task, TIF_SSBD); + ssbd_ssbs_enable(task); break; case PR_SPEC_DISABLE: if (state == ARM64_SSBD_FORCE_DISABLE) return -EPERM; task_set_spec_ssb_disable(task); set_tsk_thread_flag(task, TIF_SSBD); + ssbd_ssbs_disable(task); break; case PR_SPEC_FORCE_DISABLE: if (state == ARM64_SSBD_FORCE_DISABLE) @@ -58,6 +79,7 @@ static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl) task_set_spec_ssb_disable(task); task_set_spec_ssb_force_disable(task); set_tsk_thread_flag(task, TIF_SSBD); + ssbd_ssbs_disable(task); break; default: return -ERANGE; From 5bed8225f130e7cab41b87f9baf5e5a5fba79211 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 24 Oct 2019 14:48:19 +0200 Subject: [PATCH 0082/3715] KVM: arm64: Set SCTLR_EL2.DSSBS if SSBD is forcefully disabled and !vhe [ Upstream commit 7c36447ae5a090729e7b129f24705bb231a07e0b ] When running without VHE, it is necessary to set SCTLR_EL2.DSSBS if SSBD has been forcefully disabled on the kernel command-line. Acked-by: Christoffer Dall Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/kvm_host.h | 11 +++++++++++ arch/arm64/kvm/hyp/sysreg-sr.c | 11 +++++++++++ 2 files changed, 22 insertions(+) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index b01ad3489bd8..f982c9d1d10b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -356,6 +356,8 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); void __kvm_set_tpidr_el2(u64 tpidr_el2); DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state); +void __kvm_enable_ssbs(void); + static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, unsigned long hyp_stack_ptr, unsigned long vector_ptr) @@ -380,6 +382,15 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, - (u64)kvm_ksym_ref(kvm_host_cpu_state); kvm_call_hyp(__kvm_set_tpidr_el2, tpidr_el2); + + /* + * Disabling SSBD on a non-VHE system requires us to enable SSBS + * at EL2. + */ + if (!has_vhe() && this_cpu_has_cap(ARM64_SSBS) && + arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { + kvm_call_hyp(__kvm_enable_ssbs); + } } static inline void kvm_arch_hardware_unsetup(void) {} diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index e19d89cabf2a..3773311ffcd0 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -188,3 +188,14 @@ void __hyp_text __kvm_set_tpidr_el2(u64 tpidr_el2) { asm("msr tpidr_el2, %0": : "r" (tpidr_el2)); } + +void __hyp_text __kvm_enable_ssbs(void) +{ + u64 tmp; + + asm volatile( + "mrs %0, sctlr_el2\n" + "orr %0, %0, %1\n" + "msr sctlr_el2, %0" + : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS)); +} From 985d934b2e97743c5b65e28f923cf6c4eedbfeb7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 24 Oct 2019 14:48:20 +0200 Subject: [PATCH 0083/3715] arm64: fix SSBS sanitization [ Upstream commit f54dada8274643e3ff4436df0ea124aeedc43cae ] In valid_user_regs() we treat SSBS as a RES0 bit, and consequently it is unexpectedly cleared when we restore a sigframe or fiddle with GPRs via ptrace. This patch fixes valid_user_regs() to account for this, updating the function to refer to the latest ARM ARM (ARM DDI 0487D.a). For AArch32 tasks, SSBS appears in bit 23 of SPSR_EL1, matching its position in the AArch32-native PSR format, and we don't need to translate it as we have to for DIT. There are no other bit assignments that we need to account for today. As the recent documentation describes the DIT bit, we can drop our comment regarding DIT. While removing SSBS from the RES0 masks, existing inconsistent whitespace is corrected. Fixes: d71be2b6c0e19180 ("arm64: cpufeature: Detect SSBS and advertise to userspace") Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Suzuki K Poulose Cc: Will Deacon Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/ptrace.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index d8ff8f26db6d..242527f29c41 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1402,19 +1402,20 @@ asmlinkage void syscall_trace_exit(struct pt_regs *regs) } /* - * SPSR_ELx bits which are always architecturally RES0 per ARM DDI 0487C.a - * We also take into account DIT (bit 24), which is not yet documented, and - * treat PAN and UAO as RES0 bits, as they are meaningless at EL0, and may be - * allocated an EL0 meaning in future. + * SPSR_ELx bits which are always architecturally RES0 per ARM DDI 0487D.a. + * We permit userspace to set SSBS (AArch64 bit 12, AArch32 bit 23) which is + * not described in ARM DDI 0487D.a. + * We treat PAN and UAO as RES0 bits, as they are meaningless at EL0, and may + * be allocated an EL0 meaning in future. * Userspace cannot use these until they have an architectural meaning. * Note that this follows the SPSR_ELx format, not the AArch32 PSR format. * We also reserve IL for the kernel; SS is handled dynamically. */ #define SPSR_EL1_AARCH64_RES0_BITS \ - (GENMASK_ULL(63,32) | GENMASK_ULL(27, 25) | GENMASK_ULL(23, 22) | \ - GENMASK_ULL(20, 10) | GENMASK_ULL(5, 5)) + (GENMASK_ULL(63, 32) | GENMASK_ULL(27, 25) | GENMASK_ULL(23, 22) | \ + GENMASK_ULL(20, 13) | GENMASK_ULL(11, 10) | GENMASK_ULL(5, 5)) #define SPSR_EL1_AARCH32_RES0_BITS \ - (GENMASK_ULL(63,32) | GENMASK_ULL(23, 22) | GENMASK_ULL(20,20)) + (GENMASK_ULL(63, 32) | GENMASK_ULL(22, 22) | GENMASK_ULL(20, 20)) static int valid_compat_regs(struct user_pt_regs *regs) { From b863eee5bbcd03c816be66c656db99a428244ff7 Mon Sep 17 00:00:00 2001 From: Mian Yousaf Kaukab Date: Thu, 24 Oct 2019 14:48:21 +0200 Subject: [PATCH 0084/3715] arm64: Add sysfs vulnerability show for spectre-v1 [ Upstream commit 3891ebccace188af075ce143d8b072b65e90f695 ] spectre-v1 has been mitigated and the mitigation is always active. Report this to userspace via sysfs Signed-off-by: Mian Yousaf Kaukab Signed-off-by: Jeremy Linton Reviewed-by: Andre Przywara Reviewed-by: Catalin Marinas Tested-by: Stefan Wahren Acked-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpu_errata.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 105741487a86..8b0a141bd01d 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -638,3 +638,9 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { } }; + +ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); +} From 8fda007949a8f8bf7dc761c3053c8d4750a9c3ec Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Thu, 24 Oct 2019 14:48:22 +0200 Subject: [PATCH 0085/3715] arm64: add sysfs vulnerability show for meltdown [ Upstream commit 1b3ccf4be0e7be8c4bd8522066b6cbc92591e912 ] We implement page table isolation as a mitigation for meltdown. Report this to userspace via sysfs. Signed-off-by: Jeremy Linton Reviewed-by: Suzuki K Poulose Reviewed-by: Andre Przywara Reviewed-by: Catalin Marinas Tested-by: Stefan Wahren Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpufeature.c | 58 ++++++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 14 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index d8e89b5d99ee..b782e98633da 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -824,7 +824,7 @@ static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unus ID_AA64PFR0_FP_SHIFT) < 0; } -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +static bool __meltdown_safe = true; static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, @@ -842,6 +842,16 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), }; char const *str = "command line option"; + bool meltdown_safe; + + meltdown_safe = is_midr_in_range_list(read_cpuid_id(), kpti_safe_list); + + /* Defer to CPU feature registers */ + if (has_cpuid_feature(entry, scope)) + meltdown_safe = true; + + if (!meltdown_safe) + __meltdown_safe = false; /* * For reasons that aren't entirely clear, enabling KPTI on Cavium @@ -853,6 +863,19 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, __kpti_forced = -1; } + /* Useful for KASLR robustness */ + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset() > 0) { + if (!__kpti_forced) { + str = "KASLR"; + __kpti_forced = 1; + } + } + + if (!IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) { + pr_info_once("kernel page table isolation disabled by kernel configuration\n"); + return false; + } + /* Forced? */ if (__kpti_forced) { pr_info_once("kernel page table isolation forced %s by %s\n", @@ -860,18 +883,10 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, return __kpti_forced > 0; } - /* Useful for KASLR robustness */ - if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) - return true; - - /* Don't force KPTI for CPUs that are not vulnerable */ - if (is_midr_in_range_list(read_cpuid_id(), kpti_safe_list)) - return false; - - /* Defer to CPU feature registers */ - return !has_cpuid_feature(entry, scope); + return !meltdown_safe; } +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 static void kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) { @@ -896,6 +911,12 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) return; } +#else +static void +kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) +{ +} +#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ static int __init parse_kpti(char *str) { @@ -909,7 +930,6 @@ static int __init parse_kpti(char *str) return 0; } early_param("kpti", parse_kpti); -#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused) { @@ -1056,7 +1076,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = hyp_offset_low, }, -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 { .desc = "Kernel page table isolation (KPTI)", .capability = ARM64_UNMAP_KERNEL_AT_EL0, @@ -1072,7 +1091,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = unmap_kernel_at_el0, .cpu_enable = kpti_install_ng_mappings, }, -#endif { /* FP/SIMD is not implemented */ .capability = ARM64_HAS_NO_FPSIMD, @@ -1629,3 +1647,15 @@ static int __init enable_mrs_emulation(void) } core_initcall(enable_mrs_emulation); + +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, + char *buf) +{ + if (__meltdown_safe) + return sprintf(buf, "Not affected\n"); + + if (arm64_kernel_unmapped_at_el0()) + return sprintf(buf, "Mitigation: PTI\n"); + + return sprintf(buf, "Vulnerable\n"); +} From 88ba8b6d41a3803c18a1efbd79d9b2f95199d42c Mon Sep 17 00:00:00 2001 From: Mian Yousaf Kaukab Date: Thu, 24 Oct 2019 14:48:23 +0200 Subject: [PATCH 0086/3715] arm64: enable generic CPU vulnerabilites support [ Upstream commit 61ae1321f06c4489c724c803e9b8363dea576da3 ] Enable CPU vulnerabilty show functions for spectre_v1, spectre_v2, meltdown and store-bypass. Signed-off-by: Mian Yousaf Kaukab Signed-off-by: Jeremy Linton Reviewed-by: Andre Przywara Reviewed-by: Catalin Marinas Tested-by: Stefan Wahren Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c30cd78b6918..e296ae3e20f4 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -49,6 +49,7 @@ config ARM64 select GENERIC_CLOCKEVENTS select GENERIC_CLOCKEVENTS_BROADCAST select GENERIC_CPU_AUTOPROBE + select GENERIC_CPU_VULNERABILITIES select GENERIC_EARLY_IOREMAP select GENERIC_IDLE_POLL_SETUP select GENERIC_IRQ_PROBE From be5a03c9c98259f6d3837171bb33a61120da6716 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Thu, 24 Oct 2019 14:48:24 +0200 Subject: [PATCH 0087/3715] arm64: Always enable ssb vulnerability detection [ Upstream commit d42281b6e49510f078ace15a8ea10f71e6262581 ] Ensure we are always able to detect whether or not the CPU is affected by SSB, so that we can later advertise this to userspace. Signed-off-by: Jeremy Linton Reviewed-by: Andre Przywara Reviewed-by: Catalin Marinas Tested-by: Stefan Wahren [will: Use IS_ENABLED instead of #ifdef] Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/cpufeature.h | 4 ---- arch/arm64/kernel/cpu_errata.c | 9 +++++---- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 9776c19d03d4..166f81b7afee 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -493,11 +493,7 @@ static inline int arm64_get_ssbd_state(void) #endif } -#ifdef CONFIG_ARM64_SSBD void arm64_set_ssbd_mitigation(bool state); -#else -static inline void arm64_set_ssbd_mitigation(bool state) {} -#endif #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 8b0a141bd01d..86c4f4e51427 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -231,7 +231,6 @@ enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry) } #endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ -#ifdef CONFIG_ARM64_SSBD DEFINE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required); int ssbd_state __read_mostly = ARM64_SSBD_KERNEL; @@ -304,6 +303,11 @@ void __init arm64_enable_wa2_handling(struct alt_instr *alt, void arm64_set_ssbd_mitigation(bool state) { + if (!IS_ENABLED(CONFIG_ARM64_SSBD)) { + pr_info_once("SSBD disabled by kernel configuration\n"); + return; + } + if (this_cpu_has_cap(ARM64_SSBS)) { if (state) asm volatile(SET_PSTATE_SSBS(0)); @@ -423,7 +427,6 @@ out_printmsg: return required; } -#endif /* CONFIG_ARM64_SSBD */ #define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) \ .matches = is_affected_midr_range, \ @@ -627,14 +630,12 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .cpu_enable = enable_smccc_arch_workaround_1, }, #endif -#ifdef CONFIG_ARM64_SSBD { .desc = "Speculative Store Bypass Disable", .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .capability = ARM64_SSBD, .matches = has_ssbd_mitigation, }, -#endif { } }; From 02fd5d7f6d027912901a68d671c0449295966cd7 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Thu, 24 Oct 2019 14:48:25 +0200 Subject: [PATCH 0088/3715] arm64: Provide a command line to disable spectre_v2 mitigation [ Upstream commit e5ce5e7267ddcbe13ab9ead2542524e1b7993e5a ] There are various reasons, such as benchmarking, to disable spectrev2 mitigation on a machine. Provide a command-line option to do so. Signed-off-by: Jeremy Linton Reviewed-by: Suzuki K Poulose Reviewed-by: Andre Przywara Reviewed-by: Catalin Marinas Tested-by: Stefan Wahren Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/kernel-parameters.txt | 8 ++++---- arch/arm64/kernel/cpu_errata.c | 13 +++++++++++++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 188a7db8501b..5205740ed39b 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2745,10 +2745,10 @@ (bounds check bypass). With this option data leaks are possible in the system. - nospectre_v2 [X86,PPC_FSL_BOOK3E] Disable all mitigations for the Spectre variant 2 - (indirect branch prediction) vulnerability. System may - allow data leaks with this option, which is equivalent - to spectre_v2=off. + nospectre_v2 [X86,PPC_FSL_BOOK3E,ARM64] Disable all mitigations for + the Spectre variant 2 (indirect branch prediction) + vulnerability. System may allow data leaks with this + option. nospec_store_bypass_disable [HW] Disable all mitigations for the Speculative Store Bypass vulnerability diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 86c4f4e51427..5c3f8c712aae 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -181,6 +181,14 @@ static void qcom_link_stack_sanitization(void) : "=&r" (tmp)); } +static bool __nospectre_v2; +static int __init parse_nospectre_v2(char *str) +{ + __nospectre_v2 = true; + return 0; +} +early_param("nospectre_v2", parse_nospectre_v2); + static void enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry) { @@ -192,6 +200,11 @@ enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry) if (!entry->matches(entry, SCOPE_LOCAL_CPU)) return; + if (__nospectre_v2) { + pr_info_once("spectrev2 mitigation disabled by command line option\n"); + return; + } + if (psci_ops.smccc_version == SMCCC_VERSION_1_0) return; From 806ac9a792672cca6e506af0a2d020b9230f10ce Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Oct 2019 14:48:26 +0200 Subject: [PATCH 0089/3715] arm64: Advertise mitigation of Spectre-v2, or lack thereof [ Upstream commit 73f38166095947f3b86b02fbed6bd592223a7ac8 ] We currently have a list of CPUs affected by Spectre-v2, for which we check that the firmware implements ARCH_WORKAROUND_1. It turns out that not all firmwares do implement the required mitigation, and that we fail to let the user know about it. Instead, let's slightly revamp our checks, and rely on a whitelist of cores that are known to be non-vulnerable, and let the user know the status of the mitigation in the kernel log. Signed-off-by: Marc Zyngier Signed-off-by: Jeremy Linton Reviewed-by: Andre Przywara Reviewed-by: Suzuki K Poulose Reviewed-by: Catalin Marinas Tested-by: Stefan Wahren Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpu_errata.c | 108 +++++++++++++++++---------------- 1 file changed, 56 insertions(+), 52 deletions(-) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 5c3f8c712aae..bf6d8aa9b45a 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -98,9 +98,9 @@ static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K); } -static void __install_bp_hardening_cb(bp_hardening_cb_t fn, - const char *hyp_vecs_start, - const char *hyp_vecs_end) +static void install_bp_hardening_cb(bp_hardening_cb_t fn, + const char *hyp_vecs_start, + const char *hyp_vecs_end) { static int last_slot = -1; static DEFINE_SPINLOCK(bp_lock); @@ -130,7 +130,7 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, #define __smccc_workaround_1_smc_start NULL #define __smccc_workaround_1_smc_end NULL -static void __install_bp_hardening_cb(bp_hardening_cb_t fn, +static void install_bp_hardening_cb(bp_hardening_cb_t fn, const char *hyp_vecs_start, const char *hyp_vecs_end) { @@ -138,23 +138,6 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, } #endif /* CONFIG_KVM */ -static void install_bp_hardening_cb(const struct arm64_cpu_capabilities *entry, - bp_hardening_cb_t fn, - const char *hyp_vecs_start, - const char *hyp_vecs_end) -{ - u64 pfr0; - - if (!entry->matches(entry, SCOPE_LOCAL_CPU)) - return; - - pfr0 = read_cpuid(ID_AA64PFR0_EL1); - if (cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_CSV2_SHIFT)) - return; - - __install_bp_hardening_cb(fn, hyp_vecs_start, hyp_vecs_end); -} - #include #include #include @@ -189,31 +172,27 @@ static int __init parse_nospectre_v2(char *str) } early_param("nospectre_v2", parse_nospectre_v2); -static void -enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry) +/* + * -1: No workaround + * 0: No workaround required + * 1: Workaround installed + */ +static int detect_harden_bp_fw(void) { bp_hardening_cb_t cb; void *smccc_start, *smccc_end; struct arm_smccc_res res; u32 midr = read_cpuid_id(); - if (!entry->matches(entry, SCOPE_LOCAL_CPU)) - return; - - if (__nospectre_v2) { - pr_info_once("spectrev2 mitigation disabled by command line option\n"); - return; - } - if (psci_ops.smccc_version == SMCCC_VERSION_1_0) - return; + return -1; switch (psci_ops.conduit) { case PSCI_CONDUIT_HVC: arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); if ((int)res.a0 < 0) - return; + return -1; cb = call_hvc_arch_workaround_1; /* This is a guest, no need to patch KVM vectors */ smccc_start = NULL; @@ -224,23 +203,23 @@ enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry) arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); if ((int)res.a0 < 0) - return; + return -1; cb = call_smc_arch_workaround_1; smccc_start = __smccc_workaround_1_smc_start; smccc_end = __smccc_workaround_1_smc_end; break; default: - return; + return -1; } if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) || ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)) cb = qcom_link_stack_sanitization; - install_bp_hardening_cb(entry, cb, smccc_start, smccc_end); + install_bp_hardening_cb(cb, smccc_start, smccc_end); - return; + return 1; } #endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ @@ -479,23 +458,48 @@ out_printmsg: CAP_MIDR_RANGE_LIST(midr_list) #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR - /* - * List of CPUs where we need to issue a psci call to - * harden the branch predictor. + * List of CPUs that do not need any Spectre-v2 mitigation at all. */ -static const struct midr_range arm64_bp_harden_smccc_cpus[] = { - MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), - MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), - MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), - MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), - MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), - {}, +static const struct midr_range spectre_v2_safe_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), + { /* sentinel */ } }; +static bool __maybe_unused +check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) +{ + int need_wa; + + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + + /* If the CPU has CSV2 set, we're safe */ + if (cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64PFR0_EL1), + ID_AA64PFR0_CSV2_SHIFT)) + return false; + + /* Alternatively, we have a list of unaffected CPUs */ + if (is_midr_in_range_list(read_cpuid_id(), spectre_v2_safe_list)) + return false; + + /* Fallback to firmware detection */ + need_wa = detect_harden_bp_fw(); + if (!need_wa) + return false; + + /* forced off */ + if (__nospectre_v2) { + pr_info_once("spectrev2 mitigation disabled by command line option\n"); + return false; + } + + if (need_wa < 0) + pr_warn_once("ARM_SMCCC_ARCH_WORKAROUND_1 missing from firmware\n"); + + return (need_wa > 0); +} #endif const struct arm64_cpu_capabilities arm64_errata[] = { @@ -639,8 +643,8 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - ERRATA_MIDR_RANGE_LIST(arm64_bp_harden_smccc_cpus), - .cpu_enable = enable_smccc_arch_workaround_1, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = check_branch_predictor, }, #endif { From 3e91f3eacc91d9d6116ed56ea339f858958ba3ee Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Thu, 24 Oct 2019 14:48:27 +0200 Subject: [PATCH 0090/3715] arm64: Always enable spectre-v2 vulnerability detection [ Upstream commit 8c1e3d2bb44cbb998cb28ff9a18f105fee7f1eb3 ] Ensure we are always able to detect whether or not the CPU is affected by Spectre-v2, so that we can later advertise this to userspace. Signed-off-by: Jeremy Linton Reviewed-by: Andre Przywara Reviewed-by: Catalin Marinas Tested-by: Stefan Wahren Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpu_errata.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index bf6d8aa9b45a..647c533cfd90 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -76,7 +76,6 @@ cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *__unused) config_sctlr_el1(SCTLR_EL1_UCT, 0); } -#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR #include #include @@ -217,11 +216,11 @@ static int detect_harden_bp_fw(void) ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)) cb = qcom_link_stack_sanitization; - install_bp_hardening_cb(cb, smccc_start, smccc_end); + if (IS_ENABLED(CONFIG_HARDEN_BRANCH_PREDICTOR)) + install_bp_hardening_cb(cb, smccc_start, smccc_end); return 1; } -#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ DEFINE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required); @@ -457,7 +456,6 @@ out_printmsg: .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \ CAP_MIDR_RANGE_LIST(midr_list) -#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR /* * List of CPUs that do not need any Spectre-v2 mitigation at all. */ @@ -489,6 +487,12 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) if (!need_wa) return false; + if (!IS_ENABLED(CONFIG_HARDEN_BRANCH_PREDICTOR)) { + pr_warn_once("spectrev2 mitigation disabled by kernel configuration\n"); + __hardenbp_enab = false; + return false; + } + /* forced off */ if (__nospectre_v2) { pr_info_once("spectrev2 mitigation disabled by command line option\n"); @@ -500,7 +504,6 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) return (need_wa > 0); } -#endif const struct arm64_cpu_capabilities arm64_errata[] = { #if defined(CONFIG_ARM64_ERRATUM_826319) || \ @@ -640,13 +643,11 @@ const struct arm64_cpu_capabilities arm64_errata[] = { ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), }, #endif -#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = check_branch_predictor, }, -#endif { .desc = "Speculative Store Bypass Disable", .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, From 04da7c4665c0b02f124e80888b23d3bfde86a99a Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Thu, 24 Oct 2019 14:48:28 +0200 Subject: [PATCH 0091/3715] arm64: add sysfs vulnerability show for spectre-v2 [ Upstream commit d2532e27b5638bb2e2dd52b80b7ea2ec65135377 ] Track whether all the cores in the machine are vulnerable to Spectre-v2, and whether all the vulnerable cores have been mitigated. We then expose this information to userspace via sysfs. Signed-off-by: Jeremy Linton Reviewed-by: Andre Przywara Reviewed-by: Catalin Marinas Tested-by: Stefan Wahren Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpu_errata.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 647c533cfd90..809a736f38a9 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -456,6 +456,10 @@ out_printmsg: .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \ CAP_MIDR_RANGE_LIST(midr_list) +/* Track overall mitigation state. We are only mitigated if all cores are ok */ +static bool __hardenbp_enab = true; +static bool __spectrev2_safe = true; + /* * List of CPUs that do not need any Spectre-v2 mitigation at all. */ @@ -466,6 +470,10 @@ static const struct midr_range spectre_v2_safe_list[] = { { /* sentinel */ } }; +/* + * Track overall bp hardening for all heterogeneous cores in the machine. + * We are only considered "safe" if all booted cores are known safe. + */ static bool __maybe_unused check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) { @@ -487,6 +495,8 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) if (!need_wa) return false; + __spectrev2_safe = false; + if (!IS_ENABLED(CONFIG_HARDEN_BRANCH_PREDICTOR)) { pr_warn_once("spectrev2 mitigation disabled by kernel configuration\n"); __hardenbp_enab = false; @@ -496,11 +506,14 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) /* forced off */ if (__nospectre_v2) { pr_info_once("spectrev2 mitigation disabled by command line option\n"); + __hardenbp_enab = false; return false; } - if (need_wa < 0) + if (need_wa < 0) { pr_warn_once("ARM_SMCCC_ARCH_WORKAROUND_1 missing from firmware\n"); + __hardenbp_enab = false; + } return (need_wa > 0); } @@ -663,3 +676,15 @@ ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, { return sprintf(buf, "Mitigation: __user pointer sanitization\n"); } + +ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, + char *buf) +{ + if (__spectrev2_safe) + return sprintf(buf, "Not affected\n"); + + if (__hardenbp_enab) + return sprintf(buf, "Mitigation: Branch predictor hardening\n"); + + return sprintf(buf, "Vulnerable\n"); +} From 325758118d72e0723f5eae6eb01844101f8505b3 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Thu, 24 Oct 2019 14:48:29 +0200 Subject: [PATCH 0092/3715] arm64: add sysfs vulnerability show for speculative store bypass [ Upstream commit 526e065dbca6df0b5a130b84b836b8b3c9f54e21 ] Return status based on ssbd_state and __ssb_safe. If the mitigation is disabled, or the firmware isn't responding then return the expected machine state based on a whitelist of known good cores. Given a heterogeneous machine, the overall machine vulnerability defaults to safe but is reset to unsafe when we miss the whitelist and the firmware doesn't explicitly tell us the core is safe. In order to make that work we delay transitioning to vulnerable until we know the firmware isn't responding to avoid a case where we miss the whitelist, but the firmware goes ahead and reports the core is not vulnerable. If all the cores in the machine have SSBS, then __ssb_safe will remain true. Tested-by: Stefan Wahren Signed-off-by: Jeremy Linton Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpu_errata.c | 42 ++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 809a736f38a9..2898130c3156 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -225,6 +225,7 @@ static int detect_harden_bp_fw(void) DEFINE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required); int ssbd_state __read_mostly = ARM64_SSBD_KERNEL; +static bool __ssb_safe = true; static const struct ssbd_options { const char *str; @@ -328,6 +329,7 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, struct arm_smccc_res res; bool required = true; s32 val; + bool this_cpu_safe = false; WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); @@ -336,8 +338,14 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, goto out_printmsg; } + /* delay setting __ssb_safe until we get a firmware response */ + if (is_midr_in_range_list(read_cpuid_id(), entry->midr_range_list)) + this_cpu_safe = true; + if (psci_ops.smccc_version == SMCCC_VERSION_1_0) { ssbd_state = ARM64_SSBD_UNKNOWN; + if (!this_cpu_safe) + __ssb_safe = false; return false; } @@ -354,6 +362,8 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, default: ssbd_state = ARM64_SSBD_UNKNOWN; + if (!this_cpu_safe) + __ssb_safe = false; return false; } @@ -362,14 +372,18 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, switch (val) { case SMCCC_RET_NOT_SUPPORTED: ssbd_state = ARM64_SSBD_UNKNOWN; + if (!this_cpu_safe) + __ssb_safe = false; return false; + /* machines with mixed mitigation requirements must not return this */ case SMCCC_RET_NOT_REQUIRED: pr_info_once("%s mitigation not required\n", entry->desc); ssbd_state = ARM64_SSBD_MITIGATED; return false; case SMCCC_RET_SUCCESS: + __ssb_safe = false; required = true; break; @@ -379,6 +393,8 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, default: WARN_ON(1); + if (!this_cpu_safe) + __ssb_safe = false; return false; } @@ -419,6 +435,14 @@ out_printmsg: return required; } +/* known invulnerable cores */ +static const struct midr_range arm64_ssb_cpus[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), + {}, +}; + #define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) \ .matches = is_affected_midr_range, \ .midr_range = MIDR_RANGE(model, v_min, r_min, v_max, r_max) @@ -666,6 +690,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .capability = ARM64_SSBD, .matches = has_ssbd_mitigation, + .midr_range_list = arm64_ssb_cpus, }, { } @@ -688,3 +713,20 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, return sprintf(buf, "Vulnerable\n"); } + +ssize_t cpu_show_spec_store_bypass(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (__ssb_safe) + return sprintf(buf, "Not affected\n"); + + switch (ssbd_state) { + case ARM64_SSBD_KERNEL: + case ARM64_SSBD_FORCE_ENABLE: + if (IS_ENABLED(CONFIG_ARM64_SSBD)) + return sprintf(buf, + "Mitigation: Speculative Store Bypass disabled via prctl\n"); + } + + return sprintf(buf, "Vulnerable\n"); +} From 19cb87c3c13583cd2b23deeda19505cb10cd880d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 24 Oct 2019 14:48:30 +0200 Subject: [PATCH 0093/3715] arm64: ssbs: Don't treat CPUs with SSBS as unaffected by SSB [ Upstream commit eb337cdfcd5dd3b10522c2f34140a73a4c285c30 ] SSBS provides a relatively cheap mitigation for SSB, but it is still a mitigation and its presence does not indicate that the CPU is unaffected by the vulnerability. Tweak the mitigation logic so that we report the correct string in sysfs. Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpu_errata.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 2898130c3156..55526738ccbc 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -333,15 +333,17 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); - if (this_cpu_has_cap(ARM64_SSBS)) { - required = false; - goto out_printmsg; - } - /* delay setting __ssb_safe until we get a firmware response */ if (is_midr_in_range_list(read_cpuid_id(), entry->midr_range_list)) this_cpu_safe = true; + if (this_cpu_has_cap(ARM64_SSBS)) { + if (!this_cpu_safe) + __ssb_safe = false; + required = false; + goto out_printmsg; + } + if (psci_ops.smccc_version == SMCCC_VERSION_1_0) { ssbd_state = ARM64_SSBD_UNKNOWN; if (!this_cpu_safe) From cdf5048e996d09e079932e2f6fa8a75fa19b7b96 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Oct 2019 14:48:31 +0200 Subject: [PATCH 0094/3715] arm64: Force SSBS on context switch [ Upstream commit cbdf8a189a66001c36007bf0f5c975d0376c5c3a ] On a CPU that doesn't support SSBS, PSTATE[12] is RES0. In a system where only some of the CPUs implement SSBS, we end-up losing track of the SSBS bit across task migration. To address this issue, let's force the SSBS bit on context switch. Fixes: 8f04e8e6e29c ("arm64: ssbd: Add support for PSTATE.SSBS rather than trapping to EL3") Signed-off-by: Marc Zyngier [will: inverted logic and added comments] Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/processor.h | 14 ++++++++++++-- arch/arm64/kernel/process.c | 29 ++++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index ec1725c6df21..9eb95ab19924 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -148,6 +148,16 @@ static inline void start_thread_common(struct pt_regs *regs, unsigned long pc) regs->pc = pc; } +static inline void set_ssbs_bit(struct pt_regs *regs) +{ + regs->pstate |= PSR_SSBS_BIT; +} + +static inline void set_compat_ssbs_bit(struct pt_regs *regs) +{ + regs->pstate |= PSR_AA32_SSBS_BIT; +} + static inline void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) { @@ -155,7 +165,7 @@ static inline void start_thread(struct pt_regs *regs, unsigned long pc, regs->pstate = PSR_MODE_EL0t; if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE) - regs->pstate |= PSR_SSBS_BIT; + set_ssbs_bit(regs); regs->sp = sp; } @@ -174,7 +184,7 @@ static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc, #endif if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE) - regs->pstate |= PSR_AA32_SSBS_BIT; + set_compat_ssbs_bit(regs); regs->compat_sp = sp; } diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 532ad6be9c2b..243fd247d04e 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -298,7 +298,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, childregs->pstate |= PSR_UAO_BIT; if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) - childregs->pstate |= PSR_SSBS_BIT; + set_ssbs_bit(childregs); p->thread.cpu_context.x19 = stack_start; p->thread.cpu_context.x20 = stk_sz; @@ -339,6 +339,32 @@ void uao_thread_switch(struct task_struct *next) } } +/* + * Force SSBS state on context-switch, since it may be lost after migrating + * from a CPU which treats the bit as RES0 in a heterogeneous system. + */ +static void ssbs_thread_switch(struct task_struct *next) +{ + struct pt_regs *regs = task_pt_regs(next); + + /* + * Nothing to do for kernel threads, but 'regs' may be junk + * (e.g. idle task) so check the flags and bail early. + */ + if (unlikely(next->flags & PF_KTHREAD)) + return; + + /* If the mitigation is enabled, then we leave SSBS clear. */ + if ((arm64_get_ssbd_state() == ARM64_SSBD_FORCE_ENABLE) || + test_tsk_thread_flag(next, TIF_SSBD)) + return; + + if (compat_user_mode(regs)) + set_compat_ssbs_bit(regs); + else if (user_mode(regs)) + set_ssbs_bit(regs); +} + /* * We store our current task in sp_el0, which is clobbered by userspace. Keep a * shadow copy so that we can restore this upon entry from userspace. @@ -367,6 +393,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev, contextidr_thread_switch(next); entry_task_switch(next); uao_thread_switch(next); + ssbs_thread_switch(next); /* * Complete any pending TLB or cache maintenance on this CPU in case From cd91a0fcab932f31f0dcc42f321131974813e567 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Oct 2019 14:48:32 +0200 Subject: [PATCH 0095/3715] arm64: Use firmware to detect CPUs that are not affected by Spectre-v2 [ Upstream commit 517953c2c47f9c00a002f588ac856a5bc70cede3 ] The SMCCC ARCH_WORKAROUND_1 service can indicate that although the firmware knows about the Spectre-v2 mitigation, this particular CPU is not vulnerable, and it is thus not necessary to call the firmware on this CPU. Let's use this information to our benefit. Signed-off-by: Marc Zyngier Signed-off-by: Jeremy Linton Reviewed-by: Andre Przywara Reviewed-by: Catalin Marinas Tested-by: Stefan Wahren Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpu_errata.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 55526738ccbc..ca718250d5bd 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -190,22 +190,36 @@ static int detect_harden_bp_fw(void) case PSCI_CONDUIT_HVC: arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); - if ((int)res.a0 < 0) + switch ((int)res.a0) { + case 1: + /* Firmware says we're just fine */ + return 0; + case 0: + cb = call_hvc_arch_workaround_1; + /* This is a guest, no need to patch KVM vectors */ + smccc_start = NULL; + smccc_end = NULL; + break; + default: return -1; - cb = call_hvc_arch_workaround_1; - /* This is a guest, no need to patch KVM vectors */ - smccc_start = NULL; - smccc_end = NULL; + } break; case PSCI_CONDUIT_SMC: arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); - if ((int)res.a0 < 0) + switch ((int)res.a0) { + case 1: + /* Firmware says we're just fine */ + return 0; + case 0: + cb = call_smc_arch_workaround_1; + smccc_start = __smccc_workaround_1_smc_start; + smccc_end = __smccc_workaround_1_smc_end; + break; + default: return -1; - cb = call_smc_arch_workaround_1; - smccc_start = __smccc_workaround_1_smc_start; - smccc_end = __smccc_workaround_1_smc_end; + } break; default: From 5fff7a398c266c8c202a24e573327ba2c1566524 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 24 Oct 2019 14:48:33 +0200 Subject: [PATCH 0096/3715] arm64/speculation: Support 'mitigations=' cmdline option [ Upstream commit a111b7c0f20e13b54df2fa959b3dc0bdf1925ae6 ] Configure arm64 runtime CPU speculation bug mitigations in accordance with the 'mitigations=' cmdline option. This affects Meltdown, Spectre v2, and Speculative Store Bypass. The default behavior is unchanged. Signed-off-by: Josh Poimboeuf [will: reorder checks so KASLR implies KPTI and SSBS is affected by cmdline] Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/kernel-parameters.txt | 8 +++++--- arch/arm64/kernel/cpu_errata.c | 6 +++++- arch/arm64/kernel/cpufeature.c | 8 +++++++- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 5205740ed39b..b67a6cd08ca1 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2389,8 +2389,8 @@ http://repo.or.cz/w/linux-2.6/mini2440.git mitigations= - [X86,PPC,S390] Control optional mitigations for CPU - vulnerabilities. This is a set of curated, + [X86,PPC,S390,ARM64] Control optional mitigations for + CPU vulnerabilities. This is a set of curated, arch-independent options, each of which is an aggregation of existing arch-specific options. @@ -2399,12 +2399,14 @@ improves system performance, but it may also expose users to several CPU vulnerabilities. Equivalent to: nopti [X86,PPC] + kpti=0 [ARM64] nospectre_v1 [PPC] nobp=0 [S390] nospectre_v1 [X86] - nospectre_v2 [X86,PPC,S390] + nospectre_v2 [X86,PPC,S390,ARM64] spectre_v2_user=off [X86] spec_store_bypass_disable=off [X86,PPC] + ssbd=force-off [ARM64] l1tf=off [X86] mds=off [X86] diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index ca718250d5bd..7d15f4cb6393 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -347,6 +348,9 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + if (cpu_mitigations_off()) + ssbd_state = ARM64_SSBD_FORCE_DISABLE; + /* delay setting __ssb_safe until we get a firmware response */ if (is_midr_in_range_list(read_cpuid_id(), entry->midr_range_list)) this_cpu_safe = true; @@ -544,7 +548,7 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) } /* forced off */ - if (__nospectre_v2) { + if (__nospectre_v2 || cpu_mitigations_off()) { pr_info_once("spectrev2 mitigation disabled by command line option\n"); __hardenbp_enab = false; return false; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index b782e98633da..15ce2c8b9ee2 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -841,7 +842,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), }; - char const *str = "command line option"; + char const *str = "kpti command line option"; bool meltdown_safe; meltdown_safe = is_midr_in_range_list(read_cpuid_id(), kpti_safe_list); @@ -871,6 +872,11 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, } } + if (cpu_mitigations_off() && !__kpti_forced) { + str = "mitigations=off"; + __kpti_forced = -1; + } + if (!IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) { pr_info_once("kernel page table isolation disabled by kernel configuration\n"); return false; From 00b36385c534be1d51240dfbcb522d790e384f5c Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 18 Oct 2019 15:38:48 -0700 Subject: [PATCH 0097/3715] MIPS: tlbex: Fix build_restore_pagemask KScratch restore commit b42aa3fd5957e4daf4b69129e5ce752a2a53e7d6 upstream. build_restore_pagemask() will restore the value of register $1/$at when its restore_scratch argument is non-zero, and aims to do so by filling a branch delay slot. Commit 0b24cae4d535 ("MIPS: Add missing EHB in mtc0 -> mfc0 sequence.") added an EHB instruction (Execution Hazard Barrier) prior to restoring $1 from a KScratch register, in order to resolve a hazard that can result in stale values of the KScratch register being observed. In particular, P-class CPUs from MIPS with out of order execution pipelines such as the P5600 & P6600 are affected. Unfortunately this EHB instruction was inserted in the branch delay slot causing the MFC0 instruction which performs the restoration to no longer execute along with the branch. The result is that the $1 register isn't actually restored, ie. the TLB refill exception handler clobbers it - which is exactly the problem the EHB is meant to avoid for the P-class CPUs. Similarly build_get_pgd_vmalloc() will restore the value of $1/$at when its mode argument equals refill_scratch, and suffers from the same problem. Fix this by in both cases moving the EHB earlier in the emitted code. There's no reason it needs to immediately precede the MFC0 - it simply needs to be between the MTC0 & MFC0. This bug only affects Cavium Octeon systems which use build_fast_tlb_refill_handler(). Signed-off-by: Paul Burton Fixes: 0b24cae4d535 ("MIPS: Add missing EHB in mtc0 -> mfc0 sequence.") Cc: Dmitry Korotin Cc: stable@vger.kernel.org # v3.15+ Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/mips/mm/tlbex.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index dc495578d44d..b55c74a7f7a4 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -658,6 +658,13 @@ static void build_restore_pagemask(u32 **p, struct uasm_reloc **r, int restore_scratch) { if (restore_scratch) { + /* + * Ensure the MFC0 below observes the value written to the + * KScratch register by the prior MTC0. + */ + if (scratch_reg >= 0) + uasm_i_ehb(p); + /* Reset default page size */ if (PM_DEFAULT_MASK >> 16) { uasm_i_lui(p, tmp, PM_DEFAULT_MASK >> 16); @@ -672,12 +679,10 @@ static void build_restore_pagemask(u32 **p, struct uasm_reloc **r, uasm_i_mtc0(p, 0, C0_PAGEMASK); uasm_il_b(p, r, lid); } - if (scratch_reg >= 0) { - uasm_i_ehb(p); + if (scratch_reg >= 0) UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); - } else { + else UASM_i_LW(p, 1, scratchpad_offset(0), 0); - } } else { /* Reset default page size */ if (PM_DEFAULT_MASK >> 16) { @@ -926,6 +931,10 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, } if (mode != not_refill && check_for_high_segbits) { uasm_l_large_segbits_fault(l, *p); + + if (mode == refill_scratch && scratch_reg >= 0) + uasm_i_ehb(p); + /* * We get here if we are an xsseg address, or if we are * an xuseg address above (PGDIR_SHIFT+PGDIR_BITS) boundary. @@ -942,12 +951,10 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, uasm_i_jr(p, ptr); if (mode == refill_scratch) { - if (scratch_reg >= 0) { - uasm_i_ehb(p); + if (scratch_reg >= 0) UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); - } else { + else UASM_i_LW(p, 1, scratchpad_offset(0), 0); - } } else { uasm_i_nop(p); } From 928289ae23aa06fdc0fcfd26d03ed1bea689838f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 14 Oct 2019 12:02:01 +0100 Subject: [PATCH 0098/3715] staging: wlan-ng: fix exit return when sme->key_idx >= NUM_WEPKEYS commit 153c5d8191c26165dbbd2646448ca7207f7796d0 upstream. Currently the exit return path when sme->key_idx >= NUM_WEPKEYS is via label 'exit' and this checks if result is non-zero, however result has not been initialized and contains garbage. Fix this by replacing the goto with a return with the error code. Addresses-Coverity: ("Uninitialized scalar variable") Fixes: 0ca6d8e74489 ("Staging: wlan-ng: replace switch-case statements with macro") Signed-off-by: Colin Ian King Cc: stable Link: https://lore.kernel.org/r/20191014110201.9874-1-colin.king@canonical.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wlan-ng/cfg80211.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/staging/wlan-ng/cfg80211.c b/drivers/staging/wlan-ng/cfg80211.c index 42912257e2b9..07b807ceae6d 100644 --- a/drivers/staging/wlan-ng/cfg80211.c +++ b/drivers/staging/wlan-ng/cfg80211.c @@ -490,10 +490,8 @@ static int prism2_connect(struct wiphy *wiphy, struct net_device *dev, /* Set the encryption - we only support wep */ if (is_wep) { if (sme->key) { - if (sme->key_idx >= NUM_WEPKEYS) { - err = -EINVAL; - goto exit; - } + if (sme->key_idx >= NUM_WEPKEYS) + return -EINVAL; result = prism2_domibset_uint32(wlandev, DIDmib_dot11smt_dot11PrivacyTable_dot11WEPDefaultKeyID, From 5550eabaae33311e26919f3b140e6bb7c3b74319 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 3 Sep 2019 12:18:39 +0200 Subject: [PATCH 0099/3715] scsi: sd: Ignore a failure to sync cache due to lack of authorization commit 21e3d6c81179bbdfa279efc8de456c34b814cfd2 upstream. I've got a report about a UAS drive enclosure reporting back Sense: Logical unit access not authorized if the drive it holds is password protected. While the drive is obviously unusable in that state as a mass storage device, it still exists as a sd device and when the system is asked to perform a suspend of the drive, it will be sent a SYNCHRONIZE CACHE. If that fails due to password protection, the error must be ignored. Cc: Link: https://lore.kernel.org/r/20190903101840.16483-1-oneukum@suse.com Signed-off-by: Oliver Neukum Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 3b70f7bb7fe6..35cea5827a7a 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1658,7 +1658,8 @@ static int sd_sync_cache(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr) /* we need to evaluate the error return */ if (scsi_sense_valid(sshdr) && (sshdr->asc == 0x3a || /* medium not present */ - sshdr->asc == 0x20)) /* invalid command */ + sshdr->asc == 0x20 || /* invalid command */ + (sshdr->asc == 0x74 && sshdr->ascq == 0x71))) /* drive is password locked */ /* this is no error here */ return 0; From 1e8260fd7fe5e87002e2485ab97cd1d96eaccc9e Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 1 Oct 2019 16:48:39 +0900 Subject: [PATCH 0100/3715] scsi: core: save/restore command resid for error handling commit 8f8fed0cdbbd6cdbf28d9ebe662f45765d2f7d39 upstream. When a non-passthrough command is terminated with CHECK CONDITION, request sense is executed by hijacking the command descriptor. Since scsi_eh_prep_cmnd() and scsi_eh_restore_cmnd() do not save/restore the original command resid, the value returned on failure of the original command is lost and replaced with the value set by the execution of the request sense command. This value may in many instances be unaligned to the device sector size, causing sd_done() to print a warning message about the incorrect unaligned resid before the command is retried. Fix this problem by saving the original command residual in struct scsi_eh_save using scsi_eh_prep_cmnd() and restoring it in scsi_eh_restore_cmnd(). In addition, to make sure that the request sense command is executed with a correctly initialized command structure, also reset the residual to 0 in scsi_eh_prep_cmnd() after saving the original command value in struct scsi_eh_save. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20191001074839.1994-1-damien.lemoal@wdc.com Signed-off-by: Damien Le Moal Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_error.c | 3 +++ include/scsi/scsi_eh.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index cf70f0bb8375..bdec5f429440 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -935,6 +935,7 @@ void scsi_eh_prep_cmnd(struct scsi_cmnd *scmd, struct scsi_eh_save *ses, ses->sdb = scmd->sdb; ses->next_rq = scmd->request->next_rq; ses->result = scmd->result; + ses->resid_len = scmd->req.resid_len; ses->underflow = scmd->underflow; ses->prot_op = scmd->prot_op; ses->eh_eflags = scmd->eh_eflags; @@ -946,6 +947,7 @@ void scsi_eh_prep_cmnd(struct scsi_cmnd *scmd, struct scsi_eh_save *ses, memset(&scmd->sdb, 0, sizeof(scmd->sdb)); scmd->request->next_rq = NULL; scmd->result = 0; + scmd->req.resid_len = 0; if (sense_bytes) { scmd->sdb.length = min_t(unsigned, SCSI_SENSE_BUFFERSIZE, @@ -999,6 +1001,7 @@ void scsi_eh_restore_cmnd(struct scsi_cmnd* scmd, struct scsi_eh_save *ses) scmd->sdb = ses->sdb; scmd->request->next_rq = ses->next_rq; scmd->result = ses->result; + scmd->req.resid_len = ses->resid_len; scmd->underflow = ses->underflow; scmd->prot_op = ses->prot_op; scmd->eh_eflags = ses->eh_eflags; diff --git a/include/scsi/scsi_eh.h b/include/scsi/scsi_eh.h index 2b7e227960e1..91f403341dd7 100644 --- a/include/scsi/scsi_eh.h +++ b/include/scsi/scsi_eh.h @@ -32,6 +32,7 @@ extern int scsi_ioctl_reset(struct scsi_device *, int __user *); struct scsi_eh_save { /* saved state */ int result; + unsigned int resid_len; int eh_eflags; enum dma_data_direction data_direction; unsigned underflow; From a5bb723f80e004c3e64c60da68158b21152c4636 Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Tue, 15 Oct 2019 21:05:56 +0800 Subject: [PATCH 0101/3715] scsi: core: try to get module before removing device commit 77c301287ebae86cc71d03eb3806f271cb14da79 upstream. We have a test case like block/001 in blktests, which will create a scsi device by loading scsi_debug module and then try to delete the device by sysfs interface. At the same time, it may remove the scsi_debug module. And getting a invalid paging request BUG_ON as following: [ 34.625854] BUG: unable to handle page fault for address: ffffffffa0016bb8 [ 34.629189] Oops: 0000 [#1] SMP PTI [ 34.629618] CPU: 1 PID: 450 Comm: bash Tainted: G W 5.4.0-rc3+ #473 [ 34.632524] RIP: 0010:scsi_proc_hostdir_rm+0x5/0xa0 [ 34.643555] CR2: ffffffffa0016bb8 CR3: 000000012cd88000 CR4: 00000000000006e0 [ 34.644545] Call Trace: [ 34.644907] scsi_host_dev_release+0x6b/0x1f0 [ 34.645511] device_release+0x74/0x110 [ 34.646046] kobject_put+0x116/0x390 [ 34.646559] put_device+0x17/0x30 [ 34.647041] scsi_target_dev_release+0x2b/0x40 [ 34.647652] device_release+0x74/0x110 [ 34.648186] kobject_put+0x116/0x390 [ 34.648691] put_device+0x17/0x30 [ 34.649157] scsi_device_dev_release_usercontext+0x2e8/0x360 [ 34.649953] execute_in_process_context+0x29/0x80 [ 34.650603] scsi_device_dev_release+0x20/0x30 [ 34.651221] device_release+0x74/0x110 [ 34.651732] kobject_put+0x116/0x390 [ 34.652230] sysfs_unbreak_active_protection+0x3f/0x50 [ 34.652935] sdev_store_delete.cold.4+0x71/0x8f [ 34.653579] dev_attr_store+0x1b/0x40 [ 34.654103] sysfs_kf_write+0x3d/0x60 [ 34.654603] kernfs_fop_write+0x174/0x250 [ 34.655165] __vfs_write+0x1f/0x60 [ 34.655639] vfs_write+0xc7/0x280 [ 34.656117] ksys_write+0x6d/0x140 [ 34.656591] __x64_sys_write+0x1e/0x30 [ 34.657114] do_syscall_64+0xb1/0x400 [ 34.657627] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 34.658335] RIP: 0033:0x7f156f337130 During deleting scsi target, the scsi_debug module have been removed. Then, sdebug_driver_template belonged to the module cannot be accessd, resulting in scsi_proc_hostdir_rm() BUG_ON. To fix the bug, we add scsi_device_get() in sdev_store_delete() to try to increase refcount of module, avoiding the module been removed. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20191015130556.18061-1-yuyufen@huawei.com Signed-off-by: Yufen Yu Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_sysfs.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 8ce12ffcbb7a..ffb44d77a01b 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -722,6 +722,14 @@ sdev_store_delete(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct kernfs_node *kn; + struct scsi_device *sdev = to_scsi_device(dev); + + /* + * We need to try to get module, avoiding the module been removed + * during delete. + */ + if (scsi_device_get(sdev)) + return -ENODEV; kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); WARN_ON_ONCE(!kn); @@ -736,9 +744,10 @@ sdev_store_delete(struct device *dev, struct device_attribute *attr, * state into SDEV_DEL. */ device_remove_file(dev, attr); - scsi_remove_device(to_scsi_device(dev)); + scsi_remove_device(sdev); if (kn) sysfs_unbreak_active_protection(kn); + scsi_device_put(sdev); return count; }; static DEVICE_ATTR(delete, S_IWUSR, NULL, sdev_store_delete); From a7f64a34fa1b39a22fbf8cffa6aa1624182d9a81 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 9 Oct 2019 10:35:36 -0700 Subject: [PATCH 0102/3715] scsi: ch: Make it possible to open a ch device multiple times again commit 6a0990eaa768dfb7064f06777743acc6d392084b upstream. Clearing ch->device in ch_release() is wrong because that pointer must remain valid until ch_remove() is called. This patch fixes the following crash the second time a ch device is opened: BUG: kernel NULL pointer dereference, address: 0000000000000790 RIP: 0010:scsi_device_get+0x5/0x60 Call Trace: ch_open+0x4c/0xa0 [ch] chrdev_open+0xa2/0x1c0 do_dentry_open+0x13a/0x380 path_openat+0x591/0x1470 do_filp_open+0x91/0x100 do_sys_open+0x184/0x220 do_syscall_64+0x5f/0x1a0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 085e56766f74 ("scsi: ch: add refcounting") Cc: Hannes Reinecke Cc: Link: https://lore.kernel.org/r/20191009173536.247889-1-bvanassche@acm.org Reported-by: Rob Turk Suggested-by: Rob Turk Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ch.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/ch.c b/drivers/scsi/ch.c index c535c52e72e5..3f7c25d104fe 100644 --- a/drivers/scsi/ch.c +++ b/drivers/scsi/ch.c @@ -578,7 +578,6 @@ ch_release(struct inode *inode, struct file *file) scsi_changer *ch = file->private_data; scsi_device_put(ch->device); - ch->device = NULL; file->private_data = NULL; kref_put(&ch->ref, ch_destroy); return 0; From 1c725772e6f23698d3ee414637a3426e752bbd20 Mon Sep 17 00:00:00 2001 From: Marco Felsch Date: Mon, 16 Sep 2019 12:45:48 -0700 Subject: [PATCH 0103/3715] Input: da9063 - fix capability and drop KEY_SLEEP commit afce285b859cea91c182015fc9858ea58c26cd0e upstream. Since commit f889beaaab1c ("Input: da9063 - report KEY_POWER instead of KEY_SLEEP during power key-press") KEY_SLEEP isn't supported anymore. This caused input device to not generate any events if "dlg,disable-key-power" is set. Fix this by unconditionally setting KEY_POWER capability, and not declaring KEY_SLEEP. Fixes: f889beaaab1c ("Input: da9063 - report KEY_POWER instead of KEY_SLEEP during power key-press") Signed-off-by: Marco Felsch Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/misc/da9063_onkey.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/input/misc/da9063_onkey.c b/drivers/input/misc/da9063_onkey.c index 3e9c353d82ef..a01b25facf46 100644 --- a/drivers/input/misc/da9063_onkey.c +++ b/drivers/input/misc/da9063_onkey.c @@ -248,10 +248,7 @@ static int da9063_onkey_probe(struct platform_device *pdev) onkey->input->phys = onkey->phys; onkey->input->dev.parent = &pdev->dev; - if (onkey->key_power) - input_set_capability(onkey->input, EV_KEY, KEY_POWER); - - input_set_capability(onkey->input, EV_KEY, KEY_SLEEP); + input_set_capability(onkey->input, EV_KEY, KEY_POWER); INIT_DELAYED_WORK(&onkey->work, da9063_poll_on); From 7b9f7a928255a232012be55cb95db30e963b83a7 Mon Sep 17 00:00:00 2001 From: Evan Green Date: Fri, 11 Oct 2019 17:22:09 -0700 Subject: [PATCH 0104/3715] Input: synaptics-rmi4 - avoid processing unknown IRQs commit 363c53875aef8fce69d4a2d0873919ccc7d9e2ad upstream. rmi_process_interrupt_requests() calls handle_nested_irq() for each interrupt status bit it finds. If the irq domain mapping for this bit had not yet been set up, then it ends up calling handle_nested_irq(0), which causes a NULL pointer dereference. There's already code that masks the irq_status bits coming out of the hardware with current_irq_mask, presumably to avoid this situation. However current_irq_mask seems to more reflect the actual mask set in the hardware rather than the IRQs software has set up and registered for. For example, in rmi_driver_reset_handler(), the current_irq_mask is initialized based on what is read from the hardware. If the reset value of this mask enables IRQs that Linux has not set up yet, then we end up in this situation. There appears to be a third unused bitmask that used to serve this purpose, fn_irq_bits. Use that bitmask instead of current_irq_mask to avoid calling handle_nested_irq() on IRQs that have not yet been set up. Signed-off-by: Evan Green Reviewed-by: Andrew Duggan Link: https://lore.kernel.org/r/20191008223657.163366-1-evgreen@chromium.org Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/rmi4/rmi_driver.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c index 997ccae7ee05..bae46816a3b3 100644 --- a/drivers/input/rmi4/rmi_driver.c +++ b/drivers/input/rmi4/rmi_driver.c @@ -165,7 +165,7 @@ static int rmi_process_interrupt_requests(struct rmi_device *rmi_dev) } mutex_lock(&data->irq_mutex); - bitmap_and(data->irq_status, data->irq_status, data->current_irq_mask, + bitmap_and(data->irq_status, data->irq_status, data->fn_irq_bits, data->irq_count); /* * At this point, irq_status has all bits that are set in the @@ -412,6 +412,8 @@ static int rmi_driver_set_irq_bits(struct rmi_device *rmi_dev, bitmap_copy(data->current_irq_mask, data->new_irq_mask, data->num_of_irq_regs); + bitmap_or(data->fn_irq_bits, data->fn_irq_bits, mask, data->irq_count); + error_unlock: mutex_unlock(&data->irq_mutex); return error; @@ -425,6 +427,8 @@ static int rmi_driver_clear_irq_bits(struct rmi_device *rmi_dev, struct device *dev = &rmi_dev->dev; mutex_lock(&data->irq_mutex); + bitmap_andnot(data->fn_irq_bits, + data->fn_irq_bits, mask, data->irq_count); bitmap_andnot(data->new_irq_mask, data->current_irq_mask, mask, data->irq_count); From b20d0e89a6e3bd53b539cae97405f317774e3d50 Mon Sep 17 00:00:00 2001 From: Junya Monden Date: Wed, 16 Oct 2019 14:42:55 +0200 Subject: [PATCH 0105/3715] ASoC: rsnd: Reinitialize bit clock inversion flag for every format setting commit 22e58665a01006d05f0239621f7d41cacca96cc4 upstream. Unlike other format-related DAI parameters, rdai->bit_clk_inv flag is not properly re-initialized when setting format for new stream processing. The inversion, if requested, is then applied not to default, but to a previous value, which leads to SCKP bit in SSICR register being set incorrectly. Fix this by re-setting the flag to its initial value, determined by format. Fixes: 1a7889ca8aba3 ("ASoC: rsnd: fixup SND_SOC_DAIFMT_xB_xF behavior") Cc: Andrew Gabbasov Cc: Jiada Wang Cc: Timo Wischer Cc: stable@vger.kernel.org # v3.17+ Signed-off-by: Junya Monden Signed-off-by: Eugeniu Rosca Acked-by: Kuninori Morimoto Link: https://lore.kernel.org/r/20191016124255.7442-1-erosca@de.adit-jv.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/sh/rcar/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index 710c01cd2ad2..ab0bbef7eb48 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -676,6 +676,7 @@ static int rsnd_soc_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) } /* set format */ + rdai->bit_clk_inv = 0; switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { case SND_SOC_DAIFMT_I2S: rdai->sys_delay = 0; From 63eb9c2849bc377c6bbf491f752c6cc6b9b75bca Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 4 Oct 2019 10:51:32 +0100 Subject: [PATCH 0106/3715] cfg80211: wext: avoid copying malformed SSIDs commit 4ac2813cc867ae563a1ba5a9414bfb554e5796fa upstream. Ensure the SSID element is bounds-checked prior to invoking memcpy() with its length field, when copying to userspace. Cc: Cc: Kees Cook Reported-by: Nicolas Waisman Signed-off-by: Will Deacon Link: https://lore.kernel.org/r/20191004095132.15777-2-will@kernel.org [adjust commit log a bit] Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/wext-sme.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index c67d7a82ab13..73fd0eae08ca 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -202,6 +202,7 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev, struct iw_point *data, char *ssid) { struct wireless_dev *wdev = dev->ieee80211_ptr; + int ret = 0; /* call only for station! */ if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) @@ -219,7 +220,10 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev, if (ie) { data->flags = 1; data->length = ie[1]; - memcpy(ssid, ie + 2, data->length); + if (data->length > IW_ESSID_MAX_SIZE) + ret = -EINVAL; + else + memcpy(ssid, ie + 2, data->length); } rcu_read_unlock(); } else if (wdev->wext.connect.ssid && wdev->wext.connect.ssid_len) { @@ -229,7 +233,7 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev, } wdev_unlock(wdev); - return 0; + return ret; } int cfg80211_mgd_wext_siwap(struct net_device *dev, From 7a19f258f811f14e4e9b14b057dd606d03c7a586 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 4 Oct 2019 10:51:31 +0100 Subject: [PATCH 0107/3715] mac80211: Reject malformed SSID elements commit 4152561f5da3fca92af7179dd538ea89e248f9d0 upstream. Although this shouldn't occur in practice, it's a good idea to bounds check the length field of the SSID element prior to using it for things like allocations or memcpy operations. Cc: Cc: Kees Cook Reported-by: Nicolas Waisman Signed-off-by: Will Deacon Link: https://lore.kernel.org/r/20191004095132.15777-1-will@kernel.org Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/mlme.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index d91db72b9e9e..36bd59ff49c4 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2430,7 +2430,8 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, rcu_read_lock(); ssid = ieee80211_bss_get_ie(cbss, WLAN_EID_SSID); - if (WARN_ON_ONCE(ssid == NULL)) + if (WARN_ONCE(!ssid || ssid[1] > IEEE80211_MAX_SSID_LEN, + "invalid SSID element (len=%d)", ssid ? ssid[1] : -1)) ssid_len = 0; else ssid_len = ssid[1]; @@ -4756,7 +4757,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, rcu_read_lock(); ssidie = ieee80211_bss_get_ie(req->bss, WLAN_EID_SSID); - if (!ssidie) { + if (!ssidie || ssidie[1] > sizeof(assoc_data->ssid)) { rcu_read_unlock(); kfree(assoc_data); return -EINVAL; From f991b1fa0ded689f980cc25312b5003f35add8bf Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 2 Apr 2019 11:30:37 +0800 Subject: [PATCH 0108/3715] drm/edid: Add 6 bpc quirk for SDC panel in Lenovo G50 commit 11bcf5f78905b90baae8fb01e16650664ed0cb00 upstream. Another panel that needs 6BPC quirk. BugLink: https://bugs.launchpad.net/bugs/1819968 Cc: # v4.8+ Reviewed-by: Alex Deucher Signed-off-by: Kai-Heng Feng Signed-off-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/20190402033037.21877-1-kai.heng.feng@canonical.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_edid.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index ed01e3aae0e8..dfdc7d3147fb 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -164,6 +164,9 @@ static const struct edid_quirk { /* Medion MD 30217 PG */ { "MED", 0x7b8, EDID_QUIRK_PREFER_LARGE_75 }, + /* Lenovo G50 */ + { "SDC", 18514, EDID_QUIRK_FORCE_6BPC }, + /* Panel in Samsung NP700G7A-S01PL notebook reports 6bpc */ { "SEC", 0xd033, EDID_QUIRK_FORCE_8BPC }, From df984262a4bcd76f0ef7071ee73329b0fc430d59 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 10 Oct 2019 18:28:17 +0200 Subject: [PATCH 0109/3715] drm/amdgpu: Bail earlier when amdgpu.cik_/si_support is not set to 1 commit 984d7a929ad68b7be9990fc9c5cfa5d5c9fc7942 upstream. Bail from the pci_driver probe function instead of from the drm_driver load function. This avoid /dev/dri/card0 temporarily getting registered and then unregistered again, sending unwanted add / remove udev events to userspace. Specifically this avoids triggering the (userspace) bug fixed by this plymouth merge-request: https://gitlab.freedesktop.org/plymouth/plymouth/merge_requests/59 Note that despite that being a userspace bug, not sending unnecessary udev events is a good idea in general. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1490490 Reviewed-by: Daniel Vetter Signed-off-by: Hans de Goede Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 35 +++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 35 ------------------------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 4dd68d821353..4894d8a87c04 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -572,6 +572,41 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, if (ret == -EPROBE_DEFER) return ret; +#ifdef CONFIG_DRM_AMDGPU_SI + if (!amdgpu_si_support) { + switch (flags & AMD_ASIC_MASK) { + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + case CHIP_OLAND: + case CHIP_HAINAN: + dev_info(&pdev->dev, + "SI support provided by radeon.\n"); + dev_info(&pdev->dev, + "Use radeon.si_support=0 amdgpu.si_support=1 to override.\n" + ); + return -ENODEV; + } + } +#endif +#ifdef CONFIG_DRM_AMDGPU_CIK + if (!amdgpu_cik_support) { + switch (flags & AMD_ASIC_MASK) { + case CHIP_KAVERI: + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_KABINI: + case CHIP_MULLINS: + dev_info(&pdev->dev, + "CIK support provided by radeon.\n"); + dev_info(&pdev->dev, + "Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n" + ); + return -ENODEV; + } + } +#endif + /* Get rid of things like offb */ ret = amdgpu_kick_out_firmware_fb(pdev); if (ret) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 884ed359f249..c93e72d8ac5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -87,41 +87,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) struct amdgpu_device *adev; int r, acpi_status; -#ifdef CONFIG_DRM_AMDGPU_SI - if (!amdgpu_si_support) { - switch (flags & AMD_ASIC_MASK) { - case CHIP_TAHITI: - case CHIP_PITCAIRN: - case CHIP_VERDE: - case CHIP_OLAND: - case CHIP_HAINAN: - dev_info(dev->dev, - "SI support provided by radeon.\n"); - dev_info(dev->dev, - "Use radeon.si_support=0 amdgpu.si_support=1 to override.\n" - ); - return -ENODEV; - } - } -#endif -#ifdef CONFIG_DRM_AMDGPU_CIK - if (!amdgpu_cik_support) { - switch (flags & AMD_ASIC_MASK) { - case CHIP_KAVERI: - case CHIP_BONAIRE: - case CHIP_HAWAII: - case CHIP_KABINI: - case CHIP_MULLINS: - dev_info(dev->dev, - "CIK support provided by radeon.\n"); - dev_info(dev->dev, - "Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n" - ); - return -ENODEV; - } - } -#endif - adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL); if (adev == NULL) { return -ENOMEM; From 872f16e35f53c8d3fb296051016cf761f718a31a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 18 Oct 2019 20:19:16 -0700 Subject: [PATCH 0110/3715] drivers/base/memory.c: don't access uninitialized memmaps in soft_offline_page_store() commit 641fe2e9387a36f9ee01d7c69382d1fe147a5e98 upstream. Uninitialized memmaps contain garbage and in the worst case trigger kernel BUGs, especially with CONFIG_PAGE_POISONING. They should not get touched. Right now, when trying to soft-offline a PFN that resides on a memory block that was never onlined, one gets a misleading error with CONFIG_PAGE_POISONING: :/# echo 5637144576 > /sys/devices/system/memory/soft_offline_page [ 23.097167] soft offline: 0x150000 page already poisoned But the actual result depends on the garbage in the memmap. soft_offline_page() can only work with online pages, it returns -EIO in case of ZONE_DEVICE. Make sure to only forward pages that are online (iow, managed by the buddy) and, therefore, have an initialized memmap. Add a check against pfn_to_online_page() and similarly return -EIO. Link: http://lkml.kernel.org/r/20191010141200.8985-1-david@redhat.com Fixes: f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") [visible after d0dc12e86b319] Signed-off-by: David Hildenbrand Acked-by: Naoya Horiguchi Acked-by: Michal Hocko Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: [4.13+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/base/memory.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 1d60b58a8c19..c617e00f4361 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -552,6 +552,9 @@ store_soft_offline_page(struct device *dev, pfn >>= PAGE_SHIFT; if (!pfn_valid(pfn)) return -ENXIO; + /* Only online pages can be soft-offlined (esp., not ZONE_DEVICE). */ + if (!pfn_to_online_page(pfn)) + return -EIO; ret = soft_offline_page(pfn_to_page(pfn), 0); return ret == 0 ? count : ret; } From 80b9274e3fc2b64a2634a121fc9a7ea512a52d2d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 18 Oct 2019 20:19:20 -0700 Subject: [PATCH 0111/3715] fs/proc/page.c: don't access uninitialized memmaps in fs/proc/page.c commit aad5f69bc161af489dbb5934868bd347282f0764 upstream. There are three places where we access uninitialized memmaps, namely: - /proc/kpagecount - /proc/kpageflags - /proc/kpagecgroup We have initialized memmaps either when the section is online or when the page was initialized to the ZONE_DEVICE. Uninitialized memmaps contain garbage and in the worst case trigger kernel BUGs, especially with CONFIG_PAGE_POISONING. For example, not onlining a DIMM during boot and calling /proc/kpagecount with CONFIG_PAGE_POISONING: :/# cat /proc/kpagecount > tmp.test BUG: unable to handle page fault for address: fffffffffffffffe #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 114616067 P4D 114616067 PUD 114618067 PMD 0 Oops: 0000 [#1] SMP NOPTI CPU: 0 PID: 469 Comm: cat Not tainted 5.4.0-rc1-next-20191004+ #11 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.4 RIP: 0010:kpagecount_read+0xce/0x1e0 Code: e8 09 83 e0 3f 48 0f a3 02 73 2d 4c 89 e7 48 c1 e7 06 48 03 3d ab 51 01 01 74 1d 48 8b 57 08 480 RSP: 0018:ffffa14e409b7e78 EFLAGS: 00010202 RAX: fffffffffffffffe RBX: 0000000000020000 RCX: 0000000000000000 RDX: 0000000000000001 RSI: 00007f76b5595000 RDI: fffff35645000000 RBP: 00007f76b5595000 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000140000 R13: 0000000000020000 R14: 00007f76b5595000 R15: ffffa14e409b7f08 FS: 00007f76b577d580(0000) GS:ffff8f41bd400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: fffffffffffffffe CR3: 0000000078960000 CR4: 00000000000006f0 Call Trace: proc_reg_read+0x3c/0x60 vfs_read+0xc5/0x180 ksys_read+0x68/0xe0 do_syscall_64+0x5c/0xa0 entry_SYSCALL_64_after_hwframe+0x49/0xbe For now, let's drop support for ZONE_DEVICE from the three pseudo files in order to fix this. To distinguish offline memory (with garbage memmap) from ZONE_DEVICE memory with properly initialized memmaps, we would have to check get_dev_pagemap() and pfn_zone_device_reserved() right now. The usage of both (especially, special casing devmem) is frowned upon and needs to be reworked. The fundamental issue we have is: if (pfn_to_online_page(pfn)) { /* memmap initialized */ } else if (pfn_valid(pfn)) { /* * ??? * a) offline memory. memmap garbage. * b) devmem: memmap initialized to ZONE_DEVICE. * c) devmem: reserved for driver. memmap garbage. * (d) devmem: memmap currently initializing - garbage) */ } We'll leave the pfn_zone_device_reserved() check in stable_page_flags() in place as that function is also used from memory failure. We now no longer dump information about pages that are not in use anymore - offline. Link: http://lkml.kernel.org/r/20191009142435.3975-2-david@redhat.com Fixes: f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") [visible after d0dc12e86b319] Signed-off-by: David Hildenbrand Reported-by: Qian Cai Acked-by: Michal Hocko Cc: Dan Williams Cc: Alexey Dobriyan Cc: Stephen Rothwell Cc: Toshiki Fukasawa Cc: Pankaj gupta Cc: Mike Rapoport Cc: Anthony Yznaga Cc: "Aneesh Kumar K.V" Cc: [4.13+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/proc/page.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/fs/proc/page.c b/fs/proc/page.c index 1491918a33c3..0c952c217118 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -42,10 +42,12 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf, return -EINVAL; while (count > 0) { - if (pfn_valid(pfn)) - ppage = pfn_to_page(pfn); - else - ppage = NULL; + /* + * TODO: ZONE_DEVICE support requires to identify + * memmaps that were actually initialized. + */ + ppage = pfn_to_online_page(pfn); + if (!ppage || PageSlab(ppage)) pcount = 0; else @@ -214,10 +216,11 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf, return -EINVAL; while (count > 0) { - if (pfn_valid(pfn)) - ppage = pfn_to_page(pfn); - else - ppage = NULL; + /* + * TODO: ZONE_DEVICE support requires to identify + * memmaps that were actually initialized. + */ + ppage = pfn_to_online_page(pfn); if (put_user(stable_page_flags(ppage), out)) { ret = -EFAULT; @@ -259,10 +262,11 @@ static ssize_t kpagecgroup_read(struct file *file, char __user *buf, return -EINVAL; while (count > 0) { - if (pfn_valid(pfn)) - ppage = pfn_to_page(pfn); - else - ppage = NULL; + /* + * TODO: ZONE_DEVICE support requires to identify + * memmaps that were actually initialized. + */ + ppage = pfn_to_online_page(pfn); if (ppage) ino = page_cgroup_ino(ppage); From e2e84418dec6eb784fa7f5b0a118457d88857dcf Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Tue, 1 Oct 2019 12:49:49 +0200 Subject: [PATCH 0112/3715] scsi: zfcp: fix reaction on bit error threshold notification [ Upstream commit 2190168aaea42c31bff7b9a967e7b045f07df095 ] On excessive bit errors for the FCP channel ingress fibre path, the channel notifies us. Previously, we only emitted a kernel message and a trace record. Since performance can become suboptimal with I/O timeouts due to bit errors, we now stop using an FCP device by default on channel notification so multipath on top can timely failover to other paths. A new module parameter zfcp.ber_stop can be used to get zfcp old behavior. User explanation of new kernel message: * Description: * The FCP channel reported that its bit error threshold has been exceeded. * These errors might result from a problem with the physical components * of the local fibre link into the FCP channel. * The problem might be damage or malfunction of the cable or * cable connection between the FCP channel and * the adjacent fabric switch port or the point-to-point peer. * Find details about the errors in the HBA trace for the FCP device. * The zfcp device driver closed down the FCP device * to limit the performance impact from possible I/O command timeouts. * User action: * Check for problems on the local fibre link, ensure that fibre optics are * clean and functional, and all cables are properly plugged. * After the repair action, you can manually recover the FCP device by * writing "0" into its "failed" sysfs attribute. * If recovery through sysfs is not possible, set the CHPID of the device * offline and back online on the service element. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: #2.6.30+ Link: https://lore.kernel.org/r/20191001104949.42810-1-maier@linux.ibm.com Reviewed-by: Jens Remus Reviewed-by: Benjamin Block Signed-off-by: Steffen Maier Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/s390/scsi/zfcp_fsf.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 00fb98f7b2cd..94d1bcc83fa2 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -21,6 +21,11 @@ struct kmem_cache *zfcp_fsf_qtcb_cache; +static bool ber_stop = true; +module_param(ber_stop, bool, 0600); +MODULE_PARM_DESC(ber_stop, + "Shuts down FCP devices for FCP channels that report a bit-error count in excess of its threshold (default on)"); + static void zfcp_fsf_request_timeout_handler(unsigned long data) { struct zfcp_adapter *adapter = (struct zfcp_adapter *) data; @@ -230,10 +235,15 @@ static void zfcp_fsf_status_read_handler(struct zfcp_fsf_req *req) case FSF_STATUS_READ_SENSE_DATA_AVAIL: break; case FSF_STATUS_READ_BIT_ERROR_THRESHOLD: - dev_warn(&adapter->ccw_device->dev, - "The error threshold for checksum statistics " - "has been exceeded\n"); zfcp_dbf_hba_bit_err("fssrh_3", req); + if (ber_stop) { + dev_warn(&adapter->ccw_device->dev, + "All paths over this FCP device are disused because of excessive bit errors\n"); + zfcp_erp_adapter_shutdown(adapter, 0, "fssrh_b"); + } else { + dev_warn(&adapter->ccw_device->dev, + "The error threshold for checksum statistics has been exceeded\n"); + } break; case FSF_STATUS_READ_LINK_DOWN: zfcp_fsf_status_read_link_down(req); From 504593dd854f66d808f578783959dbcb97a520a2 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Mon, 14 Oct 2019 14:11:51 -0700 Subject: [PATCH 0113/3715] mm/slub: fix a deadlock in show_slab_objects() commit e4f8e513c3d353c134ad4eef9fd0bba12406c7c8 upstream. A long time ago we fixed a similar deadlock in show_slab_objects() [1]. However, it is apparently due to the commits like 01fb58bcba63 ("slab: remove synchronous synchronize_sched() from memcg cache deactivation path") and 03afc0e25f7f ("slab: get_online_mems for kmem_cache_{create,destroy,shrink}"), this kind of deadlock is back by just reading files in /sys/kernel/slab which will generate a lockdep splat below. Since the "mem_hotplug_lock" here is only to obtain a stable online node mask while racing with NUMA node hotplug, in the worst case, the results may me miscalculated while doing NUMA node hotplug, but they shall be corrected by later reads of the same files. WARNING: possible circular locking dependency detected ------------------------------------------------------ cat/5224 is trying to acquire lock: ffff900012ac3120 (mem_hotplug_lock.rw_sem){++++}, at: show_slab_objects+0x94/0x3a8 but task is already holding lock: b8ff009693eee398 (kn->count#45){++++}, at: kernfs_seq_start+0x44/0xf0 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (kn->count#45){++++}: lock_acquire+0x31c/0x360 __kernfs_remove+0x290/0x490 kernfs_remove+0x30/0x44 sysfs_remove_dir+0x70/0x88 kobject_del+0x50/0xb0 sysfs_slab_unlink+0x2c/0x38 shutdown_cache+0xa0/0xf0 kmemcg_cache_shutdown_fn+0x1c/0x34 kmemcg_workfn+0x44/0x64 process_one_work+0x4f4/0x950 worker_thread+0x390/0x4bc kthread+0x1cc/0x1e8 ret_from_fork+0x10/0x18 -> #1 (slab_mutex){+.+.}: lock_acquire+0x31c/0x360 __mutex_lock_common+0x16c/0xf78 mutex_lock_nested+0x40/0x50 memcg_create_kmem_cache+0x38/0x16c memcg_kmem_cache_create_func+0x3c/0x70 process_one_work+0x4f4/0x950 worker_thread+0x390/0x4bc kthread+0x1cc/0x1e8 ret_from_fork+0x10/0x18 -> #0 (mem_hotplug_lock.rw_sem){++++}: validate_chain+0xd10/0x2bcc __lock_acquire+0x7f4/0xb8c lock_acquire+0x31c/0x360 get_online_mems+0x54/0x150 show_slab_objects+0x94/0x3a8 total_objects_show+0x28/0x34 slab_attr_show+0x38/0x54 sysfs_kf_seq_show+0x198/0x2d4 kernfs_seq_show+0xa4/0xcc seq_read+0x30c/0x8a8 kernfs_fop_read+0xa8/0x314 __vfs_read+0x88/0x20c vfs_read+0xd8/0x10c ksys_read+0xb0/0x120 __arm64_sys_read+0x54/0x88 el0_svc_handler+0x170/0x240 el0_svc+0x8/0xc other info that might help us debug this: Chain exists of: mem_hotplug_lock.rw_sem --> slab_mutex --> kn->count#45 Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(kn->count#45); lock(slab_mutex); lock(kn->count#45); lock(mem_hotplug_lock.rw_sem); *** DEADLOCK *** 3 locks held by cat/5224: #0: 9eff00095b14b2a0 (&p->lock){+.+.}, at: seq_read+0x4c/0x8a8 #1: 0eff008997041480 (&of->mutex){+.+.}, at: kernfs_seq_start+0x34/0xf0 #2: b8ff009693eee398 (kn->count#45){++++}, at: kernfs_seq_start+0x44/0xf0 stack backtrace: Call trace: dump_backtrace+0x0/0x248 show_stack+0x20/0x2c dump_stack+0xd0/0x140 print_circular_bug+0x368/0x380 check_noncircular+0x248/0x250 validate_chain+0xd10/0x2bcc __lock_acquire+0x7f4/0xb8c lock_acquire+0x31c/0x360 get_online_mems+0x54/0x150 show_slab_objects+0x94/0x3a8 total_objects_show+0x28/0x34 slab_attr_show+0x38/0x54 sysfs_kf_seq_show+0x198/0x2d4 kernfs_seq_show+0xa4/0xcc seq_read+0x30c/0x8a8 kernfs_fop_read+0xa8/0x314 __vfs_read+0x88/0x20c vfs_read+0xd8/0x10c ksys_read+0xb0/0x120 __arm64_sys_read+0x54/0x88 el0_svc_handler+0x170/0x240 el0_svc+0x8/0xc I think it is important to mention that this doesn't expose the show_slab_objects to use-after-free. There is only a single path that might really race here and that is the slab hotplug notifier callback __kmem_cache_shrink (via slab_mem_going_offline_callback) but that path doesn't really destroy kmem_cache_node data structures. [1] http://lkml.iu.edu/hypermail/linux/kernel/1101.0/02850.html [akpm@linux-foundation.org: add comment explaining why we don't need mem_hotplug_lock] Link: http://lkml.kernel.org/r/1570192309-10132-1-git-send-email-cai@lca.pw Fixes: 01fb58bcba63 ("slab: remove synchronous synchronize_sched() from memcg cache deactivation path") Fixes: 03afc0e25f7f ("slab: get_online_mems for kmem_cache_{create,destroy,shrink}") Signed-off-by: Qian Cai Acked-by: Michal Hocko Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Tejun Heo Cc: Vladimir Davydov Cc: Roman Gushchin Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/slub.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 220d42e592ef..07aeb129f3f8 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4790,7 +4790,17 @@ static ssize_t show_slab_objects(struct kmem_cache *s, } } - get_online_mems(); + /* + * It is impossible to take "mem_hotplug_lock" here with "kernfs_mutex" + * already held which will conflict with an existing lock order: + * + * mem_hotplug_lock->slab_mutex->kernfs_mutex + * + * We don't really need mem_hotplug_lock (to hold off + * slab_mem_going_offline_callback) here because slab's memory hot + * unplug code doesn't destroy the kmem_cache->node[] data. + */ + #ifdef CONFIG_SLUB_DEBUG if (flags & SO_ALL) { struct kmem_cache_node *n; @@ -4831,7 +4841,6 @@ static ssize_t show_slab_objects(struct kmem_cache *s, x += sprintf(buf + x, " N%d=%lu", node, nodes[node]); #endif - put_online_mems(); kfree(nodes); return x + sprintf(buf + x, "\n"); } From 13e9cf786d03cbabd24f3e0bffb6151d17c6029b Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 18 Oct 2019 20:19:29 -0700 Subject: [PATCH 0114/3715] mm/page_owner: don't access uninitialized memmaps when reading /proc/pagetypeinfo commit a26ee565b6cd8dc2bf15ff6aa70bbb28f928b773 upstream. Uninitialized memmaps contain garbage and in the worst case trigger kernel BUGs, especially with CONFIG_PAGE_POISONING. They should not get touched. For example, when not onlining a memory block that is spanned by a zone and reading /proc/pagetypeinfo with CONFIG_DEBUG_VM_PGFLAGS and CONFIG_PAGE_POISONING, we can trigger a kernel BUG: :/# echo 1 > /sys/devices/system/memory/memory40/online :/# echo 1 > /sys/devices/system/memory/memory42/online :/# cat /proc/pagetypeinfo > test.file page:fffff2c585200000 is uninitialized and poisoned raw: ffffffffffffffff ffffffffffffffff ffffffffffffffff ffffffffffffffff raw: ffffffffffffffff ffffffffffffffff ffffffffffffffff ffffffffffffffff page dumped because: VM_BUG_ON_PAGE(PagePoisoned(p)) There is not page extension available. ------------[ cut here ]------------ kernel BUG at include/linux/mm.h:1107! invalid opcode: 0000 [#1] SMP NOPTI Please note that this change does not affect ZONE_DEVICE, because pagetypeinfo_showmixedcount_print() is called from mm/vmstat.c:pagetypeinfo_showmixedcount() only for populated zones, and ZONE_DEVICE is never populated (zone->present_pages always 0). [david@redhat.com: move check to outer loop, add comment, rephrase description] Link: http://lkml.kernel.org/r/20191011140638.8160-1-david@redhat.com Fixes: f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") # visible after d0dc12e86b319 Signed-off-by: Qian Cai Signed-off-by: David Hildenbrand Acked-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Thomas Gleixner Cc: "Peter Zijlstra (Intel)" Cc: Miles Chen Cc: Mike Rapoport Cc: Qian Cai Cc: Greg Kroah-Hartman Cc: [4.13+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/page_owner.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mm/page_owner.c b/mm/page_owner.c index a71fe4c623ef..6ac05a6ff2d1 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -273,7 +273,8 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m, * not matter as the mixed block count will still be correct */ for (; pfn < end_pfn; ) { - if (!pfn_valid(pfn)) { + page = pfn_to_online_page(pfn); + if (!page) { pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES); continue; } @@ -281,13 +282,13 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m, block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); block_end_pfn = min(block_end_pfn, end_pfn); - page = pfn_to_page(pfn); pageblock_mt = get_pageblock_migratetype(page); for (; pfn < block_end_pfn; pfn++) { if (!pfn_valid_within(pfn)) continue; + /* The pageblock is online, no need to recheck. */ page = pfn_to_page(pfn); if (page_zone(page) != zone) From 5e76d606661c22e1b7f011163800ff4ceb0f398c Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 18 Oct 2019 20:20:05 -0700 Subject: [PATCH 0115/3715] hugetlbfs: don't access uninitialized memmaps in pfn_range_valid_gigantic() commit f231fe4235e22e18d847e05cbe705deaca56580a upstream. Uninitialized memmaps contain garbage and in the worst case trigger kernel BUGs, especially with CONFIG_PAGE_POISONING. They should not get touched. Let's make sure that we only consider online memory (managed by the buddy) that has initialized memmaps. ZONE_DEVICE is not applicable. page_zone() will call page_to_nid(), which will trigger VM_BUG_ON_PGFLAGS(PagePoisoned(page), page) with CONFIG_PAGE_POISONING and CONFIG_DEBUG_VM_PGFLAGS when called on uninitialized memmaps. This can be the case when an offline memory block (e.g., never onlined) is spanned by a zone. Note: As explained by Michal in [1], alloc_contig_range() will verify the range. So it boils down to the wrong access in this function. [1] http://lkml.kernel.org/r/20180423000943.GO17484@dhcp22.suse.cz Link: http://lkml.kernel.org/r/20191015120717.4858-1-david@redhat.com Fixes: f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") [visible after d0dc12e86b319] Signed-off-by: David Hildenbrand Reported-by: Michal Hocko Acked-by: Michal Hocko Reviewed-by: Mike Kravetz Cc: Anshuman Khandual Cc: [4.13+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 8ca0075a5464..310656b4ede6 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1081,11 +1081,10 @@ static bool pfn_range_valid_gigantic(struct zone *z, struct page *page; for (i = start_pfn; i < end_pfn; i++) { - if (!pfn_valid(i)) + page = pfn_to_online_page(i); + if (!page) return false; - page = pfn_to_page(i); - if (page_zone(page) != z) return false; From bfa25cba54e4d3c4d0168167aca23dc8b6cbe2b8 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 14 Oct 2019 15:48:19 -0700 Subject: [PATCH 0116/3715] xtensa: drop EXPORT_SYMBOL for outs*/ins* commit 8b39da985194aac2998dd9e3a22d00b596cebf1e upstream. Custom outs*/ins* implementations are long gone from the xtensa port, remove matching EXPORT_SYMBOLs. This fixes the following build warnings issued by modpost since commit 15bfc2348d54 ("modpost: check for static EXPORT_SYMBOL* functions"): WARNING: "insb" [vmlinux] is a static EXPORT_SYMBOL WARNING: "insw" [vmlinux] is a static EXPORT_SYMBOL WARNING: "insl" [vmlinux] is a static EXPORT_SYMBOL WARNING: "outsb" [vmlinux] is a static EXPORT_SYMBOL WARNING: "outsw" [vmlinux] is a static EXPORT_SYMBOL WARNING: "outsl" [vmlinux] is a static EXPORT_SYMBOL Cc: stable@vger.kernel.org Fixes: d38efc1f150f ("xtensa: adopt generic io routines") Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/kernel/xtensa_ksyms.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c index 672391003e40..dc7b470a423a 100644 --- a/arch/xtensa/kernel/xtensa_ksyms.c +++ b/arch/xtensa/kernel/xtensa_ksyms.c @@ -114,13 +114,6 @@ EXPORT_SYMBOL(__invalidate_icache_range); // FIXME EXPORT_SYMBOL(screen_info); #endif -EXPORT_SYMBOL(outsb); -EXPORT_SYMBOL(outsw); -EXPORT_SYMBOL(outsl); -EXPORT_SYMBOL(insb); -EXPORT_SYMBOL(insw); -EXPORT_SYMBOL(insl); - extern long common_exception_return; EXPORT_SYMBOL(common_exception_return); From 4b0bd39cc8618efdd8e44bea0f67cd6b2040335c Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 4 Oct 2019 19:23:37 +0200 Subject: [PATCH 0117/3715] parisc: Fix vmap memory leak in ioremap()/iounmap() commit 513f7f747e1cba81f28a436911fba0b485878ebd upstream. Sven noticed that calling ioremap() and iounmap() multiple times leads to a vmap memory leak: vmap allocation for size 4198400 failed: use vmalloc= to increase size It seems we missed calling vunmap() in iounmap(). Signed-off-by: Helge Deller Noticed-by: Sven Schnelle Cc: # v3.16+ Signed-off-by: Greg Kroah-Hartman --- arch/parisc/mm/ioremap.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/parisc/mm/ioremap.c b/arch/parisc/mm/ioremap.c index 92a9b5f12f98..f29f682352f0 100644 --- a/arch/parisc/mm/ioremap.c +++ b/arch/parisc/mm/ioremap.c @@ -3,7 +3,7 @@ * arch/parisc/mm/ioremap.c * * (C) Copyright 1995 1996 Linus Torvalds - * (C) Copyright 2001-2006 Helge Deller + * (C) Copyright 2001-2019 Helge Deller * (C) Copyright 2005 Kyle McMartin */ @@ -84,7 +84,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l addr = (void __iomem *) area->addr; if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size, phys_addr, pgprot)) { - vfree(addr); + vunmap(addr); return NULL; } @@ -92,9 +92,11 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l } EXPORT_SYMBOL(__ioremap); -void iounmap(const volatile void __iomem *addr) +void iounmap(const volatile void __iomem *io_addr) { - if (addr > high_memory) - return vfree((void *) (PAGE_MASK & (unsigned long __force) addr)); + unsigned long addr = (unsigned long)io_addr & PAGE_MASK; + + if (is_vmalloc_addr((void *)addr)) + vunmap((void *)addr); } EXPORT_SYMBOL(iounmap); From 34b3ce218aebc0025f8fa07d4b718c90ef630991 Mon Sep 17 00:00:00 2001 From: Roberto Bergantinos Corpas Date: Mon, 14 Oct 2019 10:59:23 +0200 Subject: [PATCH 0118/3715] CIFS: avoid using MID 0xFFFF commit 03d9a9fe3f3aec508e485dd3dcfa1e99933b4bdb upstream. According to MS-CIFS specification MID 0xFFFF should not be used by the CIFS client, but we actually do. Besides, this has proven to cause races leading to oops between SendReceive2/cifs_demultiplex_thread. On SMB1, MID is a 2 byte value easy to reach in CurrentMid which may conflict with an oplock break notification request coming from server Signed-off-by: Roberto Bergantinos Corpas Reviewed-by: Ronnie Sahlberg Reviewed-by: Aurelien Aptel Signed-off-by: Steve French CC: Stable Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb1ops.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index f50d3d0b9b87..483458340b10 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -181,6 +181,9 @@ cifs_get_next_mid(struct TCP_Server_Info *server) /* we do not want to loop forever */ last_mid = cur_mid; cur_mid++; + /* avoid 0xFFFF MID */ + if (cur_mid == 0xffff) + cur_mid++; /* * This nested loop looks more expensive than it is. From 74825dbefb8db903818b96dc1c7e767c3d6d295b Mon Sep 17 00:00:00 2001 From: Steve Wahl Date: Tue, 24 Sep 2019 16:03:55 -0500 Subject: [PATCH 0119/3715] x86/boot/64: Make level2_kernel_pgt pages invalid outside kernel area commit 2aa85f246c181b1fa89f27e8e20c5636426be624 upstream. Our hardware (UV aka Superdome Flex) has address ranges marked reserved by the BIOS. Access to these ranges is caught as an error, causing the BIOS to halt the system. Initial page tables mapped a large range of physical addresses that were not checked against the list of BIOS reserved addresses, and sometimes included reserved addresses in part of the mapped range. Including the reserved range in the map allowed processor speculative accesses to the reserved range, triggering a BIOS halt. Used early in booting, the page table level2_kernel_pgt addresses 1 GiB divided into 2 MiB pages, and it was set up to linearly map a full 1 GiB of physical addresses that included the physical address range of the kernel image, as chosen by KASLR. But this also included a large range of unused addresses on either side of the kernel image. And unlike the kernel image's physical address range, this extra mapped space was not checked against the BIOS tables of usable RAM addresses. So there were times when the addresses chosen by KASLR would result in processor accessible mappings of BIOS reserved physical addresses. The kernel code did not directly access any of this extra mapped space, but having it mapped allowed the processor to issue speculative accesses into reserved memory, causing system halts. This was encountered somewhat rarely on a normal system boot, and much more often when starting the crash kernel if "crashkernel=512M,high" was specified on the command line (this heavily restricts the physical address of the crash kernel, in our case usually within 1 GiB of reserved space). The solution is to invalidate the pages of this table outside the kernel image's space before the page table is activated. It fixes this problem on our hardware. [ bp: Touchups. ] Signed-off-by: Steve Wahl Signed-off-by: Borislav Petkov Acked-by: Dave Hansen Acked-by: Kirill A. Shutemov Cc: Baoquan He Cc: Brijesh Singh Cc: dimitri.sivanich@hpe.com Cc: Feng Tang Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jordan Borgner Cc: Juergen Gross Cc: mike.travis@hpe.com Cc: russ.anderson@hpe.com Cc: stable@vger.kernel.org Cc: Thomas Gleixner Cc: x86-ml Cc: Zhenzhong Duan Link: https://lkml.kernel.org/r/9c011ee51b081534a7a15065b1681d200298b530.1569358539.git.steve.wahl@hpe.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/head64.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 7c67d8939f3e..e00ccbcc2913 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -145,13 +145,31 @@ unsigned long __head __startup_64(unsigned long physaddr, * we might write invalid pmds, when the kernel is relocated * cleanup_highmap() fixes this up along with the mappings * beyond _end. + * + * Only the region occupied by the kernel image has so far + * been checked against the table of usable memory regions + * provided by the firmware, so invalidate pages outside that + * region. A page table entry that maps to a reserved area of + * memory would allow processor speculation into that area, + * and on some hardware (particularly the UV platform) even + * speculative access to some reserved areas is caught as an + * error, causing the BIOS to halt the system. */ pmd = fixup_pointer(level2_kernel_pgt, physaddr); - for (i = 0; i < PTRS_PER_PMD; i++) { + + /* invalidate pages before the kernel image */ + for (i = 0; i < pmd_index((unsigned long)_text); i++) + pmd[i] &= ~_PAGE_PRESENT; + + /* fixup pages that are part of the kernel image */ + for (; i <= pmd_index((unsigned long)_end); i++) if (pmd[i] & _PAGE_PRESENT) pmd[i] += load_delta; - } + + /* invalidate pages after the kernel image */ + for (; i < PTRS_PER_PMD; i++) + pmd[i] &= ~_PAGE_PRESENT; /* * Fixup phys_base - remove the memory encryption mask to obtain From 135230fc32ebad7186ed49da8845db1d61e5881f Mon Sep 17 00:00:00 2001 From: Patrick Williams Date: Tue, 1 Oct 2019 10:46:31 -0500 Subject: [PATCH 0120/3715] pinctrl: armada-37xx: fix control of pins 32 and up commit 20504fa1d2ffd5d03cdd9dc9c9dd4ed4579b97ef upstream. The 37xx configuration registers are only 32 bits long, so pins 32-35 spill over into the next register. The calculation for the register address was done, but the bitmask was not, so any configuration to pin 32 or above resulted in a bitmask that overflowed and performed no action. Fix the register / offset calculation to also adjust the offset. Fixes: 5715092a458c ("pinctrl: armada-37xx: Add gpio support") Signed-off-by: Patrick Williams Acked-by: Gregory CLEMENT Cc: Link: https://lore.kernel.org/r/20191001154634.96165-1-alpawi@amazon.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/mvebu/pinctrl-armada-37xx.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c index c5fe7d4a9065..1ee54c6864d2 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c @@ -205,11 +205,11 @@ static const struct armada_37xx_pin_data armada_37xx_pin_sb = { }; static inline void armada_37xx_update_reg(unsigned int *reg, - unsigned int offset) + unsigned int *offset) { /* We never have more than 2 registers */ - if (offset >= GPIO_PER_REG) { - offset -= GPIO_PER_REG; + if (*offset >= GPIO_PER_REG) { + *offset -= GPIO_PER_REG; *reg += sizeof(u32); } } @@ -373,7 +373,7 @@ static inline void armada_37xx_irq_update_reg(unsigned int *reg, { int offset = irqd_to_hwirq(d); - armada_37xx_update_reg(reg, offset); + armada_37xx_update_reg(reg, &offset); } static int armada_37xx_gpio_direction_input(struct gpio_chip *chip, @@ -383,7 +383,7 @@ static int armada_37xx_gpio_direction_input(struct gpio_chip *chip, unsigned int reg = OUTPUT_EN; unsigned int mask; - armada_37xx_update_reg(®, offset); + armada_37xx_update_reg(®, &offset); mask = BIT(offset); return regmap_update_bits(info->regmap, reg, mask, 0); @@ -396,7 +396,7 @@ static int armada_37xx_gpio_get_direction(struct gpio_chip *chip, unsigned int reg = OUTPUT_EN; unsigned int val, mask; - armada_37xx_update_reg(®, offset); + armada_37xx_update_reg(®, &offset); mask = BIT(offset); regmap_read(info->regmap, reg, &val); @@ -410,7 +410,7 @@ static int armada_37xx_gpio_direction_output(struct gpio_chip *chip, unsigned int reg = OUTPUT_EN; unsigned int mask, val, ret; - armada_37xx_update_reg(®, offset); + armada_37xx_update_reg(®, &offset); mask = BIT(offset); ret = regmap_update_bits(info->regmap, reg, mask, mask); @@ -431,7 +431,7 @@ static int armada_37xx_gpio_get(struct gpio_chip *chip, unsigned int offset) unsigned int reg = INPUT_VAL; unsigned int val, mask; - armada_37xx_update_reg(®, offset); + armada_37xx_update_reg(®, &offset); mask = BIT(offset); regmap_read(info->regmap, reg, &val); @@ -446,7 +446,7 @@ static void armada_37xx_gpio_set(struct gpio_chip *chip, unsigned int offset, unsigned int reg = OUTPUT_VAL; unsigned int mask, val; - armada_37xx_update_reg(®, offset); + armada_37xx_update_reg(®, &offset); mask = BIT(offset); val = value ? mask : 0; From 2b9a35e61261c6fa9d15967a8fec9c36efc6ceab Mon Sep 17 00:00:00 2001 From: Patrick Williams Date: Tue, 1 Oct 2019 10:51:38 -0500 Subject: [PATCH 0121/3715] pinctrl: armada-37xx: swap polarity on LED group commit b835d6953009dc350d61402a854b5a7178d8c615 upstream. The configuration registers for the LED group have inverted polarity, which puts the GPIO into open-drain state when used in GPIO mode. Switch to '0' for GPIO and '1' for LED modes. Fixes: 87466ccd9401 ("pinctrl: armada-37xx: Add pin controller support for Armada 37xx") Signed-off-by: Patrick Williams Cc: Link: https://lore.kernel.org/r/20191001155154.99710-1-alpawi@amazon.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/mvebu/pinctrl-armada-37xx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c index 1ee54c6864d2..262f591ad8a6 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c @@ -170,10 +170,10 @@ static struct armada_37xx_pin_group armada_37xx_nb_groups[] = { PIN_GRP_EXTRA("uart2", 9, 2, BIT(1) | BIT(13) | BIT(14) | BIT(19), BIT(1) | BIT(13) | BIT(14), BIT(1) | BIT(19), 18, 2, "gpio", "uart"), - PIN_GRP_GPIO("led0_od", 11, 1, BIT(20), "led"), - PIN_GRP_GPIO("led1_od", 12, 1, BIT(21), "led"), - PIN_GRP_GPIO("led2_od", 13, 1, BIT(22), "led"), - PIN_GRP_GPIO("led3_od", 14, 1, BIT(23), "led"), + PIN_GRP_GPIO_2("led0_od", 11, 1, BIT(20), BIT(20), 0, "led"), + PIN_GRP_GPIO_2("led1_od", 12, 1, BIT(21), BIT(21), 0, "led"), + PIN_GRP_GPIO_2("led2_od", 13, 1, BIT(22), BIT(22), 0, "led"), + PIN_GRP_GPIO_2("led3_od", 14, 1, BIT(23), BIT(23), 0, "led"), }; From e06e89fe9a2e6ef5394f28685d07ae4dd3e6ce66 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 10 Oct 2019 10:39:26 +0800 Subject: [PATCH 0122/3715] btrfs: block-group: Fix a memory leak due to missing btrfs_put_block_group() commit 4b654acdae850f48b8250b9a578a4eaa518c7a6f upstream. In btrfs_read_block_groups(), if we have an invalid block group which has mixed type (DATA|METADATA) while the fs doesn't have MIXED_GROUPS feature, we error out without freeing the block group cache. This patch will add the missing btrfs_put_block_group() to prevent memory leak. Note for stable backports: the file to patch in versions <= 5.3 is fs/btrfs/extent-tree.c Fixes: 49303381f19a ("Btrfs: bail out if block group has different mixed flag") CC: stable@vger.kernel.org # 4.9+ Reviewed-by: Anand Jain Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 10dee8245558..fd15f396b3a0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10255,6 +10255,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) btrfs_err(info, "bg %llu is a mixed block group but filesystem hasn't enabled mixed block groups", cache->key.objectid); + btrfs_put_block_group(cache); ret = -EINVAL; goto error; } From 0d70b3a0e34719e9e31d0ab39dc3820f815a78b1 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 5 Oct 2019 13:21:01 +0200 Subject: [PATCH 0123/3715] memstick: jmb38x_ms: Fix an error handling path in 'jmb38x_ms_probe()' commit 28c9fac09ab0147158db0baeec630407a5e9b892 upstream. If 'jmb38x_ms_count_slots()' returns 0, we must undo the previous 'pci_request_regions()' call. Goto 'err_out_int' to fix it. Fixes: 60fdd931d577 ("memstick: add support for JMicron jmb38x MemoryStick host controller") Cc: stable@vger.kernel.org Signed-off-by: Christophe JAILLET Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/memstick/host/jmb38x_ms.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/memstick/host/jmb38x_ms.c b/drivers/memstick/host/jmb38x_ms.c index 48db922075e2..08fa6400d255 100644 --- a/drivers/memstick/host/jmb38x_ms.c +++ b/drivers/memstick/host/jmb38x_ms.c @@ -947,7 +947,7 @@ static int jmb38x_ms_probe(struct pci_dev *pdev, if (!cnt) { rc = -ENODEV; pci_dev_busy = 1; - goto err_out; + goto err_out_int; } jm = kzalloc(sizeof(struct jmb38x_ms) From 5f466713989250938624afa79dc33bae20920700 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 9 Oct 2019 01:29:10 +0200 Subject: [PATCH 0124/3715] cpufreq: Avoid cpufreq_suspend() deadlock on system shutdown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 65650b35133ff20f0c9ef0abd5c3c66dbce3ae57 upstream. It is incorrect to set the cpufreq syscore shutdown callback pointer to cpufreq_suspend(), because that function cannot be run in the syscore stage of system shutdown for two reasons: (a) it may attempt to carry out actions depending on devices that have already been shut down at that point and (b) the RCU synchronization carried out by it may not be able to make progress then. The latter issue has been present since commit 45975c7d21a1 ("rcu: Define RCU-sched API in terms of RCU for Tree RCU PREEMPT builds"), but the former one has been there since commit 90de2a4aa9f3 ("cpufreq: suspend cpufreq governors on shutdown") regardless. Fix that by dropping cpufreq_syscore_ops altogether and making device_shutdown() call cpufreq_suspend() directly before shutting down devices, which is along the lines of what system-wide power management does. Fixes: 45975c7d21a1 ("rcu: Define RCU-sched API in terms of RCU for Tree RCU PREEMPT builds") Fixes: 90de2a4aa9f3 ("cpufreq: suspend cpufreq governors on shutdown") Reported-by: Ville Syrjälä Tested-by: Ville Syrjälä Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Cc: 4.0+ # 4.0+ Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 3 +++ drivers/cpufreq/cpufreq.c | 10 ---------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 2ec9af90cd28..2b0a1054535c 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -10,6 +10,7 @@ * */ +#include #include #include #include @@ -2845,6 +2846,8 @@ void device_shutdown(void) wait_for_device_probe(); device_block_probing(); + cpufreq_suspend(); + spin_lock(&devices_kset->list_lock); /* * Walk the devices list backward, shutting down each in turn. diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index fceb18d26db8..4aa3c5331666 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2570,14 +2570,6 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver) } EXPORT_SYMBOL_GPL(cpufreq_unregister_driver); -/* - * Stop cpufreq at shutdown to make sure it isn't holding any locks - * or mutexes when secondary CPUs are halted. - */ -static struct syscore_ops cpufreq_syscore_ops = { - .shutdown = cpufreq_suspend, -}; - struct kobject *cpufreq_global_kobject; EXPORT_SYMBOL(cpufreq_global_kobject); @@ -2589,8 +2581,6 @@ static int __init cpufreq_core_init(void) cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj); BUG_ON(!cpufreq_global_kobject); - register_syscore_ops(&cpufreq_syscore_ops); - return 0; } module_param(off, int, 0444); From aae4447e36bc4926a86d0bd8ca7f7a4cb2b1182d Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 18 Oct 2019 09:45:49 +0200 Subject: [PATCH 0125/3715] xen/netback: fix error path of xenvif_connect_data() commit 3d5c1a037d37392a6859afbde49be5ba6a70a6b3 upstream. xenvif_connect_data() calls module_put() in case of error. This is wrong as there is no related module_get(). Remove the superfluous module_put(). Fixes: 279f438e36c0a7 ("xen-netback: Don't destroy the netdev until the vif is shut down") Cc: # 3.12 Signed-off-by: Juergen Gross Reviewed-by: Paul Durrant Reviewed-by: Wei Liu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netback/interface.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index d465071656b5..2641e76d03d9 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -718,7 +718,6 @@ err_unmap: xenvif_unmap_frontend_data_rings(queue); netif_napi_del(&queue->napi); err: - module_put(THIS_MODULE); return err; } From 47ed959d13a4a2154e0f537f85af88ab14a8dcd4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 14 Oct 2019 13:25:00 +0200 Subject: [PATCH 0126/3715] PCI: PM: Fix pci_power_up() commit 45144d42f299455911cc29366656c7324a3a7c97 upstream. There is an arbitrary difference between the system resume and runtime resume code paths for PCI devices regarding the delay to apply when switching the devices from D3cold to D0. Namely, pci_restore_standard_config() used in the runtime resume code path calls pci_set_power_state() which in turn invokes __pci_start_power_transition() to power up the device through the platform firmware and that function applies the transition delay (as per PCI Express Base Specification Revision 2.0, Section 6.6.1). However, pci_pm_default_resume_early() used in the system resume code path calls pci_power_up() which doesn't apply the delay at all and that causes issues to occur during resume from suspend-to-idle on some systems where the delay is required. Since there is no reason for that difference to exist, modify pci_power_up() to follow pci_set_power_state() more closely and invoke __pci_start_power_transition() from there to call the platform firmware to power up the device (in case that's necessary). Fixes: db288c9c5f9d ("PCI / PM: restore the original behavior of pci_set_power_state()") Reported-by: Daniel Drake Tested-by: Daniel Drake Link: https://lore.kernel.org/linux-pm/CAD8Lp44TYxrMgPLkHCqF9hv6smEurMXvmmvmtyFhZ6Q4SE+dig@mail.gmail.com/T/#m21be74af263c6a34f36e0fc5c77c5449d9406925 Signed-off-by: Rafael J. Wysocki Acked-by: Bjorn Helgaas Cc: 3.10+ # 3.10+ Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 044b208f7f6a..c847b5554db6 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -748,19 +748,6 @@ void pci_update_current_state(struct pci_dev *dev, pci_power_t state) } } -/** - * pci_power_up - Put the given device into D0 forcibly - * @dev: PCI device to power up - */ -void pci_power_up(struct pci_dev *dev) -{ - if (platform_pci_power_manageable(dev)) - platform_pci_set_power_state(dev, PCI_D0); - - pci_raw_set_power_state(dev, PCI_D0); - pci_update_current_state(dev, PCI_D0); -} - /** * pci_platform_power_transition - Use platform to change device power state * @dev: PCI device to handle. @@ -939,6 +926,17 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) } EXPORT_SYMBOL(pci_set_power_state); +/** + * pci_power_up - Put the given device into D0 forcibly + * @dev: PCI device to power up + */ +void pci_power_up(struct pci_dev *dev) +{ + __pci_start_power_transition(dev, PCI_D0); + pci_raw_set_power_state(dev, PCI_D0); + pci_update_current_state(dev, PCI_D0); +} + /** * pci_choose_state - Choose the power state of a PCI device * @dev: PCI device to be suspended From 9e223bdc1cf7da5c98f852225e347574e669950f Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 12 Dec 2017 17:33:03 -0800 Subject: [PATCH 0127/3715] KVM: X86: introduce invalidate_gpa argument to tlb flush MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c2ba05ccfde2f069a66c0462e5b5ef8a517dcc9c upstream. Introduce a new bool invalidate_gpa argument to kvm_x86_ops->tlb_flush, it will be used by later patches to just flush guest tlb. For VMX, this will use INVVPID instead of INVEPT, which will invalidate combined mappings while keeping guest-physical mappings. Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Peter Zijlstra Cc: "Jitindar SIngh, Suraj" Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/svm.c | 14 +++++++------- arch/x86/kvm/vmx.c | 21 +++++++++++---------- arch/x86/kvm/x86.c | 6 +++--- 4 files changed, 22 insertions(+), 21 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 94af073476ce..314b0d18494d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -973,7 +973,7 @@ struct kvm_x86_ops { unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); - void (*tlb_flush)(struct kvm_vcpu *vcpu); + void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa); void (*run)(struct kvm_vcpu *vcpu); int (*handle_exit)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 093e7f567e69..7ab9c5bc8d13 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -299,7 +299,7 @@ static int vgif = true; module_param(vgif, int, 0444); static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); -static void svm_flush_tlb(struct kvm_vcpu *vcpu); +static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa); static void svm_complete_interrupts(struct vcpu_svm *svm); static int nested_svm_exit_handled(struct vcpu_svm *svm); @@ -2097,7 +2097,7 @@ static int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) return 1; if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE)) - svm_flush_tlb(vcpu); + svm_flush_tlb(vcpu, true); vcpu->arch.cr4 = cr4; if (!npt_enabled) @@ -2438,7 +2438,7 @@ static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu, svm->vmcb->control.nested_cr3 = __sme_set(root); mark_dirty(svm->vmcb, VMCB_NPT); - svm_flush_tlb(vcpu); + svm_flush_tlb(vcpu, true); } static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, @@ -3111,7 +3111,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions; svm->nested.intercept = nested_vmcb->control.intercept; - svm_flush_tlb(&svm->vcpu); + svm_flush_tlb(&svm->vcpu, true); svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK; if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK) svm->vcpu.arch.hflags |= HF_VINTR_MASK; @@ -4947,7 +4947,7 @@ static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) return 0; } -static void svm_flush_tlb(struct kvm_vcpu *vcpu) +static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) { struct vcpu_svm *svm = to_svm(vcpu); @@ -5288,7 +5288,7 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) svm->vmcb->save.cr3 = __sme_set(root); mark_dirty(svm->vmcb, VMCB_CR); - svm_flush_tlb(vcpu); + svm_flush_tlb(vcpu, true); } static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root) @@ -5302,7 +5302,7 @@ static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root) svm->vmcb->save.cr3 = kvm_read_cr3(vcpu); mark_dirty(svm->vmcb, VMCB_CR); - svm_flush_tlb(vcpu); + svm_flush_tlb(vcpu, true); } static int is_disabled(void) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7784b02312ca..8950092da5c2 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4427,9 +4427,10 @@ static void exit_lmode(struct kvm_vcpu *vcpu) #endif -static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid) +static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid, + bool invalidate_gpa) { - if (enable_ept) { + if (enable_ept && (invalidate_gpa || !enable_vpid)) { if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) return; ept_sync_context(construct_eptp(vcpu, vcpu->arch.mmu.root_hpa)); @@ -4438,15 +4439,15 @@ static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid) } } -static void vmx_flush_tlb(struct kvm_vcpu *vcpu) +static void vmx_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) { - __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid); + __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid, invalidate_gpa); } static void vmx_flush_tlb_ept_only(struct kvm_vcpu *vcpu) { if (enable_ept) - vmx_flush_tlb(vcpu); + vmx_flush_tlb(vcpu, true); } static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) @@ -4644,7 +4645,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) ept_load_pdptrs(vcpu); } - vmx_flush_tlb(vcpu); + vmx_flush_tlb(vcpu, true); vmcs_writel(GUEST_CR3, guest_cr3); } @@ -8314,7 +8315,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) return kvm_skip_emulated_instruction(vcpu); } - __vmx_flush_tlb(vcpu, vmx->nested.vpid02); + __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true); nested_vmx_succeed(vcpu); return kvm_skip_emulated_instruction(vcpu); @@ -11214,11 +11215,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02); if (vmcs12->virtual_processor_id != vmx->nested.last_vpid) { vmx->nested.last_vpid = vmcs12->virtual_processor_id; - __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02); + __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02, true); } } else { vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); - vmx_flush_tlb(vcpu); + vmx_flush_tlb(vcpu, true); } } @@ -11921,7 +11922,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, * L1's vpid. TODO: move to a more elaborate solution, giving * each L2 its own vpid and exposing the vpid feature to L1. */ - vmx_flush_tlb(vcpu); + vmx_flush_tlb(vcpu, true); } /* Restore posted intr vector. */ if (nested_cpu_has_posted_intr(vmcs12)) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 98b990f13ae0..484afd667d1c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6943,10 +6943,10 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); } -static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu) +static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) { ++vcpu->stat.tlb_flush; - kvm_x86_ops->tlb_flush(vcpu); + kvm_x86_ops->tlb_flush(vcpu, invalidate_gpa); } void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, @@ -7017,7 +7017,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu)) kvm_mmu_sync_roots(vcpu); if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) - kvm_vcpu_flush_tlb(vcpu); + kvm_vcpu_flush_tlb(vcpu, true); if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) { vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; r = 0; From 060065111a71922ab2126ed46f44668d53510665 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 9 May 2018 16:56:04 -0400 Subject: [PATCH 0128/3715] kvm: vmx: Introduce lapic_mode enumeration commit 588716494258899389206fa50426e78cc9df89b9 upstream. The local APIC can be in one of three modes: disabled, xAPIC or x2APIC. (A fourth mode, "invalid," is included for completeness.) Using the new enumeration can make some of the APIC mode logic easier to read. In kvm_set_apic_base, for instance, it is clear that one cannot transition directly from x2APIC mode to xAPIC mode or directly from APIC disabled to x2APIC mode. Signed-off-by: Jim Mattson Signed-off-by: Krish Sadhukhan [Check invalid bits even if msr_info->host_initiated. Reported by Wanpeng Li. - Paolo] Signed-off-by: Paolo Bonzini Cc: "Jitindar SIngh, Suraj" Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/lapic.h | 14 ++++++++++++++ arch/x86/kvm/x86.c | 26 +++++++++++++++----------- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 4b9935a38347..bc3446d3cfdf 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -16,6 +16,13 @@ #define APIC_BUS_CYCLE_NS 1 #define APIC_BUS_FREQUENCY (1000000000ULL / APIC_BUS_CYCLE_NS) +enum lapic_mode { + LAPIC_MODE_DISABLED = 0, + LAPIC_MODE_INVALID = X2APIC_ENABLE, + LAPIC_MODE_XAPIC = MSR_IA32_APICBASE_ENABLE, + LAPIC_MODE_X2APIC = MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE, +}; + struct kvm_timer { struct hrtimer timer; s64 period; /* unit: ns */ @@ -89,6 +96,7 @@ u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info); int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); +enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu); int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); @@ -220,4 +228,10 @@ void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu); void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu); bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu); void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu); + +static inline enum lapic_mode kvm_apic_mode(u64 apic_base) +{ + return apic_base & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); +} + #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 484afd667d1c..4927d0f5be13 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -306,23 +306,27 @@ u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_get_apic_base); +enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu) +{ + return kvm_apic_mode(kvm_get_apic_base(vcpu)); +} +EXPORT_SYMBOL_GPL(kvm_get_apic_mode); + int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { - u64 old_state = vcpu->arch.apic_base & - (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); - u64 new_state = msr_info->data & - (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); + enum lapic_mode old_mode = kvm_get_apic_mode(vcpu); + enum lapic_mode new_mode = kvm_apic_mode(msr_info->data); u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) | 0x2ff | (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE); - if ((msr_info->data & reserved_bits) || new_state == X2APIC_ENABLE) - return 1; - if (!msr_info->host_initiated && - ((new_state == MSR_IA32_APICBASE_ENABLE && - old_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) || - (new_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) && - old_state == 0))) + if ((msr_info->data & reserved_bits) != 0 || new_mode == LAPIC_MODE_INVALID) return 1; + if (!msr_info->host_initiated) { + if (old_mode == LAPIC_MODE_X2APIC && new_mode == LAPIC_MODE_XAPIC) + return 1; + if (old_mode == LAPIC_MODE_DISABLED && new_mode == LAPIC_MODE_X2APIC) + return 1; + } kvm_lapic_set_base(vcpu, msr_info->data); return 0; From f5ae861d9ad8c7834a255ceaee39fc9cb0e56163 Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Thu, 26 Apr 2018 13:09:50 -0700 Subject: [PATCH 0129/3715] kvm: apic: Flush TLB after APIC mode/address change if VPIDs are in use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a468f2dbf921d02f5107378501693137a812999b upstream. Currently, KVM flushes the TLB after a change to the APIC access page address or the APIC mode when EPT mode is enabled. However, even in shadow paging mode, a TLB flush is needed if VPIDs are being used, as specified in the Intel SDM Section 29.4.5. So replace vmx_flush_tlb_ept_only() with vmx_flush_tlb(), which will flush if either EPT or VPIDs are in use. Signed-off-by: Junaid Shahid Reviewed-by: Jim Mattson Signed-off-by: Radim Krčmář Cc: "Jitindar SIngh, Suraj" Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8950092da5c2..a40dc657ea98 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4444,12 +4444,6 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid, invalidate_gpa); } -static void vmx_flush_tlb_ept_only(struct kvm_vcpu *vcpu) -{ - if (enable_ept) - vmx_flush_tlb(vcpu, true); -} - static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) { ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; @@ -9320,7 +9314,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) } else { sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; - vmx_flush_tlb_ept_only(vcpu); + vmx_flush_tlb(vcpu, true); } vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); @@ -9348,7 +9342,7 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) !nested_cpu_has2(get_vmcs12(&vmx->vcpu), SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { vmcs_write64(APIC_ACCESS_ADDR, hpa); - vmx_flush_tlb_ept_only(vcpu); + vmx_flush_tlb(vcpu, true); } } @@ -11243,7 +11237,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, } } else if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { - vmx_flush_tlb_ept_only(vcpu); + vmx_flush_tlb(vcpu, true); } /* @@ -12198,7 +12192,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, } else if (!nested_cpu_has_ept(vmcs12) && nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { - vmx_flush_tlb_ept_only(vcpu); + vmx_flush_tlb(vcpu, true); } /* This is needed for same reason as it was needed in prepare_vmcs02 */ From b425d011e83d220d3be0a19561d6b33d11358fa5 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 9 May 2018 16:56:05 -0400 Subject: [PATCH 0130/3715] kvm: vmx: Basic APIC virtualization controls have three settings commit 8d860bbeedef97fe981d28fa7b71d77f3b29563f upstream. Previously, we toggled between SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE and SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES, depending on whether or not the EXTD bit was set in MSR_IA32_APICBASE. However, if the local APIC is disabled, we should not set either of these APIC virtualization control bits. Signed-off-by: Jim Mattson Signed-off-by: Krish Sadhukhan Signed-off-by: Paolo Bonzini Cc: "Jitindar SIngh, Suraj" Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/lapic.c | 12 ++++----- arch/x86/kvm/svm.c | 4 +-- arch/x86/kvm/vmx.c | 48 ++++++++++++++++++++------------- 4 files changed, 38 insertions(+), 28 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 314b0d18494d..00c12158a5dc 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -998,7 +998,7 @@ struct kvm_x86_ops { void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr); void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); - void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); + void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu); void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 7b9ad9de4f37..2307f63efd20 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1967,13 +1967,11 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) } } - if ((old_value ^ value) & X2APIC_ENABLE) { - if (value & X2APIC_ENABLE) { - kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id); - kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true); - } else - kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false); - } + if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE)) + kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id); + + if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) + kvm_x86_ops->set_virtual_apic_mode(vcpu); apic->base_address = apic->vcpu->arch.apic_base & MSR_IA32_APICBASE_BASE; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 7ab9c5bc8d13..f6adc8db0e32 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4589,7 +4589,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) set_cr_intercept(svm, INTERCEPT_CR8_WRITE); } -static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) +static void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu) { return; } @@ -5713,7 +5713,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .enable_nmi_window = enable_nmi_window, .enable_irq_window = enable_irq_window, .update_cr8_intercept = update_cr8_intercept, - .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode, + .set_virtual_apic_mode = svm_set_virtual_apic_mode, .get_enable_apicv = svm_get_enable_apicv, .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl, .load_eoi_exitmap = svm_load_eoi_exitmap, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a40dc657ea98..02c0326dc259 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -591,7 +591,8 @@ struct nested_vmx { */ bool sync_shadow_vmcs; - bool change_vmcs01_virtual_x2apic_mode; + bool change_vmcs01_virtual_apic_mode; + /* L2 must run next, and mustn't decide to exit to L1. */ bool nested_run_pending; @@ -9290,31 +9291,43 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) vmcs_write32(TPR_THRESHOLD, irr); } -static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) +static void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) { u32 sec_exec_control; + if (!lapic_in_kernel(vcpu)) + return; + /* Postpone execution until vmcs01 is the current VMCS. */ if (is_guest_mode(vcpu)) { - to_vmx(vcpu)->nested.change_vmcs01_virtual_x2apic_mode = true; + to_vmx(vcpu)->nested.change_vmcs01_virtual_apic_mode = true; return; } - if (!cpu_has_vmx_virtualize_x2apic_mode()) - return; - if (!cpu_need_tpr_shadow(vcpu)) return; sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); + sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE); - if (set) { - sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; - sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; - } else { - sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; - sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; - vmx_flush_tlb(vcpu, true); + switch (kvm_get_apic_mode(vcpu)) { + case LAPIC_MODE_INVALID: + WARN_ONCE(true, "Invalid local APIC state"); + case LAPIC_MODE_DISABLED: + break; + case LAPIC_MODE_XAPIC: + if (flexpriority_enabled) { + sec_exec_control |= + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + vmx_flush_tlb(vcpu, true); + } + break; + case LAPIC_MODE_X2APIC: + if (cpu_has_vmx_virtualize_x2apic_mode()) + sec_exec_control |= + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; + break; } vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); @@ -12185,10 +12198,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, if (kvm_has_tsc_control) decache_tsc_multiplier(vmx); - if (vmx->nested.change_vmcs01_virtual_x2apic_mode) { - vmx->nested.change_vmcs01_virtual_x2apic_mode = false; - vmx_set_virtual_x2apic_mode(vcpu, - vcpu->arch.apic_base & X2APIC_ENABLE); + if (vmx->nested.change_vmcs01_virtual_apic_mode) { + vmx->nested.change_vmcs01_virtual_apic_mode = false; + vmx_set_virtual_apic_mode(vcpu); } else if (!nested_cpu_has_ept(vmcs12) && nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { @@ -12749,7 +12761,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .enable_nmi_window = enable_nmi_window, .enable_irq_window = enable_irq_window, .update_cr8_intercept = update_cr8_intercept, - .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode, + .set_virtual_apic_mode = vmx_set_virtual_apic_mode, .set_apic_access_page_addr = vmx_set_apic_access_page_addr, .get_enable_apicv = vmx_get_enable_apicv, .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, From 1db19d6805d9dc5c79f8a19dddde324dbf0a33f9 Mon Sep 17 00:00:00 2001 From: Greg KH Date: Tue, 1 Oct 2019 18:56:11 +0200 Subject: [PATCH 0131/3715] RDMA/cxgb4: Do not dma memory off of the stack commit 3840c5b78803b2b6cc1ff820100a74a092c40cbb upstream. Nicolas pointed out that the cxgb4 driver is doing dma off of the stack, which is generally considered a very bad thing. On some architectures it could be a security problem, but odds are none of them actually run this driver, so it's just a "normal" bug. Resolve this by allocating the memory for a message off of the heap instead of the stack. kmalloc() always will give us a proper memory location that DMA will work correctly from. Link: https://lore.kernel.org/r/20191001165611.GA3542072@kroah.com Reported-by: Nicolas Waisman Tested-by: Potnuri Bharat Teja Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/cxgb4/mem.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index b5784cb145f5..805429bbc916 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -260,13 +260,17 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, struct sk_buff *skb) { int err; - struct fw_ri_tpte tpt; + struct fw_ri_tpte *tpt; u32 stag_idx; static atomic_t key; if (c4iw_fatal_error(rdev)) return -EIO; + tpt = kmalloc(sizeof(*tpt), GFP_KERNEL); + if (!tpt) + return -ENOMEM; + stag_state = stag_state > 0; stag_idx = (*stag) >> 8; @@ -276,6 +280,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, mutex_lock(&rdev->stats.lock); rdev->stats.stag.fail++; mutex_unlock(&rdev->stats.lock); + kfree(tpt); return -ENOMEM; } mutex_lock(&rdev->stats.lock); @@ -290,28 +295,28 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, /* write TPT entry */ if (reset_tpt_entry) - memset(&tpt, 0, sizeof(tpt)); + memset(tpt, 0, sizeof(*tpt)); else { - tpt.valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F | + tpt->valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F | FW_RI_TPTE_STAGKEY_V((*stag & FW_RI_TPTE_STAGKEY_M)) | FW_RI_TPTE_STAGSTATE_V(stag_state) | FW_RI_TPTE_STAGTYPE_V(type) | FW_RI_TPTE_PDID_V(pdid)); - tpt.locread_to_qpid = cpu_to_be32(FW_RI_TPTE_PERM_V(perm) | + tpt->locread_to_qpid = cpu_to_be32(FW_RI_TPTE_PERM_V(perm) | (bind_enabled ? FW_RI_TPTE_MWBINDEN_F : 0) | FW_RI_TPTE_ADDRTYPE_V((zbva ? FW_RI_ZERO_BASED_TO : FW_RI_VA_BASED_TO))| FW_RI_TPTE_PS_V(page_size)); - tpt.nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32( + tpt->nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32( FW_RI_TPTE_PBLADDR_V(PBL_OFF(rdev, pbl_addr)>>3)); - tpt.len_lo = cpu_to_be32((u32)(len & 0xffffffffUL)); - tpt.va_hi = cpu_to_be32((u32)(to >> 32)); - tpt.va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL)); - tpt.dca_mwbcnt_pstag = cpu_to_be32(0); - tpt.len_hi = cpu_to_be32((u32)(len >> 32)); + tpt->len_lo = cpu_to_be32((u32)(len & 0xffffffffUL)); + tpt->va_hi = cpu_to_be32((u32)(to >> 32)); + tpt->va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL)); + tpt->dca_mwbcnt_pstag = cpu_to_be32(0); + tpt->len_hi = cpu_to_be32((u32)(len >> 32)); } err = write_adapter_mem(rdev, stag_idx + (rdev->lldi.vr->stag.start >> 5), - sizeof(tpt), &tpt, skb); + sizeof(*tpt), tpt, skb); if (reset_tpt_entry) { c4iw_put_resource(&rdev->resource.tpt_table, stag_idx); @@ -319,6 +324,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, rdev->stats.stag.cur -= 32; mutex_unlock(&rdev->stats.lock); } + kfree(tpt); return err; } From ddef1e8e3f6eb26034833b7255e3fa584d54a230 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 29 Oct 2019 09:17:49 +0100 Subject: [PATCH 0132/3715] Linux 4.14.151 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3d96b277ffc9..db996459d047 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 150 +SUBLEVEL = 151 EXTRAVERSION = NAME = Petit Gorille From 00d4e50d44ad6f8ba666cbc790d6b3097b1fa6ca Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 12 Sep 2019 10:44:44 +0200 Subject: [PATCH 0133/3715] BACKPORT: dm bufio: call adjust_total_allocated from __link_buffer and __unlink_buffer Move the call to adjust_total_allocated() to __link_buffer() and __unlink_buffer() so that only used buffers are counted. Reserved buffers are not. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer (cherry picked from commit 26d2ef0cd0f7c12aa331b502c1c1460b85ebd04f) Change-Id: Ia47e99082a4c1a2acf5f5b5aab1a21fd8b6300df Signed-off-by: Martijn Coenen --- drivers/md/dm-bufio.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 1e17e6421da3..12661e6339d7 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -465,8 +465,6 @@ static struct dm_buffer *alloc_buffer(struct dm_bufio_client *c, gfp_t gfp_mask) return NULL; } - adjust_total_allocated(b->data_mode, (long)c->block_size); - #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING memset(&b->stack_trace, 0, sizeof(b->stack_trace)); #endif @@ -480,8 +478,6 @@ static void free_buffer(struct dm_buffer *b) { struct dm_bufio_client *c = b->c; - adjust_total_allocated(b->data_mode, -(long)c->block_size); - free_buffer_data(c, b->data, b->data_mode); kfree(b); } @@ -499,6 +495,8 @@ static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty) list_add(&b->lru_list, &c->lru[dirty]); __insert(b->c, b); b->last_accessed = jiffies; + + adjust_total_allocated(b->data_mode, (long)c->block_size); } /* @@ -513,6 +511,8 @@ static void __unlink_buffer(struct dm_buffer *b) c->n_buffers[b->list_mode]--; __remove(b->c, b); list_del(&b->lru_list); + + adjust_total_allocated(b->data_mode, -(long)c->block_size); } /* From 21c0b0ea9e3c6ba43946bdbb4909157a787e8e42 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 12 Sep 2019 10:44:45 +0200 Subject: [PATCH 0134/3715] BACKPORT: dm bufio: refactor adjust_total_allocated Refactor adjust_total_allocated() so that it takes a bool argument indicating if it should add or subtract the buffer size. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer (cherry picked from commit d0a328a385d2d1ab87e7a959d91c1841ed5a498f) Change-Id: I25522f4991d17c56f709f7c904f4480a427decbf Signed-off-by: Martijn Coenen --- drivers/md/dm-bufio.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 12661e6339d7..0225b239c626 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -319,14 +319,22 @@ static void __remove(struct dm_bufio_client *c, struct dm_buffer *b) /*----------------------------------------------------------------*/ -static void adjust_total_allocated(enum data_mode data_mode, long diff) +static void adjust_total_allocated(struct dm_buffer *b, bool unlink) { + enum data_mode data_mode; + long diff; + static unsigned long * const class_ptr[DATA_MODE_LIMIT] = { &dm_bufio_allocated_kmem_cache, &dm_bufio_allocated_get_free_pages, &dm_bufio_allocated_vmalloc, }; + data_mode = b->data_mode; + diff = (long)b->c->block_size; + if (unlink) + diff = -diff; + spin_lock(¶m_spinlock); *class_ptr[data_mode] += diff; @@ -496,7 +504,7 @@ static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty) __insert(b->c, b); b->last_accessed = jiffies; - adjust_total_allocated(b->data_mode, (long)c->block_size); + adjust_total_allocated(b, false); } /* @@ -512,7 +520,7 @@ static void __unlink_buffer(struct dm_buffer *b) __remove(b->c, b); list_del(&b->lru_list); - adjust_total_allocated(b->data_mode, -(long)c->block_size); + adjust_total_allocated(b, true); } /* From 43a03bd5848c51f2a54e5c546cd61602ef6b6c13 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 12 Sep 2019 10:44:46 +0200 Subject: [PATCH 0135/3715] BACKPORT: dm bufio: introduce a global queue Rename param_spinlock to global_spinlock and introduce a global queue of all used buffers. The queue will be used in the following commits. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer (cherry picked from commit af53badc0cd8a511b016a10c0cccf916692f1fc2) Change-Id: I00fac757cdb8dadf23ed6140076dc5217468d137 Signed-off-by: Martijn Coenen --- drivers/md/dm-bufio.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 0225b239c626..ef4b646984c2 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -146,6 +146,7 @@ enum data_mode { struct dm_buffer { struct rb_node node; struct list_head lru_list; + struct list_head global_list; sector_t block; void *data; enum data_mode data_mode; @@ -222,7 +223,9 @@ static unsigned long dm_bufio_cache_size; */ static unsigned long dm_bufio_cache_size_latch; -static DEFINE_SPINLOCK(param_spinlock); +static DEFINE_SPINLOCK(global_spinlock); + +static LIST_HEAD(global_queue); /* * Buffers are freed after this timeout @@ -335,7 +338,7 @@ static void adjust_total_allocated(struct dm_buffer *b, bool unlink) if (unlink) diff = -diff; - spin_lock(¶m_spinlock); + spin_lock(&global_spinlock); *class_ptr[data_mode] += diff; @@ -344,7 +347,13 @@ static void adjust_total_allocated(struct dm_buffer *b, bool unlink) if (dm_bufio_current_allocated > dm_bufio_peak_allocated) dm_bufio_peak_allocated = dm_bufio_current_allocated; - spin_unlock(¶m_spinlock); + if (!unlink) { + list_add(&b->global_list, &global_queue); + } else { + list_del(&b->global_list); + } + + spin_unlock(&global_spinlock); } /* From e4f8bb0f133d157b880a3c8b81c8b64789fc917a Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 12 Sep 2019 10:44:47 +0200 Subject: [PATCH 0136/3715] BACKPORT: dm bufio: remove old-style buffer cleanup Remove code that cleans up buffers if the cache size grows over the limit. The next commit will introduce a new global cleanup. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer (cherry picked from commit b132ff333201784e67654663360a1a4fb103ebe2) Change-Id: I6ceb363093cd2a73a51ca284630bd97466bc1be3 Signed-off-by: Martijn Coenen --- drivers/md/dm-bufio.c | 58 +++---------------------------------------- 1 file changed, 3 insertions(+), 55 deletions(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index ef4b646984c2..3fb6f973c99e 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -33,7 +33,7 @@ #define DM_BUFIO_MEMORY_PERCENT 2 #define DM_BUFIO_VMALLOC_PERCENT 25 -#define DM_BUFIO_WRITEBACK_PERCENT 75 +#define DM_BUFIO_WRITEBACK_RATIO 3 /* * Check buffer ages in this interval (seconds) @@ -241,11 +241,6 @@ static unsigned long dm_bufio_current_allocated; /*----------------------------------------------------------------*/ -/* - * Per-client cache: dm_bufio_cache_size / dm_bufio_client_count - */ -static unsigned long dm_bufio_cache_size_per_client; - /* * The current number of clients. */ @@ -257,8 +252,7 @@ static int dm_bufio_client_count; static LIST_HEAD(dm_bufio_all_clients); /* - * This mutex protects dm_bufio_cache_size_latch, - * dm_bufio_cache_size_per_client and dm_bufio_client_count + * This mutex protects dm_bufio_cache_size_latch and dm_bufio_client_count */ static DEFINE_MUTEX(dm_bufio_clients_lock); @@ -374,9 +368,6 @@ static void __cache_size_refresh(void) dm_bufio_default_cache_size); dm_bufio_cache_size_latch = dm_bufio_default_cache_size; } - - dm_bufio_cache_size_per_client = dm_bufio_cache_size_latch / - (dm_bufio_client_count ? : 1); } /* @@ -964,33 +955,6 @@ static void __write_dirty_buffers_async(struct dm_bufio_client *c, int no_wait, } } -/* - * Get writeback threshold and buffer limit for a given client. - */ -static void __get_memory_limit(struct dm_bufio_client *c, - unsigned long *threshold_buffers, - unsigned long *limit_buffers) -{ - unsigned long buffers; - - if (unlikely(ACCESS_ONCE(dm_bufio_cache_size) != dm_bufio_cache_size_latch)) { - if (mutex_trylock(&dm_bufio_clients_lock)) { - __cache_size_refresh(); - mutex_unlock(&dm_bufio_clients_lock); - } - } - - buffers = dm_bufio_cache_size_per_client >> - (c->sectors_per_block_bits + SECTOR_SHIFT); - - if (buffers < c->minimum_buffers) - buffers = c->minimum_buffers; - - *limit_buffers = buffers; - *threshold_buffers = mult_frac(buffers, - DM_BUFIO_WRITEBACK_PERCENT, 100); -} - /* * Check if we're over watermark. * If we are over threshold_buffers, start freeing buffers. @@ -999,23 +963,7 @@ static void __get_memory_limit(struct dm_bufio_client *c, static void __check_watermark(struct dm_bufio_client *c, struct list_head *write_list) { - unsigned long threshold_buffers, limit_buffers; - - __get_memory_limit(c, &threshold_buffers, &limit_buffers); - - while (c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY] > - limit_buffers) { - - struct dm_buffer *b = __get_unclaimed_buffer(c); - - if (!b) - return; - - __free_buffer_wake(b); - cond_resched(); - } - - if (c->n_buffers[LIST_DIRTY] > threshold_buffers) + if (c->n_buffers[LIST_DIRTY] > c->n_buffers[LIST_CLEAN] * DM_BUFIO_WRITEBACK_RATIO) __write_dirty_buffers_async(c, 1, write_list); } From 6409e7e01d114abdeeaa2fa0a23fe293d800dad5 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 12 Sep 2019 12:07:23 -0400 Subject: [PATCH 0137/3715] BACKPORT: dm bufio: introduce a global cache replacement This commit introduces a global cache replacement (instead of per-client cleanup). If one bufio client uses the cache heavily and another client is not using it, we want to let the first client use most of the cache. The old algorithm would partition the cache equally betwen the clients and that is sub-optimal. For cache replacement, we use the clock algorithm because it doesn't require taking any lock when the buffer is accessed. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer (cherry picked from commit 6e913b28cd279212c4580dbd8b2cf9dcd4740cfb) Change-Id: Iad25b7058d3da32ae07959a348fa75178ff9c860 Signed-off-by: Martijn Coenen --- drivers/md/dm-bufio.c | 98 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 91 insertions(+), 7 deletions(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 3fb6f973c99e..9440da8b0120 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -34,6 +34,7 @@ #define DM_BUFIO_MEMORY_PERCENT 2 #define DM_BUFIO_VMALLOC_PERCENT 25 #define DM_BUFIO_WRITEBACK_RATIO 3 +#define DM_BUFIO_LOW_WATERMARK_RATIO 16 /* * Check buffer ages in this interval (seconds) @@ -151,6 +152,7 @@ struct dm_buffer { void *data; enum data_mode data_mode; unsigned char list_mode; /* LIST_* */ + unsigned accessed; unsigned hold_count; blk_status_t read_error; blk_status_t write_error; @@ -227,6 +229,8 @@ static DEFINE_SPINLOCK(global_spinlock); static LIST_HEAD(global_queue); +static unsigned long global_num = 0; + /* * Buffers are freed after this timeout */ @@ -256,6 +260,11 @@ static LIST_HEAD(dm_bufio_all_clients); */ static DEFINE_MUTEX(dm_bufio_clients_lock); +static struct workqueue_struct *dm_bufio_wq; +static struct delayed_work dm_bufio_cleanup_old_work; +static struct work_struct dm_bufio_replacement_work; + + #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING static void buffer_record_stack(struct dm_buffer *b) { @@ -341,10 +350,16 @@ static void adjust_total_allocated(struct dm_buffer *b, bool unlink) if (dm_bufio_current_allocated > dm_bufio_peak_allocated) dm_bufio_peak_allocated = dm_bufio_current_allocated; + b->accessed = 1; + if (!unlink) { list_add(&b->global_list, &global_queue); + global_num++; + if (dm_bufio_current_allocated > dm_bufio_cache_size) + queue_work(dm_bufio_wq, &dm_bufio_replacement_work); } else { list_del(&b->global_list); + global_num--; } spin_unlock(&global_spinlock); @@ -530,6 +545,8 @@ static void __relink_lru(struct dm_buffer *b, int dirty) { struct dm_bufio_client *c = b->c; + b->accessed = 1; + BUG_ON(!c->n_buffers[b->list_mode]); c->n_buffers[b->list_mode]--; @@ -1830,6 +1847,74 @@ static void __evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz) dm_bufio_unlock(c); } +static void do_global_cleanup(struct work_struct *w) +{ + struct dm_bufio_client *locked_client = NULL; + struct dm_bufio_client *current_client; + struct dm_buffer *b; + unsigned spinlock_hold_count; + unsigned long threshold = dm_bufio_cache_size - + dm_bufio_cache_size / DM_BUFIO_LOW_WATERMARK_RATIO; + unsigned long loops = global_num * 2; + + mutex_lock(&dm_bufio_clients_lock); + + while (1) { + cond_resched(); + + spin_lock(&global_spinlock); + if (unlikely(dm_bufio_current_allocated <= threshold)) + break; + + spinlock_hold_count = 0; +get_next: + if (!loops--) + break; + if (unlikely(list_empty(&global_queue))) + break; + b = list_entry(global_queue.prev, struct dm_buffer, global_list); + + if (b->accessed) { + b->accessed = 0; + list_move(&b->global_list, &global_queue); + if (likely(++spinlock_hold_count < 16)) + goto get_next; + spin_unlock(&global_spinlock); + continue; + } + + current_client = b->c; + if (unlikely(current_client != locked_client)) { + if (locked_client) + dm_bufio_unlock(locked_client); + + if (!dm_bufio_trylock(current_client)) { + spin_unlock(&global_spinlock); + dm_bufio_lock(current_client); + locked_client = current_client; + continue; + } + + locked_client = current_client; + } + + spin_unlock(&global_spinlock); + + if (unlikely(!__try_evict_buffer(b, GFP_KERNEL))) { + spin_lock(&global_spinlock); + list_move(&b->global_list, &global_queue); + spin_unlock(&global_spinlock); + } + } + + spin_unlock(&global_spinlock); + + if (locked_client) + dm_bufio_unlock(locked_client); + + mutex_unlock(&dm_bufio_clients_lock); +} + static void cleanup_old_buffers(void) { unsigned long max_age_hz = get_max_age_hz(); @@ -1845,14 +1930,11 @@ static void cleanup_old_buffers(void) mutex_unlock(&dm_bufio_clients_lock); } -static struct workqueue_struct *dm_bufio_wq; -static struct delayed_work dm_bufio_work; - static void work_fn(struct work_struct *w) { cleanup_old_buffers(); - queue_delayed_work(dm_bufio_wq, &dm_bufio_work, + queue_delayed_work(dm_bufio_wq, &dm_bufio_cleanup_old_work, DM_BUFIO_WORK_TIMER_SECS * HZ); } @@ -1897,8 +1979,9 @@ static int __init dm_bufio_init(void) if (!dm_bufio_wq) return -ENOMEM; - INIT_DELAYED_WORK(&dm_bufio_work, work_fn); - queue_delayed_work(dm_bufio_wq, &dm_bufio_work, + INIT_DELAYED_WORK(&dm_bufio_cleanup_old_work, work_fn); + INIT_WORK(&dm_bufio_replacement_work, do_global_cleanup); + queue_delayed_work(dm_bufio_wq, &dm_bufio_cleanup_old_work, DM_BUFIO_WORK_TIMER_SECS * HZ); return 0; @@ -1912,7 +1995,8 @@ static void __exit dm_bufio_exit(void) int bug = 0; int i; - cancel_delayed_work_sync(&dm_bufio_work); + cancel_delayed_work_sync(&dm_bufio_cleanup_old_work); + flush_workqueue(dm_bufio_wq); destroy_workqueue(dm_bufio_wq); for (i = 0; i < ARRAY_SIZE(dm_bufio_caches); i++) From 2b2bb0cce0a521371357353894d2fb62de06119c Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Tue, 5 Nov 2019 13:02:42 -0800 Subject: [PATCH 0138/3715] ANDROID: don't enable TOOLS_SUPPORT_RELR in all{mod,yes}config We can't have this option enabled by default in all{mod,yes}config builds because the tools might not support RELR. Follow the pattern used elsewhere in the kernel and specify "depends on !COMPILE_TEST" in order to prevent it from being turned on in these builds. Bug: 143966059 Signed-off-by: Peter Collingbourne Change-Id: I0c728bd8973791b1879257b343aa53bd1f91c3f1 --- init/Kconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/init/Kconfig b/init/Kconfig index b1cabfa234b2..2ebd9ff7f552 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -18,6 +18,9 @@ config DEFCONFIG_LIST config TOOLS_SUPPORT_RELR bool "Declare tool support for RELR" + # Prevent this from being enabled by default in allyesconfig or + # allmodconfig builds. + depends on !COMPILE_TEST config CONSTRUCTORS bool From f7c80e49524b3e45ddf142b9cfa9a639fb229899 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Mon, 28 Oct 2019 04:59:17 -0400 Subject: [PATCH 0139/3715] zram: fix race between backing_dev_show and backing_dev_store [ Upstream commit f7daefe4231e57381d92c2e2ad905a899c28e402 ] CPU0: CPU1: backing_dev_show backing_dev_store ...... ...... file = zram->backing_dev; down_read(&zram->init_lock); down_read(&zram->init_init_lock) file_path(file, ...); zram->backing_dev = backing_dev; up_read(&zram->init_lock); up_read(&zram->init_lock); gets the value of zram->backing_dev too early in backing_dev_show, which resultin the value being NULL at the beginning, and not NULL later. backtrace: d_path+0xcc/0x174 file_path+0x10/0x18 backing_dev_show+0x40/0xb4 dev_attr_show+0x20/0x54 sysfs_kf_seq_show+0x9c/0x10c kernfs_seq_show+0x28/0x30 seq_read+0x184/0x488 kernfs_fop_read+0x5c/0x1a4 __vfs_read+0x44/0x128 vfs_read+0xa0/0x138 SyS_read+0x54/0xb4 Link: http://lkml.kernel.org/r/1571046839-16814-1-git-send-email-chenwandun@huawei.com Signed-off-by: Chenwandun Acked-by: Minchan Kim Cc: Sergey Senozhatsky Cc: Jens Axboe Cc: [4.14+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- drivers/block/zram/zram_drv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 133178c9b2cf..1b4e195c0d3c 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -291,13 +291,14 @@ static void reset_bdev(struct zram *zram) static ssize_t backing_dev_show(struct device *dev, struct device_attribute *attr, char *buf) { + struct file *file; struct zram *zram = dev_to_zram(dev); - struct file *file = zram->backing_dev; char *p; ssize_t ret; down_read(&zram->init_lock); - if (!zram_wb_enabled(zram)) { + file = zram->backing_dev; + if (!file) { memcpy(buf, "none\n", 5); up_read(&zram->init_lock); return 5; From f62931ec9ded76f0f16359d7b58dbb271326b4ac Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 23 Nov 2017 16:15:43 -0500 Subject: [PATCH 0140/3715] dm snapshot: use mutex instead of rw_semaphore [ Upstream commit ae1093be5a0ef997833e200a0dafb9ed0b1ff4fe ] The rw_semaphore is acquired for read only in two places, neither is performance-critical. So replace it with a mutex -- which is more efficient. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-snap.c | 84 +++++++++++++++++++++++--------------------- 1 file changed, 43 insertions(+), 41 deletions(-) diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index b502debc6df3..8b1556e77a0a 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -48,7 +48,7 @@ struct dm_exception_table { }; struct dm_snapshot { - struct rw_semaphore lock; + struct mutex lock; struct dm_dev *origin; struct dm_dev *cow; @@ -456,9 +456,9 @@ static int __find_snapshots_sharing_cow(struct dm_snapshot *snap, if (!bdev_equal(s->cow->bdev, snap->cow->bdev)) continue; - down_read(&s->lock); + mutex_lock(&s->lock); active = s->active; - up_read(&s->lock); + mutex_unlock(&s->lock); if (active) { if (snap_src) @@ -926,7 +926,7 @@ static int remove_single_exception_chunk(struct dm_snapshot *s) int r; chunk_t old_chunk = s->first_merging_chunk + s->num_merging_chunks - 1; - down_write(&s->lock); + mutex_lock(&s->lock); /* * Process chunks (and associated exceptions) in reverse order @@ -941,7 +941,7 @@ static int remove_single_exception_chunk(struct dm_snapshot *s) b = __release_queued_bios_after_merge(s); out: - up_write(&s->lock); + mutex_unlock(&s->lock); if (b) flush_bios(b); @@ -1000,9 +1000,9 @@ static void snapshot_merge_next_chunks(struct dm_snapshot *s) if (linear_chunks < 0) { DMERR("Read error in exception store: " "shutting down merge"); - down_write(&s->lock); + mutex_lock(&s->lock); s->merge_failed = 1; - up_write(&s->lock); + mutex_unlock(&s->lock); } goto shut; } @@ -1043,10 +1043,10 @@ static void snapshot_merge_next_chunks(struct dm_snapshot *s) previous_count = read_pending_exceptions_done_count(); } - down_write(&s->lock); + mutex_lock(&s->lock); s->first_merging_chunk = old_chunk; s->num_merging_chunks = linear_chunks; - up_write(&s->lock); + mutex_unlock(&s->lock); /* Wait until writes to all 'linear_chunks' drain */ for (i = 0; i < linear_chunks; i++) @@ -1088,10 +1088,10 @@ static void merge_callback(int read_err, unsigned long write_err, void *context) return; shut: - down_write(&s->lock); + mutex_lock(&s->lock); s->merge_failed = 1; b = __release_queued_bios_after_merge(s); - up_write(&s->lock); + mutex_unlock(&s->lock); error_bios(b); merge_shutdown(s); @@ -1190,7 +1190,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) s->exception_start_sequence = 0; s->exception_complete_sequence = 0; INIT_LIST_HEAD(&s->out_of_order_list); - init_rwsem(&s->lock); + mutex_init(&s->lock); INIT_LIST_HEAD(&s->list); spin_lock_init(&s->pe_lock); s->state_bits = 0; @@ -1357,9 +1357,9 @@ static void snapshot_dtr(struct dm_target *ti) /* Check whether exception handover must be cancelled */ (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); if (snap_src && snap_dest && (s == snap_src)) { - down_write(&snap_dest->lock); + mutex_lock(&snap_dest->lock); snap_dest->valid = 0; - up_write(&snap_dest->lock); + mutex_unlock(&snap_dest->lock); DMERR("Cancelling snapshot handover."); } up_read(&_origins_lock); @@ -1390,6 +1390,8 @@ static void snapshot_dtr(struct dm_target *ti) dm_exception_store_destroy(s->store); + mutex_destroy(&s->lock); + dm_put_device(ti, s->cow); dm_put_device(ti, s->origin); @@ -1477,7 +1479,7 @@ static void pending_complete(void *context, int success) if (!success) { /* Read/write error - snapshot is unusable */ - down_write(&s->lock); + mutex_lock(&s->lock); __invalidate_snapshot(s, -EIO); error = 1; goto out; @@ -1485,14 +1487,14 @@ static void pending_complete(void *context, int success) e = alloc_completed_exception(GFP_NOIO); if (!e) { - down_write(&s->lock); + mutex_lock(&s->lock); __invalidate_snapshot(s, -ENOMEM); error = 1; goto out; } *e = pe->e; - down_write(&s->lock); + mutex_lock(&s->lock); if (!s->valid) { free_completed_exception(e); error = 1; @@ -1517,7 +1519,7 @@ out: full_bio->bi_end_io = pe->full_bio_end_io; increment_pending_exceptions_done_count(); - up_write(&s->lock); + mutex_unlock(&s->lock); /* Submit any pending write bios */ if (error) { @@ -1716,7 +1718,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) /* FIXME: should only take write lock if we need * to copy an exception */ - down_write(&s->lock); + mutex_lock(&s->lock); if (!s->valid || (unlikely(s->snapshot_overflowed) && bio_data_dir(bio) == WRITE)) { @@ -1739,9 +1741,9 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) if (bio_data_dir(bio) == WRITE) { pe = __lookup_pending_exception(s, chunk); if (!pe) { - up_write(&s->lock); + mutex_unlock(&s->lock); pe = alloc_pending_exception(s); - down_write(&s->lock); + mutex_lock(&s->lock); if (!s->valid || s->snapshot_overflowed) { free_pending_exception(pe); @@ -1776,7 +1778,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) bio->bi_iter.bi_size == (s->store->chunk_size << SECTOR_SHIFT)) { pe->started = 1; - up_write(&s->lock); + mutex_unlock(&s->lock); start_full_bio(pe, bio); goto out; } @@ -1786,7 +1788,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) if (!pe->started) { /* this is protected by snap->lock */ pe->started = 1; - up_write(&s->lock); + mutex_unlock(&s->lock); start_copy(pe); goto out; } @@ -1796,7 +1798,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) } out_unlock: - up_write(&s->lock); + mutex_unlock(&s->lock); out: return r; } @@ -1832,7 +1834,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio) chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector); - down_write(&s->lock); + mutex_lock(&s->lock); /* Full merging snapshots are redirected to the origin */ if (!s->valid) @@ -1863,12 +1865,12 @@ redirect_to_origin: bio_set_dev(bio, s->origin->bdev); if (bio_data_dir(bio) == WRITE) { - up_write(&s->lock); + mutex_unlock(&s->lock); return do_origin(s->origin, bio); } out_unlock: - up_write(&s->lock); + mutex_unlock(&s->lock); return r; } @@ -1900,7 +1902,7 @@ static int snapshot_preresume(struct dm_target *ti) down_read(&_origins_lock); (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); if (snap_src && snap_dest) { - down_read(&snap_src->lock); + mutex_lock(&snap_src->lock); if (s == snap_src) { DMERR("Unable to resume snapshot source until " "handover completes."); @@ -1910,7 +1912,7 @@ static int snapshot_preresume(struct dm_target *ti) "source is suspended."); r = -EINVAL; } - up_read(&snap_src->lock); + mutex_unlock(&snap_src->lock); } up_read(&_origins_lock); @@ -1956,11 +1958,11 @@ static void snapshot_resume(struct dm_target *ti) (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); if (snap_src && snap_dest) { - down_write(&snap_src->lock); - down_write_nested(&snap_dest->lock, SINGLE_DEPTH_NESTING); + mutex_lock(&snap_src->lock); + mutex_lock_nested(&snap_dest->lock, SINGLE_DEPTH_NESTING); __handover_exceptions(snap_src, snap_dest); - up_write(&snap_dest->lock); - up_write(&snap_src->lock); + mutex_unlock(&snap_dest->lock); + mutex_unlock(&snap_src->lock); } up_read(&_origins_lock); @@ -1975,9 +1977,9 @@ static void snapshot_resume(struct dm_target *ti) /* Now we have correct chunk size, reregister */ reregister_snapshot(s); - down_write(&s->lock); + mutex_lock(&s->lock); s->active = 1; - up_write(&s->lock); + mutex_unlock(&s->lock); } static uint32_t get_origin_minimum_chunksize(struct block_device *bdev) @@ -2017,7 +2019,7 @@ static void snapshot_status(struct dm_target *ti, status_type_t type, switch (type) { case STATUSTYPE_INFO: - down_write(&snap->lock); + mutex_lock(&snap->lock); if (!snap->valid) DMEMIT("Invalid"); @@ -2042,7 +2044,7 @@ static void snapshot_status(struct dm_target *ti, status_type_t type, DMEMIT("Unknown"); } - up_write(&snap->lock); + mutex_unlock(&snap->lock); break; @@ -2108,7 +2110,7 @@ static int __origin_write(struct list_head *snapshots, sector_t sector, if (dm_target_is_snapshot_merge(snap->ti)) continue; - down_write(&snap->lock); + mutex_lock(&snap->lock); /* Only deal with valid and active snapshots */ if (!snap->valid || !snap->active) @@ -2135,9 +2137,9 @@ static int __origin_write(struct list_head *snapshots, sector_t sector, pe = __lookup_pending_exception(snap, chunk); if (!pe) { - up_write(&snap->lock); + mutex_unlock(&snap->lock); pe = alloc_pending_exception(snap); - down_write(&snap->lock); + mutex_lock(&snap->lock); if (!snap->valid) { free_pending_exception(pe); @@ -2180,7 +2182,7 @@ static int __origin_write(struct list_head *snapshots, sector_t sector, } next_snapshot: - up_write(&snap->lock); + mutex_unlock(&snap->lock); if (pe_to_start_now) { start_copy(pe_to_start_now); From cd83f10afd9d7c0e37b46b8d0c261a31499db210 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 2 Oct 2019 06:14:17 -0400 Subject: [PATCH 0141/3715] dm snapshot: introduce account_start_copy() and account_end_copy() [ Upstream commit a2f83e8b0c82c9500421a26c49eb198b25fcdea3 ] This simple refactoring moves code for modifying the semaphore cow_count into separate functions to prepare for changes that will extend these methods to provide for a more sophisticated mechanism for COW throttling. Signed-off-by: Mikulas Patocka Reviewed-by: Nikos Tsironis Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-snap.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 8b1556e77a0a..a9575122a0c6 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1399,6 +1399,16 @@ static void snapshot_dtr(struct dm_target *ti) kfree(s); } +static void account_start_copy(struct dm_snapshot *s) +{ + down(&s->cow_count); +} + +static void account_end_copy(struct dm_snapshot *s) +{ + up(&s->cow_count); +} + /* * Flush a list of buffers. */ @@ -1581,7 +1591,7 @@ static void copy_callback(int read_err, unsigned long write_err, void *context) } list_add(&pe->out_of_order_entry, lh); } - up(&s->cow_count); + account_end_copy(s); } /* @@ -1605,7 +1615,7 @@ static void start_copy(struct dm_snap_pending_exception *pe) dest.count = src.count; /* Hand over to kcopyd */ - down(&s->cow_count); + account_start_copy(s); dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe); } @@ -1625,7 +1635,7 @@ static void start_full_bio(struct dm_snap_pending_exception *pe, pe->full_bio = bio; pe->full_bio_end_io = bio->bi_end_io; - down(&s->cow_count); + account_start_copy(s); callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client, copy_callback, pe); From d14a94d03cc99c9d7df1ad7081701d7702680b8c Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 2 Oct 2019 06:15:53 -0400 Subject: [PATCH 0142/3715] dm snapshot: rework COW throttling to fix deadlock [ Upstream commit b21555786f18cd77f2311ad89074533109ae3ffa ] Commit 721b1d98fb517a ("dm snapshot: Fix excessive memory usage and workqueue stalls") introduced a semaphore to limit the maximum number of in-flight kcopyd (COW) jobs. The implementation of this throttling mechanism is prone to a deadlock: 1. One or more threads write to the origin device causing COW, which is performed by kcopyd. 2. At some point some of these threads might reach the s->cow_count semaphore limit and block in down(&s->cow_count), holding a read lock on _origins_lock. 3. Someone tries to acquire a write lock on _origins_lock, e.g., snapshot_ctr(), which blocks because the threads at step (2) already hold a read lock on it. 4. A COW operation completes and kcopyd runs dm-snapshot's completion callback, which ends up calling pending_complete(). pending_complete() tries to resubmit any deferred origin bios. This requires acquiring a read lock on _origins_lock, which blocks. This happens because the read-write semaphore implementation gives priority to writers, meaning that as soon as a writer tries to enter the critical section, no readers will be allowed in, until all writers have completed their work. So, pending_complete() waits for the writer at step (3) to acquire and release the lock. This writer waits for the readers at step (2) to release the read lock and those readers wait for pending_complete() (the kcopyd thread) to signal the s->cow_count semaphore: DEADLOCK. The above was thoroughly analyzed and documented by Nikos Tsironis as part of his initial proposal for fixing this deadlock, see: https://www.redhat.com/archives/dm-devel/2019-October/msg00001.html Fix this deadlock by reworking COW throttling so that it waits without holding any locks. Add a variable 'in_progress' that counts how many kcopyd jobs are running. A function wait_for_in_progress() will sleep if 'in_progress' is over the limit. It drops _origins_lock in order to avoid the deadlock. Reported-by: Guruswamy Basavaiah Reported-by: Nikos Tsironis Reviewed-by: Nikos Tsironis Tested-by: Nikos Tsironis Fixes: 721b1d98fb51 ("dm snapshot: Fix excessive memory usage and workqueue stalls") Cc: stable@vger.kernel.org # v5.0+ Depends-on: 4a3f111a73a8c ("dm snapshot: introduce account_start_copy() and account_end_copy()") Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-snap.c | 80 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 64 insertions(+), 16 deletions(-) diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index a9575122a0c6..95c564b60d79 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -19,7 +19,6 @@ #include #include #include -#include #include "dm.h" @@ -106,8 +105,8 @@ struct dm_snapshot { /* The on disk metadata handler */ struct dm_exception_store *store; - /* Maximum number of in-flight COW jobs. */ - struct semaphore cow_count; + unsigned in_progress; + struct wait_queue_head in_progress_wait; struct dm_kcopyd_client *kcopyd_client; @@ -158,8 +157,8 @@ struct dm_snapshot { */ #define DEFAULT_COW_THRESHOLD 2048 -static int cow_threshold = DEFAULT_COW_THRESHOLD; -module_param_named(snapshot_cow_threshold, cow_threshold, int, 0644); +static unsigned cow_threshold = DEFAULT_COW_THRESHOLD; +module_param_named(snapshot_cow_threshold, cow_threshold, uint, 0644); MODULE_PARM_DESC(snapshot_cow_threshold, "Maximum number of chunks being copied on write"); DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle, @@ -1206,7 +1205,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_hash_tables; } - sema_init(&s->cow_count, (cow_threshold > 0) ? cow_threshold : INT_MAX); + init_waitqueue_head(&s->in_progress_wait); s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle); if (IS_ERR(s->kcopyd_client)) { @@ -1396,17 +1395,54 @@ static void snapshot_dtr(struct dm_target *ti) dm_put_device(ti, s->origin); + WARN_ON(s->in_progress); + kfree(s); } static void account_start_copy(struct dm_snapshot *s) { - down(&s->cow_count); + spin_lock(&s->in_progress_wait.lock); + s->in_progress++; + spin_unlock(&s->in_progress_wait.lock); } static void account_end_copy(struct dm_snapshot *s) { - up(&s->cow_count); + spin_lock(&s->in_progress_wait.lock); + BUG_ON(!s->in_progress); + s->in_progress--; + if (likely(s->in_progress <= cow_threshold) && + unlikely(waitqueue_active(&s->in_progress_wait))) + wake_up_locked(&s->in_progress_wait); + spin_unlock(&s->in_progress_wait.lock); +} + +static bool wait_for_in_progress(struct dm_snapshot *s, bool unlock_origins) +{ + if (unlikely(s->in_progress > cow_threshold)) { + spin_lock(&s->in_progress_wait.lock); + if (likely(s->in_progress > cow_threshold)) { + /* + * NOTE: this throttle doesn't account for whether + * the caller is servicing an IO that will trigger a COW + * so excess throttling may result for chunks not required + * to be COW'd. But if cow_threshold was reached, extra + * throttling is unlikely to negatively impact performance. + */ + DECLARE_WAITQUEUE(wait, current); + __add_wait_queue(&s->in_progress_wait, &wait); + __set_current_state(TASK_UNINTERRUPTIBLE); + spin_unlock(&s->in_progress_wait.lock); + if (unlock_origins) + up_read(&_origins_lock); + io_schedule(); + remove_wait_queue(&s->in_progress_wait, &wait); + return false; + } + spin_unlock(&s->in_progress_wait.lock); + } + return true; } /* @@ -1424,7 +1460,7 @@ static void flush_bios(struct bio *bio) } } -static int do_origin(struct dm_dev *origin, struct bio *bio); +static int do_origin(struct dm_dev *origin, struct bio *bio, bool limit); /* * Flush a list of buffers. @@ -1437,7 +1473,7 @@ static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio) while (bio) { n = bio->bi_next; bio->bi_next = NULL; - r = do_origin(s->origin, bio); + r = do_origin(s->origin, bio, false); if (r == DM_MAPIO_REMAPPED) generic_make_request(bio); bio = n; @@ -1726,8 +1762,11 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) if (!s->valid) return DM_MAPIO_KILL; - /* FIXME: should only take write lock if we need - * to copy an exception */ + if (bio_data_dir(bio) == WRITE) { + while (unlikely(!wait_for_in_progress(s, false))) + ; /* wait_for_in_progress() has slept */ + } + mutex_lock(&s->lock); if (!s->valid || (unlikely(s->snapshot_overflowed) && @@ -1876,7 +1915,7 @@ redirect_to_origin: if (bio_data_dir(bio) == WRITE) { mutex_unlock(&s->lock); - return do_origin(s->origin, bio); + return do_origin(s->origin, bio, false); } out_unlock: @@ -2213,15 +2252,24 @@ next_snapshot: /* * Called on a write from the origin driver. */ -static int do_origin(struct dm_dev *origin, struct bio *bio) +static int do_origin(struct dm_dev *origin, struct bio *bio, bool limit) { struct origin *o; int r = DM_MAPIO_REMAPPED; +again: down_read(&_origins_lock); o = __lookup_origin(origin->bdev); - if (o) + if (o) { + if (limit) { + struct dm_snapshot *s; + list_for_each_entry(s, &o->snapshots, list) + if (unlikely(!wait_for_in_progress(s, true))) + goto again; + } + r = __origin_write(&o->snapshots, bio->bi_iter.bi_sector, bio); + } up_read(&_origins_lock); return r; @@ -2334,7 +2382,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio) dm_accept_partial_bio(bio, available_sectors); /* Only tell snapshots if this is a write */ - return do_origin(o->dev, bio); + return do_origin(o->dev, bio, true); } static long origin_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, From 1de0fd828661a068a574381c5bcd3b0a19f522c5 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 5 Jun 2018 05:26:33 -0400 Subject: [PATCH 0143/3715] dm: Use kzalloc for all structs with embedded biosets/mempools [ Upstream commit d377535405686f735b90a8ad4ba269484cd7c96e ] mempool_init()/bioset_init() require that the mempools/biosets be zeroed first; they probably should not _require_ this, but not allocating those structs with kzalloc is a fairly nonsensical thing to do (calling mempool_exit()/bioset_exit() on an uninitialized mempool/bioset is legal and safe, but only works if said memory was zeroed.) Acked-by: Mike Snitzer Signed-off-by: Kent Overstreet Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/md/dm-bio-prison-v1.c | 2 +- drivers/md/dm-bio-prison-v2.c | 2 +- drivers/md/dm-io.c | 2 +- drivers/md/dm-kcopyd.c | 2 +- drivers/md/dm-region-hash.c | 2 +- drivers/md/dm-snap.c | 2 +- drivers/md/dm-thin.c | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/md/dm-bio-prison-v1.c b/drivers/md/dm-bio-prison-v1.c index 874841f0fc83..10532a76688e 100644 --- a/drivers/md/dm-bio-prison-v1.c +++ b/drivers/md/dm-bio-prison-v1.c @@ -33,7 +33,7 @@ static struct kmem_cache *_cell_cache; */ struct dm_bio_prison *dm_bio_prison_create(void) { - struct dm_bio_prison *prison = kmalloc(sizeof(*prison), GFP_KERNEL); + struct dm_bio_prison *prison = kzalloc(sizeof(*prison), GFP_KERNEL); if (!prison) return NULL; diff --git a/drivers/md/dm-bio-prison-v2.c b/drivers/md/dm-bio-prison-v2.c index 8ce3a1a588cf..c34ec615420f 100644 --- a/drivers/md/dm-bio-prison-v2.c +++ b/drivers/md/dm-bio-prison-v2.c @@ -35,7 +35,7 @@ static struct kmem_cache *_cell_cache; */ struct dm_bio_prison_v2 *dm_bio_prison_create_v2(struct workqueue_struct *wq) { - struct dm_bio_prison_v2 *prison = kmalloc(sizeof(*prison), GFP_KERNEL); + struct dm_bio_prison_v2 *prison = kzalloc(sizeof(*prison), GFP_KERNEL); if (!prison) return NULL; diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index b4357ed4d541..56e2c0e079d7 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -50,7 +50,7 @@ struct dm_io_client *dm_io_client_create(void) struct dm_io_client *client; unsigned min_ios = dm_get_reserved_bio_based_ios(); - client = kmalloc(sizeof(*client), GFP_KERNEL); + client = kzalloc(sizeof(*client), GFP_KERNEL); if (!client) return ERR_PTR(-ENOMEM); diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index bd9a45b94b55..7ca2b1aaa79d 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -892,7 +892,7 @@ struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *thro int r = -ENOMEM; struct dm_kcopyd_client *kc; - kc = kmalloc(sizeof(*kc), GFP_KERNEL); + kc = kzalloc(sizeof(*kc), GFP_KERNEL); if (!kc) return ERR_PTR(-ENOMEM); diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c index 85c32b22a420..91c6f6d72eee 100644 --- a/drivers/md/dm-region-hash.c +++ b/drivers/md/dm-region-hash.c @@ -179,7 +179,7 @@ struct dm_region_hash *dm_region_hash_create( ; nr_buckets >>= 1; - rh = kmalloc(sizeof(*rh), GFP_KERNEL); + rh = kzalloc(sizeof(*rh), GFP_KERNEL); if (!rh) { DMERR("unable to allocate region hash memory"); return ERR_PTR(-ENOMEM); diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 95c564b60d79..2170f6c118b8 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1136,7 +1136,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) origin_mode = FMODE_WRITE; } - s = kmalloc(sizeof(*s), GFP_KERNEL); + s = kzalloc(sizeof(*s), GFP_KERNEL); if (!s) { ti->error = "Cannot allocate private snapshot structure"; r = -ENOMEM; diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index aa7795990989..0ee5eae71690 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2962,7 +2962,7 @@ static struct pool *pool_create(struct mapped_device *pool_md, return (struct pool *)pmd; } - pool = kmalloc(sizeof(*pool), GFP_KERNEL); + pool = kzalloc(sizeof(*pool), GFP_KERNEL); if (!pool) { *error = "Error allocating memory for pool"; err_p = ERR_PTR(-ENOMEM); From c9c4c606965387206a3993d7cc556aa66d53c647 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 27 Jan 2019 17:59:53 -0800 Subject: [PATCH 0144/3715] f2fs: flush quota blocks after turnning it off [ Upstream commit 0e0667b625cf64243df83171bff61f9d350b9ca5 ] After quota_off, we'll get some dirty blocks. If put_super don't have a chance to flush them by checkpoint, it causes NULL pointer exception in end_io after iput(node_inode). (e.g., by checkpoint=disable) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/super.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index e70975ca723b..0f3209b23c94 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1523,6 +1523,12 @@ void f2fs_quota_off_umount(struct super_block *sb) set_sbi_flag(F2FS_SB(sb), SBI_NEED_FSCK); } } + /* + * In case of checkpoint=disable, we must flush quota blocks. + * This can cause NULL exception for node_inode in end_io, since + * put_super already dropped it. + */ + sync_filesystem(sb); } int f2fs_get_projid(struct inode *inode, kprojid_t *projid) From bedb8799772d9a41a3fcefe83117797589c47c08 Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 29 Nov 2018 16:09:31 -0800 Subject: [PATCH 0145/3715] scsi: lpfc: Fix a duplicate 0711 log message number. [ Upstream commit 2c4c91415a05677acc5c8131a5eb472d4aa96ae1 ] Renumber one of the 0711 log messages so there isn't a duplication. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_scsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 4ade13d72deb..07cb671bb855 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -4152,7 +4152,7 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn, /* If pCmd was set to NULL from abort path, do not call scsi_done */ if (xchg(&lpfc_cmd->pCmd, NULL) == NULL) { lpfc_printf_vlog(vport, KERN_INFO, LOG_FCP, - "0711 FCP cmd already NULL, sid: 0x%06x, " + "5688 FCP cmd already NULL, sid: 0x%06x, " "did: 0x%06x, oxid: 0x%04x\n", vport->fc_myDID, (pnode) ? pnode->nlp_DID : 0, From 9fae7f0a49ddfe8ebc8181e321f42e90fe15b69e Mon Sep 17 00:00:00 2001 From: Phil Elwell Date: Wed, 12 Sep 2018 15:31:56 +0100 Subject: [PATCH 0146/3715] sc16is7xx: Fix for "Unexpected interrupt: 8" [ Upstream commit 30ec514d440cf2c472c8e4b0079af2c731f71a3e ] The SC16IS752 has an Enhanced Feature Register which is aliased at the same address as the Interrupt Identification Register; accessing it requires that a magic value is written to the Line Configuration Register. If an interrupt is raised while the EFR is mapped in then the ISR won't be able to access the IIR, leading to the "Unexpected interrupt" error messages. Avoid the problem by claiming a mutex around accesses to the EFR register, also claiming the mutex in the interrupt handler work item (this is equivalent to disabling interrupts to interlock against a non-threaded interrupt handler). See: https://github.com/raspberrypi/linux/issues/2529 Signed-off-by: Phil Elwell Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/sc16is7xx.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index e48523da47ac..c1655aba131f 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -333,6 +333,7 @@ struct sc16is7xx_port { struct kthread_worker kworker; struct task_struct *kworker_task; struct kthread_work irq_work; + struct mutex efr_lock; struct sc16is7xx_one p[0]; }; @@ -504,6 +505,21 @@ static int sc16is7xx_set_baud(struct uart_port *port, int baud) div /= 4; } + /* In an amazing feat of design, the Enhanced Features Register shares + * the address of the Interrupt Identification Register, and is + * switched in by writing a magic value (0xbf) to the Line Control + * Register. Any interrupt firing during this time will see the EFR + * where it expects the IIR to be, leading to "Unexpected interrupt" + * messages. + * + * Prevent this possibility by claiming a mutex while accessing the + * EFR, and claiming the same mutex from within the interrupt handler. + * This is similar to disabling the interrupt, but that doesn't work + * because the bulk of the interrupt processing is run as a workqueue + * job in thread context. + */ + mutex_lock(&s->efr_lock); + lcr = sc16is7xx_port_read(port, SC16IS7XX_LCR_REG); /* Open the LCR divisors for configuration */ @@ -519,6 +535,8 @@ static int sc16is7xx_set_baud(struct uart_port *port, int baud) /* Put LCR back to the normal mode */ sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, lcr); + mutex_unlock(&s->efr_lock); + sc16is7xx_port_update(port, SC16IS7XX_MCR_REG, SC16IS7XX_MCR_CLKSEL_BIT, prescaler); @@ -701,6 +719,8 @@ static void sc16is7xx_ist(struct kthread_work *ws) { struct sc16is7xx_port *s = to_sc16is7xx_port(ws, irq_work); + mutex_lock(&s->efr_lock); + while (1) { bool keep_polling = false; int i; @@ -710,6 +730,8 @@ static void sc16is7xx_ist(struct kthread_work *ws) if (!keep_polling) break; } + + mutex_unlock(&s->efr_lock); } static irqreturn_t sc16is7xx_irq(int irq, void *dev_id) @@ -904,6 +926,9 @@ static void sc16is7xx_set_termios(struct uart_port *port, if (!(termios->c_cflag & CREAD)) port->ignore_status_mask |= SC16IS7XX_LSR_BRK_ERROR_MASK; + /* As above, claim the mutex while accessing the EFR. */ + mutex_lock(&s->efr_lock); + sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, SC16IS7XX_LCR_CONF_MODE_B); @@ -925,6 +950,8 @@ static void sc16is7xx_set_termios(struct uart_port *port, /* Update LCR register */ sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, lcr); + mutex_unlock(&s->efr_lock); + /* Get baud rate generator configuration */ baud = uart_get_baud_rate(port, termios, old, port->uartclk / 16 / 4 / 0xffff, @@ -1187,6 +1214,7 @@ static int sc16is7xx_probe(struct device *dev, s->regmap = regmap; s->devtype = devtype; dev_set_drvdata(dev, s); + mutex_init(&s->efr_lock); kthread_init_worker(&s->kworker); kthread_init_work(&s->irq_work, sc16is7xx_ist); From 578828ddb50647124253017742731ae2de7844e6 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 30 Oct 2018 15:10:39 -0700 Subject: [PATCH 0147/3715] powerpc/powernv: hold device_hotplug_lock when calling memtrace_offline_pages() [ Upstream commit 5666848774ef43d3db5151ec518f1deb63515c20 ] Let's perform all checking + offlining + removing under device_hotplug_lock, so nobody can mess with these devices via sysfs concurrently. [david@redhat.com: take device_hotplug_lock outside of loop] Link: http://lkml.kernel.org/r/20180927092554.13567-6-david@redhat.com Link: http://lkml.kernel.org/r/20180925091457.28651-6-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Pavel Tatashin Reviewed-by: Rashmica Gupta Acked-by: Balbir Singh Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Rashmica Gupta Cc: Michael Neuling Cc: Boris Ostrovsky Cc: Dan Williams Cc: Greg Kroah-Hartman Cc: Haiyang Zhang Cc: Heiko Carstens Cc: John Allen Cc: Jonathan Corbet Cc: Joonsoo Kim Cc: Juergen Gross Cc: Kate Stewart Cc: "K. Y. Srinivasan" Cc: Len Brown Cc: Martin Schwidefsky Cc: Mathieu Malaterre Cc: Michal Hocko Cc: Nathan Fontenot Cc: Oscar Salvador Cc: Philippe Ombredanne Cc: Rafael J. Wysocki Cc: "Rafael J. Wysocki" Cc: Stephen Hemminger Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: YASUAKI ISHIMATSU Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- arch/powerpc/platforms/powernv/memtrace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index c9a6d4f3403c..cfbd242c3e01 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -99,6 +99,7 @@ static int change_memblock_state(struct memory_block *mem, void *arg) return 0; } +/* called with device_hotplug_lock held */ static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages) { u64 end_pfn = start_pfn + nr_pages - 1; @@ -139,6 +140,7 @@ static u64 memtrace_alloc_node(u32 nid, u64 size) /* Trace memory needs to be aligned to the size */ end_pfn = round_down(end_pfn - nr_pages, nr_pages); + lock_device_hotplug(); for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) { if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) { /* @@ -147,7 +149,6 @@ static u64 memtrace_alloc_node(u32 nid, u64 size) * we never try to remove memory that spans two iomem * resources. */ - lock_device_hotplug(); end_pfn = base_pfn + nr_pages; for (pfn = base_pfn; pfn < end_pfn; pfn += bytes>> PAGE_SHIFT) { remove_memory(nid, pfn << PAGE_SHIFT, bytes); @@ -156,6 +157,7 @@ static u64 memtrace_alloc_node(u32 nid, u64 size) return base_pfn << PAGE_SHIFT; } } + unlock_device_hotplug(); return 0; } From 43e10b379f94ebcf1b2d2564d10c623083f81703 Mon Sep 17 00:00:00 2001 From: Julian Sax Date: Wed, 24 Oct 2018 22:40:26 +0200 Subject: [PATCH 0148/3715] HID: i2c-hid: add Direkt-Tek DTLAPY133-1 to descriptor override [ Upstream commit 399474e4c1100bca264ed14fa3ad0d68fab484d8 ] This device uses the SIPODEV SP1064 touchpad, which does not supply descriptors, so it has to be added to the override list. Reported-by: Tim Aldridge Signed-off-by: Julian Sax Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c index cac262a912c1..89f2976f9c53 100644 --- a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c +++ b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c @@ -330,6 +330,14 @@ static const struct dmi_system_id i2c_hid_dmi_desc_override_table[] = { }, .driver_data = (void *)&sipodev_desc }, + { + .ident = "Direkt-Tek DTLAPY133-1", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Direkt-Tek"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "DTLAPY133-1"), + }, + .driver_data = (void *)&sipodev_desc + }, { .ident = "Mediacom Flexbook Edge 11", .matches = { From 290acb8c92b543a969111220c6b3c1322ac9e39d Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 25 Jan 2019 11:59:01 -0800 Subject: [PATCH 0149/3715] x86/cpu: Add Atom Tremont (Jacobsville) [ Upstream commit 00ae831dfe4474ef6029558f5eb3ef0332d80043 ] Add the Atom Tremont model number to the Intel family list. [ Tony: Also update comment at head of file to say "_X" suffix is also used for microserver parts. ] Signed-off-by: Kan Liang Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Cc: Andy Shevchenko Cc: Aristeu Rozanski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: linux-edac Cc: Mauro Carvalho Chehab Cc: Megha Dey Cc: Peter Zijlstra Cc: Qiuxu Zhuo Cc: Rajneesh Bhardwaj Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/20190125195902.17109-4-tony.luck@intel.com Signed-off-by: Sasha Levin --- arch/x86/include/asm/intel-family.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 038e4b63b56b..5cd7d4e1579d 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -6,7 +6,7 @@ * "Big Core" Processors (Branded as Core, Xeon, etc...) * * The "_X" parts are generally the EP and EX Xeons, or the - * "Extreme" ones, like Broadwell-E. + * "Extreme" ones, like Broadwell-E, or Atom microserver. * * Things ending in "2" are usually because we have no better * name for them. There's no processor called "SILVERMONT2". @@ -68,6 +68,7 @@ #define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */ #define INTEL_FAM6_ATOM_GOLDMONT_X 0x5F /* Denverton */ #define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */ +#define INTEL_FAM6_ATOM_TREMONT_X 0x86 /* Jacobsville */ /* Xeon Phi */ From a460bf7d68f305a11f87c5dc2ca282e4b8656120 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 26 Dec 2018 15:31:56 +0100 Subject: [PATCH 0150/3715] HID: i2c-hid: Add Odys Winbook 13 to descriptor override [ Upstream commit f8f807441eefddc3c6d8a378421f0ede6361d565 ] The Odys Winbook 13 uses a SIPODEV SP1064 touchpad, which does not supply descriptors, add this to the DMI descriptor override list, fixing the touchpad not working. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1526312 Reported-by: Rene Wagner Signed-off-by: Hans de Goede Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c index 89f2976f9c53..fd1b6eea6d2f 100644 --- a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c +++ b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c @@ -346,6 +346,14 @@ static const struct dmi_system_id i2c_hid_dmi_desc_override_table[] = { }, .driver_data = (void *)&sipodev_desc }, + { + .ident = "Odys Winbook 13", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "AXDIA International GmbH"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "WINBOOK 13"), + }, + .driver_data = (void *)&sipodev_desc + }, { } /* Terminate list */ }; From b9d60b4072038f1e30ee4e1903cb5a53bf1b879b Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Wed, 31 Oct 2018 15:41:42 +0800 Subject: [PATCH 0151/3715] clk: boston: unregister clks on failure in clk_boston_setup() [ Upstream commit 8b627f616ed63dcaf922369fc14a5daf8ad03038 ] The registered clks should unregister when something wrong happens before going out in function clk_boston_setup(). Signed-off-by: Yi Wang Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/imgtec/clk-boston.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/clk/imgtec/clk-boston.c b/drivers/clk/imgtec/clk-boston.c index f5d54a64d33c..dddda45127a8 100644 --- a/drivers/clk/imgtec/clk-boston.c +++ b/drivers/clk/imgtec/clk-boston.c @@ -73,31 +73,39 @@ static void __init clk_boston_setup(struct device_node *np) hw = clk_hw_register_fixed_rate(NULL, "input", NULL, 0, in_freq); if (IS_ERR(hw)) { pr_err("failed to register input clock: %ld\n", PTR_ERR(hw)); - goto error; + goto fail_input; } onecell->hws[BOSTON_CLK_INPUT] = hw; hw = clk_hw_register_fixed_rate(NULL, "sys", "input", 0, sys_freq); if (IS_ERR(hw)) { pr_err("failed to register sys clock: %ld\n", PTR_ERR(hw)); - goto error; + goto fail_sys; } onecell->hws[BOSTON_CLK_SYS] = hw; hw = clk_hw_register_fixed_rate(NULL, "cpu", "input", 0, cpu_freq); if (IS_ERR(hw)) { pr_err("failed to register cpu clock: %ld\n", PTR_ERR(hw)); - goto error; + goto fail_cpu; } onecell->hws[BOSTON_CLK_CPU] = hw; err = of_clk_add_hw_provider(np, of_clk_hw_onecell_get, onecell); - if (err) + if (err) { pr_err("failed to add DT provider: %d\n", err); + goto fail_clk_add; + } return; -error: +fail_clk_add: + clk_hw_unregister_fixed_rate(onecell->hws[BOSTON_CLK_CPU]); +fail_cpu: + clk_hw_unregister_fixed_rate(onecell->hws[BOSTON_CLK_SYS]); +fail_sys: + clk_hw_unregister_fixed_rate(onecell->hws[BOSTON_CLK_INPUT]); +fail_input: kfree(onecell); } From 37ce64502d7ec7647876f213e31bb9564a248cf5 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 14 Nov 2018 18:11:18 -0800 Subject: [PATCH 0152/3715] scripts/setlocalversion: Improve -dirty check with git-status --no-optional-locks [ Upstream commit ff64dd4857303dd5550faed9fd598ac90f0f2238 ] git-diff-index does not refresh the index for you, so using it for a "-dirty" check can give misleading results. Commit 6147b1cf19651 ("scripts/setlocalversion: git: Make -dirty check more robust") tried to fix this by switching to git-status, but it overlooked the fact that git-status also writes to the .git directory of the source tree, which is definitely not kosher for an out-of-tree (O=) build. That is getting reverted. Fortunately, git-status now supports avoiding writing to the index via the --no-optional-locks flag, as of git 2.14. It still calculates an up-to-date index, but it avoids writing it out to the .git directory. So, let's retry the solution from commit 6147b1cf19651 using this new flag first, and if it fails, we assume this is an older version of git and just use the old git-diff-index method. It's hairy to get the 'grep -vq' (inverted matching) correct by stashing the output of git-status (you have to be careful about the difference betwen "empty stdin" and "blank line on stdin"), so just pipe the output directly to grep and use a regex that's good enough for both the git-status and git-diff-index version. Cc: Christian Kujau Cc: Guenter Roeck Suggested-by: Alexander Kapshuk Signed-off-by: Brian Norris Tested-by: Genki Sky Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/setlocalversion | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/scripts/setlocalversion b/scripts/setlocalversion index 71f39410691b..365b3c2b8f43 100755 --- a/scripts/setlocalversion +++ b/scripts/setlocalversion @@ -73,8 +73,16 @@ scm_version() printf -- '-svn%s' "`git svn find-rev $head`" fi - # Check for uncommitted changes - if git diff-index --name-only HEAD | grep -qv "^scripts/package"; then + # Check for uncommitted changes. + # First, with git-status, but --no-optional-locks is only + # supported in git >= 2.14, so fall back to git-diff-index if + # it fails. Note that git-diff-index does not refresh the + # index, so it may give misleading results. See + # git-update-index(1), git-diff-index(1), and git-status(1). + if { + git --no-optional-locks status -uno --porcelain 2>/dev/null || + git diff-index --name-only HEAD + } | grep -qvE '^(.. )?scripts/package'; then printf '%s' -dirty fi From 07c8ac768cce606b1e68b11fde18e69ea3139776 Mon Sep 17 00:00:00 2001 From: NOGUCHI Hiroshi Date: Tue, 29 Jan 2019 13:31:05 +0900 Subject: [PATCH 0153/3715] HID: Add ASUS T100CHI keyboard dock battery quirks [ Upstream commit a767ffea05d2737f6542cd78458a84a157fa216d ] Add ASUS Transbook T100CHI/T90CHI keyboard dock into battery quirk list, in order to add specific implementation in hid-asus. Signed-off-by: NOGUCHI Hiroshi Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-input.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index d723185de3ba..9d24fb0715ba 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -328,6 +328,9 @@ static const struct hid_device_id hid_battery_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_SYMBOL, USB_DEVICE_ID_SYMBOL_SCANNER_3), HID_BATTERY_QUIRK_IGNORE }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ASUSTEK, + USB_DEVICE_ID_ASUSTEK_T100CHI_KEYBOARD), + HID_BATTERY_QUIRK_IGNORE }, {} }; From e9d94c641a1f9143dc9b187d3300d5f11f006860 Mon Sep 17 00:00:00 2001 From: Jan-Marek Glogowski Date: Fri, 1 Feb 2019 13:52:31 +0100 Subject: [PATCH 0154/3715] usb: handle warm-reset port requests on hub resume [ Upstream commit 4fdc1790e6a9ef22399c6bc6e63b80f4609f3b7e ] On plug-in of my USB-C device, its USB_SS_PORT_LS_SS_INACTIVE link state bit is set. Greping all the kernel for this bit shows that the port status requests a warm-reset this way. This just happens, if its the only device on the root hub, the hub therefore resumes and the HCDs status_urb isn't yet available. If a warm-reset request is detected, this sets the hubs event_bits, which will prevent any auto-suspend and allows the hubs workqueue to warm-reset the port later in port_event. Signed-off-by: Jan-Marek Glogowski Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/core/hub.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index b543a4730ef2..bb20aa433e98 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -104,6 +104,8 @@ EXPORT_SYMBOL_GPL(ehci_cf_port_reset_rwsem); static void hub_release(struct kref *kref); static int usb_reset_and_verify_device(struct usb_device *udev); static int hub_port_disable(struct usb_hub *hub, int port1, int set_state); +static bool hub_port_warm_reset_required(struct usb_hub *hub, int port1, + u16 portstatus); static inline char *portspeed(struct usb_hub *hub, int portstatus) { @@ -1110,6 +1112,11 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) USB_PORT_FEAT_ENABLE); } + /* Make sure a warm-reset request is handled by port_event */ + if (type == HUB_RESUME && + hub_port_warm_reset_required(hub, port1, portstatus)) + set_bit(port1, hub->event_bits); + /* * Add debounce if USB3 link is in polling/link training state. * Link will automatically transition to Enabled state after From 9375241055edc43e488d4b0f397373176b8b95b2 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sat, 19 Jan 2019 10:00:30 +0100 Subject: [PATCH 0155/3715] rtc: pcf8523: set xtal load capacitance from DT [ Upstream commit 189927e719e36ceefbb8037f23d3849e47833aef ] Add support for specifying the xtal load capacitance in the DT node. The pcf8523 supports xtal load capacitance of 7pF or 12.5pF. If the rtc has the wrong configuration the time will drift several hours/week. The driver use the default value 12.5pF. The DT may specify either 7000fF or 12500fF. (The DT uses femto Farad to avoid decimal numbers). Other values are warned and the driver uses the default value. Signed-off-by: Sam Ravnborg Cc: Alessandro Zummo Cc: Alexandre Belloni Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- drivers/rtc/rtc-pcf8523.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/rtc/rtc-pcf8523.c b/drivers/rtc/rtc-pcf8523.c index 3c8c6f942e67..a06792966ea9 100644 --- a/drivers/rtc/rtc-pcf8523.c +++ b/drivers/rtc/rtc-pcf8523.c @@ -94,8 +94,9 @@ static int pcf8523_voltage_low(struct i2c_client *client) return !!(value & REG_CONTROL3_BLF); } -static int pcf8523_select_capacitance(struct i2c_client *client, bool high) +static int pcf8523_load_capacitance(struct i2c_client *client) { + u32 load; u8 value; int err; @@ -103,14 +104,24 @@ static int pcf8523_select_capacitance(struct i2c_client *client, bool high) if (err < 0) return err; - if (!high) - value &= ~REG_CONTROL1_CAP_SEL; - else + load = 12500; + of_property_read_u32(client->dev.of_node, "quartz-load-femtofarads", + &load); + + switch (load) { + default: + dev_warn(&client->dev, "Unknown quartz-load-femtofarads value: %d. Assuming 12500", + load); + /* fall through */ + case 12500: value |= REG_CONTROL1_CAP_SEL; + break; + case 7000: + value &= ~REG_CONTROL1_CAP_SEL; + break; + } err = pcf8523_write(client, REG_CONTROL1, value); - if (err < 0) - return err; return err; } @@ -307,9 +318,10 @@ static int pcf8523_probe(struct i2c_client *client, if (!pcf) return -ENOMEM; - err = pcf8523_select_capacitance(client, true); + err = pcf8523_load_capacitance(client); if (err < 0) - return err; + dev_warn(&client->dev, "failed to set xtal load capacitance: %d", + err); err = pcf8523_set_pm(client, 0); if (err < 0) From aec76c5a1507761c0b983ae856ee0e23c01794c2 Mon Sep 17 00:00:00 2001 From: Nir Dotan Date: Tue, 12 Feb 2019 16:29:51 +0000 Subject: [PATCH 0156/3715] mlxsw: spectrum: Set LAG port collector only when active [ Upstream commit 48ebab31d424fbdb8ede8914923bec671a933246 ] The LAG port collecting (receive) function was mistakenly set when the port was registered as a LAG member, while it should be set only when the port collection state is set to true. Set LAG port to collecting when it is set to distributing, as described in the IEEE link aggregation standard coupled control mux machine state diagram. Signed-off-by: Nir Dotan Acked-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 62 ++++++++++++++----- 1 file changed, 45 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 5c74787f903b..a909aa315a92 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4077,9 +4077,6 @@ static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, err = mlxsw_sp_lag_col_port_add(mlxsw_sp_port, lag_id, port_index); if (err) goto err_col_port_add; - err = mlxsw_sp_lag_col_port_enable(mlxsw_sp_port, lag_id); - if (err) - goto err_col_port_enable; mlxsw_core_lag_mapping_set(mlxsw_sp->core, lag_id, port_index, mlxsw_sp_port->local_port); @@ -4094,8 +4091,6 @@ static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, return 0; -err_col_port_enable: - mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id); err_col_port_add: if (!lag->ref_count) mlxsw_sp_lag_destroy(mlxsw_sp, lag_id); @@ -4114,7 +4109,6 @@ static void mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port, lag = mlxsw_sp_lag_get(mlxsw_sp, lag_id); WARN_ON(lag->ref_count == 0); - mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, lag_id); mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id); /* Any VLANs configured on the port are no longer valid */ @@ -4159,21 +4153,56 @@ static int mlxsw_sp_lag_dist_port_remove(struct mlxsw_sp_port *mlxsw_sp_port, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl); } -static int mlxsw_sp_port_lag_tx_en_set(struct mlxsw_sp_port *mlxsw_sp_port, - bool lag_tx_enabled) +static int +mlxsw_sp_port_lag_col_dist_enable(struct mlxsw_sp_port *mlxsw_sp_port) { - if (lag_tx_enabled) - return mlxsw_sp_lag_dist_port_add(mlxsw_sp_port, - mlxsw_sp_port->lag_id); - else - return mlxsw_sp_lag_dist_port_remove(mlxsw_sp_port, - mlxsw_sp_port->lag_id); + int err; + + err = mlxsw_sp_lag_col_port_enable(mlxsw_sp_port, + mlxsw_sp_port->lag_id); + if (err) + return err; + + err = mlxsw_sp_lag_dist_port_add(mlxsw_sp_port, mlxsw_sp_port->lag_id); + if (err) + goto err_dist_port_add; + + return 0; + +err_dist_port_add: + mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, mlxsw_sp_port->lag_id); + return err; +} + +static int +mlxsw_sp_port_lag_col_dist_disable(struct mlxsw_sp_port *mlxsw_sp_port) +{ + int err; + + err = mlxsw_sp_lag_dist_port_remove(mlxsw_sp_port, + mlxsw_sp_port->lag_id); + if (err) + return err; + + err = mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, + mlxsw_sp_port->lag_id); + if (err) + goto err_col_port_disable; + + return 0; + +err_col_port_disable: + mlxsw_sp_lag_dist_port_add(mlxsw_sp_port, mlxsw_sp_port->lag_id); + return err; } static int mlxsw_sp_port_lag_changed(struct mlxsw_sp_port *mlxsw_sp_port, struct netdev_lag_lower_state_info *info) { - return mlxsw_sp_port_lag_tx_en_set(mlxsw_sp_port, info->tx_enabled); + if (info->tx_enabled) + return mlxsw_sp_port_lag_col_dist_enable(mlxsw_sp_port); + else + return mlxsw_sp_port_lag_col_dist_disable(mlxsw_sp_port); } static int mlxsw_sp_port_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, @@ -4309,8 +4338,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, err = mlxsw_sp_port_lag_join(mlxsw_sp_port, upper_dev); } else { - mlxsw_sp_port_lag_tx_en_set(mlxsw_sp_port, - false); + mlxsw_sp_port_lag_col_dist_disable(mlxsw_sp_port); mlxsw_sp_port_lag_leave(mlxsw_sp_port, upper_dev); } From fb000f5b9ceb8f15c059f9dba5507c842661b290 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 29 Jan 2019 14:14:51 +0100 Subject: [PATCH 0157/3715] ALSA: hda/realtek - Apply ALC294 hp init also for S4 resume [ Upstream commit f6ef4e0e284251ff795c541db1129c84515ed044 ] The init sequence for ALC294 headphone stuff is needed not only for the boot up time but also for the resume from hibernation, where the device is switched from the boot kernel without sound driver to the suspended image. Since we record the PM event in the device power_state field, we can now recognize the call pattern and apply the sequence conditionally. Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_realtek.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 5412952557f7..8d6c5be38736 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3246,7 +3246,9 @@ static void alc294_init(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; - if (!spec->done_hp_init) { + /* required only at boot or S4 resume time */ + if (!spec->done_hp_init || + codec->core.dev.power.power_state.event == PM_EVENT_RESTORE) { alc294_hp_init(codec); spec->done_hp_init = true; } From 10af43d90c5625fba9e2008564767e4910ba1473 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lucas=20A=2E=20M=2E=20Magalh=C3=A3es?= Date: Thu, 7 Feb 2019 18:59:41 -0500 Subject: [PATCH 0158/3715] media: vimc: Remove unused but set variables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5515e414f42bf2769caae15b634004d456658284 ] Remove unused but set variables to clean up the code and avoid warning. Signed-off-by: Lucas A. M. Magalhães Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/vimc/vimc-sensor.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/media/platform/vimc/vimc-sensor.c b/drivers/media/platform/vimc/vimc-sensor.c index 70cee5c0c89a..29a16f8a4123 100644 --- a/drivers/media/platform/vimc/vimc-sensor.c +++ b/drivers/media/platform/vimc/vimc-sensor.c @@ -200,13 +200,6 @@ static void *vimc_sen_process_frame(struct vimc_ent_device *ved, { struct vimc_sen_device *vsen = container_of(ved, struct vimc_sen_device, ved); - const struct vimc_pix_map *vpix; - unsigned int frame_size; - - /* Calculate the frame size */ - vpix = vimc_pix_map_by_code(vsen->mbus_format.code); - frame_size = vsen->mbus_format.width * vpix->bpp * - vsen->mbus_format.height; tpg_fill_plane_buffer(&vsen->tpg, 0, 0, vsen->frame); return vsen->frame; From 5c28d84db418bb35bdaf8741032fb283d51ca4f0 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 18 Feb 2019 16:36:48 -0800 Subject: [PATCH 0159/3715] exec: load_script: Do not exec truncated interpreter path [ Upstream commit b5372fe5dc84235dbe04998efdede3c4daa866a9 ] Commit 8099b047ecc4 ("exec: load_script: don't blindly truncate shebang string") was trying to protect against a confused exec of a truncated interpreter path. However, it was overeager and also refused to truncate arguments as well, which broke userspace, and it was reverted. This attempts the protection again, but allows arguments to remain truncated. In an effort to improve readability, helper functions and comments have been added. Co-developed-by: Linus Torvalds Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Oleg Nesterov Cc: Samuel Dionne-Riel Cc: Richard Weinberger Cc: Graham Christensen Cc: Michal Hocko Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/binfmt_script.c | 57 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 9 deletions(-) diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index 7cde3f46ad26..e996174cbfc0 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -14,13 +14,30 @@ #include #include +static inline bool spacetab(char c) { return c == ' ' || c == '\t'; } +static inline char *next_non_spacetab(char *first, const char *last) +{ + for (; first <= last; first++) + if (!spacetab(*first)) + return first; + return NULL; +} +static inline char *next_terminator(char *first, const char *last) +{ + for (; first <= last; first++) + if (spacetab(*first) || !*first) + return first; + return NULL; +} + static int load_script(struct linux_binprm *bprm) { const char *i_arg, *i_name; - char *cp; + char *cp, *buf_end; struct file *file; int retval; + /* Not ours to exec if we don't start with "#!". */ if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!')) return -ENOEXEC; @@ -33,18 +50,40 @@ static int load_script(struct linux_binprm *bprm) if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE) return -ENOENT; - /* - * This section does the #! interpretation. - * Sorta complicated, but hopefully it will work. -TYT - */ - + /* Release since we are not mapping a binary into memory. */ allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; - bprm->buf[BINPRM_BUF_SIZE - 1] = '\0'; - if ((cp = strchr(bprm->buf, '\n')) == NULL) - cp = bprm->buf+BINPRM_BUF_SIZE-1; + /* + * This section handles parsing the #! line into separate + * interpreter path and argument strings. We must be careful + * because bprm->buf is not yet guaranteed to be NUL-terminated + * (though the buffer will have trailing NUL padding when the + * file size was smaller than the buffer size). + * + * We do not want to exec a truncated interpreter path, so either + * we find a newline (which indicates nothing is truncated), or + * we find a space/tab/NUL after the interpreter path (which + * itself may be preceded by spaces/tabs). Truncating the + * arguments is fine: the interpreter can re-read the script to + * parse them on its own. + */ + buf_end = bprm->buf + sizeof(bprm->buf) - 1; + cp = strnchr(bprm->buf, sizeof(bprm->buf), '\n'); + if (!cp) { + cp = next_non_spacetab(bprm->buf + 2, buf_end); + if (!cp) + return -ENOEXEC; /* Entire buf is spaces/tabs */ + /* + * If there is no later space/tab/NUL we must assume the + * interpreter path is truncated. + */ + if (!next_terminator(cp, buf_end)) + return -ENOEXEC; + cp = buf_end; + } + /* NUL-terminate the buffer and any trailing spaces/tabs. */ *cp = '\0'; while (cp > bprm->buf) { cp--; From dbd7e918517c59e29e3c12f4a37a01d5fe314ff4 Mon Sep 17 00:00:00 2001 From: Sven Van Asbroeck Date: Fri, 1 Mar 2019 11:54:19 -0500 Subject: [PATCH 0160/3715] PCI/PME: Fix possible use-after-free on remove [ Upstream commit 7cf58b79b3072029af127ae865ffc6f00f34b1f8 ] In remove(), ensure that the PME work cannot run after kfree() is called. Otherwise, this could result in a use-after-free. This issue was detected with the help of Coccinelle. Signed-off-by: Sven Van Asbroeck Signed-off-by: Bjorn Helgaas Cc: Sinan Kaya Cc: Frederick Lawler Cc: Mika Westerberg Cc: Keith Busch Cc: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/pci/pcie/pme.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/pcie/pme.c b/drivers/pci/pcie/pme.c index c2e6e3d1073f..5500660bbb10 100644 --- a/drivers/pci/pcie/pme.c +++ b/drivers/pci/pcie/pme.c @@ -441,6 +441,7 @@ static void pcie_pme_remove(struct pcie_device *srv) pcie_pme_disable_interrupt(srv->port, data); free_irq(srv->irq, srv); + cancel_work_sync(&data->work); kfree(data); } From 369c69642e1114e242b97a44228dc325b9fb90c8 Mon Sep 17 00:00:00 2001 From: Sven Van Asbroeck Date: Fri, 15 Feb 2019 16:43:03 -0500 Subject: [PATCH 0161/3715] power: supply: max14656: fix potential use-after-free [ Upstream commit 252fbeb86ceffa549af9842cefca2412d53a7653 ] Explicitly cancel/sync the irq_work delayed work, otherwise there's a chance that it will run after the device is removed, which would result in a use-after-free. Note that cancel/sync should happen: - after irq's have been disabled, as the isr re-schedules the work - before the power supply is unregistered, because the work func uses the power supply handle. Cc: Alexander Kurz Signed-off-by: Sven Van Asbroeck Signed-off-by: Sebastian Reichel Signed-off-by: Sasha Levin --- .../power/supply/max14656_charger_detector.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/power/supply/max14656_charger_detector.c b/drivers/power/supply/max14656_charger_detector.c index d19307f791c6..9e6472834e37 100644 --- a/drivers/power/supply/max14656_charger_detector.c +++ b/drivers/power/supply/max14656_charger_detector.c @@ -240,6 +240,14 @@ static enum power_supply_property max14656_battery_props[] = { POWER_SUPPLY_PROP_MANUFACTURER, }; +static void stop_irq_work(void *data) +{ + struct max14656_chip *chip = data; + + cancel_delayed_work_sync(&chip->irq_work); +} + + static int max14656_probe(struct i2c_client *client, const struct i2c_device_id *id) { @@ -278,8 +286,6 @@ static int max14656_probe(struct i2c_client *client, if (ret) return -ENODEV; - INIT_DELAYED_WORK(&chip->irq_work, max14656_irq_worker); - chip->detect_psy = devm_power_supply_register(dev, &chip->psy_desc, &psy_cfg); if (IS_ERR(chip->detect_psy)) { @@ -287,6 +293,13 @@ static int max14656_probe(struct i2c_client *client, return -EINVAL; } + INIT_DELAYED_WORK(&chip->irq_work, max14656_irq_worker); + ret = devm_add_action(dev, stop_irq_work, chip); + if (ret) { + dev_err(dev, "devm_add_action %d failed\n", ret); + return ret; + } + ret = devm_request_irq(dev, chip->irq, max14656_irq, IRQF_TRIGGER_FALLING, MAX14656_NAME, chip); From 253935ae19c622bfb245c5cbfa4dcaadf5e211e7 Mon Sep 17 00:00:00 2001 From: Remi Pommarel Date: Sun, 1 Sep 2019 12:54:10 +0200 Subject: [PATCH 0162/3715] iio: adc: meson_saradc: Fix memory allocation order [ Upstream commit de10ac47597e7a3596b27631d0d5ce5f48d2c099 ] meson_saradc's irq handler uses priv->regmap so make sure that it is allocated before the irq get enabled. This also fixes crash when CONFIG_DEBUG_SHIRQ is enabled, as device managed resources are freed in the inverted order they had been allocated, priv->regmap was freed before the spurious fake irq that CONFIG_DEBUG_SHIRQ adds called the handler. Fixes: 3af109131b7eb8 ("iio: adc: meson-saradc: switch from polling to interrupt mode") Reported-by: Elie Roudninski Signed-off-by: Remi Pommarel Reviewed-by: Martin Blumenstingl Tested-by: Elie ROUDNINSKI Reviewed-by: Kevin Hilman Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/adc/meson_saradc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/iio/adc/meson_saradc.c b/drivers/iio/adc/meson_saradc.c index 2515badf8b28..9b2121f24926 100644 --- a/drivers/iio/adc/meson_saradc.c +++ b/drivers/iio/adc/meson_saradc.c @@ -976,6 +976,11 @@ static int meson_sar_adc_probe(struct platform_device *pdev) if (IS_ERR(base)) return PTR_ERR(base); + priv->regmap = devm_regmap_init_mmio(&pdev->dev, base, + priv->data->regmap_config); + if (IS_ERR(priv->regmap)) + return PTR_ERR(priv->regmap); + irq = irq_of_parse_and_map(pdev->dev.of_node, 0); if (!irq) return -EINVAL; @@ -985,11 +990,6 @@ static int meson_sar_adc_probe(struct platform_device *pdev) if (ret) return ret; - priv->regmap = devm_regmap_init_mmio(&pdev->dev, base, - priv->data->regmap_config); - if (IS_ERR(priv->regmap)) - return PTR_ERR(priv->regmap); - priv->clkin = devm_clk_get(&pdev->dev, "clkin"); if (IS_ERR(priv->clkin)) { dev_err(&pdev->dev, "failed to get clkin\n"); From 6e026ddea96c418cda11fadbd755ee389f20b324 Mon Sep 17 00:00:00 2001 From: Pascal Bouwmann Date: Thu, 29 Aug 2019 07:29:41 +0200 Subject: [PATCH 0163/3715] iio: fix center temperature of bmc150-accel-core [ Upstream commit 6c59a962e081df6d8fe43325bbfabec57e0d4751 ] The center temperature of the supported devices stored in the constant BMC150_ACCEL_TEMP_CENTER_VAL is not 24 degrees but 23 degrees. It seems that some datasheets were inconsistent on this value leading to the error. For most usecases will only make minor difference so not queued for stable. Signed-off-by: Pascal Bouwmann Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/accel/bmc150-accel-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/accel/bmc150-accel-core.c b/drivers/iio/accel/bmc150-accel-core.c index 807299dd45eb..7e86a5b7ec4e 100644 --- a/drivers/iio/accel/bmc150-accel-core.c +++ b/drivers/iio/accel/bmc150-accel-core.c @@ -125,7 +125,7 @@ #define BMC150_ACCEL_SLEEP_1_SEC 0x0F #define BMC150_ACCEL_REG_TEMP 0x08 -#define BMC150_ACCEL_TEMP_CENTER_VAL 24 +#define BMC150_ACCEL_TEMP_CENTER_VAL 23 #define BMC150_ACCEL_AXIS_TO_REG(axis) (BMC150_ACCEL_REG_XOUT_L + (axis * 2)) #define BMC150_AUTO_SUSPEND_DELAY_MS 2000 From b2e6d509cf8f21a15df8dd1774772403198dff7c Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 25 Sep 2019 12:59:23 -0700 Subject: [PATCH 0164/3715] libsubcmd: Make _FORTIFY_SOURCE defines dependent on the feature [ Upstream commit 4b0b2b096da9d296e0e5668cdfba8613bd6f5bc8 ] Unconditionally defining _FORTIFY_SOURCE can break tools that don't work with it, such as memory sanitizers: https://github.com/google/sanitizers/wiki/AddressSanitizer#faq Fixes: 4b6ab94eabe4 ("perf subcmd: Create subcmd library") Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Josh Poimboeuf Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20190925195924.152834-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/lib/subcmd/Makefile | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile index ed61fb3a46c0..5b2cd5e58df0 100644 --- a/tools/lib/subcmd/Makefile +++ b/tools/lib/subcmd/Makefile @@ -20,7 +20,13 @@ MAKEFLAGS += --no-print-directory LIBFILE = $(OUTPUT)libsubcmd.a CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) -CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC +CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -fPIC + +ifeq ($(DEBUG),0) + ifeq ($(feature-fortify-source), 1) + CFLAGS += -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 + endif +endif ifeq ($(CC_NO_CLANG), 0) CFLAGS += -O3 From ffc50de292802381bbd8fb79e7b58a677a56d7e3 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 25 Sep 2019 12:59:24 -0700 Subject: [PATCH 0165/3715] perf tests: Avoid raising SEGV using an obvious NULL dereference [ Upstream commit e3e2cf3d5b1fe800b032e14c0fdcd9a6fb20cf3b ] An optimized build such as: make -C tools/perf CLANG=1 CC=clang EXTRA_CFLAGS="-O3 will turn the dereference operation into a ud2 instruction, raising a SIGILL rather than a SIGSEGV. Use raise(..) for correctness and clarity. Similar issues were addressed in Numfor Mbiziwo-Tiapo's patch: https://lkml.org/lkml/2019/7/8/1234 Committer testing: Before: [root@quaco ~]# perf test hooks 55: perf hooks : Ok [root@quaco ~]# perf test -v hooks 55: perf hooks : --- start --- test child forked, pid 17092 SIGSEGV is observed as expected, try to recover. Fatal error (SEGFAULT) in perf hook 'test' test child finished with 0 ---- end ---- perf hooks: Ok [root@quaco ~]# After: [root@quaco ~]# perf test hooks 55: perf hooks : Ok [root@quaco ~]# perf test -v hooks 55: perf hooks : --- start --- test child forked, pid 17909 SIGSEGV is observed as expected, try to recover. Fatal error (SEGFAULT) in perf hook 'test' test child finished with 0 ---- end ---- perf hooks: Ok [root@quaco ~]# Fixes: a074865e60ed ("perf tools: Introduce perf hooks") Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lore.kernel.org/lkml/20190925195924.152834-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/tests/perf-hooks.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/perf/tests/perf-hooks.c b/tools/perf/tests/perf-hooks.c index a693bcf017ea..44c16fd11bf6 100644 --- a/tools/perf/tests/perf-hooks.c +++ b/tools/perf/tests/perf-hooks.c @@ -20,12 +20,11 @@ static void sigsegv_handler(int sig __maybe_unused) static void the_hook(void *_hook_flags) { int *hook_flags = _hook_flags; - int *p = NULL; *hook_flags = 1234; /* Generate a segfault, test perf_hooks__recover */ - *p = 0; + raise(SIGSEGV); } int test__perf_hooks(struct test *test __maybe_unused, int subtest __maybe_unused) From bbb089eb22cb32fad57869a81b106c4df39ff85b Mon Sep 17 00:00:00 2001 From: Steve MacLean Date: Sat, 28 Sep 2019 01:39:00 +0000 Subject: [PATCH 0166/3715] perf map: Fix overlapped map handling [ Upstream commit ee212d6ea20887c0ef352be8563ca13dbf965906 ] Whenever an mmap/mmap2 event occurs, the map tree must be updated to add a new entry. If a new map overlaps a previous map, the overlapped section of the previous map is effectively unmapped, but the non-overlapping sections are still valid. maps__fixup_overlappings() is responsible for creating any new map entries from the previously overlapped map. It optionally creates a before and an after map. When creating the after map the existing code failed to adjust the map.pgoff. This meant the new after map would incorrectly calculate the file offset for the ip. This results in incorrect symbol name resolution for any ip in the after region. Make maps__fixup_overlappings() correctly populate map.pgoff. Add an assert that new mapping matches old mapping at the beginning of the after map. Committer-testing: Validated correct parsing of libcoreclr.so symbols from .NET Core 3.0 preview9 (which didn't strip symbols). Preparation: ~/dotnet3.0-preview9/dotnet new webapi -o perfSymbol cd perfSymbol ~/dotnet3.0-preview9/dotnet publish perf record ~/dotnet3.0-preview9/dotnet \ bin/Debug/netcoreapp3.0/publish/perfSymbol.dll ^C Before: perf script --show-mmap-events 2>&1 | grep -e MMAP -e unknown |\ grep libcoreclr.so | head -n 4 dotnet 1907 373352.698780: PERF_RECORD_MMAP2 1907/1907: \ [0x7fe615726000(0x768000) @ 0 08:02 5510620 765057155]: \ r-xp .../3.0.0-preview9-19423-09/libcoreclr.so dotnet 1907 373352.701091: PERF_RECORD_MMAP2 1907/1907: \ [0x7fe615974000(0x1000) @ 0x24e000 08:02 5510620 765057155]: \ rwxp .../3.0.0-preview9-19423-09/libcoreclr.so dotnet 1907 373352.701241: PERF_RECORD_MMAP2 1907/1907: \ [0x7fe615c42000(0x1000) @ 0x51c000 08:02 5510620 765057155]: \ rwxp .../3.0.0-preview9-19423-09/libcoreclr.so dotnet 1907 373352.705249: 250000 cpu-clock: \ 7fe6159a1f99 [unknown] \ (.../3.0.0-preview9-19423-09/libcoreclr.so) After: perf script --show-mmap-events 2>&1 | grep -e MMAP -e unknown |\ grep libcoreclr.so | head -n 4 dotnet 1907 373352.698780: PERF_RECORD_MMAP2 1907/1907: \ [0x7fe615726000(0x768000) @ 0 08:02 5510620 765057155]: \ r-xp .../3.0.0-preview9-19423-09/libcoreclr.so dotnet 1907 373352.701091: PERF_RECORD_MMAP2 1907/1907: \ [0x7fe615974000(0x1000) @ 0x24e000 08:02 5510620 765057155]: \ rwxp .../3.0.0-preview9-19423-09/libcoreclr.so dotnet 1907 373352.701241: PERF_RECORD_MMAP2 1907/1907: \ [0x7fe615c42000(0x1000) @ 0x51c000 08:02 5510620 765057155]: \ rwxp .../3.0.0-preview9-19423-09/libcoreclr.so All the [unknown] symbols were resolved. Signed-off-by: Steve MacLean Tested-by: Brian Robbins Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Davidlohr Bueso Cc: Eric Saint-Etienne Cc: John Keeping Cc: John Salem Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Cc: Tom McDonald Link: http://lore.kernel.org/lkml/BN8PR21MB136270949F22A6A02335C238F7800@BN8PR21MB1362.namprd21.prod.outlook.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/map.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 4e7bd2750122..63db9872c880 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include "symbol.h" +#include #include #include #include @@ -737,6 +738,8 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp } after->start = map->end; + after->pgoff += map->end - pos->start; + assert(pos->map_ip(pos, map->end) == after->map_ip(after, map->end)); __map_groups__insert(pos->groups, after); if (verbose >= 2 && !use_browser) map__fprintf(after, fp); From 1862e1188c9dfcc87f4c4e36ef1947d8f83e9eff Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 27 Sep 2019 16:35:45 -0700 Subject: [PATCH 0167/3715] perf jevents: Fix period for Intel fixed counters [ Upstream commit 6bdfd9f118bd59cf0f85d3bf4b72b586adea17c1 ] The Intel fixed counters use a special table to override the JSON information. During this override the period information from the JSON file got dropped, which results in inst_retired.any and similar running with frequency mode instead of a period. Just specify the expected period in the table. Signed-off-by: Andi Kleen Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20190927233546.11533-2-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/pmu-events/jevents.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 94a7cabe9b82..6f9f247b4516 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -342,12 +342,12 @@ static struct fixed { const char *name; const char *event; } fixed[] = { - { "inst_retired.any", "event=0xc0" }, - { "inst_retired.any_p", "event=0xc0" }, - { "cpu_clk_unhalted.ref", "event=0x0,umask=0x03" }, - { "cpu_clk_unhalted.thread", "event=0x3c" }, - { "cpu_clk_unhalted.core", "event=0x3c" }, - { "cpu_clk_unhalted.thread_any", "event=0x3c,any=1" }, + { "inst_retired.any", "event=0xc0,period=2000003" }, + { "inst_retired.any_p", "event=0xc0,period=2000003" }, + { "cpu_clk_unhalted.ref", "event=0x0,umask=0x03,period=2000003" }, + { "cpu_clk_unhalted.thread", "event=0x3c,period=2000003" }, + { "cpu_clk_unhalted.core", "event=0x3c,period=2000003" }, + { "cpu_clk_unhalted.thread_any", "event=0x3c,any=1,period=2000003" }, { NULL, NULL}, }; From 8c2a577c879c43085c65d7130b581bb53376b3ea Mon Sep 17 00:00:00 2001 From: Connor Kuehl Date: Fri, 27 Sep 2019 14:44:15 -0700 Subject: [PATCH 0168/3715] staging: rtl8188eu: fix null dereference when kzalloc fails [ Upstream commit 955c1532a34305f2f780b47f0c40cc7c65500810 ] If kzalloc() returns NULL, the error path doesn't stop the flow of control from entering rtw_hal_read_chip_version() which dereferences the null pointer. Fix this by adding a 'goto' to the error path to more gracefully handle the issue and avoid proceeding with initialization steps that we're no longer prepared to handle. Also update the debug message to be more consistent with the other debug messages in this function. Addresses-Coverity: ("Dereference after null check") Signed-off-by: Connor Kuehl Link: https://lore.kernel.org/r/20190927214415.899-1-connor.kuehl@canonical.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/staging/rtl8188eu/os_dep/usb_intf.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/staging/rtl8188eu/os_dep/usb_intf.c b/drivers/staging/rtl8188eu/os_dep/usb_intf.c index 2fc7056cbff7..77c339a93525 100644 --- a/drivers/staging/rtl8188eu/os_dep/usb_intf.c +++ b/drivers/staging/rtl8188eu/os_dep/usb_intf.c @@ -357,8 +357,10 @@ static struct adapter *rtw_usb_if1_init(struct dvobj_priv *dvobj, } padapter->HalData = kzalloc(sizeof(struct hal_data_8188e), GFP_KERNEL); - if (!padapter->HalData) - DBG_88E("cant not alloc memory for HAL DATA\n"); + if (!padapter->HalData) { + DBG_88E("Failed to allocate memory for HAL data\n"); + goto free_adapter; + } /* step read_chip_version */ rtw_hal_read_chip_version(padapter); From 8f7b8f1d14d714181b35e69f4cb73f02ea8f0156 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Wed, 25 Sep 2019 09:45:42 -0500 Subject: [PATCH 0169/3715] RDMA/hfi1: Prevent memory leak in sdma_init [ Upstream commit 34b3be18a04ecdc610aae4c48e5d1b799d8689f6 ] In sdma_init if rhashtable_init fails the allocated memory for tmp_sdma_rht should be released. Fixes: 5a52a7acf7e2 ("IB/hfi1: NULL pointer dereference when freeing rhashtable") Link: https://lore.kernel.org/r/20190925144543.10141-1-navid.emamdoost@gmail.com Signed-off-by: Navid Emamdoost Acked-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/sdma.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 6781bcdb10b3..741938409f8e 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1529,8 +1529,11 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) } ret = rhashtable_init(tmp_sdma_rht, &sdma_rht_params); - if (ret < 0) + if (ret < 0) { + kfree(tmp_sdma_rht); goto bail; + } + dd->sdma_rht = tmp_sdma_rht; dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma); From a7913a4126e513f7c3e312dfdddc86ab53c05b51 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 30 Sep 2019 16:16:54 -0700 Subject: [PATCH 0170/3715] RDMA/iwcm: Fix a lock inversion issue [ Upstream commit b66f31efbdad95ec274345721d99d1d835e6de01 ] This patch fixes the lock inversion complaint: ============================================ WARNING: possible recursive locking detected 5.3.0-rc7-dbg+ #1 Not tainted -------------------------------------------- kworker/u16:6/171 is trying to acquire lock: 00000000035c6e6c (&id_priv->handler_mutex){+.+.}, at: rdma_destroy_id+0x78/0x4a0 [rdma_cm] but task is already holding lock: 00000000bc7c307d (&id_priv->handler_mutex){+.+.}, at: iw_conn_req_handler+0x151/0x680 [rdma_cm] other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&id_priv->handler_mutex); lock(&id_priv->handler_mutex); *** DEADLOCK *** May be due to missing lock nesting notation 3 locks held by kworker/u16:6/171: #0: 00000000e2eaa773 ((wq_completion)iw_cm_wq){+.+.}, at: process_one_work+0x472/0xac0 #1: 000000001efd357b ((work_completion)(&work->work)#3){+.+.}, at: process_one_work+0x476/0xac0 #2: 00000000bc7c307d (&id_priv->handler_mutex){+.+.}, at: iw_conn_req_handler+0x151/0x680 [rdma_cm] stack backtrace: CPU: 3 PID: 171 Comm: kworker/u16:6 Not tainted 5.3.0-rc7-dbg+ #1 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 Workqueue: iw_cm_wq cm_work_handler [iw_cm] Call Trace: dump_stack+0x8a/0xd6 __lock_acquire.cold+0xe1/0x24d lock_acquire+0x106/0x240 __mutex_lock+0x12e/0xcb0 mutex_lock_nested+0x1f/0x30 rdma_destroy_id+0x78/0x4a0 [rdma_cm] iw_conn_req_handler+0x5c9/0x680 [rdma_cm] cm_work_handler+0xe62/0x1100 [iw_cm] process_one_work+0x56d/0xac0 worker_thread+0x7a/0x5d0 kthread+0x1bc/0x210 ret_from_fork+0x24/0x30 This is not a bug as there are actually two lock classes here. Link: https://lore.kernel.org/r/20190930231707.48259-3-bvanassche@acm.org Fixes: de910bd92137 ("RDMA/cma: Simplify locking needed for serialization of callbacks") Signed-off-by: Bart Van Assche Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/cma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 7c5eca312aa8..f698c6a28c14 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2212,9 +2212,10 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, conn_id->cm_id.iw = NULL; cma_exch(conn_id, RDMA_CM_DESTROYING); mutex_unlock(&conn_id->handler_mutex); + mutex_unlock(&listen_id->handler_mutex); cma_deref_id(conn_id); rdma_destroy_id(&conn_id->id); - goto out; + return ret; } mutex_unlock(&conn_id->handler_mutex); From 8cb0ef81ba927d776dabe7fd3f649c580d2cdeba Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Tue, 20 Aug 2019 02:56:34 +0000 Subject: [PATCH 0171/3715] HID: hyperv: Use in-place iterator API in the channel callback [ Upstream commit 6a297c90efa68b2864483193b8bfb0d19478600c ] Simplify the ring buffer handling with the in-place API. Also avoid the dynamic allocation and the memory leak in the channel callback function. Signed-off-by: Dexuan Cui Acked-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-hyperv.c | 56 +++++++--------------------------------- 1 file changed, 10 insertions(+), 46 deletions(-) diff --git a/drivers/hid/hid-hyperv.c b/drivers/hid/hid-hyperv.c index 5f1de24206ab..220b3e5c9c39 100644 --- a/drivers/hid/hid-hyperv.c +++ b/drivers/hid/hid-hyperv.c @@ -322,60 +322,24 @@ static void mousevsc_on_receive(struct hv_device *device, static void mousevsc_on_channel_callback(void *context) { - const int packet_size = 0x100; - int ret; struct hv_device *device = context; - u32 bytes_recvd; - u64 req_id; struct vmpacket_descriptor *desc; - unsigned char *buffer; - int bufferlen = packet_size; - - buffer = kmalloc(bufferlen, GFP_ATOMIC); - if (!buffer) - return; - - do { - ret = vmbus_recvpacket_raw(device->channel, buffer, - bufferlen, &bytes_recvd, &req_id); - - switch (ret) { - case 0: - if (bytes_recvd <= 0) { - kfree(buffer); - return; - } - desc = (struct vmpacket_descriptor *)buffer; - - switch (desc->type) { - case VM_PKT_COMP: - break; - - case VM_PKT_DATA_INBAND: - mousevsc_on_receive(device, desc); - break; - - default: - pr_err("unhandled packet type %d, tid %llx len %d\n", - desc->type, req_id, bytes_recvd); - break; - } + foreach_vmbus_pkt(desc, device->channel) { + switch (desc->type) { + case VM_PKT_COMP: break; - case -ENOBUFS: - kfree(buffer); - /* Handle large packet */ - bufferlen = bytes_recvd; - buffer = kmalloc(bytes_recvd, GFP_ATOMIC); - - if (!buffer) - return; + case VM_PKT_DATA_INBAND: + mousevsc_on_receive(device, desc); + break; + default: + pr_err("Unhandled packet type %d, tid %llx len %d\n", + desc->type, desc->trans_id, desc->len8 * 8); break; } - } while (1); - + } } static int mousevsc_connect_to_vsp(struct hv_device *device) From fba217b35e518fa3b0f322bd97280326f93dab45 Mon Sep 17 00:00:00 2001 From: ZhangXiaoxu Date: Thu, 26 Sep 2019 14:29:38 +0800 Subject: [PATCH 0172/3715] nfs: Fix nfsi->nrequests count error on nfs_inode_remove_request [ Upstream commit 33ea5aaa87cdae0f9af4d6b7ee4f650a1a36fd1d ] When xfstests testing, there are some WARNING as below: WARNING: CPU: 0 PID: 6235 at fs/nfs/inode.c:122 nfs_clear_inode+0x9c/0xd8 Modules linked in: CPU: 0 PID: 6235 Comm: umount.nfs Hardware name: linux,dummy-virt (DT) pstate: 60000005 (nZCv daif -PAN -UAO) pc : nfs_clear_inode+0x9c/0xd8 lr : nfs_evict_inode+0x60/0x78 sp : fffffc000f68fc00 x29: fffffc000f68fc00 x28: fffffe00c53155c0 x27: fffffe00c5315000 x26: fffffc0009a63748 x25: fffffc000f68fd18 x24: fffffc000bfaaf40 x23: fffffc000936d3c0 x22: fffffe00c4ff5e20 x21: fffffc000bfaaf40 x20: fffffe00c4ff5d10 x19: fffffc000c056000 x18: 000000000000003c x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000040 x14: 0000000000000228 x13: fffffc000c3a2000 x12: 0000000000000045 x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000 x8 : 0000000000000000 x7 : 0000000000000000 x6 : fffffc00084b027c x5 : fffffc0009a64000 x4 : fffffe00c0e77400 x3 : fffffc000c0563a8 x2 : fffffffffffffffb x1 : 000000000000764e x0 : 0000000000000001 Call trace: nfs_clear_inode+0x9c/0xd8 nfs_evict_inode+0x60/0x78 evict+0x108/0x380 dispose_list+0x70/0xa0 evict_inodes+0x194/0x210 generic_shutdown_super+0xb0/0x220 nfs_kill_super+0x40/0x88 deactivate_locked_super+0xb4/0x120 deactivate_super+0x144/0x160 cleanup_mnt+0x98/0x148 __cleanup_mnt+0x38/0x50 task_work_run+0x114/0x160 do_notify_resume+0x2f8/0x308 work_pending+0x8/0x14 The nrequest should be increased/decreased only if PG_INODE_REF flag was setted. But in the nfs_inode_remove_request function, it maybe decrease when no PG_INODE_REF flag, this maybe lead nrequests count error. Reported-by: Hulk Robot Signed-off-by: ZhangXiaoxu Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin --- fs/nfs/write.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 3c1e46f4bce3..01b9d9341b54 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -783,7 +783,6 @@ static void nfs_inode_remove_request(struct nfs_page *req) struct nfs_inode *nfsi = NFS_I(inode); struct nfs_page *head; - atomic_long_dec(&nfsi->nrequests); if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) { head = req->wb_head; @@ -796,8 +795,10 @@ static void nfs_inode_remove_request(struct nfs_page *req) spin_unlock(&mapping->private_lock); } - if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) + if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) { nfs_release_request(req); + atomic_long_dec(&nfsi->nrequests); + } } static void From 1252a3d40569c214db579f253f4be71eb91b31e8 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 2 Oct 2019 10:49:35 +0100 Subject: [PATCH 0173/3715] arm64: ftrace: Ensure synchronisation in PLT setup for Neoverse-N1 #1542419 [ Upstream commit dd8a1f13488438c6c220b7cafa500baaf21a6e53 ] CPUs affected by Neoverse-N1 #1542419 may execute a stale instruction if it was recently modified. The affected sequence requires freshly written instructions to be executable before a branch to them is updated. There are very few places in the kernel that modify executable text, all but one come with sufficient synchronisation: * The module loader's flush_module_icache() calls flush_icache_range(), which does a kick_all_cpus_sync() * bpf_int_jit_compile() calls flush_icache_range(). * Kprobes calls aarch64_insn_patch_text(), which does its work in stop_machine(). * static keys and ftrace both patch between nops and branches to existing kernel code (not generated code). The affected sequence is the interaction between ftrace and modules. The module PLT is cleaned using __flush_icache_range() as the trampoline shouldn't be executable until we update the branch to it. Drop the double-underscore so that this path runs kick_all_cpus_sync() too. Signed-off-by: James Morse Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/kernel/ftrace.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index fac79d75d1d9..6eefd5873aef 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -119,7 +119,13 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) /* * Ensure updated trampoline is visible to instruction - * fetch before we patch in the branch. + * fetch before we patch in the branch. Although the + * architecture doesn't require an IPI in this case, + * Neoverse-N1 erratum #1542419 does require one + * if the TLB maintenance in module_enable_ro() is + * skipped due to rodata_enabled. It doesn't seem worth + * it to make it conditional given that this is + * certainly not a fast-path. */ flush_icache_range((unsigned long)&dst[0], (unsigned long)&dst[1]); From ef371fdb91306d3716df18ba912c26bd7313e232 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 10 Sep 2019 06:11:29 +0200 Subject: [PATCH 0174/3715] tty: serial: owl: Fix the link time qualifier of 'owl_uart_exit()' [ Upstream commit 6264dab6efd6069f0387efb078a9960b5642377b ] 'exit' functions should be marked as __exit, not __init. Fixes: fc60a8b675bd ("tty: serial: owl: Implement console driver") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/20190910041129.6978-1-christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/owl-uart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/owl-uart.c b/drivers/tty/serial/owl-uart.c index b9c859365334..d774f6013d7b 100644 --- a/drivers/tty/serial/owl-uart.c +++ b/drivers/tty/serial/owl-uart.c @@ -754,7 +754,7 @@ static int __init owl_uart_init(void) return ret; } -static void __init owl_uart_exit(void) +static void __exit owl_uart_exit(void) { platform_driver_unregister(&owl_uart_platform_driver); uart_unregister_driver(&owl_uart_driver); From ef445dfa588c8c504386fd43b93af26dcec63d99 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 30 Sep 2019 19:15:12 -0700 Subject: [PATCH 0175/3715] tty: n_hdlc: fix build on SPARC [ Upstream commit 47a7e5e97d4edd7b14974d34f0e5a5560fad2915 ] Fix tty driver build on SPARC by not using __exitdata. It appears that SPARC does not support section .exit.data. Fixes these build errors: `.exit.data' referenced in section `.exit.text' of drivers/tty/n_hdlc.o: defined in discarded section `.exit.data' of drivers/tty/n_hdlc.o `.exit.data' referenced in section `.exit.text' of drivers/tty/n_hdlc.o: defined in discarded section `.exit.data' of drivers/tty/n_hdlc.o `.exit.data' referenced in section `.exit.text' of drivers/tty/n_hdlc.o: defined in discarded section `.exit.data' of drivers/tty/n_hdlc.o `.exit.data' referenced in section `.exit.text' of drivers/tty/n_hdlc.o: defined in discarded section `.exit.data' of drivers/tty/n_hdlc.o Reported-by: kbuild test robot Fixes: 063246641d4a ("format-security: move static strings to const") Signed-off-by: Randy Dunlap Cc: Kees Cook Cc: Greg Kroah-Hartman Cc: "David S. Miller" Cc: Andrew Morton Link: https://lore.kernel.org/r/675e7bd9-955b-3ff3-1101-a973b58b5b75@infradead.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/n_hdlc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c index 08bd6b965847..e83dea8d6633 100644 --- a/drivers/tty/n_hdlc.c +++ b/drivers/tty/n_hdlc.c @@ -969,6 +969,11 @@ static int __init n_hdlc_init(void) } /* end of init_module() */ +#ifdef CONFIG_SPARC +#undef __exitdata +#define __exitdata +#endif + static const char hdlc_unregister_ok[] __exitdata = KERN_INFO "N_HDLC: line discipline unregistered\n"; static const char hdlc_unregister_fail[] __exitdata = From 784aafad04942ea6b3101e385218ec287c68a24e Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 2 Oct 2019 14:28:23 +0200 Subject: [PATCH 0176/3715] gpio: max77620: Use correct unit for debounce times [ Upstream commit fffa6af94894126994a7600c6f6f09b892e89fa9 ] The gpiod_set_debounce() function takes the debounce time in microseconds. Adjust the switch/case values in the MAX77620 GPIO to use the correct unit. Signed-off-by: Thierry Reding Link: https://lore.kernel.org/r/20191002122825.3948322-1-thierry.reding@gmail.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/gpio/gpio-max77620.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-max77620.c b/drivers/gpio/gpio-max77620.c index 538bce4b5b42..ac6c1c0548b6 100644 --- a/drivers/gpio/gpio-max77620.c +++ b/drivers/gpio/gpio-max77620.c @@ -163,13 +163,13 @@ static int max77620_gpio_set_debounce(struct max77620_gpio *mgpio, case 0: val = MAX77620_CNFG_GPIO_DBNC_None; break; - case 1 ... 8: + case 1000 ... 8000: val = MAX77620_CNFG_GPIO_DBNC_8ms; break; - case 9 ... 16: + case 9000 ... 16000: val = MAX77620_CNFG_GPIO_DBNC_16ms; break; - case 17 ... 32: + case 17000 ... 32000: val = MAX77620_CNFG_GPIO_DBNC_32ms; break; default: From 9e7a7eaa9c6c1d1308a685b1133927214878dc7b Mon Sep 17 00:00:00 2001 From: Austin Kim Date: Tue, 1 Oct 2019 16:34:13 +0900 Subject: [PATCH 0177/3715] fs: cifs: mute -Wunused-const-variable message [ Upstream commit dd19c106a36690b47bb1acc68372f2b472b495b8 ] After 'Initial git repository build' commit, 'mapping_table_ERRHRD' variable has not been used. So 'mapping_table_ERRHRD' const variable could be removed to mute below warning message: fs/cifs/netmisc.c:120:40: warning: unused variable 'mapping_table_ERRHRD' [-Wunused-const-variable] static const struct smb_to_posix_error mapping_table_ERRHRD[] = { ^ Signed-off-by: Austin Kim Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/netmisc.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index cc88f4f0325e..bed973330227 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -130,10 +130,6 @@ static const struct smb_to_posix_error mapping_table_ERRSRV[] = { {0, 0} }; -static const struct smb_to_posix_error mapping_table_ERRHRD[] = { - {0, 0} -}; - /* * Convert a string containing text IPv4 or IPv6 address to binary form. * From 62f353a1bf55da65d92b14492ca9bc6fd84d8dce Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Sun, 6 Oct 2019 11:33:11 -0500 Subject: [PATCH 0178/3715] serial: mctrl_gpio: Check for NULL pointer [ Upstream commit 37e3ab00e4734acc15d96b2926aab55c894f4d9c ] When using mctrl_gpio_to_gpiod, it dereferences gpios into a single requested GPIO. This dereferencing can break if gpios is NULL, so this patch adds a NULL check before dereferencing it. If gpios is NULL, this function will also return NULL. Signed-off-by: Adam Ford Reviewed-by: Yegor Yefremov Link: https://lore.kernel.org/r/20191006163314.23191-1-aford173@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/serial_mctrl_gpio.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/tty/serial/serial_mctrl_gpio.c b/drivers/tty/serial/serial_mctrl_gpio.c index 42e42e3e7a6e..388f71046849 100644 --- a/drivers/tty/serial/serial_mctrl_gpio.c +++ b/drivers/tty/serial/serial_mctrl_gpio.c @@ -69,6 +69,9 @@ EXPORT_SYMBOL_GPL(mctrl_gpio_set); struct gpio_desc *mctrl_gpio_to_gpiod(struct mctrl_gpios *gpios, enum mctrl_gpio_idx gidx) { + if (gpios == NULL) + return NULL; + return gpios->gpio[gidx]; } EXPORT_SYMBOL_GPL(mctrl_gpio_to_gpiod); From 72a4cc09f0ec71753d8ba4add1d92dd1955eaed0 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 2 Oct 2019 18:58:58 +0200 Subject: [PATCH 0179/3715] efi/cper: Fix endianness of PCIe class code [ Upstream commit 6fb9367a15d1a126d222d738b2702c7958594a5f ] The CPER parser assumes that the class code is big endian, but at least on this edk2-derived Intel Purley platform it's little endian: efi: EFI v2.50 by EDK II BIOS ID:PLYDCRB1.86B.0119.R05.1701181843 DMI: Intel Corporation PURLEY/PURLEY, BIOS PLYDCRB1.86B.0119.R05.1701181843 01/18/2017 {1}[Hardware Error]: device_id: 0000:5d:00.0 {1}[Hardware Error]: slot: 0 {1}[Hardware Error]: secondary_bus: 0x5e {1}[Hardware Error]: vendor_id: 0x8086, device_id: 0x2030 {1}[Hardware Error]: class_code: 000406 ^^^^^^ (should be 060400) Signed-off-by: Lukas Wunner Signed-off-by: Ard Biesheuvel Cc: Ben Dooks Cc: Dave Young Cc: Jarkko Sakkinen Cc: Jerry Snitselaar Cc: Linus Torvalds Cc: Lyude Paul Cc: Matthew Garrett Cc: Octavian Purdila Cc: Peter Jones Cc: Peter Zijlstra Cc: Scott Talbert Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Cc: linux-integrity@vger.kernel.org Link: https://lkml.kernel.org/r/20191002165904.8819-2-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- drivers/firmware/efi/cper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index db404aab82b2..209dc5aefc31 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -498,7 +498,7 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie, printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx, pcie->device_id.vendor_id, pcie->device_id.device_id); p = pcie->device_id.class_code; - printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]); + printk("%s""class_code: %02x%02x%02x\n", pfx, p[2], p[1], p[0]); } if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER) printk("%s""serial number: 0x%04x, 0x%04x\n", pfx, From a59335e1f8a77ba42c4c3620996dd120fd2ab801 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Wed, 2 Oct 2019 18:59:04 +0200 Subject: [PATCH 0180/3715] efi/x86: Do not clean dummy variable in kexec path [ Upstream commit 2ecb7402cfc7f22764e7bbc80790e66eadb20560 ] kexec reboot fails randomly in UEFI based KVM guest. The firmware just resets while calling efi_delete_dummy_variable(); Unfortunately I don't know how to debug the firmware, it is also possible a potential problem on real hardware as well although nobody reproduced it. The intention of the efi_delete_dummy_variable is to trigger garbage collection when entering virtual mode. But SetVirtualAddressMap can only run once for each physical reboot, thus kexec_enter_virtual_mode() is not necessarily a good place to clean a dummy object. Drop the efi_delete_dummy_variable so that kexec reboot can work. Signed-off-by: Dave Young Signed-off-by: Ard Biesheuvel Acked-by: Matthew Garrett Cc: Ben Dooks Cc: Jarkko Sakkinen Cc: Jerry Snitselaar Cc: Linus Torvalds Cc: Lukas Wunner Cc: Lyude Paul Cc: Octavian Purdila Cc: Peter Jones Cc: Peter Zijlstra Cc: Scott Talbert Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Cc: linux-integrity@vger.kernel.org Link: https://lkml.kernel.org/r/20191002165904.8819-8-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- arch/x86/platform/efi/efi.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 9061babfbc83..335a62e74a2e 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -893,9 +893,6 @@ static void __init kexec_enter_virtual_mode(void) if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX)) runtime_code_page_mkexec(); - - /* clean DUMMY object */ - efi_delete_dummy_variable(); #endif } From 11d172c77de4b7a771c65af2e8e2ef4eaf9a1304 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Sun, 6 Oct 2019 15:12:32 +0200 Subject: [PATCH 0181/3715] MIPS: include: Mark __cmpxchg as __always_inline [ Upstream commit 88356d09904bc606182c625575237269aeece22e ] Commit ac7c3e4ff401 ("compiler: enable CONFIG_OPTIMIZE_INLINING forcibly") allows compiler to uninline functions marked as 'inline'. In cace of cmpxchg this would cause to reference function __cmpxchg_called_with_bad_pointer, which is a error case for catching bugs and will not happen for correct code, if __cmpxchg is inlined. Signed-off-by: Thomas Bogendoerfer [paul.burton@mips.com: s/__cmpxchd/__cmpxchg in subject] Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Sasha Levin --- arch/mips/include/asm/cmpxchg.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h index 89e9fb7976fe..895f91b9e89c 100644 --- a/arch/mips/include/asm/cmpxchg.h +++ b/arch/mips/include/asm/cmpxchg.h @@ -146,8 +146,9 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x, extern unsigned long __cmpxchg_small(volatile void *ptr, unsigned long old, unsigned long new, unsigned int size); -static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, - unsigned long new, unsigned int size) +static __always_inline +unsigned long __cmpxchg(volatile void *ptr, unsigned long old, + unsigned long new, unsigned int size) { switch (size) { case 1: From 588c0f3282207a7b25951de0e96e5485b3bfa62d Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Mon, 30 Sep 2019 16:44:41 -0400 Subject: [PATCH 0182/3715] x86/xen: Return from panic notifier [ Upstream commit c6875f3aacf2a5a913205accddabf0bfb75cac76 ] Currently execution of panic() continues until Xen's panic notifier (xen_panic_event()) is called at which point we make a hypercall that never returns. This means that any notifier that is supposed to be called later as well as significant part of panic() code (such as pstore writes from kmsg_dump()) is never executed. There is no reason for xen_panic_event() to be this last point in execution since panic()'s emergency_restart() will call into xen_emergency_restart() from where we can perform our hypercall. Nevertheless, we will provide xen_legacy_crash boot option that will preserve original behavior during crash. This option could be used, for example, if running kernel dumper (which happens after panic notifiers) is undesirable. Reported-by: James Dingwall Signed-off-by: Boris Ostrovsky Reviewed-by: Juergen Gross Signed-off-by: Sasha Levin --- .../admin-guide/kernel-parameters.txt | 4 +++ arch/x86/xen/enlighten.c | 28 +++++++++++++++++-- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index b67a6cd08ca1..671f518b09ee 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4875,6 +4875,10 @@ the unplug protocol never -- do not unplug even if version check succeeds + xen_legacy_crash [X86,XEN] + Crash from Xen panic notifier, without executing late + panic() code such as dumping handler. + xen_nopvspin [X86,XEN] Disables the ticketlock slowpath using Xen PV optimizations. diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 515d5e4414c2..00fc683a2011 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -259,19 +259,41 @@ void xen_reboot(int reason) BUG(); } +static int reboot_reason = SHUTDOWN_reboot; +static bool xen_legacy_crash; void xen_emergency_restart(void) { - xen_reboot(SHUTDOWN_reboot); + xen_reboot(reboot_reason); } static int xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr) { - if (!kexec_crash_loaded()) - xen_reboot(SHUTDOWN_crash); + if (!kexec_crash_loaded()) { + if (xen_legacy_crash) + xen_reboot(SHUTDOWN_crash); + + reboot_reason = SHUTDOWN_crash; + + /* + * If panic_timeout==0 then we are supposed to wait forever. + * However, to preserve original dom0 behavior we have to drop + * into hypervisor. (domU behavior is controlled by its + * config file) + */ + if (panic_timeout == 0) + panic_timeout = -1; + } return NOTIFY_DONE; } +static int __init parse_xen_legacy_crash(char *arg) +{ + xen_legacy_crash = true; + return 0; +} +early_param("xen_legacy_crash", parse_xen_legacy_crash); + static struct notifier_block xen_panic_block = { .notifier_call = xen_panic_event, .priority = INT_MIN From 7864c58e02ad9c8ee3c1a2712a6df9a26093a3b5 Mon Sep 17 00:00:00 2001 From: Jia Guo Date: Sun, 6 Oct 2019 17:57:47 -0700 Subject: [PATCH 0183/3715] ocfs2: clear zero in unaligned direct IO [ Upstream commit 7a243c82ea527cd1da47381ad9cd646844f3b693 ] Unused portion of a part-written fs-block-sized block is not set to zero in unaligned append direct write.This can lead to serious data inconsistencies. Ocfs2 manage disk with cluster size(for example, 1M), part-written in one cluster will change the cluster state from UN-WRITTEN to WRITTEN, VFS(function dio_zero_block) doesn't do the cleaning because bh's state is not set to NEW in function ocfs2_dio_wr_get_block when we write a WRITTEN cluster. For example, the cluster size is 1M, file size is 8k and we direct write from 14k to 15k, then 12k~14k and 15k~16k will contain dirty data. We have to deal with two cases: 1.The starting position of direct write is outside the file. 2.The starting position of direct write is located in the file. We need set bh's state to NEW in the first case. In the second case, we need mapped twice because bh's state of area out file should be set to NEW while area in file not. [akpm@linux-foundation.org: coding style fixes] Link: http://lkml.kernel.org/r/5292e287-8f1a-fd4a-1a14-661e555e0bed@huawei.com Signed-off-by: Jia Guo Reviewed-by: Yiwen Jiang Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/ocfs2/aops.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 99550f4bd159..ebeec7530cb6 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -2151,13 +2151,30 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock, struct ocfs2_dio_write_ctxt *dwc = NULL; struct buffer_head *di_bh = NULL; u64 p_blkno; - loff_t pos = iblock << inode->i_sb->s_blocksize_bits; + unsigned int i_blkbits = inode->i_sb->s_blocksize_bits; + loff_t pos = iblock << i_blkbits; + sector_t endblk = (i_size_read(inode) - 1) >> i_blkbits; unsigned len, total_len = bh_result->b_size; int ret = 0, first_get_block = 0; len = osb->s_clustersize - (pos & (osb->s_clustersize - 1)); len = min(total_len, len); + /* + * bh_result->b_size is count in get_more_blocks according to write + * "pos" and "end", we need map twice to return different buffer state: + * 1. area in file size, not set NEW; + * 2. area out file size, set NEW. + * + * iblock endblk + * |--------|---------|---------|--------- + * |<-------area in file------->| + */ + + if ((iblock <= endblk) && + ((iblock + ((len - 1) >> i_blkbits)) > endblk)) + len = (endblk - iblock + 1) << i_blkbits; + mlog(0, "get block of %lu at %llu:%u req %u\n", inode->i_ino, pos, len, total_len); @@ -2241,6 +2258,9 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock, if (desc->c_needs_zero) set_buffer_new(bh_result); + if (iblock > endblk) + set_buffer_new(bh_result); + /* May sleep in end_io. It should not happen in a irq context. So defer * it to dio work queue. */ set_buffer_defer_completion(bh_result); From 982706449a532e9ccbfe2d995d3c3cf8def6a1d8 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Sun, 6 Oct 2019 17:57:50 -0700 Subject: [PATCH 0184/3715] fs: ocfs2: fix possible null-pointer dereferences in ocfs2_xa_prepare_entry() [ Upstream commit 56e94ea132bb5c2c1d0b60a6aeb34dcb7d71a53d ] In ocfs2_xa_prepare_entry(), there is an if statement on line 2136 to check whether loc->xl_entry is NULL: if (loc->xl_entry) When loc->xl_entry is NULL, it is used on line 2158: ocfs2_xa_add_entry(loc, name_hash); loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash); loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size); and line 2164: ocfs2_xa_add_namevalue(loc, xi); loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len); loc->xl_entry->xe_name_len = xi->xi_name_len; Thus, possible null-pointer dereferences may occur. To fix these bugs, if loc-xl_entry is NULL, ocfs2_xa_prepare_entry() abnormally returns with -EINVAL. These bugs are found by a static analysis tool STCheck written by us. [akpm@linux-foundation.org: remove now-unused ocfs2_xa_add_entry()] Link: http://lkml.kernel.org/r/20190726101447.9153-1-baijiaju1990@gmail.com Signed-off-by: Jia-Ju Bai Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/ocfs2/xattr.c | 56 ++++++++++++++++++++---------------------------- 1 file changed, 23 insertions(+), 33 deletions(-) diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 77740ef5a8e8..eca49da6d7e0 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -1497,18 +1497,6 @@ static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc, return loc->xl_ops->xlo_check_space(loc, xi); } -static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) -{ - loc->xl_ops->xlo_add_entry(loc, name_hash); - loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash); - /* - * We can't leave the new entry's xe_name_offset at zero or - * add_namevalue() will go nuts. We set it to the size of our - * storage so that it can never be less than any other entry. - */ - loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size); -} - static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc, struct ocfs2_xattr_info *xi) { @@ -2140,29 +2128,31 @@ static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc, if (rc) goto out; - if (loc->xl_entry) { - if (ocfs2_xa_can_reuse_entry(loc, xi)) { - orig_value_size = loc->xl_entry->xe_value_size; - rc = ocfs2_xa_reuse_entry(loc, xi, ctxt); - if (rc) - goto out; - goto alloc_value; - } + if (!loc->xl_entry) { + rc = -EINVAL; + goto out; + } - if (!ocfs2_xattr_is_local(loc->xl_entry)) { - orig_clusters = ocfs2_xa_value_clusters(loc); - rc = ocfs2_xa_value_truncate(loc, 0, ctxt); - if (rc) { - mlog_errno(rc); - ocfs2_xa_cleanup_value_truncate(loc, - "overwriting", - orig_clusters); - goto out; - } + if (ocfs2_xa_can_reuse_entry(loc, xi)) { + orig_value_size = loc->xl_entry->xe_value_size; + rc = ocfs2_xa_reuse_entry(loc, xi, ctxt); + if (rc) + goto out; + goto alloc_value; + } + + if (!ocfs2_xattr_is_local(loc->xl_entry)) { + orig_clusters = ocfs2_xa_value_clusters(loc); + rc = ocfs2_xa_value_truncate(loc, 0, ctxt); + if (rc) { + mlog_errno(rc); + ocfs2_xa_cleanup_value_truncate(loc, + "overwriting", + orig_clusters); + goto out; } - ocfs2_xa_wipe_namevalue(loc); - } else - ocfs2_xa_add_entry(loc, name_hash); + } + ocfs2_xa_wipe_namevalue(loc); /* * If we get here, we have a blank entry. Fill it. We grow our From 16508e0aa53aac12d1dba97aaca4f8acd6faaeeb Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Sun, 6 Oct 2019 17:57:54 -0700 Subject: [PATCH 0185/3715] fs: ocfs2: fix a possible null-pointer dereference in ocfs2_write_end_nolock() [ Upstream commit 583fee3e12df0e6f1f66f063b989d8e7fed0e65a ] In ocfs2_write_end_nolock(), there are an if statement on lines 1976, 2047 and 2058, to check whether handle is NULL: if (handle) When handle is NULL, it is used on line 2045: ocfs2_update_inode_fsync_trans(handle, inode, 1); oi->i_sync_tid = handle->h_transaction->t_tid; Thus, a possible null-pointer dereference may occur. To fix this bug, handle is checked before calling ocfs2_update_inode_fsync_trans(). This bug is found by a static analysis tool STCheck written by us. Link: http://lkml.kernel.org/r/20190726033705.32307-1-baijiaju1990@gmail.com Signed-off-by: Jia-Ju Bai Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/ocfs2/aops.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index ebeec7530cb6..7de0c9562b70 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -2054,7 +2054,8 @@ out_write_size: inode->i_mtime = inode->i_ctime = current_time(inode); di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); - ocfs2_update_inode_fsync_trans(handle, inode, 1); + if (handle) + ocfs2_update_inode_fsync_trans(handle, inode, 1); } if (handle) ocfs2_journal_dirty(handle, wc->w_di_bh); From 22731e226b8f47fd4843673ed68d5e7c43c48ebc Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Sun, 6 Oct 2019 17:57:57 -0700 Subject: [PATCH 0186/3715] fs: ocfs2: fix a possible null-pointer dereference in ocfs2_info_scan_inode_alloc() [ Upstream commit 2abb7d3b12d007c30193f48bebed781009bebdd2 ] In ocfs2_info_scan_inode_alloc(), there is an if statement on line 283 to check whether inode_alloc is NULL: if (inode_alloc) When inode_alloc is NULL, it is used on line 287: ocfs2_inode_lock(inode_alloc, &bh, 0); ocfs2_inode_lock_full_nested(inode, ...) struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); Thus, a possible null-pointer dereference may occur. To fix this bug, inode_alloc is checked on line 286. This bug is found by a static analysis tool STCheck written by us. Link: http://lkml.kernel.org/r/20190726033717.32359-1-baijiaju1990@gmail.com Signed-off-by: Jia-Ju Bai Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/ocfs2/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index ab30c005cc4b..9fa98abecfc6 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -290,7 +290,7 @@ static int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb, if (inode_alloc) inode_lock(inode_alloc); - if (o2info_coherent(&fi->ifi_req)) { + if (inode_alloc && o2info_coherent(&fi->ifi_req)) { status = ocfs2_inode_lock(inode_alloc, &bh, 0); if (status < 0) { mlog_errno(status); From 6234d485e30c0d8c904d695c97ab74eb56174576 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 25 Sep 2019 23:42:42 +0200 Subject: [PATCH 0187/3715] sched/vtime: Fix guest/system mis-accounting on task switch [ Upstream commit 68e7a4d66b0ce04bf18ff2ffded5596ab3618585 ] vtime_account_system() assumes that the target task to account cputime to is always the current task. This is most often true indeed except on task switch where we call: vtime_common_task_switch(prev) vtime_account_system(prev) Here prev is the scheduling-out task where we account the cputime to. It doesn't match current that is already the scheduling-in task at this stage of the context switch. So we end up checking the wrong task flags to determine if we are accounting guest or system time to the previous task. As a result the wrong task is used to check if the target is running in guest mode. We may then spuriously account or leak either system or guest time on task switch. Fix this assumption and also turn vtime_guest_enter/exit() to use the task passed in parameter as well to avoid future similar issues. Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Wanpeng Li Fixes: 2a42eb9594a1 ("sched/cputime: Accumulate vtime on top of nsec clocksource") Link: https://lkml.kernel.org/r/20190925214242.21873-1-frederic@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/sched/cputime.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 14d2dbf97c53..45c2cd37fe6b 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -738,7 +738,7 @@ void vtime_account_system(struct task_struct *tsk) write_seqcount_begin(&vtime->seqcount); /* We might have scheduled out from guest path */ - if (current->flags & PF_VCPU) + if (tsk->flags & PF_VCPU) vtime_account_guest(tsk, vtime); else __vtime_account_system(tsk, vtime); @@ -781,7 +781,7 @@ void vtime_guest_enter(struct task_struct *tsk) */ write_seqcount_begin(&vtime->seqcount); __vtime_account_system(tsk, vtime); - current->flags |= PF_VCPU; + tsk->flags |= PF_VCPU; write_seqcount_end(&vtime->seqcount); } EXPORT_SYMBOL_GPL(vtime_guest_enter); @@ -792,7 +792,7 @@ void vtime_guest_exit(struct task_struct *tsk) write_seqcount_begin(&vtime->seqcount); vtime_account_guest(tsk, vtime); - current->flags &= ~PF_VCPU; + tsk->flags &= ~PF_VCPU; write_seqcount_end(&vtime->seqcount); } EXPORT_SYMBOL_GPL(vtime_guest_exit); From a7731cb5a229c476b0069b8e2a7d0793e1bf8577 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 1 Aug 2019 18:57:41 +0000 Subject: [PATCH 0188/3715] perf/x86/amd: Change/fix NMI latency mitigation to use a timestamp [ Upstream commit df4d29732fdad43a51284f826bec3e6ded177540 ] It turns out that the NMI latency workaround from commit: 6d3edaae16c6 ("x86/perf/amd: Resolve NMI latency issues for active PMCs") ends up being too conservative and results in the perf NMI handler claiming NMIs too easily on AMD hardware when the NMI watchdog is active. This has an impact, for example, on the hpwdt (HPE watchdog timer) module. This module can produce an NMI that is used to reset the system. It registers an NMI handler for the NMI_UNKNOWN type and relies on the fact that nothing has claimed an NMI so that its handler will be invoked when the watchdog device produces an NMI. After the referenced commit, the hpwdt module is unable to process its generated NMI if the NMI watchdog is active, because the current NMI latency mitigation results in the NMI being claimed by the perf NMI handler. Update the AMD perf NMI latency mitigation workaround to, instead, use a window of time. Whenever a PMC is handled in the perf NMI handler, set a timestamp which will act as a perf NMI window. Any NMIs arriving within that window will be claimed by perf. Anything outside that window will not be claimed by perf. The value for the NMI window is set to 100 msecs. This is a conservative value that easily covers any NMI latency in the hardware. While this still results in a window in which the hpwdt module will not receive its NMI, the window is now much, much smaller. Signed-off-by: Tom Lendacky Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Jerry Hoemann Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 6d3edaae16c6 ("x86/perf/amd: Resolve NMI latency issues for active PMCs") Link: https://lkml.kernel.org/r/Message-ID: Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- arch/x86/events/amd/core.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 27ade3cb6482..defb536aebce 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -4,12 +4,14 @@ #include #include #include +#include #include #include #include "../perf_event.h" -static DEFINE_PER_CPU(unsigned int, perf_nmi_counter); +static DEFINE_PER_CPU(unsigned long, perf_nmi_tstamp); +static unsigned long perf_nmi_window; static __initconst const u64 amd_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] @@ -640,11 +642,12 @@ static void amd_pmu_disable_event(struct perf_event *event) * handler when multiple PMCs are active or PMC overflow while handling some * other source of an NMI. * - * Attempt to mitigate this by using the number of active PMCs to determine - * whether to return NMI_HANDLED if the perf NMI handler did not handle/reset - * any PMCs. The per-CPU perf_nmi_counter variable is set to a minimum of the - * number of active PMCs or 2. The value of 2 is used in case an NMI does not - * arrive at the LAPIC in time to be collapsed into an already pending NMI. + * Attempt to mitigate this by creating an NMI window in which un-handled NMIs + * received during this window will be claimed. This prevents extending the + * window past when it is possible that latent NMIs should be received. The + * per-CPU perf_nmi_tstamp will be set to the window end time whenever perf has + * handled a counter. When an un-handled NMI is received, it will be claimed + * only if arriving within that window. */ static int amd_pmu_handle_irq(struct pt_regs *regs) { @@ -662,21 +665,19 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) handled = x86_pmu_handle_irq(regs); /* - * If a counter was handled, record the number of possible remaining - * NMIs that can occur. + * If a counter was handled, record a timestamp such that un-handled + * NMIs will be claimed if arriving within that window. */ if (handled) { - this_cpu_write(perf_nmi_counter, - min_t(unsigned int, 2, active)); + this_cpu_write(perf_nmi_tstamp, + jiffies + perf_nmi_window); return handled; } - if (!this_cpu_read(perf_nmi_counter)) + if (time_after(jiffies, this_cpu_read(perf_nmi_tstamp))) return NMI_DONE; - this_cpu_dec(perf_nmi_counter); - return NMI_HANDLED; } @@ -908,6 +909,9 @@ static int __init amd_core_pmu_init(void) if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE)) return 0; + /* Avoid calulating the value each time in the NMI handler */ + perf_nmi_window = msecs_to_jiffies(100); + switch (boot_cpu_data.x86) { case 0x15: pr_cont("Fam15h "); From ca1e2d0660a9f53a87b88738c43e64fd376eb026 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Wed, 9 Oct 2019 12:06:00 +0200 Subject: [PATCH 0189/3715] MIPS: include: Mark __xchg as __always_inline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 46f1619500d022501a4f0389f9f4c349ab46bb86 ] Commit ac7c3e4ff401 ("compiler: enable CONFIG_OPTIMIZE_INLINING forcibly") allows compiler to uninline functions marked as 'inline'. In cace of __xchg this would cause to reference function __xchg_called_with_bad_pointer, which is an error case for catching bugs and will not happen for correct code, if __xchg is inlined. Signed-off-by: Thomas Bogendoerfer Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Sasha Levin --- arch/mips/include/asm/cmpxchg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h index 895f91b9e89c..520ca166cbed 100644 --- a/arch/mips/include/asm/cmpxchg.h +++ b/arch/mips/include/asm/cmpxchg.h @@ -73,8 +73,8 @@ extern unsigned long __xchg_called_with_bad_pointer(void) extern unsigned long __xchg_small(volatile void *ptr, unsigned long val, unsigned int size); -static inline unsigned long __xchg(volatile void *ptr, unsigned long x, - int size) +static __always_inline +unsigned long __xchg(volatile void *ptr, unsigned long x, int size) { switch (size) { case 1: From 205cb89c17cb490ffbfe55c9e6ae1c6ee5366ea5 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Wed, 9 Oct 2019 17:10:56 +0200 Subject: [PATCH 0190/3715] MIPS: fw: sni: Fix out of bounds init of o32 stack [ Upstream commit efcb529694c3b707dc0471b312944337ba16e4dd ] Use ARRAY_SIZE to caluculate the top of the o32 stack. Signed-off-by: Thomas Bogendoerfer Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Sasha Levin --- arch/mips/fw/sni/sniprom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/fw/sni/sniprom.c b/arch/mips/fw/sni/sniprom.c index 6aa264b9856a..7c6151d412bd 100644 --- a/arch/mips/fw/sni/sniprom.c +++ b/arch/mips/fw/sni/sniprom.c @@ -42,7 +42,7 @@ /* O32 stack has to be 8-byte aligned. */ static u64 o32_stk[4096]; -#define O32_STK &o32_stk[sizeof(o32_stk)] +#define O32_STK (&o32_stk[ARRAY_SIZE(o32_stk)]) #define __PROM_O32(fun, arg) fun arg __asm__(#fun); \ __asm__(#fun " = call_o32") From ff9edf3f778cb853a18dd00e1c1820df8a4e183e Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 19 Sep 2019 11:44:27 +0530 Subject: [PATCH 0191/3715] nbd: fix possible sysfs duplicate warning [ Upstream commit 862488105b84ca744b3d8ff131e0fcfe10644be1 ] 1. nbd_put takes the mutex and drops nbd->ref to 0. It then does idr_remove and drops the mutex. 2. nbd_genl_connect takes the mutex. idr_find/idr_for_each fails to find an existing device, so it does nbd_dev_add. 3. just before the nbd_put could call nbd_dev_remove or not finished totally, but if nbd_dev_add try to add_disk, we can hit: debugfs: Directory 'nbd1' with parent 'block' already present! This patch will make sure all the disk add/remove stuff are done by holding the nbd_index_mutex lock. Reported-by: Mike Christie Reviewed-by: Josef Bacik Signed-off-by: Xiubo Li Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/nbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index a23460084955..3e4500440796 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -228,8 +228,8 @@ static void nbd_put(struct nbd_device *nbd) if (refcount_dec_and_mutex_lock(&nbd->refs, &nbd_index_mutex)) { idr_remove(&nbd_index_idr, nbd->index); - mutex_unlock(&nbd_index_mutex); nbd_dev_remove(nbd); + mutex_unlock(&nbd_index_mutex); } } From 640fb32d61f32d1011bf9a96805e5a8d033abcea Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 Oct 2019 09:58:54 -0400 Subject: [PATCH 0192/3715] NFSv4: Fix leak of clp->cl_acceptor string [ Upstream commit 1047ec868332034d1fbcb2fae19fe6d4cb869ff2 ] Our client can issue multiple SETCLIENTID operations to the same server in some circumstances. Ensure that calls to nfs4_proc_setclientid() after the first one do not overwrite the previously allocated cl_acceptor string. unreferenced object 0xffff888461031800 (size 32): comm "mount.nfs", pid 2227, jiffies 4294822467 (age 1407.749s) hex dump (first 32 bytes): 6e 66 73 40 6b 6c 69 6d 74 2e 69 62 2e 31 30 31 nfs@klimt.ib.101 35 67 72 61 6e 67 65 72 2e 6e 65 74 00 00 00 00 5granger.net.... backtrace: [<00000000ab820188>] __kmalloc+0x128/0x176 [<00000000eeaf4ec8>] gss_stringify_acceptor+0xbd/0x1a7 [auth_rpcgss] [<00000000e85e3382>] nfs4_proc_setclientid+0x34e/0x46c [nfsv4] [<000000003d9cf1fa>] nfs40_discover_server_trunking+0x7a/0xed [nfsv4] [<00000000b81c3787>] nfs4_discover_server_trunking+0x81/0x244 [nfsv4] [<000000000801b55f>] nfs4_init_client+0x1b0/0x238 [nfsv4] [<00000000977daf7f>] nfs4_set_client+0xfe/0x14d [nfsv4] [<0000000053a68a2a>] nfs4_create_server+0x107/0x1db [nfsv4] [<0000000088262019>] nfs4_remote_mount+0x2c/0x59 [nfsv4] [<00000000e84a2fd0>] legacy_get_tree+0x2d/0x4c [<00000000797e947c>] vfs_get_tree+0x20/0xc7 [<00000000ecabaaa8>] fc_mount+0xe/0x36 [<00000000f15fafc2>] vfs_kern_mount+0x74/0x8d [<00000000a3ff4e26>] nfs_do_root_mount+0x8a/0xa3 [nfsv4] [<00000000d1c2b337>] nfs4_try_mount+0x58/0xad [nfsv4] [<000000004c9bddee>] nfs_fs_mount+0x820/0x869 [nfs] Fixes: f11b2a1cfbf5 ("nfs4: copy acceptor name from context ... ") Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin --- fs/nfs/nfs4proc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6409ff4876cb..af062e9f4580 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5655,6 +5655,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, } status = task->tk_status; if (setclientid.sc_cred) { + kfree(clp->cl_acceptor); clp->cl_acceptor = rpcauth_stringify_acceptor(setclientid.sc_cred); put_rpccred(setclientid.sc_cred); } From fbfd8dadf521784fd2fbb1cfc61e318979593c41 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 8 Oct 2019 17:02:32 +0200 Subject: [PATCH 0193/3715] s390/uaccess: avoid (false positive) compiler warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 062795fcdcb2d22822fb42644b1d76a8ad8439b3 ] Depending on inlining decisions by the compiler, __get/put_user_fn might become out of line. Then the compiler is no longer able to tell that size can only be 1,2,4 or 8 due to the check in __get/put_user resulting in false positives like ./arch/s390/include/asm/uaccess.h: In function ‘__put_user_fn’: ./arch/s390/include/asm/uaccess.h:113:9: warning: ‘rc’ may be used uninitialized in this function [-Wmaybe-uninitialized] 113 | return rc; | ^~ ./arch/s390/include/asm/uaccess.h: In function ‘__get_user_fn’: ./arch/s390/include/asm/uaccess.h:143:9: warning: ‘rc’ may be used uninitialized in this function [-Wmaybe-uninitialized] 143 | return rc; | ^~ These functions are supposed to be always inlined. Mark it as such. Signed-off-by: Christian Borntraeger Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- arch/s390/include/asm/uaccess.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 689eae8d3859..bd7a19a0aecf 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -95,7 +95,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n); __rc; \ }) -static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) +static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) { unsigned long spec = 0x810000UL; int rc; @@ -125,7 +125,7 @@ static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) return rc; } -static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) +static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) { unsigned long spec = 0x81UL; int rc; From 2a7ad49ad210e80780964807f241eeb3bdc23f1f Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 11 Oct 2019 16:21:34 +0200 Subject: [PATCH 0194/3715] tracing: Initialize iter->seq after zeroing in tracing_read_pipe() [ Upstream commit d303de1fcf344ff7c15ed64c3f48a991c9958775 ] A customer reported the following softlockup: [899688.160002] NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [test.sh:16464] [899688.160002] CPU: 0 PID: 16464 Comm: test.sh Not tainted 4.12.14-6.23-azure #1 SLE12-SP4 [899688.160002] RIP: 0010:up_write+0x1a/0x30 [899688.160002] Kernel panic - not syncing: softlockup: hung tasks [899688.160002] RIP: 0010:up_write+0x1a/0x30 [899688.160002] RSP: 0018:ffffa86784d4fde8 EFLAGS: 00000257 ORIG_RAX: ffffffffffffff12 [899688.160002] RAX: ffffffff970fea00 RBX: 0000000000000001 RCX: 0000000000000000 [899688.160002] RDX: ffffffff00000001 RSI: 0000000000000080 RDI: ffffffff970fea00 [899688.160002] RBP: ffffffffffffffff R08: ffffffffffffffff R09: 0000000000000000 [899688.160002] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8b59014720d8 [899688.160002] R13: ffff8b59014720c0 R14: ffff8b5901471090 R15: ffff8b5901470000 [899688.160002] tracing_read_pipe+0x336/0x3c0 [899688.160002] __vfs_read+0x26/0x140 [899688.160002] vfs_read+0x87/0x130 [899688.160002] SyS_read+0x42/0x90 [899688.160002] do_syscall_64+0x74/0x160 It caught the process in the middle of trace_access_unlock(). There is no loop. So, it must be looping in the caller tracing_read_pipe() via the "waitagain" label. Crashdump analyze uncovered that iter->seq was completely zeroed at this point, including iter->seq.seq.size. It means that print_trace_line() was never able to print anything and there was no forward progress. The culprit seems to be in the code: /* reset all but tr, trace, and overruns */ memset(&iter->seq, 0, sizeof(struct trace_iterator) - offsetof(struct trace_iterator, seq)); It was added by the commit 53d0aa773053ab182877 ("ftrace: add logic to record overruns"). It was v2.6.27-rc1. It was the time when iter->seq looked like: struct trace_seq { unsigned char buffer[PAGE_SIZE]; unsigned int len; }; There was no "size" variable and zeroing was perfectly fine. The solution is to reinitialize the structure after or without zeroing. Link: http://lkml.kernel.org/r/20191011142134.11997-1-pmladek@suse.com Signed-off-by: Petr Mladek Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sasha Levin --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bbe5a857c082..286bbad7681b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5764,6 +5764,7 @@ waitagain: sizeof(struct trace_iterator) - offsetof(struct trace_iterator, seq)); cpumask_clear(iter->started); + trace_seq_init(&iter->seq); iter->pos = -1; trace_event_read_lock(); From 4df728651b8a99693c69962d8e5a5b9e5a3bbcc7 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 17 Oct 2019 16:27:34 -0500 Subject: [PATCH 0195/3715] nbd: verify socket is supported during setup [ Upstream commit cf1b2326b734896734c6e167e41766f9cee7686a ] nbd requires socket families to support the shutdown method so the nbd recv workqueue can be woken up from its sock_recvmsg call. If the socket does not support the callout we will leave recv works running or get hangs later when the device or module is removed. This adds a check during socket connection/reconnection to make sure the socket being passed in supports the needed callout. Reported-by: syzbot+24c12fa8d218ed26011a@syzkaller.appspotmail.com Fixes: e9e006f5fcf2 ("nbd: fix max number of supported devs") Tested-by: Richard W.M. Jones Signed-off-by: Mike Christie Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/nbd.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 3e4500440796..f3d0bc9a9905 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -912,6 +912,25 @@ static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx, return ret; } +static struct socket *nbd_get_socket(struct nbd_device *nbd, unsigned long fd, + int *err) +{ + struct socket *sock; + + *err = 0; + sock = sockfd_lookup(fd, err); + if (!sock) + return NULL; + + if (sock->ops->shutdown == sock_no_shutdown) { + dev_err(disk_to_dev(nbd->disk), "Unsupported socket: shutdown callout must be supported.\n"); + *err = -EINVAL; + return NULL; + } + + return sock; +} + static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, bool netlink) { @@ -921,7 +940,7 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, struct nbd_sock *nsock; int err; - sock = sockfd_lookup(arg, &err); + sock = nbd_get_socket(nbd, arg, &err); if (!sock) return err; @@ -973,7 +992,7 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg) int i; int err; - sock = sockfd_lookup(arg, &err); + sock = nbd_get_socket(nbd, arg, &err); if (!sock) return err; From d7030f05a84cf6ce9587dce5fab9774666597cd5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 11 Oct 2019 17:11:15 +0300 Subject: [PATCH 0196/3715] USB: legousbtower: fix a signedness bug in tower_probe() [ Upstream commit fd47a417e75e2506eb3672ae569b1c87e3774155 ] The problem is that sizeof() is unsigned long so negative error codes are type promoted to high positive values and the condition becomes false. Fixes: 1d427be4a39d ("USB: legousbtower: fix slab info leak at probe") Signed-off-by: Dan Carpenter Acked-by: Johan Hovold Link: https://lore.kernel.org/r/20191011141115.GA4521@mwanda Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/misc/legousbtower.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/misc/legousbtower.c b/drivers/usb/misc/legousbtower.c index 378a565ec989..a1ed6be87471 100644 --- a/drivers/usb/misc/legousbtower.c +++ b/drivers/usb/misc/legousbtower.c @@ -881,7 +881,7 @@ static int tower_probe (struct usb_interface *interface, const struct usb_device get_version_reply, sizeof(*get_version_reply), 1000); - if (result < sizeof(*get_version_reply)) { + if (result != sizeof(*get_version_reply)) { if (result >= 0) result = -EIO; dev_err(idev, "get version request failed: %d\n", result); From 54b9f5791846d2de59e8c65502b3f1071f65424f Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 31 Oct 2019 11:42:59 -0700 Subject: [PATCH 0197/3715] net_sched: check cops->tcf_block in tc_bind_tclass() commit 8b142a00edcf8422ca48b8de88d286efb500cb53 upstream At least sch_red and sch_tbf don't implement ->tcf_block() while still have a non-zero tc "class". Instead of adding nop implementations to each of such qdisc's, we can just relax the check of cops->tcf_block() in tc_bind_tclass(). They don't support TC filter anyway. Reported-by: syzbot+21b29db13c065852f64b@syzkaller.appspotmail.com Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Zubin Mithra Signed-off-by: Sasha Levin --- net/sched/sch_api.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 637949b576c6..296e95f72eb1 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1695,6 +1695,8 @@ static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, cl = cops->find(q, portid); if (!cl) return; + if (!cops->tcf_block) + return; block = cops->tcf_block(q, cl); if (!block) return; From 56ab84408da105d46aa565e9221bf9926e814b91 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 4 Jul 2018 08:46:07 +0300 Subject: [PATCH 0198/3715] thunderbolt: Use 32-bit writes when writing ring producer/consumer [ Upstream commit 943795219d3cb9f8ce6ce51cad3ffe1f61e95c6b ] The register access should be using 32-bit reads/writes according to the datasheet. With the previous generation hardware 16-bit writes have been working but starting with ICL this is not the case anymore so fix producer/consumer register update to use correct width register address. Signed-off-by: Mika Westerberg Reviewed-by: Yehezkel Bernat Tested-by: Mario Limonciello Signed-off-by: Sasha Levin --- drivers/thunderbolt/nhi.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index 16c607075ede..af44e6e6b3bf 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -134,9 +134,20 @@ static void __iomem *ring_options_base(struct tb_ring *ring) return io; } -static void ring_iowrite16desc(struct tb_ring *ring, u32 value, u32 offset) +static void ring_iowrite_cons(struct tb_ring *ring, u16 cons) { - iowrite16(value, ring_desc_base(ring) + offset); + /* + * The other 16-bits in the register is read-only and writes to it + * are ignored by the hardware so we can save one ioread32() by + * filling the read-only bits with zeroes. + */ + iowrite32(cons, ring_desc_base(ring) + 8); +} + +static void ring_iowrite_prod(struct tb_ring *ring, u16 prod) +{ + /* See ring_iowrite_cons() above for explanation */ + iowrite32(prod << 16, ring_desc_base(ring) + 8); } static void ring_iowrite32desc(struct tb_ring *ring, u32 value, u32 offset) @@ -188,7 +199,10 @@ static void ring_write_descriptors(struct tb_ring *ring) descriptor->sof = frame->sof; } ring->head = (ring->head + 1) % ring->size; - ring_iowrite16desc(ring, ring->head, ring->is_tx ? 10 : 8); + if (ring->is_tx) + ring_iowrite_prod(ring, ring->head); + else + ring_iowrite_cons(ring, ring->head); } } @@ -461,7 +475,7 @@ void ring_stop(struct tb_ring *ring) ring_iowrite32options(ring, 0, 0); ring_iowrite64desc(ring, 0, 0); - ring_iowrite16desc(ring, 0, ring->is_tx ? 10 : 8); + ring_iowrite32desc(ring, 0, 8); ring_iowrite32desc(ring, 0, 12); ring->head = 0; ring->tail = 0; From ce1dee3c1a88a534f561310f16e2eed3e686f28a Mon Sep 17 00:00:00 2001 From: Hui Peng Date: Sat, 3 Aug 2019 20:29:04 -0400 Subject: [PATCH 0199/3715] ath6kl: fix a NULL-ptr-deref bug in ath6kl_usb_alloc_urb_from_pipe() [ Upstream commit 39d170b3cb62ba98567f5c4f40c27b5864b304e5 ] The `ar_usb` field of `ath6kl_usb_pipe_usb_pipe` objects are initialized to point to the containing `ath6kl_usb` object according to endpoint descriptors read from the device side, as shown below in `ath6kl_usb_setup_pipe_resources`: for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { endpoint = &iface_desc->endpoint[i].desc; // get the address from endpoint descriptor pipe_num = ath6kl_usb_get_logical_pipe_num(ar_usb, endpoint->bEndpointAddress, &urbcount); ...... // select the pipe object pipe = &ar_usb->pipes[pipe_num]; // initialize the ar_usb field pipe->ar_usb = ar_usb; } The driver assumes that the addresses reported in endpoint descriptors from device side to be complete. If a device is malicious and does not report complete addresses, it may trigger NULL-ptr-deref `ath6kl_usb_alloc_urb_from_pipe` and `ath6kl_usb_free_urb_to_pipe`. This patch fixes the bug by preventing potential NULL-ptr-deref (CVE-2019-15098). Signed-off-by: Hui Peng Reported-by: Hui Peng Reported-by: Mathias Payer Reviewed-by: Greg Kroah-Hartman Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath6kl/usb.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/wireless/ath/ath6kl/usb.c b/drivers/net/wireless/ath/ath6kl/usb.c index 4defb7a0330f..53b66e9434c9 100644 --- a/drivers/net/wireless/ath/ath6kl/usb.c +++ b/drivers/net/wireless/ath/ath6kl/usb.c @@ -132,6 +132,10 @@ ath6kl_usb_alloc_urb_from_pipe(struct ath6kl_usb_pipe *pipe) struct ath6kl_urb_context *urb_context = NULL; unsigned long flags; + /* bail if this pipe is not initialized */ + if (!pipe->ar_usb) + return NULL; + spin_lock_irqsave(&pipe->ar_usb->cs_lock, flags); if (!list_empty(&pipe->urb_list_head)) { urb_context = @@ -150,6 +154,10 @@ static void ath6kl_usb_free_urb_to_pipe(struct ath6kl_usb_pipe *pipe, { unsigned long flags; + /* bail if this pipe is not initialized */ + if (!pipe->ar_usb) + return; + spin_lock_irqsave(&pipe->ar_usb->cs_lock, flags); pipe->urb_cnt++; From 525c270c1fac4ba6659e12ccd3348606e53fe9ea Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 23 Oct 2019 14:26:37 +0200 Subject: [PATCH 0200/3715] fuse: flush dirty data/metadata before non-truncate setattr commit b24e7598db62386a95a3c8b9c75630c5d56fe077 upstream. If writeback cache is enabled, then writes might get reordered with chmod/chown/utimes. The problem with this is that performing the write in the fuse daemon might itself change some of these attributes. In such case the following sequence of operations will result in file ending up with the wrong mode, for example: int fd = open ("suid", O_WRONLY|O_CREAT|O_EXCL); write (fd, "1", 1); fchown (fd, 0, 0); fchmod (fd, 04755); close (fd); This patch fixes this by flushing pending writes before performing chown/chmod/utimes. Reported-by: Giuseppe Scrivano Tested-by: Giuseppe Scrivano Fixes: 4d99ff8f12eb ("fuse: Turn writeback cache on") Cc: # v3.15+ Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dir.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index d933ecb7a08c..b79bba77652a 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1650,6 +1650,19 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, if (attr->ia_valid & ATTR_SIZE) is_truncate = true; + /* Flush dirty data/metadata before non-truncate SETATTR */ + if (is_wb && S_ISREG(inode->i_mode) && + attr->ia_valid & + (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET | + ATTR_TIMES_SET)) { + err = write_inode_now(inode, true); + if (err) + return err; + + fuse_set_nowrite(inode); + fuse_release_nowrite(inode); + } + if (is_truncate) { fuse_set_nowrite(inode); set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); From 93f4021f0dd62089db7c56a767f6a92cb9473dee Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 23 Oct 2019 14:26:37 +0200 Subject: [PATCH 0201/3715] fuse: truncate pending writes on O_TRUNC commit e4648309b85a78f8c787457832269a8712a8673e upstream. Make sure cached writes are not reordered around open(..., O_TRUNC), with the obvious wrong results. Fixes: 4d99ff8f12eb ("fuse: Turn writeback cache on") Cc: # v3.15+ Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/file.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 34cbec8e6850..969584c99c54 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -201,7 +201,7 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir) { struct fuse_conn *fc = get_fuse_conn(inode); int err; - bool lock_inode = (file->f_flags & O_TRUNC) && + bool is_wb_truncate = (file->f_flags & O_TRUNC) && fc->atomic_o_trunc && fc->writeback_cache; @@ -209,16 +209,20 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir) if (err) return err; - if (lock_inode) + if (is_wb_truncate) { inode_lock(inode); + fuse_set_nowrite(inode); + } err = fuse_do_open(fc, get_node_id(inode), file, isdir); if (!err) fuse_finish_open(inode, file); - if (lock_inode) + if (is_wb_truncate) { + fuse_release_nowrite(inode); inode_unlock(inode); + } return err; } From 317314cb8061083622a1e44efd9cfe99b545c0ed Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sat, 26 Oct 2019 12:06:20 +0900 Subject: [PATCH 0202/3715] ALSA: bebob: Fix prototype of helper function to return negative value commit f2bbdbcb075f3977a53da3bdcb7cd460bc8ae5f2 upstream. A helper function of ALSA bebob driver returns negative value in a function which has a prototype to return unsigned value. This commit fixes it by changing the prototype. Fixes: eb7b3a056cd8 ("ALSA: bebob: Add commands and connections/streams management") Cc: # v3.16+ Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20191026030620.12077-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/firewire/bebob/bebob_stream.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/firewire/bebob/bebob_stream.c b/sound/firewire/bebob/bebob_stream.c index 4d3034a68bdf..be2c056eb62d 100644 --- a/sound/firewire/bebob/bebob_stream.c +++ b/sound/firewire/bebob/bebob_stream.c @@ -253,8 +253,7 @@ end: return err; } -static unsigned int -map_data_channels(struct snd_bebob *bebob, struct amdtp_stream *s) +static int map_data_channels(struct snd_bebob *bebob, struct amdtp_stream *s) { unsigned int sec, sections, ch, channels; unsigned int pcm, midi, location; From 224db95138624aea6070df2446fd199233f42755 Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Thu, 24 Oct 2019 19:44:39 +0800 Subject: [PATCH 0203/3715] ALSA: hda/realtek - Fix 2 front mics of codec 0x623 commit 8a6c55d0f883e9a7e7c91841434f3b6bbf932bb2 upstream. These 2 ThinkCentres installed a new realtek codec ID 0x623, it has 2 front mics with the same location on pin 0x18 and 0x19. Apply fixup ALC283_FIXUP_HEADSET_MIC to change 1 front mic location to right, then pulseaudio can handle them. One "Front Mic" and one "Mic" will be shown, and audio output works fine. Signed-off-by: Aaron Ma Cc: Link: https://lore.kernel.org/r/20191024114439.31522-1-aaron.ma@canonical.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 8d6c5be38736..0fc309975d42 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6614,6 +6614,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x312f, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x313c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x3151, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x17aa, 0x3176, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x17aa, 0x3178, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo B50-70", ALC269_FIXUP_DMIC_THINKPAD_ACPI), From 9d7336ac28f7a169f327fe6a30debbc01682b366 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Thu, 24 Oct 2019 15:13:32 +0800 Subject: [PATCH 0204/3715] ALSA: hda/realtek - Add support for ALC623 commit f0778871a13889b86a65d4ad34bef8340af9d082 upstream. Support new codec ALC623. Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/r/ed97b6a8bd9445ecb48bc763d9aaba7a@realtek.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 0fc309975d42..404c50ab28fa 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -375,6 +375,9 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0672: alc_update_coef_idx(codec, 0xd, 0, 1<<14); /* EAPD Ctrl */ break; + case 0x10ec0623: + alc_update_coef_idx(codec, 0x19, 1<<13, 0); + break; case 0x10ec0668: alc_update_coef_idx(codec, 0x7, 3<<13, 0); break; @@ -2757,6 +2760,7 @@ enum { ALC269_TYPE_ALC225, ALC269_TYPE_ALC294, ALC269_TYPE_ALC300, + ALC269_TYPE_ALC623, ALC269_TYPE_ALC700, }; @@ -2792,6 +2796,7 @@ static int alc269_parse_auto_config(struct hda_codec *codec) case ALC269_TYPE_ALC225: case ALC269_TYPE_ALC294: case ALC269_TYPE_ALC300: + case ALC269_TYPE_ALC623: case ALC269_TYPE_ALC700: ssids = alc269_ssids; break; @@ -7274,6 +7279,9 @@ static int patch_alc269(struct hda_codec *codec) spec->codec_variant = ALC269_TYPE_ALC300; spec->gen.mixer_nid = 0; /* no loopback on ALC300 */ break; + case 0x10ec0623: + spec->codec_variant = ALC269_TYPE_ALC623; + break; case 0x10ec0700: case 0x10ec0701: case 0x10ec0703: @@ -8354,6 +8362,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = { HDA_CODEC_ENTRY(0x10ec0298, "ALC298", patch_alc269), HDA_CODEC_ENTRY(0x10ec0299, "ALC299", patch_alc269), HDA_CODEC_ENTRY(0x10ec0300, "ALC300", patch_alc269), + HDA_CODEC_ENTRY(0x10ec0623, "ALC623", patch_alc269), HDA_CODEC_REV_ENTRY(0x10ec0861, 0x100340, "ALC660", patch_alc861), HDA_CODEC_ENTRY(0x10ec0660, "ALC660-VD", patch_alc861vd), HDA_CODEC_ENTRY(0x10ec0861, "ALC861", patch_alc861), From 0b5a7e7d8d85160d874ce382ba690a41a158ecc7 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 23 Oct 2019 11:34:33 -0400 Subject: [PATCH 0205/3715] UAS: Revert commit 3ae62a42090f ("UAS: fix alignment of scatter/gather segments") commit 1186f86a71130a7635a20843e355bb880c7349b2 upstream. Commit 3ae62a42090f ("UAS: fix alignment of scatter/gather segments"), copying a similar commit for usb-storage, attempted to solve a problem involving scatter-gather I/O and USB/IP by setting the virt_boundary_mask for mass-storage devices. However, it now turns out that the analogous change in usb-storage interacted badly with commit 09324d32d2a0 ("block: force an unlimited segment size on queues with a virt boundary"), which was added later. A typical error message is: ehci-pci 0000:00:13.2: swiotlb buffer is full (sz: 327680 bytes), total 32768 (slots), used 97 (slots) There is no longer any reason to keep the virt_boundary_mask setting in the uas driver. It was needed in the first place only for handling devices with a block size smaller than the maxpacket size and where the host controller was not capable of fully general scatter-gather operation (that is, able to merge two SG segments into a single USB packet). But: High-speed or slower connections never use a bulk maxpacket value larger than 512; The SCSI layer does not handle block devices with a block size smaller than 512 bytes; All the host controllers capable of SuperSpeed operation can handle fully general SG; Since commit ea44d190764b ("usbip: Implement SG support to vhci-hcd and stub driver") was merged, the USB/IP driver can also handle SG. Therefore all supported device/controller combinations should be okay with no need for any special virt_boundary_mask. So in order to head off potential problems similar to those affecting usb-storage, this patch reverts commit 3ae62a42090f. Signed-off-by: Alan Stern CC: Oliver Neukum CC: Acked-by: Christoph Hellwig Fixes: 3ae62a42090f ("UAS: fix alignment of scatter/gather segments") Link: https://lore.kernel.org/r/Pine.LNX.4.44L0.1910231132470.1878-100000@iolanthe.rowland.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/uas.c | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 21c8925a4116..1e62f2134b3a 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -796,29 +796,9 @@ static int uas_slave_alloc(struct scsi_device *sdev) { struct uas_dev_info *devinfo = (struct uas_dev_info *)sdev->host->hostdata; - int maxp; sdev->hostdata = devinfo; - /* - * We have two requirements here. We must satisfy the requirements - * of the physical HC and the demands of the protocol, as we - * definitely want no additional memory allocation in this path - * ruling out using bounce buffers. - * - * For a transmission on USB to continue we must never send - * a package that is smaller than maxpacket. Hence the length of each - * scatterlist element except the last must be divisible by the - * Bulk maxpacket value. - * If the HC does not ensure that through SG, - * the upper layer must do that. We must assume nothing - * about the capabilities off the HC, so we use the most - * pessimistic requirement. - */ - - maxp = usb_maxpacket(devinfo->udev, devinfo->data_in_pipe, 0); - blk_queue_virt_boundary(sdev->request_queue, maxp - 1); - /* * The protocol has no requirements on alignment in the strict sense. * Controllers may or may not have alignment restrictions. From 94f5de2eefae22c449e367c2dacafe869af73e3f Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 28 Oct 2019 10:54:26 -0400 Subject: [PATCH 0206/3715] USB: gadget: Reject endpoints with 0 maxpacket value commit 54f83b8c8ea9b22082a496deadf90447a326954e upstream. Endpoints with a maxpacket length of 0 are probably useless. They can't transfer any data, and it's not at all unlikely that a UDC will crash or hang when trying to handle a non-zero-length usb_request for such an endpoint. Indeed, dummy-hcd gets a divide error when trying to calculate the remainder of a transfer length by the maxpacket value, as discovered by the syzbot fuzzer. Currently the gadget core does not check for endpoints having a maxpacket value of 0. This patch adds a check to usb_ep_enable(), preventing such endpoints from being used. As far as I know, none of the gadget drivers in the kernel tries to create an endpoint with maxpacket = 0, but until now there has been nothing to prevent userspace programs under gadgetfs or configfs from doing it. Signed-off-by: Alan Stern Reported-and-tested-by: syzbot+8ab8bf161038a8768553@syzkaller.appspotmail.com CC: Acked-by: Felipe Balbi Link: https://lore.kernel.org/r/Pine.LNX.4.44L0.1910281052370.1485-100000@iolanthe.rowland.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/core.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index ad315c4c6f35..4c6d612990ba 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -107,6 +107,17 @@ int usb_ep_enable(struct usb_ep *ep) if (ep->enabled) goto out; + /* UDC drivers can't handle endpoints with maxpacket size 0 */ + if (usb_endpoint_maxp(ep->desc) == 0) { + /* + * We should log an error message here, but we can't call + * dev_err() because there's no way to find the gadget + * given only ep. + */ + ret = -EINVAL; + goto out; + } + ret = ep->ops->enable(ep, ep->desc); if (ret) goto out; From 98ada19cc6512627e771dffa99363ad2e25bf6b2 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 21 Oct 2019 11:48:06 -0400 Subject: [PATCH 0207/3715] usb-storage: Revert commit 747668dbc061 ("usb-storage: Set virt_boundary_mask to avoid SG overflows") commit 9a976949613132977098fc49510b46fa8678d864 upstream. Commit 747668dbc061 ("usb-storage: Set virt_boundary_mask to avoid SG overflows") attempted to solve a problem involving scatter-gather I/O and USB/IP by setting the virt_boundary_mask for mass-storage devices. However, it now turns out that this interacts badly with commit 09324d32d2a0 ("block: force an unlimited segment size on queues with a virt boundary"), which was added later. A typical error message is: ehci-pci 0000:00:13.2: swiotlb buffer is full (sz: 327680 bytes), total 32768 (slots), used 97 (slots) There is no longer any reason to keep the virt_boundary_mask setting for usb-storage. It was needed in the first place only for handling devices with a block size smaller than the maxpacket size and where the host controller was not capable of fully general scatter-gather operation (that is, able to merge two SG segments into a single USB packet). But: High-speed or slower connections never use a bulk maxpacket value larger than 512; The SCSI layer does not handle block devices with a block size smaller than 512 bytes; All the host controllers capable of SuperSpeed operation can handle fully general SG; Since commit ea44d190764b ("usbip: Implement SG support to vhci-hcd and stub driver") was merged, the USB/IP driver can also handle SG. Therefore all supported device/controller combinations should be okay with no need for any special virt_boundary_mask. So in order to fix the swiotlb problem, this patch reverts commit 747668dbc061. Reported-and-tested-by: Piergiorgio Sartor Link: https://marc.info/?l=linux-usb&m=157134199501202&w=2 Signed-off-by: Alan Stern CC: Seth Bollinger CC: Fixes: 747668dbc061 ("usb-storage: Set virt_boundary_mask to avoid SG overflows") Acked-by: Christoph Hellwig Link: https://lore.kernel.org/r/Pine.LNX.4.44L0.1910211145520.1673-100000@iolanthe.rowland.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/scsiglue.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index afb4b0bf47b3..fd5398efce41 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -81,7 +81,6 @@ static const char* host_info(struct Scsi_Host *host) static int slave_alloc (struct scsi_device *sdev) { struct us_data *us = host_to_us(sdev->host); - int maxp; /* * Set the INQUIRY transfer length to 36. We don't use any of @@ -90,15 +89,6 @@ static int slave_alloc (struct scsi_device *sdev) */ sdev->inquiry_len = 36; - /* - * USB has unusual scatter-gather requirements: the length of each - * scatterlist element except the last must be divisible by the - * Bulk maxpacket value. Fortunately this value is always a - * power of 2. Inform the block layer about this requirement. - */ - maxp = usb_maxpacket(us->pusb_dev, us->recv_bulk_pipe, 0); - blk_queue_virt_boundary(sdev->request_queue, maxp - 1); - /* * Some host controllers may have alignment requirements. * We'll play it safe by requiring 512-byte alignment always. From 739ad3b6e596cd87e212df5998e3ea86baf0e496 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 22 Oct 2019 16:32:02 +0200 Subject: [PATCH 0208/3715] USB: ldusb: fix ring-buffer locking commit d98ee2a19c3334e9343df3ce254b496f1fc428eb upstream. The custom ring-buffer implementation was merged without any locking or explicit memory barriers, but a spinlock was later added by commit 9d33efd9a791 ("USB: ldusb bugfix"). The lock did not cover the update of the tail index once the entry had been processed, something which could lead to memory corruption on weakly ordered architectures or due to compiler optimisations. Specifically, a completion handler running on another CPU might observe the incremented tail index and update the entry before ld_usb_read() is done with it. Fixes: 2824bd250f0b ("[PATCH] USB: add ldusb driver") Fixes: 9d33efd9a791 ("USB: ldusb bugfix") Cc: stable # 2.6.13 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191022143203.5260-2-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/ldusb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/misc/ldusb.c b/drivers/usb/misc/ldusb.c index cd92ae1231bc..457fc0ae70e1 100644 --- a/drivers/usb/misc/ldusb.c +++ b/drivers/usb/misc/ldusb.c @@ -498,11 +498,11 @@ static ssize_t ld_usb_read(struct file *file, char __user *buffer, size_t count, retval = -EFAULT; goto unlock_exit; } - dev->ring_tail = (dev->ring_tail+1) % ring_buffer_size; - retval = bytes_to_read; spin_lock_irq(&dev->rbsl); + dev->ring_tail = (dev->ring_tail + 1) % ring_buffer_size; + if (dev->buffer_overflow) { dev->buffer_overflow = 0; spin_unlock_irq(&dev->rbsl); From 0675c81abe0424be839cbc64a4dd233de8297170 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 22 Oct 2019 17:31:27 +0200 Subject: [PATCH 0209/3715] USB: ldusb: fix control-message timeout commit 52403cfbc635d28195167618690595013776ebde upstream. USB control-message timeouts are specified in milliseconds, not jiffies. Waiting 83 minutes for a transfer to complete is a bit excessive. Fixes: 2824bd250f0b ("[PATCH] USB: add ldusb driver") Cc: stable # 2.6.13 Reported-by: syzbot+a4fbb3bb76cda0ea4e58@syzkaller.appspotmail.com Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191022153127.22295-1-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/ldusb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/misc/ldusb.c b/drivers/usb/misc/ldusb.c index 457fc0ae70e1..6387545b17ea 100644 --- a/drivers/usb/misc/ldusb.c +++ b/drivers/usb/misc/ldusb.c @@ -583,7 +583,7 @@ static ssize_t ld_usb_write(struct file *file, const char __user *buffer, 1 << 8, 0, dev->interrupt_out_buffer, bytes_to_write, - USB_CTRL_SET_TIMEOUT * HZ); + USB_CTRL_SET_TIMEOUT); if (retval < 0) dev_err(&dev->intf->dev, "Couldn't submit HID_REQ_SET_REPORT %d\n", From 906f9f252be650eee94f2f1f7d9ac1efdb136044 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 29 Oct 2019 11:23:53 +0100 Subject: [PATCH 0210/3715] USB: serial: whiteheat: fix potential slab corruption commit 1251dab9e0a2c4d0d2d48370ba5baa095a5e8774 upstream. Fix a user-controlled slab buffer overflow due to a missing sanity check on the bulk-out transfer buffer used for control requests. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191029102354.2733-2-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/whiteheat.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c index 55cebc1e6fec..d4951f9a8459 100644 --- a/drivers/usb/serial/whiteheat.c +++ b/drivers/usb/serial/whiteheat.c @@ -575,6 +575,10 @@ static int firm_send_command(struct usb_serial_port *port, __u8 command, command_port = port->serial->port[COMMAND_PORT]; command_info = usb_get_serial_port_data(command_port); + + if (command_port->bulk_out_size < datasize + 1) + return -EIO; + mutex_lock(&command_info->mutex); command_info->command_finished = false; From 294da39e05db61cbad1f3367de15176215fdf422 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 29 Oct 2019 11:23:54 +0100 Subject: [PATCH 0211/3715] USB: serial: whiteheat: fix line-speed endianness commit 84968291d7924261c6a0624b9a72f952398e258b upstream. Add missing endianness conversion when setting the line speed so that this driver might work also on big-endian machines. Also use an unsigned format specifier in the corresponding debug message. Signed-off-by: Johan Hovold Cc: stable Link: https://lore.kernel.org/r/20191029102354.2733-3-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/whiteheat.c | 9 ++++++--- drivers/usb/serial/whiteheat.h | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c index d4951f9a8459..163ede42af20 100644 --- a/drivers/usb/serial/whiteheat.c +++ b/drivers/usb/serial/whiteheat.c @@ -652,6 +652,7 @@ static void firm_setup_port(struct tty_struct *tty) struct device *dev = &port->dev; struct whiteheat_port_settings port_settings; unsigned int cflag = tty->termios.c_cflag; + speed_t baud; port_settings.port = port->port_number + 1; @@ -712,11 +713,13 @@ static void firm_setup_port(struct tty_struct *tty) dev_dbg(dev, "%s - XON = %2x, XOFF = %2x\n", __func__, port_settings.xon, port_settings.xoff); /* get the baud rate wanted */ - port_settings.baud = tty_get_baud_rate(tty); - dev_dbg(dev, "%s - baud rate = %d\n", __func__, port_settings.baud); + baud = tty_get_baud_rate(tty); + port_settings.baud = cpu_to_le32(baud); + dev_dbg(dev, "%s - baud rate = %u\n", __func__, baud); /* fixme: should set validated settings */ - tty_encode_baud_rate(tty, port_settings.baud, port_settings.baud); + tty_encode_baud_rate(tty, baud, baud); + /* handle any settings that aren't specified in the tty structure */ port_settings.lloop = 0; diff --git a/drivers/usb/serial/whiteheat.h b/drivers/usb/serial/whiteheat.h index 38065df4d2d8..30169c859a74 100644 --- a/drivers/usb/serial/whiteheat.h +++ b/drivers/usb/serial/whiteheat.h @@ -91,7 +91,7 @@ struct whiteheat_simple { struct whiteheat_port_settings { __u8 port; /* port number (1 to N) */ - __u32 baud; /* any value 7 - 460800, firmware calculates + __le32 baud; /* any value 7 - 460800, firmware calculates best fit; arrives little endian */ __u8 bits; /* 5, 6, 7, or 8 */ __u8 stop; /* 1 or 2, default 1 (2 = 1.5 if bits = 5) */ From 661c68cf1787d5754c3cb43a6c5934ca568e604b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 23 Oct 2019 13:21:50 -0700 Subject: [PATCH 0212/3715] scsi: target: cxgbit: Fix cxgbit_fw4_ack() commit fc5b220b2dcf8b512d9bd46fd17f82257e49bf89 upstream. Use the pointer 'p' after having tested that pointer instead of before. Fixes: 5cadafb236df ("target/cxgbit: Fix endianness annotations") Cc: Varun Prakash Cc: Nicholas Bellinger Cc: Link: https://lore.kernel.org/r/20191023202150.22173-1-bvanassche@acm.org Reported-by: Dan Carpenter Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/cxgbit/cxgbit_cm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index 0c00bb27c9c5..c764b292f6ba 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -1767,7 +1767,7 @@ static void cxgbit_fw4_ack(struct cxgbit_sock *csk, struct sk_buff *skb) while (credits) { struct sk_buff *p = cxgbit_sock_peek_wr(csk); - const u32 csum = (__force u32)p->csum; + u32 csum; if (unlikely(!p)) { pr_err("csk 0x%p,%u, cr %u,%u+%u, empty.\n", @@ -1776,6 +1776,7 @@ static void cxgbit_fw4_ack(struct cxgbit_sock *csk, struct sk_buff *skb) break; } + csum = (__force u32)p->csum; if (unlikely(credits < csum)) { pr_warn("csk 0x%p,%u, cr %u,%u+%u, < %u.\n", csk, csk->tid, From 8390ff01a88d2f0dab63e8c50ba7e666f52a38ab Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 7 Oct 2019 20:56:26 +0200 Subject: [PATCH 0213/3715] HID: i2c-hid: add Trekstor Primebook C11B to descriptor override commit 09f3dbe474735df13dd8a66d3d1231048d9b373f upstream. The Primebook C11B uses the SIPODEV SP1064 touchpad. There are 2 versions of this 2-in-1 and the touchpad in the older version does not supply descriptors, so it has to be added to the override list. Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Signed-off-by: Benjamin Tissoires Signed-off-by: Greg Kroah-Hartman --- drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c index fd1b6eea6d2f..10af8585c820 100644 --- a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c +++ b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c @@ -322,6 +322,25 @@ static const struct dmi_system_id i2c_hid_dmi_desc_override_table[] = { }, .driver_data = (void *)&sipodev_desc }, + { + /* + * There are at least 2 Primebook C11B versions, the older + * version has a product-name of "Primebook C11B", and a + * bios version / release / firmware revision of: + * V2.1.2 / 05/03/2018 / 18.2 + * The new version has "PRIMEBOOK C11B" as product-name and a + * bios version / release / firmware revision of: + * CFALKSW05_BIOS_V1.1.2 / 11/19/2018 / 19.2 + * Only the older version needs this quirk, note the newer + * version will not match as it has a different product-name. + */ + .ident = "Trekstor Primebook C11B", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "TREKSTOR"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Primebook C11B"), + }, + .driver_data = (void *)&sipodev_desc + }, { .ident = "Direkt-Tek DTLAPY116-2", .matches = { From e13a3d84494ca7b560551a9927231e7ecf27d96a Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 3 Oct 2019 14:53:59 -0400 Subject: [PATCH 0214/3715] HID: Fix assumption that devices have inputs commit d9d4b1e46d9543a82c23f6df03f4ad697dab361b upstream. The syzbot fuzzer found a slab-out-of-bounds write bug in the hid-gaff driver. The problem is caused by the driver's assumption that the device must have an input report. While this will be true for all normal HID input devices, a suitably malicious device can violate the assumption. The same assumption is present in over a dozen other HID drivers. This patch fixes them by checking that the list of hid_inputs for the hid_device is nonempty before allowing it to be used. Reported-and-tested-by: syzbot+403741a091bf41d4ae79@syzkaller.appspotmail.com Signed-off-by: Alan Stern CC: Signed-off-by: Benjamin Tissoires Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-axff.c | 11 +++++++++-- drivers/hid/hid-dr.c | 12 +++++++++--- drivers/hid/hid-emsff.c | 12 +++++++++--- drivers/hid/hid-gaff.c | 12 +++++++++--- drivers/hid/hid-holtekff.c | 12 +++++++++--- drivers/hid/hid-lg2ff.c | 12 +++++++++--- drivers/hid/hid-lg3ff.c | 11 +++++++++-- drivers/hid/hid-lg4ff.c | 11 +++++++++-- drivers/hid/hid-lgff.c | 11 +++++++++-- drivers/hid/hid-logitech-hidpp.c | 11 +++++++++-- drivers/hid/hid-sony.c | 12 +++++++++--- drivers/hid/hid-tmff.c | 12 +++++++++--- drivers/hid/hid-zpff.c | 12 +++++++++--- 13 files changed, 117 insertions(+), 34 deletions(-) diff --git a/drivers/hid/hid-axff.c b/drivers/hid/hid-axff.c index a594e478a1e2..843aed4dec80 100644 --- a/drivers/hid/hid-axff.c +++ b/drivers/hid/hid-axff.c @@ -75,13 +75,20 @@ static int axff_init(struct hid_device *hid) { struct axff_device *axff; struct hid_report *report; - struct hid_input *hidinput = list_first_entry(&hid->inputs, struct hid_input, list); + struct hid_input *hidinput; struct list_head *report_list =&hid->report_enum[HID_OUTPUT_REPORT].report_list; - struct input_dev *dev = hidinput->input; + struct input_dev *dev; int field_count = 0; int i, j; int error; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_first_entry(&hid->inputs, struct hid_input, list); + dev = hidinput->input; + if (list_empty(report_list)) { hid_err(hid, "no output reports found\n"); return -ENODEV; diff --git a/drivers/hid/hid-dr.c b/drivers/hid/hid-dr.c index 818ea7d93533..309969b8dc2e 100644 --- a/drivers/hid/hid-dr.c +++ b/drivers/hid/hid-dr.c @@ -87,13 +87,19 @@ static int drff_init(struct hid_device *hid) { struct drff_device *drff; struct hid_report *report; - struct hid_input *hidinput = list_first_entry(&hid->inputs, - struct hid_input, list); + struct hid_input *hidinput; struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; - struct input_dev *dev = hidinput->input; + struct input_dev *dev; int error; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_first_entry(&hid->inputs, struct hid_input, list); + dev = hidinput->input; + if (list_empty(report_list)) { hid_err(hid, "no output reports found\n"); return -ENODEV; diff --git a/drivers/hid/hid-emsff.c b/drivers/hid/hid-emsff.c index d82d75bb11f7..80f9a02dfa69 100644 --- a/drivers/hid/hid-emsff.c +++ b/drivers/hid/hid-emsff.c @@ -59,13 +59,19 @@ static int emsff_init(struct hid_device *hid) { struct emsff_device *emsff; struct hid_report *report; - struct hid_input *hidinput = list_first_entry(&hid->inputs, - struct hid_input, list); + struct hid_input *hidinput; struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; - struct input_dev *dev = hidinput->input; + struct input_dev *dev; int error; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_first_entry(&hid->inputs, struct hid_input, list); + dev = hidinput->input; + if (list_empty(report_list)) { hid_err(hid, "no output reports found\n"); return -ENODEV; diff --git a/drivers/hid/hid-gaff.c b/drivers/hid/hid-gaff.c index 2d8cead3adca..5a02c50443cb 100644 --- a/drivers/hid/hid-gaff.c +++ b/drivers/hid/hid-gaff.c @@ -77,14 +77,20 @@ static int gaff_init(struct hid_device *hid) { struct gaff_device *gaff; struct hid_report *report; - struct hid_input *hidinput = list_entry(hid->inputs.next, - struct hid_input, list); + struct hid_input *hidinput; struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; struct list_head *report_ptr = report_list; - struct input_dev *dev = hidinput->input; + struct input_dev *dev; int error; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_entry(hid->inputs.next, struct hid_input, list); + dev = hidinput->input; + if (list_empty(report_list)) { hid_err(hid, "no output reports found\n"); return -ENODEV; diff --git a/drivers/hid/hid-holtekff.c b/drivers/hid/hid-holtekff.c index 9325545fc3ae..3e84551cca9c 100644 --- a/drivers/hid/hid-holtekff.c +++ b/drivers/hid/hid-holtekff.c @@ -140,13 +140,19 @@ static int holtekff_init(struct hid_device *hid) { struct holtekff_device *holtekff; struct hid_report *report; - struct hid_input *hidinput = list_entry(hid->inputs.next, - struct hid_input, list); + struct hid_input *hidinput; struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; - struct input_dev *dev = hidinput->input; + struct input_dev *dev; int error; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_entry(hid->inputs.next, struct hid_input, list); + dev = hidinput->input; + if (list_empty(report_list)) { hid_err(hid, "no output report found\n"); return -ENODEV; diff --git a/drivers/hid/hid-lg2ff.c b/drivers/hid/hid-lg2ff.c index 0e3fb1a7e421..6909d9c2fc67 100644 --- a/drivers/hid/hid-lg2ff.c +++ b/drivers/hid/hid-lg2ff.c @@ -62,11 +62,17 @@ int lg2ff_init(struct hid_device *hid) { struct lg2ff_device *lg2ff; struct hid_report *report; - struct hid_input *hidinput = list_entry(hid->inputs.next, - struct hid_input, list); - struct input_dev *dev = hidinput->input; + struct hid_input *hidinput; + struct input_dev *dev; int error; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_entry(hid->inputs.next, struct hid_input, list); + dev = hidinput->input; + /* Check that the report looks ok */ report = hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 7); if (!report) diff --git a/drivers/hid/hid-lg3ff.c b/drivers/hid/hid-lg3ff.c index 8c2da183d3bc..acf739fc4060 100644 --- a/drivers/hid/hid-lg3ff.c +++ b/drivers/hid/hid-lg3ff.c @@ -129,12 +129,19 @@ static const signed short ff3_joystick_ac[] = { int lg3ff_init(struct hid_device *hid) { - struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list); - struct input_dev *dev = hidinput->input; + struct hid_input *hidinput; + struct input_dev *dev; const signed short *ff_bits = ff3_joystick_ac; int error; int i; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_entry(hid->inputs.next, struct hid_input, list); + dev = hidinput->input; + /* Check that the report looks ok */ if (!hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 35)) return -ENODEV; diff --git a/drivers/hid/hid-lg4ff.c b/drivers/hid/hid-lg4ff.c index 127f1335a1da..1b109a5cf922 100644 --- a/drivers/hid/hid-lg4ff.c +++ b/drivers/hid/hid-lg4ff.c @@ -1261,8 +1261,8 @@ static int lg4ff_handle_multimode_wheel(struct hid_device *hid, u16 *real_produc int lg4ff_init(struct hid_device *hid) { - struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list); - struct input_dev *dev = hidinput->input; + struct hid_input *hidinput; + struct input_dev *dev; struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; struct hid_report *report = list_entry(report_list->next, struct hid_report, list); const struct usb_device_descriptor *udesc = &(hid_to_usb_dev(hid)->descriptor); @@ -1274,6 +1274,13 @@ int lg4ff_init(struct hid_device *hid) int mmode_ret, mmode_idx = -1; u16 real_product_id; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_entry(hid->inputs.next, struct hid_input, list); + dev = hidinput->input; + /* Check that the report looks ok */ if (!hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 7)) return -1; diff --git a/drivers/hid/hid-lgff.c b/drivers/hid/hid-lgff.c index e1394af0ae7b..1871cdcd1e0a 100644 --- a/drivers/hid/hid-lgff.c +++ b/drivers/hid/hid-lgff.c @@ -127,12 +127,19 @@ static void hid_lgff_set_autocenter(struct input_dev *dev, u16 magnitude) int lgff_init(struct hid_device* hid) { - struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list); - struct input_dev *dev = hidinput->input; + struct hid_input *hidinput; + struct input_dev *dev; const signed short *ff_bits = ff_joystick; int error; int i; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_entry(hid->inputs.next, struct hid_input, list); + dev = hidinput->input; + /* Check that the report looks ok */ if (!hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 7)) return -ENODEV; diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index b705cbb58ca6..4706fb852eaf 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -1867,8 +1867,8 @@ static void hidpp_ff_destroy(struct ff_device *ff) static int hidpp_ff_init(struct hidpp_device *hidpp, u8 feature_index) { struct hid_device *hid = hidpp->hid_dev; - struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list); - struct input_dev *dev = hidinput->input; + struct hid_input *hidinput; + struct input_dev *dev; const struct usb_device_descriptor *udesc = &(hid_to_usb_dev(hid)->descriptor); const u16 bcdDevice = le16_to_cpu(udesc->bcdDevice); struct ff_device *ff; @@ -1877,6 +1877,13 @@ static int hidpp_ff_init(struct hidpp_device *hidpp, u8 feature_index) int error, j, num_slots; u8 version; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_entry(hid->inputs.next, struct hid_input, list); + dev = hidinput->input; + if (!dev) { hid_err(hid, "Struct input_dev not set!\n"); return -EINVAL; diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index 6ce9b5e1a06f..c8b07a182c0b 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -2163,9 +2163,15 @@ static int sony_play_effect(struct input_dev *dev, void *data, static int sony_init_ff(struct sony_sc *sc) { - struct hid_input *hidinput = list_entry(sc->hdev->inputs.next, - struct hid_input, list); - struct input_dev *input_dev = hidinput->input; + struct hid_input *hidinput; + struct input_dev *input_dev; + + if (list_empty(&sc->hdev->inputs)) { + hid_err(sc->hdev, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_entry(sc->hdev->inputs.next, struct hid_input, list); + input_dev = hidinput->input; input_set_capability(input_dev, EV_FF, FF_RUMBLE); return input_ff_create_memless(input_dev, NULL, sony_play_effect); diff --git a/drivers/hid/hid-tmff.c b/drivers/hid/hid-tmff.c index cfa0cb22c9b3..d98e471a5f7b 100644 --- a/drivers/hid/hid-tmff.c +++ b/drivers/hid/hid-tmff.c @@ -136,12 +136,18 @@ static int tmff_init(struct hid_device *hid, const signed short *ff_bits) struct tmff_device *tmff; struct hid_report *report; struct list_head *report_list; - struct hid_input *hidinput = list_entry(hid->inputs.next, - struct hid_input, list); - struct input_dev *input_dev = hidinput->input; + struct hid_input *hidinput; + struct input_dev *input_dev; int error; int i; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_entry(hid->inputs.next, struct hid_input, list); + input_dev = hidinput->input; + tmff = kzalloc(sizeof(struct tmff_device), GFP_KERNEL); if (!tmff) return -ENOMEM; diff --git a/drivers/hid/hid-zpff.c b/drivers/hid/hid-zpff.c index a29756c6ca02..4e7e01be99b1 100644 --- a/drivers/hid/hid-zpff.c +++ b/drivers/hid/hid-zpff.c @@ -66,11 +66,17 @@ static int zpff_init(struct hid_device *hid) { struct zpff_device *zpff; struct hid_report *report; - struct hid_input *hidinput = list_entry(hid->inputs.next, - struct hid_input, list); - struct input_dev *dev = hidinput->input; + struct hid_input *hidinput; + struct input_dev *dev; int i, error; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_entry(hid->inputs.next, struct hid_input, list); + dev = hidinput->input; + for (i = 0; i < 4; i++) { report = hid_validate_values(hid, HID_OUTPUT_REPORT, 0, i, 1); if (!report) From eb045a172952adde158e85b0e8d8028e516545d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Fri, 23 Aug 2019 21:15:27 +0200 Subject: [PATCH 0215/3715] HID: fix error message in hid_open_report() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b3a81c777dcb093020680490ab970d85e2f6f04f upstream. On HID report descriptor parsing error the code displays bogus pointer instead of error offset (subtracts start=NULL from end). Make the message more useful by displaying correct error offset and include total buffer size for reference. This was carried over from ancient times - "Fixed" commit just promoted the message from DEBUG to ERROR. Cc: stable@vger.kernel.org Fixes: 8c3d52fc393b ("HID: make parser more verbose about parsing errors by default") Signed-off-by: Michał Mirosław Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 7c8049a5bd99..0b0fa257299d 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -979,6 +979,7 @@ int hid_open_report(struct hid_device *device) __u8 *start; __u8 *buf; __u8 *end; + __u8 *next; int ret; static int (*dispatch_type[])(struct hid_parser *parser, struct hid_item *item) = { @@ -1032,7 +1033,8 @@ int hid_open_report(struct hid_device *device) device->collection_size = HID_DEFAULT_NUM_COLLECTIONS; ret = -EINVAL; - while ((start = fetch_item(start, end, &item)) != NULL) { + while ((next = fetch_item(start, end, &item)) != NULL) { + start = next; if (item.format != HID_ITEM_FORMAT_SHORT) { hid_err(device, "unexpected long global item\n"); @@ -1061,7 +1063,8 @@ int hid_open_report(struct hid_device *device) } } - hid_err(device, "item fetching failed at offset %d\n", (int)(end - start)); + hid_err(device, "item fetching failed at offset %u/%u\n", + size - (unsigned int)(end - start), size); err: vfree(parser); hid_close_report(device); From 2b56c89ae8345cc328e4f8d5c1949acc5e51ea74 Mon Sep 17 00:00:00 2001 From: Markus Theil Date: Tue, 29 Oct 2019 10:30:03 +0100 Subject: [PATCH 0216/3715] nl80211: fix validation of mesh path nexthop commit 1fab1b89e2e8f01204a9c05a39fd0b6411a48593 upstream. Mesh path nexthop should be a ethernet address, but current validation checks against 4 byte integers. Cc: stable@vger.kernel.org Fixes: 2ec600d672e74 ("nl80211/cfg80211: support for mesh, sta dumping") Signed-off-by: Markus Theil Link: https://lore.kernel.org/r/20191029093003.10355-1-markus.theil@tu-ilmenau.de Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/nl80211.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ff31feeee8e3..9627c52c3f93 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -283,7 +283,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_MNTR_FLAGS] = { /* NLA_NESTED can't be empty */ }, [NL80211_ATTR_MESH_ID] = { .type = NLA_BINARY, .len = IEEE80211_MAX_MESH_ID_LEN }, - [NL80211_ATTR_MPATH_NEXT_HOP] = { .type = NLA_U32 }, + [NL80211_ATTR_MPATH_NEXT_HOP] = { .type = NLA_BINARY, + .len = ETH_ALEN }, [NL80211_ATTR_REG_ALPHA2] = { .type = NLA_STRING, .len = 2 }, [NL80211_ATTR_REG_RULES] = { .type = NLA_NESTED }, From 119e9aef452a6b6976d20dac8f35aa2dc3e01348 Mon Sep 17 00:00:00 2001 From: Yihui ZENG Date: Fri, 25 Oct 2019 12:31:48 +0300 Subject: [PATCH 0217/3715] s390/cmm: fix information leak in cmm_timeout_handler() commit b8e51a6a9db94bc1fb18ae831b3dab106b5a4b5f upstream. The problem is that we were putting the NUL terminator too far: buf[sizeof(buf) - 1] = '\0'; If the user input isn't NUL terminated and they haven't initialized the whole buffer then it leads to an info leak. The NUL terminator should be: buf[len - 1] = '\0'; Signed-off-by: Yihui Zeng Cc: stable@vger.kernel.org Signed-off-by: Dan Carpenter [heiko.carstens@de.ibm.com: keep semantics of how *lenp and *ppos are handled] Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Greg Kroah-Hartman --- arch/s390/mm/cmm.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 829c63dbc81a..c0e96bdac80a 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -307,16 +307,16 @@ static int cmm_timeout_handler(struct ctl_table *ctl, int write, } if (write) { - len = *lenp; - if (copy_from_user(buf, buffer, - len > sizeof(buf) ? sizeof(buf) : len)) + len = min(*lenp, sizeof(buf)); + if (copy_from_user(buf, buffer, len)) return -EFAULT; - buf[sizeof(buf) - 1] = '\0'; + buf[len - 1] = '\0'; cmm_skip_blanks(buf, &p); nr = simple_strtoul(p, &p, 0); cmm_skip_blanks(p, &p); seconds = simple_strtoul(p, &p, 0); cmm_set_timeout(nr, seconds); + *ppos += *lenp; } else { len = sprintf(buf, "%ld %ld\n", cmm_timeout_pages, cmm_timeout_seconds); @@ -324,9 +324,9 @@ static int cmm_timeout_handler(struct ctl_table *ctl, int write, len = *lenp; if (copy_to_user(buffer, buf, len)) return -EFAULT; + *lenp = len; + *ppos += len; } - *lenp = len; - *ppos += len; return 0; } From c12fc95b876904ef2e8037554eb395609974bbea Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 28 Oct 2019 11:03:27 +0100 Subject: [PATCH 0218/3715] s390/idle: fix cpu idle time calculation commit 3d7efa4edd07be5c5c3ffa95ba63e97e070e1f3f upstream. The idle time reported in /proc/stat sometimes incorrectly contains huge values on s390. This is caused by a bug in arch_cpu_idle_time(). The kernel tries to figure out when a different cpu entered idle by accessing its per-cpu data structure. There is an ordering problem: if the remote cpu has an idle_enter value which is not zero, and an idle_exit value which is zero, it is assumed it is idle since "now". The "now" timestamp however is taken before the idle_enter value is read. Which in turn means that "now" can be smaller than idle_enter of the remote cpu. Unconditionally subtracting idle_enter from "now" can thus lead to a negative value (aka large unsigned value). Fix this by moving the get_tod_clock() invocation out of the loop. While at it also make the code a bit more readable. A similar bug also exists for show_idle_time(). Fix this is as well. Cc: Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/idle.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index b9d8fe45737a..8f8456816d83 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -69,18 +69,26 @@ DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); static ssize_t show_idle_time(struct device *dev, struct device_attribute *attr, char *buf) { + unsigned long long now, idle_time, idle_enter, idle_exit, in_idle; struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); - unsigned long long now, idle_time, idle_enter, idle_exit; unsigned int seq; do { - now = get_tod_clock(); seq = read_seqcount_begin(&idle->seqcount); idle_time = READ_ONCE(idle->idle_time); idle_enter = READ_ONCE(idle->clock_idle_enter); idle_exit = READ_ONCE(idle->clock_idle_exit); } while (read_seqcount_retry(&idle->seqcount, seq)); - idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0; + in_idle = 0; + now = get_tod_clock(); + if (idle_enter) { + if (idle_exit) { + in_idle = idle_exit - idle_enter; + } else if (now > idle_enter) { + in_idle = now - idle_enter; + } + } + idle_time += in_idle; return sprintf(buf, "%llu\n", idle_time >> 12); } DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); @@ -88,17 +96,24 @@ DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); u64 arch_cpu_idle_time(int cpu) { struct s390_idle_data *idle = &per_cpu(s390_idle, cpu); - unsigned long long now, idle_enter, idle_exit; + unsigned long long now, idle_enter, idle_exit, in_idle; unsigned int seq; do { - now = get_tod_clock(); seq = read_seqcount_begin(&idle->seqcount); idle_enter = READ_ONCE(idle->clock_idle_enter); idle_exit = READ_ONCE(idle->clock_idle_exit); } while (read_seqcount_retry(&idle->seqcount, seq)); - - return cputime_to_nsecs(idle_enter ? ((idle_exit ?: now) - idle_enter) : 0); + in_idle = 0; + now = get_tod_clock(); + if (idle_enter) { + if (idle_exit) { + in_idle = idle_exit - idle_enter; + } else if (now > idle_enter) { + in_idle = now - idle_enter; + } + } + return cputime_to_nsecs(in_idle); } void arch_cpu_idle_enter(void) From 343e5699b74e8b9806b8627d59aa00243f5c96c8 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 29 Oct 2019 15:30:51 +0000 Subject: [PATCH 0219/3715] arm64: Ensure VM_WRITE|VM_SHARED ptes are clean by default commit aa57157be69fb599bd4c38a4b75c5aad74a60ec0 upstream. Shared and writable mappings (__S.1.) should be clean (!dirty) initially and made dirty on a subsequent write either through the hardware DBM (dirty bit management) mechanism or through a write page fault. A clean pte for the arm64 kernel is one that has PTE_RDONLY set and PTE_DIRTY clear. The PAGE_SHARED{,_EXEC} attributes have PTE_WRITE set (PTE_DBM) and PTE_DIRTY clear. Prior to commit 73e86cb03cf2 ("arm64: Move PTE_RDONLY bit handling out of set_pte_at()"), it was the responsibility of set_pte_at() to set the PTE_RDONLY bit and mark the pte clean if the software PTE_DIRTY bit was not set. However, the above commit removed the pte_sw_dirty() check and the subsequent setting of PTE_RDONLY in set_pte_at() while leaving the PAGE_SHARED{,_EXEC} definitions unchanged. The result is that shared+writable mappings are now dirty by default Fix the above by explicitly setting PTE_RDONLY in PAGE_SHARED{,_EXEC}. In addition, remove the superfluous PTE_DIRTY bit from the kernel PROT_* attributes. Fixes: 73e86cb03cf2 ("arm64: Move PTE_RDONLY bit handling out of set_pte_at()") Cc: # 4.14.x- Cc: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/pgtable-prot.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 2db84df5eb42..20e45733afa4 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -43,11 +43,11 @@ #define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG) #define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG) -#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) -#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE)) -#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC)) -#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT)) -#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL)) +#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) +#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE)) +#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC)) +#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT)) +#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL)) #define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE)) #define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) @@ -71,8 +71,9 @@ #define PAGE_S2_DEVICE __pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN) #define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) -#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) -#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) +/* shared+writable pages are clean by default, hence PTE_RDONLY|PTE_WRITE */ +#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) +#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE) #define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN) #define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN) From 1b940dd55d42133791d7ba4841adfcf436fab98e Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 18 Oct 2019 07:43:21 -0400 Subject: [PATCH 0220/3715] rtlwifi: Fix potential overflow on P2P code commit 8c55dedb795be8ec0cf488f98c03a1c2176f7fb1 upstream. Nicolas Waisman noticed that even though noa_len is checked for a compatible length it's still possible to overrun the buffers of p2pinfo since there's no check on the upper bound of noa_num. Bound noa_num against P2P_MAX_NOA_NUM. Reported-by: Nicolas Waisman Signed-off-by: Laura Abbott Acked-by: Ping-Ke Shih Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtlwifi/ps.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wireless/realtek/rtlwifi/ps.c b/drivers/net/wireless/realtek/rtlwifi/ps.c index f6d00613c53d..e1297809535f 100644 --- a/drivers/net/wireless/realtek/rtlwifi/ps.c +++ b/drivers/net/wireless/realtek/rtlwifi/ps.c @@ -774,6 +774,9 @@ static void rtl_p2p_noa_ie(struct ieee80211_hw *hw, void *data, return; } else { noa_num = (noa_len - 2) / 13; + if (noa_num > P2P_MAX_NOA_NUM) + noa_num = P2P_MAX_NOA_NUM; + } noa_index = ie[3]; if (rtlpriv->psc.p2p_ps_info.p2p_ps_mode == @@ -868,6 +871,9 @@ static void rtl_p2p_action_ie(struct ieee80211_hw *hw, void *data, return; } else { noa_num = (noa_len - 2) / 13; + if (noa_num > P2P_MAX_NOA_NUM) + noa_num = P2P_MAX_NOA_NUM; + } noa_index = ie[3]; if (rtlpriv->psc.p2p_ps_info.p2p_ps_mode == From 7a7313ae7163cb2c206abf071a4e098a3ba7f140 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 23 Oct 2019 08:31:38 -0700 Subject: [PATCH 0221/3715] dmaengine: cppi41: Fix cppi41_dma_prep_slave_sg() when idle commit bacdcb6675e170bb2e8d3824da220e10274f42a7 upstream. Yegor Yefremov reported that musb and ftdi uart can fail for the first open of the uart unless connected using a hub. This is because the first dma call done by musb_ep_program() must wait if cppi41 is PM runtime suspended. Otherwise musb_ep_program() continues with other non-dma packets before the DMA transfer is started causing at least ftdi uarts to fail to receive data. Let's fix the issue by waking up cppi41 with PM runtime calls added to cppi41_dma_prep_slave_sg() and return NULL if still idled. This way we have musb_ep_program() continue with PIO until cppi41 is awake. Fixes: fdea2d09b997 ("dmaengine: cppi41: Add basic PM runtime support") Reported-by: Yegor Yefremov Signed-off-by: Tony Lindgren Cc: stable@vger.kernel.org # v4.9+ Link: https://lore.kernel.org/r/20191023153138.23442-1-tony@atomide.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/cppi41.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c index ddd4a3932127..cf119a8ccdd5 100644 --- a/drivers/dma/cppi41.c +++ b/drivers/dma/cppi41.c @@ -585,9 +585,22 @@ static struct dma_async_tx_descriptor *cppi41_dma_prep_slave_sg( enum dma_transfer_direction dir, unsigned long tx_flags, void *context) { struct cppi41_channel *c = to_cpp41_chan(chan); + struct dma_async_tx_descriptor *txd = NULL; + struct cppi41_dd *cdd = c->cdd; struct cppi41_desc *d; struct scatterlist *sg; unsigned int i; + int error; + + error = pm_runtime_get(cdd->ddev.dev); + if (error < 0) { + pm_runtime_put_noidle(cdd->ddev.dev); + + return NULL; + } + + if (cdd->is_suspended) + goto err_out_not_ready; d = c->desc; for_each_sg(sgl, sg, sg_len, i) { @@ -610,7 +623,13 @@ static struct dma_async_tx_descriptor *cppi41_dma_prep_slave_sg( d++; } - return &c->txd; + txd = &c->txd; + +err_out_not_ready: + pm_runtime_mark_last_busy(cdd->ddev.dev); + pm_runtime_put_autosuspend(cdd->ddev.dev); + + return txd; } static void cppi41_compute_td_desc(struct cppi41_desc *d) From d9139c010ab57713eca549542a6765e9ec3eb7e1 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 6 Oct 2019 14:24:24 -0700 Subject: [PATCH 0222/3715] llc: fix sk_buff leak in llc_sap_state_process() commit c6ee11c39fcc1fb55130748990a8f199e76263b4 upstream. syzbot reported: BUG: memory leak unreferenced object 0xffff888116270800 (size 224): comm "syz-executor641", pid 7047, jiffies 4294947360 (age 13.860s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 20 e1 2a 81 88 ff ff 00 40 3d 2a 81 88 ff ff . .*.....@=*.... backtrace: [<000000004d41b4cc>] kmemleak_alloc_recursive include/linux/kmemleak.h:55 [inline] [<000000004d41b4cc>] slab_post_alloc_hook mm/slab.h:439 [inline] [<000000004d41b4cc>] slab_alloc_node mm/slab.c:3269 [inline] [<000000004d41b4cc>] kmem_cache_alloc_node+0x153/0x2a0 mm/slab.c:3579 [<00000000506a5965>] __alloc_skb+0x6e/0x210 net/core/skbuff.c:198 [<000000001ba5a161>] alloc_skb include/linux/skbuff.h:1058 [inline] [<000000001ba5a161>] alloc_skb_with_frags+0x5f/0x250 net/core/skbuff.c:5327 [<0000000047d9c78b>] sock_alloc_send_pskb+0x269/0x2a0 net/core/sock.c:2225 [<000000003828fe54>] sock_alloc_send_skb+0x32/0x40 net/core/sock.c:2242 [<00000000e34d94f9>] llc_ui_sendmsg+0x10a/0x540 net/llc/af_llc.c:933 [<00000000de2de3fb>] sock_sendmsg_nosec net/socket.c:652 [inline] [<00000000de2de3fb>] sock_sendmsg+0x54/0x70 net/socket.c:671 [<000000008fe16e7a>] __sys_sendto+0x148/0x1f0 net/socket.c:1964 [...] The bug is that llc_sap_state_process() always takes an extra reference to the skb, but sometimes neither llc_sap_next_state() nor llc_sap_state_process() itself drops this reference. Fix it by changing llc_sap_next_state() to never consume a reference to the skb, rather than sometimes do so and sometimes not. Then remove the extra skb_get() and kfree_skb() from llc_sap_state_process(). Reported-by: syzbot+6bf095f9becf5efef645@syzkaller.appspotmail.com Reported-by: syzbot+31c16aa4202dace3812e@syzkaller.appspotmail.com Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Biggers Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/llc/llc_s_ac.c | 12 +++++++++--- net/llc/llc_sap.c | 23 ++++++++--------------- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c index a94bd56bcac6..7ae4cc684d3a 100644 --- a/net/llc/llc_s_ac.c +++ b/net/llc/llc_s_ac.c @@ -58,8 +58,10 @@ int llc_sap_action_send_ui(struct llc_sap *sap, struct sk_buff *skb) ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_ui_cmd(skb); rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); - if (likely(!rc)) + if (likely(!rc)) { + skb_get(skb); rc = dev_queue_xmit(skb); + } return rc; } @@ -81,8 +83,10 @@ int llc_sap_action_send_xid_c(struct llc_sap *sap, struct sk_buff *skb) ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_xid_cmd(skb, LLC_XID_NULL_CLASS_2, 0); rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); - if (likely(!rc)) + if (likely(!rc)) { + skb_get(skb); rc = dev_queue_xmit(skb); + } return rc; } @@ -135,8 +139,10 @@ int llc_sap_action_send_test_c(struct llc_sap *sap, struct sk_buff *skb) ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_test_cmd(skb); rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); - if (likely(!rc)) + if (likely(!rc)) { + skb_get(skb); rc = dev_queue_xmit(skb); + } return rc; } diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index d90928f50226..a7534950e60a 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -197,29 +197,22 @@ out: * After executing actions of the event, upper layer will be indicated * if needed(on receiving an UI frame). sk can be null for the * datalink_proto case. + * + * This function always consumes a reference to the skb. */ static void llc_sap_state_process(struct llc_sap *sap, struct sk_buff *skb) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); - /* - * We have to hold the skb, because llc_sap_next_state - * will kfree it in the sending path and we need to - * look at the skb->cb, where we encode llc_sap_state_ev. - */ - skb_get(skb); ev->ind_cfm_flag = 0; llc_sap_next_state(sap, skb); - if (ev->ind_cfm_flag == LLC_IND) { - if (skb->sk->sk_state == TCP_LISTEN) - kfree_skb(skb); - else { - llc_save_primitive(skb->sk, skb, ev->prim); - /* queue skb to the user. */ - if (sock_queue_rcv_skb(skb->sk, skb)) - kfree_skb(skb); - } + if (ev->ind_cfm_flag == LLC_IND && skb->sk->sk_state != TCP_LISTEN) { + llc_save_primitive(skb->sk, skb, ev->prim); + + /* queue skb to the user. */ + if (sock_queue_rcv_skb(skb->sk, skb) == 0) + return; } kfree_skb(skb); } From c87091ed19935f90b6cfefd8e984c41b47caed65 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 6 Oct 2019 14:24:25 -0700 Subject: [PATCH 0223/3715] llc: fix sk_buff leak in llc_conn_service() commit b74555de21acd791f12c4a1aeaf653dd7ac21133 upstream. syzbot reported: BUG: memory leak unreferenced object 0xffff88811eb3de00 (size 224): comm "syz-executor559", pid 7315, jiffies 4294943019 (age 10.300s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 a0 38 24 81 88 ff ff 00 c0 f2 15 81 88 ff ff ..8$............ backtrace: [<000000008d1c66a1>] kmemleak_alloc_recursive include/linux/kmemleak.h:55 [inline] [<000000008d1c66a1>] slab_post_alloc_hook mm/slab.h:439 [inline] [<000000008d1c66a1>] slab_alloc_node mm/slab.c:3269 [inline] [<000000008d1c66a1>] kmem_cache_alloc_node+0x153/0x2a0 mm/slab.c:3579 [<00000000447d9496>] __alloc_skb+0x6e/0x210 net/core/skbuff.c:198 [<000000000cdbf82f>] alloc_skb include/linux/skbuff.h:1058 [inline] [<000000000cdbf82f>] llc_alloc_frame+0x66/0x110 net/llc/llc_sap.c:54 [<000000002418b52e>] llc_conn_ac_send_sabme_cmd_p_set_x+0x2f/0x140 net/llc/llc_c_ac.c:777 [<000000001372ae17>] llc_exec_conn_trans_actions net/llc/llc_conn.c:475 [inline] [<000000001372ae17>] llc_conn_service net/llc/llc_conn.c:400 [inline] [<000000001372ae17>] llc_conn_state_process+0x1ac/0x640 net/llc/llc_conn.c:75 [<00000000f27e53c1>] llc_establish_connection+0x110/0x170 net/llc/llc_if.c:109 [<00000000291b2ca0>] llc_ui_connect+0x10e/0x370 net/llc/af_llc.c:477 [<000000000f9c740b>] __sys_connect+0x11d/0x170 net/socket.c:1840 [...] The bug is that most callers of llc_conn_send_pdu() assume it consumes a reference to the skb, when actually due to commit b85ab56c3f81 ("llc: properly handle dev_queue_xmit() return value") it doesn't. Revert most of that commit, and instead make the few places that need llc_conn_send_pdu() to *not* consume a reference call skb_get() before. Fixes: b85ab56c3f81 ("llc: properly handle dev_queue_xmit() return value") Reported-by: syzbot+6b825a6494a04cc0e3f7@syzkaller.appspotmail.com Signed-off-by: Eric Biggers Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- include/net/llc_conn.h | 2 +- net/llc/llc_c_ac.c | 8 ++++++-- net/llc/llc_conn.c | 32 +++++++++----------------------- 3 files changed, 16 insertions(+), 26 deletions(-) diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index df528a623548..ea985aa7a6c5 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -104,7 +104,7 @@ void llc_sk_reset(struct sock *sk); /* Access to a connection */ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb); -int llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb); +void llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb); void llc_conn_rtn_pdu(struct sock *sk, struct sk_buff *skb); void llc_conn_resend_i_pdu_as_cmd(struct sock *sk, u8 nr, u8 first_p_bit); void llc_conn_resend_i_pdu_as_rsp(struct sock *sk, u8 nr, u8 first_f_bit); diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index 4b60f68cb492..8354ae40ec85 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -372,6 +372,7 @@ int llc_conn_ac_send_i_cmd_p_set_1(struct sock *sk, struct sk_buff *skb) llc_pdu_init_as_i_cmd(skb, 1, llc->vS, llc->vR); rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac); if (likely(!rc)) { + skb_get(skb); llc_conn_send_pdu(sk, skb); llc_conn_ac_inc_vs_by_1(sk, skb); } @@ -389,7 +390,8 @@ static int llc_conn_ac_send_i_cmd_p_set_0(struct sock *sk, struct sk_buff *skb) llc_pdu_init_as_i_cmd(skb, 0, llc->vS, llc->vR); rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac); if (likely(!rc)) { - rc = llc_conn_send_pdu(sk, skb); + skb_get(skb); + llc_conn_send_pdu(sk, skb); llc_conn_ac_inc_vs_by_1(sk, skb); } return rc; @@ -406,6 +408,7 @@ int llc_conn_ac_send_i_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) llc_pdu_init_as_i_cmd(skb, 0, llc->vS, llc->vR); rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac); if (likely(!rc)) { + skb_get(skb); llc_conn_send_pdu(sk, skb); llc_conn_ac_inc_vs_by_1(sk, skb); } @@ -916,7 +919,8 @@ static int llc_conn_ac_send_i_rsp_f_set_ackpf(struct sock *sk, llc_pdu_init_as_i_cmd(skb, llc->ack_pf, llc->vS, llc->vR); rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac); if (likely(!rc)) { - rc = llc_conn_send_pdu(sk, skb); + skb_get(skb); + llc_conn_send_pdu(sk, skb); llc_conn_ac_inc_vs_by_1(sk, skb); } return rc; diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 56c3fb5cc805..444c13e752a0 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -30,7 +30,7 @@ #endif static int llc_find_offset(int state, int ev_type); -static int llc_conn_send_pdus(struct sock *sk, struct sk_buff *skb); +static void llc_conn_send_pdus(struct sock *sk); static int llc_conn_service(struct sock *sk, struct sk_buff *skb); static int llc_exec_conn_trans_actions(struct sock *sk, struct llc_conn_state_trans *trans, @@ -193,11 +193,11 @@ out_skb_put: return rc; } -int llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb) +void llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb) { /* queue PDU to send to MAC layer */ skb_queue_tail(&sk->sk_write_queue, skb); - return llc_conn_send_pdus(sk, skb); + llc_conn_send_pdus(sk); } /** @@ -255,7 +255,7 @@ void llc_conn_resend_i_pdu_as_cmd(struct sock *sk, u8 nr, u8 first_p_bit) if (howmany_resend > 0) llc->vS = (llc->vS + 1) % LLC_2_SEQ_NBR_MODULO; /* any PDUs to re-send are queued up; start sending to MAC */ - llc_conn_send_pdus(sk, NULL); + llc_conn_send_pdus(sk); out:; } @@ -296,7 +296,7 @@ void llc_conn_resend_i_pdu_as_rsp(struct sock *sk, u8 nr, u8 first_f_bit) if (howmany_resend > 0) llc->vS = (llc->vS + 1) % LLC_2_SEQ_NBR_MODULO; /* any PDUs to re-send are queued up; start sending to MAC */ - llc_conn_send_pdus(sk, NULL); + llc_conn_send_pdus(sk); out:; } @@ -340,16 +340,12 @@ out: /** * llc_conn_send_pdus - Sends queued PDUs * @sk: active connection - * @hold_skb: the skb held by caller, or NULL if does not care * - * Sends queued pdus to MAC layer for transmission. When @hold_skb is - * NULL, always return 0. Otherwise, return 0 if @hold_skb is sent - * successfully, or 1 for failure. + * Sends queued pdus to MAC layer for transmission. */ -static int llc_conn_send_pdus(struct sock *sk, struct sk_buff *hold_skb) +static void llc_conn_send_pdus(struct sock *sk) { struct sk_buff *skb; - int ret = 0; while ((skb = skb_dequeue(&sk->sk_write_queue)) != NULL) { struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); @@ -361,20 +357,10 @@ static int llc_conn_send_pdus(struct sock *sk, struct sk_buff *hold_skb) skb_queue_tail(&llc_sk(sk)->pdu_unack_q, skb); if (!skb2) break; - dev_queue_xmit(skb2); - } else { - bool is_target = skb == hold_skb; - int rc; - - if (is_target) - skb_get(skb); - rc = dev_queue_xmit(skb); - if (is_target) - ret = rc; + skb = skb2; } + dev_queue_xmit(skb); } - - return ret; } /** From 43159c9ec156e7363ba24528fced7a9d7b2f3134 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 7 Oct 2019 10:58:28 +0100 Subject: [PATCH 0224/3715] rxrpc: Fix call ref leak commit c48fc11b69e95007109206311b0187a3090591f3 upstream. When sendmsg() finds a call to continue on with, if the call is in an inappropriate state, it doesn't release the ref it just got on that call before returning an error. This causes the following symptom to show up with kasan: BUG: KASAN: use-after-free in rxrpc_send_keepalive+0x8a2/0x940 net/rxrpc/output.c:635 Read of size 8 at addr ffff888064219698 by task kworker/0:3/11077 where line 635 is: whdr.epoch = htonl(peer->local->rxnet->epoch); The local endpoint (which cannot be pinned by the call) has been released, but not the peer (which is pinned by the call). Fix this by releasing the call in the error path. Fixes: 37411cad633f ("rxrpc: Fix potential NULL-pointer exception") Reported-by: syzbot+d850c266e3df14da1d31@syzkaller.appspotmail.com Signed-off-by: David Howells Signed-off-by: Greg Kroah-Hartman --- net/rxrpc/sendmsg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 016e293681b8..a980b49d7a4f 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -586,6 +586,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) case RXRPC_CALL_SERVER_PREALLOC: case RXRPC_CALL_SERVER_SECURING: case RXRPC_CALL_SERVER_ACCEPTING: + rxrpc_put_call(call, rxrpc_call_put); ret = -EBUSY; goto error_release_sock; default: From 2a571bd399084762a67519f35ef586407fbd3cb2 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 7 Oct 2019 18:40:59 +0200 Subject: [PATCH 0225/3715] NFC: pn533: fix use-after-free and memleaks commit 6af3aa57a0984e061f61308fe181a9a12359fecc upstream. The driver would fail to deregister and its class device and free related resources on late probe errors. Reported-by: syzbot+cb035c75c03dbe34b796@syzkaller.appspotmail.com Fixes: 32ecc75ded72 ("NFC: pn533: change order operations in dev registation") Signed-off-by: Johan Hovold Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/nfc/pn533/usb.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/nfc/pn533/usb.c b/drivers/nfc/pn533/usb.c index 5d823e965883..fcb57d64d97e 100644 --- a/drivers/nfc/pn533/usb.c +++ b/drivers/nfc/pn533/usb.c @@ -559,18 +559,25 @@ static int pn533_usb_probe(struct usb_interface *interface, rc = pn533_finalize_setup(priv); if (rc) - goto error; + goto err_deregister; usb_set_intfdata(interface, phy); return 0; +err_deregister: + pn533_unregister_device(phy->priv); error: + usb_kill_urb(phy->in_urb); + usb_kill_urb(phy->out_urb); + usb_kill_urb(phy->ack_urb); + usb_free_urb(phy->in_urb); usb_free_urb(phy->out_urb); usb_free_urb(phy->ack_urb); usb_put_dev(phy->udev); kfree(in_buf); + kfree(phy->ack_buffer); return rc; } From 5f1b77f0137b62e781a0d7a7cf7bb94157da9d9c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Oct 2019 15:43:01 -0700 Subject: [PATCH 0226/3715] bonding: fix potential NULL deref in bond_update_slave_arr commit a7137534b597b7c303203e6bc3ed87e87a273bb8 upstream. syzbot got a NULL dereference in bond_update_slave_arr() [1], happening after a failure to allocate bond->slave_arr A workqueue (bond_slave_arr_handler) is supposed to retry the allocation later, but if the slave is removed before the workqueue had a chance to complete, bond->slave_arr can still be NULL. [1] Failed to build slave-array. kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN PTI Modules linked in: Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:bond_update_slave_arr.cold+0xc6/0x198 drivers/net/bonding/bond_main.c:4039 RSP: 0018:ffff88018fe33678 EFLAGS: 00010246 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffc9000290b000 RDX: 0000000000000000 RSI: ffffffff82b63037 RDI: ffff88019745ea20 RBP: ffff88018fe33760 R08: ffff880170754280 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: ffff88019745ea00 R14: 0000000000000000 R15: ffff88018fe338b0 FS: 00007febd837d700(0000) GS:ffff8801dad00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000004540a0 CR3: 00000001c242e005 CR4: 00000000001626f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: [] __bond_release_one+0x43e/0x500 drivers/net/bonding/bond_main.c:1923 [] bond_release drivers/net/bonding/bond_main.c:2039 [inline] [] bond_do_ioctl+0x416/0x870 drivers/net/bonding/bond_main.c:3562 [] dev_ifsioc+0x6f4/0x940 net/core/dev_ioctl.c:328 [] dev_ioctl+0x1b8/0xc70 net/core/dev_ioctl.c:495 [] sock_do_ioctl+0x1bd/0x300 net/socket.c:1088 [] sock_ioctl+0x300/0x5d0 net/socket.c:1196 [] vfs_ioctl fs/ioctl.c:47 [inline] [] file_ioctl fs/ioctl.c:501 [inline] [] do_vfs_ioctl+0xacb/0x1300 fs/ioctl.c:688 [] SYSC_ioctl fs/ioctl.c:705 [inline] [] SyS_ioctl+0xb6/0xe0 fs/ioctl.c:696 [] do_syscall_64+0x528/0x770 arch/x86/entry/common.c:305 [] entry_SYSCALL_64_after_hwframe+0x42/0xb7 Fixes: ee6377147409 ("bonding: Simplify the xmit function for modes that use xmit_hash") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Mahesh Bandewar Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/bonding/bond_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index c1eeba1906fd..1cc4c99aa504 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3992,7 +3992,7 @@ out: * this to-be-skipped slave to send a packet out. */ old_arr = rtnl_dereference(bond->slave_arr); - for (idx = 0; idx < old_arr->count; idx++) { + for (idx = 0; old_arr != NULL && idx < old_arr->count; idx++) { if (skipslave == old_arr->arr[idx]) { old_arr->arr[idx] = old_arr->arr[old_arr->count-1]; From 18efc2e3889e4d4a0e4901d342fc7206a30c0b9e Mon Sep 17 00:00:00 2001 From: Valentin Vidic Date: Tue, 15 Oct 2019 22:20:20 +0200 Subject: [PATCH 0227/3715] net: usb: sr9800: fix uninitialized local variable commit 77b6d09f4ae66d42cd63b121af67780ae3d1a5e9 upstream. Make sure res does not contain random value if the call to sr_read_cmd fails for some reason. Reported-by: syzbot+f1842130bbcfb335bac1@syzkaller.appspotmail.com Signed-off-by: Valentin Vidic Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/sr9800.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c index 35f39f23d881..8f8c9ede88c2 100644 --- a/drivers/net/usb/sr9800.c +++ b/drivers/net/usb/sr9800.c @@ -336,7 +336,7 @@ static void sr_set_multicast(struct net_device *net) static int sr_mdio_read(struct net_device *net, int phy_id, int loc) { struct usbnet *dev = netdev_priv(net); - __le16 res; + __le16 res = 0; mutex_lock(&dev->phy_mutex); sr_set_sw_mii(dev); From cd7d823f30fa290c9a28274ed16a42c7721049f8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 24 Sep 2019 13:11:26 -0700 Subject: [PATCH 0228/3715] sch_netem: fix rcu splat in netem_enqueue() commit 159d2c7d8106177bd9a986fd005a311fe0d11285 upstream. qdisc_root() use from netem_enqueue() triggers a lockdep warning. __dev_queue_xmit() uses rcu_read_lock_bh() which is not equivalent to rcu_read_lock() + local_bh_disable_bh as far as lockdep is concerned. WARNING: suspicious RCU usage 5.3.0-rc7+ #0 Not tainted ----------------------------- include/net/sch_generic.h:492 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 3 locks held by syz-executor427/8855: #0: 00000000b5525c01 (rcu_read_lock_bh){....}, at: lwtunnel_xmit_redirect include/net/lwtunnel.h:92 [inline] #0: 00000000b5525c01 (rcu_read_lock_bh){....}, at: ip_finish_output2+0x2dc/0x2570 net/ipv4/ip_output.c:214 #1: 00000000b5525c01 (rcu_read_lock_bh){....}, at: __dev_queue_xmit+0x20a/0x3650 net/core/dev.c:3804 #2: 00000000364bae92 (&(&sch->q.lock)->rlock){+.-.}, at: spin_lock include/linux/spinlock.h:338 [inline] #2: 00000000364bae92 (&(&sch->q.lock)->rlock){+.-.}, at: __dev_xmit_skb net/core/dev.c:3502 [inline] #2: 00000000364bae92 (&(&sch->q.lock)->rlock){+.-.}, at: __dev_queue_xmit+0x14b8/0x3650 net/core/dev.c:3838 stack backtrace: CPU: 0 PID: 8855 Comm: syz-executor427 Not tainted 5.3.0-rc7+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 lockdep_rcu_suspicious+0x153/0x15d kernel/locking/lockdep.c:5357 qdisc_root include/net/sch_generic.h:492 [inline] netem_enqueue+0x1cfb/0x2d80 net/sched/sch_netem.c:479 __dev_xmit_skb net/core/dev.c:3527 [inline] __dev_queue_xmit+0x15d2/0x3650 net/core/dev.c:3838 dev_queue_xmit+0x18/0x20 net/core/dev.c:3902 neigh_hh_output include/net/neighbour.h:500 [inline] neigh_output include/net/neighbour.h:509 [inline] ip_finish_output2+0x1726/0x2570 net/ipv4/ip_output.c:228 __ip_finish_output net/ipv4/ip_output.c:308 [inline] __ip_finish_output+0x5fc/0xb90 net/ipv4/ip_output.c:290 ip_finish_output+0x38/0x1f0 net/ipv4/ip_output.c:318 NF_HOOK_COND include/linux/netfilter.h:294 [inline] ip_mc_output+0x292/0xf40 net/ipv4/ip_output.c:417 dst_output include/net/dst.h:436 [inline] ip_local_out+0xbb/0x190 net/ipv4/ip_output.c:125 ip_send_skb+0x42/0xf0 net/ipv4/ip_output.c:1555 udp_send_skb.isra.0+0x6b2/0x1160 net/ipv4/udp.c:887 udp_sendmsg+0x1e96/0x2820 net/ipv4/udp.c:1174 inet_sendmsg+0x9e/0xe0 net/ipv4/af_inet.c:807 sock_sendmsg_nosec net/socket.c:637 [inline] sock_sendmsg+0xd7/0x130 net/socket.c:657 ___sys_sendmsg+0x3e2/0x920 net/socket.c:2311 __sys_sendmmsg+0x1bf/0x4d0 net/socket.c:2413 __do_sys_sendmmsg net/socket.c:2442 [inline] __se_sys_sendmmsg net/socket.c:2439 [inline] __x64_sys_sendmmsg+0x9d/0x100 net/socket.c:2439 do_syscall_64+0xfd/0x6a0 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x49/0xbe Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/sch_generic.h | 5 +++++ net/sched/sch_netem.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index f59acacaa265..37876d842f2e 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -305,6 +305,11 @@ static inline struct Qdisc *qdisc_root(const struct Qdisc *qdisc) return q; } +static inline struct Qdisc *qdisc_root_bh(const struct Qdisc *qdisc) +{ + return rcu_dereference_bh(qdisc->dev_queue->qdisc); +} + static inline struct Qdisc *qdisc_root_sleeping(const struct Qdisc *qdisc) { return qdisc->dev_queue->qdisc_sleeping; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 787aa52e5991..6266121a03f9 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -469,7 +469,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, * skb will be queued. */ if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) { - struct Qdisc *rootq = qdisc_root(sch); + struct Qdisc *rootq = qdisc_root_bh(sch); u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ q->duplicate = 0; From 8afb9f5344c42d648e565df3239f8746a7a7ed8f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 20 May 2018 16:39:10 +0800 Subject: [PATCH 0229/3715] sctp: fix the issue that flags are ignored when using kernel_connect commit 644fbdeacf1d3edd366e44b8ba214de9d1dd66a9 upstream. Now sctp uses inet_dgram_connect as its proto_ops .connect, and the flags param can't be passed into its proto .connect where this flags is really needed. sctp works around it by getting flags from socket file in __sctp_connect. It works for connecting from userspace, as inherently the user sock has socket file and it passes f_flags as the flags param into the proto_ops .connect. However, the sock created by sock_create_kern doesn't have a socket file, and it passes the flags (like O_NONBLOCK) by using the flags param in kernel_connect, which calls proto_ops .connect later. So to fix it, this patch defines a new proto_ops .connect for sctp, sctp_inet_connect, which calls __sctp_connect() directly with this flags param. After this, the sctp's proto .connect can be removed. Note that sctp_inet_connect doesn't need to do some checks that are not needed for sctp, which makes thing better than with inet_dgram_connect. Suggested-by: Marcelo Ricardo Leitner Signed-off-by: Xin Long Acked-by: Neil Horman Acked-by: Marcelo Ricardo Leitner Reviewed-by: Michal Kubecek Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/sctp/sctp.h | 2 ++ net/sctp/ipv6.c | 2 +- net/sctp/protocol.c | 2 +- net/sctp/socket.c | 56 ++++++++++++++++++++++++++++------------- 4 files changed, 42 insertions(+), 20 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 749a42882437..c713bd62428f 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -103,6 +103,8 @@ void sctp_addr_wq_mgmt(struct net *, struct sctp_sockaddr_entry *, int); /* * sctp/socket.c */ +int sctp_inet_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags); int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb); int sctp_inet_listen(struct socket *sock, int backlog); void sctp_write_space(struct sock *sk); diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 7eb06fa75730..53a66ee1331f 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -974,7 +974,7 @@ static const struct proto_ops inet6_seqpacket_ops = { .owner = THIS_MODULE, .release = inet6_release, .bind = inet6_bind, - .connect = inet_dgram_connect, + .connect = sctp_inet_connect, .socketpair = sock_no_socketpair, .accept = inet_accept, .getname = sctp_getname, diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 6af871b1c297..01f88e9abbc6 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1019,7 +1019,7 @@ static const struct proto_ops inet_seqpacket_ops = { .owner = THIS_MODULE, .release = inet_release, /* Needs to be wrapped... */ .bind = inet_bind, - .connect = inet_dgram_connect, + .connect = sctp_inet_connect, .socketpair = sock_no_socketpair, .accept = inet_accept, .getname = inet_getname, /* Semantics are different. */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index a18e9be77216..c9c23ca1a4dc 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1076,7 +1076,7 @@ out: */ static int __sctp_connect(struct sock *sk, struct sockaddr *kaddrs, - int addrs_size, + int addrs_size, int flags, sctp_assoc_t *assoc_id) { struct net *net = sock_net(sk); @@ -1094,7 +1094,6 @@ static int __sctp_connect(struct sock *sk, union sctp_addr *sa_addr = NULL; void *addr_buf; unsigned short port; - unsigned int f_flags = 0; sp = sctp_sk(sk); ep = sp->ep; @@ -1244,13 +1243,7 @@ static int __sctp_connect(struct sock *sk, sp->pf->to_sk_daddr(sa_addr, sk); sk->sk_err = 0; - /* in-kernel sockets don't generally have a file allocated to them - * if all they do is call sock_create_kern(). - */ - if (sk->sk_socket->file) - f_flags = sk->sk_socket->file->f_flags; - - timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK); + timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); if (assoc_id) *assoc_id = asoc->assoc_id; @@ -1345,7 +1338,7 @@ static int __sctp_setsockopt_connectx(struct sock *sk, { struct sockaddr *kaddrs; gfp_t gfp = GFP_KERNEL; - int err = 0; + int err = 0, flags = 0; pr_debug("%s: sk:%p addrs:%p addrs_size:%d\n", __func__, sk, addrs, addrs_size); @@ -1365,11 +1358,18 @@ static int __sctp_setsockopt_connectx(struct sock *sk, return -ENOMEM; if (__copy_from_user(kaddrs, addrs, addrs_size)) { - err = -EFAULT; - } else { - err = __sctp_connect(sk, kaddrs, addrs_size, assoc_id); + kfree(kaddrs); + return -EFAULT; } + /* in-kernel sockets don't generally have a file allocated to them + * if all they do is call sock_create_kern(). + */ + if (sk->sk_socket->file) + flags = sk->sk_socket->file->f_flags; + + err = __sctp_connect(sk, kaddrs, addrs_size, flags, assoc_id); + kfree(kaddrs); return err; @@ -4166,16 +4166,26 @@ out_nounlock: * len: the size of the address. */ static int sctp_connect(struct sock *sk, struct sockaddr *addr, - int addr_len) + int addr_len, int flags) { - int err = 0; + struct inet_sock *inet = inet_sk(sk); struct sctp_af *af; + int err = 0; lock_sock(sk); pr_debug("%s: sk:%p, sockaddr:%p, addr_len:%d\n", __func__, sk, addr, addr_len); + /* We may need to bind the socket. */ + if (!inet->inet_num) { + if (sk->sk_prot->get_port(sk, 0)) { + release_sock(sk); + return -EAGAIN; + } + inet->inet_sport = htons(inet->inet_num); + } + /* Validate addr_len before calling common connect/connectx routine. */ af = sctp_get_af_specific(addr->sa_family); if (!af || addr_len < af->sockaddr_len) { @@ -4184,13 +4194,25 @@ static int sctp_connect(struct sock *sk, struct sockaddr *addr, /* Pass correct addr len to common routine (so it knows there * is only one address being passed. */ - err = __sctp_connect(sk, addr, af->sockaddr_len, NULL); + err = __sctp_connect(sk, addr, af->sockaddr_len, flags, NULL); } release_sock(sk); return err; } +int sctp_inet_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags) +{ + if (addr_len < sizeof(uaddr->sa_family)) + return -EINVAL; + + if (uaddr->sa_family == AF_UNSPEC) + return -EOPNOTSUPP; + + return sctp_connect(sock->sk, uaddr, addr_len, flags); +} + /* FIXME: Write comments. */ static int sctp_disconnect(struct sock *sk, int flags) { @@ -8298,7 +8320,6 @@ struct proto sctp_prot = { .name = "SCTP", .owner = THIS_MODULE, .close = sctp_close, - .connect = sctp_connect, .disconnect = sctp_disconnect, .accept = sctp_accept, .ioctl = sctp_ioctl, @@ -8337,7 +8358,6 @@ struct proto sctpv6_prot = { .name = "SCTPv6", .owner = THIS_MODULE, .close = sctp_close, - .connect = sctp_connect, .disconnect = sctp_disconnect, .accept = sctp_accept, .ioctl = sctp_ioctl, From 7c3c0d51129a1914e36f1942b1c226e894859f08 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 26 Jun 2019 16:31:39 +0800 Subject: [PATCH 0230/3715] sctp: not bind the socket in sctp_connect commit 9b6c08878e23adb7cc84bdca94d8a944b03f099e upstream. Now when sctp_connect() is called with a wrong sa_family, it binds to a port but doesn't set bp->port, then sctp_get_af_specific will return NULL and sctp_connect() returns -EINVAL. Then if sctp_bind() is called to bind to another port, the last port it has bound will leak due to bp->port is NULL by then. sctp_connect() doesn't need to bind ports, as later __sctp_connect will do it if bp->port is NULL. So remove it from sctp_connect(). While at it, remove the unnecessary sockaddr.sa_family len check as it's already done in sctp_inet_connect. Fixes: 644fbdeacf1d ("sctp: fix the issue that flags are ignored when using kernel_connect") Reported-by: syzbot+079bf326b38072f849d9@syzkaller.appspotmail.com Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/socket.c | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index c9c23ca1a4dc..4045d203b7d4 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4168,34 +4168,17 @@ out_nounlock: static int sctp_connect(struct sock *sk, struct sockaddr *addr, int addr_len, int flags) { - struct inet_sock *inet = inet_sk(sk); struct sctp_af *af; - int err = 0; + int err = -EINVAL; lock_sock(sk); - pr_debug("%s: sk:%p, sockaddr:%p, addr_len:%d\n", __func__, sk, addr, addr_len); - /* We may need to bind the socket. */ - if (!inet->inet_num) { - if (sk->sk_prot->get_port(sk, 0)) { - release_sock(sk); - return -EAGAIN; - } - inet->inet_sport = htons(inet->inet_num); - } - /* Validate addr_len before calling common connect/connectx routine. */ af = sctp_get_af_specific(addr->sa_family); - if (!af || addr_len < af->sockaddr_len) { - err = -EINVAL; - } else { - /* Pass correct addr len to common routine (so it knows there - * is only one address being passed. - */ + if (af && addr_len >= af->sockaddr_len) err = __sctp_connect(sk, addr, af->sockaddr_len, flags, NULL); - } release_sock(sk); return err; From b195f26ab82529e8e8ea4525ce6aef5e694c3393 Mon Sep 17 00:00:00 2001 From: Vratislav Bendel Date: Tue, 6 Mar 2018 17:07:44 -0800 Subject: [PATCH 0231/3715] xfs: Correctly invert xfs_buftarg LRU isolation logic commit 19957a181608d25c8f4136652d0ea00b3738972d upstream. Due to an inverted logic mistake in xfs_buftarg_isolate() the xfs_buffers with zero b_lru_ref will take another trip around LRU, while isolating buffers with non-zero b_lru_ref. Additionally those isolated buffers end up right back on the LRU once they are released, because b_lru_ref remains elevated. Fix that circuitous route by leaving them on the LRU as originally intended. Signed-off-by: Vratislav Bendel Reviewed-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Alex Lyakas Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_buf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 16f93d7356b7..e4a623956df5 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1702,7 +1702,7 @@ xfs_buftarg_isolate( * zero. If the value is already zero, we need to reclaim the * buffer, otherwise it gets another trip through the LRU. */ - if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) { + if (atomic_add_unless(&bp->b_lru_ref, -1, 0)) { spin_unlock(&bp->b_lock); return LRU_ROTATE; } From b617db2ebf29bc60768df01a831d79582630ee8a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 28 Mar 2019 17:11:10 +0100 Subject: [PATCH 0232/3715] ALSA: timer: Simplify error path in snd_timer_open() [ Upstream commit 41672c0c24a62699d20aab53b98d843b16483053 ] Just a minor refactoring to use the standard goto for error paths in snd_timer_open() instead of open code. The first mutex_lock() is moved to the beginning of the function to make the code clearer. Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/timer.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/sound/core/timer.c b/sound/core/timer.c index 2c0f292226d7..b50f7601cc2b 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -254,19 +254,20 @@ int snd_timer_open(struct snd_timer_instance **ti, struct snd_timer_instance *timeri = NULL; int err; + mutex_lock(®ister_mutex); if (tid->dev_class == SNDRV_TIMER_CLASS_SLAVE) { /* open a slave instance */ if (tid->dev_sclass <= SNDRV_TIMER_SCLASS_NONE || tid->dev_sclass > SNDRV_TIMER_SCLASS_OSS_SEQUENCER) { pr_debug("ALSA: timer: invalid slave class %i\n", tid->dev_sclass); - return -EINVAL; + err = -EINVAL; + goto unlock; } - mutex_lock(®ister_mutex); timeri = snd_timer_instance_new(owner, NULL); if (!timeri) { - mutex_unlock(®ister_mutex); - return -ENOMEM; + err = -ENOMEM; + goto unlock; } timeri->slave_class = tid->dev_sclass; timeri->slave_id = tid->device; @@ -277,13 +278,10 @@ int snd_timer_open(struct snd_timer_instance **ti, snd_timer_close_locked(timeri); timeri = NULL; } - mutex_unlock(®ister_mutex); - *ti = timeri; - return err; + goto unlock; } /* open a master instance */ - mutex_lock(®ister_mutex); timer = snd_timer_find(tid); #ifdef CONFIG_MODULES if (!timer) { @@ -294,25 +292,26 @@ int snd_timer_open(struct snd_timer_instance **ti, } #endif if (!timer) { - mutex_unlock(®ister_mutex); - return -ENODEV; + err = -ENODEV; + goto unlock; } if (!list_empty(&timer->open_list_head)) { timeri = list_entry(timer->open_list_head.next, struct snd_timer_instance, open_list); if (timeri->flags & SNDRV_TIMER_IFLG_EXCLUSIVE) { - mutex_unlock(®ister_mutex); - return -EBUSY; + err = -EBUSY; + timeri = NULL; + goto unlock; } } if (timer->num_instances >= timer->max_instances) { - mutex_unlock(®ister_mutex); - return -EBUSY; + err = -EBUSY; + goto unlock; } timeri = snd_timer_instance_new(owner, timer); if (!timeri) { - mutex_unlock(®ister_mutex); - return -ENOMEM; + err = -ENOMEM; + goto unlock; } /* take a card refcount for safe disconnection */ if (timer->card) @@ -321,16 +320,16 @@ int snd_timer_open(struct snd_timer_instance **ti, timeri->slave_id = slave_id; if (list_empty(&timer->open_list_head) && timer->hw.open) { - int err = timer->hw.open(timer); + err = timer->hw.open(timer); if (err) { kfree(timeri->owner); kfree(timeri); + timeri = NULL; if (timer->card) put_device(&timer->card->card_dev); module_put(timer->module); - mutex_unlock(®ister_mutex); - return err; + goto unlock; } } @@ -341,6 +340,8 @@ int snd_timer_open(struct snd_timer_instance **ti, snd_timer_close_locked(timeri); timeri = NULL; } + + unlock: mutex_unlock(®ister_mutex); *ti = timeri; return err; From 1ff779c530064839511d5b327cd2ade7a17a2651 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 30 Oct 2019 22:42:57 +0100 Subject: [PATCH 0233/3715] ALSA: timer: Fix mutex deadlock at releasing card [ Upstream commit a39331867335d4a94b6165e306265c9e24aca073 ] When a card is disconnected while in use, the system waits until all opened files are closed then releases the card. This is done via put_device() of the card device in each device release code. The recently reported mutex deadlock bug happens in this code path; snd_timer_close() for the timer device deals with the global register_mutex and it calls put_device() there. When this timer device is the last one, the card gets freed and it eventually calls snd_timer_free(), which has again the protection with the global register_mutex -- boom. Basically put_device() call itself is race-free, so a relative simple workaround is to move this put_device() call out of the mutex. For achieving that, in this patch, snd_timer_close_locked() got a new argument to store the card device pointer in return, and each caller invokes put_device() with the returned object after the mutex unlock. Reported-and-tested-by: Kirill A. Shutemov Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/timer.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/sound/core/timer.c b/sound/core/timer.c index b50f7601cc2b..161ab19cb722 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -240,7 +240,8 @@ static int snd_timer_check_master(struct snd_timer_instance *master) return 0; } -static int snd_timer_close_locked(struct snd_timer_instance *timeri); +static int snd_timer_close_locked(struct snd_timer_instance *timeri, + struct device **card_devp_to_put); /* * open a timer instance @@ -252,6 +253,7 @@ int snd_timer_open(struct snd_timer_instance **ti, { struct snd_timer *timer; struct snd_timer_instance *timeri = NULL; + struct device *card_dev_to_put = NULL; int err; mutex_lock(®ister_mutex); @@ -275,7 +277,7 @@ int snd_timer_open(struct snd_timer_instance **ti, list_add_tail(&timeri->open_list, &snd_timer_slave_list); err = snd_timer_check_slave(timeri); if (err < 0) { - snd_timer_close_locked(timeri); + snd_timer_close_locked(timeri, &card_dev_to_put); timeri = NULL; } goto unlock; @@ -327,7 +329,7 @@ int snd_timer_open(struct snd_timer_instance **ti, timeri = NULL; if (timer->card) - put_device(&timer->card->card_dev); + card_dev_to_put = &timer->card->card_dev; module_put(timer->module); goto unlock; } @@ -337,12 +339,15 @@ int snd_timer_open(struct snd_timer_instance **ti, timer->num_instances++; err = snd_timer_check_master(timeri); if (err < 0) { - snd_timer_close_locked(timeri); + snd_timer_close_locked(timeri, &card_dev_to_put); timeri = NULL; } unlock: mutex_unlock(®ister_mutex); + /* put_device() is called after unlock for avoiding deadlock */ + if (card_dev_to_put) + put_device(card_dev_to_put); *ti = timeri; return err; } @@ -352,7 +357,8 @@ EXPORT_SYMBOL(snd_timer_open); * close a timer instance * call this with register_mutex down. */ -static int snd_timer_close_locked(struct snd_timer_instance *timeri) +static int snd_timer_close_locked(struct snd_timer_instance *timeri, + struct device **card_devp_to_put) { struct snd_timer *timer = NULL; struct snd_timer_instance *slave, *tmp; @@ -404,7 +410,7 @@ static int snd_timer_close_locked(struct snd_timer_instance *timeri) timer->hw.close(timer); /* release a card refcount for safe disconnection */ if (timer->card) - put_device(&timer->card->card_dev); + *card_devp_to_put = &timer->card->card_dev; module_put(timer->module); } @@ -416,14 +422,18 @@ static int snd_timer_close_locked(struct snd_timer_instance *timeri) */ int snd_timer_close(struct snd_timer_instance *timeri) { + struct device *card_dev_to_put = NULL; int err; if (snd_BUG_ON(!timeri)) return -ENXIO; mutex_lock(®ister_mutex); - err = snd_timer_close_locked(timeri); + err = snd_timer_close_locked(timeri, &card_dev_to_put); mutex_unlock(®ister_mutex); + /* put_device() is called after unlock for avoiding deadlock */ + if (card_dev_to_put) + put_device(card_dev_to_put); return err; } EXPORT_SYMBOL(snd_timer_close); From 79fd25943d0fcedd032c0a337c8ff3e01eda999d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 28 Oct 2019 09:10:56 +0100 Subject: [PATCH 0234/3715] Revert "ALSA: hda: Flush interrupts on disabling" [ Upstream commit 1a7f60b9df614bb36d14dc0c0bc898a31b2b506f ] This reverts commit caa8422d01e983782548648e125fd617cadcec3f. It turned out that this commit caused a regression at shutdown / reboot, as the synchronize_irq() calls seems blocking the whole shutdown. Also another part of the change about shuffling the call order looks suspicious; the azx_stop_chip() call disables the CORB / RIRB while the others may still need the CORB/RIRB update. Since the original commit itself was a cargo-fix, let's revert the whole patch. Fixes: caa8422d01e9 ("ALSA: hda: Flush interrupts on disabling") BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=205333 BugLinK: https://bugs.freedesktop.org/show_bug.cgi?id=111174 Signed-off-by: Takashi Iwai Cc: Chris Wilson Link: https://lore.kernel.org/r/20191028081056.22010-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/hda/hdac_controller.c | 2 -- sound/pci/hda/hda_intel.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/sound/hda/hdac_controller.c b/sound/hda/hdac_controller.c index 3377f0bc2828..778b42ba90b8 100644 --- a/sound/hda/hdac_controller.c +++ b/sound/hda/hdac_controller.c @@ -442,8 +442,6 @@ static void azx_int_disable(struct hdac_bus *bus) list_for_each_entry(azx_dev, &bus->stream_list, list) snd_hdac_stream_updateb(azx_dev, SD_CTL, SD_INT_MASK, 0); - synchronize_irq(bus->irq); - /* disable SIE for all streams */ snd_hdac_chip_writeb(bus, INTCTL, 0); diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index b42ab80ee607..96e9b3944b92 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1413,9 +1413,9 @@ static int azx_free(struct azx *chip) } if (bus->chip_init) { - azx_stop_chip(chip); azx_clear_irq_pending(chip); azx_stop_all_streams(chip); + azx_stop_chip(chip); } if (bus->irq >= 0) From c9fda4f22428e09728b611ec9100157199039bfe Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 6 Nov 2019 12:43:42 +0100 Subject: [PATCH 0235/3715] Linux 4.14.152 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index db996459d047..1d7f47334ca2 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 151 +SUBLEVEL = 152 EXTRAVERSION = NAME = Petit Gorille From f1bf45bfb925b9a11c4d2d029765ac6700ad3858 Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Fri, 13 Sep 2019 08:25:31 -0700 Subject: [PATCH 0236/3715] ANDROID: regression introduced override_creds=off MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Solve a regression introduced by commit 272fcd1ca7ceb252b1c3a2961110c7c1722707cf ("ANDROID: overlayfs: override_creds=off option bypass creator_cred") where a crash is observed a crash in ovl_create_or_link() when a simple re-direction command in vendor directory. /vendor/bin/ > /vendor/bin/test_log.txt 2>&1& After further debugging we see that if the output is redirected to a file which doesn’t exist we see this stack: [ 377.382745] ovl_create_or_link+0xac/0x710 [ 377.382745] ovl_create_object+0xb8/0x110 [ 377.382745] ovl_create+0x34/0x40 [ 377.382745] path_openat+0xd44/0x15a8 [ 377.382745] do_filp_open+0x80/0x128 [ 377.382745] do_sys_open+0x140/0x250 [ 377.382745] __arm64_sys_openat+0x2c/0x38 ovl_override_creds returns NULL because the override_cred flag is set to false. This causes ovl_revert_creds also to fail. There is another call to check override_cred in override_cred call which overrides the creds permanently as there no revert_creds associated. So whenever next commit_cred is called we see the crash as the credentials are permanently overridden. Signed-off-by: Mark Salyzyn Tested-by: Rishabh/Jeevan Bug: 140816499 Change-Id: Icd0d9be82fc57af5ead1eeab99f79adf3adf62ef --- fs/overlayfs/dir.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index f2bfc9b15c37..c232ee5c01b3 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -469,7 +469,7 @@ static int ovl_create_or_link(struct dentry *dentry, struct inode *inode, bool origin) { int err; - const struct cred *old_cred; + const struct cred *old_cred, *hold_cred = NULL; struct cred *override_cred; struct dentry *parent = dentry->d_parent; @@ -504,7 +504,7 @@ static int ovl_create_or_link(struct dentry *dentry, struct inode *inode, goto out_revert_creds; } } - put_cred(override_creds(override_cred)); + hold_cred = override_creds(override_cred); put_cred(override_cred); if (!ovl_dentry_is_whiteout(dentry)) @@ -515,7 +515,9 @@ static int ovl_create_or_link(struct dentry *dentry, struct inode *inode, hardlink); } out_revert_creds: - ovl_revert_creds(old_cred); + ovl_revert_creds(old_cred ?: hold_cred); + if (old_cred && hold_cred) + put_cred(hold_cred); if (!err) { struct inode *realinode = d_inode(ovl_dentry_upper(dentry)); From c26ebf5874b3bd5ed02f203a75f6b6e7f319db03 Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Mon, 15 Jul 2019 15:35:13 -0700 Subject: [PATCH 0237/3715] ANDROID: Add optional __get xattr method paired to __vfs_getxattr Add an optional __get xattr method that would be called, if set, only in __vfs_getxattr instead of the regular get xattr method. Signed-off-by: Mark Salyzyn Bug: 133515582 Bug: 136124883 Bug: 129319403 Change-Id: If9f6cf3e0d964e77af769244bb1e8f6aee4f4445 --- fs/xattr.c | 11 ++++++++++- include/linux/xattr.h | 7 +++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/xattr.c b/fs/xattr.c index 3e5d157d9b8c..35ead9b764c6 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -307,6 +307,9 @@ __vfs_getxattr(struct dentry *dentry, struct inode *inode, const char *name, handler = xattr_resolve_name(inode, &name); if (IS_ERR(handler)) return PTR_ERR(handler); + if (unlikely(handler->__get)) + return handler->__get(handler, dentry, inode, name, value, + size); if (!handler->get) return -EOPNOTSUPP; return handler->get(handler, dentry, inode, name, value, size); @@ -318,6 +321,7 @@ vfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size) { struct inode *inode = dentry->d_inode; int error; + const struct xattr_handler *handler; error = xattr_permission(inode, name, MAY_READ); if (error) @@ -340,7 +344,12 @@ vfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size) return ret; } nolsm: - return __vfs_getxattr(dentry, inode, name, value, size); + handler = xattr_resolve_name(inode, &name); + if (IS_ERR(handler)) + return PTR_ERR(handler); + if (!handler->get) + return -EOPNOTSUPP; + return handler->get(handler, dentry, inode, name, value, size); } EXPORT_SYMBOL_GPL(vfs_getxattr); diff --git a/include/linux/xattr.h b/include/linux/xattr.h index d70f77a4b62a..7f538d232179 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -30,10 +30,13 @@ struct xattr_handler { const char *prefix; int flags; /* fs private flags */ bool (*list)(struct dentry *dentry); - int (*get)(const struct xattr_handler *, struct dentry *dentry, + int (*get)(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size); - int (*set)(const struct xattr_handler *, struct dentry *dentry, + int (*__get)(const struct xattr_handler *handler, struct dentry *dentry, + struct inode *inode, const char *name, void *buffer, + size_t size); + int (*set)(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, const void *buffer, size_t size, int flags); }; From f7fedd5c15c1e424a6f910362a928abe49855bf2 Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Mon, 15 Jul 2019 15:46:45 -0700 Subject: [PATCH 0238/3715] ANDROID: overlayfs: add __get xattr method Because of the overlayfs getxattr recursion, the incoming inode fails to update the selinux sid resulting in avc denials being reported against a target context of u:object_r:unlabeled:s0. Solution is to add a _get xattr method that calls the __vfs_getxattr handler so that the context can be read in, rather than being denied with an -EACCES when vfs_getxattr handler is called. Signed-off-by: Mark Salyzyn Bug: 133515582 Bug: 136124883 Bug: 129319403 Change-Id: Ia39543c5ce617976f14d790fb88e471d575ffd65 --- fs/overlayfs/inode.c | 15 +++++++++++++++ fs/overlayfs/overlayfs.h | 2 ++ fs/overlayfs/super.c | 18 ++++++++++++++++++ 3 files changed, 35 insertions(+) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 44f75736f29d..c078ae4b5a33 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -244,6 +244,21 @@ out: return err; } +int __ovl_xattr_get(struct dentry *dentry, struct inode *inode, + const char *name, void *value, size_t size) +{ + ssize_t res; + const struct cred *old_cred; + struct dentry *realdentry = + ovl_i_dentry_upper(inode) ?: ovl_dentry_lower(dentry); + + old_cred = ovl_override_creds(dentry->d_sb); + res = __vfs_getxattr(realdentry, d_inode(realdentry), name, value, + size); + ovl_revert_creds(old_cred); + return res; +} + int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size) { diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index c0307da67a52..f4569f341f69 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -280,6 +280,8 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, const void *value, size_t size, int flags); int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size); +int __ovl_xattr_get(struct dentry *dentry, struct inode *inode, + const char *name, void *value, size_t size); ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); struct posix_acl *ovl_get_acl(struct inode *inode, int type); int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 46cb8314f903..01cd5a2cbe35 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -733,6 +733,14 @@ ovl_posix_acl_xattr_get(const struct xattr_handler *handler, return ovl_xattr_get(dentry, inode, handler->name, buffer, size); } +static int __maybe_unused +__ovl_posix_acl_xattr_get(const struct xattr_handler *handler, + struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size) +{ + return __ovl_xattr_get(dentry, inode, handler->name, buffer, size); +} + static int __maybe_unused ovl_posix_acl_xattr_set(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, @@ -813,6 +821,13 @@ static int ovl_other_xattr_get(const struct xattr_handler *handler, return ovl_xattr_get(dentry, inode, name, buffer, size); } +static int __ovl_other_xattr_get(const struct xattr_handler *handler, + struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size) +{ + return __ovl_xattr_get(dentry, inode, name, buffer, size); +} + static int ovl_other_xattr_set(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, const void *value, @@ -826,6 +841,7 @@ ovl_posix_acl_access_xattr_handler = { .name = XATTR_NAME_POSIX_ACL_ACCESS, .flags = ACL_TYPE_ACCESS, .get = ovl_posix_acl_xattr_get, + .__get = __ovl_posix_acl_xattr_get, .set = ovl_posix_acl_xattr_set, }; @@ -834,6 +850,7 @@ ovl_posix_acl_default_xattr_handler = { .name = XATTR_NAME_POSIX_ACL_DEFAULT, .flags = ACL_TYPE_DEFAULT, .get = ovl_posix_acl_xattr_get, + .__get = __ovl_posix_acl_xattr_get, .set = ovl_posix_acl_xattr_set, }; @@ -846,6 +863,7 @@ static const struct xattr_handler ovl_own_xattr_handler = { static const struct xattr_handler ovl_other_xattr_handler = { .prefix = "", /* catch all */ .get = ovl_other_xattr_get, + .__get = __ovl_other_xattr_get, .set = ovl_other_xattr_set, }; From 6e5cc2351dea9ef5e7db67de83dc7b3b97c9196c Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Tue, 23 Jul 2019 13:53:48 -0700 Subject: [PATCH 0239/3715] BACKPORT: ANDROID: overlayfs: internal getxattr operations without sepolicy checking Check impure, opaque, origin & meta xattr with no sepolicy audit (using __vfs_getxattr) since these operations are internal to overlayfs operations and do not disclose any data. This became an issue for credential override off since sys_admin would have been required by the caller; whereas would have been inherently present for the creator since it performed the mount. This is a change in operations since we do not check in the new ovl_vfs_getxattr function if the credential override is off or not. Reasoning is that the sepolicy check is unnecessary overhead, especially since the check can be expensive. (cherry picked from commit 9f32911c57f72aa76acf375c66f57c88a8516ef7 ("ANDROID: overlayfs: internal getxattr operations without sepolicy checking")) Signed-off-by: Mark Salyzyn Bug: 133515582 Bug: 136124883 Bug: 129319403 Change-Id: I34d99cc46e9e87a79efc8d05f85980bbc137f7eb --- fs/overlayfs/inode.c | 5 +++-- fs/overlayfs/namei.c | 17 +++++++++-------- fs/overlayfs/overlayfs.h | 2 ++ fs/overlayfs/util.c | 10 ++++++++-- 4 files changed, 22 insertions(+), 12 deletions(-) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index c078ae4b5a33..0743fb4cc3c0 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -539,12 +539,13 @@ unsigned int ovl_get_nlink(struct dentry *lowerdentry, int nlink_diff; int nlink; char buf[13]; - int err; + ssize_t err; if (!lowerdentry || !upperdentry || d_inode(lowerdentry)->i_nlink == 1) return fallback; - err = vfs_getxattr(upperdentry, OVL_XATTR_NLINK, &buf, sizeof(buf) - 1); + err = ovl_vfs_getxattr(upperdentry, OVL_XATTR_NLINK, + &buf, sizeof(buf) - 1); if (err < 0) goto fail; diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 2f08467880cf..009136588d0c 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -29,10 +29,10 @@ struct ovl_lookup_data { static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d, size_t prelen, const char *post) { - int res; + ssize_t res; char *s, *next, *buf = NULL; - res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, NULL, 0); + res = ovl_vfs_getxattr(dentry, OVL_XATTR_REDIRECT, NULL, 0); if (res < 0) { if (res == -ENODATA || res == -EOPNOTSUPP) return 0; @@ -45,7 +45,7 @@ static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d, if (res == 0) goto invalid; - res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, buf, res); + res = ovl_vfs_getxattr(dentry, OVL_XATTR_REDIRECT, buf, res); if (res < 0) goto fail; if (res == 0) @@ -99,10 +99,10 @@ static int ovl_acceptable(void *ctx, struct dentry *dentry) static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry) { - int res; + ssize_t res; struct ovl_fh *fh = NULL; - res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0); + res = ovl_vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0); if (res < 0) { if (res == -ENODATA || res == -EOPNOTSUPP) return NULL; @@ -116,7 +116,7 @@ static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry) if (!fh) return ERR_PTR(-ENOMEM); - res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, fh, res); + res = ovl_vfs_getxattr(dentry, OVL_XATTR_ORIGIN, fh, res); if (res < 0) goto fail; @@ -142,10 +142,11 @@ out: return NULL; fail: - pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res); + pr_warn_ratelimited("overlayfs: failed to get origin (%zi)\n", res); goto out; invalid: - pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh); + pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", + (int)res, fh); goto out; } diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index f4569f341f69..fdf83a10a9d3 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -188,6 +188,8 @@ void ovl_drop_write(struct dentry *dentry); struct dentry *ovl_workdir(struct dentry *dentry); const struct cred *ovl_override_creds(struct super_block *sb); void ovl_revert_creds(const struct cred *oldcred); +ssize_t ovl_vfs_getxattr(struct dentry *dentry, const char *name, void *buf, + size_t size); struct super_block *ovl_same_sb(struct super_block *sb); bool ovl_can_decode_fh(struct super_block *sb); struct dentry *ovl_indexdir(struct super_block *sb); diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index ce179c2ce616..e2244c134c9c 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -52,6 +52,12 @@ void ovl_revert_creds(const struct cred *old_cred) revert_creds(old_cred); } +ssize_t ovl_vfs_getxattr(struct dentry *dentry, const char *name, void *buf, + size_t size) +{ + return __vfs_getxattr(dentry, d_inode(dentry), name, buf, size); +} + struct super_block *ovl_same_sb(struct super_block *sb) { struct ovl_fs *ofs = sb->s_fs_info; @@ -339,13 +345,13 @@ void ovl_copy_up_end(struct dentry *dentry) bool ovl_check_dir_xattr(struct dentry *dentry, const char *name) { - int res; + ssize_t res; char val; if (!d_is_dir(dentry)) return false; - res = vfs_getxattr(dentry, name, &val, 1); + res = ovl_vfs_getxattr(dentry, name, &val, 1); if (res == 1 && val == 'y') return true; From f40abacc8ac019606f670f4b62f1a633dac8316e Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Wed, 6 Nov 2019 09:15:00 -0800 Subject: [PATCH 0240/3715] ANDROID: overlayfs: fix printk format ssize_t value replace %i with %zi. Signed-off-by: Mark Salyzyn Bug: 133515582 Bug: 136124883 Bug: 129319403 Change-Id: I6e3d579b9e1fe66b2ba35913b6c73b41fdaf03ce --- fs/overlayfs/inode.c | 2 +- fs/overlayfs/namei.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 0743fb4cc3c0..ce48ed6ba052 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -567,7 +567,7 @@ unsigned int ovl_get_nlink(struct dentry *lowerdentry, return nlink; fail: - pr_warn_ratelimited("overlayfs: failed to get index nlink (%pd2, err=%i)\n", + pr_warn_ratelimited("overlayfs: failed to get index nlink (%pd2, err=%zi)\n", upperdentry, err); return fallback; } diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 009136588d0c..6071196a7b63 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -85,7 +85,7 @@ err_free: kfree(buf); return 0; fail: - pr_warn_ratelimited("overlayfs: failed to get redirect (%i)\n", res); + pr_warn_ratelimited("overlayfs: failed to get redirect (%zi)\n", res); goto err_free; invalid: pr_warn_ratelimited("overlayfs: invalid redirect (%s)\n", buf); From e373d231cba3ea7e93934c066ccd03e0c075ce6e Mon Sep 17 00:00:00 2001 From: Rodrigo Rivas Costa Date: Mon, 16 Apr 2018 14:27:02 +0200 Subject: [PATCH 0241/3715] UPSTREAM: HID: add driver for Valve Steam Controller There are two ways to connect the Steam Controller: directly to the USB or with the USB wireless adapter. Both methods are similar, but the wireless adapter can connect up to 4 devices at the same time. The wired device will appear as 3 interfaces: a virtual mouse, a virtual keyboard and a custom HID device. The wireless device will appear as 5 interfaces: a virtual keyboard and 4 custom HID devices, that will remain silent until a device is actually connected. The custom HID device has a report descriptor with all vendor specific usages, so the hid-generic is not very useful. In a PC/SteamBox Valve Steam Client provices a software translation by using hidraw and a creates a uinput virtual gamepad and XTest keyboard/mouse. This driver intercepts the hidraw usage, so it can get out of the way when the Steam Client is in use. (cherry picked from commit c164d6abf3841ffacfdb757c10616f9cb1f67276 ("HID: add driver for Valve Steam Controller")) https://github.com/torvalds/linux/commit/c164d6abf3841ffacfdb757c10616f9cb1f67276 Signed-off-by: Rodrigo Rivas Costa Signed-off-by: Jiri Kosina Change-Id: Ic435163998560885afb2a6d71b1604de21fb8b11 Signed-Off-By: Siarhei Vishniakou Bug: 136263708 Test: connected steam controller to Android --- drivers/hid/Kconfig | 8 + drivers/hid/Makefile | 1 + drivers/hid/hid-core.c | 5 + drivers/hid/hid-ids.h | 5 + drivers/hid/hid-steam.c | 973 ++++++++++++++++++++++++++++++++++++++++ include/linux/hid.h | 1 + 6 files changed, 993 insertions(+) create mode 100644 drivers/hid/hid-steam.c diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index eca4c9d97110..e51c529035cb 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -813,6 +813,14 @@ config HID_SPEEDLINK ---help--- Support for Speedlink Vicious and Divine Cezanne mouse. +config HID_STEAM + tristate "Steam Controller support" + depends on HID + ---help--- + Say Y here if you have a Steam Controller if you want to use it + without running the Steam Client. It supports both the wired and + the wireless adaptor. + config HID_STEELSERIES tristate "Steelseries SRW-S1 steering wheel support" depends on HID diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile index 235bd2a7b333..e146c257285a 100644 --- a/drivers/hid/Makefile +++ b/drivers/hid/Makefile @@ -94,6 +94,7 @@ obj-$(CONFIG_HID_SAMSUNG) += hid-samsung.o obj-$(CONFIG_HID_SMARTJOYPLUS) += hid-sjoy.o obj-$(CONFIG_HID_SONY) += hid-sony.o obj-$(CONFIG_HID_SPEEDLINK) += hid-speedlink.o +obj-$(CONFIG_HID_STEAM) += hid-steam.o obj-$(CONFIG_HID_STEELSERIES) += hid-steelseries.o obj-$(CONFIG_HID_SUNPLUS) += hid-sunplus.o obj-$(CONFIG_HID_GREENASIA) += hid-gaff.o diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 0b0fa257299d..4502719debe8 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -2387,6 +2387,11 @@ static const struct hid_device_id hid_have_special_driver[] = { #if IS_ENABLED(CONFIG_HID_UDRAW_PS3) { HID_USB_DEVICE(USB_VENDOR_ID_THQ, USB_DEVICE_ID_THQ_PS3_UDRAW) }, #endif +#if IS_ENABLED(CONFIG_HID_STEAM) + { HID_USB_DEVICE(USB_VENDOR_ID_VALVE, USB_DEVICE_ID_STEAM_CONTROLLER) }, + { HID_USB_DEVICE(USB_VENDOR_ID_VALVE, USB_DEVICE_ID_STEAM_CONTROLLER_WIRELESS) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_VALVE, USB_DEVICE_ID_STEAM_CONTROLLER_BT) }, +#endif #if IS_ENABLED(CONFIG_HID_WALTOP) { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SLIM_TABLET_5_8_INCH) }, { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SLIM_TABLET_12_1_INCH) }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 1e2e6e58256a..9872ae83df5f 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -996,6 +996,11 @@ #define USB_VENDOR_ID_STANTUM_SITRONIX 0x1403 #define USB_DEVICE_ID_MTP_SITRONIX 0x5001 +#define USB_VENDOR_ID_VALVE 0x28de +#define USB_DEVICE_ID_STEAM_CONTROLLER 0x1102 +#define USB_DEVICE_ID_STEAM_CONTROLLER_WIRELESS 0x1142 +#define USB_DEVICE_ID_STEAM_CONTROLLER_BT 0x1106 + #define USB_VENDOR_ID_STEELSERIES 0x1038 #define USB_DEVICE_ID_STEELSERIES_SRWS1 0x1410 diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c new file mode 100644 index 000000000000..8f999b3baf94 --- /dev/null +++ b/drivers/hid/hid-steam.c @@ -0,0 +1,973 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * HID driver for Valve Steam Controller + * + * Copyright (c) 2018 Rodrigo Rivas Costa + * + * Supports both the wired and wireless interfaces. + * + * This controller has a builtin emulation of mouse and keyboard: the right pad + * can be used as a mouse, the shoulder buttons are mouse buttons, A and B + * buttons are ENTER and ESCAPE, and so on. This is implemented as additional + * HID interfaces. + * + * This is known as the "lizard mode", because apparently lizards like to use + * the computer from the coach, without a proper mouse and keyboard. + * + * This driver will disable the lizard mode when the input device is opened + * and re-enable it when the input device is closed, so as not to break user + * mode behaviour. The lizard_mode parameter can be used to change that. + * + * There are a few user space applications (notably Steam Client) that use + * the hidraw interface directly to create input devices (XTest, uinput...). + * In order to avoid breaking them this driver creates a layered hidraw device, + * so it can detect when the client is running and then: + * - it will not send any command to the controller. + * - this input device will be disabled, to avoid double input of the same + * user action. + * + * For additional functions, such as changing the right-pad margin or switching + * the led, you can use the user-space tool at: + * + * https://github.com/rodrigorc/steamctrl + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "hid-ids.h" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Rodrigo Rivas Costa "); + +static bool lizard_mode = true; + +static DEFINE_MUTEX(steam_devices_lock); +static LIST_HEAD(steam_devices); + +#define STEAM_QUIRK_WIRELESS BIT(0) + +/* Touch pads are 40 mm in diameter and 65535 units */ +#define STEAM_PAD_RESOLUTION 1638 +/* Trigger runs are about 5 mm and 256 units */ +#define STEAM_TRIGGER_RESOLUTION 51 +/* Joystick runs are about 5 mm and 256 units */ +#define STEAM_JOYSTICK_RESOLUTION 51 + +#define STEAM_PAD_FUZZ 256 + +/* + * Commands that can be sent in a feature report. + * Thanks to Valve for some valuable hints. + */ +#define STEAM_CMD_SET_MAPPINGS 0x80 +#define STEAM_CMD_CLEAR_MAPPINGS 0x81 +#define STEAM_CMD_GET_MAPPINGS 0x82 +#define STEAM_CMD_GET_ATTRIB 0x83 +#define STEAM_CMD_GET_ATTRIB_LABEL 0x84 +#define STEAM_CMD_DEFAULT_MAPPINGS 0x85 +#define STEAM_CMD_FACTORY_RESET 0x86 +#define STEAM_CMD_WRITE_REGISTER 0x87 +#define STEAM_CMD_CLEAR_REGISTER 0x88 +#define STEAM_CMD_READ_REGISTER 0x89 +#define STEAM_CMD_GET_REGISTER_LABEL 0x8a +#define STEAM_CMD_GET_REGISTER_MAX 0x8b +#define STEAM_CMD_GET_REGISTER_DEFAULT 0x8c +#define STEAM_CMD_SET_MODE 0x8d +#define STEAM_CMD_DEFAULT_MOUSE 0x8e +#define STEAM_CMD_FORCEFEEDBAK 0x8f +#define STEAM_CMD_REQUEST_COMM_STATUS 0xb4 +#define STEAM_CMD_GET_SERIAL 0xae + +/* Some useful register ids */ +#define STEAM_REG_LPAD_MODE 0x07 +#define STEAM_REG_RPAD_MODE 0x08 +#define STEAM_REG_RPAD_MARGIN 0x18 +#define STEAM_REG_LED 0x2d +#define STEAM_REG_GYRO_MODE 0x30 + +/* Raw event identifiers */ +#define STEAM_EV_INPUT_DATA 0x01 +#define STEAM_EV_CONNECT 0x03 +#define STEAM_EV_BATTERY 0x04 + +/* Values for GYRO_MODE (bitmask) */ +#define STEAM_GYRO_MODE_OFF 0x0000 +#define STEAM_GYRO_MODE_STEERING 0x0001 +#define STEAM_GYRO_MODE_TILT 0x0002 +#define STEAM_GYRO_MODE_SEND_ORIENTATION 0x0004 +#define STEAM_GYRO_MODE_SEND_RAW_ACCEL 0x0008 +#define STEAM_GYRO_MODE_SEND_RAW_GYRO 0x0010 + +/* Other random constants */ +#define STEAM_SERIAL_LEN 10 + +struct steam_device { + struct list_head list; + spinlock_t lock; + struct hid_device *hdev, *client_hdev; + struct mutex mutex; + bool client_opened, input_opened; + struct input_dev __rcu *input; + unsigned long quirks; + struct work_struct work_connect; + bool connected; + char serial_no[STEAM_SERIAL_LEN + 1]; +}; + +static int steam_recv_report(struct steam_device *steam, + u8 *data, int size) +{ + struct hid_report *r; + u8 *buf; + int ret; + + r = steam->hdev->report_enum[HID_FEATURE_REPORT].report_id_hash[0]; + if (hid_report_len(r) < 64) + return -EINVAL; + + buf = hid_alloc_report_buf(r, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + /* + * The report ID is always 0, so strip the first byte from the output. + * hid_report_len() is not counting the report ID, so +1 to the length + * or else we get a EOVERFLOW. We are safe from a buffer overflow + * because hid_alloc_report_buf() allocates +7 bytes. + */ + ret = hid_hw_raw_request(steam->hdev, 0x00, + buf, hid_report_len(r) + 1, + HID_FEATURE_REPORT, HID_REQ_GET_REPORT); + if (ret > 0) + memcpy(data, buf + 1, min(size, ret - 1)); + kfree(buf); + return ret; +} + +static int steam_send_report(struct steam_device *steam, + u8 *cmd, int size) +{ + struct hid_report *r; + u8 *buf; + unsigned int retries = 50; + int ret; + + r = steam->hdev->report_enum[HID_FEATURE_REPORT].report_id_hash[0]; + if (hid_report_len(r) < 64) + return -EINVAL; + + buf = hid_alloc_report_buf(r, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + /* The report ID is always 0 */ + memcpy(buf + 1, cmd, size); + + /* + * Sometimes the wireless controller fails with EPIPE + * when sending a feature report. + * Doing a HID_REQ_GET_REPORT and waiting for a while + * seems to fix that. + */ + do { + ret = hid_hw_raw_request(steam->hdev, 0, + buf, size + 1, + HID_FEATURE_REPORT, HID_REQ_SET_REPORT); + if (ret != -EPIPE) + break; + msleep(20); + } while (--retries); + + kfree(buf); + if (ret < 0) + hid_err(steam->hdev, "%s: error %d (%*ph)\n", __func__, + ret, size, cmd); + return ret; +} + +static inline int steam_send_report_byte(struct steam_device *steam, u8 cmd) +{ + return steam_send_report(steam, &cmd, 1); +} + +static int steam_write_registers(struct steam_device *steam, + /* u8 reg, u16 val */...) +{ + /* Send: 0x87 len (reg valLo valHi)* */ + u8 reg; + u16 val; + u8 cmd[64] = {STEAM_CMD_WRITE_REGISTER, 0x00}; + va_list args; + + va_start(args, steam); + for (;;) { + reg = va_arg(args, int); + if (reg == 0) + break; + val = va_arg(args, int); + cmd[cmd[1] + 2] = reg; + cmd[cmd[1] + 3] = val & 0xff; + cmd[cmd[1] + 4] = val >> 8; + cmd[1] += 3; + } + va_end(args); + + return steam_send_report(steam, cmd, 2 + cmd[1]); +} + +static int steam_get_serial(struct steam_device *steam) +{ + /* + * Send: 0xae 0x15 0x01 + * Recv: 0xae 0x15 0x01 serialnumber (10 chars) + */ + int ret; + u8 cmd[] = {STEAM_CMD_GET_SERIAL, 0x15, 0x01}; + u8 reply[3 + STEAM_SERIAL_LEN + 1]; + + ret = steam_send_report(steam, cmd, sizeof(cmd)); + if (ret < 0) + return ret; + ret = steam_recv_report(steam, reply, sizeof(reply)); + if (ret < 0) + return ret; + if (reply[0] != 0xae || reply[1] != 0x15 || reply[2] != 0x01) + return -EIO; + reply[3 + STEAM_SERIAL_LEN] = 0; + strlcpy(steam->serial_no, reply + 3, sizeof(steam->serial_no)); + return 0; +} + +/* + * This command requests the wireless adaptor to post an event + * with the connection status. Useful if this driver is loaded when + * the controller is already connected. + */ +static inline int steam_request_conn_status(struct steam_device *steam) +{ + return steam_send_report_byte(steam, STEAM_CMD_REQUEST_COMM_STATUS); +} + +static void steam_set_lizard_mode(struct steam_device *steam, bool enable) +{ + if (enable) { + /* enable esc, enter, cursors */ + steam_send_report_byte(steam, STEAM_CMD_DEFAULT_MAPPINGS); + /* enable mouse */ + steam_send_report_byte(steam, STEAM_CMD_DEFAULT_MOUSE); + steam_write_registers(steam, + STEAM_REG_RPAD_MARGIN, 0x01, /* enable margin */ + 0); + } else { + /* disable esc, enter, cursor */ + steam_send_report_byte(steam, STEAM_CMD_CLEAR_MAPPINGS); + steam_write_registers(steam, + STEAM_REG_RPAD_MODE, 0x07, /* disable mouse */ + STEAM_REG_RPAD_MARGIN, 0x00, /* disable margin */ + 0); + } +} + +static void steam_update_lizard_mode(struct steam_device *steam) +{ + mutex_lock(&steam->mutex); + if (!steam->client_opened) { + if (steam->input_opened) + steam_set_lizard_mode(steam, false); + else + steam_set_lizard_mode(steam, lizard_mode); + } + mutex_unlock(&steam->mutex); +} + +static int steam_input_open(struct input_dev *dev) +{ + struct steam_device *steam = input_get_drvdata(dev); + int ret; + + ret = hid_hw_open(steam->hdev); + if (ret) + return ret; + + mutex_lock(&steam->mutex); + steam->input_opened = true; + if (!steam->client_opened && lizard_mode) + steam_set_lizard_mode(steam, false); + mutex_unlock(&steam->mutex); + return 0; +} + +static void steam_input_close(struct input_dev *dev) +{ + struct steam_device *steam = input_get_drvdata(dev); + + mutex_lock(&steam->mutex); + steam->input_opened = false; + if (!steam->client_opened && lizard_mode) + steam_set_lizard_mode(steam, true); + mutex_unlock(&steam->mutex); + + hid_hw_close(steam->hdev); +} + +static int steam_register(struct steam_device *steam) +{ + struct hid_device *hdev = steam->hdev; + struct input_dev *input; + int ret; + + rcu_read_lock(); + input = rcu_dereference(steam->input); + rcu_read_unlock(); + if (input) { + dbg_hid("%s: already connected\n", __func__); + return 0; + } + + /* + * Unlikely, but getting the serial could fail, and it is not so + * important, so make up a serial number and go on. + */ + if (steam_get_serial(steam) < 0) + strlcpy(steam->serial_no, "XXXXXXXXXX", + sizeof(steam->serial_no)); + + hid_info(hdev, "Steam Controller '%s' connected", + steam->serial_no); + + input = input_allocate_device(); + if (!input) + return -ENOMEM; + + input_set_drvdata(input, steam); + input->dev.parent = &hdev->dev; + input->open = steam_input_open; + input->close = steam_input_close; + + input->name = (steam->quirks & STEAM_QUIRK_WIRELESS) ? + "Wireless Steam Controller" : + "Steam Controller"; + input->phys = hdev->phys; + input->uniq = steam->serial_no; + input->id.bustype = hdev->bus; + input->id.vendor = hdev->vendor; + input->id.product = hdev->product; + input->id.version = hdev->version; + + input_set_capability(input, EV_KEY, BTN_TR2); + input_set_capability(input, EV_KEY, BTN_TL2); + input_set_capability(input, EV_KEY, BTN_TR); + input_set_capability(input, EV_KEY, BTN_TL); + input_set_capability(input, EV_KEY, BTN_Y); + input_set_capability(input, EV_KEY, BTN_B); + input_set_capability(input, EV_KEY, BTN_X); + input_set_capability(input, EV_KEY, BTN_A); + input_set_capability(input, EV_KEY, BTN_DPAD_UP); + input_set_capability(input, EV_KEY, BTN_DPAD_RIGHT); + input_set_capability(input, EV_KEY, BTN_DPAD_LEFT); + input_set_capability(input, EV_KEY, BTN_DPAD_DOWN); + input_set_capability(input, EV_KEY, BTN_SELECT); + input_set_capability(input, EV_KEY, BTN_MODE); + input_set_capability(input, EV_KEY, BTN_START); + input_set_capability(input, EV_KEY, BTN_GEAR_DOWN); + input_set_capability(input, EV_KEY, BTN_GEAR_UP); + input_set_capability(input, EV_KEY, BTN_THUMBR); + input_set_capability(input, EV_KEY, BTN_THUMBL); + input_set_capability(input, EV_KEY, BTN_THUMB); + input_set_capability(input, EV_KEY, BTN_THUMB2); + + input_set_abs_params(input, ABS_HAT2Y, 0, 255, 0, 0); + input_set_abs_params(input, ABS_HAT2X, 0, 255, 0, 0); + input_set_abs_params(input, ABS_X, -32767, 32767, 0, 0); + input_set_abs_params(input, ABS_Y, -32767, 32767, 0, 0); + input_set_abs_params(input, ABS_RX, -32767, 32767, + STEAM_PAD_FUZZ, 0); + input_set_abs_params(input, ABS_RY, -32767, 32767, + STEAM_PAD_FUZZ, 0); + input_set_abs_params(input, ABS_HAT0X, -32767, 32767, + STEAM_PAD_FUZZ, 0); + input_set_abs_params(input, ABS_HAT0Y, -32767, 32767, + STEAM_PAD_FUZZ, 0); + input_abs_set_res(input, ABS_X, STEAM_JOYSTICK_RESOLUTION); + input_abs_set_res(input, ABS_Y, STEAM_JOYSTICK_RESOLUTION); + input_abs_set_res(input, ABS_RX, STEAM_PAD_RESOLUTION); + input_abs_set_res(input, ABS_RY, STEAM_PAD_RESOLUTION); + input_abs_set_res(input, ABS_HAT0X, STEAM_PAD_RESOLUTION); + input_abs_set_res(input, ABS_HAT0Y, STEAM_PAD_RESOLUTION); + input_abs_set_res(input, ABS_HAT2Y, STEAM_TRIGGER_RESOLUTION); + input_abs_set_res(input, ABS_HAT2X, STEAM_TRIGGER_RESOLUTION); + + ret = input_register_device(input); + if (ret) + goto input_register_fail; + + rcu_assign_pointer(steam->input, input); + + return 0; + +input_register_fail: + input_free_device(input); + return ret; +} + +static void steam_unregister(struct steam_device *steam) +{ + struct input_dev *input; + + rcu_read_lock(); + input = rcu_dereference(steam->input); + rcu_read_unlock(); + + if (input) { + RCU_INIT_POINTER(steam->input, NULL); + synchronize_rcu(); + hid_info(steam->hdev, "Steam Controller '%s' disconnected", + steam->serial_no); + input_unregister_device(input); + } +} + +static void steam_work_connect_cb(struct work_struct *work) +{ + struct steam_device *steam = container_of(work, struct steam_device, + work_connect); + unsigned long flags; + bool connected; + int ret; + + spin_lock_irqsave(&steam->lock, flags); + connected = steam->connected; + spin_unlock_irqrestore(&steam->lock, flags); + + if (connected) { + ret = steam_register(steam); + if (ret) { + hid_err(steam->hdev, + "%s:steam_register failed with error %d\n", + __func__, ret); + } + } else { + steam_unregister(steam); + } +} + +static bool steam_is_valve_interface(struct hid_device *hdev) +{ + struct hid_report_enum *rep_enum; + + /* + * The wired device creates 3 interfaces: + * 0: emulated mouse. + * 1: emulated keyboard. + * 2: the real game pad. + * The wireless device creates 5 interfaces: + * 0: emulated keyboard. + * 1-4: slots where up to 4 real game pads will be connected to. + * We know which one is the real gamepad interface because they are the + * only ones with a feature report. + */ + rep_enum = &hdev->report_enum[HID_FEATURE_REPORT]; + return !list_empty(&rep_enum->report_list); +} + +static int steam_client_ll_parse(struct hid_device *hdev) +{ + struct steam_device *steam = hid_get_drvdata(hdev); + + return hid_parse_report(hdev, steam->hdev->dev_rdesc, + steam->hdev->dev_rsize); +} + +static int steam_client_ll_start(struct hid_device *hdev) +{ + return 0; +} + +static void steam_client_ll_stop(struct hid_device *hdev) +{ +} + +static int steam_client_ll_open(struct hid_device *hdev) +{ + struct steam_device *steam = hid_get_drvdata(hdev); + int ret; + + ret = hid_hw_open(steam->hdev); + if (ret) + return ret; + + mutex_lock(&steam->mutex); + steam->client_opened = true; + mutex_unlock(&steam->mutex); + return ret; +} + +static void steam_client_ll_close(struct hid_device *hdev) +{ + struct steam_device *steam = hid_get_drvdata(hdev); + + mutex_lock(&steam->mutex); + steam->client_opened = false; + if (steam->input_opened) + steam_set_lizard_mode(steam, false); + else + steam_set_lizard_mode(steam, lizard_mode); + mutex_unlock(&steam->mutex); + + hid_hw_close(steam->hdev); +} + +static int steam_client_ll_raw_request(struct hid_device *hdev, + unsigned char reportnum, u8 *buf, + size_t count, unsigned char report_type, + int reqtype) +{ + struct steam_device *steam = hid_get_drvdata(hdev); + + return hid_hw_raw_request(steam->hdev, reportnum, buf, count, + report_type, reqtype); +} + +static struct hid_ll_driver steam_client_ll_driver = { + .parse = steam_client_ll_parse, + .start = steam_client_ll_start, + .stop = steam_client_ll_stop, + .open = steam_client_ll_open, + .close = steam_client_ll_close, + .raw_request = steam_client_ll_raw_request, +}; + +static struct hid_device *steam_create_client_hid(struct hid_device *hdev) +{ + struct hid_device *client_hdev; + + client_hdev = hid_allocate_device(); + if (IS_ERR(client_hdev)) + return client_hdev; + + client_hdev->ll_driver = &steam_client_ll_driver; + client_hdev->dev.parent = hdev->dev.parent; + client_hdev->bus = hdev->bus; + client_hdev->vendor = hdev->vendor; + client_hdev->product = hdev->product; + strlcpy(client_hdev->name, hdev->name, + sizeof(client_hdev->name)); + strlcpy(client_hdev->phys, hdev->phys, + sizeof(client_hdev->phys)); + /* + * Since we use the same device info than the real interface to + * trick userspace, we will be calling steam_probe recursively. + * We need to recognize the client interface somehow. + */ + client_hdev->group = HID_GROUP_STEAM; + return client_hdev; +} + +static int steam_probe(struct hid_device *hdev, + const struct hid_device_id *id) +{ + struct steam_device *steam; + int ret; + + ret = hid_parse(hdev); + if (ret) { + hid_err(hdev, + "%s:parse of hid interface failed\n", __func__); + return ret; + } + + /* + * The virtual client_dev is only used for hidraw. + * Also avoid the recursive probe. + */ + if (hdev->group == HID_GROUP_STEAM) + return hid_hw_start(hdev, HID_CONNECT_HIDRAW); + /* + * The non-valve interfaces (mouse and keyboard emulation) are + * connected without changes. + */ + if (!steam_is_valve_interface(hdev)) + return hid_hw_start(hdev, HID_CONNECT_DEFAULT); + + steam = devm_kzalloc(&hdev->dev, sizeof(*steam), GFP_KERNEL); + if (!steam) { + ret = -ENOMEM; + goto steam_alloc_fail; + } + steam->hdev = hdev; + hid_set_drvdata(hdev, steam); + spin_lock_init(&steam->lock); + mutex_init(&steam->mutex); + steam->quirks = id->driver_data; + INIT_WORK(&steam->work_connect, steam_work_connect_cb); + + steam->client_hdev = steam_create_client_hid(hdev); + if (IS_ERR(steam->client_hdev)) { + ret = PTR_ERR(steam->client_hdev); + goto client_hdev_fail; + } + hid_set_drvdata(steam->client_hdev, steam); + + /* + * With the real steam controller interface, do not connect hidraw. + * Instead, create the client_hid and connect that. + */ + ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_HIDRAW); + if (ret) + goto hid_hw_start_fail; + + ret = hid_add_device(steam->client_hdev); + if (ret) + goto client_hdev_add_fail; + + if (steam->quirks & STEAM_QUIRK_WIRELESS) { + ret = hid_hw_open(hdev); + if (ret) { + hid_err(hdev, + "%s:hid_hw_open for wireless\n", + __func__); + goto hid_hw_open_fail; + } + hid_info(hdev, "Steam wireless receiver connected"); + steam_request_conn_status(steam); + } else { + ret = steam_register(steam); + if (ret) { + hid_err(hdev, + "%s:steam_register failed with error %d\n", + __func__, ret); + goto input_register_fail; + } + } + + mutex_lock(&steam_devices_lock); + steam_update_lizard_mode(steam); + list_add(&steam->list, &steam_devices); + mutex_unlock(&steam_devices_lock); + + return 0; + +hid_hw_open_fail: +input_register_fail: +client_hdev_add_fail: + hid_hw_stop(hdev); +hid_hw_start_fail: + hid_destroy_device(steam->client_hdev); +client_hdev_fail: + cancel_work_sync(&steam->work_connect); +steam_alloc_fail: + hid_err(hdev, "%s: failed with error %d\n", + __func__, ret); + return ret; +} + +static void steam_remove(struct hid_device *hdev) +{ + struct steam_device *steam = hid_get_drvdata(hdev); + + if (!steam || hdev->group == HID_GROUP_STEAM) { + hid_hw_stop(hdev); + return; + } + + mutex_lock(&steam_devices_lock); + list_del(&steam->list); + mutex_unlock(&steam_devices_lock); + + hid_destroy_device(steam->client_hdev); + steam->client_opened = false; + cancel_work_sync(&steam->work_connect); + if (steam->quirks & STEAM_QUIRK_WIRELESS) { + hid_info(hdev, "Steam wireless receiver disconnected"); + hid_hw_close(hdev); + } + hid_hw_stop(hdev); + steam_unregister(steam); +} + +static void steam_do_connect_event(struct steam_device *steam, bool connected) +{ + unsigned long flags; + + spin_lock_irqsave(&steam->lock, flags); + steam->connected = connected; + spin_unlock_irqrestore(&steam->lock, flags); + + if (schedule_work(&steam->work_connect) == 0) + dbg_hid("%s: connected=%d event already queued\n", + __func__, connected); +} + +/* + * Some input data in the protocol has the opposite sign. + * Clamp the values to 32767..-32767 so that the range is + * symmetrical and can be negated safely. + */ +static inline s16 steam_le16(u8 *data) +{ + s16 x = (s16) le16_to_cpup((__le16 *)data); + + return x == -32768 ? -32767 : x; +} + +/* + * The size for this message payload is 60. + * The known values are: + * (* values are not sent through wireless) + * (* accelerator/gyro is disabled by default) + * Offset| Type | Mapped to |Meaning + * -------+-------+-----------+-------------------------- + * 4-7 | u32 | -- | sequence number + * 8-10 | 24bit | see below | buttons + * 11 | u8 | ABS_HAT2Y | left trigger + * 12 | u8 | ABS_HAT2X | right trigger + * 13-15 | -- | -- | always 0 + * 16-17 | s16 | ABS_X/ABS_HAT0X | X value + * 18-19 | s16 | ABS_Y/ABS_HAT0Y | Y value + * 20-21 | s16 | ABS_RX | right-pad X value + * 22-23 | s16 | ABS_RY | right-pad Y value + * 24-25 | s16 | -- | * left trigger + * 26-27 | s16 | -- | * right trigger + * 28-29 | s16 | -- | * accelerometer X value + * 30-31 | s16 | -- | * accelerometer Y value + * 32-33 | s16 | -- | * accelerometer Z value + * 34-35 | s16 | -- | gyro X value + * 36-36 | s16 | -- | gyro Y value + * 38-39 | s16 | -- | gyro Z value + * 40-41 | s16 | -- | quaternion W value + * 42-43 | s16 | -- | quaternion X value + * 44-45 | s16 | -- | quaternion Y value + * 46-47 | s16 | -- | quaternion Z value + * 48-49 | -- | -- | always 0 + * 50-51 | s16 | -- | * left trigger (uncalibrated) + * 52-53 | s16 | -- | * right trigger (uncalibrated) + * 54-55 | s16 | -- | * joystick X value (uncalibrated) + * 56-57 | s16 | -- | * joystick Y value (uncalibrated) + * 58-59 | s16 | -- | * left-pad X value + * 60-61 | s16 | -- | * left-pad Y value + * 62-63 | u16 | -- | * battery voltage + * + * The buttons are: + * Bit | Mapped to | Description + * ------+------------+-------------------------------- + * 8.0 | BTN_TR2 | right trigger fully pressed + * 8.1 | BTN_TL2 | left trigger fully pressed + * 8.2 | BTN_TR | right shoulder + * 8.3 | BTN_TL | left shoulder + * 8.4 | BTN_Y | button Y + * 8.5 | BTN_B | button B + * 8.6 | BTN_X | button X + * 8.7 | BTN_A | button A + * 9.0 | BTN_DPAD_UP | lef-pad up + * 9.1 | BTN_DPAD_RIGHT | lef-pad right + * 9.2 | BTN_DPAD_LEFT | lef-pad left + * 9.3 | BTN_DPAD_DOWN | lef-pad down + * 9.4 | BTN_SELECT | menu left + * 9.5 | BTN_MODE | steam logo + * 9.6 | BTN_START | menu right + * 9.7 | BTN_GEAR_DOWN | left back lever + * 10.0 | BTN_GEAR_UP | right back lever + * 10.1 | -- | left-pad clicked + * 10.2 | BTN_THUMBR | right-pad clicked + * 10.3 | BTN_THUMB | left-pad touched (but see explanation below) + * 10.4 | BTN_THUMB2 | right-pad touched + * 10.5 | -- | unknown + * 10.6 | BTN_THUMBL | joystick clicked + * 10.7 | -- | lpad_and_joy + */ + +static void steam_do_input_event(struct steam_device *steam, + struct input_dev *input, u8 *data) +{ + /* 24 bits of buttons */ + u8 b8, b9, b10; + s16 x, y; + bool lpad_touched, lpad_and_joy; + + b8 = data[8]; + b9 = data[9]; + b10 = data[10]; + + input_report_abs(input, ABS_HAT2Y, data[11]); + input_report_abs(input, ABS_HAT2X, data[12]); + + /* + * These two bits tells how to interpret the values X and Y. + * lpad_and_joy tells that the joystick and the lpad are used at the + * same time. + * lpad_touched tells whether X/Y are to be read as lpad coord or + * joystick values. + * (lpad_touched || lpad_and_joy) tells if the lpad is really touched. + */ + lpad_touched = b10 & BIT(3); + lpad_and_joy = b10 & BIT(7); + x = steam_le16(data + 16); + y = -steam_le16(data + 18); + + input_report_abs(input, lpad_touched ? ABS_HAT0X : ABS_X, x); + input_report_abs(input, lpad_touched ? ABS_HAT0Y : ABS_Y, y); + /* Check if joystick is centered */ + if (lpad_touched && !lpad_and_joy) { + input_report_abs(input, ABS_X, 0); + input_report_abs(input, ABS_Y, 0); + } + /* Check if lpad is untouched */ + if (!(lpad_touched || lpad_and_joy)) { + input_report_abs(input, ABS_HAT0X, 0); + input_report_abs(input, ABS_HAT0Y, 0); + } + + input_report_abs(input, ABS_RX, steam_le16(data + 20)); + input_report_abs(input, ABS_RY, -steam_le16(data + 22)); + + input_event(input, EV_KEY, BTN_TR2, !!(b8 & BIT(0))); + input_event(input, EV_KEY, BTN_TL2, !!(b8 & BIT(1))); + input_event(input, EV_KEY, BTN_TR, !!(b8 & BIT(2))); + input_event(input, EV_KEY, BTN_TL, !!(b8 & BIT(3))); + input_event(input, EV_KEY, BTN_Y, !!(b8 & BIT(4))); + input_event(input, EV_KEY, BTN_B, !!(b8 & BIT(5))); + input_event(input, EV_KEY, BTN_X, !!(b8 & BIT(6))); + input_event(input, EV_KEY, BTN_A, !!(b8 & BIT(7))); + input_event(input, EV_KEY, BTN_SELECT, !!(b9 & BIT(4))); + input_event(input, EV_KEY, BTN_MODE, !!(b9 & BIT(5))); + input_event(input, EV_KEY, BTN_START, !!(b9 & BIT(6))); + input_event(input, EV_KEY, BTN_GEAR_DOWN, !!(b9 & BIT(7))); + input_event(input, EV_KEY, BTN_GEAR_UP, !!(b10 & BIT(0))); + input_event(input, EV_KEY, BTN_THUMBR, !!(b10 & BIT(2))); + input_event(input, EV_KEY, BTN_THUMBL, !!(b10 & BIT(6))); + input_event(input, EV_KEY, BTN_THUMB, lpad_touched || lpad_and_joy); + input_event(input, EV_KEY, BTN_THUMB2, !!(b10 & BIT(4))); + input_event(input, EV_KEY, BTN_DPAD_UP, !!(b9 & BIT(0))); + input_event(input, EV_KEY, BTN_DPAD_RIGHT, !!(b9 & BIT(1))); + input_event(input, EV_KEY, BTN_DPAD_LEFT, !!(b9 & BIT(2))); + input_event(input, EV_KEY, BTN_DPAD_DOWN, !!(b9 & BIT(3))); + + input_sync(input); +} + +static int steam_raw_event(struct hid_device *hdev, + struct hid_report *report, u8 *data, + int size) +{ + struct steam_device *steam = hid_get_drvdata(hdev); + struct input_dev *input; + + if (!steam) + return 0; + + if (steam->client_opened) + hid_input_report(steam->client_hdev, HID_FEATURE_REPORT, + data, size, 0); + /* + * All messages are size=64, all values little-endian. + * The format is: + * Offset| Meaning + * -------+-------------------------------------------- + * 0-1 | always 0x01, 0x00, maybe protocol version? + * 2 | type of message + * 3 | length of the real payload (not checked) + * 4-n | payload data, depends on the type + * + * There are these known types of message: + * 0x01: input data (60 bytes) + * 0x03: wireless connect/disconnect (1 byte) + * 0x04: battery status (11 bytes) + */ + + if (size != 64 || data[0] != 1 || data[1] != 0) + return 0; + + switch (data[2]) { + case STEAM_EV_INPUT_DATA: + if (steam->client_opened) + return 0; + rcu_read_lock(); + input = rcu_dereference(steam->input); + if (likely(input)) { + steam_do_input_event(steam, input, data); + } else { + dbg_hid("%s: input data without connect event\n", + __func__); + steam_do_connect_event(steam, true); + } + rcu_read_unlock(); + break; + case STEAM_EV_CONNECT: + /* + * The payload of this event is a single byte: + * 0x01: disconnected. + * 0x02: connected. + */ + switch (data[4]) { + case 0x01: + steam_do_connect_event(steam, false); + break; + case 0x02: + steam_do_connect_event(steam, true); + break; + } + break; + case STEAM_EV_BATTERY: + /* TODO: battery info */ + break; + } + return 0; +} + +static int steam_param_set_lizard_mode(const char *val, + const struct kernel_param *kp) +{ + struct steam_device *steam; + int ret; + + ret = param_set_bool(val, kp); + if (ret) + return ret; + + mutex_lock(&steam_devices_lock); + list_for_each_entry(steam, &steam_devices, list) { + steam_update_lizard_mode(steam); + } + mutex_unlock(&steam_devices_lock); + return 0; +} + +static const struct kernel_param_ops steam_lizard_mode_ops = { + .set = steam_param_set_lizard_mode, + .get = param_get_bool, +}; + +module_param_cb(lizard_mode, &steam_lizard_mode_ops, &lizard_mode, 0644); +MODULE_PARM_DESC(lizard_mode, + "Enable mouse and keyboard emulation (lizard mode) when the gamepad is not in use"); + +static const struct hid_device_id steam_controllers[] = { + { /* Wired Steam Controller */ + HID_USB_DEVICE(USB_VENDOR_ID_VALVE, + USB_DEVICE_ID_STEAM_CONTROLLER) + }, + { /* Wireless Steam Controller */ + HID_USB_DEVICE(USB_VENDOR_ID_VALVE, + USB_DEVICE_ID_STEAM_CONTROLLER_WIRELESS), + .driver_data = STEAM_QUIRK_WIRELESS + }, + {} +}; + +MODULE_DEVICE_TABLE(hid, steam_controllers); + +static struct hid_driver steam_controller_driver = { + .name = "hid-steam", + .id_table = steam_controllers, + .probe = steam_probe, + .remove = steam_remove, + .raw_event = steam_raw_event, +}; + +module_hid_driver(steam_controller_driver); \ No newline at end of file diff --git a/include/linux/hid.h b/include/linux/hid.h index 3656a04d764b..462005543529 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -363,6 +363,7 @@ struct hid_item { #define HID_GROUP_RMI 0x0100 #define HID_GROUP_WACOM 0x0101 #define HID_GROUP_LOGITECH_DJ_DEVICE 0x0102 +#define HID_GROUP_STEAM 0x0103 /* * HID protocol status From bfcd98131be2f25d5520c0bd736c59478ad7b491 Mon Sep 17 00:00:00 2001 From: Rodrigo Rivas Costa Date: Mon, 16 Apr 2018 14:27:03 +0200 Subject: [PATCH 0242/3715] UPSTREAM: HID: steam: add battery device. The wireless Steam Controller is battery operated, so add the battery device and power information. Signed-off-by: Jiri Kosina (cherry picked from commit f82719790751742be02142a6fa3ff5c62c52a6ae ("HID: steam: add battery device")) https://github.com/torvalds/linux/commit/f82719790751742be02142a6fa3ff5c62c52a6ae Change-Id: Ie945fffa96103a164c333b2c6c51bca44a41423b Signed-Off-By: Siarhei Vishniakou Bug: 136263708 Test: connected steam controller to Android --- drivers/hid/hid-steam.c | 141 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 140 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c index 8f999b3baf94..8c9418e722c4 100644 --- a/drivers/hid/hid-steam.c +++ b/drivers/hid/hid-steam.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "hid-ids.h" MODULE_LICENSE("GPL"); @@ -118,6 +119,10 @@ struct steam_device { struct work_struct work_connect; bool connected; char serial_no[STEAM_SERIAL_LEN + 1]; + struct power_supply_desc battery_desc; + struct power_supply __rcu *battery; + u8 battery_charge; + u16 voltage; }; static int steam_recv_report(struct steam_device *steam, @@ -316,6 +321,85 @@ static void steam_input_close(struct input_dev *dev) hid_hw_close(steam->hdev); } +static enum power_supply_property steam_battery_props[] = { + POWER_SUPPLY_PROP_PRESENT, + POWER_SUPPLY_PROP_SCOPE, + POWER_SUPPLY_PROP_VOLTAGE_NOW, + POWER_SUPPLY_PROP_CAPACITY, +}; + +static int steam_battery_get_property(struct power_supply *psy, + enum power_supply_property psp, + union power_supply_propval *val) +{ + struct steam_device *steam = power_supply_get_drvdata(psy); + unsigned long flags; + s16 volts; + u8 batt; + int ret = 0; + + spin_lock_irqsave(&steam->lock, flags); + volts = steam->voltage; + batt = steam->battery_charge; + spin_unlock_irqrestore(&steam->lock, flags); + + switch (psp) { + case POWER_SUPPLY_PROP_PRESENT: + val->intval = 1; + break; + case POWER_SUPPLY_PROP_SCOPE: + val->intval = POWER_SUPPLY_SCOPE_DEVICE; + break; + case POWER_SUPPLY_PROP_VOLTAGE_NOW: + val->intval = volts * 1000; /* mV -> uV */ + break; + case POWER_SUPPLY_PROP_CAPACITY: + val->intval = batt; + break; + default: + ret = -EINVAL; + break; + } + return ret; +} + +static int steam_battery_register(struct steam_device *steam) +{ + struct power_supply *battery; + struct power_supply_config battery_cfg = { .drv_data = steam, }; + unsigned long flags; + int ret; + + steam->battery_desc.type = POWER_SUPPLY_TYPE_BATTERY; + steam->battery_desc.properties = steam_battery_props; + steam->battery_desc.num_properties = ARRAY_SIZE(steam_battery_props); + steam->battery_desc.get_property = steam_battery_get_property; + steam->battery_desc.name = devm_kasprintf(&steam->hdev->dev, + GFP_KERNEL, "steam-controller-%s-battery", + steam->serial_no); + if (!steam->battery_desc.name) + return -ENOMEM; + + /* avoid the warning of 0% battery while waiting for the first info */ + spin_lock_irqsave(&steam->lock, flags); + steam->voltage = 3000; + steam->battery_charge = 100; + spin_unlock_irqrestore(&steam->lock, flags); + + battery = power_supply_register(&steam->hdev->dev, + &steam->battery_desc, &battery_cfg); + if (IS_ERR(battery)) { + ret = PTR_ERR(battery); + hid_err(steam->hdev, + "%s:power_supply_register failed with error %d\n", + __func__, ret); + return ret; + } + rcu_assign_pointer(steam->battery, battery); + power_supply_powers(battery, &steam->hdev->dev); + return 0; +} + static int steam_register(struct steam_device *steam) { struct hid_device *hdev = steam->hdev; @@ -409,6 +493,10 @@ static int steam_register(struct steam_device *steam) rcu_assign_pointer(steam->input, input); + /* ignore battery errors, we can live without it */ + if (steam->quirks & STEAM_QUIRK_WIRELESS) + steam_battery_register(steam); + return 0; input_register_fail: @@ -419,11 +507,18 @@ input_register_fail: static void steam_unregister(struct steam_device *steam) { struct input_dev *input; + struct power_supply *battery; rcu_read_lock(); input = rcu_dereference(steam->input); + battery = rcu_dereference(steam->battery); rcu_read_unlock(); + if (battery) { + RCU_INIT_POINTER(steam->battery, NULL); + synchronize_rcu(); + power_supply_unregister(battery); + } if (input) { RCU_INIT_POINTER(steam->input, NULL); synchronize_rcu(); @@ -851,12 +946,44 @@ static void steam_do_input_event(struct steam_device *steam, input_sync(input); } +/* + * The size for this message payload is 11. + * The known values are: + * Offset| Type | Meaning + * -------+-------+--------------------------- + * 4-7 | u32 | sequence number + * 8-11 | -- | always 0 + * 12-13 | u16 | voltage (mV) + * 14 | u8 | battery percent + */ +static void steam_do_battery_event(struct steam_device *steam, + struct power_supply *battery, u8 *data) +{ + unsigned long flags; + + s16 volts = steam_le16(data + 12); + u8 batt = data[14]; + + /* Creating the battery may have failed */ + rcu_read_lock(); + battery = rcu_dereference(steam->battery); + if (likely(battery)) { + spin_lock_irqsave(&steam->lock, flags); + steam->voltage = volts; + steam->battery_charge = batt; + spin_unlock_irqrestore(&steam->lock, flags); + power_supply_changed(battery); + } + rcu_read_unlock(); +} + static int steam_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct steam_device *steam = hid_get_drvdata(hdev); struct input_dev *input; + struct power_supply *battery; if (!steam) return 0; @@ -914,7 +1041,19 @@ static int steam_raw_event(struct hid_device *hdev, } break; case STEAM_EV_BATTERY: - /* TODO: battery info */ + if (steam->quirks & STEAM_QUIRK_WIRELESS) { + rcu_read_lock(); + battery = rcu_dereference(steam->battery); + if (likely(battery)) { + steam_do_battery_event(steam, battery, data); + } else { + dbg_hid( + "%s: battery data without connect event\n", + __func__); + steam_do_connect_event(steam, true); + } + rcu_read_unlock(); + } break; } return 0; From 43dfe605ed56be21f84ae64cec1821d34a55304d Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 15 May 2018 10:58:31 +0200 Subject: [PATCH 0243/3715] UPSTREAM: HID: steam: add missing fields in client initialization ->product, ->version and ->type fields in the client struct were left out unitialized from the hid device fields; fix that. Reported-by: Rodrigo Rivas Costa Signed-off-by: Jiri Kosina (cherry picked from commit 165e2cad5a74bddbe6ca2c42fc5d2816f5e67795 ("HID: steam: add missing fields in client initialization")) https://github.com/torvalds/linux/commit/165e2cad5a74bddbe6ca2c42fc5d2816f5e67795 Change-Id: I6f13c2ad6f86a4e06f81dc93616407f9149753ed Signed-Off-By: Siarhei Vishniakou Bug: 136263708 Test: connect steam controller --- drivers/hid/hid-steam.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c index 8c9418e722c4..a92538e5fe9a 100644 --- a/drivers/hid/hid-steam.c +++ b/drivers/hid/hid-steam.c @@ -651,6 +651,9 @@ static struct hid_device *steam_create_client_hid(struct hid_device *hdev) client_hdev->bus = hdev->bus; client_hdev->vendor = hdev->vendor; client_hdev->product = hdev->product; + client_hdev->version = hdev->version; + client_hdev->type = hdev->type; + client_hdev->country = hdev->country; strlcpy(client_hdev->name, hdev->name, sizeof(client_hdev->name)); strlcpy(client_hdev->phys, hdev->phys, From 701a5c79f1ca8d8879186513fc5c07cf88ab987a Mon Sep 17 00:00:00 2001 From: Rodrigo Rivas Costa Date: Tue, 22 May 2018 22:10:06 +0200 Subject: [PATCH 0244/3715] UPSTREAM: HID: steam: use hid_device.driver_data instead of hid_set_drvdata() When creating the low-level hidraw device, the reference to steam_device was stored using hid_set_drvdata(). But this value is not guaranteed to be kept when set before calling probe. If this pointer is reset, it crashes when opening the emulated hidraw device. It looks like hid_set_drvdata() is for users "avobe" this hid_device, while hid_device.driver_data it for users "below" this one. In this case, we are creating a virtual hidraw device, so we must use hid_device.driver_data. Signed-off-by: Rodrigo Rivas Costa Tested-by: Mariusz Ceier Signed-off-by: Jiri Kosina (cherry picked from commit 4bff980f920693693d7a529c06a1bd1e7f77603a ("HID: steam: use hid_device.driver_data instead of hid_set_drvdata()") https://github.com/torvalds/linux/commit/4bff980f920693693d7a529c06a1bd1e7f77603a Change-Id: I650db1803c53bc055be4574b9190df57b6b8aad3 Signed-Off-By: Siarhei Vishniakou Bug: 136263708 Test: connect steam controller --- drivers/hid/hid-steam.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c index a92538e5fe9a..e9b07120909c 100644 --- a/drivers/hid/hid-steam.c +++ b/drivers/hid/hid-steam.c @@ -573,7 +573,7 @@ static bool steam_is_valve_interface(struct hid_device *hdev) static int steam_client_ll_parse(struct hid_device *hdev) { - struct steam_device *steam = hid_get_drvdata(hdev); + struct steam_device *steam = hdev->driver_data; return hid_parse_report(hdev, steam->hdev->dev_rdesc, steam->hdev->dev_rsize); @@ -590,7 +590,7 @@ static void steam_client_ll_stop(struct hid_device *hdev) static int steam_client_ll_open(struct hid_device *hdev) { - struct steam_device *steam = hid_get_drvdata(hdev); + struct steam_device *steam = hdev->driver_data; int ret; ret = hid_hw_open(steam->hdev); @@ -605,7 +605,7 @@ static int steam_client_ll_open(struct hid_device *hdev) static void steam_client_ll_close(struct hid_device *hdev) { - struct steam_device *steam = hid_get_drvdata(hdev); + struct steam_device *steam = hdev->driver_data; mutex_lock(&steam->mutex); steam->client_opened = false; @@ -623,7 +623,7 @@ static int steam_client_ll_raw_request(struct hid_device *hdev, size_t count, unsigned char report_type, int reqtype) { - struct steam_device *steam = hid_get_drvdata(hdev); + struct steam_device *steam = hdev->driver_data; return hid_hw_raw_request(steam->hdev, reportnum, buf, count, report_type, reqtype); @@ -710,7 +710,7 @@ static int steam_probe(struct hid_device *hdev, ret = PTR_ERR(steam->client_hdev); goto client_hdev_fail; } - hid_set_drvdata(steam->client_hdev, steam); + steam->client_hdev->driver_data = steam; /* * With the real steam controller interface, do not connect hidraw. From e2b9551763b4f89b3aa5e1997633b8b2addb2964 Mon Sep 17 00:00:00 2001 From: Rodrigo Rivas Costa Date: Sun, 14 Oct 2018 19:36:43 +0200 Subject: [PATCH 0245/3715] UPSTREAM: HID: steam: remove input device when a hid client is running. Previously, when a HID client such as the Steam Client was running, this driver disabled its input device to avoid doubling the input events. While it worked mostly fine, some games got confused by the idle gamepad, and switched to two player mode, or asked the user to choose which gamepad to use. Other games just crashed, probably a bug in Unity [1]. With this commit, when a HID client starts, the input device is removed; when the HID client ends the input device is recreated. [1]: https://github.com/ValveSoftware/steam-for-linux/issues/5645 (cherry-picked from commit 385a4886778f6d6e61eff1d4d295af332d7130e1 ("HID: steam: remove input device when a hid client is running")) https://github.com/torvalds/linux/commit/385a4886778f6d6e61eff1d4d295af332d7130e1 Bug: 136263708 Signed-off-by: Rodrigo Rivas Costa Signed-off-by: Jiri Kosina Signed-off-by: Siarhei Vishniakou Change-Id: I8b93bb46c508e44d275168933b53ad738795d511 --- drivers/hid/hid-steam.c | 154 +++++++++++++++++++++++----------------- 1 file changed, 90 insertions(+), 64 deletions(-) diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c index e9b07120909c..304cdc86c01d 100644 --- a/drivers/hid/hid-steam.c +++ b/drivers/hid/hid-steam.c @@ -23,8 +23,9 @@ * In order to avoid breaking them this driver creates a layered hidraw device, * so it can detect when the client is running and then: * - it will not send any command to the controller. - * - this input device will be disabled, to avoid double input of the same + * - this input device will be removed, to avoid double input of the same * user action. + * When the client is closed, this input device will be created again. * * For additional functions, such as changing the right-pad margin or switching * the led, you can use the user-space tool at: @@ -113,7 +114,7 @@ struct steam_device { spinlock_t lock; struct hid_device *hdev, *client_hdev; struct mutex mutex; - bool client_opened, input_opened; + bool client_opened; struct input_dev __rcu *input; unsigned long quirks; struct work_struct work_connect; @@ -279,18 +280,6 @@ static void steam_set_lizard_mode(struct steam_device *steam, bool enable) } } -static void steam_update_lizard_mode(struct steam_device *steam) -{ - mutex_lock(&steam->mutex); - if (!steam->client_opened) { - if (steam->input_opened) - steam_set_lizard_mode(steam, false); - else - steam_set_lizard_mode(steam, lizard_mode); - } - mutex_unlock(&steam->mutex); -} - static int steam_input_open(struct input_dev *dev) { struct steam_device *steam = input_get_drvdata(dev); @@ -301,7 +290,6 @@ static int steam_input_open(struct input_dev *dev) return ret; mutex_lock(&steam->mutex); - steam->input_opened = true; if (!steam->client_opened && lizard_mode) steam_set_lizard_mode(steam, false); mutex_unlock(&steam->mutex); @@ -313,7 +301,6 @@ static void steam_input_close(struct input_dev *dev) struct steam_device *steam = input_get_drvdata(dev); mutex_lock(&steam->mutex); - steam->input_opened = false; if (!steam->client_opened && lizard_mode) steam_set_lizard_mode(steam, true); mutex_unlock(&steam->mutex); @@ -400,7 +387,7 @@ static int steam_battery_register(struct steam_device *steam) return 0; } -static int steam_register(struct steam_device *steam) +static int steam_input_register(struct steam_device *steam) { struct hid_device *hdev = steam->hdev; struct input_dev *input; @@ -414,17 +401,6 @@ static int steam_register(struct steam_device *steam) return 0; } - /* - * Unlikely, but getting the serial could fail, and it is not so - * important, so make up a serial number and go on. - */ - if (steam_get_serial(steam) < 0) - strlcpy(steam->serial_no, "XXXXXXXXXX", - sizeof(steam->serial_no)); - - hid_info(hdev, "Steam Controller '%s' connected", - steam->serial_no); - input = input_allocate_device(); if (!input) return -ENOMEM; @@ -492,11 +468,6 @@ static int steam_register(struct steam_device *steam) goto input_register_fail; rcu_assign_pointer(steam->input, input); - - /* ignore battery errors, we can live without it */ - if (steam->quirks & STEAM_QUIRK_WIRELESS) - steam_battery_register(steam); - return 0; input_register_fail: @@ -504,27 +475,88 @@ input_register_fail: return ret; } -static void steam_unregister(struct steam_device *steam) +static void steam_input_unregister(struct steam_device *steam) { struct input_dev *input; + rcu_read_lock(); + input = rcu_dereference(steam->input); + rcu_read_unlock(); + if (!input) + return; + RCU_INIT_POINTER(steam->input, NULL); + synchronize_rcu(); + input_unregister_device(input); +} + +static void steam_battery_unregister(struct steam_device *steam) +{ struct power_supply *battery; rcu_read_lock(); - input = rcu_dereference(steam->input); battery = rcu_dereference(steam->battery); rcu_read_unlock(); - if (battery) { - RCU_INIT_POINTER(steam->battery, NULL); - synchronize_rcu(); - power_supply_unregister(battery); + if (!battery) + return; + RCU_INIT_POINTER(steam->battery, NULL); + synchronize_rcu(); + power_supply_unregister(battery); +} + +static int steam_register(struct steam_device *steam) +{ + int ret; + + /* + * This function can be called several times in a row with the + * wireless adaptor, without steam_unregister() between them, because + * another client send a get_connection_status command, for example. + * The battery and serial number are set just once per device. + */ + if (!steam->serial_no[0]) { + /* + * Unlikely, but getting the serial could fail, and it is not so + * important, so make up a serial number and go on. + */ + if (steam_get_serial(steam) < 0) + strlcpy(steam->serial_no, "XXXXXXXXXX", + sizeof(steam->serial_no)); + + hid_info(steam->hdev, "Steam Controller '%s' connected", + steam->serial_no); + + /* ignore battery errors, we can live without it */ + if (steam->quirks & STEAM_QUIRK_WIRELESS) + steam_battery_register(steam); + + mutex_lock(&steam_devices_lock); + list_add(&steam->list, &steam_devices); + mutex_unlock(&steam_devices_lock); } - if (input) { - RCU_INIT_POINTER(steam->input, NULL); - synchronize_rcu(); + + mutex_lock(&steam->mutex); + if (!steam->client_opened) { + steam_set_lizard_mode(steam, lizard_mode); + ret = steam_input_register(steam); + } else { + ret = 0; + } + mutex_unlock(&steam->mutex); + + return ret; +} + +static void steam_unregister(struct steam_device *steam) +{ + steam_battery_unregister(steam); + steam_input_unregister(steam); + if (steam->serial_no[0]) { hid_info(steam->hdev, "Steam Controller '%s' disconnected", steam->serial_no); - input_unregister_device(input); + mutex_lock(&steam_devices_lock); + list_del(&steam->list); + mutex_unlock(&steam_devices_lock); + steam->serial_no[0] = 0; } } @@ -600,6 +632,9 @@ static int steam_client_ll_open(struct hid_device *hdev) mutex_lock(&steam->mutex); steam->client_opened = true; mutex_unlock(&steam->mutex); + + steam_input_unregister(steam); + return ret; } @@ -609,13 +644,13 @@ static void steam_client_ll_close(struct hid_device *hdev) mutex_lock(&steam->mutex); steam->client_opened = false; - if (steam->input_opened) - steam_set_lizard_mode(steam, false); - else - steam_set_lizard_mode(steam, lizard_mode); mutex_unlock(&steam->mutex); hid_hw_close(steam->hdev); + if (steam->connected) { + steam_set_lizard_mode(steam, lizard_mode); + steam_input_register(steam); + } } static int steam_client_ll_raw_request(struct hid_device *hdev, @@ -744,11 +779,6 @@ static int steam_probe(struct hid_device *hdev, } } - mutex_lock(&steam_devices_lock); - steam_update_lizard_mode(steam); - list_add(&steam->list, &steam_devices); - mutex_unlock(&steam_devices_lock); - return 0; hid_hw_open_fail: @@ -774,10 +804,6 @@ static void steam_remove(struct hid_device *hdev) return; } - mutex_lock(&steam_devices_lock); - list_del(&steam->list); - mutex_unlock(&steam_devices_lock); - hid_destroy_device(steam->client_hdev); steam->client_opened = false; cancel_work_sync(&steam->work_connect); @@ -792,12 +818,14 @@ static void steam_remove(struct hid_device *hdev) static void steam_do_connect_event(struct steam_device *steam, bool connected) { unsigned long flags; + bool changed; spin_lock_irqsave(&steam->lock, flags); + changed = steam->connected != connected; steam->connected = connected; spin_unlock_irqrestore(&steam->lock, flags); - if (schedule_work(&steam->work_connect) == 0) + if (changed && schedule_work(&steam->work_connect) == 0) dbg_hid("%s: connected=%d event already queued\n", __func__, connected); } @@ -1019,13 +1047,8 @@ static int steam_raw_event(struct hid_device *hdev, return 0; rcu_read_lock(); input = rcu_dereference(steam->input); - if (likely(input)) { + if (likely(input)) steam_do_input_event(steam, input, data); - } else { - dbg_hid("%s: input data without connect event\n", - __func__); - steam_do_connect_event(steam, true); - } rcu_read_unlock(); break; case STEAM_EV_CONNECT: @@ -1074,7 +1097,10 @@ static int steam_param_set_lizard_mode(const char *val, mutex_lock(&steam_devices_lock); list_for_each_entry(steam, &steam_devices, list) { - steam_update_lizard_mode(steam); + mutex_lock(&steam->mutex); + if (!steam->client_opened) + steam_set_lizard_mode(steam, lizard_mode); + mutex_unlock(&steam->mutex); } mutex_unlock(&steam_devices_lock); return 0; From 4743de43c2d63f0ee2e78ce7a5be28d1d0adb1eb Mon Sep 17 00:00:00 2001 From: Rodrigo Rivas Costa Date: Wed, 6 Feb 2019 22:27:54 +0100 Subject: [PATCH 0246/3715] UPSTREAM: HID: steam: fix boot loop with bluetooth firmware There is a new firmware for the Steam Controller with support for BLE connections. When using such a device with a wired connection, it reboots itself every 10 seconds unless an application has opened it. Doing hid_hw_open() unconditionally on probe fixes the issue, and the code becomes simpler. (cherry-picked from commit cf28aee292e102740e49f74385b4b89c00050763 ("HID: steam: fix boot loop with bluetooth firmware")) https://github.com/torvalds/linux/commit/cf28aee292e102740e49f74385b4b89c00050763 Bug: 136263708 Signed-off-by: Rodrigo Rivas Costa Signed-off-by: Jiri Kosina Signed-off-by: Siarhei Vishniakou Change-Id: I55b573d6db1bc336626ce4a6563255374da590ae --- drivers/hid/hid-steam.c | 34 +++++++++++----------------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c index 304cdc86c01d..58c155f236e3 100644 --- a/drivers/hid/hid-steam.c +++ b/drivers/hid/hid-steam.c @@ -283,11 +283,6 @@ static void steam_set_lizard_mode(struct steam_device *steam, bool enable) static int steam_input_open(struct input_dev *dev) { struct steam_device *steam = input_get_drvdata(dev); - int ret; - - ret = hid_hw_open(steam->hdev); - if (ret) - return ret; mutex_lock(&steam->mutex); if (!steam->client_opened && lizard_mode) @@ -304,8 +299,6 @@ static void steam_input_close(struct input_dev *dev) if (!steam->client_opened && lizard_mode) steam_set_lizard_mode(steam, true); mutex_unlock(&steam->mutex); - - hid_hw_close(steam->hdev); } static enum power_supply_property steam_battery_props[] = { @@ -623,11 +616,6 @@ static void steam_client_ll_stop(struct hid_device *hdev) static int steam_client_ll_open(struct hid_device *hdev) { struct steam_device *steam = hdev->driver_data; - int ret; - - ret = hid_hw_open(steam->hdev); - if (ret) - return ret; mutex_lock(&steam->mutex); steam->client_opened = true; @@ -635,7 +623,7 @@ static int steam_client_ll_open(struct hid_device *hdev) steam_input_unregister(steam); - return ret; + return 0; } static void steam_client_ll_close(struct hid_device *hdev) @@ -646,7 +634,6 @@ static void steam_client_ll_close(struct hid_device *hdev) steam->client_opened = false; mutex_unlock(&steam->mutex); - hid_hw_close(steam->hdev); if (steam->connected) { steam_set_lizard_mode(steam, lizard_mode); steam_input_register(steam); @@ -759,14 +746,15 @@ static int steam_probe(struct hid_device *hdev, if (ret) goto client_hdev_add_fail; + ret = hid_hw_open(hdev); + if (ret) { + hid_err(hdev, + "%s:hid_hw_open\n", + __func__); + goto hid_hw_open_fail; + } + if (steam->quirks & STEAM_QUIRK_WIRELESS) { - ret = hid_hw_open(hdev); - if (ret) { - hid_err(hdev, - "%s:hid_hw_open for wireless\n", - __func__); - goto hid_hw_open_fail; - } hid_info(hdev, "Steam wireless receiver connected"); steam_request_conn_status(steam); } else { @@ -781,8 +769,8 @@ static int steam_probe(struct hid_device *hdev, return 0; -hid_hw_open_fail: input_register_fail: +hid_hw_open_fail: client_hdev_add_fail: hid_hw_stop(hdev); hid_hw_start_fail: @@ -809,8 +797,8 @@ static void steam_remove(struct hid_device *hdev) cancel_work_sync(&steam->work_connect); if (steam->quirks & STEAM_QUIRK_WIRELESS) { hid_info(hdev, "Steam wireless receiver disconnected"); - hid_hw_close(hdev); } + hid_hw_close(hdev); hid_hw_stop(hdev); steam_unregister(steam); } From 81144e705f48ede315d34f0aa08bf3b907b86d66 Mon Sep 17 00:00:00 2001 From: Rodrigo Rivas Costa Date: Fri, 15 Mar 2019 20:09:10 +0100 Subject: [PATCH 0247/3715] UPSTREAM: HID: steam: fix deadlock with input devices. When using this driver with the wireless dongle and some usermode program that monitors every input device (acpid, for example), while another usermode client opens and closes the low-level device repeadedly, the system eventually deadlocks. The reason is that steam_input_register_device() must not be called with the mutex held, because the input subsystem has its own synchronization that clashes with this one: it is possible that steam_input_open() is called before input_register_device() returns, and since steam_input_open() needs to lock the mutex, it deadlocks. However we must hold the mutex when calling any function that sends commands to the controller. If not, random commands end up falling fail. Reported-by: Simon Gene Gottlieb Signed-off-by: Rodrigo Rivas Costa Tested-by: Simon Gene Gottlieb Signed-off-by: Jiri Kosina Bug: 136263708 Change-Id: Ia0c37b3117dc605a30b2e1fb5030282bf2e11a11 (cherry picked from commit 6b538cc21334b83f09b25dec4aa2d2726bf07ed0 ("HID: steam: fix deadlock with input devices.")) https://github.com/torvalds/linux/commit/6b538cc21334b83f09b25dec4aa2d2726bf07ed0 Signed-off-by: Siarhei Vishniakou --- drivers/hid/hid-steam.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c index 58c155f236e3..44e1eefc5b24 100644 --- a/drivers/hid/hid-steam.c +++ b/drivers/hid/hid-steam.c @@ -499,6 +499,7 @@ static void steam_battery_unregister(struct steam_device *steam) static int steam_register(struct steam_device *steam) { int ret; + bool client_opened; /* * This function can be called several times in a row with the @@ -511,9 +512,11 @@ static int steam_register(struct steam_device *steam) * Unlikely, but getting the serial could fail, and it is not so * important, so make up a serial number and go on. */ + mutex_lock(&steam->mutex); if (steam_get_serial(steam) < 0) strlcpy(steam->serial_no, "XXXXXXXXXX", sizeof(steam->serial_no)); + mutex_unlock(&steam->mutex); hid_info(steam->hdev, "Steam Controller '%s' connected", steam->serial_no); @@ -528,14 +531,16 @@ static int steam_register(struct steam_device *steam) } mutex_lock(&steam->mutex); - if (!steam->client_opened) { + client_opened = steam->client_opened; + if (!client_opened) steam_set_lizard_mode(steam, lizard_mode); - ret = steam_input_register(steam); - } else { - ret = 0; - } mutex_unlock(&steam->mutex); + if (!client_opened) + ret = steam_input_register(steam); + else + ret = 0; + return ret; } @@ -630,14 +635,21 @@ static void steam_client_ll_close(struct hid_device *hdev) { struct steam_device *steam = hdev->driver_data; + unsigned long flags; + bool connected; + + spin_lock_irqsave(&steam->lock, flags); + connected = steam->connected; + spin_unlock_irqrestore(&steam->lock, flags); + mutex_lock(&steam->mutex); steam->client_opened = false; + if (connected) + steam_set_lizard_mode(steam, lizard_mode); mutex_unlock(&steam->mutex); - if (steam->connected) { - steam_set_lizard_mode(steam, lizard_mode); + if (connected) steam_input_register(steam); - } } static int steam_client_ll_raw_request(struct hid_device *hdev, From 80d0c02e28601ba0631d6dd92b93d16fda76832c Mon Sep 17 00:00:00 2001 From: Rayagonda Kokatanur Date: Mon, 9 Sep 2019 14:05:27 +0530 Subject: [PATCH 0248/3715] arm64: dts: Fix gpio to pinmux mapping [ Upstream commit 965f6603e3335a953f4f876792074cb36bf65f7f ] There are total of 151 non-secure gpio (0-150) and four pins of pinmux (91, 92, 93 and 94) are not mapped to any gpio pin, hence update same in DT. Fixes: 8aa428cc1e2e ("arm64: dts: Add pinctrl DT nodes for Stingray SOC") Signed-off-by: Rayagonda Kokatanur Reviewed-by: Ray Jui Signed-off-by: Florian Fainelli Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/broadcom/stingray/stingray-pinctrl.dtsi | 5 +++-- arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi | 3 +-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/broadcom/stingray/stingray-pinctrl.dtsi b/arch/arm64/boot/dts/broadcom/stingray/stingray-pinctrl.dtsi index 15214d05fec1..8c20d4a0cb4e 100644 --- a/arch/arm64/boot/dts/broadcom/stingray/stingray-pinctrl.dtsi +++ b/arch/arm64/boot/dts/broadcom/stingray/stingray-pinctrl.dtsi @@ -42,13 +42,14 @@ pinmux: pinmux@0014029c { compatible = "pinctrl-single"; - reg = <0x0014029c 0x250>; + reg = <0x0014029c 0x26c>; #address-cells = <1>; #size-cells = <1>; pinctrl-single,register-width = <32>; pinctrl-single,function-mask = <0xf>; pinctrl-single,gpio-range = < - &range 0 154 MODE_GPIO + &range 0 91 MODE_GPIO + &range 95 60 MODE_GPIO >; range: gpio-range { #pinctrl-single,gpio-range-cells = <3>; diff --git a/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi b/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi index 2b76293b51c8..3d2921ef2935 100644 --- a/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi +++ b/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi @@ -444,8 +444,7 @@ <&pinmux 108 16 27>, <&pinmux 135 77 6>, <&pinmux 141 67 4>, - <&pinmux 145 149 6>, - <&pinmux 151 91 4>; + <&pinmux 145 149 6>; }; i2c1: i2c@000e0000 { From 16bacd2121cd91a92fbe0e2479dc71a31946e437 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 29 Sep 2019 17:58:48 +0800 Subject: [PATCH 0249/3715] regulator: ti-abb: Fix timeout in ti_abb_wait_txdone/ti_abb_clear_all_txdone [ Upstream commit f64db548799e0330897c3203680c2ee795ade518 ] ti_abb_wait_txdone() may return -ETIMEDOUT when ti_abb_check_txdone() returns true in the latest iteration of the while loop because the timeout value is abb->settling_time + 1. Similarly, ti_abb_clear_all_txdone() may return -ETIMEDOUT when ti_abb_check_txdone() returns false in the latest iteration of the while loop. Fix it. Signed-off-by: Axel Lin Acked-by: Nishanth Menon Link: https://lore.kernel.org/r/20190929095848.21960-1-axel.lin@ingics.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/ti-abb-regulator.c | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/drivers/regulator/ti-abb-regulator.c b/drivers/regulator/ti-abb-regulator.c index d2f994298753..6d17357b3a24 100644 --- a/drivers/regulator/ti-abb-regulator.c +++ b/drivers/regulator/ti-abb-regulator.c @@ -173,19 +173,14 @@ static int ti_abb_wait_txdone(struct device *dev, struct ti_abb *abb) while (timeout++ <= abb->settling_time) { status = ti_abb_check_txdone(abb); if (status) - break; + return 0; udelay(1); } - if (timeout > abb->settling_time) { - dev_warn_ratelimited(dev, - "%s:TRANXDONE timeout(%duS) int=0x%08x\n", - __func__, timeout, readl(abb->int_base)); - return -ETIMEDOUT; - } - - return 0; + dev_warn_ratelimited(dev, "%s:TRANXDONE timeout(%duS) int=0x%08x\n", + __func__, timeout, readl(abb->int_base)); + return -ETIMEDOUT; } /** @@ -205,19 +200,14 @@ static int ti_abb_clear_all_txdone(struct device *dev, const struct ti_abb *abb) status = ti_abb_check_txdone(abb); if (!status) - break; + return 0; udelay(1); } - if (timeout > abb->settling_time) { - dev_warn_ratelimited(dev, - "%s:TRANXDONE timeout(%duS) int=0x%08x\n", - __func__, timeout, readl(abb->int_base)); - return -ETIMEDOUT; - } - - return 0; + dev_warn_ratelimited(dev, "%s:TRANXDONE timeout(%duS) int=0x%08x\n", + __func__, timeout, readl(abb->int_base)); + return -ETIMEDOUT; } /** From e48c52747169e4a98a8efe589a24dfb7a08f704b Mon Sep 17 00:00:00 2001 From: Yizhuo Date: Sun, 29 Sep 2019 10:09:57 -0700 Subject: [PATCH 0250/3715] regulator: pfuze100-regulator: Variable "val" in pfuze100_regulator_probe() could be uninitialized [ Upstream commit 1252b283141f03c3dffd139292c862cae10e174d ] In function pfuze100_regulator_probe(), variable "val" could be initialized if regmap_read() fails. However, "val" is used to decide the control flow later in the if statement, which is potentially unsafe. Signed-off-by: Yizhuo Link: https://lore.kernel.org/r/20190929170957.14775-1-yzhai003@ucr.edu Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/pfuze100-regulator.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/regulator/pfuze100-regulator.c b/drivers/regulator/pfuze100-regulator.c index 659e516455be..4f205366d8ae 100644 --- a/drivers/regulator/pfuze100-regulator.c +++ b/drivers/regulator/pfuze100-regulator.c @@ -632,7 +632,13 @@ static int pfuze100_regulator_probe(struct i2c_client *client, /* SW2~SW4 high bit check and modify the voltage value table */ if (i >= sw_check_start && i <= sw_check_end) { - regmap_read(pfuze_chip->regmap, desc->vsel_reg, &val); + ret = regmap_read(pfuze_chip->regmap, + desc->vsel_reg, &val); + if (ret) { + dev_err(&client->dev, "Fails to read from the register.\n"); + return ret; + } + if (val & sw_hi) { if (pfuze_chip->chip_id == PFUZE3000) { desc->volt_table = pfuze3000_sw2hi; From f3be8f22b34cea1815e1f72e722bef125a3db804 Mon Sep 17 00:00:00 2001 From: Stuart Henderson Date: Wed, 2 Oct 2019 09:42:40 +0100 Subject: [PATCH 0251/3715] ASoC: wm_adsp: Don't generate kcontrols without READ flags [ Upstream commit 3ae7359c0e39f42a96284d6798fc669acff38140 ] User space always expects to be able to read ALSA controls, so ensure no kcontrols are generated without an appropriate READ flag. In the case of a read of such a control zeros will be returned. Signed-off-by: Stuart Henderson Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20191002084240.21589-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wm_adsp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index d632a0511d62..158ce68bc9bf 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -1169,8 +1169,7 @@ static unsigned int wmfw_convert_flags(unsigned int in, unsigned int len) } if (in) { - if (in & WMFW_CTL_FLAG_READABLE) - out |= rd; + out |= rd; if (in & WMFW_CTL_FLAG_WRITEABLE) out |= wr; if (in & WMFW_CTL_FLAG_VOLATILE) From d8708726ef89ae67f6ac0103664cb8ea161eba09 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 2 Oct 2019 16:30:37 +0100 Subject: [PATCH 0252/3715] ASoc: rockchip: i2s: Fix RPM imbalance [ Upstream commit b1e620e7d32f5aad5353cc3cfc13ed99fea65d3a ] If rockchip_pcm_platform_register() fails, e.g. upon deferring to wait for an absent DMA channel, we return without disabling RPM, which makes subsequent re-probe attempts scream with errors about the unbalanced enable. Don't do that. Fixes: ebb75c0bdba2 ("ASoC: rockchip: i2s: Adjust devm usage") Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/bcb12a849a05437fb18372bc7536c649b94bdf07.1570029862.git.robin.murphy@arm.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/rockchip/rockchip_i2s.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/rockchip/rockchip_i2s.c b/sound/soc/rockchip/rockchip_i2s.c index 66fc13a2396a..0e07e3dea7de 100644 --- a/sound/soc/rockchip/rockchip_i2s.c +++ b/sound/soc/rockchip/rockchip_i2s.c @@ -676,7 +676,7 @@ static int rockchip_i2s_probe(struct platform_device *pdev) ret = devm_snd_dmaengine_pcm_register(&pdev->dev, NULL, 0); if (ret) { dev_err(&pdev->dev, "Could not register PCM\n"); - return ret; + goto err_suspend; } return 0; From 460c6cc9fee9b75787bf6cb157a6440e23010f8d Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Fri, 16 Aug 2019 17:58:12 -0500 Subject: [PATCH 0253/3715] ARM: dts: logicpd-torpedo-som: Remove twl_keypad [ Upstream commit 6b512b0ee091edcb8e46218894e4c917d919d3dc ] The TWL4030 used on the Logit PD Torpedo SOM does not have the keypad pins routed. This patch disables the twl_keypad driver to remove some splat during boot: twl4030_keypad 48070000.i2c:twl@48:keypad: missing or malformed property linux,keymap: -22 twl4030_keypad 48070000.i2c:twl@48:keypad: Failed to build keymap twl4030_keypad: probe of 48070000.i2c:twl@48:keypad failed with error -22 Signed-off-by: Adam Ford [tony@atomide.com: removed error time stamps] Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/boot/dts/logicpd-torpedo-som.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi index fe4cbdc72359..7265d7072b5c 100644 --- a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi +++ b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi @@ -270,3 +270,7 @@ &twl_gpio { ti,use-leds; }; + +&twl_keypad { + status = "disabled"; +}; From 91aa77f738cc6e1decb8a56480968ab7ce09432f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 26 Sep 2019 11:14:26 +0300 Subject: [PATCH 0254/3715] pinctrl: ns2: Fix off by one bugs in ns2_pinmux_enable() [ Upstream commit 39b65fbb813089e366b376bd8acc300b6fd646dc ] The pinctrl->functions[] array has pinctrl->num_functions elements and the pinctrl->groups[] array is the same way. These are set in ns2_pinmux_probe(). So the > comparisons should be >= so that we don't read one element beyond the end of the array. Fixes: b5aa1006e4a9 ("pinctrl: ns2: add pinmux driver support for Broadcom NS2 SoC") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20190926081426.GB2332@mwanda Acked-by: Scott Branden Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/bcm/pinctrl-ns2-mux.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/bcm/pinctrl-ns2-mux.c b/drivers/pinctrl/bcm/pinctrl-ns2-mux.c index 4b5cf0e0f16e..951090faa6a9 100644 --- a/drivers/pinctrl/bcm/pinctrl-ns2-mux.c +++ b/drivers/pinctrl/bcm/pinctrl-ns2-mux.c @@ -640,8 +640,8 @@ static int ns2_pinmux_enable(struct pinctrl_dev *pctrl_dev, const struct ns2_pin_function *func; const struct ns2_pin_group *grp; - if (grp_select > pinctrl->num_groups || - func_select > pinctrl->num_functions) + if (grp_select >= pinctrl->num_groups || + func_select >= pinctrl->num_functions) return -EINVAL; func = &pinctrl->functions[func_select]; From a7a7df16593867c4fa81de38ca08da2f7dd53230 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 31 Aug 2019 17:01:58 +0100 Subject: [PATCH 0255/3715] ARM: mm: fix alignment handler faults under memory pressure [ Upstream commit 67e15fa5b487adb9b78a92789eeff2d6ec8f5cee ] When the system has high memory pressure, the page containing the instruction may be paged out. Using probe_kernel_address() means that if the page is swapped out, the resulting page fault will not be handled because page faults are disabled by this function. Use get_user() to read the instruction instead. Reported-by: Jing Xiangfeng Fixes: b255188f90e2 ("ARM: fix scheduling while atomic warning in alignment handling code") Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/mm/alignment.c | 44 +++++++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 2c96190e018b..96b17a870b91 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -768,6 +768,36 @@ do_alignment_t32_to_handler(unsigned long *pinstr, struct pt_regs *regs, return NULL; } +static int alignment_get_arm(struct pt_regs *regs, u32 *ip, unsigned long *inst) +{ + u32 instr = 0; + int fault; + + if (user_mode(regs)) + fault = get_user(instr, ip); + else + fault = probe_kernel_address(ip, instr); + + *inst = __mem_to_opcode_arm(instr); + + return fault; +} + +static int alignment_get_thumb(struct pt_regs *regs, u16 *ip, u16 *inst) +{ + u16 instr = 0; + int fault; + + if (user_mode(regs)) + fault = get_user(instr, ip); + else + fault = probe_kernel_address(ip, instr); + + *inst = __mem_to_opcode_thumb16(instr); + + return fault; +} + static int do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { @@ -775,10 +805,10 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) unsigned long instr = 0, instrptr; int (*handler)(unsigned long addr, unsigned long instr, struct pt_regs *regs); unsigned int type; - unsigned int fault; u16 tinstr = 0; int isize = 4; int thumb2_32b = 0; + int fault; if (interrupts_enabled(regs)) local_irq_enable(); @@ -787,15 +817,14 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (thumb_mode(regs)) { u16 *ptr = (u16 *)(instrptr & ~1); - fault = probe_kernel_address(ptr, tinstr); - tinstr = __mem_to_opcode_thumb16(tinstr); + + fault = alignment_get_thumb(regs, ptr, &tinstr); if (!fault) { if (cpu_architecture() >= CPU_ARCH_ARMv7 && IS_T32(tinstr)) { /* Thumb-2 32-bit */ - u16 tinst2 = 0; - fault = probe_kernel_address(ptr + 1, tinst2); - tinst2 = __mem_to_opcode_thumb16(tinst2); + u16 tinst2; + fault = alignment_get_thumb(regs, ptr + 1, &tinst2); instr = __opcode_thumb32_compose(tinstr, tinst2); thumb2_32b = 1; } else { @@ -804,8 +833,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) } } } else { - fault = probe_kernel_address((void *)instrptr, instr); - instr = __mem_to_opcode_arm(instr); + fault = alignment_get_arm(regs, (void *)instrptr, &instr); } if (fault) { From a1bd25c9adfe11d8a8842508bac357dde72a7fb8 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 7 Oct 2019 15:57:01 +0200 Subject: [PATCH 0256/3715] scsi: scsi_dh_alua: handle RTPG sense code correctly during state transitions [ Upstream commit b6ce6fb121a655aefe41dccc077141c102145a37 ] Some arrays are not capable of returning RTPG data during state transitioning, but rather return an 'LUN not accessible, asymmetric access state transition' sense code. In these cases we can set the state to 'transitioning' directly and don't need to evaluate the RTPG data (which we won't have anyway). Link: https://lore.kernel.org/r/20191007135701.32389-1-hare@suse.de Reviewed-by: Laurence Oberman Reviewed-by: Ewan D. Milne Reviewed-by: Bart Van Assche Signed-off-by: Hannes Reinecke Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/device_handler/scsi_dh_alua.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index 41f5f6410163..135376ee2cbf 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -523,6 +523,7 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg) unsigned int tpg_desc_tbl_off; unsigned char orig_transition_tmo; unsigned long flags; + bool transitioning_sense = false; if (!pg->expiry) { unsigned long transition_tmo = ALUA_FAILOVER_TIMEOUT * HZ; @@ -567,13 +568,19 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg) goto retry; } /* - * Retry on ALUA state transition or if any - * UNIT ATTENTION occurred. + * If the array returns with 'ALUA state transition' + * sense code here it cannot return RTPG data during + * transition. So set the state to 'transitioning' directly. */ if (sense_hdr.sense_key == NOT_READY && - sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) - err = SCSI_DH_RETRY; - else if (sense_hdr.sense_key == UNIT_ATTENTION) + sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) { + transitioning_sense = true; + goto skip_rtpg; + } + /* + * Retry on any other UNIT ATTENTION occurred. + */ + if (sense_hdr.sense_key == UNIT_ATTENTION) err = SCSI_DH_RETRY; if (err == SCSI_DH_RETRY && pg->expiry != 0 && time_before(jiffies, pg->expiry)) { @@ -661,7 +668,11 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg) off = 8 + (desc[7] * 4); } + skip_rtpg: spin_lock_irqsave(&pg->lock, flags); + if (transitioning_sense) + pg->state = SCSI_ACCESS_STATE_TRANSITIONING; + sdev_printk(KERN_INFO, sdev, "%s: port group %02x state %c %s supports %c%c%c%c%c%c%c\n", ALUA_DH_NAME, pg->group_id, print_alua_state(pg->state), From d350c62ddcf3353805b0d0d9dfe1f3eb7f5a4ffd Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Wed, 9 Oct 2019 17:11:18 +0200 Subject: [PATCH 0257/3715] scsi: sni_53c710: fix compilation error [ Upstream commit 0ee6211408a8e939428f662833c7301394125b80 ] Drop out memory dev_printk() with wrong device pointer argument. [mkp: typo] Link: https://lore.kernel.org/r/20191009151118.32350-1-tbogendoerfer@suse.de Signed-off-by: Thomas Bogendoerfer Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/sni_53c710.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/scsi/sni_53c710.c b/drivers/scsi/sni_53c710.c index 1f9a087daf69..3102a75984d3 100644 --- a/drivers/scsi/sni_53c710.c +++ b/drivers/scsi/sni_53c710.c @@ -78,10 +78,8 @@ static int snirm710_probe(struct platform_device *dev) base = res->start; hostdata = kzalloc(sizeof(*hostdata), GFP_KERNEL); - if (!hostdata) { - dev_printk(KERN_ERR, dev, "Failed to allocate host data\n"); + if (!hostdata) return -ENOMEM; - } hostdata->dev = &dev->dev; dma_set_mask(&dev->dev, DMA_BIT_MASK(32)); From c5d8eb7af5d39fbaa0418fdeeedf8cedd8ac127e Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Wed, 9 Oct 2019 17:11:28 +0200 Subject: [PATCH 0258/3715] scsi: fix kconfig dependency warning related to 53C700_LE_ON_BE [ Upstream commit 8cbf0c173aa096dda526d1ccd66fc751c31da346 ] When building a kernel with SCSI_SNI_53C710 enabled, Kconfig warns: WARNING: unmet direct dependencies detected for 53C700_LE_ON_BE Depends on [n]: SCSI_LOWLEVEL [=y] && SCSI [=y] && SCSI_LASI700 [=n] Selected by [y]: - SCSI_SNI_53C710 [=y] && SCSI_LOWLEVEL [=y] && SNI_RM [=y] && SCSI [=y] Add the missing depends SCSI_SNI_53C710 to 53C700_LE_ON_BE to fix it. Link: https://lore.kernel.org/r/20191009151128.32411-1-tbogendoerfer@suse.de Signed-off-by: Thomas Bogendoerfer Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 41366339b950..881906dc33b8 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -966,7 +966,7 @@ config SCSI_SNI_53C710 config 53C700_LE_ON_BE bool - depends on SCSI_LASI700 + depends on SCSI_LASI700 || SCSI_SNI_53C710 default y config SCSI_STEX From 7bd450384128808ef146715ef40241070c2635ac Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Mon, 7 Oct 2019 08:43:42 +0800 Subject: [PATCH 0259/3715] ARM: dts: imx7s: Correct GPT's ipg clock source [ Upstream commit 252b9e21bcf46b0d16f733f2e42b21fdc60addee ] i.MX7S/D's GPT ipg clock should be from GPT clock root and controlled by CCM's GPT CCGR, using correct clock source for GPT ipg clock instead of IMX7D_CLK_DUMMY. Fixes: 3ef79ca6bd1d ("ARM: dts: imx7d: use imx7s.dtsi as base device tree") Signed-off-by: Anson Huang Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx7s.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi index bf15efbe8a71..836550f2297a 100644 --- a/arch/arm/boot/dts/imx7s.dtsi +++ b/arch/arm/boot/dts/imx7s.dtsi @@ -450,7 +450,7 @@ compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; reg = <0x302d0000 0x10000>; interrupts = ; - clocks = <&clks IMX7D_CLK_DUMMY>, + clocks = <&clks IMX7D_GPT1_ROOT_CLK>, <&clks IMX7D_GPT1_ROOT_CLK>; clock-names = "ipg", "per"; }; @@ -459,7 +459,7 @@ compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; reg = <0x302e0000 0x10000>; interrupts = ; - clocks = <&clks IMX7D_CLK_DUMMY>, + clocks = <&clks IMX7D_GPT2_ROOT_CLK>, <&clks IMX7D_GPT2_ROOT_CLK>; clock-names = "ipg", "per"; status = "disabled"; @@ -469,7 +469,7 @@ compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; reg = <0x302f0000 0x10000>; interrupts = ; - clocks = <&clks IMX7D_CLK_DUMMY>, + clocks = <&clks IMX7D_GPT3_ROOT_CLK>, <&clks IMX7D_GPT3_ROOT_CLK>; clock-names = "ipg", "per"; status = "disabled"; @@ -479,7 +479,7 @@ compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; reg = <0x30300000 0x10000>; interrupts = ; - clocks = <&clks IMX7D_CLK_DUMMY>, + clocks = <&clks IMX7D_GPT4_ROOT_CLK>, <&clks IMX7D_GPT4_ROOT_CLK>; clock-names = "ipg", "per"; status = "disabled"; From 3c46bafbce9e4aa83594b59c8461e14223df50e6 Mon Sep 17 00:00:00 2001 From: Yunfeng Ye Date: Tue, 15 Oct 2019 10:54:14 +0800 Subject: [PATCH 0260/3715] perf c2c: Fix memory leak in build_cl_output() [ Upstream commit ae199c580da1754a2b051321eeb76d6dacd8707b ] There is a memory leak problem in the failure paths of build_cl_output(), so fix it. Signed-off-by: Yunfeng Ye Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Feilong Lin Cc: Hu Shiyuan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/4d3c0178-5482-c313-98e1-f82090d2d456@huawei.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-c2c.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 32e64a8a6443..bec7a2f1fb4d 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2454,6 +2454,7 @@ static int build_cl_output(char *cl_sort, bool no_source) bool add_sym = false; bool add_dso = false; bool add_src = false; + int ret = 0; if (!buf) return -ENOMEM; @@ -2472,7 +2473,8 @@ static int build_cl_output(char *cl_sort, bool no_source) add_dso = true; } else if (strcmp(tok, "offset")) { pr_err("unrecognized sort token: %s\n", tok); - return -EINVAL; + ret = -EINVAL; + goto err; } } @@ -2495,13 +2497,15 @@ static int build_cl_output(char *cl_sort, bool no_source) add_sym ? "symbol," : "", add_dso ? "dso," : "", add_src ? "cl_srcline," : "", - "node") < 0) - return -ENOMEM; + "node") < 0) { + ret = -ENOMEM; + goto err; + } c2c.show_src = add_src; - +err: free(buf); - return 0; + return ret; } static int setup_coalesce(const char *coalesce, bool no_source) From a14db515f1a113aed76a367822e11d9d2f02eb51 Mon Sep 17 00:00:00 2001 From: Yunfeng Ye Date: Wed, 16 Oct 2019 16:38:45 +0800 Subject: [PATCH 0261/3715] perf kmem: Fix memory leak in compact_gfp_flags() [ Upstream commit 1abecfcaa7bba21c9985e0136fa49836164dd8fd ] The memory @orig_flags is allocated by strdup(), it is freed on the normal path, but leak to free on the error path. Fix this by adding free(orig_flags) on the error path. Fixes: 0e11115644b3 ("perf kmem: Print gfp flags in human readable string") Signed-off-by: Yunfeng Ye Cc: Alexander Shishkin Cc: Feilong Lin Cc: Hu Shiyuan Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/f9e9f458-96f3-4a97-a1d5-9feec2420e07@huawei.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-kmem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 9e693ce4b73b..ce786f363476 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -687,6 +687,7 @@ static char *compact_gfp_flags(char *gfp_flags) new = realloc(new_flags, len + strlen(cpt) + 2); if (new == NULL) { free(new_flags); + free(orig_flags); return NULL; } From f2ffdcec15f34b9ce0856f42ee6a7ea09767a5bc Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 30 Aug 2019 13:22:02 +0300 Subject: [PATCH 0262/3715] ARM: davinci: dm365: Fix McBSP dma_slave_map entry [ Upstream commit 564b6bb9d42d31fc80c006658cf38940a9b99616 ] dm365 have only single McBSP, so the device name is without .0 Fixes: 0c750e1fe481d ("ARM: davinci: dm365: Add dma_slave_map to edma") Signed-off-by: Peter Ujfalusi Signed-off-by: Sekhar Nori Signed-off-by: Sasha Levin --- arch/arm/mach-davinci/dm365.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c index 8be04ec95adf..d80b2290ac2e 100644 --- a/arch/arm/mach-davinci/dm365.c +++ b/arch/arm/mach-davinci/dm365.c @@ -856,8 +856,8 @@ static s8 dm365_queue_priority_mapping[][2] = { }; static const struct dma_slave_map dm365_edma_map[] = { - { "davinci-mcbsp.0", "tx", EDMA_FILTER_PARAM(0, 2) }, - { "davinci-mcbsp.0", "rx", EDMA_FILTER_PARAM(0, 3) }, + { "davinci-mcbsp", "tx", EDMA_FILTER_PARAM(0, 2) }, + { "davinci-mcbsp", "rx", EDMA_FILTER_PARAM(0, 3) }, { "davinci_voicecodec", "tx", EDMA_FILTER_PARAM(0, 2) }, { "davinci_voicecodec", "rx", EDMA_FILTER_PARAM(0, 3) }, { "spi_davinci.2", "tx", EDMA_FILTER_PARAM(0, 10) }, From 5c992eb99d57827f474a713385602c01d509ae08 Mon Sep 17 00:00:00 2001 From: Bodo Stroesser Date: Mon, 14 Oct 2019 20:29:04 +0200 Subject: [PATCH 0263/3715] scsi: target: core: Do not overwrite CDB byte 1 [ Upstream commit 27e84243cb63601a10e366afe3e2d05bb03c1cb5 ] passthrough_parse_cdb() - used by TCMU and PSCSI - attepts to reset the LUN field of SCSI-2 CDBs (bits 5,6,7 of byte 1). The current code is wrong as for newer commands not having the LUN field it overwrites relevant command bits (e.g. for SECURITY PROTOCOL IN / OUT). We think this code was unnecessary from the beginning or at least it is no longer useful. So we remove it entirely. Link: https://lore.kernel.org/r/12498eab-76fd-eaad-1316-c2827badb76a@ts.fujitsu.com Signed-off-by: Bodo Stroesser Reviewed-by: Bart Van Assche Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/target/target_core_device.c | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 84742125f773..92b52d2314b5 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -1151,27 +1151,6 @@ passthrough_parse_cdb(struct se_cmd *cmd, struct se_device *dev = cmd->se_dev; unsigned int size; - /* - * Clear a lun set in the cdb if the initiator talking to use spoke - * and old standards version, as we can't assume the underlying device - * won't choke up on it. - */ - switch (cdb[0]) { - case READ_10: /* SBC - RDProtect */ - case READ_12: /* SBC - RDProtect */ - case READ_16: /* SBC - RDProtect */ - case SEND_DIAGNOSTIC: /* SPC - SELF-TEST Code */ - case VERIFY: /* SBC - VRProtect */ - case VERIFY_16: /* SBC - VRProtect */ - case WRITE_VERIFY: /* SBC - VRProtect */ - case WRITE_VERIFY_12: /* SBC - VRProtect */ - case MAINTENANCE_IN: /* SPC - Parameter Data Format for SA RTPG */ - break; - default: - cdb[1] &= 0x1f; /* clear logical unit number */ - break; - } - /* * For REPORT LUNS we always need to emulate the response, for everything * else, pass it up. From c1cb67a75753979f411c4200794a2b82c0fff225 Mon Sep 17 00:00:00 2001 From: afzal mohammed Date: Mon, 21 Oct 2019 06:06:14 +0100 Subject: [PATCH 0264/3715] ARM: 8926/1: v7m: remove register save to stack before svc [ Upstream commit 2ecb287998a47cc0a766f6071f63bc185f338540 ] r0-r3 & r12 registers are saved & restored, before & after svc respectively. Intention was to preserve those registers across thread to handler mode switch. On v7-M, hardware saves the register context upon exception in AAPCS complaint way. Restoring r0-r3 & r12 is done from stack location where hardware saves it, not from the location on stack where these registers were saved. To clarify, on stm32f429 discovery board: 1. before svc, sp - 0x90009ff8 2. r0-r3,r12 saved to 0x90009ff8 - 0x9000a00b 3. upon svc, h/w decrements sp by 32 & pushes registers onto stack 4. after svc, sp - 0x90009fd8 5. r0-r3,r12 restored from 0x90009fd8 - 0x90009feb Above means r0-r3,r12 is not restored from the location where they are saved, but since hardware pushes the registers onto stack, the registers are restored correctly. Note that during register saving to stack (step 2), it goes past 0x9000a000. And it seems, based on objdump, there are global symbols residing there, and it perhaps can cause issues on a non-XIP Kernel (on XIP, data section is setup later). Based on the analysis above, manually saving registers onto stack is at best no-op and at worst can cause data section corruption. Hence remove storing of registers onto stack before svc. Fixes: b70cd406d7fe ("ARM: 8671/1: V7M: Preserve registers across switch from Thread to Handler mode") Signed-off-by: afzal mohammed Acked-by: Vladimir Murzin Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/mm/proc-v7m.S | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S index 92e84181933a..c68408d51c4b 100644 --- a/arch/arm/mm/proc-v7m.S +++ b/arch/arm/mm/proc-v7m.S @@ -135,7 +135,6 @@ __v7m_setup_cont: dsb mov r6, lr @ save LR ldr sp, =init_thread_union + THREAD_START_SP - stmia sp, {r0-r3, r12} cpsie i svc #0 1: cpsid i From 9f4ca715dae36ff5b737d56165df1b7964a2cb3e Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Fri, 4 Oct 2019 13:58:43 -0500 Subject: [PATCH 0265/3715] of: unittest: fix memory leak in unittest_data_add [ Upstream commit e13de8fe0d6a51341671bbe384826d527afe8d44 ] In unittest_data_add, a copy buffer is created via kmemdup. This buffer is leaked if of_fdt_unflatten_tree fails. The release for the unittest_data buffer is added. Fixes: b951f9dc7f25 ("Enabling OF selftest to run without machine's devicetree") Signed-off-by: Navid Emamdoost Reviewed-by: Frank Rowand Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- drivers/of/unittest.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 7c6aff761800..87650d42682f 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -1002,6 +1002,7 @@ static int __init unittest_data_add(void) of_fdt_unflatten_tree(unittest_data, NULL, &unittest_data_node); if (!unittest_data_node) { pr_warn("%s: No tree to attach; not running tests\n", __func__); + kfree(unittest_data); return -ENODATA; } of_node_set_flag(unittest_data_node, OF_DETACHED); From 17b62bb161c586ee76fa221a9f525322ad0550e1 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 22 Oct 2019 21:11:00 +0200 Subject: [PATCH 0266/3715] MIPS: bmips: mark exception vectors as char arrays [ Upstream commit e4f5cb1a9b27c0f94ef4f5a0178a3fde2d3d0e9e ] The vectors span more than one byte, so mark them as arrays. Fixes the following build error when building when using GCC 8.3: In file included from ./include/linux/string.h:19, from ./include/linux/bitmap.h:9, from ./include/linux/cpumask.h:12, from ./arch/mips/include/asm/processor.h:15, from ./arch/mips/include/asm/thread_info.h:16, from ./include/linux/thread_info.h:38, from ./include/asm-generic/preempt.h:5, from ./arch/mips/include/generated/asm/preempt.h:1, from ./include/linux/preempt.h:81, from ./include/linux/spinlock.h:51, from ./include/linux/mmzone.h:8, from ./include/linux/bootmem.h:8, from arch/mips/bcm63xx/prom.c:10: arch/mips/bcm63xx/prom.c: In function 'prom_init': ./arch/mips/include/asm/string.h:162:11: error: '__builtin_memcpy' forming offset [2, 32] is out of the bounds [0, 1] of object 'bmips_smp_movevec' with type 'char' [-Werror=array-bounds] __ret = __builtin_memcpy((dst), (src), __len); \ ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/mips/bcm63xx/prom.c:97:3: note: in expansion of macro 'memcpy' memcpy((void *)0xa0000200, &bmips_smp_movevec, 0x20); ^~~~~~ In file included from arch/mips/bcm63xx/prom.c:14: ./arch/mips/include/asm/bmips.h:80:13: note: 'bmips_smp_movevec' declared here extern char bmips_smp_movevec; Fixes: 18a1eef92dcd ("MIPS: BMIPS: Introduce bmips.h") Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Ralf Baechle Cc: James Hogan Signed-off-by: Sasha Levin --- arch/mips/bcm63xx/prom.c | 2 +- arch/mips/include/asm/bmips.h | 10 +++++----- arch/mips/kernel/smp-bmips.c | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/mips/bcm63xx/prom.c b/arch/mips/bcm63xx/prom.c index 7019e2967009..bbbf8057565b 100644 --- a/arch/mips/bcm63xx/prom.c +++ b/arch/mips/bcm63xx/prom.c @@ -84,7 +84,7 @@ void __init prom_init(void) * Here we will start up CPU1 in the background and ask it to * reconfigure itself then go back to sleep. */ - memcpy((void *)0xa0000200, &bmips_smp_movevec, 0x20); + memcpy((void *)0xa0000200, bmips_smp_movevec, 0x20); __sync(); set_c0_cause(C_SW0); cpumask_set_cpu(1, &bmips_booted_mask); diff --git a/arch/mips/include/asm/bmips.h b/arch/mips/include/asm/bmips.h index b3e2975f83d3..a564915fddc4 100644 --- a/arch/mips/include/asm/bmips.h +++ b/arch/mips/include/asm/bmips.h @@ -75,11 +75,11 @@ static inline int register_bmips_smp_ops(void) #endif } -extern char bmips_reset_nmi_vec; -extern char bmips_reset_nmi_vec_end; -extern char bmips_smp_movevec; -extern char bmips_smp_int_vec; -extern char bmips_smp_int_vec_end; +extern char bmips_reset_nmi_vec[]; +extern char bmips_reset_nmi_vec_end[]; +extern char bmips_smp_movevec[]; +extern char bmips_smp_int_vec[]; +extern char bmips_smp_int_vec_end[]; extern int bmips_smp_enabled; extern int bmips_cpu_offset; diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index 382d12eb88f0..45fbcbbf2504 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -457,10 +457,10 @@ static void bmips_wr_vec(unsigned long dst, char *start, char *end) static inline void bmips_nmi_handler_setup(void) { - bmips_wr_vec(BMIPS_NMI_RESET_VEC, &bmips_reset_nmi_vec, - &bmips_reset_nmi_vec_end); - bmips_wr_vec(BMIPS_WARM_RESTART_VEC, &bmips_smp_int_vec, - &bmips_smp_int_vec_end); + bmips_wr_vec(BMIPS_NMI_RESET_VEC, bmips_reset_nmi_vec, + bmips_reset_nmi_vec_end); + bmips_wr_vec(BMIPS_WARM_RESTART_VEC, bmips_smp_int_vec, + bmips_smp_int_vec_end); } struct reset_vec_info { From 6dde6239ddc2264855d86d191c6fcc0a435067d9 Mon Sep 17 00:00:00 2001 From: Alain Volmat Date: Tue, 15 Oct 2019 15:11:58 +0200 Subject: [PATCH 0267/3715] i2c: stm32f7: remove warning when compiling with W=1 [ Upstream commit 348e46fbb4cdb2aead79aee1fd8bb25ec5fd25db ] Remove the following warning: drivers/i2c/busses/i2c-stm32f7.c:315: warning: cannot understand function prototype: 'struct stm32f7_i2c_spec i2c_specs[] = Replace a comment starting with /** by simply /* to avoid having it interpreted as a kernel-doc comment. Fixes: aeb068c57214 ("i2c: i2c-stm32f7: add driver") Signed-off-by: Alain Volmat Reviewed-by: Pierre-Yves MORDRET Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-stm32f7.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index d8cbe149925b..14f60751729e 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -219,7 +219,7 @@ struct stm32f7_i2c_dev { struct stm32f7_i2c_timings timing; }; -/** +/* * All these values are coming from I2C Specification, Version 6.0, 4th of * April 2014. * From 3bb65a1a407f6ac364aa10111be788a1313225a7 Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Wed, 23 Oct 2019 05:02:33 -0400 Subject: [PATCH 0268/3715] cifs: Fix cifsInodeInfo lock_sem deadlock when reconnect occurs [ Upstream commit d46b0da7a33dd8c99d969834f682267a45444ab3 ] There's a deadlock that is possible and can easily be seen with a test where multiple readers open/read/close of the same file and a disruption occurs causing reconnect. The deadlock is due a reader thread inside cifs_strict_readv calling down_read and obtaining lock_sem, and then after reconnect inside cifs_reopen_file calling down_read a second time. If in between the two down_read calls, a down_write comes from another process, deadlock occurs. CPU0 CPU1 ---- ---- cifs_strict_readv() down_read(&cifsi->lock_sem); _cifsFileInfo_put OR cifs_new_fileinfo down_write(&cifsi->lock_sem); cifs_reopen_file() down_read(&cifsi->lock_sem); Fix the above by changing all down_write(lock_sem) calls to down_write_trylock(lock_sem)/msleep() loop, which in turn makes the second down_read call benign since it will never block behind the writer while holding lock_sem. Signed-off-by: Dave Wysochanski Suggested-by: Ronnie Sahlberg Reviewed--by: Ronnie Sahlberg Reviewed-by: Pavel Shilovsky Signed-off-by: Sasha Levin --- fs/cifs/cifsglob.h | 5 +++++ fs/cifs/cifsproto.h | 1 + fs/cifs/file.c | 23 +++++++++++++++-------- fs/cifs/smb2file.c | 2 +- 4 files changed, 22 insertions(+), 9 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 7b7ab10a9db1..600bb838c15b 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1210,6 +1210,11 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file); struct cifsInodeInfo { bool can_cache_brlcks; struct list_head llist; /* locks helb by this inode */ + /* + * NOTE: Some code paths call down_read(lock_sem) twice, so + * we must always use use cifs_down_write() instead of down_write() + * for this semaphore to avoid deadlocks. + */ struct rw_semaphore lock_sem; /* protect the fields above */ /* BB add in lists for dirty pages i.e. write caching info for oplock */ struct list_head openFileList; diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index ccdb42f71b2e..3a7fb8e750e9 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -149,6 +149,7 @@ extern int cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, const unsigned int xid); extern int cifs_push_mandatory_locks(struct cifsFileInfo *cfile); +extern void cifs_down_write(struct rw_semaphore *sem); extern struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, struct tcon_link *tlink, diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 71a960da7cce..40f22932343c 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -280,6 +280,13 @@ cifs_has_mand_locks(struct cifsInodeInfo *cinode) return has_locks; } +void +cifs_down_write(struct rw_semaphore *sem) +{ + while (!down_write_trylock(sem)) + msleep(10); +} + struct cifsFileInfo * cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, struct tcon_link *tlink, __u32 oplock) @@ -305,7 +312,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, INIT_LIST_HEAD(&fdlocks->locks); fdlocks->cfile = cfile; cfile->llist = fdlocks; - down_write(&cinode->lock_sem); + cifs_down_write(&cinode->lock_sem); list_add(&fdlocks->llist, &cinode->llist); up_write(&cinode->lock_sem); @@ -457,7 +464,7 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler) * Delete any outstanding lock records. We'll lose them when the file * is closed anyway. */ - down_write(&cifsi->lock_sem); + cifs_down_write(&cifsi->lock_sem); list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) { list_del(&li->llist); cifs_del_lock_waiters(li); @@ -1011,7 +1018,7 @@ static void cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock) { struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); - down_write(&cinode->lock_sem); + cifs_down_write(&cinode->lock_sem); list_add_tail(&lock->llist, &cfile->llist->locks); up_write(&cinode->lock_sem); } @@ -1033,7 +1040,7 @@ cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock, try_again: exist = false; - down_write(&cinode->lock_sem); + cifs_down_write(&cinode->lock_sem); exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length, lock->type, &conf_lock, CIFS_LOCK_OP); @@ -1055,7 +1062,7 @@ try_again: (lock->blist.next == &lock->blist)); if (!rc) goto try_again; - down_write(&cinode->lock_sem); + cifs_down_write(&cinode->lock_sem); list_del_init(&lock->blist); } @@ -1108,7 +1115,7 @@ cifs_posix_lock_set(struct file *file, struct file_lock *flock) return rc; try_again: - down_write(&cinode->lock_sem); + cifs_down_write(&cinode->lock_sem); if (!cinode->can_cache_brlcks) { up_write(&cinode->lock_sem); return rc; @@ -1314,7 +1321,7 @@ cifs_push_locks(struct cifsFileInfo *cfile) int rc = 0; /* we are going to update can_cache_brlcks here - need a write access */ - down_write(&cinode->lock_sem); + cifs_down_write(&cinode->lock_sem); if (!cinode->can_cache_brlcks) { up_write(&cinode->lock_sem); return rc; @@ -1505,7 +1512,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, if (!buf) return -ENOMEM; - down_write(&cinode->lock_sem); + cifs_down_write(&cinode->lock_sem); for (i = 0; i < 2; i++) { cur = buf; num = 0; diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index 1add404618f0..2c809233084b 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -139,7 +139,7 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, cur = buf; - down_write(&cinode->lock_sem); + cifs_down_write(&cinode->lock_sem); list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) { if (flock->fl_start > li->offset || (flock->fl_start + length) < From 6e57ba09b35bd189b43da1019b3698b3e0b0d501 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 21 Oct 2019 15:56:28 -0400 Subject: [PATCH 0269/3715] nbd: handle racing with error'ed out commands [ Upstream commit 7ce23e8e0a9cd38338fc8316ac5772666b565ca9 ] We hit the following warning in production print_req_error: I/O error, dev nbd0, sector 7213934408 flags 80700 ------------[ cut here ]------------ refcount_t: underflow; use-after-free. WARNING: CPU: 25 PID: 32407 at lib/refcount.c:190 refcount_sub_and_test_checked+0x53/0x60 Workqueue: knbd-recv recv_work [nbd] RIP: 0010:refcount_sub_and_test_checked+0x53/0x60 Call Trace: blk_mq_free_request+0xb7/0xf0 blk_mq_complete_request+0x62/0xf0 recv_work+0x29/0xa1 [nbd] process_one_work+0x1f5/0x3f0 worker_thread+0x2d/0x3d0 ? rescuer_thread+0x340/0x340 kthread+0x111/0x130 ? kthread_create_on_node+0x60/0x60 ret_from_fork+0x1f/0x30 ---[ end trace b079c3c67f98bb7c ]--- This was preceded by us timing out everything and shutting down the sockets for the device. The problem is we had a request in the queue at the same time, so we completed the request twice. This can actually happen in a lot of cases, we fail to get a ref on our config, we only have one connection and just error out the command, etc. Fix this by checking cmd->status in nbd_read_stat. We only change this under the cmd->lock, so we are safe to check this here and see if we've already error'ed this command out, which would indicate that we've completed it as well. Reviewed-by: Mike Christie Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/nbd.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index f3d0bc9a9905..34dfadd4dcd4 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -648,6 +648,12 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) ret = -ENOENT; goto out; } + if (cmd->status != BLK_STS_OK) { + dev_err(disk_to_dev(nbd->disk), "Command already handled %p\n", + req); + ret = -ENOENT; + goto out; + } if (test_bit(NBD_CMD_REQUEUED, &cmd->flags)) { dev_err(disk_to_dev(nbd->disk), "Raced with timeout on req %p\n", req); From 86295a16e62beb9ca9b75daaa2b5e0546dcd5e92 Mon Sep 17 00:00:00 2001 From: Vishal Kulkarni Date: Wed, 30 Oct 2019 20:17:57 +0530 Subject: [PATCH 0270/3715] cxgb4: fix panic when attaching to ULD fail [ Upstream commit fc89cc358fb64e2429aeae0f37906126636507ec ] Release resources when attaching to ULD fail. Otherwise, data mismatch is seen between LLD and ULD later on, which lead to kernel panic when accessing resources that should not even exist in the first place. Fixes: 94cdb8bb993a ("cxgb4: Add support for dynamic allocation of resources for ULD") Signed-off-by: Shahjada Abul Husain Signed-off-by: Vishal Kulkarni Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/chelsio/cxgb4/cxgb4_uld.c | 29 +++++++++++-------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c index 8441ce3541af..ad4c9f17d77c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -670,10 +670,10 @@ static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld) lld->fr_nsmr_tpte_wr_support = adap->params.fr_nsmr_tpte_wr_support; } -static void uld_attach(struct adapter *adap, unsigned int uld) +static int uld_attach(struct adapter *adap, unsigned int uld) { - void *handle; struct cxgb4_lld_info lli; + void *handle; uld_init(adap, &lli); uld_queue_init(adap, uld, &lli); @@ -683,7 +683,7 @@ static void uld_attach(struct adapter *adap, unsigned int uld) dev_warn(adap->pdev_dev, "could not attach to the %s driver, error %ld\n", adap->uld[uld].name, PTR_ERR(handle)); - return; + return PTR_ERR(handle); } adap->uld[uld].handle = handle; @@ -691,23 +691,24 @@ static void uld_attach(struct adapter *adap, unsigned int uld) if (adap->flags & FULL_INIT_DONE) adap->uld[uld].state_change(handle, CXGB4_STATE_UP); + + return 0; } -/** - * cxgb4_register_uld - register an upper-layer driver - * @type: the ULD type - * @p: the ULD methods +/* cxgb4_register_uld - register an upper-layer driver + * @type: the ULD type + * @p: the ULD methods * - * Registers an upper-layer driver with this driver and notifies the ULD - * about any presently available devices that support its type. Returns - * %-EBUSY if a ULD of the same type is already registered. + * Registers an upper-layer driver with this driver and notifies the ULD + * about any presently available devices that support its type. Returns + * %-EBUSY if a ULD of the same type is already registered. */ int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p) { - int ret = 0; unsigned int adap_idx = 0; struct adapter *adap; + int ret = 0; if (type >= CXGB4_ULD_MAX) return -EINVAL; @@ -741,12 +742,16 @@ int cxgb4_register_uld(enum cxgb4_uld type, if (ret) goto free_irq; adap->uld[type] = *p; - uld_attach(adap, type); + ret = uld_attach(adap, type); + if (ret) + goto free_txq; adap_idx++; } mutex_unlock(&uld_mutex); return 0; +free_txq: + release_sge_txq_uld(adap, type); free_irq: if (adap->flags & FULL_INIT_DONE) quiesce_rx_uld(adap, type); From 03c2ac95a5913cdd0e34cd7217ef022764c2c4b4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 4 Nov 2019 07:57:55 -0800 Subject: [PATCH 0271/3715] dccp: do not leak jiffies on the wire [ Upstream commit 3d1e5039f5f87a8731202ceca08764ee7cb010d3 ] For some reason I missed the case of DCCP passive flows in my previous patch. Fixes: a904a0693c18 ("inet: stop leaking jiffies on the wire") Signed-off-by: Eric Dumazet Reported-by: Thiemo Nagel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dccp/ipv4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 8e08cea6f178..17afa03cab3a 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -417,7 +417,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk, RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt)); newinet->mc_index = inet_iif(skb); newinet->mc_ttl = ip_hdr(skb)->ttl; - newinet->inet_id = jiffies; + newinet->inet_id = prandom_u32(); if (dst == NULL && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL) goto put_and_exit; From 2261df133d4211b5c1aeecc16f1ce004939e7f1e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 30 Oct 2019 13:00:04 -0700 Subject: [PATCH 0272/3715] net: annotate accesses to sk->sk_incoming_cpu [ Upstream commit 7170a977743b72cf3eb46ef6ef89885dc7ad3621 ] This socket field can be read and written by concurrent cpus. Use READ_ONCE() and WRITE_ONCE() annotations to document this, and avoid some compiler 'optimizations'. KCSAN reported : BUG: KCSAN: data-race in tcp_v4_rcv / tcp_v4_rcv write to 0xffff88812220763c of 4 bytes by interrupt on cpu 0: sk_incoming_cpu_update include/net/sock.h:953 [inline] tcp_v4_rcv+0x1b3c/0x1bb0 net/ipv4/tcp_ipv4.c:1934 ip_protocol_deliver_rcu+0x4d/0x420 net/ipv4/ip_input.c:204 ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252 dst_input include/net/dst.h:442 [inline] ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523 __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010 __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124 process_backlog+0x1d3/0x420 net/core/dev.c:5955 napi_poll net/core/dev.c:6392 [inline] net_rx_action+0x3ae/0xa90 net/core/dev.c:6460 __do_softirq+0x115/0x33f kernel/softirq.c:292 do_softirq_own_stack+0x2a/0x40 arch/x86/entry/entry_64.S:1082 do_softirq.part.0+0x6b/0x80 kernel/softirq.c:337 do_softirq kernel/softirq.c:329 [inline] __local_bh_enable_ip+0x76/0x80 kernel/softirq.c:189 read to 0xffff88812220763c of 4 bytes by interrupt on cpu 1: sk_incoming_cpu_update include/net/sock.h:952 [inline] tcp_v4_rcv+0x181a/0x1bb0 net/ipv4/tcp_ipv4.c:1934 ip_protocol_deliver_rcu+0x4d/0x420 net/ipv4/ip_input.c:204 ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252 dst_input include/net/dst.h:442 [inline] ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523 __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010 __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124 process_backlog+0x1d3/0x420 net/core/dev.c:5955 napi_poll net/core/dev.c:6392 [inline] net_rx_action+0x3ae/0xa90 net/core/dev.c:6460 __do_softirq+0x115/0x33f kernel/softirq.c:292 run_ksoftirqd+0x46/0x60 kernel/softirq.c:603 smpboot_thread_fn+0x37d/0x4a0 kernel/smpboot.c:165 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 16 Comm: ksoftirqd/1 Not tainted 5.4.0-rc3+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/sock.h | 4 ++-- net/core/sock.c | 4 ++-- net/ipv4/inet_hashtables.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv6/inet6_hashtables.c | 2 +- net/ipv6/udp.c | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 60eef7f1ac05..879990a8ffcb 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -916,8 +916,8 @@ static inline void sk_incoming_cpu_update(struct sock *sk) { int cpu = raw_smp_processor_id(); - if (unlikely(sk->sk_incoming_cpu != cpu)) - sk->sk_incoming_cpu = cpu; + if (unlikely(READ_ONCE(sk->sk_incoming_cpu) != cpu)) + WRITE_ONCE(sk->sk_incoming_cpu, cpu); } static inline void sock_rps_record_flow_hash(__u32 hash) diff --git a/net/core/sock.c b/net/core/sock.c index 5f466db916ee..a6f33a717f41 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1039,7 +1039,7 @@ set_rcvbuf: break; case SO_INCOMING_CPU: - sk->sk_incoming_cpu = val; + WRITE_ONCE(sk->sk_incoming_cpu, val); break; case SO_CNX_ADVICE: @@ -1351,7 +1351,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_INCOMING_CPU: - v.val = sk->sk_incoming_cpu; + v.val = READ_ONCE(sk->sk_incoming_cpu); break; case SO_MEMINFO: diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 24b066c32e06..1f26627c7fad 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -193,7 +193,7 @@ static inline int compute_score(struct sock *sk, struct net *net, if (sk->sk_bound_dev_if) score += 4; } - if (sk->sk_incoming_cpu == raw_smp_processor_id()) + if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; } return score; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 54343dc29cb4..a1594ab11bf6 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -419,7 +419,7 @@ static int compute_score(struct sock *sk, struct net *net, score += 4; } - if (sk->sk_incoming_cpu == raw_smp_processor_id()) + if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; return score; } diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 6dc93ac28261..228983a5531b 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -118,7 +118,7 @@ static inline int compute_score(struct sock *sk, struct net *net, if (sk->sk_bound_dev_if) score++; } - if (sk->sk_incoming_cpu == raw_smp_processor_id()) + if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; } return score; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 9f7bfeb90fb0..a2ba7356fa65 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -170,7 +170,7 @@ static int compute_score(struct sock *sk, struct net *net, score++; } - if (sk->sk_incoming_cpu == raw_smp_processor_id()) + if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; return score; From 2d8cd1c2e556e39ae6eb9d5e629c5c0f1801cf06 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 29 Oct 2019 10:54:44 -0700 Subject: [PATCH 0273/3715] net: annotate lockless accesses to sk->sk_napi_id [ Upstream commit ee8d153d46a3b98c064ee15c0c0a3bbf1450e5a1 ] We already annotated most accesses to sk->sk_napi_id We missed sk_mark_napi_id() and sk_mark_napi_id_once() which might be called without socket lock held in UDP stack. KCSAN reported : BUG: KCSAN: data-race in udpv6_queue_rcv_one_skb / udpv6_queue_rcv_one_skb write to 0xffff888121c6d108 of 4 bytes by interrupt on cpu 0: sk_mark_napi_id include/net/busy_poll.h:125 [inline] __udpv6_queue_rcv_skb net/ipv6/udp.c:571 [inline] udpv6_queue_rcv_one_skb+0x70c/0xb40 net/ipv6/udp.c:672 udpv6_queue_rcv_skb+0xb5/0x400 net/ipv6/udp.c:689 udp6_unicast_rcv_skb.isra.0+0xd7/0x180 net/ipv6/udp.c:832 __udp6_lib_rcv+0x69c/0x1770 net/ipv6/udp.c:913 udpv6_rcv+0x2b/0x40 net/ipv6/udp.c:1015 ip6_protocol_deliver_rcu+0x22a/0xbe0 net/ipv6/ip6_input.c:409 ip6_input_finish+0x30/0x50 net/ipv6/ip6_input.c:450 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip6_input+0x177/0x190 net/ipv6/ip6_input.c:459 dst_input include/net/dst.h:442 [inline] ip6_rcv_finish+0x110/0x140 net/ipv6/ip6_input.c:76 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ipv6_rcv+0x1a1/0x1b0 net/ipv6/ip6_input.c:284 __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010 __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124 process_backlog+0x1d3/0x420 net/core/dev.c:5955 napi_poll net/core/dev.c:6392 [inline] net_rx_action+0x3ae/0xa90 net/core/dev.c:6460 write to 0xffff888121c6d108 of 4 bytes by interrupt on cpu 1: sk_mark_napi_id include/net/busy_poll.h:125 [inline] __udpv6_queue_rcv_skb net/ipv6/udp.c:571 [inline] udpv6_queue_rcv_one_skb+0x70c/0xb40 net/ipv6/udp.c:672 udpv6_queue_rcv_skb+0xb5/0x400 net/ipv6/udp.c:689 udp6_unicast_rcv_skb.isra.0+0xd7/0x180 net/ipv6/udp.c:832 __udp6_lib_rcv+0x69c/0x1770 net/ipv6/udp.c:913 udpv6_rcv+0x2b/0x40 net/ipv6/udp.c:1015 ip6_protocol_deliver_rcu+0x22a/0xbe0 net/ipv6/ip6_input.c:409 ip6_input_finish+0x30/0x50 net/ipv6/ip6_input.c:450 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip6_input+0x177/0x190 net/ipv6/ip6_input.c:459 dst_input include/net/dst.h:442 [inline] ip6_rcv_finish+0x110/0x140 net/ipv6/ip6_input.c:76 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ipv6_rcv+0x1a1/0x1b0 net/ipv6/ip6_input.c:284 __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010 __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124 process_backlog+0x1d3/0x420 net/core/dev.c:5955 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 10890 Comm: syz-executor.0 Not tainted 5.4.0-rc3+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: e68b6e50fa35 ("udp: enable busy polling for all sockets") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/busy_poll.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 71c72a939bf8..c86fcadccbd7 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -134,7 +134,7 @@ static inline void skb_mark_napi_id(struct sk_buff *skb, static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb) { #ifdef CONFIG_NET_RX_BUSY_POLL - sk->sk_napi_id = skb->napi_id; + WRITE_ONCE(sk->sk_napi_id, skb->napi_id); #endif } @@ -143,8 +143,8 @@ static inline void sk_mark_napi_id_once(struct sock *sk, const struct sk_buff *skb) { #ifdef CONFIG_NET_RX_BUSY_POLL - if (!sk->sk_napi_id) - sk->sk_napi_id = skb->napi_id; + if (!READ_ONCE(sk->sk_napi_id)) + WRITE_ONCE(sk->sk_napi_id, skb->napi_id); #endif } From 78c5ddd6aec86aa79f8d7261c54fc01993486478 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 31 Oct 2019 15:54:05 -0700 Subject: [PATCH 0274/3715] net: dsa: bcm_sf2: Fix IMP setup for port different than 8 [ Upstream commit 5fc0f21246e50afdf318b5a3a941f7f4f57b8947 ] Since it became possible for the DSA core to use a CPU port different than 8, our bcm_sf2_imp_setup() function was broken because it assumes that registers are applicable to port 8. In particular, the port's MAC is going to stay disabled, so make sure we clear the RX_DIS and TX_DIS bits if we are not configured for port 8. Fixes: 9f91484f6fcc ("net: dsa: make "label" property optional for dsa2") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/bcm_sf2.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 0132921f408a..604c5abc08eb 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -106,22 +106,11 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port) unsigned int i; u32 reg, offset; - if (priv->type == BCM7445_DEVICE_ID) - offset = CORE_STS_OVERRIDE_IMP; - else - offset = CORE_STS_OVERRIDE_IMP2; - /* Enable the port memories */ reg = core_readl(priv, CORE_MEM_PSM_VDD_CTRL); reg &= ~P_TXQ_PSM_VDD(port); core_writel(priv, reg, CORE_MEM_PSM_VDD_CTRL); - /* Enable Broadcast, Multicast, Unicast forwarding to IMP port */ - reg = core_readl(priv, CORE_IMP_CTL); - reg |= (RX_BCST_EN | RX_MCST_EN | RX_UCST_EN); - reg &= ~(RX_DIS | TX_DIS); - core_writel(priv, reg, CORE_IMP_CTL); - /* Enable forwarding */ core_writel(priv, SW_FWDG_EN, CORE_SWMODE); @@ -140,10 +129,27 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port) bcm_sf2_brcm_hdr_setup(priv, port); - /* Force link status for IMP port */ - reg = core_readl(priv, offset); - reg |= (MII_SW_OR | LINK_STS); - core_writel(priv, reg, offset); + if (port == 8) { + if (priv->type == BCM7445_DEVICE_ID) + offset = CORE_STS_OVERRIDE_IMP; + else + offset = CORE_STS_OVERRIDE_IMP2; + + /* Force link status for IMP port */ + reg = core_readl(priv, offset); + reg |= (MII_SW_OR | LINK_STS); + core_writel(priv, reg, offset); + + /* Enable Broadcast, Multicast, Unicast forwarding to IMP port */ + reg = core_readl(priv, CORE_IMP_CTL); + reg |= (RX_BCST_EN | RX_MCST_EN | RX_UCST_EN); + reg &= ~(RX_DIS | TX_DIS); + core_writel(priv, reg, CORE_IMP_CTL); + } else { + reg = core_readl(priv, CORE_G_PCTL_PORT(port)); + reg &= ~(RX_DIS | TX_DIS); + core_writel(priv, reg, CORE_G_PCTL_PORT(port)); + } } static void bcm_sf2_eee_enable_set(struct dsa_switch *ds, int port, bool enable) From 52fb9ecc3bb93cec8f309d5c7cdd201c3ba8cab1 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 25 Oct 2019 13:47:24 +1100 Subject: [PATCH 0275/3715] net: ethernet: ftgmac100: Fix DMA coherency issue with SW checksum [ Upstream commit 88824e3bf29a2fcacfd9ebbfe03063649f0f3254 ] We are calling the checksum helper after the dma_map_single() call to map the packet. This is incorrect as the checksumming code will touch the packet from the CPU. This means the cache won't be properly flushes (or the bounce buffering will leave us with the unmodified packet to DMA). This moves the calculation of the checksum & vlan tags to before the DMA mapping. This also has the side effect of fixing another bug: If the checksum helper fails, we goto "drop" to drop the packet, which will not unmap the DMA mapping. Signed-off-by: Benjamin Herrenschmidt Fixes: 05690d633f30 ("ftgmac100: Upgrade to NETIF_F_HW_CSUM") Reviewed-by: Vijay Khemka Tested-by: Vijay Khemka Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/faraday/ftgmac100.c | 25 ++++++++++++------------ 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 9ed8e4b81530..bfda315a3f1b 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -734,6 +734,18 @@ static int ftgmac100_hard_start_xmit(struct sk_buff *skb, */ nfrags = skb_shinfo(skb)->nr_frags; + /* Setup HW checksumming */ + csum_vlan = 0; + if (skb->ip_summed == CHECKSUM_PARTIAL && + !ftgmac100_prep_tx_csum(skb, &csum_vlan)) + goto drop; + + /* Add VLAN tag */ + if (skb_vlan_tag_present(skb)) { + csum_vlan |= FTGMAC100_TXDES1_INS_VLANTAG; + csum_vlan |= skb_vlan_tag_get(skb) & 0xffff; + } + /* Get header len */ len = skb_headlen(skb); @@ -760,19 +772,6 @@ static int ftgmac100_hard_start_xmit(struct sk_buff *skb, if (nfrags == 0) f_ctl_stat |= FTGMAC100_TXDES0_LTS; txdes->txdes3 = cpu_to_le32(map); - - /* Setup HW checksumming */ - csum_vlan = 0; - if (skb->ip_summed == CHECKSUM_PARTIAL && - !ftgmac100_prep_tx_csum(skb, &csum_vlan)) - goto drop; - - /* Add VLAN tag */ - if (skb_vlan_tag_present(skb)) { - csum_vlan |= FTGMAC100_TXDES1_INS_VLANTAG; - csum_vlan |= skb_vlan_tag_get(skb) & 0xffff; - } - txdes->txdes1 = cpu_to_le32(csum_vlan); /* Next descriptor */ From a704289342f8efe32c0270d25b9e0e05481e8ed1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 24 Oct 2019 13:50:27 -0700 Subject: [PATCH 0276/3715] net: fix sk_page_frag() recursion from memory reclaim [ Upstream commit 20eb4f29b60286e0d6dc01d9c260b4bd383c58fb ] sk_page_frag() optimizes skb_frag allocations by using per-task skb_frag cache when it knows it's the only user. The condition is determined by seeing whether the socket allocation mask allows blocking - if the allocation may block, it obviously owns the task's context and ergo exclusively owns current->task_frag. Unfortunately, this misses recursion through memory reclaim path. Please take a look at the following backtrace. [2] RIP: 0010:tcp_sendmsg_locked+0xccf/0xe10 ... tcp_sendmsg+0x27/0x40 sock_sendmsg+0x30/0x40 sock_xmit.isra.24+0xa1/0x170 [nbd] nbd_send_cmd+0x1d2/0x690 [nbd] nbd_queue_rq+0x1b5/0x3b0 [nbd] __blk_mq_try_issue_directly+0x108/0x1b0 blk_mq_request_issue_directly+0xbd/0xe0 blk_mq_try_issue_list_directly+0x41/0xb0 blk_mq_sched_insert_requests+0xa2/0xe0 blk_mq_flush_plug_list+0x205/0x2a0 blk_flush_plug_list+0xc3/0xf0 [1] blk_finish_plug+0x21/0x2e _xfs_buf_ioapply+0x313/0x460 __xfs_buf_submit+0x67/0x220 xfs_buf_read_map+0x113/0x1a0 xfs_trans_read_buf_map+0xbf/0x330 xfs_btree_read_buf_block.constprop.42+0x95/0xd0 xfs_btree_lookup_get_block+0x95/0x170 xfs_btree_lookup+0xcc/0x470 xfs_bmap_del_extent_real+0x254/0x9a0 __xfs_bunmapi+0x45c/0xab0 xfs_bunmapi+0x15/0x30 xfs_itruncate_extents_flags+0xca/0x250 xfs_free_eofblocks+0x181/0x1e0 xfs_fs_destroy_inode+0xa8/0x1b0 destroy_inode+0x38/0x70 dispose_list+0x35/0x50 prune_icache_sb+0x52/0x70 super_cache_scan+0x120/0x1a0 do_shrink_slab+0x120/0x290 shrink_slab+0x216/0x2b0 shrink_node+0x1b6/0x4a0 do_try_to_free_pages+0xc6/0x370 try_to_free_mem_cgroup_pages+0xe3/0x1e0 try_charge+0x29e/0x790 mem_cgroup_charge_skmem+0x6a/0x100 __sk_mem_raise_allocated+0x18e/0x390 __sk_mem_schedule+0x2a/0x40 [0] tcp_sendmsg_locked+0x8eb/0xe10 tcp_sendmsg+0x27/0x40 sock_sendmsg+0x30/0x40 ___sys_sendmsg+0x26d/0x2b0 __sys_sendmsg+0x57/0xa0 do_syscall_64+0x42/0x100 entry_SYSCALL_64_after_hwframe+0x44/0xa9 In [0], tcp_send_msg_locked() was using current->page_frag when it called sk_wmem_schedule(). It already calculated how many bytes can be fit into current->page_frag. Due to memory pressure, sk_wmem_schedule() called into memory reclaim path which called into xfs and then IO issue path. Because the filesystem in question is backed by nbd, the control goes back into the tcp layer - back into tcp_sendmsg_locked(). nbd sets sk_allocation to (GFP_NOIO | __GFP_MEMALLOC) which makes sense - it's in the process of freeing memory and wants to be able to, e.g., drop clean pages to make forward progress. However, this confused sk_page_frag() called from [2]. Because it only tests whether the allocation allows blocking which it does, it now thinks current->page_frag can be used again although it already was being used in [0]. After [2] used current->page_frag, the offset would be increased by the used amount. When the control returns to [0], current->page_frag's offset is increased and the previously calculated number of bytes now may overrun the end of allocated memory leading to silent memory corruptions. Fix it by adding gfpflags_normal_context() which tests sleepable && !reclaim and use it to determine whether to use current->task_frag. v2: Eric didn't like gfp flags being tested twice. Introduce a new helper gfpflags_normal_context() and combine the two tests. Signed-off-by: Tejun Heo Cc: Josef Bacik Cc: Eric Dumazet Cc: stable@vger.kernel.org Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/gfp.h | 23 +++++++++++++++++++++++ include/net/sock.h | 11 ++++++++--- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index b041f94678de..79d3dab45ceb 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -313,6 +313,29 @@ static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) return !!(gfp_flags & __GFP_DIRECT_RECLAIM); } +/** + * gfpflags_normal_context - is gfp_flags a normal sleepable context? + * @gfp_flags: gfp_flags to test + * + * Test whether @gfp_flags indicates that the allocation is from the + * %current context and allowed to sleep. + * + * An allocation being allowed to block doesn't mean it owns the %current + * context. When direct reclaim path tries to allocate memory, the + * allocation context is nested inside whatever %current was doing at the + * time of the original allocation. The nested allocation may be allowed + * to block but modifying anything %current owns can corrupt the outer + * context's expectations. + * + * %true result from this function indicates that the allocation context + * can sleep and use anything that's associated with %current. + */ +static inline bool gfpflags_normal_context(const gfp_t gfp_flags) +{ + return (gfp_flags & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC)) == + __GFP_DIRECT_RECLAIM; +} + #ifdef CONFIG_HIGHMEM #define OPT_ZONE_HIGHMEM ZONE_HIGHMEM #else diff --git a/include/net/sock.h b/include/net/sock.h index 879990a8ffcb..7ec4d0bd8d12 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2131,12 +2131,17 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, * sk_page_frag - return an appropriate page_frag * @sk: socket * - * If socket allocation mode allows current thread to sleep, it means its - * safe to use the per task page_frag instead of the per socket one. + * Use the per task page_frag instead of the per socket one for + * optimization when we know that we're in the normal context and owns + * everything that's associated with %current. + * + * gfpflags_allow_blocking() isn't enough here as direct reclaim may nest + * inside other socket operations and end up recursing into sk_page_frag() + * while it's already in use. */ static inline struct page_frag *sk_page_frag(struct sock *sk) { - if (gfpflags_allow_blocking(sk->sk_allocation)) + if (gfpflags_normal_context(sk->sk_allocation)) return ¤t->task_frag; return &sk->sk_frag; From f0a12f90bb6a5c1a88cc887a27aae4d4db0ca20e Mon Sep 17 00:00:00 2001 From: Jiangfeng Xiao Date: Mon, 28 Oct 2019 13:09:46 +0800 Subject: [PATCH 0277/3715] net: hisilicon: Fix ping latency when deal with high throughput [ Upstream commit e56bd641ca61beb92b135298d5046905f920b734 ] This is due to error in over budget processing. When dealing with high throughput, the used buffers that exceeds the budget is not cleaned up. In addition, it takes a lot of cycles to clean up the used buffer, and then the buffer where the valid data is located can take effect. Signed-off-by: Jiangfeng Xiao Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/hisilicon/hip04_eth.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index 1bfe9544b3c1..17cbe8145dcd 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -174,6 +174,7 @@ struct hip04_priv { dma_addr_t rx_phys[RX_DESC_NUM]; unsigned int rx_head; unsigned int rx_buf_size; + unsigned int rx_cnt_remaining; struct device_node *phy_node; struct phy_device *phy; @@ -487,7 +488,6 @@ static int hip04_rx_poll(struct napi_struct *napi, int budget) struct hip04_priv *priv = container_of(napi, struct hip04_priv, napi); struct net_device *ndev = priv->ndev; struct net_device_stats *stats = &ndev->stats; - unsigned int cnt = hip04_recv_cnt(priv); struct rx_desc *desc; struct sk_buff *skb; unsigned char *buf; @@ -500,8 +500,8 @@ static int hip04_rx_poll(struct napi_struct *napi, int budget) /* clean up tx descriptors */ tx_remaining = hip04_tx_reclaim(ndev, false); - - while (cnt && !last) { + priv->rx_cnt_remaining += hip04_recv_cnt(priv); + while (priv->rx_cnt_remaining && !last) { buf = priv->rx_buf[priv->rx_head]; skb = build_skb(buf, priv->rx_buf_size); if (unlikely(!skb)) { @@ -547,11 +547,13 @@ refill: hip04_set_recv_desc(priv, phys); priv->rx_head = RX_NEXT(priv->rx_head); - if (rx >= budget) + if (rx >= budget) { + --priv->rx_cnt_remaining; goto done; + } - if (--cnt == 0) - cnt = hip04_recv_cnt(priv); + if (--priv->rx_cnt_remaining == 0) + priv->rx_cnt_remaining += hip04_recv_cnt(priv); } if (!(priv->reg_inten & RCV_INT)) { @@ -636,6 +638,7 @@ static int hip04_mac_open(struct net_device *ndev) int i; priv->rx_head = 0; + priv->rx_cnt_remaining = 0; priv->tx_head = 0; priv->tx_tail = 0; hip04_reset_ppe(priv); From 77da9481248c717f2538587dbdabb297f513ebc5 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Sun, 27 Oct 2019 16:39:15 +0200 Subject: [PATCH 0278/3715] net/mlx4_core: Dynamically set guaranteed amount of counters per VF [ Upstream commit e19868efea0c103f23b4b7e986fd0a703822111f ] Prior to this patch, the amount of counters guaranteed per VF in the resource tracker was MLX4_VF_COUNTERS_PER_PORT * MLX4_MAX_PORTS. It was set regardless if the VF was single or dual port. This caused several VFs to have no guaranteed counters although the system could satisfy their request. The fix is to dynamically guarantee counters, based on each VF specification. Fixes: 9de92c60beaa ("net/mlx4_core: Adjust counter grant policy in the resource tracker") Signed-off-by: Eran Ben Elisha Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- .../ethernet/mellanox/mlx4/resource_tracker.c | 42 ++++++++++++------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 53ca6cf316dc..66e8054a8966 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -471,12 +471,31 @@ void mlx4_init_quotas(struct mlx4_dev *dev) priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[pf]; } -static int get_max_gauranteed_vfs_counter(struct mlx4_dev *dev) +static int +mlx4_calc_res_counter_guaranteed(struct mlx4_dev *dev, + struct resource_allocator *res_alloc, + int vf) { - /* reduce the sink counter */ - return (dev->caps.max_counters - 1 - - (MLX4_PF_COUNTERS_PER_PORT * MLX4_MAX_PORTS)) - / MLX4_MAX_PORTS; + struct mlx4_active_ports actv_ports; + int ports, counters_guaranteed; + + /* For master, only allocate according to the number of phys ports */ + if (vf == mlx4_master_func_num(dev)) + return MLX4_PF_COUNTERS_PER_PORT * dev->caps.num_ports; + + /* calculate real number of ports for the VF */ + actv_ports = mlx4_get_active_ports(dev, vf); + ports = bitmap_weight(actv_ports.ports, dev->caps.num_ports); + counters_guaranteed = ports * MLX4_VF_COUNTERS_PER_PORT; + + /* If we do not have enough counters for this VF, do not + * allocate any for it. '-1' to reduce the sink counter. + */ + if ((res_alloc->res_reserved + counters_guaranteed) > + (dev->caps.max_counters - 1)) + return 0; + + return counters_guaranteed; } int mlx4_init_resource_tracker(struct mlx4_dev *dev) @@ -484,7 +503,6 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); int i, j; int t; - int max_vfs_guarantee_counter = get_max_gauranteed_vfs_counter(dev); priv->mfunc.master.res_tracker.slave_list = kzalloc(dev->num_slaves * sizeof(struct slave_list), @@ -601,16 +619,8 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev) break; case RES_COUNTER: res_alloc->quota[t] = dev->caps.max_counters; - if (t == mlx4_master_func_num(dev)) - res_alloc->guaranteed[t] = - MLX4_PF_COUNTERS_PER_PORT * - MLX4_MAX_PORTS; - else if (t <= max_vfs_guarantee_counter) - res_alloc->guaranteed[t] = - MLX4_VF_COUNTERS_PER_PORT * - MLX4_MAX_PORTS; - else - res_alloc->guaranteed[t] = 0; + res_alloc->guaranteed[t] = + mlx4_calc_res_counter_guaranteed(dev, res_alloc, t); break; default: break; From 90bddaf253383b8a228726363f7a61dbda85facc Mon Sep 17 00:00:00 2001 From: zhanglin Date: Sat, 26 Oct 2019 15:54:16 +0800 Subject: [PATCH 0279/3715] net: Zeroing the structure ethtool_wolinfo in ethtool_get_wol() [ Upstream commit 5ff223e86f5addbfae26419cbb5d61d98f6fbf7d ] memset() the structure ethtool_wolinfo that has padded bytes but the padded bytes have not been zeroed out. Signed-off-by: zhanglin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/ethtool.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 97569d3e1937..7822defa5a5d 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1450,11 +1450,13 @@ static int ethtool_reset(struct net_device *dev, char __user *useraddr) static int ethtool_get_wol(struct net_device *dev, char __user *useraddr) { - struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL }; + struct ethtool_wolinfo wol; if (!dev->ethtool_ops->get_wol) return -EOPNOTSUPP; + memset(&wol, 0, sizeof(struct ethtool_wolinfo)); + wol.cmd = ETHTOOL_GWOL; dev->ethtool_ops->get_wol(dev, &wol); if (copy_to_user(useraddr, &wol, sizeof(wol))) From 866e751db1eb72bd8fcb856e71a979804f31dc50 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Thu, 31 Oct 2019 16:24:36 -0700 Subject: [PATCH 0280/3715] selftests: net: reuseport_dualstack: fix uninitalized parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d64479a3e3f9924074ca7b50bd72fa5211dca9c1 ] This test reports EINVAL for getsockopt(SOL_SOCKET, SO_DOMAIN) occasionally due to the uninitialized length parameter. Initialize it to fix this, and also use int for "test_family" to comply with the API standard. Fixes: d6a61f80b871 ("soreuseport: test mixed v4/v6 sockets") Reported-by: Maciej Żenczykowski Signed-off-by: Eric Dumazet Signed-off-by: Wei Wang Cc: Craig Gallek Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/reuseport_dualstack.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/reuseport_dualstack.c b/tools/testing/selftests/net/reuseport_dualstack.c index fe3230c55986..fb7a59ed759e 100644 --- a/tools/testing/selftests/net/reuseport_dualstack.c +++ b/tools/testing/selftests/net/reuseport_dualstack.c @@ -129,7 +129,7 @@ static void test(int *rcv_fds, int count, int proto) { struct epoll_event ev; int epfd, i, test_fd; - uint16_t test_family; + int test_family; socklen_t len; epfd = epoll_create(1); @@ -146,6 +146,7 @@ static void test(int *rcv_fds, int count, int proto) send_from_v4(proto); test_fd = receive_once(epfd, proto); + len = sizeof(test_family); if (getsockopt(test_fd, SOL_SOCKET, SO_DOMAIN, &test_family, &len)) error(1, errno, "failed to read socket domain"); if (test_family != AF_INET) From 3a04f47897cd4ec1d8179de60efde8a90398f63c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 24 Oct 2019 11:43:31 -0700 Subject: [PATCH 0281/3715] udp: fix data-race in udp_set_dev_scratch() [ Upstream commit a793183caa9afae907a0d7ddd2ffd57329369bf5 ] KCSAN reported a data-race in udp_set_dev_scratch() [1] The issue here is that we must not write over skb fields if skb is shared. A similar issue has been fixed in commit 89c22d8c3b27 ("net: Fix skb csum races when peeking") While we are at it, use a helper only dealing with udp_skb_scratch(skb)->csum_unnecessary, as this allows udp_set_dev_scratch() to be called once and thus inlined. [1] BUG: KCSAN: data-race in udp_set_dev_scratch / udpv6_recvmsg write to 0xffff888120278317 of 1 bytes by task 10411 on cpu 1: udp_set_dev_scratch+0xea/0x200 net/ipv4/udp.c:1308 __first_packet_length+0x147/0x420 net/ipv4/udp.c:1556 first_packet_length+0x68/0x2a0 net/ipv4/udp.c:1579 udp_poll+0xea/0x110 net/ipv4/udp.c:2720 sock_poll+0xed/0x250 net/socket.c:1256 vfs_poll include/linux/poll.h:90 [inline] do_select+0x7d0/0x1020 fs/select.c:534 core_sys_select+0x381/0x550 fs/select.c:677 do_pselect.constprop.0+0x11d/0x160 fs/select.c:759 __do_sys_pselect6 fs/select.c:784 [inline] __se_sys_pselect6 fs/select.c:769 [inline] __x64_sys_pselect6+0x12e/0x170 fs/select.c:769 do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x44/0xa9 read to 0xffff888120278317 of 1 bytes by task 10413 on cpu 0: udp_skb_csum_unnecessary include/net/udp.h:358 [inline] udpv6_recvmsg+0x43e/0xe90 net/ipv6/udp.c:310 inet6_recvmsg+0xbb/0x240 net/ipv6/af_inet6.c:592 sock_recvmsg_nosec+0x5c/0x70 net/socket.c:871 ___sys_recvmsg+0x1a0/0x3e0 net/socket.c:2480 do_recvmmsg+0x19a/0x5c0 net/socket.c:2601 __sys_recvmmsg+0x1ef/0x200 net/socket.c:2680 __do_sys_recvmmsg net/socket.c:2703 [inline] __se_sys_recvmmsg net/socket.c:2696 [inline] __x64_sys_recvmmsg+0x89/0xb0 net/socket.c:2696 do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 10413 Comm: syz-executor.0 Not tainted 5.4.0-rc3+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 2276f58ac589 ("udp: use a separate rx queue for packet reception") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Paolo Abeni Reviewed-by: Paolo Abeni Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/udp.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index a1594ab11bf6..4002b41cf015 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1195,6 +1195,20 @@ static void udp_set_dev_scratch(struct sk_buff *skb) scratch->_tsize_state |= UDP_SKB_IS_STATELESS; } +static void udp_skb_csum_unnecessary_set(struct sk_buff *skb) +{ + /* We come here after udp_lib_checksum_complete() returned 0. + * This means that __skb_checksum_complete() might have + * set skb->csum_valid to 1. + * On 64bit platforms, we can set csum_unnecessary + * to true, but only if the skb is not shared. + */ +#if BITS_PER_LONG == 64 + if (!skb_shared(skb)) + udp_skb_scratch(skb)->csum_unnecessary = true; +#endif +} + static int udp_skb_truesize(struct sk_buff *skb) { return udp_skb_scratch(skb)->_tsize_state & ~UDP_SKB_IS_STATELESS; @@ -1430,10 +1444,7 @@ static struct sk_buff *__first_packet_length(struct sock *sk, *total += skb->truesize; kfree_skb(skb); } else { - /* the csum related bits could be changed, refresh - * the scratch area - */ - udp_set_dev_scratch(skb); + udp_skb_csum_unnecessary_set(skb); break; } } From 5f45a3c0127762772bd6faba543f577ea5f700e1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 23 Oct 2019 22:44:52 -0700 Subject: [PATCH 0282/3715] net: add READ_ONCE() annotation in __skb_wait_for_more_packets() [ Upstream commit 7c422d0ce97552dde4a97e6290de70ec6efb0fc6 ] __skb_wait_for_more_packets() can be called while other cpus can feed packets to the socket receive queue. KCSAN reported : BUG: KCSAN: data-race in __skb_wait_for_more_packets / __udp_enqueue_schedule_skb write to 0xffff888102e40b58 of 8 bytes by interrupt on cpu 0: __skb_insert include/linux/skbuff.h:1852 [inline] __skb_queue_before include/linux/skbuff.h:1958 [inline] __skb_queue_tail include/linux/skbuff.h:1991 [inline] __udp_enqueue_schedule_skb+0x2d7/0x410 net/ipv4/udp.c:1470 __udp_queue_rcv_skb net/ipv4/udp.c:1940 [inline] udp_queue_rcv_one_skb+0x7bd/0xc70 net/ipv4/udp.c:2057 udp_queue_rcv_skb+0xb5/0x400 net/ipv4/udp.c:2074 udp_unicast_rcv_skb.isra.0+0x7e/0x1c0 net/ipv4/udp.c:2233 __udp4_lib_rcv+0xa44/0x17c0 net/ipv4/udp.c:2300 udp_rcv+0x2b/0x40 net/ipv4/udp.c:2470 ip_protocol_deliver_rcu+0x4d/0x420 net/ipv4/ip_input.c:204 ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252 dst_input include/net/dst.h:442 [inline] ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523 __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010 __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124 process_backlog+0x1d3/0x420 net/core/dev.c:5955 read to 0xffff888102e40b58 of 8 bytes by task 13035 on cpu 1: __skb_wait_for_more_packets+0xfa/0x320 net/core/datagram.c:100 __skb_recv_udp+0x374/0x500 net/ipv4/udp.c:1683 udp_recvmsg+0xe1/0xb10 net/ipv4/udp.c:1712 inet_recvmsg+0xbb/0x250 net/ipv4/af_inet.c:838 sock_recvmsg_nosec+0x5c/0x70 net/socket.c:871 ___sys_recvmsg+0x1a0/0x3e0 net/socket.c:2480 do_recvmmsg+0x19a/0x5c0 net/socket.c:2601 __sys_recvmmsg+0x1ef/0x200 net/socket.c:2680 __do_sys_recvmmsg net/socket.c:2703 [inline] __se_sys_recvmmsg net/socket.c:2696 [inline] __x64_sys_recvmmsg+0x89/0xb0 net/socket.c:2696 do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 13035 Comm: syz-executor.3 Not tainted 5.4.0-rc3+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/datagram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/datagram.c b/net/core/datagram.c index dcb333e95702..632adc4301e2 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -97,7 +97,7 @@ int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, if (error) goto out_err; - if (sk->sk_receive_queue.prev != skb) + if (READ_ONCE(sk->sk_receive_queue.prev) != skb) goto out; /* Socket shut down? */ From bc8f227bdb31a2d1640430bd6fe6695527c37d96 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Mon, 16 Sep 2019 14:54:20 +0300 Subject: [PATCH 0283/3715] net/mlx5e: Fix handling of compressed CQEs in case of low NAPI budget [ Upstream commit 9df86bdb6746d7fcfc2fda715f7a7c3d0ddb2654 ] When CQE compression is enabled, compressed CQEs use the following structure: a title is followed by one or many blocks, each containing 8 mini CQEs (except the last, which may contain fewer mini CQEs). Due to NAPI budget restriction, a complete structure is not always parsed in one NAPI run, and some blocks with mini CQEs may be deferred to the next NAPI poll call - we have the mlx5e_decompress_cqes_cont call in the beginning of mlx5e_poll_rx_cq. However, if the budget is extremely low, some blocks may be left even after that, but the code that follows the mlx5e_decompress_cqes_cont call doesn't check it and assumes that a new CQE begins, which may not be the case. In such cases, random memory corruptions occur. An extremely low NAPI budget of 8 is used when busy_poll or busy_read is active. This commit adds a check to make sure that the previous compressed CQE has been completely parsed after mlx5e_decompress_cqes_cont, otherwise it prevents a new CQE from being fetched in the middle of a compressed CQE. This commit fixes random crashes in __build_skb, __page_pool_put_page and other not-related-directly places, that used to happen when both CQE compression and busy_poll/busy_read were enabled. Fixes: 7219ab34f184 ("net/mlx5e: CQE compression") Signed-off-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index eec7c2ef067a..bf311a3c3e02 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1093,8 +1093,11 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) if (unlikely(!MLX5E_TEST_BIT(rq->state, MLX5E_RQ_STATE_ENABLED))) return 0; - if (cq->decmprs_left) + if (cq->decmprs_left) { work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget); + if (cq->decmprs_left || work_done >= budget) + goto out; + } cqe = mlx5_cqwq_get_cqe(&cq->wq); if (!cqe) { From 12300056a6e98f7d845ae8952bd958c4f906b784 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 5 Oct 2019 15:05:18 -0700 Subject: [PATCH 0284/3715] net: dsa: b53: Do not clear existing mirrored port mask [ Upstream commit c763ac436b668d7417f0979430ec0312ede4093d ] Clearing the existing bitmask of mirrored ports essentially prevents us from capturing more than one port at any given time. This is clearly wrong, do not clear the bitmask prior to setting up the new port. Reported-by: Hubert Feurstein Fixes: ed3af5fd08eb ("net: dsa: b53: Add support for port mirroring") Signed-off-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/b53/b53_common.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index acf64d4cd94c..434e6dced6b7 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1431,7 +1431,6 @@ int b53_mirror_add(struct dsa_switch *ds, int port, loc = B53_EG_MIR_CTL; b53_read16(dev, B53_MGMT_PAGE, loc, ®); - reg &= ~MIRROR_MASK; reg |= BIT(port); b53_write16(dev, B53_MGMT_PAGE, loc, reg); From 03a70959304028f2eed3713f03bd1c92585699b0 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 17 Oct 2019 21:29:26 +0200 Subject: [PATCH 0285/3715] net: usb: lan78xx: Connect PHY before registering MAC [ Upstream commit 38b4fe320119859c11b1dc06f6b4987a16344fa1 ] As soon as the netdev is registers, the kernel can start using the interface. If the driver connects the MAC to the PHY after the netdev is registered, there is a race condition where the interface can be opened without having the PHY connected. Change the order to close this race condition. Fixes: 92571a1aae40 ("lan78xx: Connect phy early") Reported-by: Daniel Wagner Signed-off-by: Andrew Lunn Tested-by: Daniel Wagner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/lan78xx.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 24b994c68bcc..78a12d7b96e8 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3642,10 +3642,14 @@ static int lan78xx_probe(struct usb_interface *intf, /* driver requires remote-wakeup capability during autosuspend. */ intf->needs_remote_wakeup = 1; + ret = lan78xx_phy_init(dev); + if (ret < 0) + goto out4; + ret = register_netdev(netdev); if (ret != 0) { netif_err(dev, probe, netdev, "couldn't register the device\n"); - goto out4; + goto out5; } usb_set_intfdata(intf, dev); @@ -3658,14 +3662,10 @@ static int lan78xx_probe(struct usb_interface *intf, pm_runtime_set_autosuspend_delay(&udev->dev, DEFAULT_AUTOSUSPEND_DELAY); - ret = lan78xx_phy_init(dev); - if (ret < 0) - goto out5; - return 0; out5: - unregister_netdev(netdev); + phy_disconnect(netdev->phydev); out4: usb_free_urb(dev->urb_intr); out3: From adea9fd3b159cf25d2567b112bc7b84479447e72 Mon Sep 17 00:00:00 2001 From: Kazutoshi Noguchi Date: Mon, 21 Oct 2019 00:03:07 +0900 Subject: [PATCH 0286/3715] r8152: add device id for Lenovo ThinkPad USB-C Dock Gen 2 [ Upstream commit b3060531979422d5bb18d80226f978910284dc70 ] This device is sold as 'ThinkPad USB-C Dock Gen 2 (40AS)'. Chipset is RTL8153 and works with r8152. Without this, the generic cdc_ether grabs the device, and the device jam connected networks up when the machine suspends. Signed-off-by: Kazutoshi Noguchi Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/cdc_ether.c | 7 +++++++ drivers/net/usb/r8152.c | 1 + 2 files changed, 8 insertions(+) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index ffd15f5f836f..6c7a169d906a 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -800,6 +800,13 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, +/* ThinkPad USB-C Dock Gen 2 (based on Realtek RTL8153) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0xa387, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* NVIDIA Tegra USB 3.0 Ethernet Adapters (based on Realtek RTL8153) */ { USB_DEVICE_AND_INTERFACE_INFO(NVIDIA_VENDOR_ID, 0x09ff, USB_CLASS_COMM, diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index c0964281ab98..6a86a03c5e95 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -5324,6 +5324,7 @@ static const struct usb_device_id rtl8152_table[] = { {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7205)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x720c)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7214)}, + {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0xa387)}, {REALTEK_USB_DEVICE(VENDOR_ID_LINKSYS, 0x0041)}, {REALTEK_USB_DEVICE(VENDOR_ID_NVIDIA, 0x09ff)}, {REALTEK_USB_DEVICE(VENDOR_ID_TPLINK, 0x0601)}, From af3924a97ea10ec3761fd4d7f9f2046febee6a04 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 18 Oct 2019 17:02:46 -0400 Subject: [PATCH 0287/3715] net: dsa: fix switch tree list [ Upstream commit 50c7d2ba9de20f60a2d527ad6928209ef67e4cdd ] If there are multiple switch trees on the device, only the last one will be listed, because the arguments of list_add_tail are swapped. Fixes: 83c0afaec7b7 ("net: dsa: Add new binding implementation") Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dsa/dsa2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 045d8a176279..0796355e74c1 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -62,7 +62,7 @@ static struct dsa_switch_tree *dsa_add_dst(u32 tree) return NULL; dst->tree = tree; INIT_LIST_HEAD(&dst->list); - list_add_tail(&dsa_switch_trees, &dst->list); + list_add_tail(&dst->list, &dsa_switch_trees); kref_init(&dst->refcount); return dst; From 761f73c166c3b0fecf43692762ee0e03092480f0 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Wed, 16 Oct 2019 16:06:32 -0700 Subject: [PATCH 0288/3715] net: bcmgenet: reset 40nm EPHY on energy detect [ Upstream commit 25382b991d252aed961cd434176240f9de6bb15f ] The EPHY integrated into the 40nm Set-Top Box devices can falsely detect energy when connected to a disabled peer interface. When the peer interface is enabled the EPHY will detect and report the link as active, but on occasion may get into a state where it is not able to exchange data with the connected GENET MAC. This issue has not been observed when the link parameters are auto-negotiated; however, it has been observed with a manually configured link. It has been empirically determined that issuing a soft reset to the EPHY when energy is detected prevents it from getting into this bad state. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index d857df8ebdb4..1cc4fb27c13b 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1985,6 +1985,8 @@ static void bcmgenet_link_intr_enable(struct bcmgenet_priv *priv) */ if (priv->internal_phy) { int0_enable |= UMAC_IRQ_LINK_EVENT; + if (GENET_IS_V1(priv) || GENET_IS_V2(priv) || GENET_IS_V3(priv)) + int0_enable |= UMAC_IRQ_PHY_DET_R; } else if (priv->ext_phy) { int0_enable |= UMAC_IRQ_LINK_EVENT; } else if (priv->phy_interface == PHY_INTERFACE_MODE_MOCA) { @@ -2608,6 +2610,10 @@ static void bcmgenet_irq_task(struct work_struct *work) bcmgenet_power_up(priv, GENET_POWER_WOL_MAGIC); } + if (status & UMAC_IRQ_PHY_DET_R && + priv->dev->phydev->autoneg != AUTONEG_ENABLE) + phy_init_hw(priv->dev->phydev); + /* Link UP/DOWN event */ if (status & UMAC_IRQ_LINK_EVENT) phy_mac_interrupt(priv->phydev, @@ -2713,8 +2719,7 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id) } /* all other interested interrupts handled in bottom half */ - status &= (UMAC_IRQ_LINK_EVENT | - UMAC_IRQ_MPD_R); + status &= (UMAC_IRQ_LINK_EVENT | UMAC_IRQ_MPD_R | UMAC_IRQ_PHY_DET_R); if (status) { /* Save irq status for bottom-half processing. */ spin_lock_irqsave(&priv->lock, flags); From 3af6b2ad90138bec29f8949baa90b0d742c91eb4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 23 Oct 2019 22:44:48 -0700 Subject: [PATCH 0289/3715] net: add skb_queue_empty_lockless() [ Upstream commit d7d16a89350ab263484c0aa2b523dd3a234e4a80 ] Some paths call skb_queue_empty() without holding the queue lock. We must use a barrier in order to not let the compiler do strange things, and avoid KCSAN splats. Adding a barrier in skb_queue_empty() might be overkill, I prefer adding a new helper to clearly identify points where the callers might be lockless. This might help us finding real bugs. The corresponding WRITE_ONCE() should add zero cost for current compilers. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/skbuff.h | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index fdb0cd0699b6..248806103fb7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1345,6 +1345,19 @@ static inline int skb_queue_empty(const struct sk_buff_head *list) return list->next == (const struct sk_buff *) list; } +/** + * skb_queue_empty_lockless - check if a queue is empty + * @list: queue head + * + * Returns true if the queue is empty, false otherwise. + * This variant can be used in lockless contexts. + */ +static inline bool skb_queue_empty_lockless(const struct sk_buff_head *list) +{ + return READ_ONCE(list->next) == (const struct sk_buff *) list; +} + + /** * skb_queue_is_last - check if skb is the last entry in the queue * @list: queue head @@ -1709,9 +1722,11 @@ static inline void __skb_insert(struct sk_buff *newsk, struct sk_buff *prev, struct sk_buff *next, struct sk_buff_head *list) { - newsk->next = next; - newsk->prev = prev; - next->prev = prev->next = newsk; + /* see skb_queue_empty_lockless() for the opposite READ_ONCE() */ + WRITE_ONCE(newsk->next, next); + WRITE_ONCE(newsk->prev, prev); + WRITE_ONCE(next->prev, newsk); + WRITE_ONCE(prev->next, newsk); list->qlen++; } @@ -1722,11 +1737,11 @@ static inline void __skb_queue_splice(const struct sk_buff_head *list, struct sk_buff *first = list->next; struct sk_buff *last = list->prev; - first->prev = prev; - prev->next = first; + WRITE_ONCE(first->prev, prev); + WRITE_ONCE(prev->next, first); - last->next = next; - next->prev = last; + WRITE_ONCE(last->next, next); + WRITE_ONCE(next->prev, last); } /** @@ -1867,8 +1882,8 @@ static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) next = skb->next; prev = skb->prev; skb->next = skb->prev = NULL; - next->prev = prev; - prev->next = next; + WRITE_ONCE(next->prev, prev); + WRITE_ONCE(prev->next, next); } /** From 399c6029936ceb15b9c2aec25c1ae1f4e68835a7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 23 Oct 2019 22:44:49 -0700 Subject: [PATCH 0290/3715] udp: use skb_queue_empty_lockless() [ Upstream commit 137a0dbe3426fd7bcfe3f8117b36a87b3590e4eb ] syzbot reported a data-race [1]. We should use skb_queue_empty_lockless() to document that we are not ensuring a mutual exclusion and silence KCSAN. [1] BUG: KCSAN: data-race in __skb_recv_udp / __udp_enqueue_schedule_skb write to 0xffff888122474b50 of 8 bytes by interrupt on cpu 0: __skb_insert include/linux/skbuff.h:1852 [inline] __skb_queue_before include/linux/skbuff.h:1958 [inline] __skb_queue_tail include/linux/skbuff.h:1991 [inline] __udp_enqueue_schedule_skb+0x2c1/0x410 net/ipv4/udp.c:1470 __udp_queue_rcv_skb net/ipv4/udp.c:1940 [inline] udp_queue_rcv_one_skb+0x7bd/0xc70 net/ipv4/udp.c:2057 udp_queue_rcv_skb+0xb5/0x400 net/ipv4/udp.c:2074 udp_unicast_rcv_skb.isra.0+0x7e/0x1c0 net/ipv4/udp.c:2233 __udp4_lib_rcv+0xa44/0x17c0 net/ipv4/udp.c:2300 udp_rcv+0x2b/0x40 net/ipv4/udp.c:2470 ip_protocol_deliver_rcu+0x4d/0x420 net/ipv4/ip_input.c:204 ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252 dst_input include/net/dst.h:442 [inline] ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523 __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010 __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124 process_backlog+0x1d3/0x420 net/core/dev.c:5955 read to 0xffff888122474b50 of 8 bytes by task 8921 on cpu 1: skb_queue_empty include/linux/skbuff.h:1494 [inline] __skb_recv_udp+0x18d/0x500 net/ipv4/udp.c:1653 udp_recvmsg+0xe1/0xb10 net/ipv4/udp.c:1712 inet_recvmsg+0xbb/0x250 net/ipv4/af_inet.c:838 sock_recvmsg_nosec+0x5c/0x70 net/socket.c:871 ___sys_recvmsg+0x1a0/0x3e0 net/socket.c:2480 do_recvmmsg+0x19a/0x5c0 net/socket.c:2601 __sys_recvmmsg+0x1ef/0x200 net/socket.c:2680 __do_sys_recvmmsg net/socket.c:2703 [inline] __se_sys_recvmmsg net/socket.c:2696 [inline] __x64_sys_recvmmsg+0x89/0xb0 net/socket.c:2696 do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 8921 Comm: syz-executor.4 Not tainted 5.4.0-rc3+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/udp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 4002b41cf015..b88456a6b69b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1468,7 +1468,7 @@ static int first_packet_length(struct sock *sk) spin_lock_bh(&rcvq->lock); skb = __first_packet_length(sk, rcvq, &total); - if (!skb && !skb_queue_empty(sk_queue)) { + if (!skb && !skb_queue_empty_lockless(sk_queue)) { spin_lock(&sk_queue->lock); skb_queue_splice_tail_init(sk_queue, rcvq); spin_unlock(&sk_queue->lock); @@ -1543,7 +1543,7 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, return skb; } - if (skb_queue_empty(sk_queue)) { + if (skb_queue_empty_lockless(sk_queue)) { spin_unlock_bh(&queue->lock); goto busy_check; } @@ -1570,7 +1570,7 @@ busy_check: break; sk_busy_loop(sk, flags & MSG_DONTWAIT); - } while (!skb_queue_empty(sk_queue)); + } while (!skb_queue_empty_lockless(sk_queue)); /* sk_queue is empty, reader_queue may contain peeked packets */ } while (timeo && From 8e7bea435919082c9f74c53d2cb3ceac13f3d6b1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 23 Oct 2019 22:44:50 -0700 Subject: [PATCH 0291/3715] net: use skb_queue_empty_lockless() in poll() handlers [ Upstream commit 3ef7cf57c72f32f61e97f8fa401bc39ea1f1a5d4 ] Many poll() handlers are lockless. Using skb_queue_empty_lockless() instead of skb_queue_empty() is more appropriate. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/isdn/capi/capi.c | 2 +- net/atm/common.c | 2 +- net/bluetooth/af_bluetooth.c | 4 ++-- net/caif/caif_socket.c | 2 +- net/core/datagram.c | 4 ++-- net/ipv4/tcp.c | 2 +- net/ipv4/udp.c | 2 +- net/nfc/llcp_sock.c | 4 ++-- net/phonet/socket.c | 4 ++-- net/sctp/socket.c | 4 ++-- net/tipc/socket.c | 4 ++-- net/unix/af_unix.c | 6 +++--- net/vmw_vsock/af_vsock.c | 2 +- 13 files changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/isdn/capi/capi.c b/drivers/isdn/capi/capi.c index 91efbf0f19f9..da45049de97e 100644 --- a/drivers/isdn/capi/capi.c +++ b/drivers/isdn/capi/capi.c @@ -743,7 +743,7 @@ capi_poll(struct file *file, poll_table *wait) poll_wait(file, &(cdev->recvwait), wait); mask = POLLOUT | POLLWRNORM; - if (!skb_queue_empty(&cdev->recvqueue)) + if (!skb_queue_empty_lockless(&cdev->recvqueue)) mask |= POLLIN | POLLRDNORM; return mask; } diff --git a/net/atm/common.c b/net/atm/common.c index 9e812c782a37..0fd2d26d4c6e 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -667,7 +667,7 @@ unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait) mask |= POLLHUP; /* readable? */ - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; /* writable? */ diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index b216e697deac..b48d54783e5d 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -460,7 +460,7 @@ unsigned int bt_sock_poll(struct file *file, struct socket *sock, if (sk->sk_state == BT_LISTEN) return bt_accept_poll(sk); - if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= POLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); @@ -470,7 +470,7 @@ unsigned int bt_sock_poll(struct file *file, struct socket *sock, if (sk->sk_shutdown == SHUTDOWN_MASK) mask |= POLLHUP; - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; if (sk->sk_state == BT_CLOSED) diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 632d5a416d97..df936d2f58bd 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -953,7 +953,7 @@ static unsigned int caif_poll(struct file *file, mask |= POLLRDHUP; /* readable? */ - if (!skb_queue_empty(&sk->sk_receive_queue) || + if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || (sk->sk_shutdown & RCV_SHUTDOWN)) mask |= POLLIN | POLLRDNORM; diff --git a/net/core/datagram.c b/net/core/datagram.c index 632adc4301e2..4336c720ce4b 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -844,7 +844,7 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, mask = 0; /* exceptional events? */ - if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= POLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); @@ -854,7 +854,7 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, mask |= POLLHUP; /* readable? */ - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; /* Connection-based need to check for termination and startup */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c1f59a53f68f..ee0669d0d867 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -574,7 +574,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) } /* This barrier is coupled with smp_wmb() in tcp_reset() */ smp_rmb(); - if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= POLLERR; return mask; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b88456a6b69b..ab3f272a0884 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2550,7 +2550,7 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) unsigned int mask = datagram_poll(file, sock, wait); struct sock *sk = sock->sk; - if (!skb_queue_empty(&udp_sk(sk)->reader_queue)) + if (!skb_queue_empty_lockless(&udp_sk(sk)->reader_queue)) mask |= POLLIN | POLLRDNORM; sock_rps_record_flow(sk); diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index a51bfba19b9e..694a43c05eb9 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -567,11 +567,11 @@ static unsigned int llcp_sock_poll(struct file *file, struct socket *sock, if (sk->sk_state == LLCP_LISTEN) return llcp_accept_poll(sk); - if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= POLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; if (sk->sk_state == LLCP_CLOSED) diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 1b050dd17393..a1df36f3bb6e 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -352,9 +352,9 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock, if (sk->sk_state == TCP_CLOSE) return POLLERR; - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; - if (!skb_queue_empty(&pn->ctrlreq_queue)) + if (!skb_queue_empty_lockless(&pn->ctrlreq_queue)) mask |= POLLPRI; if (!mask && sk->sk_state == TCP_CLOSE_WAIT) return POLLHUP; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 4045d203b7d4..8bcce38521da 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -7371,7 +7371,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) mask = 0; /* Is there any exceptional events? */ - if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= POLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); if (sk->sk_shutdown & RCV_SHUTDOWN) @@ -7380,7 +7380,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) mask |= POLLHUP; /* Is it readable? Reconsider this code with TCP-style support. */ - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; /* The association is either gone or not ready. */ diff --git a/net/tipc/socket.c b/net/tipc/socket.c index f13fb607c563..21929ba196eb 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -714,14 +714,14 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, /* fall thru' */ case TIPC_LISTEN: case TIPC_CONNECTING: - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= (POLLIN | POLLRDNORM); break; case TIPC_OPEN: if (!tsk->cong_link_cnt) mask |= POLLOUT; if (tipc_sk_type_connectionless(sk) && - (!skb_queue_empty(&sk->sk_receive_queue))) + (!skb_queue_empty_lockless(&sk->sk_receive_queue))) mask |= (POLLIN | POLLRDNORM); break; case TIPC_DISCONNECTING: diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 2adfcc6dec5a..4de9dfd14d09 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2665,7 +2665,7 @@ static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table mask |= POLLRDHUP | POLLIN | POLLRDNORM; /* readable? */ - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; /* Connection-based need to check for termination and startup */ @@ -2693,7 +2693,7 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, mask = 0; /* exceptional events? */ - if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= POLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); @@ -2703,7 +2703,7 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, mask |= POLLHUP; /* readable? */ - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; /* Connection-based need to check for termination and startup */ diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 148c949cdfe7..1939b77e98b7 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -880,7 +880,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock, * the queue and write as long as the socket isn't shutdown for * sending. */ - if (!skb_queue_empty(&sk->sk_receive_queue) || + if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || (sk->sk_shutdown & RCV_SHUTDOWN)) { mask |= POLLIN | POLLRDNORM; } From 0d50fc2bb4c748951c05280716113cffc3efbb7f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 23 Oct 2019 22:44:51 -0700 Subject: [PATCH 0292/3715] net: use skb_queue_empty_lockless() in busy poll contexts [ Upstream commit 3f926af3f4d688e2e11e7f8ed04e277a14d4d4a4 ] Busy polling usually runs without locks. Let's use skb_queue_empty_lockless() instead of skb_queue_empty() Also uses READ_ONCE() in __skb_try_recv_datagram() to address a similar potential problem. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/datagram.c | 2 +- net/core/sock.c | 2 +- net/ipv4/tcp.c | 2 +- net/sctp/socket.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/core/datagram.c b/net/core/datagram.c index 4336c720ce4b..85fcca395fad 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -281,7 +281,7 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, break; sk_busy_loop(sk, flags & MSG_DONTWAIT); - } while (sk->sk_receive_queue.prev != *last); + } while (READ_ONCE(sk->sk_receive_queue.prev) != *last); error = -EAGAIN; diff --git a/net/core/sock.c b/net/core/sock.c index a6f33a717f41..7ccbcd853cbc 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3381,7 +3381,7 @@ bool sk_busy_loop_end(void *p, unsigned long start_time) { struct sock *sk = p; - return !skb_queue_empty(&sk->sk_receive_queue) || + return !skb_queue_empty_lockless(&sk->sk_receive_queue) || sk_busy_loop_timeout(sk, start_time); } EXPORT_SYMBOL(sk_busy_loop_end); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ee0669d0d867..8f07655718f3 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1787,7 +1787,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, if (unlikely(flags & MSG_ERRQUEUE)) return inet_recv_error(sk, msg, len, addr_len); - if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue) && + if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue) && (sk->sk_state == TCP_ESTABLISHED)) sk_busy_loop(sk, nonblock); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 8bcce38521da..d1c917851b0c 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -7716,7 +7716,7 @@ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, if (sk_can_busy_loop(sk)) { sk_busy_loop(sk, noblock); - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) continue; } From 41a230fef787315d982dd915ef2a33a136e9a1d5 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 29 Oct 2019 01:24:32 +0800 Subject: [PATCH 0293/3715] vxlan: check tun_info options_len properly [ Upstream commit eadf52cf1852196a1363044dcda22fa5d7f296f7 ] This patch is to improve the tun_info options_len by dropping the skb when TUNNEL_VXLAN_OPT is set but options_len is less than vxlan_metadata. This can void a potential out-of-bounds access on ip_tun_info. Fixes: ee122c79d422 ("vxlan: Flow based tunneling") Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vxlan.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 2fbaa279988e..6d26bbd190dd 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2169,8 +2169,11 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, vni = tunnel_id_to_key32(info->key.tun_id); ifindex = 0; dst_cache = &info->dst_cache; - if (info->options_len) + if (info->options_len) { + if (info->options_len < sizeof(*md)) + goto drop; md = ip_tunnel_info_opts(info); + } ttl = info->key.ttl; tos = info->key.tos; label = info->key.label; From b0c64eda9996479c9b135c23f5b18a0314af4a98 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 28 Oct 2019 23:19:35 +0800 Subject: [PATCH 0294/3715] erspan: fix the tun_info options_len check for erspan [ Upstream commit 2eb8d6d2910cfe3dc67dc056f26f3dd9c63d47cd ] The check for !md doens't really work for ip_tunnel_info_opts(info) which only does info + 1. Also to avoid out-of-bounds access on info, it should ensure options_len is not less than erspan_metadata in both erspan_xmit() and ip6erspan_tunnel_xmit(). Fixes: 1a66a836da ("gre: add collect_md mode to ERSPAN tunnel") Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_gre.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 347be2ea78d4..71ff2531d973 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -592,6 +592,9 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev, truncate = true; } + if (tun_info->options_len < sizeof(*md)) + goto err_free_rt; + md = ip_tunnel_info_opts(tun_info); if (!md) goto err_free_rt; From 4fa6002a34db62f5a5d536f30fc9dedd1ce7ee84 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 1 Nov 2019 10:32:19 -0700 Subject: [PATCH 0295/3715] inet: stop leaking jiffies on the wire [ Upstream commit a904a0693c189691eeee64f6c6b188bd7dc244e9 ] Historically linux tried to stick to RFC 791, 1122, 2003 for IPv4 ID field generation. RFC 6864 made clear that no matter how hard we try, we can not ensure unicity of IP ID within maximum lifetime for all datagrams with a given source address/destination address/protocol tuple. Linux uses a per socket inet generator (inet_id), initialized at connection startup with a XOR of 'jiffies' and other fields that appear clear on the wire. Thiemo Nagel pointed that this strategy is a privacy concern as this provides 16 bits of entropy to fingerprint devices. Let's switch to a random starting point, this is just as good as far as RFC 6864 is concerned and does not leak anything critical. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: Thiemo Nagel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dccp/ipv4.c | 2 +- net/ipv4/datagram.c | 2 +- net/ipv4/tcp_ipv4.c | 4 ++-- net/sctp/socket.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 17afa03cab3a..176bddacc16e 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -121,7 +121,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_daddr, inet->inet_sport, inet->inet_dport); - inet->inet_id = dp->dccps_iss ^ jiffies; + inet->inet_id = prandom_u32(); err = dccp_connect(sk); rt = NULL; diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index f915abff1350..d3eddfd13875 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -75,7 +75,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len inet->inet_dport = usin->sin_port; sk->sk_state = TCP_ESTABLISHED; sk_set_txhash(sk); - inet->inet_id = jiffies; + inet->inet_id = prandom_u32(); sk_dst_set(sk, &rt->dst); err = 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0569718e3656..44a41ac2b0ca 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -245,7 +245,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_daddr); } - inet->inet_id = tp->write_seq ^ jiffies; + inet->inet_id = prandom_u32(); if (tcp_fastopen_defer_connect(sk, &err)) return err; @@ -1368,7 +1368,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, inet_csk(newsk)->icsk_ext_hdr_len = 0; if (inet_opt) inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; - newinet->inet_id = newtp->write_seq ^ jiffies; + newinet->inet_id = prandom_u32(); if (!dst) { dst = inet_csk_route_child_sock(sk, newsk, req); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d1c917851b0c..09cda66d0567 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -8136,7 +8136,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, newinet->inet_rcv_saddr = inet->inet_rcv_saddr; newinet->inet_dport = htons(asoc->peer.port); newinet->pmtudisc = inet->pmtudisc; - newinet->inet_id = asoc->next_tsn ^ jiffies; + newinet->inet_id = prandom_u32(); newinet->uc_ttl = inet->uc_ttl; newinet->mc_loop = 1; From a9de6f42e945cdb24b59c7ab7ebad1eba6cb5875 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 22 Oct 2019 07:57:46 -0700 Subject: [PATCH 0296/3715] net/flow_dissector: switch to siphash commit 55667441c84fa5e0911a0aac44fb059c15ba6da2 upstream. UDP IPv6 packets auto flowlabels are using a 32bit secret (static u32 hashrnd in net/core/flow_dissector.c) and apply jhash() over fields known by the receivers. Attackers can easily infer the 32bit secret and use this information to identify a device and/or user, since this 32bit secret is only set at boot time. Really, using jhash() to generate cookies sent on the wire is a serious security concern. Trying to change the rol32(hash, 16) in ip6_make_flowlabel() would be a dead end. Trying to periodically change the secret (like in sch_sfq.c) could change paths taken in the network for long lived flows. Let's switch to siphash, as we did in commit df453700e8d8 ("inet: switch IP ID generator to siphash") Using a cryptographically strong pseudo random function will solve this privacy issue and more generally remove other weak points in the stack. Packet schedulers using skb_get_hash_perturb() benefit from this change. Fixes: b56774163f99 ("ipv6: Enable auto flow labels by default") Fixes: 42240901f7c4 ("ipv6: Implement different admin modes for automatic flow labels") Fixes: 67800f9b1f4e ("ipv6: Call skb_get_hash_flowi6 to get skb->hash in ip6_make_flowlabel") Fixes: cb1ce2ef387b ("ipv6: Implement automatic flow label generation on transmit") Signed-off-by: Eric Dumazet Reported-by: Jonathan Berger Reported-by: Amit Klein Reported-by: Benny Pinkas Cc: Tom Herbert Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/skbuff.h | 3 ++- include/net/flow_dissector.h | 3 ++- include/net/fq.h | 2 +- include/net/fq_impl.h | 4 +-- net/core/flow_dissector.c | 48 +++++++++++++++--------------------- net/sched/sch_hhf.c | 8 +++--- net/sched/sch_sfb.c | 13 +++++----- net/sched/sch_sfq.c | 14 ++++++----- 8 files changed, 46 insertions(+), 49 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 248806103fb7..ec00d9264e5c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1228,7 +1228,8 @@ static inline __u32 skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 return skb->hash; } -__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb); +__u32 skb_get_hash_perturb(const struct sk_buff *skb, + const siphash_key_t *perturb); static inline __u32 skb_get_hash_raw(const struct sk_buff *skb) { diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 22aba321282d..227dc0a84172 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -4,6 +4,7 @@ #include #include +#include #include /** @@ -229,7 +230,7 @@ struct flow_dissector { struct flow_keys { struct flow_dissector_key_control control; #define FLOW_KEYS_HASH_START_FIELD basic - struct flow_dissector_key_basic basic; + struct flow_dissector_key_basic basic __aligned(SIPHASH_ALIGNMENT); struct flow_dissector_key_tags tags; struct flow_dissector_key_vlan vlan; struct flow_dissector_key_keyid keyid; diff --git a/include/net/fq.h b/include/net/fq.h index 6d8521a30c5c..2c7687902789 100644 --- a/include/net/fq.h +++ b/include/net/fq.h @@ -70,7 +70,7 @@ struct fq { struct list_head backlogs; spinlock_t lock; u32 flows_cnt; - u32 perturbation; + siphash_key_t perturbation; u32 limit; u32 memory_limit; u32 memory_usage; diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h index ac1a2317941e..46903e23eab9 100644 --- a/include/net/fq_impl.h +++ b/include/net/fq_impl.h @@ -105,7 +105,7 @@ static struct fq_flow *fq_flow_classify(struct fq *fq, lockdep_assert_held(&fq->lock); - hash = skb_get_hash_perturb(skb, fq->perturbation); + hash = skb_get_hash_perturb(skb, &fq->perturbation); idx = reciprocal_scale(hash, fq->flows_cnt); flow = &fq->flows[idx]; @@ -255,7 +255,7 @@ static int fq_init(struct fq *fq, int flows_cnt) INIT_LIST_HEAD(&fq->backlogs); spin_lock_init(&fq->lock); fq->flows_cnt = max_t(u32, flows_cnt, 1); - fq->perturbation = prandom_u32(); + get_random_bytes(&fq->perturbation, sizeof(fq->perturbation)); fq->quantum = 300; fq->limit = 8192; fq->memory_limit = 16 << 20; /* 16 MBytes */ diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index e2e716003ede..7be5c20a93a5 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -889,45 +889,34 @@ out_bad: } EXPORT_SYMBOL(__skb_flow_dissect); -static u32 hashrnd __read_mostly; +static siphash_key_t hashrnd __read_mostly; static __always_inline void __flow_hash_secret_init(void) { net_get_random_once(&hashrnd, sizeof(hashrnd)); } -static __always_inline u32 __flow_hash_words(const u32 *words, u32 length, - u32 keyval) +static const void *flow_keys_hash_start(const struct flow_keys *flow) { - return jhash2(words, length, keyval); -} - -static inline const u32 *flow_keys_hash_start(const struct flow_keys *flow) -{ - const void *p = flow; - - BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % sizeof(u32)); - return (const u32 *)(p + FLOW_KEYS_HASH_OFFSET); + BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % SIPHASH_ALIGNMENT); + return &flow->FLOW_KEYS_HASH_START_FIELD; } static inline size_t flow_keys_hash_length(const struct flow_keys *flow) { - size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs); - BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32)); - BUILD_BUG_ON(offsetof(typeof(*flow), addrs) != - sizeof(*flow) - sizeof(flow->addrs)); + size_t len = offsetof(typeof(*flow), addrs) - FLOW_KEYS_HASH_OFFSET; switch (flow->control.addr_type) { case FLOW_DISSECTOR_KEY_IPV4_ADDRS: - diff -= sizeof(flow->addrs.v4addrs); + len += sizeof(flow->addrs.v4addrs); break; case FLOW_DISSECTOR_KEY_IPV6_ADDRS: - diff -= sizeof(flow->addrs.v6addrs); + len += sizeof(flow->addrs.v6addrs); break; case FLOW_DISSECTOR_KEY_TIPC_ADDRS: - diff -= sizeof(flow->addrs.tipcaddrs); + len += sizeof(flow->addrs.tipcaddrs); break; } - return (sizeof(*flow) - diff) / sizeof(u32); + return len; } __be32 flow_get_u32_src(const struct flow_keys *flow) @@ -993,14 +982,15 @@ static inline void __flow_hash_consistentify(struct flow_keys *keys) } } -static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval) +static inline u32 __flow_hash_from_keys(struct flow_keys *keys, + const siphash_key_t *keyval) { u32 hash; __flow_hash_consistentify(keys); - hash = __flow_hash_words(flow_keys_hash_start(keys), - flow_keys_hash_length(keys), keyval); + hash = siphash(flow_keys_hash_start(keys), + flow_keys_hash_length(keys), keyval); if (!hash) hash = 1; @@ -1010,12 +1000,13 @@ static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval) u32 flow_hash_from_keys(struct flow_keys *keys) { __flow_hash_secret_init(); - return __flow_hash_from_keys(keys, hashrnd); + return __flow_hash_from_keys(keys, &hashrnd); } EXPORT_SYMBOL(flow_hash_from_keys); static inline u32 ___skb_get_hash(const struct sk_buff *skb, - struct flow_keys *keys, u32 keyval) + struct flow_keys *keys, + const siphash_key_t *keyval) { skb_flow_dissect_flow_keys(skb, keys, FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); @@ -1063,7 +1054,7 @@ u32 __skb_get_hash_symmetric(const struct sk_buff *skb) NULL, 0, 0, 0, FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); - return __flow_hash_from_keys(&keys, hashrnd); + return __flow_hash_from_keys(&keys, &hashrnd); } EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric); @@ -1083,13 +1074,14 @@ void __skb_get_hash(struct sk_buff *skb) __flow_hash_secret_init(); - hash = ___skb_get_hash(skb, &keys, hashrnd); + hash = ___skb_get_hash(skb, &keys, &hashrnd); __skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys)); } EXPORT_SYMBOL(__skb_get_hash); -__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb) +__u32 skb_get_hash_perturb(const struct sk_buff *skb, + const siphash_key_t *perturb) { struct flow_keys keys; diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index fa256f8038af..c73475c3a464 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -4,11 +4,11 @@ * Copyright (C) 2013 Nandita Dukkipati */ -#include #include #include #include #include +#include #include #include @@ -125,7 +125,7 @@ struct wdrr_bucket { struct hhf_sched_data { struct wdrr_bucket buckets[WDRR_BUCKET_CNT]; - u32 perturbation; /* hash perturbation */ + siphash_key_t perturbation; /* hash perturbation */ u32 quantum; /* psched_mtu(qdisc_dev(sch)); */ u32 drop_overlimit; /* number of times max qdisc packet * limit was hit @@ -263,7 +263,7 @@ static enum wdrr_bucket_idx hhf_classify(struct sk_buff *skb, struct Qdisc *sch) } /* Get hashed flow-id of the skb. */ - hash = skb_get_hash_perturb(skb, q->perturbation); + hash = skb_get_hash_perturb(skb, &q->perturbation); /* Check if this packet belongs to an already established HH flow. */ flow_pos = hash & HHF_BIT_MASK; @@ -578,7 +578,7 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt) sch->limit = 1000; q->quantum = psched_mtu(qdisc_dev(sch)); - q->perturbation = prandom_u32(); + get_random_bytes(&q->perturbation, sizeof(q->perturbation)); INIT_LIST_HEAD(&q->new_buckets); INIT_LIST_HEAD(&q->old_buckets); diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index cc39e170b4aa..04f15e0aeaa8 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include @@ -49,7 +49,7 @@ struct sfb_bucket { * (Section 4.4 of SFB reference : moving hash functions) */ struct sfb_bins { - u32 perturbation; /* jhash perturbation */ + siphash_key_t perturbation; /* siphash key */ struct sfb_bucket bins[SFB_LEVELS][SFB_NUMBUCKETS]; }; @@ -221,7 +221,8 @@ static u32 sfb_compute_qlen(u32 *prob_r, u32 *avgpm_r, const struct sfb_sched_da static void sfb_init_perturbation(u32 slot, struct sfb_sched_data *q) { - q->bins[slot].perturbation = prandom_u32(); + get_random_bytes(&q->bins[slot].perturbation, + sizeof(q->bins[slot].perturbation)); } static void sfb_swap_slot(struct sfb_sched_data *q) @@ -317,9 +318,9 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* If using external classifiers, get result and record it. */ if (!sfb_classify(skb, fl, &ret, &salt)) goto other_drop; - sfbhash = jhash_1word(salt, q->bins[slot].perturbation); + sfbhash = siphash_1u32(salt, &q->bins[slot].perturbation); } else { - sfbhash = skb_get_hash_perturb(skb, q->bins[slot].perturbation); + sfbhash = skb_get_hash_perturb(skb, &q->bins[slot].perturbation); } @@ -355,7 +356,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* Inelastic flow */ if (q->double_buffering) { sfbhash = skb_get_hash_perturb(skb, - q->bins[slot].perturbation); + &q->bins[slot].perturbation); if (!sfbhash) sfbhash = 1; sfb_skb_cb(skb)->hashes[slot] = sfbhash; diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 3fbf20126045..cbc54ddfe076 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include @@ -121,7 +121,7 @@ struct sfq_sched_data { u8 headdrop; u8 maxdepth; /* limit of packets per flow */ - u32 perturbation; + siphash_key_t perturbation; u8 cur_depth; /* depth of longest slot */ u8 flags; unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */ @@ -160,7 +160,7 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index static unsigned int sfq_hash(const struct sfq_sched_data *q, const struct sk_buff *skb) { - return skb_get_hash_perturb(skb, q->perturbation) & (q->divisor - 1); + return skb_get_hash_perturb(skb, &q->perturbation) & (q->divisor - 1); } static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, @@ -609,9 +609,11 @@ static void sfq_perturbation(unsigned long arg) struct Qdisc *sch = (struct Qdisc *)arg; struct sfq_sched_data *q = qdisc_priv(sch); spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); + siphash_key_t nkey; + get_random_bytes(&nkey, sizeof(nkey)); spin_lock(root_lock); - q->perturbation = prandom_u32(); + q->perturbation = nkey; if (!q->filter_list && q->tail) sfq_rehash(sch); spin_unlock(root_lock); @@ -690,7 +692,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) del_timer(&q->perturb_timer); if (q->perturb_period) { mod_timer(&q->perturb_timer, jiffies + q->perturb_period); - q->perturbation = prandom_u32(); + get_random_bytes(&q->perturbation, sizeof(q->perturbation)); } sch_tree_unlock(sch); kfree(p); @@ -746,7 +748,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) q->quantum = psched_mtu(qdisc_dev(sch)); q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); q->perturb_period = 0; - q->perturbation = prandom_u32(); + get_random_bytes(&q->perturbation, sizeof(q->perturbation)); if (opt) { int err = sfq_change(sch, opt); From 7556e63b40f78a43553acd9f4912ceff322fad6f Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Thu, 17 Oct 2019 08:26:06 -0700 Subject: [PATCH 0297/3715] dmaengine: qcom: bam_dma: Fix resource leak commit 7667819385457b4aeb5fac94f67f52ab52cc10d5 upstream. bam_dma_terminate_all() will leak resources if any of the transactions are committed to the hardware (present in the desc fifo), and not complete. Since bam_dma_terminate_all() does not cause the hardware to be updated, the hardware will still operate on any previously committed transactions. This can cause memory corruption if the memory for the transaction has been reassigned, and will cause a sync issue between the BAM and its client(s). Fix this by properly updating the hardware in bam_dma_terminate_all(). Fixes: e7c0fe2a5c84 ("dmaengine: add Qualcomm BAM dma driver") Signed-off-by: Jeffrey Hugo Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20191017152606.34120-1-jeffrey.l.hugo@gmail.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/qcom/bam_dma.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c index 8fbf175fdcc7..57c5cc51f862 100644 --- a/drivers/dma/qcom/bam_dma.c +++ b/drivers/dma/qcom/bam_dma.c @@ -690,7 +690,21 @@ static int bam_dma_terminate_all(struct dma_chan *chan) /* remove all transactions, including active transaction */ spin_lock_irqsave(&bchan->vc.lock, flag); + /* + * If we have transactions queued, then some might be committed to the + * hardware in the desc fifo. The only way to reset the desc fifo is + * to do a hardware reset (either by pipe or the entire block). + * bam_chan_init_hw() will trigger a pipe reset, and also reinit the + * pipe. If the pipe is left disabled (default state after pipe reset) + * and is accessed by a connected hardware engine, a fatal error in + * the BAM will occur. There is a small window where this could happen + * with bam_chan_init_hw(), but it is assumed that the caller has + * stopped activity on any attached hardware engine. Make sure to do + * this first so that the BAM hardware doesn't cause memory corruption + * by accessing freed resources. + */ if (bchan->curr_txd) { + bam_chan_init_hw(bchan, bchan->curr_txd->dir); list_add(&bchan->curr_txd->vd.node, &bchan->vc.desc_issued); bchan->curr_txd = NULL; } From 0b0c518c176dcbaebfb269a58de8470c6733405d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 17 Dec 2018 10:14:53 +0100 Subject: [PATCH 0298/3715] sched/wake_q: Fix wakeup ordering for wake_q commit 4c4e3731564c8945ac5ac90fc2a1e1f21cb79c92 upstream. Notable cmpxchg() does not provide ordering when it fails, however wake_q_add() requires ordering in this specific case too. Without this it would be possible for the concurrent wakeup to not observe our prior state. Andrea Parri provided: C wake_up_q-wake_q_add { int next = 0; int y = 0; } P0(int *next, int *y) { int r0; /* in wake_up_q() */ WRITE_ONCE(*next, 1); /* node->next = NULL */ smp_mb(); /* implied by wake_up_process() */ r0 = READ_ONCE(*y); } P1(int *next, int *y) { int r1; /* in wake_q_add() */ WRITE_ONCE(*y, 1); /* wake_cond = true */ smp_mb__before_atomic(); r1 = cmpxchg_relaxed(next, 1, 2); } exists (0:r0=0 /\ 1:r1=0) This "exists" clause cannot be satisfied according to the LKMM: Test wake_up_q-wake_q_add Allowed States 3 0:r0=0; 1:r1=1; 0:r0=1; 1:r1=0; 0:r0=1; 1:r1=1; No Witnesses Positive: 0 Negative: 3 Condition exists (0:r0=0 /\ 1:r1=0) Observation wake_up_q-wake_q_add Never 0 3 Reported-by: Yongji Xie Signed-off-by: Peter Zijlstra (Intel) Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Waiman Long Cc: Will Deacon Signed-off-by: Ingo Molnar Signed-off-by: Jisheng Zhang Signed-off-by: Greg Kroah-Hartman --- kernel/sched/core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 32ba789c544c..bbf8b32fc69e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -432,10 +432,11 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task) * its already queued (either by us or someone else) and will get the * wakeup due to that. * - * This cmpxchg() implies a full barrier, which pairs with the write - * barrier implied by the wakeup in wake_up_q(). + * In order to ensure that a pending wakeup will observe our pending + * state, even in the failed case, an explicit smp_mb() must be used. */ - if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL)) + smp_mb__before_atomic(); + if (cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL)) return; get_task_struct(task); From b125188db9b12a3d6f7fc7d3a66929f2ffae7872 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 30 Mar 2018 13:15:26 +0900 Subject: [PATCH 0299/3715] kbuild: use -fmacro-prefix-map to make __FILE__ a relative path [ Upstream commit a73619a845d5625079cc1b3b820f44c899618388 ] The __FILE__ macro is used everywhere in the kernel to locate the file printing the log message, such as WARN_ON(), etc. If the kernel is built out of tree, this can be a long absolute path, like this: WARNING: CPU: 1 PID: 1 at /path/to/build/directory/arch/arm64/kernel/foo.c:... This is because Kbuild runs in the objtree instead of the srctree, then __FILE__ is expanded to a file path prefixed with $(srctree)/. Commit 9da0763bdd82 ("kbuild: Use relative path when building in a subdir of the source tree") improved this to some extent; $(srctree) becomes ".." if the objtree is a child of the srctree. For other cases of out-of-tree build, __FILE__ is still the absolute path. It also means the kernel image depends on where it was built. A brand-new option from GCC, -fmacro-prefix-map, solves this problem. If your compiler supports it, __FILE__ is the relative path from the srctree regardless of O= option. This provides more readable log and more reproducible builds. Please note __FILE__ is always an absolute path for external modules. Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index 1d7f47334ca2..61660387eb34 100644 --- a/Makefile +++ b/Makefile @@ -840,6 +840,9 @@ KBUILD_CFLAGS += $(call cc-option,-Werror=incompatible-pointer-types) # Require designated initializers for all marked structures KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init) +# change __FILE__ to the relative path from the srctree +KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=) + # use the deterministic mode of AR if available KBUILD_ARFLAGS := $(call ar-option,D) From 2103cc67dbfbcfc5c5838d4d0859631e9f3dfce3 Mon Sep 17 00:00:00 2001 From: Seth Forshee Date: Wed, 17 Jul 2019 11:06:26 -0500 Subject: [PATCH 0300/3715] kbuild: add -fcf-protection=none when using retpoline flags [ Upstream commit 29be86d7f9cb18df4123f309ac7857570513e8bc ] The gcc -fcf-protection=branch option is not compatible with -mindirect-branch=thunk-extern. The latter is used when CONFIG_RETPOLINE is selected, and this will fail to build with a gcc which has -fcf-protection=branch enabled by default. Adding -fcf-protection=none when building with retpoline enabled prevents such build failures. Signed-off-by: Seth Forshee Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- Makefile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Makefile b/Makefile index 61660387eb34..52aaa6150099 100644 --- a/Makefile +++ b/Makefile @@ -843,6 +843,12 @@ KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init) # change __FILE__ to the relative path from the srctree KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=) +# ensure -fcf-protection is disabled when using retpoline as it is +# incompatible with -mindirect-branch=thunk-extern +ifdef CONFIG_RETPOLINE +KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none) +endif + # use the deterministic mode of AR if available KBUILD_ARFLAGS := $(call ar-option,D) From 6434e433ef0a380b3e1c7ce6f773962030aae614 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 4 Sep 2019 08:42:30 +0200 Subject: [PATCH 0301/3715] platform/x86: pmc_atom: Add Siemens SIMATIC IPC227E to critclk_systems DMI table commit ad0d315b4d4e7138f43acf03308192ec00e9614d upstream. The SIMATIC IPC227E uses the PMC clock for on-board components and gets stuck during boot if the clock is disabled. Therefore, add this device to the critical systems list. Fixes: 648e921888ad ("clk: x86: Stop marking clocks as CLK_IS_CRITICAL") Signed-off-by: Jan Kiszka Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/pmc_atom.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/platform/x86/pmc_atom.c b/drivers/platform/x86/pmc_atom.c index 63a6881c7078..971ae892c611 100644 --- a/drivers/platform/x86/pmc_atom.c +++ b/drivers/platform/x86/pmc_atom.c @@ -475,6 +475,13 @@ static const struct dmi_system_id critclk_systems[] = { DMI_MATCH(DMI_BOARD_NAME, "CB6363"), }, }, + { + .ident = "SIMATIC IPC227E", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "SIEMENS AG"), + DMI_MATCH(DMI_PRODUCT_VERSION, "6ES7647-8B"), + }, + }, { /*sentinel*/ } }; From 7682407ab91c951f4774d6cd45de04b2549a462f Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Tue, 17 Sep 2019 14:38:15 +0200 Subject: [PATCH 0302/3715] iio: adc: stm32-adc: move registers definitions commit 31922f62bb527d749b99dbc776e514bcba29b7fe upstream. Move STM32 ADC registers definitions to common header. This is precursor patch to: - iio: adc: stm32-adc: fix a race when using several adcs with dma and irq It keeps registers definitions as a whole block, to ease readability and allow simple access path to EOC bits (readl) in stm32-adc-core driver. Fixes: 2763ea0585c9 ("iio: adc: stm32: add optional dma support") Signed-off-by: Fabrice Gasnier Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/stm32-adc-core.c | 27 ------- drivers/iio/adc/stm32-adc-core.h | 134 +++++++++++++++++++++++++++++++ drivers/iio/adc/stm32-adc.c | 107 ------------------------ 3 files changed, 134 insertions(+), 134 deletions(-) diff --git a/drivers/iio/adc/stm32-adc-core.c b/drivers/iio/adc/stm32-adc-core.c index 804198eb0eef..b0d346837a19 100644 --- a/drivers/iio/adc/stm32-adc-core.c +++ b/drivers/iio/adc/stm32-adc-core.c @@ -33,36 +33,9 @@ #include "stm32-adc-core.h" -/* STM32F4 - common registers for all ADC instances: 1, 2 & 3 */ -#define STM32F4_ADC_CSR (STM32_ADCX_COMN_OFFSET + 0x00) -#define STM32F4_ADC_CCR (STM32_ADCX_COMN_OFFSET + 0x04) - -/* STM32F4_ADC_CSR - bit fields */ -#define STM32F4_EOC3 BIT(17) -#define STM32F4_EOC2 BIT(9) -#define STM32F4_EOC1 BIT(1) - -/* STM32F4_ADC_CCR - bit fields */ -#define STM32F4_ADC_ADCPRE_SHIFT 16 -#define STM32F4_ADC_ADCPRE_MASK GENMASK(17, 16) - /* STM32 F4 maximum analog clock rate (from datasheet) */ #define STM32F4_ADC_MAX_CLK_RATE 36000000 -/* STM32H7 - common registers for all ADC instances */ -#define STM32H7_ADC_CSR (STM32_ADCX_COMN_OFFSET + 0x00) -#define STM32H7_ADC_CCR (STM32_ADCX_COMN_OFFSET + 0x08) - -/* STM32H7_ADC_CSR - bit fields */ -#define STM32H7_EOC_SLV BIT(18) -#define STM32H7_EOC_MST BIT(2) - -/* STM32H7_ADC_CCR - bit fields */ -#define STM32H7_PRESC_SHIFT 18 -#define STM32H7_PRESC_MASK GENMASK(21, 18) -#define STM32H7_CKMODE_SHIFT 16 -#define STM32H7_CKMODE_MASK GENMASK(17, 16) - /* STM32 H7 maximum analog clock rate (from datasheet) */ #define STM32H7_ADC_MAX_CLK_RATE 36000000 diff --git a/drivers/iio/adc/stm32-adc-core.h b/drivers/iio/adc/stm32-adc-core.h index 250ee958a669..2cb9bf8b3ac4 100644 --- a/drivers/iio/adc/stm32-adc-core.h +++ b/drivers/iio/adc/stm32-adc-core.h @@ -39,6 +39,140 @@ #define STM32_ADC_MAX_ADCS 3 #define STM32_ADCX_COMN_OFFSET 0x300 +/* STM32F4 - Registers for each ADC instance */ +#define STM32F4_ADC_SR 0x00 +#define STM32F4_ADC_CR1 0x04 +#define STM32F4_ADC_CR2 0x08 +#define STM32F4_ADC_SMPR1 0x0C +#define STM32F4_ADC_SMPR2 0x10 +#define STM32F4_ADC_HTR 0x24 +#define STM32F4_ADC_LTR 0x28 +#define STM32F4_ADC_SQR1 0x2C +#define STM32F4_ADC_SQR2 0x30 +#define STM32F4_ADC_SQR3 0x34 +#define STM32F4_ADC_JSQR 0x38 +#define STM32F4_ADC_JDR1 0x3C +#define STM32F4_ADC_JDR2 0x40 +#define STM32F4_ADC_JDR3 0x44 +#define STM32F4_ADC_JDR4 0x48 +#define STM32F4_ADC_DR 0x4C + +/* STM32F4 - common registers for all ADC instances: 1, 2 & 3 */ +#define STM32F4_ADC_CSR (STM32_ADCX_COMN_OFFSET + 0x00) +#define STM32F4_ADC_CCR (STM32_ADCX_COMN_OFFSET + 0x04) + +/* STM32F4_ADC_SR - bit fields */ +#define STM32F4_STRT BIT(4) +#define STM32F4_EOC BIT(1) + +/* STM32F4_ADC_CR1 - bit fields */ +#define STM32F4_RES_SHIFT 24 +#define STM32F4_RES_MASK GENMASK(25, 24) +#define STM32F4_SCAN BIT(8) +#define STM32F4_EOCIE BIT(5) + +/* STM32F4_ADC_CR2 - bit fields */ +#define STM32F4_SWSTART BIT(30) +#define STM32F4_EXTEN_SHIFT 28 +#define STM32F4_EXTEN_MASK GENMASK(29, 28) +#define STM32F4_EXTSEL_SHIFT 24 +#define STM32F4_EXTSEL_MASK GENMASK(27, 24) +#define STM32F4_EOCS BIT(10) +#define STM32F4_DDS BIT(9) +#define STM32F4_DMA BIT(8) +#define STM32F4_ADON BIT(0) + +/* STM32F4_ADC_CSR - bit fields */ +#define STM32F4_EOC3 BIT(17) +#define STM32F4_EOC2 BIT(9) +#define STM32F4_EOC1 BIT(1) + +/* STM32F4_ADC_CCR - bit fields */ +#define STM32F4_ADC_ADCPRE_SHIFT 16 +#define STM32F4_ADC_ADCPRE_MASK GENMASK(17, 16) + +/* STM32H7 - Registers for each ADC instance */ +#define STM32H7_ADC_ISR 0x00 +#define STM32H7_ADC_IER 0x04 +#define STM32H7_ADC_CR 0x08 +#define STM32H7_ADC_CFGR 0x0C +#define STM32H7_ADC_SMPR1 0x14 +#define STM32H7_ADC_SMPR2 0x18 +#define STM32H7_ADC_PCSEL 0x1C +#define STM32H7_ADC_SQR1 0x30 +#define STM32H7_ADC_SQR2 0x34 +#define STM32H7_ADC_SQR3 0x38 +#define STM32H7_ADC_SQR4 0x3C +#define STM32H7_ADC_DR 0x40 +#define STM32H7_ADC_CALFACT 0xC4 +#define STM32H7_ADC_CALFACT2 0xC8 + +/* STM32H7 - common registers for all ADC instances */ +#define STM32H7_ADC_CSR (STM32_ADCX_COMN_OFFSET + 0x00) +#define STM32H7_ADC_CCR (STM32_ADCX_COMN_OFFSET + 0x08) + +/* STM32H7_ADC_ISR - bit fields */ +#define STM32H7_EOC BIT(2) +#define STM32H7_ADRDY BIT(0) + +/* STM32H7_ADC_IER - bit fields */ +#define STM32H7_EOCIE STM32H7_EOC + +/* STM32H7_ADC_CR - bit fields */ +#define STM32H7_ADCAL BIT(31) +#define STM32H7_ADCALDIF BIT(30) +#define STM32H7_DEEPPWD BIT(29) +#define STM32H7_ADVREGEN BIT(28) +#define STM32H7_LINCALRDYW6 BIT(27) +#define STM32H7_LINCALRDYW5 BIT(26) +#define STM32H7_LINCALRDYW4 BIT(25) +#define STM32H7_LINCALRDYW3 BIT(24) +#define STM32H7_LINCALRDYW2 BIT(23) +#define STM32H7_LINCALRDYW1 BIT(22) +#define STM32H7_ADCALLIN BIT(16) +#define STM32H7_BOOST BIT(8) +#define STM32H7_ADSTP BIT(4) +#define STM32H7_ADSTART BIT(2) +#define STM32H7_ADDIS BIT(1) +#define STM32H7_ADEN BIT(0) + +/* STM32H7_ADC_CFGR bit fields */ +#define STM32H7_EXTEN_SHIFT 10 +#define STM32H7_EXTEN_MASK GENMASK(11, 10) +#define STM32H7_EXTSEL_SHIFT 5 +#define STM32H7_EXTSEL_MASK GENMASK(9, 5) +#define STM32H7_RES_SHIFT 2 +#define STM32H7_RES_MASK GENMASK(4, 2) +#define STM32H7_DMNGT_SHIFT 0 +#define STM32H7_DMNGT_MASK GENMASK(1, 0) + +enum stm32h7_adc_dmngt { + STM32H7_DMNGT_DR_ONLY, /* Regular data in DR only */ + STM32H7_DMNGT_DMA_ONESHOT, /* DMA one shot mode */ + STM32H7_DMNGT_DFSDM, /* DFSDM mode */ + STM32H7_DMNGT_DMA_CIRC, /* DMA circular mode */ +}; + +/* STM32H7_ADC_CALFACT - bit fields */ +#define STM32H7_CALFACT_D_SHIFT 16 +#define STM32H7_CALFACT_D_MASK GENMASK(26, 16) +#define STM32H7_CALFACT_S_SHIFT 0 +#define STM32H7_CALFACT_S_MASK GENMASK(10, 0) + +/* STM32H7_ADC_CALFACT2 - bit fields */ +#define STM32H7_LINCALFACT_SHIFT 0 +#define STM32H7_LINCALFACT_MASK GENMASK(29, 0) + +/* STM32H7_ADC_CSR - bit fields */ +#define STM32H7_EOC_SLV BIT(18) +#define STM32H7_EOC_MST BIT(2) + +/* STM32H7_ADC_CCR - bit fields */ +#define STM32H7_PRESC_SHIFT 18 +#define STM32H7_PRESC_MASK GENMASK(21, 18) +#define STM32H7_CKMODE_SHIFT 16 +#define STM32H7_CKMODE_MASK GENMASK(17, 16) + /** * struct stm32_adc_common - stm32 ADC driver common data (for all instances) * @base: control registers base cpu addr diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c index 04be8bd951be..e59cbc9ad4f6 100644 --- a/drivers/iio/adc/stm32-adc.c +++ b/drivers/iio/adc/stm32-adc.c @@ -40,113 +40,6 @@ #include "stm32-adc-core.h" -/* STM32F4 - Registers for each ADC instance */ -#define STM32F4_ADC_SR 0x00 -#define STM32F4_ADC_CR1 0x04 -#define STM32F4_ADC_CR2 0x08 -#define STM32F4_ADC_SMPR1 0x0C -#define STM32F4_ADC_SMPR2 0x10 -#define STM32F4_ADC_HTR 0x24 -#define STM32F4_ADC_LTR 0x28 -#define STM32F4_ADC_SQR1 0x2C -#define STM32F4_ADC_SQR2 0x30 -#define STM32F4_ADC_SQR3 0x34 -#define STM32F4_ADC_JSQR 0x38 -#define STM32F4_ADC_JDR1 0x3C -#define STM32F4_ADC_JDR2 0x40 -#define STM32F4_ADC_JDR3 0x44 -#define STM32F4_ADC_JDR4 0x48 -#define STM32F4_ADC_DR 0x4C - -/* STM32F4_ADC_SR - bit fields */ -#define STM32F4_STRT BIT(4) -#define STM32F4_EOC BIT(1) - -/* STM32F4_ADC_CR1 - bit fields */ -#define STM32F4_RES_SHIFT 24 -#define STM32F4_RES_MASK GENMASK(25, 24) -#define STM32F4_SCAN BIT(8) -#define STM32F4_EOCIE BIT(5) - -/* STM32F4_ADC_CR2 - bit fields */ -#define STM32F4_SWSTART BIT(30) -#define STM32F4_EXTEN_SHIFT 28 -#define STM32F4_EXTEN_MASK GENMASK(29, 28) -#define STM32F4_EXTSEL_SHIFT 24 -#define STM32F4_EXTSEL_MASK GENMASK(27, 24) -#define STM32F4_EOCS BIT(10) -#define STM32F4_DDS BIT(9) -#define STM32F4_DMA BIT(8) -#define STM32F4_ADON BIT(0) - -/* STM32H7 - Registers for each ADC instance */ -#define STM32H7_ADC_ISR 0x00 -#define STM32H7_ADC_IER 0x04 -#define STM32H7_ADC_CR 0x08 -#define STM32H7_ADC_CFGR 0x0C -#define STM32H7_ADC_SMPR1 0x14 -#define STM32H7_ADC_SMPR2 0x18 -#define STM32H7_ADC_PCSEL 0x1C -#define STM32H7_ADC_SQR1 0x30 -#define STM32H7_ADC_SQR2 0x34 -#define STM32H7_ADC_SQR3 0x38 -#define STM32H7_ADC_SQR4 0x3C -#define STM32H7_ADC_DR 0x40 -#define STM32H7_ADC_CALFACT 0xC4 -#define STM32H7_ADC_CALFACT2 0xC8 - -/* STM32H7_ADC_ISR - bit fields */ -#define STM32H7_EOC BIT(2) -#define STM32H7_ADRDY BIT(0) - -/* STM32H7_ADC_IER - bit fields */ -#define STM32H7_EOCIE STM32H7_EOC - -/* STM32H7_ADC_CR - bit fields */ -#define STM32H7_ADCAL BIT(31) -#define STM32H7_ADCALDIF BIT(30) -#define STM32H7_DEEPPWD BIT(29) -#define STM32H7_ADVREGEN BIT(28) -#define STM32H7_LINCALRDYW6 BIT(27) -#define STM32H7_LINCALRDYW5 BIT(26) -#define STM32H7_LINCALRDYW4 BIT(25) -#define STM32H7_LINCALRDYW3 BIT(24) -#define STM32H7_LINCALRDYW2 BIT(23) -#define STM32H7_LINCALRDYW1 BIT(22) -#define STM32H7_ADCALLIN BIT(16) -#define STM32H7_BOOST BIT(8) -#define STM32H7_ADSTP BIT(4) -#define STM32H7_ADSTART BIT(2) -#define STM32H7_ADDIS BIT(1) -#define STM32H7_ADEN BIT(0) - -/* STM32H7_ADC_CFGR bit fields */ -#define STM32H7_EXTEN_SHIFT 10 -#define STM32H7_EXTEN_MASK GENMASK(11, 10) -#define STM32H7_EXTSEL_SHIFT 5 -#define STM32H7_EXTSEL_MASK GENMASK(9, 5) -#define STM32H7_RES_SHIFT 2 -#define STM32H7_RES_MASK GENMASK(4, 2) -#define STM32H7_DMNGT_SHIFT 0 -#define STM32H7_DMNGT_MASK GENMASK(1, 0) - -enum stm32h7_adc_dmngt { - STM32H7_DMNGT_DR_ONLY, /* Regular data in DR only */ - STM32H7_DMNGT_DMA_ONESHOT, /* DMA one shot mode */ - STM32H7_DMNGT_DFSDM, /* DFSDM mode */ - STM32H7_DMNGT_DMA_CIRC, /* DMA circular mode */ -}; - -/* STM32H7_ADC_CALFACT - bit fields */ -#define STM32H7_CALFACT_D_SHIFT 16 -#define STM32H7_CALFACT_D_MASK GENMASK(26, 16) -#define STM32H7_CALFACT_S_SHIFT 0 -#define STM32H7_CALFACT_S_MASK GENMASK(10, 0) - -/* STM32H7_ADC_CALFACT2 - bit fields */ -#define STM32H7_LINCALFACT_SHIFT 0 -#define STM32H7_LINCALFACT_MASK GENMASK(29, 0) - /* Number of linear calibration shadow registers / LINCALRDYW control bits */ #define STM32H7_LINCALFACT_NUM 6 From bf31532edb095a82852baaf1694dd593f9322e3f Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Tue, 17 Sep 2019 14:38:16 +0200 Subject: [PATCH 0303/3715] iio: adc: stm32-adc: fix a race when using several adcs with dma and irq commit dcb10920179ab74caf88a6f2afadecfc2743b910 upstream. End of conversion may be handled by using IRQ or DMA. There may be a race when two conversions complete at the same time on several ADCs. EOC can be read as 'set' for several ADCs, with: - an ADC configured to use IRQs. EOCIE bit is set. The handler is normally called in this case. - an ADC configured to use DMA. EOCIE bit isn't set. EOC triggers the DMA request instead. It's then automatically cleared by DMA read. But the handler gets called due to status bit is temporarily set (IRQ triggered by the other ADC). So both EOC status bit in CSR and EOCIE control bit must be checked before invoking the interrupt handler (e.g. call ISR only for IRQ-enabled ADCs). Fixes: 2763ea0585c9 ("iio: adc: stm32: add optional dma support") Signed-off-by: Fabrice Gasnier Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/stm32-adc-core.c | 43 +++++++++++++++++++++++++++++--- drivers/iio/adc/stm32-adc-core.h | 1 + 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/stm32-adc-core.c b/drivers/iio/adc/stm32-adc-core.c index b0d346837a19..bc9ebcc6508a 100644 --- a/drivers/iio/adc/stm32-adc-core.c +++ b/drivers/iio/adc/stm32-adc-core.c @@ -45,12 +45,16 @@ * @eoc1: adc1 end of conversion flag in @csr * @eoc2: adc2 end of conversion flag in @csr * @eoc3: adc3 end of conversion flag in @csr + * @ier: interrupt enable register offset for each adc + * @eocie_msk: end of conversion interrupt enable mask in @ier */ struct stm32_adc_common_regs { u32 csr; u32 eoc1_msk; u32 eoc2_msk; u32 eoc3_msk; + u32 ier; + u32 eocie_msk; }; struct stm32_adc_priv; @@ -244,6 +248,8 @@ static const struct stm32_adc_common_regs stm32f4_adc_common_regs = { .eoc1_msk = STM32F4_EOC1, .eoc2_msk = STM32F4_EOC2, .eoc3_msk = STM32F4_EOC3, + .ier = STM32F4_ADC_CR1, + .eocie_msk = STM32F4_EOCIE, }; /* STM32H7 common registers definitions */ @@ -251,8 +257,24 @@ static const struct stm32_adc_common_regs stm32h7_adc_common_regs = { .csr = STM32H7_ADC_CSR, .eoc1_msk = STM32H7_EOC_MST, .eoc2_msk = STM32H7_EOC_SLV, + .ier = STM32H7_ADC_IER, + .eocie_msk = STM32H7_EOCIE, }; +static const unsigned int stm32_adc_offset[STM32_ADC_MAX_ADCS] = { + 0, STM32_ADC_OFFSET, STM32_ADC_OFFSET * 2, +}; + +static unsigned int stm32_adc_eoc_enabled(struct stm32_adc_priv *priv, + unsigned int adc) +{ + u32 ier, offset = stm32_adc_offset[adc]; + + ier = readl_relaxed(priv->common.base + offset + priv->cfg->regs->ier); + + return ier & priv->cfg->regs->eocie_msk; +} + /* ADC common interrupt for all instances */ static void stm32_adc_irq_handler(struct irq_desc *desc) { @@ -263,13 +285,28 @@ static void stm32_adc_irq_handler(struct irq_desc *desc) chained_irq_enter(chip, desc); status = readl_relaxed(priv->common.base + priv->cfg->regs->csr); - if (status & priv->cfg->regs->eoc1_msk) + /* + * End of conversion may be handled by using IRQ or DMA. There may be a + * race here when two conversions complete at the same time on several + * ADCs. EOC may be read 'set' for several ADCs, with: + * - an ADC configured to use DMA (EOC triggers the DMA request, and + * is then automatically cleared by DR read in hardware) + * - an ADC configured to use IRQs (EOCIE bit is set. The handler must + * be called in this case) + * So both EOC status bit in CSR and EOCIE control bit must be checked + * before invoking the interrupt handler (e.g. call ISR only for + * IRQ-enabled ADCs). + */ + if (status & priv->cfg->regs->eoc1_msk && + stm32_adc_eoc_enabled(priv, 0)) generic_handle_irq(irq_find_mapping(priv->domain, 0)); - if (status & priv->cfg->regs->eoc2_msk) + if (status & priv->cfg->regs->eoc2_msk && + stm32_adc_eoc_enabled(priv, 1)) generic_handle_irq(irq_find_mapping(priv->domain, 1)); - if (status & priv->cfg->regs->eoc3_msk) + if (status & priv->cfg->regs->eoc3_msk && + stm32_adc_eoc_enabled(priv, 2)) generic_handle_irq(irq_find_mapping(priv->domain, 2)); chained_irq_exit(chip, desc); diff --git a/drivers/iio/adc/stm32-adc-core.h b/drivers/iio/adc/stm32-adc-core.h index 2cb9bf8b3ac4..9f8559cf86c4 100644 --- a/drivers/iio/adc/stm32-adc-core.h +++ b/drivers/iio/adc/stm32-adc-core.h @@ -37,6 +37,7 @@ * -------------------------------------------------------- */ #define STM32_ADC_MAX_ADCS 3 +#define STM32_ADC_OFFSET 0x100 #define STM32_ADCX_COMN_OFFSET 0x300 /* STM32F4 - Registers for each ADC instance */ From e40f21667951a2239862bd3b149495a9037b453e Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 17 Oct 2019 13:35:00 +0530 Subject: [PATCH 0304/3715] powerpc/mm: Fixup tlbie vs store ordering issue on POWER9 commit a5d4b5891c2f1f865a2def1eb0030f534e77ff86 upstream. On POWER9, under some circumstances, a broadcast TLB invalidation might complete before all previous stores have drained, potentially allowing stale stores from becoming visible after the invalidation. This works around it by doubling up those TLB invalidations which was verified by HW to be sufficient to close the risk window. This will be documented in a yet-to-be-published errata. Cc: stable@vger.kernel.org # v4.14 Fixes: 1a472c9dba6b ("powerpc/mm/radix: Add tlbflush routines") Signed-off-by: Aneesh Kumar K.V [mpe: Enable the feature in the DT CPU features code for all Power9, rename the feature to CPU_FTR_P9_TLBIE_BUG per benh.] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20180323045627.16800-3-aneesh.kumar@linux.vnet.ibm.com/ [sandipan: Backported to v4.14] Signed-off-by: Sandipan Das Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/cputable.h | 4 ++- arch/powerpc/kernel/dt_cpu_ftrs.c | 3 ++ arch/powerpc/kvm/book3s_64_mmu_radix.c | 3 ++ arch/powerpc/kvm/book3s_hv_rm_mmu.c | 11 +++++++ arch/powerpc/mm/hash_native_64.c | 16 +++++++++- arch/powerpc/mm/pgtable_64.c | 1 + arch/powerpc/mm/tlb-radix.c | 41 +++++++++++++++++++------- 7 files changed, 66 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 53b31c2bcdf4..e143017d7549 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -215,6 +215,7 @@ enum { #define CPU_FTR_DAWR LONG_ASM_CONST(0x0400000000000000) #define CPU_FTR_DABRX LONG_ASM_CONST(0x0800000000000000) #define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000000000000000) +#define CPU_FTR_P9_TLBIE_BUG LONG_ASM_CONST(0x2000000000000000) #define CPU_FTR_POWER9_DD1 LONG_ASM_CONST(0x4000000000000000) #ifndef __ASSEMBLY__ @@ -475,7 +476,8 @@ enum { CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ - CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300) + CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | \ + CPU_FTR_P9_TLBIE_BUG) #define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \ (~CPU_FTR_SAO)) #define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 2dba206b065a..15059e2446de 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -742,6 +742,9 @@ static __init void cpufeatures_cpu_quirks(void) */ if ((version & 0xffffff00) == 0x004e0100) cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1; + + if ((version & 0xffff0000) == 0x004e0000) + cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG; } static void __init cpufeatures_setup_finished(void) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 27a41695fcfd..559cba16dbe0 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -160,6 +160,9 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr, asm volatile("ptesync": : :"memory"); asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1) : : "r" (addr), "r" (kvm->arch.lpid) : "memory"); + if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) + asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1) + : : "r" (addr), "r" (kvm->arch.lpid) : "memory"); asm volatile("ptesync": : :"memory"); } diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 4962d537c186..b18966a368af 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -448,6 +448,17 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : : "r" (rbvalues[i]), "r" (kvm->arch.lpid)); } + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { + /* + * Need the extra ptesync to make sure we don't + * re-order the tlbie + */ + asm volatile("ptesync": : :"memory"); + asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : : + "r" (rbvalues[0]), "r" (kvm->arch.lpid)); + } + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); kvm->arch.tlbie_lock = 0; } else { diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 640cf566e986..96797bff5937 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -104,6 +104,15 @@ static inline unsigned long ___tlbie(unsigned long vpn, int psize, return va; } +static inline void fixup_tlbie(unsigned long vpn, int psize, int apsize, int ssize) +{ + if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { + /* Need the extra ptesync to ensure we don't reorder tlbie*/ + asm volatile("ptesync": : :"memory"); + ___tlbie(vpn, psize, apsize, ssize); + } +} + static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) { unsigned long rb; @@ -181,6 +190,7 @@ static inline void tlbie(unsigned long vpn, int psize, int apsize, asm volatile("ptesync": : :"memory"); } else { __tlbie(vpn, psize, apsize, ssize); + fixup_tlbie(vpn, psize, apsize, ssize); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } if (lock_tlbie && !use_local) @@ -674,7 +684,7 @@ static void native_hpte_clear(void) */ static void native_flush_hash_range(unsigned long number, int local) { - unsigned long vpn; + unsigned long vpn = 0; unsigned long hash, index, hidx, shift, slot; struct hash_pte *hptep; unsigned long hpte_v; @@ -746,6 +756,10 @@ static void native_flush_hash_range(unsigned long number, int local) __tlbie(vpn, psize, psize, ssize); } pte_iterate_hashed_end(); } + /* + * Just do one more with the last used values. + */ + fixup_tlbie(vpn, psize, psize, ssize); asm volatile("eieio; tlbsync; ptesync":::"memory"); if (lock_tlbie) diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 12f95b1f7d07..48ed34d52ffd 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -491,6 +491,7 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0); } + /* do we need fixup here ?*/ asm volatile("eieio; tlbsync; ptesync" : : : "memory"); } EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry); diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 4b295cfd5f7e..1a4912c5e5a2 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -23,6 +23,33 @@ #define RIC_FLUSH_PWC 1 #define RIC_FLUSH_ALL 2 +static inline void __tlbie_va(unsigned long va, unsigned long pid, + unsigned long ap, unsigned long ric) +{ + unsigned long rb,rs,prs,r; + + rb = va & ~(PPC_BITMASK(52, 63)); + rb |= ap << PPC_BITLSHIFT(58); + rs = pid << PPC_BITLSHIFT(31); + prs = 1; /* process scoped */ + r = 1; /* raidx format */ + + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + trace_tlbie(0, 0, rb, rs, ric, prs, r); +} + +static inline void fixup_tlbie(void) +{ + unsigned long pid = 0; + unsigned long va = ((1UL << 52) - 1); + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { + asm volatile("ptesync": : :"memory"); + __tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB); + } +} + static inline void __tlbiel_pid(unsigned long pid, int set, unsigned long ric) { @@ -80,6 +107,7 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric) asm volatile("ptesync": : :"memory"); asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + fixup_tlbie(); asm volatile("eieio; tlbsync; ptesync": : :"memory"); trace_tlbie(0, 0, rb, rs, ric, prs, r); } @@ -105,19 +133,10 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid, static inline void _tlbie_va(unsigned long va, unsigned long pid, unsigned long ap, unsigned long ric) { - unsigned long rb,rs,prs,r; - - rb = va & ~(PPC_BITMASK(52, 63)); - rb |= ap << PPC_BITLSHIFT(58); - rs = pid << PPC_BITLSHIFT(31); - prs = 1; /* process scoped */ - r = 1; /* raidx format */ - asm volatile("ptesync": : :"memory"); - asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) - : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + __tlbie_va(va, pid, ap, ric); + fixup_tlbie(); asm volatile("eieio; tlbsync; ptesync": : :"memory"); - trace_tlbie(0, 0, rb, rs, ric, prs, r); } /* From 169795c893f424cd889aa106e971628c780b81a3 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 17 Oct 2019 13:35:01 +0530 Subject: [PATCH 0305/3715] powerpc/book3s64/mm: Don't do tlbie fixup for some hardware revisions commit 677733e296b5c7a37c47da391fc70a43dc40bd67 upstream. The store ordering vs tlbie issue mentioned in commit a5d4b5891c2f ("powerpc/mm: Fixup tlbie vs store ordering issue on POWER9") is fixed for Nimbus 2.3 and Cumulus 1.3 revisions. We don't need to apply the fixup if we are running on them We can only do this on PowerNV. On pseries guest with kvm we still don't support redoing the feature fixup after migration. So we should be enabling all the workarounds needed, because whe can possibly migrate between DD 2.3 and DD 2.2 Cc: stable@vger.kernel.org # v4.14 Fixes: a5d4b5891c2f ("powerpc/mm: Fixup tlbie vs store ordering issue on POWER9") Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190924035254.24612-1-aneesh.kumar@linux.ibm.com [sandipan: Backported to v4.14] Signed-off-by: Sandipan Das Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/dt_cpu_ftrs.c | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 15059e2446de..753759a3c8e9 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -733,9 +733,35 @@ static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f) return true; } +/* + * Handle POWER9 broadcast tlbie invalidation issue using + * cpu feature flag. + */ +static __init void update_tlbie_feature_flag(unsigned long pvr) +{ + if (PVR_VER(pvr) == PVR_POWER9) { + /* + * Set the tlbie feature flag for anything below + * Nimbus DD 2.3 and Cumulus DD 1.3 + */ + if ((pvr & 0xe000) == 0) { + /* Nimbus */ + if ((pvr & 0xfff) < 0x203) + cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG; + } else if ((pvr & 0xc000) == 0) { + /* Cumulus */ + if ((pvr & 0xfff) < 0x103) + cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG; + } else { + WARN_ONCE(1, "Unknown PVR"); + cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG; + } + } +} + static __init void cpufeatures_cpu_quirks(void) { - int version = mfspr(SPRN_PVR); + unsigned long version = mfspr(SPRN_PVR); /* * Not all quirks can be derived from the cpufeatures device tree. @@ -743,8 +769,7 @@ static __init void cpufeatures_cpu_quirks(void) if ((version & 0xffffff00) == 0x004e0100) cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1; - if ((version & 0xffff0000) == 0x004e0000) - cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG; + update_tlbie_feature_flag(version); } static void __init cpufeatures_setup_finished(void) From 3f5d5b30cd7284d2f8983561f753ee23aa410b58 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 17 Oct 2019 13:35:02 +0530 Subject: [PATCH 0306/3715] powerpc/book3s64/radix: Rename CPU_FTR_P9_TLBIE_BUG feature flag commit 09ce98cacd51fcd0fa0af2f79d1e1d3192f4cbb0 upstream. Rename the #define to indicate this is related to store vs tlbie ordering issue. In the next patch, we will be adding another feature flag that is used to handles ERAT flush vs tlbie ordering issue. Cc: stable@vger.kernel.org # v4.14 Fixes: a5d4b5891c2f ("powerpc/mm: Fixup tlbie vs store ordering issue on POWER9") Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190924035254.24612-2-aneesh.kumar@linux.ibm.com [sandipan: Backported to v4.14] Signed-off-by: Sandipan Das Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/cputable.h | 4 ++-- arch/powerpc/kernel/dt_cpu_ftrs.c | 6 +++--- arch/powerpc/kvm/book3s_64_mmu_radix.c | 2 +- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 2 +- arch/powerpc/mm/hash_native_64.c | 2 +- arch/powerpc/mm/tlb-radix.c | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index e143017d7549..6a0dfce96d8c 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -215,7 +215,7 @@ enum { #define CPU_FTR_DAWR LONG_ASM_CONST(0x0400000000000000) #define CPU_FTR_DABRX LONG_ASM_CONST(0x0800000000000000) #define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000000000000000) -#define CPU_FTR_P9_TLBIE_BUG LONG_ASM_CONST(0x2000000000000000) +#define CPU_FTR_P9_TLBIE_STQ_BUG LONG_ASM_CONST(0x0000400000000000) #define CPU_FTR_POWER9_DD1 LONG_ASM_CONST(0x4000000000000000) #ifndef __ASSEMBLY__ @@ -477,7 +477,7 @@ enum { CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | \ - CPU_FTR_P9_TLBIE_BUG) + CPU_FTR_P9_TLBIE_STQ_BUG) #define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \ (~CPU_FTR_SAO)) #define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 753759a3c8e9..b61b6b1ebf43 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -747,14 +747,14 @@ static __init void update_tlbie_feature_flag(unsigned long pvr) if ((pvr & 0xe000) == 0) { /* Nimbus */ if ((pvr & 0xfff) < 0x203) - cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG; + cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG; } else if ((pvr & 0xc000) == 0) { /* Cumulus */ if ((pvr & 0xfff) < 0x103) - cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG; + cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG; } else { WARN_ONCE(1, "Unknown PVR"); - cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG; + cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG; } } } diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 559cba16dbe0..7f8f2a0189df 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -160,7 +160,7 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr, asm volatile("ptesync": : :"memory"); asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1) : : "r" (addr), "r" (kvm->arch.lpid) : "memory"); - if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) + if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1) : : "r" (addr), "r" (kvm->arch.lpid) : "memory"); asm volatile("ptesync": : :"memory"); diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index b18966a368af..9439fe213070 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -449,7 +449,7 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, "r" (rbvalues[i]), "r" (kvm->arch.lpid)); } - if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { + if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { /* * Need the extra ptesync to make sure we don't * re-order the tlbie diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 96797bff5937..09b9263e3cc6 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -106,7 +106,7 @@ static inline unsigned long ___tlbie(unsigned long vpn, int psize, static inline void fixup_tlbie(unsigned long vpn, int psize, int apsize, int ssize) { - if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { + if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { /* Need the extra ptesync to ensure we don't reorder tlbie*/ asm volatile("ptesync": : :"memory"); ___tlbie(vpn, psize, apsize, ssize); diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 1a4912c5e5a2..5081e03b5e40 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -44,7 +44,7 @@ static inline void fixup_tlbie(void) unsigned long pid = 0; unsigned long va = ((1UL << 52) - 1); - if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { + if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { asm volatile("ptesync": : :"memory"); __tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB); } From ae11e9fddd4107926d0671a255f49577a1291d7d Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 17 Oct 2019 13:35:03 +0530 Subject: [PATCH 0307/3715] powerpc/mm: Fixup tlbie vs mtpidr/mtlpidr ordering issue on POWER9 commit 047e6575aec71d75b765c22111820c4776cd1c43 upstream. On POWER9, under some circumstances, a broadcast TLB invalidation will fail to invalidate the ERAT cache on some threads when there are parallel mtpidr/mtlpidr happening on other threads of the same core. This can cause stores to continue to go to a page after it's unmapped. The workaround is to force an ERAT flush using PID=0 or LPID=0 tlbie flush. This additional TLB flush will cause the ERAT cache invalidation. Since we are using PID=0 or LPID=0, we don't get filtered out by the TLB snoop filtering logic. We need to still follow this up with another tlbie to take care of store vs tlbie ordering issue explained in commit: a5d4b5891c2f ("powerpc/mm: Fixup tlbie vs store ordering issue on POWER9"). The presence of ERAT cache implies we can still get new stores and they may miss store queue marking flush. Cc: stable@vger.kernel.org # v4.14 Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190924035254.24612-3-aneesh.kumar@linux.ibm.com [sandipan: Backported to v4.14] Signed-off-by: Sandipan Das Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/cputable.h | 3 +- arch/powerpc/kernel/dt_cpu_ftrs.c | 2 + arch/powerpc/kvm/book3s_hv_rm_mmu.c | 42 ++++++++++++++----- arch/powerpc/mm/hash_native_64.c | 28 +++++++++++-- arch/powerpc/mm/tlb-radix.c | 65 ++++++++++++++++++++++++----- 5 files changed, 116 insertions(+), 24 deletions(-) diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 6a0dfce96d8c..7e3ccf21830e 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -217,6 +217,7 @@ enum { #define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000000000000000) #define CPU_FTR_P9_TLBIE_STQ_BUG LONG_ASM_CONST(0x0000400000000000) #define CPU_FTR_POWER9_DD1 LONG_ASM_CONST(0x4000000000000000) +#define CPU_FTR_P9_TLBIE_ERAT_BUG LONG_ASM_CONST(0x0001000000000000) #ifndef __ASSEMBLY__ @@ -477,7 +478,7 @@ enum { CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | \ - CPU_FTR_P9_TLBIE_STQ_BUG) + CPU_FTR_P9_TLBIE_STQ_BUG | CPU_FTR_P9_TLBIE_ERAT_BUG) #define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \ (~CPU_FTR_SAO)) #define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index b61b6b1ebf43..2357df60de95 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -756,6 +756,8 @@ static __init void update_tlbie_feature_flag(unsigned long pvr) WARN_ONCE(1, "Unknown PVR"); cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG; } + + cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_ERAT_BUG; } } diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 9439fe213070..669b547385f3 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -429,6 +429,37 @@ static inline int try_lock_tlbie(unsigned int *lock) return old == 0; } +static inline void fixup_tlbie_lpid(unsigned long rb_value, unsigned long lpid) +{ + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { + /* Radix flush for a hash guest */ + + unsigned long rb,rs,prs,r,ric; + + rb = PPC_BIT(52); /* IS = 2 */ + rs = 0; /* lpid = 0 */ + prs = 0; /* partition scoped */ + r = 1; /* radix format */ + ric = 0; /* RIC_FLSUH_TLB */ + + /* + * Need the extra ptesync to make sure we don't + * re-order the tlbie + */ + asm volatile("ptesync": : :"memory"); + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), + "i"(ric), "r"(rs) : "memory"); + } + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { + asm volatile("ptesync": : :"memory"); + asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : : + "r" (rb_value), "r" (lpid)); + } +} + static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, long npages, int global, bool need_sync) { @@ -449,16 +480,7 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, "r" (rbvalues[i]), "r" (kvm->arch.lpid)); } - if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { - /* - * Need the extra ptesync to make sure we don't - * re-order the tlbie - */ - asm volatile("ptesync": : :"memory"); - asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : : - "r" (rbvalues[0]), "r" (kvm->arch.lpid)); - } - + fixup_tlbie_lpid(rbvalues[i - 1], kvm->arch.lpid); asm volatile("eieio; tlbsync; ptesync" : : : "memory"); kvm->arch.tlbie_lock = 0; } else { diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 09b9263e3cc6..a4b6efbf667b 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -104,8 +104,30 @@ static inline unsigned long ___tlbie(unsigned long vpn, int psize, return va; } -static inline void fixup_tlbie(unsigned long vpn, int psize, int apsize, int ssize) +static inline void fixup_tlbie_vpn(unsigned long vpn, int psize, + int apsize, int ssize) { + if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { + /* Radix flush for a hash guest */ + + unsigned long rb,rs,prs,r,ric; + + rb = PPC_BIT(52); /* IS = 2 */ + rs = 0; /* lpid = 0 */ + prs = 0; /* partition scoped */ + r = 1; /* radix format */ + ric = 0; /* RIC_FLSUH_TLB */ + + /* + * Need the extra ptesync to make sure we don't + * re-order the tlbie + */ + asm volatile("ptesync": : :"memory"); + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), + "i"(ric), "r"(rs) : "memory"); + } + if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { /* Need the extra ptesync to ensure we don't reorder tlbie*/ asm volatile("ptesync": : :"memory"); @@ -190,7 +212,7 @@ static inline void tlbie(unsigned long vpn, int psize, int apsize, asm volatile("ptesync": : :"memory"); } else { __tlbie(vpn, psize, apsize, ssize); - fixup_tlbie(vpn, psize, apsize, ssize); + fixup_tlbie_vpn(vpn, psize, apsize, ssize); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } if (lock_tlbie && !use_local) @@ -759,7 +781,7 @@ static void native_flush_hash_range(unsigned long number, int local) /* * Just do one more with the last used values. */ - fixup_tlbie(vpn, psize, psize, ssize); + fixup_tlbie_vpn(vpn, psize, psize, ssize); asm volatile("eieio; tlbsync; ptesync":::"memory"); if (lock_tlbie) diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 5081e03b5e40..41e782f126d6 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -39,14 +39,18 @@ static inline void __tlbie_va(unsigned long va, unsigned long pid, trace_tlbie(0, 0, rb, rs, ric, prs, r); } -static inline void fixup_tlbie(void) + +static inline void fixup_tlbie_va(unsigned long va, unsigned long pid, + unsigned long ap) { - unsigned long pid = 0; - unsigned long va = ((1UL << 52) - 1); + if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { + asm volatile("ptesync": : :"memory"); + __tlbie_va(va, 0, ap, RIC_FLUSH_TLB); + } if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { asm volatile("ptesync": : :"memory"); - __tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB); + __tlbie_va(va, pid, ap, RIC_FLUSH_TLB); } } @@ -95,23 +99,64 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory"); } -static inline void _tlbie_pid(unsigned long pid, unsigned long ric) +static inline void __tlbie_pid(unsigned long pid, unsigned long ric) { unsigned long rb,rs,prs,r; rb = PPC_BIT(53); /* IS = 1 */ rs = pid << PPC_BITLSHIFT(31); prs = 1; /* process scoped */ - r = 1; /* raidx format */ + r = 1; /* radix format */ - asm volatile("ptesync": : :"memory"); asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); - fixup_tlbie(); - asm volatile("eieio; tlbsync; ptesync": : :"memory"); trace_tlbie(0, 0, rb, rs, ric, prs, r); } +static inline void fixup_tlbie_pid(unsigned long pid) +{ + /* + * We can use any address for the invalidation, pick one which is + * probably unused as an optimisation. + */ + unsigned long va = ((1UL << 52) - 1); + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { + asm volatile("ptesync": : :"memory"); + __tlbie_pid(0, RIC_FLUSH_TLB); + } + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { + asm volatile("ptesync": : :"memory"); + __tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB); + } +} + +static inline void _tlbie_pid(unsigned long pid, unsigned long ric) +{ + asm volatile("ptesync": : :"memory"); + + /* + * Workaround the fact that the "ric" argument to __tlbie_pid + * must be a compile-time contraint to match the "i" constraint + * in the asm statement. + */ + switch (ric) { + case RIC_FLUSH_TLB: + __tlbie_pid(pid, RIC_FLUSH_TLB); + fixup_tlbie_pid(pid); + break; + case RIC_FLUSH_PWC: + __tlbie_pid(pid, RIC_FLUSH_PWC); + break; + case RIC_FLUSH_ALL: + default: + __tlbie_pid(pid, RIC_FLUSH_ALL); + fixup_tlbie_pid(pid); + } + asm volatile("eieio; tlbsync; ptesync": : :"memory"); +} + static inline void _tlbiel_va(unsigned long va, unsigned long pid, unsigned long ap, unsigned long ric) { @@ -135,7 +180,7 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid, { asm volatile("ptesync": : :"memory"); __tlbie_va(va, pid, ap, ric); - fixup_tlbie(); + fixup_tlbie_va(va, pid, ap); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } From 5fda11bacfac1b72b0f241a76fad16ded2439ee9 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 17 Oct 2019 13:35:04 +0530 Subject: [PATCH 0308/3715] selftests/powerpc: Add test case for tlbie vs mtpidr ordering issue commit 93cad5f789951eaa27c3392b15294b4e51253944 upstream. Cc: stable@vger.kernel.org # v4.14 Signed-off-by: Aneesh Kumar K.V [mpe: Some minor fixes to make it build] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190924035254.24612-4-aneesh.kumar@linux.ibm.com [sandipan: Backported to v4.14] Signed-off-by: Sandipan Das Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/powerpc/mm/Makefile | 2 + .../testing/selftests/powerpc/mm/tlbie_test.c | 734 ++++++++++++++++++ 2 files changed, 736 insertions(+) create mode 100644 tools/testing/selftests/powerpc/mm/tlbie_test.c diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index bf315bcbe663..fae8c52cf8f0 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -3,6 +3,7 @@ noarg: $(MAKE) -C ../ TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao +TEST_GEN_PROGS_EXTENDED := tlbie_test TEST_GEN_FILES := tempfile include ../../lib.mk @@ -14,3 +15,4 @@ $(OUTPUT)/prot_sao: ../utils.c $(OUTPUT)/tempfile: dd if=/dev/zero of=$@ bs=64k count=1 +$(OUTPUT)/tlbie_test: LDLIBS += -lpthread diff --git a/tools/testing/selftests/powerpc/mm/tlbie_test.c b/tools/testing/selftests/powerpc/mm/tlbie_test.c new file mode 100644 index 000000000000..9868a5ddd847 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/tlbie_test.c @@ -0,0 +1,734 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2019, Nick Piggin, Gautham R. Shenoy, Aneesh Kumar K.V, IBM Corp. + */ + +/* + * + * Test tlbie/mtpidr race. We have 4 threads doing flush/load/compare/store + * sequence in a loop. The same threads also rung a context switch task + * that does sched_yield() in loop. + * + * The snapshot thread mark the mmap area PROT_READ in between, make a copy + * and copy it back to the original area. This helps us to detect if any + * store continued to happen after we marked the memory PROT_READ. + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static inline void dcbf(volatile unsigned int *addr) +{ + __asm__ __volatile__ ("dcbf %y0; sync" : : "Z"(*(unsigned char *)addr) : "memory"); +} + +static void err_msg(char *msg) +{ + + time_t now; + time(&now); + printf("=================================\n"); + printf(" Error: %s\n", msg); + printf(" %s", ctime(&now)); + printf("=================================\n"); + exit(1); +} + +static char *map1; +static char *map2; +static pid_t rim_process_pid; + +/* + * A "rim-sequence" is defined to be the sequence of the following + * operations performed on a memory word: + * 1) FLUSH the contents of that word. + * 2) LOAD the contents of that word. + * 3) COMPARE the contents of that word with the content that was + * previously stored at that word + * 4) STORE new content into that word. + * + * The threads in this test that perform the rim-sequence are termed + * as rim_threads. + */ + +/* + * A "corruption" is defined to be the failed COMPARE operation in a + * rim-sequence. + * + * A rim_thread that detects a corruption informs about it to all the + * other rim_threads, and the mem_snapshot thread. + */ +static volatile unsigned int corruption_found; + +/* + * This defines the maximum number of rim_threads in this test. + * + * The THREAD_ID_BITS denote the number of bits required + * to represent the thread_ids [0..MAX_THREADS - 1]. + * We are being a bit paranoid here and set it to 8 bits, + * though 6 bits suffice. + * + */ +#define MAX_THREADS 64 +#define THREAD_ID_BITS 8 +#define THREAD_ID_MASK ((1 << THREAD_ID_BITS) - 1) +static unsigned int rim_thread_ids[MAX_THREADS]; +static pthread_t rim_threads[MAX_THREADS]; + + +/* + * Each rim_thread works on an exclusive "chunk" of size + * RIM_CHUNK_SIZE. + * + * The ith rim_thread works on the ith chunk. + * + * The ith chunk begins at + * map1 + (i * RIM_CHUNK_SIZE) + */ +#define RIM_CHUNK_SIZE 1024 +#define BITS_PER_BYTE 8 +#define WORD_SIZE (sizeof(unsigned int)) +#define WORD_BITS (WORD_SIZE * BITS_PER_BYTE) +#define WORDS_PER_CHUNK (RIM_CHUNK_SIZE/WORD_SIZE) + +static inline char *compute_chunk_start_addr(unsigned int thread_id) +{ + char *chunk_start; + + chunk_start = (char *)((unsigned long)map1 + + (thread_id * RIM_CHUNK_SIZE)); + + return chunk_start; +} + +/* + * The "word-offset" of a word-aligned address inside a chunk, is + * defined to be the number of words that precede the address in that + * chunk. + * + * WORD_OFFSET_BITS denote the number of bits required to represent + * the word-offsets of all the word-aligned addresses of a chunk. + */ +#define WORD_OFFSET_BITS (__builtin_ctz(WORDS_PER_CHUNK)) +#define WORD_OFFSET_MASK ((1 << WORD_OFFSET_BITS) - 1) + +static inline unsigned int compute_word_offset(char *start, unsigned int *addr) +{ + unsigned int delta_bytes, ret; + delta_bytes = (unsigned long)addr - (unsigned long)start; + + ret = delta_bytes/WORD_SIZE; + + return ret; +} + +/* + * A "sweep" is defined to be the sequential execution of the + * rim-sequence by a rim_thread on its chunk one word at a time, + * starting from the first word of its chunk and ending with the last + * word of its chunk. + * + * Each sweep of a rim_thread is uniquely identified by a sweep_id. + * SWEEP_ID_BITS denote the number of bits required to represent + * the sweep_ids of rim_threads. + * + * As to why SWEEP_ID_BITS are computed as a function of THREAD_ID_BITS, + * WORD_OFFSET_BITS, and WORD_BITS, see the "store-pattern" below. + */ +#define SWEEP_ID_BITS (WORD_BITS - (THREAD_ID_BITS + WORD_OFFSET_BITS)) +#define SWEEP_ID_MASK ((1 << SWEEP_ID_BITS) - 1) + +/* + * A "store-pattern" is the word-pattern that is stored into a word + * location in the 4)STORE step of the rim-sequence. + * + * In the store-pattern, we shall encode: + * + * - The thread-id of the rim_thread performing the store + * (The most significant THREAD_ID_BITS) + * + * - The word-offset of the address into which the store is being + * performed (The next WORD_OFFSET_BITS) + * + * - The sweep_id of the current sweep in which the store is + * being performed. (The lower SWEEP_ID_BITS) + * + * Store Pattern: 32 bits + * |------------------|--------------------|---------------------------------| + * | Thread id | Word offset | sweep_id | + * |------------------|--------------------|---------------------------------| + * THREAD_ID_BITS WORD_OFFSET_BITS SWEEP_ID_BITS + * + * In the store pattern, the (Thread-id + Word-offset) uniquely identify the + * address to which the store is being performed i.e, + * address == map1 + + * (Thread-id * RIM_CHUNK_SIZE) + (Word-offset * WORD_SIZE) + * + * And the sweep_id in the store pattern identifies the time when the + * store was performed by the rim_thread. + * + * We shall use this property in the 3)COMPARE step of the + * rim-sequence. + */ +#define SWEEP_ID_SHIFT 0 +#define WORD_OFFSET_SHIFT (SWEEP_ID_BITS) +#define THREAD_ID_SHIFT (WORD_OFFSET_BITS + SWEEP_ID_BITS) + +/* + * Compute the store pattern for a given thread with id @tid, at + * location @addr in the sweep identified by @sweep_id + */ +static inline unsigned int compute_store_pattern(unsigned int tid, + unsigned int *addr, + unsigned int sweep_id) +{ + unsigned int ret = 0; + char *start = compute_chunk_start_addr(tid); + unsigned int word_offset = compute_word_offset(start, addr); + + ret += (tid & THREAD_ID_MASK) << THREAD_ID_SHIFT; + ret += (word_offset & WORD_OFFSET_MASK) << WORD_OFFSET_SHIFT; + ret += (sweep_id & SWEEP_ID_MASK) << SWEEP_ID_SHIFT; + return ret; +} + +/* Extract the thread-id from the given store-pattern */ +static inline unsigned int extract_tid(unsigned int pattern) +{ + unsigned int ret; + + ret = (pattern >> THREAD_ID_SHIFT) & THREAD_ID_MASK; + return ret; +} + +/* Extract the word-offset from the given store-pattern */ +static inline unsigned int extract_word_offset(unsigned int pattern) +{ + unsigned int ret; + + ret = (pattern >> WORD_OFFSET_SHIFT) & WORD_OFFSET_MASK; + + return ret; +} + +/* Extract the sweep-id from the given store-pattern */ +static inline unsigned int extract_sweep_id(unsigned int pattern) + +{ + unsigned int ret; + + ret = (pattern >> SWEEP_ID_SHIFT) & SWEEP_ID_MASK; + + return ret; +} + +/************************************************************ + * * + * Logging the output of the verification * + * * + ************************************************************/ +#define LOGDIR_NAME_SIZE 100 +static char logdir[LOGDIR_NAME_SIZE]; + +static FILE *fp[MAX_THREADS]; +static const char logfilename[] ="Thread-%02d-Chunk"; + +static inline void start_verification_log(unsigned int tid, + unsigned int *addr, + unsigned int cur_sweep_id, + unsigned int prev_sweep_id) +{ + FILE *f; + char logfile[30]; + char path[LOGDIR_NAME_SIZE + 30]; + char separator[2] = "/"; + char *chunk_start = compute_chunk_start_addr(tid); + unsigned int size = RIM_CHUNK_SIZE; + + sprintf(logfile, logfilename, tid); + strcpy(path, logdir); + strcat(path, separator); + strcat(path, logfile); + f = fopen(path, "w"); + + if (!f) { + err_msg("Unable to create logfile\n"); + } + + fp[tid] = f; + + fprintf(f, "----------------------------------------------------------\n"); + fprintf(f, "PID = %d\n", rim_process_pid); + fprintf(f, "Thread id = %02d\n", tid); + fprintf(f, "Chunk Start Addr = 0x%016lx\n", (unsigned long)chunk_start); + fprintf(f, "Chunk Size = %d\n", size); + fprintf(f, "Next Store Addr = 0x%016lx\n", (unsigned long)addr); + fprintf(f, "Current sweep-id = 0x%08x\n", cur_sweep_id); + fprintf(f, "Previous sweep-id = 0x%08x\n", prev_sweep_id); + fprintf(f, "----------------------------------------------------------\n"); +} + +static inline void log_anamoly(unsigned int tid, unsigned int *addr, + unsigned int expected, unsigned int observed) +{ + FILE *f = fp[tid]; + + fprintf(f, "Thread %02d: Addr 0x%lx: Expected 0x%x, Observed 0x%x\n", + tid, (unsigned long)addr, expected, observed); + fprintf(f, "Thread %02d: Expected Thread id = %02d\n", tid, extract_tid(expected)); + fprintf(f, "Thread %02d: Observed Thread id = %02d\n", tid, extract_tid(observed)); + fprintf(f, "Thread %02d: Expected Word offset = %03d\n", tid, extract_word_offset(expected)); + fprintf(f, "Thread %02d: Observed Word offset = %03d\n", tid, extract_word_offset(observed)); + fprintf(f, "Thread %02d: Expected sweep-id = 0x%x\n", tid, extract_sweep_id(expected)); + fprintf(f, "Thread %02d: Observed sweep-id = 0x%x\n", tid, extract_sweep_id(observed)); + fprintf(f, "----------------------------------------------------------\n"); +} + +static inline void end_verification_log(unsigned int tid, unsigned nr_anamolies) +{ + FILE *f = fp[tid]; + char logfile[30]; + char path[LOGDIR_NAME_SIZE + 30]; + char separator[] = "/"; + + fclose(f); + + if (nr_anamolies == 0) { + remove(path); + return; + } + + sprintf(logfile, logfilename, tid); + strcpy(path, logdir); + strcat(path, separator); + strcat(path, logfile); + + printf("Thread %02d chunk has %d corrupted words. For details check %s\n", + tid, nr_anamolies, path); +} + +/* + * When a COMPARE step of a rim-sequence fails, the rim_thread informs + * everyone else via the shared_memory pointed to by + * corruption_found variable. On seeing this, every thread verifies the + * content of its chunk as follows. + * + * Suppose a thread identified with @tid was about to store (but not + * yet stored) to @next_store_addr in its current sweep identified + * @cur_sweep_id. Let @prev_sweep_id indicate the previous sweep_id. + * + * This implies that for all the addresses @addr < @next_store_addr, + * Thread @tid has already performed a store as part of its current + * sweep. Hence we expect the content of such @addr to be: + * |-------------------------------------------------| + * | tid | word_offset(addr) | cur_sweep_id | + * |-------------------------------------------------| + * + * Since Thread @tid is yet to perform stores on address + * @next_store_addr and above, we expect the content of such an + * address @addr to be: + * |-------------------------------------------------| + * | tid | word_offset(addr) | prev_sweep_id | + * |-------------------------------------------------| + * + * The verifier function @verify_chunk does this verification and logs + * any anamolies that it finds. + */ +static void verify_chunk(unsigned int tid, unsigned int *next_store_addr, + unsigned int cur_sweep_id, + unsigned int prev_sweep_id) +{ + unsigned int *iter_ptr; + unsigned int size = RIM_CHUNK_SIZE; + unsigned int expected; + unsigned int observed; + char *chunk_start = compute_chunk_start_addr(tid); + + int nr_anamolies = 0; + + start_verification_log(tid, next_store_addr, + cur_sweep_id, prev_sweep_id); + + for (iter_ptr = (unsigned int *)chunk_start; + (unsigned long)iter_ptr < (unsigned long)chunk_start + size; + iter_ptr++) { + unsigned int expected_sweep_id; + + if (iter_ptr < next_store_addr) { + expected_sweep_id = cur_sweep_id; + } else { + expected_sweep_id = prev_sweep_id; + } + + expected = compute_store_pattern(tid, iter_ptr, expected_sweep_id); + + dcbf((volatile unsigned int*)iter_ptr); //Flush before reading + observed = *iter_ptr; + + if (observed != expected) { + nr_anamolies++; + log_anamoly(tid, iter_ptr, expected, observed); + } + } + + end_verification_log(tid, nr_anamolies); +} + +static void set_pthread_cpu(pthread_t th, int cpu) +{ + cpu_set_t run_cpu_mask; + struct sched_param param; + + CPU_ZERO(&run_cpu_mask); + CPU_SET(cpu, &run_cpu_mask); + pthread_setaffinity_np(th, sizeof(cpu_set_t), &run_cpu_mask); + + param.sched_priority = 1; + if (0 && sched_setscheduler(0, SCHED_FIFO, ¶m) == -1) { + /* haven't reproduced with this setting, it kills random preemption which may be a factor */ + fprintf(stderr, "could not set SCHED_FIFO, run as root?\n"); + } +} + +static void set_mycpu(int cpu) +{ + cpu_set_t run_cpu_mask; + struct sched_param param; + + CPU_ZERO(&run_cpu_mask); + CPU_SET(cpu, &run_cpu_mask); + sched_setaffinity(0, sizeof(cpu_set_t), &run_cpu_mask); + + param.sched_priority = 1; + if (0 && sched_setscheduler(0, SCHED_FIFO, ¶m) == -1) { + fprintf(stderr, "could not set SCHED_FIFO, run as root?\n"); + } +} + +static volatile int segv_wait; + +static void segv_handler(int signo, siginfo_t *info, void *extra) +{ + while (segv_wait) { + sched_yield(); + } + +} + +static void set_segv_handler(void) +{ + struct sigaction sa; + + sa.sa_flags = SA_SIGINFO; + sa.sa_sigaction = segv_handler; + + if (sigaction(SIGSEGV, &sa, NULL) == -1) { + perror("sigaction"); + exit(EXIT_FAILURE); + } +} + +int timeout = 0; +/* + * This function is executed by every rim_thread. + * + * This function performs sweeps over the exclusive chunks of the + * rim_threads executing the rim-sequence one word at a time. + */ +static void *rim_fn(void *arg) +{ + unsigned int tid = *((unsigned int *)arg); + + int size = RIM_CHUNK_SIZE; + char *chunk_start = compute_chunk_start_addr(tid); + + unsigned int prev_sweep_id; + unsigned int cur_sweep_id = 0; + + /* word access */ + unsigned int pattern = cur_sweep_id; + unsigned int *pattern_ptr = &pattern; + unsigned int *w_ptr, read_data; + + set_segv_handler(); + + /* + * Let us initialize the chunk: + * + * Each word-aligned address addr in the chunk, + * is initialized to : + * |-------------------------------------------------| + * | tid | word_offset(addr) | 0 | + * |-------------------------------------------------| + */ + for (w_ptr = (unsigned int *)chunk_start; + (unsigned long)w_ptr < (unsigned long)(chunk_start) + size; + w_ptr++) { + + *pattern_ptr = compute_store_pattern(tid, w_ptr, cur_sweep_id); + *w_ptr = *pattern_ptr; + } + + while (!corruption_found && !timeout) { + prev_sweep_id = cur_sweep_id; + cur_sweep_id = cur_sweep_id + 1; + + for (w_ptr = (unsigned int *)chunk_start; + (unsigned long)w_ptr < (unsigned long)(chunk_start) + size; + w_ptr++) { + unsigned int old_pattern; + + /* + * Compute the pattern that we would have + * stored at this location in the previous + * sweep. + */ + old_pattern = compute_store_pattern(tid, w_ptr, prev_sweep_id); + + /* + * FLUSH:Ensure that we flush the contents of + * the cache before loading + */ + dcbf((volatile unsigned int*)w_ptr); //Flush + + /* LOAD: Read the value */ + read_data = *w_ptr; //Load + + /* + * COMPARE: Is it the same as what we had stored + * in the previous sweep ? It better be! + */ + if (read_data != old_pattern) { + /* No it isn't! Tell everyone */ + corruption_found = 1; + } + + /* + * Before performing a store, let us check if + * any rim_thread has found a corruption. + */ + if (corruption_found || timeout) { + /* + * Yes. Someone (including us!) has found + * a corruption :( + * + * Let us verify that our chunk is + * correct. + */ + /* But first, let us allow the dust to settle down! */ + verify_chunk(tid, w_ptr, cur_sweep_id, prev_sweep_id); + + return 0; + } + + /* + * Compute the new pattern that we are going + * to write to this location + */ + *pattern_ptr = compute_store_pattern(tid, w_ptr, cur_sweep_id); + + /* + * STORE: Now let us write this pattern into + * the location + */ + *w_ptr = *pattern_ptr; + } + } + + return NULL; +} + + +static unsigned long start_cpu = 0; +static unsigned long nrthreads = 4; + +static pthread_t mem_snapshot_thread; + +static void *mem_snapshot_fn(void *arg) +{ + int page_size = getpagesize(); + size_t size = page_size; + void *tmp = malloc(size); + + while (!corruption_found && !timeout) { + /* Stop memory migration once corruption is found */ + segv_wait = 1; + + mprotect(map1, size, PROT_READ); + + /* + * Load from the working alias (map1). Loading from map2 + * also fails. + */ + memcpy(tmp, map1, size); + + /* + * Stores must go via map2 which has write permissions, but + * the corrupted data tends to be seen in the snapshot buffer, + * so corruption does not appear to be introduced at the + * copy-back via map2 alias here. + */ + memcpy(map2, tmp, size); + /* + * Before releasing other threads, must ensure the copy + * back to + */ + asm volatile("sync" ::: "memory"); + mprotect(map1, size, PROT_READ|PROT_WRITE); + asm volatile("sync" ::: "memory"); + segv_wait = 0; + + usleep(1); /* This value makes a big difference */ + } + + return 0; +} + +void alrm_sighandler(int sig) +{ + timeout = 1; +} + +int main(int argc, char *argv[]) +{ + int c; + int page_size = getpagesize(); + time_t now; + int i, dir_error; + pthread_attr_t attr; + key_t shm_key = (key_t) getpid(); + int shmid, run_time = 20 * 60; + struct sigaction sa_alrm; + + snprintf(logdir, LOGDIR_NAME_SIZE, + "/tmp/logdir-%u", (unsigned int)getpid()); + while ((c = getopt(argc, argv, "r:hn:l:t:")) != -1) { + switch(c) { + case 'r': + start_cpu = strtoul(optarg, NULL, 10); + break; + case 'h': + printf("%s [-r ] [-n ] [-l ] [-t ]\n", argv[0]); + exit(0); + break; + case 'n': + nrthreads = strtoul(optarg, NULL, 10); + break; + case 'l': + strncpy(logdir, optarg, LOGDIR_NAME_SIZE); + break; + case 't': + run_time = strtoul(optarg, NULL, 10); + break; + default: + printf("invalid option\n"); + exit(0); + break; + } + } + + if (nrthreads > MAX_THREADS) + nrthreads = MAX_THREADS; + + shmid = shmget(shm_key, page_size, IPC_CREAT|0666); + if (shmid < 0) { + err_msg("Failed shmget\n"); + } + + map1 = shmat(shmid, NULL, 0); + if (map1 == (void *) -1) { + err_msg("Failed shmat"); + } + + map2 = shmat(shmid, NULL, 0); + if (map2 == (void *) -1) { + err_msg("Failed shmat"); + } + + dir_error = mkdir(logdir, 0755); + + if (dir_error) { + err_msg("Failed mkdir"); + } + + printf("start_cpu list:%lu\n", start_cpu); + printf("number of worker threads:%lu + 1 snapshot thread\n", nrthreads); + printf("Allocated address:0x%016lx + secondary map:0x%016lx\n", (unsigned long)map1, (unsigned long)map2); + printf("logdir at : %s\n", logdir); + printf("Timeout: %d seconds\n", run_time); + + time(&now); + printf("=================================\n"); + printf(" Starting Test\n"); + printf(" %s", ctime(&now)); + printf("=================================\n"); + + for (i = 0; i < nrthreads; i++) { + if (1 && !fork()) { + prctl(PR_SET_PDEATHSIG, SIGKILL); + set_mycpu(start_cpu + i); + for (;;) + sched_yield(); + exit(0); + } + } + + + sa_alrm.sa_handler = &alrm_sighandler; + sigemptyset(&sa_alrm.sa_mask); + sa_alrm.sa_flags = 0; + + if (sigaction(SIGALRM, &sa_alrm, 0) == -1) { + err_msg("Failed signal handler registration\n"); + } + + alarm(run_time); + + pthread_attr_init(&attr); + for (i = 0; i < nrthreads; i++) { + rim_thread_ids[i] = i; + pthread_create(&rim_threads[i], &attr, rim_fn, &rim_thread_ids[i]); + set_pthread_cpu(rim_threads[i], start_cpu + i); + } + + pthread_create(&mem_snapshot_thread, &attr, mem_snapshot_fn, map1); + set_pthread_cpu(mem_snapshot_thread, start_cpu + i); + + + pthread_join(mem_snapshot_thread, NULL); + for (i = 0; i < nrthreads; i++) { + pthread_join(rim_threads[i], NULL); + } + + if (!timeout) { + time(&now); + printf("=================================\n"); + printf(" Data Corruption Detected\n"); + printf(" %s", ctime(&now)); + printf(" See logfiles in %s\n", logdir); + printf("=================================\n"); + return 1; + } + return 0; +} From 158807d914dd4e43c2b2f420f26297785369b197 Mon Sep 17 00:00:00 2001 From: "Desnes A. Nunes do Rosario" Date: Thu, 17 Oct 2019 13:35:05 +0530 Subject: [PATCH 0309/3715] selftests/powerpc: Fix compile error on tlbie_test due to newer gcc commit 5b216ea1c40cf06eead15054c70e238c9bd4729e upstream. Newer versions of GCC (>= 9) demand that the size of the string to be copied must be explicitly smaller than the size of the destination. Thus, the NULL char has to be taken into account on strncpy. This will avoid the following compiling error: tlbie_test.c: In function 'main': tlbie_test.c:639:4: error: 'strncpy' specified bound 100 equals destination size strncpy(logdir, optarg, LOGDIR_NAME_SIZE); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors Cc: stable@vger.kernel.org # v4.14 Signed-off-by: Desnes A. Nunes do Rosario Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191003211010.9711-1-desnesn@linux.ibm.com [sandipan: Backported to v4.14] Signed-off-by: Sandipan Das Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/powerpc/mm/tlbie_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/mm/tlbie_test.c b/tools/testing/selftests/powerpc/mm/tlbie_test.c index 9868a5ddd847..f85a0938ab25 100644 --- a/tools/testing/selftests/powerpc/mm/tlbie_test.c +++ b/tools/testing/selftests/powerpc/mm/tlbie_test.c @@ -636,7 +636,7 @@ int main(int argc, char *argv[]) nrthreads = strtoul(optarg, NULL, 10); break; case 'l': - strncpy(logdir, optarg, LOGDIR_NAME_SIZE); + strncpy(logdir, optarg, LOGDIR_NAME_SIZE - 1); break; case 't': run_time = strtoul(optarg, NULL, 10); From 4762bcd451a9e92e79d5146d3d4a5ffe2b4e0ec5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 10 Nov 2019 11:25:43 +0100 Subject: [PATCH 0310/3715] Linux 4.14.153 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 52aaa6150099..2819ed540ce2 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 152 +SUBLEVEL = 153 EXTRAVERSION = NAME = Petit Gorille From 0ac69147fd8c0cb61ebecc160e6a3c81517ed48a Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Thu, 10 Nov 2016 19:18:53 -0700 Subject: [PATCH 0311/3715] ANDROID: nf: IDLETIMER: Fix possible use before initialization in idletimer_resume idletimer_resume() assumes that the PM_SUSPEND_PREPARE notifier is sent before PM_POST_SUSPEND so that timer->last_suspend_time is initialized. However, it is possible for PM_POST_SUSPEND to be sent first if there is an error returned from another driver's PM_SUSPEND_PREPARE notifier. As a result, structures are accessed without initialization. The arguments to set_normalized_timespec are very large and unexpected. timer->last_suspend_time has the value of {.tv_sec = 0x6b6b6b6b6b6b6b6b, .tv_nsec=0x6b6b6b6b6b6b6b6b}. Since really large iterations are required, this operation takes more than a minute and causes the CPU to trigger a spinbug since the timestamp lock is held. Call stack - - set_normalized_timespec - timespec_sub - idletimer_resume - notifier_call_chain - __blocking_notifier_call_chain - pm_notifier_call_chain Add a flag indicating whether the current value of timer->last_suspend is valid. Detected with CONFIG_SLUB_DEBUG & CONFIG_DEBUG_SPINLOCK in arm64. Bug: 140404598 Fixes: f0c2df2b1228a ("ANDROID: netfilter: xt_IDLETIMER: Add new netlink msg type") Change-Id: I95328b0ac85dba819ff9cef751c3d07300c232f1 Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: Todd Kjos (cherry picked from commit 7ded4359680d3fb593cbc5c90d84bb5e5083eda9) --- net/netfilter/xt_IDLETIMER.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index f1f0711e2a4a..6cdc9a0364ea 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -70,6 +70,7 @@ struct idletimer_tg { bool send_nl_msg; bool active; uid_t uid; + bool suspend_time_valid; }; static LIST_HEAD(idletimer_tg_list); @@ -239,8 +240,13 @@ static int idletimer_resume(struct notifier_block *notifier, switch (pm_event) { case PM_SUSPEND_PREPARE: get_monotonic_boottime(&timer->last_suspend_time); + timer->suspend_time_valid = true; break; case PM_POST_SUSPEND: + if (!timer->suspend_time_valid) + break; + timer->suspend_time_valid = false; + spin_lock_bh(×tamp_lock); if (!timer->active) { spin_unlock_bh(×tamp_lock); @@ -291,7 +297,7 @@ static int idletimer_tg_create(struct idletimer_tg_info *info) { int ret; - info->timer = kmalloc(sizeof(*info->timer), GFP_KERNEL); + info->timer = kzalloc(sizeof(*info->timer), GFP_KERNEL); if (!info->timer) { ret = -ENOMEM; goto out; From 982b261b87f8d956f6df7506bcfc209ac2fdc665 Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Fri, 1 Nov 2019 21:56:42 -0700 Subject: [PATCH 0312/3715] bonding: fix state transition issue in link monitoring [ Upstream commit 1899bb325149e481de31a4f32b59ea6f24e176ea ] Since de77ecd4ef02 ("bonding: improve link-status update in mii-monitoring"), the bonding driver has utilized two separate variables to indicate the next link state a particular slave should transition to. Each is used to communicate to a different portion of the link state change commit logic; one to the bond_miimon_commit function itself, and another to the state transition logic. Unfortunately, the two variables can become unsynchronized, resulting in incorrect link state transitions within bonding. This can cause slaves to become stuck in an incorrect link state until a subsequent carrier state transition. The issue occurs when a special case in bond_slave_netdev_event sets slave->link directly to BOND_LINK_FAIL. On the next pass through bond_miimon_inspect after the slave goes carrier up, the BOND_LINK_FAIL case will set the proposed next state (link_new_state) to BOND_LINK_UP, but the new_link to BOND_LINK_DOWN. The setting of the final link state from new_link comes after that from link_new_state, and so the slave will end up incorrectly in _DOWN state. Resolve this by combining the two variables into one. Reported-by: Aleksei Zakharov Reported-by: Sha Zhang Cc: Mahesh Bandewar Fixes: de77ecd4ef02 ("bonding: improve link-status update in mii-monitoring") Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/bonding/bond_main.c | 41 ++++++++++++++++----------------- include/net/bonding.h | 3 +-- 2 files changed, 21 insertions(+), 23 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 1cc4c99aa504..cf8385a22de5 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2055,8 +2055,7 @@ static int bond_miimon_inspect(struct bonding *bond) ignore_updelay = !rcu_dereference(bond->curr_active_slave); bond_for_each_slave_rcu(bond, slave, iter) { - slave->new_link = BOND_LINK_NOCHANGE; - slave->link_new_state = slave->link; + bond_propose_link_state(slave, BOND_LINK_NOCHANGE); link_state = bond_check_dev_link(bond, slave->dev, 0); @@ -2092,7 +2091,7 @@ static int bond_miimon_inspect(struct bonding *bond) } if (slave->delay <= 0) { - slave->new_link = BOND_LINK_DOWN; + bond_propose_link_state(slave, BOND_LINK_DOWN); commit++; continue; } @@ -2131,7 +2130,7 @@ static int bond_miimon_inspect(struct bonding *bond) slave->delay = 0; if (slave->delay <= 0) { - slave->new_link = BOND_LINK_UP; + bond_propose_link_state(slave, BOND_LINK_UP); commit++; ignore_updelay = false; continue; @@ -2151,7 +2150,7 @@ static void bond_miimon_commit(struct bonding *bond) struct slave *slave, *primary; bond_for_each_slave(bond, slave, iter) { - switch (slave->new_link) { + switch (slave->link_new_state) { case BOND_LINK_NOCHANGE: /* For 802.3ad mode, check current slave speed and * duplex again in case its port was disabled after @@ -2244,8 +2243,8 @@ static void bond_miimon_commit(struct bonding *bond) default: netdev_err(bond->dev, "invalid new link %d on slave %s\n", - slave->new_link, slave->dev->name); - slave->new_link = BOND_LINK_NOCHANGE; + slave->link_new_state, slave->dev->name); + bond_propose_link_state(slave, BOND_LINK_NOCHANGE); continue; } @@ -2644,13 +2643,13 @@ static void bond_loadbalance_arp_mon(struct bonding *bond) bond_for_each_slave_rcu(bond, slave, iter) { unsigned long trans_start = dev_trans_start(slave->dev); - slave->new_link = BOND_LINK_NOCHANGE; + bond_propose_link_state(slave, BOND_LINK_NOCHANGE); if (slave->link != BOND_LINK_UP) { if (bond_time_in_interval(bond, trans_start, 1) && bond_time_in_interval(bond, slave->last_rx, 1)) { - slave->new_link = BOND_LINK_UP; + bond_propose_link_state(slave, BOND_LINK_UP); slave_state_changed = 1; /* primary_slave has no meaning in round-robin @@ -2677,7 +2676,7 @@ static void bond_loadbalance_arp_mon(struct bonding *bond) if (!bond_time_in_interval(bond, trans_start, 2) || !bond_time_in_interval(bond, slave->last_rx, 2)) { - slave->new_link = BOND_LINK_DOWN; + bond_propose_link_state(slave, BOND_LINK_DOWN); slave_state_changed = 1; if (slave->link_failure_count < UINT_MAX) @@ -2709,8 +2708,8 @@ static void bond_loadbalance_arp_mon(struct bonding *bond) goto re_arm; bond_for_each_slave(bond, slave, iter) { - if (slave->new_link != BOND_LINK_NOCHANGE) - slave->link = slave->new_link; + if (slave->link_new_state != BOND_LINK_NOCHANGE) + slave->link = slave->link_new_state; } if (slave_state_changed) { @@ -2733,9 +2732,9 @@ re_arm: } /* Called to inspect slaves for active-backup mode ARP monitor link state - * changes. Sets new_link in slaves to specify what action should take - * place for the slave. Returns 0 if no changes are found, >0 if changes - * to link states must be committed. + * changes. Sets proposed link state in slaves to specify what action + * should take place for the slave. Returns 0 if no changes are found, >0 + * if changes to link states must be committed. * * Called with rcu_read_lock held. */ @@ -2747,12 +2746,12 @@ static int bond_ab_arp_inspect(struct bonding *bond) int commit = 0; bond_for_each_slave_rcu(bond, slave, iter) { - slave->new_link = BOND_LINK_NOCHANGE; + bond_propose_link_state(slave, BOND_LINK_NOCHANGE); last_rx = slave_last_rx(bond, slave); if (slave->link != BOND_LINK_UP) { if (bond_time_in_interval(bond, last_rx, 1)) { - slave->new_link = BOND_LINK_UP; + bond_propose_link_state(slave, BOND_LINK_UP); commit++; } continue; @@ -2780,7 +2779,7 @@ static int bond_ab_arp_inspect(struct bonding *bond) if (!bond_is_active_slave(slave) && !rcu_access_pointer(bond->current_arp_slave) && !bond_time_in_interval(bond, last_rx, 3)) { - slave->new_link = BOND_LINK_DOWN; + bond_propose_link_state(slave, BOND_LINK_DOWN); commit++; } @@ -2793,7 +2792,7 @@ static int bond_ab_arp_inspect(struct bonding *bond) if (bond_is_active_slave(slave) && (!bond_time_in_interval(bond, trans_start, 2) || !bond_time_in_interval(bond, last_rx, 2))) { - slave->new_link = BOND_LINK_DOWN; + bond_propose_link_state(slave, BOND_LINK_DOWN); commit++; } } @@ -2813,7 +2812,7 @@ static void bond_ab_arp_commit(struct bonding *bond) struct slave *slave; bond_for_each_slave(bond, slave, iter) { - switch (slave->new_link) { + switch (slave->link_new_state) { case BOND_LINK_NOCHANGE: continue; @@ -2866,7 +2865,7 @@ static void bond_ab_arp_commit(struct bonding *bond) default: netdev_err(bond->dev, "impossible: new_link %d on slave %s\n", - slave->new_link, slave->dev->name); + slave->link_new_state, slave->dev->name); continue; } diff --git a/include/net/bonding.h b/include/net/bonding.h index 04008209506a..b0f20bc0fd4a 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -149,7 +149,6 @@ struct slave { unsigned long target_last_arp_rx[BOND_MAX_ARP_TARGETS]; s8 link; /* one of BOND_LINK_XXXX */ s8 link_new_state; /* one of BOND_LINK_XXXX */ - s8 new_link; u8 backup:1, /* indicates backup slave. Value corresponds with BOND_STATE_ACTIVE and BOND_STATE_BACKUP */ inactive:1, /* indicates inactive slave */ @@ -523,7 +522,7 @@ static inline void bond_propose_link_state(struct slave *slave, int state) static inline void bond_commit_link_state(struct slave *slave, bool notify) { - if (slave->link == slave->link_new_state) + if (slave->link_new_state == BOND_LINK_NOCHANGE) return; slave->link = slave->link_new_state; From 0a94a5d5bb078cbdbf1b596ce4dedbba4ea67fcd Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 7 Nov 2019 09:48:01 +0100 Subject: [PATCH 0313/3715] CDC-NCM: handle incomplete transfer of MTU [ Upstream commit 332f989a3b0041b810836c5c3747e59aad7e9d0b ] A malicious device may give half an answer when asked for its MTU. The driver will proceed after this with a garbage MTU. Anything but a complete answer must be treated as an error. V2: used sizeof as request by Alexander Reported-and-tested-by: syzbot+0631d878823ce2411636@syzkaller.appspotmail.com Signed-off-by: Oliver Neukum Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/cdc_ncm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index ab28487e6048..d53b4a41c583 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -578,8 +578,8 @@ static void cdc_ncm_set_dgram_size(struct usbnet *dev, int new_size) /* read current mtu value from device */ err = usbnet_read_cmd(dev, USB_CDC_GET_MAX_DATAGRAM_SIZE, USB_TYPE_CLASS | USB_DIR_IN | USB_RECIP_INTERFACE, - 0, iface_no, &max_datagram_size, 2); - if (err < 0) { + 0, iface_no, &max_datagram_size, sizeof(max_datagram_size)); + if (err < sizeof(max_datagram_size)) { dev_dbg(&dev->intf->dev, "GET_MAX_DATAGRAM_SIZE failed\n"); goto out; } @@ -590,7 +590,7 @@ static void cdc_ncm_set_dgram_size(struct usbnet *dev, int new_size) max_datagram_size = cpu_to_le16(ctx->max_datagram_size); err = usbnet_write_cmd(dev, USB_CDC_SET_MAX_DATAGRAM_SIZE, USB_TYPE_CLASS | USB_DIR_OUT | USB_RECIP_INTERFACE, - 0, iface_no, &max_datagram_size, 2); + 0, iface_no, &max_datagram_size, sizeof(max_datagram_size)); if (err < 0) dev_dbg(&dev->intf->dev, "SET_MAX_DATAGRAM_SIZE failed\n"); From b0b4aca6d67d3ee3ed859d9df8bcd100328fb05c Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 7 Nov 2019 18:29:52 +0000 Subject: [PATCH 0314/3715] ipv4: Fix table id reference in fib_sync_down_addr [ Upstream commit e0a312629fefa943534fc46f7bfbe6de3fdaf463 ] Hendrik reported routes in the main table using source address are not removed when the address is removed. The problem is that fib_sync_down_addr does not account for devices in the default VRF which are associated with the main table. Fix by updating the table id reference. Fixes: 5a56a0b3a45d ("net: Don't delete routes in different VRFs") Reported-by: Hendrik Donner Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fib_semantics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index e76b8a7bb891..eff703cb13b6 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1471,8 +1471,8 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local) int ret = 0; unsigned int hash = fib_laddr_hashfn(local); struct hlist_head *head = &fib_info_laddrhash[hash]; + int tb_id = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN; struct net *net = dev_net(dev); - int tb_id = l3mdev_fib_table(dev); struct fib_info *fi; if (!fib_info_laddrhash || local == 0) From cd6be628d78a5a7d97789883b2e69b3529d213b4 Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Fri, 8 Nov 2019 10:00:44 +0000 Subject: [PATCH 0315/3715] net: ethernet: octeon_mgmt: Account for second possible VLAN header [ Upstream commit e4dd5608033efe7b6030cde359bfdbaeb73bc22d ] Octeon's input ring-buffer entry has 14 bits-wide size field, so to account for second possible VLAN header max_mtu must be further reduced. Fixes: 109cc16526c6d ("ethernet/cavium: use core min/max MTU checking") Signed-off-by: Alexander Sverdlin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cavium/octeon/octeon_mgmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c index 45c51277e0cf..61701ba2ac72 100644 --- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c +++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c @@ -1497,7 +1497,7 @@ static int octeon_mgmt_probe(struct platform_device *pdev) netdev->ethtool_ops = &octeon_mgmt_ethtool_ops; netdev->min_mtu = 64 - OCTEON_MGMT_RX_HEADROOM; - netdev->max_mtu = 16383 - OCTEON_MGMT_RX_HEADROOM; + netdev->max_mtu = 16383 - OCTEON_MGMT_RX_HEADROOM - VLAN_HLEN; mac = of_get_mac_address(pdev->dev.of_node); From 5ec98a56092582835ecb8c552a8da48d1b2bf5d2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Nov 2019 20:08:19 -0800 Subject: [PATCH 0316/3715] net: fix data-race in neigh_event_send() [ Upstream commit 1b53d64435d56902fc234ff2507142d971a09687 ] KCSAN reported the following data-race [1] The fix will also prevent the compiler from optimizing out the condition. [1] BUG: KCSAN: data-race in neigh_resolve_output / neigh_resolve_output write to 0xffff8880a41dba78 of 8 bytes by interrupt on cpu 1: neigh_event_send include/net/neighbour.h:443 [inline] neigh_resolve_output+0x78/0x480 net/core/neighbour.c:1474 neigh_output include/net/neighbour.h:511 [inline] ip_finish_output2+0x4af/0xe40 net/ipv4/ip_output.c:228 __ip_finish_output net/ipv4/ip_output.c:308 [inline] __ip_finish_output+0x23a/0x490 net/ipv4/ip_output.c:290 ip_finish_output+0x41/0x160 net/ipv4/ip_output.c:318 NF_HOOK_COND include/linux/netfilter.h:294 [inline] ip_output+0xdf/0x210 net/ipv4/ip_output.c:432 dst_output include/net/dst.h:436 [inline] ip_local_out+0x74/0x90 net/ipv4/ip_output.c:125 __ip_queue_xmit+0x3a8/0xa40 net/ipv4/ip_output.c:532 ip_queue_xmit+0x45/0x60 include/net/ip.h:237 __tcp_transmit_skb+0xe81/0x1d60 net/ipv4/tcp_output.c:1169 tcp_transmit_skb net/ipv4/tcp_output.c:1185 [inline] __tcp_retransmit_skb+0x4bd/0x15f0 net/ipv4/tcp_output.c:2976 tcp_retransmit_skb+0x36/0x1a0 net/ipv4/tcp_output.c:2999 tcp_retransmit_timer+0x719/0x16d0 net/ipv4/tcp_timer.c:515 tcp_write_timer_handler+0x42d/0x510 net/ipv4/tcp_timer.c:598 tcp_write_timer+0xd1/0xf0 net/ipv4/tcp_timer.c:618 read to 0xffff8880a41dba78 of 8 bytes by interrupt on cpu 0: neigh_event_send include/net/neighbour.h:442 [inline] neigh_resolve_output+0x57/0x480 net/core/neighbour.c:1474 neigh_output include/net/neighbour.h:511 [inline] ip_finish_output2+0x4af/0xe40 net/ipv4/ip_output.c:228 __ip_finish_output net/ipv4/ip_output.c:308 [inline] __ip_finish_output+0x23a/0x490 net/ipv4/ip_output.c:290 ip_finish_output+0x41/0x160 net/ipv4/ip_output.c:318 NF_HOOK_COND include/linux/netfilter.h:294 [inline] ip_output+0xdf/0x210 net/ipv4/ip_output.c:432 dst_output include/net/dst.h:436 [inline] ip_local_out+0x74/0x90 net/ipv4/ip_output.c:125 __ip_queue_xmit+0x3a8/0xa40 net/ipv4/ip_output.c:532 ip_queue_xmit+0x45/0x60 include/net/ip.h:237 __tcp_transmit_skb+0xe81/0x1d60 net/ipv4/tcp_output.c:1169 tcp_transmit_skb net/ipv4/tcp_output.c:1185 [inline] __tcp_retransmit_skb+0x4bd/0x15f0 net/ipv4/tcp_output.c:2976 tcp_retransmit_skb+0x36/0x1a0 net/ipv4/tcp_output.c:2999 tcp_retransmit_timer+0x719/0x16d0 net/ipv4/tcp_timer.c:515 tcp_write_timer_handler+0x42d/0x510 net/ipv4/tcp_timer.c:598 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.4.0-rc3+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/neighbour.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 393099b1901a..1d6b98119a1d 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -429,8 +429,8 @@ static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { unsigned long now = jiffies; - if (neigh->used != now) - neigh->used = now; + if (READ_ONCE(neigh->used) != now) + WRITE_ONCE(neigh->used, now); if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) return __neigh_event_send(neigh, skb); return 0; From 79acc2c403c24de693709b7344df748b6441757c Mon Sep 17 00:00:00 2001 From: Sean Tranchetti Date: Mon, 4 Nov 2019 17:54:22 -0700 Subject: [PATCH 0317/3715] net: qualcomm: rmnet: Fix potential UAF when unregistering [ Upstream commit e7a86c687e64ab24f88330ad24ecc9442ce40c5a ] During the exit/unregistration process of the RmNet driver, the function rmnet_unregister_real_device() is called to handle freeing the driver's internal state and removing the RX handler on the underlying physical device. However, the order of operations this function performs is wrong and can lead to a use after free of the rmnet_port structure. Before calling netdev_rx_handler_unregister(), this port structure is freed with kfree(). If packets are received on any RmNet devices before synchronize_net() completes, they will attempt to use this already-freed port structure when processing the packet. As such, before cleaning up any other internal state, the RX handler must be unregistered in order to guarantee that no further packets will arrive on the device. Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial implementation") Signed-off-by: Sean Tranchetti Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 1e33aea59f50..7d8303e45f09 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -84,10 +84,10 @@ static int rmnet_unregister_real_device(struct net_device *real_dev, if (port->nr_rmnet_devs) return -EINVAL; - kfree(port); - netdev_rx_handler_unregister(real_dev); + kfree(port); + /* release reference on real_dev */ dev_put(real_dev); From c7c24046c8e970fdda5c6f3998b728fb7f0b49df Mon Sep 17 00:00:00 2001 From: Aleksander Morgado Date: Thu, 7 Nov 2019 11:57:01 +0100 Subject: [PATCH 0318/3715] net: usb: qmi_wwan: add support for DW5821e with eSIM support [ Upstream commit e497df686e8fed8c1dd69179010656362858edb3 ] Exactly same layout as the default DW5821e module, just a different vid/pid. The QMI interface is exposed in USB configuration #1: P: Vendor=413c ProdID=81e0 Rev=03.18 S: Manufacturer=Dell Inc. S: Product=DW5821e-eSIM Snapdragon X20 LTE S: SerialNumber=0123456789ABCDEF C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#=0x0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I: If#=0x1 Alt= 0 #EPs= 1 Cls=03(HID ) Sub=00 Prot=00 Driver=usbhid I: If#=0x2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#=0x3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#=0x4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#=0x5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option Signed-off-by: Aleksander Morgado Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index e406a05e79dc..8ed538295d09 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1286,6 +1286,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x413c, 0x81b6, 8)}, /* Dell Wireless 5811e */ {QMI_FIXED_INTF(0x413c, 0x81b6, 10)}, /* Dell Wireless 5811e */ {QMI_FIXED_INTF(0x413c, 0x81d7, 0)}, /* Dell Wireless 5821e */ + {QMI_FIXED_INTF(0x413c, 0x81e0, 0)}, /* Dell Wireless 5821e with eSIM support*/ {QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */ {QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)}, /* HP lt4120 Snapdragon X5 LTE */ {QMI_FIXED_INTF(0x22de, 0x9061, 3)}, /* WeTelecom WPD-600N */ From 5f311e37e9592bbfebac148135ed39ede2ad0d10 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Tue, 5 Nov 2019 16:34:07 +0800 Subject: [PATCH 0319/3715] NFC: fdp: fix incorrect free object [ Upstream commit 517ce4e93368938b204451285e53014549804868 ] The address of fw_vsc_cfg is on stack. Releasing it with devm_kfree() is incorrect, which may result in a system crash or other security impacts. The expected object to free is *fw_vsc_cfg. Signed-off-by: Pan Bian Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/nfc/fdp/i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c index c4da50e07bbc..4020c11a9415 100644 --- a/drivers/nfc/fdp/i2c.c +++ b/drivers/nfc/fdp/i2c.c @@ -267,7 +267,7 @@ static void fdp_nci_i2c_read_device_properties(struct device *dev, *fw_vsc_cfg, len); if (r) { - devm_kfree(dev, fw_vsc_cfg); + devm_kfree(dev, *fw_vsc_cfg); goto vsc_read_err; } } else { From 6c234cb7fc9d948e2edb2c22804e14ea598640fb Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Thu, 7 Nov 2019 14:29:50 +0800 Subject: [PATCH 0320/3715] nfc: netlink: fix double device reference drop [ Upstream commit 025ec40b81d785a98f76b8bdb509ac10773b4f12 ] The function nfc_put_device(dev) is called twice to drop the reference to dev when there is no associated local llcp. Remove one of them to fix the bug. Fixes: 52feb444a903 ("NFC: Extend netlink interface for LTO, RW, and MIUX parameters support") Fixes: d9b8d8e19b07 ("NFC: llcp: Service Name Lookup netlink interface") Signed-off-by: Pan Bian Reviewed-by: Johan Hovold Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/nfc/netlink.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 5f2acd029da5..7b8d4d235a3a 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1100,7 +1100,6 @@ static int nfc_genl_llc_set_params(struct sk_buff *skb, struct genl_info *info) local = nfc_llcp_find_local(dev); if (!local) { - nfc_put_device(dev); rc = -ENODEV; goto exit; } @@ -1160,7 +1159,6 @@ static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info) local = nfc_llcp_find_local(dev); if (!local) { - nfc_put_device(dev); rc = -ENODEV; goto exit; } From 12e3d9b35c0e9cd12e1c2027344a1ca99ca2044d Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Thu, 7 Nov 2019 09:33:20 +0800 Subject: [PATCH 0321/3715] NFC: st21nfca: fix double free [ Upstream commit 99a8efbb6e30b72ac98cecf81103f847abffb1e5 ] The variable nfcid_skb is not changed in the callee nfc_hci_get_param() if error occurs. Consequently, the freed variable nfcid_skb will be freed again, resulting in a double free bug. Set nfcid_skb to NULL after releasing it to fix the bug. Signed-off-by: Pan Bian Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/nfc/st21nfca/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nfc/st21nfca/core.c b/drivers/nfc/st21nfca/core.c index e803fdfa9189..f37069b53b20 100644 --- a/drivers/nfc/st21nfca/core.c +++ b/drivers/nfc/st21nfca/core.c @@ -719,6 +719,7 @@ static int st21nfca_hci_complete_target_discovered(struct nfc_hci_dev *hdev, NFC_PROTO_FELICA_MASK; } else { kfree_skb(nfcid_skb); + nfcid_skb = NULL; /* P2P in type A */ r = nfc_hci_get_param(hdev, ST21NFCA_RF_READER_F_GATE, ST21NFCA_RF_READER_F_NFCID1, From a49ecc872d61a90a7ab78513113ffd124b29576a Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Fri, 8 Nov 2019 02:42:30 -0800 Subject: [PATCH 0322/3715] qede: fix NULL pointer deref in __qede_remove() [ Upstream commit deabc87111c690097c03765ea017cd500f7376fc ] While rebooting the system with SR-IOV vfs enabled leads to below crash due to recurrence of __qede_remove() on the VF devices (first from .shutdown() flow of the VF itself and another from PF's .shutdown() flow executing pci_disable_sriov()) This patch adds a safeguard in __qede_remove() flow to fix this, so that driver doesn't attempt to remove "already removed" devices. [ 194.360134] BUG: unable to handle kernel NULL pointer dereference at 00000000000008dc [ 194.360227] IP: [] __qede_remove+0x24/0x130 [qede] [ 194.360304] PGD 0 [ 194.360325] Oops: 0000 [#1] SMP [ 194.360360] Modules linked in: tcp_lp fuse tun bridge stp llc devlink bonding ip_set nfnetlink ib_isert iscsi_target_mod ib_srpt target_core_mod ib_srp scsi_transport_srp scsi_tgt ib_ipoib ib_umad rpcrdma sunrpc rdma_ucm ib_uverbs ib_iser rdma_cm iw_cm ib_cm libiscsi scsi_transport_iscsi dell_smbios iTCO_wdt iTCO_vendor_support dell_wmi_descriptor dcdbas vfat fat pcc_cpufreq skx_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd qedr ib_core pcspkr ses enclosure joydev ipmi_ssif sg i2c_i801 lpc_ich mei_me mei wmi ipmi_si ipmi_devintf ipmi_msghandler tpm_crb acpi_pad acpi_power_meter xfs libcrc32c sd_mod crc_t10dif crct10dif_generic crct10dif_pclmul crct10dif_common crc32c_intel mgag200 [ 194.361044] qede i2c_algo_bit drm_kms_helper qed syscopyarea sysfillrect nvme sysimgblt fb_sys_fops ttm nvme_core mpt3sas crc8 ptp drm pps_core ahci raid_class scsi_transport_sas libahci libata drm_panel_orientation_quirks nfit libnvdimm dm_mirror dm_region_hash dm_log dm_mod [last unloaded: ip_tables] [ 194.361297] CPU: 51 PID: 7996 Comm: reboot Kdump: loaded Not tainted 3.10.0-1062.el7.x86_64 #1 [ 194.361359] Hardware name: Dell Inc. PowerEdge MX840c/0740HW, BIOS 2.4.6 10/15/2019 [ 194.361412] task: ffff9cea9b360000 ti: ffff9ceabebdc000 task.ti: ffff9ceabebdc000 [ 194.361463] RIP: 0010:[] [] __qede_remove+0x24/0x130 [qede] [ 194.361534] RSP: 0018:ffff9ceabebdfac0 EFLAGS: 00010282 [ 194.361570] RAX: 0000000000000000 RBX: ffff9cd013846098 RCX: 0000000000000000 [ 194.361621] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff9cd013846098 [ 194.361668] RBP: ffff9ceabebdfae8 R08: 0000000000000000 R09: 0000000000000000 [ 194.361715] R10: 00000000bfe14201 R11: ffff9ceabfe141e0 R12: 0000000000000000 [ 194.361762] R13: ffff9cd013846098 R14: 0000000000000000 R15: ffff9ceab5e48000 [ 194.361810] FS: 00007f799c02d880(0000) GS:ffff9ceacb0c0000(0000) knlGS:0000000000000000 [ 194.361865] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 194.361903] CR2: 00000000000008dc CR3: 0000001bdac76000 CR4: 00000000007607e0 [ 194.361953] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 194.362002] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 194.362051] PKRU: 55555554 [ 194.362073] Call Trace: [ 194.362109] [] qede_remove+0x10/0x20 [qede] [ 194.362180] [] pci_device_remove+0x3e/0xc0 [ 194.362240] [] __device_release_driver+0x82/0xf0 [ 194.362285] [] device_release_driver+0x23/0x30 [ 194.362343] [] pci_stop_bus_device+0x84/0xa0 [ 194.362388] [] pci_stop_and_remove_bus_device+0x12/0x20 [ 194.362450] [] pci_iov_remove_virtfn+0xaf/0x160 [ 194.362496] [] sriov_disable+0x3c/0xf0 [ 194.362534] [] pci_disable_sriov+0x23/0x30 [ 194.362599] [] qed_sriov_disable+0x5e3/0x650 [qed] [ 194.362658] [] ? kfree+0x106/0x140 [ 194.362709] [] ? qed_free_stream_mem+0x70/0x90 [qed] [ 194.362754] [] ? kfree+0x106/0x140 [ 194.362803] [] qed_slowpath_stop+0x1a9/0x1d0 [qed] [ 194.362854] [] __qede_remove+0xae/0x130 [qede] [ 194.362904] [] qede_shutdown+0x10/0x20 [qede] [ 194.362956] [] pci_device_shutdown+0x3a/0x60 [ 194.363010] [] device_shutdown+0xfb/0x1f0 [ 194.363066] [] kernel_restart_prepare+0x36/0x40 [ 194.363107] [] kernel_restart+0x12/0x60 [ 194.363146] [] SYSC_reboot+0x229/0x260 [ 194.363196] [] ? handle_mm_fault+0x39d/0x9b0 [ 194.363253] [] ? __switch_to+0x151/0x580 [ 194.363304] [] ? __schedule+0x448/0x9c0 [ 194.363343] [] SyS_reboot+0xe/0x10 [ 194.363387] [] system_call_fastpath+0x25/0x2a [ 194.363430] Code: f9 e9 37 ff ff ff 90 0f 1f 44 00 00 55 48 89 e5 41 57 41 56 41 55 4c 8d af 98 00 00 00 41 54 4c 89 ef 41 89 f4 53 e8 4c e4 55 f9 <80> b8 dc 08 00 00 01 48 89 c3 4c 8d b8 c0 08 00 00 4c 8b b0 c0 [ 194.363712] RIP [] __qede_remove+0x24/0x130 [qede] [ 194.363764] RSP [ 194.363791] CR2: 00000000000008dc Signed-off-by: Manish Chopra Signed-off-by: Ariel Elior Signed-off-by: Sudarsana Kalluru Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qede/qede_main.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 6eab2c632c75..dab202f343c6 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -1052,8 +1052,16 @@ enum qede_remove_mode { static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) { struct net_device *ndev = pci_get_drvdata(pdev); - struct qede_dev *edev = netdev_priv(ndev); - struct qed_dev *cdev = edev->cdev; + struct qede_dev *edev; + struct qed_dev *cdev; + + if (!ndev) { + dev_info(&pdev->dev, "Device has already been removed\n"); + return; + } + + edev = netdev_priv(ndev); + cdev = edev->cdev; DP_INFO(edev, "Starting qede_remove\n"); From f536bdb834c0c54743721e7e5000d0006671b848 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 6 Nov 2019 17:55:47 +0100 Subject: [PATCH 0323/3715] ALSA: timer: Fix incorrectly assigned timer instance commit e7af6307a8a54f0b873960b32b6a644f2d0fbd97 upstream. The clean up commit 41672c0c24a6 ("ALSA: timer: Simplify error path in snd_timer_open()") unified the error handling code paths with the standard goto, but it introduced a subtle bug: the timer instance is stored in snd_timer_open() incorrectly even if it returns an error. This may eventually lead to UAF, as spotted by fuzzer. The culprit is the snd_timer_open() code checks the SNDRV_TIMER_IFLG_EXCLUSIVE flag with the common variable timeri. This variable is supposed to be the newly created instance, but we (ab-)used it for a temporary check before the actual creation of a timer instance. After that point, there is another check for the max number of instances, and it bails out if over the threshold. Before the refactoring above, it worked fine because the code returned directly from that point. After the refactoring, however, it jumps to the unified error path that stores the timeri variable in return -- even if it returns an error. Unfortunately this stored value is kept in the caller side (snd_timer_user_tselect()) in tu->timeri. This causes inconsistency later, as if the timer was successfully assigned. In this patch, we fix it by not re-using timeri variable but a temporary variable for testing the exclusive connection, so timeri remains NULL at that point. Fixes: 41672c0c24a6 ("ALSA: timer: Simplify error path in snd_timer_open()") Reported-and-tested-by: Tristan Madani Cc: Link: https://lore.kernel.org/r/20191106165547.23518-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/timer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/core/timer.c b/sound/core/timer.c index 161ab19cb722..c60dfd52e8a6 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -298,11 +298,11 @@ int snd_timer_open(struct snd_timer_instance **ti, goto unlock; } if (!list_empty(&timer->open_list_head)) { - timeri = list_entry(timer->open_list_head.next, + struct snd_timer_instance *t = + list_entry(timer->open_list_head.next, struct snd_timer_instance, open_list); - if (timeri->flags & SNDRV_TIMER_IFLG_EXCLUSIVE) { + if (t->flags & SNDRV_TIMER_IFLG_EXCLUSIVE) { err = -EBUSY; - timeri = NULL; goto unlock; } } From 2e3f0caf9f62735a5d997003bf1e17bedd0014d6 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 3 Nov 2019 00:09:20 +0900 Subject: [PATCH 0324/3715] ALSA: bebob: fix to detect configured source of sampling clock for Focusrite Saffire Pro i/o series commit 706ad6746a66546daf96d4e4a95e46faf6cf689a upstream. For Focusrite Saffire Pro i/o, the lowest 8 bits of register represents configured source of sampling clock. The next lowest 8 bits represents whether the configured source is actually detected or not just after the register is changed for the source. Current implementation evaluates whole the register to detect configured source. This results in failure due to the next lowest 8 bits when the source is connected in advance. This commit fixes the bug. Fixes: 25784ec2d034 ("ALSA: bebob: Add support for Focusrite Saffire/SaffirePro series") Cc: # v3.16+ Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20191102150920.20367-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/firewire/bebob/bebob_focusrite.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/firewire/bebob/bebob_focusrite.c b/sound/firewire/bebob/bebob_focusrite.c index 52b8b61ecddd..62d989edd129 100644 --- a/sound/firewire/bebob/bebob_focusrite.c +++ b/sound/firewire/bebob/bebob_focusrite.c @@ -28,6 +28,8 @@ #define SAFFIRE_CLOCK_SOURCE_SPDIF 1 /* clock sources as returned from register of Saffire Pro 10 and 26 */ +#define SAFFIREPRO_CLOCK_SOURCE_SELECT_MASK 0x000000ff +#define SAFFIREPRO_CLOCK_SOURCE_DETECT_MASK 0x0000ff00 #define SAFFIREPRO_CLOCK_SOURCE_INTERNAL 0 #define SAFFIREPRO_CLOCK_SOURCE_SKIP 1 /* never used on hardware */ #define SAFFIREPRO_CLOCK_SOURCE_SPDIF 2 @@ -190,6 +192,7 @@ saffirepro_both_clk_src_get(struct snd_bebob *bebob, unsigned int *id) map = saffirepro_clk_maps[1]; /* In a case that this driver cannot handle the value of register. */ + value &= SAFFIREPRO_CLOCK_SOURCE_SELECT_MASK; if (value >= SAFFIREPRO_CLOCK_SOURCE_COUNT || map[value] < 0) { err = -EIO; goto end; From bd4bb87222365ce1161bf78409d5a66d0637926d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 5 Nov 2019 14:43:16 +0100 Subject: [PATCH 0325/3715] ALSA: hda/ca0132 - Fix possible workqueue stall commit 15c2b3cc09a31620914955cb2a89c277c18ee999 upstream. The unsolicited event handler for the headphone jack on CA0132 codec driver tries to reschedule the another delayed work with cancel_delayed_work_sync(). It's no good idea, unfortunately, especially after we changed the work queue to the standard global one; this may lead to a stall because both works are using the same global queue. Fix it by dropping the _sync but does call cancel_delayed_work() instead. Fixes: 993884f6a26c ("ALSA: hda/ca0132 - Delay HP amp turnon.") BugLink: https://bugzilla.suse.com/show_bug.cgi?id=1155836 Cc: Link: https://lore.kernel.org/r/20191105134316.19294-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_ca0132.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index 119f3b504765..9876d8dc2ede 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -4440,7 +4440,7 @@ static void hp_callback(struct hda_codec *codec, struct hda_jack_callback *cb) /* Delay enabling the HP amp, to let the mic-detection * state machine run. */ - cancel_delayed_work_sync(&spec->unsol_hp_work); + cancel_delayed_work(&spec->unsol_hp_work); schedule_delayed_work(&spec->unsol_hp_work, msecs_to_jiffies(500)); tbl = snd_hda_jack_tbl_get(codec, cb->nid); if (tbl) From d25eb9d6b723afa23e818892c026adc2021ac8cd Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Tue, 5 Nov 2019 21:16:30 -0800 Subject: [PATCH 0326/3715] mm: thp: handle page cache THP correctly in PageTransCompoundMap commit 169226f7e0d275c1879551f37484ef6683579a5c upstream. We have a usecase to use tmpfs as QEMU memory backend and we would like to take the advantage of THP as well. But, our test shows the EPT is not PMD mapped even though the underlying THP are PMD mapped on host. The number showed by /sys/kernel/debug/kvm/largepage is much less than the number of PMD mapped shmem pages as the below: 7f2778200000-7f2878200000 rw-s 00000000 00:14 262232 /dev/shm/qemu_back_mem.mem.Hz2hSf (deleted) Size: 4194304 kB [snip] AnonHugePages: 0 kB ShmemPmdMapped: 579584 kB [snip] Locked: 0 kB cat /sys/kernel/debug/kvm/largepages 12 And some benchmarks do worse than with anonymous THPs. By digging into the code we figured out that commit 127393fbe597 ("mm: thp: kvm: fix memory corruption in KVM with THP enabled") checks if there is a single PTE mapping on the page for anonymous THP when setting up EPT map. But the _mapcount < 0 check doesn't work for page cache THP since every subpage of page cache THP would get _mapcount inc'ed once it is PMD mapped, so PageTransCompoundMap() always returns false for page cache THP. This would prevent KVM from setting up PMD mapped EPT entry. So we need handle page cache THP correctly. However, when page cache THP's PMD gets split, kernel just remove the map instead of setting up PTE map like what anonymous THP does. Before KVM calls get_user_pages() the subpages may get PTE mapped even though it is still a THP since the page cache THP may be mapped by other processes at the mean time. Checking its _mapcount and whether the THP has PTE mapped or not. Although this may report some false negative cases (PTE mapped by other processes), it looks not trivial to make this accurate. With this fix /sys/kernel/debug/kvm/largepage would show reasonable pages are PMD mapped by EPT as the below: 7fbeaee00000-7fbfaee00000 rw-s 00000000 00:14 275464 /dev/shm/qemu_back_mem.mem.SKUvat (deleted) Size: 4194304 kB [snip] AnonHugePages: 0 kB ShmemPmdMapped: 557056 kB [snip] Locked: 0 kB cat /sys/kernel/debug/kvm/largepages 271 And the benchmarks are as same as anonymous THPs. [yang.shi@linux.alibaba.com: v4] Link: http://lkml.kernel.org/r/1571865575-42913-1-git-send-email-yang.shi@linux.alibaba.com Link: http://lkml.kernel.org/r/1571769577-89735-1-git-send-email-yang.shi@linux.alibaba.com Fixes: dd78fedde4b9 ("rmap: support file thp") Signed-off-by: Yang Shi Reported-by: Gang Deng Tested-by: Gang Deng Suggested-by: Hugh Dickins Acked-by: Kirill A. Shutemov Cc: Andrea Arcangeli Cc: Matthew Wilcox Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/mm.h | 5 ----- include/linux/mm_types.h | 5 +++++ include/linux/page-flags.h | 20 ++++++++++++++++++-- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index ee0eae215210..858ce84ac7c5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -549,11 +549,6 @@ static inline void *kvmalloc_array(size_t n, size_t size, gfp_t flags) extern void kvfree(const void *addr); -static inline atomic_t *compound_mapcount_ptr(struct page *page) -{ - return &page[1].compound_mapcount; -} - static inline int compound_mapcount(struct page *page) { VM_BUG_ON_PAGE(!PageCompound(page), page); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index e41ef532c4ce..be5d445bac98 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -240,6 +240,11 @@ struct page_frag_cache { typedef unsigned long vm_flags_t; +static inline atomic_t *compound_mapcount_ptr(struct page *page) +{ + return &page[1].compound_mapcount; +} + /* * A region containing a mapping of a non-memory backed file under NOMMU * conditions. These are held in a global tree and are pinned by the VMAs that diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 584b14c774c1..5f966c94732b 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -565,12 +565,28 @@ static inline int PageTransCompound(struct page *page) * * Unlike PageTransCompound, this is safe to be called only while * split_huge_pmd() cannot run from under us, like if protected by the - * MMU notifier, otherwise it may result in page->_mapcount < 0 false + * MMU notifier, otherwise it may result in page->_mapcount check false * positives. + * + * We have to treat page cache THP differently since every subpage of it + * would get _mapcount inc'ed once it is PMD mapped. But, it may be PTE + * mapped in the current process so comparing subpage's _mapcount to + * compound_mapcount to filter out PTE mapped case. */ static inline int PageTransCompoundMap(struct page *page) { - return PageTransCompound(page) && atomic_read(&page->_mapcount) < 0; + struct page *head; + + if (!PageTransCompound(page)) + return 0; + + if (PageAnon(page)) + return atomic_read(&page->_mapcount) < 0; + + head = compound_head(page); + /* File THP is PMD mapped and not PTE mapped */ + return atomic_read(&page->_mapcount) == + atomic_read(compound_mapcount_ptr(head)); } /* From fce52ed9e964d91ce5a02e90a22ab72e26b59046 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 5 Nov 2019 21:16:40 -0800 Subject: [PATCH 0327/3715] mm, vmstat: hide /proc/pagetypeinfo from normal users commit abaed0112c1db08be15a784a2c5c8a8b3063cdd3 upstream. /proc/pagetypeinfo is a debugging tool to examine internal page allocator state wrt to fragmentation. It is not very useful for any other use so normal users really do not need to read this file. Waiman Long has noticed that reading this file can have negative side effects because zone->lock is necessary for gathering data and that a) interferes with the page allocator and its users and b) can lead to hard lockups on large machines which have very long free_list. Reduce both issues by simply not exporting the file to regular users. Link: http://lkml.kernel.org/r/20191025072610.18526-2-mhocko@kernel.org Fixes: 467c996c1e19 ("Print out statistics in relation to fragmentation avoidance to /proc/pagetypeinfo") Signed-off-by: Michal Hocko Reported-by: Waiman Long Acked-by: Mel Gorman Acked-by: Vlastimil Babka Acked-by: Waiman Long Acked-by: Rafael Aquini Acked-by: David Rientjes Reviewed-by: Andrew Morton Cc: David Hildenbrand Cc: Johannes Weiner Cc: Roman Gushchin Cc: Konstantin Khlebnikov Cc: Jann Horn Cc: Song Liu Cc: Greg Kroah-Hartman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/vmstat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/vmstat.c b/mm/vmstat.c index 28c45c26f901..ba9168326413 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1951,7 +1951,7 @@ void __init init_mm_internals(void) #endif #ifdef CONFIG_PROC_FS proc_create("buddyinfo", 0444, NULL, &buddyinfo_file_operations); - proc_create("pagetypeinfo", 0444, NULL, &pagetypeinfo_file_operations); + proc_create("pagetypeinfo", 0400, NULL, &pagetypeinfo_file_operations); proc_create("vmstat", 0444, NULL, &vmstat_file_operations); proc_create("zoneinfo", 0444, NULL, &zoneinfo_file_operations); #endif From e8b4d457b13ea736ec5946e7f548a67c6472b61c Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 5 Nov 2019 21:16:57 -0800 Subject: [PATCH 0328/3715] dump_stack: avoid the livelock of the dump_lock commit 5cbf2fff3bba8d3c6a4d47c1754de1cf57e2b01f upstream. In the current code, we use the atomic_cmpxchg() to serialize the output of the dump_stack(), but this implementation suffers the thundering herd problem. We have observed such kind of livelock on a Marvell cn96xx board(24 cpus) when heavily using the dump_stack() in a kprobe handler. Actually we can let the competitors to wait for the releasing of the lock before jumping to atomic_cmpxchg(). This will definitely mitigate the thundering herd problem. Thanks Linus for the suggestion. [akpm@linux-foundation.org: fix comment] Link: http://lkml.kernel.org/r/20191030031637.6025-1-haokexin@gmail.com Fixes: b58d977432c8 ("dump_stack: serialize the output from dump_stack()") Signed-off-by: Kevin Hao Suggested-by: Linus Torvalds Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- lib/dump_stack.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/dump_stack.c b/lib/dump_stack.c index c5edbedd364d..287ea178f0fa 100644 --- a/lib/dump_stack.c +++ b/lib/dump_stack.c @@ -46,7 +46,12 @@ retry: was_locked = 1; } else { local_irq_restore(flags); - cpu_relax(); + /* + * Wait for the lock to release before jumping to + * atomic_cmpxchg() in order to mitigate the thundering herd + * problem. + */ + do { cpu_relax(); } while (atomic_read(&dump_lock) != -1); goto retry; } From 16dff535dd061de9d8dd6d9e83f00b04cccc94ba Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Thu, 26 Sep 2019 19:16:41 -0600 Subject: [PATCH 0329/3715] tools: gpio: Use !building_out_of_srctree to determine srctree commit 4a6a6f5c4aeedb72db871d60bfcca89835f317aa upstream. make TARGETS=gpio kselftest fails with: Makefile:23: tools/build/Makefile.include: No such file or directory When the gpio tool make is invoked from tools Makefile, srctree is cleared and the current logic check for srctree equals to empty string to determine srctree location from CURDIR. When the build in invoked from selftests/gpio Makefile, the srctree is set to "." and the same logic used for srctree equals to empty is needed to determine srctree. Check building_out_of_srctree undefined as the condition for both cases to fix "make TARGETS=gpio kselftest" build failure. Cc: stable@vger.kernel.org Signed-off-by: Shuah Khan Signed-off-by: Bartosz Golaszewski Signed-off-by: Greg Kroah-Hartman --- tools/gpio/Makefile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/gpio/Makefile b/tools/gpio/Makefile index 240eda014b37..f8bc8656a544 100644 --- a/tools/gpio/Makefile +++ b/tools/gpio/Makefile @@ -3,7 +3,11 @@ include ../scripts/Makefile.include bindir ?= /usr/bin -ifeq ($(srctree),) +# This will work when gpio is built in tools env. where srctree +# isn't set and when invoked from selftests build, where srctree +# is set to ".". building_out_of_srctree is undefined for in srctree +# builds +ifndef building_out_of_srctree srctree := $(patsubst %/,%,$(dir $(CURDIR))) srctree := $(patsubst %/,%,$(dir $(srctree))) endif From ce117617cbefd5803d63f4eed50b0ac277995b57 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 5 Nov 2019 00:27:11 +0100 Subject: [PATCH 0330/3715] perf tools: Fix time sorting commit 722ddfde366fd46205456a9c5ff9b3359dc9a75e upstream. The final sort might get confused when the comparison is done over bigger numbers than int like for -s time. Check the following report for longer workloads: $ perf report -s time -F time,overhead --stdio Fix hist_entry__sort() to properly return int64_t and not possible cut int. Fixes: 043ca389a318 ("perf tools: Use hpp formats to sort final output") Signed-off-by: Jiri Olsa Reviewed-by: Andi Kleen Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: stable@vger.kernel.org # v3.16+ Link: http://lore.kernel.org/lkml/20191104232711.16055-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/hist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 5b8bc1fd943d..c1f9615b02f7 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1504,7 +1504,7 @@ int hists__collapse_resort(struct hists *hists, struct ui_progress *prog) return 0; } -static int hist_entry__sort(struct hist_entry *a, struct hist_entry *b) +static int64_t hist_entry__sort(struct hist_entry *a, struct hist_entry *b) { struct hists *hists = a->hists; struct perf_hpp_fmt *fmt; From 53c12cc84eee81d01c6c9ae6009c2cf019c55049 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 30 Oct 2019 10:21:28 -0400 Subject: [PATCH 0331/3715] drm/radeon: fix si_enable_smc_cac() failed issue commit 2c409ba81be25516afe05ae27a4a15da01740b01 upstream. Need to set the dte flag on this asic. Port the fix from amdgpu: 5cb818b861be114 ("drm/amd/amdgpu: fix si_enable_smc_cac() failed issue") Reviewed-by: Yong Zhao Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/si_dpm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 90d5b41007bf..9e5645e4cb55 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -1956,6 +1956,7 @@ static void si_initialize_powertune_defaults(struct radeon_device *rdev) case 0x682C: si_pi->cac_weights = cac_weights_cape_verde_pro; si_pi->dte_data = dte_data_sun_xt; + update_dte_from_pl2 = true; break; case 0x6825: case 0x6827: From ab19c1ddaa02c8a99dffcec578ec30979e853e22 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Wed, 6 Nov 2019 11:59:46 -0800 Subject: [PATCH 0332/3715] HID: wacom: generic: Treat serial number and related fields as unsigned commit ff479731c3859609530416a18ddb3db5db019b66 upstream. The HID descriptors for most Wacom devices oddly declare the serial number and other related fields as signed integers. When these numbers are ingested by the HID subsystem, they are automatically sign-extended into 32-bit integers. We treat the fields as unsigned elsewhere in the kernel and userspace, however, so this sign-extension causes problems. In particular, the sign-extended tool ID sent to userspace as ABS_MISC does not properly match unsigned IDs used by xf86-input-wacom and libwacom. We introduce a function 'wacom_s32tou' that can undo the automatic sign extension performed by 'hid_snto32'. We call this function when processing the serial number and related fields to ensure that we are dealing with and reporting the unsigned form. We opt to use this method rather than adding a descriptor fixup in 'wacom_hid_usage_quirk' since it should be more robust in the face of future devices. Ref: https://github.com/linuxwacom/input-wacom/issues/134 Fixes: f85c9dc678 ("HID: wacom: generic: Support tool ID and additional tool types") CC: # v4.10+ Signed-off-by: Jason Gerecke Reviewed-by: Aaron Armstrong Skomra Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/wacom.h | 15 +++++++++++++++ drivers/hid/wacom_wac.c | 10 ++++++---- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/hid/wacom.h b/drivers/hid/wacom.h index 3c37c3cbf6f1..9c0900c35b23 100644 --- a/drivers/hid/wacom.h +++ b/drivers/hid/wacom.h @@ -205,6 +205,21 @@ static inline void wacom_schedule_work(struct wacom_wac *wacom_wac, } } +/* + * Convert a signed 32-bit integer to an unsigned n-bit integer. Undoes + * the normally-helpful work of 'hid_snto32' for fields that use signed + * ranges for questionable reasons. + */ +static inline __u32 wacom_s32tou(s32 value, __u8 n) +{ + switch (n) { + case 8: return ((__u8)value); + case 16: return ((__u16)value); + case 32: return ((__u32)value); + } + return value & (1 << (n - 1)) ? value & (~(~0U << n)) : value; +} + extern const struct hid_device_id wacom_ids[]; void wacom_wac_irq(struct wacom_wac *wacom_wac, size_t len); diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 2e0c4df6ad08..1eb868403664 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -2182,7 +2182,7 @@ static void wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field case HID_DG_TOOLSERIALNUMBER: if (value) { wacom_wac->serial[0] = (wacom_wac->serial[0] & ~0xFFFFFFFFULL); - wacom_wac->serial[0] |= (__u32)value; + wacom_wac->serial[0] |= wacom_s32tou(value, field->report_size); } return; case HID_DG_TWIST: @@ -2198,15 +2198,17 @@ static void wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field return; case WACOM_HID_WD_SERIALHI: if (value) { + __u32 raw_value = wacom_s32tou(value, field->report_size); + wacom_wac->serial[0] = (wacom_wac->serial[0] & 0xFFFFFFFF); - wacom_wac->serial[0] |= ((__u64)value) << 32; + wacom_wac->serial[0] |= ((__u64)raw_value) << 32; /* * Non-USI EMR devices may contain additional tool type * information here. See WACOM_HID_WD_TOOLTYPE case for * more details. */ if (value >> 20 == 1) { - wacom_wac->id[0] |= value & 0xFFFFF; + wacom_wac->id[0] |= raw_value & 0xFFFFF; } } return; @@ -2218,7 +2220,7 @@ static void wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field * bitwise OR so the complete value can be built * up over time :( */ - wacom_wac->id[0] |= value; + wacom_wac->id[0] |= wacom_s32tou(value, field->report_size); return; case WACOM_HID_WD_OFFSETLEFT: if (features->offset_left && value != features->offset_left) From 67f5c06adf616c22d78e4b580b3899c9a8ed8366 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 6 Nov 2019 15:41:05 +0000 Subject: [PATCH 0333/3715] arm64: Do not mask out PTE_RDONLY in pte_same() commit 6767df245f4736d0cf0c6fb7cf9cf94b27414245 upstream. Following commit 73e86cb03cf2 ("arm64: Move PTE_RDONLY bit handling out of set_pte_at()"), the PTE_RDONLY bit is no longer managed by set_pte_at() but built into the PAGE_* attribute definitions. Consequently, pte_same() must include this bit when checking two PTEs for equality. Remove the arm64-specific pte_same() function, practically reverting commit 747a70e60b72 ("arm64: Fix copy-on-write referencing in HugeTLB") Fixes: 73e86cb03cf2 ("arm64: Move PTE_RDONLY bit handling out of set_pte_at()") Cc: # 4.14.x- Cc: Will Deacon Cc: Steve Capper Reported-by: John Stultz Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/pgtable.h | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 4cf248185e6f..aa3b8dd8fc35 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -258,23 +258,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, set_pte(ptep, pte); } -#define __HAVE_ARCH_PTE_SAME -static inline int pte_same(pte_t pte_a, pte_t pte_b) -{ - pteval_t lhs, rhs; - - lhs = pte_val(pte_a); - rhs = pte_val(pte_b); - - if (pte_present(pte_a)) - lhs &= ~PTE_RDONLY; - - if (pte_present(pte_b)) - rhs &= ~PTE_RDONLY; - - return (lhs == rhs); -} - /* * Huge pte definitions. */ From 58af68f4f16de69243dea8ee501da43dd22d6d65 Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Fri, 25 Oct 2019 14:05:24 +0100 Subject: [PATCH 0334/3715] ceph: fix use-after-free in __ceph_remove_cap() commit ea60ed6fcf29eebc78f2ce91491e6309ee005a01 upstream. KASAN reports a use-after-free when running xfstest generic/531, with the following trace: [ 293.903362] kasan_report+0xe/0x20 [ 293.903365] rb_erase+0x1f/0x790 [ 293.903370] __ceph_remove_cap+0x201/0x370 [ 293.903375] __ceph_remove_caps+0x4b/0x70 [ 293.903380] ceph_evict_inode+0x4e/0x360 [ 293.903386] evict+0x169/0x290 [ 293.903390] __dentry_kill+0x16f/0x250 [ 293.903394] dput+0x1c6/0x440 [ 293.903398] __fput+0x184/0x330 [ 293.903404] task_work_run+0xb9/0xe0 [ 293.903410] exit_to_usermode_loop+0xd3/0xe0 [ 293.903413] do_syscall_64+0x1a0/0x1c0 [ 293.903417] entry_SYSCALL_64_after_hwframe+0x44/0xa9 This happens because __ceph_remove_cap() may queue a cap release (__ceph_queue_cap_release) which can be scheduled before that cap is removed from the inode list with rb_erase(&cap->ci_node, &ci->i_caps); And, when this finally happens, the use-after-free will occur. This can be fixed by removing the cap from the inode list before being removed from the session list, and thus eliminating the risk of an UAF. Cc: stable@vger.kernel.org Signed-off-by: Luis Henriques Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/caps.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index df95e39ccd45..c3a3ee74e2d8 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -935,6 +935,11 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); + /* remove from inode's cap rbtree, and clear auth cap */ + rb_erase(&cap->ci_node, &ci->i_caps); + if (ci->i_auth_cap == cap) + ci->i_auth_cap = NULL; + /* remove from session list */ spin_lock(&session->s_cap_lock); if (session->s_cap_iterator == cap) { @@ -970,11 +975,6 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) spin_unlock(&session->s_cap_lock); - /* remove from inode list */ - rb_erase(&cap->ci_node, &ci->i_caps); - if (ci->i_auth_cap == cap) - ci->i_auth_cap = NULL; - if (removed) ceph_put_cap(mdsc, cap); From 0e65dac6c9ec068f9cc1902535207f9c8bc57948 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 29 Oct 2019 13:53:29 +0000 Subject: [PATCH 0335/3715] ceph: add missing check in d_revalidate snapdir handling commit 1f08529c84cfecaf1261ed9b7e17fab18541c58f upstream. We should not play with dcache without parent locked... Cc: stable@vger.kernel.org Signed-off-by: Al Viro Signed-off-by: Jeff Layton Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 879bc0825093..3818027c12f5 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1347,6 +1347,7 @@ retry_lookup: dout(" final dn %p\n", dn); } else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP || req->r_op == CEPH_MDS_OP_MKSNAP) && + test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) && !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) { struct dentry *dn = req->r_dentry; struct inode *dir = req->r_parent; From ba18d43fd9d0e383bdea7f108e08f2eef83a104b Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Fri, 25 Oct 2019 17:04:20 +0200 Subject: [PATCH 0336/3715] iio: adc: stm32-adc: fix stopping dma commit e6afcf6c598d6f3a0c9c408bfeddb3f5730608b0 upstream. There maybe a race when using dmaengine_terminate_all(). The predisable routine may call iio_triggered_buffer_predisable() prior to a pending DMA callback. Adopt dmaengine_terminate_sync() to ensure there's no pending DMA request before calling iio_triggered_buffer_predisable(). Fixes: 2763ea0585c9 ("iio: adc: stm32: add optional dma support") Signed-off-by: Fabrice Gasnier Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/stm32-adc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c index e59cbc9ad4f6..258a4712167a 100644 --- a/drivers/iio/adc/stm32-adc.c +++ b/drivers/iio/adc/stm32-adc.c @@ -1343,7 +1343,7 @@ static int stm32_adc_dma_start(struct iio_dev *indio_dev) cookie = dmaengine_submit(desc); ret = dma_submit_error(cookie); if (ret) { - dmaengine_terminate_all(adc->dma_chan); + dmaengine_terminate_sync(adc->dma_chan); return ret; } @@ -1416,7 +1416,7 @@ static int stm32_adc_buffer_predisable(struct iio_dev *indio_dev) dev_err(&indio_dev->dev, "predisable failed\n"); if (adc->dma_chan) - dmaengine_terminate_all(adc->dma_chan); + dmaengine_terminate_sync(adc->dma_chan); if (stm32_adc_set_trig(indio_dev, NULL)) dev_err(&indio_dev->dev, "Can't clear trigger\n"); From 52c71b2283f3b695c29c0488fb2043c6d077e3ff Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Tue, 8 Oct 2019 17:15:37 +0300 Subject: [PATCH 0337/3715] iio: imu: adis16480: make sure provided frequency is positive commit 24e1eb5c0d78cfb9750b690bbe997d4d59170258 upstream. It could happen that either `val` or `val2` [provided from userspace] is negative. In that case the computed frequency could get a weird value. Fix this by checking that neither of the 2 variables is negative, and check that the computed result is not-zero. Fixes: e4f959390178 ("iio: imu: adis16480 switch sampling frequency attr to core support") Signed-off-by: Alexandru Ardelean Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/imu/adis16480.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/iio/imu/adis16480.c b/drivers/iio/imu/adis16480.c index 12898424d838..6f975538996c 100644 --- a/drivers/iio/imu/adis16480.c +++ b/drivers/iio/imu/adis16480.c @@ -266,8 +266,11 @@ static int adis16480_set_freq(struct iio_dev *indio_dev, int val, int val2) struct adis16480 *st = iio_priv(indio_dev); unsigned int t; + if (val < 0 || val2 < 0) + return -EINVAL; + t = val * 1000 + val2 / 1000; - if (t <= 0) + if (t == 0) return -EINVAL; t = 2460000 / t; From d380065d7943c5b31b67cd2d5c2ee6745567c7fe Mon Sep 17 00:00:00 2001 From: Andreas Klinger Date: Sun, 6 Oct 2019 16:29:56 +0200 Subject: [PATCH 0338/3715] iio: srf04: fix wrong limitation in distance measuring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 431f7667bd6889a274913162dfd19cce9d84848e upstream. The measured time value in the driver is limited to the maximum distance which can be read by the sensor. This limitation was wrong and is fixed by this patch. It also takes into account that we are supporting a variety of sensors today and that the recently added sensors have a higher maximum distance range. Changes in v2: - Added a Tested-by Suggested-by: Zbyněk Kocur Tested-by: Zbyněk Kocur Signed-off-by: Andreas Klinger Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/proximity/srf04.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/iio/proximity/srf04.c b/drivers/iio/proximity/srf04.c index e37667f933b3..8a6ab9691832 100644 --- a/drivers/iio/proximity/srf04.c +++ b/drivers/iio/proximity/srf04.c @@ -105,7 +105,7 @@ static int srf04_read(struct srf04_data *data) udelay(10); gpiod_set_value(data->gpiod_trig, 0); - /* it cannot take more than 20 ms */ + /* it should not take more than 20 ms until echo is rising */ ret = wait_for_completion_killable_timeout(&data->rising, HZ/50); if (ret < 0) { mutex_unlock(&data->lock); @@ -115,7 +115,8 @@ static int srf04_read(struct srf04_data *data) return -ETIMEDOUT; } - ret = wait_for_completion_killable_timeout(&data->falling, HZ/50); + /* it cannot take more than 50 ms until echo is falling */ + ret = wait_for_completion_killable_timeout(&data->falling, HZ/20); if (ret < 0) { mutex_unlock(&data->lock); return ret; @@ -130,19 +131,19 @@ static int srf04_read(struct srf04_data *data) dt_ns = ktime_to_ns(ktime_dt); /* - * measuring more than 3 meters is beyond the capabilities of - * the sensor + * measuring more than 6,45 meters is beyond the capabilities of + * the supported sensors * ==> filter out invalid results for not measuring echos of * another us sensor * * formula: - * distance 3 m - * time = ---------- = --------- = 9404389 ns - * speed 319 m/s + * distance 6,45 * 2 m + * time = ---------- = ------------ = 40438871 ns + * speed 319 m/s * * using a minimum speed at -20 °C of 319 m/s */ - if (dt_ns > 9404389) + if (dt_ns > 40438871) return -EIO; time_ns = dt_ns; @@ -154,20 +155,20 @@ static int srf04_read(struct srf04_data *data) * with Temp in °C * and speed in m/s * - * use 343 m/s as ultrasonic speed at 20 °C here in absence of the + * use 343,5 m/s as ultrasonic speed at 20 °C here in absence of the * temperature * * therefore: - * time 343 - * distance = ------ * ----- - * 10^6 2 + * time 343,5 time * 106 + * distance = ------ * ------- = ------------ + * 10^6 2 617176 * with time in ns * and distance in mm (one way) * - * because we limit to 3 meters the multiplication with 343 just + * because we limit to 6,45 meters the multiplication with 106 just * fits into 32 bit */ - distance_mm = time_ns * 343 / 2000000; + distance_mm = time_ns * 106 / 617176; return distance_mm; } From f8385a4e9ed83dc727aa51cdc51c23a37159c181 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 31 Oct 2019 11:06:24 +0100 Subject: [PATCH 0339/3715] netfilter: nf_tables: Align nft_expr private data to 64-bit commit 250367c59e6ba0d79d702a059712d66edacd4a1a upstream. Invoking the following commands on a 32-bit architecture with strict alignment requirements (such as an ARMv7-based Raspberry Pi) results in an alignment exception: # nft add table ip test-ip4 # nft add chain ip test-ip4 output { type filter hook output priority 0; } # nft add rule ip test-ip4 output quota 1025 bytes Alignment trap: not handling instruction e1b26f9f at [<7f4473f8>] Unhandled fault: alignment exception (0x001) at 0xb832e824 Internal error: : 1 [#1] PREEMPT SMP ARM Hardware name: BCM2835 [<7f4473fc>] (nft_quota_do_init [nft_quota]) [<7f447448>] (nft_quota_init [nft_quota]) [<7f4260d0>] (nf_tables_newrule [nf_tables]) [<7f4168dc>] (nfnetlink_rcv_batch [nfnetlink]) [<7f416bd0>] (nfnetlink_rcv [nfnetlink]) [<8078b334>] (netlink_unicast) [<8078b664>] (netlink_sendmsg) [<8071b47c>] (sock_sendmsg) [<8071bd18>] (___sys_sendmsg) [<8071ce3c>] (__sys_sendmsg) [<8071ce94>] (sys_sendmsg) The reason is that nft_quota_do_init() calls atomic64_set() on an atomic64_t which is only aligned to 32-bit, not 64-bit, because it succeeds struct nft_expr in memory which only contains a 32-bit pointer. Fix by aligning the nft_expr private data to 64-bit. Fixes: 96518518cc41 ("netfilter: add nftables") Signed-off-by: Lukas Wunner Cc: stable@vger.kernel.org # v3.13+ Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- include/net/netfilter/nf_tables.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 59a4f50ffe8d..a9704c57430d 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -759,7 +759,8 @@ struct nft_expr_ops { */ struct nft_expr { const struct nft_expr_ops *ops; - unsigned char data[]; + unsigned char data[] + __attribute__((aligned(__alignof__(u64)))); }; static inline void *nft_expr_priv(const struct nft_expr *expr) From ef1bf406881d61512911b56c77e96a1489f136b6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 24 Aug 2019 17:49:55 +0300 Subject: [PATCH 0340/3715] netfilter: ipset: Fix an error code in ip_set_sockfn_get() commit 30b7244d79651460ff114ba8f7987ed94c86b99a upstream. The copy_to_user() function returns the number of bytes remaining to be copied. In this code, that positive return is checked at the end of the function and we return zero/success. What we should do instead is return -EFAULT. Fixes: a7b4f989a629 ("netfilter: ipset: IP set core support") Signed-off-by: Dan Carpenter Signed-off-by: Jozsef Kadlecsik Signed-off-by: Greg Kroah-Hartman --- net/netfilter/ipset/ip_set_core.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index dbf17d3596a6..94d74ec61f42 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1950,8 +1950,9 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) } req_version->version = IPSET_PROTOCOL; - ret = copy_to_user(user, req_version, - sizeof(struct ip_set_req_version)); + if (copy_to_user(user, req_version, + sizeof(struct ip_set_req_version))) + ret = -EFAULT; goto done; } case IP_SET_OP_GET_BYNAME: { @@ -2008,7 +2009,8 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) } /* end of switch(op) */ copy: - ret = copy_to_user(user, data, copylen); + if (copy_to_user(user, data, copylen)) + ret = -EFAULT; done: vfree(data); From 5b300c0df1c1c603c0bb136eb7d827202215631f Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Mon, 28 Oct 2019 09:06:50 +0200 Subject: [PATCH 0341/3715] intel_th: pci: Add Comet Lake PCH support commit 3adbb5718dd5264666ddbc2b9b43799d292e9cb6 upstream. This adds support for Intel TH on Comet Lake PCH. Signed-off-by: Alexander Shishkin Reviewed-by: Andy Shevchenko Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20191028070651.9770-7-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index 140b18d858e8..05b57425b9a9 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -183,6 +183,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x02a6), .driver_data = (kernel_ulong_t)&intel_th_2x, }, + { + /* Comet Lake PCH */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x06a6), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, { /* Ice Lake NNPI */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x45c5), From a061cd0bc0b1b7365a57d15622f1ca2b90dd9b07 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Mon, 28 Oct 2019 09:06:51 +0200 Subject: [PATCH 0342/3715] intel_th: pci: Add Jasper Lake PCH support commit 9d55499d8da49e9261e95a490f3fda41d955f505 upstream. This adds support for Intel TH on Jasper Lake PCH. Signed-off-by: Alexander Shishkin Reviewed-by: Andy Shevchenko Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20191028070651.9770-8-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index 05b57425b9a9..7486d5d67186 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -198,6 +198,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa0a6), .driver_data = (kernel_ulong_t)&intel_th_2x, }, + { + /* Jasper Lake PCH */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4da6), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, { 0 }, }; From 0d9e2feded19e7fc8212acc0202befcfef96bb97 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 1 Oct 2019 12:29:14 +0200 Subject: [PATCH 0343/3715] can: usb_8dev: fix use-after-free on disconnect commit 3759739426186a924675651b388d1c3963c5710e upstream. The driver was accessing its driver data after having freed it. Fixes: 0024d8ad1639 ("can: usb_8dev: Add support for USB2CAN interface from 8 devices") Cc: stable # 3.9 Cc: Bernd Krumboeck Cc: Wolfgang Grandegger Signed-off-by: Johan Hovold Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/usb_8dev.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c index 27861c417c94..3e4416473607 100644 --- a/drivers/net/can/usb/usb_8dev.c +++ b/drivers/net/can/usb/usb_8dev.c @@ -1007,9 +1007,8 @@ static void usb_8dev_disconnect(struct usb_interface *intf) netdev_info(priv->netdev, "device disconnected\n"); unregister_netdev(priv->netdev); - free_candev(priv->netdev); - unlink_all_urbs(priv); + free_candev(priv->netdev); } } From 774e85d16ac2fe18fba76c143aedb8dfbb212f80 Mon Sep 17 00:00:00 2001 From: Kurt Van Dijck Date: Tue, 1 Oct 2019 09:40:36 +0200 Subject: [PATCH 0344/3715] can: c_can: c_can_poll(): only read status register after status IRQ commit 3cb3eaac52c0f145d895f4b6c22834d5f02b8569 upstream. When the status register is read without the status IRQ pending, the chip may not raise the interrupt line for an upcoming status interrupt and the driver may miss a status interrupt. It is critical that the BUSOFF status interrupt is forwarded to the higher layers, since no more interrupts will follow without intervention. Thanks to Wolfgang and Joe for bringing up the first idea. Signed-off-by: Kurt Van Dijck Cc: Wolfgang Grandegger Cc: Joe Burmeister Fixes: fa39b54ccf28 ("can: c_can: Get rid of pointless interrupts") Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/c_can/c_can.c | 25 ++++++++++++++++++++----- drivers/net/can/c_can/c_can.h | 1 + 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index 606b7d8ffe13..9b61bfbea6cd 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -97,6 +97,9 @@ #define BTR_TSEG2_SHIFT 12 #define BTR_TSEG2_MASK (0x7 << BTR_TSEG2_SHIFT) +/* interrupt register */ +#define INT_STS_PENDING 0x8000 + /* brp extension register */ #define BRP_EXT_BRPE_MASK 0x0f #define BRP_EXT_BRPE_SHIFT 0 @@ -1029,10 +1032,16 @@ static int c_can_poll(struct napi_struct *napi, int quota) u16 curr, last = priv->last_status; int work_done = 0; - priv->last_status = curr = priv->read_reg(priv, C_CAN_STS_REG); - /* Ack status on C_CAN. D_CAN is self clearing */ - if (priv->type != BOSCH_D_CAN) - priv->write_reg(priv, C_CAN_STS_REG, LEC_UNUSED); + /* Only read the status register if a status interrupt was pending */ + if (atomic_xchg(&priv->sie_pending, 0)) { + priv->last_status = curr = priv->read_reg(priv, C_CAN_STS_REG); + /* Ack status on C_CAN. D_CAN is self clearing */ + if (priv->type != BOSCH_D_CAN) + priv->write_reg(priv, C_CAN_STS_REG, LEC_UNUSED); + } else { + /* no change detected ... */ + curr = last; + } /* handle state changes */ if ((curr & STATUS_EWARN) && (!(last & STATUS_EWARN))) { @@ -1083,10 +1092,16 @@ static irqreturn_t c_can_isr(int irq, void *dev_id) { struct net_device *dev = (struct net_device *)dev_id; struct c_can_priv *priv = netdev_priv(dev); + int reg_int; - if (!priv->read_reg(priv, C_CAN_INT_REG)) + reg_int = priv->read_reg(priv, C_CAN_INT_REG); + if (!reg_int) return IRQ_NONE; + /* save for later use */ + if (reg_int & INT_STS_PENDING) + atomic_set(&priv->sie_pending, 1); + /* disable all interrupts and schedule the NAPI */ c_can_irq_control(priv, false); napi_schedule(&priv->napi); diff --git a/drivers/net/can/c_can/c_can.h b/drivers/net/can/c_can/c_can.h index 8acdc7fa4792..d5567a7c1c6d 100644 --- a/drivers/net/can/c_can/c_can.h +++ b/drivers/net/can/c_can/c_can.h @@ -198,6 +198,7 @@ struct c_can_priv { struct net_device *dev; struct device *device; atomic_t tx_active; + atomic_t sie_pending; unsigned long tx_dir; int last_status; u16 (*read_reg) (const struct c_can_priv *priv, enum reg index); From 3ee2b01c435ba5393764e3b187d3b728745ecf94 Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Tue, 8 Oct 2019 10:35:44 +0200 Subject: [PATCH 0345/3715] can: peak_usb: fix a potential out-of-sync while decoding packets commit de280f403f2996679e2607384980703710576fed upstream. When decoding a buffer received from PCAN-USB, the first timestamp read in a packet is a 16-bit coded time base, and the next ones are an 8-bit offset to this base, regardless of the type of packet read. This patch corrects a potential loss of synchronization by using a timestamp index read from the buffer, rather than an index of received data packets, to determine on the sizeof the timestamp to be read from the packet being decoded. Signed-off-by: Stephane Grosjean Fixes: 46be265d3388 ("can: usb: PEAK-System Technik PCAN-USB specific part") Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/peak_usb/pcan_usb.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c index 838545ce468d..e626c2afbbb1 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb.c @@ -108,7 +108,7 @@ struct pcan_usb_msg_context { u8 *end; u8 rec_cnt; u8 rec_idx; - u8 rec_data_idx; + u8 rec_ts_idx; struct net_device *netdev; struct pcan_usb *pdev; }; @@ -552,10 +552,15 @@ static int pcan_usb_decode_status(struct pcan_usb_msg_context *mc, mc->ptr += PCAN_USB_CMD_ARGS; if (status_len & PCAN_USB_STATUSLEN_TIMESTAMP) { - int err = pcan_usb_decode_ts(mc, !mc->rec_idx); + int err = pcan_usb_decode_ts(mc, !mc->rec_ts_idx); if (err) return err; + + /* Next packet in the buffer will have a timestamp on a single + * byte + */ + mc->rec_ts_idx++; } switch (f) { @@ -638,10 +643,13 @@ static int pcan_usb_decode_data(struct pcan_usb_msg_context *mc, u8 status_len) cf->can_dlc = get_can_dlc(rec_len); - /* first data packet timestamp is a word */ - if (pcan_usb_decode_ts(mc, !mc->rec_data_idx)) + /* Only first packet timestamp is a word */ + if (pcan_usb_decode_ts(mc, !mc->rec_ts_idx)) goto decode_failed; + /* Next packet in the buffer will have a timestamp on a single byte */ + mc->rec_ts_idx++; + /* read data */ memset(cf->data, 0x0, sizeof(cf->data)); if (status_len & PCAN_USB_STATUSLEN_RTR) { @@ -695,7 +703,6 @@ static int pcan_usb_decode_msg(struct peak_usb_device *dev, u8 *ibuf, u32 lbuf) /* handle normal can frames here */ } else { err = pcan_usb_decode_data(&mc, sl); - mc.rec_data_idx++; } } From f0468c0630a52ed3909321de8c057c9f8e5e8ada Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 9 Oct 2019 15:48:48 +0200 Subject: [PATCH 0346/3715] can: rx-offload: can_rx_offload_queue_sorted(): fix error handling, avoid skb mem leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ca913f1ac024559ebc17f0b599af262f0ad997c9 upstream. If the rx-offload skb_queue is full can_rx_offload_queue_sorted() will not queue the skb and return with an error. None of the callers of this function, issue a kfree_skb() to free the not queued skb. This results in a memory leak. This patch fixes the problem by freeing the skb in case of a full queue. The return value is adjusted to -ENOBUFS to better reflect the actual problem. The device stats handling is left to the callers, as this function might be used in both the rx and tx path. Fixes: 55059f2b7f86 ("can: rx-offload: introduce can_rx_offload_get_echo_skb() and can_rx_offload_queue_sorted() functions") Cc: linux-stable Cc: Martin Hundebøll Reported-by: Martin Hundebøll Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/rx-offload.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c index d227db45fec9..1a7c183e6678 100644 --- a/drivers/net/can/rx-offload.c +++ b/drivers/net/can/rx-offload.c @@ -216,8 +216,10 @@ int can_rx_offload_queue_sorted(struct can_rx_offload *offload, unsigned long flags; if (skb_queue_len(&offload->skb_queue) > - offload->skb_queue_len_max) - return -ENOMEM; + offload->skb_queue_len_max) { + kfree_skb(skb); + return -ENOBUFS; + } cb = can_rx_offload_get_cb(skb); cb->timestamp = timestamp; From 3f5e99731cd55f9be6468e9af7985a838ce7255b Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Thu, 19 Sep 2019 21:44:38 -0500 Subject: [PATCH 0347/3715] can: gs_usb: gs_can_open(): prevent memory leak commit fb5be6a7b4863ecc44963bb80ca614584b6c7817 upstream. In gs_can_open() if usb_submit_urb() fails the allocated urb should be released. Fixes: d08e973a77d1 ("can: gs_usb: Added support for the GS_USB CAN devices") Cc: linux-stable Signed-off-by: Navid Emamdoost Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/gs_usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 8bf80ad9dc44..bfbf80949600 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -631,6 +631,7 @@ static int gs_can_open(struct net_device *netdev) rc); usb_unanchor_urb(urb); + usb_free_urb(urb); break; } From 72e535208bc328e0617a8adb242bf8d89cdfd7d4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 1 Oct 2019 12:29:13 +0200 Subject: [PATCH 0348/3715] can: mcba_usb: fix use-after-free on disconnect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4d6636498c41891d0482a914dd570343a838ad79 upstream. The driver was accessing its driver data after having freed it. Fixes: 51f3baad7de9 ("can: mcba_usb: Add support for Microchip CAN BUS Analyzer") Cc: stable # 4.12 Cc: Remigiusz Kołłątaj Reported-by: syzbot+e29b17e5042bbc56fae9@syzkaller.appspotmail.com Signed-off-by: Johan Hovold Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/mcba_usb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index e0c24abce16c..070e1ba79736 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -887,9 +887,8 @@ static void mcba_usb_disconnect(struct usb_interface *intf) netdev_info(priv->netdev, "device disconnected\n"); unregister_candev(priv->netdev); - free_candev(priv->netdev); - mcba_urb_unlink(priv); + free_candev(priv->netdev); } static struct usb_driver mcba_usb_driver = { From 89a23a5d03956b44eac1a778806449577d690c6c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 23 Oct 2019 10:27:05 +0200 Subject: [PATCH 0349/3715] can: peak_usb: fix slab info leak commit f7a1337f0d29b98733c8824e165fca3371d7d4fd upstream. Fix a small slab info leak due to a failure to clear the command buffer at allocation. The first 16 bytes of the command buffer are always sent to the device in pcan_usb_send_cmd() even though only the first two may have been initialised in case no argument payload is provided (e.g. when waiting for a response). Fixes: bb4785551f64 ("can: usb: PEAK-System Technik USB adapters driver core") Cc: stable # 3.4 Reported-by: syzbot+863724e7128e14b26732@syzkaller.appspotmail.com Signed-off-by: Johan Hovold Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/peak_usb/pcan_usb_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c index 059282a6065c..85d92f129af2 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c @@ -776,7 +776,7 @@ static int peak_usb_create_dev(const struct peak_usb_adapter *peak_usb_adapter, dev = netdev_priv(netdev); /* allocate a buffer large enough to send commands */ - dev->cmd_buf = kmalloc(PCAN_USB_MAX_CMD_LEN, GFP_KERNEL); + dev->cmd_buf = kzalloc(PCAN_USB_MAX_CMD_LEN, GFP_KERNEL); if (!dev->cmd_buf) { err = -ENOMEM; goto lbl_free_candev; From 2661af799789d9c810e54124585ffc76705b0857 Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Sat, 7 Oct 2017 16:02:21 +0200 Subject: [PATCH 0350/3715] configfs: Fix bool initialization/comparison commit 3f6928c347707a65cee10a9f54b85ad5fb078b3f upstream. Bool initializations should use true and false. Bool tests don't need comparisons. Signed-off-by: Thomas Meyer Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/configfs/file.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/configfs/file.c b/fs/configfs/file.c index 39da1103d341..62580dba3552 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -166,7 +166,7 @@ configfs_read_bin_file(struct file *file, char __user *buf, retval = -ETXTBSY; goto out; } - buffer->read_in_progress = 1; + buffer->read_in_progress = true; if (buffer->needs_read_fill) { /* perform first read with buf == NULL to get extent */ @@ -325,7 +325,7 @@ configfs_write_bin_file(struct file *file, const char __user *buf, len = -ETXTBSY; goto out; } - buffer->write_in_progress = 1; + buffer->write_in_progress = true; /* buffer grows? */ if (*ppos + count > buffer->bin_buffer_size) { @@ -429,8 +429,8 @@ static int check_perm(struct inode * inode, struct file * file, int type) } mutex_init(&buffer->mutex); buffer->needs_read_fill = 1; - buffer->read_in_progress = 0; - buffer->write_in_progress = 0; + buffer->read_in_progress = false; + buffer->write_in_progress = false; buffer->ops = ops; file->private_data = buffer; goto Done; @@ -488,10 +488,10 @@ static int configfs_release_bin_file(struct inode *inode, struct file *filp) ssize_t len = 0; int ret; - buffer->read_in_progress = 0; + buffer->read_in_progress = false; if (buffer->write_in_progress) { - buffer->write_in_progress = 0; + buffer->write_in_progress = false; len = bin_attr->write(item, buffer->bin_buffer, buffer->bin_buffer_size); From 3e651dd60272220ebfab745c929950feb2a90e49 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 30 Aug 2019 11:30:03 -0400 Subject: [PATCH 0351/3715] configfs: stash the data we need into configfs_buffer at open time commit ff4dd081977da56566a848f071aed8fa92d604a1 upstream. simplifies the ->read()/->write()/->release() instances nicely Signed-off-by: Al Viro Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/configfs/file.c | 227 +++++++++++++++++++-------------------------- 1 file changed, 94 insertions(+), 133 deletions(-) diff --git a/fs/configfs/file.c b/fs/configfs/file.c index 62580dba3552..c05ffda74a91 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -53,24 +53,18 @@ struct configfs_buffer { bool write_in_progress; char *bin_buffer; int bin_buffer_size; + int cb_max_size; + struct config_item *item; + struct module *owner; + union { + struct configfs_attribute *attr; + struct configfs_bin_attribute *bin_attr; + }; }; -/** - * fill_read_buffer - allocate and fill buffer from item. - * @dentry: dentry pointer. - * @buffer: data buffer for file. - * - * Allocate @buffer->page, if it hasn't been already, then call the - * config_item's show() method to fill the buffer with this attribute's - * data. - * This is called only once, on the file's first read. - */ -static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buffer) +static int fill_read_buffer(struct configfs_buffer * buffer) { - struct configfs_attribute * attr = to_attr(dentry); - struct config_item * item = to_item(dentry->d_parent); - int ret = 0; ssize_t count; if (!buffer->page) @@ -78,15 +72,15 @@ static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buf if (!buffer->page) return -ENOMEM; - count = attr->show(item, buffer->page); + count = buffer->attr->show(buffer->item, buffer->page); + if (count < 0) + return count; + if (WARN_ON_ONCE(count > (ssize_t)SIMPLE_ATTR_SIZE)) + return -EIO; - BUG_ON(count > (ssize_t)SIMPLE_ATTR_SIZE); - if (count >= 0) { - buffer->needs_read_fill = 0; - buffer->count = count; - } else - ret = count; - return ret; + buffer->needs_read_fill = 0; + buffer->count = count; + return 0; } /** @@ -111,12 +105,13 @@ static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buf static ssize_t configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct configfs_buffer * buffer = file->private_data; + struct configfs_buffer *buffer = file->private_data; ssize_t retval = 0; mutex_lock(&buffer->mutex); if (buffer->needs_read_fill) { - if ((retval = fill_read_buffer(file->f_path.dentry,buffer))) + retval = fill_read_buffer(buffer); + if (retval) goto out; } pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", @@ -153,9 +148,6 @@ configfs_read_bin_file(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct configfs_buffer *buffer = file->private_data; - struct dentry *dentry = file->f_path.dentry; - struct config_item *item = to_item(dentry->d_parent); - struct configfs_bin_attribute *bin_attr = to_bin_attr(dentry); ssize_t retval = 0; ssize_t len = min_t(size_t, count, PAGE_SIZE); @@ -170,14 +162,14 @@ configfs_read_bin_file(struct file *file, char __user *buf, if (buffer->needs_read_fill) { /* perform first read with buf == NULL to get extent */ - len = bin_attr->read(item, NULL, 0); + len = buffer->bin_attr->read(buffer->item, NULL, 0); if (len <= 0) { retval = len; goto out; } /* do not exceed the maximum value */ - if (bin_attr->cb_max_size && len > bin_attr->cb_max_size) { + if (buffer->cb_max_size && len > buffer->cb_max_size) { retval = -EFBIG; goto out; } @@ -190,7 +182,8 @@ configfs_read_bin_file(struct file *file, char __user *buf, buffer->bin_buffer_size = len; /* perform second read to fill buffer */ - len = bin_attr->read(item, buffer->bin_buffer, len); + len = buffer->bin_attr->read(buffer->item, + buffer->bin_buffer, len); if (len < 0) { retval = len; vfree(buffer->bin_buffer); @@ -240,25 +233,10 @@ fill_write_buffer(struct configfs_buffer * buffer, const char __user * buf, size return error ? -EFAULT : count; } - -/** - * flush_write_buffer - push buffer to config_item. - * @dentry: dentry to the attribute - * @buffer: data buffer for file. - * @count: number of bytes - * - * Get the correct pointers for the config_item and the attribute we're - * dealing with, then call the store() method for the attribute, - * passing the buffer that we acquired in fill_write_buffer(). - */ - static int -flush_write_buffer(struct dentry * dentry, struct configfs_buffer * buffer, size_t count) +flush_write_buffer(struct configfs_buffer *buffer, size_t count) { - struct configfs_attribute * attr = to_attr(dentry); - struct config_item * item = to_item(dentry->d_parent); - - return attr->store(item, buffer->page, count); + return buffer->attr->store(buffer->item, buffer->page, count); } @@ -282,13 +260,13 @@ flush_write_buffer(struct dentry * dentry, struct configfs_buffer * buffer, size static ssize_t configfs_write_file(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - struct configfs_buffer * buffer = file->private_data; + struct configfs_buffer *buffer = file->private_data; ssize_t len; mutex_lock(&buffer->mutex); len = fill_write_buffer(buffer, buf, count); if (len > 0) - len = flush_write_buffer(file->f_path.dentry, buffer, len); + len = flush_write_buffer(buffer, len); if (len > 0) *ppos += len; mutex_unlock(&buffer->mutex); @@ -313,8 +291,6 @@ configfs_write_bin_file(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct configfs_buffer *buffer = file->private_data; - struct dentry *dentry = file->f_path.dentry; - struct configfs_bin_attribute *bin_attr = to_bin_attr(dentry); void *tbuf = NULL; ssize_t len; @@ -330,8 +306,8 @@ configfs_write_bin_file(struct file *file, const char __user *buf, /* buffer grows? */ if (*ppos + count > buffer->bin_buffer_size) { - if (bin_attr->cb_max_size && - *ppos + count > bin_attr->cb_max_size) { + if (buffer->cb_max_size && + *ppos + count > buffer->cb_max_size) { len = -EFBIG; goto out; } @@ -363,31 +339,45 @@ out: return len; } -static int check_perm(struct inode * inode, struct file * file, int type) +static int __configfs_open_file(struct inode *inode, struct file *file, int type) { - struct config_item *item = configfs_get_config_item(file->f_path.dentry->d_parent); - struct configfs_attribute * attr = to_attr(file->f_path.dentry); - struct configfs_bin_attribute *bin_attr = NULL; - struct configfs_buffer * buffer; - struct configfs_item_operations * ops = NULL; - int error = 0; + struct dentry *dentry = file->f_path.dentry; + struct configfs_attribute *attr; + struct configfs_buffer *buffer; + int error; - if (!item || !attr) - goto Einval; + error = -ENOMEM; + buffer = kzalloc(sizeof(struct configfs_buffer), GFP_KERNEL); + if (!buffer) + goto out; - if (type & CONFIGFS_ITEM_BIN_ATTR) - bin_attr = to_bin_attr(file->f_path.dentry); + error = -EINVAL; + buffer->item = configfs_get_config_item(dentry->d_parent); + if (!buffer->item) + goto out_free_buffer; - /* Grab the module reference for this attribute if we have one */ - if (!try_module_get(attr->ca_owner)) { - error = -ENODEV; - goto Done; + attr = to_attr(dentry); + if (!attr) + goto out_put_item; + + if (type & CONFIGFS_ITEM_BIN_ATTR) { + buffer->bin_attr = to_bin_attr(dentry); + buffer->cb_max_size = buffer->bin_attr->cb_max_size; + } else { + buffer->attr = attr; } - if (item->ci_type) - ops = item->ci_type->ct_item_ops; - else - goto Eaccess; + buffer->owner = attr->ca_owner; + /* Grab the module reference for this attribute if we have one */ + error = -ENODEV; + if (!try_module_get(buffer->owner)) + goto out_put_item; + + error = -EACCES; + if (!buffer->item->ci_type) + goto out_put_module; + + buffer->ops = buffer->item->ci_type->ct_item_ops; /* File needs write support. * The inode's perms must say it's ok, @@ -395,13 +385,11 @@ static int check_perm(struct inode * inode, struct file * file, int type) */ if (file->f_mode & FMODE_WRITE) { if (!(inode->i_mode & S_IWUGO)) - goto Eaccess; - + goto out_put_module; if ((type & CONFIGFS_ITEM_ATTR) && !attr->store) - goto Eaccess; - - if ((type & CONFIGFS_ITEM_BIN_ATTR) && !bin_attr->write) - goto Eaccess; + goto out_put_module; + if ((type & CONFIGFS_ITEM_BIN_ATTR) && !buffer->bin_attr->write) + goto out_put_module; } /* File needs read support. @@ -410,90 +398,65 @@ static int check_perm(struct inode * inode, struct file * file, int type) */ if (file->f_mode & FMODE_READ) { if (!(inode->i_mode & S_IRUGO)) - goto Eaccess; - + goto out_put_module; if ((type & CONFIGFS_ITEM_ATTR) && !attr->show) - goto Eaccess; - - if ((type & CONFIGFS_ITEM_BIN_ATTR) && !bin_attr->read) - goto Eaccess; + goto out_put_module; + if ((type & CONFIGFS_ITEM_BIN_ATTR) && !buffer->bin_attr->read) + goto out_put_module; } - /* No error? Great, allocate a buffer for the file, and store it - * it in file->private_data for easy access. - */ - buffer = kzalloc(sizeof(struct configfs_buffer),GFP_KERNEL); - if (!buffer) { - error = -ENOMEM; - goto Enomem; - } mutex_init(&buffer->mutex); buffer->needs_read_fill = 1; buffer->read_in_progress = false; buffer->write_in_progress = false; - buffer->ops = ops; file->private_data = buffer; - goto Done; + return 0; - Einval: - error = -EINVAL; - goto Done; - Eaccess: - error = -EACCES; - Enomem: - module_put(attr->ca_owner); - Done: - if (error && item) - config_item_put(item); +out_put_module: + module_put(buffer->owner); +out_put_item: + config_item_put(buffer->item); +out_free_buffer: + kfree(buffer); +out: return error; } static int configfs_release(struct inode *inode, struct file *filp) { - struct config_item * item = to_item(filp->f_path.dentry->d_parent); - struct configfs_attribute * attr = to_attr(filp->f_path.dentry); - struct module * owner = attr->ca_owner; - struct configfs_buffer * buffer = filp->private_data; + struct configfs_buffer *buffer = filp->private_data; - if (item) - config_item_put(item); - /* After this point, attr should not be accessed. */ - module_put(owner); - - if (buffer) { - if (buffer->page) - free_page((unsigned long)buffer->page); - mutex_destroy(&buffer->mutex); - kfree(buffer); - } + if (buffer->item) + config_item_put(buffer->item); + module_put(buffer->owner); + if (buffer->page) + free_page((unsigned long)buffer->page); + mutex_destroy(&buffer->mutex); + kfree(buffer); return 0; } static int configfs_open_file(struct inode *inode, struct file *filp) { - return check_perm(inode, filp, CONFIGFS_ITEM_ATTR); + return __configfs_open_file(inode, filp, CONFIGFS_ITEM_ATTR); } static int configfs_open_bin_file(struct inode *inode, struct file *filp) { - return check_perm(inode, filp, CONFIGFS_ITEM_BIN_ATTR); + return __configfs_open_file(inode, filp, CONFIGFS_ITEM_BIN_ATTR); } -static int configfs_release_bin_file(struct inode *inode, struct file *filp) +static int configfs_release_bin_file(struct inode *inode, struct file *file) { - struct configfs_buffer *buffer = filp->private_data; - struct dentry *dentry = filp->f_path.dentry; - struct config_item *item = to_item(dentry->d_parent); - struct configfs_bin_attribute *bin_attr = to_bin_attr(dentry); - ssize_t len = 0; - int ret; + struct configfs_buffer *buffer = file->private_data; buffer->read_in_progress = false; if (buffer->write_in_progress) { buffer->write_in_progress = false; - len = bin_attr->write(item, buffer->bin_buffer, + /* result of ->release() is ignored */ + buffer->bin_attr->write(buffer->item, buffer->bin_buffer, buffer->bin_buffer_size); /* vfree on NULL is safe */ @@ -503,10 +466,8 @@ static int configfs_release_bin_file(struct inode *inode, struct file *filp) buffer->needs_read_fill = 1; } - ret = configfs_release(inode, filp); - if (len < 0) - return len; - return ret; + configfs_release(inode, file); + return 0; } From 5f7f9c7cde01202de82814e77b68c819f61a46c6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 29 Aug 2019 23:13:30 -0400 Subject: [PATCH 0352/3715] configfs_register_group() shouldn't be (and isn't) called in rmdirable parts commit f19e4ed1e1edbfa3c9ccb9fed17759b7d6db24c6 upstream. revert cc57c07343bd "configfs: fix registered group removal" It was an attempt to handle something that fundamentally doesn't work - configfs_register_group() should never be done in a part of tree that can be rmdir'ed. And in mainline it never had been, so let's not borrow trouble; the fix was racy anyway, it would take a lot more to make that work and desired semantics is not clear. Signed-off-by: Al Viro Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/configfs/dir.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index a1985a9ad2d6..64fdb12e6ad6 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1782,16 +1782,6 @@ void configfs_unregister_group(struct config_group *group) struct dentry *dentry = group->cg_item.ci_dentry; struct dentry *parent = group->cg_item.ci_parent->ci_dentry; - mutex_lock(&subsys->su_mutex); - if (!group->cg_item.ci_parent->ci_group) { - /* - * The parent has already been unlinked and detached - * due to a rmdir. - */ - goto unlink_group; - } - mutex_unlock(&subsys->su_mutex); - inode_lock_nested(d_inode(parent), I_MUTEX_PARENT); spin_lock(&configfs_dirent_lock); configfs_detach_prep(dentry, NULL); @@ -1806,7 +1796,6 @@ void configfs_unregister_group(struct config_group *group) dput(dentry); mutex_lock(&subsys->su_mutex); -unlink_group: unlink_group(group); mutex_unlock(&subsys->su_mutex); } From 2f41c26ed4f617f768e6d9b82306dbe5bb667d95 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 25 Aug 2019 19:56:13 -0400 Subject: [PATCH 0353/3715] configfs: new object reprsenting tree fragments commit 47320fbe11a6059ae502c9c16b668022fdb4cf76 upstream. Refcounted, hangs of configfs_dirent, created by operations that add fragments to configfs tree (mkdir and configfs_register_{subsystem,group}). Will be used in the next commit to provide exclusion between fragment removal and ->show/->store calls. Signed-off-by: Al Viro Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/configfs/configfs_internal.h | 15 ++++- fs/configfs/dir.c | 105 +++++++++++++++++++++++++------- fs/configfs/file.c | 4 +- 3 files changed, 97 insertions(+), 27 deletions(-) diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h index ccc31fa6f1a7..16eb59adf5aa 100644 --- a/fs/configfs/configfs_internal.h +++ b/fs/configfs/configfs_internal.h @@ -34,6 +34,15 @@ #include #include +struct configfs_fragment { + atomic_t frag_count; + struct rw_semaphore frag_sem; + bool frag_dead; +}; + +void put_fragment(struct configfs_fragment *); +struct configfs_fragment *get_fragment(struct configfs_fragment *); + struct configfs_dirent { atomic_t s_count; int s_dependent_count; @@ -48,6 +57,7 @@ struct configfs_dirent { #ifdef CONFIG_LOCKDEP int s_depth; #endif + struct configfs_fragment *s_frag; }; #define CONFIGFS_ROOT 0x0001 @@ -75,8 +85,8 @@ extern int configfs_create(struct dentry *, umode_t mode, void (*init)(struct in extern int configfs_create_file(struct config_item *, const struct configfs_attribute *); extern int configfs_create_bin_file(struct config_item *, const struct configfs_bin_attribute *); -extern int configfs_make_dirent(struct configfs_dirent *, - struct dentry *, void *, umode_t, int); +extern int configfs_make_dirent(struct configfs_dirent *, struct dentry *, + void *, umode_t, int, struct configfs_fragment *); extern int configfs_dirent_is_ready(struct configfs_dirent *); extern void configfs_hash_and_remove(struct dentry * dir, const char * name); @@ -151,6 +161,7 @@ static inline void release_configfs_dirent(struct configfs_dirent * sd) { if (!(sd->s_type & CONFIGFS_ROOT)) { kfree(sd->s_iattr); + put_fragment(sd->s_frag); kmem_cache_free(configfs_dir_cachep, sd); } } diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 64fdb12e6ad6..1362208a8618 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -164,11 +164,38 @@ configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd) #endif /* CONFIG_LOCKDEP */ +static struct configfs_fragment *new_fragment(void) +{ + struct configfs_fragment *p; + + p = kmalloc(sizeof(struct configfs_fragment), GFP_KERNEL); + if (p) { + atomic_set(&p->frag_count, 1); + init_rwsem(&p->frag_sem); + p->frag_dead = false; + } + return p; +} + +void put_fragment(struct configfs_fragment *frag) +{ + if (frag && atomic_dec_and_test(&frag->frag_count)) + kfree(frag); +} + +struct configfs_fragment *get_fragment(struct configfs_fragment *frag) +{ + if (likely(frag)) + atomic_inc(&frag->frag_count); + return frag; +} + /* * Allocates a new configfs_dirent and links it to the parent configfs_dirent */ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent *parent_sd, - void *element, int type) + void *element, int type, + struct configfs_fragment *frag) { struct configfs_dirent * sd; @@ -188,6 +215,7 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent *paren kmem_cache_free(configfs_dir_cachep, sd); return ERR_PTR(-ENOENT); } + sd->s_frag = get_fragment(frag); list_add(&sd->s_sibling, &parent_sd->s_children); spin_unlock(&configfs_dirent_lock); @@ -222,11 +250,11 @@ static int configfs_dirent_exists(struct configfs_dirent *parent_sd, int configfs_make_dirent(struct configfs_dirent * parent_sd, struct dentry * dentry, void * element, - umode_t mode, int type) + umode_t mode, int type, struct configfs_fragment *frag) { struct configfs_dirent * sd; - sd = configfs_new_dirent(parent_sd, element, type); + sd = configfs_new_dirent(parent_sd, element, type, frag); if (IS_ERR(sd)) return PTR_ERR(sd); @@ -273,7 +301,8 @@ static void init_symlink(struct inode * inode) * until it is validated by configfs_dir_set_ready() */ -static int configfs_create_dir(struct config_item *item, struct dentry *dentry) +static int configfs_create_dir(struct config_item *item, struct dentry *dentry, + struct configfs_fragment *frag) { int error; umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; @@ -286,7 +315,8 @@ static int configfs_create_dir(struct config_item *item, struct dentry *dentry) return error; error = configfs_make_dirent(p->d_fsdata, dentry, item, mode, - CONFIGFS_DIR | CONFIGFS_USET_CREATING); + CONFIGFS_DIR | CONFIGFS_USET_CREATING, + frag); if (unlikely(error)) return error; @@ -351,9 +381,10 @@ int configfs_create_link(struct configfs_symlink *sl, { int err = 0; umode_t mode = S_IFLNK | S_IRWXUGO; + struct configfs_dirent *p = parent->d_fsdata; - err = configfs_make_dirent(parent->d_fsdata, dentry, sl, mode, - CONFIGFS_ITEM_LINK); + err = configfs_make_dirent(p, dentry, sl, mode, + CONFIGFS_ITEM_LINK, p->s_frag); if (!err) { err = configfs_create(dentry, mode, init_symlink); if (err) { @@ -612,7 +643,8 @@ static int populate_attrs(struct config_item *item) static int configfs_attach_group(struct config_item *parent_item, struct config_item *item, - struct dentry *dentry); + struct dentry *dentry, + struct configfs_fragment *frag); static void configfs_detach_group(struct config_item *item); static void detach_groups(struct config_group *group) @@ -660,7 +692,8 @@ static void detach_groups(struct config_group *group) * try using vfs_mkdir. Just a thought. */ static int create_default_group(struct config_group *parent_group, - struct config_group *group) + struct config_group *group, + struct configfs_fragment *frag) { int ret; struct configfs_dirent *sd; @@ -676,7 +709,7 @@ static int create_default_group(struct config_group *parent_group, d_add(child, NULL); ret = configfs_attach_group(&parent_group->cg_item, - &group->cg_item, child); + &group->cg_item, child, frag); if (!ret) { sd = child->d_fsdata; sd->s_type |= CONFIGFS_USET_DEFAULT; @@ -690,13 +723,14 @@ static int create_default_group(struct config_group *parent_group, return ret; } -static int populate_groups(struct config_group *group) +static int populate_groups(struct config_group *group, + struct configfs_fragment *frag) { struct config_group *new_group; int ret = 0; list_for_each_entry(new_group, &group->default_groups, group_entry) { - ret = create_default_group(group, new_group); + ret = create_default_group(group, new_group, frag); if (ret) { detach_groups(group); break; @@ -810,11 +844,12 @@ static void link_group(struct config_group *parent_group, struct config_group *g */ static int configfs_attach_item(struct config_item *parent_item, struct config_item *item, - struct dentry *dentry) + struct dentry *dentry, + struct configfs_fragment *frag) { int ret; - ret = configfs_create_dir(item, dentry); + ret = configfs_create_dir(item, dentry, frag); if (!ret) { ret = populate_attrs(item); if (ret) { @@ -844,12 +879,13 @@ static void configfs_detach_item(struct config_item *item) static int configfs_attach_group(struct config_item *parent_item, struct config_item *item, - struct dentry *dentry) + struct dentry *dentry, + struct configfs_fragment *frag) { int ret; struct configfs_dirent *sd; - ret = configfs_attach_item(parent_item, item, dentry); + ret = configfs_attach_item(parent_item, item, dentry, frag); if (!ret) { sd = dentry->d_fsdata; sd->s_type |= CONFIGFS_USET_DIR; @@ -865,7 +901,7 @@ static int configfs_attach_group(struct config_item *parent_item, */ inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD); configfs_adjust_dir_dirent_depth_before_populate(sd); - ret = populate_groups(to_config_group(item)); + ret = populate_groups(to_config_group(item), frag); if (ret) { configfs_detach_item(item); d_inode(dentry)->i_flags |= S_DEAD; @@ -1260,6 +1296,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode struct configfs_dirent *sd; struct config_item_type *type; struct module *subsys_owner = NULL, *new_item_owner = NULL; + struct configfs_fragment *frag; char *name; sd = dentry->d_parent->d_fsdata; @@ -1278,6 +1315,12 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode goto out; } + frag = new_fragment(); + if (!frag) { + ret = -ENOMEM; + goto out; + } + /* Get a working ref for the duration of this function */ parent_item = configfs_get_config_item(dentry->d_parent); type = parent_item->ci_type; @@ -1380,9 +1423,9 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode spin_unlock(&configfs_dirent_lock); if (group) - ret = configfs_attach_group(parent_item, item, dentry); + ret = configfs_attach_group(parent_item, item, dentry, frag); else - ret = configfs_attach_item(parent_item, item, dentry); + ret = configfs_attach_item(parent_item, item, dentry, frag); spin_lock(&configfs_dirent_lock); sd->s_type &= ~CONFIGFS_USET_IN_MKDIR; @@ -1419,6 +1462,7 @@ out_put: * reference. */ config_item_put(parent_item); + put_fragment(frag); out: return ret; @@ -1587,7 +1631,7 @@ static int configfs_dir_open(struct inode *inode, struct file *file) */ err = -ENOENT; if (configfs_dirent_is_ready(parent_sd)) { - file->private_data = configfs_new_dirent(parent_sd, NULL, 0); + file->private_data = configfs_new_dirent(parent_sd, NULL, 0, NULL); if (IS_ERR(file->private_data)) err = PTR_ERR(file->private_data); else @@ -1743,8 +1787,13 @@ int configfs_register_group(struct config_group *parent_group, { struct configfs_subsystem *subsys = parent_group->cg_subsys; struct dentry *parent; + struct configfs_fragment *frag; int ret; + frag = new_fragment(); + if (!frag) + return -ENOMEM; + mutex_lock(&subsys->su_mutex); link_group(parent_group, group); mutex_unlock(&subsys->su_mutex); @@ -1752,7 +1801,7 @@ int configfs_register_group(struct config_group *parent_group, parent = parent_group->cg_item.ci_dentry; inode_lock_nested(d_inode(parent), I_MUTEX_PARENT); - ret = create_default_group(parent_group, group); + ret = create_default_group(parent_group, group, frag); if (ret) goto err_out; @@ -1760,12 +1809,14 @@ int configfs_register_group(struct config_group *parent_group, configfs_dir_set_ready(group->cg_item.ci_dentry->d_fsdata); spin_unlock(&configfs_dirent_lock); inode_unlock(d_inode(parent)); + put_fragment(frag); return 0; err_out: inode_unlock(d_inode(parent)); mutex_lock(&subsys->su_mutex); unlink_group(group); mutex_unlock(&subsys->su_mutex); + put_fragment(frag); return ret; } EXPORT_SYMBOL(configfs_register_group); @@ -1852,10 +1903,17 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) struct dentry *dentry; struct dentry *root; struct configfs_dirent *sd; + struct configfs_fragment *frag; + + frag = new_fragment(); + if (!frag) + return -ENOMEM; root = configfs_pin_fs(); - if (IS_ERR(root)) + if (IS_ERR(root)) { + put_fragment(frag); return PTR_ERR(root); + } if (!group->cg_item.ci_name) group->cg_item.ci_name = group->cg_item.ci_namebuf; @@ -1871,7 +1929,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) d_add(dentry, NULL); err = configfs_attach_group(sd->s_element, &group->cg_item, - dentry); + dentry, frag); if (err) { BUG_ON(d_inode(dentry)); d_drop(dentry); @@ -1889,6 +1947,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) unlink_group(group); configfs_release_fs(); } + put_fragment(frag); return err; } diff --git a/fs/configfs/file.c b/fs/configfs/file.c index c05ffda74a91..a2b7944db12e 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -502,7 +502,7 @@ int configfs_create_file(struct config_item * item, const struct configfs_attrib inode_lock_nested(d_inode(dir), I_MUTEX_NORMAL); error = configfs_make_dirent(parent_sd, NULL, (void *) attr, mode, - CONFIGFS_ITEM_ATTR); + CONFIGFS_ITEM_ATTR, parent_sd->s_frag); inode_unlock(d_inode(dir)); return error; @@ -524,7 +524,7 @@ int configfs_create_bin_file(struct config_item *item, inode_lock_nested(dir->d_inode, I_MUTEX_NORMAL); error = configfs_make_dirent(parent_sd, NULL, (void *) bin_attr, mode, - CONFIGFS_ITEM_BIN_ATTR); + CONFIGFS_ITEM_BIN_ATTR, parent_sd->s_frag); inode_unlock(dir->d_inode); return error; From 09e21253d17f53bdb5aac0e0dbd057a29fcbe8d1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 31 Aug 2019 09:43:43 +0200 Subject: [PATCH 0354/3715] configfs: provide exclusion between IO and removals commit b0841eefd9693827afb9888235e26ddd098f9cef upstream. Make sure that attribute methods are not called after the item has been removed from the tree. To do so, we * at the point of no return in removals, grab ->frag_sem exclusive and mark the fragment dead. * call the methods of attributes with ->frag_sem taken shared and only after having verified that the fragment is still alive. The main benefit is for method instances - they are guaranteed that the objects they are accessing *and* all ancestors are still there. Another win is that we don't need to bother with extra refcount on config_item when opening a file - the item will be alive for as long as it stays in the tree, and we won't touch it/attributes/any associated data after it's been removed from the tree. Signed-off-by: Al Viro Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/configfs/dir.c | 23 ++++++++++++++ fs/configfs/file.c | 77 ++++++++++++++++++++++++++++++++++------------ 2 files changed, 81 insertions(+), 19 deletions(-) diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 1362208a8618..c2ef617d2f97 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1474,6 +1474,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) struct config_item *item; struct configfs_subsystem *subsys; struct configfs_dirent *sd; + struct configfs_fragment *frag; struct module *subsys_owner = NULL, *dead_item_owner = NULL; int ret; @@ -1531,6 +1532,16 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) } } while (ret == -EAGAIN); + frag = sd->s_frag; + if (down_write_killable(&frag->frag_sem)) { + spin_lock(&configfs_dirent_lock); + configfs_detach_rollback(dentry); + spin_unlock(&configfs_dirent_lock); + return -EINTR; + } + frag->frag_dead = true; + up_write(&frag->frag_sem); + /* Get a working ref for the duration of this function */ item = configfs_get_config_item(dentry); @@ -1832,6 +1843,12 @@ void configfs_unregister_group(struct config_group *group) struct configfs_subsystem *subsys = group->cg_subsys; struct dentry *dentry = group->cg_item.ci_dentry; struct dentry *parent = group->cg_item.ci_parent->ci_dentry; + struct configfs_dirent *sd = dentry->d_fsdata; + struct configfs_fragment *frag = sd->s_frag; + + down_write(&frag->frag_sem); + frag->frag_dead = true; + up_write(&frag->frag_sem); inode_lock_nested(d_inode(parent), I_MUTEX_PARENT); spin_lock(&configfs_dirent_lock); @@ -1957,12 +1974,18 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys) struct config_group *group = &subsys->su_group; struct dentry *dentry = group->cg_item.ci_dentry; struct dentry *root = dentry->d_sb->s_root; + struct configfs_dirent *sd = dentry->d_fsdata; + struct configfs_fragment *frag = sd->s_frag; if (dentry->d_parent != root) { pr_err("Tried to unregister non-subsystem!\n"); return; } + down_write(&frag->frag_sem); + frag->frag_dead = true; + up_write(&frag->frag_sem); + inode_lock_nested(d_inode(root), I_MUTEX_PARENT); inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD); diff --git a/fs/configfs/file.c b/fs/configfs/file.c index a2b7944db12e..bb0a427517e9 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -62,22 +62,32 @@ struct configfs_buffer { }; }; - -static int fill_read_buffer(struct configfs_buffer * buffer) +static inline struct configfs_fragment *to_frag(struct file *file) { - ssize_t count; + struct configfs_dirent *sd = file->f_path.dentry->d_fsdata; + + return sd->s_frag; +} + +static int fill_read_buffer(struct file *file, struct configfs_buffer *buffer) +{ + struct configfs_fragment *frag = to_frag(file); + ssize_t count = -ENOENT; if (!buffer->page) buffer->page = (char *) get_zeroed_page(GFP_KERNEL); if (!buffer->page) return -ENOMEM; - count = buffer->attr->show(buffer->item, buffer->page); + down_read(&frag->frag_sem); + if (!frag->frag_dead) + count = buffer->attr->show(buffer->item, buffer->page); + up_read(&frag->frag_sem); + if (count < 0) return count; if (WARN_ON_ONCE(count > (ssize_t)SIMPLE_ATTR_SIZE)) return -EIO; - buffer->needs_read_fill = 0; buffer->count = count; return 0; @@ -110,7 +120,7 @@ configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *pp mutex_lock(&buffer->mutex); if (buffer->needs_read_fill) { - retval = fill_read_buffer(buffer); + retval = fill_read_buffer(file, buffer); if (retval) goto out; } @@ -147,6 +157,7 @@ static ssize_t configfs_read_bin_file(struct file *file, char __user *buf, size_t count, loff_t *ppos) { + struct configfs_fragment *frag = to_frag(file); struct configfs_buffer *buffer = file->private_data; ssize_t retval = 0; ssize_t len = min_t(size_t, count, PAGE_SIZE); @@ -162,7 +173,12 @@ configfs_read_bin_file(struct file *file, char __user *buf, if (buffer->needs_read_fill) { /* perform first read with buf == NULL to get extent */ - len = buffer->bin_attr->read(buffer->item, NULL, 0); + down_read(&frag->frag_sem); + if (!frag->frag_dead) + len = buffer->bin_attr->read(buffer->item, NULL, 0); + else + len = -ENOENT; + up_read(&frag->frag_sem); if (len <= 0) { retval = len; goto out; @@ -182,8 +198,13 @@ configfs_read_bin_file(struct file *file, char __user *buf, buffer->bin_buffer_size = len; /* perform second read to fill buffer */ - len = buffer->bin_attr->read(buffer->item, - buffer->bin_buffer, len); + down_read(&frag->frag_sem); + if (!frag->frag_dead) + len = buffer->bin_attr->read(buffer->item, + buffer->bin_buffer, len); + else + len = -ENOENT; + up_read(&frag->frag_sem); if (len < 0) { retval = len; vfree(buffer->bin_buffer); @@ -234,9 +255,16 @@ fill_write_buffer(struct configfs_buffer * buffer, const char __user * buf, size } static int -flush_write_buffer(struct configfs_buffer *buffer, size_t count) +flush_write_buffer(struct file *file, struct configfs_buffer *buffer, size_t count) { - return buffer->attr->store(buffer->item, buffer->page, count); + struct configfs_fragment *frag = to_frag(file); + int res = -ENOENT; + + down_read(&frag->frag_sem); + if (!frag->frag_dead) + res = buffer->attr->store(buffer->item, buffer->page, count); + up_read(&frag->frag_sem); + return res; } @@ -266,7 +294,7 @@ configfs_write_file(struct file *file, const char __user *buf, size_t count, lof mutex_lock(&buffer->mutex); len = fill_write_buffer(buffer, buf, count); if (len > 0) - len = flush_write_buffer(buffer, len); + len = flush_write_buffer(file, buffer, len); if (len > 0) *ppos += len; mutex_unlock(&buffer->mutex); @@ -342,6 +370,7 @@ out: static int __configfs_open_file(struct inode *inode, struct file *file, int type) { struct dentry *dentry = file->f_path.dentry; + struct configfs_fragment *frag = to_frag(file); struct configfs_attribute *attr; struct configfs_buffer *buffer; int error; @@ -351,8 +380,13 @@ static int __configfs_open_file(struct inode *inode, struct file *file, int type if (!buffer) goto out; + error = -ENOENT; + down_read(&frag->frag_sem); + if (unlikely(frag->frag_dead)) + goto out_free_buffer; + error = -EINVAL; - buffer->item = configfs_get_config_item(dentry->d_parent); + buffer->item = to_item(dentry->d_parent); if (!buffer->item) goto out_free_buffer; @@ -410,6 +444,7 @@ static int __configfs_open_file(struct inode *inode, struct file *file, int type buffer->read_in_progress = false; buffer->write_in_progress = false; file->private_data = buffer; + up_read(&frag->frag_sem); return 0; out_put_module: @@ -417,6 +452,7 @@ out_put_module: out_put_item: config_item_put(buffer->item); out_free_buffer: + up_read(&frag->frag_sem); kfree(buffer); out: return error; @@ -426,8 +462,6 @@ static int configfs_release(struct inode *inode, struct file *filp) { struct configfs_buffer *buffer = filp->private_data; - if (buffer->item) - config_item_put(buffer->item); module_put(buffer->owner); if (buffer->page) free_page((unsigned long)buffer->page); @@ -453,12 +487,17 @@ static int configfs_release_bin_file(struct inode *inode, struct file *file) buffer->read_in_progress = false; if (buffer->write_in_progress) { + struct configfs_fragment *frag = to_frag(file); buffer->write_in_progress = false; - /* result of ->release() is ignored */ - buffer->bin_attr->write(buffer->item, buffer->bin_buffer, - buffer->bin_buffer_size); - + down_read(&frag->frag_sem); + if (!frag->frag_dead) { + /* result of ->release() is ignored */ + buffer->bin_attr->write(buffer->item, + buffer->bin_buffer, + buffer->bin_buffer_size); + } + up_read(&frag->frag_sem); /* vfree on NULL is safe */ vfree(buffer->bin_buffer); buffer->bin_buffer = NULL; From e49b85dd3c7f2bb13f5bd8dd4869e30a82acbba7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 3 Aug 2019 11:51:18 -0400 Subject: [PATCH 0355/3715] configfs: fix a deadlock in configfs_symlink() commit 351e5d869e5ac10cb40c78b5f2d7dfc816ad4587 upstream. Configfs abuses symlink(2). Unlike the normal filesystems, it wants the target resolved at symlink(2) time, like link(2) would've done. The problem is that ->symlink() is called with the parent directory locked exclusive, so resolving the target inside the ->symlink() is easily deadlocked. Short of really ugly games in sys_symlink() itself, all we can do is to unlock the parent before resolving the target and relock it after. However, that invalidates the checks done by the caller of ->symlink(), so we have to * check that dentry is still where it used to be (it couldn't have been moved, but it could've been unhashed) * recheck that it's still negative (somebody else might've successfully created a symlink with the same name while we were looking the target up) * recheck the permissions on the parent directory. Cc: stable@vger.kernel.org Signed-off-by: Al Viro Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/configfs/symlink.c | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c index 9993cdb81e7d..147a6b779ab9 100644 --- a/fs/configfs/symlink.c +++ b/fs/configfs/symlink.c @@ -157,11 +157,42 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna !type->ct_item_ops->allow_link) goto out_put; + /* + * This is really sick. What they wanted was a hybrid of + * link(2) and symlink(2) - they wanted the target resolved + * at syscall time (as link(2) would've done), be a directory + * (which link(2) would've refused to do) *AND* be a deep + * fucking magic, making the target busy from rmdir POV. + * symlink(2) is nothing of that sort, and the locking it + * gets matches the normal symlink(2) semantics. Without + * attempts to resolve the target (which might very well + * not even exist yet) done prior to locking the parent + * directory. This perversion, OTOH, needs to resolve + * the target, which would lead to obvious deadlocks if + * attempted with any directories locked. + * + * Unfortunately, that garbage is userland ABI and we should've + * said "no" back in 2005. Too late now, so we get to + * play very ugly games with locking. + * + * Try *ANYTHING* of that sort in new code, and you will + * really regret it. Just ask yourself - what could a BOFH + * do to me and do I want to find it out first-hand? + * + * AV, a thoroughly annoyed bastard. + */ + inode_unlock(dir); ret = get_target(symname, &path, &target_item, dentry->d_sb); + inode_lock(dir); if (ret) goto out_put; - ret = type->ct_item_ops->allow_link(parent_item, target_item); + if (dentry->d_inode || d_unhashed(dentry)) + ret = -EEXIST; + else + ret = inode_permission(dir, MAY_WRITE | MAY_EXEC); + if (!ret) + ret = type->ct_item_ops->allow_link(parent_item, target_item); if (!ret) { mutex_lock(&configfs_symlink_mutex); ret = create_link(parent_item, target_item, dentry); From c813b4467a2291d14dd9c90f9542ad55eb1ec315 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Thu, 5 Sep 2019 10:17:45 -0600 Subject: [PATCH 0356/3715] usb: dwc3: Allow disabling of metastability workaround commit 42bf02ec6e420e541af9a47437d0bdf961ca2972 upstream Some platforms (e.g. TI's DRA7 USB2 instance) have more trouble with the metastability workaround as it supports only a High-Speed PHY and the PHY can enter into an Erratic state [1] when the controller is set in SuperSpeed mode as part of the metastability workaround. This causes upto 2 seconds delay in enumeration on DRA7's USB2 instance in gadget mode. If these platforms can be better off without the workaround, provide a device tree property to suggest that so the workaround is avoided. [1] Device mode enumeration trace showing PHY Erratic Error. irq/90-dwc3-969 [000] d... 52.323145: dwc3_event: event (00000901): Erratic Error [U0] irq/90-dwc3-969 [000] d... 52.560646: dwc3_event: event (00000901): Erratic Error [U0] irq/90-dwc3-969 [000] d... 52.798144: dwc3_event: event (00000901): Erratic Error [U0] Signed-off-by: Roger Quadros Signed-off-by: Felipe Balbi Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/dwc3.txt | 2 ++ drivers/usb/dwc3/core.c | 3 +++ drivers/usb/dwc3/core.h | 3 +++ drivers/usb/dwc3/gadget.c | 6 ++++-- 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/usb/dwc3.txt b/Documentation/devicetree/bindings/usb/dwc3.txt index 52fb41046b34..44e8bab159ad 100644 --- a/Documentation/devicetree/bindings/usb/dwc3.txt +++ b/Documentation/devicetree/bindings/usb/dwc3.txt @@ -47,6 +47,8 @@ Optional properties: from P0 to P1/P2/P3 without delay. - snps,dis-tx-ipgap-linecheck-quirk: when set, disable u2mac linestate check during HS transmit. + - snps,dis_metastability_quirk: when set, disable metastability workaround. + CAUTION: use only if you are absolutely sure of it. - snps,is-utmi-l1-suspend: true when DWC3 asserts output signal utmi_l1_suspend_n, false when asserts utmi_sleep_n - snps,hird-threshold: HIRD threshold diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 945330ea8d5c..9b093978bd24 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1115,6 +1115,9 @@ static void dwc3_get_properties(struct dwc3 *dwc) device_property_read_u32(dev, "snps,quirk-frame-length-adjustment", &dwc->fladj); + dwc->dis_metastability_quirk = device_property_read_bool(dev, + "snps,dis_metastability_quirk"); + dwc->lpm_nyet_threshold = lpm_nyet_threshold; dwc->tx_de_emphasis = tx_de_emphasis; diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index abd1142c9e4d..40bf0e0768d9 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -869,6 +869,7 @@ struct dwc3_scratchpad_array { * 1 - -3.5dB de-emphasis * 2 - No de-emphasis * 3 - Reserved + * @dis_metastability_quirk: set to disable metastability quirk. * @imod_interval: set the interrupt moderation interval in 250ns * increments or 0 to disable. */ @@ -1025,6 +1026,8 @@ struct dwc3 { unsigned tx_de_emphasis_quirk:1; unsigned tx_de_emphasis:2; + unsigned dis_metastability_quirk:1; + u16 imod_interval; }; diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 1b99d44e52b9..5916340c4162 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2034,7 +2034,8 @@ static void dwc3_gadget_set_speed(struct usb_gadget *g, * STAR#9000525659: Clock Domain Crossing on DCTL in * USB 2.0 Mode */ - if (dwc->revision < DWC3_REVISION_220A) { + if (dwc->revision < DWC3_REVISION_220A && + !dwc->dis_metastability_quirk) { reg |= DWC3_DCFG_SUPERSPEED; } else { switch (speed) { @@ -3265,7 +3266,8 @@ int dwc3_gadget_init(struct dwc3 *dwc) * is less than super speed because we don't have means, yet, to tell * composite.c that we are USB 2.0 + LPM ECN. */ - if (dwc->revision < DWC3_REVISION_220A) + if (dwc->revision < DWC3_REVISION_220A && + !dwc->dis_metastability_quirk) dev_info(dwc->dev, "changing max_speed on rev %08x\n", dwc->revision); From 1863e7fef65ca61bf10f8fccebc2f123bd5f10f9 Mon Sep 17 00:00:00 2001 From: Keerthy Date: Thu, 5 Sep 2019 10:17:46 -0600 Subject: [PATCH 0357/3715] mfd: palmas: Assign the right powerhold mask for tps65917 commit 572ff4d560be3784205b224cd67d6715620092d7 upstream The powerhold mask for TPS65917 is different when comapred to the other palmas versions. Hence assign the right mask that enables power off of tps65917 pmic correctly. Signed-off-by: Keerthy Signed-off-by: Lee Jones Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/palmas.c | 10 +++++++++- include/linux/mfd/palmas.h | 3 +++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/mfd/palmas.c b/drivers/mfd/palmas.c index 3922a93f9f92..663a2398b6b1 100644 --- a/drivers/mfd/palmas.c +++ b/drivers/mfd/palmas.c @@ -430,6 +430,7 @@ static void palmas_power_off(void) { unsigned int addr; int ret, slave; + u8 powerhold_mask; struct device_node *np = palmas_dev->dev->of_node; if (of_property_read_bool(np, "ti,palmas-override-powerhold")) { @@ -437,8 +438,15 @@ static void palmas_power_off(void) PALMAS_PRIMARY_SECONDARY_PAD2); slave = PALMAS_BASE_TO_SLAVE(PALMAS_PU_PD_OD_BASE); + if (of_device_is_compatible(np, "ti,tps65917")) + powerhold_mask = + TPS65917_PRIMARY_SECONDARY_PAD2_GPIO_5_MASK; + else + powerhold_mask = + PALMAS_PRIMARY_SECONDARY_PAD2_GPIO_7_MASK; + ret = regmap_update_bits(palmas_dev->regmap[slave], addr, - PALMAS_PRIMARY_SECONDARY_PAD2_GPIO_7_MASK, 0); + powerhold_mask, 0); if (ret) dev_err(palmas_dev->dev, "Unable to write PRIMARY_SECONDARY_PAD2 %d\n", diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h index 6dec43826303..cffb23b8bd70 100644 --- a/include/linux/mfd/palmas.h +++ b/include/linux/mfd/palmas.h @@ -3733,6 +3733,9 @@ enum usb_irq_events { #define TPS65917_REGEN3_CTRL_MODE_ACTIVE 0x01 #define TPS65917_REGEN3_CTRL_MODE_ACTIVE_SHIFT 0x00 +/* POWERHOLD Mask field for PRIMARY_SECONDARY_PAD2 register */ +#define TPS65917_PRIMARY_SECONDARY_PAD2_GPIO_5_MASK 0xC + /* Registers for function RESOURCE */ #define TPS65917_REGEN1_CTRL 0x2 #define TPS65917_PLLEN_CTRL 0x3 From c3a387268218a4c5b4d6c7a5d49bdcdc19ade57e Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Thu, 5 Sep 2019 10:17:47 -0600 Subject: [PATCH 0358/3715] ASoC: tlv320aic31xx: Handle inverted BCLK in non-DSP modes commit dcb407b257af06fa58b0544ec01ec9e0d3927e02 upstream Currently BCLK inverting is only handled when the DAI format is DSP, but the BCLK may be inverted in any supported mode. Without this using this CODEC in any other mode than DSP with the BCLK inverted leads to bad sampling timing and very poor audio quality. Signed-off-by: Andrew F. Davis Signed-off-by: Mark Brown Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/tlv320aic31xx.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/sound/soc/codecs/tlv320aic31xx.c b/sound/soc/codecs/tlv320aic31xx.c index 54a87a905eb6..d3bd0bf15ddb 100644 --- a/sound/soc/codecs/tlv320aic31xx.c +++ b/sound/soc/codecs/tlv320aic31xx.c @@ -924,6 +924,18 @@ static int aic31xx_set_dai_fmt(struct snd_soc_dai *codec_dai, return -EINVAL; } + /* signal polarity */ + switch (fmt & SND_SOC_DAIFMT_INV_MASK) { + case SND_SOC_DAIFMT_NB_NF: + break; + case SND_SOC_DAIFMT_IB_NF: + iface_reg2 |= AIC31XX_BCLKINV_MASK; + break; + default: + dev_err(codec->dev, "Invalid DAI clock signal polarity\n"); + return -EINVAL; + } + /* interface format */ switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { case SND_SOC_DAIFMT_I2S: @@ -931,16 +943,12 @@ static int aic31xx_set_dai_fmt(struct snd_soc_dai *codec_dai, case SND_SOC_DAIFMT_DSP_A: dsp_a_val = 0x1; case SND_SOC_DAIFMT_DSP_B: - /* NOTE: BCLKINV bit value 1 equas NB and 0 equals IB */ - switch (fmt & SND_SOC_DAIFMT_INV_MASK) { - case SND_SOC_DAIFMT_NB_NF: - iface_reg2 |= AIC31XX_BCLKINV_MASK; - break; - case SND_SOC_DAIFMT_IB_NF: - break; - default: - return -EINVAL; - } + /* + * NOTE: This CODEC samples on the falling edge of BCLK in + * DSP mode, this is inverted compared to what most DAIs + * expect, so we invert for this mode + */ + iface_reg2 ^= AIC31XX_BCLKINV_MASK; iface_reg1 |= (AIC31XX_DSP_MODE << AIC31XX_IFACE1_DATATYPE_SHIFT); break; From 6442c97b5d846bf5ef6987e0ba5d35d31e58bc29 Mon Sep 17 00:00:00 2001 From: Roman Yeryomin Date: Thu, 5 Sep 2019 10:17:48 -0600 Subject: [PATCH 0359/3715] mtd: spi-nor: enable 4B opcodes for mx66l51235l commit d342b6a973af459f6104cad6effc8efc71a0558d upstream Signed-off-by: Roman Yeryomin Signed-off-by: Cyrille Pitchen Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/spi-nor/spi-nor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 34ecc12ee3d9..6c013341ef09 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -1030,7 +1030,7 @@ static const struct flash_info spi_nor_ids[] = { { "mx25l25635e", INFO(0xc22019, 0, 64 * 1024, 512, SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) }, { "mx25u25635f", INFO(0xc22539, 0, 64 * 1024, 512, SECT_4K | SPI_NOR_4B_OPCODES) }, { "mx25l25655e", INFO(0xc22619, 0, 64 * 1024, 512, 0) }, - { "mx66l51235l", INFO(0xc2201a, 0, 64 * 1024, 1024, SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) }, + { "mx66l51235l", INFO(0xc2201a, 0, 64 * 1024, 1024, SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ | SPI_NOR_4B_OPCODES) }, { "mx66u51235f", INFO(0xc2253a, 0, 64 * 1024, 1024, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ | SPI_NOR_4B_OPCODES) }, { "mx66l1g45g", INFO(0xc2201b, 0, 64 * 1024, 2048, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) }, { "mx66l1g55g", INFO(0xc2261b, 0, 64 * 1024, 2048, SPI_NOR_QUAD_READ) }, From cb47222af149a0959d2536b0868ea6fab59bee5b Mon Sep 17 00:00:00 2001 From: Vignesh R Date: Thu, 5 Sep 2019 10:17:49 -0600 Subject: [PATCH 0360/3715] mtd: spi-nor: cadence-quadspi: add a delay in write sequence commit 61dc8493bae9ba82a1c72edbc6c6065f6a94456a upstream As per 66AK2G02 TRM[1] SPRUHY8F section 11.15.5.3 Indirect Access Controller programming sequence, a delay equal to couple of QSPI master clock(~5ns) is required after setting CQSPI_REG_INDIRECTWR_START bit and writing data to the flash. Introduce a quirk flag CQSPI_NEEDS_WR_DELAY to handle this and set this flag for TI 66AK2G SoC. [1]http://www.ti.com/lit/ug/spruhy8f/spruhy8f.pdf Signed-off-by: Vignesh R Acked-by: Marek Vasut Signed-off-by: Cyrille Pitchen Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/spi-nor/cadence-quadspi.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/spi-nor/cadence-quadspi.c b/drivers/mtd/spi-nor/cadence-quadspi.c index f22dd34f4f83..ff4edf4bb23c 100644 --- a/drivers/mtd/spi-nor/cadence-quadspi.c +++ b/drivers/mtd/spi-nor/cadence-quadspi.c @@ -38,6 +38,9 @@ #define CQSPI_NAME "cadence-qspi" #define CQSPI_MAX_CHIPSELECT 16 +/* Quirks */ +#define CQSPI_NEEDS_WR_DELAY BIT(0) + struct cqspi_st; struct cqspi_flash_pdata { @@ -76,6 +79,7 @@ struct cqspi_st { u32 fifo_depth; u32 fifo_width; u32 trigger_address; + u32 wr_delay; struct cqspi_flash_pdata f_pdata[CQSPI_MAX_CHIPSELECT]; }; @@ -623,6 +627,15 @@ static int cqspi_indirect_write_execute(struct spi_nor *nor, reinit_completion(&cqspi->transfer_complete); writel(CQSPI_REG_INDIRECTWR_START_MASK, reg_base + CQSPI_REG_INDIRECTWR); + /* + * As per 66AK2G02 TRM SPRUHY8F section 11.15.5.3 Indirect Access + * Controller programming sequence, couple of cycles of + * QSPI_REF_CLK delay is required for the above bit to + * be internally synchronized by the QSPI module. Provide 5 + * cycles of delay. + */ + if (cqspi->wr_delay) + ndelay(cqspi->wr_delay); while (remaining > 0) { size_t write_words, mod_bytes; @@ -1184,6 +1197,7 @@ static int cqspi_probe(struct platform_device *pdev) struct cqspi_st *cqspi; struct resource *res; struct resource *res_ahb; + unsigned long data; int ret; int irq; @@ -1241,6 +1255,10 @@ static int cqspi_probe(struct platform_device *pdev) } cqspi->master_ref_clk_hz = clk_get_rate(cqspi->clk); + data = (unsigned long)of_device_get_match_data(dev); + if (data & CQSPI_NEEDS_WR_DELAY) + cqspi->wr_delay = 5 * DIV_ROUND_UP(NSEC_PER_SEC, + cqspi->master_ref_clk_hz); ret = devm_request_irq(dev, irq, cqspi_irq_handler, 0, pdev->name, cqspi); @@ -1312,7 +1330,14 @@ static const struct dev_pm_ops cqspi__dev_pm_ops = { #endif static const struct of_device_id cqspi_dt_ids[] = { - {.compatible = "cdns,qspi-nor",}, + { + .compatible = "cdns,qspi-nor", + .data = (void *)0, + }, + { + .compatible = "ti,k2g-qspi", + .data = (void *)CQSPI_NEEDS_WR_DELAY, + }, { /* end of table */ } }; From e8cff9cabe821dc20d4aa7b1c95719bfae656ca8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 5 Sep 2019 10:17:50 -0600 Subject: [PATCH 0361/3715] misc: pci_endpoint_test: Prevent some integer overflows commit 378f79cab12b669928f3a4037f023837ead2ce0c upstream "size + max" can have an arithmetic overflow when we're allocating: orig_src_addr = dma_alloc_coherent(dev, size + alignment, ... I've added a few checks to prevent that. Fixes: 13107c60681f ("misc: pci_endpoint_test: Add support to provide aligned buffer addresses") Signed-off-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/misc/pci_endpoint_test.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c index 9849bf183299..504fa680825d 100644 --- a/drivers/misc/pci_endpoint_test.c +++ b/drivers/misc/pci_endpoint_test.c @@ -226,6 +226,9 @@ static bool pci_endpoint_test_copy(struct pci_endpoint_test *test, size_t size) u32 src_crc32; u32 dst_crc32; + if (size > SIZE_MAX - alignment) + goto err; + orig_src_addr = dma_alloc_coherent(dev, size + alignment, &orig_src_phys_addr, GFP_KERNEL); if (!orig_src_addr) { @@ -311,6 +314,9 @@ static bool pci_endpoint_test_write(struct pci_endpoint_test *test, size_t size) size_t alignment = test->alignment; u32 crc32; + if (size > SIZE_MAX - alignment) + goto err; + orig_addr = dma_alloc_coherent(dev, size + alignment, &orig_phys_addr, GFP_KERNEL); if (!orig_addr) { @@ -369,6 +375,9 @@ static bool pci_endpoint_test_read(struct pci_endpoint_test *test, size_t size) size_t alignment = test->alignment; u32 crc32; + if (size > SIZE_MAX - alignment) + goto err; + orig_addr = dma_alloc_coherent(dev, size + alignment, &orig_phys_addr, GFP_KERNEL); if (!orig_addr) { From 354d644056094689337ecb6dd29b30ee7cf3a23f Mon Sep 17 00:00:00 2001 From: Keerthy Date: Thu, 5 Sep 2019 10:17:51 -0600 Subject: [PATCH 0362/3715] PCI: dra7xx: Add shutdown handler to cleanly turn off clocks commit 9c049bea083fea21373b8baf51fe49acbe24e105 upstream Add shutdown handler to cleanly turn off clocks. This will help in cases of kexec where in a new kernel can boot abruptly. Signed-off-by: Keerthy Signed-off-by: Bjorn Helgaas Acked-by: Kishon Vijay Abraham I Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/pci/dwc/pci-dra7xx.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/pci/dwc/pci-dra7xx.c b/drivers/pci/dwc/pci-dra7xx.c index 7f5dfa169d0f..2e0d0b29cdcb 100644 --- a/drivers/pci/dwc/pci-dra7xx.c +++ b/drivers/pci/dwc/pci-dra7xx.c @@ -817,6 +817,22 @@ static int dra7xx_pcie_resume_noirq(struct device *dev) } #endif +void dra7xx_pcie_shutdown(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct dra7xx_pcie *dra7xx = dev_get_drvdata(dev); + int ret; + + dra7xx_pcie_stop_link(dra7xx->pci); + + ret = pm_runtime_put_sync(dev); + if (ret < 0) + dev_dbg(dev, "pm_runtime_put_sync failed\n"); + + pm_runtime_disable(dev); + dra7xx_pcie_disable_phy(dra7xx); +} + static const struct dev_pm_ops dra7xx_pcie_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(dra7xx_pcie_suspend, dra7xx_pcie_resume) SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(dra7xx_pcie_suspend_noirq, @@ -830,5 +846,6 @@ static struct platform_driver dra7xx_pcie_driver = { .suppress_bind_attrs = true, .pm = &dra7xx_pcie_pm_ops, }, + .shutdown = dra7xx_pcie_shutdown, }; builtin_platform_driver_probe(dra7xx_pcie_driver, dra7xx_pcie_probe); From 3f2beab56a2b6c5eb91d8fb089efef1635c6c059 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Thu, 5 Sep 2019 10:17:52 -0600 Subject: [PATCH 0363/3715] misc: pci_endpoint_test: Fix BUG_ON error during pci_disable_msi() commit b7636e816adcb52bc96b6fb7bc9d141cbfd17ddb upstream pci_disable_msi() throws a Kernel BUG if the driver has successfully requested an IRQ and not released it. Fix it here by freeing IRQs before invoking pci_disable_msi(). Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Bjorn Helgaas Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/misc/pci_endpoint_test.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c index 504fa680825d..230f1e8538dc 100644 --- a/drivers/misc/pci_endpoint_test.c +++ b/drivers/misc/pci_endpoint_test.c @@ -92,6 +92,7 @@ struct pci_endpoint_test { void __iomem *bar[6]; struct completion irq_raised; int last_irq; + int num_irqs; /* mutex to protect the ioctls */ struct mutex mutex; struct miscdevice miscdev; @@ -514,6 +515,7 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, irq = pci_alloc_irq_vectors(pdev, 1, 32, PCI_IRQ_MSI); if (irq < 0) dev_err(dev, "failed to get MSI interrupts\n"); + test->num_irqs = irq; } err = devm_request_irq(dev, pdev->irq, pci_endpoint_test_irqhandler, @@ -581,6 +583,9 @@ err_iounmap: pci_iounmap(pdev, test->bar[bar]); } + for (i = 0; i < irq; i++) + devm_free_irq(dev, pdev->irq + i, test); + err_disable_msi: pci_disable_msi(pdev); pci_release_regions(pdev); @@ -594,6 +599,7 @@ err_disable_pdev: static void pci_endpoint_test_remove(struct pci_dev *pdev) { int id; + int i; enum pci_barno bar; struct pci_endpoint_test *test = pci_get_drvdata(pdev); struct miscdevice *misc_device = &test->miscdev; @@ -609,6 +615,8 @@ static void pci_endpoint_test_remove(struct pci_dev *pdev) if (test->bar[bar]) pci_iounmap(pdev, test->bar[bar]); } + for (i = 0; i < test->num_irqs; i++) + devm_free_irq(&pdev->dev, pdev->irq + i, test); pci_disable_msi(pdev); pci_release_regions(pdev); pci_disable_device(pdev); From e36f4a7c8822fd5bcb95a4aecfd92f6129be1a5c Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 5 Sep 2019 10:17:53 -0600 Subject: [PATCH 0364/3715] mailbox: reset txdone_method TXDONE_BY_POLL if client knows_txdone commit 33cd7123ac0ba5360656ae27db453de5b9aa711f upstream Currently the mailbox framework sets txdone_method to TXDONE_BY_POLL if the controller sets txdone_by_poll. However some clients can have a mechanism to do TXDONE_BY_ACK which they can specify by knows_txdone. However, we endup setting both TXDONE_BY_POLL and TXDONE_BY_ACK in that case. In such scenario, we may end up with below warnings as the tx ticker is run both by mailbox framework and the client. WARNING: CPU: 1 PID: 0 at kernel/time/hrtimer.c:805 hrtimer_forward+0x88/0xd8 CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.12.0-rc5 #242 Hardware name: ARM LTD ARM Juno Development Platform task: ffff8009768ca700 task.stack: ffff8009768f8000 PC is at hrtimer_forward+0x88/0xd8 LR is at txdone_hrtimer+0xd4/0xf8 Call trace: hrtimer_forward+0x88/0xd8 __hrtimer_run_queues+0xe4/0x158 hrtimer_interrupt+0xa4/0x220 arch_timer_handler_phys+0x30/0x40 handle_percpu_devid_irq+0x78/0x130 generic_handle_irq+0x24/0x38 __handle_domain_irq+0x5c/0xb8 gic_handle_irq+0x54/0xa8 This patch fixes the issue by resetting TXDONE_BY_POLL if client has set knows_txdone. Cc: Alexey Klimov Signed-off-by: Sudeep Holla Signed-off-by: Jassi Brar Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/mailbox/mailbox.c | 4 ++-- drivers/mailbox/pcc.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c index 44b49a2676f0..055c90b8253c 100644 --- a/drivers/mailbox/mailbox.c +++ b/drivers/mailbox/mailbox.c @@ -351,7 +351,7 @@ struct mbox_chan *mbox_request_channel(struct mbox_client *cl, int index) init_completion(&chan->tx_complete); if (chan->txdone_method == TXDONE_BY_POLL && cl->knows_txdone) - chan->txdone_method |= TXDONE_BY_ACK; + chan->txdone_method = TXDONE_BY_ACK; spin_unlock_irqrestore(&chan->lock, flags); @@ -420,7 +420,7 @@ void mbox_free_channel(struct mbox_chan *chan) spin_lock_irqsave(&chan->lock, flags); chan->cl = NULL; chan->active_req = NULL; - if (chan->txdone_method == (TXDONE_BY_POLL | TXDONE_BY_ACK)) + if (chan->txdone_method == TXDONE_BY_ACK) chan->txdone_method = TXDONE_BY_POLL; module_put(chan->mbox->dev->driver->owner); diff --git a/drivers/mailbox/pcc.c b/drivers/mailbox/pcc.c index 9b7005e1345e..27c2294be51a 100644 --- a/drivers/mailbox/pcc.c +++ b/drivers/mailbox/pcc.c @@ -266,7 +266,7 @@ struct mbox_chan *pcc_mbox_request_channel(struct mbox_client *cl, init_completion(&chan->tx_complete); if (chan->txdone_method == TXDONE_BY_POLL && cl->knows_txdone) - chan->txdone_method |= TXDONE_BY_ACK; + chan->txdone_method = TXDONE_BY_ACK; spin_unlock_irqrestore(&chan->lock, flags); @@ -312,7 +312,7 @@ void pcc_mbox_free_channel(struct mbox_chan *chan) spin_lock_irqsave(&chan->lock, flags); chan->cl = NULL; chan->active_req = NULL; - if (chan->txdone_method == (TXDONE_BY_POLL | TXDONE_BY_ACK)) + if (chan->txdone_method == TXDONE_BY_ACK) chan->txdone_method = TXDONE_BY_POLL; spin_unlock_irqrestore(&chan->lock, flags); From a0128f369072548e4f3a0e9862a268fedf32c618 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 5 Sep 2019 10:17:54 -0600 Subject: [PATCH 0365/3715] ASoC: tlv320dac31xx: mark expected switch fall-through commit 09fc38c1af4cb888255e9ecf267bf9757c12885d upstream In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Addresses-Coverity-ID: 1195220 Signed-off-by: Gustavo A. R. Silva Signed-off-by: Mark Brown Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/tlv320aic31xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/tlv320aic31xx.c b/sound/soc/codecs/tlv320aic31xx.c index d3bd0bf15ddb..cc95c15ceceb 100644 --- a/sound/soc/codecs/tlv320aic31xx.c +++ b/sound/soc/codecs/tlv320aic31xx.c @@ -941,7 +941,7 @@ static int aic31xx_set_dai_fmt(struct snd_soc_dai *codec_dai, case SND_SOC_DAIFMT_I2S: break; case SND_SOC_DAIFMT_DSP_A: - dsp_a_val = 0x1; + dsp_a_val = 0x1; /* fall through */ case SND_SOC_DAIFMT_DSP_B: /* * NOTE: This CODEC samples on the falling edge of BCLK in From 6a65aa55cb0ec892c40ab907af83452f7dea2a4a Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Thu, 5 Sep 2019 10:17:55 -0600 Subject: [PATCH 0366/3715] ASoC: davinci-mcasp: Handle return value of devm_kasprintf commit 0c8b794c4a10aaf7ac0d4a49be2b2638e2038adb upstream devm_kasprintf() can fail here and we must check its return value. Signed-off-by: Arvind Yadav Acked-by: Peter Ujfalusi Signed-off-by: Mark Brown Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- sound/soc/davinci/davinci-mcasp.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/soc/davinci/davinci-mcasp.c b/sound/soc/davinci/davinci-mcasp.c index 0480ec4c8035..af6cd8b874f5 100644 --- a/sound/soc/davinci/davinci-mcasp.c +++ b/sound/soc/davinci/davinci-mcasp.c @@ -1894,6 +1894,10 @@ static int davinci_mcasp_probe(struct platform_device *pdev) if (irq >= 0) { irq_name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s_common", dev_name(&pdev->dev)); + if (!irq_name) { + ret = -ENOMEM; + goto err; + } ret = devm_request_threaded_irq(&pdev->dev, irq, NULL, davinci_mcasp_common_irq_handler, IRQF_ONESHOT | IRQF_SHARED, @@ -1911,6 +1915,10 @@ static int davinci_mcasp_probe(struct platform_device *pdev) if (irq >= 0) { irq_name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s_rx", dev_name(&pdev->dev)); + if (!irq_name) { + ret = -ENOMEM; + goto err; + } ret = devm_request_threaded_irq(&pdev->dev, irq, NULL, davinci_mcasp_rx_irq_handler, IRQF_ONESHOT, irq_name, mcasp); @@ -1926,6 +1934,10 @@ static int davinci_mcasp_probe(struct platform_device *pdev) if (irq >= 0) { irq_name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s_tx", dev_name(&pdev->dev)); + if (!irq_name) { + ret = -ENOMEM; + goto err; + } ret = devm_request_threaded_irq(&pdev->dev, irq, NULL, davinci_mcasp_tx_irq_handler, IRQF_ONESHOT, irq_name, mcasp); From d74abcff60ea82b15fee3c2298ba6cf761c4ad6c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 5 Sep 2019 10:17:56 -0600 Subject: [PATCH 0367/3715] ASoC: davinci: Kill BUG_ON() usage commit befff4fbc27e19b14b343eb4a65d8f75d38b6230 upstream Don't use BUG_ON() for a non-critical sanity check on production systems. This patch replaces with a softer WARN_ON() and an error path. Signed-off-by: Takashi Iwai Signed-off-by: Mark Brown Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- sound/soc/davinci/davinci-mcasp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/davinci/davinci-mcasp.c b/sound/soc/davinci/davinci-mcasp.c index af6cd8b874f5..b4e6f4a04cb6 100644 --- a/sound/soc/davinci/davinci-mcasp.c +++ b/sound/soc/davinci/davinci-mcasp.c @@ -1748,7 +1748,8 @@ static int davinci_mcasp_get_dma_type(struct davinci_mcasp *mcasp) PTR_ERR(chan)); return PTR_ERR(chan); } - BUG_ON(!chan->device || !chan->device->dev); + if (WARN_ON(!chan->device || !chan->device->dev)) + return -EINVAL; if (chan->device->dev->of_node) ret = of_property_read_string(chan->device->dev->of_node, From f5eca5cfb3a48e1699574ab3f8df2304f355758d Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Thu, 5 Sep 2019 10:17:57 -0600 Subject: [PATCH 0368/3715] ASoC: davinci-mcasp: Fix an error handling path in 'davinci_mcasp_probe()' commit 1b8b68b05d1868404316d32e20782b00442aba90 upstream All error handling paths in this function 'goto err' except this one. If one of the 2 previous memory allocations fails, we should go through the existing error handling path. Otherwise there is an unbalanced pm_runtime_enable()/pm_runtime_disable(). Fixes: dd55ff8346a9 ("ASoC: davinci-mcasp: Add set_tdm_slots() support") Signed-off-by: Christophe JAILLET Acked-by: Peter Ujfalusi Signed-off-by: Mark Brown Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- sound/soc/davinci/davinci-mcasp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/soc/davinci/davinci-mcasp.c b/sound/soc/davinci/davinci-mcasp.c index b4e6f4a04cb6..07bac9ea65c4 100644 --- a/sound/soc/davinci/davinci-mcasp.c +++ b/sound/soc/davinci/davinci-mcasp.c @@ -2022,8 +2022,10 @@ static int davinci_mcasp_probe(struct platform_device *pdev) GFP_KERNEL); if (!mcasp->chconstr[SNDRV_PCM_STREAM_PLAYBACK].list || - !mcasp->chconstr[SNDRV_PCM_STREAM_CAPTURE].list) - return -ENOMEM; + !mcasp->chconstr[SNDRV_PCM_STREAM_CAPTURE].list) { + ret = -ENOMEM; + goto err; + } ret = davinci_mcasp_set_ch_constraints(mcasp); if (ret) From dcb86e921dc7e5c2ed524404ceee77175bca5727 Mon Sep 17 00:00:00 2001 From: Claudio Foellmi Date: Thu, 5 Sep 2019 10:17:58 -0600 Subject: [PATCH 0369/3715] i2c: omap: Trigger bus recovery in lockup case commit 93367bfca98f36cece57c01dbce6ea1b4ac58245 upstream A very conservative check for bus activity (to prevent interference in multimaster setups) prevented the bus recovery methods from being triggered in the case that SDA or SCL was stuck low. This defeats the purpose of the recovery mechanism, which was introduced for exactly this situation (a slave device keeping SDA pulled down). Also added a check to make sure SDA is low before attempting recovery. If SDA is not stuck low, recovery will not help, so we can skip it. Note that bus lockups can persist across reboots. The only other options are to reset or power cycle the offending slave device, and many i2c slaves do not even have a reset pin. If we see that one of the lines is low for the entire timeout duration, we can actually be sure that there is no other master driving the bus. It is therefore save for us to attempt a bus recovery. Signed-off-by: Claudio Foellmi Tested-by: Vignesh R Reviewed-by: Grygorii Strashko [wsa: fixed one return code to -EBUSY] Signed-off-by: Wolfram Sang Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-omap.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index 12ba183693d6..a03564f41ad0 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -486,6 +486,22 @@ static int omap_i2c_init(struct omap_i2c_dev *omap) return 0; } +/* + * Try bus recovery, but only if SDA is actually low. + */ +static int omap_i2c_recover_bus(struct omap_i2c_dev *omap) +{ + u16 systest; + + systest = omap_i2c_read_reg(omap, OMAP_I2C_SYSTEST_REG); + if ((systest & OMAP_I2C_SYSTEST_SCL_I_FUNC) && + (systest & OMAP_I2C_SYSTEST_SDA_I_FUNC)) + return 0; /* bus seems to already be fine */ + if (!(systest & OMAP_I2C_SYSTEST_SCL_I_FUNC)) + return -EBUSY; /* recovery would not fix SCL */ + return i2c_recover_bus(&omap->adapter); +} + /* * Waiting on Bus Busy */ @@ -496,7 +512,7 @@ static int omap_i2c_wait_for_bb(struct omap_i2c_dev *omap) timeout = jiffies + OMAP_I2C_TIMEOUT; while (omap_i2c_read_reg(omap, OMAP_I2C_STAT_REG) & OMAP_I2C_STAT_BB) { if (time_after(jiffies, timeout)) - return i2c_recover_bus(&omap->adapter); + return omap_i2c_recover_bus(omap); msleep(1); } @@ -577,8 +593,13 @@ static int omap_i2c_wait_for_bb_valid(struct omap_i2c_dev *omap) } if (time_after(jiffies, timeout)) { + /* + * SDA or SCL were low for the entire timeout without + * any activity detected. Most likely, a slave is + * locking up the bus with no master driving the clock. + */ dev_warn(omap->dev, "timeout waiting for bus ready\n"); - return -ETIMEDOUT; + return omap_i2c_recover_bus(omap); } msleep(1); From 185c5fa6b2ad67b811c1886cc5b8013a04747a87 Mon Sep 17 00:00:00 2001 From: Zumeng Chen Date: Thu, 5 Sep 2019 10:17:59 -0600 Subject: [PATCH 0370/3715] cpufreq: ti-cpufreq: add missing of_node_put() commit 248aefdcc3a7e0cfbd014946b4dead63e750e71b upstream call of_node_put to release the refcount of np. Signed-off-by: Zumeng Chen Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/ti-cpufreq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c index 4bf47de6101f..cadc324bedb4 100644 --- a/drivers/cpufreq/ti-cpufreq.c +++ b/drivers/cpufreq/ti-cpufreq.c @@ -205,6 +205,7 @@ static int ti_cpufreq_init(void) np = of_find_node_by_path("/"); match = of_match_node(ti_cpufreq_of_match, np); + of_node_put(np); if (!match) return -ENODEV; From 508806ef60b5101e74868a723b34f2083071231e Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Tue, 31 Oct 2017 15:26:00 +0200 Subject: [PATCH 0371/3715] ARM: dts: dra7: Disable USB metastability workaround for USB2 commit b8c9c6fa2002b8fd4a9710f76f80f99c6046d48c upstream. The metastability workaround causes Erratic errors [1] on the HighSpeed USB PHY which can cause upto 2 seconds delay in enumerating to a USB host while in Gadget mode. Disable the Run/Stop metastability workaround to avoid this ill effect. We are aware that this opens up the opportunity for Run/Stop metastability, however this issue has never been observed in TI releases so we think that Run/Stop metastability is a lesser evil than the PHY Erratic errors. So disable it. [1] USB controller trace during gadget enumeration irq/90-dwc3-969 [000] d... 52.323145: dwc3_event: event (00000901): Erratic Error [U0] irq/90-dwc3-969 [000] d... 52.560646: dwc3_event: event (00000901): Erratic Error [U0] irq/90-dwc3-969 [000] d... 52.798144: dwc3_event: event (00000901): Erratic Error [U0] Signed-off-by: Roger Quadros Acked-by: Felipe Balbi Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/dra7.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index 0bf354024ef5..09686d73f947 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -1540,6 +1540,7 @@ dr_mode = "otg"; snps,dis_u3_susphy_quirk; snps,dis_u2_susphy_quirk; + snps,dis_metastability_quirk; }; }; From 3dec71e388f95382d83ebb5589f0016eac4a6d2b Mon Sep 17 00:00:00 2001 From: Dave Chiluk Date: Tue, 23 Jul 2019 11:44:26 -0500 Subject: [PATCH 0372/3715] sched/fair: Fix low cpu usage with high throttling by removing expiration of cpu-local slices commit de53fd7aedb100f03e5d2231cfce0e4993282425 upstream. It has been observed, that highly-threaded, non-cpu-bound applications running under cpu.cfs_quota_us constraints can hit a high percentage of periods throttled while simultaneously not consuming the allocated amount of quota. This use case is typical of user-interactive non-cpu bound applications, such as those running in kubernetes or mesos when run on multiple cpu cores. This has been root caused to cpu-local run queue being allocated per cpu bandwidth slices, and then not fully using that slice within the period. At which point the slice and quota expires. This expiration of unused slice results in applications not being able to utilize the quota for which they are allocated. The non-expiration of per-cpu slices was recently fixed by 'commit 512ac999d275 ("sched/fair: Fix bandwidth timer clock drift condition")'. Prior to that it appears that this had been broken since at least 'commit 51f2176d74ac ("sched/fair: Fix unlocked reads of some cfs_b->quota/period")' which was introduced in v3.16-rc1 in 2014. That added the following conditional which resulted in slices never being expired. if (cfs_rq->runtime_expires != cfs_b->runtime_expires) { /* extend local deadline, drift is bounded above by 2 ticks */ cfs_rq->runtime_expires += TICK_NSEC; Because this was broken for nearly 5 years, and has recently been fixed and is now being noticed by many users running kubernetes (https://github.com/kubernetes/kubernetes/issues/67577) it is my opinion that the mechanisms around expiring runtime should be removed altogether. This allows quota already allocated to per-cpu run-queues to live longer than the period boundary. This allows threads on runqueues that do not use much CPU to continue to use their remaining slice over a longer period of time than cpu.cfs_period_us. However, this helps prevent the above condition of hitting throttling while also not fully utilizing your cpu quota. This theoretically allows a machine to use slightly more than its allotted quota in some periods. This overflow would be bounded by the remaining quota left on each per-cpu runqueueu. This is typically no more than min_cfs_rq_runtime=1ms per cpu. For CPU bound tasks this will change nothing, as they should theoretically fully utilize all of their quota in each period. For user-interactive tasks as described above this provides a much better user/application experience as their cpu utilization will more closely match the amount they requested when they hit throttling. This means that cpu limits no longer strictly apply per period for non-cpu bound applications, but that they are still accurate over longer timeframes. This greatly improves performance of high-thread-count, non-cpu bound applications with low cfs_quota_us allocation on high-core-count machines. In the case of an artificial testcase (10ms/100ms of quota on 80 CPU machine), this commit resulted in almost 30x performance improvement, while still maintaining correct cpu quota restrictions. That testcase is available at https://github.com/indeedeng/fibtest. Fixes: 512ac999d275 ("sched/fair: Fix bandwidth timer clock drift condition") Signed-off-by: Dave Chiluk Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Phil Auld Reviewed-by: Ben Segall Cc: Ingo Molnar Cc: John Hammond Cc: Jonathan Corbet Cc: Kyle Anderson Cc: Gabriel Munos Cc: Peter Oskolkov Cc: Cong Wang Cc: Brendan Gregg Link: https://lkml.kernel.org/r/1563900266-19734-2-git-send-email-chiluk+linux@indeed.com Signed-off-by: Greg Kroah-Hartman --- Documentation/scheduler/sched-bwc.txt | 45 +++++++++++++++++ kernel/sched/fair.c | 70 +++------------------------ kernel/sched/sched.h | 4 -- 3 files changed, 52 insertions(+), 67 deletions(-) diff --git a/Documentation/scheduler/sched-bwc.txt b/Documentation/scheduler/sched-bwc.txt index f6b1873f68ab..de583fbbfe42 100644 --- a/Documentation/scheduler/sched-bwc.txt +++ b/Documentation/scheduler/sched-bwc.txt @@ -90,6 +90,51 @@ There are two ways in which a group may become throttled: In case b) above, even though the child may have runtime remaining it will not be allowed to until the parent's runtime is refreshed. +CFS Bandwidth Quota Caveats +--------------------------- +Once a slice is assigned to a cpu it does not expire. However all but 1ms of +the slice may be returned to the global pool if all threads on that cpu become +unrunnable. This is configured at compile time by the min_cfs_rq_runtime +variable. This is a performance tweak that helps prevent added contention on +the global lock. + +The fact that cpu-local slices do not expire results in some interesting corner +cases that should be understood. + +For cgroup cpu constrained applications that are cpu limited this is a +relatively moot point because they will naturally consume the entirety of their +quota as well as the entirety of each cpu-local slice in each period. As a +result it is expected that nr_periods roughly equal nr_throttled, and that +cpuacct.usage will increase roughly equal to cfs_quota_us in each period. + +For highly-threaded, non-cpu bound applications this non-expiration nuance +allows applications to briefly burst past their quota limits by the amount of +unused slice on each cpu that the task group is running on (typically at most +1ms per cpu or as defined by min_cfs_rq_runtime). This slight burst only +applies if quota had been assigned to a cpu and then not fully used or returned +in previous periods. This burst amount will not be transferred between cores. +As a result, this mechanism still strictly limits the task group to quota +average usage, albeit over a longer time window than a single period. This +also limits the burst ability to no more than 1ms per cpu. This provides +better more predictable user experience for highly threaded applications with +small quota limits on high core count machines. It also eliminates the +propensity to throttle these applications while simultanously using less than +quota amounts of cpu. Another way to say this, is that by allowing the unused +portion of a slice to remain valid across periods we have decreased the +possibility of wastefully expiring quota on cpu-local silos that don't need a +full slice's amount of cpu time. + +The interaction between cpu-bound and non-cpu-bound-interactive applications +should also be considered, especially when single core usage hits 100%. If you +gave each of these applications half of a cpu-core and they both got scheduled +on the same CPU it is theoretically possible that the non-cpu bound application +will use up to 1ms additional quota in some periods, thereby preventing the +cpu-bound application from fully using its quota by that same amount. In these +instances it will be up to the CFS algorithm (see sched-design-CFS.rst) to +decide which application is chosen to run, as they will both be runnable and +have remaining quota. This runtime discrepancy will be made up in the following +periods when the interactive application idles. + Examples -------- 1. Limit a group to 1 CPU worth of runtime. diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 55a33009f9a5..d5c032ec193d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4106,8 +4106,6 @@ void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b) now = sched_clock_cpu(smp_processor_id()); cfs_b->runtime = cfs_b->quota; - cfs_b->runtime_expires = now + ktime_to_ns(cfs_b->period); - cfs_b->expires_seq++; } static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg) @@ -4129,8 +4127,7 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) { struct task_group *tg = cfs_rq->tg; struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg); - u64 amount = 0, min_amount, expires; - int expires_seq; + u64 amount = 0, min_amount; /* note: this is a positive sum as runtime_remaining <= 0 */ min_amount = sched_cfs_bandwidth_slice() - cfs_rq->runtime_remaining; @@ -4147,61 +4144,17 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) cfs_b->idle = 0; } } - expires_seq = cfs_b->expires_seq; - expires = cfs_b->runtime_expires; raw_spin_unlock(&cfs_b->lock); cfs_rq->runtime_remaining += amount; - /* - * we may have advanced our local expiration to account for allowed - * spread between our sched_clock and the one on which runtime was - * issued. - */ - if (cfs_rq->expires_seq != expires_seq) { - cfs_rq->expires_seq = expires_seq; - cfs_rq->runtime_expires = expires; - } return cfs_rq->runtime_remaining > 0; } -/* - * Note: This depends on the synchronization provided by sched_clock and the - * fact that rq->clock snapshots this value. - */ -static void expire_cfs_rq_runtime(struct cfs_rq *cfs_rq) -{ - struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); - - /* if the deadline is ahead of our clock, nothing to do */ - if (likely((s64)(rq_clock(rq_of(cfs_rq)) - cfs_rq->runtime_expires) < 0)) - return; - - if (cfs_rq->runtime_remaining < 0) - return; - - /* - * If the local deadline has passed we have to consider the - * possibility that our sched_clock is 'fast' and the global deadline - * has not truly expired. - * - * Fortunately we can check determine whether this the case by checking - * whether the global deadline(cfs_b->expires_seq) has advanced. - */ - if (cfs_rq->expires_seq == cfs_b->expires_seq) { - /* extend local deadline, drift is bounded above by 2 ticks */ - cfs_rq->runtime_expires += TICK_NSEC; - } else { - /* global deadline is ahead, expiration has passed */ - cfs_rq->runtime_remaining = 0; - } -} - static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) { /* dock delta_exec before expiring quota (as it could span periods) */ cfs_rq->runtime_remaining -= delta_exec; - expire_cfs_rq_runtime(cfs_rq); if (likely(cfs_rq->runtime_remaining > 0)) return; @@ -4387,8 +4340,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) resched_curr(rq); } -static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, - u64 remaining, u64 expires) +static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, u64 remaining) { struct cfs_rq *cfs_rq; u64 runtime; @@ -4413,7 +4365,6 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, remaining -= runtime; cfs_rq->runtime_remaining += runtime; - cfs_rq->runtime_expires = expires; /* we check whether we're throttled above */ if (cfs_rq->runtime_remaining > 0) @@ -4438,7 +4389,7 @@ next: */ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) { - u64 runtime, runtime_expires; + u64 runtime; int throttled; /* no need to continue the timer with no bandwidth constraint */ @@ -4466,8 +4417,6 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) /* account preceding periods in which throttling occurred */ cfs_b->nr_throttled += overrun; - runtime_expires = cfs_b->runtime_expires; - /* * This check is repeated as we are holding onto the new bandwidth while * we unthrottle. This can potentially race with an unthrottled group @@ -4480,8 +4429,7 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) cfs_b->distribute_running = 1; raw_spin_unlock(&cfs_b->lock); /* we can't nest cfs_b->lock while distributing bandwidth */ - runtime = distribute_cfs_runtime(cfs_b, runtime, - runtime_expires); + runtime = distribute_cfs_runtime(cfs_b, runtime); raw_spin_lock(&cfs_b->lock); cfs_b->distribute_running = 0; @@ -4558,8 +4506,7 @@ static void __return_cfs_rq_runtime(struct cfs_rq *cfs_rq) return; raw_spin_lock(&cfs_b->lock); - if (cfs_b->quota != RUNTIME_INF && - cfs_rq->runtime_expires == cfs_b->runtime_expires) { + if (cfs_b->quota != RUNTIME_INF) { cfs_b->runtime += slack_runtime; /* we are under rq->lock, defer unthrottling using a timer */ @@ -4591,7 +4538,6 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) { u64 runtime = 0, slice = sched_cfs_bandwidth_slice(); - u64 expires; /* confirm we're still not at a refresh boundary */ raw_spin_lock(&cfs_b->lock); @@ -4608,7 +4554,6 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) if (cfs_b->quota != RUNTIME_INF && cfs_b->runtime > slice) runtime = cfs_b->runtime; - expires = cfs_b->runtime_expires; if (runtime) cfs_b->distribute_running = 1; @@ -4617,11 +4562,10 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) if (!runtime) return; - runtime = distribute_cfs_runtime(cfs_b, runtime, expires); + runtime = distribute_cfs_runtime(cfs_b, runtime); raw_spin_lock(&cfs_b->lock); - if (expires == cfs_b->runtime_expires) - cfs_b->runtime -= min(runtime, cfs_b->runtime); + cfs_b->runtime -= min(runtime, cfs_b->runtime); cfs_b->distribute_running = 0; raw_spin_unlock(&cfs_b->lock); } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 452b56923c6d..268f560ec998 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -280,8 +280,6 @@ struct cfs_bandwidth { ktime_t period; u64 quota, runtime; s64 hierarchical_quota; - u64 runtime_expires; - int expires_seq; short idle, period_active; struct hrtimer period_timer, slack_timer; @@ -489,8 +487,6 @@ struct cfs_rq { #ifdef CONFIG_CFS_BANDWIDTH int runtime_enabled; - int expires_seq; - u64 runtime_expires; s64 runtime_remaining; u64 throttled_clock, throttled_clock_task; From da73a178efaccdfffe440047471cc4d40320a811 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Tue, 20 Aug 2019 14:40:55 -0400 Subject: [PATCH 0373/3715] sched/fair: Fix -Wunused-but-set-variable warnings commit 763a9ec06c409dcde2a761aac4bb83ff3938e0b3 upstream. Commit: de53fd7aedb1 ("sched/fair: Fix low cpu usage with high throttling by removing expiration of cpu-local slices") introduced a few compilation warnings: kernel/sched/fair.c: In function '__refill_cfs_bandwidth_runtime': kernel/sched/fair.c:4365:6: warning: variable 'now' set but not used [-Wunused-but-set-variable] kernel/sched/fair.c: In function 'start_cfs_bandwidth': kernel/sched/fair.c:4992:6: warning: variable 'overrun' set but not used [-Wunused-but-set-variable] Also, __refill_cfs_bandwidth_runtime() does no longer update the expiration time, so fix the comments accordingly. Signed-off-by: Qian Cai Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ben Segall Reviewed-by: Dave Chiluk Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: pauld@redhat.com Fixes: de53fd7aedb1 ("sched/fair: Fix low cpu usage with high throttling by removing expiration of cpu-local slices") Link: https://lkml.kernel.org/r/1566326455-8038-1-git-send-email-cai@lca.pw Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/sched/fair.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index d5c032ec193d..feeb52880d35 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4091,21 +4091,16 @@ static inline u64 sched_cfs_bandwidth_slice(void) } /* - * Replenish runtime according to assigned quota and update expiration time. - * We use sched_clock_cpu directly instead of rq->clock to avoid adding - * additional synchronization around rq->lock. + * Replenish runtime according to assigned quota. We use sched_clock_cpu + * directly instead of rq->clock to avoid adding additional synchronization + * around rq->lock. * * requires cfs_b->lock */ void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b) { - u64 now; - - if (cfs_b->quota == RUNTIME_INF) - return; - - now = sched_clock_cpu(smp_processor_id()); - cfs_b->runtime = cfs_b->quota; + if (cfs_b->quota != RUNTIME_INF) + cfs_b->runtime = cfs_b->quota; } static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg) From 75644ed66fd2985c7119309c1c48090736e71851 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 5 Jan 2018 08:26:46 -0800 Subject: [PATCH 0374/3715] lib/scatterlist: Introduce sgl_alloc() and sgl_free() commit e80a0af4759a164214f02da157a3800753ce135f upstream. Many kernel drivers contain code that allocates and frees both a scatterlist and the pages that populate that scatterlist. Introduce functions in lib/scatterlist.c that perform these tasks instead of duplicating this functionality in multiple drivers. Only include these functions in the build if CONFIG_SGL_ALLOC=y to avoid that the kernel size increases if this functionality is not used. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- include/linux/scatterlist.h | 10 ++++ lib/Kconfig | 4 ++ lib/scatterlist.c | 105 ++++++++++++++++++++++++++++++++++++ 3 files changed, 119 insertions(+) diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index d87dfa41142d..8b7bce207229 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -267,6 +267,16 @@ int sg_alloc_table_from_pages(struct sg_table *sgt, unsigned long offset, unsigned long size, gfp_t gfp_mask); +#ifdef CONFIG_SGL_ALLOC +struct scatterlist *sgl_alloc_order(unsigned long long length, + unsigned int order, bool chainable, + gfp_t gfp, unsigned int *nent_p); +struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp, + unsigned int *nent_p); +void sgl_free_order(struct scatterlist *sgl, int order); +void sgl_free(struct scatterlist *sgl); +#endif /* CONFIG_SGL_ALLOC */ + size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, size_t buflen, off_t skip, bool to_buffer); diff --git a/lib/Kconfig b/lib/Kconfig index b1445b22a6de..8396c4cfa1ab 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -413,6 +413,10 @@ config HAS_DMA depends on !NO_DMA default y +config SGL_ALLOC + bool + default n + config DMA_NOOP_OPS bool depends on HAS_DMA && (!64BIT || ARCH_DMA_ADDR_T_64BIT) diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 355f2e90b72c..11fce289d116 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -433,6 +433,111 @@ int sg_alloc_table_from_pages(struct sg_table *sgt, } EXPORT_SYMBOL(sg_alloc_table_from_pages); +#ifdef CONFIG_SGL_ALLOC + +/** + * sgl_alloc_order - allocate a scatterlist and its pages + * @length: Length in bytes of the scatterlist. Must be at least one + * @order: Second argument for alloc_pages() + * @chainable: Whether or not to allocate an extra element in the scatterlist + * for scatterlist chaining purposes + * @gfp: Memory allocation flags + * @nent_p: [out] Number of entries in the scatterlist that have pages + * + * Returns: A pointer to an initialized scatterlist or %NULL upon failure. + */ +struct scatterlist *sgl_alloc_order(unsigned long long length, + unsigned int order, bool chainable, + gfp_t gfp, unsigned int *nent_p) +{ + struct scatterlist *sgl, *sg; + struct page *page; + unsigned int nent, nalloc; + u32 elem_len; + + nent = round_up(length, PAGE_SIZE << order) >> (PAGE_SHIFT + order); + /* Check for integer overflow */ + if (length > (nent << (PAGE_SHIFT + order))) + return NULL; + nalloc = nent; + if (chainable) { + /* Check for integer overflow */ + if (nalloc + 1 < nalloc) + return NULL; + nalloc++; + } + sgl = kmalloc_array(nalloc, sizeof(struct scatterlist), + (gfp & ~GFP_DMA) | __GFP_ZERO); + if (!sgl) + return NULL; + + sg_init_table(sgl, nent); + sg = sgl; + while (length) { + elem_len = min_t(u64, length, PAGE_SIZE << order); + page = alloc_pages(gfp, order); + if (!page) { + sgl_free(sgl); + return NULL; + } + + sg_set_page(sg, page, elem_len, 0); + length -= elem_len; + sg = sg_next(sg); + } + WARN_ON_ONCE(sg); + if (nent_p) + *nent_p = nent; + return sgl; +} +EXPORT_SYMBOL(sgl_alloc_order); + +/** + * sgl_alloc - allocate a scatterlist and its pages + * @length: Length in bytes of the scatterlist + * @gfp: Memory allocation flags + * @nent_p: [out] Number of entries in the scatterlist + * + * Returns: A pointer to an initialized scatterlist or %NULL upon failure. + */ +struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp, + unsigned int *nent_p) +{ + return sgl_alloc_order(length, 0, false, gfp, nent_p); +} +EXPORT_SYMBOL(sgl_alloc); + +/** + * sgl_free_order - free a scatterlist and its pages + * @sgl: Scatterlist with one or more elements + * @order: Second argument for __free_pages() + */ +void sgl_free_order(struct scatterlist *sgl, int order) +{ + struct scatterlist *sg; + struct page *page; + + for (sg = sgl; sg; sg = sg_next(sg)) { + page = sg_page(sg); + if (page) + __free_pages(page, order); + } + kfree(sgl); +} +EXPORT_SYMBOL(sgl_free_order); + +/** + * sgl_free - free a scatterlist and its pages + * @sgl: Scatterlist with one or more elements + */ +void sgl_free(struct scatterlist *sgl) +{ + sgl_free_order(sgl, 0); +} +EXPORT_SYMBOL(sgl_free); + +#endif /* CONFIG_SGL_ALLOC */ + void __sg_page_iter_start(struct sg_page_iter *piter, struct scatterlist *sglist, unsigned int nents, unsigned long pgoffset) From 21e952b4495a5bfb7f65467dc2e36393782a0fa8 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Thu, 24 Jan 2019 14:46:42 -0700 Subject: [PATCH 0375/3715] usbip: Fix vhci_urb_enqueue() URB null transfer buffer error path commit 2c904963b1dd2acd4bc785b6c72e10a6283c2081 upstream. Fix vhci_urb_enqueue() to print debug msg and return error instead of failing with BUG_ON. Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vhci_hcd.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index 84e2d7edaa5c..14b17ea81deb 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -716,8 +716,10 @@ static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag } vdev = &vhci_hcd->vdev[portnum-1]; - /* patch to usb_sg_init() is in 2.5.60 */ - BUG_ON(!urb->transfer_buffer && urb->transfer_buffer_length); + if (!urb->transfer_buffer && urb->transfer_buffer_length) { + dev_dbg(dev, "Null URB transfer buffer\n"); + return -EINVAL; + } spin_lock_irqsave(&vhci->lock, flags); From a422263cee459e50be15d6a93a299e5503e849df Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 15 Dec 2017 10:05:15 -0700 Subject: [PATCH 0376/3715] usbip: stub_rx: fix static checker warning on unnecessary checks commit 10c90120930628e8b959bf58d4a0aaef3ae5d945 upstream. Fix the following static checker warnings: The patch c6688ef9f297: "usbip: fix stub_rx: harden CMD_SUBMIT path to handle malicious input" from Dec 7, 2017, leads to the following static checker warning: drivers/usb/usbip/stub_rx.c:346 get_pipe() warn: impossible condition '(pdu->u.cmd_submit.transfer_buffer_length > ((~0 >> 1))) => (s32min-s32max > s32max)' drivers/usb/usbip/stub_rx.c:486 stub_recv_cmd_submit() warn: always true condition '(pdu->u.cmd_submit.transfer_buffer_length <= ((~0 >> 1))) => (s32min-s32max <= s32max)' Reported-by: Dan Carpenter Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub_rx.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/usb/usbip/stub_rx.c b/drivers/usb/usbip/stub_rx.c index 777a4058c407..d47176f9c310 100644 --- a/drivers/usb/usbip/stub_rx.c +++ b/drivers/usb/usbip/stub_rx.c @@ -353,14 +353,6 @@ static int get_pipe(struct stub_device *sdev, struct usbip_header *pdu) epd = &ep->desc; - /* validate transfer_buffer_length */ - if (pdu->u.cmd_submit.transfer_buffer_length > INT_MAX) { - dev_err(&sdev->udev->dev, - "CMD_SUBMIT: -EMSGSIZE transfer_buffer_length %d\n", - pdu->u.cmd_submit.transfer_buffer_length); - return -1; - } - if (usb_endpoint_xfer_control(epd)) { if (dir == USBIP_DIR_OUT) return usb_sndctrlpipe(udev, epnum); @@ -487,8 +479,7 @@ static void stub_recv_cmd_submit(struct stub_device *sdev, } /* allocate urb transfer buffer, if needed */ - if (pdu->u.cmd_submit.transfer_buffer_length > 0 && - pdu->u.cmd_submit.transfer_buffer_length <= INT_MAX) { + if (pdu->u.cmd_submit.transfer_buffer_length > 0) { priv->urb->transfer_buffer = kzalloc(pdu->u.cmd_submit.transfer_buffer_length, GFP_KERNEL); From cf05a68eba4016cd8497022d6e6c3b19666fbcc4 Mon Sep 17 00:00:00 2001 From: Suwan Kim Date: Wed, 28 Aug 2019 12:27:41 +0900 Subject: [PATCH 0377/3715] usbip: Implement SG support to vhci-hcd and stub driver commit ea44d190764b4422af4d1c29eaeb9e69e353b406 upstream. There are bugs on vhci with usb 3.0 storage device. In USB, each SG list entry buffer should be divisible by the bulk max packet size. But with native SG support, this problem doesn't matter because the SG buffer is treated as contiguous buffer. But without native SG support, USB storage driver breaks SG list into several URBs and the error occurs because of a buffer size of URB that cannot be divided by the bulk max packet size. The error situation is as follows. When USB Storage driver requests 31.5 KB data and has SG list which has 3584 bytes buffer followed by 7 4096 bytes buffer for some reason. USB Storage driver splits this SG list into several URBs because VHCI doesn't support SG and sends them separately. So the first URB buffer size is 3584 bytes. When receiving data from device, USB 3.0 device sends data packet of 1024 bytes size because the max packet size of BULK pipe is 1024 bytes. So device sends 4096 bytes. But the first URB buffer has only 3584 bytes buffer size. So host controller terminates the transfer even though there is more data to receive. So, vhci needs to support SG transfer to prevent this error. In this patch, vhci supports SG regardless of whether the server's host controller supports SG or not, because stub driver splits SG list into several URBs if the server's host controller doesn't support SG. To support SG, vhci sets URB_DMA_MAP_SG flag in urb->transfer_flags if URB has SG list and this flag will tell stub driver to use SG list. After receiving urb from stub driver, vhci clear URB_DMA_MAP_SG flag to avoid unnecessary DMA unmapping in HCD. vhci sends each SG list entry to stub driver. Then, stub driver sees the total length of the buffer and allocates SG table and pages according to the total buffer length calling sgl_alloc(). After stub driver receives completed URB, it again sends each SG list entry to vhci. If the server's host controller doesn't support SG, stub driver breaks a single SG request into several URBs and submits them to the server's host controller. When all the split URBs are completed, stub driver reassembles the URBs into a single return command and sends it to vhci. Moreover, in the situation where vhci supports SG, but stub driver does not, or vice versa, usbip works normally. Because there is no protocol modification, there is no problem in communication between server and client even if the one has a kernel without SG support. In the case of vhci supports SG and stub driver doesn't, because vhci sends only the total length of the buffer to stub driver as it did before the patch applied, stub driver only needs to allocate the required length of buffers using only kmalloc() regardless of whether vhci supports SG or not. But stub driver has to allocate buffer with kmalloc() as much as the total length of SG buffer which is quite huge when vhci sends SG request, so it has overhead in buffer allocation in this situation. If stub driver needs to send data buffer to vhci because of IN pipe, stub driver also sends only total length of buffer as metadata and then sends real data as vhci does. Then vhci receive data from stub driver and store it to the corresponding buffer of SG list entry. And for the case of stub driver supports SG and vhci doesn't, since the USB storage driver checks that vhci doesn't support SG and sends the request to stub driver by splitting the SG list into multiple URBs, stub driver allocates a buffer for each URB with kmalloc() as it did before this patch. * Test environment Test uses two difference machines and two different kernel version to make mismatch situation between the client and the server where vhci supports SG, but stub driver does not, or vice versa. All tests are conducted in both full SG support that both vhci and stub support SG and half SG support that is the mismatch situation. Test kernel version is 5.3-rc6 with commit "usb: add a HCD_DMA flag instead of guestimating DMA capabilities" to avoid unnecessary DMA mapping and unmapping. - Test kernel version - 5.3-rc6 with SG support - 5.1.20-200.fc29.x86_64 without SG support * SG support test - Test devices - Super-speed storage device - SanDisk Ultra USB 3.0 - High-speed storage device - SMI corporation USB 2.0 flash drive - Test description Test read and write operation of mass storage device that uses the BULK transfer. In test, the client reads and writes files whose size is over 1G and it works normally. * Regression test - Test devices - Super-speed device - Logitech Brio webcam - High-speed device - Logitech C920 HD Pro webcam - Full-speed device - Logitech bluetooth mouse - Britz BR-Orion speaker - Low-speed device - Logitech wired mouse - Test description Moving and click test for mouse. To test the webcam, use gnome-cheese. To test the speaker, play music and video on the client. All works normally. * VUDC compatibility test VUDC also works well with this patch. Tests are done with two USB gadget created by CONFIGFS USB gadget. Both use the BULK pipe. 1. Serial gadget 2. Mass storage gadget - Serial gadget test Serial gadget on the host sends and receives data using cat command on the /dev/ttyGS. The client uses minicom to communicate with the serial gadget. - Mass storage gadget test After connecting the gadget with vhci, use "dd" to test read and write operation on the client side. Read - dd if=/dev/sd iflag=direct of=/dev/null bs=1G count=1 Write - dd if= iflag=direct of=/dev/sd bs=1G count=1 Signed-off-by: Suwan Kim Acked-by: Shuah khan Link: https://lore.kernel.org/r/20190828032741.12234-1-suwan.kim027@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/Kconfig | 1 + drivers/usb/usbip/stub.h | 7 +- drivers/usb/usbip/stub_main.c | 57 ++++++--- drivers/usb/usbip/stub_rx.c | 204 ++++++++++++++++++++++--------- drivers/usb/usbip/stub_tx.c | 99 +++++++++++---- drivers/usb/usbip/usbip_common.c | 59 ++++++--- drivers/usb/usbip/vhci_hcd.c | 12 +- drivers/usb/usbip/vhci_rx.c | 3 + drivers/usb/usbip/vhci_tx.c | 66 ++++++++-- 9 files changed, 381 insertions(+), 127 deletions(-) diff --git a/drivers/usb/usbip/Kconfig b/drivers/usb/usbip/Kconfig index a20b65cb6678..8276a20ecea7 100644 --- a/drivers/usb/usbip/Kconfig +++ b/drivers/usb/usbip/Kconfig @@ -2,6 +2,7 @@ config USBIP_CORE tristate "USB/IP support" depends on NET select USB_COMMON + select SGL_ALLOC ---help--- This enables pushing USB packets over IP to allow remote machines direct access to USB devices. It provides the diff --git a/drivers/usb/usbip/stub.h b/drivers/usb/usbip/stub.h index 84c0599b45b7..d9d14d875949 100644 --- a/drivers/usb/usbip/stub.h +++ b/drivers/usb/usbip/stub.h @@ -66,7 +66,11 @@ struct stub_priv { unsigned long seqnum; struct list_head list; struct stub_device *sdev; - struct urb *urb; + struct urb **urbs; + struct scatterlist *sgl; + int num_urbs; + int completed_urbs; + int urb_status; int unlinking; }; @@ -100,6 +104,7 @@ extern struct usb_device_driver stub_driver; struct bus_id_priv *get_busid_priv(const char *busid); void put_busid_priv(struct bus_id_priv *bid); int del_match_busid(char *busid); +void stub_free_priv_and_urb(struct stub_priv *priv); void stub_device_cleanup_urbs(struct stub_device *sdev); /* stub_rx.c */ diff --git a/drivers/usb/usbip/stub_main.c b/drivers/usb/usbip/stub_main.c index 108dd65fbfbc..2dc662cf0694 100644 --- a/drivers/usb/usbip/stub_main.c +++ b/drivers/usb/usbip/stub_main.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "usbip_common.h" #include "stub.h" @@ -297,13 +298,49 @@ static struct stub_priv *stub_priv_pop_from_listhead(struct list_head *listhead) struct stub_priv *priv, *tmp; list_for_each_entry_safe(priv, tmp, listhead, list) { - list_del(&priv->list); + list_del_init(&priv->list); return priv; } return NULL; } +void stub_free_priv_and_urb(struct stub_priv *priv) +{ + struct urb *urb; + int i; + + for (i = 0; i < priv->num_urbs; i++) { + urb = priv->urbs[i]; + + if (!urb) + return; + + kfree(urb->setup_packet); + urb->setup_packet = NULL; + + + if (urb->transfer_buffer && !priv->sgl) { + kfree(urb->transfer_buffer); + urb->transfer_buffer = NULL; + } + + if (urb->num_sgs) { + sgl_free(urb->sg); + urb->sg = NULL; + urb->num_sgs = 0; + } + + usb_free_urb(urb); + } + if (!list_empty(&priv->list)) + list_del(&priv->list); + if (priv->sgl) + sgl_free(priv->sgl); + kfree(priv->urbs); + kmem_cache_free(stub_priv_cache, priv); +} + static struct stub_priv *stub_priv_pop(struct stub_device *sdev) { unsigned long flags; @@ -330,25 +367,15 @@ done: void stub_device_cleanup_urbs(struct stub_device *sdev) { struct stub_priv *priv; - struct urb *urb; + int i; dev_dbg(&sdev->udev->dev, "Stub device cleaning up urbs\n"); while ((priv = stub_priv_pop(sdev))) { - urb = priv->urb; - dev_dbg(&sdev->udev->dev, "free urb seqnum %lu\n", - priv->seqnum); - usb_kill_urb(urb); + for (i = 0; i < priv->num_urbs; i++) + usb_kill_urb(priv->urbs[i]); - kmem_cache_free(stub_priv_cache, priv); - - kfree(urb->transfer_buffer); - urb->transfer_buffer = NULL; - - kfree(urb->setup_packet); - urb->setup_packet = NULL; - - usb_free_urb(urb); + stub_free_priv_and_urb(priv); } } diff --git a/drivers/usb/usbip/stub_rx.c b/drivers/usb/usbip/stub_rx.c index d47176f9c310..8812d3edade1 100644 --- a/drivers/usb/usbip/stub_rx.c +++ b/drivers/usb/usbip/stub_rx.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "usbip_common.h" #include "stub.h" @@ -215,7 +216,7 @@ static void tweak_special_requests(struct urb *urb) static int stub_recv_cmd_unlink(struct stub_device *sdev, struct usbip_header *pdu) { - int ret; + int ret, i; unsigned long flags; struct stub_priv *priv; @@ -260,12 +261,14 @@ static int stub_recv_cmd_unlink(struct stub_device *sdev, * so a driver in a client host will know the failure * of the unlink request ? */ - ret = usb_unlink_urb(priv->urb); - if (ret != -EINPROGRESS) - dev_err(&priv->urb->dev->dev, - "failed to unlink a urb # %lu, ret %d\n", - priv->seqnum, ret); - + for (i = priv->completed_urbs; i < priv->num_urbs; i++) { + ret = usb_unlink_urb(priv->urbs[i]); + if (ret != -EINPROGRESS) + dev_err(&priv->urbs[i]->dev->dev, + "failed to unlink %d/%d urb of seqnum %lu, ret %d\n", + i + 1, priv->num_urbs, + priv->seqnum, ret); + } return 0; } @@ -450,14 +453,36 @@ static void masking_bogus_flags(struct urb *urb) urb->transfer_flags &= allowed; } +static int stub_recv_xbuff(struct usbip_device *ud, struct stub_priv *priv) +{ + int ret; + int i; + + for (i = 0; i < priv->num_urbs; i++) { + ret = usbip_recv_xbuff(ud, priv->urbs[i]); + if (ret < 0) + break; + } + + return ret; +} + static void stub_recv_cmd_submit(struct stub_device *sdev, struct usbip_header *pdu) { - int ret; struct stub_priv *priv; struct usbip_device *ud = &sdev->ud; struct usb_device *udev = sdev->udev; + struct scatterlist *sgl = NULL, *sg; + void *buffer = NULL; + unsigned long long buf_len; + int nents; + int num_urbs = 1; int pipe = get_pipe(sdev, pdu); + int use_sg = pdu->u.cmd_submit.transfer_flags & URB_DMA_MAP_SG; + int support_sg = 1; + int np = 0; + int ret, i; if (pipe == -1) return; @@ -466,76 +491,139 @@ static void stub_recv_cmd_submit(struct stub_device *sdev, if (!priv) return; - /* setup a urb */ - if (usb_pipeisoc(pipe)) - priv->urb = usb_alloc_urb(pdu->u.cmd_submit.number_of_packets, - GFP_KERNEL); - else - priv->urb = usb_alloc_urb(0, GFP_KERNEL); - - if (!priv->urb) { - usbip_event_add(ud, SDEV_EVENT_ERROR_MALLOC); - return; - } + buf_len = (unsigned long long)pdu->u.cmd_submit.transfer_buffer_length; /* allocate urb transfer buffer, if needed */ - if (pdu->u.cmd_submit.transfer_buffer_length > 0) { - priv->urb->transfer_buffer = - kzalloc(pdu->u.cmd_submit.transfer_buffer_length, - GFP_KERNEL); - if (!priv->urb->transfer_buffer) { - usbip_event_add(ud, SDEV_EVENT_ERROR_MALLOC); - return; + if (buf_len) { + if (use_sg) { + sgl = sgl_alloc(buf_len, GFP_KERNEL, &nents); + if (!sgl) + goto err_malloc; + } else { + buffer = kzalloc(buf_len, GFP_KERNEL); + if (!buffer) + goto err_malloc; } } - /* copy urb setup packet */ - priv->urb->setup_packet = kmemdup(&pdu->u.cmd_submit.setup, 8, - GFP_KERNEL); - if (!priv->urb->setup_packet) { - dev_err(&udev->dev, "allocate setup_packet\n"); - usbip_event_add(ud, SDEV_EVENT_ERROR_MALLOC); - return; + /* Check if the server's HCD supports SG */ + if (use_sg && !udev->bus->sg_tablesize) { + /* + * If the server's HCD doesn't support SG, break a single SG + * request into several URBs and map each SG list entry to + * corresponding URB buffer. The previously allocated SG + * list is stored in priv->sgl (If the server's HCD support SG, + * SG list is stored only in urb->sg) and it is used as an + * indicator that the server split single SG request into + * several URBs. Later, priv->sgl is used by stub_complete() and + * stub_send_ret_submit() to reassemble the divied URBs. + */ + support_sg = 0; + num_urbs = nents; + priv->completed_urbs = 0; + pdu->u.cmd_submit.transfer_flags &= ~URB_DMA_MAP_SG; } - /* set other members from the base header of pdu */ - priv->urb->context = (void *) priv; - priv->urb->dev = udev; - priv->urb->pipe = pipe; - priv->urb->complete = stub_complete; + /* allocate urb array */ + priv->num_urbs = num_urbs; + priv->urbs = kmalloc_array(num_urbs, sizeof(*priv->urbs), GFP_KERNEL); + if (!priv->urbs) + goto err_urbs; - usbip_pack_pdu(pdu, priv->urb, USBIP_CMD_SUBMIT, 0); + /* setup a urb */ + if (support_sg) { + if (usb_pipeisoc(pipe)) + np = pdu->u.cmd_submit.number_of_packets; + priv->urbs[0] = usb_alloc_urb(np, GFP_KERNEL); + if (!priv->urbs[0]) + goto err_urb; - if (usbip_recv_xbuff(ud, priv->urb) < 0) + if (buf_len) { + if (use_sg) { + priv->urbs[0]->sg = sgl; + priv->urbs[0]->num_sgs = nents; + priv->urbs[0]->transfer_buffer = NULL; + } else { + priv->urbs[0]->transfer_buffer = buffer; + } + } + + /* copy urb setup packet */ + priv->urbs[0]->setup_packet = kmemdup(&pdu->u.cmd_submit.setup, + 8, GFP_KERNEL); + if (!priv->urbs[0]->setup_packet) { + usbip_event_add(ud, SDEV_EVENT_ERROR_MALLOC); + return; + } + + usbip_pack_pdu(pdu, priv->urbs[0], USBIP_CMD_SUBMIT, 0); + } else { + for_each_sg(sgl, sg, nents, i) { + priv->urbs[i] = usb_alloc_urb(0, GFP_KERNEL); + /* The URBs which is previously allocated will be freed + * in stub_device_cleanup_urbs() if error occurs. + */ + if (!priv->urbs[i]) + goto err_urb; + + usbip_pack_pdu(pdu, priv->urbs[i], USBIP_CMD_SUBMIT, 0); + priv->urbs[i]->transfer_buffer = sg_virt(sg); + priv->urbs[i]->transfer_buffer_length = sg->length; + } + priv->sgl = sgl; + } + + for (i = 0; i < num_urbs; i++) { + /* set other members from the base header of pdu */ + priv->urbs[i]->context = (void *) priv; + priv->urbs[i]->dev = udev; + priv->urbs[i]->pipe = pipe; + priv->urbs[i]->complete = stub_complete; + + /* no need to submit an intercepted request, but harmless? */ + tweak_special_requests(priv->urbs[i]); + + masking_bogus_flags(priv->urbs[i]); + } + + if (stub_recv_xbuff(ud, priv) < 0) return; - if (usbip_recv_iso(ud, priv->urb) < 0) + if (usbip_recv_iso(ud, priv->urbs[0]) < 0) return; - /* no need to submit an intercepted request, but harmless? */ - tweak_special_requests(priv->urb); - - masking_bogus_flags(priv->urb); /* urb is now ready to submit */ - ret = usb_submit_urb(priv->urb, GFP_KERNEL); + for (i = 0; i < priv->num_urbs; i++) { + ret = usb_submit_urb(priv->urbs[i], GFP_KERNEL); - if (ret == 0) - usbip_dbg_stub_rx("submit urb ok, seqnum %u\n", - pdu->base.seqnum); - else { - dev_err(&udev->dev, "submit_urb error, %d\n", ret); - usbip_dump_header(pdu); - usbip_dump_urb(priv->urb); + if (ret == 0) + usbip_dbg_stub_rx("submit urb ok, seqnum %u\n", + pdu->base.seqnum); + else { + dev_err(&udev->dev, "submit_urb error, %d\n", ret); + usbip_dump_header(pdu); + usbip_dump_urb(priv->urbs[i]); - /* - * Pessimistic. - * This connection will be discarded. - */ - usbip_event_add(ud, SDEV_EVENT_ERROR_SUBMIT); + /* + * Pessimistic. + * This connection will be discarded. + */ + usbip_event_add(ud, SDEV_EVENT_ERROR_SUBMIT); + break; + } } usbip_dbg_stub_rx("Leave\n"); + return; + +err_urb: + kfree(priv->urbs); +err_urbs: + kfree(buffer); + sgl_free(sgl); +err_malloc: + usbip_event_add(ud, SDEV_EVENT_ERROR_MALLOC); } /* recv a pdu */ diff --git a/drivers/usb/usbip/stub_tx.c b/drivers/usb/usbip/stub_tx.c index 96aa375b80d9..45c34a37432e 100644 --- a/drivers/usb/usbip/stub_tx.c +++ b/drivers/usb/usbip/stub_tx.c @@ -19,25 +19,11 @@ #include #include +#include #include "usbip_common.h" #include "stub.h" -static void stub_free_priv_and_urb(struct stub_priv *priv) -{ - struct urb *urb = priv->urb; - - kfree(urb->setup_packet); - urb->setup_packet = NULL; - - kfree(urb->transfer_buffer); - urb->transfer_buffer = NULL; - - list_del(&priv->list); - kmem_cache_free(stub_priv_cache, priv); - usb_free_urb(urb); -} - /* be in spin_lock_irqsave(&sdev->priv_lock, flags) */ void stub_enqueue_ret_unlink(struct stub_device *sdev, __u32 seqnum, __u32 status) @@ -99,6 +85,22 @@ void stub_complete(struct urb *urb) break; } + /* + * If the server breaks single SG request into the several URBs, the + * URBs must be reassembled before sending completed URB to the vhci. + * Don't wake up the tx thread until all the URBs are completed. + */ + if (priv->sgl) { + priv->completed_urbs++; + + /* Only save the first error status */ + if (urb->status && !priv->urb_status) + priv->urb_status = urb->status; + + if (priv->completed_urbs < priv->num_urbs) + return; + } + /* link a urb to the queue of tx. */ spin_lock_irqsave(&sdev->priv_lock, flags); if (sdev->ud.tcp_socket == NULL) { @@ -170,18 +172,22 @@ static int stub_send_ret_submit(struct stub_device *sdev) size_t total_size = 0; while ((priv = dequeue_from_priv_tx(sdev)) != NULL) { - int ret; - struct urb *urb = priv->urb; + struct urb *urb = priv->urbs[0]; struct usbip_header pdu_header; struct usbip_iso_packet_descriptor *iso_buffer = NULL; struct kvec *iov = NULL; + struct scatterlist *sg; + u32 actual_length = 0; int iovnum = 0; + int ret; + int i; txsize = 0; memset(&pdu_header, 0, sizeof(pdu_header)); memset(&msg, 0, sizeof(msg)); - if (urb->actual_length > 0 && !urb->transfer_buffer) { + if (urb->actual_length > 0 && !urb->transfer_buffer && + !urb->num_sgs) { dev_err(&sdev->udev->dev, "urb: actual_length %d transfer_buffer null\n", urb->actual_length); @@ -190,6 +196,11 @@ static int stub_send_ret_submit(struct stub_device *sdev) if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS) iovnum = 2 + urb->number_of_packets; + else if (usb_pipein(urb->pipe) && urb->actual_length > 0 && + urb->num_sgs) + iovnum = 1 + urb->num_sgs; + else if (usb_pipein(urb->pipe) && priv->sgl) + iovnum = 1 + priv->num_urbs; else iovnum = 2; @@ -206,6 +217,15 @@ static int stub_send_ret_submit(struct stub_device *sdev) setup_ret_submit_pdu(&pdu_header, urb); usbip_dbg_stub_tx("setup txdata seqnum: %d\n", pdu_header.base.seqnum); + + if (priv->sgl) { + for (i = 0; i < priv->num_urbs; i++) + actual_length += priv->urbs[i]->actual_length; + + pdu_header.u.ret_submit.status = priv->urb_status; + pdu_header.u.ret_submit.actual_length = actual_length; + } + usbip_header_correct_endian(&pdu_header, 1); iov[iovnum].iov_base = &pdu_header; @@ -214,12 +234,47 @@ static int stub_send_ret_submit(struct stub_device *sdev) txsize += sizeof(pdu_header); /* 2. setup transfer buffer */ - if (usb_pipein(urb->pipe) && + if (usb_pipein(urb->pipe) && priv->sgl) { + /* If the server split a single SG request into several + * URBs because the server's HCD doesn't support SG, + * reassemble the split URB buffers into a single + * return command. + */ + for (i = 0; i < priv->num_urbs; i++) { + iov[iovnum].iov_base = + priv->urbs[i]->transfer_buffer; + iov[iovnum].iov_len = + priv->urbs[i]->actual_length; + iovnum++; + } + txsize += actual_length; + } else if (usb_pipein(urb->pipe) && usb_pipetype(urb->pipe) != PIPE_ISOCHRONOUS && urb->actual_length > 0) { - iov[iovnum].iov_base = urb->transfer_buffer; - iov[iovnum].iov_len = urb->actual_length; - iovnum++; + if (urb->num_sgs) { + unsigned int copy = urb->actual_length; + int size; + + for_each_sg(urb->sg, sg, urb->num_sgs, i) { + if (copy == 0) + break; + + if (copy < sg->length) + size = copy; + else + size = sg->length; + + iov[iovnum].iov_base = sg_virt(sg); + iov[iovnum].iov_len = size; + + iovnum++; + copy -= size; + } + } else { + iov[iovnum].iov_base = urb->transfer_buffer; + iov[iovnum].iov_len = urb->actual_length; + iovnum++; + } txsize += urb->actual_length; } else if (usb_pipein(urb->pipe) && usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS) { diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c index 7f0d22131121..da03451328cd 100644 --- a/drivers/usb/usbip/usbip_common.c +++ b/drivers/usb/usbip/usbip_common.c @@ -695,8 +695,12 @@ EXPORT_SYMBOL_GPL(usbip_pad_iso); /* some members of urb must be substituted before. */ int usbip_recv_xbuff(struct usbip_device *ud, struct urb *urb) { - int ret; + struct scatterlist *sg; + int ret = 0; + int recv; int size; + int copy; + int i; if (ud->side == USBIP_STUB || ud->side == USBIP_VUDC) { /* the direction of urb must be OUT. */ @@ -716,29 +720,48 @@ int usbip_recv_xbuff(struct usbip_device *ud, struct urb *urb) if (!(size > 0)) return 0; - if (size > urb->transfer_buffer_length) { + if (size > urb->transfer_buffer_length) /* should not happen, probably malicious packet */ - if (ud->side == USBIP_STUB) { - usbip_event_add(ud, SDEV_EVENT_ERROR_TCP); - return 0; - } else { - usbip_event_add(ud, VDEV_EVENT_ERROR_TCP); - return -EPIPE; - } - } + goto error; - ret = usbip_recv(ud->tcp_socket, urb->transfer_buffer, size); - if (ret != size) { - dev_err(&urb->dev->dev, "recv xbuf, %d\n", ret); - if (ud->side == USBIP_STUB || ud->side == USBIP_VUDC) { - usbip_event_add(ud, SDEV_EVENT_ERROR_TCP); - } else { - usbip_event_add(ud, VDEV_EVENT_ERROR_TCP); - return -EPIPE; + if (urb->num_sgs) { + copy = size; + for_each_sg(urb->sg, sg, urb->num_sgs, i) { + int recv_size; + + if (copy < sg->length) + recv_size = copy; + else + recv_size = sg->length; + + recv = usbip_recv(ud->tcp_socket, sg_virt(sg), + recv_size); + + if (recv != recv_size) + goto error; + + copy -= recv; + ret += recv; } + + if (ret != size) + goto error; + } else { + ret = usbip_recv(ud->tcp_socket, urb->transfer_buffer, size); + if (ret != size) + goto error; } return ret; + +error: + dev_err(&urb->dev->dev, "recv xbuf, %d\n", ret); + if (ud->side == USBIP_STUB || ud->side == USBIP_VUDC) + usbip_event_add(ud, SDEV_EVENT_ERROR_TCP); + else + usbip_event_add(ud, VDEV_EVENT_ERROR_TCP); + + return -EPIPE; } EXPORT_SYMBOL_GPL(usbip_recv_xbuff); diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index 14b17ea81deb..253e0affd396 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -716,7 +716,8 @@ static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag } vdev = &vhci_hcd->vdev[portnum-1]; - if (!urb->transfer_buffer && urb->transfer_buffer_length) { + if (!urb->transfer_buffer && !urb->num_sgs && + urb->transfer_buffer_length) { dev_dbg(dev, "Null URB transfer buffer\n"); return -EINVAL; } @@ -1162,6 +1163,15 @@ static int vhci_setup(struct usb_hcd *hcd) hcd->speed = HCD_USB3; hcd->self.root_hub->speed = USB_SPEED_SUPER; } + + /* + * Support SG. + * sg_tablesize is an arbitrary value to alleviate memory pressure + * on the host. + */ + hcd->self.sg_tablesize = 32; + hcd->self.no_sg_constraint = 1; + return 0; } diff --git a/drivers/usb/usbip/vhci_rx.c b/drivers/usb/usbip/vhci_rx.c index 1343037d00f9..3f998b605f03 100644 --- a/drivers/usb/usbip/vhci_rx.c +++ b/drivers/usb/usbip/vhci_rx.c @@ -104,6 +104,9 @@ static void vhci_recv_ret_submit(struct vhci_device *vdev, if (usbip_dbg_flag_vhci_rx) usbip_dump_urb(urb); + if (urb->num_sgs) + urb->transfer_flags &= ~URB_DMA_MAP_SG; + usbip_dbg_vhci_rx("now giveback urb %u\n", pdu->base.seqnum); spin_lock_irqsave(&vhci->lock, flags); diff --git a/drivers/usb/usbip/vhci_tx.c b/drivers/usb/usbip/vhci_tx.c index a9a663a578b6..93c139d884f3 100644 --- a/drivers/usb/usbip/vhci_tx.c +++ b/drivers/usb/usbip/vhci_tx.c @@ -19,6 +19,7 @@ #include #include +#include #include "usbip_common.h" #include "vhci.h" @@ -64,19 +65,23 @@ static struct vhci_priv *dequeue_from_priv_tx(struct vhci_device *vdev) static int vhci_send_cmd_submit(struct vhci_device *vdev) { + struct usbip_iso_packet_descriptor *iso_buffer = NULL; struct vhci_priv *priv = NULL; + struct scatterlist *sg; struct msghdr msg; - struct kvec iov[3]; + struct kvec *iov; size_t txsize; size_t total_size = 0; + int iovnum; + int err = -ENOMEM; + int i; while ((priv = dequeue_from_priv_tx(vdev)) != NULL) { int ret; struct urb *urb = priv->urb; struct usbip_header pdu_header; - struct usbip_iso_packet_descriptor *iso_buffer = NULL; txsize = 0; memset(&pdu_header, 0, sizeof(pdu_header)); @@ -86,18 +91,45 @@ static int vhci_send_cmd_submit(struct vhci_device *vdev) usbip_dbg_vhci_tx("setup txdata urb seqnum %lu\n", priv->seqnum); + if (urb->num_sgs && usb_pipeout(urb->pipe)) + iovnum = 2 + urb->num_sgs; + else + iovnum = 3; + + iov = kcalloc(iovnum, sizeof(*iov), GFP_KERNEL); + if (!iov) { + usbip_event_add(&vdev->ud, SDEV_EVENT_ERROR_MALLOC); + return -ENOMEM; + } + + if (urb->num_sgs) + urb->transfer_flags |= URB_DMA_MAP_SG; + /* 1. setup usbip_header */ setup_cmd_submit_pdu(&pdu_header, urb); usbip_header_correct_endian(&pdu_header, 1); + iovnum = 0; - iov[0].iov_base = &pdu_header; - iov[0].iov_len = sizeof(pdu_header); + iov[iovnum].iov_base = &pdu_header; + iov[iovnum].iov_len = sizeof(pdu_header); txsize += sizeof(pdu_header); + iovnum++; /* 2. setup transfer buffer */ if (!usb_pipein(urb->pipe) && urb->transfer_buffer_length > 0) { - iov[1].iov_base = urb->transfer_buffer; - iov[1].iov_len = urb->transfer_buffer_length; + if (urb->num_sgs && + !usb_endpoint_xfer_isoc(&urb->ep->desc)) { + for_each_sg(urb->sg, sg, urb->num_sgs, i) { + iov[iovnum].iov_base = sg_virt(sg); + iov[iovnum].iov_len = sg->length; + iovnum++; + } + } else { + iov[iovnum].iov_base = urb->transfer_buffer; + iov[iovnum].iov_len = + urb->transfer_buffer_length; + iovnum++; + } txsize += urb->transfer_buffer_length; } @@ -109,23 +141,26 @@ static int vhci_send_cmd_submit(struct vhci_device *vdev) if (!iso_buffer) { usbip_event_add(&vdev->ud, SDEV_EVENT_ERROR_MALLOC); - return -1; + goto err_iso_buffer; } - iov[2].iov_base = iso_buffer; - iov[2].iov_len = len; + iov[iovnum].iov_base = iso_buffer; + iov[iovnum].iov_len = len; + iovnum++; txsize += len; } - ret = kernel_sendmsg(vdev->ud.tcp_socket, &msg, iov, 3, txsize); + ret = kernel_sendmsg(vdev->ud.tcp_socket, &msg, iov, iovnum, + txsize); if (ret != txsize) { pr_err("sendmsg failed!, ret=%d for %zd\n", ret, txsize); - kfree(iso_buffer); usbip_event_add(&vdev->ud, VDEV_EVENT_ERROR_TCP); - return -1; + err = -EPIPE; + goto err_tx; } + kfree(iov); kfree(iso_buffer); usbip_dbg_vhci_tx("send txdata\n"); @@ -133,6 +168,13 @@ static int vhci_send_cmd_submit(struct vhci_device *vdev) } return total_size; + +err_tx: + kfree(iso_buffer); +err_iso_buffer: + kfree(iov); + + return err; } static struct vhci_unlink *dequeue_from_unlink_tx(struct vhci_device *vdev) From 2db8e5be929ea01266e24c757a0a81d2768c5d20 Mon Sep 17 00:00:00 2001 From: Vidya Sagar Date: Thu, 4 Jul 2019 20:34:28 +0530 Subject: [PATCH 0378/3715] PCI: tegra: Enable Relaxed Ordering only for Tegra20 & Tegra30 commit 7be142caabc4780b13a522c485abc806de5c4114 upstream. The PCI Tegra controller conversion to a device tree configurable driver in commit d1523b52bff3 ("PCI: tegra: Move PCIe driver to drivers/pci/host") implied that code for the driver can be compiled in for a kernel supporting multiple platforms. Unfortunately, a blind move of the code did not check that some of the quirks that were applied in arch/arm (eg enabling Relaxed Ordering on all PCI devices - since the quirk hook erroneously matches PCI_ANY_ID for both Vendor-ID and Device-ID) are now applied in all kernels that compile the PCI Tegra controlled driver, DT and ACPI alike. This is completely wrong, in that enablement of Relaxed Ordering is only required by default in Tegra20 platforms as described in the Tegra20 Technical Reference Manual (available at https://developer.nvidia.com/embedded/downloads#?search=tegra%202 in Section 34.1, where it is mentioned that Relaxed Ordering bit needs to be enabled in its root ports to avoid deadlock in hardware) and in the Tegra30 platforms for the same reasons (unfortunately not documented in the TRM). There is no other strict requirement on PCI devices Relaxed Ordering enablement on any other Tegra platforms or PCI host bridge driver. Fix this quite upsetting situation by limiting the vendor and device IDs to which the Relaxed Ordering quirk applies to the root ports in question, reported above. Signed-off-by: Vidya Sagar [lorenzo.pieralisi@arm.com: completely rewrote the commit log/fixes tag] Signed-off-by: Lorenzo Pieralisi Acked-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- drivers/pci/host/pci-tegra.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/pci/host/pci-tegra.c b/drivers/pci/host/pci-tegra.c index 8efd086c57c9..5bf874f30466 100644 --- a/drivers/pci/host/pci-tegra.c +++ b/drivers/pci/host/pci-tegra.c @@ -607,12 +607,15 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0bf1, tegra_pcie_fixup_class); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0e1c, tegra_pcie_fixup_class); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0e1d, tegra_pcie_fixup_class); -/* Tegra PCIE requires relaxed ordering */ +/* Tegra20 and Tegra30 PCIE requires relaxed ordering */ static void tegra_pcie_relax_enable(struct pci_dev *dev) { pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_RELAX_EN); } -DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, tegra_pcie_relax_enable); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0bf0, tegra_pcie_relax_enable); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0bf1, tegra_pcie_relax_enable); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0e1c, tegra_pcie_relax_enable); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0e1d, tegra_pcie_relax_enable); static int tegra_pcie_request_resources(struct tegra_pcie *pcie) { From 2406e8896e3416c5185d10d804d546e28ecf4773 Mon Sep 17 00:00:00 2001 From: Radhey Shyam Pandey Date: Thu, 26 Sep 2019 16:20:58 +0530 Subject: [PATCH 0379/3715] dmaengine: xilinx_dma: Fix control reg update in vdma_channel_set_config [ Upstream commit 6c6de1ddb1be3840f2ed5cc9d009a622720940c9 ] In vdma_channel_set_config clear the delay, frame count and master mask before updating their new values. It avoids programming incorrect state when input parameters are different from default. Signed-off-by: Radhey Shyam Pandey Acked-by: Appana Durga Kedareswara rao Signed-off-by: Michal Simek Link: https://lore.kernel.org/r/1569495060-18117-3-git-send-email-radhey.shyam.pandey@xilinx.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/xilinx/xilinx_dma.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 8722bcba489d..2db352308e5c 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -72,6 +72,9 @@ #define XILINX_DMA_DMACR_CIRC_EN BIT(1) #define XILINX_DMA_DMACR_RUNSTOP BIT(0) #define XILINX_DMA_DMACR_FSYNCSRC_MASK GENMASK(6, 5) +#define XILINX_DMA_DMACR_DELAY_MASK GENMASK(31, 24) +#define XILINX_DMA_DMACR_FRAME_COUNT_MASK GENMASK(23, 16) +#define XILINX_DMA_DMACR_MASTER_MASK GENMASK(11, 8) #define XILINX_DMA_REG_DMASR 0x0004 #define XILINX_DMA_DMASR_EOL_LATE_ERR BIT(15) @@ -2057,8 +2060,10 @@ int xilinx_vdma_channel_set_config(struct dma_chan *dchan, chan->config.gen_lock = cfg->gen_lock; chan->config.master = cfg->master; + dmacr &= ~XILINX_DMA_DMACR_GENLOCK_EN; if (cfg->gen_lock && chan->genlock) { dmacr |= XILINX_DMA_DMACR_GENLOCK_EN; + dmacr &= ~XILINX_DMA_DMACR_MASTER_MASK; dmacr |= cfg->master << XILINX_DMA_DMACR_MASTER_SHIFT; } @@ -2072,11 +2077,13 @@ int xilinx_vdma_channel_set_config(struct dma_chan *dchan, chan->config.delay = cfg->delay; if (cfg->coalesc <= XILINX_DMA_DMACR_FRAME_COUNT_MAX) { + dmacr &= ~XILINX_DMA_DMACR_FRAME_COUNT_MASK; dmacr |= cfg->coalesc << XILINX_DMA_DMACR_FRAME_COUNT_SHIFT; chan->config.coalesc = cfg->coalesc; } if (cfg->delay <= XILINX_DMA_DMACR_DELAY_MAX) { + dmacr &= ~XILINX_DMA_DMACR_DELAY_MASK; dmacr |= cfg->delay << XILINX_DMA_DMACR_DELAY_SHIFT; chan->config.delay = cfg->delay; } From 54b9b5a469a5fbd16eeb68222b86eaa43dacffd0 Mon Sep 17 00:00:00 2001 From: Zhang Lixu Date: Wed, 16 Oct 2019 08:15:59 +0800 Subject: [PATCH 0380/3715] HID: intel-ish-hid: fix wrong error handling in ishtp_cl_alloc_tx_ring() [ Upstream commit 16ff7bf6dbcc6f77d2eec1ac9120edf44213c2f1 ] When allocating tx ring buffers failed, should free tx buffers, not rx buffers. Signed-off-by: Zhang Lixu Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/intel-ish-hid/ishtp/client-buffers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/intel-ish-hid/ishtp/client-buffers.c b/drivers/hid/intel-ish-hid/ishtp/client-buffers.c index b9b917d2d50d..c41dbb167c91 100644 --- a/drivers/hid/intel-ish-hid/ishtp/client-buffers.c +++ b/drivers/hid/intel-ish-hid/ishtp/client-buffers.c @@ -90,7 +90,7 @@ int ishtp_cl_alloc_tx_ring(struct ishtp_cl *cl) return 0; out: dev_err(&cl->device->dev, "error in allocating Tx pool\n"); - ishtp_cl_free_rx_ring(cl); + ishtp_cl_free_tx_ring(cl); return -ENOMEM; } From 9f8d038bb276418cd128b7ebe0f2c8690916c446 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 8 Oct 2019 00:07:30 +0300 Subject: [PATCH 0381/3715] RDMA/qedr: Fix reported firmware version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b806c94ee44e53233b8ce6c92d9078d9781786a5 ] Remove spaces from the reported firmware version string. Actual value: $ cat /sys/class/infiniband/qedr0/fw_ver 8. 37. 7. 0 Expected value: $ cat /sys/class/infiniband/qedr0/fw_ver 8.37.7.0 Fixes: ec72fce401c6 ("qedr: Add support for RoCE HW init") Signed-off-by: Kamal Heib Acked-by: Michal Kalderon  Link: https://lore.kernel.org/r/20191007210730.7173-1-kamalheib1@gmail.com Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin --- drivers/infiniband/hw/qedr/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index ddb05b42e5e6..3e48ed64760b 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -73,7 +73,7 @@ static void qedr_get_dev_fw_str(struct ib_device *ibdev, char *str) struct qedr_dev *qedr = get_qedr_dev(ibdev); u32 fw_ver = (u32)qedr->attr.fw_ver; - snprintf(str, IB_FW_VERSION_NAME_MAX, "%d. %d. %d. %d", + snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d.%d", (fw_ver >> 24) & 0xFF, (fw_ver >> 16) & 0xFF, (fw_ver >> 8) & 0xFF, fw_ver & 0xFF); } From d905f0cea5e0f29b8aced5a38b1ce243051887de Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Tue, 24 Sep 2019 22:20:34 -0500 Subject: [PATCH 0382/3715] net/mlx5: prevent memory leak in mlx5_fpga_conn_create_cq [ Upstream commit c8c2a057fdc7de1cd16f4baa51425b932a42eb39 ] In mlx5_fpga_conn_create_cq if mlx5_vector2eqn fails the allocated memory should be released. Fixes: 537a50574175 ("net/mlx5: FPGA, Add high-speed connection routines") Signed-off-by: Navid Emamdoost Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c index c4392f741c5f..5212428031a4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -462,8 +462,10 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) } err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn, &irqn); - if (err) + if (err) { + kvfree(in); goto err_cqwq; + } cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); MLX5_SET(cqc, cqc, log_cq_size, ilog2(cq_size)); From ec663d07c0d7b40b04aacf348fc648ca36c89994 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 18 Oct 2019 16:04:58 +0200 Subject: [PATCH 0383/3715] scsi: qla2xxx: fixup incorrect usage of host_byte [ Upstream commit 66cf50e65b183c863825f5c28a818e3f47a72e40 ] DRIVER_ERROR is a a driver byte setting, not a host byte. The qla2xxx driver should rather return DID_ERROR here to be in line with the other drivers. Link: https://lore.kernel.org/r/20191018140458.108278-1-hare@suse.de Signed-off-by: Hannes Reinecke Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_bsg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c index 2ea0ef93f5cb..7472d3882ad4 100644 --- a/drivers/scsi/qla2xxx/qla_bsg.c +++ b/drivers/scsi/qla2xxx/qla_bsg.c @@ -258,7 +258,7 @@ qla2x00_process_els(struct bsg_job *bsg_job) srb_t *sp; const char *type; int req_sg_cnt, rsp_sg_cnt; - int rval = (DRIVER_ERROR << 16); + int rval = (DID_ERROR << 16); uint16_t nextlid = 0; if (bsg_request->msgcode == FC_BSG_RPT_ELS) { @@ -433,7 +433,7 @@ qla2x00_process_ct(struct bsg_job *bsg_job) struct Scsi_Host *host = fc_bsg_to_shost(bsg_job); scsi_qla_host_t *vha = shost_priv(host); struct qla_hw_data *ha = vha->hw; - int rval = (DRIVER_ERROR << 16); + int rval = (DID_ERROR << 16); int req_sg_cnt, rsp_sg_cnt; uint16_t loop_id; struct fc_port *fcport; @@ -1951,7 +1951,7 @@ qlafx00_mgmt_cmd(struct bsg_job *bsg_job) struct Scsi_Host *host = fc_bsg_to_shost(bsg_job); scsi_qla_host_t *vha = shost_priv(host); struct qla_hw_data *ha = vha->hw; - int rval = (DRIVER_ERROR << 16); + int rval = (DID_ERROR << 16); struct qla_mt_iocb_rqst_fx00 *piocb_rqst; srb_t *sp; int req_sg_cnt = 0, rsp_sg_cnt = 0; From 8466db95431f39fd8eec0ce057eab5e4711307de Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 11 Oct 2019 16:34:19 +0300 Subject: [PATCH 0384/3715] RDMA/uverbs: Prevent potential underflow [ Upstream commit a9018adfde809d44e71189b984fa61cc89682b5e ] The issue is in drivers/infiniband/core/uverbs_std_types_cq.c in the UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE) function. We check that: if (attr.comp_vector >= attrs->ufile->device->num_comp_vectors) { But we don't check if "attr.comp_vector" is negative. It could potentially lead to an array underflow. My concern would be where cq->vector is used in the create_cq() function from the cxgb4 driver. And really "attr.comp_vector" is appears as a u32 to user space so that's the right type to use. Fixes: 9ee79fce3642 ("IB/core: Add completion queue (cq) object actions") Link: https://lore.kernel.org/r/20191011133419.GA22905@mwanda Signed-off-by: Dan Carpenter Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/uverbs.h | 2 +- include/rdma/ib_verbs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 37c8903e7fd0..8d79a48ccd38 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -87,7 +87,7 @@ struct ib_uverbs_device { atomic_t refcount; - int num_comp_vectors; + u32 num_comp_vectors; struct completion comp; struct device *dev; struct ib_device __rcu *ib_dev; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index b8a5118b6a42..4a4319331989 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -306,7 +306,7 @@ enum ib_cq_creation_flags { struct ib_cq_init_attr { unsigned int cqe; - int comp_vector; + u32 comp_vector; u32 flags; }; From 107e5b0b9ed11d99e409e4a3e120237710c39e95 Mon Sep 17 00:00:00 2001 From: Hillf Danton Date: Mon, 21 Oct 2019 12:01:57 +0200 Subject: [PATCH 0385/3715] net: openvswitch: free vport unless register_netdevice() succeeds [ Upstream commit 9464cc37f3671ee69cb1c00662b5e1f113a96b23 ] syzbot found the following crash on: HEAD commit: 1e78030e Merge tag 'mmc-v5.3-rc1' of git://git.kernel.org/.. git tree: upstream console output: https://syzkaller.appspot.com/x/log.txt?x=148d3d1a600000 kernel config: https://syzkaller.appspot.com/x/.config?x=30cef20daf3e9977 dashboard link: https://syzkaller.appspot.com/bug?extid=13210896153522fe1ee5 compiler: gcc (GCC) 9.0.0 20181231 (experimental) syz repro: https://syzkaller.appspot.com/x/repro.syz?x=136aa8c4600000 C reproducer: https://syzkaller.appspot.com/x/repro.c?x=109ba792600000 ===================================================================== BUG: memory leak unreferenced object 0xffff8881207e4100 (size 128): comm "syz-executor032", pid 7014, jiffies 4294944027 (age 13.830s) hex dump (first 32 bytes): 00 70 16 18 81 88 ff ff 80 af 8c 22 81 88 ff ff .p.........".... 00 b6 23 17 81 88 ff ff 00 00 00 00 00 00 00 00 ..#............. backtrace: [<000000000eb78212>] kmemleak_alloc_recursive include/linux/kmemleak.h:43 [inline] [<000000000eb78212>] slab_post_alloc_hook mm/slab.h:522 [inline] [<000000000eb78212>] slab_alloc mm/slab.c:3319 [inline] [<000000000eb78212>] kmem_cache_alloc_trace+0x145/0x2c0 mm/slab.c:3548 [<00000000006ea6c6>] kmalloc include/linux/slab.h:552 [inline] [<00000000006ea6c6>] kzalloc include/linux/slab.h:748 [inline] [<00000000006ea6c6>] ovs_vport_alloc+0x37/0xf0 net/openvswitch/vport.c:130 [<00000000f9a04a7d>] internal_dev_create+0x24/0x1d0 net/openvswitch/vport-internal_dev.c:164 [<0000000056ee7c13>] ovs_vport_add+0x81/0x190 net/openvswitch/vport.c:199 [<000000005434efc7>] new_vport+0x19/0x80 net/openvswitch/datapath.c:194 [<00000000b7b253f1>] ovs_dp_cmd_new+0x22f/0x410 net/openvswitch/datapath.c:1614 [<00000000e0988518>] genl_family_rcv_msg+0x2ab/0x5b0 net/netlink/genetlink.c:629 [<00000000d0cc9347>] genl_rcv_msg+0x54/0x9c net/netlink/genetlink.c:654 [<000000006694b647>] netlink_rcv_skb+0x61/0x170 net/netlink/af_netlink.c:2477 [<0000000088381f37>] genl_rcv+0x29/0x40 net/netlink/genetlink.c:665 [<00000000dad42a47>] netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] [<00000000dad42a47>] netlink_unicast+0x1ec/0x2d0 net/netlink/af_netlink.c:1328 [<0000000067e6b079>] netlink_sendmsg+0x270/0x480 net/netlink/af_netlink.c:1917 [<00000000aab08a47>] sock_sendmsg_nosec net/socket.c:637 [inline] [<00000000aab08a47>] sock_sendmsg+0x54/0x70 net/socket.c:657 [<000000004cb7c11d>] ___sys_sendmsg+0x393/0x3c0 net/socket.c:2311 [<00000000c4901c63>] __sys_sendmsg+0x80/0xf0 net/socket.c:2356 [<00000000c10abb2d>] __do_sys_sendmsg net/socket.c:2365 [inline] [<00000000c10abb2d>] __se_sys_sendmsg net/socket.c:2363 [inline] [<00000000c10abb2d>] __x64_sys_sendmsg+0x23/0x30 net/socket.c:2363 BUG: memory leak unreferenced object 0xffff88811723b600 (size 64): comm "syz-executor032", pid 7014, jiffies 4294944027 (age 13.830s) hex dump (first 32 bytes): 01 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 02 00 00 00 05 35 82 c1 .............5.. backtrace: [<00000000352f46d8>] kmemleak_alloc_recursive include/linux/kmemleak.h:43 [inline] [<00000000352f46d8>] slab_post_alloc_hook mm/slab.h:522 [inline] [<00000000352f46d8>] slab_alloc mm/slab.c:3319 [inline] [<00000000352f46d8>] __do_kmalloc mm/slab.c:3653 [inline] [<00000000352f46d8>] __kmalloc+0x169/0x300 mm/slab.c:3664 [<000000008e48f3d1>] kmalloc include/linux/slab.h:557 [inline] [<000000008e48f3d1>] ovs_vport_set_upcall_portids+0x54/0xd0 net/openvswitch/vport.c:343 [<00000000541e4f4a>] ovs_vport_alloc+0x7f/0xf0 net/openvswitch/vport.c:139 [<00000000f9a04a7d>] internal_dev_create+0x24/0x1d0 net/openvswitch/vport-internal_dev.c:164 [<0000000056ee7c13>] ovs_vport_add+0x81/0x190 net/openvswitch/vport.c:199 [<000000005434efc7>] new_vport+0x19/0x80 net/openvswitch/datapath.c:194 [<00000000b7b253f1>] ovs_dp_cmd_new+0x22f/0x410 net/openvswitch/datapath.c:1614 [<00000000e0988518>] genl_family_rcv_msg+0x2ab/0x5b0 net/netlink/genetlink.c:629 [<00000000d0cc9347>] genl_rcv_msg+0x54/0x9c net/netlink/genetlink.c:654 [<000000006694b647>] netlink_rcv_skb+0x61/0x170 net/netlink/af_netlink.c:2477 [<0000000088381f37>] genl_rcv+0x29/0x40 net/netlink/genetlink.c:665 [<00000000dad42a47>] netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] [<00000000dad42a47>] netlink_unicast+0x1ec/0x2d0 net/netlink/af_netlink.c:1328 [<0000000067e6b079>] netlink_sendmsg+0x270/0x480 net/netlink/af_netlink.c:1917 [<00000000aab08a47>] sock_sendmsg_nosec net/socket.c:637 [inline] [<00000000aab08a47>] sock_sendmsg+0x54/0x70 net/socket.c:657 [<000000004cb7c11d>] ___sys_sendmsg+0x393/0x3c0 net/socket.c:2311 [<00000000c4901c63>] __sys_sendmsg+0x80/0xf0 net/socket.c:2356 BUG: memory leak unreferenced object 0xffff8881228ca500 (size 128): comm "syz-executor032", pid 7015, jiffies 4294944622 (age 7.880s) hex dump (first 32 bytes): 00 f0 27 18 81 88 ff ff 80 ac 8c 22 81 88 ff ff ..'........".... 40 b7 23 17 81 88 ff ff 00 00 00 00 00 00 00 00 @.#............. backtrace: [<000000000eb78212>] kmemleak_alloc_recursive include/linux/kmemleak.h:43 [inline] [<000000000eb78212>] slab_post_alloc_hook mm/slab.h:522 [inline] [<000000000eb78212>] slab_alloc mm/slab.c:3319 [inline] [<000000000eb78212>] kmem_cache_alloc_trace+0x145/0x2c0 mm/slab.c:3548 [<00000000006ea6c6>] kmalloc include/linux/slab.h:552 [inline] [<00000000006ea6c6>] kzalloc include/linux/slab.h:748 [inline] [<00000000006ea6c6>] ovs_vport_alloc+0x37/0xf0 net/openvswitch/vport.c:130 [<00000000f9a04a7d>] internal_dev_create+0x24/0x1d0 net/openvswitch/vport-internal_dev.c:164 [<0000000056ee7c13>] ovs_vport_add+0x81/0x190 net/openvswitch/vport.c:199 [<000000005434efc7>] new_vport+0x19/0x80 net/openvswitch/datapath.c:194 [<00000000b7b253f1>] ovs_dp_cmd_new+0x22f/0x410 net/openvswitch/datapath.c:1614 [<00000000e0988518>] genl_family_rcv_msg+0x2ab/0x5b0 net/netlink/genetlink.c:629 [<00000000d0cc9347>] genl_rcv_msg+0x54/0x9c net/netlink/genetlink.c:654 [<000000006694b647>] netlink_rcv_skb+0x61/0x170 net/netlink/af_netlink.c:2477 [<0000000088381f37>] genl_rcv+0x29/0x40 net/netlink/genetlink.c:665 [<00000000dad42a47>] netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] [<00000000dad42a47>] netlink_unicast+0x1ec/0x2d0 net/netlink/af_netlink.c:1328 [<0000000067e6b079>] netlink_sendmsg+0x270/0x480 net/netlink/af_netlink.c:1917 [<00000000aab08a47>] sock_sendmsg_nosec net/socket.c:637 [inline] [<00000000aab08a47>] sock_sendmsg+0x54/0x70 net/socket.c:657 [<000000004cb7c11d>] ___sys_sendmsg+0x393/0x3c0 net/socket.c:2311 [<00000000c4901c63>] __sys_sendmsg+0x80/0xf0 net/socket.c:2356 [<00000000c10abb2d>] __do_sys_sendmsg net/socket.c:2365 [inline] [<00000000c10abb2d>] __se_sys_sendmsg net/socket.c:2363 [inline] [<00000000c10abb2d>] __x64_sys_sendmsg+0x23/0x30 net/socket.c:2363 ===================================================================== The function in net core, register_netdevice(), may fail with vport's destruction callback either invoked or not. After commit 309b66970ee2 ("net: openvswitch: do not free vport if register_netdevice() is failed."), the duty to destroy vport is offloaded from the driver OTOH, which ends up in the memory leak reported. It is fixed by releasing vport unless device is registered successfully. To do that, the callback assignment is defered until device is registered. Reported-by: syzbot+13210896153522fe1ee5@syzkaller.appspotmail.com Fixes: 309b66970ee2 ("net: openvswitch: do not free vport if register_netdevice() is failed.") Cc: Taehee Yoo Cc: Greg Rose Cc: Eric Dumazet Cc: Marcelo Ricardo Leitner Cc: Ying Xue Cc: Andrey Konovalov Signed-off-by: Hillf Danton Acked-by: Pravin B Shelar [sbrivio: this was sent to dev@openvswitch.org and never made its way to netdev -- resending original patch] Signed-off-by: Stefano Brivio Reviewed-by: Greg Rose Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/openvswitch/vport-internal_dev.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index b9377afeaba4..1c09ad457d2a 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -156,7 +156,7 @@ static void do_setup(struct net_device *netdev) netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH | IFF_PHONY_HEADROOM | IFF_NO_QUEUE; netdev->needs_free_netdev = true; - netdev->priv_destructor = internal_dev_destructor; + netdev->priv_destructor = NULL; netdev->ethtool_ops = &internal_dev_ethtool_ops; netdev->rtnl_link_ops = &internal_dev_link_ops; @@ -178,7 +178,6 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) struct internal_dev *internal_dev; struct net_device *dev; int err; - bool free_vport = true; vport = ovs_vport_alloc(0, &ovs_internal_vport_ops, parms); if (IS_ERR(vport)) { @@ -210,10 +209,9 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) rtnl_lock(); err = register_netdevice(vport->dev); - if (err) { - free_vport = false; + if (err) goto error_unlock; - } + vport->dev->priv_destructor = internal_dev_destructor; dev_set_promiscuity(vport->dev, 1); rtnl_unlock(); @@ -227,8 +225,7 @@ error_unlock: error_free_netdev: free_netdev(dev); error_free_vport: - if (free_vport) - ovs_vport_free(vport); + ovs_vport_free(vport); error: return ERR_PTR(err); } From b23937d01b034acb1001a2091639de65699c0da1 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Tue, 22 Oct 2019 09:21:12 +0200 Subject: [PATCH 0386/3715] scsi: lpfc: Honor module parameter lpfc_use_adisc [ Upstream commit 0fd103ccfe6a06e40e2d9d8c91d96332cc9e1239 ] The initial lpfc_desc_set_adisc implementation in commit dea3101e0a5c ("lpfc: add Emulex FC driver version 8.0.28") enabled ADISC if cfg_use_adisc && RSCN_MODE && FCP_2_DEVICE In commit 92d7f7b0cde3 ("[SCSI] lpfc: NPIV: add NPIV support on top of SLI-3") this changed to (cfg_use_adisc && RSC_MODE) || FCP_2_DEVICE and later in commit ffc954936b13 ("[SCSI] lpfc 8.3.13: FC Discovery Fixes and enhancements.") to (cfg_use_adisc && RSC_MODE) || (FCP_2_DEVICE && FCP_TARGET) A customer reports that after a devloss, an ADISC failure is logged. It turns out the ADISC flag is set even the user explicitly set lpfc_use_adisc = 0. [Sat Dec 22 22:55:58 2018] lpfc 0000:82:00.0: 2:(0):0203 Devloss timeout on WWPN 50:01:43:80:12:8e:40:20 NPort x05df00 Data: x82000000 x8 xa [Sat Dec 22 23:08:20 2018] lpfc 0000:82:00.0: 2:(0):2755 ADISC failure DID:05DF00 Status:x9/x70000 [mkp: fixed Hannes' email] Fixes: 92d7f7b0cde3 ("[SCSI] lpfc: NPIV: add NPIV support on top of SLI-3") Cc: Dick Kennedy Cc: James Smart Link: https://lore.kernel.org/r/20191022072112.132268-1-dwagner@suse.de Reviewed-by: Hannes Reinecke Reviewed-by: James Smart Signed-off-by: Daniel Wagner Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_nportdisc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index 36fb549eb4e8..a0658d158228 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -809,9 +809,9 @@ lpfc_disc_set_adisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) if (!(vport->fc_flag & FC_PT2PT)) { /* Check config parameter use-adisc or FCP-2 */ - if ((vport->cfg_use_adisc && (vport->fc_flag & FC_RSCN_MODE)) || + if (vport->cfg_use_adisc && ((vport->fc_flag & FC_RSCN_MODE) || ((ndlp->nlp_fcp_info & NLP_FCP_2_DEVICE) && - (ndlp->nlp_type & NLP_FCP_TARGET))) { + (ndlp->nlp_type & NLP_FCP_TARGET)))) { spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_ADISC; spin_unlock_irq(shost->host_lock); From a599c315fc6118e69c98686b695eec252a62ef65 Mon Sep 17 00:00:00 2001 From: Himanshu Madhani Date: Tue, 22 Oct 2019 12:36:42 -0700 Subject: [PATCH 0387/3715] scsi: qla2xxx: Initialized mailbox to prevent driver load failure [ Upstream commit c2ff2a36eff60efb5e123c940115216d6bf65684 ] This patch fixes issue with Gen7 adapter in a blade environment where one of the ports will not be detected by driver. Firmware expects mailbox 11 to be set or cleared by driver for newer ISP. Following message is seen in the log file: [ 18.810892] qla2xxx [0000:d8:00.0]-1820:1: **** Failed=102 mb[0]=4005 mb[1]=37 mb[2]=20 mb[3]=8 [ 18.819596] cmd=2 **** [mkp: typos] Link: https://lore.kernel.org/r/20191022193643.7076-2-hmadhani@marvell.com Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_mbx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 929ec087b8eb..459481ce5872 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -624,6 +624,7 @@ qla2x00_execute_fw(scsi_qla_host_t *vha, uint32_t risc_addr) mcp->mb[2] = LSW(risc_addr); mcp->mb[3] = 0; mcp->mb[4] = 0; + mcp->mb[11] = 0; ha->flags.using_lr_setting = 0; if (IS_QLA25XX(ha) || IS_QLA81XX(ha) || IS_QLA83XX(ha) || IS_QLA27XX(ha)) { @@ -667,7 +668,7 @@ qla2x00_execute_fw(scsi_qla_host_t *vha, uint32_t risc_addr) if (ha->flags.exchoffld_enabled) mcp->mb[4] |= ENABLE_EXCHANGE_OFFLD; - mcp->out_mb |= MBX_4|MBX_3|MBX_2|MBX_1; + mcp->out_mb |= MBX_4 | MBX_3 | MBX_2 | MBX_1 | MBX_11; mcp->in_mb |= MBX_3 | MBX_2 | MBX_1; } else { mcp->mb[1] = LSW(risc_addr); From 78ec9c409e932b0a1a198cd8e20a3a66e346d72d Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Sat, 19 Oct 2019 17:34:35 +0200 Subject: [PATCH 0388/3715] ipvs: don't ignore errors in case refcounting ip_vs module fails [ Upstream commit 62931f59ce9cbabb934a431f48f2f1f441c605ac ] if the IPVS module is removed while the sync daemon is starting, there is a small gap where try_module_get() might fail getting the refcount inside ip_vs_use_count_inc(). Then, the refcounts of IPVS module are unbalanced, and the subsequent call to stop_sync_thread() causes the following splat: WARNING: CPU: 0 PID: 4013 at kernel/module.c:1146 module_put.part.44+0x15b/0x290 Modules linked in: ip_vs(-) nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 veth ip6table_filter ip6_tables iptable_filter binfmt_misc intel_rapl_msr intel_rapl_common crct10dif_pclmul crc32_pclmul ext4 mbcache jbd2 ghash_clmulni_intel snd_hda_codec_generic ledtrig_audio snd_hda_intel snd_intel_nhlt snd_hda_codec snd_hda_core snd_hwdep snd_seq snd_seq_device snd_pcm aesni_intel crypto_simd cryptd glue_helper joydev pcspkr snd_timer virtio_balloon snd soundcore i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c ata_generic pata_acpi virtio_net net_failover virtio_blk failover virtio_console qxl drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ata_piix ttm crc32c_intel serio_raw drm virtio_pci libata virtio_ring virtio floppy dm_mirror dm_region_hash dm_log dm_mod [last unloaded: nf_defrag_ipv6] CPU: 0 PID: 4013 Comm: modprobe Tainted: G W 5.4.0-rc1.upstream+ #741 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:module_put.part.44+0x15b/0x290 Code: 04 25 28 00 00 00 0f 85 18 01 00 00 48 83 c4 68 5b 5d 41 5c 41 5d 41 5e 41 5f c3 89 44 24 28 83 e8 01 89 c5 0f 89 57 ff ff ff <0f> 0b e9 78 ff ff ff 65 8b 1d 67 83 26 4a 89 db be 08 00 00 00 48 RSP: 0018:ffff888050607c78 EFLAGS: 00010297 RAX: 0000000000000003 RBX: ffffffffc1420590 RCX: ffffffffb5db0ef9 RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffffffffc1420590 RBP: 00000000ffffffff R08: fffffbfff82840b3 R09: fffffbfff82840b3 R10: 0000000000000001 R11: fffffbfff82840b2 R12: 1ffff1100a0c0f90 R13: ffffffffc1420200 R14: ffff88804f533300 R15: ffff88804f533ca0 FS: 00007f8ea9720740(0000) GS:ffff888053800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f3245abe000 CR3: 000000004c28a006 CR4: 00000000001606f0 Call Trace: stop_sync_thread+0x3a3/0x7c0 [ip_vs] ip_vs_sync_net_cleanup+0x13/0x50 [ip_vs] ops_exit_list.isra.5+0x94/0x140 unregister_pernet_operations+0x29d/0x460 unregister_pernet_device+0x26/0x60 ip_vs_cleanup+0x11/0x38 [ip_vs] __x64_sys_delete_module+0x2d5/0x400 do_syscall_64+0xa5/0x4e0 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7f8ea8bf0db7 Code: 73 01 c3 48 8b 0d b9 80 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 89 80 2c 00 f7 d8 64 89 01 48 RSP: 002b:00007ffcd38d2fe8 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000000002436240 RCX: 00007f8ea8bf0db7 RDX: 0000000000000000 RSI: 0000000000000800 RDI: 00000000024362a8 RBP: 0000000000000000 R08: 00007f8ea8eba060 R09: 00007f8ea8c658a0 R10: 00007ffcd38d2a60 R11: 0000000000000206 R12: 0000000000000000 R13: 0000000000000001 R14: 00000000024362a8 R15: 0000000000000000 irq event stamp: 4538 hardirqs last enabled at (4537): [] quarantine_put+0x9e/0x170 hardirqs last disabled at (4538): [] trace_hardirqs_off_thunk+0x1a/0x20 softirqs last enabled at (4522): [] sk_common_release+0x169/0x2d0 softirqs last disabled at (4520): [] sk_common_release+0xbe/0x2d0 Check the return value of ip_vs_use_count_inc() and let its caller return proper error. Inside do_ip_vs_set_ctl() the module is already refcounted, we don't need refcount/derefcount there. Finally, in register_ip_vs_app() and start_sync_thread(), take the module refcount earlier and ensure it's released in the error path. Change since v1: - better return values in case of failure of ip_vs_use_count_inc(), thanks to Julian Anastasov - no need to increase/decrease the module refcount in ip_vs_set_ctl(), thanks to Julian Anastasov Signed-off-by: Davide Caratti Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Sasha Levin --- net/netfilter/ipvs/ip_vs_app.c | 12 ++++++++++-- net/netfilter/ipvs/ip_vs_ctl.c | 14 ++++---------- net/netfilter/ipvs/ip_vs_pe.c | 3 ++- net/netfilter/ipvs/ip_vs_sched.c | 3 ++- net/netfilter/ipvs/ip_vs_sync.c | 13 ++++++++++--- 5 files changed, 28 insertions(+), 17 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 299edc6add5a..363475b246f6 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -198,21 +198,29 @@ struct ip_vs_app *register_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app * mutex_lock(&__ip_vs_app_mutex); + /* increase the module use count */ + if (!ip_vs_use_count_inc()) { + err = -ENOENT; + goto out_unlock; + } + list_for_each_entry(a, &ipvs->app_list, a_list) { if (!strcmp(app->name, a->name)) { err = -EEXIST; + /* decrease the module use count */ + ip_vs_use_count_dec(); goto out_unlock; } } a = kmemdup(app, sizeof(*app), GFP_KERNEL); if (!a) { err = -ENOMEM; + /* decrease the module use count */ + ip_vs_use_count_dec(); goto out_unlock; } INIT_LIST_HEAD(&a->incs_list); list_add(&a->a_list, &ipvs->app_list); - /* increase the module use count */ - ip_vs_use_count_inc(); out_unlock: mutex_unlock(&__ip_vs_app_mutex); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 4648dccebf59..6155fd6386bf 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1197,7 +1197,8 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, struct ip_vs_service *svc = NULL; /* increase the module use count */ - ip_vs_use_count_inc(); + if (!ip_vs_use_count_inc()) + return -ENOPROTOOPT; /* Lookup the scheduler by 'u->sched_name' */ if (strcmp(u->sched_name, "none")) { @@ -2395,9 +2396,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) if (copy_from_user(arg, user, len) != 0) return -EFAULT; - /* increase the module use count */ - ip_vs_use_count_inc(); - /* Handle daemons since they have another lock */ if (cmd == IP_VS_SO_SET_STARTDAEMON || cmd == IP_VS_SO_SET_STOPDAEMON) { @@ -2410,13 +2408,13 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) ret = -EINVAL; if (strscpy(cfg.mcast_ifn, dm->mcast_ifn, sizeof(cfg.mcast_ifn)) <= 0) - goto out_dec; + return ret; cfg.syncid = dm->syncid; ret = start_sync_thread(ipvs, &cfg, dm->state); } else { ret = stop_sync_thread(ipvs, dm->state); } - goto out_dec; + return ret; } mutex_lock(&__ip_vs_mutex); @@ -2511,10 +2509,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) out_unlock: mutex_unlock(&__ip_vs_mutex); - out_dec: - /* decrease the module use count */ - ip_vs_use_count_dec(); - return ret; } diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c index 0df17caa8af6..714e7e05c102 100644 --- a/net/netfilter/ipvs/ip_vs_pe.c +++ b/net/netfilter/ipvs/ip_vs_pe.c @@ -67,7 +67,8 @@ int register_ip_vs_pe(struct ip_vs_pe *pe) struct ip_vs_pe *tmp; /* increase the module use count */ - ip_vs_use_count_inc(); + if (!ip_vs_use_count_inc()) + return -ENOENT; mutex_lock(&ip_vs_pe_mutex); /* Make sure that the pe with this name doesn't exist diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c index a2ff7d746ebf..3bd0ff36dc41 100644 --- a/net/netfilter/ipvs/ip_vs_sched.c +++ b/net/netfilter/ipvs/ip_vs_sched.c @@ -184,7 +184,8 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) } /* increase the module use count */ - ip_vs_use_count_inc(); + if (!ip_vs_use_count_inc()) + return -ENOENT; mutex_lock(&ip_vs_sched_mutex); diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index b578ebb3d7ef..b373e053ff9a 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1771,6 +1771,10 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %zd bytes\n", sizeof(struct ip_vs_sync_conn_v0)); + /* increase the module use count */ + if (!ip_vs_use_count_inc()) + return -ENOPROTOOPT; + /* Do not hold one mutex and then to block on another */ for (;;) { rtnl_lock(); @@ -1901,9 +1905,6 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, mutex_unlock(&ipvs->sync_mutex); rtnl_unlock(); - /* increase the module use count */ - ip_vs_use_count_inc(); - return 0; out: @@ -1933,11 +1934,17 @@ out: } kfree(ti); } + + /* decrease the module use count */ + ip_vs_use_count_dec(); return result; out_early: mutex_unlock(&ipvs->sync_mutex); rtnl_unlock(); + + /* decrease the module use count */ + ip_vs_use_count_dec(); return result; } From 3a0018ef9628b70fdb6d5a30c8d8e17f6ac1f9e2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 23 Oct 2019 09:53:03 -0700 Subject: [PATCH 0389/3715] ipvs: move old_secure_tcp into struct netns_ipvs [ Upstream commit c24b75e0f9239e78105f81c5f03a751641eb07ef ] syzbot reported the following issue : BUG: KCSAN: data-race in update_defense_level / update_defense_level read to 0xffffffff861a6260 of 4 bytes by task 3006 on cpu 1: update_defense_level+0x621/0xb30 net/netfilter/ipvs/ip_vs_ctl.c:177 defense_work_handler+0x3d/0xd0 net/netfilter/ipvs/ip_vs_ctl.c:225 process_one_work+0x3d4/0x890 kernel/workqueue.c:2269 worker_thread+0xa0/0x800 kernel/workqueue.c:2415 kthread+0x1d4/0x200 drivers/block/aoe/aoecmd.c:1253 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:352 write to 0xffffffff861a6260 of 4 bytes by task 7333 on cpu 0: update_defense_level+0xa62/0xb30 net/netfilter/ipvs/ip_vs_ctl.c:205 defense_work_handler+0x3d/0xd0 net/netfilter/ipvs/ip_vs_ctl.c:225 process_one_work+0x3d4/0x890 kernel/workqueue.c:2269 worker_thread+0xa0/0x800 kernel/workqueue.c:2415 kthread+0x1d4/0x200 drivers/block/aoe/aoecmd.c:1253 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:352 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 7333 Comm: kworker/0:5 Not tainted 5.4.0-rc3+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: events defense_work_handler Indeed, old_secure_tcp is currently a static variable, while it needs to be a per netns variable. Fixes: a0840e2e165a ("IPVS: netns, ip_vs_ctl local vars moved to ipvs struct.") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: Simon Horman Signed-off-by: Sasha Levin --- include/net/ip_vs.h | 1 + net/netfilter/ipvs/ip_vs_ctl.c | 15 +++++++-------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 82bc9f0e8a76..f4e5ac8aa366 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -891,6 +891,7 @@ struct netns_ipvs { struct delayed_work defense_work; /* Work handler */ int drop_rate; int drop_counter; + int old_secure_tcp; atomic_t dropentry; /* locks in ctl.c */ spinlock_t dropentry_lock; /* drop entry handling */ diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 6155fd6386bf..5ec80818ace2 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -98,7 +98,6 @@ static bool __ip_vs_addr_is_local_v6(struct net *net, static void update_defense_level(struct netns_ipvs *ipvs) { struct sysinfo i; - static int old_secure_tcp = 0; int availmem; int nomem; int to_change = -1; @@ -179,35 +178,35 @@ static void update_defense_level(struct netns_ipvs *ipvs) spin_lock(&ipvs->securetcp_lock); switch (ipvs->sysctl_secure_tcp) { case 0: - if (old_secure_tcp >= 2) + if (ipvs->old_secure_tcp >= 2) to_change = 0; break; case 1: if (nomem) { - if (old_secure_tcp < 2) + if (ipvs->old_secure_tcp < 2) to_change = 1; ipvs->sysctl_secure_tcp = 2; } else { - if (old_secure_tcp >= 2) + if (ipvs->old_secure_tcp >= 2) to_change = 0; } break; case 2: if (nomem) { - if (old_secure_tcp < 2) + if (ipvs->old_secure_tcp < 2) to_change = 1; } else { - if (old_secure_tcp >= 2) + if (ipvs->old_secure_tcp >= 2) to_change = 0; ipvs->sysctl_secure_tcp = 1; } break; case 3: - if (old_secure_tcp < 2) + if (ipvs->old_secure_tcp < 2) to_change = 1; break; } - old_secure_tcp = ipvs->sysctl_secure_tcp; + ipvs->old_secure_tcp = ipvs->sysctl_secure_tcp; if (to_change >= 0) ip_vs_protocol_timeout_change(ipvs, ipvs->sysctl_secure_tcp > 1); From addfc90b11df2c7fa4a8aa94ff2c113fda76710f Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 21 Oct 2019 18:47:52 +0000 Subject: [PATCH 0390/3715] bonding: fix unexpected IFF_BONDING bit unset [ Upstream commit 65de65d9033750d2cf1b336c9d6e9da3a8b5cc6e ] The IFF_BONDING means bonding master or bonding slave device. ->ndo_add_slave() sets IFF_BONDING flag and ->ndo_del_slave() unsets IFF_BONDING flag. bond0<--bond1 Both bond0 and bond1 are bonding device and these should keep having IFF_BONDING flag until they are removed. But bond1 would lose IFF_BONDING at ->ndo_del_slave() because that routine do not check whether the slave device is the bonding type or not. This patch adds the interface type check routine before removing IFF_BONDING flag. Test commands: ip link add bond0 type bond ip link add bond1 type bond ip link set bond1 master bond0 ip link set bond1 nomaster ip link del bond1 type bond ip link add bond1 type bond Splat looks like: [ 226.665555] proc_dir_entry 'bonding/bond1' already registered [ 226.666440] WARNING: CPU: 0 PID: 737 at fs/proc/generic.c:361 proc_register+0x2a9/0x3e0 [ 226.667571] Modules linked in: bonding af_packet sch_fq_codel ip_tables x_tables unix [ 226.668662] CPU: 0 PID: 737 Comm: ip Not tainted 5.4.0-rc3+ #96 [ 226.669508] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 226.670652] RIP: 0010:proc_register+0x2a9/0x3e0 [ 226.671612] Code: 89 fa 48 c1 ea 03 80 3c 02 00 0f 85 39 01 00 00 48 8b 04 24 48 89 ea 48 c7 c7 a0 0b 14 9f 48 8b b0 e 0 00 00 00 e8 07 e7 88 ff <0f> 0b 48 c7 c7 40 2d a5 9f e8 59 d6 23 01 48 8b 4c 24 10 48 b8 00 [ 226.675007] RSP: 0018:ffff888050e17078 EFLAGS: 00010282 [ 226.675761] RAX: dffffc0000000008 RBX: ffff88805fdd0f10 RCX: ffffffff9dd344e2 [ 226.676757] RDX: 0000000000000001 RSI: 0000000000000008 RDI: ffff88806c9f6b8c [ 226.677751] RBP: ffff8880507160f3 R08: ffffed100d940019 R09: ffffed100d940019 [ 226.678761] R10: 0000000000000001 R11: ffffed100d940018 R12: ffff888050716008 [ 226.679757] R13: ffff8880507160f2 R14: dffffc0000000000 R15: ffffed100a0e2c1e [ 226.680758] FS: 00007fdc217cc0c0(0000) GS:ffff88806c800000(0000) knlGS:0000000000000000 [ 226.681886] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 226.682719] CR2: 00007f49313424d0 CR3: 0000000050e46001 CR4: 00000000000606f0 [ 226.683727] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 226.684725] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 226.685681] Call Trace: [ 226.687089] proc_create_seq_private+0xb3/0xf0 [ 226.687778] bond_create_proc_entry+0x1b3/0x3f0 [bonding] [ 226.691458] bond_netdev_event+0x433/0x970 [bonding] [ 226.692139] ? __module_text_address+0x13/0x140 [ 226.692779] notifier_call_chain+0x90/0x160 [ 226.693401] register_netdevice+0x9b3/0xd80 [ 226.694010] ? alloc_netdev_mqs+0x854/0xc10 [ 226.694629] ? netdev_change_features+0xa0/0xa0 [ 226.695278] ? rtnl_create_link+0x2ed/0xad0 [ 226.695849] bond_newlink+0x2a/0x60 [bonding] [ 226.696422] __rtnl_newlink+0xb9f/0x11b0 [ 226.696968] ? rtnl_link_unregister+0x220/0x220 [ ... ] Fixes: 0b680e753724 ("[PATCH] bonding: Add priv_flag to avoid event mishandling") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/bonding/bond_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index cf8385a22de5..5f6602cb191f 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1779,7 +1779,8 @@ err_hwaddr_unsync: bond_hw_addr_flush(bond_dev, slave_dev); err_close: - slave_dev->priv_flags &= ~IFF_BONDING; + if (!netif_is_bond_master(slave_dev)) + slave_dev->priv_flags &= ~IFF_BONDING; dev_close(slave_dev); err_restore_mac: @@ -1985,7 +1986,8 @@ static int __bond_release_one(struct net_device *bond_dev, else dev_set_mtu(slave_dev, slave->original_mtu); - slave_dev->priv_flags &= ~IFF_BONDING; + if (!netif_is_bond_master(slave_dev)) + slave_dev->priv_flags &= ~IFF_BONDING; bond_free_slave(slave); From 1861904a6092ed411203c6a02c75bfc45b27cc3c Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 21 Oct 2019 18:47:55 +0000 Subject: [PATCH 0391/3715] macsec: fix refcnt leak in module exit routine [ Upstream commit 2bce1ebed17da54c65042ec2b962e3234bad5b47 ] When a macsec interface is created, it increases a refcnt to a lower device(real device). when macsec interface is deleted, the refcnt is decreased in macsec_free_netdev(), which is ->priv_destructor() of macsec interface. The problem scenario is this. When nested macsec interfaces are exiting, the exit routine of the macsec module makes refcnt leaks. Test commands: ip link add dummy0 type dummy ip link add macsec0 link dummy0 type macsec ip link add macsec1 link macsec0 type macsec modprobe -rv macsec [ 208.629433] unregister_netdevice: waiting for macsec0 to become free. Usage count = 1 Steps of exit routine of macsec module are below. 1. Calls ->dellink() in __rtnl_link_unregister(). 2. Checks refcnt and wait refcnt to be 0 if refcnt is not 0 in netdev_run_todo(). 3. Calls ->priv_destruvtor() in netdev_run_todo(). Step2 checks refcnt, but step3 decreases refcnt. So, step2 waits forever. This patch makes the macsec module do not hold a refcnt of the lower device because it already holds a refcnt of the lower device with netdev_upper_dev_link(). Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/macsec.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index aa204c98af79..9bcb7c3e879f 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -2993,12 +2993,10 @@ static const struct nla_policy macsec_rtnl_policy[IFLA_MACSEC_MAX + 1] = { static void macsec_free_netdev(struct net_device *dev) { struct macsec_dev *macsec = macsec_priv(dev); - struct net_device *real_dev = macsec->real_dev; free_percpu(macsec->stats); free_percpu(macsec->secy.tx_sc.stats); - dev_put(real_dev); } static void macsec_setup(struct net_device *dev) @@ -3239,8 +3237,6 @@ static int macsec_newlink(struct net *net, struct net_device *dev, if (err < 0) return err; - dev_hold(real_dev); - macsec->nest_level = dev_get_nest_level(real_dev) + 1; netdev_lockdep_set_classes(dev); lockdep_set_class_and_subclass(&dev->addr_list_lock, From 0b7dad3f4eb31461ee4b3197812134dcea83759f Mon Sep 17 00:00:00 2001 From: Nikhil Badola Date: Mon, 21 Oct 2019 18:21:51 +0800 Subject: [PATCH 0392/3715] usb: fsl: Check memory resource before releasing it [ Upstream commit bc1e3a2dd0c9954fd956ac43ca2876bbea018c01 ] Check memory resource existence before releasing it to avoid NULL pointer dereference Signed-off-by: Nikhil Badola Reviewed-by: Ran Wang Reviewed-by: Peter Chen Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/gadget/udc/fsl_udc_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/fsl_udc_core.c b/drivers/usb/gadget/udc/fsl_udc_core.c index 7874c112f3fd..ee48c7938d61 100644 --- a/drivers/usb/gadget/udc/fsl_udc_core.c +++ b/drivers/usb/gadget/udc/fsl_udc_core.c @@ -2569,7 +2569,7 @@ static int fsl_udc_remove(struct platform_device *pdev) dma_pool_destroy(udc_controller->td_pool); free_irq(udc_controller->irq, udc_controller); iounmap(dr_regs); - if (pdata->operating_mode == FSL_USB2_DR_DEVICE) + if (res && (pdata->operating_mode == FSL_USB2_DR_DEVICE)) release_mem_region(res->start, resource_size(res)); /* free udc --wait for the release() finished */ From 7c27c19ca85127e9c802cdf2c336e10242f5e89a Mon Sep 17 00:00:00 2001 From: Cristian Birsan Date: Fri, 4 Oct 2019 20:10:54 +0300 Subject: [PATCH 0393/3715] usb: gadget: udc: atmel: Fix interrupt storm in FIFO mode. [ Upstream commit ba3a1a915c49cc3023e4ddfc88f21e7514e82aa4 ] Fix interrupt storm generated by endpoints when working in FIFO mode. The TX_COMPLETE interrupt is used only by control endpoints processing. Do not enable it for other types of endpoints. Fixes: 914a3f3b3754 ("USB: add atmel_usba_udc driver") Signed-off-by: Cristian Birsan Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/gadget/udc/atmel_usba_udc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c index cb66f982c313..39676824a2c6 100644 --- a/drivers/usb/gadget/udc/atmel_usba_udc.c +++ b/drivers/usb/gadget/udc/atmel_usba_udc.c @@ -488,9 +488,11 @@ static void submit_request(struct usba_ep *ep, struct usba_request *req) next_fifo_transaction(ep, req); if (req->last_transaction) { usba_ep_writel(ep, CTL_DIS, USBA_TX_PK_RDY); - usba_ep_writel(ep, CTL_ENB, USBA_TX_COMPLETE); + if (ep_is_control(ep)) + usba_ep_writel(ep, CTL_ENB, USBA_TX_COMPLETE); } else { - usba_ep_writel(ep, CTL_DIS, USBA_TX_COMPLETE); + if (ep_is_control(ep)) + usba_ep_writel(ep, CTL_DIS, USBA_TX_COMPLETE); usba_ep_writel(ep, CTL_ENB, USBA_TX_PK_RDY); } } From 6acb79f9940001954cba9374c6aba55bd096e09d Mon Sep 17 00:00:00 2001 From: Chandana Kishori Chiluveru Date: Tue, 1 Oct 2019 13:16:48 +0530 Subject: [PATCH 0394/3715] usb: gadget: composite: Fix possible double free memory bug [ Upstream commit 1c20c89b0421b52b2417bb0f62a611bc669eda1d ] composite_dev_cleanup call from the failure of configfs_composite_bind frees up the cdev->os_desc_req and cdev->req. If the previous calls of bind and unbind is successful these will carry stale values. Consider the below sequence of function calls: configfs_composite_bind() composite_dev_prepare() - Allocate cdev->req, cdev->req->buf composite_os_desc_req_prepare() - Allocate cdev->os_desc_req, cdev->os_desc_req->buf configfs_composite_unbind() composite_dev_cleanup() - free the cdev->os_desc_req->buf and cdev->req->buf Next composition switch configfs_composite_bind() - If it fails goto err_comp_cleanup will call the composite_dev_cleanup() function composite_dev_cleanup() - calls kfree up with the stale values of cdev->req->buf and cdev->os_desc_req from the previous configfs_composite_bind call. The free call on these stale values leads to double free. Hence, Fix this issue by setting request and buffer pointer to NULL after kfree. Signed-off-by: Chandana Kishori Chiluveru Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/gadget/composite.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 75c42393b64b..b29cd3979391 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -2187,14 +2187,18 @@ void composite_dev_cleanup(struct usb_composite_dev *cdev) usb_ep_dequeue(cdev->gadget->ep0, cdev->os_desc_req); kfree(cdev->os_desc_req->buf); + cdev->os_desc_req->buf = NULL; usb_ep_free_request(cdev->gadget->ep0, cdev->os_desc_req); + cdev->os_desc_req = NULL; } if (cdev->req) { if (cdev->setup_pending) usb_ep_dequeue(cdev->gadget->ep0, cdev->req); kfree(cdev->req->buf); + cdev->req->buf = NULL; usb_ep_free_request(cdev->gadget->ep0, cdev->req); + cdev->req = NULL; } cdev->next_string_id = 0; device_remove_file(&cdev->gadget->dev, &dev_attr_suspended); From e45760e46f7bf11545f3b5347cec3744c5e631f2 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Mon, 26 Aug 2019 15:10:55 -0400 Subject: [PATCH 0395/3715] usb: gadget: configfs: fix concurrent issue between composite APIs [ Upstream commit 1a1c851bbd706ea9f3a9756c2d3db28523506d3b ] We meet several NULL pointer issues if configfs_composite_unbind and composite_setup (or composite_disconnect) are running together. These issues occur when do the function switch stress test, the configfs_compsoite_unbind is called from user mode by echo "" to /sys/../UDC entry, and meanwhile, the setup interrupt or disconnect interrupt occurs by hardware. The composite_setup will get the cdev from get_gadget_data, but configfs_composite_unbind will set gadget data as NULL, so the NULL pointer issue occurs. This concurrent is hard to reproduce by native kernel, but can be reproduced by android kernel. In this commit, we introduce one spinlock belongs to structure gadget_info since we can't use the same spinlock in usb_composite_dev due to exclusive running together between composite_setup and configfs_composite_unbind. And one bit flag 'unbind' to indicate the code is at unbind routine, this bit is needed due to we release the lock at during configfs_composite_unbind sometimes, and composite_setup may be run at that time. Several oops: oops 1: android_work: sent uevent USB_STATE=CONNECTED configfs-gadget gadget: super-speed config #1: b android_work: sent uevent USB_STATE=CONFIGURED init: Received control message 'start' for 'adbd' from pid: 3515 (system_server) Unable to handle kernel NULL pointer dereference at virtual address 0000002a init: Received control message 'stop' for 'adbd' from pid: 3375 (/vendor/bin/hw/android.hardware.usb@1.1-servic) Mem abort info: Exception class = DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000004 CM = 0, WnR = 0 user pgtable: 4k pages, 48-bit VAs, pgd = ffff8008f1b7f000 [000000000000002a] *pgd=0000000000000000 Internal error: Oops: 96000004 [#1] PREEMPT SMP Modules linked in: CPU: 4 PID: 2457 Comm: irq/125-5b11000 Not tainted 4.14.98-07846-g0b40a9b-dirty #16 Hardware name: Freescale i.MX8QM MEK (DT) task: ffff8008f2a98000 task.stack: ffff00000b7b8000 PC is at composite_setup+0x44/0x1508 LR is at android_setup+0xb8/0x13c pc : [] lr : [] pstate: 800001c5 sp : ffff00000b7bbb80 x29: ffff00000b7bbb80 x28: ffff8008f2a3c010 x27: 0000000000000001 x26: 0000000000000000 [1232/1897] audit: audit_lost=25791 audit_rate_limit=5 audit_backlog_limit=64 x25: 00000000ffffffa1 x24: ffff8008f2a3c010 audit: rate limit exceeded x23: 0000000000000409 x22: ffff000009c8e000 x21: ffff8008f7a8b428 x20: ffff00000afae000 x19: ffff0000089ff000 x18: 0000000000000000 x17: 0000000000000000 x16: ffff0000082b7c9c x15: 0000000000000000 x14: f1866f5b952aca46 x13: e35502e30d44349c x12: 0000000000000008 x11: 0000000000000008 x10: 0000000000000a30 x9 : ffff00000b7bbd00 x8 : ffff8008f2a98a90 x7 : ffff8008f27a9c90 x6 : 0000000000000001 x5 : 0000000000000000 x4 : 0000000000000001 x3 : 0000000000000000 x2 : 0000000000000006 x1 : ffff0000089ff8d0 x0 : 732a010310b9ed00 X7: 0xffff8008f27a9c10: 9c10 00000002 00000000 00000001 00000000 13110000 ffff0000 00000002 00208040 9c30 00000000 00000000 00000000 00000000 00000000 00000005 00000029 00000000 9c50 00051778 00000001 f27a8e00 ffff8008 00000005 00000000 00000078 00000078 9c70 00000078 00000000 09031d48 ffff0000 00100000 00000000 00400000 00000000 9c90 00000001 00000000 00000000 00000000 00000000 00000000 ffefb1a0 ffff8008 9cb0 f27a9ca8 ffff8008 00000000 00000000 b9d88037 00000173 1618a3eb 00000001 9cd0 870a792a 0000002e 16188fe6 00000001 0000242b 00000000 00000000 00000000 using random self ethernet address 9cf0 019a4646 00000000 000547f3 00000000 ecfd6c33 00000002 00000000 using random host ethernet address 00000000 X8: 0xffff8008f2a98a10: 8a10 00000000 00000000 f7788d00 ffff8008 00000001 00000000 00000000 00000000 8a30 eb218000 ffff8008 f2a98000 ffff8008 f2a98000 ffff8008 09885000 ffff0000 8a50 f34df480 ffff8008 00000000 00000000 f2a98648 ffff8008 09c8e000 ffff0000 8a70 fff2c800 ffff8008 09031d48 ffff0000 0b7bbd00 ffff0000 0b7bbd00 ffff0000 8a90 080861bc ffff0000 00000000 00000000 00000000 00000000 00000000 00000000 8ab0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 8ad0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 8af0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 X21: 0xffff8008f7a8b3a8: b3a8 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 b3c8 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 b3e8 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 b408 00000000 00000000 00000000 00000000 00000000 00000000 00000001 00000000 b428 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 b448 0053004d 00540046 00300031 00010030 eb07b520 ffff8008 20011201 00000003 b468 e418d109 0104404e 00010302 00000000 eb07b558 ffff8008 eb07b558 ffff8008 b488 f7a8b488 ffff8008 f7a8b488 ffff8008 f7a8b300 ffff8008 00000000 00000000 X24: 0xffff8008f2a3bf90: bf90 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 bfb0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 bfd0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 bff0 00000000 00000000 00000000 00000000 f76c8010 ffff8008 f76c8010 ffff8008 c010 00000000 00000000 f2a3c018 ffff8008 f2a3c018 ffff8008 08a067dc ffff0000 c030 f2a5a000 ffff8008 091c3650 ffff0000 f716fd18 ffff8008 f716fe30 ffff8008 c050 f2ce4a30 ffff8008 00000000 00000005 00000000 00000000 095d1568 ffff0000 c070 f76c8010 ffff8008 f2ce4b00 ffff8008 095cac68 ffff0000 f2a5a028 ffff8008 X28: 0xffff8008f2a3bf90: bf90 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 bfb0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 bfd0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 bff0 00000000 00000000 00000000 00000000 f76c8010 ffff8008 f76c8010 ffff8008 c010 00000000 00000000 f2a3c018 ffff8008 f2a3c018 ffff8008 08a067dc ffff0000 c030 f2a5a000 ffff8008 091c3650 ffff0000 f716fd18 ffff8008 f716fe30 ffff8008 c050 f2ce4a30 ffff8008 00000000 00000005 00000000 00000000 095d1568 ffff0000 c070 f76c8010 ffff8008 f2ce4b00 ffff8008 095cac68 ffff0000 f2a5a028 ffff8008 Process irq/125-5b11000 (pid: 2457, stack limit = 0xffff00000b7b8000) Call trace: Exception stack(0xffff00000b7bba40 to 0xffff00000b7bbb80) ba40: 732a010310b9ed00 ffff0000089ff8d0 0000000000000006 0000000000000000 ba60: 0000000000000001 0000000000000000 0000000000000001 ffff8008f27a9c90 ba80: ffff8008f2a98a90 ffff00000b7bbd00 0000000000000a30 0000000000000008 baa0: 0000000000000008 e35502e30d44349c f1866f5b952aca46 0000000000000000 bac0: ffff0000082b7c9c 0000000000000000 0000000000000000 ffff0000089ff000 bae0: ffff00000afae000 ffff8008f7a8b428 ffff000009c8e000 0000000000000409 bb00: ffff8008f2a3c010 00000000ffffffa1 0000000000000000 0000000000000001 bb20: ffff8008f2a3c010 ffff00000b7bbb80 ffff000008a032fc ffff00000b7bbb80 bb40: ffff0000089ffb3c 00000000800001c5 ffff00000b7bbb80 732a010310b9ed00 bb60: ffffffffffffffff ffff0000080f777c ffff00000b7bbb80 ffff0000089ffb3c [] composite_setup+0x44/0x1508 [] android_setup+0xb8/0x13c [] cdns3_ep0_delegate_req+0x44/0x70 [] cdns3_check_ep0_interrupt_proceed+0x33c/0x654 [] cdns3_device_thread_irq_handler+0x4b0/0x4bc [] cdns3_thread_irq+0x48/0x68 [] irq_thread_fn+0x28/0x88 [] irq_thread+0x13c/0x228 [] kthread+0x104/0x130 [] ret_from_fork+0x10/0x18 oops2: composite_disconnect: Calling disconnect on a Gadget that is not connected android_work: did not send uevent (0 0 (null)) init: Received control message 'stop' for 'adbd' from pid: 3359 (/vendor/bin/hw/android.hardware.usb@1.1-service.imx) init: Sending signal 9 to service 'adbd' (pid 22343) process group... ------------[ cut here ]------------ audit: audit_lost=180038 audit_rate_limit=5 audit_backlog_limit=64 audit: rate limit exceeded WARNING: CPU: 0 PID: 3468 at kernel_imx/drivers/usb/gadget/composite.c:2009 composite_disconnect+0x80/0x88 Modules linked in: CPU: 0 PID: 3468 Comm: HWC-UEvent-Thre Not tainted 4.14.98-07846-g0b40a9b-dirty #16 Hardware name: Freescale i.MX8QM MEK (DT) task: ffff8008f2349c00 task.stack: ffff00000b0a8000 PC is at composite_disconnect+0x80/0x88 LR is at composite_disconnect+0x80/0x88 pc : [] lr : [] pstate: 600001c5 sp : ffff000008003dd0 x29: ffff000008003dd0 x28: ffff8008f2349c00 x27: ffff000009885018 x26: ffff000008004000 Timeout for IPC response! x25: ffff000009885018 x24: ffff000009c8e280 x23: ffff8008f2d98010 x22: 00000000000001c0 x21: ffff8008f2d98394 x20: ffff8008f2d98010 x19: 0000000000000000 x18: 0000e3956f4f075a fxos8700 4-001e: i2c block read acc failed x17: 0000e395735727e8 x16: ffff00000829f4d4 x15: ffffffffffffffff x14: 7463656e6e6f6320 x13: 746f6e2009090920 x12: 7369207461687420 x11: 7465676461472061 x10: 206e6f207463656e x9 : 6e6f637369642067 x8 : ffff000009c8e280 x7 : ffff0000086ca6cc x6 : ffff000009f15e78 x5 : 0000000000000000 x4 : 0000000000000000 x3 : ffffffffffffffff x2 : c3f28b86000c3900 x1 : c3f28b86000c3900 x0 : 000000000000004e X20: 0xffff8008f2d97f90: 7f90 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 7fb0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 libprocessgroup: Failed to kill process cgroup uid 0 pid 22343 in 215ms, 1 processes remain 7fd0 Timeout for IPC response! 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 using random self ethernet address 7ff0 00000000 00000000 00000000 00000000 f76c8010 ffff8008 f76c8010 ffff8008 8010 00000100 00000000 f2d98018 ffff8008 f2d98018 ffff8008 08a067dc using random host ethernet address ffff0000 8030 f206d800 ffff8008 091c3650 ffff0000 f7957b18 ffff8008 f7957730 ffff8008 8050 f716a630 ffff8008 00000000 00000005 00000000 00000000 095d1568 ffff0000 8070 f76c8010 ffff8008 f716a800 ffff8008 095cac68 ffff0000 f206d828 ffff8008 X21: 0xffff8008f2d98314: 8314 ffff8008 00000000 00000000 00000000 00000000 00000000 00000000 00000000 8334 00000000 00000000 00000000 00000000 00000000 08a04cf4 ffff0000 00000000 8354 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 8374 00000000 00000000 00000000 00001001 00000000 00000000 00000000 00000000 8394 e4bbe4bb 0f230000 ffff0000 0afae000 ffff0000 ae001000 00000000 f206d400 Timeout for IPC response! 83b4 ffff8008 00000000 00000000 f7957b18 ffff8008 f7957718 ffff8008 f7957018 83d4 ffff8008 f7957118 ffff8008 f7957618 ffff8008 f7957818 ffff8008 f7957918 83f4 ffff8008 f7957d18 ffff8008 00000000 00000000 00000000 00000000 00000000 X23: 0xffff8008f2d97f90: 7f90 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 7fb0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 7fd0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 7ff0 00000000 00000000 00000000 00000000 f76c8010 ffff8008 f76c8010 ffff8008 8010 00000100 00000000 f2d98018 ffff8008 f2d98018 ffff8008 08a067dc ffff0000 8030 f206d800 ffff8008 091c3650 ffff0000 f7957b18 ffff8008 f7957730 ffff8008 8050 f716a630 ffff8008 00000000 00000005 00000000 00000000 095d1568 ffff0000 8070 f76c8010 ffff8008 f716a800 ffff8008 095cac68 ffff0000 f206d828 ffff8008 X28: 0xffff8008f2349b80: 9b80 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 9ba0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 9bc0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 9be0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 9c00 00000022 00000000 ffffffff ffffffff 00010001 00000000 00000000 00000000 9c20 0b0a8000 ffff0000 00000002 00404040 00000000 00000000 00000000 00000000 9c40 00000001 00000000 00000001 00000000 001ebd44 00000001 f390b800 ffff8008 9c60 00000000 00000001 00000070 00000070 00000070 00000000 09031d48 ffff0000 Call trace: Exception stack(0xffff000008003c90 to 0xffff000008003dd0) 3c80: 000000000000004e c3f28b86000c3900 3ca0: c3f28b86000c3900 ffffffffffffffff 0000000000000000 0000000000000000 3cc0: ffff000009f15e78 ffff0000086ca6cc ffff000009c8e280 6e6f637369642067 3ce0: 206e6f207463656e 7465676461472061 7369207461687420 746f6e2009090920 3d00: 7463656e6e6f6320 ffffffffffffffff ffff00000829f4d4 0000e395735727e8 3d20: 0000e3956f4f075a 0000000000000000 ffff8008f2d98010 ffff8008f2d98394 3d40: 00000000000001c0 ffff8008f2d98010 ffff000009c8e280 ffff000009885018 3d60: ffff000008004000 ffff000009885018 ffff8008f2349c00 ffff000008003dd0 3d80: ffff0000089ff9b0 ffff000008003dd0 ffff0000089ff9b0 00000000600001c5 3da0: ffff8008f33f2cd8 0000000000000000 0000ffffffffffff 0000000000000000 init: Received control message 'start' for 'adbd' from pid: 3359 (/vendor/bin/hw/android.hardware.usb@1.1-service.imx) 3dc0: ffff000008003dd0 ffff0000089ff9b0 [] composite_disconnect+0x80/0x88 [] android_disconnect+0x3c/0x68 [] cdns3_device_irq_handler+0xfc/0x2c8 [] cdns3_irq+0x44/0x94 [] __handle_irq_event_percpu+0x60/0x24c [] handle_irq_event+0x58/0xc0 [] handle_fasteoi_irq+0x98/0x180 [] generic_handle_irq+0x24/0x38 [] __handle_domain_irq+0x60/0xac [] gic_handle_irq+0xd4/0x17c Signed-off-by: Peter Chen Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/gadget/configfs.c | 110 ++++++++++++++++++++++++++++++++-- 1 file changed, 105 insertions(+), 5 deletions(-) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index aeb9f3c40521..d0143d02e2f7 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -60,6 +60,8 @@ struct gadget_info { bool use_os_desc; char b_vendor_code; char qw_sign[OS_STRING_QW_SIGN_LEN]; + spinlock_t spinlock; + bool unbind; }; static inline struct gadget_info *to_gadget_info(struct config_item *item) @@ -1243,6 +1245,7 @@ static int configfs_composite_bind(struct usb_gadget *gadget, int ret; /* the gi->lock is hold by the caller */ + gi->unbind = 0; cdev->gadget = gadget; set_gadget_data(gadget, cdev); ret = composite_dev_prepare(composite, cdev); @@ -1375,31 +1378,128 @@ static void configfs_composite_unbind(struct usb_gadget *gadget) { struct usb_composite_dev *cdev; struct gadget_info *gi; + unsigned long flags; /* the gi->lock is hold by the caller */ cdev = get_gadget_data(gadget); gi = container_of(cdev, struct gadget_info, cdev); + spin_lock_irqsave(&gi->spinlock, flags); + gi->unbind = 1; + spin_unlock_irqrestore(&gi->spinlock, flags); kfree(otg_desc[0]); otg_desc[0] = NULL; purge_configs_funcs(gi); composite_dev_cleanup(cdev); usb_ep_autoconfig_reset(cdev->gadget); + spin_lock_irqsave(&gi->spinlock, flags); cdev->gadget = NULL; set_gadget_data(gadget, NULL); + spin_unlock_irqrestore(&gi->spinlock, flags); +} + +static int configfs_composite_setup(struct usb_gadget *gadget, + const struct usb_ctrlrequest *ctrl) +{ + struct usb_composite_dev *cdev; + struct gadget_info *gi; + unsigned long flags; + int ret; + + cdev = get_gadget_data(gadget); + if (!cdev) + return 0; + + gi = container_of(cdev, struct gadget_info, cdev); + spin_lock_irqsave(&gi->spinlock, flags); + cdev = get_gadget_data(gadget); + if (!cdev || gi->unbind) { + spin_unlock_irqrestore(&gi->spinlock, flags); + return 0; + } + + ret = composite_setup(gadget, ctrl); + spin_unlock_irqrestore(&gi->spinlock, flags); + return ret; +} + +static void configfs_composite_disconnect(struct usb_gadget *gadget) +{ + struct usb_composite_dev *cdev; + struct gadget_info *gi; + unsigned long flags; + + cdev = get_gadget_data(gadget); + if (!cdev) + return; + + gi = container_of(cdev, struct gadget_info, cdev); + spin_lock_irqsave(&gi->spinlock, flags); + cdev = get_gadget_data(gadget); + if (!cdev || gi->unbind) { + spin_unlock_irqrestore(&gi->spinlock, flags); + return; + } + + composite_disconnect(gadget); + spin_unlock_irqrestore(&gi->spinlock, flags); +} + +static void configfs_composite_suspend(struct usb_gadget *gadget) +{ + struct usb_composite_dev *cdev; + struct gadget_info *gi; + unsigned long flags; + + cdev = get_gadget_data(gadget); + if (!cdev) + return; + + gi = container_of(cdev, struct gadget_info, cdev); + spin_lock_irqsave(&gi->spinlock, flags); + cdev = get_gadget_data(gadget); + if (!cdev || gi->unbind) { + spin_unlock_irqrestore(&gi->spinlock, flags); + return; + } + + composite_suspend(gadget); + spin_unlock_irqrestore(&gi->spinlock, flags); +} + +static void configfs_composite_resume(struct usb_gadget *gadget) +{ + struct usb_composite_dev *cdev; + struct gadget_info *gi; + unsigned long flags; + + cdev = get_gadget_data(gadget); + if (!cdev) + return; + + gi = container_of(cdev, struct gadget_info, cdev); + spin_lock_irqsave(&gi->spinlock, flags); + cdev = get_gadget_data(gadget); + if (!cdev || gi->unbind) { + spin_unlock_irqrestore(&gi->spinlock, flags); + return; + } + + composite_resume(gadget); + spin_unlock_irqrestore(&gi->spinlock, flags); } static const struct usb_gadget_driver configfs_driver_template = { .bind = configfs_composite_bind, .unbind = configfs_composite_unbind, - .setup = composite_setup, - .reset = composite_disconnect, - .disconnect = composite_disconnect, + .setup = configfs_composite_setup, + .reset = configfs_composite_disconnect, + .disconnect = configfs_composite_disconnect, - .suspend = composite_suspend, - .resume = composite_resume, + .suspend = configfs_composite_suspend, + .resume = configfs_composite_resume, .max_speed = USB_SPEED_SUPER, .driver = { From 5e7777e53d9043ff88109152958b22aafb6a2a8b Mon Sep 17 00:00:00 2001 From: Yinbo Zhu Date: Mon, 29 Jul 2019 14:46:07 +0800 Subject: [PATCH 0396/3715] usb: dwc3: remove the call trace of USBx_GFLADJ [ Upstream commit a7d9874c6f3fbc8d25cd9ceba35b6822612c4ebf ] layerscape board sometimes reported some usb call trace, that is due to kernel sent LPM tokerns automatically when it has no pending transfers and think that the link is idle enough to enter L1, which procedure will ask usb register has a recovery,then kernel will compare USBx_GFLADJ and set GFLADJ_30MHZ, GFLADJ_30MHZ_REG until GFLADJ_30MHZ is equal 0x20, if the conditions were met then issue occur, but whatever the conditions whether were met that usb is all need keep GFLADJ_30MHZ of value is 0x20 (xhci spec ask use GFLADJ_30MHZ to adjust any offset from clock source that generates the clock that drives the SOF counter, 0x20 is default value of it)That is normal logic, so need remove the call trace. Signed-off-by: Yinbo Zhu Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/dwc3/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 9b093978bd24..48755c501201 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -282,8 +282,7 @@ static void dwc3_frame_length_adjustment(struct dwc3 *dwc) reg = dwc3_readl(dwc->regs, DWC3_GFLADJ); dft = reg & DWC3_GFLADJ_30MHZ_MASK; - if (!dev_WARN_ONCE(dwc->dev, dft == dwc->fladj, - "request value same as default, ignoring\n")) { + if (dft != dwc->fladj) { reg &= ~DWC3_GFLADJ_30MHZ_MASK; reg |= DWC3_GFLADJ_30MHZ_SDBND_SEL | dwc->fladj; dwc3_writel(dwc->regs, DWC3_GFLADJ, reg); From 938449fdd37743e72eb386272360543468a2a101 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Wed, 23 Oct 2019 10:09:54 -0500 Subject: [PATCH 0397/3715] perf/x86/amd/ibs: Fix reading of the IBS OpData register and thus precise RIP validity [ Upstream commit 317b96bb14303c7998dbcd5bc606bd8038fdd4b4 ] The loop that reads all the IBS MSRs into *buf stopped one MSR short of reading the IbsOpData register, which contains the RipInvalid status bit. Fix the offset_max assignment so the MSR gets read, so the RIP invalid evaluation is based on what the IBS h/w output, instead of what was left in memory. Signed-off-by: Kim Phillips Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mark Rutland Cc: Namhyung Kim Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: d47e8238cd76 ("perf/x86-ibs: Take instruction pointer from ibs sample") Link: https://lkml.kernel.org/r/20191023150955.30292-1-kim.phillips@amd.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- arch/x86/events/amd/ibs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 7a86fbc07ddc..4deecdb26ab3 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -625,7 +625,7 @@ fail: if (event->attr.sample_type & PERF_SAMPLE_RAW) offset_max = perf_ibs->offset_max; else if (check_rip) - offset_max = 2; + offset_max = 3; else offset_max = 1; do { From 09bbc0a0c399aace431e2701979730d33eb2584c Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Wed, 23 Oct 2019 10:09:55 -0500 Subject: [PATCH 0398/3715] perf/x86/amd/ibs: Handle erratum #420 only on the affected CPU family (10h) [ Upstream commit e431e79b60603079d269e0c2a5177943b95fa4b6 ] This saves us writing the IBS control MSR twice when disabling the event. I searched revision guides for all families since 10h, and did not find occurrence of erratum #420, nor anything remotely similar: so we isolate the secondary MSR write to family 10h only. Also unconditionally update the count mask for IBS Op implementations that have read & writeable current count (CurCnt) fields in addition to the MaxCnt field. These bits were reserved on prior implementations, and therefore shouldn't have negative impact. Signed-off-by: Kim Phillips Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mark Rutland Cc: Namhyung Kim Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: c9574fe0bdb9 ("perf/x86-ibs: Implement workaround for IBS erratum #420") Link: https://lkml.kernel.org/r/20191023150955.30292-2-kim.phillips@amd.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- arch/x86/events/amd/ibs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 4deecdb26ab3..f24e9adaa316 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -389,7 +389,8 @@ static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs, struct hw_perf_event *hwc, u64 config) { config &= ~perf_ibs->cnt_mask; - wrmsrl(hwc->config_base, config); + if (boot_cpu_data.x86 == 0x10) + wrmsrl(hwc->config_base, config); config &= ~perf_ibs->enable_mask; wrmsrl(hwc->config_base, config); } @@ -564,7 +565,8 @@ static struct perf_ibs perf_ibs_op = { }, .msr = MSR_AMD64_IBSOPCTL, .config_mask = IBS_OP_CONFIG_MASK, - .cnt_mask = IBS_OP_MAX_CNT, + .cnt_mask = IBS_OP_MAX_CNT | IBS_OP_CUR_CNT | + IBS_OP_CUR_CNT_RAND, .enable_mask = IBS_OP_ENABLE, .valid_mask = IBS_OP_VAL, .max_period = IBS_OP_MAX_CNT << 4, From faa06698c0e94279bbf03ae95cf4f8063c9235f6 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 28 Oct 2019 10:52:35 -0400 Subject: [PATCH 0399/3715] USB: Skip endpoints with 0 maxpacket length [ Upstream commit d482c7bb0541d19dea8bff437a9f3c5563b5b2d2 ] Endpoints with a maxpacket length of 0 are probably useless. They can't transfer any data, and it's not at all unlikely that an HCD will crash or hang when trying to handle an URB for such an endpoint. Currently the USB core does not check for endpoints having a maxpacket value of 0. This patch adds a check, printing a warning and skipping over any endpoints it catches. Now, the USB spec does not rule out endpoints having maxpacket = 0. But since they wouldn't have any practical use, there doesn't seem to be any good reason for us to accept them. Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/Pine.LNX.4.44L0.1910281050420.1485-100000@iolanthe.rowland.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/core/config.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index d03d0e46b121..cfb8f1126cf8 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -348,6 +348,11 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, /* Validate the wMaxPacketSize field */ maxp = usb_endpoint_maxp(&endpoint->desc); + if (maxp == 0) { + dev_warn(ddev, "config %d interface %d altsetting %d endpoint 0x%X has wMaxPacketSize 0, skipping\n", + cfgno, inum, asnum, d->bEndpointAddress); + goto skip_to_next_endpoint_or_interface_descriptor; + } /* Find the highest legal maxpacket size for this endpoint */ i = 0; /* additional transactions per microframe */ From 53746bf4e6220c4b40cf0f4c32a3d6a57dc43e1b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 22 Oct 2019 16:32:03 +0200 Subject: [PATCH 0400/3715] USB: ldusb: use unsigned size format specifiers [ Upstream commit 88f6bf3846ee90bf33aa1ce848cd3bfb3229f4a4 ] A recent info-leak bug manifested itself along with warning about a negative buffer overflow: ldusb 1-1:0.28: Read buffer overflow, -131383859965943 bytes dropped when it was really a rather large positive one. A sanity check that prevents this has now been put in place, but let's fix up the size format specifiers, which should all be unsigned. Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191022143203.5260-3-johan@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/misc/ldusb.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/usb/misc/ldusb.c b/drivers/usb/misc/ldusb.c index 6387545b17ea..e16af177d467 100644 --- a/drivers/usb/misc/ldusb.c +++ b/drivers/usb/misc/ldusb.c @@ -490,7 +490,7 @@ static ssize_t ld_usb_read(struct file *file, char __user *buffer, size_t count, } bytes_to_read = min(count, *actual_buffer); if (bytes_to_read < *actual_buffer) - dev_warn(&dev->intf->dev, "Read buffer overflow, %zd bytes dropped\n", + dev_warn(&dev->intf->dev, "Read buffer overflow, %zu bytes dropped\n", *actual_buffer-bytes_to_read); /* copy one interrupt_in_buffer from ring_buffer into userspace */ @@ -565,8 +565,9 @@ static ssize_t ld_usb_write(struct file *file, const char __user *buffer, /* write the data into interrupt_out_buffer from userspace */ bytes_to_write = min(count, write_buffer_size*dev->interrupt_out_endpoint_size); if (bytes_to_write < count) - dev_warn(&dev->intf->dev, "Write buffer overflow, %zd bytes dropped\n", count-bytes_to_write); - dev_dbg(&dev->intf->dev, "%s: count = %zd, bytes_to_write = %zd\n", + dev_warn(&dev->intf->dev, "Write buffer overflow, %zu bytes dropped\n", + count - bytes_to_write); + dev_dbg(&dev->intf->dev, "%s: count = %zu, bytes_to_write = %zu\n", __func__, count, bytes_to_write); if (copy_from_user(dev->interrupt_out_buffer, buffer, bytes_to_write)) { From 32eb2a0a19e51c2cb0f44c18c8a6c14bfde3a29a Mon Sep 17 00:00:00 2001 From: Potnuri Bharat Teja Date: Fri, 25 Oct 2019 18:04:40 +0530 Subject: [PATCH 0401/3715] RDMA/iw_cxgb4: Avoid freeing skb twice in arp failure case [ Upstream commit d4934f45693651ea15357dd6c7c36be28b6da884 ] _put_ep_safe() and _put_pass_ep_safe() free the skb before it is freed by process_work(). fix double free by freeing the skb only in process_work(). Fixes: 1dad0ebeea1c ("iw_cxgb4: Avoid touch after free error in ARP failure handlers") Link: https://lore.kernel.org/r/1572006880-5800-1-git-send-email-bharat@chelsio.com Signed-off-by: Dakshaja Uppalapati Signed-off-by: Potnuri Bharat Teja Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/cxgb4/cm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index d87f08cd78ad..bb36cdf82a8d 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -491,7 +491,6 @@ static int _put_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb) ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))); release_ep_resources(ep); - kfree_skb(skb); return 0; } @@ -502,7 +501,6 @@ static int _put_pass_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb) ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))); c4iw_put_ep(&ep->parent_ep->com); release_ep_resources(ep); - kfree_skb(skb); return 0; } From 3c7e78186d68351ef5962a593ec40945a11da8bf Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 24 Oct 2019 16:38:04 +1000 Subject: [PATCH 0402/3715] scsi: qla2xxx: stop timer in shutdown path [ Upstream commit d3566abb1a1e7772116e4d50fb6a58d19c9802e5 ] In shutdown/reboot paths, the timer is not stopped: qla2x00_shutdown pci_device_shutdown device_shutdown kernel_restart_prepare kernel_restart sys_reboot This causes lockups (on powerpc) when firmware config space access calls are interrupted by smp_send_stop later in reboot. Fixes: e30d1756480dc ("[SCSI] qla2xxx: Addition of shutdown callback handler.") Link: https://lore.kernel.org/r/20191024063804.14538-1-npiggin@gmail.com Signed-off-by: Nicholas Piggin Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_os.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 7d7fb5bbb600..343fbaa6d2a2 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -3437,6 +3437,10 @@ qla2x00_shutdown(struct pci_dev *pdev) /* Stop currently executing firmware. */ qla2x00_try_to_stop_firmware(vha); + /* Disable timer */ + if (vha->timer_active) + qla2x00_stop_timer(vha); + /* Turn adapter off line */ vha->flags.online = 0; From 81370ee55d9250e0744e09493186cb0e071dbcb2 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 25 Oct 2019 12:06:02 +0100 Subject: [PATCH 0403/3715] fjes: Handle workqueue allocation failure [ Upstream commit 85ac30fa2e24f628e9f4f9344460f4015d33fd7d ] In the highly unlikely event that we fail to allocate either of the "/txrx" or "/control" workqueues, we should bail cleanly rather than blindly march on with NULL queue pointer(s) installed in the 'fjes_adapter' instance. Cc: "David S. Miller" Reported-by: Nicolas Waisman Link: https://lore.kernel.org/lkml/CADJ_3a8WFrs5NouXNqS5WYe7rebFP+_A5CheeqAyD_p7DFJJcg@mail.gmail.com/ Signed-off-by: Will Deacon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/fjes/fjes_main.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index 750954be5a74..14d6579b292a 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -1252,8 +1252,17 @@ static int fjes_probe(struct platform_device *plat_dev) adapter->open_guard = false; adapter->txrx_wq = alloc_workqueue(DRV_NAME "/txrx", WQ_MEM_RECLAIM, 0); + if (unlikely(!adapter->txrx_wq)) { + err = -ENOMEM; + goto err_free_netdev; + } + adapter->control_wq = alloc_workqueue(DRV_NAME "/control", WQ_MEM_RECLAIM, 0); + if (unlikely(!adapter->control_wq)) { + err = -ENOMEM; + goto err_free_txrx_wq; + } INIT_WORK(&adapter->tx_stall_task, fjes_tx_stall_task); INIT_WORK(&adapter->raise_intr_rxdata_task, @@ -1270,7 +1279,7 @@ static int fjes_probe(struct platform_device *plat_dev) hw->hw_res.irq = platform_get_irq(plat_dev, 0); err = fjes_hw_init(&adapter->hw); if (err) - goto err_free_netdev; + goto err_free_control_wq; /* setup MAC address (02:00:00:00:00:[epid])*/ netdev->dev_addr[0] = 2; @@ -1292,6 +1301,10 @@ static int fjes_probe(struct platform_device *plat_dev) err_hw_exit: fjes_hw_exit(&adapter->hw); +err_free_control_wq: + destroy_workqueue(adapter->control_wq); +err_free_txrx_wq: + destroy_workqueue(adapter->txrx_wq); err_free_netdev: free_netdev(netdev); err_out: From 15c1c15be2a8fd0ef005972adc9c2a7782a2f558 Mon Sep 17 00:00:00 2001 From: Jiangfeng Xiao Date: Fri, 25 Oct 2019 21:48:22 +0800 Subject: [PATCH 0404/3715] net: hisilicon: Fix "Trying to free already-free IRQ" [ Upstream commit 63a41746827cb16dc6ad0d4d761ab4e7dda7a0c3 ] When rmmod hip04_eth.ko, we can get the following warning: Task track: rmmod(1623)>bash(1591)>login(1581)>init(1) ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1623 at kernel/irq/manage.c:1557 __free_irq+0xa4/0x2ac() Trying to free already-free IRQ 200 Modules linked in: ping(O) pramdisk(O) cpuinfo(O) rtos_snapshot(O) interrupt_ctrl(O) mtdblock mtd_blkdevrtfs nfs_acl nfs lockd grace sunrpc xt_tcpudp ipt_REJECT iptable_filter ip_tables x_tables nf_reject_ipv CPU: 0 PID: 1623 Comm: rmmod Tainted: G O 4.4.193 #1 Hardware name: Hisilicon A15 [] (rtos_unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0xa0/0xd8) [] (dump_stack) from [] (warn_slowpath_common+0x84/0xb0) [] (warn_slowpath_common) from [] (warn_slowpath_fmt+0x3c/0x68) [] (warn_slowpath_fmt) from [] (__free_irq+0xa4/0x2ac) [] (__free_irq) from [] (free_irq+0x60/0x7c) [] (free_irq) from [] (release_nodes+0x1c4/0x1ec) [] (release_nodes) from [] (__device_release_driver+0xa8/0x104) [] (__device_release_driver) from [] (driver_detach+0xd0/0xf8) [] (driver_detach) from [] (bus_remove_driver+0x64/0x8c) [] (bus_remove_driver) from [] (SyS_delete_module+0x198/0x1e0) [] (SyS_delete_module) from [] (__sys_trace_return+0x0/0x10) ---[ end trace bb25d6123d849b44 ]--- Currently "rmmod hip04_eth.ko" call free_irq more than once as devres_release_all and hip04_remove both call free_irq. This results in a 'Trying to free already-free IRQ' warning. To solve the problem free_irq has been moved out of hip04_remove. Signed-off-by: Jiangfeng Xiao Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hip04_eth.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index 17cbe8145dcd..ebc056b9a0fd 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -945,7 +945,6 @@ static int hip04_remove(struct platform_device *pdev) hip04_free_ring(ndev, d); unregister_netdev(ndev); - free_irq(ndev->irq, ndev); of_node_put(priv->phy_node); cancel_work_sync(&priv->tx_timeout_task); free_netdev(ndev); From 904e41c939f48a6ab38db3009ac3f59805e56a86 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Wed, 30 Oct 2019 15:32:13 +0000 Subject: [PATCH 0405/3715] hv_netvsc: Fix error handling in netvsc_attach() [ Upstream commit 719b85c336ed35565d0f3982269d6f684087bb00 ] If rndis_filter_open() fails, we need to remove the rndis device created in earlier steps, before returning an error code. Otherwise, the retry of netvsc_attach() from its callers will fail and hang. Fixes: 7b2ee50c0cd5 ("hv_netvsc: common detach logic") Signed-off-by: Haiyang Zhang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/hyperv/netvsc_drv.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 33c1f6548fb7..5a44b9795266 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -969,7 +969,7 @@ static int netvsc_attach(struct net_device *ndev, if (netif_running(ndev)) { ret = rndis_filter_open(nvdev); if (ret) - return ret; + goto err; rdev = nvdev->extension; if (!rdev->link_state) @@ -977,6 +977,13 @@ static int netvsc_attach(struct net_device *ndev, } return 0; + +err: + netif_device_detach(ndev); + + rndis_filter_device_remove(hdev, nvdev); + + return ret; } static int netvsc_set_channels(struct net_device *net, From 640b29e01858c3cf28c04ca3eaee5380c6e7f6b1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 31 Oct 2019 18:40:32 -0400 Subject: [PATCH 0406/3715] NFSv4: Don't allow a cached open with a revoked delegation [ Upstream commit be3df3dd4c70ee020587a943a31b98a0fb4b6424 ] If the delegation is marked as being revoked, we must not use it for cached opens. Fixes: 869f9dfa4d6d ("NFSv4: Fix races between nfs_remove_bad_delegation() and delegation return") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin --- fs/nfs/delegation.c | 10 ++++++++++ fs/nfs/delegation.h | 1 + fs/nfs/nfs4proc.c | 7 ++----- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 606dd3871f66..61bc0a6ba08b 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -52,6 +52,16 @@ nfs4_is_valid_delegation(const struct nfs_delegation *delegation, return false; } +struct nfs_delegation *nfs4_get_valid_delegation(const struct inode *inode) +{ + struct nfs_delegation *delegation; + + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (nfs4_is_valid_delegation(delegation, 0)) + return delegation; + return NULL; +} + static int nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark) { diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index ddaf2644cf13..df41d16dc6ab 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -63,6 +63,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid); bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, nfs4_stateid *dst, struct rpc_cred **cred); +struct nfs_delegation *nfs4_get_valid_delegation(const struct inode *inode); void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); int nfs4_have_delegation(struct inode *inode, fmode_t flags); int nfs4_check_delegation(struct inode *inode, fmode_t flags); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index af062e9f4580..f1526f65cc58 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1355,8 +1355,6 @@ static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode, return 0; if ((delegation->type & fmode) != fmode) return 0; - if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) - return 0; switch (claim) { case NFS4_OPEN_CLAIM_NULL: case NFS4_OPEN_CLAIM_FH: @@ -1615,7 +1613,6 @@ static void nfs4_return_incompatible_delegation(struct inode *inode, fmode_t fmo static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) { struct nfs4_state *state = opendata->state; - struct nfs_inode *nfsi = NFS_I(state->inode); struct nfs_delegation *delegation; int open_mode = opendata->o_arg.open_flags; fmode_t fmode = opendata->o_arg.fmode; @@ -1632,7 +1629,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) } spin_unlock(&state->owner->so_lock); rcu_read_lock(); - delegation = rcu_dereference(nfsi->delegation); + delegation = nfs4_get_valid_delegation(state->inode); if (!can_open_delegated(delegation, fmode, claim)) { rcu_read_unlock(); break; @@ -2153,7 +2150,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) data->o_arg.open_flags, claim)) goto out_no_action; rcu_read_lock(); - delegation = rcu_dereference(NFS_I(data->state->inode)->delegation); + delegation = nfs4_get_valid_delegation(data->state->inode); if (can_open_delegated(delegation, data->o_arg.fmode, claim)) goto unlock_no_action; rcu_read_unlock(); From 93905fb6dc7efbec5f4c278da5ee7bb062657d4a Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Fri, 1 Nov 2019 20:17:25 +0800 Subject: [PATCH 0407/3715] net: ethernet: arc: add the missed clk_disable_unprepare [ Upstream commit 4202e219edd6cc164c042e16fa327525410705ae ] The remove misses to disable and unprepare priv->macclk like what is done when probe fails. Add the missed call in remove. Signed-off-by: Chuhong Yuan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/arc/emac_rockchip.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/arc/emac_rockchip.c b/drivers/net/ethernet/arc/emac_rockchip.c index c770ca37c9b2..a7d30731d376 100644 --- a/drivers/net/ethernet/arc/emac_rockchip.c +++ b/drivers/net/ethernet/arc/emac_rockchip.c @@ -261,6 +261,9 @@ static int emac_rockchip_remove(struct platform_device *pdev) if (priv->regulator) regulator_disable(priv->regulator); + if (priv->soc_data->need_div_macclk) + clk_disable_unprepare(priv->macclk); + free_netdev(ndev); return err; } From 4701d19f3e40813f4afbab27a379c2f62aa04cdf Mon Sep 17 00:00:00 2001 From: Manfred Rudigier Date: Thu, 15 Aug 2019 13:55:20 -0700 Subject: [PATCH 0408/3715] igb: Fix constant media auto sense switching when no cable is connected [ Upstream commit 8d5cfd7f76a2414e23c74bb8858af7540365d985 ] At least on the i350 there is an annoying behavior that is maybe also present on 82580 devices, but was probably not noticed yet as MAS is not widely used. If no cable is connected on both fiber/copper ports the media auto sense code will constantly swap between them as part of the watchdog task and produce many unnecessary kernel log messages. The swap code responsible for this behavior (switching to fiber) should not be executed if the current media type is copper and there is no signal detected on the fiber port. In this case we can safely wait until the AUTOSENSE_EN bit is cleared. Signed-off-by: Manfred Rudigier Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igb/igb_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 71b235f935d9..9c7e75b3b6c7 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -1680,7 +1680,8 @@ static void igb_check_swap_media(struct igb_adapter *adapter) if ((hw->phy.media_type == e1000_media_type_copper) && (!(connsw & E1000_CONNSW_AUTOSENSE_EN))) { swap_now = true; - } else if (!(connsw & E1000_CONNSW_SERDESD)) { + } else if ((hw->phy.media_type != e1000_media_type_copper) && + !(connsw & E1000_CONNSW_SERDESD)) { /* copper signal takes time to appear */ if (adapter->copper_tries < 4) { adapter->copper_tries++; From f69f2d6119535c1f91ef37d0704d65fafee56ae0 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Mon, 12 Aug 2019 00:59:21 -0500 Subject: [PATCH 0409/3715] e1000: fix memory leaks [ Upstream commit 8472ba62154058b64ebb83d5f57259a352d28697 ] In e1000_set_ringparam(), 'tx_old' and 'rx_old' are not deallocated if e1000_up() fails, leading to memory leaks. Refactor the code to fix this issue. Signed-off-by: Wenwen Wang Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/e1000/e1000_ethtool.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c index 10df2d60c181..88b34f722337 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c +++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c @@ -627,6 +627,7 @@ static int e1000_set_ringparam(struct net_device *netdev, for (i = 0; i < adapter->num_rx_queues; i++) rxdr[i].count = rxdr->count; + err = 0; if (netif_running(adapter->netdev)) { /* Try to get new resources before deleting old */ err = e1000_setup_all_rx_resources(adapter); @@ -647,14 +648,13 @@ static int e1000_set_ringparam(struct net_device *netdev, adapter->rx_ring = rxdr; adapter->tx_ring = txdr; err = e1000_up(adapter); - if (err) - goto err_setup; } kfree(tx_old); kfree(rx_old); clear_bit(__E1000_RESETTING, &adapter->flags); - return 0; + return err; + err_setup_tx: e1000_free_all_rx_resources(adapter); err_setup_rx: @@ -666,7 +666,6 @@ err_alloc_rx: err_alloc_tx: if (netif_running(adapter->netdev)) e1000_up(adapter); -err_setup: clear_bit(__E1000_RESETTING, &adapter->flags); return err; } From fe241fe90ac12bc7fa52d9bded4b253c4ee4a27d Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Thu, 1 Mar 2018 13:59:28 +0800 Subject: [PATCH 0410/3715] x86/apic: Move pending interrupt check code into it's own function [ Upstream commit 9b217f33017715903d0956dfc58f82d2a2d00e63 ] The pending interrupt check code is mixed with the local APIC setup code, that looks messy. Extract the related code, move it into a new function named apic_pending_intr_clear(). Signed-off-by: Dou Liyang Signed-off-by: Thomas Gleixner Reviewed-by: Andy Shevchenko Cc: bhe@redhat.com Cc: ebiederm@xmission.com Link: https://lkml.kernel.org/r/20180301055930.2396-2-douly.fnst@cn.fujitsu.com Signed-off-by: Sasha Levin --- arch/x86/kernel/apic/apic.c | 100 ++++++++++++++++++++---------------- 1 file changed, 55 insertions(+), 45 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index ea2de324ab02..98fecdbec640 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1362,6 +1362,56 @@ static void lapic_setup_esr(void) oldvalue, value); } +static void apic_pending_intr_clear(void) +{ + long long max_loops = cpu_khz ? cpu_khz : 1000000; + unsigned long long tsc = 0, ntsc; + unsigned int value, queued; + int i, j, acked = 0; + + if (boot_cpu_has(X86_FEATURE_TSC)) + tsc = rdtsc(); + /* + * After a crash, we no longer service the interrupts and a pending + * interrupt from previous kernel might still have ISR bit set. + * + * Most probably by now CPU has serviced that pending interrupt and + * it might not have done the ack_APIC_irq() because it thought, + * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it + * does not clear the ISR bit and cpu thinks it has already serivced + * the interrupt. Hence a vector might get locked. It was noticed + * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. + */ + do { + queued = 0; + for (i = APIC_ISR_NR - 1; i >= 0; i--) + queued |= apic_read(APIC_IRR + i*0x10); + + for (i = APIC_ISR_NR - 1; i >= 0; i--) { + value = apic_read(APIC_ISR + i*0x10); + for (j = 31; j >= 0; j--) { + if (value & (1< 256) { + printk(KERN_ERR "LAPIC pending interrupts after %d EOI\n", + acked); + break; + } + if (queued) { + if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) { + ntsc = rdtsc(); + max_loops = (cpu_khz << 10) - (ntsc - tsc); + } else + max_loops--; + } + } while (queued && max_loops > 0); + WARN_ON(max_loops <= 0); +} + /** * setup_local_APIC - setup the local APIC * @@ -1371,13 +1421,11 @@ static void lapic_setup_esr(void) void setup_local_APIC(void) { int cpu = smp_processor_id(); - unsigned int value, queued; - int i, j, acked = 0; - unsigned long long tsc = 0, ntsc; - long long max_loops = cpu_khz ? cpu_khz : 1000000; + unsigned int value; +#ifdef CONFIG_X86_32 + int i; +#endif - if (boot_cpu_has(X86_FEATURE_TSC)) - tsc = rdtsc(); if (disable_apic) { disable_ioapic_support(); @@ -1437,45 +1485,7 @@ void setup_local_APIC(void) value &= ~APIC_TPRI_MASK; apic_write(APIC_TASKPRI, value); - /* - * After a crash, we no longer service the interrupts and a pending - * interrupt from previous kernel might still have ISR bit set. - * - * Most probably by now CPU has serviced that pending interrupt and - * it might not have done the ack_APIC_irq() because it thought, - * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it - * does not clear the ISR bit and cpu thinks it has already serivced - * the interrupt. Hence a vector might get locked. It was noticed - * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. - */ - do { - queued = 0; - for (i = APIC_ISR_NR - 1; i >= 0; i--) - queued |= apic_read(APIC_IRR + i*0x10); - - for (i = APIC_ISR_NR - 1; i >= 0; i--) { - value = apic_read(APIC_ISR + i*0x10); - for (j = 31; j >= 0; j--) { - if (value & (1< 256) { - printk(KERN_ERR "LAPIC pending interrupts after %d EOI\n", - acked); - break; - } - if (queued) { - if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) { - ntsc = rdtsc(); - max_loops = (cpu_khz << 10) - (ntsc - tsc); - } else - max_loops--; - } - } while (queued && max_loops > 0); - WARN_ON(max_loops <= 0); + apic_pending_intr_clear(); /* * Now that we are all set up, enable the APIC From 7f2af6c87ee70baa1d755c13f331bfcd546170f1 Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Thu, 1 Mar 2018 13:59:30 +0800 Subject: [PATCH 0411/3715] x86/apic: Drop logical_smp_processor_id() inline [ Upstream commit 8f1561680f42a5491b371b513f1ab8197f31fd62 ] The logical_smp_processor_id() inline which is only called in setup_local_APIC() on x86_32 systems has no real value. Drop it and directly use GET_APIC_LOGICAL_ID() at the call site and use a more suitable variable name for readability Signed-off-by: Dou Liyang Signed-off-by: Thomas Gleixner Cc: andy.shevchenko@gmail.com Cc: bhe@redhat.com Cc: ebiederm@xmission.com Link: https://lkml.kernel.org/r/20180301055930.2396-4-douly.fnst@cn.fujitsu.com Signed-off-by: Sasha Levin --- arch/x86/include/asm/smp.h | 10 ---------- arch/x86/kernel/apic/apic.c | 10 +++++----- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index fe2ee61880a8..a14730ca9d1e 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -175,16 +175,6 @@ static inline int wbinvd_on_all_cpus(void) extern unsigned disabled_cpus; #ifdef CONFIG_X86_LOCAL_APIC - -#ifndef CONFIG_X86_64 -static inline int logical_smp_processor_id(void) -{ - /* we don't want to mark this access volatile - bad code generation */ - return GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); -} - -#endif - extern int hard_smp_processor_id(void); #else /* CONFIG_X86_LOCAL_APIC */ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 98fecdbec640..97d1290d1f0d 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1423,7 +1423,7 @@ void setup_local_APIC(void) int cpu = smp_processor_id(); unsigned int value; #ifdef CONFIG_X86_32 - int i; + int logical_apicid, ldr_apicid; #endif @@ -1470,11 +1470,11 @@ void setup_local_APIC(void) * initialized during get_smp_config(), make sure it matches the * actual value. */ - i = early_per_cpu(x86_cpu_to_logical_apicid, cpu); - WARN_ON(i != BAD_APICID && i != logical_smp_processor_id()); + logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); + ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); + WARN_ON(logical_apicid != BAD_APICID && logical_apicid != ldr_apicid); /* always use the value from LDR */ - early_per_cpu(x86_cpu_to_logical_apicid, cpu) = - logical_smp_processor_id(); + early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid; #endif /* From 5bbc8cba1d1729b9427656bb134943c3b868a3a9 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 29 Oct 2019 10:34:19 +0100 Subject: [PATCH 0412/3715] x86/apic/32: Avoid bogus LDR warnings [ Upstream commit fe6f85ca121e9c74e7490fe66b0c5aae38e332c3 ] The removal of the LDR initialization in the bigsmp_32 APIC code unearthed a problem in setup_local_APIC(). The code checks unconditionally for a mismatch of the logical APIC id by comparing the early APIC id which was initialized in get_smp_config() with the actual LDR value in the APIC. Due to the removal of the bogus LDR initialization the check now can trigger on bigsmp_32 APIC systems emitting a warning for every booting CPU. This is of course a false positive because the APIC is not using logical destination mode. Restrict the check and the possibly resulting fixup to systems which are actually using the APIC in logical destination mode. [ tglx: Massaged changelog and added Cc stable ] Fixes: bae3a8d3308 ("x86/apic: Do not initialize LDR and DFR for bigsmp") Signed-off-by: Jan Beulich Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/666d8f91-b5a8-1afd-7add-821e72a35f03@suse.com Signed-off-by: Sasha Levin --- arch/x86/kernel/apic/apic.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 97d1290d1f0d..6415b4aead54 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1422,9 +1422,6 @@ void setup_local_APIC(void) { int cpu = smp_processor_id(); unsigned int value; -#ifdef CONFIG_X86_32 - int logical_apicid, ldr_apicid; -#endif if (disable_apic) { @@ -1465,16 +1462,21 @@ void setup_local_APIC(void) apic->init_apic_ldr(); #ifdef CONFIG_X86_32 - /* - * APIC LDR is initialized. If logical_apicid mapping was - * initialized during get_smp_config(), make sure it matches the - * actual value. - */ - logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); - ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); - WARN_ON(logical_apicid != BAD_APICID && logical_apicid != ldr_apicid); - /* always use the value from LDR */ - early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid; + if (apic->dest_logical) { + int logical_apicid, ldr_apicid; + + /* + * APIC LDR is initialized. If logical_apicid mapping was + * initialized during get_smp_config(), make sure it matches + * the actual value. + */ + logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); + ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); + if (logical_apicid != BAD_APICID) + WARN_ON(logical_apicid != ldr_apicid); + /* Always use the value from LDR. */ + early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid; + } #endif /* From 665b71ac70e2320634059118221f2e949f21609d Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Thu, 15 Aug 2019 08:00:26 +0000 Subject: [PATCH 0413/3715] can: flexcan: disable completely the ECC mechanism [ Upstream commit 5e269324db5adb2f5f6ec9a93a9c7b0672932b47 ] The ECC (memory error detection and correction) mechanism can be activated or not, controlled by the ECCDIS bit in CAN_MECR. When disabled, updates on indications and reporting registers are stopped. So if want to disable ECC completely, had better assert ECCDIS bit, not just mask the related interrupts. Fixes: cdce844865be ("can: flexcan: add vf610 support for FlexCAN") Signed-off-by: Joakim Zhang Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/flexcan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 7280f3a8aa04..84dd79041285 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -1018,6 +1018,7 @@ static int flexcan_chip_start(struct net_device *dev) reg_mecr = flexcan_read(®s->mecr); reg_mecr &= ~FLEXCAN_MECR_ECRWRDIS; flexcan_write(reg_mecr, ®s->mecr); + reg_mecr |= FLEXCAN_MECR_ECCDIS; reg_mecr &= ~(FLEXCAN_MECR_NCEFAFRZ | FLEXCAN_MECR_HANCEI_MSK | FLEXCAN_MECR_FANCEI_MSK); flexcan_write(reg_mecr, ®s->mecr); From 42364a188ba5225cffdeee6c17c7f9af1a909cbd Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Mon, 23 Sep 2019 15:34:45 -0700 Subject: [PATCH 0414/3715] mm/filemap.c: don't initiate writeback if mapping has no dirty pages commit c3aab9a0bd91b696a852169479b7db1ece6cbf8c upstream. Functions like filemap_write_and_wait_range() should do nothing if inode has no dirty pages or pages currently under writeback. But they anyway construct struct writeback_control and this does some atomic operations if CONFIG_CGROUP_WRITEBACK=y - on fast path it locks inode->i_lock and updates state of writeback ownership, on slow path might be more work. Current this path is safely avoided only when inode mapping has no pages. For example generic_file_read_iter() calls filemap_write_and_wait_range() at each O_DIRECT read - pretty hot path. This patch skips starting new writeback if mapping has no dirty tags set. If writeback is already in progress filemap_write_and_wait_range() will wait for it. Link: http://lkml.kernel.org/r/156378816804.1087.8607636317907921438.stgit@buzz Signed-off-by: Konstantin Khlebnikov Reviewed-by: Jan Kara Cc: Tejun Heo Cc: Jens Axboe Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/filemap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/filemap.c b/mm/filemap.c index 938365ad7e99..a30dbf93de99 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -338,7 +338,8 @@ int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start, .range_end = end, }; - if (!mapping_cap_writeback_dirty(mapping)) + if (!mapping_cap_writeback_dirty(mapping) || + !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) return 0; wbc_attach_fdatawrite_inode(&wbc, mapping->host); From d527ab46d14451ec6a6257f706dccac3a3089133 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 8 Nov 2019 12:18:29 -0800 Subject: [PATCH 0415/3715] cgroup,writeback: don't switch wbs immediately on dead wbs if the memcg is dead commit 65de03e251382306a4575b1779c57c87889eee49 upstream. cgroup writeback tries to refresh the associated wb immediately if the current wb is dead. This is to avoid keeping issuing IOs on the stale wb after memcg - blkcg association has changed (ie. when blkcg got disabled / enabled higher up in the hierarchy). Unfortunately, the logic gets triggered spuriously on inodes which are associated with dead cgroups. When the logic is triggered on dead cgroups, the attempt fails only after doing quite a bit of work allocating and initializing a new wb. While c3aab9a0bd91 ("mm/filemap.c: don't initiate writeback if mapping has no dirty pages") alleviated the issue significantly as it now only triggers when the inode has dirty pages. However, the condition can still be triggered before the inode is switched to a different cgroup and the logic simply doesn't make sense. Skip the immediate switching if the associated memcg is dying. This is a simplified version of the following two patches: * https://lore.kernel.org/linux-mm/20190513183053.GA73423@dennisz-mbp/ * http://lkml.kernel.org/r/156355839560.2063.5265687291430814589.stgit@buzz Cc: Konstantin Khlebnikov Fixes: e8a7abf5a5bd ("writeback: disassociate inodes from dying bdi_writebacks") Acked-by: Dennis Zhou Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/fs-writeback.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 9e8fde348d61..6398bd8a066e 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -582,10 +582,13 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc, spin_unlock(&inode->i_lock); /* - * A dying wb indicates that the memcg-blkcg mapping has changed - * and a new wb is already serving the memcg. Switch immediately. + * A dying wb indicates that either the blkcg associated with the + * memcg changed or the associated memcg is dying. In the first + * case, a replacement wb should already be available and we should + * refresh the wb immediately. In the second case, trying to + * refresh will keep failing. */ - if (unlikely(wb_dying(wbc->wb))) + if (unlikely(wb_dying(wbc->wb) && !css_is_dying(wbc->wb->memcg_css))) inode_switch_wbs(inode, wbc->wb_id); } From 852a503230d9acc386a4c7b4e4b5c252f62c90c9 Mon Sep 17 00:00:00 2001 From: Suwan Kim Date: Tue, 22 Oct 2019 18:30:17 +0900 Subject: [PATCH 0416/3715] usbip: Fix free of unallocated memory in vhci tx [ Upstream commit d4d8257754c3300ea2a465dadf8d2b02c713c920 ] iso_buffer should be set to NULL after use and free in the while loop. In the case of isochronous URB in the while loop, iso_buffer is allocated and after sending it to server, buffer is deallocated. And then, if the next URB in the while loop is not a isochronous pipe, iso_buffer still holds the previously deallocated buffer address and kfree tries to free wrong buffer address. Fixes: ea44d190764b ("usbip: Implement SG support to vhci-hcd and stub driver") Reported-by: kbuild test robot Reported-by: Julia Lawall Signed-off-by: Suwan Kim Reviewed-by: Julia Lawall Acked-by: Shuah Khan Link: https://lore.kernel.org/r/20191022093017.8027-1-suwan.kim027@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/usbip/vhci_tx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/usbip/vhci_tx.c b/drivers/usb/usbip/vhci_tx.c index 93c139d884f3..682127d258fd 100644 --- a/drivers/usb/usbip/vhci_tx.c +++ b/drivers/usb/usbip/vhci_tx.c @@ -161,7 +161,10 @@ static int vhci_send_cmd_submit(struct vhci_device *vdev) } kfree(iov); + /* This is only for isochronous case */ kfree(iso_buffer); + iso_buffer = NULL; + usbip_dbg_vhci_tx("send txdata\n"); total_size += txsize; From 47598ed4ad3cbf2102fe685e516dba1d61ad252d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 4 Nov 2019 21:38:43 -0800 Subject: [PATCH 0417/3715] net: prevent load/store tearing on sk->sk_stamp [ Upstream commit f75359f3ac855940c5718af10ba089b8977bf339 ] Add a couple of READ_ONCE() and WRITE_ONCE() to prevent load-tearing and store-tearing in sock_read_timestamp() and sock_write_timestamp() This might prevent another KCSAN report. Fixes: 3a0ed3e96197 ("sock: Make sock->sk_stamp thread-safe") Signed-off-by: Eric Dumazet Cc: Deepa Dinamani Acked-by: Deepa Dinamani Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/sock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 7ec4d0bd8d12..780c6c0a86f0 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2229,7 +2229,7 @@ static inline ktime_t sock_read_timestamp(struct sock *sk) return kt; #else - return sk->sk_stamp; + return READ_ONCE(sk->sk_stamp); #endif } @@ -2240,7 +2240,7 @@ static inline void sock_write_timestamp(struct sock *sk, ktime_t kt) sk->sk_stamp = kt; write_sequnlock(&sk->sk_stamp_seq); #else - sk->sk_stamp = kt; + WRITE_ONCE(sk->sk_stamp, kt); #endif } From b8fa42e6cfbf8220b90d054af7ebb1de250ea9ea Mon Sep 17 00:00:00 2001 From: Jon Bloomfield Date: Thu, 12 Jul 2018 19:53:10 +0100 Subject: [PATCH 0418/3715] drm/i915/gtt: Add read only pages to gen8_pte_encode commit 25dda4dabeeb12af5209b0183c788ef2a88dabbe upstream. We can set a bit inside the ppGTT PTE to indicate a page is read-only; writes from the GPU will be discarded. We can use this to protect pages and in particular support read-only userptr mappings (necessary for importing PROT_READ vma). Signed-off-by: Jon Bloomfield Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Matthew Auld Reviewed-by: Joonas Lahtinen Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20180712185315.3288-1-chris@chris-wilson.co.uk Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem_gtt.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index ad524cb0f6fc..d8d6632abe77 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -223,10 +223,13 @@ static void ppgtt_unbind_vma(struct i915_vma *vma) } static gen8_pte_t gen8_pte_encode(dma_addr_t addr, - enum i915_cache_level level) + enum i915_cache_level level, + u32 flags) { - gen8_pte_t pte = _PAGE_PRESENT | _PAGE_RW; - pte |= addr; + gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW; + + if (unlikely(flags & PTE_READ_ONLY)) + pte &= ~_PAGE_RW; switch (level) { case I915_CACHE_NONE: @@ -487,7 +490,7 @@ static void gen8_initialize_pt(struct i915_address_space *vm, struct i915_page_table *pt) { fill_px(vm, pt, - gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC)); + gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0)); } static void gen6_initialize_pt(struct i915_address_space *vm, @@ -691,7 +694,7 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, unsigned int pte = gen8_pte_index(start); unsigned int pte_end = pte + num_entries; const gen8_pte_t scratch_pte = - gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); + gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0); gen8_pte_t *vaddr; GEM_BUG_ON(num_entries > pt->used_ptes); @@ -866,7 +869,7 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, enum i915_cache_level cache_level) { struct i915_page_directory *pd; - const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level); + const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, 0); gen8_pte_t *vaddr; bool ret; @@ -1264,7 +1267,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) { struct i915_address_space *vm = &ppgtt->base; const gen8_pte_t scratch_pte = - gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); + gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0); u64 start = 0, length = ppgtt->base.total; if (use_4lvl(vm)) { @@ -2078,7 +2081,7 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm, gen8_pte_t __iomem *pte = (gen8_pte_t __iomem *)ggtt->gsm + (offset >> PAGE_SHIFT); - gen8_set_pte(pte, gen8_pte_encode(addr, level)); + gen8_set_pte(pte, gen8_pte_encode(addr, level, 0)); ggtt->invalidate(vm->i915); } @@ -2091,7 +2094,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); struct sgt_iter sgt_iter; gen8_pte_t __iomem *gtt_entries; - const gen8_pte_t pte_encode = gen8_pte_encode(0, level); + const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0); dma_addr_t addr; gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm; @@ -2162,7 +2165,7 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm, unsigned first_entry = start >> PAGE_SHIFT; unsigned num_entries = length >> PAGE_SHIFT; const gen8_pte_t scratch_pte = - gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); + gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0); gen8_pte_t __iomem *gtt_base = (gen8_pte_t __iomem *)ggtt->gsm + first_entry; const int max_entries = ggtt_total_entries(ggtt) - first_entry; From 0a41eb61a26b19fa7172c41410126ec1058769e1 Mon Sep 17 00:00:00 2001 From: "Vivi, Rodrigo" Date: Mon, 6 Aug 2018 14:10:48 -0700 Subject: [PATCH 0419/3715] drm/i915/gtt: Read-only pages for insert_entries on bdw+ commit 250f8c8140ac0a5e5acb91891d6813f12778b224 upstream. Hook up the flags to allow read-only ppGTT mappings for gen8+ v2: Include a selftest to check that writes to a readonly PTE are dropped v3: Don't duplicate cpu_check() as we can just reuse it, and even worse don't wholesale copy the theory-of-operation comment from igt_ctx_exec without changing it to explain the intention behind the new test! v4: Joonas really likes magic mystery values Signed-off-by: Jon Bloomfield Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Matthew Auld Reviewed-by: Joonas Lahtinen Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20180712185315.3288-2-chris@chris-wilson.co.uk Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem_gtt.c | 37 ++++++++++++++++--------- drivers/gpu/drm/i915/i915_gem_gtt.h | 7 ++++- drivers/gpu/drm/i915/intel_ringbuffer.c | 11 ++++++-- 3 files changed, 38 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index d8d6632abe77..bc79d952b81a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -207,7 +207,7 @@ static int ppgtt_bind_vma(struct i915_vma *vma, vma->pages = vma->obj->mm.pages; - /* Currently applicable only to VLV */ + /* Applicable to VLV, and gen8+ */ pte_flags = 0; if (vma->obj->gt_ro) pte_flags |= PTE_READ_ONLY; @@ -866,10 +866,11 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, struct i915_page_directory_pointer *pdp, struct sgt_dma *iter, struct gen8_insert_pte *idx, - enum i915_cache_level cache_level) + enum i915_cache_level cache_level, + u32 flags) { struct i915_page_directory *pd; - const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, 0); + const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags); gen8_pte_t *vaddr; bool ret; @@ -920,20 +921,20 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm, struct i915_vma *vma, enum i915_cache_level cache_level, - u32 unused) + u32 flags) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct sgt_dma iter = sgt_dma(vma); struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start); gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx, - cache_level); + cache_level, flags); } static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, struct i915_vma *vma, enum i915_cache_level cache_level, - u32 unused) + u32 flags) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct sgt_dma iter = sgt_dma(vma); @@ -941,7 +942,7 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start); while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++], &iter, - &idx, cache_level)) + &idx, cache_level, flags)) GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4); } @@ -1342,6 +1343,9 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) return ret; } + /* From bdw, there is support for read-only pages in the PPGTT */ + ppgtt->base.has_read_only = true; + /* There are only few exceptions for gen >=6. chv and bxt. * And we are not sure about the latter so play safe for now. */ @@ -2089,7 +2093,7 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm, static void gen8_ggtt_insert_entries(struct i915_address_space *vm, struct i915_vma *vma, enum i915_cache_level level, - u32 unused) + u32 flags) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); struct sgt_iter sgt_iter; @@ -2097,6 +2101,9 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0); dma_addr_t addr; + /* The GTT does not support read-only mappings */ + GEM_BUG_ON(flags & PTE_READ_ONLY); + gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm; gtt_entries += vma->node.start >> PAGE_SHIFT; for_each_sgt_dma(addr, sgt_iter, vma->pages) @@ -2226,13 +2233,14 @@ struct insert_entries { struct i915_address_space *vm; struct i915_vma *vma; enum i915_cache_level level; + u32 flags; }; static int bxt_vtd_ggtt_insert_entries__cb(void *_arg) { struct insert_entries *arg = _arg; - gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, 0); + gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags); bxt_vtd_ggtt_wa(arg->vm); return 0; @@ -2241,9 +2249,9 @@ static int bxt_vtd_ggtt_insert_entries__cb(void *_arg) static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm, struct i915_vma *vma, enum i915_cache_level level, - u32 unused) + u32 flags) { - struct insert_entries arg = { vm, vma, level }; + struct insert_entries arg = { vm, vma, level, flags }; stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL); } @@ -2340,7 +2348,7 @@ static int ggtt_bind_vma(struct i915_vma *vma, return ret; } - /* Currently applicable only to VLV */ + /* Applicable to VLV (gen8+ do not support RO in the GGTT) */ pte_flags = 0; if (obj->gt_ro) pte_flags |= PTE_READ_ONLY; @@ -3066,6 +3074,10 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) */ mutex_lock(&dev_priv->drm.struct_mutex); i915_address_space_init(&ggtt->base, dev_priv, "[global]"); + + /* Only VLV supports read-only GGTT mappings */ + ggtt->base.has_read_only = IS_VALLEYVIEW(dev_priv); + if (!HAS_LLC(dev_priv) && !USES_PPGTT(dev_priv)) ggtt->base.mm.color_adjust = i915_gtt_color_adjust; mutex_unlock(&dev_priv->drm.struct_mutex); @@ -3098,7 +3110,6 @@ int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv) { if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt()) return -EIO; - return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 0dbbe840f5f0..4b63d6cbd81e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -295,7 +295,12 @@ struct i915_address_space { struct list_head unbound_list; struct pagevec free_pages; - bool pt_kmap_wc; + + /* Some systems require uncached updates of the page directories */ + bool pt_kmap_wc:1; + + /* Some systems support read-only mappings for GGTT and/or PPGTT */ + bool has_read_only:1; /* FIXME: Need a more generic return type */ gen6_pte_t (*pte_encode)(dma_addr_t addr, diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index cdf084ef5aae..19fe2489065a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1358,6 +1358,7 @@ void intel_ring_unpin(struct intel_ring *ring) static struct i915_vma * intel_ring_create_vma(struct drm_i915_private *dev_priv, int size) { + struct i915_address_space *vm = &dev_priv->ggtt.base; struct drm_i915_gem_object *obj; struct i915_vma *vma; @@ -1367,10 +1368,14 @@ intel_ring_create_vma(struct drm_i915_private *dev_priv, int size) if (IS_ERR(obj)) return ERR_CAST(obj); - /* mark ring buffers as read-only from GPU side by default */ - obj->gt_ro = 1; + /* + * Mark ring buffers as read-only from GPU side (so no stray overwrites) + * if supported by the platform's GGTT. + */ + if (vm->has_read_only) + obj->gt_ro = 1; - vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL); + vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) goto err; From 9d41e1d06a46c20ea0ddf25a366192a9f7810f8e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 12 Jul 2018 19:53:12 +0100 Subject: [PATCH 0420/3715] drm/i915/gtt: Disable read-only support under GVT commit c9e666880de5a1fed04dc412b046916d542b72dd upstream. GVT is not propagating the PTE bits, and is always setting the read-write bit, thus breaking read-only support. Signed-off-by: Chris Wilson Cc: Zhenyu Wang Cc: Jon Bloomfield Cc: Joonas Lahtinen Cc: Matthew Auld Reviewed-by: Jon Bloomfield Link: https://patchwork.freedesktop.org/patch/msgid/20180712185315.3288-3-chris@chris-wilson.co.uk Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem_gtt.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index bc79d952b81a..920d064e9b53 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1343,8 +1343,12 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) return ret; } - /* From bdw, there is support for read-only pages in the PPGTT */ - ppgtt->base.has_read_only = true; + /* + * From bdw, there is support for read-only pages in the PPGTT. + * + * XXX GVT is not honouring the lack of RW in the PTE bits. + */ + ppgtt->base.has_read_only = !intel_vgpu_active(dev_priv); /* There are only few exceptions for gen >=6. chv and bxt. * And we are not sure about the latter so play safe for now. From d24b5d0d6d5f0763431350bf7b2b1a256baec692 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 12 Jul 2018 19:53:13 +0100 Subject: [PATCH 0421/3715] drm/i915: Prevent writing into a read-only object via a GGTT mmap commit 3e977ac6179b39faa3c0eda5fce4f00663ae298d upstream. If the user has created a read-only object, they should not be allowed to circumvent the write protection by using a GGTT mmapping. Deny it. Also most machines do not support read-only GGTT PTEs, so again we have to reject attempted writes. Fortunately, this is known a priori, so we can at least reject in the call to create the mmap (with a sanity check in the fault handler). v2: Check the vma->vm_flags during mmap() to allow readonly access. v3: Remove VM_MAYWRITE to curtail mprotect() Testcase: igt/gem_userptr_blits/readonly_mmap* Signed-off-by: Chris Wilson Cc: Jon Bloomfield Cc: Joonas Lahtinen Cc: Matthew Auld Cc: David Herrmann Reviewed-by: Matthew Auld #v1 Reviewed-by: Jon Bloomfield Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20180712185315.3288-4-chris@chris-wilson.co.uk Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_gem.c | 9 +++++++++ drivers/gpu/drm/i915/i915_gem.c | 4 ++++ drivers/gpu/drm/i915/i915_gem_gtt.c | 12 +++++++----- drivers/gpu/drm/i915/i915_gem_object.h | 13 ++++++++++++- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- include/drm/drm_vma_manager.h | 1 + 6 files changed, 34 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index c55f338e380b..d2c042af36b8 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -1035,6 +1035,15 @@ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma) return -EACCES; } + if (node->readonly) { + if (vma->vm_flags & VM_WRITE) { + drm_gem_object_put_unlocked(obj); + return -EINVAL; + } + + vma->vm_flags &= ~VM_MAYWRITE; + } + ret = drm_gem_mmap_obj(obj, drm_vma_node_size(node) << PAGE_SHIFT, vma); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 727018a16cca..f95e2c8ac239 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1834,6 +1834,10 @@ int i915_gem_fault(struct vm_fault *vmf) unsigned int flags; int ret; + /* Sanity check that we allow writing into this object */ + if (i915_gem_object_is_readonly(obj) && write) + return VM_FAULT_SIGBUS; + /* We don't use vmf->pgoff since that has the fake offset */ page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 920d064e9b53..6f5e1e18e530 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -209,7 +209,7 @@ static int ppgtt_bind_vma(struct i915_vma *vma, /* Applicable to VLV, and gen8+ */ pte_flags = 0; - if (vma->obj->gt_ro) + if (i915_gem_object_is_readonly(vma->obj)) pte_flags |= PTE_READ_ONLY; vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); @@ -2105,8 +2105,10 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0); dma_addr_t addr; - /* The GTT does not support read-only mappings */ - GEM_BUG_ON(flags & PTE_READ_ONLY); + /* + * Note that we ignore PTE_READ_ONLY here. The caller must be careful + * not to allow the user to override access to a read only page. + */ gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm; gtt_entries += vma->node.start >> PAGE_SHIFT; @@ -2354,7 +2356,7 @@ static int ggtt_bind_vma(struct i915_vma *vma, /* Applicable to VLV (gen8+ do not support RO in the GGTT) */ pte_flags = 0; - if (obj->gt_ro) + if (i915_gem_object_is_readonly(obj)) pte_flags |= PTE_READ_ONLY; intel_runtime_pm_get(i915); @@ -2396,7 +2398,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, /* Currently applicable only to VLV */ pte_flags = 0; - if (vma->obj->gt_ro) + if (i915_gem_object_is_readonly(vma->obj)) pte_flags |= PTE_READ_ONLY; if (flags & I915_VMA_LOCAL_BIND) { diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index c30d8f808185..39cfe04dcdb8 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -140,7 +140,6 @@ struct drm_i915_gem_object { * Is the object to be mapped as read-only to the GPU * Only honoured if hardware has relevant pte bit */ - unsigned long gt_ro:1; unsigned int cache_level:3; unsigned int cache_coherent:2; #define I915_BO_CACHE_COHERENT_FOR_READ BIT(0) @@ -313,6 +312,18 @@ static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj) reservation_object_unlock(obj->resv); } +static inline void +i915_gem_object_set_readonly(struct drm_i915_gem_object *obj) +{ + obj->base.vma_node.readonly = true; +} + +static inline bool +i915_gem_object_is_readonly(const struct drm_i915_gem_object *obj) +{ + return obj->base.vma_node.readonly; +} + static inline bool i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 19fe2489065a..63667a5c2c87 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1373,7 +1373,7 @@ intel_ring_create_vma(struct drm_i915_private *dev_priv, int size) * if supported by the platform's GGTT. */ if (vm->has_read_only) - obj->gt_ro = 1; + i915_gem_object_set_readonly(obj); vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) diff --git a/include/drm/drm_vma_manager.h b/include/drm/drm_vma_manager.h index d84d52f6d2b1..b54c98f05460 100644 --- a/include/drm/drm_vma_manager.h +++ b/include/drm/drm_vma_manager.h @@ -41,6 +41,7 @@ struct drm_vma_offset_node { rwlock_t vm_lock; struct drm_mm_node vm_node; struct rb_root vm_files; + bool readonly:1; }; struct drm_vma_offset_manager { From f4a8cc1331d00ba40ca3160d2f6e580511d5e7e4 Mon Sep 17 00:00:00 2001 From: Michal Srb Date: Mon, 5 Feb 2018 16:04:37 +0000 Subject: [PATCH 0422/3715] drm/i915/cmdparser: Check reg_table_count before derefencing. commit b18224e95cb13ef7517aa26e6b47c85117327f11 upstream. The find_reg function was assuming that there is always at least one table in reg_tables. It is not always true. In case of VCS or VECS, the reg_tables is NULL and reg_table_count is 0, implying that no register-accessing commands are allowed. However, the command tables include commands such as MI_STORE_REGISTER_MEM. When trying to check such command, the find_reg would dereference NULL pointer. Now it will just return NULL meaning that the register was not found and the command will be rejected. Fixes: 76ff480ec963 ("drm/i915/cmdparser: Use binary search for faster register lookup") Signed-off-by: Michal Srb Link: https://patchwork.freedesktop.org/patch/msgid/20180205142916.27092-2-msrb@suse.com Cc: Chris Wilson Cc: Matthew Auld Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180205160438.3267-1-chris@chris-wilson.co.uk register lookup") Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_cmd_parser.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 8ba932b22f7c..de7ec59433d1 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1038,7 +1038,7 @@ find_reg(const struct intel_engine_cs *engine, bool is_master, u32 addr) const struct drm_i915_reg_table *table = engine->reg_tables; int count = engine->reg_table_count; - do { + for (; count > 0; ++table, --count) { if (!table->master || is_master) { const struct drm_i915_reg_descriptor *reg; @@ -1046,7 +1046,7 @@ find_reg(const struct intel_engine_cs *engine, bool is_master, u32 addr) if (reg != NULL) return reg; } - } while (table++, --count); + } return NULL; } From 4ca946ea4e12c27284290dce6a222e91bca67aee Mon Sep 17 00:00:00 2001 From: Michal Srb Date: Mon, 5 Feb 2018 16:04:38 +0000 Subject: [PATCH 0423/3715] drm/i915/cmdparser: Do not check past the cmd length. commit b3ad99ed45917f42884fee731fa3cf9b8229a26c upstream. The command MEDIA_VFE_STATE checks bits at offset +2 dwords. However, it is possible to have MEDIA_VFE_STATE command with length = 0 + LENGTH_BIAS = 2. In that case check_cmd will read bits from the following command, or even past the end of the buffer. If the offset ends up outside of the command length, reject the command. Fixes: 351e3db2b363 ("drm/i915: Implement command buffer parsing logic") Signed-off-by: Michal Srb Link: https://patchwork.freedesktop.org/patch/msgid/20180205151745.29292-1-msrb@suse.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180205160438.3267-2-chris@chris-wilson.co.uk Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_cmd_parser.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index de7ec59433d1..ef7ad016d67c 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1218,6 +1218,12 @@ static bool check_cmd(const struct intel_engine_cs *engine, continue; } + if (desc->bits[i].offset >= length) { + DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X, too short to check bitmask (%s)\n", + *cmd, engine->name); + return false; + } + dword = cmd[desc->bits[i].offset] & desc->bits[i].mask; From 738878ada16538c664a3ae032474fdd845765249 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 7 Nov 2017 15:40:55 +0000 Subject: [PATCH 0424/3715] drm/i915: Silence smatch for cmdparser commit 0ffba1fc98e8ec35caae8d50b657296ebb9a9a51 upstream. drivers/gpu/drm/i915/i915_cmd_parser.c:808:23: error: not an lvalue drivers/gpu/drm/i915/i915_cmd_parser.c:811:23: error: not an lvalue drivers/gpu/drm/i915/i915_cmd_parser.c:814:23: error: not an lvalue drivers/gpu/drm/i915/i915_cmd_parser.c:808:23: error: not an lvalue drivers/gpu/drm/i915/i915_cmd_parser.c:811:23: error: not an lvalue drivers/gpu/drm/i915/i915_cmd_parser.c:814:23: error: not an lvalue drivers/gpu/drm/i915/i915_cmd_parser.c:808:23: error: not an lvalue drivers/gpu/drm/i915/i915_cmd_parser.c:811:23: error: not an lvalue drivers/gpu/drm/i915/i915_cmd_parser.c:814:23: error: not an lvalue drivers/gpu/drm/i915/i915_cmd_parser.c:808:23: error: not an lvalue drivers/gpu/drm/i915/i915_cmd_parser.c:811:23: error: not an lvalue drivers/gpu/drm/i915/i915_cmd_parser.c:814:23: error: not an lvalue If we move the shift into each case not only do we kill the warning from smatch, but we shrink the code slightly: text data bss dec hex filename 1267906 20587 3168 1291661 13b58d before 1267890 20587 3168 1291645 13b57d after Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Mika Kuoppala Cc: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20171107154055.19460-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_cmd_parser.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index ef7ad016d67c..d0a9f8d416b2 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -798,22 +798,15 @@ struct cmd_node { */ static inline u32 cmd_header_key(u32 x) { - u32 shift; - switch (x >> INSTR_CLIENT_SHIFT) { default: case INSTR_MI_CLIENT: - shift = STD_MI_OPCODE_SHIFT; - break; + return x >> STD_MI_OPCODE_SHIFT; case INSTR_RC_CLIENT: - shift = STD_3D_OPCODE_SHIFT; - break; + return x >> STD_3D_OPCODE_SHIFT; case INSTR_BC_CLIENT: - shift = STD_2D_OPCODE_SHIFT; - break; + return x >> STD_2D_OPCODE_SHIFT; } - - return x >> shift; } static int init_hash_table(struct intel_engine_cs *engine, From 26f26f603809180cbf3f9f83e20e4e734d180f42 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 26 Aug 2017 14:56:20 +0100 Subject: [PATCH 0425/3715] drm/i915: Don't use GPU relocations prior to cmdparser stalls commit 3dbf26ed7b9b40a8cb008ab9ad25703363af815d upstream. If we are using the cmdparser, we will have to copy the batch and so stall for the relocations. Rather than prolong that stall by adding more relocation requests, just use CPU relocations and do the stall upfront. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20170826135620.25949-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 4cc9ce4b5b16..d3b005b13991 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -268,6 +268,11 @@ static inline u64 gen8_noncanonical_addr(u64 address) return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0); } +static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) +{ + return eb->engine->needs_cmd_parser && eb->batch_len; +} + static int eb_create(struct i915_execbuffer *eb) { if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) { @@ -1165,6 +1170,10 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb, if (unlikely(!cache->rq)) { int err; + /* If we need to copy for the cmdparser, we will stall anyway */ + if (eb_use_cmdparser(eb)) + return ERR_PTR(-EWOULDBLOCK); + err = __reloc_gpu_alloc(eb, vma, len); if (unlikely(err)) return ERR_PTR(err); @@ -2305,7 +2314,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, goto err_vma; } - if (eb.engine->needs_cmd_parser && eb.batch_len) { + if (eb_use_cmdparser(&eb)) { struct i915_vma *vma; vma = eb_parse(&eb, drm_is_current_master(file)); From 8d2541e95f5b320cc401a59ef6af6786f61f8cb1 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 29 Nov 2017 08:24:09 +0000 Subject: [PATCH 0426/3715] drm/i915: Move engine->needs_cmd_parser to engine->flags commit 439e2ee4ca520e72870e4fa44aa0076060ad6857 upstream. Will be adding a new per-engine flags shortly so it makes sense to consolidate. v2: Keep the original code flow in intel_engine_cleanup_cmd_parser. (Joonas Lahtinen) Signed-off-by: Tvrtko Ursulin Suggested-by: Chris Wilson Reviewed-by: Chris Wilson Reviewed-by: Sagar Arun Kamble Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171129082409.18189-1-tvrtko.ursulin@linux.intel.com Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_cmd_parser.c | 7 ++++--- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 8 +++++++- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index d0a9f8d416b2..95478db9998b 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -26,6 +26,7 @@ */ #include "i915_drv.h" +#include "intel_ringbuffer.h" /** * DOC: batch buffer command parser @@ -940,7 +941,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) return; } - engine->needs_cmd_parser = true; + engine->flags |= I915_ENGINE_NEEDS_CMD_PARSER; } /** @@ -952,7 +953,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) */ void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine) { - if (!engine->needs_cmd_parser) + if (!intel_engine_needs_cmd_parser(engine)) return; fini_hash_table(engine); @@ -1356,7 +1357,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv) /* If the command parser is not enabled, report 0 - unsupported */ for_each_engine(engine, dev_priv, id) { - if (engine->needs_cmd_parser) { + if (intel_engine_needs_cmd_parser(engine)) { active = true; break; } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index d3b005b13991..b5dd25b12011 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -270,7 +270,7 @@ static inline u64 gen8_noncanonical_addr(u64 address) static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) { - return eb->engine->needs_cmd_parser && eb->batch_len; + return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len; } static int eb_create(struct i915_execbuffer *eb) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 6b2067f10824..1a00a4e2732b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -417,7 +417,8 @@ struct intel_engine_cs { struct intel_engine_hangcheck hangcheck; - bool needs_cmd_parser; +#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0) + unsigned int flags; /* * Table of commands the command parser needs to know about @@ -444,6 +445,11 @@ struct intel_engine_cs { u32 (*get_cmd_length_mask)(u32 cmd_header); }; +static inline bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER; +} + static inline unsigned int intel_engine_flag(const struct intel_engine_cs *engine) { From 82e0caec38b8eb2ec4fa90b614ba02fc297faeb7 Mon Sep 17 00:00:00 2001 From: Jon Bloomfield Date: Fri, 20 Apr 2018 14:26:01 -0700 Subject: [PATCH 0427/3715] drm/i915: Rename gen7 cmdparser tables commit 0a2f661b6c21815a7fa60e30babe975fee8e73c6 upstream. We're about to introduce some new tables for later gens, and the current naming for the gen7 tables will no longer make sense. Signed-off-by: Jon Bloomfield Cc: Tony Luck Cc: Dave Airlie Cc: Takashi Iwai Cc: Tyler Hicks Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_cmd_parser.c | 70 +++++++++++++------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 95478db9998b..452c45ad89ce 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -211,7 +211,7 @@ struct drm_i915_cmd_table { /* Command Mask Fixed Len Action ---------------------------------------------------------- */ -static const struct drm_i915_cmd_descriptor common_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_common_cmds[] = { CMD( MI_NOOP, SMI, F, 1, S ), CMD( MI_USER_INTERRUPT, SMI, F, 1, R ), CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, M ), @@ -244,7 +244,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = { CMD( MI_BATCH_BUFFER_START, SMI, !F, 0xFF, S ), }; -static const struct drm_i915_cmd_descriptor render_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_render_cmds[] = { CMD( MI_FLUSH, SMI, F, 1, S ), CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), CMD( MI_PREDICATE, SMI, F, 1, S ), @@ -328,7 +328,7 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = { CMD( GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS, S3D, !F, 0x1FF, S ), }; -static const struct drm_i915_cmd_descriptor video_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_video_cmds[] = { CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), CMD( MI_SET_APPID, SMI, F, 1, S ), CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B, @@ -372,7 +372,7 @@ static const struct drm_i915_cmd_descriptor video_cmds[] = { CMD( MFX_WAIT, SMFX, F, 1, S ), }; -static const struct drm_i915_cmd_descriptor vecs_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_vecs_cmds[] = { CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), CMD( MI_SET_APPID, SMI, F, 1, S ), CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B, @@ -410,7 +410,7 @@ static const struct drm_i915_cmd_descriptor vecs_cmds[] = { }}, ), }; -static const struct drm_i915_cmd_descriptor blt_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_blt_cmds[] = { CMD( MI_DISPLAY_FLIP, SMI, !F, 0xFF, R ), CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, B, .bits = {{ @@ -463,35 +463,35 @@ static const struct drm_i915_cmd_descriptor noop_desc = #undef B #undef M -static const struct drm_i915_cmd_table gen7_render_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { render_cmds, ARRAY_SIZE(render_cmds) }, +static const struct drm_i915_cmd_table gen7_render_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) }, }; -static const struct drm_i915_cmd_table hsw_render_ring_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { render_cmds, ARRAY_SIZE(render_cmds) }, +static const struct drm_i915_cmd_table hsw_render_ring_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) }, { hsw_render_cmds, ARRAY_SIZE(hsw_render_cmds) }, }; -static const struct drm_i915_cmd_table gen7_video_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { video_cmds, ARRAY_SIZE(video_cmds) }, +static const struct drm_i915_cmd_table gen7_video_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_video_cmds, ARRAY_SIZE(gen7_video_cmds) }, }; -static const struct drm_i915_cmd_table hsw_vebox_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { vecs_cmds, ARRAY_SIZE(vecs_cmds) }, +static const struct drm_i915_cmd_table hsw_vebox_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_vecs_cmds, ARRAY_SIZE(gen7_vecs_cmds) }, }; -static const struct drm_i915_cmd_table gen7_blt_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { blt_cmds, ARRAY_SIZE(blt_cmds) }, +static const struct drm_i915_cmd_table gen7_blt_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) }, }; -static const struct drm_i915_cmd_table hsw_blt_ring_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { blt_cmds, ARRAY_SIZE(blt_cmds) }, +static const struct drm_i915_cmd_table hsw_blt_ring_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) }, { hsw_blt_cmds, ARRAY_SIZE(hsw_blt_cmds) }, }; @@ -871,12 +871,12 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) switch (engine->id) { case RCS: if (IS_HASWELL(engine->i915)) { - cmd_tables = hsw_render_ring_cmds; + cmd_tables = hsw_render_ring_cmd_table; cmd_table_count = - ARRAY_SIZE(hsw_render_ring_cmds); + ARRAY_SIZE(hsw_render_ring_cmd_table); } else { - cmd_tables = gen7_render_cmds; - cmd_table_count = ARRAY_SIZE(gen7_render_cmds); + cmd_tables = gen7_render_cmd_table; + cmd_table_count = ARRAY_SIZE(gen7_render_cmd_table); } if (IS_HASWELL(engine->i915)) { @@ -890,17 +890,17 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) engine->get_cmd_length_mask = gen7_render_get_cmd_length_mask; break; case VCS: - cmd_tables = gen7_video_cmds; - cmd_table_count = ARRAY_SIZE(gen7_video_cmds); + cmd_tables = gen7_video_cmd_table; + cmd_table_count = ARRAY_SIZE(gen7_video_cmd_table); engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; break; case BCS: if (IS_HASWELL(engine->i915)) { - cmd_tables = hsw_blt_ring_cmds; - cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmds); + cmd_tables = hsw_blt_ring_cmd_table; + cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmd_table); } else { - cmd_tables = gen7_blt_cmds; - cmd_table_count = ARRAY_SIZE(gen7_blt_cmds); + cmd_tables = gen7_blt_cmd_table; + cmd_table_count = ARRAY_SIZE(gen7_blt_cmd_table); } if (IS_HASWELL(engine->i915)) { @@ -914,8 +914,8 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask; break; case VECS: - cmd_tables = hsw_vebox_cmds; - cmd_table_count = ARRAY_SIZE(hsw_vebox_cmds); + cmd_tables = hsw_vebox_cmd_table; + cmd_table_count = ARRAY_SIZE(hsw_vebox_cmd_table); /* VECS can use the same length_mask function as VCS */ engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; break; From 771ca35572723782de8a4654d45397b1486d5053 Mon Sep 17 00:00:00 2001 From: Jon Bloomfield Date: Fri, 8 Jun 2018 08:53:46 -0700 Subject: [PATCH 0428/3715] drm/i915: Disable Secure Batches for gen6+ commit 44157641d448cbc0c4b73c5231d2b911f0cb0427 upstream. Retroactively stop reporting support for secure batches through the api for gen6+ so that older binaries trigger the fallback path instead. Older binaries use secure batches pre gen6 to access resources that are not available to normal usermode processes. However, all known userspace explicitly checks for HAS_SECURE_BATCHES before relying on the secure batch feature. Since there are no known binaries relying on this for newer gens we can kill secure batches from gen6, via I915_PARAM_HAS_SECURE_BATCHES. v2: rebase (Mika) v3: rebase (Mika) Signed-off-by: Jon Bloomfield Cc: Tony Luck Cc: Dave Airlie Cc: Takashi Iwai Cc: Tyler Hicks Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_drv.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 12 ++++++++++-- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index c75f4ccbcdef..9b642dd040cf 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -323,7 +323,7 @@ static int i915_getparam(struct drm_device *dev, void *data, value = i915.semaphores; break; case I915_PARAM_HAS_SECURE_BATCHES: - value = capable(CAP_SYS_ADMIN); + value = HAS_SECURE_BATCHES(dev_priv) && capable(CAP_SYS_ADMIN); break; case I915_PARAM_CMD_PARSER_VERSION: value = i915_cmd_parser_get_version(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 41f51509c9e4..e888bf5ee31d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2996,6 +2996,8 @@ intel_info(const struct drm_i915_private *dev_priv) #define HAS_BLT(dev_priv) HAS_ENGINE(dev_priv, BCS) #define HAS_VEBOX(dev_priv) HAS_ENGINE(dev_priv, VECS) +#define HAS_SECURE_BATCHES(dev_priv) (INTEL_GEN(dev_priv) < 6) + #define HAS_LLC(dev_priv) ((dev_priv)->info.has_llc) #define HAS_SNOOP(dev_priv) ((dev_priv)->info.has_snoop) #define HAS_EDRAM(dev_priv) (!!((dev_priv)->edram_cap & EDRAM_ENABLED)) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index b5dd25b12011..fb6913fb2af0 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -2195,6 +2195,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, struct drm_i915_gem_exec_object2 *exec, struct drm_syncobj **fences) { + struct drm_i915_private *dev_priv = to_i915(dev); struct i915_execbuffer eb; struct dma_fence *in_fence = NULL; struct sync_file *out_fence = NULL; @@ -2204,7 +2205,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & ~__EXEC_OBJECT_UNKNOWN_FLAGS); - eb.i915 = to_i915(dev); + eb.i915 = dev_priv; eb.file = file; eb.args = args; if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC)) @@ -2226,8 +2227,15 @@ i915_gem_do_execbuffer(struct drm_device *dev, eb.batch_flags = 0; if (args->flags & I915_EXEC_SECURE) { + if (INTEL_GEN(dev_priv) >= 11) + return -ENODEV; + + /* Return -EPERM to trigger fallback code on old binaries. */ + if (!HAS_SECURE_BATCHES(dev_priv)) + return -EPERM; + if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN)) - return -EPERM; + return -EPERM; eb.batch_flags |= I915_DISPATCH_SECURE; } From 0b19d8fdfc160a6e700e12dcd49dbdee113a5f8d Mon Sep 17 00:00:00 2001 From: Jon Bloomfield Date: Fri, 8 Jun 2018 10:05:26 -0700 Subject: [PATCH 0429/3715] drm/i915: Remove Master tables from cmdparser commit 66d8aba1cd6db34af10de465c0d52af679288cb6 upstream. The previous patch has killed support for secure batches on gen6+, and hence the cmdparsers master tables are now dead code. Remove them. Signed-off-by: Jon Bloomfield Cc: Tony Luck Cc: Dave Airlie Cc: Takashi Iwai Cc: Tyler Hicks Reviewed-by: Chris Wilson Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_cmd_parser.c | 84 ++++++---------------- drivers/gpu/drm/i915/i915_drv.h | 3 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 7 +- 3 files changed, 26 insertions(+), 68 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 452c45ad89ce..59d1432ecbbf 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -51,13 +51,11 @@ * granting userspace undue privileges. There are three categories of privilege. * * First, commands which are explicitly defined as privileged or which should - * only be used by the kernel driver. The parser generally rejects such - * commands, though it may allow some from the drm master process. + * only be used by the kernel driver. The parser rejects such commands * * Second, commands which access registers. To support correct/enhanced * userspace functionality, particularly certain OpenGL extensions, the parser - * provides a whitelist of registers which userspace may safely access (for both - * normal and drm master processes). + * provides a whitelist of registers which userspace may safely access * * Third, commands which access privileged memory (i.e. GGTT, HWS page, etc). * The parser always rejects such commands. @@ -82,9 +80,9 @@ * in the per-engine command tables. * * Other command table entries map fairly directly to high level categories - * mentioned above: rejected, master-only, register whitelist. The parser - * implements a number of checks, including the privileged memory checks, via a - * general bitmasking mechanism. + * mentioned above: rejected, register whitelist. The parser implements a number + * of checks, including the privileged memory checks, via a general bitmasking + * mechanism. */ /* @@ -102,8 +100,6 @@ struct drm_i915_cmd_descriptor { * CMD_DESC_REJECT: The command is never allowed * CMD_DESC_REGISTER: The command should be checked against the * register whitelist for the appropriate ring - * CMD_DESC_MASTER: The command is allowed if the submitting process - * is the DRM master */ u32 flags; #define CMD_DESC_FIXED (1<<0) @@ -111,7 +107,6 @@ struct drm_i915_cmd_descriptor { #define CMD_DESC_REJECT (1<<2) #define CMD_DESC_REGISTER (1<<3) #define CMD_DESC_BITMASK (1<<4) -#define CMD_DESC_MASTER (1<<5) /* * The command's unique identification bits and the bitmask to get them. @@ -207,14 +202,13 @@ struct drm_i915_cmd_table { #define R CMD_DESC_REJECT #define W CMD_DESC_REGISTER #define B CMD_DESC_BITMASK -#define M CMD_DESC_MASTER /* Command Mask Fixed Len Action ---------------------------------------------------------- */ static const struct drm_i915_cmd_descriptor gen7_common_cmds[] = { CMD( MI_NOOP, SMI, F, 1, S ), CMD( MI_USER_INTERRUPT, SMI, F, 1, R ), - CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, M ), + CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, R ), CMD( MI_ARB_CHECK, SMI, F, 1, S ), CMD( MI_REPORT_HEAD, SMI, F, 1, S ), CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ), @@ -311,7 +305,7 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = { CMD( MI_URB_ATOMIC_ALLOC, SMI, F, 1, S ), CMD( MI_SET_APPID, SMI, F, 1, S ), CMD( MI_RS_CONTEXT, SMI, F, 1, S ), - CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ), + CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ), CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, W, .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 } ), @@ -444,7 +438,7 @@ static const struct drm_i915_cmd_descriptor gen7_blt_cmds[] = { }; static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = { - CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ), + CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ), CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), }; @@ -461,7 +455,6 @@ static const struct drm_i915_cmd_descriptor noop_desc = #undef R #undef W #undef B -#undef M static const struct drm_i915_cmd_table gen7_render_cmd_table[] = { { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, @@ -610,47 +603,29 @@ static const struct drm_i915_reg_descriptor gen7_blt_regs[] = { REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE), }; -static const struct drm_i915_reg_descriptor ivb_master_regs[] = { - REG32(FORCEWAKE_MT), - REG32(DERRMR), - REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_A)), - REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_B)), - REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_C)), -}; - -static const struct drm_i915_reg_descriptor hsw_master_regs[] = { - REG32(FORCEWAKE_MT), - REG32(DERRMR), -}; - #undef REG64 #undef REG32 struct drm_i915_reg_table { const struct drm_i915_reg_descriptor *regs; int num_regs; - bool master; }; static const struct drm_i915_reg_table ivb_render_reg_tables[] = { - { gen7_render_regs, ARRAY_SIZE(gen7_render_regs), false }, - { ivb_master_regs, ARRAY_SIZE(ivb_master_regs), true }, + { gen7_render_regs, ARRAY_SIZE(gen7_render_regs) }, }; static const struct drm_i915_reg_table ivb_blt_reg_tables[] = { - { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs), false }, - { ivb_master_regs, ARRAY_SIZE(ivb_master_regs), true }, + { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) }, }; static const struct drm_i915_reg_table hsw_render_reg_tables[] = { - { gen7_render_regs, ARRAY_SIZE(gen7_render_regs), false }, - { hsw_render_regs, ARRAY_SIZE(hsw_render_regs), false }, - { hsw_master_regs, ARRAY_SIZE(hsw_master_regs), true }, + { gen7_render_regs, ARRAY_SIZE(gen7_render_regs) }, + { hsw_render_regs, ARRAY_SIZE(hsw_render_regs) }, }; static const struct drm_i915_reg_table hsw_blt_reg_tables[] = { - { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs), false }, - { hsw_master_regs, ARRAY_SIZE(hsw_master_regs), true }, + { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) }, }; static u32 gen7_render_get_cmd_length_mask(u32 cmd_header) @@ -1027,22 +1002,16 @@ __find_reg(const struct drm_i915_reg_descriptor *table, int count, u32 addr) } static const struct drm_i915_reg_descriptor * -find_reg(const struct intel_engine_cs *engine, bool is_master, u32 addr) +find_reg(const struct intel_engine_cs *engine, u32 addr) { const struct drm_i915_reg_table *table = engine->reg_tables; + const struct drm_i915_reg_descriptor *reg = NULL; int count = engine->reg_table_count; - for (; count > 0; ++table, --count) { - if (!table->master || is_master) { - const struct drm_i915_reg_descriptor *reg; + for (; !reg && (count > 0); ++table, --count) + reg = __find_reg(table->regs, table->num_regs, addr); - reg = __find_reg(table->regs, table->num_regs, addr); - if (reg != NULL) - return reg; - } - } - - return NULL; + return reg; } /* Returns a vmap'd pointer to dst_obj, which the caller must unmap */ @@ -1127,8 +1096,7 @@ unpin_src: static bool check_cmd(const struct intel_engine_cs *engine, const struct drm_i915_cmd_descriptor *desc, - const u32 *cmd, u32 length, - const bool is_master) + const u32 *cmd, u32 length) { if (desc->flags & CMD_DESC_SKIP) return true; @@ -1138,12 +1106,6 @@ static bool check_cmd(const struct intel_engine_cs *engine, return false; } - if ((desc->flags & CMD_DESC_MASTER) && !is_master) { - DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n", - *cmd); - return false; - } - if (desc->flags & CMD_DESC_REGISTER) { /* * Get the distance between individual register offset @@ -1157,7 +1119,7 @@ static bool check_cmd(const struct intel_engine_cs *engine, offset += step) { const u32 reg_addr = cmd[offset] & desc->reg.mask; const struct drm_i915_reg_descriptor *reg = - find_reg(engine, is_master, reg_addr); + find_reg(engine, reg_addr); if (!reg) { DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (%s)\n", @@ -1244,7 +1206,6 @@ static bool check_cmd(const struct intel_engine_cs *engine, * @shadow_batch_obj: copy of the batch buffer in question * @batch_start_offset: byte offset in the batch at which execution starts * @batch_len: length of the commands in batch_obj - * @is_master: is the submitting process the drm master? * * Parses the specified batch buffer looking for privilege violations as * described in the overview. @@ -1256,8 +1217,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, struct drm_i915_gem_object *batch_obj, struct drm_i915_gem_object *shadow_batch_obj, u32 batch_start_offset, - u32 batch_len, - bool is_master) + u32 batch_len) { u32 *cmd, *batch_end; struct drm_i915_cmd_descriptor default_desc = noop_desc; @@ -1323,7 +1283,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, break; } - if (!check_cmd(engine, desc, cmd, length, is_master)) { + if (!check_cmd(engine, desc, cmd, length)) { ret = -EACCES; break; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e888bf5ee31d..93d22f249b5b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3847,8 +3847,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, struct drm_i915_gem_object *batch_obj, struct drm_i915_gem_object *shadow_batch_obj, u32 batch_start_offset, - u32 batch_len, - bool is_master); + u32 batch_len); /* i915_perf.c */ extern void i915_perf_init(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index fb6913fb2af0..a4487af03659 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1911,7 +1911,7 @@ static int i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) return 0; } -static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master) +static struct i915_vma *eb_parse(struct i915_execbuffer *eb) { struct drm_i915_gem_object *shadow_batch_obj; struct i915_vma *vma; @@ -1926,8 +1926,7 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master) eb->batch->obj, shadow_batch_obj, eb->batch_start_offset, - eb->batch_len, - is_master); + eb->batch_len); if (err) { if (err == -EACCES) /* unhandled chained batch */ vma = NULL; @@ -2325,7 +2324,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (eb_use_cmdparser(&eb)) { struct i915_vma *vma; - vma = eb_parse(&eb, drm_is_current_master(file)); + vma = eb_parse(&eb); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err_vma; From 397944fc408dcd47ed5224d4cb1b0a7088299117 Mon Sep 17 00:00:00 2001 From: Jon Bloomfield Date: Wed, 1 Aug 2018 09:33:59 -0700 Subject: [PATCH 0430/3715] drm/i915: Add support for mandatory cmdparsing commit 311a50e76a33d1e029563c24b2ff6db0c02b5afe upstream. The existing cmdparser for gen7 can be bypassed by specifying batch_len=0 in the execbuf call. This is safe because bypassing simply reduces the cmd-set available. In a later patch we will introduce cmdparsing for gen9, as a security measure, which must be strictly enforced since without it we are vulnerable to DoS attacks. Introduce the concept of 'required' cmd parsing that cannot be bypassed by submitting zero-length bb's. v2: rebase (Mika) v2: rebase (Mika) v3: fix conflict on engine flags (Mika) Signed-off-by: Jon Bloomfield Cc: Joonas Lahtinen Cc: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_cmd_parser.c | 6 +++--- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 ++- drivers/gpu/drm/i915/intel_ringbuffer.h | 14 +++++++++++--- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 59d1432ecbbf..5fa6a68e03cd 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -916,7 +916,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) return; } - engine->flags |= I915_ENGINE_NEEDS_CMD_PARSER; + engine->flags |= I915_ENGINE_USING_CMD_PARSER; } /** @@ -928,7 +928,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) */ void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine) { - if (!intel_engine_needs_cmd_parser(engine)) + if (!intel_engine_using_cmd_parser(engine)) return; fini_hash_table(engine); @@ -1317,7 +1317,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv) /* If the command parser is not enabled, report 0 - unsupported */ for_each_engine(engine, dev_priv, id) { - if (intel_engine_needs_cmd_parser(engine)) { + if (intel_engine_using_cmd_parser(engine)) { active = true; break; } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index a4487af03659..e72dc30e56c7 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -270,7 +270,8 @@ static inline u64 gen8_noncanonical_addr(u64 address) static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) { - return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len; + return intel_engine_requires_cmd_parser(eb->engine) || + (intel_engine_using_cmd_parser(eb->engine) && eb->batch_len); } static int eb_create(struct i915_execbuffer *eb) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 1a00a4e2732b..774e3772d0ed 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -417,7 +417,8 @@ struct intel_engine_cs { struct intel_engine_hangcheck hangcheck; -#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0) +#define I915_ENGINE_USING_CMD_PARSER BIT(0) +#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(3) unsigned int flags; /* @@ -445,9 +446,16 @@ struct intel_engine_cs { u32 (*get_cmd_length_mask)(u32 cmd_header); }; -static inline bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine) +static inline bool +intel_engine_using_cmd_parser(const struct intel_engine_cs *engine) { - return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER; + return engine->flags & I915_ENGINE_USING_CMD_PARSER; +} + +static inline bool +intel_engine_requires_cmd_parser(const struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_REQUIRES_CMD_PARSER; } static inline unsigned int From 78eff893fa30e8fa1ca0cb938b0c8bdf0544b973 Mon Sep 17 00:00:00 2001 From: Jon Bloomfield Date: Tue, 22 May 2018 13:59:06 -0700 Subject: [PATCH 0431/3715] drm/i915: Support ro ppgtt mapped cmdparser shadow buffers commit 4f7af1948abcb18b4772fe1bcd84d7d27d96258c upstream. For Gen7, the original cmdparser motive was to permit limited use of register read/write instructions in unprivileged BB's. This worked by copying the user supplied bb to a kmd owned bb, and running it in secure mode, from the ggtt, only if the scanner finds no unsafe commands or registers. For Gen8+ we can't use this same technique because running bb's from the ggtt also disables access to ppgtt space. But we also do not actually require 'secure' execution since we are only trying to reduce the available command/register set. Instead we will copy the user buffer to a kmd owned read-only bb in ppgtt, and run in the usual non-secure mode. Note that ro pages are only supported by ppgtt (not ggtt), but luckily that's exactly what we need. Add the required paths to map the shadow buffer to ppgtt ro for Gen8+ v2: IS_GEN7/IS_GEN (Mika) v3: rebase v4: rebase v5: rebase Signed-off-by: Jon Bloomfield Cc: Tony Luck Cc: Dave Airlie Cc: Takashi Iwai Cc: Tyler Hicks Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_drv.h | 14 ++++++ drivers/gpu/drm/i915/i915_gem.c | 16 ++++++- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 56 +++++++++++++++------- 3 files changed, 68 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 93d22f249b5b..e50030c60665 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2980,6 +2980,12 @@ intel_info(const struct drm_i915_private *dev_priv) #define IS_GEN9_LP(dev_priv) (IS_GEN9(dev_priv) && IS_LP(dev_priv)) #define IS_GEN9_BC(dev_priv) (IS_GEN9(dev_priv) && !IS_LP(dev_priv)) +/* + * The Gen7 cmdparser copies the scanned buffer to the ggtt for execution + * All later gens can run the final buffer from the ppgtt + */ +#define CMDPARSER_USES_GGTT(dev_priv) IS_GEN7(dev_priv) + #define ENGINE_MASK(id) BIT(id) #define RENDER_RING ENGINE_MASK(RCS) #define BSD_RING ENGINE_MASK(VCS) @@ -3393,6 +3399,14 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, u64 alignment, u64 flags); +struct i915_vma * __must_check +i915_gem_object_pin(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view, + u64 size, + u64 alignment, + u64 flags); + int i915_gem_object_unbind(struct drm_i915_gem_object *obj); void i915_gem_release_mmap(struct drm_i915_gem_object *obj); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f95e2c8ac239..785d192b27c8 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4000,6 +4000,20 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct i915_address_space *vm = &dev_priv->ggtt.base; + + return i915_gem_object_pin(obj, vm, view, size, alignment, + flags | PIN_GLOBAL); +} + +struct i915_vma * +i915_gem_object_pin(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view, + u64 size, + u64 alignment, + u64 flags) +{ + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct i915_vma *vma; int ret; @@ -4057,7 +4071,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, return ERR_PTR(ret); } - ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); + ret = i915_vma_pin(vma, size, alignment, flags); if (ret) return ERR_PTR(ret); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index e72dc30e56c7..2cd38ec79141 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1912,6 +1912,33 @@ static int i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) return 0; } +static struct i915_vma * +shadow_batch_pin(struct i915_execbuffer *eb, struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *dev_priv = eb->i915; + struct i915_address_space *vm; + u64 flags; + + /* + * PPGTT backed shadow buffers must be mapped RO, to prevent + * post-scan tampering + */ + if (CMDPARSER_USES_GGTT(dev_priv)) { + flags = PIN_GLOBAL; + vm = &dev_priv->ggtt.base; + eb->batch_flags |= I915_DISPATCH_SECURE; + } else if (eb->vm->has_read_only) { + flags = PIN_USER; + vm = eb->vm; + i915_gem_object_set_readonly(obj); + } else { + DRM_DEBUG("Cannot prevent post-scan tampering without RO capable vm\n"); + return ERR_PTR(-EINVAL); + } + + return i915_gem_object_pin(obj, vm, NULL, 0, 0, flags); +} + static struct i915_vma *eb_parse(struct i915_execbuffer *eb) { struct drm_i915_gem_object *shadow_batch_obj; @@ -1929,14 +1956,21 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb) eb->batch_start_offset, eb->batch_len); if (err) { - if (err == -EACCES) /* unhandled chained batch */ + /* + * Unsafe GGTT-backed buffers can still be submitted safely + * as non-secure. + * For PPGTT backing however, we have no choice but to forcibly + * reject unsafe buffers + */ + if (CMDPARSER_USES_GGTT(eb->i915) && (err == -EACCES)) + /* Execute original buffer non-secure */ vma = NULL; else vma = ERR_PTR(err); goto out; } - vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0); + vma = shadow_batch_pin(eb, shadow_batch_obj); if (IS_ERR(vma)) goto out; @@ -1945,6 +1979,9 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb) __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF; vma->exec_flags = &eb->flags[eb->buffer_count]; eb->buffer_count++; + eb->batch_start_offset = 0; + eb->batch = vma; + /* eb->batch_len unchanged */ out: i915_gem_object_unpin_pages(shadow_batch_obj); @@ -2330,21 +2367,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, err = PTR_ERR(vma); goto err_vma; } - - if (vma) { - /* - * Batch parsed and accepted: - * - * Set the DISPATCH_SECURE bit to remove the NON_SECURE - * bit from MI_BATCH_BUFFER_START commands issued in - * the dispatch_execbuffer implementations. We - * specifically don't want that set on batches the - * command parser has accepted. - */ - eb.batch_flags |= I915_DISPATCH_SECURE; - eb.batch_start_offset = 0; - eb.batch = vma; - } } if (eb.batch_len == 0) From d8573c4fb7af86c888faeba98f1ac482b1872ce2 Mon Sep 17 00:00:00 2001 From: Jon Bloomfield Date: Wed, 1 Aug 2018 09:45:50 -0700 Subject: [PATCH 0432/3715] drm/i915: Allow parsing of unsized batches commit 435e8fc059dbe0eec823a75c22da2972390ba9e0 upstream. In "drm/i915: Add support for mandatory cmdparsing" we introduced the concept of mandatory parsing. This allows the cmdparser to be invoked even when user passes batch_len=0 to the execbuf ioctl's. However, the cmdparser needs to know the extents of the buffer being scanned. Refactor the code to ensure the cmdparser uses the actual object size, instead of the incoming length, if user passes 0. Signed-off-by: Jon Bloomfield Cc: Tony Luck Cc: Dave Airlie Cc: Takashi Iwai Cc: Tyler Hicks Reviewed-by: Chris Wilson Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 2cd38ec79141..12d44ea36d24 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -271,7 +271,8 @@ static inline u64 gen8_noncanonical_addr(u64 address) static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) { return intel_engine_requires_cmd_parser(eb->engine) || - (intel_engine_using_cmd_parser(eb->engine) && eb->batch_len); + (intel_engine_using_cmd_parser(eb->engine) && + eb->args->batch_len); } static int eb_create(struct i915_execbuffer *eb) @@ -2359,6 +2360,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, goto err_vma; } + if (eb.batch_len == 0) + eb.batch_len = eb.batch->size - eb.batch_start_offset; + if (eb_use_cmdparser(&eb)) { struct i915_vma *vma; @@ -2369,9 +2373,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, } } - if (eb.batch_len == 0) - eb.batch_len = eb.batch->size - eb.batch_start_offset; - /* * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure * batch" bit. Hence we need to pin secure batches into the global gtt. From b125c2230ceae25b9d51cc1e7d263b97f59e64cb Mon Sep 17 00:00:00 2001 From: Jon Bloomfield Date: Mon, 23 Apr 2018 11:12:15 -0700 Subject: [PATCH 0433/3715] drm/i915: Add gen9 BCS cmdparsing commit 0f2f39758341df70202ae1c42d5a1e4ee392b6d3 upstream. For gen9 we enable cmdparsing on the BCS ring, specifically to catch inadvertent accesses to sensitive registers Unlike gen7/hsw, we use the parser only to block certain registers. We can rely on h/w to block restricted commands, so the command tables only provide enough info to allow the parser to delineate each command, and identify commands that access registers. Note: This patch deliberately ignores checkpatch issues in favour of matching the style of the surrounding code. We'll correct the entire file in one go in a later patch. v3: rebase (Mika) v4: Add RING_TIMESTAMP registers to whitelist (Jon) Signed-off-by: Jon Bloomfield Cc: Tony Luck Cc: Dave Airlie Cc: Takashi Iwai Cc: Tyler Hicks Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_cmd_parser.c | 116 ++++++++++++++++++++++--- drivers/gpu/drm/i915/i915_gem_gtt.c | 3 +- drivers/gpu/drm/i915/i915_reg.h | 4 + 3 files changed, 112 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 5fa6a68e03cd..09f1672488bb 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -442,6 +442,47 @@ static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = { CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), }; +/* + * For Gen9 we can still rely on the h/w to enforce cmd security, and only + * need to re-enforce the register access checks. We therefore only need to + * teach the cmdparser how to find the end of each command, and identify + * register accesses. The table doesn't need to reject any commands, and so + * the only commands listed here are: + * 1) Those that touch registers + * 2) Those that do not have the default 8-bit length + * + * Note that the default MI length mask chosen for this table is 0xFF, not + * the 0x3F used on older devices. This is because the vast majority of MI + * cmds on Gen9 use a standard 8-bit Length field. + * All the Gen9 blitter instructions are standard 0xFF length mask, and + * none allow access to non-general registers, so in fact no BLT cmds are + * included in the table at all. + * + */ +static const struct drm_i915_cmd_descriptor gen9_blt_cmds[] = { + CMD( MI_NOOP, SMI, F, 1, S ), + CMD( MI_USER_INTERRUPT, SMI, F, 1, S ), + CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, S ), + CMD( MI_FLUSH, SMI, F, 1, S ), + CMD( MI_ARB_CHECK, SMI, F, 1, S ), + CMD( MI_REPORT_HEAD, SMI, F, 1, S ), + CMD( MI_ARB_ON_OFF, SMI, F, 1, S ), + CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ), + CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, S ), + CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, S ), + CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, S ), + CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W, + .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 } ), + CMD( MI_UPDATE_GTT, SMI, !F, 0x3FF, S ), + CMD( MI_STORE_REGISTER_MEM_GEN8, SMI, F, 4, W, + .reg = { .offset = 1, .mask = 0x007FFFFC } ), + CMD( MI_FLUSH_DW, SMI, !F, 0x3F, S ), + CMD( MI_LOAD_REGISTER_MEM_GEN8, SMI, F, 4, W, + .reg = { .offset = 1, .mask = 0x007FFFFC } ), + CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, W, + .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 } ), +}; + static const struct drm_i915_cmd_descriptor noop_desc = CMD(MI_NOOP, SMI, F, 1, S); @@ -488,6 +529,11 @@ static const struct drm_i915_cmd_table hsw_blt_ring_cmd_table[] = { { hsw_blt_cmds, ARRAY_SIZE(hsw_blt_cmds) }, }; +static const struct drm_i915_cmd_table gen9_blt_cmd_table[] = { + { gen9_blt_cmds, ARRAY_SIZE(gen9_blt_cmds) }, +}; + + /* * Register whitelists, sorted by increasing register offset. */ @@ -603,6 +649,29 @@ static const struct drm_i915_reg_descriptor gen7_blt_regs[] = { REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE), }; +static const struct drm_i915_reg_descriptor gen9_blt_regs[] = { + REG64_IDX(RING_TIMESTAMP, RENDER_RING_BASE), + REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE), + REG32(BCS_SWCTRL), + REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE), + REG64_IDX(BCS_GPR, 0), + REG64_IDX(BCS_GPR, 1), + REG64_IDX(BCS_GPR, 2), + REG64_IDX(BCS_GPR, 3), + REG64_IDX(BCS_GPR, 4), + REG64_IDX(BCS_GPR, 5), + REG64_IDX(BCS_GPR, 6), + REG64_IDX(BCS_GPR, 7), + REG64_IDX(BCS_GPR, 8), + REG64_IDX(BCS_GPR, 9), + REG64_IDX(BCS_GPR, 10), + REG64_IDX(BCS_GPR, 11), + REG64_IDX(BCS_GPR, 12), + REG64_IDX(BCS_GPR, 13), + REG64_IDX(BCS_GPR, 14), + REG64_IDX(BCS_GPR, 15), +}; + #undef REG64 #undef REG32 @@ -628,6 +697,10 @@ static const struct drm_i915_reg_table hsw_blt_reg_tables[] = { { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) }, }; +static const struct drm_i915_reg_table gen9_blt_reg_tables[] = { + { gen9_blt_regs, ARRAY_SIZE(gen9_blt_regs) }, +}; + static u32 gen7_render_get_cmd_length_mask(u32 cmd_header) { u32 client = cmd_header >> INSTR_CLIENT_SHIFT; @@ -683,6 +756,17 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header) return 0; } +static u32 gen9_blt_get_cmd_length_mask(u32 cmd_header) +{ + u32 client = cmd_header >> INSTR_CLIENT_SHIFT; + + if (client == INSTR_MI_CLIENT || client == INSTR_BC_CLIENT) + return 0xFF; + + DRM_DEBUG_DRIVER("CMD: Abnormal blt cmd length! 0x%08X\n", cmd_header); + return 0; +} + static bool validate_cmds_sorted(const struct intel_engine_cs *engine, const struct drm_i915_cmd_table *cmd_tables, int cmd_table_count) @@ -840,7 +924,8 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) int cmd_table_count; int ret; - if (!IS_GEN7(engine->i915)) + if (!IS_GEN7(engine->i915) && !(IS_GEN9(engine->i915) && + engine->id == BCS)) return; switch (engine->id) { @@ -861,7 +946,6 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) engine->reg_tables = ivb_render_reg_tables; engine->reg_table_count = ARRAY_SIZE(ivb_render_reg_tables); } - engine->get_cmd_length_mask = gen7_render_get_cmd_length_mask; break; case VCS: @@ -870,7 +954,16 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; break; case BCS: - if (IS_HASWELL(engine->i915)) { + engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask; + if (IS_GEN9(engine->i915)) { + cmd_tables = gen9_blt_cmd_table; + cmd_table_count = ARRAY_SIZE(gen9_blt_cmd_table); + engine->get_cmd_length_mask = + gen9_blt_get_cmd_length_mask; + + /* BCS Engine unsafe without parser */ + engine->flags |= I915_ENGINE_REQUIRES_CMD_PARSER; + } else if (IS_HASWELL(engine->i915)) { cmd_tables = hsw_blt_ring_cmd_table; cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmd_table); } else { @@ -878,15 +971,17 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) cmd_table_count = ARRAY_SIZE(gen7_blt_cmd_table); } - if (IS_HASWELL(engine->i915)) { + if (IS_GEN9(engine->i915)) { + engine->reg_tables = gen9_blt_reg_tables; + engine->reg_table_count = + ARRAY_SIZE(gen9_blt_reg_tables); + } else if (IS_HASWELL(engine->i915)) { engine->reg_tables = hsw_blt_reg_tables; engine->reg_table_count = ARRAY_SIZE(hsw_blt_reg_tables); } else { engine->reg_tables = ivb_blt_reg_tables; engine->reg_table_count = ARRAY_SIZE(ivb_blt_reg_tables); } - - engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask; break; case VECS: cmd_tables = hsw_vebox_cmd_table; @@ -1260,9 +1355,9 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, } /* - * If the batch buffer contains a chained batch, return an - * error that tells the caller to abort and dispatch the - * workload as a non-secure batch. + * We don't try to handle BATCH_BUFFER_START because it adds + * non-trivial complexity. Instead we abort the scan and return + * and error to indicate that the batch is unsafe. */ if (desc->cmd.value == MI_BATCH_BUFFER_START) { ret = -EACCES; @@ -1342,6 +1437,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv) * the parser enabled. * 9. Don't whitelist or handle oacontrol specially, as ownership * for oacontrol state is moving to i915-perf. + * 10. Support for Gen9 BCS Parsing */ - return 9; + return 10; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 6f5e1e18e530..47d178817a29 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -159,7 +159,8 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, if (enable_ppgtt == 0 && INTEL_GEN(dev_priv) < 9) return 0; - if (enable_ppgtt == 1) + /* Full PPGTT is required by the Gen9 cmdparser */ + if (enable_ppgtt == 1 && INTEL_GEN(dev_priv) != 9) return 1; if (enable_ppgtt == 2 && has_full_ppgtt) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 2e706f1abe64..ca2845609e7e 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -703,6 +703,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) */ #define BCS_SWCTRL _MMIO(0x22200) +/* There are 16 GPR registers */ +#define BCS_GPR(n) _MMIO(0x22600 + (n) * 8) +#define BCS_GPR_UDW(n) _MMIO(0x22600 + (n) * 8 + 4) + #define GPGPU_THREADS_DISPATCHED _MMIO(0x2290) #define GPGPU_THREADS_DISPATCHED_UDW _MMIO(0x2290 + 4) #define HS_INVOCATION_COUNT _MMIO(0x2300) From 54ffc33792e48a128cd7a9105562d0078f2f467c Mon Sep 17 00:00:00 2001 From: Jon Bloomfield Date: Thu, 27 Sep 2018 10:23:17 -0700 Subject: [PATCH 0434/3715] drm/i915/cmdparser: Use explicit goto for error paths commit 0546a29cd884fb8184731c79ab008927ca8859d0 upstream. In the next patch we will be adding a second valid termination condition which will require a small amount of refactoring to share logic with the BB_END case. Refactor all error conditions to jump to a dedicated exit path, with 'break' reserved only for a successful parse. Signed-off-by: Jon Bloomfield Cc: Tony Luck Cc: Dave Airlie Cc: Takashi Iwai Cc: Tyler Hicks Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_cmd_parser.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 09f1672488bb..1ff3c8fd228f 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1337,21 +1337,15 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, do { u32 length; - if (*cmd == MI_BATCH_BUFFER_END) { - if (needs_clflush_after) { - void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping); - drm_clflush_virt_range(ptr, - (void *)(cmd + 1) - ptr); - } + if (*cmd == MI_BATCH_BUFFER_END) break; - } desc = find_cmd(engine, *cmd, desc, &default_desc); if (!desc) { DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n", *cmd); ret = -EINVAL; - break; + goto err; } /* @@ -1361,7 +1355,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, */ if (desc->cmd.value == MI_BATCH_BUFFER_START) { ret = -EACCES; - break; + goto err; } if (desc->flags & CMD_DESC_FIXED) @@ -1375,22 +1369,29 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, length, batch_end - cmd); ret = -EINVAL; - break; + goto err; } if (!check_cmd(engine, desc, cmd, length)) { ret = -EACCES; - break; + goto err; } cmd += length; if (cmd >= batch_end) { DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE cmd!\n"); ret = -EINVAL; - break; + goto err; } } while (1); + if (needs_clflush_after) { + void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping); + + drm_clflush_virt_range(ptr, (void *)(cmd + 1) - ptr); + } + +err: i915_gem_object_unpin_map(shadow_batch_obj); return ret; } From 6c410451e9dfa163687b51f8d98d3f55f83d7bac Mon Sep 17 00:00:00 2001 From: Jon Bloomfield Date: Thu, 20 Sep 2018 09:58:36 -0700 Subject: [PATCH 0435/3715] drm/i915/cmdparser: Add support for backward jumps commit f8c08d8faee5567803c8c533865296ca30286bbf upstream. To keep things manageable, the pre-gen9 cmdparser does not attempt to track any form of nested BB_START's. This did not prevent usermode from using nested starts, or even chained batches because the cmdparser is not strictly enforced pre gen9. Instead, the existence of a nested BB_START would cause the batch to be emitted in insecure mode, and any privileged capabilities would not be available. For Gen9, the cmdparser becomes mandatory (for BCS at least), and so not providing any form of nested BB_START support becomes overly restrictive. Any such batch will simply not run. We make heavy use of backward jumps in igt, and it is much easier to add support for this restricted subset of nested jumps, than to rewrite the whole of our test suite to avoid them. Add the required logic to support limited backward jumps, to instructions that have already been validated by the parser. Note that it's not sufficient to simply approve any BB_START that jumps backwards in the buffer because this would allow an attacker to embed a rogue instruction sequence within the operand words of a harmless instruction (say LRI) and jump to that. We introduce a bit array to track every instr offset successfully validated, and test the target of BB_START against this. If the target offset hits, it is re-written to the same offset in the shadow buffer and the BB_START cmd is allowed. Note: This patch deliberately ignores checkpatch issues in the cmdtables, in order to match the style of the surrounding code. We'll correct the entire file in one go in a later patch. v2: set dispatch secure late (Mika) v3: rebase (Mika) v4: Clear whitelist on each parse Minor review updates (Chris) v5: Correct backward jump batching v6: fix compilation error due to struct eb shuffle (Mika) Signed-off-by: Jon Bloomfield Cc: Tony Luck Cc: Dave Airlie Cc: Takashi Iwai Cc: Tyler Hicks Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_cmd_parser.c | 151 +++++++++++++++++++-- drivers/gpu/drm/i915/i915_drv.h | 9 +- drivers/gpu/drm/i915/i915_gem_context.c | 5 + drivers/gpu/drm/i915/i915_gem_context.h | 6 + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 32 +++-- 5 files changed, 177 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 1ff3c8fd228f..23d220fbca5f 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -481,6 +481,19 @@ static const struct drm_i915_cmd_descriptor gen9_blt_cmds[] = { .reg = { .offset = 1, .mask = 0x007FFFFC } ), CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, W, .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 } ), + + /* + * We allow BB_START but apply further checks. We just sanitize the + * basic fields here. + */ +#define MI_BB_START_OPERAND_MASK GENMASK(SMI-1, 0) +#define MI_BB_START_OPERAND_EXPECT (MI_BATCH_PPGTT_HSW | 1) + CMD( MI_BATCH_BUFFER_START_GEN8, SMI, !F, 0xFF, B, + .bits = {{ + .offset = 0, + .mask = MI_BB_START_OPERAND_MASK, + .expected = MI_BB_START_OPERAND_EXPECT, + }}, ), }; static const struct drm_i915_cmd_descriptor noop_desc = @@ -1292,15 +1305,113 @@ static bool check_cmd(const struct intel_engine_cs *engine, return true; } +static int check_bbstart(const struct i915_gem_context *ctx, + u32 *cmd, u32 offset, u32 length, + u32 batch_len, + u64 batch_start, + u64 shadow_batch_start) +{ + u64 jump_offset, jump_target; + u32 target_cmd_offset, target_cmd_index; + + /* For igt compatibility on older platforms */ + if (CMDPARSER_USES_GGTT(ctx->i915)) { + DRM_DEBUG("CMD: Rejecting BB_START for ggtt based submission\n"); + return -EACCES; + } + + if (length != 3) { + DRM_DEBUG("CMD: Recursive BB_START with bad length(%u)\n", + length); + return -EINVAL; + } + + jump_target = *(u64*)(cmd+1); + jump_offset = jump_target - batch_start; + + /* + * Any underflow of jump_target is guaranteed to be outside the range + * of a u32, so >= test catches both too large and too small + */ + if (jump_offset >= batch_len) { + DRM_DEBUG("CMD: BB_START to 0x%llx jumps out of BB\n", + jump_target); + return -EINVAL; + } + + /* + * This cannot overflow a u32 because we already checked jump_offset + * is within the BB, and the batch_len is a u32 + */ + target_cmd_offset = lower_32_bits(jump_offset); + target_cmd_index = target_cmd_offset / sizeof(u32); + + *(u64*)(cmd + 1) = shadow_batch_start + target_cmd_offset; + + if (target_cmd_index == offset) + return 0; + + if (ctx->jump_whitelist_cmds <= target_cmd_index) { + DRM_DEBUG("CMD: Rejecting BB_START - truncated whitelist array\n"); + return -EINVAL; + } else if (!test_bit(target_cmd_index, ctx->jump_whitelist)) { + DRM_DEBUG("CMD: BB_START to 0x%llx not a previously executed cmd\n", + jump_target); + return -EINVAL; + } + + return 0; +} + +static void init_whitelist(struct i915_gem_context *ctx, u32 batch_len) +{ + const u32 batch_cmds = DIV_ROUND_UP(batch_len, sizeof(u32)); + const u32 exact_size = BITS_TO_LONGS(batch_cmds); + u32 next_size = BITS_TO_LONGS(roundup_pow_of_two(batch_cmds)); + unsigned long *next_whitelist; + + if (CMDPARSER_USES_GGTT(ctx->i915)) + return; + + if (batch_cmds <= ctx->jump_whitelist_cmds) { + memset(ctx->jump_whitelist, 0, exact_size * sizeof(u32)); + return; + } + +again: + next_whitelist = kcalloc(next_size, sizeof(long), GFP_KERNEL); + if (next_whitelist) { + kfree(ctx->jump_whitelist); + ctx->jump_whitelist = next_whitelist; + ctx->jump_whitelist_cmds = + next_size * BITS_PER_BYTE * sizeof(long); + return; + } + + if (next_size > exact_size) { + next_size = exact_size; + goto again; + } + + DRM_DEBUG("CMD: Failed to extend whitelist. BB_START may be disallowed\n"); + memset(ctx->jump_whitelist, 0, + BITS_TO_LONGS(ctx->jump_whitelist_cmds) * sizeof(u32)); + + return; +} + #define LENGTH_BIAS 2 /** * i915_parse_cmds() - parse a submitted batch buffer for privilege violations + * @ctx: the context in which the batch is to execute * @engine: the engine on which the batch is to execute * @batch_obj: the batch buffer in question - * @shadow_batch_obj: copy of the batch buffer in question + * @batch_start: Canonical base address of batch * @batch_start_offset: byte offset in the batch at which execution starts * @batch_len: length of the commands in batch_obj + * @shadow_batch_obj: copy of the batch buffer in question + * @shadow_batch_start: Canonical base address of shadow_batch_obj * * Parses the specified batch buffer looking for privilege violations as * described in the overview. @@ -1308,13 +1419,17 @@ static bool check_cmd(const struct intel_engine_cs *engine, * Return: non-zero if the parser finds violations or otherwise fails; -EACCES * if the batch appears legal but should use hardware parsing */ -int intel_engine_cmd_parser(struct intel_engine_cs *engine, + +int intel_engine_cmd_parser(struct i915_gem_context *ctx, + struct intel_engine_cs *engine, struct drm_i915_gem_object *batch_obj, - struct drm_i915_gem_object *shadow_batch_obj, + u64 batch_start, u32 batch_start_offset, - u32 batch_len) + u32 batch_len, + struct drm_i915_gem_object *shadow_batch_obj, + u64 shadow_batch_start) { - u32 *cmd, *batch_end; + u32 *cmd, *batch_end, offset = 0; struct drm_i915_cmd_descriptor default_desc = noop_desc; const struct drm_i915_cmd_descriptor *desc = &default_desc; bool needs_clflush_after = false; @@ -1328,6 +1443,8 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, return PTR_ERR(cmd); } + init_whitelist(ctx, batch_len); + /* * We use the batch length as size because the shadow object is as * large or larger and copy_batch() will write MI_NOPs to the extra @@ -1348,16 +1465,6 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, goto err; } - /* - * We don't try to handle BATCH_BUFFER_START because it adds - * non-trivial complexity. Instead we abort the scan and return - * and error to indicate that the batch is unsafe. - */ - if (desc->cmd.value == MI_BATCH_BUFFER_START) { - ret = -EACCES; - goto err; - } - if (desc->flags & CMD_DESC_FIXED) length = desc->length.fixed; else @@ -1377,7 +1484,21 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, goto err; } + if (desc->cmd.value == MI_BATCH_BUFFER_START) { + ret = check_bbstart(ctx, cmd, offset, length, + batch_len, batch_start, + shadow_batch_start); + + if (ret) + goto err; + break; + } + + if (ctx->jump_whitelist_cmds > offset) + set_bit(offset, ctx->jump_whitelist); + cmd += length; + offset += length; if (cmd >= batch_end) { DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE cmd!\n"); ret = -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e50030c60665..e7e5f94e97f7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3857,11 +3857,14 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type); int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv); void intel_engine_init_cmd_parser(struct intel_engine_cs *engine); void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine); -int intel_engine_cmd_parser(struct intel_engine_cs *engine, +int intel_engine_cmd_parser(struct i915_gem_context *cxt, + struct intel_engine_cs *engine, struct drm_i915_gem_object *batch_obj, - struct drm_i915_gem_object *shadow_batch_obj, + u64 user_batch_start, u32 batch_start_offset, - u32 batch_len); + u32 batch_len, + struct drm_i915_gem_object *shadow_batch_obj, + u64 shadow_batch_start); /* i915_perf.c */ extern void i915_perf_init(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 8afd2ce59b8d..3925a63c1661 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -141,6 +141,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx) __i915_gem_object_release_unless_active(ce->state->obj); } + kfree(ctx->jump_whitelist); + kfree(ctx->name); put_pid(ctx->pid); @@ -321,6 +323,9 @@ __create_hw_context(struct drm_i915_private *dev_priv, else ctx->ggtt_offset_bias = I915_GTT_PAGE_SIZE; + ctx->jump_whitelist = NULL; + ctx->jump_whitelist_cmds = 0; + return ctx; err_pid: diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 44688e22a5c2..b651c5f427b9 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -181,6 +181,12 @@ struct i915_gem_context { /** remap_slice: Bitmask of cache lines that need remapping */ u8 remap_slice; + /** jump_whitelist: Bit array for tracking cmds during cmdparsing */ + unsigned long *jump_whitelist; + + /** jump_whitelist_cmds: No of cmd slots available */ + u32 jump_whitelist_cmds; + /** handles_vma: rbtree to look up our context specific obj/vma for * the user handle. (user handles are per fd, but the binding is * per vm, which may be one per context or shared with the global GTT) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 12d44ea36d24..d99d05a91032 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1927,7 +1927,6 @@ shadow_batch_pin(struct i915_execbuffer *eb, struct drm_i915_gem_object *obj) if (CMDPARSER_USES_GGTT(dev_priv)) { flags = PIN_GLOBAL; vm = &dev_priv->ggtt.base; - eb->batch_flags |= I915_DISPATCH_SECURE; } else if (eb->vm->has_read_only) { flags = PIN_USER; vm = eb->vm; @@ -1944,6 +1943,8 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb) { struct drm_i915_gem_object *shadow_batch_obj; struct i915_vma *vma; + u64 batch_start; + u64 shadow_batch_start; int err; shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, @@ -1951,12 +1952,27 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb) if (IS_ERR(shadow_batch_obj)) return ERR_CAST(shadow_batch_obj); - err = intel_engine_cmd_parser(eb->engine, + vma = shadow_batch_pin(eb, shadow_batch_obj); + if (IS_ERR(vma)) + goto out; + + batch_start = gen8_canonical_addr(eb->batch->node.start) + + eb->batch_start_offset; + + shadow_batch_start = gen8_canonical_addr(vma->node.start); + + err = intel_engine_cmd_parser(eb->ctx, + eb->engine, eb->batch->obj, - shadow_batch_obj, + batch_start, eb->batch_start_offset, - eb->batch_len); + eb->batch_len, + shadow_batch_obj, + shadow_batch_start); + if (err) { + i915_vma_unpin(vma); + /* * Unsafe GGTT-backed buffers can still be submitted safely * as non-secure. @@ -1968,13 +1984,10 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb) vma = NULL; else vma = ERR_PTR(err); + goto out; } - vma = shadow_batch_pin(eb, shadow_batch_obj); - if (IS_ERR(vma)) - goto out; - eb->vma[eb->buffer_count] = i915_vma_get(vma); eb->flags[eb->buffer_count] = __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF; @@ -1984,6 +1997,9 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb) eb->batch = vma; /* eb->batch_len unchanged */ + if (CMDPARSER_USES_GGTT(eb->i915)) + eb->batch_flags |= I915_DISPATCH_SECURE; + out: i915_gem_object_unpin_pages(shadow_batch_obj); return vma; From b08903f89ee6e7b8116070bcee13eda9fd357aca Mon Sep 17 00:00:00 2001 From: Jon Bloomfield Date: Thu, 20 Sep 2018 09:45:10 -0700 Subject: [PATCH 0436/3715] drm/i915/cmdparser: Ignore Length operands during command matching commit 926abff21a8f29ef159a3ac893b05c6e50e043c3 upstream. Some of the gen instruction macros (e.g. MI_DISPLAY_FLIP) have the length directly encoded in them. Since these are used directly in the tables, the Length becomes part of the comparison used for matching during parsing. Thus, if the cmd being parsed has a different length to that in the table, it is not matched and the cmd is accepted via the default variable length path. Fix by masking out everything except the Opcode in the cmd tables Cc: Tony Luck Cc: Dave Airlie Cc: Takashi Iwai Cc: Tyler Hicks Signed-off-by: Jon Bloomfield Reviewed-by: Chris Wilson Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_cmd_parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 23d220fbca5f..5c2ae816ac32 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -187,7 +187,7 @@ struct drm_i915_cmd_table { #define CMD(op, opm, f, lm, fl, ...) \ { \ .flags = (fl) | ((f) ? CMD_DESC_FIXED : 0), \ - .cmd = { (op), ~0u << (opm) }, \ + .cmd = { (op & ~0u << (opm)), ~0u << (opm) }, \ .length = { (lm) }, \ __VA_ARGS__ \ } From d302d64a90b0b390ead3f70832aac2662e34323b Mon Sep 17 00:00:00 2001 From: Uma Shankar Date: Tue, 7 Aug 2018 21:15:35 +0530 Subject: [PATCH 0437/3715] drm/i915: Lower RM timeout to avoid DSI hard hangs commit 1d85a299c4db57c55e0229615132c964d17aa765 upstream. In BXT/APL, device 2 MMIO reads from MIPI controller requires its PLL to be turned ON. When MIPI PLL is turned off (MIPI Display is not active or connected), and someone (host or GT engine) tries to read MIPI registers, it causes hard hang. This is a hardware restriction or limitation. Driver by itself doesn't read MIPI registers when MIPI display is off. But any userspace application can submit unprivileged batch buffer for execution. In that batch buffer there can be mmio reads. And these reads are allowed even for unprivileged applications. If these register reads are for MIPI DSI controller and MIPI display is not active during that time, then the MMIO read operation causes system hard hang and only way to recover is hard reboot. A genuine process/application won't submit batch buffer like this and doesn't cause any issue. But on a compromised system, a malign userspace process/app can generate such batch buffer and can trigger system hard hang (denial of service attack). The fix is to lower the internal MMIO timeout value to an optimum value of 950us as recommended by hardware team. If the timeout is beyond 1ms (which will hit for any value we choose if MMIO READ on a DSI specific register is performed without PLL ON), it causes the system hang. But if the timeout value is lower than it will be below the threshold (even if timeout happens) and system will not get into a hung state. This will avoid a system hang without losing any programming or GT interrupts, taking the worst case of lowest CDCLK frequency and early DC5 abort into account. Signed-off-by: Uma Shankar Reviewed-by: Jon Bloomfield Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_reg.h | 4 ++++ drivers/gpu/drm/i915/intel_pm.c | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index ca2845609e7e..e95547aad312 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6726,6 +6726,10 @@ enum { #define SKL_CSR_DC5_DC6_COUNT _MMIO(0x8002C) #define BXT_CSR_DC3_DC5_COUNT _MMIO(0x80038) +/* Display Internal Timeout Register */ +#define RM_TIMEOUT _MMIO(0x42060) +#define MMIO_TIMEOUT_US(us) ((us) << 0) + /* interrupts */ #define DE_MASTER_IRQ_CONTROL (1 << 31) #define DE_SPRITEB_FLIP_DONE (1 << 29) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index cb377b003321..391c248a5755 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -121,6 +121,14 @@ static void bxt_init_clock_gating(struct drm_i915_private *dev_priv) */ I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) | PWM1_GATING_DIS | PWM2_GATING_DIS); + + /* + * Lower the display internal timeout. + * This is needed to avoid any hard hangs when DSI port PLL + * is off and a MMIO access is attempted by any privilege + * application, using batch buffers or any other means. + */ + I915_WRITE(RM_TIMEOUT, MMIO_TIMEOUT_US(950)); } static void glk_init_clock_gating(struct drm_i915_private *dev_priv) From 1a5a64e0bde89f5c768353fce5307b9816c00af7 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 9 Jul 2018 18:24:27 +0300 Subject: [PATCH 0438/3715] drm/i915/gen8+: Add RC6 CTX corruption WA commit 7e34f4e4aad3fd34c02b294a3cf2321adf5b4438 upstream. In some circumstances the RC6 context can get corrupted. We can detect this and take the required action, that is disable RC6 and runtime PM. The HW recovers from the corrupted state after a system suspend/resume cycle, so detect the recovery and re-enable RC6 and runtime PM. v2: rebase (Mika) v3: - Move intel_suspend_gt_powersave() to the end of the GEM suspend sequence. - Add commit message. v4: - Rebased on intel_uncore_forcewake_put(i915->uncore, ...) API change. v5: - Rebased on latest upstream gt_pm refactoring. Signed-off-by: Imre Deak Signed-off-by: Mika Kuoppala Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_drv.c | 2 + drivers/gpu/drm/i915/i915_drv.h | 6 +- drivers/gpu/drm/i915/i915_gem.c | 6 + drivers/gpu/drm/i915/i915_gem_request.c | 4 + drivers/gpu/drm/i915/i915_reg.h | 2 + drivers/gpu/drm/i915/intel_drv.h | 3 + drivers/gpu/drm/i915/intel_pm.c | 153 +++++++++++++++++++++--- 7 files changed, 158 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 9b642dd040cf..02a2af7c8166 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1564,6 +1564,7 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) disable_rpm_wakeref_asserts(dev_priv); intel_display_set_init_power(dev_priv, false); + i915_rc6_ctx_wa_suspend(dev_priv); fw_csr = !IS_GEN9_LP(dev_priv) && suspend_to_idle(dev_priv) && dev_priv->csr.dmc_payload; @@ -1800,6 +1801,7 @@ static int i915_drm_resume_early(struct drm_device *dev) intel_display_set_init_power(dev_priv, true); i915_gem_sanitize(dev_priv); + i915_rc6_ctx_wa_resume(dev_priv); enable_rpm_wakeref_asserts(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e7e5f94e97f7..a5fb7404b29e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1320,6 +1320,7 @@ struct intel_gen6_power_mgmt { enum { LOW_POWER, BETWEEN, HIGH_POWER } power; bool enabled; + bool ctx_corrupted; struct delayed_work autoenable_work; atomic_t num_waiters; atomic_t boosts; @@ -3025,9 +3026,12 @@ intel_info(const struct drm_i915_private *dev_priv) /* Early gen2 have a totally busted CS tlb and require pinned batches. */ #define HAS_BROKEN_CS_TLB(dev_priv) (IS_I830(dev_priv) || IS_I845G(dev_priv)) +#define NEEDS_RC6_CTX_CORRUPTION_WA(dev_priv) \ + (IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) == 9) + /* WaRsDisableCoarsePowerGating:skl,bxt */ #define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \ - (IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv)) + (INTEL_GEN(dev_priv) == 9) /* * dp aux and gmbus irq on gen4 seems to be able to generate legacy interrupts diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 785d192b27c8..9263b65720bc 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3243,6 +3243,12 @@ i915_gem_idle_work_handler(struct work_struct *work) if (INTEL_GEN(dev_priv) >= 6) gen6_rps_idle(dev_priv); + + if (NEEDS_RC6_CTX_CORRUPTION_WA(dev_priv)) { + i915_rc6_ctx_wa_check(dev_priv); + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + } + intel_runtime_pm_put(dev_priv); out_unlock: mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 813a3b546d6e..1d556dcbd656 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -252,6 +252,10 @@ static void mark_busy(struct drm_i915_private *i915) GEM_BUG_ON(!i915->gt.active_requests); intel_runtime_pm_get_noresume(i915); + + if (NEEDS_RC6_CTX_CORRUPTION_WA(i915)) + intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); + i915->gt.awake = true; intel_enable_gt_powersave(i915); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e95547aad312..1db70350af0b 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -358,6 +358,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN8_CONFIG0 _MMIO(0xD00) #define GEN9_DEFAULT_FIXES (1 << 3 | 1 << 2 | 1 << 1) +#define GEN8_RC6_CTX_INFO _MMIO(0x8504) + #define GAC_ECO_BITS _MMIO(0x14090) #define ECOBITS_SNB_BIT (1<<13) #define ECOBITS_PPGTT_CACHE64B (3<<8) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 3adb9c3b412e..cba10cdab2a9 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1838,6 +1838,9 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv); void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv); void intel_disable_gt_powersave(struct drm_i915_private *dev_priv); void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv); +bool i915_rc6_ctx_wa_check(struct drm_i915_private *i915); +void i915_rc6_ctx_wa_suspend(struct drm_i915_private *i915); +void i915_rc6_ctx_wa_resume(struct drm_i915_private *i915); void gen6_rps_busy(struct drm_i915_private *dev_priv); void gen6_rps_reset_ei(struct drm_i915_private *dev_priv); void gen6_rps_idle(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 391c248a5755..674410682ccc 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6282,19 +6282,23 @@ static void gen9_disable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RP_CONTROL, 0); } -static void gen6_disable_rps(struct drm_i915_private *dev_priv) +static void gen6_disable_rc6(struct drm_i915_private *dev_priv) { I915_WRITE(GEN6_RC_CONTROL, 0); +} + +static void gen6_disable_rps(struct drm_i915_private *dev_priv) +{ I915_WRITE(GEN6_RPNSWREQ, 1 << 31); I915_WRITE(GEN6_RP_CONTROL, 0); } -static void cherryview_disable_rps(struct drm_i915_private *dev_priv) +static void cherryview_disable_rc6(struct drm_i915_private *dev_priv) { I915_WRITE(GEN6_RC_CONTROL, 0); } -static void valleyview_disable_rps(struct drm_i915_private *dev_priv) +static void valleyview_disable_rc6(struct drm_i915_private *dev_priv) { /* we're doing forcewake before Disabling RC6, * This what the BIOS expects when going into suspend */ @@ -6545,7 +6549,8 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv) I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 25); /* 3a: Enable RC6 */ - if (intel_enable_rc6() & INTEL_RC6_ENABLE) + if (!dev_priv->rps.ctx_corrupted && + intel_enable_rc6() & INTEL_RC6_ENABLE) rc6_mask = GEN6_RC_CTL_RC6_ENABLE; DRM_INFO("RC6 %s\n", onoff(rc6_mask & GEN6_RC_CTL_RC6_ENABLE)); I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ @@ -6594,7 +6599,8 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ /* 3: Enable RC6 */ - if (intel_enable_rc6() & INTEL_RC6_ENABLE) + if (!dev_priv->rps.ctx_corrupted && + intel_enable_rc6() & INTEL_RC6_ENABLE) rc6_mask = GEN6_RC_CTL_RC6_ENABLE; intel_print_rc6_info(dev_priv, rc6_mask); if (IS_BROADWELL(dev_priv)) @@ -7775,6 +7781,95 @@ static void intel_init_emon(struct drm_i915_private *dev_priv) dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK); } +static bool i915_rc6_ctx_corrupted(struct drm_i915_private *dev_priv) +{ + return !I915_READ(GEN8_RC6_CTX_INFO); +} + +static void i915_rc6_ctx_wa_init(struct drm_i915_private *i915) +{ + if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915)) + return; + + if (i915_rc6_ctx_corrupted(i915)) { + DRM_INFO("RC6 context corrupted, disabling runtime power management\n"); + i915->rps.ctx_corrupted = true; + intel_runtime_pm_get(i915); + } +} + +static void i915_rc6_ctx_wa_cleanup(struct drm_i915_private *i915) +{ + if (i915->rps.ctx_corrupted) { + intel_runtime_pm_put(i915); + i915->rps.ctx_corrupted = false; + } +} + +/** + * i915_rc6_ctx_wa_suspend - system suspend sequence for the RC6 CTX WA + * @i915: i915 device + * + * Perform any steps needed to clean up the RC6 CTX WA before system suspend. + */ +void i915_rc6_ctx_wa_suspend(struct drm_i915_private *i915) +{ + if (i915->rps.ctx_corrupted) + intel_runtime_pm_put(i915); +} + +/** + * i915_rc6_ctx_wa_resume - system resume sequence for the RC6 CTX WA + * @i915: i915 device + * + * Perform any steps needed to re-init the RC6 CTX WA after system resume. + */ +void i915_rc6_ctx_wa_resume(struct drm_i915_private *i915) +{ + if (!i915->rps.ctx_corrupted) + return; + + if (i915_rc6_ctx_corrupted(i915)) { + intel_runtime_pm_get(i915); + return; + } + + DRM_INFO("RC6 context restored, re-enabling runtime power management\n"); + i915->rps.ctx_corrupted = false; +} + +static void intel_disable_rc6(struct drm_i915_private *dev_priv); + +/** + * i915_rc6_ctx_wa_check - check for a new RC6 CTX corruption + * @i915: i915 device + * + * Check if an RC6 CTX corruption has happened since the last check and if so + * disable RC6 and runtime power management. + * + * Return false if no context corruption has happened since the last call of + * this function, true otherwise. +*/ +bool i915_rc6_ctx_wa_check(struct drm_i915_private *i915) +{ + if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915)) + return false; + + if (i915->rps.ctx_corrupted) + return false; + + if (!i915_rc6_ctx_corrupted(i915)) + return false; + + DRM_NOTE("RC6 context corruption, disabling runtime power management\n"); + + intel_disable_rc6(i915); + i915->rps.ctx_corrupted = true; + intel_runtime_pm_get_noresume(i915); + + return true; +} + void intel_init_gt_powersave(struct drm_i915_private *dev_priv) { /* @@ -7789,6 +7884,8 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->drm.struct_mutex); mutex_lock(&dev_priv->rps.hw_lock); + i915_rc6_ctx_wa_init(dev_priv); + /* Initialize RPS limits (for userspace) */ if (IS_CHERRYVIEW(dev_priv)) cherryview_init_gt_powersave(dev_priv); @@ -7838,6 +7935,8 @@ void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv) if (IS_VALLEYVIEW(dev_priv)) valleyview_cleanup_gt_powersave(dev_priv); + i915_rc6_ctx_wa_cleanup(dev_priv); + if (!i915.enable_rc6) intel_runtime_pm_put(dev_priv); } @@ -7869,6 +7968,35 @@ void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) gen6_reset_rps_interrupts(dev_priv); } +static void __intel_disable_rc6(struct drm_i915_private *dev_priv) +{ + if (INTEL_GEN(dev_priv) >= 9) + gen9_disable_rc6(dev_priv); + else if (IS_CHERRYVIEW(dev_priv)) + cherryview_disable_rc6(dev_priv); + else if (IS_VALLEYVIEW(dev_priv)) + valleyview_disable_rc6(dev_priv); + else if (INTEL_GEN(dev_priv) >= 6) + gen6_disable_rc6(dev_priv); +} + +static void intel_disable_rc6(struct drm_i915_private *dev_priv) +{ + mutex_lock(&dev_priv->rps.hw_lock); + __intel_disable_rc6(dev_priv); + mutex_unlock(&dev_priv->rps.hw_lock); +} + +static void intel_disable_rps(struct drm_i915_private *dev_priv) +{ + if (INTEL_GEN(dev_priv) >= 9) + gen9_disable_rps(dev_priv); + else if (INTEL_GEN(dev_priv) >= 6) + gen6_disable_rps(dev_priv); + else if (IS_IRONLAKE_M(dev_priv)) + ironlake_disable_drps(dev_priv); +} + void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) { if (!READ_ONCE(dev_priv->rps.enabled)) @@ -7876,20 +8004,11 @@ void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->rps.hw_lock); - if (INTEL_GEN(dev_priv) >= 9) { - gen9_disable_rc6(dev_priv); - gen9_disable_rps(dev_priv); - } else if (IS_CHERRYVIEW(dev_priv)) { - cherryview_disable_rps(dev_priv); - } else if (IS_VALLEYVIEW(dev_priv)) { - valleyview_disable_rps(dev_priv); - } else if (INTEL_GEN(dev_priv) >= 6) { - gen6_disable_rps(dev_priv); - } else if (IS_IRONLAKE_M(dev_priv)) { - ironlake_disable_drps(dev_priv); - } + __intel_disable_rc6(dev_priv); + intel_disable_rps(dev_priv); dev_priv->rps.enabled = false; + mutex_unlock(&dev_priv->rps.hw_lock); } From e78af200c633837b0ad98dd9d8612e0e5bbe83c7 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 11 Nov 2019 08:13:24 -0800 Subject: [PATCH 0439/3715] drm/i915/cmdparser: Fix jump whitelist clearing commit ea0b163b13ffc52818c079adb00d55e227a6da6f upstream. When a jump_whitelist bitmap is reused, it needs to be cleared. Currently this is done with memset() and the size calculation assumes bitmaps are made of 32-bit words, not longs. So on 64-bit architectures, only the first half of the bitmap is cleared. If some whitelist bits are carried over between successive batches submitted on the same context, this will presumably allow embedding the rogue instructions that we're trying to reject. Use bitmap_zero() instead, which gets the calculation right. Fixes: f8c08d8faee5 ("drm/i915/cmdparser: Add support for backward jumps") Signed-off-by: Ben Hutchings Signed-off-by: Jon Bloomfield Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_cmd_parser.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 5c2ae816ac32..e4b9eb1f6b60 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1374,7 +1374,7 @@ static void init_whitelist(struct i915_gem_context *ctx, u32 batch_len) return; if (batch_cmds <= ctx->jump_whitelist_cmds) { - memset(ctx->jump_whitelist, 0, exact_size * sizeof(u32)); + bitmap_zero(ctx->jump_whitelist, batch_cmds); return; } @@ -1394,8 +1394,7 @@ again: } DRM_DEBUG("CMD: Failed to extend whitelist. BB_START may be disallowed\n"); - memset(ctx->jump_whitelist, 0, - BITS_TO_LONGS(ctx->jump_whitelist_cmds) * sizeof(u32)); + bitmap_zero(ctx->jump_whitelist, ctx->jump_whitelist_cmds); return; } From f570b657264ba55b336f65ab689eea096c0fef39 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 19 Aug 2019 17:24:07 +0200 Subject: [PATCH 0440/3715] KVM: x86: use Intel speculation bugs and features as derived in generic x86 code commit 0c54914d0c52a15db9954a76ce80fee32cf318f4 upstream. Similar to AMD bits, set the Intel bits from the vendor-independent feature and bug flags, because KVM_GET_SUPPORTED_CPUID does not care about the vendor and they should be set on AMD processors as well. Suggested-by: Jim Mattson Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/cpuid.c | 8 ++++++++ arch/x86/kvm/x86.c | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 5c82b4bc4a68..33f87b696487 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -481,8 +481,16 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* PKU is not yet implemented for shadow paging. */ if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) entry->ecx &= ~F(PKU); + entry->edx &= kvm_cpuid_7_0_edx_x86_features; cpuid_mask(&entry->edx, CPUID_7_EDX); + if (boot_cpu_has(X86_FEATURE_IBPB) && + boot_cpu_has(X86_FEATURE_IBRS)) + entry->edx |= F(SPEC_CTRL); + if (boot_cpu_has(X86_FEATURE_STIBP)) + entry->edx |= F(INTEL_STIBP); + if (boot_cpu_has(X86_FEATURE_SSBD)) + entry->edx |= F(SPEC_CTRL_SSBD); /* * We emulate ARCH_CAPABILITIES in software even * if the host doesn't support it. diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4927d0f5be13..b1e0969a4543 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1081,8 +1081,16 @@ u64 kvm_get_arch_capabilities(void) if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER) data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH; + if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) + data |= ARCH_CAP_RDCL_NO; + if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) + data |= ARCH_CAP_SSB_NO; + if (!boot_cpu_has_bug(X86_BUG_MDS)) + data |= ARCH_CAP_MDS_NO; + return data; } + EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities); static int kvm_get_msr_feature(struct kvm_msr_entry *msr) From 74bccd0ec712fdff716cec80ba1553d41bc887b8 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Wed, 23 Oct 2019 10:45:50 +0200 Subject: [PATCH 0441/3715] x86/msr: Add the IA32_TSX_CTRL MSR commit c2955f270a84762343000f103e0640d29c7a96f3 upstream. Transactional Synchronization Extensions (TSX) may be used on certain processors as part of a speculative side channel attack. A microcode update for existing processors that are vulnerable to this attack will add a new MSR - IA32_TSX_CTRL to allow the system administrator the option to disable TSX as one of the possible mitigations. The CPUs which get this new MSR after a microcode upgrade are the ones which do not set MSR_IA32_ARCH_CAPABILITIES.MDS_NO (bit 5) because those CPUs have CPUID.MD_CLEAR, i.e., the VERW implementation which clears all CPU buffers takes care of the TAA case as well. [ Note that future processors that are not vulnerable will also support the IA32_TSX_CTRL MSR. ] Add defines for the new IA32_TSX_CTRL MSR and its bits. TSX has two sub-features: 1. Restricted Transactional Memory (RTM) is an explicitly-used feature where new instructions begin and end TSX transactions. 2. Hardware Lock Elision (HLE) is implicitly used when certain kinds of "old" style locks are used by software. Bit 7 of the IA32_ARCH_CAPABILITIES indicates the presence of the IA32_TSX_CTRL MSR. There are two control bits in IA32_TSX_CTRL MSR: Bit 0: When set, it disables the Restricted Transactional Memory (RTM) sub-feature of TSX (will force all transactions to abort on the XBEGIN instruction). Bit 1: When set, it disables the enumeration of the RTM and HLE feature (i.e. it will make CPUID(EAX=7).EBX{bit4} and CPUID(EAX=7).EBX{bit11} read as 0). The other TSX sub-feature, Hardware Lock Elision (HLE), is unconditionally disabled by the new microcode but still enumerated as present by CPUID(EAX=7).EBX{bit4}, unless disabled by IA32_TSX_CTRL_MSR[1] - TSX_CTRL_CPUID_CLEAR. Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Tested-by: Neelima Krishnan Reviewed-by: Mark Gross Reviewed-by: Tony Luck Reviewed-by: Josh Poimboeuf Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index fda3bf75de6c..127fc249def7 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -84,6 +84,7 @@ * Microarchitectural Data * Sampling (MDS) vulnerabilities. */ +#define ARCH_CAP_TSX_CTRL_MSR BIT(7) /* MSR for TSX control is available. */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* @@ -94,6 +95,10 @@ #define MSR_IA32_BBL_CR_CTL 0x00000119 #define MSR_IA32_BBL_CR_CTL3 0x0000011e +#define MSR_IA32_TSX_CTRL 0x00000122 +#define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */ +#define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */ + #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 #define MSR_IA32_SYSENTER_EIP 0x00000176 From 52bd862adee5c3ab7125bb7309f23bccf0ffd691 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Wed, 23 Oct 2019 10:52:35 +0200 Subject: [PATCH 0442/3715] x86/cpu: Add a helper function x86_read_arch_cap_msr() commit 286836a70433fb64131d2590f4bf512097c255e1 upstream. Add a helper function to read the IA32_ARCH_CAPABILITIES MSR. Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Tested-by: Neelima Krishnan Reviewed-by: Mark Gross Reviewed-by: Tony Luck Reviewed-by: Josh Poimboeuf Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 15 +++++++++++---- arch/x86/kernel/cpu/cpu.h | 2 ++ 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 551c6bed7c8c..e6edbe5d2f81 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -968,19 +968,26 @@ static bool __init cpu_matches(unsigned long which) return m && !!(m->driver_data & which); } -static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) +u64 x86_read_arch_cap_msr(void) { u64 ia32_cap = 0; + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); + + return ia32_cap; +} + +static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) +{ + u64 ia32_cap = x86_read_arch_cap_msr(); + if (cpu_matches(NO_SPECULATION)) return; setup_force_cpu_bug(X86_BUG_SPECTRE_V1); setup_force_cpu_bug(X86_BUG_SPECTRE_V2); - if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); - if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) && !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index cca588407dca..f28c6cd056f9 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -54,4 +54,6 @@ unsigned int aperfmperf_get_khz(int cpu); extern void x86_spec_ctrl_setup_ap(void); +extern u64 x86_read_arch_cap_msr(void); + #endif /* ARCH_X86_CPU_H */ From 4b708ea4e5e772747b89619489ab96e9d1a1a44d Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Wed, 23 Oct 2019 11:01:53 +0200 Subject: [PATCH 0443/3715] x86/cpu: Add a "tsx=" cmdline option with TSX disabled by default commit 95c5824f75f3ba4c9e8e5a4b1a623c95390ac266 upstream. Add a kernel cmdline parameter "tsx" to control the Transactional Synchronization Extensions (TSX) feature. On CPUs that support TSX control, use "tsx=on|off" to enable or disable TSX. Not specifying this option is equivalent to "tsx=off". This is because on certain processors TSX may be used as a part of a speculative side channel attack. Carve out the TSX controlling functionality into a separate compilation unit because TSX is a CPU feature while the TSX async abort control machinery will go to cpu/bugs.c. [ bp: - Massage, shorten and clear the arg buffer. - Clarifications of the tsx= possible options - Josh. - Expand on TSX_CTRL availability - Pawan. ] Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 26 ++++ arch/x86/kernel/cpu/Makefile | 2 +- arch/x86/kernel/cpu/common.c | 1 + arch/x86/kernel/cpu/cpu.h | 16 +++ arch/x86/kernel/cpu/intel.c | 5 + arch/x86/kernel/cpu/tsx.c | 125 ++++++++++++++++++ 6 files changed, 174 insertions(+), 1 deletion(-) create mode 100644 arch/x86/kernel/cpu/tsx.c diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 671f518b09ee..01463ac89a00 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4505,6 +4505,32 @@ platforms where RDTSC is slow and this accounting can add overhead. + tsx= [X86] Control Transactional Synchronization + Extensions (TSX) feature in Intel processors that + support TSX control. + + This parameter controls the TSX feature. The options are: + + on - Enable TSX on the system. Although there are + mitigations for all known security vulnerabilities, + TSX has been known to be an accelerator for + several previous speculation-related CVEs, and + so there may be unknown security risks associated + with leaving it enabled. + + off - Disable TSX on the system. (Note that this + option takes effect only on newer CPUs which are + not vulnerable to MDS, i.e., have + MSR_IA32_ARCH_CAPABILITIES.MDS_NO=1 and which get + the new IA32_TSX_CTRL MSR through a microcode + update. This new MSR allows for the reliable + deactivation of the TSX functionality.) + + Not specifying this option is equivalent to tsx=off. + + See Documentation/admin-guide/hw-vuln/tsx_async_abort.rst + for more details. + turbografx.map[2|3]= [HW,JOY] TurboGraFX parallel port interface Format: diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 570e8bb1f386..e13ddd19a76c 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -28,7 +28,7 @@ obj-y += cpuid-deps.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o -obj-$(CONFIG_CPU_SUP_INTEL) += intel.o +obj-$(CONFIG_CPU_SUP_INTEL) += intel.o tsx.o obj-$(CONFIG_CPU_SUP_AMD) += amd.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e6edbe5d2f81..255ab4d8a3c6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1414,6 +1414,7 @@ void __init identify_boot_cpu(void) enable_sep_cpu(); #endif cpu_detect_tlb(&boot_cpu_data); + tsx_init(); } void identify_secondary_cpu(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index f28c6cd056f9..db10a63687d3 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -45,6 +45,22 @@ struct _tlb_table { extern const struct cpu_dev *const __x86_cpu_dev_start[], *const __x86_cpu_dev_end[]; +#ifdef CONFIG_CPU_SUP_INTEL +enum tsx_ctrl_states { + TSX_CTRL_ENABLE, + TSX_CTRL_DISABLE, + TSX_CTRL_NOT_SUPPORTED, +}; + +extern __ro_after_init enum tsx_ctrl_states tsx_ctrl_state; + +extern void __init tsx_init(void); +extern void tsx_enable(void); +extern void tsx_disable(void); +#else +static inline void tsx_init(void) { } +#endif /* CONFIG_CPU_SUP_INTEL */ + extern void get_cpu_cap(struct cpuinfo_x86 *c); extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); extern int detect_extended_topology_early(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 574dcdc092ab..3a5ea741701b 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -695,6 +695,11 @@ static void init_intel(struct cpuinfo_x86 *c) init_intel_energy_perf(c); init_intel_misc_features(c); + + if (tsx_ctrl_state == TSX_CTRL_ENABLE) + tsx_enable(); + if (tsx_ctrl_state == TSX_CTRL_DISABLE) + tsx_disable(); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c new file mode 100644 index 000000000000..04471c4378d8 --- /dev/null +++ b/arch/x86/kernel/cpu/tsx.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel Transactional Synchronization Extensions (TSX) control. + * + * Copyright (C) 2019 Intel Corporation + * + * Author: + * Pawan Gupta + */ + +#include + +#include + +#include "cpu.h" + +enum tsx_ctrl_states tsx_ctrl_state __ro_after_init = TSX_CTRL_NOT_SUPPORTED; + +void tsx_disable(void) +{ + u64 tsx; + + rdmsrl(MSR_IA32_TSX_CTRL, tsx); + + /* Force all transactions to immediately abort */ + tsx |= TSX_CTRL_RTM_DISABLE; + + /* + * Ensure TSX support is not enumerated in CPUID. + * This is visible to userspace and will ensure they + * do not waste resources trying TSX transactions that + * will always abort. + */ + tsx |= TSX_CTRL_CPUID_CLEAR; + + wrmsrl(MSR_IA32_TSX_CTRL, tsx); +} + +void tsx_enable(void) +{ + u64 tsx; + + rdmsrl(MSR_IA32_TSX_CTRL, tsx); + + /* Enable the RTM feature in the cpu */ + tsx &= ~TSX_CTRL_RTM_DISABLE; + + /* + * Ensure TSX support is enumerated in CPUID. + * This is visible to userspace and will ensure they + * can enumerate and use the TSX feature. + */ + tsx &= ~TSX_CTRL_CPUID_CLEAR; + + wrmsrl(MSR_IA32_TSX_CTRL, tsx); +} + +static bool __init tsx_ctrl_is_supported(void) +{ + u64 ia32_cap = x86_read_arch_cap_msr(); + + /* + * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this + * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES. + * + * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a + * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES + * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get + * MSR_IA32_TSX_CTRL support even after a microcode update. Thus, + * tsx= cmdline requests will do nothing on CPUs without + * MSR_IA32_TSX_CTRL support. + */ + return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR); +} + +void __init tsx_init(void) +{ + char arg[4] = {}; + int ret; + + if (!tsx_ctrl_is_supported()) + return; + + ret = cmdline_find_option(boot_command_line, "tsx", arg, sizeof(arg)); + if (ret >= 0) { + if (!strcmp(arg, "on")) { + tsx_ctrl_state = TSX_CTRL_ENABLE; + } else if (!strcmp(arg, "off")) { + tsx_ctrl_state = TSX_CTRL_DISABLE; + } else { + tsx_ctrl_state = TSX_CTRL_DISABLE; + pr_err("tsx: invalid option, defaulting to off\n"); + } + } else { + /* tsx= not provided, defaulting to off */ + tsx_ctrl_state = TSX_CTRL_DISABLE; + } + + if (tsx_ctrl_state == TSX_CTRL_DISABLE) { + tsx_disable(); + + /* + * tsx_disable() will change the state of the + * RTM CPUID bit. Clear it here since it is now + * expected to be not set. + */ + setup_clear_cpu_cap(X86_FEATURE_RTM); + } else if (tsx_ctrl_state == TSX_CTRL_ENABLE) { + + /* + * HW defaults TSX to be enabled at bootup. + * We may still need the TSX enable support + * during init for special cases like + * kexec after TSX is disabled. + */ + tsx_enable(); + + /* + * tsx_enable() will change the state of the + * RTM CPUID bit. Force it here since it is now + * expected to be set. + */ + setup_force_cpu_cap(X86_FEATURE_RTM); + } +} From 9a5757771b81203d6ff9c170b2a24ae9e447da08 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Wed, 23 Oct 2019 11:30:45 +0200 Subject: [PATCH 0444/3715] x86/speculation/taa: Add mitigation for TSX Async Abort commit 1b42f017415b46c317e71d41c34ec088417a1883 upstream. TSX Async Abort (TAA) is a side channel vulnerability to the internal buffers in some Intel processors similar to Microachitectural Data Sampling (MDS). In this case, certain loads may speculatively pass invalid data to dependent operations when an asynchronous abort condition is pending in a TSX transaction. This includes loads with no fault or assist condition. Such loads may speculatively expose stale data from the uarch data structures as in MDS. Scope of exposure is within the same-thread and cross-thread. This issue affects all current processors that support TSX, but do not have ARCH_CAP_TAA_NO (bit 8) set in MSR_IA32_ARCH_CAPABILITIES. On CPUs which have their IA32_ARCH_CAPABILITIES MSR bit MDS_NO=0, CPUID.MD_CLEAR=1 and the MDS mitigation is clearing the CPU buffers using VERW or L1D_FLUSH, there is no additional mitigation needed for TAA. On affected CPUs with MDS_NO=1 this issue can be mitigated by disabling the Transactional Synchronization Extensions (TSX) feature. A new MSR IA32_TSX_CTRL in future and current processors after a microcode update can be used to control the TSX feature. There are two bits in that MSR: * TSX_CTRL_RTM_DISABLE disables the TSX sub-feature Restricted Transactional Memory (RTM). * TSX_CTRL_CPUID_CLEAR clears the RTM enumeration in CPUID. The other TSX sub-feature, Hardware Lock Elision (HLE), is unconditionally disabled with updated microcode but still enumerated as present by CPUID(EAX=7).EBX{bit4}. The second mitigation approach is similar to MDS which is clearing the affected CPU buffers on return to user space and when entering a guest. Relevant microcode update is required for the mitigation to work. More details on this approach can be found here: https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html The TSX feature can be controlled by the "tsx" command line parameter. If it is force-enabled then "Clear CPU buffers" (MDS mitigation) is deployed. The effective mitigation state can be read from sysfs. [ bp: - massage + comments cleanup - s/TAA_MITIGATION_TSX_DISABLE/TAA_MITIGATION_TSX_DISABLED/g - Josh. - remove partial TAA mitigation in update_mds_branch_idle() - Josh. - s/tsx_async_abort_cmdline/tsx_async_abort_parse_cmdline/g ] Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 4 + arch/x86/include/asm/nospec-branch.h | 4 +- arch/x86/include/asm/processor.h | 7 ++ arch/x86/kernel/cpu/bugs.c | 110 +++++++++++++++++++++++++++ arch/x86/kernel/cpu/common.c | 15 ++++ 6 files changed, 139 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 14357354cd28..46a4d5f4a77c 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -388,5 +388,6 @@ #define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ #define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ #define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */ +#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 127fc249def7..2ff54c96576d 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -85,6 +85,10 @@ * Sampling (MDS) vulnerabilities. */ #define ARCH_CAP_TSX_CTRL_MSR BIT(7) /* MSR for TSX control is available. */ +#define ARCH_CAP_TAA_NO BIT(8) /* + * Not susceptible to + * TSX Async Abort (TAA) vulnerabilities. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index f6b496d11097..b73a16a56e4f 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -323,7 +323,7 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear); #include /** - * mds_clear_cpu_buffers - Mitigation for MDS vulnerability + * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability * * This uses the otherwise unused and obsolete VERW instruction in * combination with microcode which triggers a CPU buffer flush when the @@ -346,7 +346,7 @@ static inline void mds_clear_cpu_buffers(void) } /** - * mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability + * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability * * Clear CPU buffers if the corresponding static key is enabled */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index d55a0adbcf27..6a87eda9691e 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -994,4 +994,11 @@ enum mds_mitigations { MDS_MITIGATION_VMWERV, }; +enum taa_mitigations { + TAA_MITIGATION_OFF, + TAA_MITIGATION_UCODE_NEEDED, + TAA_MITIGATION_VERW, + TAA_MITIGATION_TSX_DISABLED, +}; + #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 8bf21bc7a190..8b219d30c381 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -32,11 +32,14 @@ #include #include +#include "cpu.h" + static void __init spectre_v1_select_mitigation(void); static void __init spectre_v2_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); static void __init mds_select_mitigation(void); +static void __init taa_select_mitigation(void); /* The base value of the SPEC_CTRL MSR that always has to be preserved. */ u64 x86_spec_ctrl_base; @@ -103,6 +106,7 @@ void __init check_bugs(void) ssb_select_mitigation(); l1tf_select_mitigation(); mds_select_mitigation(); + taa_select_mitigation(); arch_smt_update(); @@ -266,6 +270,100 @@ static int __init mds_cmdline(char *str) } early_param("mds", mds_cmdline); +#undef pr_fmt +#define pr_fmt(fmt) "TAA: " fmt + +/* Default mitigation for TAA-affected CPUs */ +static enum taa_mitigations taa_mitigation __ro_after_init = TAA_MITIGATION_VERW; +static bool taa_nosmt __ro_after_init; + +static const char * const taa_strings[] = { + [TAA_MITIGATION_OFF] = "Vulnerable", + [TAA_MITIGATION_UCODE_NEEDED] = "Vulnerable: Clear CPU buffers attempted, no microcode", + [TAA_MITIGATION_VERW] = "Mitigation: Clear CPU buffers", + [TAA_MITIGATION_TSX_DISABLED] = "Mitigation: TSX disabled", +}; + +static void __init taa_select_mitigation(void) +{ + u64 ia32_cap; + + if (!boot_cpu_has_bug(X86_BUG_TAA)) { + taa_mitigation = TAA_MITIGATION_OFF; + return; + } + + /* TSX previously disabled by tsx=off */ + if (!boot_cpu_has(X86_FEATURE_RTM)) { + taa_mitigation = TAA_MITIGATION_TSX_DISABLED; + goto out; + } + + if (cpu_mitigations_off()) { + taa_mitigation = TAA_MITIGATION_OFF; + return; + } + + /* TAA mitigation is turned off on the cmdline (tsx_async_abort=off) */ + if (taa_mitigation == TAA_MITIGATION_OFF) + goto out; + + if (boot_cpu_has(X86_FEATURE_MD_CLEAR)) + taa_mitigation = TAA_MITIGATION_VERW; + else + taa_mitigation = TAA_MITIGATION_UCODE_NEEDED; + + /* + * VERW doesn't clear the CPU buffers when MD_CLEAR=1 and MDS_NO=1. + * A microcode update fixes this behavior to clear CPU buffers. It also + * adds support for MSR_IA32_TSX_CTRL which is enumerated by the + * ARCH_CAP_TSX_CTRL_MSR bit. + * + * On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode + * update is required. + */ + ia32_cap = x86_read_arch_cap_msr(); + if ( (ia32_cap & ARCH_CAP_MDS_NO) && + !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR)) + taa_mitigation = TAA_MITIGATION_UCODE_NEEDED; + + /* + * TSX is enabled, select alternate mitigation for TAA which is + * the same as MDS. Enable MDS static branch to clear CPU buffers. + * + * For guests that can't determine whether the correct microcode is + * present on host, enable the mitigation for UCODE_NEEDED as well. + */ + static_branch_enable(&mds_user_clear); + + if (taa_nosmt || cpu_mitigations_auto_nosmt()) + cpu_smt_disable(false); + +out: + pr_info("%s\n", taa_strings[taa_mitigation]); +} + +static int __init tsx_async_abort_parse_cmdline(char *str) +{ + if (!boot_cpu_has_bug(X86_BUG_TAA)) + return 0; + + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) { + taa_mitigation = TAA_MITIGATION_OFF; + } else if (!strcmp(str, "full")) { + taa_mitigation = TAA_MITIGATION_VERW; + } else if (!strcmp(str, "full,nosmt")) { + taa_mitigation = TAA_MITIGATION_VERW; + taa_nosmt = true; + } + + return 0; +} +early_param("tsx_async_abort", tsx_async_abort_parse_cmdline); + #undef pr_fmt #define pr_fmt(fmt) "Spectre V1 : " fmt @@ -772,6 +870,7 @@ static void update_mds_branch_idle(void) } #define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" +#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n" void arch_smt_update(void) { @@ -804,6 +903,17 @@ void arch_smt_update(void) break; } + switch (taa_mitigation) { + case TAA_MITIGATION_VERW: + case TAA_MITIGATION_UCODE_NEEDED: + if (sched_smt_active()) + pr_warn_once(TAA_MSG_SMT); + break; + case TAA_MITIGATION_TSX_DISABLED: + case TAA_MITIGATION_OFF: + break; + } + mutex_unlock(&spec_ctrl_mutex); } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 255ab4d8a3c6..8f5059c7b2a4 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1004,6 +1004,21 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (!cpu_matches(NO_SWAPGS)) setup_force_cpu_bug(X86_BUG_SWAPGS); + /* + * When the CPU is not mitigated for TAA (TAA_NO=0) set TAA bug when: + * - TSX is supported or + * - TSX_CTRL is present + * + * TSX_CTRL check is needed for cases when TSX could be disabled before + * the kernel boot e.g. kexec. + * TSX_CTRL check alone is not sufficient for cases when the microcode + * update is not present or running as guest that don't get TSX_CTRL. + */ + if (!(ia32_cap & ARCH_CAP_TAA_NO) && + (cpu_has(c, X86_FEATURE_RTM) || + (ia32_cap & ARCH_CAP_TSX_CTRL_MSR))) + setup_force_cpu_bug(X86_BUG_TAA); + if (cpu_matches(NO_MELTDOWN)) return; From 79373f485f7be07cb097a49aa200f80914407ae9 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Wed, 23 Oct 2019 12:19:51 +0200 Subject: [PATCH 0445/3715] x86/speculation/taa: Add sysfs reporting for TSX Async Abort commit 6608b45ac5ecb56f9e171252229c39580cc85f0f upstream. Add the sysfs reporting file for TSX Async Abort. It exposes the vulnerability and the mitigation state similar to the existing files for the other hardware vulnerabilities. Sysfs file path is: /sys/devices/system/cpu/vulnerabilities/tsx_async_abort Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Tested-by: Neelima Krishnan Reviewed-by: Mark Gross Reviewed-by: Tony Luck Reviewed-by: Greg Kroah-Hartman Reviewed-by: Josh Poimboeuf Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 23 +++++++++++++++++++++++ drivers/base/cpu.c | 9 +++++++++ include/linux/cpu.h | 3 +++ 3 files changed, 35 insertions(+) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 8b219d30c381..507aa25176c1 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1408,6 +1408,21 @@ static ssize_t mds_show_state(char *buf) sched_smt_active() ? "vulnerable" : "disabled"); } +static ssize_t tsx_async_abort_show_state(char *buf) +{ + if ((taa_mitigation == TAA_MITIGATION_TSX_DISABLED) || + (taa_mitigation == TAA_MITIGATION_OFF)) + return sprintf(buf, "%s\n", taa_strings[taa_mitigation]); + + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { + return sprintf(buf, "%s; SMT Host state unknown\n", + taa_strings[taa_mitigation]); + } + + return sprintf(buf, "%s; SMT %s\n", taa_strings[taa_mitigation], + sched_smt_active() ? "vulnerable" : "disabled"); +} + static char *stibp_state(void) { if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) @@ -1473,6 +1488,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_MDS: return mds_show_state(buf); + case X86_BUG_TAA: + return tsx_async_abort_show_state(buf); + default: break; } @@ -1509,4 +1527,9 @@ ssize_t cpu_show_mds(struct device *dev, struct device_attribute *attr, char *bu { return cpu_show_common(dev, attr, buf, X86_BUG_MDS); } + +ssize_t cpu_show_tsx_async_abort(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_TAA); +} #endif diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 32b52e6bd13b..0cd78375cd5e 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -539,12 +539,20 @@ ssize_t __weak cpu_show_mds(struct device *dev, return sprintf(buf, "Not affected\n"); } +ssize_t __weak cpu_show_tsx_async_abort(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "Not affected\n"); +} + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL); static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL); static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL); +static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -553,6 +561,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_spec_store_bypass.attr, &dev_attr_l1tf.attr, &dev_attr_mds.attr, + &dev_attr_tsx_async_abort.attr, NULL }; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index efc48efb0ec6..851208e7aa13 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -59,6 +59,9 @@ extern ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_mds(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_tsx_async_abort(struct device *dev, + struct device_attribute *attr, + char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, From d2f9854c2c34552e2b8f5a01eb1ac66796de89b6 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Wed, 23 Oct 2019 12:23:33 +0200 Subject: [PATCH 0446/3715] kvm/x86: Export MDS_NO=0 to guests when TSX is enabled commit e1d38b63acd843cfdd4222bf19a26700fd5c699e upstream. Export the IA32_ARCH_CAPABILITIES MSR bit MDS_NO=0 to guests on TSX Async Abort(TAA) affected hosts that have TSX enabled and updated microcode. This is required so that the guests don't complain, "Vulnerable: Clear CPU buffers attempted, no microcode" when the host has the updated microcode to clear CPU buffers. Microcode update also adds support for MSR_IA32_TSX_CTRL which is enumerated by the ARCH_CAP_TSX_CTRL bit in IA32_ARCH_CAPABILITIES MSR. Guests can't do this check themselves when the ARCH_CAP_TSX_CTRL bit is not exported to the guests. In this case export MDS_NO=0 to the guests. When guests have CPUID.MD_CLEAR=1, they deploy MDS mitigation which also mitigates TAA. Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Tested-by: Neelima Krishnan Reviewed-by: Tony Luck Reviewed-by: Josh Poimboeuf Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b1e0969a4543..461c6ca2cea5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1088,6 +1088,25 @@ u64 kvm_get_arch_capabilities(void) if (!boot_cpu_has_bug(X86_BUG_MDS)) data |= ARCH_CAP_MDS_NO; + /* + * On TAA affected systems, export MDS_NO=0 when: + * - TSX is enabled on the host, i.e. X86_FEATURE_RTM=1. + * - Updated microcode is present. This is detected by + * the presence of ARCH_CAP_TSX_CTRL_MSR and ensures + * that VERW clears CPU buffers. + * + * When MDS_NO=0 is exported, guests deploy clear CPU buffer + * mitigation and don't complain: + * + * "Vulnerable: Clear CPU buffers attempted, no microcode" + * + * If TSX is disabled on the system, guests are also mitigated against + * TAA and clear CPU buffer mitigation is not required for guests. + */ + if (boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM) && + (data & ARCH_CAP_TSX_CTRL_MSR)) + data &= ~ARCH_CAP_MDS_NO; + return data; } From 8c99df217f8e36fde46cbf2af50b5b191857d9d4 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Wed, 23 Oct 2019 12:28:57 +0200 Subject: [PATCH 0447/3715] x86/tsx: Add "auto" option to the tsx= cmdline parameter commit 7531a3596e3272d1f6841e0d601a614555dc6b65 upstream. Platforms which are not affected by X86_BUG_TAA may want the TSX feature enabled. Add "auto" option to the TSX cmdline parameter. When tsx=auto disable TSX when X86_BUG_TAA is present, otherwise enable TSX. More details on X86_BUG_TAA can be found here: https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html [ bp: Extend the arg buffer to accommodate "auto\0". ] Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Reviewed-by: Tony Luck Reviewed-by: Josh Poimboeuf Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/kernel-parameters.txt | 3 +++ arch/x86/kernel/cpu/tsx.c | 7 ++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 01463ac89a00..3cae24bff0a1 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4526,6 +4526,9 @@ update. This new MSR allows for the reliable deactivation of the TSX functionality.) + auto - Disable TSX if X86_BUG_TAA is present, + otherwise enable TSX on the system. + Not specifying this option is equivalent to tsx=off. See Documentation/admin-guide/hw-vuln/tsx_async_abort.rst diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c index 04471c4378d8..dda328ec2ba1 100644 --- a/arch/x86/kernel/cpu/tsx.c +++ b/arch/x86/kernel/cpu/tsx.c @@ -75,7 +75,7 @@ static bool __init tsx_ctrl_is_supported(void) void __init tsx_init(void) { - char arg[4] = {}; + char arg[5] = {}; int ret; if (!tsx_ctrl_is_supported()) @@ -87,6 +87,11 @@ void __init tsx_init(void) tsx_ctrl_state = TSX_CTRL_ENABLE; } else if (!strcmp(arg, "off")) { tsx_ctrl_state = TSX_CTRL_DISABLE; + } else if (!strcmp(arg, "auto")) { + if (boot_cpu_has_bug(X86_BUG_TAA)) + tsx_ctrl_state = TSX_CTRL_DISABLE; + else + tsx_ctrl_state = TSX_CTRL_ENABLE; } else { tsx_ctrl_state = TSX_CTRL_DISABLE; pr_err("tsx: invalid option, defaulting to off\n"); From a4f14d5a0795fe7c4f75d31ef4abf816570e3872 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Wed, 23 Oct 2019 12:32:55 +0200 Subject: [PATCH 0448/3715] x86/speculation/taa: Add documentation for TSX Async Abort commit a7a248c593e4fd7a67c50b5f5318fe42a0db335e upstream. Add the documenation for TSX Async Abort. Include the description of the issue, how to check the mitigation state, control the mitigation, guidance for system administrators. [ bp: Add proper SPDX tags, touch ups by Josh and me. ] Co-developed-by: Antonio Gomez Iglesias Signed-off-by: Pawan Gupta Signed-off-by: Antonio Gomez Iglesias Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Reviewed-by: Mark Gross Reviewed-by: Tony Luck Reviewed-by: Josh Poimboeuf Signed-off-by: Greg Kroah-Hartman --- .../ABI/testing/sysfs-devices-system-cpu | 1 + Documentation/admin-guide/hw-vuln/index.rst | 1 + .../admin-guide/hw-vuln/tsx_async_abort.rst | 276 ++++++++++++++++++ .../admin-guide/kernel-parameters.txt | 38 +++ Documentation/x86/index.rst | 1 + Documentation/x86/tsx_async_abort.rst | 117 ++++++++ 6 files changed, 434 insertions(+) create mode 100644 Documentation/admin-guide/hw-vuln/tsx_async_abort.rst create mode 100644 Documentation/x86/tsx_async_abort.rst diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 645687b1870d..0fffc1c66da1 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -381,6 +381,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/spec_store_bypass /sys/devices/system/cpu/vulnerabilities/l1tf /sys/devices/system/cpu/vulnerabilities/mds + /sys/devices/system/cpu/vulnerabilities/tsx_async_abort Date: January 2018 Contact: Linux kernel mailing list Description: Information about CPU vulnerabilities diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst index 49311f3da6f2..0802b1c67452 100644 --- a/Documentation/admin-guide/hw-vuln/index.rst +++ b/Documentation/admin-guide/hw-vuln/index.rst @@ -12,3 +12,4 @@ are configurable at compile, boot or run time. spectre l1tf mds + tsx_async_abort diff --git a/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst new file mode 100644 index 000000000000..fddbd7579c53 --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst @@ -0,0 +1,276 @@ +.. SPDX-License-Identifier: GPL-2.0 + +TAA - TSX Asynchronous Abort +====================================== + +TAA is a hardware vulnerability that allows unprivileged speculative access to +data which is available in various CPU internal buffers by using asynchronous +aborts within an Intel TSX transactional region. + +Affected processors +------------------- + +This vulnerability only affects Intel processors that support Intel +Transactional Synchronization Extensions (TSX) when the TAA_NO bit (bit 8) +is 0 in the IA32_ARCH_CAPABILITIES MSR. On processors where the MDS_NO bit +(bit 5) is 0 in the IA32_ARCH_CAPABILITIES MSR, the existing MDS mitigations +also mitigate against TAA. + +Whether a processor is affected or not can be read out from the TAA +vulnerability file in sysfs. See :ref:`tsx_async_abort_sys_info`. + +Related CVEs +------------ + +The following CVE entry is related to this TAA issue: + + ============== ===== =================================================== + CVE-2019-11135 TAA TSX Asynchronous Abort (TAA) condition on some + microprocessors utilizing speculative execution may + allow an authenticated user to potentially enable + information disclosure via a side channel with + local access. + ============== ===== =================================================== + +Problem +------- + +When performing store, load or L1 refill operations, processors write +data into temporary microarchitectural structures (buffers). The data in +those buffers can be forwarded to load operations as an optimization. + +Intel TSX is an extension to the x86 instruction set architecture that adds +hardware transactional memory support to improve performance of multi-threaded +software. TSX lets the processor expose and exploit concurrency hidden in an +application due to dynamically avoiding unnecessary synchronization. + +TSX supports atomic memory transactions that are either committed (success) or +aborted. During an abort, operations that happened within the transactional region +are rolled back. An asynchronous abort takes place, among other options, when a +different thread accesses a cache line that is also used within the transactional +region when that access might lead to a data race. + +Immediately after an uncompleted asynchronous abort, certain speculatively +executed loads may read data from those internal buffers and pass it to dependent +operations. This can be then used to infer the value via a cache side channel +attack. + +Because the buffers are potentially shared between Hyper-Threads cross +Hyper-Thread attacks are possible. + +The victim of a malicious actor does not need to make use of TSX. Only the +attacker needs to begin a TSX transaction and raise an asynchronous abort +which in turn potenitally leaks data stored in the buffers. + +More detailed technical information is available in the TAA specific x86 +architecture section: :ref:`Documentation/x86/tsx_async_abort.rst `. + + +Attack scenarios +---------------- + +Attacks against the TAA vulnerability can be implemented from unprivileged +applications running on hosts or guests. + +As for MDS, the attacker has no control over the memory addresses that can +be leaked. Only the victim is responsible for bringing data to the CPU. As +a result, the malicious actor has to sample as much data as possible and +then postprocess it to try to infer any useful information from it. + +A potential attacker only has read access to the data. Also, there is no direct +privilege escalation by using this technique. + + +.. _tsx_async_abort_sys_info: + +TAA system information +----------------------- + +The Linux kernel provides a sysfs interface to enumerate the current TAA status +of mitigated systems. The relevant sysfs file is: + +/sys/devices/system/cpu/vulnerabilities/tsx_async_abort + +The possible values in this file are: + +.. list-table:: + + * - 'Vulnerable' + - The CPU is affected by this vulnerability and the microcode and kernel mitigation are not applied. + * - 'Vulnerable: Clear CPU buffers attempted, no microcode' + - The system tries to clear the buffers but the microcode might not support the operation. + * - 'Mitigation: Clear CPU buffers' + - The microcode has been updated to clear the buffers. TSX is still enabled. + * - 'Mitigation: TSX disabled' + - TSX is disabled. + * - 'Not affected' + - The CPU is not affected by this issue. + +.. _ucode_needed: + +Best effort mitigation mode +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If the processor is vulnerable, but the availability of the microcode-based +mitigation mechanism is not advertised via CPUID the kernel selects a best +effort mitigation mode. This mode invokes the mitigation instructions +without a guarantee that they clear the CPU buffers. + +This is done to address virtualization scenarios where the host has the +microcode update applied, but the hypervisor is not yet updated to expose the +CPUID to the guest. If the host has updated microcode the protection takes +effect; otherwise a few CPU cycles are wasted pointlessly. + +The state in the tsx_async_abort sysfs file reflects this situation +accordingly. + + +Mitigation mechanism +-------------------- + +The kernel detects the affected CPUs and the presence of the microcode which is +required. If a CPU is affected and the microcode is available, then the kernel +enables the mitigation by default. + + +The mitigation can be controlled at boot time via a kernel command line option. +See :ref:`taa_mitigation_control_command_line`. + +.. _virt_mechanism: + +Virtualization mitigation +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Affected systems where the host has TAA microcode and TAA is mitigated by +having disabled TSX previously, are not vulnerable regardless of the status +of the VMs. + +In all other cases, if the host either does not have the TAA microcode or +the kernel is not mitigated, the system might be vulnerable. + + +.. _taa_mitigation_control_command_line: + +Mitigation control on the kernel command line +--------------------------------------------- + +The kernel command line allows to control the TAA mitigations at boot time with +the option "tsx_async_abort=". The valid arguments for this option are: + + ============ ============================================================= + off This option disables the TAA mitigation on affected platforms. + If the system has TSX enabled (see next parameter) and the CPU + is affected, the system is vulnerable. + + full TAA mitigation is enabled. If TSX is enabled, on an affected + system it will clear CPU buffers on ring transitions. On + systems which are MDS-affected and deploy MDS mitigation, + TAA is also mitigated. Specifying this option on those + systems will have no effect. + + full,nosmt The same as tsx_async_abort=full, with SMT disabled on + vulnerable CPUs that have TSX enabled. This is the complete + mitigation. When TSX is disabled, SMT is not disabled because + CPU is not vulnerable to cross-thread TAA attacks. + ============ ============================================================= + +Not specifying this option is equivalent to "tsx_async_abort=full". + +The kernel command line also allows to control the TSX feature using the +parameter "tsx=" on CPUs which support TSX control. MSR_IA32_TSX_CTRL is used +to control the TSX feature and the enumeration of the TSX feature bits (RTM +and HLE) in CPUID. + +The valid options are: + + ============ ============================================================= + off Disables TSX on the system. + + Note that this option takes effect only on newer CPUs which are + not vulnerable to MDS, i.e., have MSR_IA32_ARCH_CAPABILITIES.MDS_NO=1 + and which get the new IA32_TSX_CTRL MSR through a microcode + update. This new MSR allows for the reliable deactivation of + the TSX functionality. + + on Enables TSX. + + Although there are mitigations for all known security + vulnerabilities, TSX has been known to be an accelerator for + several previous speculation-related CVEs, and so there may be + unknown security risks associated with leaving it enabled. + + auto Disables TSX if X86_BUG_TAA is present, otherwise enables TSX + on the system. + ============ ============================================================= + +Not specifying this option is equivalent to "tsx=off". + +The following combinations of the "tsx_async_abort" and "tsx" are possible. For +affected platforms tsx=auto is equivalent to tsx=off and the result will be: + + ========= ========================== ========================================= + tsx=on tsx_async_abort=full The system will use VERW to clear CPU + buffers. Cross-thread attacks are still + possible on SMT machines. + tsx=on tsx_async_abort=full,nosmt As above, cross-thread attacks on SMT + mitigated. + tsx=on tsx_async_abort=off The system is vulnerable. + tsx=off tsx_async_abort=full TSX might be disabled if microcode + provides a TSX control MSR. If so, + system is not vulnerable. + tsx=off tsx_async_abort=full,nosmt Ditto + tsx=off tsx_async_abort=off ditto + ========= ========================== ========================================= + + +For unaffected platforms "tsx=on" and "tsx_async_abort=full" does not clear CPU +buffers. For platforms without TSX control (MSR_IA32_ARCH_CAPABILITIES.MDS_NO=0) +"tsx" command line argument has no effect. + +For the affected platforms below table indicates the mitigation status for the +combinations of CPUID bit MD_CLEAR and IA32_ARCH_CAPABILITIES MSR bits MDS_NO +and TSX_CTRL_MSR. + + ======= ========= ============= ======================================== + MDS_NO MD_CLEAR TSX_CTRL_MSR Status + ======= ========= ============= ======================================== + 0 0 0 Vulnerable (needs microcode) + 0 1 0 MDS and TAA mitigated via VERW + 1 1 0 MDS fixed, TAA vulnerable if TSX enabled + because MD_CLEAR has no meaning and + VERW is not guaranteed to clear buffers + 1 X 1 MDS fixed, TAA can be mitigated by + VERW or TSX_CTRL_MSR + ======= ========= ============= ======================================== + +Mitigation selection guide +-------------------------- + +1. Trusted userspace and guests +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If all user space applications are from a trusted source and do not execute +untrusted code which is supplied externally, then the mitigation can be +disabled. The same applies to virtualized environments with trusted guests. + + +2. Untrusted userspace and guests +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If there are untrusted applications or guests on the system, enabling TSX +might allow a malicious actor to leak data from the host or from other +processes running on the same physical core. + +If the microcode is available and the TSX is disabled on the host, attacks +are prevented in a virtualized environment as well, even if the VMs do not +explicitly enable the mitigation. + + +.. _taa_default_mitigations: + +Default mitigations +------------------- + +The kernel's default action for vulnerable processors is: + + - Deploy TSX disable mitigation (tsx_async_abort=full tsx=off). diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 3cae24bff0a1..d16b3d41ffe5 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2409,6 +2409,7 @@ ssbd=force-off [ARM64] l1tf=off [X86] mds=off [X86] + tsx_async_abort=off [X86] auto (default) Mitigate all CPU vulnerabilities, but leave SMT @@ -2424,6 +2425,7 @@ be fully mitigated, even if it means losing SMT. Equivalent to: l1tf=flush,nosmt [X86] mds=full,nosmt [X86] + tsx_async_abort=full,nosmt [X86] mminit_loglevel= [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this @@ -4534,6 +4536,42 @@ See Documentation/admin-guide/hw-vuln/tsx_async_abort.rst for more details. + tsx_async_abort= [X86,INTEL] Control mitigation for the TSX Async + Abort (TAA) vulnerability. + + Similar to Micro-architectural Data Sampling (MDS) + certain CPUs that support Transactional + Synchronization Extensions (TSX) are vulnerable to an + exploit against CPU internal buffers which can forward + information to a disclosure gadget under certain + conditions. + + In vulnerable processors, the speculatively forwarded + data can be used in a cache side channel attack, to + access data to which the attacker does not have direct + access. + + This parameter controls the TAA mitigation. The + options are: + + full - Enable TAA mitigation on vulnerable CPUs + if TSX is enabled. + + full,nosmt - Enable TAA mitigation and disable SMT on + vulnerable CPUs. If TSX is disabled, SMT + is not disabled because CPU is not + vulnerable to cross-thread TAA attacks. + off - Unconditionally disable TAA mitigation + + Not specifying this option is equivalent to + tsx_async_abort=full. On CPUs which are MDS affected + and deploy MDS mitigation, TAA mitigation is not + required and doesn't provide any additional + mitigation. + + For details see: + Documentation/admin-guide/hw-vuln/tsx_async_abort.rst + turbografx.map[2|3]= [HW,JOY] TurboGraFX parallel port interface Format: diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst index ef389dcf1b1d..0780d55c5aa8 100644 --- a/Documentation/x86/index.rst +++ b/Documentation/x86/index.rst @@ -6,3 +6,4 @@ x86 architecture specifics :maxdepth: 1 mds + tsx_async_abort diff --git a/Documentation/x86/tsx_async_abort.rst b/Documentation/x86/tsx_async_abort.rst new file mode 100644 index 000000000000..583ddc185ba2 --- /dev/null +++ b/Documentation/x86/tsx_async_abort.rst @@ -0,0 +1,117 @@ +.. SPDX-License-Identifier: GPL-2.0 + +TSX Async Abort (TAA) mitigation +================================ + +.. _tsx_async_abort: + +Overview +-------- + +TSX Async Abort (TAA) is a side channel attack on internal buffers in some +Intel processors similar to Microachitectural Data Sampling (MDS). In this +case certain loads may speculatively pass invalid data to dependent operations +when an asynchronous abort condition is pending in a Transactional +Synchronization Extensions (TSX) transaction. This includes loads with no +fault or assist condition. Such loads may speculatively expose stale data from +the same uarch data structures as in MDS, with same scope of exposure i.e. +same-thread and cross-thread. This issue affects all current processors that +support TSX. + +Mitigation strategy +------------------- + +a) TSX disable - one of the mitigations is to disable TSX. A new MSR +IA32_TSX_CTRL will be available in future and current processors after +microcode update which can be used to disable TSX. In addition, it +controls the enumeration of the TSX feature bits (RTM and HLE) in CPUID. + +b) Clear CPU buffers - similar to MDS, clearing the CPU buffers mitigates this +vulnerability. More details on this approach can be found in +:ref:`Documentation/admin-guide/hw-vuln/mds.rst `. + +Kernel internal mitigation modes +-------------------------------- + + ============= ============================================================ + off Mitigation is disabled. Either the CPU is not affected or + tsx_async_abort=off is supplied on the kernel command line. + + tsx disabled Mitigation is enabled. TSX feature is disabled by default at + bootup on processors that support TSX control. + + verw Mitigation is enabled. CPU is affected and MD_CLEAR is + advertised in CPUID. + + ucode needed Mitigation is enabled. CPU is affected and MD_CLEAR is not + advertised in CPUID. That is mainly for virtualization + scenarios where the host has the updated microcode but the + hypervisor does not expose MD_CLEAR in CPUID. It's a best + effort approach without guarantee. + ============= ============================================================ + +If the CPU is affected and the "tsx_async_abort" kernel command line parameter is +not provided then the kernel selects an appropriate mitigation depending on the +status of RTM and MD_CLEAR CPUID bits. + +Below tables indicate the impact of tsx=on|off|auto cmdline options on state of +TAA mitigation, VERW behavior and TSX feature for various combinations of +MSR_IA32_ARCH_CAPABILITIES bits. + +1. "tsx=off" + +========= ========= ============ ============ ============== =================== ====================== +MSR_IA32_ARCH_CAPABILITIES bits Result with cmdline tsx=off +---------------------------------- ------------------------------------------------------------------------- +TAA_NO MDS_NO TSX_CTRL_MSR TSX state VERW can clear TAA mitigation TAA mitigation + after bootup CPU buffers tsx_async_abort=off tsx_async_abort=full +========= ========= ============ ============ ============== =================== ====================== + 0 0 0 HW default Yes Same as MDS Same as MDS + 0 0 1 Invalid case Invalid case Invalid case Invalid case + 0 1 0 HW default No Need ucode update Need ucode update + 0 1 1 Disabled Yes TSX disabled TSX disabled + 1 X 1 Disabled X None needed None needed +========= ========= ============ ============ ============== =================== ====================== + +2. "tsx=on" + +========= ========= ============ ============ ============== =================== ====================== +MSR_IA32_ARCH_CAPABILITIES bits Result with cmdline tsx=on +---------------------------------- ------------------------------------------------------------------------- +TAA_NO MDS_NO TSX_CTRL_MSR TSX state VERW can clear TAA mitigation TAA mitigation + after bootup CPU buffers tsx_async_abort=off tsx_async_abort=full +========= ========= ============ ============ ============== =================== ====================== + 0 0 0 HW default Yes Same as MDS Same as MDS + 0 0 1 Invalid case Invalid case Invalid case Invalid case + 0 1 0 HW default No Need ucode update Need ucode update + 0 1 1 Enabled Yes None Same as MDS + 1 X 1 Enabled X None needed None needed +========= ========= ============ ============ ============== =================== ====================== + +3. "tsx=auto" + +========= ========= ============ ============ ============== =================== ====================== +MSR_IA32_ARCH_CAPABILITIES bits Result with cmdline tsx=auto +---------------------------------- ------------------------------------------------------------------------- +TAA_NO MDS_NO TSX_CTRL_MSR TSX state VERW can clear TAA mitigation TAA mitigation + after bootup CPU buffers tsx_async_abort=off tsx_async_abort=full +========= ========= ============ ============ ============== =================== ====================== + 0 0 0 HW default Yes Same as MDS Same as MDS + 0 0 1 Invalid case Invalid case Invalid case Invalid case + 0 1 0 HW default No Need ucode update Need ucode update + 0 1 1 Disabled Yes TSX disabled TSX disabled + 1 X 1 Enabled X None needed None needed +========= ========= ============ ============ ============== =================== ====================== + +In the tables, TSX_CTRL_MSR is a new bit in MSR_IA32_ARCH_CAPABILITIES that +indicates whether MSR_IA32_TSX_CTRL is supported. + +There are two control bits in IA32_TSX_CTRL MSR: + + Bit 0: When set it disables the Restricted Transactional Memory (RTM) + sub-feature of TSX (will force all transactions to abort on the + XBEGIN instruction). + + Bit 1: When set it disables the enumeration of the RTM and HLE feature + (i.e. it will make CPUID(EAX=7).EBX{bit4} and + CPUID(EAX=7).EBX{bit11} read as 0). From aa6ca7b9a9b72eb61e43f44c978dd8e6d4b2f046 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 23 Oct 2019 12:35:50 +0200 Subject: [PATCH 0449/3715] x86/tsx: Add config options to set tsx=on|off|auto commit db616173d787395787ecc93eef075fa975227b10 upstream. There is a general consensus that TSX usage is not largely spread while the history shows there is a non trivial space for side channel attacks possible. Therefore the tsx is disabled by default even on platforms that might have a safe implementation of TSX according to the current knowledge. This is a fair trade off to make. There are, however, workloads that really do benefit from using TSX and updating to a newer kernel with TSX disabled might introduce a noticeable regressions. This would be especially a problem for Linux distributions which will provide TAA mitigations. Introduce config options X86_INTEL_TSX_MODE_OFF, X86_INTEL_TSX_MODE_ON and X86_INTEL_TSX_MODE_AUTO to control the TSX feature. The config setting can be overridden by the tsx cmdline options. [ bp: Text cleanups from Josh. ] Suggested-by: Borislav Petkov Signed-off-by: Michal Hocko Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig | 45 +++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/tsx.c | 22 +++++++++++++------ 2 files changed, 61 insertions(+), 6 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8fec1585ac7a..b58daecc591e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1853,6 +1853,51 @@ config X86_INTEL_MEMORY_PROTECTION_KEYS If unsure, say y. +choice + prompt "TSX enable mode" + depends on CPU_SUP_INTEL + default X86_INTEL_TSX_MODE_OFF + help + Intel's TSX (Transactional Synchronization Extensions) feature + allows to optimize locking protocols through lock elision which + can lead to a noticeable performance boost. + + On the other hand it has been shown that TSX can be exploited + to form side channel attacks (e.g. TAA) and chances are there + will be more of those attacks discovered in the future. + + Therefore TSX is not enabled by default (aka tsx=off). An admin + might override this decision by tsx=on the command line parameter. + Even with TSX enabled, the kernel will attempt to enable the best + possible TAA mitigation setting depending on the microcode available + for the particular machine. + + This option allows to set the default tsx mode between tsx=on, =off + and =auto. See Documentation/admin-guide/kernel-parameters.txt for more + details. + + Say off if not sure, auto if TSX is in use but it should be used on safe + platforms or on if TSX is in use and the security aspect of tsx is not + relevant. + +config X86_INTEL_TSX_MODE_OFF + bool "off" + help + TSX is disabled if possible - equals to tsx=off command line parameter. + +config X86_INTEL_TSX_MODE_ON + bool "on" + help + TSX is always enabled on TSX capable HW - equals the tsx=on command + line parameter. + +config X86_INTEL_TSX_MODE_AUTO + bool "auto" + help + TSX is enabled on TSX capable HW that is believed to be safe against + side channel attacks- equals the tsx=auto command line parameter. +endchoice + config EFI bool "EFI runtime service support" depends on ACPI diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c index dda328ec2ba1..3e20d322bc98 100644 --- a/arch/x86/kernel/cpu/tsx.c +++ b/arch/x86/kernel/cpu/tsx.c @@ -73,6 +73,14 @@ static bool __init tsx_ctrl_is_supported(void) return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR); } +static enum tsx_ctrl_states x86_get_tsx_auto_mode(void) +{ + if (boot_cpu_has_bug(X86_BUG_TAA)) + return TSX_CTRL_DISABLE; + + return TSX_CTRL_ENABLE; +} + void __init tsx_init(void) { char arg[5] = {}; @@ -88,17 +96,19 @@ void __init tsx_init(void) } else if (!strcmp(arg, "off")) { tsx_ctrl_state = TSX_CTRL_DISABLE; } else if (!strcmp(arg, "auto")) { - if (boot_cpu_has_bug(X86_BUG_TAA)) - tsx_ctrl_state = TSX_CTRL_DISABLE; - else - tsx_ctrl_state = TSX_CTRL_ENABLE; + tsx_ctrl_state = x86_get_tsx_auto_mode(); } else { tsx_ctrl_state = TSX_CTRL_DISABLE; pr_err("tsx: invalid option, defaulting to off\n"); } } else { - /* tsx= not provided, defaulting to off */ - tsx_ctrl_state = TSX_CTRL_DISABLE; + /* tsx= not provided */ + if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_AUTO)) + tsx_ctrl_state = x86_get_tsx_auto_mode(); + else if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_OFF)) + tsx_ctrl_state = TSX_CTRL_DISABLE; + else + tsx_ctrl_state = TSX_CTRL_ENABLE; } if (tsx_ctrl_state == TSX_CTRL_DISABLE) { From 9b81a067dcfcdc2411338fcc110d90c412c92c76 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 6 Nov 2019 20:26:46 -0600 Subject: [PATCH 0450/3715] x86/speculation/taa: Fix printing of TAA_MSG_SMT on IBRS_ALL CPUs commit 012206a822a8b6ac09125bfaa210a95b9eb8f1c1 upstream. For new IBRS_ALL CPUs, the Enhanced IBRS check at the beginning of cpu_bugs_smt_update() causes the function to return early, unintentionally skipping the MDS and TAA logic. This is not a problem for MDS, because there appears to be no overlap between IBRS_ALL and MDS-affected CPUs. So the MDS mitigation would be disabled and nothing would need to be done in this function anyway. But for TAA, the TAA_MSG_SMT string will never get printed on Cascade Lake and newer. The check is superfluous anyway: when 'spectre_v2_enabled' is SPECTRE_V2_IBRS_ENHANCED, 'spectre_v2_user' is always SPECTRE_V2_USER_NONE, and so the 'spectre_v2_user' switch statement handles it appropriately by doing nothing. So just remove the check. Fixes: 1b42f017415b ("x86/speculation/taa: Add mitigation for TSX Async Abort") Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Reviewed-by: Tyler Hicks Reviewed-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 507aa25176c1..255b79df603c 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -874,10 +874,6 @@ static void update_mds_branch_idle(void) void arch_smt_update(void) { - /* Enhanced IBRS implies STIBP. No update required. */ - if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) - return; - mutex_lock(&spec_ctrl_mutex); switch (spectre_v2_user) { From 56a0f3867c1bc40c6f155c780a284cc881a89488 Mon Sep 17 00:00:00 2001 From: Vineela Tummalapalli Date: Mon, 4 Nov 2019 12:22:01 +0100 Subject: [PATCH 0451/3715] x86/bugs: Add ITLB_MULTIHIT bug infrastructure commit db4d30fbb71b47e4ecb11c4efa5d8aad4b03dfae upstream. Some processors may incur a machine check error possibly resulting in an unrecoverable CPU lockup when an instruction fetch encounters a TLB multi-hit in the instruction TLB. This can occur when the page size is changed along with either the physical address or cache type. The relevant erratum can be found here: https://bugzilla.kernel.org/show_bug.cgi?id=205195 There are other processors affected for which the erratum does not fully disclose the impact. This issue affects both bare-metal x86 page tables and EPT. It can be mitigated by either eliminating the use of large pages or by using careful TLB invalidations when changing the page size in the page tables. Just like Spectre, Meltdown, L1TF and MDS, a new bit has been allocated in MSR_IA32_ARCH_CAPABILITIES (PSCHANGE_MC_NO) and will be set on CPUs which are mitigated against this issue. Signed-off-by: Vineela Tummalapalli Co-developed-by: Pawan Gupta Signed-off-by: Pawan Gupta Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- .../ABI/testing/sysfs-devices-system-cpu | 1 + arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 7 +++ arch/x86/kernel/cpu/bugs.c | 13 ++++ arch/x86/kernel/cpu/common.c | 59 ++++++++++--------- drivers/base/cpu.c | 8 +++ include/linux/cpu.h | 2 + 7 files changed, 64 insertions(+), 27 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 0fffc1c66da1..9ebca6a750f3 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -382,6 +382,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/l1tf /sys/devices/system/cpu/vulnerabilities/mds /sys/devices/system/cpu/vulnerabilities/tsx_async_abort + /sys/devices/system/cpu/vulnerabilities/itlb_multihit Date: January 2018 Contact: Linux kernel mailing list Description: Information about CPU vulnerabilities diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 46a4d5f4a77c..b4bef819d5d5 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -389,5 +389,6 @@ #define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ #define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */ #define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ +#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 2ff54c96576d..5761a86b88e0 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -84,6 +84,13 @@ * Microarchitectural Data * Sampling (MDS) vulnerabilities. */ +#define ARCH_CAP_PSCHANGE_MC_NO BIT(6) /* + * The processor is not susceptible to a + * machine check error due to modifying the + * code page size along with either the + * physical address or cache type + * without TLB invalidation. + */ #define ARCH_CAP_TSX_CTRL_MSR BIT(7) /* MSR for TSX control is available. */ #define ARCH_CAP_TAA_NO BIT(8) /* * Not susceptible to diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 255b79df603c..ee558f70d549 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1387,6 +1387,11 @@ static ssize_t l1tf_show_state(char *buf) } #endif +static ssize_t itlb_multihit_show_state(char *buf) +{ + return sprintf(buf, "Processor vulnerable\n"); +} + static ssize_t mds_show_state(char *buf) { if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { @@ -1487,6 +1492,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_TAA: return tsx_async_abort_show_state(buf); + case X86_BUG_ITLB_MULTIHIT: + return itlb_multihit_show_state(buf); + default: break; } @@ -1528,4 +1536,9 @@ ssize_t cpu_show_tsx_async_abort(struct device *dev, struct device_attribute *at { return cpu_show_common(dev, attr, buf, X86_BUG_TAA); } + +ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_ITLB_MULTIHIT); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8f5059c7b2a4..b8680f932a07 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -897,13 +897,14 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) c->x86_cache_bits = c->x86_phys_bits; } -#define NO_SPECULATION BIT(0) -#define NO_MELTDOWN BIT(1) -#define NO_SSB BIT(2) -#define NO_L1TF BIT(3) -#define NO_MDS BIT(4) -#define MSBDS_ONLY BIT(5) -#define NO_SWAPGS BIT(6) +#define NO_SPECULATION BIT(0) +#define NO_MELTDOWN BIT(1) +#define NO_SSB BIT(2) +#define NO_L1TF BIT(3) +#define NO_MDS BIT(4) +#define MSBDS_ONLY BIT(5) +#define NO_SWAPGS BIT(6) +#define NO_ITLB_MULTIHIT BIT(7) #define VULNWL(_vendor, _family, _model, _whitelist) \ { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } @@ -921,26 +922,26 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION), /* Intel Family 6 */ - VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION), - VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION), - VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION), - VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION), - VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION), + VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), - VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), - VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), - VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), - VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), - VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), + VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), VULNWL_INTEL(CORE_YONAH, NO_SSB), - VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS), + VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS), - VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF | NO_SWAPGS), - VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS), + VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), /* * Technically, swapgs isn't serializing on AMD (despite it previously @@ -951,13 +952,13 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { */ /* AMD Family 0xf - 0x12 */ - VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), - VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), - VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), - VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ - VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS), + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), {} }; @@ -982,6 +983,10 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = x86_read_arch_cap_msr(); + /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */ + if (!cpu_matches(NO_ITLB_MULTIHIT) && !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO)) + setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT); + if (cpu_matches(NO_SPECULATION)) return; diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 0cd78375cd5e..0272f66db5ac 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -546,6 +546,12 @@ ssize_t __weak cpu_show_tsx_async_abort(struct device *dev, return sprintf(buf, "Not affected\n"); } +ssize_t __weak cpu_show_itlb_multihit(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Not affected\n"); +} + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); @@ -553,6 +559,7 @@ static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL); static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL); static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL); static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL); +static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -562,6 +569,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_l1tf.attr, &dev_attr_mds.attr, &dev_attr_tsx_async_abort.attr, + &dev_attr_itlb_multihit.attr, NULL }; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 851208e7aa13..fde995ba1228 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -62,6 +62,8 @@ extern ssize_t cpu_show_mds(struct device *dev, extern ssize_t cpu_show_tsx_async_abort(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_itlb_multihit(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, From 402c32059af1a308ff9eab0fb3666bb8a6e77c67 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 4 Nov 2019 12:22:01 +0100 Subject: [PATCH 0452/3715] x86/cpu: Add Tremont to the cpu vulnerability whitelist commit cad14885a8d32c1c0d8eaa7bf5c0152a22b6080e upstream. Add the new cpu family ATOM_TREMONT_D to the cpu vunerability whitelist. ATOM_TREMONT_D is not affected by X86_BUG_ITLB_MULTIHIT. ATOM_TREMONT_D might have mitigations against other issues as well, but only the ITLB multihit mitigation is confirmed at this point. Signed-off-by: Pawan Gupta Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b8680f932a07..c0c9c5a44e82 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -951,6 +951,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { * good enough for our purposes. */ + VULNWL_INTEL(ATOM_TREMONT_X, NO_ITLB_MULTIHIT), + /* AMD Family 0xf - 0x12 */ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), From fa8617fddad51c42f66e56cd14382ee0e9f9782e Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Mon, 4 Nov 2019 12:22:02 +0100 Subject: [PATCH 0453/3715] cpu/speculation: Uninline and export CPU mitigations helpers commit 731dc9df975a5da21237a18c3384f811a7a41cc6 upstream. A kernel module may need to check the value of the "mitigations=" kernel command line parameter as part of its setup when the module needs to perform software mitigations for a CPU flaw. Uninline and export the helper functions surrounding the cpu_mitigations enum to allow for their usage from a module. Lastly, privatize the enum and cpu_mitigations variable since the value of cpu_mitigations can be checked with the exported helper functions. Signed-off-by: Tyler Hicks Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- include/linux/cpu.h | 25 ++----------------------- kernel/cpu.c | 27 ++++++++++++++++++++++++++- 2 files changed, 28 insertions(+), 24 deletions(-) diff --git a/include/linux/cpu.h b/include/linux/cpu.h index fde995ba1228..67e8ba81c35f 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -206,28 +206,7 @@ static inline int cpuhp_smt_enable(void) { return 0; } static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; } #endif -/* - * These are used for a global "mitigations=" cmdline option for toggling - * optional CPU mitigations. - */ -enum cpu_mitigations { - CPU_MITIGATIONS_OFF, - CPU_MITIGATIONS_AUTO, - CPU_MITIGATIONS_AUTO_NOSMT, -}; - -extern enum cpu_mitigations cpu_mitigations; - -/* mitigations=off */ -static inline bool cpu_mitigations_off(void) -{ - return cpu_mitigations == CPU_MITIGATIONS_OFF; -} - -/* mitigations=auto,nosmt */ -static inline bool cpu_mitigations_auto_nosmt(void) -{ - return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT; -} +extern bool cpu_mitigations_off(void); +extern bool cpu_mitigations_auto_nosmt(void); #endif /* _LINUX_CPU_H_ */ diff --git a/kernel/cpu.c b/kernel/cpu.c index d768e15bef83..96f970d77339 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2301,7 +2301,18 @@ void __init boot_cpu_hotplug_init(void) this_cpu_write(cpuhp_state.state, CPUHP_ONLINE); } -enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO; +/* + * These are used for a global "mitigations=" cmdline option for toggling + * optional CPU mitigations. + */ +enum cpu_mitigations { + CPU_MITIGATIONS_OFF, + CPU_MITIGATIONS_AUTO, + CPU_MITIGATIONS_AUTO_NOSMT, +}; + +static enum cpu_mitigations cpu_mitigations __ro_after_init = + CPU_MITIGATIONS_AUTO; static int __init mitigations_parse_cmdline(char *arg) { @@ -2318,3 +2329,17 @@ static int __init mitigations_parse_cmdline(char *arg) return 0; } early_param("mitigations", mitigations_parse_cmdline); + +/* mitigations=off */ +bool cpu_mitigations_off(void) +{ + return cpu_mitigations == CPU_MITIGATIONS_OFF; +} +EXPORT_SYMBOL_GPL(cpu_mitigations_off); + +/* mitigations=auto,nosmt */ +bool cpu_mitigations_auto_nosmt(void) +{ + return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT; +} +EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt); From cc5b0b7602f6f56cf6a03cbf091fc3ac2e4bb744 Mon Sep 17 00:00:00 2001 From: "Gomez Iglesias, Antonio" Date: Mon, 4 Nov 2019 12:22:03 +0100 Subject: [PATCH 0454/3715] Documentation: Add ITLB_MULTIHIT documentation commit 7f00cc8d4a51074eb0ad4c3f16c15757b1ddfb7d upstream. Add the initial ITLB_MULTIHIT documentation. [ tglx: Add it to the index so it gets actually built. ] Signed-off-by: Antonio Gomez Iglesias Signed-off-by: Nelson D'Souza Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/hw-vuln/index.rst | 1 + .../admin-guide/hw-vuln/multihit.rst | 163 ++++++++++++++++++ 2 files changed, 164 insertions(+) create mode 100644 Documentation/admin-guide/hw-vuln/multihit.rst diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst index 0802b1c67452..0795e3c2643f 100644 --- a/Documentation/admin-guide/hw-vuln/index.rst +++ b/Documentation/admin-guide/hw-vuln/index.rst @@ -13,3 +13,4 @@ are configurable at compile, boot or run time. l1tf mds tsx_async_abort + multihit.rst diff --git a/Documentation/admin-guide/hw-vuln/multihit.rst b/Documentation/admin-guide/hw-vuln/multihit.rst new file mode 100644 index 000000000000..ba9988d8bce5 --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/multihit.rst @@ -0,0 +1,163 @@ +iTLB multihit +============= + +iTLB multihit is an erratum where some processors may incur a machine check +error, possibly resulting in an unrecoverable CPU lockup, when an +instruction fetch hits multiple entries in the instruction TLB. This can +occur when the page size is changed along with either the physical address +or cache type. A malicious guest running on a virtualized system can +exploit this erratum to perform a denial of service attack. + + +Affected processors +------------------- + +Variations of this erratum are present on most Intel Core and Xeon processor +models. The erratum is not present on: + + - non-Intel processors + + - Some Atoms (Airmont, Bonnell, Goldmont, GoldmontPlus, Saltwell, Silvermont) + + - Intel processors that have the PSCHANGE_MC_NO bit set in the + IA32_ARCH_CAPABILITIES MSR. + + +Related CVEs +------------ + +The following CVE entry is related to this issue: + + ============== ================================================= + CVE-2018-12207 Machine Check Error Avoidance on Page Size Change + ============== ================================================= + + +Problem +------- + +Privileged software, including OS and virtual machine managers (VMM), are in +charge of memory management. A key component in memory management is the control +of the page tables. Modern processors use virtual memory, a technique that creates +the illusion of a very large memory for processors. This virtual space is split +into pages of a given size. Page tables translate virtual addresses to physical +addresses. + +To reduce latency when performing a virtual to physical address translation, +processors include a structure, called TLB, that caches recent translations. +There are separate TLBs for instruction (iTLB) and data (dTLB). + +Under this errata, instructions are fetched from a linear address translated +using a 4 KB translation cached in the iTLB. Privileged software modifies the +paging structure so that the same linear address using large page size (2 MB, 4 +MB, 1 GB) with a different physical address or memory type. After the page +structure modification but before the software invalidates any iTLB entries for +the linear address, a code fetch that happens on the same linear address may +cause a machine-check error which can result in a system hang or shutdown. + + +Attack scenarios +---------------- + +Attacks against the iTLB multihit erratum can be mounted from malicious +guests in a virtualized system. + + +iTLB multihit system information +-------------------------------- + +The Linux kernel provides a sysfs interface to enumerate the current iTLB +multihit status of the system:whether the system is vulnerable and which +mitigations are active. The relevant sysfs file is: + +/sys/devices/system/cpu/vulnerabilities/itlb_multihit + +The possible values in this file are: + +.. list-table:: + + * - Not affected + - The processor is not vulnerable. + * - KVM: Mitigation: Split huge pages + - Software changes mitigate this issue. + * - KVM: Vulnerable + - The processor is vulnerable, but no mitigation enabled + + +Enumeration of the erratum +-------------------------------- + +A new bit has been allocated in the IA32_ARCH_CAPABILITIES (PSCHANGE_MC_NO) msr +and will be set on CPU's which are mitigated against this issue. + + ======================================= =========== =============================== + IA32_ARCH_CAPABILITIES MSR Not present Possibly vulnerable,check model + IA32_ARCH_CAPABILITIES[PSCHANGE_MC_NO] '0' Likely vulnerable,check model + IA32_ARCH_CAPABILITIES[PSCHANGE_MC_NO] '1' Not vulnerable + ======================================= =========== =============================== + + +Mitigation mechanism +------------------------- + +This erratum can be mitigated by restricting the use of large page sizes to +non-executable pages. This forces all iTLB entries to be 4K, and removes +the possibility of multiple hits. + +In order to mitigate the vulnerability, KVM initially marks all huge pages +as non-executable. If the guest attempts to execute in one of those pages, +the page is broken down into 4K pages, which are then marked executable. + +If EPT is disabled or not available on the host, KVM is in control of TLB +flushes and the problematic situation cannot happen. However, the shadow +EPT paging mechanism used by nested virtualization is vulnerable, because +the nested guest can trigger multiple iTLB hits by modifying its own +(non-nested) page tables. For simplicity, KVM will make large pages +non-executable in all shadow paging modes. + +Mitigation control on the kernel command line and KVM - module parameter +------------------------------------------------------------------------ + +The KVM hypervisor mitigation mechanism for marking huge pages as +non-executable can be controlled with a module parameter "nx_huge_pages=". +The kernel command line allows to control the iTLB multihit mitigations at +boot time with the option "kvm.nx_huge_pages=". + +The valid arguments for these options are: + + ========== ================================================================ + force Mitigation is enabled. In this case, the mitigation implements + non-executable huge pages in Linux kernel KVM module. All huge + pages in the EPT are marked as non-executable. + If a guest attempts to execute in one of those pages, the page is + broken down into 4K pages, which are then marked executable. + + off Mitigation is disabled. + + auto Enable mitigation only if the platform is affected and the kernel + was not booted with the "mitigations=off" command line parameter. + This is the default option. + ========== ================================================================ + + +Mitigation selection guide +-------------------------- + +1. No virtualization in use +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + The system is protected by the kernel unconditionally and no further + action is required. + +2. Virtualization with trusted guests +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + If the guest comes from a trusted source, you may assume that the guest will + not attempt to maliciously exploit these errata and no further action is + required. + +3. Virtualization with untrusted guests +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + If the guest comes from an untrusted source, the guest host kernel will need + to apply iTLB multihit mitigation via the kernel command line or kvm + module parameter. From 82e77746f07db70367f66ef272256037d6415353 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 11 Oct 2019 11:59:48 +0200 Subject: [PATCH 0455/3715] kvm: x86, powerpc: do not allow clearing largepages debugfs entry commit 833b45de69a6016c4b0cebe6765d526a31a81580 upstream. The largepages debugfs entry is incremented/decremented as shadow pages are created or destroyed. Clearing it will result in an underflow, which is harmless to KVM but ugly (and could be misinterpreted by tools that use debugfs information), so make this particular statistic read-only. Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner Cc: kvm-ppc@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 6 +++--- include/linux/kvm_host.h | 2 ++ virt/kvm/kvm_main.c | 10 +++++++--- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 461c6ca2cea5..a6f7b75da27a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -90,8 +90,8 @@ u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA)); static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); #endif -#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM -#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU +#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ +#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ #define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \ KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK) @@ -191,7 +191,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, { "mmu_unsync", VM_STAT(mmu_unsync) }, { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, - { "largepages", VM_STAT(lpages) }, + { "largepages", VM_STAT(lpages, .mode = 0444) }, { "max_mmu_page_hash_collisions", VM_STAT(max_mmu_page_hash_collisions) }, { NULL } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c8b9d3519c8e..eeabf9823164 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1013,6 +1013,7 @@ enum kvm_stat_kind { struct kvm_stat_data { int offset; + int mode; struct kvm *kvm; }; @@ -1020,6 +1021,7 @@ struct kvm_stats_debugfs_item { const char *name; int offset; enum kvm_stat_kind kind; + int mode; }; extern struct kvm_stats_debugfs_item debugfs_entries[]; extern struct dentry *kvm_debugfs_dir; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 89fd40e57cae..1ba7c312d26a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -596,8 +596,9 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) stat_data->kvm = kvm; stat_data->offset = p->offset; + stat_data->mode = p->mode ? p->mode : 0644; kvm->debugfs_stat_data[p - debugfs_entries] = stat_data; - if (!debugfs_create_file(p->name, 0644, + if (!debugfs_create_file(p->name, stat_data->mode, kvm->debugfs_dentry, stat_data, stat_fops_per_vm[p->kind])) @@ -3713,7 +3714,9 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file, if (!refcount_inc_not_zero(&stat_data->kvm->users_count)) return -ENOENT; - if (simple_attr_open(inode, file, get, set, fmt)) { + if (simple_attr_open(inode, file, get, + stat_data->mode & S_IWUGO ? set : NULL, + fmt)) { kvm_put_kvm(stat_data->kvm); return -ENOMEM; } @@ -3964,7 +3967,8 @@ static int kvm_init_debug(void) kvm_debugfs_num_entries = 0; for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) { - if (!debugfs_create_file(p->name, 0644, kvm_debugfs_dir, + int mode = p->mode ? p->mode : 0644; + if (!debugfs_create_file(p->name, mode, kvm_debugfs_dir, (void *)(long)p->offset, stat_fops[p->kind])) goto out_dir; From 05fe997e30d439e3ff0c7e7e46499e9d41d98ba7 Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Thu, 3 Jan 2019 17:14:28 -0800 Subject: [PATCH 0456/3715] kvm: Convert kvm_lock to a mutex commit 0d9ce162cf46c99628cc5da9510b959c7976735b upstream. It doesn't seem as if there is any particular need for kvm_lock to be a spinlock, so convert the lock to a mutex so that sleepable functions (in particular cond_resched()) can be called while holding it. Signed-off-by: Junaid Shahid Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- Documentation/virtual/kvm/locking.txt | 4 +--- arch/s390/kvm/kvm-s390.c | 4 ++-- arch/x86/kvm/mmu.c | 4 ++-- arch/x86/kvm/x86.c | 10 ++++----- include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 30 +++++++++++++-------------- 6 files changed, 26 insertions(+), 28 deletions(-) diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt index 1bb8bcaf8497..635cd6eaf714 100644 --- a/Documentation/virtual/kvm/locking.txt +++ b/Documentation/virtual/kvm/locking.txt @@ -15,8 +15,6 @@ The acquisition orders for mutexes are as follows: On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock. -For spinlocks, kvm_lock is taken outside kvm->mmu_lock. - Everything else is a leaf: no other lock is taken inside the critical sections. @@ -169,7 +167,7 @@ which time it will be set using the Dirty tracking mechanism described above. ------------ Name: kvm_lock -Type: spinlock_t +Type: mutex Arch: any Protects: - vm_list diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index cab41bc2572f..ff62a4fe2159 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1926,13 +1926,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags); if (!kvm->arch.sca) goto out_err; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); sca_offset += 16; if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) sca_offset = 0; kvm->arch.sca = (struct bsca_block *) ((char *) kvm->arch.sca + sca_offset); - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); sprintf(debug_name, "kvm-%u", current->pid); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 87a0601b1c20..3ad5a853b948 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5454,7 +5454,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) int nr_to_scan = sc->nr_to_scan; unsigned long freed = 0; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { int idx; @@ -5504,7 +5504,7 @@ unlock: break; } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); return freed; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a6f7b75da27a..06d7df4331b1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6156,17 +6156,17 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { kvm_for_each_vcpu(i, vcpu, kvm) { if (vcpu->cpu != freq->cpu) continue; kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); - if (vcpu->cpu != smp_processor_id()) + if (vcpu->cpu != raw_smp_processor_id()) send_ipi = 1; } } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); if (freq->old < freq->new && send_ipi) { /* @@ -6303,12 +6303,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work) struct kvm_vcpu *vcpu; int i; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) kvm_for_each_vcpu(i, vcpu, kvm) kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); atomic_set(&kvm_guest_has_master_clock, 0); - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); } static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index eeabf9823164..31d566de33e9 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -140,7 +140,7 @@ static inline bool is_error_page(struct page *page) extern struct kmem_cache *kvm_vcpu_cache; -extern spinlock_t kvm_lock; +extern struct mutex kvm_lock; extern struct list_head vm_list; struct kvm_io_range { diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1ba7c312d26a..345f6cf36043 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -92,7 +92,7 @@ EXPORT_SYMBOL_GPL(halt_poll_ns_shrink); * kvm->lock --> kvm->slots_lock --> kvm->irq_lock */ -DEFINE_SPINLOCK(kvm_lock); +DEFINE_MUTEX(kvm_lock); static DEFINE_RAW_SPINLOCK(kvm_count_lock); LIST_HEAD(vm_list); @@ -668,9 +668,9 @@ static struct kvm *kvm_create_vm(unsigned long type) if (r) goto out_err; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_add(&kvm->vm_list, &vm_list); - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); preempt_notifier_inc(); @@ -716,9 +716,9 @@ static void kvm_destroy_vm(struct kvm *kvm) kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm); kvm_destroy_vm_debugfs(kvm); kvm_arch_sync_events(kvm); - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_del(&kvm->vm_list); - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); kvm_free_irq_routing(kvm); for (i = 0; i < KVM_NR_BUSES; i++) { struct kvm_io_bus *bus = kvm_get_bus(kvm, i); @@ -3830,13 +3830,13 @@ static int vm_stat_get(void *_offset, u64 *val) u64 tmp_val; *val = 0; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { stat_tmp.kvm = kvm; vm_stat_get_per_vm((void *)&stat_tmp, &tmp_val); *val += tmp_val; } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); return 0; } @@ -3849,12 +3849,12 @@ static int vm_stat_clear(void *_offset, u64 val) if (val) return -EINVAL; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { stat_tmp.kvm = kvm; vm_stat_clear_per_vm((void *)&stat_tmp, 0); } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); return 0; } @@ -3869,13 +3869,13 @@ static int vcpu_stat_get(void *_offset, u64 *val) u64 tmp_val; *val = 0; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { stat_tmp.kvm = kvm; vcpu_stat_get_per_vm((void *)&stat_tmp, &tmp_val); *val += tmp_val; } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); return 0; } @@ -3888,12 +3888,12 @@ static int vcpu_stat_clear(void *_offset, u64 val) if (val) return -EINVAL; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { stat_tmp.kvm = kvm; vcpu_stat_clear_per_vm((void *)&stat_tmp, 0); } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); return 0; } @@ -3914,7 +3914,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) if (!kvm_dev.this_device || !kvm) return; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); if (type == KVM_EVENT_CREATE_VM) { kvm_createvm_count++; kvm_active_vms++; @@ -3923,7 +3923,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) } created = kvm_createvm_count; active = kvm_active_vms; - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); env = kzalloc(sizeof(*env), GFP_KERNEL); if (!env) From 15ac253adbeea9463bf3db8e0bbc533b3ec779e0 Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Thu, 3 Jan 2019 16:22:21 -0800 Subject: [PATCH 0457/3715] kvm: mmu: Do not release the page inside mmu_set_spte() commit 43fdcda96e2550c6d1c46fb8a78801aa2f7276ed upstream. Release the page at the call-site where it was originally acquired. This makes the exit code cleaner for most call sites, since they do not need to duplicate code between success and the failure label. Signed-off-by: Junaid Shahid Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/mmu.c | 18 +++++++----------- arch/x86/kvm/paging_tmpl.h | 8 +++----- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 3ad5a853b948..d9a78b887bc2 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2918,8 +2918,6 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, } } - kvm_release_pfn_clean(pfn); - return ret; } @@ -2954,9 +2952,11 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, if (ret <= 0) return -1; - for (i = 0; i < ret; i++, gfn++, start++) + for (i = 0; i < ret; i++, gfn++, start++) { mmu_set_spte(vcpu, start, access, 0, sp->role.level, gfn, page_to_pfn(pages[i]), true, true); + put_page(pages[i]); + } return 0; } @@ -3361,6 +3361,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r)) return r; + r = RET_PF_RETRY; spin_lock(&vcpu->kvm->mmu_lock); if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; @@ -3369,14 +3370,11 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); - spin_unlock(&vcpu->kvm->mmu_lock); - - return r; out_unlock: spin_unlock(&vcpu->kvm->mmu_lock); kvm_release_pfn_clean(pfn); - return RET_PF_RETRY; + return r; } @@ -3954,6 +3952,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r)) return r; + r = RET_PF_RETRY; spin_lock(&vcpu->kvm->mmu_lock); if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; @@ -3962,14 +3961,11 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); - spin_unlock(&vcpu->kvm->mmu_lock); - - return r; out_unlock: spin_unlock(&vcpu->kvm->mmu_lock); kvm_release_pfn_clean(pfn); - return RET_PF_RETRY; + return r; } static void nonpaging_init_context(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 6288e9d7068e..04c499774868 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -522,6 +522,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, mmu_set_spte(vcpu, spte, pte_access, 0, PT_PAGE_TABLE_LEVEL, gfn, pfn, true, true); + kvm_release_pfn_clean(pfn); return true; } @@ -673,7 +674,6 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, return ret; out_gpte_changed: - kvm_release_pfn_clean(pfn); return RET_PF_RETRY; } @@ -821,6 +821,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, walker.pte_access &= ~ACC_EXEC_MASK; } + r = RET_PF_RETRY; spin_lock(&vcpu->kvm->mmu_lock); if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; @@ -834,14 +835,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, level, pfn, map_writable, prefault); ++vcpu->stat.pf_fixed; kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); - spin_unlock(&vcpu->kvm->mmu_lock); - - return r; out_unlock: spin_unlock(&vcpu->kvm->mmu_lock); kvm_release_pfn_clean(pfn); - return RET_PF_RETRY; + return r; } static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp) From 3d16a8635826916820de036872c8e90322655ee3 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 24 Jun 2019 13:06:21 +0200 Subject: [PATCH 0458/3715] KVM: x86: make FNAME(fetch) and __direct_map more similar commit 3fcf2d1bdeb6a513523cb2c77012a6b047aa859c upstream. These two functions are basically doing the same thing through kvm_mmu_get_page, link_shadow_page and mmu_set_spte; yet, for historical reasons, their code looks very different. This patch tries to take the best of each and make them very similar, so that it is easy to understand changes that apply to both of them. Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/mmu.c | 53 ++++++++++++++++++-------------------- arch/x86/kvm/paging_tmpl.h | 30 ++++++++++----------- 2 files changed, 39 insertions(+), 44 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index d9a78b887bc2..055a677ed663 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3004,40 +3004,39 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) __direct_pte_prefetch(vcpu, sp, sptep); } -static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable, - int level, gfn_t gfn, kvm_pfn_t pfn, bool prefault) +static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, + int map_writable, int level, kvm_pfn_t pfn, + bool prefault) { - struct kvm_shadow_walk_iterator iterator; + struct kvm_shadow_walk_iterator it; struct kvm_mmu_page *sp; - int emulate = 0; - gfn_t pseudo_gfn; + int ret; + gfn_t gfn = gpa >> PAGE_SHIFT; + gfn_t base_gfn = gfn; if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) - return 0; + return RET_PF_RETRY; - for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { - if (iterator.level == level) { - emulate = mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, - write, level, gfn, pfn, prefault, - map_writable); - direct_pte_prefetch(vcpu, iterator.sptep); - ++vcpu->stat.pf_fixed; + for_each_shadow_entry(vcpu, gpa, it) { + base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); + if (it.level == level) break; - } - drop_large_spte(vcpu, iterator.sptep); - if (!is_shadow_present_pte(*iterator.sptep)) { - u64 base_addr = iterator.addr; + drop_large_spte(vcpu, it.sptep); + if (!is_shadow_present_pte(*it.sptep)) { + sp = kvm_mmu_get_page(vcpu, base_gfn, it.addr, + it.level - 1, true, ACC_ALL); - base_addr &= PT64_LVL_ADDR_MASK(iterator.level); - pseudo_gfn = base_addr >> PAGE_SHIFT; - sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr, - iterator.level - 1, 1, ACC_ALL); - - link_shadow_page(vcpu, iterator.sptep, sp); + link_shadow_page(vcpu, it.sptep, sp); } } - return emulate; + + ret = mmu_set_spte(vcpu, it.sptep, ACC_ALL, + write, level, base_gfn, pfn, prefault, + map_writable); + direct_pte_prefetch(vcpu, it.sptep); + ++vcpu->stat.pf_fixed; + return ret; } static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *tsk) @@ -3369,8 +3368,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, goto out_unlock; if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); - r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); - + r = __direct_map(vcpu, v, write, map_writable, level, pfn, prefault); out_unlock: spin_unlock(&vcpu->kvm->mmu_lock); kvm_release_pfn_clean(pfn); @@ -3960,8 +3958,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, goto out_unlock; if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); - r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); - + r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault); out_unlock: spin_unlock(&vcpu->kvm->mmu_lock); kvm_release_pfn_clean(pfn); diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 04c499774868..1c74bcf96232 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -602,6 +602,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, struct kvm_shadow_walk_iterator it; unsigned direct_access, access = gw->pt_access; int top_level, ret; + gfn_t base_gfn; direct_access = gw->pte_access; @@ -646,31 +647,29 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, link_shadow_page(vcpu, it.sptep, sp); } - for (; - shadow_walk_okay(&it) && it.level > hlevel; - shadow_walk_next(&it)) { - gfn_t direct_gfn; + base_gfn = gw->gfn; + for (; shadow_walk_okay(&it); shadow_walk_next(&it)) { clear_sp_write_flooding_count(it.sptep); + base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); + if (it.level == hlevel) + break; + validate_direct_spte(vcpu, it.sptep, direct_access); drop_large_spte(vcpu, it.sptep); - if (is_shadow_present_pte(*it.sptep)) - continue; - - direct_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); - - sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1, - true, direct_access); - link_shadow_page(vcpu, it.sptep, sp); + if (!is_shadow_present_pte(*it.sptep)) { + sp = kvm_mmu_get_page(vcpu, base_gfn, addr, + it.level - 1, true, direct_access); + link_shadow_page(vcpu, it.sptep, sp); + } } - clear_sp_write_flooding_count(it.sptep); ret = mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault, - it.level, gw->gfn, pfn, prefault, map_writable); + it.level, base_gfn, pfn, prefault, map_writable); FNAME(pte_prefetch)(vcpu, gw, it.sptep); - + ++vcpu->stat.pf_fixed; return ret; out_gpte_changed: @@ -833,7 +832,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); r = FNAME(fetch)(vcpu, addr, &walker, write_fault, level, pfn, map_writable, prefault); - ++vcpu->stat.pf_fixed; kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); out_unlock: From 0fb1e3c9c7a1b7d3ce527c1b60992626756fd158 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 23 Jun 2019 19:15:49 +0200 Subject: [PATCH 0459/3715] KVM: x86: remove now unneeded hugepage gfn adjustment commit d679b32611c0102ce33b9e1a4e4b94854ed1812a upstream. After the previous patch, the low bits of the gfn are masked in both FNAME(fetch) and __direct_map, so we do not need to clear them in transparent_hugepage_adjust. Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/mmu.c | 9 +++------ arch/x86/kvm/paging_tmpl.h | 2 +- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 055a677ed663..bd5a63722d25 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3071,11 +3071,10 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn) } static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, - gfn_t *gfnp, kvm_pfn_t *pfnp, + gfn_t gfn, kvm_pfn_t *pfnp, int *levelp) { kvm_pfn_t pfn = *pfnp; - gfn_t gfn = *gfnp; int level = *levelp; /* @@ -3102,8 +3101,6 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, mask = KVM_PAGES_PER_HPAGE(level) - 1; VM_BUG_ON((gfn & mask) != (pfn & mask)); if (pfn & mask) { - gfn &= ~mask; - *gfnp = gfn; kvm_release_pfn_clean(pfn); pfn &= ~mask; kvm_get_pfn(pfn); @@ -3367,7 +3364,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, if (make_mmu_pages_available(vcpu) < 0) goto out_unlock; if (likely(!force_pt_level)) - transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); + transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); r = __direct_map(vcpu, v, write, map_writable, level, pfn, prefault); out_unlock: spin_unlock(&vcpu->kvm->mmu_lock); @@ -3957,7 +3954,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, if (make_mmu_pages_available(vcpu) < 0) goto out_unlock; if (likely(!force_pt_level)) - transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); + transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault); out_unlock: spin_unlock(&vcpu->kvm->mmu_lock); diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 1c74bcf96232..7fa2e95149ee 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -829,7 +829,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, if (make_mmu_pages_available(vcpu) < 0) goto out_unlock; if (!force_pt_level) - transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); + transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level); r = FNAME(fetch)(vcpu, addr, &walker, write_fault, level, pfn, map_writable, prefault); kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); From 6e8121097b41f6e4ff7df1b8359f3085630ba749 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 30 Jun 2019 08:36:21 -0400 Subject: [PATCH 0460/3715] KVM: x86: change kvm_mmu_page_get_gfn BUG_ON to WARN_ON commit e9f2a760b158551bfbef6db31d2cae45ab8072e5 upstream. Note that in such a case it is quite likely that KVM will BUG_ON in __pte_list_remove when the VM is closed. However, there is no immediate risk of memory corruption in the host so a WARN_ON is enough and it lets you gather traces for debugging. Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/mmu.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index bd5a63722d25..17c2a1235215 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1008,10 +1008,16 @@ static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index) static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn) { - if (sp->role.direct) - BUG_ON(gfn != kvm_mmu_page_get_gfn(sp, index)); - else + if (!sp->role.direct) { sp->gfns[index] = gfn; + return; + } + + if (WARN_ON(gfn != kvm_mmu_page_get_gfn(sp, index))) + pr_err_ratelimited("gfn mismatch under direct page %llx " + "(expected %llx, got %llx)\n", + sp->gfn, + kvm_mmu_page_get_gfn(sp, index), gfn); } /* From 95867919494dce594c3c57bbf31247157a881fb7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 4 Jul 2019 05:14:13 -0400 Subject: [PATCH 0461/3715] KVM: x86: add tracepoints around __direct_map and FNAME(fetch) commit 335e192a3fa415e1202c8b9ecdaaecd643f823cc upstream. These are useful in debugging shadow paging. Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/mmu.c | 14 ++++----- arch/x86/kvm/mmutrace.h | 59 ++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/paging_tmpl.h | 2 ++ 3 files changed, 68 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 17c2a1235215..d2c4d7615aa2 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -139,9 +139,6 @@ module_param(dbg, bool, 0644); #include -#define CREATE_TRACE_POINTS -#include "mmutrace.h" - #define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) #define SPTE_MMU_WRITEABLE (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1)) @@ -244,6 +241,11 @@ static u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask; static void mmu_spte_set(u64 *sptep, u64 spte); static void mmu_free_roots(struct kvm_vcpu *vcpu); +static bool is_executable_pte(u64 spte); + +#define CREATE_TRACE_POINTS +#include "mmutrace.h" + void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value) { @@ -2909,10 +2911,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, ret = RET_PF_EMULATE; pgprintk("%s: setting spte %llx\n", __func__, *sptep); - pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n", - is_large_pte(*sptep)? "2MB" : "4kB", - *sptep & PT_WRITABLE_MASK ? "RW" : "R", gfn, - *sptep, sptep); + trace_kvm_mmu_set_spte(level, gfn, sptep); if (!was_rmapped && is_large_pte(*sptep)) ++vcpu->kvm->stat.lpages; @@ -3023,6 +3022,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) return RET_PF_RETRY; + trace_kvm_mmu_spte_requested(gpa, level, pfn); for_each_shadow_entry(vcpu, gpa, it) { base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); if (it.level == level) diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index c73bf4e4988c..918b0d5bf272 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h @@ -325,6 +325,65 @@ TRACE_EVENT( __entry->kvm_gen == __entry->spte_gen ) ); + +TRACE_EVENT( + kvm_mmu_set_spte, + TP_PROTO(int level, gfn_t gfn, u64 *sptep), + TP_ARGS(level, gfn, sptep), + + TP_STRUCT__entry( + __field(u64, gfn) + __field(u64, spte) + __field(u64, sptep) + __field(u8, level) + /* These depend on page entry type, so compute them now. */ + __field(bool, r) + __field(bool, x) + __field(u8, u) + ), + + TP_fast_assign( + __entry->gfn = gfn; + __entry->spte = *sptep; + __entry->sptep = virt_to_phys(sptep); + __entry->level = level; + __entry->r = shadow_present_mask || (__entry->spte & PT_PRESENT_MASK); + __entry->x = is_executable_pte(__entry->spte); + __entry->u = shadow_user_mask ? !!(__entry->spte & shadow_user_mask) : -1; + ), + + TP_printk("gfn %llx spte %llx (%s%s%s%s) level %d at %llx", + __entry->gfn, __entry->spte, + __entry->r ? "r" : "-", + __entry->spte & PT_WRITABLE_MASK ? "w" : "-", + __entry->x ? "x" : "-", + __entry->u == -1 ? "" : (__entry->u ? "u" : "-"), + __entry->level, __entry->sptep + ) +); + +TRACE_EVENT( + kvm_mmu_spte_requested, + TP_PROTO(gpa_t addr, int level, kvm_pfn_t pfn), + TP_ARGS(addr, level, pfn), + + TP_STRUCT__entry( + __field(u64, gfn) + __field(u64, pfn) + __field(u8, level) + ), + + TP_fast_assign( + __entry->gfn = addr >> PAGE_SHIFT; + __entry->pfn = pfn | (__entry->gfn & (KVM_PAGES_PER_HPAGE(level) - 1)); + __entry->level = level; + ), + + TP_printk("gfn %llx pfn %llx level %d", + __entry->gfn, __entry->pfn, __entry->level + ) +); + #endif /* _TRACE_KVMMMU_H */ #undef TRACE_INCLUDE_PATH diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 7fa2e95149ee..b6b0fb60e0a7 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -649,6 +649,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, base_gfn = gw->gfn; + trace_kvm_mmu_spte_requested(addr, gw->level, pfn); + for (; shadow_walk_okay(&it); shadow_walk_next(&it)) { clear_sp_write_flooding_count(it.sptep); base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); From 6abe2aaed9a4af9ddbc247b461c365d7cf027ea6 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 27 Oct 2019 09:36:37 +0100 Subject: [PATCH 0462/3715] KVM: vmx, svm: always run with EFER.NXE=1 when shadow paging is active commit 9167ab79936206118cc60e47dcb926c3489f3bd5 upstream. VMX already does so if the host has SMEP, in order to support the combination of CR0.WP=1 and CR4.SMEP=1. However, it is perfectly safe to always do so, and in fact VMX also ends up running with EFER.NXE=1 on old processors that lack the "load EFER" controls, because it may help avoiding a slow MSR write. SVM does not have similar code, but it should since recent AMD processors do support SMEP. So this patch makes the code for the two vendors simpler and more similar, while fixing an issue with CR0.WP=1 and CR4.SMEP=1 on AMD. Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner Cc: Joerg Roedel Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 10 ++++++++-- arch/x86/kvm/vmx.c | 14 +++----------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index f6adc8db0e32..52edb8cf1c40 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -608,8 +608,14 @@ static int get_npt_level(struct kvm_vcpu *vcpu) static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) { vcpu->arch.efer = efer; - if (!npt_enabled && !(efer & EFER_LMA)) - efer &= ~EFER_LME; + + if (!npt_enabled) { + /* Shadow paging assumes NX to be available. */ + efer |= EFER_NX; + + if (!(efer & EFER_LMA)) + efer &= ~EFER_LME; + } to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 02c0326dc259..cd5a8e888eb6 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2259,17 +2259,9 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) u64 guest_efer = vmx->vcpu.arch.efer; u64 ignore_bits = 0; - if (!enable_ept) { - /* - * NX is needed to handle CR0.WP=1, CR4.SMEP=1. Testing - * host CPUID is more efficient than testing guest CPUID - * or CR4. Host SMEP is anyway a requirement for guest SMEP. - */ - if (boot_cpu_has(X86_FEATURE_SMEP)) - guest_efer |= EFER_NX; - else if (!(guest_efer & EFER_NX)) - ignore_bits |= EFER_NX; - } + /* Shadow paging assumes NX to be available. */ + if (!enable_ept) + guest_efer |= EFER_NX; /* * LMA and LME handled by hardware; SCE meaningless outside long mode. From bb16a6ba5d1ed79b40caea8d924e237f63205b7c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 4 Nov 2019 12:22:02 +0100 Subject: [PATCH 0463/3715] kvm: mmu: ITLB_MULTIHIT mitigation commit b8e8c8303ff28c61046a4d0f6ea99aea609a7dc0 upstream. With some Intel processors, putting the same virtual address in the TLB as both a 4 KiB and 2 MiB page can confuse the instruction fetch unit and cause the processor to issue a machine check resulting in a CPU lockup. Unfortunately when EPT page tables use huge pages, it is possible for a malicious guest to cause this situation. Add a knob to mark huge pages as non-executable. When the nx_huge_pages parameter is enabled (and we are using EPT), all huge pages are marked as NX. If the guest attempts to execute in one of those pages, the page is broken down into 4K pages, which are then marked executable. This is not an issue for shadow paging (except nested EPT), because then the host is in control of TLB flushes and the problematic situation cannot happen. With nested EPT, again the nested guest can cause problems shadow and direct EPT is treated in the same way. [ tglx: Fixup default to auto and massage wording a bit ] Originally-by: Junaid Shahid Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 19 +++ arch/x86/include/asm/kvm_host.h | 2 + arch/x86/kernel/cpu/bugs.c | 13 +- arch/x86/kvm/mmu.c | 141 +++++++++++++++++- arch/x86/kvm/paging_tmpl.h | 29 +++- arch/x86/kvm/x86.c | 9 ++ 6 files changed, 200 insertions(+), 13 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index d16b3d41ffe5..496bc24733a6 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1852,6 +1852,19 @@ KVM MMU at runtime. Default is 0 (off) + kvm.nx_huge_pages= + [KVM] Controls the software workaround for the + X86_BUG_ITLB_MULTIHIT bug. + force : Always deploy workaround. + off : Never deploy workaround. + auto : Deploy workaround based on the presence of + X86_BUG_ITLB_MULTIHIT. + + Default is 'auto'. + + If the software workaround is enabled for the host, + guests do need not to enable it for nested guests. + kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM. Default is 1 (enabled) @@ -2410,6 +2423,12 @@ l1tf=off [X86] mds=off [X86] tsx_async_abort=off [X86] + kvm.nx_huge_pages=off [X86] + + Exceptions: + This does not have any effect on + kvm.nx_huge_pages when + kvm.nx_huge_pages=force. auto (default) Mitigate all CPU vulnerabilities, but leave SMT diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 00c12158a5dc..9751feab5149 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -289,6 +289,7 @@ struct kvm_mmu_page { /* hold the gfn of each spte inside spt */ gfn_t *gfns; bool unsync; + bool lpage_disallowed; /* Can't be replaced by an equiv large page */ int root_count; /* Currently serving as active root */ unsigned int unsync_children; struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */ @@ -867,6 +868,7 @@ struct kvm_vm_stat { ulong mmu_unsync; ulong remote_tlb_flush; ulong lpages; + ulong nx_lpage_splits; ulong max_mmu_page_hash_collisions; }; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index ee558f70d549..8596811843cc 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1225,6 +1225,9 @@ void x86_spec_ctrl_setup_ap(void) x86_amd_ssb_disable(); } +bool itlb_multihit_kvm_mitigation; +EXPORT_SYMBOL_GPL(itlb_multihit_kvm_mitigation); + #undef pr_fmt #define pr_fmt(fmt) "L1TF: " fmt @@ -1380,17 +1383,25 @@ static ssize_t l1tf_show_state(char *buf) l1tf_vmx_states[l1tf_vmx_mitigation], sched_smt_active() ? "vulnerable" : "disabled"); } + +static ssize_t itlb_multihit_show_state(char *buf) +{ + if (itlb_multihit_kvm_mitigation) + return sprintf(buf, "KVM: Mitigation: Split huge pages\n"); + else + return sprintf(buf, "KVM: Vulnerable\n"); +} #else static ssize_t l1tf_show_state(char *buf) { return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG); } -#endif static ssize_t itlb_multihit_show_state(char *buf) { return sprintf(buf, "Processor vulnerable\n"); } +#endif static ssize_t mds_show_state(char *buf) { diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index d2c4d7615aa2..e3492160aed0 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -48,6 +48,20 @@ #include #include "trace.h" +extern bool itlb_multihit_kvm_mitigation; + +static int __read_mostly nx_huge_pages = -1; + +static int set_nx_huge_pages(const char *val, const struct kernel_param *kp); + +static struct kernel_param_ops nx_huge_pages_ops = { + .set = set_nx_huge_pages, + .get = param_get_bool, +}; + +module_param_cb(nx_huge_pages, &nx_huge_pages_ops, &nx_huge_pages, 0644); +__MODULE_PARM_TYPE(nx_huge_pages, "bool"); + /* * When setting this variable to true it enables Two-Dimensional-Paging * where the hardware walks 2 page tables: @@ -266,6 +280,11 @@ static inline bool spte_ad_enabled(u64 spte) return !(spte & shadow_acc_track_value); } +static bool is_nx_huge_page_enabled(void) +{ + return READ_ONCE(nx_huge_pages); +} + static inline u64 spte_shadow_accessed_mask(u64 spte) { MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value); @@ -1078,6 +1097,15 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) kvm_mmu_gfn_disallow_lpage(slot, gfn); } +static void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp) +{ + if (sp->lpage_disallowed) + return; + + ++kvm->stat.nx_lpage_splits; + sp->lpage_disallowed = true; +} + static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) { struct kvm_memslots *slots; @@ -1095,6 +1123,12 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) kvm_mmu_gfn_allow_lpage(slot, gfn); } +static void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp) +{ + --kvm->stat.nx_lpage_splits; + sp->lpage_disallowed = false; +} + static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level, struct kvm_memory_slot *slot) { @@ -2642,6 +2676,9 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, kvm_reload_remote_mmus(kvm); } + if (sp->lpage_disallowed) + unaccount_huge_nx_page(kvm, sp); + sp->role.invalid = 1; return ret; } @@ -2796,6 +2833,11 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, if (!speculative) spte |= spte_shadow_accessed_mask(spte); + if (level > PT_PAGE_TABLE_LEVEL && (pte_access & ACC_EXEC_MASK) && + is_nx_huge_page_enabled()) { + pte_access &= ~ACC_EXEC_MASK; + } + if (pte_access & ACC_EXEC_MASK) spte |= shadow_x_mask; else @@ -3009,9 +3051,32 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) __direct_pte_prefetch(vcpu, sp, sptep); } +static void disallowed_hugepage_adjust(struct kvm_shadow_walk_iterator it, + gfn_t gfn, kvm_pfn_t *pfnp, int *levelp) +{ + int level = *levelp; + u64 spte = *it.sptep; + + if (it.level == level && level > PT_PAGE_TABLE_LEVEL && + is_nx_huge_page_enabled() && + is_shadow_present_pte(spte) && + !is_large_pte(spte)) { + /* + * A small SPTE exists for this pfn, but FNAME(fetch) + * and __direct_map would like to create a large PTE + * instead: just force them to go down another level, + * patching back for them into pfn the next 9 bits of + * the address. + */ + u64 page_mask = KVM_PAGES_PER_HPAGE(level) - KVM_PAGES_PER_HPAGE(level - 1); + *pfnp |= gfn & page_mask; + (*levelp)--; + } +} + static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, int map_writable, int level, kvm_pfn_t pfn, - bool prefault) + bool prefault, bool lpage_disallowed) { struct kvm_shadow_walk_iterator it; struct kvm_mmu_page *sp; @@ -3024,6 +3089,12 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, trace_kvm_mmu_spte_requested(gpa, level, pfn); for_each_shadow_entry(vcpu, gpa, it) { + /* + * We cannot overwrite existing page tables with an NX + * large page, as the leaf could be executable. + */ + disallowed_hugepage_adjust(it, gfn, &pfn, &level); + base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); if (it.level == level) break; @@ -3034,6 +3105,8 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, it.level - 1, true, ACC_ALL); link_shadow_page(vcpu, it.sptep, sp); + if (lpage_disallowed) + account_huge_nx_page(vcpu->kvm, sp); } } @@ -3333,11 +3406,14 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, { int r; int level; - bool force_pt_level = false; + bool force_pt_level; kvm_pfn_t pfn; unsigned long mmu_seq; bool map_writable, write = error_code & PFERR_WRITE_MASK; + bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) && + is_nx_huge_page_enabled(); + force_pt_level = lpage_disallowed; level = mapping_level(vcpu, gfn, &force_pt_level); if (likely(!force_pt_level)) { /* @@ -3371,7 +3447,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, goto out_unlock; if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); - r = __direct_map(vcpu, v, write, map_writable, level, pfn, prefault); + r = __direct_map(vcpu, v, write, map_writable, level, pfn, + prefault, false); out_unlock: spin_unlock(&vcpu->kvm->mmu_lock); kvm_release_pfn_clean(pfn); @@ -3921,6 +3998,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, unsigned long mmu_seq; int write = error_code & PFERR_WRITE_MASK; bool map_writable; + bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) && + is_nx_huge_page_enabled(); MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); @@ -3931,8 +4010,9 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, if (r) return r; - force_pt_level = !check_hugepage_cache_consistency(vcpu, gfn, - PT_DIRECTORY_LEVEL); + force_pt_level = + lpage_disallowed || + !check_hugepage_cache_consistency(vcpu, gfn, PT_DIRECTORY_LEVEL); level = mapping_level(vcpu, gfn, &force_pt_level); if (likely(!force_pt_level)) { if (level > PT_DIRECTORY_LEVEL && @@ -3961,7 +4041,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, goto out_unlock; if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); - r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault); + r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, + prefault, lpage_disallowed); out_unlock: spin_unlock(&vcpu->kvm->mmu_lock); kvm_release_pfn_clean(pfn); @@ -5524,8 +5605,56 @@ static void mmu_destroy_caches(void) kmem_cache_destroy(mmu_page_header_cache); } +static bool get_nx_auto_mode(void) +{ + /* Return true when CPU has the bug, and mitigations are ON */ + return boot_cpu_has_bug(X86_BUG_ITLB_MULTIHIT) && !cpu_mitigations_off(); +} + +static void __set_nx_huge_pages(bool val) +{ + nx_huge_pages = itlb_multihit_kvm_mitigation = val; +} + +static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) +{ + bool old_val = nx_huge_pages; + bool new_val; + + /* In "auto" mode deploy workaround only if CPU has the bug. */ + if (sysfs_streq(val, "off")) + new_val = 0; + else if (sysfs_streq(val, "force")) + new_val = 1; + else if (sysfs_streq(val, "auto")) + new_val = get_nx_auto_mode(); + else if (strtobool(val, &new_val) < 0) + return -EINVAL; + + __set_nx_huge_pages(new_val); + + if (new_val != old_val) { + struct kvm *kvm; + int idx; + + mutex_lock(&kvm_lock); + + list_for_each_entry(kvm, &vm_list, vm_list) { + idx = srcu_read_lock(&kvm->srcu); + kvm_mmu_invalidate_zap_all_pages(kvm); + srcu_read_unlock(&kvm->srcu, idx); + } + mutex_unlock(&kvm_lock); + } + + return 0; +} + int kvm_mmu_module_init(void) { + if (nx_huge_pages == -1) + __set_nx_huge_pages(get_nx_auto_mode()); + kvm_mmu_reset_all_pte_masks(); pte_list_desc_cache = kmem_cache_create("pte_list_desc", diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index b6b0fb60e0a7..8cf7a09bdd73 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -596,13 +596,14 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, struct guest_walker *gw, int write_fault, int hlevel, - kvm_pfn_t pfn, bool map_writable, bool prefault) + kvm_pfn_t pfn, bool map_writable, bool prefault, + bool lpage_disallowed) { struct kvm_mmu_page *sp = NULL; struct kvm_shadow_walk_iterator it; unsigned direct_access, access = gw->pt_access; int top_level, ret; - gfn_t base_gfn; + gfn_t gfn, base_gfn; direct_access = gw->pte_access; @@ -647,13 +648,25 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, link_shadow_page(vcpu, it.sptep, sp); } - base_gfn = gw->gfn; + /* + * FNAME(page_fault) might have clobbered the bottom bits of + * gw->gfn, restore them from the virtual address. + */ + gfn = gw->gfn | ((addr & PT_LVL_OFFSET_MASK(gw->level)) >> PAGE_SHIFT); + base_gfn = gfn; trace_kvm_mmu_spte_requested(addr, gw->level, pfn); for (; shadow_walk_okay(&it); shadow_walk_next(&it)) { clear_sp_write_flooding_count(it.sptep); - base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); + + /* + * We cannot overwrite existing page tables with an NX + * large page, as the leaf could be executable. + */ + disallowed_hugepage_adjust(it, gfn, &pfn, &hlevel); + + base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); if (it.level == hlevel) break; @@ -665,6 +678,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, sp = kvm_mmu_get_page(vcpu, base_gfn, addr, it.level - 1, true, direct_access); link_shadow_page(vcpu, it.sptep, sp); + if (lpage_disallowed) + account_huge_nx_page(vcpu->kvm, sp); } } @@ -741,9 +756,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, int r; kvm_pfn_t pfn; int level = PT_PAGE_TABLE_LEVEL; - bool force_pt_level = false; unsigned long mmu_seq; bool map_writable, is_self_change_mapping; + bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) && + is_nx_huge_page_enabled(); + bool force_pt_level = lpage_disallowed; pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); @@ -833,7 +850,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, if (!force_pt_level) transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level); r = FNAME(fetch)(vcpu, addr, &walker, write_fault, - level, pfn, map_writable, prefault); + level, pfn, map_writable, prefault, lpage_disallowed); kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); out_unlock: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 06d7df4331b1..0804ad3326dc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -192,6 +192,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "mmu_unsync", VM_STAT(mmu_unsync) }, { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, { "largepages", VM_STAT(lpages, .mode = 0444) }, + { "nx_largepages_splitted", VM_STAT(nx_lpage_splits, .mode = 0444) }, { "max_mmu_page_hash_collisions", VM_STAT(max_mmu_page_hash_collisions) }, { NULL } @@ -1069,6 +1070,14 @@ u64 kvm_get_arch_capabilities(void) rdmsrl_safe(MSR_IA32_ARCH_CAPABILITIES, &data); + /* + * If nx_huge_pages is enabled, KVM's shadow paging will ensure that + * the nested hypervisor runs with NX huge pages. If it is not, + * L1 is anyway vulnerable to ITLB_MULTIHIT explots from other + * L1 guests, so it need not worry about its own (L2) guests. + */ + data |= ARCH_CAP_PSCHANGE_MC_NO; + /* * If we're doing cache flushes (either "always" or "cond") * we will do one whenever the guest does a vmlaunch/vmresume. From 73959112cc8dbd30a09e169a9da868f40e750e2d Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Fri, 1 Nov 2019 00:14:08 +0100 Subject: [PATCH 0464/3715] kvm: Add helper function for creating VM worker threads commit c57c80467f90e5504c8df9ad3555d2c78800bf94 upstream. Add a function to create a kernel thread associated with a given VM. In particular, it ensures that the worker thread inherits the priority and cgroups of the calling thread. Signed-off-by: Junaid Shahid Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- include/linux/kvm_host.h | 6 +++ virt/kvm/kvm_main.c | 84 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 31d566de33e9..bb4758ffd403 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1260,4 +1260,10 @@ static inline bool vcpu_valid_wakeup(struct kvm_vcpu *vcpu) } #endif /* CONFIG_HAVE_KVM_INVALID_WAKEUPS */ +typedef int (*kvm_vm_thread_fn_t)(struct kvm *kvm, uintptr_t data); + +int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn, + uintptr_t data, const char *name, + struct task_struct **thread_ptr); + #endif diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 345f6cf36043..5e640f82b314 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include @@ -4155,3 +4156,86 @@ void kvm_exit(void) kvm_vfio_ops_exit(); } EXPORT_SYMBOL_GPL(kvm_exit); + +struct kvm_vm_worker_thread_context { + struct kvm *kvm; + struct task_struct *parent; + struct completion init_done; + kvm_vm_thread_fn_t thread_fn; + uintptr_t data; + int err; +}; + +static int kvm_vm_worker_thread(void *context) +{ + /* + * The init_context is allocated on the stack of the parent thread, so + * we have to locally copy anything that is needed beyond initialization + */ + struct kvm_vm_worker_thread_context *init_context = context; + struct kvm *kvm = init_context->kvm; + kvm_vm_thread_fn_t thread_fn = init_context->thread_fn; + uintptr_t data = init_context->data; + int err; + + err = kthread_park(current); + /* kthread_park(current) is never supposed to return an error */ + WARN_ON(err != 0); + if (err) + goto init_complete; + + err = cgroup_attach_task_all(init_context->parent, current); + if (err) { + kvm_err("%s: cgroup_attach_task_all failed with err %d\n", + __func__, err); + goto init_complete; + } + + set_user_nice(current, task_nice(init_context->parent)); + +init_complete: + init_context->err = err; + complete(&init_context->init_done); + init_context = NULL; + + if (err) + return err; + + /* Wait to be woken up by the spawner before proceeding. */ + kthread_parkme(); + + if (!kthread_should_stop()) + err = thread_fn(kvm, data); + + return err; +} + +int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn, + uintptr_t data, const char *name, + struct task_struct **thread_ptr) +{ + struct kvm_vm_worker_thread_context init_context = {}; + struct task_struct *thread; + + *thread_ptr = NULL; + init_context.kvm = kvm; + init_context.parent = current; + init_context.thread_fn = thread_fn; + init_context.data = data; + init_completion(&init_context.init_done); + + thread = kthread_run(kvm_vm_worker_thread, &init_context, + "%s-%d", name, task_pid_nr(current)); + if (IS_ERR(thread)) + return PTR_ERR(thread); + + /* kthread_run is never supposed to return NULL */ + WARN_ON(thread == NULL); + + wait_for_completion(&init_context.init_done); + + if (!init_context.err) + *thread_ptr = thread; + + return init_context.err; +} From 2d371f8836c5d633f9f495c9165eaf814643539d Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Fri, 1 Nov 2019 00:14:14 +0100 Subject: [PATCH 0465/3715] kvm: x86: mmu: Recovery of shattered NX large pages commit 1aa9b9572b10529c2e64e2b8f44025d86e124308 upstream. The page table pages corresponding to broken down large pages are zapped in FIFO order, so that the large page can potentially be recovered, if it is not longer being used for execution. This removes the performance penalty for walking deeper EPT page tables. By default, one large page will last about one hour once the guest reaches a steady state. Signed-off-by: Junaid Shahid Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 6 + arch/x86/include/asm/kvm_host.h | 4 + arch/x86/kvm/mmu.c | 129 ++++++++++++++++++ arch/x86/kvm/mmu.h | 4 + arch/x86/kvm/x86.c | 11 ++ virt/kvm/kvm_main.c | 30 +++- 6 files changed, 183 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 496bc24733a6..05596e05bc71 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1865,6 +1865,12 @@ If the software workaround is enabled for the host, guests do need not to enable it for nested guests. + kvm.nx_huge_pages_recovery_ratio= + [KVM] Controls how many 4KiB pages are periodically zapped + back to huge pages. 0 disables the recovery, otherwise if + the value is N KVM will zap 1/Nth of the 4KiB pages every + minute. The default is 60. + kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM. Default is 1 (enabled) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9751feab5149..d0e17813a9b0 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -277,6 +277,7 @@ struct kvm_rmap_head { struct kvm_mmu_page { struct list_head link; struct hlist_node hash_link; + struct list_head lpage_disallowed_link; /* * The following two entries are used to key the shadow page in the @@ -780,6 +781,7 @@ struct kvm_arch { */ struct list_head active_mmu_pages; struct list_head zapped_obsolete_pages; + struct list_head lpage_disallowed_mmu_pages; struct kvm_page_track_notifier_node mmu_sp_tracker; struct kvm_page_track_notifier_head track_notifier_head; @@ -855,6 +857,8 @@ struct kvm_arch { bool x2apic_format; bool x2apic_broadcast_quirk_disabled; + + struct task_struct *nx_lpage_recovery_thread; }; struct kvm_vm_stat { diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index e3492160aed0..8cd26e50d41c 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -51,16 +52,26 @@ extern bool itlb_multihit_kvm_mitigation; static int __read_mostly nx_huge_pages = -1; +static uint __read_mostly nx_huge_pages_recovery_ratio = 60; static int set_nx_huge_pages(const char *val, const struct kernel_param *kp); +static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp); static struct kernel_param_ops nx_huge_pages_ops = { .set = set_nx_huge_pages, .get = param_get_bool, }; +static struct kernel_param_ops nx_huge_pages_recovery_ratio_ops = { + .set = set_nx_huge_pages_recovery_ratio, + .get = param_get_uint, +}; + module_param_cb(nx_huge_pages, &nx_huge_pages_ops, &nx_huge_pages, 0644); __MODULE_PARM_TYPE(nx_huge_pages, "bool"); +module_param_cb(nx_huge_pages_recovery_ratio, &nx_huge_pages_recovery_ratio_ops, + &nx_huge_pages_recovery_ratio, 0644); +__MODULE_PARM_TYPE(nx_huge_pages_recovery_ratio, "uint"); /* * When setting this variable to true it enables Two-Dimensional-Paging @@ -1103,6 +1114,8 @@ static void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp) return; ++kvm->stat.nx_lpage_splits; + list_add_tail(&sp->lpage_disallowed_link, + &kvm->arch.lpage_disallowed_mmu_pages); sp->lpage_disallowed = true; } @@ -1127,6 +1140,7 @@ static void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp) { --kvm->stat.nx_lpage_splits; sp->lpage_disallowed = false; + list_del(&sp->lpage_disallowed_link); } static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level, @@ -5643,6 +5657,8 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) idx = srcu_read_lock(&kvm->srcu); kvm_mmu_invalidate_zap_all_pages(kvm); srcu_read_unlock(&kvm->srcu, idx); + + wake_up_process(kvm->arch.nx_lpage_recovery_thread); } mutex_unlock(&kvm_lock); } @@ -5720,3 +5736,116 @@ void kvm_mmu_module_exit(void) unregister_shrinker(&mmu_shrinker); mmu_audit_disable(); } + +static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp) +{ + unsigned int old_val; + int err; + + old_val = nx_huge_pages_recovery_ratio; + err = param_set_uint(val, kp); + if (err) + return err; + + if (READ_ONCE(nx_huge_pages) && + !old_val && nx_huge_pages_recovery_ratio) { + struct kvm *kvm; + + mutex_lock(&kvm_lock); + + list_for_each_entry(kvm, &vm_list, vm_list) + wake_up_process(kvm->arch.nx_lpage_recovery_thread); + + mutex_unlock(&kvm_lock); + } + + return err; +} + +static void kvm_recover_nx_lpages(struct kvm *kvm) +{ + int rcu_idx; + struct kvm_mmu_page *sp; + unsigned int ratio; + LIST_HEAD(invalid_list); + ulong to_zap; + + rcu_idx = srcu_read_lock(&kvm->srcu); + spin_lock(&kvm->mmu_lock); + + ratio = READ_ONCE(nx_huge_pages_recovery_ratio); + to_zap = ratio ? DIV_ROUND_UP(kvm->stat.nx_lpage_splits, ratio) : 0; + while (to_zap && !list_empty(&kvm->arch.lpage_disallowed_mmu_pages)) { + /* + * We use a separate list instead of just using active_mmu_pages + * because the number of lpage_disallowed pages is expected to + * be relatively small compared to the total. + */ + sp = list_first_entry(&kvm->arch.lpage_disallowed_mmu_pages, + struct kvm_mmu_page, + lpage_disallowed_link); + WARN_ON_ONCE(!sp->lpage_disallowed); + kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); + WARN_ON_ONCE(sp->lpage_disallowed); + + if (!--to_zap || need_resched() || spin_needbreak(&kvm->mmu_lock)) { + kvm_mmu_commit_zap_page(kvm, &invalid_list); + if (to_zap) + cond_resched_lock(&kvm->mmu_lock); + } + } + + spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, rcu_idx); +} + +static long get_nx_lpage_recovery_timeout(u64 start_time) +{ + return READ_ONCE(nx_huge_pages) && READ_ONCE(nx_huge_pages_recovery_ratio) + ? start_time + 60 * HZ - get_jiffies_64() + : MAX_SCHEDULE_TIMEOUT; +} + +static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data) +{ + u64 start_time; + long remaining_time; + + while (true) { + start_time = get_jiffies_64(); + remaining_time = get_nx_lpage_recovery_timeout(start_time); + + set_current_state(TASK_INTERRUPTIBLE); + while (!kthread_should_stop() && remaining_time > 0) { + schedule_timeout(remaining_time); + remaining_time = get_nx_lpage_recovery_timeout(start_time); + set_current_state(TASK_INTERRUPTIBLE); + } + + set_current_state(TASK_RUNNING); + + if (kthread_should_stop()) + return 0; + + kvm_recover_nx_lpages(kvm); + } +} + +int kvm_mmu_post_init_vm(struct kvm *kvm) +{ + int err; + + err = kvm_vm_create_worker_thread(kvm, kvm_nx_lpage_recovery_worker, 0, + "kvm-nx-lpage-recovery", + &kvm->arch.nx_lpage_recovery_thread); + if (!err) + kthread_unpark(kvm->arch.nx_lpage_recovery_thread); + + return err; +} + +void kvm_mmu_pre_destroy_vm(struct kvm *kvm) +{ + if (kvm->arch.nx_lpage_recovery_thread) + kthread_stop(kvm->arch.nx_lpage_recovery_thread); +} diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index efc857615d8e..068feab64acf 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -195,4 +195,8 @@ void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, struct kvm_memory_slot *slot, u64 gfn); int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu); + +int kvm_mmu_post_init_vm(struct kvm *kvm); +void kvm_mmu_pre_destroy_vm(struct kvm *kvm); + #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0804ad3326dc..dcee3282112d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8370,6 +8370,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list); INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); + INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages); INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); atomic_set(&kvm->arch.noncoherent_dma_count, 0); @@ -8399,6 +8400,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) return 0; } +int kvm_arch_post_init_vm(struct kvm *kvm) +{ + return kvm_mmu_post_init_vm(kvm); +} + static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) { int r; @@ -8502,6 +8508,11 @@ int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) } EXPORT_SYMBOL_GPL(x86_set_memory_region); +void kvm_arch_pre_destroy_vm(struct kvm *kvm) +{ + kvm_mmu_pre_destroy_vm(kvm); +} + void kvm_arch_destroy_vm(struct kvm *kvm) { if (current->mm == kvm->mm) { diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5e640f82b314..ea61162b2b53 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -608,6 +608,23 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) return 0; } +/* + * Called after the VM is otherwise initialized, but just before adding it to + * the vm_list. + */ +int __weak kvm_arch_post_init_vm(struct kvm *kvm) +{ + return 0; +} + +/* + * Called just after removing the VM from the vm_list, but before doing any + * other destruction. + */ +void __weak kvm_arch_pre_destroy_vm(struct kvm *kvm) +{ +} + static struct kvm *kvm_create_vm(unsigned long type) { int r, i; @@ -662,10 +679,14 @@ static struct kvm *kvm_create_vm(unsigned long type) rcu_assign_pointer(kvm->buses[i], kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL)); if (!kvm->buses[i]) - goto out_err; + goto out_err_no_mmu_notifier; } r = kvm_init_mmu_notifier(kvm); + if (r) + goto out_err_no_mmu_notifier; + + r = kvm_arch_post_init_vm(kvm); if (r) goto out_err; @@ -678,6 +699,11 @@ static struct kvm *kvm_create_vm(unsigned long type) return kvm; out_err: +#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) + if (kvm->mmu_notifier.ops) + mmu_notifier_unregister(&kvm->mmu_notifier, current->mm); +#endif +out_err_no_mmu_notifier: cleanup_srcu_struct(&kvm->irq_srcu); out_err_no_irq_srcu: cleanup_srcu_struct(&kvm->srcu); @@ -720,6 +746,8 @@ static void kvm_destroy_vm(struct kvm *kvm) mutex_lock(&kvm_lock); list_del(&kvm->vm_list); mutex_unlock(&kvm_lock); + kvm_arch_pre_destroy_vm(kvm); + kvm_free_irq_routing(kvm); for (i = 0; i < KVM_NR_BUSES; i++) { struct kvm_io_bus *bus = kvm_get_bus(kvm, i); From 775d01b65b5daa002a9ba60f2d2bb3b1a6ce12fb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 12 Nov 2019 19:19:08 +0100 Subject: [PATCH 0466/3715] Linux 4.14.154 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2819ed540ce2..4d2d55691548 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 153 +SUBLEVEL = 154 EXTRAVERSION = NAME = Petit Gorille From b4f45376a4f419167d271d38e6d673d0ec81f47e Mon Sep 17 00:00:00 2001 From: Vasyl Gomonovych Date: Mon, 18 Dec 2017 11:05:43 -0700 Subject: [PATCH 0467/3715] UPSTREAM: coresight: Use PTR_ERR_OR_ZERO() Fix ptr_ret.cocci warnings: drivers/hwtracing/coresight/coresight-tpiu.c:163:1-3: WARNING: PTR_ERR_OR_ZERO can be used drivers/hwtracing/coresight/coresight-funnel.c:217:1-3: WARNING: PTR_ERR_OR_ZERO can be used drivers/hwtracing/coresight/coresight-dynamic-replicator.c:166:1-3: WARNING: PTR_ERR_OR_ZERO can be used Use PTR_ERR_OR_ZERO rather than if(IS_ERR(...)) + PTR_ERR Generated by: scripts/coccinelle/api/ptr_ret.cocci Signed-off-by: Vasyl Gomonovych Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 3703221944d00ac7af85c293f8939a273f587544). Bug: 140266694 Change-Id: I7bb7b8b63768311cb50d7eabf783edc25c9179ab Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-dynamic-replicator.c | 4 +--- drivers/hwtracing/coresight/coresight-funnel.c | 4 +--- drivers/hwtracing/coresight/coresight-tpiu.c | 4 +--- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-dynamic-replicator.c b/drivers/hwtracing/coresight/coresight-dynamic-replicator.c index accc2056f7c6..bfc65067505f 100644 --- a/drivers/hwtracing/coresight/coresight-dynamic-replicator.c +++ b/drivers/hwtracing/coresight/coresight-dynamic-replicator.c @@ -163,10 +163,8 @@ static int replicator_probe(struct amba_device *adev, const struct amba_id *id) desc.dev = &adev->dev; desc.groups = replicator_groups; drvdata->csdev = coresight_register(&desc); - if (IS_ERR(drvdata->csdev)) - return PTR_ERR(drvdata->csdev); - return 0; + return PTR_ERR_OR_ZERO(drvdata->csdev); } #ifdef CONFIG_PM diff --git a/drivers/hwtracing/coresight/coresight-funnel.c b/drivers/hwtracing/coresight/coresight-funnel.c index 77642e0e955b..5b273ffd71f5 100644 --- a/drivers/hwtracing/coresight/coresight-funnel.c +++ b/drivers/hwtracing/coresight/coresight-funnel.c @@ -214,10 +214,8 @@ static int funnel_probe(struct amba_device *adev, const struct amba_id *id) desc.dev = dev; desc.groups = coresight_funnel_groups; drvdata->csdev = coresight_register(&desc); - if (IS_ERR(drvdata->csdev)) - return PTR_ERR(drvdata->csdev); - return 0; + return PTR_ERR_OR_ZERO(drvdata->csdev); } #ifdef CONFIG_PM diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c b/drivers/hwtracing/coresight/coresight-tpiu.c index 15dd01f8c197..ccd2c9951aad 100644 --- a/drivers/hwtracing/coresight/coresight-tpiu.c +++ b/drivers/hwtracing/coresight/coresight-tpiu.c @@ -168,10 +168,8 @@ static int tpiu_probe(struct amba_device *adev, const struct amba_id *id) desc.pdata = pdata; desc.dev = dev; drvdata->csdev = coresight_register(&desc); - if (IS_ERR(drvdata->csdev)) - return PTR_ERR(drvdata->csdev); - return 0; + return PTR_ERR_OR_ZERO(drvdata->csdev); } #ifdef CONFIG_PM From 7dace9ced8d7be42384bb990c0d20c03f8c49fb3 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 17 Apr 2018 23:29:07 -0700 Subject: [PATCH 0468/3715] UPSTREAM: perf/core: Fix bad use of igrab() As Miklos reported and suggested: "This pattern repeats two times in trace_uprobe.c and in kernel/events/core.c as well: ret = kern_path(filename, LOOKUP_FOLLOW, &path); if (ret) goto fail_address_parse; inode = igrab(d_inode(path.dentry)); path_put(&path); And it's wrong. You can only hold a reference to the inode if you have an active ref to the superblock as well (which is normally through path.mnt) or holding s_umount. This way unmounting the containing filesystem while the tracepoint is active will give you the "VFS: Busy inodes after unmount..." message and a crash when the inode is finally put. Solution: store path instead of inode." This patch fixes the issue in kernel/event/core.c. Reviewed-and-tested-by: Alexander Shishkin Reported-by: Miklos Szeredi Signed-off-by: Song Liu Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: 375637bc5249 ("perf/core: Introduce address range filtering") Link: http://lkml.kernel.org/r/20180418062907.3210386-2-songliubraving@fb.com Signed-off-by: Ingo Molnar (Upstream commit 9511bce9fe8e5e6c0f923c09243a713eba560141). Bug: 140266694 Change-Id: I6476df739d81e0c75d02bdfd5e4e5796a3e845c0 Signed-off-by: Yabin Cui --- arch/x86/events/intel/pt.c | 4 ++-- include/linux/perf_event.h | 2 +- kernel/events/core.c | 21 +++++++++------------ 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 81fd41d5a0d9..0661227d935c 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -1190,7 +1190,7 @@ static int pt_event_addr_filters_validate(struct list_head *filters) if (!filter->range || !filter->size) return -EOPNOTSUPP; - if (!filter->inode) { + if (!filter->path.dentry) { if (!valid_kernel_ip(filter->offset)) return -EINVAL; @@ -1217,7 +1217,7 @@ static void pt_event_addr_filters_sync(struct perf_event *event) return; list_for_each_entry(filter, &head->list, entry) { - if (filter->inode && !offs[range]) { + if (filter->path.dentry && !offs[range]) { msr_a = msr_b = 0; } else { /* apply the offset */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 00ae925e2878..987573d4c05c 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -466,7 +466,7 @@ struct pmu { */ struct perf_addr_filter { struct list_head entry; - struct inode *inode; + struct path path; unsigned long offset; unsigned long size; unsigned int range : 1, diff --git a/kernel/events/core.c b/kernel/events/core.c index ff503be82692..3cd1cd4d6e0e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6435,7 +6435,7 @@ static void perf_event_addr_filters_exec(struct perf_event *event, void *data) raw_spin_lock_irqsave(&ifh->lock, flags); list_for_each_entry(filter, &ifh->list, entry) { - if (filter->inode) { + if (filter->path.dentry) { event->addr_filters_offs[count] = 0; restart++; } @@ -7102,7 +7102,7 @@ static bool perf_addr_filter_match(struct perf_addr_filter *filter, struct file *file, unsigned long offset, unsigned long size) { - if (filter->inode != file_inode(file)) + if (d_inode(filter->path.dentry) != file_inode(file)) return false; if (filter->offset > offset + size) @@ -8323,8 +8323,7 @@ static void free_filters_list(struct list_head *filters) struct perf_addr_filter *filter, *iter; list_for_each_entry_safe(filter, iter, filters, entry) { - if (filter->inode) - iput(filter->inode); + path_put(&filter->path); list_del(&filter->entry); kfree(filter); } @@ -8421,7 +8420,7 @@ static void perf_event_addr_filters_apply(struct perf_event *event) * Adjust base offset if the filter is associated to a binary * that needs to be mapped: */ - if (filter->inode) + if (filter->path.dentry) event->addr_filters_offs[count] = perf_addr_filter_apply(filter, mm); @@ -8494,7 +8493,6 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr, { struct perf_addr_filter *filter = NULL; char *start, *orig, *filename = NULL; - struct path path; substring_t args[MAX_OPT_ARGS]; int state = IF_STATE_ACTION, token; unsigned int kernel = 0; @@ -8598,19 +8596,18 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr, goto fail_free_name; /* look up the path and grab its inode */ - ret = kern_path(filename, LOOKUP_FOLLOW, &path); + ret = kern_path(filename, LOOKUP_FOLLOW, + &filter->path); if (ret) goto fail_free_name; - filter->inode = igrab(d_inode(path.dentry)); - path_put(&path); kfree(filename); filename = NULL; ret = -EINVAL; - if (!filter->inode || - !S_ISREG(filter->inode->i_mode)) - /* free_filters_list() will iput() */ + if (!filter->path.dentry || + !S_ISREG(d_inode(filter->path.dentry) + ->i_mode)) goto fail; event->addr_filters.nr_file_filters++; From eaaaac3f9f15439dff64012d4db5289145ea82b0 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Wed, 9 May 2018 12:06:03 -0600 Subject: [PATCH 0469/3715] UPSTREAM: coresight: use put_device() instead of kfree() Never directly free @dev after calling device_register(), even if it returned an error. Always use put_device() to give up the reference initialized. Signed-off-by: Arvind Yadav Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit a7082daac85231ddbb0502d777455756e4e3e69d). Bug: 140266694 Change-Id: I50e535126a509e13b18d1cb53af167493fe59534 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index e571e4010dff..9ced2c7476a4 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -1044,8 +1044,10 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) dev_set_name(&csdev->dev, "%s", desc->pdata->name); ret = device_register(&csdev->dev); - if (ret) - goto err_device_register; + if (ret) { + put_device(&csdev->dev); + goto err_kzalloc_csdev; + } mutex_lock(&coresight_mutex); @@ -1056,8 +1058,6 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) return csdev; -err_device_register: - kfree(conns); err_kzalloc_conns: kfree(refcnts); err_kzalloc_refcnts: From dd796e5edff83c0f3883bd68dcdbcefd7b7b8dbc Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 9 May 2018 12:06:05 -0600 Subject: [PATCH 0470/3715] UPSTREAM: coresight tmc etr: Make memory check consistent in the same function While operating from sysFS the TMC-ETR driver needs to make sure it has memory to work with but doesn't allocate memory uselessly either. Since the main memory handle for this driver is drvdata::vaddr, use it throughout function tmc_enable_etr_sink_sysfs() so that things are consistent. Reported-by: Dan Carpenter Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 3606a1dcb46eee173203702b6b19cd898afa5d15). Bug: 140266694 Change-Id: I2a0435ae0eeb8b38858e73d65db1a24149b4ea95 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-tmc-etr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 68fbc8f7450e..45589bdaf150 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -164,11 +164,11 @@ static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev) goto out; /* - * If drvdata::buf == NULL, use the memory allocated above. + * If drvdata::vaddr == NULL, use the memory allocated above. * Otherwise a buffer still exists from a previous session, so * simply use that. */ - if (drvdata->buf == NULL) { + if (drvdata->vaddr == NULL) { used = true; drvdata->vaddr = vaddr; drvdata->paddr = paddr; From 91e656ba0587ef1b9083c989302e184bfc8c860d Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 9 May 2018 12:06:06 -0600 Subject: [PATCH 0471/3715] UPSTREAM: coresight tmc etr: Fix uninitialised variable Variable 'paddr' can't be used if uninitialised but is nonetheless confusing to some static checker. As such simply initialise it to zero. Reported-by: Dan Carpenter Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 805c2c2cd0d3f16438d2391f3f188e4357fbc1cc). Bug: 140266694 Change-Id: Ibb50e47ecf46106879ccae7117f0c3745b6c0685 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-tmc-etr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 45589bdaf150..ed4344968b76 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -124,7 +124,7 @@ static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev) bool used = false; unsigned long flags; void __iomem *vaddr = NULL; - dma_addr_t paddr; + dma_addr_t paddr = 0; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); From 9159bec76b38d5f57b1baa362831bf7e23912fc8 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 11 Jul 2018 13:40:15 -0600 Subject: [PATCH 0472/3715] UPSTREAM: coresight: tmc: Hide trace buffer handling for file read At the moment we adjust the buffer pointers for reading the trace data via misc device in the common code for ETF/ETB and ETR. Since we are going to change how we manage the buffer for ETR, let us move the buffer manipulation to the respective driver files, hiding it from the common code. We do so by adding type specific helpers for finding the length of data and the pointer to the buffer, for a given length at a file position. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 3495722a0093c80f66c020f90161983b46a1f5ee). Bug: 140266694 Change-Id: If57b9ea693ff42075ae8f39adf1c3ff003bd3128 Signed-off-by: Yabin Cui --- .../hwtracing/coresight/coresight-tmc-etf.c | 18 ++++++++ .../hwtracing/coresight/coresight-tmc-etr.c | 34 +++++++++++++++ drivers/hwtracing/coresight/coresight-tmc.c | 41 +++++++++++-------- drivers/hwtracing/coresight/coresight-tmc.h | 4 ++ 4 files changed, 79 insertions(+), 18 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index e2513b786242..e5edf46eccf9 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -120,6 +120,24 @@ static void tmc_etf_disable_hw(struct tmc_drvdata *drvdata) CS_LOCK(drvdata->base); } +/* + * Return the available trace data in the buffer from @pos, with + * a maximum limit of @len, updating the @bufpp on where to + * find it. + */ +ssize_t tmc_etb_get_sysfs_trace(struct tmc_drvdata *drvdata, + loff_t pos, size_t len, char **bufpp) +{ + ssize_t actual = len; + + /* Adjust the len to available size @pos */ + if (pos + actual > drvdata->len) + actual = drvdata->len - pos; + if (actual > 0) + *bufpp = drvdata->buf + pos; + return actual; +} + static int tmc_enable_etf_sink_sysfs(struct coresight_device *csdev) { int ret = 0; diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index ed4344968b76..1b2fe1dc1271 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -69,6 +69,40 @@ static void tmc_etr_enable_hw(struct tmc_drvdata *drvdata) CS_LOCK(drvdata->base); } +/* + * Return the available trace data in the buffer @pos, with a maximum + * limit of @len, also updating the @bufpp on where to find it. + */ +ssize_t tmc_etr_get_sysfs_trace(struct tmc_drvdata *drvdata, + loff_t pos, size_t len, char **bufpp) +{ + ssize_t actual = len; + char *bufp = drvdata->buf + pos; + char *bufend = (char *)(drvdata->vaddr + drvdata->size); + + /* Adjust the len to available size @pos */ + if (pos + actual > drvdata->len) + actual = drvdata->len - pos; + + if (actual <= 0) + return actual; + + /* + * Since we use a circular buffer, with trace data starting + * @drvdata->buf, possibly anywhere in the buffer @drvdata->vaddr, + * wrap the current @pos to within the buffer. + */ + if (bufp >= bufend) + bufp -= drvdata->size; + /* + * For simplicity, avoid copying over a wrapped around buffer. + */ + if ((bufp + actual) > bufend) + actual = bufend - bufp; + *bufpp = bufp; + return actual; +} + static void tmc_etr_dump_hw(struct tmc_drvdata *drvdata) { const u32 *barrier; diff --git a/drivers/hwtracing/coresight/coresight-tmc.c b/drivers/hwtracing/coresight/coresight-tmc.c index 2ff4a66a3caa..6d7e0a21b2f4 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.c +++ b/drivers/hwtracing/coresight/coresight-tmc.c @@ -131,35 +131,40 @@ static int tmc_open(struct inode *inode, struct file *file) return 0; } +static inline ssize_t tmc_get_sysfs_trace(struct tmc_drvdata *drvdata, + loff_t pos, size_t len, char **bufpp) +{ + switch (drvdata->config_type) { + case TMC_CONFIG_TYPE_ETB: + case TMC_CONFIG_TYPE_ETF: + return tmc_etb_get_sysfs_trace(drvdata, pos, len, bufpp); + case TMC_CONFIG_TYPE_ETR: + return tmc_etr_get_sysfs_trace(drvdata, pos, len, bufpp); + } + + return -EINVAL; +} + static ssize_t tmc_read(struct file *file, char __user *data, size_t len, loff_t *ppos) { + char *bufp; + ssize_t actual; struct tmc_drvdata *drvdata = container_of(file->private_data, struct tmc_drvdata, miscdev); - char *bufp = drvdata->buf + *ppos; + actual = tmc_get_sysfs_trace(drvdata, *ppos, len, &bufp); + if (actual <= 0) + return 0; - if (*ppos + len > drvdata->len) - len = drvdata->len - *ppos; - - if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) { - if (bufp == (char *)(drvdata->vaddr + drvdata->size)) - bufp = drvdata->vaddr; - else if (bufp > (char *)(drvdata->vaddr + drvdata->size)) - bufp -= drvdata->size; - if ((bufp + len) > (char *)(drvdata->vaddr + drvdata->size)) - len = (char *)(drvdata->vaddr + drvdata->size) - bufp; - } - - if (copy_to_user(data, bufp, len)) { + if (copy_to_user(data, bufp, actual)) { dev_dbg(drvdata->dev, "%s: copy_to_user failed\n", __func__); return -EFAULT; } - *ppos += len; + *ppos += actual; + dev_dbg(drvdata->dev, "%zu bytes copied\n", actual); - dev_dbg(drvdata->dev, "%s: %zu bytes copied, %d bytes left\n", - __func__, len, (int)(drvdata->len - *ppos)); - return len; + return actual; } static int tmc_release(struct inode *inode, struct file *file) diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h index 8df7a813f537..73f944da836e 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.h +++ b/drivers/hwtracing/coresight/coresight-tmc.h @@ -183,10 +183,14 @@ int tmc_read_unprepare_etb(struct tmc_drvdata *drvdata); extern const struct coresight_ops tmc_etb_cs_ops; extern const struct coresight_ops tmc_etf_cs_ops; +ssize_t tmc_etb_get_sysfs_trace(struct tmc_drvdata *drvdata, + loff_t pos, size_t len, char **bufpp); /* ETR functions */ int tmc_read_prepare_etr(struct tmc_drvdata *drvdata); int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata); extern const struct coresight_ops tmc_etr_cs_ops; +ssize_t tmc_etr_get_sysfs_trace(struct tmc_drvdata *drvdata, + loff_t pos, size_t len, char **bufpp); #define TMC_REG_PAIR(name, lo_off, hi_off) \ From 8de039caf0d59fa662f9e8d9c51c9e997f93b0f7 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 11 Jul 2018 13:40:16 -0600 Subject: [PATCH 0473/3715] UPSTREAM: coresight: tmc-etr: Do not clean trace buffer We zero out the entire trace buffer used for ETR before it is enabled, for helping with debugging. With the addition of scatter-gather mode, the buffer could be bigger and non-contiguous. Get rid of this step; if someone wants to debug, they can always add it as and when needed. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 0f728a7f9f8fed5dfd86a628d871d572cb91942c). Bug: 140266694 Change-Id: I913a9bdf93f412fbbf0ceb2a25ad09dd7ce99677 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-tmc-etr.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 1b2fe1dc1271..00c632205a11 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -24,9 +24,6 @@ static void tmc_etr_enable_hw(struct tmc_drvdata *drvdata) { u32 axictl, sts; - /* Zero out the memory to help with debug */ - memset(drvdata->vaddr, 0, drvdata->size); - CS_UNLOCK(drvdata->base); /* Wait for TMCSReady bit to be set */ @@ -352,9 +349,8 @@ int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata) if (drvdata->mode == CS_MODE_SYSFS) { /* * The trace run will continue with the same allocated trace - * buffer. The trace buffer is cleared in tmc_etr_enable_hw(), - * so we don't have to explicitly clear it. Also, since the - * tracer is still enabled drvdata::buf can't be NULL. + * buffer. Since the tracer is still enabled drvdata::buf can't + * be NULL. */ tmc_etr_enable_hw(drvdata); } else { From d4c5943b189355c8cc6f788845ffffdb9961b185 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 11 Jul 2018 13:40:17 -0600 Subject: [PATCH 0474/3715] UPSTREAM: coresight: tmc-etr: Disallow perf mode We don't support ETR in perf mode yet. So, don't even try to enable the hardware, even by mistake. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit ef32df53b73bd29d5bfa4af4fda287baa6dc6544). Bug: 140266694 Change-Id: Ie30284a724855c198fe22e0157481124fe6e9f01 Signed-off-by: Yabin Cui --- .../hwtracing/coresight/coresight-tmc-etr.c | 28 ++----------------- 1 file changed, 2 insertions(+), 26 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 00c632205a11..84bc3d56711f 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -223,32 +223,8 @@ out: static int tmc_enable_etr_sink_perf(struct coresight_device *csdev) { - int ret = 0; - unsigned long flags; - struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); - - spin_lock_irqsave(&drvdata->spinlock, flags); - if (drvdata->reading) { - ret = -EINVAL; - goto out; - } - - /* - * In Perf mode there can be only one writer per sink. There - * is also no need to continue if the ETR is already operated - * from sysFS. - */ - if (drvdata->mode != CS_MODE_DISABLED) { - ret = -EINVAL; - goto out; - } - - drvdata->mode = CS_MODE_PERF; - tmc_etr_enable_hw(drvdata); -out: - spin_unlock_irqrestore(&drvdata->spinlock, flags); - - return ret; + /* We don't support perf mode yet ! */ + return -EINVAL; } static int tmc_enable_etr_sink(struct coresight_device *csdev, u32 mode) From b683f66f0db5c8bbad7f167b97643223d032879a Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 11 Jul 2018 13:40:18 -0600 Subject: [PATCH 0475/3715] UPSTREAM: coresight: Add helper for inserting synchronization packets Right now we open code filling the trace buffer with synchronization packets when the circular buffer wraps around in different drivers. Move this to a common place. While at it, clean up the barrier_pkt array to strip off the trailing '\0'. Cc: Mathieu Poirier Cc: Mike Leach Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 6f755e85c332de532d1bbcf1c9769aad08e8cb41). Bug: 140266694 Change-Id: I290fa5e22a3c10ca083a714448ad9429825a80c6 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-etb10.c | 12 +++------ drivers/hwtracing/coresight/coresight-priv.h | 10 ++++++- .../hwtracing/coresight/coresight-tmc-etf.c | 27 ++++++------------- .../hwtracing/coresight/coresight-tmc-etr.c | 13 +-------- drivers/hwtracing/coresight/coresight.c | 3 +-- 5 files changed, 23 insertions(+), 42 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index d14a9cb7959a..3d708d24c37a 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -207,7 +207,6 @@ static void etb_dump_hw(struct etb_drvdata *drvdata) bool lost = false; int i; u8 *buf_ptr; - const u32 *barrier; u32 read_data, depth; u32 read_ptr, write_ptr; u32 frame_off, frame_endoff; @@ -238,19 +237,16 @@ static void etb_dump_hw(struct etb_drvdata *drvdata) depth = drvdata->buffer_depth; buf_ptr = drvdata->buf; - barrier = barrier_pkt; for (i = 0; i < depth; i++) { read_data = readl_relaxed(drvdata->base + ETB_RAM_READ_DATA_REG); - if (lost && *barrier) { - read_data = *barrier; - barrier++; - } - *(u32 *)buf_ptr = read_data; buf_ptr += 4; } + if (lost) + coresight_insert_barrier_packet(drvdata->buf); + if (frame_off) { buf_ptr -= (frame_endoff * 4); for (i = 0; i < frame_endoff; i++) { @@ -459,7 +455,7 @@ static void etb_update_buffer(struct coresight_device *csdev, buf_ptr = buf->data_pages[cur] + offset; read_data = readl_relaxed(drvdata->base + ETB_RAM_READ_DATA_REG); - if (lost && *barrier) { + if (lost && i < CORESIGHT_BARRIER_PKT_SIZE) { read_data = *barrier; barrier++; } diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h index f1d0e21d8cab..2bb0a1569c94 100644 --- a/drivers/hwtracing/coresight/coresight-priv.h +++ b/drivers/hwtracing/coresight/coresight-priv.h @@ -64,7 +64,8 @@ static DEVICE_ATTR_RO(name) #define coresight_simple_reg64(type, name, lo_off, hi_off) \ __coresight_simple_func(type, NULL, name, lo_off, hi_off) -extern const u32 barrier_pkt[5]; +extern const u32 barrier_pkt[4]; +#define CORESIGHT_BARRIER_PKT_SIZE (sizeof(barrier_pkt)) enum etm_addr_type { ETM_ADDR_TYPE_NONE, @@ -98,6 +99,13 @@ struct cs_buffers { void **data_pages; }; +static inline void coresight_insert_barrier_packet(void *buf) +{ + if (buf) + memcpy(buf, barrier_pkt, CORESIGHT_BARRIER_PKT_SIZE); +} + + static inline void CS_LOCK(void __iomem *addr) { do { diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index e5edf46eccf9..f30e5d8d3454 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -43,39 +43,28 @@ static void tmc_etb_enable_hw(struct tmc_drvdata *drvdata) static void tmc_etb_dump_hw(struct tmc_drvdata *drvdata) { - bool lost = false; char *bufp; - const u32 *barrier; - u32 read_data, status; + u32 read_data, lost; int i; - /* - * Get a hold of the status register and see if a wrap around - * has occurred. - */ - status = readl_relaxed(drvdata->base + TMC_STS); - if (status & TMC_STS_FULL) - lost = true; - + /* Check if the buffer wrapped around. */ + lost = readl_relaxed(drvdata->base + TMC_STS) & TMC_STS_FULL; bufp = drvdata->buf; drvdata->len = 0; - barrier = barrier_pkt; while (1) { for (i = 0; i < drvdata->memwidth; i++) { read_data = readl_relaxed(drvdata->base + TMC_RRD); if (read_data == 0xFFFFFFFF) - return; - - if (lost && *barrier) { - read_data = *barrier; - barrier++; - } - + goto done; memcpy(bufp, &read_data, 4); bufp += 4; drvdata->len += 4; } } +done: + if (lost) + coresight_insert_barrier_packet(drvdata->buf); + return; } static void tmc_etb_disable_hw(struct tmc_drvdata *drvdata) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 84bc3d56711f..12565b894c1b 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -102,9 +102,7 @@ ssize_t tmc_etr_get_sysfs_trace(struct tmc_drvdata *drvdata, static void tmc_etr_dump_hw(struct tmc_drvdata *drvdata) { - const u32 *barrier; u32 val; - u32 *temp; u64 rwp; rwp = tmc_read_rwp(drvdata); @@ -117,16 +115,7 @@ static void tmc_etr_dump_hw(struct tmc_drvdata *drvdata) if (val & TMC_STS_FULL) { drvdata->buf = drvdata->vaddr + rwp - drvdata->paddr; drvdata->len = drvdata->size; - - barrier = barrier_pkt; - temp = (u32 *)drvdata->buf; - - while (*barrier) { - *temp = *barrier; - temp++; - barrier++; - } - + coresight_insert_barrier_packet(drvdata->buf); } else { drvdata->buf = drvdata->vaddr; drvdata->len = rwp - drvdata->paddr; diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index 9ced2c7476a4..01c6b0eb44d0 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -58,8 +58,7 @@ static struct list_head *stm_path; * beginning of the data collected in a buffer. That way the decoder knows that * it needs to look for another sync sequence. */ -const u32 barrier_pkt[5] = {0x7fffffff, 0x7fffffff, - 0x7fffffff, 0x7fffffff, 0x0}; +const u32 barrier_pkt[4] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff}; static int coresight_id_match(struct device *dev, void *data) { From 325bb09bbfcb937264c6c63fc40aa8046b5e6d73 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 11 Jul 2018 13:40:19 -0600 Subject: [PATCH 0476/3715] UPSTREAM: dts: bindings: Restrict coresight tmc-etr scatter-gather mode We are about to add the support for ETR builtin scatter-gather mode for dealing with large amount of trace buffers. However, on some of the platforms, using the ETR SG mode can lock up the system due to the way the ETR is connected to the memory subsystem. In SG mode, the ETR performs READ from the scatter-gather table to fetch the next page and regular WRITE of trace data. If the READ operation doesn't complete(due to the memory subsystem issues, which we have seen on a couple of platforms) the trace WRITE cannot proceed leading to issues. So, we by default do not use the SG mode, unless it is known to be safe on the platform. We define a DT property for the TMC node to specify whether we have a proper SG mode. Cc: Mathieu Poirier Cc: Mike Leach Cc: Mark Rutland Cc: John Horley Cc: Robert Walker Cc: devicetree@vger.kernel.org Cc: frowand.list@gmail.com Reviewed-by: Rob Herring Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit ed2cfb2b3cd8f32419c0abea6cd4648ff99fca86). Bug: 140266694 Change-Id: I181e9ee9c615b1b9224c28b3663a4e97fdcd4c3c Signed-off-by: Yabin Cui --- Documentation/devicetree/bindings/arm/coresight.txt | 2 ++ drivers/hwtracing/coresight/coresight-tmc.c | 9 ++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/arm/coresight.txt b/Documentation/devicetree/bindings/arm/coresight.txt index 15ac8e8dcfdf..603d3c62e8c7 100644 --- a/Documentation/devicetree/bindings/arm/coresight.txt +++ b/Documentation/devicetree/bindings/arm/coresight.txt @@ -86,6 +86,8 @@ its hardware characteristcs. * arm,buffer-size: size of contiguous buffer space for TMC ETR (embedded trace router) + * arm,scatter-gather: boolean. Indicates that the TMC-ETR can safely + use the SG mode on this system. Example: diff --git a/drivers/hwtracing/coresight/coresight-tmc.c b/drivers/hwtracing/coresight/coresight-tmc.c index 6d7e0a21b2f4..1eaecc926603 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.c +++ b/drivers/hwtracing/coresight/coresight-tmc.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -304,6 +305,12 @@ const struct attribute_group *coresight_tmc_groups[] = { NULL, }; +static inline bool tmc_etr_can_use_sg(struct tmc_drvdata *drvdata) +{ + return fwnode_property_present(drvdata->dev->fwnode, + "arm,scatter-gather"); +} + /* Detect and initialise the capabilities of a TMC ETR */ static int tmc_etr_setup_caps(struct tmc_drvdata *drvdata, u32 devid, void *dev_caps) @@ -313,7 +320,7 @@ static int tmc_etr_setup_caps(struct tmc_drvdata *drvdata, /* Set the unadvertised capabilities */ tmc_etr_init_caps(drvdata, (u32)(unsigned long)dev_caps); - if (!(devid & TMC_DEVID_NOSCAT)) + if (!(devid & TMC_DEVID_NOSCAT) && tmc_etr_can_use_sg(drvdata)) tmc_etr_set_cap(drvdata, TMC_ETR_SG); /* Check if the AXI address width is available */ From 294694782c6380da74f35ee59364387edc8973d5 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 11 Jul 2018 13:40:20 -0600 Subject: [PATCH 0477/3715] UPSTREAM: coresight: Add generic TMC sg table framework This patch introduces a generic sg table data structure and associated operations. An SG table can be used to map a set of Data pages where the trace data could be stored by the TMC ETR. The information about the data pages could be stored in different formats, depending on the type of the underlying SG mechanism (e.g, TMC ETR SG vs Coresight CATU). The generic structure provides book keeping of the pages used for the data as well as the table contents. The table should be filled by the user of the infrastructure. A table can be created by specifying the number of data pages as well as the number of table pages required to hold the pointers, where the latter could be different for different types of tables. The pages are mapped in the appropriate dma data direction mode (i.e, DMA_TO_DEVICE for table pages and DMA_FROM_DEVICE for data pages). The framework can optionally accept a set of allocated data pages (e.g, perf ring buffer) and map them accordingly. The table and data pages are vmap'ed to allow easier access by the drivers. The framework also provides helpers to sync the data written to the pages with appropriate directions. This will be later used by the TMC ETR SG unit and CATU. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 99443ea19e8bb0fee5fb6f4ed9cec11fd825061f). Bug: 140266694 Change-Id: I206573c7bee0bcfc59d5a5013ad612621389ad1c Signed-off-by: Yabin Cui --- .../hwtracing/coresight/coresight-tmc-etr.c | 268 ++++++++++++++++++ drivers/hwtracing/coresight/coresight-tmc.h | 50 ++++ 2 files changed, 318 insertions(+) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 12565b894c1b..8ce75201d06e 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -17,9 +17,277 @@ #include #include +#include #include "coresight-priv.h" #include "coresight-tmc.h" +/* + * tmc_pages_get_offset: Go through all the pages in the tmc_pages + * and map the device address @addr to an offset within the virtual + * contiguous buffer. + */ +static long +tmc_pages_get_offset(struct tmc_pages *tmc_pages, dma_addr_t addr) +{ + int i; + dma_addr_t page_start; + + for (i = 0; i < tmc_pages->nr_pages; i++) { + page_start = tmc_pages->daddrs[i]; + if (addr >= page_start && addr < (page_start + PAGE_SIZE)) + return i * PAGE_SIZE + (addr - page_start); + } + + return -EINVAL; +} + +/* + * tmc_pages_free : Unmap and free the pages used by tmc_pages. + * If the pages were not allocated in tmc_pages_alloc(), we would + * simply drop the refcount. + */ +static void tmc_pages_free(struct tmc_pages *tmc_pages, + struct device *dev, enum dma_data_direction dir) +{ + int i; + + for (i = 0; i < tmc_pages->nr_pages; i++) { + if (tmc_pages->daddrs && tmc_pages->daddrs[i]) + dma_unmap_page(dev, tmc_pages->daddrs[i], + PAGE_SIZE, dir); + if (tmc_pages->pages && tmc_pages->pages[i]) + __free_page(tmc_pages->pages[i]); + } + + kfree(tmc_pages->pages); + kfree(tmc_pages->daddrs); + tmc_pages->pages = NULL; + tmc_pages->daddrs = NULL; + tmc_pages->nr_pages = 0; +} + +/* + * tmc_pages_alloc : Allocate and map pages for a given @tmc_pages. + * If @pages is not NULL, the list of page virtual addresses are + * used as the data pages. The pages are then dma_map'ed for @dev + * with dma_direction @dir. + * + * Returns 0 upon success, else the error number. + */ +static int tmc_pages_alloc(struct tmc_pages *tmc_pages, + struct device *dev, int node, + enum dma_data_direction dir, void **pages) +{ + int i, nr_pages; + dma_addr_t paddr; + struct page *page; + + nr_pages = tmc_pages->nr_pages; + tmc_pages->daddrs = kcalloc(nr_pages, sizeof(*tmc_pages->daddrs), + GFP_KERNEL); + if (!tmc_pages->daddrs) + return -ENOMEM; + tmc_pages->pages = kcalloc(nr_pages, sizeof(*tmc_pages->pages), + GFP_KERNEL); + if (!tmc_pages->pages) { + kfree(tmc_pages->daddrs); + tmc_pages->daddrs = NULL; + return -ENOMEM; + } + + for (i = 0; i < nr_pages; i++) { + if (pages && pages[i]) { + page = virt_to_page(pages[i]); + /* Hold a refcount on the page */ + get_page(page); + } else { + page = alloc_pages_node(node, + GFP_KERNEL | __GFP_ZERO, 0); + } + paddr = dma_map_page(dev, page, 0, PAGE_SIZE, dir); + if (dma_mapping_error(dev, paddr)) + goto err; + tmc_pages->daddrs[i] = paddr; + tmc_pages->pages[i] = page; + } + return 0; +err: + tmc_pages_free(tmc_pages, dev, dir); + return -ENOMEM; +} + +static inline long +tmc_sg_get_data_page_offset(struct tmc_sg_table *sg_table, dma_addr_t addr) +{ + return tmc_pages_get_offset(&sg_table->data_pages, addr); +} + +static inline void tmc_free_table_pages(struct tmc_sg_table *sg_table) +{ + if (sg_table->table_vaddr) + vunmap(sg_table->table_vaddr); + tmc_pages_free(&sg_table->table_pages, sg_table->dev, DMA_TO_DEVICE); +} + +static void tmc_free_data_pages(struct tmc_sg_table *sg_table) +{ + if (sg_table->data_vaddr) + vunmap(sg_table->data_vaddr); + tmc_pages_free(&sg_table->data_pages, sg_table->dev, DMA_FROM_DEVICE); +} + +void tmc_free_sg_table(struct tmc_sg_table *sg_table) +{ + tmc_free_table_pages(sg_table); + tmc_free_data_pages(sg_table); +} + +/* + * Alloc pages for the table. Since this will be used by the device, + * allocate the pages closer to the device (i.e, dev_to_node(dev) + * rather than the CPU node). + */ +static int tmc_alloc_table_pages(struct tmc_sg_table *sg_table) +{ + int rc; + struct tmc_pages *table_pages = &sg_table->table_pages; + + rc = tmc_pages_alloc(table_pages, sg_table->dev, + dev_to_node(sg_table->dev), + DMA_TO_DEVICE, NULL); + if (rc) + return rc; + sg_table->table_vaddr = vmap(table_pages->pages, + table_pages->nr_pages, + VM_MAP, + PAGE_KERNEL); + if (!sg_table->table_vaddr) + rc = -ENOMEM; + else + sg_table->table_daddr = table_pages->daddrs[0]; + return rc; +} + +static int tmc_alloc_data_pages(struct tmc_sg_table *sg_table, void **pages) +{ + int rc; + + /* Allocate data pages on the node requested by the caller */ + rc = tmc_pages_alloc(&sg_table->data_pages, + sg_table->dev, sg_table->node, + DMA_FROM_DEVICE, pages); + if (!rc) { + sg_table->data_vaddr = vmap(sg_table->data_pages.pages, + sg_table->data_pages.nr_pages, + VM_MAP, + PAGE_KERNEL); + if (!sg_table->data_vaddr) + rc = -ENOMEM; + } + return rc; +} + +/* + * tmc_alloc_sg_table: Allocate and setup dma pages for the TMC SG table + * and data buffers. TMC writes to the data buffers and reads from the SG + * Table pages. + * + * @dev - Device to which page should be DMA mapped. + * @node - Numa node for mem allocations + * @nr_tpages - Number of pages for the table entries. + * @nr_dpages - Number of pages for Data buffer. + * @pages - Optional list of virtual address of pages. + */ +struct tmc_sg_table *tmc_alloc_sg_table(struct device *dev, + int node, + int nr_tpages, + int nr_dpages, + void **pages) +{ + long rc; + struct tmc_sg_table *sg_table; + + sg_table = kzalloc(sizeof(*sg_table), GFP_KERNEL); + if (!sg_table) + return ERR_PTR(-ENOMEM); + sg_table->data_pages.nr_pages = nr_dpages; + sg_table->table_pages.nr_pages = nr_tpages; + sg_table->node = node; + sg_table->dev = dev; + + rc = tmc_alloc_data_pages(sg_table, pages); + if (!rc) + rc = tmc_alloc_table_pages(sg_table); + if (rc) { + tmc_free_sg_table(sg_table); + kfree(sg_table); + return ERR_PTR(rc); + } + + return sg_table; +} + +/* + * tmc_sg_table_sync_data_range: Sync the data buffer written + * by the device from @offset upto a @size bytes. + */ +void tmc_sg_table_sync_data_range(struct tmc_sg_table *table, + u64 offset, u64 size) +{ + int i, index, start; + int npages = DIV_ROUND_UP(size, PAGE_SIZE); + struct device *dev = table->dev; + struct tmc_pages *data = &table->data_pages; + + start = offset >> PAGE_SHIFT; + for (i = start; i < (start + npages); i++) { + index = i % data->nr_pages; + dma_sync_single_for_cpu(dev, data->daddrs[index], + PAGE_SIZE, DMA_FROM_DEVICE); + } +} + +/* tmc_sg_sync_table: Sync the page table */ +void tmc_sg_table_sync_table(struct tmc_sg_table *sg_table) +{ + int i; + struct device *dev = sg_table->dev; + struct tmc_pages *table_pages = &sg_table->table_pages; + + for (i = 0; i < table_pages->nr_pages; i++) + dma_sync_single_for_device(dev, table_pages->daddrs[i], + PAGE_SIZE, DMA_TO_DEVICE); +} + +/* + * tmc_sg_table_get_data: Get the buffer pointer for data @offset + * in the SG buffer. The @bufpp is updated to point to the buffer. + * Returns : + * the length of linear data available at @offset. + * or + * <= 0 if no data is available. + */ +ssize_t tmc_sg_table_get_data(struct tmc_sg_table *sg_table, + u64 offset, size_t len, char **bufpp) +{ + size_t size; + int pg_idx = offset >> PAGE_SHIFT; + int pg_offset = offset & (PAGE_SIZE - 1); + struct tmc_pages *data_pages = &sg_table->data_pages; + + size = tmc_sg_table_buf_size(sg_table); + if (offset >= size) + return -EINVAL; + + /* Make sure we don't go beyond the end */ + len = (len < (size - offset)) ? len : size - offset; + /* Respect the page boundaries */ + len = (len < (PAGE_SIZE - pg_offset)) ? len : (PAGE_SIZE - pg_offset); + if (len > 0) + *bufpp = page_address(data_pages->pages[pg_idx]) + pg_offset; + return len; +} + static void tmc_etr_enable_hw(struct tmc_drvdata *drvdata) { u32 axictl, sts; diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h index 73f944da836e..19a765c1748b 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.h +++ b/drivers/hwtracing/coresight/coresight-tmc.h @@ -18,6 +18,7 @@ #ifndef _CORESIGHT_TMC_H #define _CORESIGHT_TMC_H +#include #include #define TMC_RSZ 0x004 @@ -171,6 +172,38 @@ struct tmc_drvdata { u32 etr_caps; }; +/** + * struct tmc_pages - Collection of pages used for SG. + * @nr_pages: Number of pages in the list. + * @daddrs: Array of DMA'able page address. + * @pages: Array pages for the buffer. + */ +struct tmc_pages { + int nr_pages; + dma_addr_t *daddrs; + struct page **pages; +}; + +/* + * struct tmc_sg_table - Generic SG table for TMC + * @dev: Device for DMA allocations + * @table_vaddr: Contiguous Virtual address for PageTable + * @data_vaddr: Contiguous Virtual address for Data Buffer + * @table_daddr: DMA address of the PageTable base + * @node: Node for Page allocations + * @table_pages: List of pages & dma address for Table + * @data_pages: List of pages & dma address for Data + */ +struct tmc_sg_table { + struct device *dev; + void *table_vaddr; + void *data_vaddr; + dma_addr_t table_daddr; + int node; + struct tmc_pages table_pages; + struct tmc_pages data_pages; +}; + /* Generic functions */ void tmc_wait_for_tmcready(struct tmc_drvdata *drvdata); void tmc_flush_and_stop(struct tmc_drvdata *drvdata); @@ -226,4 +259,21 @@ static inline bool tmc_etr_has_cap(struct tmc_drvdata *drvdata, u32 cap) return !!(drvdata->etr_caps & cap); } +struct tmc_sg_table *tmc_alloc_sg_table(struct device *dev, + int node, + int nr_tpages, + int nr_dpages, + void **pages); +void tmc_free_sg_table(struct tmc_sg_table *sg_table); +void tmc_sg_table_sync_table(struct tmc_sg_table *sg_table); +void tmc_sg_table_sync_data_range(struct tmc_sg_table *table, + u64 offset, u64 size); +ssize_t tmc_sg_table_get_data(struct tmc_sg_table *sg_table, + u64 offset, size_t len, char **bufpp); +static inline unsigned long +tmc_sg_table_buf_size(struct tmc_sg_table *sg_table) +{ + return sg_table->data_pages.nr_pages << PAGE_SHIFT; +} + #endif From 45135f1411424485bb50636f8a42dccbbb7b1e82 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 11 Jul 2018 13:40:21 -0600 Subject: [PATCH 0478/3715] UPSTREAM: coresight: Add support for TMC ETR SG unit This patch adds support for setting up an SG table used by the TMC ETR inbuilt SG unit. The TMC ETR uses 4K page sized tables to hold pointers to the 4K data pages with the last entry in a table pointing to the next table with the entries, by kind of chaining. The 2 LSBs determine the type of the table entry, to one of : Normal - Points to a 4KB data page. Last - Points to a 4KB data page, but is the last entry in the page table. Link - Points to another 4KB table page with pointers to data. The code takes care of handling the system page size which could be different than 4K. So we could end up putting multiple ETR SG tables in a single system page, vice versa for the data pages. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 260ec24b31a596361b617e6993ee9a31c43f278c). Bug: 140266694 Change-Id: Ic376c4c0eafba2bae97174df35a02c4fdfbfc811 Signed-off-by: Yabin Cui --- .../hwtracing/coresight/coresight-tmc-etr.c | 263 ++++++++++++++++++ 1 file changed, 263 insertions(+) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 8ce75201d06e..fed252a001af 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -21,6 +21,87 @@ #include "coresight-priv.h" #include "coresight-tmc.h" +/* + * The TMC ETR SG has a page size of 4K. The SG table contains pointers + * to 4KB buffers. However, the OS may use a PAGE_SIZE different from + * 4K (i.e, 16KB or 64KB). This implies that a single OS page could + * contain more than one SG buffer and tables. + * + * A table entry has the following format: + * + * ---Bit31------------Bit4-------Bit1-----Bit0-- + * | Address[39:12] | SBZ | Entry Type | + * ---------------------------------------------- + * + * Address: Bits [39:12] of a physical page address. Bits [11:0] are + * always zero. + * + * Entry type: + * b00 - Reserved. + * b01 - Last entry in the tables, points to 4K page buffer. + * b10 - Normal entry, points to 4K page buffer. + * b11 - Link. The address points to the base of next table. + */ + +typedef u32 sgte_t; + +#define ETR_SG_PAGE_SHIFT 12 +#define ETR_SG_PAGE_SIZE (1UL << ETR_SG_PAGE_SHIFT) +#define ETR_SG_PAGES_PER_SYSPAGE (PAGE_SIZE / ETR_SG_PAGE_SIZE) +#define ETR_SG_PTRS_PER_PAGE (ETR_SG_PAGE_SIZE / sizeof(sgte_t)) +#define ETR_SG_PTRS_PER_SYSPAGE (PAGE_SIZE / sizeof(sgte_t)) + +#define ETR_SG_ET_MASK 0x3 +#define ETR_SG_ET_LAST 0x1 +#define ETR_SG_ET_NORMAL 0x2 +#define ETR_SG_ET_LINK 0x3 + +#define ETR_SG_ADDR_SHIFT 4 + +#define ETR_SG_ENTRY(addr, type) \ + (sgte_t)((((addr) >> ETR_SG_PAGE_SHIFT) << ETR_SG_ADDR_SHIFT) | \ + (type & ETR_SG_ET_MASK)) + +#define ETR_SG_ADDR(entry) \ + (((dma_addr_t)(entry) >> ETR_SG_ADDR_SHIFT) << ETR_SG_PAGE_SHIFT) +#define ETR_SG_ET(entry) ((entry) & ETR_SG_ET_MASK) + +/* + * struct etr_sg_table : ETR SG Table + * @sg_table: Generic SG Table holding the data/table pages. + * @hwaddr: hwaddress used by the TMC, which is the base + * address of the table. + */ +struct etr_sg_table { + struct tmc_sg_table *sg_table; + dma_addr_t hwaddr; +}; + +/* + * tmc_etr_sg_table_entries: Total number of table entries required to map + * @nr_pages system pages. + * + * We need to map @nr_pages * ETR_SG_PAGES_PER_SYSPAGE data pages. + * Each TMC page can map (ETR_SG_PTRS_PER_PAGE - 1) buffer pointers, + * with the last entry pointing to another page of table entries. + * If we spill over to a new page for mapping 1 entry, we could as + * well replace the link entry of the previous page with the last entry. + */ +static inline unsigned long __attribute_const__ +tmc_etr_sg_table_entries(int nr_pages) +{ + unsigned long nr_sgpages = nr_pages * ETR_SG_PAGES_PER_SYSPAGE; + unsigned long nr_sglinks = nr_sgpages / (ETR_SG_PTRS_PER_PAGE - 1); + /* + * If we spill over to a new page for 1 entry, we could as well + * make it the LAST entry in the previous page, skipping the Link + * address. + */ + if (nr_sglinks && (nr_sgpages % (ETR_SG_PTRS_PER_PAGE - 1) < 2)) + nr_sglinks--; + return nr_sgpages + nr_sglinks; +} + /* * tmc_pages_get_offset: Go through all the pages in the tmc_pages * and map the device address @addr to an offset within the virtual @@ -288,6 +369,188 @@ ssize_t tmc_sg_table_get_data(struct tmc_sg_table *sg_table, return len; } +#ifdef ETR_SG_DEBUG +/* Map a dma address to virtual address */ +static unsigned long +tmc_sg_daddr_to_vaddr(struct tmc_sg_table *sg_table, + dma_addr_t addr, bool table) +{ + long offset; + unsigned long base; + struct tmc_pages *tmc_pages; + + if (table) { + tmc_pages = &sg_table->table_pages; + base = (unsigned long)sg_table->table_vaddr; + } else { + tmc_pages = &sg_table->data_pages; + base = (unsigned long)sg_table->data_vaddr; + } + + offset = tmc_pages_get_offset(tmc_pages, addr); + if (offset < 0) + return 0; + return base + offset; +} + +/* Dump the given sg_table */ +static void tmc_etr_sg_table_dump(struct etr_sg_table *etr_table) +{ + sgte_t *ptr; + int i = 0; + dma_addr_t addr; + struct tmc_sg_table *sg_table = etr_table->sg_table; + + ptr = (sgte_t *)tmc_sg_daddr_to_vaddr(sg_table, + etr_table->hwaddr, true); + while (ptr) { + addr = ETR_SG_ADDR(*ptr); + switch (ETR_SG_ET(*ptr)) { + case ETR_SG_ET_NORMAL: + dev_dbg(sg_table->dev, + "%05d: %p\t:[N] 0x%llx\n", i, ptr, addr); + ptr++; + break; + case ETR_SG_ET_LINK: + dev_dbg(sg_table->dev, + "%05d: *** %p\t:{L} 0x%llx ***\n", + i, ptr, addr); + ptr = (sgte_t *)tmc_sg_daddr_to_vaddr(sg_table, + addr, true); + break; + case ETR_SG_ET_LAST: + dev_dbg(sg_table->dev, + "%05d: ### %p\t:[L] 0x%llx ###\n", + i, ptr, addr); + return; + default: + dev_dbg(sg_table->dev, + "%05d: xxx %p\t:[INVALID] 0x%llx xxx\n", + i, ptr, addr); + return; + } + i++; + } + dev_dbg(sg_table->dev, "******* End of Table *****\n"); +} +#else +static inline void tmc_etr_sg_table_dump(struct etr_sg_table *etr_table) {} +#endif + +/* + * Populate the SG Table page table entries from table/data + * pages allocated. Each Data page has ETR_SG_PAGES_PER_SYSPAGE SG pages. + * So does a Table page. So we keep track of indices of the tables + * in each system page and move the pointers accordingly. + */ +#define INC_IDX_ROUND(idx, size) ((idx) = ((idx) + 1) % (size)) +static void tmc_etr_sg_table_populate(struct etr_sg_table *etr_table) +{ + dma_addr_t paddr; + int i, type, nr_entries; + int tpidx = 0; /* index to the current system table_page */ + int sgtidx = 0; /* index to the sg_table within the current syspage */ + int sgtentry = 0; /* the entry within the sg_table */ + int dpidx = 0; /* index to the current system data_page */ + int spidx = 0; /* index to the SG page within the current data page */ + sgte_t *ptr; /* pointer to the table entry to fill */ + struct tmc_sg_table *sg_table = etr_table->sg_table; + dma_addr_t *table_daddrs = sg_table->table_pages.daddrs; + dma_addr_t *data_daddrs = sg_table->data_pages.daddrs; + + nr_entries = tmc_etr_sg_table_entries(sg_table->data_pages.nr_pages); + /* + * Use the contiguous virtual address of the table to update entries. + */ + ptr = sg_table->table_vaddr; + /* + * Fill all the entries, except the last entry to avoid special + * checks within the loop. + */ + for (i = 0; i < nr_entries - 1; i++) { + if (sgtentry == ETR_SG_PTRS_PER_PAGE - 1) { + /* + * Last entry in a sg_table page is a link address to + * the next table page. If this sg_table is the last + * one in the system page, it links to the first + * sg_table in the next system page. Otherwise, it + * links to the next sg_table page within the system + * page. + */ + if (sgtidx == ETR_SG_PAGES_PER_SYSPAGE - 1) { + paddr = table_daddrs[tpidx + 1]; + } else { + paddr = table_daddrs[tpidx] + + (ETR_SG_PAGE_SIZE * (sgtidx + 1)); + } + type = ETR_SG_ET_LINK; + } else { + /* + * Update the indices to the data_pages to point to the + * next sg_page in the data buffer. + */ + type = ETR_SG_ET_NORMAL; + paddr = data_daddrs[dpidx] + spidx * ETR_SG_PAGE_SIZE; + if (!INC_IDX_ROUND(spidx, ETR_SG_PAGES_PER_SYSPAGE)) + dpidx++; + } + *ptr++ = ETR_SG_ENTRY(paddr, type); + /* + * Move to the next table pointer, moving the table page index + * if necessary + */ + if (!INC_IDX_ROUND(sgtentry, ETR_SG_PTRS_PER_PAGE)) { + if (!INC_IDX_ROUND(sgtidx, ETR_SG_PAGES_PER_SYSPAGE)) + tpidx++; + } + } + + /* Set up the last entry, which is always a data pointer */ + paddr = data_daddrs[dpidx] + spidx * ETR_SG_PAGE_SIZE; + *ptr++ = ETR_SG_ENTRY(paddr, ETR_SG_ET_LAST); +} + +/* + * tmc_init_etr_sg_table: Allocate a TMC ETR SG table, data buffer of @size and + * populate the table. + * + * @dev - Device pointer for the TMC + * @node - NUMA node where the memory should be allocated + * @size - Total size of the data buffer + * @pages - Optional list of page virtual address + */ +static struct etr_sg_table __maybe_unused * +tmc_init_etr_sg_table(struct device *dev, int node, + unsigned long size, void **pages) +{ + int nr_entries, nr_tpages; + int nr_dpages = size >> PAGE_SHIFT; + struct tmc_sg_table *sg_table; + struct etr_sg_table *etr_table; + + etr_table = kzalloc(sizeof(*etr_table), GFP_KERNEL); + if (!etr_table) + return ERR_PTR(-ENOMEM); + nr_entries = tmc_etr_sg_table_entries(nr_dpages); + nr_tpages = DIV_ROUND_UP(nr_entries, ETR_SG_PTRS_PER_SYSPAGE); + + sg_table = tmc_alloc_sg_table(dev, node, nr_tpages, nr_dpages, pages); + if (IS_ERR(sg_table)) { + kfree(etr_table); + return ERR_PTR(PTR_ERR(sg_table)); + } + + etr_table->sg_table = sg_table; + /* TMC should use table base address for DBA */ + etr_table->hwaddr = sg_table->table_daddr; + tmc_etr_sg_table_populate(etr_table); + /* Sync the table pages for the HW */ + tmc_sg_table_sync_table(sg_table); + tmc_etr_sg_table_dump(etr_table); + + return etr_table; +} + static void tmc_etr_enable_hw(struct tmc_drvdata *drvdata) { u32 axictl, sts; From b9169dfc3524ea3fd370af0e9d3059e36b4c4cd5 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 11 Jul 2018 13:40:22 -0600 Subject: [PATCH 0479/3715] UPSTREAM: coresight: tmc-etr: Add transparent buffer management The TMC-ETR can use the target trace buffer in two different modes. Normal physically contiguous mode and a discontiguous list pages in Scatter-Gather mode. Also we have dedicated Coresight component, CATU (Coresight Address Translation Unit) to provide improved scatter-gather mode in Coresight SoC-600. This complicates the management of the buffer used for trace, depending on the mode in which ETR is configured. So, this patch adds a transparent layer for managing the ETR buffer which abstracts the basic operations on the buffer (alloc, free, sync and retrieve the data) and uses the mode specific helpers to do the actual operation. This also allows the ETR driver to choose the best mode for a given use case and adds the flexibility to fallback to a different mode, without duplicating the code. The patch also adds the "normal" flat memory mode and switches the sysfs driver to use the new layer. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 75f4e3619fe202c642080a798e84d05288522a22). Bug: 140266694 Change-Id: I28aced59bd0357c0ab3692d2cd6fa74d859b9d03 Signed-off-by: Yabin Cui --- .../hwtracing/coresight/coresight-tmc-etr.c | 342 ++++++++++++++---- drivers/hwtracing/coresight/coresight-tmc.h | 55 ++- 2 files changed, 308 insertions(+), 89 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index fed252a001af..44aa8b6b9344 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -17,10 +17,18 @@ #include #include +#include #include #include "coresight-priv.h" #include "coresight-tmc.h" +struct etr_flat_buf { + struct device *dev; + dma_addr_t daddr; + void *vaddr; + size_t size; +}; + /* * The TMC ETR SG has a page size of 4K. The SG table contains pointers * to 4KB buffers. However, the OS may use a PAGE_SIZE different from @@ -551,16 +559,207 @@ tmc_init_etr_sg_table(struct device *dev, int node, return etr_table; } +/* + * tmc_etr_alloc_flat_buf: Allocate a contiguous DMA buffer. + */ +static int tmc_etr_alloc_flat_buf(struct tmc_drvdata *drvdata, + struct etr_buf *etr_buf, int node, + void **pages) +{ + struct etr_flat_buf *flat_buf; + + /* We cannot reuse existing pages for flat buf */ + if (pages) + return -EINVAL; + + flat_buf = kzalloc(sizeof(*flat_buf), GFP_KERNEL); + if (!flat_buf) + return -ENOMEM; + + flat_buf->vaddr = dma_alloc_coherent(drvdata->dev, etr_buf->size, + &flat_buf->daddr, GFP_KERNEL); + if (!flat_buf->vaddr) { + kfree(flat_buf); + return -ENOMEM; + } + + flat_buf->size = etr_buf->size; + flat_buf->dev = drvdata->dev; + etr_buf->hwaddr = flat_buf->daddr; + etr_buf->mode = ETR_MODE_FLAT; + etr_buf->private = flat_buf; + return 0; +} + +static void tmc_etr_free_flat_buf(struct etr_buf *etr_buf) +{ + struct etr_flat_buf *flat_buf = etr_buf->private; + + if (flat_buf && flat_buf->daddr) + dma_free_coherent(flat_buf->dev, flat_buf->size, + flat_buf->vaddr, flat_buf->daddr); + kfree(flat_buf); +} + +static void tmc_etr_sync_flat_buf(struct etr_buf *etr_buf, u64 rrp, u64 rwp) +{ + /* + * Adjust the buffer to point to the beginning of the trace data + * and update the available trace data. + */ + etr_buf->offset = rrp - etr_buf->hwaddr; + if (etr_buf->full) + etr_buf->len = etr_buf->size; + else + etr_buf->len = rwp - rrp; +} + +static ssize_t tmc_etr_get_data_flat_buf(struct etr_buf *etr_buf, + u64 offset, size_t len, char **bufpp) +{ + struct etr_flat_buf *flat_buf = etr_buf->private; + + *bufpp = (char *)flat_buf->vaddr + offset; + /* + * tmc_etr_buf_get_data already adjusts the length to handle + * buffer wrapping around. + */ + return len; +} + +static const struct etr_buf_operations etr_flat_buf_ops = { + .alloc = tmc_etr_alloc_flat_buf, + .free = tmc_etr_free_flat_buf, + .sync = tmc_etr_sync_flat_buf, + .get_data = tmc_etr_get_data_flat_buf, +}; + +static const struct etr_buf_operations *etr_buf_ops[] = { + [ETR_MODE_FLAT] = &etr_flat_buf_ops, +}; + +static inline int tmc_etr_mode_alloc_buf(int mode, + struct tmc_drvdata *drvdata, + struct etr_buf *etr_buf, int node, + void **pages) +{ + int rc; + + switch (mode) { + case ETR_MODE_FLAT: + rc = etr_buf_ops[mode]->alloc(drvdata, etr_buf, node, pages); + if (!rc) + etr_buf->ops = etr_buf_ops[mode]; + return rc; + default: + return -EINVAL; + } +} + +/* + * tmc_alloc_etr_buf: Allocate a buffer use by ETR. + * @drvdata : ETR device details. + * @size : size of the requested buffer. + * @flags : Required properties for the buffer. + * @node : Node for memory allocations. + * @pages : An optional list of pages. + */ +static struct etr_buf *tmc_alloc_etr_buf(struct tmc_drvdata *drvdata, + ssize_t size, int flags, + int node, void **pages) +{ + int rc = 0; + struct etr_buf *etr_buf; + + etr_buf = kzalloc(sizeof(*etr_buf), GFP_KERNEL); + if (!etr_buf) + return ERR_PTR(-ENOMEM); + + etr_buf->size = size; + + rc = tmc_etr_mode_alloc_buf(ETR_MODE_FLAT, drvdata, + etr_buf, node, pages); + if (rc) { + kfree(etr_buf); + return ERR_PTR(rc); + } + + return etr_buf; +} + +static void tmc_free_etr_buf(struct etr_buf *etr_buf) +{ + WARN_ON(!etr_buf->ops || !etr_buf->ops->free); + etr_buf->ops->free(etr_buf); + kfree(etr_buf); +} + +/* + * tmc_etr_buf_get_data: Get the pointer the trace data at @offset + * with a maximum of @len bytes. + * Returns: The size of the linear data available @pos, with *bufpp + * updated to point to the buffer. + */ +static ssize_t tmc_etr_buf_get_data(struct etr_buf *etr_buf, + u64 offset, size_t len, char **bufpp) +{ + /* Adjust the length to limit this transaction to end of buffer */ + len = (len < (etr_buf->size - offset)) ? len : etr_buf->size - offset; + + return etr_buf->ops->get_data(etr_buf, (u64)offset, len, bufpp); +} + +static inline s64 +tmc_etr_buf_insert_barrier_packet(struct etr_buf *etr_buf, u64 offset) +{ + ssize_t len; + char *bufp; + + len = tmc_etr_buf_get_data(etr_buf, offset, + CORESIGHT_BARRIER_PKT_SIZE, &bufp); + if (WARN_ON(len <= CORESIGHT_BARRIER_PKT_SIZE)) + return -EINVAL; + coresight_insert_barrier_packet(bufp); + return offset + CORESIGHT_BARRIER_PKT_SIZE; +} + +/* + * tmc_sync_etr_buf: Sync the trace buffer availability with drvdata. + * Makes sure the trace data is synced to the memory for consumption. + * @etr_buf->offset will hold the offset to the beginning of the trace data + * within the buffer, with @etr_buf->len bytes to consume. + */ +static void tmc_sync_etr_buf(struct tmc_drvdata *drvdata) +{ + struct etr_buf *etr_buf = drvdata->etr_buf; + u64 rrp, rwp; + u32 status; + + rrp = tmc_read_rrp(drvdata); + rwp = tmc_read_rwp(drvdata); + status = readl_relaxed(drvdata->base + TMC_STS); + etr_buf->full = status & TMC_STS_FULL; + + WARN_ON(!etr_buf->ops || !etr_buf->ops->sync); + + etr_buf->ops->sync(etr_buf, rrp, rwp); + + /* Insert barrier packets at the beginning, if there was an overflow */ + if (etr_buf->full) + tmc_etr_buf_insert_barrier_packet(etr_buf, etr_buf->offset); +} + static void tmc_etr_enable_hw(struct tmc_drvdata *drvdata) { u32 axictl, sts; + struct etr_buf *etr_buf = drvdata->etr_buf; CS_UNLOCK(drvdata->base); /* Wait for TMCSReady bit to be set */ tmc_wait_for_tmcready(drvdata); - writel_relaxed(drvdata->size / 4, drvdata->base + TMC_RSZ); + writel_relaxed(etr_buf->size / 4, drvdata->base + TMC_RSZ); writel_relaxed(TMC_MODE_CIRCULAR_BUFFER, drvdata->base + TMC_MODE); axictl = readl_relaxed(drvdata->base + TMC_AXICTL); @@ -574,15 +773,15 @@ static void tmc_etr_enable_hw(struct tmc_drvdata *drvdata) } writel_relaxed(axictl, drvdata->base + TMC_AXICTL); - tmc_write_dba(drvdata, drvdata->paddr); + tmc_write_dba(drvdata, etr_buf->hwaddr); /* * If the TMC pointers must be programmed before the session, * we have to set it properly (i.e, RRP/RWP to base address and * STS to "not full"). */ if (tmc_etr_has_cap(drvdata, TMC_ETR_SAVE_RESTORE)) { - tmc_write_rrp(drvdata, drvdata->paddr); - tmc_write_rwp(drvdata, drvdata->paddr); + tmc_write_rrp(drvdata, etr_buf->hwaddr); + tmc_write_rwp(drvdata, etr_buf->hwaddr); sts = readl_relaxed(drvdata->base + TMC_STS) & ~TMC_STS_FULL; writel_relaxed(sts, drvdata->base + TMC_STS); } @@ -598,59 +797,48 @@ static void tmc_etr_enable_hw(struct tmc_drvdata *drvdata) } /* - * Return the available trace data in the buffer @pos, with a maximum - * limit of @len, also updating the @bufpp on where to find it. + * Return the available trace data in the buffer (starts at etr_buf->offset, + * limited by etr_buf->len) from @pos, with a maximum limit of @len, + * also updating the @bufpp on where to find it. Since the trace data + * starts at anywhere in the buffer, depending on the RRP, we adjust the + * @len returned to handle buffer wrapping around. */ ssize_t tmc_etr_get_sysfs_trace(struct tmc_drvdata *drvdata, loff_t pos, size_t len, char **bufpp) { + s64 offset; ssize_t actual = len; - char *bufp = drvdata->buf + pos; - char *bufend = (char *)(drvdata->vaddr + drvdata->size); - - /* Adjust the len to available size @pos */ - if (pos + actual > drvdata->len) - actual = drvdata->len - pos; + struct etr_buf *etr_buf = drvdata->etr_buf; + if (pos + actual > etr_buf->len) + actual = etr_buf->len - pos; if (actual <= 0) return actual; - /* - * Since we use a circular buffer, with trace data starting - * @drvdata->buf, possibly anywhere in the buffer @drvdata->vaddr, - * wrap the current @pos to within the buffer. - */ - if (bufp >= bufend) - bufp -= drvdata->size; - /* - * For simplicity, avoid copying over a wrapped around buffer. - */ - if ((bufp + actual) > bufend) - actual = bufend - bufp; - *bufpp = bufp; - return actual; + /* Compute the offset from which we read the data */ + offset = etr_buf->offset + pos; + if (offset >= etr_buf->size) + offset -= etr_buf->size; + return tmc_etr_buf_get_data(etr_buf, offset, actual, bufpp); } -static void tmc_etr_dump_hw(struct tmc_drvdata *drvdata) +static struct etr_buf * +tmc_etr_setup_sysfs_buf(struct tmc_drvdata *drvdata) { - u32 val; - u64 rwp; + return tmc_alloc_etr_buf(drvdata, drvdata->size, + 0, cpu_to_node(0), NULL); +} - rwp = tmc_read_rwp(drvdata); - val = readl_relaxed(drvdata->base + TMC_STS); +static void +tmc_etr_free_sysfs_buf(struct etr_buf *buf) +{ + if (buf) + tmc_free_etr_buf(buf); +} - /* - * Adjust the buffer to point to the beginning of the trace data - * and update the available trace data. - */ - if (val & TMC_STS_FULL) { - drvdata->buf = drvdata->vaddr + rwp - drvdata->paddr; - drvdata->len = drvdata->size; - coresight_insert_barrier_packet(drvdata->buf); - } else { - drvdata->buf = drvdata->vaddr; - drvdata->len = rwp - drvdata->paddr; - } +static void tmc_etr_sync_sysfs_buf(struct tmc_drvdata *drvdata) +{ + tmc_sync_etr_buf(drvdata); } static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata) @@ -663,7 +851,8 @@ static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata) * read before the TMC is disabled. */ if (drvdata->mode == CS_MODE_SYSFS) - tmc_etr_dump_hw(drvdata); + tmc_etr_sync_sysfs_buf(drvdata); + tmc_disable_hw(drvdata); CS_LOCK(drvdata->base); @@ -672,36 +861,33 @@ static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata) static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev) { int ret = 0; - bool used = false; unsigned long flags; - void __iomem *vaddr = NULL; - dma_addr_t paddr = 0; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + struct etr_buf *new_buf = NULL, *free_buf = NULL; /* - * If we don't have a buffer release the lock and allocate memory. - * Otherwise keep the lock and move along. + * If we are enabling the ETR from disabled state, we need to make + * sure we have a buffer with the right size. The etr_buf is not reset + * immediately after we stop the tracing in SYSFS mode as we wait for + * the user to collect the data. We may be able to reuse the existing + * buffer, provided the size matches. Any allocation has to be done + * with the lock released. */ spin_lock_irqsave(&drvdata->spinlock, flags); - if (!drvdata->vaddr) { + if (!drvdata->etr_buf || (drvdata->etr_buf->size != drvdata->size)) { spin_unlock_irqrestore(&drvdata->spinlock, flags); - /* - * Contiguous memory can't be allocated while a spinlock is - * held. As such allocate memory here and free it if a buffer - * has already been allocated (from a previous session). - */ - vaddr = dma_alloc_coherent(drvdata->dev, drvdata->size, - &paddr, GFP_KERNEL); - if (!vaddr) - return -ENOMEM; + /* Allocate memory with the locks released */ + free_buf = new_buf = tmc_etr_setup_sysfs_buf(drvdata); + if (IS_ERR(new_buf)) + return PTR_ERR(new_buf); /* Let's try again */ spin_lock_irqsave(&drvdata->spinlock, flags); } - if (drvdata->reading) { + if (drvdata->reading || drvdata->mode == CS_MODE_PERF) { ret = -EBUSY; goto out; } @@ -709,21 +895,19 @@ static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev) /* * In sysFS mode we can have multiple writers per sink. Since this * sink is already enabled no memory is needed and the HW need not be - * touched. + * touched, even if the buffer size has changed. */ if (drvdata->mode == CS_MODE_SYSFS) goto out; /* - * If drvdata::vaddr == NULL, use the memory allocated above. - * Otherwise a buffer still exists from a previous session, so - * simply use that. + * If we don't have a buffer or it doesn't match the requested size, + * use the buffer allocated above. Otherwise reuse the existing buffer. */ - if (drvdata->vaddr == NULL) { - used = true; - drvdata->vaddr = vaddr; - drvdata->paddr = paddr; - drvdata->buf = drvdata->vaddr; + if (!drvdata->etr_buf || + (new_buf && drvdata->etr_buf->size != new_buf->size)) { + free_buf = drvdata->etr_buf; + drvdata->etr_buf = new_buf; } drvdata->mode = CS_MODE_SYSFS; @@ -732,8 +916,8 @@ out: spin_unlock_irqrestore(&drvdata->spinlock, flags); /* Free memory outside the spinlock if need be */ - if (!used && vaddr) - dma_free_coherent(drvdata->dev, drvdata->size, vaddr, paddr); + if (free_buf) + tmc_etr_free_sysfs_buf(free_buf); if (!ret) dev_info(drvdata->dev, "TMC-ETR enabled\n"); @@ -812,8 +996,8 @@ int tmc_read_prepare_etr(struct tmc_drvdata *drvdata) goto out; } - /* If drvdata::buf is NULL the trace data has been read already */ - if (drvdata->buf == NULL) { + /* If drvdata::etr_buf is NULL the trace data has been read already */ + if (drvdata->etr_buf == NULL) { ret = -EINVAL; goto out; } @@ -832,8 +1016,7 @@ out: int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata) { unsigned long flags; - dma_addr_t paddr; - void __iomem *vaddr = NULL; + struct etr_buf *etr_buf = NULL; /* config types are set a boot time and never change */ if (WARN_ON_ONCE(drvdata->config_type != TMC_CONFIG_TYPE_ETR)) @@ -854,17 +1037,16 @@ int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata) * The ETR is not tracing and the buffer was just read. * As such prepare to free the trace buffer. */ - vaddr = drvdata->vaddr; - paddr = drvdata->paddr; - drvdata->buf = drvdata->vaddr = NULL; + etr_buf = drvdata->etr_buf; + drvdata->etr_buf = NULL; } drvdata->reading = false; spin_unlock_irqrestore(&drvdata->spinlock, flags); /* Free allocated memory out side of the spinlock */ - if (vaddr) - dma_free_coherent(drvdata->dev, drvdata->size, vaddr, paddr); + if (etr_buf) + tmc_free_etr_buf(etr_buf); return 0; } diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h index 19a765c1748b..67788c075285 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.h +++ b/drivers/hwtracing/coresight/coresight-tmc.h @@ -134,6 +134,34 @@ enum tmc_mem_intf_width { #define CORESIGHT_SOC_600_ETR_CAPS \ (TMC_ETR_SAVE_RESTORE | TMC_ETR_AXI_ARCACHE) +enum etr_mode { + ETR_MODE_FLAT, /* Uses contiguous flat buffer */ +}; + +struct etr_buf_operations; + +/** + * struct etr_buf - Details of the buffer used by ETR + * @mode : Mode of the ETR buffer, contiguous, Scatter Gather etc. + * @full : Trace data overflow + * @size : Size of the buffer. + * @hwaddr : Address to be programmed in the TMC:DBA{LO,HI} + * @offset : Offset of the trace data in the buffer for consumption. + * @len : Available trace data @buf (may round up to the beginning). + * @ops : ETR buffer operations for the mode. + * @private : Backend specific information for the buf + */ +struct etr_buf { + enum etr_mode mode; + bool full; + ssize_t size; + dma_addr_t hwaddr; + unsigned long offset; + s64 len; + const struct etr_buf_operations *ops; + void *private; +}; + /** * struct tmc_drvdata - specifics associated to an TMC component * @base: memory mapped base address for this component. @@ -141,11 +169,10 @@ enum tmc_mem_intf_width { * @csdev: component vitals needed by the framework. * @miscdev: specifics to handle "/dev/xyz.tmc" entry. * @spinlock: only one at a time pls. - * @buf: area of memory where trace data get sent. - * @paddr: DMA start location in RAM. - * @vaddr: virtual representation of @paddr. - * @size: trace buffer size. - * @len: size of the available trace. + * @buf: Snapshot of the trace data for ETF/ETB. + * @etr_buf: details of buffer used in TMC-ETR + * @len: size of the available trace for ETF/ETB. + * @size: trace buffer size for this TMC (common for all modes). * @mode: how this TMC is being used. * @config_type: TMC variant, must be of type @tmc_config_type. * @memwidth: width of the memory interface databus, in bytes. @@ -160,11 +187,12 @@ struct tmc_drvdata { struct miscdevice miscdev; spinlock_t spinlock; bool reading; - char *buf; - dma_addr_t paddr; - void __iomem *vaddr; - u32 size; + union { + char *buf; /* TMC ETB */ + struct etr_buf *etr_buf; /* TMC ETR */ + }; u32 len; + u32 size; u32 mode; enum tmc_config_type config_type; enum tmc_mem_intf_width memwidth; @@ -172,6 +200,15 @@ struct tmc_drvdata { u32 etr_caps; }; +struct etr_buf_operations { + int (*alloc)(struct tmc_drvdata *drvdata, struct etr_buf *etr_buf, + int node, void **pages); + void (*sync)(struct etr_buf *etr_buf, u64 rrp, u64 rwp); + ssize_t (*get_data)(struct etr_buf *etr_buf, u64 offset, size_t len, + char **bufpp); + void (*free)(struct etr_buf *etr_buf); +}; + /** * struct tmc_pages - Collection of pages used for SG. * @nr_pages: Number of pages in the list. From 5c989a3bd9017cfb87e421ef04a1c9cd24de5c40 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 11 Jul 2018 13:40:23 -0600 Subject: [PATCH 0480/3715] UPSTREAM: coresight: tmc-etr buf: Add TMC scatter gather mode backend Add the support for Scatter-Gather mode to the etr-buf layer. Since we now have two different modes, we choose the backend based on a set of conditions, documented in the code. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit e8e3b771395d9b75ba5da409b7d20435ddb05a21). Bug: 140266694 Change-Id: I8bc47e6f7ec1807577519e574176f7666a83c33a Signed-off-by: Yabin Cui --- .../hwtracing/coresight/coresight-tmc-etr.c | 114 +++++++++++++++++- drivers/hwtracing/coresight/coresight-tmc.h | 1 + 2 files changed, 111 insertions(+), 4 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 44aa8b6b9344..68967547a092 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -527,7 +527,7 @@ static void tmc_etr_sg_table_populate(struct etr_sg_table *etr_table) * @size - Total size of the data buffer * @pages - Optional list of page virtual address */ -static struct etr_sg_table __maybe_unused * +static struct etr_sg_table * tmc_init_etr_sg_table(struct device *dev, int node, unsigned long size, void **pages) { @@ -634,8 +634,86 @@ static const struct etr_buf_operations etr_flat_buf_ops = { .get_data = tmc_etr_get_data_flat_buf, }; +/* + * tmc_etr_alloc_sg_buf: Allocate an SG buf @etr_buf. Setup the parameters + * appropriately. + */ +static int tmc_etr_alloc_sg_buf(struct tmc_drvdata *drvdata, + struct etr_buf *etr_buf, int node, + void **pages) +{ + struct etr_sg_table *etr_table; + + etr_table = tmc_init_etr_sg_table(drvdata->dev, node, + etr_buf->size, pages); + if (IS_ERR(etr_table)) + return -ENOMEM; + etr_buf->hwaddr = etr_table->hwaddr; + etr_buf->mode = ETR_MODE_ETR_SG; + etr_buf->private = etr_table; + return 0; +} + +static void tmc_etr_free_sg_buf(struct etr_buf *etr_buf) +{ + struct etr_sg_table *etr_table = etr_buf->private; + + if (etr_table) { + tmc_free_sg_table(etr_table->sg_table); + kfree(etr_table); + } +} + +static ssize_t tmc_etr_get_data_sg_buf(struct etr_buf *etr_buf, u64 offset, + size_t len, char **bufpp) +{ + struct etr_sg_table *etr_table = etr_buf->private; + + return tmc_sg_table_get_data(etr_table->sg_table, offset, len, bufpp); +} + +static void tmc_etr_sync_sg_buf(struct etr_buf *etr_buf, u64 rrp, u64 rwp) +{ + long r_offset, w_offset; + struct etr_sg_table *etr_table = etr_buf->private; + struct tmc_sg_table *table = etr_table->sg_table; + + /* Convert hw address to offset in the buffer */ + r_offset = tmc_sg_get_data_page_offset(table, rrp); + if (r_offset < 0) { + dev_warn(table->dev, + "Unable to map RRP %llx to offset\n", rrp); + etr_buf->len = 0; + return; + } + + w_offset = tmc_sg_get_data_page_offset(table, rwp); + if (w_offset < 0) { + dev_warn(table->dev, + "Unable to map RWP %llx to offset\n", rwp); + etr_buf->len = 0; + return; + } + + etr_buf->offset = r_offset; + if (etr_buf->full) + etr_buf->len = etr_buf->size; + else + etr_buf->len = ((w_offset < r_offset) ? etr_buf->size : 0) + + w_offset - r_offset; + tmc_sg_table_sync_data_range(table, r_offset, etr_buf->len); +} + +static const struct etr_buf_operations etr_sg_buf_ops = { + .alloc = tmc_etr_alloc_sg_buf, + .free = tmc_etr_free_sg_buf, + .sync = tmc_etr_sync_sg_buf, + .get_data = tmc_etr_get_data_sg_buf, +}; + static const struct etr_buf_operations *etr_buf_ops[] = { [ETR_MODE_FLAT] = &etr_flat_buf_ops, + [ETR_MODE_ETR_SG] = &etr_sg_buf_ops, }; static inline int tmc_etr_mode_alloc_buf(int mode, @@ -647,6 +725,7 @@ static inline int tmc_etr_mode_alloc_buf(int mode, switch (mode) { case ETR_MODE_FLAT: + case ETR_MODE_ETR_SG: rc = etr_buf_ops[mode]->alloc(drvdata, etr_buf, node, pages); if (!rc) etr_buf->ops = etr_buf_ops[mode]; @@ -668,17 +747,38 @@ static struct etr_buf *tmc_alloc_etr_buf(struct tmc_drvdata *drvdata, ssize_t size, int flags, int node, void **pages) { - int rc = 0; + int rc = -ENOMEM; + bool has_etr_sg, has_iommu; struct etr_buf *etr_buf; + has_etr_sg = tmc_etr_has_cap(drvdata, TMC_ETR_SG); + has_iommu = iommu_get_domain_for_dev(drvdata->dev); + etr_buf = kzalloc(sizeof(*etr_buf), GFP_KERNEL); if (!etr_buf) return ERR_PTR(-ENOMEM); etr_buf->size = size; - rc = tmc_etr_mode_alloc_buf(ETR_MODE_FLAT, drvdata, - etr_buf, node, pages); + /* + * If we have to use an existing list of pages, we cannot reliably + * use a contiguous DMA memory (even if we have an IOMMU). Otherwise, + * we use the contiguous DMA memory if at least one of the following + * conditions is true: + * a) The ETR cannot use Scatter-Gather. + * b) we have a backing IOMMU + * c) The requested memory size is smaller (< 1M). + * + * Fallback to available mechanisms. + * + */ + if (!pages && + (!has_etr_sg || has_iommu || size < SZ_1M)) + rc = tmc_etr_mode_alloc_buf(ETR_MODE_FLAT, drvdata, + etr_buf, node, pages); + if (rc && has_etr_sg) + rc = tmc_etr_mode_alloc_buf(ETR_MODE_ETR_SG, drvdata, + etr_buf, node, pages); if (rc) { kfree(etr_buf); return ERR_PTR(rc); @@ -772,6 +872,12 @@ static void tmc_etr_enable_hw(struct tmc_drvdata *drvdata) axictl |= TMC_AXICTL_ARCACHE_OS; } + if (etr_buf->mode == ETR_MODE_ETR_SG) { + if (WARN_ON(!tmc_etr_has_cap(drvdata, TMC_ETR_SG))) + return; + axictl |= TMC_AXICTL_SCT_GAT_MODE; + } + writel_relaxed(axictl, drvdata->base + TMC_AXICTL); tmc_write_dba(drvdata, etr_buf->hwaddr); /* diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h index 67788c075285..266a16e03ebf 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.h +++ b/drivers/hwtracing/coresight/coresight-tmc.h @@ -136,6 +136,7 @@ enum tmc_mem_intf_width { enum etr_mode { ETR_MODE_FLAT, /* Uses contiguous flat buffer */ + ETR_MODE_ETR_SG, /* Uses in-built TMC ETR SG mechanism */ }; struct etr_buf_operations; From 1eb28677793ae6f74f029598b620fa9710137faa Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 11 Jul 2018 13:40:24 -0600 Subject: [PATCH 0481/3715] UPSTREAM: coresight: tmc: Add configuration support for trace buffer size Now that we can dynamically switch between contiguous memory and SG table depending on the trace buffer size, provide the support for selecting an appropriate buffer size. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Acked-by: Rob Herring Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit c34cc23f1dcb77c66b5cd074f83dee61bfcbe567). Bug: 140266694 Change-Id: Iaa86f2aedefcb48a95fe8d4db7e37d9dc7a2fddf Signed-off-by: Yabin Cui --- .../testing/sysfs-bus-coresight-devices-tmc | 8 +++++ .../devicetree/bindings/arm/coresight.txt | 3 +- drivers/hwtracing/coresight/coresight-tmc.c | 33 +++++++++++++++++++ 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-tmc b/Documentation/ABI/testing/sysfs-bus-coresight-devices-tmc index 4fe677ed1305..ab49b9ac3bcb 100644 --- a/Documentation/ABI/testing/sysfs-bus-coresight-devices-tmc +++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-tmc @@ -83,3 +83,11 @@ KernelVersion: 4.7 Contact: Mathieu Poirier Description: (R) Indicates the capabilities of the Coresight TMC. The value is read directly from the DEVID register, 0xFC8, + +What: /sys/bus/coresight/devices/.tmc/buffer_size +Date: December 2018 +KernelVersion: 4.19 +Contact: Mathieu Poirier +Description: (RW) Size of the trace buffer for TMC-ETR when used in SYSFS + mode. Writable only for TMC-ETR configurations. The value + should be aligned to the kernel pagesize. diff --git a/Documentation/devicetree/bindings/arm/coresight.txt b/Documentation/devicetree/bindings/arm/coresight.txt index 603d3c62e8c7..9aa30a156b57 100644 --- a/Documentation/devicetree/bindings/arm/coresight.txt +++ b/Documentation/devicetree/bindings/arm/coresight.txt @@ -84,7 +84,8 @@ its hardware characteristcs. * Optional property for TMC: * arm,buffer-size: size of contiguous buffer space for TMC ETR - (embedded trace router) + (embedded trace router). This property is obsolete. The buffer size + can be configured dynamically via buffer_size property in sysfs. * arm,scatter-gather: boolean. Indicates that the TMC-ETR can safely use the SG mode on this system. diff --git a/drivers/hwtracing/coresight/coresight-tmc.c b/drivers/hwtracing/coresight/coresight-tmc.c index 1eaecc926603..7c138b080844 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.c +++ b/drivers/hwtracing/coresight/coresight-tmc.c @@ -285,8 +285,41 @@ static ssize_t trigger_cntr_store(struct device *dev, } static DEVICE_ATTR_RW(trigger_cntr); +static ssize_t buffer_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct tmc_drvdata *drvdata = dev_get_drvdata(dev->parent); + + return sprintf(buf, "%#x\n", drvdata->size); +} + +static ssize_t buffer_size_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret; + unsigned long val; + struct tmc_drvdata *drvdata = dev_get_drvdata(dev->parent); + + /* Only permitted for TMC-ETRs */ + if (drvdata->config_type != TMC_CONFIG_TYPE_ETR) + return -EPERM; + + ret = kstrtoul(buf, 0, &val); + if (ret) + return ret; + /* The buffer size should be page aligned */ + if (val & (PAGE_SIZE - 1)) + return -EINVAL; + drvdata->size = val; + return size; +} + +static DEVICE_ATTR_RW(buffer_size); + static struct attribute *coresight_tmc_attrs[] = { &dev_attr_trigger_cntr.attr, + &dev_attr_buffer_size.attr, NULL, }; From d39dcd76ed8a7a636febff0d5239a8eba4a3da94 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Jul 2018 13:40:25 -0600 Subject: [PATCH 0482/3715] UPSTREAM: coresight: include vmalloc.h for vmap/vunmap The newly introduced code fails to build in some configurations unless we include the right headers: drivers/hwtracing/coresight/coresight-tmc-etr.c: In function 'tmc_free_table_pages': drivers/hwtracing/coresight/coresight-tmc-etr.c:206:3: error: implicit declaration of function 'vunmap'; did you mean 'iounmap'? [-Werror=implicit-function-declaration] Fixes: 79613ae8715a ("coresight: Add generic TMC sg table framework") Signed-off-by: Arnd Bergmann Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit a748ddd1132c79888390934216ef428e0b2f0ea8). Bug: 140266694 Change-Id: I0a65ed05483ab6607c94dd43f048538854ba4e72 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-tmc-etr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 68967547a092..09f53defb36c 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "coresight-priv.h" #include "coresight-tmc.h" From c1097966d6da53c92a014914631735c6173f816c Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 11 Jul 2018 13:40:26 -0600 Subject: [PATCH 0483/3715] UPSTREAM: coresight: Fix check in coresight_tmc_etr_buf_insert_barrier_packet We request for "CORESIGHT_BARRIER_PKT_SIZE" length and we should be happy when we get that size. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 00ea1970298677e31ed8f962bda663cfc4848481). Bug: 140266694 Change-Id: Ib850da4881b95b9610ac8d9d115fd20b514f94ea Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-tmc-etr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 09f53defb36c..850f4f36f4e6 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -818,7 +818,7 @@ tmc_etr_buf_insert_barrier_packet(struct etr_buf *etr_buf, u64 offset) len = tmc_etr_buf_get_data(etr_buf, offset, CORESIGHT_BARRIER_PKT_SIZE, &bufp); - if (WARN_ON(len <= CORESIGHT_BARRIER_PKT_SIZE)) + if (WARN_ON(len < CORESIGHT_BARRIER_PKT_SIZE)) return -EINVAL; coresight_insert_barrier_packet(bufp); return offset + CORESIGHT_BARRIER_PKT_SIZE; From e54783ddffd0887bae1b2f86369e85614dc8b3ce Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 11 Jul 2018 13:40:29 -0600 Subject: [PATCH 0484/3715] UPSTREAM: coresight: Cleanup device subtype struct Clean up our struct a little bit by using a union instead of a struct for tracking the subtype of a device. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 00b78e8b7b003c686a8570df7d231787f514226c). Bug: 140266694 Change-Id: I7cfbc6d8a2f0552e1361e04ce06b4bbb277448e6 Signed-off-by: Yabin Cui --- include/linux/coresight.h | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/include/linux/coresight.h b/include/linux/coresight.h index d950dad5056a..556fe5933ad9 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -70,17 +70,20 @@ enum coresight_dev_subtype_source { }; /** - * struct coresight_dev_subtype - further characterisation of a type + * union coresight_dev_subtype - further characterisation of a type * @sink_subtype: type of sink this component is, as defined - by @coresight_dev_subtype_sink. + * by @coresight_dev_subtype_sink. * @link_subtype: type of link this component is, as defined - by @coresight_dev_subtype_link. + * by @coresight_dev_subtype_link. * @source_subtype: type of source this component is, as defined - by @coresight_dev_subtype_source. + * by @coresight_dev_subtype_source. */ -struct coresight_dev_subtype { - enum coresight_dev_subtype_sink sink_subtype; - enum coresight_dev_subtype_link link_subtype; +union coresight_dev_subtype { + /* We have some devices which acts as LINK and SINK */ + struct { + enum coresight_dev_subtype_sink sink_subtype; + enum coresight_dev_subtype_link link_subtype; + }; enum coresight_dev_subtype_source source_subtype; }; @@ -120,7 +123,7 @@ struct coresight_platform_data { */ struct coresight_desc { enum coresight_dev_type type; - struct coresight_dev_subtype subtype; + union coresight_dev_subtype subtype; const struct coresight_ops *ops; struct coresight_platform_data *pdata; struct device *dev; @@ -164,7 +167,7 @@ struct coresight_device { int nr_inport; int nr_outport; enum coresight_dev_type type; - struct coresight_dev_subtype subtype; + union coresight_dev_subtype subtype; const struct coresight_ops *ops; struct device dev; atomic_t *refcnt; From f8c0d6b98d7da7b3a955b274fc7a0e998c484479 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 11 Jul 2018 13:40:30 -0600 Subject: [PATCH 0485/3715] UPSTREAM: coresight: Add helper device type Add a new coresight device type, which do not belong to any of the existing types, i.e, source, sink, link etc. A helper device could be connected to a coresight device, which could augment the functionality of the coresight device. This is intended to cover Coresight Address Translation Unit (CATU) devices, which provide improved Scatter Gather mechanism for TMC ETR. The idea is that the helper device could be controlled by the driver of the device it is attached to (in this case ETR), transparent to the generic coresight driver (and paths). The operations include enable(), disable(), both of which could accept a device specific "data" which the driving device and the helper device could share. Since they don't appear in the coresight "path" tracked by software, we have to ensure that they are powered up/down whenever the master device is turned on. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 8a091d847cecd1a3656172cb79b53a19e90b8c4f). Bug: 140266694 Change-Id: Ia616f051eee9f9d782e0ce958d243ce6c2ad8637 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight.c | 43 +++++++++++++++++++++++-- include/linux/coresight.h | 24 ++++++++++++++ 2 files changed, 65 insertions(+), 2 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index 01c6b0eb44d0..d208e7ffd515 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -432,6 +432,42 @@ struct coresight_device *coresight_get_enabled_sink(bool deactivate) return dev ? to_coresight_device(dev) : NULL; } +/* + * coresight_grab_device - Power up this device and any of the helper + * devices connected to it for trace operation. Since the helper devices + * don't appear on the trace path, they should be handled along with the + * the master device. + */ +static void coresight_grab_device(struct coresight_device *csdev) +{ + int i; + + for (i = 0; i < csdev->nr_outport; i++) { + struct coresight_device *child = csdev->conns[i].child_dev; + + if (child && child->type == CORESIGHT_DEV_TYPE_HELPER) + pm_runtime_get_sync(child->dev.parent); + } + pm_runtime_get_sync(csdev->dev.parent); +} + +/* + * coresight_drop_device - Release this device and any of the helper + * devices connected to it. + */ +static void coresight_drop_device(struct coresight_device *csdev) +{ + int i; + + pm_runtime_put(csdev->dev.parent); + for (i = 0; i < csdev->nr_outport; i++) { + struct coresight_device *child = csdev->conns[i].child_dev; + + if (child && child->type == CORESIGHT_DEV_TYPE_HELPER) + pm_runtime_put(child->dev.parent); + } +} + /** * _coresight_build_path - recursively build a path from a @csdev to a sink. * @csdev: The device to start from. @@ -480,9 +516,9 @@ out: if (!node) return -ENOMEM; + coresight_grab_device(csdev); node->csdev = csdev; list_add(&node->link, path); - pm_runtime_get_sync(csdev->dev.parent); return 0; } @@ -526,7 +562,7 @@ void coresight_release_path(struct list_head *path) list_for_each_entry_safe(nd, next, path, link) { csdev = nd->csdev; - pm_runtime_put_sync(csdev->dev.parent); + coresight_drop_device(csdev); list_del(&nd->link); kfree(nd); } @@ -777,6 +813,9 @@ static struct device_type coresight_dev_type[] = { .name = "source", .groups = coresight_source_groups, }, + { + .name = "helper", + }, }; static void coresight_device_release(struct device *dev) diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 556fe5933ad9..9bf0097a5ed1 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -47,6 +47,7 @@ enum coresight_dev_type { CORESIGHT_DEV_TYPE_LINK, CORESIGHT_DEV_TYPE_LINKSINK, CORESIGHT_DEV_TYPE_SOURCE, + CORESIGHT_DEV_TYPE_HELPER, }; enum coresight_dev_subtype_sink { @@ -69,6 +70,10 @@ enum coresight_dev_subtype_source { CORESIGHT_DEV_SUBTYPE_SOURCE_SOFTWARE, }; +enum coresight_dev_subtype_helper { + CORESIGHT_DEV_SUBTYPE_HELPER_NONE, +}; + /** * union coresight_dev_subtype - further characterisation of a type * @sink_subtype: type of sink this component is, as defined @@ -77,6 +82,8 @@ enum coresight_dev_subtype_source { * by @coresight_dev_subtype_link. * @source_subtype: type of source this component is, as defined * by @coresight_dev_subtype_source. + * @helper_subtype: type of helper this component is, as defined + * by @coresight_dev_subtype_helper. */ union coresight_dev_subtype { /* We have some devices which acts as LINK and SINK */ @@ -85,6 +92,7 @@ union coresight_dev_subtype { enum coresight_dev_subtype_link link_subtype; }; enum coresight_dev_subtype_source source_subtype; + enum coresight_dev_subtype_helper helper_subtype; }; /** @@ -181,6 +189,7 @@ struct coresight_device { #define source_ops(csdev) csdev->ops->source_ops #define sink_ops(csdev) csdev->ops->sink_ops #define link_ops(csdev) csdev->ops->link_ops +#define helper_ops(csdev) csdev->ops->helper_ops /** * struct coresight_ops_sink - basic operations for a sink @@ -240,10 +249,25 @@ struct coresight_ops_source { struct perf_event *event); }; +/** + * struct coresight_ops_helper - Operations for a helper device. + * + * All operations could pass in a device specific data, which could + * help the helper device to determine what to do. + * + * @enable : Enable the device + * @disable : Disable the device + */ +struct coresight_ops_helper { + int (*enable)(struct coresight_device *csdev, void *data); + int (*disable)(struct coresight_device *csdev, void *data); +}; + struct coresight_ops { const struct coresight_ops_sink *sink_ops; const struct coresight_ops_link *link_ops; const struct coresight_ops_source *source_ops; + const struct coresight_ops_helper *helper_ops; }; #ifdef CONFIG_CORESIGHT From 45717b83be97d957f7eb25ce8742d400e1975229 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 11 Jul 2018 13:40:31 -0600 Subject: [PATCH 0486/3715] UPSTREAM: coresight: Introduce support for Coresight Address Translation Unit Add the initial support for Coresight Address Translation Unit, which augments the TMC in Coresight SoC-600 by providing an improved Scatter Gather mechanism. CATU is always connected to a single TMC-ETR and converts the AXI address with a translated address (from a given SG table with specific format). The CATU should be programmed in pass through mode and enabled even if the ETR doesn't use the translation by CATU. This patch provides mechanism to enable/disable the CATU always in the pass through mode. We reuse the existing ports mechanism to link the TMC-ETR to the connected CATU. i.e, TMC-ETR:output_port0 -> CATU:input_port0 Reference manual for CATU component is avilable in version r2p0 of : "Arm Coresight System-on-Chip SoC-600 Technical Reference Manual". Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit fcacb5c154baaeaee3d89b2a2b7cf6e4ce43f5f5). Bug: 140266694 Change-Id: I7c2fb81f109415e29b96c219176b14c9d1a5d1c9 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/Kconfig | 11 + drivers/hwtracing/coresight/Makefile | 1 + drivers/hwtracing/coresight/coresight-catu.c | 214 ++++++++++++++++++ drivers/hwtracing/coresight/coresight-catu.h | 84 +++++++ .../hwtracing/coresight/coresight-tmc-etr.c | 52 +++++ include/linux/coresight.h | 1 + 6 files changed, 363 insertions(+) create mode 100644 drivers/hwtracing/coresight/coresight-catu.c create mode 100644 drivers/hwtracing/coresight/coresight-catu.h diff --git a/drivers/hwtracing/coresight/Kconfig b/drivers/hwtracing/coresight/Kconfig index ef9cb3c164e1..ad34380cac49 100644 --- a/drivers/hwtracing/coresight/Kconfig +++ b/drivers/hwtracing/coresight/Kconfig @@ -31,6 +31,17 @@ config CORESIGHT_LINK_AND_SINK_TMC complies with the generic implementation of the component without special enhancement or added features. +config CORESIGHT_CATU + bool "Coresight Address Translation Unit (CATU) driver" + depends on CORESIGHT_LINK_AND_SINK_TMC + help + Enable support for the Coresight Address Translation Unit (CATU). + CATU supports a scatter gather table of 4K pages, with forward/backward + lookup. CATU helps TMC ETR to use a large physically non-contiguous trace + buffer by translating the addresses used by ETR to the physical address + by looking up the provided table. CATU can also be used in pass-through + mode where the address is not translated. + config CORESIGHT_SINK_TPIU bool "Coresight generic TPIU driver" depends on CORESIGHT_LINKS_AND_SINKS diff --git a/drivers/hwtracing/coresight/Makefile b/drivers/hwtracing/coresight/Makefile index 61db9dd0d571..41870ded51a3 100644 --- a/drivers/hwtracing/coresight/Makefile +++ b/drivers/hwtracing/coresight/Makefile @@ -18,3 +18,4 @@ obj-$(CONFIG_CORESIGHT_SOURCE_ETM4X) += coresight-etm4x.o \ obj-$(CONFIG_CORESIGHT_DYNAMIC_REPLICATOR) += coresight-dynamic-replicator.o obj-$(CONFIG_CORESIGHT_STM) += coresight-stm.o obj-$(CONFIG_CORESIGHT_CPU_DEBUG) += coresight-cpu-debug.o +obj-$(CONFIG_CORESIGHT_CATU) += coresight-catu.o diff --git a/drivers/hwtracing/coresight/coresight-catu.c b/drivers/hwtracing/coresight/coresight-catu.c new file mode 100644 index 000000000000..9d0cb1f6f16b --- /dev/null +++ b/drivers/hwtracing/coresight/coresight-catu.c @@ -0,0 +1,214 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018 Arm Limited. All rights reserved. + * + * Coresight Address Translation Unit support + * + * Author: Suzuki K Poulose + */ + +#include +#include +#include +#include +#include +#include + +#include "coresight-catu.h" +#include "coresight-priv.h" + +#define csdev_to_catu_drvdata(csdev) \ + dev_get_drvdata(csdev->dev.parent) + +coresight_simple_reg32(struct catu_drvdata, devid, CORESIGHT_DEVID); +coresight_simple_reg32(struct catu_drvdata, control, CATU_CONTROL); +coresight_simple_reg32(struct catu_drvdata, status, CATU_STATUS); +coresight_simple_reg32(struct catu_drvdata, mode, CATU_MODE); +coresight_simple_reg32(struct catu_drvdata, axictrl, CATU_AXICTRL); +coresight_simple_reg32(struct catu_drvdata, irqen, CATU_IRQEN); +coresight_simple_reg64(struct catu_drvdata, sladdr, + CATU_SLADDRLO, CATU_SLADDRHI); +coresight_simple_reg64(struct catu_drvdata, inaddr, + CATU_INADDRLO, CATU_INADDRHI); + +static struct attribute *catu_mgmt_attrs[] = { + &dev_attr_devid.attr, + &dev_attr_control.attr, + &dev_attr_status.attr, + &dev_attr_mode.attr, + &dev_attr_axictrl.attr, + &dev_attr_irqen.attr, + &dev_attr_sladdr.attr, + &dev_attr_inaddr.attr, + NULL, +}; + +static const struct attribute_group catu_mgmt_group = { + .attrs = catu_mgmt_attrs, + .name = "mgmt", +}; + +static const struct attribute_group *catu_groups[] = { + &catu_mgmt_group, + NULL, +}; + + +static inline int catu_wait_for_ready(struct catu_drvdata *drvdata) +{ + return coresight_timeout(drvdata->base, + CATU_STATUS, CATU_STATUS_READY, 1); +} + +static int catu_enable_hw(struct catu_drvdata *drvdata, void *__unused) +{ + u32 control; + + if (catu_wait_for_ready(drvdata)) + dev_warn(drvdata->dev, "Timeout while waiting for READY\n"); + + control = catu_read_control(drvdata); + if (control & BIT(CATU_CONTROL_ENABLE)) { + dev_warn(drvdata->dev, "CATU is already enabled\n"); + return -EBUSY; + } + + control |= BIT(CATU_CONTROL_ENABLE); + catu_write_mode(drvdata, CATU_MODE_PASS_THROUGH); + catu_write_control(drvdata, control); + dev_dbg(drvdata->dev, "Enabled in Pass through mode\n"); + return 0; +} + +static int catu_enable(struct coresight_device *csdev, void *data) +{ + int rc; + struct catu_drvdata *catu_drvdata = csdev_to_catu_drvdata(csdev); + + CS_UNLOCK(catu_drvdata->base); + rc = catu_enable_hw(catu_drvdata, data); + CS_LOCK(catu_drvdata->base); + return rc; +} + +static int catu_disable_hw(struct catu_drvdata *drvdata) +{ + int rc = 0; + + catu_write_control(drvdata, 0); + if (catu_wait_for_ready(drvdata)) { + dev_info(drvdata->dev, "Timeout while waiting for READY\n"); + rc = -EAGAIN; + } + + dev_dbg(drvdata->dev, "Disabled\n"); + return rc; +} + +static int catu_disable(struct coresight_device *csdev, void *__unused) +{ + int rc; + struct catu_drvdata *catu_drvdata = csdev_to_catu_drvdata(csdev); + + CS_UNLOCK(catu_drvdata->base); + rc = catu_disable_hw(catu_drvdata); + CS_LOCK(catu_drvdata->base); + return rc; +} + +const struct coresight_ops_helper catu_helper_ops = { + .enable = catu_enable, + .disable = catu_disable, +}; + +const struct coresight_ops catu_ops = { + .helper_ops = &catu_helper_ops, +}; + +static int catu_probe(struct amba_device *adev, const struct amba_id *id) +{ + int ret = 0; + u32 dma_mask; + struct catu_drvdata *drvdata; + struct coresight_desc catu_desc; + struct coresight_platform_data *pdata = NULL; + struct device *dev = &adev->dev; + struct device_node *np = dev->of_node; + void __iomem *base; + + if (np) { + pdata = of_get_coresight_platform_data(dev, np); + if (IS_ERR(pdata)) { + ret = PTR_ERR(pdata); + goto out; + } + dev->platform_data = pdata; + } + + drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); + if (!drvdata) { + ret = -ENOMEM; + goto out; + } + + drvdata->dev = dev; + dev_set_drvdata(dev, drvdata); + base = devm_ioremap_resource(dev, &adev->res); + if (IS_ERR(base)) { + ret = PTR_ERR(base); + goto out; + } + + /* Setup dma mask for the device */ + dma_mask = readl_relaxed(base + CORESIGHT_DEVID) & 0x3f; + switch (dma_mask) { + case 32: + case 40: + case 44: + case 48: + case 52: + case 56: + case 64: + break; + default: + /* Default to the 40bits as supported by TMC-ETR */ + dma_mask = 40; + } + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(dma_mask)); + if (ret) + goto out; + + drvdata->base = base; + catu_desc.pdata = pdata; + catu_desc.dev = dev; + catu_desc.groups = catu_groups; + catu_desc.type = CORESIGHT_DEV_TYPE_HELPER; + catu_desc.subtype.helper_subtype = CORESIGHT_DEV_SUBTYPE_HELPER_CATU; + catu_desc.ops = &catu_ops; + drvdata->csdev = coresight_register(&catu_desc); + if (IS_ERR(drvdata->csdev)) + ret = PTR_ERR(drvdata->csdev); +out: + pm_runtime_put(&adev->dev); + return ret; +} + +static struct amba_id catu_ids[] = { + { + .id = 0x000bb9ee, + .mask = 0x000fffff, + }, + {}, +}; + +static struct amba_driver catu_driver = { + .drv = { + .name = "coresight-catu", + .owner = THIS_MODULE, + .suppress_bind_attrs = true, + }, + .probe = catu_probe, + .id_table = catu_ids, +}; + +builtin_amba_driver(catu_driver); diff --git a/drivers/hwtracing/coresight/coresight-catu.h b/drivers/hwtracing/coresight/coresight-catu.h new file mode 100644 index 000000000000..4f221fccffca --- /dev/null +++ b/drivers/hwtracing/coresight/coresight-catu.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 Arm Limited. All rights reserved. + * + * Author: Suzuki K Poulose + */ + +#ifndef _CORESIGHT_CATU_H +#define _CORESIGHT_CATU_H + +#include "coresight-priv.h" + +/* Register offset from base */ +#define CATU_CONTROL 0x000 +#define CATU_MODE 0x004 +#define CATU_AXICTRL 0x008 +#define CATU_IRQEN 0x00c +#define CATU_SLADDRLO 0x020 +#define CATU_SLADDRHI 0x024 +#define CATU_INADDRLO 0x028 +#define CATU_INADDRHI 0x02c +#define CATU_STATUS 0x100 +#define CATU_DEVARCH 0xfbc + +#define CATU_CONTROL_ENABLE 0 + +#define CATU_MODE_PASS_THROUGH 0U +#define CATU_MODE_TRANSLATE 1U + +#define CATU_STATUS_READY 8 +#define CATU_STATUS_ADRERR 0 +#define CATU_STATUS_AXIERR 4 + +#define CATU_IRQEN_ON 0x1 +#define CATU_IRQEN_OFF 0x0 + +struct catu_drvdata { + struct device *dev; + void __iomem *base; + struct coresight_device *csdev; + int irq; +}; + +#define CATU_REG32(name, offset) \ +static inline u32 \ +catu_read_##name(struct catu_drvdata *drvdata) \ +{ \ + return coresight_read_reg_pair(drvdata->base, offset, -1); \ +} \ +static inline void \ +catu_write_##name(struct catu_drvdata *drvdata, u32 val) \ +{ \ + coresight_write_reg_pair(drvdata->base, val, offset, -1); \ +} + +#define CATU_REG_PAIR(name, lo_off, hi_off) \ +static inline u64 \ +catu_read_##name(struct catu_drvdata *drvdata) \ +{ \ + return coresight_read_reg_pair(drvdata->base, lo_off, hi_off); \ +} \ +static inline void \ +catu_write_##name(struct catu_drvdata *drvdata, u64 val) \ +{ \ + coresight_write_reg_pair(drvdata->base, val, lo_off, hi_off); \ +} + +CATU_REG32(control, CATU_CONTROL); +CATU_REG32(mode, CATU_MODE); +CATU_REG_PAIR(sladdr, CATU_SLADDRLO, CATU_SLADDRHI) +CATU_REG_PAIR(inaddr, CATU_INADDRLO, CATU_INADDRHI) + +static inline bool coresight_is_catu_device(struct coresight_device *csdev) +{ + if (!IS_ENABLED(CONFIG_CORESIGHT_CATU)) + return false; + if (csdev->type != CORESIGHT_DEV_TYPE_HELPER) + return false; + if (csdev->subtype.helper_subtype != CORESIGHT_DEV_SUBTYPE_HELPER_CATU) + return false; + return true; +} + +#endif diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 850f4f36f4e6..b48a52890417 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -20,6 +20,7 @@ #include #include #include +#include "coresight-catu.h" #include "coresight-priv.h" #include "coresight-tmc.h" @@ -712,6 +713,48 @@ static const struct etr_buf_operations etr_sg_buf_ops = { .get_data = tmc_etr_get_data_sg_buf, }; +/* + * TMC ETR could be connected to a CATU device, which can provide address + * translation service. This is represented by the Output port of the TMC + * (ETR) connected to the input port of the CATU. + * + * Returns : coresight_device ptr for the CATU device if a CATU is found. + * : NULL otherwise. + */ +static inline struct coresight_device * +tmc_etr_get_catu_device(struct tmc_drvdata *drvdata) +{ + int i; + struct coresight_device *tmp, *etr = drvdata->csdev; + + if (!IS_ENABLED(CONFIG_CORESIGHT_CATU)) + return NULL; + + for (i = 0; i < etr->nr_outport; i++) { + tmp = etr->conns[i].child_dev; + if (tmp && coresight_is_catu_device(tmp)) + return tmp; + } + + return NULL; +} + +static inline void tmc_etr_enable_catu(struct tmc_drvdata *drvdata) +{ + struct coresight_device *catu = tmc_etr_get_catu_device(drvdata); + + if (catu && helper_ops(catu)->enable) + helper_ops(catu)->enable(catu, NULL); +} + +static inline void tmc_etr_disable_catu(struct tmc_drvdata *drvdata) +{ + struct coresight_device *catu = tmc_etr_get_catu_device(drvdata); + + if (catu && helper_ops(catu)->disable) + helper_ops(catu)->disable(catu, NULL); +} + static const struct etr_buf_operations *etr_buf_ops[] = { [ETR_MODE_FLAT] = &etr_flat_buf_ops, [ETR_MODE_ETR_SG] = &etr_sg_buf_ops, @@ -855,6 +898,12 @@ static void tmc_etr_enable_hw(struct tmc_drvdata *drvdata) u32 axictl, sts; struct etr_buf *etr_buf = drvdata->etr_buf; + /* + * If this ETR is connected to a CATU, enable it before we turn + * this on + */ + tmc_etr_enable_catu(drvdata); + CS_UNLOCK(drvdata->base); /* Wait for TMCSReady bit to be set */ @@ -963,6 +1012,9 @@ static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata) tmc_disable_hw(drvdata); CS_LOCK(drvdata->base); + + /* Disable CATU device if this ETR is connected to one */ + tmc_etr_disable_catu(drvdata); } static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev) diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 9bf0097a5ed1..e11fb81592a4 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -72,6 +72,7 @@ enum coresight_dev_subtype_source { enum coresight_dev_subtype_helper { CORESIGHT_DEV_SUBTYPE_HELPER_NONE, + CORESIGHT_DEV_SUBTYPE_HELPER_CATU, }; /** From a981df2d79f51c5ee257784076703a290b1efe6f Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 11 Jul 2018 13:40:33 -0600 Subject: [PATCH 0487/3715] UPSTREAM: coresight: catu: Add support for scatter gather tables This patch adds the support for setting up a SG table for use by the CATU. We reuse the tmc_sg_table to represent the table/data pages, even though the table format is different. Similar to ETR SG table, CATU uses a 4KB page size for data buffers as well as page tables. All table entries are 64bit wide and have the following format: 63 12 1 0 x-----------------------------------x | Address [63-12] | SBZ | V | x-----------------------------------x Where [V] -> 0 - Pointer is invalid 1 - Pointer is Valid CATU uses only first half of the page for data page pointers. i.e, single table page will only have 256 page pointers, addressing upto 1MB of data. The second half of a table page contains only two pointers at the end of the page (i.e, pointers at index 510 and 511), which are used as links to the "Previous" and "Next" page tables respectively. The first table page has an "Invalid" previous pointer and the next pointer entry points to the second page table if there is one. Similarly the last table page has an "Invalid" next pointer to indicate the end of the table chain. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 8ed536b1e2838dad4f495347f0917b1cb6e3604f). Bug: 140266694 Change-Id: Id249e199fa9006c2b8166a7f869d0664e966cd04 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-catu.c | 251 +++++++++++++++++++ 1 file changed, 251 insertions(+) diff --git a/drivers/hwtracing/coresight/coresight-catu.c b/drivers/hwtracing/coresight/coresight-catu.c index 9d0cb1f6f16b..559d45b6bc39 100644 --- a/drivers/hwtracing/coresight/coresight-catu.c +++ b/drivers/hwtracing/coresight/coresight-catu.c @@ -16,10 +16,261 @@ #include "coresight-catu.h" #include "coresight-priv.h" +#include "coresight-tmc.h" #define csdev_to_catu_drvdata(csdev) \ dev_get_drvdata(csdev->dev.parent) +/* Verbose output for CATU table contents */ +#ifdef CATU_DEBUG +#define catu_dbg(x, ...) dev_dbg(x, __VA_ARGS__) +#else +#define catu_dbg(x, ...) do {} while (0) +#endif + +/* + * CATU uses a page size of 4KB for page tables as well as data pages. + * Each 64bit entry in the table has the following format. + * + * 63 12 1 0 + * ------------------------------------ + * | Address [63-12] | SBZ | V| + * ------------------------------------ + * + * Where bit[0] V indicates if the address is valid or not. + * Each 4K table pages have upto 256 data page pointers, taking upto 2K + * size. There are two Link pointers, pointing to the previous and next + * table pages respectively at the end of the 4K page. (i.e, entry 510 + * and 511). + * E.g, a table of two pages could look like : + * + * Table Page 0 Table Page 1 + * SLADDR ===> x------------------x x--> x-----------------x + * INADDR ->| Page 0 | V | | | Page 256 | V | <- INADDR+1M + * |------------------| | |-----------------| + * INADDR+4K ->| Page 1 | V | | | | + * |------------------| | |-----------------| + * | Page 2 | V | | | | + * |------------------| | |-----------------| + * | ... | V | | | ... | + * |------------------| | |-----------------| + * INADDR+1020K| Page 255 | V | | | Page 511 | V | + * SLADDR+2K==>|------------------| | |-----------------| + * | UNUSED | | | | | + * |------------------| | | | + * | UNUSED | | | | | + * |------------------| | | | + * | ... | | | | | + * |------------------| | |-----------------| + * | IGNORED | 0 | | | Table Page 0| 1 | + * |------------------| | |-----------------| + * | Table Page 1| 1 |--x | IGNORED | 0 | + * x------------------x x-----------------x + * SLADDR+4K==> + * + * The base input address (used by the ETR, programmed in INADDR_{LO,HI}) + * must be aligned to 1MB (the size addressable by a single page table). + * The CATU maps INADDR{LO:HI} to the first page in the table pointed + * to by SLADDR{LO:HI} and so on. + * + */ +typedef u64 cate_t; + +#define CATU_PAGE_SHIFT 12 +#define CATU_PAGE_SIZE (1UL << CATU_PAGE_SHIFT) +#define CATU_PAGES_PER_SYSPAGE (PAGE_SIZE / CATU_PAGE_SIZE) + +/* Page pointers are only allocated in the first 2K half */ +#define CATU_PTRS_PER_PAGE ((CATU_PAGE_SIZE >> 1) / sizeof(cate_t)) +#define CATU_PTRS_PER_SYSPAGE (CATU_PAGES_PER_SYSPAGE * CATU_PTRS_PER_PAGE) +#define CATU_LINK_PREV ((CATU_PAGE_SIZE / sizeof(cate_t)) - 2) +#define CATU_LINK_NEXT ((CATU_PAGE_SIZE / sizeof(cate_t)) - 1) + +#define CATU_ADDR_SHIFT 12 +#define CATU_ADDR_MASK ~(((cate_t)1 << CATU_ADDR_SHIFT) - 1) +#define CATU_ENTRY_VALID ((cate_t)0x1) +#define CATU_VALID_ENTRY(addr) \ + (((cate_t)(addr) & CATU_ADDR_MASK) | CATU_ENTRY_VALID) +#define CATU_ENTRY_ADDR(entry) ((cate_t)(entry) & ~((cate_t)CATU_ENTRY_VALID)) + +/* + * catu_get_table : Retrieve the table pointers for the given @offset + * within the buffer. The buffer is wrapped around to a valid offset. + * + * Returns : The CPU virtual address for the beginning of the table + * containing the data page pointer for @offset. If @daddrp is not NULL, + * @daddrp points the DMA address of the beginning of the table. + */ +static inline cate_t *catu_get_table(struct tmc_sg_table *catu_table, + unsigned long offset, + dma_addr_t *daddrp) +{ + unsigned long buf_size = tmc_sg_table_buf_size(catu_table); + unsigned int table_nr, pg_idx, pg_offset; + struct tmc_pages *table_pages = &catu_table->table_pages; + void *ptr; + + /* Make sure offset is within the range */ + offset %= buf_size; + + /* + * Each table can address 1MB and a single kernel page can + * contain "CATU_PAGES_PER_SYSPAGE" CATU tables. + */ + table_nr = offset >> 20; + /* Find the table page where the table_nr lies in */ + pg_idx = table_nr / CATU_PAGES_PER_SYSPAGE; + pg_offset = (table_nr % CATU_PAGES_PER_SYSPAGE) * CATU_PAGE_SIZE; + if (daddrp) + *daddrp = table_pages->daddrs[pg_idx] + pg_offset; + ptr = page_address(table_pages->pages[pg_idx]); + return (cate_t *)((unsigned long)ptr + pg_offset); +} + +#ifdef CATU_DEBUG +static void catu_dump_table(struct tmc_sg_table *catu_table) +{ + int i; + cate_t *table; + unsigned long table_end, buf_size, offset = 0; + + buf_size = tmc_sg_table_buf_size(catu_table); + dev_dbg(catu_table->dev, + "Dump table %p, tdaddr: %llx\n", + catu_table, catu_table->table_daddr); + + while (offset < buf_size) { + table_end = offset + SZ_1M < buf_size ? + offset + SZ_1M : buf_size; + table = catu_get_table(catu_table, offset, NULL); + for (i = 0; offset < table_end; i++, offset += CATU_PAGE_SIZE) + dev_dbg(catu_table->dev, "%d: %llx\n", i, table[i]); + dev_dbg(catu_table->dev, "Prev : %llx, Next: %llx\n", + table[CATU_LINK_PREV], table[CATU_LINK_NEXT]); + dev_dbg(catu_table->dev, "== End of sub-table ==="); + } + dev_dbg(catu_table->dev, "== End of Table ==="); +} + +#else +static inline void catu_dump_table(struct tmc_sg_table *catu_table) +{ +} +#endif + +static inline cate_t catu_make_entry(dma_addr_t addr) +{ + return addr ? CATU_VALID_ENTRY(addr) : 0; +} + +/* + * catu_populate_table : Populate the given CATU table. + * The table is always populated as a circular table. + * i.e, the "prev" link of the "first" table points to the "last" + * table and the "next" link of the "last" table points to the + * "first" table. The buffer should be made linear by calling + * catu_set_table(). + */ +static void +catu_populate_table(struct tmc_sg_table *catu_table) +{ + int i; + int sys_pidx; /* Index to current system data page */ + int catu_pidx; /* Index of CATU page within the system data page */ + unsigned long offset, buf_size, table_end; + dma_addr_t data_daddr; + dma_addr_t prev_taddr, next_taddr, cur_taddr; + cate_t *table_ptr, *next_table; + + buf_size = tmc_sg_table_buf_size(catu_table); + sys_pidx = catu_pidx = 0; + offset = 0; + + table_ptr = catu_get_table(catu_table, 0, &cur_taddr); + prev_taddr = 0; /* Prev link for the first table */ + + while (offset < buf_size) { + /* + * The @offset is always 1M aligned here and we have an + * empty table @table_ptr to fill. Each table can address + * upto 1MB data buffer. The last table may have fewer + * entries if the buffer size is not aligned. + */ + table_end = (offset + SZ_1M) < buf_size ? + (offset + SZ_1M) : buf_size; + for (i = 0; offset < table_end; + i++, offset += CATU_PAGE_SIZE) { + + data_daddr = catu_table->data_pages.daddrs[sys_pidx] + + catu_pidx * CATU_PAGE_SIZE; + catu_dbg(catu_table->dev, + "[table %5ld:%03d] 0x%llx\n", + (offset >> 20), i, data_daddr); + table_ptr[i] = catu_make_entry(data_daddr); + /* Move the pointers for data pages */ + catu_pidx = (catu_pidx + 1) % CATU_PAGES_PER_SYSPAGE; + if (catu_pidx == 0) + sys_pidx++; + } + + /* + * If we have finished all the valid entries, fill the rest of + * the table (i.e, last table page) with invalid entries, + * to fail the lookups. + */ + if (offset == buf_size) { + memset(&table_ptr[i], 0, + sizeof(cate_t) * (CATU_PTRS_PER_PAGE - i)); + next_taddr = 0; + } else { + next_table = catu_get_table(catu_table, + offset, &next_taddr); + } + + table_ptr[CATU_LINK_PREV] = catu_make_entry(prev_taddr); + table_ptr[CATU_LINK_NEXT] = catu_make_entry(next_taddr); + + catu_dbg(catu_table->dev, + "[table%5ld]: Cur: 0x%llx Prev: 0x%llx, Next: 0x%llx\n", + (offset >> 20) - 1, cur_taddr, prev_taddr, next_taddr); + + /* Update the prev/next addresses */ + if (next_taddr) { + prev_taddr = cur_taddr; + cur_taddr = next_taddr; + table_ptr = next_table; + } + } + + /* Sync the table for device */ + tmc_sg_table_sync_table(catu_table); +} + +static struct tmc_sg_table __maybe_unused * +catu_init_sg_table(struct device *catu_dev, int node, + ssize_t size, void **pages) +{ + int nr_tpages; + struct tmc_sg_table *catu_table; + + /* + * Each table can address upto 1MB and we can have + * CATU_PAGES_PER_SYSPAGE tables in a system page. + */ + nr_tpages = DIV_ROUND_UP(size, SZ_1M) / CATU_PAGES_PER_SYSPAGE; + catu_table = tmc_alloc_sg_table(catu_dev, node, nr_tpages, + size >> PAGE_SHIFT, pages); + if (IS_ERR(catu_table)) + return catu_table; + + catu_populate_table(catu_table); + dev_dbg(catu_dev, + "Setup table %p, size %ldKB, %d table pages\n", + catu_table, (unsigned long)size >> 10, nr_tpages); + catu_dump_table(catu_table); + return catu_table; +} + coresight_simple_reg32(struct catu_drvdata, devid, CORESIGHT_DEVID); coresight_simple_reg32(struct catu_drvdata, control, CATU_CONTROL); coresight_simple_reg32(struct catu_drvdata, status, CATU_STATUS); From 33c5b7081f4da0a7429262f0b1a27169f81d7ae8 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 11 Jul 2018 13:40:34 -0600 Subject: [PATCH 0488/3715] UPSTREAM: coresight: catu: Plug in CATU as a backend for ETR buffer Now that we can use a CATU with a scatter gather table, add support for the TMC ETR to make use of the connected CATU in translate mode. This is done by adding CATU as new buffer mode. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 434d611cddef1ceed32bf416a363992b01a3ff9a). Bug: 140266694 Change-Id: I2649242040ab4c1e6be92efd122e0a57e1354b19 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-catu.c | 122 +++++++++++++++++- drivers/hwtracing/coresight/coresight-catu.h | 35 +++++ .../hwtracing/coresight/coresight-tmc-etr.c | 25 +++- drivers/hwtracing/coresight/coresight-tmc.h | 3 + 4 files changed, 174 insertions(+), 11 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-catu.c b/drivers/hwtracing/coresight/coresight-catu.c index 559d45b6bc39..ff94e58845b7 100644 --- a/drivers/hwtracing/coresight/coresight-catu.c +++ b/drivers/hwtracing/coresight/coresight-catu.c @@ -28,6 +28,11 @@ #define catu_dbg(x, ...) do {} while (0) #endif +struct catu_etr_buf { + struct tmc_sg_table *catu_table; + dma_addr_t sladdr; +}; + /* * CATU uses a page size of 4KB for page tables as well as data pages. * Each 64bit entry in the table has the following format. @@ -93,6 +98,9 @@ typedef u64 cate_t; (((cate_t)(addr) & CATU_ADDR_MASK) | CATU_ENTRY_VALID) #define CATU_ENTRY_ADDR(entry) ((cate_t)(entry) & ~((cate_t)CATU_ENTRY_VALID)) +/* CATU expects the INADDR to be aligned to 1M. */ +#define CATU_DEFAULT_INADDR (1ULL << 20) + /* * catu_get_table : Retrieve the table pointers for the given @offset * within the buffer. The buffer is wrapped around to a valid offset. @@ -246,7 +254,7 @@ catu_populate_table(struct tmc_sg_table *catu_table) tmc_sg_table_sync_table(catu_table); } -static struct tmc_sg_table __maybe_unused * +static struct tmc_sg_table * catu_init_sg_table(struct device *catu_dev, int node, ssize_t size, void **pages) { @@ -271,6 +279,91 @@ catu_init_sg_table(struct device *catu_dev, int node, return catu_table; } +static void catu_free_etr_buf(struct etr_buf *etr_buf) +{ + struct catu_etr_buf *catu_buf; + + if (!etr_buf || etr_buf->mode != ETR_MODE_CATU || !etr_buf->private) + return; + + catu_buf = etr_buf->private; + tmc_free_sg_table(catu_buf->catu_table); + kfree(catu_buf); +} + +static ssize_t catu_get_data_etr_buf(struct etr_buf *etr_buf, u64 offset, + size_t len, char **bufpp) +{ + struct catu_etr_buf *catu_buf = etr_buf->private; + + return tmc_sg_table_get_data(catu_buf->catu_table, offset, len, bufpp); +} + +static void catu_sync_etr_buf(struct etr_buf *etr_buf, u64 rrp, u64 rwp) +{ + struct catu_etr_buf *catu_buf = etr_buf->private; + struct tmc_sg_table *catu_table = catu_buf->catu_table; + u64 r_offset, w_offset; + + /* + * ETR started off at etr_buf->hwaddr. Convert the RRP/RWP to + * offsets within the trace buffer. + */ + r_offset = rrp - etr_buf->hwaddr; + w_offset = rwp - etr_buf->hwaddr; + + if (!etr_buf->full) { + etr_buf->len = w_offset - r_offset; + if (w_offset < r_offset) + etr_buf->len += etr_buf->size; + } else { + etr_buf->len = etr_buf->size; + } + + etr_buf->offset = r_offset; + tmc_sg_table_sync_data_range(catu_table, r_offset, etr_buf->len); +} + +static int catu_alloc_etr_buf(struct tmc_drvdata *tmc_drvdata, + struct etr_buf *etr_buf, int node, void **pages) +{ + struct coresight_device *csdev; + struct device *catu_dev; + struct tmc_sg_table *catu_table; + struct catu_etr_buf *catu_buf; + + csdev = tmc_etr_get_catu_device(tmc_drvdata); + if (!csdev) + return -ENODEV; + catu_dev = csdev->dev.parent; + catu_buf = kzalloc(sizeof(*catu_buf), GFP_KERNEL); + if (!catu_buf) + return -ENOMEM; + + catu_table = catu_init_sg_table(catu_dev, node, etr_buf->size, pages); + if (IS_ERR(catu_table)) { + kfree(catu_buf); + return PTR_ERR(catu_table); + } + + etr_buf->mode = ETR_MODE_CATU; + etr_buf->private = catu_buf; + etr_buf->hwaddr = CATU_DEFAULT_INADDR; + + catu_buf->catu_table = catu_table; + /* Get the table base address */ + catu_buf->sladdr = catu_table->table_daddr; + + return 0; +} + +const struct etr_buf_operations etr_catu_buf_ops = { + .alloc = catu_alloc_etr_buf, + .free = catu_free_etr_buf, + .sync = catu_sync_etr_buf, + .get_data = catu_get_data_etr_buf, +}; + coresight_simple_reg32(struct catu_drvdata, devid, CORESIGHT_DEVID); coresight_simple_reg32(struct catu_drvdata, control, CATU_CONTROL); coresight_simple_reg32(struct catu_drvdata, status, CATU_STATUS); @@ -311,9 +404,10 @@ static inline int catu_wait_for_ready(struct catu_drvdata *drvdata) CATU_STATUS, CATU_STATUS_READY, 1); } -static int catu_enable_hw(struct catu_drvdata *drvdata, void *__unused) +static int catu_enable_hw(struct catu_drvdata *drvdata, void *data) { - u32 control; + u32 control, mode; + struct etr_buf *etr_buf = data; if (catu_wait_for_ready(drvdata)) dev_warn(drvdata->dev, "Timeout while waiting for READY\n"); @@ -325,9 +419,27 @@ static int catu_enable_hw(struct catu_drvdata *drvdata, void *__unused) } control |= BIT(CATU_CONTROL_ENABLE); - catu_write_mode(drvdata, CATU_MODE_PASS_THROUGH); + + if (etr_buf && etr_buf->mode == ETR_MODE_CATU) { + struct catu_etr_buf *catu_buf = etr_buf->private; + + mode = CATU_MODE_TRANSLATE; + catu_write_axictrl(drvdata, CATU_OS_AXICTRL); + catu_write_sladdr(drvdata, catu_buf->sladdr); + catu_write_inaddr(drvdata, CATU_DEFAULT_INADDR); + } else { + mode = CATU_MODE_PASS_THROUGH; + catu_write_sladdr(drvdata, 0); + catu_write_inaddr(drvdata, 0); + } + + catu_write_irqen(drvdata, 0); + catu_write_mode(drvdata, mode); catu_write_control(drvdata, control); - dev_dbg(drvdata->dev, "Enabled in Pass through mode\n"); + dev_dbg(drvdata->dev, "Enabled in %s mode\n", + (mode == CATU_MODE_PASS_THROUGH) ? + "Pass through" : + "Translate"); return 0; } diff --git a/drivers/hwtracing/coresight/coresight-catu.h b/drivers/hwtracing/coresight/coresight-catu.h index 4f221fccffca..1b281f0dcccc 100644 --- a/drivers/hwtracing/coresight/coresight-catu.h +++ b/drivers/hwtracing/coresight/coresight-catu.h @@ -27,6 +27,32 @@ #define CATU_MODE_PASS_THROUGH 0U #define CATU_MODE_TRANSLATE 1U +#define CATU_AXICTRL_ARCACHE_SHIFT 4 +#define CATU_AXICTRL_ARCACHE_MASK 0xf +#define CATU_AXICTRL_ARPROT_MASK 0x3 +#define CATU_AXICTRL_ARCACHE(arcache) \ + (((arcache) & CATU_AXICTRL_ARCACHE_MASK) << CATU_AXICTRL_ARCACHE_SHIFT) + +#define CATU_AXICTRL_VAL(arcache, arprot) \ + (CATU_AXICTRL_ARCACHE(arcache) | ((arprot) & CATU_AXICTRL_ARPROT_MASK)) + +#define AXI3_AxCACHE_WB_READ_ALLOC 0x7 +/* + * AXI - ARPROT bits: + * See AMBA AXI & ACE Protocol specification (ARM IHI 0022E) + * sectionA4.7 Access Permissions. + * + * Bit 0: 0 - Unprivileged access, 1 - Privileged access + * Bit 1: 0 - Secure access, 1 - Non-secure access. + * Bit 2: 0 - Data access, 1 - instruction access. + * + * CATU AXICTRL:ARPROT[2] is res0 as we always access data. + */ +#define CATU_OS_ARPROT 0x2 + +#define CATU_OS_AXICTRL \ + CATU_AXICTRL_VAL(AXI3_AxCACHE_WB_READ_ALLOC, CATU_OS_ARPROT) + #define CATU_STATUS_READY 8 #define CATU_STATUS_ADRERR 0 #define CATU_STATUS_AXIERR 4 @@ -67,6 +93,8 @@ catu_write_##name(struct catu_drvdata *drvdata, u64 val) \ CATU_REG32(control, CATU_CONTROL); CATU_REG32(mode, CATU_MODE); +CATU_REG32(irqen, CATU_IRQEN); +CATU_REG32(axictrl, CATU_AXICTRL); CATU_REG_PAIR(sladdr, CATU_SLADDRLO, CATU_SLADDRHI) CATU_REG_PAIR(inaddr, CATU_INADDRLO, CATU_INADDRHI) @@ -81,4 +109,11 @@ static inline bool coresight_is_catu_device(struct coresight_device *csdev) return true; } +#ifdef CONFIG_CORESIGHT_CATU +extern const struct etr_buf_operations etr_catu_buf_ops; +#else +/* Dummy declaration for the CATU ops */ +static const struct etr_buf_operations etr_catu_buf_ops; +#endif + #endif diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index b48a52890417..4abe12d8af20 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -721,7 +721,7 @@ static const struct etr_buf_operations etr_sg_buf_ops = { * Returns : coresight_device ptr for the CATU device if a CATU is found. * : NULL otherwise. */ -static inline struct coresight_device * +struct coresight_device * tmc_etr_get_catu_device(struct tmc_drvdata *drvdata) { int i; @@ -744,7 +744,7 @@ static inline void tmc_etr_enable_catu(struct tmc_drvdata *drvdata) struct coresight_device *catu = tmc_etr_get_catu_device(drvdata); if (catu && helper_ops(catu)->enable) - helper_ops(catu)->enable(catu, NULL); + helper_ops(catu)->enable(catu, drvdata->etr_buf); } static inline void tmc_etr_disable_catu(struct tmc_drvdata *drvdata) @@ -752,12 +752,13 @@ static inline void tmc_etr_disable_catu(struct tmc_drvdata *drvdata) struct coresight_device *catu = tmc_etr_get_catu_device(drvdata); if (catu && helper_ops(catu)->disable) - helper_ops(catu)->disable(catu, NULL); + helper_ops(catu)->disable(catu, drvdata->etr_buf); } static const struct etr_buf_operations *etr_buf_ops[] = { [ETR_MODE_FLAT] = &etr_flat_buf_ops, [ETR_MODE_ETR_SG] = &etr_sg_buf_ops, + [ETR_MODE_CATU] = &etr_catu_buf_ops, }; static inline int tmc_etr_mode_alloc_buf(int mode, @@ -765,12 +766,15 @@ static inline int tmc_etr_mode_alloc_buf(int mode, struct etr_buf *etr_buf, int node, void **pages) { - int rc; + int rc = -EINVAL; switch (mode) { case ETR_MODE_FLAT: case ETR_MODE_ETR_SG: - rc = etr_buf_ops[mode]->alloc(drvdata, etr_buf, node, pages); + case ETR_MODE_CATU: + if (etr_buf_ops[mode]->alloc) + rc = etr_buf_ops[mode]->alloc(drvdata, etr_buf, + node, pages); if (!rc) etr_buf->ops = etr_buf_ops[mode]; return rc; @@ -793,10 +797,14 @@ static struct etr_buf *tmc_alloc_etr_buf(struct tmc_drvdata *drvdata, { int rc = -ENOMEM; bool has_etr_sg, has_iommu; + bool has_sg, has_catu; struct etr_buf *etr_buf; has_etr_sg = tmc_etr_has_cap(drvdata, TMC_ETR_SG); has_iommu = iommu_get_domain_for_dev(drvdata->dev); + has_catu = !!tmc_etr_get_catu_device(drvdata); + + has_sg = has_catu || has_etr_sg; etr_buf = kzalloc(sizeof(*etr_buf), GFP_KERNEL); if (!etr_buf) @@ -817,17 +825,22 @@ static struct etr_buf *tmc_alloc_etr_buf(struct tmc_drvdata *drvdata, * */ if (!pages && - (!has_etr_sg || has_iommu || size < SZ_1M)) + (!has_sg || has_iommu || size < SZ_1M)) rc = tmc_etr_mode_alloc_buf(ETR_MODE_FLAT, drvdata, etr_buf, node, pages); if (rc && has_etr_sg) rc = tmc_etr_mode_alloc_buf(ETR_MODE_ETR_SG, drvdata, etr_buf, node, pages); + if (rc && has_catu) + rc = tmc_etr_mode_alloc_buf(ETR_MODE_CATU, drvdata, + etr_buf, node, pages); if (rc) { kfree(etr_buf); return ERR_PTR(rc); } + dev_dbg(drvdata->dev, "allocated buffer of size %ldKB in mode %d\n", + (unsigned long)size >> 10, etr_buf->mode); return etr_buf; } diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h index 266a16e03ebf..d8cb926e17ee 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.h +++ b/drivers/hwtracing/coresight/coresight-tmc.h @@ -137,6 +137,7 @@ enum tmc_mem_intf_width { enum etr_mode { ETR_MODE_FLAT, /* Uses contiguous flat buffer */ ETR_MODE_ETR_SG, /* Uses in-built TMC ETR SG mechanism */ + ETR_MODE_CATU, /* Use SG mechanism in CATU */ }; struct etr_buf_operations; @@ -314,4 +315,6 @@ tmc_sg_table_buf_size(struct tmc_sg_table *sg_table) return sg_table->data_pages.nr_pages << PAGE_SHIFT; } +struct coresight_device *tmc_etr_get_catu_device(struct tmc_drvdata *drvdata); + #endif From ed00fb441172cf18ad49aeab1fee544c8858ec72 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:17:36 -0600 Subject: [PATCH 0489/3715] UPSTREAM: coresight: Document error handling in coresight_register commit 6403587a930c ("coresight: use put_device() instead of kfree()") fixes the double freeing of resources and ensures that the device refcount is dropped properly. Add a comment to explain this to help the readers and prevent people trying to "unfix" it again. While at it, rename the labels for better readability. Cc: Mathieu Poirier Cc: Arvind Yadav Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit fac253e52fda73b6610f39716abe04dd2d919fb8). Bug: 140266694 Change-Id: Ic1ee4317af562695f4adb428111e0698cb248150 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index d208e7ffd515..e84cbebb8b9a 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -1028,7 +1028,7 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) csdev = kzalloc(sizeof(*csdev), GFP_KERNEL); if (!csdev) { ret = -ENOMEM; - goto err_kzalloc_csdev; + goto err_out; } if (desc->type == CORESIGHT_DEV_TYPE_LINK || @@ -1044,7 +1044,7 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) refcnts = kcalloc(nr_refcnts, sizeof(*refcnts), GFP_KERNEL); if (!refcnts) { ret = -ENOMEM; - goto err_kzalloc_refcnts; + goto err_free_csdev; } csdev->refcnt = refcnts; @@ -1057,7 +1057,7 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) conns = kcalloc(csdev->nr_outport, sizeof(*conns), GFP_KERNEL); if (!conns) { ret = -ENOMEM; - goto err_kzalloc_conns; + goto err_free_refcnts; } for (i = 0; i < csdev->nr_outport; i++) { @@ -1084,7 +1084,11 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) ret = device_register(&csdev->dev); if (ret) { put_device(&csdev->dev); - goto err_kzalloc_csdev; + /* + * All resources are free'd explicitly via + * coresight_device_release(), triggered from put_device(). + */ + goto err_out; } mutex_lock(&coresight_mutex); @@ -1096,11 +1100,11 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) return csdev; -err_kzalloc_conns: +err_free_refcnts: kfree(refcnts); -err_kzalloc_refcnts: +err_free_csdev: kfree(csdev); -err_kzalloc_csdev: +err_out: return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(coresight_register); From 3e29f66f939ffce82c0e40fc53ebf76a4b862c85 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:17:37 -0600 Subject: [PATCH 0490/3715] UPSTREAM: coresight: platform: Refactor graph endpoint parsing Refactor the of graph endpoint parsing code, to make the error handling easier. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 6575fdb74645c14453f3119568ec45cbc54d4afb). Bug: 140266694 Change-Id: If271717bc321a90eb9dfaa40c3a187395991f931 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/of_coresight.c | 138 +++++++++++++-------- 1 file changed, 83 insertions(+), 55 deletions(-) diff --git a/drivers/hwtracing/coresight/of_coresight.c b/drivers/hwtracing/coresight/of_coresight.c index a18794128bf8..eb8c2bd0104e 100644 --- a/drivers/hwtracing/coresight/of_coresight.c +++ b/drivers/hwtracing/coresight/of_coresight.c @@ -127,17 +127,70 @@ int of_coresight_get_cpu(const struct device_node *node) } EXPORT_SYMBOL_GPL(of_coresight_get_cpu); +/* + * of_coresight_parse_endpoint : Parse the given output endpoint @ep + * and fill the connection information in @pdata[@i]. + * + * Parses the local port, remote device name and the remote port. + * + * Returns : + * 1 - If the parsing is successful and a connection record + * was created for an output connection. + * 0 - If the parsing completed without any fatal errors. + * -Errno - Fatal error, abort the scanning. + */ +static int of_coresight_parse_endpoint(struct device *dev, + struct device_node *ep, + struct coresight_platform_data *pdata, + int i) +{ + int ret = 0; + struct of_endpoint endpoint, rendpoint; + struct device_node *rparent = NULL; + struct device_node *rport = NULL; + struct device *rdev = NULL; + + do { + /* Parse the local port details */ + if (of_graph_parse_endpoint(ep, &endpoint)) + break; + /* + * Get a handle on the remote port and parent + * attached to it. + */ + rparent = of_graph_get_remote_port_parent(ep); + if (!rparent) + break; + rport = of_graph_get_remote_port(ep); + if (!rport) + break; + if (of_graph_parse_endpoint(rport, &rendpoint)) + break; + + /* If the remote device is not available, defer probing */ + rdev = of_coresight_get_endpoint_device(rparent); + if (!rdev) { + ret = -EPROBE_DEFER; + break; + } + + pdata->outports[i] = endpoint.port; + pdata->child_names[i] = dev_name(rdev); + pdata->child_ports[i] = rendpoint.id; + /* Connection record updated */ + ret = 1; + } while (0); + + return ret; +} + struct coresight_platform_data * of_get_coresight_platform_data(struct device *dev, const struct device_node *node) { int i = 0, ret = 0; struct coresight_platform_data *pdata; - struct of_endpoint endpoint, rendpoint; - struct device *rdev; struct device_node *ep = NULL; - struct device_node *rparent = NULL; - struct device_node *rport = NULL; pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL); if (!pdata) @@ -145,64 +198,39 @@ of_get_coresight_platform_data(struct device *dev, /* Use device name as sysfs handle */ pdata->name = dev_name(dev); + pdata->cpu = of_coresight_get_cpu(node); /* Get the number of input and output port for this component */ of_coresight_get_ports(node, &pdata->nr_inport, &pdata->nr_outport); - if (pdata->nr_outport) { - ret = of_coresight_alloc_memory(dev, pdata); - if (ret) + /* If there are no output connections, we are done */ + if (!pdata->nr_outport) + return pdata; + + ret = of_coresight_alloc_memory(dev, pdata); + if (ret) + return ERR_PTR(ret); + + /* Iterate through each port to discover topology */ + while ((ep = of_graph_get_next_endpoint(node, ep))) { + /* + * No need to deal with input ports, as processing the + * output ports connected to them will process the details. + */ + if (of_find_property(ep, "slave-mode", NULL)) + continue; + + ret = of_coresight_parse_endpoint(dev, ep, pdata, i); + switch (ret) { + case 1: + i++; /* Fall through */ + case 0: + break; + default: return ERR_PTR(ret); - - /* Iterate through each port to discover topology */ - do { - /* Get a handle on a port */ - ep = of_graph_get_next_endpoint(node, ep); - if (!ep) - break; - - /* - * No need to deal with input ports, processing for as - * processing for output ports will deal with them. - */ - if (of_find_property(ep, "slave-mode", NULL)) - continue; - - /* Get a handle on the local endpoint */ - ret = of_graph_parse_endpoint(ep, &endpoint); - - if (ret) - continue; - - /* The local out port number */ - pdata->outports[i] = endpoint.port; - - /* - * Get a handle on the remote port and parent - * attached to it. - */ - rparent = of_graph_get_remote_port_parent(ep); - rport = of_graph_get_remote_port(ep); - - if (!rparent || !rport) - continue; - - if (of_graph_parse_endpoint(rport, &rendpoint)) - continue; - - rdev = of_coresight_get_endpoint_device(rparent); - if (!rdev) - return ERR_PTR(-EPROBE_DEFER); - - pdata->child_names[i] = dev_name(rdev); - pdata->child_ports[i] = rendpoint.id; - - i++; - } while (ep); + } } - pdata->cpu = of_coresight_get_cpu(node); - return pdata; } EXPORT_SYMBOL_GPL(of_get_coresight_platform_data); From c155acd987ebf6f136fd7f7b564ed134c1af3ba5 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:17:38 -0600 Subject: [PATCH 0491/3715] UPSTREAM: coresight: platform: Fix refcounting for graph nodes The coresight driver doesn't drop the references on the remote endpoint/port nodes. Add the missing of_node_put() calls. Reported-by: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit a0f9992c809fb73a05de1894734418a88178539f). Bug: 140266694 Change-Id: I40343d76a90d9e6e86f65c27a6b7ff7b44b24e33 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/of_coresight.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/coresight/of_coresight.c b/drivers/hwtracing/coresight/of_coresight.c index eb8c2bd0104e..02171d31b511 100644 --- a/drivers/hwtracing/coresight/of_coresight.c +++ b/drivers/hwtracing/coresight/of_coresight.c @@ -181,6 +181,11 @@ static int of_coresight_parse_endpoint(struct device *dev, ret = 1; } while (0); + if (rparent) + of_node_put(rparent); + if (rport) + of_node_put(rport); + return ret; } From 2985337ac82ff3dcccc3e759432ee8833b522d58 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:17:39 -0600 Subject: [PATCH 0492/3715] UPSTREAM: coresight: platform: Fix leaking device reference We don't drop the reference on the remote device while parsing the connection, held by bus_find_device(). Fix this by duplicating the device name and dropping the reference. Cc: Mathieu Poirier Cc: Kim Phillips Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 5111e749c775ebae6f7d39c6f836cb3f06c7b938). Bug: 140266694 Change-Id: I99c2664ba29a88250db8f03c2d22dea59f29ded3 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/of_coresight.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/of_coresight.c b/drivers/hwtracing/coresight/of_coresight.c index 02171d31b511..00350062ec68 100644 --- a/drivers/hwtracing/coresight/of_coresight.c +++ b/drivers/hwtracing/coresight/of_coresight.c @@ -175,7 +175,9 @@ static int of_coresight_parse_endpoint(struct device *dev, } pdata->outports[i] = endpoint.port; - pdata->child_names[i] = dev_name(rdev); + pdata->child_names[i] = devm_kstrdup(dev, + dev_name(rdev), + GFP_KERNEL); pdata->child_ports[i] = rendpoint.id; /* Connection record updated */ ret = 1; @@ -185,6 +187,8 @@ static int of_coresight_parse_endpoint(struct device *dev, of_node_put(rparent); if (rport) of_node_put(rport); + if (rdev) + put_device(rdev); return ret; } From 7b667e20f57635ab1d4b5eb4afc6a79b39340ebd Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:17:40 -0600 Subject: [PATCH 0493/3715] UPSTREAM: coresight: Fix remote endpoint parsing When parsing the remote endpoint of an output port, we do : rport = of_graph_get_remote_port(ep); rparent = of_graph_get_remote_port_parent(ep); and then parse the "remote_port" as if it was the remote endpoint, which is wrong. The code worked fine because we used endpoint number as the port number. Let us fix it and optimise a bit as: remote_ep = of_graph_get_remote_endpoint(ep); if (remote_ep) remote_parent = of_graph_get_port_parent(remote_ep); and then, parse the remote_ep for the port/endpoint details. Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 96330407f86abe8e5f0594734cff28b6196c94d7). Bug: 140266694 Change-Id: Ie617d59d8a945689f7c33d84e16db6b113c36443 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/of_coresight.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/hwtracing/coresight/of_coresight.c b/drivers/hwtracing/coresight/of_coresight.c index 00350062ec68..f6941fda9777 100644 --- a/drivers/hwtracing/coresight/of_coresight.c +++ b/drivers/hwtracing/coresight/of_coresight.c @@ -147,7 +147,7 @@ static int of_coresight_parse_endpoint(struct device *dev, int ret = 0; struct of_endpoint endpoint, rendpoint; struct device_node *rparent = NULL; - struct device_node *rport = NULL; + struct device_node *rep = NULL; struct device *rdev = NULL; do { @@ -155,16 +155,16 @@ static int of_coresight_parse_endpoint(struct device *dev, if (of_graph_parse_endpoint(ep, &endpoint)) break; /* - * Get a handle on the remote port and parent - * attached to it. + * Get a handle on the remote endpoint and the device it is + * attached to. */ - rparent = of_graph_get_remote_port_parent(ep); + rep = of_graph_get_remote_endpoint(ep); + if (!rep) + break; + rparent = of_graph_get_port_parent(rep); if (!rparent) break; - rport = of_graph_get_remote_port(ep); - if (!rport) - break; - if (of_graph_parse_endpoint(rport, &rendpoint)) + if (of_graph_parse_endpoint(rep, &rendpoint)) break; /* If the remote device is not available, defer probing */ @@ -178,15 +178,15 @@ static int of_coresight_parse_endpoint(struct device *dev, pdata->child_names[i] = devm_kstrdup(dev, dev_name(rdev), GFP_KERNEL); - pdata->child_ports[i] = rendpoint.id; + pdata->child_ports[i] = rendpoint.port; /* Connection record updated */ ret = 1; } while (0); if (rparent) of_node_put(rparent); - if (rport) - of_node_put(rport); + if (rep) + of_node_put(rep); if (rdev) put_device(rdev); From ef59abefcb4e573b8f0eabc456aebb501afc7c64 Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Thu, 20 Sep 2018 13:17:44 -0600 Subject: [PATCH 0494/3715] UPSTREAM: coresight: Use ERR_CAST instead of ERR_PTR Use ERR_CAT inlined function to replace the ERR_PTR(PTR_ERR). It make the code more concise. Signed-off-by: zhong jiang Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit bbd35ba6fab5419e58e96f35f1431f13bdc14f98). Bug: 140266694 Change-Id: I0556f3a4f75175c96498665d5f47060b9a9b14d1 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-tmc-etr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 4abe12d8af20..7843fd4bd9bc 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -547,7 +547,7 @@ tmc_init_etr_sg_table(struct device *dev, int node, sg_table = tmc_alloc_sg_table(dev, node, nr_tpages, nr_dpages, pages); if (IS_ERR(sg_table)) { kfree(etr_table); - return ERR_PTR(PTR_ERR(sg_table)); + return ERR_CAST(sg_table); } etr_table->sg_table = sg_table; From fa79da768196003e3fe769d293e804613563ccce Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:17:45 -0600 Subject: [PATCH 0495/3715] UPSTREAM: coresight: Fix handling of sinks The coresight components could be operated either in sysfs mode or in perf mode. For some of the components, the mode of operation doesn't matter as they simply relay the data to the next component in the trace path. But for sinks, they need to be able to provide the trace data back to the user. Thus we need to make sure that "mode" is handled appropriately. e.g, the sysfs mode could have multiple sources driving the trace data, while perf mode doesn't allow sharing the sink. The coresight_enable_sink() however doesn't really allow this check to trigger as it skips the "enable_sink" callback if the component is already enabled, irrespective of the mode. This could cause mixing of data from different modes or even same mode (in perf), if the sources are different. Also, if we fail to enable the sink while enabling a path (where sink is the first component enabled), we could end up in disabling the components in the "entire" path which were not enabled in this trial, causing disruptions in the existing trace paths. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit c71369de02b285d9da526a526d8f2affc7b17c59). Bug: 140266694 Change-Id: I12733e459c4adafdb0d161327416281adfc964ff Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index e84cbebb8b9a..f3e7a745bf33 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -139,12 +139,14 @@ static int coresight_enable_sink(struct coresight_device *csdev, u32 mode) { int ret; - if (!csdev->enable) { - if (sink_ops(csdev)->enable) { - ret = sink_ops(csdev)->enable(csdev, mode); - if (ret) - return ret; - } + /* + * We need to make sure the "new" session is compatible with the + * existing "mode" of operation. + */ + if (sink_ops(csdev)->enable) { + ret = sink_ops(csdev)->enable(csdev, mode); + if (ret) + return ret; csdev->enable = true; } @@ -346,8 +348,14 @@ int coresight_enable_path(struct list_head *path, u32 mode) switch (type) { case CORESIGHT_DEV_TYPE_SINK: ret = coresight_enable_sink(csdev, mode); + /* + * Sink is the first component turned on. If we + * failed to enable the sink, there are no components + * that need disabling. Disabling the path here + * would mean we could disrupt an existing session. + */ if (ret) - goto err; + goto out; break; case CORESIGHT_DEV_TYPE_SOURCE: /* sources are enabled from either sysFS or Perf */ From ff3e10988db0dc2fcbfdfe13ca3003aca4ba75fc Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:17:47 -0600 Subject: [PATCH 0496/3715] UPSTREAM: coresight: perf: Fix per cpu path management We create a coresight trace path for each online CPU when we start the event. We rely on the number of online CPUs and then go on to allocate an array matching the "number of online CPUs" for holding the path and then uses normal CPU id as the index to the array. This is problematic as we could have some offline CPUs causing us to access beyond the actual array size (e.g, on a dual SMP system, if CPU0 is offline, CPU1 could be really accessing beyond the array). The solution is to switch to per-cpu array for holding the path. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 5ecabe4a76e8cdb61fa3e24862d9ca240a1c4ddf). Bug: 140266694 Change-Id: Iefbd55ef00149165558749af67dcef5756be2046 Signed-off-by: Yabin Cui --- .../hwtracing/coresight/coresight-etm-perf.c | 55 ++++++++++++++----- 1 file changed, 40 insertions(+), 15 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index 8a0ad77574e7..99cbf5d5d1c1 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -44,7 +45,7 @@ struct etm_event_data { struct work_struct work; cpumask_t mask; void *snk_config; - struct list_head **path; + struct list_head * __percpu *path; }; static DEFINE_PER_CPU(struct perf_output_handle, ctx_handle); @@ -72,6 +73,18 @@ static const struct attribute_group *etm_pmu_attr_groups[] = { NULL, }; +static inline struct list_head ** +etm_event_cpu_path_ptr(struct etm_event_data *data, int cpu) +{ + return per_cpu_ptr(data->path, cpu); +} + +static inline struct list_head * +etm_event_cpu_path(struct etm_event_data *data, int cpu) +{ + return *etm_event_cpu_path_ptr(data, cpu); +} + static void etm_event_read(struct perf_event *event) {} static int etm_addr_filters_alloc(struct perf_event *event) @@ -131,23 +144,26 @@ static void free_event_data(struct work_struct *work) */ if (event_data->snk_config) { cpu = cpumask_first(mask); - sink = coresight_get_sink(event_data->path[cpu]); + sink = coresight_get_sink(etm_event_cpu_path(event_data, cpu)); if (sink_ops(sink)->free_buffer) sink_ops(sink)->free_buffer(event_data->snk_config); } for_each_cpu(cpu, mask) { - if (!(IS_ERR_OR_NULL(event_data->path[cpu]))) - coresight_release_path(event_data->path[cpu]); + struct list_head **ppath; + + ppath = etm_event_cpu_path_ptr(event_data, cpu); + if (!(IS_ERR_OR_NULL(*ppath))) + coresight_release_path(*ppath); + *ppath = NULL; } - kfree(event_data->path); + free_percpu(event_data->path); kfree(event_data); } static void *alloc_event_data(int cpu) { - int size; cpumask_t *mask; struct etm_event_data *event_data; @@ -158,7 +174,6 @@ static void *alloc_event_data(int cpu) /* Make sure nothing disappears under us */ get_online_cpus(); - size = num_online_cpus(); mask = &event_data->mask; if (cpu != -1) @@ -175,8 +190,8 @@ static void *alloc_event_data(int cpu) * unused memory when dealing with single CPU trace scenarios is small * compared to the cost of searching through an optimized array. */ - event_data->path = kcalloc(size, - sizeof(struct list_head *), GFP_KERNEL); + event_data->path = alloc_percpu(struct list_head *); + if (!event_data->path) { kfree(event_data); return NULL; @@ -224,6 +239,7 @@ static void *etm_setup_aux(int event_cpu, void **pages, /* Setup the path for each CPU in a trace session */ for_each_cpu(cpu, mask) { + struct list_head *path; struct coresight_device *csdev; csdev = per_cpu(csdev_src, cpu); @@ -235,9 +251,11 @@ static void *etm_setup_aux(int event_cpu, void **pages, * list of devices from source to sink that can be * referenced later when the path is actually needed. */ - event_data->path[cpu] = coresight_build_path(csdev, sink); - if (IS_ERR(event_data->path[cpu])) + path = coresight_build_path(csdev, sink); + if (IS_ERR(path)) goto err; + + *etm_event_cpu_path_ptr(event_data, cpu) = path; } if (!sink_ops(sink)->alloc_buffer) @@ -266,6 +284,7 @@ static void etm_event_start(struct perf_event *event, int flags) struct etm_event_data *event_data; struct perf_output_handle *handle = this_cpu_ptr(&ctx_handle); struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu); + struct list_head *path; if (!csdev) goto fail; @@ -278,8 +297,9 @@ static void etm_event_start(struct perf_event *event, int flags) if (!event_data) goto fail; + path = etm_event_cpu_path(event_data, cpu); /* We need a sink, no need to continue without one */ - sink = coresight_get_sink(event_data->path[cpu]); + sink = coresight_get_sink(path); if (WARN_ON_ONCE(!sink || !sink_ops(sink)->set_buffer)) goto fail_end_stop; @@ -289,7 +309,7 @@ static void etm_event_start(struct perf_event *event, int flags) goto fail_end_stop; /* Nothing will happen without a path */ - if (coresight_enable_path(event_data->path[cpu], CS_MODE_PERF)) + if (coresight_enable_path(path, CS_MODE_PERF)) goto fail_end_stop; /* Tell the perf core the event is alive */ @@ -317,6 +337,7 @@ static void etm_event_stop(struct perf_event *event, int mode) struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu); struct perf_output_handle *handle = this_cpu_ptr(&ctx_handle); struct etm_event_data *event_data = perf_get_aux(handle); + struct list_head *path; if (event->hw.state == PERF_HES_STOPPED) return; @@ -324,7 +345,11 @@ static void etm_event_stop(struct perf_event *event, int mode) if (!csdev) return; - sink = coresight_get_sink(event_data->path[cpu]); + path = etm_event_cpu_path(event_data, cpu); + if (!path) + return; + + sink = coresight_get_sink(path); if (!sink) return; @@ -355,7 +380,7 @@ static void etm_event_stop(struct perf_event *event, int mode) } /* Disabling the path make its elements available to other sessions */ - coresight_disable_path(event_data->path[cpu]); + coresight_disable_path(path); } static int etm_event_add(struct perf_event *event, int mode) From a7f32ea06c7eb4e49f4fc922373ca75f1cfa19b9 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:17:48 -0600 Subject: [PATCH 0497/3715] UPSTREAM: coresight: perf: Avoid unncessary CPU hotplug read lock We hold the read lock on CPU hotplug to simply copy the online mask, which is not really needed. And this can cause a lockdep warning, like : [ 54.632093] ====================================================== [ 54.638207] WARNING: possible circular locking dependency detected [ 54.644322] 4.18.0-rc3-00042-g2d39e6356bb7-dirty #309 Not tainted [ 54.650350] ------------------------------------------------------ [ 54.656464] perf/2862 is trying to acquire lock: [ 54.661031] 000000007e21d170 (&event->mmap_mutex){+.+.}, at: perf_event_set_output+0x98/0x138 [ 54.669486] [ 54.669486] but task is already holding lock: [ 54.675256] 000000001080eb1b (&cpuctx_mutex){+.+.}, at: perf_event_ctx_lock_nested+0xf8/0x1f0 [ 54.683704] [ 54.683704] which lock already depends on the new lock. [ 54.683704] [ 54.691797] [ 54.691797] the existing dependency chain (in reverse order) is: [ 54.699201] [ 54.699201] -> #3 (&cpuctx_mutex){+.+.}: [ 54.704556] __mutex_lock+0x70/0x808 [ 54.708608] mutex_lock_nested+0x1c/0x28 [ 54.713005] perf_event_init_cpu+0x8c/0xd8 [ 54.717574] perf_event_init+0x194/0x1d4 [ 54.721971] start_kernel+0x2b8/0x42c [ 54.726107] [ 54.726107] -> #2 (pmus_lock){+.+.}: [ 54.731114] __mutex_lock+0x70/0x808 [ 54.735165] mutex_lock_nested+0x1c/0x28 [ 54.739560] perf_event_init_cpu+0x30/0xd8 [ 54.744129] cpuhp_invoke_callback+0x84/0x248 [ 54.748954] _cpu_up+0xe8/0x1c8 [ 54.752576] do_cpu_up+0xa8/0xc8 [ 54.756283] cpu_up+0x10/0x18 [ 54.759731] smp_init+0xa0/0x114 [ 54.763438] kernel_init_freeable+0x120/0x288 [ 54.768264] kernel_init+0x10/0x108 [ 54.772230] ret_from_fork+0x10/0x18 [ 54.776279] [ 54.776279] -> #1 (cpu_hotplug_lock.rw_sem){++++}: [ 54.782492] cpus_read_lock+0x34/0xb0 [ 54.786631] etm_setup_aux+0x5c/0x308 [ 54.790769] rb_alloc_aux+0x1ec/0x300 [ 54.794906] perf_mmap+0x284/0x610 [ 54.798787] mmap_region+0x388/0x570 [ 54.802838] do_mmap+0x344/0x4f8 [ 54.806544] vm_mmap_pgoff+0xe4/0x110 [ 54.810682] ksys_mmap_pgoff+0xa8/0x240 [ 54.814992] sys_mmap+0x18/0x28 [ 54.818613] el0_svc_naked+0x30/0x34 [ 54.822661] [ 54.822661] -> #0 (&event->mmap_mutex){+.+.}: [ 54.828445] lock_acquire+0x48/0x68 [ 54.832409] __mutex_lock+0x70/0x808 [ 54.836459] mutex_lock_nested+0x1c/0x28 [ 54.840855] perf_event_set_output+0x98/0x138 [ 54.845680] _perf_ioctl+0x2a0/0x6a0 [ 54.849731] perf_ioctl+0x3c/0x68 [ 54.853526] do_vfs_ioctl+0xb8/0xa20 [ 54.857577] ksys_ioctl+0x80/0xb8 [ 54.861370] sys_ioctl+0xc/0x18 [ 54.864990] el0_svc_naked+0x30/0x34 [ 54.869039] [ 54.869039] other info that might help us debug this: [ 54.869039] [ 54.876960] Chain exists of: [ 54.876960] &event->mmap_mutex --> pmus_lock --> &cpuctx_mutex [ 54.876960] [ 54.887217] Possible unsafe locking scenario: [ 54.887217] [ 54.893073] CPU0 CPU1 [ 54.897552] ---- ---- [ 54.902030] lock(&cpuctx_mutex); [ 54.905396] lock(pmus_lock); [ 54.910911] lock(&cpuctx_mutex); [ 54.916770] lock(&event->mmap_mutex); [ 54.920566] [ 54.920566] *** DEADLOCK *** [ 54.920566] [ 54.926424] 1 lock held by perf/2862: [ 54.930042] #0: 000000001080eb1b (&cpuctx_mutex){+.+.}, at: perf_event_ctx_lock_nested+0xf8/0x1f0 Since we have per-cpu array for the paths, we simply don't care about the number of online CPUs. This patch gets rid of the {get/put}_online_cpus(). Reported-by: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit c48fb3bbe912a295e5b75eaabaf39874d5b9b773). Bug: 140266694 Change-Id: Iec5bb31efa37ee54fa0140dde6486758708879e2 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-etm-perf.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index 99cbf5d5d1c1..63b59037d210 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -172,15 +172,12 @@ static void *alloc_event_data(int cpu) if (!event_data) return NULL; - /* Make sure nothing disappears under us */ - get_online_cpus(); mask = &event_data->mask; if (cpu != -1) cpumask_set_cpu(cpu, mask); else cpumask_copy(mask, cpu_online_mask); - put_online_cpus(); /* * Each CPU has a single path between source and destination. As such From 46baedaba139f99d522c64d3102f93bfc5801704 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:17:49 -0600 Subject: [PATCH 0498/3715] UPSTREAM: coresight: perf: Allow tracing on hotplugged CPUs At the moment, if there is no CPU specified for a given event, we use cpu_online_mask and try to build path for each of the CPUs in the mask. This could prevent any CPU that is turned online later to be used for the tracing. This patch changes to use the cpu_present_mask and tries to build path for as much CPUs as possible ignoring the failures in building path for some of the CPUs. If ever we try to trace on those CPUs, we fail the operation. Based on a patch from Mathieu Poirier. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit f9d81a657bb833ef030a092c50230359dfef4648). Bug: 140266694 Change-Id: I8b93954ce954c1ffd973ff0eeb6290385679e56b Signed-off-by: Yabin Cui --- .../hwtracing/coresight/coresight-etm-perf.c | 44 ++++++++++++------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index 63b59037d210..a2d77ec00769 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -138,11 +138,9 @@ static void free_event_data(struct work_struct *work) event_data = container_of(work, struct etm_event_data, work); mask = &event_data->mask; - /* - * First deal with the sink configuration. See comment in - * etm_setup_aux() about why we take the first available path. - */ - if (event_data->snk_config) { + + /* Free the sink buffers, if there are any */ + if (event_data->snk_config && !WARN_ON(cpumask_empty(mask))) { cpu = cpumask_first(mask); sink = coresight_get_sink(etm_event_cpu_path(event_data, cpu)); if (sink_ops(sink)->free_buffer) @@ -177,7 +175,7 @@ static void *alloc_event_data(int cpu) if (cpu != -1) cpumask_set_cpu(cpu, mask); else - cpumask_copy(mask, cpu_online_mask); + cpumask_copy(mask, cpu_present_mask); /* * Each CPU has a single path between source and destination. As such @@ -229,19 +227,32 @@ static void *etm_setup_aux(int event_cpu, void **pages, * on the cmd line. As such the "enable_sink" flag in sysFS is reset. */ sink = coresight_get_enabled_sink(true); - if (!sink) + if (!sink || !sink_ops(sink)->alloc_buffer) goto err; mask = &event_data->mask; - /* Setup the path for each CPU in a trace session */ + /* + * Setup the path for each CPU in a trace session. We try to build + * trace path for each CPU in the mask. If we don't find an ETM + * for the CPU or fail to build a path, we clear the CPU from the + * mask and continue with the rest. If ever we try to trace on those + * CPUs, we can handle it and fail the session. + */ for_each_cpu(cpu, mask) { struct list_head *path; struct coresight_device *csdev; csdev = per_cpu(csdev_src, cpu); - if (!csdev) - goto err; + /* + * If there is no ETM associated with this CPU clear it from + * the mask and continue with the rest. If ever we try to trace + * on this CPU, we handle it accordingly. + */ + if (!csdev) { + cpumask_clear_cpu(cpu, mask); + continue; + } /* * Building a path doesn't enable it, it simply builds a @@ -249,17 +260,20 @@ static void *etm_setup_aux(int event_cpu, void **pages, * referenced later when the path is actually needed. */ path = coresight_build_path(csdev, sink); - if (IS_ERR(path)) - goto err; + if (IS_ERR(path)) { + cpumask_clear_cpu(cpu, mask); + continue; + } *etm_event_cpu_path_ptr(event_data, cpu) = path; } - if (!sink_ops(sink)->alloc_buffer) + /* If we don't have any CPUs ready for tracing, abort */ + cpu = cpumask_first(mask); + if (cpu >= nr_cpu_ids) goto err; - cpu = cpumask_first(mask); - /* Get the AUX specific data from the sink buffer */ + /* Allocate the sink buffer for this session */ event_data->snk_config = sink_ops(sink)->alloc_buffer(sink, cpu, pages, nr_pages, overwrite); From d249b39ebf6294f289ec8fba1ed3731557f14630 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:17:50 -0600 Subject: [PATCH 0499/3715] UPSTREAM: coresight: perf: Disable trace path upon source error We enable the trace path, before activating the source. If we fail to enable the source, we must disable the path to make sure it is available for another session. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 4f8ef21007531c3d7cb5b826e7b2c8999b65ecae). Bug: 140266694 Change-Id: Icb4074ae4ae5a4836b752591a56d8ce2de709597 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-etm-perf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index a2d77ec00769..ad85f4913b14 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -328,11 +328,13 @@ static void etm_event_start(struct perf_event *event, int flags) /* Finally enable the tracer */ if (source_ops(csdev)->enable(csdev, event, CS_MODE_PERF)) - goto fail_end_stop; + goto fail_disable_path; out: return; +fail_disable_path: + coresight_disable_path(path); fail_end_stop: perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); perf_aux_output_end(handle, 0); From 2bac167126359f2189ffd134bd4d47e2f8b6b5fb Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:17:51 -0600 Subject: [PATCH 0500/3715] UPSTREAM: coresight: tmc-etr: Handle driver mode specific ETR buffers Since the ETR could be driven either by SYSFS or by perf, it becomes complicated how we deal with the buffers used for each of these modes. The ETR driver cannot simply free the current attached buffer without knowing the provider (i.e, sysfs vs perf). To solve this issue, we provide: 1) the driver-mode specific etr buffer to be retained in the drvdata 2) the etr_buf for a session should be passed on when enabling the hardware, which will be stored in drvdata->etr_buf. This will be replaced (not free'd) as soon as the hardware is disabled, after necessary sync operation. The advantages of this are : 1) The common code path doesn't need to worry about how to dispose an existing buffer, if it is about to start a new session with a different buffer, possibly in a different mode. 2) The driver mode can control its buffers and can get access to the saved session even when the hardware is operating in a different mode. (e.g, we can still access a trace buffer from a sysfs mode even if the etr is now used in perf mode, without disrupting the current session.) Towards this, we introduce a sysfs specific data which will hold the etr_buf used for sysfs mode of operation, controlled solely by the sysfs mode handling code. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 96a7f644006ecc05eaaa1a5d09373d0ee63beb0a). Bug: 140266694 Change-Id: Id666201f5cd46cfb82e17fc1ccc2e2b8e1fc9dd5 Signed-off-by: Yabin Cui --- .../hwtracing/coresight/coresight-tmc-etr.c | 58 ++++++++++++------- drivers/hwtracing/coresight/coresight-tmc.h | 2 + 2 files changed, 40 insertions(+), 20 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 7843fd4bd9bc..cc73c609bb24 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -906,10 +906,15 @@ static void tmc_sync_etr_buf(struct tmc_drvdata *drvdata) tmc_etr_buf_insert_barrier_packet(etr_buf, etr_buf->offset); } -static void tmc_etr_enable_hw(struct tmc_drvdata *drvdata) +static void tmc_etr_enable_hw(struct tmc_drvdata *drvdata, + struct etr_buf *etr_buf) { u32 axictl, sts; - struct etr_buf *etr_buf = drvdata->etr_buf; + + /* Callers should provide an appropriate buffer for use */ + if (WARN_ON(!etr_buf || drvdata->etr_buf)) + return; + drvdata->etr_buf = etr_buf; /* * If this ETR is connected to a CATU, enable it before we turn @@ -971,13 +976,16 @@ static void tmc_etr_enable_hw(struct tmc_drvdata *drvdata) * also updating the @bufpp on where to find it. Since the trace data * starts at anywhere in the buffer, depending on the RRP, we adjust the * @len returned to handle buffer wrapping around. + * + * We are protected here by drvdata->reading != 0, which ensures the + * sysfs_buf stays alive. */ ssize_t tmc_etr_get_sysfs_trace(struct tmc_drvdata *drvdata, loff_t pos, size_t len, char **bufpp) { s64 offset; ssize_t actual = len; - struct etr_buf *etr_buf = drvdata->etr_buf; + struct etr_buf *etr_buf = drvdata->sysfs_buf; if (pos + actual > etr_buf->len) actual = etr_buf->len - pos; @@ -1007,7 +1015,14 @@ tmc_etr_free_sysfs_buf(struct etr_buf *buf) static void tmc_etr_sync_sysfs_buf(struct tmc_drvdata *drvdata) { - tmc_sync_etr_buf(drvdata); + struct etr_buf *etr_buf = drvdata->etr_buf; + + if (WARN_ON(drvdata->sysfs_buf != etr_buf)) { + tmc_etr_free_sysfs_buf(drvdata->sysfs_buf); + drvdata->sysfs_buf = NULL; + } else { + tmc_sync_etr_buf(drvdata); + } } static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata) @@ -1028,6 +1043,8 @@ static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata) /* Disable CATU device if this ETR is connected to one */ tmc_etr_disable_catu(drvdata); + /* Reset the ETR buf used by hardware */ + drvdata->etr_buf = NULL; } static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev) @@ -1035,7 +1052,7 @@ static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev) int ret = 0; unsigned long flags; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); - struct etr_buf *new_buf = NULL, *free_buf = NULL; + struct etr_buf *sysfs_buf = NULL, *new_buf = NULL, *free_buf = NULL; /* @@ -1047,7 +1064,8 @@ static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev) * with the lock released. */ spin_lock_irqsave(&drvdata->spinlock, flags); - if (!drvdata->etr_buf || (drvdata->etr_buf->size != drvdata->size)) { + sysfs_buf = READ_ONCE(drvdata->sysfs_buf); + if (!sysfs_buf || (sysfs_buf->size != drvdata->size)) { spin_unlock_irqrestore(&drvdata->spinlock, flags); /* Allocate memory with the locks released */ @@ -1076,14 +1094,14 @@ static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev) * If we don't have a buffer or it doesn't match the requested size, * use the buffer allocated above. Otherwise reuse the existing buffer. */ - if (!drvdata->etr_buf || - (new_buf && drvdata->etr_buf->size != new_buf->size)) { - free_buf = drvdata->etr_buf; - drvdata->etr_buf = new_buf; + sysfs_buf = READ_ONCE(drvdata->sysfs_buf); + if (!sysfs_buf || (new_buf && sysfs_buf->size != new_buf->size)) { + free_buf = sysfs_buf; + drvdata->sysfs_buf = new_buf; } drvdata->mode = CS_MODE_SYSFS; - tmc_etr_enable_hw(drvdata); + tmc_etr_enable_hw(drvdata, drvdata->sysfs_buf); out: spin_unlock_irqrestore(&drvdata->spinlock, flags); @@ -1168,13 +1186,13 @@ int tmc_read_prepare_etr(struct tmc_drvdata *drvdata) goto out; } - /* If drvdata::etr_buf is NULL the trace data has been read already */ - if (drvdata->etr_buf == NULL) { + /* If sysfs_buf is NULL the trace data has been read already */ + if (!drvdata->sysfs_buf) { ret = -EINVAL; goto out; } - /* Disable the TMC if need be */ + /* Disable the TMC if we are trying to read from a running session */ if (drvdata->mode == CS_MODE_SYSFS) tmc_etr_disable_hw(drvdata); @@ -1188,7 +1206,7 @@ out: int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata) { unsigned long flags; - struct etr_buf *etr_buf = NULL; + struct etr_buf *sysfs_buf = NULL; /* config types are set a boot time and never change */ if (WARN_ON_ONCE(drvdata->config_type != TMC_CONFIG_TYPE_ETR)) @@ -1203,22 +1221,22 @@ int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata) * buffer. Since the tracer is still enabled drvdata::buf can't * be NULL. */ - tmc_etr_enable_hw(drvdata); + tmc_etr_enable_hw(drvdata, drvdata->sysfs_buf); } else { /* * The ETR is not tracing and the buffer was just read. * As such prepare to free the trace buffer. */ - etr_buf = drvdata->etr_buf; - drvdata->etr_buf = NULL; + sysfs_buf = drvdata->sysfs_buf; + drvdata->sysfs_buf = NULL; } drvdata->reading = false; spin_unlock_irqrestore(&drvdata->spinlock, flags); /* Free allocated memory out side of the spinlock */ - if (etr_buf) - tmc_free_etr_buf(etr_buf); + if (sysfs_buf) + tmc_etr_free_sysfs_buf(sysfs_buf); return 0; } diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h index d8cb926e17ee..84d82c7c5045 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.h +++ b/drivers/hwtracing/coresight/coresight-tmc.h @@ -181,6 +181,7 @@ struct etr_buf { * @trigger_cntr: amount of words to store after a trigger. * @etr_caps: Bitmask of capabilities of the TMC ETR, inferred from the * device configuration register (DEVID) + * @sysfs_data: SYSFS buffer for ETR. */ struct tmc_drvdata { void __iomem *base; @@ -200,6 +201,7 @@ struct tmc_drvdata { enum tmc_mem_intf_width memwidth; u32 trigger_cntr; u32 etr_caps; + struct etr_buf *sysfs_buf; }; struct etr_buf_operations { From 4a9e6e4b9f8b613726d17a51b6a1a8651d3c2f73 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:17:52 -0600 Subject: [PATCH 0501/3715] UPSTREAM: coresight: tmc-etr: Relax collection of trace from sysfs mode Since the ETR now uses mode specific buffers, we can reliably provide the trace data captured in sysfs mode, even when the ETR is operating in PERF mode. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit cad5f8d399bbe3c4ed61a21f649d61b09f6efb7b). Bug: 140266694 Change-Id: Ic7ac02c4d80fe00b469d05b1abe779f9f177362b Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-tmc-etr.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index cc73c609bb24..5239948822a9 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -1180,19 +1180,17 @@ int tmc_read_prepare_etr(struct tmc_drvdata *drvdata) goto out; } - /* Don't interfere if operated from Perf */ - if (drvdata->mode == CS_MODE_PERF) { - ret = -EINVAL; - goto out; - } - - /* If sysfs_buf is NULL the trace data has been read already */ + /* + * We can safely allow reads even if the ETR is operating in PERF mode, + * since the sysfs session is captured in mode specific data. + * If drvdata::sysfs_data is NULL the trace data has been read already. + */ if (!drvdata->sysfs_buf) { ret = -EINVAL; goto out; } - /* Disable the TMC if we are trying to read from a running session */ + /* Disable the TMC if we are trying to read from a running session. */ if (drvdata->mode == CS_MODE_SYSFS) tmc_etr_disable_hw(drvdata); From ee924619fadee2802e40657e150334ca9ead4dd3 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:17:53 -0600 Subject: [PATCH 0502/3715] UPSTREAM: coresight: Convert driver messages to dev_dbg Convert component enable/disable messages from dev_info to dev_dbg. When used with perf, the components in the paths are enabled/disabled during each schedule of the run, which can flood the dmesg with these messages. Moreover, they are only useful for debug purposes. So, convert such messages to dev_dbg() which can be turned on as needed. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 41a75cdde7351a902066bf7ddf44e0a27996f13c). Bug: 140266694 Change-Id: Id69592dc7c90068fd008a4673c39d3927248a03f Signed-off-by: Yabin Cui --- .../hwtracing/coresight/coresight-dynamic-replicator.c | 4 ++-- drivers/hwtracing/coresight/coresight-etb10.c | 6 +++--- drivers/hwtracing/coresight/coresight-etm3x.c | 4 ++-- drivers/hwtracing/coresight/coresight-etm4x.c | 4 ++-- drivers/hwtracing/coresight/coresight-funnel.c | 4 ++-- drivers/hwtracing/coresight/coresight-replicator.c | 4 ++-- drivers/hwtracing/coresight/coresight-stm.c | 4 ++-- drivers/hwtracing/coresight/coresight-tmc-etf.c | 8 ++++---- drivers/hwtracing/coresight/coresight-tmc-etr.c | 4 ++-- drivers/hwtracing/coresight/coresight-tmc.c | 4 ++-- drivers/hwtracing/coresight/coresight-tpiu.c | 4 ++-- 11 files changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-dynamic-replicator.c b/drivers/hwtracing/coresight/coresight-dynamic-replicator.c index bfc65067505f..fc9e48f5d92e 100644 --- a/drivers/hwtracing/coresight/coresight-dynamic-replicator.c +++ b/drivers/hwtracing/coresight/coresight-dynamic-replicator.c @@ -64,7 +64,7 @@ static int replicator_enable(struct coresight_device *csdev, int inport, CS_LOCK(drvdata->base); - dev_info(drvdata->dev, "REPLICATOR enabled\n"); + dev_dbg(drvdata->dev, "REPLICATOR enabled\n"); return 0; } @@ -83,7 +83,7 @@ static void replicator_disable(struct coresight_device *csdev, int inport, CS_LOCK(drvdata->base); - dev_info(drvdata->dev, "REPLICATOR disabled\n"); + dev_dbg(drvdata->dev, "REPLICATOR disabled\n"); } static const struct coresight_ops_link replicator_link_ops = { diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index 3d708d24c37a..fff90bda5493 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -168,7 +168,7 @@ static int etb_enable(struct coresight_device *csdev, u32 mode) spin_unlock_irqrestore(&drvdata->spinlock, flags); out: - dev_info(drvdata->dev, "ETB enabled\n"); + dev_dbg(drvdata->dev, "ETB enabled\n"); return 0; } @@ -274,7 +274,7 @@ static void etb_disable(struct coresight_device *csdev) local_set(&drvdata->mode, CS_MODE_DISABLED); - dev_info(drvdata->dev, "ETB disabled\n"); + dev_dbg(drvdata->dev, "ETB disabled\n"); } static void *etb_alloc_buffer(struct coresight_device *csdev, int cpu, @@ -517,7 +517,7 @@ static void etb_dump(struct etb_drvdata *drvdata) } spin_unlock_irqrestore(&drvdata->spinlock, flags); - dev_info(drvdata->dev, "ETB dumped\n"); + dev_dbg(drvdata->dev, "ETB dumped\n"); } static int etb_open(struct inode *inode, struct file *file) diff --git a/drivers/hwtracing/coresight/coresight-etm3x.c b/drivers/hwtracing/coresight/coresight-etm3x.c index e5b1ec57dbde..aa8a2b076ad4 100644 --- a/drivers/hwtracing/coresight/coresight-etm3x.c +++ b/drivers/hwtracing/coresight/coresight-etm3x.c @@ -510,7 +510,7 @@ static int etm_enable_sysfs(struct coresight_device *csdev) drvdata->sticky_enable = true; spin_unlock(&drvdata->spinlock); - dev_info(drvdata->dev, "ETM tracing enabled\n"); + dev_dbg(drvdata->dev, "ETM tracing enabled\n"); return 0; err: @@ -613,7 +613,7 @@ static void etm_disable_sysfs(struct coresight_device *csdev) spin_unlock(&drvdata->spinlock); cpus_read_unlock(); - dev_info(drvdata->dev, "ETM tracing disabled\n"); + dev_dbg(drvdata->dev, "ETM tracing disabled\n"); } static void etm_disable(struct coresight_device *csdev, diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c index b0141ba7b741..d74b17f87de7 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.c +++ b/drivers/hwtracing/coresight/coresight-etm4x.c @@ -281,7 +281,7 @@ static int etm4_enable_sysfs(struct coresight_device *csdev) drvdata->sticky_enable = true; spin_unlock(&drvdata->spinlock); - dev_info(drvdata->dev, "ETM tracing enabled\n"); + dev_dbg(drvdata->dev, "ETM tracing enabled\n"); return 0; err: @@ -398,7 +398,7 @@ static void etm4_disable_sysfs(struct coresight_device *csdev) spin_unlock(&drvdata->spinlock); cpus_read_unlock(); - dev_info(drvdata->dev, "ETM tracing disabled\n"); + dev_dbg(drvdata->dev, "ETM tracing disabled\n"); } static void etm4_disable(struct coresight_device *csdev, diff --git a/drivers/hwtracing/coresight/coresight-funnel.c b/drivers/hwtracing/coresight/coresight-funnel.c index 5b273ffd71f5..b1e74fa79635 100644 --- a/drivers/hwtracing/coresight/coresight-funnel.c +++ b/drivers/hwtracing/coresight/coresight-funnel.c @@ -72,7 +72,7 @@ static int funnel_enable(struct coresight_device *csdev, int inport, funnel_enable_hw(drvdata, inport); - dev_info(drvdata->dev, "FUNNEL inport %d enabled\n", inport); + dev_dbg(drvdata->dev, "FUNNEL inport %d enabled\n", inport); return 0; } @@ -96,7 +96,7 @@ static void funnel_disable(struct coresight_device *csdev, int inport, funnel_disable_hw(drvdata, inport); - dev_info(drvdata->dev, "FUNNEL inport %d disabled\n", inport); + dev_dbg(drvdata->dev, "FUNNEL inport %d disabled\n", inport); } static const struct coresight_ops_link funnel_link_ops = { diff --git a/drivers/hwtracing/coresight/coresight-replicator.c b/drivers/hwtracing/coresight/coresight-replicator.c index 3756e71cb8f5..4f7781203fd4 100644 --- a/drivers/hwtracing/coresight/coresight-replicator.c +++ b/drivers/hwtracing/coresight/coresight-replicator.c @@ -42,7 +42,7 @@ static int replicator_enable(struct coresight_device *csdev, int inport, { struct replicator_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); - dev_info(drvdata->dev, "REPLICATOR enabled\n"); + dev_dbg(drvdata->dev, "REPLICATOR enabled\n"); return 0; } @@ -51,7 +51,7 @@ static void replicator_disable(struct coresight_device *csdev, int inport, { struct replicator_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); - dev_info(drvdata->dev, "REPLICATOR disabled\n"); + dev_dbg(drvdata->dev, "REPLICATOR disabled\n"); } static const struct coresight_ops_link replicator_link_ops = { diff --git a/drivers/hwtracing/coresight/coresight-stm.c b/drivers/hwtracing/coresight/coresight-stm.c index 92a780a6df1d..696455891ec4 100644 --- a/drivers/hwtracing/coresight/coresight-stm.c +++ b/drivers/hwtracing/coresight/coresight-stm.c @@ -218,7 +218,7 @@ static int stm_enable(struct coresight_device *csdev, stm_enable_hw(drvdata); spin_unlock(&drvdata->spinlock); - dev_info(drvdata->dev, "STM tracing enabled\n"); + dev_dbg(drvdata->dev, "STM tracing enabled\n"); return 0; } @@ -281,7 +281,7 @@ static void stm_disable(struct coresight_device *csdev, pm_runtime_put(drvdata->dev); local_set(&drvdata->mode, CS_MODE_DISABLED); - dev_info(drvdata->dev, "STM tracing disabled\n"); + dev_dbg(drvdata->dev, "STM tracing disabled\n"); } } diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index f30e5d8d3454..d6c410d61058 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -244,7 +244,7 @@ static int tmc_enable_etf_sink(struct coresight_device *csdev, u32 mode) if (ret) return ret; - dev_info(drvdata->dev, "TMC-ETB/ETF enabled\n"); + dev_dbg(drvdata->dev, "TMC-ETB/ETF enabled\n"); return 0; } @@ -267,7 +267,7 @@ static void tmc_disable_etf_sink(struct coresight_device *csdev) spin_unlock_irqrestore(&drvdata->spinlock, flags); - dev_info(drvdata->dev, "TMC-ETB/ETF disabled\n"); + dev_dbg(drvdata->dev, "TMC-ETB/ETF disabled\n"); } static int tmc_enable_etf_link(struct coresight_device *csdev, @@ -286,7 +286,7 @@ static int tmc_enable_etf_link(struct coresight_device *csdev, drvdata->mode = CS_MODE_SYSFS; spin_unlock_irqrestore(&drvdata->spinlock, flags); - dev_info(drvdata->dev, "TMC-ETF enabled\n"); + dev_dbg(drvdata->dev, "TMC-ETF enabled\n"); return 0; } @@ -306,7 +306,7 @@ static void tmc_disable_etf_link(struct coresight_device *csdev, drvdata->mode = CS_MODE_DISABLED; spin_unlock_irqrestore(&drvdata->spinlock, flags); - dev_info(drvdata->dev, "TMC-ETF disabled\n"); + dev_dbg(drvdata->dev, "TMC-ETF disabled\n"); } static void *tmc_alloc_etf_buffer(struct coresight_device *csdev, int cpu, diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 5239948822a9..472fcc483af3 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -1110,7 +1110,7 @@ out: tmc_etr_free_sysfs_buf(free_buf); if (!ret) - dev_info(drvdata->dev, "TMC-ETR enabled\n"); + dev_dbg(drvdata->dev, "TMC-ETR enabled\n"); return ret; } @@ -1153,7 +1153,7 @@ static void tmc_disable_etr_sink(struct coresight_device *csdev) spin_unlock_irqrestore(&drvdata->spinlock, flags); - dev_info(drvdata->dev, "TMC-ETR disabled\n"); + dev_dbg(drvdata->dev, "TMC-ETR disabled\n"); } static const struct coresight_ops_sink tmc_etr_sink_ops = { diff --git a/drivers/hwtracing/coresight/coresight-tmc.c b/drivers/hwtracing/coresight/coresight-tmc.c index 7c138b080844..dd7fb2782722 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.c +++ b/drivers/hwtracing/coresight/coresight-tmc.c @@ -89,7 +89,7 @@ static int tmc_read_prepare(struct tmc_drvdata *drvdata) } if (!ret) - dev_info(drvdata->dev, "TMC read start\n"); + dev_dbg(drvdata->dev, "TMC read start\n"); return ret; } @@ -111,7 +111,7 @@ static int tmc_read_unprepare(struct tmc_drvdata *drvdata) } if (!ret) - dev_info(drvdata->dev, "TMC read end\n"); + dev_dbg(drvdata->dev, "TMC read end\n"); return ret; } diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c b/drivers/hwtracing/coresight/coresight-tpiu.c index ccd2c9951aad..b073b792b11c 100644 --- a/drivers/hwtracing/coresight/coresight-tpiu.c +++ b/drivers/hwtracing/coresight/coresight-tpiu.c @@ -81,7 +81,7 @@ static int tpiu_enable(struct coresight_device *csdev, u32 mode) tpiu_enable_hw(drvdata); - dev_info(drvdata->dev, "TPIU enabled\n"); + dev_dbg(drvdata->dev, "TPIU enabled\n"); return 0; } @@ -107,7 +107,7 @@ static void tpiu_disable(struct coresight_device *csdev) tpiu_disable_hw(drvdata); - dev_info(drvdata->dev, "TPIU disabled\n"); + dev_dbg(drvdata->dev, "TPIU disabled\n"); } static const struct coresight_ops_sink tpiu_sink_ops = { From dfb1c8e424b1f69c1fd395b6ffd02453fe145942 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:17:54 -0600 Subject: [PATCH 0503/3715] UPSTREAM: coresight: perf: Remove reset_buffer call back for sinks Right now we issue an update_buffer() and reset_buffer() call backs in succession when we stop tracing an event. The update_buffer is supposed to check the status of the buffer and make sure the ring buffer is updated with the trace data. And we store information about the size of the data collected only to be consumed by the reset_buffer callback which always follows the update_buffer. This was originally designed for handling future IPs which could trigger a buffer overflow interrupt. This patch gets rid of the reset_buffer callback altogether and performs the actions in update_buffer, making it return the size collected. We can always add the support for handling the overflow interrupt case later. This removes some not-so pretty hack (storing the new head in the size field for snapshot mode) and cleans it up a little bit. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 7ec786ad193beb5579223174e119805569a7af3b). Bug: 140266694 Change-Id: I4fe2a369fad29923e96217c9fe92cc24a3016241 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-etb10.c | 56 ++++-------------- .../hwtracing/coresight/coresight-etm-perf.c | 9 +-- .../hwtracing/coresight/coresight-tmc-etf.c | 58 ++++--------------- include/linux/coresight.h | 6 +- 4 files changed, 26 insertions(+), 103 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index fff90bda5493..4fa23860db76 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -327,37 +327,7 @@ static int etb_set_buffer(struct coresight_device *csdev, return ret; } -static unsigned long etb_reset_buffer(struct coresight_device *csdev, - struct perf_output_handle *handle, - void *sink_config) -{ - unsigned long size = 0; - struct cs_buffers *buf = sink_config; - - if (buf) { - /* - * In snapshot mode ->data_size holds the new address of the - * ring buffer's head. The size itself is the whole address - * range since we want the latest information. - */ - if (buf->snapshot) - handle->head = local_xchg(&buf->data_size, - buf->nr_pages << PAGE_SHIFT); - - /* - * Tell the tracer PMU how much we got in this run and if - * something went wrong along the way. Nobody else can use - * this cs_buffers instance until we are done. As such - * resetting parameters here and squaring off with the ring - * buffer API in the tracer PMU is fine. - */ - size = local_xchg(&buf->data_size, 0); - } - - return size; -} - -static void etb_update_buffer(struct coresight_device *csdev, +static unsigned long etb_update_buffer(struct coresight_device *csdev, struct perf_output_handle *handle, void *sink_config) { @@ -366,13 +336,13 @@ static void etb_update_buffer(struct coresight_device *csdev, u8 *buf_ptr; const u32 *barrier; u32 read_ptr, write_ptr, capacity; - u32 status, read_data, to_read; - unsigned long offset; + u32 status, read_data; + unsigned long offset, to_read; struct cs_buffers *buf = sink_config; struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); if (!buf) - return; + return 0; capacity = drvdata->buffer_depth * ETB_FRAME_SIZE_WORDS; @@ -477,18 +447,17 @@ static void etb_update_buffer(struct coresight_device *csdev, writel_relaxed(0x0, drvdata->base + ETB_RAM_WRITE_POINTER); /* - * In snapshot mode all we have to do is communicate to - * perf_aux_output_end() the address of the current head. In full - * trace mode the same function expects a size to move rb->aux_head - * forward. + * In snapshot mode we have to update the handle->head to point + * to the new location. */ - if (buf->snapshot) - local_set(&buf->data_size, (cur * PAGE_SIZE) + offset); - else - local_add(to_read, &buf->data_size); - + if (buf->snapshot) { + handle->head = (cur * PAGE_SIZE) + offset; + to_read = buf->nr_pages << PAGE_SHIFT; + } etb_enable_hw(drvdata); CS_LOCK(drvdata->base); + + return to_read; } static const struct coresight_ops_sink etb_sink_ops = { @@ -497,7 +466,6 @@ static const struct coresight_ops_sink etb_sink_ops = { .alloc_buffer = etb_alloc_buffer, .free_buffer = etb_free_buffer, .set_buffer = etb_set_buffer, - .reset_buffer = etb_reset_buffer, .update_buffer = etb_update_buffer, }; diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index ad85f4913b14..88ac88d158ed 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -380,15 +380,8 @@ static void etm_event_stop(struct perf_event *event, int mode) if (!sink_ops(sink)->update_buffer) return; - sink_ops(sink)->update_buffer(sink, handle, + size = sink_ops(sink)->update_buffer(sink, handle, event_data->snk_config); - - if (!sink_ops(sink)->reset_buffer) - return; - - size = sink_ops(sink)->reset_buffer(sink, handle, - event_data->snk_config); - perf_aux_output_end(handle, size); } diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index d6c410d61058..f6360093356c 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -360,36 +360,7 @@ static int tmc_set_etf_buffer(struct coresight_device *csdev, return ret; } -static unsigned long tmc_reset_etf_buffer(struct coresight_device *csdev, - struct perf_output_handle *handle, - void *sink_config) -{ - long size = 0; - struct cs_buffers *buf = sink_config; - - if (buf) { - /* - * In snapshot mode ->data_size holds the new address of the - * ring buffer's head. The size itself is the whole address - * range since we want the latest information. - */ - if (buf->snapshot) - handle->head = local_xchg(&buf->data_size, - buf->nr_pages << PAGE_SHIFT); - /* - * Tell the tracer PMU how much we got in this run and if - * something went wrong along the way. Nobody else can use - * this cs_buffers instance until we are done. As such - * resetting parameters here and squaring off with the ring - * buffer API in the tracer PMU is fine. - */ - size = local_xchg(&buf->data_size, 0); - } - - return size; -} - -static void tmc_update_etf_buffer(struct coresight_device *csdev, +static unsigned long tmc_update_etf_buffer(struct coresight_device *csdev, struct perf_output_handle *handle, void *sink_config) { @@ -398,17 +369,17 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev, const u32 *barrier; u32 *buf_ptr; u64 read_ptr, write_ptr; - u32 status, to_read; - unsigned long offset; + u32 status; + unsigned long offset, to_read; struct cs_buffers *buf = sink_config; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); if (!buf) - return; + return 0; /* This shouldn't happen */ if (WARN_ON_ONCE(drvdata->mode != CS_MODE_PERF)) - return; + return 0; CS_UNLOCK(drvdata->base); @@ -497,18 +468,14 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev, } } - /* - * In snapshot mode all we have to do is communicate to - * perf_aux_output_end() the address of the current head. In full - * trace mode the same function expects a size to move rb->aux_head - * forward. - */ - if (buf->snapshot) - local_set(&buf->data_size, (cur * PAGE_SIZE) + offset); - else - local_add(to_read, &buf->data_size); - + /* In snapshot mode we have to update the head */ + if (buf->snapshot) { + handle->head = (cur * PAGE_SIZE) + offset; + to_read = buf->nr_pages << PAGE_SHIFT; + } CS_LOCK(drvdata->base); + + return to_read; } static const struct coresight_ops_sink tmc_etf_sink_ops = { @@ -517,7 +484,6 @@ static const struct coresight_ops_sink tmc_etf_sink_ops = { .alloc_buffer = tmc_alloc_etf_buffer, .free_buffer = tmc_free_etf_buffer, .set_buffer = tmc_set_etf_buffer, - .reset_buffer = tmc_reset_etf_buffer, .update_buffer = tmc_update_etf_buffer, }; diff --git a/include/linux/coresight.h b/include/linux/coresight.h index e11fb81592a4..e0c44cc4aeb1 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -200,7 +200,6 @@ struct coresight_device { * @alloc_buffer: initialises perf's ring buffer for trace collection. * @free_buffer: release memory allocated in @get_config. * @set_buffer: initialises buffer mechanic before a trace session. - * @reset_buffer: finalises buffer mechanic after a trace session. * @update_buffer: update buffer pointers after a trace session. */ struct coresight_ops_sink { @@ -212,10 +211,7 @@ struct coresight_ops_sink { int (*set_buffer)(struct coresight_device *csdev, struct perf_output_handle *handle, void *sink_config); - unsigned long (*reset_buffer)(struct coresight_device *csdev, - struct perf_output_handle *handle, - void *sink_config); - void (*update_buffer)(struct coresight_device *csdev, + unsigned long (*update_buffer)(struct coresight_device *csdev, struct perf_output_handle *handle, void *sink_config); }; From 19ed4af8ba3cf35d20817a03a12ec9d048aa92ad Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:17:55 -0600 Subject: [PATCH 0504/3715] UPSTREAM: coresight: perf: Add helper to retrieve sink configuration We can always find the sink configuration for a given perf_output_handle. Add a helper to retrieve the sink configuration for a given perf_output_handle. This will be used to get rid of the set_buffer() call back. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit d25054ee8d18c937058a1b69b35fa5bfdef471f3). Bug: 140266694 Change-Id: I2e069881a090a1a8818bdf237c4f4d8624a3d6e3 Signed-off-by: Yabin Cui --- .../hwtracing/coresight/coresight-etm-perf.c | 14 ---------- .../hwtracing/coresight/coresight-etm-perf.h | 26 +++++++++++++++++++ 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index 88ac88d158ed..a3a45eb2d223 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -34,20 +34,6 @@ static struct pmu etm_pmu; static bool etm_perf_up; -/** - * struct etm_event_data - Coresight specifics associated to an event - * @work: Handle to free allocated memory outside IRQ context. - * @mask: Hold the CPU(s) this event was set for. - * @snk_config: The sink configuration. - * @path: An array of path, each slot for one CPU. - */ -struct etm_event_data { - struct work_struct work; - cpumask_t mask; - void *snk_config; - struct list_head * __percpu *path; -}; - static DEFINE_PER_CPU(struct perf_output_handle, ctx_handle); static DEFINE_PER_CPU(struct coresight_device *, csdev_src); diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.h b/drivers/hwtracing/coresight/coresight-etm-perf.h index 3ffc9feb2d64..ab7732f98484 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.h +++ b/drivers/hwtracing/coresight/coresight-etm-perf.h @@ -18,6 +18,7 @@ #ifndef _CORESIGHT_ETM_PERF_H #define _CORESIGHT_ETM_PERF_H +#include #include "coresight-priv.h" struct coresight_device; @@ -53,14 +54,39 @@ struct etm_filters { bool ssstatus; }; +/** + * struct etm_event_data - Coresight specifics associated to an event + * @work: Handle to free allocated memory outside IRQ context. + * @mask: Hold the CPU(s) this event was set for. + * @snk_config: The sink configuration. + * @path: An array of path, each slot for one CPU. + */ +struct etm_event_data { + struct work_struct work; + cpumask_t mask; + void *snk_config; + struct list_head * __percpu *path; +}; #ifdef CONFIG_CORESIGHT int etm_perf_symlink(struct coresight_device *csdev, bool link); +static inline void *etm_perf_sink_config(struct perf_output_handle *handle) +{ + struct etm_event_data *data = perf_get_aux(handle); + if (data) + return data->snk_config; + return NULL; +} #else static inline int etm_perf_symlink(struct coresight_device *csdev, bool link) { return -EINVAL; } +static inline void *etm_perf_sink_config(struct perf_output_handle *handle) +{ + return NULL; +} + #endif /* CONFIG_CORESIGHT */ #endif From 9b645bae2ead4ebf866816426dae688b596490f0 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:17:56 -0600 Subject: [PATCH 0505/3715] UPSTREAM: coresight: perf: Remove set_buffer call back In coresight perf mode, we need to prepare the sink before starting a session, which is done via set_buffer call back. We then proceed to enable the tracing. If we fail to start the session successfully, we leave the sink configuration unchanged. In order to make the operation atomic and to avoid yet another call back to clear the buffer, we get rid of the "set_buffer" call back and pass the buffer details via enable() call back to the sink. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 3d6e8935758392179645e1b105789b3da329ad38). Bug: 140266694 Change-Id: I630ac4c48b72fcb1d4aa9311bbed8d6f88ff145a Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-etb10.c | 32 +++++++++++++++---- .../hwtracing/coresight/coresight-etm-perf.c | 9 ++---- drivers/hwtracing/coresight/coresight-priv.h | 2 +- .../hwtracing/coresight/coresight-tmc-etf.c | 28 ++++++++++------ .../hwtracing/coresight/coresight-tmc-etr.c | 7 ++-- drivers/hwtracing/coresight/coresight-tpiu.c | 2 +- drivers/hwtracing/coresight/coresight.c | 11 ++++--- include/linux/coresight.h | 6 +--- 8 files changed, 59 insertions(+), 38 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index 4fa23860db76..2ab767ac5757 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -36,6 +36,7 @@ #include #include "coresight-priv.h" +#include "coresight-etm-perf.h" #define ETB_RAM_DEPTH_REG 0x004 #define ETB_STATUS_REG 0x00c @@ -98,6 +99,9 @@ struct etb_drvdata { u32 trigger_cntr; }; +static int etb_set_buffer(struct coresight_device *csdev, + struct perf_output_handle *handle); + static unsigned int etb_get_buffer_depth(struct etb_drvdata *drvdata) { u32 depth = 0; @@ -139,12 +143,24 @@ static void etb_enable_hw(struct etb_drvdata *drvdata) CS_LOCK(drvdata->base); } -static int etb_enable(struct coresight_device *csdev, u32 mode) +static int etb_enable(struct coresight_device *csdev, u32 mode, void *data) { + int ret = 0; u32 val; unsigned long flags; struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + /* + * We don't have an internal state to clean up if we fail to setup + * the perf buffer. So we can perform the step before we turn the + * ETB on and leave without cleaning up. + */ + if (mode == CS_MODE_PERF) { + ret = etb_set_buffer(csdev, (struct perf_output_handle *)data); + if (ret) + goto out; + } + val = local_cmpxchg(&drvdata->mode, CS_MODE_DISABLED, mode); /* @@ -168,8 +184,9 @@ static int etb_enable(struct coresight_device *csdev, u32 mode) spin_unlock_irqrestore(&drvdata->spinlock, flags); out: - dev_dbg(drvdata->dev, "ETB enabled\n"); - return 0; + if (!ret) + dev_dbg(drvdata->dev, "ETB enabled\n"); + return ret; } static void etb_disable_hw(struct etb_drvdata *drvdata) @@ -306,12 +323,14 @@ static void etb_free_buffer(void *config) } static int etb_set_buffer(struct coresight_device *csdev, - struct perf_output_handle *handle, - void *sink_config) + struct perf_output_handle *handle) { int ret = 0; unsigned long head; - struct cs_buffers *buf = sink_config; + struct cs_buffers *buf = etm_perf_sink_config(handle); + + if (!buf) + return -EINVAL; /* wrap head around to the amount of space we have */ head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1); @@ -465,7 +484,6 @@ static const struct coresight_ops_sink etb_sink_ops = { .disable = etb_disable, .alloc_buffer = etb_alloc_buffer, .free_buffer = etb_free_buffer, - .set_buffer = etb_set_buffer, .update_buffer = etb_update_buffer, }; diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index a3a45eb2d223..b6c70a8de2f6 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -297,16 +297,11 @@ static void etm_event_start(struct perf_event *event, int flags) path = etm_event_cpu_path(event_data, cpu); /* We need a sink, no need to continue without one */ sink = coresight_get_sink(path); - if (WARN_ON_ONCE(!sink || !sink_ops(sink)->set_buffer)) - goto fail_end_stop; - - /* Configure the sink */ - if (sink_ops(sink)->set_buffer(sink, handle, - event_data->snk_config)) + if (WARN_ON_ONCE(!sink)) goto fail_end_stop; /* Nothing will happen without a path */ - if (coresight_enable_path(path, CS_MODE_PERF)) + if (coresight_enable_path(path, CS_MODE_PERF, handle)) goto fail_end_stop; /* Tell the perf core the event is alive */ diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h index 2bb0a1569c94..d6c8c355045d 100644 --- a/drivers/hwtracing/coresight/coresight-priv.h +++ b/drivers/hwtracing/coresight/coresight-priv.h @@ -144,7 +144,7 @@ static inline void coresight_write_reg_pair(void __iomem *addr, u64 val, } void coresight_disable_path(struct list_head *path); -int coresight_enable_path(struct list_head *path, u32 mode); +int coresight_enable_path(struct list_head *path, u32 mode, void *sink_data); struct coresight_device *coresight_get_sink(struct list_head *path); struct coresight_device *coresight_get_enabled_sink(bool reset); struct list_head *coresight_build_path(struct coresight_device *csdev, diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index f6360093356c..01a0c2ca43be 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -21,6 +21,10 @@ #include #include "coresight-priv.h" #include "coresight-tmc.h" +#include "coresight-etm-perf.h" + +static int tmc_set_etf_buffer(struct coresight_device *csdev, + struct perf_output_handle *handle); static void tmc_etb_enable_hw(struct tmc_drvdata *drvdata) { @@ -193,11 +197,12 @@ out: return ret; } -static int tmc_enable_etf_sink_perf(struct coresight_device *csdev) +static int tmc_enable_etf_sink_perf(struct coresight_device *csdev, void *data) { int ret = 0; unsigned long flags; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + struct perf_output_handle *handle = data; spin_lock_irqsave(&drvdata->spinlock, flags); if (drvdata->reading) { @@ -215,15 +220,19 @@ static int tmc_enable_etf_sink_perf(struct coresight_device *csdev) goto out; } - drvdata->mode = CS_MODE_PERF; - tmc_etb_enable_hw(drvdata); + ret = tmc_set_etf_buffer(csdev, handle); + if (!ret) { + drvdata->mode = CS_MODE_PERF; + tmc_etb_enable_hw(drvdata); + } out: spin_unlock_irqrestore(&drvdata->spinlock, flags); return ret; } -static int tmc_enable_etf_sink(struct coresight_device *csdev, u32 mode) +static int tmc_enable_etf_sink(struct coresight_device *csdev, + u32 mode, void *data) { int ret; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); @@ -233,7 +242,7 @@ static int tmc_enable_etf_sink(struct coresight_device *csdev, u32 mode) ret = tmc_enable_etf_sink_sysfs(csdev); break; case CS_MODE_PERF: - ret = tmc_enable_etf_sink_perf(csdev); + ret = tmc_enable_etf_sink_perf(csdev, data); break; /* We shouldn't be here */ default: @@ -339,12 +348,14 @@ static void tmc_free_etf_buffer(void *config) } static int tmc_set_etf_buffer(struct coresight_device *csdev, - struct perf_output_handle *handle, - void *sink_config) + struct perf_output_handle *handle) { int ret = 0; unsigned long head; - struct cs_buffers *buf = sink_config; + struct cs_buffers *buf = etm_perf_sink_config(handle); + + if (!buf) + return -EINVAL; /* wrap head around to the amount of space we have */ head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1); @@ -483,7 +494,6 @@ static const struct coresight_ops_sink tmc_etf_sink_ops = { .disable = tmc_disable_etf_sink, .alloc_buffer = tmc_alloc_etf_buffer, .free_buffer = tmc_free_etf_buffer, - .set_buffer = tmc_set_etf_buffer, .update_buffer = tmc_update_etf_buffer, }; diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 472fcc483af3..b6a619e9c82b 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -1115,19 +1115,20 @@ out: return ret; } -static int tmc_enable_etr_sink_perf(struct coresight_device *csdev) +static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data) { /* We don't support perf mode yet ! */ return -EINVAL; } -static int tmc_enable_etr_sink(struct coresight_device *csdev, u32 mode) +static int tmc_enable_etr_sink(struct coresight_device *csdev, + u32 mode, void *data) { switch (mode) { case CS_MODE_SYSFS: return tmc_enable_etr_sink_sysfs(csdev); case CS_MODE_PERF: - return tmc_enable_etr_sink_perf(csdev); + return tmc_enable_etr_sink_perf(csdev, data); } /* We shouldn't be here */ diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c b/drivers/hwtracing/coresight/coresight-tpiu.c index b073b792b11c..51d6b315de18 100644 --- a/drivers/hwtracing/coresight/coresight-tpiu.c +++ b/drivers/hwtracing/coresight/coresight-tpiu.c @@ -75,7 +75,7 @@ static void tpiu_enable_hw(struct tpiu_drvdata *drvdata) CS_LOCK(drvdata->base); } -static int tpiu_enable(struct coresight_device *csdev, u32 mode) +static int tpiu_enable(struct coresight_device *csdev, u32 mode, void *__unused) { struct tpiu_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index f3e7a745bf33..96d560873135 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -135,7 +135,8 @@ static int coresight_find_link_outport(struct coresight_device *csdev, return -ENODEV; } -static int coresight_enable_sink(struct coresight_device *csdev, u32 mode) +static int coresight_enable_sink(struct coresight_device *csdev, + u32 mode, void *data) { int ret; @@ -144,7 +145,7 @@ static int coresight_enable_sink(struct coresight_device *csdev, u32 mode) * existing "mode" of operation. */ if (sink_ops(csdev)->enable) { - ret = sink_ops(csdev)->enable(csdev, mode); + ret = sink_ops(csdev)->enable(csdev, mode, data); if (ret) return ret; csdev->enable = true; @@ -322,7 +323,7 @@ void coresight_disable_path(struct list_head *path) } } -int coresight_enable_path(struct list_head *path, u32 mode) +int coresight_enable_path(struct list_head *path, u32 mode, void *sink_data) { int ret = 0; @@ -347,7 +348,7 @@ int coresight_enable_path(struct list_head *path, u32 mode) switch (type) { case CORESIGHT_DEV_TYPE_SINK: - ret = coresight_enable_sink(csdev, mode); + ret = coresight_enable_sink(csdev, mode, sink_data); /* * Sink is the first component turned on. If we * failed to enable the sink, there are no components @@ -650,7 +651,7 @@ int coresight_enable(struct coresight_device *csdev) goto out; } - ret = coresight_enable_path(path, CS_MODE_SYSFS); + ret = coresight_enable_path(path, CS_MODE_SYSFS, NULL); if (ret) goto err_path; diff --git a/include/linux/coresight.h b/include/linux/coresight.h index e0c44cc4aeb1..9ea129e87d29 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -199,18 +199,14 @@ struct coresight_device { * @disable: disables the sink. * @alloc_buffer: initialises perf's ring buffer for trace collection. * @free_buffer: release memory allocated in @get_config. - * @set_buffer: initialises buffer mechanic before a trace session. * @update_buffer: update buffer pointers after a trace session. */ struct coresight_ops_sink { - int (*enable)(struct coresight_device *csdev, u32 mode); + int (*enable)(struct coresight_device *csdev, u32 mode, void *data); void (*disable)(struct coresight_device *csdev); void *(*alloc_buffer)(struct coresight_device *csdev, int cpu, void **pages, int nr_pages, bool overwrite); void (*free_buffer)(void *config); - int (*set_buffer)(struct coresight_device *csdev, - struct perf_output_handle *handle, - void *sink_config); unsigned long (*update_buffer)(struct coresight_device *csdev, struct perf_output_handle *handle, void *sink_config); From 837df46eab3075d2a0044d7f175c1224000fe40b Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:17:57 -0600 Subject: [PATCH 0506/3715] UPSTREAM: coresight: etm-perf: Add support for ETR backend Add support for using TMC-ETR as backend for ETM perf tracing. We use software double buffering at the moment. i.e, the TMC-ETR uses a separate buffer than the perf ring buffer. The data is copied to the perf ring buffer once a session completes. The TMC-ETR would try to match the larger of perf ring buffer or the ETR buffer size configured via sysfs, scaling down to a minimum limit of 1MB. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 22f429f19c4135d51e9dcaf360c0920e32aac7f9). Bug: 140266694 Change-Id: I41669d685143bbc9bdd6e18fe02274fcb4445f6c Signed-off-by: Yabin Cui --- .../hwtracing/coresight/coresight-tmc-etr.c | 248 +++++++++++++++++- drivers/hwtracing/coresight/coresight-tmc.h | 2 + 2 files changed, 248 insertions(+), 2 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index b6a619e9c82b..ed1f0897bfd4 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -21,6 +21,7 @@ #include #include #include "coresight-catu.h" +#include "coresight-etm-perf.h" #include "coresight-priv.h" #include "coresight-tmc.h" @@ -31,6 +32,28 @@ struct etr_flat_buf { size_t size; }; +/* + * etr_perf_buffer - Perf buffer used for ETR + * @etr_buf - Actual buffer used by the ETR + * @snaphost - Perf session mode + * @head - handle->head at the beginning of the session. + * @nr_pages - Number of pages in the ring buffer. + * @pages - Array of Pages in the ring buffer. + */ +struct etr_perf_buffer { + struct etr_buf *etr_buf; + bool snapshot; + unsigned long head; + int nr_pages; + void **pages; +}; + +/* Convert the perf index to an offset within the ETR buffer */ +#define PERF_IDX2OFF(idx, buf) ((idx) % ((buf)->nr_pages << PAGE_SHIFT)) + +/* Lower limit for ETR hardware buffer */ +#define TMC_ETR_PERF_MIN_BUF_SIZE SZ_1M + /* * The TMC ETR SG has a page size of 4K. The SG table contains pointers * to 4KB buffers. However, the OS may use a PAGE_SIZE different from @@ -1115,10 +1138,228 @@ out: return ret; } +/* + * tmc_etr_setup_perf_buf: Allocate ETR buffer for use by perf. + * The size of the hardware buffer is dependent on the size configured + * via sysfs and the perf ring buffer size. We prefer to allocate the + * largest possible size, scaling down the size by half until it + * reaches a minimum limit (1M), beyond which we give up. + */ +static struct etr_perf_buffer * +tmc_etr_setup_perf_buf(struct tmc_drvdata *drvdata, int node, int nr_pages, + void **pages, bool snapshot) +{ + struct etr_buf *etr_buf; + struct etr_perf_buffer *etr_perf; + unsigned long size; + + etr_perf = kzalloc_node(sizeof(*etr_perf), GFP_KERNEL, node); + if (!etr_perf) + return ERR_PTR(-ENOMEM); + + /* + * Try to match the perf ring buffer size if it is larger + * than the size requested via sysfs. + */ + if ((nr_pages << PAGE_SHIFT) > drvdata->size) { + etr_buf = tmc_alloc_etr_buf(drvdata, (nr_pages << PAGE_SHIFT), + 0, node, NULL); + if (!IS_ERR(etr_buf)) + goto done; + } + + /* + * Else switch to configured size for this ETR + * and scale down until we hit the minimum limit. + */ + size = drvdata->size; + do { + etr_buf = tmc_alloc_etr_buf(drvdata, size, 0, node, NULL); + if (!IS_ERR(etr_buf)) + goto done; + size /= 2; + } while (size >= TMC_ETR_PERF_MIN_BUF_SIZE); + + kfree(etr_perf); + return ERR_PTR(-ENOMEM); + +done: + etr_perf->etr_buf = etr_buf; + return etr_perf; +} + + +static void *tmc_alloc_etr_buffer(struct coresight_device *csdev, + int cpu, void **pages, int nr_pages, + bool snapshot) +{ + struct etr_perf_buffer *etr_perf; + struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + if (cpu == -1) + cpu = smp_processor_id(); + + etr_perf = tmc_etr_setup_perf_buf(drvdata, cpu_to_node(cpu), + nr_pages, pages, snapshot); + if (IS_ERR(etr_perf)) { + dev_dbg(drvdata->dev, "Unable to allocate ETR buffer\n"); + return NULL; + } + + etr_perf->snapshot = snapshot; + etr_perf->nr_pages = nr_pages; + etr_perf->pages = pages; + + return etr_perf; +} + +static void tmc_free_etr_buffer(void *config) +{ + struct etr_perf_buffer *etr_perf = config; + + if (etr_perf->etr_buf) + tmc_free_etr_buf(etr_perf->etr_buf); + kfree(etr_perf); +} + +/* + * tmc_etr_sync_perf_buffer: Copy the actual trace data from the hardware + * buffer to the perf ring buffer. + */ +static void tmc_etr_sync_perf_buffer(struct etr_perf_buffer *etr_perf) +{ + long bytes, to_copy; + long pg_idx, pg_offset, src_offset; + unsigned long head = etr_perf->head; + char **dst_pages, *src_buf; + struct etr_buf *etr_buf = etr_perf->etr_buf; + + head = etr_perf->head; + pg_idx = head >> PAGE_SHIFT; + pg_offset = head & (PAGE_SIZE - 1); + dst_pages = (char **)etr_perf->pages; + src_offset = etr_buf->offset; + to_copy = etr_buf->len; + + while (to_copy > 0) { + /* + * In one iteration, we can copy minimum of : + * 1) what is available in the source buffer, + * 2) what is available in the source buffer, before it + * wraps around. + * 3) what is available in the destination page. + * in one iteration. + */ + bytes = tmc_etr_buf_get_data(etr_buf, src_offset, to_copy, + &src_buf); + if (WARN_ON_ONCE(bytes <= 0)) + break; + bytes = min(bytes, (long)(PAGE_SIZE - pg_offset)); + + memcpy(dst_pages[pg_idx] + pg_offset, src_buf, bytes); + + to_copy -= bytes; + + /* Move destination pointers */ + pg_offset += bytes; + if (pg_offset == PAGE_SIZE) { + pg_offset = 0; + if (++pg_idx == etr_perf->nr_pages) + pg_idx = 0; + } + + /* Move source pointers */ + src_offset += bytes; + if (src_offset >= etr_buf->size) + src_offset -= etr_buf->size; + } +} + +/* + * tmc_update_etr_buffer : Update the perf ring buffer with the + * available trace data. We use software double buffering at the moment. + * + * TODO: Add support for reusing the perf ring buffer. + */ +static unsigned long +tmc_update_etr_buffer(struct coresight_device *csdev, + struct perf_output_handle *handle, + void *config) +{ + bool lost = false; + unsigned long flags, size = 0; + struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + struct etr_perf_buffer *etr_perf = config; + struct etr_buf *etr_buf = etr_perf->etr_buf; + + spin_lock_irqsave(&drvdata->spinlock, flags); + if (WARN_ON(drvdata->perf_data != etr_perf)) { + lost = true; + spin_unlock_irqrestore(&drvdata->spinlock, flags); + goto out; + } + + CS_UNLOCK(drvdata->base); + + tmc_flush_and_stop(drvdata); + tmc_sync_etr_buf(drvdata); + + CS_LOCK(drvdata->base); + /* Reset perf specific data */ + drvdata->perf_data = NULL; + spin_unlock_irqrestore(&drvdata->spinlock, flags); + + size = etr_buf->len; + tmc_etr_sync_perf_buffer(etr_perf); + + /* + * Update handle->head in snapshot mode. Also update the size to the + * hardware buffer size if there was an overflow. + */ + if (etr_perf->snapshot) { + handle->head += size; + if (etr_buf->full) + size = etr_buf->size; + } + + lost |= etr_buf->full; +out: + if (lost) + perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); + return size; +} + static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data) { - /* We don't support perf mode yet ! */ - return -EINVAL; + int rc = 0; + unsigned long flags; + struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + struct perf_output_handle *handle = data; + struct etr_perf_buffer *etr_perf = etm_perf_sink_config(handle); + + spin_lock_irqsave(&drvdata->spinlock, flags); + /* + * There can be only one writer per sink in perf mode. If the sink + * is already open in SYSFS mode, we can't use it. + */ + if (drvdata->mode != CS_MODE_DISABLED || WARN_ON(drvdata->perf_data)) { + rc = -EBUSY; + goto unlock_out; + } + + if (WARN_ON(!etr_perf || !etr_perf->etr_buf)) { + rc = -EINVAL; + goto unlock_out; + } + + etr_perf->head = PERF_IDX2OFF(handle->head, etr_perf); + drvdata->perf_data = etr_perf; + drvdata->mode = CS_MODE_PERF; + tmc_etr_enable_hw(drvdata, etr_perf->etr_buf); + +unlock_out: + spin_unlock_irqrestore(&drvdata->spinlock, flags); + return rc; } static int tmc_enable_etr_sink(struct coresight_device *csdev, @@ -1160,6 +1401,9 @@ static void tmc_disable_etr_sink(struct coresight_device *csdev) static const struct coresight_ops_sink tmc_etr_sink_ops = { .enable = tmc_enable_etr_sink, .disable = tmc_disable_etr_sink, + .alloc_buffer = tmc_alloc_etr_buffer, + .update_buffer = tmc_update_etr_buffer, + .free_buffer = tmc_free_etr_buffer, }; const struct coresight_ops tmc_etr_cs_ops = { diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h index 84d82c7c5045..36748ab19b98 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.h +++ b/drivers/hwtracing/coresight/coresight-tmc.h @@ -181,6 +181,7 @@ struct etr_buf { * @trigger_cntr: amount of words to store after a trigger. * @etr_caps: Bitmask of capabilities of the TMC ETR, inferred from the * device configuration register (DEVID) + * @perf_data: PERF buffer for ETR. * @sysfs_data: SYSFS buffer for ETR. */ struct tmc_drvdata { @@ -202,6 +203,7 @@ struct tmc_drvdata { u32 trigger_cntr; u32 etr_caps; struct etr_buf *sysfs_buf; + void *perf_data; }; struct etr_buf_operations { From 570bba9b361d1ed7667c1d680b3cdd0b1a68026c Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 20 Sep 2018 13:17:58 -0600 Subject: [PATCH 0507/3715] UPSTREAM: coresight: etb10: Refactor etb_drvdata::mode handling This patch moves the etb_drvdata::mode from a locat_t to a simple u32, as it is for the ETF and ETR drivers. This streamlines the code and adds commonality with the other drivers when dealing with similar operations. Signed-off-by: Mathieu Poirier Reviewed-by: Suzuki K Poulose Signed-off-by: Greg Kroah-Hartman (Upstream commit d43b8ec599f90c1f07b1bdd29b0c4b6306726ef2). Bug: 140266694 Change-Id: I3cf670b5cc6c0c395a03256b1defc17854ea8d3a Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-etb10.c | 64 ++++++++++--------- 1 file changed, 35 insertions(+), 29 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index 2ab767ac5757..3f33f295ce6e 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -12,7 +12,6 @@ * GNU General Public License for more details. */ -#include #include #include #include @@ -80,8 +79,8 @@ * @miscdev: specifics to handle "/dev/xyz.etb" entry. * @spinlock: only one at a time pls. * @reading: synchronise user space access to etb buffer. - * @mode: this ETB is being used. * @buf: area of memory where ETB buffer content gets sent. + * @mode: this ETB is being used. * @buffer_depth: size of @buf. * @trigger_cntr: amount of words to store after a trigger. */ @@ -93,8 +92,8 @@ struct etb_drvdata { struct miscdevice miscdev; spinlock_t spinlock; local_t reading; - local_t mode; u8 *buf; + u32 mode; u32 buffer_depth; u32 trigger_cntr; }; @@ -146,10 +145,31 @@ static void etb_enable_hw(struct etb_drvdata *drvdata) static int etb_enable(struct coresight_device *csdev, u32 mode, void *data) { int ret = 0; - u32 val; unsigned long flags; struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + spin_lock_irqsave(&drvdata->spinlock, flags); + + /* + * When accessing from Perf, a HW buffer can be handled + * by a single trace entity. In sysFS mode many tracers + * can be logging to the same HW buffer. + */ + if (drvdata->mode == CS_MODE_PERF) { + ret = -EBUSY; + goto out; + } + + /* Don't let perf disturb sysFS sessions */ + if (drvdata->mode == CS_MODE_SYSFS && mode == CS_MODE_PERF) { + ret = -EBUSY; + goto out; + } + + /* Nothing to do, the tracer is already enabled. */ + if (drvdata->mode == CS_MODE_SYSFS && mode == CS_MODE_SYSFS) + goto out; + /* * We don't have an internal state to clean up if we fail to setup * the perf buffer. So we can perform the step before we turn the @@ -161,29 +181,12 @@ static int etb_enable(struct coresight_device *csdev, u32 mode, void *data) goto out; } - val = local_cmpxchg(&drvdata->mode, - CS_MODE_DISABLED, mode); - /* - * When accessing from Perf, a HW buffer can be handled - * by a single trace entity. In sysFS mode many tracers - * can be logging to the same HW buffer. - */ - if (val == CS_MODE_PERF) - return -EBUSY; - - /* Don't let perf disturb sysFS sessions */ - if (val == CS_MODE_SYSFS && mode == CS_MODE_PERF) - return -EBUSY; - - /* Nothing to do, the tracer is already enabled. */ - if (val == CS_MODE_SYSFS) - goto out; - - spin_lock_irqsave(&drvdata->spinlock, flags); + drvdata->mode = mode; etb_enable_hw(drvdata); - spin_unlock_irqrestore(&drvdata->spinlock, flags); out: + spin_unlock_irqrestore(&drvdata->spinlock, flags); + if (!ret) dev_dbg(drvdata->dev, "ETB enabled\n"); return ret; @@ -285,11 +288,14 @@ static void etb_disable(struct coresight_device *csdev) unsigned long flags; spin_lock_irqsave(&drvdata->spinlock, flags); - etb_disable_hw(drvdata); - etb_dump_hw(drvdata); - spin_unlock_irqrestore(&drvdata->spinlock, flags); - local_set(&drvdata->mode, CS_MODE_DISABLED); + /* Disable the ETB only if it needs to */ + if (drvdata->mode != CS_MODE_DISABLED) { + etb_disable_hw(drvdata); + etb_dump_hw(drvdata); + drvdata->mode = CS_MODE_DISABLED; + } + spin_unlock_irqrestore(&drvdata->spinlock, flags); dev_dbg(drvdata->dev, "ETB disabled\n"); } @@ -496,7 +502,7 @@ static void etb_dump(struct etb_drvdata *drvdata) unsigned long flags; spin_lock_irqsave(&drvdata->spinlock, flags); - if (local_read(&drvdata->mode) == CS_MODE_SYSFS) { + if (drvdata->mode == CS_MODE_SYSFS) { etb_disable_hw(drvdata); etb_dump_hw(drvdata); etb_enable_hw(drvdata); From 48b8f165dd6dd6c664be92f13191122c09d801f0 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 20 Sep 2018 13:17:59 -0600 Subject: [PATCH 0508/3715] UPSTREAM: coresight: etb10: Splitting function etb_enable() Up until now the relative simplicity of enabling the ETB made it possible to accommodate processing for both sysFS and perf methods. But work on claimtags and CPU-wide trace scenarios is adding some complexity, making the current code messy and hard to maintain. As such follow what has been done for ETF and ETR components and split function etb_enable() so that processing for both API can be done cleanly. Signed-off-by: Mathieu Poirier Reviewed-by: Suzuki K Poulose Signed-off-by: Greg Kroah-Hartman (Upstream commit d4989fe88603367e5998af70ee638ae6790d42d1). Bug: 140266694 Change-Id: Ic4285b48c5d35e78161bd39ace47316888f7743c Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-etb10.c | 73 +++++++++++++------ 1 file changed, 52 insertions(+), 21 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index 3f33f295ce6e..c7d5b54c6e77 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -142,7 +142,7 @@ static void etb_enable_hw(struct etb_drvdata *drvdata) CS_LOCK(drvdata->base); } -static int etb_enable(struct coresight_device *csdev, u32 mode, void *data) +static int etb_enable_sysfs(struct coresight_device *csdev) { int ret = 0; unsigned long flags; @@ -150,48 +150,79 @@ static int etb_enable(struct coresight_device *csdev, u32 mode, void *data) spin_lock_irqsave(&drvdata->spinlock, flags); - /* - * When accessing from Perf, a HW buffer can be handled - * by a single trace entity. In sysFS mode many tracers - * can be logging to the same HW buffer. - */ + /* Don't messup with perf sessions. */ if (drvdata->mode == CS_MODE_PERF) { ret = -EBUSY; goto out; } - /* Don't let perf disturb sysFS sessions */ - if (drvdata->mode == CS_MODE_SYSFS && mode == CS_MODE_PERF) { + /* Nothing to do, the tracer is already enabled. */ + if (drvdata->mode == CS_MODE_SYSFS) + goto out; + + drvdata->mode = CS_MODE_SYSFS; + etb_enable_hw(drvdata); + +out: + spin_unlock_irqrestore(&drvdata->spinlock, flags); + return ret; +} + +static int etb_enable_perf(struct coresight_device *csdev, void *data) +{ + int ret = 0; + unsigned long flags; + struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + spin_lock_irqsave(&drvdata->spinlock, flags); + + /* No need to continue if the component is already in use. */ + if (drvdata->mode != CS_MODE_DISABLED) { ret = -EBUSY; goto out; } - /* Nothing to do, the tracer is already enabled. */ - if (drvdata->mode == CS_MODE_SYSFS && mode == CS_MODE_SYSFS) - goto out; - /* * We don't have an internal state to clean up if we fail to setup * the perf buffer. So we can perform the step before we turn the * ETB on and leave without cleaning up. */ - if (mode == CS_MODE_PERF) { - ret = etb_set_buffer(csdev, (struct perf_output_handle *)data); - if (ret) - goto out; - } + ret = etb_set_buffer(csdev, (struct perf_output_handle *)data); + if (ret) + goto out; - drvdata->mode = mode; + drvdata->mode = CS_MODE_PERF; etb_enable_hw(drvdata); out: spin_unlock_irqrestore(&drvdata->spinlock, flags); - - if (!ret) - dev_dbg(drvdata->dev, "ETB enabled\n"); return ret; } +static int etb_enable(struct coresight_device *csdev, u32 mode, void *data) +{ + int ret; + struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + switch (mode) { + case CS_MODE_SYSFS: + ret = etb_enable_sysfs(csdev); + break; + case CS_MODE_PERF: + ret = etb_enable_perf(csdev, data); + break; + default: + ret = -EINVAL; + break; + } + + if (ret) + return ret; + + dev_dbg(drvdata->dev, "ETB enabled\n"); + return 0; +} + static void etb_disable_hw(struct etb_drvdata *drvdata) { u32 ffcr; From 8fa5351213559f58bc20d1583f3a003db2ad239c Mon Sep 17 00:00:00 2001 From: Tomasz Nowicki Date: Thu, 20 Sep 2018 13:18:00 -0600 Subject: [PATCH 0509/3715] UPSTREAM: coresight: etm4x: Configure EL2 exception level when kernel is running in HYP For non-VHE systems host kernel runs at EL1 and jumps to EL2 whenever hypervisor code should be executed. In this case ETM4x driver must restrict configuration to EL1 when it setups kernel tracing. However, there is no separate hypervisor privilege level when VHE is enabled, the host kernel runs at EL2. This patch fixes configuration of TRCACATRn register for VHE systems so that ETM_EXLEVEL_NS_HYP bit is used instead of ETM_EXLEVEL_NS_OS to on/off kernel tracing. At the same time, it moves common code to new helper. Signed-off-by: Tomasz Nowicki Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit b860801e3237ec4c74cf8de0be4816996757ae5c). Bug: 140266694 Change-Id: I16cd8a977606ef0fe75ea2398bf7a2eb371e7662 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-etm4x.c | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c index d74b17f87de7..53016776be8b 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.c +++ b/drivers/hwtracing/coresight/coresight-etm4x.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "coresight-etm4x.h" #include "coresight-etm-perf.h" @@ -623,7 +624,7 @@ static void etm4_set_default_config(struct etmv4_config *config) config->vinst_ctrl |= BIT(0); } -static u64 etm4_get_access_type(struct etmv4_config *config) +static u64 etm4_get_ns_access_type(struct etmv4_config *config) { u64 access_type = 0; @@ -634,17 +635,26 @@ static u64 etm4_get_access_type(struct etmv4_config *config) * Bit[13] Exception level 1 - OS * Bit[14] Exception level 2 - Hypervisor * Bit[15] Never implemented - * - * Always stay away from hypervisor mode. */ - access_type = ETM_EXLEVEL_NS_HYP; - - if (config->mode & ETM_MODE_EXCL_KERN) - access_type |= ETM_EXLEVEL_NS_OS; + if (!is_kernel_in_hyp_mode()) { + /* Stay away from hypervisor mode for non-VHE */ + access_type = ETM_EXLEVEL_NS_HYP; + if (config->mode & ETM_MODE_EXCL_KERN) + access_type |= ETM_EXLEVEL_NS_OS; + } else if (config->mode & ETM_MODE_EXCL_KERN) { + access_type = ETM_EXLEVEL_NS_HYP; + } if (config->mode & ETM_MODE_EXCL_USER) access_type |= ETM_EXLEVEL_NS_APP; + return access_type; +} + +static u64 etm4_get_access_type(struct etmv4_config *config) +{ + u64 access_type = etm4_get_ns_access_type(config); + /* * EXLEVEL_S, bits[11:8], don't trace anything happening * in secure state. @@ -898,20 +908,10 @@ void etm4_config_trace_mode(struct etmv4_config *config) addr_acc = config->addr_acc[ETM_DEFAULT_ADDR_COMP]; /* clear default config */ - addr_acc &= ~(ETM_EXLEVEL_NS_APP | ETM_EXLEVEL_NS_OS); + addr_acc &= ~(ETM_EXLEVEL_NS_APP | ETM_EXLEVEL_NS_OS | + ETM_EXLEVEL_NS_HYP); - /* - * EXLEVEL_NS, bits[15:12] - * The Exception levels are: - * Bit[12] Exception level 0 - Application - * Bit[13] Exception level 1 - OS - * Bit[14] Exception level 2 - Hypervisor - * Bit[15] Never implemented - */ - if (mode & ETM_MODE_EXCL_KERN) - addr_acc |= ETM_EXLEVEL_NS_OS; - else - addr_acc |= ETM_EXLEVEL_NS_APP; + addr_acc |= etm4_get_ns_access_type(config); config->addr_acc[ETM_DEFAULT_ADDR_COMP] = addr_acc; config->addr_acc[ETM_DEFAULT_ADDR_COMP + 1] = addr_acc; From 34c170056a9e755f8f0f0e652d88418aff69fd10 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 20 Sep 2018 13:18:02 -0600 Subject: [PATCH 0510/3715] UPSTREAM: coresight: tmc: Fix byte-address alignment for RRP >From the comment in the code, it claims the requirement for byte-address alignment for RRP register: 'for 32-bit, 64-bit and 128-bit wide trace memory, the four LSBs must be 0s. For 256-bit wide trace memory, the five LSBs must be 0s'. This isn't consistent with the program, the program sets five LSBs as zeros for 32/64/128-bit wide trace memory and set six LSBs zeros for 256-bit wide trace memory. After checking with the CoreSight Trace Memory Controller technical reference manual (ARM DDI 0461B, section 3.3.4 RAM Read Pointer Register), it proves the comment is right and the program does wrong setting. This patch fixes byte-address alignment for RRP by following correct definition in the technical reference manual. Cc: Mathieu Poirier Cc: Mike Leach Signed-off-by: Leo Yan Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit e7753f3937610633a540f2be81be87531f96ff04). Bug: 140266694 Change-Id: I2e6e27ac701bf1a8b42fff5afe9f0ebe44fd50b9 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-tmc-etf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index 01a0c2ca43be..03b080c62156 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -431,10 +431,10 @@ static unsigned long tmc_update_etf_buffer(struct coresight_device *csdev, case TMC_MEM_INTF_WIDTH_32BITS: case TMC_MEM_INTF_WIDTH_64BITS: case TMC_MEM_INTF_WIDTH_128BITS: - mask = GENMASK(31, 5); + mask = GENMASK(31, 4); break; case TMC_MEM_INTF_WIDTH_256BITS: - mask = GENMASK(31, 6); + mask = GENMASK(31, 5); break; } From 0bf9acc1934051e1555d07ae26f357f0251a1f64 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:18:03 -0600 Subject: [PATCH 0511/3715] UPSTREAM: coresight: Handle failures in enabling a trace path coresight_enable_path() enables the components in a trace path from a given source to a sink, excluding the source. The operation is performed in the reverse order; the sink first and then backwards in the list. However, if we encounter an error in enabling any of the component, we simply disable all the components in the given path irrespective of whether we enabled some of the components in the enable iteration. This could interfere with another trace session if one of the link devices is turned off (e.g, TMC-ETF). So, we need to make sure that we only disable those components which were actually enabled from the iteration. This patch achieves the same by refactoring the coresight_disable_path to accept a "node" to start from in the forward order, which can then be used from the error path of coresight_enable_path(). With this change, we don't issue a disable call back for a component which didn't get enabled. This change of behavior triggers a bug in coresight_enable_link(), where we leave the refcount on the device and will prevent the device from being enabled forever. So, we also drop the refcount in the coresight_enable_link() if the operation failed. Also, with the refactoring, we always start after the first node (which is the "SOURCE" device) for disabling the entire path. This implies, we must not find a "SOURCE" in the middle of the path. Hence, added a WARN_ON() to make sure the paths we get are sane, rather than simply ignoring them. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit b9866bb16882e89b57e2dc826114316357263fb7). Bug: 140266694 Change-Id: I664db1ae01ac8b472b97b9cadc36086a2c4acaf0 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight.c | 32 ++++++++++++++++++++----- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index 96d560873135..70a632b612e4 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -194,8 +194,10 @@ static int coresight_enable_link(struct coresight_device *csdev, if (atomic_inc_return(&csdev->refcnt[refport]) == 1) { if (link_ops(csdev)->enable) { ret = link_ops(csdev)->enable(csdev, inport, outport); - if (ret) + if (ret) { + atomic_dec(&csdev->refcnt[refport]); return ret; + } } } @@ -284,13 +286,21 @@ static bool coresight_disable_source(struct coresight_device *csdev) return !csdev->enable; } -void coresight_disable_path(struct list_head *path) +/* + * coresight_disable_path_from : Disable components in the given path beyond + * @nd in the list. If @nd is NULL, all the components, except the SOURCE are + * disabled. + */ +static void coresight_disable_path_from(struct list_head *path, + struct coresight_node *nd) { u32 type; - struct coresight_node *nd; struct coresight_device *csdev, *parent, *child; - list_for_each_entry(nd, path, link) { + if (!nd) + nd = list_first_entry(path, struct coresight_node, link); + + list_for_each_entry_continue(nd, path, link) { csdev = nd->csdev; type = csdev->type; @@ -310,7 +320,12 @@ void coresight_disable_path(struct list_head *path) coresight_disable_sink(csdev); break; case CORESIGHT_DEV_TYPE_SOURCE: - /* sources are disabled from either sysFS or Perf */ + /* + * We skip the first node in the path assuming that it + * is the source. So we don't expect a source device in + * the middle of a path. + */ + WARN_ON(1); break; case CORESIGHT_DEV_TYPE_LINK: parent = list_prev_entry(nd, link)->csdev; @@ -323,6 +338,11 @@ void coresight_disable_path(struct list_head *path) } } +void coresight_disable_path(struct list_head *path) +{ + coresight_disable_path_from(path, NULL); +} + int coresight_enable_path(struct list_head *path, u32 mode, void *sink_data) { @@ -376,7 +396,7 @@ int coresight_enable_path(struct list_head *path, u32 mode, void *sink_data) out: return ret; err: - coresight_disable_path(path); + coresight_disable_path_from(path, nd); goto out; } From 078b8f101840dacad5dddeb369da78c3812afbbb Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:18:04 -0600 Subject: [PATCH 0512/3715] UPSTREAM: coresight: tmc-etr: Refactor for handling errors Refactor the tmc-etr enable operation to make it easier to handle errors in enabling the hardware. We need to make sure that the buffer is compatible with the ETR. This patch re-arranges to make the error handling easier, by deferring the hardware enablement until all the errors are checked. This also avoids turning the CATU on/off during a sysfs read session. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 6276f9cba50f77c5b51b581c11b74a51a3f9e040). Bug: 140266694 Change-Id: Ib7f1d20fd5b999f27c2a609de68600e1de8ab317 Signed-off-by: Yabin Cui --- .../hwtracing/coresight/coresight-tmc-etr.c | 67 ++++++++++++------- 1 file changed, 43 insertions(+), 24 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index ed1f0897bfd4..01d502137c12 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -929,21 +929,10 @@ static void tmc_sync_etr_buf(struct tmc_drvdata *drvdata) tmc_etr_buf_insert_barrier_packet(etr_buf, etr_buf->offset); } -static void tmc_etr_enable_hw(struct tmc_drvdata *drvdata, - struct etr_buf *etr_buf) +static void __tmc_etr_enable_hw(struct tmc_drvdata *drvdata) { u32 axictl, sts; - - /* Callers should provide an appropriate buffer for use */ - if (WARN_ON(!etr_buf || drvdata->etr_buf)) - return; - drvdata->etr_buf = etr_buf; - - /* - * If this ETR is connected to a CATU, enable it before we turn - * this on - */ - tmc_etr_enable_catu(drvdata); + struct etr_buf *etr_buf = drvdata->etr_buf; CS_UNLOCK(drvdata->base); @@ -963,11 +952,8 @@ static void tmc_etr_enable_hw(struct tmc_drvdata *drvdata, axictl |= TMC_AXICTL_ARCACHE_OS; } - if (etr_buf->mode == ETR_MODE_ETR_SG) { - if (WARN_ON(!tmc_etr_has_cap(drvdata, TMC_ETR_SG))) - return; + if (etr_buf->mode == ETR_MODE_ETR_SG) axictl |= TMC_AXICTL_SCT_GAT_MODE; - } writel_relaxed(axictl, drvdata->base + TMC_AXICTL); tmc_write_dba(drvdata, etr_buf->hwaddr); @@ -993,6 +979,32 @@ static void tmc_etr_enable_hw(struct tmc_drvdata *drvdata, CS_LOCK(drvdata->base); } +static int tmc_etr_enable_hw(struct tmc_drvdata *drvdata, + struct etr_buf *etr_buf) +{ + /* Callers should provide an appropriate buffer for use */ + if (WARN_ON(!etr_buf)) + return -EINVAL; + + if ((etr_buf->mode == ETR_MODE_ETR_SG) && + WARN_ON(!tmc_etr_has_cap(drvdata, TMC_ETR_SG))) + return -EINVAL; + + if (WARN_ON(drvdata->etr_buf)) + return -EBUSY; + + /* Set the buffer for the session */ + drvdata->etr_buf = etr_buf; + /* + * If this ETR is connected to a CATU, enable it before we turn + * this on. + */ + tmc_etr_enable_catu(drvdata); + + __tmc_etr_enable_hw(drvdata); + return 0; +} + /* * Return the available trace data in the buffer (starts at etr_buf->offset, * limited by etr_buf->len) from @pos, with a maximum limit of @len, @@ -1048,7 +1060,7 @@ static void tmc_etr_sync_sysfs_buf(struct tmc_drvdata *drvdata) } } -static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata) +static void __tmc_etr_disable_hw(struct tmc_drvdata *drvdata) { CS_UNLOCK(drvdata->base); @@ -1064,6 +1076,11 @@ static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata) CS_LOCK(drvdata->base); +} + +static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata) +{ + __tmc_etr_disable_hw(drvdata); /* Disable CATU device if this ETR is connected to one */ tmc_etr_disable_catu(drvdata); /* Reset the ETR buf used by hardware */ @@ -1123,8 +1140,9 @@ static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev) drvdata->sysfs_buf = new_buf; } - drvdata->mode = CS_MODE_SYSFS; - tmc_etr_enable_hw(drvdata, drvdata->sysfs_buf); + ret = tmc_etr_enable_hw(drvdata, drvdata->sysfs_buf); + if (!ret) + drvdata->mode = CS_MODE_SYSFS; out: spin_unlock_irqrestore(&drvdata->spinlock, flags); @@ -1354,8 +1372,9 @@ static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data) etr_perf->head = PERF_IDX2OFF(handle->head, etr_perf); drvdata->perf_data = etr_perf; - drvdata->mode = CS_MODE_PERF; - tmc_etr_enable_hw(drvdata, etr_perf->etr_buf); + rc = tmc_etr_enable_hw(drvdata, etr_perf->etr_buf); + if (!rc) + drvdata->mode = CS_MODE_PERF; unlock_out: spin_unlock_irqrestore(&drvdata->spinlock, flags); @@ -1437,7 +1456,7 @@ int tmc_read_prepare_etr(struct tmc_drvdata *drvdata) /* Disable the TMC if we are trying to read from a running session. */ if (drvdata->mode == CS_MODE_SYSFS) - tmc_etr_disable_hw(drvdata); + __tmc_etr_disable_hw(drvdata); drvdata->reading = true; out: @@ -1464,7 +1483,7 @@ int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata) * buffer. Since the tracer is still enabled drvdata::buf can't * be NULL. */ - tmc_etr_enable_hw(drvdata, drvdata->sysfs_buf); + __tmc_etr_enable_hw(drvdata); } else { /* * The ETR is not tracing and the buffer was just read. From 712859d34d060ba9d30509107d4533a1a05d3a12 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:18:05 -0600 Subject: [PATCH 0513/3715] UPSTREAM: coresight: tmc-etr: Handle errors enabling CATU Make sure we honor the errors in CATU device and abort the operation. While at it, delay setting the etr_buf for the session until we are sure that we are indeed enabling the ETR. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 1c7995e11cd3252d0db63dad948fc96f05d75b77). Bug: 140266694 Change-Id: I3773cbee72fa8960b473f5770674f2168563586a Signed-off-by: Yabin Cui --- .../hwtracing/coresight/coresight-tmc-etr.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 01d502137c12..956401b6a736 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -762,12 +762,14 @@ tmc_etr_get_catu_device(struct tmc_drvdata *drvdata) return NULL; } -static inline void tmc_etr_enable_catu(struct tmc_drvdata *drvdata) +static inline int tmc_etr_enable_catu(struct tmc_drvdata *drvdata, + struct etr_buf *etr_buf) { struct coresight_device *catu = tmc_etr_get_catu_device(drvdata); if (catu && helper_ops(catu)->enable) - helper_ops(catu)->enable(catu, drvdata->etr_buf); + return helper_ops(catu)->enable(catu, etr_buf); + return 0; } static inline void tmc_etr_disable_catu(struct tmc_drvdata *drvdata) @@ -982,6 +984,8 @@ static void __tmc_etr_enable_hw(struct tmc_drvdata *drvdata) static int tmc_etr_enable_hw(struct tmc_drvdata *drvdata, struct etr_buf *etr_buf) { + int rc; + /* Callers should provide an appropriate buffer for use */ if (WARN_ON(!etr_buf)) return -EINVAL; @@ -993,16 +997,17 @@ static int tmc_etr_enable_hw(struct tmc_drvdata *drvdata, if (WARN_ON(drvdata->etr_buf)) return -EBUSY; - /* Set the buffer for the session */ - drvdata->etr_buf = etr_buf; /* * If this ETR is connected to a CATU, enable it before we turn * this on. */ - tmc_etr_enable_catu(drvdata); + rc = tmc_etr_enable_catu(drvdata, etr_buf); + if (!rc) { + drvdata->etr_buf = etr_buf; + __tmc_etr_enable_hw(drvdata); + } - __tmc_etr_enable_hw(drvdata); - return 0; + return rc; } /* From 44a991ead6756055e350b17d709b8a6c8e240327 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:18:06 -0600 Subject: [PATCH 0514/3715] UPSTREAM: coresight: tmc-etb/etf: Prepare to handle errors enabling Prepare to handle errors in enabling the hardware and report it back to the core driver. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 1d364034aaf2fcc7bc23cac2ffde52747376337e). Bug: 140266694 Change-Id: Ic86a6dd1911b98f557063e1875279d13324566e5 Signed-off-by: Yabin Cui --- .../hwtracing/coresight/coresight-tmc-etf.c | 73 ++++++++++++------- 1 file changed, 45 insertions(+), 28 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index 03b080c62156..763eac6dcaae 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -26,7 +26,7 @@ static int tmc_set_etf_buffer(struct coresight_device *csdev, struct perf_output_handle *handle); -static void tmc_etb_enable_hw(struct tmc_drvdata *drvdata) +static void __tmc_etb_enable_hw(struct tmc_drvdata *drvdata) { CS_UNLOCK(drvdata->base); @@ -45,6 +45,12 @@ static void tmc_etb_enable_hw(struct tmc_drvdata *drvdata) CS_LOCK(drvdata->base); } +static int tmc_etb_enable_hw(struct tmc_drvdata *drvdata) +{ + __tmc_etb_enable_hw(drvdata); + return 0; +} + static void tmc_etb_dump_hw(struct tmc_drvdata *drvdata) { char *bufp; @@ -87,7 +93,7 @@ static void tmc_etb_disable_hw(struct tmc_drvdata *drvdata) CS_LOCK(drvdata->base); } -static void tmc_etf_enable_hw(struct tmc_drvdata *drvdata) +static void __tmc_etf_enable_hw(struct tmc_drvdata *drvdata) { CS_UNLOCK(drvdata->base); @@ -103,6 +109,12 @@ static void tmc_etf_enable_hw(struct tmc_drvdata *drvdata) CS_LOCK(drvdata->base); } +static int tmc_etf_enable_hw(struct tmc_drvdata *drvdata) +{ + __tmc_etf_enable_hw(drvdata); + return 0; +} + static void tmc_etf_disable_hw(struct tmc_drvdata *drvdata) { CS_UNLOCK(drvdata->base); @@ -185,8 +197,12 @@ static int tmc_enable_etf_sink_sysfs(struct coresight_device *csdev) drvdata->buf = buf; } - drvdata->mode = CS_MODE_SYSFS; - tmc_etb_enable_hw(drvdata); + ret = tmc_etb_enable_hw(drvdata); + if (!ret) + drvdata->mode = CS_MODE_SYSFS; + else + /* Free up the buffer if we failed to enable */ + used = false; out: spin_unlock_irqrestore(&drvdata->spinlock, flags); @@ -205,27 +221,25 @@ static int tmc_enable_etf_sink_perf(struct coresight_device *csdev, void *data) struct perf_output_handle *handle = data; spin_lock_irqsave(&drvdata->spinlock, flags); - if (drvdata->reading) { + do { ret = -EINVAL; - goto out; - } + if (drvdata->reading) + break; + /* + * In Perf mode there can be only one writer per sink. There + * is also no need to continue if the ETB/ETF is already + * operated from sysFS. + */ + if (drvdata->mode != CS_MODE_DISABLED) + break; - /* - * In Perf mode there can be only one writer per sink. There - * is also no need to continue if the ETB/ETR is already operated - * from sysFS. - */ - if (drvdata->mode != CS_MODE_DISABLED) { - ret = -EINVAL; - goto out; - } - - ret = tmc_set_etf_buffer(csdev, handle); - if (!ret) { - drvdata->mode = CS_MODE_PERF; - tmc_etb_enable_hw(drvdata); - } -out: + ret = tmc_set_etf_buffer(csdev, handle); + if (ret) + break; + ret = tmc_etb_enable_hw(drvdata); + if (!ret) + drvdata->mode = CS_MODE_PERF; + } while (0); spin_unlock_irqrestore(&drvdata->spinlock, flags); return ret; @@ -282,6 +296,7 @@ static void tmc_disable_etf_sink(struct coresight_device *csdev) static int tmc_enable_etf_link(struct coresight_device *csdev, int inport, int outport) { + int ret; unsigned long flags; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); @@ -291,12 +306,14 @@ static int tmc_enable_etf_link(struct coresight_device *csdev, return -EBUSY; } - tmc_etf_enable_hw(drvdata); - drvdata->mode = CS_MODE_SYSFS; + ret = tmc_etf_enable_hw(drvdata); + if (!ret) + drvdata->mode = CS_MODE_SYSFS; spin_unlock_irqrestore(&drvdata->spinlock, flags); - dev_dbg(drvdata->dev, "TMC-ETF enabled\n"); - return 0; + if (!ret) + dev_dbg(drvdata->dev, "TMC-ETF enabled\n"); + return ret; } static void tmc_disable_etf_link(struct coresight_device *csdev, @@ -590,7 +607,7 @@ int tmc_read_unprepare_etb(struct tmc_drvdata *drvdata) * can't be NULL. */ memset(drvdata->buf, 0, drvdata->size); - tmc_etb_enable_hw(drvdata); + __tmc_etb_enable_hw(drvdata); } else { /* * The ETB/ETF is not tracing and the buffer was just read. From 462c096fb98c04e6c01b613ab1106e2f1d09c8ba Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:18:07 -0600 Subject: [PATCH 0515/3715] UPSTREAM: coresight: etm4x: Add support for handling errors Add support for handling errors in enabling the component. The ETM is enabled via cross call to owner CPU. Make necessary changes to report the error back from the cross call. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit e006d89abedd5d04e6033c5614e1bf160b252615). Bug: 140266694 Change-Id: I2ddad5992aca58d9b019218b07680d7c36ccfab3 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-etm4x.c | 39 ++++++++++++------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c index 53016776be8b..732c7070c908 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.c +++ b/drivers/hwtracing/coresight/coresight-etm4x.c @@ -86,10 +86,14 @@ static int etm4_trace_id(struct coresight_device *csdev) return drvdata->trcid; } -static void etm4_enable_hw(void *info) +struct etm4_enable_arg { + struct etmv4_drvdata *drvdata; + int rc; +}; + +static int etm4_enable_hw(struct etmv4_drvdata *drvdata) { int i; - struct etmv4_drvdata *drvdata = info; struct etmv4_config *config = &drvdata->config; CS_UNLOCK(drvdata->base); @@ -192,6 +196,16 @@ static void etm4_enable_hw(void *info) CS_LOCK(drvdata->base); dev_dbg(drvdata->dev, "cpu: %d enable smp call done\n", drvdata->cpu); + return 0; +} + +static void etm4_enable_hw_smp_call(void *info) +{ + struct etm4_enable_arg *arg = info; + + if (WARN_ON(!arg)) + return; + arg->rc = etm4_enable_hw(arg->drvdata); } static int etm4_parse_event_config(struct etmv4_drvdata *drvdata, @@ -257,7 +271,7 @@ static int etm4_enable_perf(struct coresight_device *csdev, if (ret) goto out; /* And enable it */ - etm4_enable_hw(drvdata); + ret = etm4_enable_hw(drvdata); out: return ret; @@ -266,6 +280,7 @@ out: static int etm4_enable_sysfs(struct coresight_device *csdev) { struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + struct etm4_enable_arg arg = { 0 }; int ret; spin_lock(&drvdata->spinlock); @@ -274,19 +289,17 @@ static int etm4_enable_sysfs(struct coresight_device *csdev) * Executing etm4_enable_hw on the cpu whose ETM is being enabled * ensures that register writes occur when cpu is powered. */ + arg.drvdata = drvdata; ret = smp_call_function_single(drvdata->cpu, - etm4_enable_hw, drvdata, 1); - if (ret) - goto err; - - drvdata->sticky_enable = true; + etm4_enable_hw_smp_call, &arg, 1); + if (!ret) + ret = arg.rc; + if (!ret) + drvdata->sticky_enable = true; spin_unlock(&drvdata->spinlock); - dev_dbg(drvdata->dev, "ETM tracing enabled\n"); - return 0; - -err: - spin_unlock(&drvdata->spinlock); + if (!ret) + dev_dbg(drvdata->dev, "ETM tracing enabled\n"); return ret; } From d020831e31497e1a59137f74c3db341bbf4b39cc Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:18:08 -0600 Subject: [PATCH 0516/3715] UPSTREAM: coresight: etm3: Add support for handling errors Add support for reporting errors back from the SMP cross function call for enabling ETM. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit e2a1551a881f27d1914f182e06b423cc242b43b6). Bug: 140266694 Change-Id: I28fa8136586aaa5966f13933df66edf28130fa33 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-etm3x.c | 42 ++++++++++++------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm3x.c b/drivers/hwtracing/coresight/coresight-etm3x.c index aa8a2b076ad4..d8cfd76a53b9 100644 --- a/drivers/hwtracing/coresight/coresight-etm3x.c +++ b/drivers/hwtracing/coresight/coresight-etm3x.c @@ -364,11 +364,10 @@ static int etm_parse_event_config(struct etm_drvdata *drvdata, return 0; } -static void etm_enable_hw(void *info) +static int etm_enable_hw(struct etm_drvdata *drvdata) { int i; u32 etmcr; - struct etm_drvdata *drvdata = info; struct etm_config *config = &drvdata->config; CS_UNLOCK(drvdata->base); @@ -430,6 +429,21 @@ static void etm_enable_hw(void *info) CS_LOCK(drvdata->base); dev_dbg(drvdata->dev, "cpu: %d enable smp call done\n", drvdata->cpu); + return 0; +} + +struct etm_enable_arg { + struct etm_drvdata *drvdata; + int rc; +}; + +static void etm_enable_hw_smp_call(void *info) +{ + struct etm_enable_arg *arg = info; + + if (WARN_ON(!arg)) + return; + arg->rc = etm_enable_hw(arg->drvdata); } static int etm_cpu_id(struct coresight_device *csdev) @@ -484,14 +498,13 @@ static int etm_enable_perf(struct coresight_device *csdev, /* Configure the tracer based on the session's specifics */ etm_parse_event_config(drvdata, event); /* And enable it */ - etm_enable_hw(drvdata); - - return 0; + return etm_enable_hw(drvdata); } static int etm_enable_sysfs(struct coresight_device *csdev) { struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + struct etm_enable_arg arg = { 0 }; int ret; spin_lock(&drvdata->spinlock); @@ -501,20 +514,21 @@ static int etm_enable_sysfs(struct coresight_device *csdev) * hw configuration will take place on the local CPU during bring up. */ if (cpu_online(drvdata->cpu)) { + arg.drvdata = drvdata; ret = smp_call_function_single(drvdata->cpu, - etm_enable_hw, drvdata, 1); - if (ret) - goto err; + etm_enable_hw_smp_call, &arg, 1); + if (!ret) + ret = arg.rc; + if (!ret) + drvdata->sticky_enable = true; + } else { + ret = -ENODEV; } - drvdata->sticky_enable = true; spin_unlock(&drvdata->spinlock); - dev_dbg(drvdata->dev, "ETM tracing enabled\n"); - return 0; - -err: - spin_unlock(&drvdata->spinlock); + if (!ret) + dev_dbg(drvdata->dev, "ETM tracing enabled\n"); return ret; } From e60339da8e26c9c5ce864ba38bd198a37f139053 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:18:09 -0600 Subject: [PATCH 0517/3715] UPSTREAM: coresight: etb10: Handle errors enabling the device Prepare the etb10 driver to return errors in enabling the device. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 62563e84a8c9c682e07b8cce8678e583a24be504). Bug: 140266694 Change-Id: I18bcfdccd016ce09b788a82dab854445f277209e Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-etb10.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index c7d5b54c6e77..020c61c10fea 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -114,7 +114,7 @@ static unsigned int etb_get_buffer_depth(struct etb_drvdata *drvdata) return depth; } -static void etb_enable_hw(struct etb_drvdata *drvdata) +static void __etb_enable_hw(struct etb_drvdata *drvdata) { int i; u32 depth; @@ -142,6 +142,12 @@ static void etb_enable_hw(struct etb_drvdata *drvdata) CS_LOCK(drvdata->base); } +static int etb_enable_hw(struct etb_drvdata *drvdata) +{ + __etb_enable_hw(drvdata); + return 0; +} + static int etb_enable_sysfs(struct coresight_device *csdev) { int ret = 0; @@ -160,8 +166,9 @@ static int etb_enable_sysfs(struct coresight_device *csdev) if (drvdata->mode == CS_MODE_SYSFS) goto out; - drvdata->mode = CS_MODE_SYSFS; - etb_enable_hw(drvdata); + ret = etb_enable_hw(drvdata); + if (!ret) + drvdata->mode = CS_MODE_SYSFS; out: spin_unlock_irqrestore(&drvdata->spinlock, flags); @@ -191,8 +198,9 @@ static int etb_enable_perf(struct coresight_device *csdev, void *data) if (ret) goto out; - drvdata->mode = CS_MODE_PERF; - etb_enable_hw(drvdata); + ret = etb_enable_hw(drvdata); + if (!ret) + drvdata->mode = CS_MODE_PERF; out: spin_unlock_irqrestore(&drvdata->spinlock, flags); From 1d217cb3b90a8eb29c2f8b38391d75bb9f25fce4 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:18:10 -0600 Subject: [PATCH 0518/3715] UPSTREAM: coresight: dynamic-replicator: Handle multiple connections When a replicator port is enabled, we block the traffic on the other port and route all traffic to the new enabled port. If there are two active trace sessions each targeting the two different paths from the replicator, the second session will disable the first session and route all the data to the second path. ETR / e.g, replicator \ ETB If CPU0 is operated in sysfs mode to ETR and CPU1 is operated in perf mode to ETB, depending on the order in which the replicator is enabled one device is blocked. Ideally we need trace-id for the session to make the right choice. That implies we need a trace-id allocation logic for the coresight subsystem and use that to route the traffic. The short term solution is to only manage the "target port" and leave the other port untouched. That leaves both the paths unaffected, except that some unwanted traffic may be pushed to the paths (if the Trace-IDs are not far enough), which is still fine and can be filtered out while processing rather than silently blocking the data. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 30af4fb619e5126cb3152072e687b377fc9398d6). Bug: 140266694 Change-Id: Ia880f4e38c469e6d11d90280303edeb0dbafa531 Signed-off-by: Yabin Cui --- .../coresight/coresight-dynamic-replicator.c | 64 ++++++++++++++----- 1 file changed, 47 insertions(+), 17 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-dynamic-replicator.c b/drivers/hwtracing/coresight/coresight-dynamic-replicator.c index fc9e48f5d92e..98e166a0efb1 100644 --- a/drivers/hwtracing/coresight/coresight-dynamic-replicator.c +++ b/drivers/hwtracing/coresight/coresight-dynamic-replicator.c @@ -42,26 +42,42 @@ struct replicator_state { struct coresight_device *csdev; }; +/* + * replicator_reset : Reset the replicator configuration to sane values. + */ +static void replicator_reset(struct replicator_state *drvdata) +{ + CS_UNLOCK(drvdata->base); + + writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER0); + writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER1); + + CS_LOCK(drvdata->base); +} + static int replicator_enable(struct coresight_device *csdev, int inport, int outport) { + u32 reg; struct replicator_state *drvdata = dev_get_drvdata(csdev->dev.parent); + switch (outport) { + case 0: + reg = REPLICATOR_IDFILTER0; + break; + case 1: + reg = REPLICATOR_IDFILTER1; + break; + default: + WARN_ON(1); + return -EINVAL; + } + CS_UNLOCK(drvdata->base); - /* - * Ensure that the other port is disabled - * 0x00 - passing through the replicator unimpeded - * 0xff - disable (or impede) the flow of ATB data - */ - if (outport == 0) { - writel_relaxed(0x00, drvdata->base + REPLICATOR_IDFILTER0); - writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER1); - } else { - writel_relaxed(0x00, drvdata->base + REPLICATOR_IDFILTER1); - writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER0); - } + /* Ensure that the outport is enabled. */ + writel_relaxed(0x00, drvdata->base + reg); CS_LOCK(drvdata->base); dev_dbg(drvdata->dev, "REPLICATOR enabled\n"); @@ -71,15 +87,25 @@ static int replicator_enable(struct coresight_device *csdev, int inport, static void replicator_disable(struct coresight_device *csdev, int inport, int outport) { + u32 reg; struct replicator_state *drvdata = dev_get_drvdata(csdev->dev.parent); + switch (outport) { + case 0: + reg = REPLICATOR_IDFILTER0; + break; + case 1: + reg = REPLICATOR_IDFILTER1; + break; + default: + WARN_ON(1); + return; + } + CS_UNLOCK(drvdata->base); /* disable the flow of ATB data through port */ - if (outport == 0) - writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER0); - else - writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER1); + writel_relaxed(0xff, drvdata->base + reg); CS_LOCK(drvdata->base); @@ -164,7 +190,11 @@ static int replicator_probe(struct amba_device *adev, const struct amba_id *id) desc.groups = replicator_groups; drvdata->csdev = coresight_register(&desc); - return PTR_ERR_OR_ZERO(drvdata->csdev); + if (!IS_ERR(drvdata->csdev)) { + replicator_reset(drvdata); + return 0; + } + return PTR_ERR(drvdata->csdev); } #ifdef CONFIG_PM From ca4fc6cd1846c4660d220c0b582d203d602353e8 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:18:11 -0600 Subject: [PATCH 0519/3715] UPSTREAM: coresight: Add support for CLAIM tag protocol Coresight architecture defines CLAIM tags for a device to negotiate control of the components (external agent vs self-hosted). Each device has a pair of registers (CLAIMSET & CLAIMCLR) for managing the CLAIM tags. However, the protocol for the CLAIM tags is IMPLEMENTATION DEFINED. PSCI has recommendations for the use of the CLAIM tags to negotiate controls for external agent vs self-hosted use. This patch implements the recommended protocol by PSCI. The claim/disclaim operations are performed from the device specific drivers. The disadvantage is that the calls are sprinkled in each driver, but this makes the operation much simpler. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 2478a6ae4a6a4c8e3f7e9f6f849dffe92e5238e1). Bug: 140266694 Change-Id: I3d28206fa969566df06998727469efe6d9bc0f45 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-priv.h | 7 ++ drivers/hwtracing/coresight/coresight.c | 86 ++++++++++++++++++++ include/linux/coresight.h | 20 +++++ 3 files changed, 113 insertions(+) diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h index d6c8c355045d..33ceea8c7b99 100644 --- a/drivers/hwtracing/coresight/coresight-priv.h +++ b/drivers/hwtracing/coresight/coresight-priv.h @@ -32,6 +32,13 @@ #define CORESIGHT_DEVID 0xfc8 #define CORESIGHT_DEVTYPE 0xfcc + +/* + * Coresight device CLAIM protocol. + * See PSCI - ARM DEN 0022D, Section: 6.8.1 Debug and Trace save and restore. + */ +#define CORESIGHT_CLAIM_SELF_HOSTED BIT(1) + #define TIMEOUT_US 100 #define BMVAL(val, lsb, msb) ((val & GENMASK(msb, lsb)) >> lsb) diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index 70a632b612e4..0a42b2d48983 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -135,6 +135,92 @@ static int coresight_find_link_outport(struct coresight_device *csdev, return -ENODEV; } +static inline u32 coresight_read_claim_tags(void __iomem *base) +{ + return readl_relaxed(base + CORESIGHT_CLAIMCLR); +} + +static inline bool coresight_is_claimed_self_hosted(void __iomem *base) +{ + return coresight_read_claim_tags(base) == CORESIGHT_CLAIM_SELF_HOSTED; +} + +static inline bool coresight_is_claimed_any(void __iomem *base) +{ + return coresight_read_claim_tags(base) != 0; +} + +static inline void coresight_set_claim_tags(void __iomem *base) +{ + writel_relaxed(CORESIGHT_CLAIM_SELF_HOSTED, base + CORESIGHT_CLAIMSET); + isb(); +} + +static inline void coresight_clear_claim_tags(void __iomem *base) +{ + writel_relaxed(CORESIGHT_CLAIM_SELF_HOSTED, base + CORESIGHT_CLAIMCLR); + isb(); +} + +/* + * coresight_claim_device_unlocked : Claim the device for self-hosted usage + * to prevent an external tool from touching this device. As per PSCI + * standards, section "Preserving the execution context" => "Debug and Trace + * save and Restore", DBGCLAIM[1] is reserved for Self-hosted debug/trace and + * DBGCLAIM[0] is reserved for external tools. + * + * Called with CS_UNLOCKed for the component. + * Returns : 0 on success + */ +int coresight_claim_device_unlocked(void __iomem *base) +{ + if (coresight_is_claimed_any(base)) + return -EBUSY; + + coresight_set_claim_tags(base); + if (coresight_is_claimed_self_hosted(base)) + return 0; + /* There was a race setting the tags, clean up and fail */ + coresight_clear_claim_tags(base); + return -EBUSY; +} + +int coresight_claim_device(void __iomem *base) +{ + int rc; + + CS_UNLOCK(base); + rc = coresight_claim_device_unlocked(base); + CS_LOCK(base); + + return rc; +} + +/* + * coresight_disclaim_device_unlocked : Clear the claim tags for the device. + * Called with CS_UNLOCKed for the component. + */ +void coresight_disclaim_device_unlocked(void __iomem *base) +{ + + if (coresight_is_claimed_self_hosted(base)) + coresight_clear_claim_tags(base); + else + /* + * The external agent may have not honoured our claim + * and has manipulated it. Or something else has seriously + * gone wrong in our driver. + */ + WARN_ON_ONCE(1); +} + +void coresight_disclaim_device(void __iomem *base) +{ + CS_UNLOCK(base); + coresight_disclaim_device_unlocked(base); + CS_LOCK(base); +} + static int coresight_enable_sink(struct coresight_device *csdev, u32 mode, void *data) { diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 9ea129e87d29..0f594e56f5d6 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -271,6 +271,13 @@ extern int coresight_enable(struct coresight_device *csdev); extern void coresight_disable(struct coresight_device *csdev); extern int coresight_timeout(void __iomem *addr, u32 offset, int position, int value); + +extern int coresight_claim_device(void __iomem *base); +extern int coresight_claim_device_unlocked(void __iomem *base); + +extern void coresight_disclaim_device(void __iomem *base); +extern void coresight_disclaim_device_unlocked(void __iomem *base); + #else static inline struct coresight_device * coresight_register(struct coresight_desc *desc) { return NULL; } @@ -280,6 +287,19 @@ coresight_enable(struct coresight_device *csdev) { return -ENOSYS; } static inline void coresight_disable(struct coresight_device *csdev) {} static inline int coresight_timeout(void __iomem *addr, u32 offset, int position, int value) { return 1; } +static inline int coresight_claim_device_unlocked(void __iomem *base) +{ + return -EINVAL; +} + +static inline int coresight_claim_device(void __iomem *base) +{ + return -EINVAL; +} + +static inline void coresight_disclaim_device(void __iomem *base) {} +static inline void coresight_disclaim_device_unlocked(void __iomem *base) {} + #endif #ifdef CONFIG_OF From fbbe9f4ed59863d2f840614d4edcec39f92721bc Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:18:12 -0600 Subject: [PATCH 0520/3715] UPSTREAM: coresight: etmx: Claim devices before use Use the CLAIM tags to grab the device for self-hosted usage. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 68a147752d04da73e2786890d78317fc37e6a6a8). Bug: 140266694 Change-Id: Iea73b23aed3cee51ad1fb9d5c3158f6c940b1dde Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-etm3x.c | 16 +++++++++++++--- drivers/hwtracing/coresight/coresight-etm4x.c | 14 +++++++++++--- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm3x.c b/drivers/hwtracing/coresight/coresight-etm3x.c index d8cfd76a53b9..80a2b32da813 100644 --- a/drivers/hwtracing/coresight/coresight-etm3x.c +++ b/drivers/hwtracing/coresight/coresight-etm3x.c @@ -366,7 +366,7 @@ static int etm_parse_event_config(struct etm_drvdata *drvdata, static int etm_enable_hw(struct etm_drvdata *drvdata) { - int i; + int i, rc; u32 etmcr; struct etm_config *config = &drvdata->config; @@ -378,6 +378,9 @@ static int etm_enable_hw(struct etm_drvdata *drvdata) etm_set_pwrup(drvdata); /* Make sure all registers are accessible */ etm_os_unlock(drvdata); + rc = coresight_claim_device_unlocked(drvdata->base); + if (rc) + goto done; etm_set_prog(drvdata); @@ -426,10 +429,15 @@ static int etm_enable_hw(struct etm_drvdata *drvdata) etm_writel(drvdata, 0x0, ETMVMIDCVR); etm_clr_prog(drvdata); + +done: + if (rc) + etm_set_pwrdwn(drvdata); CS_LOCK(drvdata->base); - dev_dbg(drvdata->dev, "cpu: %d enable smp call done\n", drvdata->cpu); - return 0; + dev_dbg(drvdata->dev, "cpu: %d enable smp call done: %d\n", + drvdata->cpu, rc); + return rc; } struct etm_enable_arg { @@ -578,6 +586,8 @@ static void etm_disable_hw(void *info) for (i = 0; i < drvdata->nr_cntr; i++) config->cntr_val[i] = etm_readl(drvdata, ETMCNTVRn(i)); + coresight_disclaim_device_unlocked(drvdata->base); + etm_set_pwrdwn(drvdata); CS_LOCK(drvdata->base); diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c index 732c7070c908..d843b240583a 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.c +++ b/drivers/hwtracing/coresight/coresight-etm4x.c @@ -93,13 +93,17 @@ struct etm4_enable_arg { static int etm4_enable_hw(struct etmv4_drvdata *drvdata) { - int i; + int i, rc; struct etmv4_config *config = &drvdata->config; CS_UNLOCK(drvdata->base); etm4_os_unlock(drvdata); + rc = coresight_claim_device_unlocked(drvdata->base); + if (rc) + goto done; + /* Disable the trace unit before programming trace registers */ writel_relaxed(0, drvdata->base + TRCPRGCTLR); @@ -193,10 +197,12 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata) dsb(sy); isb(); +done: CS_LOCK(drvdata->base); - dev_dbg(drvdata->dev, "cpu: %d enable smp call done\n", drvdata->cpu); - return 0; + dev_dbg(drvdata->dev, "cpu: %d enable smp call done: %d\n", + drvdata->cpu, rc); + return rc; } static void etm4_enable_hw_smp_call(void *info) @@ -360,6 +366,8 @@ static void etm4_disable_hw(void *info) isb(); writel_relaxed(control, drvdata->base + TRCPRGCTLR); + coresight_disclaim_device_unlocked(drvdata->base); + CS_LOCK(drvdata->base); dev_dbg(drvdata->dev, "cpu: %d disable smp call done\n", drvdata->cpu); From 150a752645df0cc6bf809232f92ba30947e365cc Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:18:13 -0600 Subject: [PATCH 0521/3715] UPSTREAM: coresight: funnel: Claim devices before use Use the CLAIM protocol to grab the ownership of the component. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit f13d7c0835c383e318e28cc9f34b002fc8c85f61). Bug: 140266694 Change-Id: I2fb07a5a61ac387175f8aafd95487e48679b55e9 Signed-off-by: Yabin Cui --- .../hwtracing/coresight/coresight-funnel.c | 26 +++++++++++++++---- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-funnel.c b/drivers/hwtracing/coresight/coresight-funnel.c index b1e74fa79635..ebea26b78975 100644 --- a/drivers/hwtracing/coresight/coresight-funnel.c +++ b/drivers/hwtracing/coresight/coresight-funnel.c @@ -32,6 +32,7 @@ #define FUNNEL_HOLDTIME_MASK 0xf00 #define FUNNEL_HOLDTIME_SHFT 0x8 #define FUNNEL_HOLDTIME (0x7 << FUNNEL_HOLDTIME_SHFT) +#define FUNNEL_ENSx_MASK 0xff /** * struct funnel_drvdata - specifics associated to a funnel component @@ -49,31 +50,42 @@ struct funnel_drvdata { unsigned long priority; }; -static void funnel_enable_hw(struct funnel_drvdata *drvdata, int port) +static int funnel_enable_hw(struct funnel_drvdata *drvdata, int port) { u32 functl; + int rc = 0; CS_UNLOCK(drvdata->base); functl = readl_relaxed(drvdata->base + FUNNEL_FUNCTL); + /* Claim the device only when we enable the first slave */ + if (!(functl & FUNNEL_ENSx_MASK)) { + rc = coresight_claim_device_unlocked(drvdata->base); + if (rc) + goto done; + } + functl &= ~FUNNEL_HOLDTIME_MASK; functl |= FUNNEL_HOLDTIME; functl |= (1 << port); writel_relaxed(functl, drvdata->base + FUNNEL_FUNCTL); writel_relaxed(drvdata->priority, drvdata->base + FUNNEL_PRICTL); - +done: CS_LOCK(drvdata->base); + return rc; } static int funnel_enable(struct coresight_device *csdev, int inport, int outport) { + int rc; struct funnel_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); - funnel_enable_hw(drvdata, inport); + rc = funnel_enable_hw(drvdata, inport); - dev_dbg(drvdata->dev, "FUNNEL inport %d enabled\n", inport); - return 0; + if (!rc) + dev_dbg(drvdata->dev, "FUNNEL inport %d enabled\n", inport); + return rc; } static void funnel_disable_hw(struct funnel_drvdata *drvdata, int inport) @@ -86,6 +98,10 @@ static void funnel_disable_hw(struct funnel_drvdata *drvdata, int inport) functl &= ~(1 << inport); writel_relaxed(functl, drvdata->base + FUNNEL_FUNCTL); + /* Disclaim the device if none of the slaves are now active */ + if (!(functl & FUNNEL_ENSx_MASK)) + coresight_disclaim_device_unlocked(drvdata->base); + CS_LOCK(drvdata->base); } From d10e7c62dc3b6cd83bed27cbaf179eefe81d08a2 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:18:15 -0600 Subject: [PATCH 0522/3715] UPSTREAM: coresight: dynamic-replicator: Claim device for use Use CLAIM protocol to make sure the device is available for use. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 4e33d694376e7f57d788e734dc96fdbd3c408768). Bug: 140266694 Change-Id: I8b033f4a8093bc88d35dc8b4969706051acc789b Signed-off-by: Yabin Cui --- .../coresight/coresight-dynamic-replicator.c | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-dynamic-replicator.c b/drivers/hwtracing/coresight/coresight-dynamic-replicator.c index 98e166a0efb1..3fe7f0ca7cf5 100644 --- a/drivers/hwtracing/coresight/coresight-dynamic-replicator.c +++ b/drivers/hwtracing/coresight/coresight-dynamic-replicator.c @@ -49,8 +49,11 @@ static void replicator_reset(struct replicator_state *drvdata) { CS_UNLOCK(drvdata->base); - writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER0); - writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER1); + if (!coresight_claim_device_unlocked(drvdata->base)) { + writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER0); + writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER1); + coresight_disclaim_device_unlocked(drvdata->base); + } CS_LOCK(drvdata->base); } @@ -58,6 +61,7 @@ static void replicator_reset(struct replicator_state *drvdata) static int replicator_enable(struct coresight_device *csdev, int inport, int outport) { + int rc = 0; u32 reg; struct replicator_state *drvdata = dev_get_drvdata(csdev->dev.parent); @@ -75,13 +79,19 @@ static int replicator_enable(struct coresight_device *csdev, int inport, CS_UNLOCK(drvdata->base); + if ((readl_relaxed(drvdata->base + REPLICATOR_IDFILTER0) == 0xff) && + (readl_relaxed(drvdata->base + REPLICATOR_IDFILTER1) == 0xff)) + rc = coresight_claim_device_unlocked(drvdata->base); /* Ensure that the outport is enabled. */ - writel_relaxed(0x00, drvdata->base + reg); + if (!rc) { + writel_relaxed(0x00, drvdata->base + reg); + dev_dbg(drvdata->dev, "REPLICATOR enabled\n"); + } + CS_LOCK(drvdata->base); - dev_dbg(drvdata->dev, "REPLICATOR enabled\n"); - return 0; + return rc; } static void replicator_disable(struct coresight_device *csdev, int inport, @@ -107,6 +117,9 @@ static void replicator_disable(struct coresight_device *csdev, int inport, /* disable the flow of ATB data through port */ writel_relaxed(0xff, drvdata->base + reg); + if ((readl_relaxed(drvdata->base + REPLICATOR_IDFILTER0) == 0xff) && + (readl_relaxed(drvdata->base + REPLICATOR_IDFILTER1) == 0xff)) + coresight_disclaim_device_unlocked(drvdata->base); CS_LOCK(drvdata->base); dev_dbg(drvdata->dev, "REPLICATOR disabled\n"); From 73afa32f5b62ebf07bf84970e4f4464f3c8adbd7 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:18:16 -0600 Subject: [PATCH 0523/3715] UPSTREAM: coreisght: tmc: Claim device before use Use CLAIM tags to make sure the device is available for use. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 4d3ebd3658d8b87d8ead979725305adc6fae3855). Bug: 140266694 Change-Id: I90dad9f4cc25e21192eba9f647b49a490ae69e16 Signed-off-by: Yabin Cui --- .../hwtracing/coresight/coresight-tmc-etf.c | 22 ++++++++++++++++--- .../hwtracing/coresight/coresight-tmc-etr.c | 4 ++++ 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index 763eac6dcaae..a3d3f1bf5913 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -47,6 +47,11 @@ static void __tmc_etb_enable_hw(struct tmc_drvdata *drvdata) static int tmc_etb_enable_hw(struct tmc_drvdata *drvdata) { + int rc = coresight_claim_device(drvdata->base); + + if (rc) + return rc; + __tmc_etb_enable_hw(drvdata); return 0; } @@ -77,7 +82,7 @@ done: return; } -static void tmc_etb_disable_hw(struct tmc_drvdata *drvdata) +static void __tmc_etb_disable_hw(struct tmc_drvdata *drvdata) { CS_UNLOCK(drvdata->base); @@ -93,6 +98,12 @@ static void tmc_etb_disable_hw(struct tmc_drvdata *drvdata) CS_LOCK(drvdata->base); } +static void tmc_etb_disable_hw(struct tmc_drvdata *drvdata) +{ + coresight_disclaim_device(drvdata); + __tmc_etb_disable_hw(drvdata); +} + static void __tmc_etf_enable_hw(struct tmc_drvdata *drvdata) { CS_UNLOCK(drvdata->base); @@ -111,6 +122,11 @@ static void __tmc_etf_enable_hw(struct tmc_drvdata *drvdata) static int tmc_etf_enable_hw(struct tmc_drvdata *drvdata) { + int rc = coresight_claim_device(drvdata->base); + + if (rc) + return rc; + __tmc_etf_enable_hw(drvdata); return 0; } @@ -121,7 +137,7 @@ static void tmc_etf_disable_hw(struct tmc_drvdata *drvdata) tmc_flush_and_stop(drvdata); tmc_disable_hw(drvdata); - + coresight_disclaim_device_unlocked(drvdata->base); CS_LOCK(drvdata->base); } @@ -567,7 +583,7 @@ int tmc_read_prepare_etb(struct tmc_drvdata *drvdata) /* Disable the TMC if need be */ if (drvdata->mode == CS_MODE_SYSFS) - tmc_etb_disable_hw(drvdata); + __tmc_etb_disable_hw(drvdata); drvdata->reading = true; out: diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 956401b6a736..592e979a4eea 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -1002,6 +1002,9 @@ static int tmc_etr_enable_hw(struct tmc_drvdata *drvdata, * this on. */ rc = tmc_etr_enable_catu(drvdata, etr_buf); + if (rc) + return rc; + rc = coresight_claim_device(drvdata->base); if (!rc) { drvdata->etr_buf = etr_buf; __tmc_etr_enable_hw(drvdata); @@ -1088,6 +1091,7 @@ static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata) __tmc_etr_disable_hw(drvdata); /* Disable CATU device if this ETR is connected to one */ tmc_etr_disable_catu(drvdata); + coresight_disclaim_device(drvdata->base); /* Reset the ETR buf used by hardware */ drvdata->etr_buf = NULL; } From ebc9e6aab2e82134bcd385199d473f98faf7413c Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 30 Nov 2018 11:43:03 -0700 Subject: [PATCH 0524/3715] UPSTREAM: coresight: etb10: Add support for CLAIM tag Following in the footstep of what was done for other CoreSight devices, add CLAIM tag support to ETB10 in order to synchronise access to the HW between the kernel and an external agent. Signed-off-by: Mathieu Poirier Reviewed-by: Suzuki K Poulose Signed-off-by: Greg Kroah-Hartman (Upstream commit acaf5a06b9718ca3499ccd0b6fd9ec461cd53554). Bug: 140266694 Change-Id: I7f66ad4cff09410c321a6711367e66612639856f Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-etb10.c | 23 ++++++++++++++----- 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index 020c61c10fea..d73197f55692 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -144,6 +144,11 @@ static void __etb_enable_hw(struct etb_drvdata *drvdata) static int etb_enable_hw(struct etb_drvdata *drvdata) { + int rc = coresight_claim_device(drvdata->base); + + if (rc) + return rc; + __etb_enable_hw(drvdata); return 0; } @@ -231,7 +236,7 @@ static int etb_enable(struct coresight_device *csdev, u32 mode, void *data) return 0; } -static void etb_disable_hw(struct etb_drvdata *drvdata) +static void __etb_disable_hw(struct etb_drvdata *drvdata) { u32 ffcr; @@ -321,6 +326,13 @@ static void etb_dump_hw(struct etb_drvdata *drvdata) CS_LOCK(drvdata->base); } +static void etb_disable_hw(struct etb_drvdata *drvdata) +{ + __etb_disable_hw(drvdata); + etb_dump_hw(drvdata); + coresight_disclaim_device(drvdata->base); +} + static void etb_disable(struct coresight_device *csdev) { struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); @@ -331,7 +343,6 @@ static void etb_disable(struct coresight_device *csdev) /* Disable the ETB only if it needs to */ if (drvdata->mode != CS_MODE_DISABLED) { etb_disable_hw(drvdata); - etb_dump_hw(drvdata); drvdata->mode = CS_MODE_DISABLED; } spin_unlock_irqrestore(&drvdata->spinlock, flags); @@ -410,7 +421,7 @@ static unsigned long etb_update_buffer(struct coresight_device *csdev, capacity = drvdata->buffer_depth * ETB_FRAME_SIZE_WORDS; - etb_disable_hw(drvdata); + __etb_disable_hw(drvdata); CS_UNLOCK(drvdata->base); /* unit is in words, not bytes */ @@ -518,7 +529,7 @@ static unsigned long etb_update_buffer(struct coresight_device *csdev, handle->head = (cur * PAGE_SIZE) + offset; to_read = buf->nr_pages << PAGE_SHIFT; } - etb_enable_hw(drvdata); + __etb_enable_hw(drvdata); CS_LOCK(drvdata->base); return to_read; @@ -542,9 +553,9 @@ static void etb_dump(struct etb_drvdata *drvdata) spin_lock_irqsave(&drvdata->spinlock, flags); if (drvdata->mode == CS_MODE_SYSFS) { - etb_disable_hw(drvdata); + __etb_disable_hw(drvdata); etb_dump_hw(drvdata); - etb_enable_hw(drvdata); + __etb_enable_hw(drvdata); } spin_unlock_irqrestore(&drvdata->spinlock, flags); From b1883d534c4e205f3e77c8ff8dbd7d83c54d6d2c Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 5 Feb 2019 16:24:56 -0700 Subject: [PATCH 0525/3715] BACKPORT: perf/aux: Make perf_event accessible to setup_aux() Backport: drop changes to perf_cpum_sf.c and arm_spe_pmu.c as setup_aux() hasn't been used in them. When pmu::setup_aux() is called the coresight PMU needs to know which sink to use for the session by looking up the information in the event's attr::config2 field. As such simply replace the cpu information by the complete perf_event structure and change all affected customers. Signed-off-by: Mathieu Poirier Reviewed-by: Suzuki K Poulose Acked-by: Peter Zijlstra (Intel) Signed-off-by: Greg Kroah-Hartman (Upstream commit e11a5795cb7cd1e25bbd1697baa109943938c0f6). Bug: 140266694 Change-Id: I29dd9ff70e84372cd02b6e65efe25323c0ed7ba0 Signed-off-by: Yabin Cui --- arch/x86/events/intel/bts.c | 4 +++- arch/x86/events/intel/pt.c | 5 +++-- drivers/hwtracing/coresight/coresight-etm-perf.c | 6 +++--- include/linux/perf_event.h | 2 +- kernel/events/ring_buffer.c | 2 +- 5 files changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 24ffa1e88cf9..7139f6bf27ad 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -77,10 +77,12 @@ static size_t buf_size(struct page *page) } static void * -bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite) +bts_buffer_setup_aux(struct perf_event *event, void **pages, + int nr_pages, bool overwrite) { struct bts_buffer *buf; struct page *page; + int cpu = event->cpu; int node = (cpu == -1) ? cpu : cpu_to_node(cpu); unsigned long offset; size_t size = nr_pages << PAGE_SHIFT; diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 0661227d935c..7f7e03bf38cf 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -1104,10 +1104,11 @@ static int pt_buffer_init_topa(struct pt_buffer *buf, unsigned long nr_pages, * Return: Our private PT buffer structure. */ static void * -pt_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool snapshot) +pt_buffer_setup_aux(struct perf_event *event, void **pages, + int nr_pages, bool snapshot) { struct pt_buffer *buf; - int node, ret; + int node, ret, cpu = event->cpu; if (!nr_pages) return NULL; diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index b6c70a8de2f6..aa8d2f4ef9ef 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -188,15 +188,15 @@ static void etm_free_aux(void *data) schedule_work(&event_data->work); } -static void *etm_setup_aux(int event_cpu, void **pages, +static void *etm_setup_aux(struct perf_event *event, void **pages, int nr_pages, bool overwrite) { - int cpu; + int cpu = event->cpu; cpumask_t *mask; struct coresight_device *sink; struct etm_event_data *event_data = NULL; - event_data = alloc_event_data(event_cpu); + event_data = alloc_event_data(cpu); if (!event_data) return NULL; INIT_WORK(&event_data->work, free_event_data); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 987573d4c05c..b2c234a496f5 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -408,7 +408,7 @@ struct pmu { /* * Set up pmu-private data structures for an AUX area */ - void *(*setup_aux) (int cpu, void **pages, + void *(*setup_aux) (struct perf_event *event, void **pages, int nr_pages, bool overwrite); /* optional */ diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index f3a69a4f0d57..20f6a50fb40b 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -668,7 +668,7 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event, goto out; } - rb->aux_priv = event->pmu->setup_aux(event->cpu, rb->aux_pages, nr_pages, + rb->aux_priv = event->pmu->setup_aux(event, rb->aux_pages, nr_pages, overwrite); if (!rb->aux_priv) goto out; From 09d6e5b552cd55400c3daa18a6daad00161a6001 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 5 Feb 2019 16:24:57 -0700 Subject: [PATCH 0526/3715] UPSTREAM: coresight: perf: Add "sinks" group to PMU directory Add a "sinks" directory entry so that users can see all the sinks available in the system in a single place. Individual sink are added as they are registered with the coresight bus. Signed-off-by: Mathieu Poirier Acked-by: Peter Zijlstra (Intel) Reviewed-by: Suzuki K Poulose Signed-off-by: Greg Kroah-Hartman (Upstream commit 988036f9d322cbd787d8f6a776dbe903d05bae22). Bug: 140266694 Change-Id: Ida34e7f137b9d7e9094e603408ed7cca8be0fbd5 Signed-off-by: Yabin Cui --- .../hwtracing/coresight/coresight-etm-perf.c | 82 +++++++++++++++++++ .../hwtracing/coresight/coresight-etm-perf.h | 6 +- drivers/hwtracing/coresight/coresight.c | 18 ++++ include/linux/coresight.h | 7 +- 4 files changed, 110 insertions(+), 3 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index aa8d2f4ef9ef..b27c01c3f158 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -54,8 +55,18 @@ static const struct attribute_group etm_pmu_format_group = { .attrs = etm_config_formats_attr, }; +static struct attribute *etm_config_sinks_attr[] = { + NULL, +}; + +static const struct attribute_group etm_pmu_sinks_group = { + .name = "sinks", + .attrs = etm_config_sinks_attr, +}; + static const struct attribute_group *etm_pmu_attr_groups[] = { &etm_pmu_format_group, + &etm_pmu_sinks_group, NULL, }; @@ -497,6 +508,77 @@ int etm_perf_symlink(struct coresight_device *csdev, bool link) return 0; } +static ssize_t etm_perf_sink_name_show(struct device *dev, + struct device_attribute *dattr, + char *buf) +{ + struct dev_ext_attribute *ea; + + ea = container_of(dattr, struct dev_ext_attribute, attr); + return scnprintf(buf, PAGE_SIZE, "0x%lx\n", (unsigned long)(ea->var)); +} + +int etm_perf_add_symlink_sink(struct coresight_device *csdev) +{ + int ret; + unsigned long hash; + const char *name; + struct device *pmu_dev = etm_pmu.dev; + struct device *pdev = csdev->dev.parent; + struct dev_ext_attribute *ea; + + if (csdev->type != CORESIGHT_DEV_TYPE_SINK && + csdev->type != CORESIGHT_DEV_TYPE_LINKSINK) + return -EINVAL; + + if (csdev->ea != NULL) + return -EINVAL; + + if (!etm_perf_up) + return -EPROBE_DEFER; + + ea = devm_kzalloc(pdev, sizeof(*ea), GFP_KERNEL); + if (!ea) + return -ENOMEM; + + name = dev_name(pdev); + /* See function coresight_get_sink_by_id() to know where this is used */ + hash = hashlen_hash(hashlen_string(NULL, name)); + + ea->attr.attr.name = devm_kstrdup(pdev, name, GFP_KERNEL); + if (!ea->attr.attr.name) + return -ENOMEM; + + ea->attr.attr.mode = 0444; + ea->attr.show = etm_perf_sink_name_show; + ea->var = (unsigned long *)hash; + + ret = sysfs_add_file_to_group(&pmu_dev->kobj, + &ea->attr.attr, "sinks"); + + if (!ret) + csdev->ea = ea; + + return ret; +} + +void etm_perf_del_symlink_sink(struct coresight_device *csdev) +{ + struct device *pmu_dev = etm_pmu.dev; + struct dev_ext_attribute *ea = csdev->ea; + + if (csdev->type != CORESIGHT_DEV_TYPE_SINK && + csdev->type != CORESIGHT_DEV_TYPE_LINKSINK) + return; + + if (!ea) + return; + + sysfs_remove_file_from_group(&pmu_dev->kobj, + &ea->attr.attr, "sinks"); + csdev->ea = NULL; +} + static int __init etm_perf_init(void) { int ret; diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.h b/drivers/hwtracing/coresight/coresight-etm-perf.h index ab7732f98484..56ae1e68c0eb 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.h +++ b/drivers/hwtracing/coresight/coresight-etm-perf.h @@ -70,6 +70,8 @@ struct etm_event_data { #ifdef CONFIG_CORESIGHT int etm_perf_symlink(struct coresight_device *csdev, bool link); +int etm_perf_add_symlink_sink(struct coresight_device *csdev); +void etm_perf_del_symlink_sink(struct coresight_device *csdev); static inline void *etm_perf_sink_config(struct perf_output_handle *handle) { struct etm_event_data *data = perf_get_aux(handle); @@ -81,7 +83,9 @@ static inline void *etm_perf_sink_config(struct perf_output_handle *handle) #else static inline int etm_perf_symlink(struct coresight_device *csdev, bool link) { return -EINVAL; } - +int etm_perf_add_symlink_sink(struct coresight_device *csdev) +{ return -EINVAL; } +void etm_perf_del_symlink_sink(struct coresight_device *csdev) {} static inline void *etm_perf_sink_config(struct perf_output_handle *handle) { return NULL; diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index 0a42b2d48983..daacd65fa942 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -25,6 +25,7 @@ #include #include +#include "coresight-etm-perf.h" #include "coresight-priv.h" static DEFINE_MUTEX(coresight_mutex); @@ -1206,6 +1207,22 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) goto err_out; } + if (csdev->type == CORESIGHT_DEV_TYPE_SINK || + csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) { + ret = etm_perf_add_symlink_sink(csdev); + + if (ret) { + device_unregister(&csdev->dev); + /* + * As with the above, all resources are free'd + * explicitly via coresight_device_release() triggered + * from put_device(), which is in turn called from + * function device_unregister(). + */ + goto err_out; + } + } + mutex_lock(&coresight_mutex); coresight_fixup_device_conns(csdev); @@ -1226,6 +1243,7 @@ EXPORT_SYMBOL_GPL(coresight_register); void coresight_unregister(struct coresight_device *csdev) { + etm_perf_del_symlink_sink(csdev); /* Remove references of that device in the topology */ coresight_remove_conns(csdev); device_unregister(&csdev->dev); diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 0f594e56f5d6..e68cc604c965 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -168,8 +168,9 @@ struct coresight_connection { * @orphan: true if the component has connections that haven't been linked. * @enable: 'true' if component is currently part of an active path. * @activated: 'true' only if a _sink_ has been activated. A sink can be - activated but not yet enabled. Enabling for a _sink_ - happens when a source has been selected for that it. + * activated but not yet enabled. Enabling for a _sink_ + * appens when a source has been selected for that it. + * @ea: Device attribute for sink representation under PMU directory. */ struct coresight_device { struct coresight_connection *conns; @@ -182,7 +183,9 @@ struct coresight_device { atomic_t *refcnt; bool orphan; bool enable; /* true only if configured as part of a path */ + /* sink specific fields */ bool activated; /* true only if a sink is part of a path */ + struct dev_ext_attribute *ea; }; #define to_coresight_device(d) container_of(d, struct coresight_device, dev) From 6db42003ce821b0aee9c90fb95ee7ed43c98cbb3 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 5 Feb 2019 16:24:58 -0700 Subject: [PATCH 0527/3715] UPSTREAM: coresight: Use event attributes for sink selection This patch uses the information conveyed by perf_event::attr::config2 to select a sink to use for the session. That way a sink can easily be selected to be used by more than one source, something that isn't currently possible with the sysfs implementation. Signed-off-by: Mathieu Poirier Acked-by: Peter Zijlstra (Intel) Reviewed-by: Suzuki K Poulose Signed-off-by: Greg Kroah-Hartman (Upstream commit b5390f4b5e0b75634c7f08c88e97b5fe0e833599). Bug: 140266694 Change-Id: Ic155e8c63d805e4bd34cfca00f43b68eebce4360 Signed-off-by: Yabin Cui --- .../hwtracing/coresight/coresight-etm-perf.c | 24 +++++------ drivers/hwtracing/coresight/coresight-priv.h | 1 + drivers/hwtracing/coresight/coresight.c | 42 +++++++++++++++++++ 3 files changed, 55 insertions(+), 12 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index b27c01c3f158..60fa6cc3f644 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -42,11 +42,14 @@ static DEFINE_PER_CPU(struct coresight_device *, csdev_src); PMU_FORMAT_ATTR(cycacc, "config:" __stringify(ETM_OPT_CYCACC)); PMU_FORMAT_ATTR(timestamp, "config:" __stringify(ETM_OPT_TS)); PMU_FORMAT_ATTR(retstack, "config:" __stringify(ETM_OPT_RETSTK)); +/* Sink ID - same for all ETMs */ +PMU_FORMAT_ATTR(sinkid, "config2:0-31"); static struct attribute *etm_config_formats_attr[] = { &format_attr_cycacc.attr, &format_attr_timestamp.attr, &format_attr_retstack.attr, + &format_attr_sinkid.attr, NULL, }; @@ -202,6 +205,7 @@ static void etm_free_aux(void *data) static void *etm_setup_aux(struct perf_event *event, void **pages, int nr_pages, bool overwrite) { + u32 id; int cpu = event->cpu; cpumask_t *mask; struct coresight_device *sink; @@ -212,18 +216,14 @@ static void *etm_setup_aux(struct perf_event *event, void **pages, return NULL; INIT_WORK(&event_data->work, free_event_data); - /* - * In theory nothing prevent tracers in a trace session from being - * associated with different sinks, nor having a sink per tracer. But - * until we have HW with this kind of topology we need to assume tracers - * in a trace session are using the same sink. Therefore go through - * the coresight bus and pick the first enabled sink. - * - * When operated from sysFS users are responsible to enable the sink - * while from perf, the perf tools will do it based on the choice made - * on the cmd line. As such the "enable_sink" flag in sysFS is reset. - */ - sink = coresight_get_enabled_sink(true); + /* First get the selected sink from user space. */ + if (event->attr.config2) { + id = (u32)event->attr.config2; + sink = coresight_get_sink_by_id(id); + } else { + sink = coresight_get_enabled_sink(true); + } + if (!sink || !sink_ops(sink)->alloc_buffer) goto err; diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h index 33ceea8c7b99..71b4c37fe88c 100644 --- a/drivers/hwtracing/coresight/coresight-priv.h +++ b/drivers/hwtracing/coresight/coresight-priv.h @@ -154,6 +154,7 @@ void coresight_disable_path(struct list_head *path); int coresight_enable_path(struct list_head *path, u32 mode, void *sink_data); struct coresight_device *coresight_get_sink(struct list_head *path); struct coresight_device *coresight_get_enabled_sink(bool reset); +struct coresight_device *coresight_get_sink_by_id(u32 id); struct list_head *coresight_build_path(struct coresight_device *csdev, struct coresight_device *sink); void coresight_release_path(struct list_head *path); diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index daacd65fa942..457a9cd1e14b 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -548,6 +549,47 @@ struct coresight_device *coresight_get_enabled_sink(bool deactivate) return dev ? to_coresight_device(dev) : NULL; } +static int coresight_sink_by_id(struct device *dev, void *data) +{ + struct coresight_device *csdev = to_coresight_device(dev); + unsigned long hash; + + if (csdev->type == CORESIGHT_DEV_TYPE_SINK || + csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) { + + if (!csdev->ea) + return 0; + /* + * See function etm_perf_add_symlink_sink() to know where + * this comes from. + */ + hash = (unsigned long)csdev->ea->var; + + if ((u32)hash == *(u32 *)data) + return 1; + } + + return 0; +} + +/** + * coresight_get_sink_by_id - returns the sink that matches the id + * @id: Id of the sink to match + * + * The name of a sink is unique, whether it is found on the AMBA bus or + * otherwise. As such the hash of that name can easily be used to identify + * a sink. + */ +struct coresight_device *coresight_get_sink_by_id(u32 id) +{ + struct device *dev = NULL; + + dev = bus_find_device(&coresight_bustype, NULL, &id, + coresight_sink_by_id); + + return dev ? to_coresight_device(dev) : NULL; +} + /* * coresight_grab_device - Power up this device and any of the helper * devices connected to it for trace operation. Since the helper devices From 21561f0aacedb6892c017b6d1fd2cd1b15e65dc7 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Fri, 15 Feb 2019 13:56:54 +0200 Subject: [PATCH 0528/3715] BACKPORT: perf: Copy parent's address filter offsets on clone Backport: context code is different from upstream. When a child event is allocated in the inherit_event() path, the VMA based filter offsets are not copied from the parent, even though the address space mapping of the new task remains the same, which leads to no trace for the new task until exec. Reported-by: Mansour Alharthi Signed-off-by: Alexander Shishkin Tested-by: Mathieu Poirier Acked-by: Peter Zijlstra Cc: Jiri Olsa Fixes: 375637bc5249 ("perf/core: Introduce address range filtering") Link: http://lkml.kernel.org/r/20190215115655.63469-2-alexander.shishkin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo (Upstream commit 18736eef12137c59f60cc9f56dc5bea05c92e0eb). Bug: 140266694 Change-Id: I02acd373cb1ce44c13de8806eff229a56f0062fc Signed-off-by: Yabin Cui --- kernel/events/core.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 3cd1cd4d6e0e..844e53552135 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1222,6 +1222,7 @@ static void put_ctx(struct perf_event_context *ctx) * perf_event_context::lock * perf_event::mmap_mutex * mmap_sem + * perf_addr_filters_head::lock */ static struct perf_event_context * perf_event_ctx_lock_nested(struct perf_event *event, int nesting) @@ -9658,6 +9659,20 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, goto err_per_task; } + /* + * Clone the parent's vma offsets: they are valid until exec() + * even if the mm is not shared with the parent. + */ + if (event->parent) { + struct perf_addr_filters_head *ifh = perf_event_addr_filters(event); + + raw_spin_lock_irq(&ifh->lock); + memcpy(event->addr_filters_offs, + event->parent->addr_filters_offs, + pmu->nr_addr_filters * sizeof(unsigned long)); + raw_spin_unlock_irq(&ifh->lock); + } + /* force hw sync on the address filters */ event->addr_filters_gen = 1; } From dc7366c44da4363ed2e9c10f34c3ca6d04c7538b Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Fri, 15 Feb 2019 13:56:55 +0200 Subject: [PATCH 0529/3715] UPSTREAM: perf, pt, coresight: Fix address filters for vmas with non-zero offset Currently, the address range calculation for file-based filters works as long as the vma that maps the matching part of the object file starts from offset zero into the file (vm_pgoff==0). Otherwise, the resulting filter range would be off by vm_pgoff pages. Another related problem is that in case of a partially matching vma, that is, a vma that matches part of a filter region, the filter range size wouldn't be adjusted. Fix the arithmetics around address filter range calculations, taking into account vma offset, so that the entire calculation is done before the filter configuration is passed to the PMU drivers instead of having those drivers do the final bit of arithmetics. Based on the patch by Adrian Hunter . Reported-by: Adrian Hunter Signed-off-by: Alexander Shishkin Tested-by: Mathieu Poirier Acked-by: Peter Zijlstra Cc: Jiri Olsa Fixes: 375637bc5249 ("perf/core: Introduce address range filtering") Link: http://lkml.kernel.org/r/20190215115655.63469-3-alexander.shishkin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo (Upstream commit c60f83b813e5b25ccd5de7e8c8925c31b3aebcc1). Bug: 140266694 Change-Id: If1c57d0aa69d89280d2d751acf4f6755cf4ae609 Signed-off-by: Yabin Cui --- arch/x86/events/intel/pt.c | 9 ++- .../hwtracing/coresight/coresight-etm-perf.c | 7 +- include/linux/perf_event.h | 7 +- kernel/events/core.c | 81 +++++++++++-------- 4 files changed, 62 insertions(+), 42 deletions(-) diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 7f7e03bf38cf..5af568cc4c8f 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -1209,7 +1209,8 @@ static int pt_event_addr_filters_validate(struct list_head *filters) static void pt_event_addr_filters_sync(struct perf_event *event) { struct perf_addr_filters_head *head = perf_event_addr_filters(event); - unsigned long msr_a, msr_b, *offs = event->addr_filters_offs; + unsigned long msr_a, msr_b; + struct perf_addr_filter_range *fr = event->addr_filter_ranges; struct pt_filters *filters = event->hw.addr_filters; struct perf_addr_filter *filter; int range = 0; @@ -1218,12 +1219,12 @@ static void pt_event_addr_filters_sync(struct perf_event *event) return; list_for_each_entry(filter, &head->list, entry) { - if (filter->path.dentry && !offs[range]) { + if (filter->path.dentry && !fr[range].start) { msr_a = msr_b = 0; } else { /* apply the offset */ - msr_a = filter->offset + offs[range]; - msr_b = filter->size + msr_a - 1; + msr_a = fr[range].start; + msr_b = msr_a + fr[range].size - 1; } filters->filter[range].msr_a = msr_a; diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index 60fa6cc3f644..8207a8032c55 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -453,15 +453,16 @@ static int etm_addr_filters_validate(struct list_head *filters) static void etm_addr_filters_sync(struct perf_event *event) { struct perf_addr_filters_head *head = perf_event_addr_filters(event); - unsigned long start, stop, *offs = event->addr_filters_offs; + unsigned long start, stop; + struct perf_addr_filter_range *fr = event->addr_filter_ranges; struct etm_filters *filters = event->hw.addr_filters; struct etm_filter *etm_filter; struct perf_addr_filter *filter; int i = 0; list_for_each_entry(filter, &head->list, entry) { - start = filter->offset + offs[i]; - stop = start + filter->size; + start = fr[i].start; + stop = start + fr[i].size; etm_filter = &filters->etm_filter[i]; if (filter->range == 1) { diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b2c234a496f5..ac16bac38c03 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -489,6 +489,11 @@ struct perf_addr_filters_head { unsigned int nr_file_filters; }; +struct perf_addr_filter_range { + unsigned long start; + unsigned long size; +}; + /** * enum perf_event_active_state - the states of a event */ @@ -677,7 +682,7 @@ struct perf_event { /* address range filters */ struct perf_addr_filters_head addr_filters; /* vma address array for file-based filders */ - unsigned long *addr_filters_offs; + struct perf_addr_filter_range *addr_filter_ranges; unsigned long addr_filters_gen; void (*destroy)(struct perf_event *); diff --git a/kernel/events/core.c b/kernel/events/core.c index 844e53552135..09973476da9e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2729,7 +2729,7 @@ static int perf_event_stop(struct perf_event *event, int restart) * * (p1) when userspace mappings change as a result of (1) or (2) or (3) below, * we update the addresses of corresponding vmas in - * event::addr_filters_offs array and bump the event::addr_filters_gen; + * event::addr_filter_ranges array and bump the event::addr_filters_gen; * (p2) when an event is scheduled in (pmu::add), it calls * perf_event_addr_filters_sync() which calls pmu::addr_filters_sync() * if the generation has changed since the previous call. @@ -4238,7 +4238,7 @@ static void _free_event(struct perf_event *event) perf_event_free_bpf_prog(event); perf_addr_filters_splice(event, NULL); - kfree(event->addr_filters_offs); + kfree(event->addr_filter_ranges); if (event->destroy) event->destroy(event); @@ -6437,7 +6437,8 @@ static void perf_event_addr_filters_exec(struct perf_event *event, void *data) raw_spin_lock_irqsave(&ifh->lock, flags); list_for_each_entry(filter, &ifh->list, entry) { if (filter->path.dentry) { - event->addr_filters_offs[count] = 0; + event->addr_filter_ranges[count].start = 0; + event->addr_filter_ranges[count].size = 0; restart++; } @@ -7115,28 +7116,47 @@ static bool perf_addr_filter_match(struct perf_addr_filter *filter, return true; } +static bool perf_addr_filter_vma_adjust(struct perf_addr_filter *filter, + struct vm_area_struct *vma, + struct perf_addr_filter_range *fr) +{ + unsigned long vma_size = vma->vm_end - vma->vm_start; + unsigned long off = vma->vm_pgoff << PAGE_SHIFT; + struct file *file = vma->vm_file; + + if (!perf_addr_filter_match(filter, file, off, vma_size)) + return false; + + if (filter->offset < off) { + fr->start = vma->vm_start; + fr->size = min(vma_size, filter->size - (off - filter->offset)); + } else { + fr->start = vma->vm_start + filter->offset - off; + fr->size = min(vma->vm_end - fr->start, filter->size); + } + + return true; +} + static void __perf_addr_filters_adjust(struct perf_event *event, void *data) { struct perf_addr_filters_head *ifh = perf_event_addr_filters(event); struct vm_area_struct *vma = data; - unsigned long off = vma->vm_pgoff << PAGE_SHIFT, flags; - struct file *file = vma->vm_file; struct perf_addr_filter *filter; unsigned int restart = 0, count = 0; + unsigned long flags; if (!has_addr_filter(event)) return; - if (!file) + if (!vma->vm_file) return; raw_spin_lock_irqsave(&ifh->lock, flags); list_for_each_entry(filter, &ifh->list, entry) { - if (perf_addr_filter_match(filter, file, off, - vma->vm_end - vma->vm_start)) { - event->addr_filters_offs[count] = vma->vm_start; + if (perf_addr_filter_vma_adjust(filter, vma, + &event->addr_filter_ranges[count])) restart++; - } count++; } @@ -8362,26 +8382,19 @@ static void perf_addr_filters_splice(struct perf_event *event, * @filter; if so, adjust filter's address range. * Called with mm::mmap_sem down for reading. */ -static unsigned long perf_addr_filter_apply(struct perf_addr_filter *filter, - struct mm_struct *mm) +static void perf_addr_filter_apply(struct perf_addr_filter *filter, + struct mm_struct *mm, + struct perf_addr_filter_range *fr) { struct vm_area_struct *vma; for (vma = mm->mmap; vma; vma = vma->vm_next) { - struct file *file = vma->vm_file; - unsigned long off = vma->vm_pgoff << PAGE_SHIFT; - unsigned long vma_size = vma->vm_end - vma->vm_start; - - if (!file) + if (!vma->vm_file) continue; - if (!perf_addr_filter_match(filter, file, off, vma_size)) - continue; - - return vma->vm_start; + if (perf_addr_filter_vma_adjust(filter, vma, fr)) + return; } - - return 0; } /* @@ -8415,15 +8428,15 @@ static void perf_event_addr_filters_apply(struct perf_event *event) raw_spin_lock_irqsave(&ifh->lock, flags); list_for_each_entry(filter, &ifh->list, entry) { - event->addr_filters_offs[count] = 0; + event->addr_filter_ranges[count].start = 0; + event->addr_filter_ranges[count].size = 0; /* * Adjust base offset if the filter is associated to a binary * that needs to be mapped: */ if (filter->path.dentry) - event->addr_filters_offs[count] = - perf_addr_filter_apply(filter, mm); + perf_addr_filter_apply(filter, mm, &event->addr_filter_ranges[count]); count++; } @@ -9651,10 +9664,10 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, goto err_pmu; if (has_addr_filter(event)) { - event->addr_filters_offs = kcalloc(pmu->nr_addr_filters, - sizeof(unsigned long), - GFP_KERNEL); - if (!event->addr_filters_offs) { + event->addr_filter_ranges = kcalloc(pmu->nr_addr_filters, + sizeof(struct perf_addr_filter_range), + GFP_KERNEL); + if (!event->addr_filter_ranges) { err = -ENOMEM; goto err_per_task; } @@ -9667,9 +9680,9 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, struct perf_addr_filters_head *ifh = perf_event_addr_filters(event); raw_spin_lock_irq(&ifh->lock); - memcpy(event->addr_filters_offs, - event->parent->addr_filters_offs, - pmu->nr_addr_filters * sizeof(unsigned long)); + memcpy(event->addr_filter_ranges, + event->parent->addr_filter_ranges, + pmu->nr_addr_filters * sizeof(struct perf_addr_filter_range)); raw_spin_unlock_irq(&ifh->lock); } @@ -9691,7 +9704,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, return event; err_addr_filters: - kfree(event->addr_filters_offs); + kfree(event->addr_filter_ranges); err_per_task: exclusive_event_destroy(event); From c5c70b9818c2d70903a4d0306238f2e96d4ab836 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Fri, 29 Mar 2019 11:12:12 +0200 Subject: [PATCH 0530/3715] UPSTREAM: perf/core: Fix the address filtering fix The following recent commit: c60f83b813e5 ("perf, pt, coresight: Fix address filters for vmas with non-zero offset") changes the address filtering logic to communicate filter ranges to the PMU driver via a single address range object, instead of having the driver do the final bit of math. That change forgets to take into account kernel filters, which are not calculated the same way as DSO based filters. Fix that by passing the kernel filters the same way as file-based filters. This doesn't require any additional changes in the drivers. Reported-by: Adrian Hunter Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: c60f83b813e5 ("perf, pt, coresight: Fix address filters for vmas with non-zero offset") Link: https://lkml.kernel.org/r/20190329091212.29870-1-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar (Upstream commit 52a44f83fc2d64a5e74d5d685fad2fecc7b7a321). Bug: 140266694 Change-Id: I9024fd777e10b992ea7ad89c565204d08d1a26c8 Signed-off-by: Yabin Cui --- kernel/events/core.c | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 09973476da9e..121544ef3371 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8417,26 +8417,29 @@ static void perf_event_addr_filters_apply(struct perf_event *event) if (task == TASK_TOMBSTONE) return; - if (!ifh->nr_file_filters) - return; + if (ifh->nr_file_filters) { + mm = get_task_mm(event->ctx->task); + if (!mm) + goto restart; - mm = get_task_mm(event->ctx->task); - if (!mm) - goto restart; - - down_read(&mm->mmap_sem); + down_read(&mm->mmap_sem); + } raw_spin_lock_irqsave(&ifh->lock, flags); list_for_each_entry(filter, &ifh->list, entry) { - event->addr_filter_ranges[count].start = 0; - event->addr_filter_ranges[count].size = 0; + if (filter->path.dentry) { + /* + * Adjust base offset if the filter is associated to a + * binary that needs to be mapped: + */ + event->addr_filter_ranges[count].start = 0; + event->addr_filter_ranges[count].size = 0; - /* - * Adjust base offset if the filter is associated to a binary - * that needs to be mapped: - */ - if (filter->path.dentry) perf_addr_filter_apply(filter, mm, &event->addr_filter_ranges[count]); + } else { + event->addr_filter_ranges[count].start = filter->offset; + event->addr_filter_ranges[count].size = filter->size; + } count++; } @@ -8444,9 +8447,11 @@ static void perf_event_addr_filters_apply(struct perf_event *event) event->addr_filters_gen++; raw_spin_unlock_irqrestore(&ifh->lock, flags); - up_read(&mm->mmap_sem); + if (ifh->nr_file_filters) { + up_read(&mm->mmap_sem); - mmput(mm); + mmput(mm); + } restart: perf_event_stop(event, 1); From 3804fb8a2587759175359825f91bd6661d224ded Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 25 Apr 2019 13:52:39 -0600 Subject: [PATCH 0531/3715] UPSTREAM: coresight: catu: fix clang build warning Clang points out a syntax error, as the etr_catu_buf_ops structure is declared 'static' before the type is known: In file included from drivers/hwtracing/coresight/coresight-tmc-etr.c:12: drivers/hwtracing/coresight/coresight-catu.h:116:40: warning: tentative definition of variable with internal linkage has incomplete non-array type 'const struct etr_buf_operations' [-Wtentative-definition-incomplete-type] static const struct etr_buf_operations etr_catu_buf_ops; ^ drivers/hwtracing/coresight/coresight-catu.h:116:21: note: forward declaration of 'struct etr_buf_operations' static const struct etr_buf_operations etr_catu_buf_ops; This seems worth fixing in the code, so replace pointer to the empty constant structure with a NULL pointer. We need an extra NULL pointer check here, but the result should be better object code otherwise, avoiding the silly empty structure. Fixes: 434d611cddef ("coresight: catu: Plug in CATU as a backend for ETR buffer") Signed-off-by: Arnd Bergmann Reviewed-by: Suzuki K Poulose [Fixed line over 80 characters] Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 59d63de076607a9334b11628b5c3ddda1d8f56cd). Bug: 140266694 Change-Id: Ia40a6c5cd1448ea81d2a07ff93c98526f6be038e Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-catu.h | 5 ----- drivers/hwtracing/coresight/coresight-tmc-etr.c | 5 +++-- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-catu.h b/drivers/hwtracing/coresight/coresight-catu.h index 1b281f0dcccc..1d2ad183fd92 100644 --- a/drivers/hwtracing/coresight/coresight-catu.h +++ b/drivers/hwtracing/coresight/coresight-catu.h @@ -109,11 +109,6 @@ static inline bool coresight_is_catu_device(struct coresight_device *csdev) return true; } -#ifdef CONFIG_CORESIGHT_CATU extern const struct etr_buf_operations etr_catu_buf_ops; -#else -/* Dummy declaration for the CATU ops */ -static const struct etr_buf_operations etr_catu_buf_ops; -#endif #endif diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 592e979a4eea..ad1acea60957 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -783,7 +783,8 @@ static inline void tmc_etr_disable_catu(struct tmc_drvdata *drvdata) static const struct etr_buf_operations *etr_buf_ops[] = { [ETR_MODE_FLAT] = &etr_flat_buf_ops, [ETR_MODE_ETR_SG] = &etr_sg_buf_ops, - [ETR_MODE_CATU] = &etr_catu_buf_ops, + [ETR_MODE_CATU] = IS_ENABLED(CONFIG_CORESIGHT_CATU) + ? &etr_catu_buf_ops : NULL, }; static inline int tmc_etr_mode_alloc_buf(int mode, @@ -797,7 +798,7 @@ static inline int tmc_etr_mode_alloc_buf(int mode, case ETR_MODE_FLAT: case ETR_MODE_ETR_SG: case ETR_MODE_CATU: - if (etr_buf_ops[mode]->alloc) + if (etr_buf_ops[mode] && etr_buf_ops[mode]->alloc) rc = etr_buf_ops[mode]->alloc(drvdata, etr_buf, node, pages); if (!rc) From fbbc71a62a644f35cdedf8b1274dcd01b425e6ac Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 25 Apr 2019 13:52:41 -0600 Subject: [PATCH 0532/3715] UPSTREAM: coresight: tmc: Report DMA setup failures If we failed to setup the DMA mask for TMC-ETR, report the error before failing the probe. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 08be874775f13a0acffff864440d10b8b1397912). Bug: 140266694 Change-Id: I1e8a32189eef2789a73cc76105ebbace88e9c7e6 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-tmc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc.c b/drivers/hwtracing/coresight/coresight-tmc.c index dd7fb2782722..935c05f6f4c0 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.c +++ b/drivers/hwtracing/coresight/coresight-tmc.c @@ -348,6 +348,8 @@ static inline bool tmc_etr_can_use_sg(struct tmc_drvdata *drvdata) static int tmc_etr_setup_caps(struct tmc_drvdata *drvdata, u32 devid, void *dev_caps) { + int rc; + u32 dma_mask = 0; /* Set the unadvertised capabilities */ @@ -377,7 +379,10 @@ static int tmc_etr_setup_caps(struct tmc_drvdata *drvdata, dma_mask = 40; } - return dma_set_mask_and_coherent(drvdata->dev, DMA_BIT_MASK(dma_mask)); + rc = dma_set_mask_and_coherent(drvdata->dev, DMA_BIT_MASK(dma_mask)); + if (rc) + dev_err(drvdata->dev, "Failed to setup DMA mask: %d\n", rc); + return rc; } static int tmc_probe(struct amba_device *adev, const struct amba_id *id) From a88f3f88e8bf27d76d82969e02279a18af306b5d Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 25 Apr 2019 13:52:46 -0600 Subject: [PATCH 0533/3715] UPSTREAM: coresight: Fix freeing up the coresight connections With commit c2c729415b2d2132 ("coresight: platform: Cleanup coresight connection handling"), we switched to re-using coresight_connections for the coresight_device. However, that introduced a mismatch in the alloc/free of the connections. The allocation is made using devm_*, while we use kfree() to release the memory when a device is released (even though we don't support this at the moment). Fix this by leaving it to the automatic freeing of the memory. Fixes: c2c729415b2d2132 ("coresight: platform: Cleanup coresight connection handling") Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 1b015ef28a443a89985db2a67080bf7c660d66d2). Bug: 140266694 Change-Id: I7905bc3200376e85d2121d4ffdf2a2f3c26ad07c Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index 457a9cd1e14b..d3fac9b1eb2d 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -980,7 +980,6 @@ static void coresight_device_release(struct device *dev) { struct coresight_device *csdev = to_coresight_device(dev); - kfree(csdev->conns); kfree(csdev->refcnt); kfree(csdev); } From 81491131b8e9c08ee3048bab61e9ba5dc5fda993 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 25 Apr 2019 13:52:50 -0600 Subject: [PATCH 0534/3715] UPSTREAM: coresight: tmc: Cleanup power management Drop the power only if we were successful in probing the device. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 263556950f2ad764bad4f98fbc5a9d711fd653a6). Bug: 140266694 Change-Id: I8f53b3ef0d438b01c53e8132360f776b0171704e Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-tmc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc.c b/drivers/hwtracing/coresight/coresight-tmc.c index 935c05f6f4c0..b2d3926b8bfe 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.c +++ b/drivers/hwtracing/coresight/coresight-tmc.c @@ -440,8 +440,6 @@ static int tmc_probe(struct amba_device *adev, const struct amba_id *id) drvdata->size = readl_relaxed(drvdata->base + TMC_RSZ) * 4; } - pm_runtime_put(&adev->dev); - desc.pdata = pdata; desc.dev = dev; desc.groups = coresight_tmc_groups; @@ -483,6 +481,8 @@ static int tmc_probe(struct amba_device *adev, const struct amba_id *id) ret = misc_register(&drvdata->miscdev); if (ret) coresight_unregister(drvdata->csdev); + else + pm_runtime_put(&adev->dev); out: return ret; } From 8fed41fab1a7afaa2af7f5eb81b9157c02666fb3 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 25 Apr 2019 13:52:51 -0600 Subject: [PATCH 0535/3715] UPSTREAM: coresight: pmu: Adding ITRACE property to cs_etm PMU Add to the capabilities the ITRACE property so that ITRACE START events are generated when the PMU is switched on by the core. Signed-off-by: Mathieu Poirier Acked-by: Suzuki K Poulose Tested-by: Leo Yan Tested-by: Robert Walker Signed-off-by: Greg Kroah-Hartman (Upstream commit 6fcdba33ab0d43e5fcb557897c1c81541927f615). Bug: 140266694 Change-Id: Ib1db39cf698c22e0ab7e12a08c8680df777ce09c Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-etm-perf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index 8207a8032c55..a41038856ad0 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -584,7 +584,8 @@ static int __init etm_perf_init(void) { int ret; - etm_pmu.capabilities = PERF_PMU_CAP_EXCLUSIVE; + etm_pmu.capabilities = (PERF_PMU_CAP_EXCLUSIVE | + PERF_PMU_CAP_ITRACE); etm_pmu.attr_groups = etm_pmu_attr_groups; etm_pmu.task_ctx_nr = perf_sw_context; From be6982983f7349619fcb934e2af1757c09512db9 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 25 Apr 2019 13:52:52 -0600 Subject: [PATCH 0536/3715] UPSTREAM: coresight: etm4x: Add kernel configuration for CONTEXTID Set the proper bit in the configuration register when contextID tracing has been requested by user space. That way PE_CONTEXT elements are generated by the tracers when a process is installed on a CPU. Signed-off-by: Mathieu Poirier Reviewed-by: Suzuki K Poulose Tested-by: Leo Yan Tested-by: Robert Walker Signed-off-by: Greg Kroah-Hartman (Upstream commit 82500a810ee26ac542d128499d7adae163e61adb). Bug: 140266694 Change-Id: Icc9286f22880d2a9cf1af521ccb13c9f89427444 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/Kconfig | 1 + drivers/hwtracing/coresight/coresight-etm-perf.c | 2 ++ drivers/hwtracing/coresight/coresight-etm4x.c | 5 +++++ include/linux/coresight-pmu.h | 2 ++ tools/include/linux/coresight-pmu.h | 2 ++ 5 files changed, 12 insertions(+) diff --git a/drivers/hwtracing/coresight/Kconfig b/drivers/hwtracing/coresight/Kconfig index ad34380cac49..44d1650f398e 100644 --- a/drivers/hwtracing/coresight/Kconfig +++ b/drivers/hwtracing/coresight/Kconfig @@ -75,6 +75,7 @@ config CORESIGHT_SOURCE_ETM4X bool "CoreSight Embedded Trace Macrocell 4.x driver" depends on ARM64 select CORESIGHT_LINKS_AND_SINKS + select PID_IN_CONTEXTIDR help This driver provides support for the ETM4.x tracer module, tracing the instructions that a processor is executing. This is primarily useful diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index a41038856ad0..e4222c74ec34 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -40,6 +40,7 @@ static DEFINE_PER_CPU(struct coresight_device *, csdev_src); /* ETMv3.5/PTM's ETMCR is 'config' */ PMU_FORMAT_ATTR(cycacc, "config:" __stringify(ETM_OPT_CYCACC)); +PMU_FORMAT_ATTR(contextid, "config:" __stringify(ETM_OPT_CTXTID)); PMU_FORMAT_ATTR(timestamp, "config:" __stringify(ETM_OPT_TS)); PMU_FORMAT_ATTR(retstack, "config:" __stringify(ETM_OPT_RETSTK)); /* Sink ID - same for all ETMs */ @@ -47,6 +48,7 @@ PMU_FORMAT_ATTR(sinkid, "config2:0-31"); static struct attribute *etm_config_formats_attr[] = { &format_attr_cycacc.attr, + &format_attr_contextid.attr, &format_attr_timestamp.attr, &format_attr_retstack.attr, &format_attr_sinkid.attr, diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c index d843b240583a..df0fb12db272 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.c +++ b/drivers/hwtracing/coresight/coresight-etm4x.c @@ -252,6 +252,11 @@ static int etm4_parse_event_config(struct etmv4_drvdata *drvdata, if (attr->config & BIT(ETM_OPT_TS)) /* bit[11], Global timestamp tracing bit */ config->cfg |= BIT(11); + + if (attr->config & BIT(ETM_OPT_CTXTID)) + /* bit[6], Context ID tracing bit */ + config->cfg |= BIT(ETM4_CFG_BIT_CTXTID); + /* return stack - enable if selected and supported */ if ((attr->config & BIT(ETM_OPT_RETSTK)) && drvdata->retstack) /* bit[12], Return stack enable bit */ diff --git a/include/linux/coresight-pmu.h b/include/linux/coresight-pmu.h index edfeaba95429..a2681d17a579 100644 --- a/include/linux/coresight-pmu.h +++ b/include/linux/coresight-pmu.h @@ -23,11 +23,13 @@ /* ETMv3.5/PTM's ETMCR config bit */ #define ETM_OPT_CYCACC 12 +#define ETM_OPT_CTXTID 14 #define ETM_OPT_TS 28 #define ETM_OPT_RETSTK 29 /* ETMv4 CONFIGR programming bits for the ETM OPTs */ #define ETM4_CFG_BIT_CYCACC 4 +#define ETM4_CFG_BIT_CTXTID 6 #define ETM4_CFG_BIT_TS 11 #define ETM4_CFG_BIT_RETSTK 12 diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h index edfeaba95429..a2681d17a579 100644 --- a/tools/include/linux/coresight-pmu.h +++ b/tools/include/linux/coresight-pmu.h @@ -23,11 +23,13 @@ /* ETMv3.5/PTM's ETMCR config bit */ #define ETM_OPT_CYCACC 12 +#define ETM_OPT_CTXTID 14 #define ETM_OPT_TS 28 #define ETM_OPT_RETSTK 29 /* ETMv4 CONFIGR programming bits for the ETM OPTs */ #define ETM4_CFG_BIT_CYCACC 4 +#define ETM4_CFG_BIT_CTXTID 6 #define ETM4_CFG_BIT_TS 11 #define ETM4_CFG_BIT_RETSTK 12 From a5b1d5d8932cd16adb64088501882588cc1386e7 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 25 Apr 2019 13:52:53 -0600 Subject: [PATCH 0537/3715] UPSTREAM: coresight: etm4x: Skip selector pair 0 Resource selector pair 0 is always implemented and reserved. As such it should not be explicitly programmed. Signed-off-by: Mathieu Poirier Reviewed-by: Suzuki K Poulose Tested-by: Leo Yan Tested-by: Robert Walker Signed-off-by: Greg Kroah-Hartman (Upstream commit 8013f32a1b352f810cc2acfb00317df2ae3c5dee). Bug: 140266694 Change-Id: I5ab0fd659e0a2bd2bcdd4eaad4fc8d905b378ad8 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-etm4x.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c index df0fb12db272..3d605749a10e 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.c +++ b/drivers/hwtracing/coresight/coresight-etm4x.c @@ -145,8 +145,11 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata) drvdata->base + TRCCNTVRn(i)); } - /* Resource selector pair 0 is always implemented and reserved */ - for (i = 0; i < drvdata->nr_resource * 2; i++) + /* + * Resource selector pair 0 is always implemented and reserved. As + * such start at 2. + */ + for (i = 2; i < drvdata->nr_resource * 2; i++) writel_relaxed(config->res_ctrl[i], drvdata->base + TRCRSCTLRn(i)); From 189574d8d135e05c7bb169a77bdeca23547eda16 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 25 Apr 2019 13:52:54 -0600 Subject: [PATCH 0538/3715] UPSTREAM: coresight: etm4x: Configure tracers to emit timestamps Configure timestamps to be emitted at regular intervals in the trace stream to temporally correlate instructions executed on different CPUs. Signed-off-by: Mathieu Poirier Tested-by: Leo Yan Tested-by: Robert Walker Signed-off-by: Greg Kroah-Hartman (Upstream commit a54e14f810f257d782ddf2fde1df5f3819e612a8). Bug: 140266694 Change-Id: I6fb09e8ca1570e6afa69f7f358a5e9f74d11e6d6 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-etm4x.c | 102 +++++++++++++++++- 1 file changed, 101 insertions(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c index 3d605749a10e..564bbee60cdc 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.c +++ b/drivers/hwtracing/coresight/coresight-etm4x.c @@ -217,6 +217,91 @@ static void etm4_enable_hw_smp_call(void *info) arg->rc = etm4_enable_hw(arg->drvdata); } +/* + * The goal of function etm4_config_timestamp_event() is to configure a + * counter that will tell the tracer to emit a timestamp packet when it + * reaches zero. This is done in order to get a more fine grained idea + * of when instructions are executed so that they can be correlated + * with execution on other CPUs. + * + * To do this the counter itself is configured to self reload and + * TRCRSCTLR1 (always true) used to get the counter to decrement. From + * there a resource selector is configured with the counter and the + * timestamp control register to use the resource selector to trigger the + * event that will insert a timestamp packet in the stream. + */ +static int etm4_config_timestamp_event(struct etmv4_drvdata *drvdata) +{ + int ctridx, ret = -EINVAL; + int counter, rselector; + u32 val = 0; + struct etmv4_config *config = &drvdata->config; + + /* No point in trying if we don't have at least one counter */ + if (!drvdata->nr_cntr) + goto out; + + /* Find a counter that hasn't been initialised */ + for (ctridx = 0; ctridx < drvdata->nr_cntr; ctridx++) + if (config->cntr_val[ctridx] == 0) + break; + + /* All the counters have been configured already, bail out */ + if (ctridx == drvdata->nr_cntr) { + pr_debug("%s: no available counter found\n", __func__); + ret = -ENOSPC; + goto out; + } + + /* + * Searching for an available resource selector to use, starting at + * '2' since every implementation has at least 2 resource selector. + * ETMIDR4 gives the number of resource selector _pairs_, + * hence multiply by 2. + */ + for (rselector = 2; rselector < drvdata->nr_resource * 2; rselector++) + if (!config->res_ctrl[rselector]) + break; + + if (rselector == drvdata->nr_resource * 2) { + pr_debug("%s: no available resource selector found\n", + __func__); + ret = -ENOSPC; + goto out; + } + + /* Remember what counter we used */ + counter = 1 << ctridx; + + /* + * Initialise original and reload counter value to the smallest + * possible value in order to get as much precision as we can. + */ + config->cntr_val[ctridx] = 1; + config->cntrldvr[ctridx] = 1; + + /* Set the trace counter control register */ + val = 0x1 << 16 | /* Bit 16, reload counter automatically */ + 0x0 << 7 | /* Select single resource selector */ + 0x1; /* Resource selector 1, i.e always true */ + + config->cntr_ctrl[ctridx] = val; + + val = 0x2 << 16 | /* Group 0b0010 - Counter and sequencers */ + counter << 0; /* Counter to use */ + + config->res_ctrl[rselector] = val; + + val = 0x0 << 7 | /* Select single resource selector */ + rselector; /* Resource selector */ + + config->ts_ctrl = val; + + ret = 0; +out: + return ret; +} + static int etm4_parse_event_config(struct etmv4_drvdata *drvdata, struct perf_event *event) { @@ -252,9 +337,24 @@ static int etm4_parse_event_config(struct etmv4_drvdata *drvdata, /* TRM: Must program this for cycacc to work */ config->ccctlr = ETM_CYC_THRESHOLD_DEFAULT; } - if (attr->config & BIT(ETM_OPT_TS)) + if (attr->config & BIT(ETM_OPT_TS)) { + /* + * Configure timestamps to be emitted at regular intervals in + * order to correlate instructions executed on different CPUs + * (CPU-wide trace scenarios). + */ + ret = etm4_config_timestamp_event(drvdata); + + /* + * No need to go further if timestamp intervals can't + * be configured. + */ + if (ret) + goto out; + /* bit[11], Global timestamp tracing bit */ config->cfg |= BIT(11); + } if (attr->config & BIT(ETM_OPT_CTXTID)) /* bit[6], Context ID tracing bit */ From c6b211cb04296960c18458138cfecba528b0a702 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 25 Apr 2019 13:52:55 -0600 Subject: [PATCH 0539/3715] UPSTREAM: coresight: Adding return code to sink::disable() operation In preparation to handle device reference counting inside of the sink drivers, add a return code to the sink::disable() operation so that proper action can be taken if a sink has not been disabled. Signed-off-by: Mathieu Poirier Reviewed-by: Suzuki K Poulose Tested-by: Leo Yan Tested-by: Robert Walker Signed-off-by: Greg Kroah-Hartman (Upstream commit 6c817a95d84b8388e97d4e2b6ee361a660e244e9). Bug: 140266694 Change-Id: If256cf9711ae95d27dd23045ee95a7a7a3de657f Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-etb10.c | 3 ++- drivers/hwtracing/coresight/coresight-tmc-etf.c | 5 +++-- drivers/hwtracing/coresight/coresight-tmc-etr.c | 5 +++-- drivers/hwtracing/coresight/coresight-tpiu.c | 3 ++- drivers/hwtracing/coresight/coresight.c | 6 +++++- include/linux/coresight.h | 2 +- 6 files changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index d73197f55692..c1be042d0bcf 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -333,7 +333,7 @@ static void etb_disable_hw(struct etb_drvdata *drvdata) coresight_disclaim_device(drvdata->base); } -static void etb_disable(struct coresight_device *csdev) +static int etb_disable(struct coresight_device *csdev) { struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); unsigned long flags; @@ -348,6 +348,7 @@ static void etb_disable(struct coresight_device *csdev) spin_unlock_irqrestore(&drvdata->spinlock, flags); dev_dbg(drvdata->dev, "ETB disabled\n"); + return 0; } static void *etb_alloc_buffer(struct coresight_device *csdev, int cpu, diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index a3d3f1bf5913..1408809e13e7 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -287,7 +287,7 @@ static int tmc_enable_etf_sink(struct coresight_device *csdev, return 0; } -static void tmc_disable_etf_sink(struct coresight_device *csdev) +static int tmc_disable_etf_sink(struct coresight_device *csdev) { unsigned long flags; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); @@ -295,7 +295,7 @@ static void tmc_disable_etf_sink(struct coresight_device *csdev) spin_lock_irqsave(&drvdata->spinlock, flags); if (drvdata->reading) { spin_unlock_irqrestore(&drvdata->spinlock, flags); - return; + return -EBUSY; } /* Disable the TMC only if it needs to */ @@ -307,6 +307,7 @@ static void tmc_disable_etf_sink(struct coresight_device *csdev) spin_unlock_irqrestore(&drvdata->spinlock, flags); dev_dbg(drvdata->dev, "TMC-ETB/ETF disabled\n"); + return 0; } static int tmc_enable_etf_link(struct coresight_device *csdev, diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index ad1acea60957..6f9db35d4774 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -1405,7 +1405,7 @@ static int tmc_enable_etr_sink(struct coresight_device *csdev, return -EINVAL; } -static void tmc_disable_etr_sink(struct coresight_device *csdev) +static int tmc_disable_etr_sink(struct coresight_device *csdev) { unsigned long flags; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); @@ -1413,7 +1413,7 @@ static void tmc_disable_etr_sink(struct coresight_device *csdev) spin_lock_irqsave(&drvdata->spinlock, flags); if (drvdata->reading) { spin_unlock_irqrestore(&drvdata->spinlock, flags); - return; + return -EBUSY; } /* Disable the TMC only if it needs to */ @@ -1425,6 +1425,7 @@ static void tmc_disable_etr_sink(struct coresight_device *csdev) spin_unlock_irqrestore(&drvdata->spinlock, flags); dev_dbg(drvdata->dev, "TMC-ETR disabled\n"); + return 0; } static const struct coresight_ops_sink tmc_etr_sink_ops = { diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c b/drivers/hwtracing/coresight/coresight-tpiu.c index 51d6b315de18..b1d90412f472 100644 --- a/drivers/hwtracing/coresight/coresight-tpiu.c +++ b/drivers/hwtracing/coresight/coresight-tpiu.c @@ -101,13 +101,14 @@ static void tpiu_disable_hw(struct tpiu_drvdata *drvdata) CS_LOCK(drvdata->base); } -static void tpiu_disable(struct coresight_device *csdev) +static int tpiu_disable(struct coresight_device *csdev) { struct tpiu_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); tpiu_disable_hw(drvdata); dev_dbg(drvdata->dev, "TPIU disabled\n"); + return 0; } static const struct coresight_ops_sink tpiu_sink_ops = { diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index d3fac9b1eb2d..0331eec3a49a 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -246,9 +246,13 @@ static int coresight_enable_sink(struct coresight_device *csdev, static void coresight_disable_sink(struct coresight_device *csdev) { + int ret; + if (atomic_dec_return(csdev->refcnt) == 0) { if (sink_ops(csdev)->disable) { - sink_ops(csdev)->disable(csdev); + ret = sink_ops(csdev)->disable(csdev); + if (ret) + return; csdev->enable = false; } } diff --git a/include/linux/coresight.h b/include/linux/coresight.h index e68cc604c965..8348f4d6c646 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -206,7 +206,7 @@ struct coresight_device { */ struct coresight_ops_sink { int (*enable)(struct coresight_device *csdev, u32 mode, void *data); - void (*disable)(struct coresight_device *csdev); + int (*disable)(struct coresight_device *csdev); void *(*alloc_buffer)(struct coresight_device *csdev, int cpu, void **pages, int nr_pages, bool overwrite); void (*free_buffer)(void *config); From 3a1313156911eb42d290dd99c398f3e3404fa63c Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 25 Apr 2019 13:52:56 -0600 Subject: [PATCH 0540/3715] UPSTREAM: coresight: Move reference counting inside sink drivers When operating in CPU-wide mode with an N:1 source/sink HW topology, multiple CPUs can access a sink concurrently. As such reference counting needs to happen when the device's spinlock is held to avoid racing with other operations (start(), update(), stop()), such as: session A Session B ----- ------- enable_sink atomic_inc(refcount) = 1 ... atomic_dec(refcount) = 0 enable_sink if (refcount == 0) disable_sink atomic_inc() Signed-off-by: Mathieu Poirier Reviewed-by: Suzuki K Poulose Tested-by: Leo Yan Tested-by: Robert Walker Signed-off-by: Greg Kroah-Hartman (Upstream commit f973d88b75703719d39c4d5145079199aaf442b2). Bug: 140266694 Change-Id: I6a2f92b2fcb9fbc8a1ccd991321fbdf649122f88 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-etb10.c | 21 ++++++++++---- .../hwtracing/coresight/coresight-tmc-etf.c | 21 +++++++++++--- .../hwtracing/coresight/coresight-tmc-etr.c | 19 +++++++++++-- drivers/hwtracing/coresight/coresight-tpiu.c | 6 +++- drivers/hwtracing/coresight/coresight.c | 28 +++++++++---------- 5 files changed, 66 insertions(+), 29 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index c1be042d0bcf..3d4adec4a09a 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -12,6 +12,7 @@ * GNU General Public License for more details. */ +#include #include #include #include @@ -167,14 +168,15 @@ static int etb_enable_sysfs(struct coresight_device *csdev) goto out; } - /* Nothing to do, the tracer is already enabled. */ - if (drvdata->mode == CS_MODE_SYSFS) - goto out; + if (drvdata->mode == CS_MODE_DISABLED) { + ret = etb_enable_hw(drvdata); + if (ret) + goto out; - ret = etb_enable_hw(drvdata); - if (!ret) drvdata->mode = CS_MODE_SYSFS; + } + atomic_inc(csdev->refcnt); out: spin_unlock_irqrestore(&drvdata->spinlock, flags); return ret; @@ -204,8 +206,10 @@ static int etb_enable_perf(struct coresight_device *csdev, void *data) goto out; ret = etb_enable_hw(drvdata); - if (!ret) + if (!ret) { drvdata->mode = CS_MODE_PERF; + atomic_inc(csdev->refcnt); + } out: spin_unlock_irqrestore(&drvdata->spinlock, flags); @@ -340,6 +344,11 @@ static int etb_disable(struct coresight_device *csdev) spin_lock_irqsave(&drvdata->spinlock, flags); + if (atomic_dec_return(csdev->refcnt)) { + spin_unlock_irqrestore(&drvdata->spinlock, flags); + return -EBUSY; + } + /* Disable the ETB only if it needs to */ if (drvdata->mode != CS_MODE_DISABLED) { etb_disable_hw(drvdata); diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index 1408809e13e7..2e33c579e150 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -15,6 +15,7 @@ * this program. If not, see . */ +#include #include #include #include @@ -194,8 +195,10 @@ static int tmc_enable_etf_sink_sysfs(struct coresight_device *csdev) * sink is already enabled no memory is needed and the HW need not be * touched. */ - if (drvdata->mode == CS_MODE_SYSFS) + if (drvdata->mode == CS_MODE_SYSFS) { + atomic_inc(csdev->refcnt); goto out; + } /* * If drvdata::buf isn't NULL, memory was allocated for a previous @@ -214,11 +217,13 @@ static int tmc_enable_etf_sink_sysfs(struct coresight_device *csdev) } ret = tmc_etb_enable_hw(drvdata); - if (!ret) + if (!ret) { drvdata->mode = CS_MODE_SYSFS; - else + atomic_inc(csdev->refcnt); + } else { /* Free up the buffer if we failed to enable */ used = false; + } out: spin_unlock_irqrestore(&drvdata->spinlock, flags); @@ -253,8 +258,10 @@ static int tmc_enable_etf_sink_perf(struct coresight_device *csdev, void *data) if (ret) break; ret = tmc_etb_enable_hw(drvdata); - if (!ret) + if (!ret) { drvdata->mode = CS_MODE_PERF; + atomic_inc(csdev->refcnt); + } } while (0); spin_unlock_irqrestore(&drvdata->spinlock, flags); @@ -293,11 +300,17 @@ static int tmc_disable_etf_sink(struct coresight_device *csdev) struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); spin_lock_irqsave(&drvdata->spinlock, flags); + if (drvdata->reading) { spin_unlock_irqrestore(&drvdata->spinlock, flags); return -EBUSY; } + if (atomic_dec_return(csdev->refcnt)) { + spin_unlock_irqrestore(&drvdata->spinlock, flags); + return -EBUSY; + } + /* Disable the TMC only if it needs to */ if (drvdata->mode != CS_MODE_DISABLED) { tmc_etb_disable_hw(drvdata); diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 6f9db35d4774..b6b7a4fb5084 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -15,6 +15,7 @@ * this program. If not, see . */ +#include #include #include #include @@ -1137,8 +1138,10 @@ static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev) * sink is already enabled no memory is needed and the HW need not be * touched, even if the buffer size has changed. */ - if (drvdata->mode == CS_MODE_SYSFS) + if (drvdata->mode == CS_MODE_SYSFS) { + atomic_inc(csdev->refcnt); goto out; + } /* * If we don't have a buffer or it doesn't match the requested size, @@ -1151,8 +1154,10 @@ static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev) } ret = tmc_etr_enable_hw(drvdata, drvdata->sysfs_buf); - if (!ret) + if (!ret) { drvdata->mode = CS_MODE_SYSFS; + atomic_inc(csdev->refcnt); + } out: spin_unlock_irqrestore(&drvdata->spinlock, flags); @@ -1383,8 +1388,10 @@ static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data) etr_perf->head = PERF_IDX2OFF(handle->head, etr_perf); drvdata->perf_data = etr_perf; rc = tmc_etr_enable_hw(drvdata, etr_perf->etr_buf); - if (!rc) + if (!rc) { drvdata->mode = CS_MODE_PERF; + atomic_inc(csdev->refcnt); + } unlock_out: spin_unlock_irqrestore(&drvdata->spinlock, flags); @@ -1411,11 +1418,17 @@ static int tmc_disable_etr_sink(struct coresight_device *csdev) struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); spin_lock_irqsave(&drvdata->spinlock, flags); + if (drvdata->reading) { spin_unlock_irqrestore(&drvdata->spinlock, flags); return -EBUSY; } + if (atomic_dec_return(csdev->refcnt)) { + spin_unlock_irqrestore(&drvdata->spinlock, flags); + return -EBUSY; + } + /* Disable the TMC only if it needs to */ if (drvdata->mode != CS_MODE_DISABLED) { tmc_etr_disable_hw(drvdata); diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c b/drivers/hwtracing/coresight/coresight-tpiu.c index b1d90412f472..0a55fd3b0efa 100644 --- a/drivers/hwtracing/coresight/coresight-tpiu.c +++ b/drivers/hwtracing/coresight/coresight-tpiu.c @@ -12,6 +12,7 @@ * GNU General Public License for more details. */ +#include #include #include #include @@ -80,7 +81,7 @@ static int tpiu_enable(struct coresight_device *csdev, u32 mode, void *__unused) struct tpiu_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); tpiu_enable_hw(drvdata); - + atomic_inc(csdev->refcnt); dev_dbg(drvdata->dev, "TPIU enabled\n"); return 0; } @@ -105,6 +106,9 @@ static int tpiu_disable(struct coresight_device *csdev) { struct tpiu_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + if (atomic_dec_return(csdev->refcnt)) + return -EBUSY; + tpiu_disable_hw(drvdata); dev_dbg(drvdata->dev, "TPIU disabled\n"); diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index 0331eec3a49a..a7cf02b498ab 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -232,14 +232,13 @@ static int coresight_enable_sink(struct coresight_device *csdev, * We need to make sure the "new" session is compatible with the * existing "mode" of operation. */ - if (sink_ops(csdev)->enable) { - ret = sink_ops(csdev)->enable(csdev, mode, data); - if (ret) - return ret; - csdev->enable = true; - } + if (!sink_ops(csdev)->enable) + return -EINVAL; - atomic_inc(csdev->refcnt); + ret = sink_ops(csdev)->enable(csdev, mode, data); + if (ret) + return ret; + csdev->enable = true; return 0; } @@ -248,14 +247,13 @@ static void coresight_disable_sink(struct coresight_device *csdev) { int ret; - if (atomic_dec_return(csdev->refcnt) == 0) { - if (sink_ops(csdev)->disable) { - ret = sink_ops(csdev)->disable(csdev); - if (ret) - return; - csdev->enable = false; - } - } + if (!sink_ops(csdev)->disable) + return; + + ret = sink_ops(csdev)->disable(csdev); + if (ret) + return; + csdev->enable = false; } static int coresight_enable_link(struct coresight_device *csdev, From 361994682a7236000fea36f2fe2b161f664b0d5e Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 25 Apr 2019 13:52:57 -0600 Subject: [PATCH 0541/3715] UPSTREAM: coresight: Properly address errors in sink::disable() functions When disabling a sink the reference counter ensures the operation goes through if nobody else is using it. As such if drvdata::mode is already set do CS_MODE_DISABLED, it is an error and should be reported as such. Signed-off-by: Mathieu Poirier Reviewed-by: Suzuki K Poulose Tested-by: Leo Yan Tested-by: Robert Walker Signed-off-by: Greg Kroah-Hartman (Upstream commit 12dfc9e022aa4e8768d127416bf841c169f8c57e). Bug: 140266694 Change-Id: Ib9d61e50e6ded3a407ff263fef18e5dc711a7cc2 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-etb10.c | 9 ++++----- drivers/hwtracing/coresight/coresight-tmc-etf.c | 9 ++++----- drivers/hwtracing/coresight/coresight-tmc-etr.c | 9 ++++----- 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index 3d4adec4a09a..ee60faaa49d0 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -349,11 +349,10 @@ static int etb_disable(struct coresight_device *csdev) return -EBUSY; } - /* Disable the ETB only if it needs to */ - if (drvdata->mode != CS_MODE_DISABLED) { - etb_disable_hw(drvdata); - drvdata->mode = CS_MODE_DISABLED; - } + /* Complain if we (somehow) got out of sync */ + WARN_ON_ONCE(drvdata->mode == CS_MODE_DISABLED); + etb_disable_hw(drvdata); + drvdata->mode = CS_MODE_DISABLED; spin_unlock_irqrestore(&drvdata->spinlock, flags); dev_dbg(drvdata->dev, "ETB disabled\n"); diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index 2e33c579e150..a7e584aaee74 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -311,11 +311,10 @@ static int tmc_disable_etf_sink(struct coresight_device *csdev) return -EBUSY; } - /* Disable the TMC only if it needs to */ - if (drvdata->mode != CS_MODE_DISABLED) { - tmc_etb_disable_hw(drvdata); - drvdata->mode = CS_MODE_DISABLED; - } + /* Complain if we (somehow) got out of sync */ + WARN_ON_ONCE(drvdata->mode == CS_MODE_DISABLED); + tmc_etb_disable_hw(drvdata); + drvdata->mode = CS_MODE_DISABLED; spin_unlock_irqrestore(&drvdata->spinlock, flags); diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index b6b7a4fb5084..825e10257aab 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -1429,11 +1429,10 @@ static int tmc_disable_etr_sink(struct coresight_device *csdev) return -EBUSY; } - /* Disable the TMC only if it needs to */ - if (drvdata->mode != CS_MODE_DISABLED) { - tmc_etr_disable_hw(drvdata); - drvdata->mode = CS_MODE_DISABLED; - } + /* Complain if we (somehow) got out of sync */ + WARN_ON_ONCE(drvdata->mode == CS_MODE_DISABLED); + tmc_etr_disable_hw(drvdata); + drvdata->mode = CS_MODE_DISABLED; spin_unlock_irqrestore(&drvdata->spinlock, flags); From 4862aa8d2f199bc1d9c42ff04fd73e10a82a534e Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 25 Apr 2019 13:52:58 -0600 Subject: [PATCH 0542/3715] UPSTREAM: coresight: Properly address concurrency in sink::update() functions When operating in CPU-wide trace scenarios and working with an N:1 source/sink HW topology, update() functions need to be made atomic in order to avoid racing with start and stop operations. Signed-off-by: Mathieu Poirier Reviewed-by: Suzuki K Poulose Tested-by: Leo Yan Tested-by: Robert Walker Signed-off-by: Greg Kroah-Hartman (Upstream commit 0916447c8728ed1ffa2bf9feee220f01d802b37e). Bug: 140266694 Change-Id: I821793b8e9e3cebeac72741a18b90779e1c97b6b Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-etb10.c | 4 +++- drivers/hwtracing/coresight/coresight-tmc-etf.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index ee60faaa49d0..ba515b7465b5 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -421,7 +421,7 @@ static unsigned long etb_update_buffer(struct coresight_device *csdev, const u32 *barrier; u32 read_ptr, write_ptr, capacity; u32 status, read_data; - unsigned long offset, to_read; + unsigned long offset, to_read, flags; struct cs_buffers *buf = sink_config; struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); @@ -430,6 +430,7 @@ static unsigned long etb_update_buffer(struct coresight_device *csdev, capacity = drvdata->buffer_depth * ETB_FRAME_SIZE_WORDS; + spin_lock_irqsave(&drvdata->spinlock, flags); __etb_disable_hw(drvdata); CS_UNLOCK(drvdata->base); @@ -540,6 +541,7 @@ static unsigned long etb_update_buffer(struct coresight_device *csdev, } __etb_enable_hw(drvdata); CS_LOCK(drvdata->base); + spin_unlock_irqrestore(&drvdata->spinlock, flags); return to_read; } diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index a7e584aaee74..2c766de9d249 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -427,7 +427,7 @@ static unsigned long tmc_update_etf_buffer(struct coresight_device *csdev, u32 *buf_ptr; u64 read_ptr, write_ptr; u32 status; - unsigned long offset, to_read; + unsigned long offset, to_read, flags; struct cs_buffers *buf = sink_config; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); @@ -438,6 +438,7 @@ static unsigned long tmc_update_etf_buffer(struct coresight_device *csdev, if (WARN_ON_ONCE(drvdata->mode != CS_MODE_PERF)) return 0; + spin_lock_irqsave(&drvdata->spinlock, flags); CS_UNLOCK(drvdata->base); tmc_flush_and_stop(drvdata); @@ -531,6 +532,7 @@ static unsigned long tmc_update_etf_buffer(struct coresight_device *csdev, to_read = buf->nr_pages << PAGE_SHIFT; } CS_LOCK(drvdata->base); + spin_unlock_irqrestore(&drvdata->spinlock, flags); return to_read; } From 878aafdaa48f8d9b06c649b9bf78484901d0b61a Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 25 Apr 2019 13:52:59 -0600 Subject: [PATCH 0543/3715] UPSTREAM: coresight: perf: Clean up function etm_setup_aux() There is no point in allocating sink memory for a trace session if there is not a way to free it once it is no longer needed. As such make sure the sink API function to allocate and free memory have been implemented before moving ahead with the establishment of a trace session. Signed-off-by: Mathieu Poirier Reviewed-by: Suzuki K Poulose Tested-by: Leo Yan Tested-by: Robert Walker Signed-off-by: Greg Kroah-Hartman (Upstream commit 02d5c897a091f8e064f022a5383978182751d7fb). Bug: 140266694 Change-Id: I5d28ade5b9859beb10ddf4f61d1d4698dd960473 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-etm-perf.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index e4222c74ec34..ce2b12419091 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -145,8 +145,7 @@ static void free_event_data(struct work_struct *work) if (event_data->snk_config && !WARN_ON(cpumask_empty(mask))) { cpu = cpumask_first(mask); sink = coresight_get_sink(etm_event_cpu_path(event_data, cpu)); - if (sink_ops(sink)->free_buffer) - sink_ops(sink)->free_buffer(event_data->snk_config); + sink_ops(sink)->free_buffer(event_data->snk_config); } for_each_cpu(cpu, mask) { @@ -226,7 +225,7 @@ static void *etm_setup_aux(struct perf_event *event, void **pages, sink = coresight_get_enabled_sink(true); } - if (!sink || !sink_ops(sink)->alloc_buffer) + if (!sink) goto err; mask = &event_data->mask; @@ -272,6 +271,9 @@ static void *etm_setup_aux(struct perf_event *event, void **pages, if (cpu >= nr_cpu_ids) goto err; + if (!sink_ops(sink)->alloc_buffer || !sink_ops(sink)->free_buffer) + goto err; + /* Allocate the sink buffer for this session */ event_data->snk_config = sink_ops(sink)->alloc_buffer(sink, cpu, pages, From a4b16ea9e245476afbd9e167aecd3de8db0df53b Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 25 Apr 2019 13:53:00 -0600 Subject: [PATCH 0544/3715] UPSTREAM: coresight: perf: Refactor function free_event_data() Function free_event_data() is already busy and is bound to become worse with the addition of CPU-wide trace scenarios. As such spin off a new function to strickly take care of the sink buffers. Signed-off-by: Mathieu Poirier Reviewed-by: Suzuki K Poulose Tested-by: Leo Yan Tested-by: Robert Walker Signed-off-by: Greg Kroah-Hartman (Upstream commit f5200aa9831f38271c8104565fdcae4401658475). Bug: 140266694 Change-Id: I7b9d0c73d99a89da32a3cd72088ebd25d81a7cb3 Signed-off-by: Yabin Cui --- .../hwtracing/coresight/coresight-etm-perf.c | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index ce2b12419091..8c44e643255d 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -131,22 +131,34 @@ out: return ret; } +static void free_sink_buffer(struct etm_event_data *event_data) +{ + int cpu; + cpumask_t *mask = &event_data->mask; + struct coresight_device *sink; + + if (WARN_ON(cpumask_empty(mask))) + return; + + if (!event_data->snk_config) + return; + + cpu = cpumask_first(mask); + sink = coresight_get_sink(etm_event_cpu_path(event_data, cpu)); + sink_ops(sink)->free_buffer(event_data->snk_config); +} + static void free_event_data(struct work_struct *work) { int cpu; cpumask_t *mask; struct etm_event_data *event_data; - struct coresight_device *sink; event_data = container_of(work, struct etm_event_data, work); mask = &event_data->mask; /* Free the sink buffers, if there are any */ - if (event_data->snk_config && !WARN_ON(cpumask_empty(mask))) { - cpu = cpumask_first(mask); - sink = coresight_get_sink(etm_event_cpu_path(event_data, cpu)); - sink_ops(sink)->free_buffer(event_data->snk_config); - } + free_sink_buffer(event_data); for_each_cpu(cpu, mask) { struct list_head **ppath; From a1cc48e5ac61dfac8728413a36b67c426c51ec10 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 25 Apr 2019 13:53:01 -0600 Subject: [PATCH 0545/3715] UPSTREAM: coresight: Communicate perf event to sink buffer allocation functions Make struct perf_event available to sink buffer allocation functions in order to use the pid they carry to allocate and free buffer memory along with regimenting access to what source a sink can collect data for. Signed-off-by: Mathieu Poirier Tested-by: Leo Yan Tested-by: Robert Walker Signed-off-by: Greg Kroah-Hartman (Upstream commit a0f08a6a9fee0ce3f2d36e3e30799e4c1ec171f4). Bug: 140266694 Change-Id: I3f1f16ebef75ec3aa629c46e3f1dc70f3a5ccb91 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-etb10.c | 7 ++++--- .../hwtracing/coresight/coresight-etm-perf.c | 2 +- .../hwtracing/coresight/coresight-tmc-etf.c | 7 ++++--- .../hwtracing/coresight/coresight-tmc-etr.c | 18 ++++++++++-------- include/linux/coresight.h | 5 +++-- 5 files changed, 22 insertions(+), 17 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index ba515b7465b5..a762464237d3 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -359,10 +359,11 @@ static int etb_disable(struct coresight_device *csdev) return 0; } -static void *etb_alloc_buffer(struct coresight_device *csdev, int cpu, - void **pages, int nr_pages, bool overwrite) +static void *etb_alloc_buffer(struct coresight_device *csdev, + struct perf_event *event, void **pages, + int nr_pages, bool overwrite) { - int node; + int node, cpu = event->cpu; struct cs_buffers *buf; if (cpu == -1) diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index 8c44e643255d..5040350e5ca5 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -288,7 +288,7 @@ static void *etm_setup_aux(struct perf_event *event, void **pages, /* Allocate the sink buffer for this session */ event_data->snk_config = - sink_ops(sink)->alloc_buffer(sink, cpu, pages, + sink_ops(sink)->alloc_buffer(sink, event, pages, nr_pages, overwrite); if (!event_data->snk_config) goto err; diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index 2c766de9d249..850b14f065f5 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -364,10 +364,11 @@ static void tmc_disable_etf_link(struct coresight_device *csdev, dev_dbg(drvdata->dev, "TMC-ETF disabled\n"); } -static void *tmc_alloc_etf_buffer(struct coresight_device *csdev, int cpu, - void **pages, int nr_pages, bool overwrite) +static void *tmc_alloc_etf_buffer(struct coresight_device *csdev, + struct perf_event *event, void **pages, + int nr_pages, bool overwrite) { - int node; + int node, cpu = event->cpu; struct cs_buffers *buf; if (cpu == -1) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 825e10257aab..ad1e73d7173e 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -1179,13 +1179,18 @@ out: * reaches a minimum limit (1M), beyond which we give up. */ static struct etr_perf_buffer * -tmc_etr_setup_perf_buf(struct tmc_drvdata *drvdata, int node, int nr_pages, - void **pages, bool snapshot) +tmc_etr_setup_perf_buf(struct tmc_drvdata *drvdata, struct perf_event *event, + int nr_pages, void **pages, bool snapshot) { + int node, cpu = event->cpu; struct etr_buf *etr_buf; struct etr_perf_buffer *etr_perf; unsigned long size; + if (cpu == -1) + cpu = smp_processor_id(); + node = cpu_to_node(cpu); + etr_perf = kzalloc_node(sizeof(*etr_perf), GFP_KERNEL, node); if (!etr_perf) return ERR_PTR(-ENOMEM); @@ -1223,16 +1228,13 @@ done: static void *tmc_alloc_etr_buffer(struct coresight_device *csdev, - int cpu, void **pages, int nr_pages, - bool snapshot) + struct perf_event *event, void **pages, + int nr_pages, bool snapshot) { struct etr_perf_buffer *etr_perf; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); - if (cpu == -1) - cpu = smp_processor_id(); - - etr_perf = tmc_etr_setup_perf_buf(drvdata, cpu_to_node(cpu), + etr_perf = tmc_etr_setup_perf_buf(drvdata, event, nr_pages, pages, snapshot); if (IS_ERR(etr_perf)) { dev_dbg(drvdata->dev, "Unable to allocate ETR buffer\n"); diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 8348f4d6c646..b8c51f606121 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -207,8 +207,9 @@ struct coresight_device { struct coresight_ops_sink { int (*enable)(struct coresight_device *csdev, u32 mode, void *data); int (*disable)(struct coresight_device *csdev); - void *(*alloc_buffer)(struct coresight_device *csdev, int cpu, - void **pages, int nr_pages, bool overwrite); + void *(*alloc_buffer)(struct coresight_device *csdev, + struct perf_event *event, void **pages, + int nr_pages, bool overwrite); void (*free_buffer)(void *config); unsigned long (*update_buffer)(struct coresight_device *csdev, struct perf_output_handle *handle, From bdcc66df747121a2506d86576627e21d11469387 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 25 Apr 2019 13:53:02 -0600 Subject: [PATCH 0546/3715] UPSTREAM: coresight: tmc-etr: Refactor function tmc_etr_setup_perf_buf() Refactoring function tmc_etr_setup_perf_buf() so that it only deals with the high level etr_perf_buffer, leaving the allocation of the backend buffer (i.e etr_buf) to another function. That way the backend buffer allocation function can decide if it wants to reuse an existing buffer (CPU-wide trace scenarios) or simply create a new one. Signed-off-by: Mathieu Poirier Tested-by: Leo Yan Tested-by: Robert Walker Signed-off-by: Greg Kroah-Hartman (Upstream commit 855ab61c16bf70b646fa9c84e7b07d14a003cd42). Bug: 140266694 Change-Id: Ib9306a443e0b915a54a918803789441254da4f10 Signed-off-by: Yabin Cui --- .../hwtracing/coresight/coresight-tmc-etr.c | 39 ++++++++++++++----- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index ad1e73d7173e..6a0d307d3d0c 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -1172,29 +1172,24 @@ out: } /* - * tmc_etr_setup_perf_buf: Allocate ETR buffer for use by perf. + * alloc_etr_buf: Allocate ETR buffer for use by perf. * The size of the hardware buffer is dependent on the size configured * via sysfs and the perf ring buffer size. We prefer to allocate the * largest possible size, scaling down the size by half until it * reaches a minimum limit (1M), beyond which we give up. */ -static struct etr_perf_buffer * -tmc_etr_setup_perf_buf(struct tmc_drvdata *drvdata, struct perf_event *event, - int nr_pages, void **pages, bool snapshot) +static struct etr_buf * +alloc_etr_buf(struct tmc_drvdata *drvdata, struct perf_event *event, + int nr_pages, void **pages, bool snapshot) { int node, cpu = event->cpu; struct etr_buf *etr_buf; - struct etr_perf_buffer *etr_perf; unsigned long size; if (cpu == -1) cpu = smp_processor_id(); node = cpu_to_node(cpu); - etr_perf = kzalloc_node(sizeof(*etr_perf), GFP_KERNEL, node); - if (!etr_perf) - return ERR_PTR(-ENOMEM); - /* * Try to match the perf ring buffer size if it is larger * than the size requested via sysfs. @@ -1218,6 +1213,32 @@ tmc_etr_setup_perf_buf(struct tmc_drvdata *drvdata, struct perf_event *event, size /= 2; } while (size >= TMC_ETR_PERF_MIN_BUF_SIZE); + return ERR_PTR(-ENOMEM); + +done: + return etr_buf; +} + +static struct etr_perf_buffer * +tmc_etr_setup_perf_buf(struct tmc_drvdata *drvdata, struct perf_event *event, + int nr_pages, void **pages, bool snapshot) +{ + int node, cpu = event->cpu; + struct etr_buf *etr_buf; + struct etr_perf_buffer *etr_perf; + + if (cpu == -1) + cpu = smp_processor_id(); + node = cpu_to_node(cpu); + + etr_perf = kzalloc_node(sizeof(*etr_perf), GFP_KERNEL, node); + if (!etr_perf) + return ERR_PTR(-ENOMEM); + + etr_buf = alloc_etr_buf(drvdata, event, nr_pages, pages, snapshot); + if (!IS_ERR(etr_buf)) + goto done; + kfree(etr_perf); return ERR_PTR(-ENOMEM); From c9cb6028b42dc24605de30e0b7e4e572a4ddf4c4 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 25 Apr 2019 13:53:03 -0600 Subject: [PATCH 0547/3715] UPSTREAM: coresight: tmc-etr: Create per-thread buffer allocation function Buffer allocation is different when dealing with per-thread and CPU-wide sessions. In preparation to support CPU-wide trace scenarios simplify things by keeping allocation functions for both type separate. Signed-off-by: Mathieu Poirier Tested-by: Leo Yan Tested-by: Robert Walker Signed-off-by: Greg Kroah-Hartman (Upstream commit e553a8aef4c300530844fe096c10975548ea26cb). Bug: 140266694 Change-Id: I4bb7fb65a98e93181ea61dced9639e18f937511a Signed-off-by: Yabin Cui --- .../hwtracing/coresight/coresight-tmc-etr.c | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 6a0d307d3d0c..2b6129278fdd 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -1219,6 +1219,33 @@ done: return etr_buf; } +static struct etr_buf * +get_perf_etr_buf_per_thread(struct tmc_drvdata *drvdata, + struct perf_event *event, int nr_pages, + void **pages, bool snapshot) +{ + struct etr_buf *etr_buf; + + /* + * In per-thread mode the etr_buf isn't shared, so just go ahead + * with memory allocation. + */ + etr_buf = alloc_etr_buf(drvdata, event, nr_pages, pages, snapshot); + + return etr_buf; +} + +static struct etr_buf * +get_perf_etr_buf(struct tmc_drvdata *drvdata, struct perf_event *event, + int nr_pages, void **pages, bool snapshot) +{ + if (event->cpu == -1) + return get_perf_etr_buf_per_thread(drvdata, event, nr_pages, + pages, snapshot); + + return ERR_PTR(-ENOENT); +} + static struct etr_perf_buffer * tmc_etr_setup_perf_buf(struct tmc_drvdata *drvdata, struct perf_event *event, int nr_pages, void **pages, bool snapshot) @@ -1235,7 +1262,7 @@ tmc_etr_setup_perf_buf(struct tmc_drvdata *drvdata, struct perf_event *event, if (!etr_perf) return ERR_PTR(-ENOMEM); - etr_buf = alloc_etr_buf(drvdata, event, nr_pages, pages, snapshot); + etr_buf = get_perf_etr_buf(drvdata, event, nr_pages, pages, snapshot); if (!IS_ERR(etr_buf)) goto done; From 24765c476b4e68770aa8424565ffd950254c470f Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 25 Apr 2019 13:53:04 -0600 Subject: [PATCH 0548/3715] UPSTREAM: coresight: tmc-etr: Introduce the notion of process ID to ETR devices In preparation to support CPU-wide trace scenarios, introduce the notion of process ID to ETR devices. That way events monitoring the same process can use the same etr_buf, allowing multiple CPUs to use the same sink. Signed-off-by: Mathieu Poirier Tested-by: Leo Yan Tested-by: Robert Walker Signed-off-by: Greg Kroah-Hartman (Upstream commit ef848e463ac9052be620757579cfa0aa975e97ea). Bug: 140266694 Change-Id: Ic74a1b5057e25f89b86a2fab5217d849ae186534 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-tmc-etr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 2b6129278fdd..ce52a582efb0 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "coresight-catu.h" #include "coresight-etm-perf.h" @@ -36,6 +37,7 @@ struct etr_flat_buf { /* * etr_perf_buffer - Perf buffer used for ETR * @etr_buf - Actual buffer used by the ETR + * @pid - The PID this etr_perf_buffer belongs to. * @snaphost - Perf session mode * @head - handle->head at the beginning of the session. * @nr_pages - Number of pages in the ring buffer. @@ -43,6 +45,7 @@ struct etr_flat_buf { */ struct etr_perf_buffer { struct etr_buf *etr_buf; + pid_t pid; bool snapshot; unsigned long head; int nr_pages; @@ -1289,6 +1292,7 @@ static void *tmc_alloc_etr_buffer(struct coresight_device *csdev, return NULL; } + etr_perf->pid = task_pid_nr(event->owner); etr_perf->snapshot = snapshot; etr_perf->nr_pages = nr_pages; etr_perf->pages = pages; From a6dcac7ac4c550cc3af0ff536d0152b30561ab8d Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 25 Apr 2019 13:53:05 -0600 Subject: [PATCH 0549/3715] UPSTREAM: coresight: tmc-etr: Introduce the notion of reference counting to ETR devices This patch adds reference counting to struct etr_buf so that, in CPU-wide trace scenarios, shared buffers can be disposed of when no longer used. Signed-off-by: Mathieu Poirier Tested-by: Leo Yan Tested-by: Robert Walker Signed-off-by: Greg Kroah-Hartman (Upstream commit 57549999b9a0d0c72900d2413ef1e3168bd2e817). Bug: 140266694 Change-Id: I0a9434fc147b5b66dba7e6e706781db4fb95e963 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-tmc-etr.c | 5 +++++ drivers/hwtracing/coresight/coresight-tmc.h | 3 +++ 2 files changed, 8 insertions(+) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index ce52a582efb0..27dbc882cbf1 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -1234,7 +1235,11 @@ get_perf_etr_buf_per_thread(struct tmc_drvdata *drvdata, * with memory allocation. */ etr_buf = alloc_etr_buf(drvdata, event, nr_pages, pages, snapshot); + if (IS_ERR(etr_buf)) + goto out; + refcount_set(&etr_buf->refcount, 1); +out: return etr_buf; } diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h index 36748ab19b98..37c6e9983013 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.h +++ b/drivers/hwtracing/coresight/coresight-tmc.h @@ -20,6 +20,7 @@ #include #include +#include #define TMC_RSZ 0x004 #define TMC_STS 0x00c @@ -144,6 +145,7 @@ struct etr_buf_operations; /** * struct etr_buf - Details of the buffer used by ETR + * refcount ; Number of sources currently using this etr_buf. * @mode : Mode of the ETR buffer, contiguous, Scatter Gather etc. * @full : Trace data overflow * @size : Size of the buffer. @@ -154,6 +156,7 @@ struct etr_buf_operations; * @private : Backend specific information for the buf */ struct etr_buf { + refcount_t refcount; enum etr_mode mode; bool full; ssize_t size; From dff2cd06520fccdb8eef588bb00e424c593886c1 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 25 Apr 2019 13:53:06 -0600 Subject: [PATCH 0550/3715] UPSTREAM: coresight: tmc-etr: Introduce the notion of IDR to ETR devices In CPU-wide scenarios with an N:1 source/sink topology, sources share the same sink. In order to reuse the same sink for all sources an IDR is needed to archive events that have already been accounted for. Signed-off-by: Mathieu Poirier Tested-by: Leo Yan Tested-by: Robert Walker Signed-off-by: Greg Kroah-Hartman (Upstream commit c5ff734462b1d32f793db717ef222cd05e6232d5). Bug: 140266694 Change-Id: Ib1149bd168dc5231fc1f18306f7cef068792b3e8 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-tmc.c | 4 ++++ drivers/hwtracing/coresight/coresight-tmc.h | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/drivers/hwtracing/coresight/coresight-tmc.c b/drivers/hwtracing/coresight/coresight-tmc.c index b2d3926b8bfe..0e558ef0a3a6 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.c +++ b/drivers/hwtracing/coresight/coresight-tmc.c @@ -16,10 +16,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include @@ -457,6 +459,8 @@ static int tmc_probe(struct amba_device *adev, const struct amba_id *id) ret = tmc_etr_setup_caps(drvdata, devid, id->data); if (ret) goto out; + idr_init(&drvdata->idr); + mutex_init(&drvdata->idr_mutex); break; case TMC_CONFIG_TYPE_ETF: desc.type = CORESIGHT_DEV_TYPE_LINKSINK; diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h index 37c6e9983013..b3db246ee0b0 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.h +++ b/drivers/hwtracing/coresight/coresight-tmc.h @@ -19,7 +19,9 @@ #define _CORESIGHT_TMC_H #include +#include #include +#include #include #define TMC_RSZ 0x004 @@ -184,6 +186,8 @@ struct etr_buf { * @trigger_cntr: amount of words to store after a trigger. * @etr_caps: Bitmask of capabilities of the TMC ETR, inferred from the * device configuration register (DEVID) + * @idr: Holds etr_bufs allocated for this ETR. + * @idr_mutex: Access serialisation for idr. * @perf_data: PERF buffer for ETR. * @sysfs_data: SYSFS buffer for ETR. */ @@ -205,6 +209,8 @@ struct tmc_drvdata { enum tmc_mem_intf_width memwidth; u32 trigger_cntr; u32 etr_caps; + struct idr idr; + struct mutex idr_mutex; struct etr_buf *sysfs_buf; void *perf_data; }; From 1fec8e53bd5e458d94f66e9eea0996164ce1b160 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 25 Apr 2019 13:53:07 -0600 Subject: [PATCH 0551/3715] UPSTREAM: coresight: tmc-etr: Allocate and free ETR memory buffers for CPU-wide scenarios This patch uses the PID of the process being traced to allocate and free ETR memory buffers for CPU-wide scenarios. The implementation is tailored to handle both N:1 and 1:1 source/sink HW topologies. Signed-off-by: Mathieu Poirier Tested-by: Leo Yan Tested-by: Robert Walker Signed-off-by: Greg Kroah-Hartman (Upstream commit 3147da92a8a81fc304e6e9d7ac75b68d6a54d9f7). Bug: 140266694 Change-Id: I3fbfd686d246e95bc72c474cfcaad2049eecfce8 Signed-off-by: Yabin Cui --- .../hwtracing/coresight/coresight-tmc-etr.c | 107 +++++++++++++++++- 1 file changed, 104 insertions(+), 3 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 27dbc882cbf1..e34516cd2b0d 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include #include @@ -37,6 +39,7 @@ struct etr_flat_buf { /* * etr_perf_buffer - Perf buffer used for ETR + * @drvdata - The ETR drvdaga this buffer has been allocated for. * @etr_buf - Actual buffer used by the ETR * @pid - The PID this etr_perf_buffer belongs to. * @snaphost - Perf session mode @@ -45,6 +48,7 @@ struct etr_flat_buf { * @pages - Array of Pages in the ring buffer. */ struct etr_perf_buffer { + struct tmc_drvdata *drvdata; struct etr_buf *etr_buf; pid_t pid; bool snapshot; @@ -1223,6 +1227,72 @@ done: return etr_buf; } +static struct etr_buf * +get_perf_etr_buf_cpu_wide(struct tmc_drvdata *drvdata, + struct perf_event *event, int nr_pages, + void **pages, bool snapshot) +{ + int ret; + pid_t pid = task_pid_nr(event->owner); + struct etr_buf *etr_buf; + +retry: + /* + * An etr_perf_buffer is associated with an event and holds a reference + * to the AUX ring buffer that was created for that event. In CPU-wide + * N:1 mode multiple events (one per CPU), each with its own AUX ring + * buffer, share a sink. As such an etr_perf_buffer is created for each + * event but a single etr_buf associated with the ETR is shared between + * them. The last event in a trace session will copy the content of the + * etr_buf to its AUX ring buffer. Ring buffer associated to other + * events are simply not used an freed as events are destoyed. We still + * need to allocate a ring buffer for each event since we don't know + * which event will be last. + */ + + /* + * The first thing to do here is check if an etr_buf has already been + * allocated for this session. If so it is shared with this event, + * otherwise it is created. + */ + mutex_lock(&drvdata->idr_mutex); + etr_buf = idr_find(&drvdata->idr, pid); + if (etr_buf) { + refcount_inc(&etr_buf->refcount); + mutex_unlock(&drvdata->idr_mutex); + return etr_buf; + } + + /* If we made it here no buffer has been allocated, do so now. */ + mutex_unlock(&drvdata->idr_mutex); + + etr_buf = alloc_etr_buf(drvdata, event, nr_pages, pages, snapshot); + if (IS_ERR(etr_buf)) + return etr_buf; + + refcount_set(&etr_buf->refcount, 1); + + /* Now that we have a buffer, add it to the IDR. */ + mutex_lock(&drvdata->idr_mutex); + ret = idr_alloc(&drvdata->idr, etr_buf, pid, pid + 1, GFP_KERNEL); + mutex_unlock(&drvdata->idr_mutex); + + /* Another event with this session ID has allocated this buffer. */ + if (ret == -ENOSPC) { + tmc_free_etr_buf(etr_buf); + goto retry; + } + + /* The IDR can't allocate room for a new session, abandon ship. */ + if (ret == -ENOMEM) { + tmc_free_etr_buf(etr_buf); + return ERR_PTR(ret); + } + + + return etr_buf; +} + static struct etr_buf * get_perf_etr_buf_per_thread(struct tmc_drvdata *drvdata, struct perf_event *event, int nr_pages, @@ -1251,7 +1321,8 @@ get_perf_etr_buf(struct tmc_drvdata *drvdata, struct perf_event *event, return get_perf_etr_buf_per_thread(drvdata, event, nr_pages, pages, snapshot); - return ERR_PTR(-ENOENT); + return get_perf_etr_buf_cpu_wide(drvdata, event, nr_pages, + pages, snapshot); } static struct etr_perf_buffer * @@ -1278,7 +1349,13 @@ tmc_etr_setup_perf_buf(struct tmc_drvdata *drvdata, struct perf_event *event, return ERR_PTR(-ENOMEM); done: + /* + * Keep a reference to the ETR this buffer has been allocated for + * in order to have access to the IDR in tmc_free_etr_buffer(). + */ + etr_perf->drvdata = drvdata; etr_perf->etr_buf = etr_buf; + return etr_perf; } @@ -1308,9 +1385,33 @@ static void *tmc_alloc_etr_buffer(struct coresight_device *csdev, static void tmc_free_etr_buffer(void *config) { struct etr_perf_buffer *etr_perf = config; + struct tmc_drvdata *drvdata = etr_perf->drvdata; + struct etr_buf *buf, *etr_buf = etr_perf->etr_buf; - if (etr_perf->etr_buf) - tmc_free_etr_buf(etr_perf->etr_buf); + if (!etr_buf) + goto free_etr_perf_buffer; + + mutex_lock(&drvdata->idr_mutex); + /* If we are not the last one to use the buffer, don't touch it. */ + if (!refcount_dec_and_test(&etr_buf->refcount)) { + mutex_unlock(&drvdata->idr_mutex); + goto free_etr_perf_buffer; + } + + /* We are the last one, remove from the IDR and free the buffer. */ + buf = idr_remove(&drvdata->idr, etr_perf->pid); + mutex_unlock(&drvdata->idr_mutex); + + /* + * Something went very wrong if the buffer associated with this ID + * is not the same in the IDR. Leak to avoid use after free. + */ + if (buf && WARN_ON(buf != etr_buf)) + goto free_etr_perf_buffer; + + tmc_free_etr_buf(etr_perf->etr_buf); + +free_etr_perf_buffer: kfree(etr_perf); } From db1658cfbb79d8ae5f78e7b44e8372a8dac3d1d2 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 25 Apr 2019 13:53:08 -0600 Subject: [PATCH 0552/3715] UPSTREAM: coresight: tmc-etr: Add support for CPU-wide trace scenarios This patch adds support for CPU-wide trace scenarios by making sure that only the sources monitoring the same process have access to a common sink. Because the sink is shared between sources, the first source to use the sink switches it on while the last one does the cleanup. Any attempt to modify the HW is overlooked for as long as more than one source is using a sink. Signed-off-by: Mathieu Poirier Tested-by: Leo Yan Tested-by: Robert Walker Signed-off-by: Greg Kroah-Hartman (Upstream commit 8d03cfd16a7283e9e7a5aeb7dc0742ceb66d2d23). Bug: 140266694 Change-Id: I76ad471591014415efdb3f982013891f5bcfed38 Signed-off-by: Yabin Cui --- .../hwtracing/coresight/coresight-tmc-etr.c | 38 ++++++++++++++++--- drivers/hwtracing/coresight/coresight-tmc.c | 2 + drivers/hwtracing/coresight/coresight-tmc.h | 3 ++ 3 files changed, 38 insertions(+), 5 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index e34516cd2b0d..a35ad9e80aa0 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -1486,6 +1486,13 @@ tmc_update_etr_buffer(struct coresight_device *csdev, struct etr_buf *etr_buf = etr_perf->etr_buf; spin_lock_irqsave(&drvdata->spinlock, flags); + + /* Don't do anything if another tracer is using this sink */ + if (atomic_read(csdev->refcnt) != 1) { + spin_unlock_irqrestore(&drvdata->spinlock, flags); + goto out; + } + if (WARN_ON(drvdata->perf_data != etr_perf)) { lost = true; spin_unlock_irqrestore(&drvdata->spinlock, flags); @@ -1525,17 +1532,15 @@ out: static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data) { int rc = 0; + pid_t pid; unsigned long flags; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); struct perf_output_handle *handle = data; struct etr_perf_buffer *etr_perf = etm_perf_sink_config(handle); spin_lock_irqsave(&drvdata->spinlock, flags); - /* - * There can be only one writer per sink in perf mode. If the sink - * is already open in SYSFS mode, we can't use it. - */ - if (drvdata->mode != CS_MODE_DISABLED || WARN_ON(drvdata->perf_data)) { + /* Don't use this sink if it is already claimed by sysFS */ + if (drvdata->mode == CS_MODE_SYSFS) { rc = -EBUSY; goto unlock_out; } @@ -1545,10 +1550,31 @@ static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data) goto unlock_out; } + /* Get a handle on the pid of the process to monitor */ + pid = etr_perf->pid; + + /* Do not proceed if this device is associated with another session */ + if (drvdata->pid != -1 && drvdata->pid != pid) { + rc = -EBUSY; + goto unlock_out; + } + etr_perf->head = PERF_IDX2OFF(handle->head, etr_perf); drvdata->perf_data = etr_perf; + + /* + * No HW configuration is needed if the sink is already in + * use for this session. + */ + if (drvdata->pid == pid) { + atomic_inc(csdev->refcnt); + goto unlock_out; + } + rc = tmc_etr_enable_hw(drvdata, etr_perf->etr_buf); if (!rc) { + /* Associate with monitored process. */ + drvdata->pid = pid; drvdata->mode = CS_MODE_PERF; atomic_inc(csdev->refcnt); } @@ -1592,6 +1618,8 @@ static int tmc_disable_etr_sink(struct coresight_device *csdev) /* Complain if we (somehow) got out of sync */ WARN_ON_ONCE(drvdata->mode == CS_MODE_DISABLED); tmc_etr_disable_hw(drvdata); + /* Dissociate from monitored process. */ + drvdata->pid = -1; drvdata->mode = CS_MODE_DISABLED; spin_unlock_irqrestore(&drvdata->spinlock, flags); diff --git a/drivers/hwtracing/coresight/coresight-tmc.c b/drivers/hwtracing/coresight/coresight-tmc.c index 0e558ef0a3a6..3b98ccf717e8 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.c +++ b/drivers/hwtracing/coresight/coresight-tmc.c @@ -430,6 +430,8 @@ static int tmc_probe(struct amba_device *adev, const struct amba_id *id) devid = readl_relaxed(drvdata->base + CORESIGHT_DEVID); drvdata->config_type = BMVAL(devid, 6, 7); drvdata->memwidth = tmc_get_memwidth(devid); + /* This device is not associated with a session */ + drvdata->pid = -1; if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) { if (np) diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h index b3db246ee0b0..44e78e292968 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.h +++ b/drivers/hwtracing/coresight/coresight-tmc.h @@ -176,6 +176,8 @@ struct etr_buf { * @csdev: component vitals needed by the framework. * @miscdev: specifics to handle "/dev/xyz.tmc" entry. * @spinlock: only one at a time pls. + * @pid: Process ID of the process being monitored by the session + * that is using this component. * @buf: Snapshot of the trace data for ETF/ETB. * @etr_buf: details of buffer used in TMC-ETR * @len: size of the available trace for ETF/ETB. @@ -197,6 +199,7 @@ struct tmc_drvdata { struct coresight_device *csdev; struct miscdevice miscdev; spinlock_t spinlock; + pid_t pid; bool reading; union { char *buf; /* TMC ETB */ From e05a928ebd93d92d8249c4f47d44dd5aeff3b415 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 2 May 2019 10:54:05 -0600 Subject: [PATCH 0553/3715] UPSTREAM: coresight: funnel: Support static funnel Since CoreSight hardware topology can use a 'hidden' funnel in the trace data path, this kind funnel doesn't have register for accessing and is used by default from hardware design perspective. Below is an example for related hardware topology: +------+ +------+ | cpu0 |->| ETM |-\ +------+ +------+ \-> +--------+ +-----+ ...... | Funnel |->| ETF |-\ Hidden funnel +------+ +------+ /-> +--------+ +-----+ \ | | cpu3 |->| ETM |-/ \ V +------+ +------+ \-> +--------+ | Funnel |-> ... +------+ +------+ /-> +--------+ | cpu4 |->| ETM |-\ / +------+ +------+ \-> +--------+ +-----+ / ...... | Funnel |->| ETF |-/ +------+ +------+ /-> +--------+ +-----+ | cpu7 |->| ETM |-/ +------+ +------+ The CoreSight funnel driver only supports dynamic funnel with registration register resource, thus it cannot support for the static funnel case and it's impossible to create trace data path for this case. This patch is to extend CoreSight funnel driver to support both for static funnel and dynamic funnel. For the dynamic funnel it reuses the code existed in the driver, for static funnel the driver will support device probe if without providing register resource and the driver skips registers accessing when detect the register base is NULL. Cc: Mathieu Poirier Cc: Suzuki K Poulose Cc: Wanglai Shi Suggested-by: Suzuki K Poulose Signed-off-by: Leo Yan Reviewed-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (Upstream commit 78e6427b4e7b017951785982f7f97cf64e2d624b). Bug: 140266694 Change-Id: Id0171c7d7adab24c5efd47d266f539ae50252708 Signed-off-by: Yabin Cui --- .../hwtracing/coresight/coresight-funnel.c | 118 +++++++++++++----- 1 file changed, 90 insertions(+), 28 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-funnel.c b/drivers/hwtracing/coresight/coresight-funnel.c index ebea26b78975..87fe4388aeac 100644 --- a/drivers/hwtracing/coresight/coresight-funnel.c +++ b/drivers/hwtracing/coresight/coresight-funnel.c @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include #include @@ -50,7 +52,7 @@ struct funnel_drvdata { unsigned long priority; }; -static int funnel_enable_hw(struct funnel_drvdata *drvdata, int port) +static int dynamic_funnel_enable_hw(struct funnel_drvdata *drvdata, int port) { u32 functl; int rc = 0; @@ -78,17 +80,19 @@ done: static int funnel_enable(struct coresight_device *csdev, int inport, int outport) { - int rc; + int rc = 0; struct funnel_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); - rc = funnel_enable_hw(drvdata, inport); + if (drvdata->base) + rc = dynamic_funnel_enable_hw(drvdata, inport); if (!rc) dev_dbg(drvdata->dev, "FUNNEL inport %d enabled\n", inport); return rc; } -static void funnel_disable_hw(struct funnel_drvdata *drvdata, int inport) +static void dynamic_funnel_disable_hw(struct funnel_drvdata *drvdata, + int inport) { u32 functl; @@ -110,7 +114,8 @@ static void funnel_disable(struct coresight_device *csdev, int inport, { struct funnel_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); - funnel_disable_hw(drvdata, inport); + if (drvdata->base) + dynamic_funnel_disable_hw(drvdata, inport); dev_dbg(drvdata->dev, "FUNNEL inport %d disabled\n", inport); } @@ -184,54 +189,70 @@ static struct attribute *coresight_funnel_attrs[] = { }; ATTRIBUTE_GROUPS(coresight_funnel); -static int funnel_probe(struct amba_device *adev, const struct amba_id *id) +static int funnel_probe(struct device *dev, struct resource *res) { int ret; void __iomem *base; - struct device *dev = &adev->dev; struct coresight_platform_data *pdata = NULL; struct funnel_drvdata *drvdata; - struct resource *res = &adev->res; struct coresight_desc desc = { 0 }; - struct device_node *np = adev->dev.of_node; + struct device_node *np = dev->of_node; if (np) { pdata = of_get_coresight_platform_data(dev, np); if (IS_ERR(pdata)) return PTR_ERR(pdata); - adev->dev.platform_data = pdata; + dev->platform_data = pdata; } + if (of_device_is_compatible(np, "arm,coresight-funnel")) + pr_warn_once("Uses OBSOLETE CoreSight funnel binding\n"); + drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); if (!drvdata) return -ENOMEM; - drvdata->dev = &adev->dev; - drvdata->atclk = devm_clk_get(&adev->dev, "atclk"); /* optional */ + drvdata->dev = dev; + drvdata->atclk = devm_clk_get(dev, "atclk"); /* optional */ if (!IS_ERR(drvdata->atclk)) { ret = clk_prepare_enable(drvdata->atclk); if (ret) return ret; } + + /* + * Map the device base for dynamic-funnel, which has been + * validated by AMBA core. + */ + if (res) { + base = devm_ioremap_resource(dev, res); + if (IS_ERR(base)) { + ret = PTR_ERR(base); + goto out_disable_clk; + } + drvdata->base = base; + desc.groups = coresight_funnel_groups; + } + dev_set_drvdata(dev, drvdata); - /* Validity for the resource is already checked by the AMBA core */ - base = devm_ioremap_resource(dev, res); - if (IS_ERR(base)) - return PTR_ERR(base); - - drvdata->base = base; - pm_runtime_put(&adev->dev); - desc.type = CORESIGHT_DEV_TYPE_LINK; desc.subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_MERG; desc.ops = &funnel_cs_ops; desc.pdata = pdata; desc.dev = dev; - desc.groups = coresight_funnel_groups; drvdata->csdev = coresight_register(&desc); + if (IS_ERR(drvdata->csdev)) { + ret = PTR_ERR(drvdata->csdev); + goto out_disable_clk; + } - return PTR_ERR_OR_ZERO(drvdata->csdev); + pm_runtime_put(dev); + +out_disable_clk: + if (ret && !IS_ERR_OR_NULL(drvdata->atclk)) + clk_disable_unprepare(drvdata->atclk); + return ret; } #ifdef CONFIG_PM @@ -260,7 +281,48 @@ static const struct dev_pm_ops funnel_dev_pm_ops = { SET_RUNTIME_PM_OPS(funnel_runtime_suspend, funnel_runtime_resume, NULL) }; -static const struct amba_id funnel_ids[] = { +static int static_funnel_probe(struct platform_device *pdev) +{ + int ret; + + pm_runtime_get_noresume(&pdev->dev); + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + + /* Static funnel do not have programming base */ + ret = funnel_probe(&pdev->dev, NULL); + + if (ret) { + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_disable(&pdev->dev); + } + + return ret; +} + +static const struct of_device_id static_funnel_match[] = { + {.compatible = "arm,coresight-static-funnel"}, + {} +}; + +static struct platform_driver static_funnel_driver = { + .probe = static_funnel_probe, + .driver = { + .name = "coresight-static-funnel", + .of_match_table = static_funnel_match, + .pm = &funnel_dev_pm_ops, + .suppress_bind_attrs = true, + }, +}; +builtin_platform_driver(static_funnel_driver); + +static int dynamic_funnel_probe(struct amba_device *adev, + const struct amba_id *id) +{ + return funnel_probe(&adev->dev, &adev->res); +} + +static const struct amba_id dynamic_funnel_ids[] = { { .id = 0x0003b908, .mask = 0x0003ffff, @@ -273,14 +335,14 @@ static const struct amba_id funnel_ids[] = { { 0, 0}, }; -static struct amba_driver funnel_driver = { +static struct amba_driver dynamic_funnel_driver = { .drv = { - .name = "coresight-funnel", + .name = "coresight-dynamic-funnel", .owner = THIS_MODULE, .pm = &funnel_dev_pm_ops, .suppress_bind_attrs = true, }, - .probe = funnel_probe, - .id_table = funnel_ids, + .probe = dynamic_funnel_probe, + .id_table = dynamic_funnel_ids, }; -builtin_amba_driver(funnel_driver); +builtin_amba_driver(dynamic_funnel_driver); From bc1323dc9dc24999d1c7e03b448df7e3493fbf92 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 19 Jun 2019 11:29:06 -0600 Subject: [PATCH 0554/3715] UPSTREAM: coresight: tmc-etr: Properly set AUX buffer head in snapshot mode Unify amongst sink drivers how the AUX ring buffer head is communicated to user space. That way the same algorithm in user space can be used to determine where the latest data is and how much of it to access. Signed-off-by: Mathieu Poirier Reviewed-by: Suzuki K Poulose Tested-by: Leo Yan Signed-off-by: Greg Kroah-Hartman (Upstream commit 3ecb03022a25b3f089b93a35f608ea7ee6a244aa). Bug: 140266694 Change-Id: I90ce5258c04b5196a93fd9833eb15b32dff9060b Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-tmc-etr.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index a35ad9e80aa0..c8cefe0f97e1 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -1513,14 +1513,13 @@ tmc_update_etr_buffer(struct coresight_device *csdev, tmc_etr_sync_perf_buffer(etr_perf); /* - * Update handle->head in snapshot mode. Also update the size to the - * hardware buffer size if there was an overflow. + * In snapshot mode we simply increment the head by the number of byte + * that were written. User space function cs_etm_find_snapshot() will + * figure out how many bytes to get from the AUX buffer based on the + * position of the head. */ - if (etr_perf->snapshot) { + if (etr_perf->snapshot) handle->head += size; - if (etr_buf->full) - size = etr_buf->size; - } lost |= etr_buf->full; out: From 7a8b3f08b9ad49ea89cc459e5cfba14a8ec1ce76 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 19 Jun 2019 11:29:08 -0600 Subject: [PATCH 0555/3715] UPSTREAM: coresight: tmc-etf: Fix snapshot mode update function When working in snapshot mode function perf_aux_output_begin() does not set the handle->size because the size is expected to be deduced by the placement of the "head" and "old" pointers in user space. As such there is no point in trying to adjust the amount of data to copy to the ring buffer. Signed-off-by: Mathieu Poirier Reviewed-by: Leo Yan Reviewed-by: Suzuki K Poulose Tested-by: Leo Yan Signed-off-by: Greg Kroah-Hartman (Upstream commit 99f81eb9c51d499f65bac25597e9def22f6a32dc). Bug: 140266694 Change-Id: I81fe21897251370cf7dd3178d67e0dcc5f206d6c Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-tmc-etf.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index 850b14f065f5..20d4d0510b94 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -462,9 +462,11 @@ static unsigned long tmc_update_etf_buffer(struct coresight_device *csdev, /* * The TMC RAM buffer may be bigger than the space available in the * perf ring buffer (handle->size). If so advance the RRP so that we - * get the latest trace data. + * get the latest trace data. In snapshot mode none of that matters + * since we are expected to clobber stale data in favour of the latest + * traces. */ - if (to_read > handle->size) { + if (!buf->snapshot && to_read > handle->size) { u32 mask = 0; /* From c2bed1634a9fb70116b3baa526f8849f9f635905 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 19 Jun 2019 11:29:09 -0600 Subject: [PATCH 0556/3715] UPSTREAM: coresight: perf: Don't set the truncated flag in snapshot mode This patch avoids setting the truncated flag when operating in snapshot mode since the trace buffer is expected to be truncated and discontinuous from one snapshot to another. Moreover when the truncated flag is set the perf core stops enabling the event, waiting for user space to consume the data. In snapshot mode this is clearly not what we want since it results in stale data. Signed-off-by: Mathieu Poirier Reviewed-by: Suzuki K Poulose Tested-by: Leo Yan Signed-off-by: Greg Kroah-Hartman (Upstream commit 5aafd9bf7aa932f2a97e5a55d1acda67c161621f). Bug: 140266694 Change-Id: If37e9ad3e246f1e2b869726078bacaf3ae85fd91 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-etb10.c | 8 +++++++- drivers/hwtracing/coresight/coresight-tmc-etf.c | 8 +++++++- drivers/hwtracing/coresight/coresight-tmc-etr.c | 8 +++++++- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index a762464237d3..acda27e008f2 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -497,7 +497,13 @@ static unsigned long etb_update_buffer(struct coresight_device *csdev, lost = true; } - if (lost) + /* + * Don't set the TRUNCATED flag in snapshot mode because 1) the + * captured buffer is expected to be truncated and 2) a full buffer + * prevents the event from being re-enabled by the perf core, + * resulting in stale data being send to user space. + */ + if (!buf->snapshot && lost) perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); /* finally tell HW where we want to start reading from */ diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index 20d4d0510b94..0ffb1aad41a9 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -503,7 +503,13 @@ static unsigned long tmc_update_etf_buffer(struct coresight_device *csdev, lost = true; } - if (lost) + /* + * Don't set the TRUNCATED flag in snapshot mode because 1) the + * captured buffer is expected to be truncated and 2) a full buffer + * prevents the event from being re-enabled by the perf core, + * resulting in stale data being send to user space. + */ + if (!buf->snapshot && lost) perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); cur = buf->cur; diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index c8cefe0f97e1..d0bdd4ef3e6d 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -1523,7 +1523,13 @@ tmc_update_etr_buffer(struct coresight_device *csdev, lost |= etr_buf->full; out: - if (lost) + /* + * Don't set the TRUNCATED flag in snapshot mode because 1) the + * captured buffer is expected to be truncated and 2) a full buffer + * prevents the event from being re-enabled by the perf core, + * resulting in stale data being send to user space. + */ + if (!etr_perf->snapshot && lost) perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); return size; } From 404d6bda94680e396fb6d9f52c1e7ad7eda68c42 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Jun 2019 16:12:33 -0600 Subject: [PATCH 0557/3715] UPSTREAM: coresight: tmc-etr: Do not call smp_processor_id() from preemptible During a perf session we try to allocate buffers on the "node" associated with the CPU the event is bound to. If it's not bound to a CPU, we use the current CPU node, using smp_processor_id(). However this is unsafe in a pre-emptible context and could generate the splats as below : BUG: using smp_processor_id() in preemptible [00000000] code: perf/1743 caller is alloc_etr_buf.isra.6+0x80/0xa0 CPU: 1 PID: 1743 Comm: perf Not tainted 5.1.0-rc6-147786-g116841e #344 Hardware name: ARM LTD ARM Juno Development Platform/ARM Juno Development Platform, BIOS EDK II Feb 1 2019 Call trace: dump_backtrace+0x0/0x150 show_stack+0x14/0x20 dump_stack+0x9c/0xc4 debug_smp_processor_id+0x10c/0x110 alloc_etr_buf.isra.6+0x80/0xa0 tmc_alloc_etr_buffer+0x12c/0x1f0 etm_setup_aux+0x1c4/0x230 rb_alloc_aux+0x1b8/0x2b8 perf_mmap+0x35c/0x478 mmap_region+0x34c/0x4f0 do_mmap+0x2d8/0x418 vm_mmap_pgoff+0xd0/0xf8 ksys_mmap_pgoff+0x88/0xf8 __arm64_sys_mmap+0x28/0x38 el0_svc_handler+0xd8/0x138 el0_svc+0x8/0xc Use NUMA_NO_NODE hint instead of using the current node for events not bound to CPUs. Fixes: 855ab61c16bf70b646 ("coresight: tmc-etr: Refactor function tmc_etr_setup_perf_buf()") Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Cc: stable Link: https://lore.kernel.org/r/20190620221237.3536-2-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman (Upstream commit 3ff44563dbb02456a33f2a42000f04db4ef19a8f). Bug: 140266694 Change-Id: I1998e791c7e5337b3c04201532db5a7e59d06361 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-tmc-etr.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index d0bdd4ef3e6d..460ba718f41a 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -1329,13 +1329,11 @@ static struct etr_perf_buffer * tmc_etr_setup_perf_buf(struct tmc_drvdata *drvdata, struct perf_event *event, int nr_pages, void **pages, bool snapshot) { - int node, cpu = event->cpu; + int node; struct etr_buf *etr_buf; struct etr_perf_buffer *etr_perf; - if (cpu == -1) - cpu = smp_processor_id(); - node = cpu_to_node(cpu); + node = (event->cpu == -1) ? NUMA_NO_NODE : cpu_to_node(event->cpu); etr_perf = kzalloc_node(sizeof(*etr_perf), GFP_KERNEL, node); if (!etr_perf) From 4dbf8b7434a89adfe52967ebc4ca083f0a4d2772 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Jun 2019 16:12:34 -0600 Subject: [PATCH 0558/3715] UPSTREAM: coresight: tmc-etr: alloc_perf_buf: Do not call smp_processor_id from preemptible During a perf session we try to allocate buffers on the "node" associated with the CPU the event is bound to. If it is not bound to a CPU, we use the current CPU node, using smp_processor_id(). However this is unsafe in a pre-emptible context and could generate the splats as below : BUG: using smp_processor_id() in preemptible [00000000] code: perf/1743 caller is tmc_alloc_etr_buffer+0x1bc/0x1f0 CPU: 1 PID: 1743 Comm: perf Not tainted 5.1.0-rc6-147786-g116841e #344 Hardware name: ARM LTD ARM Juno Development Platform/ARM Juno Development Platform, BIOS EDK II Feb 1 2019 Call trace: dump_backtrace+0x0/0x150 show_stack+0x14/0x20 dump_stack+0x9c/0xc4 debug_smp_processor_id+0x10c/0x110 tmc_alloc_etr_buffer+0x1bc/0x1f0 etm_setup_aux+0x1c4/0x230 rb_alloc_aux+0x1b8/0x2b8 perf_mmap+0x35c/0x478 mmap_region+0x34c/0x4f0 do_mmap+0x2d8/0x418 vm_mmap_pgoff+0xd0/0xf8 ksys_mmap_pgoff+0x88/0xf8 __arm64_sys_mmap+0x28/0x38 el0_svc_handler+0xd8/0x138 el0_svc+0x8/0xc Use NUMA_NO_NODE hint instead of using the current node for events not bound to CPUs. Fixes: 22f429f19c4135d51e9 ("coresight: etm-perf: Add support for ETR backend") Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Cc: stable # 4.20+ Signed-off-by: Mathieu Poirier Link: https://lore.kernel.org/r/20190620221237.3536-3-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman (Upstream commit 3a8710392db2c70f74aed6f06b16e8bec0f05a35). Bug: 140266694 Change-Id: Ic9a7ac03668472ba919b5ccdc5c185409a404b9c Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-tmc-etr.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 460ba718f41a..756e2ded78e3 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -1190,14 +1190,11 @@ static struct etr_buf * alloc_etr_buf(struct tmc_drvdata *drvdata, struct perf_event *event, int nr_pages, void **pages, bool snapshot) { - int node, cpu = event->cpu; + int node; struct etr_buf *etr_buf; unsigned long size; - if (cpu == -1) - cpu = smp_processor_id(); - node = cpu_to_node(cpu); - + node = (event->cpu == -1) ? NUMA_NO_NODE : cpu_to_node(event->cpu); /* * Try to match the perf ring buffer size if it is larger * than the size requested via sysfs. From 50f371dc69dc5e91ca55b0bb6ebafe6e7e882921 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 19 Jun 2019 11:29:18 -0600 Subject: [PATCH 0559/3715] UPSTREAM: coresight: Use coresight device names for sinks in PMU attribute Move to using the coresight device name instead of the parent device name for SINK attribute for PMU. Signed-off-by: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Greg Kroah-Hartman (Upstream commit 6887cfa075349a8c16e72747408feeced7235ade). Bug: 140266694 Change-Id: I12a368640d4ad89bd71012cb92fd6744d840dc04 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-etm-perf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index 5040350e5ca5..1aac779ff546 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -541,7 +541,7 @@ int etm_perf_add_symlink_sink(struct coresight_device *csdev) unsigned long hash; const char *name; struct device *pmu_dev = etm_pmu.dev; - struct device *pdev = csdev->dev.parent; + struct device *dev = &csdev->dev; struct dev_ext_attribute *ea; if (csdev->type != CORESIGHT_DEV_TYPE_SINK && @@ -554,15 +554,15 @@ int etm_perf_add_symlink_sink(struct coresight_device *csdev) if (!etm_perf_up) return -EPROBE_DEFER; - ea = devm_kzalloc(pdev, sizeof(*ea), GFP_KERNEL); + ea = devm_kzalloc(dev, sizeof(*ea), GFP_KERNEL); if (!ea) return -ENOMEM; - name = dev_name(pdev); + name = dev_name(dev); /* See function coresight_get_sink_by_id() to know where this is used */ hash = hashlen_hash(hashlen_string(NULL, name)); - ea->attr.attr.name = devm_kstrdup(pdev, name, GFP_KERNEL); + ea->attr.attr.name = devm_kstrdup(dev, name, GFP_KERNEL); if (!ea->attr.attr.name) return -ENOMEM; From 3185600f1474a5b6df20ef9c88704f029e4507d9 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 1 Aug 2019 11:23:23 -0600 Subject: [PATCH 0560/3715] UPSTREAM: coresight: Fix DEBUG_LOCKS_WARN_ON for uninitialized attribute While running the linux-next with CONFIG_DEBUG_LOCKS_ALLOC enabled, I get the following splat. BUG: key ffffcb5636929298 has not been registered! ------------[ cut here ]------------ DEBUG_LOCKS_WARN_ON(1) WARNING: CPU: 1 PID: 53 at kernel/locking/lockdep.c:3669 lockdep_init_map+0x164/0x1f0 CPU: 1 PID: 53 Comm: kworker/1:1 Tainted: G W 5.2.0-next-20190712-00015-g00ad4634222e-dirty #603 Workqueue: events amba_deferred_retry_func pstate: 60c00005 (nZCv daif +PAN +UAO) pc : lockdep_init_map+0x164/0x1f0 lr : lockdep_init_map+0x164/0x1f0 [ trimmed ] Call trace: lockdep_init_map+0x164/0x1f0 __kernfs_create_file+0x9c/0x158 sysfs_add_file_mode_ns+0xa8/0x1d0 sysfs_add_file_to_group+0x88/0xd8 etm_perf_add_symlink_sink+0xcc/0x138 coresight_register+0x110/0x280 tmc_probe+0x160/0x420 [ trimmed ] ---[ end trace ab4cc669615ba1b0 ]--- Fix this by initialising the dynamically allocated attribute properly. Cc: Mathieu Poirier Fixes: bb8e370bdc14 ("coresight: perf: Add "sinks" group to PMU directory") Cc: stable Signed-off-by: Suzuki K Poulose [Fixed a typograhic error in the changelog] Signed-off-by: Mathieu Poirier Link: https://lore.kernel.org/r/20190801172323.18359-2-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman (Upstream commit 5511c0c309db4c526a6e9f8b2b8a1483771574bc). Bug: 140266694 Change-Id: I87ec21858e4420c4d69b1ec0ccc360911276a36c Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-etm-perf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index 1aac779ff546..5cc053d7ed05 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -562,6 +562,7 @@ int etm_perf_add_symlink_sink(struct coresight_device *csdev) /* See function coresight_get_sink_by_id() to know where this is used */ hash = hashlen_hash(hashlen_string(NULL, name)); + sysfs_attr_init(&ea->attr.attr); ea->attr.attr.name = devm_kstrdup(dev, name, GFP_KERNEL); if (!ea->attr.attr.name) return -ENOMEM; From 96214c0b988e9c7c8d4b96394c043f170eced312 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 29 Aug 2019 14:28:29 -0600 Subject: [PATCH 0561/3715] BACKPORT: coresight: etr_buf: Consolidate refcount initialization Backport: dev_dbg() in context code is different from upstream. We now use refcounts for the etr_buf users. Let us initialize it while we allocate it. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Link: https://lore.kernel.org/r/20190829202842.580-5-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman (Upstream commit 5774a34fc802dbec0c1e7afd0c6737a0233e64ec). Bug: 140266694 Change-Id: Iad3ac7f986c808948f979e4e1fcb3197e3244081 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-tmc-etr.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 756e2ded78e3..b02603c952f2 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -874,6 +874,7 @@ static struct etr_buf *tmc_alloc_etr_buf(struct tmc_drvdata *drvdata, return ERR_PTR(rc); } + refcount_set(&etr_buf->refcount, 1); dev_dbg(drvdata->dev, "allocated buffer of size %ldKB in mode %d\n", (unsigned long)size >> 10, etr_buf->mode); return etr_buf; @@ -1267,8 +1268,6 @@ retry: if (IS_ERR(etr_buf)) return etr_buf; - refcount_set(&etr_buf->refcount, 1); - /* Now that we have a buffer, add it to the IDR. */ mutex_lock(&drvdata->idr_mutex); ret = idr_alloc(&drvdata->idr, etr_buf, pid, pid + 1, GFP_KERNEL); @@ -1295,19 +1294,11 @@ get_perf_etr_buf_per_thread(struct tmc_drvdata *drvdata, struct perf_event *event, int nr_pages, void **pages, bool snapshot) { - struct etr_buf *etr_buf; - /* * In per-thread mode the etr_buf isn't shared, so just go ahead * with memory allocation. */ - etr_buf = alloc_etr_buf(drvdata, event, nr_pages, pages, snapshot); - if (IS_ERR(etr_buf)) - goto out; - - refcount_set(&etr_buf->refcount, 1); -out: - return etr_buf; + return alloc_etr_buf(drvdata, event, nr_pages, pages, snapshot); } static struct etr_buf * From 7ebbc82a6121e8d23a014cacab06c13a1a245fd0 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 29 Aug 2019 14:28:30 -0600 Subject: [PATCH 0562/3715] UPSTREAM: coresight: tmc-etr: Handle memory errors We have so far ignored the memory errors, assuming that we have perfect hardware and driver. Let us handle the memory errors reported by the TMC ETR in status and truncate the buffer. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose [Removed ASCII smiley face from changelog] Signed-off-by: Mathieu Poirier Link: https://lore.kernel.org/r/20190829202842.580-6-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman (Upstream commit f52ff9b7d64953b9a1a03faaf797d23b5c1d10d0). Bug: 140266694 Change-Id: I04cbc55cb4b916f877ff25ab292e3b1ebb396333 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-tmc-etr.c | 13 +++++++++++++ drivers/hwtracing/coresight/coresight-tmc.h | 1 + 2 files changed, 14 insertions(+) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index b02603c952f2..16d003b34e5e 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -931,6 +931,19 @@ static void tmc_sync_etr_buf(struct tmc_drvdata *drvdata) rrp = tmc_read_rrp(drvdata); rwp = tmc_read_rwp(drvdata); status = readl_relaxed(drvdata->base + TMC_STS); + + /* + * If there were memory errors in the session, truncate the + * buffer. + */ + if (WARN_ON_ONCE(status & TMC_STS_MEMERR)) { + dev_dbg(&drvdata->csdev->dev, + "tmc memory error detected, truncating buffer\n"); + etr_buf->len = 0; + etr_buf->full = 0; + return; + } + etr_buf->full = status & TMC_STS_FULL; WARN_ON(!etr_buf->ops || !etr_buf->ops->sync); diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h index 44e78e292968..47725be28257 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.h +++ b/drivers/hwtracing/coresight/coresight-tmc.h @@ -58,6 +58,7 @@ #define TMC_STS_TMCREADY_BIT 2 #define TMC_STS_FULL BIT(0) #define TMC_STS_TRIGGERED BIT(1) +#define TMC_STS_MEMERR BIT(5) /* * TMC_AXICTL - 0x110 * From 86aaa8b9d208dbae7486486da7aa13612515f8aa Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 29 Aug 2019 14:28:31 -0600 Subject: [PATCH 0563/3715] UPSTREAM: coresight: tmc-etr: Check if non-secure access is enabled CoreSight TMC-ETR must have the non-secure invasive debug access enabled for use by self-hosted tracing. Without it, there is no point in enabling the ETR. So, let us check it in the TMC_AUTHSTATUS register and fail the probe if it is disabled. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Link: https://lore.kernel.org/r/20190829202842.580-7-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman (Upstream commit 8a4bc4f195044004520e38da7b8a52a76ccc9945). Bug: 140266694 Change-Id: I85816c0a336f4328896ffc0ea3311fbf3870f803 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-tmc.c | 12 ++++++++++++ drivers/hwtracing/coresight/coresight-tmc.h | 3 +++ 2 files changed, 15 insertions(+) diff --git a/drivers/hwtracing/coresight/coresight-tmc.c b/drivers/hwtracing/coresight/coresight-tmc.c index 3b98ccf717e8..4f39c368985a 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.c +++ b/drivers/hwtracing/coresight/coresight-tmc.c @@ -239,6 +239,7 @@ coresight_tmc_reg(ffcr, TMC_FFCR); coresight_tmc_reg(mode, TMC_MODE); coresight_tmc_reg(pscr, TMC_PSCR); coresight_tmc_reg(axictl, TMC_AXICTL); +coresight_tmc_reg(authstatus, TMC_AUTHSTATUS); coresight_tmc_reg(devid, CORESIGHT_DEVID); coresight_tmc_reg64(rrp, TMC_RRP, TMC_RRPHI); coresight_tmc_reg64(rwp, TMC_RWP, TMC_RWPHI); @@ -258,6 +259,7 @@ static struct attribute *coresight_tmc_mgmt_attrs[] = { &dev_attr_devid.attr, &dev_attr_dba.attr, &dev_attr_axictl.attr, + &dev_attr_authstatus.attr, NULL, }; @@ -346,6 +348,13 @@ static inline bool tmc_etr_can_use_sg(struct tmc_drvdata *drvdata) "arm,scatter-gather"); } +static inline bool tmc_etr_has_non_secure_access(struct tmc_drvdata *drvdata) +{ + u32 auth = readl_relaxed(drvdata->base + TMC_AUTHSTATUS); + + return (auth & TMC_AUTH_NSID_MASK) == 0x3; +} + /* Detect and initialise the capabilities of a TMC ETR */ static int tmc_etr_setup_caps(struct tmc_drvdata *drvdata, u32 devid, void *dev_caps) @@ -354,6 +363,9 @@ static int tmc_etr_setup_caps(struct tmc_drvdata *drvdata, u32 dma_mask = 0; + if (!tmc_etr_has_non_secure_access(drvdata)) + return -EACCES; + /* Set the unadvertised capabilities */ tmc_etr_init_caps(drvdata, (u32)(unsigned long)dev_caps); diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h index 47725be28257..51fc06168c42 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.h +++ b/drivers/hwtracing/coresight/coresight-tmc.h @@ -50,6 +50,7 @@ #define TMC_ITATBCTR2 0xef0 #define TMC_ITATBCTR1 0xef4 #define TMC_ITATBCTR0 0xef8 +#define TMC_AUTHSTATUS 0xfb8 /* register description */ /* TMC_CTL - 0x020 */ @@ -101,6 +102,8 @@ #define TMC_DEVID_AXIAW_SHIFT 17 #define TMC_DEVID_AXIAW_MASK 0x7f +#define TMC_AUTH_NSID_MASK GENMASK(1, 0) + enum tmc_config_type { TMC_CONFIG_TYPE_ETB, TMC_CONFIG_TYPE_ETR, From cc09d16d172f866eefcbd7c4660e21ff8c69d000 Mon Sep 17 00:00:00 2001 From: Yabin Cui Date: Thu, 29 Aug 2019 14:28:38 -0600 Subject: [PATCH 0564/3715] UPSTREAM: coresight: tmc-etr: Fix updating buffer in not-snapshot mode. TMC etr always copies all available data to perf aux buffer, which may exceed the available space in perf aux buffer. It isn't suitable for not-snapshot mode, because: 1) It may overwrite previously written data. 2) It may make the perf_event_mmap_page->aux_head report having more or less data than the reality. So change to only copy the latest data fitting the available space in perf aux buffer. Signed-off-by: Yabin Cui Signed-off-by: Mathieu Poirier Link: https://lore.kernel.org/r/20190829202842.580-14-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman (Upstream commit 13af88f312fc57becacfcbb1cc77f844281a30ec). Bug: 140266694 Change-Id: I17bbfd65d1e89e307298cf8e7d59d2062afeedae Signed-off-by: Yabin Cui --- .../hwtracing/coresight/coresight-tmc-etr.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 16d003b34e5e..d11622410a05 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -1418,9 +1418,10 @@ free_etr_perf_buffer: * tmc_etr_sync_perf_buffer: Copy the actual trace data from the hardware * buffer to the perf ring buffer. */ -static void tmc_etr_sync_perf_buffer(struct etr_perf_buffer *etr_perf) +static void tmc_etr_sync_perf_buffer(struct etr_perf_buffer *etr_perf, + unsigned long to_copy) { - long bytes, to_copy; + long bytes; long pg_idx, pg_offset, src_offset; unsigned long head = etr_perf->head; char **dst_pages, *src_buf; @@ -1430,8 +1431,7 @@ static void tmc_etr_sync_perf_buffer(struct etr_perf_buffer *etr_perf) pg_idx = head >> PAGE_SHIFT; pg_offset = head & (PAGE_SIZE - 1); dst_pages = (char **)etr_perf->pages; - src_offset = etr_buf->offset; - to_copy = etr_buf->len; + src_offset = etr_buf->offset + etr_buf->len - to_copy; while (to_copy > 0) { /* @@ -1442,6 +1442,8 @@ static void tmc_etr_sync_perf_buffer(struct etr_perf_buffer *etr_perf) * 3) what is available in the destination page. * in one iteration. */ + if (src_offset >= etr_buf->size) + src_offset -= etr_buf->size; bytes = tmc_etr_buf_get_data(etr_buf, src_offset, to_copy, &src_buf); if (WARN_ON_ONCE(bytes <= 0)) @@ -1462,8 +1464,6 @@ static void tmc_etr_sync_perf_buffer(struct etr_perf_buffer *etr_perf) /* Move source pointers */ src_offset += bytes; - if (src_offset >= etr_buf->size) - src_offset -= etr_buf->size; } } @@ -1509,7 +1509,11 @@ tmc_update_etr_buffer(struct coresight_device *csdev, spin_unlock_irqrestore(&drvdata->spinlock, flags); size = etr_buf->len; - tmc_etr_sync_perf_buffer(etr_perf); + if (!etr_perf->snapshot && size > handle->size) { + size = handle->size; + lost = true; + } + tmc_etr_sync_perf_buffer(etr_perf, size); /* * In snapshot mode we simply increment the head by the number of byte From 05e9e78e0f4d8d54fa2576da6fb2f19e039fd2df Mon Sep 17 00:00:00 2001 From: Yabin Cui Date: Thu, 29 Aug 2019 14:28:39 -0600 Subject: [PATCH 0565/3715] UPSTREAM: coresight: tmc-etr: Fix perf_data check When tracing etm data of multiple threads on multiple cpus through perf interface, each cpu has a unique etr_perf_buffer while sharing the same etr device. There is no guarantee that the last cpu starts etm tracing also stops last. This makes perf_data check fail. Fix it by checking etr_buf instead of etr_perf_buffer. Also move the code setting and clearing perf_buf to more suitable places. Fixes: 3147da92a8a8 ("coresight: tmc-etr: Allocate and free ETR memory buffers for CPU-wide scenarios") Signed-off-by: Yabin Cui Signed-off-by: Mathieu Poirier Link: https://lore.kernel.org/r/20190829202842.580-15-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman (Upstream commit bbedcb91cc3bf252e6031e199ab3d1f07107f7c5). Bug: 140266694 Change-Id: I6d4f4c461af40596c8bcc70e5d7b9924f3c2b585 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-tmc-etr.c | 8 ++++---- drivers/hwtracing/coresight/coresight-tmc.h | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index d11622410a05..1e87dd873a50 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -1492,7 +1492,7 @@ tmc_update_etr_buffer(struct coresight_device *csdev, goto out; } - if (WARN_ON(drvdata->perf_data != etr_perf)) { + if (WARN_ON(drvdata->perf_buf != etr_buf)) { lost = true; spin_unlock_irqrestore(&drvdata->spinlock, flags); goto out; @@ -1504,8 +1504,6 @@ tmc_update_etr_buffer(struct coresight_device *csdev, tmc_sync_etr_buf(drvdata); CS_LOCK(drvdata->base); - /* Reset perf specific data */ - drvdata->perf_data = NULL; spin_unlock_irqrestore(&drvdata->spinlock, flags); size = etr_buf->len; @@ -1568,7 +1566,6 @@ static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data) } etr_perf->head = PERF_IDX2OFF(handle->head, etr_perf); - drvdata->perf_data = etr_perf; /* * No HW configuration is needed if the sink is already in @@ -1584,6 +1581,7 @@ static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data) /* Associate with monitored process. */ drvdata->pid = pid; drvdata->mode = CS_MODE_PERF; + drvdata->perf_buf = etr_perf->etr_buf; atomic_inc(csdev->refcnt); } @@ -1629,6 +1627,8 @@ static int tmc_disable_etr_sink(struct coresight_device *csdev) /* Dissociate from monitored process. */ drvdata->pid = -1; drvdata->mode = CS_MODE_DISABLED; + /* Reset perf specific data */ + drvdata->perf_buf = NULL; spin_unlock_irqrestore(&drvdata->spinlock, flags); diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h index 51fc06168c42..0143958c201d 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.h +++ b/drivers/hwtracing/coresight/coresight-tmc.h @@ -194,8 +194,8 @@ struct etr_buf { * device configuration register (DEVID) * @idr: Holds etr_bufs allocated for this ETR. * @idr_mutex: Access serialisation for idr. - * @perf_data: PERF buffer for ETR. - * @sysfs_data: SYSFS buffer for ETR. + * @sysfs_buf: SYSFS buffer for ETR. + * @perf_buf: PERF buffer for ETR. */ struct tmc_drvdata { void __iomem *base; @@ -219,7 +219,7 @@ struct tmc_drvdata { struct idr idr; struct mutex idr_mutex; struct etr_buf *sysfs_buf; - void *perf_data; + struct etr_buf *perf_buf; }; struct etr_buf_operations { From 5d70da12ada10c11e107f624d0d6979deb8a0c08 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 29 Aug 2019 14:28:40 -0600 Subject: [PATCH 0566/3715] UPSTREAM: coresight: tmc: Make memory width mask computation into a function Make the computation of a memory mask representing the width of the memory bus into a function so that it can be re-used by the ETR driver. Signed-off-by: Mathieu Poirier Reviewed-by: Leo Yan Link: https://lore.kernel.org/r/20190829202842.580-16-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman (Upstream commit 00bb485ce7b8d1186a5166fc9e38822797fae7d4). Bug: 140266694 Change-Id: I3fdcbc019d9cf676fc1ec92a37c8271887823eba Signed-off-by: Yabin Cui --- .../hwtracing/coresight/coresight-tmc-etf.c | 23 ++------------- drivers/hwtracing/coresight/coresight-tmc.c | 28 +++++++++++++++++++ drivers/hwtracing/coresight/coresight-tmc.h | 1 + 3 files changed, 31 insertions(+), 21 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index 0ffb1aad41a9..a3a97a28ee21 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -467,30 +467,11 @@ static unsigned long tmc_update_etf_buffer(struct coresight_device *csdev, * traces. */ if (!buf->snapshot && to_read > handle->size) { - u32 mask = 0; - - /* - * The value written to RRP must be byte-address aligned to - * the width of the trace memory databus _and_ to a frame - * boundary (16 byte), whichever is the biggest. For example, - * for 32-bit, 64-bit and 128-bit wide trace memory, the four - * LSBs must be 0s. For 256-bit wide trace memory, the five - * LSBs must be 0s. - */ - switch (drvdata->memwidth) { - case TMC_MEM_INTF_WIDTH_32BITS: - case TMC_MEM_INTF_WIDTH_64BITS: - case TMC_MEM_INTF_WIDTH_128BITS: - mask = GENMASK(31, 4); - break; - case TMC_MEM_INTF_WIDTH_256BITS: - mask = GENMASK(31, 5); - break; - } + u32 mask = tmc_get_memwidth_mask(drvdata); /* * Make sure the new size is aligned in accordance with the - * requirement explained above. + * requirement explained in function tmc_get_memwidth_mask(). */ to_read = handle->size & mask; /* Move the RAM read pointer up */ diff --git a/drivers/hwtracing/coresight/coresight-tmc.c b/drivers/hwtracing/coresight/coresight-tmc.c index 4f39c368985a..877348cf7d64 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.c +++ b/drivers/hwtracing/coresight/coresight-tmc.c @@ -74,6 +74,34 @@ void tmc_disable_hw(struct tmc_drvdata *drvdata) writel_relaxed(0x0, drvdata->base + TMC_CTL); } +u32 tmc_get_memwidth_mask(struct tmc_drvdata *drvdata) +{ + u32 mask = 0; + + /* + * When moving RRP or an offset address forward, the new values must + * be byte-address aligned to the width of the trace memory databus + * _and_ to a frame boundary (16 byte), whichever is the biggest. For + * example, for 32-bit, 64-bit and 128-bit wide trace memory, the four + * LSBs must be 0s. For 256-bit wide trace memory, the five LSBs must + * be 0s. + */ + switch (drvdata->memwidth) { + case TMC_MEM_INTF_WIDTH_32BITS: + /* fallthrough */ + case TMC_MEM_INTF_WIDTH_64BITS: + /* fallthrough */ + case TMC_MEM_INTF_WIDTH_128BITS: + mask = GENMASK(31, 4); + break; + case TMC_MEM_INTF_WIDTH_256BITS: + mask = GENMASK(31, 5); + break; + } + + return mask; +} + static int tmc_read_prepare(struct tmc_drvdata *drvdata) { int ret = 0; diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h index 0143958c201d..13bf55179766 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.h +++ b/drivers/hwtracing/coresight/coresight-tmc.h @@ -268,6 +268,7 @@ void tmc_wait_for_tmcready(struct tmc_drvdata *drvdata); void tmc_flush_and_stop(struct tmc_drvdata *drvdata); void tmc_enable_hw(struct tmc_drvdata *drvdata); void tmc_disable_hw(struct tmc_drvdata *drvdata); +u32 tmc_get_memwidth_mask(struct tmc_drvdata *drvdata); /* ETB/ETF functions */ int tmc_read_prepare_etb(struct tmc_drvdata *drvdata); From bd93f4ec61f8abef60585c85b301b58c678f1e27 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 29 Aug 2019 14:28:41 -0600 Subject: [PATCH 0567/3715] UPSTREAM: coresight: tmc-etr: Decouple buffer sync and barrier packet insertion If less space is available in the perf ring buffer than the ETR buffer, barrier packets inserted in the trace stream by tmc_sync_etr_buf() are skipped over when the head of the buffer is moved forward, resulting in traces that can't be decoded. This patch decouples the process of syncing ETR buffers and the addition of barrier packets in order to perform the latter once the offset in the trace buffer has been properly computed. Signed-off-by: Mathieu Poirier Reviewed-by: Leo Yan Link: https://lore.kernel.org/r/20190829202842.580-17-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman (Upstream commit 3507d231a499e27c4bac8a47169b74ec7ef87292). Bug: 140266694 Change-Id: I2bc0429aacb32cc59cf2e002530a00c8c2909683 Signed-off-by: Yabin Cui --- .../hwtracing/coresight/coresight-tmc-etr.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 1e87dd873a50..72a145a618ac 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -949,10 +949,6 @@ static void tmc_sync_etr_buf(struct tmc_drvdata *drvdata) WARN_ON(!etr_buf->ops || !etr_buf->ops->sync); etr_buf->ops->sync(etr_buf, rrp, rwp); - - /* Insert barrier packets at the beginning, if there was an overflow */ - if (etr_buf->full) - tmc_etr_buf_insert_barrier_packet(etr_buf, etr_buf->offset); } static void __tmc_etr_enable_hw(struct tmc_drvdata *drvdata) @@ -1089,6 +1085,13 @@ static void tmc_etr_sync_sysfs_buf(struct tmc_drvdata *drvdata) drvdata->sysfs_buf = NULL; } else { tmc_sync_etr_buf(drvdata); + /* + * Insert barrier packets at the beginning, if there was + * an overflow. + */ + if (etr_buf->full) + tmc_etr_buf_insert_barrier_packet(etr_buf, + etr_buf->offset); } } @@ -1506,11 +1509,16 @@ tmc_update_etr_buffer(struct coresight_device *csdev, CS_LOCK(drvdata->base); spin_unlock_irqrestore(&drvdata->spinlock, flags); + lost = etr_buf->full; size = etr_buf->len; if (!etr_perf->snapshot && size > handle->size) { size = handle->size; lost = true; } + + /* Insert barrier packets at the beginning, if there was an overflow */ + if (lost) + tmc_etr_buf_insert_barrier_packet(etr_buf, etr_buf->offset); tmc_etr_sync_perf_buffer(etr_perf, size); /* @@ -1521,8 +1529,6 @@ tmc_update_etr_buffer(struct coresight_device *csdev, */ if (etr_perf->snapshot) handle->head += size; - - lost |= etr_buf->full; out: /* * Don't set the TRUNCATED flag in snapshot mode because 1) the From f09edab86e8388fb875958274b0d961842ca6765 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 29 Aug 2019 14:28:42 -0600 Subject: [PATCH 0568/3715] UPSTREAM: coresight: tmc-etr: Add barrier packets when moving offset forward This patch adds barrier packets in the trace stream when the offset in the data buffer needs to be moved forward. Otherwise the decoder isn't aware of the break in the stream and can't synchronise itself with the trace data. Signed-off-by: Mathieu Poirier Tested-by: Yabin Cui Reviewed-by: Leo Yan Link: https://lore.kernel.org/r/20190829202842.580-18-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman (Upstream commit ec13c78d7b45851d21786127ae17193f9a1446fb). Bug: 140266694 Change-Id: If19ed31b5c2b89a62fb4bfddb95f7e3725c7d91d Signed-off-by: Yabin Cui --- .../hwtracing/coresight/coresight-tmc-etr.c | 29 +++++++++++++++---- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 72a145a618ac..8cd03f78aab2 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -1422,10 +1422,11 @@ free_etr_perf_buffer: * buffer to the perf ring buffer. */ static void tmc_etr_sync_perf_buffer(struct etr_perf_buffer *etr_perf, + unsigned long src_offset, unsigned long to_copy) { long bytes; - long pg_idx, pg_offset, src_offset; + long pg_idx, pg_offset; unsigned long head = etr_perf->head; char **dst_pages, *src_buf; struct etr_buf *etr_buf = etr_perf->etr_buf; @@ -1434,7 +1435,6 @@ static void tmc_etr_sync_perf_buffer(struct etr_perf_buffer *etr_perf, pg_idx = head >> PAGE_SHIFT; pg_offset = head & (PAGE_SIZE - 1); dst_pages = (char **)etr_perf->pages; - src_offset = etr_buf->offset + etr_buf->len - to_copy; while (to_copy > 0) { /* @@ -1482,7 +1482,7 @@ tmc_update_etr_buffer(struct coresight_device *csdev, void *config) { bool lost = false; - unsigned long flags, size = 0; + unsigned long flags, offset, size = 0; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); struct etr_perf_buffer *etr_perf = config; struct etr_buf *etr_buf = etr_perf->etr_buf; @@ -1510,16 +1510,35 @@ tmc_update_etr_buffer(struct coresight_device *csdev, spin_unlock_irqrestore(&drvdata->spinlock, flags); lost = etr_buf->full; + offset = etr_buf->offset; size = etr_buf->len; + + /* + * The ETR buffer may be bigger than the space available in the + * perf ring buffer (handle->size). If so advance the offset so that we + * get the latest trace data. In snapshot mode none of that matters + * since we are expected to clobber stale data in favour of the latest + * traces. + */ if (!etr_perf->snapshot && size > handle->size) { - size = handle->size; + u32 mask = tmc_get_memwidth_mask(drvdata); + + /* + * Make sure the new size is aligned in accordance with the + * requirement explained in function tmc_get_memwidth_mask(). + */ + size = handle->size & mask; + offset = etr_buf->offset + etr_buf->len - size; + + if (offset >= etr_buf->size) + offset -= etr_buf->size; lost = true; } /* Insert barrier packets at the beginning, if there was an overflow */ if (lost) tmc_etr_buf_insert_barrier_packet(etr_buf, etr_buf->offset); - tmc_etr_sync_perf_buffer(etr_perf, size); + tmc_etr_sync_perf_buffer(etr_perf, offset, size); /* * In snapshot mode we simply increment the head by the number of byte From d2577cb655b7f51c53fffc348d4717888c9e5a43 Mon Sep 17 00:00:00 2001 From: Yabin Cui Date: Thu, 17 Oct 2019 17:29:16 -0700 Subject: [PATCH 0569/3715] BACKPORT:FROMGIT: coresight: Serialize enabling/disabling a link device. Backport: coresight-dynamic-replicator.c and coresight-replicator.c has been merged upstream into coresight-replicator.c, in commit 455328b1772a190e27fe7ef3a2416dfee6234317. So backport the change to both of the files. When tracing etm data of multiple threads on multiple cpus through perf interface, some link devices are shared between paths of different cpus. It creates race conditions when different cpus wants to enable/disable the same link device at the same time. Example 1: Two cpus want to enable different ports of a coresight funnel, thus calling the funnel enable operation at the same time. But the funnel enable operation isn't reentrantable. Example 2: For an enabled coresight dynamic replicator with refcnt=1, one cpu wants to disable it, while another cpu wants to enable it. Ideally we still have an enabled replicator with refcnt=1 at the end. But in reality the result is uncertain. Since coresight devices claim themselves when enabled for self-hosted usage, the race conditions above usually make the link devices not usable after many cycles. To fix the race conditions, this patch uses spinlocks to serialize enabling/disabling link devices. Fixes: a06ae8609b3d ("coresight: add CoreSight core layer framework") Signed-off-by: Yabin Cui Signed-off-by: Mathieu Poirier Cc: stable # 5.3 Link: https://lore.kernel.org/r/20191104181251.26732-14-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman (cherry picked from commit edda32dabedb01f98b9d7b9a4492c13357834bbe git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git char-misc-next) Bug: 140266694 Change-Id: I42f55aca4db7f9328b5e2425c2f3990d5d470a6e Signed-off-by: Yabin Cui --- .../coresight/coresight-dynamic-replicator.c | 58 +++++++++++++++---- .../hwtracing/coresight/coresight-funnel.c | 32 ++++++++-- .../coresight/coresight-replicator.c | 6 +- .../hwtracing/coresight/coresight-tmc-etf.c | 26 ++++++--- drivers/hwtracing/coresight/coresight.c | 45 +++++--------- 5 files changed, 110 insertions(+), 57 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-dynamic-replicator.c b/drivers/hwtracing/coresight/coresight-dynamic-replicator.c index 3fe7f0ca7cf5..613ac2618b88 100644 --- a/drivers/hwtracing/coresight/coresight-dynamic-replicator.c +++ b/drivers/hwtracing/coresight/coresight-dynamic-replicator.c @@ -34,12 +34,14 @@ * @dev: the device entity associated with this component * @atclk: optional clock for the core parts of the replicator. * @csdev: component vitals needed by the framework + * @spinlock: serialize enable/disable operations. */ struct replicator_state { void __iomem *base; struct device *dev; struct clk *atclk; struct coresight_device *csdev; + spinlock_t spinlock; }; /* @@ -58,12 +60,11 @@ static void replicator_reset(struct replicator_state *drvdata) CS_LOCK(drvdata->base); } -static int replicator_enable(struct coresight_device *csdev, int inport, - int outport) +static int dynamic_replicator_enable(struct replicator_state *drvdata, + int inport, int outport) { int rc = 0; u32 reg; - struct replicator_state *drvdata = dev_get_drvdata(csdev->dev.parent); switch (outport) { case 0: @@ -84,21 +85,40 @@ static int replicator_enable(struct coresight_device *csdev, int inport, rc = coresight_claim_device_unlocked(drvdata->base); /* Ensure that the outport is enabled. */ - if (!rc) { + if (!rc) writel_relaxed(0x00, drvdata->base + reg); - dev_dbg(drvdata->dev, "REPLICATOR enabled\n"); - } - CS_LOCK(drvdata->base); return rc; } -static void replicator_disable(struct coresight_device *csdev, int inport, - int outport) +static int replicator_enable(struct coresight_device *csdev, int inport, + int outport) +{ + int rc = 0; + struct replicator_state *drvdata = dev_get_drvdata(csdev->dev.parent); + unsigned long flags; + bool first_enable = false; + + spin_lock_irqsave(&drvdata->spinlock, flags); + if (atomic_read(&csdev->refcnt[outport]) == 0) { + rc = dynamic_replicator_enable(drvdata, inport, outport); + if (!rc) + first_enable = true; + } + if (!rc) + atomic_inc(&csdev->refcnt[outport]); + spin_unlock_irqrestore(&drvdata->spinlock, flags); + + if (first_enable) + dev_dbg(&csdev->dev, "REPLICATOR enabled\n"); + return rc; +} + +static void dynamic_replicator_disable(struct replicator_state *drvdata, + int inport, int outport) { u32 reg; - struct replicator_state *drvdata = dev_get_drvdata(csdev->dev.parent); switch (outport) { case 0: @@ -121,8 +141,24 @@ static void replicator_disable(struct coresight_device *csdev, int inport, (readl_relaxed(drvdata->base + REPLICATOR_IDFILTER1) == 0xff)) coresight_disclaim_device_unlocked(drvdata->base); CS_LOCK(drvdata->base); +} - dev_dbg(drvdata->dev, "REPLICATOR disabled\n"); +static void replicator_disable(struct coresight_device *csdev, int inport, + int outport) +{ + struct replicator_state *drvdata = dev_get_drvdata(csdev->dev.parent); + unsigned long flags; + bool last_disable = false; + + spin_lock_irqsave(&drvdata->spinlock, flags); + if (atomic_dec_return(&csdev->refcnt[outport]) == 0) { + dynamic_replicator_disable(drvdata, inport, outport); + last_disable = true; + } + spin_unlock_irqrestore(&drvdata->spinlock, flags); + + if (last_disable) + dev_dbg(drvdata->dev, "REPLICATOR disabled\n"); } static const struct coresight_ops_link replicator_link_ops = { diff --git a/drivers/hwtracing/coresight/coresight-funnel.c b/drivers/hwtracing/coresight/coresight-funnel.c index 87fe4388aeac..954265a1ffbd 100644 --- a/drivers/hwtracing/coresight/coresight-funnel.c +++ b/drivers/hwtracing/coresight/coresight-funnel.c @@ -43,6 +43,7 @@ * @atclk: optional clock for the core parts of the funnel. * @csdev: component vitals needed by the framework. * @priority: port selection order. + * @spinlock: serialize enable/disable operations. */ struct funnel_drvdata { void __iomem *base; @@ -50,6 +51,7 @@ struct funnel_drvdata { struct clk *atclk; struct coresight_device *csdev; unsigned long priority; + spinlock_t spinlock; }; static int dynamic_funnel_enable_hw(struct funnel_drvdata *drvdata, int port) @@ -82,11 +84,21 @@ static int funnel_enable(struct coresight_device *csdev, int inport, { int rc = 0; struct funnel_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + unsigned long flags; + bool first_enable = false; - if (drvdata->base) - rc = dynamic_funnel_enable_hw(drvdata, inport); - + spin_lock_irqsave(&drvdata->spinlock, flags); + if (atomic_read(&csdev->refcnt[inport]) == 0) { + if (drvdata->base) + rc = dynamic_funnel_enable_hw(drvdata, inport); + if (!rc) + first_enable = true; + } if (!rc) + atomic_inc(&csdev->refcnt[inport]); + spin_unlock_irqrestore(&drvdata->spinlock, flags); + + if (first_enable) dev_dbg(drvdata->dev, "FUNNEL inport %d enabled\n", inport); return rc; } @@ -113,11 +125,19 @@ static void funnel_disable(struct coresight_device *csdev, int inport, int outport) { struct funnel_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + unsigned long flags; + bool last_disable = false; - if (drvdata->base) - dynamic_funnel_disable_hw(drvdata, inport); + spin_lock_irqsave(&drvdata->spinlock, flags); + if (atomic_dec_return(&csdev->refcnt[inport]) == 0) { + if (drvdata->base) + dynamic_funnel_disable_hw(drvdata, inport); + last_disable = true; + } + spin_unlock_irqrestore(&drvdata->spinlock, flags); - dev_dbg(drvdata->dev, "FUNNEL inport %d disabled\n", inport); + if (last_disable) + dev_dbg(&csdev->dev, "FUNNEL inport %d disabled\n", inport);; } static const struct coresight_ops_link funnel_link_ops = { diff --git a/drivers/hwtracing/coresight/coresight-replicator.c b/drivers/hwtracing/coresight/coresight-replicator.c index 4f7781203fd4..93fe5785c8a4 100644 --- a/drivers/hwtracing/coresight/coresight-replicator.c +++ b/drivers/hwtracing/coresight/coresight-replicator.c @@ -42,7 +42,8 @@ static int replicator_enable(struct coresight_device *csdev, int inport, { struct replicator_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); - dev_dbg(drvdata->dev, "REPLICATOR enabled\n"); + if (atomic_inc_return(&csdev->refcnt[outport]) == 1) + dev_dbg(drvdata->dev, "REPLICATOR enabled\n"); return 0; } @@ -51,7 +52,8 @@ static void replicator_disable(struct coresight_device *csdev, int inport, { struct replicator_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); - dev_dbg(drvdata->dev, "REPLICATOR disabled\n"); + if (atomic_dec_return(&csdev->refcnt[outport]) == 0) + dev_dbg(drvdata->dev, "REPLICATOR disabled\n"); } static const struct coresight_ops_link replicator_link_ops = { diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index a3a97a28ee21..0da861d8b95a 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -325,9 +325,10 @@ static int tmc_disable_etf_sink(struct coresight_device *csdev) static int tmc_enable_etf_link(struct coresight_device *csdev, int inport, int outport) { - int ret; + int ret = 0; unsigned long flags; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + bool first_enable = false; spin_lock_irqsave(&drvdata->spinlock, flags); if (drvdata->reading) { @@ -335,12 +336,18 @@ static int tmc_enable_etf_link(struct coresight_device *csdev, return -EBUSY; } - ret = tmc_etf_enable_hw(drvdata); + if (atomic_read(&csdev->refcnt[0]) == 0) { + ret = tmc_etf_enable_hw(drvdata); + if (!ret) { + drvdata->mode = CS_MODE_SYSFS; + first_enable = true; + } + } if (!ret) - drvdata->mode = CS_MODE_SYSFS; + atomic_inc(&csdev->refcnt[0]); spin_unlock_irqrestore(&drvdata->spinlock, flags); - if (!ret) + if (first_enable) dev_dbg(drvdata->dev, "TMC-ETF enabled\n"); return ret; } @@ -350,6 +357,7 @@ static void tmc_disable_etf_link(struct coresight_device *csdev, { unsigned long flags; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + bool last_disable = false; spin_lock_irqsave(&drvdata->spinlock, flags); if (drvdata->reading) { @@ -357,11 +365,15 @@ static void tmc_disable_etf_link(struct coresight_device *csdev, return; } - tmc_etf_disable_hw(drvdata); - drvdata->mode = CS_MODE_DISABLED; + if (atomic_dec_return(&csdev->refcnt[0]) == 0) { + tmc_etf_disable_hw(drvdata); + drvdata->mode = CS_MODE_DISABLED; + last_disable = true; + } spin_unlock_irqrestore(&drvdata->spinlock, flags); - dev_dbg(drvdata->dev, "TMC-ETF disabled\n"); + if (last_disable) + dev_dbg(drvdata->dev, "TMC-ETF disabled\n"); } static void *tmc_alloc_etf_buffer(struct coresight_device *csdev, diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index a7cf02b498ab..8381e726c29c 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -260,9 +260,9 @@ static int coresight_enable_link(struct coresight_device *csdev, struct coresight_device *parent, struct coresight_device *child) { - int ret; + int ret = 0; int link_subtype; - int refport, inport, outport; + int inport, outport; if (!parent || !child) return -EINVAL; @@ -271,29 +271,17 @@ static int coresight_enable_link(struct coresight_device *csdev, outport = coresight_find_link_outport(csdev, child); link_subtype = csdev->subtype.link_subtype; - if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_MERG) - refport = inport; - else if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_SPLIT) - refport = outport; - else - refport = 0; + if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_MERG && inport < 0) + return inport; + if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_SPLIT && outport < 0) + return outport; - if (refport < 0) - return refport; + if (link_ops(csdev)->enable) + ret = link_ops(csdev)->enable(csdev, inport, outport); + if (!ret) + csdev->enable = true; - if (atomic_inc_return(&csdev->refcnt[refport]) == 1) { - if (link_ops(csdev)->enable) { - ret = link_ops(csdev)->enable(csdev, inport, outport); - if (ret) { - atomic_dec(&csdev->refcnt[refport]); - return ret; - } - } - } - - csdev->enable = true; - - return 0; + return ret; } static void coresight_disable_link(struct coresight_device *csdev, @@ -302,7 +290,7 @@ static void coresight_disable_link(struct coresight_device *csdev, { int i, nr_conns; int link_subtype; - int refport, inport, outport; + int inport, outport; if (!parent || !child) return; @@ -312,20 +300,15 @@ static void coresight_disable_link(struct coresight_device *csdev, link_subtype = csdev->subtype.link_subtype; if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_MERG) { - refport = inport; nr_conns = csdev->nr_inport; } else if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_SPLIT) { - refport = outport; nr_conns = csdev->nr_outport; } else { - refport = 0; nr_conns = 1; } - if (atomic_dec_return(&csdev->refcnt[refport]) == 0) { - if (link_ops(csdev)->disable) - link_ops(csdev)->disable(csdev, inport, outport); - } + if (link_ops(csdev)->disable) + link_ops(csdev)->disable(csdev, inport, outport); for (i = 0; i < nr_conns; i++) if (atomic_read(&csdev->refcnt[i]) != 0) From 5f4c5e11943188190d6e27572dc92dfc5fe544f0 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 6 Nov 2019 12:00:21 +0000 Subject: [PATCH 0570/3715] BACKPORT:FROMGIT: coresight: funnel: Fix missing spin_lock_init() Backport: context code is different from upstream. The driver allocates the spinlock but not initialize it. Use spin_lock_init() on it to initialize it correctly. This is detected by Coccinelle semantic patch. Signed-off-by: Wei Yongjun Tested-by: Yabin Cui Signed-off-by: Mathieu Poirier (cherry picked from commit 9d5ec2ec787967f16598094de507a60816378fae https://git.linaro.org/kernel/coresight.git next) Bug: 140266694 Change-Id: I34da2da267afa839d7432bd7c33bcf92d6287ef0 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-funnel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwtracing/coresight/coresight-funnel.c b/drivers/hwtracing/coresight/coresight-funnel.c index 954265a1ffbd..6b9fce3a94be 100644 --- a/drivers/hwtracing/coresight/coresight-funnel.c +++ b/drivers/hwtracing/coresight/coresight-funnel.c @@ -256,6 +256,7 @@ static int funnel_probe(struct device *dev, struct resource *res) dev_set_drvdata(dev, drvdata); + spin_lock_init(&drvdata->spinlock); desc.type = CORESIGHT_DEV_TYPE_LINK; desc.subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_MERG; desc.ops = &funnel_cs_ops; From 20243f80614560630070a6586152983387d8da81 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 6 Nov 2019 11:56:51 +0000 Subject: [PATCH 0571/3715] BACKPORT:FROMGIT: coresight: replicator: Fix missing spin_lock_init() Backport: change in coresight-dynamic-replicator.c instead of coresight-replicator.c. The driver allocates the spinlock but not initialize it. Use spin_lock_init() on it to initialize it correctly. This is detected by Coccinelle semantic patch. Signed-off-by: Wei Yongjun Tested-by: Yabin Cui Signed-off-by: Mathieu Poirier (cherry picked from commit 372697412e921ae550a2da485526aeb3abee955d https://git.linaro.org/kernel/coresight.git next) Bug: 140266694 Change-Id: Ia7769f33fb51260c59c9c35a507d7461e16049a6 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-dynamic-replicator.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwtracing/coresight/coresight-dynamic-replicator.c b/drivers/hwtracing/coresight/coresight-dynamic-replicator.c index 613ac2618b88..6a3fd22d757c 100644 --- a/drivers/hwtracing/coresight/coresight-dynamic-replicator.c +++ b/drivers/hwtracing/coresight/coresight-dynamic-replicator.c @@ -231,6 +231,7 @@ static int replicator_probe(struct amba_device *adev, const struct amba_id *id) dev_set_drvdata(dev, drvdata); pm_runtime_put(&adev->dev); + spin_lock_init(&drvdata->spinlock); desc.type = CORESIGHT_DEV_TYPE_LINK; desc.subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_SPLIT; desc.ops = &replicator_cs_ops; From abd7879a2ab61b2fc107a542ef5df1bfa586e0db Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 1 Nov 2019 13:40:06 -0700 Subject: [PATCH 0572/3715] ANDROID: cuttlefish_defconfig: enable fs-verity fs-verity will be used for APK verification in R. Bug: 142494008 Change-Id: I51626da9a9975d337c44096f8443c3713dfcd6eb Signed-off-by: Eric Biggers --- arch/arm64/configs/cuttlefish_defconfig | 2 ++ arch/x86/configs/x86_64_cuttlefish_defconfig | 7 ++----- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index 9ee2a6014efb..66b476d89814 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -420,6 +420,8 @@ CONFIG_EXT4_ENCRYPTION=y CONFIG_F2FS_FS=y CONFIG_F2FS_FS_SECURITY=y CONFIG_F2FS_FS_ENCRYPTION=y +CONFIG_FS_VERITY=y +CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y # CONFIG_DNOTIFY is not set CONFIG_QUOTA=y CONFIG_QFMT_V2=y diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 4ec35bad32d4..625142b2f8bc 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -439,6 +439,8 @@ CONFIG_EXT4_ENCRYPTION=y CONFIG_F2FS_FS=y CONFIG_F2FS_FS_SECURITY=y CONFIG_F2FS_FS_ENCRYPTION=y +CONFIG_FS_VERITY=y +CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -487,15 +489,10 @@ CONFIG_SECURITY_PATH=y CONFIG_HARDENED_USERCOPY=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 -CONFIG_CRYPTO_RSA=y # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set CONFIG_CRYPTO_ADIANTUM=y CONFIG_CRYPTO_AES_NI_INTEL=y CONFIG_CRYPTO_LZ4=y CONFIG_CRYPTO_ZSTD=y CONFIG_CRYPTO_DEV_VIRTIO=y -CONFIG_ASYMMETRIC_KEY_TYPE=y -CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y -CONFIG_X509_CERTIFICATE_PARSER=y -CONFIG_SYSTEM_TRUSTED_KEYRING=y CONFIG_SYSTEM_TRUSTED_KEYS="verity_dev_keys.x509" From 460dc7c31cef1aba1f0228fc2556a8285ac17c63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rickard=20M=C3=B6ller?= Date: Fri, 1 Nov 2019 14:20:46 +0100 Subject: [PATCH 0573/3715] ANDROID: uid_sys_stats: avoid double accounting of dying threads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a thread is being killed process_notifier() is called to record the final accounting of the thread. But after that uid_cputime_show() and add_uid_io_stats() can be called before the dying thread is removed from the parent's thread_group resulting in double accounting. This can cause the user and system time for a given UID to move backwards in /proc/uid_cputime/show_uid_stat. That gives negative delta times in KernelCpuUidUserSysTimeReader.readDeltaImpl() and it logs an error: "Negative user/sys time delta for UID=..." One consequence of which was incorrectly calculated power consumptions in BatteryStats. With this change we avoid the double accounting by ignoring the thread if it has the PF_EXITING flag set. Bug: 144366911 Change-Id: I6b929e8f558cd81ce1c00481c8b550d24877aa2c Signed-off-by: Rickard Möller --- drivers/misc/uid_sys_stats.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/misc/uid_sys_stats.c b/drivers/misc/uid_sys_stats.c index 3954e345b2e9..d04faf312ed6 100644 --- a/drivers/misc/uid_sys_stats.c +++ b/drivers/misc/uid_sys_stats.c @@ -358,9 +358,12 @@ static int uid_cputime_show(struct seq_file *m, void *v) __func__, uid); return -ENOMEM; } - task_cputime_adjusted(task, &utime, &stime); - uid_entry->active_utime += utime; - uid_entry->active_stime += stime; + /* avoid double accounting of dying threads */ + if (!(task->flags & PF_EXITING)) { + task_cputime_adjusted(task, &utime, &stime); + uid_entry->active_utime += utime; + uid_entry->active_stime += stime; + } } while_each_thread(temp, task); rcu_read_unlock(); @@ -453,6 +456,10 @@ static void add_uid_io_stats(struct uid_entry *uid_entry, { struct io_stats *io_slot = &uid_entry->io[slot]; + /* avoid double accounting of dying threads */ + if (slot != UID_STATE_DEAD_TASKS && (task->flags & PF_EXITING)) + return; + io_slot->read_bytes += task->ioac.read_bytes; io_slot->write_bytes += compute_write_bytes(task); io_slot->rchar += task->ioac.rchar; From 5afdcc2a0ae40bc6195476504f4e1932544cb441 Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Mon, 11 Nov 2019 15:37:17 -0800 Subject: [PATCH 0574/3715] kvm: mmu: Don't read PDPTEs when paging is not enabled [ Upstream commit d35b34a9a70edae7ef923f100e51b8b5ae9fe899 ] kvm should not attempt to read guest PDPTEs when CR0.PG = 0 and CR4.PAE = 1. Signed-off-by: Junaid Shahid Signed-off-by: Paolo Bonzini Signed-off-by: Sean Christopherson Signed-off-by: Sasha Levin --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index dcee3282112d..dc1b6d5bb16d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -620,7 +620,7 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu) gfn_t gfn; int r; - if (is_long_mode(vcpu) || !is_pae(vcpu)) + if (is_long_mode(vcpu) || !is_pae(vcpu) || !is_paging(vcpu)) return false; if (!test_bit(VCPU_EXREG_PDPTR, @@ -7787,7 +7787,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, kvm_update_cpuid(vcpu); idx = srcu_read_lock(&vcpu->kvm->srcu); - if (!is_long_mode(vcpu) && is_pae(vcpu)) { + if (!is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu)) { load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); mmu_reset_needed = 1; } From c155547af70fff9a346bbf428fc14d3beec2d6d3 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 11 Nov 2019 15:37:18 -0800 Subject: [PATCH 0575/3715] KVM: x86: introduce is_pae_paging [ Upstream commit bf03d4f9334728bf7c8ffc7de787df48abd6340e ] Checking for 32-bit PAE is quite common around code that fiddles with the PDPTRs. Add a function to compress all checks into a single invocation. Moving to the common helper also fixes a subtle bug in kvm_set_cr3() where it fails to check is_long_mode() and results in KVM incorrectly attempting to load PDPTRs for a 64-bit guest. Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini [sean: backport to 4.x; handle vmx.c split in 5.x, call out the bugfix] Signed-off-by: Sean Christopherson Signed-off-by: Sasha Levin --- arch/x86/kvm/vmx.c | 7 +++---- arch/x86/kvm/x86.c | 8 ++++---- arch/x86/kvm/x86.h | 5 +++++ 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index cd5a8e888eb6..ab6384efc791 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4468,7 +4468,7 @@ static void ept_load_pdptrs(struct kvm_vcpu *vcpu) (unsigned long *)&vcpu->arch.regs_dirty)) return; - if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) { + if (is_pae_paging(vcpu)) { vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]); vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]); vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]); @@ -4480,7 +4480,7 @@ static void ept_save_pdptrs(struct kvm_vcpu *vcpu) { struct kvm_mmu *mmu = vcpu->arch.walk_mmu; - if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) { + if (is_pae_paging(vcpu)) { mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0); mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1); mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2); @@ -10906,8 +10906,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne * If PAE paging and EPT are both on, CR3 is not used by the CPU and * must not be dereferenced. */ - if (!is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu) && - !nested_ept) { + if (is_pae_paging(vcpu) && !nested_ept) { if (!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) { *entry_failure_code = ENTRY_FAIL_PDPTE; return 1; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index dc1b6d5bb16d..1f9360320a82 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -620,7 +620,7 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu) gfn_t gfn; int r; - if (is_long_mode(vcpu) || !is_pae(vcpu) || !is_paging(vcpu)) + if (!is_pae_paging(vcpu)) return false; if (!test_bit(VCPU_EXREG_PDPTR, @@ -849,8 +849,8 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) if (is_long_mode(vcpu) && (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 63))) return 1; - else if (is_pae(vcpu) && is_paging(vcpu) && - !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) + else if (is_pae_paging(vcpu) && + !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) return 1; vcpu->arch.cr3 = cr3; @@ -7787,7 +7787,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, kvm_update_cpuid(vcpu); idx = srcu_read_lock(&vcpu->kvm->srcu); - if (!is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu)) { + if (is_pae_paging(vcpu)) { load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); mmu_reset_needed = 1; } diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index c88305d997b0..68eb0d03e5fc 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -94,6 +94,11 @@ static inline int is_paging(struct kvm_vcpu *vcpu) return likely(kvm_read_cr0_bits(vcpu, X86_CR0_PG)); } +static inline bool is_pae_paging(struct kvm_vcpu *vcpu) +{ + return !is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu); +} + static inline u32 bit(int bitno) { return 1 << (bitno & 31); From 3ad7824c0d454e7bab6729209128b2892582d76d Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 10 Dec 2018 12:40:38 +0100 Subject: [PATCH 0576/3715] MIPS: BCM63XX: fix switch core reset on BCM6368 commit 8a38dacf87180738d42b058334c951eba15d2d47 upstream. The Ethernet Switch core mask was set to 0, causing the switch core to be not reset on BCM6368 on boot. Provide the proper mask so the switch core gets reset to a known good state. Fixes: 799faa626c71 ("MIPS: BCM63XX: add core reset helper") Signed-off-by: Jonas Gorski Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Ralf Baechle Cc: James Hogan Cc: Florian Fainelli Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- arch/mips/bcm63xx/reset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/bcm63xx/reset.c b/arch/mips/bcm63xx/reset.c index a2af38cf28a7..64574e74cb23 100644 --- a/arch/mips/bcm63xx/reset.c +++ b/arch/mips/bcm63xx/reset.c @@ -120,7 +120,7 @@ #define BCM6368_RESET_DSL 0 #define BCM6368_RESET_SAR SOFTRESET_6368_SAR_MASK #define BCM6368_RESET_EPHY SOFTRESET_6368_EPHY_MASK -#define BCM6368_RESET_ENETSW 0 +#define BCM6368_RESET_ENETSW SOFTRESET_6368_ENETSW_MASK #define BCM6368_RESET_PCM SOFTRESET_6368_PCM_MASK #define BCM6368_RESET_MPI SOFTRESET_6368_MPI_MASK #define BCM6368_RESET_PCIE 0 From cc7d7e27fb20c7458914ecd265521389782888be Mon Sep 17 00:00:00 2001 From: Michael Schmitz Date: Tue, 5 Nov 2019 15:49:10 +1300 Subject: [PATCH 0577/3715] scsi: core: Handle drivers which set sg_tablesize to zero commit 9393c8de628cf0968d81a17cc11841e42191e041 upstream. In scsi_mq_setup_tags(), cmd_size is calculated based on zero size for the scatter-gather list in case the low level driver uses SG_NONE in its host template. cmd_size is passed on to the block layer for calculation of the request size, and we've seen NULL pointer dereference errors from the block layer in drivers where SG_NONE is used and a mq IO scheduler is active, apparently as a consequence of this (see commit 68ab2d76e4be ("scsi: cxlflash: Set sg_tablesize to 1 instead of SG_NONE"), and a recent patch by Finn Thain converting the three m68k NFR5380 drivers to avoid setting SG_NONE). Try to avoid these errors by accounting for at least one sg list entry when calculating cmd_size, regardless of whether the low level driver set a zero sg_tablesize. Tested on 030 m68k with the atari_scsi driver - setting sg_tablesize to SG_NONE no longer results in a crash when loading this driver. CC: Finn Thain Link: https://lore.kernel.org/r/1572922150-4358-1-git-send-email-schmitzmic@gmail.com Signed-off-by: Michael Schmitz Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_lib.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index efb8af57dd9c..c36c84c8725a 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2277,7 +2277,8 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost) { unsigned int cmd_size, sgl_size; - sgl_size = scsi_mq_sgl_size(shost); + sgl_size = max_t(unsigned int, sizeof(struct scatterlist), + scsi_mq_sgl_size(shost)); cmd_size = sizeof(struct scsi_cmnd) + shost->hostt->cmd_size + sgl_size; if (scsi_host_get_prot(shost)) cmd_size += sizeof(struct scsi_data_buffer) + sgl_size; From 97b858bfe955141edcca59a5824e974f52940fd5 Mon Sep 17 00:00:00 2001 From: Evan Green Date: Thu, 14 Nov 2019 11:22:59 -0800 Subject: [PATCH 0578/3715] Revert "Input: synaptics-rmi4 - avoid processing unknown IRQs" This reverts commit 7b9f7a928255a232012be55cb95db30e963b83a7. That change should have had a fixes tag for commit 24d28e4f1271 ("Input: synaptics-rmi4 - convert irq distribution to irq_domain"). The conversion to irq_domain introduced the issue being fixed by this commit. In older kernels the bitmap IRQ accounting is done differently, and it doesn't suffer from the same issue of calling handle_nested_irq(0). Keeping this commit on kernels 4.14 and older causes problems with touchpads due to the different semantics of the IRQ bitmasks. Signed-off-by: Evan Green Signed-off-by: Greg Kroah-Hartman --- drivers/input/rmi4/rmi_driver.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c index bae46816a3b3..997ccae7ee05 100644 --- a/drivers/input/rmi4/rmi_driver.c +++ b/drivers/input/rmi4/rmi_driver.c @@ -165,7 +165,7 @@ static int rmi_process_interrupt_requests(struct rmi_device *rmi_dev) } mutex_lock(&data->irq_mutex); - bitmap_and(data->irq_status, data->irq_status, data->fn_irq_bits, + bitmap_and(data->irq_status, data->irq_status, data->current_irq_mask, data->irq_count); /* * At this point, irq_status has all bits that are set in the @@ -412,8 +412,6 @@ static int rmi_driver_set_irq_bits(struct rmi_device *rmi_dev, bitmap_copy(data->current_irq_mask, data->new_irq_mask, data->num_of_irq_regs); - bitmap_or(data->fn_irq_bits, data->fn_irq_bits, mask, data->irq_count); - error_unlock: mutex_unlock(&data->irq_mutex); return error; @@ -427,8 +425,6 @@ static int rmi_driver_clear_irq_bits(struct rmi_device *rmi_dev, struct device *dev = &rmi_dev->dev; mutex_lock(&data->irq_mutex); - bitmap_andnot(data->fn_irq_bits, - data->fn_irq_bits, mask, data->irq_count); bitmap_andnot(data->new_irq_mask, data->current_irq_mask, mask, data->irq_count); From 80da94a1c9f2a5002b9090a70d6a77d9c47a5a51 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Wed, 22 Nov 2017 10:45:39 +0530 Subject: [PATCH 0579/3715] powerpc/perf: Fix IMC_MAX_PMU macro commit 73ce9aec65b17433e18163d07eb5cb6bf114bd6c upstream. IMC_MAX_PMU is used for static storage (per_nest_pmu_arr) which holds nest pmu information. Current value for the macro is 32 based on the initial number of nest pmu units supported by the nest microcode. But going forward, microcode could support more nest units. Instead of static storage, patch to fix the code to dynamically allocate an array based on the number of nest imc units found in the device tree. Fixes:8f95faaac56c1 ('powerpc/powernv: Detect and create IMC device') Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Cc: Andrew Donnellan Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/imc-pmu.h | 6 +----- arch/powerpc/perf/imc-pmu.c | 15 ++++++++++++--- arch/powerpc/platforms/powernv/opal-imc.c | 16 ++++++++++++++++ 3 files changed, 29 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h index 7f74c282710f..fad0e6ff460f 100644 --- a/arch/powerpc/include/asm/imc-pmu.h +++ b/arch/powerpc/include/asm/imc-pmu.h @@ -20,11 +20,6 @@ #include #include -/* - * For static allocation of some of the structures. - */ -#define IMC_MAX_PMUS 32 - /* * Compatibility macros for IMC devices */ @@ -125,4 +120,5 @@ enum { extern int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id); extern void thread_imc_disable(void); +extern int get_max_nest_dev(void); #endif /* __ASM_POWERPC_IMC_PMU_H */ diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 994e4392cac5..2d08015a903f 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -26,7 +26,7 @@ */ static DEFINE_MUTEX(nest_init_lock); static DEFINE_PER_CPU(struct imc_pmu_ref *, local_nest_imc_refc); -static struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS]; +static struct imc_pmu **per_nest_pmu_arr; static cpumask_t nest_imc_cpumask; struct imc_pmu_ref *nest_imc_refc; static int nest_pmus; @@ -286,13 +286,14 @@ static struct imc_pmu_ref *get_nest_pmu_ref(int cpu) static void nest_change_cpu_context(int old_cpu, int new_cpu) { struct imc_pmu **pn = per_nest_pmu_arr; - int i; if (old_cpu < 0 || new_cpu < 0) return; - for (i = 0; *pn && i < IMC_MAX_PMUS; i++, pn++) + while (*pn) { perf_pmu_migrate_context(&(*pn)->pmu, old_cpu, new_cpu); + pn++; + } } static int ppc_nest_imc_cpu_offline(unsigned int cpu) @@ -1212,6 +1213,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs); kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]); kfree(pmu_ptr); + kfree(per_nest_pmu_arr); return; } @@ -1236,6 +1238,13 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent, return -ENOMEM; /* Needed for hotplug/migration */ + if (!per_nest_pmu_arr) { + per_nest_pmu_arr = kcalloc(get_max_nest_dev() + 1, + sizeof(struct imc_pmu *), + GFP_KERNEL); + if (!per_nest_pmu_arr) + return -ENOMEM; + } per_nest_pmu_arr[pmu_index] = pmu_ptr; break; case IMC_DOMAIN_CORE: diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c index 7b93191dc2e3..dad23cd8a1de 100644 --- a/arch/powerpc/platforms/powernv/opal-imc.c +++ b/arch/powerpc/platforms/powernv/opal-imc.c @@ -159,6 +159,22 @@ static void disable_core_pmu_counters(void) put_online_cpus(); } +int get_max_nest_dev(void) +{ + struct device_node *node; + u32 pmu_units = 0, type; + + for_each_compatible_node(node, NULL, IMC_DTB_UNIT_COMPAT) { + if (of_property_read_u32(node, "type", &type)) + continue; + + if (type == IMC_TYPE_CHIP) + pmu_units++; + } + + return pmu_units; +} + static int opal_imc_counters_probe(struct platform_device *pdev) { struct device_node *imc_dev = pdev->dev.of_node; From eda6c45adcf79f6a82a150e9a094f254689ef368 Mon Sep 17 00:00:00 2001 From: Anju T Sudhakar Date: Thu, 7 Dec 2017 22:53:27 +0530 Subject: [PATCH 0580/3715] powerpc/perf: Fix kfree memory allocated for nest pmus commit 110df8bd3e418b3476cae80babe8add48a8ea523 upstream. imc_common_cpuhp_mem_free() is the common function for all IMC (In-memory Collection counters) domains to unregister cpuhotplug callback and free memory. Since kfree of memory allocated for nest-imc (per_nest_pmu_arr) is in the common code, all domains (core/nest/thread) can do the kfree in the failure case. This could potentially create a call trace as shown below, where core(/thread/nest) imc pmu initialization fails and in the failure path imc_common_cpuhp_mem_free() free the memory(per_nest_pmu_arr), which is allocated by successfully registered nest units. The call trace is generated in a scenario where core-imc initialization is made to fail and a cpuhotplug is performed in a p9 system. During cpuhotplug ppc_nest_imc_cpu_offline() tries to access per_nest_pmu_arr, which is already freed by core-imc. NIP [c000000000cb6a94] mutex_lock+0x34/0x90 LR [c000000000cb6a88] mutex_lock+0x28/0x90 Call Trace: mutex_lock+0x28/0x90 (unreliable) perf_pmu_migrate_context+0x90/0x3a0 ppc_nest_imc_cpu_offline+0x190/0x1f0 cpuhp_invoke_callback+0x160/0x820 cpuhp_thread_fun+0x1bc/0x270 smpboot_thread_fn+0x250/0x290 kthread+0x1a8/0x1b0 ret_from_kernel_thread+0x5c/0x74 To address this scenario do the kfree(per_nest_pmu_arr) only in case of nest-imc initialization failure, and when there is no other nest units registered. Fixes: 73ce9aec65b1 ("powerpc/perf: Fix IMC_MAX_PMU macro") Signed-off-by: Anju T Sudhakar Reviewed-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Cc: Andrew Donnellan Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/perf/imc-pmu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 2d08015a903f..a0b4c22d963a 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -1189,6 +1189,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) if (nest_pmus == 1) { cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE); kfree(nest_imc_refc); + kfree(per_nest_pmu_arr); } if (nest_pmus > 0) @@ -1213,7 +1214,6 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs); kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]); kfree(pmu_ptr); - kfree(per_nest_pmu_arr); return; } @@ -1327,6 +1327,8 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id ret = nest_pmu_cpumask_init(); if (ret) { mutex_unlock(&nest_init_lock); + kfree(nest_imc_refc); + kfree(per_nest_pmu_arr); goto err_free; } } From 63a44739bc53dbd44529b7e9ca641d88f151c7cb Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 14 Nov 2019 11:16:01 +0100 Subject: [PATCH 0581/3715] ax88172a: fix information leak on short answers [ Upstream commit a9a51bd727d141a67b589f375fe69d0e54c4fe22 ] If a malicious device gives a short MAC it can elicit up to 5 bytes of leaked memory out of the driver. We need to check for ETH_ALEN instead. Reported-by: syzbot+a8d4acdad35e6bbca308@syzkaller.appspotmail.com Signed-off-by: Oliver Neukum Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/ax88172a.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/ax88172a.c b/drivers/net/usb/ax88172a.c index 501576f53854..914cac55a7ae 100644 --- a/drivers/net/usb/ax88172a.c +++ b/drivers/net/usb/ax88172a.c @@ -208,7 +208,7 @@ static int ax88172a_bind(struct usbnet *dev, struct usb_interface *intf) /* Get the MAC address */ ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf, 0); - if (ret < 0) { + if (ret < ETH_ALEN) { netdev_err(dev->net, "Failed to read MAC address: %d\n", ret); goto free; } From 021ede687dcccba48a2cae8c98795e9eedc857e1 Mon Sep 17 00:00:00 2001 From: Aleksander Morgado Date: Wed, 13 Nov 2019 11:11:10 +0100 Subject: [PATCH 0582/3715] net: usb: qmi_wwan: add support for Foxconn T77W968 LTE modules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 802753cb0b141cf5170ab97fe7e79f5ca10d06b0 ] These are the Foxconn-branded variants of the Dell DW5821e modules, same USB layout as those. The QMI interface is exposed in USB configuration #1: P: Vendor=0489 ProdID=e0b4 Rev=03.18 S: Manufacturer=FII S: Product=T77W968 LTE S: SerialNumber=0123456789ABCDEF C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#=0x0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I: If#=0x1 Alt= 0 #EPs= 1 Cls=03(HID ) Sub=00 Prot=00 Driver=usbhid I: If#=0x2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#=0x3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#=0x4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#=0x5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option Signed-off-by: Aleksander Morgado Acked-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 8ed538295d09..4a984b76a60e 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1295,6 +1295,8 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x2c7c, 0x0191, 4)}, /* Quectel EG91 */ {QMI_FIXED_INTF(0x2c7c, 0x0296, 4)}, /* Quectel BG96 */ {QMI_QUIRK_SET_DTR(0x2cb7, 0x0104, 4)}, /* Fibocom NL678 series */ + {QMI_FIXED_INTF(0x0489, 0xe0b4, 0)}, /* Foxconn T77W968 LTE */ + {QMI_FIXED_INTF(0x0489, 0xe0b5, 0)}, /* Foxconn T77W968 LTE with eSIM support*/ /* 4. Gobi 1000 devices */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ From 82ea5a1742e22638db3e3bb812e2e2b1eaf6b661 Mon Sep 17 00:00:00 2001 From: Jouni Hogander Date: Wed, 13 Nov 2019 13:45:02 +0200 Subject: [PATCH 0583/3715] slip: Fix memory leak in slip_open error path [ Upstream commit 3b5a39979dafea9d0cd69c7ae06088f7a84cdafa ] Driver/net/can/slcan.c is derived from slip.c. Memory leak was detected by Syzkaller in slcan. Same issue exists in slip.c and this patch is addressing the leak in slip.c. Here is the slcan memory leak trace reported by Syzkaller: BUG: memory leak unreferenced object 0xffff888067f65500 (size 4096): comm "syz-executor043", pid 454, jiffies 4294759719 (age 11.930s) hex dump (first 32 bytes): 73 6c 63 61 6e 30 00 00 00 00 00 00 00 00 00 00 slcan0.......... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000a06eec0d>] __kmalloc+0x18b/0x2c0 [<0000000083306e66>] kvmalloc_node+0x3a/0xc0 [<000000006ac27f87>] alloc_netdev_mqs+0x17a/0x1080 [<0000000061a996c9>] slcan_open+0x3ae/0x9a0 [<000000001226f0f9>] tty_ldisc_open.isra.1+0x76/0xc0 [<0000000019289631>] tty_set_ldisc+0x28c/0x5f0 [<000000004de5a617>] tty_ioctl+0x48d/0x1590 [<00000000daef496f>] do_vfs_ioctl+0x1c7/0x1510 [<0000000059068dbc>] ksys_ioctl+0x99/0xb0 [<000000009a6eb334>] __x64_sys_ioctl+0x78/0xb0 [<0000000053d0332e>] do_syscall_64+0x16f/0x580 [<0000000021b83b99>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [<000000008ea75434>] 0xfffffffffffffff Cc: "David S. Miller" Cc: Oliver Hartkopp Cc: Lukas Bulwahn Signed-off-by: Jouni Hogander Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/slip/slip.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index 436dd78c396a..2901b7db9d2e 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -859,6 +859,7 @@ err_free_chan: sl->tty = NULL; tty->disc_data = NULL; clear_bit(SLF_INUSE, &sl->flags); + free_netdev(sl->dev); err_exit: rtnl_unlock(); From 524c3eacd7bbc92f255c98e1b0a8b824b0ebb5d0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 9 Nov 2019 19:16:58 +0100 Subject: [PATCH 0584/3715] ALSA: usb-audio: Fix missing error check at mixer resolution test commit 167beb1756791e0806365a3f86a0da10d7a327ee upstream. A check of the return value from get_cur_mix_raw() is missing at the resolution test code in get_min_max_with_quirks(), which may leave the variable untouched, leading to a random uninitialized value, as detected by syzkaller fuzzer. Add the missing return error check for fixing that. Reported-and-tested-by: syzbot+abe1ab7afc62c6bb6377@syzkaller.appspotmail.com Cc: Link: https://lore.kernel.org/r/20191109181658.30368-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 5d48f8e74c56..044193b2364d 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -1052,7 +1052,8 @@ static int get_min_max_with_quirks(struct usb_mixer_elem_info *cval, if (cval->min + cval->res < cval->max) { int last_valid_res = cval->res; int saved, test, check; - get_cur_mix_raw(cval, minchn, &saved); + if (get_cur_mix_raw(cval, minchn, &saved) < 0) + goto no_res_check; for (;;) { test = saved; if (test < cval->max) @@ -1072,6 +1073,7 @@ static int get_min_max_with_quirks(struct usb_mixer_elem_info *cval, snd_usb_set_cur_mix_value(cval, minchn, 0, saved); } +no_res_check: cval->initialized = 1; } From a7a3b07ecaad0acbf56d0da67ae2bd0394657c0f Mon Sep 17 00:00:00 2001 From: Henry Lin Date: Wed, 13 Nov 2019 10:14:19 +0800 Subject: [PATCH 0585/3715] ALSA: usb-audio: not submit urb for stopped endpoint commit 528699317dd6dc722dccc11b68800cf945109390 upstream. While output urb's snd_complete_urb() is executing, calling prepare_outbound_urb() may cause endpoint stopped before prepare_outbound_urb() returns and result in next urb submitted to stopped endpoint. usb-audio driver cannot re-use it afterwards as the urb is still hold by usb stack. This change checks EP_FLAG_RUNNING flag after prepare_outbound_urb() again to let snd_complete_urb() know the endpoint already stopped and does not submit next urb. Below kind of error will be fixed: [ 213.153103] usb 1-2: timeout: still 1 active urbs on EP #1 [ 213.164121] usb 1-2: cannot submit urb 0, error -16: unknown error Signed-off-by: Henry Lin Cc: Link: https://lore.kernel.org/r/20191113021420.13377-1-henryl@nvidia.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/endpoint.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index c90607ebe155..8caf0b57f9c6 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -403,6 +403,9 @@ static void snd_complete_urb(struct urb *urb) } prepare_outbound_urb(ep, ctx); + /* can be stopped during prepare callback */ + if (unlikely(!test_bit(EP_FLAG_RUNNING, &ep->flags))) + goto exit_clear; } else { retire_inbound_urb(ep, ctx); /* can be stopped during retire callback */ From 5edab14154213a4b6bcc527f60adb6124034a0be Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Fri, 15 Nov 2019 11:35:05 -0800 Subject: [PATCH 0586/3715] Input: ff-memless - kill timer in destroy() commit fa3a5a1880c91bb92594ad42dfe9eedad7996b86 upstream. No timer must be left running when the device goes away. Signed-off-by: Oliver Neukum Reported-and-tested-by: syzbot+b6c55daa701fc389e286@syzkaller.appspotmail.com Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1573726121.17351.3.camel@suse.com Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/ff-memless.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/input/ff-memless.c b/drivers/input/ff-memless.c index fcc6c3368182..ea3f0f5eb534 100644 --- a/drivers/input/ff-memless.c +++ b/drivers/input/ff-memless.c @@ -501,6 +501,15 @@ static void ml_ff_destroy(struct ff_device *ff) { struct ml_device *ml = ff->private; + /* + * Even though we stop all playing effects when tearing down + * an input device (via input_device_flush() that calls into + * input_ff_flush() that stops and erases all effects), we + * do not actually stop the timer, and therefore we should + * do it here. + */ + del_timer_sync(&ml->timer); + kfree(ml->private); } From f74f050cfc30876d1a70021a959d561863b2248f Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Mon, 4 Nov 2019 15:58:34 -0800 Subject: [PATCH 0587/3715] Input: synaptics-rmi4 - fix video buffer size commit 003f01c780020daa9a06dea1db495b553a868c29 upstream. The video buffer used by the queue is a vb2_v4l2_buffer, not a plain vb2_buffer. Using the wrong type causes the allocation of the buffer storage to be too small, causing a out of bounds write when __init_vb2_v4l2_buffer initializes the buffer. Signed-off-by: Lucas Stach Fixes: 3a762dbd5347 ("[media] Input: synaptics-rmi4 - add support for F54 diagnostics") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20191104114454.10500-1-l.stach@pengutronix.de Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/rmi4/rmi_f54.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/rmi4/rmi_f54.c b/drivers/input/rmi4/rmi_f54.c index f5206e2c767e..b430fc6b6fb0 100644 --- a/drivers/input/rmi4/rmi_f54.c +++ b/drivers/input/rmi4/rmi_f54.c @@ -362,7 +362,7 @@ static const struct vb2_ops rmi_f54_queue_ops = { static const struct vb2_queue rmi_f54_queue = { .type = V4L2_BUF_TYPE_VIDEO_CAPTURE, .io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF | VB2_READ, - .buf_struct_size = sizeof(struct vb2_buffer), + .buf_struct_size = sizeof(struct vb2_v4l2_buffer), .ops = &rmi_f54_queue_ops, .mem_ops = &vb2_vmalloc_memops, .timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC, From fb427af8abb63f09d50d390e18fca25711969e52 Mon Sep 17 00:00:00 2001 From: Andrew Duggan Date: Mon, 4 Nov 2019 16:06:44 -0800 Subject: [PATCH 0588/3715] Input: synaptics-rmi4 - disable the relative position IRQ in the F12 driver commit f6aabe1ff1d9d7bad0879253011216438bdb2530 upstream. This patch fixes an issue seen on HID touchpads which report finger positions using RMI4 Function 12. The issue manifests itself as spurious button presses as described in: https://www.spinics.net/lists/linux-input/msg58618.html Commit 24d28e4f1271 ("Input: synaptics-rmi4 - convert irq distribution to irq_domain") switched the RMI4 driver to using an irq_domain to handle RMI4 function interrupts. Functions with more then one interrupt now have each interrupt mapped to their own IRQ and IRQ handler. The result of this change is that the F12 IRQ handler was now getting called twice. Once for the absolute data interrupt and once for the relative data interrupt. For HID devices, calling rmi_f12_attention() a second time causes the attn_data data pointer and size to be set incorrectly. When the touchpad button is pressed, F30 will generate an interrupt and attempt to read the F30 data from the invalid attn_data data pointer and report incorrect button events. This patch disables the F12 relative interrupt which prevents rmi_f12_attention() from being called twice. Signed-off-by: Andrew Duggan Reported-by: Simon Wood Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20191025002527.3189-2-aduggan@synaptics.com Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/rmi4/rmi_f12.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/input/rmi4/rmi_f12.c b/drivers/input/rmi4/rmi_f12.c index 8b0db086d68a..9939aba786a8 100644 --- a/drivers/input/rmi4/rmi_f12.c +++ b/drivers/input/rmi4/rmi_f12.c @@ -58,6 +58,9 @@ struct f12_data { const struct rmi_register_desc_item *data15; u16 data15_offset; + + unsigned long *abs_mask; + unsigned long *rel_mask; }; static int rmi_f12_read_sensor_tuning(struct f12_data *f12) @@ -296,9 +299,18 @@ static int rmi_f12_write_control_regs(struct rmi_function *fn) static int rmi_f12_config(struct rmi_function *fn) { struct rmi_driver *drv = fn->rmi_dev->driver; + struct f12_data *f12 = dev_get_drvdata(&fn->dev); + struct rmi_2d_sensor *sensor; int ret; - drv->set_irq_bits(fn->rmi_dev, fn->irq_mask); + sensor = &f12->sensor; + + if (!sensor->report_abs) + drv->clear_irq_bits(fn->rmi_dev, f12->abs_mask); + else + drv->set_irq_bits(fn->rmi_dev, f12->abs_mask); + + drv->clear_irq_bits(fn->rmi_dev, f12->rel_mask); ret = rmi_f12_write_control_regs(fn); if (ret) @@ -320,9 +332,12 @@ static int rmi_f12_probe(struct rmi_function *fn) struct rmi_device_platform_data *pdata = rmi_get_platform_data(rmi_dev); struct rmi_driver_data *drvdata = dev_get_drvdata(&rmi_dev->dev); u16 data_offset = 0; + int mask_size; rmi_dbg(RMI_DEBUG_FN, &fn->dev, "%s\n", __func__); + mask_size = BITS_TO_LONGS(drvdata->irq_count) * sizeof(unsigned long); + ret = rmi_read(fn->rmi_dev, query_addr, &buf); if (ret < 0) { dev_err(&fn->dev, "Failed to read general info register: %d\n", @@ -337,10 +352,19 @@ static int rmi_f12_probe(struct rmi_function *fn) return -ENODEV; } - f12 = devm_kzalloc(&fn->dev, sizeof(struct f12_data), GFP_KERNEL); + f12 = devm_kzalloc(&fn->dev, sizeof(struct f12_data) + mask_size * 2, + GFP_KERNEL); if (!f12) return -ENOMEM; + f12->abs_mask = (unsigned long *)((char *)f12 + + sizeof(struct f12_data)); + f12->rel_mask = (unsigned long *)((char *)f12 + + sizeof(struct f12_data) + mask_size); + + set_bit(fn->irq_pos, f12->abs_mask); + set_bit(fn->irq_pos + 1, f12->rel_mask); + f12->has_dribble = !!(buf & BIT(3)); if (fn->dev.of_node) { From 972748923d25ba923399ecfa711758fbb20af4fe Mon Sep 17 00:00:00 2001 From: Andrew Duggan Date: Mon, 4 Nov 2019 16:07:30 -0800 Subject: [PATCH 0589/3715] Input: synaptics-rmi4 - do not consume more data than we have (F11, F12) commit 5d40d95e7e64756cc30606c2ba169271704d47cb upstream. Currently, rmi_f11_attention() and rmi_f12_attention() functions update the attn_data data pointer and size based on the size of the expected size of the attention data. However, if the actual valid data in the attn buffer is less then the expected value then the updated data pointer will point to memory beyond the end of the attn buffer. Using the calculated valid_bytes instead will prevent this from happening. Signed-off-by: Andrew Duggan Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20191025002527.3189-3-aduggan@synaptics.com Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/rmi4/rmi_f11.c | 4 ++-- drivers/input/rmi4/rmi_f12.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/input/rmi4/rmi_f11.c b/drivers/input/rmi4/rmi_f11.c index bb63b8823d62..e8c3e5d1ea22 100644 --- a/drivers/input/rmi4/rmi_f11.c +++ b/drivers/input/rmi4/rmi_f11.c @@ -1295,8 +1295,8 @@ static int rmi_f11_attention(struct rmi_function *fn, unsigned long *irq_bits) valid_bytes = f11->sensor.attn_size; memcpy(f11->sensor.data_pkt, drvdata->attn_data.data, valid_bytes); - drvdata->attn_data.data += f11->sensor.attn_size; - drvdata->attn_data.size -= f11->sensor.attn_size; + drvdata->attn_data.data += valid_bytes; + drvdata->attn_data.size -= valid_bytes; } else { error = rmi_read_block(rmi_dev, data_base_addr, f11->sensor.data_pkt, diff --git a/drivers/input/rmi4/rmi_f12.c b/drivers/input/rmi4/rmi_f12.c index 9939aba786a8..99dd2f512058 100644 --- a/drivers/input/rmi4/rmi_f12.c +++ b/drivers/input/rmi4/rmi_f12.c @@ -217,8 +217,8 @@ static int rmi_f12_attention(struct rmi_function *fn, valid_bytes = sensor->attn_size; memcpy(sensor->data_pkt, drvdata->attn_data.data, valid_bytes); - drvdata->attn_data.data += sensor->attn_size; - drvdata->attn_data.size -= sensor->attn_size; + drvdata->attn_data.data += valid_bytes; + drvdata->attn_data.size -= valid_bytes; } else { retval = rmi_read_block(rmi_dev, f12->data_addr, sensor->data_pkt, sensor->pkt_size); From 17cb370744e64257dcc5f6d1ee7a279a39b0460e Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Tue, 12 Nov 2019 16:47:08 -0800 Subject: [PATCH 0590/3715] Input: synaptics-rmi4 - clear IRQ enables for F54 commit 549766ac2ac1f6c8bb85906bbcea759541bb19a2 upstream. The driver for F54 just polls the status and doesn't even have a IRQ handler registered. Make sure to disable all F54 IRQs, so we don't crash the kernel on a nonexistent handler. Signed-off-by: Lucas Stach Link: https://lore.kernel.org/r/20191105114402.6009-1-l.stach@pengutronix.de Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/rmi4/rmi_f54.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/rmi4/rmi_f54.c b/drivers/input/rmi4/rmi_f54.c index b430fc6b6fb0..4e4406eae42d 100644 --- a/drivers/input/rmi4/rmi_f54.c +++ b/drivers/input/rmi4/rmi_f54.c @@ -619,7 +619,7 @@ static int rmi_f54_config(struct rmi_function *fn) { struct rmi_driver *drv = fn->rmi_dev->driver; - drv->set_irq_bits(fn->rmi_dev, fn->irq_mask); + drv->clear_irq_bits(fn->rmi_dev, fn->irq_mask); return 0; } From ae833ad83ff4c9b2713c5f7184595c1d0183ff23 Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Fri, 15 Nov 2019 11:32:36 -0800 Subject: [PATCH 0591/3715] Input: synaptics-rmi4 - destroy F54 poller workqueue when removing commit ba60cf9f78f0d7c8e73c7390608f7f818ee68aa0 upstream. The driver forgets to destroy workqueue in remove() similarly to what is done when probe() fails. Add a call to destroy_workqueue() to fix it. Since unregistration will wait for the work to finish, we do not need to cancel/flush the work instance in remove(). Signed-off-by: Chuhong Yuan Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20191114023405.31477-1-hslester96@gmail.com Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/rmi4/rmi_f54.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/rmi4/rmi_f54.c b/drivers/input/rmi4/rmi_f54.c index 4e4406eae42d..7f1959517ec0 100644 --- a/drivers/input/rmi4/rmi_f54.c +++ b/drivers/input/rmi4/rmi_f54.c @@ -747,6 +747,7 @@ static void rmi_f54_remove(struct rmi_function *fn) video_unregister_device(&f54->vdev); v4l2_device_unregister(&f54->v4l2); + destroy_workqueue(f54->workqueue); } struct rmi_function_handler rmi_f54_handler = { From c46089e301a5d78c4d8fd844eae3c81bb626a60d Mon Sep 17 00:00:00 2001 From: James Erwin Date: Fri, 1 Nov 2019 15:20:59 -0400 Subject: [PATCH 0592/3715] IB/hfi1: Ensure full Gen3 speed in a Gen4 system commit a9c3c4c597704b3a1a2b9bef990e7d8a881f6533 upstream. If an hfi1 card is inserted in a Gen4 systems, the driver will avoid the gen3 speed bump and the card will operate at half speed. This is because the driver avoids the gen3 speed bump when the parent bus speed isn't identical to gen3, 8.0GT/s. This is not compatible with gen4 and newer speeds. Fix by relaxing the test to explicitly look for the lower capability speeds which inherently allows for gen4 and all future speeds. Fixes: 7724105686e7 ("IB/hfi1: add driver files") Link: https://lore.kernel.org/r/20191101192059.106248.1699.stgit@awfm-01.aw.intel.com Cc: Reviewed-by: Dennis Dalessandro Reviewed-by: Kaike Wan Signed-off-by: James Erwin Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/pcie.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 51a5416b1da4..fd9ae23c480e 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -327,7 +327,9 @@ int pcie_speeds(struct hfi1_devdata *dd) /* * bus->max_bus_speed is set from the bridge's linkcap Max Link Speed */ - if (parent && dd->pcidev->bus->max_bus_speed != PCIE_SPEED_8_0GT) { + if (parent && + (dd->pcidev->bus->max_bus_speed == PCIE_SPEED_2_5GT || + dd->pcidev->bus->max_bus_speed == PCIE_SPEED_5_0GT)) { dd_dev_info(dd, "Parent PCIe bridge does not support Gen3\n"); dd->link_gen3_capable = 0; } From 958ec1be8f0dc3dadc6ab37160dd644a0a3ce19a Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 13 Nov 2019 19:29:38 +0100 Subject: [PATCH 0593/3715] i2c: acpi: Force bus speed to 400KHz if a Silead touchscreen is present commit 7574c0db2e68c4d0bae9d415a683bdd8b2a761e9 upstream. Many cheap devices use Silead touchscreen controllers. Testing has shown repeatedly that these touchscreen controllers work fine at 400KHz, but for unknown reasons do not work properly at 100KHz. This has been seen on both ARM and x86 devices using totally different i2c controllers. On some devices the ACPI tables list another device at the same I2C-bus as only being capable of 100KHz, testing has shown that these other devices work fine at 400KHz (as can be expected of any recent I2C hw). This commit makes i2c_acpi_find_bus_speed() always return 400KHz if a Silead touchscreen controller is present, fixing the touchscreen not working on devices which ACPI tables' wrongly list another device on the same bus as only being capable of 100KHz. Specifically this fixes the touchscreen on the Jumper EZpad 6 m4 not working. Reported-by: youling 257 Tested-by: youling 257 Signed-off-by: Hans de Goede Reviewed-by: Jarkko Nikula Acked-by: Mika Westerberg [wsa: rewording warning a little] Signed-off-by: Wolfram Sang Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/i2c-core-acpi.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c index 847d9bf6744c..df9800aaeac7 100644 --- a/drivers/i2c/i2c-core-acpi.c +++ b/drivers/i2c/i2c-core-acpi.c @@ -43,6 +43,7 @@ struct i2c_acpi_lookup { int index; u32 speed; u32 min_speed; + u32 force_speed; }; static int i2c_acpi_fill_info(struct acpi_resource *ares, void *data) @@ -240,6 +241,19 @@ i2c_acpi_match_device(const struct acpi_device_id *matches, return acpi_match_device(matches, &client->dev); } +static const struct acpi_device_id i2c_acpi_force_400khz_device_ids[] = { + /* + * These Silead touchscreen controllers only work at 400KHz, for + * some reason they do not work at 100KHz. On some devices the ACPI + * tables list another device at their bus as only being capable + * of 100KHz, testing has shown that these other devices work fine + * at 400KHz (as can be expected of any recent i2c hw) so we force + * the speed of the bus to 400 KHz if a Silead device is present. + */ + { "MSSL1680", 0 }, + {} +}; + static acpi_status i2c_acpi_lookup_speed(acpi_handle handle, u32 level, void *data, void **return_value) { @@ -258,6 +272,9 @@ static acpi_status i2c_acpi_lookup_speed(acpi_handle handle, u32 level, if (lookup->speed <= lookup->min_speed) lookup->min_speed = lookup->speed; + if (acpi_match_device_ids(adev, i2c_acpi_force_400khz_device_ids) == 0) + lookup->force_speed = 400000; + return AE_OK; } @@ -295,7 +312,16 @@ u32 i2c_acpi_find_bus_speed(struct device *dev) return 0; } - return lookup.min_speed != UINT_MAX ? lookup.min_speed : 0; + if (lookup.force_speed) { + if (lookup.force_speed != lookup.min_speed) + dev_warn(dev, FW_BUG "DSDT uses known not-working I2C bus speed %d, forcing it to %d\n", + lookup.min_speed, lookup.force_speed); + return lookup.force_speed; + } else if (lookup.min_speed != UINT_MAX) { + return lookup.min_speed; + } else { + return 0; + } } EXPORT_SYMBOL_GPL(i2c_acpi_find_bus_speed); From dc39d4f7ecbd4176b4e358c8caf9b8f18c8f8e24 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 3 Nov 2019 13:45:04 -0500 Subject: [PATCH 0594/3715] ecryptfs_lookup_interpose(): lower_dentry->d_inode is not stable commit e72b9dd6a5f17d0fb51f16f8685f3004361e83d0 upstream. lower_dentry can't go from positive to negative (we have it pinned), but it *can* go from negative to positive. So fetching ->d_inode into a local variable, doing a blocking allocation, checking that now ->d_inode is non-NULL and feeding the value we'd fetched earlier to a function that won't accept NULL is not a good idea. Cc: stable@vger.kernel.org Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/ecryptfs/inode.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index bda65a730790..d40229ccb6a9 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -326,7 +326,7 @@ static int ecryptfs_i_size_read(struct dentry *dentry, struct inode *inode) static struct dentry *ecryptfs_lookup_interpose(struct dentry *dentry, struct dentry *lower_dentry) { - struct inode *inode, *lower_inode = d_inode(lower_dentry); + struct inode *inode, *lower_inode; struct ecryptfs_dentry_info *dentry_info; struct vfsmount *lower_mnt; int rc = 0; @@ -349,7 +349,15 @@ static struct dentry *ecryptfs_lookup_interpose(struct dentry *dentry, dentry_info->lower_path.mnt = lower_mnt; dentry_info->lower_path.dentry = lower_dentry; - if (d_really_is_negative(lower_dentry)) { + /* + * negative dentry can go positive under us here - its parent is not + * locked. That's OK and that could happen just as we return from + * ecryptfs_lookup() anyway. Just need to be careful and fetch + * ->d_inode only once - it's not stable here. + */ + lower_inode = READ_ONCE(lower_dentry->d_inode); + + if (!lower_inode) { /* We want to add because we couldn't find in lower */ d_add(dentry, NULL); return NULL; From d47135249f4d1e8d29e514986a63bf129c64276c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 3 Nov 2019 13:55:43 -0500 Subject: [PATCH 0595/3715] ecryptfs_lookup_interpose(): lower_dentry->d_parent is not stable either commit 762c69685ff7ad5ad7fee0656671e20a0c9c864d upstream. We need to get the underlying dentry of parent; sure, absent the races it is the parent of underlying dentry, but there's nothing to prevent losing a timeslice to preemtion in the middle of evaluation of lower_dentry->d_parent->d_inode, having another process move lower_dentry around and have its (ex)parent not pinned anymore and freed on memory pressure. Then we regain CPU and try to fetch ->d_inode from memory that is freed by that point. dentry->d_parent *is* stable here - it's an argument of ->lookup() and we are guaranteed that it won't be moved anywhere until we feed it to d_add/d_splice_alias. So we safely go that way to get to its underlying dentry. Cc: stable@vger.kernel.org # since 2009 or so Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/ecryptfs/inode.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index d40229ccb6a9..62d1dea85ef1 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -326,9 +326,9 @@ static int ecryptfs_i_size_read(struct dentry *dentry, struct inode *inode) static struct dentry *ecryptfs_lookup_interpose(struct dentry *dentry, struct dentry *lower_dentry) { + struct path *path = ecryptfs_dentry_to_lower_path(dentry->d_parent); struct inode *inode, *lower_inode; struct ecryptfs_dentry_info *dentry_info; - struct vfsmount *lower_mnt; int rc = 0; dentry_info = kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL); @@ -340,13 +340,12 @@ static struct dentry *ecryptfs_lookup_interpose(struct dentry *dentry, return ERR_PTR(-ENOMEM); } - lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent)); fsstack_copy_attr_atime(d_inode(dentry->d_parent), - d_inode(lower_dentry->d_parent)); + d_inode(path->dentry)); BUG_ON(!d_count(lower_dentry)); ecryptfs_set_dentry_private(dentry, dentry_info); - dentry_info->lower_path.mnt = lower_mnt; + dentry_info->lower_path.mnt = mntget(path->mnt); dentry_info->lower_path.dentry = lower_dentry; /* From 997b62e3f8921b6779d2458f5c69e731b928e5c3 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 8 Nov 2019 16:58:03 +0100 Subject: [PATCH 0596/3715] iommu/vt-d: Fix QI_DEV_IOTLB_PFSID and QI_DEV_EIOTLB_PFSID macros commit 4e7120d79edb31e4ee68e6f8421448e4603be1e9 upstream. For both PASID-based-Device-TLB Invalidate Descriptor and Device-TLB Invalidate Descriptor, the Physical Function Source-ID value is split according to this layout: PFSID[3:0] is set at offset 12 and PFSID[15:4] is put at offset 52. Fix the part laid out at offset 52. Fixes: 0f725561e1684 ("iommu/vt-d: Add definitions for PFSID") Signed-off-by: Eric Auger Acked-by: Jacob Pan Cc: stable@vger.kernel.org # v4.19+ Acked-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- include/linux/intel-iommu.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index a6ab2f51f703..4def15c32a01 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -282,7 +282,8 @@ enum { #define QI_DEV_IOTLB_SID(sid) ((u64)((sid) & 0xffff) << 32) #define QI_DEV_IOTLB_QDEP(qdep) (((qdep) & 0x1f) << 16) #define QI_DEV_IOTLB_ADDR(addr) ((u64)(addr) & VTD_PAGE_MASK) -#define QI_DEV_IOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | ((u64)(pfsid & 0xfff) << 52)) +#define QI_DEV_IOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | \ + ((u64)((pfsid >> 4) & 0xfff) << 52)) #define QI_DEV_IOTLB_SIZE 1 #define QI_DEV_IOTLB_MAX_INVS 32 @@ -307,7 +308,8 @@ enum { #define QI_DEV_EIOTLB_PASID(p) (((u64)p) << 32) #define QI_DEV_EIOTLB_SID(sid) ((u64)((sid) & 0xffff) << 16) #define QI_DEV_EIOTLB_QDEP(qd) ((u64)((qd) & 0x1f) << 4) -#define QI_DEV_EIOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | ((u64)(pfsid & 0xfff) << 52)) +#define QI_DEV_EIOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | \ + ((u64)((pfsid >> 4) & 0xfff) << 52)) #define QI_DEV_EIOTLB_MAX_INVS 32 #define QI_PGRP_IDX(idx) (((u64)(idx)) << 55) From a60145d686467bb82fa5f76d8a9c769686ca0482 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 15 Nov 2019 17:34:43 -0800 Subject: [PATCH 0597/3715] mm: memcg: switch to css_tryget() in get_mem_cgroup_from_mm() commit 00d484f354d85845991b40141d40ba9e5eb60faf upstream. We've encountered a rcu stall in get_mem_cgroup_from_mm(): rcu: INFO: rcu_sched self-detected stall on CPU rcu: 33-....: (21000 ticks this GP) idle=6c6/1/0x4000000000000002 softirq=35441/35441 fqs=5017 (t=21031 jiffies g=324821 q=95837) NMI backtrace for cpu 33 <...> RIP: 0010:get_mem_cgroup_from_mm+0x2f/0x90 <...> __memcg_kmem_charge+0x55/0x140 __alloc_pages_nodemask+0x267/0x320 pipe_write+0x1ad/0x400 new_sync_write+0x127/0x1c0 __kernel_write+0x4f/0xf0 dump_emit+0x91/0xc0 writenote+0xa0/0xc0 elf_core_dump+0x11af/0x1430 do_coredump+0xc65/0xee0 get_signal+0x132/0x7c0 do_signal+0x36/0x640 exit_to_usermode_loop+0x61/0xd0 do_syscall_64+0xd4/0x100 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The problem is caused by an exiting task which is associated with an offline memcg. We're iterating over and over in the do {} while (!css_tryget_online()) loop, but obviously the memcg won't become online and the exiting task won't be migrated to a live memcg. Let's fix it by switching from css_tryget_online() to css_tryget(). As css_tryget_online() cannot guarantee that the memcg won't go offline, the check is usually useless, except some rare cases when for example it determines if something should be presented to a user. A similar problem is described by commit 18fa84a2db0e ("cgroup: Use css_tryget() instead of css_tryget_online() in task_get_css()"). Johannes: : The bug aside, it doesn't matter whether the cgroup is online for the : callers. It used to matter when offlining needed to evacuate all charges : from the memcg, and so needed to prevent new ones from showing up, but we : don't care now. Link: http://lkml.kernel.org/r/20191106225131.3543616-1-guro@fb.com Signed-off-by: Roman Gushchin Acked-by: Johannes Weiner Acked-by: Tejun Heo Reviewed-by: Shakeel Butt Cc: Michal Hocko Cc: Michal Koutn Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memcontrol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2dd99c7884cd..326525a97c47 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -725,7 +725,7 @@ static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm) if (unlikely(!memcg)) memcg = root_mem_cgroup; } - } while (!css_tryget_online(&memcg->css)); + } while (!css_tryget(&memcg->css)); rcu_read_unlock(); return memcg; } From 8a175e137b84d389d443c37b6133ae893ed97d85 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 15 Nov 2019 17:34:46 -0800 Subject: [PATCH 0598/3715] mm: hugetlb: switch to css_tryget() in hugetlb_cgroup_charge_cgroup() commit 0362f326d86c645b5e96b7dbc3ee515986ed019d upstream. An exiting task might belong to an offline cgroup. In this case an attempt to grab a cgroup reference from the task can end up with an infinite loop in hugetlb_cgroup_charge_cgroup(), because neither the cgroup will become online, neither the task will be migrated to a live cgroup. Fix this by switching over to css_tryget(). As css_tryget_online() can't guarantee that the cgroup won't go offline, in most cases the check doesn't make sense. In this particular case users of hugetlb_cgroup_charge_cgroup() are not affected by this change. A similar problem is described by commit 18fa84a2db0e ("cgroup: Use css_tryget() instead of css_tryget_online() in task_get_css()"). Link: http://lkml.kernel.org/r/20191106225131.3543616-2-guro@fb.com Signed-off-by: Roman Gushchin Acked-by: Johannes Weiner Acked-by: Tejun Heo Reviewed-by: Shakeel Butt Cc: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb_cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index eec1150125b9..e430e04997ee 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -196,7 +196,7 @@ int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, again: rcu_read_lock(); h_cg = hugetlb_cgroup_from_task(current); - if (!css_tryget_online(&h_cg->css)) { + if (!css_tryget(&h_cg->css)) { rcu_read_unlock(); goto again; } From 181246754d93f97ca4c36d057a0e9fb7f12346c9 Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Thu, 14 Nov 2019 12:59:26 +0000 Subject: [PATCH 0599/3715] mmc: sdhci-of-at91: fix quirk2 overwrite commit fed23c5829ecab4ddc712d7b0046e59610ca3ba4 upstream. The quirks2 are parsed and set (e.g. from DT) before the quirk for broken HS200 is set in the driver. The driver needs to enable just this flag, not rewrite the whole quirk set. Fixes: 7871aa60ae00 ("mmc: sdhci-of-at91: add quirk for broken HS200") Signed-off-by: Eugen Hristev Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-of-at91.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c index 74af8cc4ef36..564e7be21e06 100644 --- a/drivers/mmc/host/sdhci-of-at91.c +++ b/drivers/mmc/host/sdhci-of-at91.c @@ -365,7 +365,7 @@ static int sdhci_at91_probe(struct platform_device *pdev) pm_runtime_use_autosuspend(&pdev->dev); /* HS200 is broken at this moment */ - host->quirks2 = SDHCI_QUIRK2_BROKEN_HS200; + host->quirks2 |= SDHCI_QUIRK2_BROKEN_HS200; ret = sdhci_add_host(host); if (ret) From bf95ccce798daaf0a0dbea2927d51894a45b9d7d Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Sat, 11 Aug 2018 11:12:19 +0200 Subject: [PATCH 0600/3715] iio: adc: max9611: explicitly cast gain_selectors [ Upstream commit b1ec0802503820ccbc894aadfd2a44da20232f5e ] After finding a reasonable gain, the function converts the configured gain to a gain configuration option selector enum max9611_csa_gain. Make the conversion clearly visible by using an explicit cast. This also avoids a warning seen with clang: drivers/iio/adc/max9611.c:292:16: warning: implicit conversion from enumeration type 'enum max9611_conf_ids' to different enumeration type 'enum max9611_csa_gain' [-Wenum-conversion] *csa_gain = gain_selectors[i]; ~ ^~~~~~~~~~~~~~~~~ Signed-off-by: Stefan Agner Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/adc/max9611.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/max9611.c b/drivers/iio/adc/max9611.c index c61fbf560271..33be07c78b96 100644 --- a/drivers/iio/adc/max9611.c +++ b/drivers/iio/adc/max9611.c @@ -289,7 +289,7 @@ static int max9611_read_csa_voltage(struct max9611_dev *max9611, return ret; if (*adc_raw > 0) { - *csa_gain = gain_selectors[i]; + *csa_gain = (enum max9611_csa_gain)gain_selectors[i]; return 0; } } From a045fe21f0f4bc0908f6dc21d61eeb660f7bbd26 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 1 Aug 2018 11:48:33 +0200 Subject: [PATCH 0601/3715] tee: optee: take DT status property into account [ Upstream commit db878f76b9ff7487da9bb0f686153f81829f1230 ] DT nodes may have a 'status' property which, if set to anything other than 'ok' or 'okay', indicates to the OS that the DT node should be treated as if it was not present. So add that missing logic to the OP-TEE driver. Signed-off-by: Ard Biesheuvel Signed-off-by: Jens Wiklander Signed-off-by: Sasha Levin --- drivers/tee/optee/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c index edb6e4e9ef3a..ca79c2ba2ef2 100644 --- a/drivers/tee/optee/core.c +++ b/drivers/tee/optee/core.c @@ -590,7 +590,7 @@ static int __init optee_driver_init(void) return -ENODEV; np = of_find_matching_node(fw_np, optee_match); - if (!np) + if (!np || !of_device_is_available(np)) return -ENODEV; optee = optee_probe(np); From 502e825baa9c738e5bbd3df6f98968bf4d475f67 Mon Sep 17 00:00:00 2001 From: Tamizh chelvam Date: Mon, 6 Aug 2018 12:39:01 +0300 Subject: [PATCH 0602/3715] ath10k: fix kernel panic by moving pci flush after napi_disable [ Upstream commit bd1d395070cca4f42a93e520b0597274789274a4 ] When continuously running wifi up/down sequence, the napi poll can be scheduled after the CE buffers being freed by ath10k_pci_flush Steps: In a certain condition, during wifi down below scenario might occur. ath10k_stop->ath10k_hif_stop->napi_schedule->ath10k_pci_flush->napi_poll(napi_synchronize). In the above scenario, CE buffer entries will be freed up and become NULL in ath10k_pci_flush. And the napi_poll has been invoked after the flush process and it will try to get the skb from the CE buffer entry and perform some action on that. Since the CE buffer already cleaned by pci flush this action will create NULL pointer dereference and trigger below kernel panic. Unable to handle kernel NULL pointer dereference at virtual address 0000005c PC is at ath10k_pci_htt_rx_cb+0x64/0x3ec [ath10k_pci] ath10k_pci_htt_rx_cb [ath10k_pci] ath10k_ce_per_engine_service+0x74/0xc4 [ath10k_pci] ath10k_ce_per_engine_service [ath10k_pci] ath10k_ce_per_engine_service_any+0x74/0x80 [ath10k_pci] ath10k_ce_per_engine_service_any [ath10k_pci] ath10k_pci_napi_poll+0x48/0xec [ath10k_pci] ath10k_pci_napi_poll [ath10k_pci] net_rx_action+0xac/0x160 net_rx_action __do_softirq+0xdc/0x208 __do_softirq irq_exit+0x84/0xe0 irq_exit __handle_domain_irq+0x80/0xa0 __handle_domain_irq gic_handle_irq+0x38/0x5c gic_handle_irq __irq_usr+0x44/0x60 Tested on QCA4019 and firmware version 10.4.3.2.1.1-00010 Signed-off-by: Tamizh chelvam Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath10k/ahb.c | 4 ++-- drivers/net/wireless/ath/ath10k/pci.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/ahb.c b/drivers/net/wireless/ath/ath10k/ahb.c index ff6815e95684..1404ec9f56be 100644 --- a/drivers/net/wireless/ath/ath10k/ahb.c +++ b/drivers/net/wireless/ath/ath10k/ahb.c @@ -663,10 +663,10 @@ static void ath10k_ahb_hif_stop(struct ath10k *ar) ath10k_ahb_irq_disable(ar); synchronize_irq(ar_ahb->irq); - ath10k_pci_flush(ar); - napi_synchronize(&ar->napi); napi_disable(&ar->napi); + + ath10k_pci_flush(ar); } static int ath10k_ahb_hif_power_up(struct ath10k *ar) diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index d790ea20b95d..27ab3eb47534 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -1787,9 +1787,9 @@ static void ath10k_pci_hif_stop(struct ath10k *ar) ath10k_pci_irq_disable(ar); ath10k_pci_irq_sync(ar); - ath10k_pci_flush(ar); napi_synchronize(&ar->napi); napi_disable(&ar->napi); + ath10k_pci_flush(ar); spin_lock_irqsave(&ar_pci->ps_lock, flags); WARN_ON(ar_pci->ps_wake_refcount > 0); From bd0e3c12a07e59e5f5c54130df0e77bc1a468bbb Mon Sep 17 00:00:00 2001 From: Marcus Folkesson Date: Fri, 24 Aug 2018 22:24:40 +0200 Subject: [PATCH 0603/3715] iio: dac: mcp4922: fix error handling in mcp4922_write_raw [ Upstream commit 0833627fc3f757a0dca11e2a9c46c96335a900ee ] Do not try to write negative values and make sure that the write goes well. Signed-off-by: Marcus Folkesson Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/dac/mcp4922.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/iio/dac/mcp4922.c b/drivers/iio/dac/mcp4922.c index 3854d201a5d6..68dd0be1ac07 100644 --- a/drivers/iio/dac/mcp4922.c +++ b/drivers/iio/dac/mcp4922.c @@ -94,17 +94,22 @@ static int mcp4922_write_raw(struct iio_dev *indio_dev, long mask) { struct mcp4922_state *state = iio_priv(indio_dev); + int ret; if (val2 != 0) return -EINVAL; switch (mask) { case IIO_CHAN_INFO_RAW: - if (val > GENMASK(chan->scan_type.realbits-1, 0)) + if (val < 0 || val > GENMASK(chan->scan_type.realbits - 1, 0)) return -EINVAL; val <<= chan->scan_type.shift; - state->value[chan->channel] = val; - return mcp4922_spi_write(state, chan->channel, val); + + ret = mcp4922_spi_write(state, chan->channel, val); + if (!ret) + state->value[chan->channel] = val; + return ret; + default: return -EINVAL; } From c82749aed1ca79766eb692e234ac061df8221806 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Mon, 30 Jul 2018 13:31:31 +0100 Subject: [PATCH 0604/3715] arm64: dts: allwinner: a64: Olinuxino: fix DRAM voltage [ Upstream commit 93366b49a35f3a190052734b3f32c8fe2535b53f ] The Olinuxino board uses DDR3L chips which are supposed to be driven with 1.35V. The reset default of the AXP is properly set to 1.36V. While technically the chips can also run at 1.5 volts, changing the voltage on the fly while booting Linux is asking for trouble. Also running at a lower voltage saves power. So fix the DCDC5 value to match the actual board design. Signed-off-by: Andre Przywara Tested-by: Martin Lucina Acked-by: Maxime Ripard Signed-off-by: Chen-Yu Tsai Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino.dts | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino.dts index 338e786155b1..2ef779b02757 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino.dts @@ -120,10 +120,14 @@ /* DCDC3 is polyphased with DCDC2 */ +/* + * The board uses DDR3L DRAM chips. 1.36V is the closest to the nominal + * 1.35V that the PMIC can drive. + */ ®_dcdc5 { regulator-always-on; - regulator-min-microvolt = <1500000>; - regulator-max-microvolt = <1500000>; + regulator-min-microvolt = <1360000>; + regulator-max-microvolt = <1360000>; regulator-name = "vcc-ddr3"; }; From 284183a0f4b84da89e9a514146792d7f006daf9c Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Mon, 30 Jul 2018 13:31:34 +0100 Subject: [PATCH 0605/3715] arm64: dts: allwinner: a64: NanoPi-A64: Fix DCDC1 voltage [ Upstream commit 480f58cdbe392d4387a2193b6131a277e0111dd0 ] According to the NanoPi-A64 schematics, DCDC1 is connected to a voltage rail named "VDD_SYS_3.3V". All users seem to expect 3.3V here: the Ethernet PHY, the uSD card slot, the camera interface and the GPIO pins on the headers. Fix up the voltage on the regulator to lift it up to 3.3V. Signed-off-by: Andre Przywara Acked-by: Maxime Ripard Signed-off-by: Chen-Yu Tsai Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/allwinner/sun50i-a64-nanopi-a64.dts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-nanopi-a64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-nanopi-a64.dts index 2beef9e6cb88..aa0b3844ad63 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-nanopi-a64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-nanopi-a64.dts @@ -126,9 +126,9 @@ ®_dcdc1 { regulator-always-on; - regulator-min-microvolt = <3000000>; - regulator-max-microvolt = <3000000>; - regulator-name = "vcc-3v"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-name = "vcc-3v3"; }; ®_dcdc2 { From c9f373293e684acfcad234744beac67625b99b1b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 27 Aug 2018 12:21:45 +0300 Subject: [PATCH 0606/3715] ALSA: pcm: signedness bug in snd_pcm_plug_alloc() [ Upstream commit 6f128fa41f310e1f39ebcea9621d2905549ecf52 ] The "frames" variable is unsigned so the error handling doesn't work properly. Signed-off-by: Dan Carpenter Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/oss/pcm_plugin.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c index 617845d4a811..b8ab46b8298d 100644 --- a/sound/core/oss/pcm_plugin.c +++ b/sound/core/oss/pcm_plugin.c @@ -111,7 +111,7 @@ int snd_pcm_plug_alloc(struct snd_pcm_substream *plug, snd_pcm_uframes_t frames) while (plugin->next) { if (plugin->dst_frames) frames = plugin->dst_frames(plugin, frames); - if (snd_BUG_ON(frames <= 0)) + if (snd_BUG_ON((snd_pcm_sframes_t)frames <= 0)) return -ENXIO; plugin = plugin->next; err = snd_pcm_plugin_alloc(plugin, frames); @@ -123,7 +123,7 @@ int snd_pcm_plug_alloc(struct snd_pcm_substream *plug, snd_pcm_uframes_t frames) while (plugin->prev) { if (plugin->src_frames) frames = plugin->src_frames(plugin, frames); - if (snd_BUG_ON(frames <= 0)) + if (snd_BUG_ON((snd_pcm_sframes_t)frames <= 0)) return -ENXIO; plugin = plugin->prev; err = snd_pcm_plugin_alloc(plugin, frames); From c7e0e7e51e8c8a6dcb26cb636d160a99c0f0646a Mon Sep 17 00:00:00 2001 From: Aapo Vienamo Date: Fri, 10 Aug 2018 21:08:35 +0300 Subject: [PATCH 0607/3715] arm64: dts: tegra210-p2180: Correct sdmmc4 vqmmc-supply [ Upstream commit 6ff7705da8806de45ca1490194f0b4eb07725804 ] On p2180 sdmmc4 is powered from a fixed 1.8 V regulator. Signed-off-by: Aapo Vienamo Reviewed-by: Mikko Perttunen Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi index f6e6f1e83ba8..be91873c0878 100644 --- a/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi @@ -282,6 +282,7 @@ status = "okay"; bus-width = <8>; non-removable; + vqmmc-supply = <&vdd_1v8>; }; clocks { From 593177c468f27a7e56d90b37beebf3827825ef84 Mon Sep 17 00:00:00 2001 From: Jay Foster Date: Mon, 20 Aug 2018 11:42:01 +0200 Subject: [PATCH 0608/3715] ARM: dts: at91/trivial: Fix USART1 definition for at91sam9g45 [ Upstream commit 10af10db8c76fa5b9bf1f52a895c1cb2c0ac24da ] Fix a typo. No functional change made by this patch. Signed-off-by: Jay Foster Signed-off-by: Nicolas Ferre Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- arch/arm/boot/dts/at91sam9g45.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/at91sam9g45.dtsi b/arch/arm/boot/dts/at91sam9g45.dtsi index 64fa3f9a39d3..db0921e7a613 100644 --- a/arch/arm/boot/dts/at91sam9g45.dtsi +++ b/arch/arm/boot/dts/at91sam9g45.dtsi @@ -566,7 +566,7 @@ }; }; - uart1 { + usart1 { pinctrl_usart1: usart1-0 { atmel,pins = Date: Mon, 27 Aug 2018 23:23:43 +0200 Subject: [PATCH 0609/3715] rtc: rv8803: fix the rv8803 id in the OF table [ Upstream commit c856618d20662695fcdb47bf4d560dc457662aec ] The ID for RV8803 must be rv_8803 Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- drivers/rtc/rtc-rv8803.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-rv8803.c b/drivers/rtc/rtc-rv8803.c index aae2576741a6..6e06fb3b0b92 100644 --- a/drivers/rtc/rtc-rv8803.c +++ b/drivers/rtc/rtc-rv8803.c @@ -622,7 +622,7 @@ MODULE_DEVICE_TABLE(i2c, rv8803_id); static const struct of_device_id rv8803_of_match[] = { { .compatible = "microcrystal,rv8803", - .data = (void *)rx_8900 + .data = (void *)rv_8803 }, { .compatible = "epson,rx8900", From a724f780a62b67e188ba97aa034dea5524ea67eb Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 16 Aug 2018 17:49:19 -0700 Subject: [PATCH 0610/3715] remoteproc/davinci: Use %zx for formating size_t [ Upstream commit 1e28dbbeced6152b9ea7c417ff8cef3f7dcf0f19 ] da8xx_rproc_mem size is of type size_t, so use %zx to format the debug print of it to avoid a compile warning. Acked-by: Suman Anna Reviewed-by: Bartosz Golaszewski Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/remoteproc/da8xx_remoteproc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/remoteproc/da8xx_remoteproc.c b/drivers/remoteproc/da8xx_remoteproc.c index bf3b9034c319..a127d2ccd7ca 100644 --- a/drivers/remoteproc/da8xx_remoteproc.c +++ b/drivers/remoteproc/da8xx_remoteproc.c @@ -207,7 +207,7 @@ static int da8xx_rproc_get_internal_memories(struct platform_device *pdev, res->start & DA8XX_RPROC_LOCAL_ADDRESS_MASK; drproc->mem[i].size = resource_size(res); - dev_dbg(dev, "memory %8s: bus addr %pa size 0x%x va %p da 0x%x\n", + dev_dbg(dev, "memory %8s: bus addr %pa size 0x%zx va %p da 0x%x\n", mem_names[i], &drproc->mem[i].bus_addr, drproc->mem[i].size, drproc->mem[i].cpu_addr, drproc->mem[i].dev_addr); From 312bf40b1cb7c5d25c16b10fa3db9eae9580dcb2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 27 Aug 2018 18:35:53 +0300 Subject: [PATCH 0611/3715] extcon: cht-wc: Return from default case to avoid warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 962341b54b99965ebec5f70c8d39f1c382eea833 ] When we have first case to fall through it's not enough to put single comment there to satisfy compiler. Instead of doing that, return fall back value directly from default case. This to avoid following warnings: drivers/extcon/extcon-intel-cht-wc.c: In function ‘cht_wc_extcon_get_charger’: include/linux/device.h:1420:2: warning: this statement may fall through [-Wimplicit-fallthrough=] _dev_warn(dev, dev_fmt(fmt), ##__VA_ARGS__) ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/extcon/extcon-intel-cht-wc.c:148:3: note: in expansion of macro ‘dev_warn’ dev_warn(ext->dev, ^~~~~~~~ drivers/extcon/extcon-intel-cht-wc.c:152:2: note: here case CHT_WC_USBSRC_TYPE_SDP: ^~~~ Signed-off-by: Andy Shevchenko Signed-off-by: Chanwoo Choi Signed-off-by: Sasha Levin --- drivers/extcon/extcon-intel-cht-wc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/extcon/extcon-intel-cht-wc.c b/drivers/extcon/extcon-intel-cht-wc.c index 60baaf693103..c562d9d69ae3 100644 --- a/drivers/extcon/extcon-intel-cht-wc.c +++ b/drivers/extcon/extcon-intel-cht-wc.c @@ -155,7 +155,7 @@ static int cht_wc_extcon_get_charger(struct cht_wc_extcon_data *ext, dev_warn(ext->dev, "Unhandled charger type %d, defaulting to SDP\n", ret); - /* Fall through, treat as SDP */ + return EXTCON_CHG_USB_SDP; case CHT_WC_USBSRC_TYPE_SDP: case CHT_WC_USBSRC_TYPE_FLOAT_DP_DN: case CHT_WC_USBSRC_TYPE_OTHER: From 3ba8f7915e0a3787ef577a635f1f299a18de27d7 Mon Sep 17 00:00:00 2001 From: Rajeev Kumar Sirasanagandla Date: Tue, 10 Jul 2018 18:46:13 +0530 Subject: [PATCH 0612/3715] cfg80211: Avoid regulatory restore when COUNTRY_IE_IGNORE is set [ Upstream commit 7417844b63d4b0dc8ab23f88259bf95de7d09b57 ] When REGULATORY_COUNTRY_IE_IGNORE is set, __reg_process_hint_country_ie() ignores the country code change request from __cfg80211_connect_result() via regulatory_hint_country_ie(). After Disconnect, similar to above, country code should not be reset to world when country IE ignore is set. But this is violated and restore of regulatory settings is invoked by cfg80211_disconnect_work via regulatory_hint_disconnect(). To address this, avoid regulatory restore from regulatory_hint_disconnect() when COUNTRY_IE_IGNORE is set. Note: Currently, restore_regulatory_settings() takes care of clearing beacon hints. But in the proposed change, regulatory restore is avoided. Therefore, explicitly clear beacon hints when DISABLE_BEACON_HINTS is not set. Signed-off-by: Rajeev Kumar Sirasanagandla Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/reg.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index b940d5c2003b..804eac073b6b 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2703,8 +2703,54 @@ static void restore_regulatory_settings(bool reset_user) schedule_work(®_work); } +static bool is_wiphy_all_set_reg_flag(enum ieee80211_regulatory_flags flag) +{ + struct cfg80211_registered_device *rdev; + struct wireless_dev *wdev; + + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { + wdev_lock(wdev); + if (!(wdev->wiphy->regulatory_flags & flag)) { + wdev_unlock(wdev); + return false; + } + wdev_unlock(wdev); + } + } + + return true; +} + void regulatory_hint_disconnect(void) { + /* Restore of regulatory settings is not required when wiphy(s) + * ignore IE from connected access point but clearance of beacon hints + * is required when wiphy(s) supports beacon hints. + */ + if (is_wiphy_all_set_reg_flag(REGULATORY_COUNTRY_IE_IGNORE)) { + struct reg_beacon *reg_beacon, *btmp; + + if (is_wiphy_all_set_reg_flag(REGULATORY_DISABLE_BEACON_HINTS)) + return; + + spin_lock_bh(®_pending_beacons_lock); + list_for_each_entry_safe(reg_beacon, btmp, + ®_pending_beacons, list) { + list_del(®_beacon->list); + kfree(reg_beacon); + } + spin_unlock_bh(®_pending_beacons_lock); + + list_for_each_entry_safe(reg_beacon, btmp, + ®_beacon_list, list) { + list_del(®_beacon->list); + kfree(reg_beacon); + } + + return; + } + pr_debug("All devices are disconnected, going to restore regulatory settings\n"); restore_regulatory_settings(false); } From b335136bb7305c81deea6c6cdb47e678d380502b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 28 Aug 2018 12:49:43 +0200 Subject: [PATCH 0613/3715] ALSA: seq: Do error checks at creating system ports [ Upstream commit b8e131542b47b81236ecf6768c923128e1f5db6e ] snd_seq_system_client_init() doesn't check the errors returned from its port creations. Let's do it properly and handle the error paths. Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/seq/seq_system.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/sound/core/seq/seq_system.c b/sound/core/seq/seq_system.c index 8ce1d0b40dce..ce1f1e4727ab 100644 --- a/sound/core/seq/seq_system.c +++ b/sound/core/seq/seq_system.c @@ -123,6 +123,7 @@ int __init snd_seq_system_client_init(void) { struct snd_seq_port_callback pcallbacks; struct snd_seq_port_info *port; + int err; port = kzalloc(sizeof(*port), GFP_KERNEL); if (!port) @@ -144,7 +145,10 @@ int __init snd_seq_system_client_init(void) port->flags = SNDRV_SEQ_PORT_FLG_GIVEN_PORT; port->addr.client = sysclient; port->addr.port = SNDRV_SEQ_PORT_SYSTEM_TIMER; - snd_seq_kernel_client_ctl(sysclient, SNDRV_SEQ_IOCTL_CREATE_PORT, port); + err = snd_seq_kernel_client_ctl(sysclient, SNDRV_SEQ_IOCTL_CREATE_PORT, + port); + if (err < 0) + goto error_port; /* register announcement port */ strcpy(port->name, "Announce"); @@ -154,16 +158,24 @@ int __init snd_seq_system_client_init(void) port->flags = SNDRV_SEQ_PORT_FLG_GIVEN_PORT; port->addr.client = sysclient; port->addr.port = SNDRV_SEQ_PORT_SYSTEM_ANNOUNCE; - snd_seq_kernel_client_ctl(sysclient, SNDRV_SEQ_IOCTL_CREATE_PORT, port); + err = snd_seq_kernel_client_ctl(sysclient, SNDRV_SEQ_IOCTL_CREATE_PORT, + port); + if (err < 0) + goto error_port; announce_port = port->addr.port; kfree(port); return 0; + + error_port: + snd_seq_system_client_done(); + kfree(port); + return err; } /* unregister our internal client */ -void __exit snd_seq_system_client_done(void) +void snd_seq_system_client_done(void) { int oldsysclient = sysclient; From 77a718e992d2ddb7a7dfe3011306b672d342e14d Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 20 Aug 2018 11:37:51 +0200 Subject: [PATCH 0614/3715] ath9k: fix tx99 with monitor mode interface [ Upstream commit d9c52fd17cb483bd8a470398afcb79f86c1b77c8 ] Tx99 is typically configured via a monitor mode interface, which does not get added to the driver as a vif. Since the code currently expects a configured virtual interface for tx99, enabling tx99 via debugfs fails. Since the vif is not needed anyway, remove all checks for it. Signed-off-by: Felix Fietkau [kvalo@codeaurora.org: s/CPTCFG/CONFIG/] Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath9k/ath9k.h | 1 - drivers/net/wireless/ath/ath9k/main.c | 12 +++--------- drivers/net/wireless/ath/ath9k/tx99.c | 9 --------- drivers/net/wireless/ath/ath9k/xmit.c | 2 +- 4 files changed, 4 insertions(+), 20 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/ath9k.h b/drivers/net/wireless/ath/ath9k/ath9k.h index f9339b5c3624..db2b119199d7 100644 --- a/drivers/net/wireless/ath/ath9k/ath9k.h +++ b/drivers/net/wireless/ath/ath9k/ath9k.h @@ -1074,7 +1074,6 @@ struct ath_softc { struct ath_spec_scan_priv spec_priv; - struct ieee80211_vif *tx99_vif; struct sk_buff *tx99_skb; bool tx99_state; s16 tx99_power; diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 8b4ac7f0a09b..055f86951680 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1250,15 +1250,10 @@ static int ath9k_add_interface(struct ieee80211_hw *hw, struct ath_vif *avp = (void *)vif->drv_priv; struct ath_node *an = &avp->mcast_node; - mutex_lock(&sc->mutex); + if (IS_ENABLED(CONFIG_ATH9K_TX99)) + return -EOPNOTSUPP; - if (IS_ENABLED(CONFIG_ATH9K_TX99)) { - if (sc->cur_chan->nvifs >= 1) { - mutex_unlock(&sc->mutex); - return -EOPNOTSUPP; - } - sc->tx99_vif = vif; - } + mutex_lock(&sc->mutex); ath_dbg(common, CONFIG, "Attach a VIF of type: %d\n", vif->type); sc->cur_chan->nvifs++; @@ -1341,7 +1336,6 @@ static void ath9k_remove_interface(struct ieee80211_hw *hw, ath9k_p2p_remove_vif(sc, vif); sc->cur_chan->nvifs--; - sc->tx99_vif = NULL; if (!ath9k_is_chanctx_enabled()) list_del(&avp->list); diff --git a/drivers/net/wireless/ath/ath9k/tx99.c b/drivers/net/wireless/ath/ath9k/tx99.c index fe3a8263b224..311547f532bc 100644 --- a/drivers/net/wireless/ath/ath9k/tx99.c +++ b/drivers/net/wireless/ath/ath9k/tx99.c @@ -54,12 +54,6 @@ static struct sk_buff *ath9k_build_tx99_skb(struct ath_softc *sc) struct ieee80211_hdr *hdr; struct ieee80211_tx_info *tx_info; struct sk_buff *skb; - struct ath_vif *avp; - - if (!sc->tx99_vif) - return NULL; - - avp = (struct ath_vif *)sc->tx99_vif->drv_priv; skb = alloc_skb(len, GFP_KERNEL); if (!skb) @@ -77,14 +71,11 @@ static struct sk_buff *ath9k_build_tx99_skb(struct ath_softc *sc) memcpy(hdr->addr2, hw->wiphy->perm_addr, ETH_ALEN); memcpy(hdr->addr3, hw->wiphy->perm_addr, ETH_ALEN); - hdr->seq_ctrl |= cpu_to_le16(avp->seq_no); - tx_info = IEEE80211_SKB_CB(skb); memset(tx_info, 0, sizeof(*tx_info)); rate = &tx_info->control.rates[0]; tx_info->band = sc->cur_chan->chandef.chan->band; tx_info->flags = IEEE80211_TX_CTL_NO_ACK; - tx_info->control.vif = sc->tx99_vif; rate->count = 1; if (ah->curchan && IS_CHAN_HT(ah->curchan)) { rate->flags |= IEEE80211_TX_RC_MCS; diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index 458c4f53ba5d..a743e3535d0a 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -2952,7 +2952,7 @@ int ath9k_tx99_send(struct ath_softc *sc, struct sk_buff *skb, return -EINVAL; } - ath_set_rates(sc->tx99_vif, NULL, bf); + ath_set_rates(NULL, NULL, bf); ath9k_hw_set_desc_link(sc->sc_ah, bf->bf_desc, bf->bf_daddr); ath9k_hw_tx99_start(sc->sc_ah, txctl->txq->axq_qnum); From e98d5c0716ada43d70727d89bfedbf203cabac44 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 24 Aug 2018 15:04:59 +0300 Subject: [PATCH 0615/3715] ath10k: limit available channels via DT ieee80211-freq-limit [ Upstream commit 34d5629d2ca89d847b7040762b87964c696c14da ] Tri-band devices (1x 2.4GHz + 2x 5GHz) often incorporate special filters in the RX and TX path. These filtered channel can in theory still be used by the hardware but the signal strength is reduced so much that it makes no sense. There is already a DT property to limit the available channels but ath10k has to manually call this functionality to limit the currrently set wiphy channels further. Signed-off-by: Sven Eckelmann Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath10k/mac.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 58a3c42c4aed..8c4bb56c262f 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -17,6 +17,7 @@ #include "mac.h" +#include #include #include #include @@ -8174,6 +8175,7 @@ int ath10k_mac_register(struct ath10k *ar) ar->hw->wiphy->bands[NL80211_BAND_5GHZ] = band; } + wiphy_read_of_freq_limits(ar->hw->wiphy); ath10k_mac_setup_ht_vht_cap(ar); ar->hw->wiphy->interface_modes = From caf450580cd30f35010d86f1fbde95c84578eee3 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 16 Aug 2018 10:32:13 -0500 Subject: [PATCH 0616/3715] gfs2: Don't set GFS2_RDF_UPTODATE when the lvb is updated [ Upstream commit 4f36cb36c9d14340bb200d2ad9117b03ce992cfe ] The GFS2_RDF_UPTODATE flag in the rgrp is used to determine when a rgrp buffer is valid. It's cleared when the glock is invalidated, signifying that the buffer data is now invalid. But before this patch, function update_rgrp_lvb was setting the flag when it determined it had a valid lvb. But that's an invalid assumption: just because you have a valid lvb doesn't mean you have valid buffers. After all, another node may have made the lvb valid, and this node just fetched it from the glock via dlm. Consider this scenario: 1. The file system is mounted with RGRPLVB option. 2. In gfs2_inplace_reserve it locks the rgrp glock EX, but thanks to GL_SKIP, it skips the gfs2_rgrp_bh_get. 3. Since loops == 0 and the allocation target (ap->target) is bigger than the largest known chunk of blocks in the rgrp (rs->rs_rbm.rgd->rd_extfail_pt) it skips that rgrp and bypasses the call to gfs2_rgrp_bh_get there as well. 4. update_rgrp_lvb sees the lvb MAGIC number is valid, so bypasses gfs2_rgrp_bh_get, but it still sets sets GFS2_RDF_UPTODATE due to this invalid assumption. 5. The next time update_rgrp_lvb is called, it sees the bit is set and just returns 0, assuming both the lvb and rgrp are both uptodate. But since this is a smaller allocation, or space has been freed by another node, thus adjusting the lvb values, it decides to use the rgrp for allocations, with invalid rd_free due to the fact it was never updated. This patch changes update_rgrp_lvb so it doesn't set the UPTODATE flag anymore. That way, it has no choice but to fetch the latest values. Signed-off-by: Bob Peterson Signed-off-by: Sasha Levin --- fs/gfs2/rgrp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index b0eee90738ff..0d72baae5150 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1201,7 +1201,7 @@ static int update_rgrp_lvb(struct gfs2_rgrpd *rgd) rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags); rl_flags &= ~GFS2_RDF_MASK; rgd->rd_flags &= GFS2_RDF_MASK; - rgd->rd_flags |= (rl_flags | GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); + rgd->rd_flags |= (rl_flags | GFS2_RDF_CHECK); if (rgd->rd_rgl->rl_unlinked == 0) rgd->rd_flags &= ~GFS2_RDF_CHECK; rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free); From 8cb302840cd44cb344731c8a505f1244fe6253c9 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 27 Aug 2018 14:26:47 +0100 Subject: [PATCH 0617/3715] ASoC: dpcm: Properly initialise hw->rate_max [ Upstream commit e33ffbd9cd39da09831ce62c11025d830bf78d9e ] If the CPU DAI does not initialise rate_max, say if using using KNOT or CONTINUOUS, then the rate_max field will be initialised to 0. A value of zero in the rate_max field of the hardware runtime will cause the sound card to support no sample rates at all. Obviously this is not desired, just a different mechanism is being used to apply the constraints. As such update the setting of rate_max in dpcm_init_runtime_hw to be consistent with the non-DPCM cases and set rate_max to UINT_MAX if nothing is defined on the CPU DAI. Signed-off-by: Charles Keepax Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-pcm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 052b6294a428..24047375c2fb 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1578,7 +1578,7 @@ static void dpcm_init_runtime_hw(struct snd_pcm_runtime *runtime, u64 formats) { runtime->hw.rate_min = stream->rate_min; - runtime->hw.rate_max = stream->rate_max; + runtime->hw.rate_max = min_not_zero(stream->rate_max, UINT_MAX); runtime->hw.channels_min = stream->channels_min; runtime->hw.channels_max = stream->channels_max; if (runtime->hw.formats) From 9670ba2575378fdf820057626d79167182ae1eb8 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Tue, 21 Aug 2018 18:42:30 +0200 Subject: [PATCH 0618/3715] pinctrl: ingenic: Probe driver at subsys_initcall [ Upstream commit 556a36a71ed80e17ade49225b58513ea3c9e4558 ] Using postcore_initcall() makes the driver try to initialize way too early. Signed-off-by: Paul Cercueil Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/pinctrl-ingenic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-ingenic.c b/drivers/pinctrl/pinctrl-ingenic.c index 103aaab41357..1541f8cba556 100644 --- a/drivers/pinctrl/pinctrl-ingenic.c +++ b/drivers/pinctrl/pinctrl-ingenic.c @@ -849,4 +849,4 @@ static int __init ingenic_pinctrl_drv_register(void) { return platform_driver_register(&ingenic_pinctrl_driver); } -postcore_initcall(ingenic_pinctrl_drv_register); +subsys_initcall(ingenic_pinctrl_drv_register); From 983a2c0e4c9dbe9b7b2439186a5e153b0190ff78 Mon Sep 17 00:00:00 2001 From: Tuomas Tynkkynen Date: Sun, 19 Aug 2018 22:20:23 +0300 Subject: [PATCH 0619/3715] MIPS: BCM47XX: Enable USB power on Netgear WNDR3400v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit feef7918667b84f9d5653c501542dd8d84ae32af ] Setting GPIO 21 high seems to be required to enable power to USB ports on the WNDR3400v3. As there is already similar code for WNR3500L, make the existing USB power GPIO code generic and use that. Signed-off-by: Tuomas Tynkkynen Acked-by: Hauke Mehrtens Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/20259/ Cc: Rafał Miłecki Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Sasha Levin --- arch/mips/bcm47xx/workarounds.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/mips/bcm47xx/workarounds.c b/arch/mips/bcm47xx/workarounds.c index 1a8a07e7a563..46eddbec8d9f 100644 --- a/arch/mips/bcm47xx/workarounds.c +++ b/arch/mips/bcm47xx/workarounds.c @@ -5,9 +5,8 @@ #include #include -static void __init bcm47xx_workarounds_netgear_wnr3500l(void) +static void __init bcm47xx_workarounds_enable_usb_power(int usb_power) { - const int usb_power = 12; int err; err = gpio_request_one(usb_power, GPIOF_OUT_INIT_HIGH, "usb_power"); @@ -23,7 +22,10 @@ void __init bcm47xx_workarounds(void) switch (board) { case BCM47XX_BOARD_NETGEAR_WNR3500L: - bcm47xx_workarounds_netgear_wnr3500l(); + bcm47xx_workarounds_enable_usb_power(12); + break; + case BCM47XX_BOARD_NETGEAR_WNDR3400_V3: + bcm47xx_workarounds_enable_usb_power(21); break; default: /* No workaround(s) needed */ From b9f822b34e8fa5123727746c91b94cd83604ffac Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 3 Aug 2018 12:55:32 +0200 Subject: [PATCH 0620/3715] ARM: dts: exynos: Fix sound in Snow-rev5 Chromebook [ Upstream commit 64858773d78e820003a94e5a7179d368213655d6 ] This patch adds missing properties to the CODEC and sound nodes, so the audio will work also on Snow rev5 Chromebook. This patch is an extension to the commit e9eefc3f8ce0 ("ARM: dts: exynos: Add missing clock and DAI properties to the max98095 node in Snow Chromebook") and commit 6ab569936d60 ("ARM: dts: exynos: Enable HDMI audio on Snow Chromebook"). It has been reported that such changes work fine on the rev5 board too. Signed-off-by: Marek Szyprowski [krzk: Fixed typo in phandle to &max98090] Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sasha Levin --- arch/arm/boot/dts/exynos5250-snow-rev5.dts | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/arm/boot/dts/exynos5250-snow-rev5.dts b/arch/arm/boot/dts/exynos5250-snow-rev5.dts index 90560c316f64..cb986175b69b 100644 --- a/arch/arm/boot/dts/exynos5250-snow-rev5.dts +++ b/arch/arm/boot/dts/exynos5250-snow-rev5.dts @@ -23,6 +23,14 @@ samsung,model = "Snow-I2S-MAX98090"; samsung,audio-codec = <&max98090>; + + cpu { + sound-dai = <&i2s0 0>; + }; + + codec { + sound-dai = <&max98090 0>, <&hdmi>; + }; }; }; @@ -34,6 +42,9 @@ interrupt-parent = <&gpx0>; pinctrl-names = "default"; pinctrl-0 = <&max98090_irq>; + clocks = <&pmu_system_controller 0>; + clock-names = "mclk"; + #sound-dai-cells = <1>; }; }; From c12934798357efe0c786839100b18253afd1c5d4 Mon Sep 17 00:00:00 2001 From: Rick Farrington Date: Tue, 28 Aug 2018 11:32:55 -0700 Subject: [PATCH 0621/3715] liquidio: fix race condition in instruction completion processing [ Upstream commit b943f17e06493fd2c7fd00743093ad5dcdb90e7f ] In lio_enable_irq, the pkt_in_done count register was being cleared to zero. However, there could be some completed instructions which were not yet processed due to budget and limit constraints. So, only write this register with the number of actual completions that were processed. Signed-off-by: Rick Farrington Signed-off-by: Felix Manlunas Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/cavium/liquidio/octeon_device.c | 5 +++-- drivers/net/ethernet/cavium/liquidio/octeon_iq.h | 2 ++ drivers/net/ethernet/cavium/liquidio/request_manager.c | 2 ++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c index 29d53b1763a7..2a9c925376cc 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c @@ -1444,8 +1444,9 @@ void lio_enable_irq(struct octeon_droq *droq, struct octeon_instr_queue *iq) } if (iq) { spin_lock_bh(&iq->lock); - writel(iq->pkt_in_done, iq->inst_cnt_reg); - iq->pkt_in_done = 0; + writel(iq->pkts_processed, iq->inst_cnt_reg); + iq->pkt_in_done -= iq->pkts_processed; + iq->pkts_processed = 0; /* this write needs to be flushed before we release the lock */ mmiowb(); spin_unlock_bh(&iq->lock); diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h index 5c3c8da976f7..1860603452ee 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h @@ -84,6 +84,8 @@ struct octeon_instr_queue { u32 pkt_in_done; + u32 pkts_processed; + /** A spinlock to protect access to the input ring.*/ spinlock_t iq_flush_running_lock; diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c index 55e873126463..0ea623768783 100644 --- a/drivers/net/ethernet/cavium/liquidio/request_manager.c +++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c @@ -122,6 +122,7 @@ int octeon_init_instr_queue(struct octeon_device *oct, iq->do_auto_flush = 1; iq->db_timeout = (u32)conf->db_timeout; atomic_set(&iq->instr_pending, 0); + iq->pkts_processed = 0; /* Initialize the spinlock for this instruction queue */ spin_lock_init(&iq->lock); @@ -474,6 +475,7 @@ octeon_flush_iq(struct octeon_device *oct, struct octeon_instr_queue *iq, lio_process_iq_request_list(oct, iq, 0); if (inst_processed) { + iq->pkts_processed += inst_processed; atomic_sub(inst_processed, &iq->instr_pending); iq->stats.instr_processed += inst_processed; } From 4d723872339dea6f540b1fdc4f08d833b7a77de2 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 10 Aug 2018 10:04:25 +0200 Subject: [PATCH 0622/3715] ARM: dts: exynos: Fix regulators configuration on Peach Pi/Pit Chromebooks [ Upstream commit f8f3b7fc21b1cb59385b780acd9b9a26d04cb7b2 ] Regulators, which are marked as 'on-in-suspend' seems to be critical for board operation, thus they must not be disabled anytime. This can be only assured by marking them as 'always-on', because otherwise some actions of their clients might result in turning them off. This patch restores suspend/resume operation on Peach-Pit Chromebook board. It partially reverts 'always-on' property removal done by the commit mentioned in the Fixes tag. Fixes: 665c441eea3d ("ARM: dts: exynos: Remove unneded always-on for regulators on Peach boards") Signed-off-by: Marek Szyprowski Tested-by: Tomasz Figa Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sasha Levin --- arch/arm/boot/dts/exynos5420-peach-pit.dts | 3 +++ arch/arm/boot/dts/exynos5800-peach-pi.dts | 3 +++ 2 files changed, 6 insertions(+) diff --git a/arch/arm/boot/dts/exynos5420-peach-pit.dts b/arch/arm/boot/dts/exynos5420-peach-pit.dts index 7ccee2cfe481..442161d2acd5 100644 --- a/arch/arm/boot/dts/exynos5420-peach-pit.dts +++ b/arch/arm/boot/dts/exynos5420-peach-pit.dts @@ -301,6 +301,7 @@ regulator-name = "vdd_1v35"; regulator-min-microvolt = <1350000>; regulator-max-microvolt = <1350000>; + regulator-always-on; regulator-boot-on; regulator-state-mem { regulator-on-in-suspend; @@ -322,6 +323,7 @@ regulator-name = "vdd_2v"; regulator-min-microvolt = <2000000>; regulator-max-microvolt = <2000000>; + regulator-always-on; regulator-boot-on; regulator-state-mem { regulator-on-in-suspend; @@ -332,6 +334,7 @@ regulator-name = "vdd_1v8"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; + regulator-always-on; regulator-boot-on; regulator-state-mem { regulator-on-in-suspend; diff --git a/arch/arm/boot/dts/exynos5800-peach-pi.dts b/arch/arm/boot/dts/exynos5800-peach-pi.dts index 0900b38f60b4..58af2254e521 100644 --- a/arch/arm/boot/dts/exynos5800-peach-pi.dts +++ b/arch/arm/boot/dts/exynos5800-peach-pi.dts @@ -301,6 +301,7 @@ regulator-name = "vdd_1v35"; regulator-min-microvolt = <1350000>; regulator-max-microvolt = <1350000>; + regulator-always-on; regulator-boot-on; regulator-state-mem { regulator-on-in-suspend; @@ -322,6 +323,7 @@ regulator-name = "vdd_2v"; regulator-min-microvolt = <2000000>; regulator-max-microvolt = <2000000>; + regulator-always-on; regulator-boot-on; regulator-state-mem { regulator-on-in-suspend; @@ -332,6 +334,7 @@ regulator-name = "vdd_1v8"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; + regulator-always-on; regulator-boot-on; regulator-state-mem { regulator-on-in-suspend; From 85deb3b185dc6006dc699b3476c9d626ff9b9590 Mon Sep 17 00:00:00 2001 From: Mitch Williams Date: Mon, 20 Aug 2018 08:12:30 -0700 Subject: [PATCH 0623/3715] i40e: use correct length for strncpy [ Upstream commit 7eb74ff891b4e94b8bac48f648a21e4b94ddee64 ] Caught by GCC 8. When we provide a length for strncpy, we should not include the terminating null. So we must tell it one less than the size of the destination buffer. Signed-off-by: Mitch Williams Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e_ptp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index ef242dbae116..5fc870757480 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -704,7 +704,8 @@ static long i40e_ptp_create_clock(struct i40e_pf *pf) if (!IS_ERR_OR_NULL(pf->ptp_clock)) return 0; - strncpy(pf->ptp_caps.name, i40e_driver_name, sizeof(pf->ptp_caps.name)); + strncpy(pf->ptp_caps.name, i40e_driver_name, + sizeof(pf->ptp_caps.name) - 1); pf->ptp_caps.owner = THIS_MODULE; pf->ptp_caps.max_adj = 999999999; pf->ptp_caps.n_ext_ts = 0; From 546594c2b2018dbcae975bc239b6042648b284af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patryk=20Ma=C5=82ek?= Date: Tue, 28 Aug 2018 10:16:03 -0700 Subject: [PATCH 0624/3715] i40e: hold the rtnl lock on clearing interrupt scheme MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5cba17b14182696d6bb0ec83a1d087933f252241 ] Hold the rtnl lock when we're clearing interrupt scheme in i40e_shutdown and in i40e_remove. Signed-off-by: Patryk Małek Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e_main.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 39029a12a233..aa2b446d6ad0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -11885,6 +11885,7 @@ static void i40e_remove(struct pci_dev *pdev) mutex_destroy(&hw->aq.asq_mutex); /* Clear all dynamic memory lists of rings, q_vectors, and VSIs */ + rtnl_lock(); i40e_clear_interrupt_scheme(pf); for (i = 0; i < pf->num_alloc_vsi; i++) { if (pf->vsi[i]) { @@ -11893,6 +11894,7 @@ static void i40e_remove(struct pci_dev *pdev) pf->vsi[i] = NULL; } } + rtnl_unlock(); for (i = 0; i < I40E_MAX_VEB; i++) { kfree(pf->veb[i]); @@ -12086,7 +12088,13 @@ static void i40e_shutdown(struct pci_dev *pdev) wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0)); + /* Since we're going to destroy queues during the + * i40e_clear_interrupt_scheme() we should hold the RTNL lock for this + * whole section + */ + rtnl_lock(); i40e_clear_interrupt_scheme(pf); + rtnl_unlock(); if (system_state == SYSTEM_POWER_OFF) { pci_wake_from_d3(pdev, pf->wol_en); From e25f56774621a637d2c8264e916ba4d663376e2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patryk=20Ma=C5=82ek?= Date: Tue, 28 Aug 2018 10:16:09 -0700 Subject: [PATCH 0625/3715] i40e: Prevent deleting MAC address from VF when set by PF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5907cf6c5bbe78be2ed18b875b316c6028b20634 ] To prevent VF from deleting MAC address that was assigned by the PF we need to check for that scenario when we try to delete a MAC address from a VF. Signed-off-by: Patryk Małek Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index bdb752321600..b3307b1b3aac 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -2177,6 +2177,16 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) ret = I40E_ERR_INVALID_MAC_ADDR; goto error_param; } + + if (vf->pf_set_mac && + ether_addr_equal(al->list[i].addr, + vf->default_lan_addr.addr)) { + dev_err(&pf->pdev->dev, + "MAC addr %pM has been set by PF, cannot delete it for VF %d, reset VF to change MAC addr\n", + vf->default_lan_addr.addr, vf->vf_id); + ret = I40E_ERR_PARAM; + goto error_param; + } } vsi = pf->vsi[vf->lan_vsi_idx]; From 5c3c4654ec1d5747026f8868474842f2e99869d1 Mon Sep 17 00:00:00 2001 From: Vijay Immanuel Date: Wed, 13 Jun 2018 18:48:07 -0700 Subject: [PATCH 0626/3715] IB/rxe: fixes for rdma read retry [ Upstream commit 030e46e495af855a13964a0aab9753ea82a96edc ] When a read request is retried for the remaining partial data, the response may restart from read response first or read response only. So support those cases. Do not advance the comp psn beyond the current wqe's last_psn as that could skip over an entire read wqe and will cause the req_retry() logic to set an incorrect req psn. An example sequence is as follows: Write PSN 40 -- this is the current WQE. Read request PSN 41 Write PSN 42 Receive ACK PSN 42 -- this will complete the current WQE for PSN 40, and set the comp psn to 42 which is a problem because the read request at PSN 41 has been skipped over. So when req_retry() tries to retransmit the read request, it sets the req psn to 42 which is incorrect. When retrying a read request, calculate the number of psns completed based on the dma resid instead of the wqe first_psn. The wqe first_psn could have moved if the read request was retried multiple times. Set the reth length to the dma resid to handle read retries for the remaining partial data. Signed-off-by: Vijay Immanuel Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin --- drivers/infiniband/sw/rxe/rxe_comp.c | 21 ++++++++++++++++----- drivers/infiniband/sw/rxe/rxe_req.c | 15 +++++++++------ 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 83cfe44f070e..fd9ce03dbd29 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -253,6 +253,17 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE: if (pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE && pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST) { + /* read retries of partial data may restart from + * read response first or response only. + */ + if ((pkt->psn == wqe->first_psn && + pkt->opcode == + IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) || + (wqe->first_psn == wqe->last_psn && + pkt->opcode == + IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY)) + break; + return COMPST_ERROR; } break; @@ -501,11 +512,11 @@ static inline enum comp_state complete_wqe(struct rxe_qp *qp, struct rxe_pkt_info *pkt, struct rxe_send_wqe *wqe) { - qp->comp.opcode = -1; - - if (pkt) { - if (psn_compare(pkt->psn, qp->comp.psn) >= 0) - qp->comp.psn = (pkt->psn + 1) & BTH_PSN_MASK; + if (pkt && wqe->state == wqe_state_pending) { + if (psn_compare(wqe->last_psn, qp->comp.psn) >= 0) { + qp->comp.psn = (wqe->last_psn + 1) & BTH_PSN_MASK; + qp->comp.opcode = -1; + } if (qp->req.wait_psn) { qp->req.wait_psn = 0; diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 08ae4f3a6a37..9fd4f04df3b3 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -73,9 +73,6 @@ static void req_retry(struct rxe_qp *qp) int npsn; int first = 1; - wqe = queue_head(qp->sq.queue); - npsn = (qp->comp.psn - wqe->first_psn) & BTH_PSN_MASK; - qp->req.wqe_index = consumer_index(qp->sq.queue); qp->req.psn = qp->comp.psn; qp->req.opcode = -1; @@ -107,11 +104,17 @@ static void req_retry(struct rxe_qp *qp) if (first) { first = 0; - if (mask & WR_WRITE_OR_SEND_MASK) + if (mask & WR_WRITE_OR_SEND_MASK) { + npsn = (qp->comp.psn - wqe->first_psn) & + BTH_PSN_MASK; retry_first_write_send(qp, wqe, mask, npsn); + } - if (mask & WR_READ_MASK) + if (mask & WR_READ_MASK) { + npsn = (wqe->dma.length - wqe->dma.resid) / + qp->mtu; wqe->iova += npsn * qp->mtu; + } } wqe->state = wqe_state_posted; @@ -435,7 +438,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, if (pkt->mask & RXE_RETH_MASK) { reth_set_rkey(pkt, ibwr->wr.rdma.rkey); reth_set_va(pkt, wqe->iova); - reth_set_len(pkt, wqe->dma.length); + reth_set_len(pkt, wqe->dma.resid); } if (pkt->mask & RXE_IMMDT_MASK) From 4c337b4ff09fa549415409d8564af9559f9a28f9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 19 Apr 2018 10:57:08 +0200 Subject: [PATCH 0627/3715] iwlwifi: don't WARN on trying to dump dead firmware [ Upstream commit 84f260251ed8153e84c64eb2c5278ab18d3ddef6 ] There's no point in warning here, the user will just get an error back to the debugfs file write, and warning just makes it seem like there's an internal consistency problem when in reality the user just happened to hit this at a bad time. Remove the warning. Fixes: f45f979dc208 ("iwlwifi: mvm: disable dbg data collect when fw isn't alive") Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index 839010417241..e72c0b825420 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -954,7 +954,7 @@ int iwl_fw_dbg_collect_desc(struct iwl_fw_runtime *fwrt, * If the loading of the FW completed successfully, the next step is to * get the SMEM config data. Thus, if fwrt->smem_cfg.num_lmacs is non * zero, the FW was already loaded successully. If the state is "NO_FW" - * in such a case - WARN and exit, since FW may be dead. Otherwise, we + * in such a case - exit, since FW may be dead. Otherwise, we * can try to collect the data, since FW might just not be fully * loaded (no "ALIVE" yet), and the debug data is accessible. * @@ -962,9 +962,8 @@ int iwl_fw_dbg_collect_desc(struct iwl_fw_runtime *fwrt, * config. In such a case, due to HW access problems, we might * collect garbage. */ - if (WARN((fwrt->trans->state == IWL_TRANS_NO_FW) && - fwrt->smem_cfg.num_lmacs, - "Can't collect dbg data when FW isn't alive\n")) + if (fwrt->trans->state == IWL_TRANS_NO_FW && + fwrt->smem_cfg.num_lmacs) return -EIO; if (test_and_set_bit(IWL_FWRT_STATUS_DUMPING, &fwrt->status)) From 92d88fa66fb7240480037963e1443120c1bd4b2f Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Tue, 1 May 2018 15:12:08 +0300 Subject: [PATCH 0628/3715] iwlwifi: mvm: avoid sending too many BARs [ Upstream commit 1a19c139be18ed4d6d681049cc48586fae070120 ] When we receive TX response, we may release a few packets due to a hole that was closed in the transmission window. However, if that frame failed, we will mark all the released frames as failed and will send multiple BARs. This affects statistics badly, and cause unnecessary frames transmission. Instead, mark all the following packets as success, with the desired result of sending a bar for the failed frame only. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index efef28012a6b..ac1e05b93a9a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -1378,6 +1378,14 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm, break; } + /* + * If we are freeing multiple frames, mark all the frames + * but the first one as acked, since they were acknowledged + * before + * */ + if (skb_freed > 1) + info->flags |= IEEE80211_TX_STAT_ACK; + iwl_mvm_tx_status_check_trigger(mvm, status); info->status.rates[0].count = tx_resp->failure_frame + 1; From 0a9983a8a9c72d404c6e7ea579790af60b834b53 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Mon, 25 Jun 2018 18:44:01 +0200 Subject: [PATCH 0629/3715] ARM: dts: pxa: fix the rtc controller [ Upstream commit 24a610eba32a80ed778ea79680b600c3fe73d7de ] The RTC controller is fed by an external fixed 32kHz clock. Yet the driver wants to acquire this clock, even though it doesn't make any use of it, ie. doesn't get the rate to make calculation. Therefore, use the exported 32.768kHz clock in the PXA clock tree to make the driver happy and working. Signed-off-by: Robert Jarzmik Signed-off-by: Sasha Levin --- arch/arm/boot/dts/pxa25x.dtsi | 4 ++++ arch/arm/boot/dts/pxa27x.dtsi | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/arch/arm/boot/dts/pxa25x.dtsi b/arch/arm/boot/dts/pxa25x.dtsi index 95d59be97213..8494b5787170 100644 --- a/arch/arm/boot/dts/pxa25x.dtsi +++ b/arch/arm/boot/dts/pxa25x.dtsi @@ -80,6 +80,10 @@ #pwm-cells = <1>; clocks = <&clks CLK_PWM1>; }; + + rtc@40900000 { + clocks = <&clks CLK_OSC32k768>; + }; }; timer@40a00000 { diff --git a/arch/arm/boot/dts/pxa27x.dtsi b/arch/arm/boot/dts/pxa27x.dtsi index 747f750f675d..2ab6986433c8 100644 --- a/arch/arm/boot/dts/pxa27x.dtsi +++ b/arch/arm/boot/dts/pxa27x.dtsi @@ -113,6 +113,10 @@ status = "disabled"; }; + + rtc@40900000 { + clocks = <&clks CLK_OSC32k768>; + }; }; clocks { From 5e6204f442ce4255f8ca9a95cca663ae371ffc22 Mon Sep 17 00:00:00 2001 From: Marcel Ziswiler Date: Fri, 31 Aug 2018 14:03:09 +0200 Subject: [PATCH 0630/3715] ARM: dts: pxa: fix power i2c base address [ Upstream commit 8a1ecc01a473b75ab97be9b36f623e4551a6e9ae ] There is one too many zeroes in the Power I2C base address. Fix this. Signed-off-by: Marcel Ziswiler Signed-off-by: Robert Jarzmik Signed-off-by: Sasha Levin --- arch/arm/boot/dts/pxa27x.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/pxa27x.dtsi b/arch/arm/boot/dts/pxa27x.dtsi index 2ab6986433c8..3228ad5fb725 100644 --- a/arch/arm/boot/dts/pxa27x.dtsi +++ b/arch/arm/boot/dts/pxa27x.dtsi @@ -71,7 +71,7 @@ clocks = <&clks CLK_PWM1>; }; - pwri2c: i2c@40f000180 { + pwri2c: i2c@40f00180 { compatible = "mrvl,pxa-i2c"; reg = <0x40f00180 0x24>; interrupts = <6>; From bbc864a7b8474bd5241d3b10207ccf819b902d19 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 20 Aug 2018 13:48:31 -0500 Subject: [PATCH 0631/3715] rtl8187: Fix warning generated when strncpy() destination length matches the sixe argument [ Upstream commit 199ba9faca909e77ac533449ecd1248123ce89e7 ] In gcc8, when the 3rd argument (size) of a call to strncpy() matches the length of the first argument, the compiler warns of the possibility of an unterminated string. Using strlcpy() forces a null at the end. Signed-off-by: Larry Finger Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/realtek/rtl818x/rtl8187/leds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8187/leds.c b/drivers/net/wireless/realtek/rtl818x/rtl8187/leds.c index c2d5b495c179..c089540116fa 100644 --- a/drivers/net/wireless/realtek/rtl818x/rtl8187/leds.c +++ b/drivers/net/wireless/realtek/rtl818x/rtl8187/leds.c @@ -146,7 +146,7 @@ static int rtl8187_register_led(struct ieee80211_hw *dev, led->dev = dev; led->ledpin = ledpin; led->is_radio = is_radio; - strncpy(led->name, name, sizeof(led->name)); + strlcpy(led->name, name, sizeof(led->name)); led->led_dev.name = led->name; led->led_dev.default_trigger = default_trigger; From 830a50a3c429ea266758debf02d9d4d39bdd58b4 Mon Sep 17 00:00:00 2001 From: Sven Schmitt Date: Tue, 24 Jul 2018 09:46:03 +0000 Subject: [PATCH 0632/3715] soc: imx: gpc: fix PDN delay [ Upstream commit 9f4d61d531e0efc9c3283963ae5ef7e314579191 ] imx6_pm_domain_power_off() reads iso and iso2sw from GPC_PGC_PUPSCR_OFFS which stores the power up delays. So use GPC_PGC_PDNSCR_OFFS for the correct delays. Signed-off-by: Sven Schmitt Reviewed-by: Leonard Crestez Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- drivers/soc/imx/gpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/imx/gpc.c b/drivers/soc/imx/gpc.c index c54d229f8da4..3a12123de466 100644 --- a/drivers/soc/imx/gpc.c +++ b/drivers/soc/imx/gpc.c @@ -73,7 +73,7 @@ static int imx6_pm_domain_power_off(struct generic_pm_domain *genpd) return -EBUSY; /* Read ISO and ISO2SW power down delays */ - regmap_read(pd->regmap, pd->reg_offs + GPC_PGC_PUPSCR_OFFS, &val); + regmap_read(pd->regmap, pd->reg_offs + GPC_PGC_PDNSCR_OFFS, &val); iso = val & 0x3f; iso2sw = (val >> 8) & 0x3f; From 994c5661a7f5b71c74d85216f1579bc79f4c2883 Mon Sep 17 00:00:00 2001 From: Jiada Wang Date: Mon, 3 Sep 2018 07:07:07 +0000 Subject: [PATCH 0633/3715] ASoC: rsnd: ssi: Fix issue in dma data address assignment [ Upstream commit 0e289012b47a2de1f029a6b61c75998e2f159dd9 ] Same SSI device may be used in different dai links, by only having one dma struct in rsnd_ssi, after the first instance's dma config be initilized, the following instances can no longer configure dma, this causes issue, when their dma data address are different from the first instance. Signed-off-by: Jiada Wang Signed-off-by: Timo Wischer [Kuninori: tidyup for upstream] Signed-off-by: Kuninori Morimoto Tested-by: Hiroyuki Yokoyama Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/sh/rcar/rsnd.h | 1 + sound/soc/sh/rcar/ssi.c | 4 +--- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/sound/soc/sh/rcar/rsnd.h b/sound/soc/sh/rcar/rsnd.h index 1768a0ae469d..c68b31483c7b 100644 --- a/sound/soc/sh/rcar/rsnd.h +++ b/sound/soc/sh/rcar/rsnd.h @@ -432,6 +432,7 @@ struct rsnd_dai_stream { char name[RSND_DAI_NAME_SIZE]; struct snd_pcm_substream *substream; struct rsnd_mod *mod[RSND_MOD_MAX]; + struct rsnd_mod *dma; struct rsnd_dai *rdai; u32 parent_ssi_status; }; diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c index 60cc550c5a4c..cae9ed6a0cdb 100644 --- a/sound/soc/sh/rcar/ssi.c +++ b/sound/soc/sh/rcar/ssi.c @@ -66,7 +66,6 @@ struct rsnd_ssi { struct rsnd_mod mod; - struct rsnd_mod *dma; u32 flags; u32 cr_own; @@ -868,7 +867,6 @@ static int rsnd_ssi_dma_probe(struct rsnd_mod *mod, struct rsnd_dai_stream *io, struct rsnd_priv *priv) { - struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod); int ret; /* @@ -883,7 +881,7 @@ static int rsnd_ssi_dma_probe(struct rsnd_mod *mod, return ret; /* SSI probe might be called many times in MUX multi path */ - ret = rsnd_dma_attach(io, mod, &ssi->dma); + ret = rsnd_dma_attach(io, mod, &io->dma); return ret; } From fa1a884a3279c9693f777f4bf578a0791b8b301d Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Mon, 3 Sep 2018 10:48:49 +0200 Subject: [PATCH 0634/3715] net: phy: mscc: read 'vsc8531,vddmac' as an u32 [ Upstream commit a993e0f583c7925adaa7721226ccd7a41e7e63d1 ] In the DT binding, it is specified nowhere that 'vsc8531,vddmac' is an u16, even though it's read as an u16 in the driver. Let's update the driver to take into consideration that the 'vsc8531,vddmac' property is of the default type u32. Signed-off-by: Quentin Schulz Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/phy/mscc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c index 650c2667d523..88bcdbcb432c 100644 --- a/drivers/net/phy/mscc.c +++ b/drivers/net/phy/mscc.c @@ -111,7 +111,7 @@ struct vsc8531_private { #ifdef CONFIG_OF_MDIO struct vsc8531_edge_rate_table { - u16 vddmac; + u32 vddmac; u8 slowdown[8]; }; @@ -376,7 +376,7 @@ out_unlock: static int vsc85xx_edge_rate_magic_get(struct phy_device *phydev) { u8 sd; - u16 vdd; + u32 vdd; int rc, i, j; struct device *dev = &phydev->mdio.dev; struct device_node *of_node = dev->of_node; @@ -385,7 +385,7 @@ static int vsc85xx_edge_rate_magic_get(struct phy_device *phydev) if (!of_node) return -ENODEV; - rc = of_property_read_u16(of_node, "vsc8531,vddmac", &vdd); + rc = of_property_read_u32(of_node, "vsc8531,vddmac", &vdd); if (rc != 0) vdd = MSCC_VDDMAC_3300; From 3c53714415f4c6cab9c91091c8290c10aac1327c Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Mon, 3 Sep 2018 10:48:50 +0200 Subject: [PATCH 0635/3715] net: phy: mscc: read 'vsc8531, edge-slowdown' as an u32 [ Upstream commit 36c53cf0f46526b898390659b125155939f67892 ] In the DT binding, it is specified nowhere that 'vsc8531,edge-slowdown' is an u8, even though it's read as an u8 in the driver. Let's update the driver to take into consideration that the 'vsc8531,edge-slowdown' property is of the default type u32. Signed-off-by: Quentin Schulz Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/phy/mscc.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c index 88bcdbcb432c..fe81741ab66a 100644 --- a/drivers/net/phy/mscc.c +++ b/drivers/net/phy/mscc.c @@ -112,7 +112,7 @@ struct vsc8531_private { #ifdef CONFIG_OF_MDIO struct vsc8531_edge_rate_table { u32 vddmac; - u8 slowdown[8]; + u32 slowdown[8]; }; static const struct vsc8531_edge_rate_table edge_table[] = { @@ -375,8 +375,7 @@ out_unlock: #ifdef CONFIG_OF_MDIO static int vsc85xx_edge_rate_magic_get(struct phy_device *phydev) { - u8 sd; - u32 vdd; + u32 vdd, sd; int rc, i, j; struct device *dev = &phydev->mdio.dev; struct device_node *of_node = dev->of_node; @@ -389,7 +388,7 @@ static int vsc85xx_edge_rate_magic_get(struct phy_device *phydev) if (rc != 0) vdd = MSCC_VDDMAC_3300; - rc = of_property_read_u8(of_node, "vsc8531,edge-slowdown", &sd); + rc = of_property_read_u32(of_node, "vsc8531,edge-slowdown", &sd); if (rc != 0) sd = 0; From f245b2c1c7ebd5c725240c8d91e0664e90cd3a58 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 21 Jul 2018 21:05:52 +0200 Subject: [PATCH 0636/3715] ARM: dts: meson8: fix the clock controller register size [ Upstream commit f7f9da89bc4f61e33f7b9f5c75c4efdc1f0455d8 ] The clock controller registers are not 0x460 wide because the reset controller starts at CBUS 0x4404. This currently overlaps with the clock controller (which is at CBUS 0x4000). There is no public documentation available on the actual size of the clock controller's register area (also called "HHI"). However, in Amlogic's GPL kernel sources the last "HHI" register is HHI_HDMI_PHY_CNTL2 at CBUS + 0x43a8. 0x400 was chosen because that size doesn't seem unlikely. Fixes: 2c323c43a3d619 ("ARM: dts: meson8: add and use the real clock controller") Signed-off-by: Martin Blumenstingl Reviewed-by: Neil Armstrong Signed-off-by: Kevin Hilman Signed-off-by: Sasha Levin --- arch/arm/boot/dts/meson8.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/meson8.dtsi b/arch/arm/boot/dts/meson8.dtsi index b98d44fde6b6..e3ae85d65b39 100644 --- a/arch/arm/boot/dts/meson8.dtsi +++ b/arch/arm/boot/dts/meson8.dtsi @@ -170,7 +170,7 @@ #clock-cells = <1>; #reset-cells = <1>; compatible = "amlogic,meson8-clkc"; - reg = <0x8000 0x4>, <0x4000 0x460>; + reg = <0x8000 0x4>, <0x4000 0x400>; }; pwm_ef: pwm@86c0 { From 46506366d8848b172f8c98dc826e50545e85ee1a Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 21 Jul 2018 21:05:53 +0200 Subject: [PATCH 0637/3715] ARM: dts: meson8b: fix the clock controller register size [ Upstream commit f31094fe8c16fbd2ca47921acf93b744b045aace ] The clock controller registers are not 0x460 wide because the reset controller starts at CBUS 0x4404. This currently overlaps with the clock controller (which is at CBUS 0x4000). There is no public documentation available on the actual size of the clock controller's register area (also called "HHI"). However, in Amlogic's GPL kernel sources the last "HHI" register is HHI_HDMI_PHY_CNTL2 at CBUS + 0x43a8. 0x400 was chosen because that size doesn't seem unlikely. Fixes: 4a69fcd3a10803 ("ARM: meson: Add DTS for Odroid-C1 and Tronfy MXQ boards") Signed-off-by: Martin Blumenstingl Reviewed-by: Neil Armstrong Signed-off-by: Kevin Hilman Signed-off-by: Sasha Levin --- arch/arm/boot/dts/meson8b.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/meson8b.dtsi b/arch/arm/boot/dts/meson8b.dtsi index bc278da7df0d..0f76da280ee7 100644 --- a/arch/arm/boot/dts/meson8b.dtsi +++ b/arch/arm/boot/dts/meson8b.dtsi @@ -121,7 +121,7 @@ #clock-cells = <1>; #reset-cells = <1>; compatible = "amlogic,meson8b-clkc"; - reg = <0x8000 0x4>, <0x4000 0x460>; + reg = <0x8000 0x4>, <0x4000 0x400>; }; reset: reset-controller@4404 { From f3fc7254b0c4a228dc5b52ad14549d1403d804ee Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Tue, 4 Sep 2018 19:29:09 +0200 Subject: [PATCH 0638/3715] net: lan78xx: Bail out if lan78xx_get_endpoints fails [ Upstream commit fa8cd98c06407b5798b927cd7fd14d30f360ed02 ] We need to bail out if lan78xx_get_endpoints() fails, otherwise the result is overwritten. Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet") Signed-off-by: Stefan Wahren Reviewed-by: Raghuram Chary Jallipalli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/lan78xx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 78a12d7b96e8..2229284d16f5 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2818,6 +2818,11 @@ static int lan78xx_bind(struct lan78xx_net *dev, struct usb_interface *intf) int i; ret = lan78xx_get_endpoints(dev, intf); + if (ret) { + netdev_warn(dev->net, "lan78xx_get_endpoints failed: %d\n", + ret); + return ret; + } dev->data[0] = (unsigned long)kzalloc(sizeof(*pdata), GFP_KERNEL); From 00843344c6871cde6b8c85bf88bd2197d6eb1da6 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 6 Sep 2018 11:41:52 +0100 Subject: [PATCH 0639/3715] ASoC: sgtl5000: avoid division by zero if lo_vag is zero [ Upstream commit 9ab708aef61f5620113269a9d1bdb1543d1207d0 ] In the case where lo_vag <= SGTL5000_LINE_OUT_GND_BASE, lo_vag is set to zero and later vol_quot is computed by dividing by lo_vag causing a division by zero error. Fix this by avoiding a zero division and set vol_quot to zero in this specific case so that the lowest setting for i is correctly set. Signed-off-by: Colin Ian King Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/sgtl5000.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index 10764c1e854e..ca8a70ab22a8 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -1314,7 +1314,7 @@ static int sgtl5000_set_power_regs(struct snd_soc_codec *codec) * Searching for a suitable index solving this formula: * idx = 40 * log10(vag_val / lo_cagcntrl) + 15 */ - vol_quot = (vag * 100) / lo_vag; + vol_quot = lo_vag ? (vag * 100) / lo_vag : 0; lo_vol = 0; for (i = 0; i < ARRAY_SIZE(vol_quot_table); i++) { if (vol_quot >= vol_quot_table[i]) From 7b72dc2f100d1fe8e969d645050c8ee64b5dd301 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 6 Sep 2018 17:41:35 +0200 Subject: [PATCH 0640/3715] ARM: dts: exynos: Disable pull control for S5M8767 PMIC [ Upstream commit ef2ecab9af5feae97c47b7f61cdd96f7f49b2c23 ] S5M8767 PMIC interrupt line on Exynos5250-based Arndale board has external pull-up resistors, so disable any pull control for it in in controller node. This fixes support for S5M8767 interrupts and enables operation of wakeup from S5M8767 RTC alarm. Signed-off-by: Marek Szyprowski Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sasha Levin --- arch/arm/boot/dts/exynos5250-arndale.dts | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm/boot/dts/exynos5250-arndale.dts b/arch/arm/boot/dts/exynos5250-arndale.dts index 18a7f396ac5f..abd1705635f9 100644 --- a/arch/arm/boot/dts/exynos5250-arndale.dts +++ b/arch/arm/boot/dts/exynos5250-arndale.dts @@ -169,6 +169,8 @@ reg = <0x66>; interrupt-parent = <&gpx3>; interrupts = <2 IRQ_TYPE_LEVEL_LOW>; + pinctrl-names = "default"; + pinctrl-0 = <&s5m8767_irq>; vinb1-supply = <&main_dc_reg>; vinb2-supply = <&main_dc_reg>; @@ -544,6 +546,13 @@ cap-sd-highspeed; }; +&pinctrl_0 { + s5m8767_irq: s5m8767-irq { + samsung,pins = "gpx3-2"; + samsung,pin-pud = ; + }; +}; + &rtc { status = "okay"; }; From 1b8331e7dbe2688b59c921a3bf53ec26b5cb2de6 Mon Sep 17 00:00:00 2001 From: Erik Stromdahl Date: Tue, 4 Sep 2018 15:07:07 +0300 Subject: [PATCH 0641/3715] ath10k: wmi: disable softirq's while calling ieee80211_rx [ Upstream commit 37f62c0d5822f631b786b29a1b1069ab714d1a28 ] This is done in order not to trig the below warning in ieee80211_rx_napi: WARN_ON_ONCE(softirq_count() == 0); ieee80211_rx_napi requires that softirq's are disabled during execution. The High latency bus drivers (SDIO and USB) sometimes call the wmi ep_rx_complete callback from non softirq context, resulting in a trigger of the above warning. Calling ieee80211_rx_ni with softirq's already disabled (e.g., from softirq context) should be safe as the local_bh_disable and local_bh_enable functions (called from ieee80211_rx_ni) are fully reentrant. Signed-off-by: Erik Stromdahl Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath10k/wmi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index ab8eb9cdfda0..4d6c2986c40d 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -2414,7 +2414,8 @@ int ath10k_wmi_event_mgmt_rx(struct ath10k *ar, struct sk_buff *skb) status->freq, status->band, status->signal, status->rate_idx); - ieee80211_rx(ar->hw, skb); + ieee80211_rx_ni(ar->hw, skb); + return 0; } From e30fb85862cd4f4cde55b9212a73297446d652e4 Mon Sep 17 00:00:00 2001 From: Muhammad Sammar Date: Tue, 28 Aug 2018 14:45:30 +0300 Subject: [PATCH 0642/3715] IB/ipoib: Ensure that MTU isn't less than minimum permitted [ Upstream commit 142a9c287613560edf5a03c8d142c8b6ebc1995b ] It is illegal to change MTU to a value lower than the minimum MTU stated in ethernet spec. In addition to that we need to add 4 bytes for encapsulation header (IPOIB_ENCAP_LEN). Before "ifconfig ib0 mtu 0" command, succeeds while it obviously shouldn't. Signed-off-by: Muhammad Sammar Reviewed-by: Feras Daoud Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 1a93d3d58c8a..caae4bfab950 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -249,7 +249,8 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu) return 0; } - if (new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu)) + if (new_mtu < (ETH_MIN_MTU + IPOIB_ENCAP_LEN) || + new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu)) return -EINVAL; priv->admin_mtu = new_mtu; From c0e762da6b0ddaf9437e955faa28712a6f7df283 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 28 Aug 2018 14:45:31 +0300 Subject: [PATCH 0643/3715] RDMA/core: Rate limit MAD error messages [ Upstream commit f9d08f1e1939ad4d92e38bd3dee6842512f5bee6 ] While registering a mad agent, a user space can trigger various errors and flood the logs. Therefore, decrease verbosity and rate limit such error messages. While we are at it, use __func__ to print function name. Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/mad.c | 72 ++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 35 deletions(-) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index e4339b9e43a5..6072ac7023cb 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -217,30 +217,30 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, /* Validate parameters */ qpn = get_spl_qp_index(qp_type); if (qpn == -1) { - dev_notice(&device->dev, - "ib_register_mad_agent: invalid QP Type %d\n", - qp_type); + dev_dbg_ratelimited(&device->dev, "%s: invalid QP Type %d\n", + __func__, qp_type); goto error1; } if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) { - dev_notice(&device->dev, - "ib_register_mad_agent: invalid RMPP Version %u\n", - rmpp_version); + dev_dbg_ratelimited(&device->dev, + "%s: invalid RMPP Version %u\n", + __func__, rmpp_version); goto error1; } /* Validate MAD registration request if supplied */ if (mad_reg_req) { if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) { - dev_notice(&device->dev, - "ib_register_mad_agent: invalid Class Version %u\n", - mad_reg_req->mgmt_class_version); + dev_dbg_ratelimited(&device->dev, + "%s: invalid Class Version %u\n", + __func__, + mad_reg_req->mgmt_class_version); goto error1; } if (!recv_handler) { - dev_notice(&device->dev, - "ib_register_mad_agent: no recv_handler\n"); + dev_dbg_ratelimited(&device->dev, + "%s: no recv_handler\n", __func__); goto error1; } if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) { @@ -250,9 +250,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, */ if (mad_reg_req->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { - dev_notice(&device->dev, - "ib_register_mad_agent: Invalid Mgmt Class 0x%x\n", - mad_reg_req->mgmt_class); + dev_dbg_ratelimited(&device->dev, + "%s: Invalid Mgmt Class 0x%x\n", + __func__, mad_reg_req->mgmt_class); goto error1; } } else if (mad_reg_req->mgmt_class == 0) { @@ -260,8 +260,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, * Class 0 is reserved in IBA and is used for * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE */ - dev_notice(&device->dev, - "ib_register_mad_agent: Invalid Mgmt Class 0\n"); + dev_dbg_ratelimited(&device->dev, + "%s: Invalid Mgmt Class 0\n", + __func__); goto error1; } else if (is_vendor_class(mad_reg_req->mgmt_class)) { /* @@ -269,18 +270,19 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, * ensure supplied OUI is not zero */ if (!is_vendor_oui(mad_reg_req->oui)) { - dev_notice(&device->dev, - "ib_register_mad_agent: No OUI specified for class 0x%x\n", - mad_reg_req->mgmt_class); + dev_dbg_ratelimited(&device->dev, + "%s: No OUI specified for class 0x%x\n", + __func__, + mad_reg_req->mgmt_class); goto error1; } } /* Make sure class supplied is consistent with RMPP */ if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) { if (rmpp_version) { - dev_notice(&device->dev, - "ib_register_mad_agent: RMPP version for non-RMPP class 0x%x\n", - mad_reg_req->mgmt_class); + dev_dbg_ratelimited(&device->dev, + "%s: RMPP version for non-RMPP class 0x%x\n", + __func__, mad_reg_req->mgmt_class); goto error1; } } @@ -291,9 +293,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, IB_MGMT_CLASS_SUBN_LID_ROUTED) && (mad_reg_req->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { - dev_notice(&device->dev, - "ib_register_mad_agent: Invalid SM QP type: class 0x%x\n", - mad_reg_req->mgmt_class); + dev_dbg_ratelimited(&device->dev, + "%s: Invalid SM QP type: class 0x%x\n", + __func__, mad_reg_req->mgmt_class); goto error1; } } else { @@ -301,9 +303,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, IB_MGMT_CLASS_SUBN_LID_ROUTED) || (mad_reg_req->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { - dev_notice(&device->dev, - "ib_register_mad_agent: Invalid GS QP type: class 0x%x\n", - mad_reg_req->mgmt_class); + dev_dbg_ratelimited(&device->dev, + "%s: Invalid GS QP type: class 0x%x\n", + __func__, mad_reg_req->mgmt_class); goto error1; } } @@ -318,18 +320,18 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, /* Validate device and port */ port_priv = ib_get_mad_port(device, port_num); if (!port_priv) { - dev_notice(&device->dev, - "ib_register_mad_agent: Invalid port %d\n", - port_num); + dev_dbg_ratelimited(&device->dev, "%s: Invalid port %d\n", + __func__, port_num); ret = ERR_PTR(-ENODEV); goto error1; } - /* Verify the QP requested is supported. For example, Ethernet devices - * will not have QP0 */ + /* Verify the QP requested is supported. For example, Ethernet devices + * will not have QP0. + */ if (!port_priv->qp_info[qpn].qp) { - dev_notice(&device->dev, - "ib_register_mad_agent: QP %d not supported\n", qpn); + dev_dbg_ratelimited(&device->dev, "%s: QP %d not supported\n", + __func__, qpn); ret = ERR_PTR(-EPROTONOSUPPORT); goto error1; } From 359aab04c01121f4c32ec9cb234f5d417ace6158 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 6 Sep 2018 10:55:31 +0300 Subject: [PATCH 0644/3715] RDMA/core: Follow correct unregister order between sysfs and cgroup [ Upstream commit c715a39541bb399eb03d728a996b224d90ce1336 ] During register_device() init sequence is, (a) register with rdma cgroup followed by (b) register with sysfs Therefore, unregister_device() sequence should follow the reverse order. Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 61ade4b3e7bb..6b0d1d8609ca 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -599,8 +599,8 @@ void ib_unregister_device(struct ib_device *device) } up_read(&lists_rwsem); - ib_device_unregister_rdmacg(device); ib_device_unregister_sysfs(device); + ib_device_unregister_rdmacg(device); mutex_unlock(&device_mutex); From 75791ce0521d8eae7cc090b388b2d1bf09e2bdf4 Mon Sep 17 00:00:00 2001 From: Ding Xiang Date: Thu, 6 Sep 2018 12:19:19 +0800 Subject: [PATCH 0645/3715] mips: txx9: fix iounmap related issue [ Upstream commit c6e1241a82e6e74d1ae5cc34581dab2ffd6022d0 ] if device_register return error, iounmap should be called, also iounmap need to call before put_device. Signed-off-by: Ding Xiang Reviewed-by: Atsushi Nemoto Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/20476/ Cc: ralf@linux-mips.org Cc: jhogan@kernel.org Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Sasha Levin --- arch/mips/txx9/generic/setup.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/mips/txx9/generic/setup.c b/arch/mips/txx9/generic/setup.c index 1791a44ee570..20aaf77166e8 100644 --- a/arch/mips/txx9/generic/setup.c +++ b/arch/mips/txx9/generic/setup.c @@ -959,12 +959,11 @@ void __init txx9_sramc_init(struct resource *r) goto exit_put; err = sysfs_create_bin_file(&dev->dev.kobj, &dev->bindata_attr); if (err) { - device_unregister(&dev->dev); iounmap(dev->base); - kfree(dev); + device_unregister(&dev->dev); } return; exit_put: + iounmap(dev->base); put_device(&dev->dev); - return; } From 2958d917fadeea14fb1bc54756a3f29f87fbf6f4 Mon Sep 17 00:00:00 2001 From: Yong Zhi Date: Tue, 7 Aug 2018 12:19:16 -0500 Subject: [PATCH 0646/3715] ASoC: Intel: hdac_hdmi: Limit sampling rates at dai creation [ Upstream commit 3b857472f34faa7d11001afa5e158833812c98d7 ] Playback of 44.1Khz contents with HDMI plugged returns "Invalid pipe config" because HDMI paths in the FW topology are configured to operate at 48Khz. This patch filters out sampling rates not supported at hdac_hdmi_create_dais() to let user space SRC to do the converting. Signed-off-by: Yong Zhi Reviewed-by: Pierre-Louis Bossart Reviewed-by: Takashi Iwai Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/hdac_hdmi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c index e824d47cc22b..1c3626347e12 100644 --- a/sound/soc/codecs/hdac_hdmi.c +++ b/sound/soc/codecs/hdac_hdmi.c @@ -1408,6 +1408,12 @@ static int hdac_hdmi_create_dais(struct hdac_device *hdac, if (ret) return ret; + /* Filter out 44.1, 88.2 and 176.4Khz */ + rates &= ~(SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_88200 | + SNDRV_PCM_RATE_176400); + if (!rates) + return -EINVAL; + sprintf(dai_name, "intel-hdmi-hifi%d", i+1); hdmi_dais[i].name = devm_kstrdup(&hdac->dev, dai_name, GFP_KERNEL); From 9032374066b416e1ade7b97b8a8f3c7c304c1faa Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 27 Aug 2018 09:50:09 -0500 Subject: [PATCH 0647/3715] of: make PowerMac cache node search conditional on CONFIG_PPC_PMAC [ Upstream commit f6707fd6241e483f6fea2caae82d876e422bb11a ] Cache nodes under the cpu node(s) is PowerMac specific according to the comment above, so make the code enforce that. Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- drivers/of/base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/base.c b/drivers/of/base.c index ce8a6e0c9b6a..41b254be0295 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1837,7 +1837,7 @@ struct device_node *of_find_next_cache_node(const struct device_node *np) /* OF on pmac has nodes instead of properties named "l2-cache" * beneath CPU nodes. */ - if (!strcmp(np->type, "cpu")) + if (IS_ENABLED(CONFIG_PPC_PMAC) && !strcmp(np->type, "cpu")) for_each_child_of_node(np, child) if (!strcmp(child->type, "cache")) return child; From ef10593a92e11249e6d26e39dccd1a8bf8e06d88 Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Tue, 31 Jul 2018 09:11:06 +0200 Subject: [PATCH 0648/3715] ARM: dts: omap3-gta04: give spi_lcd node a label so that we can overwrite in other DTS files [ Upstream commit fa0d7dc355c890725b6178dab0cc11b194203afa ] needed for device variants based on GTA04 board but with different display panel (driver). Signed-off-by: H. Nikolaus Schaller Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/boot/dts/omap3-gta04.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi index 4504908c23fe..0b0aa020a8d5 100644 --- a/arch/arm/boot/dts/omap3-gta04.dtsi +++ b/arch/arm/boot/dts/omap3-gta04.dtsi @@ -71,7 +71,7 @@ #sound-dai-cells = <0>; }; - spi_lcd { + spi_lcd: spi_lcd { compatible = "spi-gpio"; #address-cells = <0x1>; #size-cells = <0x0>; From ec1b4e3862d7323e518a53b4cc36e6828c041951 Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Tue, 31 Jul 2018 09:11:07 +0200 Subject: [PATCH 0649/3715] ARM: dts: omap3-gta04: fixes for tvout / venc [ Upstream commit f6591391373dbff2c0200e1055d4ff86191578d2 ] * fix connector compatibility (composite) * add comment for gpio1 23 * add proper #address-cells * we use only one venc_out channel for composite Signed-off-by: H. Nikolaus Schaller Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/boot/dts/omap3-gta04.dtsi | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi index 0b0aa020a8d5..5f62b2f3c6e9 100644 --- a/arch/arm/boot/dts/omap3-gta04.dtsi +++ b/arch/arm/boot/dts/omap3-gta04.dtsi @@ -123,7 +123,7 @@ }; tv0: connector { - compatible = "svideo-connector"; + compatible = "composite-video-connector"; label = "tv"; port { @@ -135,7 +135,7 @@ tv_amp: opa362 { compatible = "ti,opa362"; - enable-gpios = <&gpio1 23 GPIO_ACTIVE_HIGH>; + enable-gpios = <&gpio1 23 GPIO_ACTIVE_HIGH>; /* GPIO_23 to enable video out amplifier */ ports { #address-cells = <1>; @@ -540,10 +540,14 @@ vdda-supply = <&vdac>; + #address-cells = <1>; + #size-cells = <0>; + port { + reg = <0>; venc_out: endpoint { remote-endpoint = <&opa_in>; - ti,channels = <2>; + ti,channels = <1>; ti,invert-polarity; }; }; From 28e09725cb4cbdeb96e1256190c314bf32f32bae Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Tue, 31 Jul 2018 09:11:09 +0200 Subject: [PATCH 0650/3715] ARM: dts: omap3-gta04: tvout: enable as display1 alias [ Upstream commit 8905592b6e50cec905e6c6035bbd36201a3bfac1 ] The omap dss susbystem takes the display aliases to find out which displays exist. To enable tv-out we must define an alias. Signed-off-by: H. Nikolaus Schaller Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/boot/dts/omap3-gta04.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi index 5f62b2f3c6e9..7e9d6c4cdbfb 100644 --- a/arch/arm/boot/dts/omap3-gta04.dtsi +++ b/arch/arm/boot/dts/omap3-gta04.dtsi @@ -28,6 +28,7 @@ aliases { display0 = &lcd; + display1 = &tv0; }; gpio-keys { From 64590cfd49067ebefdf737ef5df0577aad79001e Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Tue, 31 Jul 2018 09:11:10 +0200 Subject: [PATCH 0651/3715] ARM: dts: omap3-gta04: fix touchscreen tsc2007 [ Upstream commit 7384a24248eda140a234d356b6c840701ee9f055 ] we fix penirq polarity, add penirq pinmux and touchscreen properties. Signed-off-by: H. Nikolaus Schaller Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/boot/dts/omap3-gta04.dtsi | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi index 7e9d6c4cdbfb..11daca2f19c3 100644 --- a/arch/arm/boot/dts/omap3-gta04.dtsi +++ b/arch/arm/boot/dts/omap3-gta04.dtsi @@ -275,6 +275,13 @@ OMAP3_CORE1_IOPAD(0x2134, PIN_INPUT_PULLUP | MUX_MODE4) /* gpio112 */ >; }; + + penirq_pins: pinmux_penirq_pins { + pinctrl-single,pins = < + /* here we could enable to wakeup the cpu from suspend by a pen touch */ + OMAP3_CORE1_IOPAD(0x2194, PIN_INPUT_PULLUP | MUX_MODE4) /* gpio160 */ + >; + }; }; &omap3_pmx_core2 { @@ -412,10 +419,19 @@ tsc2007@48 { compatible = "ti,tsc2007"; reg = <0x48>; + pinctrl-names = "default"; + pinctrl-0 = <&penirq_pins>; interrupt-parent = <&gpio6>; interrupts = <0 IRQ_TYPE_EDGE_FALLING>; /* GPIO_160 */ - gpios = <&gpio6 0 GPIO_ACTIVE_LOW>; + gpios = <&gpio6 0 GPIO_ACTIVE_LOW>; /* GPIO_160 */ ti,x-plate-ohms = <600>; + touchscreen-size-x = <480>; + touchscreen-size-y = <640>; + touchscreen-max-pressure = <1000>; + touchscreen-fuzz-x = <3>; + touchscreen-fuzz-y = <8>; + touchscreen-fuzz-pressure = <10>; + touchscreen-inverted-y; }; /* RFID EEPROM */ From a248b125a6d1aa6a881992b78b6e07c1cd742927 Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Tue, 31 Jul 2018 09:11:12 +0200 Subject: [PATCH 0652/3715] ARM: dts: omap3-gta04: make NAND partitions compatible with recent U-Boot [ Upstream commit fa99c21ecb3cd4021a60d0e8bf880e78b5bd0729 ] Vendor defined U-Boot has changed the partition scheme a while ago: * kernel partition 6MB * file system partition uses the remainder up to end of the NAND * increased size of the environment partition (to get an OneNAND compatible base address) * shrink the U-Boot partition Let's be compatible (e.g. Debian kernel built from upstream). Signed-off-by: H. Nikolaus Schaller Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/boot/dts/omap3-gta04.dtsi | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi index 11daca2f19c3..7992489b953e 100644 --- a/arch/arm/boot/dts/omap3-gta04.dtsi +++ b/arch/arm/boot/dts/omap3-gta04.dtsi @@ -608,22 +608,22 @@ bootloaders@80000 { label = "U-Boot"; - reg = <0x80000 0x1e0000>; + reg = <0x80000 0x1c0000>; }; - bootloaders_env@260000 { + bootloaders_env@240000 { label = "U-Boot Env"; - reg = <0x260000 0x20000>; + reg = <0x240000 0x40000>; }; kernel@280000 { label = "Kernel"; - reg = <0x280000 0x400000>; + reg = <0x280000 0x600000>; }; - filesystem@680000 { + filesystem@880000 { label = "File System"; - reg = <0x680000 0xf980000>; + reg = <0x880000 0>; /* 0 = MTDPART_SIZ_FULL */ }; }; }; From e8651ccbc7910bd8d9359149a104f825a80c836a Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Tue, 31 Jul 2018 09:11:14 +0200 Subject: [PATCH 0653/3715] ARM: dts: omap3-gta04: keep vpll2 always on [ Upstream commit 1ae00833e30c9b4af5cbfda65d75b1de12f74013 ] This is needed to make the display and venc work properly. Compare to omap3-beagle.dts. Signed-off-by: H. Nikolaus Schaller Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/boot/dts/omap3-gta04.dtsi | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi index 7992489b953e..e83d0619b3b7 100644 --- a/arch/arm/boot/dts/omap3-gta04.dtsi +++ b/arch/arm/boot/dts/omap3-gta04.dtsi @@ -537,6 +537,12 @@ regulator-max-microvolt = <3150000>; }; +/* Needed to power the DPI pins */ + +&vpll2 { + regulator-always-on; +}; + &dss { pinctrl-names = "default"; pinctrl-0 = < &dss_dpi_pins >; From 70816f4ce8ce34a196d09fe253c4240481f23994 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 5 Sep 2018 11:36:36 +0200 Subject: [PATCH 0654/3715] sched/debug: Use symbolic names for task state constants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ff28915fd31ccafc0d38e6f84b66df280ed9e86a ] include/trace/events/sched.h includes (via ) and so knows about the TASK_* constants used to interpret .prev_state. So instead of duplicating the magic numbers make use of the defined macros to ease understanding the mapping from state bits to letters which isn't completely intuitive for an outsider. Signed-off-by: Uwe Kleine-König Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Thomas Gleixner Cc: kernel@pengutronix.de Link: http://lkml.kernel.org/r/20180905093636.24068-1-u.kleine-koenig@pengutronix.de Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- include/trace/events/sched.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 6e692a52936c..18197e0bb510 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -169,9 +169,14 @@ TRACE_EVENT(sched_switch, (__entry->prev_state & (TASK_REPORT_MAX - 1)) ? __print_flags(__entry->prev_state & (TASK_REPORT_MAX - 1), "|", - { 0x01, "S" }, { 0x02, "D" }, { 0x04, "T" }, - { 0x08, "t" }, { 0x10, "X" }, { 0x20, "Z" }, - { 0x40, "P" }, { 0x80, "I" }) : + { TASK_INTERRUPTIBLE, "S" }, + { TASK_UNINTERRUPTIBLE, "D" }, + { __TASK_STOPPED, "T" }, + { __TASK_TRACED, "t" }, + { EXIT_DEAD, "X" }, + { EXIT_ZOMBIE, "Z" }, + { TASK_PARKED, "P" }, + { TASK_DEAD, "I" }) : "R", __entry->prev_state & TASK_REPORT_MAX ? "+" : "", From c184ddb5a7660b1b975d7a1fb461858c978f09ba Mon Sep 17 00:00:00 2001 From: Vicente Bergas Date: Sat, 8 Sep 2018 21:00:46 +0200 Subject: [PATCH 0655/3715] arm64: dts: rockchip: Fix VCC5V0_HOST_EN on rk3399-sapphire [ Upstream commit bcdb578a5f5b4aea79441606ab7f0a2e076b4474 ] The pin is GPIO4-D1 not GPIO1-D1, see schematic, page 15 for reference. Signed-off-by: Vicente Bergas Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi index ce592a4c0c4c..82576011b959 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi @@ -136,7 +136,7 @@ vcc5v0_host: vcc5v0-host-regulator { compatible = "regulator-fixed"; enable-active-high; - gpio = <&gpio1 RK_PD1 GPIO_ACTIVE_HIGH>; + gpio = <&gpio4 RK_PD1 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&vcc5v0_host_en>; regulator-name = "vcc5v0_host"; From c7befe4de8b3b8695f4724702b823b9768526458 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Wed, 29 Aug 2018 23:32:48 +0200 Subject: [PATCH 0656/3715] dmaengine: dma-jz4780: Don't depend on MACH_JZ4780 [ Upstream commit c558ecd21c852c97ff98dc6c61f715ba420ec251 ] If we make this driver depend on MACH_JZ4780, that means it can be enabled only if we're building a kernel specially crafted for a JZ4780-based board, while most GNU/Linux distributions will want one generic MIPS kernel that works on multiple boards. Signed-off-by: Paul Cercueil Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index fadc4d8783bd..79b809dbfda0 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -143,7 +143,7 @@ config DMA_JZ4740 config DMA_JZ4780 tristate "JZ4780 DMA support" - depends on MACH_JZ4780 || COMPILE_TEST + depends on MIPS || COMPILE_TEST select DMA_ENGINE select DMA_VIRTUAL_CHANNELS help From 61acfcb2478edff351306f679fc50a50b304d401 Mon Sep 17 00:00:00 2001 From: Daniel Silsby Date: Wed, 29 Aug 2018 23:32:56 +0200 Subject: [PATCH 0657/3715] dmaengine: dma-jz4780: Further residue status fix [ Upstream commit 83ef4fb7556b6a673f755da670cbacab7e2c7f1b ] Func jz4780_dma_desc_residue() expects the index to the next hw descriptor as its last parameter. Caller func jz4780_dma_tx_status(), however, applied modulus before passing it. When the current hw descriptor was last in the list, the index passed became zero. The resulting excess of reported residue especially caused problems with cyclic DMA transfer clients, i.e. ALSA AIC audio output, which rely on this for determining current DMA location within buffer. Combined with the recent and related residue-reporting fixes, spurious ALSA audio underruns on jz4770 hardware are now fixed. Signed-off-by: Daniel Silsby Signed-off-by: Paul Cercueil Tested-by: Mathieu Malaterre Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/dma-jz4780.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c index 803cfb4523b0..aca2d6fd92d5 100644 --- a/drivers/dma/dma-jz4780.c +++ b/drivers/dma/dma-jz4780.c @@ -580,7 +580,7 @@ static enum dma_status jz4780_dma_tx_status(struct dma_chan *chan, to_jz4780_dma_desc(vdesc), 0); } else if (cookie == jzchan->desc->vdesc.tx.cookie) { txstate->residue = jz4780_dma_desc_residue(jzchan, jzchan->desc, - (jzchan->curr_hwdesc + 1) % jzchan->desc->count); + jzchan->curr_hwdesc + 1); } else txstate->residue = 0; From 0d533e3681c79ffa1dcbc4c97535b61fda878d37 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Fri, 7 Sep 2018 16:08:27 -0700 Subject: [PATCH 0658/3715] EDAC, sb_edac: Return early on ADDRV bit and address type test [ Upstream commit dcc960b225ceb2bd66c45e0845d03e577f7010f9 ] Users of the mce_register_decode_chain() are called for every logged error. EDAC drivers should check: 1) Is this a memory error? [bit 7 in status register] 2) Is there a valid address? [bit 58 in status register] 3) Is the address a system address? [bitfield 8:6 in misc register] The sb_edac driver performed test "1" twice. Waited far too long to perform check "2". Didn't do check "3" at all. Fix it by moving the test for valid address from sbridge_mce_output_error() into sbridge_mce_check_error() and add a test for the type immediately after. Delete the redundant check for the type of the error from sbridge_mce_output_error(). Signed-off-by: Qiuxu Zhuo Cc: Aristeu Rozanski Cc: Mauro Carvalho Chehab Cc: Qiuxu Zhuo Cc: linux-edac Link: http://lkml.kernel.org/r/20180907230828.13901-2-tony.luck@intel.com [ Re-word commit message. ] Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Signed-off-by: Sasha Levin --- drivers/edac/sb_edac.c | 68 ++++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 33 deletions(-) diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index b0b390a1da15..ddd5990211f8 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -2915,35 +2915,27 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, * cccc = channel * If the mask doesn't match, report an error to the parsing logic */ - if (! ((errcode & 0xef80) == 0x80)) { - optype = "Can't parse: it is not a mem"; - } else { - switch (optypenum) { - case 0: - optype = "generic undef request error"; - break; - case 1: - optype = "memory read error"; - break; - case 2: - optype = "memory write error"; - break; - case 3: - optype = "addr/cmd error"; - break; - case 4: - optype = "memory scrubbing error"; - break; - default: - optype = "reserved"; - break; - } + switch (optypenum) { + case 0: + optype = "generic undef request error"; + break; + case 1: + optype = "memory read error"; + break; + case 2: + optype = "memory write error"; + break; + case 3: + optype = "addr/cmd error"; + break; + case 4: + optype = "memory scrubbing error"; + break; + default: + optype = "reserved"; + break; } - /* Only decode errors with an valid address (ADDRV) */ - if (!GET_BITFIELD(m->status, 58, 58)) - return; - if (pvt->info.type == KNIGHTS_LANDING) { if (channel == 14) { edac_dbg(0, "%s%s err_code:%04x:%04x EDRAM bank %d\n", @@ -3049,17 +3041,11 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, { struct mce *mce = (struct mce *)data; struct mem_ctl_info *mci; - struct sbridge_pvt *pvt; char *type; if (edac_get_report_status() == EDAC_REPORTING_DISABLED) return NOTIFY_DONE; - mci = get_mci_for_node_id(mce->socketid, IMC0); - if (!mci) - return NOTIFY_DONE; - pvt = mci->pvt_info; - /* * Just let mcelog handle it if the error is * outside the memory controller. A memory error @@ -3069,6 +3055,22 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, if ((mce->status & 0xefff) >> 7 != 1) return NOTIFY_DONE; + /* Check ADDRV bit in STATUS */ + if (!GET_BITFIELD(mce->status, 58, 58)) + return NOTIFY_DONE; + + /* Check MISCV bit in STATUS */ + if (!GET_BITFIELD(mce->status, 59, 59)) + return NOTIFY_DONE; + + /* Check address type in MISC (physical address only) */ + if (GET_BITFIELD(mce->misc, 6, 8) != 2) + return NOTIFY_DONE; + + mci = get_mci_for_node_id(mce->socketid, IMC0); + if (!mci) + return NOTIFY_DONE; + if (mce->mcgstatus & MCG_STATUS_MCIP) type = "Exception"; else From 9a7a5487408d0d0fb4a269085f36f1129f3b8538 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sun, 9 Sep 2018 22:38:46 +0200 Subject: [PATCH 0659/3715] rtc: mt6397: fix possible race condition [ Upstream commit babab2f86440352d24e76118fdd7d40cab5fd7bf ] The IRQ is requested before the struct rtc is allocated and registered, but this struct is used in the IRQ handler. This may lead to a NULL pointer dereference. Switch to devm_rtc_allocate_device/rtc_register_device to allocate the rtc before requesting the IRQ. Acked-by: Eddie Huang Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- drivers/rtc/rtc-mt6397.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/rtc/rtc-mt6397.c b/drivers/rtc/rtc-mt6397.c index 1a61fa56f3ad..e82df43e5ca2 100644 --- a/drivers/rtc/rtc-mt6397.c +++ b/drivers/rtc/rtc-mt6397.c @@ -333,6 +333,10 @@ static int mtk_rtc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, rtc); + rtc->rtc_dev = devm_rtc_allocate_device(rtc->dev); + if (IS_ERR(rtc->rtc_dev)) + return PTR_ERR(rtc->rtc_dev); + ret = request_threaded_irq(rtc->irq, NULL, mtk_rtc_irq_handler_thread, IRQF_ONESHOT | IRQF_TRIGGER_HIGH, @@ -345,11 +349,11 @@ static int mtk_rtc_probe(struct platform_device *pdev) device_init_wakeup(&pdev->dev, 1); - rtc->rtc_dev = rtc_device_register("mt6397-rtc", &pdev->dev, - &mtk_rtc_ops, THIS_MODULE); - if (IS_ERR(rtc->rtc_dev)) { + rtc->rtc_dev->ops = &mtk_rtc_ops; + + ret = rtc_register_device(rtc->rtc_dev); + if (ret) { dev_err(&pdev->dev, "register rtc device failed\n"); - ret = PTR_ERR(rtc->rtc_dev); goto out_free_irq; } @@ -366,7 +370,6 @@ static int mtk_rtc_remove(struct platform_device *pdev) { struct mt6397_rtc *rtc = platform_get_drvdata(pdev); - rtc_device_unregister(rtc->rtc_dev); free_irq(rtc->irq, rtc->rtc_dev); irq_dispose_mapping(rtc->irq); From c07f8f5046a38b4b0d57ecbca18367a1ac129f3c Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sun, 9 Sep 2018 22:38:47 +0200 Subject: [PATCH 0660/3715] rtc: pl030: fix possible race condition [ Upstream commit c778ec85825dc895936940072aea9fe9037db684 ] The IRQ is requested before the struct rtc is allocated and registered, but this struct is used in the IRQ handler. This may lead to a NULL pointer dereference. Switch to devm_rtc_allocate_device/rtc_register_device to allocate the rtc before requesting the IRQ. Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- drivers/rtc/rtc-pl030.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-pl030.c b/drivers/rtc/rtc-pl030.c index f85a1a93e669..343bb6ed1783 100644 --- a/drivers/rtc/rtc-pl030.c +++ b/drivers/rtc/rtc-pl030.c @@ -112,6 +112,13 @@ static int pl030_probe(struct amba_device *dev, const struct amba_id *id) goto err_rtc; } + rtc->rtc = devm_rtc_allocate_device(&dev->dev); + if (IS_ERR(rtc->rtc)) { + ret = PTR_ERR(rtc->rtc); + goto err_rtc; + } + + rtc->rtc->ops = &pl030_ops; rtc->base = ioremap(dev->res.start, resource_size(&dev->res)); if (!rtc->base) { ret = -ENOMEM; @@ -128,12 +135,9 @@ static int pl030_probe(struct amba_device *dev, const struct amba_id *id) if (ret) goto err_irq; - rtc->rtc = rtc_device_register("pl030", &dev->dev, &pl030_ops, - THIS_MODULE); - if (IS_ERR(rtc->rtc)) { - ret = PTR_ERR(rtc->rtc); + ret = rtc_register_device(rtc->rtc); + if (ret) goto err_reg; - } return 0; @@ -154,7 +158,6 @@ static int pl030_remove(struct amba_device *dev) writel(0, rtc->base + RTC_CR); free_irq(dev->irq[0], rtc); - rtc_device_unregister(rtc->rtc); iounmap(rtc->base); amba_release_regions(dev); From ff603107d69b7ef88e1460d4bc6328215c9bef8f Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 22 Sep 2018 18:49:05 +0200 Subject: [PATCH 0661/3715] ath9k: add back support for using active monitor interfaces for tx99 [ Upstream commit 6df0580be8bc30803c4d8b2ed9c2230a2740c795 ] Various documented examples on how to set up tx99 with ath9k rely on setting up a regular monitor interface for setting the channel. My previous patch "ath9k: fix tx99 with monitor mode interface" made it possible to set it up this way again. However, it was removing support for using an active monitor interface, which is required for controlling the bitrate as well, since the bitrate is not passed down with a regular monitor interface. This patch partially reverts the previous one, but keeps support for using a regular monitor interface to keep documented steps working in cases where the bitrate does not matter Fixes: d9c52fd17cb48 ("ath9k: fix tx99 with monitor mode interface") Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath9k/ath9k.h | 1 + drivers/net/wireless/ath/ath9k/main.c | 10 ++++++++-- drivers/net/wireless/ath/ath9k/tx99.c | 7 +++++++ drivers/net/wireless/ath/ath9k/xmit.c | 2 +- 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/ath9k.h b/drivers/net/wireless/ath/ath9k/ath9k.h index db2b119199d7..f9339b5c3624 100644 --- a/drivers/net/wireless/ath/ath9k/ath9k.h +++ b/drivers/net/wireless/ath/ath9k/ath9k.h @@ -1074,6 +1074,7 @@ struct ath_softc { struct ath_spec_scan_priv spec_priv; + struct ieee80211_vif *tx99_vif; struct sk_buff *tx99_skb; bool tx99_state; s16 tx99_power; diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 055f86951680..3589f1f3e744 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1250,8 +1250,13 @@ static int ath9k_add_interface(struct ieee80211_hw *hw, struct ath_vif *avp = (void *)vif->drv_priv; struct ath_node *an = &avp->mcast_node; - if (IS_ENABLED(CONFIG_ATH9K_TX99)) - return -EOPNOTSUPP; + if (IS_ENABLED(CONFIG_ATH9K_TX99)) { + if (sc->cur_chan->nvifs >= 1) { + mutex_unlock(&sc->mutex); + return -EOPNOTSUPP; + } + sc->tx99_vif = vif; + } mutex_lock(&sc->mutex); @@ -1336,6 +1341,7 @@ static void ath9k_remove_interface(struct ieee80211_hw *hw, ath9k_p2p_remove_vif(sc, vif); sc->cur_chan->nvifs--; + sc->tx99_vif = NULL; if (!ath9k_is_chanctx_enabled()) list_del(&avp->list); diff --git a/drivers/net/wireless/ath/ath9k/tx99.c b/drivers/net/wireless/ath/ath9k/tx99.c index 311547f532bc..87d09d1e74aa 100644 --- a/drivers/net/wireless/ath/ath9k/tx99.c +++ b/drivers/net/wireless/ath/ath9k/tx99.c @@ -54,6 +54,7 @@ static struct sk_buff *ath9k_build_tx99_skb(struct ath_softc *sc) struct ieee80211_hdr *hdr; struct ieee80211_tx_info *tx_info; struct sk_buff *skb; + struct ath_vif *avp; skb = alloc_skb(len, GFP_KERNEL); if (!skb) @@ -71,11 +72,17 @@ static struct sk_buff *ath9k_build_tx99_skb(struct ath_softc *sc) memcpy(hdr->addr2, hw->wiphy->perm_addr, ETH_ALEN); memcpy(hdr->addr3, hw->wiphy->perm_addr, ETH_ALEN); + if (sc->tx99_vif) { + avp = (struct ath_vif *) sc->tx99_vif->drv_priv; + hdr->seq_ctrl |= cpu_to_le16(avp->seq_no); + } + tx_info = IEEE80211_SKB_CB(skb); memset(tx_info, 0, sizeof(*tx_info)); rate = &tx_info->control.rates[0]; tx_info->band = sc->cur_chan->chandef.chan->band; tx_info->flags = IEEE80211_TX_CTL_NO_ACK; + tx_info->control.vif = sc->tx99_vif; rate->count = 1; if (ah->curchan && IS_CHAN_HT(ah->curchan)) { rate->flags |= IEEE80211_TX_RC_MCS; diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index a743e3535d0a..458c4f53ba5d 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -2952,7 +2952,7 @@ int ath9k_tx99_send(struct ath_softc *sc, struct sk_buff *skb, return -EINVAL; } - ath_set_rates(NULL, NULL, bf); + ath_set_rates(sc->tx99_vif, NULL, bf); ath9k_hw_set_desc_link(sc->sc_ah, bf->bf_desc, bf->bf_daddr); ath9k_hw_tx99_start(sc->sc_ah, txctl->txq->axq_qnum); From ee013471266b6fc278eff264ef2979f903a6c502 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 10 Sep 2018 09:39:28 -0700 Subject: [PATCH 0662/3715] IB/hfi1: Missing return value in error path for user sdma [ Upstream commit 2bf4b33f83dfe521c4c7c407b6b150aeec04d69c ] If the set_txreq_header_agh() function returns an error, the exit path is chosen. In this path, the code fails to set the return value. This will cause the caller to not realize an error has occurred. Set the return value correctly in the error path. Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/user_sdma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 75275f9e363d..4854a4a453b5 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -856,8 +856,10 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) changes = set_txreq_header_ahg(req, tx, datalen); - if (changes < 0) + if (changes < 0) { + ret = changes; goto free_tx; + } } } else { ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) + From 09357805c5e563a905a1055507855ef66ee2880b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 19 Jul 2018 19:47:27 -0500 Subject: [PATCH 0663/3715] signal: Always ignore SIGKILL and SIGSTOP sent to the global init [ Upstream commit 86989c41b5ea08776c450cb759592532314a4ed6 ] If the first process started (aka /sbin/init) receives a SIGKILL it will panic the system if it is delivered. Making the system unusable and undebugable. It isn't much better if the first process started receives SIGSTOP. So always ignore SIGSTOP and SIGKILL sent to init. This is done in a separate clause in sig_task_ignored as force_sig_info can clear SIG_UNKILLABLE and this protection should work even then. Reviewed-by: Thomas Gleixner Signed-off-by: "Eric W. Biederman" Signed-off-by: Sasha Levin --- kernel/signal.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/signal.c b/kernel/signal.c index bb801156628e..c9b203875001 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -77,6 +77,10 @@ static int sig_task_ignored(struct task_struct *t, int sig, bool force) handler = sig_handler(t, sig); + /* SIGKILL and SIGSTOP may not be sent to the global init */ + if (unlikely(is_global_init(t) && sig_kernel_only(sig))) + return true; + if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) && handler == SIG_DFL && !(force && sig_kernel_only(sig))) return 1; From b0ae4104dc4a1bd748838c124c8bf11e707add93 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 19 Jul 2018 20:33:53 -0500 Subject: [PATCH 0664/3715] signal: Properly deliver SIGILL from uprobes [ Upstream commit 55a3235fc71bf34303e34a95eeee235b2d2a35dd ] For userspace to tell the difference between a random signal and an exception, the exception must include siginfo information. Using SEND_SIG_FORCED for SIGILL is thus wrong, and it will result in userspace seeing si_code == SI_USER (like a random signal) instead of si_code == SI_KERNEL or a more specific si_code as all exceptions deliver. Therefore replace force_sig_info(SIGILL, SEND_SIG_FORCE, current) with force_sig(SIG_ILL, current) which gets this right and is shorter and easier to type. Fixes: 014940bad8e4 ("uprobes/x86: Send SIGILL if arch_uprobe_post_xol() fails") Fixes: 0b5256c7f173 ("uprobes: Send SIGILL if handle_trampoline() fails") Reviewed-by: Thomas Gleixner Signed-off-by: "Eric W. Biederman" Signed-off-by: Sasha Levin --- kernel/events/uprobes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 01941cffa9c2..c74fc9826250 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1854,7 +1854,7 @@ static void handle_trampoline(struct pt_regs *regs) sigill: uprobe_warn(current, "handle uretprobe, sending SIGILL."); - force_sig_info(SIGILL, SEND_SIG_FORCED, current); + force_sig(SIGILL, current); } @@ -1970,7 +1970,7 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs) if (unlikely(err)) { uprobe_warn(current, "execute the probed insn, sending SIGILL."); - force_sig_info(SIGILL, SEND_SIG_FORCED, current); + force_sig(SIGILL, current); } } From b7da2be19178ba02912b1a3c7e46b6ab4f050f92 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 19 Jul 2018 20:48:30 -0500 Subject: [PATCH 0665/3715] signal: Properly deliver SIGSEGV from x86 uprobes [ Upstream commit 4a63c1ffd384ebdce40aac9c997dab68379137be ] For userspace to tell the difference between an random signal and an exception, the exception must include siginfo information. Using SEND_SIG_FORCED for SIGSEGV is thus wrong, and it will result in userspace seeing si_code == SI_USER (like a random signal) instead of si_code == SI_KERNEL or a more specific si_code as all exceptions deliver. Therefore replace force_sig_info(SIGSEGV, SEND_SIG_FORCE, current) with force_sig(SIG_SEGV, current) which gets this right and is shorter and easier to type. Fixes: 791eca10107f ("uretprobes/x86: Hijack return address") Reviewed-by: Thomas Gleixner Signed-off-by: "Eric W. Biederman" Signed-off-by: Sasha Levin --- arch/x86/kernel/uprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 7a87ef1f5b5e..73391c1bd2a9 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -987,7 +987,7 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs pr_err("uprobe: return address clobbered: pid=%d, %%sp=%#lx, " "%%ip=%#lx\n", current->pid, regs->sp, regs->ip); - force_sig_info(SIGSEGV, SEND_SIG_FORCED, current); + force_sig(SIGSEGV, current); } return -1; From 2afdbe702024508ef0286ff3240660c3b7164d47 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 5 Sep 2018 14:54:02 +0800 Subject: [PATCH 0666/3715] f2fs: fix memory leak of percpu counter in fill_super() [ Upstream commit 4a70e255449c9a13eed7a6eeecc85a1ea63cef76 ] In fill_super -> init_percpu_info, we should destroy percpu counter in error path, otherwise memory allcoated for percpu counter will leak. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/super.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 0f3209b23c94..e4aabfc21bd4 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2123,8 +2123,12 @@ static int init_percpu_info(struct f2fs_sb_info *sbi) if (err) return err; - return percpu_counter_init(&sbi->total_valid_inode_count, 0, + err = percpu_counter_init(&sbi->total_valid_inode_count, 0, GFP_KERNEL); + if (err) + percpu_counter_destroy(&sbi->alloc_valid_block_count); + + return err; } #ifdef CONFIG_BLK_DEV_ZONED From 18d7d335678813347c566f2e0d79ccc2a90b089d Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Fri, 31 Aug 2018 11:24:36 -0700 Subject: [PATCH 0667/3715] scsi: qla2xxx: Fix iIDMA error [ Upstream commit 8d9bf0a9a268f7ca0b811d6e6a1fc783afa5c746 ] When switch responds with error for Get Port Speed Command (GPSC), driver should not proceed with telling FW about the speed of the remote port. Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_gs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c index 2a19ec0660cb..1088038e6a41 100644 --- a/drivers/scsi/qla2xxx/qla_gs.c +++ b/drivers/scsi/qla2xxx/qla_gs.c @@ -3033,7 +3033,7 @@ static void qla24xx_async_gpsc_sp_done(void *s, int res) ql_dbg(ql_dbg_disc, vha, 0x2019, "GPSC command unsupported, disabling query.\n"); ha->flags.gpsc_supported = 0; - res = QLA_SUCCESS; + goto done; } } else { switch (be16_to_cpu(ct_rsp->rsp.gpsc.speed)) { @@ -3066,13 +3066,13 @@ static void qla24xx_async_gpsc_sp_done(void *s, int res) be16_to_cpu(ct_rsp->rsp.gpsc.speeds), be16_to_cpu(ct_rsp->rsp.gpsc.speed)); } -done: memset(&ea, 0, sizeof(ea)); ea.event = FCME_GPSC_DONE; ea.rc = res; ea.fcport = fcport; qla2x00_fcport_event_handler(vha, &ea); +done: sp->free(sp); } From 32fd94c6dbf7e634ab91818546e12ea7600e157d Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Fri, 31 Aug 2018 11:24:37 -0700 Subject: [PATCH 0668/3715] scsi: qla2xxx: Defer chip reset until target mode is enabled [ Upstream commit 93eca6135183f7a71e36acd47655a085ed11bcdc ] For target mode, any chip reset triggered before target mode is enabled will be held off until user is ready to enable. This prevents the chip from starting or running before it is intended. Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_os.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 343fbaa6d2a2..5617bb18c233 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -5801,12 +5801,27 @@ qla2x00_do_dpc(void *data) if (test_and_clear_bit (ISP_ABORT_NEEDED, &base_vha->dpc_flags) && !test_bit(UNLOADING, &base_vha->dpc_flags)) { + bool do_reset = true; - ql_dbg(ql_dbg_dpc, base_vha, 0x4007, - "ISP abort scheduled.\n"); - if (!(test_and_set_bit(ABORT_ISP_ACTIVE, + switch (ql2x_ini_mode) { + case QLA2XXX_INI_MODE_ENABLED: + break; + case QLA2XXX_INI_MODE_DISABLED: + if (!qla_tgt_mode_enabled(base_vha)) + do_reset = false; + break; + case QLA2XXX_INI_MODE_DUAL: + if (!qla_dual_mode_enabled(base_vha)) + do_reset = false; + break; + default: + break; + } + + if (do_reset && !(test_and_set_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags))) { - + ql_dbg(ql_dbg_dpc, base_vha, 0x4007, + "ISP abort scheduled.\n"); if (ha->isp_ops->abort_isp(base_vha)) { /* failed. retry later */ set_bit(ISP_ABORT_NEEDED, @@ -5814,10 +5829,9 @@ qla2x00_do_dpc(void *data) } clear_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags); + ql_dbg(ql_dbg_dpc, base_vha, 0x4008, + "ISP abort end.\n"); } - - ql_dbg(ql_dbg_dpc, base_vha, 0x4008, - "ISP abort end.\n"); } if (test_and_clear_bit(FCPORT_UPDATE_NEEDED, From 9ab019f1ddc4ad8a23fb2bdcbb5674d06cf5f628 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Tue, 11 Sep 2018 10:18:16 -0700 Subject: [PATCH 0669/3715] scsi: qla2xxx: Fix dropped srb resource. [ Upstream commit 527b8ae3948bb59c13ebaa7d657ced56ea25ab05 ] When FW rejects a command due to "entry_status" error (malform IOCB), the srb resource needs to be returned back for cleanup. The filter to catch this is in the wrong location. Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_isr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index df94ef816826..6a76d7217515 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -2792,6 +2792,7 @@ qla2x00_error_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, sts_entry_t *pkt) case ELS_IOCB_TYPE: case ABORT_IOCB_TYPE: case MBX_IOCB_TYPE: + default: sp = qla2x00_get_sp_from_handle(vha, func, req, pkt); if (sp) { sp->done(sp, res); @@ -2802,7 +2803,6 @@ qla2x00_error_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, sts_entry_t *pkt) case ABTS_RESP_24XX: case CTIO_TYPE7: case CTIO_CRC2: - default: return 1; } fatal: From 5577a1c86be96bbc57f3a68ac2e39ba6b9179a05 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 10 Sep 2018 10:30:48 -0700 Subject: [PATCH 0670/3715] scsi: lpfc: Fix errors in log messages. [ Upstream commit 2879265f514b1f4154288243c91438ddbedb3ed4 ] Message 6408 is displayed for each entry in an array, but the cpu and queue numbers were incorrect for the entry. Message 6001 includes an extraneous character. Resolve both issues Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_nvme.c | 2 +- drivers/scsi/lpfc/lpfc_nvmet.c | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 23bdb1ca106e..6c4499db969c 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -144,7 +144,7 @@ lpfc_nvme_delete_queue(struct nvme_fc_local_port *pnvme_lport, vport = lport->vport; lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME, - "6001 ENTER. lpfc_pnvme %p, qidx x%xi qhandle %p\n", + "6001 ENTER. lpfc_pnvme %p, qidx x%x qhandle %p\n", lport, qidx, handle); kfree(handle); } diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 7ac1a067d780..eacdcb931bda 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -1078,15 +1078,14 @@ lpfc_nvmet_setup_io_context(struct lpfc_hba *phba) idx = 0; } - infop = phba->sli4_hba.nvmet_ctx_info; - for (j = 0; j < phba->cfg_nvmet_mrq; j++) { - for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) { + for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) { + for (j = 0; j < phba->cfg_nvmet_mrq; j++) { + infop = lpfc_get_ctx_list(phba, i, j); lpfc_printf_log(phba, KERN_INFO, LOG_NVME | LOG_INIT, "6408 TOTAL NVMET ctx for CPU %d " "MRQ %d: cnt %d nextcpu %p\n", i, j, infop->nvmet_ctx_list_cnt, infop->nvmet_ctx_next_cpu); - infop++; } } return 0; From a125e3ee2d0a88e7a6f0ab374ef19d26a0a2670e Mon Sep 17 00:00:00 2001 From: George Kennedy Date: Wed, 29 Aug 2018 11:38:16 -0400 Subject: [PATCH 0671/3715] scsi: sym53c8xx: fix NULL pointer dereference panic in sym_int_sir() [ Upstream commit 288315e95264b6355e26609e9dec5dc4563d4ab0 ] sym_int_sir() in sym_hipd.c does not check the command pointer for NULL before using it in debug message prints. Suggested-by: Matthew Wilcox Signed-off-by: George Kennedy Reviewed-by: Mark Kanda Acked-by: Matthew Wilcox Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/sym53c8xx_2/sym_hipd.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/sym53c8xx_2/sym_hipd.c b/drivers/scsi/sym53c8xx_2/sym_hipd.c index 378af306fda1..b87b6c63431d 100644 --- a/drivers/scsi/sym53c8xx_2/sym_hipd.c +++ b/drivers/scsi/sym53c8xx_2/sym_hipd.c @@ -4371,6 +4371,13 @@ static void sym_nego_rejected(struct sym_hcb *np, struct sym_tcb *tp, struct sym OUTB(np, HS_PRT, HS_BUSY); } +#define sym_printk(lvl, tp, cp, fmt, v...) do { \ + if (cp) \ + scmd_printk(lvl, cp->cmd, fmt, ##v); \ + else \ + starget_printk(lvl, tp->starget, fmt, ##v); \ +} while (0) + /* * chip exception handler for programmed interrupts. */ @@ -4416,7 +4423,7 @@ static void sym_int_sir(struct sym_hcb *np) * been selected with ATN. We do not want to handle that. */ case SIR_SEL_ATN_NO_MSG_OUT: - scmd_printk(KERN_WARNING, cp->cmd, + sym_printk(KERN_WARNING, tp, cp, "No MSG OUT phase after selection with ATN\n"); goto out_stuck; /* @@ -4424,7 +4431,7 @@ static void sym_int_sir(struct sym_hcb *np) * having reselected the initiator. */ case SIR_RESEL_NO_MSG_IN: - scmd_printk(KERN_WARNING, cp->cmd, + sym_printk(KERN_WARNING, tp, cp, "No MSG IN phase after reselection\n"); goto out_stuck; /* @@ -4432,7 +4439,7 @@ static void sym_int_sir(struct sym_hcb *np) * an IDENTIFY. */ case SIR_RESEL_NO_IDENTIFY: - scmd_printk(KERN_WARNING, cp->cmd, + sym_printk(KERN_WARNING, tp, cp, "No IDENTIFY after reselection\n"); goto out_stuck; /* @@ -4461,7 +4468,7 @@ static void sym_int_sir(struct sym_hcb *np) case SIR_RESEL_ABORTED: np->lastmsg = np->msgout[0]; np->msgout[0] = M_NOOP; - scmd_printk(KERN_WARNING, cp->cmd, + sym_printk(KERN_WARNING, tp, cp, "message %x sent on bad reselection\n", np->lastmsg); goto out; /* From 12e8d5b469a7e88b4d6ef3386a83e3aebda84376 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Thu, 2 Aug 2018 12:34:21 +0200 Subject: [PATCH 0672/3715] ARM: imx6: register pm_power_off handler if "fsl,pmic-stby-poweroff" is set [ Upstream commit 8148d2136002da2e2887caf6a07bbd9c033f14f3 ] One of the Freescale recommended sequences for power off with external PMIC is the following: ... 3. SoC is programming PMIC for power off when standby is asserted. 4. In CCM STOP mode, Standby is asserted, PMIC gates SoC supplies. See: http://www.nxp.com/assets/documents/data/en/reference-manuals/IMX6DQRM.pdf page 5083 This patch implements step 4. of this sequence. Signed-off-by: Oleksij Rempel Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/mach-imx/pm-imx6.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/arch/arm/mach-imx/pm-imx6.c b/arch/arm/mach-imx/pm-imx6.c index ecdf071653d4..6078bcc9f594 100644 --- a/arch/arm/mach-imx/pm-imx6.c +++ b/arch/arm/mach-imx/pm-imx6.c @@ -604,6 +604,28 @@ static void __init imx6_pm_common_init(const struct imx6_pm_socdata IMX6Q_GPR1_GINT); } +static void imx6_pm_stby_poweroff(void) +{ + imx6_set_lpm(STOP_POWER_OFF); + imx6q_suspend_finish(0); + + mdelay(1000); + + pr_emerg("Unable to poweroff system\n"); +} + +static int imx6_pm_stby_poweroff_probe(void) +{ + if (pm_power_off) { + pr_warn("%s: pm_power_off already claimed %p %pf!\n", + __func__, pm_power_off, pm_power_off); + return -EBUSY; + } + + pm_power_off = imx6_pm_stby_poweroff; + return 0; +} + void __init imx6_pm_ccm_init(const char *ccm_compat) { struct device_node *np; @@ -620,6 +642,9 @@ void __init imx6_pm_ccm_init(const char *ccm_compat) val = readl_relaxed(ccm_base + CLPCR); val &= ~BM_CLPCR_LPM; writel_relaxed(val, ccm_base + CLPCR); + + if (of_property_read_bool(np, "fsl,pmic-stby-poweroff")) + imx6_pm_stby_poweroff_probe(); } void __init imx6q_pm_init(void) From f84f69da3610b57a5f3720f8ca6943761203dfa9 Mon Sep 17 00:00:00 2001 From: Deepak Ukey Date: Tue, 11 Sep 2018 14:18:03 +0530 Subject: [PATCH 0673/3715] scsi: pm80xx: Corrected dma_unmap_sg() parameter [ Upstream commit 76cb25b058034d37244be6aca97a2ad52a5fbcad ] For the function dma_unmap_sg(), the parameter should be number of elements in the scatter list prior to the mapping, not after the mapping. Signed-off-by: Deepak Ukey Signed-off-by: Viswas G Acked-by: Jack Wang Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/pm8001/pm8001_sas.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c index ce584c31d36e..d1fcd21f7f7d 100644 --- a/drivers/scsi/pm8001/pm8001_sas.c +++ b/drivers/scsi/pm8001/pm8001_sas.c @@ -466,7 +466,7 @@ err_out: dev_printk(KERN_ERR, pm8001_ha->dev, "pm8001 exec failed[%d]!\n", rc); if (!sas_protocol_ata(t->task_proto)) if (n_elem) - dma_unmap_sg(pm8001_ha->dev, t->scatter, n_elem, + dma_unmap_sg(pm8001_ha->dev, t->scatter, t->num_scatter, t->data_dir); out_done: spin_unlock_irqrestore(&pm8001_ha->lock, flags); From d36d5fbfe15cdc0c553e924dad53f14a6b3de77f Mon Sep 17 00:00:00 2001 From: Deepak Ukey Date: Tue, 11 Sep 2018 14:18:04 +0530 Subject: [PATCH 0674/3715] scsi: pm80xx: Fixed system hang issue during kexec boot [ Upstream commit 72349b62a571effd6faadd0600b8e657dd87afbf ] When the firmware is not responding, execution of kexec boot causes a system hang. When firmware assertion happened, driver get notified with interrupt vector updated in MPI configuration table. Then, the driver will read scratchpad register and set controller_fatal_error flag to true. Signed-off-by: Deepak Ukey Signed-off-by: Viswas G Acked-by: Jack Wang Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/pm8001/pm8001_hwi.c | 6 +++ drivers/scsi/pm8001/pm8001_sas.c | 7 +++ drivers/scsi/pm8001/pm8001_sas.h | 1 + drivers/scsi/pm8001/pm80xx_hwi.c | 80 +++++++++++++++++++++++++++++--- drivers/scsi/pm8001/pm80xx_hwi.h | 3 ++ 5 files changed, 91 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c index 10546faac58c..f374abfb7f1f 100644 --- a/drivers/scsi/pm8001/pm8001_hwi.c +++ b/drivers/scsi/pm8001/pm8001_hwi.c @@ -1479,6 +1479,12 @@ u32 pm8001_mpi_msg_consume(struct pm8001_hba_info *pm8001_ha, } else { u32 producer_index; void *pi_virt = circularQ->pi_virt; + /* spurious interrupt during setup if + * kexec-ing and driver doing a doorbell access + * with the pre-kexec oq interrupt setup + */ + if (!pi_virt) + break; /* Update the producer index from SPC */ producer_index = pm8001_read_32(pi_virt); circularQ->producer_index = cpu_to_le32(producer_index); diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c index d1fcd21f7f7d..e64a13f0bce1 100644 --- a/drivers/scsi/pm8001/pm8001_sas.c +++ b/drivers/scsi/pm8001/pm8001_sas.c @@ -374,6 +374,13 @@ static int pm8001_task_exec(struct sas_task *task, return 0; } pm8001_ha = pm8001_find_ha_by_dev(task->dev); + if (pm8001_ha->controller_fatal_error) { + struct task_status_struct *ts = &t->task_status; + + ts->resp = SAS_TASK_UNDELIVERED; + t->task_done(t); + return 0; + } PM8001_IO_DBG(pm8001_ha, pm8001_printk("pm8001_task_exec device \n ")); spin_lock_irqsave(&pm8001_ha->lock, flags); do { diff --git a/drivers/scsi/pm8001/pm8001_sas.h b/drivers/scsi/pm8001/pm8001_sas.h index e81a8fa7ef1a..e954ecd3f6c0 100644 --- a/drivers/scsi/pm8001/pm8001_sas.h +++ b/drivers/scsi/pm8001/pm8001_sas.h @@ -529,6 +529,7 @@ struct pm8001_hba_info { u32 logging_level; u32 fw_status; u32 smp_exp_mode; + bool controller_fatal_error; const struct firmware *fw_image; struct isr_param irq_vector[PM8001_MAX_MSIX_VEC]; }; diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index eb4fee61df72..9edd61c063a1 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -572,6 +572,9 @@ static void update_main_config_table(struct pm8001_hba_info *pm8001_ha) pm8001_ha->main_cfg_tbl.pm80xx_tbl.pcs_event_log_size); pm8001_mw32(address, MAIN_PCS_EVENT_LOG_OPTION, pm8001_ha->main_cfg_tbl.pm80xx_tbl.pcs_event_log_severity); + /* Update Fatal error interrupt vector */ + pm8001_ha->main_cfg_tbl.pm80xx_tbl.fatal_err_interrupt |= + ((pm8001_ha->number_of_intr - 1) << 8); pm8001_mw32(address, MAIN_FATAL_ERROR_INTERRUPT, pm8001_ha->main_cfg_tbl.pm80xx_tbl.fatal_err_interrupt); pm8001_mw32(address, MAIN_EVENT_CRC_CHECK, @@ -1099,6 +1102,9 @@ static int pm80xx_chip_init(struct pm8001_hba_info *pm8001_ha) return -EBUSY; } + /* Initialize the controller fatal error flag */ + pm8001_ha->controller_fatal_error = false; + /* Initialize pci space address eg: mpi offset */ init_pci_device_addresses(pm8001_ha); init_default_table_values(pm8001_ha); @@ -1207,13 +1213,17 @@ pm80xx_chip_soft_rst(struct pm8001_hba_info *pm8001_ha) u32 bootloader_state; u32 ibutton0, ibutton1; - /* Check if MPI is in ready state to reset */ - if (mpi_uninit_check(pm8001_ha) != 0) { - PM8001_FAIL_DBG(pm8001_ha, - pm8001_printk("MPI state is not ready\n")); - return -1; + /* Process MPI table uninitialization only if FW is ready */ + if (!pm8001_ha->controller_fatal_error) { + /* Check if MPI is in ready state to reset */ + if (mpi_uninit_check(pm8001_ha) != 0) { + regval = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_1); + PM8001_FAIL_DBG(pm8001_ha, pm8001_printk( + "MPI state is not ready scratch1 :0x%x\n", + regval)); + return -1; + } } - /* checked for reset register normal state; 0x0 */ regval = pm8001_cr32(pm8001_ha, 0, SPC_REG_SOFT_RESET); PM8001_INIT_DBG(pm8001_ha, @@ -3717,6 +3727,46 @@ static void process_one_iomb(struct pm8001_hba_info *pm8001_ha, void *piomb) } } +static void print_scratchpad_registers(struct pm8001_hba_info *pm8001_ha) +{ + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("MSGU_SCRATCH_PAD_0: 0x%x\n", + pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_0))); + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("MSGU_SCRATCH_PAD_1:0x%x\n", + pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_1))); + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("MSGU_SCRATCH_PAD_2: 0x%x\n", + pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_2))); + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("MSGU_SCRATCH_PAD_3: 0x%x\n", + pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_3))); + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("MSGU_HOST_SCRATCH_PAD_0: 0x%x\n", + pm8001_cr32(pm8001_ha, 0, MSGU_HOST_SCRATCH_PAD_0))); + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("MSGU_HOST_SCRATCH_PAD_1: 0x%x\n", + pm8001_cr32(pm8001_ha, 0, MSGU_HOST_SCRATCH_PAD_1))); + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("MSGU_HOST_SCRATCH_PAD_2: 0x%x\n", + pm8001_cr32(pm8001_ha, 0, MSGU_HOST_SCRATCH_PAD_2))); + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("MSGU_HOST_SCRATCH_PAD_3: 0x%x\n", + pm8001_cr32(pm8001_ha, 0, MSGU_HOST_SCRATCH_PAD_3))); + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("MSGU_HOST_SCRATCH_PAD_4: 0x%x\n", + pm8001_cr32(pm8001_ha, 0, MSGU_HOST_SCRATCH_PAD_4))); + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("MSGU_HOST_SCRATCH_PAD_5: 0x%x\n", + pm8001_cr32(pm8001_ha, 0, MSGU_HOST_SCRATCH_PAD_5))); + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("MSGU_RSVD_SCRATCH_PAD_0: 0x%x\n", + pm8001_cr32(pm8001_ha, 0, MSGU_HOST_SCRATCH_PAD_6))); + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("MSGU_RSVD_SCRATCH_PAD_1: 0x%x\n", + pm8001_cr32(pm8001_ha, 0, MSGU_HOST_SCRATCH_PAD_7))); +} + static int process_oq(struct pm8001_hba_info *pm8001_ha, u8 vec) { struct outbound_queue_table *circularQ; @@ -3724,10 +3774,28 @@ static int process_oq(struct pm8001_hba_info *pm8001_ha, u8 vec) u8 uninitialized_var(bc); u32 ret = MPI_IO_STATUS_FAIL; unsigned long flags; + u32 regval; + if (vec == (pm8001_ha->number_of_intr - 1)) { + regval = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_1); + if ((regval & SCRATCH_PAD_MIPSALL_READY) != + SCRATCH_PAD_MIPSALL_READY) { + pm8001_ha->controller_fatal_error = true; + PM8001_FAIL_DBG(pm8001_ha, pm8001_printk( + "Firmware Fatal error! Regval:0x%x\n", regval)); + print_scratchpad_registers(pm8001_ha); + return ret; + } + } spin_lock_irqsave(&pm8001_ha->lock, flags); circularQ = &pm8001_ha->outbnd_q_tbl[vec]; do { + /* spurious interrupt during setup if kexec-ing and + * driver doing a doorbell access w/ the pre-kexec oq + * interrupt setup. + */ + if (!circularQ->pi_virt) + break; ret = pm8001_mpi_msg_consume(pm8001_ha, circularQ, &pMsg1, &bc); if (MPI_IO_STATUS_SUCCESS == ret) { /* process the outbound message */ diff --git a/drivers/scsi/pm8001/pm80xx_hwi.h b/drivers/scsi/pm8001/pm80xx_hwi.h index 7a443bad6163..411b414a9a0e 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.h +++ b/drivers/scsi/pm8001/pm80xx_hwi.h @@ -1288,6 +1288,9 @@ typedef struct SASProtocolTimerConfig SASProtocolTimerConfig_t; #define SCRATCH_PAD_BOOT_LOAD_SUCCESS 0x0 #define SCRATCH_PAD_IOP0_READY 0xC00 #define SCRATCH_PAD_IOP1_READY 0x3000 +#define SCRATCH_PAD_MIPSALL_READY (SCRATCH_PAD_IOP1_READY | \ + SCRATCH_PAD_IOP0_READY | \ + SCRATCH_PAD_RAAE_READY) /* boot loader state */ #define SCRATCH_PAD1_BOOTSTATE_MASK 0x70 /* Bit 4-6 */ From 9e50bed60686a27c31199f3f9c371c9670b60e6a Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 11 Sep 2018 19:21:09 +0900 Subject: [PATCH 0675/3715] kprobes: Don't call BUG_ON() if there is a kprobe in use on free list [ Upstream commit cbdd96f5586151e48317d90a403941ec23f12660 ] Instead of calling BUG_ON(), if we find a kprobe in use on free kprobe list, just remove it from the list and keep it on kprobe hash list as same as other in-use kprobes. Signed-off-by: Masami Hiramatsu Cc: Anil S Keshavamurthy Cc: David S . Miller Cc: Linus Torvalds Cc: Naveen N . Rao Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/153666126882.21306.10738207224288507996.stgit@devbox Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/kprobes.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index f7a4602a76f9..d0fe20a5475f 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -544,8 +544,14 @@ static void do_free_cleaned_kprobes(void) struct optimized_kprobe *op, *tmp; list_for_each_entry_safe(op, tmp, &freeing_list, list) { - BUG_ON(!kprobe_unused(&op->kp)); list_del_init(&op->list); + if (WARN_ON_ONCE(!kprobe_unused(&op->kp))) { + /* + * This must not happen, but if there is a kprobe + * still in use, keep it on kprobes hash list. + */ + continue; + } free_aggr_kprobe(&op->kp); } } From 57441e0615f155fe8b5002c68a79e61c576e2830 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Fri, 10 Aug 2018 23:06:11 +0000 Subject: [PATCH 0676/3715] Drivers: hv: vmbus: Fix synic per-cpu context initialization [ Upstream commit f25a7ece08bdb1f2b3c4bbeae942682fc3a99dde ] If hv_synic_alloc() errors out, the state of the per-cpu context for some CPUs is unknown since the zero'ing is done as each CPU is iterated over. In such case, hv_synic_cleanup() may try to free memory based on uninitialized values. Fix this by zero'ing the per-cpu context for all CPUs before doing any memory allocations that might fail. Signed-off-by: Michael Kelley Reported-by: Dan Carpenter Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/hv/hv.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index fe041f22521d..23f312b4c6aa 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -148,6 +148,17 @@ static void hv_init_clockevent_device(struct clock_event_device *dev, int cpu) int hv_synic_alloc(void) { int cpu; + struct hv_per_cpu_context *hv_cpu; + + /* + * First, zero all per-cpu memory areas so hv_synic_free() can + * detect what memory has been allocated and cleanup properly + * after any failures. + */ + for_each_present_cpu(cpu) { + hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu); + memset(hv_cpu, 0, sizeof(*hv_cpu)); + } hv_context.hv_numa_map = kzalloc(sizeof(struct cpumask) * nr_node_ids, GFP_ATOMIC); @@ -157,10 +168,8 @@ int hv_synic_alloc(void) } for_each_present_cpu(cpu) { - struct hv_per_cpu_context *hv_cpu - = per_cpu_ptr(hv_context.cpu_context, cpu); + hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu); - memset(hv_cpu, 0, sizeof(*hv_cpu)); tasklet_init(&hv_cpu->msg_dpc, vmbus_on_msg_dpc, (unsigned long) hv_cpu); From 3801e9dd95c024c1e284d6ce4e143e9f0eb44223 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 7 Aug 2018 13:19:35 +0100 Subject: [PATCH 0677/3715] nvmem: core: return error code instead of NULL from nvmem_device_get [ Upstream commit ca6ac25cecf0e740d7cc8e03e0ebbf8acbeca3df ] nvmem_device_get() should return ERR_PTR() on error or valid pointer on success, but one of the code path seems to return NULL, so fix it. Reported-by: Niklas Cassel Signed-off-by: Srinivas Kandagatla Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/nvmem/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index b414d9d207d4..08b171731664 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -617,7 +617,7 @@ static struct nvmem_device *nvmem_find(const char *name) d = bus_find_device(&nvmem_bus_type, NULL, (void *)name, nvmem_match); if (!d) - return NULL; + return ERR_PTR(-ENOENT); return to_nvmem_device(d); } From 9b8ba684bee9254e3a31f4675b7efca7589324d0 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 14 Jun 2018 09:48:07 -0400 Subject: [PATCH 0678/3715] media: dt-bindings: adv748x: Fix decimal unit addresses [ Upstream commit 27582f0ea97fe3e4a38beb98ab36cce4b6f029d5 ] With recent dtc and W=1: Warning (graph_port): video-receiver@70/port@10: graph node unit address error, expected "a" Warning (graph_port): video-receiver@70/port@11: graph node unit address error, expected "b" Unit addresses are always hexadecimal (without prefix), while the bases of reg property values depend on their prefixes. Fixes: e69595170b1cad85 ("media: adv748x: Add adv7481, adv7482 bindings") Signed-off-by: Geert Uytterhoeven Reviewed-by: Rob Herring Reviewed-by: Kieran Bingham Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- Documentation/devicetree/bindings/media/i2c/adv748x.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/media/i2c/adv748x.txt b/Documentation/devicetree/bindings/media/i2c/adv748x.txt index 21ffb5ed8183..54d1d3bc1869 100644 --- a/Documentation/devicetree/bindings/media/i2c/adv748x.txt +++ b/Documentation/devicetree/bindings/media/i2c/adv748x.txt @@ -73,7 +73,7 @@ Example: }; }; - port@10 { + port@a { reg = <10>; adv7482_txa: endpoint { @@ -83,7 +83,7 @@ Example: }; }; - port@11 { + port@b { reg = <11>; adv7482_txb: endpoint { From 5e19fb9b965dae63bbb7d7d00698e304b6ebaa79 Mon Sep 17 00:00:00 2001 From: Lao Wei Date: Mon, 9 Jul 2018 08:15:53 -0400 Subject: [PATCH 0679/3715] media: fix: media: pci: meye: validate offset to avoid arbitrary access [ Upstream commit eac7230fdb4672c2cb56f6a01a1744f562c01f80 ] Motion eye video4linux driver for Sony Vaio PictureBook desn't validate user-controlled parameter 'vma->vm_pgoff', a malicious process might access all of kernel memory from user space by trying pass different arbitrary address. Discussion: http://www.openwall.com/lists/oss-security/2018/07/06/1 Signed-off-by: Lao Wei Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/pci/meye/meye.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/pci/meye/meye.c b/drivers/media/pci/meye/meye.c index 49e047e4a81e..926707c997ac 100644 --- a/drivers/media/pci/meye/meye.c +++ b/drivers/media/pci/meye/meye.c @@ -1460,7 +1460,7 @@ static int meye_mmap(struct file *file, struct vm_area_struct *vma) unsigned long page, pos; mutex_lock(&meye.lock); - if (size > gbuffers * gbufsize) { + if (size > gbuffers * gbufsize || offset > gbuffers * gbufsize - size) { mutex_unlock(&meye.lock); return -EINVAL; } From cd5bedea5e5cca50e28571a160a15a7816e8cd09 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 27 Aug 2018 15:56:21 -0400 Subject: [PATCH 0680/3715] media: dvb: fix compat ioctl translation [ Upstream commit 1ccbeeb888ac33627d91f1ccf0b84ef3bcadef24 ] The VIDEO_GET_EVENT and VIDEO_STILLPICTURE was added back in 2005 but it never worked because the command number is wrong. Using the right command number means we have a better chance of them actually doing the right thing, though clearly nobody has ever tried it successfully. I noticed these while auditing the remaining users of compat_time_t for y2038 bugs. This one is fine in that regard, it just never did anything. Fixes: 6e87abd0b8cb ("[DVB]: Add compat ioctl handling.") Signed-off-by: Arnd Bergmann Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- fs/compat_ioctl.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index ea52b98b39fa..033e8e6aabb7 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -161,6 +161,7 @@ struct compat_video_event { unsigned int frame_rate; } u; }; +#define VIDEO_GET_EVENT32 _IOR('o', 28, struct compat_video_event) static int do_video_get_event(struct file *file, unsigned int cmd, struct compat_video_event __user *up) @@ -172,7 +173,7 @@ static int do_video_get_event(struct file *file, if (kevent == NULL) return -EFAULT; - err = do_ioctl(file, cmd, (unsigned long)kevent); + err = do_ioctl(file, VIDEO_GET_EVENT, (unsigned long)kevent); if (!err) { err = convert_in_user(&kevent->type, &up->type); err |= convert_in_user(&kevent->timestamp, &up->timestamp); @@ -191,6 +192,7 @@ struct compat_video_still_picture { compat_uptr_t iFrame; int32_t size; }; +#define VIDEO_STILLPICTURE32 _IOW('o', 30, struct compat_video_still_picture) static int do_video_stillpicture(struct file *file, unsigned int cmd, struct compat_video_still_picture __user *up) @@ -213,7 +215,7 @@ static int do_video_stillpicture(struct file *file, if (err) return -EFAULT; - err = do_ioctl(file, cmd, (unsigned long) up_native); + err = do_ioctl(file, VIDEO_STILLPICTURE, (unsigned long) up_native); return err; } @@ -1476,9 +1478,9 @@ static long do_ioctl_trans(unsigned int cmd, return rtc_ioctl(file, cmd, argp); /* dvb */ - case VIDEO_GET_EVENT: + case VIDEO_GET_EVENT32: return do_video_get_event(file, cmd, argp); - case VIDEO_STILLPICTURE: + case VIDEO_STILLPICTURE32: return do_video_stillpicture(file, cmd, argp); case VIDEO_SET_SPU_PALETTE: return do_video_set_spu_palette(file, cmd, argp); From c9e2f562da34ae10d15a5ff3319ee4021c04815a Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Thu, 30 Aug 2018 12:53:17 +0200 Subject: [PATCH 0681/3715] arm64: dts: meson: libretech: update board model [ Upstream commit b7eb0e26cc4a212fde09144cd49d4103170d2b9e ] There is actually several different libretech board with the CC suffix so the model name is not appropriate here. Update to something more specific Reported-by: Da Xue Signed-off-by: Jerome Brunet Signed-off-by: Kevin Hilman Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts index d71cbf596d1f..0814b6b29b86 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts @@ -14,7 +14,7 @@ / { compatible = "libretech,cc", "amlogic,s905x", "amlogic,meson-gxl"; - model = "Libre Technology CC"; + model = "Libre Computer Board AML-S905X-CC"; aliases { serial0 = &uart_AO; From 9f66cf82a2333f441b03241f503fab1852273dc0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 28 Aug 2018 16:39:10 +0200 Subject: [PATCH 0682/3715] ALSA: intel8x0m: Register irq handler after register initializations [ Upstream commit 7064f376d4a10686f51c879401a569bb4babf9c6 ] The interrupt handler has to be acquired after the other resource initialization when allocated with IRQF_SHARED. Otherwise it's triggered before the resource gets ready, and may lead to unpleasant behavior. Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/intel8x0m.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/sound/pci/intel8x0m.c b/sound/pci/intel8x0m.c index 3a4769a97d29..a626ee18628e 100644 --- a/sound/pci/intel8x0m.c +++ b/sound/pci/intel8x0m.c @@ -1171,16 +1171,6 @@ static int snd_intel8x0m_create(struct snd_card *card, } port_inited: - if (request_irq(pci->irq, snd_intel8x0m_interrupt, IRQF_SHARED, - KBUILD_MODNAME, chip)) { - dev_err(card->dev, "unable to grab IRQ %d\n", pci->irq); - snd_intel8x0m_free(chip); - return -EBUSY; - } - chip->irq = pci->irq; - pci_set_master(pci); - synchronize_irq(chip->irq); - /* initialize offsets */ chip->bdbars_count = 2; tbl = intel_regs; @@ -1224,11 +1214,21 @@ static int snd_intel8x0m_create(struct snd_card *card, chip->int_sta_reg = ICH_REG_GLOB_STA; chip->int_sta_mask = int_sta_masks; + pci_set_master(pci); + if ((err = snd_intel8x0m_chip_init(chip, 1)) < 0) { snd_intel8x0m_free(chip); return err; } + if (request_irq(pci->irq, snd_intel8x0m_interrupt, IRQF_SHARED, + KBUILD_MODNAME, chip)) { + dev_err(card->dev, "unable to grab IRQ %d\n", pci->irq); + snd_intel8x0m_free(chip); + return -EBUSY; + } + chip->irq = pci->irq; + if ((err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops)) < 0) { snd_intel8x0m_free(chip); return err; From e6b8633c15e0586c7cce507cfca060ece5401f5d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 10 Sep 2018 11:37:45 +0300 Subject: [PATCH 0683/3715] pinctrl: at91-pio4: fix has_config check in atmel_pctl_dt_subnode_to_map() [ Upstream commit b97760ae8e3dc8bb91881c13425a0bff55f2bd85 ] Smatch complains about this condition: if (has_config && num_pins >= 1) The "has_config" variable is either uninitialized or true. The "num_pins" variable is unsigned and we verified that it is non-zero on the lines before so we know "num_pines >= 1" is true. Really, we could just check "num_configs" directly and remove the "has_config" variable. Fixes: 776180848b57 ("pinctrl: introduce driver for Atmel PIO4 controller") Signed-off-by: Dan Carpenter Acked-by: Ludovic Desroches Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/pinctrl-at91-pio4.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/pinctrl/pinctrl-at91-pio4.c b/drivers/pinctrl/pinctrl-at91-pio4.c index e61e2f8c91ce..e9d797707255 100644 --- a/drivers/pinctrl/pinctrl-at91-pio4.c +++ b/drivers/pinctrl/pinctrl-at91-pio4.c @@ -483,7 +483,6 @@ static int atmel_pctl_dt_subnode_to_map(struct pinctrl_dev *pctldev, unsigned num_pins, num_configs, reserve; unsigned long *configs; struct property *pins; - bool has_config; u32 pinfunc; int ret, i; @@ -499,9 +498,6 @@ static int atmel_pctl_dt_subnode_to_map(struct pinctrl_dev *pctldev, return ret; } - if (num_configs) - has_config = true; - num_pins = pins->length / sizeof(u32); if (!num_pins) { dev_err(pctldev->dev, "no pins found in node %pOF\n", np); @@ -514,7 +510,7 @@ static int atmel_pctl_dt_subnode_to_map(struct pinctrl_dev *pctldev, * map for each pin. */ reserve = 1; - if (has_config && num_pins >= 1) + if (num_configs) reserve++; reserve *= num_pins; ret = pinctrl_utils_reserve_map(pctldev, map, reserved_maps, num_maps, @@ -537,7 +533,7 @@ static int atmel_pctl_dt_subnode_to_map(struct pinctrl_dev *pctldev, pinctrl_utils_add_map_mux(pctldev, map, reserved_maps, num_maps, group, func); - if (has_config) { + if (num_configs) { ret = pinctrl_utils_add_map_configs(pctldev, map, reserved_maps, num_maps, group, configs, num_configs, From 06f540913df5cfc74a1febc30f2e20f0266c4f52 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 11 Sep 2018 11:42:06 -0700 Subject: [PATCH 0684/3715] llc: avoid blocking in llc_sap_close() [ Upstream commit 9708d2b5b7c648e8e0a40d11e8cea12f6277f33c ] llc_sap_close() is called by llc_sap_put() which could be called in BH context in llc_rcv(). We can't block in BH. There is no reason to block it here, kfree_rcu() should be sufficient. Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/llc.h | 1 + net/llc/llc_core.c | 4 +--- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/include/net/llc.h b/include/net/llc.h index 890a87318014..df282d9b4017 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -66,6 +66,7 @@ struct llc_sap { int sk_count; struct hlist_nulls_head sk_laddr_hash[LLC_SK_LADDR_HASH_ENTRIES]; struct hlist_head sk_dev_hash[LLC_SK_DEV_HASH_ENTRIES]; + struct rcu_head rcu; }; static inline diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 260b3dc1b4a2..64d4bef04e73 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -127,9 +127,7 @@ void llc_sap_close(struct llc_sap *sap) list_del_rcu(&sap->node); spin_unlock_bh(&llc_sap_list_lock); - synchronize_rcu(); - - kfree(sap); + kfree_rcu(sap, rcu); } static struct packet_type llc_packet_type __read_mostly = { From 42fc909019365e3c1087accdeef66e0a8e6b132e Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Wed, 25 Jul 2018 10:37:47 +0200 Subject: [PATCH 0685/3715] ARM: dts: qcom: ipq4019: fix cpu0's qcom,saw2 reg value [ Upstream commit bd73a3dd257fb838bd456a18eeee0ef0224b7a40 ] while compiling an ipq4019 target, dtc will complain: regulator@b089000 unit address format error, expected "2089000" The saw0 regulator reg value seems to be copied and pasted from qcom-ipq8064.dtsi. This patch fixes the reg value to match that of the unit address which in turn silences the warning. (There is no driver for qcom,saw2 right now. So this went unnoticed) Signed-off-by: Christian Lamparter Signed-off-by: John Crispin Signed-off-by: Andy Gross Signed-off-by: Sasha Levin --- arch/arm/boot/dts/qcom-ipq4019.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/qcom-ipq4019.dtsi b/arch/arm/boot/dts/qcom-ipq4019.dtsi index 10d112a4078e..19156cbb6003 100644 --- a/arch/arm/boot/dts/qcom-ipq4019.dtsi +++ b/arch/arm/boot/dts/qcom-ipq4019.dtsi @@ -234,7 +234,7 @@ saw0: regulator@b089000 { compatible = "qcom,saw2"; - reg = <0x02089000 0x1000>, <0x0b009000 0x1000>; + reg = <0x0b089000 0x1000>, <0x0b009000 0x1000>; regulator; }; From 58b03626a7b19e6f03533cf728a9790241c3362a Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Wed, 29 Aug 2018 09:57:21 +0200 Subject: [PATCH 0686/3715] soc: qcom: wcnss_ctrl: Avoid string overflow [ Upstream commit 4c96ed170d658d8826d94edec8ac93ee777981a2 ] 'chinfo.name' is used as a NUL-terminated string, but using strncpy() with the length equal to the buffer size may result in lack of the termination: drivers//soc/qcom/wcnss_ctrl.c: In function 'qcom_wcnss_open_channel': drivers//soc/qcom/wcnss_ctrl.c:284:2: warning: 'strncpy' specified bound 32 equals destination size [-Wstringop-truncation] strncpy(chinfo.name, name, sizeof(chinfo.name)); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This changes it to use the safer strscpy() instead. Signed-off-by: Niklas Cassel Reviewed-by: Bjorn Andersson Signed-off-by: Andy Gross Signed-off-by: Sasha Levin --- drivers/soc/qcom/wcnss_ctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/qcom/wcnss_ctrl.c b/drivers/soc/qcom/wcnss_ctrl.c index df3ccb30bc2d..373400dd816d 100644 --- a/drivers/soc/qcom/wcnss_ctrl.c +++ b/drivers/soc/qcom/wcnss_ctrl.c @@ -281,7 +281,7 @@ struct rpmsg_endpoint *qcom_wcnss_open_channel(void *wcnss, const char *name, rp struct rpmsg_channel_info chinfo; struct wcnss_ctrl *_wcnss = wcnss; - strncpy(chinfo.name, name, sizeof(chinfo.name)); + strscpy(chinfo.name, name, sizeof(chinfo.name)); chinfo.src = RPMSG_ADDR_ANY; chinfo.dst = RPMSG_ADDR_ANY; From dc87cafee594a0244824fa5668bf1b3e9540f286 Mon Sep 17 00:00:00 2001 From: Alan Modra Date: Fri, 14 Sep 2018 13:10:04 +0930 Subject: [PATCH 0687/3715] powerpc/vdso: Correct call frame information [ Upstream commit 56d20861c027498b5a1112b4f9f05b56d906fdda ] Call Frame Information is used by gdb for back-traces and inserting breakpoints on function return for the "finish" command. This failed when inside __kernel_clock_gettime. More concerning than difficulty debugging is that CFI is also used by stack frame unwinding code to implement exceptions. If you have an app that needs to handle asynchronous exceptions for some reason, and you are unlucky enough to get one inside the VDSO time functions, your app will crash. What's wrong: There is control flow in __kernel_clock_gettime that reaches label 99 without saving lr in r12. CFI info however is interpreted by the unwinder without reference to control flow: It's a simple matter of "Execute all the CFI opcodes up to the current address". That means the unwinder thinks r12 contains the return address at label 99. Disabuse it of that notion by resetting CFI for the return address at label 99. Note that the ".cfi_restore lr" could have gone anywhere from the "mtlr r12" a few instructions earlier to the instruction at label 99. I put the CFI as late as possible, because in general that's best practice (and if possible grouped with other CFI in order to reduce the number of CFI opcodes executed when unwinding). Using r12 as the return address is perfectly fine after the "mtlr r12" since r12 on that code path still contains the return address. __get_datapage also has a CFI error. That function temporarily saves lr in r0, and reflects that fact with ".cfi_register lr,r0". A later use of r0 means the CFI at that point isn't correct, as r0 no longer contains the return address. Fix that too. Signed-off-by: Alan Modra Tested-by: Reza Arbab Signed-off-by: Paul Mackerras Signed-off-by: Sasha Levin --- arch/powerpc/kernel/vdso32/datapage.S | 1 + arch/powerpc/kernel/vdso32/gettimeofday.S | 1 + arch/powerpc/kernel/vdso64/datapage.S | 1 + arch/powerpc/kernel/vdso64/gettimeofday.S | 1 + 4 files changed, 4 insertions(+) diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S index 3745113fcc65..2a7eb5452aba 100644 --- a/arch/powerpc/kernel/vdso32/datapage.S +++ b/arch/powerpc/kernel/vdso32/datapage.S @@ -37,6 +37,7 @@ data_page_branch: mtlr r0 addi r3, r3, __kernel_datapage_offset-data_page_branch lwz r0,0(r3) + .cfi_restore lr add r3,r0,r3 blr .cfi_endproc diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index 769c2624e0a6..1e0bc5955a40 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -139,6 +139,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) */ 99: li r0,__NR_clock_gettime + .cfi_restore lr sc blr .cfi_endproc diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S index abf17feffe40..bf9668691511 100644 --- a/arch/powerpc/kernel/vdso64/datapage.S +++ b/arch/powerpc/kernel/vdso64/datapage.S @@ -37,6 +37,7 @@ data_page_branch: mtlr r0 addi r3, r3, __kernel_datapage_offset-data_page_branch lwz r0,0(r3) + .cfi_restore lr add r3,r0,r3 blr .cfi_endproc diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S index 382021324883..09b2a49f6dd5 100644 --- a/arch/powerpc/kernel/vdso64/gettimeofday.S +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S @@ -124,6 +124,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) */ 99: li r0,__NR_clock_gettime + .cfi_restore lr sc blr .cfi_endproc From b8ba110d14bccb47371e8b790414e2fddc47e9a3 Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Thu, 13 Sep 2018 23:52:49 -0500 Subject: [PATCH 0688/3715] ARM: dts: socfpga: Fix I2C bus unit-address error [ Upstream commit cbbc488ed85061a765cf370c3e41f383c1e0add6 ] dtc has new checks for I2C buses. Fix the warnings in unit-addresses. arch/arm/boot/dts/socfpga_cyclone5_de0_sockit.dtb: Warning (i2c_bus_reg): /soc/i2c@ffc04000/adxl345@0: I2C bus unit address format error, expected "53" Signed-off-by: Rob Herring Signed-off-by: Dinh Nguyen Signed-off-by: Sasha Levin --- arch/arm/boot/dts/socfpga_cyclone5_de0_sockit.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/socfpga_cyclone5_de0_sockit.dts b/arch/arm/boot/dts/socfpga_cyclone5_de0_sockit.dts index b280e6494193..31b01a998b2e 100644 --- a/arch/arm/boot/dts/socfpga_cyclone5_de0_sockit.dts +++ b/arch/arm/boot/dts/socfpga_cyclone5_de0_sockit.dts @@ -88,7 +88,7 @@ status = "okay"; clock-frequency = <100000>; - adxl345: adxl345@0 { + adxl345: adxl345@53 { compatible = "adi,adxl345"; reg = <0x53>; From ce87ef651769f2276bbbd5e62d5f4ef3d4b87b6c Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Thu, 13 Sep 2018 14:42:13 +0200 Subject: [PATCH 0689/3715] pinctrl: at91: don't use the same irqchip with multiple gpiochips [ Upstream commit 0c3dfa176912b5f87732545598200fb55e9c1978 ] Sharing the same irqchip with multiple gpiochips is not a good practice. For instance, when installing hooks, we change the state of the irqchip. The initial state of the irqchip for the second gpiochip to register is then disrupted. Signed-off-by: Ludovic Desroches Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/pinctrl-at91.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/pinctrl/pinctrl-at91.c b/drivers/pinctrl/pinctrl-at91.c index 569bc28cb909..404711f0985a 100644 --- a/drivers/pinctrl/pinctrl-at91.c +++ b/drivers/pinctrl/pinctrl-at91.c @@ -1566,16 +1566,6 @@ void at91_pinctrl_gpio_resume(void) #define gpio_irq_set_wake NULL #endif /* CONFIG_PM */ -static struct irq_chip gpio_irqchip = { - .name = "GPIO", - .irq_ack = gpio_irq_ack, - .irq_disable = gpio_irq_mask, - .irq_mask = gpio_irq_mask, - .irq_unmask = gpio_irq_unmask, - /* .irq_set_type is set dynamically */ - .irq_set_wake = gpio_irq_set_wake, -}; - static void gpio_irq_handler(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); @@ -1616,12 +1606,22 @@ static int at91_gpio_of_irq_setup(struct platform_device *pdev, struct gpio_chip *gpiochip_prev = NULL; struct at91_gpio_chip *prev = NULL; struct irq_data *d = irq_get_irq_data(at91_gpio->pioc_virq); + struct irq_chip *gpio_irqchip; int ret, i; + gpio_irqchip = devm_kzalloc(&pdev->dev, sizeof(*gpio_irqchip), GFP_KERNEL); + if (!gpio_irqchip) + return -ENOMEM; + at91_gpio->pioc_hwirq = irqd_to_hwirq(d); - /* Setup proper .irq_set_type function */ - gpio_irqchip.irq_set_type = at91_gpio->ops->irq_type; + gpio_irqchip->name = "GPIO"; + gpio_irqchip->irq_ack = gpio_irq_ack; + gpio_irqchip->irq_disable = gpio_irq_mask; + gpio_irqchip->irq_mask = gpio_irq_mask; + gpio_irqchip->irq_unmask = gpio_irq_unmask; + gpio_irqchip->irq_set_wake = gpio_irq_set_wake, + gpio_irqchip->irq_set_type = at91_gpio->ops->irq_type; /* Disable irqs of this PIO controller */ writel_relaxed(~0, at91_gpio->regbase + PIO_IDR); @@ -1632,7 +1632,7 @@ static int at91_gpio_of_irq_setup(struct platform_device *pdev, * interrupt. */ ret = gpiochip_irqchip_add(&at91_gpio->chip, - &gpio_irqchip, + gpio_irqchip, 0, handle_edge_irq, IRQ_TYPE_NONE); @@ -1650,7 +1650,7 @@ static int at91_gpio_of_irq_setup(struct platform_device *pdev, if (!gpiochip_prev) { /* Then register the chain on the parent IRQ */ gpiochip_set_chained_irqchip(&at91_gpio->chip, - &gpio_irqchip, + gpio_irqchip, at91_gpio->pioc_virq, gpio_irq_handler); return 0; From e269eb6f1f7b66df81d063ff91a57a0b4abf111e Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Fri, 14 Sep 2018 14:36:27 +0530 Subject: [PATCH 0690/3715] cxgb4: Fix endianness issue in t4_fwcache() [ Upstream commit 0dc235afc59a226d951352b0adf4a89b532a9d13 ] Do not put host-endian 0 or 1 into big endian feild. Reported-by: Al Viro Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 1802debbd3c7..39bcf27902e4 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -3750,7 +3750,7 @@ int t4_fwcache(struct adapter *adap, enum fw_params_param_dev_fwcache op) c.param[0].mnem = cpu_to_be32(FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_FWCACHE)); - c.param[0].val = (__force __be32)op; + c.param[0].val = cpu_to_be32(op); return t4_wr_mbox(adap, adap->mbox, &c, sizeof(c), NULL); } From f927911d4abad751f877a7edc7261276c266db65 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Fri, 14 Sep 2018 16:23:09 +0200 Subject: [PATCH 0691/3715] blok, bfq: do not plug I/O if all queues are weight-raised [ Upstream commit c8765de0adfcaaf4ffb2d951e07444f00ffa9453 ] To reduce latency for interactive and soft real-time applications, bfq privileges the bfq_queues containing the I/O of these applications. These privileged queues, referred-to as weight-raised queues, get a much higher share of the device throughput w.r.t. non-privileged queues. To preserve this higher share, the I/O of any non-weight-raised queue must be plugged whenever a sync weight-raised queue, while being served, remains temporarily empty. To attain this goal, bfq simply plugs any I/O (from any queue), if a sync weight-raised queue remains empty while in service. Unfortunately, this plugging typically lowers throughput with random I/O, on devices with internal queueing (because it reduces the filling level of the internal queues of the device). This commit addresses this issue by restricting the cases where plugging is performed: if a sync weight-raised queue remains empty while in service, then I/O plugging is performed only if some of the active bfq_queues are *not* weight-raised (which is actually the only circumstance where plugging is needed to preserve the higher share of the throughput of weight-raised queues). This restriction proved able to boost throughput in really many use cases needing only maximum throughput. Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/bfq-iosched.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index e65b0da1007b..93863c6173e6 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -3314,7 +3314,12 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) * whether bfqq is being weight-raised, because * bfq_symmetric_scenario() does not take into account also * weight-raised queues (see comments on - * bfq_weights_tree_add()). + * bfq_weights_tree_add()). In particular, if bfqq is being + * weight-raised, it is important to idle only if there are + * other, non-weight-raised queues that may steal throughput + * to bfqq. Actually, we should be even more precise, and + * differentiate between interactive weight raising and + * soft real-time weight raising. * * As a side note, it is worth considering that the above * device-idling countermeasures may however fail in the @@ -3326,7 +3331,8 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) * to let requests be served in the desired order until all * the requests already queued in the device have been served. */ - asymmetric_scenario = bfqq->wr_coeff > 1 || + asymmetric_scenario = (bfqq->wr_coeff > 1 && + bfqd->wr_busy_queues < bfqd->busy_queues) || !bfq_symmetric_scenario(bfqd); /* From 46af2022de198ebbf47141b7b33522e28733045d Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 13 Sep 2018 13:12:41 -0500 Subject: [PATCH 0692/3715] arm64: dts: meson: Fix erroneous SPI bus warnings [ Upstream commit 68ecb5c1920c5b98b1e717fd2349fba2ee5d4031 ] dtc has new checks for SPI buses. The meson dts files have a node named spi' which causes false positive warnings. As the node is a pinctrl child node, change the node name to be 'spi-pins' to fix the warnings. arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dtb: Warning (spi_bus_bridge): /soc/periphs@c8834000/pinctrl@4b0/spi: incorrect #address-cells for SPI bus Cc: Carlo Caione Cc: Kevin Hilman Cc: linux-amlogic@lists.infradead.org Signed-off-by: Rob Herring Signed-off-by: Kevin Hilman Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi | 2 +- arch/arm64/boot/dts/amlogic/meson-gxl.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi index af834cdbba79..250b5c11c0e2 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi @@ -413,7 +413,7 @@ }; }; - spi_pins: spi { + spi_pins: spi-pins { mux { groups = "spi_miso", "spi_mosi", diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi index fb8d76a17bc5..3c3057944960 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi @@ -310,7 +310,7 @@ }; }; - spi_pins: spi { + spi_pins: spi-pins { mux { groups = "spi_miso", "spi_mosi", From e0ecbca8ff27376cbaa68e5c64374344b85c760c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 10 Sep 2018 11:39:04 +0300 Subject: [PATCH 0693/3715] power: supply: ab8500_fg: silence uninitialized variable warnings [ Upstream commit 54baff8d4e5dce2cef61953b1dc22079cda1ddb1 ] If kstrtoul() fails then we print "charge_full" when it's uninitialized. The debug printk doesn't add anything so I deleted it and cleaned these two functions up a bit. Signed-off-by: Dan Carpenter Signed-off-by: Sebastian Reichel Signed-off-by: Sasha Levin --- drivers/power/supply/ab8500_fg.c | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/drivers/power/supply/ab8500_fg.c b/drivers/power/supply/ab8500_fg.c index c569f82a0071..b87768238b70 100644 --- a/drivers/power/supply/ab8500_fg.c +++ b/drivers/power/supply/ab8500_fg.c @@ -2437,17 +2437,14 @@ static ssize_t charge_full_store(struct ab8500_fg *di, const char *buf, size_t count) { unsigned long charge_full; - ssize_t ret; + int ret; ret = kstrtoul(buf, 10, &charge_full); + if (ret) + return ret; - dev_dbg(di->dev, "Ret %zd charge_full %lu", ret, charge_full); - - if (!ret) { - di->bat_cap.max_mah = (int) charge_full; - ret = count; - } - return ret; + di->bat_cap.max_mah = (int) charge_full; + return count; } static ssize_t charge_now_show(struct ab8500_fg *di, char *buf) @@ -2459,20 +2456,16 @@ static ssize_t charge_now_store(struct ab8500_fg *di, const char *buf, size_t count) { unsigned long charge_now; - ssize_t ret; + int ret; ret = kstrtoul(buf, 10, &charge_now); + if (ret) + return ret; - dev_dbg(di->dev, "Ret %zd charge_now %lu was %d", - ret, charge_now, di->bat_cap.prev_mah); - - if (!ret) { - di->bat_cap.user_mah = (int) charge_now; - di->flags.user_cap = true; - ret = count; - queue_delayed_work(di->fg_wq, &di->fg_periodic_work, 0); - } - return ret; + di->bat_cap.user_mah = (int) charge_now; + di->flags.user_cap = true; + queue_delayed_work(di->fg_wq, &di->fg_periodic_work, 0); + return count; } static struct ab8500_fg_sysfs_entry charge_full_attr = From 3a9539e9ba18703b21b9aa398673179e9c5eabe5 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Thu, 30 Aug 2018 14:50:11 +0300 Subject: [PATCH 0694/3715] power: reset: at91-poweroff: do not procede if at91_shdwc is allocated [ Upstream commit 9f1e44774be578fb92776add95f1fcaf8284d692 ] There should be only one instance of struct shdwc in the system. This is referenced through at91_shdwc. Return in probe if at91_shdwc is already allocated. Signed-off-by: Claudiu Beznea Acked-by: Nicolas Ferre Signed-off-by: Sebastian Reichel Signed-off-by: Sasha Levin --- drivers/power/reset/at91-sama5d2_shdwc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/power/reset/at91-sama5d2_shdwc.c b/drivers/power/reset/at91-sama5d2_shdwc.c index 31080c254124..037976a1fe40 100644 --- a/drivers/power/reset/at91-sama5d2_shdwc.c +++ b/drivers/power/reset/at91-sama5d2_shdwc.c @@ -246,6 +246,9 @@ static int __init at91_shdwc_probe(struct platform_device *pdev) if (!pdev->dev.of_node) return -ENODEV; + if (at91_shdwc) + return -EBUSY; + at91_shdwc = devm_kzalloc(&pdev->dev, sizeof(*at91_shdwc), GFP_KERNEL); if (!at91_shdwc) return -ENOMEM; From 4fd86c4098a5a85b461335cdfcb3419e64c7ac50 Mon Sep 17 00:00:00 2001 From: Tomasz Figa Date: Tue, 17 Jul 2018 18:05:07 +0200 Subject: [PATCH 0695/3715] power: supply: max8998-charger: Fix platform data retrieval MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit cb90a2c6f77fe9b43d1e3f759bb2f13fe7fa1811 ] Since the max8998 MFD driver supports instantiation by DT, platform data retrieval is handled in MFD probe and cell drivers should get use the pdata field of max8998_dev struct to obtain them. Fixes: ee999fb3f17f ("mfd: max8998: Add support for Device Tree") Signed-off-by: Tomasz Figa Signed-off-by: Paweł Chmiel Signed-off-by: Sebastian Reichel Signed-off-by: Sasha Levin --- drivers/power/supply/max8998_charger.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/supply/max8998_charger.c b/drivers/power/supply/max8998_charger.c index b64cf0f14142..66438029bdd0 100644 --- a/drivers/power/supply/max8998_charger.c +++ b/drivers/power/supply/max8998_charger.c @@ -85,7 +85,7 @@ static const struct power_supply_desc max8998_battery_desc = { static int max8998_battery_probe(struct platform_device *pdev) { struct max8998_dev *iodev = dev_get_drvdata(pdev->dev.parent); - struct max8998_platform_data *pdata = dev_get_platdata(iodev->dev); + struct max8998_platform_data *pdata = iodev->pdata; struct power_supply_config psy_cfg = {}; struct max8998_battery_data *max8998; struct i2c_client *i2c; From 10ffb20b918b47c5baf49aa1efe6884672591175 Mon Sep 17 00:00:00 2001 From: Banajit Goswami Date: Mon, 27 Aug 2018 21:15:39 -0700 Subject: [PATCH 0696/3715] component: fix loop condition to call unbind() if bind() fails [ Upstream commit bdae566d5d9733b6e32b378668b84eadf28a94d4 ] During component_bind_all(), if bind() fails for any particular component associated with a master, unbind() should be called for all previous components in that master's match array, whose bind() might have completed successfully. As per the current logic, if bind() fails for the component at position 'n' in the master's match array, it would start calling unbind() from component in 'n'th position itself and work backwards, and will always skip calling unbind() for component in 0th position in the master's match array. Fix this by updating the loop condition, and the logic to refer to the components in master's match array, so that unbind() is called for all components starting from 'n-1'st position in the array, until (and including) component in 0th position. Signed-off-by: Banajit Goswami Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/base/component.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/base/component.c b/drivers/base/component.c index 89b032f2ffd2..08da6160e94d 100644 --- a/drivers/base/component.c +++ b/drivers/base/component.c @@ -461,9 +461,9 @@ int component_bind_all(struct device *master_dev, void *data) } if (ret != 0) { - for (; i--; ) - if (!master->match->compare[i].duplicate) { - c = master->match->compare[i].component; + for (; i > 0; i--) + if (!master->match->compare[i - 1].duplicate) { + c = master->match->compare[i - 1].component; component_unbind(c, master, data); } } From e32819f088faba6f52251f0a8dafb1b52eebb93b Mon Sep 17 00:00:00 2001 From: Bernd Edlinger Date: Sat, 7 Jul 2018 17:52:47 +0000 Subject: [PATCH 0697/3715] kernfs: Fix range checks in kernfs_get_target_path [ Upstream commit a75e78f21f9ad4b810868c89dbbabcc3931591ca ] The terminating NUL byte is only there because the buffer is allocated with kzalloc(PAGE_SIZE, GFP_KERNEL), but since the range-check is off-by-one, and PAGE_SIZE==PATH_MAX, the returned string may not be zero-terminated if it is exactly PATH_MAX characters long. Furthermore also the initial loop may theoretically exceed PATH_MAX and cause a fault. Signed-off-by: Bernd Edlinger Acked-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- fs/kernfs/symlink.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c index 5145ae2f0572..d273e3accade 100644 --- a/fs/kernfs/symlink.c +++ b/fs/kernfs/symlink.c @@ -63,6 +63,9 @@ static int kernfs_get_target_path(struct kernfs_node *parent, if (base == kn) break; + if ((s - path) + 3 >= PATH_MAX) + return -ENAMETOOLONG; + strcpy(s, "../"); s += 3; base = base->parent; @@ -79,7 +82,7 @@ static int kernfs_get_target_path(struct kernfs_node *parent, if (len < 2) return -EINVAL; len--; - if ((s - path) + len > PATH_MAX) + if ((s - path) + len >= PATH_MAX) return -ENAMETOOLONG; /* reverse fillup of target string from target to base */ From 592f63897b86cac8782b6b28fcc92973c28a3ea8 Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Fri, 14 Sep 2018 12:26:47 +0800 Subject: [PATCH 0698/3715] ip_gre: fix parsing gre header in ipgre_err [ Upstream commit b0350d51f001e6edc13ee4f253b98b50b05dd401 ] gre_parse_header stops parsing when csum_err is encountered, which means tpi->key is undefined and ip_tunnel_lookup will return NULL improperly. This patch introduce a NULL pointer as csum_err parameter. Even when csum_err is encountered, it won't return error and continue parsing gre header as expected. Fixes: 9f57c67c379d ("gre: Remove support for sharing GRE protocol hook.") Reported-by: Jiri Benc Signed-off-by: Haishuang Yan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/gre_demux.c | 7 ++++--- net/ipv4/ip_gre.c | 9 +++------ 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index b798862b6be5..7efe740c06eb 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -86,13 +86,14 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, options = (__be32 *)(greh + 1); if (greh->flags & GRE_CSUM) { - if (skb_checksum_simple_validate(skb)) { + if (!skb_checksum_simple_validate(skb)) { + skb_checksum_try_convert(skb, IPPROTO_GRE, 0, + null_compute_pseudo); + } else if (csum_err) { *csum_err = true; return -EINVAL; } - skb_checksum_try_convert(skb, IPPROTO_GRE, 0, - null_compute_pseudo); options++; } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 71ff2531d973..9940a59306b5 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -230,13 +230,10 @@ static void gre_err(struct sk_buff *skb, u32 info) const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; struct tnl_ptk_info tpi; - bool csum_err = false; - if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), - iph->ihl * 4) < 0) { - if (!csum_err) /* ignore csum errors. */ - return; - } + if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP), + iph->ihl * 4) < 0) + return; if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { ipv4_update_pmtu(skb, dev_net(skb->dev), info, From bbce829fd3bc194a098aa7c64ca283c133221a0d Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 13 Sep 2018 13:12:36 -0500 Subject: [PATCH 0699/3715] ARM: dts: rockchip: Fix erroneous SPI bus dtc warnings on rk3036 [ Upstream commit 131c3eb428ccd5f0c784b9edb4f72ec296a045d2 ] dtc has new checks for SPI buses. The rk3036 dts file has a node named spi' which causes false positive warnings. As the node is a pinctrl child node, change the node name to be 'spi-pins' to fix the warnings. arch/arm/boot/dts/rk3036-evb.dtb: Warning (spi_bus_bridge): /pinctrl/spi: incorrect #address-cells for SPI bus arch/arm/boot/dts/rk3036-kylin.dtb: Warning (spi_bus_bridge): /pinctrl/spi: incorrect #address-cells for SPI bus arch/arm/boot/dts/rk3036-evb.dtb: Warning (spi_bus_bridge): /pinctrl/spi: incorrect #size-cells for SPI bus arch/arm/boot/dts/rk3036-kylin.dtb: Warning (spi_bus_bridge): /pinctrl/spi: incorrect #size-cells for SPI bus Cc: Heiko Stuebner Cc: linux-rockchip@lists.infradead.org Signed-off-by: Rob Herring Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm/boot/dts/rk3036.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/rk3036.dtsi b/arch/arm/boot/dts/rk3036.dtsi index 5c0a76493d22..03cf0c84ac0a 100644 --- a/arch/arm/boot/dts/rk3036.dtsi +++ b/arch/arm/boot/dts/rk3036.dtsi @@ -750,7 +750,7 @@ /* no rts / cts for uart2 */ }; - spi { + spi-pins { spi_txd:spi-txd { rockchip,pins = <1 29 RK_FUNC_3 &pcfg_pull_default>; }; From ad196348b6f4a9dad8ba499bc57d4ca9d35453c1 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 8 Sep 2018 20:08:13 +0200 Subject: [PATCH 0700/3715] ACPI / LPSS: Exclude I2C busses shared with PUNIT from pmc_atom_d3_mask [ Upstream commit 86b62e5cd8965d3056f9e9ccdec51631c37add81 ] lpss_iosf_enter_d3_state() checks if all hw-blocks using the DMA controllers are in d3 before powering down the DMA controllers. But on devices, where the I2C bus connected to the PMIC is shared by the PUNIT, the controller for that bus will never reach d3 since it has an effectively empty _PS3 method. Instead it appears to automatically power-down during S0i3 and we never see it as being in d3. This causes the DMA controllers to never be powered-down on these devices, causing them to never reach S0i3. This commit uses the ACPI _SEM method to detect if an I2C bus is shared with the PUNIT and if it is, it removes it from the mask of devices which lpss_iosf_enter_d3_state() checks for. This fixes these devices never reaching any S0ix states. Signed-off-by: Hans de Goede Acked-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/acpi_lpss.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index 51592dd45b06..1ab8d7223b25 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -98,6 +98,9 @@ struct lpss_private_data { u32 prv_reg_ctx[LPSS_PRV_REG_COUNT]; }; +/* Devices which need to be in D3 before lpss_iosf_enter_d3_state() proceeds */ +static u32 pmc_atom_d3_mask = 0xfe000ffe; + /* LPSS run time quirks */ static unsigned int lpss_quirks; @@ -174,6 +177,21 @@ static void byt_pwm_setup(struct lpss_private_data *pdata) static void byt_i2c_setup(struct lpss_private_data *pdata) { + const char *uid_str = acpi_device_uid(pdata->adev); + acpi_handle handle = pdata->adev->handle; + unsigned long long shared_host = 0; + acpi_status status; + long uid = 0; + + /* Expected to always be true, but better safe then sorry */ + if (uid_str) + uid = simple_strtol(uid_str, NULL, 10); + + /* Detect I2C bus shared with PUNIT and ignore its d3 status */ + status = acpi_evaluate_integer(handle, "_SEM", NULL, &shared_host); + if (ACPI_SUCCESS(status) && shared_host && uid) + pmc_atom_d3_mask &= ~(BIT_LPSS2_F1_I2C1 << (uid - 1)); + lpss_deassert_reset(pdata); if (readl(pdata->mmio_base + pdata->dev_desc->prv_offset)) @@ -789,7 +807,7 @@ static void lpss_iosf_enter_d3_state(void) * Here we read the values related to LPSS power island, i.e. LPSS * devices, excluding both LPSS DMA controllers, along with SCC domain. */ - u32 func_dis, d3_sts_0, pmc_status, pmc_mask = 0xfe000ffe; + u32 func_dis, d3_sts_0, pmc_status; int ret; ret = pmc_atom_read(PMC_FUNC_DIS, &func_dis); @@ -807,7 +825,7 @@ static void lpss_iosf_enter_d3_state(void) * Shutdown both LPSS DMA controllers if and only if all other devices * are already in D3hot. */ - pmc_status = (~(d3_sts_0 | func_dis)) & pmc_mask; + pmc_status = (~(d3_sts_0 | func_dis)) & pmc_atom_d3_mask; if (pmc_status) goto exit; From 191e8c8865237fdebc8145a6778721b8696210b3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 19 Oct 2018 23:08:43 +0300 Subject: [PATCH 0701/3715] ath9k: Fix a locking bug in ath9k_add_interface() [ Upstream commit 461cf036057477805a8a391e5fd0f5264a5e56a8 ] We tried to revert commit d9c52fd17cb4 ("ath9k: fix tx99 with monitor mode interface") but accidentally missed part of the locking change. The lock has to be held earlier so that we're holding it when we do "sc->tx99_vif = vif;" and also there in the current code there is a stray unlock before we have taken the lock. Fixes: 6df0580be8bc ("ath9k: add back support for using active monitor interfaces for tx99") Signed-off-by: Dan Carpenter Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath9k/main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 3589f1f3e744..72ad84fde5c1 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1250,6 +1250,7 @@ static int ath9k_add_interface(struct ieee80211_hw *hw, struct ath_vif *avp = (void *)vif->drv_priv; struct ath_node *an = &avp->mcast_node; + mutex_lock(&sc->mutex); if (IS_ENABLED(CONFIG_ATH9K_TX99)) { if (sc->cur_chan->nvifs >= 1) { mutex_unlock(&sc->mutex); @@ -1258,8 +1259,6 @@ static int ath9k_add_interface(struct ieee80211_hw *hw, sc->tx99_vif = vif; } - mutex_lock(&sc->mutex); - ath_dbg(common, CONFIG, "Attach a VIF of type: %d\n", vif->type); sc->cur_chan->nvifs++; From 56ff8052bee16e7c93ac9d568233e9af9a878f9e Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 17 Sep 2018 17:36:06 +0200 Subject: [PATCH 0702/3715] s390/qeth: invoke softirqs after napi_schedule() [ Upstream commit 4d19db777a2f32c9b76f6fd517ed8960576cb43e ] Calling napi_schedule() from process context does not ensure that the NET_RX softirq is run in a timely fashion. So trigger it manually. This is no big issue with current code. A call to ndo_open() is usually followed by a ndo_set_rx_mode() call, and for qeth this contains a spin_unlock_bh(). Except for OSN, where qeth_l2_set_rx_mode() bails out early. Nevertheless it's best to not depend on this behaviour, and just fix the issue at its source like all other drivers do. For instance see commit 83a0c6e58901 ("i40e: Invoke softirqs after napi_reschedule"). Fixes: a1c3ed4c9ca0 ("qeth: NAPI support for l2 and l3 discipline") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/s390/net/qeth_l2_main.c | 3 +++ drivers/s390/net/qeth_l3_main.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 2845316db554..6fa07c246915 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -869,7 +869,10 @@ static int __qeth_l2_open(struct net_device *dev) if (qdio_stop_irq(card->data.ccwdev, 0) >= 0) { napi_enable(&card->napi); + local_bh_disable(); napi_schedule(&card->napi); + /* kick-start the NAPI softirq: */ + local_bh_enable(); } else rc = -EIO; return rc; diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index d9830c86d0c1..8bccfd686b73 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2849,7 +2849,10 @@ static int __qeth_l3_open(struct net_device *dev) if (qdio_stop_irq(card->data.ccwdev, 0) >= 0) { napi_enable(&card->napi); + local_bh_disable(); napi_schedule(&card->napi); + /* kick-start the NAPI softirq: */ + local_bh_enable(); } else rc = -EIO; return rc; From a74a77a5cdd88a807d6f335d506c717c45ea8da6 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Fri, 10 Aug 2018 04:32:11 +0000 Subject: [PATCH 0703/3715] PCI/ACPI: Correct error message for ASPM disabling [ Upstream commit 1ad61b612b95980a4d970c52022aa01dfc0f6068 ] If _OSC execution fails today for platforms without an _OSC entry, code is printing a misleading message saying disabling ASPM as follows: acpi PNP0A03:00: _OSC failed (AE_NOT_FOUND); disabling ASPM We need to ensure that platform supports ASPM to begin with. Reported-by: Michael Kelley Signed-off-by: Sinan Kaya Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin --- drivers/acpi/pci_root.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index eb857d6ea1fe..96911360a28e 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -454,8 +454,9 @@ static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm) decode_osc_support(root, "OS supports", support); status = acpi_pci_osc_support(root, support); if (ACPI_FAILURE(status)) { - dev_info(&device->dev, "_OSC failed (%s); disabling ASPM\n", - acpi_format_exception(status)); + dev_info(&device->dev, "_OSC failed (%s)%s\n", + acpi_format_exception(status), + pcie_aspm_support_enabled() ? "; disabling ASPM" : ""); *no_aspm = 1; return; } From 9ae8ba50399015352dd9f111ac86a6f49953910d Mon Sep 17 00:00:00 2001 From: Nava kishore Manne Date: Mon, 3 Sep 2018 15:10:51 +0200 Subject: [PATCH 0704/3715] serial: uartps: Fix suspend functionality [ Upstream commit 4b9d33c6a30688344a3e95179654ea31b07f59b7 ] The driver's suspend/resume functions were buggy. If UART node contains any child node in the DT and the child is established a communication path with the parent UART. The relevant /dev/ttyPS* node will be not available for other operations. If the driver is trying to do any operations like suspend/resume without checking the tty->dev status it leads to the kernel crash/hang. This patch fix this issue by call the device_may_wake() with the generic parameter of type struct device. in the uart suspend and resume paths. It also fixes a race condition in the uart suspend path(i.e uart_suspend_port() should be called at the end of cdns_uart_suspend API this path updates the same) Signed-off-by: Nava kishore Manne Signed-off-by: Michal Simek Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/xilinx_uartps.c | 37 ++++++++---------------------- 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c index b0da63737aa1..0dbfd02e3b19 100644 --- a/drivers/tty/serial/xilinx_uartps.c +++ b/drivers/tty/serial/xilinx_uartps.c @@ -1342,24 +1342,11 @@ static struct uart_driver cdns_uart_uart_driver = { static int cdns_uart_suspend(struct device *device) { struct uart_port *port = dev_get_drvdata(device); - struct tty_struct *tty; - struct device *tty_dev; - int may_wake = 0; + int may_wake; - /* Get the tty which could be NULL so don't assume it's valid */ - tty = tty_port_tty_get(&port->state->port); - if (tty) { - tty_dev = tty->dev; - may_wake = device_may_wakeup(tty_dev); - tty_kref_put(tty); - } + may_wake = device_may_wakeup(device); - /* - * Call the API provided in serial_core.c file which handles - * the suspend. - */ - uart_suspend_port(&cdns_uart_uart_driver, port); - if (!(console_suspend_enabled && !may_wake)) { + if (console_suspend_enabled && may_wake) { unsigned long flags = 0; spin_lock_irqsave(&port->lock, flags); @@ -1374,7 +1361,11 @@ static int cdns_uart_suspend(struct device *device) spin_unlock_irqrestore(&port->lock, flags); } - return 0; + /* + * Call the API provided in serial_core.c file which handles + * the suspend. + */ + return uart_suspend_port(&cdns_uart_uart_driver, port); } /** @@ -1388,17 +1379,9 @@ static int cdns_uart_resume(struct device *device) struct uart_port *port = dev_get_drvdata(device); unsigned long flags = 0; u32 ctrl_reg; - struct tty_struct *tty; - struct device *tty_dev; - int may_wake = 0; + int may_wake; - /* Get the tty which could be NULL so don't assume it's valid */ - tty = tty_port_tty_get(&port->state->port); - if (tty) { - tty_dev = tty->dev; - may_wake = device_may_wakeup(tty_dev); - tty_kref_put(tty); - } + may_wake = device_may_wakeup(device); if (console_suspend_enabled && !may_wake) { struct cdns_uart *cdns_uart = port->private_data; From 925ed8333ee86738c2792e9244c85ec9e8839c2f Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 13 Sep 2018 10:21:25 +0200 Subject: [PATCH 0705/3715] serial: samsung: Enable baud clock for UART reset procedure in resume [ Upstream commit 1ff3652bc7111df26b5807037f624be294cf69d5 ] Ensure that the baud clock is also enabled for UART register writes in driver resume. On Exynos5433 SoC this is needed to avoid external abort issue. Signed-off-by: Marek Szyprowski Reviewed-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/samsung.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c index f4b8e4e17a86..808373d4e37a 100644 --- a/drivers/tty/serial/samsung.c +++ b/drivers/tty/serial/samsung.c @@ -1922,7 +1922,11 @@ static int s3c24xx_serial_resume(struct device *dev) if (port) { clk_prepare_enable(ourport->clk); + if (!IS_ERR(ourport->baudclk)) + clk_prepare_enable(ourport->baudclk); s3c24xx_serial_resetport(port, s3c24xx_port_to_cfg(port)); + if (!IS_ERR(ourport->baudclk)) + clk_disable_unprepare(ourport->baudclk); clk_disable_unprepare(ourport->clk); uart_resume_port(&s3c24xx_uart_drv, port); @@ -1945,7 +1949,11 @@ static int s3c24xx_serial_resume_noirq(struct device *dev) if (rx_enabled(port)) uintm &= ~S3C64XX_UINTM_RXD_MSK; clk_prepare_enable(ourport->clk); + if (!IS_ERR(ourport->baudclk)) + clk_prepare_enable(ourport->baudclk); wr_regl(port, S3C64XX_UINTM, uintm); + if (!IS_ERR(ourport->baudclk)) + clk_disable_unprepare(ourport->baudclk); clk_disable_unprepare(ourport->clk); } } From 73e22730e13e8df275423ff519e19508ea80e19d Mon Sep 17 00:00:00 2001 From: Anton Vasilyev Date: Tue, 7 Aug 2018 13:59:05 +0300 Subject: [PATCH 0706/3715] serial: mxs-auart: Fix potential infinite loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5963e8a3122471cadfe0eba41c4ceaeaa5c8bb4d ] On the error path of mxs_auart_request_gpio_irq() is performed backward iterating with index i of enum type. Underline enum type may be unsigned char. In this case check (--i >= 0) will be always true and error handling goes into infinite loop. The patch changes the check so that it is valid for signed and unsigned types. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Anton Vasilyev Acked-by: Uwe Kleine-König Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/mxs-auart.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c index 673c8fd7e34f..e83750831f15 100644 --- a/drivers/tty/serial/mxs-auart.c +++ b/drivers/tty/serial/mxs-auart.c @@ -1638,8 +1638,9 @@ static int mxs_auart_request_gpio_irq(struct mxs_auart_port *s) /* * If something went wrong, rollback. + * Be careful: i may be unsigned. */ - while (err && (--i >= 0)) + while (err && (i-- > 0)) if (irq[i] >= 0) free_irq(irq[i], s); From 45503ce9086cabc9b3faaaed4563b2da8af1ffe2 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 17 Sep 2018 22:08:13 -0700 Subject: [PATCH 0707/3715] samples/bpf: fix a compilation failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 534e0e52bc23de588e81b5a6f75e10c8c4b189fc ] samples/bpf build failed with the following errors: $ make samples/bpf/ ... HOSTCC samples/bpf/sockex3_user.o /data/users/yhs/work/net-next/samples/bpf/sockex3_user.c:16:8: error: redefinition of ‘struct bpf_flow_keys’ struct bpf_flow_keys { ^ In file included from /data/users/yhs/work/net-next/samples/bpf/sockex3_user.c:4:0: ./usr/include/linux/bpf.h:2338:9: note: originally defined here struct bpf_flow_keys *flow_keys; ^ make[3]: *** [samples/bpf/sockex3_user.o] Error 1 Commit d58e468b1112d ("flow_dissector: implements flow dissector BPF hook") introduced struct bpf_flow_keys in include/uapi/linux/bpf.h and hence caused the naming conflict with samples/bpf/sockex3_user.c. The fix is to rename struct bpf_flow_keys in samples/bpf/sockex3_user.c to flow_keys to avoid the conflict. Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- samples/bpf/sockex3_user.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c index 495ee02e2fb7..4d75674bee35 100644 --- a/samples/bpf/sockex3_user.c +++ b/samples/bpf/sockex3_user.c @@ -13,7 +13,7 @@ #define PARSE_IP_PROG_FD (prog_fd[0]) #define PROG_ARRAY_FD (map_fd[0]) -struct bpf_flow_keys { +struct flow_keys { __be32 src; __be32 dst; union { @@ -64,7 +64,7 @@ int main(int argc, char **argv) (void) f; for (i = 0; i < 5; i++) { - struct bpf_flow_keys key = {}, next_key; + struct flow_keys key = {}, next_key; struct pair value; sleep(1); From 338856a48f39eba8518893fb0b3ea3f69bb19315 Mon Sep 17 00:00:00 2001 From: Peter Shih Date: Mon, 10 Sep 2018 11:54:21 +0800 Subject: [PATCH 0708/3715] spi: mediatek: Don't modify spi_transfer when transfer. [ Upstream commit 00bca73bfca4fb0ab089b94cad0fc83d8b49c25f ] Mediatek SPI driver modifies some fields (tx_buf, rx_buf, len, tx_dma, rx_dma) of the spi_transfer* passed in when doing transfer_one and in interrupt handler. This is somewhat unexpected, and there are some caller (e.g. Cr50 spi driver) that reuse the spi_transfer for multiple messages. Add a field to record how many bytes have been transferred, and calculate the right len / buffer based on it instead. Signed-off-by: Pi-Hsun Shih Change-Id: I23e218cd964f16c0b2b26127d4a5ca6529867673 Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-mt65xx.c | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index 86bf45667a04..3dc31627c655 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -98,6 +98,7 @@ struct mtk_spi { struct clk *parent_clk, *sel_clk, *spi_clk; struct spi_transfer *cur_transfer; u32 xfer_len; + u32 num_xfered; struct scatterlist *tx_sgl, *rx_sgl; u32 tx_sgl_len, rx_sgl_len; const struct mtk_spi_compatible *dev_comp; @@ -385,6 +386,7 @@ static int mtk_spi_fifo_transfer(struct spi_master *master, mdata->cur_transfer = xfer; mdata->xfer_len = min(MTK_SPI_MAX_FIFO_SIZE, xfer->len); + mdata->num_xfered = 0; mtk_spi_prepare_transfer(master, xfer); mtk_spi_setup_packet(master); @@ -415,6 +417,7 @@ static int mtk_spi_dma_transfer(struct spi_master *master, mdata->tx_sgl_len = 0; mdata->rx_sgl_len = 0; mdata->cur_transfer = xfer; + mdata->num_xfered = 0; mtk_spi_prepare_transfer(master, xfer); @@ -482,7 +485,7 @@ static int mtk_spi_setup(struct spi_device *spi) static irqreturn_t mtk_spi_interrupt(int irq, void *dev_id) { - u32 cmd, reg_val, cnt, remainder; + u32 cmd, reg_val, cnt, remainder, len; struct spi_master *master = dev_id; struct mtk_spi *mdata = spi_master_get_devdata(master); struct spi_transfer *trans = mdata->cur_transfer; @@ -497,36 +500,38 @@ static irqreturn_t mtk_spi_interrupt(int irq, void *dev_id) if (trans->rx_buf) { cnt = mdata->xfer_len / 4; ioread32_rep(mdata->base + SPI_RX_DATA_REG, - trans->rx_buf, cnt); + trans->rx_buf + mdata->num_xfered, cnt); remainder = mdata->xfer_len % 4; if (remainder > 0) { reg_val = readl(mdata->base + SPI_RX_DATA_REG); - memcpy(trans->rx_buf + (cnt * 4), - ®_val, remainder); + memcpy(trans->rx_buf + + mdata->num_xfered + + (cnt * 4), + ®_val, + remainder); } } - trans->len -= mdata->xfer_len; - if (!trans->len) { + mdata->num_xfered += mdata->xfer_len; + if (mdata->num_xfered == trans->len) { spi_finalize_current_transfer(master); return IRQ_HANDLED; } - if (trans->tx_buf) - trans->tx_buf += mdata->xfer_len; - if (trans->rx_buf) - trans->rx_buf += mdata->xfer_len; - - mdata->xfer_len = min(MTK_SPI_MAX_FIFO_SIZE, trans->len); + len = trans->len - mdata->num_xfered; + mdata->xfer_len = min(MTK_SPI_MAX_FIFO_SIZE, len); mtk_spi_setup_packet(master); - cnt = trans->len / 4; - iowrite32_rep(mdata->base + SPI_TX_DATA_REG, trans->tx_buf, cnt); + cnt = len / 4; + iowrite32_rep(mdata->base + SPI_TX_DATA_REG, + trans->tx_buf + mdata->num_xfered, cnt); - remainder = trans->len % 4; + remainder = len % 4; if (remainder > 0) { reg_val = 0; - memcpy(®_val, trans->tx_buf + (cnt * 4), remainder); + memcpy(®_val, + trans->tx_buf + (cnt * 4) + mdata->num_xfered, + remainder); writel(reg_val, mdata->base + SPI_TX_DATA_REG); } From 8d65f82de9f4cb88c7ee8a7fe1c9112283b5e66d Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Thu, 21 Jun 2018 15:32:48 -0500 Subject: [PATCH 0709/3715] ipmi:dmi: Ignore IPMI SMBIOS entries with a zero base address [ Upstream commit 1574608f5f4204440d6d9f52b971aba967664764 ] Looking at logs from systems all over the place, it looks like tons of broken systems exist that set the base address to zero. I can only guess that is some sort of non-standard idea to mark the interface as not being present. It can't be zero, anyway, so just complain and ignore it. Signed-off-by: Corey Minyard Signed-off-by: Sasha Levin --- drivers/char/ipmi/ipmi_dmi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/char/ipmi/ipmi_dmi.c b/drivers/char/ipmi/ipmi_dmi.c index c3a23ec3e76f..a37d9794170c 100644 --- a/drivers/char/ipmi/ipmi_dmi.c +++ b/drivers/char/ipmi/ipmi_dmi.c @@ -197,6 +197,10 @@ static void __init dmi_decode_ipmi(const struct dmi_header *dm) slave_addr = data[DMI_IPMI_SLAVEADDR]; memcpy(&base_addr, data + DMI_IPMI_ADDR, sizeof(unsigned long)); + if (!base_addr) { + pr_err("Base address is zero, assuming no IPMI interface\n"); + return; + } if (len >= DMI_IPMI_VER2_LENGTH) { if (type == IPMI_DMI_TYPE_SSIF) { offset = 0; From 3044fbd7a634933479b27081b1d03715c70fe6e1 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 18 Sep 2018 14:09:43 +0800 Subject: [PATCH 0710/3715] net: hns3: fix return type of ndo_start_xmit function [ Upstream commit c9c3941186c5637caed131c4f4064411d6882299 ] The method ndo_start_xmit() is defined as returning an 'netdev_tx_t', which is a typedef for an enum type, also the implementation in this driver has returns 'netdev_tx_t' value, so just change the function return type to netdev_tx_t. Found by coccinelle. Signed-off-by: YueHaibing Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hip04_eth.c | 3 ++- drivers/net/ethernet/hisilicon/hix5hd2_gmac.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index ebc056b9a0fd..84c0f22ac2db 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -424,7 +424,8 @@ static void hip04_start_tx_timer(struct hip04_priv *priv) ns, HRTIMER_MODE_REL); } -static int hip04_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev) +static netdev_tx_t +hip04_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev) { struct hip04_priv *priv = netdev_priv(ndev); struct net_device_stats *stats = &ndev->stats; diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c index 25a6c8722eca..aab6fb10af94 100644 --- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c +++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c @@ -736,7 +736,7 @@ static int hix5hd2_fill_sg_desc(struct hix5hd2_priv *priv, return 0; } -static int hix5hd2_net_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t hix5hd2_net_xmit(struct sk_buff *skb, struct net_device *dev) { struct hix5hd2_priv *priv = netdev_priv(dev); struct hix5hd2_desc *desc; From be0255db201fab9550cd807a1293d4ec722b6cd5 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 21 Aug 2018 15:44:48 -0300 Subject: [PATCH 0711/3715] powerpc/iommu: Avoid derefence before pointer check [ Upstream commit 984ecdd68de0fa1f63ce205d6c19ef5a7bc67b40 ] The tbl pointer is being derefenced by IOMMU_PAGE_SIZE prior the check if it is not NULL. Just moving the dereference code to after the check, where there will be guarantee that 'tbl' will not be NULL. Signed-off-by: Breno Leitao Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/kernel/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index af7a20dc6e09..80b6caaa9b92 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -785,9 +785,9 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl, vaddr = page_address(page) + offset; uaddr = (unsigned long)vaddr; - npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE(tbl)); if (tbl) { + npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE(tbl)); align = 0; if (tbl->it_page_shift < PAGE_SHIFT && size >= PAGE_SIZE && ((unsigned long)vaddr & ~PAGE_MASK) == 0) From 87227faf46b25dc70ce50c9d05efa2157917c18a Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 15 Sep 2018 01:30:45 +1000 Subject: [PATCH 0712/3715] powerpc/64s/hash: Fix stab_rr off by one initialization [ Upstream commit 09b4438db13fa83b6219aee5993711a2aa2a0c64 ] This causes SLB alloation to start 1 beyond the start of the SLB. There is no real problem because after it wraps it stats behaving properly, it's just surprisig to see when looking at SLB traces. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/mm/slb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 6d9bf014b3e7..2502fe3bfb54 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -315,7 +315,7 @@ void slb_initialize(void) #endif } - get_paca()->stab_rr = SLB_NUM_BOLTED; + get_paca()->stab_rr = SLB_NUM_BOLTED - 1; lflags = SLB_VSID_KERNEL | linear_llp; vflags = SLB_VSID_KERNEL | vmalloc_llp; From 2c4c8ad782e4215b6f374535b4cac085e6228aff Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Mon, 17 Sep 2018 14:14:02 -0500 Subject: [PATCH 0713/3715] powerpc/pseries: Disable CPU hotplug across migrations [ Upstream commit 85a88cabad57d26d826dd94ea34d3a785824d802 ] When performing partition migrations all present CPUs must be online as all present CPUs must make the H_JOIN call as part of the migration process. Once all present CPUs make the H_JOIN call, one CPU is returned to make the rtas call to perform the migration to the destination system. During testing of migration and changing the SMT state we have found instances where CPUs are offlined, as part of the SMT state change, before they make the H_JOIN call. This results in a hung system where every CPU is either in H_JOIN or offline. To prevent this this patch disables CPU hotplug during the migration process. Signed-off-by: Nathan Fontenot Reviewed-by: Tyrel Datwyler Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/kernel/rtas.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 141d192c6953..a01f83ba739e 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -984,6 +984,7 @@ int rtas_ibm_suspend_me(u64 handle) goto out; } + cpu_hotplug_disable(); stop_topology_update(); /* Call function on all CPUs. One of us will make the @@ -998,6 +999,7 @@ int rtas_ibm_suspend_me(u64 handle) printk(KERN_ERR "Error doing global join\n"); start_topology_update(); + cpu_hotplug_enable(); /* Take down CPUs not online prior to suspend */ cpuret = rtas_offline_cpus_mask(offline_mask); From 9bee4f9f4460db86c47fe01f9671af6a8951efda Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 14 Sep 2018 13:36:48 +0930 Subject: [PATCH 0714/3715] powerpc: Fix duplicate const clang warning in user access code [ Upstream commit e00d93ac9a189673028ac125a74b9bc8ae73eebc ] This re-applies commit b91c1e3e7a6f ("powerpc: Fix duplicate const clang warning in user access code") (Jun 2015) which was undone in commits: f2ca80905929 ("powerpc/sparse: Constify the address pointer in __get_user_nosleep()") (Feb 2017) d466f6c5cac1 ("powerpc/sparse: Constify the address pointer in __get_user_nocheck()") (Feb 2017) f84ed59a612d ("powerpc/sparse: Constify the address pointer in __get_user_check()") (Feb 2017) We see a large number of duplicate const errors in the user access code when building with llvm/clang: include/linux/pagemap.h:576:8: warning: duplicate 'const' declaration specifier [-Wduplicate-decl-specifier] ret = __get_user(c, uaddr); The problem is we are doing const __typeof__(*(ptr)), which will hit the warning if ptr is marked const. Removing const does not seem to have any effect on GCC code generation. Signed-off-by: Anton Blanchard Signed-off-by: Joel Stanley Reviewed-by: Nick Desaulniers Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/uaccess.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 51f00c00d7e4..3865d1d23597 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -234,7 +234,7 @@ do { \ ({ \ long __gu_err; \ __long_type(*(ptr)) __gu_val; \ - const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ + __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ __chk_user_ptr(ptr); \ if (!is_kernel_addr((unsigned long)__gu_addr)) \ might_fault(); \ @@ -248,7 +248,7 @@ do { \ ({ \ long __gu_err = -EFAULT; \ __long_type(*(ptr)) __gu_val = 0; \ - const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ + __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ might_fault(); \ if (access_ok(VERIFY_READ, __gu_addr, (size))) { \ barrier_nospec(); \ @@ -262,7 +262,7 @@ do { \ ({ \ long __gu_err; \ __long_type(*(ptr)) __gu_val; \ - const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ + __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ __chk_user_ptr(ptr); \ barrier_nospec(); \ __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ From 7dbc3efb7430abdddf0be186bd0ad2611d767d23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20Bugge?= Date: Mon, 17 Sep 2018 16:07:07 +0200 Subject: [PATCH 0715/3715] RDMA/i40iw: Fix incorrect iterator type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 802fa45cd320de319e86c93bca72abec028ba059 ] Commit f27b4746f378 ("i40iw: add connection management code") uses an incorrect rcu iterator, whilst holding the rtnl_lock. Since the critical region invokes i40iw_manage_qhash(), which is a sleeping function, the rcu locking and traversal cannot be used. Signed-off-by: Håkon Bugge Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/i40iw/i40iw_cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index b7f1ce5333cb..880c63579ba8 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -1667,7 +1667,7 @@ static enum i40iw_status_code i40iw_add_mqh_6(struct i40iw_device *iwdev, unsigned long flags; rtnl_lock(); - for_each_netdev_rcu(&init_net, ip_dev) { + for_each_netdev(&init_net, ip_dev) { if ((((rdma_vlan_dev_vlan_id(ip_dev) < I40IW_NO_VLAN) && (rdma_vlan_dev_real_dev(ip_dev) == iwdev->netdev)) || (ip_dev == iwdev->netdev)) && (ip_dev->flags & IFF_UP)) { From 714ab224a8db6e8255c61a42613de9349ceb0bba Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 3 Aug 2018 07:05:21 +0530 Subject: [PATCH 0716/3715] OPP: Protect dev_list with opp_table lock [ Upstream commit 3d2556992a878a2210d3be498416aee39e0c32aa ] The dev_list needs to be protected with a lock, else we may have simultaneous access (addition/removal) to it and that would be racy. Extend scope of the opp_table lock to protect dev_list as well. Tested-by: Niklas Cassel Signed-off-by: Viresh Kumar Signed-off-by: Sasha Levin --- drivers/base/power/opp/core.c | 21 +++++++++++++++++++-- drivers/base/power/opp/cpu.c | 2 ++ drivers/base/power/opp/opp.h | 2 +- 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index d5e7e8cc4f22..8100c8769149 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -49,9 +49,14 @@ static struct opp_device *_find_opp_dev(const struct device *dev, static struct opp_table *_find_opp_table_unlocked(struct device *dev) { struct opp_table *opp_table; + bool found; list_for_each_entry(opp_table, &opp_tables, node) { - if (_find_opp_dev(dev, opp_table)) { + mutex_lock(&opp_table->lock); + found = !!_find_opp_dev(dev, opp_table); + mutex_unlock(&opp_table->lock); + + if (found) { _get_opp_table_kref(opp_table); return opp_table; @@ -711,6 +716,8 @@ struct opp_device *_add_opp_dev(const struct device *dev, /* Initialize opp-dev */ opp_dev->dev = dev; + + mutex_lock(&opp_table->lock); list_add(&opp_dev->node, &opp_table->dev_list); /* Create debugfs entries for the opp_table */ @@ -718,6 +725,7 @@ struct opp_device *_add_opp_dev(const struct device *dev, if (ret) dev_err(dev, "%s: Failed to register opp debugfs (%d)\n", __func__, ret); + mutex_unlock(&opp_table->lock); return opp_dev; } @@ -736,6 +744,7 @@ static struct opp_table *_allocate_opp_table(struct device *dev) if (!opp_table) return NULL; + mutex_init(&opp_table->lock); INIT_LIST_HEAD(&opp_table->dev_list); opp_dev = _add_opp_dev(dev, opp_table); @@ -757,7 +766,6 @@ static struct opp_table *_allocate_opp_table(struct device *dev) BLOCKING_INIT_NOTIFIER_HEAD(&opp_table->head); INIT_LIST_HEAD(&opp_table->opp_list); - mutex_init(&opp_table->lock); kref_init(&opp_table->kref); /* Secure the device table modification */ @@ -799,6 +807,10 @@ static void _opp_table_kref_release(struct kref *kref) if (!IS_ERR(opp_table->clk)) clk_put(opp_table->clk); + /* + * No need to take opp_table->lock here as we are guaranteed that no + * references to the OPP table are taken at this point. + */ opp_dev = list_first_entry(&opp_table->dev_list, struct opp_device, node); @@ -1702,6 +1714,9 @@ void _dev_pm_opp_remove_table(struct opp_table *opp_table, struct device *dev, { struct dev_pm_opp *opp, *tmp; + /* Protect dev_list */ + mutex_lock(&opp_table->lock); + /* Find if opp_table manages a single device */ if (list_is_singular(&opp_table->dev_list)) { /* Free static OPPs */ @@ -1712,6 +1727,8 @@ void _dev_pm_opp_remove_table(struct opp_table *opp_table, struct device *dev, } else { _remove_opp_dev(_find_opp_dev(dev, opp_table), opp_table); } + + mutex_unlock(&opp_table->lock); } void _dev_pm_opp_find_and_remove_table(struct device *dev, bool remove_all) diff --git a/drivers/base/power/opp/cpu.c b/drivers/base/power/opp/cpu.c index 2d87bc1adf38..66e406bd4d62 100644 --- a/drivers/base/power/opp/cpu.c +++ b/drivers/base/power/opp/cpu.c @@ -222,8 +222,10 @@ int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask) cpumask_clear(cpumask); if (opp_table->shared_opp == OPP_TABLE_ACCESS_SHARED) { + mutex_lock(&opp_table->lock); list_for_each_entry(opp_dev, &opp_table->dev_list, node) cpumask_set_cpu(opp_dev->dev->id, cpumask); + mutex_unlock(&opp_table->lock); } else { cpumask_set_cpu(cpu_dev->id, cpumask); } diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h index 166eef990599..0a206c6b9086 100644 --- a/drivers/base/power/opp/opp.h +++ b/drivers/base/power/opp/opp.h @@ -124,7 +124,7 @@ enum opp_table_access { * @dev_list: list of devices that share these OPPs * @opp_list: table of opps * @kref: for reference count of the table. - * @lock: mutex protecting the opp_list. + * @lock: mutex protecting the opp_list and dev_list. * @np: struct device_node pointer for opp's DT node. * @clock_latency_ns_max: Max clock latency in nanoseconds. * @shared_opp: OPP is shared between multiple devices. From dcd758c0346d1f03fca83ac4f5b8b1f589a3cda1 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 13 Sep 2018 15:16:22 -0500 Subject: [PATCH 0717/3715] libfdt: Ensure INT_MAX is defined in libfdt_env.h [ Upstream commit 53dd9dce6979bc54d64a3a09a2fb20187a025be7 ] The next update of libfdt has a new dependency on INT_MAX. Update the instances of libfdt_env.h in the kernel to either include the necessary header with the definition or define it locally. Cc: Russell King Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linux-arm-kernel@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- arch/arm/boot/compressed/libfdt_env.h | 2 ++ arch/powerpc/boot/libfdt_env.h | 2 ++ include/linux/libfdt_env.h | 1 + 3 files changed, 5 insertions(+) diff --git a/arch/arm/boot/compressed/libfdt_env.h b/arch/arm/boot/compressed/libfdt_env.h index 07437816e098..b36c0289a308 100644 --- a/arch/arm/boot/compressed/libfdt_env.h +++ b/arch/arm/boot/compressed/libfdt_env.h @@ -6,6 +6,8 @@ #include #include +#define INT_MAX ((int)(~0U>>1)) + typedef __be16 fdt16_t; typedef __be32 fdt32_t; typedef __be64 fdt64_t; diff --git a/arch/powerpc/boot/libfdt_env.h b/arch/powerpc/boot/libfdt_env.h index f52c31b1f48f..39155d3b2cef 100644 --- a/arch/powerpc/boot/libfdt_env.h +++ b/arch/powerpc/boot/libfdt_env.h @@ -5,6 +5,8 @@ #include #include +#define INT_MAX ((int)(~0U>>1)) + #include "of.h" typedef u32 uint32_t; diff --git a/include/linux/libfdt_env.h b/include/linux/libfdt_env.h index 14997285e53d..1aa707ab19bb 100644 --- a/include/linux/libfdt_env.h +++ b/include/linux/libfdt_env.h @@ -2,6 +2,7 @@ #ifndef _LIBFDT_ENV_H #define _LIBFDT_ENV_H +#include /* For INT_MAX */ #include #include From b1fdcfbdb93cc899d19091a2385edb40a07420e3 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Mon, 17 Sep 2018 07:20:35 +0200 Subject: [PATCH 0718/3715] power: supply: twl4030_charger: fix charging current out-of-bounds [ Upstream commit 8314c212f995bc0d06b54ad02ef0ab4089781540 ] the charging current uses unsigned int variables, if we step back if the current is still low, we would run into negative which means setting the target to a huge value. Better add checks here. Signed-off-by: Andreas Kemnade Signed-off-by: Sebastian Reichel Signed-off-by: Sasha Levin --- drivers/power/supply/twl4030_charger.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/power/supply/twl4030_charger.c b/drivers/power/supply/twl4030_charger.c index 0cc12bfe7b02..d3cba954bab5 100644 --- a/drivers/power/supply/twl4030_charger.c +++ b/drivers/power/supply/twl4030_charger.c @@ -420,7 +420,8 @@ static void twl4030_current_worker(struct work_struct *data) if (v < USB_MIN_VOLT) { /* Back up and stop adjusting. */ - bci->usb_cur -= USB_CUR_STEP; + if (bci->usb_cur >= USB_CUR_STEP) + bci->usb_cur -= USB_CUR_STEP; bci->usb_cur_target = bci->usb_cur; } else if (bci->usb_cur >= bci->usb_cur_target || bci->usb_cur + USB_CUR_STEP > USB_MAX_CURRENT) { From 2ed6502bd9dd7f0dd1f0e0e6381d545a4fdcfa79 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Mon, 17 Sep 2018 07:00:07 +0200 Subject: [PATCH 0719/3715] power: supply: twl4030_charger: disable eoc interrupt on linear charge [ Upstream commit 079cdff3d0a09c5da10ae1be35def7a116776328 ] This avoids getting woken up from suspend after power interruptions when the bci wrongly thinks the battery is full just because of input current going low because of low input power Signed-off-by: Andreas Kemnade Signed-off-by: Sebastian Reichel Signed-off-by: Sasha Levin --- drivers/power/supply/twl4030_charger.c | 27 +++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/power/supply/twl4030_charger.c b/drivers/power/supply/twl4030_charger.c index d3cba954bab5..b20491016b1e 100644 --- a/drivers/power/supply/twl4030_charger.c +++ b/drivers/power/supply/twl4030_charger.c @@ -440,6 +440,7 @@ static void twl4030_current_worker(struct work_struct *data) static int twl4030_charger_enable_usb(struct twl4030_bci *bci, bool enable) { int ret; + u32 reg; if (bci->usb_mode == CHARGE_OFF) enable = false; @@ -453,14 +454,38 @@ static int twl4030_charger_enable_usb(struct twl4030_bci *bci, bool enable) bci->usb_enabled = 1; } - if (bci->usb_mode == CHARGE_AUTO) + if (bci->usb_mode == CHARGE_AUTO) { + /* Enable interrupts now. */ + reg = ~(u32)(TWL4030_ICHGLOW | TWL4030_ICHGEOC | + TWL4030_TBATOR2 | TWL4030_TBATOR1 | + TWL4030_BATSTS); + ret = twl_i2c_write_u8(TWL4030_MODULE_INTERRUPTS, reg, + TWL4030_INTERRUPTS_BCIIMR1A); + if (ret < 0) { + dev_err(bci->dev, + "failed to unmask interrupts: %d\n", + ret); + return ret; + } /* forcing the field BCIAUTOUSB (BOOT_BCI[1]) to 1 */ ret = twl4030_clear_set_boot_bci(0, TWL4030_BCIAUTOUSB); + } /* forcing USBFASTMCHG(BCIMFSTS4[2]) to 1 */ ret = twl4030_clear_set(TWL_MODULE_MAIN_CHARGE, 0, TWL4030_USBFASTMCHG, TWL4030_BCIMFSTS4); if (bci->usb_mode == CHARGE_LINEAR) { + /* Enable interrupts now. */ + reg = ~(u32)(TWL4030_ICHGLOW | TWL4030_TBATOR2 | + TWL4030_TBATOR1 | TWL4030_BATSTS); + ret = twl_i2c_write_u8(TWL4030_MODULE_INTERRUPTS, reg, + TWL4030_INTERRUPTS_BCIIMR1A); + if (ret < 0) { + dev_err(bci->dev, + "failed to unmask interrupts: %d\n", + ret); + return ret; + } twl4030_clear_set_boot_bci(TWL4030_BCIAUTOAC|TWL4030_CVENAC, 0); /* Watch dog key: WOVF acknowledge */ ret = twl_i2c_write_u8(TWL_MODULE_MAIN_CHARGE, 0x33, From c2e8f9ed18237079105c9adc8f01942c2a58a6a2 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 19 Sep 2018 18:23:39 +0800 Subject: [PATCH 0720/3715] net: toshiba: fix return type of ndo_start_xmit function [ Upstream commit bacade822524e02f662d88f784d2ae821a5546fb ] The method ndo_start_xmit() is defined as returning an 'netdev_tx_t', which is a typedef for an enum type, so make sure the implementation in this driver has returns 'netdev_tx_t' value, and change the function return type to netdev_tx_t. Found by coccinelle. Signed-off-by: YueHaibing Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/toshiba/ps3_gelic_net.c | 4 ++-- drivers/net/ethernet/toshiba/ps3_gelic_net.h | 2 +- drivers/net/ethernet/toshiba/spider_net.c | 4 ++-- drivers/net/ethernet/toshiba/tc35815.c | 6 ++++-- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c index 88d74aef218a..75237c81c63d 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c @@ -845,9 +845,9 @@ static int gelic_card_kick_txdma(struct gelic_card *card, * @skb: packet to send out * @netdev: interface device structure * - * returns 0 on success, <0 on failure + * returns NETDEV_TX_OK on success, NETDEV_TX_BUSY on failure */ -int gelic_net_xmit(struct sk_buff *skb, struct net_device *netdev) +netdev_tx_t gelic_net_xmit(struct sk_buff *skb, struct net_device *netdev) { struct gelic_card *card = netdev_card(netdev); struct gelic_descr *descr; diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.h b/drivers/net/ethernet/toshiba/ps3_gelic_net.h index 003d0452d9cb..fbbf9b54b173 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.h +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.h @@ -370,7 +370,7 @@ void gelic_card_up(struct gelic_card *card); void gelic_card_down(struct gelic_card *card); int gelic_net_open(struct net_device *netdev); int gelic_net_stop(struct net_device *netdev); -int gelic_net_xmit(struct sk_buff *skb, struct net_device *netdev); +netdev_tx_t gelic_net_xmit(struct sk_buff *skb, struct net_device *netdev); void gelic_net_set_multi(struct net_device *netdev); void gelic_net_tx_timeout(struct net_device *netdev); int gelic_net_setup_netdev(struct net_device *netdev, struct gelic_card *card); diff --git a/drivers/net/ethernet/toshiba/spider_net.c b/drivers/net/ethernet/toshiba/spider_net.c index cec9e70ab995..da136b8843dd 100644 --- a/drivers/net/ethernet/toshiba/spider_net.c +++ b/drivers/net/ethernet/toshiba/spider_net.c @@ -880,9 +880,9 @@ out: * @skb: packet to send out * @netdev: interface device structure * - * returns 0 on success, !0 on failure + * returns NETDEV_TX_OK on success, NETDEV_TX_BUSY on failure */ -static int +static netdev_tx_t spider_net_xmit(struct sk_buff *skb, struct net_device *netdev) { int cnt; diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c index 9146068979d2..03afc4d8c3ec 100644 --- a/drivers/net/ethernet/toshiba/tc35815.c +++ b/drivers/net/ethernet/toshiba/tc35815.c @@ -474,7 +474,8 @@ static void free_rxbuf_skb(struct pci_dev *hwdev, struct sk_buff *skb, dma_addr_ /* Index to functions, as function prototypes. */ static int tc35815_open(struct net_device *dev); -static int tc35815_send_packet(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t tc35815_send_packet(struct sk_buff *skb, + struct net_device *dev); static irqreturn_t tc35815_interrupt(int irq, void *dev_id); static int tc35815_rx(struct net_device *dev, int limit); static int tc35815_poll(struct napi_struct *napi, int budget); @@ -1248,7 +1249,8 @@ tc35815_open(struct net_device *dev) * invariant will hold if you make sure that the netif_*_queue() * calls are done at the proper times. */ -static int tc35815_send_packet(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t +tc35815_send_packet(struct sk_buff *skb, struct net_device *dev) { struct tc35815_local *lp = netdev_priv(dev); struct TxFD *txfd; From 2cef1eda48328fef25062c75b691b49cee473561 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 19 Sep 2018 18:32:40 +0800 Subject: [PATCH 0721/3715] net: xilinx: fix return type of ndo_start_xmit function [ Upstream commit 81255af8d9d5565004792c295dde49344df450ca ] The method ndo_start_xmit() is defined as returning an 'netdev_tx_t', which is a typedef for an enum type, so make sure the implementation in this driver has returns 'netdev_tx_t' value, and change the function return type to netdev_tx_t. Found by coccinelle. Signed-off-by: YueHaibing Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/xilinx/ll_temac_main.c | 3 ++- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 3 ++- drivers/net/ethernet/xilinx/xilinx_emaclite.c | 9 +++++---- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 60abc9250f56..2241f9897092 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -674,7 +674,8 @@ static inline int temac_check_tx_bd_space(struct temac_local *lp, int num_frag) return 0; } -static int temac_start_xmit(struct sk_buff *skb, struct net_device *ndev) +static netdev_tx_t +temac_start_xmit(struct sk_buff *skb, struct net_device *ndev) { struct temac_local *lp = netdev_priv(ndev); struct cdmac_bd *cur_p; diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index b481cb174b23..9ccd08a051f6 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -657,7 +657,8 @@ static inline int axienet_check_tx_bd_space(struct axienet_local *lp, * start the transmission. Additionally if checksum offloading is supported, * it populates AXI Stream Control fields with appropriate values. */ -static int axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) +static netdev_tx_t +axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) { u32 ii; u32 num_frag; diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index 69e31ceccfae..6f3e79159d7a 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -1005,9 +1005,10 @@ static int xemaclite_close(struct net_device *dev) * deferred and the Tx queue is stopped so that the deferred socket buffer can * be transmitted when the Emaclite device is free to transmit data. * - * Return: 0, always. + * Return: NETDEV_TX_OK, always. */ -static int xemaclite_send(struct sk_buff *orig_skb, struct net_device *dev) +static netdev_tx_t +xemaclite_send(struct sk_buff *orig_skb, struct net_device *dev) { struct net_local *lp = netdev_priv(dev); struct sk_buff *new_skb; @@ -1028,7 +1029,7 @@ static int xemaclite_send(struct sk_buff *orig_skb, struct net_device *dev) /* Take the time stamp now, since we can't do this in an ISR. */ skb_tx_timestamp(new_skb); spin_unlock_irqrestore(&lp->reset_lock, flags); - return 0; + return NETDEV_TX_OK; } spin_unlock_irqrestore(&lp->reset_lock, flags); @@ -1037,7 +1038,7 @@ static int xemaclite_send(struct sk_buff *orig_skb, struct net_device *dev) dev->stats.tx_bytes += len; dev_consume_skb_any(new_skb); - return 0; + return NETDEV_TX_OK; } /** From 70c3daaa03e965d3f2bd10696be130b376f73bca Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 19 Sep 2018 18:45:12 +0800 Subject: [PATCH 0722/3715] net: broadcom: fix return type of ndo_start_xmit function [ Upstream commit 0c13b8d1aee87c35a2fbc1d85a1f766227cf54b5 ] The method ndo_start_xmit() is defined as returning an 'netdev_tx_t', which is a typedef for an enum type, so make sure the implementation in this driver has returns 'netdev_tx_t' value, and change the function return type to netdev_tx_t. Found by coccinelle. Signed-off-by: YueHaibing Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 5 +++-- drivers/net/ethernet/broadcom/sb1250-mac.c | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 68470c7c630a..35eb0119b015 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -571,12 +571,13 @@ static irqreturn_t bcm_enet_isr_dma(int irq, void *dev_id) /* * tx request callback */ -static int bcm_enet_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t +bcm_enet_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct bcm_enet_priv *priv; struct bcm_enet_desc *desc; u32 len_stat; - int ret; + netdev_tx_t ret; priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c index ecdef42f0ae6..00230fe097d9 100644 --- a/drivers/net/ethernet/broadcom/sb1250-mac.c +++ b/drivers/net/ethernet/broadcom/sb1250-mac.c @@ -299,7 +299,7 @@ static enum sbmac_state sbmac_set_channel_state(struct sbmac_softc *, static void sbmac_promiscuous_mode(struct sbmac_softc *sc, int onoff); static uint64_t sbmac_addr2reg(unsigned char *ptr); static irqreturn_t sbmac_intr(int irq, void *dev_instance); -static int sbmac_start_tx(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t sbmac_start_tx(struct sk_buff *skb, struct net_device *dev); static void sbmac_setmulti(struct sbmac_softc *sc); static int sbmac_init(struct platform_device *pldev, long long base); static int sbmac_set_speed(struct sbmac_softc *s, enum sbmac_speed speed); @@ -2028,7 +2028,7 @@ static irqreturn_t sbmac_intr(int irq,void *dev_instance) * Return value: * nothing ********************************************************************* */ -static int sbmac_start_tx(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t sbmac_start_tx(struct sk_buff *skb, struct net_device *dev) { struct sbmac_softc *sc = netdev_priv(dev); unsigned long flags; From 804a9a60568afd7aa9c9a511e30c460d615157b2 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 19 Sep 2018 18:50:17 +0800 Subject: [PATCH 0723/3715] net: amd: fix return type of ndo_start_xmit function [ Upstream commit fe72352e37ae8478f4c97975a9831f0c50f22e73 ] The method ndo_start_xmit() is defined as returning an 'netdev_tx_t', which is a typedef for an enum type, so make sure the implementation in this driver has returns 'netdev_tx_t' value, and change the function return type to netdev_tx_t. Found by coccinelle. Signed-off-by: YueHaibing Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amd/am79c961a.c | 2 +- drivers/net/ethernet/amd/atarilance.c | 6 ++++-- drivers/net/ethernet/amd/declance.c | 2 +- drivers/net/ethernet/amd/sun3lance.c | 6 ++++-- drivers/net/ethernet/amd/sunlance.c | 2 +- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 4 ++-- 6 files changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/amd/am79c961a.c b/drivers/net/ethernet/amd/am79c961a.c index b11e910850f7..78d1e5385a3e 100644 --- a/drivers/net/ethernet/amd/am79c961a.c +++ b/drivers/net/ethernet/amd/am79c961a.c @@ -440,7 +440,7 @@ static void am79c961_timeout(struct net_device *dev) /* * Transmit a packet */ -static int +static netdev_tx_t am79c961_sendpacket(struct sk_buff *skb, struct net_device *dev) { struct dev_priv *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/amd/atarilance.c b/drivers/net/ethernet/amd/atarilance.c index c5b81268c284..d3d44e07afbc 100644 --- a/drivers/net/ethernet/amd/atarilance.c +++ b/drivers/net/ethernet/amd/atarilance.c @@ -339,7 +339,8 @@ static unsigned long lance_probe1( struct net_device *dev, struct lance_addr *init_rec ); static int lance_open( struct net_device *dev ); static void lance_init_ring( struct net_device *dev ); -static int lance_start_xmit( struct sk_buff *skb, struct net_device *dev ); +static netdev_tx_t lance_start_xmit(struct sk_buff *skb, + struct net_device *dev); static irqreturn_t lance_interrupt( int irq, void *dev_id ); static int lance_rx( struct net_device *dev ); static int lance_close( struct net_device *dev ); @@ -769,7 +770,8 @@ static void lance_tx_timeout (struct net_device *dev) /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ -static int lance_start_xmit( struct sk_buff *skb, struct net_device *dev ) +static netdev_tx_t +lance_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct lance_private *lp = netdev_priv(dev); struct lance_ioreg *IO = lp->iobase; diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c index c7cde58feaf7..290d070b293b 100644 --- a/drivers/net/ethernet/amd/declance.c +++ b/drivers/net/ethernet/amd/declance.c @@ -893,7 +893,7 @@ static void lance_tx_timeout(struct net_device *dev) netif_wake_queue(dev); } -static int lance_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t lance_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct lance_private *lp = netdev_priv(dev); volatile struct lance_regs *ll = lp->ll; diff --git a/drivers/net/ethernet/amd/sun3lance.c b/drivers/net/ethernet/amd/sun3lance.c index 77b1db267730..da7e3d4f4166 100644 --- a/drivers/net/ethernet/amd/sun3lance.c +++ b/drivers/net/ethernet/amd/sun3lance.c @@ -236,7 +236,8 @@ struct lance_private { static int lance_probe( struct net_device *dev); static int lance_open( struct net_device *dev ); static void lance_init_ring( struct net_device *dev ); -static int lance_start_xmit( struct sk_buff *skb, struct net_device *dev ); +static netdev_tx_t lance_start_xmit(struct sk_buff *skb, + struct net_device *dev); static irqreturn_t lance_interrupt( int irq, void *dev_id); static int lance_rx( struct net_device *dev ); static int lance_close( struct net_device *dev ); @@ -511,7 +512,8 @@ static void lance_init_ring( struct net_device *dev ) } -static int lance_start_xmit( struct sk_buff *skb, struct net_device *dev ) +static netdev_tx_t +lance_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct lance_private *lp = netdev_priv(dev); int entry, len; diff --git a/drivers/net/ethernet/amd/sunlance.c b/drivers/net/ethernet/amd/sunlance.c index 9845e07d40cd..1a44c8c26b8a 100644 --- a/drivers/net/ethernet/amd/sunlance.c +++ b/drivers/net/ethernet/amd/sunlance.c @@ -1106,7 +1106,7 @@ static void lance_tx_timeout(struct net_device *dev) netif_wake_queue(dev); } -static int lance_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t lance_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct lance_private *lp = netdev_priv(dev); int entry, skblen, len; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 75c4455e2271..c65d2cdcc7cf 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1964,7 +1964,7 @@ static int xgbe_close(struct net_device *netdev) return 0; } -static int xgbe_xmit(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t xgbe_xmit(struct sk_buff *skb, struct net_device *netdev) { struct xgbe_prv_data *pdata = netdev_priv(netdev); struct xgbe_hw_if *hw_if = &pdata->hw_if; @@ -1973,7 +1973,7 @@ static int xgbe_xmit(struct sk_buff *skb, struct net_device *netdev) struct xgbe_ring *ring; struct xgbe_packet_data *packet; struct netdev_queue *txq; - int ret; + netdev_tx_t ret; DBGPR("-->xgbe_xmit: skb->len = %d\n", skb->len); From 53a5d204161bb0ffa4a4791be3ccb92e5b9b7956 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 19 Sep 2018 19:21:32 +0800 Subject: [PATCH 0724/3715] net: sun: fix return type of ndo_start_xmit function [ Upstream commit 0e0cc31f6999df18bb5cfd0bd83c892ed5633975 ] The method ndo_start_xmit() is defined as returning an 'netdev_tx_t', which is a typedef for an enum type, but the implementation in this driver returns an 'int'. Found by coccinelle. Signed-off-by: YueHaibing Acked-by: Shannon Nelson Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/sun/ldmvsw.c | 2 +- drivers/net/ethernet/sun/sunbmac.c | 3 ++- drivers/net/ethernet/sun/sunqe.c | 2 +- drivers/net/ethernet/sun/sunvnet.c | 2 +- drivers/net/ethernet/sun/sunvnet_common.c | 14 ++++++++------ drivers/net/ethernet/sun/sunvnet_common.h | 7 ++++--- 6 files changed, 17 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c index 5b56c24b6ed2..e6b96c2989b2 100644 --- a/drivers/net/ethernet/sun/ldmvsw.c +++ b/drivers/net/ethernet/sun/ldmvsw.c @@ -111,7 +111,7 @@ static u16 vsw_select_queue(struct net_device *dev, struct sk_buff *skb, } /* Wrappers to common functions */ -static int vsw_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t vsw_start_xmit(struct sk_buff *skb, struct net_device *dev) { return sunvnet_start_xmit_common(skb, dev, vsw_tx_port_find); } diff --git a/drivers/net/ethernet/sun/sunbmac.c b/drivers/net/ethernet/sun/sunbmac.c index 3189722110c2..9a60fb2b4e9d 100644 --- a/drivers/net/ethernet/sun/sunbmac.c +++ b/drivers/net/ethernet/sun/sunbmac.c @@ -951,7 +951,8 @@ static void bigmac_tx_timeout(struct net_device *dev) } /* Put a packet on the wire. */ -static int bigmac_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t +bigmac_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct bigmac *bp = netdev_priv(dev); int len, entry; diff --git a/drivers/net/ethernet/sun/sunqe.c b/drivers/net/ethernet/sun/sunqe.c index a6bcdcdd947e..82386a375bd2 100644 --- a/drivers/net/ethernet/sun/sunqe.c +++ b/drivers/net/ethernet/sun/sunqe.c @@ -569,7 +569,7 @@ out: } /* Get a packet queued to go onto the wire. */ -static int qe_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t qe_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct sunqe *qep = netdev_priv(dev); struct sunqe_buffers *qbufs = qep->buffers; diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c index 65347d2f139b..02ebbe74d93d 100644 --- a/drivers/net/ethernet/sun/sunvnet.c +++ b/drivers/net/ethernet/sun/sunvnet.c @@ -245,7 +245,7 @@ static u16 vnet_select_queue(struct net_device *dev, struct sk_buff *skb, } /* Wrappers to common functions */ -static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t vnet_start_xmit(struct sk_buff *skb, struct net_device *dev) { return sunvnet_start_xmit_common(skb, dev, vnet_tx_port_find); } diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c index ecf456c7b6d1..fd84ff8bba31 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.c +++ b/drivers/net/ethernet/sun/sunvnet_common.c @@ -1215,9 +1215,10 @@ static inline struct sk_buff *vnet_skb_shape(struct sk_buff *skb, int ncookies) return skb; } -static int vnet_handle_offloads(struct vnet_port *port, struct sk_buff *skb, - struct vnet_port *(*vnet_tx_port) - (struct sk_buff *, struct net_device *)) +static netdev_tx_t +vnet_handle_offloads(struct vnet_port *port, struct sk_buff *skb, + struct vnet_port *(*vnet_tx_port) + (struct sk_buff *, struct net_device *)) { struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port); struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING]; @@ -1320,9 +1321,10 @@ out_dropped: return NETDEV_TX_OK; } -int sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev, - struct vnet_port *(*vnet_tx_port) - (struct sk_buff *, struct net_device *)) +netdev_tx_t +sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev, + struct vnet_port *(*vnet_tx_port) + (struct sk_buff *, struct net_device *)) { struct vnet_port *port = NULL; struct vio_dring_state *dr; diff --git a/drivers/net/ethernet/sun/sunvnet_common.h b/drivers/net/ethernet/sun/sunvnet_common.h index 6a4dd1fb19bf..3fcb608fbbb3 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.h +++ b/drivers/net/ethernet/sun/sunvnet_common.h @@ -136,9 +136,10 @@ int sunvnet_close_common(struct net_device *dev); void sunvnet_set_rx_mode_common(struct net_device *dev, struct vnet *vp); int sunvnet_set_mac_addr_common(struct net_device *dev, void *p); void sunvnet_tx_timeout_common(struct net_device *dev); -int sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev, - struct vnet_port *(*vnet_tx_port) - (struct sk_buff *, struct net_device *)); +netdev_tx_t +sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev, + struct vnet_port *(*vnet_tx_port) + (struct sk_buff *, struct net_device *)); #ifdef CONFIG_NET_POLL_CONTROLLER void sunvnet_poll_controller_common(struct net_device *dev, struct vnet *vp); #endif From f860e746fb08df1dcd3983fe9b604cb5e79b6b52 Mon Sep 17 00:00:00 2001 From: Fuyun Liang Date: Wed, 19 Sep 2018 18:29:54 +0100 Subject: [PATCH 0725/3715] net: hns3: Fix for setting speed for phy failed problem [ Upstream commit fd8133148eb6a733f9cfdaecd4d99f378e21d582 ] The function of genphy_read_status is that reading phy information from HW and using these information to update SW variable. If user is using ethtool to setting the speed of phy and service task is calling by hclge_get_mac_phy_link, the result of speed setting is uncertain. Because ethtool cmd will modified phydev and hclge_get_mac_phy_link also will modified phydev. Because phy state machine will update phy link periodically, we can just use phydev->link to check the link status. This patch removes function call of genphy_read_status. To ensure accuracy, this patch adds a phy state check. If phy state is not PHY_RUNNING, we consider link is down. Because in some scenarios, phydev->link may be link up, but phy state is not PHY_RUNNING. This is just an intermediate state. In fact, the link is not ready yet. Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support") Signed-off-by: Fuyun Liang Signed-off-by: Peng Li Signed-off-by: Salil Mehta Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 86523e8993cb..3bb6181ff054 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2179,7 +2179,7 @@ static int hclge_get_mac_phy_link(struct hclge_dev *hdev) mac_state = hclge_get_mac_link_status(hdev); if (hdev->hw.mac.phydev) { - if (!genphy_read_status(hdev->hw.mac.phydev)) + if (hdev->hw.mac.phydev->state == PHY_RUNNING) link_stat = mac_state & hdev->hw.mac.phydev->link; else From 947ef993e0cbd389f25c6deb545fc78bb55f7f55 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Wed, 19 Sep 2018 18:29:58 +0100 Subject: [PATCH 0726/3715] net: hns3: Fix parameter type for q_id in hclge_tm_q_to_qs_map_cfg() [ Upstream commit 32c7fbc8ffd752c6aa05d2dd7c13b0f0aa00ddaa ] So far all the places calling hclge_tm_q_to_qs_map_cfg() are assigning an u16 type value to "q_id", and in the processing of hclge_tm_q_to_qs_map_cfg(), it also converts the "q_id" to le16. The max tqp number for pf can be more than 256, we should use "u16" to store the queue id, instead of "u8", which may cause data lost. Fixes: 848440544b41 ("net: hns3: Add support of TX Scheduler & Shaper to HNS3 driver") Signed-off-by: Jian Shen Signed-off-by: Peng Li Signed-off-by: Salil Mehta Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 55228b91d80b..3799cb2548ce 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -200,7 +200,7 @@ static int hclge_tm_qs_to_pri_map_cfg(struct hclge_dev *hdev, } static int hclge_tm_q_to_qs_map_cfg(struct hclge_dev *hdev, - u8 q_id, u16 qs_id) + u16 q_id, u16 qs_id) { struct hclge_nq_to_qs_link_cmd *map; struct hclge_desc desc; From 93c6eddad8473bc6582b865200add350ddab40ea Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 19 Sep 2018 14:42:50 -0700 Subject: [PATCH 0727/3715] nfp: provide a better warning when ring allocation fails [ Upstream commit 23d9f5531c7c28546954b0bf332134a9b8a38c0a ] NFP supports fairly enormous ring sizes (up to 256k descriptors). In commit 466271703867 ("nfp: use kvcalloc() to allocate SW buffer descriptor arrays") we have started using kvcalloc() functions to make sure the allocation of software state arrays doesn't hit the MAX_ORDER limit. Unfortunately, we can't use virtual mappings for the DMA region holding HW descriptors. In case this allocation fails instead of the generic (and fairly scary) warning/splat in the logs print a helpful message explaining what happened and suggesting how to fix it. Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../net/ethernet/netronome/nfp/nfp_net_common.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 6df2c8b2ce6f..bffa25d6dc29 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2169,9 +2169,13 @@ nfp_net_tx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring) tx_ring->size = sizeof(*tx_ring->txds) * tx_ring->cnt; tx_ring->txds = dma_zalloc_coherent(dp->dev, tx_ring->size, - &tx_ring->dma, GFP_KERNEL); - if (!tx_ring->txds) + &tx_ring->dma, + GFP_KERNEL | __GFP_NOWARN); + if (!tx_ring->txds) { + netdev_warn(dp->netdev, "failed to allocate TX descriptor ring memory, requested descriptor count: %d, consider lowering descriptor count\n", + tx_ring->cnt); goto err_alloc; + } sz = sizeof(*tx_ring->txbufs) * tx_ring->cnt; tx_ring->txbufs = kzalloc(sz, GFP_KERNEL); @@ -2314,9 +2318,13 @@ nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring) rx_ring->cnt = dp->rxd_cnt; rx_ring->size = sizeof(*rx_ring->rxds) * rx_ring->cnt; rx_ring->rxds = dma_zalloc_coherent(dp->dev, rx_ring->size, - &rx_ring->dma, GFP_KERNEL); - if (!rx_ring->rxds) + &rx_ring->dma, + GFP_KERNEL | __GFP_NOWARN); + if (!rx_ring->rxds) { + netdev_warn(dp->netdev, "failed to allocate RX descriptor ring memory, requested descriptor count: %d, consider lowering descriptor count\n", + rx_ring->cnt); goto err_alloc; + } sz = sizeof(*rx_ring->rxbufs) * rx_ring->cnt; rx_ring->rxbufs = kzalloc(sz, GFP_KERNEL); From c4d64f2d93e1f16c3d8efc3d95461f7f6dbdff65 Mon Sep 17 00:00:00 2001 From: Nicolas Adell Date: Mon, 27 Aug 2018 15:59:56 +0200 Subject: [PATCH 0728/3715] usb: chipidea: imx: enable OTG overcurrent in case USB subsystem is already started [ Upstream commit 1dedbdf2bbb1ede8d96f35f9845ecae179dc1988 ] When initializing the USB subsystem before starting the kernel, OTG overcurrent detection is disabled. In case the OTG polarity of overcurrent is low active, the overcurrent detection is never enabled again and events cannot be reported as expected. Because imx usb overcurrent polarity is low active by default, only detection needs to be enable in usbmisc init function. Signed-off-by: Nicolas Adell Signed-off-by: Peter Chen Signed-off-by: Sasha Levin --- drivers/usb/chipidea/usbmisc_imx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/chipidea/usbmisc_imx.c b/drivers/usb/chipidea/usbmisc_imx.c index 9f4a0185dd60..b7477fd4443a 100644 --- a/drivers/usb/chipidea/usbmisc_imx.c +++ b/drivers/usb/chipidea/usbmisc_imx.c @@ -343,6 +343,8 @@ static int usbmisc_imx6q_init(struct imx_usbmisc_data *data) } else if (data->oc_polarity == 1) { /* High active */ reg &= ~(MX6_BM_OVER_CUR_DIS | MX6_BM_OVER_CUR_POLARITY); + } else { + reg &= ~(MX6_BM_OVER_CUR_DIS); } writel(reg, usbmisc->base + data->index * 4); From 0407eece7ea1d27a3206f90dfbf30873c72cc3da Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Tue, 4 Sep 2018 17:18:58 +0200 Subject: [PATCH 0729/3715] usb: chipidea: Fix otg event handler [ Upstream commit 59739131e0ca06db7560f9073fff2fb83f6bc2a5 ] At OTG work running time, it's possible that several events need to be addressed (e.g. ID and VBUS events). The current implementation handles only one event at a time which leads to ignoring the other one. Fix it. Signed-off-by: Loic Poulain Signed-off-by: Peter Chen Signed-off-by: Sasha Levin --- drivers/usb/chipidea/otg.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/usb/chipidea/otg.c b/drivers/usb/chipidea/otg.c index 10236fe71522..8bf4032226ed 100644 --- a/drivers/usb/chipidea/otg.c +++ b/drivers/usb/chipidea/otg.c @@ -206,14 +206,17 @@ static void ci_otg_work(struct work_struct *work) } pm_runtime_get_sync(ci->dev); + if (ci->id_event) { ci->id_event = false; ci_handle_id_switch(ci); - } else if (ci->b_sess_valid_event) { + } + + if (ci->b_sess_valid_event) { ci->b_sess_valid_event = false; ci_handle_vbus_change(ci); - } else - dev_err(ci->dev, "unexpected event occurs at %s\n", __func__); + } + pm_runtime_put_sync(ci->dev); enable_irq(ci->irq); From 18b30a7a81508d85768c46ea61f720f6a5a64de4 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 20 Sep 2018 09:21:24 +0300 Subject: [PATCH 0730/3715] mlxsw: spectrum: Init shaper for TCs 8..15 [ Upstream commit a9f36656b519a9a21309793c306941a3cd0eeb8f ] With introduction of MC-aware mode to mlxsw, it became necessary to configure TCs above 7 as well. There is now code in mlxsw to disable ETS for these higher classes, but disablement of max shaper was neglected. By default, max shaper is currently disabled to begin with, so the problem is just cosmetic. However, for symmetry, do like we do for ETS configuration, and call mlxsw_sp_port_ets_maxrate_set() for both TC i and i + 8. Signed-off-by: Petr Machata Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index a909aa315a92..226187cba0e8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2825,6 +2825,13 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) MLXSW_REG_QEEC_MAS_DIS); if (err) return err; + + err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_TC, + i + 8, i, + MLXSW_REG_QEEC_MAS_DIS); + if (err) + return err; } /* Map all priorities to traffic class 0. */ From 581bcaf84e4b652655ac3d9de948d1e708147fba Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Sat, 8 Sep 2018 17:33:40 -0500 Subject: [PATCH 0731/3715] ARM: dts: am335x-evm: fix number of cpsw [ Upstream commit dcbf6b18d81bcdc51390ca1b258c17e2e13b7d0c ] am335x-evm has only one CPSW external port physically wired, but DT defines 2 ext. ports. As result, PHY connection failure reported for the second ext. port. Update DT to reflect am335x-evm board HW configuration, and, while here, switch to use phy-handle instead of phy_id. Signed-off-by: Grygorii Strashko Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/boot/dts/am335x-evm.dts | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm/boot/dts/am335x-evm.dts b/arch/arm/boot/dts/am335x-evm.dts index 478434ebff92..27ff3e689e96 100644 --- a/arch/arm/boot/dts/am335x-evm.dts +++ b/arch/arm/boot/dts/am335x-evm.dts @@ -724,6 +724,7 @@ pinctrl-0 = <&cpsw_default>; pinctrl-1 = <&cpsw_sleep>; status = "okay"; + slaves = <1>; }; &davinci_mdio { @@ -731,15 +732,14 @@ pinctrl-0 = <&davinci_mdio_default>; pinctrl-1 = <&davinci_mdio_sleep>; status = "okay"; + + ethphy0: ethernet-phy@0 { + reg = <0>; + }; }; &cpsw_emac0 { - phy_id = <&davinci_mdio>, <0>; - phy-mode = "rgmii-txid"; -}; - -&cpsw_emac1 { - phy_id = <&davinci_mdio>, <1>; + phy-handle = <ðphy0>; phy-mode = "rgmii-txid"; }; From 47df751764700abe7986e4d0cb07d6a5be481c65 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 20 Sep 2018 17:41:30 +0800 Subject: [PATCH 0732/3715] f2fs: fix to recover inode's uid/gid during POR [ Upstream commit dc4cd1257c86451cec3e8e352cc376348e4f4af4 ] Step to reproduce this bug: 1. logon as root 2. mount -t f2fs /dev/sdd /mnt; 3. touch /mnt/file; 4. chown system /mnt/file; chgrp system /mnt/file; 5. xfs_io -f /mnt/file -c "fsync"; 6. godown /mnt; 7. umount /mnt; 8. mount -t f2fs /dev/sdd /mnt; After step 8) we will expect file's uid/gid are all system, but during recovery, these two fields were not been recovered, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/recovery.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index db357e9ad599..adbf2600c090 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -201,6 +201,8 @@ static void recover_inode(struct inode *inode, struct page *page) char *name; inode->i_mode = le16_to_cpu(raw->i_mode); + i_uid_write(inode, le32_to_cpu(raw->i_uid)); + i_gid_write(inode, le32_to_cpu(raw->i_gid)); f2fs_i_size_write(inode, le64_to_cpu(raw->i_size)); inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime); inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime); From abaa421d5dc06dde5fa291f47bb6f0c5c92b30c4 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 26 Jun 2018 09:50:09 +0200 Subject: [PATCH 0733/3715] ARM: dts: ux500: Correct SCU unit address [ Upstream commit 2f217d24ecaec2012e628d21e244eef0608656a4 ] The unit address of the Cortex-A9 SCU device node contains one zero too many. Remove it. Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- arch/arm/boot/dts/ste-dbx5x0.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/ste-dbx5x0.dtsi b/arch/arm/boot/dts/ste-dbx5x0.dtsi index 2310a4e97768..3dc0028e108b 100644 --- a/arch/arm/boot/dts/ste-dbx5x0.dtsi +++ b/arch/arm/boot/dts/ste-dbx5x0.dtsi @@ -197,7 +197,7 @@ <0xa0410100 0x100>; }; - scu@a04100000 { + scu@a0410000 { compatible = "arm,cortex-a9-scu"; reg = <0xa0410000 0x100>; }; From f4b69c0e008b06e50c2f56796d70ef9cbb1c88ae Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 3 Jul 2018 10:30:03 +0200 Subject: [PATCH 0734/3715] ARM: dts: ux500: Fix LCDA clock line muxing [ Upstream commit ecde29569e3484e1d0a032bf4074449bce4d4a03 ] The "lcdaclk_b_1" group is muxed with the function "lcd" but needs a separate entry to be muxed in with "lcda" rather than "lcd". Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- arch/arm/boot/dts/ste-href-family-pinctrl.dtsi | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/ste-href-family-pinctrl.dtsi b/arch/arm/boot/dts/ste-href-family-pinctrl.dtsi index 5c5cea232743..1ec193b0c506 100644 --- a/arch/arm/boot/dts/ste-href-family-pinctrl.dtsi +++ b/arch/arm/boot/dts/ste-href-family-pinctrl.dtsi @@ -607,16 +607,20 @@ mcde { lcd_default_mode: lcd_default { - default_mux { + default_mux1 { /* Mux in VSI0 and all the data lines */ function = "lcd"; groups = "lcdvsi0_a_1", /* VSI0 for LCD */ "lcd_d0_d7_a_1", /* Data lines */ "lcd_d8_d11_a_1", /* TV-out */ - "lcdaclk_b_1", /* Clock line for TV-out */ "lcdvsi1_a_1"; /* VSI1 for HDMI */ }; + default_mux2 { + function = "lcda"; + groups = + "lcdaclk_b_1"; /* Clock line for TV-out */ + }; default_cfg1 { pins = "GPIO68_E1", /* VSI0 */ From bb4c022b0d53d80f74046dea8bfcf0c1759126de Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 13 Sep 2018 13:12:34 -0500 Subject: [PATCH 0735/3715] ARM: dts: ste: Fix SPI controller node names [ Upstream commit 2f967f9e9fa076affb711da1a8389b5d33814fc6 ] SPI controller nodes should be named 'spi' rather than 'ssp'. Fixing the name enables dtc SPI bus checks. Signed-off-by: Rob Herring Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- arch/arm/boot/dts/ste-dbx5x0.dtsi | 4 ++-- arch/arm/boot/dts/ste-hrefprev60.dtsi | 2 +- arch/arm/boot/dts/ste-snowball.dts | 2 +- arch/arm/boot/dts/ste-u300.dts | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm/boot/dts/ste-dbx5x0.dtsi b/arch/arm/boot/dts/ste-dbx5x0.dtsi index 3dc0028e108b..986767735e24 100644 --- a/arch/arm/boot/dts/ste-dbx5x0.dtsi +++ b/arch/arm/boot/dts/ste-dbx5x0.dtsi @@ -878,7 +878,7 @@ power-domains = <&pm_domains DOMAIN_VAPE>; }; - ssp@80002000 { + spi@80002000 { compatible = "arm,pl022", "arm,primecell"; reg = <0x80002000 0x1000>; interrupts = ; @@ -892,7 +892,7 @@ power-domains = <&pm_domains DOMAIN_VAPE>; }; - ssp@80003000 { + spi@80003000 { compatible = "arm,pl022", "arm,primecell"; reg = <0x80003000 0x1000>; interrupts = ; diff --git a/arch/arm/boot/dts/ste-hrefprev60.dtsi b/arch/arm/boot/dts/ste-hrefprev60.dtsi index 3f14b4df69b4..94eeb7f1c947 100644 --- a/arch/arm/boot/dts/ste-hrefprev60.dtsi +++ b/arch/arm/boot/dts/ste-hrefprev60.dtsi @@ -57,7 +57,7 @@ }; }; - ssp@80002000 { + spi@80002000 { /* * On the first generation boards, this SSP/SPI port was connected * to the AB8500. diff --git a/arch/arm/boot/dts/ste-snowball.dts b/arch/arm/boot/dts/ste-snowball.dts index ade1d0d4e5f4..1bf4358f8fa7 100644 --- a/arch/arm/boot/dts/ste-snowball.dts +++ b/arch/arm/boot/dts/ste-snowball.dts @@ -376,7 +376,7 @@ pinctrl-1 = <&i2c3_sleep_mode>; }; - ssp@80002000 { + spi@80002000 { pinctrl-names = "default"; pinctrl-0 = <&ssp0_snowball_mode>; }; diff --git a/arch/arm/boot/dts/ste-u300.dts b/arch/arm/boot/dts/ste-u300.dts index 62ecb6a2fa39..1bd1aba3322f 100644 --- a/arch/arm/boot/dts/ste-u300.dts +++ b/arch/arm/boot/dts/ste-u300.dts @@ -442,7 +442,7 @@ dma-names = "rx"; }; - spi: ssp@c0006000 { + spi: spi@c0006000 { compatible = "arm,pl022", "arm,primecell"; reg = <0xc0006000 0x1000>; interrupt-parent = <&vica>; From 0abe78e62eada89a6ca4d1e97a7c99eb572bce7e Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 20 Sep 2018 16:13:37 -0700 Subject: [PATCH 0736/3715] spi: pic32: Use proper enum in dmaengine_prep_slave_rg [ Upstream commit 8cfde7847d5ed0bb77bace41519572963e43cd17 ] Clang warns when one enumerated type is converted implicitly to another: drivers/spi/spi-pic32.c:323:8: warning: implicit conversion from enumeration type 'enum dma_data_direction' to different enumeration type 'enum dma_transfer_direction' [-Wenum-conversion] DMA_FROM_DEVICE, ^~~~~~~~~~~~~~~ drivers/spi/spi-pic32.c:333:8: warning: implicit conversion from enumeration type 'enum dma_data_direction' to different enumeration type 'enum dma_transfer_direction' [-Wenum-conversion] DMA_TO_DEVICE, ^~~~~~~~~~~~~ 2 warnings generated. Use the proper enums from dma_transfer_direction (DMA_FROM_DEVICE = DMA_DEV_TO_MEM = 2, DMA_TO_DEVICE = DMA_MEM_TO_DEV = 1) to satify Clang. Link: https://github.com/ClangBuiltLinux/linux/issues/159 Signed-off-by: Nathan Chancellor Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-pic32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-pic32.c b/drivers/spi/spi-pic32.c index f8a45af1fa9f..288002f6c613 100644 --- a/drivers/spi/spi-pic32.c +++ b/drivers/spi/spi-pic32.c @@ -320,7 +320,7 @@ static int pic32_spi_dma_transfer(struct pic32_spi *pic32s, desc_rx = dmaengine_prep_slave_sg(master->dma_rx, xfer->rx_sg.sgl, xfer->rx_sg.nents, - DMA_FROM_DEVICE, + DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); if (!desc_rx) { ret = -EINVAL; @@ -330,7 +330,7 @@ static int pic32_spi_dma_transfer(struct pic32_spi *pic32s, desc_tx = dmaengine_prep_slave_sg(master->dma_tx, xfer->tx_sg.sgl, xfer->tx_sg.nents, - DMA_TO_DEVICE, + DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); if (!desc_tx) { ret = -EINVAL; From 49b12aea4f443aedf531403141eb8862f28734ec Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Sat, 15 Sep 2018 21:38:24 -0700 Subject: [PATCH 0737/3715] cpufeature: avoid warning when compiling with clang [ Upstream commit c785896b21dd8e156326ff660050b0074d3431df ] The table id (second) argument to MODULE_DEVICE_TABLE is often referenced otherwise. This is not the case for CPU features. This leads to warnings when building the kernel with Clang: arch/arm/crypto/aes-ce-glue.c:450:1: warning: variable 'cpu_feature_match_AES' is not needed and will not be emitted [-Wunneeded-internal-declaration] module_cpu_feature_match(AES, aes_init); ^ Avoid warnings by using __maybe_unused, similar to commit 1f318a8bafcf ("modules: mark __inittest/__exittest as __maybe_unused"). Fixes: 67bad2fdb754 ("cpu: add generic support for CPU feature based module autoloading") Signed-off-by: Stefan Agner Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- include/linux/cpufeature.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/cpufeature.h b/include/linux/cpufeature.h index 986c06c88d81..84d3c81b5978 100644 --- a/include/linux/cpufeature.h +++ b/include/linux/cpufeature.h @@ -45,7 +45,7 @@ * 'asm/cpufeature.h' of your favorite architecture. */ #define module_cpu_feature_match(x, __initfunc) \ -static struct cpu_feature const cpu_feature_match_ ## x[] = \ +static struct cpu_feature const __maybe_unused cpu_feature_match_ ## x[] = \ { { .feature = cpu_feature(x) }, { } }; \ MODULE_DEVICE_TABLE(cpu, cpu_feature_match_ ## x); \ \ From a9791599935f620b3767161b3400fb6515a600cf Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Sat, 15 Sep 2018 21:38:25 -0700 Subject: [PATCH 0738/3715] crypto: arm/crc32 - avoid warning when compiling with Clang [ Upstream commit cd560235d8f9ddd94aa51e1c4dabdf3212b9b241 ] The table id (second) argument to MODULE_DEVICE_TABLE is often referenced otherwise. This is not the case for CPU features. This leads to a warning when building the kernel with Clang: arch/arm/crypto/crc32-ce-glue.c:239:33: warning: variable 'crc32_cpu_feature' is not needed and will not be emitted [-Wunneeded-internal-declaration] static const struct cpu_feature crc32_cpu_feature[] = { ^ Avoid warnings by using __maybe_unused, similar to commit 1f318a8bafcf ("modules: mark __inittest/__exittest as __maybe_unused"). Fixes: 2a9faf8b7e43 ("crypto: arm/crc32 - enable module autoloading based on CPU feature bits") Signed-off-by: Stefan Agner Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- arch/arm/crypto/crc32-ce-glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/crypto/crc32-ce-glue.c b/arch/arm/crypto/crc32-ce-glue.c index 96e62ec105d0..cd9e93b46c2d 100644 --- a/arch/arm/crypto/crc32-ce-glue.c +++ b/arch/arm/crypto/crc32-ce-glue.c @@ -236,7 +236,7 @@ static void __exit crc32_pmull_mod_exit(void) ARRAY_SIZE(crc32_pmull_algs)); } -static const struct cpu_feature crc32_cpu_feature[] = { +static const struct cpu_feature __maybe_unused crc32_cpu_feature[] = { { cpu_feature(CRC32) }, { cpu_feature(PMULL) }, { } }; MODULE_DEVICE_TABLE(cpu, crc32_cpu_feature); From 1190bbd4687704be2a1f352869ae6d6c05bbd00c Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 13 Sep 2018 13:12:31 -0500 Subject: [PATCH 0739/3715] ARM: dts: marvell: Fix SPI and I2C bus warnings [ Upstream commit cf680cc5251487b9a39919c3cda31a108af19cf8 ] dtc has new checks for I2C and SPI buses. Fix the warnings in node names and unit-addresses. arch/arm/boot/dts/dove-cubox.dtb: Warning (i2c_bus_reg): /i2c-mux/i2c@0/clock-generator: I2C bus unit address format error, expected "60" arch/arm/boot/dts/dove-cubox-es.dtb: Warning (i2c_bus_reg): /i2c-mux/i2c@0/clock-generator: I2C bus unit address format error, expected "60" arch/arm/boot/dts/dove-cubox.dtb: Warning (spi_bus_bridge): /mbus/internal-regs/spi-ctrl@10600: node name for SPI buses should be 'spi' arch/arm/boot/dts/dove-cubox-es.dtb: Warning (spi_bus_bridge): /mbus/internal-regs/spi-ctrl@10600: node name for SPI buses should be 'spi' arch/arm/boot/dts/dove-dove-db.dtb: Warning (spi_bus_bridge): /mbus/internal-regs/spi-ctrl@10600: node name for SPI buses should be 'spi' arch/arm/boot/dts/dove-sbc-a510.dtb: Warning (spi_bus_bridge): /mbus/internal-regs/spi-ctrl@10600: node name for SPI buses should be 'spi' arch/arm/boot/dts/dove-sbc-a510.dtb: Warning (spi_bus_bridge): /mbus/internal-regs/spi-ctrl@14600: node name for SPI buses should be 'spi' arch/arm/boot/dts/orion5x-kuroboxpro.dtb: Warning (i2c_bus_reg): /soc/internal-regs/i2c@11000/rtc: I2C bus unit address format error, expected "32" arch/arm/boot/dts/orion5x-linkstation-lschl.dtb: Warning (i2c_bus_reg): /soc/internal-regs/i2c@11000/rtc: I2C bus unit address format error, expected "32" arch/arm/boot/dts/orion5x-linkstation-lsgl.dtb: Warning (i2c_bus_reg): /soc/internal-regs/i2c@11000/rtc: I2C bus unit address format error, expected "32" arch/arm/boot/dts/orion5x-linkstation-lswtgl.dtb: Warning (i2c_bus_reg): /soc/internal-regs/i2c@11000/rtc: I2C bus unit address format error, expected "32" Cc: Jason Cooper Cc: Andrew Lunn Cc: Sebastian Hesselbarth Cc: Gregory Clement Signed-off-by: Rob Herring Signed-off-by: Gregory CLEMENT Signed-off-by: Sasha Levin --- arch/arm/boot/dts/dove-cubox.dts | 2 +- arch/arm/boot/dts/dove.dtsi | 6 +++--- arch/arm/boot/dts/orion5x-linkstation.dtsi | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm/boot/dts/dove-cubox.dts b/arch/arm/boot/dts/dove-cubox.dts index 580e3cbcfbf7..3e1584e787ae 100644 --- a/arch/arm/boot/dts/dove-cubox.dts +++ b/arch/arm/boot/dts/dove-cubox.dts @@ -87,7 +87,7 @@ status = "okay"; clock-frequency = <100000>; - si5351: clock-generator { + si5351: clock-generator@60 { compatible = "silabs,si5351a-msop"; reg = <0x60>; #address-cells = <1>; diff --git a/arch/arm/boot/dts/dove.dtsi b/arch/arm/boot/dts/dove.dtsi index f4a07bb7c3a2..c78471b05ab4 100644 --- a/arch/arm/boot/dts/dove.dtsi +++ b/arch/arm/boot/dts/dove.dtsi @@ -155,7 +155,7 @@ 0xffffe000 MBUS_ID(0x03, 0x01) 0 0x0000800 /* CESA SRAM 2k */ 0xfffff000 MBUS_ID(0x0d, 0x00) 0 0x0000800>; /* PMU SRAM 2k */ - spi0: spi-ctrl@10600 { + spi0: spi@10600 { compatible = "marvell,orion-spi"; #address-cells = <1>; #size-cells = <0>; @@ -168,7 +168,7 @@ status = "disabled"; }; - i2c: i2c-ctrl@11000 { + i2c: i2c@11000 { compatible = "marvell,mv64xxx-i2c"; reg = <0x11000 0x20>; #address-cells = <1>; @@ -218,7 +218,7 @@ status = "disabled"; }; - spi1: spi-ctrl@14600 { + spi1: spi@14600 { compatible = "marvell,orion-spi"; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm/boot/dts/orion5x-linkstation.dtsi b/arch/arm/boot/dts/orion5x-linkstation.dtsi index e9991c83d7b7..117d71546ed0 100644 --- a/arch/arm/boot/dts/orion5x-linkstation.dtsi +++ b/arch/arm/boot/dts/orion5x-linkstation.dtsi @@ -156,7 +156,7 @@ &i2c { status = "okay"; - rtc { + rtc@32 { compatible = "ricoh,rs5c372a"; reg = <0x32>; }; From ed5f6f8d4ff4829c30ea7f5b9b3a904d7bbf9eb3 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 4 Sep 2018 11:22:12 +0200 Subject: [PATCH 0740/3715] x86/mce-inject: Reset injection struct after injection [ Upstream commit 7401a633c34adc7aefd3edfec60074cb0475a3e8 ] Clear the MCE struct which is used for collecting the injection details after injection. Also, populate it with more details from the machine. Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20180905081954.10391-1-bp@alien8.de Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/mcheck/mce-inject.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 8fec687b3e44..f12141ba9a76 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -108,6 +108,9 @@ static void setup_inj_struct(struct mce *m) memset(m, 0, sizeof(struct mce)); m->cpuvendor = boot_cpu_data.x86_vendor; + m->time = ktime_get_real_seconds(); + m->cpuid = cpuid_eax(1); + m->microcode = boot_cpu_data.microcode; } /* Update fake mce registers on current CPU. */ @@ -576,6 +579,9 @@ static int inj_bank_set(void *data, u64 val) m->bank = val; do_inject(); + /* Reset injection struct */ + setup_inj_struct(&i_mce); + return 0; } From 7796870c62b254ea9dcf36f81169124c1e9d3662 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Tue, 21 Aug 2018 22:12:33 +0300 Subject: [PATCH 0741/3715] ARM: dts: clearfog: fix sdhci supply property name [ Upstream commit e807f0298144c06740022a2f900d86b7f115595e ] The vmmc phandle, like all power supply property names, must have the '-supply' suffix. Signed-off-by: Baruch Siach Signed-off-by: Gregory CLEMENT Signed-off-by: Sasha Levin --- arch/arm/boot/dts/armada-388-clearfog.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/armada-388-clearfog.dtsi b/arch/arm/boot/dts/armada-388-clearfog.dtsi index 68acfc968706..8a3bbb7d6cc1 100644 --- a/arch/arm/boot/dts/armada-388-clearfog.dtsi +++ b/arch/arm/boot/dts/armada-388-clearfog.dtsi @@ -89,7 +89,7 @@ &clearfog_sdhci_cd_pins>; pinctrl-names = "default"; status = "okay"; - vmmc = <®_3p3v>; + vmmc-supply = <®_3p3v>; wp-inverted; }; From 3d30bea686142879f9cf0ec396fb23f08918db66 Mon Sep 17 00:00:00 2001 From: Shahed Shaikh Date: Thu, 20 Sep 2018 11:22:51 -0700 Subject: [PATCH 0742/3715] bnx2x: Ignore bandwidth attention in single function mode [ Upstream commit 75a110a1783ef8324ffd763b24f4ac268253cbca ] This is a workaround for FW bug - MFW generates bandwidth attention in single function mode, which is only expected to be generated in multi function mode. This undesired attention in SF mode results in incorrect HW configuration and resulting into Tx timeout. Signed-off-by: Shahed Shaikh Signed-off-by: Ariel Elior Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 8f0c9f6de893..dbe8feec456c 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -3540,6 +3540,16 @@ static void bnx2x_drv_info_iscsi_stat(struct bnx2x *bp) */ static void bnx2x_config_mf_bw(struct bnx2x *bp) { + /* Workaround for MFW bug. + * MFW is not supposed to generate BW attention in + * single function mode. + */ + if (!IS_MF(bp)) { + DP(BNX2X_MSG_MCP, + "Ignoring MF BW config in single function mode\n"); + return; + } + if (bp->link_vars.link_up) { bnx2x_cmng_fns_init(bp, true, CMNG_FNS_MINMAX); bnx2x_link_sync_notify(bp); From 146fccec7ca7e96848cd696a3279d1f49fddc038 Mon Sep 17 00:00:00 2001 From: Prashant Bhole Date: Thu, 20 Sep 2018 16:52:03 +0900 Subject: [PATCH 0743/3715] samples/bpf: fix compilation failure [ Upstream commit 32c009798385ce21080beaa87a9b95faad3acd1e ] following commit: commit d58e468b1112 ("flow_dissector: implements flow dissector BPF hook") added struct bpf_flow_keys which conflicts with the struct with same name in sockex2_kern.c and sockex3_kern.c similar to commit: commit 534e0e52bc23 ("samples/bpf: fix a compilation failure") we tried the rename it "flow_keys" but it also conflicted with struct having same name in include/net/flow_dissector.h. Hence renaming the struct to "flow_key_record". Also, this commit doesn't fix the compilation error completely because the similar struct is present in sockex3_kern.c. Hence renaming it in both files sockex3_user.c and sockex3_kern.c Signed-off-by: Prashant Bhole Acked-by: Song Liu Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- samples/bpf/sockex2_kern.c | 11 ++++++----- samples/bpf/sockex3_kern.c | 8 ++++---- samples/bpf/sockex3_user.c | 4 ++-- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/samples/bpf/sockex2_kern.c b/samples/bpf/sockex2_kern.c index f58acfc92556..f2f9dbc021b0 100644 --- a/samples/bpf/sockex2_kern.c +++ b/samples/bpf/sockex2_kern.c @@ -14,7 +14,7 @@ struct vlan_hdr { __be16 h_vlan_encapsulated_proto; }; -struct bpf_flow_keys { +struct flow_key_record { __be32 src; __be32 dst; union { @@ -59,7 +59,7 @@ static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off) } static inline __u64 parse_ip(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto, - struct bpf_flow_keys *flow) + struct flow_key_record *flow) { __u64 verlen; @@ -83,7 +83,7 @@ static inline __u64 parse_ip(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto } static inline __u64 parse_ipv6(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto, - struct bpf_flow_keys *flow) + struct flow_key_record *flow) { *ip_proto = load_byte(skb, nhoff + offsetof(struct ipv6hdr, nexthdr)); @@ -96,7 +96,8 @@ static inline __u64 parse_ipv6(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_pro return nhoff; } -static inline bool flow_dissector(struct __sk_buff *skb, struct bpf_flow_keys *flow) +static inline bool flow_dissector(struct __sk_buff *skb, + struct flow_key_record *flow) { __u64 nhoff = ETH_HLEN; __u64 ip_proto; @@ -198,7 +199,7 @@ struct bpf_map_def SEC("maps") hash_map = { SEC("socket2") int bpf_prog2(struct __sk_buff *skb) { - struct bpf_flow_keys flow = {}; + struct flow_key_record flow = {}; struct pair *value; u32 key; diff --git a/samples/bpf/sockex3_kern.c b/samples/bpf/sockex3_kern.c index 95907f8d2b17..c527b57d3ec8 100644 --- a/samples/bpf/sockex3_kern.c +++ b/samples/bpf/sockex3_kern.c @@ -61,7 +61,7 @@ struct vlan_hdr { __be16 h_vlan_encapsulated_proto; }; -struct bpf_flow_keys { +struct flow_key_record { __be32 src; __be32 dst; union { @@ -88,7 +88,7 @@ static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off) } struct globals { - struct bpf_flow_keys flow; + struct flow_key_record flow; }; struct bpf_map_def SEC("maps") percpu_map = { @@ -114,14 +114,14 @@ struct pair { struct bpf_map_def SEC("maps") hash_map = { .type = BPF_MAP_TYPE_HASH, - .key_size = sizeof(struct bpf_flow_keys), + .key_size = sizeof(struct flow_key_record), .value_size = sizeof(struct pair), .max_entries = 1024, }; static void update_stats(struct __sk_buff *skb, struct globals *g) { - struct bpf_flow_keys key = g->flow; + struct flow_key_record key = g->flow; struct pair *value; value = bpf_map_lookup_elem(&hash_map, &key); diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c index 4d75674bee35..741b899b693f 100644 --- a/samples/bpf/sockex3_user.c +++ b/samples/bpf/sockex3_user.c @@ -13,7 +13,7 @@ #define PARSE_IP_PROG_FD (prog_fd[0]) #define PROG_ARRAY_FD (map_fd[0]) -struct flow_keys { +struct flow_key_record { __be32 src; __be32 dst; union { @@ -64,7 +64,7 @@ int main(int argc, char **argv) (void) f; for (i = 0; i < 5; i++) { - struct flow_keys key = {}, next_key; + struct flow_key_record key = {}, next_key; struct pair value; sleep(1); From 700feda306559979fe5daccefa07365332408f50 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 20 Sep 2018 17:05:40 -0700 Subject: [PATCH 0744/3715] net: phy: mdio-bcm-unimac: Allow configuring MDIO clock divider [ Upstream commit b78ac6ecd1b6b46f8767cbafa95a7b0b51b87ad8 ] Allow the configuration of the MDIO clock divider when the Device Tree contains 'clock-frequency' property (similar to I2C and SPI buses). Because the hardware may have lost its state during suspend/resume, re-apply the MDIO clock divider upon resumption. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../bindings/net/brcm,unimac-mdio.txt | 3 + drivers/net/phy/mdio-bcm-unimac.c | 83 ++++++++++++++++++- 2 files changed, 84 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/brcm,unimac-mdio.txt b/Documentation/devicetree/bindings/net/brcm,unimac-mdio.txt index 4648948f7c3b..e15589f47787 100644 --- a/Documentation/devicetree/bindings/net/brcm,unimac-mdio.txt +++ b/Documentation/devicetree/bindings/net/brcm,unimac-mdio.txt @@ -19,6 +19,9 @@ Optional properties: - interrupt-names: must be "mdio_done_error" when there is a share interrupt fed to this hardware block, or must be "mdio_done" for the first interrupt and "mdio_error" for the second when there are separate interrupts +- clocks: A reference to the clock supplying the MDIO bus controller +- clock-frequency: the MDIO bus clock that must be output by the MDIO bus + hardware, if absent, the default hardware values are used Child nodes of this MDIO bus controller node are standard Ethernet PHY device nodes as described in Documentation/devicetree/bindings/net/phy.txt diff --git a/drivers/net/phy/mdio-bcm-unimac.c b/drivers/net/phy/mdio-bcm-unimac.c index 08e0647b85e2..f9d98a6e67bc 100644 --- a/drivers/net/phy/mdio-bcm-unimac.c +++ b/drivers/net/phy/mdio-bcm-unimac.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -45,6 +46,8 @@ struct unimac_mdio_priv { void __iomem *base; int (*wait_func) (void *wait_func_data); void *wait_func_data; + struct clk *clk; + u32 clk_freq; }; static inline u32 unimac_mdio_readl(struct unimac_mdio_priv *priv, u32 offset) @@ -189,6 +192,35 @@ static int unimac_mdio_reset(struct mii_bus *bus) return 0; } +static void unimac_mdio_clk_set(struct unimac_mdio_priv *priv) +{ + unsigned long rate; + u32 reg, div; + + /* Keep the hardware default values */ + if (!priv->clk_freq) + return; + + if (!priv->clk) + rate = 250000000; + else + rate = clk_get_rate(priv->clk); + + div = (rate / (2 * priv->clk_freq)) - 1; + if (div & ~MDIO_CLK_DIV_MASK) { + pr_warn("Incorrect MDIO clock frequency, ignoring\n"); + return; + } + + /* The MDIO clock is the reference clock (typicaly 250Mhz) divided by + * 2 x (MDIO_CLK_DIV + 1) + */ + reg = unimac_mdio_readl(priv, MDIO_CFG); + reg &= ~(MDIO_CLK_DIV_MASK << MDIO_CLK_DIV_SHIFT); + reg |= div << MDIO_CLK_DIV_SHIFT; + unimac_mdio_writel(priv, reg, MDIO_CFG); +} + static int unimac_mdio_probe(struct platform_device *pdev) { struct unimac_mdio_pdata *pdata = pdev->dev.platform_data; @@ -215,9 +247,26 @@ static int unimac_mdio_probe(struct platform_device *pdev) return -ENOMEM; } + priv->clk = devm_clk_get(&pdev->dev, NULL); + if (PTR_ERR(priv->clk) == -EPROBE_DEFER) + return PTR_ERR(priv->clk); + else + priv->clk = NULL; + + ret = clk_prepare_enable(priv->clk); + if (ret) + return ret; + + if (of_property_read_u32(np, "clock-frequency", &priv->clk_freq)) + priv->clk_freq = 0; + + unimac_mdio_clk_set(priv); + priv->mii_bus = mdiobus_alloc(); - if (!priv->mii_bus) - return -ENOMEM; + if (!priv->mii_bus) { + ret = -ENOMEM; + goto out_clk_disable; + } bus = priv->mii_bus; bus->priv = priv; @@ -251,6 +300,8 @@ static int unimac_mdio_probe(struct platform_device *pdev) out_mdio_free: mdiobus_free(bus); +out_clk_disable: + clk_disable_unprepare(priv->clk); return ret; } @@ -260,10 +311,37 @@ static int unimac_mdio_remove(struct platform_device *pdev) mdiobus_unregister(priv->mii_bus); mdiobus_free(priv->mii_bus); + clk_disable_unprepare(priv->clk); return 0; } +static int unimac_mdio_suspend(struct device *d) +{ + struct unimac_mdio_priv *priv = dev_get_drvdata(d); + + clk_disable_unprepare(priv->clk); + + return 0; +} + +static int unimac_mdio_resume(struct device *d) +{ + struct unimac_mdio_priv *priv = dev_get_drvdata(d); + int ret; + + ret = clk_prepare_enable(priv->clk); + if (ret) + return ret; + + unimac_mdio_clk_set(priv); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(unimac_mdio_pm_ops, + unimac_mdio_suspend, unimac_mdio_resume); + static const struct of_device_id unimac_mdio_ids[] = { { .compatible = "brcm,genet-mdio-v5", }, { .compatible = "brcm,genet-mdio-v4", }, @@ -279,6 +357,7 @@ static struct platform_driver unimac_mdio_driver = { .driver = { .name = UNIMAC_MDIO_DRV_NAME, .of_match_table = unimac_mdio_ids, + .pm = &unimac_mdio_pm_ops, }, .probe = unimac_mdio_probe, .remove = unimac_mdio_remove, From 53532b083ab4a5fb223f5a6ece76d554e7cc2757 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 21 Sep 2018 10:42:15 +0800 Subject: [PATCH 0745/3715] net: micrel: fix return type of ndo_start_xmit function [ Upstream commit 2b49117a5abee8478b0470cba46ac74f93b4a479 ] The method ndo_start_xmit() is defined as returning an 'netdev_tx_t', which is a typedef for an enum type, so make sure the implementation in this driver has returns 'netdev_tx_t' value, and change the function return type to netdev_tx_t. Found by coccinelle. Signed-off-by: YueHaibing Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/micrel/ks8695net.c | 2 +- drivers/net/ethernet/micrel/ks8851_mll.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8695net.c b/drivers/net/ethernet/micrel/ks8695net.c index bd51e057e915..b881f5d4a7f9 100644 --- a/drivers/net/ethernet/micrel/ks8695net.c +++ b/drivers/net/ethernet/micrel/ks8695net.c @@ -1164,7 +1164,7 @@ ks8695_timeout(struct net_device *ndev) * sk_buff and adds it to the TX ring. It then kicks the TX DMA * engine to ensure transmission begins. */ -static int +static netdev_tx_t ks8695_start_xmit(struct sk_buff *skb, struct net_device *ndev) { struct ks8695_priv *ksp = netdev_priv(ndev); diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c index f3e9dd47b56f..adbe0a6fe0db 100644 --- a/drivers/net/ethernet/micrel/ks8851_mll.c +++ b/drivers/net/ethernet/micrel/ks8851_mll.c @@ -1020,9 +1020,9 @@ static void ks_write_qmu(struct ks_net *ks, u8 *pdata, u16 len) * spin_lock_irqsave is required because tx and rx should be mutual exclusive. * So while tx is in-progress, prevent IRQ interrupt from happenning. */ -static int ks_start_xmit(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t ks_start_xmit(struct sk_buff *skb, struct net_device *netdev) { - int retv = NETDEV_TX_OK; + netdev_tx_t retv = NETDEV_TX_OK; struct ks_net *ks = netdev_priv(netdev); disable_irq(netdev->irq); From c2e6ce2eb3631c19974bc5ac2a6206d0b009e3d3 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 21 Sep 2018 10:50:32 +0800 Subject: [PATCH 0746/3715] net: freescale: fix return type of ndo_start_xmit function [ Upstream commit 06983aa526c759ebdf43f202d8d0491d9494e2f4 ] The method ndo_start_xmit() is defined as returning an 'netdev_tx_t', which is a typedef for an enum type, so make sure the implementation in this driver has returns 'netdev_tx_t' value, and change the function return type to netdev_tx_t. Found by coccinelle. Signed-off-by: YueHaibing Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 3 ++- drivers/net/ethernet/freescale/fec_mpc52xx.c | 3 ++- drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c | 3 ++- drivers/net/ethernet/freescale/gianfar.c | 4 ++-- drivers/net/ethernet/freescale/ucc_geth.c | 3 ++- 5 files changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index d5f8bf87519a..39b8b6730e77 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2036,7 +2036,8 @@ static inline int dpaa_xmit(struct dpaa_priv *priv, return 0; } -static int dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev) +static netdev_tx_t +dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev) { const int queue_mapping = skb_get_queue_mapping(skb); bool nonlinear = skb_is_nonlinear(skb); diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c index 6d7269d87a85..b90bab72efdb 100644 --- a/drivers/net/ethernet/freescale/fec_mpc52xx.c +++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c @@ -305,7 +305,8 @@ static int mpc52xx_fec_close(struct net_device *dev) * invariant will hold if you make sure that the netif_*_queue() * calls are done at the proper times. */ -static int mpc52xx_fec_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t +mpc52xx_fec_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct mpc52xx_fec_priv *priv = netdev_priv(dev); struct bcom_fec_bd *bd; diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index 28bd4cf61741..708082c255d0 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -481,7 +481,8 @@ static struct sk_buff *tx_skb_align_workaround(struct net_device *dev, } #endif -static int fs_enet_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t +fs_enet_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); cbd_t __iomem *bdp; diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 63daae120b2d..27d0e3b9833c 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -112,7 +112,7 @@ const char gfar_driver_version[] = "2.0"; static int gfar_enet_open(struct net_device *dev); -static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t gfar_start_xmit(struct sk_buff *skb, struct net_device *dev); static void gfar_reset_task(struct work_struct *work); static void gfar_timeout(struct net_device *dev); static int gfar_close(struct net_device *dev); @@ -2334,7 +2334,7 @@ static inline bool gfar_csum_errata_76(struct gfar_private *priv, /* This is called by the kernel when a frame is ready for transmission. * It is pointed to by the dev->hard_start_xmit function pointer */ -static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct gfar_private *priv = netdev_priv(dev); struct gfar_priv_tx_q *tx_queue = NULL; diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 94df1ddc5dcb..bddf4c25ee6e 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -3085,7 +3085,8 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) /* This is called by the kernel when a frame is ready for transmission. */ /* It is pointed to by the dev->hard_start_xmit function pointer */ -static int ucc_geth_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t +ucc_geth_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ucc_geth_private *ugeth = netdev_priv(dev); #ifdef CONFIG_UGETH_TX_ON_DEMAND From 9f5fe6d39938cf787a003f2d0e7c49faff093f57 Mon Sep 17 00:00:00 2001 From: Matthew Whitehead Date: Fri, 21 Sep 2018 17:20:40 -0400 Subject: [PATCH 0747/3715] x86/CPU: Use correct macros for Cyrix calls [ Upstream commit 03b099bdcdf7125d4a63dc9ddeefdd454e05123d ] There are comments in processor-cyrix.h advising you to _not_ make calls using the deprecated macros in this style: setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80); This is because it expands the macro into a non-functioning calling sequence. The calling order must be: outb(CX86_CCR2, 0x22); inb(0x23); From the comments: * When using the old macros a line like * setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); * gets expanded to: * do { * outb((CX86_CCR2), 0x22); * outb((({ * outb((CX86_CCR2), 0x22); * inb(0x23); * }) | 0x88), 0x23); * } while (0); The new macros fix this problem, so use them instead. Signed-off-by: Matthew Whitehead Signed-off-by: Borislav Petkov Reviewed-by: Andy Lutomirski Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jia Zhang Cc: Peter Zijlstra Cc: Philippe Ombredanne Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180921212041.13096-2-tedheadster@gmail.com Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/cyrix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index fa61c870ada9..1d9b8aaea06c 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -437,7 +437,7 @@ static void cyrix_identify(struct cpuinfo_x86 *c) /* enable MAPEN */ setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable cpuid */ - setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80); + setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x80); /* disable MAPEN */ setCx86(CX86_CCR3, ccr3); local_irq_restore(flags); From e63d40f685c632ed8e0d7a632e851c741e17cab6 Mon Sep 17 00:00:00 2001 From: Matthew Whitehead Date: Fri, 21 Sep 2018 17:20:41 -0400 Subject: [PATCH 0748/3715] x86/CPU: Change query logic so CPUID is enabled before testing [ Upstream commit 2893cc8ff892fa74972d8dc0e1d0dc65116daaa3 ] Presently we check first if CPUID is enabled. If it is not already enabled, then we next call identify_cpu_without_cpuid() and clear X86_FEATURE_CPUID. Unfortunately, identify_cpu_without_cpuid() is the function where CPUID becomes _enabled_ on Cyrix 6x86/6x86L CPUs. Reverse the calling sequence so that CPUID is first enabled, and then check a second time to see if the feature has now been activated. [ bp: Massage commit message and remove trailing whitespace. ] Suggested-by: Andy Lutomirski Signed-off-by: Matthew Whitehead Signed-off-by: Borislav Petkov Reviewed-by: Andy Lutomirski Cc: David Woodhouse Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Konrad Rzeszutek Wilk Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180921212041.13096-3-tedheadster@gmail.com Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/common.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c0c9c5a44e82..3d805e8b3739 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1066,6 +1066,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) memset(&c->x86_capability, 0, sizeof c->x86_capability); c->extended_cpuid_level = 0; + if (!have_cpuid_p()) + identify_cpu_without_cpuid(c); + /* cyrix could have cpuid enabled via c_identify()*/ if (have_cpuid_p()) { cpu_detect(c); @@ -1082,7 +1085,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) if (this_cpu->c_bsp_init) this_cpu->c_bsp_init(c); } else { - identify_cpu_without_cpuid(c); setup_clear_cpu_cap(X86_FEATURE_CPUID); } From 27afaf325b06a5899e21084a20ccafb439758114 Mon Sep 17 00:00:00 2001 From: Dengcheng Zhu Date: Tue, 11 Sep 2018 14:49:23 -0700 Subject: [PATCH 0749/3715] MIPS: kexec: Relax memory restriction [ Upstream commit a6da4d6fdf8bd512c98d3ac7f1d16bc4bb282919 ] We can rely on the system kernel and the dump capture kernel themselves in memory usage. Being restrictive with 512MB limit may cause kexec tool failure on some platforms. Tested-by: Rachel Mozes Reported-by: Rachel Mozes Signed-off-by: Dengcheng Zhu Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/20568/ Cc: pburton@wavecomp.com Cc: ralf@linux-mips.org Cc: linux-mips@linux-mips.org Signed-off-by: Sasha Levin --- arch/mips/include/asm/kexec.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/mips/include/asm/kexec.h b/arch/mips/include/asm/kexec.h index 493a3cc7c39a..cfdbe66575f4 100644 --- a/arch/mips/include/asm/kexec.h +++ b/arch/mips/include/asm/kexec.h @@ -12,11 +12,11 @@ #include /* Maximum physical address we can use pages from */ -#define KEXEC_SOURCE_MEMORY_LIMIT (0x20000000) +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) /* Maximum address we can reach in physical address mode */ -#define KEXEC_DESTINATION_MEMORY_LIMIT (0x20000000) +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) /* Maximum address we can use for the control code buffer */ -#define KEXEC_CONTROL_MEMORY_LIMIT (0x20000000) +#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL) /* Reserve 3*4096 bytes for board-specific info */ #define KEXEC_CONTROL_PAGE_SIZE (4096 + 3*4096) From 551345e77a2dcd357be2eba8fb614ccf3db9f813 Mon Sep 17 00:00:00 2001 From: Vicente Bergas Date: Mon, 17 Sep 2018 15:47:14 +0200 Subject: [PATCH 0750/3715] arm64: dts: rockchip: Fix microSD in rk3399 sapphire board [ Upstream commit 88a20edf76091ee7f1bb459b89d714d53f0f8940 ] The microSD card slot in the Sapphire board is not working because of several issues: 1.- The vmmc power supply is missing in the DTS. It is capable of 3.0V and has a GPIO-based enable control. 2.- The vqmmc power supply can provide up to 3.3V, but it is capped in the DTS to just 3.0V because of the vmmc capability. This results in a conflict from the mmc driver requesting an unsupportable voltage range from 3.3V to 3.0V (min > max) as reported in dmesg. So, extend the range up to 3.3V. The hw should be able to stand this 0.3V tolerance. See mmc_regulator_set_vqmmc in drivers/mmc/core/core.c. 3.- The card detect signal is non-working. There is a known conflict with jtag, but the workaround in drivers/soc/rockchip/grf.c does not work. Adding the broken-cd attribute to the DTS fixes the issue. Signed-off-by: Vicente Bergas Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- .../boot/dts/rockchip/rk3399-sapphire.dtsi | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi index 82576011b959..075659847791 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi @@ -113,6 +113,19 @@ vin-supply = <&vcc_1v8>; }; + vcc3v0_sd: vcc3v0-sd { + compatible = "regulator-fixed"; + enable-active-high; + gpio = <&gpio0 RK_PA1 GPIO_ACTIVE_HIGH>; + pinctrl-names = "default"; + pinctrl-0 = <&sdmmc0_pwr_h>; + regulator-always-on; + regulator-max-microvolt = <3000000>; + regulator-min-microvolt = <3000000>; + regulator-name = "vcc3v0_sd"; + vin-supply = <&vcc3v3_sys>; + }; + vcc3v3_sys: vcc3v3-sys { compatible = "regulator-fixed"; regulator-name = "vcc3v3_sys"; @@ -315,7 +328,7 @@ regulator-always-on; regulator-boot-on; regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <3000000>; + regulator-max-microvolt = <3300000>; regulator-state-mem { regulator-on-in-suspend; regulator-suspend-microvolt = <3000000>; @@ -490,6 +503,13 @@ }; }; + sd { + sdmmc0_pwr_h: sdmmc0-pwr-h { + rockchip,pins = + ; + }; + }; + usb2 { vcc5v0_host_en: vcc5v0-host-en { rockchip,pins = @@ -537,6 +557,7 @@ }; &sdmmc { + broken-cd; bus-width = <4>; cap-mmc-highspeed; cap-sd-highspeed; @@ -545,6 +566,7 @@ max-frequency = <150000000>; pinctrl-names = "default"; pinctrl-0 = <&sdmmc_clk &sdmmc_cmd &sdmmc_cd &sdmmc_bus4>; + vmmc-supply = <&vcc3v0_sd>; vqmmc-supply = <&vcc_sdio>; status = "okay"; }; From 16afeb2513f8540d7e40aabc7d5bd3413f8a96de Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Sat, 1 Sep 2018 07:44:09 -0400 Subject: [PATCH 0751/3715] media: pci: ivtv: Fix a sleep-in-atomic-context bug in ivtv_yuv_init() [ Upstream commit 8d11eb847de7d89c2754988c944d51a4f63e219b ] The driver may sleep in a interrupt handler. The function call paths (from bottom to top) in Linux-4.16 are: [FUNC] kzalloc(GFP_KERNEL) drivers/media/pci/ivtv/ivtv-yuv.c, 938: kzalloc in ivtv_yuv_init drivers/media/pci/ivtv/ivtv-yuv.c, 960: ivtv_yuv_init in ivtv_yuv_next_free drivers/media/pci/ivtv/ivtv-yuv.c, 1126: ivtv_yuv_next_free in ivtv_yuv_setup_stream_frame drivers/media/pci/ivtv/ivtv-irq.c, 827: ivtv_yuv_setup_stream_frame in ivtv_irq_dec_data_req drivers/media/pci/ivtv/ivtv-irq.c, 1013: ivtv_irq_dec_data_req in ivtv_irq_handler To fix this bug, GFP_KERNEL is replaced with GFP_ATOMIC. This bug is found by my static analysis tool DSAC. Signed-off-by: Jia-Ju Bai Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/pci/ivtv/ivtv-yuv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/pci/ivtv/ivtv-yuv.c b/drivers/media/pci/ivtv/ivtv-yuv.c index 44936d6d7c39..1380474519f2 100644 --- a/drivers/media/pci/ivtv/ivtv-yuv.c +++ b/drivers/media/pci/ivtv/ivtv-yuv.c @@ -935,7 +935,7 @@ static void ivtv_yuv_init(struct ivtv *itv) } /* We need a buffer for blanking when Y plane is offset - non-fatal if we can't get one */ - yi->blanking_ptr = kzalloc(720 * 16, GFP_KERNEL|__GFP_NOWARN); + yi->blanking_ptr = kzalloc(720 * 16, GFP_ATOMIC|__GFP_NOWARN); if (yi->blanking_ptr) { yi->blanking_dmaptr = pci_map_single(itv->pdev, yi->blanking_ptr, 720*16, PCI_DMA_TODEVICE); } else { From 0b9c70939b60221f2210ef55a325fec6dea8cde1 Mon Sep 17 00:00:00 2001 From: Brad Love Date: Thu, 6 Sep 2018 17:07:49 -0400 Subject: [PATCH 0752/3715] media: au0828: Fix incorrect error messages [ Upstream commit f347596f2bf114a3af3d80201c6e6bef538d884f ] Correcting red herring error messages. Where appropriate, replaces au0282_dev_register with: - au0828_analog_register - au0828_dvb_register Signed-off-by: Brad Love Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/usb/au0828/au0828-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/usb/au0828/au0828-core.c b/drivers/media/usb/au0828/au0828-core.c index e3f63299f85c..07e3322bb182 100644 --- a/drivers/media/usb/au0828/au0828-core.c +++ b/drivers/media/usb/au0828/au0828-core.c @@ -632,7 +632,7 @@ static int au0828_usb_probe(struct usb_interface *interface, /* Analog TV */ retval = au0828_analog_register(dev, interface); if (retval) { - pr_err("%s() au0282_dev_register failed to register on V4L2\n", + pr_err("%s() au0828_analog_register failed to register on V4L2\n", __func__); mutex_unlock(&dev->lock); goto done; @@ -641,7 +641,7 @@ static int au0828_usb_probe(struct usb_interface *interface, /* Digital TV */ retval = au0828_dvb_register(dev); if (retval) - pr_err("%s() au0282_dev_register failed\n", + pr_err("%s() au0828_dvb_register failed\n", __func__); /* Remote controller */ From 5ccea94cb2c80ecb0d6444e62e2971ef24703769 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sat, 15 Sep 2018 02:16:15 -0400 Subject: [PATCH 0753/3715] media: davinci: Fix implicit enum conversion warning [ Upstream commit 4158757395b300b6eb308fc20b96d1d231484413 ] Clang warns when one enumerated type is implicitly converted to another. drivers/media/platform/davinci/vpbe_display.c:524:24: warning: implicit conversion from enumeration type 'enum osd_v_exp_ratio' to different enumeration type 'enum osd_h_exp_ratio' [-Wenum-conversion] layer_info->h_exp = V_EXP_6_OVER_5; ~ ^~~~~~~~~~~~~~ 1 warning generated. This appears to be a copy and paste error judging from the couple of lines directly above this statement and the way that height is handled in the if block above this one. Reported-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/davinci/vpbe_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/davinci/vpbe_display.c b/drivers/media/platform/davinci/vpbe_display.c index 13d027031ff0..82b06cc48bd1 100644 --- a/drivers/media/platform/davinci/vpbe_display.c +++ b/drivers/media/platform/davinci/vpbe_display.c @@ -518,7 +518,7 @@ vpbe_disp_calculate_scale_factor(struct vpbe_display *disp_dev, else if (v_scale == 4) layer_info->v_zoom = ZOOM_X4; if (v_exp) - layer_info->h_exp = V_EXP_6_OVER_5; + layer_info->v_exp = V_EXP_6_OVER_5; } else { /* no scaling, only cropping. Set display area to crop area */ cfg->ysize = expected_ysize; From b197d34b6083bae1ea368e87c64156719ab94ad7 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Thu, 20 Sep 2018 11:34:36 +0200 Subject: [PATCH 0754/3715] ARM: dts: rockchip: explicitly set vcc_sd0 pin to gpio on rk3188-radxarock [ Upstream commit a2df0984e73fd9e1dad5fc3f1c307ec3de395e30 ] It is good practice to make the setting of gpio-pinctrls explicitly in the devicetree, and in this case even necessary. Rockchip boards start with iomux settings set to gpio for most pins and while the linux pinctrl driver also implicitly sets the gpio function if a pin is requested as gpio that is not necessarily true for other drivers. The issue in question stems from uboot, where the sdmmc_pwr pin is set to function 1 (sdmmc-power) by the bootrom when reading the 1st-stage loader. The regulator controlled by the pin is active-low though, so when the dwmmc hw-block sets its enabled bit, it actually disables the regulator. By changing the pin back to gpio we fix that behaviour. Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm/boot/dts/rk3188-radxarock.dts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm/boot/dts/rk3188-radxarock.dts b/arch/arm/boot/dts/rk3188-radxarock.dts index 53d6fc2fdbce..541a798d3d20 100644 --- a/arch/arm/boot/dts/rk3188-radxarock.dts +++ b/arch/arm/boot/dts/rk3188-radxarock.dts @@ -130,6 +130,8 @@ regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; gpio = <&gpio3 RK_PA1 GPIO_ACTIVE_LOW>; + pinctrl-names = "default"; + pinctrl-0 = <&sdmmc_pwr>; startup-delay-us = <100000>; vin-supply = <&vcc_io>; }; @@ -348,6 +350,12 @@ }; }; + sd0 { + sdmmc_pwr: sdmmc-pwr { + rockchip,pins = ; + }; + }; + usb { host_vbus_drv: host-vbus-drv { rockchip,pins = <0 3 RK_FUNC_GPIO &pcfg_pull_none>; From 6e1fed986ac9cf32495bcf2b8885611e4b0132fe Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 2 Aug 2018 00:14:00 +0300 Subject: [PATCH 0755/3715] usb: gadget: uvc: configfs: Drop leaked references to config items [ Upstream commit 86f3daed59bceb4fa7981d85e89f63ebbae1d561 ] Some of the .allow_link() and .drop_link() operations implementations call config_group_find_item() and then leak the reference to the returned item. Fix this by dropping those references where needed. Signed-off-by: Laurent Pinchart Reviewed-by: Kieran Bingham Signed-off-by: Sasha Levin --- drivers/usb/gadget/function/uvc_configfs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/gadget/function/uvc_configfs.c b/drivers/usb/gadget/function/uvc_configfs.c index 844cb738bafd..fc604439b25a 100644 --- a/drivers/usb/gadget/function/uvc_configfs.c +++ b/drivers/usb/gadget/function/uvc_configfs.c @@ -543,6 +543,7 @@ static int uvcg_control_class_allow_link(struct config_item *src, unlock: mutex_unlock(&opts->lock); out: + config_item_put(header); mutex_unlock(su_mutex); return ret; } @@ -578,6 +579,7 @@ static void uvcg_control_class_drop_link(struct config_item *src, unlock: mutex_unlock(&opts->lock); out: + config_item_put(header); mutex_unlock(su_mutex); } @@ -2037,6 +2039,7 @@ static int uvcg_streaming_class_allow_link(struct config_item *src, unlock: mutex_unlock(&opts->lock); out: + config_item_put(header); mutex_unlock(su_mutex); return ret; } @@ -2077,6 +2080,7 @@ static void uvcg_streaming_class_drop_link(struct config_item *src, unlock: mutex_unlock(&opts->lock); out: + config_item_put(header); mutex_unlock(su_mutex); } From f010f1618e937e0037e53302bb28d57511c30217 Mon Sep 17 00:00:00 2001 From: Joel Pepper Date: Tue, 29 May 2018 21:02:12 +0200 Subject: [PATCH 0756/3715] usb: gadget: uvc: configfs: Prevent format changes after linking header [ Upstream commit cb2200f7af8341aaf0c6abd7ba37e4c667c41639 ] While checks are in place to avoid attributes and children of a format being manipulated after the format is linked into the streaming header, the linked flag was never actually set, invalidating the protections. Update the flag as appropriate in the header link calls. Signed-off-by: Joel Pepper Reviewed-by: Kieran Bingham Signed-off-by: Laurent Pinchart Signed-off-by: Sasha Levin --- drivers/usb/gadget/function/uvc_configfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/gadget/function/uvc_configfs.c b/drivers/usb/gadget/function/uvc_configfs.c index fc604439b25a..57f6e8a668cf 100644 --- a/drivers/usb/gadget/function/uvc_configfs.c +++ b/drivers/usb/gadget/function/uvc_configfs.c @@ -765,6 +765,7 @@ static int uvcg_streaming_header_allow_link(struct config_item *src, format_ptr->fmt = target_fmt; list_add_tail(&format_ptr->entry, &src_hdr->formats); ++src_hdr->num_fmt; + ++target_fmt->linked; out: mutex_unlock(&opts->lock); @@ -802,6 +803,8 @@ static void uvcg_streaming_header_drop_link(struct config_item *src, break; } + --target_fmt->linked; + out: mutex_unlock(&opts->lock); mutex_unlock(su_mutex); From 384bf41f4cf60dad4b202a506e34a991c78a3233 Mon Sep 17 00:00:00 2001 From: Brendan Higgins Date: Fri, 21 Sep 2018 16:30:50 -0700 Subject: [PATCH 0757/3715] i2c: aspeed: fix invalid clock parameters for very large divisors [ Upstream commit 17ccba67109cd0631f206cf49e17986218b47854 ] The function that computes clock parameters from divisors did not respect the maximum size of the bitfields that the parameters were written to. This fixes the bug. This bug can be reproduced with (and this fix verified with) the test at: https://kunit-review.googlesource.com/c/linux/+/1035/ Discovered-by-KUnit: https://kunit-review.googlesource.com/c/linux/+/1035/ Signed-off-by: Brendan Higgins Reviewed-by: Jae Hyun Yoo Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-aspeed.c | 65 +++++++++++++++++++++++---------- 1 file changed, 45 insertions(+), 20 deletions(-) diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c index a074735456bc..29574b9075fd 100644 --- a/drivers/i2c/busses/i2c-aspeed.c +++ b/drivers/i2c/busses/i2c-aspeed.c @@ -135,7 +135,8 @@ struct aspeed_i2c_bus { /* Synchronizes I/O mem access to base. */ spinlock_t lock; struct completion cmd_complete; - u32 (*get_clk_reg_val)(u32 divisor); + u32 (*get_clk_reg_val)(struct device *dev, + u32 divisor); unsigned long parent_clk_frequency; u32 bus_frequency; /* Transaction state. */ @@ -679,16 +680,27 @@ static const struct i2c_algorithm aspeed_i2c_algo = { #endif /* CONFIG_I2C_SLAVE */ }; -static u32 aspeed_i2c_get_clk_reg_val(u32 clk_high_low_max, u32 divisor) +static u32 aspeed_i2c_get_clk_reg_val(struct device *dev, + u32 clk_high_low_mask, + u32 divisor) { - u32 base_clk, clk_high, clk_low, tmp; + u32 base_clk_divisor, clk_high_low_max, clk_high, clk_low, tmp; + + /* + * SCL_high and SCL_low represent a value 1 greater than what is stored + * since a zero divider is meaningless. Thus, the max value each can + * store is every bit set + 1. Since SCL_high and SCL_low are added + * together (see below), the max value of both is the max value of one + * them times two. + */ + clk_high_low_max = (clk_high_low_mask + 1) * 2; /* * The actual clock frequency of SCL is: * SCL_freq = APB_freq / (base_freq * (SCL_high + SCL_low)) * = APB_freq / divisor * where base_freq is a programmable clock divider; its value is - * base_freq = 1 << base_clk + * base_freq = 1 << base_clk_divisor * SCL_high is the number of base_freq clock cycles that SCL stays high * and SCL_low is the number of base_freq clock cycles that SCL stays * low for a period of SCL. @@ -698,47 +710,59 @@ static u32 aspeed_i2c_get_clk_reg_val(u32 clk_high_low_max, u32 divisor) * SCL_low = clk_low + 1 * Thus, * SCL_freq = APB_freq / - * ((1 << base_clk) * (clk_high + 1 + clk_low + 1)) + * ((1 << base_clk_divisor) * (clk_high + 1 + clk_low + 1)) * The documentation recommends clk_high >= clk_high_max / 2 and * clk_low >= clk_low_max / 2 - 1 when possible; this last constraint * gives us the following solution: */ - base_clk = divisor > clk_high_low_max ? + base_clk_divisor = divisor > clk_high_low_max ? ilog2((divisor - 1) / clk_high_low_max) + 1 : 0; - tmp = (divisor + (1 << base_clk) - 1) >> base_clk; - clk_low = tmp / 2; - clk_high = tmp - clk_low; - if (clk_high) - clk_high--; + if (base_clk_divisor > ASPEED_I2CD_TIME_BASE_DIVISOR_MASK) { + base_clk_divisor = ASPEED_I2CD_TIME_BASE_DIVISOR_MASK; + clk_low = clk_high_low_mask; + clk_high = clk_high_low_mask; + dev_err(dev, + "clamping clock divider: divider requested, %u, is greater than largest possible divider, %u.\n", + divisor, (1 << base_clk_divisor) * clk_high_low_max); + } else { + tmp = (divisor + (1 << base_clk_divisor) - 1) + >> base_clk_divisor; + clk_low = tmp / 2; + clk_high = tmp - clk_low; - if (clk_low) - clk_low--; + if (clk_high) + clk_high--; + + if (clk_low) + clk_low--; + } return ((clk_high << ASPEED_I2CD_TIME_SCL_HIGH_SHIFT) & ASPEED_I2CD_TIME_SCL_HIGH_MASK) | ((clk_low << ASPEED_I2CD_TIME_SCL_LOW_SHIFT) & ASPEED_I2CD_TIME_SCL_LOW_MASK) - | (base_clk & ASPEED_I2CD_TIME_BASE_DIVISOR_MASK); + | (base_clk_divisor + & ASPEED_I2CD_TIME_BASE_DIVISOR_MASK); } -static u32 aspeed_i2c_24xx_get_clk_reg_val(u32 divisor) +static u32 aspeed_i2c_24xx_get_clk_reg_val(struct device *dev, u32 divisor) { /* * clk_high and clk_low are each 3 bits wide, so each can hold a max * value of 8 giving a clk_high_low_max of 16. */ - return aspeed_i2c_get_clk_reg_val(16, divisor); + return aspeed_i2c_get_clk_reg_val(dev, GENMASK(2, 0), divisor); } -static u32 aspeed_i2c_25xx_get_clk_reg_val(u32 divisor) +static u32 aspeed_i2c_25xx_get_clk_reg_val(struct device *dev, u32 divisor) { /* * clk_high and clk_low are each 4 bits wide, so each can hold a max * value of 16 giving a clk_high_low_max of 32. */ - return aspeed_i2c_get_clk_reg_val(32, divisor); + return aspeed_i2c_get_clk_reg_val(dev, GENMASK(3, 0), divisor); } /* precondition: bus.lock has been acquired. */ @@ -751,7 +775,7 @@ static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus) clk_reg_val &= (ASPEED_I2CD_TIME_TBUF_MASK | ASPEED_I2CD_TIME_THDSTA_MASK | ASPEED_I2CD_TIME_TACST_MASK); - clk_reg_val |= bus->get_clk_reg_val(divisor); + clk_reg_val |= bus->get_clk_reg_val(bus->dev, divisor); writel(clk_reg_val, bus->base + ASPEED_I2C_AC_TIMING_REG1); writel(ASPEED_NO_TIMEOUT_CTRL, bus->base + ASPEED_I2C_AC_TIMING_REG2); @@ -859,7 +883,8 @@ static int aspeed_i2c_probe_bus(struct platform_device *pdev) if (!match) bus->get_clk_reg_val = aspeed_i2c_24xx_get_clk_reg_val; else - bus->get_clk_reg_val = (u32 (*)(u32))match->data; + bus->get_clk_reg_val = (u32 (*)(struct device *, u32)) + match->data; /* Initialize the I2C adapter */ spin_lock_init(&bus->lock); From d2c8e94b586686d2d2f58a1a0b3184036c82359a Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 20 Sep 2018 12:16:36 -0700 Subject: [PATCH 0758/3715] phy: brcm-sata: allow PHY_BRCM_SATA driver to be built for DSL SoCs [ Upstream commit 26728df4b254ae06247726a9a6e64823e39ac504 ] Broadcom ARM-based DSL SoCs (BCM63xx product line) have the same Broadcom SATA PHY that other SoCs are using, make it possible to select that driver on these platforms. Signed-off-by: Florian Fainelli Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Sasha Levin --- drivers/phy/broadcom/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/phy/broadcom/Kconfig b/drivers/phy/broadcom/Kconfig index 64fc59c3ae6d..181b8fde2bfe 100644 --- a/drivers/phy/broadcom/Kconfig +++ b/drivers/phy/broadcom/Kconfig @@ -60,7 +60,8 @@ config PHY_NS2_USB_DRD config PHY_BRCM_SATA tristate "Broadcom SATA PHY driver" - depends on ARCH_BRCMSTB || ARCH_BCM_IPROC || BMIPS_GENERIC || COMPILE_TEST + depends on ARCH_BRCMSTB || ARCH_BCM_IPROC || BMIPS_GENERIC || \ + ARCH_BCM_63XX || COMPILE_TEST depends on OF select GENERIC_PHY default ARCH_BCM_IPROC From 1ddc4c618dfb74e4f0330e0d6da8fb0917cfbf92 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Fri, 21 Sep 2018 20:53:18 +0900 Subject: [PATCH 0759/3715] phy: renesas: rcar-gen3-usb2: fix vbus_ctrl for role sysfs [ Upstream commit 09938ea9d136243e8d1fed6d4d7a257764f28f6d ] This patch fixes and issue that the vbus_ctrl is disabled by rcar_gen3_init_from_a_peri_to_a_host(), so a usb host cannot supply the vbus. Note that this condition will exit when the otg irq happens even if we don't apply this patch. Fixes: 9bb86777fb71 ("phy: rcar-gen3-usb2: add sysfs for usb role swap") Signed-off-by: Yoshihiro Shimoda Reviewed-by: Simon Horman Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Sasha Levin --- drivers/phy/renesas/phy-rcar-gen3-usb2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/renesas/phy-rcar-gen3-usb2.c b/drivers/phy/renesas/phy-rcar-gen3-usb2.c index e8fe80312820..7f5e36bfeee8 100644 --- a/drivers/phy/renesas/phy-rcar-gen3-usb2.c +++ b/drivers/phy/renesas/phy-rcar-gen3-usb2.c @@ -195,7 +195,7 @@ static void rcar_gen3_init_from_a_peri_to_a_host(struct rcar_gen3_chan *ch) val = readl(usb2_base + USB2_OBINTEN); writel(val & ~USB2_OBINT_BITS, usb2_base + USB2_OBINTEN); - rcar_gen3_enable_vbus_ctrl(ch, 0); + rcar_gen3_enable_vbus_ctrl(ch, 1); rcar_gen3_init_for_host(ch); writel(val | USB2_OBINT_BITS, usb2_base + USB2_OBINTEN); From 16f9d73aae71b29157f9ccd6623e6bccbab70c95 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Sat, 22 Sep 2018 11:44:05 +0200 Subject: [PATCH 0760/3715] phy: phy-twl4030-usb: fix denied runtime access [ Upstream commit 6c7103aa026094a4ee2c2708ec6977a6dfc5331d ] When runtime is not enabled, pm_runtime_get_sync() returns -EACCESS, the counter will be incremented but the resume callback not called, so enumeration and charging will not start properly. To avoid that happen, disable irq on suspend and recheck on resume. Practically this happens when the device is woken up from suspend by plugging in usb. Signed-off-by: Andreas Kemnade Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Sasha Levin --- drivers/phy/ti/phy-twl4030-usb.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/phy/ti/phy-twl4030-usb.c b/drivers/phy/ti/phy-twl4030-usb.c index a44680d64f9b..c267afb68f07 100644 --- a/drivers/phy/ti/phy-twl4030-usb.c +++ b/drivers/phy/ti/phy-twl4030-usb.c @@ -144,6 +144,7 @@ #define PMBR1 0x0D #define GPIO_USB_4PIN_ULPI_2430C (3 << 0) +static irqreturn_t twl4030_usb_irq(int irq, void *_twl); /* * If VBUS is valid or ID is ground, then we know a * cable is present and we need to be runtime-enabled @@ -395,6 +396,33 @@ static void __twl4030_phy_power(struct twl4030_usb *twl, int on) WARN_ON(twl4030_usb_write_verify(twl, PHY_PWR_CTRL, pwr) < 0); } +static int __maybe_unused twl4030_usb_suspend(struct device *dev) +{ + struct twl4030_usb *twl = dev_get_drvdata(dev); + + /* + * we need enabled runtime on resume, + * so turn irq off here, so we do not get it early + * note: wakeup on usb plug works independently of this + */ + dev_dbg(twl->dev, "%s\n", __func__); + disable_irq(twl->irq); + + return 0; +} + +static int __maybe_unused twl4030_usb_resume(struct device *dev) +{ + struct twl4030_usb *twl = dev_get_drvdata(dev); + + dev_dbg(twl->dev, "%s\n", __func__); + enable_irq(twl->irq); + /* check whether cable status changed */ + twl4030_usb_irq(0, twl); + + return 0; +} + static int __maybe_unused twl4030_usb_runtime_suspend(struct device *dev) { struct twl4030_usb *twl = dev_get_drvdata(dev); @@ -655,6 +683,7 @@ static const struct phy_ops ops = { static const struct dev_pm_ops twl4030_usb_pm_ops = { SET_RUNTIME_PM_OPS(twl4030_usb_runtime_suspend, twl4030_usb_runtime_resume, NULL) + SET_SYSTEM_SLEEP_PM_OPS(twl4030_usb_suspend, twl4030_usb_resume) }; static int twl4030_usb_probe(struct platform_device *pdev) From 1e61ce9d404511eb6904e79c1c5347f22ef06e8a Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 10 Aug 2018 15:42:03 +0300 Subject: [PATCH 0761/3715] usb: gadget: uvc: Factor out video USB request queueing [ Upstream commit 9d1ff5dcb3cd3390b1e56f1c24ae42c72257c4a3 ] USB requests for video data are queued from two different locations in the driver, with the same code block occurring twice. Factor it out to a function. Signed-off-by: Laurent Pinchart Reviewed-by: Paul Elder Tested-by: Paul Elder Reviewed-by: Kieran Bingham Signed-off-by: Sasha Levin --- drivers/usb/gadget/function/uvc_video.c | 30 ++++++++++++++++--------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c index 0f01c04d7cbd..540917f54506 100644 --- a/drivers/usb/gadget/function/uvc_video.c +++ b/drivers/usb/gadget/function/uvc_video.c @@ -129,6 +129,19 @@ uvc_video_encode_isoc(struct usb_request *req, struct uvc_video *video, * Request handling */ +static int uvcg_video_ep_queue(struct uvc_video *video, struct usb_request *req) +{ + int ret; + + ret = usb_ep_queue(video->ep, req, GFP_ATOMIC); + if (ret < 0) { + printk(KERN_INFO "Failed to queue request (%d).\n", ret); + usb_ep_set_halt(video->ep); + } + + return ret; +} + /* * I somehow feel that synchronisation won't be easy to achieve here. We have * three events that control USB requests submission: @@ -193,14 +206,13 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req) video->encode(req, video, buf); - if ((ret = usb_ep_queue(ep, req, GFP_ATOMIC)) < 0) { - printk(KERN_INFO "Failed to queue request (%d).\n", ret); - usb_ep_set_halt(ep); - spin_unlock_irqrestore(&video->queue.irqlock, flags); + ret = uvcg_video_ep_queue(video, req); + spin_unlock_irqrestore(&video->queue.irqlock, flags); + + if (ret < 0) { uvcg_queue_cancel(queue, 0); goto requeue; } - spin_unlock_irqrestore(&video->queue.irqlock, flags); return; @@ -320,15 +332,13 @@ int uvcg_video_pump(struct uvc_video *video) video->encode(req, video, buf); /* Queue the USB request */ - ret = usb_ep_queue(video->ep, req, GFP_ATOMIC); + ret = uvcg_video_ep_queue(video, req); + spin_unlock_irqrestore(&queue->irqlock, flags); + if (ret < 0) { - printk(KERN_INFO "Failed to queue request (%d)\n", ret); - usb_ep_set_halt(video->ep); - spin_unlock_irqrestore(&queue->irqlock, flags); uvcg_queue_cancel(queue, 0); break; } - spin_unlock_irqrestore(&queue->irqlock, flags); } spin_lock_irqsave(&video->req_lock, flags); From 8049910f4f01fffe808df1978e1f1597058a0340 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 10 Aug 2018 15:44:57 +0300 Subject: [PATCH 0762/3715] usb: gadget: uvc: Only halt video streaming endpoint in bulk mode [ Upstream commit 8dbf9c7abefd5c1434a956d5c6b25e11183061a3 ] When USB requests for video data fail to be submitted, the driver signals a problem to the host by halting the video streaming endpoint. This is only valid in bulk mode, as isochronous transfers have no handshake phase and can't thus report a stall. The usb_ep_set_halt() call returns an error when using isochronous endpoints, which we happily ignore, but some UDCs complain in the kernel log. Fix this by only trying to halt the endpoint in bulk mode. Signed-off-by: Laurent Pinchart Reviewed-by: Paul Elder Tested-by: Paul Elder Reviewed-by: Kieran Bingham Signed-off-by: Sasha Levin --- drivers/usb/gadget/function/uvc_video.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c index 540917f54506..d6bab12b0b47 100644 --- a/drivers/usb/gadget/function/uvc_video.c +++ b/drivers/usb/gadget/function/uvc_video.c @@ -136,7 +136,9 @@ static int uvcg_video_ep_queue(struct uvc_video *video, struct usb_request *req) ret = usb_ep_queue(video->ep, req, GFP_ATOMIC); if (ret < 0) { printk(KERN_INFO "Failed to queue request (%d).\n", ret); - usb_ep_set_halt(video->ep); + /* Isochronous endpoints can't be halted. */ + if (usb_endpoint_xfer_bulk(video->ep->desc)) + usb_ep_set_halt(video->ep); } return ret; From bd1810421faad894bac51a378889665b3de476d8 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:17:45 -0600 Subject: [PATCH 0763/3715] coresight: Fix handling of sinks [ Upstream commit c71369de02b285d9da526a526d8f2affc7b17c59 ] The coresight components could be operated either in sysfs mode or in perf mode. For some of the components, the mode of operation doesn't matter as they simply relay the data to the next component in the trace path. But for sinks, they need to be able to provide the trace data back to the user. Thus we need to make sure that "mode" is handled appropriately. e.g, the sysfs mode could have multiple sources driving the trace data, while perf mode doesn't allow sharing the sink. The coresight_enable_sink() however doesn't really allow this check to trigger as it skips the "enable_sink" callback if the component is already enabled, irrespective of the mode. This could cause mixing of data from different modes or even same mode (in perf), if the sources are different. Also, if we fail to enable the sink while enabling a path (where sink is the first component enabled), we could end up in disabling the components in the "entire" path which were not enabled in this trial, causing disruptions in the existing trace paths. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/hwtracing/coresight/coresight.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index e571e4010dff..366c1d493af3 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -140,12 +140,14 @@ static int coresight_enable_sink(struct coresight_device *csdev, u32 mode) { int ret; - if (!csdev->enable) { - if (sink_ops(csdev)->enable) { - ret = sink_ops(csdev)->enable(csdev, mode); - if (ret) - return ret; - } + /* + * We need to make sure the "new" session is compatible with the + * existing "mode" of operation. + */ + if (sink_ops(csdev)->enable) { + ret = sink_ops(csdev)->enable(csdev, mode); + if (ret) + return ret; csdev->enable = true; } @@ -347,8 +349,14 @@ int coresight_enable_path(struct list_head *path, u32 mode) switch (type) { case CORESIGHT_DEV_TYPE_SINK: ret = coresight_enable_sink(csdev, mode); + /* + * Sink is the first component turned on. If we + * failed to enable the sink, there are no components + * that need disabling. Disabling the path here + * would mean we could disrupt an existing session. + */ if (ret) - goto err; + goto out; break; case CORESIGHT_DEV_TYPE_SOURCE: /* sources are enabled from either sysFS or Perf */ From b6d0dacc13bd1f69eca981d62ab1445046ff3953 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:17:47 -0600 Subject: [PATCH 0764/3715] coresight: perf: Fix per cpu path management [ Upstream commit 5ecabe4a76e8cdb61fa3e24862d9ca240a1c4ddf ] We create a coresight trace path for each online CPU when we start the event. We rely on the number of online CPUs and then go on to allocate an array matching the "number of online CPUs" for holding the path and then uses normal CPU id as the index to the array. This is problematic as we could have some offline CPUs causing us to access beyond the actual array size (e.g, on a dual SMP system, if CPU0 is offline, CPU1 could be really accessing beyond the array). The solution is to switch to per-cpu array for holding the path. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- .../hwtracing/coresight/coresight-etm-perf.c | 55 ++++++++++++++----- 1 file changed, 40 insertions(+), 15 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index 8a0ad77574e7..99cbf5d5d1c1 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -44,7 +45,7 @@ struct etm_event_data { struct work_struct work; cpumask_t mask; void *snk_config; - struct list_head **path; + struct list_head * __percpu *path; }; static DEFINE_PER_CPU(struct perf_output_handle, ctx_handle); @@ -72,6 +73,18 @@ static const struct attribute_group *etm_pmu_attr_groups[] = { NULL, }; +static inline struct list_head ** +etm_event_cpu_path_ptr(struct etm_event_data *data, int cpu) +{ + return per_cpu_ptr(data->path, cpu); +} + +static inline struct list_head * +etm_event_cpu_path(struct etm_event_data *data, int cpu) +{ + return *etm_event_cpu_path_ptr(data, cpu); +} + static void etm_event_read(struct perf_event *event) {} static int etm_addr_filters_alloc(struct perf_event *event) @@ -131,23 +144,26 @@ static void free_event_data(struct work_struct *work) */ if (event_data->snk_config) { cpu = cpumask_first(mask); - sink = coresight_get_sink(event_data->path[cpu]); + sink = coresight_get_sink(etm_event_cpu_path(event_data, cpu)); if (sink_ops(sink)->free_buffer) sink_ops(sink)->free_buffer(event_data->snk_config); } for_each_cpu(cpu, mask) { - if (!(IS_ERR_OR_NULL(event_data->path[cpu]))) - coresight_release_path(event_data->path[cpu]); + struct list_head **ppath; + + ppath = etm_event_cpu_path_ptr(event_data, cpu); + if (!(IS_ERR_OR_NULL(*ppath))) + coresight_release_path(*ppath); + *ppath = NULL; } - kfree(event_data->path); + free_percpu(event_data->path); kfree(event_data); } static void *alloc_event_data(int cpu) { - int size; cpumask_t *mask; struct etm_event_data *event_data; @@ -158,7 +174,6 @@ static void *alloc_event_data(int cpu) /* Make sure nothing disappears under us */ get_online_cpus(); - size = num_online_cpus(); mask = &event_data->mask; if (cpu != -1) @@ -175,8 +190,8 @@ static void *alloc_event_data(int cpu) * unused memory when dealing with single CPU trace scenarios is small * compared to the cost of searching through an optimized array. */ - event_data->path = kcalloc(size, - sizeof(struct list_head *), GFP_KERNEL); + event_data->path = alloc_percpu(struct list_head *); + if (!event_data->path) { kfree(event_data); return NULL; @@ -224,6 +239,7 @@ static void *etm_setup_aux(int event_cpu, void **pages, /* Setup the path for each CPU in a trace session */ for_each_cpu(cpu, mask) { + struct list_head *path; struct coresight_device *csdev; csdev = per_cpu(csdev_src, cpu); @@ -235,9 +251,11 @@ static void *etm_setup_aux(int event_cpu, void **pages, * list of devices from source to sink that can be * referenced later when the path is actually needed. */ - event_data->path[cpu] = coresight_build_path(csdev, sink); - if (IS_ERR(event_data->path[cpu])) + path = coresight_build_path(csdev, sink); + if (IS_ERR(path)) goto err; + + *etm_event_cpu_path_ptr(event_data, cpu) = path; } if (!sink_ops(sink)->alloc_buffer) @@ -266,6 +284,7 @@ static void etm_event_start(struct perf_event *event, int flags) struct etm_event_data *event_data; struct perf_output_handle *handle = this_cpu_ptr(&ctx_handle); struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu); + struct list_head *path; if (!csdev) goto fail; @@ -278,8 +297,9 @@ static void etm_event_start(struct perf_event *event, int flags) if (!event_data) goto fail; + path = etm_event_cpu_path(event_data, cpu); /* We need a sink, no need to continue without one */ - sink = coresight_get_sink(event_data->path[cpu]); + sink = coresight_get_sink(path); if (WARN_ON_ONCE(!sink || !sink_ops(sink)->set_buffer)) goto fail_end_stop; @@ -289,7 +309,7 @@ static void etm_event_start(struct perf_event *event, int flags) goto fail_end_stop; /* Nothing will happen without a path */ - if (coresight_enable_path(event_data->path[cpu], CS_MODE_PERF)) + if (coresight_enable_path(path, CS_MODE_PERF)) goto fail_end_stop; /* Tell the perf core the event is alive */ @@ -317,6 +337,7 @@ static void etm_event_stop(struct perf_event *event, int mode) struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu); struct perf_output_handle *handle = this_cpu_ptr(&ctx_handle); struct etm_event_data *event_data = perf_get_aux(handle); + struct list_head *path; if (event->hw.state == PERF_HES_STOPPED) return; @@ -324,7 +345,11 @@ static void etm_event_stop(struct perf_event *event, int mode) if (!csdev) return; - sink = coresight_get_sink(event_data->path[cpu]); + path = etm_event_cpu_path(event_data, cpu); + if (!path) + return; + + sink = coresight_get_sink(path); if (!sink) return; @@ -355,7 +380,7 @@ static void etm_event_stop(struct perf_event *event, int mode) } /* Disabling the path make its elements available to other sessions */ - coresight_disable_path(event_data->path[cpu]); + coresight_disable_path(path); } static int etm_event_add(struct perf_event *event, int mode) From af525b7dcfc09a48d1a570d6c6f2961ff4f640b7 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:17:50 -0600 Subject: [PATCH 0765/3715] coresight: perf: Disable trace path upon source error [ Upstream commit 4f8ef21007531c3d7cb5b826e7b2c8999b65ecae ] We enable the trace path, before activating the source. If we fail to enable the source, we must disable the path to make sure it is available for another session. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/hwtracing/coresight/coresight-etm-perf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index 99cbf5d5d1c1..69349b93e874 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -317,11 +317,13 @@ static void etm_event_start(struct perf_event *event, int flags) /* Finally enable the tracer */ if (source_ops(csdev)->enable(csdev, event, CS_MODE_PERF)) - goto fail_end_stop; + goto fail_disable_path; out: return; +fail_disable_path: + coresight_disable_path(path); fail_end_stop: perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); perf_aux_output_end(handle, 0); From ff39cce5c3e50b58443f44d9501f1c2b1f1cf8d3 Mon Sep 17 00:00:00 2001 From: Tomasz Nowicki Date: Thu, 20 Sep 2018 13:18:00 -0600 Subject: [PATCH 0766/3715] coresight: etm4x: Configure EL2 exception level when kernel is running in HYP [ Upstream commit b860801e3237ec4c74cf8de0be4816996757ae5c ] For non-VHE systems host kernel runs at EL1 and jumps to EL2 whenever hypervisor code should be executed. In this case ETM4x driver must restrict configuration to EL1 when it setups kernel tracing. However, there is no separate hypervisor privilege level when VHE is enabled, the host kernel runs at EL2. This patch fixes configuration of TRCACATRn register for VHE systems so that ETM_EXLEVEL_NS_HYP bit is used instead of ETM_EXLEVEL_NS_OS to on/off kernel tracing. At the same time, it moves common code to new helper. Signed-off-by: Tomasz Nowicki Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/hwtracing/coresight/coresight-etm4x.c | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c index b0141ba7b741..fb392688281b 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.c +++ b/drivers/hwtracing/coresight/coresight-etm4x.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "coresight-etm4x.h" #include "coresight-etm-perf.h" @@ -623,7 +624,7 @@ static void etm4_set_default_config(struct etmv4_config *config) config->vinst_ctrl |= BIT(0); } -static u64 etm4_get_access_type(struct etmv4_config *config) +static u64 etm4_get_ns_access_type(struct etmv4_config *config) { u64 access_type = 0; @@ -634,17 +635,26 @@ static u64 etm4_get_access_type(struct etmv4_config *config) * Bit[13] Exception level 1 - OS * Bit[14] Exception level 2 - Hypervisor * Bit[15] Never implemented - * - * Always stay away from hypervisor mode. */ - access_type = ETM_EXLEVEL_NS_HYP; - - if (config->mode & ETM_MODE_EXCL_KERN) - access_type |= ETM_EXLEVEL_NS_OS; + if (!is_kernel_in_hyp_mode()) { + /* Stay away from hypervisor mode for non-VHE */ + access_type = ETM_EXLEVEL_NS_HYP; + if (config->mode & ETM_MODE_EXCL_KERN) + access_type |= ETM_EXLEVEL_NS_OS; + } else if (config->mode & ETM_MODE_EXCL_KERN) { + access_type = ETM_EXLEVEL_NS_HYP; + } if (config->mode & ETM_MODE_EXCL_USER) access_type |= ETM_EXLEVEL_NS_APP; + return access_type; +} + +static u64 etm4_get_access_type(struct etmv4_config *config) +{ + u64 access_type = etm4_get_ns_access_type(config); + /* * EXLEVEL_S, bits[11:8], don't trace anything happening * in secure state. @@ -898,20 +908,10 @@ void etm4_config_trace_mode(struct etmv4_config *config) addr_acc = config->addr_acc[ETM_DEFAULT_ADDR_COMP]; /* clear default config */ - addr_acc &= ~(ETM_EXLEVEL_NS_APP | ETM_EXLEVEL_NS_OS); + addr_acc &= ~(ETM_EXLEVEL_NS_APP | ETM_EXLEVEL_NS_OS | + ETM_EXLEVEL_NS_HYP); - /* - * EXLEVEL_NS, bits[15:12] - * The Exception levels are: - * Bit[12] Exception level 0 - Application - * Bit[13] Exception level 1 - OS - * Bit[14] Exception level 2 - Hypervisor - * Bit[15] Never implemented - */ - if (mode & ETM_MODE_EXCL_KERN) - addr_acc |= ETM_EXLEVEL_NS_OS; - else - addr_acc |= ETM_EXLEVEL_NS_APP; + addr_acc |= etm4_get_ns_access_type(config); config->addr_acc[ETM_DEFAULT_ADDR_COMP] = addr_acc; config->addr_acc[ETM_DEFAULT_ADDR_COMP + 1] = addr_acc; From 3cd8af57e0f57098c340657f6870f5614c1230c7 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 20 Sep 2018 13:18:02 -0600 Subject: [PATCH 0767/3715] coresight: tmc: Fix byte-address alignment for RRP [ Upstream commit e7753f3937610633a540f2be81be87531f96ff04 ] >From the comment in the code, it claims the requirement for byte-address alignment for RRP register: 'for 32-bit, 64-bit and 128-bit wide trace memory, the four LSBs must be 0s. For 256-bit wide trace memory, the five LSBs must be 0s'. This isn't consistent with the program, the program sets five LSBs as zeros for 32/64/128-bit wide trace memory and set six LSBs zeros for 256-bit wide trace memory. After checking with the CoreSight Trace Memory Controller technical reference manual (ARM DDI 0461B, section 3.3.4 RAM Read Pointer Register), it proves the comment is right and the program does wrong setting. This patch fixes byte-address alignment for RRP by following correct definition in the technical reference manual. Cc: Mathieu Poirier Cc: Mike Leach Signed-off-by: Leo Yan Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/hwtracing/coresight/coresight-tmc-etf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index e2513b786242..336194d059fe 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -442,10 +442,10 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev, case TMC_MEM_INTF_WIDTH_32BITS: case TMC_MEM_INTF_WIDTH_64BITS: case TMC_MEM_INTF_WIDTH_128BITS: - mask = GENMASK(31, 5); + mask = GENMASK(31, 4); break; case TMC_MEM_INTF_WIDTH_256BITS: - mask = GENMASK(31, 6); + mask = GENMASK(31, 5); break; } From 0038e0fca9dc59569be43db00db659d6066831cc Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 11 Sep 2018 10:44:03 -0700 Subject: [PATCH 0768/3715] misc: kgdbts: Fix restrict error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit fa0218ef733e6f247a1a3986e3eb12460064ac77 ] kgdbts current fails when compiled with restrict: drivers/misc/kgdbts.c: In function ‘configure_kgdbts’: drivers/misc/kgdbts.c:1070:2: error: ‘strcpy’ source argument is the same as destination [-Werror=restrict] strcpy(config, opt); ^~~~~~~~~~~~~~~~~~~ As the error says, config is being used in both the source and destination. Refactor the code to avoid the extra copy and put the parsing closer to the actual location. Signed-off-by: Laura Abbott Acked-by: Daniel Thompson Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/misc/kgdbts.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c index 94cbc5c98cae..05824ff6b916 100644 --- a/drivers/misc/kgdbts.c +++ b/drivers/misc/kgdbts.c @@ -981,6 +981,12 @@ static void kgdbts_run_tests(void) int nmi_sleep = 0; int i; + verbose = 0; + if (strstr(config, "V1")) + verbose = 1; + if (strstr(config, "V2")) + verbose = 2; + ptr = strchr(config, 'F'); if (ptr) fork_test = simple_strtol(ptr + 1, NULL, 10); @@ -1064,13 +1070,6 @@ static int kgdbts_option_setup(char *opt) return -ENOSPC; } strcpy(config, opt); - - verbose = 0; - if (strstr(config, "V1")) - verbose = 1; - if (strstr(config, "V2")) - verbose = 2; - return 0; } @@ -1082,9 +1081,6 @@ static int configure_kgdbts(void) if (!strlen(config) || isspace(config[0])) goto noconfig; - err = kgdbts_option_setup(config); - if (err) - goto noconfig; final_ack = 0; run_plant_and_detach_test(1); From 4cb9af0d93bd98fde55492885458f58fcf370b28 Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Thu, 20 Sep 2018 10:29:13 +0800 Subject: [PATCH 0769/3715] misc: genwqe: should return proper error value. [ Upstream commit 02241995b004faa7d9ff628e97f24056190853f8 ] The function should return -EFAULT when copy_from_user fails. Even though the caller does not distinguish them. but we should keep backward compatibility. Signed-off-by: zhong jiang Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/misc/genwqe/card_utils.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/misc/genwqe/card_utils.c b/drivers/misc/genwqe/card_utils.c index cb1240985157..f55e6e822bea 100644 --- a/drivers/misc/genwqe/card_utils.c +++ b/drivers/misc/genwqe/card_utils.c @@ -298,7 +298,7 @@ static int genwqe_sgl_size(int num_pages) int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, void __user *user_addr, size_t user_size) { - int rc; + int ret = -ENOMEM; struct pci_dev *pci_dev = cd->pci_dev; sgl->fpage_offs = offset_in_page((unsigned long)user_addr); @@ -317,7 +317,7 @@ int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, if (get_order(sgl->sgl_size) > MAX_ORDER) { dev_err(&pci_dev->dev, "[%s] err: too much memory requested!\n", __func__); - return -ENOMEM; + return ret; } sgl->sgl = __genwqe_alloc_consistent(cd, sgl->sgl_size, @@ -325,7 +325,7 @@ int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, if (sgl->sgl == NULL) { dev_err(&pci_dev->dev, "[%s] err: no memory available!\n", __func__); - return -ENOMEM; + return ret; } /* Only use buffering on incomplete pages */ @@ -338,7 +338,7 @@ int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, /* Sync with user memory */ if (copy_from_user(sgl->fpage + sgl->fpage_offs, user_addr, sgl->fpage_size)) { - rc = -EFAULT; + ret = -EFAULT; goto err_out; } } @@ -351,7 +351,7 @@ int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, /* Sync with user memory */ if (copy_from_user(sgl->lpage, user_addr + user_size - sgl->lpage_size, sgl->lpage_size)) { - rc = -EFAULT; + ret = -EFAULT; goto err_out2; } } @@ -373,7 +373,8 @@ int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, sgl->sgl = NULL; sgl->sgl_dma_addr = 0; sgl->sgl_size = 0; - return -ENOMEM; + + return ret; } int genwqe_setup_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, From f1b10ba5739440ca51d66f0b2949cead88d2dd02 Mon Sep 17 00:00:00 2001 From: Li Qiang Date: Tue, 25 Sep 2018 13:01:27 -0600 Subject: [PATCH 0770/3715] vfio/pci: Fix potential memory leak in vfio_msi_cap_len [ Upstream commit 30ea32ab1951c80c6113f300fce2c70cd12659e4 ] Free allocated vdev->msi_perm in error path. Signed-off-by: Li Qiang Reviewed-by: Eric Auger Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin --- drivers/vfio/pci/vfio_pci_config.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 115a36f6f403..62023b4a373b 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -1180,8 +1180,10 @@ static int vfio_msi_cap_len(struct vfio_pci_device *vdev, u8 pos) return -ENOMEM; ret = init_pci_cap_msi_perm(vdev->msi_perm, len, flags); - if (ret) + if (ret) { + kfree(vdev->msi_perm); return ret; + } return len; } From d7bb792bc8ff8e96ea344f6752d7749f82e1ff46 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 25 Sep 2018 13:01:27 -0600 Subject: [PATCH 0771/3715] vfio/pci: Mask buggy SR-IOV VF INTx support [ Upstream commit db04264fe9bc0f2b62e036629f9afb530324b693 ] The SR-IOV spec requires that VFs must report zero for the INTx pin register as VFs are precluded from INTx support. It's much easier for the host kernel to understand whether a device is a VF and therefore whether a non-zero pin register value is bogus than it is to do the same in userspace. Override the INTx count for such devices and virtualize the pin register to provide a consistent view of the device to the user. As this is clearly a spec violation, warn about it to support hardware validation, but also provide a known whitelist as it doesn't do much good to continue complaining if the hardware vendor doesn't plan to fix it. Known devices with this issue: 8086:270c Tested-by: Gage Eads Reviewed-by: Ashok Raj Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin --- drivers/vfio/pci/vfio_pci.c | 10 +++++++--- drivers/vfio/pci/vfio_pci_config.c | 27 +++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 15b1cd4ef5a7..9bd3e7911af2 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -433,10 +433,14 @@ static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type) { if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) { u8 pin; - pci_read_config_byte(vdev->pdev, PCI_INTERRUPT_PIN, &pin); - if (IS_ENABLED(CONFIG_VFIO_PCI_INTX) && !vdev->nointx && pin) - return 1; + if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) || + vdev->nointx || vdev->pdev->is_virtfn) + return 0; + + pci_read_config_byte(vdev->pdev, PCI_INTERRUPT_PIN, &pin); + + return pin ? 1 : 0; } else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) { u8 pos; u16 flags; diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 62023b4a373b..423ea1f98441 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -1611,6 +1611,15 @@ static int vfio_ecap_init(struct vfio_pci_device *vdev) return 0; } +/* + * Nag about hardware bugs, hopefully to have vendors fix them, but at least + * to collect a list of dependencies for the VF INTx pin quirk below. + */ +static const struct pci_device_id known_bogus_vf_intx_pin[] = { + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x270c) }, + {} +}; + /* * For each device we allocate a pci_config_map that indicates the * capability occupying each dword and thus the struct perm_bits we @@ -1676,6 +1685,24 @@ int vfio_config_init(struct vfio_pci_device *vdev) if (pdev->is_virtfn) { *(__le16 *)&vconfig[PCI_VENDOR_ID] = cpu_to_le16(pdev->vendor); *(__le16 *)&vconfig[PCI_DEVICE_ID] = cpu_to_le16(pdev->device); + + /* + * Per SR-IOV spec rev 1.1, 3.4.1.18 the interrupt pin register + * does not apply to VFs and VFs must implement this register + * as read-only with value zero. Userspace is not readily able + * to identify whether a device is a VF and thus that the pin + * definition on the device is bogus should it violate this + * requirement. We already virtualize the pin register for + * other purposes, so we simply need to replace the bogus value + * and consider VFs when we determine INTx IRQ count. + */ + if (vconfig[PCI_INTERRUPT_PIN] && + !pci_match_id(known_bogus_vf_intx_pin, pdev)) + pci_warn(pdev, + "Hardware bug: VF reports bogus INTx pin %d\n", + vconfig[PCI_INTERRUPT_PIN]); + + vconfig[PCI_INTERRUPT_PIN] = 0; /* Gratuitous for good VFs */ } if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) || vdev->nointx) From 001824a26e4cafc31bc3778fea272989fdbb0165 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Tue, 25 Sep 2018 10:56:52 +0800 Subject: [PATCH 0772/3715] scsi: libsas: always unregister the old device if going to discover new [ Upstream commit 32c850bf587f993b2620b91e5af8a64a7813f504 ] If we went into sas_rediscover_dev() the attached_sas_addr was already insured not to be zero. So it's unnecessary to check if the attached_sas_addr is zero. And although if the sas address is not changed, we always have to unregister the old device when we are going to register a new one. We cannot just leave the device there and bring up the new. Signed-off-by: Jason Yan CC: chenxiang CC: John Garry CC: Johannes Thumshirn CC: Ewan Milne CC: Christoph Hellwig CC: Tomas Henzl CC: Dan Williams CC: Hannes Reinecke Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/libsas/sas_expander.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 259ee0d3c3e6..7f2d00354a85 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -2060,14 +2060,11 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, bool last) return res; } - /* delete the old link */ - if (SAS_ADDR(phy->attached_sas_addr) && - SAS_ADDR(sas_addr) != SAS_ADDR(phy->attached_sas_addr)) { - SAS_DPRINTK("ex %016llx phy 0x%x replace %016llx\n", - SAS_ADDR(dev->sas_addr), phy_id, - SAS_ADDR(phy->attached_sas_addr)); - sas_unregister_devs_sas_addr(dev, phy_id, last); - } + /* we always have to delete the old device when we went here */ + SAS_DPRINTK("ex %016llx phy 0x%x replace %016llx\n", + SAS_ADDR(dev->sas_addr), phy_id, + SAS_ADDR(phy->attached_sas_addr)); + sas_unregister_devs_sas_addr(dev, phy_id, last); return sas_discover_new(dev, phy_id); } From 7d2bd594292dc12dbb3f93eacf688736c0ce338b Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Tue, 25 Sep 2018 21:54:07 +0200 Subject: [PATCH 0773/3715] phy: lantiq: Fix compile warning [ Upstream commit 3a00dae006623d799266d85f28b5f76ef07d6b6c ] This local variable is unused, remove it. Fixes: dea54fbad332 ("phy: Add an USB PHY driver for the Lantiq SoCs using the RCU module") Signed-off-by: Hauke Mehrtens Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Sasha Levin --- drivers/phy/lantiq/phy-lantiq-rcu-usb2.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/phy/lantiq/phy-lantiq-rcu-usb2.c b/drivers/phy/lantiq/phy-lantiq-rcu-usb2.c index 986224fca9e9..5a180f71d8d4 100644 --- a/drivers/phy/lantiq/phy-lantiq-rcu-usb2.c +++ b/drivers/phy/lantiq/phy-lantiq-rcu-usb2.c @@ -156,7 +156,6 @@ static int ltq_rcu_usb2_of_parse(struct ltq_rcu_usb2_priv *priv, { struct device *dev = priv->dev; const __be32 *offset; - int ret; priv->reg_bits = of_device_get_match_data(dev); From cdd4bea5912cf9353f98728e671fbf319193b2f4 Mon Sep 17 00:00:00 2001 From: Marcel Ziswiler Date: Fri, 31 Aug 2018 14:42:33 +0200 Subject: [PATCH 0774/3715] ARM: dts: tegra30: fix xcvr-setup-use-fuses [ Upstream commit 564706f65cda3de52b09e51feb423a43940fe661 ] There was a dot instead of a comma. Fix this. Signed-off-by: Marcel Ziswiler Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- arch/arm/boot/dts/tegra30.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/tegra30.dtsi b/arch/arm/boot/dts/tegra30.dtsi index c3e9f1e847db..cb5b76e95813 100644 --- a/arch/arm/boot/dts/tegra30.dtsi +++ b/arch/arm/boot/dts/tegra30.dtsi @@ -840,7 +840,7 @@ nvidia,elastic-limit = <16>; nvidia,term-range-adj = <6>; nvidia,xcvr-setup = <51>; - nvidia.xcvr-setup-use-fuses; + nvidia,xcvr-setup-use-fuses; nvidia,xcvr-lsfslew = <1>; nvidia,xcvr-lsrslew = <1>; nvidia,xcvr-hsslew = <32>; @@ -877,7 +877,7 @@ nvidia,elastic-limit = <16>; nvidia,term-range-adj = <6>; nvidia,xcvr-setup = <51>; - nvidia.xcvr-setup-use-fuses; + nvidia,xcvr-setup-use-fuses; nvidia,xcvr-lsfslew = <2>; nvidia,xcvr-lsrslew = <2>; nvidia,xcvr-hsslew = <32>; @@ -913,7 +913,7 @@ nvidia,elastic-limit = <16>; nvidia,term-range-adj = <6>; nvidia,xcvr-setup = <51>; - nvidia.xcvr-setup-use-fuses; + nvidia,xcvr-setup-use-fuses; nvidia,xcvr-lsfslew = <2>; nvidia,xcvr-lsrslew = <2>; nvidia,xcvr-hsslew = <32>; From 5377c07d286b7caf6aff40289800a51fd2068c29 Mon Sep 17 00:00:00 2001 From: Marcel Ziswiler Date: Fri, 31 Aug 2018 18:37:43 +0200 Subject: [PATCH 0775/3715] ARM: tegra: apalis_t30: fix mmc1 cmd pull-up [ Upstream commit 1c997fe4becdc6fcbc06e23982ceb65621e6572a ] Fix MMC1 cmd pin pull-up causing issues on carrier boards without external pull-up. Signed-off-by: Marcel Ziswiler Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- arch/arm/boot/dts/tegra30-apalis.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/tegra30-apalis.dtsi b/arch/arm/boot/dts/tegra30-apalis.dtsi index faa8cd2914e8..b9368d40bc6f 100644 --- a/arch/arm/boot/dts/tegra30-apalis.dtsi +++ b/arch/arm/boot/dts/tegra30-apalis.dtsi @@ -166,14 +166,14 @@ /* Apalis MMC1 */ sdmmc3_clk_pa6 { - nvidia,pins = "sdmmc3_clk_pa6", - "sdmmc3_cmd_pa7"; + nvidia,pins = "sdmmc3_clk_pa6"; nvidia,function = "sdmmc3"; nvidia,pull = ; nvidia,tristate = ; }; sdmmc3_dat0_pb7 { - nvidia,pins = "sdmmc3_dat0_pb7", + nvidia,pins = "sdmmc3_cmd_pa7", + "sdmmc3_dat0_pb7", "sdmmc3_dat1_pb6", "sdmmc3_dat2_pb5", "sdmmc3_dat3_pb4", From 6fa9a0023e331aeed0f7a94c7f719dc94240b308 Mon Sep 17 00:00:00 2001 From: Marc Dietrich Date: Thu, 2 Aug 2018 10:45:40 +0200 Subject: [PATCH 0776/3715] ARM: dts: paz00: fix wakeup gpio keycode [ Upstream commit ebea2a43fdafdbce918bd7e200b709d6c33b9f3b ] The power key is controlled solely by the EC, which only tiggeres this gpio after wakeup. Fixes immediately return to suspend after wake from LP1. Signed-off-by: Marc Dietrich Tested-by: Nicolas Chauvet Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- arch/arm/boot/dts/tegra20-paz00.dts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/tegra20-paz00.dts b/arch/arm/boot/dts/tegra20-paz00.dts index 30436969adc0..1b8db91277b1 100644 --- a/arch/arm/boot/dts/tegra20-paz00.dts +++ b/arch/arm/boot/dts/tegra20-paz00.dts @@ -524,10 +524,10 @@ gpio-keys { compatible = "gpio-keys"; - power { - label = "Power"; + wakeup { + label = "Wakeup"; gpios = <&gpio TEGRA_GPIO(J, 7) GPIO_ACTIVE_LOW>; - linux,code = ; + linux,code = ; wakeup-source; }; }; From da4947410e512cbb9e2c6016d7598d2967f11560 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 26 Sep 2018 17:06:29 +0800 Subject: [PATCH 0777/3715] net: smsc: fix return type of ndo_start_xmit function [ Upstream commit 6323d57f335ce1490d025cacc83fc10b07792130 ] The method ndo_start_xmit() is defined as returning an 'netdev_tx_t', which is a typedef for an enum type, so make sure the implementation in this driver has returns 'netdev_tx_t' value, and change the function return type to netdev_tx_t. Found by coccinelle. Signed-off-by: YueHaibing Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/smsc/smc911x.c | 3 ++- drivers/net/ethernet/smsc/smc91x.c | 3 ++- drivers/net/ethernet/smsc/smsc911x.c | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c index 05157442a980..42d35a87bcc9 100644 --- a/drivers/net/ethernet/smsc/smc911x.c +++ b/drivers/net/ethernet/smsc/smc911x.c @@ -514,7 +514,8 @@ static void smc911x_hardware_send_pkt(struct net_device *dev) * now, or set the card to generates an interrupt when ready * for the packet. */ -static int smc911x_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t +smc911x_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct smc911x_local *lp = netdev_priv(dev); unsigned int free; diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 080428762858..96ac0d3af6f5 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -638,7 +638,8 @@ done: if (!THROTTLE_TX_PKTS) * now, or set the card to generates an interrupt when ready * for the packet. */ -static int smc_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t +smc_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct smc_local *lp = netdev_priv(dev); void __iomem *ioaddr = lp->base; diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index f0afb88d7bc2..ce4bfecc26c7 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -1786,7 +1786,8 @@ static int smsc911x_stop(struct net_device *dev) } /* Entry point for transmitting a packet */ -static int smsc911x_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t +smsc911x_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct smsc911x_data *pdata = netdev_priv(dev); unsigned int freespace; From 5c33279d8b5c5f0f7c7e8513aec6a71c0e5e7d7d Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 26 Sep 2018 17:13:05 +0800 Subject: [PATCH 0778/3715] net: faraday: fix return type of ndo_start_xmit function [ Upstream commit 0a715156656bddf4aa92d9868f850aeeb0465fd0 ] The method ndo_start_xmit() is defined as returning an 'netdev_tx_t', which is a typedef for an enum type, so make sure the implementation in this driver has returns 'netdev_tx_t' value, and change the function return type to netdev_tx_t. Found by coccinelle. Signed-off-by: YueHaibing Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/faraday/ftgmac100.c | 4 ++-- drivers/net/ethernet/faraday/ftmac100.c | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index bfda315a3f1b..a1baddcd6799 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -707,8 +707,8 @@ static bool ftgmac100_prep_tx_csum(struct sk_buff *skb, u32 *csum_vlan) return skb_checksum_help(skb) == 0; } -static int ftgmac100_hard_start_xmit(struct sk_buff *skb, - struct net_device *netdev) +static netdev_tx_t ftgmac100_hard_start_xmit(struct sk_buff *skb, + struct net_device *netdev) { struct ftgmac100 *priv = netdev_priv(netdev); struct ftgmac100_txdes *txdes, *first; diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c index 415fd93e9930..769c627aace5 100644 --- a/drivers/net/ethernet/faraday/ftmac100.c +++ b/drivers/net/ethernet/faraday/ftmac100.c @@ -632,8 +632,8 @@ static void ftmac100_tx_complete(struct ftmac100 *priv) ; } -static int ftmac100_xmit(struct ftmac100 *priv, struct sk_buff *skb, - dma_addr_t map) +static netdev_tx_t ftmac100_xmit(struct ftmac100 *priv, struct sk_buff *skb, + dma_addr_t map) { struct net_device *netdev = priv->netdev; struct ftmac100_txdes *txdes; @@ -1013,7 +1013,8 @@ static int ftmac100_stop(struct net_device *netdev) return 0; } -static int ftmac100_hard_start_xmit(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t +ftmac100_hard_start_xmit(struct sk_buff *skb, struct net_device *netdev) { struct ftmac100 *priv = netdev_priv(netdev); dma_addr_t map; From 471d39b9f0df0719e547e26fe651bc89338ce098 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Sep 2018 15:35:58 +0800 Subject: [PATCH 0779/3715] f2fs: fix to recover inode's project id during POR [ Upstream commit f4474aa6e5e901ee4af21f39f1b9115aaaaec503 ] Testcase to reproduce this bug: 1. mkfs.f2fs -O extra_attr -O project_quota /dev/sdd 2. mount -t f2fs /dev/sdd /mnt/f2fs 3. touch /mnt/f2fs/file 4. sync 5. chattr -p 1 /mnt/f2fs/file 6. xfs_io -f /mnt/f2fs/file -c "fsync" 7. godown /mnt/f2fs 8. umount /mnt/f2fs 9. mount -t f2fs /dev/sdd /mnt/f2fs 10. lsattr -p /mnt/f2fs/file 0 -----------------N- /mnt/f2fs/file But actually, we expect the correct result is: 1 -----------------N- /mnt/f2fs/file The reason is we didn't recover inode.i_projid field during mount, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/recovery.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index adbf2600c090..87942cf2afe1 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -203,6 +203,19 @@ static void recover_inode(struct inode *inode, struct page *page) inode->i_mode = le16_to_cpu(raw->i_mode); i_uid_write(inode, le32_to_cpu(raw->i_uid)); i_gid_write(inode, le32_to_cpu(raw->i_gid)); + + if (raw->i_inline & F2FS_EXTRA_ATTR) { + if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)->sb) && + F2FS_FITS_IN_INODE(raw, le16_to_cpu(raw->i_extra_isize), + i_projid)) { + projid_t i_projid; + + i_projid = (projid_t)le32_to_cpu(raw->i_projid); + F2FS_I(inode)->i_projid = + make_kprojid(&init_user_ns, i_projid); + } + } + f2fs_i_size_write(inode, le64_to_cpu(raw->i_size)); inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime); inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime); From 34ef1373863cdadb1653f32886f25c3a8e323f6c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Sep 2018 15:36:03 +0800 Subject: [PATCH 0780/3715] f2fs: mark inode dirty explicitly in recover_inode() [ Upstream commit 4a1728cad6340bfbe17bd17fd158b2165cd99508 ] Mark inode dirty explicitly in the end of recover_inode() to make sure that all recoverable fields can be persisted later. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/recovery.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 87942cf2afe1..2eef266b656b 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -227,6 +227,8 @@ static void recover_inode(struct inode *inode, struct page *page) F2FS_I(inode)->i_advise = raw->i_advise; F2FS_I(inode)->i_flags = le32_to_cpu(raw->i_flags); + f2fs_mark_inode_dirty_sync(inode, true); + if (file_enc_name(inode)) name = ""; else From 05b2e15620b3080787d26de7d700f5ba867d850a Mon Sep 17 00:00:00 2001 From: Justin Ernst Date: Tue, 25 Sep 2018 09:34:49 -0500 Subject: [PATCH 0781/3715] EDAC: Raise the maximum number of memory controllers [ Upstream commit 6b58859419554fb824e09cfdd73151a195473cbc ] We observe an oops in the skx_edac module during boot: EDAC MC0: Giving out device to module skx_edac controller Skylake Socket#0 IMC#0 EDAC MC1: Giving out device to module skx_edac controller Skylake Socket#0 IMC#1 EDAC MC2: Giving out device to module skx_edac controller Skylake Socket#1 IMC#0 ... EDAC MC13: Giving out device to module skx_edac controller Skylake Socket#0 IMC#1 EDAC MC14: Giving out device to module skx_edac controller Skylake Socket#1 IMC#0 EDAC MC15: Giving out device to module skx_edac controller Skylake Socket#1 IMC#1 Too many memory controllers: 16 EDAC MC: Removed device 0 for skx_edac Skylake Socket#0 IMC#0 We observe there are two memory controllers per socket, with a limit of 16. Raise the maximum number of memory controllers from 16 to 2 * MAX_NUMNODES (1024). [ bp: This is just a band-aid fix until we've sorted out the whole issue with the bus_type association and handling in EDAC and can get rid of this arbitrary limit. ] Signed-off-by: Justin Ernst Signed-off-by: Borislav Petkov Acked-by: Russ Anderson Cc: Mauro Carvalho Chehab Cc: linux-edac@vger.kernel.org Link: https://lkml.kernel.org/r/20180925143449.284634-1-justin.ernst@hpe.com Signed-off-by: Sasha Levin --- include/linux/edac.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/edac.h b/include/linux/edac.h index cd75c173fd00..90f72336aea6 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -17,6 +17,7 @@ #include #include #include +#include #define EDAC_DEVICE_NAME_LEN 31 @@ -667,6 +668,6 @@ struct mem_ctl_info { /* * Maximum number of memory controllers in the coherent fabric. */ -#define EDAC_MAX_MCS 16 +#define EDAC_MAX_MCS 2 * MAX_NUMNODES #endif From 3365b4bc9b56fd0dcb5346e48d423eeb39e3b32f Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 13 Sep 2018 13:12:26 -0500 Subject: [PATCH 0782/3715] ARM: dts: realview: Fix SPI controller node names [ Upstream commit 016add12977bcc30f77d7e48fc9a3a024cb46645 ] SPI controller nodes should be named 'spi' rather than 'ssp'. Fixing the name enables dtc SPI bus checks. Cc: Linus Walleij Signed-off-by: Rob Herring Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- arch/arm/boot/dts/arm-realview-eb.dtsi | 2 +- arch/arm/boot/dts/arm-realview-pb1176.dts | 2 +- arch/arm/boot/dts/arm-realview-pb11mp.dts | 2 +- arch/arm/boot/dts/arm-realview-pbx.dtsi | 2 +- arch/arm/boot/dts/versatile-ab.dts | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm/boot/dts/arm-realview-eb.dtsi b/arch/arm/boot/dts/arm-realview-eb.dtsi index e2e9599596e2..05379b6c1c13 100644 --- a/arch/arm/boot/dts/arm-realview-eb.dtsi +++ b/arch/arm/boot/dts/arm-realview-eb.dtsi @@ -334,7 +334,7 @@ clock-names = "uartclk", "apb_pclk"; }; - ssp: ssp@1000d000 { + ssp: spi@1000d000 { compatible = "arm,pl022", "arm,primecell"; reg = <0x1000d000 0x1000>; clocks = <&sspclk>, <&pclk>; diff --git a/arch/arm/boot/dts/arm-realview-pb1176.dts b/arch/arm/boot/dts/arm-realview-pb1176.dts index c789564f2803..c1fd5615ddfe 100644 --- a/arch/arm/boot/dts/arm-realview-pb1176.dts +++ b/arch/arm/boot/dts/arm-realview-pb1176.dts @@ -343,7 +343,7 @@ clock-names = "apb_pclk"; }; - pb1176_ssp: ssp@1010b000 { + pb1176_ssp: spi@1010b000 { compatible = "arm,pl022", "arm,primecell"; reg = <0x1010b000 0x1000>; interrupt-parent = <&intc_dc1176>; diff --git a/arch/arm/boot/dts/arm-realview-pb11mp.dts b/arch/arm/boot/dts/arm-realview-pb11mp.dts index 3944765ac4b0..e306f1cceb4e 100644 --- a/arch/arm/boot/dts/arm-realview-pb11mp.dts +++ b/arch/arm/boot/dts/arm-realview-pb11mp.dts @@ -480,7 +480,7 @@ clock-names = "uartclk", "apb_pclk"; }; - ssp@1000d000 { + spi@1000d000 { compatible = "arm,pl022", "arm,primecell"; reg = <0x1000d000 0x1000>; interrupt-parent = <&intc_pb11mp>; diff --git a/arch/arm/boot/dts/arm-realview-pbx.dtsi b/arch/arm/boot/dts/arm-realview-pbx.dtsi index aeb49c4bd773..2bf3958b2e6b 100644 --- a/arch/arm/boot/dts/arm-realview-pbx.dtsi +++ b/arch/arm/boot/dts/arm-realview-pbx.dtsi @@ -318,7 +318,7 @@ clock-names = "uartclk", "apb_pclk"; }; - ssp: ssp@1000d000 { + ssp: spi@1000d000 { compatible = "arm,pl022", "arm,primecell"; reg = <0x1000d000 0x1000>; clocks = <&sspclk>, <&pclk>; diff --git a/arch/arm/boot/dts/versatile-ab.dts b/arch/arm/boot/dts/versatile-ab.dts index 4a51612996bc..a9000d22b2c0 100644 --- a/arch/arm/boot/dts/versatile-ab.dts +++ b/arch/arm/boot/dts/versatile-ab.dts @@ -304,7 +304,7 @@ clock-names = "apb_pclk"; }; - ssp@101f4000 { + spi@101f4000 { compatible = "arm,pl022", "arm,primecell"; reg = <0x101f4000 0x1000>; interrupts = <11>; From a8d3a42686e1c32e01f198e6af6af51ae03cfdb2 Mon Sep 17 00:00:00 2001 From: Stuart Hayes Date: Wed, 26 Sep 2018 16:50:17 -0500 Subject: [PATCH 0783/3715] firmware: dell_rbu: Make payload memory uncachable [ Upstream commit 6aecee6ad41cf97c0270f72da032c10eef025bf0 ] The dell_rbu driver takes firmware update payloads and puts them in memory so the system BIOS can find them after a reboot. This sometimes fails (though rarely), because the memory containing the payload is in the CPU cache but never gets written back to main memory before the system is rebooted (CPU cache contents are lost on reboot). With this patch, the payload memory will be changed to uncachable to ensure that the payload is actually in main memory before the system is rebooted. Signed-off-by: Stuart Hayes Signed-off-by: Andy Shevchenko Signed-off-by: Sasha Levin --- drivers/firmware/dell_rbu.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/firmware/dell_rbu.c b/drivers/firmware/dell_rbu.c index 2f452f1f7c8a..53f27a6e2d76 100644 --- a/drivers/firmware/dell_rbu.c +++ b/drivers/firmware/dell_rbu.c @@ -45,6 +45,7 @@ #include #include #include +#include MODULE_AUTHOR("Abhay Salunke "); MODULE_DESCRIPTION("Driver for updating BIOS image on DELL systems"); @@ -181,6 +182,11 @@ static int create_packet(void *data, size_t length) packet_data_temp_buf = NULL; } } + /* + * set to uncachable or it may never get written back before reboot + */ + set_memory_uc((unsigned long)packet_data_temp_buf, 1 << ordernum); + spin_lock(&rbu_data.lock); newpacket->data = packet_data_temp_buf; @@ -349,6 +355,8 @@ static void packet_empty_list(void) * to make sure there are no stale RBU packets left in memory */ memset(newpacket->data, 0, rbu_data.packetsize); + set_memory_wb((unsigned long)newpacket->data, + 1 << newpacket->ordernum); free_pages((unsigned long) newpacket->data, newpacket->ordernum); kfree(newpacket); From 191e19752460e0cabfe0a95a75a9a1ddd9b1cdd7 Mon Sep 17 00:00:00 2001 From: Balakrishna Godavarthi Date: Wed, 22 Aug 2018 17:34:11 +0530 Subject: [PATCH 0784/3715] Bluetooth: hci_serdev: clear HCI_UART_PROTO_READY to avoid closing proto races [ Upstream commit 7cf7846d27bfc9731e449857db3eec5e0e9701ba ] Clearing HCI_UART_PROTO_READY will avoid usage of proto function pointers before running the proto close function pointer. There is chance of kernel crash, due to usage of non proto close function pointers after proto close. Signed-off-by: Balakrishna Godavarthi Signed-off-by: Marcel Holtmann Signed-off-by: Sasha Levin --- drivers/bluetooth/hci_serdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/hci_serdev.c b/drivers/bluetooth/hci_serdev.c index 52e6d4d1608e..69c00a3db538 100644 --- a/drivers/bluetooth/hci_serdev.c +++ b/drivers/bluetooth/hci_serdev.c @@ -360,6 +360,7 @@ void hci_uart_unregister_device(struct hci_uart *hu) { struct hci_dev *hdev = hu->hdev; + clear_bit(HCI_UART_PROTO_READY, &hu->flags); hci_unregister_dev(hdev); hci_free_dev(hdev); From 433a53dc26330e53bded59c4ac31e274ceb8f86e Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 4 Sep 2018 13:39:22 +0300 Subject: [PATCH 0785/3715] Bluetooth: L2CAP: Detect if remote is not able to use the whole MPS [ Upstream commit a5c3021bb62b970713550db3f7fd08aa70665d7e ] If the remote is not able to fully utilize the MPS choosen recalculate the credits based on the actual amount it is sending that way it can still send packets of MTU size without credits dropping to 0. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann Signed-off-by: Sasha Levin --- net/bluetooth/l2cap_core.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 0c2219f483d7..f63d9918b15a 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -6819,6 +6819,16 @@ static int l2cap_le_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) chan->sdu_len = sdu_len; chan->sdu_last_frag = skb; + /* Detect if remote is not able to use the selected MPS */ + if (skb->len + L2CAP_SDULEN_SIZE < chan->mps) { + u16 mps_len = skb->len + L2CAP_SDULEN_SIZE; + + /* Adjust the number of credits */ + BT_DBG("chan->mps %u -> %u", chan->mps, mps_len); + chan->mps = mps_len; + l2cap_chan_le_send_credits(chan); + } + return 0; } From 6827e0b4692eaf496da6cfd1cd71dd26a8132d9c Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Tue, 18 Sep 2018 22:29:50 +0000 Subject: [PATCH 0786/3715] x86/hyperv: Suppress "PCI: Fatal: No config space access function found" [ Upstream commit 2f285f46240d67060061d153786740d4df53cd78 ] A Generation-2 Linux VM on Hyper-V doesn't have the legacy PCI bus, and users always see the scary warning, which is actually harmless. Suppress it. Signed-off-by: Dexuan Cui Signed-off-by: Thomas Gleixner Reviewed-by: Michael Kelley Cc: "H. Peter Anvin" Cc: KY Srinivasan Cc: Haiyang Zhang Cc: Stephen Hemminger Cc: "devel@linuxdriverproject.org" Cc: Olaf Aepfle Cc: Andy Whitcroft Cc: Jason Wang Cc: Vitaly Kuznetsov Cc: Marcelo Cerri Cc: Josh Poulson Link: https://lkml.kernel.org/r/ --- arch/x86/hyperv/hv_init.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 2e9d58cc371e..2653b7b25d17 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -17,6 +17,7 @@ * */ +#include #include #include #include @@ -101,6 +102,22 @@ static int hv_cpu_init(unsigned int cpu) return 0; } +static int __init hv_pci_init(void) +{ + int gen2vm = efi_enabled(EFI_BOOT); + + /* + * For Generation-2 VM, we exit from pci_arch_init() by returning 0. + * The purpose is to suppress the harmless warning: + * "PCI: Fatal: No config space access function found" + */ + if (gen2vm) + return 0; + + /* For Generation-1 VM, we'll proceed in pci_arch_init(). */ + return 1; +} + /* * This function is to be invoked early in the boot sequence after the * hypervisor has been detected. @@ -154,6 +171,8 @@ void hyperv_init(void) hyper_alloc_mmu(); + x86_init.pci.arch_init = hv_pci_init; + /* * Register Hyper-V specific clocksource. */ From 9f5f7e9fe2a155463cb9ab20f58c6f2ead2ed394 Mon Sep 17 00:00:00 2001 From: Christoph Manszewski Date: Mon, 17 Sep 2018 17:09:28 +0200 Subject: [PATCH 0787/3715] crypto: s5p-sss: Fix Fix argument list alignment [ Upstream commit 6c12b6ba45490eeb820fdceccf5a53f42a26799c ] Fix misalignment of continued argument list. Signed-off-by: Christoph Manszewski Reviewed-by: Krzysztof Kozlowski Acked-by: Kamil Konieczny Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/s5p-sss.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c index aec66159566d..9a5213cbcbe1 100644 --- a/drivers/crypto/s5p-sss.c +++ b/drivers/crypto/s5p-sss.c @@ -323,7 +323,7 @@ static void s5p_unset_indata(struct s5p_aes_dev *dev) } static int s5p_make_sg_cpy(struct s5p_aes_dev *dev, struct scatterlist *src, - struct scatterlist **dst) + struct scatterlist **dst) { void *pages; int len; @@ -569,7 +569,7 @@ static int s5p_set_indata_start(struct s5p_aes_dev *dev, } static int s5p_set_outdata_start(struct s5p_aes_dev *dev, - struct ablkcipher_request *req) + struct ablkcipher_request *req) { struct scatterlist *sg; int err; From 6081558fa8eaff568e44afedbf867937035283c9 Mon Sep 17 00:00:00 2001 From: Dan Aloni Date: Mon, 17 Sep 2018 20:24:32 +0300 Subject: [PATCH 0788/3715] crypto: fix a memory leak in rsa-kcs1pad's encryption mode [ Upstream commit 3944f139d5592790b70bc64f197162e643a8512b ] The encryption mode of pkcs1pad never uses out_sg and out_buf, so there's no need to allocate the buffer, which presently is not even being freed. CC: Herbert Xu CC: linux-crypto@vger.kernel.org CC: "David S. Miller" Signed-off-by: Dan Aloni Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/rsa-pkcs1pad.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c index 407c64bdcdd9..3279b457c4ed 100644 --- a/crypto/rsa-pkcs1pad.c +++ b/crypto/rsa-pkcs1pad.c @@ -261,15 +261,6 @@ static int pkcs1pad_encrypt(struct akcipher_request *req) pkcs1pad_sg_set_buf(req_ctx->in_sg, req_ctx->in_buf, ctx->key_size - 1 - req->src_len, req->src); - req_ctx->out_buf = kmalloc(ctx->key_size, GFP_KERNEL); - if (!req_ctx->out_buf) { - kfree(req_ctx->in_buf); - return -ENOMEM; - } - - pkcs1pad_sg_set_buf(req_ctx->out_sg, req_ctx->out_buf, - ctx->key_size, NULL); - akcipher_request_set_tfm(&req_ctx->child_req, ctx->child); akcipher_request_set_callback(&req_ctx->child_req, req->base.flags, pkcs1pad_encrypt_sign_complete_cb, req); From a13fef816458140e4ace805d757fc445429d1393 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 29 May 2018 10:04:16 +0300 Subject: [PATCH 0789/3715] iwlwifi: dbg: don't crash if the firmware crashes in the middle of a debug dump [ Upstream commit 79f25b10c9da3dbc953e47033d0494e51580ac3b ] We can dump data from the firmware either when it crashes, or when the firmware is alive. Not all the data is available if the firmware is running (like the Tx / Rx FIFOs which are available only when the firmware is halted), so we first check that the firmware is alive to compute the required size for the dump and then fill the buffer with the data. When we allocate the buffer, we test the STATUS_FW_ERROR bit to check if the firmware is alive or not. This bit can be changed during the course of the dump since it is modified in the interrupt handler. We hit a case where we allocate the buffer while the firmware is sill working, and while we start to fill the buffer, the firmware crashes. Then we test STATUS_FW_ERROR again and decide to fill the buffer with data like the FIFOs even if no room was allocated for this data in the buffer. This means that we overflow the buffer that was allocated leading to memory corruption. To fix this, test the STATUS_FW_ERROR bit only once and rely on local variables to check if we should dump fifos or other firmware components. Fixes: 04fd2c28226f ("iwlwifi: mvm: add rxf and txf to dump data") Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index e72c0b825420..4650b9e5da2b 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -775,7 +775,7 @@ void iwl_fw_error_dump(struct iwl_fw_runtime *fwrt) dump_data = iwl_fw_error_next_data(dump_data); /* We only dump the FIFOs if the FW is in error state */ - if (test_bit(STATUS_FW_ERROR, &fwrt->trans->status)) { + if (fifo_data_len) { iwl_fw_dump_fifos(fwrt, &dump_data); if (radio_len) iwl_read_radio_regs(fwrt, &dump_data); From 0391334d04bc6c79ea155cc43465c319075bcde4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 30 May 2018 14:13:18 +0200 Subject: [PATCH 0790/3715] iwlwifi: api: annotate compressed BA notif array sizes [ Upstream commit 6f68cc367ab6578a33cca21b6056804165621f00 ] Annotate the compressed BA notification array sizes and make both of them 0-length since the length of 1 is just confusing - it may be different than that and the offset to the second one needs to be calculated in the C code anyhow. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/fw/api/tx.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/tx.h b/drivers/net/wireless/intel/iwlwifi/fw/api/tx.h index 14ad9fb895f9..a9c8352a7641 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/tx.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/tx.h @@ -722,9 +722,9 @@ enum iwl_mvm_ba_resp_flags { * @tfd_cnt: number of TFD-Q elements * @ra_tid_cnt: number of RATID-Q elements * @tfd: array of TFD queue status updates. See &iwl_mvm_compressed_ba_tfd - * for details. + * for details. Length in @tfd_cnt. * @ra_tid: array of RA-TID queue status updates. For debug purposes only. See - * &iwl_mvm_compressed_ba_ratid for more details. + * &iwl_mvm_compressed_ba_ratid for more details. Length in @ra_tid_cnt. */ struct iwl_mvm_compressed_ba_notif { __le32 flags; @@ -741,7 +741,7 @@ struct iwl_mvm_compressed_ba_notif { __le32 tx_rate; __le16 tfd_cnt; __le16 ra_tid_cnt; - struct iwl_mvm_compressed_ba_tfd tfd[1]; + struct iwl_mvm_compressed_ba_tfd tfd[0]; struct iwl_mvm_compressed_ba_ratid ra_tid[0]; } __packed; /* COMPRESSED_BA_RES_API_S_VER_4 */ From 423d05a3fc5f1d0212eca3a2b7344f900fb9857f Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 11 Jun 2018 14:05:11 +0300 Subject: [PATCH 0791/3715] iwlwifi: mvm: Allow TKIP for AP mode [ Upstream commit 6f3df8c1192c873a6ad9a76328920f6f85af90a8 ] Support for setting keys for TKIP cipher suite was mistakenly removed for AP mode. Fix this. Fixes: 85aeb58cec1a ("iwlwifi: mvm: Enable security on new TX API") Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index d31d84eebc5d..d16e2ed4419f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -3067,10 +3067,6 @@ static int __iwl_mvm_set_sta_key(struct iwl_mvm *mvm, switch (keyconf->cipher) { case WLAN_CIPHER_SUITE_TKIP: - if (vif->type == NL80211_IFTYPE_AP) { - ret = -EINVAL; - break; - } addr = iwl_mvm_get_mac_addr(mvm, vif, sta); /* get phase 1 key from mac80211 */ ieee80211_get_key_rx_seq(keyconf, 0, &seq); From 9c71829435fc21a454786599c5a3a80cf3a3ae8e Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 27 Sep 2018 11:17:11 +1000 Subject: [PATCH 0792/3715] scsi: NCR5380: Clear all unissued commands on host reset [ Upstream commit 1aeeeed7f03c576f096eede7b0384f99a98f588c ] When doing a host reset we should be clearing all outstanding commands, not just the command triggering the reset. [mkp: adjusted Hannes' SoB address] Signed-off-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Cc: Ondrey Zary Signed-off-by: Finn Thain Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/NCR5380.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c index 8caa51797511..9131d30b2da7 100644 --- a/drivers/scsi/NCR5380.c +++ b/drivers/scsi/NCR5380.c @@ -2309,7 +2309,7 @@ static int NCR5380_host_reset(struct scsi_cmnd *cmd) spin_lock_irqsave(&hostdata->lock, flags); #if (NDEBUG & NDEBUG_ANY) - scmd_printk(KERN_INFO, cmd, __func__); + shost_printk(KERN_INFO, instance, __func__); #endif NCR5380_dprint(NDEBUG_ANY, instance); NCR5380_dprint_phase(NDEBUG_ANY, instance); @@ -2327,10 +2327,13 @@ static int NCR5380_host_reset(struct scsi_cmnd *cmd) * commands! */ - if (list_del_cmd(&hostdata->unissued, cmd)) { + list_for_each_entry(ncmd, &hostdata->unissued, list) { + struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd); + cmd->result = DID_RESET << 16; cmd->scsi_done(cmd); } + INIT_LIST_HEAD(&hostdata->unissued); if (hostdata->selecting) { hostdata->selecting->result = DID_RESET << 16; From 9a24e85568a3635f54269f9ddfb6eb0b19f1ceb7 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 27 Sep 2018 11:17:11 +1000 Subject: [PATCH 0793/3715] scsi: NCR5380: Have NCR5380_select() return a bool [ Upstream commit dad8261e643849ea134c7cd5c8e794e31d93b9eb ] The return value is taken to mean "retry" or "don't retry". Change it to bool to improve readability. Fix related comments. No functional change. Tested-by: Michael Schmitz Signed-off-by: Finn Thain Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/NCR5380.c | 46 +++++++++++++++++++----------------------- drivers/scsi/NCR5380.h | 2 +- 2 files changed, 22 insertions(+), 26 deletions(-) diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c index 9131d30b2da7..60e051c249a6 100644 --- a/drivers/scsi/NCR5380.c +++ b/drivers/scsi/NCR5380.c @@ -904,20 +904,16 @@ static irqreturn_t __maybe_unused NCR5380_intr(int irq, void *dev_id) return IRQ_RETVAL(handled); } -/* - * Function : int NCR5380_select(struct Scsi_Host *instance, - * struct scsi_cmnd *cmd) +/** + * NCR5380_select - attempt arbitration and selection for a given command + * @instance: the Scsi_Host instance + * @cmd: the scsi_cmnd to execute * - * Purpose : establishes I_T_L or I_T_L_Q nexus for new or existing command, - * including ARBITRATION, SELECTION, and initial message out for - * IDENTIFY and queue messages. + * This routine establishes an I_T_L nexus for a SCSI command. This involves + * ARBITRATION, SELECTION and MESSAGE OUT phases and an IDENTIFY message. * - * Inputs : instance - instantiation of the 5380 driver on which this - * target lives, cmd - SCSI command to execute. - * - * Returns cmd if selection failed but should be retried, - * NULL if selection failed and should not be retried, or - * NULL if selection succeeded (hostdata->connected == cmd). + * Returns true if the operation should be retried. + * Returns false if it should not be retried. * * Side effects : * If bus busy, arbitration failed, etc, NCR5380_select() will exit @@ -925,16 +921,15 @@ static irqreturn_t __maybe_unused NCR5380_intr(int irq, void *dev_id) * SELECT_ENABLE will be set appropriately, the NCR5380 * will cease to drive any SCSI bus signals. * - * If successful : I_T_L or I_T_L_Q nexus will be established, - * instance->connected will be set to cmd. + * If successful : the I_T_L nexus will be established, and + * hostdata->connected will be set to cmd. * SELECT interrupt will be disabled. * * If failed (no target) : cmd->scsi_done() will be called, and the * cmd->result host byte set to DID_BAD_TARGET. */ -static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *instance, - struct scsi_cmnd *cmd) +static bool NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd) __releases(&hostdata->lock) __acquires(&hostdata->lock) { struct NCR5380_hostdata *hostdata = shost_priv(instance); @@ -942,6 +937,7 @@ static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *instance, unsigned char *data; int len; int err; + bool ret = true; NCR5380_dprint(NDEBUG_ARBITRATION, instance); dsprintk(NDEBUG_ARBITRATION, instance, "starting arbitration, id = %d\n", @@ -950,7 +946,7 @@ static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *instance, /* * Arbitration and selection phases are slow and involve dropping the * lock, so we have to watch out for EH. An exception handler may - * change 'selecting' to NULL. This function will then return NULL + * change 'selecting' to NULL. This function will then return false * so that the caller will forget about 'cmd'. (During information * transfer phases, EH may change 'connected' to NULL.) */ @@ -986,7 +982,7 @@ static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *instance, if (!hostdata->selecting) { /* Command was aborted */ NCR5380_write(MODE_REG, MR_BASE); - return NULL; + return false; } if (err < 0) { NCR5380_write(MODE_REG, MR_BASE); @@ -1035,7 +1031,7 @@ static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *instance, if (!hostdata->selecting) { NCR5380_write(MODE_REG, MR_BASE); NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE); - return NULL; + return false; } dsprintk(NDEBUG_ARBITRATION, instance, "won arbitration\n"); @@ -1118,13 +1114,13 @@ static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *instance, /* Can't touch cmd if it has been reclaimed by the scsi ML */ if (!hostdata->selecting) - return NULL; + return false; cmd->result = DID_BAD_TARGET << 16; complete_cmd(instance, cmd); dsprintk(NDEBUG_SELECTION, instance, "target did not respond within 250ms\n"); - cmd = NULL; + ret = false; goto out; } @@ -1156,7 +1152,7 @@ static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *instance, } if (!hostdata->selecting) { do_abort(instance); - return NULL; + return false; } dsprintk(NDEBUG_SELECTION, instance, "target %d selected, going into MESSAGE OUT phase.\n", @@ -1172,7 +1168,7 @@ static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *instance, cmd->result = DID_ERROR << 16; complete_cmd(instance, cmd); dsprintk(NDEBUG_SELECTION, instance, "IDENTIFY message transfer failed\n"); - cmd = NULL; + ret = false; goto out; } @@ -1187,13 +1183,13 @@ static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *instance, initialize_SCp(cmd); - cmd = NULL; + ret = false; out: if (!hostdata->selecting) return NULL; hostdata->selecting = NULL; - return cmd; + return ret; } /* diff --git a/drivers/scsi/NCR5380.h b/drivers/scsi/NCR5380.h index 8a6d002e6789..5935fd6d1a05 100644 --- a/drivers/scsi/NCR5380.h +++ b/drivers/scsi/NCR5380.h @@ -275,7 +275,7 @@ static irqreturn_t NCR5380_intr(int irq, void *dev_id); static void NCR5380_main(struct work_struct *work); static const char *NCR5380_info(struct Scsi_Host *instance); static void NCR5380_reselect(struct Scsi_Host *instance); -static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *, struct scsi_cmnd *); +static bool NCR5380_select(struct Scsi_Host *, struct scsi_cmnd *); static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase, int *count, unsigned char **data); static int NCR5380_transfer_pio(struct Scsi_Host *instance, unsigned char *phase, int *count, unsigned char **data); static int NCR5380_poll_politely2(struct NCR5380_hostdata *, From 7717d068005234836bc94b4d0b721e5ad249b06b Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 27 Sep 2018 11:17:11 +1000 Subject: [PATCH 0794/3715] scsi: NCR5380: Withhold disconnect privilege for REQUEST SENSE [ Upstream commit 7c8ed783c2faa1e3f741844ffac41340338ea0f4 ] This is mostly needed because an AztecMonster II target has been observed disconnecting REQUEST SENSE commands and then failing to reselect properly. Suggested-by: Michael Schmitz Tested-by: Michael Schmitz Signed-off-by: Finn Thain Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/NCR5380.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c index 60e051c249a6..5f26aa2875bd 100644 --- a/drivers/scsi/NCR5380.c +++ b/drivers/scsi/NCR5380.c @@ -938,6 +938,8 @@ static bool NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd) int len; int err; bool ret = true; + bool can_disconnect = instance->irq != NO_IRQ && + cmd->cmnd[0] != REQUEST_SENSE; NCR5380_dprint(NDEBUG_ARBITRATION, instance); dsprintk(NDEBUG_ARBITRATION, instance, "starting arbitration, id = %d\n", @@ -1157,7 +1159,7 @@ static bool NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd) dsprintk(NDEBUG_SELECTION, instance, "target %d selected, going into MESSAGE OUT phase.\n", scmd_id(cmd)); - tmp[0] = IDENTIFY(((instance->irq == NO_IRQ) ? 0 : 1), cmd->device->lun); + tmp[0] = IDENTIFY(can_disconnect, cmd->device->lun); len = 1; data = tmp; From f41f9548e8a8ea1d4d00eace7f268c28a904eac5 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 27 Sep 2018 11:17:11 +1000 Subject: [PATCH 0795/3715] scsi: NCR5380: Use DRIVER_SENSE to indicate valid sense data [ Upstream commit 070356513963be6196142acff56acc8359069fa1 ] When sense data is valid, call set_driver_byte(cmd, DRIVER_SENSE). Otherwise some callers of scsi_execute() will ignore sense data. Don't set DID_ERROR or DID_RESET just because sense data is missing. Tested-by: Michael Schmitz Signed-off-by: Finn Thain Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/NCR5380.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c index 5f26aa2875bd..00397e89d652 100644 --- a/drivers/scsi/NCR5380.c +++ b/drivers/scsi/NCR5380.c @@ -513,11 +513,12 @@ static void complete_cmd(struct Scsi_Host *instance, if (hostdata->sensing == cmd) { /* Autosense processing ends here */ - if ((cmd->result & 0xff) != SAM_STAT_GOOD) { + if (status_byte(cmd->result) != GOOD) { scsi_eh_restore_cmnd(cmd, &hostdata->ses); - set_host_byte(cmd, DID_ERROR); - } else + } else { scsi_eh_restore_cmnd(cmd, &hostdata->ses); + set_driver_byte(cmd, DRIVER_SENSE); + } hostdata->sensing = NULL; } @@ -2271,7 +2272,6 @@ static int NCR5380_abort(struct scsi_cmnd *cmd) if (list_del_cmd(&hostdata->autosense, cmd)) { dsprintk(NDEBUG_ABORT, instance, "abort: removed %p from sense queue\n", cmd); - set_host_byte(cmd, DID_ERROR); complete_cmd(instance, cmd); } @@ -2350,7 +2350,6 @@ static int NCR5380_host_reset(struct scsi_cmnd *cmd) list_for_each_entry(ncmd, &hostdata->autosense, list) { struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd); - set_host_byte(cmd, DID_RESET); cmd->scsi_done(cmd); } INIT_LIST_HEAD(&hostdata->autosense); From 1975b27cad1a7d6b7ea475d43e7718da88837ca0 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 27 Sep 2018 11:17:11 +1000 Subject: [PATCH 0796/3715] scsi: NCR5380: Check for invalid reselection target [ Upstream commit 7ef55f6744c45e3d7c85a3f74ada39b67ac741dd ] The X3T9.2 specification (draft) says, under "6.1.4.1 RESELECTION", that "the initiator shall not respond to a RESELECTION phase if other than two SCSI ID bits are on the DATA BUS." This issue (too many bits set) has been observed in the wild, so add a check. Tested-by: Michael Schmitz Signed-off-by: Finn Thain Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/NCR5380.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c index 00397e89d652..a290ec632248 100644 --- a/drivers/scsi/NCR5380.c +++ b/drivers/scsi/NCR5380.c @@ -2014,6 +2014,11 @@ static void NCR5380_reselect(struct Scsi_Host *instance) NCR5380_write(MODE_REG, MR_BASE); target_mask = NCR5380_read(CURRENT_SCSI_DATA_REG) & ~(hostdata->id_mask); + if (!target_mask || target_mask & (target_mask - 1)) { + shost_printk(KERN_WARNING, instance, + "reselect: bad target_mask 0x%02x\n", target_mask); + return; + } dsprintk(NDEBUG_RESELECTION, instance, "reselect\n"); From d0d7a75816be2fb2b70619c9542f5c0a9e70e058 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 27 Sep 2018 11:17:11 +1000 Subject: [PATCH 0797/3715] scsi: NCR5380: Don't clear busy flag when abort fails [ Upstream commit 45ddc1b24806cc8f1a09f23dd4e7b6e4a8ae36e1 ] When NCR5380_abort() returns FAILED, the driver forgets that the target is still busy. Hence, further commands may be sent to the target, which may fail during selection and produce the error message, "reselection after won arbitration?". Prevent this by leaving the busy flag set when NCR5380_abort() fails. Tested-by: Michael Schmitz Signed-off-by: Finn Thain Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/NCR5380.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c index a290ec632248..b13290b3e5d3 100644 --- a/drivers/scsi/NCR5380.c +++ b/drivers/scsi/NCR5380.c @@ -522,8 +522,6 @@ static void complete_cmd(struct Scsi_Host *instance, hostdata->sensing = NULL; } - hostdata->busy[scmd_id(cmd)] &= ~(1 << cmd->device->lun); - cmd->scsi_done(cmd); } @@ -1711,6 +1709,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) cmd->result = DID_ERROR << 16; complete_cmd(instance, cmd); hostdata->connected = NULL; + hostdata->busy[scmd_id(cmd)] &= ~(1 << cmd->device->lun); return; #endif case PHASE_DATAIN: @@ -1793,6 +1792,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) cmd, scmd_id(cmd), cmd->device->lun); hostdata->connected = NULL; + hostdata->busy[scmd_id(cmd)] &= ~(1 << cmd->device->lun); cmd->result &= ~0xffff; cmd->result |= cmd->SCp.Status; @@ -1952,6 +1952,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) NCR5380_transfer_pio(instance, &phase, &len, &data); if (msgout == ABORT) { hostdata->connected = NULL; + hostdata->busy[scmd_id(cmd)] &= ~(1 << cmd->device->lun); cmd->result = DID_ERROR << 16; complete_cmd(instance, cmd); maybe_release_dma_irq(instance); @@ -2106,13 +2107,16 @@ static void NCR5380_reselect(struct Scsi_Host *instance) dsprintk(NDEBUG_RESELECTION | NDEBUG_QUEUES, instance, "reselect: removed %p from disconnected queue\n", tmp); } else { + int target = ffs(target_mask) - 1; + shost_printk(KERN_ERR, instance, "target bitmask 0x%02x lun %d not in disconnected queue.\n", target_mask, lun); /* * Since we have an established nexus that we can't do anything * with, we must abort it. */ - do_abort(instance); + if (do_abort(instance) == 0) + hostdata->busy[target] &= ~(1 << lun); return; } @@ -2283,8 +2287,10 @@ static int NCR5380_abort(struct scsi_cmnd *cmd) out: if (result == FAILED) dsprintk(NDEBUG_ABORT, instance, "abort: failed to abort %p\n", cmd); - else + else { + hostdata->busy[scmd_id(cmd)] &= ~(1 << cmd->device->lun); dsprintk(NDEBUG_ABORT, instance, "abort: successfully aborted %p\n", cmd); + } queue_work(hostdata->work_q, &hostdata->main_task); maybe_release_dma_irq(instance); From 7f4e28afab0d11132abb9696fe552533f511add6 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 27 Sep 2018 11:17:11 +1000 Subject: [PATCH 0798/3715] scsi: NCR5380: Don't call dsprintk() following reselection interrupt [ Upstream commit 08267216b3f8aa5adc204bdccf8deb72c1cd7665 ] The X3T9.2 specification (draft) says, under "6.1.4.1 RESELECTION", ... The reselected initiator shall then assert the BSY signal within a selection abort time of its most recent detection of being reselected; this is required for correct operation of the time-out procedure. The selection abort time is only 200 us which may be insufficient time for a printk() call. Move the diagnostics to the error paths. Tested-by: Michael Schmitz Signed-off-by: Finn Thain Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/NCR5380.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c index b13290b3e5d3..48f123601f57 100644 --- a/drivers/scsi/NCR5380.c +++ b/drivers/scsi/NCR5380.c @@ -2021,8 +2021,6 @@ static void NCR5380_reselect(struct Scsi_Host *instance) return; } - dsprintk(NDEBUG_RESELECTION, instance, "reselect\n"); - /* * At this point, we have detected that our SCSI ID is on the bus, * SEL is true and BSY was false for at least one bus settle delay @@ -2035,6 +2033,7 @@ static void NCR5380_reselect(struct Scsi_Host *instance) NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_BSY); if (NCR5380_poll_politely(hostdata, STATUS_REG, SR_SEL, 0, 2 * HZ) < 0) { + shost_printk(KERN_ERR, instance, "reselect: !SEL timeout\n"); NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE); return; } @@ -2046,6 +2045,7 @@ static void NCR5380_reselect(struct Scsi_Host *instance) if (NCR5380_poll_politely(hostdata, STATUS_REG, SR_REQ, SR_REQ, 2 * HZ) < 0) { + shost_printk(KERN_ERR, instance, "reselect: REQ timeout\n"); do_abort(instance); return; } From 7459de2fceee874ba1ba2ba7d825290d9d141a54 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 27 Sep 2018 11:17:11 +1000 Subject: [PATCH 0799/3715] scsi: NCR5380: Handle BUS FREE during reselection [ Upstream commit ca694afad707cb3ae2fdef3b28454444d9ac726e ] The X3T9.2 specification (draft) says, under "6.1.4.2 RESELECTION time-out procedure", that a target may assert RST or go to BUS FREE phase if the initiator does not respond within 200 us. Something like this has been observed with AztecMonster II target. When it happens, all we can do is wait for the target to try again. Tested-by: Michael Schmitz Signed-off-by: Finn Thain Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/NCR5380.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c index 48f123601f57..a85c5155fcf4 100644 --- a/drivers/scsi/NCR5380.c +++ b/drivers/scsi/NCR5380.c @@ -2045,6 +2045,9 @@ static void NCR5380_reselect(struct Scsi_Host *instance) if (NCR5380_poll_politely(hostdata, STATUS_REG, SR_REQ, SR_REQ, 2 * HZ) < 0) { + if ((NCR5380_read(STATUS_REG) & (SR_BSY | SR_SEL)) == 0) + /* BUS FREE phase */ + return; shost_printk(KERN_ERR, instance, "reselect: REQ timeout\n"); do_abort(instance); return; From ce837affeb34e61cc808cf23f93a11b958dd2199 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 27 Sep 2018 11:17:11 +1000 Subject: [PATCH 0800/3715] scsi: NCR5380: Check for bus reset [ Upstream commit 6b0e87a6aafe12d75c2bea6fc8e49e88b98b3083 ] The SR_RST bit isn't latched. Hence, detecting a bus reset isn't reliable. When it is detected, the right thing to do is to drop all connected and disconnected commands. The code for that is already present so refactor it and call it when SR_RST is set. Tested-by: Michael Schmitz Signed-off-by: Finn Thain Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/NCR5380.c | 74 +++++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 29 deletions(-) diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c index a85c5155fcf4..21377ac71168 100644 --- a/drivers/scsi/NCR5380.c +++ b/drivers/scsi/NCR5380.c @@ -131,6 +131,7 @@ static int do_abort(struct Scsi_Host *); static void do_reset(struct Scsi_Host *); +static void bus_reset_cleanup(struct Scsi_Host *); /** * initialize_SCp - init the scsi pointer field @@ -885,7 +886,14 @@ static irqreturn_t __maybe_unused NCR5380_intr(int irq, void *dev_id) /* Probably Bus Reset */ NCR5380_read(RESET_PARITY_INTERRUPT_REG); - dsprintk(NDEBUG_INTR, instance, "unknown interrupt\n"); + if (sr & SR_RST) { + /* Certainly Bus Reset */ + shost_printk(KERN_WARNING, instance, + "bus reset interrupt\n"); + bus_reset_cleanup(instance); + } else { + dsprintk(NDEBUG_INTR, instance, "unknown interrupt\n"); + } #ifdef SUN3_SCSI_VME dregs->csr |= CSR_DMA_ENABLE; #endif @@ -2303,31 +2311,12 @@ out: } -/** - * NCR5380_host_reset - reset the SCSI host - * @cmd: SCSI command undergoing EH - * - * Returns SUCCESS - */ - -static int NCR5380_host_reset(struct scsi_cmnd *cmd) +static void bus_reset_cleanup(struct Scsi_Host *instance) { - struct Scsi_Host *instance = cmd->device->host; struct NCR5380_hostdata *hostdata = shost_priv(instance); int i; - unsigned long flags; struct NCR5380_cmd *ncmd; - spin_lock_irqsave(&hostdata->lock, flags); - -#if (NDEBUG & NDEBUG_ANY) - shost_printk(KERN_INFO, instance, __func__); -#endif - NCR5380_dprint(NDEBUG_ANY, instance); - NCR5380_dprint_phase(NDEBUG_ANY, instance); - - do_reset(instance); - /* reset NCR registers */ NCR5380_write(MODE_REG, MR_BASE); NCR5380_write(TARGET_COMMAND_REG, 0); @@ -2339,14 +2328,6 @@ static int NCR5380_host_reset(struct scsi_cmnd *cmd) * commands! */ - list_for_each_entry(ncmd, &hostdata->unissued, list) { - struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd); - - cmd->result = DID_RESET << 16; - cmd->scsi_done(cmd); - } - INIT_LIST_HEAD(&hostdata->unissued); - if (hostdata->selecting) { hostdata->selecting->result = DID_RESET << 16; complete_cmd(instance, hostdata->selecting); @@ -2380,6 +2361,41 @@ static int NCR5380_host_reset(struct scsi_cmnd *cmd) queue_work(hostdata->work_q, &hostdata->main_task); maybe_release_dma_irq(instance); +} + +/** + * NCR5380_host_reset - reset the SCSI host + * @cmd: SCSI command undergoing EH + * + * Returns SUCCESS + */ + +static int NCR5380_host_reset(struct scsi_cmnd *cmd) +{ + struct Scsi_Host *instance = cmd->device->host; + struct NCR5380_hostdata *hostdata = shost_priv(instance); + unsigned long flags; + struct NCR5380_cmd *ncmd; + + spin_lock_irqsave(&hostdata->lock, flags); + +#if (NDEBUG & NDEBUG_ANY) + shost_printk(KERN_INFO, instance, __func__); +#endif + NCR5380_dprint(NDEBUG_ANY, instance); + NCR5380_dprint_phase(NDEBUG_ANY, instance); + + list_for_each_entry(ncmd, &hostdata->unissued, list) { + struct scsi_cmnd *scmd = NCR5380_to_scmd(ncmd); + + scmd->result = DID_RESET << 16; + scmd->scsi_done(scmd); + } + INIT_LIST_HEAD(&hostdata->unissued); + + do_reset(instance); + bus_reset_cleanup(instance); + spin_unlock_irqrestore(&hostdata->lock, flags); return SUCCESS; From e083cad420a148c5a25efeb517c06ebf7400c75d Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 13 Sep 2018 13:12:40 -0500 Subject: [PATCH 0801/3715] arm64: dts: amd: Fix SPI bus warnings [ Upstream commit e9f0878c4b2004ac19581274c1ae4c61ae3ca70e ] dtc has new checks for SPI buses. Fix the warnings in node names. arch/arm64/boot/dts/amd/amd-overdrive.dtb: Warning (spi_bus_bridge): /smb/ssp@e1030000: node name for SPI buses should be 'spi' arch/arm64/boot/dts/amd/amd-overdrive-rev-b0.dtb: Warning (spi_bus_bridge): /smb/ssp@e1030000: node name for SPI buses should be 'spi' arch/arm64/boot/dts/amd/amd-overdrive-rev-b1.dtb: Warning (spi_bus_bridge): /smb/ssp@e1030000: node name for SPI buses should be 'spi' Cc: Brijesh Singh Cc: Suravee Suthikulpanit Cc: Tom Lendacky Signed-off-by: Rob Herring Signed-off-by: Arnd Bergmann Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi b/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi index 125f4deb52fe..b664e7af74eb 100644 --- a/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi +++ b/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi @@ -107,7 +107,7 @@ clock-names = "uartclk", "apb_pclk"; }; - spi0: ssp@e1020000 { + spi0: spi@e1020000 { status = "disabled"; compatible = "arm,pl022", "arm,primecell"; reg = <0 0xe1020000 0 0x1000>; @@ -117,7 +117,7 @@ clock-names = "apb_pclk"; }; - spi1: ssp@e1030000 { + spi1: spi@e1030000 { status = "disabled"; compatible = "arm,pl022", "arm,primecell"; reg = <0 0xe1030000 0 0x1000>; From a583b37a088b60668ca5d03484348cbf1f985280 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 13 Sep 2018 13:12:44 -0500 Subject: [PATCH 0802/3715] arm64: dts: lg: Fix SPI controller node names [ Upstream commit 09bae3b64cb580c95329bd8d16f08f0a5cb81ec9 ] SPI controller nodes should be named 'spi' rather than 'ssp'. Fixing the name enables dtc SPI bus checks. Cc: Chanho Min Signed-off-by: Rob Herring Signed-off-by: Arnd Bergmann Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/lg/lg1312.dtsi | 4 ++-- arch/arm64/boot/dts/lg/lg1313.dtsi | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/lg/lg1312.dtsi b/arch/arm64/boot/dts/lg/lg1312.dtsi index 860c8fb10795..4bde7b6f2b11 100644 --- a/arch/arm64/boot/dts/lg/lg1312.dtsi +++ b/arch/arm64/boot/dts/lg/lg1312.dtsi @@ -168,14 +168,14 @@ clock-names = "apb_pclk"; status="disabled"; }; - spi0: ssp@fe800000 { + spi0: spi@fe800000 { compatible = "arm,pl022", "arm,primecell"; reg = <0x0 0xfe800000 0x1000>; interrupts = ; clocks = <&clk_bus>; clock-names = "apb_pclk"; }; - spi1: ssp@fe900000 { + spi1: spi@fe900000 { compatible = "arm,pl022", "arm,primecell"; reg = <0x0 0xfe900000 0x1000>; interrupts = ; diff --git a/arch/arm64/boot/dts/lg/lg1313.dtsi b/arch/arm64/boot/dts/lg/lg1313.dtsi index 1887af654a7d..16ced1ff1ad3 100644 --- a/arch/arm64/boot/dts/lg/lg1313.dtsi +++ b/arch/arm64/boot/dts/lg/lg1313.dtsi @@ -168,14 +168,14 @@ clock-names = "apb_pclk"; status="disabled"; }; - spi0: ssp@fe800000 { + spi0: spi@fe800000 { compatible = "arm,pl022", "arm,primecell"; reg = <0x0 0xfe800000 0x1000>; interrupts = ; clocks = <&clk_bus>; clock-names = "apb_pclk"; }; - spi1: ssp@fe900000 { + spi1: spi@fe900000 { compatible = "arm,pl022", "arm,primecell"; reg = <0x0 0xfe900000 0x1000>; interrupts = ; From 52164377ed2be86665479876532f66368d9d96f8 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 13 Sep 2018 13:12:33 -0500 Subject: [PATCH 0803/3715] ARM: dts: lpc32xx: Fix SPI controller node names [ Upstream commit 11236ef582b8d66290bb3b3710e03ca1d85d8ad8 ] SPI controller nodes should be named 'spi' rather than 'ssp'. Fixing the name enables dtc SPI bus checks. Cc: Vladimir Zapolskiy Cc: Sylvain Lemieux Signed-off-by: Rob Herring Signed-off-by: Arnd Bergmann Signed-off-by: Sasha Levin --- arch/arm/boot/dts/lpc32xx.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/lpc32xx.dtsi b/arch/arm/boot/dts/lpc32xx.dtsi index f22a33a01819..d077bd2b9583 100644 --- a/arch/arm/boot/dts/lpc32xx.dtsi +++ b/arch/arm/boot/dts/lpc32xx.dtsi @@ -179,7 +179,7 @@ * ssp0 and spi1 are shared pins; * enable one in your board dts, as needed. */ - ssp0: ssp@20084000 { + ssp0: spi@20084000 { compatible = "arm,pl022", "arm,primecell"; reg = <0x20084000 0x1000>; interrupts = <20 IRQ_TYPE_LEVEL_HIGH>; @@ -199,7 +199,7 @@ * ssp1 and spi2 are shared pins; * enable one in your board dts, as needed. */ - ssp1: ssp@2008c000 { + ssp1: spi@2008c000 { compatible = "arm,pl022", "arm,primecell"; reg = <0x2008c000 0x1000>; interrupts = <21 IRQ_TYPE_LEVEL_HIGH>; From 23acf86994d35b750f8879761796f449de31c0cd Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 24 Sep 2018 16:25:34 +0200 Subject: [PATCH 0804/3715] rtc: armada38x: fix possible race condition [ Upstream commit 7d61cbb945a753af08e247b5f10bdd5dbb8d6c80 ] The IRQ is requested before the struct rtc is allocated and registered, but this struct is used in the IRQ handler. This may lead to a NULL pointer dereference. Switch to devm_rtc_allocate_device/rtc_register_device to allocate the rtc before requesting the IRQ. Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- drivers/rtc/rtc-armada38x.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/rtc/rtc-armada38x.c b/drivers/rtc/rtc-armada38x.c index 21f355c37eab..10b5c8549039 100644 --- a/drivers/rtc/rtc-armada38x.c +++ b/drivers/rtc/rtc-armada38x.c @@ -390,7 +390,6 @@ MODULE_DEVICE_TABLE(of, armada38x_rtc_of_match_table); static __init int armada38x_rtc_probe(struct platform_device *pdev) { - const struct rtc_class_ops *ops; struct resource *res; struct armada38x_rtc *rtc; const struct of_device_id *match; @@ -427,6 +426,11 @@ static __init int armada38x_rtc_probe(struct platform_device *pdev) dev_err(&pdev->dev, "no irq\n"); return rtc->irq; } + + rtc->rtc_dev = devm_rtc_allocate_device(&pdev->dev); + if (IS_ERR(rtc->rtc_dev)) + return PTR_ERR(rtc->rtc_dev); + if (devm_request_irq(&pdev->dev, rtc->irq, armada38x_rtc_alarm_irq, 0, pdev->name, rtc) < 0) { dev_warn(&pdev->dev, "Interrupt not available.\n"); @@ -436,28 +440,24 @@ static __init int armada38x_rtc_probe(struct platform_device *pdev) if (rtc->irq != -1) { device_init_wakeup(&pdev->dev, 1); - ops = &armada38x_rtc_ops; + rtc->rtc_dev->ops = &armada38x_rtc_ops; } else { /* * If there is no interrupt available then we can't * use the alarm */ - ops = &armada38x_rtc_ops_noirq; + rtc->rtc_dev->ops = &armada38x_rtc_ops_noirq; } rtc->data = (struct armada38x_rtc_data *)match->data; - /* Update RTC-MBUS bridge timing parameters */ rtc->data->update_mbus_timing(rtc); - rtc->rtc_dev = devm_rtc_device_register(&pdev->dev, pdev->name, - ops, THIS_MODULE); - if (IS_ERR(rtc->rtc_dev)) { - ret = PTR_ERR(rtc->rtc_dev); + ret = rtc_register_device(rtc->rtc_dev); + if (ret) dev_err(&pdev->dev, "Failed to register RTC device: %d\n", ret); - return ret; - } - return 0; + + return ret; } #ifdef CONFIG_PM_SLEEP From 85562d7a4fa73c35bcbe64565a801b0ee7230708 Mon Sep 17 00:00:00 2001 From: Tan Hu Date: Fri, 7 Sep 2018 16:33:33 +0800 Subject: [PATCH 0805/3715] netfilter: masquerade: don't flush all conntracks if only one address deleted on device [ Upstream commit 097f95d319f817e651bd51f8846aced92a55a6a1 ] We configured iptables as below, which only allowed incoming data on established connections: iptables -t mangle -A PREROUTING -m state --state ESTABLISHED -j ACCEPT iptables -t mangle -P PREROUTING DROP When deleting a secondary address, current masquerade implements would flush all conntracks on this device. All the established connections on primary address also be deleted, then subsequent incoming data on the connections would be dropped wrongly because it was identified as NEW connection. So when an address was delete, it should only flush connections related with the address. Signed-off-by: Tan Hu Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/ipv4/netfilter/nf_nat_masquerade_ipv4.c | 22 ++++++++++++++++++--- net/ipv6/netfilter/nf_nat_masquerade_ipv6.c | 19 +++++++++++++++--- 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c index 0c366aad89cb..b531fe204323 100644 --- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c @@ -105,12 +105,26 @@ static int masq_device_event(struct notifier_block *this, return NOTIFY_DONE; } +static int inet_cmp(struct nf_conn *ct, void *ptr) +{ + struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; + struct net_device *dev = ifa->ifa_dev->dev; + struct nf_conntrack_tuple *tuple; + + if (!device_cmp(ct, (void *)(long)dev->ifindex)) + return 0; + + tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; + + return ifa->ifa_address == tuple->dst.u3.ip; +} + static int masq_inet_event(struct notifier_block *this, unsigned long event, void *ptr) { struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev; - struct netdev_notifier_info info; + struct net *net = dev_net(idev->dev); /* The masq_dev_notifier will catch the case of the device going * down. So if the inetdev is dead and being destroyed we have @@ -120,8 +134,10 @@ static int masq_inet_event(struct notifier_block *this, if (idev->dead) return NOTIFY_DONE; - netdev_notifier_info_init(&info, idev->dev); - return masq_device_event(this, event, &info); + if (event == NETDEV_DOWN) + nf_ct_iterate_cleanup_net(net, inet_cmp, ptr, 0, 0); + + return NOTIFY_DONE; } static struct notifier_block masq_dev_notifier = { diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c index 98f61fcb9108..b0f3745d1bee 100644 --- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c @@ -88,18 +88,30 @@ static struct notifier_block masq_dev_notifier = { struct masq_dev_work { struct work_struct work; struct net *net; + struct in6_addr addr; int ifindex; }; +static int inet_cmp(struct nf_conn *ct, void *work) +{ + struct masq_dev_work *w = (struct masq_dev_work *)work; + struct nf_conntrack_tuple *tuple; + + if (!device_cmp(ct, (void *)(long)w->ifindex)) + return 0; + + tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; + + return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6); +} + static void iterate_cleanup_work(struct work_struct *work) { struct masq_dev_work *w; - long index; w = container_of(work, struct masq_dev_work, work); - index = w->ifindex; - nf_ct_iterate_cleanup_net(w->net, device_cmp, (void *)index, 0, 0); + nf_ct_iterate_cleanup_net(w->net, inet_cmp, (void *)w, 0, 0); put_net(w->net); kfree(w); @@ -148,6 +160,7 @@ static int masq_inet_event(struct notifier_block *this, INIT_WORK(&w->work, iterate_cleanup_work); w->ifindex = dev->ifindex; w->net = net; + w->addr = ifa->addr; schedule_work(&w->work); return NOTIFY_DONE; From 9d7681036dd09c5e4c8699f413188a9ab8b53efb Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Thu, 20 Sep 2018 19:13:32 +0300 Subject: [PATCH 0806/3715] usb: xhci-mtk: fix ISOC error when interval is zero [ Upstream commit 87173acc0d8f0987bda8827da35fff67f52ad15d ] If the interval equal zero, needn't round up to power of two for the number of packets in each ESIT, so fix it. Signed-off-by: Chunfeng Yun Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-mtk-sch.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-mtk-sch.c b/drivers/usb/host/xhci-mtk-sch.c index 6e7ddf6cafae..defaf950e631 100644 --- a/drivers/usb/host/xhci-mtk-sch.c +++ b/drivers/usb/host/xhci-mtk-sch.c @@ -122,7 +122,9 @@ static void setup_sch_info(struct usb_device *udev, } if (ep_type == ISOC_IN_EP || ep_type == ISOC_OUT_EP) { - if (esit_pkts <= sch_ep->esit) + if (sch_ep->esit == 1) + sch_ep->pkts = esit_pkts; + else if (esit_pkts <= sch_ep->esit) sch_ep->pkts = 1; else sch_ep->pkts = roundup_pow_of_two(esit_pkts) From 2b149bb378bc34545549d2e8c35f2bbdf1debc92 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Mon, 27 Aug 2018 18:29:29 +0300 Subject: [PATCH 0807/3715] fuse: use READ_ONCE on congestion_threshold and max_background [ Upstream commit 2a23f2b8adbe4bd584f936f7ac17a99750eed9d7 ] Since they are of unsigned int type, it's allowed to read them unlocked during reporting to userspace. Let's underline this fact with READ_ONCE() macroses. Signed-off-by: Kirill Tkhai Signed-off-by: Miklos Szeredi Signed-off-by: Sasha Levin --- fs/fuse/control.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 5be0339dcceb..42bed87dd5ea 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -107,7 +107,7 @@ static ssize_t fuse_conn_max_background_read(struct file *file, if (!fc) return 0; - val = fc->max_background; + val = READ_ONCE(fc->max_background); fuse_conn_put(fc); return fuse_conn_limit_read(file, buf, len, ppos, val); @@ -144,7 +144,7 @@ static ssize_t fuse_conn_congestion_threshold_read(struct file *file, if (!fc) return 0; - val = fc->congestion_threshold; + val = READ_ONCE(fc->congestion_threshold); fuse_conn_put(fc); return fuse_conn_limit_read(file, buf, len, ppos, val); From 24adb0e7d14b59266993b548de9ea2b60ea85570 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Wed, 26 Sep 2018 09:44:18 +0000 Subject: [PATCH 0808/3715] IB/iser: Fix possible NULL deref at iser_inv_desc() [ Upstream commit 65f07f5a09dacf3b60619f196f096ea3671a5eda ] In case target remote invalidates bogus rkey and signature is not used, pi_ctx is NULL deref. The commit also fails the connection on bogus remote invalidation. Fixes: 59caaed7a72a ("IB/iser: Support the remote invalidation exception") Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/iser/iser_initiator.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 2a07692007bd..a126750b65a9 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -592,13 +592,19 @@ void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc) ib_conn->post_recv_buf_count--; } -static inline void +static inline int iser_inv_desc(struct iser_fr_desc *desc, u32 rkey) { - if (likely(rkey == desc->rsc.mr->rkey)) + if (likely(rkey == desc->rsc.mr->rkey)) { desc->rsc.mr_valid = 0; - else if (likely(rkey == desc->pi_ctx->sig_mr->rkey)) + } else if (likely(desc->pi_ctx && rkey == desc->pi_ctx->sig_mr->rkey)) { desc->pi_ctx->sig_mr_valid = 0; + } else { + iser_err("Bogus remote invalidation for rkey %#x\n", rkey); + return -EINVAL; + } + + return 0; } static int @@ -626,12 +632,14 @@ iser_check_remote_inv(struct iser_conn *iser_conn, if (iser_task->dir[ISER_DIR_IN]) { desc = iser_task->rdma_reg[ISER_DIR_IN].mem_h; - iser_inv_desc(desc, rkey); + if (unlikely(iser_inv_desc(desc, rkey))) + return -EINVAL; } if (iser_task->dir[ISER_DIR_OUT]) { desc = iser_task->rdma_reg[ISER_DIR_OUT].mem_h; - iser_inv_desc(desc, rkey); + if (unlikely(iser_inv_desc(desc, rkey))) + return -EINVAL; } } else { iser_err("failed to get task for itt=%d\n", hdr->itt); From 7fdedf0c8d8eb93390c94533057d0666a10efd9e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 26 Sep 2018 15:14:10 +0200 Subject: [PATCH 0809/3715] net: phy: mdio-bcm-unimac: mark PM functions as __maybe_unused [ Upstream commit 9b97123a584f60a5bca5a2663485768a1f6cd0a4 ] The newly added runtime-pm support causes a harmless warning when CONFIG_PM is disabled: drivers/net/phy/mdio-bcm-unimac.c:330:12: error: 'unimac_mdio_resume' defined but not used [-Werror=unused-function] static int unimac_mdio_resume(struct device *d) drivers/net/phy/mdio-bcm-unimac.c:321:12: error: 'unimac_mdio_suspend' defined but not used [-Werror=unused-function] static int unimac_mdio_suspend(struct device *d) Marking the functions as __maybe_unused is the easiest workaround and avoids adding #ifdef checks. Fixes: b78ac6ecd1b6 ("net: phy: mdio-bcm-unimac: Allow configuring MDIO clock divider") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/phy/mdio-bcm-unimac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/mdio-bcm-unimac.c b/drivers/net/phy/mdio-bcm-unimac.c index f9d98a6e67bc..52703bbd4d66 100644 --- a/drivers/net/phy/mdio-bcm-unimac.c +++ b/drivers/net/phy/mdio-bcm-unimac.c @@ -316,7 +316,7 @@ static int unimac_mdio_remove(struct platform_device *pdev) return 0; } -static int unimac_mdio_suspend(struct device *d) +static int __maybe_unused unimac_mdio_suspend(struct device *d) { struct unimac_mdio_priv *priv = dev_get_drvdata(d); @@ -325,7 +325,7 @@ static int unimac_mdio_suspend(struct device *d) return 0; } -static int unimac_mdio_resume(struct device *d) +static int __maybe_unused unimac_mdio_resume(struct device *d) { struct unimac_mdio_priv *priv = dev_get_drvdata(d); int ret; From 988f701a805bf980e70a557894d26564e10fc1a6 Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Mon, 18 Nov 2019 11:26:07 +0800 Subject: [PATCH 0810/3715] memfd: Use radix_tree_deref_slot_protected to avoid the warning. The commit 391d4ee568b5 ("memfd: Fix locking when tagging pins") introduces the following warning messages. *WARNING: suspicious RCU usage in memfd_wait_for_pins* It is because we still use radix_tree_deref_slot without read_rcu_lock. We should use radix_tree_deref_slot_protected instead in the case. Cc: stable@vger.kernel.org Fixes: 391d4ee568b5 ("memfd: Fix locking when tagging pins") Signed-off-by: zhong jiang Reviewed-by: Matthew Wilcox (Oracle) Signed-off-by: Greg Kroah-Hartman --- mm/shmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/shmem.c b/mm/shmem.c index 5b2cc9f9b1f1..e55aa460a2c0 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2664,7 +2664,7 @@ static void shmem_tag_pins(struct address_space *mapping) spin_lock_irq(&mapping->tree_lock); radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { - page = radix_tree_deref_slot(slot); + page = radix_tree_deref_slot_protected(slot, &mapping->tree_lock); if (!page || radix_tree_exception(page)) { if (radix_tree_deref_retry(page)) { slot = radix_tree_iter_retry(&iter); From e979aaf21323ed95f4244b039a3aab9f9bf459dd Mon Sep 17 00:00:00 2001 From: Jouni Hogander Date: Wed, 13 Nov 2019 12:08:01 +0200 Subject: [PATCH 0811/3715] slcan: Fix memory leak in error path commit ed50e1600b4483c049ce76e6bd3b665a6a9300ed upstream. This patch is fixing memory leak reported by Syzkaller: BUG: memory leak unreferenced object 0xffff888067f65500 (size 4096): comm "syz-executor043", pid 454, jiffies 4294759719 (age 11.930s) hex dump (first 32 bytes): 73 6c 63 61 6e 30 00 00 00 00 00 00 00 00 00 00 slcan0.......... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000a06eec0d>] __kmalloc+0x18b/0x2c0 [<0000000083306e66>] kvmalloc_node+0x3a/0xc0 [<000000006ac27f87>] alloc_netdev_mqs+0x17a/0x1080 [<0000000061a996c9>] slcan_open+0x3ae/0x9a0 [<000000001226f0f9>] tty_ldisc_open.isra.1+0x76/0xc0 [<0000000019289631>] tty_set_ldisc+0x28c/0x5f0 [<000000004de5a617>] tty_ioctl+0x48d/0x1590 [<00000000daef496f>] do_vfs_ioctl+0x1c7/0x1510 [<0000000059068dbc>] ksys_ioctl+0x99/0xb0 [<000000009a6eb334>] __x64_sys_ioctl+0x78/0xb0 [<0000000053d0332e>] do_syscall_64+0x16f/0x580 [<0000000021b83b99>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [<000000008ea75434>] 0xffffffffffffffff Cc: Wolfgang Grandegger Cc: Marc Kleine-Budde Cc: Lukas Bulwahn Signed-off-by: Jouni Hogander Signed-off-by: Marc Kleine-Budde Cc: Oliver Hartkopp Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/slcan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index 5d067c1b987f..49427f44dc5b 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -613,6 +613,7 @@ err_free_chan: sl->tty = NULL; tty->disc_data = NULL; clear_bit(SLF_INUSE, &sl->flags); + free_netdev(sl->dev); err_exit: rtnl_unlock(); From f56f3d0e65adb447b8b583c8ed4fbbe544c9bfde Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 20 Nov 2019 18:00:54 +0100 Subject: [PATCH 0812/3715] Linux 4.14.155 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4d2d55691548..1f427c8bcc56 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 154 +SUBLEVEL = 155 EXTRAVERSION = NAME = Petit Gorille From 437a2a739c5f6f637ac29c0c613d2fbc791597c4 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 2 Oct 2019 13:42:06 +0100 Subject: [PATCH 0813/3715] FROMGIT: pinctrl: devicetree: Avoid taking direct reference to device name string When populating the pinctrl mapping table entries for a device, the 'dev_name' field for each entry is initialised to point directly at the string returned by 'dev_name()' for the device and subsequently used by 'create_pinctrl()' when looking up the mappings for the device being probed. This is unreliable in the presence of calls to 'dev_set_name()', which may reallocate the device name string leaving the pinctrl mappings with a dangling reference. This then leads to a use-after-free every time the name is dereferenced by a device probe: | BUG: KASAN: invalid-access in strcmp+0x20/0x64 | Read of size 1 at addr 13ffffc153494b00 by task modprobe/590 | Pointer tag: [13], memory tag: [fe] | | Call trace: | __kasan_report+0x16c/0x1dc | kasan_report+0x10/0x18 | check_memory_region | __hwasan_load1_noabort+0x4c/0x54 | strcmp+0x20/0x64 | create_pinctrl+0x18c/0x7f4 | pinctrl_get+0x90/0x114 | devm_pinctrl_get+0x44/0x98 | pinctrl_bind_pins+0x5c/0x450 | really_probe+0x1c8/0x9a4 | driver_probe_device+0x120/0x1d8 Follow the example of sysfs, and duplicate the device name string before stashing it away in the pinctrl mapping entries. Cc: Linus Walleij Reported-by: Elena Petrova Tested-by: Elena Petrova Signed-off-by: Will Deacon Link: https://lore.kernel.org/r/20191002124206.22928-1-will@kernel.org Signed-off-by: Linus Walleij (cherry picked from commit be4c60b563edee3712d392aaeb0943a768df7023 https: //git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-pinctrl.git devel) Bug: 140550171 Signed-off-by: Elena Petrova Change-Id: I90e9f4f64c694a195b0963b88bb32bd8cee42aa5 --- drivers/pinctrl/devicetree.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/pinctrl/devicetree.c b/drivers/pinctrl/devicetree.c index c4aa411f5935..3a7c2d6e4d5f 100644 --- a/drivers/pinctrl/devicetree.c +++ b/drivers/pinctrl/devicetree.c @@ -40,6 +40,13 @@ struct pinctrl_dt_map { static void dt_free_map(struct pinctrl_dev *pctldev, struct pinctrl_map *map, unsigned num_maps) { + int i; + + for (i = 0; i < num_maps; ++i) { + kfree_const(map[i].dev_name); + map[i].dev_name = NULL; + } + if (pctldev) { const struct pinctrl_ops *ops = pctldev->desc->pctlops; if (ops->dt_free_map) @@ -74,7 +81,13 @@ static int dt_remember_or_free_map(struct pinctrl *p, const char *statename, /* Initialize common mapping table entry fields */ for (i = 0; i < num_maps; i++) { - map[i].dev_name = dev_name(p->dev); + const char *devname; + + devname = kstrdup_const(dev_name(p->dev), GFP_KERNEL); + if (!devname) + goto err_free_map; + + map[i].dev_name = devname; map[i].name = statename; if (pctldev) map[i].ctrl_dev_name = dev_name(pctldev->dev); @@ -82,10 +95,8 @@ static int dt_remember_or_free_map(struct pinctrl *p, const char *statename, /* Remember the converted mapping table entries */ dt_map = kzalloc(sizeof(*dt_map), GFP_KERNEL); - if (!dt_map) { - dt_free_map(pctldev, map, num_maps); - return -ENOMEM; - } + if (!dt_map) + goto err_free_map; dt_map->pctldev = pctldev; dt_map->map = map; @@ -93,6 +104,10 @@ static int dt_remember_or_free_map(struct pinctrl *p, const char *statename, list_add_tail(&dt_map->node, &p->dt_maps); return pinctrl_register_map(map, num_maps, false); + +err_free_map: + dt_free_map(pctldev, map, num_maps); + return -ENOMEM; } struct pinctrl_dev *of_pinctrl_get(struct device_node *np) From 723aef6434dd82e8b4ded65eabafccab9174c5fc Mon Sep 17 00:00:00 2001 From: Leilk Liu Date: Wed, 31 Oct 2018 16:49:16 +0800 Subject: [PATCH 0814/3715] spi: mediatek: use correct mata->xfer_len when in fifo transfer commit a4d8f64f7267a88d4688f5c216926f5f6cafbae6 upstream. when xfer_len is greater than 64 bytes and use fifo mode to transfer, the actual length from the third time is mata->xfer_len but not len in mtk_spi_interrupt(). Signed-off-by: Leilk Liu Signed-off-by: Mark Brown Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-mt65xx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index 3dc31627c655..0c2867deb36f 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -522,11 +522,11 @@ static irqreturn_t mtk_spi_interrupt(int irq, void *dev_id) mdata->xfer_len = min(MTK_SPI_MAX_FIFO_SIZE, len); mtk_spi_setup_packet(master); - cnt = len / 4; + cnt = mdata->xfer_len / 4; iowrite32_rep(mdata->base + SPI_TX_DATA_REG, trans->tx_buf + mdata->num_xfered, cnt); - remainder = len % 4; + remainder = mdata->xfer_len % 4; if (remainder > 0) { reg_val = 0; memcpy(®_val, From e01dc7dda8836c90eaac6a51bd7df422e945910a Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 23 Feb 2019 14:20:36 +0100 Subject: [PATCH 0815/3715] tee: optee: add missing of_node_put after of_device_is_available commit c7c0d8df0b94a67377555a550b8d66ee2ad2f4ed upstream. Add an of_node_put when a tested device node is not available. The semantic patch that fixes this problem is as follows (http://coccinelle.lip6.fr): // @@ identifier f; local idexpression e; expression x; @@ e = f(...); ... when != of_node_put(e) when != x = e when != e = x when any if (<+...of_device_is_available(e)...+>) { ... when != of_node_put(e) ( return e; | + of_node_put(e); return ...; ) } // Fixes: db878f76b9ff ("tee: optee: take DT status property into account") Signed-off-by: Julia Lawall Signed-off-by: Jens Wiklander Cc: Nobuhiro Iwamatsu Signed-off-by: Greg Kroah-Hartman --- drivers/tee/optee/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c index ca79c2ba2ef2..834884c370c5 100644 --- a/drivers/tee/optee/core.c +++ b/drivers/tee/optee/core.c @@ -590,8 +590,10 @@ static int __init optee_driver_init(void) return -ENODEV; np = of_find_matching_node(fw_np, optee_match); - if (!np || !of_device_is_available(np)) + if (!np || !of_device_is_available(np)) { + of_node_put(np); return -ENODEV; + } optee = optee_probe(np); of_node_put(np); From d3a72ae0afff35dd65df15bcac59d086b9067fca Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 21 Nov 2019 14:51:46 +0100 Subject: [PATCH 0816/3715] Revert "OPP: Protect dev_list with opp_table lock" This reverts commit 714ab224a8db6e8255c61a42613de9349ceb0bba which is commit 3d2556992a878a2210d3be498416aee39e0c32aa upstream. Turns out to break the build on the odroid machines, so it needs to be reverted. Reported-by: Viresh Kumar Reported-by: "kernelci.org bot" Cc: Niklas Cassel Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/base/power/opp/core.c | 21 ++------------------- drivers/base/power/opp/cpu.c | 2 -- drivers/base/power/opp/opp.h | 2 +- 3 files changed, 3 insertions(+), 22 deletions(-) diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index 8100c8769149..d5e7e8cc4f22 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -49,14 +49,9 @@ static struct opp_device *_find_opp_dev(const struct device *dev, static struct opp_table *_find_opp_table_unlocked(struct device *dev) { struct opp_table *opp_table; - bool found; list_for_each_entry(opp_table, &opp_tables, node) { - mutex_lock(&opp_table->lock); - found = !!_find_opp_dev(dev, opp_table); - mutex_unlock(&opp_table->lock); - - if (found) { + if (_find_opp_dev(dev, opp_table)) { _get_opp_table_kref(opp_table); return opp_table; @@ -716,8 +711,6 @@ struct opp_device *_add_opp_dev(const struct device *dev, /* Initialize opp-dev */ opp_dev->dev = dev; - - mutex_lock(&opp_table->lock); list_add(&opp_dev->node, &opp_table->dev_list); /* Create debugfs entries for the opp_table */ @@ -725,7 +718,6 @@ struct opp_device *_add_opp_dev(const struct device *dev, if (ret) dev_err(dev, "%s: Failed to register opp debugfs (%d)\n", __func__, ret); - mutex_unlock(&opp_table->lock); return opp_dev; } @@ -744,7 +736,6 @@ static struct opp_table *_allocate_opp_table(struct device *dev) if (!opp_table) return NULL; - mutex_init(&opp_table->lock); INIT_LIST_HEAD(&opp_table->dev_list); opp_dev = _add_opp_dev(dev, opp_table); @@ -766,6 +757,7 @@ static struct opp_table *_allocate_opp_table(struct device *dev) BLOCKING_INIT_NOTIFIER_HEAD(&opp_table->head); INIT_LIST_HEAD(&opp_table->opp_list); + mutex_init(&opp_table->lock); kref_init(&opp_table->kref); /* Secure the device table modification */ @@ -807,10 +799,6 @@ static void _opp_table_kref_release(struct kref *kref) if (!IS_ERR(opp_table->clk)) clk_put(opp_table->clk); - /* - * No need to take opp_table->lock here as we are guaranteed that no - * references to the OPP table are taken at this point. - */ opp_dev = list_first_entry(&opp_table->dev_list, struct opp_device, node); @@ -1714,9 +1702,6 @@ void _dev_pm_opp_remove_table(struct opp_table *opp_table, struct device *dev, { struct dev_pm_opp *opp, *tmp; - /* Protect dev_list */ - mutex_lock(&opp_table->lock); - /* Find if opp_table manages a single device */ if (list_is_singular(&opp_table->dev_list)) { /* Free static OPPs */ @@ -1727,8 +1712,6 @@ void _dev_pm_opp_remove_table(struct opp_table *opp_table, struct device *dev, } else { _remove_opp_dev(_find_opp_dev(dev, opp_table), opp_table); } - - mutex_unlock(&opp_table->lock); } void _dev_pm_opp_find_and_remove_table(struct device *dev, bool remove_all) diff --git a/drivers/base/power/opp/cpu.c b/drivers/base/power/opp/cpu.c index 66e406bd4d62..2d87bc1adf38 100644 --- a/drivers/base/power/opp/cpu.c +++ b/drivers/base/power/opp/cpu.c @@ -222,10 +222,8 @@ int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask) cpumask_clear(cpumask); if (opp_table->shared_opp == OPP_TABLE_ACCESS_SHARED) { - mutex_lock(&opp_table->lock); list_for_each_entry(opp_dev, &opp_table->dev_list, node) cpumask_set_cpu(opp_dev->dev->id, cpumask); - mutex_unlock(&opp_table->lock); } else { cpumask_set_cpu(cpu_dev->id, cpumask); } diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h index 0a206c6b9086..166eef990599 100644 --- a/drivers/base/power/opp/opp.h +++ b/drivers/base/power/opp/opp.h @@ -124,7 +124,7 @@ enum opp_table_access { * @dev_list: list of devices that share these OPPs * @opp_list: table of opps * @kref: for reference count of the table. - * @lock: mutex protecting the opp_list and dev_list. + * @lock: mutex protecting the opp_list. * @np: struct device_node pointer for opp's DT node. * @clock_latency_ns_max: Max clock latency in nanoseconds. * @shared_opp: OPP is shared between multiple devices. From a4ead70898f7cd29b681448d2c27b92cfb01ec9a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 13 Nov 2019 21:28:31 +0300 Subject: [PATCH 0817/3715] net: cdc_ncm: Signedness bug in cdc_ncm_set_dgram_size() commit a56dcc6b455830776899ce3686735f1172e12243 upstream. This code is supposed to test for negative error codes and partial reads, but because sizeof() is size_t (unsigned) type then negative error codes are type promoted to high positive values and the condition doesn't work as expected. Fixes: 332f989a3b00 ("CDC-NCM: handle incomplete transfer of MTU") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller Signed-off-by: Nobuhiro Iwamatsu Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/cdc_ncm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index d53b4a41c583..cb4c9d419bd3 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -579,7 +579,7 @@ static void cdc_ncm_set_dgram_size(struct usbnet *dev, int new_size) err = usbnet_read_cmd(dev, USB_CDC_GET_MAX_DATAGRAM_SIZE, USB_TYPE_CLASS | USB_DIR_IN | USB_RECIP_INTERFACE, 0, iface_no, &max_datagram_size, sizeof(max_datagram_size)); - if (err < sizeof(max_datagram_size)) { + if (err != sizeof(max_datagram_size)) { dev_dbg(&dev->intf->dev, "GET_MAX_DATAGRAM_SIZE failed\n"); goto out; } From a99fc917708665d730332fe20c910e832d8526d3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 14 May 2019 16:05:45 -0400 Subject: [PATCH 0818/3715] idr: Fix idr_get_next race with idr_remove commit 5c089fd0c73411f2170ab795c9ffc16718c7d007 upstream. If the entry is deleted from the IDR between the call to radix_tree_iter_find() and rcu_dereference_raw(), idr_get_next() will return NULL, which will end the iteration prematurely. We should instead continue to the next entry in the IDR. This only happens if the iteration is protected by the RCU lock. Most IDR users use a spinlock or semaphore to exclude simultaneous modifications. It was noticed once the PID allocator was converted to use the IDR, as it uses the RCU lock, but there may be other users elsewhere in the kernel. We can't use the normal pattern of calling radix_tree_deref_retry() (which catches both a retry entry in a leaf node and a node entry in the root) as the IDR supports storing entries which are unaligned, which will trigger an infinite loop if they are encountered. Instead, we have to explicitly check whether the entry is a retry entry. Fixes: 0a835c4f090a ("Reimplement IDR and IDA using the radix tree") Reported-by: Brendan Gregg Tested-by: Brendan Gregg Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Greg Kroah-Hartman --- lib/idr.c | 18 ++++++++-- tools/testing/radix-tree/idr-test.c | 52 +++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 2 deletions(-) diff --git a/lib/idr.c b/lib/idr.c index edd9b2be1651..8c1a98d03164 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -111,13 +111,27 @@ void *idr_get_next(struct idr *idr, int *nextid) { struct radix_tree_iter iter; void __rcu **slot; + void *entry = NULL; - slot = radix_tree_iter_find(&idr->idr_rt, &iter, *nextid); + radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, *nextid) { + entry = rcu_dereference_raw(*slot); + if (!entry) + continue; + if (!radix_tree_deref_retry(entry)) + break; + if (slot != (void *)&idr->idr_rt.rnode && + entry != (void *)RADIX_TREE_INTERNAL_NODE) + break; + slot = radix_tree_iter_retry(&iter); + } if (!slot) return NULL; + if (WARN_ON_ONCE(iter.index > INT_MAX)) + return NULL; + *nextid = iter.index; - return rcu_dereference_raw(*slot); + return entry; } EXPORT_SYMBOL(idr_get_next); diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 8e61aad0ca3f..07cec1b5a0d8 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -177,6 +177,57 @@ void idr_get_next_test(void) idr_destroy(&idr); } +static inline void *idr_mk_value(unsigned long v) +{ + BUG_ON((long)v < 0); + return (void *)((v & 1) | 2 | (v << 1)); +} + +DEFINE_IDR(find_idr); + +static void *idr_throbber(void *arg) +{ + time_t start = time(NULL); + int id = *(int *)arg; + + rcu_register_thread(); + do { + idr_alloc(&find_idr, idr_mk_value(id), id, id + 1, GFP_KERNEL); + idr_remove(&find_idr, id); + } while (time(NULL) < start + 10); + rcu_unregister_thread(); + + return NULL; +} + +void idr_find_test_1(int anchor_id, int throbber_id) +{ + pthread_t throbber; + time_t start = time(NULL); + + pthread_create(&throbber, NULL, idr_throbber, &throbber_id); + + BUG_ON(idr_alloc(&find_idr, idr_mk_value(anchor_id), anchor_id, + anchor_id + 1, GFP_KERNEL) != anchor_id); + + do { + int id = 0; + void *entry = idr_get_next(&find_idr, &id); + BUG_ON(entry != idr_mk_value(id)); + } while (time(NULL) < start + 11); + + pthread_join(throbber, NULL); + + idr_remove(&find_idr, anchor_id); + BUG_ON(!idr_is_empty(&find_idr)); +} + +void idr_find_test(void) +{ + idr_find_test_1(100000, 0); + idr_find_test_1(0, 100000); +} + void idr_checks(void) { unsigned long i; @@ -234,6 +285,7 @@ void idr_checks(void) idr_null_test(); idr_nowait_test(); idr_get_next_test(); + idr_find_test(); } /* From b55841e18213b42e6ee380e0cc1c03c9b406fce0 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 18 Oct 2019 20:19:33 -0700 Subject: [PATCH 0819/3715] mm/memory_hotplug: don't access uninitialized memmaps in shrink_pgdat_span() commit 00d6c019b5bc175cee3770e0e659f2b5f4804ea5 upstream. We might use the nid of memmaps that were never initialized. For example, if the memmap was poisoned, we will crash the kernel in pfn_to_nid() right now. Let's use the calculated boundaries of the separate zones instead. This now also avoids having to iterate over a whole bunch of subsections again, after shrinking one zone. Before commit d0dc12e86b31 ("mm/memory_hotplug: optimize memory hotplug"), the memmap was initialized to 0 and the node was set to the right value. After that commit, the node might be garbage. We'll have to fix shrink_zone_span() next. Link: http://lkml.kernel.org/r/20191006085646.5768-4-david@redhat.com Fixes: f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") [d0dc12e86b319] Signed-off-by: David Hildenbrand Reported-by: Aneesh Kumar K.V Cc: Oscar Salvador Cc: David Hildenbrand Cc: Michal Hocko Cc: Pavel Tatashin Cc: Dan Williams Cc: Wei Yang Cc: Alexander Duyck Cc: Alexander Potapenko Cc: Andy Lutomirski Cc: Anshuman Khandual Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Damian Tometzki Cc: Dave Hansen Cc: Fenghua Yu Cc: Gerald Schaefer Cc: Greg Kroah-Hartman Cc: Halil Pasic Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Ira Weiny Cc: Jason Gunthorpe Cc: Jun Yao Cc: Logan Gunthorpe Cc: Mark Rutland Cc: Masahiro Yamada Cc: "Matthew Wilcox (Oracle)" Cc: Mel Gorman Cc: Michael Ellerman Cc: Mike Rapoport Cc: Pankaj Gupta Cc: Paul Mackerras Cc: Pavel Tatashin Cc: Peter Zijlstra Cc: Qian Cai Cc: Rich Felker Cc: Robin Murphy Cc: Steve Capper Cc: Thomas Gleixner Cc: Tom Lendacky Cc: Tony Luck Cc: Vasily Gorbik Cc: Vlastimil Babka Cc: Wei Yang Cc: Will Deacon Cc: Yoshinori Sato Cc: Yu Zhao Cc: [4.13+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memory_hotplug.c | 77 ++++++++++----------------------------------- 1 file changed, 16 insertions(+), 61 deletions(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index c9d3a49bd4e2..9cd25b19e111 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -459,70 +459,25 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn, zone_span_writeunlock(zone); } -static void shrink_pgdat_span(struct pglist_data *pgdat, - unsigned long start_pfn, unsigned long end_pfn) +static void update_pgdat_span(struct pglist_data *pgdat) { - unsigned long pgdat_start_pfn = pgdat->node_start_pfn; - unsigned long p = pgdat_end_pfn(pgdat); /* pgdat_end_pfn namespace clash */ - unsigned long pgdat_end_pfn = p; - unsigned long pfn; - struct mem_section *ms; - int nid = pgdat->node_id; + unsigned long node_start_pfn = 0, node_end_pfn = 0; + struct zone *zone; - if (pgdat_start_pfn == start_pfn) { - /* - * If the section is smallest section in the pgdat, it need - * shrink pgdat->node_start_pfn and pgdat->node_spanned_pages. - * In this case, we find second smallest valid mem_section - * for shrinking zone. - */ - pfn = find_smallest_section_pfn(nid, NULL, end_pfn, - pgdat_end_pfn); - if (pfn) { - pgdat->node_start_pfn = pfn; - pgdat->node_spanned_pages = pgdat_end_pfn - pfn; - } - } else if (pgdat_end_pfn == end_pfn) { - /* - * If the section is biggest section in the pgdat, it need - * shrink pgdat->node_spanned_pages. - * In this case, we find second biggest valid mem_section for - * shrinking zone. - */ - pfn = find_biggest_section_pfn(nid, NULL, pgdat_start_pfn, - start_pfn); - if (pfn) - pgdat->node_spanned_pages = pfn - pgdat_start_pfn + 1; + for (zone = pgdat->node_zones; + zone < pgdat->node_zones + MAX_NR_ZONES; zone++) { + unsigned long zone_end_pfn = zone->zone_start_pfn + + zone->spanned_pages; + + /* No need to lock the zones, they can't change. */ + if (zone_end_pfn > node_end_pfn) + node_end_pfn = zone_end_pfn; + if (zone->zone_start_pfn < node_start_pfn) + node_start_pfn = zone->zone_start_pfn; } - /* - * If the section is not biggest or smallest mem_section in the pgdat, - * it only creates a hole in the pgdat. So in this case, we need not - * change the pgdat. - * But perhaps, the pgdat has only hole data. Thus it check the pgdat - * has only hole or not. - */ - pfn = pgdat_start_pfn; - for (; pfn < pgdat_end_pfn; pfn += PAGES_PER_SECTION) { - ms = __pfn_to_section(pfn); - - if (unlikely(!valid_section(ms))) - continue; - - if (pfn_to_nid(pfn) != nid) - continue; - - /* If the section is current section, it continues the loop */ - if (start_pfn == pfn) - continue; - - /* If we find valid section, we have nothing to do */ - return; - } - - /* The pgdat has no valid section */ - pgdat->node_start_pfn = 0; - pgdat->node_spanned_pages = 0; + pgdat->node_start_pfn = node_start_pfn; + pgdat->node_spanned_pages = node_end_pfn - node_start_pfn; } static void __remove_zone(struct zone *zone, unsigned long start_pfn) @@ -533,7 +488,7 @@ static void __remove_zone(struct zone *zone, unsigned long start_pfn) pgdat_resize_lock(zone->zone_pgdat, &flags); shrink_zone_span(zone, start_pfn, start_pfn + nr_pages); - shrink_pgdat_span(pgdat, start_pfn, start_pfn + nr_pages); + update_pgdat_span(pgdat); pgdat_resize_unlock(zone->zone_pgdat, &flags); } From 630ca7d4c08948e25ae4d6dc8c6f1fa783b56a41 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 5 Nov 2019 21:17:10 -0800 Subject: [PATCH 0820/3715] mm/memory_hotplug: fix updating the node span commit 656d571193262a11c2daa4012e53e4d645bbce56 upstream. We recently started updating the node span based on the zone span to avoid touching uninitialized memmaps. Currently, we will always detect the node span to start at 0, meaning a node can easily span too many pages. pgdat_is_empty() will still work correctly if all zones span no pages. We should skip over all zones without spanned pages and properly handle the first detected zone that spans pages. Unfortunately, in contrast to the zone span (/proc/zoneinfo), the node span cannot easily be inspected and tested. The node span gives no real guarantees when an architecture supports memory hotplug, meaning it can easily contain holes or span pages of different nodes. The node span is not really used after init on architectures that support memory hotplug. E.g., we use it in mm/memory_hotplug.c:try_offline_node() and in mm/kmemleak.c:kmemleak_scan(). These users seem to be fine. Link: http://lkml.kernel.org/r/20191027222714.5313-1-david@redhat.com Fixes: 00d6c019b5bc ("mm/memory_hotplug: don't access uninitialized memmaps in shrink_pgdat_span()") Signed-off-by: David Hildenbrand Cc: Michal Hocko Cc: Oscar Salvador Cc: Stephen Rothwell Cc: Dan Williams Cc: Pavel Tatashin Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memory_hotplug.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 9cd25b19e111..d4affa9982ca 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -470,6 +470,14 @@ static void update_pgdat_span(struct pglist_data *pgdat) zone->spanned_pages; /* No need to lock the zones, they can't change. */ + if (!zone->spanned_pages) + continue; + if (!node_end_pfn) { + node_start_pfn = zone->zone_start_pfn; + node_end_pfn = zone_end_pfn; + continue; + } + if (zone_end_pfn > node_end_pfn) node_end_pfn = zone_end_pfn; if (zone->zone_start_pfn < node_start_pfn) From 6bdcda5c99a51458f0c24a5a42ef88a7d2db2d6e Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Tue, 19 Nov 2019 17:10:06 -0500 Subject: [PATCH 0821/3715] arm64: uaccess: Ensure PAN is re-enabled after unhandled uaccess fault commit 94bb804e1e6f0a9a77acf20d7c70ea141c6c821e upstream. A number of our uaccess routines ('__arch_clear_user()' and '__arch_copy_{in,from,to}_user()') fail to re-enable PAN if they encounter an unhandled fault whilst accessing userspace. For CPUs implementing both hardware PAN and UAO, this bug has no effect when both extensions are in use by the kernel. For CPUs implementing hardware PAN but not UAO, this means that a kernel using hardware PAN may execute portions of code with PAN inadvertently disabled, opening us up to potential security vulnerabilities that rely on userspace access from within the kernel which would usually be prevented by this mechanism. In other words, parts of the kernel run the same way as they would on a CPU without PAN implemented/emulated at all. For CPUs not implementing hardware PAN and instead relying on software emulation via 'CONFIG_ARM64_SW_TTBR0_PAN=y', the impact is unfortunately much worse. Calling 'schedule()' with software PAN disabled means that the next task will execute in the kernel using the page-table and ASID of the previous process even after 'switch_mm()', since the actual hardware switch is deferred until return to userspace. At this point, or if there is a intermediate call to 'uaccess_enable()', the page-table and ASID of the new process are installed. Sadly, due to the changes introduced by KPTI, this is not an atomic operation and there is a very small window (two instructions) where the CPU is configured with the page-table of the old task and the ASID of the new task; a speculative access in this state is disastrous because it would corrupt the TLB entries for the new task with mappings from the previous address space. As Pavel explains: | I was able to reproduce memory corruption problem on Broadcom's SoC | ARMv8-A like this: | | Enable software perf-events with PERF_SAMPLE_CALLCHAIN so userland's | stack is accessed and copied. | | The test program performed the following on every CPU and forking | many processes: | | unsigned long *map = mmap(NULL, PAGE_SIZE, PROT_READ|PROT_WRITE, | MAP_SHARED | MAP_ANONYMOUS, -1, 0); | map[0] = getpid(); | sched_yield(); | if (map[0] != getpid()) { | fprintf(stderr, "Corruption detected!"); | } | munmap(map, PAGE_SIZE); | | From time to time I was getting map[0] to contain pid for a | different process. Ensure that PAN is re-enabled when returning after an unhandled user fault from our uaccess routines. Cc: Catalin Marinas Reviewed-by: Mark Rutland Tested-by: Mark Rutland Cc: Fixes: 338d4f49d6f7 ("arm64: kernel: Add support for Privileged Access Never") Signed-off-by: Pavel Tatashin [will: rewrote commit message] Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/lib/clear_user.S | 1 + arch/arm64/lib/copy_from_user.S | 1 + arch/arm64/lib/copy_in_user.S | 1 + arch/arm64/lib/copy_to_user.S | 1 + 4 files changed, 4 insertions(+) diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index 21ba0b29621b..4374020c824a 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -57,5 +57,6 @@ ENDPROC(__arch_clear_user) .section .fixup,"ax" .align 2 9: mov x0, x2 // return the original size + uaccess_disable_not_uao x2, x3 ret .previous diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 20305d485046..96b22c0fa343 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -75,5 +75,6 @@ ENDPROC(__arch_copy_from_user) .section .fixup,"ax" .align 2 9998: sub x0, end, dst // bytes not copied + uaccess_disable_not_uao x3, x4 ret .previous diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index 54b75deb1d16..e56c705f1f23 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -77,5 +77,6 @@ ENDPROC(__arch_copy_in_user) .section .fixup,"ax" .align 2 9998: sub x0, end, dst // bytes not copied + uaccess_disable_not_uao x3, x4 ret .previous diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index fda6172d6b88..6b99b939c50f 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -74,5 +74,6 @@ ENDPROC(__arch_copy_to_user) .section .fixup,"ax" .align 2 9998: sub x0, end, dst // bytes not copied + uaccess_disable_not_uao x3, x4 ret .previous From 50df719a6c82b1aeccbbd55f895ef0d7f2943718 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Sun, 21 Jul 2019 22:19:56 +0200 Subject: [PATCH 0822/3715] fbdev: Ditch fb_edid_add_monspecs commit 3b8720e63f4a1fc6f422a49ecbaa3b59c86d5aaf upstream. It's dead code ever since commit 34280340b1dc74c521e636f45cd728f9abf56ee2 Author: Geert Uytterhoeven Date: Fri Dec 4 17:01:43 2015 +0100 fbdev: Remove unused SH-Mobile HDMI driver Also with this gone we can remove the cea_modes db. This entire thing is massively incomplete anyway, compared to the CEA parsing that drm_edid.c does. Acked-by: Linus Torvalds Cc: Tavis Ormandy Signed-off-by: Daniel Vetter Signed-off-by: Bartlomiej Zolnierkiewicz Link: https://patchwork.freedesktop.org/patch/msgid/20190721201956.941-1-daniel.vetter@ffwll.ch Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/core/fbmon.c | 95 ------------------------------- drivers/video/fbdev/core/modedb.c | 57 ------------------- include/linux/fb.h | 3 - 3 files changed, 155 deletions(-) diff --git a/drivers/video/fbdev/core/fbmon.c b/drivers/video/fbdev/core/fbmon.c index 2b2d67328514..ed202f1e13b8 100644 --- a/drivers/video/fbdev/core/fbmon.c +++ b/drivers/video/fbdev/core/fbmon.c @@ -997,97 +997,6 @@ void fb_edid_to_monspecs(unsigned char *edid, struct fb_monspecs *specs) DPRINTK("========================================\n"); } -/** - * fb_edid_add_monspecs() - add monitor video modes from E-EDID data - * @edid: 128 byte array with an E-EDID block - * @spacs: monitor specs to be extended - */ -void fb_edid_add_monspecs(unsigned char *edid, struct fb_monspecs *specs) -{ - unsigned char *block; - struct fb_videomode *m; - int num = 0, i; - u8 svd[64], edt[(128 - 4) / DETAILED_TIMING_DESCRIPTION_SIZE]; - u8 pos = 4, svd_n = 0; - - if (!edid) - return; - - if (!edid_checksum(edid)) - return; - - if (edid[0] != 0x2 || - edid[2] < 4 || edid[2] > 128 - DETAILED_TIMING_DESCRIPTION_SIZE) - return; - - DPRINTK(" Short Video Descriptors\n"); - - while (pos < edid[2]) { - u8 len = edid[pos] & 0x1f, type = (edid[pos] >> 5) & 7; - pr_debug("Data block %u of %u bytes\n", type, len); - if (type == 2) { - for (i = pos; i < pos + len; i++) { - u8 idx = edid[pos + i] & 0x7f; - svd[svd_n++] = idx; - pr_debug("N%sative mode #%d\n", - edid[pos + i] & 0x80 ? "" : "on-n", idx); - } - } else if (type == 3 && len >= 3) { - /* Check Vendor Specific Data Block. For HDMI, - it is always 00-0C-03 for HDMI Licensing, LLC. */ - if (edid[pos + 1] == 3 && edid[pos + 2] == 0xc && - edid[pos + 3] == 0) - specs->misc |= FB_MISC_HDMI; - } - pos += len + 1; - } - - block = edid + edid[2]; - - DPRINTK(" Extended Detailed Timings\n"); - - for (i = 0; i < (128 - edid[2]) / DETAILED_TIMING_DESCRIPTION_SIZE; - i++, block += DETAILED_TIMING_DESCRIPTION_SIZE) - if (PIXEL_CLOCK != 0) - edt[num++] = block - edid; - - /* Yikes, EDID data is totally useless */ - if (!(num + svd_n)) - return; - - m = kzalloc((specs->modedb_len + num + svd_n) * - sizeof(struct fb_videomode), GFP_KERNEL); - - if (!m) - return; - - memcpy(m, specs->modedb, specs->modedb_len * sizeof(struct fb_videomode)); - - for (i = specs->modedb_len; i < specs->modedb_len + num; i++) { - get_detailed_timing(edid + edt[i - specs->modedb_len], &m[i]); - if (i == specs->modedb_len) - m[i].flag |= FB_MODE_IS_FIRST; - pr_debug("Adding %ux%u@%u\n", m[i].xres, m[i].yres, m[i].refresh); - } - - for (i = specs->modedb_len + num; i < specs->modedb_len + num + svd_n; i++) { - int idx = svd[i - specs->modedb_len - num]; - if (!idx || idx >= ARRAY_SIZE(cea_modes)) { - pr_warn("Reserved SVD code %d\n", idx); - } else if (!cea_modes[idx].xres) { - pr_warn("Unimplemented SVD code %d\n", idx); - } else { - memcpy(&m[i], cea_modes + idx, sizeof(m[i])); - pr_debug("Adding SVD #%d: %ux%u@%u\n", idx, - m[i].xres, m[i].yres, m[i].refresh); - } - } - - kfree(specs->modedb); - specs->modedb = m; - specs->modedb_len = specs->modedb_len + num + svd_n; -} - /* * VESA Generalized Timing Formula (GTF) */ @@ -1497,9 +1406,6 @@ int fb_parse_edid(unsigned char *edid, struct fb_var_screeninfo *var) void fb_edid_to_monspecs(unsigned char *edid, struct fb_monspecs *specs) { } -void fb_edid_add_monspecs(unsigned char *edid, struct fb_monspecs *specs) -{ -} void fb_destroy_modedb(struct fb_videomode *modedb) { } @@ -1607,7 +1513,6 @@ EXPORT_SYMBOL(fb_firmware_edid); EXPORT_SYMBOL(fb_parse_edid); EXPORT_SYMBOL(fb_edid_to_monspecs); -EXPORT_SYMBOL(fb_edid_add_monspecs); EXPORT_SYMBOL(fb_get_mode); EXPORT_SYMBOL(fb_validate_mode); EXPORT_SYMBOL(fb_destroy_modedb); diff --git a/drivers/video/fbdev/core/modedb.c b/drivers/video/fbdev/core/modedb.c index 455a15f70172..a9d76e1b4378 100644 --- a/drivers/video/fbdev/core/modedb.c +++ b/drivers/video/fbdev/core/modedb.c @@ -289,63 +289,6 @@ static const struct fb_videomode modedb[] = { }; #ifdef CONFIG_FB_MODE_HELPERS -const struct fb_videomode cea_modes[65] = { - /* #1: 640x480p@59.94/60Hz */ - [1] = { - NULL, 60, 640, 480, 39722, 48, 16, 33, 10, 96, 2, 0, - FB_VMODE_NONINTERLACED, 0, - }, - /* #3: 720x480p@59.94/60Hz */ - [3] = { - NULL, 60, 720, 480, 37037, 60, 16, 30, 9, 62, 6, 0, - FB_VMODE_NONINTERLACED, 0, - }, - /* #5: 1920x1080i@59.94/60Hz */ - [5] = { - NULL, 60, 1920, 1080, 13763, 148, 88, 15, 2, 44, 5, - FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, - FB_VMODE_INTERLACED, 0, - }, - /* #7: 720(1440)x480iH@59.94/60Hz */ - [7] = { - NULL, 60, 1440, 480, 18554/*37108*/, 114, 38, 15, 4, 124, 3, 0, - FB_VMODE_INTERLACED, 0, - }, - /* #9: 720(1440)x240pH@59.94/60Hz */ - [9] = { - NULL, 60, 1440, 240, 18554, 114, 38, 16, 4, 124, 3, 0, - FB_VMODE_NONINTERLACED, 0, - }, - /* #18: 720x576pH@50Hz */ - [18] = { - NULL, 50, 720, 576, 37037, 68, 12, 39, 5, 64, 5, 0, - FB_VMODE_NONINTERLACED, 0, - }, - /* #19: 1280x720p@50Hz */ - [19] = { - NULL, 50, 1280, 720, 13468, 220, 440, 20, 5, 40, 5, - FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, - FB_VMODE_NONINTERLACED, 0, - }, - /* #20: 1920x1080i@50Hz */ - [20] = { - NULL, 50, 1920, 1080, 13480, 148, 528, 15, 5, 528, 5, - FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, - FB_VMODE_INTERLACED, 0, - }, - /* #32: 1920x1080p@23.98/24Hz */ - [32] = { - NULL, 24, 1920, 1080, 13468, 148, 638, 36, 4, 44, 5, - FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, - FB_VMODE_NONINTERLACED, 0, - }, - /* #35: (2880)x480p4x@59.94/60Hz */ - [35] = { - NULL, 60, 2880, 480, 9250, 240, 64, 30, 9, 248, 6, 0, - FB_VMODE_NONINTERLACED, 0, - }, -}; - const struct fb_videomode vesa_modes[] = { /* 0 640x350-85 VESA */ { NULL, 85, 640, 350, 31746, 96, 32, 60, 32, 64, 3, diff --git a/include/linux/fb.h b/include/linux/fb.h index bc24e48e396d..ccd1f74ca6ab 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -725,8 +725,6 @@ extern int fb_parse_edid(unsigned char *edid, struct fb_var_screeninfo *var); extern const unsigned char *fb_firmware_edid(struct device *device); extern void fb_edid_to_monspecs(unsigned char *edid, struct fb_monspecs *specs); -extern void fb_edid_add_monspecs(unsigned char *edid, - struct fb_monspecs *specs); extern void fb_destroy_modedb(struct fb_videomode *modedb); extern int fb_find_mode_cvt(struct fb_videomode *mode, int margins, int rb); extern unsigned char *fb_ddc_read(struct i2c_adapter *adapter); @@ -800,7 +798,6 @@ struct dmt_videomode { extern const char *fb_mode_option; extern const struct fb_videomode vesa_modes[]; -extern const struct fb_videomode cea_modes[65]; extern const struct dmt_videomode dmt_modes[]; struct fb_modelist { From 7af18b25bce0827335ab8b70b6887fb325c6fb76 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 26 Sep 2018 17:15:38 +0800 Subject: [PATCH 0823/3715] net: ovs: fix return type of ndo_start_xmit function [ Upstream commit eddf11e18dff0e8671e06ce54e64cfc843303ab9 ] The method ndo_start_xmit() is defined as returning an 'netdev_tx_t', which is a typedef for an enum type, so make sure the implementation in this driver has returns 'netdev_tx_t' value, and change the function return type to netdev_tx_t. Found by coccinelle. Signed-off-by: YueHaibing Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/openvswitch/vport-internal_dev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 1c09ad457d2a..1083b5e90134 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -44,7 +44,8 @@ static struct internal_dev *internal_dev_priv(struct net_device *netdev) } /* Called with rcu_read_lock_bh. */ -static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t +internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev) { int len, err; @@ -63,7 +64,7 @@ static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev) } else { netdev->stats.tx_errors++; } - return 0; + return NETDEV_TX_OK; } static int internal_dev_open(struct net_device *netdev) From 296269bc53129ec17fff615c534341e66c3c1b3c Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 26 Sep 2018 17:18:14 +0800 Subject: [PATCH 0824/3715] net: xen-netback: fix return type of ndo_start_xmit function [ Upstream commit a9ca7f17c6d240e269a24cbcd76abf9a940309dd ] The method ndo_start_xmit() is defined as returning an 'netdev_tx_t', which is a typedef for an enum type, so make sure the implementation in this driver has returns 'netdev_tx_t' value, and change the function return type to netdev_tx_t. Found by coccinelle. Signed-off-by: YueHaibing Acked-by: Wei Liu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/xen-netback/interface.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 2641e76d03d9..b5fa910b47b7 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -172,7 +172,8 @@ static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb, return vif->hash.mapping[skb_get_hash_raw(skb) % size]; } -static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t +xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct xenvif *vif = netdev_priv(dev); struct xenvif_queue *queue = NULL; From 358455734dd7c1a1fba346cab22f4184b0ce899b Mon Sep 17 00:00:00 2001 From: Vignesh R Date: Fri, 28 Sep 2018 11:34:42 +0530 Subject: [PATCH 0825/3715] ARM: dts: dra7: Enable workaround for errata i870 in PCIe host mode [ Upstream commit b830526f304764753fcb8b4a563a94080e982a6c ] Add ti,syscon-unaligned-access property to PCIe RC nodes to set appropriate bits in CTRL_CORE_SMA_SW_7 register to enable workaround for errata i870. Signed-off-by: Vignesh R Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/boot/dts/dra7.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index 09686d73f947..fec965009b9f 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -314,6 +314,7 @@ <0 0 0 2 &pcie1_intc 2>, <0 0 0 3 &pcie1_intc 3>, <0 0 0 4 &pcie1_intc 4>; + ti,syscon-unaligned-access = <&scm_conf1 0x14 1>; status = "disabled"; pcie1_intc: interrupt-controller { interrupt-controller; @@ -367,6 +368,7 @@ <0 0 0 2 &pcie2_intc 2>, <0 0 0 3 &pcie2_intc 3>, <0 0 0 4 &pcie2_intc 4>; + ti,syscon-unaligned-access = <&scm_conf1 0x14 2>; pcie2_intc: interrupt-controller { interrupt-controller; #address-cells = <0>; From c26aff78aa2bf3b4449cef83057276ed55a0d7f4 Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Fri, 28 Sep 2018 17:54:00 +0200 Subject: [PATCH 0826/3715] ARM: dts: omap5: enable OTG role for DWC3 controller [ Upstream commit 656c1a65ab555ee5c7cd0d6aee8ab82ca3c1795f ] Since SMPS10 and OTG cable detection extcon are described here, and work to enable OTG power when an OTG cable is plugged in, we can define OTG mode in the controller (which is disabled by default in omap5.dtsi). Tested on OMAP5EVM and Pyra. Suggested-by: Roger Quadros Signed-off-by: H. Nikolaus Schaller Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/boot/dts/omap5-board-common.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index 7824b2631cb6..f65343f8e1d6 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -694,6 +694,10 @@ vbus-supply = <&smps10_out1_reg>; }; +&dwc3 { + dr_mode = "otg"; +}; + &mcspi1 { }; From ba02192598b897105762d30f673805e84aa9971d Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 26 Sep 2018 19:28:37 +0100 Subject: [PATCH 0827/3715] net: hns3: Fix for netdev not up problem when setting mtu [ Upstream commit 93d8daf460183871a965dae339839d9e35d44309 ] Currently hns3_nic_change_mtu will try to down the netdev before setting mtu, and it does not up the netdev when the setting fails, which causes netdev not up problem. This patch fixes it by not returning when the setting fails. Fixes: a8e8b7ff3517 ("net: hns3: Add support to change MTU in HNS3 hardware") Signed-off-by: Yunsheng Lin Signed-off-by: Peng Li Signed-off-by: Salil Mehta Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c index 69726908e72c..5483cb23c08a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c @@ -1307,13 +1307,11 @@ static int hns3_nic_change_mtu(struct net_device *netdev, int new_mtu) } ret = h->ae_algo->ops->set_mtu(h, new_mtu); - if (ret) { + if (ret) netdev_err(netdev, "failed to change MTU in hardware %d\n", ret); - return ret; - } - - netdev->mtu = new_mtu; + else + netdev->mtu = new_mtu; /* if the netdev was running earlier, bring it up again */ if (if_running && hns3_nic_net_open(netdev)) From 715119d3f15464bb5f02a64f78c606ed2d1eb483 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 25 Sep 2018 15:25:21 -0700 Subject: [PATCH 0828/3715] f2fs: return correct errno in f2fs_gc [ Upstream commit 61f7725aa148ee870436a29d3a24d5c00ab7e9af ] This fixes overriding error number in f2fs_gc. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/gc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index ceb6023786bd..67120181dc2a 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1091,7 +1091,7 @@ stop: put_gc_inode(&gc_list); - if (sync) + if (sync && !ret) ret = sec_freed ? 0 : -EAGAIN; return ret; } From 375d917188a104b23b41e7887cc87b837d84fcde Mon Sep 17 00:00:00 2001 From: Philipp Rossak Date: Wed, 1 Aug 2018 11:48:01 +0200 Subject: [PATCH 0829/3715] ARM: dts: sun8i: h3-h5: ir register size should be the whole memory block [ Upstream commit 6c700289a3e84d5d3f2a95cf27732a7f7fce105b ] The size of the register should be the size of the whole memory block, not just the registers, that are needed. Signed-off-by: Philipp Rossak Acked-by: Maxime Ripard Signed-off-by: Chen-Yu Tsai Signed-off-by: Sasha Levin --- arch/arm/boot/dts/sunxi-h3-h5.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sunxi-h3-h5.dtsi b/arch/arm/boot/dts/sunxi-h3-h5.dtsi index 11240a8313c2..03f37081fc64 100644 --- a/arch/arm/boot/dts/sunxi-h3-h5.dtsi +++ b/arch/arm/boot/dts/sunxi-h3-h5.dtsi @@ -594,7 +594,7 @@ clock-names = "apb", "ir"; resets = <&r_ccu RST_APB0_IR>; interrupts = ; - reg = <0x01f02000 0x40>; + reg = <0x01f02000 0x400>; status = "disabled"; }; From 5320b434be4bd94b04b535f46087f4db140fb8e5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 8 Sep 2018 22:09:48 -0400 Subject: [PATCH 0830/3715] SUNRPC: Fix priority queue fairness [ Upstream commit f42f7c283078ce3c1e8368b140e270755b1ae313 ] Fix up the priority queue to not batch by owner, but by queue, so that we allow '1 << priority' elements to be dequeued before switching to the next priority queue. The owner field is still used to wake up requests in round robin order by owner to avoid single processes hogging the RPC layer by loading the queues. Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- include/linux/sunrpc/sched.h | 2 - net/sunrpc/sched.c | 113 +++++++++++++++++------------------ 2 files changed, 56 insertions(+), 59 deletions(-) diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index d96e74e114c0..c9548a63d09b 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -188,7 +188,6 @@ struct rpc_timer { struct rpc_wait_queue { spinlock_t lock; struct list_head tasks[RPC_NR_PRIORITY]; /* task queue for each priority level */ - pid_t owner; /* process id of last task serviced */ unsigned char maxpriority; /* maximum priority (0 if queue is not a priority queue) */ unsigned char priority; /* current priority */ unsigned char nr; /* # tasks remaining for cookie */ @@ -204,7 +203,6 @@ struct rpc_wait_queue { * from a single cookie. The aim is to improve * performance of NFS operations such as read/write. */ -#define RPC_BATCH_COUNT 16 #define RPC_IS_PRIORITY(q) ((q)->maxpriority > 0) /* diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index f9db5fe52d36..aff76fb43430 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -99,37 +99,64 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task) list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list); } -static void rpc_rotate_queue_owner(struct rpc_wait_queue *queue) -{ - struct list_head *q = &queue->tasks[queue->priority]; - struct rpc_task *task; - - if (!list_empty(q)) { - task = list_first_entry(q, struct rpc_task, u.tk_wait.list); - if (task->tk_owner == queue->owner) - list_move_tail(&task->u.tk_wait.list, q); - } -} - static void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority) { if (queue->priority != priority) { - /* Fairness: rotate the list when changing priority */ - rpc_rotate_queue_owner(queue); queue->priority = priority; + queue->nr = 1U << priority; } } -static void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid) -{ - queue->owner = pid; - queue->nr = RPC_BATCH_COUNT; -} - static void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue) { rpc_set_waitqueue_priority(queue, queue->maxpriority); - rpc_set_waitqueue_owner(queue, 0); +} + +/* + * Add a request to a queue list + */ +static void +__rpc_list_enqueue_task(struct list_head *q, struct rpc_task *task) +{ + struct rpc_task *t; + + list_for_each_entry(t, q, u.tk_wait.list) { + if (t->tk_owner == task->tk_owner) { + list_add_tail(&task->u.tk_wait.links, + &t->u.tk_wait.links); + /* Cache the queue head in task->u.tk_wait.list */ + task->u.tk_wait.list.next = q; + task->u.tk_wait.list.prev = NULL; + return; + } + } + INIT_LIST_HEAD(&task->u.tk_wait.links); + list_add_tail(&task->u.tk_wait.list, q); +} + +/* + * Remove request from a queue list + */ +static void +__rpc_list_dequeue_task(struct rpc_task *task) +{ + struct list_head *q; + struct rpc_task *t; + + if (task->u.tk_wait.list.prev == NULL) { + list_del(&task->u.tk_wait.links); + return; + } + if (!list_empty(&task->u.tk_wait.links)) { + t = list_first_entry(&task->u.tk_wait.links, + struct rpc_task, + u.tk_wait.links); + /* Assume __rpc_list_enqueue_task() cached the queue head */ + q = t->u.tk_wait.list.next; + list_add_tail(&t->u.tk_wait.list, q); + list_del(&task->u.tk_wait.links); + } + list_del(&task->u.tk_wait.list); } /* @@ -139,22 +166,9 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct rpc_task *task, unsigned char queue_priority) { - struct list_head *q; - struct rpc_task *t; - - INIT_LIST_HEAD(&task->u.tk_wait.links); if (unlikely(queue_priority > queue->maxpriority)) queue_priority = queue->maxpriority; - if (queue_priority > queue->priority) - rpc_set_waitqueue_priority(queue, queue_priority); - q = &queue->tasks[queue_priority]; - list_for_each_entry(t, q, u.tk_wait.list) { - if (t->tk_owner == task->tk_owner) { - list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links); - return; - } - } - list_add_tail(&task->u.tk_wait.list, q); + __rpc_list_enqueue_task(&queue->tasks[queue_priority], task); } /* @@ -194,13 +208,7 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, */ static void __rpc_remove_wait_queue_priority(struct rpc_task *task) { - struct rpc_task *t; - - if (!list_empty(&task->u.tk_wait.links)) { - t = list_entry(task->u.tk_wait.links.next, struct rpc_task, u.tk_wait.list); - list_move(&t->u.tk_wait.list, &task->u.tk_wait.list); - list_splice_init(&task->u.tk_wait.links, &t->u.tk_wait.links); - } + __rpc_list_dequeue_task(task); } /* @@ -212,7 +220,8 @@ static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_tas __rpc_disable_timer(queue, task); if (RPC_IS_PRIORITY(queue)) __rpc_remove_wait_queue_priority(task); - list_del(&task->u.tk_wait.list); + else + list_del(&task->u.tk_wait.list); queue->qlen--; dprintk("RPC: %5u removed from queue %p \"%s\"\n", task->tk_pid, queue, rpc_qname(queue)); @@ -481,17 +490,9 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q * Service a batch of tasks from a single owner. */ q = &queue->tasks[queue->priority]; - if (!list_empty(q)) { - task = list_entry(q->next, struct rpc_task, u.tk_wait.list); - if (queue->owner == task->tk_owner) { - if (--queue->nr) - goto out; - list_move_tail(&task->u.tk_wait.list, q); - } - /* - * Check if we need to switch queues. - */ - goto new_owner; + if (!list_empty(q) && --queue->nr) { + task = list_first_entry(q, struct rpc_task, u.tk_wait.list); + goto out; } /* @@ -503,7 +504,7 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q else q = q - 1; if (!list_empty(q)) { - task = list_entry(q->next, struct rpc_task, u.tk_wait.list); + task = list_first_entry(q, struct rpc_task, u.tk_wait.list); goto new_queue; } } while (q != &queue->tasks[queue->priority]); @@ -513,8 +514,6 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q new_queue: rpc_set_waitqueue_priority(queue, (unsigned int)(q - &queue->tasks[0])); -new_owner: - rpc_set_waitqueue_owner(queue, task->tk_owner); out: return task; } From 73289863ed4adb2afdac324be6efae8459305a8d Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 26 Sep 2018 10:55:53 -0700 Subject: [PATCH 0831/3715] IB/hfi1: Ensure ucast_dlid access doesnt exceed bounds [ Upstream commit 3144533bf667c8e53bb20656b78295960073e57b ] The dlid assignment made by looking into the u_ucast_dlid array does not do an explicit check for the size of the array. The code path to arrive at def_port, the index value is long and complicated so its best to just have an explicit check here. Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c index a72278e9cd27..9c8ddaaa6fbb 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c @@ -351,7 +351,8 @@ static uint32_t opa_vnic_get_dlid(struct opa_vnic_adapter *adapter, if (unlikely(!dlid)) v_warn("Null dlid in MAC address\n"); } else if (def_port != OPA_VNIC_INVALID_PORT) { - dlid = info->vesw.u_ucast_dlid[def_port]; + if (def_port < OPA_VESW_MAX_NUM_DEF_PORT) + dlid = info->vesw.u_ucast_dlid[def_port]; } } From 1a41082b01127d33048446f8b82beb32c7bdf580 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 6 Sep 2018 17:59:50 +0100 Subject: [PATCH 0832/3715] iommu/io-pgtable-arm: Fix race handling in split_blk_unmap() [ Upstream commit 85c7a0f1ef624ef58173ef52ea77780257bdfe04 ] In removing the pagetable-wide lock, we gained the possibility of the vanishingly unlikely case where we have a race between two concurrent unmappers splitting the same block entry. The logic to handle this is fairly straightforward - whoever loses the race frees their partial next-level table and instead dereferences the winner's newly-installed entry in order to fall back to a regular unmap, which intentionally echoes the pre-existing case of recursively splitting a 1GB block down to 4KB pages by installing a full table of 2MB blocks first. Unfortunately, the chump who implemented that logic failed to update the condition check for that fallback, meaning that if said race occurs at the last level (where the loser's unmap_idx is valid) then the unmap won't actually happen. Fix that to properly account for both the race and recursive cases. Fixes: 2c3d273eabe8 ("iommu/io-pgtable-arm: Support lockless operation") Signed-off-by: Robin Murphy [will: re-jig control flow to avoid duplicate cmpxchg test] Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- drivers/iommu/io-pgtable-arm.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index e8018a308868..17a9225283dd 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -551,13 +551,12 @@ static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data, return 0; tablep = iopte_deref(pte, data); + } else if (unmap_idx >= 0) { + io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true); + return size; } - if (unmap_idx < 0) - return __arm_lpae_unmap(data, iova, size, lvl, tablep); - - io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true); - return size; + return __arm_lpae_unmap(data, iova, size, lvl, tablep); } static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, From c1997d7041c75a82502cbd895f223e532bccc931 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 26 Sep 2018 17:32:37 +0100 Subject: [PATCH 0833/3715] kvm: arm/arm64: Fix stage2_flush_memslot for 4 level page table [ Upstream commit d2db7773ba864df6b4e19643dfc54838550d8049 ] So far we have only supported 3 level page table with fixed IPA of 40bits, where PUD is folded. With 4 level page tables, we need to check if the PUD entry is valid or not. Fix stage2_flush_memslot() to do this check, before walking down the table. Acked-by: Christoffer Dall Acked-by: Marc Zyngier Reviewed-by: Eric Auger Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin --- virt/kvm/arm/mmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 1f4cac53b923..9f69202d8e49 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -375,7 +375,8 @@ static void stage2_flush_memslot(struct kvm *kvm, pgd = kvm->arch.pgd + stage2_pgd_index(addr); do { next = stage2_pgd_addr_end(addr, end); - stage2_flush_puds(kvm, pgd, addr, next); + if (!stage2_pgd_none(*pgd)) + stage2_flush_puds(kvm, pgd, addr, next); } while (pgd++, addr = next, addr != end); } From dc4d24291c4a0a203702fba27c289805ae19b0d0 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Sat, 22 Sep 2018 21:09:55 +0530 Subject: [PATCH 0834/3715] arm64/numa: Report correct memblock range for the dummy node [ Upstream commit 77cfe950901e5c13aca2df6437a05f39dd9a929b ] The dummy node ID is marked into all memory ranges on the system. So the dummy node really extends the entire memblock.memory. Hence report correct extent information for the dummy node using memblock range helper functions instead of the range [0LLU, PFN_PHYS(max_pfn) - 1)]. Fixes: 1a2db30034 ("arm64, numa: Add NUMA support for arm64 platforms") Acked-by: Punit Agrawal Signed-off-by: Anshuman Khandual Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- arch/arm64/mm/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index dad128ba98bf..e9c843e0c172 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -419,7 +419,7 @@ static int __init dummy_numa_init(void) if (numa_off) pr_info("NUMA disabled\n"); /* Forced off on command line. */ pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n", - 0LLU, PFN_PHYS(max_pfn) - 1); + memblock_start_of_DRAM(), memblock_end_of_DRAM() - 1); for_each_memblock(memory, mblk) { ret = numa_add_memblk(0, mblk->base, mblk->base + mblk->size); From 1ab0274c2eec3c74edbe4603fb00db07ae72bd02 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Thu, 6 Sep 2018 19:46:20 +0300 Subject: [PATCH 0835/3715] ath10k: fix vdev-start timeout on error [ Upstream commit 833fd34d743c728afe6d127ef7bee67e7d9199a8 ] The vdev-start-response message should cause the completion to fire, even in the error case. Otherwise, the user still gets no useful information and everything is blocked until the timeout period. Add some warning text to print out the invalid status code to aid debugging, and propagate failure code. Signed-off-by: Ben Greear Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath10k/core.h | 1 + drivers/net/wireless/ath/ath10k/mac.c | 2 +- drivers/net/wireless/ath/ath10k/wmi.c | 19 ++++++++++++++++--- drivers/net/wireless/ath/ath10k/wmi.h | 8 +++++++- 4 files changed, 25 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index 949ebb3e967b..be9ec265dfe5 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -881,6 +881,7 @@ struct ath10k { struct completion install_key_done; + int last_wmi_vdev_start_status; struct completion vdev_setup_done; struct workqueue_struct *workqueue; diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 8c4bb56c262f..dff34448588f 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -955,7 +955,7 @@ static inline int ath10k_vdev_setup_sync(struct ath10k *ar) if (time_left == 0) return -ETIMEDOUT; - return 0; + return ar->last_wmi_vdev_start_status; } static int ath10k_monitor_vdev_start(struct ath10k *ar, int vdev_id) diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index 4d6c2986c40d..25f51ca06093 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -3133,18 +3133,31 @@ void ath10k_wmi_event_vdev_start_resp(struct ath10k *ar, struct sk_buff *skb) { struct wmi_vdev_start_ev_arg arg = {}; int ret; + u32 status; ath10k_dbg(ar, ATH10K_DBG_WMI, "WMI_VDEV_START_RESP_EVENTID\n"); + ar->last_wmi_vdev_start_status = 0; + ret = ath10k_wmi_pull_vdev_start(ar, skb, &arg); if (ret) { ath10k_warn(ar, "failed to parse vdev start event: %d\n", ret); - return; + ar->last_wmi_vdev_start_status = ret; + goto out; } - if (WARN_ON(__le32_to_cpu(arg.status))) - return; + status = __le32_to_cpu(arg.status); + if (WARN_ON_ONCE(status)) { + ath10k_warn(ar, "vdev-start-response reports status error: %d (%s)\n", + status, (status == WMI_VDEV_START_CHAN_INVALID) ? + "chan-invalid" : "unknown"); + /* Setup is done one way or another though, so we should still + * do the completion, so don't return here. + */ + ar->last_wmi_vdev_start_status = -EINVAL; + } +out: complete(&ar->vdev_setup_done); } diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h index d0e05aa437e3..947b74c64fec 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.h +++ b/drivers/net/wireless/ath/ath10k/wmi.h @@ -6480,11 +6480,17 @@ struct wmi_ch_info_ev_arg { __le32 rx_frame_count; }; +/* From 10.4 firmware, not sure all have the same values. */ +enum wmi_vdev_start_status { + WMI_VDEV_START_OK = 0, + WMI_VDEV_START_CHAN_INVALID, +}; + struct wmi_vdev_start_ev_arg { __le32 vdev_id; __le32 req_id; __le32 resp_type; /* %WMI_VDEV_RESP_ */ - __le32 status; + __le32 status; /* See wmi_vdev_start_status enum above */ }; struct wmi_peer_kick_ev_arg { From 7ce00c576f5ec4319083e66aef6befdea23a61b1 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 1 Oct 2018 10:33:02 -0700 Subject: [PATCH 0836/3715] ata: ahci_brcm: Allow using driver or DSL SoCs [ Upstream commit 7fb44929cb0e5cdcde143e1ca3ca57b5b8247db0 ] The Broadcom STB AHCI controller is the same as the one found on DSL SoCs, so we will utilize the same driver on these systems as well. Signed-off-by: Florian Fainelli Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/ata/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index cb5339166563..229a5ccd6b73 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -102,7 +102,8 @@ config SATA_AHCI_PLATFORM config AHCI_BRCM tristate "Broadcom AHCI SATA support" - depends on ARCH_BRCMSTB || BMIPS_GENERIC || ARCH_BCM_NSP + depends on ARCH_BRCMSTB || BMIPS_GENERIC || ARCH_BCM_NSP || \ + ARCH_BCM_63XX help This option enables support for the AHCI SATA3 controller found on Broadcom SoC's. From 78e2b71efdb2eecf216fdc40ea55d6d1a8716169 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Mon, 1 Oct 2018 17:26:59 +0300 Subject: [PATCH 0837/3715] ath9k: fix reporting calculated new FFT upper max [ Upstream commit 4fb5837ac2bd46a85620b297002c704e9958f64d ] Since the debug print code is outside of the loop, it shouldn't use the loop iterator anymore but instead print the found maximum index. Cc: Nick Kossifidis Signed-off-by: Simon Wunderlich Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath9k/common-spectral.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/common-spectral.c b/drivers/net/wireless/ath/ath9k/common-spectral.c index a41bcbda1d9e..37d5994eb1cc 100644 --- a/drivers/net/wireless/ath/ath9k/common-spectral.c +++ b/drivers/net/wireless/ath/ath9k/common-spectral.c @@ -411,7 +411,7 @@ ath_cmn_process_ht20_40_fft(struct ath_rx_status *rs, ath_dbg(common, SPECTRAL_SCAN, "Calculated new upper max 0x%X at %i\n", - tmp_mag, i); + tmp_mag, fft_sample_40.upper_max_index); } else for (i = dc_pos; i < SPECTRAL_HT20_40_NUM_BINS; i++) { if (fft_sample_40.data[i] == (upper_mag >> max_exp)) From 5e57274c63b332cf3a73277db7fc48ada598c2df Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Sat, 15 Sep 2018 11:04:40 +0800 Subject: [PATCH 0838/3715] usb: gadget: udc: fotg210-udc: Fix a sleep-in-atomic-context bug in fotg210_get_status() [ Upstream commit 2337a77c1cc86bc4e504ecf3799f947659c86026 ] The driver may sleep in an interrupt handler. The function call path (from bottom to top) in Linux-4.17 is: [FUNC] fotg210_ep_queue(GFP_KERNEL) drivers/usb/gadget/udc/fotg210-udc.c, 744: fotg210_ep_queue in fotg210_get_status drivers/usb/gadget/udc/fotg210-udc.c, 768: fotg210_get_status in fotg210_setup_packet drivers/usb/gadget/udc/fotg210-udc.c, 949: fotg210_setup_packet in fotg210_irq (interrupt handler) To fix this bug, GFP_KERNEL is replaced with GFP_ATOMIC. If possible, spin_unlock() and spin_lock() around fotg210_ep_queue() can be also removed. This bug is found by my static analysis tool DSAC. Signed-off-by: Jia-Ju Bai Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/gadget/udc/fotg210-udc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/fotg210-udc.c b/drivers/usb/gadget/udc/fotg210-udc.c index d17d7052605b..6866a0be249e 100644 --- a/drivers/usb/gadget/udc/fotg210-udc.c +++ b/drivers/usb/gadget/udc/fotg210-udc.c @@ -744,7 +744,7 @@ static void fotg210_get_status(struct fotg210_udc *fotg210, fotg210->ep0_req->length = 2; spin_unlock(&fotg210->lock); - fotg210_ep_queue(fotg210->gadget.ep0, fotg210->ep0_req, GFP_KERNEL); + fotg210_ep_queue(fotg210->gadget.ep0, fotg210->ep0_req, GFP_ATOMIC); spin_lock(&fotg210->lock); } From cd51659e864b2c9e8d844f9ddde6b84df39da856 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Tue, 11 Sep 2018 12:42:05 -0700 Subject: [PATCH 0839/3715] usb: dwc3: gadget: Check ENBLSLPM before sending ep command [ Upstream commit 87dd96111b0bb8e616fcbd74dbf4bb4182f2c596 ] When operating in USB 2.0 speeds (HS/FS), if GUSB2PHYCFG.ENBLSLPM or GUSB2PHYCFG.SUSPHY is set, it must be cleared before issuing an endpoint command. Current implementation only save and restore GUSB2PHYCFG.SUSPHY configuration. We must save and clear both GUSB2PHYCFG.ENBLSLPM and GUSB2PHYCFG.SUSPHY settings. Restore them after the command is completed. DWC_usb3 3.30a and DWC_usb31 1.90a programming guide section 3.2.2 Signed-off-by: Thinh Nguyen Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/dwc3/gadget.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 5916340c4162..e96b22d6fa52 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -277,27 +277,36 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned cmd, const struct usb_endpoint_descriptor *desc = dep->endpoint.desc; struct dwc3 *dwc = dep->dwc; u32 timeout = 1000; + u32 saved_config = 0; u32 reg; int cmd_status = 0; - int susphy = false; int ret = -EINVAL; /* - * Synopsys Databook 2.60a states, on section 6.3.2.5.[1-8], that if - * we're issuing an endpoint command, we must check if - * GUSB2PHYCFG.SUSPHY bit is set. If it is, then we need to clear it. + * When operating in USB 2.0 speeds (HS/FS), if GUSB2PHYCFG.ENBLSLPM or + * GUSB2PHYCFG.SUSPHY is set, it must be cleared before issuing an + * endpoint command. * - * We will also set SUSPHY bit to what it was before returning as stated - * by the same section on Synopsys databook. + * Save and clear both GUSB2PHYCFG.ENBLSLPM and GUSB2PHYCFG.SUSPHY + * settings. Restore them after the command is completed. + * + * DWC_usb3 3.30a and DWC_usb31 1.90a programming guide section 3.2.2 */ if (dwc->gadget.speed <= USB_SPEED_HIGH) { reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); if (unlikely(reg & DWC3_GUSB2PHYCFG_SUSPHY)) { - susphy = true; + saved_config |= DWC3_GUSB2PHYCFG_SUSPHY; reg &= ~DWC3_GUSB2PHYCFG_SUSPHY; - dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg); } + + if (reg & DWC3_GUSB2PHYCFG_ENBLSLPM) { + saved_config |= DWC3_GUSB2PHYCFG_ENBLSLPM; + reg &= ~DWC3_GUSB2PHYCFG_ENBLSLPM; + } + + if (saved_config) + dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg); } if (DWC3_DEPCMD_CMD(cmd) == DWC3_DEPCMD_STARTTRANSFER) { @@ -395,9 +404,9 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned cmd, } } - if (unlikely(susphy)) { + if (saved_config) { reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); - reg |= DWC3_GUSB2PHYCFG_SUSPHY; + reg |= saved_config; dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg); } From a6ee16520c0a16916f9514211518c0972b05edaf Mon Sep 17 00:00:00 2001 From: Andrew Zaborowski Date: Mon, 24 Sep 2018 18:10:22 +0200 Subject: [PATCH 0840/3715] nl80211: Fix a GET_KEY reply attribute [ Upstream commit efdfce7270de85a8706d1ea051bef3a7486809ff ] Use the NL80211_KEY_IDX attribute inside the NL80211_ATTR_KEY in NL80211_CMD_GET_KEY responses to comply with nl80211_key_policy. This is unlikely to affect existing userspace. Signed-off-by: Andrew Zaborowski Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/nl80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9627c52c3f93..df8c5312f26a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3118,7 +3118,7 @@ static void get_key_callback(void *c, struct key_params *params) params->cipher))) goto nla_put_failure; - if (nla_put_u8(cookie->msg, NL80211_ATTR_KEY_IDX, cookie->idx)) + if (nla_put_u8(cookie->msg, NL80211_KEY_IDX, cookie->idx)) goto nla_put_failure; nla_nest_end(cookie->msg, key); From c642963035a94af894402d998b58983b13d1f2c0 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 1 Oct 2018 16:13:47 +0200 Subject: [PATCH 0841/3715] irqchip/irq-mvebu-icu: Fix wrong private data retrieval [ Upstream commit 2b4dab69dcca13c5be2ddaf1337ae4accd087de6 ] The irq_domain structure has an host_data pointer that just stores private data. It is meant to not be touched by the IRQ core. However, when it comes to MSI, the MSI layer adds its own private data there with a structure that also has a host_data pointer. Because this IRQ domain is an MSI domain, to access private data we should do a d->host_data->host_data, also wrapped as 'platform_msi_get_host_data()'. This bug was lying there silently because the 'icu' structure retrieved this way was just called by dev_err(), only producing a '(NULL device *):' output on the console. Reviewed-by: Thomas Petazzoni Signed-off-by: Miquel Raynal Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin --- drivers/irqchip/irq-mvebu-icu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-mvebu-icu.c b/drivers/irqchip/irq-mvebu-icu.c index e18c48d3a92e..6a77b9ea8e41 100644 --- a/drivers/irqchip/irq-mvebu-icu.c +++ b/drivers/irqchip/irq-mvebu-icu.c @@ -92,7 +92,7 @@ static int mvebu_icu_irq_domain_translate(struct irq_domain *d, struct irq_fwspec *fwspec, unsigned long *hwirq, unsigned int *type) { - struct mvebu_icu *icu = d->host_data; + struct mvebu_icu *icu = platform_msi_get_host_data(d); unsigned int icu_group; /* Check the count of the parameters in dt */ From b14bef1238754ec0ff46ac682cad26c3b04a64b7 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 23 Sep 2018 06:54:11 -0700 Subject: [PATCH 0842/3715] watchdog: w83627hf_wdt: Support NCT6796D, NCT6797D, NCT6798D [ Upstream commit 57cbf0e3a0fd48e5ad8f3884562e8dde4827c1c8 ] The watchdog controller on NCT6796D, NCT6797D, and NCT6798D is compatible with the wtachdog controller on other Nuvoton chips. Signed-off-by: Guenter Roeck Reviewed-by: Wim Van Sebroeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Sasha Levin --- drivers/watchdog/w83627hf_wdt.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/w83627hf_wdt.c b/drivers/watchdog/w83627hf_wdt.c index 7817836bff55..4b9365d4de7a 100644 --- a/drivers/watchdog/w83627hf_wdt.c +++ b/drivers/watchdog/w83627hf_wdt.c @@ -50,7 +50,7 @@ static int cr_wdt_csr; /* WDT control & status register */ enum chips { w83627hf, w83627s, w83697hf, w83697ug, w83637hf, w83627thf, w83687thf, w83627ehf, w83627dhg, w83627uhg, w83667hg, w83627dhg_p, w83667hg_b, nct6775, nct6776, nct6779, nct6791, nct6792, nct6793, - nct6795, nct6102 }; + nct6795, nct6796, nct6102 }; static int timeout; /* in seconds */ module_param(timeout, int, 0); @@ -100,6 +100,7 @@ MODULE_PARM_DESC(early_disable, "Disable watchdog at boot time (default=0)"); #define NCT6792_ID 0xc9 #define NCT6793_ID 0xd1 #define NCT6795_ID 0xd3 +#define NCT6796_ID 0xd4 /* also NCT9697D, NCT9698D */ #define W83627HF_WDT_TIMEOUT 0xf6 #define W83697HF_WDT_TIMEOUT 0xf4 @@ -209,6 +210,7 @@ static int w83627hf_init(struct watchdog_device *wdog, enum chips chip) case nct6792: case nct6793: case nct6795: + case nct6796: case nct6102: /* * These chips have a fixed WDTO# output pin (W83627UHG), @@ -407,6 +409,9 @@ static int wdt_find(int addr) case NCT6795_ID: ret = nct6795; break; + case NCT6796_ID: + ret = nct6796; + break; case NCT6102_ID: ret = nct6102; cr_wdt_timeout = NCT6102D_WDT_TIMEOUT; @@ -450,6 +455,7 @@ static int __init wdt_init(void) "NCT6792", "NCT6793", "NCT6795", + "NCT6796", "NCT6102", }; From 67697026c0dec36e6bd09e26838724ddc8af3c0e Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Mon, 10 Sep 2018 18:29:09 +1000 Subject: [PATCH 0843/3715] KVM: PPC: Inform the userspace about TCE update failures [ Upstream commit f7960e299f13f069d6f3d4e157d91bfca2669677 ] We return H_TOO_HARD from TCE update handlers when we think that the next handler (realmode -> virtual mode -> user mode) has a chance to handle the request; H_HARDWARE/H_CLOSED otherwise. This changes the handlers to return H_TOO_HARD on every error giving the userspace an opportunity to handle any request or at least log them all. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/kvm/book3s_64_vio.c | 8 ++++---- arch/powerpc/kvm/book3s_64_vio_hv.c | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 2c6cce8e7cfd..5e4446296021 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -404,7 +404,7 @@ static long kvmppc_tce_iommu_unmap(struct kvm *kvm, long ret; if (WARN_ON_ONCE(iommu_tce_xchg(tbl, entry, &hpa, &dir))) - return H_HARDWARE; + return H_TOO_HARD; if (dir == DMA_NONE) return H_SUCCESS; @@ -434,15 +434,15 @@ long kvmppc_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl, return H_TOO_HARD; if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, tbl->it_page_shift, &hpa))) - return H_HARDWARE; + return H_TOO_HARD; if (mm_iommu_mapped_inc(mem)) - return H_CLOSED; + return H_TOO_HARD; ret = iommu_tce_xchg(tbl, entry, &hpa, &dir); if (WARN_ON_ONCE(ret)) { mm_iommu_mapped_dec(mem); - return H_HARDWARE; + return H_TOO_HARD; } if (dir != DMA_NONE) diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 23d6d1592f11..c75e5664fe3d 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -264,14 +264,14 @@ static long kvmppc_rm_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl, if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, tbl->it_page_shift, &hpa))) - return H_HARDWARE; + return H_TOO_HARD; pua = (void *) vmalloc_to_phys(pua); if (WARN_ON_ONCE_RM(!pua)) return H_HARDWARE; if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem))) - return H_CLOSED; + return H_TOO_HARD; ret = iommu_tce_xchg_rm(tbl, entry, &hpa, &dir); if (ret) { @@ -448,7 +448,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, rmap = (void *) vmalloc_to_phys(rmap); if (WARN_ON_ONCE_RM(!rmap)) - return H_HARDWARE; + return H_TOO_HARD; /* * Synchronize with the MMU notifier callbacks in From 3794a32dbc6cf86d3cfcb697662a5054960f3176 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 11 Sep 2018 16:40:20 -0700 Subject: [PATCH 0844/3715] dmaengine: ep93xx: Return proper enum in ep93xx_dma_chan_direction [ Upstream commit 9524d6b265f9b2b9a61fceb2ee2ce1c2a83e39ca ] Clang warns when implicitly converting from one enumerated type to another. Avoid this by using the equivalent value from the expected type. In file included from drivers/dma/ep93xx_dma.c:30: ./include/linux/platform_data/dma-ep93xx.h:88:10: warning: implicit conversion from enumeration type 'enum dma_data_direction' to different enumeration type 'enum dma_transfer_direction' [-Wenum-conversion] return DMA_NONE; ~~~~~~ ^~~~~~~~ 1 warning generated. Reported-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- include/linux/platform_data/dma-ep93xx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/platform_data/dma-ep93xx.h b/include/linux/platform_data/dma-ep93xx.h index f8f1f6b952a6..eb9805bb3fe8 100644 --- a/include/linux/platform_data/dma-ep93xx.h +++ b/include/linux/platform_data/dma-ep93xx.h @@ -85,7 +85,7 @@ static inline enum dma_transfer_direction ep93xx_dma_chan_direction(struct dma_chan *chan) { if (!ep93xx_dma_chan_is_m2p(chan)) - return DMA_NONE; + return DMA_TRANS_NONE; /* even channels are for TX, odd for RX */ return (chan->chan_id % 2 == 0) ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM; From b9ca94b6fc778a58356e00784b96aec4338569be Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 11 Sep 2018 16:20:25 -0700 Subject: [PATCH 0845/3715] dmaengine: timb_dma: Use proper enum in td_prep_slave_sg [ Upstream commit 5e621f5d538985f010035c6f3e28c22829d36db1 ] Clang warns when implicitly converting from one enumerated type to another. Avoid this by using the equivalent value from the expected type. drivers/dma/timb_dma.c:548:27: warning: implicit conversion from enumeration type 'enum dma_transfer_direction' to different enumeration type 'enum dma_data_direction' [-Wenum-conversion] td_desc->desc_list_len, DMA_MEM_TO_DEV); ^~~~~~~~~~~~~~ 1 warning generated. Reported-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/timb_dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c index 896bafb7a532..cf6588cc3efd 100644 --- a/drivers/dma/timb_dma.c +++ b/drivers/dma/timb_dma.c @@ -545,7 +545,7 @@ static struct dma_async_tx_descriptor *td_prep_slave_sg(struct dma_chan *chan, } dma_sync_single_for_device(chan2dmadev(chan), td_desc->txd.phys, - td_desc->desc_list_len, DMA_MEM_TO_DEV); + td_desc->desc_list_len, DMA_TO_DEVICE); return &td_desc->txd; } From 3610daee861f4b4a2758fc7087c53b5842c34f6e Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Tue, 2 Oct 2018 12:43:51 -0400 Subject: [PATCH 0846/3715] ext4: fix build error when DX_DEBUG is defined MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 799578ab16e86b074c184ec5abbda0bc698c7b0b ] Enabling DX_DEBUG triggers the build error below. info is an attribute of the dxroot structure. linux/fs/ext4/namei.c:2264:12: error: ‘info’ undeclared (first use in this function); did you mean ‘insl’? info->indirect_levels)); Fixes: e08ac99fa2a2 ("ext4: add largedir feature") Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Theodore Ts'o Reviewed-by: Lukas Czerner Signed-off-by: Sasha Levin --- fs/ext4/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 162e853dc5d6..212b01861d94 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2293,7 +2293,7 @@ again: dxroot->info.indirect_levels += 1; dxtrace(printk(KERN_DEBUG "Creating %d level index...\n", - info->indirect_levels)); + dxroot->info.indirect_levels)); err = ext4_handle_dirty_dx_node(handle, dir, frame->bh); if (err) goto journal_error; From 4bec32f211ff4a6269a35089559fab2559ce7602 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Mon, 27 Aug 2018 19:50:56 -0500 Subject: [PATCH 0847/3715] clk: keystone: Enable TISCI clocks if K3_ARCH [ Upstream commit 2f149e6e14bcb5e581e49307b54aafcd6f74a74f ] K3_ARCH uses TISCI for clocks as well. Enable the same for the driver support. Signed-off-by: Nishanth Menon Acked-by: Santosh Shilimkar Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/Makefile | 1 + drivers/clk/keystone/Kconfig | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile index f7f761b02bed..8ca03d9d693b 100644 --- a/drivers/clk/Makefile +++ b/drivers/clk/Makefile @@ -65,6 +65,7 @@ obj-$(CONFIG_ARCH_HISI) += hisilicon/ obj-y += imgtec/ obj-$(CONFIG_ARCH_MXC) += imx/ obj-$(CONFIG_MACH_INGENIC) += ingenic/ +obj-$(CONFIG_ARCH_K3) += keystone/ obj-$(CONFIG_ARCH_KEYSTONE) += keystone/ obj-$(CONFIG_MACH_LOONGSON32) += loongson1/ obj-$(CONFIG_ARCH_MEDIATEK) += mediatek/ diff --git a/drivers/clk/keystone/Kconfig b/drivers/clk/keystone/Kconfig index 7e9f0176578a..b04927d06cd1 100644 --- a/drivers/clk/keystone/Kconfig +++ b/drivers/clk/keystone/Kconfig @@ -7,7 +7,7 @@ config COMMON_CLK_KEYSTONE config TI_SCI_CLK tristate "TI System Control Interface clock drivers" - depends on (ARCH_KEYSTONE || COMPILE_TEST) && OF + depends on (ARCH_KEYSTONE || ARCH_K3 || COMPILE_TEST) && OF depends on TI_SCI_PROTOCOL default ARCH_KEYSTONE ---help--- From 711b32942cea36da666624977eeebd1aea2bcd7d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 1 Oct 2018 14:25:36 -0400 Subject: [PATCH 0848/3715] sunrpc: Fix connect metrics [ Upstream commit 3968a8a5310404c2f0b9e4d9f28cab13a12bc4fd ] For TCP, the logic in xprt_connect_status is currently never invoked to record a successful connection. Commit 2a4919919a97 ("SUNRPC: Return EAGAIN instead of ENOTCONN when waking up xprt->pending") changed the way TCP xprt's are awoken after a connect succeeds. Instead, change connection-oriented transports to bump connect_count and compute connect_time the moment that XPRT_CONNECTED is set. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin --- net/sunrpc/xprt.c | 14 ++++---------- net/sunrpc/xprtrdma/transport.c | 6 +++++- net/sunrpc/xprtsock.c | 10 ++++++---- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index d0282cc88b14..b852c34bb637 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -795,17 +795,11 @@ void xprt_connect(struct rpc_task *task) static void xprt_connect_status(struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; - - if (task->tk_status == 0) { - xprt->stat.connect_count++; - xprt->stat.connect_time += (long)jiffies - xprt->stat.connect_start; + switch (task->tk_status) { + case 0: dprintk("RPC: %5u xprt_connect_status: connection established\n", task->tk_pid); - return; - } - - switch (task->tk_status) { + break; case -ECONNREFUSED: case -ECONNRESET: case -ECONNABORTED: @@ -822,7 +816,7 @@ static void xprt_connect_status(struct rpc_task *task) default: dprintk("RPC: %5u xprt_connect_status: error %d connecting to " "server %s\n", task->tk_pid, -task->tk_status, - xprt->servername); + task->tk_rqstp->rq_xprt->servername); task->tk_status = -EIO; } } diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 8cf5ccfe180d..b1b40a1be8c5 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -238,8 +238,12 @@ rpcrdma_connect_worker(struct work_struct *work) if (++xprt->connect_cookie == 0) /* maintain a reserved value */ ++xprt->connect_cookie; if (ep->rep_connected > 0) { - if (!xprt_test_and_set_connected(xprt)) + if (!xprt_test_and_set_connected(xprt)) { + xprt->stat.connect_count++; + xprt->stat.connect_time += (long)jiffies - + xprt->stat.connect_start; xprt_wake_pending_tasks(xprt, 0); + } } else { if (xprt_test_and_clear_connected(xprt)) xprt_wake_pending_tasks(xprt, -ENOTCONN); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 05a58cc1b0cd..a42871a59f3b 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1592,6 +1592,9 @@ static void xs_tcp_state_change(struct sock *sk) clear_bit(XPRT_SOCK_CONNECTING, &transport->sock_state); xprt_clear_connecting(xprt); + xprt->stat.connect_count++; + xprt->stat.connect_time += (long)jiffies - + xprt->stat.connect_start; xprt_wake_pending_tasks(xprt, -EAGAIN); } spin_unlock(&xprt->transport_lock); @@ -2008,8 +2011,6 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, } /* Tell the socket layer to start connecting... */ - xprt->stat.connect_count++; - xprt->stat.connect_start = jiffies; return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, 0); } @@ -2041,6 +2042,9 @@ static int xs_local_setup_socket(struct sock_xprt *transport) case 0: dprintk("RPC: xprt %p connected to %s\n", xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); + xprt->stat.connect_count++; + xprt->stat.connect_time += (long)jiffies - + xprt->stat.connect_start; xprt_set_connected(xprt); case -ENOBUFS: break; @@ -2361,8 +2365,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) xs_set_memalloc(xprt); /* Tell the socket layer to start connecting... */ - xprt->stat.connect_count++; - xprt->stat.connect_start = jiffies; set_bit(XPRT_SOCK_CONNECTING, &transport->sock_state); ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); switch (ret) { From 36be9cd26716a923275e47c8912b28449d63fb1f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 1 Oct 2018 19:44:41 +0300 Subject: [PATCH 0849/3715] mei: samples: fix a signedness bug in amt_host_if_call() [ Upstream commit 185647813cac080453cb73a2e034a8821049f2a7 ] "out_buf_sz" needs to be signed for the error handling to work. Signed-off-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- samples/mei/mei-amt-version.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/mei/mei-amt-version.c b/samples/mei/mei-amt-version.c index bb9988914a56..32234481ad7d 100644 --- a/samples/mei/mei-amt-version.c +++ b/samples/mei/mei-amt-version.c @@ -370,7 +370,7 @@ static uint32_t amt_host_if_call(struct amt_host_if *acmd, unsigned int expected_sz) { uint32_t in_buf_sz; - uint32_t out_buf_sz; + ssize_t out_buf_sz; ssize_t written; uint32_t status; struct amt_host_if_resp_header *msg_hdr; From 30069fcd345543d4717f48d9c0f87a6ce942e032 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sun, 30 Sep 2018 20:51:43 -0700 Subject: [PATCH 0850/3715] cxgb4: Use proper enum in cxgb4_dcb_handle_fw_update [ Upstream commit 3b0b8f0d9a259f6a428af63e7a77547325f8e081 ] Clang warns when one enumerated type is implicitly converted to another. drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c:303:7: warning: implicit conversion from enumeration type 'enum cxgb4_dcb_state' to different enumeration type 'enum cxgb4_dcb_state_input' [-Wenum-conversion] ? CXGB4_DCB_STATE_FW_ALLSYNCED ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c:304:7: warning: implicit conversion from enumeration type 'enum cxgb4_dcb_state' to different enumeration type 'enum cxgb4_dcb_state_input' [-Wenum-conversion] : CXGB4_DCB_STATE_FW_INCOMPLETE); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2 warnings generated. Use the equivalent value of the expected type to silence Clang while resulting in no functional change. CXGB4_DCB_STATE_FW_INCOMPLETE = CXGB4_DCB_INPUT_FW_INCOMPLETE = 2 CXGB4_DCB_STATE_FW_ALLSYNCED = CXGB4_DCB_INPUT_FW_ALLSYNCED = 3 Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c index 6ee2ed30626b..306b4b320616 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c @@ -266,8 +266,8 @@ void cxgb4_dcb_handle_fw_update(struct adapter *adap, enum cxgb4_dcb_state_input input = ((pcmd->u.dcb.control.all_syncd_pkd & FW_PORT_CMD_ALL_SYNCD_F) - ? CXGB4_DCB_STATE_FW_ALLSYNCED - : CXGB4_DCB_STATE_FW_INCOMPLETE); + ? CXGB4_DCB_INPUT_FW_ALLSYNCED + : CXGB4_DCB_INPUT_FW_INCOMPLETE); if (dcb->dcb_version != FW_PORT_DCB_VER_UNKNOWN) { dcb_running_version = FW_PORT_CMD_DCB_VERSION_G( From 2781da76bd354fc8e9d31cec3ad3a0a017361f4e Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sun, 30 Sep 2018 20:47:38 -0700 Subject: [PATCH 0851/3715] cxgb4: Use proper enum in IEEE_FAUX_SYNC [ Upstream commit 258b6d141878530ba1f8fc44db683822389de914 ] Clang warns when one enumerated type is implicitly converted to another. drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c:390:4: warning: implicit conversion from enumeration type 'enum cxgb4_dcb_state' to different enumeration type 'enum cxgb4_dcb_state_input' [-Wenum-conversion] IEEE_FAUX_SYNC(dev, dcb); ^~~~~~~~~~~~~~~~~~~~~~~~ drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h:70:10: note: expanded from macro 'IEEE_FAUX_SYNC' CXGB4_DCB_STATE_FW_ALLSYNCED); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ Use the equivalent value of the expected type to silence Clang while resulting in no functional change. CXGB4_DCB_STATE_FW_ALLSYNCED = CXGB4_DCB_INPUT_FW_ALLSYNCED = 3 Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h index ccf24d3dc982..2c418c405c50 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h @@ -67,7 +67,7 @@ do { \ if ((__dcb)->dcb_version == FW_PORT_DCB_VER_IEEE) \ cxgb4_dcb_state_fsm((__dev), \ - CXGB4_DCB_STATE_FW_ALLSYNCED); \ + CXGB4_DCB_INPUT_FW_ALLSYNCED); \ } while (0) /* States we can be in for a port's Data Center Bridging. From 4d1af1c64fb83053212775be4ca0abaa6b2788e8 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 27 Sep 2018 13:40:57 +0530 Subject: [PATCH 0852/3715] powerpc/pseries: Fix DTL buffer registration [ Upstream commit db787af1b8a6b4be428ee2ea7d409dafcaa4a43c ] When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set, we register the DTL buffer for a cpu when the associated file under powerpc/dtl in debugfs is opened. When doing so, we need to set the size of the buffer being registered in the second u32 word of the buffer. This needs to be in big endian, but we are not doing the conversion resulting in the below error showing up in dmesg: dtl_start: DTL registration for cpu 0 (hw 0) failed with -4 Fix this in the obvious manner. Fixes: 7c105b63bd98 ("powerpc: Add CONFIG_CPU_LITTLE_ENDIAN kernel config option.") Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/platforms/pseries/dtl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index 18014cdeb590..c762689e0eb3 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -149,7 +149,7 @@ static int dtl_start(struct dtl *dtl) /* Register our dtl buffer with the hypervisor. The HV expects the * buffer size to be passed in the second word of the buffer */ - ((u32 *)dtl->buf)[1] = DISPATCH_LOG_BYTES; + ((u32 *)dtl->buf)[1] = cpu_to_be32(DISPATCH_LOG_BYTES); hwcpu = get_hard_smp_processor_id(dtl->cpu); addr = __pa(dtl->buf); From 5c07e4774cf83ae0ad403c3221d78110d4e7eaf8 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 27 Sep 2018 13:40:58 +0530 Subject: [PATCH 0853/3715] powerpc/pseries: Fix how we iterate over the DTL entries [ Upstream commit 9258227e9dd1da8feddb07ad9702845546a581c9 ] When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set, we look up dtl_idx in the lppaca to determine the number of entries in the buffer. Since lppaca is in big endian, we need to do an endian conversion before using this in our calculation to determine the number of entries in the buffer. Without this, we do not iterate over the existing entries in the DTL buffer properly. Fixes: 7c105b63bd98 ("powerpc: Add CONFIG_CPU_LITTLE_ENDIAN kernel config option.") Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/platforms/pseries/dtl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index c762689e0eb3..ef6595153642 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -184,7 +184,7 @@ static void dtl_stop(struct dtl *dtl) static u64 dtl_current_index(struct dtl *dtl) { - return lppaca_of(dtl->cpu).dtl_idx; + return be64_to_cpu(lppaca_of(dtl->cpu).dtl_idx); } #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ From 41a4901469907cc6b75b492152798077e3170e89 Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Wed, 26 Sep 2018 20:09:32 +0800 Subject: [PATCH 0854/3715] powerpc/xive: Move a dereference below a NULL test [ Upstream commit cd5ff94577e004e0a4457e70d0ef3a030f4010b8 ] Move the dereference of xc below the NULL test. Signed-off-by: zhong jiang Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/sysdev/xive/common.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 818fc5351591..110d8bb16ebb 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1008,12 +1008,13 @@ static void xive_ipi_eoi(struct irq_data *d) { struct xive_cpu *xc = __this_cpu_read(xive_cpu); - DBG_VERBOSE("IPI eoi: irq=%d [0x%lx] (HW IRQ 0x%x) pending=%02x\n", - d->irq, irqd_to_hwirq(d), xc->hw_ipi, xc->pending_prio); - /* Handle possible race with unplug and drop stale IPIs */ if (!xc) return; + + DBG_VERBOSE("IPI eoi: irq=%d [0x%lx] (HW IRQ 0x%x) pending=%02x\n", + d->irq, irqd_to_hwirq(d), xc->hw_ipi, xc->pending_prio); + xive_do_source_eoi(xc->hw_ipi, &xc->ipi_data); xive_do_queue_eoi(xc); } From e041a6e677394d64edb09b6340a8f3b33c476ca7 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 2 Oct 2018 16:00:35 +0300 Subject: [PATCH 0855/3715] ARM: dts: at91: sama5d4_xplained: fix addressable nand flash size [ Upstream commit df90fc64367ffdb6f1b5c0f0c4940d44832b0174 ] sama5d4_xplained comes with a 4Gb NAND flash. Increase the rootfs size to match this limit. Signed-off-by: Tudor Ambarus Signed-off-by: Ludovic Desroches Signed-off-by: Sasha Levin --- arch/arm/boot/dts/at91-sama5d4_xplained.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/at91-sama5d4_xplained.dts b/arch/arm/boot/dts/at91-sama5d4_xplained.dts index cf712444b2c2..10f2fb9e0ea6 100644 --- a/arch/arm/boot/dts/at91-sama5d4_xplained.dts +++ b/arch/arm/boot/dts/at91-sama5d4_xplained.dts @@ -240,7 +240,7 @@ rootfs@800000 { label = "rootfs"; - reg = <0x800000 0x0f800000>; + reg = <0x800000 0x1f800000>; }; }; }; From 3578651713e5d4e620ebff7671da1fd2cc692a4f Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 2 Oct 2018 16:00:36 +0300 Subject: [PATCH 0856/3715] ARM: dts: at91: at91sam9x5cm: fix addressable nand flash size [ Upstream commit 6f270d88a0c4a11725afd8fd2001ae408733afbf ] at91sam9x5cm comes with a 2Gb NAND flash. Fix the rootfs size to match this limit. Signed-off-by: Tudor Ambarus Signed-off-by: Ludovic Desroches Signed-off-by: Sasha Levin --- arch/arm/boot/dts/at91sam9x5cm.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/at91sam9x5cm.dtsi b/arch/arm/boot/dts/at91sam9x5cm.dtsi index bdeaa0b64a5b..0a673a7082be 100644 --- a/arch/arm/boot/dts/at91sam9x5cm.dtsi +++ b/arch/arm/boot/dts/at91sam9x5cm.dtsi @@ -88,7 +88,7 @@ rootfs@800000 { label = "rootfs"; - reg = <0x800000 0x1f800000>; + reg = <0x800000 0x0f800000>; }; }; }; From 191cda60bf0877656e77608650c7294bae2594e4 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 20 Sep 2018 16:30:25 -0700 Subject: [PATCH 0857/3715] mtd: rawnand: sh_flctl: Use proper enum for flctl_dma_fifo0_transfer [ Upstream commit e2bfa4ca23d9b5a7bdfcf21319fad9b59e38a05c ] Clang warns when one enumerated type is converted implicitly to another: drivers/mtd/nand/raw/sh_flctl.c:483:46: warning: implicit conversion from enumeration type 'enum dma_transfer_direction' to different enumeration type 'enum dma_data_direction' [-Wenum-conversion] flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_DEV_TO_MEM) > 0) ~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~ drivers/mtd/nand/raw/sh_flctl.c:542:46: warning: implicit conversion from enumeration type 'enum dma_transfer_direction' to different enumeration type 'enum dma_data_direction' [-Wenum-conversion] flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_MEM_TO_DEV) > 0) ~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~ 2 warnings generated. Use the proper enums from dma_data_direction to satisfy Clang. DMA_MEM_TO_DEV = DMA_TO_DEVICE = 1 DMA_DEV_TO_MEM = DMA_FROM_DEVICE = 2 Reported-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Signed-off-by: Miquel Raynal Signed-off-by: Sasha Levin --- drivers/mtd/nand/sh_flctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/sh_flctl.c b/drivers/mtd/nand/sh_flctl.c index e7f3c98487e6..43db80e5d994 100644 --- a/drivers/mtd/nand/sh_flctl.c +++ b/drivers/mtd/nand/sh_flctl.c @@ -480,7 +480,7 @@ static void read_fiforeg(struct sh_flctl *flctl, int rlen, int offset) /* initiate DMA transfer */ if (flctl->chan_fifo0_rx && rlen >= 32 && - flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_DEV_TO_MEM) > 0) + flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_FROM_DEVICE) > 0) goto convert; /* DMA success */ /* do polling transfer */ @@ -539,7 +539,7 @@ static void write_ec_fiforeg(struct sh_flctl *flctl, int rlen, /* initiate DMA transfer */ if (flctl->chan_fifo0_tx && rlen >= 32 && - flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_MEM_TO_DEV) > 0) + flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_TO_DEVICE) > 0) return; /* DMA success */ /* do polling transfer */ From 7896a8171e18b1a738d710d69490f748202270b7 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Fri, 21 Sep 2018 14:26:38 +0800 Subject: [PATCH 0858/3715] PM / hibernate: Check the success of generating md5 digest before hibernation [ Upstream commit 749fa17093ff67b31dea864531a3698b6a95c26c ] Currently if get_e820_md5() fails, then it will hibernate nevertheless. Actually the error code should be propagated to upper caller so that the hibernation could be aware of the result and terminates the process if md5 digest fails. Suggested-by: Thomas Gleixner Acked-by: Pavel Machek Signed-off-by: Chen Yu Acked-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- arch/x86/power/hibernate_64.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index 9c80966c80ba..692a179b1ba3 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -250,9 +250,9 @@ static int get_e820_md5(struct e820_table *table, void *buf) return ret; } -static void hibernation_e820_save(void *buf) +static int hibernation_e820_save(void *buf) { - get_e820_md5(e820_table_firmware, buf); + return get_e820_md5(e820_table_firmware, buf); } static bool hibernation_e820_mismatch(void *buf) @@ -272,8 +272,9 @@ static bool hibernation_e820_mismatch(void *buf) return memcmp(result, buf, MD5_DIGEST_SIZE) ? true : false; } #else -static void hibernation_e820_save(void *buf) +static int hibernation_e820_save(void *buf) { + return 0; } static bool hibernation_e820_mismatch(void *buf) @@ -318,9 +319,7 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size) rdr->magic = RESTORE_MAGIC; - hibernation_e820_save(rdr->e820_digest); - - return 0; + return hibernation_e820_save(rdr->e820_digest); } /** From c0b2af64f99e6f548121aaeac172211968f5b1eb Mon Sep 17 00:00:00 2001 From: Gustavo Pimentel Date: Thu, 23 Aug 2018 13:34:53 +0200 Subject: [PATCH 0859/3715] tools: PCI: Fix compilation warnings [ Upstream commit fef31ecaaf2c5c54db85b35e893bf8abec96b93f ] Current compilation produces the following warnings: tools/pci/pcitest.c: In function 'run_test': tools/pci/pcitest.c:56:9: warning: unused variable 'time' [-Wunused-variable] double time; ^~~~ tools/pci/pcitest.c:55:25: warning: unused variable 'end' [-Wunused-variable] struct timespec start, end; ^~~ tools/pci/pcitest.c:55:18: warning: unused variable 'start' [-Wunused-variable] struct timespec start, end; ^~~~~ tools/pci/pcitest.c:146:1: warning: control reaches end of non-void function [-Wreturn-type] } ^ Fix them: - remove unused variables - change function return from int to void, since it's not used Signed-off-by: Gustavo Pimentel [lorenzo.pieralisi@arm.com: rewrote the commit log] Signed-off-by: Lorenzo Pieralisi Reviewed-by: Kishon Vijay Abraham I Signed-off-by: Sasha Levin --- tools/pci/pcitest.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tools/pci/pcitest.c b/tools/pci/pcitest.c index 9074b477bff0..8ca1c62bc06d 100644 --- a/tools/pci/pcitest.c +++ b/tools/pci/pcitest.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include @@ -43,17 +42,15 @@ struct pci_test { unsigned long size; }; -static int run_test(struct pci_test *test) +static void run_test(struct pci_test *test) { long ret; int fd; - struct timespec start, end; - double time; fd = open(test->device, O_RDWR); if (fd < 0) { perror("can't open PCI Endpoint Test device"); - return fd; + return; } if (test->barnum >= 0 && test->barnum <= 5) { From a2667a032c6207c9d058678d4f5f8422e20603aa Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sat, 8 Sep 2018 23:54:05 +0300 Subject: [PATCH 0860/3715] clocksource/drivers/sh_cmt: Fixup for 64-bit machines [ Upstream commit 22627c6f3ed3d9d0df13eec3c831b08f8186c38e ] When trying to use CMT for clockevents on R-Car gen3 SoCs, I noticed that 'max_delta_ns' for the broadcast timer (CMT) was shown as 1000 in /proc/timer_list. It turned out that when calculating it, the driver did 1 << 32 (causing what I think was undefined behavior) resulting in a zero delta, later clamped to 1000 by cev_delta2ns(). The root cause turned out to be that the driver abused *unsigned long* for the CMT register values (which are 16/32-bit), so that the calculation of 'ch->max_match_value' in sh_cmt_setup_channel() used the wrong branch. Using more proper 'u32' instead fixed 'max_delta_ns' and even fixed the switching an active clocksource to CMT (which caused the system to turn non-interactive before). Signed-off-by: Sergei Shtylyov Reviewed-by: Geert Uytterhoeven Signed-off-by: Daniel Lezcano Signed-off-by: Sasha Levin --- drivers/clocksource/sh_cmt.c | 72 +++++++++++++++++------------------- 1 file changed, 33 insertions(+), 39 deletions(-) diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c index e09e8bf0bb9b..560541f53c8d 100644 --- a/drivers/clocksource/sh_cmt.c +++ b/drivers/clocksource/sh_cmt.c @@ -75,18 +75,17 @@ struct sh_cmt_info { enum sh_cmt_model model; unsigned long width; /* 16 or 32 bit version of hardware block */ - unsigned long overflow_bit; - unsigned long clear_bits; + u32 overflow_bit; + u32 clear_bits; /* callbacks for CMSTR and CMCSR access */ - unsigned long (*read_control)(void __iomem *base, unsigned long offs); + u32 (*read_control)(void __iomem *base, unsigned long offs); void (*write_control)(void __iomem *base, unsigned long offs, - unsigned long value); + u32 value); /* callbacks for CMCNT and CMCOR access */ - unsigned long (*read_count)(void __iomem *base, unsigned long offs); - void (*write_count)(void __iomem *base, unsigned long offs, - unsigned long value); + u32 (*read_count)(void __iomem *base, unsigned long offs); + void (*write_count)(void __iomem *base, unsigned long offs, u32 value); }; struct sh_cmt_channel { @@ -100,9 +99,9 @@ struct sh_cmt_channel { unsigned int timer_bit; unsigned long flags; - unsigned long match_value; - unsigned long next_match_value; - unsigned long max_match_value; + u32 match_value; + u32 next_match_value; + u32 max_match_value; raw_spinlock_t lock; struct clock_event_device ced; struct clocksource cs; @@ -157,24 +156,22 @@ struct sh_cmt_device { #define SH_CMT32_CMCSR_CKS_RCLK1 (7 << 0) #define SH_CMT32_CMCSR_CKS_MASK (7 << 0) -static unsigned long sh_cmt_read16(void __iomem *base, unsigned long offs) +static u32 sh_cmt_read16(void __iomem *base, unsigned long offs) { return ioread16(base + (offs << 1)); } -static unsigned long sh_cmt_read32(void __iomem *base, unsigned long offs) +static u32 sh_cmt_read32(void __iomem *base, unsigned long offs) { return ioread32(base + (offs << 2)); } -static void sh_cmt_write16(void __iomem *base, unsigned long offs, - unsigned long value) +static void sh_cmt_write16(void __iomem *base, unsigned long offs, u32 value) { iowrite16(value, base + (offs << 1)); } -static void sh_cmt_write32(void __iomem *base, unsigned long offs, - unsigned long value) +static void sh_cmt_write32(void __iomem *base, unsigned long offs, u32 value) { iowrite32(value, base + (offs << 2)); } @@ -236,7 +233,7 @@ static const struct sh_cmt_info sh_cmt_info[] = { #define CMCNT 1 /* channel register */ #define CMCOR 2 /* channel register */ -static inline unsigned long sh_cmt_read_cmstr(struct sh_cmt_channel *ch) +static inline u32 sh_cmt_read_cmstr(struct sh_cmt_channel *ch) { if (ch->iostart) return ch->cmt->info->read_control(ch->iostart, 0); @@ -244,8 +241,7 @@ static inline unsigned long sh_cmt_read_cmstr(struct sh_cmt_channel *ch) return ch->cmt->info->read_control(ch->cmt->mapbase, 0); } -static inline void sh_cmt_write_cmstr(struct sh_cmt_channel *ch, - unsigned long value) +static inline void sh_cmt_write_cmstr(struct sh_cmt_channel *ch, u32 value) { if (ch->iostart) ch->cmt->info->write_control(ch->iostart, 0, value); @@ -253,39 +249,35 @@ static inline void sh_cmt_write_cmstr(struct sh_cmt_channel *ch, ch->cmt->info->write_control(ch->cmt->mapbase, 0, value); } -static inline unsigned long sh_cmt_read_cmcsr(struct sh_cmt_channel *ch) +static inline u32 sh_cmt_read_cmcsr(struct sh_cmt_channel *ch) { return ch->cmt->info->read_control(ch->ioctrl, CMCSR); } -static inline void sh_cmt_write_cmcsr(struct sh_cmt_channel *ch, - unsigned long value) +static inline void sh_cmt_write_cmcsr(struct sh_cmt_channel *ch, u32 value) { ch->cmt->info->write_control(ch->ioctrl, CMCSR, value); } -static inline unsigned long sh_cmt_read_cmcnt(struct sh_cmt_channel *ch) +static inline u32 sh_cmt_read_cmcnt(struct sh_cmt_channel *ch) { return ch->cmt->info->read_count(ch->ioctrl, CMCNT); } -static inline void sh_cmt_write_cmcnt(struct sh_cmt_channel *ch, - unsigned long value) +static inline void sh_cmt_write_cmcnt(struct sh_cmt_channel *ch, u32 value) { ch->cmt->info->write_count(ch->ioctrl, CMCNT, value); } -static inline void sh_cmt_write_cmcor(struct sh_cmt_channel *ch, - unsigned long value) +static inline void sh_cmt_write_cmcor(struct sh_cmt_channel *ch, u32 value) { ch->cmt->info->write_count(ch->ioctrl, CMCOR, value); } -static unsigned long sh_cmt_get_counter(struct sh_cmt_channel *ch, - int *has_wrapped) +static u32 sh_cmt_get_counter(struct sh_cmt_channel *ch, u32 *has_wrapped) { - unsigned long v1, v2, v3; - int o1, o2; + u32 v1, v2, v3; + u32 o1, o2; o1 = sh_cmt_read_cmcsr(ch) & ch->cmt->info->overflow_bit; @@ -305,7 +297,8 @@ static unsigned long sh_cmt_get_counter(struct sh_cmt_channel *ch, static void sh_cmt_start_stop_ch(struct sh_cmt_channel *ch, int start) { - unsigned long flags, value; + unsigned long flags; + u32 value; /* start stop register shared by multiple timer channels */ raw_spin_lock_irqsave(&ch->cmt->lock, flags); @@ -412,11 +405,11 @@ static void sh_cmt_disable(struct sh_cmt_channel *ch) static void sh_cmt_clock_event_program_verify(struct sh_cmt_channel *ch, int absolute) { - unsigned long new_match; - unsigned long value = ch->next_match_value; - unsigned long delay = 0; - unsigned long now = 0; - int has_wrapped; + u32 value = ch->next_match_value; + u32 new_match; + u32 delay = 0; + u32 now = 0; + u32 has_wrapped; now = sh_cmt_get_counter(ch, &has_wrapped); ch->flags |= FLAG_REPROGRAM; /* force reprogram */ @@ -613,9 +606,10 @@ static struct sh_cmt_channel *cs_to_sh_cmt(struct clocksource *cs) static u64 sh_cmt_clocksource_read(struct clocksource *cs) { struct sh_cmt_channel *ch = cs_to_sh_cmt(cs); - unsigned long flags, raw; + unsigned long flags; unsigned long value; - int has_wrapped; + u32 has_wrapped; + u32 raw; raw_spin_lock_irqsave(&ch->lock, flags); value = ch->total_cycles; From 2f41366258fde1ba0b7822e4b841206ff26d9236 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Mon, 10 Sep 2018 23:22:16 +0300 Subject: [PATCH 0861/3715] clocksource/drivers/sh_cmt: Fix clocksource width for 32-bit machines [ Upstream commit 37e7742c55ba856eaec7e35673ee370f36eb17f3 ] The driver seems to abuse *unsigned long* not only for the (32-bit) register values but also for the 'sh_cmt_channel::total_cycles' which needs to always be 64-bit -- as a result, the clocksource's mask is needlessly clamped down to 32-bits on the 32-bit machines... Fixes: 19bdc9d061bc ("clocksource: sh_cmt clocksource support") Reported-by: Geert Uytterhoeven Signed-off-by: Sergei Shtylyov Reviewed-by: Simon Horman Reviewed-by: Geert Uytterhoeven Signed-off-by: Daniel Lezcano Signed-off-by: Sasha Levin --- drivers/clocksource/sh_cmt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c index 560541f53c8d..3cd62f7c33e3 100644 --- a/drivers/clocksource/sh_cmt.c +++ b/drivers/clocksource/sh_cmt.c @@ -105,7 +105,7 @@ struct sh_cmt_channel { raw_spinlock_t lock; struct clock_event_device ced; struct clocksource cs; - unsigned long total_cycles; + u64 total_cycles; bool cs_enabled; }; @@ -607,8 +607,8 @@ static u64 sh_cmt_clocksource_read(struct clocksource *cs) { struct sh_cmt_channel *ch = cs_to_sh_cmt(cs); unsigned long flags; - unsigned long value; u32 has_wrapped; + u64 value; u32 raw; raw_spin_lock_irqsave(&ch->lock, flags); @@ -682,7 +682,7 @@ static int sh_cmt_register_clocksource(struct sh_cmt_channel *ch, cs->disable = sh_cmt_clocksource_disable; cs->suspend = sh_cmt_clocksource_suspend; cs->resume = sh_cmt_clocksource_resume; - cs->mask = CLOCKSOURCE_MASK(sizeof(unsigned long) * 8); + cs->mask = CLOCKSOURCE_MASK(sizeof(u64) * 8); cs->flags = CLOCK_SOURCE_IS_CONTINUOUS; dev_info(&ch->cmt->pdev->dev, "ch%u: used as clock source\n", From b6ba15616007f9fe0b10eb6e8f0a9ac014e68a6f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 3 Oct 2018 15:04:41 +1000 Subject: [PATCH 0862/3715] md: allow metadata updates while suspending an array - fix [ Upstream commit 059421e041eb461fb2b3e81c9adaec18ef03ca3c ] Commit 35bfc52187f6 ("md: allow metadata update while suspending.") added support for allowing md_check_recovery() to still perform metadata updates while the array is entering the 'suspended' state. This is needed to allow the processes of entering the state to complete. Unfortunately, the patch doesn't really work. The test for "mddev->suspended" at the start of md_check_recovery() means that the function doesn't try to do anything at all while entering suspend. This patch moves the code of updating the metadata while suspending to *before* the test on mddev->suspended. Reported-by: Jeff Mahoney Fixes: 35bfc52187f6 ("md: allow metadata update while suspending.") Signed-off-by: NeilBrown Signed-off-by: Shaohua Li Signed-off-by: Sasha Levin --- drivers/md/md.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index e529cef5483a..b942c74f1ce8 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8736,6 +8736,18 @@ static void md_start_sync(struct work_struct *ws) */ void md_check_recovery(struct mddev *mddev) { + if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) { + /* Write superblock - thread that called mddev_suspend() + * holds reconfig_mutex for us. + */ + set_bit(MD_UPDATING_SB, &mddev->flags); + smp_mb__after_atomic(); + if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags)) + md_update_sb(mddev, 0); + clear_bit_unlock(MD_UPDATING_SB, &mddev->flags); + wake_up(&mddev->sb_wait); + } + if (mddev->suspended) return; @@ -8896,16 +8908,6 @@ void md_check_recovery(struct mddev *mddev) unlock: wake_up(&mddev->sb_wait); mddev_unlock(mddev); - } else if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) { - /* Write superblock - thread that called mddev_suspend() - * holds reconfig_mutex for us. - */ - set_bit(MD_UPDATING_SB, &mddev->flags); - smp_mb__after_atomic(); - if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags)) - md_update_sb(mddev, 0); - clear_bit_unlock(MD_UPDATING_SB, &mddev->flags); - wake_up(&mddev->sb_wait); } } EXPORT_SYMBOL(md_check_recovery); From 1b262d5d5ab87a24fdad8c4ad30f23f79d1bc720 Mon Sep 17 00:00:00 2001 From: Radoslaw Tyl Date: Wed, 5 Sep 2018 09:00:51 +0200 Subject: [PATCH 0863/3715] ixgbe: Fix ixgbe TX hangs with XDP_TX beyond queue limit [ Upstream commit 8d7179b1e2d64b3493c0114916486fe92e6109a9 ] We have Tx hang when number Tx and XDP queues are more than 64. In XDP always is MTQC == 0x0 (64TxQs). We need more space for Tx queues. Signed-off-by: Radoslaw Tyl Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 01c120d656c5..d1472727ef88 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -3490,12 +3490,18 @@ static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter) else mtqc |= IXGBE_MTQC_64VF; } else { - if (tcs > 4) + if (tcs > 4) { mtqc = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ; - else if (tcs > 1) + } else if (tcs > 1) { mtqc = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ; - else - mtqc = IXGBE_MTQC_64Q_1PB; + } else { + u8 max_txq = adapter->num_tx_queues + + adapter->num_xdp_queues; + if (max_txq > 63) + mtqc = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ; + else + mtqc = IXGBE_MTQC_64Q_1PB; + } } IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc); From 01b71317fedfbf6a6c56a77713e3ba28b1c13e00 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 21 Sep 2018 03:13:59 -0700 Subject: [PATCH 0864/3715] i40e: Use proper enum in i40e_ndo_set_vf_link_state [ Upstream commit 43ade6ad18416b8fd5bb3c9e9789faa666527eec ] Clang warns when one enumerated type is converted implicitly to another. drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c:4214:42: warning: implicit conversion from enumeration type 'enum i40e_aq_link_speed' to different enumeration type 'enum virtchnl_link_speed' [-Wenum-conversion] pfe.event_data.link_event.link_speed = I40E_LINK_SPEED_40GB; ~ ^~~~~~~~~~~~~~~~~~~~ 1 warning generated. Use the proper enum from virtchnl_link_speed, which has the same value as I40E_LINK_SPEED_40GB, VIRTCHNL_LINK_SPEED_40GB. This appears to be missed by commit ff3f4cc267f6 ("virtchnl: finish conversion to virtchnl interface"). Link: https://github.com/ClangBuiltLinux/linux/issues/81 Signed-off-by: Nathan Chancellor Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index b3307b1b3aac..fae3625ec0b6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -3201,7 +3201,7 @@ int i40e_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link) vf->link_forced = true; vf->link_up = true; pfe.event_data.link_event.link_status = true; - pfe.event_data.link_event.link_speed = I40E_LINK_SPEED_40GB; + pfe.event_data.link_event.link_speed = VIRTCHNL_LINK_SPEED_40GB; break; case IFLA_VF_LINK_STATE_DISABLE: vf->link_forced = true; From 2778c84d3754966ee3da5f1eef8a4717fbfface2 Mon Sep 17 00:00:00 2001 From: Radoslaw Tyl Date: Mon, 24 Sep 2018 09:24:20 +0200 Subject: [PATCH 0865/3715] ixgbe: Fix crash with VFs and flow director on interface flap [ Upstream commit 5d826d209164b0752c883607be4cdbbcf7cab494 ] This patch fix crash when we have restore flow director filters after reset adapter. In ixgbe_fdir_filter_restore() filter->action is outside of the rx_ring array, as it has a VF identifier in the upper 32 bits. Signed-off-by: Radoslaw Tyl Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index d1472727ef88..4801d96c4fa9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -5129,6 +5129,7 @@ static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; struct hlist_node *node2; struct ixgbe_fdir_filter *filter; + u64 action; spin_lock(&adapter->fdir_perfect_lock); @@ -5137,12 +5138,17 @@ static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter) hlist_for_each_entry_safe(filter, node2, &adapter->fdir_filter_list, fdir_node) { + action = filter->action; + if (action != IXGBE_FDIR_DROP_QUEUE && action != 0) + action = + (action >> ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF) - 1; + ixgbe_fdir_write_perfect_filter_82599(hw, &filter->filter, filter->sw_idx, - (filter->action == IXGBE_FDIR_DROP_QUEUE) ? + (action == IXGBE_FDIR_DROP_QUEUE) ? IXGBE_FDIR_DROP_QUEUE : - adapter->rx_ring[filter->action]->reg_idx); + adapter->rx_ring[action]->reg_idx); } spin_unlock(&adapter->fdir_perfect_lock); From de4f2ad656dbc84e897d959f6076e73c145850d8 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 29 Sep 2018 03:55:16 +0000 Subject: [PATCH 0866/3715] IB/mthca: Fix error return code in __mthca_init_one() [ Upstream commit 39f2495618c5e980d2873ea3f2d1877dd253e07a ] Fix to return a negative error code from the mthca_cmd_init() error handling case instead of 0, as done elsewhere in this function. Fixes: 80fd8238734c ("[PATCH] IB/mthca: Encapsulate command interface init") Signed-off-by: Wei Yongjun Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mthca/mthca_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index e36a9bc52268..ccf50dafce9c 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -986,7 +986,8 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type) goto err_free_dev; } - if (mthca_cmd_init(mdev)) { + err = mthca_cmd_init(mdev); + if (err) { mthca_err(mdev, "Failed to init command interface, aborting.\n"); goto err_free_dev; } From df0fa028d4f79fe4a7790f03d4461ccfcf3763be Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 24 Sep 2018 12:57:16 -0700 Subject: [PATCH 0867/3715] IB/mlx4: Avoid implicit enumerated type conversion [ Upstream commit b56511c15713ba6c7572e77a41f7ddba9c1053ec ] Clang warns when one enumerated type is implicitly converted to another. drivers/infiniband/hw/mlx4/mad.c:1811:41: warning: implicit conversion from enumeration type 'enum mlx4_ib_qp_flags' to different enumeration type 'enum ib_qp_create_flags' [-Wenum-conversion] qp_init_attr.init_attr.create_flags = MLX4_IB_SRIOV_TUNNEL_QP; ~ ^~~~~~~~~~~~~~~~~~~~~~~ drivers/infiniband/hw/mlx4/mad.c:1819:41: warning: implicit conversion from enumeration type 'enum mlx4_ib_qp_flags' to different enumeration type 'enum ib_qp_create_flags' [-Wenum-conversion] qp_init_attr.init_attr.create_flags = MLX4_IB_SRIOV_SQP; ~ ^~~~~~~~~~~~~~~~~ The type mlx4_ib_qp_flags explicitly provides supplemental values to the type ib_qp_create_flags. Make that clear to Clang by changing the create_flags type to u32. Reported-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- include/rdma/ib_verbs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 4a4319331989..73cc5cfb72e0 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1120,7 +1120,7 @@ struct ib_qp_init_attr { struct ib_qp_cap cap; enum ib_sig_type sq_sig_type; enum ib_qp_type qp_type; - enum ib_qp_create_flags create_flags; + u32 create_flags; /* * Only needed for special QP types, or when using the RW API. From b0cf701d8713986a354bd43f8f85808a65f2957c Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 3 Oct 2018 11:45:38 -0700 Subject: [PATCH 0868/3715] ACPICA: Never run _REG on system_memory and system_IO [ Upstream commit 8b1cafdcb4b75c5027c52f1e82b47ebe727ad7ed ] These address spaces are defined by the ACPI spec to be "always available", and thus _REG should never be run on them. Provides compatibility with other ACPI implementations. Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/acpica/acevents.h | 2 ++ drivers/acpi/acpica/aclocal.h | 2 +- drivers/acpi/acpica/evregion.c | 17 +++++++++++++++-- drivers/acpi/acpica/evrgnini.c | 6 +----- drivers/acpi/acpica/evxfregn.c | 1 - 5 files changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/acpi/acpica/acevents.h b/drivers/acpi/acpica/acevents.h index a2adfd42f85c..bfddcd989974 100644 --- a/drivers/acpi/acpica/acevents.h +++ b/drivers/acpi/acpica/acevents.h @@ -245,6 +245,8 @@ acpi_ev_default_region_setup(acpi_handle handle, acpi_status acpi_ev_initialize_region(union acpi_operand_object *region_obj); +u8 acpi_ev_is_pci_root_bridge(struct acpi_namespace_node *node); + /* * evsci - SCI (System Control Interrupt) handling/dispatch */ diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h index 0d45b8bb1678..b10e92de7dd8 100644 --- a/drivers/acpi/acpica/aclocal.h +++ b/drivers/acpi/acpica/aclocal.h @@ -429,9 +429,9 @@ struct acpi_simple_repair_info { /* Info for running the _REG methods */ struct acpi_reg_walk_info { - acpi_adr_space_type space_id; u32 function; u32 reg_run_count; + acpi_adr_space_type space_id; }; /***************************************************************************** diff --git a/drivers/acpi/acpica/evregion.c b/drivers/acpi/acpica/evregion.c index 28b447ff92df..3a3277f98292 100644 --- a/drivers/acpi/acpica/evregion.c +++ b/drivers/acpi/acpica/evregion.c @@ -677,6 +677,19 @@ acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, ACPI_FUNCTION_TRACE(ev_execute_reg_methods); + /* + * These address spaces do not need a call to _REG, since the ACPI + * specification defines them as: "must always be accessible". Since + * they never change state (never become unavailable), no need to ever + * call _REG on them. Also, a data_table is not a "real" address space, + * so do not call _REG. September 2018. + */ + if ((space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) || + (space_id == ACPI_ADR_SPACE_SYSTEM_IO) || + (space_id == ACPI_ADR_SPACE_DATA_TABLE)) { + return_VOID; + } + info.space_id = space_id; info.function = function; info.reg_run_count = 0; @@ -738,8 +751,8 @@ acpi_ev_reg_run(acpi_handle obj_handle, } /* - * We only care about regions.and objects that are allowed to have address - * space handlers + * We only care about regions and objects that are allowed to have + * address space handlers */ if ((node->type != ACPI_TYPE_REGION) && (node != acpi_gbl_root_node)) { return (AE_OK); diff --git a/drivers/acpi/acpica/evrgnini.c b/drivers/acpi/acpica/evrgnini.c index 93ec528bcd9a..3b48f1ecb55b 100644 --- a/drivers/acpi/acpica/evrgnini.c +++ b/drivers/acpi/acpica/evrgnini.c @@ -50,9 +50,6 @@ #define _COMPONENT ACPI_EVENTS ACPI_MODULE_NAME("evrgnini") -/* Local prototypes */ -static u8 acpi_ev_is_pci_root_bridge(struct acpi_namespace_node *node); - /******************************************************************************* * * FUNCTION: acpi_ev_system_memory_region_setup @@ -67,7 +64,6 @@ static u8 acpi_ev_is_pci_root_bridge(struct acpi_namespace_node *node); * DESCRIPTION: Setup a system_memory operation region * ******************************************************************************/ - acpi_status acpi_ev_system_memory_region_setup(acpi_handle handle, u32 function, @@ -347,7 +343,7 @@ acpi_ev_pci_config_region_setup(acpi_handle handle, * ******************************************************************************/ -static u8 acpi_ev_is_pci_root_bridge(struct acpi_namespace_node *node) +u8 acpi_ev_is_pci_root_bridge(struct acpi_namespace_node *node) { acpi_status status; struct acpi_pnp_device_id *hid; diff --git a/drivers/acpi/acpica/evxfregn.c b/drivers/acpi/acpica/evxfregn.c index beba9d56a0d8..742a9fe6e235 100644 --- a/drivers/acpi/acpica/evxfregn.c +++ b/drivers/acpi/acpica/evxfregn.c @@ -227,7 +227,6 @@ acpi_remove_address_space_handler(acpi_handle device, */ region_obj = handler_obj->address_space.region_list; - } /* Remove this Handler object from the list */ From 991d57ecd8bc0db05186a4ecb2341748b1b7e97e Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 2 Oct 2018 09:01:04 +1000 Subject: [PATCH 0869/3715] powerpc/time: Use clockevents_register_device(), fixing an issue with large decrementer [ Upstream commit 8b78fdb045de60a4eb35460092bbd3cffa925353 ] We currently cap the decrementer clockevent at 4 seconds, even on systems with large decrementer support. Fix this by converting the code to use clockevents_register_device() which calculates the upper bound based on the max_delta passed in. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/kernel/time.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index fe6f3a285455..870e75d30459 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -984,10 +984,10 @@ static void register_decrementer_clockevent(int cpu) *dec = decrementer_clockevent; dec->cpumask = cpumask_of(cpu); + clockevents_config_and_register(dec, ppc_tb_freq, 2, decrementer_max); + printk_once(KERN_DEBUG "clockevent: %s mult[%x] shift[%d] cpu[%d]\n", dec->name, dec->mult, dec->shift, cpu); - - clockevents_register_device(dec); } static void enable_large_decrementer(void) @@ -1035,18 +1035,7 @@ static void __init set_decrementer_max(void) static void __init init_decrementer_clockevent(void) { - int cpu = smp_processor_id(); - - clockevents_calc_mult_shift(&decrementer_clockevent, ppc_tb_freq, 4); - - decrementer_clockevent.max_delta_ns = - clockevent_delta2ns(decrementer_max, &decrementer_clockevent); - decrementer_clockevent.max_delta_ticks = decrementer_max; - decrementer_clockevent.min_delta_ns = - clockevent_delta2ns(2, &decrementer_clockevent); - decrementer_clockevent.min_delta_ticks = 2; - - register_decrementer_clockevent(cpu); + register_decrementer_clockevent(smp_processor_id()); } void secondary_cpu_time_init(void) From e57ae05d10f8e6e57c94bfcf8e36cb5f105a7ff8 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 3 Oct 2018 19:37:54 -0700 Subject: [PATCH 0870/3715] ata: ep93xx: Use proper enums for directions [ Upstream commit 6adde4a36f1b6a562a1057fbb1065007851050e7 ] Clang warns when one enumerated type is implicitly converted to another. drivers/ata/pata_ep93xx.c:662:36: warning: implicit conversion from enumeration type 'enum dma_data_direction' to different enumeration type 'enum dma_transfer_direction' [-Wenum-conversion] drv_data->dma_rx_data.direction = DMA_FROM_DEVICE; ~ ^~~~~~~~~~~~~~~ drivers/ata/pata_ep93xx.c:670:36: warning: implicit conversion from enumeration type 'enum dma_data_direction' to different enumeration type 'enum dma_transfer_direction' [-Wenum-conversion] drv_data->dma_tx_data.direction = DMA_TO_DEVICE; ~ ^~~~~~~~~~~~~ drivers/ata/pata_ep93xx.c:681:19: warning: implicit conversion from enumeration type 'enum dma_data_direction' to different enumeration type 'enum dma_transfer_direction' [-Wenum-conversion] conf.direction = DMA_FROM_DEVICE; ~ ^~~~~~~~~~~~~~~ drivers/ata/pata_ep93xx.c:692:19: warning: implicit conversion from enumeration type 'enum dma_data_direction' to different enumeration type 'enum dma_transfer_direction' [-Wenum-conversion] conf.direction = DMA_TO_DEVICE; ~ ^~~~~~~~~~~~~ Use the equivalent valued enums from the expected type so that Clang no longer warns about a conversion. DMA_TO_DEVICE = DMA_MEM_TO_DEV = 1 DMA_FROM_DEVICE = DMA_DEV_TO_MEM = 2 Acked-by: Bartlomiej Zolnierkiewicz Signed-off-by: Nathan Chancellor Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/ata/pata_ep93xx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/ata/pata_ep93xx.c b/drivers/ata/pata_ep93xx.c index 0a550190955a..cc6d06c1b2c7 100644 --- a/drivers/ata/pata_ep93xx.c +++ b/drivers/ata/pata_ep93xx.c @@ -659,7 +659,7 @@ static void ep93xx_pata_dma_init(struct ep93xx_pata_data *drv_data) * start of new transfer. */ drv_data->dma_rx_data.port = EP93XX_DMA_IDE; - drv_data->dma_rx_data.direction = DMA_FROM_DEVICE; + drv_data->dma_rx_data.direction = DMA_DEV_TO_MEM; drv_data->dma_rx_data.name = "ep93xx-pata-rx"; drv_data->dma_rx_channel = dma_request_channel(mask, ep93xx_pata_dma_filter, &drv_data->dma_rx_data); @@ -667,7 +667,7 @@ static void ep93xx_pata_dma_init(struct ep93xx_pata_data *drv_data) return; drv_data->dma_tx_data.port = EP93XX_DMA_IDE; - drv_data->dma_tx_data.direction = DMA_TO_DEVICE; + drv_data->dma_tx_data.direction = DMA_MEM_TO_DEV; drv_data->dma_tx_data.name = "ep93xx-pata-tx"; drv_data->dma_tx_channel = dma_request_channel(mask, ep93xx_pata_dma_filter, &drv_data->dma_tx_data); @@ -678,7 +678,7 @@ static void ep93xx_pata_dma_init(struct ep93xx_pata_data *drv_data) /* Configure receive channel direction and source address */ memset(&conf, 0, sizeof(conf)); - conf.direction = DMA_FROM_DEVICE; + conf.direction = DMA_DEV_TO_MEM; conf.src_addr = drv_data->udma_in_phys; conf.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; if (dmaengine_slave_config(drv_data->dma_rx_channel, &conf)) { @@ -689,7 +689,7 @@ static void ep93xx_pata_dma_init(struct ep93xx_pata_data *drv_data) /* Configure transmit channel direction and destination address */ memset(&conf, 0, sizeof(conf)); - conf.direction = DMA_TO_DEVICE; + conf.direction = DMA_MEM_TO_DEV; conf.dst_addr = drv_data->udma_out_phys; conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; if (dmaengine_slave_config(drv_data->dma_tx_channel, &conf)) { From 86f049f29a2d8d7daa573c51a93d73d7c0f5f4aa Mon Sep 17 00:00:00 2001 From: Matthias Reichl Date: Tue, 28 Aug 2018 09:49:42 -0400 Subject: [PATCH 0871/3715] media: rc: ir-rc6-decoder: enable toggle bit for Kathrein RCU-676 remote [ Upstream commit 85e4af0a7ae2f146769b7475ae531bf8a3f3afb4 ] The Kathrein RCU-676 remote uses the 32-bit rc6 protocol and toggles bit 15 (0x8000) on repeated button presses, like MCE remotes. Add it's customer code 0x80460000 to the 32-bit rc6 toggle handling code to get proper scancodes and toggle reports. Signed-off-by: Matthias Reichl Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/rc/ir-rc6-decoder.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/media/rc/ir-rc6-decoder.c b/drivers/media/rc/ir-rc6-decoder.c index 5d0d2fe3b7a7..90f7930444a1 100644 --- a/drivers/media/rc/ir-rc6-decoder.c +++ b/drivers/media/rc/ir-rc6-decoder.c @@ -40,6 +40,7 @@ #define RC6_6A_MCE_TOGGLE_MASK 0x8000 /* for the body bits */ #define RC6_6A_LCC_MASK 0xffff0000 /* RC6-6A-32 long customer code mask */ #define RC6_6A_MCE_CC 0x800f0000 /* MCE customer code */ +#define RC6_6A_KATHREIN_CC 0x80460000 /* Kathrein RCU-676 customer code */ #ifndef CHAR_BIT #define CHAR_BIT 8 /* Normally in */ #endif @@ -252,13 +253,17 @@ again: toggle = 0; break; case 32: - if ((scancode & RC6_6A_LCC_MASK) == RC6_6A_MCE_CC) { + switch (scancode & RC6_6A_LCC_MASK) { + case RC6_6A_MCE_CC: + case RC6_6A_KATHREIN_CC: protocol = RC_PROTO_RC6_MCE; toggle = !!(scancode & RC6_6A_MCE_TOGGLE_MASK); scancode &= ~RC6_6A_MCE_TOGGLE_MASK; - } else { + break; + default: protocol = RC_PROTO_RC6_6A_32; toggle = 0; + break; } break; default: From 71e8281e081236a715afefdb92b3ea510d9c3d52 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 21 Sep 2018 06:00:45 -0400 Subject: [PATCH 0872/3715] media: pxa_camera: Fix check for pdev->dev.of_node [ Upstream commit 44d7f1a77d8c84f8e42789b5475b74ae0e6d4758 ] Clang warns that the address of a pointer will always evaluated as true in a boolean context. drivers/media/platform/pxa_camera.c:2400:17: warning: address of 'pdev->dev.of_node' will always evaluate to 'true' [-Wpointer-bool-conversion] if (&pdev->dev.of_node && !pcdev->pdata) { ~~~~~~~~~~^~~~~~~ ~~ 1 warning generated. Judging from the rest of the kernel, it seems like this was an error and just the value of of_node should be checked rather than the address. Reported-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/pxa_camera.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/pxa_camera.c b/drivers/media/platform/pxa_camera.c index edca993c2b1f..d270a23299cc 100644 --- a/drivers/media/platform/pxa_camera.c +++ b/drivers/media/platform/pxa_camera.c @@ -2374,7 +2374,7 @@ static int pxa_camera_probe(struct platform_device *pdev) pcdev->res = res; pcdev->pdata = pdev->dev.platform_data; - if (&pdev->dev.of_node && !pcdev->pdata) { + if (pdev->dev.of_node && !pcdev->pdata) { err = pxa_camera_pdata_from_dt(&pdev->dev, pcdev, &pcdev->asd); } else { pcdev->platform_flags = pcdev->pdata->flags; From 99730f861962cbc549d271adf29aa6a47d62d0ec Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Mon, 17 Sep 2018 07:30:54 -0400 Subject: [PATCH 0873/3715] media: i2c: adv748x: Support probing a single output [ Upstream commit eccf442ce156ec2b4e06b1239d5fdcb0c732f63f ] Currently the adv748x driver will fail to probe unless both of its output endpoints (TXA and TXB) are connected. Make the driver support probing provided that there is at least one input, and one output connected and protect the clean-up function from accessing un-initialized fields. Following patches will fix other uses of un-initialized TXs in the driver, such as power management functions. Tested-by: Laurent Pinchart Signed-off-by: Jacopo Mondi Signed-off-by: Kieran Bingham Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/i2c/adv748x/adv748x-core.c | 25 +++++++++++++++++++++--- drivers/media/i2c/adv748x/adv748x-csi2.c | 18 ++++++----------- drivers/media/i2c/adv748x/adv748x.h | 2 ++ 3 files changed, 30 insertions(+), 15 deletions(-) diff --git a/drivers/media/i2c/adv748x/adv748x-core.c b/drivers/media/i2c/adv748x/adv748x-core.c index 5ee14f2c2747..cfec08593ac8 100644 --- a/drivers/media/i2c/adv748x/adv748x-core.c +++ b/drivers/media/i2c/adv748x/adv748x-core.c @@ -642,7 +642,8 @@ static int adv748x_parse_dt(struct adv748x_state *state) { struct device_node *ep_np = NULL; struct of_endpoint ep; - bool found = false; + bool out_found = false; + bool in_found = false; for_each_endpoint_of_node(state->dev->of_node, ep_np) { of_graph_parse_endpoint(ep_np, &ep); @@ -667,10 +668,17 @@ static int adv748x_parse_dt(struct adv748x_state *state) of_node_get(ep_np); state->endpoints[ep.port] = ep_np; - found = true; + /* + * At least one input endpoint and one output endpoint shall + * be defined. + */ + if (ep.port < ADV748X_PORT_TXA) + in_found = true; + else + out_found = true; } - return found ? 0 : -ENODEV; + return in_found && out_found ? 0 : -ENODEV; } static void adv748x_dt_cleanup(struct adv748x_state *state) @@ -702,6 +710,17 @@ static int adv748x_probe(struct i2c_client *client, state->i2c_clients[ADV748X_PAGE_IO] = client; i2c_set_clientdata(client, state); + /* + * We can not use container_of to get back to the state with two TXs; + * Initialize the TXs's fields unconditionally on the endpoint + * presence to access them later. + */ + state->txa.state = state->txb.state = state; + state->txa.page = ADV748X_PAGE_TXA; + state->txb.page = ADV748X_PAGE_TXB; + state->txa.port = ADV748X_PORT_TXA; + state->txb.port = ADV748X_PORT_TXB; + /* Discover and process ports declared by the Device tree endpoints */ ret = adv748x_parse_dt(state); if (ret) { diff --git a/drivers/media/i2c/adv748x/adv748x-csi2.c b/drivers/media/i2c/adv748x/adv748x-csi2.c index 979825d4a419..0953ba0bcc09 100644 --- a/drivers/media/i2c/adv748x/adv748x-csi2.c +++ b/drivers/media/i2c/adv748x/adv748x-csi2.c @@ -265,19 +265,10 @@ static int adv748x_csi2_init_controls(struct adv748x_csi2 *tx) int adv748x_csi2_init(struct adv748x_state *state, struct adv748x_csi2 *tx) { - struct device_node *ep; int ret; - /* We can not use container_of to get back to the state with two TXs */ - tx->state = state; - tx->page = is_txa(tx) ? ADV748X_PAGE_TXA : ADV748X_PAGE_TXB; - - ep = state->endpoints[is_txa(tx) ? ADV748X_PORT_TXA : ADV748X_PORT_TXB]; - if (!ep) { - adv_err(state, "No endpoint found for %s\n", - is_txa(tx) ? "txa" : "txb"); - return -ENODEV; - } + if (!is_tx_enabled(tx)) + return 0; /* Initialise the virtual channel */ adv748x_csi2_set_virtual_channel(tx, 0); @@ -287,7 +278,7 @@ int adv748x_csi2_init(struct adv748x_state *state, struct adv748x_csi2 *tx) is_txa(tx) ? "txa" : "txb"); /* Ensure that matching is based upon the endpoint fwnodes */ - tx->sd.fwnode = of_fwnode_handle(ep); + tx->sd.fwnode = of_fwnode_handle(state->endpoints[tx->port]); /* Register internal ops for incremental subdev registration */ tx->sd.internal_ops = &adv748x_csi2_internal_ops; @@ -320,6 +311,9 @@ err_free_media: void adv748x_csi2_cleanup(struct adv748x_csi2 *tx) { + if (!is_tx_enabled(tx)) + return; + v4l2_async_unregister_subdev(&tx->sd); media_entity_cleanup(&tx->sd.entity); v4l2_ctrl_handler_free(&tx->ctrl_hdl); diff --git a/drivers/media/i2c/adv748x/adv748x.h b/drivers/media/i2c/adv748x/adv748x.h index cc4151b5b31e..296c5f8a8c63 100644 --- a/drivers/media/i2c/adv748x/adv748x.h +++ b/drivers/media/i2c/adv748x/adv748x.h @@ -94,6 +94,7 @@ struct adv748x_csi2 { struct adv748x_state *state; struct v4l2_mbus_framefmt format; unsigned int page; + unsigned int port; struct media_pad pads[ADV748X_CSI2_NR_PADS]; struct v4l2_ctrl_handler ctrl_hdl; @@ -102,6 +103,7 @@ struct adv748x_csi2 { #define notifier_to_csi2(n) container_of(n, struct adv748x_csi2, notifier) #define adv748x_sd_to_csi2(sd) container_of(sd, struct adv748x_csi2, sd) +#define is_tx_enabled(_tx) ((_tx)->state->endpoints[(_tx)->port] != NULL) enum adv748x_hdmi_pads { ADV748X_HDMI_SINK, From 3b37711c9e7f20a79ca9a0c6fa3a9aac6ca17a9a Mon Sep 17 00:00:00 2001 From: Michael Pobega Date: Thu, 4 Oct 2018 14:58:21 -0400 Subject: [PATCH 0874/3715] ALSA: hda/sigmatel - Disable automute for Elo VuPoint [ Upstream commit d153135e93a50cdb6f1b52e238909e9965b56056 ] The Elo VuPoint 15MX has two headphone jacks of which neither work by default. Disabling automute allows ALSA to work normally with the speakers & left headphone jack. Future pin configuration changes may be required in the future to get the right headphone jack working in tandem. Signed-off-by: Michael Pobega Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_sigmatel.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 63d15b545b33..7cd147411b22 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -77,6 +77,7 @@ enum { STAC_DELL_M6_BOTH, STAC_DELL_EQ, STAC_ALIENWARE_M17X, + STAC_ELO_VUPOINT_15MX, STAC_92HD89XX_HP_FRONT_JACK, STAC_92HD89XX_HP_Z1_G2_RIGHT_MIC_JACK, STAC_92HD73XX_ASUS_MOBO, @@ -1897,6 +1898,18 @@ static void stac92hd73xx_fixup_no_jd(struct hda_codec *codec, codec->no_jack_detect = 1; } + +static void stac92hd73xx_disable_automute(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct sigmatel_spec *spec = codec->spec; + + if (action != HDA_FIXUP_ACT_PRE_PROBE) + return; + + spec->gen.suppress_auto_mute = 1; +} + static const struct hda_fixup stac92hd73xx_fixups[] = { [STAC_92HD73XX_REF] = { .type = HDA_FIXUP_FUNC, @@ -1922,6 +1935,10 @@ static const struct hda_fixup stac92hd73xx_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = stac92hd73xx_fixup_alienware_m17x, }, + [STAC_ELO_VUPOINT_15MX] = { + .type = HDA_FIXUP_FUNC, + .v.func = stac92hd73xx_disable_automute, + }, [STAC_92HD73XX_INTEL] = { .type = HDA_FIXUP_PINS, .v.pins = intel_dg45id_pin_configs, @@ -1960,6 +1977,7 @@ static const struct hda_model_fixup stac92hd73xx_models[] = { { .id = STAC_DELL_M6_BOTH, .name = "dell-m6" }, { .id = STAC_DELL_EQ, .name = "dell-eq" }, { .id = STAC_ALIENWARE_M17X, .name = "alienware" }, + { .id = STAC_ELO_VUPOINT_15MX, .name = "elo-vupoint-15mx" }, { .id = STAC_92HD73XX_ASUS_MOBO, .name = "asus-mobo" }, {} }; @@ -2009,6 +2027,8 @@ static const struct snd_pci_quirk stac92hd73xx_fixup_tbl[] = { "Alienware M17x", STAC_ALIENWARE_M17X), SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x0490, "Alienware M17x R3", STAC_DELL_EQ), + SND_PCI_QUIRK(0x1059, 0x1011, + "ELO VuPoint 15MX", STAC_ELO_VUPOINT_15MX), SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1927, "HP Z1 G2", STAC_92HD89XX_HP_Z1_G2_RIGHT_MIC_JACK), SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x2b17, From 145fd9a5f66212231b4d50153acefee579c187b6 Mon Sep 17 00:00:00 2001 From: Cameron Kaiser Date: Tue, 31 Jul 2018 07:39:21 -0700 Subject: [PATCH 0875/3715] KVM: PPC: Book3S PR: Exiting split hack mode needs to fixup both PC and LR [ Upstream commit 1006284c5e411872333967b1970c2ca46a9e225f ] When an OS (currently only classic Mac OS) is running in KVM-PR and makes a linked jump from code with split hack addressing enabled into code that does not, LR is not correctly updated and reflects the previously munged PC. To fix this, this patch undoes the address munge when exiting split hack mode so that code relying on LR being a proper address will now execute. This does not affect OS X or other operating systems running on KVM-PR. Signed-off-by: Cameron Kaiser Signed-off-by: Paul Mackerras Signed-off-by: Sasha Levin --- arch/powerpc/kvm/book3s.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index d38280b01ef0..1eda81249937 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -79,8 +79,11 @@ void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu) { if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) { ulong pc = kvmppc_get_pc(vcpu); + ulong lr = kvmppc_get_lr(vcpu); if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS) kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK); + if ((lr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS) + kvmppc_set_lr(vcpu, lr & ~SPLIT_HACK_MASK); vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK; } } From 4d5cc2f0b26217522daa623ca84c044ecfe31c17 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sun, 30 Sep 2018 18:03:11 +0200 Subject: [PATCH 0876/3715] USB: serial: cypress_m8: fix interrupt-out transfer length [ Upstream commit 56445eef55cb5904096fed7a73cf87b755dfffc7 ] Fix interrupt-out transfer length which was being set to the transfer-buffer length rather than the size of the outgoing packet. Note that no slab data was leaked as the whole transfer buffer is always cleared before each transfer. Fixes: 9aa8dae7b1fa ("cypress_m8: use usb_fill_int_urb where appropriate") Signed-off-by: Johan Hovold Signed-off-by: Sasha Levin --- drivers/usb/serial/cypress_m8.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/cypress_m8.c b/drivers/usb/serial/cypress_m8.c index 90110de715e0..d0aa4c853f56 100644 --- a/drivers/usb/serial/cypress_m8.c +++ b/drivers/usb/serial/cypress_m8.c @@ -773,7 +773,7 @@ send: usb_fill_int_urb(port->interrupt_out_urb, port->serial->dev, usb_sndintpipe(port->serial->dev, port->interrupt_out_endpointAddress), - port->interrupt_out_buffer, port->interrupt_out_size, + port->interrupt_out_buffer, actual_size, cypress_write_int_callback, port, priv->write_urb_interval); result = usb_submit_urb(port->interrupt_out_urb, GFP_ATOMIC); if (result) { From 8660ea2737942aebc4299feeffb492f7f09f5812 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Delgado Date: Thu, 4 Oct 2018 15:34:45 +0200 Subject: [PATCH 0877/3715] mtd: physmap_of: Release resources on error [ Upstream commit ef0de747f7ad179c7698a5b0e28db05f18ecbf57 ] During probe, if there was an error the memory region and the memory map were not properly released.This can lead a system unusable if deferred probe is in use. Replace mem_request and map with devm_ioremap_resource Signed-off-by: Ricardo Ribalda Delgado Signed-off-by: Boris Brezillon Signed-off-by: Sasha Levin --- drivers/mtd/maps/physmap_of_core.c | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/drivers/mtd/maps/physmap_of_core.c b/drivers/mtd/maps/physmap_of_core.c index b1bd4faecfb2..5d8399742c75 100644 --- a/drivers/mtd/maps/physmap_of_core.c +++ b/drivers/mtd/maps/physmap_of_core.c @@ -30,7 +30,6 @@ struct of_flash_list { struct mtd_info *mtd; struct map_info map; - struct resource *res; }; struct of_flash { @@ -55,18 +54,10 @@ static int of_flash_remove(struct platform_device *dev) mtd_concat_destroy(info->cmtd); } - for (i = 0; i < info->list_size; i++) { + for (i = 0; i < info->list_size; i++) if (info->list[i].mtd) map_destroy(info->list[i].mtd); - if (info->list[i].map.virt) - iounmap(info->list[i].map.virt); - - if (info->list[i].res) { - release_resource(info->list[i].res); - kfree(info->list[i].res); - } - } return 0; } @@ -214,10 +205,11 @@ static int of_flash_probe(struct platform_device *dev) err = -EBUSY; res_size = resource_size(&res); - info->list[i].res = request_mem_region(res.start, res_size, - dev_name(&dev->dev)); - if (!info->list[i].res) + info->list[i].map.virt = devm_ioremap_resource(&dev->dev, &res); + if (IS_ERR(info->list[i].map.virt)) { + err = PTR_ERR(info->list[i].map.virt); goto err_out; + } err = -ENXIO; width = of_get_property(dp, "bank-width", NULL); @@ -240,15 +232,6 @@ static int of_flash_probe(struct platform_device *dev) if (err) goto err_out; - err = -ENOMEM; - info->list[i].map.virt = ioremap(info->list[i].map.phys, - info->list[i].map.size); - if (!info->list[i].map.virt) { - dev_err(&dev->dev, "Failed to ioremap() flash" - " region\n"); - goto err_out; - } - simple_map_init(&info->list[i].map); /* From 73267b435c31868c61770a8371765e5e552b5174 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 4 Oct 2018 19:22:27 +0200 Subject: [PATCH 0878/3715] cpu/SMT: State SMT is disabled even with nosmt and without "=force" [ Upstream commit d0e7d14455d41163126afecd0fcce935463cc512 ] When booting with "nosmt=force" a message is issued into dmesg to confirm that SMT has been force-disabled but such a message is not issued when only "nosmt" is on the kernel command line. Fix that. Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20181004172227.10094-1-bp@alien8.de Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/cpu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/cpu.c b/kernel/cpu.c index 96f970d77339..49273130e4f1 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -376,6 +376,7 @@ void __init cpu_smt_disable(bool force) pr_info("SMT: Force disabled\n"); cpu_smt_control = CPU_SMT_FORCE_DISABLED; } else { + pr_info("SMT: disabled\n"); cpu_smt_control = CPU_SMT_DISABLED; } } From 9dc57cbdc1f1ff226ad8cdb2026f88303d623964 Mon Sep 17 00:00:00 2001 From: Chung-Hsien Hsu Date: Thu, 27 Sep 2018 14:59:44 +0000 Subject: [PATCH 0879/3715] brcmfmac: reduce timeout for action frame scan [ Upstream commit edb6d6885bef82d1eac432dbeca9fbf4ec349d7e ] Finding a common channel to send an action frame out is required for some action types. Since a loop with several scan retry is used to find the channel, a short wait time could be considered for each attempt. This patch reduces the wait time from 1500 to 450 msec for each action frame scan. This patch fixes the WFA p2p certification 5.1.20 failure caused by the long action frame send time. Signed-off-by: Chung-Hsien Hsu Signed-off-by: Chi-Hsien Lin Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c index 450f2216fac2..c9566c903672 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c @@ -74,7 +74,7 @@ #define P2P_AF_MAX_WAIT_TIME msecs_to_jiffies(2000) #define P2P_INVALID_CHANNEL -1 #define P2P_CHANNEL_SYNC_RETRY 5 -#define P2P_AF_FRM_SCAN_MAX_WAIT msecs_to_jiffies(1500) +#define P2P_AF_FRM_SCAN_MAX_WAIT msecs_to_jiffies(450) #define P2P_DEFAULT_SLEEP_TIME_VSDB 200 /* WiFi P2P Public Action Frame OUI Subtypes */ @@ -1139,7 +1139,6 @@ static s32 brcmf_p2p_af_searching_channel(struct brcmf_p2p_info *p2p) { struct afx_hdl *afx_hdl = &p2p->afx_hdl; struct brcmf_cfg80211_vif *pri_vif; - unsigned long duration; s32 retry; brcmf_dbg(TRACE, "Enter\n"); @@ -1155,7 +1154,6 @@ static s32 brcmf_p2p_af_searching_channel(struct brcmf_p2p_info *p2p) * pending action frame tx is cancelled. */ retry = 0; - duration = msecs_to_jiffies(P2P_AF_FRM_SCAN_MAX_WAIT); while ((retry < P2P_CHANNEL_SYNC_RETRY) && (afx_hdl->peer_chan == P2P_INVALID_CHANNEL)) { afx_hdl->is_listen = false; @@ -1163,7 +1161,8 @@ static s32 brcmf_p2p_af_searching_channel(struct brcmf_p2p_info *p2p) retry); /* search peer on peer's listen channel */ schedule_work(&afx_hdl->afx_work); - wait_for_completion_timeout(&afx_hdl->act_frm_scan, duration); + wait_for_completion_timeout(&afx_hdl->act_frm_scan, + P2P_AF_FRM_SCAN_MAX_WAIT); if ((afx_hdl->peer_chan != P2P_INVALID_CHANNEL) || (!test_bit(BRCMF_P2P_STATUS_FINDING_COMMON_CHANNEL, &p2p->status))) @@ -1176,7 +1175,7 @@ static s32 brcmf_p2p_af_searching_channel(struct brcmf_p2p_info *p2p) afx_hdl->is_listen = true; schedule_work(&afx_hdl->afx_work); wait_for_completion_timeout(&afx_hdl->act_frm_scan, - duration); + P2P_AF_FRM_SCAN_MAX_WAIT); } if ((afx_hdl->peer_chan != P2P_INVALID_CHANNEL) || (!test_bit(BRCMF_P2P_STATUS_FINDING_COMMON_CHANNEL, From a8b87a1ca9da29ffacd9e518d1581f359c9b4cb3 Mon Sep 17 00:00:00 2001 From: Chung-Hsien Hsu Date: Thu, 27 Sep 2018 14:59:49 +0000 Subject: [PATCH 0880/3715] brcmfmac: fix full timeout waiting for action frame on-channel tx [ Upstream commit fbf07000960d9c8a13fdc17c6de0230d681c7543 ] The driver sends an action frame down and waits for a completion signal triggered by the received BRCMF_E_ACTION_FRAME_OFF_CHAN_COMPLETE event to continue the process. However, the action frame could be transmitted either on the current channel or on an off channel. For the on-channel case, only BRCMF_E_ACTION_FRAME_COMPLETE event will be received when the frame is transmitted, which make the driver always wait a full timeout duration. This patch has the completion signal be triggered by receiving the BRCMF_E_ACTION_FRAME_COMPLETE event for the on-channel case. This change fixes WFA p2p certification 5.1.19 failure. Signed-off-by: Chung-Hsien Hsu Signed-off-by: Chi-Hsien Lin Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- .../wireless/broadcom/brcm80211/brcmfmac/p2p.c | 17 +++++++++++++++-- .../wireless/broadcom/brcm80211/brcmfmac/p2p.h | 2 ++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c index c9566c903672..4a883f4bbf88 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c @@ -1462,10 +1462,12 @@ int brcmf_p2p_notify_action_tx_complete(struct brcmf_if *ifp, return 0; if (e->event_code == BRCMF_E_ACTION_FRAME_COMPLETE) { - if (e->status == BRCMF_E_STATUS_SUCCESS) + if (e->status == BRCMF_E_STATUS_SUCCESS) { set_bit(BRCMF_P2P_STATUS_ACTION_TX_COMPLETED, &p2p->status); - else { + if (!p2p->wait_for_offchan_complete) + complete(&p2p->send_af_done); + } else { set_bit(BRCMF_P2P_STATUS_ACTION_TX_NOACK, &p2p->status); /* If there is no ack, we don't need to wait for * WLC_E_ACTION_FRAME_OFFCHAN_COMPLETE event @@ -1516,6 +1518,17 @@ static s32 brcmf_p2p_tx_action_frame(struct brcmf_p2p_info *p2p, p2p->af_sent_channel = le32_to_cpu(af_params->channel); p2p->af_tx_sent_jiffies = jiffies; + if (test_bit(BRCMF_P2P_STATUS_DISCOVER_LISTEN, &p2p->status) && + p2p->af_sent_channel == + ieee80211_frequency_to_channel(p2p->remain_on_channel.center_freq)) + p2p->wait_for_offchan_complete = false; + else + p2p->wait_for_offchan_complete = true; + + brcmf_dbg(TRACE, "Waiting for %s tx completion event\n", + (p2p->wait_for_offchan_complete) ? + "off-channel" : "on-channel"); + timeout = wait_for_completion_timeout(&p2p->send_af_done, P2P_AF_MAX_WAIT_TIME); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h index 0e8b34d2d85c..39f0d0218088 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h @@ -124,6 +124,7 @@ struct afx_hdl { * @gon_req_action: about to send go negotiation requets frame. * @block_gon_req_tx: drop tx go negotiation requets frame. * @p2pdev_dynamically: is p2p device if created by module param or supplicant. + * @wait_for_offchan_complete: wait for off-channel tx completion event. */ struct brcmf_p2p_info { struct brcmf_cfg80211_info *cfg; @@ -144,6 +145,7 @@ struct brcmf_p2p_info { bool gon_req_action; bool block_gon_req_tx; bool p2pdev_dynamically; + bool wait_for_offchan_complete; }; s32 brcmf_p2p_attach(struct brcmf_cfg80211_info *cfg, bool p2pdev_forced); From 565493cd08f876fccc6e857365862ff69a0533d5 Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Fri, 5 Oct 2018 10:11:36 +0000 Subject: [PATCH 0881/3715] qtnfmac: pass sgi rate info flag to wireless core [ Upstream commit d5657b709e2a92a0e581109010765d1d485580df ] SGI should be passed to wireless core as a part of rate structure. Otherwise wireless core performs incorrect rate calculation when SGI is enabled in hardware but not reported to host. Signed-off-by: Sergey Matyukevich Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/quantenna/qtnfmac/commands.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index 4206886b110c..ed087bbc6f63 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -485,6 +485,9 @@ qtnf_sta_info_parse_rate(struct rate_info *rate_dst, rate_dst->flags |= RATE_INFO_FLAGS_MCS; else if (rate_src->flags & QLINK_STA_INFO_RATE_FLAG_VHT_MCS) rate_dst->flags |= RATE_INFO_FLAGS_VHT_MCS; + + if (rate_src->flags & QLINK_STA_INFO_RATE_FLAG_SHORT_GI) + rate_dst->flags |= RATE_INFO_FLAGS_SHORT_GI; } static void From f39e28ef0b6953db2750caad4bc3a01933fdf516 Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Fri, 5 Oct 2018 10:11:40 +0000 Subject: [PATCH 0882/3715] qtnfmac: drop error reports for out-of-bounds key indexes [ Upstream commit 35da3fe63b8647ce3cc52fccdf186a60710815fb ] On disconnect wireless core attempts to remove all the supported keys. Following cfg80211_ops conventions, firmware returns -ENOENT code for the out-of-bound key indexes. This is a normal behavior, so no need to report errors for this case. Signed-off-by: Sergey Matyukevich Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/quantenna/qtnfmac/cfg80211.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index a450bc6bc774..d02f68792ce4 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -509,9 +509,16 @@ static int qtnf_del_key(struct wiphy *wiphy, struct net_device *dev, int ret; ret = qtnf_cmd_send_del_key(vif, key_index, pairwise, mac_addr); - if (ret) - pr_err("VIF%u.%u: failed to delete key: idx=%u pw=%u\n", - vif->mac->macid, vif->vifid, key_index, pairwise); + if (ret) { + if (ret == -ENOENT) { + pr_debug("VIF%u.%u: key index %d out of bounds\n", + vif->mac->macid, vif->vifid, key_index); + } else { + pr_err("VIF%u.%u: failed to delete key: idx=%u pw=%u\n", + vif->mac->macid, vif->vifid, + key_index, pairwise); + } + } return ret; } From 9cfe234df9ffd03252d84626b8bb29d52e494df0 Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Mon, 24 Sep 2018 13:00:56 +0200 Subject: [PATCH 0883/3715] clk: samsung: exynos5420: Define CLK_SECKEY gate clock only or Exynos5420 [ Upstream commit d32dd2a1a0f80edad158c9a1ba5f47650d9504a0 ] The bit of GATE_BUS_PERIS1 for CLK_SECKEY is just reserved on exynos5422/5800, not exynos5420. Define gate clk for exynos5420 to handle the bit only on exynos5420. Signed-off-by: Joonyoung Shim [m.szyprow: rewrote commit subject] Signed-off-by: Marek Szyprowski Signed-off-by: Sylwester Nawrocki Signed-off-by: Sasha Levin --- drivers/clk/samsung/clk-exynos5420.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/clk/samsung/clk-exynos5420.c b/drivers/clk/samsung/clk-exynos5420.c index 500a55415e90..a882f7038bce 100644 --- a/drivers/clk/samsung/clk-exynos5420.c +++ b/drivers/clk/samsung/clk-exynos5420.c @@ -633,6 +633,7 @@ static const struct samsung_div_clock exynos5420_div_clks[] __initconst = { }; static const struct samsung_gate_clock exynos5420_gate_clks[] __initconst = { + GATE(CLK_SECKEY, "seckey", "aclk66_psgen", GATE_BUS_PERIS1, 1, 0, 0), GATE(CLK_MAU_EPLL, "mau_epll", "mout_mau_epll_clk", SRC_MASK_TOP7, 20, CLK_SET_RATE_PARENT, 0), }; @@ -1167,8 +1168,6 @@ static const struct samsung_gate_clock exynos5x_gate_clks[] __initconst = { GATE(CLK_TMU, "tmu", "aclk66_psgen", GATE_IP_PERIS, 21, 0, 0), GATE(CLK_TMU_GPU, "tmu_gpu", "aclk66_psgen", GATE_IP_PERIS, 22, 0, 0), - GATE(CLK_SECKEY, "seckey", "aclk66_psgen", GATE_BUS_PERIS1, 1, 0, 0), - /* GEN Block */ GATE(CLK_ROTATOR, "rotator", "mout_user_aclk266", GATE_IP_GEN, 1, 0, 0), GATE(CLK_JPEG, "jpeg", "aclk300_jpeg", GATE_IP_GEN, 2, 0, 0), From 71307cd1164c1e99aaf3ad23059debaa4fff2402 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 2 Oct 2018 13:52:10 +0200 Subject: [PATCH 0884/3715] clk: samsung: Use clk_hw API for calling clk framework from clk notifiers [ Upstream commit 1da220e3a5d22fccda0bc8542997abc1d1741268 ] clk_notifier_register() documentation states, that the provided notifier callbacks associated with the notifier must not re-enter into the clk framework by calling any top-level clk APIs. Fix this by replacing clk_get_rate() calls with clk_hw_get_rate(), which is safe in this context. Signed-off-by: Marek Szyprowski Signed-off-by: Sylwester Nawrocki Signed-off-by: Sasha Levin --- drivers/clk/samsung/clk-cpu.c | 6 +++--- drivers/clk/samsung/clk-cpu.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/clk/samsung/clk-cpu.c b/drivers/clk/samsung/clk-cpu.c index 6686e8ba61f9..82f023f29a61 100644 --- a/drivers/clk/samsung/clk-cpu.c +++ b/drivers/clk/samsung/clk-cpu.c @@ -152,7 +152,7 @@ static int exynos_cpuclk_pre_rate_change(struct clk_notifier_data *ndata, struct exynos_cpuclk *cpuclk, void __iomem *base) { const struct exynos_cpuclk_cfg_data *cfg_data = cpuclk->cfg; - unsigned long alt_prate = clk_get_rate(cpuclk->alt_parent); + unsigned long alt_prate = clk_hw_get_rate(cpuclk->alt_parent); unsigned long alt_div = 0, alt_div_mask = DIV_MASK; unsigned long div0, div1 = 0, mux_reg; unsigned long flags; @@ -280,7 +280,7 @@ static int exynos5433_cpuclk_pre_rate_change(struct clk_notifier_data *ndata, struct exynos_cpuclk *cpuclk, void __iomem *base) { const struct exynos_cpuclk_cfg_data *cfg_data = cpuclk->cfg; - unsigned long alt_prate = clk_get_rate(cpuclk->alt_parent); + unsigned long alt_prate = clk_hw_get_rate(cpuclk->alt_parent); unsigned long alt_div = 0, alt_div_mask = DIV_MASK; unsigned long div0, div1 = 0, mux_reg; unsigned long flags; @@ -432,7 +432,7 @@ int __init exynos_register_cpu_clock(struct samsung_clk_provider *ctx, else cpuclk->clk_nb.notifier_call = exynos_cpuclk_notifier_cb; - cpuclk->alt_parent = __clk_lookup(alt_parent); + cpuclk->alt_parent = __clk_get_hw(__clk_lookup(alt_parent)); if (!cpuclk->alt_parent) { pr_err("%s: could not lookup alternate parent %s\n", __func__, alt_parent); diff --git a/drivers/clk/samsung/clk-cpu.h b/drivers/clk/samsung/clk-cpu.h index d4b6b517fe1b..bd38c6aa3897 100644 --- a/drivers/clk/samsung/clk-cpu.h +++ b/drivers/clk/samsung/clk-cpu.h @@ -49,7 +49,7 @@ struct exynos_cpuclk_cfg_data { */ struct exynos_cpuclk { struct clk_hw hw; - struct clk *alt_parent; + struct clk_hw *alt_parent; void __iomem *ctrl_base; spinlock_t *lock; const struct exynos_cpuclk_cfg_data *cfg; From c371855df52b9601a8db8a69ba96abd185107301 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 1 Oct 2018 10:43:47 -0700 Subject: [PATCH 0885/3715] i2c: brcmstb: Allow enabling the driver on DSL SoCs [ Upstream commit e1eba2ea54a2de0e4c58d87270d25706bb77b844 ] ARCH_BCM_63XX which is used by ARM-based DSL SoCs from Broadcom uses the same controller, make it possible to select the STB driver and update the Kconfig and help text a bit. Signed-off-by: Florian Fainelli Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/Kconfig | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 45a3f3ca29b3..b72a25585d52 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -429,12 +429,13 @@ config I2C_BCM_KONA If you do not need KONA I2C interface, say N. config I2C_BRCMSTB - tristate "BRCM Settop I2C controller" - depends on ARCH_BRCMSTB || BMIPS_GENERIC || COMPILE_TEST + tristate "BRCM Settop/DSL I2C controller" + depends on ARCH_BRCMSTB || BMIPS_GENERIC || ARCH_BCM_63XX || \ + COMPILE_TEST default y help If you say yes to this option, support will be included for the - I2C interface on the Broadcom Settop SoCs. + I2C interface on the Broadcom Settop/DSL SoCs. If you do not need I2C interface, say N. From d9871d9c527b690b5a78c8a76b3f4f94fa03980d Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 4 Oct 2018 14:45:00 -0400 Subject: [PATCH 0886/3715] NFSv4.x: fix lock recovery during delegation recall [ Upstream commit 44f411c353bf6d98d5a34f8f1b8605d43b2e50b8 ] Running "./nfstest_delegation --runtest recall26" uncovers that client doesn't recover the lock when we have an appending open, where the initial open got a write delegation. Instead of checking for the passed in open context against the file lock's open context. Check that the state is the same. Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/delegation.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 61bc0a6ba08b..04d57e11577e 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -101,7 +101,7 @@ int nfs4_check_delegation(struct inode *inode, fmode_t flags) return nfs4_do_check_delegation(inode, flags, false); } -static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) +static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_stateid *stateid) { struct inode *inode = state->inode; struct file_lock *fl; @@ -116,7 +116,7 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_ spin_lock(&flctx->flc_lock); restart: list_for_each_entry(fl, list, fl_list) { - if (nfs_file_open_context(fl->fl_file) != ctx) + if (nfs_file_open_context(fl->fl_file)->state != state) continue; spin_unlock(&flctx->flc_lock); status = nfs4_lock_delegation_recall(fl, state, stateid); @@ -163,7 +163,7 @@ again: seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); err = nfs4_open_delegation_recall(ctx, state, stateid, type); if (!err) - err = nfs_delegation_claim_locks(ctx, state, stateid); + err = nfs_delegation_claim_locks(state, stateid); if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) err = -EAGAIN; mutex_unlock(&sp->so_delegreturn_mutex); From bf4dc7348166a4c2c909b946da5e13652539eecf Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Fri, 5 Oct 2018 00:03:10 +0300 Subject: [PATCH 0887/3715] dmaengine: ioat: fix prototype of ioat_enumerate_channels [ Upstream commit f4d34aa8c887a8a2d23ef546da0efa10e3f77241 ] Signed-off-by: Rami Rosen Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/ioat/init.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c index 68680e4151ea..9103a0425f75 100644 --- a/drivers/dma/ioat/init.c +++ b/drivers/dma/ioat/init.c @@ -129,7 +129,7 @@ static void ioat_init_channel(struct ioatdma_device *ioat_dma, struct ioatdma_chan *ioat_chan, int idx); static void ioat_intr_quirk(struct ioatdma_device *ioat_dma); -static int ioat_enumerate_channels(struct ioatdma_device *ioat_dma); +static void ioat_enumerate_channels(struct ioatdma_device *ioat_dma); static int ioat3_dma_self_test(struct ioatdma_device *ioat_dma); static int ioat_dca_enabled = 1; @@ -575,7 +575,7 @@ static void ioat_dma_remove(struct ioatdma_device *ioat_dma) * ioat_enumerate_channels - find and initialize the device's channels * @ioat_dma: the ioat dma device to be enumerated */ -static int ioat_enumerate_channels(struct ioatdma_device *ioat_dma) +static void ioat_enumerate_channels(struct ioatdma_device *ioat_dma) { struct ioatdma_chan *ioat_chan; struct device *dev = &ioat_dma->pdev->dev; @@ -594,7 +594,7 @@ static int ioat_enumerate_channels(struct ioatdma_device *ioat_dma) xfercap_log = readb(ioat_dma->reg_base + IOAT_XFERCAP_OFFSET); xfercap_log &= 0x1f; /* bits [4:0] valid */ if (xfercap_log == 0) - return 0; + return; dev_dbg(dev, "%s: xfercap = %d\n", __func__, 1 << xfercap_log); for (i = 0; i < dma->chancnt; i++) { @@ -611,7 +611,6 @@ static int ioat_enumerate_channels(struct ioatdma_device *ioat_dma) } } dma->chancnt = i; - return i; } /** From c14493c19528f5cee3e432a5adb53f289fcfd46c Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 5 Oct 2018 08:51:45 -0400 Subject: [PATCH 0888/3715] media: cec-gpio: select correct Signal Free Time [ Upstream commit c439d5c1e13dbf66cff53455432f21d4d0536c51 ] If a receive is in progress or starts before the transmit has a chance, then lower the Signal Free Time of the upcoming transmit to no more than CEC_SIGNAL_FREE_TIME_NEW_INITIATOR. This is per the specification requirements. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/cec/cec-pin.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/media/cec/cec-pin.c b/drivers/media/cec/cec-pin.c index c003b8eac617..68fc6a24d077 100644 --- a/drivers/media/cec/cec-pin.c +++ b/drivers/media/cec/cec-pin.c @@ -529,6 +529,17 @@ static enum hrtimer_restart cec_pin_timer(struct hrtimer *timer) /* Start bit, switch to receive state */ pin->ts = ts; pin->state = CEC_ST_RX_START_BIT_LOW; + /* + * If a transmit is pending, then that transmit should + * use a signal free time of no more than + * CEC_SIGNAL_FREE_TIME_NEW_INITIATOR since it will + * have a new initiator due to the receive that is now + * starting. + */ + if (pin->tx_msg.len && pin->tx_signal_free_time > + CEC_SIGNAL_FREE_TIME_NEW_INITIATOR) + pin->tx_signal_free_time = + CEC_SIGNAL_FREE_TIME_NEW_INITIATOR; break; } if (pin->ts == 0) @@ -690,6 +701,15 @@ static int cec_pin_adap_transmit(struct cec_adapter *adap, u8 attempts, { struct cec_pin *pin = adap->pin; + /* + * If a receive is in progress, then this transmit should use + * a signal free time of max CEC_SIGNAL_FREE_TIME_NEW_INITIATOR + * since when it starts transmitting it will have a new initiator. + */ + if (pin->state != CEC_ST_IDLE && + signal_free_time > CEC_SIGNAL_FREE_TIME_NEW_INITIATOR) + signal_free_time = CEC_SIGNAL_FREE_TIME_NEW_INITIATOR; + pin->tx_signal_free_time = signal_free_time; pin->tx_msg = *msg; pin->work_tx_status = 0; From 5e40cfbeaf3b56cabc45be2f28c99a45788a64f1 Mon Sep 17 00:00:00 2001 From: Martin Kepplinger Date: Fri, 5 Oct 2018 11:44:45 -0700 Subject: [PATCH 0889/3715] Input: st1232 - set INPUT_PROP_DIRECT property [ Upstream commit 20bbb312079494a406c10c90932e3c80837c9d94 ] This is how userspace checks for touchscreen devices most reliably. Signed-off-by: Martin Kepplinger Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/touchscreen/st1232.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/touchscreen/st1232.c b/drivers/input/touchscreen/st1232.c index be5615c6bf8f..482f97e1c9d3 100644 --- a/drivers/input/touchscreen/st1232.c +++ b/drivers/input/touchscreen/st1232.c @@ -203,6 +203,7 @@ static int st1232_ts_probe(struct i2c_client *client, input_dev->id.bustype = BUS_I2C; input_dev->dev.parent = &client->dev; + __set_bit(INPUT_PROP_DIRECT, input_dev->propbit); __set_bit(EV_SYN, input_dev->evbit); __set_bit(EV_KEY, input_dev->evbit); __set_bit(EV_ABS, input_dev->evbit); From 5611a2b8a713b52d7c4eeddf1844d22d1f863720 Mon Sep 17 00:00:00 2001 From: Julian Sax Date: Fri, 5 Oct 2018 11:48:31 -0700 Subject: [PATCH 0890/3715] Input: silead - try firmware reload after unsuccessful resume [ Upstream commit dde27443211062e841806feaf690674b7c3a599f ] A certain silead controller (Chip ID: 0x56810000) loses its firmware after suspend, causing the resume to fail. This patch tries to load the firmware, should a resume error occur and retries the resuming. Signed-off-by: Julian Sax Acked-by: Hans de Goede Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/touchscreen/silead.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/input/touchscreen/silead.c b/drivers/input/touchscreen/silead.c index 0dbcf105f7db..7c0eeef29b3c 100644 --- a/drivers/input/touchscreen/silead.c +++ b/drivers/input/touchscreen/silead.c @@ -534,20 +534,33 @@ static int __maybe_unused silead_ts_suspend(struct device *dev) static int __maybe_unused silead_ts_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); + bool second_try = false; int error, status; silead_ts_set_power(client, SILEAD_POWER_ON); + retry: error = silead_ts_reset(client); if (error) return error; + if (second_try) { + error = silead_ts_load_fw(client); + if (error) + return error; + } + error = silead_ts_startup(client); if (error) return error; status = silead_ts_get_status(client); if (status != SILEAD_STATUS_OK) { + if (!second_try) { + second_try = true; + dev_dbg(dev, "Reloading firmware after unsuccessful resume\n"); + goto retry; + } dev_err(dev, "Resume error, status: 0x%02x\n", status); return -ENODEV; } From d26bd1ce5d3925d1cf9abb2f41f928b29cb77819 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 14 Sep 2018 19:37:22 -0500 Subject: [PATCH 0891/3715] remoteproc: Check for NULL firmwares in sysfs interface [ Upstream commit faeadbb64094757150a8c2a3175ca418dbdd472c ] The remoteproc framework provides a sysfs file 'firmware' for modifying the firmware image name from userspace. Add an additional check to ensure NULL firmwares are errored out right away, rather than getting a delayed error while requesting a firmware during the start of a remoteproc later on. Tested-by: Arnaud Pouliquen Signed-off-by: Suman Anna Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/remoteproc/remoteproc_sysfs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/remoteproc/remoteproc_sysfs.c b/drivers/remoteproc/remoteproc_sysfs.c index 47be411400e5..3a4c3d7cafca 100644 --- a/drivers/remoteproc/remoteproc_sysfs.c +++ b/drivers/remoteproc/remoteproc_sysfs.c @@ -48,6 +48,11 @@ static ssize_t firmware_store(struct device *dev, } len = strcspn(buf, "\n"); + if (!len) { + dev_err(dev, "can't provide a NULL firmware\n"); + err = -EINVAL; + goto out; + } p = kstrndup(buf, len, GFP_KERNEL); if (!p) { From 0c43276429a07b1bc7a210b5e782e04dbe454c27 Mon Sep 17 00:00:00 2001 From: Lianbo Jiang Date: Sun, 30 Sep 2018 11:10:31 +0800 Subject: [PATCH 0892/3715] kexec: Allocate decrypted control pages for kdump if SME is enabled [ Upstream commit 9cf38d5559e813cccdba8b44c82cc46ba48d0896 ] When SME is enabled in the first kernel, it needs to allocate decrypted pages for kdump because when the kdump kernel boots, these pages need to be accessed decrypted in the initial boot stage, before SME is enabled. [ bp: clean up text. ] Signed-off-by: Lianbo Jiang Signed-off-by: Borislav Petkov Reviewed-by: Tom Lendacky Cc: kexec@lists.infradead.org Cc: tglx@linutronix.de Cc: mingo@redhat.com Cc: hpa@zytor.com Cc: akpm@linux-foundation.org Cc: dan.j.williams@intel.com Cc: bhelgaas@google.com Cc: baiyaowei@cmss.chinamobile.com Cc: tiwai@suse.de Cc: brijesh.singh@amd.com Cc: dyoung@redhat.com Cc: bhe@redhat.com Cc: jroedel@suse.de Link: https://lkml.kernel.org/r/20180930031033.22110-3-lijiang@redhat.com Signed-off-by: Sasha Levin --- kernel/kexec_core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 8f15665ab616..27cf24e285e0 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -473,6 +473,10 @@ static struct page *kimage_alloc_crash_control_pages(struct kimage *image, } } + /* Ensure that these pages are decrypted if SME is enabled. */ + if (pages) + arch_kexec_post_alloc_pages(page_address(pages), 1 << order, 0); + return pages; } @@ -867,6 +871,7 @@ static int kimage_load_crash_segment(struct kimage *image, result = -ENOMEM; goto out; } + arch_kexec_post_alloc_pages(page_address(page), 1, 0); ptr = kmap(page); ptr += maddr & ~PAGE_MASK; mchunk = min_t(size_t, mbytes, @@ -884,6 +889,7 @@ static int kimage_load_crash_segment(struct kimage *image, result = copy_from_user(ptr, buf, uchunk); kexec_flush_icache_page(page); kunmap(page); + arch_kexec_pre_free_pages(page_address(page), 1); if (result) { result = -EFAULT; goto out; From 4251e64c2287dad32381eee6357860184dc35044 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 5 Oct 2018 15:13:07 +0200 Subject: [PATCH 0893/3715] x86/olpc: Fix build error with CONFIG_MFD_CS5535=m [ Upstream commit fa112cf1e8bc693d5a666b1c479a2859c8b6e0f1 ] When building a 32-bit config which has the above MFD item as module but OLPC_XO1_PM is enabled =y - which is bool, btw - the kernel fails building with: ld: arch/x86/platform/olpc/olpc-xo1-pm.o: in function `xo1_pm_remove': /home/boris/kernel/linux/arch/x86/platform/olpc/olpc-xo1-pm.c:159: undefined reference to `mfd_cell_disable' ld: arch/x86/platform/olpc/olpc-xo1-pm.o: in function `xo1_pm_probe': /home/boris/kernel/linux/arch/x86/platform/olpc/olpc-xo1-pm.c:133: undefined reference to `mfd_cell_enable' make: *** [Makefile:1030: vmlinux] Error 1 Force MFD_CS5535 to y if OLPC_XO1_PM is enabled. Signed-off-by: Borislav Petkov Cc: Lubomir Rintel Cc: x86@kernel.org Link: http://lkml.kernel.org/r/20181005131750.GA5366@zn.tnic Signed-off-by: Sasha Levin --- arch/x86/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b58daecc591e..c55870ac907e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2716,8 +2716,7 @@ config OLPC config OLPC_XO1_PM bool "OLPC XO-1 Power Management" - depends on OLPC && MFD_CS5535 && PM_SLEEP - select MFD_CORE + depends on OLPC && MFD_CS5535=y && PM_SLEEP ---help--- Add support for poweroff and suspend of the OLPC XO-1 laptop. From 348d69f0d53f394461adc8ef4baa7b7f4a408848 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 14 Sep 2018 17:43:28 +0200 Subject: [PATCH 0894/3715] dmaengine: rcar-dmac: set scatter/gather max segment size [ Upstream commit 97d49c59e219acac576e16293a6b8cb99302f62f ] Fix warning when running with CONFIG_DMA_API_DEBUG_SG=y by allocating a device_dma_parameters structure and filling in the max segment size. Signed-off-by: Wolfram Sang Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/sh/rcar-dmac.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index 19c7433e8309..f7ca57125ac7 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -200,6 +200,7 @@ struct rcar_dmac { struct dma_device engine; struct device *dev; void __iomem *iomem; + struct device_dma_parameters parms; unsigned int n_channels; struct rcar_dmac_chan *channels; @@ -1764,6 +1765,8 @@ static int rcar_dmac_probe(struct platform_device *pdev) dmac->dev = &pdev->dev; platform_set_drvdata(pdev, dmac); + dmac->dev->dma_parms = &dmac->parms; + dma_set_max_seg_size(dmac->dev, RCAR_DMATCR_MASK); dma_set_mask_and_coherent(dmac->dev, DMA_BIT_MASK(40)); ret = rcar_dmac_parse_of(&pdev->dev, dmac); From c0933fa586b472a381310a5e911d27811087c889 Mon Sep 17 00:00:00 2001 From: Radu Solea Date: Tue, 2 Oct 2018 19:01:50 +0000 Subject: [PATCH 0895/3715] crypto: mxs-dcp - Fix SHA null hashes and output length [ Upstream commit c709eebaf5c5faa8a0f140355f9cfe67e8f7afb1 ] DCP writes at least 32 bytes in the output buffer instead of hash length as documented. Add intermediate buffer to prevent write out of bounds. When requested to produce null hashes DCP fails to produce valid output. Add software workaround to bypass hardware and return valid output. Signed-off-by: Radu Solea Signed-off-by: Leonard Crestez Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/mxs-dcp.c | 47 +++++++++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index a98a25733a22..4615dbee22d0 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -28,9 +28,24 @@ #define DCP_MAX_CHANS 4 #define DCP_BUF_SZ PAGE_SIZE +#define DCP_SHA_PAY_SZ 64 #define DCP_ALIGNMENT 64 +/* + * Null hashes to align with hw behavior on imx6sl and ull + * these are flipped for consistency with hw output + */ +const uint8_t sha1_null_hash[] = + "\x09\x07\xd8\xaf\x90\x18\x60\x95\xef\xbf" + "\x55\x32\x0d\x4b\x6b\x5e\xee\xa3\x39\xda"; + +const uint8_t sha256_null_hash[] = + "\x55\xb8\x52\x78\x1b\x99\x95\xa4" + "\x4c\x93\x9b\x64\xe4\x41\xae\x27" + "\x24\xb9\x6f\x99\xc8\xf4\xfb\x9a" + "\x14\x1c\xfc\x98\x42\xc4\xb0\xe3"; + /* DCP DMA descriptor. */ struct dcp_dma_desc { uint32_t next_cmd_addr; @@ -48,6 +63,7 @@ struct dcp_coherent_block { uint8_t aes_in_buf[DCP_BUF_SZ]; uint8_t aes_out_buf[DCP_BUF_SZ]; uint8_t sha_in_buf[DCP_BUF_SZ]; + uint8_t sha_out_buf[DCP_SHA_PAY_SZ]; uint8_t aes_key[2 * AES_KEYSIZE_128]; @@ -513,8 +529,6 @@ static int mxs_dcp_run_sha(struct ahash_request *req) struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct dcp_async_ctx *actx = crypto_ahash_ctx(tfm); struct dcp_sha_req_ctx *rctx = ahash_request_ctx(req); - struct hash_alg_common *halg = crypto_hash_alg_common(tfm); - struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan]; dma_addr_t digest_phys = 0; @@ -536,10 +550,23 @@ static int mxs_dcp_run_sha(struct ahash_request *req) desc->payload = 0; desc->status = 0; + /* + * Align driver with hw behavior when generating null hashes + */ + if (rctx->init && rctx->fini && desc->size == 0) { + struct hash_alg_common *halg = crypto_hash_alg_common(tfm); + const uint8_t *sha_buf = + (actx->alg == MXS_DCP_CONTROL1_HASH_SELECT_SHA1) ? + sha1_null_hash : sha256_null_hash; + memcpy(sdcp->coh->sha_out_buf, sha_buf, halg->digestsize); + ret = 0; + goto done_run; + } + /* Set HASH_TERM bit for last transfer block. */ if (rctx->fini) { - digest_phys = dma_map_single(sdcp->dev, req->result, - halg->digestsize, DMA_FROM_DEVICE); + digest_phys = dma_map_single(sdcp->dev, sdcp->coh->sha_out_buf, + DCP_SHA_PAY_SZ, DMA_FROM_DEVICE); desc->control0 |= MXS_DCP_CONTROL0_HASH_TERM; desc->payload = digest_phys; } @@ -547,9 +574,10 @@ static int mxs_dcp_run_sha(struct ahash_request *req) ret = mxs_dcp_start_dma(actx); if (rctx->fini) - dma_unmap_single(sdcp->dev, digest_phys, halg->digestsize, + dma_unmap_single(sdcp->dev, digest_phys, DCP_SHA_PAY_SZ, DMA_FROM_DEVICE); +done_run: dma_unmap_single(sdcp->dev, buf_phys, DCP_BUF_SZ, DMA_TO_DEVICE); return ret; @@ -567,6 +595,7 @@ static int dcp_sha_req_to_buf(struct crypto_async_request *arq) const int nents = sg_nents(req->src); uint8_t *in_buf = sdcp->coh->sha_in_buf; + uint8_t *out_buf = sdcp->coh->sha_out_buf; uint8_t *src_buf; @@ -621,11 +650,9 @@ static int dcp_sha_req_to_buf(struct crypto_async_request *arq) actx->fill = 0; - /* For some reason, the result is flipped. */ - for (i = 0; i < halg->digestsize / 2; i++) { - swap(req->result[i], - req->result[halg->digestsize - i - 1]); - } + /* For some reason the result is flipped */ + for (i = 0; i < halg->digestsize; i++) + req->result[i] = out_buf[halg->digestsize - i - 1]; } return 0; From b154510584140eb0c3fa0dca7ed8fa8782c5d3ef Mon Sep 17 00:00:00 2001 From: Radu Solea Date: Tue, 2 Oct 2018 19:01:52 +0000 Subject: [PATCH 0896/3715] crypto: mxs-dcp - Fix AES issues [ Upstream commit fadd7a6e616b89c7f4f7bfa7b824f290bab32c3c ] The DCP driver does not obey cryptlen, when doing android CTS this results in passing to hardware input stream lengths which are not multiple of block size. Add a check to prevent future erroneous stream lengths from reaching the hardware and adjust the scatterlist walking code to obey cryptlen. Also properly copy-out the IV for chaining. Signed-off-by: Radu Solea Signed-off-by: Franck LENORMAND Signed-off-by: Leonard Crestez Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/mxs-dcp.c | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index 4615dbee22d0..e1e1e8110790 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -225,6 +225,12 @@ static int mxs_dcp_run_aes(struct dcp_async_ctx *actx, dma_addr_t dst_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_out_buf, DCP_BUF_SZ, DMA_FROM_DEVICE); + if (actx->fill % AES_BLOCK_SIZE) { + dev_err(sdcp->dev, "Invalid block size!\n"); + ret = -EINVAL; + goto aes_done_run; + } + /* Fill in the DMA descriptor. */ desc->control0 = MXS_DCP_CONTROL0_DECR_SEMAPHORE | MXS_DCP_CONTROL0_INTERRUPT | @@ -254,6 +260,7 @@ static int mxs_dcp_run_aes(struct dcp_async_ctx *actx, ret = mxs_dcp_start_dma(actx); +aes_done_run: dma_unmap_single(sdcp->dev, key_phys, 2 * AES_KEYSIZE_128, DMA_TO_DEVICE); dma_unmap_single(sdcp->dev, src_phys, DCP_BUF_SZ, DMA_TO_DEVICE); @@ -280,13 +287,15 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq) uint8_t *out_tmp, *src_buf, *dst_buf = NULL; uint32_t dst_off = 0; + uint32_t last_out_len = 0; uint8_t *key = sdcp->coh->aes_key; int ret = 0; int split = 0; - unsigned int i, len, clen, rem = 0; + unsigned int i, len, clen, rem = 0, tlen = 0; int init = 0; + bool limit_hit = false; actx->fill = 0; @@ -305,6 +314,11 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq) for_each_sg(req->src, src, nents, i) { src_buf = sg_virt(src); len = sg_dma_len(src); + tlen += len; + limit_hit = tlen > req->nbytes; + + if (limit_hit) + len = req->nbytes - (tlen - len); do { if (actx->fill + len > out_off) @@ -321,13 +335,15 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq) * If we filled the buffer or this is the last SG, * submit the buffer. */ - if (actx->fill == out_off || sg_is_last(src)) { + if (actx->fill == out_off || sg_is_last(src) || + limit_hit) { ret = mxs_dcp_run_aes(actx, req, init); if (ret) return ret; init = 0; out_tmp = out_buf; + last_out_len = actx->fill; while (dst && actx->fill) { if (!split) { dst_buf = sg_virt(dst); @@ -350,6 +366,19 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq) } } } while (len); + + if (limit_hit) + break; + } + + /* Copy the IV for CBC for chaining */ + if (!rctx->ecb) { + if (rctx->enc) + memcpy(req->info, out_buf+(last_out_len-AES_BLOCK_SIZE), + AES_BLOCK_SIZE); + else + memcpy(req->info, in_buf+(last_out_len-AES_BLOCK_SIZE), + AES_BLOCK_SIZE); } return ret; From 3dc925644961b2e71ee5c6a8f10c11bd5443290e Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Sun, 7 Oct 2018 10:22:42 +0800 Subject: [PATCH 0897/3715] xfrm: use correct size to initialise sp->ovec [ Upstream commit f1193e915748291fb205a908db33bd3debece6e2 ] This place should want to initialize array, not a element, so it should be sizeof(array) instead of sizeof(element) but now this array only has one element, so no error in this condition that XFRM_MAX_OFFLOAD_DEPTH is 1 Signed-off-by: Li RongQing Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/xfrm/xfrm_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 06dec32503bd..fc0a9ce1be18 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -130,7 +130,7 @@ struct sec_path *secpath_dup(struct sec_path *src) sp->len = 0; sp->olen = 0; - memset(sp->ovec, 0, sizeof(sp->ovec[XFRM_MAX_OFFLOAD_DEPTH])); + memset(sp->ovec, 0, sizeof(sp->ovec)); if (src) { int i; From 4b9d1ab7112cfe9c3011acb661d98bad6c15a6dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ronald=20Tschal=C3=A4r?= Date: Sun, 30 Sep 2018 19:53:13 -0700 Subject: [PATCH 0898/3715] ACPI / SBS: Fix rare oops when removing modules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 757c968c442397f1249bb775a7c8c03842e3e0c7 ] There was a small race when removing the sbshc module where smbus_alarm() had queued acpi_smbus_callback() for deferred execution but it hadn't been run yet, so that when it did run hc had been freed and the module unloaded, resulting in an invalid paging request. A similar race existed when removing the sbs module with regards to acpi_sbs_callback() (which is called from acpi_smbus_callback()). We therefore need to ensure no callbacks are pending or executing before the cleanups are done and the modules are removed. Signed-off-by: Ronald Tschalär Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/osl.c | 1 + drivers/acpi/sbshc.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 191e86c62037..9da7e7d874bd 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -1116,6 +1116,7 @@ void acpi_os_wait_events_complete(void) flush_workqueue(kacpid_wq); flush_workqueue(kacpi_notify_wq); } +EXPORT_SYMBOL(acpi_os_wait_events_complete); struct acpi_hp_work { struct work_struct work; diff --git a/drivers/acpi/sbshc.c b/drivers/acpi/sbshc.c index 7a3431018e0a..5008ead4609a 100644 --- a/drivers/acpi/sbshc.c +++ b/drivers/acpi/sbshc.c @@ -196,6 +196,7 @@ int acpi_smbus_unregister_callback(struct acpi_smb_hc *hc) hc->callback = NULL; hc->context = NULL; mutex_unlock(&hc->lock); + acpi_os_wait_events_complete(); return 0; } @@ -292,6 +293,7 @@ static int acpi_smbus_hc_remove(struct acpi_device *device) hc = acpi_driver_data(device); acpi_ec_remove_query_handler(hc->ec, hc->query_bit); + acpi_os_wait_events_complete(); kfree(hc); device->driver_data = NULL; return 0; From fa315cc7cb6cbb76b47c06441787b3aa85514312 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Sun, 1 Jul 2018 14:52:06 +0300 Subject: [PATCH 0899/3715] iwlwifi: mvm: don't send keys when entering D3 [ Upstream commit 8c7fd6a365eb5b2647b2c01918730d0a485b9f85 ] In the past, we needed to program the keys when entering D3. This was since we replaced the image. However, now that there is a single image, this is no longer needed. Note that RSC is sent separately in a new command. This solves issues with newer devices that support PN offload. Since driver re-sent the keys, the PN got zeroed and the receiver dropped the next packets, until PN caught up again. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index b205a7bfb828..65c51c698328 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -947,8 +947,10 @@ int iwl_mvm_wowlan_config_key_params(struct iwl_mvm *mvm, { struct iwl_wowlan_kek_kck_material_cmd kek_kck_cmd = {}; struct iwl_wowlan_tkip_params_cmd tkip_cmd = {}; + bool unified = fw_has_capa(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_CAPA_CNSLDTD_D3_D0_IMG); struct wowlan_key_data key_data = { - .configure_keys = !d0i3, + .configure_keys = !d0i3 && !unified, .use_rsc_tsc = false, .tkip = &tkip_cmd, .use_tkip = false, From bfbcef70a60178af1d18052e5a02e89a53ba8ab9 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 18 Sep 2018 16:08:52 -0700 Subject: [PATCH 0900/3715] x86/fsgsbase/64: Fix ptrace() to read the FS/GS base accurately [ Upstream commit 07e1d88adaaeab247b300926f78cc3f950dbeda3 ] On 64-bit kernels ptrace can read the FS/GS base using the register access APIs (PTRACE_PEEKUSER, etc.) or PTRACE_ARCH_PRCTL. Make both of these mechanisms return the actual FS/GS base. This will improve debuggability by providing the correct information to ptracer such as GDB. [ chang: Rebased and revised patch description. ] [ mingo: Revised the changelog some more. ] Signed-off-by: Andy Lutomirski Signed-off-by: Chang S. Bae Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Markus T Metzger Cc: Peter Zijlstra Cc: Ravi Shankar Cc: Rik van Riel Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1537312139-5580-2-git-send-email-chang.seok.bae@intel.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- arch/x86/kernel/ptrace.c | 62 +++++++++++++++++++++++++++++++++------- 1 file changed, 52 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 584cdd475bb3..734549492a18 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "tls.h" @@ -343,6 +344,49 @@ static int set_segment_reg(struct task_struct *task, return 0; } +static unsigned long task_seg_base(struct task_struct *task, + unsigned short selector) +{ + unsigned short idx = selector >> 3; + unsigned long base; + + if (likely((selector & SEGMENT_TI_MASK) == 0)) { + if (unlikely(idx >= GDT_ENTRIES)) + return 0; + + /* + * There are no user segments in the GDT with nonzero bases + * other than the TLS segments. + */ + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) + return 0; + + idx -= GDT_ENTRY_TLS_MIN; + base = get_desc_base(&task->thread.tls_array[idx]); + } else { +#ifdef CONFIG_MODIFY_LDT_SYSCALL + struct ldt_struct *ldt; + + /* + * If performance here mattered, we could protect the LDT + * with RCU. This is a slow path, though, so we can just + * take the mutex. + */ + mutex_lock(&task->mm->context.lock); + ldt = task->mm->context.ldt; + if (unlikely(idx >= ldt->nr_entries)) + base = 0; + else + base = get_desc_base(ldt->entries + idx); + mutex_unlock(&task->mm->context.lock); +#else + base = 0; +#endif + } + + return base; +} + #endif /* CONFIG_X86_32 */ static unsigned long get_flags(struct task_struct *task) @@ -436,18 +480,16 @@ static unsigned long getreg(struct task_struct *task, unsigned long offset) #ifdef CONFIG_X86_64 case offsetof(struct user_regs_struct, fs_base): { - /* - * XXX: This will not behave as expected if called on - * current or if fsindex != 0. - */ - return task->thread.fsbase; + if (task->thread.fsindex == 0) + return task->thread.fsbase; + else + return task_seg_base(task, task->thread.fsindex); } case offsetof(struct user_regs_struct, gs_base): { - /* - * XXX: This will not behave as expected if called on - * current or if fsindex != 0. - */ - return task->thread.gsbase; + if (task->thread.gsindex == 0) + return task->thread.gsbase; + else + return task_seg_base(task, task->thread.gsindex); } #endif } From 1fb543f3890f991484c1b2328ee0a4521a5b2a4f Mon Sep 17 00:00:00 2001 From: Masaharu Hayakawa Date: Thu, 30 Aug 2018 01:32:07 +0200 Subject: [PATCH 0901/3715] mmc: tmio: Fix SCC error detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b85fb0a1c8aeaaa40d08945d51a6656b512173f0 ] SDR104, HS200 and HS400 need to check for SCC error. If SCC error is detected, retuning is necessary. Signed-off-by: Masaharu Hayakawa [Niklas: update commit message] Signed-off-by: Niklas Söderlund Reviewed-by: Wolfram Sang Tested-by: Wolfram Sang Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/tmio_mmc_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index 2437fcde915a..01e51b794575 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -914,8 +914,8 @@ static void tmio_mmc_finish_request(struct tmio_mmc_host *host) if (mrq->cmd->error || (mrq->data && mrq->data->error)) tmio_mmc_abort_dma(host); - if (host->check_scc_error) - host->check_scc_error(host); + if (host->check_scc_error && host->check_scc_error(host)) + mrq->cmd->error = -EILSEQ; /* If SET_BLOCK_COUNT, continue with main command */ if (host->mrq && !mrq->cmd->error) { From c2ddc149cdf3d87a141eb4bd56fbfb543784778f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 8 Oct 2018 12:57:36 +0200 Subject: [PATCH 0902/3715] fbdev: sbuslib: use checked version of put_user() [ Upstream commit d8bad911e5e55e228d59c0606ff7e6b8131ca7bf ] I'm not sure why the code assumes that only the first put_user() needs an access_ok() check. I have made all the put_user() and get_user() calls checked. Signed-off-by: Dan Carpenter Cc: Philippe Ombredanne Cc: Mathieu Malaterre Cc: Peter Malone , Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Sasha Levin --- drivers/video/fbdev/sbuslib.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/video/fbdev/sbuslib.c b/drivers/video/fbdev/sbuslib.c index a436d44f1b7f..90c51330969c 100644 --- a/drivers/video/fbdev/sbuslib.c +++ b/drivers/video/fbdev/sbuslib.c @@ -106,11 +106,11 @@ int sbusfb_ioctl_helper(unsigned long cmd, unsigned long arg, struct fbtype __user *f = (struct fbtype __user *) arg; if (put_user(type, &f->fb_type) || - __put_user(info->var.yres, &f->fb_height) || - __put_user(info->var.xres, &f->fb_width) || - __put_user(fb_depth, &f->fb_depth) || - __put_user(0, &f->fb_cmsize) || - __put_user(fb_size, &f->fb_cmsize)) + put_user(info->var.yres, &f->fb_height) || + put_user(info->var.xres, &f->fb_width) || + put_user(fb_depth, &f->fb_depth) || + put_user(0, &f->fb_cmsize) || + put_user(fb_size, &f->fb_cmsize)) return -EFAULT; return 0; } @@ -125,10 +125,10 @@ int sbusfb_ioctl_helper(unsigned long cmd, unsigned long arg, unsigned int index, count, i; if (get_user(index, &c->index) || - __get_user(count, &c->count) || - __get_user(ured, &c->red) || - __get_user(ugreen, &c->green) || - __get_user(ublue, &c->blue)) + get_user(count, &c->count) || + get_user(ured, &c->red) || + get_user(ugreen, &c->green) || + get_user(ublue, &c->blue)) return -EFAULT; cmap.len = 1; @@ -165,10 +165,10 @@ int sbusfb_ioctl_helper(unsigned long cmd, unsigned long arg, u8 red, green, blue; if (get_user(index, &c->index) || - __get_user(count, &c->count) || - __get_user(ured, &c->red) || - __get_user(ugreen, &c->green) || - __get_user(ublue, &c->blue)) + get_user(count, &c->count) || + get_user(ured, &c->red) || + get_user(ugreen, &c->green) || + get_user(ublue, &c->blue)) return -EFAULT; if (index + count > cmap->len) From e5379b2bc7e11c157a4a1d9e7fb81f52b79c66b1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 8 Oct 2018 12:57:36 +0200 Subject: [PATCH 0903/3715] fbdev: sbuslib: integer overflow in sbusfb_ioctl_helper() [ Upstream commit e5017716adb8aa5c01c52386c1b7470101ffe9c5 ] The "index + count" addition can overflow. Both come directly from the user. This bug leads to an information leak. Signed-off-by: Dan Carpenter Cc: Peter Malone Cc: Philippe Ombredanne Cc: Mathieu Malaterre Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Sasha Levin --- drivers/video/fbdev/sbuslib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/sbuslib.c b/drivers/video/fbdev/sbuslib.c index 90c51330969c..01a7110e61a7 100644 --- a/drivers/video/fbdev/sbuslib.c +++ b/drivers/video/fbdev/sbuslib.c @@ -171,7 +171,7 @@ int sbusfb_ioctl_helper(unsigned long cmd, unsigned long arg, get_user(ublue, &c->blue)) return -EFAULT; - if (index + count > cmap->len) + if (index > cmap->len || count > cmap->len - index) return -EINVAL; for (i = 0; i < count; i++) { From 784505c2378a1d96a4c97d2ec055ececfdcf4914 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 8 Oct 2018 13:14:35 +0200 Subject: [PATCH 0904/3715] reset: Fix potential use-after-free in __of_reset_control_get() [ Upstream commit b790c8ea5593d6dc3580adfad8e117eeb56af874 ] Calling of_node_put() decreases the reference count of a device tree object, and may free some data. However, the of_phandle_args structure embedding it is passed to reset_controller_dev.of_xlate() after that, so it may still be accessed. Move the call to of_node_put() down to fix this. Signed-off-by: Geert Uytterhoeven [p.zabel@pengutronix.de: moved of_node_put after mutex_unlock] Signed-off-by: Philipp Zabel Signed-off-by: Sasha Levin --- drivers/reset/core.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/reset/core.c b/drivers/reset/core.c index da4292e9de97..72b96b5c75a8 100644 --- a/drivers/reset/core.c +++ b/drivers/reset/core.c @@ -466,28 +466,29 @@ struct reset_control *__of_reset_control_get(struct device_node *node, break; } } - of_node_put(args.np); if (!rcdev) { - mutex_unlock(&reset_list_mutex); - return ERR_PTR(-EPROBE_DEFER); + rstc = ERR_PTR(-EPROBE_DEFER); + goto out; } if (WARN_ON(args.args_count != rcdev->of_reset_n_cells)) { - mutex_unlock(&reset_list_mutex); - return ERR_PTR(-EINVAL); + rstc = ERR_PTR(-EINVAL); + goto out; } rstc_id = rcdev->of_xlate(rcdev, &args); if (rstc_id < 0) { - mutex_unlock(&reset_list_mutex); - return ERR_PTR(rstc_id); + rstc = ERR_PTR(rstc_id); + goto out; } /* reset_list_mutex also protects the rcdev's reset_control list */ rstc = __reset_control_get_internal(rcdev, rstc_id, shared); +out: mutex_unlock(&reset_list_mutex); + of_node_put(args.np); return rstc; } From 5750a2d4939d6f82bfd7edf1a77e3844898ac3cf Mon Sep 17 00:00:00 2001 From: Shenghui Wang Date: Mon, 8 Oct 2018 20:41:15 +0800 Subject: [PATCH 0905/3715] bcache: recal cached_dev_sectors on detach [ Upstream commit 46010141da6677b81cc77f9b47f8ac62bd1cbfd3 ] Recal cached_dev_sectors on cached_dev detached, as recal done on cached_dev attached. Update the cached_dev_sectors before bcache_device_detach called as bcache_device_detach will set bcache_device->c to NULL. Signed-off-by: Shenghui Wang Signed-off-by: Coly Li Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/md/bcache/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 1a270e2262f5..690aeb09bbf5 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -905,6 +905,7 @@ static void cached_dev_detach_finish(struct work_struct *w) bch_write_bdev_super(dc, &cl); closure_sync(&cl); + calc_cached_dev_sectors(dc->disk.c); bcache_device_detach(&dc->disk); list_move(&dc->list, &uncached_devices); From ebf4b548902e52724de640b546acc07b2fc17ff4 Mon Sep 17 00:00:00 2001 From: Rajmohan Mani Date: Fri, 5 Oct 2018 12:22:17 -0400 Subject: [PATCH 0906/3715] media: dw9714: Fix error handling in probe function [ Upstream commit f9a0b14240a2d0bd196d35e8aac73df6eabd6382 ] Fixed the case where v4l2_async_unregister_subdev() is called unnecessarily in the error handling path in probe function. Signed-off-by: Rajmohan Mani Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/i2c/dw9714.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/media/i2c/dw9714.c b/drivers/media/i2c/dw9714.c index 95af4fc99cd0..c1273bcd5901 100644 --- a/drivers/media/i2c/dw9714.c +++ b/drivers/media/i2c/dw9714.c @@ -182,7 +182,8 @@ static int dw9714_probe(struct i2c_client *client) return 0; err_cleanup: - dw9714_subdev_cleanup(dw9714_dev); + v4l2_ctrl_handler_free(&dw9714_dev->ctrls_vcm); + media_entity_cleanup(&dw9714_dev->sd.entity); dev_err(&client->dev, "Probe failed: %d\n", rval); return rval; } From c0be5255a16a4b0fa08363eb6acc72886052affe Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 17 Nov 2017 13:02:36 +0100 Subject: [PATCH 0907/3715] s390/kasan: avoid vdso instrumentation [ Upstream commit 348498458505e202df41b6b9a78da448d39298b7 ] vdso is mapped into user space processes, which won't have kasan shodow mapped. Reviewed-by: Martin Schwidefsky Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin --- arch/s390/kernel/vdso32/Makefile | 3 ++- arch/s390/kernel/vdso64/Makefile | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index 101cadabfc89..6d87f800b4f2 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -25,9 +25,10 @@ obj-y += vdso32_wrapper.o extra-y += vdso32.lds CPPFLAGS_vdso32.lds += -P -C -U$(ARCH) -# Disable gcov profiling and ubsan for VDSO code +# Disable gcov profiling, ubsan and kasan for VDSO code GCOV_PROFILE := n UBSAN_SANITIZE := n +KASAN_SANITIZE := n # Force dependency (incbin is bad) $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index 36bbafcf4a77..4bc166b8c0cb 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -25,9 +25,10 @@ obj-y += vdso64_wrapper.o extra-y += vdso64.lds CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) -# Disable gcov profiling and ubsan for VDSO code +# Disable gcov profiling, ubsan and kasan for VDSO code GCOV_PROFILE := n UBSAN_SANITIZE := n +KASAN_SANITIZE := n # Force dependency (incbin is bad) $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so From 27647dde3018985379d58feacf2fc3f11e91b478 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 8 Oct 2018 10:05:20 +0200 Subject: [PATCH 0908/3715] proc/vmcore: Fix i386 build error of missing copy_oldmem_page_encrypted() [ Upstream commit cf089611f4c446285046fcd426d90c18f37d2905 ] Lianbo reported a build error with a particular 32-bit config, see Link below for details. Provide a weak copy_oldmem_page_encrypted() function which architectures can override, in the same manner other functionality in that file is supplied. Reported-by: Lianbo Jiang Signed-off-by: Borislav Petkov CC: x86@kernel.org Link: http://lkml.kernel.org/r/710b9d95-2f70-eadf-c4a1-c3dc80ee4ebb@redhat.com Signed-off-by: Sasha Levin --- fs/proc/vmcore.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 885d445afa0d..ce400f97370d 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -164,6 +164,16 @@ int __weak remap_oldmem_pfn_range(struct vm_area_struct *vma, return remap_pfn_range(vma, from, pfn, size, prot); } +/* + * Architectures which support memory encryption override this. + */ +ssize_t __weak +copy_oldmem_page_encrypted(unsigned long pfn, char *buf, size_t csize, + unsigned long offset, int userbuf) +{ + return copy_oldmem_page(pfn, buf, csize, offset, userbuf); +} + /* * Copy to either kernel or user space */ From 0f07c78437601b01eb2a75589561bea050a045e7 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 21 Sep 2018 13:21:31 -0700 Subject: [PATCH 0909/3715] backlight: lm3639: Unconditionally call led_classdev_unregister [ Upstream commit 7cea645ae9c5a54aa7904fddb2cdf250acd63a6c ] Clang warns that the address of a pointer will always evaluated as true in a boolean context. drivers/video/backlight/lm3639_bl.c:403:14: warning: address of 'pchip->cdev_torch' will always evaluate to 'true' [-Wpointer-bool-conversion] if (&pchip->cdev_torch) ~~ ~~~~~~~^~~~~~~~~~ drivers/video/backlight/lm3639_bl.c:405:14: warning: address of 'pchip->cdev_flash' will always evaluate to 'true' [-Wpointer-bool-conversion] if (&pchip->cdev_flash) ~~ ~~~~~~~^~~~~~~~~~ 2 warnings generated. These statements have been present since 2012, introduced by commit 0f59858d5119 ("backlight: add new lm3639 backlight driver"). Given that they have been called unconditionally since then presumably without any issues, removing the always true if statements to fix the warnings without any real world changes. Link: https://github.com/ClangBuiltLinux/linux/issues/119 Signed-off-by: Nathan Chancellor Reviewed-by: Daniel Thompson Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/video/backlight/lm3639_bl.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/video/backlight/lm3639_bl.c b/drivers/video/backlight/lm3639_bl.c index cd50df5807ea..086611c7bc03 100644 --- a/drivers/video/backlight/lm3639_bl.c +++ b/drivers/video/backlight/lm3639_bl.c @@ -400,10 +400,8 @@ static int lm3639_remove(struct i2c_client *client) regmap_write(pchip->regmap, REG_ENABLE, 0x00); - if (&pchip->cdev_torch) - led_classdev_unregister(&pchip->cdev_torch); - if (&pchip->cdev_flash) - led_classdev_unregister(&pchip->cdev_flash); + led_classdev_unregister(&pchip->cdev_torch); + led_classdev_unregister(&pchip->cdev_flash); if (pchip->bled) device_remove_file(&(pchip->bled->dev), &dev_attr_bled_mode); return 0; From 1faf8a9c7614303a5d29a047bbc9cc00c9412d67 Mon Sep 17 00:00:00 2001 From: Vignesh R Date: Sat, 30 Jun 2018 16:03:16 +0530 Subject: [PATCH 0910/3715] mfd: ti_am335x_tscadc: Keep ADC interface on if child is wakeup capable [ Upstream commit c974ac771479327b5424f60d58845e31daddadea ] If a child device like touchscreen is wakeup capable, then keep ADC interface on, so that a touching resistive screen will generate wakeup event to the system. Signed-off-by: Vignesh R Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/mfd/ti_am335x_tscadc.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/mfd/ti_am335x_tscadc.c b/drivers/mfd/ti_am335x_tscadc.c index 5894d6c16fab..ca9f0c8d1ed0 100644 --- a/drivers/mfd/ti_am335x_tscadc.c +++ b/drivers/mfd/ti_am335x_tscadc.c @@ -296,11 +296,24 @@ static int ti_tscadc_remove(struct platform_device *pdev) return 0; } +static int __maybe_unused ti_tscadc_can_wakeup(struct device *dev, void *data) +{ + return device_may_wakeup(dev); +} + static int __maybe_unused tscadc_suspend(struct device *dev) { struct ti_tscadc_dev *tscadc = dev_get_drvdata(dev); regmap_write(tscadc->regmap, REG_SE, 0x00); + if (device_for_each_child(dev, NULL, ti_tscadc_can_wakeup)) { + u32 ctrl; + + regmap_read(tscadc->regmap, REG_CTRL, &ctrl); + ctrl &= ~(CNTRLREG_POWERDOWN); + ctrl |= CNTRLREG_TSCSSENB; + regmap_write(tscadc->regmap, REG_CTRL, ctrl); + } pm_runtime_put_sync(dev); return 0; From 695583334b6b7f82c39ee124edfbfa48145ed571 Mon Sep 17 00:00:00 2001 From: He Zhe Date: Sun, 30 Sep 2018 00:45:53 +0800 Subject: [PATCH 0911/3715] printk: Give error on attempt to set log buffer length to over 2G [ Upstream commit e6fe3e5b7d16e8f146a4ae7fe481bc6e97acde1e ] The current printk() is ready to handle log buffer size up to 2G. Give an explicit error for users who want to use larger log buffer. Also fix printk formatting to show the 2G as a positive number. Link: http://lkml.kernel.org/r/20181008135916.gg4kkmoki5bgtco5@pathway.suse.cz Cc: rostedt@goodmis.org Cc: linux-kernel@vger.kernel.org Suggested-by: Sergey Senozhatsky Signed-off-by: He Zhe Reviewed-by: Sergey Senozhatsky [pmladek: Fixed to the really safe limit 2GB.] Signed-off-by: Petr Mladek Signed-off-by: Sasha Levin --- kernel/printk/printk.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 5aa96098c64d..5b33c14ab8b2 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -432,6 +432,7 @@ static u32 clear_idx; /* record buffer */ #define LOG_ALIGN __alignof__(struct printk_log) #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) +#define LOG_BUF_LEN_MAX (u32)(1 << 31) static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); static char *log_buf = __log_buf; static u32 log_buf_len = __LOG_BUF_LEN; @@ -1032,18 +1033,23 @@ void log_buf_vmcoreinfo_setup(void) static unsigned long __initdata new_log_buf_len; /* we practice scaling the ring buffer by powers of 2 */ -static void __init log_buf_len_update(unsigned size) +static void __init log_buf_len_update(u64 size) { + if (size > (u64)LOG_BUF_LEN_MAX) { + size = (u64)LOG_BUF_LEN_MAX; + pr_err("log_buf over 2G is not supported.\n"); + } + if (size) size = roundup_pow_of_two(size); if (size > log_buf_len) - new_log_buf_len = size; + new_log_buf_len = (unsigned long)size; } /* save requested log_buf_len since it's too early to process it */ static int __init log_buf_len_setup(char *str) { - unsigned int size; + u64 size; if (!str) return -EINVAL; @@ -1113,7 +1119,7 @@ void __init setup_log_buf(int early) } if (unlikely(!new_log_buf)) { - pr_err("log_buf_len: %ld bytes not available\n", + pr_err("log_buf_len: %lu bytes not available\n", new_log_buf_len); return; } @@ -1126,8 +1132,8 @@ void __init setup_log_buf(int early) memcpy(log_buf, __log_buf, __LOG_BUF_LEN); logbuf_unlock_irqrestore(flags); - pr_info("log_buf_len: %d bytes\n", log_buf_len); - pr_info("early log buf free: %d(%d%%)\n", + pr_info("log_buf_len: %u bytes\n", log_buf_len); + pr_info("early log buf free: %u(%u%%)\n", free, (free * 100) / __LOG_BUF_LEN); } From 5e63d5649ae3d47b9a2185544fbe71ed66882be4 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Thu, 4 Oct 2018 11:44:02 -0400 Subject: [PATCH 0912/3715] media: isif: fix a NULL pointer dereference bug [ Upstream commit a26ac6c1bed951b2066cc4b2257facd919e35c0b ] In isif_probe(), there is a while loop to get the ISIF base address and linearization table0 and table1 address. In the loop body, the function platform_get_resource() is called to get the resource. If platform_get_resource() returns NULL, the loop is terminated and the execution goes to 'fail_nobase_res'. Suppose the loop is terminated at the first iteration because platform_get_resource() returns NULL and the execution goes to 'fail_nobase_res'. Given that there is another while loop at 'fail_nobase_res' and i equals to 0, one iteration of the second while loop will be executed. However, the second while loop does not check the return value of platform_get_resource(). This can cause a NULL pointer dereference bug if the return value is a NULL pointer. This patch avoids the above issue by adding a check in the second while loop after the call to platform_get_resource(). Signed-off-by: Wenwen Wang Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/davinci/isif.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/media/platform/davinci/isif.c b/drivers/media/platform/davinci/isif.c index 5813b49391ed..90d0f13283ae 100644 --- a/drivers/media/platform/davinci/isif.c +++ b/drivers/media/platform/davinci/isif.c @@ -1102,7 +1102,8 @@ fail_nobase_res: while (i >= 0) { res = platform_get_resource(pdev, IORESOURCE_MEM, i); - release_mem_region(res->start, resource_size(res)); + if (res) + release_mem_region(res->start, resource_size(res)); i--; } vpfe_unregister_ccdc_device(&isif_hw_dev); From e30c1d9fc20112cf0b4d7ffaf9a0471d0d0a7f49 Mon Sep 17 00:00:00 2001 From: Tim Smith Date: Mon, 8 Oct 2018 12:15:40 -0500 Subject: [PATCH 0913/3715] GFS2: Flush the GFS2 delete workqueue before stopping the kernel threads [ Upstream commit 1eb8d7387908022951792a46fa040ad3942b3b08 ] Flushing the workqueue can cause operations to happen which might call gfs2_log_reserve(), or get stuck waiting for locks taken by such operations. gfs2_log_reserve() can io_schedule(). If this happens, it will never wake because the only thing which can wake it is gfs2_logd() which was already stopped. This causes umount of a gfs2 filesystem to wedge permanently if, for example, the umount immediately follows a large delete operation. When this occured, the following stack trace was obtained from the umount command [] flush_workqueue+0x1c8/0x520 [] gfs2_make_fs_ro+0x69/0x160 [gfs2] [] gfs2_put_super+0xa9/0x1c0 [gfs2] [] generic_shutdown_super+0x6f/0x100 [] kill_block_super+0x27/0x70 [] gfs2_kill_sb+0x71/0x80 [gfs2] [] deactivate_locked_super+0x3b/0x70 [] deactivate_super+0x59/0x60 [] cleanup_mnt+0x58/0x80 [] __cleanup_mnt+0x12/0x20 [] task_work_run+0x7d/0xa0 [] exit_to_usermode_loop+0x73/0x98 [] syscall_return_slowpath+0x41/0x50 [] int_ret_from_sys_call+0x25/0x8f [] 0xffffffffffffffff Signed-off-by: Tim Smith Signed-off-by: Mark Syms Signed-off-by: Bob Peterson Signed-off-by: Sasha Levin --- fs/gfs2/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 8e54f2e3a304..c3f3f1ae4e1b 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -845,10 +845,10 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) return error; + flush_workqueue(gfs2_delete_workqueue); kthread_stop(sdp->sd_quotad_process); kthread_stop(sdp->sd_logd_process); - flush_workqueue(gfs2_delete_workqueue); gfs2_quota_sync(sdp->sd_vfs, 0); gfs2_statfs_sync(sdp->sd_vfs, 0); From cf205535189770ec61025722a6b426fe28b6bef8 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 6 Oct 2018 14:01:42 -0400 Subject: [PATCH 0914/3715] media: cx231xx: fix potential sign-extension overflow on large shift [ Upstream commit 32ae592036d7aeaabcccb2b1715373a68639a768 ] Shifting the u8 value[3] by an int can lead to sign-extension overflow. For example, if value[3] is 0xff and the shift is 24 then it is promoted to int and then the top bit is sign-extended so that all upper 32 bits are set. Fix this by casting value[3] to a u32 before the shift. Detected by CoverityScan, CID#1016522 ("Unintended sign extension") Fixes: e0d3bafd0258 ("V4L/DVB (10954): Add cx231xx USB driver") Signed-off-by: Colin Ian King Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/usb/cx231xx/cx231xx-video.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/usb/cx231xx/cx231xx-video.c b/drivers/media/usb/cx231xx/cx231xx-video.c index 179b8481a870..fd33c2e9327d 100644 --- a/drivers/media/usb/cx231xx/cx231xx-video.c +++ b/drivers/media/usb/cx231xx/cx231xx-video.c @@ -1389,7 +1389,7 @@ int cx231xx_g_register(struct file *file, void *priv, ret = cx231xx_read_ctrl_reg(dev, VRT_GET_REGISTER, (u16)reg->reg, value, 4); reg->val = value[0] | value[1] << 8 | - value[2] << 16 | value[3] << 24; + value[2] << 16 | (u32)value[3] << 24; reg->size = 4; break; case 1: /* AFE - read byte */ From f1dda24adacd9fc99764a8767c927d3d33e630fc Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 27 Sep 2018 09:21:55 -0500 Subject: [PATCH 0915/3715] x86/kexec: Correct KEXEC_BACKUP_SRC_END off-by-one error [ Upstream commit 51fbf14f2528a8c6401290e37f1c893a2412f1d3 ] The only use of KEXEC_BACKUP_SRC_END is as an argument to walk_system_ram_res(): int crash_load_segments(struct kimage *image) { ... walk_system_ram_res(KEXEC_BACKUP_SRC_START, KEXEC_BACKUP_SRC_END, image, determine_backup_region); walk_system_ram_res() expects "start, end" arguments that are inclusive, i.e., the range to be walked includes both the start and end addresses. KEXEC_BACKUP_SRC_END was previously defined as (640 * 1024UL), which is the first address *past* the desired 0-640KB range. Define KEXEC_BACKUP_SRC_END as (640 * 1024UL - 1) so the KEXEC_BACKUP_SRC region is [0-0x9ffff], not [0-0xa0000]. Fixes: dd5f726076cc ("kexec: support for kexec on panic using new system call") Signed-off-by: Bjorn Helgaas Signed-off-by: Borislav Petkov CC: "H. Peter Anvin" CC: Andrew Morton CC: Brijesh Singh CC: Greg Kroah-Hartman CC: Ingo Molnar CC: Lianbo Jiang CC: Takashi Iwai CC: Thomas Gleixner CC: Tom Lendacky CC: Vivek Goyal CC: baiyaowei@cmss.chinamobile.com CC: bhe@redhat.com CC: dan.j.williams@intel.com CC: dyoung@redhat.com CC: kexec@lists.infradead.org Link: http://lkml.kernel.org/r/153805811578.1157.6948388946904655969.stgit@bhelgaas-glaptop.roam.corp.google.com Signed-off-by: Sasha Levin --- arch/x86/include/asm/kexec.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index f327236f0fa7..5125fca472bb 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -67,7 +67,7 @@ struct kimage; /* Memory to backup during crash kdump */ #define KEXEC_BACKUP_SRC_START (0UL) -#define KEXEC_BACKUP_SRC_END (640 * 1024UL) /* 640K */ +#define KEXEC_BACKUP_SRC_END (640 * 1024UL - 1) /* 640K */ /* * CPU does not save ss and sp on stack if execution is already From 4d7ddcdbdf436932493274073efac62ec2523868 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 4 Oct 2018 00:52:52 +0200 Subject: [PATCH 0916/3715] gpio: syscon: Fix possible NULL ptr usage [ Upstream commit 70728c29465bc4bfa7a8c14304771eab77e923c7 ] The priv->data->set can be NULL while flags contains GPIO_SYSCON_FEAT_OUT and chip->set is valid pointer. This happens in case the controller uses the default GPIO setter. Always use chip->set to access the setter to avoid possible NULL pointer dereferencing. Signed-off-by: Marek Vasut Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/gpio/gpio-syscon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-syscon.c b/drivers/gpio/gpio-syscon.c index 537cec7583fc..cf88a0bfe99e 100644 --- a/drivers/gpio/gpio-syscon.c +++ b/drivers/gpio/gpio-syscon.c @@ -122,7 +122,7 @@ static int syscon_gpio_dir_out(struct gpio_chip *chip, unsigned offset, int val) BIT(offs % SYSCON_REG_BITS)); } - priv->data->set(chip, offset, val); + chip->set(chip, offset, val); return 0; } From 9d736a3533741f35df9579521dc454ab08d232c8 Mon Sep 17 00:00:00 2001 From: Hieu Tran Dang Date: Tue, 2 Oct 2018 18:06:36 +0700 Subject: [PATCH 0917/3715] spi: fsl-lpspi: Prevent FIFO under/overrun by default [ Upstream commit de8978c388c66b8fca192213ec9f0727e964c652 ] Certain devices don't work well when a transmit FIFO underrun or receive FIFO overrun occurs. Example is the SAF400x radio chip when running at high speed which leads to garbage being sent to/received from the chip. In which case, it should stall waiting for further data to be available before proceeding. This patch unset the NOSTALL bit in CFGR1 by default to prevent this issue. Signed-off-by: Hieu Tran Dang Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-fsl-lpspi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c index cb3c73007ca1..8fe51f7541bb 100644 --- a/drivers/spi/spi-fsl-lpspi.c +++ b/drivers/spi/spi-fsl-lpspi.c @@ -287,7 +287,7 @@ static int fsl_lpspi_config(struct fsl_lpspi_data *fsl_lpspi) fsl_lpspi_set_watermark(fsl_lpspi); - temp = CFGR1_PCSCFG | CFGR1_MASTER | CFGR1_NOSTALL; + temp = CFGR1_PCSCFG | CFGR1_MASTER; if (fsl_lpspi->config.mode & SPI_CS_HIGH) temp |= CFGR1_PCSPOL; writel(temp, fsl_lpspi->base + IMX7ULP_CFGR1); From 596b42285406eb1622ec9f892e17cdf49760f7bb Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 9 Oct 2018 10:11:53 +0200 Subject: [PATCH 0918/3715] pinctrl: gemini: Mask and set properly [ Upstream commit d17f477c5bc6b4a5dd9f51ae263870da132a8e89 ] The code was written under the assumption that the regmap_update_bits() would mask the bits in the mask and set the bits in the value. It missed the points that it will not set bits in the value unless these are also masked in the mask. Set value bits that are not in the mask will simply be ignored. Fixes: 06351d133dea ("pinctrl: add a Gemini SoC pin controller") Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/pinctrl-gemini.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-gemini.c b/drivers/pinctrl/pinctrl-gemini.c index 39e6221e7100..05441dc2519d 100644 --- a/drivers/pinctrl/pinctrl-gemini.c +++ b/drivers/pinctrl/pinctrl-gemini.c @@ -2164,7 +2164,8 @@ static int gemini_pmx_set_mux(struct pinctrl_dev *pctldev, func->name, grp->name); regmap_read(pmx->map, GLOBAL_MISC_CTRL, &before); - regmap_update_bits(pmx->map, GLOBAL_MISC_CTRL, grp->mask, + regmap_update_bits(pmx->map, GLOBAL_MISC_CTRL, + grp->mask | grp->value, grp->value); regmap_read(pmx->map, GLOBAL_MISC_CTRL, &after); From 58fd14e758b652d02212304093133d45b68f1f37 Mon Sep 17 00:00:00 2001 From: Trent Piepho Date: Thu, 20 Sep 2018 19:18:34 +0000 Subject: [PATCH 0919/3715] spi: spidev: Fix OF tree warning logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 605b3bec73cbd74b4ac937b580cd0b47d1300484 ] spidev will make a big fuss if a device tree node binds a device by using "spidev" as the node's compatible property. However, the logic for this isn't looking for "spidev" in the compatible, but rather checking that the device is NOT compatible with spidev's list of devices. This causes a false positive if a device not named "rohm,dh2228fv", etc. binds to spidev, even if a means other than putting "spidev" in the device tree was used. E.g., the sysfs driver_override attribute. Signed-off-by: Trent Piepho Reviewed-by: Jan Kundrát Tested-by: Jan Kundrát Reviewed-by: Geert Uytterhoeven Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spidev.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index cda10719d1d1..c5fe08bc34a0 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -724,11 +724,9 @@ static int spidev_probe(struct spi_device *spi) * compatible string, it is a Linux implementation thing * rather than a description of the hardware. */ - if (spi->dev.of_node && !of_match_device(spidev_dt_ids, &spi->dev)) { - dev_err(&spi->dev, "buggy DT: spidev listed directly in DT\n"); - WARN_ON(spi->dev.of_node && - !of_match_device(spidev_dt_ids, &spi->dev)); - } + WARN(spi->dev.of_node && + of_device_is_compatible(spi->dev.of_node, "spidev"), + "%pOF: buggy DT: spidev listed directly in DT\n", spi->dev.of_node); spidev_probe_acpi(spi); From 57cae03f53fc4b6c647520532f4d5822f80bcdb3 Mon Sep 17 00:00:00 2001 From: Timothy E Baldwin Date: Mon, 8 Oct 2018 19:26:48 +0100 Subject: [PATCH 0920/3715] ARM: 8802/1: Call syscall_trace_exit even when system call skipped [ Upstream commit f18aef742c8fbd68e280dff0a63ba0ca6ee8ad85 ] On at least x86 and ARM64, and as documented in the ptrace man page a skipped system call will still cause a syscall exit ptrace stop. Previous to this commit 32-bit ARM did not, resulting in strace being confused when seccomp skips system calls. This change also impacts programs that use ptrace to skip system calls. Fixes: ad75b51459ae ("ARM: 7579/1: arch/allow a scno of -1 to not cause a SIGILL") Signed-off-by: Timothy E Baldwin Signed-off-by: Eugene Syromyatnikov Reviewed-by: Kees Cook Tested-by: Kees Cook Tested-by: Eugene Syromyatnikov Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/kernel/entry-common.S | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index d7dc808a3d15..08a7132f5600 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -282,16 +282,15 @@ __sys_trace: cmp scno, #-1 @ skip the syscall? bne 2b add sp, sp, #S_OFF @ restore stack - b ret_slow_syscall -__sys_trace_return: - str r0, [sp, #S_R0 + S_OFF]! @ save returned r0 +__sys_trace_return_nosave: + enable_irq_notrace mov r0, sp bl syscall_trace_exit b ret_slow_syscall -__sys_trace_return_nosave: - enable_irq_notrace +__sys_trace_return: + str r0, [sp, #S_R0 + S_OFF]! @ save returned r0 mov r0, sp bl syscall_trace_exit b ret_slow_syscall From fff32b6c51beefb45a3ef798529552965373f90e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 5 Sep 2018 15:54:01 +0100 Subject: [PATCH 0921/3715] orangefs: rate limit the client not running info message [ Upstream commit 2978d873471005577e7b68a528b4f256a529b030 ] Currently accessing various /sys/fs/orangefs files will spam the kernel log with the following info message when the client is not running: [ 491.489284] sysfs_service_op_show: Client not running :-5: Rate limit this info message to make it less spammy. Signed-off-by: Colin Ian King Signed-off-by: Mike Marshall Signed-off-by: Sasha Levin --- fs/orangefs/orangefs-sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/orangefs/orangefs-sysfs.c b/fs/orangefs/orangefs-sysfs.c index 079a465796f3..bc56df2ae705 100644 --- a/fs/orangefs/orangefs-sysfs.c +++ b/fs/orangefs/orangefs-sysfs.c @@ -323,7 +323,7 @@ static ssize_t sysfs_service_op_show(struct kobject *kobj, /* Can't do a service_operation if the client is not running... */ rc = is_daemon_in_service(); if (rc) { - pr_info("%s: Client not running :%d:\n", + pr_info_ratelimited("%s: Client not running :%d:\n", __func__, is_daemon_in_service()); goto out; From 216bbe2f33fde45902ecbafd21c1dc935a2cf8d0 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 10 Oct 2018 20:39:18 +0200 Subject: [PATCH 0922/3715] pinctrl: gemini: Fix up TVC clock group [ Upstream commit a85c928f6a7856a09e47d9b37faa3407c7ac6a8e ] The previous fix made the TVC clock get muxed in on the D-Link DIR-685 instead of giving nagging warnings of this not working. Not good. We didn't want that, as it breaks video. Create a specific group for the TVC CLK, and break out a specific GPIO group for it on the SL3516 so we can use that line as GPIO if we don't need the TVC CLK. Fixes: d17f477c5bc6 ("pinctrl: gemini: Mask and set properly") Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/pinctrl-gemini.c | 44 ++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/drivers/pinctrl/pinctrl-gemini.c b/drivers/pinctrl/pinctrl-gemini.c index 05441dc2519d..78fa26c1a89f 100644 --- a/drivers/pinctrl/pinctrl-gemini.c +++ b/drivers/pinctrl/pinctrl-gemini.c @@ -551,13 +551,16 @@ static const unsigned int tvc_3512_pins[] = { 319, /* TVC_DATA[1] */ 301, /* TVC_DATA[2] */ 283, /* TVC_DATA[3] */ - 265, /* TVC_CLK */ 320, /* TVC_DATA[4] */ 302, /* TVC_DATA[5] */ 284, /* TVC_DATA[6] */ 266, /* TVC_DATA[7] */ }; +static const unsigned int tvc_clk_3512_pins[] = { + 265, /* TVC_CLK */ +}; + /* NAND flash pins */ static const unsigned int nflash_3512_pins[] = { 199, 200, 201, 202, 216, 217, 218, 219, 220, 234, 235, 236, 237, 252, @@ -589,7 +592,7 @@ static const unsigned int pflash_3512_pins_extended[] = { /* Serial flash pins CE0, CE1, DI, DO, CK */ static const unsigned int sflash_3512_pins[] = { 230, 231, 232, 233, 211 }; -/* The GPIO0A (0) pin overlap with TVC and extended parallel flash */ +/* The GPIO0A (0) pin overlap with TVC CLK and extended parallel flash */ static const unsigned int gpio0a_3512_pins[] = { 265 }; /* The GPIO0B (1-4) pins overlap with TVC and ICE */ @@ -772,7 +775,13 @@ static const struct gemini_pin_group gemini_3512_pin_groups[] = { .num_pins = ARRAY_SIZE(tvc_3512_pins), /* Conflict with character LCD and ICE */ .mask = LCD_PADS_ENABLE, - .value = TVC_PADS_ENABLE | TVC_CLK_PAD_ENABLE, + .value = TVC_PADS_ENABLE, + }, + { + .name = "tvcclkgrp", + .pins = tvc_clk_3512_pins, + .num_pins = ARRAY_SIZE(tvc_clk_3512_pins), + .value = TVC_CLK_PAD_ENABLE, }, /* * The construction is done such that it is possible to use a serial @@ -809,8 +818,8 @@ static const struct gemini_pin_group gemini_3512_pin_groups[] = { .name = "gpio0agrp", .pins = gpio0a_3512_pins, .num_pins = ARRAY_SIZE(gpio0a_3512_pins), - /* Conflict with TVC */ - .mask = TVC_PADS_ENABLE, + /* Conflict with TVC CLK */ + .mask = TVC_CLK_PAD_ENABLE, }, { .name = "gpio0bgrp", @@ -1476,13 +1485,16 @@ static const unsigned int tvc_3516_pins[] = { 311, /* TVC_DATA[1] */ 394, /* TVC_DATA[2] */ 374, /* TVC_DATA[3] */ - 333, /* TVC_CLK */ 354, /* TVC_DATA[4] */ 395, /* TVC_DATA[5] */ 312, /* TVC_DATA[6] */ 334, /* TVC_DATA[7] */ }; +static const unsigned int tvc_clk_3516_pins[] = { + 333, /* TVC_CLK */ +}; + /* NAND flash pins */ static const unsigned int nflash_3516_pins[] = { 243, 260, 261, 224, 280, 262, 281, 264, 300, 263, 282, 301, 320, 283, @@ -1515,7 +1527,7 @@ static const unsigned int pflash_3516_pins_extended[] = { static const unsigned int sflash_3516_pins[] = { 296, 338, 295, 359, 339 }; /* The GPIO0A (0-4) pins overlap with TVC and extended parallel flash */ -static const unsigned int gpio0a_3516_pins[] = { 333, 354, 395, 312, 334 }; +static const unsigned int gpio0a_3516_pins[] = { 354, 395, 312, 334 }; /* The GPIO0B (5-7) pins overlap with ICE */ static const unsigned int gpio0b_3516_pins[] = { 375, 396, 376 }; @@ -1547,6 +1559,9 @@ static const unsigned int gpio0j_3516_pins[] = { 359, 339 }; /* The GPIO0K (30,31) pins overlap with NAND flash */ static const unsigned int gpio0k_3516_pins[] = { 275, 298 }; +/* The GPIO0L (0) pins overlap with TVC_CLK */ +static const unsigned int gpio0l_3516_pins[] = { 333 }; + /* The GPIO1A (0-4) pins that overlap with IDE and parallel flash */ static const unsigned int gpio1a_3516_pins[] = { 221, 200, 222, 201, 220 }; @@ -1693,7 +1708,13 @@ static const struct gemini_pin_group gemini_3516_pin_groups[] = { .num_pins = ARRAY_SIZE(tvc_3516_pins), /* Conflict with character LCD */ .mask = LCD_PADS_ENABLE, - .value = TVC_PADS_ENABLE | TVC_CLK_PAD_ENABLE, + .value = TVC_PADS_ENABLE, + }, + { + .name = "tvcclkgrp", + .pins = tvc_clk_3516_pins, + .num_pins = ARRAY_SIZE(tvc_clk_3516_pins), + .value = TVC_CLK_PAD_ENABLE, }, /* * The construction is done such that it is possible to use a serial @@ -1804,6 +1825,13 @@ static const struct gemini_pin_group gemini_3516_pin_groups[] = { /* Conflict with parallel and NAND flash */ .value = PFLASH_PADS_DISABLE | NAND_PADS_DISABLE, }, + { + .name = "gpio0lgrp", + .pins = gpio0l_3516_pins, + .num_pins = ARRAY_SIZE(gpio0l_3516_pins), + /* Conflict with TVE CLK */ + .mask = TVC_CLK_PAD_ENABLE, + }, { .name = "gpio1agrp", .pins = gpio1a_3516_pins, From fdb7d2fc03eac8b19435bc28b020160351903cf6 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 21 Sep 2018 12:10:47 +0200 Subject: [PATCH 0923/3715] hwmon: (pwm-fan) Silence error on probe deferral [ Upstream commit 9f67f7583e77fe5dc57aab3a6159c2642544eaad ] Probe deferrals aren't actual errors, so silence the error message in case the PWM cannot yet be acquired. Signed-off-by: Thierry Reding Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/pwm-fan.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/pwm-fan.c b/drivers/hwmon/pwm-fan.c index 6d30bec04f2d..f981da686d7e 100644 --- a/drivers/hwmon/pwm-fan.c +++ b/drivers/hwmon/pwm-fan.c @@ -221,8 +221,12 @@ static int pwm_fan_probe(struct platform_device *pdev) ctx->pwm = devm_of_pwm_get(&pdev->dev, pdev->dev.of_node, NULL); if (IS_ERR(ctx->pwm)) { - dev_err(&pdev->dev, "Could not get PWM\n"); - return PTR_ERR(ctx->pwm); + ret = PTR_ERR(ctx->pwm); + + if (ret != -EPROBE_DEFER) + dev_err(&pdev->dev, "Could not get PWM: %d\n", ret); + + return ret; } platform_set_drvdata(pdev, ctx); From 31f3c76efc6273c55a59363ad3a37d2b0a5a0b80 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Sat, 29 Sep 2018 14:44:06 -0700 Subject: [PATCH 0924/3715] hwmon: (ina3221) Fix INA3221_CONFIG_MODE macros [ Upstream commit 791ebc9d34e9d212fc03742c31654b017d385926 ] The three INA3221_CONFIG_MODE macros are not correctly defined here. The MODE3-1 bits are located at BIT 2-0 according to the datasheet. So this patch just fixes them by shifting all of them with a correct offset. However, this isn't a crital bug fix as the driver does not use any of them at this point. Signed-off-by: Nicolin Chen Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/ina3221.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/ina3221.c b/drivers/hwmon/ina3221.c index e6b49500c52a..8c9555313fc3 100644 --- a/drivers/hwmon/ina3221.c +++ b/drivers/hwmon/ina3221.c @@ -38,9 +38,9 @@ #define INA3221_WARN3 0x0c #define INA3221_MASK_ENABLE 0x0f -#define INA3221_CONFIG_MODE_SHUNT BIT(1) -#define INA3221_CONFIG_MODE_BUS BIT(2) -#define INA3221_CONFIG_MODE_CONTINUOUS BIT(3) +#define INA3221_CONFIG_MODE_SHUNT BIT(0) +#define INA3221_CONFIG_MODE_BUS BIT(1) +#define INA3221_CONFIG_MODE_CONTINUOUS BIT(2) #define INA3221_RSHUNT_DEFAULT 10000 From b72443ac86e762f31681861dac3f6b4e58ee7e38 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 9 Oct 2018 20:06:36 +0200 Subject: [PATCH 0925/3715] netfilter: nft_compat: do not dump private area [ Upstream commit d701d8117200399d85e63a737d2e4e897932f3b6 ] Zero pad private area, otherwise we expose private kernel pointer to userspace. This patch also zeroes the tail area after the ->matchsize and ->targetsize that results from XT_ALIGN(). Fixes: 0ca743a55991 ("netfilter: nf_tables: add compatibility layer for x_tables") Reported-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_compat.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 7344ec7fff2a..8281656808ae 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -291,6 +291,24 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) module_put(me); } +static int nft_extension_dump_info(struct sk_buff *skb, int attr, + const void *info, + unsigned int size, unsigned int user_size) +{ + unsigned int info_size, aligned_size = XT_ALIGN(size); + struct nlattr *nla; + + nla = nla_reserve(skb, attr, aligned_size); + if (!nla) + return -1; + + info_size = user_size ? : size; + memcpy(nla_data(nla), info, info_size); + memset(nla_data(nla) + info_size, 0, aligned_size - info_size); + + return 0; +} + static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct xt_target *target = expr->ops->data; @@ -298,7 +316,8 @@ static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr) if (nla_put_string(skb, NFTA_TARGET_NAME, target->name) || nla_put_be32(skb, NFTA_TARGET_REV, htonl(target->revision)) || - nla_put(skb, NFTA_TARGET_INFO, XT_ALIGN(target->targetsize), info)) + nft_extension_dump_info(skb, NFTA_TARGET_INFO, info, + target->targetsize, target->usersize)) goto nla_put_failure; return 0; @@ -534,7 +553,8 @@ static int __nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr, if (nla_put_string(skb, NFTA_MATCH_NAME, match->name) || nla_put_be32(skb, NFTA_MATCH_REV, htonl(match->revision)) || - nla_put(skb, NFTA_MATCH_INFO, XT_ALIGN(match->matchsize), info)) + nft_extension_dump_info(skb, NFTA_MATCH_INFO, info, + match->matchsize, match->usersize)) goto nla_put_failure; return 0; From da8f0b1fa0b1329a5b75d460ff12840c26ad310b Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Thu, 4 Oct 2018 13:02:53 +0800 Subject: [PATCH 0926/3715] misc: cxl: Fix possible null pointer dereference [ Upstream commit 3dac3583bf1a61db6aaf31dfd752c677a4400afd ] It is not safe to dereference an object before a null test. It is not needed and just remove them. Ftrace can be used instead. Signed-off-by: zhong jiang Acked-by: Andrew Donnellan Acked-by: Frederic Barrat Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/misc/cxl/guest.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c index 1a64eb185cfd..de2ce5539545 100644 --- a/drivers/misc/cxl/guest.c +++ b/drivers/misc/cxl/guest.c @@ -1028,8 +1028,6 @@ err1: void cxl_guest_remove_afu(struct cxl_afu *afu) { - pr_devel("in %s - AFU(%d)\n", __func__, afu->slice); - if (!afu) return; From cc29bec8bbfb682bd49f7efa9225c50a6c7235fe Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 6 Oct 2018 19:35:03 +0200 Subject: [PATCH 0927/3715] mac80211: minstrel: fix using short preamble CCK rates on HT clients [ Upstream commit 37439f2d6e43ae79e22be9be159f0af157468f82 ] mi->supported[MINSTREL_CCK_GROUP] needs to be updated short preamble rates need to be marked as supported regardless of whether it's currently enabled. Its state can change at any time without a rate_update call. Fixes: 782dda00ab8e ("mac80211: minstrel_ht: move short preamble check out of get_rate") Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/rc80211_minstrel_ht.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 4a5bdad9f303..25cb3e5f8b48 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -1132,7 +1132,6 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, struct ieee80211_mcs_info *mcs = &sta->ht_cap.mcs; u16 sta_cap = sta->ht_cap.cap; struct ieee80211_sta_vht_cap *vht_cap = &sta->vht_cap; - struct sta_info *sinfo = container_of(sta, struct sta_info, sta); int use_vht; int n_supported = 0; int ack_dur; @@ -1258,8 +1257,7 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, if (!n_supported) goto use_legacy; - if (test_sta_flag(sinfo, WLAN_STA_SHORT_PREAMBLE)) - mi->cck_supported_short |= mi->cck_supported_short << 4; + mi->supported[MINSTREL_CCK_GROUP] |= mi->cck_supported_short << 4; /* create an initial rate table with the lowest supported rates */ minstrel_ht_update_stats(mp, mi); From 21082313c028bb0ff4eae44029e202dbcb81814b Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 6 Oct 2018 19:35:04 +0200 Subject: [PATCH 0928/3715] mac80211: minstrel: fix CCK rate group streams value [ Upstream commit 80df9be67c44cb636bbc92caeddad8caf334c53c ] Fixes a harmless underflow issue when CCK rates are actively being used Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/rc80211_minstrel_ht.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 25cb3e5f8b48..bc97d31907f6 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -129,7 +129,7 @@ #define CCK_GROUP \ [MINSTREL_CCK_GROUP] = { \ - .streams = 0, \ + .streams = 1, \ .flags = 0, \ .duration = { \ CCK_DURATION_LIST(false), \ From 28b6a09fbf23d2616af88b13b5654e89bda74777 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 6 Oct 2018 19:35:05 +0200 Subject: [PATCH 0929/3715] mac80211: minstrel: fix sampling/reporting of CCK rates in HT mode [ Upstream commit 972b66b86f85f4e8201db454f4c3e9d990cf9836 ] Long/short preamble selection cannot be sampled separately, since it depends on the BSS state. Because of that, sampling attempts to currently not used preamble modes are not counted in the statistics, which leads to CCK rates being sampled too often. Fix statistics accounting for long/short preamble by increasing the index where necessary. Fix excessive CCK rate sampling by dropping unsupported sample attempts. This improves throughput on 2.4 GHz channels Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/rc80211_minstrel_ht.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index bc97d31907f6..e57811e4b91f 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -282,7 +282,8 @@ minstrel_ht_get_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, break; /* short preamble */ - if (!(mi->supported[group] & BIT(idx))) + if ((mi->supported[group] & BIT(idx + 4)) && + (rate->flags & IEEE80211_TX_RC_USE_SHORT_PREAMBLE)) idx += 4; } return &mi->groups[group].rates[idx]; @@ -1077,18 +1078,23 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, return; sample_group = &minstrel_mcs_groups[sample_idx / MCS_GROUP_RATES]; + sample_idx %= MCS_GROUP_RATES; + + if (sample_group == &minstrel_mcs_groups[MINSTREL_CCK_GROUP] && + (sample_idx >= 4) != txrc->short_preamble) + return; + info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; rate->count = 1; - if (sample_idx / MCS_GROUP_RATES == MINSTREL_CCK_GROUP) { + if (sample_group == &minstrel_mcs_groups[MINSTREL_CCK_GROUP]) { int idx = sample_idx % ARRAY_SIZE(mp->cck_rates); rate->idx = mp->cck_rates[idx]; } else if (sample_group->flags & IEEE80211_TX_RC_VHT_MCS) { ieee80211_rate_set_vht(rate, sample_idx % MCS_GROUP_RATES, sample_group->streams); } else { - rate->idx = sample_idx % MCS_GROUP_RATES + - (sample_group->streams - 1) * 8; + rate->idx = sample_idx + (sample_group->streams - 1) * 8; } rate->flags = sample_group->flags; From 74c0a15d28d1320bb8bfc88213579e45963ab665 Mon Sep 17 00:00:00 2001 From: Huibin Hong Date: Wed, 10 Oct 2018 11:00:32 +0200 Subject: [PATCH 0930/3715] spi: rockchip: initialize dma_slave_config properly [ Upstream commit dd8fd2cbc73f8650f651da71fc61a6e4f30c1566 ] The rxconf and txconf structs are allocated on the stack, so make sure we zero them before filling out the relevant fields. Signed-off-by: Huibin Hong Signed-off-by: Emil Renner Berthing Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-rockchip.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c index fdcf3076681b..185bbdce62b1 100644 --- a/drivers/spi/spi-rockchip.c +++ b/drivers/spi/spi-rockchip.c @@ -445,6 +445,9 @@ static int rockchip_spi_prepare_dma(struct rockchip_spi *rs) struct dma_slave_config rxconf, txconf; struct dma_async_tx_descriptor *rxdesc, *txdesc; + memset(&rxconf, 0, sizeof(rxconf)); + memset(&txconf, 0, sizeof(txconf)); + spin_lock_irqsave(&rs->lock, flags); rs->state &= ~RXBUSY; rs->state &= ~TXBUSY; From 75f10bdb5595455425d6c5a0381379d89967c40b Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 11 Oct 2018 07:47:52 +0000 Subject: [PATCH 0931/3715] mlxsw: spectrum_switchdev: Check notification relevance based on upper device [ Upstream commit 5050f6ae253ad1307af3486c26fc4f94287078b7 ] VxLAN FDB updates are sent with the VxLAN device which is not our upper and will therefore be ignored by current code. Solve this by checking whether the upper device (bridge) is our upper. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 8a1788108f52..698de51b3fef 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1939,8 +1939,15 @@ static int mlxsw_sp_switchdev_event(struct notifier_block *unused, struct net_device *dev = switchdev_notifier_info_to_dev(ptr); struct mlxsw_sp_switchdev_event_work *switchdev_work; struct switchdev_notifier_fdb_info *fdb_info = ptr; + struct net_device *br_dev; - if (!mlxsw_sp_port_dev_lower_find_rcu(dev)) + /* Tunnel devices are not our uppers, so check their master instead */ + br_dev = netdev_master_upper_dev_get_rcu(dev); + if (!br_dev) + return NOTIFY_DONE; + if (!netif_is_bridge_master(br_dev)) + return NOTIFY_DONE; + if (!mlxsw_sp_port_dev_lower_find_rcu(br_dev)) return NOTIFY_DONE; switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC); From 1dee3a3efdb877419639f3cafb1f91cfcf9c11ab Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 5 Dec 2018 19:27:44 +0200 Subject: [PATCH 0932/3715] ARM: dts: omap5: Fix dual-role mode on Super-Speed port [ Upstream commit a763ecc15d0e37c3a15ff6825183061209832685 ] OMAP5's Super-Speed USB port has a software mailbox register that needs to be fed with VBUS and ID events from an external VBUS/ID comparator. Without this, Host role will not work correctly. Fixes: 656c1a65ab55 ("ARM: dts: omap5: enable OTG role for DWC3 controller") Reported-by: H. Nikolaus Schaller Signed-off-by: Roger Quadros Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/boot/dts/omap5-board-common.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index f65343f8e1d6..c58f14de0145 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -695,6 +695,7 @@ }; &dwc3 { + extcon = <&extcon_usb3>; dr_mode = "otg"; }; From d09d148cad42abf45addbf6f1d39733e2993d899 Mon Sep 17 00:00:00 2001 From: Alan Mikhak Date: Thu, 23 May 2019 14:18:00 -0700 Subject: [PATCH 0933/3715] tools: PCI: Fix broken pcitest compilation [ Upstream commit 8a5e0af240e07dd3d4897eb8ff52aab757da7fab ] pcitest is currently broken due to the following compiler error and related warning. Fix by changing the run_test() function signature to return an integer result. pcitest.c: In function run_test: pcitest.c:143:9: warning: return with a value, in function returning void return (ret < 0) ? ret : 1 - ret; /* return 0 if test succeeded */ pcitest.c: In function main: pcitest.c:232:9: error: void value not ignored as it ought to be return run_test(test); Fixes: fef31ecaaf2c ("tools: PCI: Fix compilation warnings") Signed-off-by: Alan Mikhak Signed-off-by: Lorenzo Pieralisi Reviewed-by: Paul Walmsley Signed-off-by: Sasha Levin --- tools/pci/pcitest.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/pci/pcitest.c b/tools/pci/pcitest.c index 8ca1c62bc06d..7002df55826f 100644 --- a/tools/pci/pcitest.c +++ b/tools/pci/pcitest.c @@ -42,15 +42,15 @@ struct pci_test { unsigned long size; }; -static void run_test(struct pci_test *test) +static int run_test(struct pci_test *test) { - long ret; + int ret = -EINVAL; int fd; fd = open(test->device, O_RDWR); if (fd < 0) { perror("can't open PCI Endpoint Test device"); - return; + return -ENODEV; } if (test->barnum >= 0 && test->barnum <= 5) { From cd3cac5198777cabf5ab6a8226148431e42a0ddf Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 17 Oct 2018 23:39:41 +1100 Subject: [PATCH 0934/3715] powerpc/time: Fix clockevent_decrementer initalisation for PR KVM [ Upstream commit b4d16ab58c41ff0125822464bdff074cebd0fe47 ] In the recent commit 8b78fdb045de ("powerpc/time: Use clockevents_register_device(), fixing an issue with large decrementer") we changed the way we initialise the decrementer clockevent(s). We no longer initialise the mult & shift values of decrementer_clockevent itself. This has the effect of breaking PR KVM, because it uses those values in kvmppc_emulate_dec(). The symptom is guest kernels spin forever mid-way through boot. For now fix it by assigning back to decrementer_clockevent the mult and shift values. Fixes: 8b78fdb045de ("powerpc/time: Use clockevents_register_device(), fixing an issue with large decrementer") Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/kernel/time.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 870e75d30459..7c7c5a16284d 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -988,6 +988,10 @@ static void register_decrementer_clockevent(int cpu) printk_once(KERN_DEBUG "clockevent: %s mult[%x] shift[%d] cpu[%d]\n", dec->name, dec->mult, dec->shift, cpu); + + /* Set values for KVM, see kvm_emulate_dec() */ + decrementer_clockevent.mult = dec->mult; + decrementer_clockevent.shift = dec->shift; } static void enable_large_decrementer(void) From ee2df37dd9a392260387c6d392d053c8f0538c0f Mon Sep 17 00:00:00 2001 From: Takeshi Saito Date: Wed, 15 May 2019 20:23:46 +0200 Subject: [PATCH 0935/3715] mmc: tmio: fix SCC error handling to avoid false positive CRC error [ Upstream commit 51b72656bb39fdcb8f3174f4007bcc83ad1d275f ] If an SCC error occurs during a read/write command execution, a false positive CRC error message is output. mmcblk0: response CRC error sending r/w cmd command, card status 0x900 check_scc_error() checks SCC_RVSREQ.RVSERR bit. RVSERR detects a correction error in the next (up or down) delay tap position. However, since the command is successful, only retuning needs to be executed. This has been confirmed by HW engineers. Thus, on SCC error, set retuning flag instead of setting an error code. Fixes: b85fb0a1c8ae ("mmc: tmio: Fix SCC error detection") Signed-off-by: Takeshi Saito [wsa: updated comment and commit message, removed some braces] Signed-off-by: Wolfram Sang Reviewed-by: Simon Horman Reviewed-by: Yoshihiro Shimoda Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/tmio_mmc_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index 01e51b794575..2fd862dc9770 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -914,8 +914,9 @@ static void tmio_mmc_finish_request(struct tmio_mmc_host *host) if (mrq->cmd->error || (mrq->data && mrq->data->error)) tmio_mmc_abort_dma(host); + /* SCC error means retune, but executed command was still successful */ if (host->check_scc_error && host->check_scc_error(host)) - mrq->cmd->error = -EILSEQ; + mmc_retune_needed(host->mmc); /* If SET_BLOCK_COUNT, continue with main command */ if (host->mrq && !mrq->cmd->error) { From 43598c571e7ed29e4c81e35b4a870fe6b9f8d58e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 24 Nov 2019 08:23:35 +0100 Subject: [PATCH 0936/3715] Linux 4.14.156 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1f427c8bcc56..b1db48ad832e 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 155 +SUBLEVEL = 156 EXTRAVERSION = NAME = Petit Gorille From 714ada7cabc7c07f8ec20063895e80364f0ac952 Mon Sep 17 00:00:00 2001 From: Ram Muthiah Date: Mon, 25 Nov 2019 16:07:29 -0800 Subject: [PATCH 0937/3715] ANDROID: removed CONFIG_PM_WAKELOCKS Change-Id: Ie61dae6654cd677a53a733a478384143d736f746 Signed-off-by: Ram Muthiah Bug: 143710295 Bug: 115946999 Test: Treehugger --- arch/arm64/configs/cuttlefish_defconfig | 3 --- arch/x86/configs/x86_64_cuttlefish_defconfig | 3 --- 2 files changed, 6 deletions(-) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index 66b476d89814..02051840d72b 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -63,9 +63,6 @@ CONFIG_RANDOMIZE_BASE=y # CONFIG_EFI is not set # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_COMPAT=y -CONFIG_PM_WAKELOCKS=y -CONFIG_PM_WAKELOCKS_LIMIT=0 -# CONFIG_PM_WAKELOCKS_GC is not set CONFIG_PM_DEBUG=y CONFIG_CPU_IDLE=y CONFIG_ARM_CPUIDLE=y diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 625142b2f8bc..55a88517c3a8 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -65,9 +65,6 @@ CONFIG_PHYSICAL_START=0x200000 CONFIG_PHYSICAL_ALIGN=0x1000000 CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="console=ttyS0 reboot=p" -CONFIG_PM_WAKELOCKS=y -CONFIG_PM_WAKELOCKS_LIMIT=0 -# CONFIG_PM_WAKELOCKS_GC is not set CONFIG_PM_DEBUG=y CONFIG_ACPI_PROCFS_POWER=y # CONFIG_ACPI_FAN is not set From 08265ef6179e82ca70d5712223d568f725f371fb Mon Sep 17 00:00:00 2001 From: Luigi Rizzo Date: Fri, 15 Nov 2019 12:12:25 -0800 Subject: [PATCH 0938/3715] net/mlx4_en: fix mlx4 ethtool -N insertion [ Upstream commit 34e59836565e36fade1464e054a3551c1a0364be ] ethtool expects ETHTOOL_GRXCLSRLALL to set ethtool_rxnfc->data with the total number of entries in the rx classifier table. Surprisingly, mlx4 is missing this part (in principle ethtool could still move forward and try the insert). Tested: compiled and run command: phh13:~# ethtool -N eth1 flow-type udp4 queue 4 Added rule with ID 255 Signed-off-by: Luigi Rizzo Reviewed-by: Tariq Toukan Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index d631cd94ee63..25a15bdc125e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -1722,6 +1722,7 @@ static int mlx4_en_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, err = mlx4_en_get_flow(dev, cmd, cmd->fs.location); break; case ETHTOOL_GRXCLSRLALL: + cmd->data = MAX_NUM_OF_FS_RULES; while ((!err || err == -ENOENT) && priority < cmd->rule_cnt) { err = mlx4_en_get_flow(dev, cmd, i); if (!err) From 9ed49fc95f37a457d940324c033c20d85cefb930 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 20 Nov 2019 15:34:38 +0300 Subject: [PATCH 0939/3715] net: rtnetlink: prevent underflows in do_setvfinfo() [ Upstream commit d658c8f56ec7b3de8051a24afb25da9ba3c388c5 ] The "ivm->vf" variable is a u32, but the problem is that a number of drivers cast it to an int and then forget to check for negatives. An example of this is in the cxgb4 driver. drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c 2890 static int cxgb4_mgmt_get_vf_config(struct net_device *dev, 2891 int vf, struct ifla_vf_info *ivi) ^^^^^^ 2892 { 2893 struct port_info *pi = netdev_priv(dev); 2894 struct adapter *adap = pi->adapter; 2895 struct vf_info *vfinfo; 2896 2897 if (vf >= adap->num_vfs) ^^^^^^^^^^^^^^^^^^^ 2898 return -EINVAL; 2899 vfinfo = &adap->vfinfo[vf]; ^^^^^^^^^^^^^^^^^^^^^^^^^^ There are 48 functions affected. drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c:8435 hclge_set_vf_vlan_filter() warn: can 'vfid' underflow 's32min-2147483646' drivers/net/ethernet/freescale/enetc/enetc_pf.c:377 enetc_pf_set_vf_mac() warn: can 'vf' underflow 's32min-2147483646' drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c:2899 cxgb4_mgmt_get_vf_config() warn: can 'vf' underflow 's32min-254' drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c:2960 cxgb4_mgmt_set_vf_rate() warn: can 'vf' underflow 's32min-254' drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c:3019 cxgb4_mgmt_set_vf_rate() warn: can 'vf' underflow 's32min-254' drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c:3038 cxgb4_mgmt_set_vf_vlan() warn: can 'vf' underflow 's32min-254' drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c:3086 cxgb4_mgmt_set_vf_link_state() warn: can 'vf' underflow 's32min-254' drivers/net/ethernet/chelsio/cxgb/cxgb2.c:791 get_eeprom() warn: can 'i' underflow 's32min-(-4),0,4-s32max' drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c:82 bnxt_set_vf_spoofchk() warn: can 'vf_id' underflow 's32min-65534' drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c:164 bnxt_set_vf_trust() warn: can 'vf_id' underflow 's32min-65534' drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c:186 bnxt_get_vf_config() warn: can 'vf_id' underflow 's32min-65534' drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c:228 bnxt_set_vf_mac() warn: can 'vf_id' underflow 's32min-65534' drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c:264 bnxt_set_vf_vlan() warn: can 'vf_id' underflow 's32min-65534' drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c:293 bnxt_set_vf_bw() warn: can 'vf_id' underflow 's32min-65534' drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c:333 bnxt_set_vf_link_state() warn: can 'vf_id' underflow 's32min-65534' drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c:2595 bnx2x_vf_op_prep() warn: can 'vfidx' underflow 's32min-63' drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c:2595 bnx2x_vf_op_prep() warn: can 'vfidx' underflow 's32min-63' drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c:2281 bnx2x_post_vf_bulletin() warn: can 'vf' underflow 's32min-63' drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c:2285 bnx2x_post_vf_bulletin() warn: can 'vf' underflow 's32min-63' drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c:2286 bnx2x_post_vf_bulletin() warn: can 'vf' underflow 's32min-63' drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c:2292 bnx2x_post_vf_bulletin() warn: can 'vf' underflow 's32min-63' drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c:2297 bnx2x_post_vf_bulletin() warn: can 'vf' underflow 's32min-63' drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c:1832 qlcnic_sriov_set_vf_mac() warn: can 'vf' underflow 's32min-254' drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c:1864 qlcnic_sriov_set_vf_tx_rate() warn: can 'vf' underflow 's32min-254' drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c:1937 qlcnic_sriov_set_vf_vlan() warn: can 'vf' underflow 's32min-254' drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c:2005 qlcnic_sriov_get_vf_config() warn: can 'vf' underflow 's32min-254' drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c:2036 qlcnic_sriov_set_vf_spoofchk() warn: can 'vf' underflow 's32min-254' drivers/net/ethernet/emulex/benet/be_main.c:1914 be_get_vf_config() warn: can 'vf' underflow 's32min-65534' drivers/net/ethernet/emulex/benet/be_main.c:1915 be_get_vf_config() warn: can 'vf' underflow 's32min-65534' drivers/net/ethernet/emulex/benet/be_main.c:1922 be_set_vf_tvt() warn: can 'vf' underflow 's32min-65534' drivers/net/ethernet/emulex/benet/be_main.c:1951 be_clear_vf_tvt() warn: can 'vf' underflow 's32min-65534' drivers/net/ethernet/emulex/benet/be_main.c:2063 be_set_vf_tx_rate() warn: can 'vf' underflow 's32min-65534' drivers/net/ethernet/emulex/benet/be_main.c:2091 be_set_vf_link_state() warn: can 'vf' underflow 's32min-65534' drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c:2609 ice_set_vf_port_vlan() warn: can 'vf_id' underflow 's32min-65534' drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c:3050 ice_get_vf_cfg() warn: can 'vf_id' underflow 's32min-65534' drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c:3103 ice_set_vf_spoofchk() warn: can 'vf_id' underflow 's32min-65534' drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c:3181 ice_set_vf_mac() warn: can 'vf_id' underflow 's32min-65534' drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c:3237 ice_set_vf_trust() warn: can 'vf_id' underflow 's32min-65534' drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c:3286 ice_set_vf_link_state() warn: can 'vf_id' underflow 's32min-65534' drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c:3919 i40e_validate_vf() warn: can 'vf_id' underflow 's32min-2147483646' drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c:3957 i40e_ndo_set_vf_mac() warn: can 'vf_id' underflow 's32min-2147483646' drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c:4104 i40e_ndo_set_vf_port_vlan() warn: can 'vf_id' underflow 's32min-2147483646' drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c:4263 i40e_ndo_set_vf_bw() warn: can 'vf_id' underflow 's32min-2147483646' drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c:4309 i40e_ndo_get_vf_config() warn: can 'vf_id' underflow 's32min-2147483646' drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c:4371 i40e_ndo_set_vf_link_state() warn: can 'vf_id' underflow 's32min-2147483646' drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c:4441 i40e_ndo_set_vf_spoofchk() warn: can 'vf_id' underflow 's32min-2147483646' drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c:4441 i40e_ndo_set_vf_spoofchk() warn: can 'vf_id' underflow 's32min-2147483646' drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c:4504 i40e_ndo_set_vf_trust() warn: can 'vf_id' underflow 's32min-2147483646' Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 925af6b43017..b598e9909fec 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1767,6 +1767,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_MAC]) { struct ifla_vf_mac *ivm = nla_data(tb[IFLA_VF_MAC]); + if (ivm->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_mac) err = ops->ndo_set_vf_mac(dev, ivm->vf, @@ -1778,6 +1780,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_VLAN]) { struct ifla_vf_vlan *ivv = nla_data(tb[IFLA_VF_VLAN]); + if (ivv->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_vlan) err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan, @@ -1810,6 +1814,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (len == 0) return -EINVAL; + if (ivvl[0]->vf >= INT_MAX) + return -EINVAL; err = ops->ndo_set_vf_vlan(dev, ivvl[0]->vf, ivvl[0]->vlan, ivvl[0]->qos, ivvl[0]->vlan_proto); if (err < 0) @@ -1820,6 +1826,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) struct ifla_vf_tx_rate *ivt = nla_data(tb[IFLA_VF_TX_RATE]); struct ifla_vf_info ivf; + if (ivt->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_get_vf_config) err = ops->ndo_get_vf_config(dev, ivt->vf, &ivf); @@ -1838,6 +1846,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_RATE]) { struct ifla_vf_rate *ivt = nla_data(tb[IFLA_VF_RATE]); + if (ivt->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_rate) err = ops->ndo_set_vf_rate(dev, ivt->vf, @@ -1850,6 +1860,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_SPOOFCHK]) { struct ifla_vf_spoofchk *ivs = nla_data(tb[IFLA_VF_SPOOFCHK]); + if (ivs->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_spoofchk) err = ops->ndo_set_vf_spoofchk(dev, ivs->vf, @@ -1861,6 +1873,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_LINK_STATE]) { struct ifla_vf_link_state *ivl = nla_data(tb[IFLA_VF_LINK_STATE]); + if (ivl->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_link_state) err = ops->ndo_set_vf_link_state(dev, ivl->vf, @@ -1874,6 +1888,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) err = -EOPNOTSUPP; ivrssq_en = nla_data(tb[IFLA_VF_RSS_QUERY_EN]); + if (ivrssq_en->vf >= INT_MAX) + return -EINVAL; if (ops->ndo_set_vf_rss_query_en) err = ops->ndo_set_vf_rss_query_en(dev, ivrssq_en->vf, ivrssq_en->setting); @@ -1884,6 +1900,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_TRUST]) { struct ifla_vf_trust *ivt = nla_data(tb[IFLA_VF_TRUST]); + if (ivt->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_trust) err = ops->ndo_set_vf_trust(dev, ivt->vf, ivt->setting); @@ -1894,15 +1912,18 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_IB_NODE_GUID]) { struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_NODE_GUID]); + if (ivt->vf >= INT_MAX) + return -EINVAL; if (!ops->ndo_set_vf_guid) return -EOPNOTSUPP; - return handle_vf_guid(dev, ivt, IFLA_VF_IB_NODE_GUID); } if (tb[IFLA_VF_IB_PORT_GUID]) { struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_PORT_GUID]); + if (ivt->vf >= INT_MAX) + return -EINVAL; if (!ops->ndo_set_vf_guid) return -EOPNOTSUPP; From 561f9a0fb445f23543cff7eaa0ad38f363362f9c Mon Sep 17 00:00:00 2001 From: Martin Habets Date: Thu, 21 Nov 2019 17:52:15 +0000 Subject: [PATCH 0940/3715] sfc: Only cancel the PPS workqueue if it exists [ Upstream commit 723eb53690041740a13ac78efeaf6804f5d684c9 ] The workqueue only exists for the primary PF. For other functions we hit a WARN_ON in kernel/workqueue.c. Fixes: 7c236c43b838 ("sfc: Add support for IEEE-1588 PTP") Signed-off-by: Martin Habets Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/sfc/ptp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index 60cdb97f58e2..f22690792697 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -1320,7 +1320,8 @@ void efx_ptp_remove(struct efx_nic *efx) (void)efx_ptp_disable(efx); cancel_work_sync(&efx->ptp_data->work); - cancel_work_sync(&efx->ptp_data->pps_work); + if (efx->ptp_data->pps_workwq) + cancel_work_sync(&efx->ptp_data->pps_work); skb_queue_purge(&efx->ptp_data->rxq); skb_queue_purge(&efx->ptp_data->txq); From 058fcda9e025ca3463645688e9091159d0926993 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 13 Nov 2019 14:42:00 +0200 Subject: [PATCH 0941/3715] net/mlx5e: Fix set vf link state error flow [ Upstream commit 751021218f7e66ee9bbaa2be23056e447cd75ec4 ] Before this commit the ndo always returned success. Fix that. Fixes: 1ab2068a4c66 ("net/mlx5: Implement vports admin state backup/restore") Signed-off-by: Roi Dayan Reviewed-by: Vlad Buslov Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 090d54275a7d..387758fc6be4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1783,7 +1783,7 @@ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, unlock: mutex_unlock(&esw->state_lock); - return 0; + return err; } int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, From a87cd32dd7007739681b6677ba70bef00b88a872 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Sun, 17 Nov 2019 10:18:59 +0200 Subject: [PATCH 0942/3715] net/mlxfw: Verify FSM error code translation doesn't exceed array size [ Upstream commit 30e9e0550bf693c94bc15827781fe42dd60be634 ] Array mlxfw_fsm_state_err_str contains value to string translation, when values are provided by mlxfw_dev. If value is larger than MLXFW_FSM_STATE_ERR_MAX, return "unknown error" as expected instead of reading an address than exceed array size. Fixes: 410ed13cae39 ("Add the mlxfw module for Mellanox firmware flash process") Signed-off-by: Eran Ben Elisha Acked-by: Jiri Pirko Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c index 2cf89126fb23..d765e7a69d6b 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c @@ -86,6 +86,8 @@ retry: return err; if (fsm_state_err != MLXFW_FSM_STATE_ERR_OK) { + fsm_state_err = min_t(enum mlxfw_fsm_state_err, + fsm_state_err, MLXFW_FSM_STATE_ERR_MAX); pr_err("Firmware flash failed: %s\n", mlxfw_fsm_state_err_str[fsm_state_err]); return -EINVAL; From faacb24993b5505eaa60a1607aa6d16497568188 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Tue, 19 Nov 2019 23:47:33 +0100 Subject: [PATCH 0943/3715] net/sched: act_pedit: fix WARN() in the traffic path [ Upstream commit f67169fef8dbcc1ac6a6a109ecaad0d3b259002c ] when configuring act_pedit rules, the number of keys is validated only on addition of a new entry. This is not sufficient to avoid hitting a WARN() in the traffic path: for example, it is possible to replace a valid entry with a new one having 0 extended keys, thus causing splats in dmesg like: pedit BUG: index 42 WARNING: CPU: 2 PID: 4054 at net/sched/act_pedit.c:410 tcf_pedit_act+0xc84/0x1200 [act_pedit] [...] RIP: 0010:tcf_pedit_act+0xc84/0x1200 [act_pedit] Code: 89 fa 48 c1 ea 03 0f b6 04 02 84 c0 74 08 3c 03 0f 8e ac 00 00 00 48 8b 44 24 10 48 c7 c7 a0 c4 e4 c0 8b 70 18 e8 1c 30 95 ea <0f> 0b e9 a0 fa ff ff e8 00 03 f5 ea e9 14 f4 ff ff 48 89 58 40 e9 RSP: 0018:ffff888077c9f320 EFLAGS: 00010286 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffffffac2983a2 RDX: 0000000000000001 RSI: 0000000000000008 RDI: ffff888053927bec RBP: dffffc0000000000 R08: ffffed100a726209 R09: ffffed100a726209 R10: 0000000000000001 R11: ffffed100a726208 R12: ffff88804beea780 R13: ffff888079a77400 R14: ffff88804beea780 R15: ffff888027ab2000 FS: 00007fdeec9bd740(0000) GS:ffff888053900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffdb3dfd000 CR3: 000000004adb4006 CR4: 00000000001606e0 Call Trace: tcf_action_exec+0x105/0x3f0 tcf_classify+0xf2/0x410 __dev_queue_xmit+0xcbf/0x2ae0 ip_finish_output2+0x711/0x1fb0 ip_output+0x1bf/0x4b0 ip_send_skb+0x37/0xa0 raw_sendmsg+0x180c/0x2430 sock_sendmsg+0xdb/0x110 __sys_sendto+0x257/0x2b0 __x64_sys_sendto+0xdd/0x1b0 do_syscall_64+0xa5/0x4e0 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7fdeeb72e993 Code: 48 8b 0d e0 74 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 0d d6 2c 00 00 75 13 49 89 ca b8 2c 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 34 c3 48 83 ec 08 e8 4b cc 00 00 48 89 04 24 RSP: 002b:00007ffdb3de8a18 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 000055c81972b700 RCX: 00007fdeeb72e993 RDX: 0000000000000040 RSI: 000055c81972b700 RDI: 0000000000000003 RBP: 00007ffdb3dea130 R08: 000055c819728510 R09: 0000000000000010 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000040 R13: 000055c81972b6c0 R14: 000055c81972969c R15: 0000000000000080 Fix this moving the check on 'nkeys' earlier in tcf_pedit_init(), so that attempts to install rules having 0 keys are always rejected with -EINVAL. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/act_pedit.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index b6f6bfad8b2a..fb0caa500ac8 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -46,7 +46,7 @@ static struct tcf_pedit_key_ex *tcf_pedit_keys_ex_parse(struct nlattr *nla, int err = -EINVAL; int rem; - if (!nla || !n) + if (!nla) return NULL; keys_ex = kcalloc(n, sizeof(*k), GFP_KERNEL); @@ -163,6 +163,9 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, return -EINVAL; parm = nla_data(pattr); + if (!parm->nkeys) + return -EINVAL; + ksize = parm->nkeys * sizeof(struct tc_pedit_key); if (nla_len(pattr) < sizeof(*parm) + ksize) return -EINVAL; @@ -172,8 +175,6 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, return PTR_ERR(keys_ex); if (!tcf_idr_check(tn, parm->index, a, bind)) { - if (!parm->nkeys) - return -EINVAL; ret = tcf_idr_create(tn, parm->index, est, a, &act_pedit_ops, bind, false); if (ret) From 4e846d399cde242861c0705a702d95a0e707d4fc Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 30 Jul 2019 17:43:33 +0200 Subject: [PATCH 0944/3715] vhost/vsock: split packets to send using multiple buffers commit 6dbd3e66e7785a2f055bf84d98de9b8fd31ff3f5 upstream. If the packets to sent to the guest are bigger than the buffer available, we can split them, using multiple buffers and fixing the length in the packet header. This is safe since virtio-vsock supports only stream sockets. Signed-off-by: Stefano Garzarella Reviewed-by: Stefan Hajnoczi Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/vsock.c | 66 ++++++++++++++++++------- net/vmw_vsock/virtio_transport_common.c | 15 ++++-- 2 files changed, 60 insertions(+), 21 deletions(-) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 5b9db5deffbb..491de830b8d9 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -103,7 +103,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, struct iov_iter iov_iter; unsigned out, in; size_t nbytes; - size_t len; + size_t iov_len, payload_len; int head; spin_lock_bh(&vsock->send_pkt_list_lock); @@ -148,8 +148,24 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, break; } - len = iov_length(&vq->iov[out], in); - iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len); + iov_len = iov_length(&vq->iov[out], in); + if (iov_len < sizeof(pkt->hdr)) { + virtio_transport_free_pkt(pkt); + vq_err(vq, "Buffer len [%zu] too small\n", iov_len); + break; + } + + iov_iter_init(&iov_iter, READ, &vq->iov[out], in, iov_len); + payload_len = pkt->len - pkt->off; + + /* If the packet is greater than the space available in the + * buffer, we split it using multiple buffers. + */ + if (payload_len > iov_len - sizeof(pkt->hdr)) + payload_len = iov_len - sizeof(pkt->hdr); + + /* Set the correct length in the header */ + pkt->hdr.len = cpu_to_le32(payload_len); nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter); if (nbytes != sizeof(pkt->hdr)) { @@ -158,33 +174,47 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, break; } - nbytes = copy_to_iter(pkt->buf, pkt->len, &iov_iter); - if (nbytes != pkt->len) { + nbytes = copy_to_iter(pkt->buf + pkt->off, payload_len, + &iov_iter); + if (nbytes != payload_len) { virtio_transport_free_pkt(pkt); vq_err(vq, "Faulted on copying pkt buf\n"); break; } - vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len); + vhost_add_used(vq, head, sizeof(pkt->hdr) + payload_len); added = true; - if (pkt->reply) { - int val; - - val = atomic_dec_return(&vsock->queued_replies); - - /* Do we have resources to resume tx processing? */ - if (val + 1 == tx_vq->num) - restart_tx = true; - } - /* Deliver to monitoring devices all correctly transmitted * packets. */ virtio_transport_deliver_tap_pkt(pkt); - total_len += pkt->len; - virtio_transport_free_pkt(pkt); + pkt->off += payload_len; + total_len += payload_len; + + /* If we didn't send all the payload we can requeue the packet + * to send it with the next available buffer. + */ + if (pkt->off < pkt->len) { + spin_lock_bh(&vsock->send_pkt_list_lock); + list_add(&pkt->list, &vsock->send_pkt_list); + spin_unlock_bh(&vsock->send_pkt_list_lock); + } else { + if (pkt->reply) { + int val; + + val = atomic_dec_return(&vsock->queued_replies); + + /* Do we have resources to resume tx + * processing? + */ + if (val + 1 == tx_vq->num) + restart_tx = true; + } + + virtio_transport_free_pkt(pkt); + } } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len))); if (added) vhost_signal(&vsock->dev, vq); diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index a8eb0657c1e8..d20f43057323 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -92,8 +92,17 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) struct virtio_vsock_pkt *pkt = opaque; struct af_vsockmon_hdr *hdr; struct sk_buff *skb; + size_t payload_len; + void *payload_buf; - skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + pkt->len, + /* A packet could be split to fit the RX buffer, so we can retrieve + * the payload length from the header and the buffer pointer taking + * care of the offset in the original packet. + */ + payload_len = le32_to_cpu(pkt->hdr.len); + payload_buf = pkt->buf + pkt->off; + + skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + payload_len, GFP_ATOMIC); if (!skb) return NULL; @@ -133,8 +142,8 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) skb_put_data(skb, &pkt->hdr, sizeof(pkt->hdr)); - if (pkt->len) { - skb_put_data(skb, pkt->buf, pkt->len); + if (payload_len) { + skb_put_data(skb, payload_buf, payload_len); } return skb; From c3736f400667bc9384a805d22ca290a3c925b886 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 8 Nov 2019 17:07:46 +0100 Subject: [PATCH 0945/3715] gpio: max77620: Fixup debounce delays commit b0391479ae04dfcbd208b9571c375064caad9a57 upstream. When converting milliseconds to microseconds in commit fffa6af94894 ("gpio: max77620: Use correct unit for debounce times") some ~1 ms gaps were introduced between the various ranges supported by the controller. Fix this by changing the start of each range to the value immediately following the end of the previous range. This way a debounce time of, say 8250 us will translate into 16 ms instead of returning an -EINVAL error. Typically the debounce delay is only ever set through device tree and specified in milliseconds, so we can never really hit this issue because debounce times are always a multiple of 1000 us. The only notable exception for this is drivers/mmc/host/mmc-spi.c where the CD GPIO is requested, which passes a 1 us debounce time. According to a comment preceeding that code this should actually be 1 ms (i.e. 1000 us). Reported-by: Pavel Machek Signed-off-by: Thierry Reding Acked-by: Pavel Machek Cc: Signed-off-by: Bartosz Golaszewski Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-max77620.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-max77620.c b/drivers/gpio/gpio-max77620.c index ac6c1c0548b6..78254ed93206 100644 --- a/drivers/gpio/gpio-max77620.c +++ b/drivers/gpio/gpio-max77620.c @@ -163,13 +163,13 @@ static int max77620_gpio_set_debounce(struct max77620_gpio *mgpio, case 0: val = MAX77620_CNFG_GPIO_DBNC_None; break; - case 1000 ... 8000: + case 1 ... 8000: val = MAX77620_CNFG_GPIO_DBNC_8ms; break; - case 9000 ... 16000: + case 8001 ... 16000: val = MAX77620_CNFG_GPIO_DBNC_16ms; break; - case 17000 ... 32000: + case 16001 ... 32000: val = MAX77620_CNFG_GPIO_DBNC_32ms; break; default: From f71e52cb3270653090cda296b2da02c010aea790 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 12 Nov 2019 17:10:26 -0500 Subject: [PATCH 0946/3715] tools: gpio: Correctly add make dependencies for gpio_utils commit 0161a94e2d1c713bd34d72bc0239d87c31747bf7 upstream. gpio tools fail to build correctly with make parallelization: $ make -s -j24 ld: gpio-utils.o: file not recognized: file truncated make[1]: *** [/home/labbott/linux_upstream/tools/build/Makefile.build:145: lsgpio-in.o] Error 1 make: *** [Makefile:43: lsgpio-in.o] Error 2 make: *** Waiting for unfinished jobs.... This is because gpio-utils.o is used across multiple targets. Fix this by making gpio-utios.o a proper dependency. Cc: Signed-off-by: Laura Abbott Signed-off-by: Bartosz Golaszewski Signed-off-by: Greg Kroah-Hartman --- tools/gpio/Build | 1 + tools/gpio/Makefile | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/tools/gpio/Build b/tools/gpio/Build index 620c1937d957..4141f35837db 100644 --- a/tools/gpio/Build +++ b/tools/gpio/Build @@ -1,3 +1,4 @@ +gpio-utils-y += gpio-utils.o lsgpio-y += lsgpio.o gpio-utils.o gpio-hammer-y += gpio-hammer.o gpio-utils.o gpio-event-mon-y += gpio-event-mon.o gpio-utils.o diff --git a/tools/gpio/Makefile b/tools/gpio/Makefile index f8bc8656a544..6a73c06e069c 100644 --- a/tools/gpio/Makefile +++ b/tools/gpio/Makefile @@ -35,11 +35,15 @@ $(OUTPUT)include/linux/gpio.h: ../../include/uapi/linux/gpio.h prepare: $(OUTPUT)include/linux/gpio.h +GPIO_UTILS_IN := $(output)gpio-utils-in.o +$(GPIO_UTILS_IN): prepare FORCE + $(Q)$(MAKE) $(build)=gpio-utils + # # lsgpio # LSGPIO_IN := $(OUTPUT)lsgpio-in.o -$(LSGPIO_IN): prepare FORCE +$(LSGPIO_IN): prepare FORCE $(OUTPUT)gpio-utils-in.o $(Q)$(MAKE) $(build)=lsgpio $(OUTPUT)lsgpio: $(LSGPIO_IN) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ @@ -48,7 +52,7 @@ $(OUTPUT)lsgpio: $(LSGPIO_IN) # gpio-hammer # GPIO_HAMMER_IN := $(OUTPUT)gpio-hammer-in.o -$(GPIO_HAMMER_IN): prepare FORCE +$(GPIO_HAMMER_IN): prepare FORCE $(OUTPUT)gpio-utils-in.o $(Q)$(MAKE) $(build)=gpio-hammer $(OUTPUT)gpio-hammer: $(GPIO_HAMMER_IN) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ @@ -57,7 +61,7 @@ $(OUTPUT)gpio-hammer: $(GPIO_HAMMER_IN) # gpio-event-mon # GPIO_EVENT_MON_IN := $(OUTPUT)gpio-event-mon-in.o -$(GPIO_EVENT_MON_IN): prepare FORCE +$(GPIO_EVENT_MON_IN): prepare FORCE $(OUTPUT)gpio-utils-in.o $(Q)$(MAKE) $(build)=gpio-event-mon $(OUTPUT)gpio-event-mon: $(GPIO_EVENT_MON_IN) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ From 97ffb770da162ef71cfc34f9ce4b9542fd2134e1 Mon Sep 17 00:00:00 2001 From: Sun Ke Date: Tue, 19 Nov 2019 14:09:11 +0800 Subject: [PATCH 0947/3715] nbd:fix memory leak in nbd_get_socket() commit dff10bbea4be47bdb615b036c834a275b7c68133 upstream. Before returning NULL, put the sock first. Cc: stable@vger.kernel.org Fixes: cf1b2326b734 ("nbd: verify socket is supported during setup") Reviewed-by: Josef Bacik Reviewed-by: Mike Christie Signed-off-by: Sun Ke Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/nbd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 34dfadd4dcd4..a609ce1d468b 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -931,6 +931,7 @@ static struct socket *nbd_get_socket(struct nbd_device *nbd, unsigned long fd, if (sock->ops->shutdown == sock_no_shutdown) { dev_err(disk_to_dev(nbd->disk), "Unsupported socket: shutdown callout must be supported.\n"); *err = -EINVAL; + sockfd_put(sock); return NULL; } From b4c640499287ae5c19ec32998d9401165ee149f3 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Thu, 14 Nov 2019 13:25:48 +0100 Subject: [PATCH 0948/3715] virtio_console: allocate inbufs in add_port() only if it is needed commit d791cfcbf98191122af70b053a21075cb450d119 upstream. When we hot unplug a virtserialport and then try to hot plug again, it fails: (qemu) chardev-add socket,id=serial0,path=/tmp/serial0,server,nowait (qemu) device_add virtserialport,bus=virtio-serial0.0,nr=2,\ chardev=serial0,id=serial0,name=serial0 (qemu) device_del serial0 (qemu) device_add virtserialport,bus=virtio-serial0.0,nr=2,\ chardev=serial0,id=serial0,name=serial0 kernel error: virtio-ports vport2p2: Error allocating inbufs qemu error: virtio-serial-bus: Guest failure in adding port 2 for device \ virtio-serial0.0 This happens because buffers for the in_vq are allocated when the port is added but are not released when the port is unplugged. They are only released when virtconsole is removed (see a7a69ec0d8e4) To avoid the problem and to be symmetric, we could allocate all the buffers in init_vqs() as they are released in remove_vqs(), but it sounds like a waste of memory. Rather than that, this patch changes add_port() logic to ignore ENOSPC error in fill_queue(), which means queue has already been filled. Fixes: a7a69ec0d8e4 ("virtio_console: free buffers after reset") Cc: mst@redhat.com Cc: stable@vger.kernel.org Signed-off-by: Laurent Vivier Signed-off-by: Michael S. Tsirkin Signed-off-by: Greg Kroah-Hartman --- drivers/char/virtio_console.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 65454acd4b97..5200772ab0bd 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1366,24 +1366,24 @@ static void set_console_size(struct port *port, u16 rows, u16 cols) port->cons.ws.ws_col = cols; } -static unsigned int fill_queue(struct virtqueue *vq, spinlock_t *lock) +static int fill_queue(struct virtqueue *vq, spinlock_t *lock) { struct port_buffer *buf; - unsigned int nr_added_bufs; + int nr_added_bufs; int ret; nr_added_bufs = 0; do { buf = alloc_buf(vq->vdev, PAGE_SIZE, 0); if (!buf) - break; + return -ENOMEM; spin_lock_irq(lock); ret = add_inbuf(vq, buf); if (ret < 0) { spin_unlock_irq(lock); free_buf(buf, true); - break; + return ret; } nr_added_bufs++; spin_unlock_irq(lock); @@ -1403,7 +1403,6 @@ static int add_port(struct ports_device *portdev, u32 id) char debugfs_name[16]; struct port *port; dev_t devt; - unsigned int nr_added_bufs; int err; port = kmalloc(sizeof(*port), GFP_KERNEL); @@ -1462,11 +1461,13 @@ static int add_port(struct ports_device *portdev, u32 id) spin_lock_init(&port->outvq_lock); init_waitqueue_head(&port->waitqueue); - /* Fill the in_vq with buffers so the host can send us data. */ - nr_added_bufs = fill_queue(port->in_vq, &port->inbuf_lock); - if (!nr_added_bufs) { + /* We can safely ignore ENOSPC because it means + * the queue already has buffers. Buffers are removed + * only by virtcons_remove(), not by unplug_port() + */ + err = fill_queue(port->in_vq, &port->inbuf_lock); + if (err < 0 && err != -ENOSPC) { dev_err(port->dev, "Error allocating inbufs\n"); - err = -ENOMEM; goto free_device; } @@ -2099,14 +2100,11 @@ static int virtcons_probe(struct virtio_device *vdev) INIT_WORK(&portdev->control_work, &control_work_handler); if (multiport) { - unsigned int nr_added_bufs; - spin_lock_init(&portdev->c_ivq_lock); spin_lock_init(&portdev->c_ovq_lock); - nr_added_bufs = fill_queue(portdev->c_ivq, - &portdev->c_ivq_lock); - if (!nr_added_bufs) { + err = fill_queue(portdev->c_ivq, &portdev->c_ivq_lock); + if (err < 0) { dev_err(&vdev->dev, "Error allocating buffers for control queue\n"); /* @@ -2117,7 +2115,7 @@ static int virtcons_probe(struct virtio_device *vdev) VIRTIO_CONSOLE_DEVICE_READY, 0); /* Device was functional: we need full cleanup. */ virtcons_remove(vdev); - return -ENOMEM; + return err; } } else { /* From 151ec39119af4aa53c9572f3092f4e390f603aaa Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Thu, 21 Nov 2019 17:53:52 -0800 Subject: [PATCH 0949/3715] Revert "fs: ocfs2: fix possible null-pointer dereferences in ocfs2_xa_prepare_entry()" commit 94b07b6f9e2e996afff7395de6b35f34f4cb10bf upstream. This reverts commit 56e94ea132bb5c2c1d0b60a6aeb34dcb7d71a53d. Commit 56e94ea132bb ("fs: ocfs2: fix possible null-pointer dereferences in ocfs2_xa_prepare_entry()") introduces a regression that fail to create directory with mount option user_xattr and acl. Actually the reported NULL pointer dereference case can be correctly handled by loc->xl_ops->xlo_add_entry(), so revert it. Link: http://lkml.kernel.org/r/1573624916-83825-1-git-send-email-joseph.qi@linux.alibaba.com Fixes: 56e94ea132bb ("fs: ocfs2: fix possible null-pointer dereferences in ocfs2_xa_prepare_entry()") Signed-off-by: Joseph Qi Reported-by: Thomas Voegtle Acked-by: Changwei Ge Cc: Jia-Ju Bai Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Gang He Cc: Jun Piao Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/xattr.c | 58 ++++++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 24 deletions(-) diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index eca49da6d7e0..77740ef5a8e8 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -1497,6 +1497,18 @@ static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc, return loc->xl_ops->xlo_check_space(loc, xi); } +static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) +{ + loc->xl_ops->xlo_add_entry(loc, name_hash); + loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash); + /* + * We can't leave the new entry's xe_name_offset at zero or + * add_namevalue() will go nuts. We set it to the size of our + * storage so that it can never be less than any other entry. + */ + loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size); +} + static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc, struct ocfs2_xattr_info *xi) { @@ -2128,31 +2140,29 @@ static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc, if (rc) goto out; - if (!loc->xl_entry) { - rc = -EINVAL; - goto out; - } - - if (ocfs2_xa_can_reuse_entry(loc, xi)) { - orig_value_size = loc->xl_entry->xe_value_size; - rc = ocfs2_xa_reuse_entry(loc, xi, ctxt); - if (rc) - goto out; - goto alloc_value; - } - - if (!ocfs2_xattr_is_local(loc->xl_entry)) { - orig_clusters = ocfs2_xa_value_clusters(loc); - rc = ocfs2_xa_value_truncate(loc, 0, ctxt); - if (rc) { - mlog_errno(rc); - ocfs2_xa_cleanup_value_truncate(loc, - "overwriting", - orig_clusters); - goto out; + if (loc->xl_entry) { + if (ocfs2_xa_can_reuse_entry(loc, xi)) { + orig_value_size = loc->xl_entry->xe_value_size; + rc = ocfs2_xa_reuse_entry(loc, xi, ctxt); + if (rc) + goto out; + goto alloc_value; } - } - ocfs2_xa_wipe_namevalue(loc); + + if (!ocfs2_xattr_is_local(loc->xl_entry)) { + orig_clusters = ocfs2_xa_value_clusters(loc); + rc = ocfs2_xa_value_truncate(loc, 0, ctxt); + if (rc) { + mlog_errno(rc); + ocfs2_xa_cleanup_value_truncate(loc, + "overwriting", + orig_clusters); + goto out; + } + } + ocfs2_xa_wipe_namevalue(loc); + } else + ocfs2_xa_add_entry(loc, name_hash); /* * If we get here, we have a blank entry. Fill it. We grow our From 7a9c2bb3838427f780034e1f498b44d6bd2e8eda Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 21 Nov 2019 17:54:01 -0800 Subject: [PATCH 0950/3715] mm/ksm.c: don't WARN if page is still mapped in remove_stable_node() commit 9a63236f1ad82d71a98aa80320b6cb618fb32f44 upstream. It's possible to hit the WARN_ON_ONCE(page_mapped(page)) in remove_stable_node() when it races with __mmput() and squeezes in between ksm_exit() and exit_mmap(). WARNING: CPU: 0 PID: 3295 at mm/ksm.c:888 remove_stable_node+0x10c/0x150 Call Trace: remove_all_stable_nodes+0x12b/0x330 run_store+0x4ef/0x7b0 kernfs_fop_write+0x200/0x420 vfs_write+0x154/0x450 ksys_write+0xf9/0x1d0 do_syscall_64+0x99/0x510 entry_SYSCALL_64_after_hwframe+0x49/0xbe Remove the warning as there is nothing scary going on. Link: http://lkml.kernel.org/r/20191119131850.5675-1-aryabinin@virtuozzo.com Fixes: cbf86cfe04a6 ("ksm: remove old stable nodes more thoroughly") Signed-off-by: Andrey Ryabinin Acked-by: Hugh Dickins Cc: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/ksm.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/mm/ksm.c b/mm/ksm.c index f50cc573815f..764486ffcd16 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -849,13 +849,13 @@ static int remove_stable_node(struct stable_node *stable_node) return 0; } - if (WARN_ON_ONCE(page_mapped(page))) { - /* - * This should not happen: but if it does, just refuse to let - * merge_across_nodes be switched - there is no need to panic. - */ - err = -EBUSY; - } else { + /* + * Page could be still mapped if this races with __mmput() running in + * between ksm_exit() and exit_mmap(). Just refuse to let + * merge_across_nodes/max_page_sharing be switched. + */ + err = -EBUSY; + if (!page_mapped(page)) { /* * The stable node did not yet appear stale to get_ksm_page(), * since that allows for an unmapped ksm page to be recognized From 1d8849b9dd33364f71e7bceb06f73cb4ca808496 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 11 Nov 2019 13:32:03 +0000 Subject: [PATCH 0951/3715] drm/i915/userptr: Try to acquire the page lock around set_page_dirty() commit 2d691aeca4aecbb8d0414a777a46981a8e142b05 upstream. set_page_dirty says: For pages with a mapping this should be done under the page lock for the benefit of asynchronous memory errors who prefer a consistent dirty state. This rule can be broken in some special cases, but should be better not to. Under those rules, it is only safe for us to use the plain set_page_dirty calls for shmemfs/anonymous memory. Userptr may be used with real mappings and so needs to use the locked version (set_page_dirty_lock). However, following a try_to_unmap() we may want to remove the userptr and so call put_pages(). However, try_to_unmap() acquires the page lock and so we must avoid recursively locking the pages ourselves -- which means that we cannot safely acquire the lock around set_page_dirty(). Since we can't be sure of the lock, we have to risk skip dirtying the page, or else risk calling set_page_dirty() without a lock and so risk fs corruption. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=203317 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=112012 Fixes: 5cc9ed4b9a7a ("drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl") References: cb6d7c7dc7ff ("drm/i915/userptr: Acquire the page lock around set_page_dirty()") References: 505a8ec7e11a ("Revert "drm/i915/userptr: Acquire the page lock around set_page_dirty()"") References: 6dcc693bc57f ("ext4: warn when page is dirtied without buffers") Signed-off-by: Chris Wilson Cc: Lionel Landwerlin Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: stable@vger.kernel.org Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191111133205.11590-1-chris@chris-wilson.co.uk (cherry picked from commit 0d4bbe3d407f79438dc4f87943db21f7134cfc65) Signed-off-by: Joonas Lahtinen (cherry picked from commit cee7fb437edcdb2f9f8affa959e274997f5dca4d) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem_userptr.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 05ae8c4a8a1b..480d20758324 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -690,8 +690,28 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, i915_gem_gtt_finish_pages(obj, pages); for_each_sgt_page(page, sgt_iter, pages) { - if (obj->mm.dirty) + if (obj->mm.dirty && trylock_page(page)) { + /* + * As this may not be anonymous memory (e.g. shmem) + * but exist on a real mapping, we have to lock + * the page in order to dirty it -- holding + * the page reference is not sufficient to + * prevent the inode from being truncated. + * Play safe and take the lock. + * + * However...! + * + * The mmu-notifier can be invalidated for a + * migrate_page, that is alreadying holding the lock + * on the page. Such a try_to_unmap() will result + * in us calling put_pages() and so recursively try + * to lock the page. We avoid that deadlock with + * a trylock_page() and in exchange we risk missing + * some page dirtying. + */ set_page_dirty(page); + unlock_page(page); + } mark_page_accessed(page); put_page(page); From 7aaf04476a050a4a20f18d89748ff2906a9390b6 Mon Sep 17 00:00:00 2001 From: Kiernan Hager Date: Mon, 20 Nov 2017 14:18:44 -0700 Subject: [PATCH 0952/3715] platform/x86: asus-nb-wmi: Support ALS on the Zenbook UX430UQ [ Upstream commit db2582afa7444a0ce6bb1ebf1431715969a10b06 ] This patch adds support for ALS on the Zenbook UX430UQ to the asus_nb_wmi driver. It also renames "quirk_asus_ux330uak" to "quirk_asus_forceals" because it is now used for more than one model of computer, and should thus have a more general name. Signed-off-by: Kiernan Hager [andy: massaged commit message, fixed indentation and commas in the code] Signed-off-by: Andy Shevchenko Signed-off-by: Sasha Levin --- drivers/platform/x86/asus-nb-wmi.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index 9c4b0d7f15c3..b6f2ff95c3ed 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -111,7 +111,7 @@ static struct quirk_entry quirk_asus_x550lb = { .xusb2pr = 0x01D9, }; -static struct quirk_entry quirk_asus_ux330uak = { +static struct quirk_entry quirk_asus_forceals = { .wmi_force_als_set = true, }; @@ -387,7 +387,7 @@ static const struct dmi_system_id asus_quirks[] = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_PRODUCT_NAME, "UX330UAK"), }, - .driver_data = &quirk_asus_ux330uak, + .driver_data = &quirk_asus_forceals, }, { .callback = dmi_matched, @@ -398,6 +398,15 @@ static const struct dmi_system_id asus_quirks[] = { }, .driver_data = &quirk_asus_x550lb, }, + { + .callback = dmi_matched, + .ident = "ASUSTeK COMPUTER INC. UX430UQ", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "UX430UQ"), + }, + .driver_data = &quirk_asus_forceals, + }, {}, }; From efb868d452dd1724007c9b3310d2062a53472a97 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 12 Jun 2019 09:02:02 +0200 Subject: [PATCH 0953/3715] platform/x86: asus-wmi: Only Tell EC the OS will handle display hotkeys from asus_nb_wmi MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 401fee8195d401b2b94dee57383f627050724d5b ] Commit 78f3ac76d9e5 ("platform/x86: asus-wmi: Tell the EC the OS will handle the display off hotkey") causes the backlight to be permanently off on various EeePC laptop models using the eeepc-wmi driver (Asus EeePC 1015BX, Asus EeePC 1025C). The asus_wmi_set_devstate(ASUS_WMI_DEVID_BACKLIGHT, 2, NULL) call added by that commit is made conditional in this commit and only enabled in the quirk_entry structs in the asus-nb-wmi driver fixing the broken display / backlight on various EeePC laptop models. Cc: João Paulo Rechi Vita Fixes: 78f3ac76d9e5 ("platform/x86: asus-wmi: Tell the EC the OS will handle the display off hotkey") Signed-off-by: Hans de Goede Signed-off-by: Andy Shevchenko Signed-off-by: Sasha Levin --- drivers/platform/x86/asus-nb-wmi.c | 8 ++++++++ drivers/platform/x86/asus-wmi.c | 2 +- drivers/platform/x86/asus-wmi.h | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index b6f2ff95c3ed..59f3a37a44d7 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -78,10 +78,12 @@ static bool asus_q500a_i8042_filter(unsigned char data, unsigned char str, static struct quirk_entry quirk_asus_unknown = { .wapf = 0, + .wmi_backlight_set_devstate = true, }; static struct quirk_entry quirk_asus_q500a = { .i8042_filter = asus_q500a_i8042_filter, + .wmi_backlight_set_devstate = true, }; /* @@ -92,26 +94,32 @@ static struct quirk_entry quirk_asus_q500a = { static struct quirk_entry quirk_asus_x55u = { .wapf = 4, .wmi_backlight_power = true, + .wmi_backlight_set_devstate = true, .no_display_toggle = true, }; static struct quirk_entry quirk_asus_wapf4 = { .wapf = 4, + .wmi_backlight_set_devstate = true, }; static struct quirk_entry quirk_asus_x200ca = { .wapf = 2, + .wmi_backlight_set_devstate = true, }; static struct quirk_entry quirk_asus_ux303ub = { .wmi_backlight_native = true, + .wmi_backlight_set_devstate = true, }; static struct quirk_entry quirk_asus_x550lb = { + .wmi_backlight_set_devstate = true, .xusb2pr = 0x01D9, }; static struct quirk_entry quirk_asus_forceals = { + .wmi_backlight_set_devstate = true, .wmi_force_als_set = true, }; diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 3f662cd774d7..1c1999600717 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -2147,7 +2147,7 @@ static int asus_wmi_add(struct platform_device *pdev) err = asus_wmi_backlight_init(asus); if (err && err != -ENODEV) goto fail_backlight; - } else + } else if (asus->driver->quirks->wmi_backlight_set_devstate) err = asus_wmi_set_devstate(ASUS_WMI_DEVID_BACKLIGHT, 2, NULL); status = wmi_install_notify_handler(asus->driver->event_guid, diff --git a/drivers/platform/x86/asus-wmi.h b/drivers/platform/x86/asus-wmi.h index 6c1311f4b04d..57a79bddb286 100644 --- a/drivers/platform/x86/asus-wmi.h +++ b/drivers/platform/x86/asus-wmi.h @@ -44,6 +44,7 @@ struct quirk_entry { bool store_backlight_power; bool wmi_backlight_power; bool wmi_backlight_native; + bool wmi_backlight_set_devstate; bool wmi_force_als_set; int wapf; /* From 0e52a00e450f065d8d48af0a15757d8de9fbe821 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 13 Feb 2019 15:59:38 +0200 Subject: [PATCH 0954/3715] mwifiex: Fix NL80211_TX_POWER_LIMITED [ Upstream commit 65a576e27309120e0621f54d5c81eb9128bd56be ] NL80211_TX_POWER_LIMITED was treated as NL80211_TX_POWER_AUTOMATIC, which is the opposite of what should happen and can cause nasty regulatory problems. if/else converted to a switch without default to make gcc warn on unhandled enum values. Signed-off-by: Adrian Bunk Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 13 +++++++++++-- drivers/net/wireless/marvell/mwifiex/ioctl.h | 1 + drivers/net/wireless/marvell/mwifiex/sta_ioctl.c | 11 +++++++---- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index dde47c548818..5e8e34a08b2d 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -362,11 +362,20 @@ mwifiex_cfg80211_set_tx_power(struct wiphy *wiphy, struct mwifiex_power_cfg power_cfg; int dbm = MBM_TO_DBM(mbm); - if (type == NL80211_TX_POWER_FIXED) { + switch (type) { + case NL80211_TX_POWER_FIXED: power_cfg.is_power_auto = 0; + power_cfg.is_power_fixed = 1; power_cfg.power_level = dbm; - } else { + break; + case NL80211_TX_POWER_LIMITED: + power_cfg.is_power_auto = 0; + power_cfg.is_power_fixed = 0; + power_cfg.power_level = dbm; + break; + case NL80211_TX_POWER_AUTOMATIC: power_cfg.is_power_auto = 1; + break; } priv = mwifiex_get_priv(adapter, MWIFIEX_BSS_ROLE_ANY); diff --git a/drivers/net/wireless/marvell/mwifiex/ioctl.h b/drivers/net/wireless/marvell/mwifiex/ioctl.h index 48e154e1865d..0dd592ea6e83 100644 --- a/drivers/net/wireless/marvell/mwifiex/ioctl.h +++ b/drivers/net/wireless/marvell/mwifiex/ioctl.h @@ -267,6 +267,7 @@ struct mwifiex_ds_encrypt_key { struct mwifiex_power_cfg { u32 is_power_auto; + u32 is_power_fixed; u32 power_level; }; diff --git a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c index 82828a207963..a8043d76152a 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c @@ -728,6 +728,9 @@ int mwifiex_set_tx_power(struct mwifiex_private *priv, txp_cfg = (struct host_cmd_ds_txpwr_cfg *) buf; txp_cfg->action = cpu_to_le16(HostCmd_ACT_GEN_SET); if (!power_cfg->is_power_auto) { + u16 dbm_min = power_cfg->is_power_fixed ? + dbm : priv->min_tx_power_level; + txp_cfg->mode = cpu_to_le32(1); pg_tlv = (struct mwifiex_types_power_group *) (buf + sizeof(struct host_cmd_ds_txpwr_cfg)); @@ -742,7 +745,7 @@ int mwifiex_set_tx_power(struct mwifiex_private *priv, pg->last_rate_code = 0x03; pg->modulation_class = MOD_CLASS_HR_DSSS; pg->power_step = 0; - pg->power_min = (s8) dbm; + pg->power_min = (s8) dbm_min; pg->power_max = (s8) dbm; pg++; /* Power group for modulation class OFDM */ @@ -750,7 +753,7 @@ int mwifiex_set_tx_power(struct mwifiex_private *priv, pg->last_rate_code = 0x07; pg->modulation_class = MOD_CLASS_OFDM; pg->power_step = 0; - pg->power_min = (s8) dbm; + pg->power_min = (s8) dbm_min; pg->power_max = (s8) dbm; pg++; /* Power group for modulation class HTBW20 */ @@ -758,7 +761,7 @@ int mwifiex_set_tx_power(struct mwifiex_private *priv, pg->last_rate_code = 0x20; pg->modulation_class = MOD_CLASS_HT; pg->power_step = 0; - pg->power_min = (s8) dbm; + pg->power_min = (s8) dbm_min; pg->power_max = (s8) dbm; pg->ht_bandwidth = HT_BW_20; pg++; @@ -767,7 +770,7 @@ int mwifiex_set_tx_power(struct mwifiex_private *priv, pg->last_rate_code = 0x20; pg->modulation_class = MOD_CLASS_HT; pg->power_step = 0; - pg->power_min = (s8) dbm; + pg->power_min = (s8) dbm_min; pg->power_max = (s8) dbm; pg->ht_bandwidth = HT_BW_40; } From cddd05ac1bf4db9e6cac63dbc4c069ce33878965 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Fri, 12 Oct 2018 14:25:22 +0900 Subject: [PATCH 0955/3715] ALSA: isight: fix leak of reference to firewire unit in error path of .probe callback [ Upstream commit 51e68fb0929c29e47e9074ca3e99ffd6021a1c5a ] In some error paths, reference count of firewire unit is not decreased. This commit fixes the bug. Fixes: 5b14ec25a79b('ALSA: firewire: release reference count of firewire unit in .remove callback of bus driver') Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/firewire/isight.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sound/firewire/isight.c b/sound/firewire/isight.c index 5826aa8362f1..9edb26ab16e9 100644 --- a/sound/firewire/isight.c +++ b/sound/firewire/isight.c @@ -639,7 +639,7 @@ static int isight_probe(struct fw_unit *unit, if (!isight->audio_base) { dev_err(&unit->device, "audio unit base not found\n"); err = -ENXIO; - goto err_unit; + goto error; } fw_iso_resources_init(&isight->resources, unit); @@ -668,12 +668,12 @@ static int isight_probe(struct fw_unit *unit, dev_set_drvdata(&unit->device, isight); return 0; - -err_unit: - fw_unit_put(isight->unit); - mutex_destroy(&isight->mutex); error: snd_card_free(card); + + mutex_destroy(&isight->mutex); + fw_unit_put(isight->unit); + return err; } From 453ca166d3209d4e48fa3a8742c3e871ac8aa18f Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 10 Oct 2018 20:33:08 +0900 Subject: [PATCH 0956/3715] printk: fix integer overflow in setup_log_buf() [ Upstream commit d2130e82e9454304e9b91ba9da551b5989af8c27 ] The way we calculate logbuf free space percentage overflows signed integer: int free; free = __LOG_BUF_LEN - log_next_idx; pr_info("early log buf free: %u(%u%%)\n", free, (free * 100) / __LOG_BUF_LEN); We support LOG_BUF_LEN of up to 1<<25 bytes. Since setup_log_buf() is called during early init, logbuf is mostly empty, so __LOG_BUF_LEN - log_next_idx is close to 1<<25. Thus when we multiply it by 100, we overflow signed integer value range: 100 is 2^6 + 2^5 + 2^2. Example, booting with LOG_BUF_LEN 1<<25 and log_buf_len=2G boot param: [ 0.075317] log_buf_len: -2147483648 bytes [ 0.075319] early log buf free: 33549896(-28%) Make "free" unsigned integer and use appropriate printk() specifier. Link: http://lkml.kernel.org/r/20181010113308.9337-1-sergey.senozhatsky@gmail.com To: Steven Rostedt Cc: linux-kernel@vger.kernel.org Cc: Sergey Senozhatsky Signed-off-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Signed-off-by: Sasha Levin --- kernel/printk/printk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 5b33c14ab8b2..4e50beb162c0 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1099,7 +1099,7 @@ void __init setup_log_buf(int early) { unsigned long flags; char *new_log_buf; - int free; + unsigned int free; if (log_buf != __log_buf) return; From 2e3be85994be58dca05c0bf2a26b50d1c76f1f8e Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 27 Sep 2018 15:30:25 +0100 Subject: [PATCH 0957/3715] gfs2: Fix marking bitmaps non-full [ Upstream commit ec23df2b0cf3e1620f5db77972b7fb735f267eff ] Reservations in gfs can span multiple gfs2_bitmaps (but they won't span multiple resource groups). When removing a reservation, we want to clear the GBF_FULL flags of all involved gfs2_bitmaps, not just that of the first bitmap. Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson Reviewed-by: Steven Whitehouse Signed-off-by: Sasha Levin --- fs/gfs2/rgrp.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 0d72baae5150..7cb0672294df 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -623,7 +623,10 @@ static void __rs_deltree(struct gfs2_blkreserv *rs) RB_CLEAR_NODE(&rs->rs_node); if (rs->rs_free) { - struct gfs2_bitmap *bi = rbm_bi(&rs->rs_rbm); + u64 last_block = gfs2_rbm_to_block(&rs->rs_rbm) + + rs->rs_free - 1; + struct gfs2_rbm last_rbm = { .rgd = rs->rs_rbm.rgd, }; + struct gfs2_bitmap *start, *last; /* return reserved blocks to the rgrp */ BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free); @@ -634,7 +637,13 @@ static void __rs_deltree(struct gfs2_blkreserv *rs) it will force the number to be recalculated later. */ rgd->rd_extfail_pt += rs->rs_free; rs->rs_free = 0; - clear_bit(GBF_FULL, &bi->bi_flags); + if (gfs2_rbm_from_block(&last_rbm, last_block)) + return; + start = rbm_bi(&rs->rs_rbm); + last = rbm_bi(&last_rbm); + do + clear_bit(GBF_FULL, &start->bi_flags); + while (start++ != last); } } From bff8a174e055e06f7856bcb1e34989ae48fbc9f5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 12 Sep 2018 20:53:46 -0400 Subject: [PATCH 0958/3715] pty: fix compat ioctls [ Upstream commit 50f45326afab723df529eca54095e2feac24da2d ] pointer-taking ones need compat_ptr(); int-taking one doesn't. Signed-off-by: Al Viro Signed-off-by: Sasha Levin --- drivers/tty/pty.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index 9e26c530d2dd..b3208b1b1028 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -28,6 +28,7 @@ #include #include #include +#include #undef TTY_DEBUG_HANGUP #ifdef TTY_DEBUG_HANGUP @@ -488,6 +489,7 @@ static int pty_bsd_ioctl(struct tty_struct *tty, return -ENOIOCTLCMD; } +#ifdef CONFIG_COMPAT static long pty_bsd_compat_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { @@ -495,8 +497,11 @@ static long pty_bsd_compat_ioctl(struct tty_struct *tty, * PTY ioctls don't require any special translation between 32-bit and * 64-bit userspace, they are already compatible. */ - return pty_bsd_ioctl(tty, cmd, arg); + return pty_bsd_ioctl(tty, cmd, (unsigned long)compat_ptr(arg)); } +#else +#define pty_bsd_compat_ioctl NULL +#endif static int legacy_count = CONFIG_LEGACY_PTY_COUNT; /* @@ -676,6 +681,7 @@ static int pty_unix98_ioctl(struct tty_struct *tty, return -ENOIOCTLCMD; } +#ifdef CONFIG_COMPAT static long pty_unix98_compat_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { @@ -683,8 +689,12 @@ static long pty_unix98_compat_ioctl(struct tty_struct *tty, * PTY ioctls don't require any special translation between 32-bit and * 64-bit userspace, they are already compatible. */ - return pty_unix98_ioctl(tty, cmd, arg); + return pty_unix98_ioctl(tty, cmd, + cmd == TIOCSIG ? arg : (unsigned long)compat_ptr(arg)); } +#else +#define pty_unix98_compat_ioctl NULL +#endif /** * ptm_unix98_lookup - find a pty master From 004ff4e09bb5f8a7e3212c5ef2642944bc7c524a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 12 Sep 2018 20:57:18 -0400 Subject: [PATCH 0959/3715] synclink_gt(): fix compat_ioctl() [ Upstream commit 27230e51349fde075598c1b59d15e1ff802f3f6e ] compat_ptr() for pointer-taking ones... Signed-off-by: Al Viro Signed-off-by: Sasha Levin --- drivers/tty/synclink_gt.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c index 636b8ae29b46..344e8c427c7e 100644 --- a/drivers/tty/synclink_gt.c +++ b/drivers/tty/synclink_gt.c @@ -1187,14 +1187,13 @@ static long slgt_compat_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { struct slgt_info *info = tty->driver_data; - int rc = -ENOIOCTLCMD; + int rc; if (sanity_check(info, tty->name, "compat_ioctl")) return -ENODEV; DBGINFO(("%s compat_ioctl() cmd=%08X\n", info->device_name, cmd)); switch (cmd) { - case MGSL_IOCSPARAMS32: rc = set_params32(info, compat_ptr(arg)); break; @@ -1214,18 +1213,11 @@ static long slgt_compat_ioctl(struct tty_struct *tty, case MGSL_IOCWAITGPIO: case MGSL_IOCGXSYNC: case MGSL_IOCGXCTRL: - case MGSL_IOCSTXIDLE: - case MGSL_IOCTXENABLE: - case MGSL_IOCRXENABLE: - case MGSL_IOCTXABORT: - case TIOCMIWAIT: - case MGSL_IOCSIF: - case MGSL_IOCSXSYNC: - case MGSL_IOCSXCTRL: - rc = ioctl(tty, cmd, arg); + rc = ioctl(tty, cmd, (unsigned long)compat_ptr(arg)); break; + default: + rc = ioctl(tty, cmd, arg); } - DBGINFO(("%s compat_ioctl() cmd=%08X rc=%d\n", info->device_name, cmd, rc)); return rc; } From 40509aa7832306bd93faae648d4e2a3413938dcb Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 1 Oct 2018 19:44:58 +0300 Subject: [PATCH 0960/3715] powerpc: Fix signedness bug in update_flash_db() [ Upstream commit 014704e6f54189a203cc14c7c0bb411b940241bc ] The "count < sizeof(struct os_area_db)" comparison is type promoted to size_t so negative values of "count" are treated as very high values and we accidentally return success instead of a negative error code. This doesn't really change runtime much but it fixes a static checker warning. Signed-off-by: Dan Carpenter Acked-by: Geoff Levand Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/platforms/ps3/os-area.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c index 3db53e8aff92..9b2ef76578f0 100644 --- a/arch/powerpc/platforms/ps3/os-area.c +++ b/arch/powerpc/platforms/ps3/os-area.c @@ -664,7 +664,7 @@ static int update_flash_db(void) db_set_64(db, &os_area_db_id_rtc_diff, saved_params.rtc_diff); count = os_area_flash_write(db, sizeof(struct os_area_db), pos); - if (count < sizeof(struct os_area_db)) { + if (count < 0 || count < sizeof(struct os_area_db)) { pr_debug("%s: os_area_flash_write failed %zd\n", __func__, count); error = count < 0 ? count : -EIO; From e04a317c9624bdaf420cdc1324e3dbe2d3b6555e Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Wed, 10 Oct 2018 13:15:22 +1030 Subject: [PATCH 0961/3715] powerpc/boot: Disable vector instructions [ Upstream commit e8e132e6885962582784b6fa16a80d07ea739c0f ] This will avoid auto-vectorisation when building with higher optimisation levels. We don't know if the machine can support VSX and even if it's present it's probably not going to be enabled at this point in boot. These flag were both added prior to GCC 4.6 which is the minimum compiler version supported by upstream, thanks to Segher for the details. Signed-off-by: Joel Stanley Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/boot/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index e2a5a932c24a..5807c9d8e56d 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -24,8 +24,8 @@ compress-$(CONFIG_KERNEL_GZIP) := CONFIG_KERNEL_GZIP compress-$(CONFIG_KERNEL_XZ) := CONFIG_KERNEL_XZ BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ - -fno-strict-aliasing -Os -msoft-float -pipe \ - -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \ + -fno-strict-aliasing -Os -msoft-float -mno-altivec -mno-vsx \ + -pipe -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \ -D$(compress-y) BOOTCC := $(CC) From 484278a79fa5824c902a94a6bc18a8046ea9b952 Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Wed, 12 Sep 2018 11:23:22 +1000 Subject: [PATCH 0962/3715] powerpc/eeh: Fix use of EEH_PE_KEEP on wrong field [ Upstream commit 473af09b56dc4be68e4af33220ceca6be67aa60d ] eeh_add_to_parent_pe() sometimes removes the EEH_PE_KEEP flag, but it incorrectly removes it from pe->type, instead of pe->state. However, rather than clearing it from the correct field, remove it. Inspection of the code shows that it can't ever have had any effect (even if it had been cleared from the correct field), because the field is never tested after it is cleared by the statement in question. The clear statement was added by commit 807a827d4e74 ("powerpc/eeh: Keep PE during hotplug"), but it didn't explain why it was necessary. Signed-off-by: Sam Bobroff Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/kernel/eeh_pe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 8545a9523b9b..7339ca4fdc19 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -381,7 +381,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) while (parent) { if (!(parent->type & EEH_PE_INVALID)) break; - parent->type &= ~(EEH_PE_INVALID | EEH_PE_KEEP); + parent->type &= ~EEH_PE_INVALID; parent = parent->parent; } From 99096f289bde739098cd3117c22a44cc143647fb Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 13 Oct 2018 13:28:43 +0300 Subject: [PATCH 0963/3715] EDAC, thunderx: Fix memory leak in thunderx_l2c_threaded_isr() [ Upstream commit d8c27ba86a2fd806d3957e5a9b30e66dfca2a61d ] Fix memory leak in L2c threaded interrupt handler. [ bp: Rewrite commit message. ] Fixes: 41003396f932 ("EDAC, thunderx: Add Cavium ThunderX EDAC driver") Signed-off-by: Dan Carpenter Signed-off-by: Borislav Petkov CC: David Daney CC: Jan Glauber CC: Mauro Carvalho Chehab CC: Sergey Temerkhanov CC: linux-edac Link: http://lkml.kernel.org/r/20181013102843.GG16086@mwanda Signed-off-by: Sasha Levin --- drivers/edac/thunderx_edac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/edac/thunderx_edac.c b/drivers/edac/thunderx_edac.c index f35d87519a3e..dfefa39e9351 100644 --- a/drivers/edac/thunderx_edac.c +++ b/drivers/edac/thunderx_edac.c @@ -1905,7 +1905,7 @@ static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id) default: dev_err(&l2c->pdev->dev, "Unsupported device: %04x\n", l2c->pdev->device); - return IRQ_NONE; + goto err_free; } while (CIRC_CNT(l2c->ring_head, l2c->ring_tail, @@ -1927,7 +1927,7 @@ static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id) l2c->ring_tail++; } - return IRQ_HANDLED; + ret = IRQ_HANDLED; err_free: kfree(other); From 3bbeacffa14dd5b85dfb30543533eb3855989304 Mon Sep 17 00:00:00 2001 From: Ali MJ Al-Nasrawy Date: Wed, 3 Oct 2018 19:21:39 +0300 Subject: [PATCH 0964/3715] brcmsmac: AP mode: update beacon when TIM changes [ Upstream commit 2258ee58baa554609a3cc3996276e4276f537b6d ] Beacons are not updated to reflect TIM changes. This is not compliant with power-saving client stations as the beacons do not have valid TIM and can cause the network to stall at random occasions and to have highly variable latencies. Fix it by updating beacon templates on mac80211 set_tim callback. Addresses an issue described in: https://marc.info/?i=20180911163534.21312d08%20()%20manjaro Signed-off-by: Ali MJ Al-Nasrawy Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- .../broadcom/brcm80211/brcmsmac/mac80211_if.c | 26 +++++++++++++++++++ .../broadcom/brcm80211/brcmsmac/main.h | 1 + 2 files changed, 27 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c index ddfdfe177e24..257968fb3111 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c @@ -502,6 +502,7 @@ brcms_ops_add_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif) } spin_lock_bh(&wl->lock); + wl->wlc->vif = vif; wl->mute_tx = false; brcms_c_mute(wl->wlc, false); if (vif->type == NL80211_IFTYPE_STATION) @@ -519,6 +520,11 @@ brcms_ops_add_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif) static void brcms_ops_remove_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif) { + struct brcms_info *wl = hw->priv; + + spin_lock_bh(&wl->lock); + wl->wlc->vif = NULL; + spin_unlock_bh(&wl->lock); } static int brcms_ops_config(struct ieee80211_hw *hw, u32 changed) @@ -937,6 +943,25 @@ static void brcms_ops_set_tsf(struct ieee80211_hw *hw, spin_unlock_bh(&wl->lock); } +static int brcms_ops_beacon_set_tim(struct ieee80211_hw *hw, + struct ieee80211_sta *sta, bool set) +{ + struct brcms_info *wl = hw->priv; + struct sk_buff *beacon = NULL; + u16 tim_offset = 0; + + spin_lock_bh(&wl->lock); + if (wl->wlc->vif) + beacon = ieee80211_beacon_get_tim(hw, wl->wlc->vif, + &tim_offset, NULL); + if (beacon) + brcms_c_set_new_beacon(wl->wlc, beacon, tim_offset, + wl->wlc->vif->bss_conf.dtim_period); + spin_unlock_bh(&wl->lock); + + return 0; +} + static const struct ieee80211_ops brcms_ops = { .tx = brcms_ops_tx, .start = brcms_ops_start, @@ -955,6 +980,7 @@ static const struct ieee80211_ops brcms_ops = { .flush = brcms_ops_flush, .get_tsf = brcms_ops_get_tsf, .set_tsf = brcms_ops_set_tsf, + .set_tim = brcms_ops_beacon_set_tim, }; void brcms_dpc(unsigned long data) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.h b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.h index c4d135cff04a..9f76b880814e 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.h @@ -563,6 +563,7 @@ struct brcms_c_info { struct wiphy *wiphy; struct scb pri_scb; + struct ieee80211_vif *vif; struct sk_buff *beacon; u16 beacon_tim_offset; From 9c7f98086f0bca48b988826f98372a990b0a1d5a Mon Sep 17 00:00:00 2001 From: Carl Huang Date: Thu, 11 Oct 2018 15:55:26 +0800 Subject: [PATCH 0965/3715] ath10k: allocate small size dma memory in ath10k_pci_diag_write_mem [ Upstream commit 0738b4998c6d1caf9ca2447b946709a7278c70f1 ] ath10k_pci_diag_write_mem may allocate big size of the dma memory based on the parameter nbytes. Take firmware diag download as example, the biggest size is about 500K. In some systems, the allocation is likely to fail because it can't acquire such a large contiguous dma memory. The fix is to allocate a small size dma memory. In the loop, driver copies the data to the allocated dma memory and writes to the destination until all the data is written. Tested with QCA6174 PCI with firmware-6.bin_WLAN.RM.4.4.1-00119-QCARMSWP-1, this also affects QCA9377 PCI. Signed-off-by: Carl Huang Reviewed-by: Brian Norris Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath10k/pci.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index 27ab3eb47534..0298ddc1ff06 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -1039,10 +1039,9 @@ int ath10k_pci_diag_write_mem(struct ath10k *ar, u32 address, struct ath10k_ce *ce = ath10k_ce_priv(ar); int ret = 0; u32 *buf; - unsigned int completed_nbytes, orig_nbytes, remaining_bytes; + unsigned int completed_nbytes, alloc_nbytes, remaining_bytes; struct ath10k_ce_pipe *ce_diag; void *data_buf = NULL; - u32 ce_data; /* Host buffer address in CE space */ dma_addr_t ce_data_base = 0; int i; @@ -1056,9 +1055,10 @@ int ath10k_pci_diag_write_mem(struct ath10k *ar, u32 address, * 1) 4-byte alignment * 2) Buffer in DMA-able space */ - orig_nbytes = nbytes; + alloc_nbytes = min_t(unsigned int, nbytes, DIAG_TRANSFER_LIMIT); + data_buf = (unsigned char *)dma_alloc_coherent(ar->dev, - orig_nbytes, + alloc_nbytes, &ce_data_base, GFP_ATOMIC); if (!data_buf) { @@ -1066,9 +1066,6 @@ int ath10k_pci_diag_write_mem(struct ath10k *ar, u32 address, goto done; } - /* Copy caller's data to allocated DMA buf */ - memcpy(data_buf, data, orig_nbytes); - /* * The address supplied by the caller is in the * Target CPU virtual address space. @@ -1081,12 +1078,14 @@ int ath10k_pci_diag_write_mem(struct ath10k *ar, u32 address, */ address = ath10k_pci_targ_cpu_to_ce_addr(ar, address); - remaining_bytes = orig_nbytes; - ce_data = ce_data_base; + remaining_bytes = nbytes; while (remaining_bytes) { /* FIXME: check cast */ nbytes = min_t(int, remaining_bytes, DIAG_TRANSFER_LIMIT); + /* Copy caller's data to allocated DMA buf */ + memcpy(data_buf, data, nbytes); + /* Set up to receive directly into Target(!) address */ ret = __ath10k_ce_rx_post_buf(ce_diag, &address, address); if (ret != 0) @@ -1096,7 +1095,7 @@ int ath10k_pci_diag_write_mem(struct ath10k *ar, u32 address, * Request CE to send caller-supplied data that * was copied to bounce buffer to Target(!) address. */ - ret = ath10k_ce_send_nolock(ce_diag, NULL, (u32)ce_data, + ret = ath10k_ce_send_nolock(ce_diag, NULL, ce_data_base, nbytes, 0, 0); if (ret != 0) goto done; @@ -1137,12 +1136,12 @@ int ath10k_pci_diag_write_mem(struct ath10k *ar, u32 address, remaining_bytes -= nbytes; address += nbytes; - ce_data += nbytes; + data += nbytes; } done: if (data_buf) { - dma_free_coherent(ar->dev, orig_nbytes, data_buf, + dma_free_coherent(ar->dev, alloc_nbytes, data_buf, ce_data_base); } From 9070aed76726230abe09161703d815591e9613b8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 11 Oct 2018 14:56:14 -0600 Subject: [PATCH 0966/3715] skd: fixup usage of legacy IO API [ Upstream commit 6d1f9dfde7343c4ebfb8f84dcb333af571bb3b22 ] We need to be using the mq variant of request requeue here. Fixes: ca33dd92968b ("skd: Convert to blk-mq") Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/skd_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index 64d0fc17c174..95649025cde7 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -1417,7 +1417,7 @@ static void skd_resolve_req_exception(struct skd_device *skdev, case SKD_CHECK_STATUS_BUSY_IMMINENT: skd_log_skreq(skdev, skreq, "retry(busy)"); - blk_requeue_request(skdev->queue, req); + blk_mq_requeue_request(req, true); dev_info(&skdev->pdev->dev, "drive BUSY imminent\n"); skdev->state = SKD_DRVR_STATE_BUSY_IMMINENT; skdev->timer_countdown = SKD_TIMER_MINUTES(20); @@ -1427,7 +1427,7 @@ static void skd_resolve_req_exception(struct skd_device *skdev, case SKD_CHECK_STATUS_REQUEUE_REQUEST: if ((unsigned long) ++req->special < SKD_MAX_RETRIES) { skd_log_skreq(skdev, skreq, "retry"); - blk_requeue_request(skdev->queue, req); + blk_mq_requeue_request(req, true); break; } /* fall through */ From 4da708e823082606be08f26e7df8dc356734bf9b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 14 Oct 2018 13:20:48 -0600 Subject: [PATCH 0967/3715] cdrom: don't attempt to fiddle with cdo->capability [ Upstream commit 8f94004e2a51a3ea195cf3447eb5d5906f36d8b3 ] We can't modify cdo->capability as it is defined as a const. Change the modification hack to just WARN_ON_ONCE() if we hit any of the invalid combinations. This fixes a regression for pcd, which doesn't work after the constify patch. Fixes: 853fe1bf7554 ("cdrom: Make device operations read-only") Tested-by: Ondrej Zary Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/cdrom/cdrom.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index ea6558d4864c..90dd8e7291da 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -410,10 +410,10 @@ static int cdrom_get_disc_info(struct cdrom_device_info *cdi, * hack to have the capability flags defined const, while we can still * change it here without gcc complaining at every line. */ -#define ENSURE(call, bits) \ -do { \ - if (cdo->call == NULL) \ - *change_capability &= ~(bits); \ +#define ENSURE(cdo, call, bits) \ +do { \ + if (cdo->call == NULL) \ + WARN_ON_ONCE((cdo)->capability & (bits)); \ } while (0) /* @@ -589,7 +589,6 @@ int register_cdrom(struct cdrom_device_info *cdi) { static char banner_printed; const struct cdrom_device_ops *cdo = cdi->ops; - int *change_capability = (int *)&cdo->capability; /* hack */ cd_dbg(CD_OPEN, "entering register_cdrom\n"); @@ -601,16 +600,16 @@ int register_cdrom(struct cdrom_device_info *cdi) cdrom_sysctl_register(); } - ENSURE(drive_status, CDC_DRIVE_STATUS); + ENSURE(cdo, drive_status, CDC_DRIVE_STATUS); if (cdo->check_events == NULL && cdo->media_changed == NULL) - *change_capability = ~(CDC_MEDIA_CHANGED | CDC_SELECT_DISC); - ENSURE(tray_move, CDC_CLOSE_TRAY | CDC_OPEN_TRAY); - ENSURE(lock_door, CDC_LOCK); - ENSURE(select_speed, CDC_SELECT_SPEED); - ENSURE(get_last_session, CDC_MULTI_SESSION); - ENSURE(get_mcn, CDC_MCN); - ENSURE(reset, CDC_RESET); - ENSURE(generic_packet, CDC_GENERIC_PACKET); + WARN_ON_ONCE(cdo->capability & (CDC_MEDIA_CHANGED | CDC_SELECT_DISC)); + ENSURE(cdo, tray_move, CDC_CLOSE_TRAY | CDC_OPEN_TRAY); + ENSURE(cdo, lock_door, CDC_LOCK); + ENSURE(cdo, select_speed, CDC_SELECT_SPEED); + ENSURE(cdo, get_last_session, CDC_MULTI_SESSION); + ENSURE(cdo, get_mcn, CDC_MCN); + ENSURE(cdo, reset, CDC_RESET); + ENSURE(cdo, generic_packet, CDC_GENERIC_PACKET); cdi->mc_flags = 0; cdi->options = CDO_USE_FFLAGS; From 327d058b2d68913485c18ba37317a7c44b42dfd8 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Fri, 12 Oct 2018 22:48:22 +0300 Subject: [PATCH 0968/3715] spi: sh-msiof: fix deferred probing [ Upstream commit f34c6e6257aa477cdfe7e9bbbecd3c5648ecda69 ] Since commit 9ec36cafe43b ("of/irq: do irq resolution in platform_get_irq") platform_get_irq() can return -EPROBE_DEFER. However, the driver overrides an error returned by that function with -ENOENT which breaks the deferred probing. Propagate upstream an error code returned by platform_get_irq() and remove the bogus "platform" from the error message, while at it... Fixes: 9ec36cafe43b ("of/irq: do irq resolution in platform_get_irq") Signed-off-by: Sergei Shtylyov Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-sh-msiof.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index db2a529accae..a7bd3c92356b 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -1283,8 +1283,8 @@ static int sh_msiof_spi_probe(struct platform_device *pdev) i = platform_get_irq(pdev, 0); if (i < 0) { - dev_err(&pdev->dev, "cannot get platform IRQ\n"); - ret = -ENOENT; + dev_err(&pdev->dev, "cannot get IRQ\n"); + ret = i; goto err1; } From 6e7515fb82f9080109509b85186d1d433b9d4cb4 Mon Sep 17 00:00:00 2001 From: Chaotian Jing Date: Sat, 13 Oct 2018 15:20:47 +0800 Subject: [PATCH 0969/3715] mmc: mediatek: fix cannot receive new request when msdc_cmd_is_ready fail [ Upstream commit f38a9774ddde9d79b3487dd888edd8b8623552af ] when msdc_cmd_is_ready return fail, the req_timeout work has not been inited and cancel_delayed_work() will return false, then, the request return directly and never call mmc_request_done(). so need call mod_delayed_work() before msdc_cmd_is_ready() Signed-off-by: Chaotian Jing Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/mtk-sd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index 267f7ab08420..a2ac9938d945 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -885,6 +885,7 @@ static void msdc_start_command(struct msdc_host *host, WARN_ON(host->cmd); host->cmd = cmd; + mod_delayed_work(system_wq, &host->req_timeout, DAT_TIMEOUT); if (!msdc_cmd_is_ready(host, mrq, cmd)) return; @@ -896,7 +897,6 @@ static void msdc_start_command(struct msdc_host *host, cmd->error = 0; rawcmd = msdc_cmd_prepare_raw_cmd(host, mrq, cmd); - mod_delayed_work(system_wq, &host->req_timeout, DAT_TIMEOUT); sdr_set_bits(host->base + MSDC_INTEN, cmd_ints_mask); writel(cmd->arg, host->base + SDC_ARG); From 67631651aa75387a578329cca3b62d1d083439a8 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 13 Sep 2018 11:35:10 +0300 Subject: [PATCH 0970/3715] btrfs: handle error of get_old_root [ Upstream commit 315bed43fea532650933e7bba316a7601d439edf ] In btrfs_search_old_slot get_old_root is always used with the assumption it cannot fail. However, this is not true in rare circumstance it can fail and return null. This will lead to null point dereference when the header is read. Fix this by checking the return value and properly handling NULL by setting ret to -EIO and returning gracefully. Coverity-id: 1087503 Signed-off-by: Nikolay Borisov Reviewed-by: Lu Fengqi Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/ctree.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 27983fd657ab..d2263caff307 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2988,6 +2988,10 @@ int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key, again: b = get_old_root(root, time_seq); + if (!b) { + ret = -EIO; + goto done; + } level = btrfs_header_level(b); p->locks[level] = BTRFS_READ_LOCK; From 498189239f00f1c467462322e7023390271c8216 Mon Sep 17 00:00:00 2001 From: Duncan Laurie Date: Fri, 12 Oct 2018 10:04:45 -0600 Subject: [PATCH 0971/3715] gsmi: Fix bug in append_to_eventlog sysfs handler [ Upstream commit 655603de68469adaff16842ac17a5aec9c9ce89b ] The sysfs handler should return the number of bytes consumed, which in the case of a successful write is the entire buffer. Also fix a bug where param.data_len was being set to (count - (2 * sizeof(u32))) instead of just (count - sizeof(u32)). The latter is correct because we skip over the leading u32 which is our param.type, but we were also incorrectly subtracting sizeof(u32) on the line where we were actually setting param.data_len: param.data_len = count - sizeof(u32); This meant that for our example event.kernel_software_watchdog with total length 10 bytes, param.data_len was just 2 prior to this change. To test, successfully append an event to the log with gsmi sysfs. This sample event is for a "Kernel Software Watchdog" > xxd -g 1 event.kernel_software_watchdog 0000000: 01 00 00 00 ad de 06 00 00 00 > cat event.kernel_software_watchdog > /sys/firmware/gsmi/append_to_eventlog > mosys eventlog list | tail -1 14 | 2012-06-25 10:14:14 | Kernl Event | Software Watchdog Signed-off-by: Duncan Laurie Reviewed-by: Vadim Bendebury Reviewed-by: Stefan Reinauer Signed-off-by: Furquan Shaikh Tested-by: Furquan Shaikh Reviewed-by: Aaron Durbin Reviewed-by: Justin TerAvest [zwisler: updated changelog for 2nd bug fix and upstream] Signed-off-by: Ross Zwisler Reviewed-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/firmware/google/gsmi.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/google/gsmi.c b/drivers/firmware/google/gsmi.c index c8f169bf2e27..62337be07afc 100644 --- a/drivers/firmware/google/gsmi.c +++ b/drivers/firmware/google/gsmi.c @@ -480,11 +480,10 @@ static ssize_t eventlog_write(struct file *filp, struct kobject *kobj, if (count < sizeof(u32)) return -EINVAL; param.type = *(u32 *)buf; - count -= sizeof(u32); buf += sizeof(u32); /* The remaining buffer is the data payload */ - if (count > gsmi_dev.data_buf->length) + if ((count - sizeof(u32)) > gsmi_dev.data_buf->length) return -EINVAL; param.data_len = count - sizeof(u32); @@ -504,7 +503,7 @@ static ssize_t eventlog_write(struct file *filp, struct kobject *kobj, spin_unlock_irqrestore(&gsmi_dev.lock, flags); - return rc; + return (rc == 0) ? count : rc; } From 436a4721765a66f43d31f808baf51be560951c3e Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Wed, 10 Oct 2018 18:38:28 -0500 Subject: [PATCH 0972/3715] misc: mic: fix a DMA pool free failure [ Upstream commit 6b995f4eec34745f6cb20d66d5277611f0b3c3fa ] In _scif_prog_signal(), the boolean variable 'x100' is used to indicate whether the MIC Coprocessor is X100. If 'x100' is true, the status descriptor will be used to write the value to the destination. Otherwise, a DMA pool will be allocated for this purpose. Specifically, if the DMA pool is allocated successfully, two memory addresses will be returned. One is for the CPU and the other is for the device to access the DMA pool. The former is stored to the variable 'status' and the latter is stored to the variable 'src'. After the allocation, the address in 'src' is saved to 'status->src_dma_addr', which is actually in the DMA pool, and 'src' is then modified. Later on, if an error occurs, the execution flow will transfer to the label 'dma_fail', which will check 'x100' and free up the allocated DMA pool if 'x100' is false. The point here is that 'status->src_dma_addr' is used for freeing up the DMA pool. As mentioned before, 'status->src_dma_addr' is in the DMA pool. And thus, the device is able to modify this data. This can potentially cause failures when freeing up the DMA pool because of the modified device address. This patch avoids the above issue by using the variable 'src' (with necessary calculation) to free up the DMA pool. Signed-off-by: Wenwen Wang Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/misc/mic/scif/scif_fence.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/mic/scif/scif_fence.c b/drivers/misc/mic/scif/scif_fence.c index cac3bcc308a7..7bb929f05d85 100644 --- a/drivers/misc/mic/scif/scif_fence.c +++ b/drivers/misc/mic/scif/scif_fence.c @@ -272,7 +272,7 @@ static int _scif_prog_signal(scif_epd_t epd, dma_addr_t dst, u64 val) dma_fail: if (!x100) dma_pool_free(ep->remote_dev->signal_pool, status, - status->src_dma_addr); + src - offsetof(struct scif_status, val)); alloc_fail: return err; } From 9c6b1927f85618ad5998c65243a4787c9e228cb1 Mon Sep 17 00:00:00 2001 From: Julien Folly Date: Tue, 9 Oct 2018 19:47:45 +0200 Subject: [PATCH 0973/3715] w1: IAD Register is yet readable trough iad sys file. Fix snprintf (%u for unsigned, count for max size). [ Upstream commit 6eaafbb6998e999467cf78a76e155ee00e372b14 ] IAD Register is yet readable trough the "iad" sys file. A write to the "iad" sys file enables or disables the current measurement, but it was not possible to get the measured value by reading it. Fix: %u in snprintf for unsigned values (vdd and vad) Fix: Avoid possibles overflows (Usage of the 'count' variables) Signed-off-by: Julien Folly Acked-by: Evgeniy Polyakov Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/w1/slaves/w1_ds2438.c | 66 +++++++++++++++++++++++++++-------- 1 file changed, 52 insertions(+), 14 deletions(-) diff --git a/drivers/w1/slaves/w1_ds2438.c b/drivers/w1/slaves/w1_ds2438.c index bf641a191d07..7c4e33dbee4d 100644 --- a/drivers/w1/slaves/w1_ds2438.c +++ b/drivers/w1/slaves/w1_ds2438.c @@ -186,8 +186,8 @@ static int w1_ds2438_change_config_bit(struct w1_slave *sl, u8 mask, u8 value) return -1; } -static uint16_t w1_ds2438_get_voltage(struct w1_slave *sl, - int adc_input, uint16_t *voltage) +static int w1_ds2438_get_voltage(struct w1_slave *sl, + int adc_input, uint16_t *voltage) { unsigned int retries = W1_DS2438_RETRIES; u8 w1_buf[DS2438_PAGE_SIZE + 1 /*for CRC*/]; @@ -235,6 +235,25 @@ post_unlock: return ret; } +static int w1_ds2438_get_current(struct w1_slave *sl, int16_t *voltage) +{ + u8 w1_buf[DS2438_PAGE_SIZE + 1 /*for CRC*/]; + int ret; + + mutex_lock(&sl->master->bus_mutex); + + if (w1_ds2438_get_page(sl, 0, w1_buf) == 0) { + /* The voltage measured across current sense resistor RSENS. */ + *voltage = (((int16_t) w1_buf[DS2438_CURRENT_MSB]) << 8) | ((int16_t) w1_buf[DS2438_CURRENT_LSB]); + ret = 0; + } else + ret = -1; + + mutex_unlock(&sl->master->bus_mutex); + + return ret; +} + static ssize_t iad_write(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) @@ -257,6 +276,27 @@ static ssize_t iad_write(struct file *filp, struct kobject *kobj, return ret; } +static ssize_t iad_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, + loff_t off, size_t count) +{ + struct w1_slave *sl = kobj_to_w1_slave(kobj); + int ret; + int16_t voltage; + + if (off != 0) + return 0; + if (!buf) + return -EINVAL; + + if (w1_ds2438_get_current(sl, &voltage) == 0) { + ret = snprintf(buf, count, "%i\n", voltage); + } else + ret = -EIO; + + return ret; +} + static ssize_t page0_read(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) @@ -272,9 +312,13 @@ static ssize_t page0_read(struct file *filp, struct kobject *kobj, mutex_lock(&sl->master->bus_mutex); + /* Read no more than page0 size */ + if (count > DS2438_PAGE_SIZE) + count = DS2438_PAGE_SIZE; + if (w1_ds2438_get_page(sl, 0, w1_buf) == 0) { - memcpy(buf, &w1_buf, DS2438_PAGE_SIZE); - ret = DS2438_PAGE_SIZE; + memcpy(buf, &w1_buf, count); + ret = count; } else ret = -EIO; @@ -289,7 +333,6 @@ static ssize_t temperature_read(struct file *filp, struct kobject *kobj, { struct w1_slave *sl = kobj_to_w1_slave(kobj); int ret; - ssize_t c = PAGE_SIZE; int16_t temp; if (off != 0) @@ -298,8 +341,7 @@ static ssize_t temperature_read(struct file *filp, struct kobject *kobj, return -EINVAL; if (w1_ds2438_get_temperature(sl, &temp) == 0) { - c -= snprintf(buf + PAGE_SIZE - c, c, "%d\n", temp); - ret = PAGE_SIZE - c; + ret = snprintf(buf, count, "%i\n", temp); } else ret = -EIO; @@ -312,7 +354,6 @@ static ssize_t vad_read(struct file *filp, struct kobject *kobj, { struct w1_slave *sl = kobj_to_w1_slave(kobj); int ret; - ssize_t c = PAGE_SIZE; uint16_t voltage; if (off != 0) @@ -321,8 +362,7 @@ static ssize_t vad_read(struct file *filp, struct kobject *kobj, return -EINVAL; if (w1_ds2438_get_voltage(sl, DS2438_ADC_INPUT_VAD, &voltage) == 0) { - c -= snprintf(buf + PAGE_SIZE - c, c, "%d\n", voltage); - ret = PAGE_SIZE - c; + ret = snprintf(buf, count, "%u\n", voltage); } else ret = -EIO; @@ -335,7 +375,6 @@ static ssize_t vdd_read(struct file *filp, struct kobject *kobj, { struct w1_slave *sl = kobj_to_w1_slave(kobj); int ret; - ssize_t c = PAGE_SIZE; uint16_t voltage; if (off != 0) @@ -344,15 +383,14 @@ static ssize_t vdd_read(struct file *filp, struct kobject *kobj, return -EINVAL; if (w1_ds2438_get_voltage(sl, DS2438_ADC_INPUT_VDD, &voltage) == 0) { - c -= snprintf(buf + PAGE_SIZE - c, c, "%d\n", voltage); - ret = PAGE_SIZE - c; + ret = snprintf(buf, count, "%u\n", voltage); } else ret = -EIO; return ret; } -static BIN_ATTR(iad, S_IRUGO | S_IWUSR | S_IWGRP, NULL, iad_write, 1); +static BIN_ATTR(iad, S_IRUGO | S_IWUSR | S_IWGRP, iad_read, iad_write, 0); static BIN_ATTR_RO(page0, DS2438_PAGE_SIZE); static BIN_ATTR_RO(temperature, 0/* real length varies */); static BIN_ATTR_RO(vad, 0/* real length varies */); From 37aeb6076aeafa1a20c80ea2299498aa9c870d36 Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Fri, 7 Sep 2018 14:44:25 +0200 Subject: [PATCH 0974/3715] m68k: fix command-line parsing when passed from u-boot [ Upstream commit 381fdd62c38344a771aed06adaf14aae65c47454 ] This patch fixes command_line array zero-terminated one byte over the end of the array, causing boot to hang. Signed-off-by: Angelo Dureghello Signed-off-by: Greg Ungerer Signed-off-by: Sasha Levin --- arch/m68k/kernel/uboot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/m68k/kernel/uboot.c b/arch/m68k/kernel/uboot.c index b29c3b241e1b..107082877064 100644 --- a/arch/m68k/kernel/uboot.c +++ b/arch/m68k/kernel/uboot.c @@ -102,5 +102,5 @@ __init void process_uboot_commandline(char *commandp, int size) } parse_uboot_commandline(commandp, len); - commandp[size - 1] = 0; + commandp[len - 1] = 0; } From fb7a0caf45dae36dd750d596939fb805ab5b72f5 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Mon, 8 Oct 2018 03:28:01 -0700 Subject: [PATCH 0975/3715] RDMA/bnxt_re: Fix qp async event reporting [ Upstream commit 4c01f2e3a906a0d2d798be5751c331cf501bc129 ] Reports affiliated async event on the qp-async event channel instead of global event channel. Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/main.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index bf811b23bc95..7d00b6a53ed8 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -782,12 +782,17 @@ static void bnxt_re_dispatch_event(struct ib_device *ibdev, struct ib_qp *qp, struct ib_event ib_event; ib_event.device = ibdev; - if (qp) + if (qp) { ib_event.element.qp = qp; - else + ib_event.event = event; + if (qp->event_handler) + qp->event_handler(&ib_event, qp->qp_context); + + } else { ib_event.element.port_num = port_num; - ib_event.event = event; - ib_dispatch_event(&ib_event); + ib_event.event = event; + ib_dispatch_event(&ib_event); + } } #define HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_IVLAN 0x02 From e4f1ef290e89628877088f6d2723d33e7c9090d0 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 16 Oct 2018 08:22:28 +0200 Subject: [PATCH 0976/3715] pinctrl: sunxi: Fix a memory leak in 'sunxi_pinctrl_build_state()' [ Upstream commit a93a676b079144009f55fff2ab0e34c3b7258c8a ] If 'krealloc()' fails, 'pctl->functions' is set to NULL. We should instead use a temp variable in order to be able to free the previously allocated memeory, in case of OOM. Signed-off-by: Christophe JAILLET Acked-by: Maxime Ripard Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/sunxi/pinctrl-sunxi.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c index 52edf3b5988d..cc8b86a16da0 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c @@ -1039,6 +1039,7 @@ static int sunxi_pinctrl_add_function(struct sunxi_pinctrl *pctl, static int sunxi_pinctrl_build_state(struct platform_device *pdev) { struct sunxi_pinctrl *pctl = platform_get_drvdata(pdev); + void *ptr; int i; /* @@ -1105,13 +1106,15 @@ static int sunxi_pinctrl_build_state(struct platform_device *pdev) } /* And now allocated and fill the array for real */ - pctl->functions = krealloc(pctl->functions, - pctl->nfunctions * sizeof(*pctl->functions), - GFP_KERNEL); - if (!pctl->functions) { + ptr = krealloc(pctl->functions, + pctl->nfunctions * sizeof(*pctl->functions), + GFP_KERNEL); + if (!ptr) { kfree(pctl->functions); + pctl->functions = NULL; return -ENOMEM; } + pctl->functions = ptr; for (i = 0; i < pctl->desc->npins; i++) { const struct sunxi_desc_pin *pin = pctl->desc->pins + i; From 94e415e7377d7899425451371e11d8129127d459 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 14 Oct 2018 17:12:02 +0200 Subject: [PATCH 0977/3715] pwm: lpss: Only set update bit if we are actually changing the settings [ Upstream commit 2153bbc12f77fb2203276befc0f0dddbfb023bb1 ] According to the datasheet the update bit must be set if the on-time-div or the base-unit changes. Now that we properly order device resume on Cherry Trail so that the GFX0 _PS0 method no longer exits with an error, we end up with a sequence of events where we are writing the same values twice in a row. First the _PS0 method restores the duty cycle of 0% the GPU driver set on suspend and then the GPU driver first updates just the enabled bit in the pwm_state from 0 to 1, causing us to write the same values again, before restoring the pre-suspend duty-cycle in a separate pwm_apply call. When writing the update bit the second time, without changing any of the values the update bit clears immediately / instantly, instead of staying 1 for a while as usual. After this the next setting of the update bit seems to be ignored, causing the restoring of the pre-suspend duty-cycle to not get applied. This makes the backlight come up with a 0% dutycycle after suspend/resume. Any further brightness changes after this do work. This commit moves the setting of the update bit into pwm_lpss_prepare() and only sets the bit if we have actually changed any of the values. This avoids the setting of the update bit the second time we configure the PWM to 0% dutycycle, this fixes the backlight coming up with 0% duty-cycle after a suspend/resume. Signed-off-by: Hans de Goede Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/pwm/pwm-lpss.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c index 4721a264bac2..1e69c1c9ec09 100644 --- a/drivers/pwm/pwm-lpss.c +++ b/drivers/pwm/pwm-lpss.c @@ -97,7 +97,7 @@ static void pwm_lpss_prepare(struct pwm_lpss_chip *lpwm, struct pwm_device *pwm, unsigned long long on_time_div; unsigned long c = lpwm->info->clk_rate, base_unit_range; unsigned long long base_unit, freq = NSEC_PER_SEC; - u32 ctrl; + u32 orig_ctrl, ctrl; do_div(freq, period_ns); @@ -114,13 +114,17 @@ static void pwm_lpss_prepare(struct pwm_lpss_chip *lpwm, struct pwm_device *pwm, do_div(on_time_div, period_ns); on_time_div = 255ULL - on_time_div; - ctrl = pwm_lpss_read(pwm); + orig_ctrl = ctrl = pwm_lpss_read(pwm); ctrl &= ~PWM_ON_TIME_DIV_MASK; ctrl &= ~(base_unit_range << PWM_BASE_UNIT_SHIFT); base_unit &= base_unit_range; ctrl |= (u32) base_unit << PWM_BASE_UNIT_SHIFT; ctrl |= on_time_div; - pwm_lpss_write(pwm, ctrl); + + if (orig_ctrl != ctrl) { + pwm_lpss_write(pwm, ctrl); + pwm_lpss_write(pwm, ctrl | PWM_SW_UPDATE); + } } static inline void pwm_lpss_cond_enable(struct pwm_device *pwm, bool cond) @@ -144,7 +148,6 @@ static int pwm_lpss_apply(struct pwm_chip *chip, struct pwm_device *pwm, return ret; } pwm_lpss_prepare(lpwm, pwm, state->duty_cycle, state->period); - pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_SW_UPDATE); pwm_lpss_cond_enable(pwm, lpwm->info->bypass == false); ret = pwm_lpss_wait_for_update(pwm); if (ret) { @@ -157,7 +160,6 @@ static int pwm_lpss_apply(struct pwm_chip *chip, struct pwm_device *pwm, if (ret) return ret; pwm_lpss_prepare(lpwm, pwm, state->duty_cycle, state->period); - pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_SW_UPDATE); return pwm_lpss_wait_for_update(pwm); } } else if (pwm_is_enabled(pwm)) { From 8cec125035a53bd6276837bcc065b5c978bae334 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 11 Oct 2018 12:20:46 -0700 Subject: [PATCH 0978/3715] amiflop: clean up on errors during setup [ Upstream commit 53d0f8dbde89cf6c862c7a62e00c6123e02cba41 ] The error handling in fd_probe_drives() doesn't clean up at all. Fix it up in preparation for converting to blk-mq. While we're here, get rid of the commented out amiga_floppy_remove(). Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/amiflop.c | 84 ++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 44 deletions(-) diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 49908c74bfcb..b0e8d8364876 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1699,11 +1699,41 @@ static const struct block_device_operations floppy_fops = { .check_events = amiga_check_events, }; +static struct gendisk *fd_alloc_disk(int drive) +{ + struct gendisk *disk; + + disk = alloc_disk(1); + if (!disk) + goto out; + + disk->queue = blk_init_queue(do_fd_request, &amiflop_lock); + if (IS_ERR(disk->queue)) { + disk->queue = NULL; + goto out_put_disk; + } + + unit[drive].trackbuf = kmalloc(FLOPPY_MAX_SECTORS * 512, GFP_KERNEL); + if (!unit[drive].trackbuf) + goto out_cleanup_queue; + + return disk; + +out_cleanup_queue: + blk_cleanup_queue(disk->queue); + disk->queue = NULL; +out_put_disk: + put_disk(disk); +out: + unit[drive].type->code = FD_NODRIVE; + return NULL; +} + static int __init fd_probe_drives(void) { int drive,drives,nomem; - printk(KERN_INFO "FD: probing units\nfound "); + pr_info("FD: probing units\nfound"); drives=0; nomem=0; for(drive=0;drivecode == FD_NODRIVE) continue; - disk = alloc_disk(1); + + disk = fd_alloc_disk(drive); if (!disk) { - unit[drive].type->code = FD_NODRIVE; + pr_cont(" no mem for fd%d", drive); + nomem = 1; continue; } unit[drive].gendisk = disk; - - disk->queue = blk_init_queue(do_fd_request, &amiflop_lock); - if (!disk->queue) { - unit[drive].type->code = FD_NODRIVE; - continue; - } - drives++; - if ((unit[drive].trackbuf = kmalloc(FLOPPY_MAX_SECTORS * 512, GFP_KERNEL)) == NULL) { - printk("no mem for "); - unit[drive].type = &drive_types[num_dr_types - 1]; /* FD_NODRIVE */ - drives--; - nomem = 1; - } - printk("fd%d ",drive); + + pr_cont(" fd%d",drive); disk->major = FLOPPY_MAJOR; disk->first_minor = drive; disk->fops = &floppy_fops; @@ -1742,11 +1762,11 @@ static int __init fd_probe_drives(void) } if ((drives > 0) || (nomem == 0)) { if (drives == 0) - printk("no drives"); - printk("\n"); + pr_cont(" no drives"); + pr_cont("\n"); return drives; } - printk("\n"); + pr_cont("\n"); return -ENOMEM; } @@ -1837,30 +1857,6 @@ out_blkdev: return ret; } -#if 0 /* not safe to unload */ -static int __exit amiga_floppy_remove(struct platform_device *pdev) -{ - int i; - - for( i = 0; i < FD_MAX_UNITS; i++) { - if (unit[i].type->code != FD_NODRIVE) { - struct request_queue *q = unit[i].gendisk->queue; - del_gendisk(unit[i].gendisk); - put_disk(unit[i].gendisk); - kfree(unit[i].trackbuf); - if (q) - blk_cleanup_queue(q); - } - } - blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); - free_irq(IRQ_AMIGA_CIAA_TB, NULL); - free_irq(IRQ_AMIGA_DSKBLK, NULL); - custom.dmacon = DMAF_DISK; /* disable DMA */ - amiga_chip_free(raw_buf); - unregister_blkdev(FLOPPY_MAJOR, "fd"); -} -#endif - static struct platform_driver amiga_floppy_driver = { .driver = { .name = "amiga-floppy", From a1ebc30640bd85c792f403a090ea9d59dd599c1f Mon Sep 17 00:00:00 2001 From: Rahul Verma Date: Tue, 16 Oct 2018 03:59:18 -0700 Subject: [PATCH 0979/3715] qed: Align local and global PTT to propagate through the APIs. [ Upstream commit 706d08913d1f68610c32b4a001026aa989878dd9 ] Align the use of local PTT to propagate through the qed_mcp* API's. Global ptt should not be used. Register access should be done through layers. Register address is mapped into a PTT, PF translation table. Several interface functions require a PTT to direct read/write into register. There is a pool of PTT maintained, and several PTT are used simultaneously to access device registers in different flows. Same PTT should not be used in flows that can run concurrently. To avoid running out of PTT resources, too many PTT should not be acquired without releasing them. Every PF has a global PTT, which is used throughout the life of PF, in most important flows for register access. Generic functions acquire the PTT locally and release after the use. This patch aligns the use of Global PTT and Local PTT accordingly. Signed-off-by: Rahul Verma Signed-off-by: Ariel Elior Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed.h | 2 +- drivers/net/ethernet/qlogic/qed/qed_main.c | 22 ++++++++++++++---- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 27 ++++++++++------------ drivers/net/ethernet/qlogic/qed/qed_mcp.h | 5 ++-- drivers/net/ethernet/qlogic/qed/qed_vf.c | 2 +- 5 files changed, 35 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 91003bc6f00b..6c4714a8b54c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -829,7 +829,7 @@ u16 qed_get_cm_pq_idx_vf(struct qed_hwfn *p_hwfn, u16 vf); /* Prototypes */ int qed_fill_dev_info(struct qed_dev *cdev, struct qed_dev_info *dev_info); -void qed_link_update(struct qed_hwfn *hwfn); +void qed_link_update(struct qed_hwfn *hwfn, struct qed_ptt *ptt); u32 qed_unzip_data(struct qed_hwfn *p_hwfn, u32 input_len, u8 *input_buf, u32 max_size, u8 *unzip_buf); diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 557332f1f886..52e747fd9c83 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1389,6 +1389,7 @@ static int qed_get_link_data(struct qed_hwfn *hwfn, } static void qed_fill_link(struct qed_hwfn *hwfn, + struct qed_ptt *ptt, struct qed_link_output *if_link) { struct qed_mcp_link_params params; @@ -1469,7 +1470,7 @@ static void qed_fill_link(struct qed_hwfn *hwfn, /* TODO - fill duplex properly */ if_link->duplex = DUPLEX_FULL; - qed_mcp_get_media_type(hwfn->cdev, &media_type); + qed_mcp_get_media_type(hwfn, ptt, &media_type); if_link->port = qed_get_port_type(media_type); if_link->autoneg = params.speed.autoneg; @@ -1525,21 +1526,34 @@ static void qed_fill_link(struct qed_hwfn *hwfn, static void qed_get_current_link(struct qed_dev *cdev, struct qed_link_output *if_link) { + struct qed_hwfn *hwfn; + struct qed_ptt *ptt; int i; - qed_fill_link(&cdev->hwfns[0], if_link); + hwfn = &cdev->hwfns[0]; + if (IS_PF(cdev)) { + ptt = qed_ptt_acquire(hwfn); + if (ptt) { + qed_fill_link(hwfn, ptt, if_link); + qed_ptt_release(hwfn, ptt); + } else { + DP_NOTICE(hwfn, "Failed to fill link; No PTT\n"); + } + } else { + qed_fill_link(hwfn, NULL, if_link); + } for_each_hwfn(cdev, i) qed_inform_vf_link_state(&cdev->hwfns[i]); } -void qed_link_update(struct qed_hwfn *hwfn) +void qed_link_update(struct qed_hwfn *hwfn, struct qed_ptt *ptt) { void *cookie = hwfn->cdev->ops_cookie; struct qed_common_cb_ops *op = hwfn->cdev->protocol_ops.common; struct qed_link_output if_link; - qed_fill_link(hwfn, &if_link); + qed_fill_link(hwfn, ptt, &if_link); qed_inform_vf_link_state(hwfn); if (IS_LEAD_HWFN(hwfn) && cookie) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 7938abe9a301..ef17ca09d303 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -1352,7 +1352,7 @@ static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn, if (p_hwfn->mcp_info->capabilities & FW_MB_PARAM_FEATURE_SUPPORT_EEE) qed_mcp_read_eee_config(p_hwfn, p_ptt, p_link); - qed_link_update(p_hwfn); + qed_link_update(p_hwfn, p_ptt); out: spin_unlock_bh(&p_hwfn->mcp_info->link_lock); } @@ -1722,12 +1722,10 @@ int qed_mcp_get_mbi_ver(struct qed_hwfn *p_hwfn, return 0; } -int qed_mcp_get_media_type(struct qed_dev *cdev, u32 *p_media_type) +int qed_mcp_get_media_type(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *p_media_type) { - struct qed_hwfn *p_hwfn = &cdev->hwfns[0]; - struct qed_ptt *p_ptt; - - if (IS_VF(cdev)) + if (IS_VF(p_hwfn->cdev)) return -EINVAL; if (!qed_mcp_is_init(p_hwfn)) { @@ -1735,16 +1733,15 @@ int qed_mcp_get_media_type(struct qed_dev *cdev, u32 *p_media_type) return -EBUSY; } - *p_media_type = MEDIA_UNSPECIFIED; + if (!p_ptt) { + *p_media_type = MEDIA_UNSPECIFIED; + return -EINVAL; + } - p_ptt = qed_ptt_acquire(p_hwfn); - if (!p_ptt) - return -EBUSY; - - *p_media_type = qed_rd(p_hwfn, p_ptt, p_hwfn->mcp_info->port_addr + - offsetof(struct public_port, media_type)); - - qed_ptt_release(p_hwfn, p_ptt); + *p_media_type = qed_rd(p_hwfn, p_ptt, + p_hwfn->mcp_info->port_addr + + offsetof(struct public_port, + media_type)); return 0; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index f1fe5e3427ea..8fcdb2c3e5db 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -284,14 +284,15 @@ int qed_mcp_get_mbi_ver(struct qed_hwfn *p_hwfn, * @brief Get media type value of the port. * * @param cdev - qed dev pointer + * @param p_ptt * @param mfw_ver - media type value * * @return int - * 0 - Operation was successul. * -EBUSY - Operation failed */ -int qed_mcp_get_media_type(struct qed_dev *cdev, - u32 *media_type); +int qed_mcp_get_media_type(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *media_type); /** * @brief General function for sending commands to the MCP diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index 3220086f99de..a2a9921b467b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -1669,7 +1669,7 @@ static void qed_handle_bulletin_change(struct qed_hwfn *hwfn) ops->ports_update(cookie, vxlan_port, geneve_port); /* Always update link configuration according to bulletin */ - qed_link_update(hwfn); + qed_link_update(hwfn, NULL); } void qed_iov_vf_task(struct work_struct *work) From d30bb1106a1d7f3edda7cc464e8cdf8fef10dba1 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 16 Oct 2018 11:12:23 +0200 Subject: [PATCH 0980/3715] scsi: ips: fix missing break in switch [ Upstream commit 5d25ff7a544889bc4b749fda31778d6a18dddbcb ] Add missing break statement in order to prevent the code from falling through to case TEST_UNIT_READY. Addresses-Coverity-ID: 1357338 ("Missing break in switch") Suggested-by: Martin K. Petersen Signed-off-by: Gustavo A. R. Silva Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/ips.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c index 67621308eb9c..ea652f1e2071 100644 --- a/drivers/scsi/ips.c +++ b/drivers/scsi/ips.c @@ -3497,6 +3497,7 @@ ips_send_cmd(ips_ha_t * ha, ips_scb_t * scb) case START_STOP: scb->scsi_cmd->result = DID_OK << 16; + break; case TEST_UNIT_READY: case INQUIRY: From 9f0b41be6aff47c24c6431bdc76f86b9cd647a0d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 26 Sep 2018 09:23:42 -0700 Subject: [PATCH 0981/3715] KVM: nVMX: reset cache/shadows when switching loaded VMCS [ Upstream commit b7031fd40fcc741b0f9b0c04c8d844e445858b84 ] Reset the vm_{entry,exit}_controls_shadow variables as well as the segment cache after loading a new VMCS in vmx_switch_vmcs(). The shadows/cache track VMCS data, i.e. they're stale every time we switch to a new VMCS regardless of reason. This fixes a bug where stale control shadows would be consumed after a nested VMExit due to a failed consistency check. Suggested-by: Jim Mattson Signed-off-by: Sean Christopherson Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/vmx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ab6384efc791..1c4e5eb8be83 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -10000,6 +10000,10 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs) vmx_vcpu_load(vcpu, cpu); vcpu->cpu = cpu; put_cpu(); + + vm_entry_controls_reset_shadow(vmx); + vm_exit_controls_reset_shadow(vmx); + vmx_segment_cache_clear(vmx); } /* @@ -11428,7 +11432,6 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02); - vmx_segment_cache_clear(vmx); if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) { leave_guest_mode(vcpu); @@ -12172,9 +12175,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, } vmx_switch_vmcs(vcpu, &vmx->vmcs01); - vm_entry_controls_reset_shadow(vmx); - vm_exit_controls_reset_shadow(vmx); - vmx_segment_cache_clear(vmx); /* Update any VMCS fields that might have changed while L2 ran */ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); From 8bf3bb3b979cfc4d8f4c71cb0a05ac9fcfed4dd8 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Thu, 11 Oct 2018 19:40:43 +0200 Subject: [PATCH 0982/3715] KVM/x86: Fix invvpid and invept register operand size in 64-bit mode [ Upstream commit 5ebb272b2ea7e02911a03a893f8d922d49f9bb4a ] Register operand size of invvpid and invept instruction in 64-bit mode has always 64 bits. Adjust inline function argument type to reflect correct size. Signed-off-by: Uros Bizjak Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/vmx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1c4e5eb8be83..f67fc0f359ff 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1602,7 +1602,7 @@ static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) return -1; } -static inline void __invvpid(int ext, u16 vpid, gva_t gva) +static inline void __invvpid(unsigned long ext, u16 vpid, gva_t gva) { struct { u64 vpid : 16; @@ -1616,7 +1616,7 @@ static inline void __invvpid(int ext, u16 vpid, gva_t gva) : : "a"(&operand), "c"(ext) : "cc", "memory"); } -static inline void __invept(int ext, u64 eptp, gpa_t gpa) +static inline void __invept(unsigned long ext, u64 eptp, gpa_t gpa) { struct { u64 eptp, gpa; From 01b52faa122e8f978449912076af4a9e5ee4d15a Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 26 Sep 2018 17:11:50 -0700 Subject: [PATCH 0983/3715] scsi: isci: Use proper enumerated type in atapi_d2h_reg_frame_handler [ Upstream commit e9e9a103528c7e199ead6e5374c9c52cf16b5802 ] Clang warns when one enumerated type is implicitly converted to another. drivers/scsi/isci/request.c:1629:13: warning: implicit conversion from enumeration type 'enum sci_io_status' to different enumeration type 'enum sci_status' [-Wenum-conversion] status = SCI_IO_FAILURE_RESPONSE_VALID; ~ ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/scsi/isci/request.c:1631:12: warning: implicit conversion from enumeration type 'enum sci_io_status' to different enumeration type 'enum sci_status' [-Wenum-conversion] status = SCI_IO_FAILURE_RESPONSE_VALID; ~ ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ status is of type sci_status but SCI_IO_FAILURE_RESPONSE_VALID is of type sci_io_status. Use SCI_FAILURE_IO_RESPONSE_VALID, which is from sci_status and has SCI_IO_FAILURE_RESPONSE_VALID's exact value since that is what SCI_IO_FAILURE_RESPONSE_VALID is mapped to in the isci.h file. Link: https://github.com/ClangBuiltLinux/linux/issues/153 Signed-off-by: Nathan Chancellor Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/isci/request.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c index ed197bc8e801..2f151708b59a 100644 --- a/drivers/scsi/isci/request.c +++ b/drivers/scsi/isci/request.c @@ -1626,9 +1626,9 @@ static enum sci_status atapi_d2h_reg_frame_handler(struct isci_request *ireq, if (status == SCI_SUCCESS) { if (ireq->stp.rsp.status & ATA_ERR) - status = SCI_IO_FAILURE_RESPONSE_VALID; + status = SCI_FAILURE_IO_RESPONSE_VALID; } else { - status = SCI_IO_FAILURE_RESPONSE_VALID; + status = SCI_FAILURE_IO_RESPONSE_VALID; } if (status != SCI_SUCCESS) { From c55e02c6f3ae2bfcd0b21549618a392558582b5d Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 26 Sep 2018 17:12:00 -0700 Subject: [PATCH 0984/3715] scsi: isci: Change sci_controller_start_task's return type to sci_status [ Upstream commit 362b5da3dfceada6e74ecdd7af3991bbe42c0c0f ] Clang warns when an enumerated type is implicitly converted to another. drivers/scsi/isci/request.c:3476:13: warning: implicit conversion from enumeration type 'enum sci_task_status' to different enumeration type 'enum sci_status' [-Wenum-conversion] status = sci_controller_start_task(ihost, ~ ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/scsi/isci/host.c:2744:10: warning: implicit conversion from enumeration type 'enum sci_status' to different enumeration type 'enum sci_task_status' [-Wenum-conversion] return SCI_SUCCESS; ~~~~~~ ^~~~~~~~~~~ drivers/scsi/isci/host.c:2753:9: warning: implicit conversion from enumeration type 'enum sci_status' to different enumeration type 'enum sci_task_status' [-Wenum-conversion] return status; ~~~~~~ ^~~~~~ Avoid all of these implicit conversion by just making sci_controller_start_task use sci_status. This silences Clang and has no functional change since sci_task_status has all of its values mapped to something in sci_status. Link: https://github.com/ClangBuiltLinux/linux/issues/153 Signed-off-by: Nathan Chancellor Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/isci/host.c | 8 ++++---- drivers/scsi/isci/host.h | 2 +- drivers/scsi/isci/task.c | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/isci/host.c b/drivers/scsi/isci/host.c index 609dafd661d1..da4583a2fa23 100644 --- a/drivers/scsi/isci/host.c +++ b/drivers/scsi/isci/host.c @@ -2717,9 +2717,9 @@ enum sci_status sci_controller_continue_io(struct isci_request *ireq) * the task management request. * @task_request: the handle to the task request object to start. */ -enum sci_task_status sci_controller_start_task(struct isci_host *ihost, - struct isci_remote_device *idev, - struct isci_request *ireq) +enum sci_status sci_controller_start_task(struct isci_host *ihost, + struct isci_remote_device *idev, + struct isci_request *ireq) { enum sci_status status; @@ -2728,7 +2728,7 @@ enum sci_task_status sci_controller_start_task(struct isci_host *ihost, "%s: SCIC Controller starting task from invalid " "state\n", __func__); - return SCI_TASK_FAILURE_INVALID_STATE; + return SCI_FAILURE_INVALID_STATE; } status = sci_remote_device_start_task(ihost, idev, ireq); diff --git a/drivers/scsi/isci/host.h b/drivers/scsi/isci/host.h index b3539928073c..6bc3f022630a 100644 --- a/drivers/scsi/isci/host.h +++ b/drivers/scsi/isci/host.h @@ -489,7 +489,7 @@ enum sci_status sci_controller_start_io( struct isci_remote_device *idev, struct isci_request *ireq); -enum sci_task_status sci_controller_start_task( +enum sci_status sci_controller_start_task( struct isci_host *ihost, struct isci_remote_device *idev, struct isci_request *ireq); diff --git a/drivers/scsi/isci/task.c b/drivers/scsi/isci/task.c index 6dcaed0c1fc8..fb6eba331ac6 100644 --- a/drivers/scsi/isci/task.c +++ b/drivers/scsi/isci/task.c @@ -258,7 +258,7 @@ static int isci_task_execute_tmf(struct isci_host *ihost, struct isci_tmf *tmf, unsigned long timeout_ms) { DECLARE_COMPLETION_ONSTACK(completion); - enum sci_task_status status = SCI_TASK_FAILURE; + enum sci_status status = SCI_FAILURE; struct isci_request *ireq; int ret = TMF_RESP_FUNC_FAILED; unsigned long flags; @@ -301,7 +301,7 @@ static int isci_task_execute_tmf(struct isci_host *ihost, /* start the TMF io. */ status = sci_controller_start_task(ihost, idev, ireq); - if (status != SCI_TASK_SUCCESS) { + if (status != SCI_SUCCESS) { dev_dbg(&ihost->pdev->dev, "%s: start_io failed - status = 0x%x, request = %p\n", __func__, From 118da72f0040313ab4b82e4ec1f77121238d0343 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 3 Oct 2018 18:06:15 -0700 Subject: [PATCH 0985/3715] scsi: iscsi_tcp: Explicitly cast param in iscsi_sw_tcp_host_get_param [ Upstream commit 20054597f169090109fc3f0dfa1a48583f4178a4 ] Clang warns when one enumerated type is implicitly converted to another. drivers/scsi/iscsi_tcp.c:803:15: warning: implicit conversion from enumeration type 'enum iscsi_host_param' to different enumeration type 'enum iscsi_param' [-Wenum-conversion] &addr, param, buf); ^~~~~ 1 warning generated. iscsi_conn_get_addr_param handles ISCSI_HOST_PARAM_IPADDRESS just fine so add an explicit cast to iscsi_param to make it clear to Clang that this is expected behavior. Link: https://github.com/ClangBuiltLinux/linux/issues/153 Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/iscsi_tcp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index e11eff6b0e97..045207b5560e 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -798,7 +798,8 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost, return rc; return iscsi_conn_get_addr_param((struct sockaddr_storage *) - &addr, param, buf); + &addr, + (enum iscsi_param)param, buf); default: return iscsi_host_get_param(shost, param, buf); } From 26be807bd13918af7ef760e3deaec713e087524c Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 10 Oct 2018 14:40:07 -0700 Subject: [PATCH 0986/3715] crypto: ccree - avoid implicit enum conversion [ Upstream commit 18e732b8035d175181aae2ded127994cb01694f7 ] Clang warns when one enumerated type is implicitly converted to another and this happens in several locations in this driver, ultimately related to the set_cipher_{mode,config0} functions. set_cipher_mode expects a mode of type drv_cipher_mode and set_cipher_config0 expects a mode of type drv_crypto_direction. drivers/crypto/ccree/cc_ivgen.c:58:35: warning: implicit conversion from enumeration type 'enum cc_desc_direction' to different enumeration type 'enum drv_crypto_direction' [-Wenum-conversion] set_cipher_config0(&iv_seq[idx], DESC_DIRECTION_ENCRYPT_ENCRYPT); drivers/crypto/ccree/cc_hash.c:99:28: warning: implicit conversion from enumeration type 'enum cc_hash_conf_pad' to different enumeration type 'enum drv_crypto_direction' [-Wenum-conversion] set_cipher_config0(desc, HASH_DIGEST_RESULT_LITTLE_ENDIAN); drivers/crypto/ccree/cc_aead.c:1643:30: warning: implicit conversion from enumeration type 'enum drv_hash_hw_mode' to different enumeration type 'enum drv_cipher_mode' [-Wenum-conversion] set_cipher_mode(&desc[idx], DRV_HASH_HW_GHASH); Since this fundamentally isn't a problem because these values just represent simple integers for a shift operation, make it clear to Clang that this is okay by making the mode parameter in both functions an int. Link: https://github.com/ClangBuiltLinux/linux/issues/46 Signed-off-by: Nathan Chancellor Acked-by: Gilad Ben-Yossef Reviewed-by: Nick Desaulniers Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/staging/ccree/cc_hw_queue_defs.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/staging/ccree/cc_hw_queue_defs.h b/drivers/staging/ccree/cc_hw_queue_defs.h index 2ae0f655e7a0..b86f47712e30 100644 --- a/drivers/staging/ccree/cc_hw_queue_defs.h +++ b/drivers/staging/ccree/cc_hw_queue_defs.h @@ -467,8 +467,7 @@ static inline void set_flow_mode(struct cc_hw_desc *pdesc, * @pdesc: pointer HW descriptor struct * @mode: Any one of the modes defined in [CC7x-DESC] */ -static inline void set_cipher_mode(struct cc_hw_desc *pdesc, - enum drv_cipher_mode mode) +static inline void set_cipher_mode(struct cc_hw_desc *pdesc, int mode) { pdesc->word[4] |= FIELD_PREP(WORD4_CIPHER_MODE, mode); } @@ -479,8 +478,7 @@ static inline void set_cipher_mode(struct cc_hw_desc *pdesc, * @pdesc: pointer HW descriptor struct * @mode: Any one of the modes defined in [CC7x-DESC] */ -static inline void set_cipher_config0(struct cc_hw_desc *pdesc, - enum drv_crypto_direction mode) +static inline void set_cipher_config0(struct cc_hw_desc *pdesc, int mode) { pdesc->word[4] |= FIELD_PREP(WORD4_CIPHER_CONF0, mode); } From 2f19721faf274aac3b073bf4fd810079dc38e0e1 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 10 Oct 2018 08:08:20 -0700 Subject: [PATCH 0987/3715] nvmet-fcloop: suppress a compiler warning [ Upstream commit 1216e9ef18b84f4fb5934792368fb01eb3540520 ] Building with W=1 enables the compiler warning -Wimplicit-fallthrough=3. That option does not recognize the fall-through comment in the fcloop driver. Add a fall-through comment that is recognized for -Wimplicit-fallthrough=3. This patch avoids that the compiler reports the following warning when building with W=1: drivers/nvme/target/fcloop.c:647:6: warning: this statement may fall through [-Wimplicit-fallthrough=] if (op == NVMET_FCOP_READDATA) ^ Signed-off-by: Bart Van Assche Reviewed-by: James Smart Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/target/fcloop.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 0b0a4825b3eb..096523d8dd42 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -535,6 +535,7 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport, break; /* Fall-Thru to RSP handling */ + /* FALLTHRU */ case NVMET_FCOP_RSP: if (fcpreq) { From aa3c631e49d65fe4ce8d3078e752b18c52df0c44 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Mon, 10 Sep 2018 14:01:44 +0200 Subject: [PATCH 0988/3715] clk: mmp2: fix the clock id for sdh2_clk and sdh3_clk [ Upstream commit 4917fb90eec7c26dac1497ada3bd4a325f670fcc ] A typo that makes it impossible to get the correct clocks for MMP2_CLK_SDH2 and MMP2_CLK_SDH3. Signed-off-by: Lubomir Rintel Fixes: 1ec770d92a62 ("clk: mmp: add mmp2 DT support for clock driver") Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/mmp/clk-of-mmp2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/mmp/clk-of-mmp2.c b/drivers/clk/mmp/clk-of-mmp2.c index 0fc75c395957..d083b860f083 100644 --- a/drivers/clk/mmp/clk-of-mmp2.c +++ b/drivers/clk/mmp/clk-of-mmp2.c @@ -227,8 +227,8 @@ static struct mmp_param_gate_clk apmu_gate_clks[] = { /* The gate clocks has mux parent. */ {MMP2_CLK_SDH0, "sdh0_clk", "sdh_mix_clk", CLK_SET_RATE_PARENT, APMU_SDH0, 0x1b, 0x1b, 0x0, 0, &sdh_lock}, {MMP2_CLK_SDH1, "sdh1_clk", "sdh_mix_clk", CLK_SET_RATE_PARENT, APMU_SDH1, 0x1b, 0x1b, 0x0, 0, &sdh_lock}, - {MMP2_CLK_SDH1, "sdh2_clk", "sdh_mix_clk", CLK_SET_RATE_PARENT, APMU_SDH2, 0x1b, 0x1b, 0x0, 0, &sdh_lock}, - {MMP2_CLK_SDH1, "sdh3_clk", "sdh_mix_clk", CLK_SET_RATE_PARENT, APMU_SDH3, 0x1b, 0x1b, 0x0, 0, &sdh_lock}, + {MMP2_CLK_SDH2, "sdh2_clk", "sdh_mix_clk", CLK_SET_RATE_PARENT, APMU_SDH2, 0x1b, 0x1b, 0x0, 0, &sdh_lock}, + {MMP2_CLK_SDH3, "sdh3_clk", "sdh_mix_clk", CLK_SET_RATE_PARENT, APMU_SDH3, 0x1b, 0x1b, 0x0, 0, &sdh_lock}, {MMP2_CLK_DISP0, "disp0_clk", "disp0_div", CLK_SET_RATE_PARENT, APMU_DISP0, 0x1b, 0x1b, 0x0, 0, &disp0_lock}, {MMP2_CLK_DISP0_SPHY, "disp0_sphy_clk", "disp0_sphy_div", CLK_SET_RATE_PARENT, APMU_DISP0, 0x1024, 0x1024, 0x0, 0, &disp0_lock}, {MMP2_CLK_DISP1, "disp1_clk", "disp1_div", CLK_SET_RATE_PARENT, APMU_DISP1, 0x1b, 0x1b, 0x0, 0, &disp1_lock}, From e5e9bb8127a353baac80790ae8f00f06b9d78fa2 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Tue, 16 Oct 2018 16:21:39 +0200 Subject: [PATCH 0989/3715] clk: at91: audio-pll: fix audio pmc type [ Upstream commit 7fa75007b7d7421aea59ff2b12ab1bd65a5abfa6 ] The allocation for the audio pmc is using the size of struct clk_audio_pad instead of struct clk_audio_pmc. This works fine because the former is larger than the latter but it is safer to be correct. Fixes: ("0865805d82d4 clk: at91: add audio pll clock drivers") Signed-off-by: Alexandre Belloni Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/at91/clk-audio-pll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/at91/clk-audio-pll.c b/drivers/clk/at91/clk-audio-pll.c index da7bafcfbe70..b3eaf654fac9 100644 --- a/drivers/clk/at91/clk-audio-pll.c +++ b/drivers/clk/at91/clk-audio-pll.c @@ -509,7 +509,7 @@ static void __init of_sama5d2_clk_audio_pll_pad_setup(struct device_node *np) static void __init of_sama5d2_clk_audio_pll_pmc_setup(struct device_node *np) { - struct clk_audio_pad *apmc_ck; + struct clk_audio_pmc *apmc_ck; struct clk_init_data init = {}; apmc_ck = kzalloc(sizeof(*apmc_ck), GFP_KERNEL); From 92020724dd6a5bd902364c319fd42aad079fc40a Mon Sep 17 00:00:00 2001 From: Marcel Ziswiler Date: Tue, 16 Oct 2018 12:47:29 +0200 Subject: [PATCH 0990/3715] ASoC: tegra_sgtl5000: fix device_node refcounting [ Upstream commit a85227da2dcc291b762c8482a505bc7d0d2d4b07 ] Similar to the following: commit 4321723648b0 ("ASoC: tegra_alc5632: fix device_node refcounting") commit 7c5dfd549617 ("ASoC: tegra: fix device_node refcounting") Signed-off-by: Marcel Ziswiler Acked-by: Jon Hunter Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/tegra/tegra_sgtl5000.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/sound/soc/tegra/tegra_sgtl5000.c b/sound/soc/tegra/tegra_sgtl5000.c index 45a4aa9d2a47..901457da25ec 100644 --- a/sound/soc/tegra/tegra_sgtl5000.c +++ b/sound/soc/tegra/tegra_sgtl5000.c @@ -149,14 +149,14 @@ static int tegra_sgtl5000_driver_probe(struct platform_device *pdev) dev_err(&pdev->dev, "Property 'nvidia,i2s-controller' missing/invalid\n"); ret = -EINVAL; - goto err; + goto err_put_codec_of_node; } tegra_sgtl5000_dai.platform_of_node = tegra_sgtl5000_dai.cpu_of_node; ret = tegra_asoc_utils_init(&machine->util_data, &pdev->dev); if (ret) - goto err; + goto err_put_cpu_of_node; ret = snd_soc_register_card(card); if (ret) { @@ -169,6 +169,13 @@ static int tegra_sgtl5000_driver_probe(struct platform_device *pdev) err_fini_utils: tegra_asoc_utils_fini(&machine->util_data); +err_put_cpu_of_node: + of_node_put(tegra_sgtl5000_dai.cpu_of_node); + tegra_sgtl5000_dai.cpu_of_node = NULL; + tegra_sgtl5000_dai.platform_of_node = NULL; +err_put_codec_of_node: + of_node_put(tegra_sgtl5000_dai.codec_of_node); + tegra_sgtl5000_dai.codec_of_node = NULL; err: return ret; } @@ -183,6 +190,12 @@ static int tegra_sgtl5000_driver_remove(struct platform_device *pdev) tegra_asoc_utils_fini(&machine->util_data); + of_node_put(tegra_sgtl5000_dai.cpu_of_node); + tegra_sgtl5000_dai.cpu_of_node = NULL; + tegra_sgtl5000_dai.platform_of_node = NULL; + of_node_put(tegra_sgtl5000_dai.codec_of_node); + tegra_sgtl5000_dai.codec_of_node = NULL; + return ret; } From 9d11d6cb2511aba2c616db2fcb8083ba2d7a49df Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 Oct 2018 16:17:14 +0200 Subject: [PATCH 0991/3715] scsi: dc395x: fix dma API usage in srb_done [ Upstream commit 3a5bd7021184dec2946f2a4d7a8943f8a5713e52 ] We can't just transfer ownership to the CPU and then unmap, as this will break with swiotlb. Instead unmap the command and sense buffer a little earlier in the I/O completion handler and get rid of the pci_dma_sync_sg_for_cpu call entirely. Signed-off-by: Christoph Hellwig Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/dc395x.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/dc395x.c b/drivers/scsi/dc395x.c index 5ee7f44cf869..9da0ac360848 100644 --- a/drivers/scsi/dc395x.c +++ b/drivers/scsi/dc395x.c @@ -3450,14 +3450,12 @@ static void srb_done(struct AdapterCtlBlk *acb, struct DeviceCtlBlk *dcb, } } - if (dir != PCI_DMA_NONE && scsi_sg_count(cmd)) - pci_dma_sync_sg_for_cpu(acb->dev, scsi_sglist(cmd), - scsi_sg_count(cmd), dir); - ckc_only = 0; /* Check Error Conditions */ ckc_e: + pci_unmap_srb(acb, srb); + if (cmd->cmnd[0] == INQUIRY) { unsigned char *base = NULL; struct ScsiInqData *ptr; @@ -3511,7 +3509,6 @@ static void srb_done(struct AdapterCtlBlk *acb, struct DeviceCtlBlk *dcb, cmd, cmd->result); srb_free_insert(acb, srb); } - pci_unmap_srb(acb, srb); cmd->scsi_done(cmd); waiting_process_next(acb); From e2702ea63bd5134a48cc4846f93050ac969039b9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 Oct 2018 16:17:15 +0200 Subject: [PATCH 0992/3715] scsi: dc395x: fix DMA API usage in sg_update_list [ Upstream commit 6c404a68bf83b4135a8a9aa1c388ebdf98e8ba7f ] We need to transfer device ownership to the CPU before we can manipulate the mapped data. Signed-off-by: Christoph Hellwig Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/dc395x.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/scsi/dc395x.c b/drivers/scsi/dc395x.c index 9da0ac360848..830b2d2dcf20 100644 --- a/drivers/scsi/dc395x.c +++ b/drivers/scsi/dc395x.c @@ -1972,6 +1972,11 @@ static void sg_update_list(struct ScsiReqBlk *srb, u32 left) xferred -= psge->length; } else { /* Partial SG entry done */ + pci_dma_sync_single_for_cpu(srb->dcb-> + acb->dev, + srb->sg_bus_addr, + SEGMENTX_LEN, + PCI_DMA_TODEVICE); psge->length -= xferred; psge->address += xferred; srb->sg_index = idx; From ef36d52351c96506328043423655cf674173d8e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Sat, 13 Oct 2018 14:40:31 +0200 Subject: [PATCH 0993/3715] net: dsa: mv88e6xxx: Fix 88E6141/6341 2500mbps SERDES speed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 26422340da467538cd65eaa9c65538039ee99c8c ] This is a fix for the port_set_speed method for the Topaz family. Currently the same method is used as for the Peridot family, but this is wrong for the SERDES port. On Topaz, the SERDES port is port 5, not 9 and 10 as in Peridot. Moreover setting alt_bit on Topaz only makes sense for port 0 (for (differentiating 100mbps vs 200mbps). The SERDES port does not support more than 2500mbps, so alt_bit does not make any difference. Signed-off-by: Marek Behún Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/dsa/mv88e6xxx/chip.c | 4 ++-- drivers/net/dsa/mv88e6xxx/port.c | 25 +++++++++++++++++++++++-- drivers/net/dsa/mv88e6xxx/port.h | 1 + 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 0fff1502267a..be17194487c6 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2527,7 +2527,7 @@ static const struct mv88e6xxx_ops mv88e6141_ops = { .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, - .port_set_speed = mv88e6390_port_set_speed, + .port_set_speed = mv88e6341_port_set_speed, .port_tag_remap = mv88e6095_port_tag_remap, .port_set_frame_mode = mv88e6351_port_set_frame_mode, .port_set_egress_floods = mv88e6352_port_set_egress_floods, @@ -3029,7 +3029,7 @@ static const struct mv88e6xxx_ops mv88e6341_ops = { .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, - .port_set_speed = mv88e6390_port_set_speed, + .port_set_speed = mv88e6341_port_set_speed, .port_tag_remap = mv88e6095_port_tag_remap, .port_set_frame_mode = mv88e6351_port_set_frame_mode, .port_set_egress_floods = mv88e6352_port_set_egress_floods, diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c index 2cffecfe86e3..fd0a88c56031 100644 --- a/drivers/net/dsa/mv88e6xxx/port.c +++ b/drivers/net/dsa/mv88e6xxx/port.c @@ -203,8 +203,11 @@ static int mv88e6xxx_port_set_speed(struct mv88e6xxx_chip *chip, int port, ctrl = MV88E6XXX_PORT_MAC_CTL_SPEED_1000; break; case 2500: - ctrl = MV88E6390_PORT_MAC_CTL_SPEED_10000 | - MV88E6390_PORT_MAC_CTL_ALTSPEED; + if (alt_bit) + ctrl = MV88E6390_PORT_MAC_CTL_SPEED_10000 | + MV88E6390_PORT_MAC_CTL_ALTSPEED; + else + ctrl = MV88E6390_PORT_MAC_CTL_SPEED_10000; break; case 10000: /* all bits set, fall through... */ @@ -266,6 +269,24 @@ int mv88e6185_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) return mv88e6xxx_port_set_speed(chip, port, speed, false, false); } +/* Support 10, 100, 200, 1000, 2500 Mbps (e.g. 88E6341) */ +int mv88e6341_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) +{ + if (speed == SPEED_MAX) + speed = port < 5 ? 1000 : 2500; + + if (speed > 2500) + return -EOPNOTSUPP; + + if (speed == 200 && port != 0) + return -EOPNOTSUPP; + + if (speed == 2500 && port < 5) + return -EOPNOTSUPP; + + return mv88e6xxx_port_set_speed(chip, port, speed, !port, true); +} + /* Support 10, 100, 200, 1000 Mbps (e.g. 88E6352 family) */ int mv88e6352_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) { diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h index ccdc67fe9079..8a645683cf6b 100644 --- a/drivers/net/dsa/mv88e6xxx/port.h +++ b/drivers/net/dsa/mv88e6xxx/port.h @@ -262,6 +262,7 @@ int mv88e6xxx_port_set_duplex(struct mv88e6xxx_chip *chip, int port, int dup); int mv88e6065_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); int mv88e6185_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); +int mv88e6341_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); int mv88e6352_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); int mv88e6390_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); int mv88e6390x_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); From 1a16d5c6ea96088d9e70443f07d93dd965306755 Mon Sep 17 00:00:00 2001 From: Kyeongdon Kim Date: Tue, 16 Oct 2018 14:57:26 +0900 Subject: [PATCH 0994/3715] net: fix warning in af_unix [ Upstream commit 33c4368ee2589c165aebd8d388cbd91e9adb9688 ] This fixes the "'hash' may be used uninitialized in this function" net/unix/af_unix.c:1041:20: warning: 'hash' may be used uninitialized in this function [-Wmaybe-uninitialized] addr->hash = hash ^ sk->sk_type; Signed-off-by: Kyeongdon Kim Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/unix/af_unix.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 4de9dfd14d09..99f581a61cfa 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -225,6 +225,8 @@ static inline void unix_release_addr(struct unix_address *addr) static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp) { + *hashp = 0; + if (len <= sizeof(short) || len > sizeof(*sunaddr)) return -EINVAL; if (!sunaddr || sunaddr->sun_family != AF_UNIX) From 801997aefee57addbdfbe4dfb00b0a1c62e864b0 Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Wed, 17 Oct 2018 10:04:21 +0000 Subject: [PATCH 0995/3715] net: ena: Fix Kconfig dependency on X86 [ Upstream commit 8c590f9776386b8f697fd0b7ed6142ae6e3de79e ] The Kconfig limitation of X86 is to too wide. The ENA driver only requires a little endian dependency. Change the dependency to be on little endian CPU. Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amazon/Kconfig b/drivers/net/ethernet/amazon/Kconfig index 99b30353541a..9e87d7b8360f 100644 --- a/drivers/net/ethernet/amazon/Kconfig +++ b/drivers/net/ethernet/amazon/Kconfig @@ -17,7 +17,7 @@ if NET_VENDOR_AMAZON config ENA_ETHERNET tristate "Elastic Network Adapter (ENA) support" - depends on (PCI_MSI && X86) + depends on PCI_MSI && !CPU_BIG_ENDIAN ---help--- This driver supports Elastic Network Adapter (ENA)" From 5506ef8616385a1d1c571ab6c586405a762bedbb Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 18 Oct 2018 17:21:29 +1100 Subject: [PATCH 0996/3715] xfs: fix use-after-free race in xfs_buf_rele [ Upstream commit 37fd1678245f7a5898c1b05128bc481fb403c290 ] When looking at a 4.18 based KASAN use after free report, I noticed that racing xfs_buf_rele() may race on dropping the last reference to the buffer and taking the buffer lock. This was the symptom displayed by the KASAN report, but the actual issue that was reported had already been fixed in 4.19-rc1 by commit e339dd8d8b04 ("xfs: use sync buffer I/O for sync delwri queue submission"). Despite this, I think there is still an issue with xfs_buf_rele() in this code: release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock); spin_lock(&bp->b_lock); if (!release) { ..... If two threads race on the b_lock after both dropping a reference and one getting dropping the last reference so release = true, we end up with: CPU 0 CPU 1 atomic_dec_and_lock() atomic_dec_and_lock() spin_lock(&bp->b_lock) spin_lock(&bp->b_lock) b_lru_ref = 0> freebuf = true spin_unlock(&bp->b_lock) xfs_buf_free(bp) spin_unlock(&bp->b_lock) IOWs, we can't safely take bp->b_lock after dropping the hold reference because the buffer may go away at any time after we drop that reference. However, this can be fixed simply by taking the bp->b_lock before we drop the reference. It is safe to nest the pag_buf_lock inside bp->b_lock as the pag_buf_lock is only used to serialise against lookup in xfs_buf_find() and no other locks are held over or under the pag_buf_lock there. Make this clear by documenting the buffer lock orders at the top of the file. Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Reviewed-by: Carlos Maiolino Signed-off-by: Sasha Levin --- fs/xfs/xfs_buf.c | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index e4a623956df5..e5970ecdfd58 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -58,6 +58,32 @@ static kmem_zone_t *xfs_buf_zone; #define xb_to_gfp(flags) \ ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : GFP_NOFS) | __GFP_NOWARN) +/* + * Locking orders + * + * xfs_buf_ioacct_inc: + * xfs_buf_ioacct_dec: + * b_sema (caller holds) + * b_lock + * + * xfs_buf_stale: + * b_sema (caller holds) + * b_lock + * lru_lock + * + * xfs_buf_rele: + * b_lock + * pag_buf_lock + * lru_lock + * + * xfs_buftarg_wait_rele + * lru_lock + * b_lock (trylock due to inversion) + * + * xfs_buftarg_isolate + * lru_lock + * b_lock (trylock due to inversion) + */ static inline int xfs_buf_is_vmapped( @@ -983,8 +1009,18 @@ xfs_buf_rele( ASSERT(atomic_read(&bp->b_hold) > 0); - release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock); + /* + * We grab the b_lock here first to serialise racing xfs_buf_rele() + * calls. The pag_buf_lock being taken on the last reference only + * serialises against racing lookups in xfs_buf_find(). IOWs, the second + * to last reference we drop here is not serialised against the last + * reference until we take bp->b_lock. Hence if we don't grab b_lock + * first, the last "release" reference can win the race to the lock and + * free the buffer before the second-to-last reference is processed, + * leading to a use-after-free scenario. + */ spin_lock(&bp->b_lock); + release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock); if (!release) { /* * Drop the in-flight state if the buffer is already on the LRU From 830f3b829f5a2edfef1050a3eb6068b31241813a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 17 Oct 2018 16:59:51 -0400 Subject: [PATCH 0997/3715] kprobes, x86/ptrace.h: Make regs_get_kernel_stack_nth() not fault on bad stack [ Upstream commit c2712b858187f5bcd7b042fe4daa3ba3a12635c0 ] Andy had some concerns about using regs_get_kernel_stack_nth() in a new function regs_get_kernel_argument() as if there's any error in the stack code, it could cause a bad memory access. To be on the safe side, call probe_kernel_read() on the stack address to be extra careful in accessing the memory. A helper function, regs_get_kernel_stack_nth_addr(), was added to just return the stack address (or NULL if not on the stack), that will be used to find the address (and could be used by other functions) and read the address with kernel_probe_read(). Requested-by: Andy Lutomirski Signed-off-by: Steven Rostedt (VMware) Reviewed-by: Joel Fernandes (Google) Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20181017165951.09119177@gandalf.local.home Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- arch/x86/include/asm/ptrace.h | 42 +++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 14131dd06b29..8603d127f73c 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -231,24 +231,52 @@ static inline int regs_within_kernel_stack(struct pt_regs *regs, (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))); } +/** + * regs_get_kernel_stack_nth_addr() - get the address of the Nth entry on stack + * @regs: pt_regs which contains kernel stack pointer. + * @n: stack entry number. + * + * regs_get_kernel_stack_nth() returns the address of the @n th entry of the + * kernel stack which is specified by @regs. If the @n th entry is NOT in + * the kernel stack, this returns NULL. + */ +static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs, unsigned int n) +{ + unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs); + + addr += n; + if (regs_within_kernel_stack(regs, (unsigned long)addr)) + return addr; + else + return NULL; +} + +/* To avoid include hell, we can't include uaccess.h */ +extern long probe_kernel_read(void *dst, const void *src, size_t size); + /** * regs_get_kernel_stack_nth() - get Nth entry of the stack * @regs: pt_regs which contains kernel stack pointer. * @n: stack entry number. * * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which - * is specified by @regs. If the @n th entry is NOT in the kernel stack, + * is specified by @regs. If the @n th entry is NOT in the kernel stack * this returns 0. */ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) { - unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs); - addr += n; - if (regs_within_kernel_stack(regs, (unsigned long)addr)) - return *addr; - else - return 0; + unsigned long *addr; + unsigned long val; + long ret; + + addr = regs_get_kernel_stack_nth_addr(regs, n); + if (addr) { + ret = probe_kernel_read(&val, addr, sizeof(val)); + if (!ret) + return val; + } + return 0; } #define arch_has_single_step() (1) From 7d704466f6533232e7ab2cfa37119f4b281f5439 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 3 Oct 2018 16:38:15 +0200 Subject: [PATCH 0998/3715] PM / Domains: Deal with multiple states but no governor in genpd [ Upstream commit 2c9b7f8772033cc8bafbd4eefe2ca605bf3eb094 ] A caller of pm_genpd_init() that provides some states for the genpd via the ->states pointer in the struct generic_pm_domain, should also provide a governor. This because it's the job of the governor to pick a state that satisfies the constraints. Therefore, let's print a warning to inform the user about such bogus configuration and avoid to bail out, by instead picking the shallowest state before genpd invokes the ->power_off() callback. Signed-off-by: Ulf Hansson Reviewed-by: Lina Iyer Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/base/power/domain.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index c276ba1c0a19..e811f2414889 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -369,6 +369,10 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on, return -EAGAIN; } + /* Default to shallowest state. */ + if (!genpd->gov) + genpd->state_idx = 0; + if (genpd->power_off) { int ret; @@ -1598,6 +1602,8 @@ int pm_genpd_init(struct generic_pm_domain *genpd, ret = genpd_set_default_power_state(genpd); if (ret) return ret; + } else if (!gov) { + pr_warn("%s : no governor for states\n", genpd->name); } mutex_lock(&gpd_list_lock); From e7d723976099baad7eb092ded45ca5dbf3105a10 Mon Sep 17 00:00:00 2001 From: Philipp Klocke Date: Thu, 18 Oct 2018 12:33:02 +0200 Subject: [PATCH 0999/3715] ALSA: i2c/cs8427: Fix int to char conversion [ Upstream commit eb7ebfa3c1989aa8e59d5e68ab3cddd7df1bfb27 ] Compiling with clang yields the following warning: sound/i2c/cs8427.c:140:31: warning: implicit conversion from 'int' to 'char' changes value from 160 to -96 [-Wconstant-conversion] data[0] = CS8427_REG_AUTOINC | CS8427_REG_CORU_DATABUF; ~ ~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~ Because CS8427_REG_AUTOINC is defined as 128, it is too big for a char field. So change data from char to unsigned char, that it can hold the value. This patch does not change the generated code. Signed-off-by: Philipp Klocke Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/i2c/cs8427.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/i2c/cs8427.c b/sound/i2c/cs8427.c index 7e21621e492a..7fd1b4000883 100644 --- a/sound/i2c/cs8427.c +++ b/sound/i2c/cs8427.c @@ -118,7 +118,7 @@ static int snd_cs8427_send_corudata(struct snd_i2c_device *device, struct cs8427 *chip = device->private_data; char *hw_data = udata ? chip->playback.hw_udata : chip->playback.hw_status; - char data[32]; + unsigned char data[32]; int err, idx; if (!memcmp(hw_data, ndata, count)) From ee10dcb0d52146cfe2e8d1354e9bca723ebb3ec6 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 15 Oct 2018 11:18:49 +1100 Subject: [PATCH 1000/3715] macintosh/windfarm_smu_sat: Fix debug output [ Upstream commit fc0c8b36d379a046525eacb9c3323ca635283757 ] There's some antiquated debug output that's trying to do a hand-made hexdump and turning into horrible 1-byte-per-line output these days. Use print_hex_dump() instead Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- drivers/macintosh/windfarm_smu_sat.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/drivers/macintosh/windfarm_smu_sat.c b/drivers/macintosh/windfarm_smu_sat.c index da7f4fc1a51d..a0f61eb853c5 100644 --- a/drivers/macintosh/windfarm_smu_sat.c +++ b/drivers/macintosh/windfarm_smu_sat.c @@ -22,14 +22,6 @@ #define VERSION "1.0" -#define DEBUG - -#ifdef DEBUG -#define DBG(args...) printk(args) -#else -#define DBG(args...) do { } while(0) -#endif - /* If the cache is older than 800ms we'll refetch it */ #define MAX_AGE msecs_to_jiffies(800) @@ -106,13 +98,10 @@ struct smu_sdbp_header *smu_sat_get_sdb_partition(unsigned int sat_id, int id, buf[i+2] = data[3]; buf[i+3] = data[2]; } -#ifdef DEBUG - DBG(KERN_DEBUG "sat %d partition %x:", sat_id, id); - for (i = 0; i < len; ++i) - DBG(" %x", buf[i]); - DBG("\n"); -#endif + printk(KERN_DEBUG "sat %d partition %x:", sat_id, id); + print_hex_dump(KERN_DEBUG, " ", DUMP_PREFIX_OFFSET, + 16, 1, buf, len, false); if (size) *size = len; return (struct smu_sdbp_header *) buf; @@ -132,13 +121,13 @@ static int wf_sat_read_cache(struct wf_sat *sat) if (err < 0) return err; sat->last_read = jiffies; + #ifdef LOTSA_DEBUG { int i; - DBG(KERN_DEBUG "wf_sat_get: data is"); - for (i = 0; i < 16; ++i) - DBG(" %.2x", sat->cache[i]); - DBG("\n"); + printk(KERN_DEBUG "wf_sat_get: data is"); + print_hex_dump(KERN_DEBUG, " ", DUMP_PREFIX_OFFSET, + 16, 1, sat->cache, 16, false); } #endif return 0; From 8f8f0fb67fce47cb60c743371aa436dff5fedffa Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Mon, 15 Oct 2018 18:48:07 -0600 Subject: [PATCH 1001/3715] PCI: vmd: Detach resources after stopping root bus [ Upstream commit dc8af3a827df6d4bb925d3b81b7ec94a7cce9482 ] The VMD removal path calls pci_stop_root_busi(), which tears down the pcie tree, including detaching all of the attached drivers. During driver detachment, devices may use pci_release_region() to release resources. This path relies on the resource being accessible in resource tree. By detaching the child domain from the parent resource domain prior to stopping the bus, we are preventing the list traversal from finding the resource to be freed. If we instead detach the resource after stopping the bus, we will have properly freed the resource and detaching is simply accounting at that point. Without this order, the resource is never freed and is orphaned on VMD removal, leading to a warning: [ 181.940162] Trying to free nonexistent resource Fixes: 2c2c5c5cd213 ("x86/PCI: VMD: Attach VMD resources to parent domain's resource tree") Signed-off-by: Jon Derrick [lorenzo.pieralisi@arm.com: updated commit log] Signed-off-by: Lorenzo Pieralisi Reviewed-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/pci/host/vmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/host/vmd.c b/drivers/pci/host/vmd.c index 2537b022f42d..af6d5da10ea5 100644 --- a/drivers/pci/host/vmd.c +++ b/drivers/pci/host/vmd.c @@ -753,12 +753,12 @@ static void vmd_remove(struct pci_dev *dev) { struct vmd_dev *vmd = pci_get_drvdata(dev); - vmd_detach_resources(vmd); sysfs_remove_link(&vmd->dev->dev.kobj, "domain"); pci_stop_root_bus(vmd->bus); pci_remove_root_bus(vmd->bus); vmd_cleanup_srcu(vmd); vmd_teardown_dma_ops(vmd); + vmd_detach_resources(vmd); irq_domain_remove(vmd->irq_domain); } From 1827fafe4887a41246fef1e3876e8a988968d2e9 Mon Sep 17 00:00:00 2001 From: Mattias Jacobsson <2pi@mok.nu> Date: Tue, 16 Oct 2018 14:20:08 +0200 Subject: [PATCH 1002/3715] USB: misc: appledisplay: fix backlight update_status return code [ Upstream commit 090158555ff8d194a98616034100b16697dd80d0 ] Upon success the update_status handler returns a positive number corresponding to the number of bytes transferred by usb_control_msg. However the return code of the update_status handler should indicate if an error occurred(negative) or how many bytes of the user's input to sysfs that was consumed. Return code zero indicates all bytes were consumed. The bug can for example result in the update_status handler being called twice, the second time with only the "unconsumed" part of the user's input to sysfs. Effectively setting an incorrect brightness. Change the update_status handler to return zero for all successful transactions and forward usb_control_msg's error code upon failure. Signed-off-by: Mattias Jacobsson <2pi@mok.nu> Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/misc/appledisplay.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/usb/misc/appledisplay.c b/drivers/usb/misc/appledisplay.c index 03be7c75c5be..3b59eaf81eef 100644 --- a/drivers/usb/misc/appledisplay.c +++ b/drivers/usb/misc/appledisplay.c @@ -160,8 +160,11 @@ static int appledisplay_bl_update_status(struct backlight_device *bd) pdata->msgdata, 2, ACD_USB_TIMEOUT); mutex_unlock(&pdata->sysfslock); - - return retval; + + if (retval < 0) + return retval; + else + return 0; } static int appledisplay_bl_get_brightness(struct backlight_device *bd) From 02322e7a97f9e5a19400901b9e9c978b728129e6 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 16 Oct 2018 19:03:43 +0100 Subject: [PATCH 1003/3715] usbip: tools: fix atoi() on non-null terminated string [ Upstream commit e325808c0051b16729ffd472ff887c6cae5c6317 ] Currently the call to atoi is being passed a single char string that is not null terminated, so there is a potential read overrun along the stack when parsing for an integer value. Fix this by instead using a 2 char string that is initialized to all zeros to ensure that a 1 char read into the string is always terminated with a \0. Detected by cppcheck: "Invalid atoi() argument nr 1. A nul-terminated string is required." Fixes: 3391ba0e2792 ("usbip: tools: Extract generic code to be shared with vudc backend") Signed-off-by: Colin Ian King Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- tools/usb/usbip/libsrc/usbip_host_common.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/usb/usbip/libsrc/usbip_host_common.c b/tools/usb/usbip/libsrc/usbip_host_common.c index 6ff7b601f854..f5ad219a324e 100644 --- a/tools/usb/usbip/libsrc/usbip_host_common.c +++ b/tools/usb/usbip/libsrc/usbip_host_common.c @@ -43,7 +43,7 @@ static int32_t read_attr_usbip_status(struct usbip_usb_device *udev) int size; int fd; int length; - char status; + char status[2] = { 0 }; int value = 0; size = snprintf(status_attr_path, sizeof(status_attr_path), @@ -61,14 +61,14 @@ static int32_t read_attr_usbip_status(struct usbip_usb_device *udev) return -1; } - length = read(fd, &status, 1); + length = read(fd, status, 1); if (length < 0) { err("error reading attribute %s", status_attr_path); close(fd); return -1; } - value = atoi(&status); + value = atoi(status); return value; } From 6bc0bc3753d8d91615ad9065bb90c5b15f824d77 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Fri, 12 Oct 2018 20:24:25 +0200 Subject: [PATCH 1004/3715] dm raid: avoid bitmap with raid4/5/6 journal device [ Upstream commit d857ad75edf3c0066fcd920746f9dc75382b3324 ] With raid4/5/6, journal device and write intent bitmap are mutually exclusive. Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-raid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 151211b4cb1b..2c5912e75514 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -2441,7 +2441,7 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev) } /* Enable bitmap creation for RAID levels != 0 */ - mddev->bitmap_info.offset = rt_is_raid0(rs->raid_type) ? 0 : to_sector(4096); + mddev->bitmap_info.offset = (rt_is_raid0(rs->raid_type) || rs->journal_dev.dev) ? 0 : to_sector(4096); mddev->bitmap_info.default_offset = mddev->bitmap_info.offset; if (!test_and_clear_bit(FirstUse, &rdev->flags)) { From a06db4111844606a9b604a2a3374d9ac6a8e1496 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 18 Oct 2018 17:03:56 -0400 Subject: [PATCH 1005/3715] SUNRPC: Fix a compile warning for cmpxchg64() [ Upstream commit e732f4485a150492b286f3efc06f9b34dd6b9995 ] Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- net/sunrpc/auth_gss/gss_krb5_seal.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index 1d74d653e6c0..ad0dcb69395d 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -63,6 +63,7 @@ #include #include #include +#include #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH From 100305625727f2681d74e70f2ec1cea559266345 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 18 Oct 2018 15:27:02 -0400 Subject: [PATCH 1006/3715] sunrpc: safely reallow resvport min/max inversion [ Upstream commit 826799e66e8683e5698e140bb9ef69afc8c0014e ] Commits ffb6ca33b04b and e08ea3a96fc7 prevent setting xprt_min_resvport greater than xprt_max_resvport, but may also break simple code that sets one parameter then the other, if the new range does not overlap the old. Also it looks racy to me, unless there's some serialization I'm not seeing. Granted it would probably require malicious privileged processes (unless there's a chance these might eventually be settable in unprivileged containers), but still it seems better not to let userspace panic the kernel. Simpler seems to be to allow setting the parameters to whatever you want but interpret xprt_min_resvport > xprt_max_resvport as the empty range. Fixes: ffb6ca33b04b "sunrpc: Prevent resvport min/max inversion..." Fixes: e08ea3a96fc7 "sunrpc: Prevent rexvport min/max inversion..." Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- net/sunrpc/xprtsock.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index a42871a59f3b..f75b5b7c1fc2 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -127,7 +127,7 @@ static struct ctl_table xs_tunables_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &xprt_min_resvport_limit, - .extra2 = &xprt_max_resvport + .extra2 = &xprt_max_resvport_limit }, { .procname = "max_resvport", @@ -135,7 +135,7 @@ static struct ctl_table xs_tunables_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &xprt_min_resvport, + .extra1 = &xprt_min_resvport_limit, .extra2 = &xprt_max_resvport_limit }, { @@ -1754,11 +1754,17 @@ static void xs_udp_timer(struct rpc_xprt *xprt, struct rpc_task *task) spin_unlock_bh(&xprt->transport_lock); } -static unsigned short xs_get_random_port(void) +static int xs_get_random_port(void) { - unsigned short range = xprt_max_resvport - xprt_min_resvport + 1; - unsigned short rand = (unsigned short) prandom_u32() % range; - return rand + xprt_min_resvport; + unsigned short min = xprt_min_resvport, max = xprt_max_resvport; + unsigned short range; + unsigned short rand; + + if (max < min) + return -EADDRINUSE; + range = max - min + 1; + rand = (unsigned short) prandom_u32() % range; + return rand + min; } /** @@ -1815,9 +1821,9 @@ static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock) transport->srcport = xs_sock_getport(sock); } -static unsigned short xs_get_srcport(struct sock_xprt *transport) +static int xs_get_srcport(struct sock_xprt *transport) { - unsigned short port = transport->srcport; + int port = transport->srcport; if (port == 0 && transport->xprt.resvport) port = xs_get_random_port(); @@ -1838,7 +1844,7 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock) { struct sockaddr_storage myaddr; int err, nloop = 0; - unsigned short port = xs_get_srcport(transport); + int port = xs_get_srcport(transport); unsigned short last; /* @@ -1856,8 +1862,8 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock) * transport->xprt.resvport == 1) xs_get_srcport above will * ensure that port is non-zero and we will bind as needed. */ - if (port == 0) - return 0; + if (port <= 0) + return port; memcpy(&myaddr, &transport->srcaddr, transport->xprt.addrlen); do { @@ -3286,12 +3292,8 @@ static int param_set_uint_minmax(const char *val, static int param_set_portnr(const char *val, const struct kernel_param *kp) { - if (kp->arg == &xprt_min_resvport) - return param_set_uint_minmax(val, kp, - RPC_MIN_RESVPORT, - xprt_max_resvport); return param_set_uint_minmax(val, kp, - xprt_min_resvport, + RPC_MIN_RESVPORT, RPC_MAX_RESVPORT); } From a68ec8fc46c4dd9a8b123e75a7ab7bdf35cb5d65 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 17 Oct 2018 11:04:19 -0700 Subject: [PATCH 1007/3715] atm: zatm: Fix empty body Clang warnings [ Upstream commit 64b9d16e2d02ca6e5dc8fcd30cfd52b0ecaaa8f4 ] Clang warns: drivers/atm/zatm.c:513:7: error: while loop has empty body [-Werror,-Wempty-body] zwait; ^ drivers/atm/zatm.c:513:7: note: put the semicolon on a separate line to silence this warning Get rid of this warning by using an empty do-while loop. While we're at it, add parentheses to make it clear that this is a function-like macro. Link: https://github.com/ClangBuiltLinux/linux/issues/42 Suggested-by: Masahiro Yamada Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/atm/zatm.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c index 2c288d1f42bb..817c7edfec0b 100644 --- a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c @@ -126,7 +126,7 @@ static unsigned long dummy[2] = {0,0}; #define zin_n(r) inl(zatm_dev->base+r*4) #define zin(r) inl(zatm_dev->base+uPD98401_##r*4) #define zout(v,r) outl(v,zatm_dev->base+uPD98401_##r*4) -#define zwait while (zin(CMR) & uPD98401_BUSY) +#define zwait() do {} while (zin(CMR) & uPD98401_BUSY) /* RX0, RX1, TX0, TX1 */ static const int mbx_entries[NR_MBX] = { 1024,1024,1024,1024 }; @@ -140,7 +140,7 @@ static const int mbx_esize[NR_MBX] = { 16,16,4,4 }; /* entry size in bytes */ static void zpokel(struct zatm_dev *zatm_dev,u32 value,u32 addr) { - zwait; + zwait(); zout(value,CER); zout(uPD98401_IND_ACC | uPD98401_IA_BALL | (uPD98401_IA_TGT_CM << uPD98401_IA_TGT_SHIFT) | addr,CMR); @@ -149,10 +149,10 @@ static void zpokel(struct zatm_dev *zatm_dev,u32 value,u32 addr) static u32 zpeekl(struct zatm_dev *zatm_dev,u32 addr) { - zwait; + zwait(); zout(uPD98401_IND_ACC | uPD98401_IA_BALL | uPD98401_IA_RW | (uPD98401_IA_TGT_CM << uPD98401_IA_TGT_SHIFT) | addr,CMR); - zwait; + zwait(); return zin(CER); } @@ -241,7 +241,7 @@ static void refill_pool(struct atm_dev *dev,int pool) } if (first) { spin_lock_irqsave(&zatm_dev->lock, flags); - zwait; + zwait(); zout(virt_to_bus(first),CER); zout(uPD98401_ADD_BAT | (pool << uPD98401_POOL_SHIFT) | count, CMR); @@ -508,9 +508,9 @@ static int open_rx_first(struct atm_vcc *vcc) } if (zatm_vcc->pool < 0) return -EMSGSIZE; spin_lock_irqsave(&zatm_dev->lock, flags); - zwait; + zwait(); zout(uPD98401_OPEN_CHAN,CMR); - zwait; + zwait(); DPRINTK("0x%x 0x%x\n",zin(CMR),zin(CER)); chan = (zin(CMR) & uPD98401_CHAN_ADDR) >> uPD98401_CHAN_ADDR_SHIFT; spin_unlock_irqrestore(&zatm_dev->lock, flags); @@ -571,21 +571,21 @@ static void close_rx(struct atm_vcc *vcc) pos = vcc->vci >> 1; shift = (1-(vcc->vci & 1)) << 4; zpokel(zatm_dev,zpeekl(zatm_dev,pos) & ~(0xffff << shift),pos); - zwait; + zwait(); zout(uPD98401_NOP,CMR); - zwait; + zwait(); zout(uPD98401_NOP,CMR); spin_unlock_irqrestore(&zatm_dev->lock, flags); } spin_lock_irqsave(&zatm_dev->lock, flags); - zwait; + zwait(); zout(uPD98401_DEACT_CHAN | uPD98401_CHAN_RT | (zatm_vcc->rx_chan << uPD98401_CHAN_ADDR_SHIFT),CMR); - zwait; + zwait(); udelay(10); /* why oh why ... ? */ zout(uPD98401_CLOSE_CHAN | uPD98401_CHAN_RT | (zatm_vcc->rx_chan << uPD98401_CHAN_ADDR_SHIFT),CMR); - zwait; + zwait(); if (!(zin(CMR) & uPD98401_CHAN_ADDR)) printk(KERN_CRIT DEV_LABEL "(itf %d): can't close RX channel " "%d\n",vcc->dev->number,zatm_vcc->rx_chan); @@ -699,7 +699,7 @@ printk("NONONONOO!!!!\n"); skb_queue_tail(&zatm_vcc->tx_queue,skb); DPRINTK("QRP=0x%08lx\n",zpeekl(zatm_dev,zatm_vcc->tx_chan*VC_SIZE/4+ uPD98401_TXVC_QRP)); - zwait; + zwait(); zout(uPD98401_TX_READY | (zatm_vcc->tx_chan << uPD98401_CHAN_ADDR_SHIFT),CMR); spin_unlock_irqrestore(&zatm_dev->lock, flags); @@ -891,12 +891,12 @@ static void close_tx(struct atm_vcc *vcc) } spin_lock_irqsave(&zatm_dev->lock, flags); #if 0 - zwait; + zwait(); zout(uPD98401_DEACT_CHAN | (chan << uPD98401_CHAN_ADDR_SHIFT),CMR); #endif - zwait; + zwait(); zout(uPD98401_CLOSE_CHAN | (chan << uPD98401_CHAN_ADDR_SHIFT),CMR); - zwait; + zwait(); if (!(zin(CMR) & uPD98401_CHAN_ADDR)) printk(KERN_CRIT DEV_LABEL "(itf %d): can't close TX channel " "%d\n",vcc->dev->number,chan); @@ -926,9 +926,9 @@ static int open_tx_first(struct atm_vcc *vcc) zatm_vcc->tx_chan = 0; if (vcc->qos.txtp.traffic_class == ATM_NONE) return 0; spin_lock_irqsave(&zatm_dev->lock, flags); - zwait; + zwait(); zout(uPD98401_OPEN_CHAN,CMR); - zwait; + zwait(); DPRINTK("0x%x 0x%x\n",zin(CMR),zin(CER)); chan = (zin(CMR) & uPD98401_CHAN_ADDR) >> uPD98401_CHAN_ADDR_SHIFT; spin_unlock_irqrestore(&zatm_dev->lock, flags); @@ -1559,7 +1559,7 @@ static void zatm_phy_put(struct atm_dev *dev,unsigned char value, struct zatm_dev *zatm_dev; zatm_dev = ZATM_DEV(dev); - zwait; + zwait(); zout(value,CER); zout(uPD98401_IND_ACC | uPD98401_IA_B0 | (uPD98401_IA_TGT_PHY << uPD98401_IA_TGT_SHIFT) | addr,CMR); @@ -1571,10 +1571,10 @@ static unsigned char zatm_phy_get(struct atm_dev *dev,unsigned long addr) struct zatm_dev *zatm_dev; zatm_dev = ZATM_DEV(dev); - zwait; + zwait(); zout(uPD98401_IND_ACC | uPD98401_IA_B0 | uPD98401_IA_RW | (uPD98401_IA_TGT_PHY << uPD98401_IA_TGT_SHIFT) | addr,CMR); - zwait; + zwait(); return zin(CER) & 0xff; } From 156c7d83c90de9d8afd5a5bd8b4536c38bda7939 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 15 Oct 2018 14:39:29 +0100 Subject: [PATCH 1008/3715] s390/perf: Return error when debug_register fails [ Upstream commit ec0c0bb489727de0d4dca6a00be6970ab8a3b30a ] Return an error when the function debug_register() fails allocating the debug handle. Also remove the registered debug handle when the initialization fails later on. Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin --- arch/s390/kernel/perf_cpum_sf.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index d99155793c26..2e2fd9535f86 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1610,14 +1610,17 @@ static int __init init_cpum_sampling_pmu(void) } sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80); - if (!sfdbg) + if (!sfdbg) { pr_err("Registering for s390dbf failed\n"); + return -ENOMEM; + } debug_register_view(sfdbg, &debug_sprintf_view); err = register_external_irq(EXT_IRQ_MEASURE_ALERT, cpumf_measurement_alert); if (err) { pr_cpumsf_err(RS_INIT_FAILURE_ALRT); + debug_unregister(sfdbg); goto out; } @@ -1626,6 +1629,7 @@ static int __init init_cpum_sampling_pmu(void) pr_cpumsf_err(RS_INIT_FAILURE_PERF); unregister_external_irq(EXT_IRQ_MEASURE_ALERT, cpumf_measurement_alert); + debug_unregister(sfdbg); goto out; } From 9fa14d7b75f6accd4cedf360d2fb190012f02f8b Mon Sep 17 00:00:00 2001 From: Vignesh R Date: Mon, 15 Oct 2018 12:08:28 +0530 Subject: [PATCH 1009/3715] spi: omap2-mcspi: Set FIFO DMA trigger level to word length [ Upstream commit b682cffa3ac6d9d9e16e9b413c45caee3b391fab ] McSPI has 32 byte FIFO in Transmit-Receive mode. Current code tries to configuration FIFO watermark level for DMA trigger to be GCD of transfer length and max FIFO size which would mean trigger level may be set to 32 for transmit-receive mode if length is aligned. This does not work in case of SPI slave mode where FIFO always needs to have data ready whenever master starts the clock. With DMA trigger size of 32 there will be a small window during slave TX where DMA is still putting data into FIFO but master would have started clock for next byte, resulting in shifting out of stale data. Similarly, on Slave RX side there may be RX FIFO overflow Fix this by setting FIFO watermark for DMA trigger to word length. This means DMA is triggered as soon as FIFO has space for word length bytes and DMA would make sure FIFO is almost always full therefore improving FIFO occupancy in both master and slave mode. Signed-off-by: Vignesh R Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-omap2-mcspi.c | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c index 9bf64e6eca9b..517d0ade586b 100644 --- a/drivers/spi/spi-omap2-mcspi.c +++ b/drivers/spi/spi-omap2-mcspi.c @@ -298,7 +298,7 @@ static void omap2_mcspi_set_fifo(const struct spi_device *spi, struct omap2_mcspi_cs *cs = spi->controller_state; struct omap2_mcspi *mcspi; unsigned int wcnt; - int max_fifo_depth, fifo_depth, bytes_per_word; + int max_fifo_depth, bytes_per_word; u32 chconf, xferlevel; mcspi = spi_master_get_devdata(master); @@ -314,10 +314,6 @@ static void omap2_mcspi_set_fifo(const struct spi_device *spi, else max_fifo_depth = OMAP2_MCSPI_MAX_FIFODEPTH; - fifo_depth = gcd(t->len, max_fifo_depth); - if (fifo_depth < 2 || fifo_depth % bytes_per_word != 0) - goto disable_fifo; - wcnt = t->len / bytes_per_word; if (wcnt > OMAP2_MCSPI_MAX_FIFOWCNT) goto disable_fifo; @@ -325,16 +321,17 @@ static void omap2_mcspi_set_fifo(const struct spi_device *spi, xferlevel = wcnt << 16; if (t->rx_buf != NULL) { chconf |= OMAP2_MCSPI_CHCONF_FFER; - xferlevel |= (fifo_depth - 1) << 8; + xferlevel |= (bytes_per_word - 1) << 8; } + if (t->tx_buf != NULL) { chconf |= OMAP2_MCSPI_CHCONF_FFET; - xferlevel |= fifo_depth - 1; + xferlevel |= bytes_per_word - 1; } mcspi_write_reg(master, OMAP2_MCSPI_XFERLEVEL, xferlevel); mcspi_write_chconf0(spi, chconf); - mcspi->fifo_depth = fifo_depth; + mcspi->fifo_depth = max_fifo_depth; return; } @@ -601,7 +598,6 @@ omap2_mcspi_txrx_dma(struct spi_device *spi, struct spi_transfer *xfer) struct dma_slave_config cfg; enum dma_slave_buswidth width; unsigned es; - u32 burst; void __iomem *chstat_reg; void __iomem *irqstat_reg; int wait_res; @@ -623,22 +619,14 @@ omap2_mcspi_txrx_dma(struct spi_device *spi, struct spi_transfer *xfer) } count = xfer->len; - burst = 1; - - if (mcspi->fifo_depth > 0) { - if (count > mcspi->fifo_depth) - burst = mcspi->fifo_depth / es; - else - burst = count / es; - } memset(&cfg, 0, sizeof(cfg)); cfg.src_addr = cs->phys + OMAP2_MCSPI_RX0; cfg.dst_addr = cs->phys + OMAP2_MCSPI_TX0; cfg.src_addr_width = width; cfg.dst_addr_width = width; - cfg.src_maxburst = burst; - cfg.dst_maxburst = burst; + cfg.src_maxburst = es; + cfg.dst_maxburst = es; rx = xfer->rx_buf; tx = xfer->tx_buf; From d50730b4d6cefd1a5bcda0ab2ff2f7e90306d18f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 19 Oct 2018 10:52:52 -0700 Subject: [PATCH 1010/3715] sparc: Fix parport build warnings. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 46b8306480fb424abd525acc1763da1c63a27d8a ] If PARPORT_PC_FIFO is not enabled, do not provide the dma lock macros and lock definition. Otherwise: ./arch/sparc/include/asm/parport.h:24:24: warning: ‘dma_spin_lock’ defined but not used [-Wunused-variable] static DEFINE_SPINLOCK(dma_spin_lock); ^~~~~~~~~~~~~ ./include/linux/spinlock_types.h:81:39: note: in definition of macro ‘DEFINE_SPINLOCK’ #define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- arch/sparc/include/asm/parport.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/sparc/include/asm/parport.h b/arch/sparc/include/asm/parport.h index 05df5f043053..3c5a1c620f0f 100644 --- a/arch/sparc/include/asm/parport.h +++ b/arch/sparc/include/asm/parport.h @@ -21,6 +21,7 @@ */ #define HAS_DMA +#ifdef CONFIG_PARPORT_PC_FIFO static DEFINE_SPINLOCK(dma_spin_lock); #define claim_dma_lock() \ @@ -31,6 +32,7 @@ static DEFINE_SPINLOCK(dma_spin_lock); #define release_dma_lock(__flags) \ spin_unlock_irqrestore(&dma_spin_lock, __flags); +#endif static struct sparc_ebus_info { struct ebus_dma_info info; From 27b1ef75f5794c743b5996ebcad807e0a71e2734 Mon Sep 17 00:00:00 2001 From: Aravinda Prasad Date: Tue, 16 Oct 2018 17:20:05 +0530 Subject: [PATCH 1011/3715] powerpc/pseries: Export raw per-CPU VPA data via debugfs [ Upstream commit c6c26fb55e8e4b3fc376be5611685990a17de27a ] This patch exports the raw per-CPU VPA data via debugfs. A per-CPU file is created which exports the VPA data of that CPU to help debug some of the VPA related issues or to analyze the per-CPU VPA related statistics. v3: Removed offline CPU check. v2: Included offline CPU check and other review comments. Signed-off-by: Aravinda Prasad Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/platforms/pseries/lpar.c | 54 +++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index eb738ef57792..c0ae3847b8db 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -48,6 +48,7 @@ #include #include #include +#include #include "pseries.h" @@ -1036,3 +1037,56 @@ static int __init reserve_vrma_context_id(void) return 0; } machine_device_initcall(pseries, reserve_vrma_context_id); + +#ifdef CONFIG_DEBUG_FS +/* debugfs file interface for vpa data */ +static ssize_t vpa_file_read(struct file *filp, char __user *buf, size_t len, + loff_t *pos) +{ + int cpu = (long)filp->private_data; + struct lppaca *lppaca = &lppaca_of(cpu); + + return simple_read_from_buffer(buf, len, pos, lppaca, + sizeof(struct lppaca)); +} + +static const struct file_operations vpa_fops = { + .open = simple_open, + .read = vpa_file_read, + .llseek = default_llseek, +}; + +static int __init vpa_debugfs_init(void) +{ + char name[16]; + long i; + static struct dentry *vpa_dir; + + if (!firmware_has_feature(FW_FEATURE_SPLPAR)) + return 0; + + vpa_dir = debugfs_create_dir("vpa", powerpc_debugfs_root); + if (!vpa_dir) { + pr_warn("%s: can't create vpa root dir\n", __func__); + return -ENOMEM; + } + + /* set up the per-cpu vpa file*/ + for_each_possible_cpu(i) { + struct dentry *d; + + sprintf(name, "cpu-%ld", i); + + d = debugfs_create_file(name, 0400, vpa_dir, (void *)i, + &vpa_fops); + if (!d) { + pr_warn("%s: can't create per-cpu vpa file\n", + __func__); + return -ENOMEM; + } + } + + return 0; +} +machine_arch_initcall(pseries, vpa_debugfs_init); +#endif /* CONFIG_DEBUG_FS */ From 1a62bbf3edf08ccded88761431e8614fe7494df0 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 28 Sep 2018 09:10:29 +0800 Subject: [PATCH 1012/3715] ceph: fix dentry leak in ceph_readdir_prepopulate [ Upstream commit c58f450bd61511d897efc2ea472c69630635b557 ] Signed-off-by: "Yan, Zheng" Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov Signed-off-by: Sasha Levin --- fs/ceph/inode.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 3818027c12f5..5999d806de78 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1631,7 +1631,6 @@ retry_lookup: if (IS_ERR(realdn)) { err = PTR_ERR(realdn); d_drop(dn); - dn = NULL; goto next_item; } dn = realdn; From 3a17e015ec5bcba313a76f73a20f375329233765 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 19 Oct 2018 13:43:45 -0700 Subject: [PATCH 1013/3715] rtc: s35390a: Change buf's type to u8 in s35390a_init [ Upstream commit ef0f02fd69a02b50e468a4ddbe33e3d81671e248 ] Clang warns: drivers/rtc/rtc-s35390a.c:124:27: warning: implicit conversion from 'int' to 'char' changes value from 192 to -64 [-Wconstant-conversion] buf = S35390A_FLAG_RESET | S35390A_FLAG_24H; ~ ~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~ 1 warning generated. Update buf to be an unsigned 8-bit integer, which matches the buf member in struct i2c_msg. https://github.com/ClangBuiltLinux/linux/issues/145 Signed-off-by: Nathan Chancellor Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- drivers/rtc/rtc-s35390a.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-s35390a.c b/drivers/rtc/rtc-s35390a.c index 7067bca5c20d..6bfff0a6d655 100644 --- a/drivers/rtc/rtc-s35390a.c +++ b/drivers/rtc/rtc-s35390a.c @@ -108,7 +108,7 @@ static int s35390a_get_reg(struct s35390a *s35390a, int reg, char *buf, int len) static int s35390a_init(struct s35390a *s35390a) { - char buf; + u8 buf; int ret; unsigned initcount = 0; From 18acb442527d0558dd8e6d6f2452f5d794dec1dd Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 27 Jul 2018 18:15:16 +0800 Subject: [PATCH 1014/3715] f2fs: fix to spread clear_cold_data() [ Upstream commit 2baf07818549c8bb8d7b3437e889b86eab56d38e ] We need to drop PG_checked flag on page as well when we clear PG_uptodate flag, in order to avoid treating the page as GCing one later. Signed-off-by: Weichao Guo Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/data.c | 8 +++++++- fs/f2fs/dir.c | 1 + fs/f2fs/segment.c | 4 +++- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index cc5729445194..ac3fa4bbed2d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1445,6 +1445,7 @@ int do_write_data_page(struct f2fs_io_info *fio) /* This page is already truncated */ if (fio->old_blkaddr == NULL_ADDR) { ClearPageUptodate(page); + clear_cold_data(page); goto out_writepage; } got_it: @@ -1597,8 +1598,10 @@ done: out: inode_dec_dirty_pages(inode); - if (err) + if (err) { ClearPageUptodate(page); + clear_cold_data(page); + } if (wbc->for_reclaim) { f2fs_submit_merged_write_cond(sbi, inode, 0, page->index, DATA); @@ -2158,6 +2161,8 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset, } } + clear_cold_data(page); + /* This is atomic written page, keep Private */ if (IS_ATOMIC_WRITTEN_PAGE(page)) return drop_inmem_page(inode, page); @@ -2176,6 +2181,7 @@ int f2fs_release_page(struct page *page, gfp_t wait) if (IS_ATOMIC_WRITTEN_PAGE(page)) return 0; + clear_cold_data(page); set_page_private(page, 0); ClearPagePrivate(page); return 1; diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index c0c933ad43c8..4abefd841b6c 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -745,6 +745,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, clear_page_dirty_for_io(page); ClearPagePrivate(page); ClearPageUptodate(page); + clear_cold_data(page); inode_dec_dirty_pages(dir); remove_dirty_inode(dir); } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9e5fca35e47d..2cd0d126ef8f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -251,8 +251,10 @@ retry: } next: /* we don't need to invalidate this in the sccessful status */ - if (drop || recover) + if (drop || recover) { ClearPageUptodate(page); + clear_cold_data(page); + } set_page_private(page, 0); ClearPagePrivate(page); f2fs_put_page(page, 1); From a8ab2f7969b034a810d295e2514626c504ee9d89 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 19 Oct 2018 11:00:30 -0700 Subject: [PATCH 1015/3715] mISDN: Fix type of switch control variable in ctrl_teimanager [ Upstream commit aeb5e02aca91522733eb1db595ac607d30c87767 ] Clang warns (trimmed for brevity): drivers/isdn/mISDN/tei.c:1193:7: warning: overflow converting case value to switch condition type (2147764552 to 18446744071562348872) [-Wswitch] case IMHOLD_L1: ^ drivers/isdn/mISDN/tei.c:1187:7: warning: overflow converting case value to switch condition type (2147764550 to 18446744071562348870) [-Wswitch] case IMCLEAR_L2: ^ 2 warnings generated. The root cause is that the _IOC macro can generate really large numbers, which don't find into type int. My research into how GCC and Clang are handling this at a low level didn't prove fruitful and surveying the kernel tree shows that aside from here and a few places in the scsi subsystem, everything that uses _IOC is at least of type 'unsigned int'. Make that change here because as nothing in this function cares about the signedness of the variable and it removes ambiguity, which is never good when dealing with compilers. While we're here, remove the unnecessary local variable ret (just return -EINVAL and 0 directly). Link: https://github.com/ClangBuiltLinux/linux/issues/67 Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/isdn/mISDN/tei.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/isdn/mISDN/tei.c b/drivers/isdn/mISDN/tei.c index 12d9e5f4beb1..58635b5f296f 100644 --- a/drivers/isdn/mISDN/tei.c +++ b/drivers/isdn/mISDN/tei.c @@ -1180,8 +1180,7 @@ static int ctrl_teimanager(struct manager *mgr, void *arg) { /* currently we only have one option */ - int *val = (int *)arg; - int ret = 0; + unsigned int *val = (unsigned int *)arg; switch (val[0]) { case IMCLEAR_L2: @@ -1197,9 +1196,9 @@ ctrl_teimanager(struct manager *mgr, void *arg) test_and_clear_bit(OPTION_L1_HOLD, &mgr->options); break; default: - ret = -EINVAL; + return -EINVAL; } - return ret; + return 0; } /* This function does create a L2 for fixed TEI in NT Mode */ From dd99b3b9d60e79c068e0f37c8c3e2fa00a66c123 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 19 Oct 2018 23:11:11 +0300 Subject: [PATCH 1016/3715] qlcnic: fix a return in qlcnic_dcb_get_capability() [ Upstream commit c94f026fb742b2d3199422751dbc4f6fc0e753d8 ] These functions are supposed to return one on failure and zero on success. Returning a zero here could cause uninitialized variable bugs in several of the callers. For example: drivers/scsi/cxgbi/cxgb4i/cxgb4i.c:1660 get_iscsi_dcb_priority() error: uninitialized symbol 'caps'. Fixes: 48365e485275 ("qlcnic: dcb: Add support for CEE Netlink interface.") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c index 4b76c69fe86d..834208e55f7b 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c @@ -883,7 +883,7 @@ static u8 qlcnic_dcb_get_capability(struct net_device *netdev, int capid, struct qlcnic_adapter *adapter = netdev_priv(netdev); if (!test_bit(QLCNIC_DCB_STATE, &adapter->dcb->state)) - return 0; + return 1; switch (capid) { case DCB_CAP_ATTR_PG: From 5de459c874400367ce17da43e7f0b11e5c7f24cd Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Mon, 22 Oct 2018 21:51:36 +0300 Subject: [PATCH 1017/3715] net: ethernet: ti: cpsw: unsync mcast entries while switch promisc mode [ Upstream commit 9737cc99dd14b5b8b9d267618a6061feade8ea68 ] After flushing all mcast entries from the table, the ones contained in mc list of ndev are not restored when promisc mode is toggled off, because they are considered as synched with ALE, thus, in order to restore them after promisc mode - reset syncing info. This fix touches only switch mode devices, including single port boards like Beagle Bone. Fixes: commit 5da1948969bc ("net: ethernet: ti: cpsw: fix lost of mcast packets while rx_mode update") Signed-off-by: Ivan Khoronzhuk Reviewed-by: Grygorii Strashko Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/ti/cpsw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 8cb44eabc283..a44838aac97d 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -601,6 +601,7 @@ static void cpsw_set_promiscious(struct net_device *ndev, bool enable) /* Clear all mcast from ALE */ cpsw_ale_flush_multicast(ale, ALE_ALL_PORTS, -1); + __dev_mc_unsync(ndev, NULL); /* Flood All Unicast Packets to Host port */ cpsw_ale_control_set(ale, 0, ALE_P0_UNI_FLOOD, 1); From b583f37009be4e2686b4f5a46f4389ac96b65a83 Mon Sep 17 00:00:00 2001 From: Sapthagiri Baratam Date: Tue, 21 Aug 2018 19:52:44 +0530 Subject: [PATCH 1018/3715] mfd: arizona: Correct calling of runtime_put_sync [ Upstream commit 6b269a41a4520f7eb639e61a45ebbb9c9267d5e0 ] Don't call runtime_put_sync when clk32k_ref is ARIZONA_32KZ_MCLK2 as there is no corresponding runtime_get_sync call. MCLK1 is not in the AoD power domain so if it is used as 32kHz clock source we need to hold a runtime PM reference to keep the device from going into low power mode. Fixes: cdd8da8cc66b ("mfd: arizona: Add gating of external MCLKn clocks") Signed-off-by: Sapthagiri Baratam Acked-by: Charles Keepax Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/mfd/arizona-core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/mfd/arizona-core.c b/drivers/mfd/arizona-core.c index d8e3184bd27c..ad8a5296c50b 100644 --- a/drivers/mfd/arizona-core.c +++ b/drivers/mfd/arizona-core.c @@ -52,8 +52,10 @@ int arizona_clk32k_enable(struct arizona *arizona) if (ret != 0) goto err_ref; ret = clk_prepare_enable(arizona->mclk[ARIZONA_MCLK1]); - if (ret != 0) - goto err_pm; + if (ret != 0) { + pm_runtime_put_sync(arizona->dev); + goto err_ref; + } break; case ARIZONA_32KZ_MCLK2: ret = clk_prepare_enable(arizona->mclk[ARIZONA_MCLK2]); @@ -67,8 +69,6 @@ int arizona_clk32k_enable(struct arizona *arizona) ARIZONA_CLK_32K_ENA); } -err_pm: - pm_runtime_put_sync(arizona->dev); err_ref: if (ret != 0) arizona->clk32k_ref--; From ff00b5656fb0de41a23c8c5ecb8d848b5fd88614 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 28 Aug 2018 17:02:40 -0300 Subject: [PATCH 1019/3715] mfd: mc13xxx-core: Fix PMIC shutdown when reading ADC values [ Upstream commit 55143439b7b501882bea9d95a54adfe00ffc79a3 ] When trying to read any MC13892 ADC channel on a imx51-babbage board: The MC13892 PMIC shutdowns completely. After debugging this issue and comparing the MC13892 and MC13783 initializations done in the vendor kernel, it was noticed that the CHRGRAWDIV bit of the ADC0 register was not being set. This bit is set by default after power on, but the driver was clearing it. After setting this bit it is possible to read the ADC values correctly. Signed-off-by: Fabio Estevam Tested-by: Chris Healy Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/mfd/mc13xxx-core.c | 3 ++- include/linux/mfd/mc13xxx.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mfd/mc13xxx-core.c b/drivers/mfd/mc13xxx-core.c index 6c16f170529f..75d52034f89d 100644 --- a/drivers/mfd/mc13xxx-core.c +++ b/drivers/mfd/mc13xxx-core.c @@ -278,7 +278,8 @@ int mc13xxx_adc_do_conversion(struct mc13xxx *mc13xxx, unsigned int mode, if (ret) goto out; - adc0 = MC13XXX_ADC0_ADINC1 | MC13XXX_ADC0_ADINC2; + adc0 = MC13XXX_ADC0_ADINC1 | MC13XXX_ADC0_ADINC2 | + MC13XXX_ADC0_CHRGRAWDIV; adc1 = MC13XXX_ADC1_ADEN | MC13XXX_ADC1_ADTRIGIGN | MC13XXX_ADC1_ASC; if (channel > 7) diff --git a/include/linux/mfd/mc13xxx.h b/include/linux/mfd/mc13xxx.h index 638222e43e48..93011c61aafd 100644 --- a/include/linux/mfd/mc13xxx.h +++ b/include/linux/mfd/mc13xxx.h @@ -247,6 +247,7 @@ struct mc13xxx_platform_data { #define MC13XXX_ADC0_TSMOD0 (1 << 12) #define MC13XXX_ADC0_TSMOD1 (1 << 13) #define MC13XXX_ADC0_TSMOD2 (1 << 14) +#define MC13XXX_ADC0_CHRGRAWDIV (1 << 15) #define MC13XXX_ADC0_ADINC1 (1 << 16) #define MC13XXX_ADC0_ADINC2 (1 << 17) From 03336f71d1cbebb15add363d13060863d8df9762 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 30 Aug 2018 19:52:52 +0300 Subject: [PATCH 1020/3715] mfd: intel_soc_pmic_bxtwc: Chain power button IRQs as well [ Upstream commit 9f8ddee1dab836ca758ca8fc555ab5a3aaa5d3fd ] Power button IRQ actually has a second level of interrupts to distinguish between UI and POWER buttons. Moreover, current implementation looks awkward in approach to handle second level IRQs by first level related IRQ chip. To address above issues, split power button IRQ to be chained as well. Signed-off-by: Andy Shevchenko Reviewed-by: Mika Westerberg Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/mfd/intel_soc_pmic_bxtwc.c | 41 ++++++++++++++++++++++-------- include/linux/mfd/intel_soc_pmic.h | 1 + 2 files changed, 32 insertions(+), 10 deletions(-) diff --git a/drivers/mfd/intel_soc_pmic_bxtwc.c b/drivers/mfd/intel_soc_pmic_bxtwc.c index 15bc052704a6..9ca1f8c015de 100644 --- a/drivers/mfd/intel_soc_pmic_bxtwc.c +++ b/drivers/mfd/intel_soc_pmic_bxtwc.c @@ -31,8 +31,8 @@ /* Interrupt Status Registers */ #define BXTWC_IRQLVL1 0x4E02 -#define BXTWC_PWRBTNIRQ 0x4E03 +#define BXTWC_PWRBTNIRQ 0x4E03 #define BXTWC_THRM0IRQ 0x4E04 #define BXTWC_THRM1IRQ 0x4E05 #define BXTWC_THRM2IRQ 0x4E06 @@ -47,10 +47,9 @@ /* Interrupt MASK Registers */ #define BXTWC_MIRQLVL1 0x4E0E -#define BXTWC_MPWRTNIRQ 0x4E0F - #define BXTWC_MIRQLVL1_MCHGR BIT(5) +#define BXTWC_MPWRBTNIRQ 0x4E0F #define BXTWC_MTHRM0IRQ 0x4E12 #define BXTWC_MTHRM1IRQ 0x4E13 #define BXTWC_MTHRM2IRQ 0x4E14 @@ -66,9 +65,7 @@ /* Whiskey Cove PMIC share same ACPI ID between different platforms */ #define BROXTON_PMIC_WC_HRV 4 -/* Manage in two IRQ chips since mask registers are not consecutive */ enum bxtwc_irqs { - /* Level 1 */ BXTWC_PWRBTN_LVL1_IRQ = 0, BXTWC_TMU_LVL1_IRQ, BXTWC_THRM_LVL1_IRQ, @@ -77,9 +74,11 @@ enum bxtwc_irqs { BXTWC_CHGR_LVL1_IRQ, BXTWC_GPIO_LVL1_IRQ, BXTWC_CRIT_LVL1_IRQ, +}; - /* Level 2 */ - BXTWC_PWRBTN_IRQ, +enum bxtwc_irqs_pwrbtn { + BXTWC_PWRBTN_IRQ = 0, + BXTWC_UIBTN_IRQ, }; enum bxtwc_irqs_bcu { @@ -113,7 +112,10 @@ static const struct regmap_irq bxtwc_regmap_irqs[] = { REGMAP_IRQ_REG(BXTWC_CHGR_LVL1_IRQ, 0, BIT(5)), REGMAP_IRQ_REG(BXTWC_GPIO_LVL1_IRQ, 0, BIT(6)), REGMAP_IRQ_REG(BXTWC_CRIT_LVL1_IRQ, 0, BIT(7)), - REGMAP_IRQ_REG(BXTWC_PWRBTN_IRQ, 1, 0x03), +}; + +static const struct regmap_irq bxtwc_regmap_irqs_pwrbtn[] = { + REGMAP_IRQ_REG(BXTWC_PWRBTN_IRQ, 0, 0x01), }; static const struct regmap_irq bxtwc_regmap_irqs_bcu[] = { @@ -125,7 +127,7 @@ static const struct regmap_irq bxtwc_regmap_irqs_adc[] = { }; static const struct regmap_irq bxtwc_regmap_irqs_chgr[] = { - REGMAP_IRQ_REG(BXTWC_USBC_IRQ, 0, BIT(5)), + REGMAP_IRQ_REG(BXTWC_USBC_IRQ, 0, 0x20), REGMAP_IRQ_REG(BXTWC_CHGR0_IRQ, 0, 0x1f), REGMAP_IRQ_REG(BXTWC_CHGR1_IRQ, 1, 0x1f), }; @@ -144,7 +146,16 @@ static struct regmap_irq_chip bxtwc_regmap_irq_chip = { .mask_base = BXTWC_MIRQLVL1, .irqs = bxtwc_regmap_irqs, .num_irqs = ARRAY_SIZE(bxtwc_regmap_irqs), - .num_regs = 2, + .num_regs = 1, +}; + +static struct regmap_irq_chip bxtwc_regmap_irq_chip_pwrbtn = { + .name = "bxtwc_irq_chip_pwrbtn", + .status_base = BXTWC_PWRBTNIRQ, + .mask_base = BXTWC_MPWRBTNIRQ, + .irqs = bxtwc_regmap_irqs_pwrbtn, + .num_irqs = ARRAY_SIZE(bxtwc_regmap_irqs_pwrbtn), + .num_regs = 1, }; static struct regmap_irq_chip bxtwc_regmap_irq_chip_tmu = { @@ -472,6 +483,16 @@ static int bxtwc_probe(struct platform_device *pdev) return ret; } + ret = bxtwc_add_chained_irq_chip(pmic, pmic->irq_chip_data, + BXTWC_PWRBTN_LVL1_IRQ, + IRQF_ONESHOT, + &bxtwc_regmap_irq_chip_pwrbtn, + &pmic->irq_chip_data_pwrbtn); + if (ret) { + dev_err(&pdev->dev, "Failed to add PWRBTN IRQ chip\n"); + return ret; + } + ret = bxtwc_add_chained_irq_chip(pmic, pmic->irq_chip_data, BXTWC_TMU_LVL1_IRQ, IRQF_ONESHOT, diff --git a/include/linux/mfd/intel_soc_pmic.h b/include/linux/mfd/intel_soc_pmic.h index 5aacdb017a9f..806a4f095312 100644 --- a/include/linux/mfd/intel_soc_pmic.h +++ b/include/linux/mfd/intel_soc_pmic.h @@ -25,6 +25,7 @@ struct intel_soc_pmic { int irq; struct regmap *regmap; struct regmap_irq_chip_data *irq_chip_data; + struct regmap_irq_chip_data *irq_chip_data_pwrbtn; struct regmap_irq_chip_data *irq_chip_data_tmu; struct regmap_irq_chip_data *irq_chip_data_bcu; struct regmap_irq_chip_data *irq_chip_data_adc; From 3fd636f722adde51bb97119e1c786984a9c54bce Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 5 Sep 2018 13:54:07 +0200 Subject: [PATCH 1021/3715] mfd: max8997: Enale irq-wakeup unconditionally [ Upstream commit efddff27c886e729a7f84a7205bd84d7d4af7336 ] IRQ wake up support for MAX8997 driver was initially configured by respective property in pdata. However, after the driver conversion to device-tree, setting it was left as 'todo'. Nowadays most of other PMIC MFD drivers initialized from device-tree assume that they can be an irq wakeup source, so enable it also for MAX8997. This fixes support for wakeup from MAX8997 RTC alarm. Signed-off-by: Marek Szyprowski Reviewed-by: Krzysztof Kozlowski Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/mfd/max8997.c | 8 +------- include/linux/mfd/max8997.h | 1 - 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/mfd/max8997.c b/drivers/mfd/max8997.c index 2d6e2c392786..4a2fc59d5901 100644 --- a/drivers/mfd/max8997.c +++ b/drivers/mfd/max8997.c @@ -155,12 +155,6 @@ static struct max8997_platform_data *max8997_i2c_parse_dt_pdata( pd->ono = irq_of_parse_and_map(dev->of_node, 1); - /* - * ToDo: the 'wakeup' member in the platform data is more of a linux - * specfic information. Hence, there is no binding for that yet and - * not parsed here. - */ - return pd; } @@ -248,7 +242,7 @@ static int max8997_i2c_probe(struct i2c_client *i2c, */ /* MAX8997 has a power button input. */ - device_init_wakeup(max8997->dev, pdata->wakeup); + device_init_wakeup(max8997->dev, true); return ret; diff --git a/include/linux/mfd/max8997.h b/include/linux/mfd/max8997.h index cf815577bd68..3ae1fe743bc3 100644 --- a/include/linux/mfd/max8997.h +++ b/include/linux/mfd/max8997.h @@ -178,7 +178,6 @@ struct max8997_led_platform_data { struct max8997_platform_data { /* IRQ */ int ono; - int wakeup; /* ---- PMIC ---- */ struct max8997_regulator_data *regulators; From c033f4b76b6168367b83c7a44ff48ac4a31fa14b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 30 Aug 2018 23:16:13 +0900 Subject: [PATCH 1022/3715] selftests/ftrace: Fix to test kprobe $comm arg only if available [ Upstream commit 2452c96e617a0ff6fb2692e55217a3fa57a7322c ] Test $comm in kprobe-event argument syntax testcase only if it is supported on the kernel because $comm has been introduced 4.8 kernel. So on older stable kernel, it should be skipped. Signed-off-by: Masami Hiramatsu Signed-off-by: Shuah Khan (Samsung OSG) Signed-off-by: Sasha Levin --- .../selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc index 231bcd2c4eb5..1e7ac6f3362f 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc @@ -71,8 +71,11 @@ test_badarg "\$stackp" "\$stack0+10" "\$stack1-10" echo "r ${PROBEFUNC} \$retval" > kprobe_events ! echo "p ${PROBEFUNC} \$retval" > kprobe_events +# $comm was introduced in 4.8, older kernels reject it. +if grep -A1 "fetcharg:" README | grep -q '\$comm' ; then : "Comm access" test_goodarg "\$comm" +fi : "Indirect memory access" test_goodarg "+0(${GOODREG})" "-0(${GOODREG})" "+10(\$stack)" \ From b8676497ea4474bd3bd71a9efbe3a65e9fa761ad Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Wed, 26 Sep 2018 13:07:11 -0600 Subject: [PATCH 1023/3715] selftests: watchdog: fix message when /dev/watchdog open fails [ Upstream commit 9a244229a4b850b11952a0df79607c69b18fd8df ] When /dev/watchdog open fails, watchdog exits with "watchdog not enabled" message. This is incorrect when open fails due to insufficient privilege. Fix message to clearly state the reason when open fails with EACCESS when a non-root user runs it. Signed-off-by: Shuah Khan (Samsung OSG) Signed-off-by: Sasha Levin --- tools/testing/selftests/watchdog/watchdog-test.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c index 6e290874b70e..e029e2017280 100644 --- a/tools/testing/selftests/watchdog/watchdog-test.c +++ b/tools/testing/selftests/watchdog/watchdog-test.c @@ -89,7 +89,13 @@ int main(int argc, char *argv[]) fd = open("/dev/watchdog", O_WRONLY); if (fd == -1) { - printf("Watchdog device not enabled.\n"); + if (errno == ENOENT) + printf("Watchdog device not enabled.\n"); + else if (errno == EACCES) + printf("Run watchdog as root.\n"); + else + printf("Watchdog device open failed %s\n", + strerror(errno)); exit(-1); } From debe34e0354f38e4eb281bf4aa7542b9811dd430 Mon Sep 17 00:00:00 2001 From: Jerry Hoemann Date: Wed, 26 Sep 2018 15:23:08 -0600 Subject: [PATCH 1024/3715] selftests: watchdog: Fix error message. [ Upstream commit 04d5e4bd37516ad60854eb74592c7dbddd75d277 ] Printf's say errno but print the string version of error. Make consistent. Signed-off-by: Jerry Hoemann Signed-off-by: Shuah Khan (Samsung OSG) Signed-off-by: Sasha Levin --- tools/testing/selftests/watchdog/watchdog-test.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c index e029e2017280..f1c6e025cbe5 100644 --- a/tools/testing/selftests/watchdog/watchdog-test.c +++ b/tools/testing/selftests/watchdog/watchdog-test.c @@ -109,7 +109,7 @@ int main(int argc, char *argv[]) printf("Last boot is caused by: %s.\n", (flags != 0) ? "Watchdog" : "Power-On-Reset"); else - printf("WDIOC_GETBOOTSTATUS errno '%s'\n", strerror(errno)); + printf("WDIOC_GETBOOTSTATUS error '%s'\n", strerror(errno)); break; case 'd': flags = WDIOS_DISABLECARD; @@ -117,7 +117,7 @@ int main(int argc, char *argv[]) if (!ret) printf("Watchdog card disabled.\n"); else - printf("WDIOS_DISABLECARD errno '%s'\n", strerror(errno)); + printf("WDIOS_DISABLECARD error '%s'\n", strerror(errno)); break; case 'e': flags = WDIOS_ENABLECARD; @@ -125,7 +125,7 @@ int main(int argc, char *argv[]) if (!ret) printf("Watchdog card enabled.\n"); else - printf("WDIOS_ENABLECARD errno '%s'\n", strerror(errno)); + printf("WDIOS_ENABLECARD error '%s'\n", strerror(errno)); break; case 'p': ping_rate = strtoul(optarg, NULL, 0); @@ -139,7 +139,7 @@ int main(int argc, char *argv[]) if (!ret) printf("Watchdog timeout set to %u seconds.\n", flags); else - printf("WDIOC_SETTIMEOUT errno '%s'\n", strerror(errno)); + printf("WDIOC_SETTIMEOUT error '%s'\n", strerror(errno)); break; default: usage(argv[0]); From 986a4216501036e6855e8170c4d12971565624c0 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 12 Oct 2018 09:20:15 +0200 Subject: [PATCH 1025/3715] thermal: rcar_thermal: Prevent hardware access during system suspend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3a31386217628ffe2491695be2db933c25dde785 ] On r8a7791/koelsch, sometimes the following message is printed during system suspend: rcar_thermal e61f0000.thermal: thermal sensor was broken This happens if the workqueue runs while the device is already suspended. Fix this by using the freezable system workqueue instead, cfr. commit 51e20d0e3a60cf46 ("thermal: Prevent polling from happening during system suspend"). Fixes: e0a5172e9eec7f0d ("thermal: rcar: add interrupt support") Signed-off-by: Geert Uytterhoeven Reviewed-by: Niklas Söderlund Signed-off-by: Eduardo Valentin Signed-off-by: Sasha Levin --- drivers/thermal/rcar_thermal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c index 73e5fee6cf1d..83126e2dce36 100644 --- a/drivers/thermal/rcar_thermal.c +++ b/drivers/thermal/rcar_thermal.c @@ -401,8 +401,8 @@ static irqreturn_t rcar_thermal_irq(int irq, void *data) rcar_thermal_for_each_priv(priv, common) { if (rcar_thermal_had_changed(priv, status)) { rcar_thermal_irq_disable(priv); - schedule_delayed_work(&priv->work, - msecs_to_jiffies(300)); + queue_delayed_work(system_freezable_wq, &priv->work, + msecs_to_jiffies(300)); } } From 8ba716e81648b2a0ffe016d2cad74d795ba399cd Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Wed, 24 Oct 2018 20:15:17 +0900 Subject: [PATCH 1026/3715] bpf: devmap: fix wrong interface selection in notifier_call [ Upstream commit f592f804831f1cf9d1f9966f58c80f150e6829b5 ] The dev_map_notification() removes interface in devmap if unregistering interface's ifindex is same. But only checking ifindex is not enough because other netns can have same ifindex. so that wrong interface selection could occurred. Hence netdev pointer comparison code is added. v2: compare netdev pointer instead of using net_eq() (Daniel Borkmann) v1: Initial patch Fixes: 2ddf71e23cc2 ("net: add notifier hooks for devmap bpf map") Signed-off-by: Taehee Yoo Acked-by: Song Liu Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- kernel/bpf/devmap.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 482bf42e21a4..1060eee6c8d5 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -388,8 +388,7 @@ static int dev_map_notification(struct notifier_block *notifier, struct bpf_dtab_netdev *dev, *odev; dev = READ_ONCE(dtab->netdev_map[i]); - if (!dev || - dev->dev->ifindex != netdev->ifindex) + if (!dev || netdev != dev->dev) continue; odev = cmpxchg(&dtab->netdev_map[i], dev, NULL); if (dev == odev) From 88b0950ca323a7b18b6337beb0a028b7458c06f9 Mon Sep 17 00:00:00 2001 From: Felipe Rechia Date: Wed, 24 Oct 2018 10:57:22 -0300 Subject: [PATCH 1027/3715] powerpc/process: Fix flush_all_to_thread for SPE [ Upstream commit e901378578c62202594cba0f6c076f3df365ec91 ] Fix a bug introduced by the creation of flush_all_to_thread() for processors that have SPE (Signal Processing Engine) and use it to compute floating-point operations. >From userspace perspective, the problem was seen in attempts of computing floating-point operations which should generate exceptions. For example: fork(); float x = 0.0 / 0.0; isnan(x); // forked process returns False (should be True) The operation above also should always cause the SPEFSCR FINV bit to be set. However, the SPE floating-point exceptions were turned off after a fork(). Kernel versions prior to the bug used flush_spe_to_thread(), which first saves SPEFSCR register values in tsk->thread and then calls giveup_spe(tsk). After commit 579e633e764e, the save_all() function was called first to giveup_spe(), and then the SPEFSCR register values were saved in tsk->thread. This would save the SPEFSCR register values after disabling SPE for that thread, causing the bug described above. Fixes 579e633e764e ("powerpc: create flush_all_to_thread()") Signed-off-by: Felipe Rechia Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/kernel/process.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 5e5da2073fdf..ba0d4f9a99ba 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -567,12 +567,11 @@ void flush_all_to_thread(struct task_struct *tsk) if (tsk->thread.regs) { preempt_disable(); BUG_ON(tsk != current); - save_all(tsk); - #ifdef CONFIG_SPE if (tsk->thread.regs->msr & MSR_SPE) tsk->thread.spefscr = mfspr(SPRN_SPEFSCR); #endif + save_all(tsk); preempt_enable(); } From 34589bee6edd2381db775aaf0af87380a533c026 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 26 Oct 2018 15:39:49 -0700 Subject: [PATCH 1028/3715] sparc64: Rework xchg() definition to avoid warnings. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6c2fc9cddc1ffdef8ada1dc8404e5affae849953 ] Such as: fs/ocfs2/file.c: In function ‘ocfs2_file_write_iter’: ./arch/sparc/include/asm/cmpxchg_64.h:55:22: warning: value computed is not used [-Wunused-value] #define xchg(ptr,x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)))) and drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c: In function ‘ixgbevf_xdp_setup’: ./arch/sparc/include/asm/cmpxchg_64.h:55:22: warning: value computed is not used [-Wunused-value] #define xchg(ptr,x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)))) Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- arch/sparc/include/asm/cmpxchg_64.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/sparc/include/asm/cmpxchg_64.h b/arch/sparc/include/asm/cmpxchg_64.h index f71ef3729888..316faa0130ba 100644 --- a/arch/sparc/include/asm/cmpxchg_64.h +++ b/arch/sparc/include/asm/cmpxchg_64.h @@ -52,7 +52,12 @@ static inline unsigned long xchg64(__volatile__ unsigned long *m, unsigned long return val; } -#define xchg(ptr,x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)))) +#define xchg(ptr,x) \ +({ __typeof__(*(ptr)) __ret; \ + __ret = (__typeof__(*(ptr))) \ + __xchg((unsigned long)(x), (ptr), sizeof(*(ptr))); \ + __ret; \ +}) void __xchg_called_with_bad_pointer(void); From f9fcda363d6ddf2e69c9c3e9042a453540a93fff Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 26 Oct 2018 15:02:30 -0700 Subject: [PATCH 1029/3715] arm64: lib: use C string functions with KASAN enabled [ Upstream commit 19a2ca0fb560fd7be7b5293c6b652c6d6078dcde ] ARM64 has asm implementation of memchr(), memcmp(), str[r]chr(), str[n]cmp(), str[n]len(). KASAN don't see memory accesses in asm code, thus it can potentially miss many bugs. Ifdef out __HAVE_ARCH_* defines of these functions when KASAN is enabled, so the generic implementations from lib/string.c will be used. We can't just remove the asm functions because efistub uses them. And we can't have two non-weak functions either, so declare the asm functions as weak. Link: http://lkml.kernel.org/r/20180920135631.23833-2-aryabinin@virtuozzo.com Signed-off-by: Andrey Ryabinin Reported-by: Kyeongdon Kim Cc: Alexander Potapenko Cc: Ard Biesheuvel Cc: Dmitry Vyukov Cc: Mark Rutland Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- arch/arm64/include/asm/string.h | 14 ++++++++------ arch/arm64/kernel/arm64ksyms.c | 7 +++++-- arch/arm64/lib/memchr.S | 2 +- arch/arm64/lib/memcmp.S | 2 +- arch/arm64/lib/strchr.S | 2 +- arch/arm64/lib/strcmp.S | 2 +- arch/arm64/lib/strlen.S | 2 +- arch/arm64/lib/strncmp.S | 2 +- arch/arm64/lib/strnlen.S | 2 +- arch/arm64/lib/strrchr.S | 2 +- 10 files changed, 21 insertions(+), 16 deletions(-) diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h index dd95d33a5bd5..03a6c256b7ec 100644 --- a/arch/arm64/include/asm/string.h +++ b/arch/arm64/include/asm/string.h @@ -16,6 +16,7 @@ #ifndef __ASM_STRING_H #define __ASM_STRING_H +#ifndef CONFIG_KASAN #define __HAVE_ARCH_STRRCHR extern char *strrchr(const char *, int c); @@ -34,6 +35,13 @@ extern __kernel_size_t strlen(const char *); #define __HAVE_ARCH_STRNLEN extern __kernel_size_t strnlen(const char *, __kernel_size_t); +#define __HAVE_ARCH_MEMCMP +extern int memcmp(const void *, const void *, size_t); + +#define __HAVE_ARCH_MEMCHR +extern void *memchr(const void *, int, __kernel_size_t); +#endif + #define __HAVE_ARCH_MEMCPY extern void *memcpy(void *, const void *, __kernel_size_t); extern void *__memcpy(void *, const void *, __kernel_size_t); @@ -42,16 +50,10 @@ extern void *__memcpy(void *, const void *, __kernel_size_t); extern void *memmove(void *, const void *, __kernel_size_t); extern void *__memmove(void *, const void *, __kernel_size_t); -#define __HAVE_ARCH_MEMCHR -extern void *memchr(const void *, int, __kernel_size_t); - #define __HAVE_ARCH_MEMSET extern void *memset(void *, int, __kernel_size_t); extern void *__memset(void *, int, __kernel_size_t); -#define __HAVE_ARCH_MEMCMP -extern int memcmp(const void *, const void *, size_t); - #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE #define __HAVE_ARCH_MEMCPY_FLUSHCACHE void memcpy_flushcache(void *dst, const void *src, size_t cnt); diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index 66be504edb6c..9eedf839e739 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -44,20 +44,23 @@ EXPORT_SYMBOL(__arch_copy_in_user); EXPORT_SYMBOL(memstart_addr); /* string / mem functions */ +#ifndef CONFIG_KASAN EXPORT_SYMBOL(strchr); EXPORT_SYMBOL(strrchr); EXPORT_SYMBOL(strcmp); EXPORT_SYMBOL(strncmp); EXPORT_SYMBOL(strlen); EXPORT_SYMBOL(strnlen); +EXPORT_SYMBOL(memcmp); +EXPORT_SYMBOL(memchr); +#endif + EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(__memset); EXPORT_SYMBOL(__memcpy); EXPORT_SYMBOL(__memmove); -EXPORT_SYMBOL(memchr); -EXPORT_SYMBOL(memcmp); /* atomic bitops */ EXPORT_SYMBOL(set_bit); diff --git a/arch/arm64/lib/memchr.S b/arch/arm64/lib/memchr.S index 4444c1d25f4b..0f164a4baf52 100644 --- a/arch/arm64/lib/memchr.S +++ b/arch/arm64/lib/memchr.S @@ -30,7 +30,7 @@ * Returns: * x0 - address of first occurrence of 'c' or 0 */ -ENTRY(memchr) +WEAK(memchr) and w1, w1, #0xff 1: subs x2, x2, #1 b.mi 2f diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S index 2a4e239bd17a..fb295f52e9f8 100644 --- a/arch/arm64/lib/memcmp.S +++ b/arch/arm64/lib/memcmp.S @@ -58,7 +58,7 @@ pos .req x11 limit_wd .req x12 mask .req x13 -ENTRY(memcmp) +WEAK(memcmp) cbz limit, .Lret0 eor tmp1, src1, src2 tst tmp1, #7 diff --git a/arch/arm64/lib/strchr.S b/arch/arm64/lib/strchr.S index dae0cf5591f9..7c83091d1bcd 100644 --- a/arch/arm64/lib/strchr.S +++ b/arch/arm64/lib/strchr.S @@ -29,7 +29,7 @@ * Returns: * x0 - address of first occurrence of 'c' or 0 */ -ENTRY(strchr) +WEAK(strchr) and w1, w1, #0xff 1: ldrb w2, [x0], #1 cmp w2, w1 diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S index 471fe61760ef..7d5d15398bfb 100644 --- a/arch/arm64/lib/strcmp.S +++ b/arch/arm64/lib/strcmp.S @@ -60,7 +60,7 @@ tmp3 .req x9 zeroones .req x10 pos .req x11 -ENTRY(strcmp) +WEAK(strcmp) eor tmp1, src1, src2 mov zeroones, #REP8_01 tst tmp1, #7 diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S index 55ccc8e24c08..8e0b14205dcb 100644 --- a/arch/arm64/lib/strlen.S +++ b/arch/arm64/lib/strlen.S @@ -56,7 +56,7 @@ pos .req x12 #define REP8_7f 0x7f7f7f7f7f7f7f7f #define REP8_80 0x8080808080808080 -ENTRY(strlen) +WEAK(strlen) mov zeroones, #REP8_01 bic src, srcin, #15 ands tmp1, srcin, #15 diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S index e267044761c6..66bd145935d9 100644 --- a/arch/arm64/lib/strncmp.S +++ b/arch/arm64/lib/strncmp.S @@ -64,7 +64,7 @@ limit_wd .req x13 mask .req x14 endloop .req x15 -ENTRY(strncmp) +WEAK(strncmp) cbz limit, .Lret0 eor tmp1, src1, src2 mov zeroones, #REP8_01 diff --git a/arch/arm64/lib/strnlen.S b/arch/arm64/lib/strnlen.S index eae38da6e0bb..355be04441fe 100644 --- a/arch/arm64/lib/strnlen.S +++ b/arch/arm64/lib/strnlen.S @@ -59,7 +59,7 @@ limit_wd .req x14 #define REP8_7f 0x7f7f7f7f7f7f7f7f #define REP8_80 0x8080808080808080 -ENTRY(strnlen) +WEAK(strnlen) cbz limit, .Lhit_limit mov zeroones, #REP8_01 bic src, srcin, #15 diff --git a/arch/arm64/lib/strrchr.S b/arch/arm64/lib/strrchr.S index 61eabd9a289a..f3b9f8e2917c 100644 --- a/arch/arm64/lib/strrchr.S +++ b/arch/arm64/lib/strrchr.S @@ -29,7 +29,7 @@ * Returns: * x0 - address of last occurrence of 'c' or 0 */ -ENTRY(strrchr) +WEAK(strrchr) mov x3, #0 and w1, w1, #0xff 1: ldrb w2, [x0], #1 From 4308d2f4cffefc8f57a8e866bc7577f6bfeea59c Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Fri, 26 Oct 2018 15:02:52 -0700 Subject: [PATCH 1030/3715] fs/ocfs2/dlm/dlmdebug.c: fix a sleep-in-atomic-context bug in dlm_print_one_mle() [ Upstream commit 999865764f5f128896402572b439269acb471022 ] The kernel module may sleep with holding a spinlock. The function call paths (from bottom to top) in Linux-4.16 are: [FUNC] get_zeroed_page(GFP_NOFS) fs/ocfs2/dlm/dlmdebug.c, 332: get_zeroed_page in dlm_print_one_mle fs/ocfs2/dlm/dlmmaster.c, 240: dlm_print_one_mle in __dlm_put_mle fs/ocfs2/dlm/dlmmaster.c, 255: __dlm_put_mle in dlm_put_mle fs/ocfs2/dlm/dlmmaster.c, 254: spin_lock in dlm_put_ml [FUNC] get_zeroed_page(GFP_NOFS) fs/ocfs2/dlm/dlmdebug.c, 332: get_zeroed_page in dlm_print_one_mle fs/ocfs2/dlm/dlmmaster.c, 240: dlm_print_one_mle in __dlm_put_mle fs/ocfs2/dlm/dlmmaster.c, 222: __dlm_put_mle in dlm_put_mle_inuse fs/ocfs2/dlm/dlmmaster.c, 219: spin_lock in dlm_put_mle_inuse To fix this bug, GFP_NOFS is replaced with GFP_ATOMIC. This bug is found by my static analysis tool DSAC. Link: http://lkml.kernel.org/r/20180901112528.27025-1-baijiaju1990@gmail.com Signed-off-by: Jia-Ju Bai Reviewed-by: Andrew Morton Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Changwei Ge Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/ocfs2/dlm/dlmdebug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 9b984cae4c4e..1d6dc8422899 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -329,7 +329,7 @@ void dlm_print_one_mle(struct dlm_master_list_entry *mle) { char *buf; - buf = (char *) get_zeroed_page(GFP_NOFS); + buf = (char *) get_zeroed_page(GFP_ATOMIC); if (buf) { dump_mle(mle, buf, PAGE_SIZE - 1); free_page((unsigned long)buf); From 76cacae1ed773b8b0a939c0e4e150c7c05d745ec Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 26 Oct 2018 15:09:45 -0700 Subject: [PATCH 1031/3715] mm/page-writeback.c: fix range_cyclic writeback vs writepages deadlock [ Upstream commit 64081362e8ff4587b4554087f3cfc73d3e0a4cd7 ] We've recently seen a workload on XFS filesystems with a repeatable deadlock between background writeback and a multi-process application doing concurrent writes and fsyncs to a small range of a file. range_cyclic writeback Process 1 Process 2 xfs_vm_writepages write_cache_pages writeback_index = 2 cycled = 0 .... find page 2 dirty lock Page 2 ->writepage page 2 writeback page 2 clean page 2 added to bio no more pages write() locks page 1 dirties page 1 locks page 2 dirties page 1 fsync() .... xfs_vm_writepages write_cache_pages start index 0 find page 1 towrite lock Page 1 ->writepage page 1 writeback page 1 clean page 1 added to bio find page 2 towrite lock Page 2 page 2 is writeback write() locks page 1 dirties page 1 fsync() .... xfs_vm_writepages write_cache_pages start index 0 !done && !cycled sets index to 0, restarts lookup find page 1 dirty find page 1 towrite lock Page 1 page 1 is writeback lock Page 1 DEADLOCK because: - process 1 needs page 2 writeback to complete to make enough progress to issue IO pending for page 1 - writeback needs page 1 writeback to complete so process 2 can progress and unlock the page it is blocked on, then it can issue the IO pending for page 2 - process 2 can't make progress until process 1 issues IO for page 1 The underlying cause of the problem here is that range_cyclic writeback is processing pages in descending index order as we hold higher index pages in a structure controlled from above write_cache_pages(). The write_cache_pages() caller needs to be able to submit these pages for IO before write_cache_pages restarts writeback at mapping index 0 to avoid wcp inverting the page lock/writeback wait order. generic_writepages() is not susceptible to this bug as it has no private context held across write_cache_pages() - filesystems using this infrastructure always submit pages in ->writepage immediately and so there is no problem with range_cyclic going back to mapping index 0. However: mpage_writepages() has a private bio context, exofs_writepages() has page_collect fuse_writepages() has fuse_fill_wb_data nfs_writepages() has nfs_pageio_descriptor xfs_vm_writepages() has xfs_writepage_ctx All of these ->writepages implementations can hold pages under writeback in their private structures until write_cache_pages() returns, and hence they are all susceptible to this deadlock. Also worth noting is that ext4 has it's own bastardised version of write_cache_pages() and so it /may/ have an equivalent deadlock. I looked at the code long enough to understand that it has a similar retry loop for range_cyclic writeback reaching the end of the file and then promptly ran away before my eyes bled too much. I'll leave it for the ext4 developers to determine if their code is actually has this deadlock and how to fix it if it has. There's a few ways I can see avoid this deadlock. There's probably more, but these are the first I've though of: 1. get rid of range_cyclic altogether 2. range_cyclic always stops at EOF, and we start again from writeback index 0 on the next call into write_cache_pages() 2a. wcp also returns EAGAIN to ->writepages implementations to indicate range cyclic has hit EOF. writepages implementations can then flush the current context and call wpc again to continue. i.e. lift the retry into the ->writepages implementation 3. range_cyclic uses trylock_page() rather than lock_page(), and it skips pages it can't lock without blocking. It will already do this for pages under writeback, so this seems like a no-brainer 3a. all non-WB_SYNC_ALL writeback uses trylock_page() to avoid blocking as per pages under writeback. I don't think #1 is an option - range_cyclic prevents frequently dirtied lower file offset from starving background writeback of rarely touched higher file offsets. #2 is simple, and I don't think it will have any impact on performance as going back to the start of the file implies an immediate seek. We'll have exactly the same number of seeks if we switch writeback to another inode, and then come back to this one later and restart from index 0. #2a is pretty much "status quo without the deadlock". Moving the retry loop up into the wcp caller means we can issue IO on the pending pages before calling wcp again, and so avoid locking or waiting on pages in the wrong order. I'm not convinced we need to do this given that we get the same thing from #2 on the next writeback call from the writeback infrastructure. #3 is really just a band-aid - it doesn't fix the access/wait inversion problem, just prevents it from becoming a deadlock situation. I'd prefer we fix the inversion, not sweep it under the carpet like this. #3a is really an optimisation that just so happens to include the band-aid fix of #3. So it seems that the simplest way to fix this issue is to implement solution #2 Link: http://lkml.kernel.org/r/20181005054526.21507-1-david@fromorbit.com Signed-off-by: Dave Chinner Reviewed-by: Jan Kara Cc: Nicholas Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/page-writeback.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index e001de5ac50c..a40c075fd8f1 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2150,6 +2150,13 @@ EXPORT_SYMBOL(tag_pages_for_writeback); * not miss some pages (e.g., because some other process has cleared TOWRITE * tag we set). The rule we follow is that TOWRITE tag can be cleared only * by the process clearing the DIRTY tag (and submitting the page for IO). + * + * To avoid deadlocks between range_cyclic writeback and callers that hold + * pages in PageWriteback to aggregate IO until write_cache_pages() returns, + * we do not loop back to the start of the file. Doing so causes a page + * lock/page writeback access order inversion - we should only ever lock + * multiple pages in ascending page->index order, and looping back to the start + * of the file violates that rule and causes deadlocks. */ int write_cache_pages(struct address_space *mapping, struct writeback_control *wbc, writepage_t writepage, @@ -2164,7 +2171,6 @@ int write_cache_pages(struct address_space *mapping, pgoff_t index; pgoff_t end; /* Inclusive */ pgoff_t done_index; - int cycled; int range_whole = 0; int tag; @@ -2172,23 +2178,17 @@ int write_cache_pages(struct address_space *mapping, if (wbc->range_cyclic) { writeback_index = mapping->writeback_index; /* prev offset */ index = writeback_index; - if (index == 0) - cycled = 1; - else - cycled = 0; end = -1; } else { index = wbc->range_start >> PAGE_SHIFT; end = wbc->range_end >> PAGE_SHIFT; if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) range_whole = 1; - cycled = 1; /* ignore range_cyclic tests */ } if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) tag = PAGECACHE_TAG_TOWRITE; else tag = PAGECACHE_TAG_DIRTY; -retry: if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) tag_pages_for_writeback(mapping, index, end); done_index = index; @@ -2296,17 +2296,14 @@ continue_unlock: pagevec_release(&pvec); cond_resched(); } - if (!cycled && !done) { - /* - * range_cyclic: - * We hit the last page and there is more work to be done: wrap - * back to the start of the file - */ - cycled = 1; - index = 0; - end = writeback_index - 1; - goto retry; - } + + /* + * If we hit the last page and there is more work to be done: wrap + * back the index back to the start of the file for the next + * time we are called. + */ + if (wbc->range_cyclic && !done) + done_index = 0; if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) mapping->writeback_index = done_index; From 57487d5ee23fc0c76fdd494a75a619a06b4436d9 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Sun, 28 Oct 2018 09:33:09 +0100 Subject: [PATCH 1032/3715] macsec: update operstate when lower device changes [ Upstream commit e6ac075882b2afcdf2d5ab328ce4ab42a1eb9593 ] Like all other virtual devices (macvlan, vlan), the operstate of a macsec device should match the state of its lower device. This is done by calling netif_stacked_transfer_operstate from its netdevice notifier. We also need to call netif_stacked_transfer_operstate when a new macsec device is created, so that its operstate is set properly. This is only relevant when we try to bring the device up directly when we create it. Radu Rendec proposed a similar patch, inspired from the 802.1q driver, that included changing the administrative state of the macsec device, instead of just the operstate. This version is similar to what the macvlan driver does, and updates only the operstate. Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver") Reported-by: Radu Rendec Reported-by: Patrick Talbert Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/macsec.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 9bcb7c3e879f..40e8f11f20cb 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3273,6 +3273,9 @@ static int macsec_newlink(struct net *net, struct net_device *dev, if (err < 0) goto del_dev; + netif_stacked_transfer_operstate(real_dev, dev); + linkwatch_fire_event(dev); + macsec_generation++; return 0; @@ -3444,6 +3447,20 @@ static int macsec_notify(struct notifier_block *this, unsigned long event, return NOTIFY_DONE; switch (event) { + case NETDEV_DOWN: + case NETDEV_UP: + case NETDEV_CHANGE: { + struct macsec_dev *m, *n; + struct macsec_rxh_data *rxd; + + rxd = macsec_data_rtnl(real_dev); + list_for_each_entry_safe(m, n, &rxd->secys, secys) { + struct net_device *dev = m->secy.netdev; + + netif_stacked_transfer_operstate(real_dev, dev); + } + break; + } case NETDEV_UNREGISTER: { struct macsec_dev *m, *n; struct macsec_rxh_data *rxd; From 282db3976826a5a7df307ce004ac6cb5778b3a25 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Sun, 28 Oct 2018 09:33:10 +0100 Subject: [PATCH 1033/3715] macsec: let the administrator set UP state even if lowerdev is down [ Upstream commit 07bddef9839378bd6f95b393cf24c420529b4ef1 ] Currently, the kernel doesn't let the administrator set a macsec device up unless its lower device is currently up. This is inconsistent, as a macsec device that is up won't automatically go down when its lower device goes down. Now that linkstate propagation works, there's really no reason for this limitation, so let's remove it. Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver") Reported-by: Radu Rendec Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/macsec.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 40e8f11f20cb..9bb65e0af7dd 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -2798,9 +2798,6 @@ static int macsec_dev_open(struct net_device *dev) struct net_device *real_dev = macsec->real_dev; int err; - if (!(real_dev->flags & IFF_UP)) - return -ENETDOWN; - err = dev_uc_add(real_dev, dev->dev_addr); if (err < 0) return err; From ee29369424cae436dc1cf1a623f2b3b5b2bd02eb Mon Sep 17 00:00:00 2001 From: Jianchao Wang Date: Sat, 27 Oct 2018 19:52:14 +0800 Subject: [PATCH 1034/3715] block: fix the DISCARD request merge [ Upstream commit 69840466086d2248898020a08dda52732686c4e6 ] There are two cases when handle DISCARD merge. If max_discard_segments == 1, the bios/requests need to be contiguous to merge. If max_discard_segments > 1, it takes every bio as a range and different range needn't to be contiguous. But now, attempt_merge screws this up. It always consider contiguity for DISCARD for the case max_discard_segments > 1 and cannot merge contiguous DISCARD for the case max_discard_segments == 1, because rq_attempt_discard_merge always returns false in this case. This patch fixes both of the two cases above. Reviewed-by: Christoph Hellwig Reviewed-by: Ming Lei Signed-off-by: Jianchao Wang Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-merge.c | 46 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index 8d60a5bbcef9..94650cdf2924 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -659,6 +659,31 @@ static void blk_account_io_merge(struct request *req) part_stat_unlock(); } } +/* + * Two cases of handling DISCARD merge: + * If max_discard_segments > 1, the driver takes every bio + * as a range and send them to controller together. The ranges + * needn't to be contiguous. + * Otherwise, the bios/requests will be handled as same as + * others which should be contiguous. + */ +static inline bool blk_discard_mergable(struct request *req) +{ + if (req_op(req) == REQ_OP_DISCARD && + queue_max_discard_segments(req->q) > 1) + return true; + return false; +} + +enum elv_merge blk_try_req_merge(struct request *req, struct request *next) +{ + if (blk_discard_mergable(req)) + return ELEVATOR_DISCARD_MERGE; + else if (blk_rq_pos(req) + blk_rq_sectors(req) == blk_rq_pos(next)) + return ELEVATOR_BACK_MERGE; + + return ELEVATOR_NO_MERGE; +} /* * For non-mq, this has to be called with the request spinlock acquired. @@ -676,12 +701,6 @@ static struct request *attempt_merge(struct request_queue *q, if (req_op(req) != req_op(next)) return NULL; - /* - * not contiguous - */ - if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next)) - return NULL; - if (rq_data_dir(req) != rq_data_dir(next) || req->rq_disk != next->rq_disk || req_no_special_merge(next)) @@ -705,11 +724,19 @@ static struct request *attempt_merge(struct request_queue *q, * counts here. Handle DISCARDs separately, as they * have separate settings. */ - if (req_op(req) == REQ_OP_DISCARD) { + + switch (blk_try_req_merge(req, next)) { + case ELEVATOR_DISCARD_MERGE: if (!req_attempt_discard_merge(q, req, next)) return NULL; - } else if (!ll_merge_requests_fn(q, req, next)) + break; + case ELEVATOR_BACK_MERGE: + if (!ll_merge_requests_fn(q, req, next)) + return NULL; + break; + default: return NULL; + } /* * If failfast settings disagree or any of the two is already @@ -834,8 +861,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) enum elv_merge blk_try_merge(struct request *rq, struct bio *bio) { - if (req_op(rq) == REQ_OP_DISCARD && - queue_max_discard_segments(rq->q) > 1) + if (blk_discard_mergable(rq)) return ELEVATOR_DISCARD_MERGE; else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector) return ELEVATOR_BACK_MERGE; From 23e5f2eb8b43e128037708d28ff32510094db807 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 16 Oct 2018 12:01:47 +0900 Subject: [PATCH 1035/3715] i2c: uniphier-f: make driver robust against concurrency [ Upstream commit f1fdcbbdf45d9609f3d4063b67e9ea941ba3a58f ] This is unlikely to happen, but it is possible for a CPU to enter the interrupt handler just after wait_for_completion_timeout() has expired. If this happens, the hardware is accessed from multiple contexts concurrently. Disable the IRQ after wait_for_completion_timeout(), and do nothing from the handler when the IRQ is disabled. Fixes: 6a62974b667f ("i2c: uniphier_f: add UniPhier FIFO-builtin I2C driver") Signed-off-by: Masahiro Yamada Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-uniphier-f.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-uniphier-f.c b/drivers/i2c/busses/i2c-uniphier-f.c index bc26ec822e26..b9a0690b4fd7 100644 --- a/drivers/i2c/busses/i2c-uniphier-f.c +++ b/drivers/i2c/busses/i2c-uniphier-f.c @@ -98,6 +98,7 @@ struct uniphier_fi2c_priv { unsigned int flags; unsigned int busy_cnt; unsigned int clk_cycle; + spinlock_t lock; /* IRQ synchronization */ }; static void uniphier_fi2c_fill_txfifo(struct uniphier_fi2c_priv *priv, @@ -162,7 +163,10 @@ static irqreturn_t uniphier_fi2c_interrupt(int irq, void *dev_id) struct uniphier_fi2c_priv *priv = dev_id; u32 irq_status; + spin_lock(&priv->lock); + irq_status = readl(priv->membase + UNIPHIER_FI2C_INT); + irq_status &= priv->enabled_irqs; dev_dbg(&priv->adap.dev, "interrupt: enabled_irqs=%04x, irq_status=%04x\n", @@ -230,6 +234,8 @@ static irqreturn_t uniphier_fi2c_interrupt(int irq, void *dev_id) goto handled; } + spin_unlock(&priv->lock); + return IRQ_NONE; data_done: @@ -246,6 +252,8 @@ complete: handled: uniphier_fi2c_clear_irqs(priv); + spin_unlock(&priv->lock); + return IRQ_HANDLED; } @@ -311,7 +319,7 @@ static int uniphier_fi2c_master_xfer_one(struct i2c_adapter *adap, { struct uniphier_fi2c_priv *priv = i2c_get_adapdata(adap); bool is_read = msg->flags & I2C_M_RD; - unsigned long time_left; + unsigned long time_left, flags; dev_dbg(&adap->dev, "%s: addr=0x%02x, len=%d, stop=%d\n", is_read ? "receive" : "transmit", msg->addr, msg->len, stop); @@ -342,6 +350,12 @@ static int uniphier_fi2c_master_xfer_one(struct i2c_adapter *adap, priv->membase + UNIPHIER_FI2C_CR); time_left = wait_for_completion_timeout(&priv->comp, adap->timeout); + + spin_lock_irqsave(&priv->lock, flags); + priv->enabled_irqs = 0; + uniphier_fi2c_set_irqs(priv); + spin_unlock_irqrestore(&priv->lock, flags); + if (!time_left) { dev_err(&adap->dev, "transaction timeout.\n"); uniphier_fi2c_recover(priv); @@ -546,6 +560,7 @@ static int uniphier_fi2c_probe(struct platform_device *pdev) priv->clk_cycle = clk_rate / bus_speed; init_completion(&priv->comp); + spin_lock_init(&priv->lock); priv->adap.owner = THIS_MODULE; priv->adap.algo = &uniphier_fi2c_algo; priv->adap.dev.parent = dev; From 17b30b7d4f9621df57436e43d6912c28acbd6752 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 16 Oct 2018 12:01:48 +0900 Subject: [PATCH 1036/3715] i2c: uniphier-f: fix occasional timeout error [ Upstream commit 39226aaa85f002d695e3cafade3309e12ffdaecd ] Currently, a timeout error could happen at a repeated START condition. For a (non-repeated) START condition, the controller starts sending data when the UNIPHIER_FI2C_CR_STA bit is set. However, for a repeated START condition, the hardware starts running when the slave address is written to the TX FIFO - the write to the UNIPHIER_FI2C_CR register is actually unneeded. Because the hardware is already running before the IRQ is enabled for a repeated START, the driver may miss the IRQ event. In most cases, this problem does not show up since modern CPUs are much faster than the I2C transfer. However, it is still possible that a context switch happens after the controller starts, but before the IRQ register is set up. To fix this, - Do not write UNIPHIER_FI2C_CR for repeated START conditions. - Enable IRQ *before* writing the slave address to the TX FIFO. - Disable IRQ for the current CPU while queuing up the TX FIFO; If the CPU is interrupted by some task, the interrupt handler might be invoked due to the empty TX FIFO before completing the setup. Fixes: 6a62974b667f ("i2c: uniphier_f: add UniPhier FIFO-builtin I2C driver") Signed-off-by: Masahiro Yamada Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-uniphier-f.c | 33 ++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/drivers/i2c/busses/i2c-uniphier-f.c b/drivers/i2c/busses/i2c-uniphier-f.c index b9a0690b4fd7..bbd5b137aa21 100644 --- a/drivers/i2c/busses/i2c-uniphier-f.c +++ b/drivers/i2c/busses/i2c-uniphier-f.c @@ -260,6 +260,8 @@ handled: static void uniphier_fi2c_tx_init(struct uniphier_fi2c_priv *priv, u16 addr) { priv->enabled_irqs |= UNIPHIER_FI2C_INT_TE; + uniphier_fi2c_set_irqs(priv); + /* do not use TX byte counter */ writel(0, priv->membase + UNIPHIER_FI2C_TBC); /* set slave address */ @@ -292,6 +294,8 @@ static void uniphier_fi2c_rx_init(struct uniphier_fi2c_priv *priv, u16 addr) priv->enabled_irqs |= UNIPHIER_FI2C_INT_RF; } + uniphier_fi2c_set_irqs(priv); + /* set slave address with RD bit */ writel(UNIPHIER_FI2C_DTTX_CMD | UNIPHIER_FI2C_DTTX_RD | addr << 1, priv->membase + UNIPHIER_FI2C_DTTX); @@ -315,14 +319,16 @@ static void uniphier_fi2c_recover(struct uniphier_fi2c_priv *priv) } static int uniphier_fi2c_master_xfer_one(struct i2c_adapter *adap, - struct i2c_msg *msg, bool stop) + struct i2c_msg *msg, bool repeat, + bool stop) { struct uniphier_fi2c_priv *priv = i2c_get_adapdata(adap); bool is_read = msg->flags & I2C_M_RD; unsigned long time_left, flags; - dev_dbg(&adap->dev, "%s: addr=0x%02x, len=%d, stop=%d\n", - is_read ? "receive" : "transmit", msg->addr, msg->len, stop); + dev_dbg(&adap->dev, "%s: addr=0x%02x, len=%d, repeat=%d, stop=%d\n", + is_read ? "receive" : "transmit", msg->addr, msg->len, + repeat, stop); priv->len = msg->len; priv->buf = msg->buf; @@ -338,16 +344,24 @@ static int uniphier_fi2c_master_xfer_one(struct i2c_adapter *adap, writel(UNIPHIER_FI2C_RST_TBRST | UNIPHIER_FI2C_RST_RBRST, priv->membase + UNIPHIER_FI2C_RST); /* reset TX/RX FIFO */ + spin_lock_irqsave(&priv->lock, flags); + if (is_read) uniphier_fi2c_rx_init(priv, msg->addr); else uniphier_fi2c_tx_init(priv, msg->addr); - uniphier_fi2c_set_irqs(priv); - dev_dbg(&adap->dev, "start condition\n"); - writel(UNIPHIER_FI2C_CR_MST | UNIPHIER_FI2C_CR_STA, - priv->membase + UNIPHIER_FI2C_CR); + /* + * For a repeated START condition, writing a slave address to the FIFO + * kicks the controller. So, the UNIPHIER_FI2C_CR register should be + * written only for a non-repeated START condition. + */ + if (!repeat) + writel(UNIPHIER_FI2C_CR_MST | UNIPHIER_FI2C_CR_STA, + priv->membase + UNIPHIER_FI2C_CR); + + spin_unlock_irqrestore(&priv->lock, flags); time_left = wait_for_completion_timeout(&priv->comp, adap->timeout); @@ -408,6 +422,7 @@ static int uniphier_fi2c_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) { struct i2c_msg *msg, *emsg = msgs + num; + bool repeat = false; int ret; ret = uniphier_fi2c_check_bus_busy(adap); @@ -418,9 +433,11 @@ static int uniphier_fi2c_master_xfer(struct i2c_adapter *adap, /* Emit STOP if it is the last message or I2C_M_STOP is set. */ bool stop = (msg + 1 == emsg) || (msg->flags & I2C_M_STOP); - ret = uniphier_fi2c_master_xfer_one(adap, msg, stop); + ret = uniphier_fi2c_master_xfer_one(adap, msg, repeat, stop); if (ret) return ret; + + repeat = !stop; } return num; From 7c8372c6a249cae2b30237198db39ba5ea23cf95 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 16 Oct 2018 12:01:49 +0900 Subject: [PATCH 1037/3715] i2c: uniphier-f: fix race condition when IRQ is cleared [ Upstream commit eaba68785c2d24ebf1f0d46c24e11b79cc2f94c7 ] The current IRQ handler clears all the IRQ status bits when it bails out. This is dangerous because it might clear away the status bits that have just been set while processing the current handler. If this happens, the IRQ event for the latest transfer is lost forever. The IRQ status bits must be cleared *before* the next transfer is kicked. Fixes: 6a62974b667f ("i2c: uniphier_f: add UniPhier FIFO-builtin I2C driver") Signed-off-by: Masahiro Yamada Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-uniphier-f.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-uniphier-f.c b/drivers/i2c/busses/i2c-uniphier-f.c index bbd5b137aa21..928ea9930d17 100644 --- a/drivers/i2c/busses/i2c-uniphier-f.c +++ b/drivers/i2c/busses/i2c-uniphier-f.c @@ -143,9 +143,10 @@ static void uniphier_fi2c_set_irqs(struct uniphier_fi2c_priv *priv) writel(priv->enabled_irqs, priv->membase + UNIPHIER_FI2C_IE); } -static void uniphier_fi2c_clear_irqs(struct uniphier_fi2c_priv *priv) +static void uniphier_fi2c_clear_irqs(struct uniphier_fi2c_priv *priv, + u32 mask) { - writel(-1, priv->membase + UNIPHIER_FI2C_IC); + writel(mask, priv->membase + UNIPHIER_FI2C_IC); } static void uniphier_fi2c_stop(struct uniphier_fi2c_priv *priv) @@ -172,6 +173,8 @@ static irqreturn_t uniphier_fi2c_interrupt(int irq, void *dev_id) "interrupt: enabled_irqs=%04x, irq_status=%04x\n", priv->enabled_irqs, irq_status); + uniphier_fi2c_clear_irqs(priv, irq_status); + if (irq_status & UNIPHIER_FI2C_INT_STOP) goto complete; @@ -250,8 +253,6 @@ complete: } handled: - uniphier_fi2c_clear_irqs(priv); - spin_unlock(&priv->lock); return IRQ_HANDLED; @@ -340,7 +341,7 @@ static int uniphier_fi2c_master_xfer_one(struct i2c_adapter *adap, priv->flags |= UNIPHIER_FI2C_STOP; reinit_completion(&priv->comp); - uniphier_fi2c_clear_irqs(priv); + uniphier_fi2c_clear_irqs(priv, U32_MAX); writel(UNIPHIER_FI2C_RST_TBRST | UNIPHIER_FI2C_RST_RBRST, priv->membase + UNIPHIER_FI2C_RST); /* reset TX/RX FIFO */ From 63cf63befc46a110697863458047181c0e34aa15 Mon Sep 17 00:00:00 2001 From: Anton Ivanov Date: Tue, 25 Sep 2018 08:47:13 +0100 Subject: [PATCH 1038/3715] um: Make line/tty semantics use true write IRQ [ Upstream commit 917e2fd2c53eb3c4162f5397555cbd394390d4bc ] This fixes a long standing bug where large amounts of output could freeze the tty (most commonly seen on stdio console). While the bug has always been there it became more pronounced after moving to the new interrupt controller. The line semantics are now changed to have true IRQ write semantics which should further improve the tty/line subsystem stability and performance Signed-off-by: Anton Ivanov Signed-off-by: Richard Weinberger Signed-off-by: Sasha Levin --- arch/um/drivers/line.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index 366e57f5e8d6..7e524efed584 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -261,7 +261,7 @@ static irqreturn_t line_write_interrupt(int irq, void *data) if (err == 0) { spin_unlock(&line->lock); return IRQ_NONE; - } else if (err < 0) { + } else if ((err < 0) && (err != -EAGAIN)) { line->head = line->buffer; line->tail = line->buffer; } From 850fc69e08b2d1b70c1dd999d6d37b26aa37cf13 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:40:55 +1100 Subject: [PATCH 1039/3715] vfs: avoid problematic remapping requests into partial EOF block [ Upstream commit 07d19dc9fbe9128378b9e226abe886fd8fd473df ] A deduplication data corruption is exposed in XFS and btrfs. It is caused by extending the block match range to include the partial EOF block, but then allowing unknown data beyond EOF to be considered a "match" to data in the destination file because the comparison is only made to the end of the source file. This corrupts the destination file when the source extent is shared with it. The VFS remapping prep functions only support whole block dedupe, but we still need to appear to support whole file dedupe correctly. Hence if the dedupe request includes the last block of the souce file, don't include it in the actual dedupe operation. If the rest of the range dedupes successfully, then reject the entire request. A subsequent patch will enable us to shorten dedupe requests correctly. When reflinking sub-file ranges, a data corruption can occur when the source file range includes a partial EOF block. This shares the unknown data beyond EOF into the second file at a position inside EOF, exposing stale data in the second file. If the reflink request includes the last block of the souce file, only proceed with the reflink operation if it lands at or past the destination file's current EOF. If it lands within the destination file EOF, reject the entire request with -EINVAL and make the caller go the hard way. A subsequent patch will enable us to shorten reflink requests correctly. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner Signed-off-by: Sasha Levin --- fs/read_write.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/fs/read_write.c b/fs/read_write.c index 38a8bcccf0dd..e8136a72c13f 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1709,6 +1709,34 @@ static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write) return security_file_permission(file, write ? MAY_WRITE : MAY_READ); } +/* + * Ensure that we don't remap a partial EOF block in the middle of something + * else. Assume that the offsets have already been checked for block + * alignment. + * + * For deduplication we always scale down to the previous block because we + * can't meaningfully compare post-EOF contents. + * + * For clone we only link a partial EOF block above the destination file's EOF. + */ +static int generic_remap_check_len(struct inode *inode_in, + struct inode *inode_out, + loff_t pos_out, + u64 *len, + bool is_dedupe) +{ + u64 blkmask = i_blocksize(inode_in) - 1; + + if ((*len & blkmask) == 0) + return 0; + + if (is_dedupe) + *len &= ~blkmask; + else if (pos_out + *len < i_size_read(inode_out)) + return -EINVAL; + + return 0; +} /* * Check that the two inodes are eligible for cloning, the ranges make @@ -1815,6 +1843,11 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, return -EBADE; } + ret = generic_remap_check_len(inode_in, inode_out, pos_out, len, + is_dedupe); + if (ret) + return ret; + return 1; } EXPORT_SYMBOL(vfs_clone_file_prep_inodes); From f1b3512388a7778cec40b5f892290257726e8c23 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Wed, 31 Oct 2018 11:39:34 +1030 Subject: [PATCH 1040/3715] powerpc/xmon: Relax frame size for clang [ Upstream commit 9c87156cce5a63735d1218f0096a65c50a7a32aa ] When building with clang (8 trunk, 7.0 release) the frame size limit is hit: arch/powerpc/xmon/xmon.c:452:12: warning: stack frame size of 2576 bytes in function 'xmon_core' [-Wframe-larger-than=] Some investigation by Naveen indicates this is due to clang saving the addresses to printf format strings on the stack. While this issue is investigated, bump up the frame size limit for xmon when building with clang. Link: https://github.com/ClangBuiltLinux/linux/issues/252 Signed-off-by: Joel Stanley Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/xmon/Makefile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile index 549e99e71112..ac5ee067aa51 100644 --- a/arch/powerpc/xmon/Makefile +++ b/arch/powerpc/xmon/Makefile @@ -13,6 +13,12 @@ UBSAN_SANITIZE := n ORIG_CFLAGS := $(KBUILD_CFLAGS) KBUILD_CFLAGS = $(subst -mno-sched-epilog,,$(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS))) +ifdef CONFIG_CC_IS_CLANG +# clang stores addresses on the stack causing the frame size to blow +# out. See https://github.com/ClangBuiltLinux/linux/issues/252 +KBUILD_CFLAGS += -Wframe-larger-than=4096 +endif + ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) obj-y += xmon.o nonstdio.o spr_access.o From ff6aea7cda2a6f8de1d0a25a085eeffa11e4eb96 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Mon, 29 Oct 2018 22:23:50 +1100 Subject: [PATCH 1041/3715] selftests/powerpc/signal: Fix out-of-tree build [ Upstream commit 27825349d7b238533a47e3d98b8bb0efd886b752 ] We should use TEST_GEN_PROGS, not TEST_PROGS. That tells the selftests makefile (lib.mk) that those tests are generated (built), and so it adds the $(OUTPUT) prefix for us, making the out-of-tree build work correctly. It also means we don't need our own clean rule, lib.mk does it. We also have to update the signal_tm rule to use $(OUTPUT). Signed-off-by: Joel Stanley Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- tools/testing/selftests/powerpc/signal/Makefile | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile index a7cbd5082e27..4213978f3ee2 100644 --- a/tools/testing/selftests/powerpc/signal/Makefile +++ b/tools/testing/selftests/powerpc/signal/Makefile @@ -1,14 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_PROGS := signal signal_tm - -all: $(TEST_PROGS) - -$(TEST_PROGS): ../harness.c ../utils.c signal.S +TEST_GEN_PROGS := signal signal_tm CFLAGS += -maltivec -signal_tm: CFLAGS += -mhtm +$(OUTPUT)/signal_tm: CFLAGS += -mhtm include ../../lib.mk -clean: - rm -f $(TEST_PROGS) *.o +$(TEST_GEN_PROGS): ../harness.c ../utils.c signal.S From 0a67a82b5673838c09beb292e6e2d1c643db1246 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 29 Oct 2018 22:23:52 +1100 Subject: [PATCH 1042/3715] selftests/powerpc/switch_endian: Fix out-of-tree build [ Upstream commit 266bac361d5677e61a6815bd29abeb3bdced2b07 ] For the out-of-tree build to work we need to tell switch_endian_test to look for check-reversed.S in $(OUTPUT). Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- tools/testing/selftests/powerpc/switch_endian/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/powerpc/switch_endian/Makefile b/tools/testing/selftests/powerpc/switch_endian/Makefile index 30b8ff8fb82e..e4cedfe9753d 100644 --- a/tools/testing/selftests/powerpc/switch_endian/Makefile +++ b/tools/testing/selftests/powerpc/switch_endian/Makefile @@ -7,6 +7,7 @@ EXTRA_CLEAN = $(OUTPUT)/*.o $(OUTPUT)/check-reversed.S include ../../lib.mk +$(OUTPUT)/switch_endian_test: ASFLAGS += -I $(OUTPUT) $(OUTPUT)/switch_endian_test: $(OUTPUT)/check-reversed.S $(OUTPUT)/check-reversed.o: $(OUTPUT)/check.o From 9679a2e9e67709ce9db8ee3806d6937be4e9c1ae Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 29 Oct 2018 22:23:53 +1100 Subject: [PATCH 1043/3715] selftests/powerpc/cache_shape: Fix out-of-tree build [ Upstream commit 69f8117f17b332a68cd8f4bf8c2d0d3d5b84efc5 ] Use TEST_GEN_PROGS and don't redefine all, this makes the out-of-tree build work. We need to move the extra dependencies below the include of lib.mk, because it adds the $(OUTPUT) prefix if it's defined. We can also drop the clean rule, lib.mk does it for us. Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- tools/testing/selftests/powerpc/cache_shape/Makefile | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/powerpc/cache_shape/Makefile b/tools/testing/selftests/powerpc/cache_shape/Makefile index 1be547434a49..7e0c175b8297 100644 --- a/tools/testing/selftests/powerpc/cache_shape/Makefile +++ b/tools/testing/selftests/powerpc/cache_shape/Makefile @@ -1,11 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_PROGS := cache_shape - -all: $(TEST_PROGS) - -$(TEST_PROGS): ../harness.c ../utils.c +TEST_GEN_PROGS := cache_shape include ../../lib.mk -clean: - rm -f $(TEST_PROGS) *.o +$(TEST_GEN_PROGS): ../harness.c ../utils.c From 94cd21cdfa54c0cb02b826ddf3e9474337ccf8cb Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 30 Oct 2018 15:04:59 -0700 Subject: [PATCH 1044/3715] linux/bitmap.h: handle constant zero-size bitmaps correctly [ Upstream commit 7275b097851a5e2e0dd4da039c7e96b59ac5314e ] The static inlines in bitmap.h do not handle a compile-time constant nbits==0 correctly (they dereference the passed src or dst pointers, despite only 0 words being valid to access). I had the 0-day buildbot chew on a patch [1] that would cause build failures for such cases without complaining, suggesting that we don't have any such users currently, at least for the 70 .config/arch combinations that was built. Should any turn up, make sure they use the out-of-line versions, which do handle nbits==0 correctly. This is of course not the most efficient, but it's much less churn than teaching all the static inlines an "if (zero_const_nbits())", and since we don't have any current instances, this doesn't affect existing code at all. [1] lkml.kernel.org/r/20180815085539.27485-1-linux@rasmusvillemoes.dk Link: http://lkml.kernel.org/r/20180818131623.8755-3-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Reviewed-by: Andy Shevchenko Cc: Yury Norov Cc: Rasmus Villemoes Cc: Sudeep Holla Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- include/linux/bitmap.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 835c2271196a..1990b88bd0ab 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -185,8 +185,13 @@ extern int bitmap_print_to_pagebuf(bool list, char *buf, #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) +/* + * The static inlines below do not handle constant nbits==0 correctly, + * so make such users (should any ever turn up) call the out-of-line + * versions. + */ #define small_const_nbits(nbits) \ - (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG) + (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG && (nbits) > 0) static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) { From 16be83252233ad0f4c16664e833d6b3c72e9e47f Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 30 Oct 2018 15:05:07 -0700 Subject: [PATCH 1045/3715] linux/bitmap.h: fix type of nbits in bitmap_shift_right() [ Upstream commit d9873969fa8725dc6a5a21ab788c057fd8719751 ] Most other bitmap API, including the OOL version __bitmap_shift_right, take unsigned nbits. This was accidentally left out from 2fbad29917c98. Link: http://lkml.kernel.org/r/20180818131623.8755-5-linux@rasmusvillemoes.dk Fixes: 2fbad29917c98 ("lib: bitmap: change bitmap_shift_right to take unsigned parameters") Signed-off-by: Rasmus Villemoes Reported-by: Yury Norov Reviewed-by: Andy Shevchenko Cc: Rasmus Villemoes Cc: Sudeep Holla Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- include/linux/bitmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 1990b88bd0ab..aec255fb62aa 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -355,7 +355,7 @@ static __always_inline void bitmap_clear(unsigned long *map, unsigned int start, } static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *src, - unsigned int shift, int nbits) + unsigned int shift, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> shift; From 30f1d1147abc52240cc1f9c10414f22afc5b3772 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ernesto=20A=2E=20Fern=C3=A1ndez?= Date: Tue, 30 Oct 2018 15:06:04 -0700 Subject: [PATCH 1046/3715] hfsplus: fix BUG on bnode parent update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 19a9d0f1acf75e8be8cfba19c1a34e941846fa2b ] Creating, renaming or deleting a file may hit BUG_ON() if the first record of both a leaf node and its parent are changed, and if this forces the parent to be split. This bug is triggered by xfstests generic/027, somewhat rarely; here is a more reliable reproducer: truncate -s 50M fs.iso mkfs.hfsplus fs.iso mount fs.iso /mnt i=1000 while [ $i -le 2400 ]; do touch /mnt/$i &>/dev/null ((++i)) done i=2400 while [ $i -ge 1000 ]; do mv /mnt/$i /mnt/$(perl -e "print $i x61") &>/dev/null ((--i)) done The issue is that a newly created bnode is being put twice. Reset new_node to NULL in hfs_brec_update_parent() before reaching goto again. Link: http://lkml.kernel.org/r/5ee1db09b60373a15890f6a7c835d00e76bf601d.1535682461.git.ernesto.mnd.fernandez@gmail.com Signed-off-by: Ernesto A. Fernández Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/hfsplus/brec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c index d3f36982f685..0f53a486d2c1 100644 --- a/fs/hfsplus/brec.c +++ b/fs/hfsplus/brec.c @@ -448,6 +448,7 @@ skip: /* restore search_key */ hfs_bnode_read_key(node, fd->search_key, 14); } + new_node = NULL; } if (!rec && node->parent) From 44ddfe1ced43d2da1237cbfae6392a1d3532c09e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ernesto=20A=2E=20Fern=C3=A1ndez?= Date: Tue, 30 Oct 2018 15:06:11 -0700 Subject: [PATCH 1047/3715] hfs: fix BUG on bnode parent update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ef75bcc5763d130451a99825f247d301088b790b ] hfs_brec_update_parent() may hit BUG_ON() if the first record of both a leaf node and its parent are changed, and if this forces the parent to be split. It is not possible for this to happen on a valid hfs filesystem because the index nodes have fixed length keys. For reasons I ignore, the hfs module does have support for a number of hfsplus features. A corrupt btree header may report variable length keys and trigger this BUG, so it's better to fix it. Link: http://lkml.kernel.org/r/cf9b02d57f806217a2b1bf5db8c3e39730d8f603.1535682463.git.ernesto.mnd.fernandez@gmail.com Signed-off-by: Ernesto A. Fernández Reviewed-by: Andrew Morton Cc: Christoph Hellwig Cc: Viacheslav Dubeyko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/hfs/brec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/hfs/brec.c b/fs/hfs/brec.c index da25c49203cc..896396554bcc 100644 --- a/fs/hfs/brec.c +++ b/fs/hfs/brec.c @@ -445,6 +445,7 @@ skip: /* restore search_key */ hfs_bnode_read_key(node, fd->search_key, 14); } + new_node = NULL; } if (!rec && node->parent) From 71db3989910218b45c624473029228a56a82cb88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ernesto=20A=2E=20Fern=C3=A1ndez?= Date: Tue, 30 Oct 2018 15:06:14 -0700 Subject: [PATCH 1048/3715] hfsplus: prevent btree data loss on ENOSPC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d92915c35bfaf763d78bf1d5ac7f183420e3bd99 ] Inserting or deleting a record in a btree may require splitting several of its nodes. If we hit ENOSPC halfway through, the new nodes will be left orphaned and their records will be lost. This could mean lost inodes, extents or xattrs. Henceforth, check the available disk space before making any changes. This still leaves the potential problem of corruption on ENOMEM. The patch can be tested with xfstests generic/027. Link: http://lkml.kernel.org/r/4596eef22fbda137b4ffa0272d92f0da15364421.1536269129.git.ernesto.mnd.fernandez@gmail.com Signed-off-by: Ernesto A. Fernández Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/hfsplus/attributes.c | 10 ++++++++ fs/hfsplus/btree.c | 52 +++++++++++++++++++++++++---------------- fs/hfsplus/catalog.c | 24 +++++++++++++++++++ fs/hfsplus/extents.c | 4 ++++ fs/hfsplus/hfsplus_fs.h | 2 ++ 5 files changed, 72 insertions(+), 20 deletions(-) diff --git a/fs/hfsplus/attributes.c b/fs/hfsplus/attributes.c index 2bab6b3cdba4..e6d554476db4 100644 --- a/fs/hfsplus/attributes.c +++ b/fs/hfsplus/attributes.c @@ -217,6 +217,11 @@ int hfsplus_create_attr(struct inode *inode, if (err) goto failed_init_create_attr; + /* Fail early and avoid ENOSPC during the btree operation */ + err = hfs_bmap_reserve(fd.tree, fd.tree->depth + 1); + if (err) + goto failed_create_attr; + if (name) { err = hfsplus_attr_build_key(sb, fd.search_key, inode->i_ino, name); @@ -313,6 +318,11 @@ int hfsplus_delete_attr(struct inode *inode, const char *name) if (err) return err; + /* Fail early and avoid ENOSPC during the btree operation */ + err = hfs_bmap_reserve(fd.tree, fd.tree->depth); + if (err) + goto out; + if (name) { err = hfsplus_attr_build_key(sb, fd.search_key, inode->i_ino, name); diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index 3de3bc4918b5..66774f4cb4fd 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c @@ -342,6 +342,34 @@ static struct hfs_bnode *hfs_bmap_new_bmap(struct hfs_bnode *prev, u32 idx) return node; } +/* Make sure @tree has enough space for the @rsvd_nodes */ +int hfs_bmap_reserve(struct hfs_btree *tree, int rsvd_nodes) +{ + struct inode *inode = tree->inode; + struct hfsplus_inode_info *hip = HFSPLUS_I(inode); + u32 count; + int res; + + if (rsvd_nodes <= 0) + return 0; + + while (tree->free_nodes < rsvd_nodes) { + res = hfsplus_file_extend(inode, hfs_bnode_need_zeroout(tree)); + if (res) + return res; + hip->phys_size = inode->i_size = + (loff_t)hip->alloc_blocks << + HFSPLUS_SB(tree->sb)->alloc_blksz_shift; + hip->fs_blocks = + hip->alloc_blocks << HFSPLUS_SB(tree->sb)->fs_shift; + inode_set_bytes(inode, inode->i_size); + count = inode->i_size >> tree->node_size_shift; + tree->free_nodes += count - tree->node_count; + tree->node_count = count; + } + return 0; +} + struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) { struct hfs_bnode *node, *next_node; @@ -351,27 +379,11 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) u16 off16; u16 len; u8 *data, byte, m; - int i; + int i, res; - while (!tree->free_nodes) { - struct inode *inode = tree->inode; - struct hfsplus_inode_info *hip = HFSPLUS_I(inode); - u32 count; - int res; - - res = hfsplus_file_extend(inode, hfs_bnode_need_zeroout(tree)); - if (res) - return ERR_PTR(res); - hip->phys_size = inode->i_size = - (loff_t)hip->alloc_blocks << - HFSPLUS_SB(tree->sb)->alloc_blksz_shift; - hip->fs_blocks = - hip->alloc_blocks << HFSPLUS_SB(tree->sb)->fs_shift; - inode_set_bytes(inode, inode->i_size); - count = inode->i_size >> tree->node_size_shift; - tree->free_nodes = count - tree->node_count; - tree->node_count = count; - } + res = hfs_bmap_reserve(tree, 1); + if (res) + return ERR_PTR(res); nidx = 0; node = hfs_bnode_find(tree, nidx); diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index a196369ba779..35472cba750e 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c @@ -265,6 +265,14 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, if (err) return err; + /* + * Fail early and avoid ENOSPC during the btree operations. We may + * have to split the root node at most once. + */ + err = hfs_bmap_reserve(fd.tree, 2 * fd.tree->depth); + if (err) + goto err2; + hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid); entry_size = hfsplus_fill_cat_thread(sb, &entry, S_ISDIR(inode->i_mode) ? @@ -333,6 +341,14 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, const struct qstr *str) if (err) return err; + /* + * Fail early and avoid ENOSPC during the btree operations. We may + * have to split the root node at most once. + */ + err = hfs_bmap_reserve(fd.tree, 2 * (int)fd.tree->depth - 2); + if (err) + goto out; + if (!str) { int len; @@ -433,6 +449,14 @@ int hfsplus_rename_cat(u32 cnid, return err; dst_fd = src_fd; + /* + * Fail early and avoid ENOSPC during the btree operations. We may + * have to split the root node at most twice. + */ + err = hfs_bmap_reserve(src_fd.tree, 4 * (int)src_fd.tree->depth - 1); + if (err) + goto out; + /* find the old dir entry and read the data */ err = hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name); diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index e8770935ce6d..284d7fb73e86 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -100,6 +100,10 @@ static int __hfsplus_ext_write_extent(struct inode *inode, if (hip->extent_state & HFSPLUS_EXT_NEW) { if (res != -ENOENT) return res; + /* Fail early and avoid ENOSPC during the btree operation */ + res = hfs_bmap_reserve(fd->tree, fd->tree->depth + 1); + if (res) + return res; hfs_brec_insert(fd, hip->cached_extents, sizeof(hfsplus_extent_rec)); hip->extent_state &= ~(HFSPLUS_EXT_DIRTY | HFSPLUS_EXT_NEW); diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index a015044daa05..dbb55d823385 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -312,6 +312,7 @@ static inline unsigned short hfsplus_min_io_size(struct super_block *sb) #define hfs_btree_open hfsplus_btree_open #define hfs_btree_close hfsplus_btree_close #define hfs_btree_write hfsplus_btree_write +#define hfs_bmap_reserve hfsplus_bmap_reserve #define hfs_bmap_alloc hfsplus_bmap_alloc #define hfs_bmap_free hfsplus_bmap_free #define hfs_bnode_read hfsplus_bnode_read @@ -396,6 +397,7 @@ u32 hfsplus_calc_btree_clump_size(u32 block_size, u32 node_size, u64 sectors, struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id); void hfs_btree_close(struct hfs_btree *tree); int hfs_btree_write(struct hfs_btree *tree); +int hfs_bmap_reserve(struct hfs_btree *tree, int rsvd_nodes); struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree); void hfs_bmap_free(struct hfs_bnode *node); From e11318583164ff5ce50fa5ec6c2c7b5360816321 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ernesto=20A=2E=20Fern=C3=A1ndez?= Date: Tue, 30 Oct 2018 15:06:17 -0700 Subject: [PATCH 1049/3715] hfs: prevent btree data loss on ENOSPC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 54640c7502e5ed41fbf4eedd499e85f9acc9698f ] Inserting a new record in a btree may require splitting several of its nodes. If we hit ENOSPC halfway through, the new nodes will be left orphaned and their records will be lost. This could mean lost inodes or extents. Henceforth, check the available disk space before making any changes. This still leaves the potential problem of corruption on ENOMEM. There is no need to reserve space before deleting a catalog record, as we do for hfsplus. This difference is because hfs index nodes have fixed length keys. Link: http://lkml.kernel.org/r/ab5fc8a7d5ffccfd5f27b1cf2cb4ceb6c110da74.1536269131.git.ernesto.mnd.fernandez@gmail.com Signed-off-by: Ernesto A. Fernández Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/hfs/btree.c | 47 ++++++++++++++++++++++++++++------------------- fs/hfs/btree.h | 1 + fs/hfs/catalog.c | 16 ++++++++++++++++ fs/hfs/extent.c | 4 ++++ 4 files changed, 49 insertions(+), 19 deletions(-) diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c index 9bdff5e40626..19017d296173 100644 --- a/fs/hfs/btree.c +++ b/fs/hfs/btree.c @@ -220,6 +220,30 @@ static struct hfs_bnode *hfs_bmap_new_bmap(struct hfs_bnode *prev, u32 idx) return node; } +/* Make sure @tree has enough space for the @rsvd_nodes */ +int hfs_bmap_reserve(struct hfs_btree *tree, int rsvd_nodes) +{ + struct inode *inode = tree->inode; + u32 count; + int res; + + while (tree->free_nodes < rsvd_nodes) { + res = hfs_extend_file(inode); + if (res) + return res; + HFS_I(inode)->phys_size = inode->i_size = + (loff_t)HFS_I(inode)->alloc_blocks * + HFS_SB(tree->sb)->alloc_blksz; + HFS_I(inode)->fs_blocks = inode->i_size >> + tree->sb->s_blocksize_bits; + inode_set_bytes(inode, inode->i_size); + count = inode->i_size >> tree->node_size_shift; + tree->free_nodes += count - tree->node_count; + tree->node_count = count; + } + return 0; +} + struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) { struct hfs_bnode *node, *next_node; @@ -229,26 +253,11 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) u16 off16; u16 len; u8 *data, byte, m; - int i; + int i, res; - while (!tree->free_nodes) { - struct inode *inode = tree->inode; - u32 count; - int res; - - res = hfs_extend_file(inode); - if (res) - return ERR_PTR(res); - HFS_I(inode)->phys_size = inode->i_size = - (loff_t)HFS_I(inode)->alloc_blocks * - HFS_SB(tree->sb)->alloc_blksz; - HFS_I(inode)->fs_blocks = inode->i_size >> - tree->sb->s_blocksize_bits; - inode_set_bytes(inode, inode->i_size); - count = inode->i_size >> tree->node_size_shift; - tree->free_nodes = count - tree->node_count; - tree->node_count = count; - } + res = hfs_bmap_reserve(tree, 1); + if (res) + return ERR_PTR(res); nidx = 0; node = hfs_bnode_find(tree, nidx); diff --git a/fs/hfs/btree.h b/fs/hfs/btree.h index c8b252dbb26c..dcc2aab1b2c4 100644 --- a/fs/hfs/btree.h +++ b/fs/hfs/btree.h @@ -82,6 +82,7 @@ struct hfs_find_data { extern struct hfs_btree *hfs_btree_open(struct super_block *, u32, btree_keycmp); extern void hfs_btree_close(struct hfs_btree *); extern void hfs_btree_write(struct hfs_btree *); +extern int hfs_bmap_reserve(struct hfs_btree *, int); extern struct hfs_bnode * hfs_bmap_alloc(struct hfs_btree *); extern void hfs_bmap_free(struct hfs_bnode *node); diff --git a/fs/hfs/catalog.c b/fs/hfs/catalog.c index 8a66405b0f8b..d365bf0b8c77 100644 --- a/fs/hfs/catalog.c +++ b/fs/hfs/catalog.c @@ -97,6 +97,14 @@ int hfs_cat_create(u32 cnid, struct inode *dir, const struct qstr *str, struct i if (err) return err; + /* + * Fail early and avoid ENOSPC during the btree operations. We may + * have to split the root node at most once. + */ + err = hfs_bmap_reserve(fd.tree, 2 * fd.tree->depth); + if (err) + goto err2; + hfs_cat_build_key(sb, fd.search_key, cnid, NULL); entry_size = hfs_cat_build_thread(sb, &entry, S_ISDIR(inode->i_mode) ? HFS_CDR_THD : HFS_CDR_FTH, @@ -295,6 +303,14 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, const struct qstr *src_name, return err; dst_fd = src_fd; + /* + * Fail early and avoid ENOSPC during the btree operations. We may + * have to split the root node at most once. + */ + err = hfs_bmap_reserve(src_fd.tree, 2 * src_fd.tree->depth); + if (err) + goto out; + /* find the old dir entry and read the data */ hfs_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name); err = hfs_brec_find(&src_fd); diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c index 5d0182654580..0c638c612152 100644 --- a/fs/hfs/extent.c +++ b/fs/hfs/extent.c @@ -117,6 +117,10 @@ static int __hfs_ext_write_extent(struct inode *inode, struct hfs_find_data *fd) if (HFS_I(inode)->flags & HFS_FLG_EXT_NEW) { if (res != -ENOENT) return res; + /* Fail early and avoid ENOSPC during the btree operation */ + res = hfs_bmap_reserve(fd->tree, fd->tree->depth + 1); + if (res) + return res; hfs_brec_insert(fd, HFS_I(inode)->cached_extents, sizeof(hfs_extent_rec)); HFS_I(inode)->flags &= ~(HFS_FLG_EXT_DIRTY|HFS_FLG_EXT_NEW); } else { From 0a729bac19dd40ba7214cd220672c854d35ed2e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ernesto=20A=2E=20Fern=C3=A1ndez?= Date: Tue, 30 Oct 2018 15:06:21 -0700 Subject: [PATCH 1050/3715] hfsplus: fix return value of hfsplus_get_block() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 839c3a6a5e1fbc8542d581911b35b2cb5cd29304 ] Direct writes to empty inodes fail with EIO. The generic direct-io code is in part to blame (a patch has been submitted as "direct-io: allow direct writes to empty inodes"), but hfsplus is worse affected than the other filesystems because the fallback to buffered I/O doesn't happen. The problem is the return value of hfsplus_get_block() when called with !create. Change it to be more consistent with the other modules. Link: http://lkml.kernel.org/r/2cd1301404ec7cf1e39c8f11a01a4302f1460ad6.1539195310.git.ernesto.mnd.fernandez@gmail.com Signed-off-by: Ernesto A. Fernández Reviewed-by: Vyacheslav Dubeyko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/hfsplus/extents.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index 284d7fb73e86..58f296bfd438 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -237,7 +237,9 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock, ablock = iblock >> sbi->fs_shift; if (iblock >= hip->fs_blocks) { - if (iblock > hip->fs_blocks || !create) + if (!create) + return 0; + if (iblock > hip->fs_blocks) return -EIO; if (ablock >= hip->alloc_blocks) { res = hfsplus_file_extend(inode, false); From 42934683c91167ce68ddc2323d97acc8c3e5cc69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ernesto=20A=2E=20Fern=C3=A1ndez?= Date: Tue, 30 Oct 2018 15:06:24 -0700 Subject: [PATCH 1051/3715] hfs: fix return value of hfs_get_block() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1267a07be5ebbff2d2739290f3d043ae137c15b4 ] Direct writes to empty inodes fail with EIO. The generic direct-io code is in part to blame (a patch has been submitted as "direct-io: allow direct writes to empty inodes"), but hfs is worse affected than the other filesystems because the fallback to buffered I/O doesn't happen. The problem is the return value of hfs_get_block() when called with !create. Change it to be more consistent with the other modules. Link: http://lkml.kernel.org/r/4538ab8c35ea37338490525f0f24cbc37227528c.1539195310.git.ernesto.mnd.fernandez@gmail.com Signed-off-by: Ernesto A. Fernández Reviewed-by: Vyacheslav Dubeyko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/hfs/extent.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c index 0c638c612152..5f1ff97a3b98 100644 --- a/fs/hfs/extent.c +++ b/fs/hfs/extent.c @@ -345,7 +345,9 @@ int hfs_get_block(struct inode *inode, sector_t block, ablock = (u32)block / HFS_SB(sb)->fs_div; if (block >= HFS_I(inode)->fs_blocks) { - if (block > HFS_I(inode)->fs_blocks || !create) + if (!create) + return 0; + if (block > HFS_I(inode)->fs_blocks) return -EIO; if (ablock >= HFS_I(inode)->alloc_blocks) { res = hfs_extend_file(inode); From 47b8860bb8a1e7ba71a95cd753ff049afe557153 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ernesto=20A=2E=20Fern=C3=A1ndez?= Date: Tue, 30 Oct 2018 15:06:27 -0700 Subject: [PATCH 1052/3715] hfsplus: update timestamps on truncate() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit dc8844aada735890a6de109bef327f5df36a982e ] The vfs takes care of updating ctime and mtime on ftruncate(), but on truncate() it must be done by the module. This patch can be tested with xfstests generic/313. Link: http://lkml.kernel.org/r/9beb0913eea37288599e8e1b7cec8768fb52d1b8.1539316825.git.ernesto.mnd.fernandez@gmail.com Signed-off-by: Ernesto A. Fernández Reviewed-by: Vyacheslav Dubeyko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/hfsplus/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 190c60efbc99..5b31f4730ee9 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -262,6 +262,7 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr) } truncate_setsize(inode, attr->ia_size); hfsplus_file_truncate(inode); + inode->i_mtime = inode->i_ctime = current_time(inode); } setattr_copy(inode, attr); From dcc4cf3be923916d04a37a395f4aefbf096e1e42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ernesto=20A=2E=20Fern=C3=A1ndez?= Date: Tue, 30 Oct 2018 15:06:31 -0700 Subject: [PATCH 1053/3715] hfs: update timestamp on truncate() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8cd3cb5061730af085a3f9890a3352f162b4e20c ] The vfs takes care of updating mtime on ftruncate(), but on truncate() it must be done by the module. Link: http://lkml.kernel.org/r/e1611eda2985b672ed2d8677350b4ad8c2d07e8a.1539316825.git.ernesto.mnd.fernandez@gmail.com Signed-off-by: Ernesto A. Fernández Reviewed-by: Vyacheslav Dubeyko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/hfs/inode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 2538b49cc349..350afd67bd69 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -642,6 +642,8 @@ int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr) truncate_setsize(inode, attr->ia_size); hfs_file_truncate(inode); + inode->i_atime = inode->i_mtime = inode->i_ctime = + current_time(inode); } setattr_copy(inode, attr); From 496a7776387064524eaedc7e3ede14f82c21cfd4 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 30 Oct 2018 15:06:35 -0700 Subject: [PATCH 1054/3715] fs/hfs/extent.c: fix array out of bounds read of array extent [ Upstream commit 6c9a3f843a29d6894dfc40df338b91dbd78f0ae3 ] Currently extent and index i are both being incremented causing an array out of bounds read on extent[i]. Fix this by removing the extraneous increment of extent. Ernesto said: : This is only triggered when deleting a file with a resource fork. I : may be wrong because the documentation isn't clear, but I don't think : you can create those under linux. So I guess nobody was testing them. : : > A disk space leak, perhaps? : : That's what it looks like in general. hfs_free_extents() won't do : anything if the block count doesn't add up, and the error will be : ignored. Now, if the block count randomly does add up, we could see : some corruption. Detected by CoverityScan, CID#711541 ("Out of bounds read") Link: http://lkml.kernel.org/r/20180831140538.31566-1-colin.king@canonical.com Signed-off-by: Colin Ian King Reviewed-by: Ernesto A. Fernndez Cc: David Howells Cc: Al Viro Cc: Hin-Tak Leung Cc: Vyacheslav Dubeyko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/hfs/extent.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c index 5f1ff97a3b98..263d5028d9d1 100644 --- a/fs/hfs/extent.c +++ b/fs/hfs/extent.c @@ -304,7 +304,7 @@ int hfs_free_fork(struct super_block *sb, struct hfs_cat_file *file, int type) return 0; blocks = 0; - for (i = 0; i < 3; extent++, i++) + for (i = 0; i < 3; i++) blocks += be16_to_cpu(extent[i].count); res = hfs_free_extents(sb, extent, blocks, blocks); From 5cb8388a680a363ba9a8cca8f81687f9b0d238bb Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 30 Oct 2018 15:10:24 -0700 Subject: [PATCH 1055/3715] mm/memory_hotplug: make add_memory() take the device_hotplug_lock [ Upstream commit 8df1d0e4a265f25dc1e7e7624ccdbcb4a6630c89 ] add_memory() currently does not take the device_hotplug_lock, however is aleady called under the lock from arch/powerpc/platforms/pseries/hotplug-memory.c drivers/acpi/acpi_memhotplug.c to synchronize against CPU hot-remove and similar. In general, we should hold the device_hotplug_lock when adding memory to synchronize against online/offline request (e.g. from user space) - which already resulted in lock inversions due to device_lock() and mem_hotplug_lock - see 30467e0b3be ("mm, hotplug: fix concurrent memory hot-add deadlock"). add_memory()/add_memory_resource() will create memory block devices, so this really feels like the right thing to do. Holding the device_hotplug_lock makes sure that a memory block device can really only be accessed (e.g. via .online/.state) from user space, once the memory has been fully added to the system. The lock is not held yet in drivers/xen/balloon.c arch/powerpc/platforms/powernv/memtrace.c drivers/s390/char/sclp_cmd.c drivers/hv/hv_balloon.c So, let's either use the locked variants or take the lock. Don't export add_memory_resource(), as it once was exported to be used by XEN, which is never built as a module. If somebody requires it, we also have to export a locked variant (as device_hotplug_lock is never exported). Link: http://lkml.kernel.org/r/20180925091457.28651-3-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Pavel Tatashin Reviewed-by: Rafael J. Wysocki Reviewed-by: Rashmica Gupta Reviewed-by: Oscar Salvador Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: "Rafael J. Wysocki" Cc: Len Brown Cc: Greg Kroah-Hartman Cc: Boris Ostrovsky Cc: Juergen Gross Cc: Nathan Fontenot Cc: John Allen Cc: Michal Hocko Cc: Dan Williams Cc: Joonsoo Kim Cc: Vlastimil Babka Cc: Mathieu Malaterre Cc: Pavel Tatashin Cc: YASUAKI ISHIMATSU Cc: Balbir Singh Cc: Haiyang Zhang Cc: Heiko Carstens Cc: Jonathan Corbet Cc: Kate Stewart Cc: "K. Y. Srinivasan" Cc: Martin Schwidefsky Cc: Michael Neuling Cc: Philippe Ombredanne Cc: Stephen Hemminger Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- .../platforms/pseries/hotplug-memory.c | 2 +- drivers/acpi/acpi_memhotplug.c | 2 +- drivers/base/memory.c | 9 ++++++-- drivers/xen/balloon.c | 3 +++ include/linux/memory_hotplug.h | 1 + mm/memory_hotplug.c | 22 ++++++++++++++++--- 6 files changed, 32 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 93e09f108ca1..99a3cf51c5ba 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -787,7 +787,7 @@ static int dlpar_add_lmb(struct of_drconf_cell *lmb) nid = memory_add_physaddr_to_nid(lmb->base_addr); /* Add the memory */ - rc = add_memory(nid, lmb->base_addr, block_sz); + rc = __add_memory(nid, lmb->base_addr, block_sz); if (rc) { dlpar_remove_device_tree_lmb(lmb); return rc; diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index 6b0d3ef7309c..2ccfbb61ca89 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -228,7 +228,7 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) if (node < 0) node = memory_add_physaddr_to_nid(info->start_addr); - result = add_memory(node, info->start_addr, info->length); + result = __add_memory(node, info->start_addr, info->length); /* * If the memory block has been used by the kernel, add_memory() diff --git a/drivers/base/memory.c b/drivers/base/memory.c index c617e00f4361..8e5818e735e2 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -517,15 +517,20 @@ memory_probe_store(struct device *dev, struct device_attribute *attr, if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1)) return -EINVAL; + ret = lock_device_hotplug_sysfs(); + if (ret) + goto out; + nid = memory_add_physaddr_to_nid(phys_addr); - ret = add_memory(nid, phys_addr, - MIN_MEMORY_BLOCK_SIZE * sections_per_block); + ret = __add_memory(nid, phys_addr, + MIN_MEMORY_BLOCK_SIZE * sections_per_block); if (ret) goto out; ret = count; out: + unlock_device_hotplug(); return ret; } diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 7d521babc020..71a6deeb4e71 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -356,7 +356,10 @@ static enum bp_state reserve_additional_memory(void) * callers drop the mutex before trying again. */ mutex_unlock(&balloon_mutex); + /* add_memory_resource() requires the device_hotplug lock */ + lock_device_hotplug(); rc = add_memory_resource(nid, resource, memhp_auto_online); + unlock_device_hotplug(); mutex_lock(&balloon_mutex); if (rc) { diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 58e110aee7ab..d36a02935391 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -316,6 +316,7 @@ static inline void remove_memory(int nid, u64 start, u64 size) {} extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, void *arg, int (*func)(struct memory_block *, void *)); +extern int __add_memory(int nid, u64 start, u64 size); extern int add_memory(int nid, u64 start, u64 size); extern int add_memory_resource(int nid, struct resource *resource, bool online); extern int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index d4affa9982ca..e368a4e0c7cb 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1073,7 +1073,12 @@ static int online_memory_block(struct memory_block *mem, void *arg) return device_online(&mem->dev); } -/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ +/* + * NOTE: The caller must call lock_device_hotplug() to serialize hotplug + * and online/offline operations (triggered e.g. by sysfs). + * + * we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG + */ int __ref add_memory_resource(int nid, struct resource *res, bool online) { u64 start, size; @@ -1166,9 +1171,9 @@ out: mem_hotplug_done(); return ret; } -EXPORT_SYMBOL_GPL(add_memory_resource); -int __ref add_memory(int nid, u64 start, u64 size) +/* requires device_hotplug_lock, see add_memory_resource() */ +int __ref __add_memory(int nid, u64 start, u64 size) { struct resource *res; int ret; @@ -1182,6 +1187,17 @@ int __ref add_memory(int nid, u64 start, u64 size) release_memory_resource(res); return ret; } + +int add_memory(int nid, u64 start, u64 size) +{ + int rc; + + lock_device_hotplug(); + rc = __add_memory(nid, start, size); + unlock_device_hotplug(); + + return rc; +} EXPORT_SYMBOL_GPL(add_memory); #ifdef CONFIG_MEMORY_HOTREMOVE From 8bcacb4e6959d22ca9f54d260e0edcfec9265bac Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar Date: Fri, 12 Oct 2018 13:13:39 +0200 Subject: [PATCH 1056/3715] igb: shorten maximum PHC timecounter update interval [ Upstream commit 094bf4d0e9657f6ea1ee3d7e07ce3970796949ce ] The timecounter needs to be updated at least once per ~550 seconds in order to avoid a 40-bit SYSTIM timestamp to be misinterpreted as an old timestamp. Since commit 500462a9d ("timers: Switch to a non-cascading wheel"), scheduling of delayed work seems to be less accurate and a requested delay of 540 seconds may actually be longer than 550 seconds. Shorten the delay to 480 seconds to be sure the timecounter is updated in time. This fixes an issue with HW timestamps on 82580/I350/I354 being off by ~1100 seconds for few seconds every ~9 minutes. Cc: Jacob Keller Cc: Richard Cochran Cc: Thomas Gleixner Signed-off-by: Miroslav Lichvar Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igb/igb_ptp.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 0746b19ec6d3..295d27f33104 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -65,9 +65,15 @@ * * The 40 bit 82580 SYSTIM overflows every * 2^40 * 10^-9 / 60 = 18.3 minutes. + * + * SYSTIM is converted to real time using a timecounter. As + * timecounter_cyc2time() allows old timestamps, the timecounter + * needs to be updated at least once per half of the SYSTIM interval. + * Scheduling of delayed work is not very accurate, so we aim for 8 + * minutes to be sure the actual interval is shorter than 9.16 minutes. */ -#define IGB_SYSTIM_OVERFLOW_PERIOD (HZ * 60 * 9) +#define IGB_SYSTIM_OVERFLOW_PERIOD (HZ * 60 * 8) #define IGB_PTP_TX_TIMEOUT (HZ * 15) #define INCPERIOD_82576 BIT(E1000_TIMINCA_16NS_SHIFT) #define INCVALUE_82576_MASK GENMASK(E1000_TIMINCA_16NS_SHIFT - 1, 0) From 246db31c2326c4d38d0062eca5321dcdaabf0acb Mon Sep 17 00:00:00 2001 From: Huazhong Tan Date: Tue, 30 Oct 2018 21:50:44 +0800 Subject: [PATCH 1057/3715] net: hns3: bugfix for buffer not free problem during resetting [ Upstream commit 73b907a083b8a8c1c62cb494bc9fbe6ae086c460 ] When hns3_get_ring_config()/hns3_queue_to_ring()/ hns3_get_vector_ring_chain() failed during resetting, the allocated memory has not been freed before these three functions return. So this patch adds error handler in these functions to fix it. Fixes: 76ad4f0ee747 ("net: hns3: Add support of HNS3 Ethernet Driver for hip08 SoC") Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../hisilicon/hns3/hns3pf/hns3_enet.c | 24 ++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c index 5483cb23c08a..e9cff8ed5e07 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c @@ -2300,7 +2300,7 @@ static int hns3_get_vector_ring_chain(struct hns3_enet_tqp_vector *tqp_vector, chain = devm_kzalloc(&pdev->dev, sizeof(*chain), GFP_KERNEL); if (!chain) - return -ENOMEM; + goto err_free_chain; cur_chain->next = chain; chain->tqp_index = tx_ring->tqp->tqp_index; @@ -2324,7 +2324,7 @@ static int hns3_get_vector_ring_chain(struct hns3_enet_tqp_vector *tqp_vector, while (rx_ring) { chain = devm_kzalloc(&pdev->dev, sizeof(*chain), GFP_KERNEL); if (!chain) - return -ENOMEM; + goto err_free_chain; cur_chain->next = chain; chain->tqp_index = rx_ring->tqp->tqp_index; @@ -2336,6 +2336,16 @@ static int hns3_get_vector_ring_chain(struct hns3_enet_tqp_vector *tqp_vector, } return 0; + +err_free_chain: + cur_chain = head->next; + while (cur_chain) { + chain = cur_chain->next; + devm_kfree(&pdev->dev, chain); + cur_chain = chain; + } + + return -ENOMEM; } static void hns3_free_vector_ring_chain(struct hns3_enet_tqp_vector *tqp_vector, @@ -2530,8 +2540,10 @@ static int hns3_queue_to_ring(struct hnae3_queue *tqp, return ret; ret = hns3_ring_get_cfg(tqp, priv, HNAE3_RING_TYPE_RX); - if (ret) + if (ret) { + devm_kfree(priv->dev, priv->ring_data[tqp->tqp_index].ring); return ret; + } return 0; } @@ -2556,6 +2568,12 @@ static int hns3_get_ring_config(struct hns3_nic_priv *priv) return 0; err: + while (i--) { + devm_kfree(priv->dev, priv->ring_data[i].ring); + devm_kfree(priv->dev, + priv->ring_data[i + h->kinfo.num_tqps].ring); + } + devm_kfree(&pdev->dev, priv->ring_data); return ret; } From 6cfdf432b42192abd5369c556f2ecf2a789e8ce9 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Mon, 11 Jun 2018 16:13:12 -0400 Subject: [PATCH 1058/3715] ntb_netdev: fix sleep time mismatch [ Upstream commit a861594b1b7ffd630f335b351c4e9f938feadb8e ] The tx_time should be in usecs (according to the comment above the variable), but the setting of the timer during the rearming is done in msecs. Change it to match the expected units. Fixes: e74bfeedad08 ("NTB: Add flow control to the ntb_netdev") Suggested-by: Gerd W. Haeussler Signed-off-by: Jon Mason Acked-by: Dave Jiang Signed-off-by: Sasha Levin --- drivers/net/ntb_netdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c index 0250aa9ae2cb..97bf49ad81a6 100644 --- a/drivers/net/ntb_netdev.c +++ b/drivers/net/ntb_netdev.c @@ -236,7 +236,7 @@ static void ntb_netdev_tx_timer(unsigned long data) struct ntb_netdev *dev = netdev_priv(ndev); if (ntb_transport_tx_free_entry(dev->qp) < tx_stop) { - mod_timer(&dev->tx_timer, jiffies + msecs_to_jiffies(tx_time)); + mod_timer(&dev->tx_timer, jiffies + usecs_to_jiffies(tx_time)); } else { /* Make sure anybody stopping the queue after this sees the new * value of ntb_transport_tx_free_entry() From 5a20e7ea811318fafb43f9c2f0d19c87902c1371 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 28 Aug 2018 17:13:59 -0700 Subject: [PATCH 1059/3715] ntb: intel: fix return value for ndev_vec_mask() [ Upstream commit 7756e2b5d68c36e170a111dceea22f7365f83256 ] ndev_vec_mask() should be returning u64 mask value instead of int. Otherwise the mask value returned can be incorrect for larger vectors. Fixes: e26a5843f7f5 ("NTB: Split ntb_hw_intel and ntb_transport drivers") Signed-off-by: Dave Jiang Tested-by: Lucas Van Signed-off-by: Jon Mason Signed-off-by: Sasha Levin --- drivers/ntb/hw/intel/ntb_hw_intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index 2557e2c05b90..58068f1447bb 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -348,7 +348,7 @@ static inline int ndev_db_clear_mask(struct intel_ntb_dev *ndev, u64 db_bits, return 0; } -static inline int ndev_vec_mask(struct intel_ntb_dev *ndev, int db_vector) +static inline u64 ndev_vec_mask(struct intel_ntb_dev *ndev, int db_vector) { u64 shift, mask; From a1af736d55d020f811cfb1413eb8e420ef1bed20 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Tue, 30 Oct 2018 16:37:10 -0700 Subject: [PATCH 1060/3715] arm64: makefile fix build of .i file in external module case [ Upstream commit 98356eb0ae499c63e78073ccedd9a5fc5c563288 ] After 'a66649dab350 arm64: fix vdso-offsets.h dependency' if one will try to build .i file in case of external kernel module, build fails complaining that prepare0 target is missing. This issue came up with SystemTap when it tries to build variety of .i files for its own generated kernel modules trying to figure given kernel features/capabilities. The issue is that prepare0 is defined in top level Makefile only if KBUILD_EXTMOD is not defined. .i file rule depends on prepare and in case KBUILD_EXTMOD defined top level Makefile contains empty rule for prepare. But after mentioned commit arch/arm64/Makefile would introduce dependency on prepare0 through its own prepare target. Fix it to put proper ifdef KBUILD_EXTMOD around code introduced by mentioned commit. It matches what top level Makefile does. Acked-by: Kevin Brodsky Signed-off-by: Victor Kamensky Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- arch/arm64/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 0c5f70e6d5cf..8c4bc5a2c61f 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -149,6 +149,7 @@ archclean: $(Q)$(MAKE) $(clean)=$(boot) $(Q)$(MAKE) $(clean)=$(boot)/dts +ifeq ($(KBUILD_EXTMOD),) # We need to generate vdso-offsets.h before compiling certain files in kernel/. # In order to do that, we should use the archprepare target, but we can't since # asm-offsets.h is included in some files used to generate vdso-offsets.h, and @@ -158,6 +159,7 @@ archclean: prepare: vdso_prepare vdso_prepare: prepare0 $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso include/generated/vdso-offsets.h +endif define archhelp echo '* Image.gz - Compressed kernel image (arch/$(ARCH)/boot/Image.gz)' From 97e1db17bc1ef4c2e1789bc9323c7be44fba53f8 Mon Sep 17 00:00:00 2001 From: Changwei Ge Date: Fri, 2 Nov 2018 15:48:19 -0700 Subject: [PATCH 1061/3715] ocfs2: don't put and assigning null to bh allocated outside [ Upstream commit cf76c78595ca87548ca5e45c862ac9e0949c4687 ] ocfs2_read_blocks() and ocfs2_read_blocks_sync() are both used to read several blocks from disk. Currently, the input argument *bhs* can be NULL or NOT. It depends on the caller's behavior. If the function fails in reading blocks from disk, the corresponding bh will be assigned to NULL and put. Obviously, above process for non-NULL input bh is not appropriate. Because the caller doesn't even know its bhs are put and re-assigned. If buffer head is managed by caller, ocfs2_read_blocks and ocfs2_read_blocks_sync() should not evaluate it to NULL. It will cause caller accessing illegal memory, thus crash. Link: http://lkml.kernel.org/r/HK2PR06MB045285E0F4FBB561F9F2F9B3D5680@HK2PR06MB0452.apcprd06.prod.outlook.com Signed-off-by: Changwei Ge Reviewed-by: Guozhonghua Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Changwei Ge Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/ocfs2/buffer_head_io.c | 77 ++++++++++++++++++++++++++++++--------- 1 file changed, 59 insertions(+), 18 deletions(-) diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index 9f8250df99f1..f9b84f7a3e4b 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -99,25 +99,34 @@ out: return ret; } +/* Caller must provide a bhs[] with all NULL or non-NULL entries, so it + * will be easier to handle read failure. + */ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, unsigned int nr, struct buffer_head *bhs[]) { int status = 0; unsigned int i; struct buffer_head *bh; + int new_bh = 0; trace_ocfs2_read_blocks_sync((unsigned long long)block, nr); if (!nr) goto bail; + /* Don't put buffer head and re-assign it to NULL if it is allocated + * outside since the caller can't be aware of this alternation! + */ + new_bh = (bhs[0] == NULL); + for (i = 0 ; i < nr ; i++) { if (bhs[i] == NULL) { bhs[i] = sb_getblk(osb->sb, block++); if (bhs[i] == NULL) { status = -ENOMEM; mlog_errno(status); - goto bail; + break; } } bh = bhs[i]; @@ -157,9 +166,26 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, submit_bh(REQ_OP_READ, 0, bh); } +read_failure: for (i = nr; i > 0; i--) { bh = bhs[i - 1]; + if (unlikely(status)) { + if (new_bh && bh) { + /* If middle bh fails, let previous bh + * finish its read and then put it to + * aovoid bh leak + */ + if (!buffer_jbd(bh)) + wait_on_buffer(bh); + put_bh(bh); + bhs[i - 1] = NULL; + } else if (bh && buffer_uptodate(bh)) { + clear_buffer_uptodate(bh); + } + continue; + } + /* No need to wait on the buffer if it's managed by JBD. */ if (!buffer_jbd(bh)) wait_on_buffer(bh); @@ -169,8 +195,7 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, * so we can safely record this and loop back * to cleanup the other buffers. */ status = -EIO; - put_bh(bh); - bhs[i - 1] = NULL; + goto read_failure; } } @@ -178,6 +203,9 @@ bail: return status; } +/* Caller must provide a bhs[] with all NULL or non-NULL entries, so it + * will be easier to handle read failure. + */ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, struct buffer_head *bhs[], int flags, int (*validate)(struct super_block *sb, @@ -187,6 +215,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, int i, ignore_cache = 0; struct buffer_head *bh; struct super_block *sb = ocfs2_metadata_cache_get_super(ci); + int new_bh = 0; trace_ocfs2_read_blocks_begin(ci, (unsigned long long)block, nr, flags); @@ -212,6 +241,11 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, goto bail; } + /* Don't put buffer head and re-assign it to NULL if it is allocated + * outside since the caller can't be aware of this alternation! + */ + new_bh = (bhs[0] == NULL); + ocfs2_metadata_cache_io_lock(ci); for (i = 0 ; i < nr ; i++) { if (bhs[i] == NULL) { @@ -220,7 +254,8 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, ocfs2_metadata_cache_io_unlock(ci); status = -ENOMEM; mlog_errno(status); - goto bail; + /* Don't forget to put previous bh! */ + break; } } bh = bhs[i]; @@ -314,16 +349,27 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, } } - status = 0; - +read_failure: for (i = (nr - 1); i >= 0; i--) { bh = bhs[i]; if (!(flags & OCFS2_BH_READAHEAD)) { - if (status) { - /* Clear the rest of the buffers on error */ - put_bh(bh); - bhs[i] = NULL; + if (unlikely(status)) { + /* Clear the buffers on error including those + * ever succeeded in reading + */ + if (new_bh && bh) { + /* If middle bh fails, let previous bh + * finish its read and then put it to + * aovoid bh leak + */ + if (!buffer_jbd(bh)) + wait_on_buffer(bh); + put_bh(bh); + bhs[i] = NULL; + } else if (bh && buffer_uptodate(bh)) { + clear_buffer_uptodate(bh); + } continue; } /* We know this can't have changed as we hold the @@ -341,9 +387,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, * uptodate. */ status = -EIO; clear_buffer_needs_validate(bh); - put_bh(bh); - bhs[i] = NULL; - continue; + goto read_failure; } if (buffer_needs_validate(bh)) { @@ -353,11 +397,8 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, BUG_ON(buffer_jbd(bh)); clear_buffer_needs_validate(bh); status = validate(sb, bh); - if (status) { - put_bh(bh); - bhs[i] = NULL; - continue; - } + if (status) + goto read_failure; } } From e2017fc025afe801a0059fbc6e9c06c9fe9212cc Mon Sep 17 00:00:00 2001 From: Larry Chen Date: Fri, 2 Nov 2018 15:48:27 -0700 Subject: [PATCH 1062/3715] ocfs2: fix clusters leak in ocfs2_defrag_extent() [ Upstream commit 6194ae4242dec0c9d604bc05df83aa9260a899e4 ] ocfs2_defrag_extent() might leak allocated clusters. When the file system has insufficient space, the number of claimed clusters might be less than the caller wants. If that happens, the original code might directly commit the transaction without returning clusters. This patch is based on code in ocfs2_add_clusters_in_btree(). [akpm@linux-foundation.org: include localalloc.h, reduce scope of data_ac] Link: http://lkml.kernel.org/r/20180904041621.16874-3-lchen@suse.com Signed-off-by: Larry Chen Reviewed-by: Andrew Morton Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Changwei Ge Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/ocfs2/move_extents.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index f55f82ca3425..1565dd8e8856 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -25,6 +25,7 @@ #include "ocfs2_ioctl.h" #include "alloc.h" +#include "localalloc.h" #include "aops.h" #include "dlmglue.h" #include "extent_map.h" @@ -222,6 +223,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, struct ocfs2_refcount_tree *ref_tree = NULL; u32 new_phys_cpos, new_len; u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); + int need_free = 0; if ((ext_flags & OCFS2_EXT_REFCOUNTED) && *len) { BUG_ON(!ocfs2_is_refcount_inode(inode)); @@ -312,6 +314,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, if (!partial) { context->range->me_flags &= ~OCFS2_MOVE_EXT_FL_COMPLETE; ret = -ENOSPC; + need_free = 1; goto out_commit; } } @@ -336,6 +339,20 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, mlog_errno(ret); out_commit: + if (need_free && context->data_ac) { + struct ocfs2_alloc_context *data_ac = context->data_ac; + + if (context->data_ac->ac_which == OCFS2_AC_USE_LOCAL) + ocfs2_free_local_alloc_bits(osb, handle, data_ac, + new_phys_cpos, new_len); + else + ocfs2_free_clusters(handle, + data_ac->ac_inode, + data_ac->ac_bh, + ocfs2_clusters_to_blocks(osb->sb, new_phys_cpos), + new_len); + } + ocfs2_commit_trans(osb, handle); out_unlock_mutex: From ab630e537970e7e3ee2f2539bdbfc9618d761499 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 31 Oct 2018 08:39:13 -0700 Subject: [PATCH 1063/3715] net: do not abort bulk send on BQL status [ Upstream commit fe60faa5063822f2d555f4f326c7dd72a60929bf ] Before calling dev_hard_start_xmit(), upper layers tried to cook optimal skb list based on BQL budget. Problem is that GSO packets can end up comsuming more than the BQL budget. Breaking the loop is not useful, since requeued packets are ahead of any packets still in the qdisc. It is also more expensive, since next TX completion will push these packets later, while skbs are not in cpu caches. It is also a behavior difference with TSO packets, that can break the BQL limit by a large amount. Note that drivers should use __netdev_tx_sent_queue() in order to have optimal xmit_more support, and avoid useless atomic operations as shown in the following patch. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 9d6beb9de924..3ce68484ed5a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3029,7 +3029,7 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *de } skb = next; - if (netif_xmit_stopped(txq) && skb) { + if (netif_tx_queue_stopped(txq) && skb) { rc = NETDEV_TX_BUSY; break; } From d81abc59d3f35ea52ec89abe15c5b80267e8f0a7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 2 Nov 2018 14:22:25 +0100 Subject: [PATCH 1064/3715] sched/topology: Fix off by one bug [ Upstream commit 993f0b0510dad98b4e6e39506834dab0d13fd539 ] With the addition of the NUMA identity level, we increased @level by one and will run off the end of the array in the distance sort loop. Fixed: 051f3ca02e46 ("sched/topology: Introduce NUMA identity node sched domain") Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/sched/topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 9dcd80ed9d4c..867d173dab48 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -1347,7 +1347,7 @@ void sched_init_numa(void) int level = 0; int i, j, k; - sched_domains_numa_distance = kzalloc(sizeof(int) * nr_node_ids, GFP_KERNEL); + sched_domains_numa_distance = kzalloc(sizeof(int) * (nr_node_ids + 1), GFP_KERNEL); if (!sched_domains_numa_distance) return; From 024a3ea35bdae12dd37dc81b5df57e9da01dff99 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Wed, 26 Sep 2018 16:12:07 +0100 Subject: [PATCH 1065/3715] sched/fair: Don't increase sd->balance_interval on newidle balance [ Upstream commit 3f130a37c442d5c4d66531b240ebe9abfef426b5 ] When load_balance() fails to move some load because of task affinity, we end up increasing sd->balance_interval to delay the next periodic balance in the hopes that next time we look, that annoying pinned task(s) will be gone. However, idle_balance() pays no attention to sd->balance_interval, yet it will still lead to an increase in balance_interval in case of pinned tasks. If we're going through several newidle balances (e.g. we have a periodic task), this can lead to a huge increase of the balance_interval in a very small amount of time. To prevent that, don't increase the balance interval when going through a newidle balance. This is a similar approach to what is done in commit 58b26c4c0257 ("sched: Increment cache_nice_tries only on periodic lb"), where we disregard newidle balance and rely on periodic balance for more stable results. Signed-off-by: Valentin Schneider Signed-off-by: Peter Zijlstra (Intel) Cc: Dietmar.Eggemann@arm.com Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: patrick.bellasi@arm.com Cc: vincent.guittot@linaro.org Link: http://lkml.kernel.org/r/1537974727-30788-2-git-send-email-valentin.schneider@arm.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/sched/fair.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index feeb52880d35..67433fbdcb5a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8319,13 +8319,22 @@ out_all_pinned: sd->nr_balance_failed = 0; out_one_pinned: + ld_moved = 0; + + /* + * idle_balance() disregards balance intervals, so we could repeatedly + * reach this code, which would lead to balance_interval skyrocketting + * in a short amount of time. Skip the balance_interval increase logic + * to avoid that. + */ + if (env.idle == CPU_NEWLY_IDLE) + goto out; + /* tune up the balancing interval */ if (((env.flags & LBF_ALL_PINNED) && sd->balance_interval < MAX_PINNED_INTERVAL) || (sd->balance_interval < sd->max_interval)) sd->balance_interval *= 2; - - ld_moved = 0; out: return ld_moved; } From c4509088acdfc8d1018e97ad0350f5edabada984 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 2 Nov 2018 16:36:55 +0100 Subject: [PATCH 1066/3715] openvswitch: fix linking without CONFIG_NF_CONNTRACK_LABELS [ Upstream commit a277d516de5f498c91d91189717ef7e01102ad27 ] When CONFIG_CC_OPTIMIZE_FOR_DEBUGGING is enabled, the compiler fails to optimize out a dead code path, which leads to a link failure: net/openvswitch/conntrack.o: In function `ovs_ct_set_labels': conntrack.c:(.text+0x2e60): undefined reference to `nf_connlabels_replace' In this configuration, we can take a shortcut, and completely remove the contrack label code. This may also help the regular optimization. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/openvswitch/conntrack.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 0171b27a2b81..48d81857961c 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1083,7 +1083,8 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, &info->labels.mask); if (err) return err; - } else if (labels_nonzero(&info->labels.mask)) { + } else if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && + labels_nonzero(&info->labels.mask)) { err = ovs_ct_set_labels(ct, key, &info->labels.value, &info->labels.mask); if (err) From 231b0b5a42316dd3c23145dcbcd34cd04a4cdf7e Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Thu, 18 Oct 2018 15:07:29 +0800 Subject: [PATCH 1067/3715] clk: sunxi-ng: enable so-said LDOs for A64 SoC's pll-mipi clock [ Upstream commit 859783d1390035e29ba850963bded2b4ffdf43b5 ] In the user manual of A64 SoC, the bit 22 and 23 of pll-mipi control register is called "LDO{1,2}_EN", and according to the BSP source code from Allwinner , the LDOs are enabled during the clock's enabling process. The clock failed to generate output if the two LDOs are not enabled. Add the two bits to the clock's gate bits, so that the LDOs are enabled when the PLL is enabled. Fixes: c6a0637460c2 ("clk: sunxi-ng: Add A64 clocks") Signed-off-by: Icenowy Zheng Signed-off-by: Maxime Ripard Signed-off-by: Sasha Levin --- drivers/clk/sunxi-ng/ccu-sun50i-a64.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-a64.c b/drivers/clk/sunxi-ng/ccu-sun50i-a64.c index 2bb4cabf802f..36a30a3cfad7 100644 --- a/drivers/clk/sunxi-ng/ccu-sun50i-a64.c +++ b/drivers/clk/sunxi-ng/ccu-sun50i-a64.c @@ -158,7 +158,12 @@ static SUNXI_CCU_NM_WITH_FRAC_GATE_LOCK(pll_gpu_clk, "pll-gpu", #define SUN50I_A64_PLL_MIPI_REG 0x040 static struct ccu_nkm pll_mipi_clk = { - .enable = BIT(31), + /* + * The bit 23 and 22 are called "LDO{1,2}_EN" on the SoC's + * user manual, and by experiments the PLL doesn't work without + * these bits toggled. + */ + .enable = BIT(31) | BIT(23) | BIT(22), .lock = BIT(28), .n = _SUNXI_CCU_MULT(8, 4), .k = _SUNXI_CCU_MULT_MIN(4, 2, 2), From 92a69ad6ae0b4bed047b7574189f037de61b3db8 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Wed, 10 Oct 2018 16:22:57 -0400 Subject: [PATCH 1068/3715] audit: print empty EXECVE args [ Upstream commit ea956d8be91edc702a98b7fe1f9463e7ca8c42ab ] Empty executable arguments were being skipped when printing out the list of arguments in an EXECVE record, making it appear they were somehow lost. Include empty arguments as an itemized empty string. Reproducer: autrace /bin/ls "" "/etc" ausearch --start recent -m execve -i | grep EXECVE type=EXECVE msg=audit(10/03/2018 13:04:03.208:1391) : argc=3 a0=/bin/ls a2=/etc With fix: type=EXECVE msg=audit(10/03/2018 21:51:38.290:194) : argc=3 a0=/bin/ls a1= a2=/etc type=EXECVE msg=audit(1538617898.290:194): argc=3 a0="/bin/ls" a1="" a2="/etc" Passes audit-testsuite. GH issue tracker at https://github.com/linux-audit/audit-kernel/issues/99 Signed-off-by: Richard Guy Briggs [PM: cleaned up the commit metadata] Signed-off-by: Paul Moore Signed-off-by: Sasha Levin --- kernel/auditsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 76d789d6cea0..ffa8d64f6fef 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1102,7 +1102,7 @@ static void audit_log_execve_info(struct audit_context *context, } /* write as much as we can to the audit log */ - if (len_buf > 0) { + if (len_buf >= 0) { /* NOTE: some magic numbers here - basically if we * can't fit a reasonable amount of data into the * existing audit buffer, flush it and start with From ab5ef999c77ad0b728640e2855840f39fd039c53 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 3 Nov 2018 16:39:28 +0100 Subject: [PATCH 1069/3715] btrfs: avoid link error with CONFIG_NO_AUTO_INLINE [ Upstream commit 7e17916b35797396f681a3270245fd29c1e4c250 ] Note: this patch fixes a problem in a feature outside of btrfs ("kernel hacking: add a config option to disable compiler auto-inlining") and is applied ahead of time due to cross-subsystem dependencies. On 32-bit ARM with gcc-8, I see a link error with the addition of the CONFIG_NO_AUTO_INLINE option: fs/btrfs/super.o: In function `btrfs_statfs': super.c:(.text+0x67b8): undefined reference to `__aeabi_uldivmod' super.c:(.text+0x67fc): undefined reference to `__aeabi_uldivmod' super.c:(.text+0x6858): undefined reference to `__aeabi_uldivmod' super.c:(.text+0x6920): undefined reference to `__aeabi_uldivmod' super.c:(.text+0x693c): undefined reference to `__aeabi_uldivmod' fs/btrfs/super.o:super.c:(.text+0x6958): more undefined references to `__aeabi_uldivmod' follow So far this is the only file that shows the behavior, so I'd propose to just work around it by marking the functions as 'static inline' that normally get inlined here. The reference to __aeabi_uldivmod comes from a div_u64() which has an optimization for a constant division that uses a straight '/' operator when the result should be known to the compiler. My interpretation is that as we turn off inlining, gcc still expects the result to be constant but fails to use that constant value. Link: https://lkml.kernel.org/r/20181103153941.1881966-1-arnd@arndb.de Reviewed-by: Nikolay Borisov Reviewed-by: Changbin Du Signed-off-by: Arnd Bergmann [ add the note ] Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/super.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 49a02bf091ae..204d585e012a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1863,7 +1863,7 @@ restore: } /* Used to sort the devices by max_avail(descending sort) */ -static int btrfs_cmp_device_free_bytes(const void *dev_info1, +static inline int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2) { if (((struct btrfs_device_info *)dev_info1)->max_avail > @@ -1892,8 +1892,8 @@ static inline void btrfs_descending_sort_devices( * The helper to calc the free space on the devices that can be used to store * file data. */ -static int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info, - u64 *free_bytes) +static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info, + u64 *free_bytes) { struct btrfs_device_info *devices_info; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; From db8a90a551b5b09e892014fbc0175cd1aacff940 Mon Sep 17 00:00:00 2001 From: Lior David Date: Wed, 31 Oct 2018 10:52:24 +0200 Subject: [PATCH 1070/3715] wil6210: fix locking in wmi_call [ Upstream commit dc57731dbd535880fe6ced31c229262c34df7d64 ] Switch from spin_lock to spin_lock_irqsave, because wmi_ev_lock is used inside interrupt handler. Signed-off-by: Lior David Signed-off-by: Maya Erez Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/wil6210/wmi.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c index d63d7c326801..798516f42f2f 100644 --- a/drivers/net/wireless/ath/wil6210/wmi.c +++ b/drivers/net/wireless/ath/wil6210/wmi.c @@ -1002,15 +1002,16 @@ int wmi_call(struct wil6210_priv *wil, u16 cmdid, void *buf, u16 len, { int rc; unsigned long remain; + ulong flags; mutex_lock(&wil->wmi_mutex); - spin_lock(&wil->wmi_ev_lock); + spin_lock_irqsave(&wil->wmi_ev_lock, flags); wil->reply_id = reply_id; wil->reply_buf = reply; wil->reply_size = reply_size; reinit_completion(&wil->wmi_call); - spin_unlock(&wil->wmi_ev_lock); + spin_unlock_irqrestore(&wil->wmi_ev_lock, flags); rc = __wmi_send(wil, cmdid, buf, len); if (rc) @@ -1030,11 +1031,11 @@ int wmi_call(struct wil6210_priv *wil, u16 cmdid, void *buf, u16 len, } out: - spin_lock(&wil->wmi_ev_lock); + spin_lock_irqsave(&wil->wmi_ev_lock, flags); wil->reply_id = 0; wil->reply_buf = NULL; wil->reply_size = 0; - spin_unlock(&wil->wmi_ev_lock); + spin_unlock_irqrestore(&wil->wmi_ev_lock, flags); mutex_unlock(&wil->wmi_mutex); From c9a9d45a69a1c4d8f8307158f22579b39a70f3d0 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 16 Oct 2018 09:39:40 +0200 Subject: [PATCH 1071/3715] wlcore: Fix the return value in case of error in 'wlcore_vendor_cmd_smart_config_start()' [ Upstream commit 3419348a97bcc256238101129d69b600ceb5cc70 ] We return 0 unconditionally at the end of 'wlcore_vendor_cmd_smart_config_start()'. However, 'ret' is set to some error codes in several error handling paths and we already return some error codes at the beginning of the function. Return 'ret' instead to propagate the error code. Fixes: 80ff8063e87c ("wlcore: handle smart config vendor commands") Signed-off-by: Christophe JAILLET Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/ti/wlcore/vendor_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ti/wlcore/vendor_cmd.c b/drivers/net/wireless/ti/wlcore/vendor_cmd.c index 5c0bcb1fe1a1..e75c3cee0252 100644 --- a/drivers/net/wireless/ti/wlcore/vendor_cmd.c +++ b/drivers/net/wireless/ti/wlcore/vendor_cmd.c @@ -66,7 +66,7 @@ wlcore_vendor_cmd_smart_config_start(struct wiphy *wiphy, out: mutex_unlock(&wl->mutex); - return 0; + return ret; } static int From cdae1c71cb99ce05ce992646999906dcbb943668 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 22 Oct 2018 13:51:03 +0200 Subject: [PATCH 1072/3715] rtl8xxxu: Fix missing break in switch [ Upstream commit 307b00c5e695857ca92fc6a4b8ab6c48f988a1b1 ] Add missing break statement in order to prevent the code from falling through to the default case. Fixes: 26f1fad29ad9 ("New driver: rtl8xxxu (mac80211)") Signed-off-by: Gustavo A. R. Silva Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 7806a4d2b1fc..91b01ca32e75 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -5691,6 +5691,7 @@ static int rtl8xxxu_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, break; case WLAN_CIPHER_SUITE_TKIP: key->flags |= IEEE80211_KEY_FLAG_GENERATE_MMIC; + break; default: return -EOPNOTSUPP; } From 81883e1b0004d7fdfa53454269469475b2b73b64 Mon Sep 17 00:00:00 2001 From: Ali MJ Al-Nasrawy Date: Tue, 23 Oct 2018 19:12:35 +0300 Subject: [PATCH 1073/3715] brcmsmac: never log "tid x is not agg'able" by default [ Upstream commit 96fca788e5788b7ea3b0050eb35a343637e0a465 ] This message greatly spams the log under heavy Tx of frames with BK access class which is especially true when operating as AP. It is also not informative as the "agg'ablity" of TIDs are set once and never change. Fix this by logging only in debug mode. Signed-off-by: Ali MJ Al-Nasrawy Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- .../net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c index 257968fb3111..66f1f41b1380 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c @@ -846,8 +846,8 @@ brcms_ops_ampdu_action(struct ieee80211_hw *hw, status = brcms_c_aggregatable(wl->wlc, tid); spin_unlock_bh(&wl->lock); if (!status) { - brcms_err(wl->wlc->hw->d11core, - "START: tid %d is not agg\'able\n", tid); + brcms_dbg_ht(wl->wlc->hw->d11core, + "START: tid %d is not agg\'able\n", tid); return -EINVAL; } ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); From b73e048133f363e83fddd0defd4d48b5cdabd839 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 24 Oct 2018 11:33:34 +0300 Subject: [PATCH 1074/3715] wireless: airo: potential buffer overflow in sprintf() [ Upstream commit 3d39e1bb1c88f32820c5f9271f2c8c2fb9a52bac ] It looks like we wanted to print a maximum of BSSList_rid.ssidLen bytes of the ssid, but we accidentally use "%*s" (width) instead of "%.*s" (precision) so if the ssid doesn't have a NUL terminator this could lead to an overflow. Static analysis. Not tested. Fixes: e174961ca1a0 ("net: convert print_mac to %pM") Signed-off-by: Dan Carpenter Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/cisco/airo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c index 54201c02fdb8..fc49255bab00 100644 --- a/drivers/net/wireless/cisco/airo.c +++ b/drivers/net/wireless/cisco/airo.c @@ -5464,7 +5464,7 @@ static int proc_BSSList_open( struct inode *inode, struct file *file ) { we have to add a spin lock... */ rc = readBSSListRid(ai, doLoseSync, &BSSList_rid); while(rc == 0 && BSSList_rid.index != cpu_to_le16(0xffff)) { - ptr += sprintf(ptr, "%pM %*s rssi = %d", + ptr += sprintf(ptr, "%pM %.*s rssi = %d", BSSList_rid.bssid, (int)BSSList_rid.ssidLen, BSSList_rid.ssid, From 54c96ccb3e5cd8f7f213f0d84c3e8ff31c33265b Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Mon, 5 Nov 2018 19:25:30 +0800 Subject: [PATCH 1075/3715] rtlwifi: rtl8192de: Fix misleading REG_MCUFWDL information [ Upstream commit 7d129adff3afbd3a449bc3593f2064ac546d58d3 ] RT_TRACE shows REG_MCUFWDL value as a decimal value with a '0x' prefix, which is somewhat misleading. Fix it to print hexadecimal, as was intended. Cc: Ping-Ke Shih Cc: Kalle Valo Signed-off-by: Shaokun Zhang Acked-by: Ping-Ke Shih Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c index f4129cf96e7c..bad70a4206fb 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c @@ -173,7 +173,7 @@ static int _rtl92d_fw_init(struct ieee80211_hw *hw) rtl_read_byte(rtlpriv, FW_MAC1_READY)); } RT_TRACE(rtlpriv, COMP_FW, DBG_DMESG, - "Polling FW ready fail!! REG_MCUFWDL:0x%08ul\n", + "Polling FW ready fail!! REG_MCUFWDL:0x%08x\n", rtl_read_dword(rtlpriv, REG_MCUFWDL)); return -1; } From 670a0b329da806d0d99dcb47dc71efab61d4633b Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 6 Nov 2018 15:15:16 -0800 Subject: [PATCH 1076/3715] net: dsa: bcm_sf2: Turn on PHY to allow successful registration [ Upstream commit c04a17d2a9ccf1eaba1c5a56f83e997540a70556 ] We are binding to the PHY using the SF2 slave MDIO bus that we create, binding involves reading the PHY's MII_PHYSID1/2 which won't be possible if the PHY is turned off. Temporarily turn it on/off for the bus probing to succeeed. This fixes unbind/bind problems where the port connecting to that PHY would be in error since it could not connect to it. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/dsa/bcm_sf2.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 604c5abc08eb..af666951a959 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1196,12 +1196,16 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) return ret; } + bcm_sf2_gphy_enable_set(priv->dev->ds, true); + ret = bcm_sf2_mdio_register(ds); if (ret) { pr_err("failed to register MDIO bus\n"); return ret; } + bcm_sf2_gphy_enable_set(priv->dev->ds, false); + ret = bcm_sf2_cfp_rst(priv); if (ret) { pr_err("failed to reset CFP\n"); From e9a57946f976ab2c389dffe9c4b3d9e90c1c5268 Mon Sep 17 00:00:00 2001 From: Suganath Prabu Date: Wed, 31 Oct 2018 18:53:36 +0530 Subject: [PATCH 1077/3715] scsi: mpt3sas: Fix Sync cache command failure during driver unload [ Upstream commit 9029a72500b95578a35877a43473b82cb0386c53 ] This is to fix SYNC CACHE and START STOP command failures with DID_NO_CONNECT during driver unload. In driver's IO submission patch (i.e. in driver's .queuecommand()) driver won't allow any SCSI commands to the IOC when ioc->remove_host flag is set and hence SYNC CACHE commands which are issued to the target drives (where write cache is enabled) during driver unload time is failed with DID_NO_CONNECT status. Now modified the driver to allow SYNC CACHE and START STOP commands to IOC, even when remove_host flag is set. Signed-off-by: Suganath Prabu Reviewed-by: Bjorn Helgaas Reviewed-by: Andy Shevchenko Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 36 +++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index b28efddab7b1..9ef0c6265cd2 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -3328,6 +3328,40 @@ _scsih_tm_tr_complete(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, return _scsih_check_for_pending_tm(ioc, smid); } +/** _scsih_allow_scmd_to_device - check whether scmd needs to + * issue to IOC or not. + * @ioc: per adapter object + * @scmd: pointer to scsi command object + * + * Returns true if scmd can be issued to IOC otherwise returns false. + */ +inline bool _scsih_allow_scmd_to_device(struct MPT3SAS_ADAPTER *ioc, + struct scsi_cmnd *scmd) +{ + + if (ioc->pci_error_recovery) + return false; + + if (ioc->hba_mpi_version_belonged == MPI2_VERSION) { + if (ioc->remove_host) + return false; + + return true; + } + + if (ioc->remove_host) { + + switch (scmd->cmnd[0]) { + case SYNCHRONIZE_CACHE: + case START_STOP: + return true; + default: + return false; + } + } + + return true; +} /** * _scsih_sas_control_complete - completion routine @@ -4100,7 +4134,7 @@ scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd) return 0; } - if (ioc->pci_error_recovery || ioc->remove_host) { + if (!(_scsih_allow_scmd_to_device(ioc, scmd))) { scmd->result = DID_NO_CONNECT << 16; scmd->scsi_done(scmd); return 0; From fdb507e489275e3433911c8ec04c7420a6a29e1c Mon Sep 17 00:00:00 2001 From: Suganath Prabu Date: Wed, 31 Oct 2018 18:53:37 +0530 Subject: [PATCH 1078/3715] scsi: mpt3sas: Don't modify EEDPTagMode field setting on SAS3.5 HBA devices [ Upstream commit 6cd1bc7b9b5075d395ba0120923903873fc7ea0e ] If EEDPTagMode field in manufacturing page11 is set then unset it. This is needed to fix a hardware bug only in SAS3/SAS2 cards. So, skipping EEDPTagMode changes in Manufacturing page11 for SAS 3.5 controllers. Signed-off-by: Suganath Prabu Reviewed-by: Bjorn Helgaas Reviewed-by: Andy Shevchenko Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/mpt3sas/mpt3sas_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 7bfe53f48d1d..817a7963a038 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -3140,7 +3140,7 @@ _base_static_config_pages(struct MPT3SAS_ADAPTER *ioc) * flag unset in NVDATA. */ mpt3sas_config_get_manufacturing_pg11(ioc, &mpi_reply, &ioc->manu_pg11); - if (ioc->manu_pg11.EEDPTagMode == 0) { + if (!ioc->is_gen35_ioc && ioc->manu_pg11.EEDPTagMode == 0) { pr_err("%s: overriding NVDATA EEDPTagMode setting\n", ioc->name); ioc->manu_pg11.EEDPTagMode &= ~0x3; From 68daeaf6040670f88db77de718c7126be1f64d07 Mon Sep 17 00:00:00 2001 From: Suganath Prabu Date: Wed, 31 Oct 2018 18:53:38 +0530 Subject: [PATCH 1079/3715] scsi: mpt3sas: Fix driver modifying persistent data in Manufacturing page11 [ Upstream commit 97f35194093362a63b33caba2485521ddabe2c95 ] Currently driver is modifying both current & NVRAM/persistent data in Manufacturing page11. Driver should change only current copy of Manufacturing page11. It should not modify the persistent data. So removed the section of code where driver is modifying the persistent data of Manufacturing page11. Signed-off-by: Suganath Prabu Reviewed-by: Bjorn Helgaas Reviewed-by: Andy Shevchenko Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/mpt3sas/mpt3sas_config.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_config.c b/drivers/scsi/mpt3sas/mpt3sas_config.c index dd6270125614..58acbff40abc 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_config.c +++ b/drivers/scsi/mpt3sas/mpt3sas_config.c @@ -674,10 +674,6 @@ mpt3sas_config_set_manufacturing_pg11(struct MPT3SAS_ADAPTER *ioc, r = _config_request(ioc, &mpi_request, mpi_reply, MPT3_CONFIG_PAGE_DEFAULT_TIMEOUT, config_page, sizeof(*config_page)); - mpi_request.Action = MPI2_CONFIG_ACTION_PAGE_WRITE_NVRAM; - r = _config_request(ioc, &mpi_request, mpi_reply, - MPT3_CONFIG_PAGE_DEFAULT_TIMEOUT, config_page, - sizeof(*config_page)); out: return r; } From ebe4d12e4ece1441eef65cc242efe37bfe8fbb0b Mon Sep 17 00:00:00 2001 From: Shivasharan S Date: Tue, 16 Oct 2018 23:37:41 -0700 Subject: [PATCH 1080/3715] scsi: megaraid_sas: Fix msleep granularity [ Upstream commit 9155cf30a3c4ef97e225d6daddf9bd4b173267e8 ] In megasas_transition_to_ready() driver waits 180seconds for controller to change FW state. Here we are calling msleep(1) in a loop for this. As explained in timers-howto.txt, msleep(1) will actually sleep longer than 1ms. If a faulty controller is connected, we will end up waiting for much more than 180 seconds causing unnecessary delays during load. Change the granularity of msleep() call from 1ms to 1000ms. Signed-off-by: Shivasharan S Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/megaraid/megaraid_sas_base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 8595d83229b7..23a9f0777fa6 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -3823,12 +3823,12 @@ megasas_transition_to_ready(struct megasas_instance *instance, int ocr) /* * The cur_state should not last for more than max_wait secs */ - for (i = 0; i < (max_wait * 1000); i++) { + for (i = 0; i < max_wait; i++) { curr_abs_state = instance->instancet-> read_fw_status_reg(instance->reg_set); if (abs_state == curr_abs_state) { - msleep(1); + msleep(1000); } else break; } From 34b5a88f3aa5fd205b246b8ebc19fdd0f58a6a74 Mon Sep 17 00:00:00 2001 From: Shivasharan S Date: Tue, 16 Oct 2018 23:37:44 -0700 Subject: [PATCH 1081/3715] scsi: megaraid_sas: Fix goto labels in error handling [ Upstream commit 8a25fa17b6ed6e6c8101e9c68a10ae68a9025f2c ] During init, if pci_alloc_irq_vectors() fails, the driver has not yet setup the IRQs. Fix the goto labels and error handling for this case. Signed-off-by: Shivasharan S Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/megaraid/megaraid_sas_base.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 23a9f0777fa6..577513649afb 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -5324,7 +5324,7 @@ static int megasas_init_fw(struct megasas_instance *instance) if (!instance->msix_vectors) { i = pci_alloc_irq_vectors(instance->pdev, 1, 1, PCI_IRQ_LEGACY); if (i < 0) - goto fail_setup_irqs; + goto fail_init_adapter; } megasas_setup_reply_map(instance); @@ -5541,9 +5541,8 @@ static int megasas_init_fw(struct megasas_instance *instance) fail_get_ld_pd_list: instance->instancet->disable_intr(instance); -fail_init_adapter: megasas_destroy_irqs(instance); -fail_setup_irqs: +fail_init_adapter: if (instance->msix_vectors) pci_free_irq_vectors(instance->pdev); instance->msix_vectors = 0; From 7e47227230d4cac4c9534de35e8637103dac0630 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 23 Oct 2018 13:41:06 -0700 Subject: [PATCH 1082/3715] scsi: lpfc: fcoe: Fix link down issue after 1000+ link bounces [ Upstream commit 036cad1f1ac9ce03e2db94b8460f98eaf1e1ee4c ] On FCoE adapters, when running link bounce test in a loop, initiator failed to login with switch switch and required driver reload to recover. Switch reached a point where all subsequent FLOGIs would be LS_RJT'd. Further testing showed the condition to be related to not performing FCF discovery between FLOGI's. Fix by monitoring FLOGI failures and once a repeated error is seen repeat FCF discovery. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_els.c | 2 ++ drivers/scsi/lpfc/lpfc_hbadisc.c | 20 ++++++++++++++++++++ drivers/scsi/lpfc/lpfc_init.c | 2 +- drivers/scsi/lpfc/lpfc_sli.c | 11 ++--------- drivers/scsi/lpfc/lpfc_sli4.h | 1 + 5 files changed, 26 insertions(+), 10 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index ddd29752d96d..95449f97101d 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -1152,6 +1152,7 @@ stop_rr_fcf_flogi: phba->fcf.fcf_flag &= ~FCF_DISCOVERY; phba->hba_flag &= ~(FCF_RR_INPROG | HBA_DEVLOSS_TMO); spin_unlock_irq(&phba->hbalock); + phba->fcf.fcf_redisc_attempted = 0; /* reset */ goto out; } if (!rc) { @@ -1166,6 +1167,7 @@ stop_rr_fcf_flogi: phba->fcf.fcf_flag &= ~FCF_DISCOVERY; phba->hba_flag &= ~(FCF_RR_INPROG | HBA_DEVLOSS_TMO); spin_unlock_irq(&phba->hbalock); + phba->fcf.fcf_redisc_attempted = 0; /* reset */ goto out; } } diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index b970933a218d..d850077c5e22 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -1999,6 +1999,26 @@ int lpfc_sli4_fcf_rr_next_proc(struct lpfc_vport *vport, uint16_t fcf_index) "failover and change port state:x%x/x%x\n", phba->pport->port_state, LPFC_VPORT_UNKNOWN); phba->pport->port_state = LPFC_VPORT_UNKNOWN; + + if (!phba->fcf.fcf_redisc_attempted) { + lpfc_unregister_fcf(phba); + + rc = lpfc_sli4_redisc_fcf_table(phba); + if (!rc) { + lpfc_printf_log(phba, KERN_INFO, LOG_FIP, + "3195 Rediscover FCF table\n"); + phba->fcf.fcf_redisc_attempted = 1; + lpfc_sli4_clear_fcf_rr_bmask(phba); + } else { + lpfc_printf_log(phba, KERN_WARNING, LOG_FIP, + "3196 Rediscover FCF table " + "failed. Status:x%x\n", rc); + } + } else { + lpfc_printf_log(phba, KERN_WARNING, LOG_FIP, + "3197 Already rediscover FCF table " + "attempted. No more retry\n"); + } goto stop_flogi_current_fcf; } else { lpfc_printf_log(phba, KERN_INFO, LOG_FIP | LOG_ELS, diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 25612ccf6ff2..15bcd00dd7a2 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -4997,7 +4997,7 @@ lpfc_sli4_async_fip_evt(struct lpfc_hba *phba, break; } /* If fast FCF failover rescan event is pending, do nothing */ - if (phba->fcf.fcf_flag & FCF_REDISC_EVT) { + if (phba->fcf.fcf_flag & (FCF_REDISC_EVT | FCF_REDISC_PEND)) { spin_unlock_irq(&phba->hbalock); break; } diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 6c2b098b7609..ebf7d3cda367 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -18056,15 +18056,8 @@ next_priority: goto initial_priority; lpfc_printf_log(phba, KERN_WARNING, LOG_FIP, "2844 No roundrobin failover FCF available\n"); - if (next_fcf_index >= LPFC_SLI4_FCF_TBL_INDX_MAX) - return LPFC_FCOE_FCF_NEXT_NONE; - else { - lpfc_printf_log(phba, KERN_WARNING, LOG_FIP, - "3063 Only FCF available idx %d, flag %x\n", - next_fcf_index, - phba->fcf.fcf_pri[next_fcf_index].fcf_rec.flag); - return next_fcf_index; - } + + return LPFC_FCOE_FCF_NEXT_NONE; } if (next_fcf_index < LPFC_SLI4_FCF_TBL_INDX_MAX && diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 60200385fe00..a132a83ef233 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -265,6 +265,7 @@ struct lpfc_fcf { #define FCF_REDISC_EVT 0x100 /* FCF rediscovery event to worker thread */ #define FCF_REDISC_FOV 0x200 /* Post FCF rediscovery fast failover */ #define FCF_REDISC_PROG (FCF_REDISC_PEND | FCF_REDISC_EVT) + uint16_t fcf_redisc_attempted; uint32_t addr_mode; uint32_t eligible_fcf_cnt; struct lpfc_fcf_rec current_rec; From 275cda44213c75f5830dc8fc667923aac04dd410 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 23 Oct 2018 13:41:09 -0700 Subject: [PATCH 1083/3715] scsi: lpfc: Correct loss of fc4 type on remote port address change [ Upstream commit d83ca3ea833d7a66d49225e4191c4e37cab8f079 ] An address change for a remote port cause PRLI for the wrong protocol to be sent. The node copy done in the discovery code skipped copying the fc4 protocols supported as well. Fix the copy logic for the address change. Beefed up log messages in this area as well. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_els.c | 26 +++++++++++++++++++++++--- drivers/scsi/lpfc/lpfc_nportdisc.c | 5 +++-- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 95449f97101d..e5db20e8979d 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -1550,8 +1550,10 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp, */ new_ndlp = lpfc_findnode_wwpn(vport, &sp->portName); + /* return immediately if the WWPN matches ndlp */ if (new_ndlp == ndlp && NLP_CHK_NODE_ACT(new_ndlp)) return ndlp; + if (phba->sli_rev == LPFC_SLI_REV4) { active_rrqs_xri_bitmap = mempool_alloc(phba->active_rrq_pool, GFP_KERNEL); @@ -1560,9 +1562,13 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp, phba->cfg_rrq_xri_bitmap_sz); } - lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, - "3178 PLOGI confirm: ndlp %p x%x: new_ndlp %p\n", - ndlp, ndlp->nlp_DID, new_ndlp); + lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS | LOG_NODE, + "3178 PLOGI confirm: ndlp x%x x%x x%x: " + "new_ndlp x%x x%x x%x\n", + ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_fc4_type, + (new_ndlp ? new_ndlp->nlp_DID : 0), + (new_ndlp ? new_ndlp->nlp_flag : 0), + (new_ndlp ? new_ndlp->nlp_fc4_type : 0)); if (!new_ndlp) { rc = memcmp(&ndlp->nlp_portname, name, @@ -1611,6 +1617,14 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp, phba->cfg_rrq_xri_bitmap_sz); } + /* At this point in this routine, we know new_ndlp will be + * returned. however, any previous GID_FTs that were done + * would have updated nlp_fc4_type in ndlp, so we must ensure + * new_ndlp has the right value. + */ + if (vport->fc_flag & FC_FABRIC) + new_ndlp->nlp_fc4_type = ndlp->nlp_fc4_type; + lpfc_unreg_rpi(vport, new_ndlp); new_ndlp->nlp_DID = ndlp->nlp_DID; new_ndlp->nlp_prev_state = ndlp->nlp_prev_state; @@ -1732,6 +1746,12 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp, active_rrqs_xri_bitmap) mempool_free(active_rrqs_xri_bitmap, phba->active_rrq_pool); + + lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS | LOG_NODE, + "3173 PLOGI confirm exit: new_ndlp x%x x%x x%x\n", + new_ndlp->nlp_DID, new_ndlp->nlp_flag, + new_ndlp->nlp_fc4_type); + return new_ndlp; } diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index a0658d158228..043bca6449cd 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -2829,8 +2829,9 @@ lpfc_disc_state_machine(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, /* DSM in event on NPort in state */ lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, "0211 DSM in event x%x on NPort x%x in " - "state %d Data: x%x\n", - evt, ndlp->nlp_DID, cur_state, ndlp->nlp_flag); + "state %d Data: x%x x%x\n", + evt, ndlp->nlp_DID, cur_state, + ndlp->nlp_flag, ndlp->nlp_fc4_type); lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_DSM, "DSM in: evt:%d ste:%d did:x%x", From eabc0324f896bfaec98bf749c88753f5dfa2fed8 Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Fri, 2 Nov 2018 14:18:20 -0600 Subject: [PATCH 1084/3715] dlm: fix invalid free [ Upstream commit d968b4e240cfe39d39d80483bac8bca8716fd93c ] dlm_config_nodes() does not allocate nodes on failure, so we should not free() nodes when it fails. Signed-off-by: Tycho Andersen Signed-off-by: David Teigland Signed-off-by: Sasha Levin --- fs/dlm/member.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/dlm/member.c b/fs/dlm/member.c index 3fda3832cf6a..cad6d85911a8 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c @@ -680,7 +680,7 @@ int dlm_ls_start(struct dlm_ls *ls) error = dlm_config_nodes(ls->ls_name, &nodes, &count); if (error < 0) - goto fail; + goto fail_rv; spin_lock(&ls->ls_recover_lock); @@ -712,8 +712,9 @@ int dlm_ls_start(struct dlm_ls *ls) return 0; fail: - kfree(rv); kfree(nodes); + fail_rv: + kfree(rv); return error; } From fcd1fe97aebbb18118e2fe9be6c940bab695c285 Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Fri, 2 Nov 2018 14:18:22 -0600 Subject: [PATCH 1085/3715] dlm: don't leak kernel pointer to userspace [ Upstream commit 9de30f3f7f4d31037cfbb7c787e1089c1944b3a7 ] In copy_result_to_user(), we first create a struct dlm_lock_result, which contains a struct dlm_lksb, the last member of which is a pointer to the lvb. Unfortunately, we copy the entire struct dlm_lksb to the result struct, which is then copied to userspace at the end of the function, leaking the contents of sb_lvbptr, which is a valid kernel pointer in some cases (indeed, later in the same function the data it points to is copied to userspace). It is an error to leak kernel pointers to userspace, as it undermines KASLR protections (see e.g. 65eea8edc31 ("floppy: Do not copy a kernel pointer to user memory in FDGETPRM ioctl") for another example of this). Signed-off-by: Tycho Andersen Signed-off-by: David Teigland Signed-off-by: Sasha Levin --- fs/dlm/user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/dlm/user.c b/fs/dlm/user.c index d18e7a539f11..1f0c071d4a86 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -702,7 +702,7 @@ static int copy_result_to_user(struct dlm_user_args *ua, int compat, result.version[0] = DLM_DEVICE_VERSION_MAJOR; result.version[1] = DLM_DEVICE_VERSION_MINOR; result.version[2] = DLM_DEVICE_VERSION_PATCH; - memcpy(&result.lksb, &ua->lksb, sizeof(struct dlm_lksb)); + memcpy(&result.lksb, &ua->lksb, offsetof(struct dlm_lksb, sb_lvbptr)); result.user_lksb = ua->user_lksb; /* FIXME: dlm1 provides for the user's bastparam/addr to not be updated From 2271c9500434af2a26b2c9eadeb3c0b075409fb5 Mon Sep 17 00:00:00 2001 From: Mike Manning Date: Wed, 7 Nov 2018 15:36:07 +0000 Subject: [PATCH 1086/3715] vrf: mark skb for multicast or link-local as enslaved to VRF [ Upstream commit 6f12fa775530195a501fb090d092c637f32d0cc5 ] The skb for packets that are multicast or to a link-local address are not marked as being enslaved to a VRF, if they are received on a socket bound to the VRF. This is needed for ND and it is preferable for the kernel not to have to deal with the additional use-cases if ll or mcast packets are handled as enslaved. However, this does not allow service instances listening on unbound and bound to VRF sockets to distinguish the VRF used, if packets are sent as multicast or to a link-local address. The fix is for the VRF driver to also mark these skb as being enslaved to the VRF. Signed-off-by: Mike Manning Reviewed-by: David Ahern Tested-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/vrf.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 03e4fcdfeab7..e0cea5c05f0e 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -996,24 +996,23 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, struct sk_buff *skb) { int orig_iif = skb->skb_iif; - bool need_strict; + bool need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr); + bool is_ndisc = ipv6_ndisc_frame(skb); - /* loopback traffic; do not push through packet taps again. - * Reset pkt_type for upper layers to process skb + /* loopback, multicast & non-ND link-local traffic; do not push through + * packet taps again. Reset pkt_type for upper layers to process skb */ - if (skb->pkt_type == PACKET_LOOPBACK) { + if (skb->pkt_type == PACKET_LOOPBACK || (need_strict && !is_ndisc)) { skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; IP6CB(skb)->flags |= IP6SKB_L3SLAVE; - skb->pkt_type = PACKET_HOST; + if (skb->pkt_type == PACKET_LOOPBACK) + skb->pkt_type = PACKET_HOST; goto out; } - /* if packet is NDISC or addressed to multicast or link-local - * then keep the ingress interface - */ - need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr); - if (!ipv6_ndisc_frame(skb) && !need_strict) { + /* if packet is NDISC then keep the ingress interface */ + if (!is_ndisc) { vrf_rx_stats(vrf_dev, skb->len); skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; From 1939580da1f592c7ba8122b02c6d6532ed64452e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 5 Nov 2018 09:43:52 -0800 Subject: [PATCH 1087/3715] ACPICA: Use %d for signed int print formatting instead of %u [ Upstream commit f8ddf49b420112e28bdd23d7ad52d7991a0ccbe3 ] Fix warnings found using static analysis with cppcheck, use %d printf format specifier for signed ints rather than %u Signed-off-by: Colin Ian King Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- tools/power/acpi/tools/acpidump/apmain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/acpi/tools/acpidump/apmain.c b/tools/power/acpi/tools/acpidump/apmain.c index 943b6b614683..bed0794e3295 100644 --- a/tools/power/acpi/tools/acpidump/apmain.c +++ b/tools/power/acpi/tools/acpidump/apmain.c @@ -139,7 +139,7 @@ static int ap_insert_action(char *argument, u32 to_be_done) current_action++; if (current_action > AP_MAX_ACTIONS) { - fprintf(stderr, "Too many table options (max %u)\n", + fprintf(stderr, "Too many table options (max %d)\n", AP_MAX_ACTIONS); return (-1); } From 9f49ba339b3af5b0a833178b2529c6b9873f7e6f Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 8 Nov 2018 02:08:43 +0000 Subject: [PATCH 1088/3715] net: bcmgenet: return correct value 'ret' from bcmgenet_power_down [ Upstream commit 0db55093b56618088b9a1d445eb6e43b311bea33 ] Fixes gcc '-Wunused-but-set-variable' warning: drivers/net/ethernet/broadcom/genet/bcmgenet.c: In function 'bcmgenet_power_down': drivers/net/ethernet/broadcom/genet/bcmgenet.c:1136:6: warning: variable 'ret' set but not used [-Wunused-but-set-variable] bcmgenet_power_down should return 'ret' instead of 0. Fixes: ca8cf341903f ("net: bcmgenet: propagate errors from bcmgenet_power_down") Signed-off-by: YueHaibing Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 1cc4fb27c13b..b6af286fa5c7 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1138,7 +1138,7 @@ static int bcmgenet_power_down(struct bcmgenet_priv *priv, break; } - return 0; + return ret; } static void bcmgenet_power_up(struct bcmgenet_priv *priv, From 6aa04c8e8bbcc8aafd4d5fdbdef646eb871d5cde Mon Sep 17 00:00:00 2001 From: Frank Rowand Date: Fri, 12 Oct 2018 19:38:26 -0700 Subject: [PATCH 1089/3715] of: unittest: allow base devicetree to have symbol metadata [ Upstream commit 5babefb7f7ab1f23861336d511cc666fa45ede82 ] The overlay metadata nodes in the FDT created from testcases.dts are not handled properly. The __fixups__ and __local_fixups__ node were added to the live devicetree, but should not be. Only the first property in the /__symbols__ node was added to the live devicetree if the live devicetree already contained a /__symbols node. All of the node's properties must be added. Tested-by: Alan Tull Signed-off-by: Frank Rowand Signed-off-by: Sasha Levin --- drivers/of/unittest.c | 43 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 87650d42682f..9d204649c963 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -910,20 +910,44 @@ static void __init of_unittest_platform_populate(void) * of np into dup node (present in live tree) and * updates parent of children of np to dup. * - * @np: node already present in live tree + * @np: node whose properties are being added to the live tree * @dup: node present in live tree to be updated */ static void update_node_properties(struct device_node *np, struct device_node *dup) { struct property *prop; + struct property *save_next; struct device_node *child; - - for_each_property_of_node(np, prop) - of_add_property(dup, prop); + int ret; for_each_child_of_node(np, child) child->parent = dup; + + /* + * "unittest internal error: unable to add testdata property" + * + * If this message reports a property in node '/__symbols__' then + * the respective unittest overlay contains a label that has the + * same name as a label in the live devicetree. The label will + * be in the live devicetree only if the devicetree source was + * compiled with the '-@' option. If you encounter this error, + * please consider renaming __all__ of the labels in the unittest + * overlay dts files with an odd prefix that is unlikely to be + * used in a real devicetree. + */ + + /* + * open code for_each_property_of_node() because of_add_property() + * sets prop->next to NULL + */ + for (prop = np->properties; prop != NULL; prop = save_next) { + save_next = prop->next; + ret = of_add_property(dup, prop); + if (ret) + pr_err("unittest internal error: unable to add testdata property %pOF/%s", + np, prop->name); + } } /** @@ -932,18 +956,23 @@ static void update_node_properties(struct device_node *np, * * @np: Node to attach to live tree */ -static int attach_node_and_children(struct device_node *np) +static void attach_node_and_children(struct device_node *np) { struct device_node *next, *dup, *child; unsigned long flags; const char *full_name; full_name = kasprintf(GFP_KERNEL, "%pOF", np); + + if (!strcmp(full_name, "/__local_fixups__") || + !strcmp(full_name, "/__fixups__")) + return; + dup = of_find_node_by_path(full_name); kfree(full_name); if (dup) { update_node_properties(np, dup); - return 0; + return; } child = np->child; @@ -964,8 +993,6 @@ static int attach_node_and_children(struct device_node *np) attach_node_and_children(child); child = next; } - - return 0; } /** From 6cfe9bdfd794e8f91d48e422bfbfad921303b034 Mon Sep 17 00:00:00 2001 From: Sriram R Date: Fri, 19 Oct 2018 14:42:59 +0530 Subject: [PATCH 1090/3715] cfg80211: Prevent regulatory restore during STA disconnect in concurrent interfaces [ Upstream commit 113f3aaa81bd56aba02659786ed65cbd9cb9a6fc ] Currently when an AP and STA interfaces are active in the same or different radios, regulatory settings are restored whenever the STA disconnects. This restores all channel information including dfs states in all radios. For example, if an AP interface is active in one radio and STA in another, when radar is detected on the AP interface, the dfs state of the channel will be changed to UNAVAILABLE. But when the STA interface disconnects, this issues a regulatory disconnect hint which restores all regulatory settings in all the radios attached and thereby losing the stored dfs state on the other radio where the channel was marked as unavailable earlier. Hence prevent such regulatory restore whenever another active beaconing interface is present in the same or other radios. Signed-off-by: Sriram R Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/sme.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/wireless/sme.c b/net/wireless/sme.c index d014aea07160..66cccd16c24a 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -642,11 +642,15 @@ static bool cfg80211_is_all_idle(void) * All devices must be idle as otherwise if you are actively * scanning some new beacon hints could be learned and would * count as new regulatory hints. + * Also if there is any other active beaconing interface we + * need not issue a disconnect hint and reset any info such + * as chan dfs state, etc. */ list_for_each_entry(rdev, &cfg80211_rdev_list, list) { list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { wdev_lock(wdev); - if (wdev->conn || wdev->current_bss) + if (wdev->conn || wdev->current_bss || + cfg80211_beaconing_iface_active(wdev)) is_all_idle = false; wdev_unlock(wdev); } From a6f3f79932f9cbd7f5f98efe825091174a1799c1 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Wed, 31 Oct 2018 20:11:47 -0400 Subject: [PATCH 1091/3715] pinctrl: qcom: spmi-gpio: fix gpio-hog related boot issues [ Upstream commit 149a96047237574b756d872007c006acd0cc6687 ] When attempting to setup up a gpio hog, device probing would repeatedly fail with -EPROBE_DEFERED errors. It was caused by a circular dependency between the gpio and pinctrl frameworks. If the gpio-ranges property is present in device tree, then the gpio framework will handle the gpio pin registration and eliminate the circular dependency. See Christian Lamparter's commit a86caa9ba5d7 ("pinctrl: msm: fix gpio-hog related boot issues") for a detailed commit message that explains the issue in much more detail. The code comment in this commit came from Christian's commit. Signed-off-by: Brian Masney Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/qcom/pinctrl-spmi-gpio.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c index 22aaf4375fac..0f0049dfaa3a 100644 --- a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c +++ b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c @@ -1023,10 +1023,23 @@ static int pmic_gpio_probe(struct platform_device *pdev) return ret; } - ret = gpiochip_add_pin_range(&state->chip, dev_name(dev), 0, 0, npins); - if (ret) { - dev_err(dev, "failed to add pin range\n"); - goto err_range; + /* + * For DeviceTree-supported systems, the gpio core checks the + * pinctrl's device node for the "gpio-ranges" property. + * If it is present, it takes care of adding the pin ranges + * for the driver. In this case the driver can skip ahead. + * + * In order to remain compatible with older, existing DeviceTree + * files which don't set the "gpio-ranges" property or systems that + * utilize ACPI the driver has to call gpiochip_add_pin_range(). + */ + if (!of_property_read_bool(dev->of_node, "gpio-ranges")) { + ret = gpiochip_add_pin_range(&state->chip, dev_name(dev), 0, 0, + npins); + if (ret) { + dev_err(dev, "failed to add pin range\n"); + goto err_range; + } } return 0; From 3a93991bbb0c5195d7561d7b8b229135f467edba Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 1 Nov 2018 08:00:08 -0700 Subject: [PATCH 1092/3715] pinctrl: lpc18xx: Use define directive for PIN_CONFIG_GPIO_PIN_INT [ Upstream commit f24bfb39975c241374cadebbd037c17960cf1412 ] Clang warns when one enumerated type is implicitly converted to another: drivers/pinctrl/pinctrl-lpc18xx.c:643:29: warning: implicit conversion from enumeration type 'enum lpc18xx_pin_config_param' to different enumeration type 'enum pin_config_param' [-Wenum-conversion] {"nxp,gpio-pin-interrupt", PIN_CONFIG_GPIO_PIN_INT, 0}, ~ ^~~~~~~~~~~~~~~~~~~~~~~ drivers/pinctrl/pinctrl-lpc18xx.c:648:12: warning: implicit conversion from enumeration type 'enum lpc18xx_pin_config_param' to different enumeration type 'enum pin_config_param' [-Wenum-conversion] PCONFDUMP(PIN_CONFIG_GPIO_PIN_INT, "gpio pin int", NULL, true), ~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ./include/linux/pinctrl/pinconf-generic.h:163:11: note: expanded from macro 'PCONFDUMP' .param = a, .display = b, .format = c, .has_arg = d \ ^ 2 warnings generated. It is expected that pinctrl drivers can extend pin_config_param because of the gap between PIN_CONFIG_END and PIN_CONFIG_MAX so this conversion isn't an issue. Most drivers that take advantage of this define the PIN_CONFIG variables as constants, rather than enumerated values. Do the same thing here so that Clang no longer warns. Link: https://github.com/ClangBuiltLinux/linux/issues/140 Signed-off-by: Nathan Chancellor Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/pinctrl-lpc18xx.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/pinctrl/pinctrl-lpc18xx.c b/drivers/pinctrl/pinctrl-lpc18xx.c index d090f37ca4a1..8b4e3582af6e 100644 --- a/drivers/pinctrl/pinctrl-lpc18xx.c +++ b/drivers/pinctrl/pinctrl-lpc18xx.c @@ -630,14 +630,8 @@ static const struct pinctrl_pin_desc lpc18xx_pins[] = { LPC18XX_PIN(i2c0_sda, PIN_I2C0_SDA), }; -/** - * enum lpc18xx_pin_config_param - possible pin configuration parameters - * @PIN_CONFIG_GPIO_PIN_INT: route gpio to the gpio pin interrupt - * controller. - */ -enum lpc18xx_pin_config_param { - PIN_CONFIG_GPIO_PIN_INT = PIN_CONFIG_END + 1, -}; +/* PIN_CONFIG_GPIO_PIN_INT: route gpio to the gpio pin interrupt controller */ +#define PIN_CONFIG_GPIO_PIN_INT (PIN_CONFIG_END + 1) static const struct pinconf_generic_params lpc18xx_params[] = { {"nxp,gpio-pin-interrupt", PIN_CONFIG_GPIO_PIN_INT, 0}, From 0c7ebaf98433eee21d5418244e38c673b215875e Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 7 Nov 2018 01:56:40 -0700 Subject: [PATCH 1093/3715] pinctrl: zynq: Use define directive for PIN_CONFIG_IO_STANDARD [ Upstream commit cd8a145a066a1a3beb0ae615c7cb2ee4217418d7 ] Clang warns when one enumerated type is implicitly converted to another: drivers/pinctrl/pinctrl-zynq.c:985:18: warning: implicit conversion from enumeration type 'enum zynq_pin_config_param' to different enumeration type 'enum pin_config_param' [-Wenum-conversion] {"io-standard", PIN_CONFIG_IOSTANDARD, zynq_iostd_lvcmos18}, ~ ^~~~~~~~~~~~~~~~~~~~~ drivers/pinctrl/pinctrl-zynq.c:990:16: warning: implicit conversion from enumeration type 'enum zynq_pin_config_param' to different enumeration type 'enum pin_config_param' [-Wenum-conversion] = { PCONFDUMP(PIN_CONFIG_IOSTANDARD, "IO-standard", NULL, true), ~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ./include/linux/pinctrl/pinconf-generic.h:163:11: note: expanded from macro 'PCONFDUMP' .param = a, .display = b, .format = c, .has_arg = d \ ^ 2 warnings generated. It is expected that pinctrl drivers can extend pin_config_param because of the gap between PIN_CONFIG_END and PIN_CONFIG_MAX so this conversion isn't an issue. Most drivers that take advantage of this define the PIN_CONFIG variables as constants, rather than enumerated values. Do the same thing here so that Clang no longer warns. Signed-off-by: Nathan Chancellor Acked-by: Michal Simek Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/pinctrl-zynq.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/pinctrl/pinctrl-zynq.c b/drivers/pinctrl/pinctrl-zynq.c index a0daf27042bd..90fd37e8207b 100644 --- a/drivers/pinctrl/pinctrl-zynq.c +++ b/drivers/pinctrl/pinctrl-zynq.c @@ -971,15 +971,12 @@ enum zynq_io_standards { zynq_iostd_max }; -/** - * enum zynq_pin_config_param - possible pin configuration parameters - * @PIN_CONFIG_IOSTANDARD: if the pin can select an IO standard, the argument to +/* + * PIN_CONFIG_IOSTANDARD: if the pin can select an IO standard, the argument to * this parameter (on a custom format) tells the driver which alternative * IO standard to use. */ -enum zynq_pin_config_param { - PIN_CONFIG_IOSTANDARD = PIN_CONFIG_END + 1, -}; +#define PIN_CONFIG_IOSTANDARD (PIN_CONFIG_END + 1) static const struct pinconf_generic_params zynq_dt_params[] = { {"io-standard", PIN_CONFIG_IOSTANDARD, zynq_iostd_lvcmos18}, From a2d8b2fa76db5aee9f742645519a68c49a9178d3 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Wed, 17 Oct 2018 13:10:54 +0530 Subject: [PATCH 1094/3715] PCI: keystone: Use quirk to limit MRRS for K2G [ Upstream commit 148e340c0696369fadbbddc8f4bef801ed247d71 ] PCI controller in K2G also has a limitation that memory read request size (MRRS) must not exceed 256 bytes. Use the quirk to limit MRRS (added for K2HK, K2L and K2E) for K2G as well. Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Lorenzo Pieralisi Signed-off-by: Sasha Levin --- drivers/pci/dwc/pci-keystone.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pci/dwc/pci-keystone.c b/drivers/pci/dwc/pci-keystone.c index 9bc52e4cf52a..3ea8288c1605 100644 --- a/drivers/pci/dwc/pci-keystone.c +++ b/drivers/pci/dwc/pci-keystone.c @@ -39,6 +39,7 @@ #define PCIE_RC_K2HK 0xb008 #define PCIE_RC_K2E 0xb009 #define PCIE_RC_K2L 0xb00a +#define PCIE_RC_K2G 0xb00b #define to_keystone_pcie(x) dev_get_drvdata((x)->dev) @@ -53,6 +54,8 @@ static void quirk_limit_mrrs(struct pci_dev *dev) .class = PCI_CLASS_BRIDGE_PCI << 8, .class_mask = ~0, }, { PCI_DEVICE(PCI_VENDOR_ID_TI, PCIE_RC_K2L), .class = PCI_CLASS_BRIDGE_PCI << 8, .class_mask = ~0, }, + { PCI_DEVICE(PCI_VENDOR_ID_TI, PCIE_RC_K2G), + .class = PCI_CLASS_BRIDGE_PCI << 8, .class_mask = ~0, }, { 0, }, }; From 13ae5985660904a43880850a22fe73d80381bb95 Mon Sep 17 00:00:00 2001 From: Vignesh R Date: Tue, 15 Jan 2019 12:28:32 +0530 Subject: [PATCH 1095/3715] spi: omap2-mcspi: Fix DMA and FIFO event trigger size mismatch [ Upstream commit baf8b9f8d260c55a86405f70a384c29cda888476 ] Commit b682cffa3ac6 ("spi: omap2-mcspi: Set FIFO DMA trigger level to word length") broke SPI transfers where bits_per_word != 8. This is because of mimsatch between McSPI FIFO level event trigger size (SPI word length) and DMA request size(word length * maxburst). This leads to data corruption, lockup and errors like: spi1.0: EOW timed out Fix this by setting DMA maxburst size to 1 so that McSPI FIFO level event trigger size matches DMA request size. Fixes: b682cffa3ac6 ("spi: omap2-mcspi: Set FIFO DMA trigger level to word length") Cc: stable@vger.kernel.org Reported-by: David Lechner Tested-by: David Lechner Signed-off-by: Vignesh R Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-omap2-mcspi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c index 517d0ade586b..1db4d3c1d2bf 100644 --- a/drivers/spi/spi-omap2-mcspi.c +++ b/drivers/spi/spi-omap2-mcspi.c @@ -625,8 +625,8 @@ omap2_mcspi_txrx_dma(struct spi_device *spi, struct spi_transfer *xfer) cfg.dst_addr = cs->phys + OMAP2_MCSPI_TX0; cfg.src_addr_width = width; cfg.dst_addr_width = width; - cfg.src_maxburst = es; - cfg.dst_maxburst = es; + cfg.src_maxburst = 1; + cfg.dst_maxburst = 1; rx = xfer->rx_buf; tx = xfer->tx_buf; From f15199f288f9423d813a85a3841e701a74743e34 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 6 Dec 2018 12:55:25 +0900 Subject: [PATCH 1096/3715] i2c: uniphier-f: fix timeout error after reading 8 bytes [ Upstream commit c2a653deaa81f5a750c0dfcbaf9f8e5195cbe4a5 ] I was totally screwed up in commit eaba68785c2d ("i2c: uniphier-f: fix race condition when IRQ is cleared"). Since that commit, if the number of read bytes is multiple of the FIFO size (8, 16, 24... bytes), the STOP condition could be issued twice, depending on the timing. If this happens, the controller will go wrong, resulting in the timeout error. It was more than 3 years ago when I wrote this driver, so my memory about this hardware was vague. Please let me correct the description in the commit log of eaba68785c2d. Clearing the IRQ status on exiting the IRQ handler is absolutely fine. This controller makes a pause while any IRQ status is asserted. If the IRQ status is cleared first, the hardware may start the next transaction before the IRQ handler finishes what it supposed to do. This partially reverts the bad commit with clear comments so that I will never repeat this mistake. I also investigated what is happening at the last moment of the read mode. The UNIPHIER_FI2C_INT_RF interrupt is asserted a bit earlier (by half a period of the clock cycle) than UNIPHIER_FI2C_INT_RB. I consulted a hardware engineer, and I got the following information: UNIPHIER_FI2C_INT_RF asserted at the falling edge of SCL at the 8th bit. UNIPHIER_FI2C_INT_RB asserted at the rising edge of SCL at the 9th (ACK) bit. In order to avoid calling uniphier_fi2c_stop() twice, check the latter interrupt. I also commented this because it is obscure hardware internal. Fixes: eaba68785c2d ("i2c: uniphier-f: fix race condition when IRQ is cleared") Signed-off-by: Masahiro Yamada Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-uniphier-f.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-uniphier-f.c b/drivers/i2c/busses/i2c-uniphier-f.c index 928ea9930d17..dd0687e36a47 100644 --- a/drivers/i2c/busses/i2c-uniphier-f.c +++ b/drivers/i2c/busses/i2c-uniphier-f.c @@ -173,8 +173,6 @@ static irqreturn_t uniphier_fi2c_interrupt(int irq, void *dev_id) "interrupt: enabled_irqs=%04x, irq_status=%04x\n", priv->enabled_irqs, irq_status); - uniphier_fi2c_clear_irqs(priv, irq_status); - if (irq_status & UNIPHIER_FI2C_INT_STOP) goto complete; @@ -214,7 +212,13 @@ static irqreturn_t uniphier_fi2c_interrupt(int irq, void *dev_id) if (irq_status & (UNIPHIER_FI2C_INT_RF | UNIPHIER_FI2C_INT_RB)) { uniphier_fi2c_drain_rxfifo(priv); - if (!priv->len) + /* + * If the number of bytes to read is multiple of the FIFO size + * (msg->len == 8, 16, 24, ...), the INT_RF bit is set a little + * earlier than INT_RB. We wait for INT_RB to confirm the + * completion of the current message. + */ + if (!priv->len && (irq_status & UNIPHIER_FI2C_INT_RB)) goto data_done; if (unlikely(priv->flags & UNIPHIER_FI2C_MANUAL_NACK)) { @@ -253,6 +257,13 @@ complete: } handled: + /* + * This controller makes a pause while any bit of the IRQ status is + * asserted. Clear the asserted bit to kick the controller just before + * exiting the handler. + */ + uniphier_fi2c_clear_irqs(priv, irq_status); + spin_unlock(&priv->lock); return IRQ_HANDLED; From 058aa7307046ee8afd4386bb9710df5518d94569 Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Mon, 8 Apr 2019 12:07:17 +0800 Subject: [PATCH 1097/3715] mm/memory_hotplug: Do not unlock when fails to take the device_hotplug_lock [ Upstream commit d2ab99403ee00d8014e651728a4702ea1ae5e52c ] When adding the memory by probing memory block in sysfs interface, there is an obvious issue that we will unlock the device_hotplug_lock when fails to takes it. That issue was introduced in Commit 8df1d0e4a265 ("mm/memory_hotplug: make add_memory() take the device_hotplug_lock") We should drop out in time when fails to take the device_hotplug_lock. Fixes: 8df1d0e4a265 ("mm/memory_hotplug: make add_memory() take the device_hotplug_lock") Reported-by: Yang yingliang Signed-off-by: zhong jiang Reviewed-by: Oscar Salvador Reviewed-by: David Hildenbrand Acked-by: Michal Hocko Cc: stable Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/base/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 8e5818e735e2..fe1557aa9b10 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -519,7 +519,7 @@ memory_probe_store(struct device *dev, struct device_attribute *attr, ret = lock_device_hotplug_sysfs(); if (ret) - goto out; + return ret; nid = memory_add_physaddr_to_nid(phys_addr); ret = __add_memory(nid, phys_addr, From 0293f8d1bdd21b3eb71032edb5832f9090dea48e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 12 Dec 2018 15:27:38 -0800 Subject: [PATCH 1098/3715] ipv6: Fix handling of LLA with VRF and sockets bound to VRF [ Upstream commit c2027d1e17582903e368abf5d4838b22a98f2b7b ] A recent commit allows sockets bound to a VRF to receive ipv6 link local packets. However, it only works for UDP and worse TCP connection attempts to the LLA with the only listener bound to the VRF just hang where as before the client gets a reset and connection refused. Fix by adjusting ir_iif for LL addresses and packets received through a device enslaved to a VRF. Fixes: 6f12fa775530 ("vrf: mark skb for multicast or link-local as enslaved to VRF") Reported-by: Donald Sharp Cc: Mike Manning Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv6/tcp_ipv6.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 7b4ce3f9e2f4..5ec73cf386df 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -718,6 +718,7 @@ static void tcp_v6_init_req(struct request_sock *req, const struct sock *sk_listener, struct sk_buff *skb) { + bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); struct inet_request_sock *ireq = inet_rsk(req); const struct ipv6_pinfo *np = inet6_sk(sk_listener); @@ -725,7 +726,7 @@ static void tcp_v6_init_req(struct request_sock *req, ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; /* So that link locals have meaning */ - if (!sk_listener->sk_bound_dev_if && + if ((!sk_listener->sk_bound_dev_if || l3_slave) && ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) ireq->ir_iif = tcp_v6_iif(skb); From 23ec01fdb1c3b9c6c4bf8399f74bb6e297e640e8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 1 Feb 2019 11:09:54 +0100 Subject: [PATCH 1099/3715] cfg80211: call disconnect_wk when AP stops [ Upstream commit e005bd7ddea06784c1eb91ac5bb6b171a94f3b05 ] Since we now prevent regulatory restore during STA disconnect if concurrent AP interfaces are active, we need to reschedule this check when the AP state changes. This fixes never doing a restore when an AP is the last interface to stop. Or to put it another way: we need to re-check after anything we check here changes. Cc: stable@vger.kernel.org Fixes: 113f3aaa81bd ("cfg80211: Prevent regulatory restore during STA disconnect in concurrent interfaces") Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/ap.c | 2 ++ net/wireless/core.h | 2 ++ net/wireless/sme.c | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/net/wireless/ap.c b/net/wireless/ap.c index 63682176c96c..c4bd3ecef508 100644 --- a/net/wireless/ap.c +++ b/net/wireless/ap.c @@ -40,6 +40,8 @@ int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, cfg80211_sched_dfs_chan_update(rdev); } + schedule_work(&cfg80211_disconnect_work); + return err; } diff --git a/net/wireless/core.h b/net/wireless/core.h index 90f90c7d8bf9..507ec6446eb6 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -429,6 +429,8 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev); bool cfg80211_does_bw_fit_range(const struct ieee80211_freq_range *freq_range, u32 center_freq_khz, u32 bw_khz); +extern struct work_struct cfg80211_disconnect_work; + /** * cfg80211_chandef_dfs_usable - checks if chandef is DFS usable * @wiphy: the wiphy to validate against diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 66cccd16c24a..8344153800e2 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -667,7 +667,7 @@ static void disconnect_work(struct work_struct *work) rtnl_unlock(); } -static DECLARE_WORK(cfg80211_disconnect_work, disconnect_work); +DECLARE_WORK(cfg80211_disconnect_work, disconnect_work); /* From 79d404a2aa86efe4f1ade51e054318bd811cce71 Mon Sep 17 00:00:00 2001 From: Tomas Bortoli Date: Fri, 1 Nov 2019 21:42:44 +0100 Subject: [PATCH 1100/3715] Bluetooth: Fix invalid-free in bcsp_close() commit cf94da6f502d8caecabd56b194541c873c8a7a3c upstream. Syzbot reported an invalid-free that I introduced fixing a memleak. bcsp_recv() also frees bcsp->rx_skb but never nullifies its value. Nullify bcsp->rx_skb every time it is freed. Signed-off-by: Tomas Bortoli Reported-by: syzbot+a0d209a4676664613e76@syzkaller.appspotmail.com Signed-off-by: Marcel Holtmann Cc: Alexander Potapenko Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/hci_bcsp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/bluetooth/hci_bcsp.c b/drivers/bluetooth/hci_bcsp.c index 57a7f4255ac0..ee6c403de6af 100644 --- a/drivers/bluetooth/hci_bcsp.c +++ b/drivers/bluetooth/hci_bcsp.c @@ -605,6 +605,7 @@ static int bcsp_recv(struct hci_uart *hu, const void *data, int count) if (*ptr == 0xc0) { BT_ERR("Short BCSP packet"); kfree_skb(bcsp->rx_skb); + bcsp->rx_skb = NULL; bcsp->rx_state = BCSP_W4_PKT_START; bcsp->rx_count = 0; } else @@ -620,6 +621,7 @@ static int bcsp_recv(struct hci_uart *hu, const void *data, int count) bcsp->rx_skb->data[2])) != bcsp->rx_skb->data[3]) { BT_ERR("Error in BCSP hdr checksum"); kfree_skb(bcsp->rx_skb); + bcsp->rx_skb = NULL; bcsp->rx_state = BCSP_W4_PKT_DELIMITER; bcsp->rx_count = 0; continue; @@ -644,6 +646,7 @@ static int bcsp_recv(struct hci_uart *hu, const void *data, int count) bscp_get_crc(bcsp)); kfree_skb(bcsp->rx_skb); + bcsp->rx_skb = NULL; bcsp->rx_state = BCSP_W4_PKT_DELIMITER; bcsp->rx_count = 0; continue; From fdfce30d9877e61f14692eb70df7f76a42a3726b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 11 Nov 2019 14:12:27 -0800 Subject: [PATCH 1101/3715] KVM: MMU: Do not treat ZONE_DEVICE pages as being reserved commit a78986aae9b2988f8493f9f65a587ee433e83bc3 upstream. Explicitly exempt ZONE_DEVICE pages from kvm_is_reserved_pfn() and instead manually handle ZONE_DEVICE on a case-by-case basis. For things like page refcounts, KVM needs to treat ZONE_DEVICE pages like normal pages, e.g. put pages grabbed via gup(). But for flows such as setting A/D bits or shifting refcounts for transparent huge pages, KVM needs to to avoid processing ZONE_DEVICE pages as the flows in question lack the underlying machinery for proper handling of ZONE_DEVICE pages. This fixes a hang reported by Adam Borowski[*] in dev_pagemap_cleanup() when running a KVM guest backed with /dev/dax memory, as KVM straight up doesn't put any references to ZONE_DEVICE pages acquired by gup(). Note, Dan Williams proposed an alternative solution of doing put_page() on ZONE_DEVICE pages immediately after gup() in order to simplify the auditing needed to ensure is_zone_device_page() is called if and only if the backing device is pinned (via gup()). But that approach would break kvm_vcpu_{un}map() as KVM requires the page to be pinned from map() 'til unmap() when accessing guest memory, unlike KVM's secondary MMU, which coordinates with mmu_notifier invalidations to avoid creating stale page references, i.e. doesn't rely on pages being pinned. [*] http://lkml.kernel.org/r/20190919115547.GA17963@angband.pl Reported-by: Adam Borowski Analyzed-by: David Hildenbrand Acked-by: Dan Williams Cc: stable@vger.kernel.org Fixes: 3565fce3a659 ("mm, x86: get_user_pages() for dax mappings") Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini [sean: backport to 4.x; resolve conflict in mmu.c] Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/mmu.c | 8 ++++---- include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 26 +++++++++++++++++++++++--- 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 8cd26e50d41c..c0b0135ef07f 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3177,7 +3177,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, * here. */ if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) && - level == PT_PAGE_TABLE_LEVEL && + !kvm_is_zone_device_pfn(pfn) && level == PT_PAGE_TABLE_LEVEL && PageTransCompoundMap(pfn_to_page(pfn)) && !mmu_gfn_lpage_is_disallowed(vcpu, gfn, PT_DIRECTORY_LEVEL)) { unsigned long mask; @@ -5344,9 +5344,9 @@ restart: * the guest, and the guest page table is using 4K page size * mapping if the indirect sp has level = 1. */ - if (sp->role.direct && - !kvm_is_reserved_pfn(pfn) && - PageTransCompoundMap(pfn_to_page(pfn))) { + if (sp->role.direct && !kvm_is_reserved_pfn(pfn) && + !kvm_is_zone_device_pfn(pfn) && + PageTransCompoundMap(pfn_to_page(pfn))) { drop_spte(kvm, sptep); need_tlb_flush = 1; goto restart; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index bb4758ffd403..7668c68ddb5b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -890,6 +890,7 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); bool kvm_is_reserved_pfn(kvm_pfn_t pfn); +bool kvm_is_zone_device_pfn(kvm_pfn_t pfn); struct kvm_irq_ack_notifier { struct hlist_node link; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ea61162b2b53..cdaacdf7bc87 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -142,10 +142,30 @@ __weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, { } +bool kvm_is_zone_device_pfn(kvm_pfn_t pfn) +{ + /* + * The metadata used by is_zone_device_page() to determine whether or + * not a page is ZONE_DEVICE is guaranteed to be valid if and only if + * the device has been pinned, e.g. by get_user_pages(). WARN if the + * page_count() is zero to help detect bad usage of this helper. + */ + if (!pfn_valid(pfn) || WARN_ON_ONCE(!page_count(pfn_to_page(pfn)))) + return false; + + return is_zone_device_page(pfn_to_page(pfn)); +} + bool kvm_is_reserved_pfn(kvm_pfn_t pfn) { + /* + * ZONE_DEVICE pages currently set PG_reserved, but from a refcounting + * perspective they are "normal" pages, albeit with slightly different + * usage rules. + */ if (pfn_valid(pfn)) - return PageReserved(pfn_to_page(pfn)); + return PageReserved(pfn_to_page(pfn)) && + !kvm_is_zone_device_pfn(pfn); return true; } @@ -1730,7 +1750,7 @@ static void kvm_release_pfn_dirty(kvm_pfn_t pfn) void kvm_set_pfn_dirty(kvm_pfn_t pfn) { - if (!kvm_is_reserved_pfn(pfn)) { + if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn)) { struct page *page = pfn_to_page(pfn); if (!PageReserved(page)) @@ -1741,7 +1761,7 @@ EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty); void kvm_set_pfn_accessed(kvm_pfn_t pfn) { - if (!kvm_is_reserved_pfn(pfn)) + if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn)) mark_page_accessed(pfn_to_page(pfn)); } EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed); From 19ba14ec43060a0646a5f1f366abf75478499500 Mon Sep 17 00:00:00 2001 From: Hui Peng Date: Sat, 19 Oct 2019 14:42:23 +0300 Subject: [PATCH 1102/3715] ath10k: Fix a NULL-ptr-deref bug in ath10k_usb_alloc_urb_from_pipe commit bfd6e6e6c5d2ee43a3d9902b36e01fc7527ebb27 upstream. The `ar_usb` field of `ath10k_usb_pipe_usb_pipe` objects are initialized to point to the containing `ath10k_usb` object according to endpoint descriptors read from the device side, as shown below in `ath10k_usb_setup_pipe_resources`: for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { endpoint = &iface_desc->endpoint[i].desc; // get the address from endpoint descriptor pipe_num = ath10k_usb_get_logical_pipe_num(ar_usb, endpoint->bEndpointAddress, &urbcount); ...... // select the pipe object pipe = &ar_usb->pipes[pipe_num]; // initialize the ar_usb field pipe->ar_usb = ar_usb; } The driver assumes that the addresses reported in endpoint descriptors from device side to be complete. If a device is malicious and does not report complete addresses, it may trigger NULL-ptr-deref `ath10k_usb_alloc_urb_from_pipe` and `ath10k_usb_free_urb_to_pipe`. This patch fixes the bug by preventing potential NULL-ptr-deref. Signed-off-by: Hui Peng Reported-by: Hui Peng Reported-by: Mathias Payer Reviewed-by: Greg Kroah-Hartman [groeck: Add driver tag to subject, fix build warning] Signed-off-by: Guenter Roeck Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/usb.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/usb.c b/drivers/net/wireless/ath/ath10k/usb.c index f09a4ad2e9de..f9c79e21ab22 100644 --- a/drivers/net/wireless/ath/ath10k/usb.c +++ b/drivers/net/wireless/ath/ath10k/usb.c @@ -49,6 +49,10 @@ ath10k_usb_alloc_urb_from_pipe(struct ath10k_usb_pipe *pipe) struct ath10k_urb_context *urb_context = NULL; unsigned long flags; + /* bail if this pipe is not initialized */ + if (!pipe->ar_usb) + return NULL; + spin_lock_irqsave(&pipe->ar_usb->cs_lock, flags); if (!list_empty(&pipe->urb_list_head)) { urb_context = list_first_entry(&pipe->urb_list_head, @@ -66,6 +70,10 @@ static void ath10k_usb_free_urb_to_pipe(struct ath10k_usb_pipe *pipe, { unsigned long flags; + /* bail if this pipe is not initialized */ + if (!pipe->ar_usb) + return; + spin_lock_irqsave(&pipe->ar_usb->cs_lock, flags); pipe->urb_cnt++; From 91bb5e09c9c9820e67e962d5bb00bfb86dcfbf48 Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Fri, 27 Sep 2019 01:56:04 +0300 Subject: [PATCH 1103/3715] ath9k_hw: fix uninitialized variable data commit 80e84f36412e0c5172447b6947068dca0d04ee82 upstream. Currently, data variable in ar9003_hw_thermo_cal_apply() could be uninitialized if ar9300_otp_read_word() will fail to read the value. Initialize data variable with 0 to prevent an undefined behavior. This will be enough to handle error case when ar9300_otp_read_word() fails. Fixes: 80fe43f2bbd5 ("ath9k_hw: Read and configure thermocal for AR9462") Cc: Rajkumar Manoharan Cc: John W. Linville Cc: Kalle Valo Cc: "David S. Miller" Cc: stable@vger.kernel.org Signed-off-by: Denis Efremov Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/ar9003_eeprom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c index 3dbfd86ebe36..76385834a7de 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c @@ -4116,7 +4116,7 @@ static void ar9003_hw_thermometer_apply(struct ath_hw *ah) static void ar9003_hw_thermo_cal_apply(struct ath_hw *ah) { - u32 data, ko, kg; + u32 data = 0, ko, kg; if (!AR_SREV_9462_20_OR_LATER(ah)) return; From 02e98a0d49ba63e70af4da25a728ab51d7e6bee8 Mon Sep 17 00:00:00 2001 From: John Pittman Date: Mon, 11 Nov 2019 16:43:20 -0800 Subject: [PATCH 1104/3715] md/raid10: prevent access of uninitialized resync_pages offset commit 45422b704db392a6d79d07ee3e3670b11048bd53 upstream. Due to unneeded multiplication in the out_free_pages portion of r10buf_pool_alloc(), when using a 3-copy raid10 layout, it is possible to access a resync_pages offset that has not been initialized. This access translates into a crash of the system within resync_free_pages() while passing a bad pointer to put_page(). Remove the multiplication, preventing access to the uninitialized area. Fixes: f0250618361db ("md: raid10: don't use bio's vec table to manage resync pages") Cc: stable@vger.kernel.org # 4.12+ Signed-off-by: John Pittman Suggested-by: David Jeffery Reviewed-by: Laurence Oberman Signed-off-by: Song Liu Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid10.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 433e78f453da..d08d77b9674f 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -226,7 +226,7 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data) out_free_pages: while (--j >= 0) - resync_free_pages(&rps[j * 2]); + resync_free_pages(&rps[j]); j = 0; out_free_bio: From b5ca5acbe49449598882b102440df587e40c147a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 21 Nov 2019 17:53:56 -0800 Subject: [PATCH 1105/3715] mm/memory_hotplug: don't access uninitialized memmaps in shrink_zone_span() commit 7ce700bf11b5e2cb84e4352bbdf2123a7a239c84 upstream. Let's limit shrinking to !ZONE_DEVICE so we can fix the current code. We should never try to touch the memmap of offline sections where we could have uninitialized memmaps and could trigger BUGs when calling page_to_nid() on poisoned pages. There is no reliable way to distinguish an uninitialized memmap from an initialized memmap that belongs to ZONE_DEVICE, as we don't have anything like SECTION_IS_ONLINE we can use similar to pfn_to_online_section() for !ZONE_DEVICE memory. E.g., set_zone_contiguous() similarly relies on pfn_to_online_section() and will therefore never set a ZONE_DEVICE zone consecutive. Stopping to shrink the ZONE_DEVICE therefore results in no observable changes, besides /proc/zoneinfo indicating different boundaries - something we can totally live with. Before commit d0dc12e86b31 ("mm/memory_hotplug: optimize memory hotplug"), the memmap was initialized with 0 and the node with the right value. So the zone might be wrong but not garbage. After that commit, both the zone and the node will be garbage when touching uninitialized memmaps. Toshiki reported a BUG (race between delayed initialization of ZONE_DEVICE memmaps without holding the memory hotplug lock and concurrent zone shrinking). https://lkml.org/lkml/2019/11/14/1040 "Iteration of create and destroy namespace causes the panic as below: kernel BUG at mm/page_alloc.c:535! CPU: 7 PID: 2766 Comm: ndctl Not tainted 5.4.0-rc4 #6 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.0-0-g63451fca13-prebuilt.qemu-project.org 04/01/2014 RIP: 0010:set_pfnblock_flags_mask+0x95/0xf0 Call Trace: memmap_init_zone_device+0x165/0x17c memremap_pages+0x4c1/0x540 devm_memremap_pages+0x1d/0x60 pmem_attach_disk+0x16b/0x600 [nd_pmem] nvdimm_bus_probe+0x69/0x1c0 really_probe+0x1c2/0x3e0 driver_probe_device+0xb4/0x100 device_driver_attach+0x4f/0x60 bind_store+0xc9/0x110 kernfs_fop_write+0x116/0x190 vfs_write+0xa5/0x1a0 ksys_write+0x59/0xd0 do_syscall_64+0x5b/0x180 entry_SYSCALL_64_after_hwframe+0x44/0xa9 While creating a namespace and initializing memmap, if you destroy the namespace and shrink the zone, it will initialize the memmap outside the zone and trigger VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page) in set_pfnblock_flags_mask()." This BUG is also mitigated by this commit, where we for now stop to shrink the ZONE_DEVICE zone until we can do it in a safe and clean way. Link: http://lkml.kernel.org/r/20191006085646.5768-5-david@redhat.com Fixes: f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") [visible after d0dc12e86b319] Signed-off-by: David Hildenbrand Reported-by: Aneesh Kumar K.V Reported-by: Toshiki Fukasawa Cc: Oscar Salvador Cc: David Hildenbrand Cc: Michal Hocko Cc: Pavel Tatashin Cc: Dan Williams Cc: Alexander Duyck Cc: Alexander Potapenko Cc: Andy Lutomirski Cc: Anshuman Khandual Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Damian Tometzki Cc: Dave Hansen Cc: Fenghua Yu Cc: Gerald Schaefer Cc: Greg Kroah-Hartman Cc: Halil Pasic Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Ira Weiny Cc: Jason Gunthorpe Cc: Jun Yao Cc: Logan Gunthorpe Cc: Mark Rutland Cc: Masahiro Yamada Cc: "Matthew Wilcox (Oracle)" Cc: Mel Gorman Cc: Michael Ellerman Cc: Mike Rapoport Cc: Pankaj Gupta Cc: Paul Mackerras Cc: Pavel Tatashin Cc: Peter Zijlstra Cc: Qian Cai Cc: Rich Felker Cc: Robin Murphy Cc: Steve Capper Cc: Thomas Gleixner Cc: Tom Lendacky Cc: Tony Luck Cc: Vasily Gorbik Cc: Vlastimil Babka Cc: Wei Yang Cc: Wei Yang Cc: Will Deacon Cc: Yoshinori Sato Cc: Yu Zhao Cc: [4.13+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: David Hildenbrand Signed-off-by: Greg Kroah-Hartman --- mm/memory_hotplug.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index e368a4e0c7cb..2d6626ab29d1 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -343,12 +343,8 @@ static unsigned long find_smallest_section_pfn(int nid, struct zone *zone, unsigned long start_pfn, unsigned long end_pfn) { - struct mem_section *ms; - for (; start_pfn < end_pfn; start_pfn += PAGES_PER_SECTION) { - ms = __pfn_to_section(start_pfn); - - if (unlikely(!valid_section(ms))) + if (unlikely(!pfn_to_online_page(start_pfn))) continue; if (unlikely(pfn_to_nid(start_pfn) != nid)) @@ -368,15 +364,12 @@ static unsigned long find_biggest_section_pfn(int nid, struct zone *zone, unsigned long start_pfn, unsigned long end_pfn) { - struct mem_section *ms; unsigned long pfn; /* pfn is the end pfn of a memory section. */ pfn = end_pfn - 1; for (; pfn >= start_pfn; pfn -= PAGES_PER_SECTION) { - ms = __pfn_to_section(pfn); - - if (unlikely(!valid_section(ms))) + if (unlikely(!pfn_to_online_page(pfn))) continue; if (unlikely(pfn_to_nid(pfn) != nid)) @@ -398,7 +391,6 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn, unsigned long z = zone_end_pfn(zone); /* zone_end_pfn namespace clash */ unsigned long zone_end_pfn = z; unsigned long pfn; - struct mem_section *ms; int nid = zone_to_nid(zone); zone_span_writelock(zone); @@ -436,9 +428,7 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn, */ pfn = zone_start_pfn; for (; pfn < zone_end_pfn; pfn += PAGES_PER_SECTION) { - ms = __pfn_to_section(pfn); - - if (unlikely(!valid_section(ms))) + if (unlikely(!pfn_to_online_page(pfn))) continue; if (page_zone(pfn_to_page(pfn)) != zone) @@ -494,6 +484,16 @@ static void __remove_zone(struct zone *zone, unsigned long start_pfn) int nr_pages = PAGES_PER_SECTION; unsigned long flags; +#ifdef CONFIG_ZONE_DEVICE + /* + * Zone shrinking code cannot properly deal with ZONE_DEVICE. So + * we will not try to shrink the zones - which is okay as + * set_zone_contiguous() cannot deal with ZONE_DEVICE either way. + */ + if (zone_idx(zone) == ZONE_DEVICE) + return; +#endif + pgdat_resize_lock(zone->zone_pgdat, &flags); shrink_zone_span(zone, start_pfn, start_pfn + nr_pages); update_pgdat_span(pgdat); From 14c047598f4a2b4f6343fdf383de6770bca80ce8 Mon Sep 17 00:00:00 2001 From: Max Uvarov Date: Tue, 28 May 2019 13:00:49 +0300 Subject: [PATCH 1106/3715] net: phy: dp83867: fix speed 10 in sgmii mode commit 333061b924539c0de081339643f45514f5f1c1e6 upstream. For supporting 10Mps speed in SGMII mode DP83867_10M_SGMII_RATE_ADAPT bit of DP83867_10M_SGMII_CFG register has to be cleared by software. That does not affect speeds 100 and 1000 so can be done on init. Signed-off-by: Max Uvarov Cc: Heiner Kallweit Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller [ adapted for kernels without phy_modify_mmd ] Signed-off-by: Adrian Bunk Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/dp83867.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index 12b09e6e03ba..81106314e6da 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -37,6 +37,8 @@ #define DP83867_STRAP_STS1 0x006E #define DP83867_RGMIIDCTL 0x0086 #define DP83867_IO_MUX_CFG 0x0170 +#define DP83867_10M_SGMII_CFG 0x016F +#define DP83867_10M_SGMII_RATE_ADAPT_MASK BIT(7) #define DP83867_SW_RESET BIT(15) #define DP83867_SW_RESTART BIT(14) @@ -283,6 +285,23 @@ static int dp83867_config_init(struct phy_device *phydev) } } + if (phydev->interface == PHY_INTERFACE_MODE_SGMII) { + /* For support SPEED_10 in SGMII mode + * DP83867_10M_SGMII_RATE_ADAPT bit + * has to be cleared by software. That + * does not affect SPEED_100 and + * SPEED_1000. + */ + val = phy_read_mmd(phydev, DP83867_DEVADDR, + DP83867_10M_SGMII_CFG); + val &= ~DP83867_10M_SGMII_RATE_ADAPT_MASK; + ret = phy_write_mmd(phydev, DP83867_DEVADDR, + DP83867_10M_SGMII_CFG, val); + + if (ret) + return ret; + } + /* Enable Interrupt output INT_OE in CFG3 register */ if (phy_interrupt_is_valid(phydev)) { val = phy_read(phydev, DP83867_CFG3); From 60d489b574e765465e2ca9c730588383e8d8d1a8 Mon Sep 17 00:00:00 2001 From: Max Uvarov Date: Tue, 28 May 2019 13:00:50 +0300 Subject: [PATCH 1107/3715] net: phy: dp83867: increase SGMII autoneg timer duration commit 1a97a477e666cbdededab93bd3754e508f0c09d7 upstream. After reset SGMII Autoneg timer is set to 2us (bits 6 and 5 are 01). That is not enough to finalize autonegatiation on some devices. Increase this timer duration to maximum supported 16ms. Signed-off-by: Max Uvarov Cc: Heiner Kallweit Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller [ adapted for kernels without phy_modify_mmd ] Signed-off-by: Adrian Bunk Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/dp83867.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index 81106314e6da..e03e91d5f1b1 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -33,6 +33,12 @@ /* Extended Registers */ #define DP83867_CFG4 0x0031 +#define DP83867_CFG4_SGMII_ANEG_MASK (BIT(5) | BIT(6)) +#define DP83867_CFG4_SGMII_ANEG_TIMER_11MS (3 << 5) +#define DP83867_CFG4_SGMII_ANEG_TIMER_800US (2 << 5) +#define DP83867_CFG4_SGMII_ANEG_TIMER_2US (1 << 5) +#define DP83867_CFG4_SGMII_ANEG_TIMER_16MS (0 << 5) + #define DP83867_RGMIICTL 0x0032 #define DP83867_STRAP_STS1 0x006E #define DP83867_RGMIIDCTL 0x0086 @@ -300,6 +306,18 @@ static int dp83867_config_init(struct phy_device *phydev) if (ret) return ret; + + /* After reset SGMII Autoneg timer is set to 2us (bits 6 and 5 + * are 01). That is not enough to finalize autoneg on some + * devices. Increase this timer duration to maximum 16ms. + */ + val = phy_read_mmd(phydev, DP83867_DEVADDR, DP83867_CFG4); + val &= ~DP83867_CFG4_SGMII_ANEG_MASK; + val |= DP83867_CFG4_SGMII_ANEG_TIMER_16MS; + ret = phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_CFG4, val); + + if (ret) + return ret; } /* Enable Interrupt output INT_OE in CFG3 register */ From ff711a424576865f96337a29f02f932a84015d4e Mon Sep 17 00:00:00 2001 From: Hari Vyas Date: Tue, 7 Aug 2018 16:33:48 +0530 Subject: [PATCH 1108/3715] arm64: fix for bad_mode() handler to always result in panic commit e4ba15debcfd27f60d43da940a58108783bff2a6 upstream. The bad_mode() handler is called if we encounter an uunknown exception, with the expectation that the subsequent call to panic() will halt the system. Unfortunately, if the exception calling bad_mode() is taken from EL0, then the call to die() can end up killing the current user task and calling schedule() instead of falling through to panic(). Remove the die() call altogether, since we really want to bring down the machine in this "impossible" case. Signed-off-by: Hari Vyas Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/traps.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index a4e49e947684..5ae9c86c30d1 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -648,7 +648,6 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) handler[reason], smp_processor_id(), esr, esr_get_class_string(esr)); - die("Oops - bad mode", regs, 0); local_irq_disable(); panic("bad mode"); } From 5b07222df0e9c2ecee316bcec2f9b3c2ecb937dd Mon Sep 17 00:00:00 2001 From: Bo Yan Date: Tue, 23 Jan 2018 13:57:55 -0800 Subject: [PATCH 1109/3715] cpufreq: Skip cpufreq resume if it's not suspended commit 703cbaa601ff3fb554d1246c336ba727cc083ea0 upstream. cpufreq_resume can be called even without preceding cpufreq_suspend. This can happen in following scenario: suspend_devices_and_enter --> dpm_suspend_start --> dpm_prepare --> device_prepare : this function errors out --> dpm_suspend: this is skipped due to dpm_prepare failure this means cpufreq_suspend is skipped over --> goto Recover_platform, due to previous error --> goto Resume_devices --> dpm_resume_end --> dpm_resume --> cpufreq_resume In case schedutil is used as frequency governor, cpufreq_resume will eventually call sugov_start, which does following: memset(sg_cpu, 0, sizeof(*sg_cpu)); .... This effectively erases function pointer for frequency update, causing crash later on. The function pointer would have been set correctly if subsequent cpufreq_add_update_util_hook runs successfully, but that function returns earlier because cpufreq_suspend was not called: if (WARN_ON(per_cpu(cpufreq_update_util_data, cpu))) return; The fix is to check cpufreq_suspended first, if it's false, that means cpufreq_suspend was not called in the first place, so do not resume cpufreq. Signed-off-by: Bo Yan Acked-by: Viresh Kumar [ rjw: Dropped printing a message ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/cpufreq.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 4aa3c5331666..52fc08a92bb9 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1673,6 +1673,9 @@ void cpufreq_resume(void) if (!cpufreq_driver) return; + if (unlikely(!cpufreq_suspended)) + return; + cpufreq_suspended = false; if (!has_target() && !cpufreq_driver->resume) From bebf269c6d48a2a495697238e89d45a3559fbfc7 Mon Sep 17 00:00:00 2001 From: Gang He Date: Fri, 2 Nov 2018 15:48:03 -0700 Subject: [PATCH 1110/3715] ocfs2: remove ocfs2_is_o2cb_active() commit a634644751c46238df58bbfe992e30c1668388db upstream. Remove ocfs2_is_o2cb_active(). We have similar functions to identify which cluster stack is being used via osb->osb_cluster_stack. Secondly, the current implementation of ocfs2_is_o2cb_active() is not totally safe. Based on the design of stackglue, we need to get ocfs2_stack_lock before using ocfs2_stack related data structures, and that active_stack pointer can be NULL in the case of mount failure. Link: http://lkml.kernel.org/r/1495441079-11708-1-git-send-email-ghe@suse.com Signed-off-by: Gang He Reviewed-by: Joseph Qi Reviewed-by: Eric Ren Acked-by: Changwei Ge Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/dlmglue.c | 2 +- fs/ocfs2/stackglue.c | 6 ------ fs/ocfs2/stackglue.h | 3 --- 3 files changed, 1 insertion(+), 10 deletions(-) diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 5193218f5889..e961015fb484 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -3422,7 +3422,7 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb, * we can recover correctly from node failure. Otherwise, we may get * invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set. */ - if (!ocfs2_is_o2cb_active() && + if (ocfs2_userspace_stack(osb) && lockres->l_ops->flags & LOCK_TYPE_USES_LVB) lvb = 1; diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index d6c350ba25b9..c4b029c43464 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -48,12 +48,6 @@ static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; */ static struct ocfs2_stack_plugin *active_stack; -inline int ocfs2_is_o2cb_active(void) -{ - return !strcmp(active_stack->sp_name, OCFS2_STACK_PLUGIN_O2CB); -} -EXPORT_SYMBOL_GPL(ocfs2_is_o2cb_active); - static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name) { struct ocfs2_stack_plugin *p; diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index e3036e1790e8..f2dce10fae54 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h @@ -298,9 +298,6 @@ void ocfs2_stack_glue_set_max_proto_version(struct ocfs2_protocol_version *max_p int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin); void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin); -/* In ocfs2_downconvert_lock(), we need to know which stack we are using */ -int ocfs2_is_o2cb_active(void); - extern struct kset *ocfs2_kset; #endif /* STACKGLUE_H */ From 81fdf5046fabe9c9936a0fbcf4fc0f64f88e6ec3 Mon Sep 17 00:00:00 2001 From: Chester Lin Date: Fri, 30 Aug 2019 14:30:07 +0100 Subject: [PATCH 1111/3715] ARM: 8904/1: skip nomap memblocks while finding the lowmem/highmem boundary commit 1d31999cf04c21709f72ceb17e65b54a401330da upstream. adjust_lowmem_bounds() checks every memblocks in order to find the boundary between lowmem and highmem. However some memblocks could be marked as NOMAP so they are not used by kernel, which should be skipped while calculating the boundary. Signed-off-by: Chester Lin Reviewed-by: Mike Rapoport Signed-off-by: Russell King Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- arch/arm/mm/mmu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 70e560cf8ca0..d8cbe772f690 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1195,6 +1195,9 @@ void __init adjust_lowmem_bounds(void) phys_addr_t block_start = reg->base; phys_addr_t block_end = reg->base + reg->size; + if (memblock_is_nomap(reg)) + continue; + if (reg->base < vmalloc_limit) { if (block_end > lowmem_limit) /* From 578c60fb4ac9d4211577d73dd52c8b01375736c1 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Tue, 22 Oct 2019 17:04:11 +0300 Subject: [PATCH 1112/3715] ARC: perf: Accommodate big-endian CPU commit 5effc09c4907901f0e71e68e5f2e14211d9a203f upstream. 8-letter strings representing ARC perf events are stores in two 32-bit registers as ASCII characters like that: "IJMP", "IALL", "IJMPTAK" etc. And the same order of bytes in the word is used regardless CPU endianness. Which means in case of big-endian CPU core we need to swap bytes to get the same order as if it was on little-endian CPU. Otherwise we're seeing the following error message on boot: ------------------------->8---------------------- ARC perf : 8 counters (32 bits), 40 conditions, [overflow IRQ support] sysfs: cannot create duplicate filename '/devices/arc_pct/events/pmji' CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.2.18 #3 Stack Trace: arc_unwind_core+0xd4/0xfc dump_stack+0x64/0x80 sysfs_warn_dup+0x46/0x58 sysfs_add_file_mode_ns+0xb2/0x168 create_files+0x70/0x2a0 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1 at kernel/events/core.c:12144 perf_event_sysfs_init+0x70/0xa0 Failed to register pmu: arc_pct, reason -17 Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.2.18 #3 Stack Trace: arc_unwind_core+0xd4/0xfc dump_stack+0x64/0x80 __warn+0x9c/0xd4 warn_slowpath_fmt+0x22/0x2c perf_event_sysfs_init+0x70/0xa0 ---[ end trace a75fb9a9837bd1ec ]--- ------------------------->8---------------------- What happens here we're trying to register more than one raw perf event with the same name "PMJI". Why? Because ARC perf events are 4 to 8 letters and encoded into two 32-bit words. In this particular case we deal with 2 events: * "IJMP____" which counts all jump & branch instructions * "IJMPC___" which counts only conditional jumps & branches Those strings are split in two 32-bit words this way "IJMP" + "____" & "IJMP" + "C___" correspondingly. Now if we read them swapped due to CPU core being big-endian then we read "PMJI" + "____" & "PMJI" + "___C". And since we interpret read array of ASCII letters as a null-terminated string on big-endian CPU we end up with 2 events of the same name "PMJI". Signed-off-by: Alexey Brodkin Cc: stable@vger.kernel.org Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/kernel/perf_event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index 2ce24e74f879..a509b77ef80d 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -488,8 +488,8 @@ static int arc_pmu_device_probe(struct platform_device *pdev) /* loop thru all available h/w condition indexes */ for (j = 0; j < cc_bcr.c; j++) { write_aux_reg(ARC_REG_CC_INDEX, j); - cc_name.indiv.word0 = read_aux_reg(ARC_REG_CC_NAME0); - cc_name.indiv.word1 = read_aux_reg(ARC_REG_CC_NAME1); + cc_name.indiv.word0 = le32_to_cpu(read_aux_reg(ARC_REG_CC_NAME0)); + cc_name.indiv.word1 = le32_to_cpu(read_aux_reg(ARC_REG_CC_NAME1)); /* See if it has been mapped to a perf event_id */ for (i = 0; i < ARRAY_SIZE(arc_pmu_ev_hw_map); i++) { From fbb0e381e73094a1288cd5ec459e853ed995c358 Mon Sep 17 00:00:00 2001 From: Alexander Kapshuk Date: Tue, 24 Sep 2019 07:46:59 +0300 Subject: [PATCH 1113/3715] x86/insn: Fix awk regexp warnings commit 700c1018b86d0d4b3f1f2d459708c0cdf42b521d upstream. gawk 5.0.1 generates the following regexp warnings: GEN /home/sasha/torvalds/tools/objtool/arch/x86/lib/inat-tables.c awk: ../arch/x86/tools/gen-insn-attr-x86.awk:260: warning: regexp escape sequence `\:' is not a known regexp operator awk: ../arch/x86/tools/gen-insn-attr-x86.awk:350: (FILENAME=../arch/x86/lib/x86-opcode-map.txt FNR=41) warning: regexp escape sequence `\&' is not a known regexp operator Ealier versions of gawk are not known to generate these warnings. The gawk manual referenced below does not list characters ':' and '&' as needing escaping, so 'unescape' them. See https://www.gnu.org/software/gawk/manual/html_node/Escape-Sequences.html for more info. Running diff on the output generated by the script before and after applying the patch reported no differences. [ bp: Massage commit message. ] [ Caught the respective tools header discrepancy. ] Reported-by: kbuild test robot Signed-off-by: Alexander Kapshuk Signed-off-by: Borislav Petkov Acked-by: Masami Hiramatsu Cc: "H. Peter Anvin" Cc: "Peter Zijlstra (Intel)" Cc: Arnaldo Carvalho de Melo Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/20190924044659.3785-1-alexander.kapshuk@gmail.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/tools/gen-insn-attr-x86.awk | 4 ++-- tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk index b02a36b2c14f..a42015b305f4 100644 --- a/arch/x86/tools/gen-insn-attr-x86.awk +++ b/arch/x86/tools/gen-insn-attr-x86.awk @@ -69,7 +69,7 @@ BEGIN { lprefix1_expr = "\\((66|!F3)\\)" lprefix2_expr = "\\(F3\\)" - lprefix3_expr = "\\((F2|!F3|66\\&F2)\\)" + lprefix3_expr = "\\((F2|!F3|66&F2)\\)" lprefix_expr = "\\((66|F2|F3)\\)" max_lprefix = 4 @@ -257,7 +257,7 @@ function convert_operands(count,opnd, i,j,imm,mod) return add_flags(imm, mod) } -/^[0-9a-f]+\:/ { +/^[0-9a-f]+:/ { if (NR == 1) next # get index diff --git a/tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk b/tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk index b02a36b2c14f..a42015b305f4 100644 --- a/tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk +++ b/tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk @@ -69,7 +69,7 @@ BEGIN { lprefix1_expr = "\\((66|!F3)\\)" lprefix2_expr = "\\(F3\\)" - lprefix3_expr = "\\((F2|!F3|66\\&F2)\\)" + lprefix3_expr = "\\((F2|!F3|66&F2)\\)" lprefix_expr = "\\((66|F2|F3)\\)" max_lprefix = 4 @@ -257,7 +257,7 @@ function convert_operands(count,opnd, i,j,imm,mod) return add_flags(imm, mod) } -/^[0-9a-f]+\:/ { +/^[0-9a-f]+:/ { if (NR == 1) next # get index From d68d0c043eaa7d2f3e1ef3070116076983026fd5 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 15 Nov 2019 11:14:44 -0500 Subject: [PATCH 1114/3715] x86/speculation: Fix incorrect MDS/TAA mitigation status commit 64870ed1b12e235cfca3f6c6da75b542c973ff78 upstream. For MDS vulnerable processors with TSX support, enabling either MDS or TAA mitigations will enable the use of VERW to flush internal processor buffers at the right code path. IOW, they are either both mitigated or both not. However, if the command line options are inconsistent, the vulnerabilites sysfs files may not report the mitigation status correctly. For example, with only the "mds=off" option: vulnerabilities/mds:Vulnerable; SMT vulnerable vulnerabilities/tsx_async_abort:Mitigation: Clear CPU buffers; SMT vulnerable The mds vulnerabilities file has wrong status in this case. Similarly, the taa vulnerability file will be wrong with mds mitigation on, but taa off. Change taa_select_mitigation() to sync up the two mitigation status and have them turned off if both "mds=off" and "tsx_async_abort=off" are present. Update documentation to emphasize the fact that both "mds=off" and "tsx_async_abort=off" have to be specified together for processors that are affected by both TAA and MDS to be effective. [ bp: Massage and add kernel-parameters.txt change too. ] Fixes: 1b42f017415b ("x86/speculation/taa: Add mitigation for TSX Async Abort") Signed-off-by: Waiman Long Signed-off-by: Borislav Petkov Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jiri Kosina Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: linux-doc@vger.kernel.org Cc: Mark Gross Cc: Cc: Pawan Gupta Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Cc: Tony Luck Cc: Tyler Hicks Cc: x86-ml Link: https://lkml.kernel.org/r/20191115161445.30809-2-longman@redhat.com Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/hw-vuln/mds.rst | 7 +++++-- .../admin-guide/hw-vuln/tsx_async_abort.rst | 5 ++++- Documentation/admin-guide/kernel-parameters.txt | 11 +++++++++++ arch/x86/kernel/cpu/bugs.c | 17 +++++++++++++++-- 4 files changed, 35 insertions(+), 5 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/mds.rst b/Documentation/admin-guide/hw-vuln/mds.rst index e3a796c0d3a2..2d19c9f4c1fe 100644 --- a/Documentation/admin-guide/hw-vuln/mds.rst +++ b/Documentation/admin-guide/hw-vuln/mds.rst @@ -265,8 +265,11 @@ time with the option "mds=". The valid arguments for this option are: ============ ============================================================= -Not specifying this option is equivalent to "mds=full". - +Not specifying this option is equivalent to "mds=full". For processors +that are affected by both TAA (TSX Asynchronous Abort) and MDS, +specifying just "mds=off" without an accompanying "tsx_async_abort=off" +will have no effect as the same mitigation is used for both +vulnerabilities. Mitigation selection guide -------------------------- diff --git a/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst index fddbd7579c53..af6865b822d2 100644 --- a/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst +++ b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst @@ -174,7 +174,10 @@ the option "tsx_async_abort=". The valid arguments for this option are: CPU is not vulnerable to cross-thread TAA attacks. ============ ============================================================= -Not specifying this option is equivalent to "tsx_async_abort=full". +Not specifying this option is equivalent to "tsx_async_abort=full". For +processors that are affected by both TAA and MDS, specifying just +"tsx_async_abort=off" without an accompanying "mds=off" will have no +effect as the same mitigation is used for both vulnerabilities. The kernel command line also allows to control the TSX feature using the parameter "tsx=" on CPUs which support TSX control. MSR_IA32_TSX_CTRL is used diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 05596e05bc71..b0da6050a254 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2254,6 +2254,12 @@ SMT on vulnerable CPUs off - Unconditionally disable MDS mitigation + On TAA-affected machines, mds=off can be prevented by + an active TAA mitigation as both vulnerabilities are + mitigated with the same mechanism so in order to disable + this mitigation, you need to specify tsx_async_abort=off + too. + Not specifying this option is equivalent to mds=full. @@ -4588,6 +4594,11 @@ vulnerable to cross-thread TAA attacks. off - Unconditionally disable TAA mitigation + On MDS-affected machines, tsx_async_abort=off can be + prevented by an active MDS mitigation as both vulnerabilities + are mitigated with the same mechanism so in order to disable + this mitigation, you need to specify mds=off too. + Not specifying this option is equivalent to tsx_async_abort=full. On CPUs which are MDS affected and deploy MDS mitigation, TAA mitigation is not diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 8596811843cc..c065e7f5f62f 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -304,8 +304,12 @@ static void __init taa_select_mitigation(void) return; } - /* TAA mitigation is turned off on the cmdline (tsx_async_abort=off) */ - if (taa_mitigation == TAA_MITIGATION_OFF) + /* + * TAA mitigation via VERW is turned off if both + * tsx_async_abort=off and mds=off are specified. + */ + if (taa_mitigation == TAA_MITIGATION_OFF && + mds_mitigation == MDS_MITIGATION_OFF) goto out; if (boot_cpu_has(X86_FEATURE_MD_CLEAR)) @@ -339,6 +343,15 @@ static void __init taa_select_mitigation(void) if (taa_nosmt || cpu_mitigations_auto_nosmt()) cpu_smt_disable(false); + /* + * Update MDS mitigation, if necessary, as the mds_user_clear is + * now enabled for TAA mitigation. + */ + if (mds_mitigation == MDS_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_MDS)) { + mds_mitigation = MDS_MITIGATION_FULL; + mds_select_mitigation(); + } out: pr_info("%s\n", taa_strings[taa_mitigation]); } From 432c339a4f6faa4a18405dfe30055bb8af3e8cba Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 15 Nov 2019 11:14:45 -0500 Subject: [PATCH 1115/3715] x86/speculation: Fix redundant MDS mitigation message commit cd5a2aa89e847bdda7b62029d94e95488d73f6b2 upstream. Since MDS and TAA mitigations are inter-related for processors that are affected by both vulnerabilities, the followiing confusing messages can be printed in the kernel log: MDS: Vulnerable MDS: Mitigation: Clear CPU buffers To avoid the first incorrect message, defer the printing of MDS mitigation after the TAA mitigation selection has been done. However, that has the side effect of printing TAA mitigation first before MDS mitigation. [ bp: Check box is affected/mitigations are disabled first before printing and massage. ] Suggested-by: Pawan Gupta Signed-off-by: Waiman Long Signed-off-by: Borislav Petkov Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Mark Gross Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Cc: Tony Luck Cc: Tyler Hicks Cc: x86-ml Link: https://lkml.kernel.org/r/20191115161445.30809-3-longman@redhat.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c065e7f5f62f..7896a34f53b5 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -39,6 +39,7 @@ static void __init spectre_v2_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); static void __init mds_select_mitigation(void); +static void __init mds_print_mitigation(void); static void __init taa_select_mitigation(void); /* The base value of the SPEC_CTRL MSR that always has to be preserved. */ @@ -108,6 +109,12 @@ void __init check_bugs(void) mds_select_mitigation(); taa_select_mitigation(); + /* + * As MDS and TAA mitigations are inter-related, print MDS + * mitigation until after TAA mitigation selection is done. + */ + mds_print_mitigation(); + arch_smt_update(); #ifdef CONFIG_X86_32 @@ -245,6 +252,12 @@ static void __init mds_select_mitigation(void) (mds_nosmt || cpu_mitigations_auto_nosmt())) cpu_smt_disable(false); } +} + +static void __init mds_print_mitigation(void) +{ + if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off()) + return; pr_info("%s\n", mds_strings[mds_mitigation]); } From 96b59fd4c7f5ca1d3c6238a11a29ddb0e98e4531 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Mon, 23 Sep 2019 15:09:58 -0500 Subject: [PATCH 1116/3715] nbd: prevent memory leak commit 03bf73c315edca28f47451913177e14cd040a216 upstream. In nbd_add_socket when krealloc succeeds, if nsock's allocation fail the reallocted memory is leak. The correct behaviour should be assigning the reallocted memory to config->socks right after success. Reviewed-by: Josef Bacik Signed-off-by: Navid Emamdoost Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/nbd.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index a609ce1d468b..929bd255a290 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -970,14 +970,15 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, sockfd_put(sock); return -ENOMEM; } + + config->socks = socks; + nsock = kzalloc(sizeof(struct nbd_sock), GFP_KERNEL); if (!nsock) { sockfd_put(sock); return -ENOMEM; } - config->socks = socks; - nsock->fallback_index = -1; nsock->dead = false; mutex_init(&nsock->tx_lock); From d987de580cecd4f37891460c59a2e940c238b8d2 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 21 Nov 2019 11:37:10 +0100 Subject: [PATCH 1117/3715] nfc: port100: handle command failure cleanly commit 5f9f0b11f0816b35867f2cf71e54d95f53f03902 upstream. If starting the transfer of a command suceeds but the transfer for the reply fails, it is not enough to initiate killing the transfer for the command may still be running. You need to wait for the killing to finish before you can reuse URB and buffer. Reported-and-tested-by: syzbot+711468aa5c3a1eabf863@syzkaller.appspotmail.com Signed-off-by: Oliver Neukum Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/nfc/port100.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c index bb43cebda9dc..60ae382f50da 100644 --- a/drivers/nfc/port100.c +++ b/drivers/nfc/port100.c @@ -792,7 +792,7 @@ static int port100_send_frame_async(struct port100 *dev, struct sk_buff *out, rc = port100_submit_urb_for_ack(dev, GFP_KERNEL); if (rc) - usb_unlink_urb(dev->out_urb); + usb_kill_urb(dev->out_urb); exit: mutex_unlock(&dev->out_urb_lock); From 2e3fa167100b2ebefa75c3b74d2336f0d5c05166 Mon Sep 17 00:00:00 2001 From: Vandana BN Date: Mon, 9 Sep 2019 06:43:31 -0300 Subject: [PATCH 1118/3715] media: vivid: Set vid_cap_streaming and vid_out_streaming to true commit b4add02d2236fd5f568db141cfd8eb4290972eb3 upstream. When vbi stream is started, followed by video streaming, the vid_cap_streaming and vid_out_streaming were not being set to true, which would cause the video stream to stop when vbi stream is stopped. This patch allows to set vid_cap_streaming and vid_out_streaming to true. According to Hans Verkuil it appears that these 'if (dev->kthread_vid_cap)' checks are a left-over from the original vivid development and should never have been there. Signed-off-by: Vandana BN Cc: # for v3.18 and up Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/vivid/vivid-vid-cap.c | 3 --- drivers/media/platform/vivid/vivid-vid-out.c | 3 --- 2 files changed, 6 deletions(-) diff --git a/drivers/media/platform/vivid/vivid-vid-cap.c b/drivers/media/platform/vivid/vivid-vid-cap.c index 4ca3d600aa84..c66568e8f388 100644 --- a/drivers/media/platform/vivid/vivid-vid-cap.c +++ b/drivers/media/platform/vivid/vivid-vid-cap.c @@ -239,9 +239,6 @@ static int vid_cap_start_streaming(struct vb2_queue *vq, unsigned count) if (vb2_is_streaming(&dev->vb_vid_out_q)) dev->can_loop_video = vivid_vid_can_loop(dev); - if (dev->kthread_vid_cap) - return 0; - dev->vid_cap_seq_count = 0; dprintk(dev, 1, "%s\n", __func__); for (i = 0; i < VIDEO_MAX_FRAME; i++) diff --git a/drivers/media/platform/vivid/vivid-vid-out.c b/drivers/media/platform/vivid/vivid-vid-out.c index 0b1b6218ede8..3e7a26d15074 100644 --- a/drivers/media/platform/vivid/vivid-vid-out.c +++ b/drivers/media/platform/vivid/vivid-vid-out.c @@ -158,9 +158,6 @@ static int vid_out_start_streaming(struct vb2_queue *vq, unsigned count) if (vb2_is_streaming(&dev->vb_vid_cap_q)) dev->can_loop_video = vivid_vid_can_loop(dev); - if (dev->kthread_vid_out) - return 0; - dev->vid_out_seq_count = 0; dprintk(dev, 1, "%s\n", __func__); if (dev->start_streaming_error) { From 4a2c9b037a08b8496f04a76987332ca6f19bd794 Mon Sep 17 00:00:00 2001 From: Alexander Popov Date: Sun, 3 Nov 2019 23:17:19 +0100 Subject: [PATCH 1119/3715] media: vivid: Fix wrong locking that causes race conditions on streaming stop commit 6dcd5d7a7a29c1e4b8016a06aed78cd650cd8c27 upstream. There is the same incorrect approach to locking implemented in vivid_stop_generating_vid_cap(), vivid_stop_generating_vid_out() and sdr_cap_stop_streaming(). These functions are called during streaming stopping with vivid_dev.mutex locked. And they all do the same mistake while stopping their kthreads, which need to lock this mutex as well. See the example from vivid_stop_generating_vid_cap(): /* shutdown control thread */ vivid_grab_controls(dev, false); mutex_unlock(&dev->mutex); kthread_stop(dev->kthread_vid_cap); dev->kthread_vid_cap = NULL; mutex_lock(&dev->mutex); But when this mutex is unlocked, another vb2_fop_read() can lock it instead of vivid_thread_vid_cap() and manipulate the buffer queue. That causes a use-after-free access later. To fix those issues let's: 1. avoid unlocking the mutex in vivid_stop_generating_vid_cap(), vivid_stop_generating_vid_out() and sdr_cap_stop_streaming(); 2. use mutex_trylock() with schedule_timeout_uninterruptible() in the loops of the vivid kthread handlers. Signed-off-by: Alexander Popov Acked-by: Linus Torvalds Tested-by: Hans Verkuil Signed-off-by: Hans Verkuil Cc: # for v3.18 and up Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/vivid/vivid-kthread-cap.c | 8 +++++--- drivers/media/platform/vivid/vivid-kthread-out.c | 8 +++++--- drivers/media/platform/vivid/vivid-sdr-cap.c | 8 +++++--- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/media/platform/vivid/vivid-kthread-cap.c b/drivers/media/platform/vivid/vivid-kthread-cap.c index d300e5e7eadc..2ca9c928ed2f 100644 --- a/drivers/media/platform/vivid/vivid-kthread-cap.c +++ b/drivers/media/platform/vivid/vivid-kthread-cap.c @@ -777,7 +777,11 @@ static int vivid_thread_vid_cap(void *data) if (kthread_should_stop()) break; - mutex_lock(&dev->mutex); + if (!mutex_trylock(&dev->mutex)) { + schedule_timeout_uninterruptible(1); + continue; + } + cur_jiffies = jiffies; if (dev->cap_seq_resync) { dev->jiffies_vid_cap = cur_jiffies; @@ -930,8 +934,6 @@ void vivid_stop_generating_vid_cap(struct vivid_dev *dev, bool *pstreaming) /* shutdown control thread */ vivid_grab_controls(dev, false); - mutex_unlock(&dev->mutex); kthread_stop(dev->kthread_vid_cap); dev->kthread_vid_cap = NULL; - mutex_lock(&dev->mutex); } diff --git a/drivers/media/platform/vivid/vivid-kthread-out.c b/drivers/media/platform/vivid/vivid-kthread-out.c index 7c8d75852816..ed5d8fb854b4 100644 --- a/drivers/media/platform/vivid/vivid-kthread-out.c +++ b/drivers/media/platform/vivid/vivid-kthread-out.c @@ -147,7 +147,11 @@ static int vivid_thread_vid_out(void *data) if (kthread_should_stop()) break; - mutex_lock(&dev->mutex); + if (!mutex_trylock(&dev->mutex)) { + schedule_timeout_uninterruptible(1); + continue; + } + cur_jiffies = jiffies; if (dev->out_seq_resync) { dev->jiffies_vid_out = cur_jiffies; @@ -301,8 +305,6 @@ void vivid_stop_generating_vid_out(struct vivid_dev *dev, bool *pstreaming) /* shutdown control thread */ vivid_grab_controls(dev, false); - mutex_unlock(&dev->mutex); kthread_stop(dev->kthread_vid_out); dev->kthread_vid_out = NULL; - mutex_lock(&dev->mutex); } diff --git a/drivers/media/platform/vivid/vivid-sdr-cap.c b/drivers/media/platform/vivid/vivid-sdr-cap.c index ebd7b9c4dd83..4f49c9a47d49 100644 --- a/drivers/media/platform/vivid/vivid-sdr-cap.c +++ b/drivers/media/platform/vivid/vivid-sdr-cap.c @@ -149,7 +149,11 @@ static int vivid_thread_sdr_cap(void *data) if (kthread_should_stop()) break; - mutex_lock(&dev->mutex); + if (!mutex_trylock(&dev->mutex)) { + schedule_timeout_uninterruptible(1); + continue; + } + cur_jiffies = jiffies; if (dev->sdr_cap_seq_resync) { dev->jiffies_sdr_cap = cur_jiffies; @@ -309,10 +313,8 @@ static void sdr_cap_stop_streaming(struct vb2_queue *vq) } /* shutdown control thread */ - mutex_unlock(&dev->mutex); kthread_stop(dev->kthread_sdr_cap); dev->kthread_sdr_cap = NULL; - mutex_lock(&dev->mutex); } const struct vb2_ops vivid_sdr_cap_qops = { From 0ab8d923cf428efa748bf7a57347b2437b0a5778 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 7 Oct 2019 12:09:53 -0300 Subject: [PATCH 1120/3715] media: usbvision: Fix races among open, close, and disconnect commit 9e08117c9d4efc1e1bc6fce83dab856d9fd284b6 upstream. Visual inspection of the usbvision driver shows that it suffers from three races between its open, close, and disconnect handlers. In particular, the driver is careful to update its usbvision->user and usbvision->remove_pending flags while holding the private mutex, but: usbvision_v4l2_close() and usbvision_radio_close() don't hold the mutex while they check the value of usbvision->remove_pending; usbvision_disconnect() doesn't hold the mutex while checking the value of usbvision->user; and also, usbvision_v4l2_open() and usbvision_radio_open() don't check whether the device has been unplugged before allowing the user to open the device files. Each of these can potentially lead to usbvision_release() being called twice and use-after-free errors. This patch fixes the races by reading the flags while the mutex is still held and checking for pending removes before allowing an open to succeed. Signed-off-by: Alan Stern CC: Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/usbvision/usbvision-video.c | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/media/usb/usbvision/usbvision-video.c b/drivers/media/usb/usbvision/usbvision-video.c index 960272d3c924..4c39c502d616 100644 --- a/drivers/media/usb/usbvision/usbvision-video.c +++ b/drivers/media/usb/usbvision/usbvision-video.c @@ -328,6 +328,10 @@ static int usbvision_v4l2_open(struct file *file) if (mutex_lock_interruptible(&usbvision->v4l2_lock)) return -ERESTARTSYS; + if (usbvision->remove_pending) { + err_code = -ENODEV; + goto unlock; + } if (usbvision->user) { err_code = -EBUSY; } else { @@ -391,6 +395,7 @@ unlock: static int usbvision_v4l2_close(struct file *file) { struct usb_usbvision *usbvision = video_drvdata(file); + int r; PDEBUG(DBG_IO, "close"); @@ -405,9 +410,10 @@ static int usbvision_v4l2_close(struct file *file) usbvision_scratch_free(usbvision); usbvision->user--; + r = usbvision->remove_pending; mutex_unlock(&usbvision->v4l2_lock); - if (usbvision->remove_pending) { + if (r) { printk(KERN_INFO "%s: Final disconnect\n", __func__); usbvision_release(usbvision); return 0; @@ -1091,6 +1097,11 @@ static int usbvision_radio_open(struct file *file) if (mutex_lock_interruptible(&usbvision->v4l2_lock)) return -ERESTARTSYS; + + if (usbvision->remove_pending) { + err_code = -ENODEV; + goto out; + } err_code = v4l2_fh_open(file); if (err_code) goto out; @@ -1123,6 +1134,7 @@ out: static int usbvision_radio_close(struct file *file) { struct usb_usbvision *usbvision = video_drvdata(file); + int r; PDEBUG(DBG_IO, ""); @@ -1135,9 +1147,10 @@ static int usbvision_radio_close(struct file *file) usbvision_audio_off(usbvision); usbvision->radio = 0; usbvision->user--; + r = usbvision->remove_pending; mutex_unlock(&usbvision->v4l2_lock); - if (usbvision->remove_pending) { + if (r) { printk(KERN_INFO "%s: Final disconnect\n", __func__); v4l2_fh_release(file); usbvision_release(usbvision); @@ -1562,6 +1575,7 @@ err_usb: static void usbvision_disconnect(struct usb_interface *intf) { struct usb_usbvision *usbvision = to_usbvision(usb_get_intfdata(intf)); + int u; PDEBUG(DBG_PROBE, ""); @@ -1578,13 +1592,14 @@ static void usbvision_disconnect(struct usb_interface *intf) v4l2_device_disconnect(&usbvision->v4l2_dev); usbvision_i2c_unregister(usbvision); usbvision->remove_pending = 1; /* Now all ISO data will be ignored */ + u = usbvision->user; usb_put_dev(usbvision->dev); usbvision->dev = NULL; /* USB device is no more */ mutex_unlock(&usbvision->v4l2_lock); - if (usbvision->user) { + if (u) { printk(KERN_INFO "%s: In use, disconnect pending\n", __func__); wake_up_interruptible(&usbvision->wait_frame); From af6bf45e59b0899bc77b0dccd47f476d1f6f1832 Mon Sep 17 00:00:00 2001 From: Kai Shen Date: Thu, 7 Nov 2019 05:08:17 +0000 Subject: [PATCH 1121/3715] cpufreq: Add NULL checks to show() and store() methods of cpufreq commit e6e8df07268c1f75dd9215536e2ce4587b70f977 upstream. Add NULL checks to show() and store() in cpufreq.c to avoid attempts to invoke a NULL callback. Though some interfaces of cpufreq are set as read-only, users can still get write permission using chmod which can lead to a kernel crash, as follows: chmod +w /sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq echo 1 > /sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq This bug was found in linux 4.19. Signed-off-by: Kai Shen Reported-by: Feilong Lin Reviewed-by: Feilong Lin Acked-by: Viresh Kumar [ rjw: Subject & changelog ] Cc: All applicable Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/cpufreq.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 52fc08a92bb9..480e8c13567c 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -911,6 +911,9 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) struct freq_attr *fattr = to_attr(attr); ssize_t ret; + if (!fattr->show) + return -EIO; + down_read(&policy->rwsem); ret = fattr->show(policy, buf); up_read(&policy->rwsem); @@ -925,6 +928,9 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, struct freq_attr *fattr = to_attr(attr); ssize_t ret = -EINVAL; + if (!fattr->store) + return -EIO; + cpus_read_lock(); if (cpu_online(policy->cpu)) { From a96430cca803da3b353ede3231692eea22d86715 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 29 Jul 2019 23:14:55 -0300 Subject: [PATCH 1122/3715] media: uvcvideo: Fix error path in control parsing failure commit 8c279e9394cade640ed86ec6c6645a0e7df5e0b6 upstream. When parsing the UVC control descriptors fails, the error path tries to cleanup a media device that hasn't been initialised, potentially resulting in a crash. Fix this by initialising the media device before the error handling path can be reached. Fixes: 5a254d751e52 ("[media] uvcvideo: Register a v4l2_device") Reported-by: syzbot+c86454eb3af9e8a4da20@syzkaller.appspotmail.com Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/uvc/uvc_driver.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c index c2939d080997..6445b638f207 100644 --- a/drivers/media/usb/uvc/uvc_driver.c +++ b/drivers/media/usb/uvc/uvc_driver.c @@ -2059,6 +2059,20 @@ static int uvc_probe(struct usb_interface *intf, sizeof(dev->name) - len); } + /* Initialize the media device. */ +#ifdef CONFIG_MEDIA_CONTROLLER + dev->mdev.dev = &intf->dev; + strscpy(dev->mdev.model, dev->name, sizeof(dev->mdev.model)); + if (udev->serial) + strscpy(dev->mdev.serial, udev->serial, + sizeof(dev->mdev.serial)); + usb_make_path(udev, dev->mdev.bus_info, sizeof(dev->mdev.bus_info)); + dev->mdev.hw_revision = le16_to_cpu(udev->descriptor.bcdDevice); + media_device_init(&dev->mdev); + + dev->vdev.mdev = &dev->mdev; +#endif + /* Parse the Video Class control descriptor. */ if (uvc_parse_control(dev) < 0) { uvc_trace(UVC_TRACE_PROBE, "Unable to parse UVC " @@ -2079,19 +2093,7 @@ static int uvc_probe(struct usb_interface *intf, "linux-uvc-devel mailing list.\n"); } - /* Initialize the media device and register the V4L2 device. */ -#ifdef CONFIG_MEDIA_CONTROLLER - dev->mdev.dev = &intf->dev; - strlcpy(dev->mdev.model, dev->name, sizeof(dev->mdev.model)); - if (udev->serial) - strlcpy(dev->mdev.serial, udev->serial, - sizeof(dev->mdev.serial)); - strcpy(dev->mdev.bus_info, udev->devpath); - dev->mdev.hw_revision = le16_to_cpu(udev->descriptor.bcdDevice); - media_device_init(&dev->mdev); - - dev->vdev.mdev = &dev->mdev; -#endif + /* Register the V4L2 device. */ if (v4l2_device_register(&intf->dev, &dev->vdev) < 0) goto error; From 9a611c200ee2777bb3551ee1d979bf923b93653a Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 30 Jul 2019 09:48:27 +0200 Subject: [PATCH 1123/3715] media: b2c2-flexcop-usb: add sanity checking commit 1b976fc6d684e3282914cdbe7a8d68fdce19095c upstream. The driver needs an isochronous endpoint to be present. It will oops in its absence. Add checking for it. Reported-by: syzbot+d93dff37e6a89431c158@syzkaller.appspotmail.com Signed-off-by: Oliver Neukum Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/b2c2/flexcop-usb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/media/usb/b2c2/flexcop-usb.c b/drivers/media/usb/b2c2/flexcop-usb.c index a8f3169e30b3..ac4fddfd0a43 100644 --- a/drivers/media/usb/b2c2/flexcop-usb.c +++ b/drivers/media/usb/b2c2/flexcop-usb.c @@ -537,6 +537,9 @@ static int flexcop_usb_probe(struct usb_interface *intf, struct flexcop_device *fc = NULL; int ret; + if (intf->cur_altsetting->desc.bNumEndpoints < 1) + return -ENODEV; + if ((fc = flexcop_device_kmalloc(sizeof(struct flexcop_usb))) == NULL) { err("out of memory\n"); return -ENOMEM; From 06f4d7ae2c425114ae5abc9ce0becc4a86f09df5 Mon Sep 17 00:00:00 2001 From: Vito Caputo Date: Sun, 13 Oct 2019 23:08:45 -0300 Subject: [PATCH 1124/3715] media: cxusb: detect cxusb_ctrl_msg error in query commit ca8f245f284eeffa56f3b7a5eb6fc503159ee028 upstream. Don't use uninitialized ircode[] in cxusb_rc_query() when cxusb_ctrl_msg() fails to populate its contents. syzbot reported: dvb-usb: bulk message failed: -22 (1/-30591) ===================================================== BUG: KMSAN: uninit-value in ir_lookup_by_scancode drivers/media/rc/rc-main.c:494 [inline] BUG: KMSAN: uninit-value in rc_g_keycode_from_table drivers/media/rc/rc-main.c:582 [inline] BUG: KMSAN: uninit-value in rc_keydown+0x1a6/0x6f0 drivers/media/rc/rc-main.c:816 CPU: 1 PID: 11436 Comm: kworker/1:2 Not tainted 5.3.0-rc7+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: events dvb_usb_read_remote_control Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x191/0x1f0 lib/dump_stack.c:113 kmsan_report+0x13a/0x2b0 mm/kmsan/kmsan_report.c:108 __msan_warning+0x73/0xe0 mm/kmsan/kmsan_instr.c:250 bsearch+0x1dd/0x250 lib/bsearch.c:41 ir_lookup_by_scancode drivers/media/rc/rc-main.c:494 [inline] rc_g_keycode_from_table drivers/media/rc/rc-main.c:582 [inline] rc_keydown+0x1a6/0x6f0 drivers/media/rc/rc-main.c:816 cxusb_rc_query+0x2e1/0x360 drivers/media/usb/dvb-usb/cxusb.c:548 dvb_usb_read_remote_control+0xf9/0x290 drivers/media/usb/dvb-usb/dvb-usb-remote.c:261 process_one_work+0x1572/0x1ef0 kernel/workqueue.c:2269 worker_thread+0x111b/0x2460 kernel/workqueue.c:2415 kthread+0x4b5/0x4f0 kernel/kthread.c:256 ret_from_fork+0x35/0x40 arch/x86/entry/entry_64.S:355 Uninit was stored to memory at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:150 [inline] kmsan_internal_chain_origin+0xd2/0x170 mm/kmsan/kmsan.c:314 __msan_chain_origin+0x6b/0xe0 mm/kmsan/kmsan_instr.c:184 rc_g_keycode_from_table drivers/media/rc/rc-main.c:583 [inline] rc_keydown+0x2c4/0x6f0 drivers/media/rc/rc-main.c:816 cxusb_rc_query+0x2e1/0x360 drivers/media/usb/dvb-usb/cxusb.c:548 dvb_usb_read_remote_control+0xf9/0x290 drivers/media/usb/dvb-usb/dvb-usb-remote.c:261 process_one_work+0x1572/0x1ef0 kernel/workqueue.c:2269 worker_thread+0x111b/0x2460 kernel/workqueue.c:2415 kthread+0x4b5/0x4f0 kernel/kthread.c:256 ret_from_fork+0x35/0x40 arch/x86/entry/entry_64.S:355 Local variable description: ----ircode@cxusb_rc_query Variable was created at: cxusb_rc_query+0x4d/0x360 drivers/media/usb/dvb-usb/cxusb.c:543 dvb_usb_read_remote_control+0xf9/0x290 drivers/media/usb/dvb-usb/dvb-usb-remote.c:261 Signed-off-by: Vito Caputo Reported-by: syzbot Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb/cxusb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/media/usb/dvb-usb/cxusb.c b/drivers/media/usb/dvb-usb/cxusb.c index cfe86b4864b3..47a9a791ee7d 100644 --- a/drivers/media/usb/dvb-usb/cxusb.c +++ b/drivers/media/usb/dvb-usb/cxusb.c @@ -455,7 +455,8 @@ static int cxusb_rc_query(struct dvb_usb_device *d) { u8 ircode[4]; - cxusb_ctrl_msg(d, CMD_GET_IR_CODE, NULL, 0, ircode, 4); + if (cxusb_ctrl_msg(d, CMD_GET_IR_CODE, NULL, 0, ircode, 4) < 0) + return 0; if (ircode[2] || ircode[3]) rc_keydown(d->rc_dev, RC_PROTO_NEC, From 2d4c3e5c180f0f91cdc41cf65675bd768401b584 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Wed, 16 Oct 2019 14:19:15 -0300 Subject: [PATCH 1125/3715] media: imon: invalid dereference in imon_touch_event commit f3f5ba42c58d56d50f539854d8cc188944e96087 upstream. The touch timer is set up in intf1. If the second interface does not exist, the timer and touch input device are not setup and we get the following error, when touch events are reported via intf0. kernel BUG at kernel/time/timer.c:956! invalid opcode: 0000 [#1] SMP KASAN CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.4.0-rc1+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__mod_timer kernel/time/timer.c:956 [inline] RIP: 0010:__mod_timer kernel/time/timer.c:949 [inline] RIP: 0010:mod_timer+0x5a2/0xb50 kernel/time/timer.c:1100 Code: 45 10 c7 44 24 14 ff ff ff ff 48 89 44 24 08 48 8d 45 20 48 c7 44 24 18 00 00 00 00 48 89 04 24 e9 5a fc ff ff e8 ae ce 0e 00 <0f> 0b e8 a7 ce 0e 00 4c 89 74 24 20 e9 37 fe ff ff e8 98 ce 0e 00 RSP: 0018:ffff8881db209930 EFLAGS: 00010006 RAX: ffffffff86c2b200 RBX: 00000000ffffa688 RCX: ffffffff83efc583 RDX: 0000000000000100 RSI: ffffffff812f4d82 RDI: ffff8881d2356200 RBP: ffff8881d23561e8 R08: ffffffff86c2b200 R09: ffffed103a46abeb R10: ffffed103a46abea R11: ffff8881d2355f53 R12: dffffc0000000000 R13: 1ffff1103b64132d R14: ffff8881d2355f50 R15: 0000000000000006 FS: 0000000000000000(0000) GS:ffff8881db200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f75e2799000 CR3: 00000001d3b07000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: imon_touch_event drivers/media/rc/imon.c:1348 [inline] imon_incoming_packet.isra.0+0x2546/0x2f10 drivers/media/rc/imon.c:1603 usb_rx_callback_intf0+0x151/0x1e0 drivers/media/rc/imon.c:1734 __usb_hcd_giveback_urb+0x1f2/0x470 drivers/usb/core/hcd.c:1654 usb_hcd_giveback_urb+0x368/0x420 drivers/usb/core/hcd.c:1719 dummy_timer+0x120f/0x2fa2 drivers/usb/gadget/udc/dummy_hcd.c:1965 call_timer_fn+0x179/0x650 kernel/time/timer.c:1404 expire_timers kernel/time/timer.c:1449 [inline] __run_timers kernel/time/timer.c:1773 [inline] __run_timers kernel/time/timer.c:1740 [inline] run_timer_softirq+0x5e3/0x1490 kernel/time/timer.c:1786 __do_softirq+0x221/0x912 kernel/softirq.c:292 invoke_softirq kernel/softirq.c:373 [inline] irq_exit+0x178/0x1a0 kernel/softirq.c:413 exiting_irq arch/x86/include/asm/apic.h:536 [inline] smp_apic_timer_interrupt+0x12f/0x500 arch/x86/kernel/apic/apic.c:1137 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:830 RIP: 0010:default_idle+0x28/0x2e0 arch/x86/kernel/process.c:581 Code: 90 90 41 56 41 55 65 44 8b 2d 44 3a 8f 7a 41 54 55 53 0f 1f 44 00 00 e8 36 ee d0 fb e9 07 00 00 00 0f 00 2d fa dd 4f 00 fb f4 <65> 44 8b 2d 20 3a 8f 7a 0f 1f 44 00 00 5b 5d 41 5c 41 5d 41 5e c3 RSP: 0018:ffffffff86c07da8 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13 RAX: 0000000000000007 RBX: ffffffff86c2b200 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000006 RDI: ffffffff86c2ba4c RBP: fffffbfff0d85640 R08: ffffffff86c2b200 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 cpuidle_idle_call kernel/sched/idle.c:154 [inline] do_idle+0x3b6/0x500 kernel/sched/idle.c:263 cpu_startup_entry+0x14/0x20 kernel/sched/idle.c:355 start_kernel+0x82a/0x864 init/main.c:784 secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241 Modules linked in: Reported-by: syzbot+f49d12d34f2321cf4df2@syzkaller.appspotmail.com Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/rc/imon.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/media/rc/imon.c b/drivers/media/rc/imon.c index a7547c88e4c3..edf8a7a76e86 100644 --- a/drivers/media/rc/imon.c +++ b/drivers/media/rc/imon.c @@ -1737,8 +1737,7 @@ static void imon_incoming_scancode(struct imon_context *ictx, spin_unlock_irqrestore(&ictx->kc_lock, flags); /* send touchscreen events through input subsystem if touchpad data */ - if (ictx->display_type == IMON_DISPLAY_TYPE_VGA && len == 8 && - buf[7] == 0x86) { + if (ictx->touch && len == 8 && buf[7] == 0x86) { imon_touch_event(ictx, buf); return; From 9870e74477c1db3234c716dccd0ccd157f3ee69d Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Thu, 14 Nov 2019 13:46:46 +0100 Subject: [PATCH 1126/3715] virtio_ring: fix return code on DMA mapping fails [ Upstream commit f7728002c1c7bfa787b276a31c3ef458739b8e7c ] Commit 780bc7903a32 ("virtio_ring: Support DMA APIs") makes virtqueue_add() return -EIO when we fail to map our I/O buffers. This is a very realistic scenario for guests with encrypted memory, as swiotlb may run out of space, depending on it's size and the I/O load. The virtio-blk driver interprets -EIO form virtqueue_add() as an IO error, despite the fact that swiotlb full is in absence of bugs a recoverable condition. Let us change the return code to -ENOMEM, and make the block layer recover form these failures when virtio-blk encounters the condition described above. Cc: stable@vger.kernel.org Fixes: 780bc7903a32 ("virtio_ring: Support DMA APIs") Signed-off-by: Halil Pasic Tested-by: Michael Mueller Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/virtio/virtio_ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index cc9d421c0929..b82bb0b08161 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -432,7 +432,7 @@ unmap_release: kfree(desc); END_USE(vq); - return -EIO; + return -ENOMEM; } /** From 0fbf00e767ebe0c2e1975fb0545c67ff6ef4a8a7 Mon Sep 17 00:00:00 2001 From: Hewenliang Date: Fri, 25 Oct 2019 00:35:15 -0400 Subject: [PATCH 1127/3715] usbip: tools: fix fd leakage in the function of read_attr_usbip_status commit 26a4d4c00f85cb844dd11dd35e848b079c2f5e8f upstream. We should close the fd before the return of read_attr_usbip_status. Fixes: 3391ba0e2792 ("usbip: tools: Extract generic code to be shared with vudc backend") Signed-off-by: Hewenliang Cc: stable Link: https://lore.kernel.org/r/20191025043515.20053-1-hewenliang4@huawei.com Signed-off-by: Greg Kroah-Hartman --- tools/usb/usbip/libsrc/usbip_host_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/usb/usbip/libsrc/usbip_host_common.c b/tools/usb/usbip/libsrc/usbip_host_common.c index f5ad219a324e..4bb905925b0e 100644 --- a/tools/usb/usbip/libsrc/usbip_host_common.c +++ b/tools/usb/usbip/libsrc/usbip_host_common.c @@ -69,7 +69,7 @@ static int32_t read_attr_usbip_status(struct usbip_usb_device *udev) } value = atoi(status); - + close(fd); return value; } From e7074b64f478269227997afcb67b385ec5ceb056 Mon Sep 17 00:00:00 2001 From: Suwan Kim Date: Mon, 11 Nov 2019 23:10:35 +0900 Subject: [PATCH 1128/3715] usbip: Fix uninitialized symbol 'nents' in stub_recv_cmd_submit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2a9125317b247f2cf35c196f968906dcf062ae2d upstream. Smatch reported that nents is not initialized and used in stub_recv_cmd_submit(). nents is currently initialized by sgl_alloc() and used to allocate multiple URBs when host controller doesn't support scatter-gather DMA. The use of uninitialized nents means that buf_len is zero and use_sg is true. But buffer length should not be zero when an URB uses scatter-gather DMA. To prevent this situation, add the conditional that checks buf_len and use_sg. And move the use of nents right after the sgl_alloc() to avoid the use of uninitialized nents. If the error occurs, it adds SDEV_EVENT_ERROR_MALLOC and stub_priv will be released by stub event handler and connection will be shut down. Fixes: ea44d190764b ("usbip: Implement SG support to vhci-hcd and stub driver") Reported-by: kbuild test robot Reported-by: Dan Carpenter Signed-off-by: Suwan Kim Acked-by: Shuah Khan Cc: stable Link: https://lore.kernel.org/r/20191111141035.27788-1-suwan.kim027@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub_rx.c | 50 ++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/drivers/usb/usbip/stub_rx.c b/drivers/usb/usbip/stub_rx.c index 8812d3edade1..cb24b22252e4 100644 --- a/drivers/usb/usbip/stub_rx.c +++ b/drivers/usb/usbip/stub_rx.c @@ -487,18 +487,50 @@ static void stub_recv_cmd_submit(struct stub_device *sdev, if (pipe == -1) return; + /* + * Smatch reported the error case where use_sg is true and buf_len is 0. + * In this case, It adds SDEV_EVENT_ERROR_MALLOC and stub_priv will be + * released by stub event handler and connection will be shut down. + */ priv = stub_priv_alloc(sdev, pdu); if (!priv) return; buf_len = (unsigned long long)pdu->u.cmd_submit.transfer_buffer_length; + if (use_sg && !buf_len) { + dev_err(&udev->dev, "sg buffer with zero length\n"); + goto err_malloc; + } + /* allocate urb transfer buffer, if needed */ if (buf_len) { if (use_sg) { sgl = sgl_alloc(buf_len, GFP_KERNEL, &nents); if (!sgl) goto err_malloc; + + /* Check if the server's HCD supports SG */ + if (!udev->bus->sg_tablesize) { + /* + * If the server's HCD doesn't support SG, break + * a single SG request into several URBs and map + * each SG list entry to corresponding URB + * buffer. The previously allocated SG list is + * stored in priv->sgl (If the server's HCD + * support SG, SG list is stored only in + * urb->sg) and it is used as an indicator that + * the server split single SG request into + * several URBs. Later, priv->sgl is used by + * stub_complete() and stub_send_ret_submit() to + * reassemble the divied URBs. + */ + support_sg = 0; + num_urbs = nents; + priv->completed_urbs = 0; + pdu->u.cmd_submit.transfer_flags &= + ~URB_DMA_MAP_SG; + } } else { buffer = kzalloc(buf_len, GFP_KERNEL); if (!buffer) @@ -506,24 +538,6 @@ static void stub_recv_cmd_submit(struct stub_device *sdev, } } - /* Check if the server's HCD supports SG */ - if (use_sg && !udev->bus->sg_tablesize) { - /* - * If the server's HCD doesn't support SG, break a single SG - * request into several URBs and map each SG list entry to - * corresponding URB buffer. The previously allocated SG - * list is stored in priv->sgl (If the server's HCD support SG, - * SG list is stored only in urb->sg) and it is used as an - * indicator that the server split single SG request into - * several URBs. Later, priv->sgl is used by stub_complete() and - * stub_send_ret_submit() to reassemble the divied URBs. - */ - support_sg = 0; - num_urbs = nents; - priv->completed_urbs = 0; - pdu->u.cmd_submit.transfer_flags &= ~URB_DMA_MAP_SG; - } - /* allocate urb array */ priv->num_urbs = num_urbs; priv->urbs = kmalloc_array(num_urbs, sizeof(*priv->urbs), GFP_KERNEL); From 5d1c57e674edc78d5781b2f20c5d55194e7422f6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 18 Nov 2019 10:21:19 +0100 Subject: [PATCH 1129/3715] usb-serial: cp201x: support Mark-10 digital force gauge commit 347bc8cb26388791c5881a3775cb14a3f765a674 upstream. Add support for the Mark-10 digital force gauge device to the cp201x driver. Based on a report and a larger patch from Joel Jennings Reported-by: Joel Jennings Cc: stable Acked-by: Johan Hovold Link: https://lore.kernel.org/r/20191118092119.GA153852@kroah.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 98e466c3cfca..8dd9852f399d 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -128,6 +128,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x8341) }, /* Siemens MC35PU GPRS Modem */ { USB_DEVICE(0x10C4, 0x8382) }, /* Cygnal Integrated Products, Inc. */ { USB_DEVICE(0x10C4, 0x83A8) }, /* Amber Wireless AMB2560 */ + { USB_DEVICE(0x10C4, 0x83AA) }, /* Mark-10 Digital Force Gauge */ { USB_DEVICE(0x10C4, 0x83D8) }, /* DekTec DTA Plus VHF/UHF Booster/Attenuator */ { USB_DEVICE(0x10C4, 0x8411) }, /* Kyocera GPS Module */ { USB_DEVICE(0x10C4, 0x8418) }, /* IRZ Automation Teleport SG-10 GSM/GPRS Modem */ From 79ac186da1ffd968dd8b0ac05364f5dc973f4365 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 7 Nov 2019 15:28:55 +0100 Subject: [PATCH 1130/3715] USB: chaoskey: fix error case of a timeout commit 92aa5986f4f7b5a8bf282ca0f50967f4326559f5 upstream. In case of a timeout or if a signal aborts a read communication with the device needs to be ended lest we overwrite an active URB the next time we do IO to the device, as the URB may still be active. Signed-off-by: Oliver Neukum Cc: stable Link: https://lore.kernel.org/r/20191107142856.16774-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/chaoskey.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/usb/misc/chaoskey.c b/drivers/usb/misc/chaoskey.c index eb0795c5ff7a..3a701c1e9e75 100644 --- a/drivers/usb/misc/chaoskey.c +++ b/drivers/usb/misc/chaoskey.c @@ -396,13 +396,17 @@ static int _chaoskey_fill(struct chaoskey *dev) !dev->reading, (started ? NAK_TIMEOUT : ALEA_FIRST_TIMEOUT) ); - if (result < 0) + if (result < 0) { + usb_kill_urb(dev->urb); goto out; + } - if (result == 0) + if (result == 0) { result = -ETIMEDOUT; - else + usb_kill_urb(dev->urb); + } else { result = dev->valid; + } out: /* Let the device go back to sleep eventually */ usb_autopm_put_interface(dev->interface); @@ -538,7 +542,21 @@ static int chaoskey_suspend(struct usb_interface *interface, static int chaoskey_resume(struct usb_interface *interface) { + struct chaoskey *dev; + struct usb_device *udev = interface_to_usbdev(interface); + usb_dbg(interface, "resume"); + dev = usb_get_intfdata(interface); + + /* + * We may have lost power. + * In that case the device that needs a long time + * for the first requests needs an extended timeout + * again + */ + if (le16_to_cpu(udev->descriptor.idVendor) == ALEA_VENDOR_ID) + dev->reads_started = false; + return 0; } #else From dd41c3f5910ed241f6c7f2e3715a8a609ca19945 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 6 Nov 2019 13:49:01 +0100 Subject: [PATCH 1131/3715] appledisplay: fix error handling in the scheduled work commit 91feb01596e5efc0cc922cc73f5583114dccf4d2 upstream. The work item can operate on 1. stale memory left over from the last transfer the actual length of the data transfered needs to be checked 2. memory already freed the error handling in appledisplay_probe() needs to cancel the work in that case Reported-and-tested-by: syzbot+495dab1f175edc9c2f13@syzkaller.appspotmail.com Signed-off-by: Oliver Neukum Cc: stable Link: https://lore.kernel.org/r/20191106124902.7765-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/appledisplay.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/usb/misc/appledisplay.c b/drivers/usb/misc/appledisplay.c index 3b59eaf81eef..aad7963e40e7 100644 --- a/drivers/usb/misc/appledisplay.c +++ b/drivers/usb/misc/appledisplay.c @@ -182,7 +182,12 @@ static int appledisplay_bl_get_brightness(struct backlight_device *bd) 0, pdata->msgdata, 2, ACD_USB_TIMEOUT); - brightness = pdata->msgdata[1]; + if (retval < 2) { + if (retval >= 0) + retval = -EMSGSIZE; + } else { + brightness = pdata->msgdata[1]; + } mutex_unlock(&pdata->sysfslock); if (retval < 0) @@ -317,6 +322,7 @@ error: if (pdata) { if (pdata->urb) { usb_kill_urb(pdata->urb); + cancel_delayed_work_sync(&pdata->work); if (pdata->urbdata) usb_free_coherent(pdata->udev, ACD_URB_BUFFER_LEN, pdata->urbdata, pdata->urb->transfer_dma); From 4f55d2480a915dde210fb2e2911811a6630dc078 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavel=20L=C3=B6bl?= Date: Fri, 1 Nov 2019 08:01:50 +0100 Subject: [PATCH 1132/3715] USB: serial: mos7840: add USB ID to support Moxa UPort 2210 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e696d00e65e81d46e911f24b12e441037bf11b38 upstream. Add USB ID for MOXA UPort 2210. This device contains mos7820 but it passes GPIO0 check implemented by driver and it's detected as mos7840. Hence product id check is added to force mos7820 mode. Signed-off-by: Pavel Löbl Cc: stable [ johan: rename id defines and add vendor-id check ] Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/mos7840.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 5e490177cf75..f31c5289983c 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -131,11 +131,15 @@ /* This driver also supports * ATEN UC2324 device using Moschip MCS7840 * ATEN UC2322 device using Moschip MCS7820 + * MOXA UPort 2210 device using Moschip MCS7820 */ #define USB_VENDOR_ID_ATENINTL 0x0557 #define ATENINTL_DEVICE_ID_UC2324 0x2011 #define ATENINTL_DEVICE_ID_UC2322 0x7820 +#define USB_VENDOR_ID_MOXA 0x110a +#define MOXA_DEVICE_ID_2210 0x2210 + /* Interrupt Routine Defines */ #define SERIAL_IIR_RLS 0x06 @@ -206,6 +210,7 @@ static const struct usb_device_id id_table[] = { {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL2_4)}, {USB_DEVICE(USB_VENDOR_ID_ATENINTL, ATENINTL_DEVICE_ID_UC2324)}, {USB_DEVICE(USB_VENDOR_ID_ATENINTL, ATENINTL_DEVICE_ID_UC2322)}, + {USB_DEVICE(USB_VENDOR_ID_MOXA, MOXA_DEVICE_ID_2210)}, {} /* terminating entry */ }; MODULE_DEVICE_TABLE(usb, id_table); @@ -2065,6 +2070,7 @@ static int mos7840_probe(struct usb_serial *serial, const struct usb_device_id *id) { u16 product = le16_to_cpu(serial->dev->descriptor.idProduct); + u16 vid = le16_to_cpu(serial->dev->descriptor.idVendor); u8 *buf; int device_type; @@ -2074,6 +2080,11 @@ static int mos7840_probe(struct usb_serial *serial, goto out; } + if (vid == USB_VENDOR_ID_MOXA && product == MOXA_DEVICE_ID_2210) { + device_type = MOSCHIP_DEVICE_ID_7820; + goto out; + } + buf = kzalloc(VENDOR_READ_LENGTH, GFP_KERNEL); if (!buf) return -ENOMEM; From bbad873cde8db9bc2f5fc12dca965eebbdbff017 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 7 Nov 2019 14:21:18 +0100 Subject: [PATCH 1133/3715] USB: serial: mos7720: fix remote wakeup commit ea422312a462696093b5db59d294439796cba4ad upstream. The driver was setting the device remote-wakeup feature during probe in violation of the USB specification (which says it should only be set just prior to suspending the device). This could potentially waste power during suspend as well as lead to spurious wakeups. Note that USB core would clear the remote-wakeup feature at first resume. Fixes: 0f64478cbc7a ("USB: add USB serial mos7720 driver") Cc: stable # 2.6.19 Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/mos7720.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index 393a91ab56ed..37967f4d93fd 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -1905,10 +1905,6 @@ static int mos7720_startup(struct usb_serial *serial) product = le16_to_cpu(serial->dev->descriptor.idProduct); dev = serial->dev; - /* setting configuration feature to one */ - usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), - (__u8)0x03, 0x00, 0x01, 0x00, NULL, 0x00, 5000); - if (product == MOSCHIP_DEVICE_ID_7715) { struct urb *urb = serial->port[0]->interrupt_in_urb; From 03b951f097c5c6bf4fe05c130f4170592989c8d4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 7 Nov 2019 14:21:19 +0100 Subject: [PATCH 1134/3715] USB: serial: mos7840: fix remote wakeup commit 92fe35fb9c70a00d8fbbf5bd6172c921dd9c7815 upstream. The driver was setting the device remote-wakeup feature during probe in violation of the USB specification (which says it should only be set just prior to suspending the device). This could potentially waste power during suspend as well as lead to spurious wakeups. Note that USB core would clear the remote-wakeup feature at first resume. Fixes: 3f5429746d91 ("USB: Moschip 7840 USB-Serial Driver") Cc: stable # 2.6.19 Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/mos7840.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index f31c5289983c..285527f115dd 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -2338,11 +2338,6 @@ out: goto error; } else dev_dbg(&port->dev, "ZLP_REG5 Writing success status%d\n", status); - - /* setting configuration feature to one */ - usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), - 0x03, 0x00, 0x01, 0x00, NULL, 0x00, - MOS_WDR_TIMEOUT); } return 0; error: From 082c5765f7cbd908946a486329de07c85a5376d1 Mon Sep 17 00:00:00 2001 From: Aleksander Morgado Date: Thu, 7 Nov 2019 11:55:08 +0100 Subject: [PATCH 1135/3715] USB: serial: option: add support for DW5821e with eSIM support commit 957c31ea082e3fe5196f46d5b04018b10de47400 upstream. The device exposes AT, NMEA and DIAG ports in both USB configurations. Exactly same layout as the default DW5821e module, just a different vid/pid. P: Vendor=413c ProdID=81e0 Rev=03.18 S: Manufacturer=Dell Inc. S: Product=DW5821e-eSIM Snapdragon X20 LTE S: SerialNumber=0123456789ABCDEF C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#=0x0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I: If#=0x1 Alt= 0 #EPs= 1 Cls=03(HID ) Sub=00 Prot=00 Driver=usbhid I: If#=0x2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#=0x3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#=0x4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#=0x5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option P: Vendor=413c ProdID=81e0 Rev=03.18 S: Manufacturer=Dell Inc. S: Product=DW5821e-eSIM Snapdragon X20 LTE S: SerialNumber=0123456789ABCDEF C: #Ifs= 7 Cfg#= 2 Atr=a0 MxPwr=500mA I: If#=0x0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim I: If#=0x1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I: If#=0x2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#=0x3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#=0x4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#=0x5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#=0x6 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) Signed-off-by: Aleksander Morgado Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index b9fad046828d..c91c7ebb7601 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -200,6 +200,7 @@ static void option_instat_callback(struct urb *urb); #define DELL_PRODUCT_5804_MINICARD_ATT 0x819b /* Novatel E371 */ #define DELL_PRODUCT_5821E 0x81d7 +#define DELL_PRODUCT_5821E_ESIM 0x81e0 #define KYOCERA_VENDOR_ID 0x0c88 #define KYOCERA_PRODUCT_KPC650 0x17da @@ -1047,6 +1048,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, DELL_PRODUCT_5804_MINICARD_ATT, 0xff, 0xff, 0xff) }, { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5821E), .driver_info = RSVD(0) | RSVD(1) | RSVD(6) }, + { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5821E_ESIM), + .driver_info = RSVD(0) | RSVD(1) | RSVD(6) }, { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_E100A) }, /* ADU-E100, ADU-310 */ { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_500A) }, { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_620UW) }, From 8196fd1b52049c372402bb0868da4239509087e5 Mon Sep 17 00:00:00 2001 From: Aleksander Morgado Date: Wed, 13 Nov 2019 11:14:05 +0100 Subject: [PATCH 1136/3715] USB: serial: option: add support for Foxconn T77W968 LTE modules commit f0797095423e6ea3b4be61134ee353c7f504d440 upstream. These are the Foxconn-branded variants of the Dell DW5821e modules, same USB layout as those. The device exposes AT, NMEA and DIAG ports in both USB configurations. P: Vendor=0489 ProdID=e0b4 Rev=03.18 S: Manufacturer=FII S: Product=T77W968 LTE S: SerialNumber=0123456789ABCDEF C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#=0x0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I: If#=0x1 Alt= 0 #EPs= 1 Cls=03(HID ) Sub=00 Prot=00 Driver=usbhid I: If#=0x2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#=0x3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#=0x4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#=0x5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option P: Vendor=0489 ProdID=e0b4 Rev=03.18 S: Manufacturer=FII S: Product=T77W968 LTE S: SerialNumber=0123456789ABCDEF C: #Ifs= 7 Cfg#= 2 Atr=a0 MxPwr=500mA I: If#=0x0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim I: If#=0x1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I: If#=0x2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#=0x3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#=0x4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#=0x5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#=0x6 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) Signed-off-by: Aleksander Morgado [ johan: drop id defines ] Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index c91c7ebb7601..8d349f2e5656 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1995,6 +1995,10 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x13) }, { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x14) }, { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x1b) }, + { USB_DEVICE(0x0489, 0xe0b4), /* Foxconn T77W968 */ + .driver_info = RSVD(0) | RSVD(1) | RSVD(6) }, + { USB_DEVICE(0x0489, 0xe0b5), /* Foxconn T77W968 ESIM */ + .driver_info = RSVD(0) | RSVD(1) | RSVD(6) }, { USB_DEVICE(0x1508, 0x1001), /* Fibocom NL668 */ .driver_info = RSVD(4) | RSVD(5) | RSVD(6) }, { USB_DEVICE(0x2cb7, 0x0104), /* Fibocom NL678 series */ From 8199730f48ec30c721a8a1bf028381241e5f7476 Mon Sep 17 00:00:00 2001 From: Bernd Porr Date: Mon, 18 Nov 2019 23:07:59 +0000 Subject: [PATCH 1137/3715] staging: comedi: usbduxfast: usbduxfast_ai_cmdtest rounding error commit 5618332e5b955b4bff06d0b88146b971c8dd7b32 upstream. The userspace comedilib function 'get_cmd_generic_timed' fills the cmd structure with an informed guess and then calls the function 'usbduxfast_ai_cmdtest' in this driver repeatedly while 'usbduxfast_ai_cmdtest' is modifying the cmd struct until it no longer changes. However, because of rounding errors this never converged because 'steps = (cmd->convert_arg * 30) / 1000' and then back to 'cmd->convert_arg = (steps * 1000) / 30' won't be the same because of rounding errors. 'Steps' should only be converted back to the 'convert_arg' if 'steps' has actually been modified. In addition the case of steps being 0 wasn't checked which is also now done. Signed-off-by: Bernd Porr Cc: # 4.4+ Reviewed-by: Ian Abbott Link: https://lore.kernel.org/r/20191118230759.1727-1-mail@berndporr.me.uk Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/usbduxfast.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/staging/comedi/drivers/usbduxfast.c b/drivers/staging/comedi/drivers/usbduxfast.c index 608403c7586b..f0572d6a5f63 100644 --- a/drivers/staging/comedi/drivers/usbduxfast.c +++ b/drivers/staging/comedi/drivers/usbduxfast.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2004-2014 Bernd Porr, mail@berndporr.me.uk + * Copyright (C) 2004-2019 Bernd Porr, mail@berndporr.me.uk * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,7 +17,7 @@ * Description: University of Stirling USB DAQ & INCITE Technology Limited * Devices: [ITL] USB-DUX-FAST (usbduxfast) * Author: Bernd Porr - * Updated: 10 Oct 2014 + * Updated: 16 Nov 2019 * Status: stable */ @@ -31,6 +31,7 @@ * * * Revision history: + * 1.0: Fixed a rounding error in usbduxfast_ai_cmdtest * 0.9: Dropping the first data packet which seems to be from the last transfer. * Buffer overflows in the FX2 are handed over to comedi. * 0.92: Dropping now 4 packets. The quad buffer has to be emptied. @@ -359,6 +360,7 @@ static int usbduxfast_ai_cmdtest(struct comedi_device *dev, struct comedi_cmd *cmd) { int err = 0; + int err2 = 0; unsigned int steps; unsigned int arg; @@ -408,11 +410,16 @@ static int usbduxfast_ai_cmdtest(struct comedi_device *dev, */ steps = (cmd->convert_arg * 30) / 1000; if (cmd->chanlist_len != 1) - err |= comedi_check_trigger_arg_min(&steps, - MIN_SAMPLING_PERIOD); - err |= comedi_check_trigger_arg_max(&steps, MAX_SAMPLING_PERIOD); - arg = (steps * 1000) / 30; - err |= comedi_check_trigger_arg_is(&cmd->convert_arg, arg); + err2 |= comedi_check_trigger_arg_min(&steps, + MIN_SAMPLING_PERIOD); + else + err2 |= comedi_check_trigger_arg_min(&steps, 1); + err2 |= comedi_check_trigger_arg_max(&steps, MAX_SAMPLING_PERIOD); + if (err2) { + err |= err2; + arg = (steps * 1000) / 30; + err |= comedi_check_trigger_arg_is(&cmd->convert_arg, arg); + } if (cmd->stop_src == TRIG_COUNT) err |= comedi_check_trigger_arg_min(&cmd->stop_arg, 1); From 583f9d772ef869a108c0909176f9f4570e8fc685 Mon Sep 17 00:00:00 2001 From: "Christopher M. Riedl" Date: Thu, 23 May 2019 21:46:48 -0500 Subject: [PATCH 1138/3715] powerpc/64s: support nospectre_v2 cmdline option commit d8f0e0b073e1ec52a05f0c2a56318b47387d2f10 upstream. Add support for disabling the kernel implemented spectre v2 mitigation (count cache flush on context switch) via the nospectre_v2 and mitigations=off cmdline options. Suggested-by: Michael Ellerman Signed-off-by: Christopher M. Riedl Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190524024647.381-1-cmr@informatik.wtf Signed-off-by: Daniel Axtens Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/security.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 68d4ec373cfc..cc0aac4bde75 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -28,7 +28,7 @@ static enum count_cache_flush_type count_cache_flush_type = COUNT_CACHE_FLUSH_NO bool barrier_nospec_enabled; static bool no_nospec; static bool btb_flush_enabled; -#ifdef CONFIG_PPC_FSL_BOOK3E +#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3S_64) static bool no_spectrev2; #endif @@ -106,7 +106,7 @@ static __init int barrier_nospec_debugfs_init(void) device_initcall(barrier_nospec_debugfs_init); #endif /* CONFIG_DEBUG_FS */ -#ifdef CONFIG_PPC_FSL_BOOK3E +#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3S_64) static int __init handle_nospectre_v2(char *p) { no_spectrev2 = true; @@ -114,6 +114,9 @@ static int __init handle_nospectre_v2(char *p) return 0; } early_param("nospectre_v2", handle_nospectre_v2); +#endif /* CONFIG_PPC_FSL_BOOK3E || CONFIG_PPC_BOOK3S_64 */ + +#ifdef CONFIG_PPC_FSL_BOOK3E void setup_spectre_v2(void) { if (no_spectrev2 || cpu_mitigations_off()) @@ -391,7 +394,17 @@ static void toggle_count_cache_flush(bool enable) void setup_count_cache_flush(void) { - toggle_count_cache_flush(true); + bool enable = true; + + if (no_spectrev2 || cpu_mitigations_off()) { + if (security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED) || + security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED)) + pr_warn("Spectre v2 mitigations not under software control, can't disable\n"); + + enable = false; + } + + toggle_count_cache_flush(enable); } #ifdef CONFIG_DEBUG_FS From bc06abfb2bc7e5856f997ebc57c361caa08cd961 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 13 Nov 2019 21:05:41 +1100 Subject: [PATCH 1139/3715] powerpc/book3s64: Fix link stack flush on context switch commit 39e72bf96f5847ba87cc5bd7a3ce0fed813dc9ad upstream. In commit ee13cb249fab ("powerpc/64s: Add support for software count cache flush"), I added support for software to flush the count cache (indirect branch cache) on context switch if firmware told us that was the required mitigation for Spectre v2. As part of that code we also added a software flush of the link stack (return address stack), which protects against Spectre-RSB between user processes. That is all correct for CPUs that activate that mitigation, which is currently Power9 Nimbus DD2.3. What I got wrong is that on older CPUs, where firmware has disabled the count cache, we also need to flush the link stack on context switch. To fix it we create a new feature bit which is not set by firmware, which tells us we need to flush the link stack. We set that when firmware tells us that either of the existing Spectre v2 mitigations are enabled. Then we adjust the patching code so that if we see that feature bit we enable the link stack flush. If we're also told to flush the count cache in software then we fall through and do that also. On the older CPUs we don't need to do do the software count cache flush, firmware has disabled it, so in that case we patch in an early return after the link stack flush. The naming of some of the functions is awkward after this patch, because they're called "count cache" but they also do link stack. But we'll fix that up in a later commit to ease backporting. This is the fix for CVE-2019-18660. Reported-by: Anthony Steinhauser Fixes: ee13cb249fab ("powerpc/64s: Add support for software count cache flush") Cc: stable@vger.kernel.org # v4.4+ Signed-off-by: Michael Ellerman [dja: straightforward backport to v4.14] Signed-off-by: Daniel Axtens Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/asm-prototypes.h | 1 + arch/powerpc/include/asm/security_features.h | 3 ++ arch/powerpc/kernel/entry_64.S | 6 +++ arch/powerpc/kernel/security.c | 48 ++++++++++++++++++-- 4 files changed, 54 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index ba4c75062d49..fb5f911b0d91 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -129,6 +129,7 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip); /* Patch sites */ extern s32 patch__call_flush_count_cache; extern s32 patch__flush_count_cache_return; +extern s32 patch__flush_link_stack_return; extern long flush_count_cache; diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index 759597bf0fd8..ccf44c135389 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -81,6 +81,9 @@ static inline bool security_ftr_enabled(unsigned long feature) // Software required to flush count cache on context switch #define SEC_FTR_FLUSH_COUNT_CACHE 0x0000000000000400ull +// Software required to flush link stack on context switch +#define SEC_FTR_FLUSH_LINK_STACK 0x0000000000001000ull + // Features enabled by default #define SEC_FTR_DEFAULT \ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 12395895b9aa..02a0bf52aec0 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -524,6 +524,7 @@ flush_count_cache: /* Save LR into r9 */ mflr r9 + // Flush the link stack .rept 64 bl .+4 .endr @@ -533,6 +534,11 @@ flush_count_cache: .balign 32 /* Restore LR */ 1: mtlr r9 + + // If we're just flushing the link stack, return here +3: nop + patch_site 3b patch__flush_link_stack_return + li r9,0x7fff mtctr r9 diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index cc0aac4bde75..88e582d2bad7 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -24,6 +24,7 @@ enum count_cache_flush_type { COUNT_CACHE_FLUSH_HW = 0x4, }; static enum count_cache_flush_type count_cache_flush_type = COUNT_CACHE_FLUSH_NONE; +static bool link_stack_flush_enabled; bool barrier_nospec_enabled; static bool no_nospec; @@ -204,11 +205,19 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c if (ccd) seq_buf_printf(&s, "Indirect branch cache disabled"); + + if (link_stack_flush_enabled) + seq_buf_printf(&s, ", Software link stack flush"); + } else if (count_cache_flush_type != COUNT_CACHE_FLUSH_NONE) { seq_buf_printf(&s, "Mitigation: Software count cache flush"); if (count_cache_flush_type == COUNT_CACHE_FLUSH_HW) seq_buf_printf(&s, " (hardware accelerated)"); + + if (link_stack_flush_enabled) + seq_buf_printf(&s, ", Software link stack flush"); + } else if (btb_flush_enabled) { seq_buf_printf(&s, "Mitigation: Branch predictor state flush"); } else { @@ -369,18 +378,40 @@ static __init int stf_barrier_debugfs_init(void) device_initcall(stf_barrier_debugfs_init); #endif /* CONFIG_DEBUG_FS */ +static void no_count_cache_flush(void) +{ + count_cache_flush_type = COUNT_CACHE_FLUSH_NONE; + pr_info("count-cache-flush: software flush disabled.\n"); +} + static void toggle_count_cache_flush(bool enable) { - if (!enable || !security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) { + if (!security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE) && + !security_ftr_enabled(SEC_FTR_FLUSH_LINK_STACK)) + enable = false; + + if (!enable) { patch_instruction_site(&patch__call_flush_count_cache, PPC_INST_NOP); - count_cache_flush_type = COUNT_CACHE_FLUSH_NONE; - pr_info("count-cache-flush: software flush disabled.\n"); + pr_info("link-stack-flush: software flush disabled.\n"); + link_stack_flush_enabled = false; + no_count_cache_flush(); return; } + // This enables the branch from _switch to flush_count_cache patch_branch_site(&patch__call_flush_count_cache, (u64)&flush_count_cache, BRANCH_SET_LINK); + pr_info("link-stack-flush: software flush enabled.\n"); + link_stack_flush_enabled = true; + + // If we just need to flush the link stack, patch an early return + if (!security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) { + patch_instruction_site(&patch__flush_link_stack_return, PPC_INST_BLR); + no_count_cache_flush(); + return; + } + if (!security_ftr_enabled(SEC_FTR_BCCTR_FLUSH_ASSIST)) { count_cache_flush_type = COUNT_CACHE_FLUSH_SW; pr_info("count-cache-flush: full software flush sequence enabled.\n"); @@ -399,11 +430,20 @@ void setup_count_cache_flush(void) if (no_spectrev2 || cpu_mitigations_off()) { if (security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED) || security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED)) - pr_warn("Spectre v2 mitigations not under software control, can't disable\n"); + pr_warn("Spectre v2 mitigations not fully under software control, can't disable\n"); enable = false; } + /* + * There's no firmware feature flag/hypervisor bit to tell us we need to + * flush the link stack on context switch. So we set it here if we see + * either of the Spectre v2 mitigations that aim to protect userspace. + */ + if (security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED) || + security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) + security_ftr_set(SEC_FTR_FLUSH_LINK_STACK); + toggle_count_cache_flush(enable); } From 38a2fa7519028e84023e1ab855a6be26186b20ab Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 13 Nov 2019 21:05:44 +1100 Subject: [PATCH 1140/3715] KVM: PPC: Book3S HV: Flush link stack on guest exit to host kernel commit af2e8c68b9c5403f77096969c516f742f5bb29e0 upstream. On some systems that are vulnerable to Spectre v2, it is up to software to flush the link stack (return address stack), in order to protect against Spectre-RSB. When exiting from a guest we do some house keeping and then potentially exit to C code which is several stack frames deep in the host kernel. We will then execute a series of returns without preceeding calls, opening up the possiblity that the guest could have poisoned the link stack, and direct speculative execution of the host to a gadget of some sort. To prevent this we add a flush of the link stack on exit from a guest. Signed-off-by: Michael Ellerman [dja: straightforward backport to v4.14] Signed-off-by: Daniel Axtens Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/asm-prototypes.h | 2 ++ arch/powerpc/kernel/security.c | 9 ++++++++ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 27 +++++++++++++++++++++++ 3 files changed, 38 insertions(+) diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index fb5f911b0d91..2d4444981c2c 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -130,7 +130,9 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip); extern s32 patch__call_flush_count_cache; extern s32 patch__flush_count_cache_return; extern s32 patch__flush_link_stack_return; +extern s32 patch__call_kvm_flush_link_stack; extern long flush_count_cache; +extern long kvm_flush_link_stack; #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */ diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 88e582d2bad7..f5d6541bf8c2 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -392,6 +392,9 @@ static void toggle_count_cache_flush(bool enable) if (!enable) { patch_instruction_site(&patch__call_flush_count_cache, PPC_INST_NOP); +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + patch_instruction_site(&patch__call_kvm_flush_link_stack, PPC_INST_NOP); +#endif pr_info("link-stack-flush: software flush disabled.\n"); link_stack_flush_enabled = false; no_count_cache_flush(); @@ -402,6 +405,12 @@ static void toggle_count_cache_flush(bool enable) patch_branch_site(&patch__call_flush_count_cache, (u64)&flush_count_cache, BRANCH_SET_LINK); +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + // This enables the branch from guest_exit_cont to kvm_flush_link_stack + patch_branch_site(&patch__call_kvm_flush_link_stack, + (u64)&kvm_flush_link_stack, BRANCH_SET_LINK); +#endif + pr_info("link-stack-flush: software flush enabled.\n"); link_stack_flush_enabled = true; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 663a398449b7..46ea42f40334 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -18,6 +18,7 @@ */ #include +#include #include #include #include @@ -1445,6 +1446,10 @@ mc_cont: 1: #endif /* CONFIG_KVM_XICS */ + /* Possibly flush the link stack here. */ +1: nop + patch_site 1b patch__call_kvm_flush_link_stack + stw r12, STACK_SLOT_TRAP(r1) mr r3, r12 /* Increment exit count, poke other threads to exit */ @@ -1957,6 +1962,28 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) mtlr r0 blr +.balign 32 +.global kvm_flush_link_stack +kvm_flush_link_stack: + /* Save LR into r0 */ + mflr r0 + + /* Flush the link stack. On Power8 it's up to 32 entries in size. */ + .rept 32 + bl .+4 + .endr + + /* And on Power9 it's up to 64. */ +BEGIN_FTR_SECTION + .rept 32 + bl .+4 + .endr +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + + /* Restore LR */ + mtlr r0 + blr + /* * Check whether an HDSI is an HPTE not found fault or something else. * If it is an HPTE not found fault that is due to the guest accessing From 55a6d11eb6ccb55e531fb31311a8822c8a91f52e Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 27 Nov 2019 14:56:37 -0500 Subject: [PATCH 1141/3715] x86/hyperv: mark hyperv_init as __init function This change was done upstream as part of 6b48cb5f8347 ("X86/Hyper-V: Enlighten APIC access"), but that commit introduced a lot of new functionality we don't want to backport. This change eliminates a build warning. Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/hyperv/hv_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 2653b7b25d17..924fa9c07368 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -125,7 +125,7 @@ static int __init hv_pci_init(void) * 1. Setup the hypercall page. * 2. Register Hyper-V specific clocksource. */ -void hyperv_init(void) +void __init hyperv_init(void) { u64 guest_id, required_msrs; union hv_x64_msr_hypercall_contents hypercall_msr; From fbc5fe7a54d02e11972e3b2a5ddb6ffc88162c8f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 1 Dec 2019 09:14:37 +0100 Subject: [PATCH 1142/3715] Linux 4.14.157 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b1db48ad832e..dad90f53faeb 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 156 +SUBLEVEL = 157 EXTRAVERSION = NAME = Petit Gorille From e6b1fb0e83b2ff84f5b2c5f27fc6d33dbfe2ca92 Mon Sep 17 00:00:00 2001 From: Nathan Huckleberry Date: Thu, 22 Aug 2019 21:26:53 +0100 Subject: [PATCH 1143/3715] BACKPORT: ARM: 8900/1: UNWINDER_FRAME_POINTER implementation for Clang commit 6dc5fd93b2f1ef75d5e50fced8cb193811f25f22 upstream. The stackframe setup when compiled with clang is different. Since the stack unwinder expects the gcc stackframe setup it fails to print backtraces. This patch adds support for the clang stackframe setup. Link: https://github.com/ClangBuiltLinux/linux/issues/35 Cc: clang-built-linux@googlegroups.com Suggested-by: Tri Vo Signed-off-by: Nathan Huckleberry Tested-by: Nick Desaulniers Reviewed-by: Nick Desaulniers Signed-off-by: Russell King [nd: configs were renamed in f9b58e8c7d03, CONFIG_CC_IS_{GCC|CLANG} did not exist until a4353898980c/469cb7376c06] Signed-off-by: Nick Desaulniers Bug: 145525910 Change-Id: I9f595cff06a48498027e7dc1b958c35fc5da46e2 --- arch/arm/Makefile | 5 +- arch/arm/lib/Makefile | 8 +- arch/arm/lib/backtrace-clang.S | 217 +++++++++++++++++++++++++++++++++ 3 files changed, 228 insertions(+), 2 deletions(-) create mode 100644 arch/arm/lib/backtrace-clang.S diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 826dff0dc631..6aa61257ed18 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -39,7 +39,10 @@ KBUILD_CFLAGS += $(call cc-option,-mno-unaligned-access) endif ifeq ($(CONFIG_FRAME_POINTER),y) -KBUILD_CFLAGS +=-fno-omit-frame-pointer -mapcs -mno-sched-prolog +KBUILD_CFLAGS +=-fno-omit-frame-pointer +ifeq ($(cc-name),gcc) +KBUILD_CFLAGS += -mapcs -mno-sched-prolog +endif endif ifeq ($(CONFIG_CPU_BIG_ENDIAN),y) diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 4cf026f3f00d..69f135069bd0 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -5,7 +5,7 @@ # Copyright (C) 1995-2000 Russell King # -lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ +lib-y := changebit.o csumipv6.o csumpartial.o \ csumpartialcopy.o csumpartialcopyuser.o clearbit.o \ delay.o delay-loop.o findbit.o memchr.o memcpy.o \ memmove.o memset.o memzero.o setbit.o \ @@ -19,6 +19,12 @@ lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ mmu-y := clear_user.o copy_page.o getuser.o putuser.o \ copy_from_user.o copy_to_user.o +ifeq ($(cc-name),clang) + lib-y += backtrace-clang.o +else + lib-y += backtrace.o +endif + # using lib_ here won't override already available weak symbols obj-$(CONFIG_UACCESS_WITH_MEMCPY) += uaccess_with_memcpy.o diff --git a/arch/arm/lib/backtrace-clang.S b/arch/arm/lib/backtrace-clang.S new file mode 100644 index 000000000000..2ff375144b55 --- /dev/null +++ b/arch/arm/lib/backtrace-clang.S @@ -0,0 +1,217 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/backtrace-clang.S + * + * Copyright (C) 2019 Nathan Huckleberry + * + */ +#include +#include +#include + .text + +/* fp is 0 or stack frame */ + +#define frame r4 +#define sv_fp r5 +#define sv_pc r6 +#define mask r7 +#define sv_lr r8 + +ENTRY(c_backtrace) + +#if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK) + ret lr +ENDPROC(c_backtrace) +#else + + +/* + * Clang does not store pc or sp in function prologues so we don't know exactly + * where the function starts. + * + * We can treat the current frame's lr as the saved pc and the preceding + * frame's lr as the current frame's lr, but we can't trace the most recent + * call. Inserting a false stack frame allows us to reference the function + * called last in the stacktrace. + * + * If the call instruction was a bl we can look at the callers branch + * instruction to calculate the saved pc. We can recover the pc in most cases, + * but in cases such as calling function pointers we cannot. In this case, + * default to using the lr. This will be some address in the function, but will + * not be the function start. + * + * Unfortunately due to the stack frame layout we can't dump r0 - r3, but these + * are less frequently saved. + * + * Stack frame layout: + * + * saved lr + * frame=> saved fp + * optionally saved caller registers (r4 - r10) + * optionally saved arguments (r0 - r3) + * + * + * + * Functions start with the following code sequence: + * corrected pc => stmfd sp!, {..., fp, lr} + * add fp, sp, #x + * stmfd sp!, {r0 - r3} (optional) + * + * + * + * + * + * + * The diagram below shows an example stack setup for dump_stack. + * + * The frame for c_backtrace has pointers to the code of dump_stack. This is + * why the frame of c_backtrace is used to for the pc calculation of + * dump_stack. This is why we must move back a frame to print dump_stack. + * + * The stored locals for dump_stack are in dump_stack's frame. This means that + * to fully print dump_stack's frame we need both the frame for dump_stack (for + * locals) and the frame that was called by dump_stack (for pc). + * + * To print locals we must know where the function start is. If we read the + * function prologue opcodes we can determine which variables are stored in the + * stack frame. + * + * To find the function start of dump_stack we can look at the stored LR of + * show_stack. It points at the instruction directly after the bl dump_stack. + * We can then read the offset from the bl opcode to determine where the branch + * takes us. The address calculated must be the start of dump_stack. + * + * c_backtrace frame dump_stack: + * {[LR] } ============| ... + * {[FP] } =======| | bl c_backtrace + * | |=> ... + * {[R4-R10]} | + * {[R0-R3] } | show_stack: + * dump_stack frame | ... + * {[LR] } =============| bl dump_stack + * {[FP] } <=======| |=> ... + * {[R4-R10]} + * {[R0-R3] } + */ + + stmfd sp!, {r4 - r9, fp, lr} @ Save an extra register + @ to ensure 8 byte alignment + movs frame, r0 @ if frame pointer is zero + beq no_frame @ we have no stack frames + tst r1, #0x10 @ 26 or 32-bit mode? + moveq mask, #0xfc000003 + movne mask, #0 @ mask for 32-bit + +/* + * Switches the current frame to be the frame for dump_stack. + */ + add frame, sp, #24 @ switch to false frame +for_each_frame: tst frame, mask @ Check for address exceptions + bne no_frame + +/* + * sv_fp is the stack frame with the locals for the current considered + * function. + * + * sv_pc is the saved lr frame the frame above. This is a pointer to a code + * address within the current considered function, but it is not the function + * start. This value gets updated to be the function start later if it is + * possible. + */ +1001: ldr sv_pc, [frame, #4] @ get saved 'pc' +1002: ldr sv_fp, [frame, #0] @ get saved fp + + teq sv_fp, mask @ make sure next frame exists + beq no_frame + +/* + * sv_lr is the lr from the function that called the current function. This is + * a pointer to a code address in the current function's caller. sv_lr-4 is + * the instruction used to call the current function. + * + * This sv_lr can be used to calculate the function start if the function was + * called using a bl instruction. If the function start can be recovered sv_pc + * is overwritten with the function start. + * + * If the current function was called using a function pointer we cannot + * recover the function start and instead continue with sv_pc as an arbitrary + * value within the current function. If this is the case we cannot print + * registers for the current function, but the stacktrace is still printed + * properly. + */ +1003: ldr sv_lr, [sv_fp, #4] @ get saved lr from next frame + + ldr r0, [sv_lr, #-4] @ get call instruction + ldr r3, .Lopcode+4 + and r2, r3, r0 @ is this a bl call + teq r2, r3 + bne finished_setup @ give up if it's not + and r0, #0xffffff @ get call offset 24-bit int + lsl r0, r0, #8 @ sign extend offset + asr r0, r0, #8 + ldr sv_pc, [sv_fp, #4] @ get lr address + add sv_pc, sv_pc, #-4 @ get call instruction address + add sv_pc, sv_pc, #8 @ take care of prefetch + add sv_pc, sv_pc, r0, lsl #2@ find function start + +finished_setup: + + bic sv_pc, sv_pc, mask @ mask PC/LR for the mode + +/* + * Print the function (sv_pc) and where it was called from (sv_lr). + */ +1004: mov r0, sv_pc + + mov r1, sv_lr + mov r2, frame + bic r1, r1, mask @ mask PC/LR for the mode + bl dump_backtrace_entry + +/* + * Test if the function start is a stmfd instruction to determine which + * registers were stored in the function prologue. + * + * If we could not recover the sv_pc because we were called through a function + * pointer the comparison will fail and no registers will print. Unwinding will + * continue as if there had been no registers stored in this frame. + */ +1005: ldr r1, [sv_pc, #0] @ if stmfd sp!, {..., fp, lr} + ldr r3, .Lopcode @ instruction exists, + teq r3, r1, lsr #11 + ldr r0, [frame] @ locals are stored in + @ the preceding frame + subeq r0, r0, #4 + bleq dump_backtrace_stm @ dump saved registers + +/* + * If we are out of frames or if the next frame is invalid. + */ + teq sv_fp, #0 @ zero saved fp means + beq no_frame @ no further frames + + cmp sv_fp, frame @ next frame must be + mov frame, sv_fp @ above the current frame + bhi for_each_frame + +1006: adr r0, .Lbad + mov r1, frame + bl printk +no_frame: ldmfd sp!, {r4 - r9, fp, pc} +ENDPROC(c_backtrace) + .pushsection __ex_table,"a" + .align 3 + .long 1001b, 1006b + .long 1002b, 1006b + .long 1003b, 1006b + .long 1004b, 1006b + .long 1005b, 1006b + .popsection + +.Lbad: .asciz "Backtrace aborted due to bad frame pointer <%p>\n" + .align +.Lopcode: .word 0xe92d4800 >> 11 @ stmfd sp!, {... fp, lr} + .word 0x0b000000 @ bl if these bits are set + +#endif From 0211c07e29852edd14bb1bee8491943dc83adc4c Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 20 Aug 2019 07:55:16 -0700 Subject: [PATCH 1144/3715] mm: set S_SWAPFILE on blockdev swap devices Set S_SWAPFILE on block device inodes so that they have the same protections as a swap flie. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- mm/swapfile.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index e47a21e64764..be677a8a2691 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2395,9 +2395,8 @@ add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, * requirements, they are simply tossed out - we will never use those blocks * for swapping. * - * For S_ISREG swapfiles we set S_SWAPFILE across the life of the swapon. This - * prevents root from shooting her foot off by ftruncating an in-use swapfile, - * which will scribble on the fs. + * For all swap devices we set S_SWAPFILE across the life of the swapon. This + * prevents users from writing to the swap device, which will corrupt memory. * * The amount of disk space which a single swap extent represents varies. * Typically it is in the 1-4 megabyte range. So we can have hundreds of @@ -2660,13 +2659,14 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) inode = mapping->host; if (S_ISBLK(inode->i_mode)) { struct block_device *bdev = I_BDEV(inode); + set_blocksize(bdev, old_block_size); blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); - } else { - inode_lock(inode); - inode->i_flags &= ~S_SWAPFILE; - inode_unlock(inode); } + + inode_lock(inode); + inode->i_flags &= ~S_SWAPFILE; + inode_unlock(inode); filp_close(swap_file, NULL); /* @@ -2893,11 +2893,11 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode) p->flags |= SWP_BLKDEV; } else if (S_ISREG(inode->i_mode)) { p->bdev = inode->i_sb->s_bdev; - inode_lock(inode); - if (IS_SWAPFILE(inode)) - return -EBUSY; - } else - return -EINVAL; + } + + inode_lock(inode); + if (IS_SWAPFILE(inode)) + return -EBUSY; return 0; } @@ -3273,8 +3273,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) atomic_inc(&proc_poll_event); wake_up_interruptible(&proc_poll_wait); - if (S_ISREG(inode->i_mode)) - inode->i_flags |= S_SWAPFILE; + inode->i_flags |= S_SWAPFILE; error = 0; goto out; bad_swap: @@ -3294,7 +3293,7 @@ bad_swap: kvfree(cluster_info); kvfree(frontswap_map); if (swap_file) { - if (inode && S_ISREG(inode->i_mode)) { + if (inode) { inode_unlock(inode); inode = NULL; } @@ -3307,7 +3306,7 @@ out: } if (name) putname(name); - if (inode && S_ISREG(inode->i_mode)) + if (inode) inode_unlock(inode); if (!error) enable_swap_slots_cache(); From f1a7f58cf4d55a1f398f7d4e59c86fad5869b454 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 20 Aug 2019 07:55:16 -0700 Subject: [PATCH 1145/3715] vfs: don't allow writes to swap files Don't let userspace write to an active swap file because the kernel effectively has a long term lease on the storage and things could get seriously corrupted if we let this happen. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/block_dev.c | 3 +++ include/linux/fs.h | 11 +++++++++++ mm/filemap.c | 3 +++ mm/memory.c | 4 ++++ mm/mmap.c | 8 ++++++-- mm/swapfile.c | 12 +++++++++++- 6 files changed, 38 insertions(+), 3 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 789f55e851ae..f408ebc5bd66 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1890,6 +1890,9 @@ ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) if (bdev_read_only(I_BDEV(bd_inode))) return -EPERM; + if (IS_SWAPFILE(bd_inode)) + return -ETXTBSY; + if (!iov_iter_count(from)) return 0; diff --git a/include/linux/fs.h b/include/linux/fs.h index 0988d705960c..d07269956030 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3420,4 +3420,15 @@ static inline void simple_fill_fsxattr(struct fsxattr *fa, __u32 xflags) fa->fsx_xflags = xflags; } +/* + * Flush file data before changing attributes. Caller must hold any locks + * required to prevent further writes to this file until we're done setting + * flags. + */ +static inline int inode_drain_writes(struct inode *inode) +{ + inode_dio_wait(inode); + return filemap_write_and_wait(inode->i_mapping); +} + #endif /* _LINUX_FS_H */ diff --git a/mm/filemap.c b/mm/filemap.c index 6eb4e32d99c8..f370f94e8ad1 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2828,6 +2828,9 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from) unsigned long limit = rlimit(RLIMIT_FSIZE); loff_t pos; + if (IS_SWAPFILE(inode)) + return -ETXTBSY; + if (!iov_iter_count(from)) return 0; diff --git a/mm/memory.c b/mm/memory.c index a728bed16c20..1a1da025e5ee 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2350,6 +2350,10 @@ static int do_page_mkwrite(struct vm_fault *vmf) vmf->flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE; + if (vmf->vma->vm_file && + IS_SWAPFILE(vmf->vma->vm_file->f_mapping->host)) + return VM_FAULT_SIGBUS; + ret = vmf->vma->vm_ops->page_mkwrite(vmf); /* Restore original flags so that caller is not surprised */ vmf->flags = old_flags; diff --git a/mm/mmap.c b/mm/mmap.c index 476e810cf100..8d596bfb792f 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1390,8 +1390,12 @@ unsigned long do_mmap(struct file *file, unsigned long addr, switch (flags & MAP_TYPE) { case MAP_SHARED: - if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE)) - return -EACCES; + if (prot & PROT_WRITE) { + if (!(file->f_mode & FMODE_WRITE)) + return -EACCES; + if (IS_SWAPFILE(file->f_mapping->host)) + return -ETXTBSY; + } /* * Make sure we don't allow writing to an append-only diff --git a/mm/swapfile.c b/mm/swapfile.c index be677a8a2691..2f68455f7d55 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3253,6 +3253,17 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) if (error) goto bad_swap; + /* + * Flush any pending IO and dirty mappings before we start using this + * swap device. + */ + inode->i_flags |= S_SWAPFILE; + error = inode_drain_writes(inode); + if (error) { + inode->i_flags &= ~S_SWAPFILE; + goto bad_swap; + } + mutex_lock(&swapon_mutex); prio = -1; if (swap_flags & SWAP_FLAG_PREFER) @@ -3273,7 +3284,6 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) atomic_inc(&proc_poll_event); wake_up_interruptible(&proc_poll_wait); - inode->i_flags |= S_SWAPFILE; error = 0; goto out; bad_swap: From 4bcefaebc5e2fbf813989ae09197b54058db88f9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 27 Sep 2019 18:01:35 +0800 Subject: [PATCH 1146/3715] f2fs: fix to update time in lazytime mode generic/018 reports an inconsistent status of atime, the testcase is as below: - open file with O_SYNC - write file to construct fraged space - calc md5 of file - record {a,c,m}time - defrag file --- do nothing - umount & mount - check {a,c,m}time The root cause is, as f2fs enables lazytime by default, atime update will dirty vfs inode, rather than dirtying f2fs inode (by set with FI_DIRTY_INODE), so later f2fs_write_inode() called from VFS will fail to update inode page due to our skip: f2fs_write_inode() if (is_inode_flag_set(inode, FI_DIRTY_INODE)) return 0; So eventually, after evict(), we lose last atime for ever. To fix this issue, we need to check whether {a,c,m,cr}time is consistent in between inode cache and inode page, and only skip f2fs_update_inode() if f2fs inode is not dirty and time is consistent as well. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 23 +++++++++++++++-------- fs/f2fs/inode.c | 6 +++++- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 624f87d06f0d..9811c3e2f4c8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2703,6 +2703,20 @@ static inline void clear_file(struct inode *inode, int type) f2fs_mark_inode_dirty_sync(inode, true); } +static inline bool f2fs_is_time_consistent(struct inode *inode) +{ + if (!timespec_equal(F2FS_I(inode)->i_disk_time, &inode->i_atime)) + return false; + if (!timespec_equal(F2FS_I(inode)->i_disk_time + 1, &inode->i_ctime)) + return false; + if (!timespec_equal(F2FS_I(inode)->i_disk_time + 2, &inode->i_mtime)) + return false; + if (!timespec_equal(F2FS_I(inode)->i_disk_time + 3, + &F2FS_I(inode)->i_crtime)) + return false; + return true; +} + static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) { bool ret; @@ -2720,14 +2734,7 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) i_size_read(inode) & ~PAGE_MASK) return false; - if (!timespec_equal(F2FS_I(inode)->i_disk_time, &inode->i_atime)) - return false; - if (!timespec_equal(F2FS_I(inode)->i_disk_time + 1, &inode->i_ctime)) - return false; - if (!timespec_equal(F2FS_I(inode)->i_disk_time + 2, &inode->i_mtime)) - return false; - if (!timespec_equal(F2FS_I(inode)->i_disk_time + 3, - &F2FS_I(inode)->i_crtime)) + if (!f2fs_is_time_consistent(inode)) return false; down_read(&F2FS_I(inode)->i_sem); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index db4fec30c30d..386ad54c13c3 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -615,7 +615,11 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) inode->i_ino == F2FS_META_INO(sbi)) return 0; - if (!is_inode_flag_set(inode, FI_DIRTY_INODE)) + /* + * atime could be updated without dirtying f2fs inode in lazytime mode + */ + if (f2fs_is_time_consistent(inode) && + !is_inode_flag_set(inode, FI_DIRTY_INODE)) return 0; if (!f2fs_is_checkpoint_ready(sbi)) From 71ca56604cd8b4e9b68bc6af34fa3875252b419d Mon Sep 17 00:00:00 2001 From: Chengguang Xu via Linux-f2fs-devel Date: Fri, 27 Sep 2019 09:35:48 +0800 Subject: [PATCH 1147/3715] f2fs: mark recovery flag correctly in read_raw_super_block() On the combination of first fail and second success, we will miss to mark recovery flag because currently we reuse err variable in the loop. Signed-off-by: Chengguang Xu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 10a640cdaa05..9fb09e05300e 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2961,6 +2961,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, f2fs_err(sbi, "Unable to read %dth superblock", block + 1); err = -EIO; + *recovery = 1; continue; } @@ -2970,6 +2971,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, f2fs_err(sbi, "Can't find valid F2FS filesystem in %dth superblock", block + 1); brelse(bh); + *recovery = 1; continue; } @@ -2982,10 +2984,6 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, brelse(bh); } - /* Fail to read any one of the superblocks*/ - if (err < 0) - *recovery = 1; - /* No valid superblock */ if (!*raw_super) kvfree(super); From 7706be6fcb059c88abff38f5f5a5fc7f0cefa655 Mon Sep 17 00:00:00 2001 From: Qiuyang Sun Date: Mon, 23 Sep 2019 12:21:39 +0800 Subject: [PATCH 1148/3715] f2fs: update multi-dev metadata in resize_fs Multi-device metadata should be updated in resize_fs as well. Also, we check that the new FS size still reaches the last device. Signed-off-by: Qiuyang Sun Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 6ba0f4c9abc8..27f19b19deba 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1437,11 +1437,20 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs) raw_sb->segment_count_main = cpu_to_le32(segment_count_main + segs); raw_sb->block_count = cpu_to_le64(block_count + (long long)segs * sbi->blocks_per_seg); + if (f2fs_is_multi_device(sbi)) { + int last_dev = sbi->s_ndevs - 1; + int dev_segs = + le32_to_cpu(raw_sb->devs[last_dev].total_segments); + + raw_sb->devs[last_dev].total_segments = + cpu_to_le32(dev_segs + segs); + } } static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) { int segs = secs * sbi->segs_per_sec; + long long blks = (long long)segs * sbi->blocks_per_seg; long long user_block_count = le64_to_cpu(F2FS_CKPT(sbi)->user_block_count); @@ -1449,8 +1458,20 @@ static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) MAIN_SEGS(sbi) = (int)MAIN_SEGS(sbi) + segs; FREE_I(sbi)->free_sections = (int)FREE_I(sbi)->free_sections + secs; FREE_I(sbi)->free_segments = (int)FREE_I(sbi)->free_segments + segs; - F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count + - (long long)segs * sbi->blocks_per_seg); + F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count + blks); + + if (f2fs_is_multi_device(sbi)) { + int last_dev = sbi->s_ndevs - 1; + + FDEV(last_dev).total_segments = + (int)FDEV(last_dev).total_segments + segs; + FDEV(last_dev).end_blk = + (long long)FDEV(last_dev).end_blk + blks; +#ifdef CONFIG_BLK_DEV_ZONED + FDEV(last_dev).nr_blkz = (int)FDEV(last_dev).nr_blkz + + (int)(blks >> sbi->log_blocks_per_blkz); +#endif + } } int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) @@ -1465,6 +1486,15 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) if (block_count > old_block_count) return -EINVAL; + if (f2fs_is_multi_device(sbi)) { + int last_dev = sbi->s_ndevs - 1; + __u64 last_segs = FDEV(last_dev).total_segments; + + if (block_count + last_segs * sbi->blocks_per_seg <= + old_block_count) + return -EINVAL; + } + /* new fs size should align to section size */ div_u64_rem(block_count, BLKS_PER_SEC(sbi), &rem); if (rem) From c5f24de38fdeea53380ec72fa6a6c2db38f20f74 Mon Sep 17 00:00:00 2001 From: Qiuyang Sun Date: Mon, 23 Sep 2019 12:22:35 +0800 Subject: [PATCH 1149/3715] f2fs: check total_segments from devices in raw_super For multi-device F2FS, we should check if the sum of total_segments from all devices matches segment_count. Signed-off-by: Qiuyang Sun Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 9fb09e05300e..40de9a355fe8 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2612,6 +2612,21 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, return -EFSCORRUPTED; } + if (RDEV(0).path[0]) { + block_t dev_seg_count = le32_to_cpu(RDEV(0).total_segments); + int i = 1; + + while (i < MAX_DEVICES && RDEV(i).path[0]) { + dev_seg_count += le32_to_cpu(RDEV(i).total_segments); + i++; + } + if (segment_count != dev_seg_count) { + f2fs_info(sbi, "Segment count (%u) mismatch with total segments from devices (%u)", + segment_count, dev_seg_count); + return -EFSCORRUPTED; + } + } + if (secs_per_zone > total_sections || !secs_per_zone) { f2fs_info(sbi, "Wrong secs_per_zone / total_sections (%u, %u)", secs_per_zone, total_sections); From faa35391f58c7f5300eccc1ba8af8a4b85f7b1e4 Mon Sep 17 00:00:00 2001 From: Randall Huang Date: Fri, 18 Oct 2019 14:56:22 +0800 Subject: [PATCH 1150/3715] f2fs: fix to avoid memory leakage in f2fs_listxattr In f2fs_listxattr, there is no boundary check before memcpy e_name to buffer. If the e_name_len is corrupted, unexpected memory contents may be returned to the buffer. Signed-off-by: Randall Huang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 181900af2576..296b3189448a 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -539,8 +539,9 @@ out: ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) { struct inode *inode = d_inode(dentry); + nid_t xnid = F2FS_I(inode)->i_xattr_nid; struct f2fs_xattr_entry *entry; - void *base_addr; + void *base_addr, *last_base_addr; int error = 0; size_t rest = buffer_size; @@ -550,6 +551,8 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) if (error) return error; + last_base_addr = (void *)base_addr + XATTR_SIZE(xnid, inode); + list_for_each_xattr(entry, base_addr) { const struct xattr_handler *handler = f2fs_xattr_handler(entry->e_name_index); @@ -557,6 +560,15 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) size_t prefix_len; size_t size; + if ((void *)(entry) + sizeof(__u32) > last_base_addr || + (void *)XATTR_NEXT_ENTRY(entry) > last_base_addr) { + f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr", + inode->i_ino); + set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); + error = -EFSCORRUPTED; + goto cleanup; + } + if (!handler || (handler->list && !handler->list(dentry))) continue; From 47c687236f5f3f468102c52078206c11613fa5fa Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 30 Sep 2019 18:53:25 +0800 Subject: [PATCH 1151/3715] f2fs: cache global IPU bio In commit 8648de2c581e ("f2fs: add bio cache for IPU"), we added f2fs_submit_ipu_bio() in __write_data_page() as below: __write_data_page() if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode)) { f2fs_submit_ipu_bio(sbi, bio, page); .... } in order to avoid below deadlock: Thread A Thread B - __write_data_page (inode x, page y) - f2fs_do_write_data_page - set_page_writeback ---- set writeback flag in page y - f2fs_inplace_write_data - f2fs_balance_fs - lock gc_mutex - lock gc_mutex - f2fs_gc - do_garbage_collect - gc_data_segment - move_data_page - f2fs_wait_on_page_writeback - wait_on_page_writeback --- wait writeback of page y However, the bio submission breaks the merge of IPU IOs. So in this patch let's add a global bio cache for merged IPU pages, then f2fs_wait_on_page_writeback() is able to submit bio if a writebacked page is cached in global bio cache. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 179 +++++++++++++++++++++++++++++++++++++--------- fs/f2fs/f2fs.h | 11 +++ fs/f2fs/segment.c | 3 + fs/f2fs/super.c | 8 +++ 4 files changed, 169 insertions(+), 32 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8c80db81528e..ce16c5bcae4d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -29,6 +29,7 @@ #define NUM_PREALLOC_POST_READ_CTXS 128 static struct kmem_cache *bio_post_read_ctx_cache; +static struct kmem_cache *bio_entry_slab; static mempool_t *bio_post_read_ctx_pool; static bool __is_cp_guaranteed(struct page *page) @@ -542,6 +543,126 @@ static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio, return io_type_is_mergeable(io, fio); } +static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio, + struct page *page, enum temp_type temp) +{ + struct f2fs_bio_info *io = sbi->write_io[DATA] + temp; + struct bio_entry *be; + + be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS); + be->bio = bio; + bio_get(bio); + + if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE) + f2fs_bug_on(sbi, 1); + + down_write(&io->bio_list_lock); + list_add_tail(&be->list, &io->bio_list); + up_write(&io->bio_list_lock); +} + +static void del_bio_entry(struct bio_entry *be) +{ + list_del(&be->list); + kmem_cache_free(bio_entry_slab, be); +} + +static int add_ipu_page(struct f2fs_sb_info *sbi, struct bio **bio, + struct page *page) +{ + enum temp_type temp; + bool found = false; + int ret = -EAGAIN; + + for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) { + struct f2fs_bio_info *io = sbi->write_io[DATA] + temp; + struct list_head *head = &io->bio_list; + struct bio_entry *be; + + down_write(&io->bio_list_lock); + list_for_each_entry(be, head, list) { + if (be->bio != *bio) + continue; + + found = true; + + if (bio_add_page(*bio, page, PAGE_SIZE, 0) == PAGE_SIZE) { + ret = 0; + break; + } + + /* bio is full */ + del_bio_entry(be); + __submit_bio(sbi, *bio, DATA); + break; + } + up_write(&io->bio_list_lock); + } + + if (ret) { + bio_put(*bio); + *bio = NULL; + } + + return ret; +} + +void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, + struct bio **bio, struct page *page) +{ + enum temp_type temp; + bool found = false; + struct bio *target = bio ? *bio : NULL; + + for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) { + struct f2fs_bio_info *io = sbi->write_io[DATA] + temp; + struct list_head *head = &io->bio_list; + struct bio_entry *be; + + if (list_empty(head)) + continue; + + down_read(&io->bio_list_lock); + list_for_each_entry(be, head, list) { + if (target) + found = (target == be->bio); + else + found = __has_merged_page(be->bio, NULL, + page, 0); + if (found) + break; + } + up_read(&io->bio_list_lock); + + if (!found) + continue; + + found = false; + + down_write(&io->bio_list_lock); + list_for_each_entry(be, head, list) { + if (target) + found = (target == be->bio); + else + found = __has_merged_page(be->bio, NULL, + page, 0); + if (found) { + target = be->bio; + del_bio_entry(be); + break; + } + } + up_write(&io->bio_list_lock); + } + + if (found) + __submit_bio(sbi, target, DATA); + if (bio && *bio) { + bio_put(*bio); + *bio = NULL; + } +} + int f2fs_merge_page_bio(struct f2fs_io_info *fio) { struct bio *bio = *fio->bio; @@ -556,20 +677,17 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio) f2fs_trace_ios(fio, 0); if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block, - fio->new_blkaddr)) { - __submit_bio(fio->sbi, bio, fio->type); - bio = NULL; - } + fio->new_blkaddr)) + f2fs_submit_merged_ipu_write(fio->sbi, &bio, NULL); alloc_new: if (!bio) { bio = __bio_alloc(fio, BIO_MAX_PAGES); bio_set_op_attrs(bio, fio->op, fio->op_flags); - } - if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { - __submit_bio(fio->sbi, bio, fio->type); - bio = NULL; - goto alloc_new; + add_bio_entry(fio->sbi, bio, page, fio->temp); + } else { + if (add_ipu_page(fio->sbi, &bio, page)) + goto alloc_new; } if (fio->io_wbc) @@ -583,19 +701,6 @@ alloc_new: return 0; } -static void f2fs_submit_ipu_bio(struct f2fs_sb_info *sbi, struct bio **bio, - struct page *page) -{ - if (!bio) - return; - - if (!__has_merged_page(*bio, NULL, page, 0)) - return; - - __submit_bio(sbi, *bio, DATA); - *bio = NULL; -} - void f2fs_submit_page_write(struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = fio->sbi; @@ -2214,14 +2319,12 @@ out: unlock_page(page); if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) && - !F2FS_I(inode)->cp_task) { - f2fs_submit_ipu_bio(sbi, bio, page); + !F2FS_I(inode)->cp_task) f2fs_balance_fs(sbi, need_balance_fs); - } if (unlikely(f2fs_cp_error(sbi))) { - f2fs_submit_ipu_bio(sbi, bio, page); f2fs_submit_merged_write(sbi, DATA); + f2fs_submit_merged_ipu_write(sbi, bio, NULL); submitted = NULL; } @@ -2341,13 +2444,11 @@ continue_unlock: } if (PageWriteback(page)) { - if (wbc->sync_mode != WB_SYNC_NONE) { + if (wbc->sync_mode != WB_SYNC_NONE) f2fs_wait_on_page_writeback(page, DATA, true, true); - f2fs_submit_ipu_bio(sbi, &bio, page); - } else { + else goto continue_unlock; - } } if (!clear_page_dirty_for_io(page)) @@ -2405,7 +2506,7 @@ continue_unlock: NULL, 0, DATA); /* submit cached bio of IPU write */ if (bio) - __submit_bio(sbi, bio, DATA); + f2fs_submit_merged_ipu_write(sbi, &bio, NULL); return ret; } @@ -3210,8 +3311,22 @@ fail: return -ENOMEM; } -void __exit f2fs_destroy_post_read_processing(void) +void f2fs_destroy_post_read_processing(void) { mempool_destroy(bio_post_read_ctx_pool); kmem_cache_destroy(bio_post_read_ctx_cache); } + +int __init f2fs_init_bio_entry_cache(void) +{ + bio_entry_slab = f2fs_kmem_cache_create("bio_entry_slab", + sizeof(struct bio_entry)); + if (!bio_entry_slab) + return -ENOMEM; + return 0; +} + +void __exit f2fs_destroy_bio_entry_cache(void) +{ + kmem_cache_destroy(bio_entry_slab); +} diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9811c3e2f4c8..f20fc99d16c9 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1066,6 +1066,11 @@ struct f2fs_io_info { unsigned char version; /* version of the node */ }; +struct bio_entry { + struct bio *bio; + struct list_head list; +}; + #define is_read_io(rw) ((rw) == READ) struct f2fs_bio_info { struct f2fs_sb_info *sbi; /* f2fs superblock */ @@ -1075,6 +1080,8 @@ struct f2fs_bio_info { struct rw_semaphore io_rwsem; /* blocking op for bio */ spinlock_t io_lock; /* serialize DATA/NODE IOs */ struct list_head io_list; /* track fios */ + struct list_head bio_list; /* bio entry list head */ + struct rw_semaphore bio_list_lock; /* lock to protect bio entry list */ }; #define FDEV(i) (sbi->devs[i]) @@ -3194,10 +3201,14 @@ void f2fs_destroy_checkpoint_caches(void); */ int f2fs_init_post_read_processing(void); void f2fs_destroy_post_read_processing(void); +int f2fs_init_bio_entry_cache(void); +void f2fs_destroy_bio_entry_cache(void); void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type); void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, struct inode *inode, struct page *page, nid_t ino, enum page_type type); +void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, + struct bio **bio, struct page *page); void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi); int f2fs_submit_page_bio(struct f2fs_io_info *fio); int f2fs_merge_page_bio(struct f2fs_io_info *fio); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 808709581481..25c750cd0272 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3379,7 +3379,10 @@ void f2fs_wait_on_page_writeback(struct page *page, if (PageWriteback(page)) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); + /* submit cached LFS IO */ f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type); + /* sbumit cached IPU IO */ + f2fs_submit_merged_ipu_write(sbi, NULL, page); if (ordered) { wait_on_page_writeback(page); f2fs_bug_on(sbi, locked && PageWriteback(page)); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 40de9a355fe8..4910099ebf94 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3352,6 +3352,8 @@ try_onemore: sbi->write_io[i][j].bio = NULL; spin_lock_init(&sbi->write_io[i][j].io_lock); INIT_LIST_HEAD(&sbi->write_io[i][j].io_list); + INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list); + init_rwsem(&sbi->write_io[i][j].bio_list_lock); } } @@ -3762,8 +3764,13 @@ static int __init init_f2fs_fs(void) err = f2fs_init_post_read_processing(); if (err) goto free_root_stats; + err = f2fs_init_bio_entry_cache(); + if (err) + goto free_post_read; return 0; +free_post_read: + f2fs_destroy_post_read_processing(); free_root_stats: f2fs_destroy_root_stats(); unregister_filesystem(&f2fs_fs_type); @@ -3787,6 +3794,7 @@ fail: static void __exit exit_f2fs_fs(void) { + f2fs_destroy_bio_entry_cache(); f2fs_destroy_post_read_processing(); f2fs_destroy_root_stats(); unregister_filesystem(&f2fs_fs_type); From 4e3db89f7f51e822e2eef127180604f6bfd2d083 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 22 Oct 2019 17:26:11 +0800 Subject: [PATCH 1152/3715] f2fs: fix wrong description in document As reported in bugzilla, default value of DEF_RAM_THRESHOLD was fixed by commit 29710bcf9426 ("f2fs: fix wrong percentage"), however leaving wrong description in document, fix it. https://bugzilla.kernel.org/show_bug.cgi?id=205203 Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 93f49f2786ca..b6b73d8b0918 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -346,7 +346,7 @@ Files in /sys/fs/f2fs/ ram_thresh This parameter controls the memory footprint used by free nids and cached nat entries. By default, - 10 is set, which indicates 10 MB / 1 GB RAM. + 1 is set, which indicates 10 MB / 1 GB RAM. ra_nid_pages When building free nids, F2FS reads NAT blocks ahead for speed up. Default is 0. From 52963b54efca7c36763aae4e3fa5ff203f3e6f62 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 1 Nov 2019 09:34:21 -0700 Subject: [PATCH 1153/3715] f2fs: avoid kernel panic on corruption test xfstests/generic/475 complains kernel warn/panic while testing corrupted disk. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 14c1896a6842..e2939dabfc9f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2349,7 +2349,6 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, if (ret) { up_read(&nm_i->nat_tree_lock); - f2fs_bug_on(sbi, !mount); f2fs_err(sbi, "NAT is corrupt, run fsck to fix it"); return ret; } From ce44f2ad72938166f2081d887f77391e505eb588 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 18 Oct 2019 10:06:40 -0700 Subject: [PATCH 1154/3715] f2fs: support aligned pinned file This patch supports 2MB-aligned pinned file, which can guarantee no GC at all by allocating fully valid 2MB segment. Check free segments by has_not_enough_free_secs() with large budget. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 4 +++- fs/f2fs/file.c | 42 +++++++++++++++++++++++++++++++++++++----- fs/f2fs/recovery.c | 2 +- fs/f2fs/segment.c | 31 +++++++++++++++++++++++++++---- fs/f2fs/segment.h | 2 ++ fs/f2fs/super.c | 1 + fs/f2fs/sysfs.c | 2 ++ 7 files changed, 73 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f20fc99d16c9..9eb50d83229c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -888,6 +888,7 @@ enum { CURSEG_WARM_NODE, /* direct node blocks of normal files */ CURSEG_COLD_NODE, /* indirect node blocks */ NO_CHECK_TYPE, + CURSEG_COLD_DATA_PINNED,/* cold data for pinned file */ }; struct flush_cmd { @@ -1299,6 +1300,7 @@ struct f2fs_sb_info { /* threshold for gc trials on pinned files */ u64 gc_pin_file_threshold; + struct rw_semaphore pin_sem; /* maximum # of trials to find a victim segment for SSR and GC */ unsigned int max_victim_search; @@ -3115,7 +3117,7 @@ void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi); int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, unsigned int start, unsigned int end); -void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); +void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi, int type); int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range); bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d6e6e4acd452..4b0fb77e54e5 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1541,12 +1541,44 @@ static int expand_inode_data(struct inode *inode, loff_t offset, if (off_end) map.m_len++; - if (f2fs_is_pinned_file(inode)) - map.m_seg_type = CURSEG_COLD_DATA; + if (!map.m_len) + return 0; - err = f2fs_map_blocks(inode, &map, 1, (f2fs_is_pinned_file(inode) ? - F2FS_GET_BLOCK_PRE_DIO : - F2FS_GET_BLOCK_PRE_AIO)); + if (f2fs_is_pinned_file(inode)) { + block_t len = (map.m_len >> sbi->log_blocks_per_seg) << + sbi->log_blocks_per_seg; + block_t done = 0; + + if (map.m_len % sbi->blocks_per_seg) + len += sbi->blocks_per_seg; + + map.m_len = sbi->blocks_per_seg; +next_alloc: + if (has_not_enough_free_secs(sbi, 0, + GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) { + mutex_lock(&sbi->gc_mutex); + err = f2fs_gc(sbi, true, false, NULL_SEGNO); + if (err && err != -ENODATA && err != -EAGAIN) + goto out_err; + } + + down_write(&sbi->pin_sem); + map.m_seg_type = CURSEG_COLD_DATA_PINNED; + f2fs_allocate_new_segments(sbi, CURSEG_COLD_DATA); + err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO); + up_write(&sbi->pin_sem); + + done += map.m_len; + len -= map.m_len; + map.m_lblk += map.m_len; + if (!err && len) + goto next_alloc; + + map.m_len = done; + } else { + err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO); + } +out_err: if (err) { pgoff_t last_off; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 69e660ab6823..f177e2749f19 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -711,7 +711,7 @@ next: f2fs_put_page(page, 1); } if (!err) - f2fs_allocate_new_segments(sbi); + f2fs_allocate_new_segments(sbi, NO_CHECK_TYPE); return err; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 25c750cd0272..8bb37f8a1845 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2690,7 +2690,7 @@ unlock: up_read(&SM_I(sbi)->curseg_lock); } -void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) +void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg; unsigned int old_segno; @@ -2699,10 +2699,17 @@ void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) down_write(&SIT_I(sbi)->sentry_lock); for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { + if (type != NO_CHECK_TYPE && i != type) + continue; + curseg = CURSEG_I(sbi, i); - old_segno = curseg->segno; - SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true); - locate_dirty_segment(sbi, old_segno); + if (type == NO_CHECK_TYPE || curseg->next_blkoff || + get_valid_blocks(sbi, curseg->segno, false) || + get_ckpt_valid_blocks(sbi, curseg->segno)) { + old_segno = curseg->segno; + SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true); + locate_dirty_segment(sbi, old_segno); + } } up_write(&SIT_I(sbi)->sentry_lock); @@ -3068,6 +3075,19 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, type); + bool put_pin_sem = false; + + if (type == CURSEG_COLD_DATA) { + /* GC during CURSEG_COLD_DATA_PINNED allocation */ + if (down_read_trylock(&sbi->pin_sem)) { + put_pin_sem = true; + } else { + type = CURSEG_WARM_DATA; + curseg = CURSEG_I(sbi, type); + } + } else if (type == CURSEG_COLD_DATA_PINNED) { + type = CURSEG_COLD_DATA; + } down_read(&SM_I(sbi)->curseg_lock); @@ -3133,6 +3153,9 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, mutex_unlock(&curseg->curseg_mutex); up_read(&SM_I(sbi)->curseg_lock); + + if (put_pin_sem) + up_read(&sbi->pin_sem); } static void update_device_state(struct f2fs_io_info *fio) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 325781a1ae4d..a95467b202ea 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -313,6 +313,8 @@ struct sit_entry_set { */ static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type) { + if (type == CURSEG_COLD_DATA_PINNED) + type = CURSEG_COLD_DATA; return (struct curseg_info *)(SM_I(sbi)->curseg_array + type); } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 4910099ebf94..61483b6d439c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2861,6 +2861,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) spin_lock_init(&sbi->dev_lock); init_rwsem(&sbi->sb_lock); + init_rwsem(&sbi->pin_sem); } static int init_percpu_info(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index a1023b905df2..aeb9d6a998ac 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -154,6 +154,8 @@ static ssize_t features_show(struct f2fs_attr *a, if (f2fs_sb_has_casefold(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "casefold"); + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "pin_file"); len += snprintf(buf + len, PAGE_SIZE - len, "\n"); return len; } From f96f967879df45604139626ebe5a11ae029fcc8c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 7 Nov 2019 14:12:05 +0800 Subject: [PATCH 1155/3715] f2fs: fix to update dir's i_pino during cross_rename As Eric reported: RENAME_EXCHANGE support was just added to fsstress in xfstests: commit 65dfd40a97b6bbbd2a22538977bab355c5bc0f06 Author: kaixuxia Date: Thu Oct 31 14:41:48 2019 +0800 fsstress: add EXCHANGE renameat2 support This is causing xfstest generic/579 to fail due to fsck.f2fs reporting errors. I'm not sure what the problem is, but it still happens even with all the fs-verity stuff in the test commented out, so that the test just runs fsstress. generic/579 23s ... [10:02:25] [ 7.745370] run fstests generic/579 at 2019-11-04 10:02:25 _check_generic_filesystem: filesystem on /dev/vdc is inconsistent (see /results/f2fs/results-default/generic/579.full for details) [10:02:47] Ran: generic/579 Failures: generic/579 Failed 1 of 1 tests Xunit report: /results/f2fs/results-default/result.xml Here's the contents of 579.full: _check_generic_filesystem: filesystem on /dev/vdc is inconsistent *** fsck.f2fs output *** [ASSERT] (__chk_dots_dentries:1378) --> Bad inode number[0x24] for '..', parent parent ino is [0xd10] The root cause is that we forgot to update directory's i_pino during cross_rename, fix it. Fixes: 32f9bc25cbda0 ("f2fs: support ->rename2()") Signed-off-by: Chao Yu Tested-by: Eric Biggers Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index e6fd53edc747..452394990121 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -985,7 +985,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (!old_dir_entry || whiteout) file_lost_pino(old_inode); else - F2FS_I(old_inode)->i_pino = new_dir->i_ino; + /* adjust dir's i_pino to pass fsck check */ + f2fs_i_pino_write(old_inode, new_dir->i_ino); up_write(&F2FS_I(old_inode)->i_sem); old_inode->i_ctime = current_time(old_inode); @@ -1145,7 +1146,11 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_set_link(old_dir, old_entry, old_page, new_inode); down_write(&F2FS_I(old_inode)->i_sem); - file_lost_pino(old_inode); + if (!old_dir_entry) + file_lost_pino(old_inode); + else + /* adjust dir's i_pino to pass fsck check */ + f2fs_i_pino_write(old_inode, new_dir->i_ino); up_write(&F2FS_I(old_inode)->i_sem); old_dir->i_ctime = current_time(old_dir); @@ -1160,7 +1165,11 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_set_link(new_dir, new_entry, new_page, old_inode); down_write(&F2FS_I(new_inode)->i_sem); - file_lost_pino(new_inode); + if (!new_dir_entry) + file_lost_pino(new_inode); + else + /* adjust dir's i_pino to pass fsck check */ + f2fs_i_pino_write(new_inode, old_dir->i_ino); up_write(&F2FS_I(new_inode)->i_sem); new_dir->i_ctime = current_time(new_dir); From 7c4b00dcb81facc479c4508bd4d5b4b63520dd95 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 7 Nov 2019 17:29:00 +0800 Subject: [PATCH 1156/3715] f2fs: fix potential overflow We expect 64-bit calculation result from below statement, however in 32-bit machine, looped left shift operation on pgoff_t type variable may cause overflow issue, fix it by forcing type cast. page->index << PAGE_SHIFT; Fixes: 26de9b117130 ("f2fs: avoid unnecessary updating inode during fsync") Fixes: 0a2aa8fbb969 ("f2fs: refactor __exchange_data_block for speed up") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/file.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ce16c5bcae4d..2188fadcfeec 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2202,7 +2202,7 @@ static int __write_data_page(struct page *page, bool *submitted, loff_t i_size = i_size_read(inode); const pgoff_t end_index = ((unsigned long long) i_size) >> PAGE_SHIFT; - loff_t psize = (page->index + 1) << PAGE_SHIFT; + loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT; unsigned offset = 0; bool need_balance_fs = false; int err = 0; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 4b0fb77e54e5..57dce0bcd811 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1135,7 +1135,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, } dn.ofs_in_node++; i++; - new_size = (dst + i) << PAGE_SHIFT; + new_size = (loff_t)(dst + i) << PAGE_SHIFT; if (dst_inode->i_size < new_size) f2fs_i_size_write(dst_inode, new_size); } while (--ilen && (do_replace[i] || blkaddr[i] == NULL_ADDR)); From f8db0be12044a13acdc87eeaf3ef788696c7a5fd Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 1 Nov 2019 17:53:23 +0800 Subject: [PATCH 1157/3715] f2fs: show f2fs instance in printk_ratelimited As Eric mentioned, bare printk{,_ratelimited} won't show which filesystem instance these message is coming from, this patch tries to show fs instance with sb->s_id field in all places we missed before. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/data.c | 8 +++++--- fs/f2fs/dir.c | 7 ++++--- fs/f2fs/f2fs.h | 24 +++++++++++++----------- fs/f2fs/file.c | 2 +- fs/f2fs/gc.c | 2 +- fs/f2fs/inode.c | 2 +- fs/f2fs/node.c | 2 +- fs/f2fs/segment.c | 9 +++++---- 9 files changed, 32 insertions(+), 26 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index a28eac29234d..3299ee97f220 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -581,7 +581,7 @@ int f2fs_acquire_orphan_inode(struct f2fs_sb_info *sbi) if (time_to_inject(sbi, FAULT_ORPHAN)) { spin_unlock(&im->ino_lock); - f2fs_show_injection_info(FAULT_ORPHAN); + f2fs_show_injection_info(sbi, FAULT_ORPHAN); return -ENOSPC; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 2188fadcfeec..1546afc1f407 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -168,8 +168,10 @@ static bool f2fs_bio_post_read_required(struct bio *bio) static void f2fs_read_end_io(struct bio *bio) { - if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_READ_IO)) { - f2fs_show_injection_info(FAULT_READ_IO); + struct f2fs_sb_info *sbi = F2FS_P_SB(bio->bi_io_vec->bv_page); + + if (time_to_inject(sbi, FAULT_READ_IO)) { + f2fs_show_injection_info(sbi, FAULT_READ_IO); bio->bi_status = BLK_STS_IOERR; } @@ -191,7 +193,7 @@ static void f2fs_write_end_io(struct bio *bio) int i; if (time_to_inject(sbi, FAULT_WRITE_IO)) { - f2fs_show_injection_info(FAULT_WRITE_IO); + f2fs_show_injection_info(sbi, FAULT_WRITE_IO); bio->bi_status = BLK_STS_IOERR; } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 9440d59b109d..dbb9f5a4d7a6 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -628,7 +628,7 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, start: if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) { - f2fs_show_injection_info(FAULT_DIR_DEPTH); + f2fs_show_injection_info(F2FS_I_SB(dir), FAULT_DIR_DEPTH); return -ENOSPC; } @@ -919,8 +919,9 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, bit_pos++; ctx->pos = start_pos + bit_pos; printk_ratelimited( - "%s, invalid namelen(0), ino:%u, run fsck to fix.", - KERN_WARNING, le32_to_cpu(de->ino)); + "%sF2FS-fs (%s): invalid namelen(0), ino:%u, run fsck to fix.", + KERN_WARNING, sbi->sb->s_id, + le32_to_cpu(de->ino)); set_sbi_flag(sbi, SBI_NEED_FSCK); continue; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9eb50d83229c..5c6a59e34185 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1372,9 +1372,10 @@ struct f2fs_private_dio { }; #ifdef CONFIG_F2FS_FAULT_INJECTION -#define f2fs_show_injection_info(type) \ - printk_ratelimited("%sF2FS-fs : inject %s in %s of %pF\n", \ - KERN_INFO, f2fs_fault_name[type], \ +#define f2fs_show_injection_info(sbi, type) \ + printk_ratelimited("%sF2FS-fs (%s) : inject %s in %s of %pS\n", \ + KERN_INFO, sbi->sb->s_id, \ + f2fs_fault_name[type], \ __func__, __builtin_return_address(0)) static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) { @@ -1394,7 +1395,7 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) return false; } #else -#define f2fs_show_injection_info(type) do { } while (0) +#define f2fs_show_injection_info(sbi, type) do { } while (0) static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) { return false; @@ -1780,7 +1781,7 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, return ret; if (time_to_inject(sbi, FAULT_BLOCK)) { - f2fs_show_injection_info(FAULT_BLOCK); + f2fs_show_injection_info(sbi, FAULT_BLOCK); release = *count; goto release_quota; } @@ -2032,7 +2033,7 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, } if (time_to_inject(sbi, FAULT_BLOCK)) { - f2fs_show_injection_info(FAULT_BLOCK); + f2fs_show_injection_info(sbi, FAULT_BLOCK); goto enospc; } @@ -2147,7 +2148,8 @@ static inline struct page *f2fs_grab_cache_page(struct address_space *mapping, return page; if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) { - f2fs_show_injection_info(FAULT_PAGE_ALLOC); + f2fs_show_injection_info(F2FS_M_SB(mapping), + FAULT_PAGE_ALLOC); return NULL; } } @@ -2162,7 +2164,7 @@ static inline struct page *f2fs_pagecache_get_page( int fgp_flags, gfp_t gfp_mask) { if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_GET)) { - f2fs_show_injection_info(FAULT_PAGE_GET); + f2fs_show_injection_info(F2FS_M_SB(mapping), FAULT_PAGE_GET); return NULL; } @@ -2231,7 +2233,7 @@ static inline struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, return bio; } if (time_to_inject(sbi, FAULT_ALLOC_BIO)) { - f2fs_show_injection_info(FAULT_ALLOC_BIO); + f2fs_show_injection_info(sbi, FAULT_ALLOC_BIO); return NULL; } @@ -2798,7 +2800,7 @@ static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, void *ret; if (time_to_inject(sbi, FAULT_KMALLOC)) { - f2fs_show_injection_info(FAULT_KMALLOC); + f2fs_show_injection_info(sbi, FAULT_KMALLOC); return NULL; } @@ -2819,7 +2821,7 @@ static inline void *f2fs_kvmalloc(struct f2fs_sb_info *sbi, size_t size, gfp_t flags) { if (time_to_inject(sbi, FAULT_KVMALLOC)) { - f2fs_show_injection_info(FAULT_KVMALLOC); + f2fs_show_injection_info(sbi, FAULT_KVMALLOC); return NULL; } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 57dce0bcd811..4e518dbd36f2 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -680,7 +680,7 @@ int f2fs_truncate(struct inode *inode) trace_f2fs_truncate(inode); if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) { - f2fs_show_injection_info(FAULT_TRUNCATE); + f2fs_show_injection_info(F2FS_I_SB(inode), FAULT_TRUNCATE); return -EIO; } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 27f19b19deba..75ad9958145f 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -54,7 +54,7 @@ static int gc_thread_func(void *data) } if (time_to_inject(sbi, FAULT_CHECKPOINT)) { - f2fs_show_injection_info(FAULT_CHECKPOINT); + f2fs_show_injection_info(sbi, FAULT_CHECKPOINT); f2fs_stop_checkpoint(sbi, false); } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 386ad54c13c3..502bd491336a 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -681,7 +681,7 @@ retry: err = f2fs_truncate(inode); if (time_to_inject(sbi, FAULT_EVICT_INODE)) { - f2fs_show_injection_info(FAULT_EVICT_INODE); + f2fs_show_injection_info(sbi, FAULT_EVICT_INODE); err = -EIO; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index e2939dabfc9f..494947d282d5 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2398,7 +2398,7 @@ bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) struct free_nid *i = NULL; retry: if (time_to_inject(sbi, FAULT_ALLOC_NID)) { - f2fs_show_injection_info(FAULT_ALLOC_NID); + f2fs_show_injection_info(sbi, FAULT_ALLOC_NID); return false; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8bb37f8a1845..43daa9431160 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -480,7 +480,7 @@ int f2fs_commit_inmem_pages(struct inode *inode) void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) { if (time_to_inject(sbi, FAULT_CHECKPOINT)) { - f2fs_show_injection_info(FAULT_CHECKPOINT); + f2fs_show_injection_info(sbi, FAULT_CHECKPOINT); f2fs_stop_checkpoint(sbi, false); } @@ -1008,8 +1008,9 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, if (dc->error) printk_ratelimited( - "%sF2FS-fs: Issue discard(%u, %u, %u) failed, ret: %d", - KERN_INFO, dc->lstart, dc->start, dc->len, dc->error); + "%sF2FS-fs (%s): Issue discard(%u, %u, %u) failed, ret: %d", + KERN_INFO, sbi->sb->s_id, + dc->lstart, dc->start, dc->len, dc->error); __detach_discard_cmd(dcc, dc); } @@ -1149,7 +1150,7 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, dc->len += len; if (time_to_inject(sbi, FAULT_DISCARD)) { - f2fs_show_injection_info(FAULT_DISCARD); + f2fs_show_injection_info(sbi, FAULT_DISCARD); err = -EIO; goto submit; } From bcee82097f2997f236485e7dd787840bcb18f5fc Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Wed, 13 Nov 2019 16:01:03 +0530 Subject: [PATCH 1158/3715] f2fs: Fix deadlock in f2fs_gc() context during atomic files handling The FS got stuck in the below stack when the storage is almost full/dirty condition (when FG_GC is being done). schedule_timeout io_schedule_timeout congestion_wait f2fs_drop_inmem_pages_all f2fs_gc f2fs_balance_fs __write_node_page f2fs_fsync_node_pages f2fs_do_sync_file f2fs_ioctl The root cause for this issue is there is a potential infinite loop in f2fs_drop_inmem_pages_all() for the case where gc_failure is true and when there an inode whose i_gc_failures[GC_FAILURE_ATOMIC] is not set. Fix this by keeping track of the total atomic files currently opened and using that to exit from this condition. Fix-suggested-by: Chao Yu Signed-off-by: Chao Yu Signed-off-by: Sahitya Tummala Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 1 + fs/f2fs/segment.c | 21 +++++++++++++++------ 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5c6a59e34185..3a17f3ba954d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1295,6 +1295,7 @@ struct f2fs_sb_info { unsigned int gc_mode; /* current GC state */ unsigned int next_victim_seg[2]; /* next segment in victim section */ /* for skip statistic */ + unsigned int atomic_files; /* # of opened atomic file */ unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */ unsigned long long skipped_gc_rwsem; /* FG_GC only */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 4e518dbd36f2..70292386ed85 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1918,6 +1918,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) spin_lock(&sbi->inode_lock[ATOMIC_FILE]); if (list_empty(&fi->inmem_ilist)) list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]); + sbi->atomic_files++; spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); /* add inode in inmem_list first and set atomic_file */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 43daa9431160..fa32ce92ed65 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -288,6 +288,8 @@ void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure) struct list_head *head = &sbi->inode_list[ATOMIC_FILE]; struct inode *inode; struct f2fs_inode_info *fi; + unsigned int count = sbi->atomic_files; + unsigned int looped = 0; next: spin_lock(&sbi->inode_lock[ATOMIC_FILE]); if (list_empty(head)) { @@ -296,22 +298,26 @@ next: } fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist); inode = igrab(&fi->vfs_inode); + if (inode) + list_move_tail(&fi->inmem_ilist, head); spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); if (inode) { if (gc_failure) { - if (fi->i_gc_failures[GC_FAILURE_ATOMIC]) - goto drop; - goto skip; + if (!fi->i_gc_failures[GC_FAILURE_ATOMIC]) + goto skip; } -drop: set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST); f2fs_drop_inmem_pages(inode); +skip: iput(inode); } -skip: congestion_wait(BLK_RW_ASYNC, HZ/50); cond_resched(); + if (gc_failure) { + if (++looped >= count) + return; + } goto next; } @@ -327,13 +333,16 @@ void f2fs_drop_inmem_pages(struct inode *inode) mutex_unlock(&fi->inmem_lock); } - clear_inode_flag(inode, FI_ATOMIC_FILE); fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0; stat_dec_atomic_write(inode); spin_lock(&sbi->inode_lock[ATOMIC_FILE]); if (!list_empty(&fi->inmem_ilist)) list_del_init(&fi->inmem_ilist); + if (f2fs_is_atomic_file(inode)) { + clear_inode_flag(inode, FI_ATOMIC_FILE); + sbi->atomic_files--; + } spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); } From c70bcddf9478ebb362f2ee8bb377f7e06c90ee35 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Mon, 25 Nov 2019 11:20:36 +0800 Subject: [PATCH 1159/3715] f2fs: choose hardlimit when softlimit is larger than hardlimit in f2fs_statfs_project() Setting softlimit larger than hardlimit seems meaningless for disk quota but currently it is allowed. In this case, there may be a bit of comfusion for users when they run df comamnd to directory which has project quota. For example, we set 20M softlimit and 10M hardlimit of block usage limit for project quota of test_dir(project id 123). [root@hades f2fs]# repquota -P -a *** Report for project quotas on device /dev/nvme0n1p8 Block grace time: 7days; Inode grace time: 7days Block limits File limits Project used soft hard grace used soft hard grace ---------------------------------------------------------------------- 0 -- 4 0 0 1 0 0 123 +- 10248 20480 10240 2 0 0 The result of df command as below: [root@hades f2fs]# df -h /mnt/f2fs/test Filesystem Size Used Avail Use% Mounted on /dev/nvme0n1p8 20M 11M 10M 51% /mnt/f2fs Even though it looks like there is another 10M free space to use, if we write new data to diretory test(inherit project id), the write will fail with errno(-EDQUOT). After this patch, the df result looks like below. [root@hades f2fs]# df -h /mnt/f2fs/test Filesystem Size Used Avail Use% Mounted on /dev/nvme0n1p8 10M 10M 0 100% /mnt/f2fs Signed-off-by: Chengguang Xu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 61483b6d439c..47898dc931ef 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1221,9 +1221,13 @@ static int f2fs_statfs_project(struct super_block *sb, return PTR_ERR(dquot); spin_lock(&dquot->dq_dqb_lock); - limit = (dquot->dq_dqb.dqb_bsoftlimit ? - dquot->dq_dqb.dqb_bsoftlimit : - dquot->dq_dqb.dqb_bhardlimit) >> sb->s_blocksize_bits; + limit = 0; + if (dquot->dq_dqb.dqb_bsoftlimit) + limit = dquot->dq_dqb.dqb_bsoftlimit; + if (dquot->dq_dqb.dqb_bhardlimit && + (!limit || dquot->dq_dqb.dqb_bhardlimit < limit)) + limit = dquot->dq_dqb.dqb_bhardlimit; + if (limit && buf->f_blocks > limit) { curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits; buf->f_blocks = limit; @@ -1232,9 +1236,13 @@ static int f2fs_statfs_project(struct super_block *sb, (buf->f_blocks - curblock) : 0; } - limit = dquot->dq_dqb.dqb_isoftlimit ? - dquot->dq_dqb.dqb_isoftlimit : - dquot->dq_dqb.dqb_ihardlimit; + limit = 0; + if (dquot->dq_dqb.dqb_isoftlimit) + limit = dquot->dq_dqb.dqb_isoftlimit; + if (dquot->dq_dqb.dqb_ihardlimit && + (!limit || dquot->dq_dqb.dqb_ihardlimit < limit)) + limit = dquot->dq_dqb.dqb_ihardlimit; + if (limit && buf->f_files > limit) { buf->f_files = limit; buf->f_ffree = From 9b45d1eeaeff1795bf6c6eb03620ae9d65d7afd9 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 22 Nov 2019 11:53:10 -0800 Subject: [PATCH 1160/3715] f2fs: expose main_blkaddr in sysfs Expose in /sys/fs/f2fs//main_blkaddr the block address where the main area starts. This allows user mode programs to determine: - That pinned files that are made exclusively of fully allocated 2MB segments will never be unpinned by the file system. - Where the main area starts. This is required by programs that want to verify if a file is made exclusively of 2MB f2fs segments, the alignment boundary for segments starts at this address. Testing for 2MB alignment relative to the start of the device is incorrect, because for some filesystems main_blkaddr is not at a 2MB boundary relative to the start of the device. The entry will be used when validating reliable pinning file feature proposed by "f2fs: support aligned pinned file". Signed-off-by: Ramon Pantin Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 6 ++++++ Documentation/filesystems/f2fs.txt | 3 +++ fs/f2fs/sysfs.c | 2 ++ 3 files changed, 11 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 12edef14db63..0cd608e3ed12 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -31,6 +31,12 @@ Contact: "Jaegeuk Kim" Description: Controls the issue rate of segment discard commands. +What: /sys/fs/f2fs//max_blkaddr +Date: November 2019 +Contact: "Ramon Pantin" +Description: + Shows first block address of MAIN area. + What: /sys/fs/f2fs//ipu_policy Date: November 2013 Contact: "Jaegeuk Kim" diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index b6b73d8b0918..dc2cecdaa7dd 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -297,6 +297,9 @@ Files in /sys/fs/f2fs/ reclaim the prefree segments to free segments. By default, 5% over total # of segments. + main_blkaddr This value gives the first block address of + MAIN area in the partition. + max_small_discards This parameter controls the number of discard commands that consist small blocks less than 2MB. The candidates to be discarded are cached until diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index aeb9d6a998ac..9ed632b9c9bb 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -445,6 +445,7 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle, gc_mode); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_urgent, gc_mode); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, main_blkaddr, main_blkaddr); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity); F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks); @@ -512,6 +513,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_idle), ATTR_LIST(gc_urgent), ATTR_LIST(reclaim_segments), + ATTR_LIST(main_blkaddr), ATTR_LIST(max_small_discards), ATTR_LIST(discard_granularity), ATTR_LIST(batched_trim_sections), From 2fe7325a0e44885c5d2ff4d17c0ad3a0d1834a09 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 22 Nov 2019 12:02:06 -0800 Subject: [PATCH 1161/3715] f2fs: stop GC when the victim becomes fully valid We must stop GC, once the segment becomes fully valid. Otherwise, it can produce another dirty segments by moving valid blocks in the segment partially. Ramon hit no free segment panic sometimes and saw this case happens when validating reliable file pinning feature. Signed-off-by: Ramon Pantin Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 75ad9958145f..bd94068647ff 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1012,8 +1012,14 @@ next_step: block_t start_bidx; nid_t nid = le32_to_cpu(entry->nid); - /* stop BG_GC if there is not enough free sections. */ - if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) + /* + * stop BG_GC if there is not enough free sections. + * Or, stop GC if the segment becomes fully valid caused by + * race condition along with SSR block allocation. + */ + if ((gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) || + get_valid_blocks(sbi, segno, false) == + sbi->blocks_per_seg) return submitted; if (check_valid_map(sbi, segno, off) == 0) From 90bb36e922d5c3b25db02471b8f43f7fc43f4986 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Mon, 4 Nov 2019 11:12:38 -0700 Subject: [PATCH 1162/3715] BACKPORT: coresight: etm4x: Save/restore state across CPU low power states Backport: context code is different from upstream. Some hardware will ignore bit TRCPDCR.PU which is used to signal to hardware that power should not be removed from the trace unit. Let's mitigate against this by conditionally saving and restoring the trace unit state when the CPU enters low power states. This patchset introduces a firmware property named 'arm,coresight-loses-context-with-cpu' - when this is present the hardware state will be conditionally saved and restored. A module parameter 'pm_save_enable' is also introduced which can be configured to override the firmware property. This can be set to never allow save/restore or to conditionally allow it (only for self-hosted). The default value is determined by firmware. We avoid saving the hardware state when self-hosted coresight isn't in use to reduce PM latency - we can't determine this by reading the claim tags (TRCCLAIMCLR) as these are 'trace' registers which need power and clocking, something we can't easily provide in the PM context. Therefore we rely on the existing drvdata->mode internal state that is set when self-hosted coresight is used (and powered). Signed-off-by: Andrew Murray Reviewed-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Link: https://lore.kernel.org/r/20191104181251.26732-2-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman (Upstream commit f188b5e76aae9f713c73708d2ba57b65953ce207). Bug: 140266694 Change-Id: I9fec6b7881200c3bf46db5522ed57dafd985fd1d Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-etm4x.c | 318 ++++++++++++++++++ drivers/hwtracing/coresight/coresight-etm4x.h | 64 ++++ drivers/hwtracing/coresight/coresight.c | 6 + include/linux/coresight.h | 5 + 4 files changed, 393 insertions(+) diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c index 564bbee60cdc..9e550558ba6d 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.c +++ b/drivers/hwtracing/coresight/coresight-etm4x.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -33,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -43,6 +45,15 @@ static int boot_enable; module_param_named(boot_enable, boot_enable, int, S_IRUGO); +#define PARAM_PM_SAVE_FIRMWARE 0 /* save self-hosted state as per firmware */ +#define PARAM_PM_SAVE_NEVER 1 /* never save any state */ +#define PARAM_PM_SAVE_SELF_HOSTED 2 /* save self-hosted state only */ + +static int pm_save_enable = PARAM_PM_SAVE_FIRMWARE; +module_param(pm_save_enable, int, 0444); +MODULE_PARM_DESC(pm_save_enable, + "Save/restore state on power down: 1 = never, 2 = self-hosted"); + /* The number of ETMv4 currently registered */ static int etm4_count; static struct etmv4_drvdata *etmdrvdata[NR_CPUS]; @@ -60,6 +71,14 @@ static void etm4_os_unlock(struct etmv4_drvdata *drvdata) isb(); } +static void etm4_os_lock(struct etmv4_drvdata *drvdata) +{ + /* Writing 0x1 to TRCOSLAR locks the trace registers */ + writel_relaxed(0x1, drvdata->base + TRCOSLAR); + drvdata->os_unlock = false; + isb(); +} + static bool etm4_arch_supported(u8 arch) { /* Mask out the minor version number */ @@ -1090,6 +1109,288 @@ static void etm4_init_trace_id(struct etmv4_drvdata *drvdata) drvdata->trcid = coresight_get_trace_id(drvdata->cpu); } +#ifdef CONFIG_CPU_PM +static int etm4_cpu_save(struct etmv4_drvdata *drvdata) +{ + int i, ret = 0; + struct etmv4_save_state *state; + struct device *etm_dev = &drvdata->csdev->dev; + + /* + * As recommended by 3.4.1 ("The procedure when powering down the PE") + * of ARM IHI 0064D + */ + dsb(sy); + isb(); + + CS_UNLOCK(drvdata->base); + + /* Lock the OS lock to disable trace and external debugger access */ + etm4_os_lock(drvdata); + + /* wait for TRCSTATR.PMSTABLE to go up */ + if (coresight_timeout(drvdata->base, TRCSTATR, + TRCSTATR_PMSTABLE_BIT, 1)) { + dev_err(etm_dev, + "timeout while waiting for PM Stable Status\n"); + etm4_os_unlock(drvdata); + ret = -EBUSY; + goto out; + } + + state = drvdata->save_state; + + state->trcprgctlr = readl(drvdata->base + TRCPRGCTLR); + state->trcprocselr = readl(drvdata->base + TRCPROCSELR); + state->trcconfigr = readl(drvdata->base + TRCCONFIGR); + state->trcauxctlr = readl(drvdata->base + TRCAUXCTLR); + state->trceventctl0r = readl(drvdata->base + TRCEVENTCTL0R); + state->trceventctl1r = readl(drvdata->base + TRCEVENTCTL1R); + state->trcstallctlr = readl(drvdata->base + TRCSTALLCTLR); + state->trctsctlr = readl(drvdata->base + TRCTSCTLR); + state->trcsyncpr = readl(drvdata->base + TRCSYNCPR); + state->trcccctlr = readl(drvdata->base + TRCCCCTLR); + state->trcbbctlr = readl(drvdata->base + TRCBBCTLR); + state->trctraceidr = readl(drvdata->base + TRCTRACEIDR); + state->trcqctlr = readl(drvdata->base + TRCQCTLR); + + state->trcvictlr = readl(drvdata->base + TRCVICTLR); + state->trcviiectlr = readl(drvdata->base + TRCVIIECTLR); + state->trcvissctlr = readl(drvdata->base + TRCVISSCTLR); + state->trcvipcssctlr = readl(drvdata->base + TRCVIPCSSCTLR); + state->trcvdctlr = readl(drvdata->base + TRCVDCTLR); + state->trcvdsacctlr = readl(drvdata->base + TRCVDSACCTLR); + state->trcvdarcctlr = readl(drvdata->base + TRCVDARCCTLR); + + for (i = 0; i < drvdata->nrseqstate; i++) + state->trcseqevr[i] = readl(drvdata->base + TRCSEQEVRn(i)); + + state->trcseqrstevr = readl(drvdata->base + TRCSEQRSTEVR); + state->trcseqstr = readl(drvdata->base + TRCSEQSTR); + state->trcextinselr = readl(drvdata->base + TRCEXTINSELR); + + for (i = 0; i < drvdata->nr_cntr; i++) { + state->trccntrldvr[i] = readl(drvdata->base + TRCCNTRLDVRn(i)); + state->trccntctlr[i] = readl(drvdata->base + TRCCNTCTLRn(i)); + state->trccntvr[i] = readl(drvdata->base + TRCCNTVRn(i)); + } + + for (i = 0; i < drvdata->nr_resource * 2; i++) + state->trcrsctlr[i] = readl(drvdata->base + TRCRSCTLRn(i)); + + for (i = 0; i < drvdata->nr_ss_cmp; i++) { + state->trcssccr[i] = readl(drvdata->base + TRCSSCCRn(i)); + state->trcsscsr[i] = readl(drvdata->base + TRCSSCSRn(i)); + state->trcsspcicr[i] = readl(drvdata->base + TRCSSPCICRn(i)); + } + + for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) { + state->trcacvr[i] = readl(drvdata->base + TRCACVRn(i)); + state->trcacatr[i] = readl(drvdata->base + TRCACATRn(i)); + } + + /* + * Data trace stream is architecturally prohibited for A profile cores + * so we don't save (or later restore) trcdvcvr and trcdvcmr - As per + * section 1.3.4 ("Possible functional configurations of an ETMv4 trace + * unit") of ARM IHI 0064D. + */ + + for (i = 0; i < drvdata->numcidc; i++) + state->trccidcvr[i] = readl(drvdata->base + TRCCIDCVRn(i)); + + for (i = 0; i < drvdata->numvmidc; i++) + state->trcvmidcvr[i] = readl(drvdata->base + TRCVMIDCVRn(i)); + + state->trccidcctlr0 = readl(drvdata->base + TRCCIDCCTLR0); + state->trccidcctlr1 = readl(drvdata->base + TRCCIDCCTLR1); + + state->trcvmidcctlr0 = readl(drvdata->base + TRCVMIDCCTLR0); + state->trcvmidcctlr0 = readl(drvdata->base + TRCVMIDCCTLR1); + + state->trcclaimset = readl(drvdata->base + TRCCLAIMCLR); + + state->trcpdcr = readl(drvdata->base + TRCPDCR); + + /* wait for TRCSTATR.IDLE to go up */ + if (coresight_timeout(drvdata->base, TRCSTATR, TRCSTATR_IDLE_BIT, 1)) { + dev_err(etm_dev, + "timeout while waiting for Idle Trace Status\n"); + etm4_os_unlock(drvdata); + ret = -EBUSY; + goto out; + } + + drvdata->state_needs_restore = true; + + /* + * Power can be removed from the trace unit now. We do this to + * potentially save power on systems that respect the TRCPDCR_PU + * despite requesting software to save/restore state. + */ + writel_relaxed((state->trcpdcr & ~TRCPDCR_PU), + drvdata->base + TRCPDCR); + +out: + CS_LOCK(drvdata->base); + return ret; +} + +static void etm4_cpu_restore(struct etmv4_drvdata *drvdata) +{ + int i; + struct etmv4_save_state *state = drvdata->save_state; + + CS_UNLOCK(drvdata->base); + + writel_relaxed(state->trcclaimset, drvdata->base + TRCCLAIMSET); + + writel_relaxed(state->trcprgctlr, drvdata->base + TRCPRGCTLR); + writel_relaxed(state->trcprocselr, drvdata->base + TRCPROCSELR); + writel_relaxed(state->trcconfigr, drvdata->base + TRCCONFIGR); + writel_relaxed(state->trcauxctlr, drvdata->base + TRCAUXCTLR); + writel_relaxed(state->trceventctl0r, drvdata->base + TRCEVENTCTL0R); + writel_relaxed(state->trceventctl1r, drvdata->base + TRCEVENTCTL1R); + writel_relaxed(state->trcstallctlr, drvdata->base + TRCSTALLCTLR); + writel_relaxed(state->trctsctlr, drvdata->base + TRCTSCTLR); + writel_relaxed(state->trcsyncpr, drvdata->base + TRCSYNCPR); + writel_relaxed(state->trcccctlr, drvdata->base + TRCCCCTLR); + writel_relaxed(state->trcbbctlr, drvdata->base + TRCBBCTLR); + writel_relaxed(state->trctraceidr, drvdata->base + TRCTRACEIDR); + writel_relaxed(state->trcqctlr, drvdata->base + TRCQCTLR); + + writel_relaxed(state->trcvictlr, drvdata->base + TRCVICTLR); + writel_relaxed(state->trcviiectlr, drvdata->base + TRCVIIECTLR); + writel_relaxed(state->trcvissctlr, drvdata->base + TRCVISSCTLR); + writel_relaxed(state->trcvipcssctlr, drvdata->base + TRCVIPCSSCTLR); + writel_relaxed(state->trcvdctlr, drvdata->base + TRCVDCTLR); + writel_relaxed(state->trcvdsacctlr, drvdata->base + TRCVDSACCTLR); + writel_relaxed(state->trcvdarcctlr, drvdata->base + TRCVDARCCTLR); + + for (i = 0; i < drvdata->nrseqstate; i++) + writel_relaxed(state->trcseqevr[i], + drvdata->base + TRCSEQEVRn(i)); + + writel_relaxed(state->trcseqrstevr, drvdata->base + TRCSEQRSTEVR); + writel_relaxed(state->trcseqstr, drvdata->base + TRCSEQSTR); + writel_relaxed(state->trcextinselr, drvdata->base + TRCEXTINSELR); + + for (i = 0; i < drvdata->nr_cntr; i++) { + writel_relaxed(state->trccntrldvr[i], + drvdata->base + TRCCNTRLDVRn(i)); + writel_relaxed(state->trccntctlr[i], + drvdata->base + TRCCNTCTLRn(i)); + writel_relaxed(state->trccntvr[i], + drvdata->base + TRCCNTVRn(i)); + } + + for (i = 0; i < drvdata->nr_resource * 2; i++) + writel_relaxed(state->trcrsctlr[i], + drvdata->base + TRCRSCTLRn(i)); + + for (i = 0; i < drvdata->nr_ss_cmp; i++) { + writel_relaxed(state->trcssccr[i], + drvdata->base + TRCSSCCRn(i)); + writel_relaxed(state->trcsscsr[i], + drvdata->base + TRCSSCSRn(i)); + writel_relaxed(state->trcsspcicr[i], + drvdata->base + TRCSSPCICRn(i)); + } + + for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) { + writel_relaxed(state->trcacvr[i], + drvdata->base + TRCACVRn(i)); + writel_relaxed(state->trcacatr[i], + drvdata->base + TRCACATRn(i)); + } + + for (i = 0; i < drvdata->numcidc; i++) + writel_relaxed(state->trccidcvr[i], + drvdata->base + TRCCIDCVRn(i)); + + for (i = 0; i < drvdata->numvmidc; i++) + writel_relaxed(state->trcvmidcvr[i], + drvdata->base + TRCVMIDCVRn(i)); + + writel_relaxed(state->trccidcctlr0, drvdata->base + TRCCIDCCTLR0); + writel_relaxed(state->trccidcctlr1, drvdata->base + TRCCIDCCTLR1); + + writel_relaxed(state->trcvmidcctlr0, drvdata->base + TRCVMIDCCTLR0); + writel_relaxed(state->trcvmidcctlr0, drvdata->base + TRCVMIDCCTLR1); + + writel_relaxed(state->trcclaimset, drvdata->base + TRCCLAIMSET); + + writel_relaxed(state->trcpdcr, drvdata->base + TRCPDCR); + + drvdata->state_needs_restore = false; + + /* + * As recommended by section 4.3.7 ("Synchronization when using the + * memory-mapped interface") of ARM IHI 0064D + */ + dsb(sy); + isb(); + + /* Unlock the OS lock to re-enable trace and external debug access */ + etm4_os_unlock(drvdata); + CS_LOCK(drvdata->base); +} + +static int etm4_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd, + void *v) +{ + struct etmv4_drvdata *drvdata; + unsigned int cpu = smp_processor_id(); + + if (!etmdrvdata[cpu]) + return NOTIFY_OK; + + drvdata = etmdrvdata[cpu]; + + if (!drvdata->save_state) + return NOTIFY_OK; + + if (WARN_ON_ONCE(drvdata->cpu != cpu)) + return NOTIFY_BAD; + + switch (cmd) { + case CPU_PM_ENTER: + /* save the state if self-hosted coresight is in use */ + if (local_read(&drvdata->mode)) + if (etm4_cpu_save(drvdata)) + return NOTIFY_BAD; + break; + case CPU_PM_EXIT: + /* fallthrough */ + case CPU_PM_ENTER_FAILED: + if (drvdata->state_needs_restore) + etm4_cpu_restore(drvdata); + break; + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; +} + +static struct notifier_block etm4_cpu_pm_nb = { + .notifier_call = etm4_cpu_pm_notify, +}; + +static int etm4_cpu_pm_register(void) +{ + return cpu_pm_register_notifier(&etm4_cpu_pm_nb); +} + +static void etm4_cpu_pm_unregister(void) +{ + cpu_pm_unregister_notifier(&etm4_cpu_pm_nb); +} +#else +static int etm4_cpu_pm_register(void) { return 0; } +static void etm4_cpu_pm_unregister(void) { } +#endif + static int etm4_probe(struct amba_device *adev, const struct amba_id *id) { int ret; @@ -1115,6 +1416,17 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id) drvdata->dev = &adev->dev; dev_set_drvdata(dev, drvdata); + if (pm_save_enable == PARAM_PM_SAVE_FIRMWARE) + pm_save_enable = coresight_loses_context_with_cpu(dev) ? + PARAM_PM_SAVE_SELF_HOSTED : PARAM_PM_SAVE_NEVER; + + if (pm_save_enable != PARAM_PM_SAVE_NEVER) { + drvdata->save_state = devm_kmalloc(dev, + sizeof(struct etmv4_save_state), GFP_KERNEL); + if (!drvdata->save_state) + return -ENOMEM; + } + /* Validity for the resource is already checked by the AMBA core */ base = devm_ioremap_resource(dev, res); if (IS_ERR(base)) @@ -1143,6 +1455,10 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id) if (ret < 0) goto err_arch_supported; hp_online = ret; + + ret = etm4_cpu_pm_register(); + if (ret) + goto err_arch_supported; } cpus_read_unlock(); @@ -1186,6 +1502,8 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id) err_arch_supported: if (--etm4_count == 0) { + etm4_cpu_pm_unregister(); + cpuhp_remove_state_nocalls(CPUHP_AP_ARM_CORESIGHT_STARTING); if (hp_online) cpuhp_remove_state_nocalls(hp_online); diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h index b3b5ea7b7fb3..397fd40556b0 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.h +++ b/drivers/hwtracing/coresight/coresight-etm4x.h @@ -182,6 +182,7 @@ ETM_MODE_EXCL_USER) #define TRCSTATR_IDLE_BIT 0 +#define TRCSTATR_PMSTABLE_BIT 1 #define ETM_DEFAULT_ADDR_COMP 0 /* PowerDown Control Register bits */ @@ -291,6 +292,65 @@ struct etmv4_config { u32 ext_inp; }; +/** + * struct etm4_save_state - state to be preserved when ETM is without power + */ +struct etmv4_save_state { + u32 trcprgctlr; + u32 trcprocselr; + u32 trcconfigr; + u32 trcauxctlr; + u32 trceventctl0r; + u32 trceventctl1r; + u32 trcstallctlr; + u32 trctsctlr; + u32 trcsyncpr; + u32 trcccctlr; + u32 trcbbctlr; + u32 trctraceidr; + u32 trcqctlr; + + u32 trcvictlr; + u32 trcviiectlr; + u32 trcvissctlr; + u32 trcvipcssctlr; + u32 trcvdctlr; + u32 trcvdsacctlr; + u32 trcvdarcctlr; + + u32 trcseqevr[ETM_MAX_SEQ_STATES]; + u32 trcseqrstevr; + u32 trcseqstr; + u32 trcextinselr; + u32 trccntrldvr[ETMv4_MAX_CNTR]; + u32 trccntctlr[ETMv4_MAX_CNTR]; + u32 trccntvr[ETMv4_MAX_CNTR]; + + u32 trcrsctlr[ETM_MAX_RES_SEL * 2]; + + u32 trcssccr[ETM_MAX_SS_CMP]; + u32 trcsscsr[ETM_MAX_SS_CMP]; + u32 trcsspcicr[ETM_MAX_SS_CMP]; + + u64 trcacvr[ETM_MAX_SINGLE_ADDR_CMP]; + u64 trcacatr[ETM_MAX_SINGLE_ADDR_CMP]; + u64 trccidcvr[ETMv4_MAX_CTXID_CMP]; + u32 trcvmidcvr[ETM_MAX_VMID_CMP]; + u32 trccidcctlr0; + u32 trccidcctlr1; + u32 trcvmidcctlr0; + u32 trcvmidcctlr1; + + u32 trcclaimset; + + u32 cntr_val[ETMv4_MAX_CNTR]; + u32 seq_state; + u32 vinst_ctrl; + u32 ss_status[ETM_MAX_SS_CMP]; + + u32 trcpdcr; +}; + /** * struct etm4_drvdata - specifics associated to an ETM component * @base: Memory mapped base address for this component. @@ -347,6 +407,8 @@ struct etmv4_config { * @atbtrig: If the implementation can support ATB triggers * @lpoverride: If the implementation can support low-power state over. * @config: structure holding configuration parameters. + * @save_state: State to be preserved across power loss + * @state_needs_restore: True when there is context to restore after PM exit */ struct etmv4_drvdata { void __iomem *base; @@ -393,6 +455,8 @@ struct etmv4_drvdata { bool atbtrig; bool lpoverride; struct etmv4_config config; + struct etmv4_save_state *save_state; + bool state_needs_restore; }; /* Address comparator access types */ diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index 8381e726c29c..65ea87e51c46 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -1275,3 +1275,9 @@ void coresight_unregister(struct coresight_device *csdev) device_unregister(&csdev->dev); } EXPORT_SYMBOL_GPL(coresight_unregister); + +bool coresight_loses_context_with_cpu(struct device *dev) +{ + return fwnode_property_present(dev_fwnode(dev), + "arm,coresight-loses-context-with-cpu"); +} diff --git a/include/linux/coresight.h b/include/linux/coresight.h index b8c51f606121..d38d1ac45e13 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -282,6 +282,7 @@ extern int coresight_claim_device_unlocked(void __iomem *base); extern void coresight_disclaim_device(void __iomem *base); extern void coresight_disclaim_device_unlocked(void __iomem *base); +extern bool coresight_loses_context_with_cpu(struct device *dev); #else static inline struct coresight_device * coresight_register(struct coresight_desc *desc) { return NULL; } @@ -304,6 +305,10 @@ static inline int coresight_claim_device(void __iomem *base) static inline void coresight_disclaim_device(void __iomem *base) {} static inline void coresight_disclaim_device_unlocked(void __iomem *base) {} +static inline bool coresight_loses_context_with_cpu(struct device *dev) +{ + return false; +} #endif #ifdef CONFIG_OF From b7f8d9ba4f3eb4294d4ba2a87499cbeb75612645 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Mon, 4 Nov 2019 11:12:39 -0700 Subject: [PATCH 1163/3715] UPSTREAM: dt-bindings: arm: coresight: Add support for coresight-loses-context-with-cpu Some coresight components, because of choices made during hardware integration, require their state to be saved and restored across CPU low power states. The software has no reliable method of detecting when save/restore is required thus let's add a binding to inform the kernel. Signed-off-by: Andrew Murray Reviewed-by: Mathieu Poirier Reviewed-by: Suzuki K Poulose Reviewed-by: Rob Herring Signed-off-by: Mathieu Poirier Link: https://lore.kernel.org/r/20191104181251.26732-3-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman (Upstream commit b7909065e8ebaffd80375d01f5d151ff59b59ce6). Bug: 140266694 Change-Id: If246eaf51410a623e3f00d883069f10ad030f6a1 Signed-off-by: Yabin Cui --- Documentation/devicetree/bindings/arm/coresight.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Documentation/devicetree/bindings/arm/coresight.txt b/Documentation/devicetree/bindings/arm/coresight.txt index 9aa30a156b57..63173479e8c3 100644 --- a/Documentation/devicetree/bindings/arm/coresight.txt +++ b/Documentation/devicetree/bindings/arm/coresight.txt @@ -73,6 +73,15 @@ its hardware characteristcs. * port or ports: same as above. +* Optional properties for all components: + + * arm,coresight-loses-context-with-cpu : boolean. Indicates that the + hardware will lose register context on CPU power down (e.g. CPUIdle). + An example of where this may be needed are systems which contain a + coresight component and CPU in the same power domain. When the CPU + powers down the coresight component also powers down and loses its + context. This property is currently only used for the ETM 4.x driver. + * Optional properties for ETM/PTMs: * arm,cp14: must be present if the system accesses ETM/PTM management From 77fdfb139c039dd3244e7983df62f750089b6742 Mon Sep 17 00:00:00 2001 From: Konstantin Khorenko Date: Fri, 8 Jun 2018 17:27:11 +0300 Subject: [PATCH 1164/3715] fs/lock: skip lock owner pid translation in case we are in init_pid_ns If the flock owner process is dead and its pid has been already freed, pid translation won't work, but we still want to show flock owner pid number when expecting /proc/$PID/fdinfo/$FD in init pidns. Reproducer: process A process A1 process A2 fork()---------> exit() open() flock() fork()---------> exit() sleep() Before the patch: ================ (root@vz7)/: cat /proc/${PID_A2}/fdinfo/3 pos: 4 flags: 02100002 mnt_id: 257 lock: (root@vz7)/: After the patch: =============== (root@vz7)/:cat /proc/${PID_A2}/fdinfo/3 pos: 4 flags: 02100002 mnt_id: 295 lock: 1: FLOCK ADVISORY WRITE ${PID_A1} b6:f8a61:529946 0 EOF Fixes: 9d5b86ac13c5 ("fs/locks: Remove fl_nspid and use fs-specific l_pid for remote locks") Signed-off-by: Konstantin Khorenko Acked-by: Andrey Vagin Reviewed-by: Benjamin Coddington Signed-off-by: Jeff Layton --- fs/locks.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/locks.c b/fs/locks.c index 1bd71c4d663a..665e3ce9ab47 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2074,6 +2074,13 @@ static pid_t locks_translate_pid(struct file_lock *fl, struct pid_namespace *ns) return -1; if (IS_REMOTELCK(fl)) return fl->fl_pid; + /* + * If the flock owner process is dead and its pid has been already + * freed, the translation below won't work, but we still want to show + * flock owner pid number in init pidns. + */ + if (ns == &init_pid_ns) + return (pid_t)fl->fl_pid; rcu_read_lock(); pid = find_pid_ns(fl->fl_pid, &init_pid_ns); From 4f4788e49910473a1be6bb14aaa01915d895377d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 5 Dec 2019 08:25:45 +0100 Subject: [PATCH 1165/3715] Revert "KVM: nVMX: reset cache/shadows when switching loaded VMCS" This reverts commit 9f0b41be6aff47c24c6431bdc76f86b9cd647a0d which is commit b7031fd40fcc741b0f9b0c04c8d844e445858b84 upstream. It should not have been selected for a stable kernel as it breaks the nVMX regression tests. Reported-by: Jack Wang Reported-by: Paolo Bonzini Cc: Jim Mattson Cc: Sean Christopherson Cc: Jim Mattson Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f67fc0f359ff..790b217fef9f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -10000,10 +10000,6 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs) vmx_vcpu_load(vcpu, cpu); vcpu->cpu = cpu; put_cpu(); - - vm_entry_controls_reset_shadow(vmx); - vm_exit_controls_reset_shadow(vmx); - vmx_segment_cache_clear(vmx); } /* @@ -11432,6 +11428,7 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02); + vmx_segment_cache_clear(vmx); if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) { leave_guest_mode(vcpu); @@ -12175,6 +12172,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, } vmx_switch_vmcs(vcpu, &vmx->vmcs01); + vm_entry_controls_reset_shadow(vmx); + vm_exit_controls_reset_shadow(vmx); + vmx_segment_cache_clear(vmx); /* Update any VMCS fields that might have changed while L2 ran */ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); From 05fb6527b1b2d91d38fd258e8670d6cd2f9f163b Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 21 Sep 2019 17:04:11 +0200 Subject: [PATCH 1166/3715] clk: meson: gxbb: let sar_adc_clk_div set the parent clock rate [ Upstream commit 44b09b11b813b8550e6b976ea51593bc23bba8d1 ] The meson-saradc driver manually sets the input clock for sar_adc_clk_sel. Update the GXBB clock driver (which is used on GXBB, GXL and GXM) so the rate settings on sar_adc_clk_div are propagated up to sar_adc_clk_sel which will let the common clock framework select the best matching parent clock if we want that. This makes sar_adc_clk_div consistent with the axg-aoclk and g12a-aoclk drivers, which both also specify CLK_SET_RATE_PARENT. Fixes: 33d0fcdfe0e870 ("clk: gxbb: add the SAR ADC clocks and expose them") Signed-off-by: Martin Blumenstingl Signed-off-by: Jerome Brunet Signed-off-by: Sasha Levin --- drivers/clk/meson/gxbb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/meson/gxbb.c b/drivers/clk/meson/gxbb.c index 92168348ffa6..f2d27addf485 100644 --- a/drivers/clk/meson/gxbb.c +++ b/drivers/clk/meson/gxbb.c @@ -687,6 +687,7 @@ static struct clk_divider gxbb_sar_adc_clk_div = { .ops = &clk_divider_ops, .parent_names = (const char *[]){ "sar_adc_clk_sel" }, .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, }, }; From 1fce0adc143d76a81c414854dbaddc7252d894d1 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Sun, 20 Oct 2019 17:30:06 +0200 Subject: [PATCH 1167/3715] ASoC: msm8916-wcd-analog: Fix RX1 selection in RDAC2 MUX [ Upstream commit 9110d1b0e229cebb1ffce0c04db2b22beffd513d ] According to the PM8916 Hardware Register Description, CDC_D_CDC_CONN_HPHR_DAC_CTL has only a single bit (RX_SEL) to switch between RX1 (0) and RX2 (1). It is not possible to disable it entirely to achieve the "ZERO" state. However, at the moment the "RDAC2 MUX" mixer defines three possible values ("ZERO", "RX2" and "RX1"). Setting the mixer to "ZERO" actually configures it to RX1. Setting the mixer to "RX1" has (seemingly) no effect. Remove "ZERO" and replace it with "RX1" to fix this. Fixes: 585e881e5b9e ("ASoC: codecs: Add msm8916-wcd analog codec") Signed-off-by: Stephan Gerhold Acked-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20191020153007.206070-1-stephan@gerhold.net Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/msm8916-wcd-analog.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/msm8916-wcd-analog.c b/sound/soc/codecs/msm8916-wcd-analog.c index 0b9b014b4bb6..969283737787 100644 --- a/sound/soc/codecs/msm8916-wcd-analog.c +++ b/sound/soc/codecs/msm8916-wcd-analog.c @@ -303,7 +303,7 @@ struct pm8916_wcd_analog_priv { }; static const char *const adc2_mux_text[] = { "ZERO", "INP2", "INP3" }; -static const char *const rdac2_mux_text[] = { "ZERO", "RX2", "RX1" }; +static const char *const rdac2_mux_text[] = { "RX1", "RX2" }; static const char *const hph_text[] = { "ZERO", "Switch", }; static const struct soc_enum hph_enum = SOC_ENUM_SINGLE_VIRT( @@ -318,7 +318,7 @@ static const struct soc_enum adc2_enum = SOC_ENUM_SINGLE_VIRT( /* RDAC2 MUX */ static const struct soc_enum rdac2_mux_enum = SOC_ENUM_SINGLE( - CDC_D_CDC_CONN_HPHR_DAC_CTL, 0, 3, rdac2_mux_text); + CDC_D_CDC_CONN_HPHR_DAC_CTL, 0, 2, rdac2_mux_text); static const struct snd_kcontrol_new spkr_switch[] = { SOC_DAPM_SINGLE("Switch", CDC_A_SPKR_DAC_CTL, 7, 1, 0) From 7434723ba87267dc9547b09fb9ac584a98103b76 Mon Sep 17 00:00:00 2001 From: Xiaojun Sang Date: Mon, 21 Oct 2019 10:54:32 +0100 Subject: [PATCH 1168/3715] ASoC: compress: fix unsigned integer overflow check [ Upstream commit d3645b055399538415586ebaacaedebc1e5899b0 ] Parameter fragments and fragment_size are type of u32. U32_MAX is the correct check. Signed-off-by: Xiaojun Sang Signed-off-by: Srinivas Kandagatla Acked-by: Vinod Koul Link: https://lore.kernel.org/r/20191021095432.5639-1-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/core/compress_offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c index 2e2d18468491..7ae8e24dc1e6 100644 --- a/sound/core/compress_offload.c +++ b/sound/core/compress_offload.c @@ -529,7 +529,7 @@ static int snd_compress_check_input(struct snd_compr_params *params) { /* first let's check the buffer parameter's */ if (params->buffer.fragment_size == 0 || - params->buffer.fragments > INT_MAX / params->buffer.fragment_size || + params->buffer.fragments > U32_MAX / params->buffer.fragment_size || params->buffer.fragments == 0) return -EINVAL; From 533cdfe9cc126a9e19b54a8958684cd1baa29500 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Tue, 22 Oct 2019 14:06:23 +0530 Subject: [PATCH 1169/3715] reset: Fix memory leak in reset_control_array_put() [ Upstream commit 532f9cd6ee994ed10403e856ca27501428048597 ] Memory allocated for 'struct reset_control_array' in of_reset_control_array_get() is never freed in reset_control_array_put() resulting in kmemleak showing the following backtrace. backtrace: [<00000000c5f17595>] __kmalloc+0x1b0/0x2b0 [<00000000bd499e13>] of_reset_control_array_get+0xa4/0x180 [<000000004cc02754>] 0xffff800008c669e4 [<0000000050a83b24>] platform_drv_probe+0x50/0xa0 [<00000000d3a0b0bc>] really_probe+0x108/0x348 [<000000005aa458ac>] driver_probe_device+0x58/0x100 [<000000008853626c>] device_driver_attach+0x6c/0x90 [<0000000085308d19>] __driver_attach+0x84/0xc8 [<00000000080d35f2>] bus_for_each_dev+0x74/0xc8 [<00000000dd7f015b>] driver_attach+0x20/0x28 [<00000000923ba6e6>] bus_add_driver+0x148/0x1f0 [<0000000061473b66>] driver_register+0x60/0x110 [<00000000c5bec167>] __platform_driver_register+0x40/0x48 [<000000007c764b4f>] 0xffff800008c6c020 [<0000000047ec2e8c>] do_one_initcall+0x5c/0x1b0 [<0000000093d4b50d>] do_init_module+0x54/0x1d0 Fixes: 17c82e206d2a ("reset: Add APIs to manage array of resets") Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Philipp Zabel Signed-off-by: Sasha Levin --- drivers/reset/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/reset/core.c b/drivers/reset/core.c index 72b96b5c75a8..7e0a14211c88 100644 --- a/drivers/reset/core.c +++ b/drivers/reset/core.c @@ -513,6 +513,7 @@ static void reset_control_array_put(struct reset_control_array *resets) for (i = 0; i < resets->num_rstcs; i++) __reset_control_put_internal(resets->rstc[i]); mutex_unlock(&reset_list_mutex); + kfree(resets); } /** From d7c8540ceb631e1b8d054845c871c01f3c1175ee Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 23 Oct 2019 14:46:44 +0100 Subject: [PATCH 1170/3715] ASoC: kirkwood: fix external clock probe defer [ Upstream commit 4523817d51bc3b2ef38da768d004fda2c8bc41de ] When our call to get the external clock fails, we forget to clean up the enabled internal clock correctly. Enable the clock after we have obtained all our resources. Fixes: 84aac6c79bfd ("ASoC: kirkwood: fix loss of external clock at probe time") Signed-off-by: Russell King Link: https://lore.kernel.org/r/E1iNGyK-0004oF-6A@rmk-PC.armlinux.org.uk Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/kirkwood/kirkwood-i2s.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/kirkwood/kirkwood-i2s.c b/sound/soc/kirkwood/kirkwood-i2s.c index 105a73cc5158..149b7cba10fb 100644 --- a/sound/soc/kirkwood/kirkwood-i2s.c +++ b/sound/soc/kirkwood/kirkwood-i2s.c @@ -569,10 +569,6 @@ static int kirkwood_i2s_dev_probe(struct platform_device *pdev) return PTR_ERR(priv->clk); } - err = clk_prepare_enable(priv->clk); - if (err < 0) - return err; - priv->extclk = devm_clk_get(&pdev->dev, "extclk"); if (IS_ERR(priv->extclk)) { if (PTR_ERR(priv->extclk) == -EPROBE_DEFER) @@ -588,6 +584,10 @@ static int kirkwood_i2s_dev_probe(struct platform_device *pdev) } } + err = clk_prepare_enable(priv->clk); + if (err < 0) + return err; + /* Some sensible defaults - this reflects the powerup values */ priv->ctl_play = KIRKWOOD_PLAYCTL_SIZE_24; priv->ctl_rec = KIRKWOOD_RECCTL_SIZE_24; From a8284286df09a6f61a47dae2a73b375adf3ec03a Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 25 Oct 2019 11:02:01 +0200 Subject: [PATCH 1171/3715] clk: samsung: exynos5420: Preserve PLL configuration during suspend/resume [ Upstream commit e9323b664ce29547d996195e8a6129a351c39108 ] Properly save and restore all top PLL related configuration registers during suspend/resume cycle. So far driver only handled EPLL and RPLL clocks, all other were reset to default values after suspend/resume cycle. This caused for example lower G3D (MALI Panfrost) performance after system resume, even if performance governor has been selected. Reported-by: Reported-by: Marian Mihailescu Fixes: 773424326b51 ("clk: samsung: exynos5420: add more registers to restore list") Signed-off-by: Marek Szyprowski Signed-off-by: Sylwester Nawrocki Signed-off-by: Sasha Levin --- drivers/clk/samsung/clk-exynos5420.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/clk/samsung/clk-exynos5420.c b/drivers/clk/samsung/clk-exynos5420.c index a882f7038bce..47a14f93f869 100644 --- a/drivers/clk/samsung/clk-exynos5420.c +++ b/drivers/clk/samsung/clk-exynos5420.c @@ -170,12 +170,18 @@ static const unsigned long exynos5x_clk_regs[] __initconst = { GATE_BUS_CPU, GATE_SCLK_CPU, CLKOUT_CMU_CPU, + CPLL_CON0, + DPLL_CON0, EPLL_CON0, EPLL_CON1, EPLL_CON2, RPLL_CON0, RPLL_CON1, RPLL_CON2, + IPLL_CON0, + SPLL_CON0, + VPLL_CON0, + MPLL_CON0, SRC_TOP0, SRC_TOP1, SRC_TOP2, From 0b27f7fe373eddcbf66a8be94f5844b9b07b238b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 22 Oct 2019 20:57:06 -0700 Subject: [PATCH 1172/3715] reset: fix reset_control_ops kerneldoc comment [ Upstream commit f430c7ed8bc22992ed528b518da465b060b9223f ] Add a missing short description to the reset_control_ops documentation. Signed-off-by: Randy Dunlap [p.zabel@pengutronix.de: rebased and updated commit message] Signed-off-by: Philipp Zabel Signed-off-by: Sasha Levin --- include/linux/reset-controller.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/reset-controller.h b/include/linux/reset-controller.h index adb88f8cefbc..576caaf0c9af 100644 --- a/include/linux/reset-controller.h +++ b/include/linux/reset-controller.h @@ -7,7 +7,7 @@ struct reset_controller_dev; /** - * struct reset_control_ops + * struct reset_control_ops - reset controller driver callbacks * * @reset: for self-deasserting resets, does all necessary * things to reset the device From ae3883adf0b73a9b0fec533ce023195ec5348dc2 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Fri, 20 Sep 2019 17:39:06 +0200 Subject: [PATCH 1173/3715] clk: at91: avoid sleeping early MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 658fd65cf0b0d511de1718e48d9a28844c385ae0 ] It is not allowed to sleep to early in the boot process and this may lead to kernel issues if the bootloader didn't prepare the slow clock and main clock. This results in the following error and dump stack on the AriettaG25: bad: scheduling from the idle thread! Ensure it is possible to sleep, else simply have a delay. Reported-by: Uwe Kleine-König Signed-off-by: Alexandre Belloni Link: https://lkml.kernel.org/r/20190920153906.20887-1-alexandre.belloni@bootlin.com Fixes: 80eded6ce8bb ("clk: at91: add slow clks driver") Tested-by: Uwe Kleine-König Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/at91/clk-main.c | 5 ++++- drivers/clk/at91/sckc.c | 20 ++++++++++++++++---- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/clk/at91/clk-main.c b/drivers/clk/at91/clk-main.c index 2f97a843d6d6..fb5c14af8cc8 100644 --- a/drivers/clk/at91/clk-main.c +++ b/drivers/clk/at91/clk-main.c @@ -354,7 +354,10 @@ static int clk_main_probe_frequency(struct regmap *regmap) regmap_read(regmap, AT91_CKGR_MCFR, &mcfr); if (mcfr & AT91_PMC_MAINRDY) return 0; - usleep_range(MAINF_LOOP_MIN_WAIT, MAINF_LOOP_MAX_WAIT); + if (system_state < SYSTEM_RUNNING) + udelay(MAINF_LOOP_MIN_WAIT); + else + usleep_range(MAINF_LOOP_MIN_WAIT, MAINF_LOOP_MAX_WAIT); } while (time_before(prep_time, timeout)); return -ETIMEDOUT; diff --git a/drivers/clk/at91/sckc.c b/drivers/clk/at91/sckc.c index ab6ecefc49ad..43ba2a8b03fa 100644 --- a/drivers/clk/at91/sckc.c +++ b/drivers/clk/at91/sckc.c @@ -74,7 +74,10 @@ static int clk_slow_osc_prepare(struct clk_hw *hw) writel(tmp | AT91_SCKC_OSC32EN, sckcr); - usleep_range(osc->startup_usec, osc->startup_usec + 1); + if (system_state < SYSTEM_RUNNING) + udelay(osc->startup_usec); + else + usleep_range(osc->startup_usec, osc->startup_usec + 1); return 0; } @@ -197,7 +200,10 @@ static int clk_slow_rc_osc_prepare(struct clk_hw *hw) writel(readl(sckcr) | AT91_SCKC_RCEN, sckcr); - usleep_range(osc->startup_usec, osc->startup_usec + 1); + if (system_state < SYSTEM_RUNNING) + udelay(osc->startup_usec); + else + usleep_range(osc->startup_usec, osc->startup_usec + 1); return 0; } @@ -310,7 +316,10 @@ static int clk_sam9x5_slow_set_parent(struct clk_hw *hw, u8 index) writel(tmp, sckcr); - usleep_range(SLOWCK_SW_TIME_USEC, SLOWCK_SW_TIME_USEC + 1); + if (system_state < SYSTEM_RUNNING) + udelay(SLOWCK_SW_TIME_USEC); + else + usleep_range(SLOWCK_SW_TIME_USEC, SLOWCK_SW_TIME_USEC + 1); return 0; } @@ -443,7 +452,10 @@ static int clk_sama5d4_slow_osc_prepare(struct clk_hw *hw) return 0; } - usleep_range(osc->startup_usec, osc->startup_usec + 1); + if (system_state < SYSTEM_RUNNING) + udelay(osc->startup_usec); + else + usleep_range(osc->startup_usec, osc->startup_usec + 1); osc->prepared = true; return 0; From 7086d6a65075d96c203a03d464bd439fd81160e5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 23 Oct 2019 12:28:09 +0100 Subject: [PATCH 1174/3715] clk: sunxi-ng: a80: fix the zero'ing of bits 16 and 18 [ Upstream commit cdfc2e2086bf9c465f44e2db25561373b084a113 ] The zero'ing of bits 16 and 18 is incorrect. Currently the code is masking with the bitwise-and of BIT(16) & BIT(18) which is 0, so the updated value for val is always zero. Fix this by bitwise and-ing value with the correct mask that will zero bits 16 and 18. Addresses-Coverity: (" Suspicious &= or |= constant expression") Fixes: b8eb71dcdd08 ("clk: sunxi-ng: Add A80 CCU") Signed-off-by: Colin Ian King Signed-off-by: Maxime Ripard Signed-off-by: Sasha Levin --- drivers/clk/sunxi-ng/ccu-sun9i-a80.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun9i-a80.c b/drivers/clk/sunxi-ng/ccu-sun9i-a80.c index 8936ef87652c..c14bf782b2b3 100644 --- a/drivers/clk/sunxi-ng/ccu-sun9i-a80.c +++ b/drivers/clk/sunxi-ng/ccu-sun9i-a80.c @@ -1231,7 +1231,7 @@ static int sun9i_a80_ccu_probe(struct platform_device *pdev) /* Enforce d1 = 0, d2 = 0 for Audio PLL */ val = readl(reg + SUN9I_A80_PLL_AUDIO_REG); - val &= (BIT(16) & BIT(18)); + val &= ~(BIT(16) | BIT(18)); writel(val, reg + SUN9I_A80_PLL_AUDIO_REG); /* Enforce P = 1 for both CPU cluster PLLs */ From d0840f62f1a51df5853b3f22ffc5be91438304db Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 2 Nov 2019 00:25:08 -0400 Subject: [PATCH 1175/3715] idr: Fix idr_alloc_u32 on 32-bit systems [ Upstream commit b7e9728f3d7fc5c5c8508d99f1675212af5cfd49 ] Attempting to allocate an entry at 0xffffffff when one is already present would succeed in allocating one at 2^32, which would confuse everything. Return -ENOSPC in this case, as expected. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Sasha Levin --- lib/radix-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/radix-tree.c b/lib/radix-tree.c index d172f0341b80..ff00c816266b 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -2184,7 +2184,7 @@ void __rcu **idr_get_free_cmn(struct radix_tree_root *root, offset = radix_tree_find_next_bit(node, IDR_FREE, offset + 1); start = next_index(start, node, offset); - if (start > max) + if (start > max || start == 0) return ERR_PTR(-ENOSPC); while (offset == RADIX_TREE_MAP_SIZE) { offset = node->offset + 1; From 222b0c56d2849a48b3ba57ff08c64f5591e72a74 Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Tue, 29 Oct 2019 13:25:02 +0800 Subject: [PATCH 1176/3715] x86/resctrl: Prevent NULL pointer dereference when reading mondata [ Upstream commit 26467b0f8407cbd628fa5b7bcfd156e772004155 ] When a mon group is being deleted, rdtgrp->flags is set to RDT_DELETED in rdtgroup_rmdir_mon() firstly. The structure of rdtgrp will be freed until rdtgrp->waitcount is dropped to 0 in rdtgroup_kn_unlock() later. During the window of deleting a mon group, if an application calls rdtgroup_mondata_show() to read mondata under this mon group, 'rdtgrp' returned from rdtgroup_kn_lock_live() is a NULL pointer when rdtgrp->flags is RDT_DELETED. And then 'rdtgrp' is passed in this path: rdtgroup_mondata_show() --> mon_event_read() --> mon_event_count(). Thus it results in NULL pointer dereference in mon_event_count(). Check 'rdtgrp' in rdtgroup_mondata_show(), and return -ENOENT immediately when reading mondata during the window of deleting a mon group. Fixes: d89b7379015f ("x86/intel_rdt/cqm: Add mon_data") Signed-off-by: Xiaochen Shen Signed-off-by: Borislav Petkov Reviewed-by: Fenghua Yu Reviewed-by: Tony Luck Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: pei.p.jia@intel.com Cc: Reinette Chatre Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/1572326702-27577-1-git-send-email-xiaochen.shen@intel.com Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c index f6ea94f8954a..f892cb0b485e 100644 --- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c +++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c @@ -313,6 +313,10 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) int ret = 0; rdtgrp = rdtgroup_kn_lock_live(of->kn); + if (!rdtgrp) { + ret = -ENOENT; + goto out; + } md.priv = of->kn->priv; resid = md.u.rid; From be5d20130b6572f8f36405beb27e8227eb9a4ae2 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 2 Oct 2019 11:34:36 +0300 Subject: [PATCH 1177/3715] clk: ti: dra7-atl-clock: Remove ti_clk_add_alias call [ Upstream commit 9982b0f69b49931b652d35f86f519be2ccfc7027 ] ti_clk_register() calls it already so the driver should not create duplicated alias. Signed-off-by: Peter Ujfalusi Link: https://lkml.kernel.org/r/20191002083436.10194-1-peter.ujfalusi@ti.com Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/ti/clk-dra7-atl.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/clk/ti/clk-dra7-atl.c b/drivers/clk/ti/clk-dra7-atl.c index 148815470431..beb672a215b6 100644 --- a/drivers/clk/ti/clk-dra7-atl.c +++ b/drivers/clk/ti/clk-dra7-atl.c @@ -174,7 +174,6 @@ static void __init of_dra7_atl_clock_setup(struct device_node *node) struct clk_init_data init = { NULL }; const char **parent_names = NULL; struct clk *clk; - int ret; clk_hw = kzalloc(sizeof(*clk_hw), GFP_KERNEL); if (!clk_hw) { @@ -207,11 +206,6 @@ static void __init of_dra7_atl_clock_setup(struct device_node *node) clk = ti_clk_register(NULL, &clk_hw->hw, node->name); if (!IS_ERR(clk)) { - ret = ti_clk_add_alias(NULL, clk, node->name); - if (ret) { - clk_unregister(clk); - goto cleanup; - } of_clk_add_provider(node, of_clk_src_simple_get, clk); kfree(parent_names); return; From 3c4879a381c5ca3d9a8a6f884b55f53d3c02a710 Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Mon, 4 Nov 2019 23:50:00 +0800 Subject: [PATCH 1178/3715] net: fec: add missed clk_disable_unprepare in remove [ Upstream commit c43eab3eddb4c6742ac20138659a9b701822b274 ] This driver forgets to disable and unprepare clks when remove. Add calls to clk_disable_unprepare to fix it. Signed-off-by: Chuhong Yuan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/fec_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 0ae6532b02e0..0237221059bf 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3574,6 +3574,8 @@ fec_drv_remove(struct platform_device *pdev) regulator_disable(fep->reg_phy); pm_runtime_put(&pdev->dev); pm_runtime_disable(&pdev->dev); + clk_disable_unprepare(fep->clk_ahb); + clk_disable_unprepare(fep->clk_ipg); if (of_phy_is_fixed_link(np)) of_phy_deregister_fixed_link(np); of_node_put(fep->phy_node); From 6fc27a2497def71f459ee326c9b7580530b7a838 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 3 Nov 2019 20:54:28 +0100 Subject: [PATCH 1179/3715] bridge: ebtables: don't crash when using dnat target in output chains MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b23c0742c2ce7e33ed79d10e451f70fdb5ca85d1 ] xt_in() returns NULL in the output hook, skip the pkt_type change for that case, redirection only makes sense in broute/prerouting hooks. Reported-by: Tom Yan Cc: Linus Lüssing Fixes: cf3cb246e277d ("bridge: ebtables: fix reception of frames DNAT-ed to bridge device/port") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/bridge/netfilter/ebt_dnat.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c index dfc86a0199da..1d8c834d9018 100644 --- a/net/bridge/netfilter/ebt_dnat.c +++ b/net/bridge/netfilter/ebt_dnat.c @@ -19,7 +19,6 @@ static unsigned int ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_nat_info *info = par->targinfo; - struct net_device *dev; if (!skb_make_writable(skb, 0)) return EBT_DROP; @@ -32,10 +31,22 @@ ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par) else skb->pkt_type = PACKET_MULTICAST; } else { - if (xt_hooknum(par) != NF_BR_BROUTING) - dev = br_port_get_rcu(xt_in(par))->br->dev; - else + const struct net_device *dev; + + switch (xt_hooknum(par)) { + case NF_BR_BROUTING: dev = xt_in(par); + break; + case NF_BR_PRE_ROUTING: + dev = br_port_get_rcu(xt_in(par))->br->dev; + break; + default: + dev = NULL; + break; + } + + if (!dev) /* NF_BR_LOCAL_OUT */ + return info->target; if (ether_addr_equal(info->mac, dev->dev_addr)) skb->pkt_type = PACKET_HOST; From a07253754ad72d0d4cf64dfcbff66da7bf1cd74c Mon Sep 17 00:00:00 2001 From: Jeroen Hofstee Date: Wed, 25 Sep 2019 08:58:45 +0000 Subject: [PATCH 1180/3715] can: peak_usb: report bus recovery as well [ Upstream commit 128a1b87d3ceb2ba449d5aadb222fe22395adeb0 ] While the state changes are reported when the error counters increase and decrease, there is no event when the bus recovers and the error counters decrease again. So add those as well. Change the state going downward to be ERROR_PASSIVE -> ERROR_WARNING -> ERROR_ACTIVE instead of directly to ERROR_ACTIVE again. Signed-off-by: Jeroen Hofstee Cc: Stephane Grosjean Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/usb/peak_usb/pcan_usb.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c index e626c2afbbb1..0e1fc6c4360e 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb.c @@ -441,8 +441,8 @@ static int pcan_usb_decode_error(struct pcan_usb_msg_context *mc, u8 n, } if ((n & PCAN_USB_ERROR_BUS_LIGHT) == 0) { /* no error (back to active state) */ - mc->pdev->dev.can.state = CAN_STATE_ERROR_ACTIVE; - return 0; + new_state = CAN_STATE_ERROR_ACTIVE; + break; } break; @@ -465,9 +465,9 @@ static int pcan_usb_decode_error(struct pcan_usb_msg_context *mc, u8 n, } if ((n & PCAN_USB_ERROR_BUS_HEAVY) == 0) { - /* no error (back to active state) */ - mc->pdev->dev.can.state = CAN_STATE_ERROR_ACTIVE; - return 0; + /* no error (back to warning state) */ + new_state = CAN_STATE_ERROR_WARNING; + break; } break; @@ -506,6 +506,11 @@ static int pcan_usb_decode_error(struct pcan_usb_msg_context *mc, u8 n, mc->pdev->dev.can.can_stats.error_warning++; break; + case CAN_STATE_ERROR_ACTIVE: + cf->can_id |= CAN_ERR_CRTL; + cf->data[1] = CAN_ERR_CRTL_ACTIVE; + break; + default: /* CAN_STATE_MAX (trick to handle other errors) */ cf->can_id |= CAN_ERR_CRTL; From b448e73cd2cf2ec5bdab5ed8a322c000dc5fea8f Mon Sep 17 00:00:00 2001 From: Jeroen Hofstee Date: Tue, 1 Oct 2019 21:01:20 +0000 Subject: [PATCH 1181/3715] can: c_can: D_CAN: c_can_chip_config(): perform a sofware reset on open [ Upstream commit 23c5a9488f076bab336177cd1d1a366bd8ddf087 ] When the CAN interface is closed it the hardwre is put in power down mode, but does not reset the error counters / state. Reset the D_CAN on open, so the reported state and the actual state match. According to [1], the C_CAN module doesn't have the software reset. [1] http://www.bosch-semiconductors.com/media/ip_modules/pdf_2/c_can_fd8/users_manual_c_can_fd8_r210_1.pdf Signed-off-by: Jeroen Hofstee Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/c_can/c_can.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index 9b61bfbea6cd..24c6015f6c92 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -52,6 +52,7 @@ #define CONTROL_EX_PDR BIT(8) /* control register */ +#define CONTROL_SWR BIT(15) #define CONTROL_TEST BIT(7) #define CONTROL_CCE BIT(6) #define CONTROL_DISABLE_AR BIT(5) @@ -572,6 +573,26 @@ static void c_can_configure_msg_objects(struct net_device *dev) IF_MCONT_RCV_EOB); } +static int c_can_software_reset(struct net_device *dev) +{ + struct c_can_priv *priv = netdev_priv(dev); + int retry = 0; + + if (priv->type != BOSCH_D_CAN) + return 0; + + priv->write_reg(priv, C_CAN_CTRL_REG, CONTROL_SWR | CONTROL_INIT); + while (priv->read_reg(priv, C_CAN_CTRL_REG) & CONTROL_SWR) { + msleep(20); + if (retry++ > 100) { + netdev_err(dev, "CCTRL: software reset failed\n"); + return -EIO; + } + } + + return 0; +} + /* * Configure C_CAN chip: * - enable/disable auto-retransmission @@ -581,6 +602,11 @@ static void c_can_configure_msg_objects(struct net_device *dev) static int c_can_chip_config(struct net_device *dev) { struct c_can_priv *priv = netdev_priv(dev); + int err; + + err = c_can_software_reset(dev); + if (err) + return err; /* enable automatic retransmission */ priv->write_reg(priv, C_CAN_CTRL_REG, CONTROL_ENABLE_AR); From 37d45825181d0481f7f1e9eb87e899f3e9826408 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 9 Oct 2019 15:48:48 +0200 Subject: [PATCH 1182/3715] can: rx-offload: can_rx_offload_queue_tail(): fix error handling, avoid skb mem leak [ Upstream commit 6caf8a6d6586d44fd72f4aa1021d14aa82affafb ] If the rx-offload skb_queue is full can_rx_offload_queue_tail() will not queue the skb and return with an error. This patch frees the skb in case of a full queue, which brings can_rx_offload_queue_tail() in line with the can_rx_offload_queue_sorted() function, which has been adjusted in the previous patch. The return value is adjusted to -ENOBUFS to better reflect the actual problem. The device stats handling is left to the caller. Fixes: d254586c3453 ("can: rx-offload: Add support for HW fifo based irq offloading") Reported-by: Kurt Van Dijck Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/rx-offload.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c index 1a7c183e6678..b26987a13620 100644 --- a/drivers/net/can/rx-offload.c +++ b/drivers/net/can/rx-offload.c @@ -261,8 +261,10 @@ int can_rx_offload_queue_tail(struct can_rx_offload *offload, struct sk_buff *skb) { if (skb_queue_len(&offload->skb_queue) > - offload->skb_queue_len_max) - return -ENOMEM; + offload->skb_queue_len_max) { + kfree_skb(skb); + return -ENOBUFS; + } skb_queue_tail(&offload->skb_queue, skb); can_rx_offload_schedule(offload); From b3cc7954719dfdc37d14bab5377d74de73630316 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 9 Oct 2019 16:03:18 +0200 Subject: [PATCH 1183/3715] can: rx-offload: can_rx_offload_offload_one(): do not increase the skb_queue beyond skb_queue_len_max [ Upstream commit a2dc3f5e1022a5ede8af9ab89a144f1e69db8636 ] The skb_queue is a linked list, holding the skb to be processed in the next NAPI call. Without this patch, the queue length in can_rx_offload_offload_one() is limited to skb_queue_len_max + 1. As the skb_queue is a linked list, no array or other resources are accessed out-of-bound, however this behaviour is counterintuitive. This patch limits the rx-offload skb_queue length to skb_queue_len_max. Fixes: d254586c3453 ("can: rx-offload: Add support for HW fifo based irq offloading") Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/rx-offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c index b26987a13620..c0e51a2f8e4f 100644 --- a/drivers/net/can/rx-offload.c +++ b/drivers/net/can/rx-offload.c @@ -124,7 +124,7 @@ static struct sk_buff *can_rx_offload_offload_one(struct can_rx_offload *offload int ret; /* If queue is full or skb not available, read to discard mailbox */ - if (likely(skb_queue_len(&offload->skb_queue) <= + if (likely(skb_queue_len(&offload->skb_queue) < offload->skb_queue_len_max)) skb = alloc_can_skb(offload->dev, &cf); From 8c7dd979e1f70beece01e1fbd6006e1a4a81e880 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 9 Oct 2019 15:15:07 +0200 Subject: [PATCH 1184/3715] can: rx-offload: can_rx_offload_offload_one(): increment rx_fifo_errors on queue overflow or OOM [ Upstream commit 4e9016bee3bf0c24963097edace034ff205b565c ] If the rx-offload skb_queue is full or the skb allocation fails (due to OOM), the mailbox contents is discarded. This patch adds the incrementing of the rx_fifo_errors statistics counter. Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/rx-offload.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c index c0e51a2f8e4f..b068e2320c79 100644 --- a/drivers/net/can/rx-offload.c +++ b/drivers/net/can/rx-offload.c @@ -134,8 +134,10 @@ static struct sk_buff *can_rx_offload_offload_one(struct can_rx_offload *offload ret = offload->mailbox_read(offload, &cf_overflow, ×tamp, n); - if (ret) + if (ret) { offload->dev->stats.rx_dropped++; + offload->dev->stats.rx_fifo_errors++; + } return NULL; } From f0aad062e96ed750e3daaec68ca5685dcf70f90f Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 9 Oct 2019 21:00:32 +0200 Subject: [PATCH 1185/3715] can: rx-offload: can_rx_offload_offload_one(): use ERR_PTR() to propagate error value in case of errors [ Upstream commit d763ab3044f0bf50bd0e6179f6b2cf1c125d1d94 ] Before this patch can_rx_offload_offload_one() returns a pointer to a skb containing the read CAN frame or a NULL pointer. However the meaning of the NULL pointer is ambiguous, it can either mean the requested mailbox is empty or there was an error. This patch fixes this situation by returning: - pointer to skb on success - NULL pointer if mailbox is empty - ERR_PTR() in case of an error All users of can_rx_offload_offload_one() have been adopted, no functional change intended. Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/rx-offload.c | 86 ++++++++++++++++++++++++++++++------ 1 file changed, 73 insertions(+), 13 deletions(-) diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c index b068e2320c79..840aef094f20 100644 --- a/drivers/net/can/rx-offload.c +++ b/drivers/net/can/rx-offload.c @@ -116,39 +116,95 @@ static int can_rx_offload_compare(struct sk_buff *a, struct sk_buff *b) return cb_b->timestamp - cb_a->timestamp; } -static struct sk_buff *can_rx_offload_offload_one(struct can_rx_offload *offload, unsigned int n) +/** + * can_rx_offload_offload_one() - Read one CAN frame from HW + * @offload: pointer to rx_offload context + * @n: number of mailbox to read + * + * The task of this function is to read a CAN frame from mailbox @n + * from the device and return the mailbox's content as a struct + * sk_buff. + * + * If the struct can_rx_offload::skb_queue exceeds the maximal queue + * length (struct can_rx_offload::skb_queue_len_max) or no skb can be + * allocated, the mailbox contents is discarded by reading it into an + * overflow buffer. This way the mailbox is marked as free by the + * driver. + * + * Return: A pointer to skb containing the CAN frame on success. + * + * NULL if the mailbox @n is empty. + * + * ERR_PTR() in case of an error + */ +static struct sk_buff * +can_rx_offload_offload_one(struct can_rx_offload *offload, unsigned int n) { - struct sk_buff *skb = NULL; + struct sk_buff *skb = NULL, *skb_error = NULL; struct can_rx_offload_cb *cb; struct can_frame *cf; int ret; - /* If queue is full or skb not available, read to discard mailbox */ if (likely(skb_queue_len(&offload->skb_queue) < - offload->skb_queue_len_max)) + offload->skb_queue_len_max)) { skb = alloc_can_skb(offload->dev, &cf); + if (unlikely(!skb)) + skb_error = ERR_PTR(-ENOMEM); /* skb alloc failed */ + } else { + skb_error = ERR_PTR(-ENOBUFS); /* skb_queue is full */ + } - if (!skb) { + /* If queue is full or skb not available, drop by reading into + * overflow buffer. + */ + if (unlikely(skb_error)) { struct can_frame cf_overflow; u32 timestamp; ret = offload->mailbox_read(offload, &cf_overflow, ×tamp, n); - if (ret) { - offload->dev->stats.rx_dropped++; - offload->dev->stats.rx_fifo_errors++; - } - return NULL; + /* Mailbox was empty. */ + if (unlikely(!ret)) + return NULL; + + /* Mailbox has been read and we're dropping it or + * there was a problem reading the mailbox. + * + * Increment error counters in any case. + */ + offload->dev->stats.rx_dropped++; + offload->dev->stats.rx_fifo_errors++; + + /* There was a problem reading the mailbox, propagate + * error value. + */ + if (unlikely(ret < 0)) + return ERR_PTR(ret); + + return skb_error; } cb = can_rx_offload_get_cb(skb); ret = offload->mailbox_read(offload, cf, &cb->timestamp, n); - if (!ret) { + + /* Mailbox was empty. */ + if (unlikely(!ret)) { kfree_skb(skb); return NULL; } + /* There was a problem reading the mailbox, propagate error value. */ + if (unlikely(ret < 0)) { + kfree_skb(skb); + + offload->dev->stats.rx_dropped++; + offload->dev->stats.rx_fifo_errors++; + + return ERR_PTR(ret); + } + + /* Mailbox was read. */ return skb; } @@ -168,7 +224,7 @@ int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload, u64 pen continue; skb = can_rx_offload_offload_one(offload, i); - if (!skb) + if (IS_ERR_OR_NULL(skb)) break; __skb_queue_add_sort(&skb_queue, skb, can_rx_offload_compare); @@ -199,7 +255,11 @@ int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload) struct sk_buff *skb; int received = 0; - while ((skb = can_rx_offload_offload_one(offload, 0))) { + while (1) { + skb = can_rx_offload_offload_one(offload, 0); + if (IS_ERR_OR_NULL(skb)) + break; + skb_queue_tail(&offload->skb_queue, skb); received++; } From 900a8c4842445e3627757e0f6139f2edbb203f53 Mon Sep 17 00:00:00 2001 From: Jeroen Hofstee Date: Tue, 24 Sep 2019 18:45:38 +0000 Subject: [PATCH 1186/3715] can: rx-offload: can_rx_offload_irq_offload_timestamp(): continue on error [ Upstream commit c2a9f74c9d18acfdcabd3361adc7eac82c537a66 ] In case of a resource shortage, i.e. the rx_offload queue will overflow or a skb fails to be allocated (due to OOM), can_rx_offload_offload_one() will call mailbox_read() to discard the mailbox and return an ERR_PTR. However can_rx_offload_irq_offload_timestamp() bails out in the error case. In case of a resource shortage all mailboxes should be discarded, to avoid an IRQ storm and give the system some time to recover. Since can_rx_offload_irq_offload_timestamp() is typically called from a while loop, all message will eventually be discarded. So let's continue on error instead to discard them directly. Signed-off-by: Jeroen Hofstee Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/rx-offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c index 840aef094f20..fef6534f163b 100644 --- a/drivers/net/can/rx-offload.c +++ b/drivers/net/can/rx-offload.c @@ -225,7 +225,7 @@ int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload, u64 pen skb = can_rx_offload_offload_one(offload, i); if (IS_ERR_OR_NULL(skb)) - break; + continue; __skb_queue_add_sort(&skb_queue, skb, can_rx_offload_compare); } From de8d7ad94e35f6936732e6cb85bd6d0bb3b82e7b Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 24 Sep 2019 18:45:38 +0000 Subject: [PATCH 1187/3715] can: rx-offload: can_rx_offload_irq_offload_fifo(): continue on error [ Upstream commit 1f7f504dcd9d1262437bdcf4fa071e41dec1af03 ] In case of a resource shortage, i.e. the rx_offload queue will overflow or a skb fails to be allocated (due to OOM), can_rx_offload_offload_one() will call mailbox_read() to discard the mailbox and return an ERR_PTR. If the hardware FIFO is empty can_rx_offload_offload_one() will return NULL. In case a CAN frame was read from the hardware, can_rx_offload_offload_one() returns the skb containing it. Without this patch can_rx_offload_irq_offload_fifo() bails out if no skb returned, regardless of the reason. Similar to can_rx_offload_irq_offload_timestamp() in case of a resource shortage the whole FIFO should be discarded, to avoid an IRQ storm and give the system some time to recover. However if the FIFO is empty the loop can be left. With this patch the loop is left in case of empty FIFO, but not on errors. Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/rx-offload.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c index fef6534f163b..54ffd1e91a69 100644 --- a/drivers/net/can/rx-offload.c +++ b/drivers/net/can/rx-offload.c @@ -257,7 +257,9 @@ int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload) while (1) { skb = can_rx_offload_offload_one(offload, 0); - if (IS_ERR_OR_NULL(skb)) + if (IS_ERR(skb)) + continue; + if (!skb) break; skb_queue_tail(&offload->skb_queue, skb); From 477daab60643cc5c7efcd5b1e1ea02d193135246 Mon Sep 17 00:00:00 2001 From: Xingyu Chen Date: Sun, 29 Sep 2019 18:53:49 +0800 Subject: [PATCH 1188/3715] watchdog: meson: Fix the wrong value of left time [ Upstream commit 2c77734642d52448aca673e889b39f981110828b ] The left time value is wrong when we get it by sysfs. The left time value should be equal to preset timeout value minus elapsed time value. According to the Meson-GXB/GXL datasheets which can be found at [0], the timeout value is saved to BIT[0-15] of the WATCHDOG_TCNT, and elapsed time value is saved to BIT[16-31] of the WATCHDOG_TCNT. [0]: http://linux-meson.com Fixes: 683fa50f0e18 ("watchdog: Add Meson GXBB Watchdog Driver") Signed-off-by: Xingyu Chen Acked-by: Neil Armstrong Reviewed-by: Kevin Hilman Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Sasha Levin --- drivers/watchdog/meson_gxbb_wdt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/watchdog/meson_gxbb_wdt.c b/drivers/watchdog/meson_gxbb_wdt.c index 69a5a57f1446..61297a6ab43a 100644 --- a/drivers/watchdog/meson_gxbb_wdt.c +++ b/drivers/watchdog/meson_gxbb_wdt.c @@ -137,8 +137,8 @@ static unsigned int meson_gxbb_wdt_get_timeleft(struct watchdog_device *wdt_dev) reg = readl(data->reg_base + GXBB_WDT_TCNT_REG); - return ((reg >> GXBB_WDT_TCNT_CNT_SHIFT) - - (reg & GXBB_WDT_TCNT_SETUP_MASK)) / 1000; + return ((reg & GXBB_WDT_TCNT_SETUP_MASK) - + (reg >> GXBB_WDT_TCNT_CNT_SHIFT)) / 1000; } static const struct watchdog_ops meson_gxbb_wdt_ops = { From 2acd1714d09607216529c4566571506c13d7dcba Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 5 Nov 2019 21:17:06 -0800 Subject: [PATCH 1189/3715] scripts/gdb: fix debugging modules compiled with hot/cold partitioning [ Upstream commit 8731acc5068eb3f422a45c760d32198175c756f8 ] gcc's -freorder-blocks-and-partition option makes it group frequently and infrequently used code in .text.hot and .text.unlikely sections respectively. At least when building modules on s390, this option is used by default. gdb assumes that all code is located in .text section, and that .text section is located at module load address. With such modules this is no longer the case: there is code in .text.hot and .text.unlikely, and either of them might precede .text. Fix by explicitly telling gdb the addresses of code sections. It might be tempting to do this for all sections, not only the ones in the white list. Unfortunately, gdb appears to have an issue, when telling it about e.g. loadable .note.gnu.build-id section causes it to think that non-loadable .note.Linux section is loaded at address 0, which in turn causes NULL pointers to be resolved to bogus symbols. So keep using the white list approach for the time being. Link: http://lkml.kernel.org/r/20191028152734.13065-1-iii@linux.ibm.com Signed-off-by: Ilya Leoshkevich Reviewed-by: Jan Kiszka Cc: Kieran Bingham Cc: Heiko Carstens Cc: Vasily Gorbik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- scripts/gdb/linux/symbols.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/gdb/linux/symbols.py b/scripts/gdb/linux/symbols.py index 004b0ac7fa72..4644f1a83b57 100644 --- a/scripts/gdb/linux/symbols.py +++ b/scripts/gdb/linux/symbols.py @@ -99,7 +99,8 @@ lx-symbols command.""" attrs[n]['name'].string(): attrs[n]['address'] for n in range(int(sect_attrs['nsections']))} args = [] - for section_name in [".data", ".data..read_mostly", ".rodata", ".bss"]: + for section_name in [".data", ".data..read_mostly", ".rodata", ".bss", + ".text", ".text.hot", ".text.unlikely"]: address = section_name_to_address.get(section_name) if address: args.append(" -s {name} {addr}".format( From 106f14d2ee321f0d12ae48e44f4ffb4eaba6dd72 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Tue, 5 Nov 2019 11:07:26 -0800 Subject: [PATCH 1190/3715] net: bcmgenet: reapply manual settings to the PHY [ Upstream commit 0686bd9d5e6863f60e4bb1e78e6fe7bb217a0890 ] The phy_init_hw() function may reset the PHY to a configuration that does not match manual network settings stored in the phydev structure. If the phy state machine is polled rather than event driven this can create a timing hazard where the phy state machine might alter the settings stored in the phydev structure from the value read from the BMCR. This commit follows invocations of phy_init_hw() by the bcmgenet driver with invocations of the genphy_config_aneg() function to ensure that the BMCR is written to match the settings held in the phydev structure. This prevents the risk of manual settings being accidentally altered. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index b6af286fa5c7..3e3044fe3206 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2611,8 +2611,10 @@ static void bcmgenet_irq_task(struct work_struct *work) } if (status & UMAC_IRQ_PHY_DET_R && - priv->dev->phydev->autoneg != AUTONEG_ENABLE) + priv->dev->phydev->autoneg != AUTONEG_ENABLE) { phy_init_hw(priv->dev->phydev); + genphy_config_aneg(priv->dev->phydev); + } /* Link UP/DOWN event */ if (status & UMAC_IRQ_LINK_EVENT) @@ -3688,6 +3690,7 @@ static int bcmgenet_resume(struct device *d) phy_init_hw(priv->phydev); /* Speed settings must be restored */ + genphy_config_aneg(dev->phydev); bcmgenet_mii_config(priv->dev, false); /* disable ethernet MAC while updating its registers */ From 06d3f91ed5172ef57c3abf3c2fb40fee5fb6560f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 7 Nov 2019 09:39:32 -0500 Subject: [PATCH 1191/3715] ceph: return -EINVAL if given fsc mount option on kernel w/o support [ Upstream commit ff29fde84d1fc82f233c7da0daa3574a3942bec7 ] If someone requests fscache on the mount, and the kernel doesn't support it, it should fail the mount. [ Drop ceph prefix -- it's provided by pr_err. ] Signed-off-by: Jeff Layton Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov Signed-off-by: Sasha Levin --- fs/ceph/super.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/ceph/super.c b/fs/ceph/super.c index f0694293b31a..088c4488b449 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -232,6 +232,7 @@ static int parse_fsopt_token(char *c, void *private) return -ENOMEM; break; case Opt_fscache_uniq: +#ifdef CONFIG_CEPH_FSCACHE kfree(fsopt->fscache_uniq); fsopt->fscache_uniq = kstrndup(argstr[0].from, argstr[0].to-argstr[0].from, @@ -240,7 +241,10 @@ static int parse_fsopt_token(char *c, void *private) return -ENOMEM; fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; break; - /* misc */ +#else + pr_err("fscache support is disabled\n"); + return -EINVAL; +#endif case Opt_wsize: if (intval < PAGE_SIZE || intval > CEPH_MAX_WRITE_SIZE) return -EINVAL; @@ -312,8 +316,13 @@ static int parse_fsopt_token(char *c, void *private) fsopt->flags &= ~CEPH_MOUNT_OPT_INO32; break; case Opt_fscache: +#ifdef CONFIG_CEPH_FSCACHE fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; break; +#else + pr_err("fscache support is disabled\n"); + return -EINVAL; +#endif case Opt_nofscache: fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE; break; From a7c4a8ed5846e81e003944258d3bc75e3a06600d Mon Sep 17 00:00:00 2001 From: Ahmed Zaki Date: Thu, 31 Oct 2019 06:12:43 -0600 Subject: [PATCH 1192/3715] mac80211: fix station inactive_time shortly after boot [ Upstream commit 285531f9e6774e3be71da6673d475ff1a088d675 ] In the first 5 minutes after boot (time of INITIAL_JIFFIES), ieee80211_sta_last_active() returns zero if last_ack is zero. This leads to "inactive time" showing jiffies_to_msecs(jiffies). # iw wlan0 station get fc:ec:da:64:a6:dd Station fc:ec:da:64:a6:dd (on wlan0) inactive time: 4294894049 ms . . connected time: 70 seconds Fix by returning last_rx if last_ack == 0. Signed-off-by: Ahmed Zaki Link: https://lore.kernel.org/r/20191031121243.27694-1-anzaki@gmail.com Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/sta_info.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index f1b496222bda..1a86974b02e3 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -2313,7 +2313,8 @@ unsigned long ieee80211_sta_last_active(struct sta_info *sta) { struct ieee80211_sta_rx_stats *stats = sta_get_last_rx_stats(sta); - if (time_after(stats->last_rx, sta->status_stats.last_ack)) + if (!sta->status_stats.last_ack || + time_after(stats->last_rx, sta->status_stats.last_ack)) return stats->last_rx; return sta->status_stats.last_ack; } From b68abc88cafbfb4451fa333469464b185371ce1f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 7 Nov 2019 10:48:47 +0300 Subject: [PATCH 1193/3715] block: drbd: remove a stray unlock in __drbd_send_protocol() [ Upstream commit 8e9c523016cf9983b295e4bc659183d1fa6ef8e0 ] There are two callers of this function and they both unlock the mutex so this ends up being a double unlock. Fixes: 44ed167da748 ("drbd: rcu_read_lock() and rcu_dereference() for tconn->net_conf") Signed-off-by: Dan Carpenter Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/drbd/drbd_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 8cb3791898ae..7ea13b5497fd 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -795,7 +795,6 @@ int __drbd_send_protocol(struct drbd_connection *connection, enum drbd_packet cm if (nc->tentative && connection->agreed_pro_version < 92) { rcu_read_unlock(); - mutex_unlock(&sock->mutex); drbd_err(connection, "--dry-run is not supported by peer"); return -EOPNOTSUPP; } From bc34231181d38f730e71d3df5d2b9508390be204 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 17 Oct 2019 21:22:18 +0200 Subject: [PATCH 1194/3715] pwm: bcm-iproc: Prevent unloading the driver module while in use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 24906a41eecb73d51974ade0847c21e429beec60 ] The owner member of struct pwm_ops must be set to THIS_MODULE to increase the reference count of the module such that the module cannot be removed while its code is in use. Fixes: daa5abc41c80 ("pwm: Add support for Broadcom iProc PWM controller") Signed-off-by: Uwe Kleine-König Reviewed-by: Florian Fainelli Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/pwm/pwm-bcm-iproc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pwm/pwm-bcm-iproc.c b/drivers/pwm/pwm-bcm-iproc.c index d961a8207b1c..31b01035d0ab 100644 --- a/drivers/pwm/pwm-bcm-iproc.c +++ b/drivers/pwm/pwm-bcm-iproc.c @@ -187,6 +187,7 @@ static int iproc_pwmc_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops iproc_pwm_ops = { .apply = iproc_pwmc_apply, .get_state = iproc_pwmc_get_state, + .owner = THIS_MODULE, }; static int iproc_pwmc_probe(struct platform_device *pdev) From 2e0c82241c09a79de8d2e9d63b3219b6719a7012 Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 29 Nov 2018 16:09:32 -0800 Subject: [PATCH 1195/3715] scsi: lpfc: Fix kernel Oops due to null pring pointers [ Upstream commit 5a9eeff57f340238c39c95d8e7e54c96fc722de7 ] Driver is hitting null pring pointers in lpfc_do_work(). Pointer assignment occurs based on SLI-revision. If recovering after an error, its possible the sli revision for the port was cleared, making the lpfc_phba_elsring() not return a ring pointer, thus the null pointer. Add SLI revision checking to lpfc_phba_elsring() and status checking to all callers. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc.h | 6 ++++++ drivers/scsi/lpfc/lpfc_els.c | 2 ++ drivers/scsi/lpfc/lpfc_init.c | 7 ++++++- drivers/scsi/lpfc/lpfc_sli.c | 2 ++ 4 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index bc61cc8bc6f0..03e95a3216c8 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -1239,6 +1239,12 @@ lpfc_sli_read_hs(struct lpfc_hba *phba) static inline struct lpfc_sli_ring * lpfc_phba_elsring(struct lpfc_hba *phba) { + /* Return NULL if sli_rev has become invalid due to bad fw */ + if (phba->sli_rev != LPFC_SLI_REV4 && + phba->sli_rev != LPFC_SLI_REV3 && + phba->sli_rev != LPFC_SLI_REV2) + return NULL; + if (phba->sli_rev == LPFC_SLI_REV4) { if (phba->sli4_hba.els_wq) return phba->sli4_hba.els_wq->pring; diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index e5db20e8979d..a31f87eb1e62 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -1337,6 +1337,8 @@ lpfc_els_abort_flogi(struct lpfc_hba *phba) Fabric_DID); pring = lpfc_phba_elsring(phba); + if (unlikely(!pring)) + return -EIO; /* * Check the txcmplq for an iocb that matches the nport the driver is diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 15bcd00dd7a2..c69c2a2b2ead 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -1773,7 +1773,12 @@ lpfc_sli4_port_sta_fn_reset(struct lpfc_hba *phba, int mbx_action, lpfc_offline(phba); /* release interrupt for possible resource change */ lpfc_sli4_disable_intr(phba); - lpfc_sli_brdrestart(phba); + rc = lpfc_sli_brdrestart(phba); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6309 Failed to restart board\n"); + return rc; + } /* request and enable interrupt */ intr_mode = lpfc_sli4_enable_intr(phba, phba->intr_mode); if (intr_mode == LPFC_INTR_ERROR) { diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index ebf7d3cda367..62bea4ffdc25 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -4421,6 +4421,8 @@ lpfc_sli_brdrestart_s4(struct lpfc_hba *phba) hba_aer_enabled = phba->hba_flag & HBA_AER_ENABLED; rc = lpfc_sli4_brdreset(phba); + if (rc) + return rc; spin_lock_irq(&phba->hbalock); phba->pport->stopped = 0; From 78f7d2dbdf06f4af54d63c2c5334ad08710617d7 Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 29 Nov 2018 16:09:40 -0800 Subject: [PATCH 1196/3715] scsi: lpfc: Fix dif and first burst use in write commands [ Upstream commit 7c4042a4d0b7532cfbc90478fd3084b2dab5849e ] When dif and first burst is used in a write command wqe, the driver was not properly setting fields in the io command request. This resulted in no dif bytes being sent and invalid xfer_rdy's, resulting in the io being aborted by the hardware. Correct the wqe initializaton when both dif and first burst are used. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_scsi.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 07cb671bb855..2eba0c39ac1c 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -2714,6 +2714,7 @@ lpfc_bg_scsi_prep_dma_buf_s3(struct lpfc_hba *phba, int datasegcnt, protsegcnt, datadir = scsi_cmnd->sc_data_direction; int prot_group_type = 0; int fcpdl; + struct lpfc_vport *vport = phba->pport; /* * Start the lpfc command prep by bumping the bpl beyond fcp_cmnd @@ -2819,6 +2820,14 @@ lpfc_bg_scsi_prep_dma_buf_s3(struct lpfc_hba *phba, */ iocb_cmd->un.fcpi.fcpi_parm = fcpdl; + /* + * For First burst, we may need to adjust the initial transfer + * length for DIF + */ + if (iocb_cmd->un.fcpi.fcpi_XRdy && + (fcpdl < vport->cfg_first_burst_size)) + iocb_cmd->un.fcpi.fcpi_XRdy = fcpdl; + return 0; err: if (lpfc_cmd->seg_cnt) @@ -3371,6 +3380,7 @@ lpfc_bg_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, int datasegcnt, protsegcnt, datadir = scsi_cmnd->sc_data_direction; int prot_group_type = 0; int fcpdl; + struct lpfc_vport *vport = phba->pport; /* * Start the lpfc command prep by bumping the sgl beyond fcp_cmnd @@ -3486,6 +3496,14 @@ lpfc_bg_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, */ iocb_cmd->un.fcpi.fcpi_parm = fcpdl; + /* + * For First burst, we may need to adjust the initial transfer + * length for DIF + */ + if (iocb_cmd->un.fcpi.fcpi_XRdy && + (fcpdl < vport->cfg_first_burst_size)) + iocb_cmd->un.fcpi.fcpi_XRdy = fcpdl; + /* * If the OAS driver feature is enabled and the lun is enabled for * OAS, set the oas iocb related flags. From 4b9ea58bc7318c54170ffc21a9b2bcc845db481f Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 19 Oct 2018 09:00:51 +0200 Subject: [PATCH 1197/3715] ARM: dts: Fix up SQ201 flash access [ Upstream commit d88b11ef91b15d0af9c0676cbf4f441a0dff0c56 ] This sets the partition information on the SQ201 to be read out from the RedBoot partition table, removes the static partition table and sets our boot options to mount root from /dev/mtdblock2 where the squashfs+JFFS2 resides. Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- arch/arm/boot/dts/gemini-sq201.dts | 37 ++++-------------------------- 1 file changed, 5 insertions(+), 32 deletions(-) diff --git a/arch/arm/boot/dts/gemini-sq201.dts b/arch/arm/boot/dts/gemini-sq201.dts index 63c02ca9513c..e9e2f6ff0c58 100644 --- a/arch/arm/boot/dts/gemini-sq201.dts +++ b/arch/arm/boot/dts/gemini-sq201.dts @@ -20,7 +20,7 @@ }; chosen { - bootargs = "console=ttyS0,115200n8"; + bootargs = "console=ttyS0,115200n8 root=/dev/mtdblock2 rw rootfstype=squashfs,jffs2 rootwait"; stdout-path = &uart0; }; @@ -71,37 +71,10 @@ /* 16MB of flash */ reg = <0x30000000 0x01000000>; - partition@0 { - label = "RedBoot"; - reg = <0x00000000 0x00120000>; - read-only; - }; - partition@120000 { - label = "Kernel"; - reg = <0x00120000 0x00200000>; - }; - partition@320000 { - label = "Ramdisk"; - reg = <0x00320000 0x00600000>; - }; - partition@920000 { - label = "Application"; - reg = <0x00920000 0x00600000>; - }; - partition@f20000 { - label = "VCTL"; - reg = <0x00f20000 0x00020000>; - read-only; - }; - partition@f40000 { - label = "CurConf"; - reg = <0x00f40000 0x000a0000>; - read-only; - }; - partition@fe0000 { - label = "FIS directory"; - reg = <0x00fe0000 0x00020000>; - read-only; + partitions { + compatible = "redboot-fis"; + /* Eraseblock at 0xfe0000 */ + fis-index-block = <0x1fc>; }; }; From 6fd7de500ef668af49dfa1e507eadb5d02a69ca0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 28 Nov 2018 10:04:16 +0100 Subject: [PATCH 1198/3715] ARM: debug-imx: only define DEBUG_IMX_UART_PORT if needed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7c41ea57beb2aee96fa63091a457b1a2826f3c42 ] If debugging on i.MX is enabled DEBUG_IMX_UART_PORT defines which UART is used for the debug output. If however debugging is off don't only hide the then unused config item but drop it completely by using a dependency instead of a conditional prompt. This fixes DEBUG_IMX_UART_PORT being present in the kernel config even if DEBUG_LL is disabled. Signed-off-by: Uwe Kleine-König Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/Kconfig.debug | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 954ba8b81052..fd4b679945d3 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -1376,21 +1376,21 @@ config DEBUG_OMAP2PLUS_UART depends on ARCH_OMAP2PLUS config DEBUG_IMX_UART_PORT - int "i.MX Debug UART Port Selection" if DEBUG_IMX1_UART || \ - DEBUG_IMX25_UART || \ - DEBUG_IMX21_IMX27_UART || \ - DEBUG_IMX31_UART || \ - DEBUG_IMX35_UART || \ - DEBUG_IMX50_UART || \ - DEBUG_IMX51_UART || \ - DEBUG_IMX53_UART || \ - DEBUG_IMX6Q_UART || \ - DEBUG_IMX6SL_UART || \ - DEBUG_IMX6SX_UART || \ - DEBUG_IMX6UL_UART || \ - DEBUG_IMX7D_UART + int "i.MX Debug UART Port Selection" + depends on DEBUG_IMX1_UART || \ + DEBUG_IMX25_UART || \ + DEBUG_IMX21_IMX27_UART || \ + DEBUG_IMX31_UART || \ + DEBUG_IMX35_UART || \ + DEBUG_IMX50_UART || \ + DEBUG_IMX51_UART || \ + DEBUG_IMX53_UART || \ + DEBUG_IMX6Q_UART || \ + DEBUG_IMX6SL_UART || \ + DEBUG_IMX6SX_UART || \ + DEBUG_IMX6UL_UART || \ + DEBUG_IMX7D_UART default 1 - depends on ARCH_MXC help Choose UART port on which kernel low-level debug messages should be output. From 221c05bd1f8c996751f48f5de5a3f1ae0890a9cd Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 5 Dec 2018 16:10:01 -0200 Subject: [PATCH 1199/3715] ARM: dts: imx53-voipac-dmm-668: Fix memory node duplication [ Upstream commit 998a84c27a7f3f9133d32af64e19c05cec161a1a ] imx53-voipac-dmm-668 has two memory nodes, but the correct representation would be to use a single one with two reg entries - one for each RAM chip select, so fix it accordingly. Reported-by: Marco Franchi Signed-off-by: Fabio Estevam Signed-off-by: Marco Franchi Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx53-voipac-dmm-668.dtsi | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/arm/boot/dts/imx53-voipac-dmm-668.dtsi b/arch/arm/boot/dts/imx53-voipac-dmm-668.dtsi index df8dafe2564d..2297ed90ee89 100644 --- a/arch/arm/boot/dts/imx53-voipac-dmm-668.dtsi +++ b/arch/arm/boot/dts/imx53-voipac-dmm-668.dtsi @@ -17,12 +17,8 @@ memory@70000000 { device_type = "memory"; - reg = <0x70000000 0x20000000>; - }; - - memory@b0000000 { - device_type = "memory"; - reg = <0xb0000000 0x20000000>; + reg = <0x70000000 0x20000000>, + <0xb0000000 0x20000000>; }; regulators { From e7a995e699837c36fcee377f3a210fb5b61a0783 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 15 Nov 2018 21:09:59 +0100 Subject: [PATCH 1200/3715] parisc: Fix serio address output [ Upstream commit 785145171d17af2554128becd6a7c8f89e101141 ] We want the hpa addresses printed in the serio modules, not some virtual ioremap()ed address, e.g.: serio: gsc-ps2-keyboard port at 0xf0108000 irq 22 @ 2:0:11 serio: gsc-ps2-mouse port at 0xf0108100 irq 22 @ 2:0:12 Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- drivers/input/serio/gscps2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/serio/gscps2.c b/drivers/input/serio/gscps2.c index aa9f29b875de..d84e3b70215a 100644 --- a/drivers/input/serio/gscps2.c +++ b/drivers/input/serio/gscps2.c @@ -382,9 +382,9 @@ static int __init gscps2_probe(struct parisc_device *dev) goto fail; #endif - printk(KERN_INFO "serio: %s port at 0x%p irq %d @ %s\n", + pr_info("serio: %s port at 0x%08lx irq %d @ %s\n", ps2port->port->name, - ps2port->addr, + hpa, ps2port->padev->irq, ps2port->port->phys); From 048b7d2c8a884220c7e3e107b9779bf6c01067cb Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 15 Nov 2018 21:06:34 +0100 Subject: [PATCH 1201/3715] parisc: Fix HP SDC hpa address output [ Upstream commit c4bff35ca1bfba886da6223c9fed76a2b1382b8e ] Show the hpa address of the HP SDC instead of a hashed value, e.g.: HP SDC: HP SDC at 0xf0201000, IRQ 23 (NMI IRQ 24) Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- drivers/input/serio/hp_sdc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c index 8eef6849d066..5585823ced19 100644 --- a/drivers/input/serio/hp_sdc.c +++ b/drivers/input/serio/hp_sdc.c @@ -887,8 +887,8 @@ static int __init hp_sdc_init(void) "HP SDC NMI", &hp_sdc)) goto err2; - printk(KERN_INFO PREFIX "HP SDC at 0x%p, IRQ %d (NMI IRQ %d)\n", - (void *)hp_sdc.base_io, hp_sdc.irq, hp_sdc.nmi); + pr_info(PREFIX "HP SDC at 0x%08lx, IRQ %d (NMI IRQ %d)\n", + hp_sdc.base_io, hp_sdc.irq, hp_sdc.nmi); hp_sdc_status_in8(); hp_sdc_data_in8(); From 63f1e5fb8f1ed58562fff5258bf8bdb686fc0544 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Thu, 6 Dec 2018 22:50:40 +0000 Subject: [PATCH 1202/3715] arm64: mm: Prevent mismatched 52-bit VA support [ Upstream commit a96a33b1ca57dbea4285893dedf290aeb8eb090b ] For cases where there is a mismatch in ARMv8.2-LVA support between CPUs we have to be careful in allowing secondary CPUs to boot if 52-bit virtual addresses have already been enabled on the boot CPU. This patch adds code to the secondary startup path. If the boot CPU has enabled 52-bit VAs then ID_AA64MMFR2_EL1 is checked to see if the secondary can also enable 52-bit support. If not, the secondary is prevented from booting and an error message is displayed indicating why. Technically this patch could be implemented using the cpufeature code when considering 52-bit userspace support. However, we employ low level checks here as the cpufeature code won't be able to run if we have mismatched 52-bit kernel va support. Signed-off-by: Steve Capper Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/kernel/head.S | 26 ++++++++++++++++++++++++++ arch/arm64/kernel/smp.c | 5 +++++ 2 files changed, 31 insertions(+) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 92cc7b51f100..9c00fd2acc2a 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -594,6 +594,7 @@ secondary_startup: /* * Common entry point for secondary CPUs. */ + bl __cpu_secondary_check52bitva bl __cpu_setup // initialise processor bl __enable_mmu ldr x8, =__secondary_switched @@ -668,6 +669,31 @@ ENTRY(__enable_mmu) ret ENDPROC(__enable_mmu) +ENTRY(__cpu_secondary_check52bitva) +#ifdef CONFIG_ARM64_52BIT_VA + ldr_l x0, vabits_user + cmp x0, #52 + b.ne 2f + + mrs_s x0, SYS_ID_AA64MMFR2_EL1 + and x0, x0, #(0xf << ID_AA64MMFR2_LVA_SHIFT) + cbnz x0, 2f + + adr_l x0, va52mismatch + mov w1, #1 + strb w1, [x0] + dmb sy + dc ivac, x0 // Invalidate potentially stale cache line + + update_early_cpu_boot_status CPU_STUCK_IN_KERNEL, x0, x1 +1: wfe + wfi + b 1b + +#endif +2: ret +ENDPROC(__cpu_secondary_check52bitva) + __no_granule_support: /* Indicate that this CPU can't boot and is stuck in the kernel */ update_early_cpu_boot_status CPU_STUCK_IN_KERNEL, x1, x2 diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index a683cd499515..0881dfab10f8 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -106,6 +106,7 @@ static int boot_secondary(unsigned int cpu, struct task_struct *idle) } static DECLARE_COMPLETION(cpu_running); +bool va52mismatch __ro_after_init; int __cpu_up(unsigned int cpu, struct task_struct *idle) { @@ -135,6 +136,10 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) if (!cpu_online(cpu)) { pr_crit("CPU%u: failed to come online\n", cpu); + + if (IS_ENABLED(CONFIG_ARM64_52BIT_VA) && va52mismatch) + pr_crit("CPU%u: does not support 52-bit VAs\n", cpu); + ret = -EIO; } } else { From 7324581318f696cbb615d796657b11eae041c353 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 10 Dec 2018 18:07:33 +0000 Subject: [PATCH 1203/3715] arm64: smp: Handle errors reported by the firmware [ Upstream commit f357b3a7e17af7736d67d8267edc1ed3d1dd9391 ] The __cpu_up() routine ignores the errors reported by the firmware for a CPU bringup operation and looks for the error status set by the booting CPU. If the CPU never entered the kernel, we could end up in assuming stale error status, which otherwise would have been set/cleared appropriately by the booting CPU. Reported-by: Steve Capper Cc: Will Deacon Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/kernel/smp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 0881dfab10f8..909bf3926fd2 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -144,6 +144,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) } } else { pr_err("CPU%u: failed to boot: %d\n", cpu, ret); + return ret; } secondary_data.task = NULL; From e4bf1456a9fcc5614ba8292c35cc72a4b3aa4d7c Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Tue, 4 Dec 2018 19:57:42 +0200 Subject: [PATCH 1204/3715] ARM: OMAP1: fix USB configuration for device-only setups [ Upstream commit c7b7b5cbd0c859b1546a5a3455d457708bdadf4c ] Currently we do USB configuration only if the host mode (CONFIG_USB) is enabled. But it should be done also in the case of device-only setups, so change the condition to CONFIG_USB_SUPPORT. This allows to use omap_udc on Palm Tungsten E. Signed-off-by: Aaro Koskinen Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/mach-omap1/Makefile | 2 +- arch/arm/mach-omap1/include/mach/usb.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-omap1/Makefile b/arch/arm/mach-omap1/Makefile index e8ccf51c6f29..ec0235899de2 100644 --- a/arch/arm/mach-omap1/Makefile +++ b/arch/arm/mach-omap1/Makefile @@ -25,7 +25,7 @@ obj-y += $(i2c-omap-m) $(i2c-omap-y) led-y := leds.o -usb-fs-$(CONFIG_USB) := usb.o +usb-fs-$(CONFIG_USB_SUPPORT) := usb.o obj-y += $(usb-fs-m) $(usb-fs-y) # Specific board support diff --git a/arch/arm/mach-omap1/include/mach/usb.h b/arch/arm/mach-omap1/include/mach/usb.h index 77867778d4ec..5429d86c7190 100644 --- a/arch/arm/mach-omap1/include/mach/usb.h +++ b/arch/arm/mach-omap1/include/mach/usb.h @@ -11,7 +11,7 @@ #include -#if IS_ENABLED(CONFIG_USB) +#if IS_ENABLED(CONFIG_USB_SUPPORT) void omap1_usb_init(struct omap_usb_config *pdata); #else static inline void omap1_usb_init(struct omap_usb_config *pdata) From bb97bd1891697d7cce8d03e71de238d043e9c086 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 10 Dec 2018 17:17:25 +0200 Subject: [PATCH 1205/3715] RDMA/vmw_pvrdma: Use atomic memory allocation in create AH [ Upstream commit a276a4d93bf1580d737f38d1810e5f4b166f3edd ] Create address handle callback should not sleep, use GFP_ATOMIC instead of GFP_KERNEL for memory allocation. Fixes: 29c8d9eba550 ("IB: Add vmw_pvrdma driver") Cc: Adit Ranadive Signed-off-by: Gal Pressman Reviewed-by: Yuval Shaia Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index aa533f08e017..5c7aa6ff1538 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -550,7 +550,7 @@ struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, if (!atomic_add_unless(&dev->num_ahs, 1, dev->dsr->caps.max_ah)) return ERR_PTR(-ENOMEM); - ah = kzalloc(sizeof(*ah), GFP_KERNEL); + ah = kzalloc(sizeof(*ah), GFP_ATOMIC); if (!ah) { atomic_dec(&dev->num_ahs); return ERR_PTR(-ENOMEM); From 456536aa540f8e2696533222ef10a4c456c1c9ee Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Sun, 2 Dec 2018 21:52:11 +0100 Subject: [PATCH 1206/3715] PM / AVS: SmartReflex: NULL check before some freeing functions is not needed [ Upstream commit 14d338a857f05f894ba3badd9e6d3039c68b8180 ] NULL check before some freeing functions is not needed. Signed-off-by: Thomas Meyer Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/power/avs/smartreflex.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/power/avs/smartreflex.c b/drivers/power/avs/smartreflex.c index 974fd684bab2..4b6fddc18394 100644 --- a/drivers/power/avs/smartreflex.c +++ b/drivers/power/avs/smartreflex.c @@ -994,8 +994,7 @@ static int omap_sr_remove(struct platform_device *pdev) if (sr_info->autocomp_active) sr_stop_vddautocomp(sr_info); - if (sr_info->dbg_dir) - debugfs_remove_recursive(sr_info->dbg_dir); + debugfs_remove_recursive(sr_info->dbg_dir); pm_runtime_disable(&pdev->dev); list_del(&sr_info->node); From c03e74b67b61b41fb62e518957f95fde18d3e6fa Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 10 Dec 2018 22:58:38 +0100 Subject: [PATCH 1207/3715] ARM: ks8695: fix section mismatch warning [ Upstream commit 4aa64677330beeeed721b4b122884dabad845d66 ] WARNING: vmlinux.o(.text+0x13250): Section mismatch in reference from the function acs5k_i2c_init() to the (unknown reference) .init.data:(unknown) The function acs5k_i2c_init() references the (unknown reference) __initdata (unknown). This is often because acs5k_i2c_init lacks a __initdata annotation or the annotation of (unknown) is wrong. Signed-off-by: Arnd Bergmann Signed-off-by: Olof Johansson Signed-off-by: Sasha Levin --- arch/arm/mach-ks8695/board-acs5k.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-ks8695/board-acs5k.c b/arch/arm/mach-ks8695/board-acs5k.c index e4d709c8ed32..76d3083f1f63 100644 --- a/arch/arm/mach-ks8695/board-acs5k.c +++ b/arch/arm/mach-ks8695/board-acs5k.c @@ -92,7 +92,7 @@ static struct i2c_board_info acs5k_i2c_devs[] __initdata = { }, }; -static void acs5k_i2c_init(void) +static void __init acs5k_i2c_init(void) { /* The gpio interface */ platform_device_register(&acs5k_i2c_device); From 77cf4c5b45b0a9e6854e8e352dce55fa8769354c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 8 Dec 2018 13:59:24 +0100 Subject: [PATCH 1208/3715] ACPI / LPSS: Ignore acpi_device_fix_up_power() return value [ Upstream commit 1a2fa02f7489dc4d746f2a15fb77b3ce1affade8 ] Ignore acpi_device_fix_up_power() return value. If we return an error we end up with acpi_default_enumeration() still creating a platform- device for the device and we end up with the device still being used but without the special LPSS related handling which is not useful. Specicifically ignoring the error fixes the touchscreen no longer working after a suspend/resume on a Prowise PT301 tablet. This tablet has a broken _PS0 method on the touchscreen's I2C controller, causing acpi_device_fix_up_power() to fail, causing fallback to standard platform-dev handling and specifically causing acpi_lpss_save/restore_ctx to not run. The I2C controllers _PS0 method does actually turn on the device, but then does some more nonsense which fails when run during early boot trying to use I2C opregion handling on another not-yet registered I2C controller. Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/acpi_lpss.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index 1ab8d7223b25..84b1d30f699c 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -518,12 +518,7 @@ static int acpi_lpss_create_device(struct acpi_device *adev, * have _PS0 and _PS3 without _PSC (and no power resources), so * acpi_bus_init_power() will assume that the BIOS has put them into D0. */ - ret = acpi_device_fix_up_power(adev); - if (ret) { - /* Skip the device, but continue the namespace scan. */ - ret = 0; - goto err_out; - } + acpi_device_fix_up_power(adev); adev->driver_data = pdata; pdev = acpi_create_platform_device(adev, dev_desc->properties); From 9a959526b56d926167b18abd951b04ab9dc8d546 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 10 Dec 2018 19:37:01 -0800 Subject: [PATCH 1209/3715] scsi: lpfc: Enable Management features for IF_TYPE=6 [ Upstream commit 719162bd5bb968203397b9b1d0dd30a9797bbd09 ] Addition of support for if_type=6 missed several checks for interface type, resulting in the failure of several key management features such as firmware dump and loopback testing. Correct the checks on the if_type so that both SLI4 IF_TYPE's 2 and 6 are supported. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Ewan D. Milne Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_attr.c | 4 ++-- drivers/scsi/lpfc/lpfc_bsg.c | 6 +++--- drivers/scsi/lpfc/lpfc_els.c | 2 +- drivers/scsi/lpfc/lpfc_hbadisc.c | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 3da242201cb4..82ce5d193018 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -1178,7 +1178,7 @@ lpfc_sli4_pdev_reg_request(struct lpfc_hba *phba, uint32_t opcode) return -EACCES; if ((phba->sli_rev < LPFC_SLI_REV4) || - (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) != + (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) < LPFC_SLI_INTF_IF_TYPE_2)) return -EPERM; @@ -4056,7 +4056,7 @@ lpfc_link_speed_store(struct device *dev, struct device_attribute *attr, uint32_t prev_val, if_type; if_type = bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf); - if (if_type == LPFC_SLI_INTF_IF_TYPE_2 && + if (if_type >= LPFC_SLI_INTF_IF_TYPE_2 && phba->hba_flag & HBA_FORCED_LINK_SPEED) return -EPERM; diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index d89816222b23..6dde21dc82a3 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -2221,7 +2221,7 @@ lpfc_bsg_diag_loopback_mode(struct bsg_job *job) if (phba->sli_rev < LPFC_SLI_REV4) rc = lpfc_sli3_bsg_diag_loopback_mode(phba, job); - else if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) == + else if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) >= LPFC_SLI_INTF_IF_TYPE_2) rc = lpfc_sli4_bsg_diag_loopback_mode(phba, job); else @@ -2261,7 +2261,7 @@ lpfc_sli4_bsg_diag_mode_end(struct bsg_job *job) if (phba->sli_rev < LPFC_SLI_REV4) return -ENODEV; - if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) != + if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) < LPFC_SLI_INTF_IF_TYPE_2) return -ENODEV; @@ -2353,7 +2353,7 @@ lpfc_sli4_bsg_link_diag_test(struct bsg_job *job) rc = -ENODEV; goto job_error; } - if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) != + if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) < LPFC_SLI_INTF_IF_TYPE_2) { rc = -ENODEV; goto job_error; diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index a31f87eb1e62..c851fd14ff3e 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -5550,7 +5550,7 @@ lpfc_els_rcv_rdp(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, struct ls_rjt stat; if (phba->sli_rev < LPFC_SLI_REV4 || - bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) != + bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) < LPFC_SLI_INTF_IF_TYPE_2) { rjt_err = LSRJT_UNABLE_TPC; rjt_expl = LSEXP_REQ_UNSUPPORTED; diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index d850077c5e22..3f88f3d79622 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -4769,7 +4769,7 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) if (phba->sli_rev == LPFC_SLI_REV4 && (!(vport->load_flag & FC_UNLOADING)) && (bf_get(lpfc_sli_intf_if_type, - &phba->sli4_hba.sli_intf) == + &phba->sli4_hba.sli_intf) >= LPFC_SLI_INTF_IF_TYPE_2) && (kref_read(&ndlp->kref) > 0)) { mbox->context1 = lpfc_nlp_get(ndlp); From d98de9d9b8427db4be1b91949cc633aab5f7e76a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 6 Dec 2018 15:55:41 -0800 Subject: [PATCH 1210/3715] crypto: user - support incremental algorithm dumps [ Upstream commit 0ac6b8fb23c724b015d9ca70a89126e8d1563166 ] CRYPTO_MSG_GETALG in NLM_F_DUMP mode sometimes doesn't return all registered crypto algorithms, because it doesn't support incremental dumps. crypto_dump_report() only permits itself to be called once, yet the netlink subsystem allocates at most ~64 KiB for the skb being dumped to. Thus only the first recvmsg() returns data, and it may only include a subset of the crypto algorithms even if the user buffer passed to recvmsg() is large enough to hold all of them. Fix this by using one of the arguments in the netlink_callback structure to keep track of the current position in the algorithm list. Then userspace can do multiple recvmsg() on the socket after sending the dump request. This is the way netlink dumps work elsewhere in the kernel; it's unclear why this was different (probably just an oversight). Also fix an integer overflow when calculating the dump buffer size hint. Fixes: a38f7907b926 ("crypto: Add userspace configuration API") Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/crypto_user.c | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index 2b8fb8f1391e..5e457a7dd1c9 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c @@ -296,30 +296,33 @@ drop_alg: static int crypto_dump_report(struct sk_buff *skb, struct netlink_callback *cb) { - struct crypto_alg *alg; + const size_t start_pos = cb->args[0]; + size_t pos = 0; struct crypto_dump_info info; - int err; - - if (cb->args[0]) - goto out; - - cb->args[0] = 1; + struct crypto_alg *alg; + int res; info.in_skb = cb->skb; info.out_skb = skb; info.nlmsg_seq = cb->nlh->nlmsg_seq; info.nlmsg_flags = NLM_F_MULTI; + down_read(&crypto_alg_sem); list_for_each_entry(alg, &crypto_alg_list, cra_list) { - err = crypto_report_alg(alg, &info); - if (err) - goto out_err; + if (pos >= start_pos) { + res = crypto_report_alg(alg, &info); + if (res == -EMSGSIZE) + break; + if (res) + goto out; + } + pos++; } - + cb->args[0] = pos; + res = skb->len; out: - return skb->len; -out_err: - return err; + up_read(&crypto_alg_sem); + return res; } static int crypto_dump_report_done(struct netlink_callback *cb) @@ -503,7 +506,7 @@ static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, if ((type == (CRYPTO_MSG_GETALG - CRYPTO_MSG_BASE) && (nlh->nlmsg_flags & NLM_F_DUMP))) { struct crypto_alg *alg; - u16 dump_alloc = 0; + unsigned long dump_alloc = 0; if (link->dump == NULL) return -EINVAL; @@ -511,16 +514,16 @@ static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, down_read(&crypto_alg_sem); list_for_each_entry(alg, &crypto_alg_list, cra_list) dump_alloc += CRYPTO_REPORT_MAXSIZE; + up_read(&crypto_alg_sem); { struct netlink_dump_control c = { .dump = link->dump, .done = link->done, - .min_dump_alloc = dump_alloc, + .min_dump_alloc = min(dump_alloc, 65535UL), }; err = netlink_dump_start(crypto_nlsk, skb, nlh, &c); } - up_read(&crypto_alg_sem); return err; } From 070fc4ce25c831193918880ab307549ff9285167 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Thu, 29 Nov 2018 18:25:19 +0800 Subject: [PATCH 1211/3715] mwifiex: fix potential NULL dereference and use after free [ Upstream commit 1dcd9429212b98bea87fc6ec92fb50bf5953eb47 ] There are two defects: (1) passing a NULL bss to mwifiex_save_hidden_ssid_channels will result in NULL dereference, (2) using bss after dropping the reference to it via cfg80211_put_bss. To fix them, the patch moves the buggy code to the branch that bss is not NULL and puts it before cfg80211_put_bss. Signed-off-by: Pan Bian Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/marvell/mwifiex/scan.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/scan.c b/drivers/net/wireless/marvell/mwifiex/scan.c index 67c334221077..c013c94fbf15 100644 --- a/drivers/net/wireless/marvell/mwifiex/scan.c +++ b/drivers/net/wireless/marvell/mwifiex/scan.c @@ -1901,15 +1901,17 @@ mwifiex_parse_single_response_buf(struct mwifiex_private *priv, u8 **bss_info, ETH_ALEN)) mwifiex_update_curr_bss_params(priv, bss); - cfg80211_put_bss(priv->wdev.wiphy, bss); - } - if ((chan->flags & IEEE80211_CHAN_RADAR) || - (chan->flags & IEEE80211_CHAN_NO_IR)) { - mwifiex_dbg(adapter, INFO, - "radar or passive channel %d\n", - channel); - mwifiex_save_hidden_ssid_channels(priv, bss); + if ((chan->flags & IEEE80211_CHAN_RADAR) || + (chan->flags & IEEE80211_CHAN_NO_IR)) { + mwifiex_dbg(adapter, INFO, + "radar or passive channel %d\n", + channel); + mwifiex_save_hidden_ssid_channels(priv, + bss); + } + + cfg80211_put_bss(priv->wdev.wiphy, bss); } } } else { From 8cdb45a907c49369b1deb4748fb27c5f25fca4c8 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 3 Dec 2018 14:26:55 -0800 Subject: [PATCH 1212/3715] mwifiex: debugfs: correct histogram spacing, formatting [ Upstream commit 4cb777c64e030778c569f605398d7604d8aabc0f ] Currently, snippets of this file look like: rx rates (in Mbps): 0=1M 1=2M2=5.5M 3=11M 4=6M 5=9M 6=12M 7=18M 8=24M 9=36M 10=48M 11=54M12-27=MCS0-15(BW20) 28-43=MCS0-15(BW40) 44-53=MCS0-9(VHT:BW20)54-63=MCS0-9(VHT:BW40)64-73=MCS0-9(VHT:BW80) ... noise_flr[--96dBm] = 22 noise_flr[--95dBm] = 149 noise_flr[--94dBm] = 9 noise_flr[--93dBm] = 2 We're missing some spaces, and we're adding a minus sign ('-') on values that are already negative signed integers. Signed-off-by: Brian Norris Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/marvell/mwifiex/debugfs.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/debugfs.c b/drivers/net/wireless/marvell/mwifiex/debugfs.c index 6f4239be609d..49ca84ef1a99 100644 --- a/drivers/net/wireless/marvell/mwifiex/debugfs.c +++ b/drivers/net/wireless/marvell/mwifiex/debugfs.c @@ -296,15 +296,13 @@ mwifiex_histogram_read(struct file *file, char __user *ubuf, "total samples = %d\n", atomic_read(&phist_data->num_samples)); - p += sprintf(p, "rx rates (in Mbps): 0=1M 1=2M"); - p += sprintf(p, "2=5.5M 3=11M 4=6M 5=9M 6=12M\n"); - p += sprintf(p, "7=18M 8=24M 9=36M 10=48M 11=54M"); - p += sprintf(p, "12-27=MCS0-15(BW20) 28-43=MCS0-15(BW40)\n"); + p += sprintf(p, + "rx rates (in Mbps): 0=1M 1=2M 2=5.5M 3=11M 4=6M 5=9M 6=12M\n" + "7=18M 8=24M 9=36M 10=48M 11=54M 12-27=MCS0-15(BW20) 28-43=MCS0-15(BW40)\n"); if (ISSUPP_11ACENABLED(priv->adapter->fw_cap_info)) { - p += sprintf(p, "44-53=MCS0-9(VHT:BW20)"); - p += sprintf(p, "54-63=MCS0-9(VHT:BW40)"); - p += sprintf(p, "64-73=MCS0-9(VHT:BW80)\n\n"); + p += sprintf(p, + "44-53=MCS0-9(VHT:BW20) 54-63=MCS0-9(VHT:BW40) 64-73=MCS0-9(VHT:BW80)\n\n"); } else { p += sprintf(p, "\n"); } @@ -333,7 +331,7 @@ mwifiex_histogram_read(struct file *file, char __user *ubuf, for (i = 0; i < MWIFIEX_MAX_NOISE_FLR; i++) { value = atomic_read(&phist_data->noise_flr[i]); if (value) - p += sprintf(p, "noise_flr[-%02ddBm] = %d\n", + p += sprintf(p, "noise_flr[%02ddBm] = %d\n", (int)(i-128), value); } for (i = 0; i < MWIFIEX_MAX_SIG_STRENGTH; i++) { From 5d6babdd9c5eaa61aaf6bcd0763af3fe8f0aa4b9 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Thu, 29 Nov 2018 18:48:10 +0800 Subject: [PATCH 1213/3715] rtl818x: fix potential use after free [ Upstream commit afbb1947db94eacc5a13302eee88a9772fb78935 ] entry is released via usb_put_urb just after calling usb_submit_urb. However, entry is used if the submission fails, resulting in a use after free bug. The patch fixes this. Signed-off-by: Pan Bian ACKed-by: Larry Finger Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c b/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c index 9a1d15b3ce45..518caaaf8a98 100644 --- a/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c +++ b/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c @@ -444,12 +444,13 @@ static int rtl8187_init_urbs(struct ieee80211_hw *dev) skb_queue_tail(&priv->rx_queue, skb); usb_anchor_urb(entry, &priv->anchored); ret = usb_submit_urb(entry, GFP_KERNEL); - usb_put_urb(entry); if (ret) { skb_unlink(skb, &priv->rx_queue); usb_unanchor_urb(entry); + usb_put_urb(entry); goto err; } + usb_put_urb(entry); } return ret; From 23df2afe9da6642bfecfb8f2b817aef8bcffc8ef Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 12 Dec 2018 15:18:52 -0800 Subject: [PATCH 1214/3715] xfs: require both realtime inodes to mount [ Upstream commit 64bafd2f1e484e27071e7584642005d56516cb77 ] Since mkfs always formats the filesystem with the realtime bitmap and summary inodes immediately after the root directory, we should expect that both of them are present and loadable, even if there isn't a realtime volume attached. There's no reason to skip this if rbmino == NULLFSINO; in fact, this causes an immediate crash if the there /is/ a realtime volume and someone writes to it. Signed-off-by: Darrick J. Wong Reviewed-by: Bill O'Donnell Signed-off-by: Sasha Levin --- fs/xfs/xfs_rtalloc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 488719d43ca8..cdcb7235e41a 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -1214,13 +1214,11 @@ xfs_rtmount_inodes( xfs_sb_t *sbp; sbp = &mp->m_sb; - if (sbp->sb_rbmino == NULLFSINO) - return 0; error = xfs_iget(mp, NULL, sbp->sb_rbmino, 0, 0, &mp->m_rbmip); if (error) return error; ASSERT(mp->m_rbmip != NULL); - ASSERT(sbp->sb_rsumino != NULLFSINO); + error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip); if (error) { IRELE(mp->m_rbmip); From 628eaca54b500244deb3cd28fea181204fffc9c9 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Wed, 28 Nov 2018 10:57:33 +0800 Subject: [PATCH 1215/3715] ubi: Put MTD device after it is not used [ Upstream commit b95f83ab762dd6211351b9140f99f43644076ca8 ] The MTD device reference is dropped via put_mtd_device, however its field ->index is read and passed to ubi_msg. To fix this, the patch moves the reference dropping after calling ubi_msg. Signed-off-by: Pan Bian Reviewed-by: Boris Brezillon Signed-off-by: Richard Weinberger Signed-off-by: Sasha Levin --- drivers/mtd/ubi/build.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index 6445c693d935..0104d9537329 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -1092,10 +1092,10 @@ int ubi_detach_mtd_dev(int ubi_num, int anyway) ubi_wl_close(ubi); ubi_free_internal_volumes(ubi); vfree(ubi->vtbl); - put_mtd_device(ubi->mtd); vfree(ubi->peb_buf); vfree(ubi->fm_buf); ubi_msg(ubi, "mtd%d is detached", ubi->mtd->index); + put_mtd_device(ubi->mtd); put_device(&ubi->dev); return 0; } From d8f7562fce7d23a7258435ad20c243e0939c294f Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Wed, 28 Nov 2018 11:20:03 +0800 Subject: [PATCH 1216/3715] ubi: Do not drop UBI device reference before using [ Upstream commit e542087701f09418702673631a908429feb3eae0 ] The UBI device reference is dropped but then the device is used as a parameter of ubi_err. The bug is introduced in changing ubi_err's behavior. The old ubi_err does not require a UBI device as its first parameter, but the new one does. Fixes: 32608703310 ("UBI: Extend UBI layer debug/messaging capabilities") Signed-off-by: Pan Bian Reviewed-by: Boris Brezillon Signed-off-by: Richard Weinberger Signed-off-by: Sasha Levin --- drivers/mtd/ubi/kapi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c index d4b2e8744498..c2cf6bd3c162 100644 --- a/drivers/mtd/ubi/kapi.c +++ b/drivers/mtd/ubi/kapi.c @@ -227,9 +227,9 @@ out_unlock: out_free: kfree(desc); out_put_ubi: - ubi_put_device(ubi); ubi_err(ubi, "cannot open device %d, volume %d, error %d", ubi_num, vol_id, err); + ubi_put_device(ubi); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(ubi_open_volume); From 54a708ab4f564941e43a0ee33744835523bf336d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 7 Dec 2018 20:33:52 +0900 Subject: [PATCH 1217/3715] microblaze: adjust the help to the real behavior [ Upstream commit bafcc61d998c1ca18f556d92a0e95335ac68c7da ] "make ARCH=microblaze help" mentions simpleImage.
.unstrip, but it is not a real Make target. It does not work because Makefile assumes "system.unstrip" is the name of DT. $ make ARCH=microblaze CROSS_COMPILE=microblaze-linux- simpleImage.system.unstrip [ snip ] make[1]: *** No rule to make target 'arch/microblaze/boot/dts/system.unstrip.dtb', needed by 'arch/microblaze/boot/dts/system.dtb'. Stop. make: *** [Makefile;1060: arch/microblaze/boot/dts] Error 2 make: *** Waiting for unfinished jobs.... simpleImage.
works like a phony target that generates multiple images. Reflect the real behavior. I removed the DT directory path information because it is already explained a few lines below. While I am here, I deleted the redundant *_defconfig explanation. The top-level Makefile caters to list available defconfig files: mmu_defconfig - Build for mmu nommu_defconfig - Build for nommu Signed-off-by: Masahiro Yamada Signed-off-by: Michal Simek Signed-off-by: Sasha Levin --- arch/microblaze/Makefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile index d269dd4b8279..188f07bba095 100644 --- a/arch/microblaze/Makefile +++ b/arch/microblaze/Makefile @@ -91,11 +91,11 @@ define archhelp echo '* linux.bin - Create raw binary' echo ' linux.bin.gz - Create compressed raw binary' echo ' linux.bin.ub - Create U-Boot wrapped raw binary' - echo ' simpleImage.
- ELF image with $(arch)/boot/dts/
.dts linked in' - echo ' - stripped elf with fdt blob' - echo ' simpleImage.
.unstrip - full ELF image with fdt blob' - echo ' *_defconfig - Select default config from arch/microblaze/configs' - echo '' + echo ' simpleImage.
- Create the following images with
.dtb linked in' + echo ' simpleImage.
: raw image' + echo ' simpleImage.
.ub : raw image with U-Boot header' + echo ' simpleImage.
.unstrip: ELF (identical to vmlinux)' + echo ' simpleImage.
.strip : stripped ELF' echo ' Targets with
embed a device tree blob inside the image' echo ' These targets support board with firmware that does not' echo ' support passing a device tree directly. Replace
with the' From e3c40bd22da10e52b72222315ecac0cd7a4ef23e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 7 Dec 2018 20:33:53 +0900 Subject: [PATCH 1218/3715] microblaze: move "... is ready" messages to arch/microblaze/Makefile [ Upstream commit 2e14f94cf4bc2f15ca5362e81ca3a987c79e3062 ] To prepare for more fixes, move this to arch/microblaze/Makefile. Otherwise, the same "... is ready" would be printed multiple times. Signed-off-by: Masahiro Yamada Signed-off-by: Michal Simek Signed-off-by: Sasha Levin --- arch/microblaze/Makefile | 2 ++ arch/microblaze/boot/Makefile | 4 ---- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile index 188f07bba095..fe5e48184c3c 100644 --- a/arch/microblaze/Makefile +++ b/arch/microblaze/Makefile @@ -83,9 +83,11 @@ archclean: linux.bin linux.bin.gz linux.bin.ub: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ + @echo 'Kernel: $(boot)/$@ is ready' ' (#'`cat .version`')' simpleImage.%: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ + @echo 'Kernel: $(boot)/$@ is ready' ' (#'`cat .version`')' define archhelp echo '* linux.bin - Create raw binary' diff --git a/arch/microblaze/boot/Makefile b/arch/microblaze/boot/Makefile index 7c2f52d4a0e4..49dbd1063d71 100644 --- a/arch/microblaze/boot/Makefile +++ b/arch/microblaze/boot/Makefile @@ -9,15 +9,12 @@ OBJCOPYFLAGS := -R .note -R .comment -R .note.gnu.build-id -O binary $(obj)/linux.bin: vmlinux FORCE $(call if_changed,objcopy) - @echo 'Kernel: $@ is ready' ' (#'`cat .version`')' $(obj)/linux.bin.ub: $(obj)/linux.bin FORCE $(call if_changed,uimage) - @echo 'Kernel: $@ is ready' ' (#'`cat .version`')' $(obj)/linux.bin.gz: $(obj)/linux.bin FORCE $(call if_changed,gzip) - @echo 'Kernel: $@ is ready' ' (#'`cat .version`')' quiet_cmd_cp = CP $< $@$2 cmd_cp = cat $< >$@$2 || (rm -f $@ && echo false) @@ -35,6 +32,5 @@ $(obj)/simpleImage.%: vmlinux FORCE $(call if_changed,objcopy) $(call if_changed,uimage) $(call if_changed,strip,.strip) - @echo 'Kernel: $(UIMAGE_OUT) is ready' ' (#'`cat .version`')' clean-files += simpleImage.*.unstrip linux.bin.ub dts/*.dtb From 7f0d3384d10f21a705e5916384f24bcfa93637f4 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Thu, 16 Aug 2018 09:23:08 +0300 Subject: [PATCH 1219/3715] iwlwifi: move iwl_nvm_check_version() into dvm [ Upstream commit 64866e5da1eabd0c52ff45029b245f5465920031 ] This function is only half-used by mvm (i.e. only the nvm_version part matters, since the calibration version is irrelevant), so it's pointless to export it from iwlwifi. If mvm uses this function, it has the additional complexity of setting the calib version to a bogus value on all cfg structs. To avoid this, move the function to dvm and make a simple comparison of the nvm_version in mvm instead. Signed-off-by: Luca Coelho Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/dvm/main.c | 17 +++++++++++++++++ .../wireless/intel/iwlwifi/iwl-eeprom-parse.c | 19 ------------------- .../wireless/intel/iwlwifi/iwl-eeprom-parse.h | 5 ++--- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 4 +++- 4 files changed, 22 insertions(+), 23 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/main.c b/drivers/net/wireless/intel/iwlwifi/dvm/main.c index 2acd94da9efe..051a2fea9572 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/main.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/main.c @@ -1229,6 +1229,23 @@ static int iwl_eeprom_init_hw_params(struct iwl_priv *priv) return 0; } +static int iwl_nvm_check_version(struct iwl_nvm_data *data, + struct iwl_trans *trans) +{ + if (data->nvm_version >= trans->cfg->nvm_ver || + data->calib_version >= trans->cfg->nvm_calib_ver) { + IWL_DEBUG_INFO(trans, "device EEPROM VER=0x%x, CALIB=0x%x\n", + data->nvm_version, data->calib_version); + return 0; + } + + IWL_ERR(trans, + "Unsupported (too old) EEPROM VER=0x%x < 0x%x CALIB=0x%x < 0x%x\n", + data->nvm_version, trans->cfg->nvm_ver, + data->calib_version, trans->cfg->nvm_calib_ver); + return -EINVAL; +} + static struct iwl_op_mode *iwl_op_mode_dvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, const struct iwl_fw *fw, diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-eeprom-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-eeprom-parse.c index 3199d345b427..92727f7e42db 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-eeprom-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-eeprom-parse.c @@ -928,22 +928,3 @@ iwl_parse_eeprom_data(struct device *dev, const struct iwl_cfg *cfg, return NULL; } IWL_EXPORT_SYMBOL(iwl_parse_eeprom_data); - -/* helper functions */ -int iwl_nvm_check_version(struct iwl_nvm_data *data, - struct iwl_trans *trans) -{ - if (data->nvm_version >= trans->cfg->nvm_ver || - data->calib_version >= trans->cfg->nvm_calib_ver) { - IWL_DEBUG_INFO(trans, "device EEPROM VER=0x%x, CALIB=0x%x\n", - data->nvm_version, data->calib_version); - return 0; - } - - IWL_ERR(trans, - "Unsupported (too old) EEPROM VER=0x%x < 0x%x CALIB=0x%x < 0x%x\n", - data->nvm_version, trans->cfg->nvm_ver, - data->calib_version, trans->cfg->nvm_calib_ver); - return -EINVAL; -} -IWL_EXPORT_SYMBOL(iwl_nvm_check_version); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-eeprom-parse.h b/drivers/net/wireless/intel/iwlwifi/iwl-eeprom-parse.h index b33888991b94..5545210151cd 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-eeprom-parse.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-eeprom-parse.h @@ -7,6 +7,7 @@ * * Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2015 Intel Mobile Communications GmbH + * Copyright (C) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -33,6 +34,7 @@ * * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2015 Intel Mobile Communications GmbH + * Copyright (C) 2018 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -121,9 +123,6 @@ struct iwl_nvm_data * iwl_parse_eeprom_data(struct device *dev, const struct iwl_cfg *cfg, const u8 *eeprom, size_t eeprom_size); -int iwl_nvm_check_version(struct iwl_nvm_data *data, - struct iwl_trans *trans); - int iwl_init_sband_channels(struct iwl_nvm_data *data, struct ieee80211_supported_band *sband, int n_channels, enum nl80211_band band); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 534c0ea7b232..78228f870f8f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -501,7 +501,9 @@ int iwl_run_init_mvm_ucode(struct iwl_mvm *mvm, bool read_nvm) if (mvm->nvm_file_name) iwl_mvm_load_nvm_to_nic(mvm); - WARN_ON(iwl_nvm_check_version(mvm->nvm_data, mvm->trans)); + WARN_ONCE(mvm->nvm_data->nvm_version < mvm->trans->cfg->nvm_ver, + "Too old NVM version (0x%0x, required = 0x%0x)", + mvm->nvm_data->nvm_version, mvm->trans->cfg->nvm_ver); /* * abort after reading the nvm in case RF Kill is on, we will complete From 7d3a9de5cbacb29a5498da0cebb5fe01d104f5d0 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 6 Dec 2018 10:45:49 +0100 Subject: [PATCH 1220/3715] gpiolib: Fix return value of gpio_to_desc() stub if !GPIOLIB [ Upstream commit c5510b8dafce5f3f5a039c9b262ebcae0092c462 ] If CONFIG_GPOILIB is not set, the stub of gpio_to_desc() should return the same type of error as regular version: NULL. All the callers compare the return value of gpio_to_desc() against NULL, so returned ERR_PTR would be treated as non-error case leading to dereferencing of error value. Fixes: 79a9becda894 ("gpiolib: export descriptor-based GPIO interface") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- include/linux/gpio/consumer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index c4a350d83578..79ad4f8b889d 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -404,7 +404,7 @@ static inline int gpiod_to_irq(const struct gpio_desc *desc) static inline struct gpio_desc *gpio_to_desc(unsigned gpio) { - return ERR_PTR(-EINVAL); + return NULL; } static inline int desc_to_gpio(const struct gpio_desc *desc) From bf208699455507006166c19e21a09d0e8c39eead Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 5 Dec 2018 15:28:58 -0800 Subject: [PATCH 1221/3715] kvm: vmx: Set IA32_TSC_AUX for legacy mode guests [ Upstream commit 0023ef39dc35c773c436eaa46ca539a26b308b55 ] RDTSCP is supported in legacy mode as well as long mode. The IA32_TSC_AUX MSR should be set to the correct guest value before entering any guest that supports RDTSCP. Fixes: 4e47c7a6d714 ("KVM: VMX: Add instruction rdtscp support for guest") Signed-off-by: Jim Mattson Reviewed-by: Peter Shier Reviewed-by: Marc Orr Reviewed-by: Liran Alon Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/vmx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 790b217fef9f..c579cda1721e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2818,9 +2818,6 @@ static void setup_msrs(struct vcpu_vmx *vmx) index = __find_msr_index(vmx, MSR_CSTAR); if (index >= 0) move_msr_up(vmx, index, save_nmsrs++); - index = __find_msr_index(vmx, MSR_TSC_AUX); - if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP)) - move_msr_up(vmx, index, save_nmsrs++); /* * MSR_STAR is only needed on long mode guests, and only * if efer.sce is enabled. @@ -2833,6 +2830,9 @@ static void setup_msrs(struct vcpu_vmx *vmx) index = __find_msr_index(vmx, MSR_EFER); if (index >= 0 && update_transition_efer(vmx, index)) move_msr_up(vmx, index, save_nmsrs++); + index = __find_msr_index(vmx, MSR_TSC_AUX); + if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP)) + move_msr_up(vmx, index, save_nmsrs++); vmx->save_nmsrs = save_nmsrs; From 5c92b652d8126399f45286ce656683b9076f91f9 Mon Sep 17 00:00:00 2001 From: Lepton Wu Date: Tue, 11 Dec 2018 11:12:55 -0800 Subject: [PATCH 1222/3715] VSOCK: bind to random port for VMADDR_PORT_ANY [ Upstream commit 8236b08cf50f85bbfaf48910a0b3ee68318b7c4b ] The old code always starts from fixed port for VMADDR_PORT_ANY. Sometimes when VMM crashed, there is still orphaned vsock which is waiting for close timer, then it could cause connection time out for new started VM if they are trying to connect to same port with same guest cid since the new packets could hit that orphaned vsock. We could also fix this by doing more in vhost_vsock_reset_orphans, but any way, it should be better to start from a random local port instead of a fixed one. Signed-off-by: Lepton Wu Reviewed-by: Jorgen Hansen Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/vmw_vsock/af_vsock.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 1939b77e98b7..73eac97e19fb 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -107,6 +107,7 @@ #include #include #include +#include #include #include #include @@ -487,9 +488,13 @@ out: static int __vsock_bind_stream(struct vsock_sock *vsk, struct sockaddr_vm *addr) { - static u32 port = LAST_RESERVED_PORT + 1; + static u32 port = 0; struct sockaddr_vm new_addr; + if (!port) + port = LAST_RESERVED_PORT + 1 + + prandom_u32_max(U32_MAX - LAST_RESERVED_PORT); + vsock_addr_init(&new_addr, addr->svm_cid, addr->svm_port); if (addr->svm_port == VMADDR_PORT_ANY) { From 41703d1bd5752694bb7cb3a5e33e65aabb3e42aa Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Thu, 6 Dec 2018 16:18:25 +0100 Subject: [PATCH 1223/3715] mmc: meson-gx: make sure the descriptor is stopped on errors [ Upstream commit 18f92bc02f1739b5c4d5b70009fbb7eada45bca3 ] On errors, if we don't stop the descriptor chain, it may continue to run and raise IRQ after we have called mmc_request_done(). This is bad because we won't be able to get cmd anymore and properly deal with the IRQ. This patch makes sure the descriptor chain is stopped before calling mmc_request_done() Fixes: 79ed05e329c3 ("mmc: meson-gx: add support for descriptor chain mode") Signed-off-by: Jerome Brunet Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/meson-gx-mmc.c | 73 ++++++++++++++++++++++++++++----- 1 file changed, 63 insertions(+), 10 deletions(-) diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c index 08a55c2e96e1..53ce1bb83d2c 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -74,9 +75,11 @@ #define CFG_CLK_ALWAYS_ON BIT(18) #define CFG_CHK_DS BIT(20) #define CFG_AUTO_CLK BIT(23) +#define CFG_ERR_ABORT BIT(27) #define SD_EMMC_STATUS 0x48 #define STATUS_BUSY BIT(31) +#define STATUS_DESC_BUSY BIT(30) #define STATUS_DATI GENMASK(23, 16) #define SD_EMMC_IRQ_EN 0x4c @@ -905,6 +908,7 @@ static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd) cmd_cfg |= FIELD_PREP(CMD_CFG_CMD_INDEX_MASK, cmd->opcode); cmd_cfg |= CMD_CFG_OWNER; /* owned by CPU */ + cmd_cfg |= CMD_CFG_ERROR; /* stop in case of error */ meson_mmc_set_response_bits(cmd, &cmd_cfg); @@ -999,6 +1003,17 @@ static irqreturn_t meson_mmc_irq(int irq, void *dev_id) u32 irq_en, status, raw_status; irqreturn_t ret = IRQ_NONE; + irq_en = readl(host->regs + SD_EMMC_IRQ_EN); + raw_status = readl(host->regs + SD_EMMC_STATUS); + status = raw_status & irq_en; + + if (!status) { + dev_dbg(host->dev, + "Unexpected IRQ! irq_en 0x%08x - status 0x%08x\n", + irq_en, raw_status); + return IRQ_NONE; + } + if (WARN_ON(!host) || WARN_ON(!host->cmd)) return IRQ_NONE; @@ -1006,22 +1021,18 @@ static irqreturn_t meson_mmc_irq(int irq, void *dev_id) cmd = host->cmd; data = cmd->data; - irq_en = readl(host->regs + SD_EMMC_IRQ_EN); - raw_status = readl(host->regs + SD_EMMC_STATUS); - status = raw_status & irq_en; - cmd->error = 0; if (status & IRQ_CRC_ERR) { dev_dbg(host->dev, "CRC Error - status 0x%08x\n", status); cmd->error = -EILSEQ; - ret = IRQ_HANDLED; + ret = IRQ_WAKE_THREAD; goto out; } if (status & IRQ_TIMEOUTS) { dev_dbg(host->dev, "Timeout - status 0x%08x\n", status); cmd->error = -ETIMEDOUT; - ret = IRQ_HANDLED; + ret = IRQ_WAKE_THREAD; goto out; } @@ -1046,17 +1057,49 @@ out: /* ack all enabled interrupts */ writel(irq_en, host->regs + SD_EMMC_STATUS); + if (cmd->error) { + /* Stop desc in case of errors */ + u32 start = readl(host->regs + SD_EMMC_START); + + start &= ~START_DESC_BUSY; + writel(start, host->regs + SD_EMMC_START); + } + if (ret == IRQ_HANDLED) meson_mmc_request_done(host->mmc, cmd->mrq); - else if (ret == IRQ_NONE) - dev_warn(host->dev, - "Unexpected IRQ! status=0x%08x, irq_en=0x%08x\n", - raw_status, irq_en); spin_unlock(&host->lock); return ret; } +static int meson_mmc_wait_desc_stop(struct meson_host *host) +{ + int loop; + u32 status; + + /* + * It may sometimes take a while for it to actually halt. Here, we + * are giving it 5ms to comply + * + * If we don't confirm the descriptor is stopped, it might raise new + * IRQs after we have called mmc_request_done() which is bad. + */ + for (loop = 50; loop; loop--) { + status = readl(host->regs + SD_EMMC_STATUS); + if (status & (STATUS_BUSY | STATUS_DESC_BUSY)) + udelay(100); + else + break; + } + + if (status & (STATUS_BUSY | STATUS_DESC_BUSY)) { + dev_err(host->dev, "Timed out waiting for host to stop\n"); + return -ETIMEDOUT; + } + + return 0; +} + static irqreturn_t meson_mmc_irq_thread(int irq, void *dev_id) { struct meson_host *host = dev_id; @@ -1067,6 +1110,13 @@ static irqreturn_t meson_mmc_irq_thread(int irq, void *dev_id) if (WARN_ON(!cmd)) return IRQ_NONE; + if (cmd->error) { + meson_mmc_wait_desc_stop(host); + meson_mmc_request_done(host->mmc, cmd->mrq); + + return IRQ_HANDLED; + } + data = cmd->data; if (meson_mmc_bounce_buf_read(data)) { xfer_bytes = data->blksz * data->blocks; @@ -1107,6 +1157,9 @@ static void meson_mmc_cfg_init(struct meson_host *host) cfg |= FIELD_PREP(CFG_RC_CC_MASK, ilog2(SD_EMMC_CFG_CMD_GAP)); cfg |= FIELD_PREP(CFG_BLK_LEN_MASK, ilog2(SD_EMMC_CFG_BLK_SIZE)); + /* abort chain on R/W errors */ + cfg |= CFG_ERR_ABORT; + writel(cfg, host->regs + SD_EMMC_CFG); } From 2faad660e05fc979be6d385b92f9e9e159d304d1 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Sun, 16 Dec 2018 09:34:17 +0100 Subject: [PATCH 1224/3715] mtd: rawnand: sunxi: Write pageprog related opcodes to WCMD_SET [ Upstream commit 732774437ae01d9882e60314e303898e63c7f038 ] The opcodes used by the controller when doing batched page prog should be written in NFC_REG_WCMD_SET not FC_REG_RCMD_SET. Luckily, the default NFC_REG_WCMD_SET value matches the one we set in the driver which explains why we didn't notice the problem. Fixes: 614049a8d904 ("mtd: nand: sunxi: add support for DMA assisted operations") Signed-off-by: Boris Brezillon Signed-off-by: Miquel Raynal Signed-off-by: Sasha Levin --- drivers/mtd/nand/sunxi_nand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/sunxi_nand.c b/drivers/mtd/nand/sunxi_nand.c index 958974821582..8e5231482397 100644 --- a/drivers/mtd/nand/sunxi_nand.c +++ b/drivers/mtd/nand/sunxi_nand.c @@ -1435,7 +1435,7 @@ static int sunxi_nfc_hw_ecc_write_page_dma(struct mtd_info *mtd, sunxi_nfc_randomizer_enable(mtd); writel((NAND_CMD_RNDIN << 8) | NAND_CMD_PAGEPROG, - nfc->regs + NFC_REG_RCMD_SET); + nfc->regs + NFC_REG_WCMD_SET); dma_async_issue_pending(nfc->dmac); From fe6f3973ff0ac8a6bd72b944acac90a95fe1b4a4 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 3 Dec 2018 10:20:32 -0500 Subject: [PATCH 1225/3715] btrfs: only track ref_heads in delayed_ref_updates [ Upstream commit 158ffa364bf723fa1ef128060646d23dc3942994 ] We use this number to figure out how many delayed refs to run, but __btrfs_run_delayed_refs really only checks every time we need a new delayed ref head, so we always run at least one ref head completely no matter what the number of items on it. Fix the accounting to only be adjusted when we add/remove a ref head. In addition to using this number to limit the number of delayed refs run, a future patch is also going to use it to calculate the amount of space required for delayed refs space reservation. Reviewed-by: Nikolay Borisov Signed-off-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/delayed-ref.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 93ffa898df6d..d56bd3625468 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -195,8 +195,6 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans, ref->in_tree = 0; btrfs_put_delayed_ref(ref); atomic_dec(&delayed_refs->num_entries); - if (trans->delayed_ref_updates) - trans->delayed_ref_updates--; } static bool merge_ref(struct btrfs_trans_handle *trans, @@ -458,7 +456,6 @@ add_tail: if (ref->action == BTRFS_ADD_DELAYED_REF) list_add_tail(&ref->add_list, &href->ref_add_list); atomic_inc(&root->num_entries); - trans->delayed_ref_updates++; spin_unlock(&href->lock); return ret; } From ab8f3d980d5df972d577dbf1287de029187dbee2 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Thu, 22 Nov 2018 08:52:33 +0800 Subject: [PATCH 1226/3715] HID: intel-ish-hid: fixes incorrect error handling [ Upstream commit 6e0856d317440a950b17c00a9283114f025e5699 ] The memory chunk allocated by hid_allocate_device() should be released by hid_destroy_device(), not kfree(). Fixes: 0b28cb4bcb1("HID: intel-ish-hid: ISH HID client driver") Signed-off-by: Pan Bian Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/intel-ish-hid/ishtp-hid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/intel-ish-hid/ishtp-hid.c b/drivers/hid/intel-ish-hid/ishtp-hid.c index cd23903ddcf1..e918d78e541c 100644 --- a/drivers/hid/intel-ish-hid/ishtp-hid.c +++ b/drivers/hid/intel-ish-hid/ishtp-hid.c @@ -222,7 +222,7 @@ int ishtp_hid_probe(unsigned int cur_hid_dev, err_hid_device: kfree(hid_data); err_hid_data: - kfree(hid); + hid_destroy_device(hid); return rv; } From 6f8c923839721a9ea266230d99e2b9452efdf434 Mon Sep 17 00:00:00 2001 From: Darwin Dingel Date: Mon, 10 Dec 2018 11:29:09 +1300 Subject: [PATCH 1227/3715] serial: 8250: Rate limit serial port rx interrupts during input overruns [ Upstream commit 6d7f677a2afa1c82d7fc7af7f9159cbffd5dc010 ] When a serial port gets faulty or gets flooded with inputs, its interrupt handler starts to work double time to get the characters to the workqueue for the tty layer to handle them. When this busy time on the serial/tty subsystem happens during boot, where it is also busy on the userspace trying to initialise, some processes can continuously get preempted and will be on hold until the interrupts subside. The fix is to backoff on processing received characters for a specified amount of time when an input overrun is seen (received a new character before the previous one is processed). This only stops receive and will continue to transmit characters to serial port. After the backoff period is done, it receive will be re-enabled. This is optional and will only be enabled by setting 'overrun-throttle-ms' in the dts. Signed-off-by: Darwin Dingel Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/8250/8250_core.c | 25 +++++++++++++++++++++++++ drivers/tty/serial/8250/8250_fsl.c | 23 ++++++++++++++++++++++- drivers/tty/serial/8250/8250_of.c | 5 +++++ include/linux/serial_8250.h | 4 ++++ 4 files changed, 56 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index d29b512a7d9f..ceeea4b159c4 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -953,6 +953,21 @@ static struct uart_8250_port *serial8250_find_match_or_unused(struct uart_port * return NULL; } +static void serial_8250_overrun_backoff_work(struct work_struct *work) +{ + struct uart_8250_port *up = + container_of(to_delayed_work(work), struct uart_8250_port, + overrun_backoff); + struct uart_port *port = &up->port; + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + up->ier |= UART_IER_RLSI | UART_IER_RDI; + up->port.read_status_mask |= UART_LSR_DR; + serial_out(up, UART_IER, up->ier); + spin_unlock_irqrestore(&port->lock, flags); +} + /** * serial8250_register_8250_port - register a serial port * @up: serial port template @@ -1063,6 +1078,16 @@ int serial8250_register_8250_port(struct uart_8250_port *up) ret = 0; } } + + /* Initialise interrupt backoff work if required */ + if (up->overrun_backoff_time_ms > 0) { + uart->overrun_backoff_time_ms = up->overrun_backoff_time_ms; + INIT_DELAYED_WORK(&uart->overrun_backoff, + serial_8250_overrun_backoff_work); + } else { + uart->overrun_backoff_time_ms = 0; + } + mutex_unlock(&serial_mutex); return ret; diff --git a/drivers/tty/serial/8250/8250_fsl.c b/drivers/tty/serial/8250/8250_fsl.c index 910bfee5a88b..cc138c24ae88 100644 --- a/drivers/tty/serial/8250/8250_fsl.c +++ b/drivers/tty/serial/8250/8250_fsl.c @@ -48,8 +48,29 @@ int fsl8250_handle_irq(struct uart_port *port) lsr = orig_lsr = up->port.serial_in(&up->port, UART_LSR); - if (lsr & (UART_LSR_DR | UART_LSR_BI)) + /* Process incoming characters first */ + if ((lsr & (UART_LSR_DR | UART_LSR_BI)) && + (up->ier & (UART_IER_RLSI | UART_IER_RDI))) { lsr = serial8250_rx_chars(up, lsr); + } + + /* Stop processing interrupts on input overrun */ + if ((orig_lsr & UART_LSR_OE) && (up->overrun_backoff_time_ms > 0)) { + unsigned long delay; + + up->ier = port->serial_in(port, UART_IER); + if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) { + port->ops->stop_rx(port); + } else { + /* Keep restarting the timer until + * the input overrun subsides. + */ + cancel_delayed_work(&up->overrun_backoff); + } + + delay = msecs_to_jiffies(up->overrun_backoff_time_ms); + schedule_delayed_work(&up->overrun_backoff, delay); + } serial8250_modem_status(up); diff --git a/drivers/tty/serial/8250/8250_of.c b/drivers/tty/serial/8250/8250_of.c index ec510e342e06..c51044ba503c 100644 --- a/drivers/tty/serial/8250/8250_of.c +++ b/drivers/tty/serial/8250/8250_of.c @@ -232,6 +232,11 @@ static int of_platform_serial_probe(struct platform_device *ofdev) if (of_property_read_bool(ofdev->dev.of_node, "auto-flow-control")) port8250.capabilities |= UART_CAP_AFE; + if (of_property_read_u32(ofdev->dev.of_node, + "overrun-throttle-ms", + &port8250.overrun_backoff_time_ms) != 0) + port8250.overrun_backoff_time_ms = 0; + ret = serial8250_register_8250_port(&port8250); if (ret < 0) goto err_dispose; diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index a27ef5f56431..791a6be0e394 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -134,6 +134,10 @@ struct uart_8250_port { void (*dl_write)(struct uart_8250_port *, int); struct uart_8250_em485 *em485; + + /* Serial port overrun backoff */ + struct delayed_work overrun_backoff; + u32 overrun_backoff_time_ms; }; static inline struct uart_8250_port *up_to_u8250p(struct uart_port *up) From e8113c362adcb8735c81f80cf24d781708047ce1 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Mon, 10 Dec 2018 16:12:57 +0100 Subject: [PATCH 1228/3715] kprobes/x86/xen: blacklist non-attachable xen interrupt functions [ Upstream commit bf9445a33ae6ac2f0822d2f1ce1365408387d568 ] Blacklist symbols in Xen probe-prohibited areas, so that user can see these prohibited symbols in debugfs. See also: a50480cb6d61. Signed-off-by: Andrea Righi Acked-by: Masami Hiramatsu Signed-off-by: Boris Ostrovsky Signed-off-by: Sasha Levin --- arch/x86/xen/xen-asm_64.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 3a6feed76dfc..a93d8a7cef26 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -12,6 +12,7 @@ #include #include #include +#include #include @@ -24,6 +25,7 @@ ENTRY(xen_\name) pop %r11 jmp \name END(xen_\name) +_ASM_NOKPROBE(xen_\name) .endm xen_pv_trap divide_error From e50bf6849f4de99385ba6bbc548cc0ecc86a5cdb Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Fri, 14 Dec 2018 12:55:45 +0000 Subject: [PATCH 1229/3715] xen/pciback: Check dev_data before using it [ Upstream commit 1669907e3d1abfa3f7586e2d55dbbc117b5adba2 ] If pcistub_init_device fails, the release function will be called with dev_data set to NULL. Check it before using it to avoid a NULL pointer dereference. Signed-off-by: Ross Lagerwall Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky Signed-off-by: Sasha Levin --- drivers/xen/xen-pciback/pci_stub.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 9e480fdebe1f..8c250f4a3a97 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -106,7 +106,8 @@ static void pcistub_device_release(struct kref *kref) * is called from "unbind" which takes a device_lock mutex. */ __pci_reset_function_locked(dev); - if (pci_load_and_free_saved_state(dev, &dev_data->pci_saved_state)) + if (dev_data && + pci_load_and_free_saved_state(dev, &dev_data->pci_saved_state)) dev_info(&dev->dev, "Could not reload PCI state\n"); else pci_restore_state(dev); From a509ddd2fb1b2bdcbe4596651c68a6eac594ea04 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 19 Oct 2018 11:04:27 -0700 Subject: [PATCH 1230/3715] vfio-mdev/samples: Use u8 instead of char for handle functions [ Upstream commit 8ba35b3a0046d6573c98f00461d9bd1b86250d35 ] Clang warns: samples/vfio-mdev/mtty.c:592:39: warning: implicit conversion from 'int' to 'char' changes value from 162 to -94 [-Wconstant-conversion] *buf = UART_MSR_DSR | UART_MSR_DDSR | UART_MSR_DCD; ~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~ 1 warning generated. Turns out that all uses of buf in this function ultimately end up stored or cast to an unsigned type. Just use u8, which has the same number of bits but can store this larger number so Clang no longer warns. Signed-off-by: Nathan Chancellor Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin --- samples/vfio-mdev/mtty.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c index ca495686b9c3..f8c7249fa705 100644 --- a/samples/vfio-mdev/mtty.c +++ b/samples/vfio-mdev/mtty.c @@ -171,7 +171,7 @@ static struct mdev_state *find_mdev_state_by_uuid(uuid_le uuid) return NULL; } -void dump_buffer(char *buf, uint32_t count) +void dump_buffer(u8 *buf, uint32_t count) { #if defined(DEBUG) int i; @@ -250,7 +250,7 @@ static void mtty_create_config_space(struct mdev_state *mdev_state) } static void handle_pci_cfg_write(struct mdev_state *mdev_state, u16 offset, - char *buf, u32 count) + u8 *buf, u32 count) { u32 cfg_addr, bar_mask, bar_index = 0; @@ -304,7 +304,7 @@ static void handle_pci_cfg_write(struct mdev_state *mdev_state, u16 offset, } static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state, - u16 offset, char *buf, u32 count) + u16 offset, u8 *buf, u32 count) { u8 data = *buf; @@ -475,7 +475,7 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state, } static void handle_bar_read(unsigned int index, struct mdev_state *mdev_state, - u16 offset, char *buf, u32 count) + u16 offset, u8 *buf, u32 count) { /* Handle read requests by guest */ switch (offset) { @@ -650,7 +650,7 @@ static void mdev_read_base(struct mdev_state *mdev_state) } } -static ssize_t mdev_access(struct mdev_device *mdev, char *buf, size_t count, +static ssize_t mdev_access(struct mdev_device *mdev, u8 *buf, size_t count, loff_t pos, bool is_write) { struct mdev_state *mdev_state; @@ -698,7 +698,7 @@ static ssize_t mdev_access(struct mdev_device *mdev, char *buf, size_t count, #if defined(DEBUG_REGS) pr_info("%s: BAR%d WR @0x%llx %s val:0x%02x dlab:%d\n", __func__, index, offset, wr_reg[offset], - (u8)*buf, mdev_state->s[index].dlab); + *buf, mdev_state->s[index].dlab); #endif handle_bar_write(index, mdev_state, offset, buf, count); } else { @@ -708,7 +708,7 @@ static ssize_t mdev_access(struct mdev_device *mdev, char *buf, size_t count, #if defined(DEBUG_REGS) pr_info("%s: BAR%d RD @0x%llx %s val:0x%02x dlab:%d\n", __func__, index, offset, rd_reg[offset], - (u8)*buf, mdev_state->s[index].dlab); + *buf, mdev_state->s[index].dlab); #endif } break; @@ -827,7 +827,7 @@ ssize_t mtty_read(struct mdev_device *mdev, char __user *buf, size_t count, if (count >= 4 && !(*ppos % 4)) { u32 val; - ret = mdev_access(mdev, (char *)&val, sizeof(val), + ret = mdev_access(mdev, (u8 *)&val, sizeof(val), *ppos, false); if (ret <= 0) goto read_err; @@ -839,7 +839,7 @@ ssize_t mtty_read(struct mdev_device *mdev, char __user *buf, size_t count, } else if (count >= 2 && !(*ppos % 2)) { u16 val; - ret = mdev_access(mdev, (char *)&val, sizeof(val), + ret = mdev_access(mdev, (u8 *)&val, sizeof(val), *ppos, false); if (ret <= 0) goto read_err; @@ -851,7 +851,7 @@ ssize_t mtty_read(struct mdev_device *mdev, char __user *buf, size_t count, } else { u8 val; - ret = mdev_access(mdev, (char *)&val, sizeof(val), + ret = mdev_access(mdev, (u8 *)&val, sizeof(val), *ppos, false); if (ret <= 0) goto read_err; @@ -889,7 +889,7 @@ ssize_t mtty_write(struct mdev_device *mdev, const char __user *buf, if (copy_from_user(&val, buf, sizeof(val))) goto write_err; - ret = mdev_access(mdev, (char *)&val, sizeof(val), + ret = mdev_access(mdev, (u8 *)&val, sizeof(val), *ppos, true); if (ret <= 0) goto write_err; @@ -901,7 +901,7 @@ ssize_t mtty_write(struct mdev_device *mdev, const char __user *buf, if (copy_from_user(&val, buf, sizeof(val))) goto write_err; - ret = mdev_access(mdev, (char *)&val, sizeof(val), + ret = mdev_access(mdev, (u8 *)&val, sizeof(val), *ppos, true); if (ret <= 0) goto write_err; @@ -913,7 +913,7 @@ ssize_t mtty_write(struct mdev_device *mdev, const char __user *buf, if (copy_from_user(&val, buf, sizeof(val))) goto write_err; - ret = mdev_access(mdev, (char *)&val, sizeof(val), + ret = mdev_access(mdev, (u8 *)&val, sizeof(val), *ppos, true); if (ret <= 0) goto write_err; From db94533f587d7770f1fc6f8659795fbff50fa28a Mon Sep 17 00:00:00 2001 From: Martin Schiller Date: Fri, 14 Dec 2018 08:48:25 +0100 Subject: [PATCH 1231/3715] pinctrl: xway: fix gpio-hog related boot issues [ Upstream commit 9b4924da4711674e62d97d4f5360446cc78337af ] This patch is based on commit a86caa9ba5d7 ("pinctrl: msm: fix gpio-hog related boot issues"). It fixes the issue that the gpio ranges needs to be defined before gpiochip_add(). Therefore, we also have to swap the order of registering the pinctrl driver and registering the gpio chip. You also have to add the "gpio-ranges" property to the pinctrl device node to get it finally working. Signed-off-by: Martin Schiller Acked-by: John Crispin Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/pinctrl-xway.c | 39 +++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/drivers/pinctrl/pinctrl-xway.c b/drivers/pinctrl/pinctrl-xway.c index f9e98a7d4f0c..1b0c5958c56a 100644 --- a/drivers/pinctrl/pinctrl-xway.c +++ b/drivers/pinctrl/pinctrl-xway.c @@ -1748,14 +1748,6 @@ static int pinmux_xway_probe(struct platform_device *pdev) } xway_pctrl_desc.pins = xway_info.pads; - /* register the gpio chip */ - xway_chip.parent = &pdev->dev; - ret = devm_gpiochip_add_data(&pdev->dev, &xway_chip, NULL); - if (ret) { - dev_err(&pdev->dev, "Failed to register gpio chip\n"); - return ret; - } - /* setup the data needed by pinctrl */ xway_pctrl_desc.name = dev_name(&pdev->dev); xway_pctrl_desc.npins = xway_chip.ngpio; @@ -1777,10 +1769,33 @@ static int pinmux_xway_probe(struct platform_device *pdev) return ret; } - /* finish with registering the gpio range in pinctrl */ - xway_gpio_range.npins = xway_chip.ngpio; - xway_gpio_range.base = xway_chip.base; - pinctrl_add_gpio_range(xway_info.pctrl, &xway_gpio_range); + /* register the gpio chip */ + xway_chip.parent = &pdev->dev; + xway_chip.owner = THIS_MODULE; + xway_chip.of_node = pdev->dev.of_node; + ret = devm_gpiochip_add_data(&pdev->dev, &xway_chip, NULL); + if (ret) { + dev_err(&pdev->dev, "Failed to register gpio chip\n"); + return ret; + } + + /* + * For DeviceTree-supported systems, the gpio core checks the + * pinctrl's device node for the "gpio-ranges" property. + * If it is present, it takes care of adding the pin ranges + * for the driver. In this case the driver can skip ahead. + * + * In order to remain compatible with older, existing DeviceTree + * files which don't set the "gpio-ranges" property or systems that + * utilize ACPI the driver has to call gpiochip_add_pin_range(). + */ + if (!of_property_read_bool(pdev->dev.of_node, "gpio-ranges")) { + /* finish with registering the gpio range in pinctrl */ + xway_gpio_range.npins = xway_chip.ngpio; + xway_gpio_range.base = xway_chip.base; + pinctrl_add_gpio_range(xway_info.pctrl, &xway_gpio_range); + } + dev_info(&pdev->dev, "Init done\n"); return 0; } From f2fce585d55ca4b530bc4372f66bfed23abca225 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 13 Dec 2018 13:15:11 +0200 Subject: [PATCH 1232/3715] net/mlx5: Continue driver initialization despite debugfs failure [ Upstream commit 199fa087dc6b503baad06712716fac645a983e8a ] The failure to create debugfs entry is unpleasant event, but not enough to abort drier initialization. Align the mlx5_core code to debugfs design and continue execution whenever debugfs_create_dir() successes or not. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Reviewed-by: Saeed Mahameed Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 97874c2568fc..1ac0e173da12 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -838,11 +838,9 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv) priv->numa_node = dev_to_node(&dev->pdev->dev); - priv->dbg_root = debugfs_create_dir(dev_name(&pdev->dev), mlx5_debugfs_root); - if (!priv->dbg_root) { - dev_err(&pdev->dev, "Cannot create debugfs dir, aborting\n"); - return -ENOMEM; - } + if (mlx5_debugfs_root) + priv->dbg_root = + debugfs_create_dir(pci_name(pdev), mlx5_debugfs_root); err = mlx5_pci_enable_device(dev); if (err) { From 3cc434913830c35ef2bd9f467f15763a577c3bbc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 9 Nov 2018 22:26:42 -0500 Subject: [PATCH 1233/3715] exofs_mount(): fix leaks on failure exits [ Upstream commit 26cb5a328c6b2bda9e859307ce4cfc60df3a2c28 ] ... and don't abuse mount_nodev(), while we are at it. Signed-off-by: Al Viro Reviewed-by: David Howells Signed-off-by: Sasha Levin --- fs/exofs/super.c | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/fs/exofs/super.c b/fs/exofs/super.c index c9ec652e2fcd..881d5798a181 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -702,21 +702,18 @@ out: /* * Read the superblock from the OSD and fill in the fields */ -static int exofs_fill_super(struct super_block *sb, void *data, int silent) +static int exofs_fill_super(struct super_block *sb, + struct exofs_mountopt *opts, + struct exofs_sb_info *sbi, + int silent) { struct inode *root; - struct exofs_mountopt *opts = data; - struct exofs_sb_info *sbi; /*extended info */ struct osd_dev *od; /* Master device */ struct exofs_fscb fscb; /*on-disk superblock info */ struct ore_comp comp; unsigned table_count; int ret; - sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); - if (!sbi) - return -ENOMEM; - /* use mount options to fill superblock */ if (opts->is_osdname) { struct osd_dev_info odi = {.systemid_len = 0}; @@ -860,7 +857,9 @@ static struct dentry *exofs_mount(struct file_system_type *type, int flags, const char *dev_name, void *data) { + struct super_block *s; struct exofs_mountopt opts; + struct exofs_sb_info *sbi; int ret; ret = parse_options(data, &opts); @@ -869,9 +868,31 @@ static struct dentry *exofs_mount(struct file_system_type *type, return ERR_PTR(ret); } + sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); + if (!sbi) { + kfree(opts.dev_name); + return ERR_PTR(-ENOMEM); + } + + s = sget(type, NULL, set_anon_super, flags, NULL); + + if (IS_ERR(s)) { + kfree(opts.dev_name); + kfree(sbi); + return ERR_CAST(s); + } + if (!opts.dev_name) opts.dev_name = dev_name; - return mount_nodev(type, flags, &opts, exofs_fill_super); + + + ret = exofs_fill_super(s, &opts, sbi, flags & SB_SILENT ? 1 : 0); + if (ret) { + deactivate_locked_super(s); + return ERR_PTR(ret); + } + s->s_flags |= SB_ACTIVE; + return dget(s->s_root); } /* From 086d895a99a1b03c4dc3875a8e6f2b001b4a529e Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Sun, 16 Dec 2018 18:46:28 -0500 Subject: [PATCH 1234/3715] bnxt_en: Return linux standard errors in bnxt_ethtool.c [ Upstream commit 7c675421afef18253a86ffc383f57bc15ef32ea8 ] Currently firmware specific errors are returned directly in flash_device and reset ethtool hooks. Modify it to return linux standard errors to userspace when flashing operations fail. Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 56 +++++++++++++------ 1 file changed, 39 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index a22336fef66b..4879371ad0c7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1339,14 +1339,22 @@ static int bnxt_flash_nvram(struct net_device *dev, rc = hwrm_send_message(bp, &req, sizeof(req), FLASH_NVRAM_TIMEOUT); dma_free_coherent(&bp->pdev->dev, data_len, kmem, dma_handle); + if (rc == HWRM_ERR_CODE_RESOURCE_ACCESS_DENIED) { + netdev_info(dev, + "PF does not have admin privileges to flash the device\n"); + rc = -EACCES; + } else if (rc) { + rc = -EIO; + } return rc; } static int bnxt_firmware_reset(struct net_device *dev, u16 dir_type) { - struct bnxt *bp = netdev_priv(dev); struct hwrm_fw_reset_input req = {0}; + struct bnxt *bp = netdev_priv(dev); + int rc; bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FW_RESET, -1, -1); @@ -1380,7 +1388,15 @@ static int bnxt_firmware_reset(struct net_device *dev, return -EINVAL; } - return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (rc == HWRM_ERR_CODE_RESOURCE_ACCESS_DENIED) { + netdev_info(dev, + "PF does not have admin privileges to reset the device\n"); + rc = -EACCES; + } else if (rc) { + rc = -EIO; + } + return rc; } static int bnxt_flash_firmware(struct net_device *dev, @@ -1587,9 +1603,9 @@ static int bnxt_flash_package_from_file(struct net_device *dev, struct hwrm_nvm_install_update_output *resp = bp->hwrm_cmd_resp_addr; struct hwrm_nvm_install_update_input install = {0}; const struct firmware *fw; + int rc, hwrm_err = 0; u32 item_len; u16 index; - int rc; bnxt_hwrm_fw_set_time(bp); @@ -1632,15 +1648,16 @@ static int bnxt_flash_package_from_file(struct net_device *dev, memcpy(kmem, fw->data, fw->size); modify.host_src_addr = cpu_to_le64(dma_handle); - rc = hwrm_send_message(bp, &modify, sizeof(modify), - FLASH_PACKAGE_TIMEOUT); + hwrm_err = hwrm_send_message(bp, &modify, + sizeof(modify), + FLASH_PACKAGE_TIMEOUT); dma_free_coherent(&bp->pdev->dev, fw->size, kmem, dma_handle); } } release_firmware(fw); - if (rc) - return rc; + if (rc || hwrm_err) + goto err_exit; if ((install_type & 0xffff) == 0) install_type >>= 16; @@ -1648,12 +1665,10 @@ static int bnxt_flash_package_from_file(struct net_device *dev, install.install_type = cpu_to_le32(install_type); mutex_lock(&bp->hwrm_cmd_lock); - rc = _hwrm_send_message(bp, &install, sizeof(install), - INSTALL_PACKAGE_TIMEOUT); - if (rc) { - rc = -EOPNOTSUPP; + hwrm_err = _hwrm_send_message(bp, &install, sizeof(install), + INSTALL_PACKAGE_TIMEOUT); + if (hwrm_err) goto flash_pkg_exit; - } if (resp->error_code) { u8 error_code = ((struct hwrm_err_output *)resp)->cmd_err; @@ -1661,12 +1676,11 @@ static int bnxt_flash_package_from_file(struct net_device *dev, if (error_code == NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR) { install.flags |= cpu_to_le16( NVM_INSTALL_UPDATE_REQ_FLAGS_ALLOWED_TO_DEFRAG); - rc = _hwrm_send_message(bp, &install, sizeof(install), - INSTALL_PACKAGE_TIMEOUT); - if (rc) { - rc = -EOPNOTSUPP; + hwrm_err = _hwrm_send_message(bp, &install, + sizeof(install), + INSTALL_PACKAGE_TIMEOUT); + if (hwrm_err) goto flash_pkg_exit; - } } } @@ -1677,6 +1691,14 @@ static int bnxt_flash_package_from_file(struct net_device *dev, } flash_pkg_exit: mutex_unlock(&bp->hwrm_cmd_lock); +err_exit: + if (hwrm_err == HWRM_ERR_CODE_RESOURCE_ACCESS_DENIED) { + netdev_info(dev, + "PF does not have admin privileges to flash the device\n"); + rc = -EACCES; + } else if (hwrm_err) { + rc = -EOPNOTSUPP; + } return rc; } From 2135588adb3cd6660ed8d5f0815607f1bf768165 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Sun, 16 Dec 2018 18:46:31 -0500 Subject: [PATCH 1235/3715] bnxt_en: query force speeds before disabling autoneg mode. [ Upstream commit 56d374624778652d2a999e18c87a25338b127b41 ] With autoneg enabled, PHY loopback test fails. To disable autoneg, driver needs to send a valid forced speed to FW. FW is not sending async event for invalid speeds. To fix this, query forced speeds and send the correct speed when disabling autoneg mode. Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 4879371ad0c7..fc8e185718a1 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2258,17 +2258,37 @@ static int bnxt_hwrm_mac_loopback(struct bnxt *bp, bool enable) return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); } +static int bnxt_query_force_speeds(struct bnxt *bp, u16 *force_speeds) +{ + struct hwrm_port_phy_qcaps_output *resp = bp->hwrm_cmd_resp_addr; + struct hwrm_port_phy_qcaps_input req = {0}; + int rc; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_QCAPS, -1, -1); + mutex_lock(&bp->hwrm_cmd_lock); + rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (!rc) + *force_speeds = le16_to_cpu(resp->supported_speeds_force_mode); + + mutex_unlock(&bp->hwrm_cmd_lock); + return rc; +} + static int bnxt_disable_an_for_lpbk(struct bnxt *bp, struct hwrm_port_phy_cfg_input *req) { struct bnxt_link_info *link_info = &bp->link_info; - u16 fw_advertising = link_info->advertising; + u16 fw_advertising; u16 fw_speed; int rc; if (!link_info->autoneg) return 0; + rc = bnxt_query_force_speeds(bp, &fw_advertising); + if (rc) + return rc; + fw_speed = PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_1GB; if (netif_carrier_ok(bp->dev)) fw_speed = bp->link_info.link_speed; From cdf2e054bb9ddbcbf5d11567bb36802381fecede Mon Sep 17 00:00:00 2001 From: Michael Mueller Date: Fri, 30 Nov 2018 15:32:06 +0100 Subject: [PATCH 1236/3715] KVM: s390: unregister debug feature on failing arch init [ Upstream commit 308c3e6673b012beecb96ef04cc65f4a0e7cdd99 ] Make sure the debug feature and its allocated resources get released upon unsuccessful architecture initialization. A related indication of the issue will be reported as kernel message. Signed-off-by: Michael Mueller Reviewed-by: Cornelia Huck Reviewed-by: Pierre Morel Reviewed-by: David Hildenbrand Message-Id: <20181130143215.69496-2-mimu@linux.ibm.com> Signed-off-by: Christian Borntraeger Signed-off-by: Sasha Levin --- arch/s390/kvm/kvm-s390.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ff62a4fe2159..91c24e87fe10 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -361,19 +361,30 @@ static void kvm_s390_cpu_feat_init(void) int kvm_arch_init(void *opaque) { + int rc; + kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long)); if (!kvm_s390_dbf) return -ENOMEM; if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) { - debug_unregister(kvm_s390_dbf); - return -ENOMEM; + rc = -ENOMEM; + goto out_debug_unreg; } kvm_s390_cpu_feat_init(); /* Register floating interrupt controller interface. */ - return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC); + rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC); + if (rc) { + pr_err("Failed to register FLIC rc=%d\n", rc); + goto out_debug_unreg; + } + return 0; + +out_debug_unreg: + debug_unregister(kvm_s390_dbf); + return rc; } void kvm_arch_exit(void) From 9714561e561d8a7c3c8c65f3743e617bed9a686f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 13 Dec 2018 14:09:56 +0100 Subject: [PATCH 1237/3715] pinctrl: sh-pfc: sh7264: Fix PFCR3 and PFCR0 register configuration [ Upstream commit 1b99d0c80bbe1810572c2cb77b90f67886adfa8d ] The Port F Control Register 3 (PFCR3) contains only a single field. However, counting from left to right, it is the fourth field, not the first field. Insert the missing dummy configuration values (3 fields of 16 values) to fix this. The descriptor for the Port F Control Register 0 (PFCR0) lacks the description for the 4th field (PF0 Mode, PF0MD[2:0]). Add the missing configuration values to fix this. Fixes: a8d42fc4217b1ea1 ("sh-pfc: Add sh7264 pinmux support") Signed-off-by: Geert Uytterhoeven Reviewed-by: Simon Horman Signed-off-by: Sasha Levin --- drivers/pinctrl/sh-pfc/pfc-sh7264.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7264.c b/drivers/pinctrl/sh-pfc/pfc-sh7264.c index 8070765311db..e1c34e19222e 100644 --- a/drivers/pinctrl/sh-pfc/pfc-sh7264.c +++ b/drivers/pinctrl/sh-pfc/pfc-sh7264.c @@ -1716,6 +1716,9 @@ static const struct pinmux_cfg_reg pinmux_config_regs[] = { }, { PINMUX_CFG_REG("PFCR3", 0xfffe38a8, 16, 4) { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, PF12MD_000, PF12MD_001, 0, PF12MD_011, PF12MD_100, PF12MD_101, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } @@ -1759,8 +1762,10 @@ static const struct pinmux_cfg_reg pinmux_config_regs[] = { 0, 0, 0, 0, 0, 0, 0, 0, PF1MD_000, PF1MD_001, PF1MD_010, PF1MD_011, PF1MD_100, PF1MD_101, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 - } + 0, 0, 0, 0, 0, 0, 0, 0, + PF0MD_000, PF0MD_001, PF0MD_010, PF0MD_011, + PF0MD_100, PF0MD_101, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 } }, { PINMUX_CFG_REG("PFIOR0", 0xfffe38b2, 16, 1) { From 449c108f09d83bd2e57f11a240213f3320e9edb0 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 13 Dec 2018 14:41:11 +0100 Subject: [PATCH 1238/3715] pinctrl: sh-pfc: sh7734: Fix shifted values in IPSR10 [ Upstream commit 054f2400f706327f96770219c3065b5131f8f154 ] Some values in the Peripheral Function Select Register 10 descriptor are shifted by one position, which may cause a peripheral function to be programmed incorrectly. Fixing this makes all HSCIF0 pins use Function 4 (value 3), like was already the case for the HSCK0 pin in field IP10[5:3]. Fixes: ac1ebc2190f575fc ("sh-pfc: Add sh7734 pinmux support") Signed-off-by: Geert Uytterhoeven Reviewed-by: Simon Horman Signed-off-by: Sasha Levin --- drivers/pinctrl/sh-pfc/pfc-sh7734.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7734.c b/drivers/pinctrl/sh-pfc/pfc-sh7734.c index 6502e676d368..33232041ee86 100644 --- a/drivers/pinctrl/sh-pfc/pfc-sh7734.c +++ b/drivers/pinctrl/sh-pfc/pfc-sh7734.c @@ -2213,22 +2213,22 @@ static const struct pinmux_cfg_reg pinmux_config_regs[] = { /* IP10_22 [1] */ FN_CAN_CLK_A, FN_RX4_D, /* IP10_21_19 [3] */ - FN_AUDIO_CLKOUT, FN_TX1_E, FN_HRTS0_C, FN_FSE_B, - FN_LCD_M_DISP_B, 0, 0, 0, + FN_AUDIO_CLKOUT, FN_TX1_E, 0, FN_HRTS0_C, FN_FSE_B, + FN_LCD_M_DISP_B, 0, 0, /* IP10_18_16 [3] */ - FN_AUDIO_CLKC, FN_SCK1_E, FN_HCTS0_C, FN_FRB_B, - FN_LCD_VEPWC_B, 0, 0, 0, + FN_AUDIO_CLKC, FN_SCK1_E, 0, FN_HCTS0_C, FN_FRB_B, + FN_LCD_VEPWC_B, 0, 0, /* IP10_15 [1] */ FN_AUDIO_CLKB_A, FN_LCD_CLK_B, /* IP10_14_12 [3] */ FN_AUDIO_CLKA_A, FN_VI1_CLK_B, FN_SCK1_D, FN_IECLK_B, FN_LCD_FLM_B, 0, 0, 0, /* IP10_11_9 [3] */ - FN_SSI_SDATA3, FN_VI1_7_B, FN_HTX0_C, FN_FWE_B, - FN_LCD_CL2_B, 0, 0, 0, + FN_SSI_SDATA3, FN_VI1_7_B, 0, FN_HTX0_C, FN_FWE_B, + FN_LCD_CL2_B, 0, 0, /* IP10_8_6 [3] */ - FN_SSI_SDATA2, FN_VI1_6_B, FN_HRX0_C, FN_FRE_B, - FN_LCD_CL1_B, 0, 0, 0, + FN_SSI_SDATA2, FN_VI1_6_B, 0, FN_HRX0_C, FN_FRE_B, + FN_LCD_CL1_B, 0, 0, /* IP10_5_3 [3] */ FN_SSI_WS23, FN_VI1_5_B, FN_TX1_D, FN_HSCK0_C, FN_FALE_B, FN_LCD_DON_B, 0, 0, 0, From 0b2db05c8e561a53494fcac016d145dcfaa4fa7f Mon Sep 17 00:00:00 2001 From: Peter Hutterer Date: Thu, 13 Dec 2018 11:28:51 +1000 Subject: [PATCH 1239/3715] HID: doc: fix wrong data structure reference for UHID_OUTPUT [ Upstream commit 46b14eef59a8157138dc02f916a7f97c73b3ec53 ] Signed-off-by: Peter Hutterer Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- Documentation/hid/uhid.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/hid/uhid.txt b/Documentation/hid/uhid.txt index c8656dd029a9..958fff945304 100644 --- a/Documentation/hid/uhid.txt +++ b/Documentation/hid/uhid.txt @@ -160,7 +160,7 @@ them but you should handle them according to your needs. UHID_OUTPUT: This is sent if the HID device driver wants to send raw data to the I/O device on the interrupt channel. You should read the payload and forward it to - the device. The payload is of type "struct uhid_data_req". + the device. The payload is of type "struct uhid_output_req". This may be received even though you haven't received UHID_OPEN, yet. UHID_GET_REPORT: From c6cd92fcabd6cc78bb1808c6a18245c842722fc1 Mon Sep 17 00:00:00 2001 From: Sweet Tea Date: Tue, 13 Nov 2018 08:04:24 -0500 Subject: [PATCH 1240/3715] dm flakey: Properly corrupt multi-page bios. [ Upstream commit a00f5276e26636cbf72f24f79831026d2e2868e7 ] The flakey target is documented to be able to corrupt the Nth byte in a bio, but does not corrupt byte indices after the first biovec in the bio. Change the corrupting function to actually corrupt the Nth byte no matter in which biovec that index falls. A test device generating two-page bios, atop a flakey device configured to corrupt a byte index on the second page, verified both the failure to corrupt before this patch and the expected corruption after this change. Signed-off-by: John Dorminy Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-flakey.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 0c1ef63c3461..b1b68e01b889 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -282,20 +282,31 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio) static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc) { - unsigned bio_bytes = bio_cur_bytes(bio); - char *data = bio_data(bio); + unsigned int corrupt_bio_byte = fc->corrupt_bio_byte - 1; + + struct bvec_iter iter; + struct bio_vec bvec; + + if (!bio_has_data(bio)) + return; /* - * Overwrite the Nth byte of the data returned. + * Overwrite the Nth byte of the bio's data, on whichever page + * it falls. */ - if (data && bio_bytes >= fc->corrupt_bio_byte) { - data[fc->corrupt_bio_byte - 1] = fc->corrupt_bio_value; - - DMDEBUG("Corrupting data bio=%p by writing %u to byte %u " - "(rw=%c bi_opf=%u bi_sector=%llu cur_bytes=%u)\n", - bio, fc->corrupt_bio_value, fc->corrupt_bio_byte, - (bio_data_dir(bio) == WRITE) ? 'w' : 'r', bio->bi_opf, - (unsigned long long)bio->bi_iter.bi_sector, bio_bytes); + bio_for_each_segment(bvec, bio, iter) { + if (bio_iter_len(bio, iter) > corrupt_bio_byte) { + char *segment = (page_address(bio_iter_page(bio, iter)) + + bio_iter_offset(bio, iter)); + segment[corrupt_bio_byte] = fc->corrupt_bio_value; + DMDEBUG("Corrupting data bio=%p by writing %u to byte %u " + "(rw=%c bi_opf=%u bi_sector=%llu size=%u)\n", + bio, fc->corrupt_bio_value, fc->corrupt_bio_byte, + (bio_data_dir(bio) == WRITE) ? 'w' : 'r', bio->bi_opf, + (unsigned long long)bio->bi_iter.bi_sector, bio->bi_iter.bi_size); + break; + } + corrupt_bio_byte -= bio_iter_len(bio, iter); } } From fff825f248888723a2aec73d031754098bf3ebf1 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 18 Dec 2018 08:29:32 -0600 Subject: [PATCH 1241/3715] gfs2: take jdata unstuff into account in do_grow [ Upstream commit bc0205612bbd4dd4026d4ba6287f5643c37366ec ] Before this patch, function do_grow would not reserve enough journal blocks in the transaction to unstuff jdata files while growing them. This patch adds the logic to add one more block if the file to grow is jdata. Signed-off-by: Bob Peterson Reviewed-by: Andreas Gruenbacher Signed-off-by: Sasha Levin --- fs/gfs2/bmap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 09432b25fe9b..b3a1b16d4e3e 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1445,6 +1445,8 @@ static int do_grow(struct inode *inode, u64 size) } error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT + + (unstuff && + gfs2_is_jdata(ip) ? RES_JDATA : 0) + (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ? 0 : RES_QUOTA), 0); if (error) From 05ec56052bb9f14986b3ff0b05518f2a91874c0f Mon Sep 17 00:00:00 2001 From: Nick Bowler Date: Mon, 17 Dec 2018 09:35:27 -0800 Subject: [PATCH 1242/3715] xfs: Align compat attrlist_by_handle with native implementation. [ Upstream commit c456d64449efe37da50832b63d91652a85ea1d20 ] While inspecting the ioctl implementations, I noticed that the compat implementation of XFS_IOC_ATTRLIST_BY_HANDLE does not do exactly the same thing as the native implementation. Specifically, the "cursor" does not appear to be written out to userspace on the compat path, like it is on the native path. This adjusts the compat implementation to copy out the cursor just like the native implementation does. The attrlist cursor does not require any special compat handling. This fixes xfstests xfs/269 on both IA-32 and x32 userspace, when running on an amd64 kernel. Signed-off-by: Nick Bowler Fixes: 0facef7fb053b ("xfs: in _attrlist_by_handle, copy the cursor back to userspace") Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Sasha Levin --- fs/xfs/xfs_ioctl32.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index fa0bc4d46065..d3c0e4b8bf42 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -347,6 +347,7 @@ xfs_compat_attrlist_by_handle( { int error; attrlist_cursor_kern_t *cursor; + compat_xfs_fsop_attrlist_handlereq_t __user *p = arg; compat_xfs_fsop_attrlist_handlereq_t al_hreq; struct dentry *dentry; char *kbuf; @@ -381,6 +382,11 @@ xfs_compat_attrlist_by_handle( if (error) goto out_kfree; + if (copy_to_user(&p->pos, cursor, sizeof(attrlist_cursor_kern_t))) { + error = -EFAULT; + goto out_kfree; + } + if (copy_to_user(compat_ptr(al_hreq.buffer), kbuf, al_hreq.buflen)) error = -EFAULT; From 1cc7f4c628d92275572683219ea5e34580bd83bd Mon Sep 17 00:00:00 2001 From: Nick Bowler Date: Mon, 17 Dec 2018 09:35:27 -0800 Subject: [PATCH 1243/3715] xfs: Fix bulkstat compat ioctls on x32 userspace. [ Upstream commit 7ca860e3c1a74ad6bd8949364073ef1044cad758 ] The bulkstat family of ioctls are problematic on x32, because there is a mixup of native 32-bit and 64-bit conventions. The xfs_fsop_bulkreq struct contains pointers and 32-bit integers so that matches the native 32-bit layout, and that means the ioctl implementation goes into the regular compat path on x32. However, the 'ubuffer' member of that struct in turn refers to either struct xfs_inogrp or xfs_bstat (or an array of these). On x32, those structures match the native 64-bit layout. The compat implementation writes out the 32-bit version of these structures. This is not the expected format for x32 userspace, causing problems. Fortunately the functions which actually output these xfs_inogrp and xfs_bstat structures have an easy way to select which output format is required, so we just need a little tweak to select the right format on x32. Signed-off-by: Nick Bowler Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Sasha Levin --- fs/xfs/xfs_ioctl32.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index d3c0e4b8bf42..5f616a6a5358 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -252,6 +252,32 @@ xfs_compat_ioc_bulkstat( int done; int error; + /* + * Output structure handling functions. Depending on the command, + * either the xfs_bstat and xfs_inogrp structures are written out + * to userpace memory via bulkreq.ubuffer. Normally the compat + * functions and structure size are the correct ones to use ... + */ + inumbers_fmt_pf inumbers_func = xfs_inumbers_fmt_compat; + bulkstat_one_pf bs_one_func = xfs_bulkstat_one_compat; + size_t bs_one_size = sizeof(struct compat_xfs_bstat); + +#ifdef CONFIG_X86_X32 + if (in_x32_syscall()) { + /* + * ... but on x32 the input xfs_fsop_bulkreq has pointers + * which must be handled in the "compat" (32-bit) way, while + * the xfs_bstat and xfs_inogrp structures follow native 64- + * bit layout convention. So adjust accordingly, otherwise + * the data written out in compat layout will not match what + * x32 userspace expects. + */ + inumbers_func = xfs_inumbers_fmt; + bs_one_func = xfs_bulkstat_one; + bs_one_size = sizeof(struct xfs_bstat); + } +#endif + /* done = 1 if there are more stats to get and if bulkstat */ /* should be called again (unused here, but used in dmapi) */ @@ -283,15 +309,15 @@ xfs_compat_ioc_bulkstat( if (cmd == XFS_IOC_FSINUMBERS_32) { error = xfs_inumbers(mp, &inlast, &count, - bulkreq.ubuffer, xfs_inumbers_fmt_compat); + bulkreq.ubuffer, inumbers_func); } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) { int res; - error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer, - sizeof(compat_xfs_bstat_t), NULL, &res); + error = bs_one_func(mp, inlast, bulkreq.ubuffer, + bs_one_size, NULL, &res); } else if (cmd == XFS_IOC_FSBULKSTAT_32) { error = xfs_bulkstat(mp, &inlast, &count, - xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t), + bs_one_func, bs_one_size, bulkreq.ubuffer, &done); } else error = -EINVAL; From 652904f9337fd43e15437b91cd9a26d1d0a49676 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 17 Dec 2018 10:05:36 +0300 Subject: [PATCH 1244/3715] IB/qib: Fix an error code in qib_sdma_verbs_send() [ Upstream commit 5050ae5fa3d54c8e83e1e447cc7e3591110a7f57 ] We accidentally return success on this error path. Fixes: f931551bafe1 ("IB/qib: Add new qib driver for QLogic PCIe InfiniBand adapters") Signed-off-by: Dan Carpenter Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/qib/qib_sdma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c index 891873b38a1e..5f3f197678b7 100644 --- a/drivers/infiniband/hw/qib/qib_sdma.c +++ b/drivers/infiniband/hw/qib/qib_sdma.c @@ -600,8 +600,10 @@ retry: dw = (len + 3) >> 2; addr = dma_map_single(&ppd->dd->pcidev->dev, sge->vaddr, dw << 2, DMA_TO_DEVICE); - if (dma_mapping_error(&ppd->dd->pcidev->dev, addr)) + if (dma_mapping_error(&ppd->dd->pcidev->dev, addr)) { + ret = -ENOMEM; goto unmap; + } sdmadesc[0] = 0; make_sdma_desc(ppd, sdmadesc, (u64) addr, dw, dwoffset); /* SDmaUseLargeBuf has to be set in every descriptor */ From befe38cd27eacd0e77c760183fc526f3806f9fde Mon Sep 17 00:00:00 2001 From: Tao Ren Date: Wed, 3 Oct 2018 14:53:50 -0700 Subject: [PATCH 1245/3715] clocksource/drivers/fttmr010: Fix invalid interrupt register access [ Upstream commit 86fe57fc47b17b3528fa5497fc57e158d846c4ea ] TIMER_INTR_MASK register (Base Address of Timer + 0x38) is not designed for masking interrupts on ast2500 chips, and it's not even listed in ast2400 datasheet, so it's not safe to access TIMER_INTR_MASK on aspeed chips. Similarly, TIMER_INTR_STATE register (Base Address of Timer + 0x34) is not interrupt status register on ast2400 and ast2500 chips. Although there is no side effect to reset the register in fttmr010_common_init(), it's just misleading to do so. Besides, "count_down" is renamed to "is_aspeed" in "fttmr010" structure, and more comments are added so the code is more readble. Signed-off-by: Tao Ren Reviewed-by: Linus Walleij Signed-off-by: Daniel Lezcano Signed-off-by: Sasha Levin --- drivers/clocksource/timer-fttmr010.c | 73 ++++++++++++++++------------ 1 file changed, 42 insertions(+), 31 deletions(-) diff --git a/drivers/clocksource/timer-fttmr010.c b/drivers/clocksource/timer-fttmr010.c index cdfe1c82f3f0..3928f3999015 100644 --- a/drivers/clocksource/timer-fttmr010.c +++ b/drivers/clocksource/timer-fttmr010.c @@ -21,7 +21,7 @@ #include /* - * Register definitions for the timers + * Register definitions common for all the timer variants. */ #define TIMER1_COUNT (0x00) #define TIMER1_LOAD (0x04) @@ -36,9 +36,10 @@ #define TIMER3_MATCH1 (0x28) #define TIMER3_MATCH2 (0x2c) #define TIMER_CR (0x30) -#define TIMER_INTR_STATE (0x34) -#define TIMER_INTR_MASK (0x38) +/* + * Control register (TMC30) bit fields for fttmr010/gemini/moxart timers. + */ #define TIMER_1_CR_ENABLE BIT(0) #define TIMER_1_CR_CLOCK BIT(1) #define TIMER_1_CR_INT BIT(2) @@ -53,8 +54,9 @@ #define TIMER_3_CR_UPDOWN BIT(11) /* - * The Aspeed AST2400 moves bits around in the control register - * and lacks bits for setting the timer to count upwards. + * Control register (TMC30) bit fields for aspeed ast2400/ast2500 timers. + * The aspeed timers move bits around in the control register and lacks + * bits for setting the timer to count upwards. */ #define TIMER_1_CR_ASPEED_ENABLE BIT(0) #define TIMER_1_CR_ASPEED_CLOCK BIT(1) @@ -66,6 +68,18 @@ #define TIMER_3_CR_ASPEED_CLOCK BIT(9) #define TIMER_3_CR_ASPEED_INT BIT(10) +/* + * Interrupt status/mask register definitions for fttmr010/gemini/moxart + * timers. + * The registers don't exist and they are not needed on aspeed timers + * because: + * - aspeed timer overflow interrupt is controlled by bits in Control + * Register (TMC30). + * - aspeed timers always generate interrupt when either one of the + * Match registers equals to Status register. + */ +#define TIMER_INTR_STATE (0x34) +#define TIMER_INTR_MASK (0x38) #define TIMER_1_INT_MATCH1 BIT(0) #define TIMER_1_INT_MATCH2 BIT(1) #define TIMER_1_INT_OVERFLOW BIT(2) @@ -80,7 +94,7 @@ struct fttmr010 { void __iomem *base; unsigned int tick_rate; - bool count_down; + bool is_aspeed; u32 t1_enable_val; struct clock_event_device clkevt; #ifdef CONFIG_ARM @@ -130,7 +144,7 @@ static int fttmr010_timer_set_next_event(unsigned long cycles, cr &= ~fttmr010->t1_enable_val; writel(cr, fttmr010->base + TIMER_CR); - if (fttmr010->count_down) { + if (fttmr010->is_aspeed) { /* * ASPEED Timer Controller will load TIMER1_LOAD register * into TIMER1_COUNT register when the timer is re-enabled. @@ -175,16 +189,17 @@ static int fttmr010_timer_set_oneshot(struct clock_event_device *evt) /* Setup counter start from 0 or ~0 */ writel(0, fttmr010->base + TIMER1_COUNT); - if (fttmr010->count_down) + if (fttmr010->is_aspeed) { writel(~0, fttmr010->base + TIMER1_LOAD); - else + } else { writel(0, fttmr010->base + TIMER1_LOAD); - /* Enable interrupt */ - cr = readl(fttmr010->base + TIMER_INTR_MASK); - cr &= ~(TIMER_1_INT_OVERFLOW | TIMER_1_INT_MATCH2); - cr |= TIMER_1_INT_MATCH1; - writel(cr, fttmr010->base + TIMER_INTR_MASK); + /* Enable interrupt */ + cr = readl(fttmr010->base + TIMER_INTR_MASK); + cr &= ~(TIMER_1_INT_OVERFLOW | TIMER_1_INT_MATCH2); + cr |= TIMER_1_INT_MATCH1; + writel(cr, fttmr010->base + TIMER_INTR_MASK); + } return 0; } @@ -201,9 +216,8 @@ static int fttmr010_timer_set_periodic(struct clock_event_device *evt) writel(cr, fttmr010->base + TIMER_CR); /* Setup timer to fire at 1/HZ intervals. */ - if (fttmr010->count_down) { + if (fttmr010->is_aspeed) { writel(period, fttmr010->base + TIMER1_LOAD); - writel(0, fttmr010->base + TIMER1_MATCH1); } else { cr = 0xffffffff - (period - 1); writel(cr, fttmr010->base + TIMER1_COUNT); @@ -281,23 +295,21 @@ static int __init fttmr010_common_init(struct device_node *np, bool is_aspeed) } /* - * The Aspeed AST2400 moves bits around in the control register, - * otherwise it works the same. + * The Aspeed timers move bits around in the control register. */ if (is_aspeed) { fttmr010->t1_enable_val = TIMER_1_CR_ASPEED_ENABLE | TIMER_1_CR_ASPEED_INT; - /* Downward not available */ - fttmr010->count_down = true; + fttmr010->is_aspeed = true; } else { fttmr010->t1_enable_val = TIMER_1_CR_ENABLE | TIMER_1_CR_INT; - } - /* - * Reset the interrupt mask and status - */ - writel(TIMER_INT_ALL_MASK, fttmr010->base + TIMER_INTR_MASK); - writel(0, fttmr010->base + TIMER_INTR_STATE); + /* + * Reset the interrupt mask and status + */ + writel(TIMER_INT_ALL_MASK, fttmr010->base + TIMER_INTR_MASK); + writel(0, fttmr010->base + TIMER_INTR_STATE); + } /* * Enable timer 1 count up, timer 2 count up, except on Aspeed, @@ -306,9 +318,8 @@ static int __init fttmr010_common_init(struct device_node *np, bool is_aspeed) if (is_aspeed) val = TIMER_2_CR_ASPEED_ENABLE; else { - val = TIMER_2_CR_ENABLE; - if (!fttmr010->count_down) - val |= TIMER_1_CR_UPDOWN | TIMER_2_CR_UPDOWN; + val = TIMER_2_CR_ENABLE | TIMER_1_CR_UPDOWN | + TIMER_2_CR_UPDOWN; } writel(val, fttmr010->base + TIMER_CR); @@ -321,7 +332,7 @@ static int __init fttmr010_common_init(struct device_node *np, bool is_aspeed) writel(0, fttmr010->base + TIMER2_MATCH1); writel(0, fttmr010->base + TIMER2_MATCH2); - if (fttmr010->count_down) { + if (fttmr010->is_aspeed) { writel(~0, fttmr010->base + TIMER2_LOAD); clocksource_mmio_init(fttmr010->base + TIMER2_COUNT, "FTTMR010-TIMER2", @@ -371,7 +382,7 @@ static int __init fttmr010_common_init(struct device_node *np, bool is_aspeed) #ifdef CONFIG_ARM /* Also use this timer for delays */ - if (fttmr010->count_down) + if (fttmr010->is_aspeed) fttmr010->delay_timer.read_current_timer = fttmr010_read_current_timer_down; else From 9ab07796de822dced31e4522ee1b4691b14c3cf9 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 18 Dec 2018 13:16:00 +0000 Subject: [PATCH 1246/3715] vxlan: Fix error path in __vxlan_dev_create() [ Upstream commit 6db9246871394b3a136cd52001a0763676563840 ] When a failure occurs in rtnl_configure_link(), the current code calls unregister_netdevice() to roll back the earlier call to register_netdevice(), and jumps to errout, which calls vxlan_fdb_destroy(). However unregister_netdevice() calls transitively ndo_uninit, which is vxlan_uninit(), and that already takes care of deleting the default FDB entry by calling vxlan_fdb_delete_default(). Since the entry added earlier in __vxlan_dev_create() is exactly the default entry, the cleanup code in the errout block always leads to double free and thus a panic. Besides, since vxlan_fdb_delete_default() always destroys the FDB entry with notification enabled, the deletion of the default entry is notified even before the addition was notified. Instead, move the unregister_netdevice() call after the manual destroy, which solves both problems. Fixes: 0241b836732f ("vxlan: fix default fdb entry netlink notify ordering during netdev create") Signed-off-by: Petr Machata Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/vxlan.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 6d26bbd190dd..153a81ece9fe 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -3217,6 +3217,7 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev, struct vxlan_net *vn = net_generic(net, vxlan_net_id); struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_fdb *f = NULL; + bool unregister = false; int err; err = vxlan_dev_configure(net, dev, conf, false, extack); @@ -3242,12 +3243,11 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev, err = register_netdevice(dev); if (err) goto errout; + unregister = true; err = rtnl_configure_link(dev, NULL); - if (err) { - unregister_netdevice(dev); + if (err) goto errout; - } /* notify default fdb entry */ if (f) @@ -3255,9 +3255,16 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev, list_add(&vxlan->next, &vn->vxlan_list); return 0; + errout: + /* unregister_netdevice() destroys the default FDB entry with deletion + * notification. But the addition notification was not sent yet, so + * destroy the entry by hand here. + */ if (f) vxlan_fdb_destroy(vxlan, f, false); + if (unregister) + unregister_netdevice(dev); return err; } From 2585fe4893a0c4c18f0727a8185c5281c8a7c59e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 16 Nov 2018 17:27:42 +0000 Subject: [PATCH 1247/3715] powerpc/book3s/32: fix number of bats in p/v_block_mapped() [ Upstream commit e93ba1b7eb5b188c749052df7af1c90821c5f320 ] This patch fixes the loop in p_block_mapped() and v_block_mapped() to scan the entire bat_addrs[] array. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/mm/ppc_mmu_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index 2a049fb8523d..96c52271e9c2 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -52,7 +52,7 @@ struct batrange { /* stores address ranges mapped by BATs */ phys_addr_t v_block_mapped(unsigned long va) { int b; - for (b = 0; b < 4; ++b) + for (b = 0; b < ARRAY_SIZE(bat_addrs); ++b) if (va >= bat_addrs[b].start && va < bat_addrs[b].limit) return bat_addrs[b].phys + (va - bat_addrs[b].start); return 0; @@ -64,7 +64,7 @@ phys_addr_t v_block_mapped(unsigned long va) unsigned long p_block_mapped(phys_addr_t pa) { int b; - for (b = 0; b < 4; ++b) + for (b = 0; b < ARRAY_SIZE(bat_addrs); ++b) if (pa >= bat_addrs[b].phys && pa < (bat_addrs[b].limit-bat_addrs[b].start) +bat_addrs[b].phys) From f871fc2e6ad9a83f30343a7ebfee2130cad4c496 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 16 Nov 2018 17:31:08 +0000 Subject: [PATCH 1248/3715] powerpc/xmon: fix dump_segments() [ Upstream commit 32c8c4c621897199e690760c2d57054f8b84b6e6 ] mfsrin() takes segment num from bits 31-28 (IBM bits 0-3). Signed-off-by: Christophe Leroy [mpe: Clarify bit numbering] Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/xmon/xmon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 5a739588aa50..51a53fd51722 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -3293,7 +3293,7 @@ void dump_segments(void) printf("sr0-15 ="); for (i = 0; i < 16; ++i) - printf(" %x", mfsrin(i)); + printf(" %x", mfsrin(i << 28)); printf("\n"); } #endif From b63cd67875dda7d041f0febc2bb5453361b8b101 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Tue, 18 Dec 2018 23:04:13 -0600 Subject: [PATCH 1249/3715] drivers/regulator: fix a missing check of return value [ Upstream commit 966e927bf8cc6a44f8b72582a1d6d3ffc73b12ad ] If palmas_smps_read() fails, we should not use the read data in "reg" which may contain random value. The fix inserts a check for the return value of palmas_smps_read(): If it fails, we return the error code upstream and stop using "reg". Signed-off-by: Kangjie Lu Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/palmas-regulator.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/regulator/palmas-regulator.c b/drivers/regulator/palmas-regulator.c index bb5ab7d78895..c2cc392a27d4 100644 --- a/drivers/regulator/palmas-regulator.c +++ b/drivers/regulator/palmas-regulator.c @@ -443,13 +443,16 @@ static int palmas_ldo_write(struct palmas *palmas, unsigned int reg, static int palmas_set_mode_smps(struct regulator_dev *dev, unsigned int mode) { int id = rdev_get_id(dev); + int ret; struct palmas_pmic *pmic = rdev_get_drvdata(dev); struct palmas_pmic_driver_data *ddata = pmic->palmas->pmic_ddata; struct palmas_regs_info *rinfo = &ddata->palmas_regs_info[id]; unsigned int reg; bool rail_enable = true; - palmas_smps_read(pmic->palmas, rinfo->ctrl_addr, ®); + ret = palmas_smps_read(pmic->palmas, rinfo->ctrl_addr, ®); + if (ret) + return ret; reg &= ~PALMAS_SMPS12_CTRL_MODE_ACTIVE_MASK; From c6418ce8b8526b03d04f79816eb412b9907425bc Mon Sep 17 00:00:00 2001 From: Jonathan Bakker Date: Wed, 19 Dec 2018 12:22:28 +0100 Subject: [PATCH 1250/3715] Bluetooth: hci_bcm: Handle specific unknown packets after firmware loading MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 22bba80500fdf624a7cfbb65fdfa97a038ae224d ] The Broadcom controller on aries S5PV210 boards sends out a couple of unknown packets after the firmware is loaded. This will cause logging of errors such as: Bluetooth: hci0: Frame reassembly failed (-84) This is probably also the case with other boards, as there are related Android userspace patches for custom ROMs such as https://review.lineageos.org/#/c/LineageOS/android_system_bt/+/142721/ Since this appears to be intended behaviour, treated them as diagnostic packets. Note that this is another variant of commit 01d5e44ace8a ("Bluetooth: hci_bcm: Handle empty packet after firmware loading") Signed-off-by: Jonathan Bakker Signed-off-by: Paweł Chmiel Signed-off-by: Marcel Holtmann Signed-off-by: Sasha Levin --- drivers/bluetooth/hci_bcm.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index 6d41b2023f09..61971ddbd231 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -50,6 +50,12 @@ #define BCM_LM_DIAG_PKT 0x07 #define BCM_LM_DIAG_SIZE 63 +#define BCM_TYPE49_PKT 0x31 +#define BCM_TYPE49_SIZE 0 + +#define BCM_TYPE52_PKT 0x34 +#define BCM_TYPE52_SIZE 0 + #define BCM_AUTOSUSPEND_DELAY 5000 /* default autosleep delay */ /* platform device driver resources */ @@ -483,12 +489,28 @@ finalize: .lsize = 0, \ .maxlen = BCM_NULL_SIZE +#define BCM_RECV_TYPE49 \ + .type = BCM_TYPE49_PKT, \ + .hlen = BCM_TYPE49_SIZE, \ + .loff = 0, \ + .lsize = 0, \ + .maxlen = BCM_TYPE49_SIZE + +#define BCM_RECV_TYPE52 \ + .type = BCM_TYPE52_PKT, \ + .hlen = BCM_TYPE52_SIZE, \ + .loff = 0, \ + .lsize = 0, \ + .maxlen = BCM_TYPE52_SIZE + static const struct h4_recv_pkt bcm_recv_pkts[] = { { H4_RECV_ACL, .recv = hci_recv_frame }, { H4_RECV_SCO, .recv = hci_recv_frame }, { H4_RECV_EVENT, .recv = hci_recv_frame }, { BCM_RECV_LM_DIAG, .recv = hci_recv_diag }, { BCM_RECV_NULL, .recv = hci_recv_diag }, + { BCM_RECV_TYPE49, .recv = hci_recv_diag }, + { BCM_RECV_TYPE52, .recv = hci_recv_diag }, }; static int bcm_recv(struct hci_uart *hu, const void *data, int count) From 077d87993053a743ddf29540ab9a75928e6cf27c Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Wed, 19 Dec 2018 14:19:20 +0300 Subject: [PATCH 1251/3715] serial: max310x: Fix tx_empty() callback [ Upstream commit a8da3c7873ea57acb8f9cea58c0af477522965aa ] Function max310x_tx_empty() accesses the IRQSTS register, which is cleared by IC when reading, so if there is an interrupt status, we will lose it. This patch implement the transmitter check only by the current FIFO level. Signed-off-by: Alexander Shiyan Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/max310x.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index 0969a0d97b2b..cec995ec11ea 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -769,12 +769,9 @@ static void max310x_start_tx(struct uart_port *port) static unsigned int max310x_tx_empty(struct uart_port *port) { - unsigned int lvl, sts; + u8 lvl = max310x_port_read(port, MAX310X_TXFIFOLVL_REG); - lvl = max310x_port_read(port, MAX310X_TXFIFOLVL_REG); - sts = max310x_port_read(port, MAX310X_IRQSTS_REG); - - return ((sts & MAX310X_IRQ_TXEMPTY_BIT) && !lvl) ? TIOCSER_TEMT : 0; + return lvl ? 0 : TIOCSER_TEMT; } static unsigned int max310x_get_mctrl(struct uart_port *port) From bcf9cabb99bf316ccddbe3436419b00399590a9e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 3 Dec 2018 13:20:47 +0100 Subject: [PATCH 1252/3715] openrisc: Fix broken paths to arch/or32 [ Upstream commit 57ce8ba0fd3a95bf29ed741df1c52bd591bf43ff ] OpenRISC was mainlined as "openrisc", not "or32". vmlinux.lds is generated from vmlinux.lds.S. Signed-off-by: Geert Uytterhoeven Signed-off-by: Stafford Horne Signed-off-by: Sasha Levin --- arch/openrisc/kernel/entry.S | 2 +- arch/openrisc/kernel/head.S | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S index b16e95a4e875..1107d34e45bf 100644 --- a/arch/openrisc/kernel/entry.S +++ b/arch/openrisc/kernel/entry.S @@ -184,7 +184,7 @@ handler: ;\ * occured. in fact they never do. if you need them use * values saved on stack (for SPR_EPC, SPR_ESR) or content * of r4 (for SPR_EEAR). for details look at EXCEPTION_HANDLE() - * in 'arch/or32/kernel/head.S' + * in 'arch/openrisc/kernel/head.S' */ /* =====================================================[ exceptions] === */ diff --git a/arch/openrisc/kernel/head.S b/arch/openrisc/kernel/head.S index 90979acdf165..4d878d13b860 100644 --- a/arch/openrisc/kernel/head.S +++ b/arch/openrisc/kernel/head.S @@ -1551,7 +1551,7 @@ _string_nl: /* * .data section should be page aligned - * (look into arch/or32/kernel/vmlinux.lds) + * (look into arch/openrisc/kernel/vmlinux.lds.S) */ .section .data,"aw" .align 8192 From 4b7152c9eadb11a5b179310a4c4f78c17cf96de1 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 17 Dec 2018 13:20:36 -0800 Subject: [PATCH 1253/3715] RDMA/srp: Propagate ib_post_send() failures to the SCSI mid-layer [ Upstream commit 2ee00f6a98c36f7e4ba07cc33f24cc5a69060cc9 ] This patch avoids that the SCSI mid-layer keeps retrying forever if ib_post_send() fails. This was discovered while testing immediate data support and passing a too large num_sge value to ib_post_send(). Cc: Sergey Gorenko Cc: Max Gurtovoy Cc: Laurence Oberman Signed-off-by: Bart Van Assche Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/srp/ib_srp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 3f5b5893792c..9f7287f45d06 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -2210,6 +2210,7 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) if (srp_post_send(ch, iu, len)) { shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n"); + scmnd->result = DID_ERROR << 16; goto err_unmap; } From ed5ba2cc5bf99f76ec3bcfd9eac3fa3b9c11096c Mon Sep 17 00:00:00 2001 From: Anatoliy Glagolev Date: Thu, 6 Dec 2018 16:48:45 -0700 Subject: [PATCH 1254/3715] scsi: qla2xxx: deadlock by configfs_depend_item [ Upstream commit 17b18eaa6f59044a5172db7d07149e31ede0f920 ] The intent of invoking configfs_depend_item in commit 7474f52a82d51 ("tcm_qla2xxx: Perform configfs depend/undepend for base_tpg") was to prevent a physical Fibre Channel port removal when virtual (NPIV) ports announced through that physical port are active. The change does not work as expected: it makes enabled physical port dependent on target configfs subsystem (the port's parent), something the configfs guarantees anyway. Besides, scheduling work in a worker thread and waiting for the work's completion is not really a valid workaround for the requirement not to call configfs_depend_item from a configfs callback: the call occasionally deadlocks. Thus, removing configfs_depend_item calls does not break anything and fixes the deadlock problem. Signed-off-by: Anatoliy Glagolev Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 48 +++++------------------------- drivers/scsi/qla2xxx/tcm_qla2xxx.h | 3 -- 2 files changed, 8 insertions(+), 43 deletions(-) diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 2fcdaadd10fa..e08ac431bc49 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -903,38 +903,14 @@ static ssize_t tcm_qla2xxx_tpg_enable_show(struct config_item *item, atomic_read(&tpg->lport_tpg_enabled)); } -static void tcm_qla2xxx_depend_tpg(struct work_struct *work) -{ - struct tcm_qla2xxx_tpg *base_tpg = container_of(work, - struct tcm_qla2xxx_tpg, tpg_base_work); - struct se_portal_group *se_tpg = &base_tpg->se_tpg; - struct scsi_qla_host *base_vha = base_tpg->lport->qla_vha; - - if (!target_depend_item(&se_tpg->tpg_group.cg_item)) { - atomic_set(&base_tpg->lport_tpg_enabled, 1); - qlt_enable_vha(base_vha); - } - complete(&base_tpg->tpg_base_comp); -} - -static void tcm_qla2xxx_undepend_tpg(struct work_struct *work) -{ - struct tcm_qla2xxx_tpg *base_tpg = container_of(work, - struct tcm_qla2xxx_tpg, tpg_base_work); - struct se_portal_group *se_tpg = &base_tpg->se_tpg; - struct scsi_qla_host *base_vha = base_tpg->lport->qla_vha; - - if (!qlt_stop_phase1(base_vha->vha_tgt.qla_tgt)) { - atomic_set(&base_tpg->lport_tpg_enabled, 0); - target_undepend_item(&se_tpg->tpg_group.cg_item); - } - complete(&base_tpg->tpg_base_comp); -} - static ssize_t tcm_qla2xxx_tpg_enable_store(struct config_item *item, const char *page, size_t count) { struct se_portal_group *se_tpg = to_tpg(item); + struct se_wwn *se_wwn = se_tpg->se_tpg_wwn; + struct tcm_qla2xxx_lport *lport = container_of(se_wwn, + struct tcm_qla2xxx_lport, lport_wwn); + struct scsi_qla_host *vha = lport->qla_vha; struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, struct tcm_qla2xxx_tpg, se_tpg); unsigned long op; @@ -953,24 +929,16 @@ static ssize_t tcm_qla2xxx_tpg_enable_store(struct config_item *item, if (atomic_read(&tpg->lport_tpg_enabled)) return -EEXIST; - INIT_WORK(&tpg->tpg_base_work, tcm_qla2xxx_depend_tpg); + atomic_set(&tpg->lport_tpg_enabled, 1); + qlt_enable_vha(vha); } else { if (!atomic_read(&tpg->lport_tpg_enabled)) return count; - INIT_WORK(&tpg->tpg_base_work, tcm_qla2xxx_undepend_tpg); + atomic_set(&tpg->lport_tpg_enabled, 0); + qlt_stop_phase1(vha->vha_tgt.qla_tgt); } - init_completion(&tpg->tpg_base_comp); - schedule_work(&tpg->tpg_base_work); - wait_for_completion(&tpg->tpg_base_comp); - if (op) { - if (!atomic_read(&tpg->lport_tpg_enabled)) - return -ENODEV; - } else { - if (atomic_read(&tpg->lport_tpg_enabled)) - return -EPERM; - } return count; } diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.h b/drivers/scsi/qla2xxx/tcm_qla2xxx.h index 7550ba2831c3..147cf6c90366 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.h +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.h @@ -48,9 +48,6 @@ struct tcm_qla2xxx_tpg { struct tcm_qla2xxx_tpg_attrib tpg_attrib; /* Returned by tcm_qla2xxx_make_tpg() */ struct se_portal_group se_tpg; - /* Items for dealing with configfs_depend_item */ - struct completion tpg_base_comp; - struct work_struct tpg_base_work; }; struct tcm_qla2xxx_fc_loopid { From f0a23cf647fdf3088f879c9520bbcc593477cf06 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Wed, 19 Dec 2018 15:42:50 +0530 Subject: [PATCH 1255/3715] scsi: csiostor: fix incorrect dma device in case of vport [ Upstream commit 9934613edcb40b92a216122876cd3b7e76d08390 ] In case of ->vport_create() call scsi_add_host_with_dma() instead of scsi_add_host() to pass correct dma device. Signed-off-by: Varun Prakash Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/csiostor/csio_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/csiostor/csio_init.c b/drivers/scsi/csiostor/csio_init.c index 28a9c7d706cb..03c7b1603dbc 100644 --- a/drivers/scsi/csiostor/csio_init.c +++ b/drivers/scsi/csiostor/csio_init.c @@ -649,7 +649,7 @@ csio_shost_init(struct csio_hw *hw, struct device *dev, if (csio_lnode_init(ln, hw, pln)) goto err_shost_put; - if (scsi_add_host(shost, dev)) + if (scsi_add_host_with_dma(shost, dev, &hw->pdev->dev)) goto err_lnode_exit; return ln; From e44926f59412c5867cd31a4bd47dafdbfb53577d Mon Sep 17 00:00:00 2001 From: Kyle Roeschley Date: Wed, 5 Dec 2018 09:43:11 +0200 Subject: [PATCH 1256/3715] ath6kl: Only use match sets when firmware supports it [ Upstream commit fb376a495fbdb886f38cfaf5a3805401b9e46f13 ] Commit dd45b7598f1c ("ath6kl: Include match ssid list in scheduled scan") merged the probed and matched SSID lists before sending them to the firmware. In the process, it assumed match set support is always available in ath6kl_set_probed_ssids, which breaks scans for hidden SSIDs. Now, check that the firmware supports matching SSIDs in scheduled scans before setting MATCH_SSID_FLAG. Fixes: dd45b7598f1c ("ath6kl: Include match ssid list in scheduled scan") Signed-off-by: Kyle Roeschley Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index 414b5b596efc..f790d8021fa1 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -939,7 +939,7 @@ static int ath6kl_set_probed_ssids(struct ath6kl *ar, else ssid_list[i].flag = ANY_SSID_FLAG; - if (n_match_ssid == 0) + if (ar->wiphy->max_match_sets != 0 && n_match_ssid == 0) ssid_list[i].flag |= MATCH_SSID_FLAG; } From 9e477ef5cddfa70e7f66b06bc517b7bd0edc424d Mon Sep 17 00:00:00 2001 From: Kyle Roeschley Date: Wed, 5 Dec 2018 09:43:13 +0200 Subject: [PATCH 1257/3715] ath6kl: Fix off by one error in scan completion [ Upstream commit 5803c12816c43bd09e5f4247dd9313c2d9a2c41b ] When ath6kl was reworked to share code between regular and scheduled scans in commit 3b8ffc6a22ba ("ath6kl: Configure probed SSID list consistently"), probed SSID entry changed from 1-index to 0-indexed. However, ath6kl_cfg80211_scan_complete_event() was missed in that change. Fix its indexing so that we correctly clear out the probed SSID list. Signed-off-by: Kyle Roeschley Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index f790d8021fa1..37deb9bae364 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -1093,7 +1093,7 @@ void ath6kl_cfg80211_scan_complete_event(struct ath6kl_vif *vif, bool aborted) if (vif->scan_req->n_ssids && vif->scan_req->ssids[0].ssid_len) { for (i = 0; i < vif->scan_req->n_ssids; i++) { ath6kl_wmi_probedssid_cmd(ar->wmi, vif->fw_vif_idx, - i + 1, DISABLE_SSID_FLAG, + i, DISABLE_SSID_FLAG, 0, NULL); } } From a910cb308dc5c63c52c671eb14926fdc35c948cf Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Mon, 9 Oct 2017 19:42:40 +0530 Subject: [PATCH 1258/3715] powerpc/perf: Fix unit_sel/cache_sel checks [ Upstream commit 2d46d4877b1afd14059393a48bdb8ce27955174c ] Raw event code has couple of fields "unit" and "cache" in it, to capture the "unit" to monitor for a given pmcxsel and cache reload qualifier to program in MMCR1. isa207_get_constraint() refers "unit" field to update the MMCRC (L2/L3) Event bus control fields with "cache" bits of the raw event code. These are power8 specific and not supported by PowerISA v3.0 pmu. So wrap the checks to be power8 specific. Also, "cache" bit field is referred to update MMCR1[16:17] and this check can be power8 specific. Fixes: 7ffd948fae4cd ('powerpc/perf: factor out power8 pmu functions') Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/perf/isa207-common.c | 25 ++++++++++++++++++------- arch/powerpc/perf/isa207-common.h | 4 ++-- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index cf9c35aa0cf4..7ecea7143e58 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -150,6 +150,14 @@ static bool is_thresh_cmp_valid(u64 event) return true; } +static unsigned int dc_ic_rld_quad_l1_sel(u64 event) +{ + unsigned int cache; + + cache = (event >> EVENT_CACHE_SEL_SHIFT) & MMCR1_DC_IC_QUAL_MASK; + return cache; +} + static inline u64 isa207_find_source(u64 idx, u32 sub_idx) { u64 ret = PERF_MEM_NA; @@ -290,10 +298,10 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) * have a cache selector of zero. The bank selector (bit 3) is * irrelevant, as long as the rest of the value is 0. */ - if (cache & 0x7) + if (!cpu_has_feature(CPU_FTR_ARCH_300) && (cache & 0x7)) return -1; - } else if (event & EVENT_IS_L1) { + } else if (cpu_has_feature(CPU_FTR_ARCH_300) || (event & EVENT_IS_L1)) { mask |= CNST_L1_QUAL_MASK; value |= CNST_L1_QUAL_VAL(cache); } @@ -396,11 +404,14 @@ int isa207_compute_mmcr(u64 event[], int n_ev, /* In continuous sampling mode, update SDAR on TLB miss */ mmcra_sdar_mode(event[i], &mmcra); - if (event[i] & EVENT_IS_L1) { - cache = event[i] >> EVENT_CACHE_SEL_SHIFT; - mmcr1 |= (cache & 1) << MMCR1_IC_QUAL_SHIFT; - cache >>= 1; - mmcr1 |= (cache & 1) << MMCR1_DC_QUAL_SHIFT; + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + cache = dc_ic_rld_quad_l1_sel(event[i]); + mmcr1 |= (cache) << MMCR1_DC_IC_QUAL_SHIFT; + } else { + if (event[i] & EVENT_IS_L1) { + cache = dc_ic_rld_quad_l1_sel(event[i]); + mmcr1 |= (cache) << MMCR1_DC_IC_QUAL_SHIFT; + } } if (is_event_marked(event[i])) { diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index 6c737d675792..493e5cc5fa8a 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -232,8 +232,8 @@ #define MMCR1_COMBINE_SHIFT(pmc) (35 - ((pmc) - 1)) #define MMCR1_PMCSEL_SHIFT(pmc) (24 - (((pmc) - 1)) * 8) #define MMCR1_FAB_SHIFT 36 -#define MMCR1_DC_QUAL_SHIFT 47 -#define MMCR1_IC_QUAL_SHIFT 46 +#define MMCR1_DC_IC_QUAL_MASK 0x3 +#define MMCR1_DC_IC_QUAL_SHIFT 46 /* MMCR1 Combine bits macro for power9 */ #define p9_MMCR1_COMBINE_SHIFT(pmc) (38 - ((pmc - 1) * 2)) From 6f95eb11d2d0d34b06ab1c5bce18237d68fedd2d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 14 Dec 2018 10:27:47 +0000 Subject: [PATCH 1259/3715] powerpc/prom: fix early DEBUG messages [ Upstream commit b18f0ae92b0a1db565c3e505fa87b6971ad3b641 ] This patch fixes early DEBUG messages in prom.c: - Use %px instead of %p to see the addresses - Cast memblock_phys_mem_size() with (unsigned long long) to avoid build failure when phys_addr_t is not 64 bits. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/kernel/prom.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index f83056297441..d96b28415090 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -128,7 +128,7 @@ static void __init move_device_tree(void) p = __va(memblock_alloc(size, PAGE_SIZE)); memcpy(p, initial_boot_params, size); initial_boot_params = p; - DBG("Moved device tree to 0x%p\n", p); + DBG("Moved device tree to 0x%px\n", p); } DBG("<- move_device_tree\n"); @@ -662,7 +662,7 @@ void __init early_init_devtree(void *params) { phys_addr_t limit; - DBG(" -> early_init_devtree(%p)\n", params); + DBG(" -> early_init_devtree(%px)\n", params); /* Too early to BUG_ON(), do it by hand */ if (!early_init_dt_verify(params)) @@ -722,7 +722,7 @@ void __init early_init_devtree(void *params) memblock_allow_resize(); memblock_dump_all(); - DBG("Phys. mem: %llx\n", memblock_phys_mem_size()); + DBG("Phys. mem: %llx\n", (unsigned long long)memblock_phys_mem_size()); /* We may need to relocate the flat tree, do it now. * FIXME .. and the initrd too? */ From b862fa12bf28136c8049e1dc0ae629efbfbbb11b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 14 Dec 2018 15:23:33 +0000 Subject: [PATCH 1260/3715] powerpc/mm: Make NULL pointer deferences explicit on bad page faults. [ Upstream commit 49a502ea23bf9dec47f8f3c3960909ff409cd1bb ] As several other arches including x86, this patch makes it explicit that a bad page fault is a NULL pointer dereference when the fault address is lower than PAGE_SIZE In the mean time, this page makes all bad_page_fault() messages shorter so that they remain on one single line. And it prefixes them by "BUG: " so that they get easily grepped. Signed-off-by: Christophe Leroy [mpe: Avoid pr_cont()] Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/mm/fault.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 52863deed65d..5fc8a010fdf0 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -581,21 +581,22 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) switch (regs->trap) { case 0x300: case 0x380: - printk(KERN_ALERT "Unable to handle kernel paging request for " - "data at address 0x%08lx\n", regs->dar); + pr_alert("BUG: %s at 0x%08lx\n", + regs->dar < PAGE_SIZE ? "Kernel NULL pointer dereference" : + "Unable to handle kernel data access", regs->dar); break; case 0x400: case 0x480: - printk(KERN_ALERT "Unable to handle kernel paging request for " - "instruction fetch\n"); + pr_alert("BUG: Unable to handle kernel instruction fetch%s", + regs->nip < PAGE_SIZE ? " (NULL pointer?)\n" : "\n"); break; case 0x600: - printk(KERN_ALERT "Unable to handle kernel paging request for " - "unaligned access at address 0x%08lx\n", regs->dar); + pr_alert("BUG: Unable to handle kernel unaligned access at 0x%08lx\n", + regs->dar); break; default: - printk(KERN_ALERT "Unable to handle kernel paging request for " - "unknown fault\n"); + pr_alert("BUG: Unable to handle unknown paging fault at 0x%08lx\n", + regs->dar); break; } printk(KERN_ALERT "Faulting instruction address: 0x%08lx\n", From 4fc4b8531e870c28d2da90b5ced666456e4cfa38 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 11 Dec 2018 13:27:33 +1100 Subject: [PATCH 1261/3715] powerpc/44x/bamboo: Fix PCI range [ Upstream commit 3cfb9ebe906b51f2942b1e251009bb251efd2ba6 ] The bamboo dts has a bug: it uses a non-naturally aligned range for PCI memory space. This isnt' supported by the code, thus causing PCI to break on this system. This is due to the fact that while the chip memory map has 1G reserved for PCI memory, it's only 512M aligned. The code doesn't know how to split that into 2 different PMMs and fails, so limit the region to 512M. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/boot/dts/bamboo.dts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/boot/dts/bamboo.dts b/arch/powerpc/boot/dts/bamboo.dts index aa68911f6560..084b82ba7493 100644 --- a/arch/powerpc/boot/dts/bamboo.dts +++ b/arch/powerpc/boot/dts/bamboo.dts @@ -268,8 +268,10 @@ /* Outbound ranges, one memory and one IO, * later cannot be changed. Chip supports a second * IO range but we don't use it for now + * The chip also supports a larger memory range but + * it's not naturally aligned, so our code will break */ - ranges = <0x02000000 0x00000000 0xa0000000 0x00000000 0xa0000000 0x00000000 0x40000000 + ranges = <0x02000000 0x00000000 0xa0000000 0x00000000 0xa0000000 0x00000000 0x20000000 0x02000000 0x00000000 0x00000000 0x00000000 0xe0000000 0x00000000 0x00100000 0x01000000 0x00000000 0x00000000 0x00000000 0xe8000000 0x00000000 0x00010000>; From 7a0d07f81e7e38ad063f9f94edcb812dff868a4f Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 2 Oct 2018 13:22:31 +1000 Subject: [PATCH 1262/3715] vfio/spapr_tce: Get rid of possible infinite loop [ Upstream commit 517ad4ae8aa93dccdb9a88c27257ecb421c9e848 ] As a part of cleanup, the SPAPR TCE IOMMU subdriver releases preregistered memory. If there is a bug in memory release, the loop in tce_iommu_release() becomes infinite; this actually happened to me. This makes the loop finite and prints a warning on every failure to make the code more bug prone. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Acked-by: Alex Williamson Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- drivers/vfio/vfio_iommu_spapr_tce.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index b4c68f3b82be..eba9aaf3cc17 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -409,6 +409,7 @@ static void tce_iommu_release(void *iommu_data) { struct tce_container *container = iommu_data; struct tce_iommu_group *tcegrp; + struct tce_iommu_prereg *tcemem, *tmtmp; long i; while (tce_groups_attached(container)) { @@ -431,13 +432,8 @@ static void tce_iommu_release(void *iommu_data) tce_iommu_free_table(container, tbl); } - while (!list_empty(&container->prereg_list)) { - struct tce_iommu_prereg *tcemem; - - tcemem = list_first_entry(&container->prereg_list, - struct tce_iommu_prereg, next); - WARN_ON_ONCE(tce_iommu_prereg_free(container, tcemem)); - } + list_for_each_entry_safe(tcemem, tmtmp, &container->prereg_list, next) + WARN_ON(tce_iommu_prereg_free(container, tcemem)); tce_iommu_disable(container); if (container->mm) From a355ad546e7b9464f19a424ed4efe2d0139a31f1 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Mon, 19 Nov 2018 15:25:17 +1100 Subject: [PATCH 1263/3715] powerpc/powernv/eeh/npu: Fix uninitialized variables in opal_pci_eeh_freeze_status [ Upstream commit c20577014f85f36d4e137d3d52a1f61225b4a3d2 ] The current implementation of the OPAL_PCI_EEH_FREEZE_STATUS call in skiboot's NPU driver does not touch the pci_error_type parameter so it might have garbage but the powernv code analyzes it nevertheless. This initializes pcierr and fstate to zero in all call sites. Signed-off-by: Alexey Kardashevskiy Reviewed-by: Sam Bobroff Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/platforms/powernv/eeh-powernv.c | 8 ++++---- arch/powerpc/platforms/powernv/pci-ioda.c | 4 ++-- arch/powerpc/platforms/powernv/pci.c | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 8864065eba22..fa2965c96155 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -548,8 +548,8 @@ static void pnv_eeh_get_phb_diag(struct eeh_pe *pe) static int pnv_eeh_get_phb_state(struct eeh_pe *pe) { struct pnv_phb *phb = pe->phb->private_data; - u8 fstate; - __be16 pcierr; + u8 fstate = 0; + __be16 pcierr = 0; s64 rc; int result = 0; @@ -587,8 +587,8 @@ static int pnv_eeh_get_phb_state(struct eeh_pe *pe) static int pnv_eeh_get_pe_state(struct eeh_pe *pe) { struct pnv_phb *phb = pe->phb->private_data; - u8 fstate; - __be16 pcierr; + u8 fstate = 0; + __be16 pcierr = 0; s64 rc; int result; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index ddef22e00ddd..d3d5796f7df6 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -598,8 +598,8 @@ static int pnv_ioda_unfreeze_pe(struct pnv_phb *phb, int pe_no, int opt) static int pnv_ioda_get_pe_state(struct pnv_phb *phb, int pe_no) { struct pnv_ioda_pe *slave, *pe; - u8 fstate, state; - __be16 pcierr; + u8 fstate = 0, state; + __be16 pcierr = 0; s64 rc; /* Sanity check on PE number */ diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 5422f4a6317c..e2d031a3ec15 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -600,8 +600,8 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no) static void pnv_pci_config_check_eeh(struct pci_dn *pdn) { struct pnv_phb *phb = pdn->phb->private_data; - u8 fstate; - __be16 pcierr; + u8 fstate = 0; + __be16 pcierr = 0; unsigned int pe_no; s64 rc; From f76565605852d998c0ba92b5f33a875a97debade Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 20 Dec 2018 17:23:31 +0100 Subject: [PATCH 1264/3715] drbd: ignore "all zero" peer volume sizes in handshake [ Upstream commit 94c43a13b8d6e3e0dd77b3536b5e04a84936b762 ] During handshake, if we are diskless ourselves, we used to accept any size presented by the peer. Which could be zero if that peer was just brought up and connected to us without having a disk attached first, in which case both peers would just "flip" their volume sizes. Now, even a diskless node will ignore "zero" sizes presented by a diskless peer. Also a currently Diskless Primary will refuse to shrink during handshake: it may be frozen, and waiting for a "suitable" local disk or peer to re-appear (on-no-data-accessible suspend-io). If the peer is smaller than what we used to be, it is not suitable. The logic for a diskless node during handshake is now supposed to be: believe the peer, if - I don't have a current size myself - we agree on the size anyways - I do have a current size, am Secondary, and he has the only disk - I do have a current size, am Primary, and he has the only disk, which is larger than my current size Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/drbd/drbd_receiver.c | 33 +++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 8fbdfaacc222..08586bc5219b 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3977,6 +3977,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info struct o_qlim *o = (connection->agreed_features & DRBD_FF_WSAME) ? p->qlim : NULL; enum determine_dev_size dd = DS_UNCHANGED; sector_t p_size, p_usize, p_csize, my_usize; + sector_t new_size, cur_size; int ldsc = 0; /* local disk size changed */ enum dds_flags ddsf; @@ -3984,6 +3985,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info if (!peer_device) return config_unknown_volume(connection, pi); device = peer_device->device; + cur_size = drbd_get_capacity(device->this_bdev); p_size = be64_to_cpu(p->d_size); p_usize = be64_to_cpu(p->u_size); @@ -3994,7 +3996,6 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info device->p_size = p_size; if (get_ldev(device)) { - sector_t new_size, cur_size; rcu_read_lock(); my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size; rcu_read_unlock(); @@ -4012,7 +4013,6 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info /* Never shrink a device with usable data during connect. But allow online shrinking if we are connected. */ new_size = drbd_new_dev_size(device, device->ldev, p_usize, 0); - cur_size = drbd_get_capacity(device->this_bdev); if (new_size < cur_size && device->state.disk >= D_OUTDATED && device->state.conn < C_CONNECTED) { @@ -4077,9 +4077,36 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info * * However, if he sends a zero current size, * take his (user-capped or) backing disk size anyways. + * + * Unless of course he does not have a disk himself. + * In which case we ignore this completely. */ + sector_t new_size = p_csize ?: p_usize ?: p_size; drbd_reconsider_queue_parameters(device, NULL, o); - drbd_set_my_capacity(device, p_csize ?: p_usize ?: p_size); + if (new_size == 0) { + /* Ignore, peer does not know nothing. */ + } else if (new_size == cur_size) { + /* nothing to do */ + } else if (cur_size != 0 && p_size == 0) { + drbd_warn(device, "Ignored diskless peer device size (peer:%llu != me:%llu sectors)!\n", + (unsigned long long)new_size, (unsigned long long)cur_size); + } else if (new_size < cur_size && device->state.role == R_PRIMARY) { + drbd_err(device, "The peer's device size is too small! (%llu < %llu sectors); demote me first!\n", + (unsigned long long)new_size, (unsigned long long)cur_size); + conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); + return -EIO; + } else { + /* I believe the peer, if + * - I don't have a current size myself + * - we agree on the size anyways + * - I do have a current size, am Secondary, + * and he has the only disk + * - I do have a current size, am Primary, + * and he has the only disk, + * which is larger than my current size + */ + drbd_set_my_capacity(device, new_size); + } } if (get_ldev(device)) { From e3be18effa2147010a0ef8fee8e58e121c5c5ea8 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 20 Dec 2018 17:23:35 +0100 Subject: [PATCH 1265/3715] drbd: reject attach of unsuitable uuids even if connected [ Upstream commit fe43ed97bba3b11521abd934b83ed93143470e4f ] Multiple failure scenario: a) all good Connected Primary/Secondary UpToDate/UpToDate b) lose disk on Primary, Connected Primary/Secondary Diskless/UpToDate c) continue to write to the device, changes only make it to the Secondary storage. d) lose disk on Secondary, Connected Primary/Secondary Diskless/Diskless e) now try to re-attach on Primary This would have succeeded before, even though that is clearly the wrong data set to attach to (missing the modifications from c). Because we only compared our "effective" and the "to-be-attached" data generation uuid tags if (device->state.conn < C_CONNECTED). Fix: change that constraint to (device->state.pdsk != D_UP_TO_DATE) compare the uuids, and reject the attach. This patch also tries to improve the reverse scenario: first lose Secondary, then Primary disk, then try to attach the disk on Secondary. Before this patch, the attach on the Secondary succeeds, but since commit drbd: disconnect, if the wrong UUIDs are attached on a connected peer the Primary will notice unsuitable data, and drop the connection hard. Though unfortunately at a point in time during the handshake where we cannot easily abort the attach on the peer without more refactoring of the handshake. We now reject any attach to "unsuitable" uuids, as long as we can see a Primary role, unless we already have access to "good" data. Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/drbd/drbd_nl.c | 6 +++--- drivers/block/drbd/drbd_receiver.c | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index ad13ec66c8e4..a675a0f61f9c 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1935,9 +1935,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) } } - if (device->state.conn < C_CONNECTED && - device->state.role == R_PRIMARY && device->ed_uuid && - (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) { + if (device->state.pdsk != D_UP_TO_DATE && device->ed_uuid && + (device->state.role == R_PRIMARY || device->state.peer == R_PRIMARY) && + (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) { drbd_err(device, "Can only attach to data with current UUID=%016llX\n", (unsigned long long)device->ed_uuid); retcode = ERR_DATA_NOT_CURRENT; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 08586bc5219b..a7c180426c60 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4392,6 +4392,25 @@ static int receive_state(struct drbd_connection *connection, struct packet_info if (peer_state.conn == C_AHEAD) ns.conn = C_BEHIND; + /* TODO: + * if (primary and diskless and peer uuid != effective uuid) + * abort attach on peer; + * + * If this node does not have good data, was already connected, but + * the peer did a late attach only now, trying to "negotiate" with me, + * AND I am currently Primary, possibly frozen, with some specific + * "effective" uuid, this should never be reached, really, because + * we first send the uuids, then the current state. + * + * In this scenario, we already dropped the connection hard + * when we received the unsuitable uuids (receive_uuids(). + * + * Should we want to change this, that is: not drop the connection in + * receive_uuids() already, then we would need to add a branch here + * that aborts the attach of "unsuitable uuids" on the peer in case + * this node is currently Diskless Primary. + */ + if (device->p_uuid && peer_state.disk >= D_NEGOTIATING && get_ldev_if_state(device, D_NEGOTIATING)) { int cr; /* consider resync */ From 262b7951cdf19bb112332f8811f7143647c783df Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 20 Dec 2018 17:23:37 +0100 Subject: [PATCH 1266/3715] drbd: do not block when adjusting "disk-options" while IO is frozen [ Upstream commit f708bd08ecbdc23d03aaedf5b3311ebe44cfdb50 ] "suspending" IO is overloaded. It can mean "do not allow new requests" (obviously), but it also may mean "must not complete pending IO", for example while the fencing handlers do their arbitration. When adjusting disk options, we suspend io (disallow new requests), then wait for the activity-log to become unused (drain all IO completions), and possibly replace it with a new activity log of different size. If the other "suspend IO" aspect is active, pending IO completions won't happen, and we would block forever (unkillable drbdsetup process). Fix this by skipping the activity log adjustment if the "al-extents" setting did not change. Also, in case it did change, fail early without blocking if it looks like we would block forever. Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/drbd/drbd_nl.c | 37 ++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index a675a0f61f9c..31d7fe4480af 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1515,6 +1515,30 @@ static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *dis } } +static int disk_opts_check_al_size(struct drbd_device *device, struct disk_conf *dc) +{ + int err = -EBUSY; + + if (device->act_log && + device->act_log->nr_elements == dc->al_extents) + return 0; + + drbd_suspend_io(device); + /* If IO completion is currently blocked, we would likely wait + * "forever" for the activity log to become unused. So we don't. */ + if (atomic_read(&device->ap_bio_cnt)) + goto out; + + wait_event(device->al_wait, lc_try_lock(device->act_log)); + drbd_al_shrink(device); + err = drbd_check_al_size(device, dc); + lc_unlock(device->act_log); + wake_up(&device->al_wait); +out: + drbd_resume_io(device); + return err; +} + int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) { struct drbd_config_context adm_ctx; @@ -1577,15 +1601,12 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) } } - drbd_suspend_io(device); - wait_event(device->al_wait, lc_try_lock(device->act_log)); - drbd_al_shrink(device); - err = drbd_check_al_size(device, new_disk_conf); - lc_unlock(device->act_log); - wake_up(&device->al_wait); - drbd_resume_io(device); - + err = disk_opts_check_al_size(device, new_disk_conf); if (err) { + /* Could be just "busy". Ignore? + * Introduce dedicated error code? */ + drbd_msg_put_info(adm_ctx.reply_skb, + "Try again without changing current al-extents setting"); retcode = ERR_NOMEM; goto fail_unlock; } From 3a2dfdab05f913eb0cc3e4494d9e8d9d869c3611 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Thu, 20 Dec 2018 17:23:39 +0100 Subject: [PATCH 1267/3715] drbd: fix print_st_err()'s prototype to match the definition [ Upstream commit 2c38f035117331eb78d0504843c79ea7c7fabf37 ] print_st_err() is defined with its 4th argument taking an 'enum drbd_state_rv' but its prototype use an int for it. Fix this by using 'enum drbd_state_rv' in the prototype too. Signed-off-by: Luc Van Oostenryck Signed-off-by: Roland Kammerer Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/drbd/drbd_state.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h index ea58301d0895..b2a390ba73a0 100644 --- a/drivers/block/drbd/drbd_state.h +++ b/drivers/block/drbd/drbd_state.h @@ -131,7 +131,7 @@ extern enum drbd_state_rv _drbd_set_state(struct drbd_device *, union drbd_state enum chg_state_flags, struct completion *done); extern void print_st_err(struct drbd_device *, union drbd_state, - union drbd_state, int); + union drbd_state, enum drbd_state_rv); enum drbd_state_rv _conn_request_state(struct drbd_connection *connection, union drbd_state mask, union drbd_state val, From 94848ffd66c2ddcb725a296b71b5f8dbe2cbeaa2 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 14 Dec 2018 00:32:42 -0600 Subject: [PATCH 1268/3715] IB/rxe: Make counters thread safe [ Upstream commit d5108e69fe013ff47ab815b849caba9cc33ca1e5 ] Current rxe device counters are not thread safe. When multiple QPs are used, they can be racy. Make them thread safe by making it atomic64. Fixes: 0b1e5b99a48b ("IB/rxe: Add port protocol stats") Signed-off-by: Parav Pandit Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/sw/rxe/rxe_hw_counters.c | 2 +- drivers/infiniband/sw/rxe/rxe_verbs.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.c b/drivers/infiniband/sw/rxe/rxe_hw_counters.c index 6aeb7a165e46..ea4542a9d69e 100644 --- a/drivers/infiniband/sw/rxe/rxe_hw_counters.c +++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.c @@ -59,7 +59,7 @@ int rxe_ib_get_hw_stats(struct ib_device *ibdev, return -EINVAL; for (cnt = 0; cnt < ARRAY_SIZE(rxe_counter_name); cnt++) - stats->value[cnt] = dev->stats_counters[cnt]; + stats->value[cnt] = atomic64_read(&dev->stats_counters[cnt]); return ARRAY_SIZE(rxe_counter_name); } diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index b2b76a316eba..d1cc89f6f2e3 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -410,16 +410,16 @@ struct rxe_dev { spinlock_t mmap_offset_lock; /* guard mmap_offset */ int mmap_offset; - u64 stats_counters[RXE_NUM_OF_COUNTERS]; + atomic64_t stats_counters[RXE_NUM_OF_COUNTERS]; struct rxe_port port; struct list_head list; struct crypto_shash *tfm; }; -static inline void rxe_counter_inc(struct rxe_dev *rxe, enum rxe_counters cnt) +static inline void rxe_counter_inc(struct rxe_dev *rxe, enum rxe_counters index) { - rxe->stats_counters[cnt]++; + atomic64_inc(&rxe->stats_counters[index]); } static inline struct rxe_dev *to_rdev(struct ib_device *dev) From 2a8fb5b9fd37157b5017e1a8a5a6fe7946aa4df6 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Fri, 21 Dec 2018 00:29:19 -0600 Subject: [PATCH 1269/3715] regulator: tps65910: fix a missing check of return value [ Upstream commit cd07e3701fa6a4c68f8493ee1d12caa18d46ec6a ] tps65910_reg_set_bits() may fail. The fix checks if it fails, and if so, returns with its error code. Signed-off-by: Kangjie Lu Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/tps65910-regulator.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/regulator/tps65910-regulator.c b/drivers/regulator/tps65910-regulator.c index 81672a58fcc2..194fa0cbbc04 100644 --- a/drivers/regulator/tps65910-regulator.c +++ b/drivers/regulator/tps65910-regulator.c @@ -1102,8 +1102,10 @@ static int tps65910_probe(struct platform_device *pdev) platform_set_drvdata(pdev, pmic); /* Give control of all register to control port */ - tps65910_reg_set_bits(pmic->mfd, TPS65910_DEVCTRL, + err = tps65910_reg_set_bits(pmic->mfd, TPS65910_DEVCTRL, DEVCTRL_SR_CTL_I2C_SEL_MASK); + if (err < 0) + return err; switch (tps65910_chip_id(tps65910)) { case TPS65910: From 2885cf0d501fa480009ecc2deefb2802ce354b6e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 10 Dec 2018 11:41:29 +0000 Subject: [PATCH 1270/3715] powerpc/83xx: handle machine check caused by watchdog timer [ Upstream commit 0deae39cec6dab3a66794f3e9e83ca4dc30080f1 ] When the watchdog timer is set in interrupt mode, it causes a machine check when it times out. The purpose of this mode is to ease debugging, not to crash the kernel and reboot the machine. This patch implements a special handling for that, in order to not crash the kernel if the watchdog times out while in interrupt or within the idle task. Signed-off-by: Christophe Leroy [scottwood: added missing #include] Signed-off-by: Scott Wood Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/cputable.h | 1 + arch/powerpc/include/asm/reg.h | 2 ++ arch/powerpc/kernel/cputable.c | 10 ++++++---- arch/powerpc/platforms/83xx/misc.c | 17 +++++++++++++++++ 4 files changed, 26 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 7e3ccf21830e..e4451b30d7e3 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -45,6 +45,7 @@ extern int machine_check_e500(struct pt_regs *regs); extern int machine_check_e200(struct pt_regs *regs); extern int machine_check_47x(struct pt_regs *regs); int machine_check_8xx(struct pt_regs *regs); +int machine_check_83xx(struct pt_regs *regs); extern void cpu_down_flush_e500v2(void); extern void cpu_down_flush_e500mc(void); diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index b779f3ccd412..05f3c2b3aa0e 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -733,6 +733,8 @@ #define SRR1_PROGTRAP 0x00020000 /* Trap */ #define SRR1_PROGADDR 0x00010000 /* SRR0 contains subsequent addr */ +#define SRR1_MCE_MCP 0x00080000 /* Machine check signal caused interrupt */ + #define SPRN_HSRR0 0x13A /* Save/Restore Register 0 */ #define SPRN_HSRR1 0x13B /* Save/Restore Register 1 */ #define HSRR1_DENORM 0x00100000 /* Denorm exception */ diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 760872916013..da4b0e379238 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1185,6 +1185,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_generic, .platform = "ppc603", }, +#ifdef CONFIG_PPC_83xx { /* e300c1 (a 603e core, plus some) on 83xx */ .pvr_mask = 0x7fff0000, .pvr_value = 0x00830000, @@ -1195,7 +1196,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .cpu_setup = __setup_cpu_603, - .machine_check = machine_check_generic, + .machine_check = machine_check_83xx, .platform = "ppc603", }, { /* e300c2 (an e300c1 core, plus some, minus FPU) on 83xx */ @@ -1209,7 +1210,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .cpu_setup = __setup_cpu_603, - .machine_check = machine_check_generic, + .machine_check = machine_check_83xx, .platform = "ppc603", }, { /* e300c3 (e300c1, plus one IU, half cache size) on 83xx */ @@ -1223,7 +1224,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .cpu_setup = __setup_cpu_603, - .machine_check = machine_check_generic, + .machine_check = machine_check_83xx, .num_pmcs = 4, .oprofile_cpu_type = "ppc/e300", .oprofile_type = PPC_OPROFILE_FSL_EMB, @@ -1240,12 +1241,13 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .cpu_setup = __setup_cpu_603, - .machine_check = machine_check_generic, + .machine_check = machine_check_83xx, .num_pmcs = 4, .oprofile_cpu_type = "ppc/e300", .oprofile_type = PPC_OPROFILE_FSL_EMB, .platform = "ppc603", }, +#endif { /* default match, we assume split I/D cache & TB (non-601)... */ .pvr_mask = 0x00000000, .pvr_value = 0x00000000, diff --git a/arch/powerpc/platforms/83xx/misc.c b/arch/powerpc/platforms/83xx/misc.c index d75c9816a5c9..2b6589fe812d 100644 --- a/arch/powerpc/platforms/83xx/misc.c +++ b/arch/powerpc/platforms/83xx/misc.c @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -150,3 +151,19 @@ void __init mpc83xx_setup_arch(void) mpc83xx_setup_pci(); } + +int machine_check_83xx(struct pt_regs *regs) +{ + u32 mask = 1 << (31 - IPIC_MCP_WDT); + + if (!(regs->msr & SRR1_MCE_MCP) || !(ipic_get_mcp_status() & mask)) + return machine_check_generic(regs); + ipic_clear_mcp_status(mask); + + if (debugger_fault_handler(regs)) + return 1; + + die("Watchdog NMI Reset", regs, 0); + + return 1; +} From 2936c4f8aaf349fcd3e4581fdd72d530ce123448 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 27 Nov 2018 19:16:44 +1100 Subject: [PATCH 1271/3715] powerpc/pseries: Fix node leak in update_lmb_associativity_index() [ Upstream commit 47918bc68b7427e961035949cc1501a864578a69 ] In update_lmb_associativity_index() we lookup dr_node using of_find_node_by_path() which takes a reference for us. In the non-error case we forget to drop the reference. Note that find_aa_index() does modify properties of the node, but doesn't need an extra reference held once it's returned. Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/platforms/pseries/hotplug-memory.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 99a3cf51c5ba..fdfce7a46d73 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -295,6 +295,7 @@ static u32 lookup_lmb_associativity_index(struct of_drconf_cell *lmb) aa_index = find_aa_index(dr_node, ala_prop, lmb_assoc); + of_node_put(dr_node); dlpar_free_cc_nodes(lmb_node); return aa_index; } From 97dbfe465b0435c79733b9231110c98e4c00f630 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 13 Dec 2018 07:52:32 -0200 Subject: [PATCH 1272/3715] crypto: mxc-scc - fix build warnings on ARM64 [ Upstream commit 2326828ee40357b3d2b1359b8ca7526af201495b ] The following build warnings are seen when building for ARM64 allmodconfig: drivers/crypto/mxc-scc.c:181:20: warning: format '%d' expects argument of type 'int', but argument 5 has type 'size_t' {aka 'long unsigned int'} [-Wformat=] drivers/crypto/mxc-scc.c:186:21: warning: format '%d' expects argument of type 'int', but argument 4 has type 'size_t' {aka 'long unsigned int'} [-Wformat=] drivers/crypto/mxc-scc.c:277:21: warning: format '%d' expects argument of type 'int', but argument 4 has type 'size_t' {aka 'long unsigned int'} [-Wformat=] drivers/crypto/mxc-scc.c:339:3: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] drivers/crypto/mxc-scc.c:340:3: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] Fix them by using the %zu specifier to print a size_t variable and using a plain %x to print the result of a readl(). Signed-off-by: Fabio Estevam Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/mxc-scc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/mxc-scc.c b/drivers/crypto/mxc-scc.c index e01c46387df8..519086730791 100644 --- a/drivers/crypto/mxc-scc.c +++ b/drivers/crypto/mxc-scc.c @@ -178,12 +178,12 @@ static int mxc_scc_get_data(struct mxc_scc_ctx *ctx, else from = scc->black_memory; - dev_dbg(scc->dev, "pcopy: from 0x%p %d bytes\n", from, + dev_dbg(scc->dev, "pcopy: from 0x%p %zu bytes\n", from, ctx->dst_nents * 8); len = sg_pcopy_from_buffer(ablkreq->dst, ctx->dst_nents, from, ctx->size, ctx->offset); if (!len) { - dev_err(scc->dev, "pcopy err from 0x%p (len=%d)\n", from, len); + dev_err(scc->dev, "pcopy err from 0x%p (len=%zu)\n", from, len); return -EINVAL; } @@ -274,7 +274,7 @@ static int mxc_scc_put_data(struct mxc_scc_ctx *ctx, len = sg_pcopy_to_buffer(req->src, ctx->src_nents, to, len, ctx->offset); if (!len) { - dev_err(scc->dev, "pcopy err to 0x%p (len=%d)\n", to, len); + dev_err(scc->dev, "pcopy err to 0x%p (len=%zu)\n", to, len); return -EINVAL; } @@ -335,9 +335,9 @@ static void mxc_scc_ablkcipher_next(struct mxc_scc_ctx *ctx, return; } - dev_dbg(scc->dev, "Start encryption (0x%p/0x%p)\n", - (void *)readl(scc->base + SCC_SCM_RED_START), - (void *)readl(scc->base + SCC_SCM_BLACK_START)); + dev_dbg(scc->dev, "Start encryption (0x%x/0x%x)\n", + readl(scc->base + SCC_SCM_RED_START), + readl(scc->base + SCC_SCM_BLACK_START)); /* clear interrupt control registers */ writel(SCC_SCM_INTR_CTRL_CLR_INTR, From 3e5d1310f341064b9325d93fee11221708aa6e67 Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Wed, 19 Dec 2018 13:39:24 +0300 Subject: [PATCH 1273/3715] pwm: clps711x: Fix period calculation [ Upstream commit b0f17570b8203c22f139459c86cfbaa0311313ed ] Commit e39c0df1be5a ("pwm: Introduce the pwm_args concept") has changed the variable for the period for clps711x-pwm driver, so now pwm_get/set_period() works with pwm->state.period variable instead of pwm->args.period. This patch changes the period variable in other places where it is used. Signed-off-by: Alexander Shiyan Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/pwm/pwm-clps711x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pwm/pwm-clps711x.c b/drivers/pwm/pwm-clps711x.c index 26ec24e457b1..7e16b7def0dc 100644 --- a/drivers/pwm/pwm-clps711x.c +++ b/drivers/pwm/pwm-clps711x.c @@ -48,7 +48,7 @@ static void clps711x_pwm_update_val(struct clps711x_chip *priv, u32 n, u32 v) static unsigned int clps711x_get_duty(struct pwm_device *pwm, unsigned int v) { /* Duty cycle 0..15 max */ - return DIV_ROUND_CLOSEST(v * 0xf, pwm_get_period(pwm)); + return DIV_ROUND_CLOSEST(v * 0xf, pwm->args.period); } static int clps711x_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) @@ -71,7 +71,7 @@ static int clps711x_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, struct clps711x_chip *priv = to_clps711x_chip(chip); unsigned int duty; - if (period_ns != pwm_get_period(pwm)) + if (period_ns != pwm->args.period) return -EINVAL; duty = clps711x_get_duty(pwm, duty_ns); From ffe6754349892403d348eb5793aaf278a12aa414 Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Sun, 23 Dec 2018 18:54:53 -0600 Subject: [PATCH 1274/3715] net/netlink_compat: Fix a missing check of nla_parse_nested [ Upstream commit 89dfd0083751d00d5d7ead36f6d8b045bf89c5e1 ] In tipc_nl_compat_sk_dump(), if nla_parse_nested() fails, it could return an error. To be consistent with other invocations of the function call, on error, the fix passes the return value upstream. Signed-off-by: Aditya Pakki Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/tipc/netlink_compat.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index ad4dcc663c6d..1c8ac0c11008 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -1021,8 +1021,11 @@ static int tipc_nl_compat_sk_dump(struct tipc_nl_compat_msg *msg, u32 node; struct nlattr *con[TIPC_NLA_CON_MAX + 1]; - nla_parse_nested(con, TIPC_NLA_CON_MAX, - sock[TIPC_NLA_SOCK_CON], NULL, NULL); + err = nla_parse_nested(con, TIPC_NLA_CON_MAX, + sock[TIPC_NLA_SOCK_CON], NULL, NULL); + + if (err) + return err; node = nla_get_u32(con[TIPC_NLA_CON_NODE]); tipc_tlv_sprintf(msg->rep, " connected to <%u.%u.%u:%u>", From 145422360a329f12f8c19ddad5edb178625966ee Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Sun, 23 Dec 2018 19:42:38 -0600 Subject: [PATCH 1275/3715] net/net_namespace: Check the return value of register_pernet_subsys() [ Upstream commit 0eb987c874dc93f9c9d85a6465dbde20fdd3884c ] In net_ns_init(), register_pernet_subsys() could fail while registering network namespace subsystems. The fix checks the return value and sends a panic() on failure. Signed-off-by: Aditya Pakki Reviewed-by: Kirill Tkhai Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/core/net_namespace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 60b88718b1d4..1af25d53f63c 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -854,7 +854,8 @@ static int __init net_ns_init(void) mutex_unlock(&net_mutex); - register_pernet_subsys(&net_ns_ops); + if (register_pernet_subsys(&net_ns_ops)) + panic("Could not register network namespace subsystems"); rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, RTNL_FLAG_DOIT_UNLOCKED); From 3ccb647772d94c8a5f006f2f1df877e93e7b06d7 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 18 Dec 2018 19:20:17 +0800 Subject: [PATCH 1276/3715] f2fs: fix to dirty inode synchronously [ Upstream commit b32e019049e959ee10ec359893c9dd5d057dad55 ] If user change inode's i_flags via ioctl, let's add it into global dirty list, so that checkpoint can guarantee its persistence before fsync, it can make checkpoint keeping strong consistency. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 1b1792199445..d68b0132718a 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1593,7 +1593,7 @@ static int __f2fs_ioc_setflags(struct inode *inode, unsigned int flags) inode->i_ctime = current_time(inode); f2fs_set_inode_flags(inode); - f2fs_mark_inode_dirty_sync(inode, false); + f2fs_mark_inode_dirty_sync(inode, true); return 0; } From 212c573efd8ff7a4351675a35d6bc59e6c2a9f3d Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Tue, 30 Oct 2018 12:58:12 +0100 Subject: [PATCH 1277/3715] um: Make GCOV depend on !KCOV [ Upstream commit 550ed0e2036663b35cec12374b835444f9c60454 ] Both do more or less the same thing and are mutually exclusive. If both are enabled the build will fail. Sooner or later we can kill UML's GCOV. Signed-off-by: Richard Weinberger Signed-off-by: Sasha Levin --- arch/um/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/um/Kconfig.debug b/arch/um/Kconfig.debug index 967d3109689f..39d44bfb241d 100644 --- a/arch/um/Kconfig.debug +++ b/arch/um/Kconfig.debug @@ -19,6 +19,7 @@ config GPROF config GCOV bool "Enable gcov support" depends on DEBUG_INFO + depends on !KCOV help This option allows developers to retrieve coverage data from a UML session. From 3092e216b75397f09611a6d409476e5d7e8a75c2 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Tue, 25 Dec 2018 20:55:37 -0600 Subject: [PATCH 1278/3715] net: (cpts) fix a missing check of clk_prepare [ Upstream commit 2d822f2dbab7f4c820f72eb8570aacf3f35855bd ] clk_prepare() could fail, so let's check its status, and if it fails, return its error code upstream. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/ti/cpts.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c index e7b76f6b4f67..7d1281d81248 100644 --- a/drivers/net/ethernet/ti/cpts.c +++ b/drivers/net/ethernet/ti/cpts.c @@ -567,7 +567,9 @@ struct cpts *cpts_create(struct device *dev, void __iomem *regs, return ERR_PTR(PTR_ERR(cpts->refclk)); } - clk_prepare(cpts->refclk); + ret = clk_prepare(cpts->refclk); + if (ret) + return ERR_PTR(ret); cpts->cc.read = cpts_systim_read; cpts->cc.mask = CLOCKSOURCE_MASK(32); From 2f2a742cea6d028c703f53fb57919c101e6aef12 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Tue, 25 Dec 2018 20:57:14 -0600 Subject: [PATCH 1279/3715] net: stmicro: fix a missing check of clk_prepare [ Upstream commit f86a3b83833e7cfe558ca4d70b64ebc48903efec ] clk_prepare() could fail, so let's check its status, and if it fails, return its error code upstream. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c index d07520fb969e..62ccbd47c1db 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c @@ -59,7 +59,9 @@ static int sun7i_gmac_init(struct platform_device *pdev, void *priv) gmac->clk_enabled = 1; } else { clk_set_rate(gmac->tx_clk, SUN7I_GMAC_MII_RATE); - clk_prepare(gmac->tx_clk); + ret = clk_prepare(gmac->tx_clk); + if (ret) + return ret; } return 0; From 04e35269d5d34ad5e4e13105048c77b872438e02 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Tue, 25 Dec 2018 22:08:18 -0600 Subject: [PATCH 1280/3715] net: dsa: bcm_sf2: Propagate error value from mdio_write [ Upstream commit e49505f7255be8ced695919c08a29bf2c3d79616 ] Both bcm_sf2_sw_indir_rw and mdiobus_write_nested could fail, so let's return their error codes upstream. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/dsa/bcm_sf2.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index af666951a959..94ad2fdd6ef0 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -432,11 +432,10 @@ static int bcm_sf2_sw_mdio_write(struct mii_bus *bus, int addr, int regnum, * send them to our master MDIO bus controller */ if (addr == BRCM_PSEUDO_PHY_ADDR && priv->indir_phy_mask & BIT(addr)) - bcm_sf2_sw_indir_rw(priv, 0, addr, regnum, val); + return bcm_sf2_sw_indir_rw(priv, 0, addr, regnum, val); else - mdiobus_write_nested(priv->master_mii_bus, addr, regnum, val); - - return 0; + return mdiobus_write_nested(priv->master_mii_bus, addr, + regnum, val); } static irqreturn_t bcm_sf2_switch_0_isr(int irq, void *dev_id) From a4bc476bd09e09d8f854dd8b7dcb60cb0c4dfafe Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Tue, 25 Dec 2018 22:23:19 -0600 Subject: [PATCH 1281/3715] atl1e: checking the status of atl1e_write_phy_reg [ Upstream commit ff07d48d7bc0974d4f96a85a4df14564fb09f1ef ] atl1e_write_phy_reg() could fail. The fix issues an error message when it fails. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/atheros/atl1e/atl1e_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index 4f7e195af0bc..0d08039981b5 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -472,7 +472,9 @@ static void atl1e_mdio_write(struct net_device *netdev, int phy_id, { struct atl1e_adapter *adapter = netdev_priv(netdev); - atl1e_write_phy_reg(&adapter->hw, reg_num & MDIO_REG_ADDR_MASK, val); + if (atl1e_write_phy_reg(&adapter->hw, + reg_num & MDIO_REG_ADDR_MASK, val)) + netdev_err(netdev, "write phy register failed\n"); } static int atl1e_mii_ioctl(struct net_device *netdev, From 709f50b180528ad1e89d9a8ad59cc81a85937bb8 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Wed, 26 Dec 2018 00:09:04 -0600 Subject: [PATCH 1282/3715] tipc: fix a missing check of genlmsg_put [ Upstream commit 46273cf7e009231d2b6bc10a926e82b8928a9fb2 ] genlmsg_put could fail. The fix inserts a check of its return value, and if it fails, returns -EMSGSIZE. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/tipc/netlink_compat.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 1c8ac0c11008..91d51a595ac2 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -974,6 +974,8 @@ static int tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, u32 sock) hdr = genlmsg_put(args, 0, 0, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_PUBL_GET); + if (!hdr) + return -EMSGSIZE; nest = nla_nest_start(args, TIPC_NLA_SOCK); if (!nest) { From a6082b56e9eddfc23e432ff63fc092413992d2d2 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Wed, 26 Dec 2018 16:28:30 +0800 Subject: [PATCH 1283/3715] net/wan/fsl_ucc_hdlc: Avoid double free in ucc_hdlc_probe() [ Upstream commit 40752b3eae29f8ca2378e978a02bd6dbeeb06d16 ] This patch fixes potential double frees if register_hdlc_device() fails. Signed-off-by: Wen Yang Reviewed-by: Peng Hao CC: Zhao Qiang CC: "David S. Miller" CC: netdev@vger.kernel.org CC: linuxppc-dev@lists.ozlabs.org CC: linux-kernel@vger.kernel.org Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/wan/fsl_ucc_hdlc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index 18b648648adb..289dff262948 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -1114,7 +1114,6 @@ static int ucc_hdlc_probe(struct platform_device *pdev) if (register_hdlc_device(dev)) { ret = -ENOBUFS; pr_err("ucc_hdlc: unable to register hdlc device\n"); - free_netdev(dev); goto free_dev; } From c11d159d2fb5f81868454b385694fac1526b99d7 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Fri, 28 Dec 2018 00:32:53 -0800 Subject: [PATCH 1284/3715] ocfs2: clear journal dirty flag after shutdown journal [ Upstream commit d85400af790dba2aa294f0a77e712f166681f977 ] Dirty flag of the journal should be cleared at the last stage of umount, if do it before jbd2_journal_destroy(), then some metadata in uncommitted transaction could be lost due to io error, but as dirty flag of journal was already cleared, we can't find that until run a full fsck. This may cause system panic or other corruption. Link: http://lkml.kernel.org/r/20181121020023.3034-3-junxiao.bi@oracle.com Signed-off-by: Junxiao Bi Reviewed-by: Yiwen Jiang Reviewed-by: Joseph Qi Cc: Jun Piao Cc: Changwei Ge Cc: Joel Becker Cc: Mark Fasheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/ocfs2/journal.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index f7fba58618ef..2459ae9d2234 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -1018,7 +1018,8 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb) mlog_errno(status); } - if (status == 0) { + /* Shutdown the kernel journal system */ + if (!jbd2_journal_destroy(journal->j_journal) && !status) { /* * Do not toggle if flush was unsuccessful otherwise * will leave dirty metadata in a "clean" journal @@ -1027,9 +1028,6 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb) if (status < 0) mlog_errno(status); } - - /* Shutdown the kernel journal system */ - jbd2_journal_destroy(journal->j_journal); journal->j_journal = NULL; OCFS2_I(inode)->ip_open_count--; From 1a439b577e36e8f6a9452c3731d2a6a088a85c33 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Fri, 28 Dec 2018 00:34:36 -0800 Subject: [PATCH 1285/3715] vmscan: return NODE_RECLAIM_NOSCAN in node_reclaim() when CONFIG_NUMA is n [ Upstream commit 8b09549c2bfd9f3f8f4cdad74107ef4f4ff9cdd7 ] Commit fa5e084e43eb ("vmscan: do not unconditionally treat zones that fail zone_reclaim() as full") changed the return value of node_reclaim(). The original return value 0 means NODE_RECLAIM_SOME after this commit. While the return value of node_reclaim() when CONFIG_NUMA is n is not changed. This will leads to call zone_watermark_ok() again. This patch fixes the return value by adjusting to NODE_RECLAIM_NOSCAN. Since node_reclaim() is only called in page_alloc.c, move it to mm/internal.h. Link: http://lkml.kernel.org/r/20181113080436.22078-1-richard.weiyang@gmail.com Signed-off-by: Wei Yang Acked-by: Michal Hocko Reviewed-by: Matthew Wilcox Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- include/linux/swap.h | 6 ------ mm/internal.h | 10 ++++++++++ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index e643866912b7..411953964c34 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -363,14 +363,8 @@ extern unsigned long vm_total_pages; extern int node_reclaim_mode; extern int sysctl_min_unmapped_ratio; extern int sysctl_min_slab_ratio; -extern int node_reclaim(struct pglist_data *, gfp_t, unsigned int); #else #define node_reclaim_mode 0 -static inline int node_reclaim(struct pglist_data *pgdat, gfp_t mask, - unsigned int order) -{ - return 0; -} #endif extern int page_evictable(struct page *page); diff --git a/mm/internal.h b/mm/internal.h index 1df011f62480..a182506242c4 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -455,6 +455,16 @@ static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn, #define NODE_RECLAIM_SOME 0 #define NODE_RECLAIM_SUCCESS 1 +#ifdef CONFIG_NUMA +extern int node_reclaim(struct pglist_data *, gfp_t, unsigned int); +#else +static inline int node_reclaim(struct pglist_data *pgdat, gfp_t mask, + unsigned int order) +{ + return NODE_RECLAIM_NOSCAN; +} +#endif + extern int hwpoison_filter(struct page *p); extern u32 hwpoison_filter_dev_major; From 525ef5687c35eda312a7add8a221580d0c4aee55 Mon Sep 17 00:00:00 2001 From: Alexey Skidanov Date: Thu, 3 Jan 2019 15:26:44 -0800 Subject: [PATCH 1286/3715] lib/genalloc.c: fix allocation of aligned buffer from non-aligned chunk [ Upstream commit 52fbf1134d479234d7e64ba9dcbaea23405f229e ] gen_pool_alloc_algo() uses different allocation functions implementing different allocation algorithms. With gen_pool_first_fit_align() allocation function, the returned address should be aligned on the requested boundary. If chunk start address isn't aligned on the requested boundary, the returned address isn't aligned too. The only way to get properly aligned address is to initialize the pool with chunks aligned on the requested boundary. If want to have an ability to allocate buffers aligned on different boundaries (for example, 4K, 1MB, ...), the chunk start address should be aligned on the max possible alignment. This happens because gen_pool_first_fit_align() looks for properly aligned memory block without taking into account the chunk start address alignment. To fix this, we provide chunk start address to gen_pool_first_fit_align() and change its implementation such that it starts looking for properly aligned block with appropriate offset (exactly as is done in CMA). Link: https://lkml.kernel.org/lkml/a170cf65-6884-3592-1de9-4c235888cc8a@intel.com Link: http://lkml.kernel.org/r/1541690953-4623-1-git-send-email-alexey.skidanov@intel.com Signed-off-by: Alexey Skidanov Reviewed-by: Andrew Morton Cc: Logan Gunthorpe Cc: Daniel Mentz Cc: Mathieu Desnoyers Cc: Laura Abbott Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- include/linux/genalloc.h | 13 +++++++------ lib/genalloc.c | 20 ++++++++++++-------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index 872f930f1b06..dd0a452373e7 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -51,7 +51,8 @@ typedef unsigned long (*genpool_algo_t)(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, - void *data, struct gen_pool *pool); + void *data, struct gen_pool *pool, + unsigned long start_addr); /* * General purpose special memory pool descriptor. @@ -131,24 +132,24 @@ extern void gen_pool_set_algo(struct gen_pool *pool, genpool_algo_t algo, extern unsigned long gen_pool_first_fit(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, void *data, - struct gen_pool *pool); + struct gen_pool *pool, unsigned long start_addr); extern unsigned long gen_pool_fixed_alloc(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, - void *data, struct gen_pool *pool); + void *data, struct gen_pool *pool, unsigned long start_addr); extern unsigned long gen_pool_first_fit_align(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, - void *data, struct gen_pool *pool); + void *data, struct gen_pool *pool, unsigned long start_addr); extern unsigned long gen_pool_first_fit_order_align(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, - void *data, struct gen_pool *pool); + void *data, struct gen_pool *pool, unsigned long start_addr); extern unsigned long gen_pool_best_fit(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, void *data, - struct gen_pool *pool); + struct gen_pool *pool, unsigned long start_addr); extern struct gen_pool *devm_gen_pool_create(struct device *dev, diff --git a/lib/genalloc.c b/lib/genalloc.c index ca06adc4f445..5deb25c40a5a 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -311,7 +311,7 @@ unsigned long gen_pool_alloc_algo(struct gen_pool *pool, size_t size, end_bit = chunk_size(chunk) >> order; retry: start_bit = algo(chunk->bits, end_bit, start_bit, - nbits, data, pool); + nbits, data, pool, chunk->start_addr); if (start_bit >= end_bit) continue; remain = bitmap_set_ll(chunk->bits, start_bit, nbits); @@ -525,7 +525,7 @@ EXPORT_SYMBOL(gen_pool_set_algo); */ unsigned long gen_pool_first_fit(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, void *data, - struct gen_pool *pool) + struct gen_pool *pool, unsigned long start_addr) { return bitmap_find_next_zero_area(map, size, start, nr, 0); } @@ -543,16 +543,19 @@ EXPORT_SYMBOL(gen_pool_first_fit); */ unsigned long gen_pool_first_fit_align(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, void *data, - struct gen_pool *pool) + struct gen_pool *pool, unsigned long start_addr) { struct genpool_data_align *alignment; - unsigned long align_mask; + unsigned long align_mask, align_off; int order; alignment = data; order = pool->min_alloc_order; align_mask = ((alignment->align + (1UL << order) - 1) >> order) - 1; - return bitmap_find_next_zero_area(map, size, start, nr, align_mask); + align_off = (start_addr & (alignment->align - 1)) >> order; + + return bitmap_find_next_zero_area_off(map, size, start, nr, + align_mask, align_off); } EXPORT_SYMBOL(gen_pool_first_fit_align); @@ -567,7 +570,7 @@ EXPORT_SYMBOL(gen_pool_first_fit_align); */ unsigned long gen_pool_fixed_alloc(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, void *data, - struct gen_pool *pool) + struct gen_pool *pool, unsigned long start_addr) { struct genpool_data_fixed *fixed_data; int order; @@ -601,7 +604,8 @@ EXPORT_SYMBOL(gen_pool_fixed_alloc); */ unsigned long gen_pool_first_fit_order_align(unsigned long *map, unsigned long size, unsigned long start, - unsigned int nr, void *data, struct gen_pool *pool) + unsigned int nr, void *data, struct gen_pool *pool, + unsigned long start_addr) { unsigned long align_mask = roundup_pow_of_two(nr) - 1; @@ -624,7 +628,7 @@ EXPORT_SYMBOL(gen_pool_first_fit_order_align); */ unsigned long gen_pool_best_fit(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, void *data, - struct gen_pool *pool) + struct gen_pool *pool, unsigned long start_addr) { unsigned long start_bit = size; unsigned long len = size + 1; From afe13dc9835a4ca5bd44f74fdc6e8b3a8b6dd402 Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Thu, 3 Jan 2019 15:26:51 -0800 Subject: [PATCH 1287/3715] lib/genalloc.c: use vzalloc_node() to allocate the bitmap [ Upstream commit 6862d2fc81859f88c1f3f660886427893f2b4f3f ] Some devices may have big memory on chip, such as over 1G. In some cases, the nbytes maybe bigger then 4M which is the bounday of the memory buddy system (4K default). So use vzalloc_node() to allocate the bitmap. Also use vfree to free it. Link: http://lkml.kernel.org/r/20181225015701.6289-1-sjhuang@iluvatar.ai Signed-off-by: Huang Shijie Reviewed-by: Andrew Morton Cc: Alexey Skidanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- lib/genalloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/genalloc.c b/lib/genalloc.c index 5deb25c40a5a..f365d71cdc77 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -187,7 +187,7 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phy int nbytes = sizeof(struct gen_pool_chunk) + BITS_TO_LONGS(nbits) * sizeof(long); - chunk = kzalloc_node(nbytes, GFP_KERNEL, nid); + chunk = vzalloc_node(nbytes, nid); if (unlikely(chunk == NULL)) return -ENOMEM; @@ -251,7 +251,7 @@ void gen_pool_destroy(struct gen_pool *pool) bit = find_next_bit(chunk->bits, end_bit, 0); BUG_ON(bit < end_bit); - kfree(chunk); + vfree(chunk); } kfree_const(pool->name); kfree(pool); From 7c2ec471cf5f8a5ddd7c66a739ae575b30b6341e Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Thu, 3 Jan 2019 15:28:03 -0800 Subject: [PATCH 1288/3715] fork: fix some -Wmissing-prototypes warnings [ Upstream commit fb5bf31722d0805a3f394f7d59f2e8cd07acccb7 ] We get a warning when building kernel with W=1: kernel/fork.c:167:13: warning: no previous prototype for `arch_release_thread_stack' [-Wmissing-prototypes] kernel/fork.c:779:13: warning: no previous prototype for `fork_init' [-Wmissing-prototypes] Add the missing declaration in head file to fix this. Also, remove arch_release_thread_stack() completely because no arch seems to implement it since bb9d81264 (arch: remove tile port). Link: http://lkml.kernel.org/r/1542170087-23645-1-git-send-email-wang.yi59@zte.com.cn Signed-off-by: Yi Wang Acked-by: Michal Hocko Acked-by: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- include/linux/sched/task.h | 2 ++ init/main.c | 1 - kernel/fork.c | 5 ----- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index a74ec619ac51..11b4fba82950 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -39,6 +39,8 @@ void __noreturn do_task_dead(void); extern void proc_caches_init(void); +extern void fork_init(void); + extern void release_task(struct task_struct * p); #ifdef CONFIG_HAVE_COPY_THREAD_TLS diff --git a/init/main.c b/init/main.c index 51067e2db509..b1ab36fe1a55 100644 --- a/init/main.c +++ b/init/main.c @@ -98,7 +98,6 @@ static int kernel_init(void *); extern void init_IRQ(void); -extern void fork_init(void); extern void radix_tree_init(void); /* diff --git a/kernel/fork.c b/kernel/fork.c index 3352fdbd5e20..3d9d6a28e21d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -162,10 +162,6 @@ static inline void free_task_struct(struct task_struct *tsk) } #endif -void __weak arch_release_thread_stack(unsigned long *stack) -{ -} - #ifndef CONFIG_ARCH_THREAD_STACK_ALLOCATOR /* @@ -348,7 +344,6 @@ static void release_task_stack(struct task_struct *tsk) return; /* Better to leak the stack than to free prematurely */ account_kernel_stack(tsk, -1); - arch_release_thread_stack(tsk->stack); free_thread_stack(tsk); tsk->stack = NULL; #ifdef CONFIG_VMAP_STACK From 30e1e00002bfb0ac87d5d4ec06704cbec11be440 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 3 Jan 2019 15:29:05 -0800 Subject: [PATCH 1289/3715] drivers/base/platform.c: kmemleak ignore a known leak [ Upstream commit 967d3010df8b6f6f9aa95c198edc5fe3646ebf36 ] unreferenced object 0xffff808ec6dc5a80 (size 128): comm "swapper/0", pid 1, jiffies 4294938063 (age 2560.530s) hex dump (first 32 bytes): ff ff ff ff 00 00 00 00 6b 6b 6b 6b 6b 6b 6b 6b ........kkkkkkkk 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk backtrace: [<00000000476dcf8c>] kmem_cache_alloc_trace+0x430/0x500 [<000000004f708d37>] platform_device_register_full+0xbc/0x1e8 [<000000006c2a7ec7>] acpi_create_platform_device+0x370/0x450 [<00000000ef135642>] acpi_default_enumeration+0x34/0x78 [<000000003bd9a052>] acpi_bus_attach+0x2dc/0x3e0 [<000000003cf4f7f2>] acpi_bus_attach+0x108/0x3e0 [<000000003cf4f7f2>] acpi_bus_attach+0x108/0x3e0 [<000000002968643e>] acpi_bus_scan+0xb0/0x110 [<0000000010dd0bd7>] acpi_scan_init+0x1a8/0x410 [<00000000965b3c5a>] acpi_init+0x408/0x49c [<00000000ed4b9fe2>] do_one_initcall+0x178/0x7f4 [<00000000a5ac5a74>] kernel_init_freeable+0x9d4/0xa9c [<0000000070ea6c15>] kernel_init+0x18/0x138 [<00000000fb8fff06>] ret_from_fork+0x10/0x1c [<0000000041273a0d>] 0xffffffffffffffff Then, faddr2line pointed out this line, /* * This memory isn't freed when the device is put, * I don't have a nice idea for that though. Conceptually * dma_mask in struct device should not be a pointer. * See http://thread.gmane.org/gmane.linux.kernel.pci/9081 */ pdev->dev.dma_mask = kmalloc(sizeof(*pdev->dev.dma_mask), GFP_KERNEL); Since this leak has existed for more than 8 years and it does not reference other parts of the memory, let kmemleak ignore it, so users don't need to waste time reporting this in the future. Link: http://lkml.kernel.org/r/20181206160751.36211-1-cai@gmx.us Signed-off-by: Qian Cai Reviewed-by: Andrew Morton Cc: Greg Kroah-Hartman Cc: "Rafael J . Wysocki" Cc: Catalin Marinas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- drivers/base/platform.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 9045c5f3734e..f1105de0d9fe 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "base.h" #include "power/power.h" @@ -526,6 +527,8 @@ struct platform_device *platform_device_register_full( if (!pdev->dev.dma_mask) goto err; + kmemleak_ignore(pdev->dev.dma_mask); + *pdev->dev.dma_mask = pdevinfo->dma_mask; pdev->dev.coherent_dma_mask = pdevinfo->dma_mask; } From d8e74cb55c41aa7fae79afa399899728803417e9 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Sat, 5 Jan 2019 13:21:18 -0800 Subject: [PATCH 1290/3715] lib/genalloc.c: include vmalloc.h [ Upstream commit 35004f2e55807a1a1491db24ab512dd2f770a130 ] Fixes build break on most ARM/ARM64 defconfigs: lib/genalloc.c: In function 'gen_pool_add_virt': lib/genalloc.c:190:10: error: implicit declaration of function 'vzalloc_node'; did you mean 'kzalloc_node'? lib/genalloc.c:190:8: warning: assignment to 'struct gen_pool_chunk *' from 'int' makes pointer from integer without a cast [-Wint-conversion] lib/genalloc.c: In function 'gen_pool_destroy': lib/genalloc.c:254:3: error: implicit declaration of function 'vfree'; did you mean 'kfree'? Fixes: 6862d2fc8185 ('lib/genalloc.c: use vzalloc_node() to allocate the bitmap') Cc: Huang Shijie Cc: Andrew Morton Cc: Alexey Skidanov Signed-off-by: Olof Johansson Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- lib/genalloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/genalloc.c b/lib/genalloc.c index f365d71cdc77..7e85d1e37a6e 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -35,6 +35,7 @@ #include #include #include +#include static inline size_t chunk_size(const struct gen_pool_chunk *chunk) { From 43ac6874069c9e6a01b3bce018778a230c153e75 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 2 Jan 2019 15:36:54 +0100 Subject: [PATCH 1291/3715] mtd: Check add_mtd_device() ret code [ Upstream commit 2b6f0090a3335b7bdd03ca520c35591159463041 ] add_mtd_device() can fail. We should always check its return value and gracefully handle the failure case. Fix the call sites where this not done (in mtdpart.c) and add a __must_check attribute to the prototype to avoid this kind of mistakes. Signed-off-by: Boris Brezillon Signed-off-by: Sasha Levin --- drivers/mtd/mtdcore.h | 2 +- drivers/mtd/mtdpart.c | 36 +++++++++++++++++++++++++++++++----- 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/mtd/mtdcore.h b/drivers/mtd/mtdcore.h index 37accfd0400e..24480b75a88d 100644 --- a/drivers/mtd/mtdcore.h +++ b/drivers/mtd/mtdcore.h @@ -7,7 +7,7 @@ extern struct mutex mtd_table_mutex; struct mtd_info *__mtd_next_device(int i); -int add_mtd_device(struct mtd_info *mtd); +int __must_check add_mtd_device(struct mtd_info *mtd); int del_mtd_device(struct mtd_info *mtd); int add_mtd_partitions(struct mtd_info *, const struct mtd_partition *, int); int del_mtd_partitions(struct mtd_info *); diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index a308e707392d..27d9785487d6 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -684,10 +684,22 @@ int mtd_add_partition(struct mtd_info *parent, const char *name, list_add(&new->list, &mtd_partitions); mutex_unlock(&mtd_partitions_mutex); - add_mtd_device(&new->mtd); + ret = add_mtd_device(&new->mtd); + if (ret) + goto err_remove_part; mtd_add_partition_attrs(new); + return 0; + +err_remove_part: + mutex_lock(&mtd_partitions_mutex); + list_del(&new->list); + mutex_unlock(&mtd_partitions_mutex); + + free_partition(new); + pr_info("%s:%i\n", __func__, __LINE__); + return ret; } EXPORT_SYMBOL_GPL(mtd_add_partition); @@ -778,22 +790,31 @@ int add_mtd_partitions(struct mtd_info *master, { struct mtd_part *slave; uint64_t cur_offset = 0; - int i; + int i, ret; printk(KERN_NOTICE "Creating %d MTD partitions on \"%s\":\n", nbparts, master->name); for (i = 0; i < nbparts; i++) { slave = allocate_partition(master, parts + i, i, cur_offset); if (IS_ERR(slave)) { - del_mtd_partitions(master); - return PTR_ERR(slave); + ret = PTR_ERR(slave); + goto err_del_partitions; } mutex_lock(&mtd_partitions_mutex); list_add(&slave->list, &mtd_partitions); mutex_unlock(&mtd_partitions_mutex); - add_mtd_device(&slave->mtd); + ret = add_mtd_device(&slave->mtd); + if (ret) { + mutex_lock(&mtd_partitions_mutex); + list_del(&slave->list); + mutex_unlock(&mtd_partitions_mutex); + + free_partition(slave); + goto err_del_partitions; + } + mtd_add_partition_attrs(slave); if (parts[i].types) mtd_parse_part(slave, parts[i].types); @@ -802,6 +823,11 @@ int add_mtd_partitions(struct mtd_info *master, } return 0; + +err_del_partitions: + del_mtd_partitions(master); + + return ret; } static DEFINE_SPINLOCK(part_parser_lock); From 8d0a31370a92aa6e1bd12b00b2f4dd0a0183672d Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sat, 5 Jan 2019 10:52:23 -0600 Subject: [PATCH 1292/3715] tipc: fix memory leak in tipc_nl_compat_publ_dump [ Upstream commit f87d8ad9233f115db92c6c087d58403b0009ed36 ] There is a memory leak in case genlmsg_put fails. Fix this by freeing *args* before return. Addresses-Coverity-ID: 1476406 ("Resource leak") Fixes: 46273cf7e009 ("tipc: fix a missing check of genlmsg_put") Signed-off-by: Gustavo A. R. Silva Acked-by: Ying Xue Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/tipc/netlink_compat.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 91d51a595ac2..bbd05707c4e0 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -974,8 +974,10 @@ static int tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, u32 sock) hdr = genlmsg_put(args, 0, 0, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_PUBL_GET); - if (!hdr) + if (!hdr) { + kfree_skb(args); return -EMSGSIZE; + } nest = nla_nest_start(args, TIPC_NLA_SOCK); if (!nest) { From acf72ff8b68cf81044bfa0d04093151b2340c430 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 8 Jan 2019 12:30:00 +0300 Subject: [PATCH 1293/3715] net/core/neighbour: tell kmemleak about hash tables [ Upstream commit 85704cb8dcfd88d351bfc87faaeba1c8214f3177 ] This fixes false-positive kmemleak reports about leaked neighbour entries: unreferenced object 0xffff8885c6e4d0a8 (size 1024): comm "softirq", pid 0, jiffies 4294922664 (age 167640.804s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 20 2c f3 83 ff ff ff ff ........ ,...... 08 c0 ef 5f 84 88 ff ff 01 8c 7d 02 01 00 00 00 ..._......}..... backtrace: [<00000000748509fe>] ip6_finish_output2+0x887/0x1e40 [<0000000036d7a0d8>] ip6_output+0x1ba/0x600 [<0000000027ea7dba>] ip6_send_skb+0x92/0x2f0 [<00000000d6e2111d>] udp_v6_send_skb.isra.24+0x680/0x15e0 [<000000000668a8be>] udpv6_sendmsg+0x18c9/0x27a0 [<000000004bd5fa90>] sock_sendmsg+0xb3/0xf0 [<000000008227b29f>] ___sys_sendmsg+0x745/0x8f0 [<000000008698009d>] __sys_sendmsg+0xde/0x170 [<00000000889dacf1>] do_syscall_64+0x9b/0x400 [<0000000081cdb353>] entry_SYSCALL_64_after_hwframe+0x49/0xbe [<000000005767ed39>] 0xffffffffffffffff Signed-off-by: Konstantin Khlebnikov Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/core/neighbour.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index eb3efeabac91..9a28a21a51f0 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -18,6 +18,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include @@ -361,12 +362,14 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) ret = kmalloc(sizeof(*ret), GFP_ATOMIC); if (!ret) return NULL; - if (size <= PAGE_SIZE) + if (size <= PAGE_SIZE) { buckets = kzalloc(size, GFP_ATOMIC); - else + } else { buckets = (struct neighbour __rcu **) __get_free_pages(GFP_ATOMIC | __GFP_ZERO, get_order(size)); + kmemleak_alloc(buckets, size, 0, GFP_ATOMIC); + } if (!buckets) { kfree(ret); return NULL; @@ -386,10 +389,12 @@ static void neigh_hash_free_rcu(struct rcu_head *head) size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *); struct neighbour __rcu **buckets = nht->hash_buckets; - if (size <= PAGE_SIZE) + if (size <= PAGE_SIZE) { kfree(buckets); - else + } else { + kmemleak_free(buckets); free_pages((unsigned long)buckets, get_order(size)); + } kfree(nht); } From 5ab9b2bc8a7617c8a5d7f34505c3f8485d580607 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 15 Jan 2019 17:31:29 -0600 Subject: [PATCH 1294/3715] PCI/MSI: Return -ENOSPC from pci_alloc_irq_vectors_affinity() [ Upstream commit 77f88abd4a6f73a1a68dbdc0e3f21575fd508fc3 ] The API of pci_alloc_irq_vectors_affinity() says it returns -ENOSPC if fewer than @min_vecs interrupt vectors are available for @dev. However, if a device supports MSI-X but not MSI and a caller requests @min_vecs that can't be satisfied by MSI-X, we previously returned -EINVAL (from the failed attempt to enable MSI), not -ENOSPC. When -ENOSPC is returned, callers may reduce the number IRQs they request and try again. Most callers can use the @min_vecs and @max_vecs parameters to avoid this retry loop, but that doesn't work when using IRQ affinity "nr_sets" because rebalancing the sets is driver-specific. This return value bug has been present since pci_alloc_irq_vectors() was added in v4.10 by aff171641d18 ("PCI: Provide sensible IRQ vector alloc/free routines"), but it wasn't an issue because @min_vecs/@max_vecs removed the need for callers to iteratively reduce the number of IRQs requested and retry the allocation, so they didn't need to distinguish -ENOSPC from -EINVAL. In v5.0, 6da4b3ab9a6e ("genirq/affinity: Add support for allocating interrupt sets") added IRQ sets to the interface, which reintroduced the need to check for -ENOSPC and possibly reduce the number of IRQs requested and retry the allocation. Signed-off-by: Ming Lei [bhelgaas: changelog] Signed-off-by: Bjorn Helgaas Cc: Jens Axboe Cc: Keith Busch Cc: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/pci/msi.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 536e9a5cd2b1..d66ef88e13cf 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1156,7 +1156,8 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, const struct irq_affinity *affd) { static const struct irq_affinity msi_default_affd; - int vecs = -ENOSPC; + int msix_vecs = -ENOSPC; + int msi_vecs = -ENOSPC; if (flags & PCI_IRQ_AFFINITY) { if (!affd) @@ -1167,16 +1168,17 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, } if (flags & PCI_IRQ_MSIX) { - vecs = __pci_enable_msix_range(dev, NULL, min_vecs, max_vecs, - affd); - if (vecs > 0) - return vecs; + msix_vecs = __pci_enable_msix_range(dev, NULL, min_vecs, + max_vecs, affd); + if (msix_vecs > 0) + return msix_vecs; } if (flags & PCI_IRQ_MSI) { - vecs = __pci_enable_msi_range(dev, min_vecs, max_vecs, affd); - if (vecs > 0) - return vecs; + msi_vecs = __pci_enable_msi_range(dev, min_vecs, max_vecs, + affd); + if (msi_vecs > 0) + return msi_vecs; } /* use legacy irq if allowed */ @@ -1187,7 +1189,9 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, } } - return vecs; + if (msix_vecs == -ENOSPC) + return -ENOSPC; + return msi_vecs; } EXPORT_SYMBOL(pci_alloc_irq_vectors_affinity); From 951c390b1edccf886a91dd019e569a4b9b5fdec0 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Mon, 14 Jan 2019 13:38:43 +0300 Subject: [PATCH 1295/3715] net/core/neighbour: fix kmemleak minimal reference count for hash tables [ Upstream commit 01b833ab44c9e484060aad72267fc7e71beb559b ] This should be 1 for normal allocations, 0 disables leak reporting. Signed-off-by: Konstantin Khlebnikov Reported-by: Cong Wang Fixes: 85704cb8dcfd ("net/core/neighbour: tell kmemleak about hash tables") Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/core/neighbour.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 9a28a21a51f0..2664ad58e5c0 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -368,7 +368,7 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) buckets = (struct neighbour __rcu **) __get_free_pages(GFP_ATOMIC | __GFP_ZERO, get_order(size)); - kmemleak_alloc(buckets, size, 0, GFP_ATOMIC); + kmemleak_alloc(buckets, size, 1, GFP_ATOMIC); } if (!buckets) { kfree(ret); From f21cb374df34cacd7cae4dff25989e9288e5d537 Mon Sep 17 00:00:00 2001 From: He Zhe Date: Thu, 17 Jan 2019 17:00:19 +0800 Subject: [PATCH 1296/3715] serial: 8250: Fix serial8250 initialization crash [ Upstream commit 352c4cf40c4a7d439fa5d30aa2160f54b394da82 ] The initialization code of interrupt backoff work might reference NULL pointer and cause the following crash, if no port was found. [ 10.017727] CPU 0 Unable to handle kernel paging request at virtual address 000001b0, epc == 807088e0, ra == 8070863c ---- snip ---- [ 11.704470] [<807088e0>] serial8250_register_8250_port+0x318/0x4ac [ 11.747251] [<80708d74>] serial8250_probe+0x148/0x1c0 [ 11.789301] [<80728450>] platform_drv_probe+0x40/0x94 [ 11.830515] [<807264f8>] really_probe+0xf8/0x318 [ 11.870876] [<80726b7c>] __driver_attach+0x110/0x12c [ 11.910960] [<80724374>] bus_for_each_dev+0x78/0xcc [ 11.951134] [<80725958>] bus_add_driver+0x200/0x234 [ 11.989756] [<807273d8>] driver_register+0x84/0x148 [ 12.029832] [<80d72f84>] serial8250_init+0x138/0x198 [ 12.070447] [<80100e6c>] do_one_initcall+0x5c/0x2a0 [ 12.110104] [<80d3a208>] kernel_init_freeable+0x370/0x484 [ 12.150722] [<80a49420>] kernel_init+0x10/0xf8 [ 12.191517] [<8010756c>] ret_from_kernel_thread+0x14/0x1c This patch makes sure the initialization code can be reached only if a port is found. Fixes: 6d7f677a2afa ("serial: 8250: Rate limit serial port rx interrupts during input overruns") Signed-off-by: He Zhe Reviewed-by: Darwin Dingel Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/8250/8250_core.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index ceeea4b159c4..c698ebab6d3b 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -1077,15 +1077,16 @@ int serial8250_register_8250_port(struct uart_8250_port *up) ret = 0; } - } - /* Initialise interrupt backoff work if required */ - if (up->overrun_backoff_time_ms > 0) { - uart->overrun_backoff_time_ms = up->overrun_backoff_time_ms; - INIT_DELAYED_WORK(&uart->overrun_backoff, - serial_8250_overrun_backoff_work); - } else { - uart->overrun_backoff_time_ms = 0; + /* Initialise interrupt backoff work if required */ + if (up->overrun_backoff_time_ms > 0) { + uart->overrun_backoff_time_ms = + up->overrun_backoff_time_ms; + INIT_DELAYED_WORK(&uart->overrun_backoff, + serial_8250_overrun_backoff_work); + } else { + uart->overrun_backoff_time_ms = 0; + } } mutex_unlock(&serial_mutex); From 189f921470547fc15f1e4c49bb5a27f9b4a0b6f4 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Tue, 18 Dec 2018 16:46:29 +0100 Subject: [PATCH 1297/3715] gpu: ipu-v3: pre: don't trigger update if buffer address doesn't change [ Upstream commit eb0200a4357da100064971689d3a0e9e3cf57f33 ] On a NOP double buffer update where current buffer address is the same as the next buffer address, the SDW_UPDATE bit clears too late. As we are now using this bit to determine when it is safe to signal flip completion to userspace this will delay completion of atomic commits where one plane doesn't change the buffer by a whole frame period. Fix this by remembering the last buffer address and just skip the double buffer update if it would not change the buffer address. Signed-off-by: Lucas Stach [p.zabel@pengutronix.de: initialize last_bufaddr in ipu_pre_configure] Signed-off-by: Philipp Zabel Signed-off-by: Sasha Levin --- drivers/gpu/ipu-v3/ipu-pre.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/ipu-v3/ipu-pre.c b/drivers/gpu/ipu-v3/ipu-pre.c index 1d1612e28854..6fd4af647f59 100644 --- a/drivers/gpu/ipu-v3/ipu-pre.c +++ b/drivers/gpu/ipu-v3/ipu-pre.c @@ -102,6 +102,7 @@ struct ipu_pre { void *buffer_virt; bool in_use; unsigned int safe_window_end; + unsigned int last_bufaddr; }; static DEFINE_MUTEX(ipu_pre_list_mutex); @@ -177,6 +178,7 @@ void ipu_pre_configure(struct ipu_pre *pre, unsigned int width, writel(bufaddr, pre->regs + IPU_PRE_CUR_BUF); writel(bufaddr, pre->regs + IPU_PRE_NEXT_BUF); + pre->last_bufaddr = bufaddr; val = IPU_PRE_PREF_ENG_CTRL_INPUT_PIXEL_FORMAT(0) | IPU_PRE_PREF_ENG_CTRL_INPUT_ACTIVE_BPP(active_bpp) | @@ -218,7 +220,11 @@ void ipu_pre_update(struct ipu_pre *pre, unsigned int bufaddr) unsigned short current_yblock; u32 val; + if (bufaddr == pre->last_bufaddr) + return; + writel(bufaddr, pre->regs + IPU_PRE_NEXT_BUF); + pre->last_bufaddr = bufaddr; do { if (time_after(jiffies, timeout)) { From c3513e5c27a7e6113012c9dcabcbc65975a55025 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Tue, 22 Jan 2019 19:02:17 +0000 Subject: [PATCH 1298/3715] sfc: suppress duplicate nvmem partition types in efx_ef10_mtd_probe [ Upstream commit 3366463513f544c12c6b88c13da4462ee9e7a1a1 ] Use a bitmap to keep track of which partition types we've already seen; for duplicates, return -EEXIST from efx_ef10_mtd_probe_partition() and thus skip adding that partition. Duplicate partitions occur because of the A/B backup scheme used by newer sfc NICs. Prior to this patch they cause sysfs_warn_dup errors because they have the same name, causing us not to expose any MTDs at all. Signed-off-by: Edward Cree Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/sfc/ef10.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 09352ee43b55..cc3be94d0562 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -5852,22 +5852,25 @@ static const struct efx_ef10_nvram_type_info efx_ef10_nvram_types[] = { { NVRAM_PARTITION_TYPE_LICENSE, 0, 0, "sfc_license" }, { NVRAM_PARTITION_TYPE_PHY_MIN, 0xff, 0, "sfc_phy_fw" }, }; +#define EF10_NVRAM_PARTITION_COUNT ARRAY_SIZE(efx_ef10_nvram_types) static int efx_ef10_mtd_probe_partition(struct efx_nic *efx, struct efx_mcdi_mtd_partition *part, - unsigned int type) + unsigned int type, + unsigned long *found) { MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_METADATA_IN_LEN); MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_METADATA_OUT_LENMAX); const struct efx_ef10_nvram_type_info *info; size_t size, erase_size, outlen; + int type_idx = 0; bool protected; int rc; - for (info = efx_ef10_nvram_types; ; info++) { - if (info == - efx_ef10_nvram_types + ARRAY_SIZE(efx_ef10_nvram_types)) + for (type_idx = 0; ; type_idx++) { + if (type_idx == EF10_NVRAM_PARTITION_COUNT) return -ENODEV; + info = efx_ef10_nvram_types + type_idx; if ((type & ~info->type_mask) == info->type) break; } @@ -5880,6 +5883,13 @@ static int efx_ef10_mtd_probe_partition(struct efx_nic *efx, if (protected) return -ENODEV; /* hide it */ + /* If we've already exposed a partition of this type, hide this + * duplicate. All operations on MTDs are keyed by the type anyway, + * so we can't act on the duplicate. + */ + if (__test_and_set_bit(type_idx, found)) + return -EEXIST; + part->nvram_type = type; MCDI_SET_DWORD(inbuf, NVRAM_METADATA_IN_TYPE, type); @@ -5908,6 +5918,7 @@ static int efx_ef10_mtd_probe_partition(struct efx_nic *efx, static int efx_ef10_mtd_probe(struct efx_nic *efx) { MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_PARTITIONS_OUT_LENMAX); + DECLARE_BITMAP(found, EF10_NVRAM_PARTITION_COUNT); struct efx_mcdi_mtd_partition *parts; size_t outlen, n_parts_total, i, n_parts; unsigned int type; @@ -5936,11 +5947,13 @@ static int efx_ef10_mtd_probe(struct efx_nic *efx) for (i = 0; i < n_parts_total; i++) { type = MCDI_ARRAY_DWORD(outbuf, NVRAM_PARTITIONS_OUT_TYPE_ID, i); - rc = efx_ef10_mtd_probe_partition(efx, &parts[n_parts], type); - if (rc == 0) - n_parts++; - else if (rc != -ENODEV) + rc = efx_ef10_mtd_probe_partition(efx, &parts[n_parts], type, + found); + if (rc == -EEXIST || rc == -ENODEV) + continue; + if (rc) goto fail; + n_parts++; } rc = efx_mtd_add(efx, &parts[0].common, n_parts, sizeof(*parts)); From 35e074ebdadb8f2f186d189269b05fd3297b2341 Mon Sep 17 00:00:00 2001 From: wenxu Date: Sat, 19 Jan 2019 13:11:25 +0800 Subject: [PATCH 1299/3715] ip_tunnel: Make none-tunnel-dst tunnel port work with lwtunnel [ Upstream commit d71b57532d70c03f4671dd04e84157ac6bf021b0 ] ip l add dev tun type gretap key 1000 ip a a dev tun 10.0.0.1/24 Packets with tun-id 1000 can be recived by tun dev. But packet can't be sent through dev tun for non-tunnel-dst With this patch: tunnel-dst can be get through lwtunnel like beflow: ip r a 10.0.0.7 encap ip dst 172.168.0.11 dev tun Signed-off-by: wenxu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/ip_tunnel.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index fabc299cb875..7a31287ff123 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -661,13 +661,19 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, dst = tnl_params->daddr; if (dst == 0) { /* NBMA tunnel */ + struct ip_tunnel_info *tun_info; if (!skb_dst(skb)) { dev->stats.tx_fifo_errors++; goto tx_error; } - if (skb->protocol == htons(ETH_P_IP)) { + tun_info = skb_tunnel_info(skb); + if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) && + ip_tunnel_info_af(tun_info) == AF_INET && + tun_info->key.u.ipv4.dst) + dst = tun_info->key.u.ipv4.dst; + else if (skb->protocol == htons(ETH_P_IP)) { rt = skb_rtable(skb); dst = rt_nexthop(rt, inner_iph->daddr); } From b89c62555ab774d816c7b3d341cfb9f3b27c7a59 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 26 Jan 2019 21:12:19 +0100 Subject: [PATCH 1300/3715] decnet: fix DN_IFREQ_SIZE [ Upstream commit 50c2936634bcb1db78a8ca63249236810c11a80f ] Digging through the ioctls with Al because of the previous patches, we found that on 64-bit decnet's dn_dev_ioctl() is wrong, because struct ifreq::ifr_ifru is actually 24 bytes (not 16 as expected from struct sockaddr) due to the ifru_map and ifru_settings members. Clearly, decnet expects the ioctl to be called with a struct like struct ifreq_dn { char ifr_name[IFNAMSIZ]; struct sockaddr_dn ifr_addr; }; since it does struct ifreq *ifr = ...; struct sockaddr_dn *sdn = (struct sockaddr_dn *)&ifr->ifr_addr; This means that DN_IFREQ_SIZE is too big for what it wants on 64-bit, as it is sizeof(struct ifreq) - sizeof(struct sockaddr) + sizeof(struct sockaddr_dn) This assumes that sizeof(struct sockaddr) is the size of ifr_ifru but that isn't true. Fix this to use offsetof(struct ifreq, ifr_ifru). This indeed doesn't really matter much - the result is that we copy in/out 8 bytes more than we should on 64-bit platforms. In case the "struct ifreq_dn" lands just on the end of a page though it might lead to faults. As far as I can tell, it has been like this forever, so it seems very likely that nobody cares. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/decnet/dn_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index df042b6d80b8..22876a197ebe 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -56,7 +56,7 @@ #include #include -#define DN_IFREQ_SIZE (sizeof(struct ifreq) - sizeof(struct sockaddr) + sizeof(struct sockaddr_dn)) +#define DN_IFREQ_SIZE (offsetof(struct ifreq, ifr_ifru) + sizeof(struct sockaddr_dn)) static char dn_rt_all_end_mcast[ETH_ALEN] = {0xAB,0x00,0x00,0x04,0x00,0x00}; static char dn_rt_all_rt_mcast[ETH_ALEN] = {0xAB,0x00,0x00,0x03,0x00,0x00}; From bbd45bd490f0a71a59c364d6fc664c1086771721 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 30 Jan 2019 18:51:02 +0100 Subject: [PATCH 1301/3715] net/smc: prevent races between smc_lgr_terminate() and smc_conn_free() [ Upstream commit 77f838ace755d2f466536c44dac6c856f62cd901 ] To prevent races between smc_lgr_terminate() and smc_conn_free() add an extra check of the lgr field before accessing it, and cancel a delayed free_work when a new smc connection is created. This fixes the problem that free_work cleared the lgr variable but smc_lgr_terminate() or smc_conn_free() still access it in parallel. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/smc/smc_core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index f04a037dc967..0de788fa43e9 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -103,6 +103,8 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn) struct smc_link_group *lgr = conn->lgr; int reduced = 0; + if (!lgr) + return; write_lock_bh(&lgr->conns_lock); if (conn->alert_token_local) { reduced = 1; @@ -431,6 +433,8 @@ int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr, local_contact = SMC_REUSE_CONTACT; conn->lgr = lgr; smc_lgr_register_conn(conn); /* add smc conn to lgr */ + if (delayed_work_pending(&lgr->free_work)) + cancel_delayed_work(&lgr->free_work); write_unlock_bh(&lgr->conns_lock); break; } From 479d9460a2919e1900d5147a1271837201ffdfda Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 7 Feb 2019 11:55:39 +0100 Subject: [PATCH 1302/3715] blktrace: Show requests without sector [ Upstream commit 0803de78049fe1b0baf44bcddc727b036fb9139b ] Currently, blktrace will not show requests that don't have any data as rq->__sector is initialized to -1 which is out of device range and thus discarded by act_log_check(). This is most notably the case for cache flush requests sent to the device. Fix the problem by making blk_rq_trace_sector() return 0 for requests without initialized sector. Reviewed-by: Johannes Thumshirn Signed-off-by: Jan Kara Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- include/linux/blktrace_api.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 8804753805ac..7bb2d8de9f30 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -116,7 +116,13 @@ extern void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes); static inline sector_t blk_rq_trace_sector(struct request *rq) { - return blk_rq_is_passthrough(rq) ? 0 : blk_rq_pos(rq); + /* + * Tracing should ignore starting sector for passthrough requests and + * requests where starting sector didn't get set. + */ + if (blk_rq_is_passthrough(rq) || blk_rq_pos(rq) == (sector_t)-1) + return 0; + return blk_rq_pos(rq); } static inline unsigned int blk_rq_trace_nr_sectors(struct request *rq) From c5924bac0cf78e60d74d0b3185b033fdefde7acf Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Mon, 11 Feb 2019 09:18:28 +0700 Subject: [PATCH 1303/3715] tipc: fix skb may be leaky in tipc_link_input [ Upstream commit 7384b538d3aed2ed49d3575483d17aeee790fb06 ] When we free skb at tipc_data_input, we return a 'false' boolean. Then, skb passed to subcalling tipc_link_input in tipc_link_rcv, 1303 int tipc_link_rcv: ... 1354 if (!tipc_data_input(l, skb, l->inputq)) 1355 rc |= tipc_link_input(l, skb, l->inputq); Fix it by simple changing to a 'true' boolean when skb is being free-ed. Then, tipc_link_rcv will bypassed to subcalling tipc_link_input as above condition. Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Hoang Le Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/tipc/link.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 631bfc7e9127..da749916faac 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1073,7 +1073,7 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, default: pr_warn("Dropping received illegal msg type\n"); kfree_skb(skb); - return false; + return true; }; } From 50740980d526e97eea76ed0ff54b1935ed971565 Mon Sep 17 00:00:00 2001 From: Bert Kenward Date: Tue, 12 Feb 2019 13:10:00 +0000 Subject: [PATCH 1304/3715] sfc: initialise found bitmap in efx_ef10_mtd_probe [ Upstream commit c65285428b6e7797f1bb063f33b0ae7e93397b7b ] The bitmap of found partitions in efx_ef10_mtd_probe was not initialised, causing partitions to be suppressed based off whatever value was in the bitmap at the start. Fixes: 3366463513f5 ("sfc: suppress duplicate nvmem partition types in efx_ef10_mtd_probe") Signed-off-by: Bert Kenward Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/sfc/ef10.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index cc3be94d0562..2d92a9fe4606 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -5918,7 +5918,7 @@ static int efx_ef10_mtd_probe_partition(struct efx_nic *efx, static int efx_ef10_mtd_probe(struct efx_nic *efx) { MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_PARTITIONS_OUT_LENMAX); - DECLARE_BITMAP(found, EF10_NVRAM_PARTITION_COUNT); + DECLARE_BITMAP(found, EF10_NVRAM_PARTITION_COUNT) = { 0 }; struct efx_mcdi_mtd_partition *parts; size_t outlen, n_parts_total, i, n_parts; unsigned int type; From 75fa3a9a2f7b6daf266c6cbbecdd83fbcb9dad6b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Feb 2019 12:26:27 -0800 Subject: [PATCH 1305/3715] net: fix possible overflow in __sk_mem_raise_allocated() [ Upstream commit 5bf325a53202b8728cf7013b72688c46071e212e ] With many active TCP sockets, fat TCP sockets could fool __sk_mem_raise_allocated() thanks to an overflow. They would increase their share of the memory, instead of decreasing it. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/sock.h | 2 +- net/core/sock.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 780c6c0a86f0..0af46cbd3649 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1232,7 +1232,7 @@ static inline void sk_sockets_allocated_inc(struct sock *sk) percpu_counter_inc(sk->sk_prot->sockets_allocated); } -static inline int +static inline u64 sk_sockets_allocated_read_positive(struct sock *sk) { return percpu_counter_read_positive(sk->sk_prot->sockets_allocated); diff --git a/net/core/sock.c b/net/core/sock.c index 7ccbcd853cbc..90ccbbf9e6b0 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2357,7 +2357,7 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) } if (sk_has_memory_pressure(sk)) { - int alloc; + u64 alloc; if (!sk_under_memory_pressure(sk)) return 1; From a184a9d6015ec0ec7ee7551d7157594ee89d02b2 Mon Sep 17 00:00:00 2001 From: Maciej Kwiecien Date: Fri, 22 Feb 2019 09:45:26 +0100 Subject: [PATCH 1306/3715] sctp: don't compare hb_timer expire date before starting it [ Upstream commit d1f20c03f48102e52eb98b8651d129b83134cae4 ] hb_timer might not start at all for a particular transport because its start is conditional. In a result a node is not sending heartbeats. Function sctp_transport_reset_hb_timer has two roles: - initial start of hb_timer for a given transport, - update expire date of hb_timer for a given transport. The function is optimized to update timer's expire only if it is before a new calculated one but this comparison is invalid for a timer which has not yet started. Such a timer has expire == 0 and if a new expire value is bigger than (MAX_JIFFIES / 2 + 2) then "time_before" macro will fail and timer will not start resulting in no heartbeat packets send by the node. This was found when association was initialized within first 5 mins after system boot due to jiffies init value which is near to MAX_JIFFIES. Test kernel version: 4.9.154 (ARCH=arm) hb_timer.expire = 0; //initialized, not started timer new_expire = MAX_JIFFIES / 2 + 2; //or more time_before(hb_timer.expire, new_expire) == false Fixes: ba6f5e33bdbb ("sctp: avoid refreshing heartbeat timer too often") Reported-by: Marcin Stojek Tested-by: Marcin Stojek Signed-off-by: Maciej Kwiecien Reviewed-by: Alexander Sverdlin Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/sctp/transport.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 43105cf04bc4..274df899e7bf 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -210,7 +210,8 @@ void sctp_transport_reset_hb_timer(struct sctp_transport *transport) /* When a data chunk is sent, reset the heartbeat interval. */ expires = jiffies + sctp_transport_timeout(transport); - if (time_before(transport->hb_timer.expires, expires) && + if ((time_before(transport->hb_timer.expires, expires) || + !timer_pending(&transport->hb_timer)) && !mod_timer(&transport->hb_timer, expires + prandom_u32_max(transport->rto))) sctp_transport_hold(transport); From 7e50f6c7dc3006d3f92d668b61b2e75bec793ccf Mon Sep 17 00:00:00 2001 From: Peng Sun Date: Tue, 26 Feb 2019 22:15:37 +0800 Subject: [PATCH 1307/3715] bpf: decrease usercnt if bpf_map_new_fd() fails in bpf_map_get_fd_by_id() [ Upstream commit 781e62823cb81b972dc8652c1827205cda2ac9ac ] In bpf/syscall.c, bpf_map_get_fd_by_id() use bpf_map_inc_not_zero() to increase the refcount, both map->refcnt and map->usercnt. Then, if bpf_map_new_fd() fails, should handle map->usercnt too. Fixes: bd5f5f4ecb78 ("bpf: Add BPF_MAP_GET_FD_BY_ID") Signed-off-by: Peng Sun Acked-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- kernel/bpf/syscall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 59d2e94ecb79..34110450a78f 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1354,7 +1354,7 @@ static int bpf_map_get_fd_by_id(const union bpf_attr *attr) fd = bpf_map_new_fd(map); if (fd < 0) - bpf_map_put(map); + bpf_map_put_with_uref(map); return fd; } From 7d962062251ce74041ba3ce03d7e34a51bbb67f7 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 27 Feb 2019 13:37:26 +0300 Subject: [PATCH 1308/3715] net: dev: Use unsigned integer as an argument to left-shift [ Upstream commit f4d7b3e23d259c44f1f1c39645450680fcd935d6 ] 1 << 31 is Undefined Behaviour according to the C standard. Use U type modifier to avoid theoretical overflow. Signed-off-by: Andy Shevchenko Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 40b830d55fe5..4725a9d9597f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3522,7 +3522,7 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits) if (debug_value == 0) /* no output */ return 0; /* set low N bits */ - return (1 << debug_value) - 1; + return (1U << debug_value) - 1; } static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) From 4e4a0cf997f8c40747bdf6465070f2bc04dcdc18 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 28 Feb 2019 16:34:37 +0100 Subject: [PATCH 1309/3715] kvm: properly check debugfs dentry before using it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8ed0579c12b2fe56a1fac2f712f58fc26c1dc49b ] debugfs can now report an error code if something went wrong instead of just NULL. So if the return value is to be used as a "real" dentry, it needs to be checked if it is an error before dereferencing it. This is now happening because of ff9fb72bc077 ("debugfs: return error values, not NULL"). syzbot has found a way to trigger multiple debugfs files attempting to be created, which fails, and then the error code gets passed to dentry_path_raw() which obviously does not like it. Reported-by: Eric Biggers Reported-and-tested-by: syzbot+7857962b4d45e602b8ad@syzkaller.appspotmail.com Cc: "Radim Krčmář" Cc: kvm@vger.kernel.org Acked-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index cdaacdf7bc87..deff4b3eb972 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3989,7 +3989,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) } add_uevent_var(env, "PID=%d", kvm->userspace_pid); - if (kvm->debugfs_dentry) { + if (!IS_ERR_OR_NULL(kvm->debugfs_dentry)) { char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL); if (p) { From ebf233fd40bfc70e53ac9ba6b46ec7b3439c3da1 Mon Sep 17 00:00:00 2001 From: Peng Sun Date: Wed, 27 Feb 2019 22:36:25 +0800 Subject: [PATCH 1310/3715] bpf: drop refcount if bpf_map_new_fd() fails in map_create() [ Upstream commit 352d20d611414715353ee65fc206ee57ab1a6984 ] In bpf/syscall.c, map_create() first set map->usercnt to 1, a file descriptor is supposed to return to userspace. When bpf_map_new_fd() fails, drop the refcount. Fixes: bd5f5f4ecb78 ("bpf: Add BPF_MAP_GET_FD_BY_ID") Signed-off-by: Peng Sun Acked-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- kernel/bpf/syscall.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 34110450a78f..f5c1d5479ba3 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -348,12 +348,12 @@ static int map_create(union bpf_attr *attr) err = bpf_map_new_fd(map); if (err < 0) { /* failed to allocate fd. - * bpf_map_put() is needed because the above + * bpf_map_put_with_uref() is needed because the above * bpf_map_alloc_id() has published the map * to the userspace and the userspace may * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID. */ - bpf_map_put(map); + bpf_map_put_with_uref(map); return err; } From 0cc8bd14e94d9feb4563ae76018359cf7794b654 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 23 Jan 2019 07:39:34 +0800 Subject: [PATCH 1311/3715] net: hns3: Change fw error code NOT_EXEC to NOT_SUPPORTED [ Upstream commit 4a402f47cfce904051cd8b31bef4fe2910d9dce9 ] According to firmware error code definition, the error code of 2 means NOT_SUPPORTED, this patch changes it to NOT_SUPPORTED. Signed-off-by: Yunsheng Lin Signed-off-by: Peng Li Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c | 2 ++ drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c index 8b511e6e0ce9..396ea0db7102 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c @@ -251,6 +251,8 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num) if ((enum hclge_cmd_return_status)desc_ret == HCLGE_CMD_EXEC_SUCCESS) retval = 0; + else if (desc_ret == HCLGE_CMD_NOT_SUPPORTED) + retval = -EOPNOTSUPP; else retval = -EIO; hw->cmq.last_status = (enum hclge_cmd_status)desc_ret; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 758cf3948131..3823ae6303ad 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -52,7 +52,7 @@ struct hclge_cmq_ring { enum hclge_cmd_return_status { HCLGE_CMD_EXEC_SUCCESS = 0, HCLGE_CMD_NO_AUTH = 1, - HCLGE_CMD_NOT_EXEC = 2, + HCLGE_CMD_NOT_SUPPORTED = 2, HCLGE_CMD_QUEUE_FULL = 3, }; From 2f27946e2af3d70a72937af1a34d12863f39db50 Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Wed, 13 Mar 2019 21:53:24 +0800 Subject: [PATCH 1312/3715] iommu/amd: Fix NULL dereference bug in match_hid_uid [ Upstream commit bb6bccba390c7d743c1e4427de4ef284c8cc6869 ] Add a non-NULL check to fix potential NULL pointer dereference Cleanup code to call function once. Signed-off-by: Aaron Ma Fixes: 2bf9a0a12749b ('iommu/amd: Add iommu support for ACPI HID devices') Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/amd_iommu.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 07b6cf58fd99..d09c24825734 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -139,10 +139,14 @@ static struct lock_class_key reserved_rbtree_key; static inline int match_hid_uid(struct device *dev, struct acpihid_map_entry *entry) { + struct acpi_device *adev = ACPI_COMPANION(dev); const char *hid, *uid; - hid = acpi_device_hid(ACPI_COMPANION(dev)); - uid = acpi_device_uid(ACPI_COMPANION(dev)); + if (!adev) + return -ENODEV; + + hid = acpi_device_hid(adev); + uid = acpi_device_uid(adev); if (!hid || !(*hid)) return -ENODEV; From 01dbfb2c74e7f38f225153c156065c7011754c6e Mon Sep 17 00:00:00 2001 From: Chris Coulson Date: Mon, 4 Feb 2019 10:21:23 +0000 Subject: [PATCH 1313/3715] apparmor: delete the dentry in aafs_remove() to avoid a leak [ Upstream commit 201218e4d3dfa1346e30997f48725acce3f26d01 ] Although the apparmorfs dentries are always dropped from the dentry cache when the usage count drops to zero, there is no guarantee that this will happen in aafs_remove(), as another thread might still be using it. In this scenario, this means that the dentry will temporarily continue to appear in the results of lookups, even after the call to aafs_remove(). In the case of removal of a profile - it also causes simple_rmdir() on the profile directory to fail, as the directory won't be empty until the usage counts of all child dentries have decreased to zero. This results in the dentry for the profile directory leaking and appearing empty in the file system tree forever. Signed-off-by: Chris Coulson Signed-off-by: John Johansen Signed-off-by: Sasha Levin --- security/apparmor/apparmorfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index dd746bd69a9b..c106988c1b25 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -363,6 +363,7 @@ static void aafs_remove(struct dentry *dentry) simple_rmdir(dir, dentry); else simple_unlink(dir, dentry); + d_delete(dentry); dput(dentry); } inode_unlock(dir); From 6e2dd42cede242f4ca4e7205f51c8b9ff706ca5c Mon Sep 17 00:00:00 2001 From: John Garry Date: Sat, 5 Jan 2019 00:01:28 +0800 Subject: [PATCH 1314/3715] scsi: libsas: Support SATA PHY connection rate unmatch fixing during discovery [ Upstream commit cec9771d2e954650095aa37a6a97722c8194e7d2 ] +----------+ +----------+ | | | | | |--- 3.0 G ---| |--- 6.0 G --- SAS disk | | | | | |--- 3.0 G ---| |--- 6.0 G --- SAS disk |initiator | | | | device |--- 3.0 G ---| Expander |--- 6.0 G --- SAS disk | | | | | |--- 3.0 G ---| |--- 6.0 G --- SATA disk -->failed to connect | | | | | | | |--- 6.0 G --- SATA disk -->failed to connect | | | | +----------+ +----------+ According to Serial Attached SCSI - 1.1 (SAS-1.1): If an expander PHY attached to a SATA PHY is using a physical link rate greater than the maximum connection rate supported by the pathway from an STP initiator port, a management application client should use the SMP PHY CONTROL function (see 10.4.3.10) to set the PROGRAMMED MAXIMUM PHYSICAL LINK RATE field of the expander PHY to the maximum connection rate supported by the pathway from that STP initiator port. Currently libsas does not support checking if this condition occurs, nor rectifying when it does. Such a condition is not at all common, however it has been seen on some pre-silicon environments where the initiator PHY only supports a 1.5 Gbit maximum linkrate, mated with 12G expander PHYs and 3/6G SATA phy. This patch adds support for checking and rectifying this condition during initial device discovery only. We do support checking min pathway connection rate during revalidation phase, when new devices can be detected in the topology. However we do not support in the case of the the user reprogramming PHY linkrates, such that min pathway condition is not met/maintained. A note on root port PHY rates: The libsas root port PHY rates calculation is broken. Libsas sets the rates (min, max, and current linkrate) of a root port to the same linkrate of the first PHY member of that same port. In doing so, it assumes that all other PHYs which subsequently join the port to have the same negotiated linkrate, when they could actually be different. In practice this doesn't happen, as initiator and expander PHYs are normally initialised with consistent min/max linkrates. This has not caused an issue so far, so leave alone for now. Tested-by: Jian Luo Signed-off-by: John Garry Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/libsas/sas_expander.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 7f2d00354a85..63c44eaabf69 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -817,6 +817,26 @@ static struct domain_device *sas_ex_discover_end_dev( #ifdef CONFIG_SCSI_SAS_ATA if ((phy->attached_tproto & SAS_PROTOCOL_STP) || phy->attached_sata_dev) { + if (child->linkrate > parent->min_linkrate) { + struct sas_phy_linkrates rates = { + .maximum_linkrate = parent->min_linkrate, + .minimum_linkrate = parent->min_linkrate, + }; + int ret; + + pr_notice("ex %016llx phy%02d SATA device linkrate > min pathway connection rate, attempting to lower device linkrate\n", + SAS_ADDR(child->sas_addr), phy_id); + ret = sas_smp_phy_control(parent, phy_id, + PHY_FUNC_LINK_RESET, &rates); + if (ret) { + pr_err("ex %016llx phy%02d SATA device could not set linkrate (%d)\n", + SAS_ADDR(child->sas_addr), phy_id, ret); + goto out_free; + } + pr_notice("ex %016llx phy%02d SATA device set linkrate successfully\n", + SAS_ADDR(child->sas_addr), phy_id); + child->linkrate = child->min_linkrate; + } res = sas_get_ata_info(child, phy); if (res) goto out_free; From 3ad61d642b69e42554a14eba7f18cbb20f1d6f61 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 29 Jan 2019 18:48:37 +0000 Subject: [PATCH 1315/3715] ACPI / APEI: Don't wait to serialise with oops messages when panic()ing [ Upstream commit 78b0b690f6558ed788dccafa45965325dd11ba89 ] oops_begin() exists to group printk() messages with the oops message printed by die(). To reach this caller we know that platform firmware took this error first, then notified the OS via NMI with a 'panic' severity. Don't wait for another CPU to release the die-lock before panic()ing, our only goal is to print this fatal error and panic(). This code is always called in_nmi(), and since commit 42a0bb3f7138 ("printk/nmi: generic solution for safe printk in NMI"), it has been safe to call printk() from this context. Messages are batched in a per-cpu buffer and printed via irq-work, or a call back from panic(). Link: https://patchwork.kernel.org/patch/10313555/ Acked-by: Borislav Petkov Signed-off-by: James Morse Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/apei/ghes.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 5889f6407fea..9c31c7cd5cb5 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include @@ -936,7 +935,6 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) sev = ghes_severity(ghes->estatus->error_severity); if (sev >= GHES_SEV_PANIC) { - oops_begin(); ghes_print_queued_estatus(); __ghes_panic(ghes); } From 74fac32b6474ec92898dafcf270fc0d113026e07 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 29 Jan 2019 18:48:39 +0000 Subject: [PATCH 1316/3715] ACPI / APEI: Switch estatus pool to use vmalloc memory [ Upstream commit 0ac234be1a9497498e57d958f4251f5257b116b4 ] The ghes code is careful to parse and round firmware's advertised memory requirements for CPER records, up to a maximum of 64K. However when ghes_estatus_pool_expand() does its work, it splits the requested size into PAGE_SIZE granules. This means if firmware generates 5K of CPER records, and correctly describes this in the table, __process_error() will silently fail as it is unable to allocate more than PAGE_SIZE. Switch the estatus pool to vmalloc() memory. On x86 vmalloc() memory may fault and be fixed up by vmalloc_fault(). To prevent this call vmalloc_sync_all() before an NMI handler could discover the memory. Signed-off-by: James Morse Reviewed-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/apei/ghes.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 9c31c7cd5cb5..cd6fae6ad4c2 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -170,40 +170,40 @@ static int ghes_estatus_pool_init(void) return 0; } -static void ghes_estatus_pool_free_chunk_page(struct gen_pool *pool, +static void ghes_estatus_pool_free_chunk(struct gen_pool *pool, struct gen_pool_chunk *chunk, void *data) { - free_page(chunk->start_addr); + vfree((void *)chunk->start_addr); } static void ghes_estatus_pool_exit(void) { gen_pool_for_each_chunk(ghes_estatus_pool, - ghes_estatus_pool_free_chunk_page, NULL); + ghes_estatus_pool_free_chunk, NULL); gen_pool_destroy(ghes_estatus_pool); } static int ghes_estatus_pool_expand(unsigned long len) { - unsigned long i, pages, size, addr; - int ret; + unsigned long size, addr; ghes_estatus_pool_size_request += PAGE_ALIGN(len); size = gen_pool_size(ghes_estatus_pool); if (size >= ghes_estatus_pool_size_request) return 0; - pages = (ghes_estatus_pool_size_request - size) / PAGE_SIZE; - for (i = 0; i < pages; i++) { - addr = __get_free_page(GFP_KERNEL); - if (!addr) - return -ENOMEM; - ret = gen_pool_add(ghes_estatus_pool, addr, PAGE_SIZE, -1); - if (ret) - return ret; - } - return 0; + addr = (unsigned long)vmalloc(PAGE_ALIGN(len)); + if (!addr) + return -ENOMEM; + + /* + * New allocation must be visible in all pgd before it can be found by + * an NMI allocating from the pool. + */ + vmalloc_sync_all(); + + return gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1); } static int map_gen_v2(struct ghes *ghes) From 0e7b61dce67264cd55bfd488f96a1d2f103c5989 Mon Sep 17 00:00:00 2001 From: John Garry Date: Sat, 5 Jan 2019 00:01:27 +0800 Subject: [PATCH 1317/3715] scsi: libsas: Check SMP PHY control function result [ Upstream commit 01929a65dfa13e18d89264ab1378854a91857e59 ] Currently the SMP PHY control execution result is checked, however the function result for the command is not. As such, we may be missing all potential errors, like SMP FUNCTION FAILED, INVALID REQUEST FRAME LENGTH, etc., meaning the PHY control request has failed. In some scenarios we need to ensure the function result is accepted, so add a check for this. Tested-by: Jian Luo Signed-off-by: John Garry Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/libsas/sas_expander.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 63c44eaabf69..f77d72f01da9 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -614,7 +614,14 @@ int sas_smp_phy_control(struct domain_device *dev, int phy_id, } res = smp_execute_task(dev, pc_req, PC_REQ_SIZE, pc_resp,PC_RESP_SIZE); - + if (res) { + pr_err("ex %016llx phy%02d PHY control failed: %d\n", + SAS_ADDR(dev->sas_addr), phy_id, res); + } else if (pc_resp[2] != SMP_RESP_FUNC_ACC) { + pr_err("ex %016llx phy%02d PHY control failed: function result 0x%x\n", + SAS_ADDR(dev->sas_addr), phy_id, pc_resp[2]); + res = pc_resp[2]; + } kfree(pc_resp); kfree(pc_req); return res; From 0583dc6fceb6b70cbd7e39802da41d8269fe82ac Mon Sep 17 00:00:00 2001 From: Gen Zhang Date: Sun, 26 May 2019 10:42:40 +0800 Subject: [PATCH 1318/3715] powerpc/pseries/dlpar: Fix a missing check in dlpar_parse_cc_property() [ Upstream commit efa9ace68e487ddd29c2b4d6dd23242158f1f607 ] In dlpar_parse_cc_property(), 'prop->name' is allocated by kstrdup(). kstrdup() may return NULL, so it should be checked and handle error. And prop should be freed if 'prop->name' is NULL. Signed-off-by: Gen Zhang Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/platforms/pseries/dlpar.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index f4e6565dd7a9..fb2876a84fbe 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -63,6 +63,10 @@ static struct property *dlpar_parse_cc_property(struct cc_workarea *ccwa) name = (char *)ccwa + be32_to_cpu(ccwa->name_offset); prop->name = kstrdup(name, GFP_KERNEL); + if (!prop->name) { + dlpar_free_cc_property(prop); + return NULL; + } prop->length = be32_to_cpu(ccwa->prop_length); value = (char *)ccwa + be32_to_cpu(ccwa->prop_offset); From ef04ffdd59623ef169aa1e28c896ed8801e247cb Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 30 Jan 2019 09:47:00 +0100 Subject: [PATCH 1319/3715] mtd: Remove a debug trace in mtdpart.c [ Upstream commit bda2ab56356b9acdfab150f31c4bac9846253092 ] Commit 2b6f0090a333 ("mtd: Check add_mtd_device() ret code") contained a leftover of the debug session that led to this bug fix. Remove this pr_info(). Fixes: 2b6f0090a333 ("mtd: Check add_mtd_device() ret code") Signed-off-by: Boris Brezillon Signed-off-by: Sasha Levin --- drivers/mtd/mtdpart.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index 27d9785487d6..45626b0eed64 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -698,7 +698,6 @@ err_remove_part: mutex_unlock(&mtd_partitions_mutex); free_partition(new); - pr_info("%s:%i\n", __func__, __LINE__); return ret; } From 8725aa80dd65f0389f5097cc5f7f36ca7d399009 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 29 Nov 2019 10:03:50 +0100 Subject: [PATCH 1320/3715] mm, gup: add missing refcount overflow checks on s390 The mainline commit 8fde12ca79af ("mm: prevent get_user_pages() from overflowing page refcount") was backported to 4.14.y stable as commit 04198de24771. The backport however missed that in 4.14, there are several arch-specific gup.c versions with fast gup implementations, so these do not prevent refcount overflow. This stable-only commit fixes the s390 version, and is based on the backport in SUSE SLES/openSUSE 4.12-based kernels. The remaining architectures with own gup.c are sparc, mips, sh. It's unlikely the known overflow scenario based on FUSE, which needs 140GB of RAM, is a problem for those architectures, and I don't feel confident enough to patch them. Signed-off-by: Vlastimil Babka Signed-off-by: Sasha Levin --- arch/s390/mm/gup.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 05c8abd864f1..9bce54eac0b0 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -39,7 +39,8 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = pte_page(pte); head = compound_head(page); - if (!page_cache_get_speculative(head)) + if (unlikely(WARN_ON_ONCE(page_ref_count(head) < 0) + || !page_cache_get_speculative(head))) return 0; if (unlikely(pte_val(pte) != pte_val(*ptep))) { put_page(head); @@ -77,7 +78,8 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, refs++; } while (addr += PAGE_SIZE, addr != end); - if (!page_cache_add_speculative(head, refs)) { + if (unlikely(WARN_ON_ONCE(page_ref_count(head) < 0) + || !page_cache_add_speculative(head, refs))) { *nr -= refs; return 0; } @@ -151,7 +153,8 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr, refs++; } while (addr += PAGE_SIZE, addr != end); - if (!page_cache_add_speculative(head, refs)) { + if (unlikely(WARN_ON_ONCE(page_ref_count(head) < 0) + || !page_cache_add_speculative(head, refs))) { *nr -= refs; return 0; } From 5f42cde9910ebc4e4157583f83d777dbb0ee9846 Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Mon, 9 Sep 2019 15:30:31 +0000 Subject: [PATCH 1321/3715] clk: at91: fix update bit maps on CFG_MOR write commit 263eaf8f172d9f44e15d6aca85fe40ec18d2c477 upstream. The regmap update bits call was not selecting the proper mask, considering the bits which was updating. Update the mask from call to also include OSCBYPASS. Removed MOSCEN which was not updated. Fixes: 1bdf02326b71 ("clk: at91: make use of syscon/regmap internally") Signed-off-by: Eugen Hristev Link: https://lkml.kernel.org/r/1568042692-11784-1-git-send-email-eugen.hristev@microchip.com Acked-by: Alexandre Belloni Reviewed-by: Claudiu Beznea Signed-off-by: Stephen Boyd Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/clk/at91/clk-main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/at91/clk-main.c b/drivers/clk/at91/clk-main.c index fb5c14af8cc8..90988e7a5b47 100644 --- a/drivers/clk/at91/clk-main.c +++ b/drivers/clk/at91/clk-main.c @@ -162,7 +162,7 @@ at91_clk_register_main_osc(struct regmap *regmap, if (bypass) regmap_update_bits(regmap, AT91_CKGR_MOR, MOR_KEY_MASK | - AT91_PMC_MOSCEN, + AT91_PMC_OSCBYPASS, AT91_PMC_OSCBYPASS | AT91_PMC_KEY); hw = &osc->hw; From 607047ef2b4366139f1024272e11dc05f3102742 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Tue, 16 Oct 2018 16:21:43 +0200 Subject: [PATCH 1322/3715] clk: at91: generated: set audio_pll_allowed in at91_clk_register_generated() commit c1e4580a1d0ff510d56268c1fc7fcfeec366fe70 upstream. Set gck->audio_pll_allowed in at91_clk_register_generated. This makes it easier to do it from code that is not parsing device tree. Also, this fixes an issue where the resulting clk_hw can be dereferenced before being tested for error. Fixes: 1a1a36d72e3d ("clk: at91: clk-generated: make gclk determine audio_pll rate") Signed-off-by: Alexandre Belloni Signed-off-by: Stephen Boyd Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/clk/at91/clk-generated.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/drivers/clk/at91/clk-generated.c b/drivers/clk/at91/clk-generated.c index 113152425a95..ea23002be4de 100644 --- a/drivers/clk/at91/clk-generated.c +++ b/drivers/clk/at91/clk-generated.c @@ -284,7 +284,7 @@ static void clk_generated_startup(struct clk_generated *gck) static struct clk_hw * __init at91_clk_register_generated(struct regmap *regmap, spinlock_t *lock, const char *name, const char **parent_names, - u8 num_parents, u8 id, + u8 num_parents, u8 id, bool pll_audio, const struct clk_range *range) { struct clk_generated *gck; @@ -308,6 +308,7 @@ at91_clk_register_generated(struct regmap *regmap, spinlock_t *lock, gck->regmap = regmap; gck->lock = lock; gck->range = *range; + gck->audio_pll_allowed = pll_audio; clk_generated_startup(gck); hw = &gck->hw; @@ -333,7 +334,6 @@ static void __init of_sama5d2_clk_generated_setup(struct device_node *np) struct device_node *gcknp; struct clk_range range = CLK_RANGE(0, 0); struct regmap *regmap; - struct clk_generated *gck; num_parents = of_clk_get_parent_count(np); if (num_parents == 0 || num_parents > GENERATED_SOURCE_MAX) @@ -350,6 +350,8 @@ static void __init of_sama5d2_clk_generated_setup(struct device_node *np) return; for_each_child_of_node(np, gcknp) { + bool pll_audio = false; + if (of_property_read_u32(gcknp, "reg", &id)) continue; @@ -362,24 +364,14 @@ static void __init of_sama5d2_clk_generated_setup(struct device_node *np) of_at91_get_clk_range(gcknp, "atmel,clk-output-range", &range); + if (of_device_is_compatible(np, "atmel,sama5d2-clk-generated") && + (id == GCK_ID_I2S0 || id == GCK_ID_I2S1 || + id == GCK_ID_CLASSD)) + pll_audio = true; + hw = at91_clk_register_generated(regmap, &pmc_pcr_lock, name, parent_names, num_parents, - id, &range); - - gck = to_clk_generated(hw); - - if (of_device_is_compatible(np, - "atmel,sama5d2-clk-generated")) { - if (gck->id == GCK_ID_SSC0 || gck->id == GCK_ID_SSC1 || - gck->id == GCK_ID_I2S0 || gck->id == GCK_ID_I2S1 || - gck->id == GCK_ID_CLASSD) - gck->audio_pll_allowed = true; - else - gck->audio_pll_allowed = false; - } else { - gck->audio_pll_allowed = false; - } - + id, pll_audio, &range); if (IS_ERR(hw)) continue; From a3a967f00a54885eaea6034c8a3c538f65a0b9e7 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Tue, 5 Nov 2019 22:49:11 +0800 Subject: [PATCH 1323/3715] staging: rtl8192e: fix potential use after free commit b7aa39a2ed0112d07fc277ebd24a08a7b2368ab9 upstream. The variable skb is released via kfree_skb() when the return value of _rtl92e_tx is not zero. However, after that, skb is accessed again to read its length, which may result in a use after free bug. This patch fixes the bug by moving the release operation to where skb is never used later. Signed-off-by: Pan Bian Reviewed-by: Dan Carpenter Cc: stable Link: https://lore.kernel.org/r/1572965351-6745-1-git-send-email-bianpan2016@163.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c index aca52654825b..811cada301ac 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c @@ -1630,14 +1630,15 @@ static void _rtl92e_hard_data_xmit(struct sk_buff *skb, struct net_device *dev, memcpy((unsigned char *)(skb->cb), &dev, sizeof(dev)); skb_push(skb, priv->rtllib->tx_headroom); ret = _rtl92e_tx(dev, skb); - if (ret != 0) - kfree_skb(skb); if (queue_index != MGNT_QUEUE) { priv->rtllib->stats.tx_bytes += (skb->len - priv->rtllib->tx_headroom); priv->rtllib->stats.tx_packets++; } + + if (ret != 0) + kfree_skb(skb); } static int _rtl92e_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) From ee5f5bbef31027004663c1824d7819465bec8457 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 11 Nov 2019 12:38:46 +0100 Subject: [PATCH 1324/3715] staging: rtl8723bs: Drop ACPI device ids commit 2d9d2491530a156b9a5614adf9dc79285e35d55e upstream. The driver only binds by SDIO device-ids, all the ACPI device-id does is causing the driver to load unnecessarily on devices where the DSDT contains a bogus OBDA8723 device. Signed-off-by: Hans de Goede Cc: stable Link: https://lore.kernel.org/r/20191111113846.24940-2-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/os_dep/sdio_intf.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/staging/rtl8723bs/os_dep/sdio_intf.c b/drivers/staging/rtl8723bs/os_dep/sdio_intf.c index 943324877707..32b88e2392f9 100644 --- a/drivers/staging/rtl8723bs/os_dep/sdio_intf.c +++ b/drivers/staging/rtl8723bs/os_dep/sdio_intf.c @@ -30,13 +30,7 @@ static const struct sdio_device_id sdio_ids[] = { SDIO_DEVICE(0x024c, 0xb723), }, { /* end: all zeroes */ }, }; -static const struct acpi_device_id acpi_ids[] = { - {"OBDA8723", 0x0000}, - {} -}; - MODULE_DEVICE_TABLE(sdio, sdio_ids); -MODULE_DEVICE_TABLE(acpi, acpi_ids); static int rtw_drv_init(struct sdio_func *func, const struct sdio_device_id *id); static void rtw_dev_remove(struct sdio_func *func); From 67e31c8e6aad3f52234cf8ba9c5fcafc183cd8c1 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 11 Nov 2019 12:38:45 +0100 Subject: [PATCH 1325/3715] staging: rtl8723bs: Add 024c:0525 to the list of SDIO device-ids commit 3d5f1eedbfd22ceea94b39989d6021b1958181f4 upstream. Add 024c:0525 to the list of SDIO device-ids, based on a patch found in the Android X86 kernels. According to that patch this device id is used on the Alcatel Plus 10 device. Reported-and-tested-by: youling257 Signed-off-by: Hans de Goede Cc: stable Link: https://lore.kernel.org/r/20191111113846.24940-1-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/os_dep/sdio_intf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/rtl8723bs/os_dep/sdio_intf.c b/drivers/staging/rtl8723bs/os_dep/sdio_intf.c index 32b88e2392f9..33e052106ce7 100644 --- a/drivers/staging/rtl8723bs/os_dep/sdio_intf.c +++ b/drivers/staging/rtl8723bs/os_dep/sdio_intf.c @@ -25,6 +25,7 @@ static const struct sdio_device_id sdio_ids[] = { { SDIO_DEVICE(0x024c, 0x0523), }, + { SDIO_DEVICE(0x024c, 0x0525), }, { SDIO_DEVICE(0x024c, 0x0623), }, { SDIO_DEVICE(0x024c, 0x0626), }, { SDIO_DEVICE(0x024c, 0xb723), }, From df3353a27f13209e7e3de80bd5714d5b42a8abc0 Mon Sep 17 00:00:00 2001 From: Fabio D'Urso Date: Thu, 14 Nov 2019 01:30:53 +0000 Subject: [PATCH 1326/3715] USB: serial: ftdi_sio: add device IDs for U-Blox C099-F9P commit c1a1f273d0825774c80896b8deb1c9ea1d0b91e3 upstream. This device presents itself as a USB hub with three attached devices: - An ACM serial port connected to the GPS module (not affected by this commit) - An FTDI serial port connected to the GPS module (1546:0502) - Another FTDI serial port connected to the ODIN-W2 radio module (1546:0503) This commit registers U-Blox's VID and the PIDs of the second and third devices. Datasheet: https://www.u-blox.com/sites/default/files/C099-F9P-AppBoard-Mbed-OS3-FW_UserGuide_%28UBX-18063024%29.pdf Signed-off-by: Fabio D'Urso Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ftdi_sio.c | 3 +++ drivers/usb/serial/ftdi_sio_ids.h | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 8b5c99df0f2b..a962065227c4 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1028,6 +1028,9 @@ static const struct usb_device_id id_table_combined[] = { /* Sienna devices */ { USB_DEVICE(FTDI_VID, FTDI_SIENNA_PID) }, { USB_DEVICE(ECHELON_VID, ECHELON_U20_PID) }, + /* U-Blox devices */ + { USB_DEVICE(UBLOX_VID, UBLOX_C099F9P_ZED_PID) }, + { USB_DEVICE(UBLOX_VID, UBLOX_C099F9P_ODIN_PID) }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 22d66217cb41..e8373528264c 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -1558,3 +1558,10 @@ */ #define UNJO_VID 0x22B7 #define UNJO_ISODEBUG_V1_PID 0x150D + +/* + * U-Blox products (http://www.u-blox.com). + */ +#define UBLOX_VID 0x1546 +#define UBLOX_C099F9P_ZED_PID 0x0502 +#define UBLOX_C099F9P_ODIN_PID 0x0503 From 0a6f9bc592ef8bd28629dbc32cc82150ea060045 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Tue, 5 Nov 2019 17:05:13 +0200 Subject: [PATCH 1327/3715] mei: bus: prefix device names on bus with the bus name commit 7a2b9e6ec84588b0be65cc0ae45a65bac431496b upstream. Add parent device name to the name of devices on bus to avoid device names collisions for same client UUID available from different MEI heads. Namely this prevents sysfs collision under /sys/bus/mei/device/ In the device part leave just UUID other parameters that are required for device matching are not required here and are just bloating the name. Cc: Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20191105150514.14010-1-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 37b13bc5c16f..8f6ab516041b 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -850,15 +850,16 @@ static const struct device_type mei_cl_device_type = { /** * mei_cl_bus_set_name - set device name for me client device + * - + * Example: 0000:00:16.0-55213584-9a29-4916-badf-0fb7ed682aeb * * @cldev: me client device */ static inline void mei_cl_bus_set_name(struct mei_cl_device *cldev) { - dev_set_name(&cldev->dev, "mei:%s:%pUl:%02X", - cldev->name, - mei_me_cl_uuid(cldev->me_cl), - mei_me_cl_ver(cldev->me_cl)); + dev_set_name(&cldev->dev, "%s-%pUl", + dev_name(cldev->bus->dev), + mei_me_cl_uuid(cldev->me_cl)); } /** From 171148ea3ab00b372f29d41b56fec44313cb0dca Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 6 Nov 2019 08:13:49 +0100 Subject: [PATCH 1328/3715] xfrm: Fix memleak on xfrm state destroy commit 86c6739eda7d2a03f2db30cbee67a5fb81afa8ba upstream. We leak the page that we use to create skb page fragments when destroying the xfrm_state. Fix this by dropping a page reference if a page was assigned to the xfrm_state. Fixes: cac2661c53f3 ("esp4: Avoid skb_cow_data whenever possible") Reported-by: JD Reported-by: Paul Wouters Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_state.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index bd16e6882017..190ca59d5ba3 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -449,6 +449,8 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) x->type->destructor(x); xfrm_put_type(x->type); } + if (x->xfrag.page) + put_page(x->xfrag.page); xfrm_dev_state_free(x); security_xfrm_state_free(x); kfree(x); From 3b58babc4aa19c852cb3f9de2c6b6a9f91a2834a Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Mon, 15 Apr 2019 10:13:51 -0400 Subject: [PATCH 1329/3715] media: v4l2-ctrl: fix flags for DO_WHITE_BALANCE commit a0816e5088baab82aa738d61a55513114a673c8e upstream. Control DO_WHITE_BALANCE is a button, with read only and execute-on-write flags. Adding this control in the proper list in the fill function. After adding it here, we can see output of v4l2-ctl -L do_white_balance 0x0098090d (button) : flags=write-only, execute-on-write Signed-off-by: Eugen Hristev Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-ctrls.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c index 1ee072e939e4..34d6ae43fc45 100644 --- a/drivers/media/v4l2-core/v4l2-ctrls.c +++ b/drivers/media/v4l2-core/v4l2-ctrls.c @@ -1014,6 +1014,7 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type, case V4L2_CID_FLASH_STROBE_STOP: case V4L2_CID_AUTO_FOCUS_START: case V4L2_CID_AUTO_FOCUS_STOP: + case V4L2_CID_DO_WHITE_BALANCE: *type = V4L2_CTRL_TYPE_BUTTON; *flags |= V4L2_CTRL_FLAG_WRITE_ONLY | V4L2_CTRL_FLAG_EXECUTE_ON_WRITE; From a5ac276a5e5303083b48906706eac43f79516b3a Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Tue, 14 May 2019 15:23:07 +0200 Subject: [PATCH 1330/3715] net: macb: fix error format in dev_err() commit f413cbb332a0b5251a790f396d0eb4ebcade5dec upstream. Errors are negative numbers. Using %u shows them as very large positive numbers such as 4294967277 that don't make sense. Use the %d format instead, and get a much nicer -19. Signed-off-by: Luca Ceresoli Fixes: b48e0bab142f ("net: macb: Migrate to devm clock interface") Fixes: 93b31f48b3ba ("net/macb: unify clock management") Fixes: 421d9df0628b ("net/macb: merge at91_ether driver into macb driver") Fixes: aead88bd0e99 ("net: ethernet: macb: Add support for rx_clk") Fixes: f5473d1d44e4 ("net: macb: Support clock management for tsu_clk") Acked-by: Nicolas Ferre Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cadence/macb_main.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 2287749de087..bc9ab227d055 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -2822,7 +2822,7 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk, if (!err) err = -ENODEV; - dev_err(&pdev->dev, "failed to get macb_clk (%u)\n", err); + dev_err(&pdev->dev, "failed to get macb_clk (%d)\n", err); return err; } @@ -2831,7 +2831,7 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk, if (!err) err = -ENODEV; - dev_err(&pdev->dev, "failed to get hclk (%u)\n", err); + dev_err(&pdev->dev, "failed to get hclk (%d)\n", err); return err; } @@ -2845,25 +2845,25 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk, err = clk_prepare_enable(*pclk); if (err) { - dev_err(&pdev->dev, "failed to enable pclk (%u)\n", err); + dev_err(&pdev->dev, "failed to enable pclk (%d)\n", err); return err; } err = clk_prepare_enable(*hclk); if (err) { - dev_err(&pdev->dev, "failed to enable hclk (%u)\n", err); + dev_err(&pdev->dev, "failed to enable hclk (%d)\n", err); goto err_disable_pclk; } err = clk_prepare_enable(*tx_clk); if (err) { - dev_err(&pdev->dev, "failed to enable tx_clk (%u)\n", err); + dev_err(&pdev->dev, "failed to enable tx_clk (%d)\n", err); goto err_disable_hclk; } err = clk_prepare_enable(*rx_clk); if (err) { - dev_err(&pdev->dev, "failed to enable rx_clk (%u)\n", err); + dev_err(&pdev->dev, "failed to enable rx_clk (%d)\n", err); goto err_disable_txclk; } @@ -3298,7 +3298,7 @@ static int at91ether_clk_init(struct platform_device *pdev, struct clk **pclk, err = clk_prepare_enable(*pclk); if (err) { - dev_err(&pdev->dev, "failed to enable pclk (%u)\n", err); + dev_err(&pdev->dev, "failed to enable pclk (%d)\n", err); return err; } From ae21311f818a810be0c4c1b90d52debc86f2be1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 25 Mar 2019 10:49:33 +0100 Subject: [PATCH 1331/3715] pwm: Clear chip_data in pwm_put() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e926b12c611c2095c7976e2ed31753ad6eb5ff1a upstream. After a PWM is disposed by its user the per chip data becomes invalid. Clear the data in common code instead of the device drivers to get consistent behaviour. Before this patch only three of nine drivers cleaned up here. Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/pwm/core.c | 1 + drivers/pwm/pwm-berlin.c | 1 - drivers/pwm/pwm-pca9685.c | 1 - drivers/pwm/pwm-samsung.c | 1 - 4 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index c45e5719ba17..b1b74cfb1571 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -874,6 +874,7 @@ void pwm_put(struct pwm_device *pwm) if (pwm->chip->ops->free) pwm->chip->ops->free(pwm->chip, pwm); + pwm_set_chip_data(pwm, NULL); pwm->label = NULL; module_put(pwm->chip->ops->owner); diff --git a/drivers/pwm/pwm-berlin.c b/drivers/pwm/pwm-berlin.c index 771859aca4be..7bb819e3c0c1 100644 --- a/drivers/pwm/pwm-berlin.c +++ b/drivers/pwm/pwm-berlin.c @@ -78,7 +78,6 @@ static void berlin_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm) { struct berlin_pwm_channel *channel = pwm_get_chip_data(pwm); - pwm_set_chip_data(pwm, NULL); kfree(channel); } diff --git a/drivers/pwm/pwm-pca9685.c b/drivers/pwm/pwm-pca9685.c index a7eaf962a95b..567f5e2771c4 100644 --- a/drivers/pwm/pwm-pca9685.c +++ b/drivers/pwm/pwm-pca9685.c @@ -176,7 +176,6 @@ static void pca9685_pwm_gpio_free(struct gpio_chip *gpio, unsigned int offset) pm_runtime_put(pca->chip.dev); mutex_lock(&pca->lock); pwm = &pca->chip.pwms[offset]; - pwm_set_chip_data(pwm, NULL); mutex_unlock(&pca->lock); } diff --git a/drivers/pwm/pwm-samsung.c b/drivers/pwm/pwm-samsung.c index 062f2cfc45ec..3762432dd6a7 100644 --- a/drivers/pwm/pwm-samsung.c +++ b/drivers/pwm/pwm-samsung.c @@ -238,7 +238,6 @@ static int pwm_samsung_request(struct pwm_chip *chip, struct pwm_device *pwm) static void pwm_samsung_free(struct pwm_chip *chip, struct pwm_device *pwm) { devm_kfree(chip->dev, pwm_get_chip_data(pwm)); - pwm_set_chip_data(pwm, NULL); } static int pwm_samsung_enable(struct pwm_chip *chip, struct pwm_device *pwm) From 2c4575f07080c2ccefa0a5bef1753a2c7d6ed258 Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Fri, 12 Apr 2019 06:19:49 -0400 Subject: [PATCH 1332/3715] media: atmel: atmel-isc: fix asd memory allocation commit 1e4e25c4959c10728fbfcc6a286f9503d32dfe02 upstream. The subsystem will free the asd memory on notifier cleanup, if the asd is added to the notifier. However the memory is freed using kfree. Thus, we cannot allocate the asd using devm_* This can lead to crashes and problems. To test this issue, just return an error at probe, but cleanup the notifier beforehand. Fixes: 106267444f ("[media] atmel-isc: add the Image Sensor Controller code") Signed-off-by: Eugen Hristev Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/atmel/atmel-isc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/media/platform/atmel/atmel-isc.c b/drivers/media/platform/atmel/atmel-isc.c index d7103c5f92c3..504d1ca0330e 100644 --- a/drivers/media/platform/atmel/atmel-isc.c +++ b/drivers/media/platform/atmel/atmel-isc.c @@ -1722,8 +1722,11 @@ static int isc_parse_dt(struct device *dev, struct isc_device *isc) break; } - subdev_entity->asd = devm_kzalloc(dev, - sizeof(*subdev_entity->asd), GFP_KERNEL); + /* asd will be freed by the subsystem once it's added to the + * notifier list + */ + subdev_entity->asd = kzalloc(sizeof(*subdev_entity->asd), + GFP_KERNEL); if (subdev_entity->asd == NULL) { of_node_put(rem); ret = -ENOMEM; @@ -1859,6 +1862,7 @@ static int atmel_isc_probe(struct platform_device *pdev) &subdev_entity->notifier); if (ret) { dev_err(dev, "fail to register async notifier\n"); + kfree(subdev_entity->asd); goto cleanup_subdev; } From b6a10a40a4f8e5cdaaafc85bdd7e4113223a07ba Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Fri, 12 Apr 2019 06:19:46 -0400 Subject: [PATCH 1333/3715] media: atmel: atmel-isc: fix INIT_WORK misplacement commit 79199002db5c571e335131856b3ff057ffd9f3c0 upstream. In case the completion function failes, unbind will be called which will call cancel_work for awb_work. This will trigger a WARN message from the workqueue. To avoid this, move the INIT_WORK call at the start of the completion function. This way the work is always initialized, which corresponds to the 'always canceled' unbind code. Fixes: 93d4a26c3d ("[media] atmel-isc: add the isc pipeline function") Signed-off-by: Eugen Hristev Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/atmel/atmel-isc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/platform/atmel/atmel-isc.c b/drivers/media/platform/atmel/atmel-isc.c index 504d1ca0330e..0dea3cf2cb52 100644 --- a/drivers/media/platform/atmel/atmel-isc.c +++ b/drivers/media/platform/atmel/atmel-isc.c @@ -1555,6 +1555,8 @@ static int isc_async_complete(struct v4l2_async_notifier *notifier) struct vb2_queue *q = &isc->vb2_vidq; int ret; + INIT_WORK(&isc->awb_work, isc_awb_work); + ret = v4l2_device_register_subdev_nodes(&isc->v4l2_dev); if (ret < 0) { v4l2_err(&isc->v4l2_dev, "Failed to register subdev nodes\n"); @@ -1614,8 +1616,6 @@ static int isc_async_complete(struct v4l2_async_notifier *notifier) return ret; } - INIT_WORK(&isc->awb_work, isc_awb_work); - /* Register video device */ strlcpy(vdev->name, ATMEL_ISC_NAME, sizeof(vdev->name)); vdev->release = video_device_release_empty; From e854565dbbd3b65f3a7c5f10c3434634e523e66a Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Mon, 25 Nov 2019 16:58:09 +0800 Subject: [PATCH 1334/3715] macvlan: schedule bc_work even if error [ Upstream commit 1d7ea55668878bb350979c377fc72509dd6f5b21 ] While enqueueing a broadcast skb to port->bc_queue, schedule_work() is called to add port->bc_work, which processes the skbs in bc_queue, to "events" work queue. If port->bc_queue is full, the skb will be discarded and schedule_work(&port->bc_work) won't be called. However, if port->bc_queue is full and port->bc_work is not running or pending, port->bc_queue will keep full and schedule_work() won't be called any more, and all broadcast skbs to macvlan will be discarded. This case can happen: macvlan_process_broadcast() is the pending function of port->bc_work, it moves all the skbs in port->bc_queue to the queue "list", and processes the skbs in "list". During this, new skbs will keep being added to port->bc_queue in macvlan_broadcast_enqueue(), and port->bc_queue may already full when macvlan_process_broadcast() return. This may happen, especially when there are a lot of real-time threads and the process is preempted. Fix this by calling schedule_work(&port->bc_work) even if port->bc_work is full in macvlan_broadcast_enqueue(). Fixes: 412ca1550cbe ("macvlan: Move broadcasts into a work queue") Signed-off-by: Menglong Dong Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/macvlan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 963a02c988e9..8d5f88a538fc 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -363,10 +363,11 @@ static void macvlan_broadcast_enqueue(struct macvlan_port *port, } spin_unlock(&port->bc_queue.lock); + schedule_work(&port->bc_work); + if (err) goto free_nskb; - schedule_work(&port->bc_work); return; free_nskb: From 4b61349a540d0b3a2f6133a4bc16239cf24820c5 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 27 Nov 2019 00:16:44 +0200 Subject: [PATCH 1335/3715] net: psample: fix skb_over_panic [ Upstream commit 7eb9d7675c08937cd11d32b0b40442d4d731c5ee ] We need to calculate the skb size correctly otherwise we risk triggering skb_over_panic[1]. The issue is that data_len is added to the skb in a nl attribute, but we don't account for its header size (nlattr 4 bytes) and alignment. We account for it when calculating the total size in the > PSAMPLE_MAX_PACKET_SIZE comparison correctly, but not when allocating after that. The fix is simple - use nla_total_size() for data_len when allocating. To reproduce: $ tc qdisc add dev eth1 clsact $ tc filter add dev eth1 egress matchall action sample rate 1 group 1 trunc 129 $ mausezahn eth1 -b bcast -a rand -c 1 -p 129 < skb_over_panic BUG(), tail is 4 bytes past skb->end > [1] Trace: [ 50.459526][ T3480] skbuff: skb_over_panic: text:(____ptrval____) len:196 put:136 head:(____ptrval____) data:(____ptrval____) tail:0xc4 end:0xc0 dev: [ 50.474339][ T3480] ------------[ cut here ]------------ [ 50.481132][ T3480] kernel BUG at net/core/skbuff.c:108! [ 50.486059][ T3480] invalid opcode: 0000 [#1] PREEMPT SMP [ 50.489463][ T3480] CPU: 3 PID: 3480 Comm: mausezahn Not tainted 5.4.0-rc7 #108 [ 50.492844][ T3480] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-2.fc30 04/01/2014 [ 50.496551][ T3480] RIP: 0010:skb_panic+0x79/0x7b [ 50.498261][ T3480] Code: bc 00 00 00 41 57 4c 89 e6 48 c7 c7 90 29 9a 83 4c 8b 8b c0 00 00 00 50 8b 83 b8 00 00 00 50 ff b3 c8 00 00 00 e8 ae ef c0 fe <0f> 0b e8 2f df c8 fe 48 8b 55 08 44 89 f6 4c 89 e7 48 c7 c1 a0 22 [ 50.504111][ T3480] RSP: 0018:ffffc90000447a10 EFLAGS: 00010282 [ 50.505835][ T3480] RAX: 0000000000000087 RBX: ffff888039317d00 RCX: 0000000000000000 [ 50.507900][ T3480] RDX: 0000000000000000 RSI: ffffffff812716e1 RDI: 00000000ffffffff [ 50.509820][ T3480] RBP: ffffc90000447a60 R08: 0000000000000001 R09: 0000000000000000 [ 50.511735][ T3480] R10: ffffffff81d4f940 R11: 0000000000000000 R12: ffffffff834a22b0 [ 50.513494][ T3480] R13: ffffffff82c10433 R14: 0000000000000088 R15: ffffffff838a8084 [ 50.515222][ T3480] FS: 00007f3536462700(0000) GS:ffff88803eac0000(0000) knlGS:0000000000000000 [ 50.517135][ T3480] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 50.518583][ T3480] CR2: 0000000000442008 CR3: 000000003b222000 CR4: 00000000000006e0 [ 50.520723][ T3480] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 50.522709][ T3480] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 50.524450][ T3480] Call Trace: [ 50.525214][ T3480] skb_put.cold+0x1b/0x1b [ 50.526171][ T3480] psample_sample_packet+0x1d3/0x340 [ 50.527307][ T3480] tcf_sample_act+0x178/0x250 [ 50.528339][ T3480] tcf_action_exec+0xb1/0x190 [ 50.529354][ T3480] mall_classify+0x67/0x90 [ 50.530332][ T3480] tcf_classify+0x72/0x160 [ 50.531286][ T3480] __dev_queue_xmit+0x3db/0xd50 [ 50.532327][ T3480] dev_queue_xmit+0x18/0x20 [ 50.533299][ T3480] packet_sendmsg+0xee7/0x2090 [ 50.534331][ T3480] sock_sendmsg+0x54/0x70 [ 50.535271][ T3480] __sys_sendto+0x148/0x1f0 [ 50.536252][ T3480] ? tomoyo_file_ioctl+0x23/0x30 [ 50.537334][ T3480] ? ksys_ioctl+0x5e/0xb0 [ 50.540068][ T3480] __x64_sys_sendto+0x2a/0x30 [ 50.542810][ T3480] do_syscall_64+0x73/0x1f0 [ 50.545383][ T3480] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 50.548477][ T3480] RIP: 0033:0x7f35357d6fb3 [ 50.551020][ T3480] Code: 48 8b 0d 18 90 20 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d f9 d3 20 00 00 75 13 49 89 ca b8 2c 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 34 c3 48 83 ec 08 e8 eb f6 ff ff 48 89 04 24 [ 50.558547][ T3480] RSP: 002b:00007ffe0c7212c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c [ 50.561870][ T3480] RAX: ffffffffffffffda RBX: 0000000001dac010 RCX: 00007f35357d6fb3 [ 50.565142][ T3480] RDX: 0000000000000082 RSI: 0000000001dac2a2 RDI: 0000000000000003 [ 50.568469][ T3480] RBP: 00007ffe0c7212f0 R08: 00007ffe0c7212d0 R09: 0000000000000014 [ 50.571731][ T3480] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000082 [ 50.574961][ T3480] R13: 0000000001dac2a2 R14: 0000000000000001 R15: 0000000000000003 [ 50.578170][ T3480] Modules linked in: sch_ingress virtio_net [ 50.580976][ T3480] ---[ end trace 61a515626a595af6 ]--- CC: Yotam Gigi CC: Jiri Pirko CC: Jamal Hadi Salim CC: Simon Horman CC: Roopa Prabhu Fixes: 6ae0a6286171 ("net: Introduce psample, a new genetlink channel for packet sampling") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/psample/psample.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/psample/psample.c b/net/psample/psample.c index 4cea353221da..30e8239bd774 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -223,7 +223,7 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, data_len = PSAMPLE_MAX_PACKET_SIZE - meta_len - NLA_HDRLEN - NLA_ALIGNTO; - nl_skb = genlmsg_new(meta_len + data_len, GFP_ATOMIC); + nl_skb = genlmsg_new(meta_len + nla_total_size(data_len), GFP_ATOMIC); if (unlikely(!nl_skb)) return; From 0e32df103ca66a9efce43c6100bb0f8d973f24b6 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 26 Nov 2019 12:55:50 +0100 Subject: [PATCH 1336/3715] openvswitch: fix flow command message size [ Upstream commit 4e81c0b3fa93d07653e2415fa71656b080a112fd ] When user-space sets the OVS_UFID_F_OMIT_* flags, and the relevant flow has no UFID, we can exceed the computed size, as ovs_nla_put_identifier() will always dump an OVS_FLOW_ATTR_KEY attribute. Take the above in account when computing the flow command message size. Fixes: 74ed7ab9264c ("openvswitch: Add support for unique flow IDs.") Reported-by: Qi Jun Ding Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/openvswitch/datapath.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 2de2a923ff2b..33b0fc5c33c8 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -724,9 +724,13 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts, { size_t len = NLMSG_ALIGN(sizeof(struct ovs_header)); - /* OVS_FLOW_ATTR_UFID */ + /* OVS_FLOW_ATTR_UFID, or unmasked flow key as fallback + * see ovs_nla_put_identifier() + */ if (sfid && ovs_identifier_is_ufid(sfid)) len += nla_total_size(sfid->ufid_len); + else + len += nla_total_size(ovs_key_attr_size()); /* OVS_FLOW_ATTR_KEY */ if (!sfid || should_fill_key(sfid, ufid_flags)) From f5bcc687e3d699bc4949bf37ef5f77fa50269f8c Mon Sep 17 00:00:00 2001 From: Jouni Hogander Date: Mon, 25 Nov 2019 14:23:43 +0200 Subject: [PATCH 1337/3715] slip: Fix use-after-free Read in slip_open [ Upstream commit e58c1912418980f57ba2060017583067f5f71e52 ] Slip_open doesn't clean-up device which registration failed from the slip_devs device list. On next open after failure this list is iterated and freed device is accessed. Fix this by calling sl_free_netdev in error path. Here is the trace from the Syzbot: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x197/0x210 lib/dump_stack.c:118 print_address_description.constprop.0.cold+0xd4/0x30b mm/kasan/report.c:374 __kasan_report.cold+0x1b/0x41 mm/kasan/report.c:506 kasan_report+0x12/0x20 mm/kasan/common.c:634 __asan_report_load8_noabort+0x14/0x20 mm/kasan/generic_report.c:132 sl_sync drivers/net/slip/slip.c:725 [inline] slip_open+0xecd/0x11b7 drivers/net/slip/slip.c:801 tty_ldisc_open.isra.0+0xa3/0x110 drivers/tty/tty_ldisc.c:469 tty_set_ldisc+0x30e/0x6b0 drivers/tty/tty_ldisc.c:596 tiocsetd drivers/tty/tty_io.c:2334 [inline] tty_ioctl+0xe8d/0x14f0 drivers/tty/tty_io.c:2594 vfs_ioctl fs/ioctl.c:46 [inline] file_ioctl fs/ioctl.c:509 [inline] do_vfs_ioctl+0xdb6/0x13e0 fs/ioctl.c:696 ksys_ioctl+0xab/0xd0 fs/ioctl.c:713 __do_sys_ioctl fs/ioctl.c:720 [inline] __se_sys_ioctl fs/ioctl.c:718 [inline] __x64_sys_ioctl+0x73/0xb0 fs/ioctl.c:718 do_syscall_64+0xfa/0x760 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: 3b5a39979daf ("slip: Fix memory leak in slip_open error path") Reported-by: syzbot+4d5170758f3762109542@syzkaller.appspotmail.com Cc: David Miller Cc: Oliver Hartkopp Cc: Lukas Bulwahn Signed-off-by: Jouni Hogander Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/slip/slip.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index 2901b7db9d2e..d6dc00b4ba55 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -859,6 +859,7 @@ err_free_chan: sl->tty = NULL; tty->disc_data = NULL; clear_bit(SLF_INUSE, &sl->flags); + sl_free_netdev(sl->dev); free_netdev(sl->dev); err_exit: From 2356f0b95fc04f37a028e4f67ef7812aacd2e30c Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Sun, 1 Dec 2019 18:41:24 +0100 Subject: [PATCH 1338/3715] openvswitch: drop unneeded BUG_ON() in ovs_flow_cmd_build_info() [ Upstream commit 8ffeb03fbba3b599690b361467bfd2373e8c450f ] All the callers of ovs_flow_cmd_build_info() already deal with error return code correctly, so we can handle the error condition in a more gracefull way. Still dump a warning to preserve debuggability. v1 -> v2: - clarify the commit message - clean the skb and report the error (DaveM) Fixes: ccb1352e76cf ("net: Add Open vSwitch kernel components.") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/openvswitch/datapath.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 33b0fc5c33c8..002aa96d79ef 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -906,7 +906,10 @@ static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow, retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb, info->snd_portid, info->snd_seq, 0, cmd, ufid_flags); - BUG_ON(retval < 0); + if (WARN_ON_ONCE(retval < 0)) { + kfree_skb(skb); + skb = ERR_PTR(retval); + } return skb; } From 796c569498e1ce5159f070c142ba1bfebd33cc18 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Sun, 1 Dec 2019 18:41:25 +0100 Subject: [PATCH 1339/3715] openvswitch: remove another BUG_ON() [ Upstream commit 8a574f86652a4540a2433946ba826ccb87f398cc ] If we can't build the flow del notification, we can simply delete the flow, no need to crash the kernel. Still keep a WARN_ON to preserve debuggability. Note: the BUG_ON() predates the Fixes tag, but this change can be applied only after the mentioned commit. v1 -> v2: - do not leak an skb on error Fixes: aed067783e50 ("openvswitch: Minimize ovs_flow_cmd_del critical section.") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/openvswitch/datapath.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 002aa96d79ef..3248cf04d0b3 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1372,7 +1372,10 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) OVS_FLOW_CMD_DEL, ufid_flags); rcu_read_unlock(); - BUG_ON(err < 0); + if (WARN_ON_ONCE(err < 0)) { + kfree_skb(reply); + goto out_free; + } ovs_notify(&dp_flow_genl_family, reply, info); } else { @@ -1380,6 +1383,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) } } +out_free: ovs_flow_free(flow, true); return 0; unlock: From 4d75732b5f0cf60154a78d5710821cb3063e6578 Mon Sep 17 00:00:00 2001 From: John Rutherford Date: Tue, 26 Nov 2019 13:52:55 +1100 Subject: [PATCH 1340/3715] tipc: fix link name length check [ Upstream commit fd567ac20cb0377ff466d3337e6e9ac5d0cb15e4 ] In commit 4f07b80c9733 ("tipc: check msg->req data len in tipc_nl_compat_bearer_disable") the same patch code was copied into routines: tipc_nl_compat_bearer_disable(), tipc_nl_compat_link_stat_dump() and tipc_nl_compat_link_reset_stats(). The two link routine occurrences should have been modified to check the maximum link name length and not bearer name length. Fixes: 4f07b80c9733 ("tipc: check msg->reg data len in tipc_nl_compat_bearer_disable") Signed-off-by: John Rutherford Acked-by: Jon Maloy Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/netlink_compat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index bbd05707c4e0..fa0522cd683e 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -539,7 +539,7 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, if (len <= 0) return -EINVAL; - len = min_t(int, len, TIPC_MAX_BEARER_NAME); + len = min_t(int, len, TIPC_MAX_LINK_NAME); if (!string_is_valid(name, len)) return -EINVAL; @@ -821,7 +821,7 @@ static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd, if (len <= 0) return -EINVAL; - len = min_t(int, len, TIPC_MAX_BEARER_NAME); + len = min_t(int, len, TIPC_MAX_LINK_NAME); if (!string_is_valid(name, len)) return -EINVAL; From 2d1ff8fb1144b13804bbedb2ec3874c46a9db8ba Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 23 Nov 2019 11:56:49 +0800 Subject: [PATCH 1341/3715] sctp: cache netns in sctp_ep_common [ Upstream commit 312434617cb16be5166316cf9d08ba760b1042a1 ] This patch is to fix a data-race reported by syzbot: BUG: KCSAN: data-race in sctp_assoc_migrate / sctp_hash_obj write to 0xffff8880b67c0020 of 8 bytes by task 18908 on cpu 1: sctp_assoc_migrate+0x1a6/0x290 net/sctp/associola.c:1091 sctp_sock_migrate+0x8aa/0x9b0 net/sctp/socket.c:9465 sctp_accept+0x3c8/0x470 net/sctp/socket.c:4916 inet_accept+0x7f/0x360 net/ipv4/af_inet.c:734 __sys_accept4+0x224/0x430 net/socket.c:1754 __do_sys_accept net/socket.c:1795 [inline] __se_sys_accept net/socket.c:1792 [inline] __x64_sys_accept+0x4e/0x60 net/socket.c:1792 do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x44/0xa9 read to 0xffff8880b67c0020 of 8 bytes by task 12003 on cpu 0: sctp_hash_obj+0x4f/0x2d0 net/sctp/input.c:894 rht_key_get_hash include/linux/rhashtable.h:133 [inline] rht_key_hashfn include/linux/rhashtable.h:159 [inline] rht_head_hashfn include/linux/rhashtable.h:174 [inline] head_hashfn lib/rhashtable.c:41 [inline] rhashtable_rehash_one lib/rhashtable.c:245 [inline] rhashtable_rehash_chain lib/rhashtable.c:276 [inline] rhashtable_rehash_table lib/rhashtable.c:316 [inline] rht_deferred_worker+0x468/0xab0 lib/rhashtable.c:420 process_one_work+0x3d4/0x890 kernel/workqueue.c:2269 worker_thread+0xa0/0x800 kernel/workqueue.c:2415 kthread+0x1d4/0x200 drivers/block/aoe/aoecmd.c:1253 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:352 It was caused by rhashtable access asoc->base.sk when sctp_assoc_migrate is changing its value. However, what rhashtable wants is netns from asoc base.sk, and for an asoc, its netns won't change once set. So we can simply fix it by caching netns since created. Fixes: d6c0256a60e6 ("sctp: add the rhashtable apis for sctp global transport hashtable") Reported-by: syzbot+e3b35fe7918ff0ee474e@syzkaller.appspotmail.com Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- include/net/sctp/structs.h | 3 +++ net/sctp/associola.c | 1 + net/sctp/endpointola.c | 1 + net/sctp/input.c | 4 ++-- 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 94c775773f58..c1f71dd464d3 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1181,6 +1181,9 @@ struct sctp_ep_common { /* What socket does this endpoint belong to? */ struct sock *sk; + /* Cache netns and it won't change once set */ + struct net *net; + /* This is where we receive inbound chunks. */ struct sctp_inq inqueue; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 23fec3817e0c..dd1a3bd80be5 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -80,6 +80,7 @@ static struct sctp_association *sctp_association_init( /* Discarding const is appropriate here. */ asoc->ep = (struct sctp_endpoint *)ep; asoc->base.sk = (struct sock *)sk; + asoc->base.net = sock_net(sk); sctp_endpoint_hold(asoc->ep); sock_hold(asoc->base.sk); diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 5d4079ef3de6..c71b4191df1e 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -165,6 +165,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* Remember who we are attached to. */ ep->base.sk = sk; + ep->base.net = sock_net(sk); sock_hold(ep->base.sk); return ep; diff --git a/net/sctp/input.c b/net/sctp/input.c index 0247cc432e02..3c0affecf272 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -813,7 +813,7 @@ static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg, if (!sctp_transport_hold(t)) return err; - if (!net_eq(sock_net(t->asoc->base.sk), x->net)) + if (!net_eq(t->asoc->base.net, x->net)) goto out; if (x->lport != htons(t->asoc->base.bind_addr.port)) goto out; @@ -828,7 +828,7 @@ static inline __u32 sctp_hash_obj(const void *data, u32 len, u32 seed) { const struct sctp_transport *t = data; const union sctp_addr *paddr = &t->ipaddr; - const struct net *net = sock_net(t->asoc->base.sk); + const struct net *net = t->asoc->base.net; __be16 lport = htons(t->asoc->base.bind_addr.port); __u32 addr; From 3c6e9591255b8726de77c33c916d2c161e9eedda Mon Sep 17 00:00:00 2001 From: Dust Li Date: Thu, 28 Nov 2019 14:29:09 +0800 Subject: [PATCH 1342/3715] net: sched: fix `tc -s class show` no bstats on class with nolock subqueues [ Upstream commit 14e54ab9143fa60794d13ea0a66c792a2046a8f3 ] When a classful qdisc's child qdisc has set the flag TCQ_F_CPUSTATS (pfifo_fast for example), the child qdisc's cpu_bstats should be passed to gnet_stats_copy_basic(), but many classful qdisc didn't do that. As a result, `tc -s class show dev DEV` always return 0 for bytes and packets in this case. Pass the child qdisc's cpu_bstats to gnet_stats_copy_basic() to fix this issue. The qstats also has this problem, but it has been fixed in 5dd431b6b9 ("net: sched: introduce and use qstats read...") and bstats still remains buggy. Fixes: 22e0f8b9322c ("net: sched: make bstats per cpu and estimator RCU safe") Signed-off-by: Dust Li Signed-off-by: Tony Lu Acked-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_mq.c | 3 ++- net/sched/sch_mqprio.c | 4 ++-- net/sched/sch_multiq.c | 2 +- net/sched/sch_prio.c | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index f3a3e507422b..442ac9c3f16f 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -191,7 +191,8 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct netdev_queue *dev_queue = mq_queue_get(sch, cl); sch = dev_queue->qdisc_sleeping; - if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 || + if (gnet_stats_copy_basic(&sch->running, d, sch->cpu_bstats, + &sch->bstats) < 0 || gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0) return -1; return 0; diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 6bcdfe6e7b63..bb8d3fbc13bb 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -366,8 +366,8 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); sch = dev_queue->qdisc_sleeping; - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), - d, NULL, &sch->bstats) < 0 || + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, + sch->cpu_bstats, &sch->bstats) < 0 || gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0) return -1; diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index ff4fc3e0facd..65aa03d46857 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -340,7 +340,7 @@ static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl, cl_q = q->queues[cl - 1]; if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), - d, NULL, &cl_q->bstats) < 0 || + d, cl_q->cpu_bstats, &cl_q->bstats) < 0 || gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0) return -1; diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 2dd6c68ae91e..c60777351de1 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -298,7 +298,7 @@ static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl, cl_q = q->queues[cl - 1]; if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), - d, NULL, &cl_q->bstats) < 0 || + d, cl_q->cpu_bstats, &cl_q->bstats) < 0 || gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0) return -1; From eb3257cba634f9a520467f6c8c56f4abfa347484 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 7 Nov 2019 21:43:41 -0500 Subject: [PATCH 1343/3715] ext4: add more paranoia checking in ext4_expand_extra_isize handling commit 4ea99936a1630f51fc3a2d61a58ec4a1c4b7d55a upstream. It's possible to specify a non-zero s_want_extra_isize via debugging option, and this can cause bad things(tm) to happen when using a file system with an inode size of 128 bytes. Add better checking when the file system is mounted, as well as when we are actually doing the trying to do the inode expansion. Link: https://lore.kernel.org/r/20191110121510.GH23325@mit.edu Reported-by: syzbot+f8d6f8386ceacdbfff57@syzkaller.appspotmail.com Reported-by: syzbot+33d7ea72e47de3bdf4e1@syzkaller.appspotmail.com Reported-by: syzbot+44b6763edfc17144296f@syzkaller.appspotmail.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 15 +++++++++++++++ fs/ext4/super.c | 21 ++++++++++++--------- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 8c987a1994d4..b3d5fd84b485 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5751,8 +5751,23 @@ static int __ext4_expand_extra_isize(struct inode *inode, { struct ext4_inode *raw_inode; struct ext4_xattr_ibody_header *header; + unsigned int inode_size = EXT4_INODE_SIZE(inode->i_sb); + struct ext4_inode_info *ei = EXT4_I(inode); int error; + /* this was checked at iget time, but double check for good measure */ + if ((EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > inode_size) || + (ei->i_extra_isize & 3)) { + EXT4_ERROR_INODE(inode, "bad extra_isize %u (inode size %u)", + ei->i_extra_isize, + EXT4_INODE_SIZE(inode->i_sb)); + return -EFSCORRUPTED; + } + if ((new_extra_isize < ei->i_extra_isize) || + (new_extra_isize < 4) || + (new_extra_isize > inode_size - EXT4_GOOD_OLD_INODE_SIZE)) + return -EINVAL; /* Should never happen */ + raw_inode = ext4_raw_inode(iloc); header = IHDR(inode, raw_inode); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 61d07608577e..1a0a56647974 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3458,12 +3458,15 @@ static void ext4_clamp_want_extra_isize(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; + unsigned def_extra_isize = sizeof(struct ext4_inode) - + EXT4_GOOD_OLD_INODE_SIZE; - /* determine the minimum size of new large inodes, if present */ - if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE && - sbi->s_want_extra_isize == 0) { - sbi->s_want_extra_isize = sizeof(struct ext4_inode) - - EXT4_GOOD_OLD_INODE_SIZE; + if (sbi->s_inode_size == EXT4_GOOD_OLD_INODE_SIZE) { + sbi->s_want_extra_isize = 0; + return; + } + if (sbi->s_want_extra_isize < 4) { + sbi->s_want_extra_isize = def_extra_isize; if (ext4_has_feature_extra_isize(sb)) { if (sbi->s_want_extra_isize < le16_to_cpu(es->s_want_extra_isize)) @@ -3476,10 +3479,10 @@ static void ext4_clamp_want_extra_isize(struct super_block *sb) } } /* Check if enough inode space is available */ - if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > - sbi->s_inode_size) { - sbi->s_want_extra_isize = sizeof(struct ext4_inode) - - EXT4_GOOD_OLD_INODE_SIZE; + if ((sbi->s_want_extra_isize > sbi->s_inode_size) || + (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > + sbi->s_inode_size)) { + sbi->s_want_extra_isize = def_extra_isize; ext4_msg(sb, KERN_INFO, "required extra inode space not available"); } From fdcaab7abf3b4ced5f4dddf4d361351992756147 Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Wed, 8 May 2019 14:15:03 +0000 Subject: [PATCH 1344/3715] watchdog: sama5d4: fix WDD value to be always set to max commit 8632944841d41a36d77dd1fa88d4201b5291100f upstream. WDD value must be always set to max (0xFFF) otherwise the hardware block will reset the board on the first ping of the watchdog. Signed-off-by: Eugen Hristev Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/watchdog/sama5d4_wdt.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/watchdog/sama5d4_wdt.c b/drivers/watchdog/sama5d4_wdt.c index 0ae947c3d7bc..d8cf2039c6a4 100644 --- a/drivers/watchdog/sama5d4_wdt.c +++ b/drivers/watchdog/sama5d4_wdt.c @@ -111,9 +111,7 @@ static int sama5d4_wdt_set_timeout(struct watchdog_device *wdd, u32 value = WDT_SEC2TICKS(timeout); wdt->mr &= ~AT91_WDT_WDV; - wdt->mr &= ~AT91_WDT_WDD; wdt->mr |= AT91_WDT_SET_WDV(value); - wdt->mr |= AT91_WDT_SET_WDD(value); /* * WDDIS has to be 0 when updating WDD/WDV. The datasheet states: When @@ -255,7 +253,7 @@ static int sama5d4_wdt_probe(struct platform_device *pdev) timeout = WDT_SEC2TICKS(wdd->timeout); - wdt->mr |= AT91_WDT_SET_WDD(timeout); + wdt->mr |= AT91_WDT_SET_WDD(WDT_SEC2TICKS(MAX_WDT_TIMEOUT)); wdt->mr |= AT91_WDT_SET_WDV(timeout); ret = sama5d4_wdt_init(wdt); From 1e23d6338d76bb24a4a02210db17e805de3b8974 Mon Sep 17 00:00:00 2001 From: Harini Katakam Date: Thu, 27 Jun 2019 11:51:00 +0530 Subject: [PATCH 1345/3715] net: macb: Fix SUBNS increment and increase resolution commit 7ad342bc58cc5197cd2f12a3c30b3949528c6d83 upstream. The subns increment register has 24 bits as follows: RegBit[15:0] = Subns[23:8]; RegBit[31:24] = Subns[7:0] Fix the same in the driver and increase sub ns resolution to the best capable, 24 bits. This should be the case on all GEM versions that this PTP driver supports. Signed-off-by: Harini Katakam Signed-off-by: David S. Miller Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cadence/macb.h | 6 +++++- drivers/net/ethernet/cadence/macb_ptp.c | 5 ++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index c93f3a2dc6c1..4c0bcfd1d250 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -457,7 +457,11 @@ /* Bitfields in TISUBN */ #define GEM_SUBNSINCR_OFFSET 0 -#define GEM_SUBNSINCR_SIZE 16 +#define GEM_SUBNSINCRL_OFFSET 24 +#define GEM_SUBNSINCRL_SIZE 8 +#define GEM_SUBNSINCRH_OFFSET 0 +#define GEM_SUBNSINCRH_SIZE 16 +#define GEM_SUBNSINCR_SIZE 24 /* Bitfields in TI */ #define GEM_NSINCR_OFFSET 0 diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c index 678835136bf8..f1f07e9d53f8 100755 --- a/drivers/net/ethernet/cadence/macb_ptp.c +++ b/drivers/net/ethernet/cadence/macb_ptp.c @@ -115,7 +115,10 @@ static int gem_tsu_incr_set(struct macb *bp, struct tsu_incr *incr_spec) * to take effect. */ spin_lock_irqsave(&bp->tsu_clk_lock, flags); - gem_writel(bp, TISUBN, GEM_BF(SUBNSINCR, incr_spec->sub_ns)); + /* RegBit[15:0] = Subns[23:8]; RegBit[31:24] = Subns[7:0] */ + gem_writel(bp, TISUBN, GEM_BF(SUBNSINCRL, incr_spec->sub_ns) | + GEM_BF(SUBNSINCRH, (incr_spec->sub_ns >> + GEM_SUBNSINCRL_SIZE))); gem_writel(bp, TI, GEM_BF(NSINCR, incr_spec->ns)); spin_unlock_irqrestore(&bp->tsu_clk_lock, flags); From c19b9c45fc58396b6e443381153911cdf48bbc28 Mon Sep 17 00:00:00 2001 From: Paul Thomas Date: Mon, 8 Apr 2019 15:37:54 -0400 Subject: [PATCH 1346/3715] net: macb driver, check for SKBTX_HW_TSTAMP commit a62520473f15750cd1432d36b377a06cd7cff8d2 upstream. Make sure SKBTX_HW_TSTAMP (i.e. SOF_TIMESTAMPING_TX_HARDWARE) has been enabled for this skb. It does fix the issue where normal socks that aren't expecting a timestamp will not wake up on select, but when a user does want a SOF_TIMESTAMPING_TX_HARDWARE it does work. Signed-off-by: Paul Thomas Signed-off-by: David S. Miller Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cadence/macb_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index bc9ab227d055..5aff1b460151 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -852,7 +852,9 @@ static void macb_tx_interrupt(struct macb_queue *queue) /* First, update TX stats if needed */ if (skb) { - if (gem_ptp_do_txstamp(queue, skb, desc) == 0) { + if (unlikely(skb_shinfo(skb)->tx_flags & + SKBTX_HW_TSTAMP) && + gem_ptp_do_txstamp(queue, skb, desc) == 0) { /* skb now belongs to timestamp buffer * and will be removed later */ From 623771d7948f8550b3c05cb65ea6606a20c39e6c Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 1 Apr 2019 16:49:01 +0200 Subject: [PATCH 1347/3715] mtd: rawnand: atmel: Fix spelling mistake in error message commit e39bb786816453788836c367caefd72eceea380c upstream. Wrong copy/paste from the previous block, the error message should refer to #size-cells instead of #address-cells. Fixes: f88fc122cc34 ("mtd: nand: Cleanup/rework the atmel_nand driver") Signed-off-by: Miquel Raynal Reviewed-by: Tudor Ambarus Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/atmel/nand-controller.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/atmel/nand-controller.c b/drivers/mtd/nand/atmel/nand-controller.c index 0b93f152d993..d5a493e8ee08 100644 --- a/drivers/mtd/nand/atmel/nand-controller.c +++ b/drivers/mtd/nand/atmel/nand-controller.c @@ -1888,7 +1888,7 @@ static int atmel_nand_controller_add_nands(struct atmel_nand_controller *nc) ret = of_property_read_u32(np, "#size-cells", &val); if (ret) { - dev_err(dev, "missing #address-cells property\n"); + dev_err(dev, "missing #size-cells property\n"); return ret; } From 77cfe99a0765b45473bbf7af3123bb39018d6005 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Thu, 7 Feb 2019 03:50:55 +0000 Subject: [PATCH 1348/3715] mtd: rawnand: atmel: fix possible object reference leak commit a12085d13997ed15f745f33a0e01002541160179 upstream. of_find_device_by_node() takes a reference to the struct device when it finds a match via get_device, there is no need to call get_device() twice. We also should make sure to drop the reference to the device taken by of_find_device_by_node() on driver unbind. Fixes: f88fc122cc34 ("mtd: nand: Cleanup/rework the atmel_nand driver") Signed-off-by: Wen Yang Suggested-by: Boris Brezillon Reviewed-by: Boris Brezillon Reviewed-by: Miquel Raynal Acked-by: Miquel Raynal Cc: Tudor Ambarus Cc: Boris Brezillon Cc: Miquel Raynal Cc: Richard Weinberger Cc: David Woodhouse Cc: Brian Norris Cc: Marek Vasut Cc: Nicolas Ferre Cc: Alexandre Belloni Cc: Ludovic Desroches Cc: linux-mtd@lists.infradead.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Miquel Raynal Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/atmel/pmecc.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/mtd/nand/atmel/pmecc.c b/drivers/mtd/nand/atmel/pmecc.c index 4124bf91bee6..8cd153974e8d 100644 --- a/drivers/mtd/nand/atmel/pmecc.c +++ b/drivers/mtd/nand/atmel/pmecc.c @@ -875,23 +875,32 @@ static struct atmel_pmecc *atmel_pmecc_get_by_node(struct device *userdev, { struct platform_device *pdev; struct atmel_pmecc *pmecc, **ptr; + int ret; pdev = of_find_device_by_node(np); - if (!pdev || !platform_get_drvdata(pdev)) + if (!pdev) return ERR_PTR(-EPROBE_DEFER); + pmecc = platform_get_drvdata(pdev); + if (!pmecc) { + ret = -EPROBE_DEFER; + goto err_put_device; + } ptr = devres_alloc(devm_atmel_pmecc_put, sizeof(*ptr), GFP_KERNEL); - if (!ptr) - return ERR_PTR(-ENOMEM); - - get_device(&pdev->dev); - pmecc = platform_get_drvdata(pdev); + if (!ptr) { + ret = -ENOMEM; + goto err_put_device; + } *ptr = pmecc; devres_add(userdev, ptr); return pmecc; + +err_put_device: + put_device(&pdev->dev); + return ERR_PTR(ret); } static const int atmel_pmecc_strengths[] = { 2, 4, 8, 12, 24, 32 }; From 87b4ed6c62132663689f553ba506bfe5891c0f2e Mon Sep 17 00:00:00 2001 From: "huijin.park" Date: Wed, 28 Nov 2018 03:02:14 -0500 Subject: [PATCH 1349/3715] mtd: spi-nor: cast to u64 to avoid uint overflows commit 84a1c2109d23df3543d96231c4fee1757299bb1a upstream. The "params->size" is defined as "u64". And "info->sector_size" and "info->n_sectors" are defined as unsigned int and u16. Thus, u64 data might have strange data(loss data) if the result overflows an unsigned int. This patch casts "info->sector_size" to an u64. Signed-off-by: huijin.park Reviewed-by: Geert Uytterhoeven Signed-off-by: Boris Brezillon Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/spi-nor/spi-nor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 6c013341ef09..d550148177a0 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -2382,7 +2382,7 @@ static int spi_nor_init_params(struct spi_nor *nor, memset(params, 0, sizeof(*params)); /* Set SPI NOR sizes. */ - params->size = info->sector_size * info->n_sectors; + params->size = (u64)info->sector_size * info->n_sectors; params->page_size = info->page_size; /* (Fast) Read settings. */ From 0c08f1da992db758dc3f6a5a56518a7ab68fc1d3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 17 Apr 2018 16:31:07 +0200 Subject: [PATCH 1350/3715] y2038: futex: Move compat implementation into futex.c commit 04e7712f4460585e5eed5b853fd8b82a9943958f upstream. We are going to share the compat_sys_futex() handler between 64-bit architectures and 32-bit architectures that need to deal with both 32-bit and 64-bit time_t, and this is easier if both entry points are in the same file. In fact, most other system call handlers do the same thing these days, so let's follow the trend here and merge all of futex_compat.c into futex.c. In the process, a few minor changes have to be done to make sure everything still makes sense: handle_futex_death() and futex_cmpxchg_enabled() become local symbol, and the compat version of the fetch_robust_entry() function gets renamed to compat_fetch_robust_entry() to avoid a symbol clash. This is intended as a purely cosmetic patch, no behavior should change. Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- include/linux/futex.h | 8 -- kernel/Makefile | 3 - kernel/futex.c | 195 +++++++++++++++++++++++++++++++++++++++- kernel/futex_compat.c | 202 ------------------------------------------ 4 files changed, 192 insertions(+), 216 deletions(-) delete mode 100644 kernel/futex_compat.c diff --git a/include/linux/futex.h b/include/linux/futex.h index c0fb9a24bbd2..d05e753232ba 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -12,9 +12,6 @@ struct task_struct; long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3); -extern int -handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi); - /* * Futexes are matched on equal values of this key. * The key type depends on whether it's a shared or private mapping. @@ -55,11 +52,6 @@ union futex_key { #ifdef CONFIG_FUTEX extern void exit_robust_list(struct task_struct *curr); -#ifdef CONFIG_HAVE_FUTEX_CMPXCHG -#define futex_cmpxchg_enabled 1 -#else -extern int futex_cmpxchg_enabled; -#endif #else static inline void exit_robust_list(struct task_struct *curr) { diff --git a/kernel/Makefile b/kernel/Makefile index 3085141c055c..43e92e3691ec 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -49,9 +49,6 @@ obj-$(CONFIG_PROFILING) += profile.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += time/ obj-$(CONFIG_FUTEX) += futex.o -ifeq ($(CONFIG_COMPAT),y) -obj-$(CONFIG_FUTEX) += futex_compat.o -endif obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o obj-$(CONFIG_SMP) += smp.o ifneq ($(CONFIG_SMP),y) diff --git a/kernel/futex.c b/kernel/futex.c index afe6a81584c9..2836ba1aa213 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -44,6 +44,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include #include #include #include @@ -173,8 +174,10 @@ * double_lock_hb() and double_unlock_hb(), respectively. */ -#ifndef CONFIG_HAVE_FUTEX_CMPXCHG -int __read_mostly futex_cmpxchg_enabled; +#ifdef CONFIG_HAVE_FUTEX_CMPXCHG +#define futex_cmpxchg_enabled 1 +#else +static int __read_mostly futex_cmpxchg_enabled; #endif /* @@ -3476,7 +3479,7 @@ err_unlock: * Process a futex-list entry, check whether it's owned by the * dying task, and do notification if so: */ -int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi) +static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi) { u32 uval, uninitialized_var(nval), mval; int err; @@ -3723,6 +3726,192 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); } +#ifdef CONFIG_COMPAT +/* + * Fetch a robust-list pointer. Bit 0 signals PI futexes: + */ +static inline int +compat_fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry, + compat_uptr_t __user *head, unsigned int *pi) +{ + if (get_user(*uentry, head)) + return -EFAULT; + + *entry = compat_ptr((*uentry) & ~1); + *pi = (unsigned int)(*uentry) & 1; + + return 0; +} + +static void __user *futex_uaddr(struct robust_list __user *entry, + compat_long_t futex_offset) +{ + compat_uptr_t base = ptr_to_compat(entry); + void __user *uaddr = compat_ptr(base + futex_offset); + + return uaddr; +} + +/* + * Walk curr->robust_list (very carefully, it's a userspace list!) + * and mark any locks found there dead, and notify any waiters. + * + * We silently return on any sign of list-walking problem. + */ +void compat_exit_robust_list(struct task_struct *curr) +{ + struct compat_robust_list_head __user *head = curr->compat_robust_list; + struct robust_list __user *entry, *next_entry, *pending; + unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; + unsigned int uninitialized_var(next_pi); + compat_uptr_t uentry, next_uentry, upending; + compat_long_t futex_offset; + int rc; + + if (!futex_cmpxchg_enabled) + return; + + /* + * Fetch the list head (which was registered earlier, via + * sys_set_robust_list()): + */ + if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi)) + return; + /* + * Fetch the relative futex offset: + */ + if (get_user(futex_offset, &head->futex_offset)) + return; + /* + * Fetch any possibly pending lock-add first, and handle it + * if it exists: + */ + if (compat_fetch_robust_entry(&upending, &pending, + &head->list_op_pending, &pip)) + return; + + next_entry = NULL; /* avoid warning with gcc */ + while (entry != (struct robust_list __user *) &head->list) { + /* + * Fetch the next entry in the list before calling + * handle_futex_death: + */ + rc = compat_fetch_robust_entry(&next_uentry, &next_entry, + (compat_uptr_t __user *)&entry->next, &next_pi); + /* + * A pending lock might already be on the list, so + * dont process it twice: + */ + if (entry != pending) { + void __user *uaddr = futex_uaddr(entry, futex_offset); + + if (handle_futex_death(uaddr, curr, pi)) + return; + } + if (rc) + return; + uentry = next_uentry; + entry = next_entry; + pi = next_pi; + /* + * Avoid excessively long or circular lists: + */ + if (!--limit) + break; + + cond_resched(); + } + if (pending) { + void __user *uaddr = futex_uaddr(pending, futex_offset); + + handle_futex_death(uaddr, curr, pip); + } +} + +COMPAT_SYSCALL_DEFINE2(set_robust_list, + struct compat_robust_list_head __user *, head, + compat_size_t, len) +{ + if (!futex_cmpxchg_enabled) + return -ENOSYS; + + if (unlikely(len != sizeof(*head))) + return -EINVAL; + + current->compat_robust_list = head; + + return 0; +} + +COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, + compat_uptr_t __user *, head_ptr, + compat_size_t __user *, len_ptr) +{ + struct compat_robust_list_head __user *head; + unsigned long ret; + struct task_struct *p; + + if (!futex_cmpxchg_enabled) + return -ENOSYS; + + rcu_read_lock(); + + ret = -ESRCH; + if (!pid) + p = current; + else { + p = find_task_by_vpid(pid); + if (!p) + goto err_unlock; + } + + ret = -EPERM; + if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) + goto err_unlock; + + head = p->compat_robust_list; + rcu_read_unlock(); + + if (put_user(sizeof(*head), len_ptr)) + return -EFAULT; + return put_user(ptr_to_compat(head), head_ptr); + +err_unlock: + rcu_read_unlock(); + + return ret; +} + +COMPAT_SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, + struct compat_timespec __user *, utime, u32 __user *, uaddr2, + u32, val3) +{ + struct timespec ts; + ktime_t t, *tp = NULL; + int val2 = 0; + int cmd = op & FUTEX_CMD_MASK; + + if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || + cmd == FUTEX_WAIT_BITSET || + cmd == FUTEX_WAIT_REQUEUE_PI)) { + if (compat_get_timespec(&ts, utime)) + return -EFAULT; + if (!timespec_valid(&ts)) + return -EINVAL; + + t = timespec_to_ktime(ts); + if (cmd == FUTEX_WAIT) + t = ktime_add_safe(ktime_get(), t); + tp = &t; + } + if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE || + cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) + val2 = (int) (unsigned long) utime; + + return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); +} +#endif /* CONFIG_COMPAT */ + static void __init futex_detect_cmpxchg(void) { #ifndef CONFIG_HAVE_FUTEX_CMPXCHG diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c deleted file mode 100644 index 83f830acbb5f..000000000000 --- a/kernel/futex_compat.c +++ /dev/null @@ -1,202 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * linux/kernel/futex_compat.c - * - * Futex compatibililty routines. - * - * Copyright 2006, Red Hat, Inc., Ingo Molnar - */ - -#include -#include -#include -#include -#include -#include - -#include - - -/* - * Fetch a robust-list pointer. Bit 0 signals PI futexes: - */ -static inline int -fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry, - compat_uptr_t __user *head, unsigned int *pi) -{ - if (get_user(*uentry, head)) - return -EFAULT; - - *entry = compat_ptr((*uentry) & ~1); - *pi = (unsigned int)(*uentry) & 1; - - return 0; -} - -static void __user *futex_uaddr(struct robust_list __user *entry, - compat_long_t futex_offset) -{ - compat_uptr_t base = ptr_to_compat(entry); - void __user *uaddr = compat_ptr(base + futex_offset); - - return uaddr; -} - -/* - * Walk curr->robust_list (very carefully, it's a userspace list!) - * and mark any locks found there dead, and notify any waiters. - * - * We silently return on any sign of list-walking problem. - */ -void compat_exit_robust_list(struct task_struct *curr) -{ - struct compat_robust_list_head __user *head = curr->compat_robust_list; - struct robust_list __user *entry, *next_entry, *pending; - unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; - unsigned int uninitialized_var(next_pi); - compat_uptr_t uentry, next_uentry, upending; - compat_long_t futex_offset; - int rc; - - if (!futex_cmpxchg_enabled) - return; - - /* - * Fetch the list head (which was registered earlier, via - * sys_set_robust_list()): - */ - if (fetch_robust_entry(&uentry, &entry, &head->list.next, &pi)) - return; - /* - * Fetch the relative futex offset: - */ - if (get_user(futex_offset, &head->futex_offset)) - return; - /* - * Fetch any possibly pending lock-add first, and handle it - * if it exists: - */ - if (fetch_robust_entry(&upending, &pending, - &head->list_op_pending, &pip)) - return; - - next_entry = NULL; /* avoid warning with gcc */ - while (entry != (struct robust_list __user *) &head->list) { - /* - * Fetch the next entry in the list before calling - * handle_futex_death: - */ - rc = fetch_robust_entry(&next_uentry, &next_entry, - (compat_uptr_t __user *)&entry->next, &next_pi); - /* - * A pending lock might already be on the list, so - * dont process it twice: - */ - if (entry != pending) { - void __user *uaddr = futex_uaddr(entry, futex_offset); - - if (handle_futex_death(uaddr, curr, pi)) - return; - } - if (rc) - return; - uentry = next_uentry; - entry = next_entry; - pi = next_pi; - /* - * Avoid excessively long or circular lists: - */ - if (!--limit) - break; - - cond_resched(); - } - if (pending) { - void __user *uaddr = futex_uaddr(pending, futex_offset); - - handle_futex_death(uaddr, curr, pip); - } -} - -COMPAT_SYSCALL_DEFINE2(set_robust_list, - struct compat_robust_list_head __user *, head, - compat_size_t, len) -{ - if (!futex_cmpxchg_enabled) - return -ENOSYS; - - if (unlikely(len != sizeof(*head))) - return -EINVAL; - - current->compat_robust_list = head; - - return 0; -} - -COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, - compat_uptr_t __user *, head_ptr, - compat_size_t __user *, len_ptr) -{ - struct compat_robust_list_head __user *head; - unsigned long ret; - struct task_struct *p; - - if (!futex_cmpxchg_enabled) - return -ENOSYS; - - rcu_read_lock(); - - ret = -ESRCH; - if (!pid) - p = current; - else { - p = find_task_by_vpid(pid); - if (!p) - goto err_unlock; - } - - ret = -EPERM; - if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) - goto err_unlock; - - head = p->compat_robust_list; - rcu_read_unlock(); - - if (put_user(sizeof(*head), len_ptr)) - return -EFAULT; - return put_user(ptr_to_compat(head), head_ptr); - -err_unlock: - rcu_read_unlock(); - - return ret; -} - -COMPAT_SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, - struct compat_timespec __user *, utime, u32 __user *, uaddr2, - u32, val3) -{ - struct timespec ts; - ktime_t t, *tp = NULL; - int val2 = 0; - int cmd = op & FUTEX_CMD_MASK; - - if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || - cmd == FUTEX_WAIT_BITSET || - cmd == FUTEX_WAIT_REQUEUE_PI)) { - if (compat_get_timespec(&ts, utime)) - return -EFAULT; - if (!timespec_valid(&ts)) - return -EINVAL; - - t = timespec_to_ktime(ts); - if (cmd == FUTEX_WAIT) - t = ktime_add_safe(ktime_get(), t); - tp = &t; - } - if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE || - cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) - val2 = (int) (unsigned long) utime; - - return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); -} From 3e24098da750991f75819069c79e090dfd029219 Mon Sep 17 00:00:00 2001 From: Yang Tao Date: Wed, 6 Nov 2019 22:55:35 +0100 Subject: [PATCH 1351/3715] futex: Prevent robust futex exit race commit ca16d5bee59807bf04deaab0a8eccecd5061528c upstream. Robust futexes utilize the robust_list mechanism to allow the kernel to release futexes which are held when a task exits. The exit can be voluntary or caused by a signal or fault. This prevents that waiters block forever. The futex operations in user space store a pointer to the futex they are either locking or unlocking in the op_pending member of the per task robust list. After a lock operation has succeeded the futex is queued in the robust list linked list and the op_pending pointer is cleared. After an unlock operation has succeeded the futex is removed from the robust list linked list and the op_pending pointer is cleared. The robust list exit code checks for the pending operation and any futex which is queued in the linked list. It carefully checks whether the futex value is the TID of the exiting task. If so, it sets the OWNER_DIED bit and tries to wake up a potential waiter. This is race free for the lock operation but unlock has two race scenarios where waiters might not be woken up. These issues can be observed with regular robust pthread mutexes. PI aware pthread mutexes are not affected. (1) Unlocking task is killed after unlocking the futex value in user space before being able to wake a waiter. pthread_mutex_unlock() | V atomic_exchange_rel (&mutex->__data.__lock, 0) <------------------------killed lll_futex_wake () | | |(__lock = 0) |(enter kernel) | V do_exit() exit_mm() mm_release() exit_robust_list() handle_futex_death() | |(__lock = 0) |(uval = 0) | V if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr)) return 0; The sanity check which ensures that the user space futex is owned by the exiting task prevents the wakeup of waiters which in consequence block infinitely. (2) Waiting task is killed after a wakeup and before it can acquire the futex in user space. OWNER WAITER futex_wait() pthread_mutex_unlock() | | | |(__lock = 0) | | | V | futex_wake() ------------> wakeup() | |(return to userspace) |(__lock = 0) | V oldval = mutex->__data.__lock <-----------------killed atomic_compare_and_exchange_val_acq (&mutex->__data.__lock, | id | assume_other_futex_waiters, 0) | | | (enter kernel)| | V do_exit() | | V handle_futex_death() | |(__lock = 0) |(uval = 0) | V if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr)) return 0; The sanity check which ensures that the user space futex is owned by the exiting task prevents the wakeup of waiters, which seems to be correct as the exiting task does not own the futex value, but the consequence is that other waiters wont be woken up and block infinitely. In both scenarios the following conditions are true: - task->robust_list->list_op_pending != NULL - user space futex value == 0 - Regular futex (not PI) If these conditions are met then it is reasonably safe to wake up a potential waiter in order to prevent the above problems. As this might be a false positive it can cause spurious wakeups, but the waiter side has to handle other types of unrelated wakeups, e.g. signals gracefully anyway. So such a spurious wakeup will not affect the correctness of these operations. This workaround must not touch the user space futex value and cannot set the OWNER_DIED bit because the lock value is 0, i.e. uncontended. Setting OWNER_DIED in this case would result in inconsistent state and subsequently in malfunction of the owner died handling in user space. The rest of the user space state is still consistent as no other task can observe the list_op_pending entry in the exiting tasks robust list. The eventually woken up waiter will observe the uncontended lock value and take it over. [ tglx: Massaged changelog and comment. Made the return explicit and not depend on the subsequent check and added constants to hand into handle_futex_death() instead of plain numbers. Fixed a few coding style issues. ] Fixes: 0771dfefc9e5 ("[PATCH] lightweight robust futexes: core") Signed-off-by: Yang Tao Signed-off-by: Yi Wang Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1573010582-35297-1-git-send-email-wang.yi59@zte.com.cn Link: https://lkml.kernel.org/r/20191106224555.943191378@linutronix.de Signed-off-by: Greg Kroah-Hartman --- kernel/futex.c | 58 ++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 51 insertions(+), 7 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 2836ba1aa213..d0a36cba905d 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -3475,11 +3475,16 @@ err_unlock: return ret; } +/* Constants for the pending_op argument of handle_futex_death */ +#define HANDLE_DEATH_PENDING true +#define HANDLE_DEATH_LIST false + /* * Process a futex-list entry, check whether it's owned by the * dying task, and do notification if so: */ -static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi) +static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, + bool pi, bool pending_op) { u32 uval, uninitialized_var(nval), mval; int err; @@ -3492,6 +3497,42 @@ retry: if (get_user(uval, uaddr)) return -1; + /* + * Special case for regular (non PI) futexes. The unlock path in + * user space has two race scenarios: + * + * 1. The unlock path releases the user space futex value and + * before it can execute the futex() syscall to wake up + * waiters it is killed. + * + * 2. A woken up waiter is killed before it can acquire the + * futex in user space. + * + * In both cases the TID validation below prevents a wakeup of + * potential waiters which can cause these waiters to block + * forever. + * + * In both cases the following conditions are met: + * + * 1) task->robust_list->list_op_pending != NULL + * @pending_op == true + * 2) User space futex value == 0 + * 3) Regular futex: @pi == false + * + * If these conditions are met, it is safe to attempt waking up a + * potential waiter without touching the user space futex value and + * trying to set the OWNER_DIED bit. The user space futex value is + * uncontended and the rest of the user space mutex state is + * consistent, so a woken waiter will just take over the + * uncontended futex. Setting the OWNER_DIED bit would create + * inconsistent state and malfunction of the user space owner died + * handling. + */ + if (pending_op && !pi && !uval) { + futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY); + return 0; + } + if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr)) return 0; @@ -3611,10 +3652,11 @@ void exit_robust_list(struct task_struct *curr) * A pending lock might already be on the list, so * don't process it twice: */ - if (entry != pending) + if (entry != pending) { if (handle_futex_death((void __user *)entry + futex_offset, - curr, pi)) + curr, pi, HANDLE_DEATH_LIST)) return; + } if (rc) return; entry = next_entry; @@ -3628,9 +3670,10 @@ void exit_robust_list(struct task_struct *curr) cond_resched(); } - if (pending) + if (pending) { handle_futex_death((void __user *)pending + futex_offset, - curr, pip); + curr, pip, HANDLE_DEATH_PENDING); + } } long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, @@ -3805,7 +3848,8 @@ void compat_exit_robust_list(struct task_struct *curr) if (entry != pending) { void __user *uaddr = futex_uaddr(entry, futex_offset); - if (handle_futex_death(uaddr, curr, pi)) + if (handle_futex_death(uaddr, curr, pi, + HANDLE_DEATH_LIST)) return; } if (rc) @@ -3824,7 +3868,7 @@ void compat_exit_robust_list(struct task_struct *curr) if (pending) { void __user *uaddr = futex_uaddr(pending, futex_offset); - handle_futex_death(uaddr, curr, pip); + handle_futex_death(uaddr, curr, pip, HANDLE_DEATH_PENDING); } } From 2f6c5ebbbbc9d9193831e77e614c61ad03c77925 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Nov 2019 22:55:36 +0100 Subject: [PATCH 1352/3715] futex: Move futex exit handling into futex code commit ba31c1a48538992316cc71ce94fa9cd3e7b427c0 upstream. The futex exit handling is #ifdeffed into mm_release() which is not pretty to begin with. But upcoming changes to address futex exit races need to add more functionality to this exit code. Split it out into a function, move it into futex code and make the various futex exit functions static. Preparatory only and no functional change. Folded build fix from Borislav. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20191106224556.049705556@linutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/compat.h | 2 -- include/linux/futex.h | 27 +++++++++++++++++---------- kernel/fork.c | 25 +++---------------------- kernel/futex.c | 33 +++++++++++++++++++++++++++++---- 4 files changed, 49 insertions(+), 38 deletions(-) diff --git a/include/linux/compat.h b/include/linux/compat.h index 23909d12f729..cec96d4794d0 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -324,8 +324,6 @@ struct compat_kexec_segment; struct compat_mq_attr; struct compat_msgbuf; -extern void compat_exit_robust_list(struct task_struct *curr); - asmlinkage long compat_sys_set_robust_list(struct compat_robust_list_head __user *head, compat_size_t len); diff --git a/include/linux/futex.h b/include/linux/futex.h index d05e753232ba..bc4138b87035 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -2,7 +2,9 @@ #ifndef _LINUX_FUTEX_H #define _LINUX_FUTEX_H +#include #include + #include struct inode; @@ -51,19 +53,24 @@ union futex_key { #define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } } #ifdef CONFIG_FUTEX -extern void exit_robust_list(struct task_struct *curr); -#else -static inline void exit_robust_list(struct task_struct *curr) -{ -} -#endif -#ifdef CONFIG_FUTEX_PI -extern void exit_pi_state_list(struct task_struct *curr); -#else -static inline void exit_pi_state_list(struct task_struct *curr) +static inline void futex_init_task(struct task_struct *tsk) { + tsk->robust_list = NULL; +#ifdef CONFIG_COMPAT + tsk->compat_robust_list = NULL; +#endif + INIT_LIST_HEAD(&tsk->pi_state_list); + tsk->pi_state_cache = NULL; } + +void futex_mm_release(struct task_struct *tsk); + +long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, + u32 __user *uaddr2, u32 val2, u32 val3); +#else +static inline void futex_init_task(struct task_struct *tsk) { } +static inline void futex_mm_release(struct task_struct *tsk) { } #endif #endif diff --git a/kernel/fork.c b/kernel/fork.c index 3d9d6a28e21d..1dc6b9ecc33a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1135,20 +1135,7 @@ static int wait_for_vfork_done(struct task_struct *child, void mm_release(struct task_struct *tsk, struct mm_struct *mm) { /* Get rid of any futexes when releasing the mm */ -#ifdef CONFIG_FUTEX - if (unlikely(tsk->robust_list)) { - exit_robust_list(tsk); - tsk->robust_list = NULL; - } -#ifdef CONFIG_COMPAT - if (unlikely(tsk->compat_robust_list)) { - compat_exit_robust_list(tsk); - tsk->compat_robust_list = NULL; - } -#endif - if (unlikely(!list_empty(&tsk->pi_state_list))) - exit_pi_state_list(tsk); -#endif + futex_mm_release(tsk); uprobe_free_utask(tsk); @@ -1796,14 +1783,8 @@ static __latent_entropy struct task_struct *copy_process( #ifdef CONFIG_BLOCK p->plug = NULL; #endif -#ifdef CONFIG_FUTEX - p->robust_list = NULL; -#ifdef CONFIG_COMPAT - p->compat_robust_list = NULL; -#endif - INIT_LIST_HEAD(&p->pi_state_list); - p->pi_state_cache = NULL; -#endif + futex_init_task(p); + /* * sigaltstack should be cleared when sharing the same VM */ diff --git a/kernel/futex.c b/kernel/futex.c index d0a36cba905d..75edb19fa3a2 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -341,6 +341,12 @@ static inline bool should_fail_futex(bool fshared) } #endif /* CONFIG_FAIL_FUTEX */ +#ifdef CONFIG_COMPAT +static void compat_exit_robust_list(struct task_struct *curr); +#else +static inline void compat_exit_robust_list(struct task_struct *curr) { } +#endif + static inline void futex_get_mm(union futex_key *key) { mmgrab(key->private.mm); @@ -890,7 +896,7 @@ static struct task_struct *futex_find_get_task(pid_t pid) * Kernel cleans up PI-state, but userspace is likely hosed. * (Robust-futex cleanup is separate and might save the day for userspace.) */ -void exit_pi_state_list(struct task_struct *curr) +static void exit_pi_state_list(struct task_struct *curr) { struct list_head *next, *head = &curr->pi_state_list; struct futex_pi_state *pi_state; @@ -960,7 +966,8 @@ void exit_pi_state_list(struct task_struct *curr) } raw_spin_unlock_irq(&curr->pi_lock); } - +#else +static inline void exit_pi_state_list(struct task_struct *curr) { } #endif /* @@ -3611,7 +3618,7 @@ static inline int fetch_robust_entry(struct robust_list __user **entry, * * We silently return on any sign of list-walking problem. */ -void exit_robust_list(struct task_struct *curr) +static void exit_robust_list(struct task_struct *curr) { struct robust_list_head __user *head = curr->robust_list; struct robust_list __user *entry, *next_entry, *pending; @@ -3676,6 +3683,24 @@ void exit_robust_list(struct task_struct *curr) } } +void futex_mm_release(struct task_struct *tsk) +{ + if (unlikely(tsk->robust_list)) { + exit_robust_list(tsk); + tsk->robust_list = NULL; + } + +#ifdef CONFIG_COMPAT + if (unlikely(tsk->compat_robust_list)) { + compat_exit_robust_list(tsk); + tsk->compat_robust_list = NULL; + } +#endif + + if (unlikely(!list_empty(&tsk->pi_state_list))) + exit_pi_state_list(tsk); +} + long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3) { @@ -3801,7 +3826,7 @@ static void __user *futex_uaddr(struct robust_list __user *entry, * * We silently return on any sign of list-walking problem. */ -void compat_exit_robust_list(struct task_struct *curr) +static void compat_exit_robust_list(struct task_struct *curr) { struct compat_robust_list_head __user *head = curr->compat_robust_list; struct robust_list __user *entry, *next_entry, *pending; From c35d0ebed357f47ede0afef03699db29f14032c1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Nov 2019 22:55:37 +0100 Subject: [PATCH 1353/3715] futex: Replace PF_EXITPIDONE with a state commit 3d4775df0a89240f671861c6ab6e8d59af8e9e41 upstream. The futex exit handling relies on PF_ flags. That's suboptimal as it requires a smp_mb() and an ugly lock/unlock of the exiting tasks pi_lock in the middle of do_exit() to enforce the observability of PF_EXITING in the futex code. Add a futex_state member to task_struct and convert the PF_EXITPIDONE logic over to the new state. The PF_EXITING dependency will be cleaned up in a later step. This prepares for handling various futex exit issues later. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20191106224556.149449274@linutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/futex.h | 33 +++++++++++++++++++++++++++++++++ include/linux/sched.h | 2 +- kernel/exit.c | 18 ++---------------- kernel/futex.c | 25 +++++++++++++------------ 4 files changed, 49 insertions(+), 29 deletions(-) diff --git a/include/linux/futex.h b/include/linux/futex.h index bc4138b87035..35703ec3a255 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -53,6 +53,10 @@ union futex_key { #define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } } #ifdef CONFIG_FUTEX +enum { + FUTEX_STATE_OK, + FUTEX_STATE_DEAD, +}; static inline void futex_init_task(struct task_struct *tsk) { @@ -62,6 +66,34 @@ static inline void futex_init_task(struct task_struct *tsk) #endif INIT_LIST_HEAD(&tsk->pi_state_list); tsk->pi_state_cache = NULL; + tsk->futex_state = FUTEX_STATE_OK; +} + +/** + * futex_exit_done - Sets the tasks futex state to FUTEX_STATE_DEAD + * @tsk: task to set the state on + * + * Set the futex exit state of the task lockless. The futex waiter code + * observes that state when a task is exiting and loops until the task has + * actually finished the futex cleanup. The worst case for this is that the + * waiter runs through the wait loop until the state becomes visible. + * + * This has two callers: + * + * - futex_mm_release() after the futex exit cleanup has been done + * + * - do_exit() from the recursive fault handling path. + * + * In case of a recursive fault this is best effort. Either the futex exit + * code has run already or not. If the OWNER_DIED bit has been set on the + * futex then the waiter can take it over. If not, the problem is pushed + * back to user space. If the futex exit code did not run yet, then an + * already queued waiter might block forever, but there is nothing which + * can be done about that. + */ +static inline void futex_exit_done(struct task_struct *tsk) +{ + tsk->futex_state = FUTEX_STATE_DEAD; } void futex_mm_release(struct task_struct *tsk); @@ -71,6 +103,7 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, #else static inline void futex_init_task(struct task_struct *tsk) { } static inline void futex_mm_release(struct task_struct *tsk) { } +static inline void futex_exit_done(struct task_struct *tsk) { } #endif #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 866439c361a9..6a3e0053791a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -959,6 +959,7 @@ struct task_struct { #endif struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; + unsigned int futex_state; #endif #ifdef CONFIG_PERF_EVENTS struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; @@ -1334,7 +1335,6 @@ extern struct pid *cad_pid; */ #define PF_IDLE 0x00000002 /* I am an IDLE thread */ #define PF_EXITING 0x00000004 /* Getting shut down */ -#define PF_EXITPIDONE 0x00000008 /* PI exit done on shut down */ #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ #define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */ #define PF_FORKNOEXEC 0x00000040 /* Forked but didn't exec */ diff --git a/kernel/exit.c b/kernel/exit.c index 15437cfdcd70..c5435a2d37c3 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -803,16 +803,7 @@ void __noreturn do_exit(long code) */ if (unlikely(tsk->flags & PF_EXITING)) { pr_alert("Fixing recursive fault but reboot is needed!\n"); - /* - * We can do this unlocked here. The futex code uses - * this flag just to verify whether the pi state - * cleanup has been done or not. In the worst case it - * loops once more. We pretend that the cleanup was - * done as there is no way to return. Either the - * OWNER_DIED bit is set by now or we push the blocked - * task into the wait for ever nirwana as well. - */ - tsk->flags |= PF_EXITPIDONE; + futex_exit_done(tsk); set_current_state(TASK_UNINTERRUPTIBLE); schedule(); } @@ -902,12 +893,7 @@ void __noreturn do_exit(long code) * Make sure we are holding no locks: */ debug_check_no_locks_held(); - /* - * We can do this unlocked here. The futex code uses this flag - * just to verify whether the pi state cleanup has been done - * or not. In the worst case it loops once more. - */ - tsk->flags |= PF_EXITPIDONE; + futex_exit_done(tsk); if (tsk->io_context) exit_io_context(tsk); diff --git a/kernel/futex.c b/kernel/futex.c index 75edb19fa3a2..6d576c421abb 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1182,9 +1182,10 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval, u32 uval2; /* - * If PF_EXITPIDONE is not yet set, then try again. + * If the futex exit state is not yet FUTEX_STATE_DEAD, wait + * for it to finish. */ - if (tsk && !(tsk->flags & PF_EXITPIDONE)) + if (tsk && tsk->futex_state != FUTEX_STATE_DEAD) return -EAGAIN; /* @@ -1203,8 +1204,9 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval, * *uaddr = 0xC0000000; tsk = get_task(PID); * } if (!tsk->flags & PF_EXITING) { * ... attach(); - * tsk->flags |= PF_EXITPIDONE; } else { - * if (!(tsk->flags & PF_EXITPIDONE)) + * tsk->futex_state = } else { + * FUTEX_STATE_DEAD; if (tsk->futex_state != + * FUTEX_STATE_DEAD) * return -EAGAIN; * return -ESRCH; <--- FAIL * } @@ -1260,17 +1262,16 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key, } /* - * We need to look at the task state flags to figure out, - * whether the task is exiting. To protect against the do_exit - * change of the task flags, we do this protected by - * p->pi_lock: + * We need to look at the task state to figure out, whether the + * task is exiting. To protect against the change of the task state + * in futex_exit_release(), we do this protected by p->pi_lock: */ raw_spin_lock_irq(&p->pi_lock); - if (unlikely(p->flags & PF_EXITING)) { + if (unlikely(p->futex_state != FUTEX_STATE_OK)) { /* - * The task is on the way out. When PF_EXITPIDONE is - * set, we know that the task has finished the - * cleanup: + * The task is on the way out. When the futex state is + * FUTEX_STATE_DEAD, we know that the task has finished + * the cleanup: */ int ret = handle_exit_race(uaddr, uval, p); From 7d79d1c681ac4f4e0702ceb346150db4b3bb87c7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Nov 2019 22:55:38 +0100 Subject: [PATCH 1354/3715] exit/exec: Seperate mm_release() commit 4610ba7ad877fafc0a25a30c6c82015304120426 upstream. mm_release() contains the futex exit handling. mm_release() is called from do_exit()->exit_mm() and from exec()->exec_mm(). In the exit_mm() case PF_EXITING and the futex state is updated. In the exec_mm() case these states are not touched. As the futex exit code needs further protections against exit races, this needs to be split into two functions. Preparatory only, no functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20191106224556.240518241@linutronix.de Signed-off-by: Greg Kroah-Hartman --- fs/exec.c | 2 +- include/linux/sched/mm.h | 6 ++++-- kernel/exit.c | 2 +- kernel/fork.c | 12 +++++++++++- 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 4623fc3ac86b..7def97f6aac2 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1007,7 +1007,7 @@ static int exec_mmap(struct mm_struct *mm) /* Notify parent that we're no longer interested in the old VM */ tsk = current; old_mm = current->mm; - mm_release(tsk, old_mm); + exec_mm_release(tsk, old_mm); if (old_mm) { sync_mm_rss(old_mm); diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 9f7cc1d7ec4a..efb9e12e7f91 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -125,8 +125,10 @@ extern struct mm_struct *get_task_mm(struct task_struct *task); * succeeds. */ extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); -/* Remove the current tasks stale references to the old mm_struct */ -extern void mm_release(struct task_struct *, struct mm_struct *); +/* Remove the current tasks stale references to the old mm_struct on exit() */ +extern void exit_mm_release(struct task_struct *, struct mm_struct *); +/* Remove the current tasks stale references to the old mm_struct on exec() */ +extern void exec_mm_release(struct task_struct *, struct mm_struct *); #ifdef CONFIG_MEMCG extern void mm_update_next_owner(struct mm_struct *mm); diff --git a/kernel/exit.c b/kernel/exit.c index c5435a2d37c3..844678c57756 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -497,7 +497,7 @@ static void exit_mm(void) struct mm_struct *mm = current->mm; struct core_state *core_state; - mm_release(current, mm); + exit_mm_release(current, mm); if (!mm) return; sync_mm_rss(mm); diff --git a/kernel/fork.c b/kernel/fork.c index 1dc6b9ecc33a..768a57aaf00d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1132,7 +1132,7 @@ static int wait_for_vfork_done(struct task_struct *child, * restoring the old one. . . * Eric Biederman 10 January 1998 */ -void mm_release(struct task_struct *tsk, struct mm_struct *mm) +static void mm_release(struct task_struct *tsk, struct mm_struct *mm) { /* Get rid of any futexes when releasing the mm */ futex_mm_release(tsk); @@ -1169,6 +1169,16 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm) complete_vfork_done(tsk); } +void exit_mm_release(struct task_struct *tsk, struct mm_struct *mm) +{ + mm_release(tsk, mm); +} + +void exec_mm_release(struct task_struct *tsk, struct mm_struct *mm) +{ + mm_release(tsk, mm); +} + /* * Allocate a new mm structure and copy contents from the * mm structure of the passed in task structure. From a6dc90f43fc4595db805e980c7ddf45f7b86afd8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Nov 2019 22:55:39 +0100 Subject: [PATCH 1355/3715] futex: Split futex_mm_release() for exit/exec commit 150d71584b12809144b8145b817e83b81158ae5f upstream. To allow separate handling of the futex exit state in the futex exit code for exit and exec, split futex_mm_release() into two functions and invoke them from the corresponding exit/exec_mm_release() callsites. Preparatory only, no functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20191106224556.332094221@linutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/futex.h | 6 ++++-- kernel/fork.c | 5 ++--- kernel/futex.c | 7 ++++++- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/include/linux/futex.h b/include/linux/futex.h index 35703ec3a255..fb698e25f210 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -96,14 +96,16 @@ static inline void futex_exit_done(struct task_struct *tsk) tsk->futex_state = FUTEX_STATE_DEAD; } -void futex_mm_release(struct task_struct *tsk); +void futex_exit_release(struct task_struct *tsk); +void futex_exec_release(struct task_struct *tsk); long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3); #else static inline void futex_init_task(struct task_struct *tsk) { } -static inline void futex_mm_release(struct task_struct *tsk) { } static inline void futex_exit_done(struct task_struct *tsk) { } +static inline void futex_exit_release(struct task_struct *tsk) { } +static inline void futex_exec_release(struct task_struct *tsk) { } #endif #endif diff --git a/kernel/fork.c b/kernel/fork.c index 768a57aaf00d..0a328cf0cb13 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1134,9 +1134,6 @@ static int wait_for_vfork_done(struct task_struct *child, */ static void mm_release(struct task_struct *tsk, struct mm_struct *mm) { - /* Get rid of any futexes when releasing the mm */ - futex_mm_release(tsk); - uprobe_free_utask(tsk); /* Get rid of any cached register state */ @@ -1171,11 +1168,13 @@ static void mm_release(struct task_struct *tsk, struct mm_struct *mm) void exit_mm_release(struct task_struct *tsk, struct mm_struct *mm) { + futex_exit_release(tsk); mm_release(tsk, mm); } void exec_mm_release(struct task_struct *tsk, struct mm_struct *mm) { + futex_exec_release(tsk); mm_release(tsk, mm); } diff --git a/kernel/futex.c b/kernel/futex.c index 6d576c421abb..db4cd0ed43c1 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -3684,7 +3684,7 @@ static void exit_robust_list(struct task_struct *curr) } } -void futex_mm_release(struct task_struct *tsk) +void futex_exec_release(struct task_struct *tsk) { if (unlikely(tsk->robust_list)) { exit_robust_list(tsk); @@ -3702,6 +3702,11 @@ void futex_mm_release(struct task_struct *tsk) exit_pi_state_list(tsk); } +void futex_exit_release(struct task_struct *tsk) +{ + futex_exec_release(tsk); +} + long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3) { From 32676552cfea1aa1d96b23000c8d9af735cd064f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Nov 2019 22:55:40 +0100 Subject: [PATCH 1356/3715] futex: Set task::futex_state to DEAD right after handling futex exit commit f24f22435dcc11389acc87e5586239c1819d217c upstream. Setting task::futex_state in do_exit() is rather arbitrarily placed for no reason. Move it into the futex code. Note, this is only done for the exit cleanup as the exec cleanup cannot set the state to FUTEX_STATE_DEAD because the task struct is still in active use. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20191106224556.439511191@linutronix.de Signed-off-by: Greg Kroah-Hartman --- kernel/exit.c | 1 - kernel/futex.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/exit.c b/kernel/exit.c index 844678c57756..915514ceca0c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -893,7 +893,6 @@ void __noreturn do_exit(long code) * Make sure we are holding no locks: */ debug_check_no_locks_held(); - futex_exit_done(tsk); if (tsk->io_context) exit_io_context(tsk); diff --git a/kernel/futex.c b/kernel/futex.c index db4cd0ed43c1..53de6574a134 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -3705,6 +3705,7 @@ void futex_exec_release(struct task_struct *tsk) void futex_exit_release(struct task_struct *tsk) { futex_exec_release(tsk); + futex_exit_done(tsk); } long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, From 1be36de0acdc5d58d8e2510b8b35c6d97c881451 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Nov 2019 22:55:41 +0100 Subject: [PATCH 1357/3715] futex: Mark the begin of futex exit explicitly commit 18f694385c4fd77a09851fd301236746ca83f3cb upstream. Instead of relying on PF_EXITING use an explicit state for the futex exit and set it in the futex exit function. This moves the smp barrier and the lock/unlock serialization into the futex code. As with the DEAD state this is restricted to the exit path as exec continues to use the same task struct. This allows to simplify that logic in a next step. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20191106224556.539409004@linutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/futex.h | 31 +++---------------------------- kernel/exit.c | 13 +------------ kernel/futex.c | 37 ++++++++++++++++++++++++++++++++++++- 3 files changed, 40 insertions(+), 41 deletions(-) diff --git a/include/linux/futex.h b/include/linux/futex.h index fb698e25f210..ff143f766b46 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -55,6 +55,7 @@ union futex_key { #ifdef CONFIG_FUTEX enum { FUTEX_STATE_OK, + FUTEX_STATE_EXITING, FUTEX_STATE_DEAD, }; @@ -69,33 +70,7 @@ static inline void futex_init_task(struct task_struct *tsk) tsk->futex_state = FUTEX_STATE_OK; } -/** - * futex_exit_done - Sets the tasks futex state to FUTEX_STATE_DEAD - * @tsk: task to set the state on - * - * Set the futex exit state of the task lockless. The futex waiter code - * observes that state when a task is exiting and loops until the task has - * actually finished the futex cleanup. The worst case for this is that the - * waiter runs through the wait loop until the state becomes visible. - * - * This has two callers: - * - * - futex_mm_release() after the futex exit cleanup has been done - * - * - do_exit() from the recursive fault handling path. - * - * In case of a recursive fault this is best effort. Either the futex exit - * code has run already or not. If the OWNER_DIED bit has been set on the - * futex then the waiter can take it over. If not, the problem is pushed - * back to user space. If the futex exit code did not run yet, then an - * already queued waiter might block forever, but there is nothing which - * can be done about that. - */ -static inline void futex_exit_done(struct task_struct *tsk) -{ - tsk->futex_state = FUTEX_STATE_DEAD; -} - +void futex_exit_recursive(struct task_struct *tsk); void futex_exit_release(struct task_struct *tsk); void futex_exec_release(struct task_struct *tsk); @@ -103,7 +78,7 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3); #else static inline void futex_init_task(struct task_struct *tsk) { } -static inline void futex_exit_done(struct task_struct *tsk) { } +static inline void futex_exit_recursive(struct task_struct *tsk) { } static inline void futex_exit_release(struct task_struct *tsk) { } static inline void futex_exec_release(struct task_struct *tsk) { } #endif diff --git a/kernel/exit.c b/kernel/exit.c index 915514ceca0c..57cb0eb1271c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -803,23 +803,12 @@ void __noreturn do_exit(long code) */ if (unlikely(tsk->flags & PF_EXITING)) { pr_alert("Fixing recursive fault but reboot is needed!\n"); - futex_exit_done(tsk); + futex_exit_recursive(tsk); set_current_state(TASK_UNINTERRUPTIBLE); schedule(); } exit_signals(tsk); /* sets PF_EXITING */ - /* - * Ensure that all new tsk->pi_lock acquisitions must observe - * PF_EXITING. Serializes against futex.c:attach_to_pi_owner(). - */ - smp_mb(); - /* - * Ensure that we must observe the pi_state in exit_mm() -> - * mm_release() -> exit_pi_state_list(). - */ - raw_spin_lock_irq(&tsk->pi_lock); - raw_spin_unlock_irq(&tsk->pi_lock); if (unlikely(in_atomic())) { pr_info("note: %s[%d] exited with preempt_count %d\n", diff --git a/kernel/futex.c b/kernel/futex.c index 53de6574a134..4960e327c375 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -3702,10 +3702,45 @@ void futex_exec_release(struct task_struct *tsk) exit_pi_state_list(tsk); } +/** + * futex_exit_recursive - Set the tasks futex state to FUTEX_STATE_DEAD + * @tsk: task to set the state on + * + * Set the futex exit state of the task lockless. The futex waiter code + * observes that state when a task is exiting and loops until the task has + * actually finished the futex cleanup. The worst case for this is that the + * waiter runs through the wait loop until the state becomes visible. + * + * This is called from the recursive fault handling path in do_exit(). + * + * This is best effort. Either the futex exit code has run already or + * not. If the OWNER_DIED bit has been set on the futex then the waiter can + * take it over. If not, the problem is pushed back to user space. If the + * futex exit code did not run yet, then an already queued waiter might + * block forever, but there is nothing which can be done about that. + */ +void futex_exit_recursive(struct task_struct *tsk) +{ + tsk->futex_state = FUTEX_STATE_DEAD; +} + void futex_exit_release(struct task_struct *tsk) { + tsk->futex_state = FUTEX_STATE_EXITING; + /* + * Ensure that all new tsk->pi_lock acquisitions must observe + * FUTEX_STATE_EXITING. Serializes against attach_to_pi_owner(). + */ + smp_mb(); + /* + * Ensure that we must observe the pi_state in exit_pi_state_list(). + */ + raw_spin_lock_irq(&tsk->pi_lock); + raw_spin_unlock_irq(&tsk->pi_lock); + futex_exec_release(tsk); - futex_exit_done(tsk); + + tsk->futex_state = FUTEX_STATE_DEAD; } long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, From 0633e316e2a8bbea17949ae85c9cebf3a67eeaa1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Nov 2019 22:55:42 +0100 Subject: [PATCH 1358/3715] futex: Sanitize exit state handling commit 4a8e991b91aca9e20705d434677ac013974e0e30 upstream. Instead of having a smp_mb() and an empty lock/unlock of task::pi_lock move the state setting into to the lock section. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20191106224556.645603214@linutronix.de Signed-off-by: Greg Kroah-Hartman --- kernel/futex.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 4960e327c375..fac0c1747396 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -3726,16 +3726,19 @@ void futex_exit_recursive(struct task_struct *tsk) void futex_exit_release(struct task_struct *tsk) { - tsk->futex_state = FUTEX_STATE_EXITING; /* - * Ensure that all new tsk->pi_lock acquisitions must observe - * FUTEX_STATE_EXITING. Serializes against attach_to_pi_owner(). - */ - smp_mb(); - /* - * Ensure that we must observe the pi_state in exit_pi_state_list(). + * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock. + * + * This ensures that all subsequent checks of tsk->futex_state in + * attach_to_pi_owner() must observe FUTEX_STATE_EXITING with + * tsk->pi_lock held. + * + * It guarantees also that a pi_state which was queued right before + * the state change under tsk->pi_lock by a concurrent waiter must + * be observed in exit_pi_state_list(). */ raw_spin_lock_irq(&tsk->pi_lock); + tsk->futex_state = FUTEX_STATE_EXITING; raw_spin_unlock_irq(&tsk->pi_lock); futex_exec_release(tsk); From 7d143b66d4677bacb9ae20490c8ce725ccbd83fb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Nov 2019 22:55:43 +0100 Subject: [PATCH 1359/3715] futex: Provide state handling for exec() as well commit af8cbda2cfcaa5515d61ec500498d46e9a8247e2 upstream. exec() attempts to handle potentially held futexes gracefully by running the futex exit handling code like exit() does. The current implementation has no protection against concurrent incoming waiters. The reason is that the futex state cannot be set to FUTEX_STATE_DEAD after the cleanup because the task struct is still active and just about to execute the new binary. While its arguably buggy when a task holds a futex over exec(), for consistency sake the state handling can at least cover the actual futex exit cleanup section. This provides state consistency protection accross the cleanup. As the futex state of the task becomes FUTEX_STATE_OK after the cleanup has been finished, this cannot prevent subsequent attempts to attach to the task in case that the cleanup was not successfull in mopping up all leftovers. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20191106224556.753355618@linutronix.de Signed-off-by: Greg Kroah-Hartman --- kernel/futex.c | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index fac0c1747396..7eeb56d61f48 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -3684,7 +3684,7 @@ static void exit_robust_list(struct task_struct *curr) } } -void futex_exec_release(struct task_struct *tsk) +static void futex_cleanup(struct task_struct *tsk) { if (unlikely(tsk->robust_list)) { exit_robust_list(tsk); @@ -3724,7 +3724,7 @@ void futex_exit_recursive(struct task_struct *tsk) tsk->futex_state = FUTEX_STATE_DEAD; } -void futex_exit_release(struct task_struct *tsk) +static void futex_cleanup_begin(struct task_struct *tsk) { /* * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock. @@ -3740,10 +3740,40 @@ void futex_exit_release(struct task_struct *tsk) raw_spin_lock_irq(&tsk->pi_lock); tsk->futex_state = FUTEX_STATE_EXITING; raw_spin_unlock_irq(&tsk->pi_lock); +} - futex_exec_release(tsk); +static void futex_cleanup_end(struct task_struct *tsk, int state) +{ + /* + * Lockless store. The only side effect is that an observer might + * take another loop until it becomes visible. + */ + tsk->futex_state = state; +} - tsk->futex_state = FUTEX_STATE_DEAD; +void futex_exec_release(struct task_struct *tsk) +{ + /* + * The state handling is done for consistency, but in the case of + * exec() there is no way to prevent futher damage as the PID stays + * the same. But for the unlikely and arguably buggy case that a + * futex is held on exec(), this provides at least as much state + * consistency protection which is possible. + */ + futex_cleanup_begin(tsk); + futex_cleanup(tsk); + /* + * Reset the state to FUTEX_STATE_OK. The task is alive and about + * exec a new binary. + */ + futex_cleanup_end(tsk, FUTEX_STATE_OK); +} + +void futex_exit_release(struct task_struct *tsk) +{ + futex_cleanup_begin(tsk); + futex_cleanup(tsk); + futex_cleanup_end(tsk, FUTEX_STATE_DEAD); } long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, From ac7e59a0c1ae35a252b2e9946547b82be90cec36 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Nov 2019 22:55:44 +0100 Subject: [PATCH 1360/3715] futex: Add mutex around futex exit commit 3f186d974826847a07bc7964d79ec4eded475ad9 upstream. The mutex will be used in subsequent changes to replace the busy looping of a waiter when the futex owner is currently executing the exit cleanup to prevent a potential live lock. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20191106224556.845798895@linutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/futex.h | 1 + include/linux/sched.h | 1 + kernel/futex.c | 16 ++++++++++++++++ 3 files changed, 18 insertions(+) diff --git a/include/linux/futex.h b/include/linux/futex.h index ff143f766b46..a4b6cba699bf 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -68,6 +68,7 @@ static inline void futex_init_task(struct task_struct *tsk) INIT_LIST_HEAD(&tsk->pi_state_list); tsk->pi_state_cache = NULL; tsk->futex_state = FUTEX_STATE_OK; + mutex_init(&tsk->futex_exit_mutex); } void futex_exit_recursive(struct task_struct *tsk); diff --git a/include/linux/sched.h b/include/linux/sched.h index 6a3e0053791a..b06577652643 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -959,6 +959,7 @@ struct task_struct { #endif struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; + struct mutex futex_exit_mutex; unsigned int futex_state; #endif #ifdef CONFIG_PERF_EVENTS diff --git a/kernel/futex.c b/kernel/futex.c index 7eeb56d61f48..e6a68380f4de 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -3721,11 +3721,22 @@ static void futex_cleanup(struct task_struct *tsk) */ void futex_exit_recursive(struct task_struct *tsk) { + /* If the state is FUTEX_STATE_EXITING then futex_exit_mutex is held */ + if (tsk->futex_state == FUTEX_STATE_EXITING) + mutex_unlock(&tsk->futex_exit_mutex); tsk->futex_state = FUTEX_STATE_DEAD; } static void futex_cleanup_begin(struct task_struct *tsk) { + /* + * Prevent various race issues against a concurrent incoming waiter + * including live locks by forcing the waiter to block on + * tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in + * attach_to_pi_owner(). + */ + mutex_lock(&tsk->futex_exit_mutex); + /* * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock. * @@ -3749,6 +3760,11 @@ static void futex_cleanup_end(struct task_struct *tsk, int state) * take another loop until it becomes visible. */ tsk->futex_state = state; + /* + * Drop the exit protection. This unblocks waiters which observed + * FUTEX_STATE_EXITING to reevaluate the state. + */ + mutex_unlock(&tsk->futex_exit_mutex); } void futex_exec_release(struct task_struct *tsk) From e6e00df182908f34360c3c9f2d13cc719362e9c0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Nov 2019 22:55:45 +0100 Subject: [PATCH 1361/3715] futex: Provide distinct return value when owner is exiting commit ac31c7ff8624409ba3c4901df9237a616c187a5d upstream. attach_to_pi_owner() returns -EAGAIN for various cases: - Owner task is exiting - Futex value has changed The caller drops the held locks (hash bucket, mmap_sem) and retries the operation. In case of the owner task exiting this can result in a live lock. As a preparatory step for seperating those cases, provide a distinct return value (EBUSY) for the owner exiting case. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20191106224556.935606117@linutronix.de Signed-off-by: Greg Kroah-Hartman --- kernel/futex.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index e6a68380f4de..660d6e259dca 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1182,11 +1182,11 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval, u32 uval2; /* - * If the futex exit state is not yet FUTEX_STATE_DEAD, wait - * for it to finish. + * If the futex exit state is not yet FUTEX_STATE_DEAD, tell the + * caller that the alleged owner is busy. */ if (tsk && tsk->futex_state != FUTEX_STATE_DEAD) - return -EAGAIN; + return -EBUSY; /* * Reread the user space value to handle the following situation: @@ -2093,12 +2093,13 @@ retry_private: if (!ret) goto retry; goto out; + case -EBUSY: case -EAGAIN: /* * Two reasons for this: - * - Owner is exiting and we just wait for the + * - EBUSY: Owner is exiting and we just wait for the * exit to complete. - * - The user space value changed. + * - EAGAIN: The user space value changed. */ double_unlock_hb(hb1, hb2); hb_waiters_dec(hb2); @@ -2859,12 +2860,13 @@ retry_private: goto out_unlock_put_key; case -EFAULT: goto uaddr_faulted; + case -EBUSY: case -EAGAIN: /* * Two reasons for this: - * - Task is exiting and we just wait for the + * - EBUSY: Task is exiting and we just wait for the * exit to complete. - * - The user space value changed. + * - EAGAIN: The user space value changed. */ queue_unlock(hb); put_futex_key(&q.key); From 61fa9f167caaa73d0a7c88f498eceeb12c6fa3db Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Nov 2019 22:55:46 +0100 Subject: [PATCH 1362/3715] futex: Prevent exit livelock commit 3ef240eaff36b8119ac9e2ea17cbf41179c930ba upstream. Oleg provided the following test case: int main(void) { struct sched_param sp = {}; sp.sched_priority = 2; assert(sched_setscheduler(0, SCHED_FIFO, &sp) == 0); int lock = vfork(); if (!lock) { sp.sched_priority = 1; assert(sched_setscheduler(0, SCHED_FIFO, &sp) == 0); _exit(0); } syscall(__NR_futex, &lock, FUTEX_LOCK_PI, 0,0,0); return 0; } This creates an unkillable RT process spinning in futex_lock_pi() on a UP machine or if the process is affine to a single CPU. The reason is: parent child set FIFO prio 2 vfork() -> set FIFO prio 1 implies wait_for_child() sched_setscheduler(...) exit() do_exit() .... mm_release() tsk->futex_state = FUTEX_STATE_EXITING; exit_futex(); (NOOP in this case) complete() --> wakes parent sys_futex() loop infinite because tsk->futex_state == FUTEX_STATE_EXITING The same problem can happen just by regular preemption as well: task holds futex ... do_exit() tsk->futex_state = FUTEX_STATE_EXITING; --> preemption (unrelated wakeup of some other higher prio task, e.g. timer) switch_to(other_task) return to user sys_futex() loop infinite as above Just for the fun of it the futex exit cleanup could trigger the wakeup itself before the task sets its futex state to DEAD. To cure this, the handling of the exiting owner is changed so: - A refcount is held on the task - The task pointer is stored in a caller visible location - The caller drops all locks (hash bucket, mmap_sem) and blocks on task::futex_exit_mutex. When the mutex is acquired then the exiting task has completed the cleanup and the state is consistent and can be reevaluated. This is not a pretty solution, but there is no choice other than returning an error code to user space, which would break the state consistency guarantee and open another can of problems including regressions. For stable backports the preparatory commits ac31c7ff8624 .. ba31c1a48538 are required as well, but for anything older than 5.3.y the backports are going to be provided when this hits mainline as the other dependencies for those kernels are definitely not stable material. Fixes: 778e9a9c3e71 ("pi-futex: fix exit races and locking problems") Reported-by: Oleg Nesterov Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Cc: Stable Team Link: https://lkml.kernel.org/r/20191106224557.041676471@linutronix.de Signed-off-by: Greg Kroah-Hartman --- kernel/futex.c | 106 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 91 insertions(+), 15 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 660d6e259dca..f5aae14c247b 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1176,6 +1176,36 @@ out_error: return ret; } +/** + * wait_for_owner_exiting - Block until the owner has exited + * @exiting: Pointer to the exiting task + * + * Caller must hold a refcount on @exiting. + */ +static void wait_for_owner_exiting(int ret, struct task_struct *exiting) +{ + if (ret != -EBUSY) { + WARN_ON_ONCE(exiting); + return; + } + + if (WARN_ON_ONCE(ret == -EBUSY && !exiting)) + return; + + mutex_lock(&exiting->futex_exit_mutex); + /* + * No point in doing state checking here. If the waiter got here + * while the task was in exec()->exec_futex_release() then it can + * have any FUTEX_STATE_* value when the waiter has acquired the + * mutex. OK, if running, EXITING or DEAD if it reached exit() + * already. Highly unlikely and not a problem. Just one more round + * through the futex maze. + */ + mutex_unlock(&exiting->futex_exit_mutex); + + put_task_struct(exiting); +} + static int handle_exit_race(u32 __user *uaddr, u32 uval, struct task_struct *tsk) { @@ -1237,7 +1267,8 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval, * it after doing proper sanity checks. */ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key, - struct futex_pi_state **ps) + struct futex_pi_state **ps, + struct task_struct **exiting) { pid_t pid = uval & FUTEX_TID_MASK; struct futex_pi_state *pi_state; @@ -1276,7 +1307,19 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key, int ret = handle_exit_race(uaddr, uval, p); raw_spin_unlock_irq(&p->pi_lock); - put_task_struct(p); + /* + * If the owner task is between FUTEX_STATE_EXITING and + * FUTEX_STATE_DEAD then store the task pointer and keep + * the reference on the task struct. The calling code will + * drop all locks, wait for the task to reach + * FUTEX_STATE_DEAD and then drop the refcount. This is + * required to prevent a live lock when the current task + * preempted the exiting task between the two states. + */ + if (ret == -EBUSY) + *exiting = p; + else + put_task_struct(p); return ret; } @@ -1315,7 +1358,8 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key, static int lookup_pi_state(u32 __user *uaddr, u32 uval, struct futex_hash_bucket *hb, - union futex_key *key, struct futex_pi_state **ps) + union futex_key *key, struct futex_pi_state **ps, + struct task_struct **exiting) { struct futex_q *top_waiter = futex_top_waiter(hb, key); @@ -1330,7 +1374,7 @@ static int lookup_pi_state(u32 __user *uaddr, u32 uval, * We are the first waiter - try to look up the owner based on * @uval and attach to it. */ - return attach_to_pi_owner(uaddr, uval, key, ps); + return attach_to_pi_owner(uaddr, uval, key, ps, exiting); } static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) @@ -1358,6 +1402,8 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) * lookup * @task: the task to perform the atomic lock work for. This will * be "current" except in the case of requeue pi. + * @exiting: Pointer to store the task pointer of the owner task + * which is in the middle of exiting * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) * * Return: @@ -1366,11 +1412,17 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) * - <0 - error * * The hb->lock and futex_key refs shall be held by the caller. + * + * @exiting is only set when the return value is -EBUSY. If so, this holds + * a refcount on the exiting task on return and the caller needs to drop it + * after waiting for the exit to complete. */ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, union futex_key *key, struct futex_pi_state **ps, - struct task_struct *task, int set_waiters) + struct task_struct *task, + struct task_struct **exiting, + int set_waiters) { u32 uval, newval, vpid = task_pid_vnr(task); struct futex_q *top_waiter; @@ -1440,7 +1492,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, * attach to the owner. If that fails, no harm done, we only * set the FUTEX_WAITERS bit in the user space variable. */ - return attach_to_pi_owner(uaddr, newval, key, ps); + return attach_to_pi_owner(uaddr, newval, key, ps, exiting); } /** @@ -1859,6 +1911,8 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, * @key1: the from futex key * @key2: the to futex key * @ps: address to store the pi_state pointer + * @exiting: Pointer to store the task pointer of the owner task + * which is in the middle of exiting * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) * * Try and get the lock on behalf of the top waiter if we can do it atomically. @@ -1866,16 +1920,20 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit. * hb1 and hb2 must be held by the caller. * + * @exiting is only set when the return value is -EBUSY. If so, this holds + * a refcount on the exiting task on return and the caller needs to drop it + * after waiting for the exit to complete. + * * Return: * - 0 - failed to acquire the lock atomically; * - >0 - acquired the lock, return value is vpid of the top_waiter * - <0 - error */ -static int futex_proxy_trylock_atomic(u32 __user *pifutex, - struct futex_hash_bucket *hb1, - struct futex_hash_bucket *hb2, - union futex_key *key1, union futex_key *key2, - struct futex_pi_state **ps, int set_waiters) +static int +futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1, + struct futex_hash_bucket *hb2, union futex_key *key1, + union futex_key *key2, struct futex_pi_state **ps, + struct task_struct **exiting, int set_waiters) { struct futex_q *top_waiter = NULL; u32 curval; @@ -1912,7 +1970,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, */ vpid = task_pid_vnr(top_waiter->task); ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, - set_waiters); + exiting, set_waiters); if (ret == 1) { requeue_pi_wake_futex(top_waiter, key2, hb2); return vpid; @@ -2041,6 +2099,8 @@ retry_private: } if (requeue_pi && (task_count - nr_wake < nr_requeue)) { + struct task_struct *exiting = NULL; + /* * Attempt to acquire uaddr2 and wake the top waiter. If we * intend to requeue waiters, force setting the FUTEX_WAITERS @@ -2048,7 +2108,8 @@ retry_private: * faults rather in the requeue loop below. */ ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1, - &key2, &pi_state, nr_requeue); + &key2, &pi_state, + &exiting, nr_requeue); /* * At this point the top_waiter has either taken uaddr2 or is @@ -2075,7 +2136,8 @@ retry_private: * If that call succeeds then we have pi_state and an * initial refcount on it. */ - ret = lookup_pi_state(uaddr2, ret, hb2, &key2, &pi_state); + ret = lookup_pi_state(uaddr2, ret, hb2, &key2, + &pi_state, &exiting); } switch (ret) { @@ -2105,6 +2167,12 @@ retry_private: hb_waiters_dec(hb2); put_futex_key(&key2); put_futex_key(&key1); + /* + * Handle the case where the owner is in the middle of + * exiting. Wait for the exit to complete otherwise + * this task might loop forever, aka. live lock. + */ + wait_for_owner_exiting(ret, exiting); cond_resched(); goto retry; default: @@ -2820,6 +2888,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, { struct hrtimer_sleeper timeout, *to = NULL; struct futex_pi_state *pi_state = NULL; + struct task_struct *exiting = NULL; struct rt_mutex_waiter rt_waiter; struct futex_hash_bucket *hb; struct futex_q q = futex_q_init; @@ -2847,7 +2916,8 @@ retry: retry_private: hb = queue_lock(&q); - ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0); + ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, + &exiting, 0); if (unlikely(ret)) { /* * Atomic work succeeded and we got the lock, @@ -2870,6 +2940,12 @@ retry_private: */ queue_unlock(hb); put_futex_key(&q.key); + /* + * Handle the case where the owner is in the middle of + * exiting. Wait for the exit to complete otherwise + * this task might loop forever, aka. live lock. + */ + wait_for_owner_exiting(ret, exiting); cond_resched(); goto retry; default: From 564262cf4274a238a124fa76e48f14ead06caac2 Mon Sep 17 00:00:00 2001 From: Candle Sun Date: Tue, 22 Oct 2019 22:21:39 +0800 Subject: [PATCH 1363/3715] HID: core: check whether Usage Page item is after Usage ID items commit 1cb0d2aee26335d0bccf29100c7bed00ebece851 upstream. Upstream commit 58e75155009c ("HID: core: move Usage Page concatenation to Main item") adds support for Usage Page item after Usage ID items (such as keyboards manufactured by Primax). Usage Page concatenation in Main item works well for following report descriptor patterns: USAGE_PAGE (Keyboard) 05 07 USAGE_MINIMUM (Keyboard LeftControl) 19 E0 USAGE_MAXIMUM (Keyboard Right GUI) 29 E7 LOGICAL_MINIMUM (0) 15 00 LOGICAL_MAXIMUM (1) 25 01 REPORT_SIZE (1) 75 01 REPORT_COUNT (8) 95 08 INPUT (Data,Var,Abs) 81 02 ------------- USAGE_MINIMUM (Keyboard LeftControl) 19 E0 USAGE_MAXIMUM (Keyboard Right GUI) 29 E7 LOGICAL_MINIMUM (0) 15 00 LOGICAL_MAXIMUM (1) 25 01 REPORT_SIZE (1) 75 01 REPORT_COUNT (8) 95 08 USAGE_PAGE (Keyboard) 05 07 INPUT (Data,Var,Abs) 81 02 But it makes the parser act wrong for the following report descriptor pattern(such as some Gamepads): USAGE_PAGE (Button) 05 09 USAGE (Button 1) 09 01 USAGE (Button 2) 09 02 USAGE (Button 4) 09 04 USAGE (Button 5) 09 05 USAGE (Button 7) 09 07 USAGE (Button 8) 09 08 USAGE (Button 14) 09 0E USAGE (Button 15) 09 0F USAGE (Button 13) 09 0D USAGE_PAGE (Consumer Devices) 05 0C USAGE (Back) 0a 24 02 USAGE (HomePage) 0a 23 02 LOGICAL_MINIMUM (0) 15 00 LOGICAL_MAXIMUM (1) 25 01 REPORT_SIZE (1) 75 01 REPORT_COUNT (11) 95 0B INPUT (Data,Var,Abs) 81 02 With Usage Page concatenation in Main item, parser recognizes all the 11 Usages as consumer keys, it is not the HID device's real intention. This patch checks whether Usage Page is really defined after Usage ID items by comparing usage page using status. Usage Page concatenation on currently defined Usage Page will always do in local parsing when Usage ID items encountered. When Main item is parsing, concatenation will do again with last defined Usage Page if this page has not been used in the previous usages concatenation. Signed-off-by: Candle Sun Signed-off-by: Nianfu Bai Cc: Benjamin Tissoires Signed-off-by: Jiri Kosina Cc: Siarhei Vishniakou Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-core.c | 51 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 6 deletions(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 0b0fa257299d..0c547bf841f4 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -195,6 +195,18 @@ static unsigned hid_lookup_collection(struct hid_parser *parser, unsigned type) return 0; /* we know nothing about this usage type */ } +/* + * Concatenate usage which defines 16 bits or less with the + * currently defined usage page to form a 32 bit usage + */ + +static void complete_usage(struct hid_parser *parser, unsigned int index) +{ + parser->local.usage[index] &= 0xFFFF; + parser->local.usage[index] |= + (parser->global.usage_page & 0xFFFF) << 16; +} + /* * Add a usage to the temporary parser table. */ @@ -206,6 +218,14 @@ static int hid_add_usage(struct hid_parser *parser, unsigned usage, u8 size) return -1; } parser->local.usage[parser->local.usage_index] = usage; + + /* + * If Usage item only includes usage id, concatenate it with + * currently defined usage page + */ + if (size <= 2) + complete_usage(parser, parser->local.usage_index); + parser->local.usage_size[parser->local.usage_index] = size; parser->local.collection_index[parser->local.usage_index] = parser->collection_stack_ptr ? @@ -522,13 +542,32 @@ static int hid_parser_local(struct hid_parser *parser, struct hid_item *item) * usage value." */ -static void hid_concatenate_usage_page(struct hid_parser *parser) +static void hid_concatenate_last_usage_page(struct hid_parser *parser) { int i; + unsigned int usage_page; + unsigned int current_page; - for (i = 0; i < parser->local.usage_index; i++) - if (parser->local.usage_size[i] <= 2) - parser->local.usage[i] += parser->global.usage_page << 16; + if (!parser->local.usage_index) + return; + + usage_page = parser->global.usage_page; + + /* + * Concatenate usage page again only if last declared Usage Page + * has not been already used in previous usages concatenation + */ + for (i = parser->local.usage_index - 1; i >= 0; i--) { + if (parser->local.usage_size[i] > 2) + /* Ignore extended usages */ + continue; + + current_page = parser->local.usage[i] >> 16; + if (current_page == usage_page) + break; + + complete_usage(parser, i); + } } /* @@ -540,7 +579,7 @@ static int hid_parser_main(struct hid_parser *parser, struct hid_item *item) __u32 data; int ret; - hid_concatenate_usage_page(parser); + hid_concatenate_last_usage_page(parser); data = item_udata(item); @@ -751,7 +790,7 @@ static int hid_scan_main(struct hid_parser *parser, struct hid_item *item) __u32 data; int i; - hid_concatenate_usage_page(parser); + hid_concatenate_last_usage_page(parser); data = item_udata(item); From 12127f571caf8de22c76c867271281a26925b0be Mon Sep 17 00:00:00 2001 From: Lionel Debieve Date: Fri, 28 Jun 2019 13:26:54 +0200 Subject: [PATCH 1364/3715] crypto: stm32/hash - Fix hmac issue more than 256 bytes commit 0acabecebc912b3ba06289e4ef40476acc499a37 upstream. Correct condition for the second hmac loop. Key must be only set in the first loop. Initial condition was wrong, HMAC_KEY flag was not properly checked. Signed-off-by: Lionel Debieve Signed-off-by: Herbert Xu Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/stm32/stm32-hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/stm32/stm32-hash.c b/drivers/crypto/stm32/stm32-hash.c index 4835dd4a9e50..4909f820e953 100644 --- a/drivers/crypto/stm32/stm32-hash.c +++ b/drivers/crypto/stm32/stm32-hash.c @@ -361,7 +361,7 @@ static int stm32_hash_xmit_cpu(struct stm32_hash_dev *hdev, return -ETIMEDOUT; if ((hdev->flags & HASH_FLAGS_HMAC) && - (hdev->flags & ~HASH_FLAGS_HMAC_KEY)) { + (!(hdev->flags & HASH_FLAGS_HMAC_KEY))) { hdev->flags |= HASH_FLAGS_HMAC_KEY; stm32_hash_write_key(hdev); if (stm32_hash_wait_busy(hdev)) From f6cea8a29103f366e3c4aaba9c82ab3c78c1865c Mon Sep 17 00:00:00 2001 From: Hugues Fruchet Date: Thu, 28 Feb 2019 12:10:53 -0500 Subject: [PATCH 1365/3715] media: stm32-dcmi: fix DMA corruption when stopping streaming commit b3ce6f6ff3c260ee53b0f2236e5fd950d46957da upstream. Avoid call of dmaengine_terminate_all() between dmaengine_prep_slave_single() and dmaengine_submit() by locking the whole DMA submission sequence. Signed-off-by: Hugues Fruchet Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/stm32/stm32-dcmi.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/media/platform/stm32/stm32-dcmi.c b/drivers/media/platform/stm32/stm32-dcmi.c index 4281f3f76ab1..f157ccbd8286 100644 --- a/drivers/media/platform/stm32/stm32-dcmi.c +++ b/drivers/media/platform/stm32/stm32-dcmi.c @@ -161,6 +161,9 @@ struct stm32_dcmi { u32 misr; int errors_count; int buffers_count; + + /* Ensure DMA operations atomicity */ + struct mutex dma_lock; }; static inline struct stm32_dcmi *notifier_to_dcmi(struct v4l2_async_notifier *n) @@ -291,6 +294,13 @@ static int dcmi_start_dma(struct stm32_dcmi *dcmi, return ret; } + /* + * Avoid call of dmaengine_terminate_all() between + * dmaengine_prep_slave_single() and dmaengine_submit() + * by locking the whole DMA submission sequence + */ + mutex_lock(&dcmi->dma_lock); + /* Prepare a DMA transaction */ desc = dmaengine_prep_slave_single(dcmi->dma_chan, buf->paddr, buf->size, @@ -298,6 +308,7 @@ static int dcmi_start_dma(struct stm32_dcmi *dcmi, if (!desc) { dev_err(dcmi->dev, "%s: DMA dmaengine_prep_slave_single failed for buffer size %zu\n", __func__, buf->size); + mutex_unlock(&dcmi->dma_lock); return -EINVAL; } @@ -309,9 +320,12 @@ static int dcmi_start_dma(struct stm32_dcmi *dcmi, dcmi->dma_cookie = dmaengine_submit(desc); if (dma_submit_error(dcmi->dma_cookie)) { dev_err(dcmi->dev, "%s: DMA submission failed\n", __func__); + mutex_unlock(&dcmi->dma_lock); return -ENXIO; } + mutex_unlock(&dcmi->dma_lock); + dma_async_issue_pending(dcmi->dma_chan); return 0; @@ -690,7 +704,9 @@ static void dcmi_stop_streaming(struct vb2_queue *vq) spin_unlock_irq(&dcmi->irqlock); /* Stop all pending DMA operations */ + mutex_lock(&dcmi->dma_lock); dmaengine_terminate_all(dcmi->dma_chan); + mutex_unlock(&dcmi->dma_lock); clk_disable(dcmi->mclk); @@ -1662,6 +1678,7 @@ static int dcmi_probe(struct platform_device *pdev) spin_lock_init(&dcmi->irqlock); mutex_init(&dcmi->lock); + mutex_init(&dcmi->dma_lock); init_completion(&dcmi->complete); INIT_LIST_HEAD(&dcmi->buffers); From 934ff312e52e3dd4e547c9f6a6367410311347d0 Mon Sep 17 00:00:00 2001 From: Lionel Debieve Date: Mon, 1 Apr 2019 12:30:45 +0200 Subject: [PATCH 1366/3715] hwrng: stm32 - fix unbalanced pm_runtime_enable commit af0d4442dd6813de6e77309063beb064fa8e89ae upstream. No remove function implemented yet in the driver. Without remove function, the pm_runtime implementation complains when removing and probing again the driver. Signed-off-by: Lionel Debieve Signed-off-by: Herbert Xu Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/char/hw_random/stm32-rng.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/char/hw_random/stm32-rng.c b/drivers/char/hw_random/stm32-rng.c index 83c695938a2d..f53d47e3355d 100644 --- a/drivers/char/hw_random/stm32-rng.c +++ b/drivers/char/hw_random/stm32-rng.c @@ -166,6 +166,13 @@ static int stm32_rng_probe(struct platform_device *ofdev) return devm_hwrng_register(dev, &priv->rng); } +static int stm32_rng_remove(struct platform_device *ofdev) +{ + pm_runtime_disable(&ofdev->dev); + + return 0; +} + #ifdef CONFIG_PM static int stm32_rng_runtime_suspend(struct device *dev) { @@ -202,6 +209,7 @@ static struct platform_driver stm32_rng_driver = { .of_match_table = stm32_rng_match, }, .probe = stm32_rng_probe, + .remove = stm32_rng_remove, }; module_platform_driver(stm32_rng_driver); From 517287405bf928638ab7dd30ba2226e51f9cba45 Mon Sep 17 00:00:00 2001 From: Fabien Dessenne Date: Fri, 4 Jan 2019 14:47:16 +0100 Subject: [PATCH 1367/3715] mailbox: mailbox-test: fix null pointer if no mmio commit 6899b4f7c99c72968e58e502f96084f74f6e5e86 upstream. Fix null pointer issue if resource_size is called with no ioresource. Signed-off-by: Ludovic Barre Signed-off-by: Fabien Dessenne Signed-off-by: Jassi Brar Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/mailbox/mailbox-test.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/mailbox/mailbox-test.c b/drivers/mailbox/mailbox-test.c index 93f3d4d61fa7..546ba140f83d 100644 --- a/drivers/mailbox/mailbox-test.c +++ b/drivers/mailbox/mailbox-test.c @@ -363,22 +363,24 @@ static int mbox_test_probe(struct platform_device *pdev) /* It's okay for MMIO to be NULL */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - size = resource_size(res); tdev->tx_mmio = devm_ioremap_resource(&pdev->dev, res); - if (PTR_ERR(tdev->tx_mmio) == -EBUSY) + if (PTR_ERR(tdev->tx_mmio) == -EBUSY) { /* if reserved area in SRAM, try just ioremap */ + size = resource_size(res); tdev->tx_mmio = devm_ioremap(&pdev->dev, res->start, size); - else if (IS_ERR(tdev->tx_mmio)) + } else if (IS_ERR(tdev->tx_mmio)) { tdev->tx_mmio = NULL; + } /* If specified, second reg entry is Rx MMIO */ res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - size = resource_size(res); tdev->rx_mmio = devm_ioremap_resource(&pdev->dev, res); - if (PTR_ERR(tdev->rx_mmio) == -EBUSY) + if (PTR_ERR(tdev->rx_mmio) == -EBUSY) { + size = resource_size(res); tdev->rx_mmio = devm_ioremap(&pdev->dev, res->start, size); - else if (IS_ERR(tdev->rx_mmio)) + } else if (IS_ERR(tdev->rx_mmio)) { tdev->rx_mmio = tdev->tx_mmio; + } tdev->tx_channel = mbox_test_request_channel(pdev, "tx"); tdev->rx_channel = mbox_test_request_channel(pdev, "rx"); From c319da0690bf14bd2b8da37e59dbe9f32e5f97ac Mon Sep 17 00:00:00 2001 From: Alexandre Torgue Date: Thu, 14 Feb 2019 17:54:24 +0100 Subject: [PATCH 1368/3715] pinctrl: stm32: fix memory leak issue commit cd8c9b5a49576bf28990237715bc2cb2210ac80a upstream. configs is allocated by pinconf_generic_parse_dt_config(), pinctrl_utils_add_map_configs() duplicates configs so it can and has to be freed to prevent memory leaks. Signed-off-by: Alexandre Torgue Signed-off-by: Linus Walleij Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/stm32/pinctrl-stm32.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index 50299ad96659..072bd11074c6 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -403,7 +403,7 @@ static int stm32_pctrl_dt_subnode_to_map(struct pinctrl_dev *pctldev, unsigned int num_configs; bool has_config = 0; unsigned reserve = 0; - int num_pins, num_funcs, maps_per_pin, i, err; + int num_pins, num_funcs, maps_per_pin, i, err = 0; pctl = pinctrl_dev_get_drvdata(pctldev); @@ -430,41 +430,45 @@ static int stm32_pctrl_dt_subnode_to_map(struct pinctrl_dev *pctldev, if (has_config && num_pins >= 1) maps_per_pin++; - if (!num_pins || !maps_per_pin) - return -EINVAL; + if (!num_pins || !maps_per_pin) { + err = -EINVAL; + goto exit; + } reserve = num_pins * maps_per_pin; err = pinctrl_utils_reserve_map(pctldev, map, reserved_maps, num_maps, reserve); if (err) - return err; + goto exit; for (i = 0; i < num_pins; i++) { err = of_property_read_u32_index(node, "pinmux", i, &pinfunc); if (err) - return err; + goto exit; pin = STM32_GET_PIN_NO(pinfunc); func = STM32_GET_PIN_FUNC(pinfunc); if (!stm32_pctrl_is_function_valid(pctl, pin, func)) { dev_err(pctl->dev, "invalid function.\n"); - return -EINVAL; + err = -EINVAL; + goto exit; } grp = stm32_pctrl_find_group_by_pin(pctl, pin); if (!grp) { dev_err(pctl->dev, "unable to match pin %d to group\n", pin); - return -EINVAL; + err = -EINVAL; + goto exit; } err = stm32_pctrl_dt_node_to_map_func(pctl, pin, func, grp, map, reserved_maps, num_maps); if (err) - return err; + goto exit; if (has_config) { err = pinctrl_utils_add_map_configs(pctldev, map, @@ -472,11 +476,13 @@ static int stm32_pctrl_dt_subnode_to_map(struct pinctrl_dev *pctldev, configs, num_configs, PIN_MAP_TYPE_CONFIGS_GROUP); if (err) - return err; + goto exit; } } - return 0; +exit: + kfree(configs); + return err; } static int stm32_pctrl_dt_node_to_map(struct pinctrl_dev *pctldev, From 704a74eb0c0729be77fb36ac8759f2f66f2589ec Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Tue, 26 Feb 2019 14:51:07 +0100 Subject: [PATCH 1369/3715] ASoC: stm32: i2s: fix dma configuration commit 1ac2bd16448997d9ec01922423486e1e85535eda upstream. DMA configuration is not balanced on start/stop. Move DMA configuration to trigger callback. Signed-off-by: Olivier Moysan Signed-off-by: Mark Brown Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- sound/soc/stm/stm32_i2s.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sound/soc/stm/stm32_i2s.c b/sound/soc/stm/stm32_i2s.c index 6d0bf78d114d..449bb7049a28 100644 --- a/sound/soc/stm/stm32_i2s.c +++ b/sound/soc/stm/stm32_i2s.c @@ -488,7 +488,7 @@ static int stm32_i2s_configure(struct snd_soc_dai *cpu_dai, { struct stm32_i2s_data *i2s = snd_soc_dai_get_drvdata(cpu_dai); int format = params_width(params); - u32 cfgr, cfgr_mask, cfg1, cfg1_mask; + u32 cfgr, cfgr_mask, cfg1; unsigned int fthlv; int ret; @@ -529,15 +529,11 @@ static int stm32_i2s_configure(struct snd_soc_dai *cpu_dai, if (ret < 0) return ret; - cfg1 = I2S_CFG1_RXDMAEN | I2S_CFG1_TXDMAEN; - cfg1_mask = cfg1; - fthlv = STM32_I2S_FIFO_SIZE * I2S_FIFO_TH_ONE_QUARTER / 4; - cfg1 |= I2S_CFG1_FTHVL_SET(fthlv - 1); - cfg1_mask |= I2S_CFG1_FTHVL_MASK; + cfg1 = I2S_CFG1_FTHVL_SET(fthlv - 1); return regmap_update_bits(i2s->regmap, STM32_I2S_CFG1_REG, - cfg1_mask, cfg1); + I2S_CFG1_FTHVL_MASK, cfg1); } static int stm32_i2s_startup(struct snd_pcm_substream *substream, @@ -589,6 +585,10 @@ static int stm32_i2s_trigger(struct snd_pcm_substream *substream, int cmd, /* Enable i2s */ dev_dbg(cpu_dai->dev, "start I2S\n"); + cfg1_mask = I2S_CFG1_RXDMAEN | I2S_CFG1_TXDMAEN; + regmap_update_bits(i2s->regmap, STM32_I2S_CFG1_REG, + cfg1_mask, cfg1_mask); + ret = regmap_update_bits(i2s->regmap, STM32_I2S_CR1_REG, I2S_CR1_SPE, I2S_CR1_SPE); if (ret < 0) { From 8eccb17132c587a0b98301d79cdc52930516d689 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Tue, 26 Feb 2019 14:51:05 +0100 Subject: [PATCH 1370/3715] ASoC: stm32: i2s: fix 16 bit format support commit 0c4c68d6fa1bae74d450e50823c24fcc3cd0b171 upstream. I2S supports 16 bits data in 32 channel length. However the expected driver behavior, is to set channel length to 16 bits when data format is 16 bits. Signed-off-by: Olivier Moysan Signed-off-by: Mark Brown Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- sound/soc/stm/stm32_i2s.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/stm/stm32_i2s.c b/sound/soc/stm/stm32_i2s.c index 449bb7049a28..004d83091505 100644 --- a/sound/soc/stm/stm32_i2s.c +++ b/sound/soc/stm/stm32_i2s.c @@ -501,7 +501,7 @@ static int stm32_i2s_configure(struct snd_soc_dai *cpu_dai, switch (format) { case 16: cfgr = I2S_CGFR_DATLEN_SET(I2S_I2SMOD_DATLEN_16); - cfgr_mask = I2S_CGFR_DATLEN_MASK; + cfgr_mask = I2S_CGFR_DATLEN_MASK | I2S_CGFR_CHLEN; break; case 32: cfgr = I2S_CGFR_DATLEN_SET(I2S_I2SMOD_DATLEN_32) | From 861bdf61e64d8afda07ab76398d645cf1d778040 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Tue, 26 Feb 2019 14:51:04 +0100 Subject: [PATCH 1371/3715] ASoC: stm32: i2s: fix IRQ clearing commit 8ba3c5215d69c09f5c39783ff3b78347769822ad upstream. Because of regmap cache, interrupts may not be cleared as expected. Declare IFCR register as write only and make writings to IFCR register unconditional. Signed-off-by: Olivier Moysan Signed-off-by: Mark Brown Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- sound/soc/stm/stm32_i2s.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/sound/soc/stm/stm32_i2s.c b/sound/soc/stm/stm32_i2s.c index 004d83091505..aa2b1196171a 100644 --- a/sound/soc/stm/stm32_i2s.c +++ b/sound/soc/stm/stm32_i2s.c @@ -246,8 +246,8 @@ static irqreturn_t stm32_i2s_isr(int irq, void *devid) return IRQ_NONE; } - regmap_update_bits(i2s->regmap, STM32_I2S_IFCR_REG, - I2S_IFCR_MASK, flags); + regmap_write_bits(i2s->regmap, STM32_I2S_IFCR_REG, + I2S_IFCR_MASK, flags); if (flags & I2S_SR_OVR) { dev_dbg(&pdev->dev, "Overrun\n"); @@ -276,7 +276,6 @@ static bool stm32_i2s_readable_reg(struct device *dev, unsigned int reg) case STM32_I2S_CFG2_REG: case STM32_I2S_IER_REG: case STM32_I2S_SR_REG: - case STM32_I2S_IFCR_REG: case STM32_I2S_TXDR_REG: case STM32_I2S_RXDR_REG: case STM32_I2S_CGFR_REG: @@ -547,8 +546,8 @@ static int stm32_i2s_startup(struct snd_pcm_substream *substream, i2s->refcount++; spin_unlock(&i2s->lock_fd); - return regmap_update_bits(i2s->regmap, STM32_I2S_IFCR_REG, - I2S_IFCR_MASK, I2S_IFCR_MASK); + return regmap_write_bits(i2s->regmap, STM32_I2S_IFCR_REG, + I2S_IFCR_MASK, I2S_IFCR_MASK); } static int stm32_i2s_hw_params(struct snd_pcm_substream *substream, @@ -603,8 +602,8 @@ static int stm32_i2s_trigger(struct snd_pcm_substream *substream, int cmd, return ret; } - regmap_update_bits(i2s->regmap, STM32_I2S_IFCR_REG, - I2S_IFCR_MASK, I2S_IFCR_MASK); + regmap_write_bits(i2s->regmap, STM32_I2S_IFCR_REG, + I2S_IFCR_MASK, I2S_IFCR_MASK); if (playback_flg) { ier = I2S_IER_UDRIE; From 591547ec35c1a8f19f0fd584dd610d184c9f1b9d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 22 Nov 2019 19:56:40 +0100 Subject: [PATCH 1372/3715] platform/x86: hp-wmi: Fix ACPI errors caused by too small buffer commit 16245db1489cd9aa579506f64afeeeb13d825a93 upstream. The HP WMI calls may take up to 128 bytes of data as input, and the AML methods implementing the WMI calls, declare a couple of fields for accessing input in different sizes, specifycally the HWMC method contains: CreateField (Arg1, 0x80, 0x0400, D128) Even though we do not use any of the WMI command-types which need a buffer of this size, the APCI interpreter still tries to create it as it is declared in generoc code at the top of the HWMC method which runs before the code looks at which command-type is requested. This results in many of these errors on many different HP laptop models: [ 14.459261] ACPI Error: Field [D128] at 1152 exceeds Buffer [NULL] size 160 (bits) (20170303/dsopcode-236) [ 14.459268] ACPI Error: Method parse/execution failed [\HWMC] (Node ffff8edcc61507f8), AE_AML_BUFFER_LIMIT (20170303/psparse-543) [ 14.459279] ACPI Error: Method parse/execution failed [\_SB.WMID.WMAA] (Node ffff8edcc61523c0), AE_AML_BUFFER_LIMIT (20170303/psparse-543) This commit increases the size of the data element of the bios_args struct to 128 bytes fixing these errors. Cc: stable@vger.kernel.org BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=197007 BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=201981 BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1520703 Signed-off-by: Hans de Goede Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/hp-wmi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index b4224389febe..3db57dea0ba9 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -78,7 +78,7 @@ struct bios_args { u32 command; u32 commandtype; u32 datasize; - u32 data; + u8 data[128]; }; enum hp_wmi_commandtype { @@ -229,7 +229,7 @@ static int hp_wmi_perform_query(int query, enum hp_wmi_command command, .command = command, .commandtype = query, .datasize = insize, - .data = 0, + .data = { 0 }, }; struct acpi_buffer input = { sizeof(struct bios_args), &args }; struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL }; @@ -241,7 +241,7 @@ static int hp_wmi_perform_query(int query, enum hp_wmi_command command, if (WARN_ON(insize > sizeof(args.data))) return -EINVAL; - memcpy(&args.data, buffer, insize); + memcpy(&args.data[0], buffer, insize); wmi_evaluate_method(HPWMI_BIOS_GUID, 0, mid, &input, &output); From 6d4408556902692f348e599819cd3ea0587edb19 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 22 Nov 2019 19:56:41 +0100 Subject: [PATCH 1373/3715] platform/x86: hp-wmi: Fix ACPI errors caused by passing 0 as input size commit f3e4f3fc8ee9729c4b1b27a478c68b713df53c0c upstream. The AML code implementing the WMI methods creates a variable length field to hold the input data we pass like this: CreateDWordField (Arg1, 0x0C, DSZI) Local5 = DSZI /* \HWMC.DSZI */ CreateField (Arg1, 0x80, (Local5 * 0x08), DAIN) If we pass 0 as bios_args.datasize argument then (Local5 * 0x08) is 0 which results in these errors: [ 71.973305] ACPI BIOS Error (bug): Attempt to CreateField of length zero (20190816/dsopcode-133) [ 71.973332] ACPI Error: Aborting method \HWMC due to previous error (AE_AML_OPERAND_VALUE) (20190816/psparse-529) [ 71.973413] ACPI Error: Aborting method \_SB.WMID.WMAA due to previous error (AE_AML_OPERAND_VALUE) (20190816/psparse-529) And in our HPWMI_WIRELESS2_QUERY calls always failing. for read commands like HPWMI_WIRELESS2_QUERY the DSZI value is not used / checked, except for read commands where extra input is needed to specify exactly what to read. So for HPWMI_WIRELESS2_QUERY we can safely pass the size of the expected output as insize to hp_wmi_perform_query(), as we are already doing for all other HPWMI_READ commands we send. Doing so fixes these errors. Cc: stable@vger.kernel.org BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=197007 BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=201981 BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1520703 Signed-off-by: Hans de Goede Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/hp-wmi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index 3db57dea0ba9..d0ffdd5d9199 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -393,7 +393,7 @@ static int hp_wmi_rfkill2_refresh(void) int err, i; err = hp_wmi_perform_query(HPWMI_WIRELESS2_QUERY, HPWMI_READ, &state, - 0, sizeof(state)); + sizeof(state), sizeof(state)); if (err) return err; @@ -790,7 +790,7 @@ static int __init hp_wmi_rfkill2_setup(struct platform_device *device) int err, i; err = hp_wmi_perform_query(HPWMI_WIRELESS2_QUERY, HPWMI_READ, &state, - 0, sizeof(state)); + sizeof(state), sizeof(state)); if (err) return err < 0 ? err : -EINVAL; From 982d424239d7fae74938557428d45c717567ea9b Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Wed, 20 Nov 2019 09:25:13 +0800 Subject: [PATCH 1374/3715] net: fec: fix clock count mis-match commit a31eda65ba210741b598044d045480494d0ed52a upstream. pm_runtime_put_autosuspend in probe will call runtime suspend to disable clks automatically if CONFIG_PM is defined. (If CONFIG_PM is not defined, its implementation will be empty, then runtime suspend will not be called.) Therefore, we can call pm_runtime_get_sync to runtime resume it first to enable clks, which matches the runtime suspend. (Only when CONFIG_PM is defined, otherwise pm_runtime_get_sync will also be empty, then runtime resume will not be called.) Then it is fine to disable clks without causing clock count mis-match. Fixes: c43eab3eddb4 ("net: fec: add missed clk_disable_unprepare in remove") Signed-off-by: Chuhong Yuan Acked-by: Fugang Duan Signed-off-by: David S. Miller Cc: Nobuhiro Iwamatsu Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/freescale/fec_main.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 0237221059bf..62bc19bedb06 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3565,6 +3565,11 @@ fec_drv_remove(struct platform_device *pdev) struct net_device *ndev = platform_get_drvdata(pdev); struct fec_enet_private *fep = netdev_priv(ndev); struct device_node *np = pdev->dev.of_node; + int ret; + + ret = pm_runtime_get_sync(&pdev->dev); + if (ret < 0) + return ret; cancel_work_sync(&fep->tx_timeout_work); fec_ptp_stop(pdev); @@ -3572,15 +3577,17 @@ fec_drv_remove(struct platform_device *pdev) fec_enet_mii_remove(fep); if (fep->reg_phy) regulator_disable(fep->reg_phy); - pm_runtime_put(&pdev->dev); - pm_runtime_disable(&pdev->dev); - clk_disable_unprepare(fep->clk_ahb); - clk_disable_unprepare(fep->clk_ipg); + if (of_phy_is_fixed_link(np)) of_phy_deregister_fixed_link(np); of_node_put(fep->phy_node); free_netdev(ndev); + clk_disable_unprepare(fep->clk_ahb); + clk_disable_unprepare(fep->clk_ipg); + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_disable(&pdev->dev); + return 0; } From a844dc4c544291470aa69edbe2434b040794e269 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 5 Dec 2019 15:38:36 +0100 Subject: [PATCH 1375/3715] Linux 4.14.158 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index dad90f53faeb..d97288c0754f 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 157 +SUBLEVEL = 158 EXTRAVERSION = NAME = Petit Gorille From 8ffb983df07acdaa6ef2017f684cbc7c59e5c680 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 4 Dec 2019 14:50:28 -0800 Subject: [PATCH 1376/3715] BACKPORT: ARM: 8905/1: Emit __gnu_mcount_nc when using Clang 10.0.0 or newer commit b0fe66cf095016e0b238374c10ae366e1f087d11 upstream. Currently, multi_v7_defconfig + CONFIG_FUNCTION_TRACER fails to build with clang: arm-linux-gnueabi-ld: kernel/softirq.o: in function `_local_bh_enable': softirq.c:(.text+0x504): undefined reference to `mcount' arm-linux-gnueabi-ld: kernel/softirq.o: in function `__local_bh_enable_ip': softirq.c:(.text+0x58c): undefined reference to `mcount' arm-linux-gnueabi-ld: kernel/softirq.o: in function `do_softirq': softirq.c:(.text+0x6c8): undefined reference to `mcount' arm-linux-gnueabi-ld: kernel/softirq.o: in function `irq_enter': softirq.c:(.text+0x75c): undefined reference to `mcount' arm-linux-gnueabi-ld: kernel/softirq.o: in function `irq_exit': softirq.c:(.text+0x840): undefined reference to `mcount' arm-linux-gnueabi-ld: kernel/softirq.o:softirq.c:(.text+0xa50): more undefined references to `mcount' follow clang can emit a working mcount symbol, __gnu_mcount_nc, when '-meabi gnu' is passed to it. Until r369147 in LLVM, this was broken and caused the kernel not to boot with '-pg' because the calling convention was not correct. Always build with '-meabi gnu' when using clang but ensure that '-pg' (which is added with CONFIG_FUNCTION_TRACER and its prereq CONFIG_HAVE_FUNCTION_TRACER) cannot be added with it unless this is fixed (which means using clang 10.0.0 and newer). Link: https://github.com/ClangBuiltLinux/linux/issues/35 Link: https://bugs.llvm.org/show_bug.cgi?id=33845 Link: https://github.com/llvm/llvm-project/commit/16fa8b09702378bacfa3d07081afe6b353b99e60 Reviewed-by: Matthias Kaehlcke Reviewed-by: Nick Desaulniers Reviewed-by: Stefan Agner Signed-off-by: Nathan Chancellor Signed-off-by: Russell King Bug: 145525910 [nd: Kconfig clang version check removed due to significant size of backporting 469cb7376c06/2fd5b09c201e. Android R will not have a version of Clang < clang-10] Change-Id: I186771b7a3553652b84ee66dac6b53f7932b7076 Signed-off-by: Nick Desaulniers --- arch/arm/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 6aa61257ed18..61b10744c12e 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -118,6 +118,10 @@ ifeq ($(CONFIG_ARM_UNWIND),y) CFLAGS_ABI +=-funwind-tables endif +ifeq ($(cc-name),clang) +CFLAGS_ABI += -meabi gnu +endif + ifeq ($(CONFIG_THUMB2_KERNEL),y) AFLAGS_AUTOIT :=$(call as-option,-Wa$(comma)-mimplicit-it=always,-Wa$(comma)-mauto-it) AFLAGS_NOWARN :=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W) From 0e39aa9d504b5dba9253aa9b07f7d9e3db7e3527 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 5 Dec 2019 13:57:36 +0000 Subject: [PATCH 1377/3715] UPSTREAM: arm64: Validate tagged addresses in access_ok() called from kernel threads (Upstream commit df325e05a682e9c624f471835c35bd3f870d5e8c). __range_ok(), invoked from access_ok(), clears the tag of the user address only if CONFIG_ARM64_TAGGED_ADDR_ABI is enabled and the thread opted in to the relaxed ABI. The latter sets the TIF_TAGGED_ADDR thread flag. In the case of asynchronous I/O (e.g. io_submit()), the access_ok() may be called from a kernel thread. Since kernel threads don't have TIF_TAGGED_ADDR set, access_ok() will fail for valid tagged user addresses. Example from the ffs_user_copy_worker() thread: use_mm(io_data->mm); ret = ffs_copy_to_iter(io_data->buf, ret, &io_data->data); unuse_mm(io_data->mm); Relax the __range_ok() check to always untag the user address if called in the context of a kernel thread. The user pointers would have already been checked via aio_setup_rw() -> import_{single_range,iovec}() at the time of the asynchronous I/O request. Fixes: 63f0c6037965 ("arm64: Introduce prctl() options to control the tagged user addresses ABI") Cc: # 5.4.x- Cc: Will Deacon Reported-by: Evgenii Stepanov Tested-by: Evgenii Stepanov Signed-off-by: Catalin Marinas Signed-off-by: Andrey Konovalov Change-Id: Icfb58d6e03a85409b98a97bb570c8608cac4a3b1 Bug: 135692346 --- arch/arm64/include/asm/uaccess.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 2923610a8966..4ee8daa24d89 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -76,8 +76,13 @@ static inline unsigned long __range_ok(unsigned long addr, unsigned long size) { unsigned long limit = current_thread_info()->addr_limit; + /* + * Asynchronous I/O running in a kernel thread does not have the + * TIF_TAGGED_ADDR flag of the process owning the mm, so always untag + * the user address before checking. + */ if (IS_ENABLED(CONFIG_ARM64_TAGGED_ADDR_ABI) && - test_thread_flag(TIF_TAGGED_ADDR)) + (current->flags & PF_KTHREAD || test_thread_flag(TIF_TAGGED_ADDR))) addr = untagged_addr(addr); __chk_user_ptr(addr); From 577ec2d381ffda62f741a00c090fc459502326ec Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 1 Jun 2018 10:22:52 +0200 Subject: [PATCH 1378/3715] BACKPORT: gnss: add GNSS receiver subsystem Add a new subsystem for GNSS (e.g. GPS) receivers. While GNSS receivers are typically accessed using a UART interface they often also support other I/O interfaces such as I2C, SPI and USB, while yet other devices use iomem or even some form of remote-processor messaging (rpmsg). The new GNSS subsystem abstracts the underlying interface and provides a new "gnss" class type, which exposes a character-device interface (e.g. /dev/gnss0) to user space. This allows GNSS receivers to have a representation in the Linux device model, something which is important not least for power management purposes. Note that the character-device interface provides raw access to whatever protocol the receiver is (currently) using, such as NMEA 0183, UBX or SiRF Binary. These protocols are expected to be continued to be handled by user space for the time being, even if some hybrid solutions are also conceivable (e.g. to have kernel drivers issue management commands). This will still allow for better platform integration by allowing GNSS devices and their resources (e.g. regulators and enable-gpios) to be described by firmware and managed by kernel drivers rather than platform-specific scripts and services. While the current interface is kept minimal, it could be extended using IOCTLs, sysfs or uevents as needs and proper abstraction levels are identified and determined (e.g. for device and feature identification). Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 2b6a440351436d792b1960822da4b7d6e673f568) [adelva: adjusted for drivers/Makefile conflict, __poll_t->unsigned int, EPOLL* -> POLL* for 4.14] Bug: 146062677 Change-Id: I5704e952766cd8057fba3be3212091a726e0d85a Signed-off-by: Alistair Delva --- MAINTAINERS | 6 + drivers/Kconfig | 2 + drivers/Makefile | 1 + drivers/gnss/Kconfig | 11 ++ drivers/gnss/Makefile | 7 + drivers/gnss/core.c | 371 ++++++++++++++++++++++++++++++++++++++++++ include/linux/gnss.h | 66 ++++++++ 7 files changed, 464 insertions(+) create mode 100644 drivers/gnss/Kconfig create mode 100644 drivers/gnss/Makefile create mode 100644 drivers/gnss/core.c create mode 100644 include/linux/gnss.h diff --git a/MAINTAINERS b/MAINTAINERS index 740e82b50c39..346875a363cf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5847,6 +5847,12 @@ F: Documentation/isdn/README.gigaset F: drivers/isdn/gigaset/ F: include/uapi/linux/gigaset_dev.h +GNSS SUBSYSTEM +M: Johan Hovold +S: Maintained +F: drivers/gnss/ +F: include/linux/gnss.h + GO7007 MPEG CODEC M: Hans Verkuil L: linux-media@vger.kernel.org diff --git a/drivers/Kconfig b/drivers/Kconfig index 1d7af3c2ff27..52ff5b25d146 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -9,6 +9,8 @@ source "drivers/bus/Kconfig" source "drivers/connector/Kconfig" +source "drivers/gnss/Kconfig" + source "drivers/mtd/Kconfig" source "drivers/of/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index 5f5ccdbad21a..a7fa8a4ab66c 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -183,3 +183,4 @@ obj-$(CONFIG_FPGA) += fpga/ obj-$(CONFIG_FSI) += fsi/ obj-$(CONFIG_TEE) += tee/ obj-$(CONFIG_MULTIPLEXER) += mux/ +obj-$(CONFIG_GNSS) += gnss/ diff --git a/drivers/gnss/Kconfig b/drivers/gnss/Kconfig new file mode 100644 index 000000000000..103fcc70992e --- /dev/null +++ b/drivers/gnss/Kconfig @@ -0,0 +1,11 @@ +# +# GNSS receiver configuration +# + +menuconfig GNSS + tristate "GNSS receiver support" + ---help--- + Say Y here if you have a GNSS receiver (e.g. a GPS receiver). + + To compile this driver as a module, choose M here: the module will + be called gnss. diff --git a/drivers/gnss/Makefile b/drivers/gnss/Makefile new file mode 100644 index 000000000000..1f7a7baab1d9 --- /dev/null +++ b/drivers/gnss/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the GNSS subsystem. +# + +obj-$(CONFIG_GNSS) += gnss.o +gnss-y := core.o diff --git a/drivers/gnss/core.c b/drivers/gnss/core.c new file mode 100644 index 000000000000..7b9aeb6d2961 --- /dev/null +++ b/drivers/gnss/core.c @@ -0,0 +1,371 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * GNSS receiver core + * + * Copyright (C) 2018 Johan Hovold + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define GNSS_FLAG_HAS_WRITE_RAW BIT(0) + +#define GNSS_MINORS 16 + +static DEFINE_IDA(gnss_minors); +static dev_t gnss_first; + +/* FIFO size must be a power of two */ +#define GNSS_READ_FIFO_SIZE 4096 +#define GNSS_WRITE_BUF_SIZE 1024 + +#define to_gnss_device(d) container_of((d), struct gnss_device, dev) + +static int gnss_open(struct inode *inode, struct file *file) +{ + struct gnss_device *gdev; + int ret = 0; + + gdev = container_of(inode->i_cdev, struct gnss_device, cdev); + + get_device(&gdev->dev); + + nonseekable_open(inode, file); + file->private_data = gdev; + + down_write(&gdev->rwsem); + if (gdev->disconnected) { + ret = -ENODEV; + goto unlock; + } + + if (gdev->count++ == 0) { + ret = gdev->ops->open(gdev); + if (ret) + gdev->count--; + } +unlock: + up_write(&gdev->rwsem); + + if (ret) + put_device(&gdev->dev); + + return ret; +} + +static int gnss_release(struct inode *inode, struct file *file) +{ + struct gnss_device *gdev = file->private_data; + + down_write(&gdev->rwsem); + if (gdev->disconnected) + goto unlock; + + if (--gdev->count == 0) { + gdev->ops->close(gdev); + kfifo_reset(&gdev->read_fifo); + } +unlock: + up_write(&gdev->rwsem); + + put_device(&gdev->dev); + + return 0; +} + +static ssize_t gnss_read(struct file *file, char __user *buf, + size_t count, loff_t *pos) +{ + struct gnss_device *gdev = file->private_data; + unsigned int copied; + int ret; + + mutex_lock(&gdev->read_mutex); + while (kfifo_is_empty(&gdev->read_fifo)) { + mutex_unlock(&gdev->read_mutex); + + if (gdev->disconnected) + return 0; + + if (file->f_flags & O_NONBLOCK) + return -EAGAIN; + + ret = wait_event_interruptible(gdev->read_queue, + gdev->disconnected || + !kfifo_is_empty(&gdev->read_fifo)); + if (ret) + return -ERESTARTSYS; + + mutex_lock(&gdev->read_mutex); + } + + ret = kfifo_to_user(&gdev->read_fifo, buf, count, &copied); + if (ret == 0) + ret = copied; + + mutex_unlock(&gdev->read_mutex); + + return ret; +} + +static ssize_t gnss_write(struct file *file, const char __user *buf, + size_t count, loff_t *pos) +{ + struct gnss_device *gdev = file->private_data; + size_t written = 0; + int ret; + + if (gdev->disconnected) + return -EIO; + + if (!count) + return 0; + + if (!(gdev->flags & GNSS_FLAG_HAS_WRITE_RAW)) + return -EIO; + + /* Ignoring O_NONBLOCK, write_raw() is synchronous. */ + + ret = mutex_lock_interruptible(&gdev->write_mutex); + if (ret) + return -ERESTARTSYS; + + for (;;) { + size_t n = count - written; + + if (n > GNSS_WRITE_BUF_SIZE) + n = GNSS_WRITE_BUF_SIZE; + + if (copy_from_user(gdev->write_buf, buf, n)) { + ret = -EFAULT; + goto out_unlock; + } + + /* + * Assumes write_raw can always accept GNSS_WRITE_BUF_SIZE + * bytes. + * + * FIXME: revisit + */ + down_read(&gdev->rwsem); + if (!gdev->disconnected) + ret = gdev->ops->write_raw(gdev, gdev->write_buf, n); + else + ret = -EIO; + up_read(&gdev->rwsem); + + if (ret < 0) + break; + + written += ret; + buf += ret; + + if (written == count) + break; + } + + if (written) + ret = written; +out_unlock: + mutex_unlock(&gdev->write_mutex); + + return ret; +} + +static unsigned int gnss_poll(struct file *file, poll_table *wait) +{ + struct gnss_device *gdev = file->private_data; + unsigned int mask = 0; + + poll_wait(file, &gdev->read_queue, wait); + + if (!kfifo_is_empty(&gdev->read_fifo)) + mask |= POLLIN | POLLRDNORM; + if (gdev->disconnected) + mask |= POLLHUP; + + return mask; +} + +static const struct file_operations gnss_fops = { + .owner = THIS_MODULE, + .open = gnss_open, + .release = gnss_release, + .read = gnss_read, + .write = gnss_write, + .poll = gnss_poll, + .llseek = no_llseek, +}; + +static struct class *gnss_class; + +static void gnss_device_release(struct device *dev) +{ + struct gnss_device *gdev = to_gnss_device(dev); + + kfree(gdev->write_buf); + kfifo_free(&gdev->read_fifo); + ida_simple_remove(&gnss_minors, gdev->id); + kfree(gdev); +} + +struct gnss_device *gnss_allocate_device(struct device *parent) +{ + struct gnss_device *gdev; + struct device *dev; + int id; + int ret; + + gdev = kzalloc(sizeof(*gdev), GFP_KERNEL); + if (!gdev) + return NULL; + + id = ida_simple_get(&gnss_minors, 0, GNSS_MINORS, GFP_KERNEL); + if (id < 0) { + kfree(gdev); + return ERR_PTR(id); + } + + gdev->id = id; + + dev = &gdev->dev; + device_initialize(dev); + dev->devt = gnss_first + id; + dev->class = gnss_class; + dev->parent = parent; + dev->release = gnss_device_release; + dev_set_drvdata(dev, gdev); + dev_set_name(dev, "gnss%d", id); + + init_rwsem(&gdev->rwsem); + mutex_init(&gdev->read_mutex); + mutex_init(&gdev->write_mutex); + init_waitqueue_head(&gdev->read_queue); + + ret = kfifo_alloc(&gdev->read_fifo, GNSS_READ_FIFO_SIZE, GFP_KERNEL); + if (ret) + goto err_put_device; + + gdev->write_buf = kzalloc(GNSS_WRITE_BUF_SIZE, GFP_KERNEL); + if (!gdev->write_buf) + goto err_put_device; + + cdev_init(&gdev->cdev, &gnss_fops); + gdev->cdev.owner = THIS_MODULE; + + return gdev; + +err_put_device: + put_device(dev); + + return ERR_PTR(-ENOMEM); +} +EXPORT_SYMBOL_GPL(gnss_allocate_device); + +void gnss_put_device(struct gnss_device *gdev) +{ + put_device(&gdev->dev); +} +EXPORT_SYMBOL_GPL(gnss_put_device); + +int gnss_register_device(struct gnss_device *gdev) +{ + int ret; + + /* Set a flag which can be accessed without holding the rwsem. */ + if (gdev->ops->write_raw != NULL) + gdev->flags |= GNSS_FLAG_HAS_WRITE_RAW; + + ret = cdev_device_add(&gdev->cdev, &gdev->dev); + if (ret) { + dev_err(&gdev->dev, "failed to add device: %d\n", ret); + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(gnss_register_device); + +void gnss_deregister_device(struct gnss_device *gdev) +{ + down_write(&gdev->rwsem); + gdev->disconnected = true; + if (gdev->count) { + wake_up_interruptible(&gdev->read_queue); + gdev->ops->close(gdev); + } + up_write(&gdev->rwsem); + + cdev_device_del(&gdev->cdev, &gdev->dev); +} +EXPORT_SYMBOL_GPL(gnss_deregister_device); + +/* + * Caller guarantees serialisation. + * + * Must not be called for a closed device. + */ +int gnss_insert_raw(struct gnss_device *gdev, const unsigned char *buf, + size_t count) +{ + int ret; + + ret = kfifo_in(&gdev->read_fifo, buf, count); + + wake_up_interruptible(&gdev->read_queue); + + return ret; +} +EXPORT_SYMBOL_GPL(gnss_insert_raw); + +static int __init gnss_module_init(void) +{ + int ret; + + ret = alloc_chrdev_region(&gnss_first, 0, GNSS_MINORS, "gnss"); + if (ret < 0) { + pr_err("failed to allocate device numbers: %d\n", ret); + return ret; + } + + gnss_class = class_create(THIS_MODULE, "gnss"); + if (IS_ERR(gnss_class)) { + ret = PTR_ERR(gnss_class); + pr_err("failed to create class: %d\n", ret); + goto err_unregister_chrdev; + } + + pr_info("GNSS driver registered with major %d\n", MAJOR(gnss_first)); + + return 0; + +err_unregister_chrdev: + unregister_chrdev_region(gnss_first, GNSS_MINORS); + + return ret; +} +module_init(gnss_module_init); + +static void __exit gnss_module_exit(void) +{ + class_destroy(gnss_class); + unregister_chrdev_region(gnss_first, GNSS_MINORS); + ida_destroy(&gnss_minors); +} +module_exit(gnss_module_exit); + +MODULE_AUTHOR("Johan Hovold "); +MODULE_DESCRIPTION("GNSS receiver core"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/gnss.h b/include/linux/gnss.h new file mode 100644 index 000000000000..e26aeac1e0e2 --- /dev/null +++ b/include/linux/gnss.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * GNSS receiver support + * + * Copyright (C) 2018 Johan Hovold + */ + +#ifndef _LINUX_GNSS_H +#define _LINUX_GNSS_H + +#include +#include +#include +#include +#include +#include +#include + +struct gnss_device; + +struct gnss_operations { + int (*open)(struct gnss_device *gdev); + void (*close)(struct gnss_device *gdev); + int (*write_raw)(struct gnss_device *gdev, const unsigned char *buf, + size_t count); +}; + +struct gnss_device { + struct device dev; + struct cdev cdev; + int id; + + unsigned long flags; + + struct rw_semaphore rwsem; + const struct gnss_operations *ops; + unsigned int count; + unsigned int disconnected:1; + + struct mutex read_mutex; + struct kfifo read_fifo; + wait_queue_head_t read_queue; + + struct mutex write_mutex; + char *write_buf; +}; + +struct gnss_device *gnss_allocate_device(struct device *parent); +void gnss_put_device(struct gnss_device *gdev); +int gnss_register_device(struct gnss_device *gdev); +void gnss_deregister_device(struct gnss_device *gdev); + +int gnss_insert_raw(struct gnss_device *gdev, const unsigned char *buf, + size_t count); + +static inline void gnss_set_drvdata(struct gnss_device *gdev, void *data) +{ + dev_set_drvdata(&gdev->dev, data); +} + +static inline void *gnss_get_drvdata(struct gnss_device *gdev) +{ + return dev_get_drvdata(&gdev->dev); +} + +#endif /* _LINUX_GNSS_H */ From f12d1ab0972d98f4d2b121f1cb5fa1d1805c5c43 Mon Sep 17 00:00:00 2001 From: Alistair Delva Date: Wed, 11 Dec 2019 11:33:55 -0800 Subject: [PATCH 1379/3715] ANDROID: cuttlefish_defconfig: Enable CONFIG_GNSS Bug: 146062677 Change-Id: Iad8047c2f1de41fdd882a177dc61554b73616e43 Signed-off-by: Alistair Delva --- arch/arm64/configs/cuttlefish_defconfig | 1 + arch/x86/configs/x86_64_cuttlefish_defconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index 02051840d72b..c1c08e057980 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -203,6 +203,7 @@ CONFIG_RFKILL=y # CONFIG_UEVENT_HELPER is not set # CONFIG_ALLOW_DEV_COREDUMP is not set CONFIG_DEBUG_DEVRES=y +CONFIG_GNSS=y CONFIG_OF_UNITTEST=y CONFIG_ZRAM=y CONFIG_BLK_DEV_LOOP=y diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 55a88517c3a8..1de5775d96d1 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -215,6 +215,7 @@ CONFIG_MAC80211=y CONFIG_RFKILL=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEBUG_DEVRES=y +CONFIG_GNSS=y CONFIG_OF=y CONFIG_OF_UNITTEST=y # CONFIG_PNP_DEBUG_MESSAGES is not set From a92804db1222b88758399897e9860b187f3dfc1b Mon Sep 17 00:00:00 2001 From: Alistair Delva Date: Wed, 11 Dec 2019 11:59:48 -0800 Subject: [PATCH 1380/3715] ANDROID: cuttlefish_defconfig: Enable CONFIG_SERIAL_DEV_BUS The gnss serial core utilizes the serdev API, so we need to enable it. Bug: 146062677 Change-Id: Id6477f5b1eb0d9cfd240b62b337643f46f490a63 Signed-off-by: Alistair Delva --- arch/arm64/configs/cuttlefish_defconfig | 1 + arch/x86/configs/x86_64_cuttlefish_defconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index c1c08e057980..29fd4486e3f4 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -294,6 +294,7 @@ CONFIG_SERIAL_8250_SHARE_IRQ=y CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_SERIAL_DEV_BUS=y CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 1de5775d96d1..1713f383da61 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -313,6 +313,7 @@ CONFIG_SERIAL_8250_NR_UARTS=48 CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_MANY_PORTS=y CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_DEV_BUS=y CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM=y # CONFIG_HW_RANDOM_INTEL is not set From 34c4103cfe15cfb0fc54f5bb256b6278c8f64443 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 1 Jun 2018 10:22:54 +0200 Subject: [PATCH 1381/3715] UPSTREAM: gnss: add generic serial driver Add a generic serial GNSS driver (library) which provides a common implementation for the gnss interface and power management (runtime and system suspend). This allows GNSS drivers for specific chip to be implemented by simply providing a set_power() callback to handle three states: ACTIVE, STANDBY and OFF. Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 37768b054f2074f40de3cacd492baed482f5d9da) Bug: 146062677 Change-Id: I7713c82eea2ce4a6a7956d74dfc964e6d012293c Signed-off-by: Alistair Delva --- drivers/gnss/Kconfig | 7 ++ drivers/gnss/Makefile | 3 + drivers/gnss/serial.c | 275 ++++++++++++++++++++++++++++++++++++++++++ drivers/gnss/serial.h | 47 ++++++++ 4 files changed, 332 insertions(+) create mode 100644 drivers/gnss/serial.c create mode 100644 drivers/gnss/serial.h diff --git a/drivers/gnss/Kconfig b/drivers/gnss/Kconfig index 103fcc70992e..f8ee54f99a8d 100644 --- a/drivers/gnss/Kconfig +++ b/drivers/gnss/Kconfig @@ -9,3 +9,10 @@ menuconfig GNSS To compile this driver as a module, choose M here: the module will be called gnss. + +if GNSS + +config GNSS_SERIAL + tristate + +endif # GNSS diff --git a/drivers/gnss/Makefile b/drivers/gnss/Makefile index 1f7a7baab1d9..171aba71684d 100644 --- a/drivers/gnss/Makefile +++ b/drivers/gnss/Makefile @@ -5,3 +5,6 @@ obj-$(CONFIG_GNSS) += gnss.o gnss-y := core.o + +obj-$(CONFIG_GNSS_SERIAL) += gnss-serial.o +gnss-serial-y := serial.o diff --git a/drivers/gnss/serial.c b/drivers/gnss/serial.c new file mode 100644 index 000000000000..b01ba4438501 --- /dev/null +++ b/drivers/gnss/serial.c @@ -0,0 +1,275 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Generic serial GNSS receiver driver + * + * Copyright (C) 2018 Johan Hovold + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "serial.h" + +static int gnss_serial_open(struct gnss_device *gdev) +{ + struct gnss_serial *gserial = gnss_get_drvdata(gdev); + struct serdev_device *serdev = gserial->serdev; + int ret; + + ret = serdev_device_open(serdev); + if (ret) + return ret; + + serdev_device_set_baudrate(serdev, gserial->speed); + serdev_device_set_flow_control(serdev, false); + + ret = pm_runtime_get_sync(&serdev->dev); + if (ret < 0) { + pm_runtime_put_noidle(&serdev->dev); + goto err_close; + } + + return 0; + +err_close: + serdev_device_close(serdev); + + return ret; +} + +static void gnss_serial_close(struct gnss_device *gdev) +{ + struct gnss_serial *gserial = gnss_get_drvdata(gdev); + struct serdev_device *serdev = gserial->serdev; + + serdev_device_close(serdev); + + pm_runtime_put(&serdev->dev); +} + +static int gnss_serial_write_raw(struct gnss_device *gdev, + const unsigned char *buf, size_t count) +{ + struct gnss_serial *gserial = gnss_get_drvdata(gdev); + struct serdev_device *serdev = gserial->serdev; + int ret; + + /* write is only buffered synchronously */ + ret = serdev_device_write(serdev, buf, count, 0); + if (ret < 0) + return ret; + + /* FIXME: determine if interrupted? */ + serdev_device_wait_until_sent(serdev, 0); + + return count; +} + +static const struct gnss_operations gnss_serial_gnss_ops = { + .open = gnss_serial_open, + .close = gnss_serial_close, + .write_raw = gnss_serial_write_raw, +}; + +static int gnss_serial_receive_buf(struct serdev_device *serdev, + const unsigned char *buf, size_t count) +{ + struct gnss_serial *gserial = serdev_device_get_drvdata(serdev); + struct gnss_device *gdev = gserial->gdev; + + return gnss_insert_raw(gdev, buf, count); +} + +static const struct serdev_device_ops gnss_serial_serdev_ops = { + .receive_buf = gnss_serial_receive_buf, + .write_wakeup = serdev_device_write_wakeup, +}; + +static int gnss_serial_set_power(struct gnss_serial *gserial, + enum gnss_serial_pm_state state) +{ + if (!gserial->ops || !gserial->ops->set_power) + return 0; + + return gserial->ops->set_power(gserial, state); +} + +/* + * FIXME: need to provide subdriver defaults or separate dt parsing from + * allocation. + */ +static int gnss_serial_parse_dt(struct serdev_device *serdev) +{ + struct gnss_serial *gserial = serdev_device_get_drvdata(serdev); + struct device_node *node = serdev->dev.of_node; + u32 speed = 4800; + + of_property_read_u32(node, "current-speed", &speed); + + gserial->speed = speed; + + return 0; +} + +struct gnss_serial *gnss_serial_allocate(struct serdev_device *serdev, + size_t data_size) +{ + struct gnss_serial *gserial; + struct gnss_device *gdev; + int ret; + + gserial = kzalloc(sizeof(*gserial) + data_size, GFP_KERNEL); + if (!gserial) + return ERR_PTR(-ENOMEM); + + gdev = gnss_allocate_device(&serdev->dev); + if (!gdev) { + ret = -ENOMEM; + goto err_free_gserial; + } + + gdev->ops = &gnss_serial_gnss_ops; + gnss_set_drvdata(gdev, gserial); + + gserial->serdev = serdev; + gserial->gdev = gdev; + + serdev_device_set_drvdata(serdev, gserial); + serdev_device_set_client_ops(serdev, &gnss_serial_serdev_ops); + + ret = gnss_serial_parse_dt(serdev); + if (ret) + goto err_put_device; + + return gserial; + +err_put_device: + gnss_put_device(gserial->gdev); +err_free_gserial: + kfree(gserial); + + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(gnss_serial_allocate); + +void gnss_serial_free(struct gnss_serial *gserial) +{ + gnss_put_device(gserial->gdev); + kfree(gserial); +}; +EXPORT_SYMBOL_GPL(gnss_serial_free); + +int gnss_serial_register(struct gnss_serial *gserial) +{ + struct serdev_device *serdev = gserial->serdev; + int ret; + + if (IS_ENABLED(CONFIG_PM)) { + pm_runtime_enable(&serdev->dev); + } else { + ret = gnss_serial_set_power(gserial, GNSS_SERIAL_ACTIVE); + if (ret < 0) + return ret; + } + + ret = gnss_register_device(gserial->gdev); + if (ret) + goto err_disable_rpm; + + return 0; + +err_disable_rpm: + if (IS_ENABLED(CONFIG_PM)) + pm_runtime_disable(&serdev->dev); + else + gnss_serial_set_power(gserial, GNSS_SERIAL_OFF); + + return ret; +} +EXPORT_SYMBOL_GPL(gnss_serial_register); + +void gnss_serial_deregister(struct gnss_serial *gserial) +{ + struct serdev_device *serdev = gserial->serdev; + + gnss_deregister_device(gserial->gdev); + + if (IS_ENABLED(CONFIG_PM)) + pm_runtime_disable(&serdev->dev); + else + gnss_serial_set_power(gserial, GNSS_SERIAL_OFF); +} +EXPORT_SYMBOL_GPL(gnss_serial_deregister); + +#ifdef CONFIG_PM +static int gnss_serial_runtime_suspend(struct device *dev) +{ + struct gnss_serial *gserial = dev_get_drvdata(dev); + + return gnss_serial_set_power(gserial, GNSS_SERIAL_STANDBY); +} + +static int gnss_serial_runtime_resume(struct device *dev) +{ + struct gnss_serial *gserial = dev_get_drvdata(dev); + + return gnss_serial_set_power(gserial, GNSS_SERIAL_ACTIVE); +} +#endif /* CONFIG_PM */ + +static int gnss_serial_prepare(struct device *dev) +{ + if (pm_runtime_suspended(dev)) + return 1; + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int gnss_serial_suspend(struct device *dev) +{ + struct gnss_serial *gserial = dev_get_drvdata(dev); + int ret = 0; + + /* + * FIXME: serdev currently lacks support for managing the underlying + * device's wakeup settings. A workaround would be to close the serdev + * device here if it is open. + */ + + if (!pm_runtime_suspended(dev)) + ret = gnss_serial_set_power(gserial, GNSS_SERIAL_STANDBY); + + return ret; +} + +static int gnss_serial_resume(struct device *dev) +{ + struct gnss_serial *gserial = dev_get_drvdata(dev); + int ret = 0; + + if (!pm_runtime_suspended(dev)) + ret = gnss_serial_set_power(gserial, GNSS_SERIAL_ACTIVE); + + return ret; +} +#endif /* CONFIG_PM_SLEEP */ + +const struct dev_pm_ops gnss_serial_pm_ops = { + .prepare = gnss_serial_prepare, + SET_SYSTEM_SLEEP_PM_OPS(gnss_serial_suspend, gnss_serial_resume) + SET_RUNTIME_PM_OPS(gnss_serial_runtime_suspend, gnss_serial_runtime_resume, NULL) +}; +EXPORT_SYMBOL_GPL(gnss_serial_pm_ops); + +MODULE_AUTHOR("Johan Hovold "); +MODULE_DESCRIPTION("Generic serial GNSS receiver driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gnss/serial.h b/drivers/gnss/serial.h new file mode 100644 index 000000000000..980ffdc86c2a --- /dev/null +++ b/drivers/gnss/serial.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Generic serial GNSS receiver driver + * + * Copyright (C) 2018 Johan Hovold + */ + +#ifndef _LINUX_GNSS_SERIAL_H +#define _LINUX_GNSS_SERIAL_H + +#include +#include + +struct gnss_serial { + struct serdev_device *serdev; + struct gnss_device *gdev; + speed_t speed; + const struct gnss_serial_ops *ops; + unsigned long drvdata[0]; +}; + +enum gnss_serial_pm_state { + GNSS_SERIAL_OFF, + GNSS_SERIAL_ACTIVE, + GNSS_SERIAL_STANDBY, +}; + +struct gnss_serial_ops { + int (*set_power)(struct gnss_serial *gserial, + enum gnss_serial_pm_state state); +}; + +extern const struct dev_pm_ops gnss_serial_pm_ops; + +struct gnss_serial *gnss_serial_allocate(struct serdev_device *gserial, + size_t data_size); +void gnss_serial_free(struct gnss_serial *gserial); + +int gnss_serial_register(struct gnss_serial *gserial); +void gnss_serial_deregister(struct gnss_serial *gserial); + +static inline void *gnss_serial_get_drvdata(struct gnss_serial *gserial) +{ + return gserial->drvdata; +} + +#endif /* _LINUX_GNSS_SERIAL_H */ From 5c4bdeb3a8bfa21c518758eb0d7bf86e8b119a03 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 1 Jun 2018 10:22:53 +0200 Subject: [PATCH 1382/3715] UPSTREAM: dt-bindings: add generic gnss binding Describe generic properties for GNSS receivers. Reviewed-by: Rob Herring Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 98ddec80fdf1c3e6c594fe633e6fb2a8a0d699dd) Bug: 146062677 Change-Id: Ifbb10321daae51230a163464f64ebb4d523cebb6 Signed-off-by: Alistair Delva --- .../devicetree/bindings/gnss/gnss.txt | 36 +++++++++++++++++++ MAINTAINERS | 1 + 2 files changed, 37 insertions(+) create mode 100644 Documentation/devicetree/bindings/gnss/gnss.txt diff --git a/Documentation/devicetree/bindings/gnss/gnss.txt b/Documentation/devicetree/bindings/gnss/gnss.txt new file mode 100644 index 000000000000..f1e4a2ff47c5 --- /dev/null +++ b/Documentation/devicetree/bindings/gnss/gnss.txt @@ -0,0 +1,36 @@ +GNSS Receiver DT binding + +This documents the binding structure and common properties for GNSS receiver +devices. + +A GNSS receiver node is a node named "gnss" and typically resides on a serial +bus (e.g. UART, I2C or SPI). + +Please refer to the following documents for generic properties: + + Documentation/devicetree/bindings/serial/slave-device.txt + Documentation/devicetree/bindings/spi/spi-bus.txt + +Required properties: + +- compatible : A string reflecting the vendor and specific device the node + represents + +Optional properties: +- enable-gpios : GPIO used to enable the device +- timepulse-gpios : Time pulse GPIO + +Example: + +serial@1234 { + compatible = "ns16550a"; + + gnss { + compatible = "u-blox,neo-8"; + + vcc-supply = <&gnss_reg>; + timepulse-gpios = <&gpio0 16 GPIO_ACTIVE_HIGH>; + + current-speed = <4800>; + }; +}; diff --git a/MAINTAINERS b/MAINTAINERS index 346875a363cf..9ad6db51b90e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5850,6 +5850,7 @@ F: include/uapi/linux/gigaset_dev.h GNSS SUBSYSTEM M: Johan Hovold S: Maintained +F: Documentation/devicetree/bindings/gnss/ F: drivers/gnss/ F: include/linux/gnss.h From 38361f51854c8bb49114535c85f29b689b3b171e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 1 Jun 2018 10:22:59 +0200 Subject: [PATCH 1383/3715] BACKPORT: gnss: add receiver type support Add a "type" device attribute and a "GNSS_TYPE" uevent variable which can be used to determine the type of a GNSS receiver. The currently identified types reflect the protocol(s) supported by a receiver: "NMEA" NMEA 0183 "SiRF" SiRF Binary "UBX" UBX Note that both SiRF and UBX type receivers typically support a subset of NMEA 0183 with vendor extensions (e.g. to allow switching to the vendor protocol). Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 10f146639fee5ffaf7cf0081c1af518f7d0c533c) [adelva: dropped changes to drivers that were not backported] Bug: 146062677 Change-Id: Id272aaa9aa59df41d5d94014edc835f242e719a6 Signed-off-by: Alistair Delva --- Documentation/ABI/testing/sysfs-class-gnss | 15 +++++++ MAINTAINERS | 1 + drivers/gnss/core.c | 49 ++++++++++++++++++++++ include/linux/gnss.h | 9 ++++ 4 files changed, 74 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-class-gnss diff --git a/Documentation/ABI/testing/sysfs-class-gnss b/Documentation/ABI/testing/sysfs-class-gnss new file mode 100644 index 000000000000..2467b6900eae --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-gnss @@ -0,0 +1,15 @@ +What: /sys/class/gnss/gnssN/type +Date: May 2018 +KernelVersion: 4.18 +Contact: Johan Hovold +Description: + The GNSS receiver type. The currently identified types reflect + the protocol(s) supported by the receiver: + + "NMEA" NMEA 0183 + "SiRF" SiRF Binary + "UBX" UBX + + Note that also non-"NMEA" type receivers typically support a + subset of NMEA 0183 with vendor extensions (e.g. to allow + switching to a vendor protocol). diff --git a/MAINTAINERS b/MAINTAINERS index 9ad6db51b90e..2cb45b54ed30 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5850,6 +5850,7 @@ F: include/uapi/linux/gigaset_dev.h GNSS SUBSYSTEM M: Johan Hovold S: Maintained +F: Documentation/ABI/testing/sysfs-class-gnss F: Documentation/devicetree/bindings/gnss/ F: drivers/gnss/ F: include/linux/gnss.h diff --git a/drivers/gnss/core.c b/drivers/gnss/core.c index 7b9aeb6d2961..afdf4ee63d71 100644 --- a/drivers/gnss/core.c +++ b/drivers/gnss/core.c @@ -330,6 +330,52 @@ int gnss_insert_raw(struct gnss_device *gdev, const unsigned char *buf, } EXPORT_SYMBOL_GPL(gnss_insert_raw); +static const char * const gnss_type_names[GNSS_TYPE_COUNT] = { + [GNSS_TYPE_NMEA] = "NMEA", + [GNSS_TYPE_SIRF] = "SiRF", + [GNSS_TYPE_UBX] = "UBX", +}; + +static const char *gnss_type_name(struct gnss_device *gdev) +{ + const char *name = NULL; + + if (gdev->type < GNSS_TYPE_COUNT) + name = gnss_type_names[gdev->type]; + + if (!name) + dev_WARN(&gdev->dev, "type name not defined\n"); + + return name; +} + +static ssize_t type_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct gnss_device *gdev = to_gnss_device(dev); + + return sprintf(buf, "%s\n", gnss_type_name(gdev)); +} +static DEVICE_ATTR_RO(type); + +static struct attribute *gnss_attrs[] = { + &dev_attr_type.attr, + NULL, +}; +ATTRIBUTE_GROUPS(gnss); + +static int gnss_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + struct gnss_device *gdev = to_gnss_device(dev); + int ret; + + ret = add_uevent_var(env, "GNSS_TYPE=%s", gnss_type_name(gdev)); + if (ret) + return ret; + + return 0; +} + static int __init gnss_module_init(void) { int ret; @@ -347,6 +393,9 @@ static int __init gnss_module_init(void) goto err_unregister_chrdev; } + gnss_class->dev_groups = gnss_groups; + gnss_class->dev_uevent = gnss_uevent; + pr_info("GNSS driver registered with major %d\n", MAJOR(gnss_first)); return 0; diff --git a/include/linux/gnss.h b/include/linux/gnss.h index e26aeac1e0e2..43546977098c 100644 --- a/include/linux/gnss.h +++ b/include/linux/gnss.h @@ -18,6 +18,14 @@ struct gnss_device; +enum gnss_type { + GNSS_TYPE_NMEA = 0, + GNSS_TYPE_SIRF, + GNSS_TYPE_UBX, + + GNSS_TYPE_COUNT +}; + struct gnss_operations { int (*open)(struct gnss_device *gdev); void (*close)(struct gnss_device *gdev); @@ -30,6 +38,7 @@ struct gnss_device { struct cdev cdev; int id; + enum gnss_type type; unsigned long flags; struct rw_semaphore rwsem; From 4743b4fd4ad134431dedce4e34f78b1ad9eb533d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 16 Jul 2018 12:42:03 +0200 Subject: [PATCH 1384/3715] UPSTREAM: gnss: fix potential error pointer dereference The gnss_allocate_device() function returns a mix of NULL and error pointers on error. It should only return one or the other. Since the callers both check for NULL, I've modified it to return NULL on error. Fixes: 2b6a44035143 ("gnss: add GNSS receiver subsystem") Signed-off-by: Dan Carpenter Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman (cherry picked from commit d9995a0fab40af333b08902ad43a387843ca0e17) Bug: 146062677 Change-Id: I0d555d6459ab76a8c4eb8208a5bf469d04a16121 Signed-off-by: Alistair Delva --- drivers/gnss/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gnss/core.c b/drivers/gnss/core.c index afdf4ee63d71..0adf55e748e4 100644 --- a/drivers/gnss/core.c +++ b/drivers/gnss/core.c @@ -235,7 +235,7 @@ struct gnss_device *gnss_allocate_device(struct device *parent) id = ida_simple_get(&gnss_minors, 0, GNSS_MINORS, GFP_KERNEL); if (id < 0) { kfree(gdev); - return ERR_PTR(id); + return NULL; } gdev->id = id; @@ -270,7 +270,7 @@ struct gnss_device *gnss_allocate_device(struct device *parent) err_put_device: put_device(dev); - return ERR_PTR(-ENOMEM); + return NULL; } EXPORT_SYMBOL_GPL(gnss_allocate_device); From 4818e17215361f39506eb29f1513c7eb8b199c09 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 14 Nov 2018 09:33:57 +0100 Subject: [PATCH 1385/3715] UPSTREAM: gnss: serial: fix synchronous write timeout Passing a timeout of zero to the synchronous serdev_device_write() helper does currently not imply to wait forever (unlike passing zero to serdev_device_wait_until_sent()). Instead, if there's insufficient room in the write buffer, we'd end up with an incomplete write. Fixes: 37768b054f20 ("gnss: add generic serial driver") Cc: stable # 4.19 Signed-off-by: Johan Hovold (cherry picked from commit 56a6c7268312cba9436b84cac01b3e502c5c511d) Bug: 146062677 Change-Id: I6c7b31b385b962f2afc44f467b72ccaaab151332 Signed-off-by: Alistair Delva --- drivers/gnss/serial.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gnss/serial.c b/drivers/gnss/serial.c index b01ba4438501..31e891f00175 100644 --- a/drivers/gnss/serial.c +++ b/drivers/gnss/serial.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -63,7 +64,7 @@ static int gnss_serial_write_raw(struct gnss_device *gdev, int ret; /* write is only buffered synchronously */ - ret = serdev_device_write(serdev, buf, count, 0); + ret = serdev_device_write(serdev, buf, count, MAX_SCHEDULE_TIMEOUT); if (ret < 0) return ret; From 2982283bb0549856a97c82f40ef2ee91186bc887 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 14 Nov 2018 16:09:02 +0100 Subject: [PATCH 1386/3715] BACKPORT: serdev: make synchronous write return bytes written Make the synchronous serdev_device_write() helper behave analogous to the asynchronous serdev_device_write_buf() by returning the number of bytes written (or rather buffered) also on timeout. This will allow drivers to distinguish the case where data was partially written from the case where no data was written. Also update the only two users that checked the return value. Signed-off-by: Johan Hovold Reviewed-by: Rob Herring Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 0bbf0a88fa29de6a043ba40058409c7e550fc8be) [adelva: dropped changes to driver that was not backported] Bug: 146062677 Change-Id: Ib9fb6041808e59f57d78fa23088bf37a8a8d4518 Signed-off-by: Alistair Delva --- drivers/gnss/serial.c | 2 +- drivers/tty/serdev/core.c | 12 ++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gnss/serial.c b/drivers/gnss/serial.c index 31e891f00175..def64b36d994 100644 --- a/drivers/gnss/serial.c +++ b/drivers/gnss/serial.c @@ -65,7 +65,7 @@ static int gnss_serial_write_raw(struct gnss_device *gdev, /* write is only buffered synchronously */ ret = serdev_device_write(serdev, buf, count, MAX_SCHEDULE_TIMEOUT); - if (ret < 0) + if (ret < 0 || ret < count) return ret; /* FIXME: determine if interrupted? */ diff --git a/drivers/tty/serdev/core.c b/drivers/tty/serdev/core.c index ae2564ecddcd..8ad3724bfe32 100644 --- a/drivers/tty/serdev/core.c +++ b/drivers/tty/serdev/core.c @@ -153,6 +153,7 @@ int serdev_device_write(struct serdev_device *serdev, unsigned long timeout) { struct serdev_controller *ctrl = serdev->ctrl; + int written = 0; int ret; if (!ctrl || !ctrl->ops->write_buf || @@ -167,14 +168,21 @@ int serdev_device_write(struct serdev_device *serdev, if (ret < 0) break; + written += ret; buf += ret; count -= ret; - } while (count && (timeout = wait_for_completion_timeout(&serdev->write_comp, timeout))); mutex_unlock(&serdev->write_lock); - return ret < 0 ? ret : (count ? -ETIMEDOUT : 0); + + if (ret < 0) + return ret; + + if (timeout == 0 && written == 0) + return -ETIMEDOUT; + + return written; } EXPORT_SYMBOL_GPL(serdev_device_write); From 9e61c87b1f47b4dc3d48de83d85e9f17c320f91e Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 2 Oct 2017 22:50:21 -0700 Subject: [PATCH 1387/3715] UPSTREAM: bpf: multi program support for cgroup+bpf introduce BPF_F_ALLOW_MULTI flag that can be used to attach multiple bpf programs to a cgroup. The difference between three possible flags for BPF_PROG_ATTACH command: - NONE(default): No further bpf programs allowed in the subtree. - BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program, the program in this cgroup yields to sub-cgroup program. - BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program, that cgroup program gets run in addition to the program in this cgroup. NONE and BPF_F_ALLOW_OVERRIDE existed before. This patch doesn't change their behavior. It only clarifies the semantics in relation to new flag. Only one program is allowed to be attached to a cgroup with NONE or BPF_F_ALLOW_OVERRIDE flag. Multiple programs are allowed to be attached to a cgroup with BPF_F_ALLOW_MULTI flag. They are executed in FIFO order (those that were attached first, run first) The programs of sub-cgroup are executed first, then programs of this cgroup and then programs of parent cgroup. All eligible programs are executed regardless of return code from earlier programs. To allow efficient execution of multiple programs attached to a cgroup and to avoid penalizing cgroups without any programs attached introduce 'struct bpf_prog_array' which is RCU protected array of pointers to bpf programs. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau for cgroup bits Acked-by: Tejun Heo Signed-off-by: David S. Miller (cherry picked from commit 324bda9e6c5add86ba2e1066476481c48132aca0) Signed-off-by: Connor O'Brien Bug: 121213201 Bug: 138317270 Test: build & boot cuttlefish Change-Id: If17b11a773f73d45ea565a947fc1bf7e158db98d --- include/linux/bpf-cgroup.h | 46 ++-- include/linux/bpf.h | 32 +++ include/linux/filter.h | 2 +- include/uapi/linux/bpf.h | 42 +++- kernel/bpf/cgroup.c | 465 ++++++++++++++++++++++++++----------- kernel/bpf/core.c | 31 +++ kernel/bpf/syscall.c | 37 ++- kernel/cgroup/cgroup.c | 26 ++- 8 files changed, 513 insertions(+), 168 deletions(-) diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 44dfae288fcf..540c44fab023 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -15,27 +15,42 @@ struct bpf_sock_ops_kern; extern struct static_key_false cgroup_bpf_enabled_key; #define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key) +struct bpf_prog_list { + struct list_head node; + struct bpf_prog *prog; +}; + +struct bpf_prog_array; + struct cgroup_bpf { - /* - * Store two sets of bpf_prog pointers, one for programs that are - * pinned directly to this cgroup, and one for those that are effective - * when this cgroup is accessed. + /* array of effective progs in this cgroup */ + struct bpf_prog_array __rcu *effective[MAX_BPF_ATTACH_TYPE]; + + /* attached progs to this cgroup and attach flags + * when flags == 0 or BPF_F_ALLOW_OVERRIDE the progs list will + * have either zero or one element + * when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS */ - struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE]; - struct bpf_prog __rcu *effective[MAX_BPF_ATTACH_TYPE]; - bool disallow_override[MAX_BPF_ATTACH_TYPE]; + struct list_head progs[MAX_BPF_ATTACH_TYPE]; + u32 flags[MAX_BPF_ATTACH_TYPE]; + + /* temp storage for effective prog array used by prog_attach/detach */ + struct bpf_prog_array __rcu *inactive; }; void cgroup_bpf_put(struct cgroup *cgrp); -void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent); +int cgroup_bpf_inherit(struct cgroup *cgrp); -int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent, - struct bpf_prog *prog, enum bpf_attach_type type, - bool overridable); +int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags); +int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags); -/* Wrapper for __cgroup_bpf_update() protected by cgroup_mutex */ -int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog, - enum bpf_attach_type type, bool overridable); +/* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */ +int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags); +int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags); int __cgroup_bpf_run_filter_skb(struct sock *sk, struct sk_buff *skb, @@ -97,8 +112,7 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, struct cgroup_bpf {}; static inline void cgroup_bpf_put(struct cgroup *cgrp) {} -static inline void cgroup_bpf_inherit(struct cgroup *cgrp, - struct cgroup *parent) {} +static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c2954ab168af..38d3daf83526 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -255,6 +255,38 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); +/* an array of programs to be executed under rcu_lock. + * + * Typical usage: + * ret = BPF_PROG_RUN_ARRAY(&bpf_prog_array, ctx, BPF_PROG_RUN); + * + * the structure returned by bpf_prog_array_alloc() should be populated + * with program pointers and the last pointer must be NULL. + * The user has to keep refcnt on the program and make sure the program + * is removed from the array before bpf_prog_put(). + * The 'struct bpf_prog_array *' should only be replaced with xchg() + * since other cpus are walking the array of pointers in parallel. + */ +struct bpf_prog_array { + struct rcu_head rcu; + struct bpf_prog *progs[0]; +}; + +struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags); +void bpf_prog_array_free(struct bpf_prog_array __rcu *progs); + +#define BPF_PROG_RUN_ARRAY(array, ctx, func) \ + ({ \ + struct bpf_prog **_prog; \ + u32 _ret = 1; \ + rcu_read_lock(); \ + _prog = rcu_dereference(array)->progs; \ + for (; *_prog; _prog++) \ + _ret &= func(*_prog, ctx); \ + rcu_read_unlock(); \ + _ret; \ + }) + #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); diff --git a/include/linux/filter.h b/include/linux/filter.h index f33f80ee9dc6..5a5786240006 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -536,7 +536,7 @@ static inline void bpf_jit_set_header_magic(struct bpf_binary_header *hdr) } #endif -#define BPF_PROG_RUN(filter, ctx) bpf_call_func(filter, ctx) +#define BPF_PROG_RUN(filter, ctx) (*(filter)->bpf_func)(ctx, (filter)->insnsi) #define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a88b2c458dcc..3dd49ee01094 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -144,11 +144,47 @@ enum bpf_attach_type { #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE -/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command - * to the given target_fd cgroup the descendent cgroup will be able to - * override effective bpf program that was inherited from this cgroup +/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command + * + * NONE(default): No further bpf programs allowed in the subtree. + * + * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program, + * the program in this cgroup yields to sub-cgroup program. + * + * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program, + * that cgroup program gets run in addition to the program in this cgroup. + * + * Only one program is allowed to be attached to a cgroup with + * NONE or BPF_F_ALLOW_OVERRIDE flag. + * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will + * release old program and attach the new one. Attach flags has to match. + * + * Multiple programs are allowed to be attached to a cgroup with + * BPF_F_ALLOW_MULTI flag. They are executed in FIFO order + * (those that were attached first, run first) + * The programs of sub-cgroup are executed first, then programs of + * this cgroup and then programs of parent cgroup. + * When children program makes decision (like picking TCP CA or sock bind) + * parent program has a chance to override it. + * + * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups. + * A cgroup with NONE doesn't allow any programs in sub-cgroups. + * Ex1: + * cgrp1 (MULTI progs A, B) -> + * cgrp2 (OVERRIDE prog C) -> + * cgrp3 (MULTI prog D) -> + * cgrp4 (OVERRIDE prog E) -> + * cgrp5 (NONE prog F) + * the event in cgrp5 triggers execution of F,D,A,B in that order. + * if prog F is detached, the execution is E,D,A,B + * if prog F and D are detached, the execution is E,A,B + * if prog F, E and D are detached, the execution is C,A,B + * + * All eligible programs are executed regardless of return code from + * earlier programs. */ #define BPF_F_ALLOW_OVERRIDE (1U << 0) +#define BPF_F_ALLOW_MULTI (1U << 1) /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the * verifier will perform strict alignment checking as if the kernel diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 546113430049..6b7500bbdb53 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -27,129 +27,361 @@ void cgroup_bpf_put(struct cgroup *cgrp) { unsigned int type; - for (type = 0; type < ARRAY_SIZE(cgrp->bpf.prog); type++) { - struct bpf_prog *prog = cgrp->bpf.prog[type]; + for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) { + struct list_head *progs = &cgrp->bpf.progs[type]; + struct bpf_prog_list *pl, *tmp; - if (prog) { - bpf_prog_put(prog); + list_for_each_entry_safe(pl, tmp, progs, node) { + list_del(&pl->node); + bpf_prog_put(pl->prog); + kfree(pl); static_branch_dec(&cgroup_bpf_enabled_key); } + bpf_prog_array_free(cgrp->bpf.effective[type]); } } +/* count number of elements in the list. + * it's slow but the list cannot be long + */ +static u32 prog_list_length(struct list_head *head) +{ + struct bpf_prog_list *pl; + u32 cnt = 0; + + list_for_each_entry(pl, head, node) { + if (!pl->prog) + continue; + cnt++; + } + return cnt; +} + +/* if parent has non-overridable prog attached, + * disallow attaching new programs to the descendent cgroup. + * if parent has overridable or multi-prog, allow attaching + */ +static bool hierarchy_allows_attach(struct cgroup *cgrp, + enum bpf_attach_type type, + u32 new_flags) +{ + struct cgroup *p; + + p = cgroup_parent(cgrp); + if (!p) + return true; + do { + u32 flags = p->bpf.flags[type]; + u32 cnt; + + if (flags & BPF_F_ALLOW_MULTI) + return true; + cnt = prog_list_length(&p->bpf.progs[type]); + WARN_ON_ONCE(cnt > 1); + if (cnt == 1) + return !!(flags & BPF_F_ALLOW_OVERRIDE); + p = cgroup_parent(p); + } while (p); + return true; +} + +/* compute a chain of effective programs for a given cgroup: + * start from the list of programs in this cgroup and add + * all parent programs. + * Note that parent's F_ALLOW_OVERRIDE-type program is yielding + * to programs in this cgroup + */ +static int compute_effective_progs(struct cgroup *cgrp, + enum bpf_attach_type type, + struct bpf_prog_array __rcu **array) +{ + struct bpf_prog_array __rcu *progs; + struct bpf_prog_list *pl; + struct cgroup *p = cgrp; + int cnt = 0; + + /* count number of effective programs by walking parents */ + do { + if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI)) + cnt += prog_list_length(&p->bpf.progs[type]); + p = cgroup_parent(p); + } while (p); + + progs = bpf_prog_array_alloc(cnt, GFP_KERNEL); + if (!progs) + return -ENOMEM; + + /* populate the array with effective progs */ + cnt = 0; + p = cgrp; + do { + if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI)) + list_for_each_entry(pl, + &p->bpf.progs[type], node) { + if (!pl->prog) + continue; + rcu_dereference_protected(progs, 1)-> + progs[cnt++] = pl->prog; + } + p = cgroup_parent(p); + } while (p); + + *array = progs; + return 0; +} + +static void activate_effective_progs(struct cgroup *cgrp, + enum bpf_attach_type type, + struct bpf_prog_array __rcu *array) +{ + struct bpf_prog_array __rcu *old_array; + + old_array = xchg(&cgrp->bpf.effective[type], array); + /* free prog array after grace period, since __cgroup_bpf_run_*() + * might be still walking the array + */ + bpf_prog_array_free(old_array); +} + /** * cgroup_bpf_inherit() - inherit effective programs from parent * @cgrp: the cgroup to modify - * @parent: the parent to inherit from */ -void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent) +int cgroup_bpf_inherit(struct cgroup *cgrp) { - unsigned int type; +/* has to use marco instead of const int, since compiler thinks + * that array below is variable length + */ +#define NR ARRAY_SIZE(cgrp->bpf.effective) + struct bpf_prog_array __rcu *arrays[NR] = {}; + int i; - for (type = 0; type < ARRAY_SIZE(cgrp->bpf.effective); type++) { - struct bpf_prog *e; + for (i = 0; i < NR; i++) + INIT_LIST_HEAD(&cgrp->bpf.progs[i]); - e = rcu_dereference_protected(parent->bpf.effective[type], - lockdep_is_held(&cgroup_mutex)); - rcu_assign_pointer(cgrp->bpf.effective[type], e); - cgrp->bpf.disallow_override[type] = parent->bpf.disallow_override[type]; - } + for (i = 0; i < NR; i++) + if (compute_effective_progs(cgrp, i, &arrays[i])) + goto cleanup; + + for (i = 0; i < NR; i++) + activate_effective_progs(cgrp, i, arrays[i]); + + return 0; +cleanup: + for (i = 0; i < NR; i++) + bpf_prog_array_free(arrays[i]); + return -ENOMEM; } +#define BPF_CGROUP_MAX_PROGS 64 + /** - * __cgroup_bpf_update() - Update the pinned program of a cgroup, and + * __cgroup_bpf_attach() - Attach the program to a cgroup, and * propagate the change to descendants * @cgrp: The cgroup which descendants to traverse - * @parent: The parent of @cgrp, or %NULL if @cgrp is the root - * @prog: A new program to pin - * @type: Type of pinning operation (ingress/egress) - * - * Each cgroup has a set of two pointers for bpf programs; one for eBPF - * programs it owns, and which is effective for execution. - * - * If @prog is not %NULL, this function attaches a new program to the cgroup - * and releases the one that is currently attached, if any. @prog is then made - * the effective program of type @type in that cgroup. - * - * If @prog is %NULL, the currently attached program of type @type is released, - * and the effective program of the parent cgroup (if any) is inherited to - * @cgrp. - * - * Then, the descendants of @cgrp are walked and the effective program for - * each of them is set to the effective program of @cgrp unless the - * descendant has its own program attached, in which case the subbranch is - * skipped. This ensures that delegated subcgroups with own programs are left - * untouched. + * @prog: A program to attach + * @type: Type of attach operation * * Must be called with cgroup_mutex held. */ -int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent, - struct bpf_prog *prog, enum bpf_attach_type type, - bool new_overridable) +int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags) { - struct bpf_prog *old_prog, *effective = NULL; - struct cgroup_subsys_state *pos; - bool overridable = true; + struct list_head *progs = &cgrp->bpf.progs[type]; + struct bpf_prog *old_prog = NULL; + struct cgroup_subsys_state *css; + struct bpf_prog_list *pl; + bool pl_was_allocated; + u32 old_flags; + int err; - if (parent) { - overridable = !parent->bpf.disallow_override[type]; - effective = rcu_dereference_protected(parent->bpf.effective[type], - lockdep_is_held(&cgroup_mutex)); - } + if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) + /* invalid combination */ + return -EINVAL; - if (prog && effective && !overridable) - /* if parent has non-overridable prog attached, disallow - * attaching new programs to descendent cgroup + if (!hierarchy_allows_attach(cgrp, type, flags)) + return -EPERM; + + if (!list_empty(progs) && cgrp->bpf.flags[type] != flags) + /* Disallow attaching non-overridable on top + * of existing overridable in this cgroup. + * Disallow attaching multi-prog if overridable or none */ return -EPERM; - if (prog && effective && overridable != new_overridable) - /* if parent has overridable prog attached, only - * allow overridable programs in descendent cgroup - */ - return -EPERM; + if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS) + return -E2BIG; - old_prog = cgrp->bpf.prog[type]; + if (flags & BPF_F_ALLOW_MULTI) { + list_for_each_entry(pl, progs, node) + if (pl->prog == prog) + /* disallow attaching the same prog twice */ + return -EINVAL; - if (prog) { - overridable = new_overridable; - effective = prog; - if (old_prog && - cgrp->bpf.disallow_override[type] == new_overridable) - /* disallow attaching non-overridable on top - * of existing overridable in this cgroup - * and vice versa - */ - return -EPERM; - } - - if (!prog && !old_prog) - /* report error when trying to detach and nothing is attached */ - return -ENOENT; - - cgrp->bpf.prog[type] = prog; - - css_for_each_descendant_pre(pos, &cgrp->self) { - struct cgroup *desc = container_of(pos, struct cgroup, self); - - /* skip the subtree if the descendant has its own program */ - if (desc->bpf.prog[type] && desc != cgrp) { - pos = css_rightmost_descendant(pos); + pl = kmalloc(sizeof(*pl), GFP_KERNEL); + if (!pl) + return -ENOMEM; + pl_was_allocated = true; + pl->prog = prog; + list_add_tail(&pl->node, progs); + } else { + if (list_empty(progs)) { + pl = kmalloc(sizeof(*pl), GFP_KERNEL); + if (!pl) + return -ENOMEM; + pl_was_allocated = true; + list_add_tail(&pl->node, progs); } else { - rcu_assign_pointer(desc->bpf.effective[type], - effective); - desc->bpf.disallow_override[type] = !overridable; + pl = list_first_entry(progs, typeof(*pl), node); + old_prog = pl->prog; + pl_was_allocated = false; } + pl->prog = prog; } - if (prog) - static_branch_inc(&cgroup_bpf_enabled_key); + old_flags = cgrp->bpf.flags[type]; + cgrp->bpf.flags[type] = flags; + /* allocate and recompute effective prog arrays */ + css_for_each_descendant_pre(css, &cgrp->self) { + struct cgroup *desc = container_of(css, struct cgroup, self); + + err = compute_effective_progs(desc, type, &desc->bpf.inactive); + if (err) + goto cleanup; + } + + /* all allocations were successful. Activate all prog arrays */ + css_for_each_descendant_pre(css, &cgrp->self) { + struct cgroup *desc = container_of(css, struct cgroup, self); + + activate_effective_progs(desc, type, desc->bpf.inactive); + desc->bpf.inactive = NULL; + } + + static_branch_inc(&cgroup_bpf_enabled_key); if (old_prog) { bpf_prog_put(old_prog); static_branch_dec(&cgroup_bpf_enabled_key); } return 0; + +cleanup: + /* oom while computing effective. Free all computed effective arrays + * since they were not activated + */ + css_for_each_descendant_pre(css, &cgrp->self) { + struct cgroup *desc = container_of(css, struct cgroup, self); + + bpf_prog_array_free(desc->bpf.inactive); + desc->bpf.inactive = NULL; + } + + /* and cleanup the prog list */ + pl->prog = old_prog; + if (pl_was_allocated) { + list_del(&pl->node); + kfree(pl); + } + return err; +} + +/** + * __cgroup_bpf_detach() - Detach the program from a cgroup, and + * propagate the change to descendants + * @cgrp: The cgroup which descendants to traverse + * @prog: A program to detach or NULL + * @type: Type of detach operation + * + * Must be called with cgroup_mutex held. + */ +int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 unused_flags) +{ + struct list_head *progs = &cgrp->bpf.progs[type]; + u32 flags = cgrp->bpf.flags[type]; + struct bpf_prog *old_prog = NULL; + struct cgroup_subsys_state *css; + struct bpf_prog_list *pl; + int err; + + if (flags & BPF_F_ALLOW_MULTI) { + if (!prog) + /* to detach MULTI prog the user has to specify valid FD + * of the program to be detached + */ + return -EINVAL; + } else { + if (list_empty(progs)) + /* report error when trying to detach and nothing is attached */ + return -ENOENT; + } + + if (flags & BPF_F_ALLOW_MULTI) { + /* find the prog and detach it */ + list_for_each_entry(pl, progs, node) { + if (pl->prog != prog) + continue; + old_prog = prog; + /* mark it deleted, so it's ignored while + * recomputing effective + */ + pl->prog = NULL; + break; + } + if (!old_prog) + return -ENOENT; + } else { + /* to maintain backward compatibility NONE and OVERRIDE cgroups + * allow detaching with invalid FD (prog==NULL) + */ + pl = list_first_entry(progs, typeof(*pl), node); + old_prog = pl->prog; + pl->prog = NULL; + } + + /* allocate and recompute effective prog arrays */ + css_for_each_descendant_pre(css, &cgrp->self) { + struct cgroup *desc = container_of(css, struct cgroup, self); + + err = compute_effective_progs(desc, type, &desc->bpf.inactive); + if (err) + goto cleanup; + } + + /* all allocations were successful. Activate all prog arrays */ + css_for_each_descendant_pre(css, &cgrp->self) { + struct cgroup *desc = container_of(css, struct cgroup, self); + + activate_effective_progs(desc, type, desc->bpf.inactive); + desc->bpf.inactive = NULL; + } + + /* now can actually delete it from this cgroup list */ + list_del(&pl->node); + kfree(pl); + if (list_empty(progs)) + /* last program was detached, reset flags to zero */ + cgrp->bpf.flags[type] = 0; + + bpf_prog_put(old_prog); + static_branch_dec(&cgroup_bpf_enabled_key); + return 0; + +cleanup: + /* oom while computing effective. Free all computed effective arrays + * since they were not activated + */ + css_for_each_descendant_pre(css, &cgrp->self) { + struct cgroup *desc = container_of(css, struct cgroup, self); + + bpf_prog_array_free(desc->bpf.inactive); + desc->bpf.inactive = NULL; + } + + /* and restore back old_prog */ + pl->prog = old_prog; + return err; } /** @@ -171,36 +403,26 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk, struct sk_buff *skb, enum bpf_attach_type type) { - struct bpf_prog *prog; + unsigned int offset = skb->data - skb_network_header(skb); + struct sock *save_sk; struct cgroup *cgrp; - int ret = 0; + int ret; if (!sk || !sk_fullsock(sk)) return 0; - if (sk->sk_family != AF_INET && - sk->sk_family != AF_INET6) + if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6) return 0; cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); - - rcu_read_lock(); - - prog = rcu_dereference(cgrp->bpf.effective[type]); - if (prog) { - unsigned int offset = skb->data - skb_network_header(skb); - struct sock *save_sk = skb->sk; - - skb->sk = sk; - __skb_push(skb, offset); - ret = bpf_prog_run_save_cb(prog, skb) == 1 ? 0 : -EPERM; - __skb_pull(skb, offset); - skb->sk = save_sk; - } - - rcu_read_unlock(); - - return ret; + save_sk = skb->sk; + skb->sk = sk; + __skb_push(skb, offset); + ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb, + bpf_prog_run_save_cb); + __skb_pull(skb, offset); + skb->sk = save_sk; + return ret == 1 ? 0 : -EPERM; } EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb); @@ -221,19 +443,10 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk, enum bpf_attach_type type) { struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); - struct bpf_prog *prog; - int ret = 0; + int ret; - - rcu_read_lock(); - - prog = rcu_dereference(cgrp->bpf.effective[type]); - if (prog) - ret = BPF_PROG_RUN(prog, sk) == 1 ? 0 : -EPERM; - - rcu_read_unlock(); - - return ret; + ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sk, BPF_PROG_RUN); + return ret == 1 ? 0 : -EPERM; } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); @@ -258,18 +471,10 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, enum bpf_attach_type type) { struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); - struct bpf_prog *prog; - int ret = 0; + int ret; - - rcu_read_lock(); - - prog = rcu_dereference(cgrp->bpf.effective[type]); - if (prog) - ret = BPF_PROG_RUN(prog, sock_ops) == 1 ? 0 : -EPERM; - - rcu_read_unlock(); - - return ret; + ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sock_ops, + BPF_PROG_RUN); + return ret == 1 ? 0 : -EPERM; } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 6f812bdfcb0a..9b0937ddeb59 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1481,6 +1481,37 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) } EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); +/* to avoid allocating empty bpf_prog_array for cgroups that + * don't have bpf program attached use one global 'empty_prog_array' + * It will not be modified the caller of bpf_prog_array_alloc() + * (since caller requested prog_cnt == 0) + * that pointer should be 'freed' by bpf_prog_array_free() + */ +static struct { + struct bpf_prog_array hdr; + struct bpf_prog *null_prog; +} empty_prog_array = { + .null_prog = NULL, +}; + +struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags) +{ + if (prog_cnt) + return kzalloc(sizeof(struct bpf_prog_array) + + sizeof(struct bpf_prog *) * (prog_cnt + 1), + flags); + + return &empty_prog_array.hdr; +} + +void bpf_prog_array_free(struct bpf_prog_array __rcu *progs) +{ + if (!progs || + progs == (struct bpf_prog_array __rcu *)&empty_prog_array.hdr) + return; + kfree_rcu(progs, rcu); +} + static void bpf_prog_free_deferred(struct work_struct *work) { struct bpf_prog_aux *aux; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 171e605d0a55..89d58554eb99 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1242,6 +1242,9 @@ static int sockmap_get_from_fd(const union bpf_attr *attr, bool attach) return 0; } +#define BPF_F_ATTACH_MASK \ + (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI) + static int bpf_prog_attach(const union bpf_attr *attr) { enum bpf_prog_type ptype; @@ -1255,7 +1258,7 @@ static int bpf_prog_attach(const union bpf_attr *attr) if (CHECK_ATTR(BPF_PROG_ATTACH)) return -EINVAL; - if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE) + if (attr->attach_flags & ~BPF_F_ATTACH_MASK) return -EINVAL; switch (attr->attach_type) { @@ -1286,8 +1289,8 @@ static int bpf_prog_attach(const union bpf_attr *attr) return PTR_ERR(cgrp); } - ret = cgroup_bpf_update(cgrp, prog, attr->attach_type, - attr->attach_flags & BPF_F_ALLOW_OVERRIDE); + ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type, + attr->attach_flags); if (ret) bpf_prog_put(prog); cgroup_put(cgrp); @@ -1299,6 +1302,8 @@ static int bpf_prog_attach(const union bpf_attr *attr) static int bpf_prog_detach(const union bpf_attr *attr) { + enum bpf_prog_type ptype; + struct bpf_prog *prog; struct cgroup *cgrp; int ret; @@ -1311,23 +1316,33 @@ static int bpf_prog_detach(const union bpf_attr *attr) switch (attr->attach_type) { case BPF_CGROUP_INET_INGRESS: case BPF_CGROUP_INET_EGRESS: + ptype = BPF_PROG_TYPE_CGROUP_SKB; + break; case BPF_CGROUP_INET_SOCK_CREATE: + ptype = BPF_PROG_TYPE_CGROUP_SOCK; + break; case BPF_CGROUP_SOCK_OPS: - cgrp = cgroup_get_from_fd(attr->target_fd); - if (IS_ERR(cgrp)) - return PTR_ERR(cgrp); - - ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false); - cgroup_put(cgrp); + ptype = BPF_PROG_TYPE_SOCK_OPS; break; case BPF_SK_SKB_STREAM_PARSER: case BPF_SK_SKB_STREAM_VERDICT: - ret = sockmap_get_from_fd(attr, false); - break; + return sockmap_get_from_fd(attr, false); default: return -EINVAL; } + cgrp = cgroup_get_from_fd(attr->target_fd); + if (IS_ERR(cgrp)) + return PTR_ERR(cgrp); + + prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); + if (IS_ERR(prog)) + prog = NULL; + + ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0); + if (prog) + bpf_prog_put(prog); + cgroup_put(cgrp); return ret; } diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index bb0285973cca..4d7a961615ff 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1902,6 +1902,9 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags) if (ret) goto destroy_root; + ret = cgroup_bpf_inherit(root_cgrp); + WARN_ON_ONCE(ret); + trace_cgroup_setup_root(root); /* @@ -4899,6 +4902,9 @@ static struct cgroup *cgroup_create(struct cgroup *parent) cgrp->self.parent = &parent->self; cgrp->root = root; cgrp->level = level; + ret = cgroup_bpf_inherit(cgrp); + if (ret) + goto out_idr_free; spin_lock_irq(&css_set_lock); for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) { @@ -4941,9 +4947,6 @@ static struct cgroup *cgroup_create(struct cgroup *parent) goto out_idr_free; } - if (parent) - cgroup_bpf_inherit(cgrp, parent); - cgroup_propagate_control(cgrp); return cgrp; @@ -5945,14 +5948,23 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd) #endif /* CONFIG_SOCK_CGROUP_DATA */ #ifdef CONFIG_CGROUP_BPF -int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog, - enum bpf_attach_type type, bool overridable) +int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags) { - struct cgroup *parent = cgroup_parent(cgrp); int ret; mutex_lock(&cgroup_mutex); - ret = __cgroup_bpf_update(cgrp, parent, prog, type, overridable); + ret = __cgroup_bpf_attach(cgrp, prog, type, flags); + mutex_unlock(&cgroup_mutex); + return ret; +} +int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags) +{ + int ret; + + mutex_lock(&cgroup_mutex); + ret = __cgroup_bpf_detach(cgrp, prog, type, flags); mutex_unlock(&cgroup_mutex); return ret; } From 10408722baf297ac89f8609922ce739be1da3c0d Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 23 Oct 2017 23:53:07 -0700 Subject: [PATCH 1388/3715] UPSTREAM: bpf: use the same condition in perf event set/free bpf handler This is a cleanup such that doing the same check in perf_event_free_bpf_prog as we already do in perf_event_set_bpf_prog step. Signed-off-by: Yonghong Song Acked-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller (cherry picked from commit 0b4c6841fee03e096b735074a0c4aab3a8e92986) Signed-off-by: Connor O'Brien Bug: 121213201 Bug: 138317270 Test: build & boot cuttlefish Change-Id: Id64d5a025d383fa3d3b16c5c74e8f9e86148efaa --- kernel/events/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 121544ef3371..d4668f6e63eb 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8275,10 +8275,10 @@ static void perf_event_free_bpf_prog(struct perf_event *event) { struct bpf_prog *prog; - perf_event_free_bpf_handler(event); - - if (!event->tp_event) + if (event->attr.type != PERF_TYPE_TRACEPOINT) { + perf_event_free_bpf_handler(event); return; + } prog = event->tp_event->prog; if (prog && event->tp_event->bpf_prog_owner == event) { From 5179a6a673e1bff5b9823b1317c59127bacd4641 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 23 Oct 2017 23:53:08 -0700 Subject: [PATCH 1389/3715] UPSTREAM: bpf: permit multiple bpf attachments for a single perf event This patch enables multiple bpf attachments for a kprobe/uprobe/tracepoint single trace event. Each trace_event keeps a list of attached perf events. When an event happens, all attached bpf programs will be executed based on the order of attachment. A global bpf_event_mutex lock is introduced to protect prog_array attaching and detaching. An alternative will be introduce a mutex lock in every trace_event_call structure, but it takes a lot of extra memory. So a global bpf_event_mutex lock is a good compromise. The bpf prog detachment involves allocation of memory. If the allocation fails, a dummy do-nothing program will replace to-be-detached program in-place. Signed-off-by: Yonghong Song Acked-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller (cherry picked from commit e87c6bc3852b981e71c757be20771546ce9f76f3) Signed-off-by: Connor O'Brien Bug: 121213201 Bug: 138317270 Test: build & boot cuttlefish; attach 2 progs to 1 tracepoint Change-Id: I25ce1ed6c9512d0a6f2db7547e109958fe1619b6 --- include/linux/bpf.h | 30 ++++++++++--- include/linux/trace_events.h | 43 ++++++++++++++++-- include/trace/perf.h | 6 +-- kernel/bpf/core.c | 81 ++++++++++++++++++++++++++++++++++ kernel/events/core.c | 26 ++++------- kernel/trace/bpf_trace.c | 82 ++++++++++++++++++++++++++++++++--- kernel/trace/trace_kprobe.c | 6 +-- kernel/trace/trace_syscalls.c | 34 ++++++++------- kernel/trace/trace_uprobe.c | 3 +- 9 files changed, 255 insertions(+), 56 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 38d3daf83526..56e877211868 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -275,18 +275,38 @@ struct bpf_prog_array { struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags); void bpf_prog_array_free(struct bpf_prog_array __rcu *progs); -#define BPF_PROG_RUN_ARRAY(array, ctx, func) \ +void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs, + struct bpf_prog *old_prog); +int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, + struct bpf_prog *exclude_prog, + struct bpf_prog *include_prog, + struct bpf_prog_array **new_array); + +#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null) \ ({ \ - struct bpf_prog **_prog; \ + struct bpf_prog **_prog, *__prog; \ + struct bpf_prog_array *_array; \ u32 _ret = 1; \ rcu_read_lock(); \ - _prog = rcu_dereference(array)->progs; \ - for (; *_prog; _prog++) \ - _ret &= func(*_prog, ctx); \ + _array = rcu_dereference(array); \ + if (unlikely(check_non_null && !_array))\ + goto _out; \ + _prog = _array->progs; \ + while ((__prog = READ_ONCE(*_prog))) { \ + _ret &= func(__prog, ctx); \ + _prog++; \ + } \ +_out: \ rcu_read_unlock(); \ _ret; \ }) +#define BPF_PROG_RUN_ARRAY(array, ctx, func) \ + __BPF_PROG_RUN_ARRAY(array, ctx, func, false) + +#define BPF_PROG_RUN_ARRAY_CHECK(array, ctx, func) \ + __BPF_PROG_RUN_ARRAY(array, ctx, func, true) + #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 2bcb4dc6df1a..84014ecfa67f 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -272,14 +272,37 @@ struct trace_event_call { #ifdef CONFIG_PERF_EVENTS int perf_refcount; struct hlist_head __percpu *perf_events; - struct bpf_prog *prog; - struct perf_event *bpf_prog_owner; + struct bpf_prog_array __rcu *prog_array; int (*perf_perm)(struct trace_event_call *, struct perf_event *); #endif }; +#ifdef CONFIG_PERF_EVENTS +static inline bool bpf_prog_array_valid(struct trace_event_call *call) +{ + /* + * This inline function checks whether call->prog_array + * is valid or not. The function is called in various places, + * outside rcu_read_lock/unlock, as a heuristic to speed up execution. + * + * If this function returns true, and later call->prog_array + * becomes false inside rcu_read_lock/unlock region, + * we bail out then. If this function return false, + * there is a risk that we might miss a few events if the checking + * were delayed until inside rcu_read_lock/unlock region and + * call->prog_array happened to become non-NULL then. + * + * Here, READ_ONCE() is used instead of rcu_access_pointer(). + * rcu_access_pointer() requires the actual definition of + * "struct bpf_prog_array" while READ_ONCE() only needs + * a declaration of the same type. + */ + return !!READ_ONCE(call->prog_array); +} +#endif + static inline const char * trace_event_name(struct trace_event_call *call) { @@ -436,12 +459,23 @@ trace_trigger_soft_disabled(struct trace_event_file *file) } #ifdef CONFIG_BPF_EVENTS -unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx); +unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx); +int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog); +void perf_event_detach_bpf_prog(struct perf_event *event); #else -static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) +static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) { return 1; } + +static inline int +perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog) +{ + return -EOPNOTSUPP; +} + +static inline void perf_event_detach_bpf_prog(struct perf_event *event) { } + #endif enum { @@ -512,6 +546,7 @@ perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type, { perf_tp_event(type, count, raw_data, size, regs, head, rctx, task, event); } + #endif #endif /* _LINUX_TRACE_EVENT_H */ diff --git a/include/trace/perf.h b/include/trace/perf.h index e4b249821684..dbc6c74defc3 100644 --- a/include/trace/perf.h +++ b/include/trace/perf.h @@ -35,7 +35,6 @@ perf_trace_##call(void *__data, proto) \ struct trace_event_call *event_call = __data; \ struct trace_event_data_offsets_##call __maybe_unused __data_offsets;\ struct trace_event_raw_##call *entry; \ - struct bpf_prog *prog = event_call->prog; \ struct pt_regs *__regs; \ u64 __count = 1; \ struct task_struct *__task = NULL; \ @@ -47,8 +46,9 @@ perf_trace_##call(void *__data, proto) \ __data_size = trace_event_get_offsets_##call(&__data_offsets, args); \ \ head = this_cpu_ptr(event_call->perf_events); \ - if (!prog && __builtin_constant_p(!__task) && !__task && \ - hlist_empty(head)) \ + if (!bpf_prog_array_valid(event_call) && \ + __builtin_constant_p(!__task) && !__task && \ + hlist_empty(head)) \ return; \ \ __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 9b0937ddeb59..cc84110949ac 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1481,6 +1481,20 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) } EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); +static unsigned int __bpf_prog_ret1(const void *ctx, + const struct bpf_insn *insn) +{ + return 1; +} + +static struct bpf_prog_dummy { + struct bpf_prog prog; +} dummy_bpf_prog = { + .prog = { + .bpf_func = __bpf_prog_ret1, + }, +}; + /* to avoid allocating empty bpf_prog_array for cgroups that * don't have bpf program attached use one global 'empty_prog_array' * It will not be modified the caller of bpf_prog_array_alloc() @@ -1512,6 +1526,73 @@ void bpf_prog_array_free(struct bpf_prog_array __rcu *progs) kfree_rcu(progs, rcu); } +void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs, + struct bpf_prog *old_prog) +{ + struct bpf_prog **prog = progs->progs; + + for (; *prog; prog++) + if (*prog == old_prog) { + WRITE_ONCE(*prog, &dummy_bpf_prog.prog); + break; + } +} + +int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, + struct bpf_prog *exclude_prog, + struct bpf_prog *include_prog, + struct bpf_prog_array **new_array) +{ + int new_prog_cnt, carry_prog_cnt = 0; + struct bpf_prog **existing_prog; + struct bpf_prog_array *array; + int new_prog_idx = 0; + + /* Figure out how many existing progs we need to carry over to + * the new array. + */ + if (old_array) { + existing_prog = old_array->progs; + for (; *existing_prog; existing_prog++) { + if (*existing_prog != exclude_prog && + *existing_prog != &dummy_bpf_prog.prog) + carry_prog_cnt++; + if (*existing_prog == include_prog) + return -EEXIST; + } + } + + /* How many progs (not NULL) will be in the new array? */ + new_prog_cnt = carry_prog_cnt; + if (include_prog) + new_prog_cnt += 1; + + /* Do we have any prog (not NULL) in the new array? */ + if (!new_prog_cnt) { + *new_array = NULL; + return 0; + } + + /* +1 as the end of prog_array is marked with NULL */ + array = bpf_prog_array_alloc(new_prog_cnt + 1, GFP_KERNEL); + if (!array) + return -ENOMEM; + + /* Fill in the new prog array */ + if (carry_prog_cnt) { + existing_prog = old_array->progs; + for (; *existing_prog; existing_prog++) + if (*existing_prog != exclude_prog && + *existing_prog != &dummy_bpf_prog.prog) + array->progs[new_prog_idx++] = *existing_prog; + } + if (include_prog) + array->progs[new_prog_idx++] = include_prog; + array->progs[new_prog_idx] = NULL; + *new_array = array; + return 0; +} + static void bpf_prog_free_deferred(struct work_struct *work) { struct bpf_prog_aux *aux; diff --git a/kernel/events/core.c b/kernel/events/core.c index d4668f6e63eb..f87d54270076 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8037,11 +8037,9 @@ void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx, struct pt_regs *regs, struct hlist_head *head, struct task_struct *task) { - struct bpf_prog *prog = call->prog; - - if (prog) { + if (bpf_prog_array_valid(call)) { *(struct pt_regs **)raw_data = regs; - if (!trace_call_bpf(prog, raw_data) || hlist_empty(head)) { + if (!trace_call_bpf(call, raw_data) || hlist_empty(head)) { perf_swevent_put_recursion_context(rctx); return; } @@ -8231,13 +8229,11 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) { bool is_kprobe, is_tracepoint, is_syscall_tp; struct bpf_prog *prog; + int ret; if (event->attr.type != PERF_TYPE_TRACEPOINT) return perf_event_set_bpf_handler(event, prog_fd); - if (event->tp_event->prog) - return -EEXIST; - is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE; is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT; is_syscall_tp = is_syscall_trace_event(event->tp_event); @@ -8265,26 +8261,20 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) return -EACCES; } } - event->tp_event->prog = prog; - event->tp_event->bpf_prog_owner = event; - return 0; + ret = perf_event_attach_bpf_prog(event, prog); + if (ret) + bpf_prog_put(prog); + return ret; } static void perf_event_free_bpf_prog(struct perf_event *event) { - struct bpf_prog *prog; - if (event->attr.type != PERF_TYPE_TRACEPOINT) { perf_event_free_bpf_handler(event); return; } - - prog = event->tp_event->prog; - if (prog && event->tp_event->bpf_prog_owner == event) { - event->tp_event->prog = NULL; - bpf_prog_put(prog); - } + perf_event_detach_bpf_prog(event); } #else diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index f9dd8fd055a6..7a3df66056de 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -17,7 +17,7 @@ /** * trace_call_bpf - invoke BPF program - * @prog: BPF program + * @call: tracepoint event * @ctx: opaque context pointer * * kprobe handlers execute BPF programs via this helper. @@ -29,7 +29,7 @@ * 1 - store kprobe event into ring buffer * Other values are reserved and currently alias to 1 */ -unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) +unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) { unsigned int ret; @@ -49,9 +49,22 @@ unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) goto out; } - rcu_read_lock(); - ret = BPF_PROG_RUN(prog, ctx); - rcu_read_unlock(); + /* + * Instead of moving rcu_read_lock/rcu_dereference/rcu_read_unlock + * to all call sites, we did a bpf_prog_array_valid() there to check + * whether call->prog_array is empty or not, which is + * a heurisitc to speed up execution. + * + * If bpf_prog_array_valid() fetched prog_array was + * non-NULL, we go into trace_call_bpf() and do the actual + * proper rcu_dereference() under RCU lock. + * If it turns out that prog_array is NULL then, we bail out. + * For the opposite, if the bpf_prog_array_valid() fetched pointer + * was NULL, you'll skip the prog_array with the risk of missing + * out of events when it was updated in between this and the + * rcu_dereference() which is accepted risk. + */ + ret = BPF_PROG_RUN_ARRAY_CHECK(call->prog_array, ctx, BPF_PROG_RUN); out: __this_cpu_dec(bpf_prog_active); @@ -674,3 +687,62 @@ const struct bpf_verifier_ops perf_event_prog_ops = { .is_valid_access = pe_prog_is_valid_access, .convert_ctx_access = pe_prog_convert_ctx_access, }; + +static DEFINE_MUTEX(bpf_event_mutex); + +int perf_event_attach_bpf_prog(struct perf_event *event, + struct bpf_prog *prog) +{ + struct bpf_prog_array __rcu *old_array; + struct bpf_prog_array *new_array; + int ret = -EEXIST; + + mutex_lock(&bpf_event_mutex); + + if (event->prog) + goto out; + + old_array = rcu_dereference_protected(event->tp_event->prog_array, + lockdep_is_held(&bpf_event_mutex)); + ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array); + if (ret < 0) + goto out; + + /* set the new array to event->tp_event and set event->prog */ + event->prog = prog; + rcu_assign_pointer(event->tp_event->prog_array, new_array); + bpf_prog_array_free(old_array); + +out: + mutex_unlock(&bpf_event_mutex); + return ret; +} + +void perf_event_detach_bpf_prog(struct perf_event *event) +{ + struct bpf_prog_array __rcu *old_array; + struct bpf_prog_array *new_array; + int ret; + + mutex_lock(&bpf_event_mutex); + + if (!event->prog) + goto out; + + old_array = rcu_dereference_protected(event->tp_event->prog_array, + lockdep_is_held(&bpf_event_mutex)); + + ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array); + if (ret < 0) { + bpf_prog_array_delete_safe(old_array, event->prog); + } else { + rcu_assign_pointer(event->tp_event->prog_array, new_array); + bpf_prog_array_free(old_array); + } + + bpf_prog_put(event->prog); + event->prog = NULL; + +out: + mutex_unlock(&bpf_event_mutex); +} diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index ea20274a105a..0c23b5615977 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1185,13 +1185,12 @@ static void kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) { struct trace_event_call *call = &tk->tp.call; - struct bpf_prog *prog = call->prog; struct kprobe_trace_entry_head *entry; struct hlist_head *head; int size, __size, dsize; int rctx; - if (prog && !trace_call_bpf(prog, regs)) + if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs)) return; head = this_cpu_ptr(call->perf_events); @@ -1221,13 +1220,12 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, struct pt_regs *regs) { struct trace_event_call *call = &tk->tp.call; - struct bpf_prog *prog = call->prog; struct kretprobe_trace_entry_head *entry; struct hlist_head *head; int size, __size, dsize; int rctx; - if (prog && !trace_call_bpf(prog, regs)) + if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs)) return; head = this_cpu_ptr(call->perf_events); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index a2a642f2c64f..19bcaaac884b 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -560,9 +560,10 @@ static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls); static int sys_perf_refcount_enter; static int sys_perf_refcount_exit; -static int perf_call_bpf_enter(struct bpf_prog *prog, struct pt_regs *regs, - struct syscall_metadata *sys_data, - struct syscall_trace_enter *rec) { +static int perf_call_bpf_enter(struct trace_event_call *call, struct pt_regs *regs, + struct syscall_metadata *sys_data, + struct syscall_trace_enter *rec) +{ struct syscall_tp_t { unsigned long long regs; unsigned long syscall_nr; @@ -574,7 +575,7 @@ static int perf_call_bpf_enter(struct bpf_prog *prog, struct pt_regs *regs, param.syscall_nr = rec->nr; for (i = 0; i < sys_data->nb_args; i++) param.args[i] = rec->args[i]; - return trace_call_bpf(prog, ¶m); + return trace_call_bpf(call, ¶m); } static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) @@ -582,7 +583,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) struct syscall_metadata *sys_data; struct syscall_trace_enter *rec; struct hlist_head *head; - struct bpf_prog *prog; + bool valid_prog_array; int syscall_nr; int rctx; int size; @@ -597,9 +598,9 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) if (!sys_data) return; - prog = READ_ONCE(sys_data->enter_event->prog); head = this_cpu_ptr(sys_data->enter_event->perf_events); - if (!prog && hlist_empty(head)) + valid_prog_array = bpf_prog_array_valid(sys_data->enter_event); + if (!valid_prog_array && hlist_empty(head)) return; /* get the size after alignment with the u32 buffer size field */ @@ -615,7 +616,8 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) syscall_get_arguments(current, regs, 0, sys_data->nb_args, (unsigned long *)&rec->args); - if ((prog && !perf_call_bpf_enter(prog, regs, sys_data, rec)) || + if ((valid_prog_array && + !perf_call_bpf_enter(sys_data->enter_event, regs, sys_data, rec)) || hlist_empty(head)) { perf_swevent_put_recursion_context(rctx); return; @@ -660,8 +662,9 @@ static void perf_sysenter_disable(struct trace_event_call *call) mutex_unlock(&syscall_trace_lock); } -static int perf_call_bpf_exit(struct bpf_prog *prog, struct pt_regs *regs, - struct syscall_trace_exit *rec) { +static int perf_call_bpf_exit(struct trace_event_call *call, struct pt_regs *regs, + struct syscall_trace_exit *rec) +{ struct syscall_tp_t { unsigned long long regs; unsigned long syscall_nr; @@ -671,7 +674,7 @@ static int perf_call_bpf_exit(struct bpf_prog *prog, struct pt_regs *regs, *(struct pt_regs **)¶m = regs; param.syscall_nr = rec->nr; param.ret = rec->ret; - return trace_call_bpf(prog, ¶m); + return trace_call_bpf(call, ¶m); } static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) @@ -679,7 +682,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) struct syscall_metadata *sys_data; struct syscall_trace_exit *rec; struct hlist_head *head; - struct bpf_prog *prog; + bool valid_prog_array; int syscall_nr; int rctx; int size; @@ -694,9 +697,9 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) if (!sys_data) return; - prog = READ_ONCE(sys_data->exit_event->prog); head = this_cpu_ptr(sys_data->exit_event->perf_events); - if (!prog && hlist_empty(head)) + valid_prog_array = bpf_prog_array_valid(sys_data->exit_event); + if (!valid_prog_array && hlist_empty(head)) return; /* We can probably do that at build time */ @@ -710,7 +713,8 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) rec->nr = syscall_nr; rec->ret = syscall_get_return_value(current, regs); - if ((prog && !perf_call_bpf_exit(prog, regs, rec)) || + if ((valid_prog_array && + !perf_call_bpf_exit(sys_data->exit_event, regs, rec)) || hlist_empty(head)) { perf_swevent_put_recursion_context(rctx); return; diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 10d13fdcabf4..78d56bddaaa5 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1118,13 +1118,12 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, { struct trace_event_call *call = &tu->tp.call; struct uprobe_trace_entry_head *entry; - struct bpf_prog *prog = call->prog; struct hlist_head *head; void *data; int size, esize; int rctx; - if (prog && !trace_call_bpf(prog, regs)) + if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs)) return; esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); From 10fa2703fb52f2adf0332c6743587d44ce670b0f Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Wed, 30 Jan 2019 18:22:58 -0800 Subject: [PATCH 1390/3715] ANDROID: scripts/Kbuild: add ld-name support for ld.lld Bug: 63740206 Change-Id: I0cd9940c2fa07a6bb4573ec8b1edcd21fd1f3a4d Signed-off-by: Nick Desaulniers Signed-off-by: Sami Tolvanen --- scripts/Kbuild.include | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index ae30702c1f15..08969e192c38 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -204,8 +204,19 @@ ld-option = $(call try-run, $(LD) $(LDFLAGS) $(1) -v,$(1),$(2)) ar-option = $(call try-run, $(AR) rc$(1) "$$TMP",$(1),$(2)) # ld-name -# Expands to either bfd or gold -ld-name = $(shell $(LD) -v 2>&1 | grep -q "GNU gold" && echo gold || echo bfd) +# Expands to either bfd, gold, or lld +ifneq (,$(LD)) + __ld-name = $(shell $(LD) -v 2>&1) +endif +ifneq (,$(findstring GNU gold,$(__ld-name))) + ld-name = gold +else + ifneq (,$(findstring LLD,$(__ld-name))) + ld-name = lld + else + ld-name = bfd + endif +endif # ld-version # Note this is mainly for HJ Lu's 3 number binutil versions From 4a4ea24df22cf8419006f5f65745f12b5fcdace0 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 11 Feb 2019 14:04:46 -0800 Subject: [PATCH 1391/3715] BACKPORT: FROMLIST: Makefile: lld: set -O2 linker flag when linking with LLD For arm64: 0.34% size improvement with lld -O2 over lld for vmlinux. 3.3% size improvement with lld -O2 over lld for Image.lz4-dtb. (am from https://lkml.org/lkml/2019/2/11/1949) Bug: 63740206 Change-Id: Ibdc7ecc9861562305b49456d6d37274ce8075e22 Link: https://github.com/ClangBuiltLinux/linux/issues/343 Suggested-by: Rui Ueyama Suggested-by: Nathan Chancellor Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Signed-off-by: Nick Desaulniers Signed-off-by: Sami Tolvanen --- Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Makefile b/Makefile index d703398592d0..5226cecfcc19 100644 --- a/Makefile +++ b/Makefile @@ -755,6 +755,10 @@ else KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable) endif +ifeq ($(ld-name),lld) +LDFLAGS += -O2 +endif + KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable) ifdef CONFIG_FRAME_POINTER KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls From 26bbb793c7eb43f25e37bafd3ae9455d2728dac6 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Wed, 30 Jan 2019 18:23:26 -0800 Subject: [PATCH 1392/3715] BACKPORT: FROMLIST: Makefile: lld: tell clang to use lld This is needed because clang doesn't select which linker to use based on $LD but rather -fuse-ld=lld. This is problematic especially for cc-ldoption, which checks for linker flag support via invoking the compiler, rather than the linker. (am from https://lkml.org/lkml/2019/2/11/1947) Bug: 63740206 Change-Id: I3edf2f0f6c0bac842bef22194bd48a993fc0e3c0 Link: https://github.com/ClangBuiltLinux/linux/issues/342 Suggested-by: Nathan Chancellor Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Signed-off-by: Nick Desaulniers --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index 5226cecfcc19..1970d26d730d 100644 --- a/Makefile +++ b/Makefile @@ -498,6 +498,9 @@ CLANG_FLAGS += -Werror=unknown-warning-option KBUILD_CFLAGS += $(CLANG_FLAGS) KBUILD_AFLAGS += $(CLANG_FLAGS) export CLANG_FLAGS +ifeq ($(ld-name),lld) +KBUILD_CFLAGS += -fuse-ld=lld +endif endif RETPOLINE_CFLAGS_GCC := -mindirect-branch=thunk-extern -mindirect-branch-register From 6aeb8ae050bd5263c583007b9bcdfe20f8b4a07f Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 19 Feb 2019 16:06:28 -0800 Subject: [PATCH 1393/3715] ANDROID: Makefile: set -Qunused-arguments sooner The recently added support for -fuse-ld=lld is problematic for kbuild in that cc-option tests that add -Werror -c -fuse-ld=lld, as -c implies that no linker is invoked, and thus -fuse-ld=lld is an unused flag. Therefor, -Qunused-arguments needs to be set sooner in the Makefile, otherwise cc-option will fail when building with: $ make LD=ld.lld Also, -Qunused-arguments has been supported by Clang for a long time; there's no need to wrap it in cc-option. Bug: 63740206 Bug: 124794189 Change-Id: I90fb78fab1197db781ede09327783f616e5fbfaf Link: https://github.com/ClangBuiltLinux/linux/issues/366 Suggested-by: Nathan Chancellor Signed-off-by: Nick Desaulniers --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1970d26d730d..4d36ffb75de2 100644 --- a/Makefile +++ b/Makefile @@ -501,6 +501,7 @@ export CLANG_FLAGS ifeq ($(ld-name),lld) KBUILD_CFLAGS += -fuse-ld=lld endif +KBUILD_CPPFLAGS += -Qunused-arguments endif RETPOLINE_CFLAGS_GCC := -mindirect-branch=thunk-extern -mindirect-branch-register @@ -740,7 +741,6 @@ endif KBUILD_CFLAGS += $(stackp-flag) ifeq ($(cc-name),clang) -KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,) KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier) KBUILD_CFLAGS += $(call cc-disable-warning, gnu) KBUILD_CFLAGS += $(call cc-disable-warning, duplicate-decl-specifier) From 668703e6479fe0c9881a092daf792995a238cb7d Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Tue, 12 Feb 2019 10:58:47 -0800 Subject: [PATCH 1394/3715] ANDROID: kbuild: allow lld to be used with CONFIG_LTO_CLANG Bug: 63740206 Bug: 117299373 Change-Id: Ic9c8ca03fd082a8404905718f5312a3f497efa5a Signed-off-by: Sami Tolvanen --- Makefile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 4d36ffb75de2..86c0ae37e350 100644 --- a/Makefile +++ b/Makefile @@ -654,10 +654,12 @@ export CFLAGS_GCOV CFLAGS_KCOV # Make toolchain changes before including arch/$(SRCARCH)/Makefile to ensure # ar/cc/ld-* macros return correct values. ifdef CONFIG_LTO_CLANG +ifneq ($(ld-name),lld) # use GNU gold with LLVMgold for LTO linking, and LD for vmlinux_link LDFINAL_vmlinux := $(LD) LD := $(LDGOLD) LDFLAGS += -plugin LLVMgold.so +endif # use llvm-ar for building symbol tables from IR files, and llvm-dis instead # of objdump for processing symbol versions and exports LLVM_AR := llvm-ar @@ -1218,8 +1220,10 @@ ifdef CONFIG_LTO_CLANG ifneq ($(call clang-ifversion, -ge, 0500, y), y) @echo Cannot use CONFIG_LTO_CLANG: requires clang 5.0 or later >&2 && exit 1 endif - ifneq ($(call gold-ifversion, -ge, 112000000, y), y) - @echo Cannot use CONFIG_LTO_CLANG: requires GNU gold 1.12 or later >&2 && exit 1 + ifneq ($(ld-name),lld) + ifneq ($(call gold-ifversion, -ge, 112000000, y), y) + @echo Cannot use CONFIG_LTO_CLANG: requires GNU gold 1.12 or later >&2 && exit 1 + endif endif endif # Make sure compiler supports LTO flags From 971edfbdef2a2fab26adf4cd5977cdcc3e6203be Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 9 Dec 2019 15:47:29 -0800 Subject: [PATCH 1395/3715] ANDROID: enable ARM64_ERRATUM_843419 by default with LTO_CLANG This reverts a workaround we needed with an old version of the toolchain. Bug: 145210207 Change-Id: If28b9f97da21c046adff7d5a58943a4fc06cb390 Signed-off-by: Sami Tolvanen --- arch/arm64/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 0328d7427b07..444390dedfaa 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -436,7 +436,6 @@ config ARM64_ERRATUM_845719 config ARM64_ERRATUM_843419 bool "Cortex-A53: 843419: A load or store might access an incorrect address" - default y if !LTO_CLANG select ARM64_MODULE_CMODEL_LARGE if MODULES help This option links the kernel with '--fix-cortex-a53-843419' and From 71d1e547a6ccff11307c7f93426b2db32d791f0c Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Tue, 12 Feb 2019 11:00:51 -0800 Subject: [PATCH 1396/3715] ANDROID: arm64: add atomic_ll_sc.o to obj-y if using lld __ll_sc_* functions are only referenced in inline assembly, which means lld won't see them when CONFIG_LTO_CLANG is enabled due to a bug in LLVM: https://bugs.llvm.org/show_bug.cgi?id=35841 When LTO and CONFIG_ARM64_LSE_ATOMICS are both enabled, linking fails with the following type of errors: ld.lld: error: relocation R_AARCH64_CALL26 cannot refer to absolute symbol: __ll_sc_atomic64_add_return_acquire >>> defined in vmlinux.o >>> referenced by atomic_lse.h:299 (arch/arm64/include/asm/atomic_lse.h:299) >>> vmlinux.o:(kernel_init_freeable) ... This change works around the problem by always linking in atomic_ll_sc.o to vmlinux, instead of placing it in arch/arm64/lib/lib.a. Bug: 63740206 Bug: 117299373 Bug: 124318741 Change-Id: Idf2211034cce9cb0b6b2007bbdb1dfc1c1a1053d Signed-off-by: Sami Tolvanen --- arch/arm64/lib/Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 9a947afaf74c..c6a638a1bb3f 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -11,7 +11,12 @@ lib-y := bitops.o clear_user.o delay.o copy_from_user.o \ # patching of the bl instruction in the caller with an atomic instruction # when supported by the CPU. Result and argument registers are handled # correctly, based on the function prototype. +ifeq ($(ld-name),lld) +# https://bugs.llvm.org/show_bug.cgi?id=35841 +obj-$(CONFIG_ARM64_LSE_ATOMICS) += atomic_ll_sc.o +else lib-$(CONFIG_ARM64_LSE_ATOMICS) += atomic_ll_sc.o +endif CFLAGS_atomic_ll_sc.o := -ffixed-x1 -ffixed-x2 \ -ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6 \ -ffixed-x7 -fcall-saved-x8 -fcall-saved-x9 \ From b2763f0fff968d27c3564b33f105b4de5f990d91 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 9 Dec 2019 15:26:19 -0800 Subject: [PATCH 1397/3715] ANDROID: clang: update to 10.0.1 Bug: 144776928 Change-Id: I00f376fb41cd752eccc219ebdd0524361b88a5ac Signed-off-by: Sami Tolvanen --- build.config.common | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.config.common b/build.config.common index 57ebdcd6e8be..8a15f9176b83 100644 --- a/build.config.common +++ b/build.config.common @@ -2,7 +2,7 @@ BRANCH=android-4.14 KERNEL_DIR=common CC=clang -CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r365631c/bin +CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r370808/bin BUILDTOOLS_PREBUILT_BIN=build/build-tools/path/linux-x86 EXTRA_CMDS='' From 6fdd67bc4d3b006e56fced35112f977161de06ab Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 9 Dec 2019 15:18:32 -0800 Subject: [PATCH 1398/3715] ANDROID: Switch to LLD Bug: 63740206 Change-Id: I60389ba997e2d1d8bb419f0edbb8fcd0093636fb Signed-off-by: Sami Tolvanen --- build.config.common | 1 + 1 file changed, 1 insertion(+) diff --git a/build.config.common b/build.config.common index 8a15f9176b83..d4754486cd82 100644 --- a/build.config.common +++ b/build.config.common @@ -2,6 +2,7 @@ BRANCH=android-4.14 KERNEL_DIR=common CC=clang +LD=ld.lld CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r370808/bin BUILDTOOLS_PREBUILT_BIN=build/build-tools/path/linux-x86 From 6d6127fd19f11f20579454e6b31d53fccdf7d929 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 1 Aug 2019 12:45:43 -0700 Subject: [PATCH 1399/3715] ANDROID: add support for ThinLTO This change adds support for ThinLTO, which greatly improves build times over full LTO while retaining most of the performance benefits: https://clang.llvm.org/docs/ThinLTO.html Bug: 145210207 Change-Id: I8bfc19028266077be2bc1fb5c2bc001b599d3214 Signed-off-by: Sami Tolvanen --- Makefile | 16 +++++++++++----- arch/Kconfig | 12 ++++++++++++ scripts/Makefile.build | 4 ++-- 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 86c0ae37e350..ec0ac17ab7e5 100644 --- a/Makefile +++ b/Makefile @@ -663,8 +663,8 @@ endif # use llvm-ar for building symbol tables from IR files, and llvm-dis instead # of objdump for processing symbol versions and exports LLVM_AR := llvm-ar -LLVM_DIS := llvm-dis -export LLVM_AR LLVM_DIS +LLVM_NM := llvm-nm +export LLVM_AR LLVM_NM endif # The arch Makefile can set ARCH_{CPP,A,C}FLAGS to override the default @@ -831,10 +831,16 @@ KBUILD_CFLAGS += $(call cc-option,-fdata-sections,) endif ifdef CONFIG_LTO_CLANG -lto-clang-flags := -flto -fvisibility=hidden +ifdef CONFIG_THINLTO +lto-clang-flags := -flto=thin +LDFLAGS += --thinlto-cache-dir=.thinlto-cache +else +lto-clang-flags := -flto +endif +lto-clang-flags += -fvisibility=default $(call cc-option, -fsplit-lto-unit) # allow disabling only clang LTO where needed -DISABLE_LTO_CLANG := -fno-lto -fvisibility=default +DISABLE_LTO_CLANG := -fno-lto export DISABLE_LTO_CLANG endif @@ -851,7 +857,7 @@ export LDFINAL_vmlinux LDFLAGS_FINAL_vmlinux endif ifdef CONFIG_CFI_CLANG -cfi-clang-flags += -fsanitize=cfi $(call cc-option, -fsplit-lto-unit) +cfi-clang-flags += -fsanitize=cfi DISABLE_CFI_CLANG := -fno-sanitize=cfi ifdef CONFIG_MODULES cfi-clang-flags += -fsanitize-cfi-cross-dso diff --git a/arch/Kconfig b/arch/Kconfig index 784c1fe35583..cb156bb334a8 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -622,6 +622,18 @@ config ARCH_SUPPORTS_LTO_CLANG - compiling inline assembly with clang's integrated assembler, - and linking with either lld or GNU gold w/ LLVMgold. +config ARCH_SUPPORTS_THINLTO + bool + help + An architecture should select this if it supports clang's ThinLTO. + +config THINLTO + bool "Use clang ThinLTO (EXPERIMENTAL)" + depends on LTO_CLANG && ARCH_SUPPORTS_THINLTO + default y + help + Use ThinLTO to speed up Link Time Optimization. + choice prompt "Link-Time Optimization (LTO) (EXPERIMENTAL)" default LTO_NONE diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 48622d8c0e31..392ad77e8423 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -220,7 +220,7 @@ cmd_modversions_c = \ > $(@D)/$(@F).symversions; \ fi; \ else \ - if $(LLVM_DIS) -o=- $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \ + if $(LLVM_NM) $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \ $(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \ > $(@D)/$(@F).symversions; \ fi; \ @@ -594,7 +594,7 @@ cmd_export_list = \ $(filter_export_list) \ >>$(ksyms-lds); \ else \ - $(LLVM_DIS) -o=- $$o | \ + $(LLVM_NM) $$o | \ $(filter_export_list) \ >>$(ksyms-lds); \ fi; \ From fb40239300fc6af06803d8915c3dce391354c8d7 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Tue, 10 Dec 2019 13:53:00 -0800 Subject: [PATCH 1400/3715] Revert "ANDROID: HACK: init: ensure initcall ordering with LTO" This reverts commit 727e8415818e53ba15d4e6106e7931aca5cb9032. This is change not sufficient for ThinLTO, it's replaced by Iddda881a52b7942781713b188d810b6100159a2b Bug: 145210207 Change-Id: Idc03fdb8c80b5939c2a0915165cb55e4cf6f0015 Signed-off-by: Sami Tolvanen --- include/linux/init.h | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/include/linux/init.h b/include/linux/init.h index f138e5b918c2..d501f9704bc4 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -153,15 +153,6 @@ extern bool initcall_debug; #ifndef __ASSEMBLY__ -#ifdef CONFIG_LTO_CLANG - /* prepend the variable name with __COUNTER__ to ensure correct ordering */ - #define ___initcall_name2(c, fn, id) __initcall_##c##_##fn##id - #define ___initcall_name1(c, fn, id) ___initcall_name2(c, fn, id) - #define __initcall_name(fn, id) ___initcall_name1(__COUNTER__, fn, id) -#else - #define __initcall_name(fn, id) __initcall_##fn##id -#endif - /* * initcalls are now grouped by functionality into separate * subsections. Ordering inside the subsections is determined @@ -179,7 +170,7 @@ extern bool initcall_debug; */ #define __define_initcall(fn, id) \ - static initcall_t __initcall_name(fn, id) __used \ + static initcall_t __initcall_##fn##id __used \ __attribute__((__section__(".initcall" #id ".init"))) = fn; /* From 4ff705ab4dfcda1e0ab7dfb2c921933d2465c9e8 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 8 Dec 2017 15:49:24 -0800 Subject: [PATCH 1401/3715] ANDROID: init: ensure initcall ordering with LTO With LTO, the compiler doesn't necessarily obey link order for initcalls, and the initcall variable needs to be globally unique to avoid naming collisions. In order to preserve the correct order, we add each variable into its own section and generate a linker script (in scripts/link-vmlinux.sh) to ensure the order remains correct. We also add a __COUNTER__ prefix to the name, so we can retain the order of initcalls within each compilation unit, and __LINE__ to make the names more unique. Bug: 145210207 Change-Id: Iddda881a52b7942781713b188d810b6100159a2b Signed-off-by: Sami Tolvanen --- include/linux/init.h | 36 +++-- scripts/generate_initcall_order.pl | 250 +++++++++++++++++++++++++++++ scripts/link-vmlinux.sh | 37 ++--- 3 files changed, 296 insertions(+), 27 deletions(-) create mode 100755 scripts/generate_initcall_order.pl diff --git a/include/linux/init.h b/include/linux/init.h index d501f9704bc4..45f1b7723d86 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -168,10 +168,33 @@ extern bool initcall_debug; * and remove that completely, so the initcall sections have to be marked * as KEEP() in the linker script. */ +#ifdef CONFIG_LTO_CLANG + /* + * With LTO, the compiler doesn't necessarily obey link order for + * initcalls, and the initcall variable needs to be globally unique + * to avoid naming collisions. In order to preserve the correct + * order, we add each variable into its own section and generate a + * linker script (in scripts/link-vmlinux.sh) to ensure the order + * remains correct. We also add a __COUNTER__ prefix to the name, + * so we can retain the order of initcalls within each compilation + * unit, and __LINE__ to make the names more unique. + */ + #define ___lto_initcall(c, l, fn, id, __sec) \ + static initcall_t __initcall_##c##_##l##_##fn##id __used \ + __attribute__((__section__( #__sec \ + __stringify(.init..##c##_##l##_##fn)))) = fn; + #define __lto_initcall(c, l, fn, id, __sec) \ + ___lto_initcall(c, l, fn, id, __sec) -#define __define_initcall(fn, id) \ + #define ___define_initcall(fn, id, __sec) \ + __lto_initcall(__COUNTER__, __LINE__, fn, id, __sec) +#else + #define ___define_initcall(fn, id, __sec) \ static initcall_t __initcall_##fn##id __used \ - __attribute__((__section__(".initcall" #id ".init"))) = fn; + __attribute__((__section__(#__sec ".init"))) = fn; +#endif + +#define __define_initcall(fn, id) ___define_initcall(fn, id, .initcall##id) /* * Early initcalls run before initializing SMP. @@ -210,13 +233,8 @@ extern bool initcall_debug; #define __exitcall(fn) \ static exitcall_t __exitcall_##fn __exit_call = fn -#define console_initcall(fn) \ - static initcall_t __initcall_##fn \ - __used __section(.con_initcall.init) = fn - -#define security_initcall(fn) \ - static initcall_t __initcall_##fn \ - __used __section(.security_initcall.init) = fn +#define console_initcall(fn) ___define_initcall(fn, con, .con_initcall) +#define security_initcall(fn) ___define_initcall(fn, security, .security_initcall) struct obs_kernel_param { const char *str; diff --git a/scripts/generate_initcall_order.pl b/scripts/generate_initcall_order.pl new file mode 100755 index 000000000000..f772b4a01caa --- /dev/null +++ b/scripts/generate_initcall_order.pl @@ -0,0 +1,250 @@ +#!/usr/bin/env perl +# SPDX-License-Identifier: GPL-2.0 +# +# Generates a linker script that specifies the correct initcall order. +# +# Copyright (C) 2019 Google LLC + +use strict; +use warnings; +use IO::Handle; + +my $nm = $ENV{'LLVM_NM'} || "llvm-nm"; +my $ar = $ENV{'AR'} || "llvm-ar"; +my $objtree = $ENV{'objtree'} || "."; + +## list of all object files to process, in link order +my @objects; +## currently active child processes +my $jobs = {}; # child process pid -> file handle +## results from child processes +my $results = {}; # object index -> { level, function } + +## reads _NPROCESSORS_ONLN to determine the number of processes to start +sub get_online_processors { + open(my $fh, "getconf _NPROCESSORS_ONLN 2>/dev/null |") + or die "$0: failed to execute getconf: $!"; + my $procs = <$fh>; + close($fh); + + if (!($procs =~ /^\d+$/)) { + return 1; + } + + return int($procs); +} + +## finds initcalls defined in an object file, parses level and function name, +## and prints it out to the parent process +sub find_initcalls { + my ($object) = @_; + + die "$0: object file $object doesn't exist?" if (! -f $object); + + open(my $fh, "\"$nm\" -just-symbol-name -defined-only \"$object\" 2>/dev/null |") + or die "$0: failed to execute \"$nm\": $!"; + + my $initcalls = {}; + + while (<$fh>) { + chomp; + + my ($counter, $line, $symbol) = $_ =~ /^__initcall_(\d+)_(\d+)_(.*)$/; + + if (!defined($counter) || !defined($line) || !defined($symbol)) { + next; + } + + my ($function, $level) = $symbol =~ + /^(.*)((early|rootfs|con|security|[0-9])s?)$/; + + die "$0: duplicate initcall counter value in object $object: $_" + if exists($initcalls->{$counter}); + + $initcalls->{$counter} = { + 'level' => $level, + 'line' => $line, + 'function' => $function + }; + } + + close($fh); + + # sort initcalls in each object file numerically by the counter value + # to ensure they are in the order they were defined + foreach my $counter (sort { $a <=> $b } keys(%{$initcalls})) { + print $initcalls->{$counter}->{"level"} . " " . + $counter . " " . + $initcalls->{$counter}->{"line"} . " " . + $initcalls->{$counter}->{"function"} . "\n"; + } +} + +## waits for any child process to complete, reads the results, and adds them to +## the $results array for later processing +sub wait_for_results { + my $pid = wait(); + if ($pid > 0) { + my $fh = $jobs->{$pid}; + + # the child process prints out results in the following format: + # line 1: + # line 2..n: + + my $index = <$fh>; + chomp($index); + + if (!($index =~ /^\d+$/)) { + die "$0: child $pid returned an invalid index: $index"; + } + $index = int($index); + + while (<$fh>) { + chomp; + my ($level, $counter, $line, $function) = $_ =~ + /^([^\ ]+)\ (\d+)\ (\d+)\ (.*)$/; + + if (!defined($level) || + !defined($counter) || + !defined($line) || + !defined($function)) { + die "$0: child $pid returned invalid data"; + } + + if (!exists($results->{$index})) { + $results->{$index} = []; + } + + push (@{$results->{$index}}, { + 'level' => $level, + 'counter' => $counter, + 'line' => $line, + 'function' => $function + }); + } + + close($fh); + delete($jobs->{$pid}); + } +} + +## launches child processes to find initcalls from the object files, waits for +## each process to complete and collects the results +sub process_objects { + my $index = 0; # link order index of the object file + my $njobs = get_online_processors(); + + while (scalar(@objects) > 0) { + my $object = shift(@objects); + + # fork a child process and read it's stdout + my $pid = open(my $fh, '-|'); + + if (!defined($pid)) { + die "$0: failed to fork: $!"; + } elsif ($pid) { + # save the child process pid and the file handle + $jobs->{$pid} = $fh; + } else { + STDOUT->autoflush(1); + print "$index\n"; + find_initcalls("$objtree/$object"); + exit; + } + + $index++; + + # if we reached the maximum number of processes, wait for one + # to complete before launching new ones + if (scalar(keys(%{$jobs})) >= $njobs && scalar(@objects) > 0) { + wait_for_results(); + } + } + + # wait for the remaining children to complete + while (scalar(keys(%{$jobs})) > 0) { + wait_for_results(); + } +} + +## gets a list of actual object files from thin archives, and adds them to +## @objects in link order +sub find_objects { + while (my $file = shift(@ARGV)) { + my $pid = open (my $fh, "\"$ar\" t \"$file\" 2>/dev/null |") + or die "$0: failed to execute $ar: $!"; + + my @output; + + while (<$fh>) { + chomp; + push(@output, $_); + } + + close($fh); + + # if $ar failed, assume we have an object file + if ($? != 0) { + push(@objects, $file); + next; + } + + # if $ar succeeded, read the list of object files + foreach (@output) { + push(@objects, $_); + } + } +} + +## START +find_objects(); +process_objects(); + +## process results and add them to $sections in the correct order +my $sections = {}; + +foreach my $index (sort { $a <=> $b } keys(%{$results})) { + foreach my $result (@{$results->{$index}}) { + my $level = $result->{'level'}; + + if (!exists($sections->{$level})) { + $sections->{$level} = []; + } + + my $fsname = $result->{'counter'} . '_' . + $result->{'line'} . '_' . + $result->{'function'}; + + push(@{$sections->{$level}}, $fsname); + } +} + +if (!keys(%{$sections})) { + exit(0); # no initcalls...? +} + +## print out a linker script that defines the order of initcalls for each +## level +print "SECTIONS {\n"; + +foreach my $level (sort(keys(%{$sections}))) { + my $section; + + if ($level eq 'con') { + $section = '.con_initcall.init'; + } elsif ($level eq 'security') { + $section = '.security_initcall.init'; + } else { + $section = ".initcall${level}.init"; + } + + print "\t${section} : {\n"; + + foreach my $fsname (@{$sections->{$level}}) { + print "\t\t*(${section}..${fsname}) ;\n" + } + + print "\t}\n"; +} + +print "}\n"; diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index cfa44718cef7..ecba415423ec 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -70,29 +70,30 @@ archive_builtin() fi } -# If CONFIG_LTO_CLANG is selected, collect generated symbol versions into -# .tmp_symversions -modversions() +# If CONFIG_LTO_CLANG is selected, generate a linker script to ensure correct +# ordering of initcalls, and with CONFIG_MODVERSIONS also enabled, collect the +# previously generated symbol versions into the same script. +lto_lds() { if [ -z "${CONFIG_LTO_CLANG}" ]; then return fi - if [ -z "${CONFIG_MODVERSIONS}" ]; then - return + ${srctree}/scripts/generate_initcall_order.pl \ + built-in.o ${KBUILD_VMLINUX_LIBS} \ + > .tmp_lto.lds + + if [ -n "${CONFIG_MODVERSIONS}" ]; then + for a in built-in.o ${KBUILD_VMLINUX_LIBS}; do + for o in $(${AR} t $a); do + if [ -f ${o}.symversions ]; then + cat ${o}.symversions >> .tmp_lto.lds + fi + done + done fi - rm -f .tmp_symversions - - for a in built-in.o ${KBUILD_VMLINUX_LIBS}; do - for o in $(${AR} t $a); do - if [ -f ${o}.symversions ]; then - cat ${o}.symversions >> .tmp_symversions - fi - done - done - - echo "-T .tmp_symversions" + echo "-T .tmp_lto.lds" } # Link of vmlinux.o used for section mismatch analysis @@ -124,7 +125,7 @@ modpost_link() info LD vmlinux.o fi - ${LD} ${LDFLAGS} -r -o ${1} $(modversions) ${objects} + ${LD} ${LDFLAGS} -r -o ${1} $(lto_lds) ${objects} } # If CONFIG_LTO_CLANG is selected, we postpone running recordmcount until @@ -251,7 +252,7 @@ cleanup() rm -f .tmp_System.map rm -f .tmp_kallsyms* rm -f .tmp_version - rm -f .tmp_symversions + rm -f .tmp_lto.lds rm -f .tmp_vmlinux* rm -f built-in.o rm -f System.map From 51042de0309fa662ec331fc6e97d01f92212ce80 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Tue, 3 Dec 2019 15:56:24 -0800 Subject: [PATCH 1402/3715] ANDROID: kbuild: merge module sections with LTO LLD always splits sections with LTO, which increases module sizes. This change adds a linker script that merges the split sections in the final module. Bug: 145297228 Change-Id: I247e8bd029bd0f98a4fa1cd4db7f6398467b8e55 Suggested-by: Nick Desaulniers Signed-off-by: Sami Tolvanen --- Makefile | 2 ++ scripts/module-lto.lds | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 scripts/module-lto.lds diff --git a/Makefile b/Makefile index ec0ac17ab7e5..bee9a9e2bd43 100644 --- a/Makefile +++ b/Makefile @@ -839,6 +839,8 @@ lto-clang-flags := -flto endif lto-clang-flags += -fvisibility=default $(call cc-option, -fsplit-lto-unit) +KBUILD_LDS_MODULE += $(srctree)/scripts/module-lto.lds + # allow disabling only clang LTO where needed DISABLE_LTO_CLANG := -fno-lto export DISABLE_LTO_CLANG diff --git a/scripts/module-lto.lds b/scripts/module-lto.lds new file mode 100644 index 000000000000..f5ee544a877d --- /dev/null +++ b/scripts/module-lto.lds @@ -0,0 +1,22 @@ +/* + * With CONFIG_LTO_CLANG, LLD always enables -fdata-sections and + * -ffunction-sections, which increases the size of the final module. + * Merge the split sections in the final binary. + */ +SECTIONS { + /* + * LLVM may emit .eh_frame with CONFIG_CFI_CLANG despite + * -fno-asynchronous-unwind-tables. Discard the section. + */ + /DISCARD/ : { + *(.eh_frame) + } + + .bss : { *(.bss .bss[.0-9a-zA-Z_]*) } + .data : { *(.data .data[.0-9a-zA-Z_]*) } + .rela.data : { *(.rela.data .rela.data[.0-9a-zA-Z_]*) } + .rela.rodata : { *(.rela.rodata .rela.rodata[.0-9a-zA-Z_]*) } + .rela.text : { *(.rela.text .rela.text[.0-9a-zA-Z_]*) } + .rodata : { *(.rodata .rodata[.0-9a-zA-Z_]*) } + .text : { *(.text .text[.0-9a-zA-Z_]*) } +} From 91c7460b15334d466a1f2ea971f6f5064b3ca07e Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 4 Dec 2019 15:57:39 -0800 Subject: [PATCH 1403/3715] ANDROID: kbuild: limit LTO inlining This change limits function inlining across translation unit boundaries in order to reduce the binary size with LTO. The -import-instr-limit flag defines a size limit, as the number of LLVM IR instructions, for importing functions from other TUs. The default value is 100, and decreasing it to 5 reduces the size of a stripped arm64 defconfig vmlinux by 11%. Bug: 145297228 Change-Id: Iaf366f843582972a5dfadc4695abb8f9c59882af Suggested-by: George Burgess IV Signed-off-by: Sami Tolvanen --- Makefile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Makefile b/Makefile index bee9a9e2bd43..b4264d6a6c3b 100644 --- a/Makefile +++ b/Makefile @@ -839,6 +839,12 @@ lto-clang-flags := -flto endif lto-clang-flags += -fvisibility=default $(call cc-option, -fsplit-lto-unit) +# Limit inlining across translation units to reduce binary size +LD_FLAGS_LTO_CLANG := -mllvm -import-instr-limit=5 + +KBUILD_LDFLAGS += $(LD_FLAGS_LTO_CLANG) +KBUILD_LDFLAGS_MODULE += $(LD_FLAGS_LTO_CLANG) + KBUILD_LDS_MODULE += $(srctree)/scripts/module-lto.lds # allow disabling only clang LTO where needed From f80c0f11c60f8c04ac3c3b4fb8a8300968af05b6 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 17 Jun 2019 09:29:13 -0700 Subject: [PATCH 1404/3715] ANDROID: irqchip/gic-v3: rename gic_of_init to work around a ThinLTO+CFI bug Bug: 145210207 Link: https://github.com/ClangBuiltLinux/linux/issues/537 Change-Id: Ibe51a1f531625fde4a44cf92f89b1f9ac41b4c68 Signed-off-by: Sami Tolvanen --- drivers/irqchip/irq-gic-v3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 3d7374655587..d7c76f8bbce2 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1179,7 +1179,7 @@ static void __init gic_of_setup_kvm_info(struct device_node *node) gic_set_kvm_info(&gic_v3_kvm_info); } -static int __init gic_of_init(struct device_node *node, struct device_node *parent) +static int __init gicv3_of_init(struct device_node *node, struct device_node *parent) { void __iomem *dist_base; struct redist_region *rdist_regs; @@ -1244,7 +1244,7 @@ out_unmap_dist: return err; } -IRQCHIP_DECLARE(gic_v3, "arm,gic-v3", gic_of_init); +IRQCHIP_DECLARE(gic_v3, "arm,gic-v3", gicv3_of_init); #ifdef CONFIG_ACPI static struct From 3ad770bcb743feb908fed0ea10303ad73637b4c4 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 8 Aug 2019 15:30:50 -0700 Subject: [PATCH 1405/3715] FROMLIST: arm64: fix alternatives with LLVM's integrated assembler LLVM's integrated assembler fails with the following error when building KVM: :12:6: error: expected absolute expression .if kvm_update_va_mask == 0 ^ :21:6: error: expected absolute expression .if kvm_update_va_mask == 0 ^ :24:2: error: unrecognized instruction mnemonic NOT_AN_INSTRUCTION ^ LLVM ERROR: Error parsing inline asm These errors come from ALTERNATIVE_CB and __ALTERNATIVE_CFG, which test for the existence of the callback parameter in inline assembly using the following expression: " .if " __stringify(cb) " == 0\n" This works with GNU as, but isn't supported by LLVM. This change splits __ALTERNATIVE_CFG and ALTINSTR_ENTRY into separate macros to fix the LLVM build. Bug: 145210207 Change-Id: I62dd786277bcaf3c730a7489c4b5a2b437f722ce (am from https://lore.kernel.org/patchwork/patch/1136803/) Link: https://github.com/ClangBuiltLinux/linux/issues/472 Signed-off-by: Sami Tolvanen --- arch/arm64/include/asm/alternative.h | 32 ++++++++++++++++++---------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index a91933b1e2e6..5d08ab07df33 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -30,13 +30,16 @@ typedef void (*alternative_cb_t)(struct alt_instr *alt, void __init apply_alternatives_all(void); void apply_alternatives(void *start, size_t length); -#define ALTINSTR_ENTRY(feature,cb) \ +#define ALTINSTR_ENTRY(feature) \ " .word 661b - .\n" /* label */ \ - " .if " __stringify(cb) " == 0\n" \ " .word 663f - .\n" /* new instruction */ \ - " .else\n" \ + " .hword " __stringify(feature) "\n" /* feature bit */ \ + " .byte 662b-661b\n" /* source len */ \ + " .byte 664f-663f\n" /* replacement len */ + +#define ALTINSTR_ENTRY_CB(feature,cb) \ + " .word 661b - .\n" /* label */ \ " .word " __stringify(cb) "- .\n" /* callback */ \ - " .endif\n" \ " .hword " __stringify(feature) "\n" /* feature bit */ \ " .byte 662b-661b\n" /* source len */ \ " .byte 664f-663f\n" /* replacement len */ @@ -57,15 +60,14 @@ void apply_alternatives(void *start, size_t length); * * Alternatives with callbacks do not generate replacement instructions. */ -#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled, cb) \ +#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \ ".if "__stringify(cfg_enabled)" == 1\n" \ "661:\n\t" \ oldinstr "\n" \ "662:\n" \ ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(feature,cb) \ + ALTINSTR_ENTRY(feature) \ ".popsection\n" \ - " .if " __stringify(cb) " == 0\n" \ ".pushsection .altinstr_replacement, \"a\"\n" \ "663:\n\t" \ newinstr "\n" \ @@ -73,17 +75,25 @@ void apply_alternatives(void *start, size_t length); ".popsection\n\t" \ ".org . - (664b-663b) + (662b-661b)\n\t" \ ".org . - (662b-661b) + (664b-663b)\n" \ - ".else\n\t" \ + ".endif\n" + +#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb) \ + ".if "__stringify(cfg_enabled)" == 1\n" \ + "661:\n\t" \ + oldinstr "\n" \ + "662:\n" \ + ".pushsection .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY_CB(feature,cb) \ + ".popsection\n" \ "663:\n\t" \ "664:\n\t" \ - ".endif\n" \ ".endif\n" #define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \ - __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg), 0) + __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg)) #define ALTERNATIVE_CB(oldinstr, cb) \ - __ALTERNATIVE_CFG(oldinstr, "NOT_AN_INSTRUCTION", ARM64_CB_PATCH, 1, cb) + __ALTERNATIVE_CFG_CB(oldinstr, ARM64_CB_PATCH, 1, cb) #else #include From dec81f91cc382268616dfbbceb7de3a8ffd7ec20 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 13 Jun 2019 12:06:48 -0700 Subject: [PATCH 1406/3715] ANDROID: soc/tegra: disable ARCH_TEGRA_210_SOC with LTO Disable CONFIG_ARCH_TEGRA_210_SOC with LTO to work around an issue with ThinLTO. Bug: 145210207 Change-Id: Idc2792d6b7d0c755f6f7695cf0b20fa49721d09c Link: https://github.com/ClangBuiltLinux/linux/issues/510 Signed-off-by: Sami Tolvanen --- drivers/soc/tegra/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/soc/tegra/Kconfig b/drivers/soc/tegra/Kconfig index e9e277178c94..96c00470ab32 100644 --- a/drivers/soc/tegra/Kconfig +++ b/drivers/soc/tegra/Kconfig @@ -76,6 +76,7 @@ config ARCH_TEGRA_210_SOC select PINCTRL_TEGRA210 select SOC_TEGRA_FLOWCTRL select SOC_TEGRA_PMC + depends on !LTO_CLANG help Enable support for the NVIDIA Tegra210 SoC. Also known as Tegra X1, the Tegra210 has four Cortex-A57 cores paired with four Cortex-A53 From ab3f7595f43d3ad7ca6e021ab61c88dab976411d Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Tue, 6 Aug 2019 10:38:30 -0700 Subject: [PATCH 1407/3715] ANDROID: arm64: allow ThinLTO to be selected Bug: 145210207 Change-Id: I1284cc32f70c2332ef9908e7b070c69476b1e5d0 Signed-off-by: Sami Tolvanen --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 444390dedfaa..a2293b99cb46 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -25,6 +25,7 @@ config ARM64 select ARCH_USE_CMPXCHG_LOCKREF select ARCH_SUPPORTS_MEMORY_FAILURE select ARCH_SUPPORTS_LTO_CLANG + select ARCH_SUPPORTS_THINLTO select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_WANT_COMPAT_IPC_PARSE_VERSION From 560c13a848ab7fb0f4710cfa459612476a6cca4d Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 4 Oct 2019 09:14:05 -0700 Subject: [PATCH 1408/3715] ANDROID: arm64: add __pa_function We use non-canonical CFI jump tables with CONFIG_CFI_CLANG, which means the compiler replaces function address references with the address of the function's CFI jump table entry. This results in __pa_symbol(function) returning the physical address of the jump table entry, which can lead to address space confusion since the jump table points to a virtual address. This change adds a __pa_function macro, which uses inline assembly to take the actual function address instead. Bug: 145210207 Change-Id: I14995e522365ad09a5c9bd676e1203b2b642cd5a Signed-off-by: Sami Tolvanen --- arch/arm64/include/asm/memory.h | 16 ++++++++++++++++ arch/arm64/include/asm/mmu_context.h | 2 +- arch/arm64/kernel/cpu-reset.h | 2 +- arch/arm64/kernel/cpufeature.c | 2 +- arch/arm64/kernel/psci.c | 3 ++- arch/arm64/kernel/smp_spin_table.c | 2 +- 6 files changed, 22 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index da96146c2e56..8f7f30fec6be 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -310,6 +310,22 @@ static inline void *phys_to_virt(phys_addr_t x) #define virt_to_pfn(x) __phys_to_pfn(__virt_to_phys((unsigned long)(x))) #define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x)) +/* + * With non-canonical CFI jump tables, the compiler replaces function + * address references with the address of the function's CFI jump + * table entry. This results in __pa_symbol(function) returning the + * physical address of the jump table entry, which can lead to address + * space confusion since the jump table points to the function's + * virtual address. Therefore, use inline assembly to ensure we are + * always taking the address of the actual function. + */ +#define __pa_function(x) ({ \ + unsigned long addr; \ + asm("adrp %0, " __stringify(x) "\n\t" \ + "add %0, %0, :lo12:" __stringify(x) : "=r" (addr)); \ + __pa_symbol(addr); \ +}) + /* * virt_to_page(k) convert a _valid_ virtual address to struct page * * virt_addr_valid(k) indicates whether a virtual address is valid diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index f7ff06580721..ecb5a1115455 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -140,7 +140,7 @@ static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgd) phys_addr_t pgd_phys = virt_to_phys(pgd); - replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1); + replace_phys = (void *)__pa_function(idmap_cpu_replace_ttbr1); cpu_install_idmap(); replace_phys(pgd_phys); diff --git a/arch/arm64/kernel/cpu-reset.h b/arch/arm64/kernel/cpu-reset.h index 6c2b1b4f57c9..d3c60c0c121b 100644 --- a/arch/arm64/kernel/cpu-reset.h +++ b/arch/arm64/kernel/cpu-reset.h @@ -24,7 +24,7 @@ static inline void __noreturn cpu_soft_restart(unsigned long el2_switch, el2_switch = el2_switch && !is_kernel_in_hyp_mode() && is_hyp_mode_available(); - restart = (void *)__pa_symbol(__cpu_soft_restart); + restart = (void *)__pa_function(__cpu_soft_restart); cpu_install_idmap(); restart(el2_switch, entry, arg0, arg1, arg2); diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index e85cbfd9e4f2..f9e0493d766e 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -911,7 +911,7 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) if (kpti_applied) return; - remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings); + remap_fn = (void *)__pa_function(idmap_kpti_install_ng_mappings); cpu_install_idmap(); remap_fn(cpu, num_online_cpus(), __pa_symbol(swapper_pg_dir)); diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index e8edbf13302a..c150cde3454c 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -46,7 +46,8 @@ static int __init cpu_psci_cpu_prepare(unsigned int cpu) static int cpu_psci_cpu_boot(unsigned int cpu) { - int err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa_symbol(secondary_entry)); + int err = psci_ops.cpu_on(cpu_logical_map(cpu), + __pa_function(secondary_entry)); if (err) pr_err("failed to boot CPU%d (%d)\n", cpu, err); diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index 93034651c87e..1ba281dc4fae 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -99,7 +99,7 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu) * boot-loader's endianess before jumping. This is mandated by * the boot protocol. */ - writeq_relaxed(__pa_symbol(secondary_holding_pen), release_addr); + writeq_relaxed(__pa_function(secondary_holding_pen), release_addr); __flush_dcache_area((__force void *)release_addr, sizeof(*release_addr)); From 169a8bc423a5205ba77f6fee0c91aeec12965ab4 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 16 Aug 2019 11:32:43 -0700 Subject: [PATCH 1409/3715] ANDROID: arm64: add __nocfi to __apply_alternatives __apply_alternatives makes indirect calls to functions whose address is taken in assembly code using the alternative_cb macro. With CFI enabled using non-canonical jump tables, the compiler isn't able to replace the function reference with the jump table reference, which trips CFI. Bug: 145210207 Change-Id: I088faf94b64ac3068ac798dd8e5029cb2ab92903 Signed-off-by: Sami Tolvanen --- arch/arm64/kernel/alternative.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 5c4bce4ac381..77b36c88eee5 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -122,7 +122,7 @@ static void patch_alternative(struct alt_instr *alt, } } -static void __apply_alternatives(void *alt_region, bool use_linear_alias) +static void __nocfi __apply_alternatives(void *alt_region, bool use_linear_alias) { struct alt_instr *alt; struct alt_region *region = alt_region; From c1b40e1cb763c4f2345dc0c00e85c1d99cb51285 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 20 Nov 2019 09:26:05 -0800 Subject: [PATCH 1410/3715] ANDROID: use non-canonical CFI jump tables Bug: 145210207 Change-Id: I8f59ab53c94087d1e9ec0154337486ad8aad973f Signed-off-by: Sami Tolvanen --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b4264d6a6c3b..270931928ecd 100644 --- a/Makefile +++ b/Makefile @@ -865,7 +865,7 @@ export LDFINAL_vmlinux LDFLAGS_FINAL_vmlinux endif ifdef CONFIG_CFI_CLANG -cfi-clang-flags += -fsanitize=cfi +cfi-clang-flags += -fsanitize=cfi -fno-sanitize-cfi-canonical-jump-tables DISABLE_CFI_CLANG := -fno-sanitize=cfi ifdef CONFIG_MODULES cfi-clang-flags += -fsanitize-cfi-cross-dso From 729284c11571ddfab7fa0eb7b3de5e4c165b55e2 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 16 Oct 2019 18:31:45 -0700 Subject: [PATCH 1411/3715] FROMLIST: arm64: mm: avoid x18 in idmap_kpti_install_ng_mappings idmap_kpti_install_ng_mappings uses x18 as a temporary register, which will result in a conflict when x18 is reserved. Use x16 and x17 instead where needed. Bug: 145210207 Change-Id: I57cc0a9f242bd66e9ed6cb4f251ad951b63aaae8 (am from https://lore.kernel.org/patchwork/patch/1149052/) Reviewed-by: Nick Desaulniers Reviewed-by: Mark Rutland Signed-off-by: Sami Tolvanen --- arch/arm64/mm/proc.S | 65 ++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index f571aff467ab..7f0a622f8964 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -251,13 +251,13 @@ ENTRY(idmap_kpti_install_ng_mappings) /* We're the boot CPU. Wait for the others to catch up */ sevl 1: wfe - ldaxr w18, [flag_ptr] - eor w18, w18, num_cpus - cbnz w18, 1b + ldaxr w17, [flag_ptr] + eor w17, w17, num_cpus + cbnz w17, 1b /* We need to walk swapper, so turn off the MMU. */ - mrs x18, sctlr_el1 - bic x18, x18, #SCTLR_ELx_M + mrs x17, sctlr_el1 + bic x17, x17, #SCTLR_ELx_M msr sctlr_el1, x18 isb @@ -281,9 +281,9 @@ skip_pgd: isb /* We're done: fire up the MMU again */ - mrs x18, sctlr_el1 - orr x18, x18, #SCTLR_ELx_M - msr sctlr_el1, x18 + mrs x17, sctlr_el1 + orr x17, x17, #SCTLR_ELx_M + msr sctlr_el1, x17 isb /* @@ -353,33 +353,9 @@ skip_pte: b.ne do_pte b next_pmd - /* Secondary CPUs end up here */ -__idmap_kpti_secondary: - /* Uninstall swapper before surgery begins */ - __idmap_cpu_set_reserved_ttbr1 x18, x17 - - /* Increment the flag to let the boot CPU we're ready */ -1: ldxr w18, [flag_ptr] - add w18, w18, #1 - stxr w17, w18, [flag_ptr] - cbnz w17, 1b - - /* Wait for the boot CPU to finish messing around with swapper */ - sevl -1: wfe - ldxr w18, [flag_ptr] - cbnz w18, 1b - - /* All done, act like nothing happened */ - msr ttbr1_el1, swapper_ttb - isb - ret - .unreq cpu .unreq num_cpus .unreq swapper_pa - .unreq swapper_ttb - .unreq flag_ptr .unreq cur_pgdp .unreq end_pgdp .unreq pgd @@ -392,6 +368,31 @@ __idmap_kpti_secondary: .unreq cur_ptep .unreq end_ptep .unreq pte + + /* Secondary CPUs end up here */ +__idmap_kpti_secondary: + /* Uninstall swapper before surgery begins */ + __idmap_cpu_set_reserved_ttbr1 x16, x17 + + /* Increment the flag to let the boot CPU we're ready */ +1: ldxr w16, [flag_ptr] + add w16, w16, #1 + stxr w17, w16, [flag_ptr] + cbnz w17, 1b + + /* Wait for the boot CPU to finish messing around with swapper */ + sevl +1: wfe + ldxr w16, [flag_ptr] + cbnz w16, 1b + + /* All done, act like nothing happened */ + msr ttbr1_el1, swapper_ttb + isb + ret + + .unreq swapper_ttb + .unreq flag_ptr ENDPROC(idmap_kpti_install_ng_mappings) .popsection #endif From 817a6f5e0ff806e36b33a156ca74f4cb7bba6b85 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 3 Apr 2018 10:38:28 -0700 Subject: [PATCH 1412/3715] FROMLIST: arm64/lib: copy_page: avoid x18 register in assembler code Register x18 will no longer be used as a caller save register in the future, so stop using it in the copy_page() code. Bug: 145210207 Change-Id: I521c1e52899b36f6a53269852b7b1b2858280756 (am from https://lore.kernel.org/patchwork/patch/1149064/) Link: https://patchwork.kernel.org/patch/9836869/ Signed-off-by: Ard Biesheuvel Reviewed-by: Mark Rutland [Sami: changed the offset and bias to be explicit] Signed-off-by: Sami Tolvanen --- arch/arm64/lib/copy_page.S | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S index 076c43715e64..30f93161190f 100644 --- a/arch/arm64/lib/copy_page.S +++ b/arch/arm64/lib/copy_page.S @@ -45,45 +45,45 @@ alternative_else_nop_endif ldp x14, x15, [x1, #96] ldp x16, x17, [x1, #112] - mov x18, #(PAGE_SIZE - 128) + add x0, x0, #256 add x1, x1, #128 1: - subs x18, x18, #128 + tst x0, #(PAGE_SIZE - 1) alternative_if ARM64_HAS_NO_HW_PREFETCH prfm pldl1strm, [x1, #384] alternative_else_nop_endif - stnp x2, x3, [x0] + stnp x2, x3, [x0, #-256] ldp x2, x3, [x1] - stnp x4, x5, [x0, #16] + stnp x4, x5, [x0, #16 - 256] ldp x4, x5, [x1, #16] - stnp x6, x7, [x0, #32] + stnp x6, x7, [x0, #32 - 256] ldp x6, x7, [x1, #32] - stnp x8, x9, [x0, #48] + stnp x8, x9, [x0, #48 - 256] ldp x8, x9, [x1, #48] - stnp x10, x11, [x0, #64] + stnp x10, x11, [x0, #64 - 256] ldp x10, x11, [x1, #64] - stnp x12, x13, [x0, #80] + stnp x12, x13, [x0, #80 - 256] ldp x12, x13, [x1, #80] - stnp x14, x15, [x0, #96] + stnp x14, x15, [x0, #96 - 256] ldp x14, x15, [x1, #96] - stnp x16, x17, [x0, #112] + stnp x16, x17, [x0, #112 - 256] ldp x16, x17, [x1, #112] add x0, x0, #128 add x1, x1, #128 - b.gt 1b + b.ne 1b - stnp x2, x3, [x0] - stnp x4, x5, [x0, #16] - stnp x6, x7, [x0, #32] - stnp x8, x9, [x0, #48] - stnp x10, x11, [x0, #64] - stnp x12, x13, [x0, #80] - stnp x14, x15, [x0, #96] - stnp x16, x17, [x0, #112] + stnp x2, x3, [x0, #-256] + stnp x4, x5, [x0, #16 - 256] + stnp x6, x7, [x0, #32 - 256] + stnp x8, x9, [x0, #48 - 256] + stnp x10, x11, [x0, #64 - 256] + stnp x12, x13, [x0, #80 - 256] + stnp x14, x15, [x0, #96 - 256] + stnp x16, x17, [x0, #112 - 256] ret ENDPROC(copy_page) From 826e73e2aeb628a30ff9177af84dcfa4421af504 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 3 Apr 2018 10:39:04 -0700 Subject: [PATCH 1413/3715] FROMLIST: arm64: kvm: stop treating register x18 as caller save In preparation of reserving x18, stop treating it as caller save in the KVM guest entry/exit code. Currently, the code assumes there is no need to preserve it for the host, given that it would have been assumed clobbered anyway by the function call to __guest_enter(). Instead, preserve its value and restore it upon return. Bug: 145210207 Change-Id: I12ef981133176a5092b091a060c8d2493cb6f0c3 (am from https://lore.kernel.org/patchwork/patch/1149065/) Link: https://patchwork.kernel.org/patch/9836891/ Signed-off-by: Ard Biesheuvel Reviewed-by: Kees Cook Reviewed-by: Marc Zyngier Reviewed-by: Mark Rutland [Sami: updated commit message, switched from x18 to x29 for the guest context] Signed-off-by: Sami Tolvanen --- arch/arm64/kvm/hyp/entry.S | 41 ++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index a360ac6e89e9..2c2b588a73b7 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -31,7 +31,12 @@ .text .pushsection .hyp.text, "ax" +/* + * We treat x18 as callee-saved as the host may use it as a platform + * register (e.g. for shadow call stack). + */ .macro save_callee_saved_regs ctxt + str x18, [\ctxt, #CPU_XREG_OFFSET(18)] stp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)] stp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)] stp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)] @@ -41,6 +46,8 @@ .endm .macro restore_callee_saved_regs ctxt + // We require \ctxt is not x18-x28 + ldr x18, [\ctxt, #CPU_XREG_OFFSET(18)] ldp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)] ldp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)] ldp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)] @@ -57,29 +64,26 @@ ENTRY(__guest_enter) // x0: vcpu // x1: host context // x2-x17: clobbered by macros - // x18: guest context + // x29: guest context // Store the host regs save_callee_saved_regs x1 - add x18, x0, #VCPU_CONTEXT + add x29, x0, #VCPU_CONTEXT // Restore guest regs x0-x17 - ldp x0, x1, [x18, #CPU_XREG_OFFSET(0)] - ldp x2, x3, [x18, #CPU_XREG_OFFSET(2)] - ldp x4, x5, [x18, #CPU_XREG_OFFSET(4)] - ldp x6, x7, [x18, #CPU_XREG_OFFSET(6)] - ldp x8, x9, [x18, #CPU_XREG_OFFSET(8)] - ldp x10, x11, [x18, #CPU_XREG_OFFSET(10)] - ldp x12, x13, [x18, #CPU_XREG_OFFSET(12)] - ldp x14, x15, [x18, #CPU_XREG_OFFSET(14)] - ldp x16, x17, [x18, #CPU_XREG_OFFSET(16)] + ldp x0, x1, [x29, #CPU_XREG_OFFSET(0)] + ldp x2, x3, [x29, #CPU_XREG_OFFSET(2)] + ldp x4, x5, [x29, #CPU_XREG_OFFSET(4)] + ldp x6, x7, [x29, #CPU_XREG_OFFSET(6)] + ldp x8, x9, [x29, #CPU_XREG_OFFSET(8)] + ldp x10, x11, [x29, #CPU_XREG_OFFSET(10)] + ldp x12, x13, [x29, #CPU_XREG_OFFSET(12)] + ldp x14, x15, [x29, #CPU_XREG_OFFSET(14)] + ldp x16, x17, [x29, #CPU_XREG_OFFSET(16)] - // Restore guest regs x19-x29, lr - restore_callee_saved_regs x18 - - // Restore guest reg x18 - ldr x18, [x18, #CPU_XREG_OFFSET(18)] + // Restore guest regs x18-x29, lr + restore_callee_saved_regs x29 // Do not touch any register after this! eret @@ -101,7 +105,7 @@ ENTRY(__guest_exit) // Retrieve the guest regs x0-x1 from the stack ldp x2, x3, [sp], #16 // x0, x1 - // Store the guest regs x0-x1 and x4-x18 + // Store the guest regs x0-x1 and x4-x17 stp x2, x3, [x1, #CPU_XREG_OFFSET(0)] stp x4, x5, [x1, #CPU_XREG_OFFSET(4)] stp x6, x7, [x1, #CPU_XREG_OFFSET(6)] @@ -110,9 +114,8 @@ ENTRY(__guest_exit) stp x12, x13, [x1, #CPU_XREG_OFFSET(12)] stp x14, x15, [x1, #CPU_XREG_OFFSET(14)] stp x16, x17, [x1, #CPU_XREG_OFFSET(16)] - str x18, [x1, #CPU_XREG_OFFSET(18)] - // Store the guest regs x19-x29, lr + // Store the guest regs x18-x29, lr save_callee_saved_regs x1 get_host_ctxt x2, x3 From 89384505fe8a768be412249d6541a9972ec1b6f7 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 3 Apr 2018 10:44:32 -0700 Subject: [PATCH 1414/3715] FROMLIST: arm64: kernel: avoid x18 in __cpu_soft_restart The code in __cpu_soft_restart() uses x18 as an arbitrary temp register, which will shortly be disallowed. So use x8 instead. Bug: 145210207 Change-Id: Ifeb90566b4e41eb376fd4beaffd55fca8597aa2c (am from https://lore.kernel.org/patchwork/patch/1149053/) Link: https://patchwork.kernel.org/patch/9836877/ Signed-off-by: Ard Biesheuvel Reviewed-by: Mark Rutland Reviewed-by: Kees Cook [Sami: updated commit message] Signed-off-by: Sami Tolvanen --- arch/arm64/kernel/cpu-reset.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S index 8021b46c9743..63ccd07d994b 100644 --- a/arch/arm64/kernel/cpu-reset.S +++ b/arch/arm64/kernel/cpu-reset.S @@ -45,11 +45,11 @@ ENTRY(__cpu_soft_restart) mov x0, #HVC_SOFT_RESTART hvc #0 // no return -1: mov x18, x1 // entry +1: mov x8, x1 // entry mov x0, x2 // arg0 mov x1, x3 // arg1 mov x2, x4 // arg2 - br x18 + br x8 ENDPROC(__cpu_soft_restart) .popsection From a7f210693021679a8319941f3851aa756604cebc Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 2 May 2018 10:44:59 -0700 Subject: [PATCH 1415/3715] FROMLIST: add support for Clang's Shadow Call Stack (SCS) This change adds generic support for Clang's Shadow Call Stack, which uses a shadow stack to protect return addresses from being overwritten by an attacker. Details are available here: https://clang.llvm.org/docs/ShadowCallStack.html Note that security guarantees in the kernel differ from the ones documented for user space. The kernel must store addresses of shadow stacks used by other tasks and interrupt handlers in memory, which means an attacker capable reading and writing arbitrary memory may be able to locate them and hijack control flow by modifying shadow stacks that are not currently in use. Bug: 145210207 Change-Id: Ia5f1650593fa95da4efcf86f84830a20989f161c (am from https://lore.kernel.org/patchwork/patch/1149054/) Reviewed-by: Kees Cook Reviewed-by: Miguel Ojeda Signed-off-by: Sami Tolvanen --- Makefile | 6 ++ arch/Kconfig | 33 ++++++ include/linux/compiler-clang.h | 6 ++ include/linux/compiler_types.h | 4 + include/linux/scs.h | 57 ++++++++++ init/init_task.c | 8 ++ kernel/Makefile | 1 + kernel/fork.c | 8 ++ kernel/sched/core.c | 2 + kernel/scs.c | 187 +++++++++++++++++++++++++++++++++ 10 files changed, 312 insertions(+) create mode 100644 include/linux/scs.h create mode 100644 kernel/scs.c diff --git a/Makefile b/Makefile index 270931928ecd..4cafae480e5f 100644 --- a/Makefile +++ b/Makefile @@ -891,6 +891,12 @@ DISABLE_LTO += $(DISABLE_CFI) export DISABLE_CFI endif +ifdef CONFIG_SHADOW_CALL_STACK +CC_FLAGS_SCS := -fsanitize=shadow-call-stack +KBUILD_CFLAGS += $(CC_FLAGS_SCS) +export CC_FLAGS_SCS +endif + # arch Makefile may override CC so keep this after arch Makefile is included NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include) CHECKFLAGS += $(NOSTDINC_FLAGS) diff --git a/arch/Kconfig b/arch/Kconfig index cb156bb334a8..b7938a82cedc 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -692,6 +692,39 @@ config CFI_CLANG_SHADOW If you select this option, the kernel builds a fast look-up table of CFI check functions in loaded modules to reduce overhead. +config ARCH_SUPPORTS_SHADOW_CALL_STACK + bool + help + An architecture should select this if it supports Clang's Shadow + Call Stack, has asm/scs.h, and implements runtime support for shadow + stack switching. + +config SHADOW_CALL_STACK + bool "Clang Shadow Call Stack" + depends on ARCH_SUPPORTS_SHADOW_CALL_STACK + help + This option enables Clang's Shadow Call Stack, which uses a + shadow stack to protect function return addresses from being + overwritten by an attacker. More information can be found from + Clang's documentation: + + https://clang.llvm.org/docs/ShadowCallStack.html + + Note that security guarantees in the kernel differ from the ones + documented for user space. The kernel must store addresses of shadow + stacks used by other tasks and interrupt handlers in memory, which + means an attacker capable reading and writing arbitrary memory may + be able to locate them and hijack control flow by modifying shadow + stacks that are not currently in use. + +config SHADOW_CALL_STACK_VMAP + bool "Use virtually mapped shadow call stacks" + depends on SHADOW_CALL_STACK + help + Use virtually mapped shadow call stacks. Selecting this option + provides better stack exhaustion protection, but increases per-thread + memory consumption as a full page is allocated for each shadow stack. + config HAVE_ARCH_WITHIN_STACK_FRAMES bool help diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index ef9856bb1994..e8642cc797fd 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -59,3 +59,9 @@ __has_builtin(__builtin_sub_overflow) #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 #endif + +#if __has_feature(shadow_call_stack) +# define __noscs __attribute__((__no_sanitize__("shadow-call-stack"))) +#else +# define __noscs +#endif diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index a207f820d3b0..226f4ea0e57c 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -158,6 +158,10 @@ struct ftrace_likely_data { #define __malloc #endif +#ifndef __noscs +# define __noscs +#endif + /* * Allow us to avoid 'defined but not used' warnings on functions and data, * as well as force them to be emitted to the assembly file. diff --git a/include/linux/scs.h b/include/linux/scs.h new file mode 100644 index 000000000000..c5572fd770b0 --- /dev/null +++ b/include/linux/scs.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Shadow Call Stack support. + * + * Copyright (C) 2019 Google LLC + */ + +#ifndef _LINUX_SCS_H +#define _LINUX_SCS_H + +#include +#include +#include + +#ifdef CONFIG_SHADOW_CALL_STACK + +/* + * In testing, 1 KiB shadow stack size (i.e. 128 stack frames on a 64-bit + * architecture) provided ~40% safety margin on stack usage while keeping + * memory allocation overhead reasonable. + */ +#define SCS_SIZE 1024UL +#define GFP_SCS (GFP_KERNEL | __GFP_ZERO) + +/* + * A random number outside the kernel's virtual address space to mark the + * end of the shadow stack. + */ +#define SCS_END_MAGIC 0xaf0194819b1635f6UL + +#define task_scs(tsk) (task_thread_info(tsk)->shadow_call_stack) + +static inline void task_set_scs(struct task_struct *tsk, void *s) +{ + task_scs(tsk) = s; +} + +extern void scs_init(void); +extern void scs_task_reset(struct task_struct *tsk); +extern int scs_prepare(struct task_struct *tsk, int node); +extern bool scs_corrupted(struct task_struct *tsk); +extern void scs_release(struct task_struct *tsk); + +#else /* CONFIG_SHADOW_CALL_STACK */ + +#define task_scs(tsk) NULL + +static inline void task_set_scs(struct task_struct *tsk, void *s) {} +static inline void scs_init(void) {} +static inline void scs_task_reset(struct task_struct *tsk) {} +static inline int scs_prepare(struct task_struct *tsk, int node) { return 0; } +static inline bool scs_corrupted(struct task_struct *tsk) { return false; } +static inline void scs_release(struct task_struct *tsk) {} + +#endif /* CONFIG_SHADOW_CALL_STACK */ + +#endif /* _LINUX_SCS_H */ diff --git a/init/init_task.c b/init/init_task.c index 9325fee7dc82..7da82dd8034f 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -20,6 +21,13 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct task_struct init_task = INIT_TASK(init_task); EXPORT_SYMBOL(init_task); +#ifdef CONFIG_SHADOW_CALL_STACK +unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)] __init_task_data + __aligned(SCS_SIZE) = { + [(SCS_SIZE / sizeof(long)) - 1] = SCS_END_MAGIC +}; +#endif + /* * Initial thread structure. Alignment of this is handled by a special * linker map entry. diff --git a/kernel/Makefile b/kernel/Makefile index c013aafb797e..3d6ca8e9d04c 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -104,6 +104,7 @@ obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-$(CONFIG_CPU_PM) += cpu_pm.o obj-$(CONFIG_BPF) += bpf/ obj-$(CONFIG_CFI_CLANG) += cfi.o +obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o obj-$(CONFIG_PERF_EVENTS) += events/ diff --git a/kernel/fork.c b/kernel/fork.c index 1afd3f0a67fe..07f92f94ec23 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -93,6 +93,7 @@ #include #include #include +#include #include #include @@ -365,6 +366,7 @@ void put_task_stack(struct task_struct *tsk) void free_task(struct task_struct *tsk) { cpufreq_task_times_exit(tsk); + scs_release(tsk); #ifndef CONFIG_THREAD_INFO_IN_TASK /* @@ -490,6 +492,8 @@ void __init fork_init(void) NULL, free_vm_stack_cache); #endif + scs_init(); + lockdep_init_task(&init_task); } @@ -545,6 +549,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) if (err) goto free_stack; + err = scs_prepare(tsk, node); + if (err) + goto free_stack; + #ifdef CONFIG_SECCOMP /* * We must handle setting up seccomp filters once we're under diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 449e943e856e..1ef76c9effdb 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -5319,6 +5320,7 @@ void init_idle(struct task_struct *idle, int cpu) idle->se.exec_start = sched_clock(); idle->flags |= PF_IDLE; + scs_task_reset(idle); kasan_unpoison_task_stack(idle); #ifdef CONFIG_SMP diff --git a/kernel/scs.c b/kernel/scs.c new file mode 100644 index 000000000000..28abed21950c --- /dev/null +++ b/kernel/scs.c @@ -0,0 +1,187 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Shadow Call Stack support. + * + * Copyright (C) 2019 Google LLC + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static inline void *__scs_base(struct task_struct *tsk) +{ + /* + * To minimize risk the of exposure, architectures may clear a + * task's thread_info::shadow_call_stack while that task is + * running, and only save/restore the active shadow call stack + * pointer when the usual register may be clobbered (e.g. across + * context switches). + * + * The shadow call stack is aligned to SCS_SIZE, and grows + * upwards, so we can mask out the low bits to extract the base + * when the task is not running. + */ + return (void *)((unsigned long)task_scs(tsk) & ~(SCS_SIZE - 1)); +} + +static inline unsigned long *scs_magic(void *s) +{ + return (unsigned long *)(s + SCS_SIZE) - 1; +} + +static inline void scs_set_magic(void *s) +{ + *scs_magic(s) = SCS_END_MAGIC; +} + +#ifdef CONFIG_SHADOW_CALL_STACK_VMAP + +/* Matches NR_CACHED_STACKS for VMAP_STACK */ +#define NR_CACHED_SCS 2 +static DEFINE_PER_CPU(void *, scs_cache[NR_CACHED_SCS]); + +static void *scs_alloc(int node) +{ + int i; + void *s; + + for (i = 0; i < NR_CACHED_SCS; i++) { + s = this_cpu_xchg(scs_cache[i], NULL); + if (s) { + memset(s, 0, SCS_SIZE); + goto out; + } + } + + /* + * We allocate a full page for the shadow stack, which should be + * more than we need. Check the assumption nevertheless. + */ + BUILD_BUG_ON(SCS_SIZE > PAGE_SIZE); + + s = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE, + VMALLOC_START, VMALLOC_END, + GFP_SCS, PAGE_KERNEL, 0, + node, __builtin_return_address(0)); + +out: + if (s) + scs_set_magic(s); + /* TODO: poison for KASAN, unpoison in scs_free */ + + return s; +} + +static void scs_free(void *s) +{ + int i; + + for (i = 0; i < NR_CACHED_SCS; i++) + if (this_cpu_cmpxchg(scs_cache[i], 0, s) == NULL) + return; + + vfree_atomic(s); +} + +static int scs_cleanup(unsigned int cpu) +{ + int i; + void **cache = per_cpu_ptr(scs_cache, cpu); + + for (i = 0; i < NR_CACHED_SCS; i++) { + vfree(cache[i]); + cache[i] = NULL; + } + + return 0; +} + +void __init scs_init(void) +{ + WARN_ON(cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "scs:scs_cache", NULL, + scs_cleanup) < 0); +} + +#else /* !CONFIG_SHADOW_CALL_STACK_VMAP */ + +static struct kmem_cache *scs_cache; + +static inline void *scs_alloc(int node) +{ + void *s; + + s = kmem_cache_alloc_node(scs_cache, GFP_SCS, node); + if (s) { + scs_set_magic(s); + /* + * Poison the allocation to catch unintentional accesses to + * the shadow stack when KASAN is enabled. + */ + kasan_poison_object_data(scs_cache, s); + } + + return s; +} + +static inline void scs_free(void *s) +{ + kasan_unpoison_object_data(scs_cache, s); + kmem_cache_free(scs_cache, s); +} + +void __init scs_init(void) +{ + scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE, + 0, NULL); + WARN_ON(!scs_cache); +} + +#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */ + +void scs_task_reset(struct task_struct *tsk) +{ + /* + * Reset the shadow stack to the base address in case the task + * is reused. + */ + task_set_scs(tsk, __scs_base(tsk)); +} + +int scs_prepare(struct task_struct *tsk, int node) +{ + void *s; + + s = scs_alloc(node); + if (!s) + return -ENOMEM; + + task_set_scs(tsk, s); + return 0; +} + +bool scs_corrupted(struct task_struct *tsk) +{ + unsigned long *magic = scs_magic(__scs_base(tsk)); + + return READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC; +} + +void scs_release(struct task_struct *tsk) +{ + void *s; + + s = __scs_base(tsk); + if (!s) + return; + + WARN_ON(scs_corrupted(tsk)); + + task_set_scs(tsk, NULL); + scs_free(s); +} From ad751f33a5aa26c2b5dcdb4de5842fa385479bdf Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 23 May 2018 13:27:08 -0700 Subject: [PATCH 1416/3715] FROMLIST: scs: add accounting This change adds accounting for the memory allocated for shadow stacks. Bug: 145210207 Change-Id: I51157fe0b23b4cb28bb33c86a5dfe3ac911296a4 (am from https://lore.kernel.org/patchwork/patch/1149055/) Reviewed-by: Kees Cook Signed-off-by: Sami Tolvanen --- drivers/base/node.c | 6 ++++++ fs/proc/meminfo.c | 4 ++++ include/linux/mmzone.h | 3 +++ kernel/scs.c | 20 ++++++++++++++++++++ mm/page_alloc.c | 6 ++++++ mm/vmstat.c | 3 +++ 6 files changed, 42 insertions(+) diff --git a/drivers/base/node.c b/drivers/base/node.c index 5c39f14d15a5..bde133e7dee2 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -114,6 +114,9 @@ static ssize_t node_read_meminfo(struct device *dev, "Node %d AnonPages: %8lu kB\n" "Node %d Shmem: %8lu kB\n" "Node %d KernelStack: %8lu kB\n" +#ifdef CONFIG_SHADOW_CALL_STACK + "Node %d ShadowCallStack:%8lu kB\n" +#endif "Node %d PageTables: %8lu kB\n" "Node %d NFS_Unstable: %8lu kB\n" "Node %d Bounce: %8lu kB\n" @@ -134,6 +137,9 @@ static ssize_t node_read_meminfo(struct device *dev, nid, K(node_page_state(pgdat, NR_ANON_MAPPED)), nid, K(i.sharedram), nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB), +#ifdef CONFIG_SHADOW_CALL_STACK + nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_BYTES) / 1024, +#endif nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)), nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)), nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)), diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index d9d50d3af33b..8c55b69a559c 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -116,6 +116,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v) global_node_page_state(NR_SLAB_UNRECLAIMABLE)); seq_printf(m, "KernelStack: %8lu kB\n", global_zone_page_state(NR_KERNEL_STACK_KB)); +#ifdef CONFIG_SHADOW_CALL_STACK + seq_printf(m, "ShadowCallStack:%8lu kB\n", + global_zone_page_state(NR_KERNEL_SCS_BYTES) / 1024); +#endif show_val_kb(m, "PageTables: ", global_zone_page_state(NR_PAGETABLE)); #ifdef CONFIG_QUICKLIST diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 71b7a8bc82ea..af41d43cf461 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -142,6 +142,9 @@ enum zone_stat_item { NR_MLOCK, /* mlock()ed pages found and moved off LRU */ NR_PAGETABLE, /* used for pagetables */ NR_KERNEL_STACK_KB, /* measured in KiB */ +#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) + NR_KERNEL_SCS_BYTES, /* measured in bytes */ +#endif /* Second 128 byte cacheline */ NR_BOUNCE, #if IS_ENABLED(CONFIG_ZSMALLOC) diff --git a/kernel/scs.c b/kernel/scs.c index 28abed21950c..5245e992c692 100644 --- a/kernel/scs.c +++ b/kernel/scs.c @@ -12,6 +12,7 @@ #include #include #include +#include #include static inline void *__scs_base(struct task_struct *tsk) @@ -89,6 +90,11 @@ static void scs_free(void *s) vfree_atomic(s); } +static struct page *__scs_page(struct task_struct *tsk) +{ + return vmalloc_to_page(__scs_base(tsk)); +} + static int scs_cleanup(unsigned int cpu) { int i; @@ -135,6 +141,11 @@ static inline void scs_free(void *s) kmem_cache_free(scs_cache, s); } +static struct page *__scs_page(struct task_struct *tsk) +{ + return virt_to_page(__scs_base(tsk)); +} + void __init scs_init(void) { scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE, @@ -153,6 +164,12 @@ void scs_task_reset(struct task_struct *tsk) task_set_scs(tsk, __scs_base(tsk)); } +static void scs_account(struct task_struct *tsk, int account) +{ + mod_zone_page_state(page_zone(__scs_page(tsk)), NR_KERNEL_SCS_BYTES, + account * SCS_SIZE); +} + int scs_prepare(struct task_struct *tsk, int node) { void *s; @@ -162,6 +179,8 @@ int scs_prepare(struct task_struct *tsk, int node) return -ENOMEM; task_set_scs(tsk, s); + scs_account(tsk, 1); + return 0; } @@ -182,6 +201,7 @@ void scs_release(struct task_struct *tsk) WARN_ON(scs_corrupted(tsk)); + scs_account(tsk, -1); task_set_scs(tsk, NULL); scs_free(s); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8e35ef3e067d..94575070bd4e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4858,6 +4858,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) " managed:%lukB" " mlocked:%lukB" " kernel_stack:%lukB" +#ifdef CONFIG_SHADOW_CALL_STACK + " shadow_call_stack:%lukB" +#endif " pagetables:%lukB" " bounce:%lukB" " free_pcp:%lukB" @@ -4879,6 +4882,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) K(zone->managed_pages), K(zone_page_state(zone, NR_MLOCK)), zone_page_state(zone, NR_KERNEL_STACK_KB), +#ifdef CONFIG_SHADOW_CALL_STACK + zone_page_state(zone, NR_KERNEL_SCS_BYTES) / 1024, +#endif K(zone_page_state(zone, NR_PAGETABLE)), K(zone_page_state(zone, NR_BOUNCE)), K(free_pcp), diff --git a/mm/vmstat.c b/mm/vmstat.c index d775e05f62fe..83d26e8917d0 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1046,6 +1046,9 @@ const char * const vmstat_text[] = { "nr_mlock", "nr_page_table_pages", "nr_kernel_stack", +#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) + "nr_shadow_call_stack_bytes", +#endif "nr_bounce", #if IS_ENABLED(CONFIG_ZSMALLOC) "nr_zspages", From 1c6394a973a34681a43f386c03a9095fa47abe00 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 6 Sep 2018 13:09:06 -0700 Subject: [PATCH 1417/3715] FROMLIST: scs: add support for stack usage debugging Implements CONFIG_DEBUG_STACK_USAGE for shadow stacks. When enabled, also prints out the highest shadow stack usage per process. Bug: 145210207 Change-Id: I4c085b51e1432e8d52e54126ffd8bf7b6e35b529 (am from https://lore.kernel.org/patchwork/patch/1149056/) Reviewed-by: Kees Cook Signed-off-by: Sami Tolvanen --- kernel/scs.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/kernel/scs.c b/kernel/scs.c index 5245e992c692..ad74d13f2c0f 100644 --- a/kernel/scs.c +++ b/kernel/scs.c @@ -184,6 +184,44 @@ int scs_prepare(struct task_struct *tsk, int node) return 0; } +#ifdef CONFIG_DEBUG_STACK_USAGE +static inline unsigned long scs_used(struct task_struct *tsk) +{ + unsigned long *p = __scs_base(tsk); + unsigned long *end = scs_magic(p); + unsigned long s = (unsigned long)p; + + while (p < end && READ_ONCE_NOCHECK(*p)) + p++; + + return (unsigned long)p - s; +} + +static void scs_check_usage(struct task_struct *tsk) +{ + static DEFINE_SPINLOCK(lock); + static unsigned long highest; + unsigned long used = scs_used(tsk); + + if (used <= highest) + return; + + spin_lock(&lock); + + if (used > highest) { + pr_info("%s (%d): highest shadow stack usage: %lu bytes\n", + tsk->comm, task_pid_nr(tsk), used); + highest = used; + } + + spin_unlock(&lock); +} +#else +static inline void scs_check_usage(struct task_struct *tsk) +{ +} +#endif + bool scs_corrupted(struct task_struct *tsk) { unsigned long *magic = scs_magic(__scs_base(tsk)); @@ -200,6 +238,7 @@ void scs_release(struct task_struct *tsk) return; WARN_ON(scs_corrupted(tsk)); + scs_check_usage(tsk); scs_account(tsk, -1); task_set_scs(tsk, NULL); From 1425ea0ab86f43e235d9081852165ab8ef003346 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 5 Sep 2018 15:23:18 -0700 Subject: [PATCH 1418/3715] FROMLIST: arm64: disable function graph tracing with SCS The graph tracer hooks returns by modifying frame records on the (regular) stack, but with SCS the return address is taken from the shadow stack, and the value in the frame record has no effect. As we don't currently have a mechanism to determine the corresponding slot on the shadow stack (and to pass this through the ftrace infrastructure), for now let's disable the graph tracer when SCS is enabled. Bug: 145210207 Change-Id: I6fdca3eee60bb8594401920a420cd3c1e23cabce (am from https://lore.kernel.org/patchwork/patch/1149057/) Reviewed-by: Kees Cook Signed-off-by: Sami Tolvanen --- arch/arm64/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a2293b99cb46..fc4359975566 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -95,7 +95,7 @@ config ARM64 select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_TRACER - select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_GRAPH_TRACER if !SHADOW_CALL_STACK select HAVE_GCC_PLUGINS select HAVE_GENERIC_DMA_COHERENT select HAVE_HW_BREAKPOINT if PERF_EVENTS From fb8319c10d6e6ae5c3b794675a49b5a096208c25 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 2 May 2018 09:41:34 -0700 Subject: [PATCH 1419/3715] FROMLIST: arm64: reserve x18 from general allocation with SCS Reserve the x18 register from general allocation when SCS is enabled, because the compiler uses the register to store the current task's shadow stack pointer. Note that all external kernel modules must also be compiled with -ffixed-x18 if the kernel has SCS enabled. Bug: 145210207 Change-Id: If2315ace9a879b3dd2a85f6ba43eddadc4430595 (am from https://lore.kernel.org/patchwork/patch/1149058/) Reviewed-by: Nick Desaulniers Reviewed-by: Kees Cook Signed-off-by: Sami Tolvanen --- arch/arm64/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 8654cee8c921..79650bfbbfd9 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -74,6 +74,10 @@ KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) KBUILD_CFLAGS += $(call cc-option,-mabi=lp64) KBUILD_AFLAGS += $(call cc-option,-mabi=lp64) +ifeq ($(CONFIG_SHADOW_CALL_STACK), y) +KBUILD_CFLAGS += -ffixed-x18 +endif + ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) KBUILD_CPPFLAGS += -mbig-endian CHECKFLAGS += -D__AARCH64EB__ From c89eb7d54c9762353295adae529d155723ce6c21 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 2 May 2018 11:03:32 -0700 Subject: [PATCH 1420/3715] FROMLIST: arm64: preserve x18 when CPU is suspended Don't lose the current task's shadow stack when the CPU is suspended. Bug: 145210207 Change-Id: I8f74f33b4b2c707e25e46bea4fb8a8f5ea1a7036 (am from https://lore.kernel.org/patchwork/patch/1149059/) Reviewed-by: Nick Desaulniers Reviewed-by: Kees Cook Reviewed-by: Mark Rutland Signed-off-by: Sami Tolvanen --- arch/arm64/include/asm/suspend.h | 2 +- arch/arm64/mm/proc.S | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h index 8939c87c4dce..0cde2f473971 100644 --- a/arch/arm64/include/asm/suspend.h +++ b/arch/arm64/include/asm/suspend.h @@ -2,7 +2,7 @@ #ifndef __ASM_SUSPEND_H #define __ASM_SUSPEND_H -#define NR_CTX_REGS 12 +#define NR_CTX_REGS 13 #define NR_CALLEE_SAVED_REGS 12 /* diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 7f0a622f8964..118597813da7 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -65,6 +65,8 @@ ENDPROC(cpu_do_idle) * cpu_do_suspend - save CPU registers context * * x0: virtual address of context pointer + * + * This must be kept in sync with struct cpu_suspend_ctx in . */ ENTRY(cpu_do_suspend) mrs x2, tpidr_el0 @@ -89,6 +91,11 @@ alternative_endif stp x8, x9, [x0, #48] stp x10, x11, [x0, #64] stp x12, x13, [x0, #80] + /* + * Save x18 as it may be used as a platform register, e.g. by shadow + * call stack. + */ + str x18, [x0, #96] ret ENDPROC(cpu_do_suspend) @@ -105,6 +112,13 @@ ENTRY(cpu_do_resume) ldp x9, x10, [x0, #48] ldp x11, x12, [x0, #64] ldp x13, x14, [x0, #80] + /* + * Restore x18, as it may be used as a platform register, and clear + * the buffer to minimize the risk of exposure when used for shadow + * call stack. + */ + ldr x18, [x0, #96] + str xzr, [x0, #96] msr tpidr_el0, x2 msr tpidrro_el0, x3 msr contextidr_el1, x4 From fb7982c4f4b6825290309953c05de85db42950ae Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 2 May 2018 11:05:53 -0700 Subject: [PATCH 1421/3715] BACKPORT: FROMLIST: arm64: vdso: disable Shadow Call Stack Shadow stacks are only available in the kernel, so disable SCS instrumentation for the vDSO. Bug: 145210207 Change-Id: I6e01b2c7788ba52d3b754b1fbd5bfb908b45741b (am from https://lore.kernel.org/patchwork/patch/1149061/) Reviewed-by: Nick Desaulniers Reviewed-by: Kees Cook Reviewed-by: Mark Rutland Signed-off-by: Sami Tolvanen --- arch/arm64/kernel/vdso/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index ef3f9d9d4062..f7620d315bc5 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -17,6 +17,8 @@ ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 \ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) ccflags-y += $(DISABLE_LTO) +CFLAGS_REMOVE_vgettimeofday.o += $(CC_FLAGS_SCS) + # Disable gcov profiling for VDSO code GCOV_PROFILE := n From 06304418de5d9b355b7ed31b698a20cb3fe0d5bc Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 16 Aug 2019 13:09:54 -0700 Subject: [PATCH 1422/3715] FROMLIST: arm64: disable SCS for hypervisor code Filter out CC_FLAGS_SCS for code that runs at a different exception level. Bug: 145210207 Change-Id: Ic93a7333920830f0934c8dd5530082f0ca941777 (am from https://lore.kernel.org/patchwork/patch/1149062/) Suggested-by: Steven Rostedt (VMware) Reviewed-by: Kees Cook Reviewed-by: Mark Rutland Signed-off-by: Sami Tolvanen --- arch/arm64/kvm/hyp/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index bfa00a9e161d..e41392ce3808 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -31,3 +31,6 @@ GCOV_PROFILE := n KASAN_SANITIZE := n UBSAN_SANITIZE := n KCOV_INSTRUMENT := n + +# remove the SCS flags from all objects in this directory +KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS)) From 98308264140092420b14475efd1d2721546c51d1 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 2 May 2018 11:05:22 -0700 Subject: [PATCH 1423/3715] BACKPORT: FROMLIST: arm64: implement Shadow Call Stack This change implements shadow stack switching, initial SCS set-up, and interrupt shadow stacks for arm64. Bug: 145210207 Change-Id: I3d5b9ec374418b110d1f351e1abd41610cfee597 (am from https://lore.kernel.org/patchwork/patch/1149062/) Reviewed-by: Kees Cook Signed-off-by: Sami Tolvanen --- arch/arm64/Kconfig | 5 ++++ arch/arm64/include/asm/scs.h | 37 +++++++++++++++++++++++++ arch/arm64/include/asm/stacktrace.h | 4 +++ arch/arm64/include/asm/thread_info.h | 3 +++ arch/arm64/kernel/Makefile | 1 + arch/arm64/kernel/asm-offsets.c | 3 +++ arch/arm64/kernel/entry.S | 32 ++++++++++++++++++++-- arch/arm64/kernel/head.S | 9 +++++++ arch/arm64/kernel/irq.c | 2 ++ arch/arm64/kernel/process.c | 2 ++ arch/arm64/kernel/scs.c | 40 ++++++++++++++++++++++++++++ arch/arm64/kernel/smp.c | 4 +++ 12 files changed, 140 insertions(+), 2 deletions(-) create mode 100644 arch/arm64/include/asm/scs.h create mode 100644 arch/arm64/kernel/scs.c diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index fc4359975566..883dfb81024a 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -26,6 +26,7 @@ config ARM64 select ARCH_SUPPORTS_MEMORY_FAILURE select ARCH_SUPPORTS_LTO_CLANG select ARCH_SUPPORTS_THINLTO + select ARCH_SUPPORTS_SHADOW_CALL_STACK if CC_HAVE_SHADOW_CALL_STACK select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_WANT_COMPAT_IPC_PARSE_VERSION @@ -734,6 +735,10 @@ config ARCH_HAS_CACHE_LINE_SIZE source "mm/Kconfig" +# Supported by clang >= 7.0 +config CC_HAVE_SHADOW_CALL_STACK + def_bool y if CLANG_VERSION >= 70000 + config SECCOMP bool "Enable seccomp to safely compute untrusted bytecode" ---help--- diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h new file mode 100644 index 000000000000..c50d2b0c6c5f --- /dev/null +++ b/arch/arm64/include/asm/scs.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SCS_H +#define _ASM_SCS_H + +#ifndef __ASSEMBLY__ + +#include + +#ifdef CONFIG_SHADOW_CALL_STACK + +extern void scs_init_irq(void); + +static __always_inline void scs_save(struct task_struct *tsk) +{ + void *s; + + asm volatile("mov %0, x18" : "=r" (s)); + task_set_scs(tsk, s); +} + +static inline void scs_overflow_check(struct task_struct *tsk) +{ + if (unlikely(scs_corrupted(tsk))) + panic("corrupted shadow stack detected inside scheduler\n"); +} + +#else /* CONFIG_SHADOW_CALL_STACK */ + +static inline void scs_init_irq(void) {} +static inline void scs_save(struct task_struct *tsk) {} +static inline void scs_overflow_check(struct task_struct *tsk) {} + +#endif /* CONFIG_SHADOW_CALL_STACK */ + +#endif /* __ASSEMBLY __ */ + +#endif /* _ASM_SCS_H */ diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 99390755c0c4..b9da0ad58d33 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -38,6 +38,10 @@ extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk); DECLARE_PER_CPU(unsigned long *, irq_stack_ptr); +#ifdef CONFIG_SHADOW_CALL_STACK +DECLARE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr); +#endif + static inline bool on_irq_stack(unsigned long sp) { unsigned long low = (unsigned long)raw_cpu_read(irq_stack_ptr); diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 527ceeb96441..8b2204ea4f3c 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -43,6 +43,9 @@ struct thread_info { u64 ttbr0; /* saved TTBR0_EL1 */ #endif int preempt_count; /* 0 => preemptable, <0 => bug */ +#ifdef CONFIG_SHADOW_CALL_STACK + void *shadow_call_stack; +#endif }; #define INIT_THREAD_INFO(tsk) \ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 714fe90dbf66..f12754d43113 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -55,6 +55,7 @@ arm64-obj-$(CONFIG_ARM64_RELOC_TEST) += arm64-reloc-test.o arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o arm64-obj-$(CONFIG_ARM64_SSBD) += ssbd.o +arm64-obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o ifeq ($(CONFIG_KVM),y) arm64-obj-$(CONFIG_HARDEN_BRANCH_PREDICTOR) += bpi.o diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index b4a0f4ab770a..636657ac89f0 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -42,6 +42,9 @@ int main(void) DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit)); #ifdef CONFIG_ARM64_SW_TTBR0_PAN DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0)); +#endif +#ifdef CONFIG_SHADOW_CALL_STACK + DEFINE(TSK_TI_SCS, offsetof(struct task_struct, thread_info.shadow_call_stack)); #endif DEFINE(TSK_STACK, offsetof(struct task_struct, stack)); BLANK(); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 31ebefa68bba..791f52bec437 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -192,6 +192,11 @@ alternative_cb_end 1: mov x29, xzr // fp pointed to user-space + +#ifdef CONFIG_SHADOW_CALL_STACK + ldr x18, [tsk, #TSK_TI_SCS] // Restore shadow call stack + str xzr, [tsk, #TSK_TI_SCS] // Limit visibility of saved SCS +#endif .else add x21, sp, #S_FRAME_SIZE get_thread_info tsk @@ -281,6 +286,12 @@ alternative_else_nop_endif ct_user_enter .endif +#ifdef CONFIG_SHADOW_CALL_STACK + .if \el == 0 + str x18, [tsk, #TSK_TI_SCS] // Save shadow call stack + .endif +#endif + #ifdef CONFIG_ARM64_SW_TTBR0_PAN /* * Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR @@ -371,6 +382,9 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 .macro irq_stack_entry mov x19, sp // preserve the original sp +#ifdef CONFIG_SHADOW_CALL_STACK + mov x20, x18 // preserve the original shadow stack +#endif /* * Compare sp with the base of the task stack. @@ -388,15 +402,24 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 /* switch to the irq stack */ mov sp, x26 + +#ifdef CONFIG_SHADOW_CALL_STACK + /* also switch to the irq shadow stack */ + ldr_this_cpu x18, irq_shadow_call_stack_ptr, x26 +#endif + 9998: .endm /* - * x19 should be preserved between irq_stack_entry and - * irq_stack_exit. + * The callee-saved regs (x19-x29) should be preserved between + * irq_stack_entry and irq_stack_exit. */ .macro irq_stack_exit mov sp, x19 +#ifdef CONFIG_SHADOW_CALL_STACK + mov x18, x20 +#endif .endm /* @@ -1120,6 +1143,11 @@ ENTRY(cpu_switch_to) ldr lr, [x8] mov sp, x9 msr sp_el0, x1 +#ifdef CONFIG_SHADOW_CALL_STACK + str x18, [x0, #TSK_TI_SCS] + ldr x18, [x1, #TSK_TI_SCS] + str xzr, [x1, #TSK_TI_SCS] // limit visibility of saved SCS +#endif ret ENDPROC(cpu_switch_to) NOKPROBE(cpu_switch_to) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index e0e5630d0047..a69e4cfd7394 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -335,6 +336,10 @@ __primary_switched: stp xzr, x30, [sp, #-16]! mov x29, sp +#ifdef CONFIG_SHADOW_CALL_STACK + adr_l x18, init_shadow_call_stack // Set shadow call stack +#endif + str_l x21, __fdt_pointer, x5 // Save FDT pointer ldr_l x4, kimage_vaddr // Save the offset between @@ -613,6 +618,10 @@ __secondary_switched: mov sp, x1 ldr x2, [x0, #CPU_BOOT_TASK] msr sp_el0, x2 +#ifdef CONFIG_SHADOW_CALL_STACK + ldr x18, [x2, #TSK_TI_SCS] // set shadow call stack + str xzr, [x2, #TSK_TI_SCS] // limit visibility of saved SCS +#endif mov x29, #0 mov x30, #0 b secondary_start_kernel diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index b91abb8f7cd4..706a8d4e0004 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -29,6 +29,7 @@ #include #include #include +#include unsigned long irq_err_count; @@ -91,6 +92,7 @@ static void init_irq_stacks(void) void __init init_IRQ(void) { init_irq_stacks(); + scs_init_irq(); irqchip_init(); if (!handle_arch_irq) panic("No interrupt controller found."); diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 98aeaa970dbb..c4617de80f9c 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -59,6 +59,7 @@ #include #include #include +#include #include #ifdef CONFIG_CC_STACKPROTECTOR @@ -470,6 +471,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev, entry_task_switch(next); uao_thread_switch(next); ssbs_thread_switch(next); + scs_overflow_check(next); /* * Complete any pending TLB or cache maintenance on this CPU in case diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c new file mode 100644 index 000000000000..eaadf5430baa --- /dev/null +++ b/arch/arm64/kernel/scs.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Shadow Call Stack support. + * + * Copyright (C) 2019 Google LLC + */ + +#include +#include +#include +#include + +DEFINE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr); + +#ifndef CONFIG_SHADOW_CALL_STACK_VMAP +DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], irq_shadow_call_stack) + __aligned(SCS_SIZE); +#endif + +void scs_init_irq(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { +#ifdef CONFIG_SHADOW_CALL_STACK_VMAP + unsigned long *p; + + p = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE, + VMALLOC_START, VMALLOC_END, + GFP_SCS, PAGE_KERNEL, + 0, cpu_to_node(cpu), + __builtin_return_address(0)); + + per_cpu(irq_shadow_call_stack_ptr, cpu) = p; +#else + per_cpu(irq_shadow_call_stack_ptr, cpu) = + per_cpu(irq_shadow_call_stack, cpu); +#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */ + } +} diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 3671ee825ea5..e9ce1e4b19f0 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include @@ -346,6 +347,9 @@ void cpu_die(void) { unsigned int cpu = smp_processor_id(); + /* Save the shadow stack pointer before exiting the idle task */ + scs_save(current); + idle_task_exit(); local_irq_disable(); From 917cc0957cdcf3cc66413bd71484293772ea0c60 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Tue, 10 Dec 2019 13:35:29 -0800 Subject: [PATCH 1424/3715] ANDROID: arm64: cuttlefish_defconfig: enable LTO, CFI, and SCS Bug: 145210207 Change-Id: I5d9dd59f9521526b5f5c4e5a44f8f17dd08e6391 Signed-off-by: Sami Tolvanen --- arch/arm64/configs/cuttlefish_defconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index 29fd4486e3f4..89b2521aef19 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -39,6 +39,9 @@ CONFIG_PROFILING=y CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y CONFIG_CC_STACKPROTECTOR_STRONG=y +CONFIG_LTO_CLANG=y +CONFIG_CFI_CLANG=y +CONFIG_SHADOW_CALL_STACK=y CONFIG_REFCOUNT_FULL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y From f9770a68355c2928cae6c87e7daeb7d0139e8103 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 16 Dec 2019 13:32:11 -0800 Subject: [PATCH 1425/3715] ANDROID: kbuild: disable SCS by default in allmodconfig 4.14 doesn't support checking for clang in Kconfig, so change CONFIG_SHADOW_CALL_STACK into a choice, which defaults to SCS being disabled. Bug: 145297810 Fixes: a7f210693021 ("FROMLIST: add support for Clang's Shadow Call Stack (SCS)") Change-Id: I4673d598b11d92a873401fa08301fabd8ae71a01 Signed-off-by: Sami Tolvanen --- arch/Kconfig | 13 +++++++++++++ arch/arm64/Kconfig | 6 +----- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index b7938a82cedc..b27eac589618 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -699,6 +699,17 @@ config ARCH_SUPPORTS_SHADOW_CALL_STACK Call Stack, has asm/scs.h, and implements runtime support for shadow stack switching. +choice + prompt "Return-oriented programming (ROP) protection" + default ROP_PROTECTION_NONE + help + This option controls kernel protections against return-oriented + programming (ROP) attacks, which involve overwriting function return + addresses. + +config ROP_PROTECTION_NONE + bool "None" + config SHADOW_CALL_STACK bool "Clang Shadow Call Stack" depends on ARCH_SUPPORTS_SHADOW_CALL_STACK @@ -717,6 +728,8 @@ config SHADOW_CALL_STACK be able to locate them and hijack control flow by modifying shadow stacks that are not currently in use. +endchoice + config SHADOW_CALL_STACK_VMAP bool "Use virtually mapped shadow call stacks" depends on SHADOW_CALL_STACK diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 883dfb81024a..98824f0256ab 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -26,7 +26,7 @@ config ARM64 select ARCH_SUPPORTS_MEMORY_FAILURE select ARCH_SUPPORTS_LTO_CLANG select ARCH_SUPPORTS_THINLTO - select ARCH_SUPPORTS_SHADOW_CALL_STACK if CC_HAVE_SHADOW_CALL_STACK + select ARCH_SUPPORTS_SHADOW_CALL_STACK select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_WANT_COMPAT_IPC_PARSE_VERSION @@ -735,10 +735,6 @@ config ARCH_HAS_CACHE_LINE_SIZE source "mm/Kconfig" -# Supported by clang >= 7.0 -config CC_HAVE_SHADOW_CALL_STACK - def_bool y if CLANG_VERSION >= 70000 - config SECCOMP bool "Enable seccomp to safely compute untrusted bytecode" ---help--- From f59edcb963b550675ce3690b2a949c64cf1898a0 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 26 Oct 2018 15:05:34 -0700 Subject: [PATCH 1426/3715] UPSTREAM: mm, slab: combine kmalloc_caches and kmalloc_dma_caches Patch series "kmalloc-reclaimable caches", v4. As discussed at LSF/MM [1] here's a patchset that introduces kmalloc-reclaimable caches (more details in the second patch) and uses them for dcache external names. That allows us to repurpose the NR_INDIRECTLY_RECLAIMABLE_BYTES counter later in the series. With patch 3/6, dcache external names are allocated from kmalloc-rcl-* caches, eliminating the need for manual accounting. More importantly, it also ensures the reclaimable kmalloc allocations are grouped in pages separate from the regular kmalloc allocations. The need for proper accounting of dcache external names has shown it's easy for misbehaving process to allocate lots of them, causing premature OOMs. Without the added grouping, it's likely that a similar workload can interleave the dcache external names allocations with regular kmalloc allocations (note: I haven't searched myself for an example of such regular kmalloc allocation, but I would be very surprised if there wasn't some). A pathological case would be e.g. one 64byte regular allocations with 63 external dcache names in a page (64x64=4096), which means the page is not freed even after reclaiming after all dcache names, and the process can thus "steal" the whole page with single 64byte allocation. If other kmalloc users similar to dcache external names become identified, they can also benefit from the new functionality simply by adding __GFP_RECLAIMABLE to the kmalloc calls. Side benefits of the patchset (that could be also merged separately) include removed branch for detecting __GFP_DMA kmalloc(), and shortening kmalloc cache names in /proc/slabinfo output. The latter is potentially an ABI break in case there are tools parsing the names and expecting the values to be in bytes. This is how /proc/slabinfo looks like after booting in virtme: ... kmalloc-rcl-4M 0 0 4194304 1 1024 : tunables 1 1 0 : slabdata 0 0 0 ... kmalloc-rcl-96 7 32 128 32 1 : tunables 120 60 8 : slabdata 1 1 0 kmalloc-rcl-64 25 128 64 64 1 : tunables 120 60 8 : slabdata 2 2 0 kmalloc-rcl-32 0 0 32 124 1 : tunables 120 60 8 : slabdata 0 0 0 kmalloc-4M 0 0 4194304 1 1024 : tunables 1 1 0 : slabdata 0 0 0 kmalloc-2M 0 0 2097152 1 512 : tunables 1 1 0 : slabdata 0 0 0 kmalloc-1M 0 0 1048576 1 256 : tunables 1 1 0 : slabdata 0 0 0 ... /proc/vmstat with renamed nr_indirectly_reclaimable_bytes counter: ... nr_slab_reclaimable 2817 nr_slab_unreclaimable 1781 ... nr_kernel_misc_reclaimable 0 ... /proc/meminfo with new KReclaimable counter: ... Shmem: 564 kB KReclaimable: 11260 kB Slab: 18368 kB SReclaimable: 11260 kB SUnreclaim: 7108 kB KernelStack: 1248 kB ... This patch (of 6): The kmalloc caches currently mainain separate (optional) array kmalloc_dma_caches for __GFP_DMA allocations. There are tests for __GFP_DMA in the allocation hotpaths. We can avoid the branches by combining kmalloc_caches and kmalloc_dma_caches into a single two-dimensional array where the outer dimension is cache "type". This will also allow to add kmalloc-reclaimable caches as a third type. Link: http://lkml.kernel.org/r/20180731090649.16028-2-vbabka@suse.cz Signed-off-by: Vlastimil Babka Acked-by: Mel Gorman Acked-by: Christoph Lameter Acked-by: Roman Gushchin Cc: Michal Hocko Cc: Johannes Weiner Cc: David Rientjes Cc: Joonsoo Kim Cc: Matthew Wilcox Cc: Laura Abbott Cc: Sumit Semwal Cc: Vijayanand Jitta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit cc252eae85e09552f9c1e7ac0c3227f835efdf2d) Bug: 138148041 Test: verify KReclaimable accounting after ION allocation+deallocation Change-Id: I60acb23a8bc9e1c18a470b71de179017e451d869 Signed-off-by: Suren Baghdasaryan --- include/linux/slab.h | 44 ++++++++++++++++++++++++++++++++------------ mm/slab.c | 4 ++-- mm/slab_common.c | 31 ++++++++++++------------------- mm/slub.c | 13 +++++++------ 4 files changed, 53 insertions(+), 39 deletions(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index c8f9c967886e..7b20c0597a9c 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -267,11 +267,28 @@ static inline const char *__check_heap_object(const void *ptr, #define SLAB_OBJ_MIN_SIZE (KMALLOC_MIN_SIZE < 16 ? \ (KMALLOC_MIN_SIZE) : 16) -#ifndef CONFIG_SLOB -extern struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1]; +enum kmalloc_cache_type { + KMALLOC_NORMAL = 0, #ifdef CONFIG_ZONE_DMA -extern struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1]; + KMALLOC_DMA, #endif + NR_KMALLOC_TYPES +}; + +#ifndef CONFIG_SLOB +extern struct kmem_cache * +kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1]; + +static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags) +{ + int is_dma = 0; + +#ifdef CONFIG_ZONE_DMA + is_dma = !!(flags & __GFP_DMA); +#endif + + return is_dma; +} /* * Figure out which kmalloc slab an allocation of a certain size @@ -476,18 +493,20 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags) static __always_inline void *kmalloc(size_t size, gfp_t flags) { if (__builtin_constant_p(size)) { +#ifndef CONFIG_SLOB + unsigned int index; +#endif if (size > KMALLOC_MAX_CACHE_SIZE) return kmalloc_large(size, flags); #ifndef CONFIG_SLOB - if (!(flags & GFP_DMA)) { - int index = kmalloc_index(size); + index = kmalloc_index(size); - if (!index) - return ZERO_SIZE_PTR; + if (!index) + return ZERO_SIZE_PTR; - return kmem_cache_alloc_trace(kmalloc_caches[index], - flags, size); - } + return kmem_cache_alloc_trace( + kmalloc_caches[kmalloc_type(flags)][index], + flags, size); #endif } return __kmalloc(size, flags); @@ -517,13 +536,14 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) { #ifndef CONFIG_SLOB if (__builtin_constant_p(size) && - size <= KMALLOC_MAX_CACHE_SIZE && !(flags & GFP_DMA)) { + size <= KMALLOC_MAX_CACHE_SIZE) { int i = kmalloc_index(size); if (!i) return ZERO_SIZE_PTR; - return kmem_cache_alloc_node_trace(kmalloc_caches[i], + return kmem_cache_alloc_node_trace( + kmalloc_caches[kmalloc_type(flags)][i], flags, node, size); } #endif diff --git a/mm/slab.c b/mm/slab.c index e8b1e6ea211f..5dd23f6fd451 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1280,7 +1280,7 @@ void __init kmem_cache_init(void) * Initialize the caches that provide memory for the kmem_cache_node * structures first. Without this, further allocations will bug. */ - kmalloc_caches[INDEX_NODE] = create_kmalloc_cache( + kmalloc_caches[KMALLOC_NORMAL][INDEX_NODE] = create_kmalloc_cache( kmalloc_info[INDEX_NODE].name, kmalloc_size(INDEX_NODE), ARCH_KMALLOC_FLAGS); slab_state = PARTIAL_NODE; @@ -1295,7 +1295,7 @@ void __init kmem_cache_init(void) for_each_online_node(nid) { init_list(kmem_cache, &init_kmem_cache_node[CACHE_CACHE + nid], nid); - init_list(kmalloc_caches[INDEX_NODE], + init_list(kmalloc_caches[KMALLOC_NORMAL][INDEX_NODE], &init_kmem_cache_node[SIZE_NODE + nid], nid); } } diff --git a/mm/slab_common.c b/mm/slab_common.c index c70bd327329b..49891aaea1fa 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -917,14 +917,10 @@ struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size, return s; } -struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1]; +struct kmem_cache * +kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1] __ro_after_init; EXPORT_SYMBOL(kmalloc_caches); -#ifdef CONFIG_ZONE_DMA -struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1]; -EXPORT_SYMBOL(kmalloc_dma_caches); -#endif - /* * Conversion table for small slabs sizes / 8 to the index in the * kmalloc array. This is necessary for slabs < 192 since we have non power @@ -984,12 +980,7 @@ struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags) index = fls(size - 1); } -#ifdef CONFIG_ZONE_DMA - if (unlikely((flags & GFP_DMA))) - return kmalloc_dma_caches[index]; - -#endif - return kmalloc_caches[index]; + return kmalloc_caches[kmalloc_type(flags)][index]; } /* @@ -1063,7 +1054,8 @@ void __init setup_kmalloc_cache_index_table(void) static void __init new_kmalloc_cache(int idx, unsigned long flags) { - kmalloc_caches[idx] = create_kmalloc_cache(kmalloc_info[idx].name, + kmalloc_caches[KMALLOC_NORMAL][idx] = create_kmalloc_cache( + kmalloc_info[idx].name, kmalloc_info[idx].size, flags); } @@ -1075,9 +1067,10 @@ static void __init new_kmalloc_cache(int idx, unsigned long flags) void __init create_kmalloc_caches(unsigned long flags) { int i; + int type = KMALLOC_NORMAL; for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) { - if (!kmalloc_caches[i]) + if (!kmalloc_caches[type][i]) new_kmalloc_cache(i, flags); /* @@ -1085,9 +1078,9 @@ void __init create_kmalloc_caches(unsigned long flags) * These have to be created immediately after the * earlier power of two caches */ - if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1] && i == 6) + if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[type][1] && i == 6) new_kmalloc_cache(1, flags); - if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[2] && i == 7) + if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[type][2] && i == 7) new_kmalloc_cache(2, flags); } @@ -1096,7 +1089,7 @@ void __init create_kmalloc_caches(unsigned long flags) #ifdef CONFIG_ZONE_DMA for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) { - struct kmem_cache *s = kmalloc_caches[i]; + struct kmem_cache *s = kmalloc_caches[KMALLOC_NORMAL][i]; if (s) { int size = kmalloc_size(i); @@ -1104,8 +1097,8 @@ void __init create_kmalloc_caches(unsigned long flags) "dma-kmalloc-%d", size); BUG_ON(!n); - kmalloc_dma_caches[i] = create_kmalloc_cache(n, - size, SLAB_CACHE_DMA | flags); + kmalloc_caches[KMALLOC_DMA][i] = create_kmalloc_cache( + n, size, SLAB_CACHE_DMA | flags); } } #endif diff --git a/mm/slub.c b/mm/slub.c index 68ae0a5528c5..6f4d7d869a07 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4732,6 +4732,7 @@ static int list_locations(struct kmem_cache *s, char *buf, static void __init resiliency_test(void) { u8 *p; + int type = KMALLOC_NORMAL; BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10); @@ -4744,7 +4745,7 @@ static void __init resiliency_test(void) pr_err("\n1. kmalloc-16: Clobber Redzone/next pointer 0x12->0x%p\n\n", p + 16); - validate_slab_cache(kmalloc_caches[4]); + validate_slab_cache(kmalloc_caches[type][4]); /* Hmmm... The next two are dangerous */ p = kzalloc(32, GFP_KERNEL); @@ -4753,33 +4754,33 @@ static void __init resiliency_test(void) p); pr_err("If allocated object is overwritten then not detectable\n\n"); - validate_slab_cache(kmalloc_caches[5]); + validate_slab_cache(kmalloc_caches[type][5]); p = kzalloc(64, GFP_KERNEL); p += 64 + (get_cycles() & 0xff) * sizeof(void *); *p = 0x56; pr_err("\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n", p); pr_err("If allocated object is overwritten then not detectable\n\n"); - validate_slab_cache(kmalloc_caches[6]); + validate_slab_cache(kmalloc_caches[type][6]); pr_err("\nB. Corruption after free\n"); p = kzalloc(128, GFP_KERNEL); kfree(p); *p = 0x78; pr_err("1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p); - validate_slab_cache(kmalloc_caches[7]); + validate_slab_cache(kmalloc_caches[type][7]); p = kzalloc(256, GFP_KERNEL); kfree(p); p[50] = 0x9a; pr_err("\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p); - validate_slab_cache(kmalloc_caches[8]); + validate_slab_cache(kmalloc_caches[type][8]); p = kzalloc(512, GFP_KERNEL); kfree(p); p[512] = 0xab; pr_err("\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p); - validate_slab_cache(kmalloc_caches[9]); + validate_slab_cache(kmalloc_caches[type][9]); } #else #ifdef CONFIG_SYSFS From b24663e16206b410839b3d56ded916a0ee7ba8f4 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 26 Oct 2018 15:05:38 -0700 Subject: [PATCH 1427/3715] BACKPORT: mm, slab/slub: introduce kmalloc-reclaimable caches Kmem caches can be created with a SLAB_RECLAIM_ACCOUNT flag, which indicates they contain objects which can be reclaimed under memory pressure (typically through a shrinker). This makes the slab pages accounted as NR_SLAB_RECLAIMABLE in vmstat, which is reflected also the MemAvailable meminfo counter and in overcommit decisions. The slab pages are also allocated with __GFP_RECLAIMABLE, which is good for anti-fragmentation through grouping pages by mobility. The generic kmalloc-X caches are created without this flag, but sometimes are used also for objects that can be reclaimed, which due to varying size cannot have a dedicated kmem cache with SLAB_RECLAIM_ACCOUNT flag. A prominent example are dcache external names, which prompted the creation of a new, manually managed vmstat counter NR_INDIRECTLY_RECLAIMABLE_BYTES in commit f1782c9bc547 ("dcache: account external names as indirectly reclaimable memory"). To better handle this and any other similar cases, this patch introduces SLAB_RECLAIM_ACCOUNT variants of kmalloc caches, named kmalloc-rcl-X. They are used whenever the kmalloc() call passes __GFP_RECLAIMABLE among gfp flags. They are added to the kmalloc_caches array as a new type. Allocations with both __GFP_DMA and __GFP_RECLAIMABLE will use a dma type cache. This change only applies to SLAB and SLUB, not SLOB. This is fine, since SLOB's target are tiny system and this patch does add some overhead of kmem management objects. Link: http://lkml.kernel.org/r/20180731090649.16028-3-vbabka@suse.cz Signed-off-by: Vlastimil Babka Acked-by: Mel Gorman Acked-by: Christoph Lameter Acked-by: Roman Gushchin Cc: David Rientjes Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Laura Abbott Cc: Matthew Wilcox Cc: Michal Hocko Cc: Sumit Semwal Cc: Vijayanand Jitta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 1291523f2c1d631fea34102fd241fb54a4e8f7a0) Conflicts: mm/slab_common.c (1. replace not yet existing slab_flags_t with unsigned long 2. change %u to %lu in kasprintf to prevent compile warnings) Bug: 138148041 Test: verify KReclaimable accounting after ION allocation+deallocation Change-Id: Ibe56723da05349d94e3f962b96aa223fc154785e Signed-off-by: Suren Baghdasaryan --- include/linux/slab.h | 16 ++++++++++++++- mm/slab_common.c | 48 ++++++++++++++++++++++++++++---------------- 2 files changed, 46 insertions(+), 18 deletions(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index 7b20c0597a9c..116a2b7facfd 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -267,8 +267,13 @@ static inline const char *__check_heap_object(const void *ptr, #define SLAB_OBJ_MIN_SIZE (KMALLOC_MIN_SIZE < 16 ? \ (KMALLOC_MIN_SIZE) : 16) +/* + * Whenever changing this, take care of that kmalloc_type() and + * create_kmalloc_caches() still work as intended. + */ enum kmalloc_cache_type { KMALLOC_NORMAL = 0, + KMALLOC_RECLAIM, #ifdef CONFIG_ZONE_DMA KMALLOC_DMA, #endif @@ -282,12 +287,21 @@ kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1]; static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags) { int is_dma = 0; + int type_dma = 0; + int is_reclaimable; #ifdef CONFIG_ZONE_DMA is_dma = !!(flags & __GFP_DMA); + type_dma = is_dma * KMALLOC_DMA; #endif - return is_dma; + is_reclaimable = !!(flags & __GFP_RECLAIMABLE); + + /* + * If an allocation is both __GFP_DMA and __GFP_RECLAIMABLE, return + * KMALLOC_DMA and effectively ignore __GFP_RECLAIMABLE + */ + return type_dma + (is_reclaimable & !is_dma) * KMALLOC_RECLAIM; } /* diff --git a/mm/slab_common.c b/mm/slab_common.c index 49891aaea1fa..7eb713278eaa 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -1052,10 +1052,21 @@ void __init setup_kmalloc_cache_index_table(void) } } -static void __init new_kmalloc_cache(int idx, unsigned long flags) +static void __init +new_kmalloc_cache(int idx, int type, unsigned long flags) { - kmalloc_caches[KMALLOC_NORMAL][idx] = create_kmalloc_cache( - kmalloc_info[idx].name, + const char *name; + + if (type == KMALLOC_RECLAIM) { + flags |= SLAB_RECLAIM_ACCOUNT; + name = kasprintf(GFP_NOWAIT, "kmalloc-rcl-%lu", + kmalloc_info[idx].size); + BUG_ON(!name); + } else { + name = kmalloc_info[idx].name; + } + + kmalloc_caches[type][idx] = create_kmalloc_cache(name, kmalloc_info[idx].size, flags); } @@ -1066,22 +1077,25 @@ static void __init new_kmalloc_cache(int idx, unsigned long flags) */ void __init create_kmalloc_caches(unsigned long flags) { - int i; - int type = KMALLOC_NORMAL; + int i, type; - for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) { - if (!kmalloc_caches[type][i]) - new_kmalloc_cache(i, flags); + for (type = KMALLOC_NORMAL; type <= KMALLOC_RECLAIM; type++) { + for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) { + if (!kmalloc_caches[type][i]) + new_kmalloc_cache(i, type, flags); - /* - * Caches that are not of the two-to-the-power-of size. - * These have to be created immediately after the - * earlier power of two caches - */ - if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[type][1] && i == 6) - new_kmalloc_cache(1, flags); - if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[type][2] && i == 7) - new_kmalloc_cache(2, flags); + /* + * Caches that are not of the two-to-the-power-of size. + * These have to be created immediately after the + * earlier power of two caches + */ + if (KMALLOC_MIN_SIZE <= 32 && i == 6 && + !kmalloc_caches[type][1]) + new_kmalloc_cache(1, type, flags); + if (KMALLOC_MIN_SIZE <= 64 && i == 7 && + !kmalloc_caches[type][2]) + new_kmalloc_cache(2, type, flags); + } } /* Kmalloc array is now usable */ From 5bd16ffb9627d0050de6e143ce8362cf27a54c22 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 26 Oct 2018 15:05:41 -0700 Subject: [PATCH 1428/3715] UPSTREAM: dcache: allocate external names from reclaimable kmalloc caches We can use the newly introduced kmalloc-reclaimable-X caches, to allocate external names in dcache, which will take care of the proper accounting automatically, and also improve anti-fragmentation page grouping. This effectively reverts commit f1782c9bc547 ("dcache: account external names as indirectly reclaimable memory") and instead passes __GFP_RECLAIMABLE to kmalloc(). The accounting thus moves from NR_INDIRECTLY_RECLAIMABLE_BYTES to NR_SLAB_RECLAIMABLE, which is also considered in MemAvailable calculation and overcommit decisions. Link: http://lkml.kernel.org/r/20180731090649.16028-4-vbabka@suse.cz Signed-off-by: Vlastimil Babka Acked-by: Mel Gorman Acked-by: Roman Gushchin Cc: Christoph Lameter Cc: David Rientjes Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Laura Abbott Cc: Matthew Wilcox Cc: Michal Hocko Cc: Sumit Semwal Cc: Vijayanand Jitta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 2e03b4bc4ae84fcc0eee00e5ba5d228901d38809) Bug: 138148041 Test: verify KReclaimable accounting after ION allocation+deallocation Change-Id: Ib395c861580501c754835736bfc03464d26b9aeb Signed-off-by: Suren Baghdasaryan --- fs/dcache.c | 37 +++++++++---------------------------- 1 file changed, 9 insertions(+), 28 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index a3d9dd74356d..2f01b271023d 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -270,24 +270,10 @@ static void __d_free(struct rcu_head *head) kmem_cache_free(dentry_cache, dentry); } -static void __d_free_external_name(struct rcu_head *head) -{ - struct external_name *name = container_of(head, struct external_name, - u.head); - - mod_node_page_state(page_pgdat(virt_to_page(name)), - NR_INDIRECTLY_RECLAIMABLE_BYTES, - -ksize(name)); - - kfree(name); -} - static void __d_free_external(struct rcu_head *head) { struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); - - __d_free_external_name(&external_name(dentry)->u.head); - + kfree(external_name(dentry)); kmem_cache_free(dentry_cache, dentry); } @@ -319,7 +305,7 @@ void release_dentry_name_snapshot(struct name_snapshot *name) struct external_name *p; p = container_of(name->name, struct external_name, name[0]); if (unlikely(atomic_dec_and_test(&p->u.count))) - call_rcu(&p->u.head, __d_free_external_name); + kfree_rcu(p, u.head); } } EXPORT_SYMBOL(release_dentry_name_snapshot); @@ -1615,7 +1601,6 @@ EXPORT_SYMBOL(d_invalidate); struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) { - struct external_name *ext = NULL; struct dentry *dentry; char *dname; int err; @@ -1636,13 +1621,15 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) dname = dentry->d_iname; } else if (name->len > DNAME_INLINE_LEN-1) { size_t size = offsetof(struct external_name, name[1]); - ext = kmalloc(size + name->len, GFP_KERNEL_ACCOUNT); - if (!ext) { + struct external_name *p = kmalloc(size + name->len, + GFP_KERNEL_ACCOUNT | + __GFP_RECLAIMABLE); + if (!p) { kmem_cache_free(dentry_cache, dentry); return NULL; } - atomic_set(&ext->u.count, 1); - dname = ext->name; + atomic_set(&p->u.count, 1); + dname = p->name; } else { dname = dentry->d_iname; } @@ -1682,12 +1669,6 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) } } - if (unlikely(ext)) { - pg_data_t *pgdat = page_pgdat(virt_to_page(ext)); - mod_node_page_state(pgdat, NR_INDIRECTLY_RECLAIMABLE_BYTES, - ksize(ext)); - } - this_cpu_inc(nr_dentry); return dentry; @@ -2782,7 +2763,7 @@ static void copy_name(struct dentry *dentry, struct dentry *target) dentry->d_name.hash_len = target->d_name.hash_len; } if (old_name && likely(atomic_dec_and_test(&old_name->u.count))) - call_rcu(&old_name->u.head, __d_free_external_name); + kfree_rcu(old_name, u.head); } static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target) From 7ca01d96f44a5b9eeaca5d60f17a273fe0e04904 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 26 Oct 2018 15:05:46 -0700 Subject: [PATCH 1429/3715] BACKPORT: mm: rename and change semantics of nr_indirectly_reclaimable_bytes The vmstat counter NR_INDIRECTLY_RECLAIMABLE_BYTES was introduced by commit eb59254608bc ("mm: introduce NR_INDIRECTLY_RECLAIMABLE_BYTES") with the goal of accounting objects that can be reclaimed, but cannot be allocated via a SLAB_RECLAIM_ACCOUNT cache. This is now possible via kmalloc() with __GFP_RECLAIMABLE flag, and the dcache external names user is converted. The counter is however still useful for accounting direct page allocations (i.e. not slab) with a shrinker, such as the ION page pool. So keep it, and: - change granularity to pages to be more like other counters; sub-page allocations should be able to use kmalloc - rename the counter to NR_KERNEL_MISC_RECLAIMABLE - expose the counter again in vmstat as "nr_kernel_misc_reclaimable"; we can again remove the check for not printing "hidden" counters Link: http://lkml.kernel.org/r/20180731090649.16028-5-vbabka@suse.cz Signed-off-by: Vlastimil Babka Acked-by: Christoph Lameter Acked-by: Roman Gushchin Cc: Vijayanand Jitta Cc: Laura Abbott Cc: Sumit Semwal Cc: David Rientjes Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit b29940c1abd7a4c3abeb926df0a5ec84d6902d47) Conflicts: drivers/staging/android/ion/ion_page_pool.c (1. NR_INDIRECTLY_RECLAIMABLE_BYTES accounting is absent, ignore it since this patch replaces it anyway.) Bug: 138148041 Test: verify KReclaimable accounting after ION allocation+deallocation Change-Id: I6196eaa1e72f16dbde7a2894dc42435e75ae156c Signed-off-by: Suren Baghdasaryan --- drivers/staging/android/ion/ion_page_pool.c | 5 +++++ include/linux/mmzone.h | 2 +- mm/page_alloc.c | 19 +++++++------------ mm/util.c | 3 +-- mm/vmstat.c | 6 +----- 5 files changed, 15 insertions(+), 20 deletions(-) diff --git a/drivers/staging/android/ion/ion_page_pool.c b/drivers/staging/android/ion/ion_page_pool.c index 817849df9de3..1e0a16041280 100644 --- a/drivers/staging/android/ion/ion_page_pool.c +++ b/drivers/staging/android/ion/ion_page_pool.c @@ -50,6 +50,9 @@ static int ion_page_pool_add(struct ion_page_pool *pool, struct page *page) list_add_tail(&page->lru, &pool->low_items); pool->low_count++; } + + mod_node_page_state(page_pgdat(page), NR_KERNEL_MISC_RECLAIMABLE, + 1 << pool->order); mutex_unlock(&pool->mutex); return 0; } @@ -69,6 +72,8 @@ static struct page *ion_page_pool_remove(struct ion_page_pool *pool, bool high) } list_del(&page->lru); + mod_node_page_state(page_pgdat(page), NR_KERNEL_MISC_RECLAIMABLE, + -(1 << pool->order)); return page; } diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index af41d43cf461..a12bd7d1609f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -184,7 +184,7 @@ enum node_stat_item { NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */ NR_DIRTIED, /* page dirtyings since bootup */ NR_WRITTEN, /* page writings since bootup */ - NR_INDIRECTLY_RECLAIMABLE_BYTES, /* measured in bytes */ + NR_KERNEL_MISC_RECLAIMABLE, /* reclaimable non-slab kernel pages */ NR_VM_NODE_STAT_ITEMS }; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 94575070bd4e..4bb46b766afd 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4592,6 +4592,7 @@ long si_mem_available(void) unsigned long pagecache; unsigned long wmark_low = 0; unsigned long pages[NR_LRU_LISTS]; + unsigned long reclaimable; struct zone *zone; int lru; @@ -4617,19 +4618,13 @@ long si_mem_available(void) available += pagecache; /* - * Part of the reclaimable slab consists of items that are in use, - * and cannot be freed. Cap this estimate at the low watermark. + * Part of the reclaimable slab and other kernel memory consists of + * items that are in use, and cannot be freed. Cap this estimate at the + * low watermark. */ - available += global_node_page_state(NR_SLAB_RECLAIMABLE) - - min(global_node_page_state(NR_SLAB_RECLAIMABLE) / 2, - wmark_low); - - /* - * Part of the kernel memory, which can be released under memory - * pressure. - */ - available += global_node_page_state(NR_INDIRECTLY_RECLAIMABLE_BYTES) >> - PAGE_SHIFT; + reclaimable = global_node_page_state(NR_SLAB_RECLAIMABLE) + + global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE); + available += reclaimable - min(reclaimable / 2, wmark_low); if (available < 0) available = 0; diff --git a/mm/util.c b/mm/util.c index 842ba5fb662e..990acbf5c2f8 100644 --- a/mm/util.c +++ b/mm/util.c @@ -639,8 +639,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) * Part of the kernel memory, which can be released * under memory pressure. */ - free += global_node_page_state( - NR_INDIRECTLY_RECLAIMABLE_BYTES) >> PAGE_SHIFT; + free += global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE); /* * Leave reserved pages. The pages are not for anonymous pages. diff --git a/mm/vmstat.c b/mm/vmstat.c index 83d26e8917d0..cb230b98035c 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1094,7 +1094,7 @@ const char * const vmstat_text[] = { "nr_vmscan_immediate_reclaim", "nr_dirtied", "nr_written", - "", /* nr_indirectly_reclaimable */ + "nr_kernel_misc_reclaimable", /* enum writeback_stat_item counters */ "nr_dirty_threshold", @@ -1676,10 +1676,6 @@ static int vmstat_show(struct seq_file *m, void *arg) unsigned long *l = arg; unsigned long off = l - (unsigned long *)m->private; - /* Skip hidden vmstat items. */ - if (*vmstat_text[off] == '\0') - return 0; - seq_puts(m, vmstat_text[off]); seq_put_decimal_ull(m, " ", *l); seq_putc(m, '\n'); From 1d58dbc09613138e20f23ad0a6361690b6f0f5e3 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 26 Oct 2018 15:05:50 -0700 Subject: [PATCH 1430/3715] UPSTREAM: mm, proc: add KReclaimable to /proc/meminfo The vmstat NR_KERNEL_MISC_RECLAIMABLE counter is for kernel non-slab allocations that can be reclaimed via shrinker. In /proc/meminfo, we can show the sum of all reclaimable kernel allocations (including slab) as "KReclaimable". Add the same counter also to per-node meminfo under /sys With this counter, users will have more complete information about kernel memory usage. Non-slab reclaimable pages (currently just the ION allocator) will not be missing from /proc/meminfo, making users wonder where part of their memory went. More precisely, they already appear in MemAvailable, but without the new counter, it's not obvious why the value in MemAvailable doesn't fully correspond with the sum of other counters participating in it. Link: http://lkml.kernel.org/r/20180731090649.16028-6-vbabka@suse.cz Signed-off-by: Vlastimil Babka Acked-by: Roman Gushchin Cc: Christoph Lameter Cc: David Rientjes Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Laura Abbott Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Sumit Semwal Cc: Vijayanand Jitta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 61f94e18de94f79abaad3bb83549ff78923ac785) Bug: 138148041 Test: verify KReclaimable accounting after ION allocation+deallocation Change-Id: I646e1a4f1217de902c70466906ca053c6c825185 Signed-off-by: Suren Baghdasaryan --- Documentation/filesystems/proc.txt | 4 ++++ drivers/base/node.c | 19 ++++++++++++------- fs/proc/meminfo.c | 16 ++++++++-------- 3 files changed, 24 insertions(+), 15 deletions(-) diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 09bc35fbe66b..76bfa25151bf 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -862,6 +862,7 @@ Writeback: 0 kB AnonPages: 861800 kB Mapped: 280372 kB Shmem: 644 kB +KReclaimable: 168048 kB Slab: 284364 kB SReclaimable: 159856 kB SUnreclaim: 124508 kB @@ -925,6 +926,9 @@ AnonHugePages: Non-file backed huge pages mapped into userspace page tables ShmemHugePages: Memory used by shared memory (shmem) and tmpfs allocated with huge pages ShmemPmdMapped: Shared memory mapped into userspace with huge pages +KReclaimable: Kernel allocations that the kernel will attempt to reclaim + under memory pressure. Includes SReclaimable (below), and other + direct allocations with a shrinker. Slab: in-kernel data structures cache SReclaimable: Part of Slab, that might be reclaimed, such as caches SUnreclaim: Part of Slab, that cannot be reclaimed on memory pressure diff --git a/drivers/base/node.c b/drivers/base/node.c index bde133e7dee2..7801b94ca6f2 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -67,8 +67,11 @@ static ssize_t node_read_meminfo(struct device *dev, int nid = dev->id; struct pglist_data *pgdat = NODE_DATA(nid); struct sysinfo i; + unsigned long sreclaimable, sunreclaimable; si_meminfo_node(&i, nid); + sreclaimable = node_page_state(pgdat, NR_SLAB_RECLAIMABLE); + sunreclaimable = node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE); n = sprintf(buf, "Node %d MemTotal: %8lu kB\n" "Node %d MemFree: %8lu kB\n" @@ -121,6 +124,7 @@ static ssize_t node_read_meminfo(struct device *dev, "Node %d NFS_Unstable: %8lu kB\n" "Node %d Bounce: %8lu kB\n" "Node %d WritebackTmp: %8lu kB\n" + "Node %d KReclaimable: %8lu kB\n" "Node %d Slab: %8lu kB\n" "Node %d SReclaimable: %8lu kB\n" "Node %d SUnreclaim: %8lu kB\n" @@ -144,20 +148,21 @@ static ssize_t node_read_meminfo(struct device *dev, nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)), nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)), nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)), - nid, K(node_page_state(pgdat, NR_SLAB_RECLAIMABLE) + - node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE)), - nid, K(node_page_state(pgdat, NR_SLAB_RECLAIMABLE)), + nid, K(sreclaimable + + node_page_state(pgdat, NR_KERNEL_MISC_RECLAIMABLE)), + nid, K(sreclaimable + sunreclaimable), + nid, K(sreclaimable), + nid, K(sunreclaimable) #ifdef CONFIG_TRANSPARENT_HUGEPAGE - nid, K(node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE)), + , nid, K(node_page_state(pgdat, NR_ANON_THPS) * HPAGE_PMD_NR), nid, K(node_page_state(pgdat, NR_SHMEM_THPS) * HPAGE_PMD_NR), nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) * - HPAGE_PMD_NR)); -#else - nid, K(node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE))); + HPAGE_PMD_NR) #endif + ); n += hugetlb_report_node_meminfo(nid, buf + n); return n; } diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 8c55b69a559c..585651de1f5b 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -50,6 +50,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) long cached; long available; unsigned long pages[NR_LRU_LISTS]; + unsigned long sreclaimable, sunreclaim; int lru; si_meminfo(&i); @@ -65,6 +66,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v) pages[lru] = global_node_page_state(NR_LRU_BASE + lru); available = si_mem_available(); + sreclaimable = global_node_page_state(NR_SLAB_RECLAIMABLE); + sunreclaim = global_node_page_state(NR_SLAB_UNRECLAIMABLE); show_val_kb(m, "MemTotal: ", i.totalram); show_val_kb(m, "MemFree: ", i.freeram); @@ -106,14 +109,11 @@ static int meminfo_proc_show(struct seq_file *m, void *v) show_val_kb(m, "Mapped: ", global_node_page_state(NR_FILE_MAPPED)); show_val_kb(m, "Shmem: ", i.sharedram); - show_val_kb(m, "Slab: ", - global_node_page_state(NR_SLAB_RECLAIMABLE) + - global_node_page_state(NR_SLAB_UNRECLAIMABLE)); - - show_val_kb(m, "SReclaimable: ", - global_node_page_state(NR_SLAB_RECLAIMABLE)); - show_val_kb(m, "SUnreclaim: ", - global_node_page_state(NR_SLAB_UNRECLAIMABLE)); + show_val_kb(m, "KReclaimable: ", sreclaimable + + global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE)); + show_val_kb(m, "Slab: ", sreclaimable + sunreclaim); + show_val_kb(m, "SReclaimable: ", sreclaimable); + show_val_kb(m, "SUnreclaim: ", sunreclaim); seq_printf(m, "KernelStack: %8lu kB\n", global_zone_page_state(NR_KERNEL_STACK_KB)); #ifdef CONFIG_SHADOW_CALL_STACK From 9bd7c69e153ecfd80473e9eb4d5ff21eecafeb6d Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 26 Oct 2018 15:05:55 -0700 Subject: [PATCH 1431/3715] UPSTREAM: mm, slab: shorten kmalloc cache names for large sizes Kmalloc cache names can get quite long for large object sizes, when the sizes are expressed in bytes. Use 'k' and 'M' prefixes to make the names as short as possible e.g. in /proc/slabinfo. This works, as we mostly use power-of-two sizes, with exceptions only below 1k. Example: 'kmalloc-4194304' becomes 'kmalloc-4M' Link: http://lkml.kernel.org/r/20180731090649.16028-7-vbabka@suse.cz Suggested-by: Matthew Wilcox Signed-off-by: Vlastimil Babka Acked-by: Mel Gorman Acked-by: Christoph Lameter Acked-by: Roman Gushchin Cc: David Rientjes Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Laura Abbott Cc: Michal Hocko Cc: Sumit Semwal Cc: Vijayanand Jitta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit f0d77874143df90f9831f30254eb149fc4d76b40) Bug: 138148041 Test: verify KReclaimable accounting after ION allocation+deallocation Change-Id: Ib7facb09f333ccd7c224a08e59dede9e6a0c1e0c Signed-off-by: Suren Baghdasaryan --- mm/slab_common.c | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index 7eb713278eaa..20da89561fd2 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -994,15 +994,15 @@ const struct kmalloc_info_struct kmalloc_info[] __initconst = { {"kmalloc-16", 16}, {"kmalloc-32", 32}, {"kmalloc-64", 64}, {"kmalloc-128", 128}, {"kmalloc-256", 256}, {"kmalloc-512", 512}, - {"kmalloc-1024", 1024}, {"kmalloc-2048", 2048}, - {"kmalloc-4096", 4096}, {"kmalloc-8192", 8192}, - {"kmalloc-16384", 16384}, {"kmalloc-32768", 32768}, - {"kmalloc-65536", 65536}, {"kmalloc-131072", 131072}, - {"kmalloc-262144", 262144}, {"kmalloc-524288", 524288}, - {"kmalloc-1048576", 1048576}, {"kmalloc-2097152", 2097152}, - {"kmalloc-4194304", 4194304}, {"kmalloc-8388608", 8388608}, - {"kmalloc-16777216", 16777216}, {"kmalloc-33554432", 33554432}, - {"kmalloc-67108864", 67108864} + {"kmalloc-1k", 1024}, {"kmalloc-2k", 2048}, + {"kmalloc-4k", 4096}, {"kmalloc-8k", 8192}, + {"kmalloc-16k", 16384}, {"kmalloc-32k", 32768}, + {"kmalloc-64k", 65536}, {"kmalloc-128k", 131072}, + {"kmalloc-256k", 262144}, {"kmalloc-512k", 524288}, + {"kmalloc-1M", 1048576}, {"kmalloc-2M", 2097152}, + {"kmalloc-4M", 4194304}, {"kmalloc-8M", 8388608}, + {"kmalloc-16M", 16777216}, {"kmalloc-32M", 33554432}, + {"kmalloc-64M", 67108864} }; /* @@ -1052,6 +1052,21 @@ void __init setup_kmalloc_cache_index_table(void) } } +static const char * +kmalloc_cache_name(const char *prefix, unsigned int size) +{ + + static const char units[3] = "\0kM"; + int idx = 0; + + while (size >= 1024 && (size % 1024 == 0)) { + size /= 1024; + idx++; + } + + return kasprintf(GFP_NOWAIT, "%s-%u%c", prefix, size, units[idx]); +} + static void __init new_kmalloc_cache(int idx, int type, unsigned long flags) { @@ -1059,7 +1074,7 @@ new_kmalloc_cache(int idx, int type, unsigned long flags) if (type == KMALLOC_RECLAIM) { flags |= SLAB_RECLAIM_ACCOUNT; - name = kasprintf(GFP_NOWAIT, "kmalloc-rcl-%lu", + name = kmalloc_cache_name("kmalloc-rcl", kmalloc_info[idx].size); BUG_ON(!name); } else { @@ -1107,8 +1122,7 @@ void __init create_kmalloc_caches(unsigned long flags) if (s) { int size = kmalloc_size(i); - char *n = kasprintf(GFP_NOWAIT, - "dma-kmalloc-%d", size); + const char *n = kmalloc_cache_name("dma-kmalloc", size); BUG_ON(!n); kmalloc_caches[KMALLOC_DMA][i] = create_kmalloc_cache( From c8ec8fccac51ad16853d6c719a6cea43d6ea06f8 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 28 Dec 2018 00:33:17 -0800 Subject: [PATCH 1432/3715] UPSTREAM: include/linux/slab.h: fix sparse warning in kmalloc_type() Multiple people have reported the following sparse warning: ./include/linux/slab.h:332:43: warning: dubious: x & !y The minimal fix would be to change the logical & to boolean &&, which emits the same code, but Andrew has suggested that the branch-avoiding tricks are maybe not worthwile. David Laight provided a nice comparison of disassembly of multiple variants, which shows that the current version produces a 4 deep dependency chain, and fixing the sparse warning by changing logical and to multiplication emits an IMUL, making it even more expensive. The code as rewritten by this patch yielded the best disassembly, with a single predictable branch for the most common case, and a ternary operator for the rest, which gcc seems to compile without a branch or cmov by itself. The result should be more readable, without a sparse warning and probably also faster for the common case. Link: http://lkml.kernel.org/r/80340595-d7c5-97b9-4f6c-23fa893a91e9@suse.cz Fixes: 1291523f2c1d ("mm, slab/slub: introduce kmalloc-reclaimable caches") Reviewed-by: Andrew Morton Signed-off-by: Vlastimil Babka Reported-by: Bart Van Assche Reported-by: Darryl T. Agostinelli Reported-by: Masahiro Yamada Suggested-by: Andrew Morton Suggested-by: David Laight Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 4e45f712d82c6b7a37e02faf388173ad12ab464d) Bug: 138148041 Test: verify KReclaimable accounting after ION allocation+deallocation Change-Id: I008bbef44dc64e454c8c93a27acf736843d91519 Signed-off-by: Suren Baghdasaryan --- include/linux/slab.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index 116a2b7facfd..5921f381f5bc 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -286,22 +286,22 @@ kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1]; static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags) { - int is_dma = 0; - int type_dma = 0; - int is_reclaimable; - #ifdef CONFIG_ZONE_DMA - is_dma = !!(flags & __GFP_DMA); - type_dma = is_dma * KMALLOC_DMA; -#endif - - is_reclaimable = !!(flags & __GFP_RECLAIMABLE); + /* + * The most common case is KMALLOC_NORMAL, so test for it + * with a single branch for both flags. + */ + if (likely((flags & (__GFP_DMA | __GFP_RECLAIMABLE)) == 0)) + return KMALLOC_NORMAL; /* - * If an allocation is both __GFP_DMA and __GFP_RECLAIMABLE, return - * KMALLOC_DMA and effectively ignore __GFP_RECLAIMABLE + * At least one of the flags has to be set. If both are, __GFP_DMA + * is more important. */ - return type_dma + (is_reclaimable & !is_dma) * KMALLOC_RECLAIM; + return flags & __GFP_DMA ? KMALLOC_DMA : KMALLOC_RECLAIM; +#else + return flags & __GFP_RECLAIMABLE ? KMALLOC_RECLAIM : KMALLOC_NORMAL; +#endif } /* From 30b38df5ef833255911749ac580409b5234adca9 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 13 Nov 2019 10:45:00 -0800 Subject: [PATCH 1433/3715] ANDROID: staging: android: ion: Expose total heap and pool sizes via sysfs Add sysfs attributes to track ion total heap and pool memory allocations. The following sysfs attributes are added: /sys/kernel/ion/total_heaps_kb /sys/kernel/ion/total_pools_kb Bug: 138148041 Test: adb shell cat /sys/kernel/ion/* Change-Id: If92770dc3389af865c619525f04d3ba0e013b244 Signed-off-by: Suren Baghdasaryan --- Documentation/ABI/testing/sysfs-kernel-ion | 27 ++++++++ drivers/staging/android/ion/ion.c | 68 ++++++++++++++++++++- drivers/staging/android/ion/ion.h | 1 + drivers/staging/android/ion/ion_page_pool.c | 17 ++++++ 4 files changed, 111 insertions(+), 2 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-kernel-ion diff --git a/Documentation/ABI/testing/sysfs-kernel-ion b/Documentation/ABI/testing/sysfs-kernel-ion new file mode 100644 index 000000000000..f57f970574ae --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-ion @@ -0,0 +1,27 @@ +What: /sys/kernel/ion +Date: Dec 2019 +KernelVersion: 4.14.158 +Contact: Suren Baghdasaryan , + Sandeep Patil +Description: + The /sys/kernel/ion directory contains a snapshot of the + internal state of ION memory heaps and pools. +Users: kernel memory tuning tools + +What: /sys/kernel/ion/total_heaps_kb +Date: Dec 2019 +KernelVersion: 4.14.158 +Contact: Suren Baghdasaryan , + Sandeep Patil +Description: + The total_heaps_kb file is read-only and specifies how much + memory in Kb is allocated to ION heaps. + +What: /sys/kernel/ion/total_pools_kb +Date: Dec 2019 +KernelVersion: 4.14.158 +Contact: Suren Baghdasaryan , + Sandeep Patil +Description: + The total_pools_kb file is read-only and specifies how much + memory in Kb is allocated to ION pools. diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c index dd96ca61a515..4c311b64c50c 100644 --- a/drivers/staging/android/ion/ion.c +++ b/drivers/staging/android/ion/ion.c @@ -42,6 +42,7 @@ static struct ion_device *internal_dev; static int heap_id; +static atomic_long_t total_heap_bytes; bool ion_buffer_cached(struct ion_buffer *buffer) { @@ -120,6 +121,7 @@ static struct ion_buffer *ion_buffer_create(struct ion_heap *heap, mutex_lock(&dev->buffer_lock); ion_buffer_add(dev, buffer); mutex_unlock(&dev->buffer_lock); + atomic_long_add(len, &total_heap_bytes); return buffer; err1: @@ -148,6 +150,7 @@ static void _ion_buffer_destroy(struct ion_buffer *buffer) mutex_lock(&dev->buffer_lock); rb_erase(&buffer->node, &dev->buffers); mutex_unlock(&dev->buffer_lock); + atomic_long_sub(buffer->size, &total_heap_bytes); if (heap->flags & ION_HEAP_FLAG_DEFER_FREE) ion_heap_freelist_add(heap, buffer); @@ -589,6 +592,56 @@ void ion_device_add_heap(struct ion_heap *heap) } EXPORT_SYMBOL(ion_device_add_heap); +static ssize_t +total_heaps_kb_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + u64 size_in_bytes = atomic_long_read(&total_heap_bytes); + + return sprintf(buf, "%llu\n", div_u64(size_in_bytes, 1024)); +} + +static ssize_t +total_pools_kb_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + u64 size_in_bytes = ion_page_pool_nr_pages() * PAGE_SIZE; + + return sprintf(buf, "%llu\n", div_u64(size_in_bytes, 1024)); +} + +static struct kobj_attribute total_heaps_kb_attr = + __ATTR_RO(total_heaps_kb); + +static struct kobj_attribute total_pools_kb_attr = + __ATTR_RO(total_pools_kb); + +static struct attribute *ion_device_attrs[] = { + &total_heaps_kb_attr.attr, + &total_pools_kb_attr.attr, + NULL, +}; + +ATTRIBUTE_GROUPS(ion_device); + +static int ion_init_sysfs(void) +{ + struct kobject *ion_kobj; + int ret; + + ion_kobj = kobject_create_and_add("ion", kernel_kobj); + if (!ion_kobj) + return -ENOMEM; + + ret = sysfs_create_groups(ion_kobj, ion_device_groups); + if (ret) { + kobject_put(ion_kobj); + return ret; + } + + return 0; +} + static int ion_device_create(void) { struct ion_device *idev; @@ -605,8 +658,13 @@ static int ion_device_create(void) ret = misc_register(&idev->dev); if (ret) { pr_err("ion: failed to register misc device.\n"); - kfree(idev); - return ret; + goto err_reg; + } + + ret = ion_init_sysfs(); + if (ret) { + pr_err("ion: failed to add sysfs attributes.\n"); + goto err_sysfs; } idev->debug_root = debugfs_create_dir("ion", NULL); @@ -622,5 +680,11 @@ debugfs_done: plist_head_init(&idev->heaps); internal_dev = idev; return 0; + +err_sysfs: + misc_deregister(&idev->dev); +err_reg: + kfree(idev); + return ret; } subsys_initcall(ion_device_create); diff --git a/drivers/staging/android/ion/ion.h b/drivers/staging/android/ion/ion.h index 621e5f7ceacb..91adccb06a4c 100644 --- a/drivers/staging/android/ion/ion.h +++ b/drivers/staging/android/ion/ion.h @@ -343,6 +343,7 @@ struct ion_page_pool *ion_page_pool_create(gfp_t gfp_mask, unsigned int order, void ion_page_pool_destroy(struct ion_page_pool *pool); struct page *ion_page_pool_alloc(struct ion_page_pool *pool); void ion_page_pool_free(struct ion_page_pool *pool, struct page *page); +long ion_page_pool_nr_pages(void); /** ion_page_pool_shrink - shrinks the size of the memory cached in the pool * @pool: the pool diff --git a/drivers/staging/android/ion/ion_page_pool.c b/drivers/staging/android/ion/ion_page_pool.c index 1e0a16041280..b5d8f71ba6b7 100644 --- a/drivers/staging/android/ion/ion_page_pool.c +++ b/drivers/staging/android/ion/ion_page_pool.c @@ -25,6 +25,13 @@ #include "ion.h" +/* + * We avoid atomic_long_t to minimize cache flushes at the cost of possible + * race which would result in a small accounting inaccuracy that we can + * tolerate. + */ +static long nr_total_pages; + static void *ion_page_pool_alloc_pages(struct ion_page_pool *pool) { struct page *page = alloc_pages(pool->gfp_mask, pool->order); @@ -51,6 +58,7 @@ static int ion_page_pool_add(struct ion_page_pool *pool, struct page *page) pool->low_count++; } + nr_total_pages += 1 << pool->order; mod_node_page_state(page_pgdat(page), NR_KERNEL_MISC_RECLAIMABLE, 1 << pool->order); mutex_unlock(&pool->mutex); @@ -72,6 +80,7 @@ static struct page *ion_page_pool_remove(struct ion_page_pool *pool, bool high) } list_del(&page->lru); + nr_total_pages -= 1 << pool->order; mod_node_page_state(page_pgdat(page), NR_KERNEL_MISC_RECLAIMABLE, -(1 << pool->order)); return page; @@ -117,6 +126,14 @@ static int ion_page_pool_total(struct ion_page_pool *pool, bool high) return count << pool->order; } +long ion_page_pool_nr_pages(void) +{ + /* Correct possible overflow caused by racing writes */ + if (nr_total_pages < 0) + nr_total_pages = 0; + return nr_total_pages; +} + int ion_page_pool_shrink(struct ion_page_pool *pool, gfp_t gfp_mask, int nr_to_scan) { From e54ad58c754108d819a5a1c3586f1825351bee30 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Tue, 17 Dec 2019 09:51:12 -0800 Subject: [PATCH 1434/3715] ANDROID: staging: android: ion: Fix build when CONFIG_ION_SYSTEM_HEAP=n Fixes: 30b38df5ef83 ("ANDROID: staging: android: ion: Expose total heap and pool sizes via sysfs") When CONFIG_ION_SYSTEM_HEAP not set ion_page_pool_nr_pages symbol will not be compiled. Fix this by providing an implementation for this configuration. Bug: 138148041 Test: build with CONFIG_ION_SYSTEM_HEAP=n Signed-off-by: Suren Baghdasaryan Change-Id: I37ccdd4ecc97e9c7e10728834a56b422ac18c3c7 --- drivers/staging/android/ion/ion.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/staging/android/ion/ion.h b/drivers/staging/android/ion/ion.h index 91adccb06a4c..62853c52dc07 100644 --- a/drivers/staging/android/ion/ion.h +++ b/drivers/staging/android/ion/ion.h @@ -343,7 +343,12 @@ struct ion_page_pool *ion_page_pool_create(gfp_t gfp_mask, unsigned int order, void ion_page_pool_destroy(struct ion_page_pool *pool); struct page *ion_page_pool_alloc(struct ion_page_pool *pool); void ion_page_pool_free(struct ion_page_pool *pool, struct page *page); + +#ifdef CONFIG_ION_SYSTEM_HEAP long ion_page_pool_nr_pages(void); +#else +static inline long ion_page_pool_nr_pages(void) { return 0; } +#endif /** ion_page_pool_shrink - shrinks the size of the memory cached in the pool * @pool: the pool From 0d8b2921af273b9545e16ad21375fabcb647c56e Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Fri, 13 Sep 2019 19:08:11 -0500 Subject: [PATCH 1435/3715] rsi: release skb if rsi_prepare_beacon fails commit d563131ef23cbc756026f839a82598c8445bc45f upstream. In rsi_send_beacon, if rsi_prepare_beacon fails the allocated skb should be released. Signed-off-by: Navid Emamdoost Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/rsi/rsi_91x_mgmt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/rsi/rsi_91x_mgmt.c b/drivers/net/wireless/rsi/rsi_91x_mgmt.c index f7b550f900c4..234e41e1cb57 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mgmt.c +++ b/drivers/net/wireless/rsi/rsi_91x_mgmt.c @@ -1576,6 +1576,7 @@ static int rsi_send_beacon(struct rsi_common *common) skb_pull(skb, (64 - dword_align_bytes)); if (rsi_prepare_beacon(common, skb)) { rsi_dbg(ERR_ZONE, "Failed to prepare beacon\n"); + dev_kfree_skb(skb); return -EINVAL; } skb_queue_tail(&common->tx_queue[MGMT_BEACON_Q], skb); From 4f3836c8aee27c43fd26af7d05e214ea4fe1d67e Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Wed, 25 Sep 2019 15:12:29 +0100 Subject: [PATCH 1436/3715] arm64: tegra: Fix 'active-low' warning for Jetson TX1 regulator commit 1e5e929c009559bd7e898ac8e17a5d01037cb057 upstream. Commit 34993594181d ("arm64: tegra: Enable HDMI on Jetson TX1") added a regulator for HDMI on the Jetson TX1 platform. This regulator has an active high enable, but the GPIO specifier for enabling the regulator incorrectly defines it as active-low. This causes the following warning to occur on boot ... WARNING KERN regulator@10 GPIO handle specifies active low - ignored The fixed-regulator binding does not use the active-low flag from the gpio specifier and purely relies of the presence of the 'enable-active-high' property to determine if it is active high or low (if this property is omitted). Fix this warning by setting the GPIO to active-high in the GPIO specifier which aligns with the presense of the 'enable-active-high' property. Fixes: 34993594181d ("arm64: tegra: Enable HDMI on Jetson TX1") Signed-off-by: Jon Hunter Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi index d67ef4319f3b..97f31bc4fa1e 100644 --- a/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi @@ -1584,7 +1584,7 @@ regulator-name = "VDD_HDMI_5V0"; regulator-min-microvolt = <5000000>; regulator-max-microvolt = <5000000>; - gpio = <&exp1 12 GPIO_ACTIVE_LOW>; + gpio = <&exp1 12 GPIO_ACTIVE_HIGH>; enable-active-high; vin-supply = <&vdd_5v0_sys>; }; From ec85bb89cc74a60c1afb47f537f9613b795a184a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Sat, 10 Aug 2019 10:42:48 +0200 Subject: [PATCH 1437/3715] usb: gadget: u_serial: add missing port entry locking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit daf82bd24e308c5a83758047aff1bd81edda4f11 upstream. gserial_alloc_line() misses locking (for a release barrier) while resetting port entry on TTY allocation failure. Fix this. Cc: stable@vger.kernel.org Signed-off-by: Michał Mirosław Reviewed-by: Greg Kroah-Hartman Tested-by: Ladislav Michl Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/u_serial.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c index 4176216d54be..520ace49f91d 100644 --- a/drivers/usb/gadget/function/u_serial.c +++ b/drivers/usb/gadget/function/u_serial.c @@ -1392,8 +1392,10 @@ int gserial_alloc_line(unsigned char *line_num) __func__, port_num, PTR_ERR(tty_dev)); ret = PTR_ERR(tty_dev); + mutex_lock(&ports[port_num].lock); port = ports[port_num].port; ports[port_num].port = NULL; + mutex_unlock(&ports[port_num].lock); gserial_free_port(port); goto err; } From d3e83b6532a008200b46c9e7ff04b767d6b34d1c Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Tue, 5 Nov 2019 05:51:10 +0000 Subject: [PATCH 1438/3715] tty: serial: fsl_lpuart: use the sg count from dma_map_sg commit 487ee861de176090b055eba5b252b56a3b9973d6 upstream. The dmaengine_prep_slave_sg needs to use sg count returned by dma_map_sg, not use sport->dma_tx_nents, because the return value of dma_map_sg is not always same with "nents". When enabling iommu for lpuart + edma, iommu framework may concatenate two sgs into one. Fixes: 6250cc30c4c4e ("tty: serial: fsl_lpuart: Use scatter/gather DMA for Tx") Cc: Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/1572932977-17866-1-git-send-email-peng.fan@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 32a473f9d1d3..fb2dcb3f8591 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -380,8 +380,8 @@ static void lpuart_dma_tx(struct lpuart_port *sport) } sport->dma_tx_desc = dmaengine_prep_slave_sg(sport->dma_tx_chan, sgl, - sport->dma_tx_nents, - DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT); + ret, DMA_MEM_TO_DEV, + DMA_PREP_INTERRUPT); if (!sport->dma_tx_desc) { dma_unmap_sg(dev, sgl, sport->dma_tx_nents, DMA_TO_DEVICE); dev_err(dev, "Cannot prepare TX slave DMA!\n"); From 358aba9b3ba4264060598559043e99bd621f263d Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Mon, 21 Oct 2019 08:46:16 -0700 Subject: [PATCH 1439/3715] tty: serial: msm_serial: Fix flow control commit b027ce258369cbfa88401a691c23dad01deb9f9b upstream. hci_qca interfaces to the wcn3990 via a uart_dm on the msm8998 mtp and Lenovo Miix 630 laptop. As part of initializing the wcn3990, hci_qca disables flow, configures the uart baudrate, and then reenables flow - at which point an event is expected to be received over the uart from the wcn3990. It is observed that this event comes after the baudrate change but before hci_qca re-enables flow. This is unexpected, and is a result of msm_reset() being broken. According to the uart_dm hardware documentation, it is recommended that automatic hardware flow control be enabled by setting RX_RDY_CTL. Auto hw flow control will manage RFR based on the configured watermark. When there is space to receive data, the hw will assert RFR. When the watermark is hit, the hw will de-assert RFR. The hardware documentation indicates that RFR can me manually managed via CR when RX_RDY_CTL is not set. SET_RFR asserts RFR, and RESET_RFR de-asserts RFR. msm_reset() is broken because after resetting the hardware, it unconditionally asserts RFR via SET_RFR. This enables flow regardless of the current configuration, and would undo a previous flow disable operation. It should instead de-assert RFR via RESET_RFR to block flow until the hardware is reconfigured. msm_serial should rely on the client to specify that flow should be enabled, either via mctrl() or the termios structure, and only assert RFR in response to those triggers. Fixes: 04896a77a97b ("msm_serial: serial driver for MSM7K onboard serial peripheral.") Signed-off-by: Jeffrey Hugo Reviewed-by: Bjorn Andersson Cc: stable Reviewed-by: Andy Gross Link: https://lore.kernel.org/r/20191021154616.25457-1-jeffrey.l.hugo@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/msm_serial.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/msm_serial.c b/drivers/tty/serial/msm_serial.c index 0e0ccc132ab0..e937fb189034 100644 --- a/drivers/tty/serial/msm_serial.c +++ b/drivers/tty/serial/msm_serial.c @@ -988,6 +988,7 @@ static unsigned int msm_get_mctrl(struct uart_port *port) static void msm_reset(struct uart_port *port) { struct msm_port *msm_port = UART_TO_MSM(port); + unsigned int mr; /* reset everything */ msm_write(port, UART_CR_CMD_RESET_RX, UART_CR); @@ -995,7 +996,10 @@ static void msm_reset(struct uart_port *port) msm_write(port, UART_CR_CMD_RESET_ERR, UART_CR); msm_write(port, UART_CR_CMD_RESET_BREAK_INT, UART_CR); msm_write(port, UART_CR_CMD_RESET_CTS, UART_CR); - msm_write(port, UART_CR_CMD_SET_RFR, UART_CR); + msm_write(port, UART_CR_CMD_RESET_RFR, UART_CR); + mr = msm_read(port, UART_MR1); + mr &= ~UART_MR1_RX_RDY_CTL; + msm_write(port, mr, UART_MR1); /* Disable DM modes */ if (msm_port->is_uartdm) From 8c6aed4fc2387cec8ffcdb796c3687ef82727984 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Mon, 18 Nov 2019 10:25:47 +0100 Subject: [PATCH 1440/3715] serial: pl011: Fix DMA ->flush_buffer() commit f6a196477184b99a31d16366a8e826558aa11f6d upstream. PL011's ->flush_buffer() implementation releases and reacquires the port lock. Due to a race condition here, data can end up being added to the circular buffer but neither being discarded nor being sent out. This leads to, for example, tcdrain(2) waiting indefinitely. Process A Process B uart_flush_buffer() - acquire lock - circ_clear - pl011_flush_buffer() -- release lock -- dmaengine_terminate_all() uart_write() - acquire lock - add chars to circ buffer - start_tx() -- start DMA - release lock -- acquire lock -- turn off DMA -- release lock // Data in circ buffer but DMA is off According to the comment in the code, the releasing of the lock around dmaengine_terminate_all() is to avoid a deadlock with the DMA engine callback. However, since the time this code was written, the DMA engine API documentation seems to have been clarified to say that dmaengine_terminate_all() (in the identically implemented but differently named dmaengine_terminate_async() variant) does not wait for any running complete callback to be completed and can even be called from a complete callback. So there is no possibility of deadlock if the DMA engine driver implements this API correctly. So we should be able to just remove this release and reacquire of the lock to prevent the aforementioned race condition. Signed-off-by: Vincent Whitchurch Cc: stable Link: https://lore.kernel.org/r/20191118092547.32135-1-vincent.whitchurch@axis.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/amba-pl011.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 4a4a9f33715c..637f72fb6427 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -829,10 +829,8 @@ __acquires(&uap->port.lock) if (!uap->using_tx_dma) return; - /* Avoid deadlock with the DMA engine callback */ - spin_unlock(&uap->port.lock); - dmaengine_terminate_all(uap->dmatx.chan); - spin_lock(&uap->port.lock); + dmaengine_terminate_async(uap->dmatx.chan); + if (uap->dmatx.queued) { dma_unmap_sg(uap->dmatx.chan->device->dev, &uap->dmatx.sg, 1, DMA_TO_DEVICE); From 79e08aef5797d2ec18e538f57ae2acd20b6c9b7b Mon Sep 17 00:00:00 2001 From: Jiangfeng Xiao Date: Wed, 20 Nov 2019 23:18:53 +0800 Subject: [PATCH 1441/3715] serial: serial_core: Perform NULL checks for break_ctl ops commit 7d73170e1c282576419f8b50a771f1fcd2b81a94 upstream. Doing fuzz test on sbsa uart device, causes a kernel crash due to NULL pointer dereference: ------------[ cut here ]------------ Unable to handle kernel paging request at virtual address fffffffffffffffc pgd = ffffffe331723000 [fffffffffffffffc] *pgd=0000002333595003, *pud=0000002333595003, *pmd=00000 Internal error: Oops: 96000005 [#1] PREEMPT SMP Modules linked in: ping(O) jffs2 rtos_snapshot(O) pramdisk(O) hisi_sfc(O) Drv_Nandc_K(O) Drv_SysCtl_K(O) Drv_SysClk_K(O) bsp_reg(O) hns3(O) hns3_uio_enet(O) hclgevf(O) hclge(O) hnae3(O) mdio_factory(O) mdio_registry(O) mdio_dev(O) mdio(O) hns3_info(O) rtos_kbox_panic(O) uart_suspend(O) rsm(O) stp llc tunnel4 xt_tcpudp ipt_REJECT nf_reject_ipv4 iptable_filter ip_tables x_tables sd_mod xhci_plat_hcd xhci_pci xhci_hcd usbmon usbhid usb_storage ohci_platform ohci_pci ohci_hcd hid_generic hid ehci_platform ehci_pci ehci_hcd vfat fat usbcore usb_common scsi_mod yaffs2multi(O) ext4 jbd2 ext2 mbcache ofpart i2c_dev i2c_core uio ubi nand nand_ecc nand_ids cfi_cmdset_0002 cfi_cmdset_0001 cfi_probe gen_probe cmdlinepart chipreg mtdblock mtd_blkdevs mtd nfsd auth_rpcgss oid_registry nfsv3 nfs nfs_acl lockd sunrpc grace autofs4 CPU: 2 PID: 2385 Comm: tty_fuzz_test Tainted: G O 4.4.193 #1 task: ffffffe32b23f110 task.stack: ffffffe32bda4000 PC is at uart_break_ctl+0x44/0x84 LR is at uart_break_ctl+0x34/0x84 pc : [] lr : [] pstate: 80000005 sp : ffffffe32bda7cc0 x29: ffffffe32bda7cc0 x28: ffffffe32b23f110 x27: ffffff8393402000 x26: 0000000000000000 x25: ffffffe32b233f40 x24: ffffffc07a8ec680 x23: 0000000000005425 x22: 00000000ffffffff x21: ffffffe33ed73c98 x20: 0000000000000000 x19: ffffffe33ed94168 x18: 0000000000000004 x17: 0000007f92ae9d30 x16: ffffff8392fa6064 x15: 0000000000000010 x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 x11: 0000000000000020 x10: 0000007ffdac1708 x9 : 0000000000000078 x8 : 000000000000001d x7 : 0000000052a64887 x6 : ffffffe32bda7e08 x5 : ffffffe32b23c000 x4 : 0000005fbc5b0000 x3 : ffffff83938d5018 x2 : 0000000000000080 x1 : ffffffe32b23c040 x0 : ffffff83934428f8 virtual start addr offset is 38ac00000 module base offset is 2cd4cf1000 linear region base offset is : 0 Process tty_fuzz_test (pid: 2385, stack limit = 0xffffffe32bda4000) Stack: (0xffffffe32bda7cc0 to 0xffffffe32bda8000) 7cc0: ffffffe32bda7cf0 ffffff8393177718 ffffffc07a8ec680 ffffff8393196054 7ce0: 000000001739f2e0 0000007ffdac1978 ffffffe32bda7d20 ffffff8393179a1c 7d00: 0000000000000000 ffffff8393c0a000 ffffffc07a8ec680 cb88537fdc8ba600 7d20: ffffffe32bda7df0 ffffff8392fa5a40 ffffff8393c0a000 0000000000005425 7d40: 0000007ffdac1978 ffffffe32b233f40 ffffff8393178dcc 0000000000000003 7d60: 000000000000011d 000000000000001d ffffffe32b23f110 000000000000029e 7d80: ffffffe34fe8d5d0 0000000000000000 ffffffe32bda7e14 cb88537fdc8ba600 7da0: ffffffe32bda7e30 ffffff8393042cfc ffffff8393c41720 ffffff8393c46410 7dc0: ffffff839304fa68 ffffffe32b233f40 0000000000005425 0000007ffdac1978 7de0: 000000000000011d cb88537fdc8ba600 ffffffe32bda7e70 ffffff8392fa60cc 7e00: 0000000000000000 ffffffe32b233f40 ffffffe32b233f40 0000000000000003 7e20: 0000000000005425 0000007ffdac1978 ffffffe32bda7e70 ffffff8392fa60b0 7e40: 0000000000000280 ffffffe32b233f40 ffffffe32b233f40 0000000000000003 7e60: 0000000000005425 cb88537fdc8ba600 0000000000000000 ffffff8392e02e78 7e80: 0000000000000280 0000005fbc5b0000 ffffffffffffffff 0000007f92ae9d3c 7ea0: 0000000060000000 0000000000000015 0000000000000003 0000000000005425 7ec0: 0000007ffdac1978 0000000000000000 00000000a54c910e 0000007f92b95014 7ee0: 0000007f92b95090 0000000052a64887 000000000000001d 0000000000000078 7f00: 0000007ffdac1708 0000000000000020 0000000000000000 0000000000000000 7f20: 0000000000000000 0000000000000010 000000556acf0090 0000007f92ae9d30 7f40: 0000000000000004 000000556acdef10 0000000000000000 000000556acdebd0 7f60: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 7f80: 0000000000000000 0000000000000000 0000000000000000 0000007ffdac1840 7fa0: 000000556acdedcc 0000007ffdac1840 0000007f92ae9d3c 0000000060000000 7fc0: 0000000000000000 0000000000000000 0000000000000003 000000000000001d 7fe0: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 Call trace: Exception stack(0xffffffe32bda7ab0 to 0xffffffe32bda7bf0) 7aa0: 0000000000001000 0000007fffffffff 7ac0: ffffffe32bda7cc0 ffffff8393196098 0000000080000005 0000000000000025 7ae0: ffffffe32b233f40 ffffff83930d777c ffffffe32bda7b30 ffffff83930d777c 7b00: ffffffe32bda7be0 ffffff83938d5000 ffffffe32bda7be0 ffffffe32bda7c20 7b20: ffffffe32bda7b60 ffffff83930d777c ffffffe32bda7c10 ffffff83938d5000 7b40: ffffffe32bda7c10 ffffffe32bda7c50 ffffff8393c0a000 ffffffe32b23f110 7b60: ffffffe32bda7b70 ffffff8392e09df4 ffffffe32bda7bb0 cb88537fdc8ba600 7b80: ffffff83934428f8 ffffffe32b23c040 0000000000000080 ffffff83938d5018 7ba0: 0000005fbc5b0000 ffffffe32b23c000 ffffffe32bda7e08 0000000052a64887 7bc0: 000000000000001d 0000000000000078 0000007ffdac1708 0000000000000020 7be0: 0000000000000000 0000000000000000 [] uart_break_ctl+0x44/0x84 [] send_break+0xa0/0x114 [] tty_ioctl+0xc50/0xe84 [] do_vfs_ioctl+0xc4/0x6e8 [] SyS_ioctl+0x68/0x9c [] __sys_trace_return+0x0/0x4 Code: b9410ea0 34000160 f9408aa0 f9402814 (b85fc280) ---[ end trace 8606094f1960c5e0 ]--- Kernel panic - not syncing: Fatal exception Fix this problem by adding NULL checks prior to calling break_ctl ops. Signed-off-by: Jiangfeng Xiao Cc: stable Link: https://lore.kernel.org/r/1574263133-28259-1-git-send-email-xiaojiangfeng@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 17e2311f7b00..38bb8f85e88d 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -1125,7 +1125,7 @@ static int uart_break_ctl(struct tty_struct *tty, int break_state) if (!uport) goto out; - if (uport->type != PORT_UNKNOWN) + if (uport->type != PORT_UNKNOWN && uport->ops->break_ctl) uport->ops->break_ctl(uport, break_state); ret = 0; out: From 5cbcf71375eaaeef9c127407d34a63682cd1bd12 Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Mon, 18 Nov 2019 10:48:33 +0800 Subject: [PATCH 1442/3715] serial: ifx6x60: add missed pm_runtime_disable commit 50b2b571c5f3df721fc81bf9a12c521dfbe019ba upstream. The driver forgets to call pm_runtime_disable in remove. Add the missed calls to fix it. Signed-off-by: Chuhong Yuan Cc: stable Link: https://lore.kernel.org/r/20191118024833.21587-1-hslester96@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/ifx6x60.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/tty/serial/ifx6x60.c b/drivers/tty/serial/ifx6x60.c index f190a84a0246..d54ebe6b1d50 100644 --- a/drivers/tty/serial/ifx6x60.c +++ b/drivers/tty/serial/ifx6x60.c @@ -1245,6 +1245,9 @@ static int ifx_spi_spi_remove(struct spi_device *spi) struct ifx_spi_device *ifx_dev = spi_get_drvdata(spi); /* stop activity */ tasklet_kill(&ifx_dev->io_work_tasklet); + + pm_runtime_disable(&spi->dev); + /* free irq */ free_irq(gpio_to_irq(ifx_dev->gpio.reset_out), ifx_dev); free_irq(gpio_to_irq(ifx_dev->gpio.srdy), ifx_dev); From 4404070f6a8cf2c86c76176e4b7221fa6edd5368 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Oct 2019 00:03:11 -0400 Subject: [PATCH 1443/3715] autofs: fix a leak in autofs_expire_indirect() [ Upstream commit 03ad0d703df75c43f78bd72e16124b5b94a95188 ] if the second call of should_expire() in there ends up grabbing and returning a new reference to dentry, we need to drop it before continuing. Signed-off-by: Al Viro Signed-off-by: Sasha Levin --- fs/autofs4/expire.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 141f9bc213a3..94a0017c923b 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -472,9 +472,10 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, */ flags &= ~AUTOFS_EXP_LEAVES; found = should_expire(expired, mnt, timeout, how); - if (!found || found != expired) - /* Something has changed, continue */ + if (found != expired) { // something has changed, continue + dput(found); goto next; + } if (expired != dentry) dput(dentry); From 223767740e15a0bd5c4c91baa9ffa21b92653ae1 Mon Sep 17 00:00:00 2001 From: Sirong Wang Date: Fri, 1 Nov 2019 10:33:29 +0800 Subject: [PATCH 1444/3715] RDMA/hns: Correct the value of HNS_ROCE_HEM_CHUNK_LEN [ Upstream commit 531eb45b3da4267fc2a64233ba256c8ffb02edd2 ] Size of pointer to buf field of struct hns_roce_hem_chunk should be considered when calculating HNS_ROCE_HEM_CHUNK_LEN, or sg table size will be larger than expected when allocating hem. Fixes: 9a4435375cd1 ("IB/hns: Add driver files for hns RoCE driver") Link: https://lore.kernel.org/r/1572575610-52530-2-git-send-email-liweihang@hisilicon.com Signed-off-by: Sirong Wang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hns/hns_roce_hem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index 435748858252..8e8917ebb013 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -52,7 +52,7 @@ enum { #define HNS_ROCE_HEM_CHUNK_LEN \ ((256 - sizeof(struct list_head) - 2 * sizeof(int)) / \ - (sizeof(struct scatterlist))) + (sizeof(struct scatterlist) + sizeof(void *))) enum { HNS_ROCE_HEM_PAGE_SHIFT = 12, From cb8ce68f3037f911cf1b51b37ad8787f7c1cbcd7 Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Thu, 7 Nov 2019 13:51:47 +0200 Subject: [PATCH 1445/3715] iwlwifi: pcie: don't consider IV len in A-MSDU [ Upstream commit cb1a4badf59275eb7221dcec621e8154917eabd1 ] From gen2 PN is totally offloaded to hardware (also the space for the IV isn't part of the skb). As you can see in mvm/mac80211.c:3545, the MAC for cipher types CCMP/GCMP doesn't set IEEE80211_KEY_FLAG_PUT_IV_SPACE for gen2 NICs. This causes all the AMSDU data to be corrupted with cipher enabled. Signed-off-by: Mordechay Goodstein Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- .../net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 20 +++++++------------ 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index 6f45c8148b27..bbb39d6ec2ee 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -232,27 +232,23 @@ static int iwl_pcie_gen2_build_amsdu(struct iwl_trans *trans, struct ieee80211_hdr *hdr = (void *)skb->data; unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room; unsigned int mss = skb_shinfo(skb)->gso_size; - u16 length, iv_len, amsdu_pad; + u16 length, amsdu_pad; u8 *start_hdr; struct iwl_tso_hdr_page *hdr_page; struct page **page_ptr; struct tso_t tso; - /* if the packet is protected, then it must be CCMP or GCMP */ - iv_len = ieee80211_has_protected(hdr->frame_control) ? - IEEE80211_CCMP_HDR_LEN : 0; - trace_iwlwifi_dev_tx(trans->dev, skb, tfd, sizeof(*tfd), &dev_cmd->hdr, start_len, 0); ip_hdrlen = skb_transport_header(skb) - skb_network_header(skb); snap_ip_tcp_hdrlen = 8 + ip_hdrlen + tcp_hdrlen(skb); - total_len = skb->len - snap_ip_tcp_hdrlen - hdr_len - iv_len; + total_len = skb->len - snap_ip_tcp_hdrlen - hdr_len; amsdu_pad = 0; /* total amount of header we may need for this A-MSDU */ hdr_room = DIV_ROUND_UP(total_len, mss) * - (3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr)) + iv_len; + (3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr)); /* Our device supports 9 segments at most, it will fit in 1 page */ hdr_page = get_page_hdr(trans, hdr_room); @@ -263,14 +259,12 @@ static int iwl_pcie_gen2_build_amsdu(struct iwl_trans *trans, start_hdr = hdr_page->pos; page_ptr = (void *)((u8 *)skb->cb + trans_pcie->page_offs); *page_ptr = hdr_page->page; - memcpy(hdr_page->pos, skb->data + hdr_len, iv_len); - hdr_page->pos += iv_len; /* - * Pull the ieee80211 header + IV to be able to use TSO core, + * Pull the ieee80211 header to be able to use TSO core, * we will restore it for the tx_status flow. */ - skb_pull(skb, hdr_len + iv_len); + skb_pull(skb, hdr_len); /* * Remove the length of all the headers that we don't actually @@ -348,8 +342,8 @@ static int iwl_pcie_gen2_build_amsdu(struct iwl_trans *trans, } } - /* re -add the WiFi header and IV */ - skb_push(skb, hdr_len + iv_len); + /* re -add the WiFi header */ + skb_push(skb, hdr_len); return 0; From 0d29e9eed56cadb805bb2f2432cda97e6621f17c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 8 Nov 2019 22:08:29 -0500 Subject: [PATCH 1446/3715] exportfs_decode_fh(): negative pinned may become positive without the parent locked [ Upstream commit a2ece088882666e1dc7113744ac912eb161e3f87 ] Signed-off-by: Al Viro Signed-off-by: Sasha Levin --- fs/exportfs/expfs.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index c22cc9d2a5c9..a561ae17cf43 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -508,26 +508,33 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, * inode is actually connected to the parent. */ err = exportfs_get_name(mnt, target_dir, nbuf, result); - if (!err) { - inode_lock(target_dir->d_inode); - nresult = lookup_one_len(nbuf, target_dir, - strlen(nbuf)); - inode_unlock(target_dir->d_inode); - if (!IS_ERR(nresult)) { - if (nresult->d_inode) { - dput(result); - result = nresult; - } else - dput(nresult); - } + if (err) { + dput(target_dir); + goto err_result; } + inode_lock(target_dir->d_inode); + nresult = lookup_one_len(nbuf, target_dir, strlen(nbuf)); + if (!IS_ERR(nresult)) { + if (unlikely(nresult->d_inode != result->d_inode)) { + dput(nresult); + nresult = ERR_PTR(-ESTALE); + } + } + inode_unlock(target_dir->d_inode); /* * At this point we are done with the parent, but it's pinned * by the child dentry anyway. */ dput(target_dir); + if (IS_ERR(nresult)) { + err = PTR_ERR(nresult); + goto err_result; + } + dput(result); + result = nresult; + /* * And finally make sure the dentry is actually acceptable * to NFSD. From 0eda2827797d047ad79ee5d9568531bed7950308 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 2 Nov 2019 13:11:41 -0400 Subject: [PATCH 1447/3715] audit_get_nd(): don't unlock parent too early [ Upstream commit 69924b89687a2923e88cc42144aea27868913d0e ] if the child has been negative and just went positive under us, we want coherent d_is_positive() and ->d_inode. Don't unlock the parent until we'd done that work... Signed-off-by: Al Viro Signed-off-by: Sasha Levin --- kernel/audit_watch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 4a98f6e314a9..35f1d706bd5b 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -365,12 +365,12 @@ static int audit_get_nd(struct audit_watch *watch, struct path *parent) struct dentry *d = kern_path_locked(watch->path, parent); if (IS_ERR(d)) return PTR_ERR(d); - inode_unlock(d_backing_inode(parent->dentry)); if (d_is_positive(d)) { /* update watch filter fields */ watch->dev = d->d_sb->s_dev; watch->ino = d_backing_inode(d)->i_ino; } + inode_unlock(d_backing_inode(parent->dentry)); dput(d); return 0; } From 7a9b522fa65d4a25754b3a4329f76d132ffab63d Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Sun, 10 Nov 2019 17:19:15 +0100 Subject: [PATCH 1448/3715] NFC: nxp-nci: Fix NULL pointer dereference after I2C communication error [ Upstream commit a71a29f50de1ef97ab55c151a1598eb12dde379d ] I2C communication errors (-EREMOTEIO) during the IRQ handler of nxp-nci result in a NULL pointer dereference at the moment: BUG: kernel NULL pointer dereference, address: 0000000000000000 Oops: 0002 [#1] PREEMPT SMP NOPTI CPU: 1 PID: 355 Comm: irq/137-nxp-nci Not tainted 5.4.0-rc6 #1 RIP: 0010:skb_queue_tail+0x25/0x50 Call Trace: nci_recv_frame+0x36/0x90 [nci] nxp_nci_i2c_irq_thread_fn+0xd1/0x285 [nxp_nci_i2c] ? preempt_count_add+0x68/0xa0 ? irq_forced_thread_fn+0x80/0x80 irq_thread_fn+0x20/0x60 irq_thread+0xee/0x180 ? wake_threads_waitq+0x30/0x30 kthread+0xfb/0x130 ? irq_thread_check_affinity+0xd0/0xd0 ? kthread_park+0x90/0x90 ret_from_fork+0x1f/0x40 Afterward the kernel must be rebooted to work properly again. This happens because it attempts to call nci_recv_frame() with skb == NULL. However, unlike nxp_nci_fw_recv_frame(), nci_recv_frame() does not have any NULL checks for skb, causing the NULL pointer dereference. Change the code to call only nxp_nci_fw_recv_frame() in case of an error. Make sure to log it so it is obvious that a communication error occurred. The error above then becomes: nxp-nci_i2c i2c-NXP1001:00: NFC: Read failed with error -121 nci: __nci_request: wait_for_completion_interruptible_timeout failed 0 nxp-nci_i2c i2c-NXP1001:00: NFC: Read failed with error -121 Fixes: 6be88670fc59 ("NFC: nxp-nci_i2c: Add I2C support to NXP NCI driver") Signed-off-by: Stephan Gerhold Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/nfc/nxp-nci/i2c.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/nfc/nxp-nci/i2c.c b/drivers/nfc/nxp-nci/i2c.c index 198585bbc771..d9492cffd00e 100644 --- a/drivers/nfc/nxp-nci/i2c.c +++ b/drivers/nfc/nxp-nci/i2c.c @@ -236,8 +236,10 @@ static irqreturn_t nxp_nci_i2c_irq_thread_fn(int irq, void *phy_id) if (r == -EREMOTEIO) { phy->hard_fault = r; - skb = NULL; - } else if (r < 0) { + if (info->mode == NXP_NCI_MODE_FW) + nxp_nci_fw_recv_frame(phy->ndev, NULL); + } + if (r < 0) { nfc_err(&client->dev, "Read failed with error %d\n", r); goto exit_irq_handled; } From 3bbb8eec91fe6939bce32c7658f5d8f4c0442659 Mon Sep 17 00:00:00 2001 From: Xiaodong Xu Date: Mon, 11 Nov 2019 15:05:46 -0800 Subject: [PATCH 1449/3715] xfrm: release device reference for invalid state [ Upstream commit 4944a4b1077f74d89073624bd286219d2fcbfce3 ] An ESP packet could be decrypted in async mode if the input handler for this packet returns -EINPROGRESS in xfrm_input(). At this moment the device reference in skb is held. Later xfrm_input() will be invoked again to resume the processing. If the transform state is still valid it would continue to release the device reference and there won't be a problem; however if the transform state is not valid when async resumption happens, the packet will be dropped while the device reference is still being held. When the device is deleted for some reason and the reference to this device is not properly released, the kernel will keep logging like: unregister_netdevice: waiting for ppp2 to become free. Usage count = 1 The issue is observed when running IPsec traffic over a PPPoE device based on a bridge interface. By terminating the PPPoE connection on the server end for multiple times, the PPPoE device on the client side will eventually get stuck on the above warning message. This patch will check the async mode first and continue to release device reference in async resumption, before it is dropped due to invalid state. v2: Do not assign address family from outer_mode in the transform if the state is invalid v3: Release device reference in the error path instead of jumping to resume Fixes: 4ce3dbe397d7b ("xfrm: Fix xfrm_input() to verify state is valid when (encap_type < 0)") Signed-off-by: Xiaodong Xu Reported-by: Bo Chen Tested-by: Bo Chen Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/xfrm/xfrm_input.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index fc0a9ce1be18..311597401b82 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -245,6 +245,9 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) else XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEINVALID); + + if (encap_type == -1) + dev_put(skb->dev); goto drop; } From b4827e98dec326329015e19b767fa942354d32b6 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Tue, 12 Nov 2019 17:04:54 -0800 Subject: [PATCH 1450/3715] Input: cyttsp4_core - fix use after free bug [ Upstream commit 79aae6acbef16f720a7949f8fc6ac69816c79d62 ] The device md->input is used after it is released. Setting the device data to NULL is unnecessary as the device is never used again. Instead, md->input should be assigned NULL to avoid accessing the freed memory accidently. Besides, checking md->si against NULL is superfluous as it points to a variable address, which cannot be NULL. Signed-off-by: Pan Bian Link: https://lore.kernel.org/r/1572936379-6423-1-git-send-email-bianpan2016@163.com Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/touchscreen/cyttsp4_core.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/input/touchscreen/cyttsp4_core.c b/drivers/input/touchscreen/cyttsp4_core.c index beaf61ce775b..a9af83de88bb 100644 --- a/drivers/input/touchscreen/cyttsp4_core.c +++ b/drivers/input/touchscreen/cyttsp4_core.c @@ -1972,11 +1972,6 @@ static int cyttsp4_mt_probe(struct cyttsp4 *cd) /* get sysinfo */ md->si = &cd->sysinfo; - if (!md->si) { - dev_err(dev, "%s: Fail get sysinfo pointer from core p=%p\n", - __func__, md->si); - goto error_get_sysinfo; - } rc = cyttsp4_setup_input_device(cd); if (rc) @@ -1986,8 +1981,6 @@ static int cyttsp4_mt_probe(struct cyttsp4 *cd) error_init_input: input_free_device(md->input); -error_get_sysinfo: - input_set_drvdata(md->input, NULL); error_alloc_failed: dev_err(dev, "%s failed.\n", __func__); return rc; From f506ed55388432e56b5d9c12f0da3a02b149b53d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 1 Oct 2019 11:18:37 +0200 Subject: [PATCH 1451/3715] sched/core: Avoid spurious lock dependencies [ Upstream commit ff51ff84d82aea5a889b85f2b9fb3aa2b8691668 ] While seemingly harmless, __sched_fork() does hrtimer_init(), which, when DEBUG_OBJETS, can end up doing allocations. This then results in the following lock order: rq->lock zone->lock.rlock batched_entropy_u64.lock Which in turn causes deadlocks when we do wakeups while holding that batched_entropy lock -- as the random code does. Solve this by moving __sched_fork() out from under rq->lock. This is safe because nothing there relies on rq->lock, as also evident from the other __sched_fork() callsite. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Qian Cai Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: bigeasy@linutronix.de Cc: cl@linux.com Cc: keescook@chromium.org Cc: penberg@kernel.org Cc: rientjes@google.com Cc: thgarnie@google.com Cc: tytso@mit.edu Cc: will@kernel.org Fixes: b7d5dc21072c ("random: add a spinlock_t to struct batched_entropy") Link: https://lkml.kernel.org/r/20191001091837.GK4536@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/sched/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index bbf8b32fc69e..97a27726ea21 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5242,10 +5242,11 @@ void init_idle(struct task_struct *idle, int cpu) struct rq *rq = cpu_rq(cpu); unsigned long flags; + __sched_fork(0, idle); + raw_spin_lock_irqsave(&idle->pi_lock, flags); raw_spin_lock(&rq->lock); - __sched_fork(0, idle); idle->state = TASK_RUNNING; idle->se.exec_start = sched_clock(); idle->flags |= PF_IDLE; From 1da0c5d2dcd1309a2006b3662e0074cb3713374d Mon Sep 17 00:00:00 2001 From: paulhsia Date: Wed, 13 Nov 2019 01:17:14 +0800 Subject: [PATCH 1452/3715] ALSA: pcm: Fix stream lock usage in snd_pcm_period_elapsed() [ Upstream commit f5cdc9d4003a2f66ea57b3edd3e04acc2b1a4439 ] If the nullity check for `substream->runtime` is outside of the lock region, it is possible to have a null runtime in the critical section if snd_pcm_detach_substream is called right before the lock. Signed-off-by: paulhsia Link: https://lore.kernel.org/r/20191112171715.128727-2-paulhsia@chromium.org Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/pcm_lib.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index 729a85a6211d..80453266a2de 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -1803,11 +1803,14 @@ void snd_pcm_period_elapsed(struct snd_pcm_substream *substream) struct snd_pcm_runtime *runtime; unsigned long flags; - if (PCM_RUNTIME_CHECK(substream)) + if (snd_BUG_ON(!substream)) return; - runtime = substream->runtime; snd_pcm_stream_lock_irqsave(substream, flags); + if (PCM_RUNTIME_CHECK(substream)) + goto _unlock; + runtime = substream->runtime; + if (!snd_pcm_running(substream) || snd_pcm_update_hw_ptr0(substream, 1) < 0) goto _end; @@ -1818,6 +1821,7 @@ void snd_pcm_period_elapsed(struct snd_pcm_substream *substream) #endif _end: kill_fasync(&runtime->fasync, SIGIO, POLL_IN); + _unlock: snd_pcm_stream_unlock_irqrestore(substream, flags); } EXPORT_SYMBOL(snd_pcm_period_elapsed); From c4c09c87464eaf59bdb648d074414deb09d84936 Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Wed, 13 Nov 2019 14:38:47 +0800 Subject: [PATCH 1453/3715] rsxx: add missed destroy_workqueue calls in remove [ Upstream commit dcb77e4b274b8f13ac6482dfb09160cd2fae9a40 ] The driver misses calling destroy_workqueue in remove like what is done when probe fails. Add the missed calls to fix it. Signed-off-by: Chuhong Yuan Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/rsxx/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index 34997df132e2..6beafaa335c7 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -1025,8 +1025,10 @@ static void rsxx_pci_remove(struct pci_dev *dev) cancel_work_sync(&card->event_work); + destroy_workqueue(card->event_wq); rsxx_destroy_dev(card); rsxx_dma_destroy(card); + destroy_workqueue(card->creg_ctrl.creg_wq); spin_lock_irqsave(&card->irq_lock, flags); rsxx_disable_ier_and_isr(card, CR_INTR_ALL); From faf35651c75253903de694128520e248a4bd486b Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Thu, 14 Nov 2019 23:43:24 +0800 Subject: [PATCH 1454/3715] net: ep93xx_eth: fix mismatch of request_mem_region in remove [ Upstream commit 3df70afe8d33f4977d0e0891bdcfb639320b5257 ] The driver calls release_resource in remove to match request_mem_region in probe, which is incorrect. Fix it by using the right one, release_mem_region. Signed-off-by: Chuhong Yuan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/cirrus/ep93xx_eth.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cirrus/ep93xx_eth.c b/drivers/net/ethernet/cirrus/ep93xx_eth.c index e2a702996db4..82bd918bf967 100644 --- a/drivers/net/ethernet/cirrus/ep93xx_eth.c +++ b/drivers/net/ethernet/cirrus/ep93xx_eth.c @@ -767,6 +767,7 @@ static int ep93xx_eth_remove(struct platform_device *pdev) { struct net_device *dev; struct ep93xx_priv *ep; + struct resource *mem; dev = platform_get_drvdata(pdev); if (dev == NULL) @@ -782,8 +783,8 @@ static int ep93xx_eth_remove(struct platform_device *pdev) iounmap(ep->base_addr); if (ep->res != NULL) { - release_resource(ep->res); - kfree(ep->res); + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + release_mem_region(mem->start, resource_size(mem)); } free_netdev(dev); From 4ad684ad733f1c8520cc3096a794407462cb1ade Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Fri, 8 Nov 2019 16:36:48 +0800 Subject: [PATCH 1455/3715] i2c: core: fix use after free in of_i2c_notify [ Upstream commit a4c2fec16f5e6a5fee4865e6e0e91e2bc2d10f37 ] We can't use "adap->dev" after it has been freed. Fixes: 5bf4fa7daea6 ("i2c: break out OF support into separate file") Signed-off-by: Wen Yang Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/i2c-core-of.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/i2c-core-of.c b/drivers/i2c/i2c-core-of.c index 8d474bb1dc15..17d727e0b842 100644 --- a/drivers/i2c/i2c-core-of.c +++ b/drivers/i2c/i2c-core-of.c @@ -238,14 +238,14 @@ static int of_i2c_notify(struct notifier_block *nb, unsigned long action, } client = of_i2c_register_device(adap, rd->dn); - put_device(&adap->dev); - if (IS_ERR(client)) { dev_err(&adap->dev, "failed to create client for '%pOF'\n", rd->dn); + put_device(&adap->dev); of_node_clear_flag(rd->dn, OF_POPULATED); return notifier_from_errno(PTR_ERR(client)); } + put_device(&adap->dev); break; case OF_RECONFIG_CHANGE_REMOVE: /* already depopulated? */ From 66ba64dd77c3768ea64aec759ab7de521e1515f3 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 30 Oct 2018 15:11:04 -0700 Subject: [PATCH 1456/3715] serial: core: Allow processing sysrq at port unlock time [ Upstream commit d6e1935819db0c91ce4a5af82466f3ab50d17346 ] Right now serial drivers process sysrq keys deep in their character receiving code. This means that they've already grabbed their port->lock spinlock. This can end up getting in the way if we've go to do serial stuff (especially kgdb) in response to the sysrq. Serial drivers have various hacks in them to handle this. Looking at '8250_port.c' you can see that the console_write() skips locking if we're in the sysrq handler. Looking at 'msm_serial.c' you can see that the port lock is dropped around uart_handle_sysrq_char(). It turns out that these hacks aren't exactly perfect. If you have lockdep turned on and use something like the 8250_port hack you'll get a splat that looks like: WARNING: possible circular locking dependency detected [...] is trying to acquire lock: ... (console_owner){-.-.}, at: console_unlock+0x2e0/0x5e4 but task is already holding lock: ... (&port_lock_key){-.-.}, at: serial8250_handle_irq+0x30/0xe4 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&port_lock_key){-.-.}: _raw_spin_lock_irqsave+0x58/0x70 serial8250_console_write+0xa8/0x250 univ8250_console_write+0x40/0x4c console_unlock+0x528/0x5e4 register_console+0x2c4/0x3b0 uart_add_one_port+0x350/0x478 serial8250_register_8250_port+0x350/0x3a8 dw8250_probe+0x67c/0x754 platform_drv_probe+0x58/0xa4 really_probe+0x150/0x294 driver_probe_device+0xac/0xe8 __driver_attach+0x98/0xd0 bus_for_each_dev+0x84/0xc8 driver_attach+0x2c/0x34 bus_add_driver+0xf0/0x1ec driver_register+0xb4/0x100 __platform_driver_register+0x60/0x6c dw8250_platform_driver_init+0x20/0x28 ... -> #0 (console_owner){-.-.}: lock_acquire+0x1e8/0x214 console_unlock+0x35c/0x5e4 vprintk_emit+0x230/0x274 vprintk_default+0x7c/0x84 vprintk_func+0x190/0x1bc printk+0x80/0xa0 __handle_sysrq+0x104/0x21c handle_sysrq+0x30/0x3c serial8250_read_char+0x15c/0x18c serial8250_rx_chars+0x34/0x74 serial8250_handle_irq+0x9c/0xe4 dw8250_handle_irq+0x98/0xcc serial8250_interrupt+0x50/0xe8 ... other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&port_lock_key); lock(console_owner); lock(&port_lock_key); lock(console_owner); *** DEADLOCK *** The hack used in 'msm_serial.c' doesn't cause the above splats but it seems a bit ugly to unlock / lock our spinlock deep in our irq handler. It seems like we could defer processing the sysrq until the end of the interrupt handler right after we've unlocked the port. With this scheme if a whole batch of sysrq characters comes in one irq then we won't handle them all, but that seems like it should be a fine compromise. Signed-off-by: Douglas Anderson Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- include/linux/serial_core.h | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 868b60a79c0b..b2a7b7c15451 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -166,6 +166,7 @@ struct uart_port { struct console *cons; /* struct console, if any */ #if defined(CONFIG_SERIAL_CORE_CONSOLE) || defined(SUPPORT_SYSRQ) unsigned long sysrq; /* sysrq timeout */ + unsigned int sysrq_ch; /* char for sysrq */ #endif /* flags must be updated while holding port mutex */ @@ -474,8 +475,42 @@ uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) } return 0; } +static inline int +uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch) +{ + if (port->sysrq) { + if (ch && time_before(jiffies, port->sysrq)) { + port->sysrq_ch = ch; + port->sysrq = 0; + return 1; + } + port->sysrq = 0; + } + return 0; +} +static inline void +uart_unlock_and_check_sysrq(struct uart_port *port, unsigned long irqflags) +{ + int sysrq_ch; + + sysrq_ch = port->sysrq_ch; + port->sysrq_ch = 0; + + spin_unlock_irqrestore(&port->lock, irqflags); + + if (sysrq_ch) + handle_sysrq(sysrq_ch); +} #else -#define uart_handle_sysrq_char(port,ch) ({ (void)port; 0; }) +static inline int +uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) { return 0; } +static inline int +uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch) { return 0; } +static inline void +uart_unlock_and_check_sysrq(struct uart_port *port, unsigned long irqflags) +{ + spin_unlock_irqrestore(&port->lock, irqflags); +} #endif /* From 968a8ada7e022f85eb51f47a8eb0949149d2d942 Mon Sep 17 00:00:00 2001 From: Arjun Vynipadath Date: Fri, 9 Nov 2018 14:52:01 +0530 Subject: [PATCH 1457/3715] cxgb4vf: fix memleak in mac_hlist initialization [ Upstream commit 24357e06ba511ad874d664d39475dbb01c1ca450 ] mac_hlist was initialized during adapter_up, which will be called every time a vf device is first brought up, or every time when device is brought up again after bringing all devices down. This means our state of previous list is lost, causing a memleak if entries are present in the list. To fix that, move list init to the condition that performs initial one time adapter setup. Signed-off-by: Arjun Vynipadath Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index 8996ebbd222e..26ba18ea08c6 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -714,6 +714,10 @@ static int adapter_up(struct adapter *adapter) if (adapter->flags & USING_MSIX) name_msix_vecs(adapter); + + /* Initialize hash mac addr list*/ + INIT_LIST_HEAD(&adapter->mac_hlist); + adapter->flags |= FULL_INIT_DONE; } @@ -739,8 +743,6 @@ static int adapter_up(struct adapter *adapter) enable_rx(adapter); t4vf_sge_start(adapter); - /* Initialize hash mac addr list*/ - INIT_LIST_HEAD(&adapter->mac_hlist); return 0; } From 9ae9efa0ae08a9f66ba82605e436d8bf316c796e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 4 Jul 2018 23:12:33 +0200 Subject: [PATCH 1458/3715] iwlwifi: mvm: synchronize TID queue removal [ Upstream commit 06bc6f6ed4ae0246a5e52094d1be90906a1361c7 ] When we mark a TID as no longer having a queue, there's no guarantee the TX path isn't using this txq_id right now, having accessed it just before we reset the value. To fix this, add synchronize_net() when we change the TIDs from having a queue to not having one, so that we can then be sure that the TX path is no longer accessing that queue. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index d16e2ed4419f..0cfdbaa2af3a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -436,6 +436,16 @@ static int iwl_mvm_remove_sta_queue_marking(struct iwl_mvm *mvm, int queue) rcu_read_unlock(); + /* + * The TX path may have been using this TXQ_ID from the tid_data, + * so make sure it's no longer running so that we can safely reuse + * this TXQ later. We've set all the TIDs to IWL_MVM_INVALID_QUEUE + * above, but nothing guarantees we've stopped using them. Thus, + * without this, we could get to iwl_mvm_disable_txq() and remove + * the queue while still sending frames to it. + */ + synchronize_net(); + return disable_agg_tids; } From 1830e1a276bd6c5d4355293195037e4e814f9ce0 Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Tue, 24 Jul 2018 21:57:50 +0300 Subject: [PATCH 1459/3715] iwlwifi: mvm: Send non offchannel traffic via AP sta [ Upstream commit dc1aca22f8f38b7e2ad7b118db87404d11e68771 ] TDLS discovery response frame is a unicast direct frame to the peer. Since we don't have a STA for this peer, this frame goes through iwl_tx_skb_non_sta(). As the result aux_sta and some completely arbitrary queue would be selected for this frame, resulting in a queue hang. Fix that by sending such frames through AP sta instead. Signed-off-by: Andrei Otcheretianski Signed-off-by: Luca Coelho Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 77ed6ecf5ee5..b86c7a36d3f1 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -822,6 +822,21 @@ static void iwl_mvm_mac_tx(struct ieee80211_hw *hw, !ieee80211_is_bufferable_mmpdu(hdr->frame_control)) sta = NULL; + /* If there is no sta, and it's not offchannel - send through AP */ + if (info->control.vif->type == NL80211_IFTYPE_STATION && + info->hw_queue != IWL_MVM_OFFCHANNEL_QUEUE && !sta) { + struct iwl_mvm_vif *mvmvif = + iwl_mvm_vif_from_mac80211(info->control.vif); + u8 ap_sta_id = READ_ONCE(mvmvif->ap_sta_id); + + if (ap_sta_id < IWL_MVM_STATION_COUNT) { + /* mac80211 holds rcu read lock */ + sta = rcu_dereference(mvm->fw_id_to_mac_id[ap_sta_id]); + if (IS_ERR_OR_NULL(sta)) + goto drop; + } + } + if (sta) { if (iwl_mvm_defer_tx(mvm, sta, skb)) return; From e82dd54ec0a259c510528ad2ac90f133cd9b9c5d Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Fri, 9 Nov 2018 10:12:30 +0100 Subject: [PATCH 1460/3715] ARM: 8813/1: Make aligned 2-byte getuser()/putuser() atomic on ARMv6+ [ Upstream commit 344eb5539abf3e0b6ce22568c03e86450073e097 ] getuser() and putuser() (and there underscored variants) use two strb[t]/ldrb[t] instructions when they are asked to get/put 16-bits. This means that the read/write is not atomic even when performed to a 16-bit-aligned address. This leads to problems with vhost: vhost uses __getuser() to read the vring's 16-bit avail.index field, and if it happens to observe a partial update of the index, wrong descriptors will be used which will lead to a breakdown of the virtio communication. A similar problem exists for __putuser() which is used to write to the vring's used.index field. The reason these functions use strb[t]/ldrb[t] is because strht/ldrht instructions did not exist until ARMv6T2/ARMv7. So we should be easily able to fix this on ARMv7. Also, since all ARMv6 processors also don't actually use the unprivileged instructions anymore for uaccess (since CONFIG_CPU_USE_DOMAINS is not used) we can easily fix them too. Signed-off-by: Vincent Whitchurch Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/include/asm/uaccess.h | 18 ++++++++++++++++++ arch/arm/lib/getuser.S | 11 +++++++++++ arch/arm/lib/putuser.S | 20 ++++++++++---------- 3 files changed, 39 insertions(+), 10 deletions(-) diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index a5807b67ca8a..fe47d24955ea 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -349,6 +349,13 @@ do { \ #define __get_user_asm_byte(x, addr, err) \ __get_user_asm(x, addr, err, ldrb) +#if __LINUX_ARM_ARCH__ >= 6 + +#define __get_user_asm_half(x, addr, err) \ + __get_user_asm(x, addr, err, ldrh) + +#else + #ifndef __ARMEB__ #define __get_user_asm_half(x, __gu_addr, err) \ ({ \ @@ -367,6 +374,8 @@ do { \ }) #endif +#endif /* __LINUX_ARM_ARCH__ >= 6 */ + #define __get_user_asm_word(x, addr, err) \ __get_user_asm(x, addr, err, ldr) #endif @@ -442,6 +451,13 @@ do { \ #define __put_user_asm_byte(x, __pu_addr, err) \ __put_user_asm(x, __pu_addr, err, strb) +#if __LINUX_ARM_ARCH__ >= 6 + +#define __put_user_asm_half(x, __pu_addr, err) \ + __put_user_asm(x, __pu_addr, err, strh) + +#else + #ifndef __ARMEB__ #define __put_user_asm_half(x, __pu_addr, err) \ ({ \ @@ -458,6 +474,8 @@ do { \ }) #endif +#endif /* __LINUX_ARM_ARCH__ >= 6 */ + #define __put_user_asm_word(x, __pu_addr, err) \ __put_user_asm(x, __pu_addr, err, str) diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S index 746e7801dcdf..b2e4bc3a635e 100644 --- a/arch/arm/lib/getuser.S +++ b/arch/arm/lib/getuser.S @@ -42,6 +42,12 @@ _ASM_NOKPROBE(__get_user_1) ENTRY(__get_user_2) check_uaccess r0, 2, r1, r2, __get_user_bad +#if __LINUX_ARM_ARCH__ >= 6 + +2: TUSER(ldrh) r2, [r0] + +#else + #ifdef CONFIG_CPU_USE_DOMAINS rb .req ip 2: ldrbt r2, [r0], #1 @@ -56,6 +62,9 @@ rb .req r0 #else orr r2, rb, r2, lsl #8 #endif + +#endif /* __LINUX_ARM_ARCH__ >= 6 */ + mov r0, #0 ret lr ENDPROC(__get_user_2) @@ -145,7 +154,9 @@ _ASM_NOKPROBE(__get_user_bad8) .pushsection __ex_table, "a" .long 1b, __get_user_bad .long 2b, __get_user_bad +#if __LINUX_ARM_ARCH__ < 6 .long 3b, __get_user_bad +#endif .long 4b, __get_user_bad .long 5b, __get_user_bad8 .long 6b, __get_user_bad8 diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S index 38d660d3705f..515eeaa9975c 100644 --- a/arch/arm/lib/putuser.S +++ b/arch/arm/lib/putuser.S @@ -41,16 +41,13 @@ ENDPROC(__put_user_1) ENTRY(__put_user_2) check_uaccess r0, 2, r1, ip, __put_user_bad - mov ip, r2, lsr #8 -#ifdef CONFIG_THUMB2_KERNEL -#ifndef __ARMEB__ -2: TUSER(strb) r2, [r0] -3: TUSER(strb) ip, [r0, #1] +#if __LINUX_ARM_ARCH__ >= 6 + +2: TUSER(strh) r2, [r0] + #else -2: TUSER(strb) ip, [r0] -3: TUSER(strb) r2, [r0, #1] -#endif -#else /* !CONFIG_THUMB2_KERNEL */ + + mov ip, r2, lsr #8 #ifndef __ARMEB__ 2: TUSER(strb) r2, [r0], #1 3: TUSER(strb) ip, [r0] @@ -58,7 +55,8 @@ ENTRY(__put_user_2) 2: TUSER(strb) ip, [r0], #1 3: TUSER(strb) r2, [r0] #endif -#endif /* CONFIG_THUMB2_KERNEL */ + +#endif /* __LINUX_ARM_ARCH__ >= 6 */ mov r0, #0 ret lr ENDPROC(__put_user_2) @@ -91,7 +89,9 @@ ENDPROC(__put_user_bad) .pushsection __ex_table, "a" .long 1b, __put_user_bad .long 2b, __put_user_bad +#if __LINUX_ARM_ARCH__ < 6 .long 3b, __put_user_bad +#endif .long 4b, __put_user_bad .long 5b, __put_user_bad .long 6b, __put_user_bad From 202d886f7eb77192d337f5ff982de69845edc424 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 8 Nov 2018 21:10:08 +0200 Subject: [PATCH 1461/3715] net/mlx5: Release resource on error flow [ Upstream commit 698114968a22f6c0c9f42e983ba033cc36bb7217 ] Fix reference counting leakage when the event handler aborts due to an unsupported event for the resource type. Fixes: a14c2d4beee5 ("net/mlx5_core: Warn on unsupported events of QP/RQ/SQ") Signed-off-by: Moni Shoua Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/qp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index 889130edb715..5f091c6ea049 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -124,7 +124,7 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type) if (!is_event_type_allowed((rsn >> MLX5_USER_INDEX_LEN), event_type)) { mlx5_core_warn(dev, "event 0x%.2x is not allowed on resource 0x%.8x\n", event_type, rsn); - return; + goto out; } switch (common->res) { @@ -138,7 +138,7 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type) default: mlx5_core_warn(dev, "invalid resource type for 0x%x\n", rsn); } - +out: mlx5_core_put_rsc(common); } From 5a6e4cbc18bb7ee5cff1aafd38965444ff84157a Mon Sep 17 00:00:00 2001 From: Jagan Teki Date: Tue, 13 Nov 2018 16:46:08 +0530 Subject: [PATCH 1462/3715] clk: sunxi-ng: a64: Fix gate bit of DSI DPHY [ Upstream commit ee678706e46d0d185c27cc214ad97828e0643159 ] DSI DPHY gate bit on MIPI DSI clock register is bit 15 not bit 30. Signed-off-by: Jagan Teki Acked-by: Stephen Boyd Signed-off-by: Maxime Ripard Signed-off-by: Sasha Levin --- drivers/clk/sunxi-ng/ccu-sun50i-a64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-a64.c b/drivers/clk/sunxi-ng/ccu-sun50i-a64.c index 36a30a3cfad7..eaafc038368f 100644 --- a/drivers/clk/sunxi-ng/ccu-sun50i-a64.c +++ b/drivers/clk/sunxi-ng/ccu-sun50i-a64.c @@ -565,7 +565,7 @@ static const char * const dsi_dphy_parents[] = { "pll-video0", "pll-periph0" }; static const u8 dsi_dphy_table[] = { 0, 2, }; static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(dsi_dphy_clk, "dsi-dphy", dsi_dphy_parents, dsi_dphy_table, - 0x168, 0, 4, 8, 2, BIT(31), CLK_SET_RATE_PARENT); + 0x168, 0, 4, 8, 2, BIT(15), CLK_SET_RATE_PARENT); static SUNXI_CCU_M_WITH_GATE(gpu_clk, "gpu", "pll-gpu", 0x1a0, 0, 3, BIT(31), CLK_SET_RATE_PARENT); From c466461b1d4070dd60e235a22fb014ec46946676 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 13 Nov 2018 20:39:50 +0300 Subject: [PATCH 1463/3715] dlm: fix possible call to kfree() for non-initialized pointer [ Upstream commit 58a923adf4d9aca8bf7205985c9c8fc531c65d72 ] Technically dlm_config_nodes() could return error and keep nodes uninitialized. After that on the fail path of we'll call kfree() for that uninitialized value. The patch is simple - we should just initialize nodes with NULL. Signed-off-by: Denis V. Lunev Signed-off-by: David Teigland Signed-off-by: Sasha Levin --- fs/dlm/member.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/dlm/member.c b/fs/dlm/member.c index cad6d85911a8..0bc43b35d2c5 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c @@ -671,7 +671,7 @@ int dlm_ls_stop(struct dlm_ls *ls) int dlm_ls_start(struct dlm_ls *ls) { struct dlm_recover *rv, *rv_old; - struct dlm_config_node *nodes; + struct dlm_config_node *nodes = NULL; int error, count; rv = kzalloc(sizeof(*rv), GFP_NOFS); From 8437a3769abc01ff4d03be57983482fe94de96fd Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 13 Nov 2018 16:38:47 +0100 Subject: [PATCH 1464/3715] extcon: max8997: Fix lack of path setting in USB device mode [ Upstream commit a2dc50914744eea9f83a70a5db0486be625e5dc0 ] MAX8997 driver disables automatic path selection from MicroUSB connector and manually sets path to either UART or USB lines. However the code for setting USB path worked only for USB host mode (when ID pin is set to ground). When standard USB cable (USB device mode) is connected, path registers are not touched. This means that once the non-USB accessory is connected to MAX8997-operated micro USB port, the path is no longer set to USB and USB device mode doesn't work. This patch fixes it by setting USB path both for USB and USB host modes. Signed-off-by: Marek Szyprowski Signed-off-by: Chanwoo Choi Signed-off-by: Sasha Levin --- drivers/extcon/extcon-max8997.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/extcon/extcon-max8997.c b/drivers/extcon/extcon-max8997.c index 4a0612fb9c07..b9b48d45a6dc 100644 --- a/drivers/extcon/extcon-max8997.c +++ b/drivers/extcon/extcon-max8997.c @@ -321,12 +321,10 @@ static int max8997_muic_handle_usb(struct max8997_muic_info *info, { int ret = 0; - if (usb_type == MAX8997_USB_HOST) { - ret = max8997_muic_set_path(info, info->path_usb, attached); - if (ret < 0) { - dev_err(info->dev, "failed to update muic register\n"); - return ret; - } + ret = max8997_muic_set_path(info, info->path_usb, attached); + if (ret < 0) { + dev_err(info->dev, "failed to update muic register\n"); + return ret; } switch (usb_type) { From 3be734fe0ed7f1ab7d8c43ae9afed733a91c4200 Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Mon, 12 Nov 2018 16:00:20 +0200 Subject: [PATCH 1465/3715] net: ethernet: ti: cpts: correct debug for expired txq skb [ Upstream commit d0e14c4d9bcef0d4aa1057d2959adaa6f18d4a17 ] The msgtype and seqid that is smth that belongs to event for comparison but not for staled txq skb. Signed-off-by: Ivan Khoronzhuk Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/ti/cpts.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c index 7d1281d81248..23c953496a0d 100644 --- a/drivers/net/ethernet/ti/cpts.c +++ b/drivers/net/ethernet/ti/cpts.c @@ -116,9 +116,7 @@ static bool cpts_match_tx_ts(struct cpts *cpts, struct cpts_event *event) mtype, seqid); } else if (time_after(jiffies, skb_cb->tmo)) { /* timeout any expired skbs over 1s */ - dev_dbg(cpts->dev, - "expiring tx timestamp mtype %u seqid %04x\n", - mtype, seqid); + dev_dbg(cpts->dev, "expiring tx timestamp from txq\n"); __skb_unlink(skb, &cpts->txq); dev_consume_skb_any(skb); } From d4e3d38b904175860da5937e6afc541084063bd9 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 13 Nov 2018 12:32:50 +0100 Subject: [PATCH 1466/3715] rtc: s3c-rtc: Avoid using broken ALMYEAR register [ Upstream commit 50c8aec4212a966817e868056efc9bfbb73337c0 ] (RTC,ALM)YEAR registers of Exynos built-in RTC device contains 3 BCD characters. s3c-rtc driver uses only 2 lower of them and supports years from 2000..2099 range. The third BCD value is typically set to 0, but it looks that handling of it is broken in the hardware. It sometimes defaults to a random (even non-BCD) value. This is not an issue for handling RTCYEAR register, because bcd2bin() properly handles only 8bit values (2 BCD characters, the third one is skipped). The problem is however with ALMYEAR register and proper RTC alarm operation. When YEAREN bit is set for the configured alarm, RTC hardware triggers alarm only when ALMYEAR and RTCYEAR matches. This usually doesn't happen because of the random noise on the third BCD character. Fix this by simply skipping setting ALMYEAR register in alarm configuration. This workaround fixes broken alarm operation on Exynos built-in rtc device. My tests revealed that the issue happens on the following Exynos series: 3250, 4210, 4412, 5250 and 5410. Signed-off-by: Marek Szyprowski Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- drivers/rtc/rtc-s3c.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index a8992c227f61..4120a305954a 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -327,7 +327,6 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm) struct rtc_time *tm = &alrm->time; unsigned int alrm_en; int ret; - int year = tm->tm_year - 100; dev_dbg(dev, "s3c_rtc_setalarm: %d, %04d.%02d.%02d %02d:%02d:%02d\n", alrm->enabled, @@ -356,11 +355,6 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm) writeb(bin2bcd(tm->tm_hour), info->base + S3C2410_ALMHOUR); } - if (year < 100 && year >= 0) { - alrm_en |= S3C2410_RTCALM_YEAREN; - writeb(bin2bcd(year), info->base + S3C2410_ALMYEAR); - } - if (tm->tm_mon < 12 && tm->tm_mon >= 0) { alrm_en |= S3C2410_RTCALM_MONEN; writeb(bin2bcd(tm->tm_mon + 1), info->base + S3C2410_ALMMON); From 095d4dbe73367911351406fac242dc016a7c5c41 Mon Sep 17 00:00:00 2001 From: Mitch Williams Date: Fri, 26 Oct 2018 14:33:28 -0700 Subject: [PATCH 1467/3715] i40e: don't restart nway if autoneg not supported [ Upstream commit 7c3758f7839377ab67529cc50264a640636c47af ] On link types that do not support autoneg, we cannot attempt to restart nway negotiation. This results in a dead link that requires a power cycle to remedy. Fix this by saving off the autoneg state and checking this value before we try to restart nway. Signed-off-by: Mitch Williams Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index ef22793d6a03..751ac5616884 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -969,6 +969,7 @@ static int i40e_set_pauseparam(struct net_device *netdev, i40e_status status; u8 aq_failures; int err = 0; + u32 is_an; /* Changing the port's flow control is not supported if this isn't the * port's controlling PF @@ -981,15 +982,14 @@ static int i40e_set_pauseparam(struct net_device *netdev, if (vsi != pf->vsi[pf->lan_vsi]) return -EOPNOTSUPP; - if (pause->autoneg != ((hw_link_info->an_info & I40E_AQ_AN_COMPLETED) ? - AUTONEG_ENABLE : AUTONEG_DISABLE)) { + is_an = hw_link_info->an_info & I40E_AQ_AN_COMPLETED; + if (pause->autoneg != is_an) { netdev_info(netdev, "To change autoneg please use: ethtool -s autoneg \n"); return -EOPNOTSUPP; } /* If we have link and don't have autoneg */ - if (!test_bit(__I40E_DOWN, pf->state) && - !(hw_link_info->an_info & I40E_AQ_AN_COMPLETED)) { + if (!test_bit(__I40E_DOWN, pf->state) && !is_an) { /* Send message that it might not necessarily work*/ netdev_info(netdev, "Autoneg did not complete so changing settings may not result in an actual change.\n"); } @@ -1040,7 +1040,7 @@ static int i40e_set_pauseparam(struct net_device *netdev, err = -EAGAIN; } - if (!test_bit(__I40E_DOWN, pf->state)) { + if (!test_bit(__I40E_DOWN, pf->state) && is_an) { /* Give it a little more time to try to come back */ msleep(75); if (!test_bit(__I40E_DOWN, pf->state)) From 0759b23e9ed6fab0b5dc77debe755661aef65cdc Mon Sep 17 00:00:00 2001 From: Finley Xiao Date: Wed, 14 Nov 2018 15:45:49 +0000 Subject: [PATCH 1468/3715] clk: rockchip: fix rk3188 sclk_smc gate data [ Upstream commit a9f0c0e563717b9f63b3bb1c4a7c2df436a206d9 ] Fix sclk_smc gate data. Change variable order, flags come before the register address. Signed-off-by: Finley Xiao Signed-off-by: Johan Jonker Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- drivers/clk/rockchip/clk-rk3188.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/rockchip/clk-rk3188.c b/drivers/clk/rockchip/clk-rk3188.c index 2b0d772b4f43..022fecc9bcdf 100644 --- a/drivers/clk/rockchip/clk-rk3188.c +++ b/drivers/clk/rockchip/clk-rk3188.c @@ -391,8 +391,8 @@ static struct rockchip_clk_branch common_clk_branches[] __initdata = { * Clock-Architecture Diagram 4 */ - GATE(SCLK_SMC, "sclk_smc", "hclk_peri", - RK2928_CLKGATE_CON(2), 4, 0, GFLAGS), + GATE(SCLK_SMC, "sclk_smc", "hclk_peri", 0, + RK2928_CLKGATE_CON(2), 4, GFLAGS), COMPOSITE_NOMUX(SCLK_SPI0, "sclk_spi0", "pclk_peri", 0, RK2928_CLKSEL_CON(25), 0, 7, DFLAGS, From 20e935cd77190802d4dbf2f57754884199023222 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Thu, 15 Nov 2018 12:17:30 +0100 Subject: [PATCH 1469/3715] clk: rockchip: fix rk3188 sclk_mac_lbtest parameter ordering [ Upstream commit ac8cb53829a6ba119082e067f5bc8fab3611ce6a ] Similar to commit a9f0c0e56371 ("clk: rockchip: fix rk3188 sclk_smc gate data") there is one other gate clock in the rk3188 clock driver with a similar wrong ordering, the sclk_mac_lbtest. So fix it as well. Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- drivers/clk/rockchip/clk-rk3188.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/rockchip/clk-rk3188.c b/drivers/clk/rockchip/clk-rk3188.c index 022fecc9bcdf..8cdfcd77e3ad 100644 --- a/drivers/clk/rockchip/clk-rk3188.c +++ b/drivers/clk/rockchip/clk-rk3188.c @@ -362,8 +362,8 @@ static struct rockchip_clk_branch common_clk_branches[] __initdata = { RK2928_CLKGATE_CON(2), 5, GFLAGS), MUX(SCLK_MAC, "sclk_macref", mux_sclk_macref_p, CLK_SET_RATE_PARENT, RK2928_CLKSEL_CON(21), 4, 1, MFLAGS), - GATE(0, "sclk_mac_lbtest", "sclk_macref", - RK2928_CLKGATE_CON(2), 12, 0, GFLAGS), + GATE(0, "sclk_mac_lbtest", "sclk_macref", 0, + RK2928_CLKGATE_CON(2), 12, GFLAGS), COMPOSITE(0, "hsadc_src", mux_pll_src_gpll_cpll_p, 0, RK2928_CLKSEL_CON(22), 0, 1, MFLAGS, 8, 8, DFLAGS, From 73623e6344356ea492215a5c05324595a8d11d9d Mon Sep 17 00:00:00 2001 From: John Keeping Date: Tue, 13 Nov 2018 15:24:13 +0000 Subject: [PATCH 1470/3715] ARM: dts: rockchip: Fix rk3288-rock2 vcc_flash name [ Upstream commit 03d9f8fa2bfdc791865624d3adc29070cf67814e ] There is no functional change from this, but it is confusing to find two copies of vcc_sys and no vcc_flash when looking in /sys/class/regulator/*/name. Signed-off-by: John Keeping Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm/boot/dts/rk3288-rock2-som.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/rk3288-rock2-som.dtsi b/arch/arm/boot/dts/rk3288-rock2-som.dtsi index b9c471fcbd42..862c2248fcb6 100644 --- a/arch/arm/boot/dts/rk3288-rock2-som.dtsi +++ b/arch/arm/boot/dts/rk3288-rock2-som.dtsi @@ -63,7 +63,7 @@ vcc_flash: flash-regulator { compatible = "regulator-fixed"; - regulator-name = "vcc_sys"; + regulator-name = "vcc_flash"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; startup-delay-us = <150>; From 9ee10af9e9a53b27a9720f0cf5de7d94c3e92f93 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Thu, 15 Nov 2018 11:17:40 -0600 Subject: [PATCH 1471/3715] dlm: fix missing idr_destroy for recover_idr [ Upstream commit 8fc6ed9a3508a0435b9270c313600799d210d319 ] Which would leak memory for the idr internals. Signed-off-by: David Teigland Signed-off-by: Sasha Levin --- fs/dlm/lockspace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 610f72ae7ad6..9c8c9a09b4a6 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -807,6 +807,7 @@ static int release_lockspace(struct dlm_ls *ls, int force) dlm_delete_debug_file(ls); + idr_destroy(&ls->ls_recover_idr); kfree(ls->ls_recover_buf); /* From e12048b273d12f46cf83b98e81b7190bd96b1dc1 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Tue, 13 Nov 2018 22:42:37 +0000 Subject: [PATCH 1472/3715] MIPS: SiByte: Enable ZONE_DMA32 for LittleSur [ Upstream commit 756d6d836dbfb04a5a486bc2ec89397aa4533737 ] The LittleSur board is marked for high memory support and therefore clearly must provide a way to have enough memory installed for some to be present outside the low 4GiB physical address range. With the memory map of the BCM1250 SOC it has been built around it means over 1GiB of actual DRAM, as only the first 1GiB is mapped in the low 4GiB physical address range[1]. Complement commit cce335ae47e2 ("[MIPS] 64-bit Sibyte kernels need DMA32.") then and also enable ZONE_DMA32 for LittleSur. References: [1] "BCM1250/BCM1125/BCM1125H User Manual", Revision 1250_1125-UM100-R, Broadcom Corporation, 21 Oct 2002, Section 3: "System Overview", "Memory Map", pp. 34-38 Signed-off-by: Maciej W. Rozycki Signed-off-by: Paul Burton Reviewed-by: Christoph Hellwig Patchwork: https://patchwork.linux-mips.org/patch/21107/ Fixes: cce335ae47e2 ("[MIPS] 64-bit Sibyte kernels need DMA32.") Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Sasha Levin --- arch/mips/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index ae4450e891ab..7e267d657c56 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -812,6 +812,7 @@ config SIBYTE_LITTLESUR select SYS_SUPPORTS_BIG_ENDIAN select SYS_SUPPORTS_HIGHMEM select SYS_SUPPORTS_LITTLE_ENDIAN + select ZONE_DMA32 if 64BIT config SIBYTE_SENTOSA bool "Sibyte BCM91250E-Sentosa" From 08d28c1840082f88efe7fbe5ace5d5ae4f8a3305 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 12 Nov 2018 18:51:01 +0100 Subject: [PATCH 1473/3715] net: dsa: mv88e6xxx: Work around mv886e6161 SERDES missing MII_PHYSID2 [ Upstream commit ddc49acb659a2d8bfc5fdb0de0ef197712c11d75 ] We already have a workaround for a couple of switches whose internal PHYs only have the Marvel OUI, but no model number. We detect such PHYs and give them the 6390 ID as the model number. However the mv88e6161 has two SERDES interfaces in the same address range as its internal PHYs. These suffer from the same problem, the Marvell OUI, but no model number. As a result, these SERDES interfaces were getting the same PHY ID as the mv88e6390, even though they are not PHYs, and the Marvell PHY driver was trying to drive them. Add a special case to stop this from happen. Reported-by: Chris Healy Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/dsa/mv88e6xxx/chip.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index be17194487c6..10ea01459a36 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2196,11 +2196,22 @@ static int mv88e6xxx_mdio_read(struct mii_bus *bus, int phy, int reg) mutex_unlock(&chip->reg_lock); if (reg == MII_PHYSID2) { - /* Some internal PHYS don't have a model number. Use - * the mv88e6390 family model number instead. - */ - if (!(val & 0x3f0)) - val |= MV88E6XXX_PORT_SWITCH_ID_PROD_6390 >> 4; + /* Some internal PHYs don't have a model number. */ + if (chip->info->family != MV88E6XXX_FAMILY_6165) + /* Then there is the 6165 family. It gets is + * PHYs correct. But it can also have two + * SERDES interfaces in the PHY address + * space. And these don't have a model + * number. But they are not PHYs, so we don't + * want to give them something a PHY driver + * will recognise. + * + * Use the mv88e6390 family model number + * instead, for anything which really could be + * a PHY, + */ + if (!(val & 0x3f0)) + val |= MV88E6XXX_PORT_SWITCH_ID_PROD_6390 >> 4; } return err ? err : val; From e3c914631c0e2130b8a84f326be2a4eda0ba834c Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Thu, 8 Nov 2018 15:44:57 +0100 Subject: [PATCH 1474/3715] scsi: zfcp: drop default switch case which might paper over missing case [ Upstream commit 0c902936e55cff9335b27ed632fc45e7115ced75 ] This was introduced with v4.18 commit 8c3d20aada70 ("scsi: zfcp: fix missing REC trigger trace for all objects in ERP_FAILED") but would now suppress helpful -Wswitch compiler warnings when building with W=1 such as the following forced example: drivers/s390/scsi/zfcp_erp.c: In function 'zfcp_erp_handle_failed': drivers/s390/scsi/zfcp_erp.c:126:2: warning: enumeration value 'ZFCP_ERP_ACTION_REOPEN_PORT_FORCED' not handled in switch [-Wswitch] switch (want) { ^~~~~~ But then again, only with W=1 we would notice unhandled enum cases. Without the default cases and a missed unhandled enum case, the code might perform unforeseen things we might not want... As of today, we never run through the removed default case, so removing it is no functional change. In the future, we never should run through a default case but introduce the necessary specific case(s) to handle new functionality. Signed-off-by: Steffen Maier Reviewed-by: Benjamin Block Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/s390/scsi/zfcp_erp.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index 64d70de98cdb..8f90e4cea254 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -179,9 +179,6 @@ static int zfcp_erp_handle_failed(int want, struct zfcp_adapter *adapter, adapter, ZFCP_STATUS_COMMON_ERP_FAILED); } break; - default: - need = 0; - break; } return need; From 7291f13968761680615ab613f0832433b1123510 Mon Sep 17 00:00:00 2001 From: Vitaly Chikunov Date: Mon, 5 Nov 2018 11:36:18 +0300 Subject: [PATCH 1475/3715] crypto: ecc - check for invalid values in the key verification test [ Upstream commit 2eb4942b6609d35a4e835644a33203b0aef7443d ] Currently used scalar multiplication algorithm (Matthieu Rivain, 2011) have invalid values for scalar == 1, n-1, and for regularized version n-2, which was previously not checked. Verify that they are not used as private keys. Signed-off-by: Vitaly Chikunov Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/ecc.c | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/crypto/ecc.c b/crypto/ecc.c index 18f32f2a5e1c..3b422e24e647 100644 --- a/crypto/ecc.c +++ b/crypto/ecc.c @@ -904,28 +904,41 @@ static inline void ecc_swap_digits(const u64 *in, u64 *out, out[i] = __swab64(in[ndigits - 1 - i]); } +static int __ecc_is_key_valid(const struct ecc_curve *curve, + const u64 *private_key, unsigned int ndigits) +{ + u64 one[ECC_MAX_DIGITS] = { 1, }; + u64 res[ECC_MAX_DIGITS]; + + if (!private_key) + return -EINVAL; + + if (curve->g.ndigits != ndigits) + return -EINVAL; + + /* Make sure the private key is in the range [2, n-3]. */ + if (vli_cmp(one, private_key, ndigits) != -1) + return -EINVAL; + vli_sub(res, curve->n, one, ndigits); + vli_sub(res, res, one, ndigits); + if (vli_cmp(res, private_key, ndigits) != 1) + return -EINVAL; + + return 0; +} + int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits, const u64 *private_key, unsigned int private_key_len) { int nbytes; const struct ecc_curve *curve = ecc_get_curve(curve_id); - if (!private_key) - return -EINVAL; - nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT; if (private_key_len != nbytes) return -EINVAL; - if (vli_is_zero(private_key, ndigits)) - return -EINVAL; - - /* Make sure the private key is in the range [1, n-1]. */ - if (vli_cmp(curve->n, private_key, ndigits) != 1) - return -EINVAL; - - return 0; + return __ecc_is_key_valid(curve, private_key, ndigits); } /* @@ -971,11 +984,8 @@ int ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits, u64 *privkey) if (err) return err; - if (vli_is_zero(priv, ndigits)) - return -EINVAL; - - /* Make sure the private key is in the range [1, n-1]. */ - if (vli_cmp(curve->n, priv, ndigits) != 1) + /* Make sure the private key is in the valid range. */ + if (__ecc_is_key_valid(curve, priv, ndigits)) return -EINVAL; ecc_swap_digits(priv, privkey, ndigits); From bb7c0f87426b3686ef1fe306b08fb7408ec2cd50 Mon Sep 17 00:00:00 2001 From: Raveendra Padasalagi Date: Tue, 6 Nov 2018 13:58:58 +0530 Subject: [PATCH 1476/3715] crypto: bcm - fix normal/non key hash algorithm failure [ Upstream commit 4f0129d13e69bad0363fd75553fb22897b32c379 ] Remove setkey() callback handler for normal/non key hash algorithms and keep it for AES-CBC/CMAC which needs key. Fixes: 9d12ba86f818 ("crypto: brcm - Add Broadcom SPU driver") Signed-off-by: Raveendra Padasalagi Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/bcm/cipher.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/bcm/cipher.c b/drivers/crypto/bcm/cipher.c index b6be383a51a6..84422435f39b 100644 --- a/drivers/crypto/bcm/cipher.c +++ b/drivers/crypto/bcm/cipher.c @@ -4637,12 +4637,16 @@ static int spu_register_ahash(struct iproc_alg_s *driver_alg) hash->halg.statesize = sizeof(struct spu_hash_export_s); if (driver_alg->auth_info.mode != HASH_MODE_HMAC) { - hash->setkey = ahash_setkey; hash->init = ahash_init; hash->update = ahash_update; hash->final = ahash_final; hash->finup = ahash_finup; hash->digest = ahash_digest; + if ((driver_alg->auth_info.alg == HASH_ALG_AES) && + ((driver_alg->auth_info.mode == HASH_MODE_XCBC) || + (driver_alg->auth_info.mode == HASH_MODE_CMAC))) { + hash->setkey = ahash_setkey; + } } else { hash->setkey = ahash_hmac_setkey; hash->init = ahash_hmac_init; From 6aee64a401e4e5a1ca4351e5aa3336a77a3dab03 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Sat, 10 Nov 2018 20:34:11 -0500 Subject: [PATCH 1477/3715] pinctrl: qcom: ssbi-gpio: fix gpio-hog related boot issues [ Upstream commit 7ed07855773814337b9814f1c3e866df52ebce68 ] When attempting to setup up a gpio hog, device probing will repeatedly fail with -EPROBE_DEFERED errors. It is caused by a circular dependency between the gpio and pinctrl frameworks. If the gpio-ranges property is present in device tree, then the gpio framework will handle the gpio pin registration and eliminate the circular dependency. See Christian Lamparter's commit a86caa9ba5d7 ("pinctrl: msm: fix gpio-hog related boot issues") for a detailed commit message that explains the issue in much more detail. The code comment in this commit came from Christian's commit. I did not test this change against any hardware supported by this particular driver, however I was able to validate this same fix works for pinctrl-spmi-gpio.c using a LG Nexus 5 (hammerhead) phone. Signed-off-by: Brian Masney Reviewed-by: Bjorn Andersson Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c index 0e153bae322e..6bed433e5420 100644 --- a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c +++ b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c @@ -762,12 +762,23 @@ static int pm8xxx_gpio_probe(struct platform_device *pdev) return ret; } - ret = gpiochip_add_pin_range(&pctrl->chip, - dev_name(pctrl->dev), - 0, 0, pctrl->chip.ngpio); - if (ret) { - dev_err(pctrl->dev, "failed to add pin range\n"); - goto unregister_gpiochip; + /* + * For DeviceTree-supported systems, the gpio core checks the + * pinctrl's device node for the "gpio-ranges" property. + * If it is present, it takes care of adding the pin ranges + * for the driver. In this case the driver can skip ahead. + * + * In order to remain compatible with older, existing DeviceTree + * files which don't set the "gpio-ranges" property or systems that + * utilize ACPI the driver has to call gpiochip_add_pin_range(). + */ + if (!of_property_read_bool(pctrl->dev->of_node, "gpio-ranges")) { + ret = gpiochip_add_pin_range(&pctrl->chip, dev_name(pctrl->dev), + 0, 0, pctrl->chip.ngpio); + if (ret) { + dev_err(pctrl->dev, "failed to add pin range\n"); + goto unregister_gpiochip; + } } platform_set_drvdata(pdev, pctrl); From 1473301fc47c1975a5c31795c5d5d735c96862f2 Mon Sep 17 00:00:00 2001 From: Shreeya Patel Date: Sat, 17 Nov 2018 04:19:07 +0530 Subject: [PATCH 1478/3715] Staging: iio: adt7316: Fix i2c data reading, set the data field [ Upstream commit 688cd642ba0c393344c802647848da5f0d925d0e ] adt7316_i2c_read function nowhere sets the data field. It is necessary to have an appropriate value for it. Hence, assign the value stored in 'ret' variable to data field. This is an ancient bug, and as no one seems to have noticed, probably no sense in applying it to stable. Signed-off-by: Shreeya Patel Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/staging/iio/addac/adt7316-i2c.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/staging/iio/addac/adt7316-i2c.c b/drivers/staging/iio/addac/adt7316-i2c.c index f66dd3ebbab1..856bcfa60c6c 100644 --- a/drivers/staging/iio/addac/adt7316-i2c.c +++ b/drivers/staging/iio/addac/adt7316-i2c.c @@ -35,6 +35,8 @@ static int adt7316_i2c_read(void *client, u8 reg, u8 *data) return ret; } + *data = ret; + return 0; } From 2affb3126446e642d5f5ea80aff266844c925b8c Mon Sep 17 00:00:00 2001 From: Janne Huttunen Date: Fri, 16 Nov 2018 15:08:32 -0800 Subject: [PATCH 1479/3715] mm/vmstat.c: fix NUMA statistics updates [ Upstream commit 13c9aaf7fa01cc7600c61981609feadeef3354ec ] Scan through the whole array to see if an update is needed. While we're at it, use sizeof() to be safe against any possible type changes in the future. The bug here is that we wouldn't sync per-cpu counters into global ones if there was an update of numa_stats for higher cpus. Highly theoretical one though because it is much more probable that zone_stats are updated so we would refresh anyway. So I wouldn't bother to mark this for stable, yet something nice to fix. [mhocko@suse.com: changelog enhancement] Link: http://lkml.kernel.org/r/1541601517-17282-1-git-send-email-janne.huttunen@nokia.com Fixes: 1d90ca897cb0 ("mm: update NUMA counter threshold size") Signed-off-by: Janne Huttunen Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/vmstat.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mm/vmstat.c b/mm/vmstat.c index ba9168326413..e2197b03da57 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1805,12 +1805,13 @@ static bool need_update(int cpu) /* * The fast way of checking if there are any vmstat diffs. - * This works because the diffs are byte sized items. */ - if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS)) + if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS * + sizeof(p->vm_stat_diff[0]))) return true; #ifdef CONFIG_NUMA - if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS)) + if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS * + sizeof(p->vm_numa_stat_diff[0]))) return true; #endif } From 0209e997a2ed2ec2b382b2f8a4dd35125c249eb1 Mon Sep 17 00:00:00 2001 From: Katsuhiro Suzuki Date: Sun, 18 Nov 2018 13:16:12 +0900 Subject: [PATCH 1480/3715] clk: rockchip: fix I2S1 clock gate register for rk3328 [ Upstream commit 5c73ac2f8b70834a603eb2d92eb0bb464634420b ] This patch fixes definition of I2S1 clock gate register for rk3328. Current setting is not related I2S clocks. - bit6 of CRU_CLKGATE_CON0 means clk_ddrmon_en - bit6 of CRU_CLKGATE_CON1 means clk_i2s1_en Signed-off-by: Katsuhiro Suzuki Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- drivers/clk/rockchip/clk-rk3328.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/rockchip/clk-rk3328.c b/drivers/clk/rockchip/clk-rk3328.c index 33d1cf4e6d80..0e5222d1944b 100644 --- a/drivers/clk/rockchip/clk-rk3328.c +++ b/drivers/clk/rockchip/clk-rk3328.c @@ -392,7 +392,7 @@ static struct rockchip_clk_branch rk3328_clk_branches[] __initdata = { RK3328_CLKGATE_CON(1), 5, GFLAGS, &rk3328_i2s1_fracmux), GATE(SCLK_I2S1, "clk_i2s1", "i2s1_pre", CLK_SET_RATE_PARENT, - RK3328_CLKGATE_CON(0), 6, GFLAGS), + RK3328_CLKGATE_CON(1), 6, GFLAGS), COMPOSITE_NODIV(SCLK_I2S1_OUT, "i2s1_out", mux_i2s1out_p, 0, RK3328_CLKSEL_CON(8), 12, 1, MFLAGS, RK3328_CLKGATE_CON(1), 7, GFLAGS), From d8d7d24a87f3b37661027d2539e1582975261d01 Mon Sep 17 00:00:00 2001 From: Katsuhiro Suzuki Date: Sun, 18 Nov 2018 13:18:02 +0900 Subject: [PATCH 1481/3715] clk: rockchip: fix ID of 8ch clock of I2S1 for rk3328 [ Upstream commit df7b1f2e0a4ae0fceff261e29cde63dafcf2360f ] This patch fixes mistakes in HCLK_I2S1_8CH for running I2S1 successfully. Signed-off-by: Katsuhiro Suzuki Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- include/dt-bindings/clock/rk3328-cru.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/dt-bindings/clock/rk3328-cru.h b/include/dt-bindings/clock/rk3328-cru.h index d2b26a4b43eb..4a9db1b2669b 100644 --- a/include/dt-bindings/clock/rk3328-cru.h +++ b/include/dt-bindings/clock/rk3328-cru.h @@ -178,7 +178,7 @@ #define HCLK_TSP 309 #define HCLK_GMAC 310 #define HCLK_I2S0_8CH 311 -#define HCLK_I2S1_8CH 313 +#define HCLK_I2S1_8CH 312 #define HCLK_I2S2_2CH 313 #define HCLK_SPDIF_8CH 314 #define HCLK_VOP 315 From 62a176cb87456f1800ddf3bac406c8f9bba2c698 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 16 Nov 2018 19:19:30 -0800 Subject: [PATCH 1482/3715] regulator: Fix return value of _set_load() stub [ Upstream commit f1abf67217de91f5cd3c757ae857632ca565099a ] The stub implementation of _set_load() returns a mode value which is within the bounds of valid return codes for success (the documentation just says that failures are negative error codes) but not sensible or what the actual implementation does. Fix it to just return 0. Reported-by: Cheng-Yi Chiang Signed-off-by: Mark Brown Reviewed-by: Douglas Anderson Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- include/linux/regulator/consumer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 25602afd4844..f3f76051e8b0 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -508,7 +508,7 @@ static inline int regulator_get_error_flags(struct regulator *regulator, static inline int regulator_set_load(struct regulator *regulator, int load_uA) { - return REGULATOR_MODE_NORMAL; + return 0; } static inline int regulator_allow_bypass(struct regulator *regulator, From f306329dcaf67ff3271488373b7aab2f52c39151 Mon Sep 17 00:00:00 2001 From: Xue Chaojing Date: Tue, 20 Nov 2018 05:47:33 +0000 Subject: [PATCH 1483/3715] net-next/hinic:fix a bug in set mac address [ Upstream commit 9ea72dc9430306b77c73a8a21beb51437cde1d6d ] In add_mac_addr(), if the MAC address is a muliticast address, it will not be set, which causes the network card fail to receive the multicast packet. This patch fixes this bug. Signed-off-by: Xue Chaojing Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/huawei/hinic/hinic_main.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index a696b5b2d40e..44c73215d026 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -598,9 +598,6 @@ static int add_mac_addr(struct net_device *netdev, const u8 *addr) u16 vid = 0; int err; - if (!is_valid_ether_addr(addr)) - return -EADDRNOTAVAIL; - netif_info(nic_dev, drv, netdev, "set mac addr = %02x %02x %02x %02x %02x %02x\n", addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); @@ -724,6 +721,7 @@ static void set_rx_mode(struct work_struct *work) { struct hinic_rx_mode_work *rx_mode_work = work_to_rx_mode_work(work); struct hinic_dev *nic_dev = rx_mode_work_to_nic_dev(rx_mode_work); + struct netdev_hw_addr *ha; netif_info(nic_dev, drv, nic_dev->netdev, "set rx mode work\n"); @@ -731,6 +729,9 @@ static void set_rx_mode(struct work_struct *work) __dev_uc_sync(nic_dev->netdev, add_mac_addr, remove_mac_addr); __dev_mc_sync(nic_dev->netdev, add_mac_addr, remove_mac_addr); + + netdev_for_each_mc_addr(ha, nic_dev->netdev) + add_mac_addr(nic_dev->netdev, ha->addr); } static void hinic_set_rx_mode(struct net_device *netdev) From 427001403f3b12452553e727f1edb094f8b94b5c Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 19 Nov 2018 13:31:10 -0800 Subject: [PATCH 1484/3715] iomap: sub-block dio needs to zeroout beyond EOF [ Upstream commit b450672fb66b4a991a5b55ee24209ac7ae7690ce ] If we are doing sub-block dio that extends EOF, we need to zero the unused tail of the block to initialise the data in it it. If we do not zero the tail of the block, then an immediate mmap read of the EOF block will expose stale data beyond EOF to userspace. Found with fsx running sub-block DIO sizes vs MAPREAD/MAPWRITE operations. Fix this by detecting if the end of the DIO write is beyond EOF and zeroing the tail if necessary. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Sasha Levin --- fs/iomap.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/iomap.c b/fs/iomap.c index 467d98bf7054..1cf160ced0d4 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -941,7 +941,14 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, dio->submit.cookie = submit_bio(bio); } while (nr_pages); - if (need_zeroout) { + /* + * We need to zeroout the tail of a sub-block write if the extent type + * requires zeroing or the write extends beyond EOF. If we don't zero + * the block tail in the latter case, we can expose stale data via mmap + * reads of the EOF block. + */ + if (need_zeroout || + ((dio->flags & IOMAP_DIO_WRITE) && pos >= i_size_read(inode))) { /* zero out from the end of the write to the end of the block */ pad = pos & (fs_block_size - 1); if (pad) From 861e54e55ede06fc6f094baed3b3ab3fdeff64dd Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Thu, 22 Nov 2018 00:37:29 +0200 Subject: [PATCH 1485/3715] MIPS: OCTEON: octeon-platform: fix typing [ Upstream commit 2cf1c8933dd93088cfb5f8f58b3bb9bbdf1781b9 ] Use correct type for fdt_property nameoff field. Signed-off-by: Aaro Koskinen Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/21204/ Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@linux-mips.org Signed-off-by: Sasha Levin --- arch/mips/cavium-octeon/octeon-platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/cavium-octeon/octeon-platform.c b/arch/mips/cavium-octeon/octeon-platform.c index 1d92efb82c37..e1e24118c169 100644 --- a/arch/mips/cavium-octeon/octeon-platform.c +++ b/arch/mips/cavium-octeon/octeon-platform.c @@ -501,7 +501,7 @@ static void __init octeon_fdt_set_phy(int eth, int phy_addr) if (phy_addr >= 256 && alt_phy > 0) { const struct fdt_property *phy_prop; struct fdt_property *alt_prop; - u32 phy_handle_name; + fdt32_t phy_handle_name; /* Use the alt phy node instead.*/ phy_prop = fdt_get_property(initial_boot_params, eth, "phy-handle", NULL); From 7d04bd4fe981cabd19eab3040a9facd4c74a8535 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Tue, 20 Nov 2018 16:46:43 +0100 Subject: [PATCH 1486/3715] net/smc: use after free fix in smc_wr_tx_put_slot() [ Upstream commit e438bae43c1e08e688c09c410407b59fc1c173b4 ] In smc_wr_tx_put_slot() field pend->idx is used after being cleared. That means always idx 0 is cleared in the wr_tx_mask. This results in a broken administration of available WR send payload buffers. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/smc/smc_wr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index de4537f66832..ed6736a1a112 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -223,12 +223,14 @@ int smc_wr_tx_put_slot(struct smc_link *link, pend = container_of(wr_pend_priv, struct smc_wr_tx_pend, priv); if (pend->idx < link->wr_tx_cnt) { + u32 idx = pend->idx; + /* clear the full struct smc_wr_tx_pend including .priv */ memset(&link->wr_tx_pends[pend->idx], 0, sizeof(link->wr_tx_pends[pend->idx])); memset(&link->wr_tx_bufs[pend->idx], 0, sizeof(link->wr_tx_bufs[pend->idx])); - test_and_clear_bit(pend->idx, link->wr_tx_mask); + test_and_clear_bit(idx, link->wr_tx_mask); return 1; } From 4a0b4f32bec9bf72b4914d70088f426a1cf123d5 Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Thu, 22 Nov 2018 11:14:38 +0800 Subject: [PATCH 1487/3715] math-emu/soft-fp.h: (_FP_ROUND_ZERO) cast 0 to void to fix warning [ Upstream commit 83312f1b7ae205dca647bf52bbe2d51303cdedfb ] _FP_ROUND_ZERO is defined as 0 and used as a statemente in macro _FP_ROUND. This generates "error: statement with no effect [-Werror=unused-value]" from gcc. Defining _FP_ROUND_ZERO as (void)0 to fix it. This modification is quoted from glibc 'commit (8ed1e7d5894000c155acbd06f)' Signed-off-by: Vincent Chen Acked-by: Greentime Hu Signed-off-by: Greentime Hu Signed-off-by: Sasha Levin --- include/math-emu/soft-fp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/math-emu/soft-fp.h b/include/math-emu/soft-fp.h index 3f284bc03180..5650c1628383 100644 --- a/include/math-emu/soft-fp.h +++ b/include/math-emu/soft-fp.h @@ -138,7 +138,7 @@ do { \ _FP_FRAC_ADDI_##wc(X, _FP_WORK_ROUND); \ } while (0) -#define _FP_ROUND_ZERO(wc, X) 0 +#define _FP_ROUND_ZERO(wc, X) (void)0 #define _FP_ROUND_PINF(wc, X) \ do { \ From aa3baaae553dc780b742f75b6a8424ed5a4afe31 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 14 Nov 2018 18:19:51 +0100 Subject: [PATCH 1488/3715] rtc: max8997: Fix the returned value in case of error in 'max8997_rtc_read_alarm()' [ Upstream commit 41ef3878203cd9218d92eaa07df4b85a2cb128fb ] In case of error, we return 0. This is spurious and not consistent with the other functions of the driver. Propagate the error code instead. Signed-off-by: Christophe JAILLET Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- drivers/rtc/rtc-max8997.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-max8997.c b/drivers/rtc/rtc-max8997.c index db984d4bf952..4cce5bd448f6 100644 --- a/drivers/rtc/rtc-max8997.c +++ b/drivers/rtc/rtc-max8997.c @@ -221,7 +221,7 @@ static int max8997_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) out: mutex_unlock(&info->lock); - return 0; + return ret; } static int max8997_rtc_stop_alarm(struct max8997_rtc_info *info) From ecebcf49dded44233c891da987cc2b6224b2ff90 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Mon, 19 Nov 2018 14:34:02 +0200 Subject: [PATCH 1489/3715] rtc: dt-binding: abx80x: fix resistance scale [ Upstream commit 73852e56827f5cb5db9d6e8dd8191fc2f2e8f424 ] The abracon,tc-resistor property value is in kOhm. Signed-off-by: Baruch Siach Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- Documentation/devicetree/bindings/rtc/abracon,abx80x.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/rtc/abracon,abx80x.txt b/Documentation/devicetree/bindings/rtc/abracon,abx80x.txt index be789685a1c2..18b892d010d8 100644 --- a/Documentation/devicetree/bindings/rtc/abracon,abx80x.txt +++ b/Documentation/devicetree/bindings/rtc/abracon,abx80x.txt @@ -27,4 +27,4 @@ and valid to enable charging: - "abracon,tc-diode": should be "standard" (0.6V) or "schottky" (0.3V) - "abracon,tc-resistor": should be <0>, <3>, <6> or <11>. 0 disables the output - resistor, the other values are in ohm. + resistor, the other values are in kOhm. From 16d1e1892fffe5a319f24e14232d32ce47ce92a4 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 20 Nov 2018 16:54:28 +0100 Subject: [PATCH 1490/3715] ARM: dts: exynos: Use Samsung SoC specific compatible for DWC2 module [ Upstream commit 6035cbcceb069f87296b3cd0bc4736ad5618bf47 ] DWC2 hardware module integrated in Samsung SoCs requires some quirks to operate properly, so use Samsung SoC specific compatible to notify driver to apply respective fixes. Signed-off-by: Marek Szyprowski Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sasha Levin --- arch/arm/boot/dts/exynos3250.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/exynos3250.dtsi b/arch/arm/boot/dts/exynos3250.dtsi index aa06a02c3ff5..5ba662254909 100644 --- a/arch/arm/boot/dts/exynos3250.dtsi +++ b/arch/arm/boot/dts/exynos3250.dtsi @@ -359,7 +359,7 @@ }; hsotg: hsotg@12480000 { - compatible = "snps,dwc2"; + compatible = "samsung,s3c6400-hsotg", "snps,dwc2"; reg = <0x12480000 0x20000>; interrupts = ; clocks = <&cmu CLK_USBOTG>; From 4fb387b553ec2f34eddd2ba64feaa406ff3a5845 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Nov 2018 08:25:53 -0500 Subject: [PATCH 1491/3715] media: pulse8-cec: return 0 when invalidating the logical address [ Upstream commit 2e84eb9affac43eeaf834992888b72426a8cd442 ] Return 0 when invalidating the logical address. The cec core produces a warning for drivers that do this. Signed-off-by: Hans Verkuil Reported-by: Torbjorn Jansson Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/usb/pulse8-cec/pulse8-cec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/usb/pulse8-cec/pulse8-cec.c b/drivers/media/usb/pulse8-cec/pulse8-cec.c index 50146f263d90..12da631c0fda 100644 --- a/drivers/media/usb/pulse8-cec/pulse8-cec.c +++ b/drivers/media/usb/pulse8-cec/pulse8-cec.c @@ -585,7 +585,7 @@ unlock: else pulse8->config_pending = true; mutex_unlock(&pulse8->config_lock); - return err; + return log_addr == CEC_LOG_ADDR_INVALID ? 0 : err; } static int pulse8_cec_adap_transmit(struct cec_adapter *adap, u8 attempts, From fdb3d54ff703adbb6755be444bb9c5dd8f5d7d7b Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 16 Oct 2018 03:44:20 -0400 Subject: [PATCH 1492/3715] media: cec: report Vendor ID after initialization [ Upstream commit 7f02ac77c768ba2bcdd0ce719c1fca0870ffe2fb ] The CEC specification requires that the Vendor ID (if any) is reported after a logical address was claimed. This was never done, so add support for this. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/cec/cec-adap.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/media/cec/cec-adap.c b/drivers/media/cec/cec-adap.c index f8a808d45034..27e57915eb4d 100644 --- a/drivers/media/cec/cec-adap.c +++ b/drivers/media/cec/cec-adap.c @@ -1403,6 +1403,13 @@ configured: las->log_addr[i], cec_phys_addr_exp(adap->phys_addr)); cec_transmit_msg_fh(adap, &msg, NULL, false); + + /* Report Vendor ID */ + if (adap->log_addrs.vendor_id != CEC_VENDOR_ID_NONE) { + cec_msg_device_vendor_id(&msg, + adap->log_addrs.vendor_id); + cec_transmit_msg_fh(adap, &msg, NULL, false); + } } adap->kthread_config = NULL; complete(&adap->config_completion); From 3efad7bca56705b36f95d54422cdabf979113635 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Tue, 6 Nov 2018 11:33:48 +0800 Subject: [PATCH 1493/3715] dmaengine: coh901318: Fix a double-lock bug [ Upstream commit 627469e4445b9b12e0229b3bdf8564d5ce384dd7 ] The function coh901318_alloc_chan_resources() calls spin_lock_irqsave() before calling coh901318_config(). But coh901318_config() calls spin_lock_irqsave() again in its definition, which may cause a double-lock bug. Because coh901318_config() is only called by coh901318_alloc_chan_resources(), the bug fix is to remove the calls to spin-lock and -unlock functions in coh901318_config(). Signed-off-by: Jia-Ju Bai Reviewed-by: Linus Walleij Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/dma/coh901318.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c index 74794c9859f6..a8886abf0c75 100644 --- a/drivers/dma/coh901318.c +++ b/drivers/dma/coh901318.c @@ -1802,8 +1802,6 @@ static int coh901318_config(struct coh901318_chan *cohc, int channel = cohc->id; void __iomem *virtbase = cohc->base->virtbase; - spin_lock_irqsave(&cohc->lock, flags); - if (param) p = param; else @@ -1823,8 +1821,6 @@ static int coh901318_config(struct coh901318_chan *cohc, coh901318_set_conf(cohc, p->config); coh901318_set_ctrl(cohc, p->ctrl_lli_last); - spin_unlock_irqrestore(&cohc->lock, flags); - return 0; } From 2a1723f99c3b9d19361b70660a29e0f93577a424 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Mon, 26 Nov 2018 13:34:15 +0530 Subject: [PATCH 1494/3715] dmaengine: coh901318: Remove unused variable commit 35faaf0df42d285b40f8a6310afbe096720f7758 upstream. Commit 627469e4445b ("dmaengine: coh901318: Fix a double-lock bug") left flags variable unused, so remove it to fix the warning. drivers/dma/coh901318.c: In function 'coh901318_config': drivers/dma/coh901318.c:1805:16: warning: unused variable 'flags' [-Wunused-variable] unsigned long flags; ^~~~~ Fixes: 627469e4445b ("dmaengine: coh901318: Fix a double-lock bug") Reported-By: Stephen Rothwell Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/coh901318.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c index a8886abf0c75..6d7d2d54eacf 100644 --- a/drivers/dma/coh901318.c +++ b/drivers/dma/coh901318.c @@ -1797,7 +1797,6 @@ static struct dma_chan *coh901318_xlate(struct of_phandle_args *dma_spec, static int coh901318_config(struct coh901318_chan *cohc, struct coh901318_params *param) { - unsigned long flags; const struct coh901318_params *p; int channel = cohc->id; void __iomem *virtbase = cohc->base->virtbase; From 2d91b55213fd41c116013dc39d9ec20f2011c9ea Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Sat, 17 Nov 2018 17:17:21 +0100 Subject: [PATCH 1495/3715] dmaengine: dw-dmac: implement dma protection control setting [ Upstream commit 7b0c03ecc42fb223baf015877fee9d517c2c8af1 ] This patch adds a new device-tree property that allows to specify the dma protection control bits for the all of the DMA controller's channel uniformly. Setting the "correct" bits can have a huge impact on the PPC460EX and APM82181 that use this DMA engine in combination with a DesignWare' SATA-II core (sata_dwc_460ex driver). In the OpenWrt Forum, the user takimata reported that: |It seems your patch unleashed the full power of the SATA port. |Where I was previously hitting a really hard limit at around |82 MB/s for reading and 27 MB/s for writing, I am now getting this: | |root@OpenWrt:/mnt# time dd if=/dev/zero of=tempfile bs=1M count=1024 |1024+0 records in |1024+0 records out |real 0m 13.65s |user 0m 0.01s |sys 0m 11.89s | |root@OpenWrt:/mnt# time dd if=tempfile of=/dev/null bs=1M count=1024 |1024+0 records in |1024+0 records out |real 0m 8.41s |user 0m 0.01s |sys 0m 4.70s | |This means: 121 MB/s reading and 75 MB/s writing! | |The drive is a WD Green WD10EARX taken from an older MBL Single. |I repeated the test a few times with even larger files to rule out |any caching, I'm still seeing the same great performance. OpenWrt is |now completely on par with the original MBL firmware's performance. Another user And.short reported: |I can report that your fix worked! Boots up fine with two |drives even with more partitions, and no more reboot on |concurrent disk access! A closer look into the sata_dwc_460ex code revealed that the driver did initally set the correct protection control bits. However, this feature was lost when the sata_dwc_460ex driver was converted to the generic DMA driver framework. BugLink: https://forum.openwrt.org/t/wd-mybook-live-duo-two-disks/16195/55 BugLink: https://forum.openwrt.org/t/wd-mybook-live-duo-two-disks/16195/50 Fixes: 8b3444852a2b ("sata_dwc_460ex: move to generic DMA driver") Reviewed-by: Andy Shevchenko Signed-off-by: Christian Lamparter Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/dw/core.c | 2 ++ drivers/dma/dw/platform.c | 6 ++++++ drivers/dma/dw/regs.h | 4 ++++ include/linux/platform_data/dma-dw.h | 6 ++++++ 4 files changed, 18 insertions(+) diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index 0f389e008ce6..055d83b6cb68 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -160,12 +160,14 @@ static void dwc_initialize_chan_idma32(struct dw_dma_chan *dwc) static void dwc_initialize_chan_dw(struct dw_dma_chan *dwc) { + struct dw_dma *dw = to_dw_dma(dwc->chan.device); u32 cfghi = DWC_CFGH_FIFO_MODE; u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority); bool hs_polarity = dwc->dws.hs_polarity; cfghi |= DWC_CFGH_DST_PER(dwc->dws.dst_id); cfghi |= DWC_CFGH_SRC_PER(dwc->dws.src_id); + cfghi |= DWC_CFGH_PROTCTL(dw->pdata->protctl); /* Set polarity of handshake interface */ cfglo |= hs_polarity ? DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL : 0; diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c index bc31fe802061..46a519e07195 100644 --- a/drivers/dma/dw/platform.c +++ b/drivers/dma/dw/platform.c @@ -162,6 +162,12 @@ dw_dma_parse_dt(struct platform_device *pdev) pdata->multi_block[tmp] = 1; } + if (!of_property_read_u32(np, "snps,dma-protection-control", &tmp)) { + if (tmp > CHAN_PROTCTL_MASK) + return NULL; + pdata->protctl = tmp; + } + return pdata; } #else diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h index 09e7dfdbb790..646c9c960c07 100644 --- a/drivers/dma/dw/regs.h +++ b/drivers/dma/dw/regs.h @@ -200,6 +200,10 @@ enum dw_dma_msize { #define DWC_CFGH_FCMODE (1 << 0) #define DWC_CFGH_FIFO_MODE (1 << 1) #define DWC_CFGH_PROTCTL(x) ((x) << 2) +#define DWC_CFGH_PROTCTL_DATA (0 << 2) /* data access - always set */ +#define DWC_CFGH_PROTCTL_PRIV (1 << 2) /* privileged -> AHB HPROT[1] */ +#define DWC_CFGH_PROTCTL_BUFFER (2 << 2) /* bufferable -> AHB HPROT[2] */ +#define DWC_CFGH_PROTCTL_CACHE (4 << 2) /* cacheable -> AHB HPROT[3] */ #define DWC_CFGH_DS_UPD_EN (1 << 5) #define DWC_CFGH_SS_UPD_EN (1 << 6) #define DWC_CFGH_SRC_PER(x) ((x) << 7) diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h index 896cb71a382c..1a1d58ebffbf 100644 --- a/include/linux/platform_data/dma-dw.h +++ b/include/linux/platform_data/dma-dw.h @@ -49,6 +49,7 @@ struct dw_dma_slave { * @data_width: Maximum data width supported by hardware per AHB master * (in bytes, power of 2) * @multi_block: Multi block transfers supported by hardware per channel. + * @protctl: Protection control signals setting per channel. */ struct dw_dma_platform_data { unsigned int nr_channels; @@ -65,6 +66,11 @@ struct dw_dma_platform_data { unsigned char nr_masters; unsigned char data_width[DW_DMA_MAX_NR_MASTERS]; unsigned char multi_block[DW_DMA_MAX_NR_CHANNELS]; +#define CHAN_PROTCTL_PRIVILEGED BIT(0) +#define CHAN_PROTCTL_BUFFERABLE BIT(1) +#define CHAN_PROTCTL_CACHEABLE BIT(2) +#define CHAN_PROTCTL_MASK GENMASK(2, 0) + unsigned char protctl; }; #endif /* _PLATFORM_DATA_DMA_DW_H */ From 0b741a6cb29b4ff2c8b9bce0f9210274e5dab1c8 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Wed, 7 Nov 2018 17:55:19 -0800 Subject: [PATCH 1496/3715] usb: dwc3: debugfs: Properly print/set link state for HS [ Upstream commit 0d36dede457873404becd7c9cb9d0f2bcfd0dcd9 ] Highspeed device and below has different state names than superspeed and higher. Add proper checks and printouts of link states for highspeed and below. Signed-off-by: Thinh Nguyen Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/dwc3/debug.h | 29 +++++++++++++++++++++++++++++ drivers/usb/dwc3/debugfs.c | 19 +++++++++++++++++-- 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/debug.h b/drivers/usb/dwc3/debug.h index 5e9c070ec874..1b4c2f8bb3da 100644 --- a/drivers/usb/dwc3/debug.h +++ b/drivers/usb/dwc3/debug.h @@ -124,6 +124,35 @@ dwc3_gadget_link_string(enum dwc3_link_state link_state) } } +/** + * dwc3_gadget_hs_link_string - returns highspeed and below link name + * @link_state: link state code + */ +static inline const char * +dwc3_gadget_hs_link_string(enum dwc3_link_state link_state) +{ + switch (link_state) { + case DWC3_LINK_STATE_U0: + return "On"; + case DWC3_LINK_STATE_U2: + return "Sleep"; + case DWC3_LINK_STATE_U3: + return "Suspend"; + case DWC3_LINK_STATE_SS_DIS: + return "Disconnected"; + case DWC3_LINK_STATE_RX_DET: + return "Early Suspend"; + case DWC3_LINK_STATE_RECOV: + return "Recovery"; + case DWC3_LINK_STATE_RESET: + return "Reset"; + case DWC3_LINK_STATE_RESUME: + return "Resume"; + default: + return "UNKNOWN link state\n"; + } +} + /** * dwc3_trb_type_string - returns TRB type as a string * @type: the type of the TRB diff --git a/drivers/usb/dwc3/debugfs.c b/drivers/usb/dwc3/debugfs.c index 4e09be80e59f..0d6a6a168a7e 100644 --- a/drivers/usb/dwc3/debugfs.c +++ b/drivers/usb/dwc3/debugfs.c @@ -436,13 +436,17 @@ static int dwc3_link_state_show(struct seq_file *s, void *unused) unsigned long flags; enum dwc3_link_state state; u32 reg; + u8 speed; spin_lock_irqsave(&dwc->lock, flags); reg = dwc3_readl(dwc->regs, DWC3_DSTS); state = DWC3_DSTS_USBLNKST(reg); - spin_unlock_irqrestore(&dwc->lock, flags); + speed = reg & DWC3_DSTS_CONNECTSPD; - seq_printf(s, "%s\n", dwc3_gadget_link_string(state)); + seq_printf(s, "%s\n", (speed >= DWC3_DSTS_SUPERSPEED) ? + dwc3_gadget_link_string(state) : + dwc3_gadget_hs_link_string(state)); + spin_unlock_irqrestore(&dwc->lock, flags); return 0; } @@ -460,6 +464,8 @@ static ssize_t dwc3_link_state_write(struct file *file, unsigned long flags; enum dwc3_link_state state = 0; char buf[32]; + u32 reg; + u8 speed; if (copy_from_user(&buf, ubuf, min_t(size_t, sizeof(buf) - 1, count))) return -EFAULT; @@ -480,6 +486,15 @@ static ssize_t dwc3_link_state_write(struct file *file, return -EINVAL; spin_lock_irqsave(&dwc->lock, flags); + reg = dwc3_readl(dwc->regs, DWC3_DSTS); + speed = reg & DWC3_DSTS_CONNECTSPD; + + if (speed < DWC3_DSTS_SUPERSPEED && + state != DWC3_LINK_STATE_RECOV) { + spin_unlock_irqrestore(&dwc->lock, flags); + return -EINVAL; + } + dwc3_gadget_set_link_state(dwc, state); spin_unlock_irqrestore(&dwc->lock, flags); From 28dd0f809425e88146072b9ef29fb46f0e909a74 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 7 Nov 2018 12:40:29 -0800 Subject: [PATCH 1497/3715] usb: dwc3: don't log probe deferrals; but do log other error codes [ Upstream commit 408d3ba006af57380fa48858b39f72fde6405031 ] It's not very useful to repeat a bunch of probe deferral errors. And it's also not very useful to log "failed" without telling the error code. Signed-off-by: Brian Norris Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/dwc3/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 48755c501201..a497b878c3e2 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1261,7 +1261,8 @@ static int dwc3_probe(struct platform_device *pdev) ret = dwc3_core_init(dwc); if (ret) { - dev_err(dev, "failed to initialize core\n"); + if (ret != -EPROBE_DEFER) + dev_err(dev, "failed to initialize core: %d\n", ret); goto err4; } From fd84a6d04ac436f218e8d1ebc71a9e32c8536dfa Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 23 Nov 2018 23:07:14 +0300 Subject: [PATCH 1498/3715] ACPI: fix acpi_find_child_device() invocation in acpi_preset_companion() [ Upstream commit f8c6d1402b89f22a3647705d63cbd171aa19a77e ] acpi_find_child_device() accepts boolean not pointer as last argument. Signed-off-by: Alexey Dobriyan [ rjw: Subject ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- include/linux/acpi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index d7a9700b9333..4bb3bca75004 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -99,7 +99,7 @@ static inline bool has_acpi_companion(struct device *dev) static inline void acpi_preset_companion(struct device *dev, struct acpi_device *parent, u64 addr) { - ACPI_COMPANION_SET(dev, acpi_find_child_device(parent, addr, NULL)); + ACPI_COMPANION_SET(dev, acpi_find_child_device(parent, addr, false)); } static inline const char *acpi_dev_name(struct acpi_device *adev) From cb4174cc219c6586c76be3529b93c001951f1f07 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Wed, 24 Oct 2018 16:08:30 +0800 Subject: [PATCH 1499/3715] f2fs: fix count of seg_freed to make sec_freed correct [ Upstream commit d6c66cd19ef322fe0d51ba09ce1b7f386acab04a ] When sbi->segs_per_sec > 1, and if some segno has 0 valid blocks before gc starts, do_garbage_collect will skip counting seg_freed++, and this will cause seg_freed < sbi->segs_per_sec and finally skip sec_freed++. Signed-off-by: Yunlong Song Signed-off-by: Chao Yu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/gc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 67120181dc2a..9865f6d52fe4 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -952,9 +952,9 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, GET_SUM_BLOCK(sbi, segno)); f2fs_put_page(sum_page, 0); - if (get_valid_blocks(sbi, segno, false) == 0 || - !PageUptodate(sum_page) || - unlikely(f2fs_cp_error(sbi))) + if (get_valid_blocks(sbi, segno, false) == 0) + goto freed; + if (!PageUptodate(sum_page) || unlikely(f2fs_cp_error(sbi))) goto next; sum = page_address(sum_page); @@ -981,6 +981,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, stat_inc_seg_count(sbi, type, gc_type); +freed: if (gc_type == FG_GC && get_valid_blocks(sbi, segno, false) == 0) seg_freed++; From 36e1c31a864db4402db215966d925d257169d874 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Tue, 30 Oct 2018 20:37:55 +0800 Subject: [PATCH 1500/3715] f2fs: change segment to section in f2fs_ioc_gc_range [ Upstream commit 67b0e42b768c9ddc3fd5ca1aee3db815cfaa635c ] f2fs_ioc_gc_range skips blocks_per_seg each time, however, f2fs_gc moves blocks of section each time, so fix it from segment to section. Signed-off-by: Yunlong Song Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d68b0132718a..a90173b856f6 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2029,7 +2029,7 @@ do_more: } ret = f2fs_gc(sbi, range.sync, true, GET_SEGNO(sbi, range.start)); - range.start += sbi->blocks_per_seg; + range.start += BLKS_PER_SEC(sbi); if (range.start <= end) goto do_more; out: From 5bfe5f331b905bdb4b2f8f17c5656d3be0ccb5d0 Mon Sep 17 00:00:00 2001 From: Otavio Salvador Date: Mon, 26 Nov 2018 15:35:03 -0200 Subject: [PATCH 1501/3715] ARM: dts: rockchip: Fix the PMU interrupt number for rv1108 [ Upstream commit c955b7aec510145129ca7aaea6ecbf6d748f5ebf ] According to the Rockchip vendor tree the PMU interrupt number is 76, so fix it accordingly. Signed-off-by: Otavio Salvador Tested-by: Fabio Berton Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm/boot/dts/rv1108.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/rv1108.dtsi b/arch/arm/boot/dts/rv1108.dtsi index e7cd1315db1b..6013d2f888c3 100644 --- a/arch/arm/boot/dts/rv1108.dtsi +++ b/arch/arm/boot/dts/rv1108.dtsi @@ -101,7 +101,7 @@ arm-pmu { compatible = "arm,cortex-a7-pmu"; - interrupts = ; + interrupts = ; }; timer { From 446815353148f9b07b26891cf81e56c652431dc1 Mon Sep 17 00:00:00 2001 From: Otavio Salvador Date: Mon, 26 Nov 2018 15:35:04 -0200 Subject: [PATCH 1502/3715] ARM: dts: rockchip: Assign the proper GPIO clocks for rv1108 [ Upstream commit efc2e0bd9594060915696a418564aefd0270b1d6 ] It is not correct to assign the 24MHz clock oscillator to the GPIO ports. Fix it by assigning the proper GPIO clocks instead. Signed-off-by: Otavio Salvador Tested-by: Fabio Berton Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm/boot/dts/rv1108.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/rv1108.dtsi b/arch/arm/boot/dts/rv1108.dtsi index 6013d2f888c3..aa4119eaea98 100644 --- a/arch/arm/boot/dts/rv1108.dtsi +++ b/arch/arm/boot/dts/rv1108.dtsi @@ -522,7 +522,7 @@ compatible = "rockchip,gpio-bank"; reg = <0x20030000 0x100>; interrupts = ; - clocks = <&xin24m>; + clocks = <&cru PCLK_GPIO0_PMU>; gpio-controller; #gpio-cells = <2>; @@ -535,7 +535,7 @@ compatible = "rockchip,gpio-bank"; reg = <0x10310000 0x100>; interrupts = ; - clocks = <&xin24m>; + clocks = <&cru PCLK_GPIO1>; gpio-controller; #gpio-cells = <2>; @@ -548,7 +548,7 @@ compatible = "rockchip,gpio-bank"; reg = <0x10320000 0x100>; interrupts = ; - clocks = <&xin24m>; + clocks = <&cru PCLK_GPIO2>; gpio-controller; #gpio-cells = <2>; @@ -561,7 +561,7 @@ compatible = "rockchip,gpio-bank"; reg = <0x10330000 0x100>; interrupts = ; - clocks = <&xin24m>; + clocks = <&cru PCLK_GPIO3>; gpio-controller; #gpio-cells = <2>; From 06f891d78348d389955365d4c199f827ff617723 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Mon, 26 Nov 2018 13:31:41 +0530 Subject: [PATCH 1503/3715] f2fs: fix to allow node segment for GC by ioctl path [ Upstream commit 08ac9a3870f6babb2b1fff46118536ca8a71ef19 ] Allow node type segments also to be GC'd via f2fs ioctl F2FS_IOC_GARBAGE_COLLECT_RANGE. Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/gc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 9865f6d52fe4..c2e4c6ce2cf7 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -330,8 +330,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, p.min_cost = get_max_cost(sbi, &p); if (*result != NULL_SEGNO) { - if (IS_DATASEG(get_seg_entry(sbi, *result)->type) && - get_valid_blocks(sbi, *result, false) && + if (get_valid_blocks(sbi, *result, false) && !sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result))) p.min_segno = *result; goto out; From 616b460b9eb2fafaa2707c07ed73a9b448a1a126 Mon Sep 17 00:00:00 2001 From: David Miller Date: Mon, 26 Nov 2018 14:52:18 -0800 Subject: [PATCH 1504/3715] sparc: Correct ctx->saw_frame_pointer logic. [ Upstream commit e2ac579a7a18bcd9e8cf14cf42eac0b8a2ba6c4b ] We need to initialize the frame pointer register not just if it is seen as a source operand, but also if it is seen as the destination operand of a store or an atomic instruction (which effectively is a source operand). This is exercised by test_verifier's "non-invalid fp arithmetic" Signed-off-by: David S. Miller Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- arch/sparc/net/bpf_jit_comp_64.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c index adfb4581bd80..dfb1a62abe93 100644 --- a/arch/sparc/net/bpf_jit_comp_64.c +++ b/arch/sparc/net/bpf_jit_comp_64.c @@ -1326,6 +1326,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) const u8 tmp2 = bpf2sparc[TMP_REG_2]; u32 opcode = 0, rs2; + if (insn->dst_reg == BPF_REG_FP) + ctx->saw_frame_pointer = true; + ctx->tmp_2_used = true; emit_loadimm(imm, tmp2, ctx); @@ -1364,6 +1367,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) const u8 tmp = bpf2sparc[TMP_REG_1]; u32 opcode = 0, rs2; + if (insn->dst_reg == BPF_REG_FP) + ctx->saw_frame_pointer = true; + switch (BPF_SIZE(code)) { case BPF_W: opcode = ST32; @@ -1396,6 +1402,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) const u8 tmp2 = bpf2sparc[TMP_REG_2]; const u8 tmp3 = bpf2sparc[TMP_REG_3]; + if (insn->dst_reg == BPF_REG_FP) + ctx->saw_frame_pointer = true; + ctx->tmp_1_used = true; ctx->tmp_2_used = true; ctx->tmp_3_used = true; @@ -1416,6 +1425,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) const u8 tmp2 = bpf2sparc[TMP_REG_2]; const u8 tmp3 = bpf2sparc[TMP_REG_3]; + if (insn->dst_reg == BPF_REG_FP) + ctx->saw_frame_pointer = true; + ctx->tmp_1_used = true; ctx->tmp_2_used = true; ctx->tmp_3_used = true; From 4d9021f2d4310c248481b815deb6c7ffb31981d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Wed, 29 Aug 2018 23:29:21 +0200 Subject: [PATCH 1505/3715] dma-mapping: fix return type of dma_set_max_seg_size() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c9d76d0655c06b8c1f944e46c4fd9e9cf4b331c0 ] The function dma_set_max_seg_size() can return either 0 on success or -EIO on error. Change its return type from unsigned int to int to capture this. Signed-off-by: Niklas Söderlund Reviewed-by: Geert Uytterhoeven Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- include/linux/dma-mapping.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 7bf3b99e6fbb..9aee5f345e29 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -650,8 +650,7 @@ static inline unsigned int dma_get_max_seg_size(struct device *dev) return SZ_64K; } -static inline unsigned int dma_set_max_seg_size(struct device *dev, - unsigned int size) +static inline int dma_set_max_seg_size(struct device *dev, unsigned int size) { if (dev->dma_parms) { dev->dma_parms->max_segment_size = size; From 3e919ae954cc93e1ce379191045ad1660d94412d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 24 Nov 2018 12:34:10 +0000 Subject: [PATCH 1506/3715] altera-stapl: check for a null key before strcasecmp'ing it [ Upstream commit 9ccb645683ef46e3c52c12c088a368baa58447d4 ] Currently the null check on key is occurring after the strcasecmp on the key, hence there is a potential null pointer dereference on key. Fix this by checking if key is null first. Also replace the == 0 check on strcasecmp with just the ! operator. Detected by CoverityScan, CID#1248787 ("Dereference before null check") Fixes: fa766c9be58b ("[media] Altera FPGA firmware download module") Signed-off-by: Colin Ian King Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/misc/altera-stapl/altera.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/misc/altera-stapl/altera.c b/drivers/misc/altera-stapl/altera.c index f53e217e963f..494e263daa74 100644 --- a/drivers/misc/altera-stapl/altera.c +++ b/drivers/misc/altera-stapl/altera.c @@ -2176,8 +2176,7 @@ static int altera_get_note(u8 *p, s32 program_size, key_ptr = &p[note_strings + get_unaligned_be32( &p[note_table + (8 * i)])]; - if ((strncasecmp(key, key_ptr, strlen(key_ptr)) == 0) && - (key != NULL)) { + if (key && !strncasecmp(key, key_ptr, strlen(key_ptr))) { status = 0; value_ptr = &p[note_strings + From b287fd473e054a9d0cbc625ca96343e870a61a5e Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Wed, 14 Nov 2018 18:49:38 +0100 Subject: [PATCH 1507/3715] serial: imx: fix error handling in console_setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 63fd4b94b948c14eeb27a3bbf50ea0f7f0593bad ] The ipg clock only needs to be unprepared in case preparing per clock fails. The ipg clock has already disabled at the point. Fixes: 1cf93e0d5488 ("serial: imx: remove the uart_console() check") Signed-off-by: Stefan Agner Reviewed-by: Uwe Kleine-König Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/imx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 4e827e5a52a3..aae68230fb7b 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -1956,7 +1956,7 @@ imx_console_setup(struct console *co, char *options) retval = clk_prepare(sport->clk_per); if (retval) - clk_disable_unprepare(sport->clk_ipg); + clk_unprepare(sport->clk_ipg); error_console: return retval; From ed799aa6288884ff34ea5bdb3d13d38086b54a83 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Wed, 14 Nov 2018 18:29:13 +0100 Subject: [PATCH 1508/3715] i2c: imx: don't print error message on probe defer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit fece4978510e43f09c8cd386fee15210e8c68493 ] Probe deferral is a normal operating condition in the probe function, so don't spam the log with an error in this case. Signed-off-by: Lucas Stach Acked-by: Uwe Kleine-König Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-imx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index b73dd837fb53..26f83029f64a 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -1088,7 +1088,8 @@ static int i2c_imx_probe(struct platform_device *pdev) /* Get I2C clock */ i2c_imx->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(i2c_imx->clk)) { - dev_err(&pdev->dev, "can't get I2C clock\n"); + if (PTR_ERR(i2c_imx->clk) != -EPROBE_DEFER) + dev_err(&pdev->dev, "can't get I2C clock\n"); return PTR_ERR(i2c_imx->clk); } From 43e4008bc77f9b6a674e9b471cb5719d4966d174 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 26 Nov 2018 11:36:52 -0500 Subject: [PATCH 1509/3715] lockd: fix decoding of TEST results [ Upstream commit b8db159239b3f51e2b909859935cc25cb3ff3eed ] We fail to advance the read pointer when reading the stat.oh field that identifies the lock-holder in a TEST result. This turns out not to matter if the server is knfsd, which always returns a zero-length field. But other servers (Ganesha is an example) may not do this. The result is bad values in fcntl F_GETLK results. Fix this. Signed-off-by: J. Bruce Fields Signed-off-by: Sasha Levin --- fs/lockd/clnt4xdr.c | 22 ++++++---------------- fs/lockd/clntxdr.c | 22 ++++++---------------- 2 files changed, 12 insertions(+), 32 deletions(-) diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c index 00d5ef5f99f7..214a2fa1f1e3 100644 --- a/fs/lockd/clnt4xdr.c +++ b/fs/lockd/clnt4xdr.c @@ -128,24 +128,14 @@ static void encode_netobj(struct xdr_stream *xdr, static int decode_netobj(struct xdr_stream *xdr, struct xdr_netobj *obj) { - u32 length; - __be32 *p; + ssize_t ret; - p = xdr_inline_decode(xdr, 4); - if (unlikely(p == NULL)) - goto out_overflow; - length = be32_to_cpup(p++); - if (unlikely(length > XDR_MAX_NETOBJ)) - goto out_size; - obj->len = length; - obj->data = (u8 *)p; + ret = xdr_stream_decode_opaque_inline(xdr, (void *)&obj->data, + XDR_MAX_NETOBJ); + if (unlikely(ret < 0)) + return -EIO; + obj->len = ret; return 0; -out_size: - dprintk("NFS: returned netobj was too long: %u\n", length); - return -EIO; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } /* diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c index 2c6176387143..747b9c8c940a 100644 --- a/fs/lockd/clntxdr.c +++ b/fs/lockd/clntxdr.c @@ -125,24 +125,14 @@ static void encode_netobj(struct xdr_stream *xdr, static int decode_netobj(struct xdr_stream *xdr, struct xdr_netobj *obj) { - u32 length; - __be32 *p; + ssize_t ret; - p = xdr_inline_decode(xdr, 4); - if (unlikely(p == NULL)) - goto out_overflow; - length = be32_to_cpup(p++); - if (unlikely(length > XDR_MAX_NETOBJ)) - goto out_size; - obj->len = length; - obj->data = (u8 *)p; + ret = xdr_stream_decode_opaque_inline(xdr, (void *)&obj->data, + XDR_MAX_NETOBJ); + if (unlikely(ret < 0)) + return -EIO; + obj->len = ret; return 0; -out_size: - dprintk("NFS: returned netobj was too long: %u\n", length); - return -EIO; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } /* From 13c82e5c5979adcde94dd02ae9015616d06d280c Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 27 Nov 2018 07:35:34 +0000 Subject: [PATCH 1510/3715] ASoC: rsnd: tidyup registering method for rsnd_kctrl_new() [ Upstream commit 9c698e8481a15237a5b1db5f8391dd66d59e42a4 ] Current rsnd dvc.c is using flags to avoid duplicating register for MIXer case. OTOH, commit e894efef9ac7 ("ASoC: core: add support to card rebind") allows to rebind sound card without rebinding all drivers. Because of above patch and dvc.c flags, it can't re-register kctrl if only sound card was rebinded, because dvc is keeping old flags. (Of course it will be no problem if rsnd driver also be rebinded, but it is not purpose of above patch). This patch checks current card registered kctrl when registering. In MIXer case, it can avoid duplicate register if card already has same kctrl. In rebind case, it can re-register kctrl because card registered kctl had been removed when unbinding. This patch is updated version of commit b918f1bc7f1ce ("ASoC: rsnd: DVC kctrl sets once") Reported-by: Nguyen Viet Dung Signed-off-by: Kuninori Morimoto Tested-by: Nguyen Viet Dung Cc: Hiroyuki Yokoyama Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/sh/rcar/core.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index ab0bbef7eb48..bb06dd72ca9a 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -1278,6 +1278,18 @@ int rsnd_kctrl_new(struct rsnd_mod *mod, }; int ret; + /* + * 1) Avoid duplicate register (ex. MIXer case) + * 2) re-register if card was rebinded + */ + list_for_each_entry(kctrl, &card->controls, list) { + struct rsnd_kctrl_cfg *c = kctrl->private_data; + + if (strcmp(kctrl->id.name, name) == 0 && + c->mod == mod) + return 0; + } + if (size > RSND_MAX_CHANNELS) return -EINVAL; From 17fd415ec51c124bb3fc5be5db538611557d7007 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 7 Sep 2018 16:00:22 +0200 Subject: [PATCH 1511/3715] ARM: dts: sun5i: a10s: Fix HDMI output DTC warning [ Upstream commit ed5fc60b909427be6ca93d3e07a0a5f296d7000a ] Our HDMI output endpoint on the A10s DTSI has a warning under DTC: "graph node has single child node 'endpoint', #address-cells/#size-cells are not necessary". Fix this by removing those properties. Signed-off-by: Maxime Ripard Acked-by: Chen-Yu Tsai Signed-off-by: Sasha Levin --- arch/arm/boot/dts/sun5i-a10s.dtsi | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi index 18f25c5e75ae..396fb6632bf0 100644 --- a/arch/arm/boot/dts/sun5i-a10s.dtsi +++ b/arch/arm/boot/dts/sun5i-a10s.dtsi @@ -104,8 +104,6 @@ }; hdmi_out: port@1 { - #address-cells = <1>; - #size-cells = <0>; reg = <1>; }; }; From 51d1d9badf233dc5337ecdc5ed6e5c064eea836f Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 7 Nov 2018 10:58:01 +0100 Subject: [PATCH 1512/3715] ARM: dts: sun8i: v3s: Change pinctrl nodes to avoid warning [ Upstream commit 438a44ce7e51ce571f942433c6c7cb87c4c0effd ] All our pinctrl nodes were using a node name convention with a unit-address to differentiate the different muxing options. However, since those nodes didn't have a reg property, they were generating warnings in DTC. In order to accomodate for this, convert the old nodes to the syntax we've been using for the new SoCs, including removing the letter suffix of the node labels to the bank of those pins to make things more readable. Signed-off-by: Maxime Ripard Acked-by: Chen-Yu Tsai Signed-off-by: Sasha Levin --- arch/arm/boot/dts/sun8i-v3s-licheepi-zero.dts | 4 ++-- arch/arm/boot/dts/sun8i-v3s.dtsi | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm/boot/dts/sun8i-v3s-licheepi-zero.dts b/arch/arm/boot/dts/sun8i-v3s-licheepi-zero.dts index 387fc2aa546d..333df90e8037 100644 --- a/arch/arm/boot/dts/sun8i-v3s-licheepi-zero.dts +++ b/arch/arm/boot/dts/sun8i-v3s-licheepi-zero.dts @@ -78,7 +78,7 @@ }; &mmc0 { - pinctrl-0 = <&mmc0_pins_a>; + pinctrl-0 = <&mmc0_pins>; pinctrl-names = "default"; broken-cd; bus-width = <4>; @@ -87,7 +87,7 @@ }; &uart0 { - pinctrl-0 = <&uart0_pins_a>; + pinctrl-0 = <&uart0_pb_pins>; pinctrl-names = "default"; status = "okay"; }; diff --git a/arch/arm/boot/dts/sun8i-v3s.dtsi b/arch/arm/boot/dts/sun8i-v3s.dtsi index 3a06dc5b3746..da5823c6fa3e 100644 --- a/arch/arm/boot/dts/sun8i-v3s.dtsi +++ b/arch/arm/boot/dts/sun8i-v3s.dtsi @@ -292,17 +292,17 @@ interrupt-controller; #interrupt-cells = <3>; - i2c0_pins: i2c0 { + i2c0_pins: i2c0-pins { pins = "PB6", "PB7"; function = "i2c0"; }; - uart0_pins_a: uart0@0 { + uart0_pb_pins: uart0-pb-pins { pins = "PB8", "PB9"; function = "uart0"; }; - mmc0_pins_a: mmc0@0 { + mmc0_pins: mmc0-pins { pins = "PF0", "PF1", "PF2", "PF3", "PF4", "PF5"; function = "mmc0"; @@ -310,7 +310,7 @@ bias-pull-up; }; - mmc1_pins: mmc1 { + mmc1_pins: mmc1-pins { pins = "PG0", "PG1", "PG2", "PG3", "PG4", "PG5"; function = "mmc1"; @@ -318,7 +318,7 @@ bias-pull-up; }; - spi0_pins: spi0 { + spi0_pins: spi0-pins { pins = "PC0", "PC1", "PC2", "PC3"; function = "spi0"; }; From 180b858fb530f4c38b3668da16b22f35461a8c70 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Wed, 28 Nov 2018 15:25:00 +0800 Subject: [PATCH 1513/3715] dlm: NULL check before kmem_cache_destroy is not needed [ Upstream commit f31a89692830061bceba8469607e4e4b0f900159 ] kmem_cache_destroy(NULL) is safe, so removes NULL check before freeing the mem. This patch also fix ifnullfree.cocci warnings. Signed-off-by: Wen Yang Signed-off-by: David Teigland Signed-off-by: Sasha Levin --- fs/dlm/memory.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c index 7cd24bccd4fe..37be29f21d04 100644 --- a/fs/dlm/memory.c +++ b/fs/dlm/memory.c @@ -38,10 +38,8 @@ int __init dlm_memory_init(void) void dlm_memory_exit(void) { - if (lkb_cache) - kmem_cache_destroy(lkb_cache); - if (rsb_cache) - kmem_cache_destroy(rsb_cache); + kmem_cache_destroy(lkb_cache); + kmem_cache_destroy(rsb_cache); } char *dlm_allocate_lvb(struct dlm_ls *ls) @@ -86,8 +84,7 @@ void dlm_free_lkb(struct dlm_lkb *lkb) struct dlm_user_args *ua; ua = lkb->lkb_ua; if (ua) { - if (ua->lksb.sb_lvbptr) - kfree(ua->lksb.sb_lvbptr); + kfree(ua->lksb.sb_lvbptr); kfree(ua); } } From c02b397e8a82297738dd8cc48bc072f27297ccf7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20P=C3=A9ron?= Date: Tue, 9 Oct 2018 13:28:37 +0200 Subject: [PATCH 1514/3715] ARM: debug: enable UART1 for socfpga Cyclone5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f6628486c8489e91c513b62608f89ccdb745600d ] Cyclone5 and Arria10 doesn't have the same memory map for UART1. Split the SOCFPGA_UART1 into 2 options to allow debugging on UART1 for Cyclone5. Signed-off-by: Clément Péron Signed-off-by: Dinh Nguyen Signed-off-by: Sasha Levin --- arch/arm/Kconfig.debug | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index fd4b679945d3..b14f154919a5 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -1023,14 +1023,21 @@ choice Say Y here if you want kernel low-level debugging support on SOCFPGA(Cyclone 5 and Arria 5) based platforms. - config DEBUG_SOCFPGA_UART1 + config DEBUG_SOCFPGA_ARRIA10_UART1 depends on ARCH_SOCFPGA - bool "Use SOCFPGA UART1 for low-level debug" + bool "Use SOCFPGA Arria10 UART1 for low-level debug" select DEBUG_UART_8250 help Say Y here if you want kernel low-level debugging support on SOCFPGA(Arria 10) based platforms. + config DEBUG_SOCFPGA_CYCLONE5_UART1 + depends on ARCH_SOCFPGA + bool "Use SOCFPGA Cyclone 5 UART1 for low-level debug" + select DEBUG_UART_8250 + help + Say Y here if you want kernel low-level debugging support + on SOCFPGA(Cyclone 5 and Arria 5) based platforms. config DEBUG_SUN9I_UART0 bool "Kernel low-level debugging messages via sun9i UART0" @@ -1585,7 +1592,8 @@ config DEBUG_UART_PHYS default 0xfe800000 if ARCH_IOP32X default 0xff690000 if DEBUG_RK32_UART2 default 0xffc02000 if DEBUG_SOCFPGA_UART0 - default 0xffc02100 if DEBUG_SOCFPGA_UART1 + default 0xffc02100 if DEBUG_SOCFPGA_ARRIA10_UART1 + default 0xffc03000 if DEBUG_SOCFPGA_CYCLONE5_UART1 default 0xffd82340 if ARCH_IOP13XX default 0xffe40000 if DEBUG_RCAR_GEN1_SCIF0 default 0xffe42000 if DEBUG_RCAR_GEN1_SCIF2 @@ -1689,7 +1697,8 @@ config DEBUG_UART_VIRT default 0xfeb30c00 if DEBUG_KEYSTONE_UART0 default 0xfeb31000 if DEBUG_KEYSTONE_UART1 default 0xfec02000 if DEBUG_SOCFPGA_UART0 - default 0xfec02100 if DEBUG_SOCFPGA_UART1 + default 0xfec02100 if DEBUG_SOCFPGA_ARRIA10_UART1 + default 0xfec03000 if DEBUG_SOCFPGA_CYCLONE5_UART1 default 0xfec12000 if (DEBUG_MVEBU_UART0 || DEBUG_MVEBU_UART0_ALTERNATE) && ARCH_MVEBU default 0xfec12100 if DEBUG_MVEBU_UART1_ALTERNATE default 0xfec10000 if DEBUG_SIRFATLAS7_UART0 @@ -1737,9 +1746,9 @@ config DEBUG_UART_8250_WORD depends on DEBUG_LL_UART_8250 || DEBUG_UART_8250 depends on DEBUG_UART_8250_SHIFT >= 2 default y if DEBUG_PICOXCELL_UART || \ - DEBUG_SOCFPGA_UART0 || DEBUG_SOCFPGA_UART1 || \ - DEBUG_KEYSTONE_UART0 || DEBUG_KEYSTONE_UART1 || \ - DEBUG_ALPINE_UART0 || \ + DEBUG_SOCFPGA_UART0 || DEBUG_SOCFPGA_ARRIA10_UART1 || \ + DEBUG_SOCFPGA_CYCLONE5_UART1 || DEBUG_KEYSTONE_UART0 || \ + DEBUG_KEYSTONE_UART1 || DEBUG_ALPINE_UART0 || \ DEBUG_DAVINCI_DMx_UART0 || DEBUG_DAVINCI_DA8XX_UART1 || \ DEBUG_DAVINCI_DA8XX_UART2 || \ DEBUG_BCM_KONA_UART || DEBUG_RK32_UART2 From 9a889ad76bd7db3fb6ef09511d27818970d4bae6 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Tue, 6 Nov 2018 13:35:08 -0500 Subject: [PATCH 1515/3715] nfsd: fix a warning in __cld_pipe_upcall() [ Upstream commit b493fd31c0b89d9453917e977002de58bebc3802 ] __cld_pipe_upcall() emits a "do not call blocking ops when !TASK_RUNNING" warning due to the dput() call in rpc_queue_upcall(). Fix it by using a completion instead of hand coding the wait. Signed-off-by: Scott Mayhew Signed-off-by: J. Bruce Fields Signed-off-by: Sasha Levin --- fs/nfsd/nfs4recover.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 66eaeb1e8c2c..dc9586feab31 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -661,7 +661,7 @@ struct cld_net { struct cld_upcall { struct list_head cu_list; struct cld_net *cu_net; - struct task_struct *cu_task; + struct completion cu_done; struct cld_msg cu_msg; }; @@ -670,23 +670,18 @@ __cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg) { int ret; struct rpc_pipe_msg msg; + struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_msg); memset(&msg, 0, sizeof(msg)); msg.data = cmsg; msg.len = sizeof(*cmsg); - /* - * Set task state before we queue the upcall. That prevents - * wake_up_process in the downcall from racing with schedule. - */ - set_current_state(TASK_UNINTERRUPTIBLE); ret = rpc_queue_upcall(pipe, &msg); if (ret < 0) { - set_current_state(TASK_RUNNING); goto out; } - schedule(); + wait_for_completion(&cup->cu_done); if (msg.errno < 0) ret = msg.errno; @@ -753,7 +748,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) if (copy_from_user(&cup->cu_msg, src, mlen) != 0) return -EFAULT; - wake_up_process(cup->cu_task); + complete(&cup->cu_done); return mlen; } @@ -768,7 +763,7 @@ cld_pipe_destroy_msg(struct rpc_pipe_msg *msg) if (msg->errno >= 0) return; - wake_up_process(cup->cu_task); + complete(&cup->cu_done); } static const struct rpc_pipe_ops cld_upcall_ops = { @@ -899,7 +894,7 @@ restart_search: goto restart_search; } } - new->cu_task = current; + init_completion(&new->cu_done); new->cu_msg.cm_vers = CLD_UPCALL_VERSION; put_unaligned(cn->cn_xid++, &new->cu_msg.cm_xid); new->cu_net = cn; From 917f32f65876caf0984cad6fda41054f835416ea Mon Sep 17 00:00:00 2001 From: Young_X Date: Tue, 27 Nov 2018 06:33:16 +0000 Subject: [PATCH 1516/3715] ASoC: au8540: use 64-bit arithmetic instead of 32-bit [ Upstream commit cd7fdc45bc69a62b4e22c6e875f1f1aea566256d ] Add suffix ULL to constant 256 in order to give the compiler complete information about the proper arithmetic to use. Notice that such constant is used in a context that expects an expression of type u64 (64 bits, unsigned) and the following expression is currently being evaluated using 32-bit arithmetic: 256 * fs * 2 * mclk_src_scaling[i].param Signed-off-by: Young_X Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/nau8540.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/nau8540.c b/sound/soc/codecs/nau8540.c index f9c9933acffb..c0c64f90a61b 100644 --- a/sound/soc/codecs/nau8540.c +++ b/sound/soc/codecs/nau8540.c @@ -548,7 +548,7 @@ static int nau8540_calc_fll_param(unsigned int fll_in, fvco_max = 0; fvco_sel = ARRAY_SIZE(mclk_src_scaling); for (i = 0; i < ARRAY_SIZE(mclk_src_scaling); i++) { - fvco = 256 * fs * 2 * mclk_src_scaling[i].param; + fvco = 256ULL * fs * 2 * mclk_src_scaling[i].param; if (fvco > NAU_FVCO_MIN && fvco < NAU_FVCO_MAX && fvco_max < fvco) { fvco_max = fvco; From fb1957e583d2c85fd747057a3a26e3b4038383f5 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Mon, 19 Nov 2018 21:46:41 +0200 Subject: [PATCH 1517/3715] ARM: OMAP1/2: fix SoC name printing [ Upstream commit 04a92358b3964988c78dfe370a559ae550383886 ] Currently we get extra newlines on OMAP1/2 when the SoC name is printed: [ 0.000000] OMAP1510 [ 0.000000] revision 2 handled as 15xx id: bc058c9b93111a16 [ 0.000000] OMAP2420 [ 0.000000] Fix by using pr_cont. Signed-off-by: Aaro Koskinen Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/mach-omap1/id.c | 6 +++--- arch/arm/mach-omap2/id.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm/mach-omap1/id.c b/arch/arm/mach-omap1/id.c index 52de382fc804..7e49dfda3d2f 100644 --- a/arch/arm/mach-omap1/id.c +++ b/arch/arm/mach-omap1/id.c @@ -200,10 +200,10 @@ void __init omap_check_revision(void) printk(KERN_INFO "Unknown OMAP cpu type: 0x%02x\n", cpu_type); } - printk(KERN_INFO "OMAP%04x", omap_revision >> 16); + pr_info("OMAP%04x", omap_revision >> 16); if ((omap_revision >> 8) & 0xff) - printk(KERN_INFO "%x", (omap_revision >> 8) & 0xff); - printk(KERN_INFO " revision %i handled as %02xxx id: %08x%08x\n", + pr_cont("%x", (omap_revision >> 8) & 0xff); + pr_cont(" revision %i handled as %02xxx id: %08x%08x\n", die_rev, omap_revision & 0xff, system_serial_low, system_serial_high); } diff --git a/arch/arm/mach-omap2/id.c b/arch/arm/mach-omap2/id.c index 16cb1c195fd8..79d71b1eae59 100644 --- a/arch/arm/mach-omap2/id.c +++ b/arch/arm/mach-omap2/id.c @@ -199,8 +199,8 @@ void __init omap2xxx_check_revision(void) pr_info("%s", soc_name); if ((omap_rev() >> 8) & 0x0f) - pr_info("%s", soc_rev); - pr_info("\n"); + pr_cont("%s", soc_rev); + pr_cont("\n"); } #define OMAP3_SHOW_FEATURE(feat) \ From cf974fcbb73ab049649af31f72a04e6cb44bf87c Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Wed, 7 Nov 2018 11:45:47 +0100 Subject: [PATCH 1518/3715] arm64: dts: meson-gxl-libretech-cc: fix GPIO lines names [ Upstream commit 11fa9774612decea87144d7f950a9c53a4fe3050 ] The gpio line names were set in the pinctrl node instead of the gpio node, at the time it was merged, it worked, but was obviously wrong. This patch moves the properties to the gpio nodes. Fixes: 47884c5c746e ("ARM64: dts: meson-gxl-libretech-cc: Add GPIO lines names") Signed-off-by: Neil Armstrong Signed-off-by: Kevin Hilman Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts index 0814b6b29b86..e2c71753e327 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts @@ -139,7 +139,7 @@ }; }; -&pinctrl_aobus { +&gpio_ao { gpio-line-names = "UART TX", "UART RX", "Blue LED", @@ -152,7 +152,7 @@ "7J1 Header Pin13"; }; -&pinctrl_periphs { +&gpio { gpio-line-names = /* Bank GPIOZ */ "", "", "", "", "", "", "", "", "", "", "", "", "", "", From 214f7f64f028ce219e3d25445c42c30d0994d582 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Wed, 7 Nov 2018 11:45:48 +0100 Subject: [PATCH 1519/3715] arm64: dts: meson-gxbb-nanopi-k2: fix GPIO lines names [ Upstream commit f0783f5edb52af14ecaae6c5ce4f38e0a358f5d8 ] The gpio line names were set in the pinctrl node instead of the gpio node, at the time it was merged, it worked, but was obviously wrong. This patch moves the properties to the gpio nodes. Fixes: 12ada0513d7a ("ARM64: dts: meson-gxbb-nanopi-k2: Add GPIO lines names") Signed-off-by: Neil Armstrong Signed-off-by: Kevin Hilman Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts index 4b17a76959b2..c83c028e95af 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts @@ -178,7 +178,7 @@ pinctrl-names = "default"; }; -&pinctrl_aobus { +&gpio_ao { gpio-line-names = "UART TX", "UART RX", "Power Control", "Power Key In", "VCCK En", "CON1 Header Pin31", "I2S Header Pin6", "IR In", "I2S Header Pin7", @@ -186,7 +186,7 @@ "I2S Header Pin5", "HDMI CEC", "SYS LED"; }; -&pinctrl_periphs { +&gpio { gpio-line-names = /* Bank GPIOZ */ "Eth MDIO", "Eth MDC", "Eth RGMII RX Clk", "Eth RX DV", "Eth RX D0", "Eth RX D1", "Eth RX D2", From 24c97fa190b836c659221fdb83d67756b40875e6 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Wed, 7 Nov 2018 11:45:49 +0100 Subject: [PATCH 1520/3715] arm64: dts: meson-gxbb-odroidc2: fix GPIO lines names [ Upstream commit 2165b006b65d609140dafafcb14cce5a4aaacbab ] The gpio line names were set in the pinctrl node instead of the gpio node, at the time it was merged, it worked, but was obviously wrong. This patch moves the properties to the gpio nodes. Fixes: b03c7d6438bb ("ARM64: dts: meson-gxbb-odroidc2: Add GPIO lines names") Signed-off-by: Neil Armstrong Signed-off-by: Kevin Hilman Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts index c3c65b06ba76..4ea23df81f21 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts @@ -189,7 +189,7 @@ pinctrl-names = "default"; }; -&pinctrl_aobus { +&gpio_ao { gpio-line-names = "UART TX", "UART RX", "VCCK En", "TF 3V3/1V8 En", "USB HUB nRESET", "USB OTG Power En", "J7 Header Pin2", "IR In", "J7 Header Pin4", @@ -197,7 +197,7 @@ "HDMI CEC", "SYS LED"; }; -&pinctrl_periphs { +&gpio { gpio-line-names = /* Bank GPIOZ */ "Eth MDIO", "Eth MDC", "Eth RGMII RX Clk", "Eth RX DV", "Eth RX D0", "Eth RX D1", "Eth RX D2", From 2fe0edbd6f53f186cfbf85cc7b2c5e28fb9bfd53 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Wed, 7 Nov 2018 11:45:50 +0100 Subject: [PATCH 1521/3715] arm64: dts: meson-gxl-khadas-vim: fix GPIO lines names [ Upstream commit 5b78012636f537344bd551934387f5772c38ba80 ] The gpio line names were set in the pinctrl node instead of the gpio node, at the time it was merged, it worked, but was obviously wrong. This patch moves the properties to the gpio nodes. Fixes: 60795933b709 ("ARM64: dts: meson-gxl-khadas-vim: Add GPIO lines names") Signed-off-by: Neil Armstrong Signed-off-by: Kevin Hilman Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts index edc512ad0bac..fb5db5f33e8c 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts @@ -112,7 +112,7 @@ linux,rc-map-name = "rc-geekbox"; }; -&pinctrl_aobus { +&gpio_ao { gpio-line-names = "UART TX", "UART RX", "Power Key In", @@ -125,7 +125,7 @@ "SYS LED"; }; -&pinctrl_periphs { +&gpio { gpio-line-names = /* Bank GPIOZ */ "", "", "", "", "", "", "", "", "", "", "", "", "", "", From fdda36a1dd007f0bbfccc1f3f4b71a4d5a5e0ced Mon Sep 17 00:00:00 2001 From: Martin Schiller Date: Tue, 27 Nov 2018 09:50:27 +0100 Subject: [PATCH 1522/3715] net/x25: fix called/calling length calculation in x25_parse_address_block [ Upstream commit d449ba3d581ed29f751a59792fdc775572c66904 ] The length of the called and calling address was not calculated correctly (BCD encoding). Signed-off-by: Martin Schiller Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/x25/af_x25.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 1b830a6ee3ff..6e7ad4c6f83c 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -100,7 +100,7 @@ int x25_parse_address_block(struct sk_buff *skb, } len = *skb->data; - needed = 1 + (len >> 4) + (len & 0x0f); + needed = 1 + ((len >> 4) + (len & 0x0f) + 1) / 2; if (!pskb_may_pull(skb, needed)) { /* packet is too short to hold the addresses it claims From 87989217d997b31478e5e43e97886fcfbefa7469 Mon Sep 17 00:00:00 2001 From: Martin Schiller Date: Tue, 27 Nov 2018 09:50:28 +0100 Subject: [PATCH 1523/3715] net/x25: fix null_x25_address handling [ Upstream commit 06137619f061f498c2924f6543fa45b7d39f0501 ] o x25_find_listener(): the compare for the null_x25_address was wrong. We have to check the x25_addr of the listener socket instead of the x25_addr of the incomming call. o x25_bind(): it was not possible to bind a socket to null_x25_address Signed-off-by: Martin Schiller Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/x25/af_x25.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 6e7ad4c6f83c..a156b6dc3a72 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -288,7 +288,7 @@ static struct sock *x25_find_listener(struct x25_address *addr, sk_for_each(s, &x25_list) if ((!strcmp(addr->x25_addr, x25_sk(s)->source_addr.x25_addr) || - !strcmp(addr->x25_addr, + !strcmp(x25_sk(s)->source_addr.x25_addr, null_x25_address.x25_addr)) && s->sk_state == TCP_LISTEN) { /* @@ -684,11 +684,15 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; } - len = strlen(addr->sx25_addr.x25_addr); - for (i = 0; i < len; i++) { - if (!isdigit(addr->sx25_addr.x25_addr[i])) { - rc = -EINVAL; - goto out; + /* check for the null_x25_address */ + if (strcmp(addr->sx25_addr.x25_addr, null_x25_address.x25_addr)) { + + len = strlen(addr->sx25_addr.x25_addr); + for (i = 0; i < len; i++) { + if (!isdigit(addr->sx25_addr.x25_addr[i])) { + rc = -EINVAL; + goto out; + } } } From 6ed65deea2aa2df9ba3fa035eb8ef33709064388 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Wed, 28 Nov 2018 18:53:06 +0100 Subject: [PATCH 1524/3715] ARM: dts: mmp2: fix the gpio interrupt cell number [ Upstream commit 400583983f8a8e95ec02c9c9e2b50188753a87fb ] gpio-pxa uses two cell to encode the interrupt source: the pin number and the trigger type. Adjust the device node accordingly. Signed-off-by: Lubomir Rintel Acked-by: Pavel Machek Signed-off-by: Olof Johansson Signed-off-by: Sasha Levin --- arch/arm/boot/dts/mmp2.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/mmp2.dtsi b/arch/arm/boot/dts/mmp2.dtsi index 47e5b63339d1..e95deed6a797 100644 --- a/arch/arm/boot/dts/mmp2.dtsi +++ b/arch/arm/boot/dts/mmp2.dtsi @@ -180,7 +180,7 @@ clocks = <&soc_clocks MMP2_CLK_GPIO>; resets = <&soc_clocks MMP2_CLK_GPIO>; interrupt-controller; - #interrupt-cells = <1>; + #interrupt-cells = <2>; ranges; gcb0: gpio@d4019000 { From 7422156369281a11f0ccbbb70de9d33979942429 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 29 Nov 2018 19:05:47 -0600 Subject: [PATCH 1525/3715] ARM: dts: realview-pbx: Fix duplicate regulator nodes [ Upstream commit 7f4b001b7f6e0480b5bdab9cd8ce1711e43e5cb5 ] There's a bug in dtc in checking for duplicate node names when there's another section (e.g. "/ { };"). In this case, skeleton.dtsi provides another section. Upon removal of skeleton.dtsi, the dtb fails to build due to a duplicate node 'fixedregulator@0'. As both nodes were pretty much the same 3.3V fixed regulator, it hasn't really mattered. Fix this by renaming the nodes to something unique. In the process, drop the unit-address which shouldn't be present wtihout reg property. Cc: Linus Walleij Signed-off-by: Rob Herring Signed-off-by: Olof Johansson Signed-off-by: Sasha Levin --- arch/arm/boot/dts/arm-realview-pbx.dtsi | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/arm-realview-pbx.dtsi b/arch/arm/boot/dts/arm-realview-pbx.dtsi index 2bf3958b2e6b..068293254fbb 100644 --- a/arch/arm/boot/dts/arm-realview-pbx.dtsi +++ b/arch/arm/boot/dts/arm-realview-pbx.dtsi @@ -43,7 +43,7 @@ }; /* The voltage to the MMC card is hardwired at 3.3V */ - vmmc: fixedregulator@0 { + vmmc: regulator-vmmc { compatible = "regulator-fixed"; regulator-name = "vmmc"; regulator-min-microvolt = <3300000>; @@ -51,7 +51,7 @@ regulator-boot-on; }; - veth: fixedregulator@0 { + veth: regulator-veth { compatible = "regulator-fixed"; regulator-name = "veth"; regulator-min-microvolt = <3300000>; @@ -539,4 +539,3 @@ }; }; }; - From 5b7ff64f5040ced0118fce42b9b95230214ff70a Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 28 Nov 2018 16:06:43 -0800 Subject: [PATCH 1526/3715] tcp: fix off-by-one bug on aborting window-probing socket [ Upstream commit 3976535af0cb9fe34a55f2ffb8d7e6b39a2f8188 ] Previously there is an off-by-one bug on determining when to abort a stalled window-probing socket. This patch fixes that so it is consistent with tcp_write_timeout(). Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp_timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 592d6e9967a9..04e2c43a43a5 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -358,7 +358,7 @@ static void tcp_probe_timer(struct sock *sk) return; } - if (icsk->icsk_probes_out > max_probes) { + if (icsk->icsk_probes_out >= max_probes) { abort: tcp_write_err(sk); } else { /* Only send another probe if we didn't close things up. */ From 3dc07e00451b6f8a9021c7de214d620c7ba12f26 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 28 Nov 2018 16:06:44 -0800 Subject: [PATCH 1527/3715] tcp: fix SNMP under-estimation on failed retransmission [ Upstream commit ec641b39457e17774313b66697a8a1dc070257bd ] Previously the SNMP counter LINUX_MIB_TCPRETRANSFAIL is not counting the TSO/GSO properly on failed retransmission. This patch fixes that. Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5b808089eff8..6025cc509d97 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2932,7 +2932,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) if (likely(!err)) { TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS; } else if (err != -EBUSY) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); + NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL, segs); } return err; } From 2e117bb2abf3e5722d6984e358166c674a70250d Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 28 Nov 2018 16:06:45 -0800 Subject: [PATCH 1528/3715] tcp: fix SNMP TCP timeout under-estimation [ Upstream commit e1561fe2dd69dc5dddd69bd73aa65355bdfb048b ] Previously the SNMP TCPTIMEOUTS counter has inconsistent accounting: 1. It counts all SYN and SYN-ACK timeouts 2. It counts timeouts in other states except recurring timeouts and timeouts after fast recovery or disorder state. Such selective accounting makes analysis difficult and complicated. For example the monitoring system needs to collect many other SNMP counters to infer the total amount of timeout events. This patch makes TCPTIMEOUTS counter simply counts all the retransmit timeout (SYN or data or FIN). Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp_timer.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 04e2c43a43a5..65f66bd585e6 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -464,11 +464,12 @@ void tcp_retransmit_timer(struct sock *sk) goto out_reset_timer; } + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTS); if (tcp_write_timeout(sk)) goto out; if (icsk->icsk_retransmits == 0) { - int mib_idx; + int mib_idx = 0; if (icsk->icsk_ca_state == TCP_CA_Recovery) { if (tcp_is_sack(tp)) @@ -483,10 +484,9 @@ void tcp_retransmit_timer(struct sock *sk) mib_idx = LINUX_MIB_TCPSACKFAILURES; else mib_idx = LINUX_MIB_TCPRENOFAILURES; - } else { - mib_idx = LINUX_MIB_TCPTIMEOUTS; } - __NET_INC_STATS(sock_net(sk), mib_idx); + if (mib_idx) + __NET_INC_STATS(sock_net(sk), mib_idx); } tcp_enter_loss(sk); From 4d64f26e2a30dd4aab623182a38cbb90b5460b73 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Wed, 21 Nov 2018 13:14:13 -0800 Subject: [PATCH 1529/3715] modpost: skip ELF local symbols during section mismatch check [ Upstream commit a4d26f1a0958bb1c2b60c6f1e67c6f5d43e2647b ] During development of a serial console driver with a gcc 8.2.0 toolchain for RISC-V, the following modpost warning appeared: ---- WARNING: vmlinux.o(.data+0x19b10): Section mismatch in reference from the variable .LANCHOR1 to the function .init.text:sifive_serial_console_setup() The variable .LANCHOR1 references the function __init sifive_serial_console_setup() If the reference is valid then annotate the variable with __init* or __refdata (see linux/init.h) or name the variable: *_template, *_timer, *_sht, *_ops, *_probe, *_probe_one, *_console ---- ".LANCHOR1" is an ELF local symbol, automatically created by gcc's section anchor generation code: https://gcc.gnu.org/onlinedocs/gccint/Anchored-Addresses.html https://gcc.gnu.org/git/?p=gcc.git;a=blob;f=gcc/varasm.c;h=cd9591a45617464946dcf9a126dde277d9de9804;hb=9fb89fa845c1b2e0a18d85ada0b077c84508ab78#l7473 This was verified by compiling the kernel with -fno-section-anchors and observing that the ".LANCHOR1" ELF local symbol disappeared, and modpost no longer warned about the section mismatch. The serial driver code idiom triggering the warning is standard Linux serial driver practice that has a specific whitelist inclusion in modpost.c. I'm neither a modpost nor an ELF expert, but naively, it doesn't seem useful for modpost to report section mismatch warnings caused by ELF local symbols by default. Local symbols have compiler-generated names, and thus bypass modpost's whitelisting algorithm, which relies on the presence of a non-autogenerated symbol name. This increases the likelihood that false positive warnings will be generated (as in the above case). Thus, disable section mismatch reporting on ELF local symbols. The rationale here is similar to that of commit 2e3a10a1551d ("ARM: avoid ARM binutils leaking ELF local symbols") and of similar code already present in modpost.c: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/scripts/mod/modpost.c?h=v4.19-rc4&id=7876320f88802b22d4e2daf7eb027dd14175a0f8#n1256 This third version of the patch implements a suggestion from Masahiro Yamada to restructure the code as an additional pattern matching step inside secref_whitelist(), and further improves the patch description. Signed-off-by: Paul Walmsley Signed-off-by: Paul Walmsley Acked-by: Sam Ravnborg Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/mod/modpost.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index c22041a4fc36..b6eb929899c5 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1174,6 +1174,14 @@ static const struct sectioncheck *section_mismatch( * fromsec = text section * refsymname = *.constprop.* * + * Pattern 6: + * Hide section mismatch warnings for ELF local symbols. The goal + * is to eliminate false positive modpost warnings caused by + * compiler-generated ELF local symbol names such as ".LANCHOR1". + * Autogenerated symbol names bypass modpost's "Pattern 2" + * whitelisting, which relies on pattern-matching against symbol + * names to work. (One situation where gcc can autogenerate ELF + * local symbols is when "-fsection-anchors" is used.) **/ static int secref_whitelist(const struct sectioncheck *mismatch, const char *fromsec, const char *fromsym, @@ -1212,6 +1220,10 @@ static int secref_whitelist(const struct sectioncheck *mismatch, match(fromsym, optim_symbols)) return 0; + /* Check for pattern 6 */ + if (strstarts(fromsym, ".L")) + return 0; + return 1; } From a87bd630bc84586f181f07a90a1f2ad715148a06 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 22 Nov 2018 08:11:54 +0900 Subject: [PATCH 1530/3715] kbuild: fix single target build for external module [ Upstream commit e07db28eea38ed4e332b3a89f3995c86b713cb5b ] Building a single target in an external module fails due to missing .tmp_versions directory. For example, $ make -C /lib/modules/$(uname -r)/build M=$PWD foo.o will fail in the following way: CC [M] /home/masahiro/foo/foo.o /bin/sh: 1: cannot create /home/masahiro/foo/.tmp_versions/foo.mod: Directory nonexistent This is because $(cmd_crmodverdir) is executed only before building /, %/, %.ko single targets of external modules. Create .tmp_versions in the 'prepare' target. Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- Makefile | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index d97288c0754f..4de172b2e1fb 100644 --- a/Makefile +++ b/Makefile @@ -1529,9 +1529,6 @@ else # KBUILD_EXTMOD # We are always building modules KBUILD_MODULES := 1 -PHONY += crmodverdir -crmodverdir: - $(cmd_crmodverdir) PHONY += $(objtree)/Module.symvers $(objtree)/Module.symvers: @@ -1543,7 +1540,7 @@ $(objtree)/Module.symvers: module-dirs := $(addprefix _module_,$(KBUILD_EXTMOD)) PHONY += $(module-dirs) modules -$(module-dirs): crmodverdir $(objtree)/Module.symvers +$(module-dirs): prepare $(objtree)/Module.symvers $(Q)$(MAKE) $(build)=$(patsubst _module_%,%,$@) modules: $(module-dirs) @@ -1584,7 +1581,8 @@ help: # Dummies... PHONY += prepare scripts -prepare: ; +prepare: + $(cmd_crmodverdir) scripts: ; endif # KBUILD_EXTMOD @@ -1709,17 +1707,14 @@ endif # Modules /: prepare scripts FORCE - $(cmd_crmodverdir) $(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1) \ $(build)=$(build-dir) # Make sure the latest headers are built for Documentation Documentation/ samples/: headers_install %/: prepare scripts FORCE - $(cmd_crmodverdir) $(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1) \ $(build)=$(build-dir) %.ko: prepare scripts FORCE - $(cmd_crmodverdir) $(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1) \ $(build)=$(build-dir) $(@:.ko=.o) $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost From 1c1dc30605fd146e9acfdb92c595d99f7861d14b Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Sun, 18 Nov 2018 21:18:30 +0100 Subject: [PATCH 1531/3715] mtd: fix mtd_oobavail() incoherent returned value [ Upstream commit 4348433d8c0234f44adb6e12112e69343f50f0c5 ] mtd_oobavail() returns either mtd->oovabail or mtd->oobsize. Both values are unsigned 32-bit entities, so there is no reason to pretend returning a signed one. Signed-off-by: Miquel Raynal Signed-off-by: Boris Brezillon Signed-off-by: Sasha Levin --- include/linux/mtd/mtd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 6cd0f6b7658b..aadc2ee050f1 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -401,7 +401,7 @@ static inline struct device_node *mtd_get_of_node(struct mtd_info *mtd) return dev_of_node(&mtd->dev); } -static inline int mtd_oobavail(struct mtd_info *mtd, struct mtd_oob_ops *ops) +static inline u32 mtd_oobavail(struct mtd_info *mtd, struct mtd_oob_ops *ops) { return ops->mode == MTD_OPS_AUTO_OOB ? mtd->oobavail : mtd->oobsize; } From 1864086ee0766a905f0e09b81de7d1fd32b44aff Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Sat, 1 Dec 2018 14:54:51 +0100 Subject: [PATCH 1532/3715] ARM: dts: pxa: clean up USB controller nodes [ Upstream commit c40ad24254f1dbd54f2df5f5f524130dc1862122 ] PXA25xx SoCs don't have a USB controller, so drop the node from the common pxa2xx.dtsi base file. Both pxa27x and pxa3xx have a dedicated node already anyway. While at it, unify the names for the nodes across all pxa platforms. Signed-off-by: Daniel Mack Reported-by: Sergey Yanovich Link: https://patchwork.kernel.org/patch/8375421/ Signed-off-by: Robert Jarzmik Signed-off-by: Sasha Levin --- arch/arm/boot/dts/pxa27x.dtsi | 2 +- arch/arm/boot/dts/pxa2xx.dtsi | 7 ------- arch/arm/boot/dts/pxa3xx.dtsi | 2 +- 3 files changed, 2 insertions(+), 9 deletions(-) diff --git a/arch/arm/boot/dts/pxa27x.dtsi b/arch/arm/boot/dts/pxa27x.dtsi index 3228ad5fb725..ccbecad9c5c7 100644 --- a/arch/arm/boot/dts/pxa27x.dtsi +++ b/arch/arm/boot/dts/pxa27x.dtsi @@ -35,7 +35,7 @@ clocks = <&clks CLK_NONE>; }; - pxa27x_ohci: usb@4c000000 { + usb0: usb@4c000000 { compatible = "marvell,pxa-ohci"; reg = <0x4c000000 0x10000>; interrupts = <3>; diff --git a/arch/arm/boot/dts/pxa2xx.dtsi b/arch/arm/boot/dts/pxa2xx.dtsi index e4ebcde17837..a03bca81ae8a 100644 --- a/arch/arm/boot/dts/pxa2xx.dtsi +++ b/arch/arm/boot/dts/pxa2xx.dtsi @@ -117,13 +117,6 @@ status = "disabled"; }; - usb0: ohci@4c000000 { - compatible = "marvell,pxa-ohci"; - reg = <0x4c000000 0x10000>; - interrupts = <3>; - status = "disabled"; - }; - mmc0: mmc@41100000 { compatible = "marvell,pxa-mmc"; reg = <0x41100000 0x1000>; diff --git a/arch/arm/boot/dts/pxa3xx.dtsi b/arch/arm/boot/dts/pxa3xx.dtsi index 55c75b67351c..affa5b6f6da1 100644 --- a/arch/arm/boot/dts/pxa3xx.dtsi +++ b/arch/arm/boot/dts/pxa3xx.dtsi @@ -189,7 +189,7 @@ status = "disabled"; }; - pxa3xx_ohci: usb@4c000000 { + usb0: usb@4c000000 { compatible = "marvell,pxa-ohci"; reg = <0x4c000000 0x10000>; interrupts = <3>; From a4adade5dde697f778a3dd44ef7fb000671a1bca Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 30 Nov 2018 13:33:28 +0800 Subject: [PATCH 1533/3715] clk: sunxi-ng: h3/h5: Fix CSI_MCLK parent [ Upstream commit 7bb7d29cffdd24bf419516d14b6768591e74069e ] The third parent of CSI_MCLK is PLL_PERIPH1, not PLL_PERIPH0. Fix it. Fixes: 0577e4853bfb ("clk: sunxi-ng: Add H3 clocks") Acked-by: Stephen Boyd Signed-off-by: Chen-Yu Tsai Signed-off-by: Sasha Levin --- drivers/clk/sunxi-ng/ccu-sun8i-h3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-h3.c b/drivers/clk/sunxi-ng/ccu-sun8i-h3.c index 1729ff6a5aae..b09acda71abe 100644 --- a/drivers/clk/sunxi-ng/ccu-sun8i-h3.c +++ b/drivers/clk/sunxi-ng/ccu-sun8i-h3.c @@ -460,7 +460,7 @@ static const char * const csi_sclk_parents[] = { "pll-periph0", "pll-periph1" }; static SUNXI_CCU_M_WITH_MUX_GATE(csi_sclk_clk, "csi-sclk", csi_sclk_parents, 0x134, 16, 4, 24, 3, BIT(31), 0); -static const char * const csi_mclk_parents[] = { "osc24M", "pll-video", "pll-periph0" }; +static const char * const csi_mclk_parents[] = { "osc24M", "pll-video", "pll-periph1" }; static SUNXI_CCU_M_WITH_MUX_GATE(csi_mclk_clk, "csi-mclk", csi_mclk_parents, 0x134, 0, 5, 8, 3, BIT(15), 0); From f8c0bf3b812161aedeb6f7050e470a207db9fdf3 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 3 Dec 2018 13:12:48 -0600 Subject: [PATCH 1534/3715] ARM: dts: realview: Fix some more duplicate regulator nodes [ Upstream commit f3b2f758ec1e6cdb13c925647cbd8ad4938b78fb ] There's a bug in dtc in checking for duplicate node names when there's another section (e.g. "/ { };"). In this case, skeleton.dtsi provides another section. Upon removal of skeleton.dtsi, the dtb fails to build due to a duplicate node 'fixedregulator@0'. As both nodes were pretty much the same 3.3V fixed regulator, it hasn't really mattered. Fix this by renaming the nodes to something unique. In the process, drop the unit-address which shouldn't be present wtihout reg property. Signed-off-by: Rob Herring Reviewed-by: Linus Walleij Signed-off-by: Olof Johansson Signed-off-by: Sasha Levin --- arch/arm/boot/dts/arm-realview-pb1176.dts | 4 ++-- arch/arm/boot/dts/arm-realview-pb11mp.dts | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/arm-realview-pb1176.dts b/arch/arm/boot/dts/arm-realview-pb1176.dts index c1fd5615ddfe..939c108c24a6 100644 --- a/arch/arm/boot/dts/arm-realview-pb1176.dts +++ b/arch/arm/boot/dts/arm-realview-pb1176.dts @@ -45,7 +45,7 @@ }; /* The voltage to the MMC card is hardwired at 3.3V */ - vmmc: fixedregulator@0 { + vmmc: regulator-vmmc { compatible = "regulator-fixed"; regulator-name = "vmmc"; regulator-min-microvolt = <3300000>; @@ -53,7 +53,7 @@ regulator-boot-on; }; - veth: fixedregulator@0 { + veth: regulator-veth { compatible = "regulator-fixed"; regulator-name = "veth"; regulator-min-microvolt = <3300000>; diff --git a/arch/arm/boot/dts/arm-realview-pb11mp.dts b/arch/arm/boot/dts/arm-realview-pb11mp.dts index e306f1cceb4e..95037c48182d 100644 --- a/arch/arm/boot/dts/arm-realview-pb11mp.dts +++ b/arch/arm/boot/dts/arm-realview-pb11mp.dts @@ -145,7 +145,7 @@ }; /* The voltage to the MMC card is hardwired at 3.3V */ - vmmc: fixedregulator@0 { + vmmc: regulator-vmmc { compatible = "regulator-fixed"; regulator-name = "vmmc"; regulator-min-microvolt = <3300000>; @@ -153,7 +153,7 @@ regulator-boot-on; }; - veth: fixedregulator@0 { + veth: regulator-veth { compatible = "regulator-fixed"; regulator-name = "veth"; regulator-min-microvolt = <3300000>; From f569ce4a1072da843d078d4013e1b446ed4af5c3 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Mon, 3 Dec 2018 15:27:37 -0600 Subject: [PATCH 1535/3715] dlm: fix invalid cluster name warning [ Upstream commit 3595c559326d0b660bb088a88e22e0ca630a0e35 ] The warning added in commit 3b0e761ba83 "dlm: print log message when cluster name is not set" did not account for the fact that lockspaces created from userland do not supply a cluster name, so bogus warnings are printed every time a userland lockspace is created. Signed-off-by: David Teigland Signed-off-by: Sasha Levin --- fs/dlm/user.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/dlm/user.c b/fs/dlm/user.c index 1f0c071d4a86..02de11695d0b 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -25,6 +25,7 @@ #include "lvb_table.h" #include "user.h" #include "ast.h" +#include "config.h" static const char name_prefix[] = "dlm"; static const struct file_operations device_fops; @@ -404,7 +405,7 @@ static int device_create_lockspace(struct dlm_lspace_params *params) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - error = dlm_new_lockspace(params->name, NULL, params->flags, + error = dlm_new_lockspace(params->name, dlm_config.ci_cluster_name, params->flags, DLM_USER_LVB_LEN, NULL, NULL, NULL, &lockspace); if (error) From 0f1e0e050f0b91e9892dd516b4a240a5ccfc7a55 Mon Sep 17 00:00:00 2001 From: Erez Alfasi Date: Sun, 2 Dec 2018 17:40:25 +0200 Subject: [PATCH 1536/3715] net/mlx4_core: Fix return codes of unsupported operations [ Upstream commit 95aac2cdafd8c8298c9b2589c52f44db0d824e0e ] Functions __set_port_type and mlx4_check_port_params returned -EINVAL while the proper return code is -EOPNOTSUPP as a result of an unsupported operation. All drivers should generate this and all users should check for it when detecting an unsupported functionality. Signed-off-by: Erez Alfasi Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx4/main.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index c273a3ebb8e8..12d4b891301b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -199,7 +199,7 @@ int mlx4_check_port_params(struct mlx4_dev *dev, for (i = 0; i < dev->caps.num_ports - 1; i++) { if (port_type[i] != port_type[i + 1]) { mlx4_err(dev, "Only same port types supported on this HCA, aborting\n"); - return -EINVAL; + return -EOPNOTSUPP; } } } @@ -208,7 +208,7 @@ int mlx4_check_port_params(struct mlx4_dev *dev, if (!(port_type[i] & dev->caps.supported_type[i+1])) { mlx4_err(dev, "Requested port type for port %d is not supported on this HCA\n", i + 1); - return -EINVAL; + return -EOPNOTSUPP; } } return 0; @@ -1152,8 +1152,7 @@ static int __set_port_type(struct mlx4_port_info *info, mlx4_err(mdev, "Requested port type for port %d is not supported on this HCA\n", info->port); - err = -EINVAL; - goto err_sup; + return -EOPNOTSUPP; } mlx4_stop_sense(mdev); @@ -1175,7 +1174,7 @@ static int __set_port_type(struct mlx4_port_info *info, for (i = 1; i <= mdev->caps.num_ports; i++) { if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) { mdev->caps.possible_type[i] = mdev->caps.port_type[i]; - err = -EINVAL; + err = -EOPNOTSUPP; } } } @@ -1201,7 +1200,7 @@ static int __set_port_type(struct mlx4_port_info *info, out: mlx4_start_sense(mdev); mutex_unlock(&priv->port_mutex); -err_sup: + return err; } From 6c22e4aea49acdb156a2126a67a1464584fb88e8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 3 Dec 2018 16:39:01 -0800 Subject: [PATCH 1537/3715] pstore/ram: Avoid NULL deref in ftrace merging failure path [ Upstream commit 8665569e97dd52920713b95675409648986b5b0d ] Given corruption in the ftrace records, it might be possible to allocate tmp_prz without assigning prz to it, but still marking it as needing to be freed, which would cause at least a NULL dereference. smatch warnings: fs/pstore/ram.c:340 ramoops_pstore_read() error: we previously assumed 'prz' could be null (see line 255) https://lists.01.org/pipermail/kbuild-all/2018-December/055528.html Reported-by: Dan Carpenter Fixes: 2fbea82bbb89 ("pstore: Merge per-CPU ftrace records into one") Cc: "Joel Fernandes (Google)" Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- fs/pstore/ram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 40bfc6c58374..1e675be10926 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -297,6 +297,7 @@ static ssize_t ramoops_pstore_read(struct pstore_record *record) GFP_KERNEL); if (!tmp_prz) return -ENOMEM; + prz = tmp_prz; free_prz = true; while (cxt->ftrace_read_cnt < cxt->max_ftrace_cnt) { @@ -319,7 +320,6 @@ static ssize_t ramoops_pstore_read(struct pstore_record *record) goto out; } record->id = 0; - prz = tmp_prz; } } From 1725a88d8f5f2b7826206a11fd91a64ce25e43d0 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Tue, 4 Dec 2018 09:37:46 +1030 Subject: [PATCH 1538/3715] powerpc/math-emu: Update macros from GCC [ Upstream commit b682c8692442711684befe413cf93cf01c5324ea ] The add_ssaaaa, sub_ddmmss, umul_ppmm and udiv_qrnnd macros originate from GCC's longlong.h which in turn was copied from GMP's longlong.h a few decades ago. This was found when compiling with clang: arch/powerpc/math-emu/fnmsub.c:46:2: error: invalid use of a cast in a inline asm context requiring an l-value: remove the cast or build with -fheinous-gnu-extensions FP_ADD_D(R, T, B); ^~~~~~~~~~~~~~~~~ ... ./arch/powerpc/include/asm/sfp-machine.h:283:27: note: expanded from macro 'sub_ddmmss' : "=r" ((USItype)(sh)), \ ~~~~~~~~~~^~~ Segher points out: this was fixed in GCC over 16 years ago ( https://gcc.gnu.org/r56600 ), and in GMP (where it comes from) presumably before that. Update the add_ssaaaa, sub_ddmmss, umul_ppmm and udiv_qrnnd macros to the latest GCC version in order to git rid of the invalid casts. These were taken as-is from GCC's longlong in order to make future syncs obvious. Other parts of sfp-machine.h were left as-is as the file contains more features than present in longlong.h. Link: https://github.com/ClangBuiltLinux/linux/issues/260 Signed-off-by: Joel Stanley Reviewed-by: Nick Desaulniers Reviewed-by: Segher Boessenkool Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/sfp-machine.h | 92 ++++++++------------------ 1 file changed, 29 insertions(+), 63 deletions(-) diff --git a/arch/powerpc/include/asm/sfp-machine.h b/arch/powerpc/include/asm/sfp-machine.h index d89beaba26ff..8b957aabb826 100644 --- a/arch/powerpc/include/asm/sfp-machine.h +++ b/arch/powerpc/include/asm/sfp-machine.h @@ -213,30 +213,18 @@ * respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow * (i.e. carry out) is not stored anywhere, and is lost. */ -#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ do { \ if (__builtin_constant_p (bh) && (bh) == 0) \ - __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ - : "%r" ((USItype)(ah)), \ - "%r" ((USItype)(al)), \ - "rI" ((USItype)(bl))); \ - else if (__builtin_constant_p (bh) && (bh) ==~(USItype) 0) \ - __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ - : "%r" ((USItype)(ah)), \ - "%r" ((USItype)(al)), \ - "rI" ((USItype)(bl))); \ + __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\ + else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \ + __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\ else \ - __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ - : "%r" ((USItype)(ah)), \ - "r" ((USItype)(bh)), \ - "%r" ((USItype)(al)), \ - "rI" ((USItype)(bl))); \ + __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \ + : "=r" (sh), "=&r" (sl) \ + : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \ } while (0) /* sub_ddmmss is used in op-2.h and udivmodti4.c and should be equivalent to @@ -248,44 +236,24 @@ * and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere, * and is lost. */ -#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ do { \ if (__builtin_constant_p (ah) && (ah) == 0) \ - __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ - : "r" ((USItype)(bh)), \ - "rI" ((USItype)(al)), \ - "r" ((USItype)(bl))); \ - else if (__builtin_constant_p (ah) && (ah) ==~(USItype) 0) \ - __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ - : "r" ((USItype)(bh)), \ - "rI" ((USItype)(al)), \ - "r" ((USItype)(bl))); \ + __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ + else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \ + __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ else if (__builtin_constant_p (bh) && (bh) == 0) \ - __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ - : "r" ((USItype)(ah)), \ - "rI" ((USItype)(al)), \ - "r" ((USItype)(bl))); \ - else if (__builtin_constant_p (bh) && (bh) ==~(USItype) 0) \ - __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ - : "r" ((USItype)(ah)), \ - "rI" ((USItype)(al)), \ - "r" ((USItype)(bl))); \ + __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ + else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \ + __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ else \ - __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ - : "r" ((USItype)(ah)), \ - "r" ((USItype)(bh)), \ - "rI" ((USItype)(al)), \ - "r" ((USItype)(bl))); \ + __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \ + : "=r" (sh), "=&r" (sl) \ + : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \ } while (0) /* asm fragments for mul and div */ @@ -294,13 +262,10 @@ * UWtype integers MULTIPLER and MULTIPLICAND, and generates a two UWtype * word product in HIGH_PROD and LOW_PROD. */ -#define umul_ppmm(ph, pl, m0, m1) \ +#define umul_ppmm(ph, pl, m0, m1) \ do { \ USItype __m0 = (m0), __m1 = (m1); \ - __asm__ ("mulhwu %0,%1,%2" \ - : "=r" ((USItype)(ph)) \ - : "%r" (__m0), \ - "r" (__m1)); \ + __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \ (pl) = __m0 * __m1; \ } while (0) @@ -312,9 +277,10 @@ * significant bit of DENOMINATOR must be 1, then the pre-processor symbol * UDIV_NEEDS_NORMALIZATION is defined to 1. */ -#define udiv_qrnnd(q, r, n1, n0, d) \ +#define udiv_qrnnd(q, r, n1, n0, d) \ do { \ - UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \ + UWtype __d1, __d0, __q1, __q0; \ + UWtype __r1, __r0, __m; \ __d1 = __ll_highpart (d); \ __d0 = __ll_lowpart (d); \ \ @@ -325,7 +291,7 @@ if (__r1 < __m) \ { \ __q1--, __r1 += (d); \ - if (__r1 >= (d)) /* we didn't get carry when adding to __r1 */ \ + if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\ if (__r1 < __m) \ __q1--, __r1 += (d); \ } \ From c6a0ade2549d5a09474143a7b7b5ad9c1e7add20 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 29 Nov 2018 11:06:37 +0100 Subject: [PATCH 1539/3715] clk: renesas: r8a77995: Correct parent clock of DU [ Upstream commit 515b2915ee08060ad4f6a3b3de38c5c2c5258e8b ] According to the R-Car Gen3 Hardware Manual Rev 1.00, the parent clock of the DU module clocks on R-Car D3 is S1D1. Fixes: d71e851d82c6cfe5 ("clk: renesas: cpg-mssr: Add R8A77995 support") Signed-off-by: Geert Uytterhoeven Acked-by: Stephen Boyd Reviewed-by: Laurent Pinchart Tested-by: Laurent Pinchart Signed-off-by: Sasha Levin --- drivers/clk/renesas/r8a77995-cpg-mssr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/renesas/r8a77995-cpg-mssr.c b/drivers/clk/renesas/r8a77995-cpg-mssr.c index e594cf8ee63b..8434d5530fb1 100644 --- a/drivers/clk/renesas/r8a77995-cpg-mssr.c +++ b/drivers/clk/renesas/r8a77995-cpg-mssr.c @@ -141,8 +141,8 @@ static const struct mssr_mod_clk r8a77995_mod_clks[] __initconst = { DEF_MOD("vspbs", 627, R8A77995_CLK_S0D1), DEF_MOD("ehci0", 703, R8A77995_CLK_S3D2), DEF_MOD("hsusb", 704, R8A77995_CLK_S3D2), - DEF_MOD("du1", 723, R8A77995_CLK_S2D1), - DEF_MOD("du0", 724, R8A77995_CLK_S2D1), + DEF_MOD("du1", 723, R8A77995_CLK_S1D1), + DEF_MOD("du0", 724, R8A77995_CLK_S1D1), DEF_MOD("lvds", 727, R8A77995_CLK_S2D1), DEF_MOD("vin7", 804, R8A77995_CLK_S1D2), DEF_MOD("vin6", 805, R8A77995_CLK_S1D2), From 37ac694eeab669ec86c55e3dcca065920b5b4b3e Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Tue, 4 Dec 2018 22:12:17 +0200 Subject: [PATCH 1540/3715] MIPS: OCTEON: cvmx_pko_mem_debug8: use oldest forward compatible definition [ Upstream commit 1c6121c39677175bd372076020948e184bad4b6b ] cn58xx is compatible with cn50xx, so use the latter. Signed-off-by: Aaro Koskinen [paul.burton@mips.com: s/cn52xx/cn50xx/ in commit message.] Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@vger.kernel.org Signed-off-by: Sasha Levin --- arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c | 2 +- arch/mips/include/asm/octeon/cvmx-pko.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c b/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c index 8241fc6aa17d..3839feba68f2 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c +++ b/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c @@ -266,7 +266,7 @@ int cvmx_cmd_queue_length(cvmx_cmd_queue_id_t queue_id) } else { union cvmx_pko_mem_debug8 debug8; debug8.u64 = cvmx_read_csr(CVMX_PKO_MEM_DEBUG8); - return debug8.cn58xx.doorbell; + return debug8.cn50xx.doorbell; } case CVMX_CMD_QUEUE_ZIP: case CVMX_CMD_QUEUE_DFA: diff --git a/arch/mips/include/asm/octeon/cvmx-pko.h b/arch/mips/include/asm/octeon/cvmx-pko.h index 5f47f76ed510..20eb9c46a75a 100644 --- a/arch/mips/include/asm/octeon/cvmx-pko.h +++ b/arch/mips/include/asm/octeon/cvmx-pko.h @@ -611,7 +611,7 @@ static inline void cvmx_pko_get_port_status(uint64_t port_num, uint64_t clear, pko_reg_read_idx.s.index = cvmx_pko_get_base_queue(port_num); cvmx_write_csr(CVMX_PKO_REG_READ_IDX, pko_reg_read_idx.u64); debug8.u64 = cvmx_read_csr(CVMX_PKO_MEM_DEBUG8); - status->doorbell = debug8.cn58xx.doorbell; + status->doorbell = debug8.cn50xx.doorbell; } } From b7b8d8e667d0e5b4074bdb68e0faed2218b764de Mon Sep 17 00:00:00 2001 From: zhengbin Date: Fri, 30 Nov 2018 16:04:25 +0800 Subject: [PATCH 1541/3715] nfsd: Return EPERM, not EACCES, in some SETATTR cases [ Upstream commit 255fbca65137e25b12bced18ec9a014dc77ecda0 ] As the man(2) page for utime/utimes states, EPERM is returned when the second parameter of utime or utimes is not NULL, the caller's effective UID does not match the owner of the file, and the caller is not privileged. However, in a NFS directory mounted from knfsd, it will return EACCES (from nfsd_setattr-> fh_verify->nfsd_permission). This patch fixes that. Signed-off-by: zhengbin Signed-off-by: J. Bruce Fields Signed-off-by: Sasha Levin --- fs/nfsd/vfs.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index f55527ef21e8..06d1f2edf2ec 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -396,10 +396,23 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, bool get_write_count; bool size_change = (iap->ia_valid & ATTR_SIZE); - if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) + if (iap->ia_valid & ATTR_SIZE) { accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; - if (iap->ia_valid & ATTR_SIZE) ftype = S_IFREG; + } + + /* + * If utimes(2) and friends are called with times not NULL, we should + * not set NFSD_MAY_WRITE bit. Otherwise fh_verify->nfsd_permission + * will return EACCESS, when the caller's effective UID does not match + * the owner of the file, and the caller is not privileged. In this + * situation, we should return EPERM(notify_change will return this). + */ + if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME)) { + accmode |= NFSD_MAY_OWNER_OVERRIDE; + if (!(iap->ia_valid & (ATTR_ATIME_SET | ATTR_MTIME_SET))) + accmode |= NFSD_MAY_WRITE; + } /* Callers that do fh_verify should do the fh_want_write: */ get_write_count = !fhp->fh_dentry; From ebded7823e7f1aedf9d8b47a5cb006119895a9d2 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 1 Nov 2018 00:24:48 +0000 Subject: [PATCH 1542/3715] tty: Don't block on IO when ldisc change is pending [ Upstream commit c96cf923a98d1b094df9f0cf97a83e118817e31b ] There might be situations where tty_ldisc_lock() has blocked, but there is already IO on tty and it prevents line discipline changes. It might theoretically turn into dead-lock. Basically, provide more priority to pending tty_ldisc_lock() than to servicing reads/writes over tty. User-visible issue was reported by Mikulas where on pa-risc with Debian 5 reboot took either 80 seconds, 3 minutes or 3:25 after proper locking in tty_reopen(). Cc: Jiri Slaby Reported-by: Mikulas Patocka Signed-off-by: Dmitry Safonov Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/n_hdlc.c | 4 ++-- drivers/tty/n_r3964.c | 2 +- drivers/tty/n_tty.c | 8 ++++---- drivers/tty/tty_ldisc.c | 7 +++++++ include/linux/tty.h | 7 +++++++ 5 files changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c index e83dea8d6633..19c4aa800c81 100644 --- a/drivers/tty/n_hdlc.c +++ b/drivers/tty/n_hdlc.c @@ -614,7 +614,7 @@ static ssize_t n_hdlc_tty_read(struct tty_struct *tty, struct file *file, } /* no data */ - if (file->f_flags & O_NONBLOCK) { + if (tty_io_nonblock(tty, file)) { ret = -EAGAIN; break; } @@ -681,7 +681,7 @@ static ssize_t n_hdlc_tty_write(struct tty_struct *tty, struct file *file, if (tbuf) break; - if (file->f_flags & O_NONBLOCK) { + if (tty_io_nonblock(tty, file)) { error = -EAGAIN; break; } diff --git a/drivers/tty/n_r3964.c b/drivers/tty/n_r3964.c index 305b6490d405..08ac04d08991 100644 --- a/drivers/tty/n_r3964.c +++ b/drivers/tty/n_r3964.c @@ -1080,7 +1080,7 @@ static ssize_t r3964_read(struct tty_struct *tty, struct file *file, pMsg = remove_msg(pInfo, pClient); if (pMsg == NULL) { /* no messages available. */ - if (file->f_flags & O_NONBLOCK) { + if (tty_io_nonblock(tty, file)) { ret = -EAGAIN; goto unlock; } diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 904fc9c37fde..8214b0326b3a 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -1704,7 +1704,7 @@ n_tty_receive_buf_common(struct tty_struct *tty, const unsigned char *cp, down_read(&tty->termios_rwsem); - while (1) { + do { /* * When PARMRK is set, each input char may take up to 3 chars * in the read buf; reduce the buffer space avail by 3x @@ -1746,7 +1746,7 @@ n_tty_receive_buf_common(struct tty_struct *tty, const unsigned char *cp, fp += n; count -= n; rcvd += n; - } + } while (!test_bit(TTY_LDISC_CHANGING, &tty->flags)); tty->receive_room = room; @@ -2213,7 +2213,7 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file, break; if (!timeout) break; - if (file->f_flags & O_NONBLOCK) { + if (tty_io_nonblock(tty, file)) { retval = -EAGAIN; break; } @@ -2367,7 +2367,7 @@ static ssize_t n_tty_write(struct tty_struct *tty, struct file *file, } if (!nr) break; - if (file->f_flags & O_NONBLOCK) { + if (tty_io_nonblock(tty, file)) { retval = -EAGAIN; break; } diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index 01fcdc7ff077..62dd2abb57fe 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -348,6 +348,11 @@ int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout) { int ret; + /* Kindly asking blocked readers to release the read side */ + set_bit(TTY_LDISC_CHANGING, &tty->flags); + wake_up_interruptible_all(&tty->read_wait); + wake_up_interruptible_all(&tty->write_wait); + ret = __tty_ldisc_lock(tty, timeout); if (!ret) return -EBUSY; @@ -358,6 +363,8 @@ int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout) void tty_ldisc_unlock(struct tty_struct *tty) { clear_bit(TTY_LDISC_HALTED, &tty->flags); + /* Can be cleared here - ldisc_unlock will wake up writers firstly */ + clear_bit(TTY_LDISC_CHANGING, &tty->flags); __tty_ldisc_unlock(tty); } diff --git a/include/linux/tty.h b/include/linux/tty.h index 1dd587ba6d88..0cd621d8c7f0 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -365,6 +365,7 @@ struct tty_file_private { #define TTY_NO_WRITE_SPLIT 17 /* Preserve write boundaries to driver */ #define TTY_HUPPED 18 /* Post driver->hangup() */ #define TTY_HUPPING 19 /* Hangup in progress */ +#define TTY_LDISC_CHANGING 20 /* Change pending - non-block IO */ #define TTY_LDISC_HALTED 22 /* Line discipline is halted */ /* Values for tty->flow_change */ @@ -382,6 +383,12 @@ static inline void tty_set_flow_change(struct tty_struct *tty, int val) smp_mb(); } +static inline bool tty_io_nonblock(struct tty_struct *tty, struct file *file) +{ + return file->f_flags & O_NONBLOCK || + test_bit(TTY_LDISC_CHANGING, &tty->flags); +} + static inline bool tty_io_error(struct tty_struct *tty) { return test_bit(TTY_IO_ERROR, &tty->flags); From d3ad47d79540c889c45879082063f6482480063e Mon Sep 17 00:00:00 2001 From: Andreas Pape Date: Fri, 23 Nov 2018 11:14:54 -0500 Subject: [PATCH 1543/3715] media: stkwebcam: Bugfix for wrong return values [ Upstream commit 3c28b91380dd1183347d32d87d820818031ebecf ] usb_control_msg returns in case of a successfully sent message the number of sent bytes as a positive number. Don't use this value as a return value for stk_camera_read_reg, as a non-zero return value is used as an error condition in some cases when stk_camera_read_reg is called. Signed-off-by: Andreas Pape Reviewed-by: Kieran Bingham Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/usb/stkwebcam/stk-webcam.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/media/usb/stkwebcam/stk-webcam.c b/drivers/media/usb/stkwebcam/stk-webcam.c index a7da1356a36e..6992e84f8a8b 100644 --- a/drivers/media/usb/stkwebcam/stk-webcam.c +++ b/drivers/media/usb/stkwebcam/stk-webcam.c @@ -164,7 +164,11 @@ int stk_camera_read_reg(struct stk_camera *dev, u16 index, u8 *value) *value = *buf; kfree(buf); - return ret; + + if (ret < 0) + return ret; + else + return 0; } static int stk_start_stream(struct stk_camera *dev) From 2fa2b92afedab6464599db563e574811219d621d Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Wed, 21 Nov 2018 21:32:25 -0500 Subject: [PATCH 1544/3715] firmware: qcom: scm: fix compilation error when disabled [ Upstream commit 16ad9501b1f2edebe24f8cf3c09da0695871986b ] This fixes the case when CONFIG_QCOM_SCM is not enabled, and linux/errno.h has not been included previously. Signed-off-by: Jonathan Marek Reviewed-by: Bjorn Andersson Signed-off-by: Andy Gross Signed-off-by: Sasha Levin --- include/linux/qcom_scm.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index e5380471c2cd..428278a44c7d 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -44,6 +44,9 @@ extern int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare); extern int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size); extern int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare); #else + +#include + static inline int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus) { From de4f3fa347e8d3f7e1cb60389aa2e7d843c17bce Mon Sep 17 00:00:00 2001 From: Nir Dotan Date: Thu, 6 Dec 2018 17:44:52 +0000 Subject: [PATCH 1545/3715] mlxsw: spectrum_router: Relax GRE decap matching check [ Upstream commit da93d2913fdf43d5cde3c5a53ac9cc29684d5c7c ] GRE decap offload is configured when local routes prefix correspond to the local address of one of the offloaded GRE tunnels. The matching check was found to be too strict, such that for a flat GRE configuration, in which the overlay and underlay traffic share the same non-default VRF, decap flow was not offloaded. Relax the check for decap flow offloading. A match occurs if the local address of the tunnel matches the local route address while both share the same VRF table. Fixes: 4607f6d26950 ("mlxsw: spectrum_router: Support IPv4 underlay decap") Signed-off-by: Nir Dotan Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 3ed4fb346f23..5b9a5c3834d9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1252,15 +1252,12 @@ mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp, { u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN; enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt; - struct net_device *ipip_ul_dev; if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto) return false; - ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev); return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip, - ul_tb_id, ipip_entry) && - (!ipip_ul_dev || ipip_ul_dev == ul_dev); + ul_tb_id, ipip_entry); } /* Given decap parameters, find the corresponding IPIP entry. */ From d78aaa8896bb2b0dc2979e3e81142f1bebeabf7d Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Wed, 28 Nov 2018 10:19:04 -0800 Subject: [PATCH 1546/3715] IB/hfi1: Ignore LNI errors before DC8051 transitions to Polling state [ Upstream commit c1a797c0818e0122c7ec8422edd971cfec9b15ea ] When it is requested to change its physical state back to Offline while in the process to go up, DC8051 will set the ERROR field in the DC8051_DBG_ERR_INFO_SET_BY_8051 register. This ERROR field will remain until the next time when DC8051 transitions from Offline to Polling. Subsequently, when the host requests DC8051 to change its physical state to Polling again, it may receive a DC8051 interrupt with the stale ERROR field still in DC8051_DBG_ERR_INFO_SET_BY_8051. If the host link state has been changed to Polling, this stale ERROR will force the host to transition to Offline state, resulting in a vicious cycle of Polling ->Offline->Polling->Offline. On the other hand, if the host link state is still Offline when the stale ERROR is received, the stale ERROR will be ignored, and the link will come up correctly. This patch implements the correct behavior by changing host link state to Polling only after DC8051 changes its physical state to Polling. Reviewed-by: Mike Marciniszyn Signed-off-by: Krzysztof Goreczny Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/chip.c | 47 ++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 9dcdc0a8685e..9f78bb07744c 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1074,6 +1074,8 @@ static void log_state_transition(struct hfi1_pportdata *ppd, u32 state); static void log_physical_state(struct hfi1_pportdata *ppd, u32 state); static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state, int msecs); +static int wait_phys_link_out_of_offline(struct hfi1_pportdata *ppd, + int msecs); static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc); static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr); static void handle_temp_err(struct hfi1_devdata *dd); @@ -10731,13 +10733,15 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) break; ppd->port_error_action = 0; - ppd->host_link_state = HLS_DN_POLL; if (quick_linkup) { /* quick linkup does not go into polling */ ret = do_quick_linkup(dd); } else { ret1 = set_physical_link_state(dd, PLS_POLLING); + if (!ret1) + ret1 = wait_phys_link_out_of_offline(ppd, + 3000); if (ret1 != HCMD_SUCCESS) { dd_dev_err(dd, "Failed to transition to Polling link state, return 0x%x\n", @@ -10745,6 +10749,14 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) ret = -EINVAL; } } + + /* + * Change the host link state after requesting DC8051 to + * change its physical state so that we can ignore any + * interrupt with stale LNI(XX) error, which will not be + * cleared until DC8051 transitions to Polling state. + */ + ppd->host_link_state = HLS_DN_POLL; ppd->offline_disabled_reason = HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE); /* @@ -12870,6 +12882,39 @@ static int wait_phys_link_offline_substates(struct hfi1_pportdata *ppd, return read_state; } +/* + * wait_phys_link_out_of_offline - wait for any out of offline state + * @ppd: port device + * @msecs: the number of milliseconds to wait + * + * Wait up to msecs milliseconds for any out of offline physical link + * state change to occur. + * Returns 0 if at least one state is reached, otherwise -ETIMEDOUT. + */ +static int wait_phys_link_out_of_offline(struct hfi1_pportdata *ppd, + int msecs) +{ + u32 read_state; + unsigned long timeout; + + timeout = jiffies + msecs_to_jiffies(msecs); + while (1) { + read_state = read_physical_state(ppd->dd); + if ((read_state & 0xF0) != PLS_OFFLINE) + break; + if (time_after(jiffies, timeout)) { + dd_dev_err(ppd->dd, + "timeout waiting for phy link out of offline. Read state 0x%x, %dms\n", + read_state, msecs); + return -ETIMEDOUT; + } + usleep_range(1950, 2050); /* sleep 2ms-ish */ + } + + log_state_transition(ppd, read_state); + return read_state; +} + #define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \ (r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK) From c4e7247f99e600351bd499eb51ef1141a8b04091 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 28 Nov 2018 10:32:48 -0800 Subject: [PATCH 1547/3715] IB/hfi1: Close VNIC sdma_progress sleep window [ Upstream commit 18912c4524385dd6532c682cb9d4f6aa39ba8d47 ] The call to sdma_progress() is called outside the wait lock. In this case, there is a race condition where sdma_progress() can return false and the sdma_engine can idle. If that happens, there will be no more sdma interrupts to cause the wakeup and the vnic_sdma xmit will hang. Fix by moving the lock to enclose the sdma_progress() call. Also, delete the tx_retry. The need for this was removed by: commit bcad29137a97 ("IB/hfi1: Serve the most starved iowait entry first") Fixes: 64551ede6cd1 ("IB/hfi1: VNIC SDMA support") Reviewed-by: Gary Leshner Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/vnic_sdma.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c index c3c96c5869ed..718dcdef946e 100644 --- a/drivers/infiniband/hw/hfi1/vnic_sdma.c +++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c @@ -57,7 +57,6 @@ #define HFI1_VNIC_TXREQ_NAME_LEN 32 #define HFI1_VNIC_SDMA_DESC_WTRMRK 64 -#define HFI1_VNIC_SDMA_RETRY_COUNT 1 /* * struct vnic_txreq - VNIC transmit descriptor @@ -67,7 +66,6 @@ * @pad: pad buffer * @plen: pad length * @pbc_val: pbc value - * @retry_count: tx retry count */ struct vnic_txreq { struct sdma_txreq txreq; @@ -77,8 +75,6 @@ struct vnic_txreq { unsigned char pad[HFI1_VNIC_MAX_PAD]; u16 plen; __le64 pbc_val; - - u32 retry_count; }; static void vnic_sdma_complete(struct sdma_txreq *txreq, @@ -196,7 +192,6 @@ int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx, ret = build_vnic_tx_desc(sde, tx, pbc); if (unlikely(ret)) goto free_desc; - tx->retry_count = 0; ret = sdma_send_txreq(sde, &vnic_sdma->wait, &tx->txreq, vnic_sdma->pkts_sent); @@ -238,14 +233,14 @@ static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde, struct hfi1_vnic_sdma *vnic_sdma = container_of(wait, struct hfi1_vnic_sdma, wait); struct hfi1_ibdev *dev = &vnic_sdma->dd->verbs_dev; - struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq); - if (sdma_progress(sde, seq, txreq)) - if (tx->retry_count++ < HFI1_VNIC_SDMA_RETRY_COUNT) - return -EAGAIN; + write_seqlock(&dev->iowait_lock); + if (sdma_progress(sde, seq, txreq)) { + write_sequnlock(&dev->iowait_lock); + return -EAGAIN; + } vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED; - write_seqlock(&dev->iowait_lock); if (list_empty(&vnic_sdma->wait.list)) iowait_queue(pkts_sent, wait, &sde->dmawait); write_sequnlock(&dev->iowait_lock); From 7a15c302ad22cf20308b38aba13ad985690ebed3 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 29 Nov 2018 21:18:07 -0500 Subject: [PATCH 1548/3715] mlx4: Use snprintf instead of complicated strcpy [ Upstream commit 0fbc9b8b4ea3f688a5da141a64f97aa33ad02ae9 ] This fixes a compilation warning in sysfs.c drivers/infiniband/hw/mlx4/sysfs.c:360:2: warning: 'strncpy' output may be truncated copying 8 bytes from a string of length 31 [-Wstringop-truncation] By eliminating the temporary stack buffer. Signed-off-by: Qian Cai Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx4/sysfs.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index e219093d2764..d2da28d613f2 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -353,16 +353,12 @@ err: static void get_name(struct mlx4_ib_dev *dev, char *name, int i, int max) { - char base_name[9]; - - /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n */ - strlcpy(name, pci_name(dev->dev->persist->pdev), max); - strncpy(base_name, name, 8); /*till xxxx:yy:*/ - base_name[8] = '\0'; - /* with no ARI only 3 last bits are used so when the fn is higher than 8 + /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n + * with no ARI only 3 last bits are used so when the fn is higher than 8 * need to add it to the dev num, so count in the last number will be * modulo 8 */ - sprintf(name, "%s%.2d.%d", base_name, (i/8), (i%8)); + snprintf(name, max, "%.8s%.2d.%d", pci_name(dev->dev->persist->pdev), + i / 8, i % 8); } struct mlx4_port { From 24390936175399315ecfbdbd9aed5afe291b377a Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 7 Dec 2018 03:52:43 +0000 Subject: [PATCH 1549/3715] usb: mtu3: fix dbginfo in qmu_tx_zlp_error_handler [ Upstream commit f770e3bc236ee954a3b4052bdf55739e26ee25db ] Fixes gcc '-Wunused-but-set-variable' warning: drivers/usb/mtu3/mtu3_qmu.c: In function 'qmu_tx_zlp_error_handler': drivers/usb/mtu3/mtu3_qmu.c:385:22: warning: variable 'req' set but not used [-Wunused-but-set-variable] It seems dbginfo original intention is print 'req' other than 'mreq' Acked-by: Chunfeng Yun Signed-off-by: YueHaibing Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/mtu3/mtu3_qmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/mtu3/mtu3_qmu.c b/drivers/usb/mtu3/mtu3_qmu.c index 7d9ba8a52368..c87947fb2694 100644 --- a/drivers/usb/mtu3/mtu3_qmu.c +++ b/drivers/usb/mtu3/mtu3_qmu.c @@ -372,7 +372,7 @@ static void qmu_tx_zlp_error_handler(struct mtu3 *mtu, u8 epnum) return; } - dev_dbg(mtu->dev, "%s send ZLP for req=%p\n", __func__, mreq); + dev_dbg(mtu->dev, "%s send ZLP for req=%p\n", __func__, req); mtu3_clrbits(mbase, MU3D_EP_TXCR0(mep->epnum), TX_DMAREQEN); From 36e6f0ec773437ddc086e2b1b2ff71aa038e46b4 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 6 Dec 2018 13:11:42 -0600 Subject: [PATCH 1550/3715] ARM: dts: sunxi: Fix PMU compatible strings [ Upstream commit 5719ac19fc32d892434939c1756c2f9a8322e6ef ] "arm,cortex-a15-pmu" is not a valid fallback compatible string for an Cortex-A7 PMU, so drop it. Cc: Maxime Ripard Cc: Chen-Yu Tsai Signed-off-by: Rob Herring Acked-by: Will Deacon Signed-off-by: Maxime Ripard Signed-off-by: Sasha Levin --- arch/arm/boot/dts/sun6i-a31.dtsi | 2 +- arch/arm/boot/dts/sun7i-a20.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/sun6i-a31.dtsi b/arch/arm/boot/dts/sun6i-a31.dtsi index eef072a21acc..0bb82d0442a5 100644 --- a/arch/arm/boot/dts/sun6i-a31.dtsi +++ b/arch/arm/boot/dts/sun6i-a31.dtsi @@ -173,7 +173,7 @@ }; pmu { - compatible = "arm,cortex-a7-pmu", "arm,cortex-a15-pmu"; + compatible = "arm,cortex-a7-pmu"; interrupts = , , , diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index 96bee776e145..77f04dbdf996 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -171,7 +171,7 @@ }; pmu { - compatible = "arm,cortex-a7-pmu", "arm,cortex-a15-pmu"; + compatible = "arm,cortex-a7-pmu"; interrupts = , ; }; From 5ca05d9f18dbe4aa15f1417431d371c526be0776 Mon Sep 17 00:00:00 2001 From: Helen Fornazier Date: Fri, 7 Dec 2018 12:56:41 -0500 Subject: [PATCH 1551/3715] media: vimc: fix start stream when link is disabled [ Upstream commit e159b6074c82fe31b79aad672e02fa204dbbc6d8 ] If link is disabled, media_entity_remote_pad returns NULL, causing a NULL pointer deference. Ignore links that are not enabled instead. Signed-off-by: Helen Koike Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/vimc/vimc-common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/platform/vimc/vimc-common.c b/drivers/media/platform/vimc/vimc-common.c index 743554de724d..a9ab3871ccda 100644 --- a/drivers/media/platform/vimc/vimc-common.c +++ b/drivers/media/platform/vimc/vimc-common.c @@ -241,6 +241,8 @@ int vimc_pipeline_s_stream(struct media_entity *ent, int enable) /* Start the stream in the subdevice direct connected */ pad = media_entity_remote_pad(&ent->pads[i]); + if (!pad) + continue; if (!is_media_entity_v4l2_subdev(pad->entity)) return -EINVAL; From 29ccdd181733e631003c291f26811cf3b07c9ae7 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Fri, 7 Dec 2018 14:00:11 +0000 Subject: [PATCH 1552/3715] net: aquantia: fix RSS table and key sizes [ Upstream commit 474fb1150d40780e71f0b569aeac4f375df3af3d ] Set RSS indirection table and RSS hash key sizes to their real size. Signed-off-by: Dmitry Bogdanov Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/aquantia/atlantic/aq_cfg.h | 4 ++-- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h index 57e796870595..ea4b7e97c61e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h @@ -40,8 +40,8 @@ #define AQ_CFG_IS_LRO_DEF 1U /* RSS */ -#define AQ_CFG_RSS_INDIRECTION_TABLE_MAX 128U -#define AQ_CFG_RSS_HASHKEY_SIZE 320U +#define AQ_CFG_RSS_INDIRECTION_TABLE_MAX 64U +#define AQ_CFG_RSS_HASHKEY_SIZE 40U #define AQ_CFG_IS_RSS_DEF 1U #define AQ_CFG_NUM_RSS_QUEUES_DEF AQ_CFG_VECS_DEF diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index cc658a29cc33..a69f5f1ad32a 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -43,7 +43,7 @@ static void aq_nic_rss_init(struct aq_nic_s *self, unsigned int num_rss_queues) struct aq_rss_parameters *rss_params = &cfg->aq_rss; int i = 0; - static u8 rss_key[40] = { + static u8 rss_key[AQ_CFG_RSS_HASHKEY_SIZE] = { 0x1e, 0xad, 0x71, 0x87, 0x65, 0xfc, 0x26, 0x7d, 0x0d, 0x45, 0x67, 0x74, 0xcd, 0x06, 0x1a, 0x18, 0xb6, 0xc1, 0xf0, 0xc7, 0xbb, 0x18, 0xbe, 0xf8, From 6d9175b95504d28045909631514d06660b4b1a9a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 6 Dec 2019 10:20:16 -0800 Subject: [PATCH 1553/3715] tcp: exit if nothing to retransmit on RTO timeout Two upstream commits squashed together for v4.14 stable : commit 88f8598d0a302a08380eadefd09b9f5cb1c4c428 upstream. Previously TCP only warns if its RTO timer fires and the retransmission queue is empty, but it'll cause null pointer reference later on. It's better to avoid such catastrophic failure and simply exit with a warning. Squashed with "tcp: refactor tcp_retransmit_timer()" : commit 0d580fbd2db084a5c96ee9c00492236a279d5e0f upstream. It appears linux-4.14 stable needs a backport of commit 88f8598d0a30 ("tcp: exit if nothing to retransmit on RTO timeout") Since tcp_rtx_queue_empty() is not in pre 4.15 kernels, let's refactor tcp_retransmit_timer() to only use tcp_rtx_queue_head() Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Reviewed-by: Neal Cardwell Reviewed-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_timer.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 65f66bd585e6..895129b0928c 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -413,6 +413,7 @@ void tcp_retransmit_timer(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); struct inet_connection_sock *icsk = inet_csk(sk); + struct sk_buff *skb; if (tp->fastopen_rsk) { WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && @@ -423,10 +424,13 @@ void tcp_retransmit_timer(struct sock *sk) */ return; } - if (!tp->packets_out) - goto out; - WARN_ON(tcp_write_queue_empty(sk)); + if (!tp->packets_out) + return; + + skb = tcp_rtx_queue_head(sk); + if (WARN_ON_ONCE(!skb)) + return; tp->tlp_high_seq = 0; @@ -459,7 +463,7 @@ void tcp_retransmit_timer(struct sock *sk) goto out; } tcp_enter_loss(sk); - tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1); + tcp_retransmit_skb(sk, skb, 1); __sk_dst_reset(sk); goto out_reset_timer; } From c294780a80b0fb2e92ab4e2fd584029ced994186 Mon Sep 17 00:00:00 2001 From: Xuewei Zhang Date: Thu, 3 Oct 2019 17:12:43 -0700 Subject: [PATCH 1554/3715] sched/fair: Scale bandwidth quota and period without losing quota/period ratio precision commit 4929a4e6faa0f13289a67cae98139e727f0d4a97 upstream. The quota/period ratio is used to ensure a child task group won't get more bandwidth than the parent task group, and is calculated as: normalized_cfs_quota() = [(quota_us << 20) / period_us] If the quota/period ratio was changed during this scaling due to precision loss, it will cause inconsistency between parent and child task groups. See below example: A userspace container manager (kubelet) does three operations: 1) Create a parent cgroup, set quota to 1,000us and period to 10,000us. 2) Create a few children cgroups. 3) Set quota to 1,000us and period to 10,000us on a child cgroup. These operations are expected to succeed. However, if the scaling of 147/128 happens before step 3, quota and period of the parent cgroup will be changed: new_quota: 1148437ns, 1148us new_period: 11484375ns, 11484us And when step 3 comes in, the ratio of the child cgroup will be 104857, which will be larger than the parent cgroup ratio (104821), and will fail. Scaling them by a factor of 2 will fix the problem. Tested-by: Phil Auld Signed-off-by: Xuewei Zhang Signed-off-by: Peter Zijlstra (Intel) Acked-by: Phil Auld Cc: Anton Blanchard Cc: Ben Segall Cc: Dietmar Eggemann Cc: Juri Lelli Cc: Linus Torvalds Cc: Mel Gorman Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Vincent Guittot Fixes: 2e8e19226398 ("sched/fair: Limit sched_cfs_period_timer() loop to avoid hard lockup") Link: https://lkml.kernel.org/r/20191004001243.140897-1-xueweiz@google.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/sched/fair.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 67433fbdcb5a..0b4e997fea1a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4655,20 +4655,28 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer) if (++count > 3) { u64 new, old = ktime_to_ns(cfs_b->period); - new = (old * 147) / 128; /* ~115% */ - new = min(new, max_cfs_quota_period); + /* + * Grow period by a factor of 2 to avoid losing precision. + * Precision loss in the quota/period ratio can cause __cfs_schedulable + * to fail. + */ + new = old * 2; + if (new < max_cfs_quota_period) { + cfs_b->period = ns_to_ktime(new); + cfs_b->quota *= 2; - cfs_b->period = ns_to_ktime(new); - - /* since max is 1s, this is limited to 1e9^2, which fits in u64 */ - cfs_b->quota *= new; - cfs_b->quota = div64_u64(cfs_b->quota, old); - - pr_warn_ratelimited( - "cfs_period_timer[cpu%d]: period too short, scaling up (new cfs_period_us %lld, cfs_quota_us = %lld)\n", - smp_processor_id(), - div_u64(new, NSEC_PER_USEC), - div_u64(cfs_b->quota, NSEC_PER_USEC)); + pr_warn_ratelimited( + "cfs_period_timer[cpu%d]: period too short, scaling up (new cfs_period_us = %lld, cfs_quota_us = %lld)\n", + smp_processor_id(), + div_u64(new, NSEC_PER_USEC), + div_u64(cfs_b->quota, NSEC_PER_USEC)); + } else { + pr_warn_ratelimited( + "cfs_period_timer[cpu%d]: period too short, but cannot scale up without losing precision (cfs_period_us = %lld, cfs_quota_us = %lld)\n", + smp_processor_id(), + div_u64(old, NSEC_PER_USEC), + div_u64(cfs_b->quota, NSEC_PER_USEC)); + } /* reset count so we don't come right back in here */ count = 0; From 2c48b0da2b514a715591c5ed3819c8ae828b06e6 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 12 Nov 2019 11:49:04 +0100 Subject: [PATCH 1555/3715] fuse: verify nlink commit c634da718db9b2fac201df2ae1b1b095344ce5eb upstream. When adding a new hard link, make sure that i_nlink doesn't overflow. Fixes: ac45d61357e8 ("fuse: fix nlink after unlink") Cc: # v3.4 Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index b79bba77652a..8f7039f5249d 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -830,7 +830,8 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, spin_lock(&fc->lock); fi->attr_version = ++fc->attr_version; - inc_nlink(inode); + if (likely(inode->i_nlink < UINT_MAX)) + inc_nlink(inode); spin_unlock(&fc->lock); fuse_invalidate_attr(inode); fuse_update_ctime(inode); From 79804ebaa31a911a2a5b8267055b8248b5d99bdf Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 12 Nov 2019 11:49:04 +0100 Subject: [PATCH 1556/3715] fuse: verify attributes commit eb59bd17d2fa6e5e84fba61a5ebdea984222e6d5 upstream. If a filesystem returns negative inode sizes, future reads on the file were causing the cpu to spin on truncate_pagecache. Create a helper to validate the attributes. This now does two things: - check the file mode - check if the file size fits in i_size without overflowing Reported-by: Arijit Banerjee Fixes: d8a5ba45457e ("[PATCH] FUSE - core") Cc: # v2.6.14 Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dir.c | 24 +++++++++++++++++------- fs/fuse/fuse_i.h | 2 ++ 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 8f7039f5249d..4d95a416fc36 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -234,7 +234,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) kfree(forget); if (ret == -ENOMEM) goto out; - if (ret || (outarg.attr.mode ^ inode->i_mode) & S_IFMT) + if (ret || fuse_invalid_attr(&outarg.attr) || + (outarg.attr.mode ^ inode->i_mode) & S_IFMT) goto invalid; forget_all_cached_acls(inode); @@ -297,6 +298,12 @@ int fuse_valid_type(int m) S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m); } +bool fuse_invalid_attr(struct fuse_attr *attr) +{ + return !fuse_valid_type(attr->mode) || + attr->size > LLONG_MAX; +} + int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name, struct fuse_entry_out *outarg, struct inode **inode) { @@ -328,7 +335,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name err = -EIO; if (!outarg->nodeid) goto out_put_forget; - if (!fuse_valid_type(outarg->attr.mode)) + if (fuse_invalid_attr(&outarg->attr)) goto out_put_forget; *inode = fuse_iget(sb, outarg->nodeid, outarg->generation, @@ -451,7 +458,8 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, goto out_free_ff; err = -EIO; - if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid)) + if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid) || + fuse_invalid_attr(&outentry.attr)) goto out_free_ff; ff->fh = outopen.fh; @@ -557,7 +565,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args, goto out_put_forget_req; err = -EIO; - if (invalid_nodeid(outarg.nodeid)) + if (invalid_nodeid(outarg.nodeid) || fuse_invalid_attr(&outarg.attr)) goto out_put_forget_req; if ((outarg.attr.mode ^ mode) & S_IFMT) @@ -911,7 +919,8 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat, args.out.args[0].value = &outarg; err = fuse_simple_request(fc, &args); if (!err) { - if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) { + if (fuse_invalid_attr(&outarg.attr) || + (inode->i_mode ^ outarg.attr.mode) & S_IFMT) { make_bad_inode(inode); err = -EIO; } else { @@ -1215,7 +1224,7 @@ static int fuse_direntplus_link(struct file *file, if (invalid_nodeid(o->nodeid)) return -EIO; - if (!fuse_valid_type(o->attr.mode)) + if (fuse_invalid_attr(&o->attr)) return -EIO; fc = get_fuse_conn(dir); @@ -1692,7 +1701,8 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, goto error; } - if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) { + if (fuse_invalid_attr(&outarg.attr) || + (inode->i_mode ^ outarg.attr.mode) & S_IFMT) { make_bad_inode(inode); err = -EIO; goto error; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index e682f2eff6c0..338aa5e266d6 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -896,6 +896,8 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc); */ int fuse_valid_type(int m); +bool fuse_invalid_attr(struct fuse_attr *attr); + /** * Is current process allowed to perform filesystem operation? */ From c9437a4c887bfc2f4bcf8eb0c28eb1e4019acbb3 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 26 Nov 2019 17:04:23 +0800 Subject: [PATCH 1557/3715] ALSA: hda/realtek - Dell headphone has noise on unmute for ALC236 commit e1e8c1fdce8b00fce08784d9d738c60ebf598ebc upstream. headphone have noise even the volume is very small. Let it fill up pcbeep hidden register to default value. The issue was gone. Fixes: 4344aec84bd8 ("ALSA: hda/realtek - New codec support for ALC256") Fixes: 736f20a70608 ("ALSA: hda/realtek - Add support for ALC236/ALC3204") Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/r/9ae47f23a64d4e41a9c81e263cd8a250@realtek.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 404c50ab28fa..41e3c77d5fb7 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -333,9 +333,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0215: case 0x10ec0233: case 0x10ec0235: - case 0x10ec0236: case 0x10ec0255: - case 0x10ec0256: case 0x10ec0257: case 0x10ec0282: case 0x10ec0283: @@ -347,6 +345,11 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0300: alc_update_coef_idx(codec, 0x10, 1<<9, 0); break; + case 0x10ec0236: + case 0x10ec0256: + alc_write_coef_idx(codec, 0x36, 0x5757); + alc_update_coef_idx(codec, 0x10, 1<<9, 0); + break; case 0x10ec0275: alc_update_coef_idx(codec, 0xe, 0, 1<<0); break; From 2a76606d8a830a02ea3a7aef6f5362ceccb8749f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 4 Dec 2019 15:48:24 +0100 Subject: [PATCH 1558/3715] ALSA: pcm: oss: Avoid potential buffer overflows commit 4cc8d6505ab82db3357613d36e6c58a297f57f7c upstream. syzkaller reported an invalid access in PCM OSS read, and this seems to be an overflow of the internal buffer allocated for a plugin. Since the rate plugin adjusts its transfer size dynamically, the calculation for the chained plugin might be bigger than the given buffer size in some extreme cases, which lead to such an buffer overflow as caught by KASAN. Fix it by limiting the max transfer size properly by checking against the destination size in each plugin transfer callback. Reported-by: syzbot+f153bde47a62e0b05f83@syzkaller.appspotmail.com Cc: Link: https://lore.kernel.org/r/20191204144824.17801-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/oss/linear.c | 2 ++ sound/core/oss/mulaw.c | 2 ++ sound/core/oss/route.c | 2 ++ 3 files changed, 6 insertions(+) diff --git a/sound/core/oss/linear.c b/sound/core/oss/linear.c index 2045697f449d..797d838a2f9e 100644 --- a/sound/core/oss/linear.c +++ b/sound/core/oss/linear.c @@ -107,6 +107,8 @@ static snd_pcm_sframes_t linear_transfer(struct snd_pcm_plugin *plugin, } } #endif + if (frames > dst_channels[0].frames) + frames = dst_channels[0].frames; convert(plugin, src_channels, dst_channels, frames); return frames; } diff --git a/sound/core/oss/mulaw.c b/sound/core/oss/mulaw.c index 7915564bd394..3788906421a7 100644 --- a/sound/core/oss/mulaw.c +++ b/sound/core/oss/mulaw.c @@ -269,6 +269,8 @@ static snd_pcm_sframes_t mulaw_transfer(struct snd_pcm_plugin *plugin, } } #endif + if (frames > dst_channels[0].frames) + frames = dst_channels[0].frames; data = (struct mulaw_priv *)plugin->extra_data; data->func(plugin, src_channels, dst_channels, frames); return frames; diff --git a/sound/core/oss/route.c b/sound/core/oss/route.c index c8171f5783c8..72dea04197ef 100644 --- a/sound/core/oss/route.c +++ b/sound/core/oss/route.c @@ -57,6 +57,8 @@ static snd_pcm_sframes_t route_transfer(struct snd_pcm_plugin *plugin, return -ENXIO; if (frames == 0) return 0; + if (frames > dst_channels[0].frames) + frames = dst_channels[0].frames; nsrcs = plugin->src_format.channels; ndsts = plugin->dst_format.channels; From eed584fbd956d25ee8bf5b515d750a94537e4a38 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 20 Nov 2019 16:20:35 +0800 Subject: [PATCH 1559/3715] ALSA: hda - Add mute led support for HP ProBook 645 G4 commit e190de6941db14813032af87873f5550ad5764fe upstream. Mic mute led does not work on HP ProBook 645 G4. We can use CXT_FIXUP_MUTE_LED_GPIO fixup to support it. Signed-off-by: Kai-Heng Feng Cc: Link: https://lore.kernel.org/r/20191120082035.18937-1-kai.heng.feng@canonical.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_conexant.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 49be42d27761..382b6d2ed803 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -960,6 +960,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x103c, 0x837f, "HP ProBook 470 G5", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", CXT_FIXUP_HP_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x103c, 0x8402, "HP ProBook 645 G4", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x103c, 0x8455, "HP Z2 G4", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN), SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT_FIXUP_OLPC_XO), From 5ddcd540fba9b774d9f220b844f930de8f72a1b8 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 22 Nov 2019 16:17:08 -0800 Subject: [PATCH 1560/3715] Input: synaptics - switch another X1 Carbon 6 to RMI/SMbus commit fc1156f373e3927e0dcf06678906c367588bfdd6 upstream. Some Lenovo X1 Carbon Gen 6 laptops report LEN0091. Add this to the smbus_pnp_ids list. Signed-off-by: Hans Verkuil Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20191119105118.54285-2-hverkuil-cisco@xs4all.nl Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 7db53eab7012..111a71190547 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -175,6 +175,7 @@ static const char * const smbus_pnp_ids[] = { "LEN0071", /* T480 */ "LEN0072", /* X1 Carbon Gen 5 (2017) - Elan/ALPS trackpoint */ "LEN0073", /* X1 Carbon G5 (Elantech) */ + "LEN0091", /* X1 Carbon 6 */ "LEN0092", /* X1 Carbon 6 */ "LEN0093", /* T480 */ "LEN0096", /* X280 */ From fac3956db21c30b89f73163b2090c66eee6b0834 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Mon, 2 Dec 2019 09:37:00 -0800 Subject: [PATCH 1561/3715] Input: synaptics-rmi4 - re-enable IRQs in f34v7_do_reflash commit 86bcd3a12999447faad60ec59c2d64d18d8e61ac upstream. F34 is a bit special as it reinitializes the device and related driver structs during the firmware update. This clears the fn_irq_mask which will then prevent F34 from receiving further interrupts, leading to timeouts during the firmware update. Make sure to reinitialize the IRQ enables at the appropriate times. The issue is in F34 code, but the commit in the fixes tag exposed the issue, as before this commit things would work by accident. Fixes: 363c53875aef (Input: synaptics-rmi4 - avoid processing unknown IRQs) Signed-off-by: Lucas Stach Link: https://lore.kernel.org/r/20191129133514.23224-1-l.stach@pengutronix.de Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/rmi4/rmi_f34v7.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/input/rmi4/rmi_f34v7.c b/drivers/input/rmi4/rmi_f34v7.c index 3991d2943660..099dde68e332 100644 --- a/drivers/input/rmi4/rmi_f34v7.c +++ b/drivers/input/rmi4/rmi_f34v7.c @@ -1192,6 +1192,9 @@ int rmi_f34v7_do_reflash(struct f34_data *f34, const struct firmware *fw) { int ret; + f34->fn->rmi_dev->driver->set_irq_bits(f34->fn->rmi_dev, + f34->fn->irq_mask); + rmi_f34v7_read_queries_bl_version(f34); f34->v7.image = fw->data; From d5b244e0d7a66fb0bace68d9f6cb8634c8a04481 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 2 Dec 2019 10:08:12 -0800 Subject: [PATCH 1562/3715] Input: synaptics-rmi4 - don't increment rmiaddr for SMBus transfers commit a284e11c371e446371675668d8c8120a27227339 upstream. This increment of rmi_smbus in rmi_smb_read/write_block() causes garbage to be read/written. The first read of SMB_MAX_COUNT bytes is fine, but after that it is nonsense. Trial-and-error showed that by dropping the increment of rmiaddr everything is fine and the F54 function properly works. I tried a hack with rmi_smb_write_block() as well (writing to the same F54 touchpad data area, then reading it back), and that suggests that there too the rmiaddr increment has to be dropped. It makes sense that if it has to be dropped for read, then it has to be dropped for write as well. It looks like the initial work with F54 was done using i2c, not smbus, and it seems nobody ever tested F54 with smbus. The other functions all read/write less than SMB_MAX_COUNT as far as I can tell, so this issue was never noticed with non-F54 functions. With this change I can read out the touchpad data correctly on my Lenovo X1 Carbon 6th Gen laptop. Signed-off-by: Hans Verkuil Link: https://lore.kernel.org/r/8dd22e21-4933-8e9c-a696-d281872c8de7@xs4all.nl Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/rmi4/rmi_smbus.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/input/rmi4/rmi_smbus.c b/drivers/input/rmi4/rmi_smbus.c index b6ccf39c6a7b..4b2466cf2fb1 100644 --- a/drivers/input/rmi4/rmi_smbus.c +++ b/drivers/input/rmi4/rmi_smbus.c @@ -166,7 +166,6 @@ static int rmi_smb_write_block(struct rmi_transport_dev *xport, u16 rmiaddr, /* prepare to write next block of bytes */ cur_len -= SMB_MAX_COUNT; databuff += SMB_MAX_COUNT; - rmiaddr += SMB_MAX_COUNT; } exit: mutex_unlock(&rmi_smb->page_mutex); @@ -218,7 +217,6 @@ static int rmi_smb_read_block(struct rmi_transport_dev *xport, u16 rmiaddr, /* prepare to read next block of bytes */ cur_len -= SMB_MAX_COUNT; databuff += SMB_MAX_COUNT; - rmiaddr += SMB_MAX_COUNT; } retval = 0; From 7455c3af9d61dc914b1c2f86d76fa7bc644989ee Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 2 Dec 2019 09:36:15 -0800 Subject: [PATCH 1563/3715] Input: goodix - add upside-down quirk for Teclast X89 tablet commit df5b5e555b356662a5e4a23c6774fdfce8547d54 upstream. The touchscreen on the Teclast X89 is mounted upside down in relation to the display orientation (the touchscreen itself is mounted upright, but the display is mounted upside-down). Add a quirk for this so that we send coordinates which match the display orientation. Signed-off-by: Hans de Goede Reviewed-by: Bastien Nocera Link: https://lore.kernel.org/r/20191202085636.6650-1-hdegoede@redhat.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/touchscreen/goodix.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index 2bfa89ec552c..777dd5b159d3 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -92,6 +92,15 @@ static const unsigned long goodix_irq_flags[] = { */ static const struct dmi_system_id rotated_screen[] = { #if defined(CONFIG_DMI) && defined(CONFIG_X86) + { + .ident = "Teclast X89", + .matches = { + /* tPAD is too generic, also match on bios date */ + DMI_MATCH(DMI_BOARD_VENDOR, "TECLAST"), + DMI_MATCH(DMI_BOARD_NAME, "tPAD"), + DMI_MATCH(DMI_BIOS_DATE, "12/19/2014"), + }, + }, { .ident = "WinBook TW100", .matches = { From 60c5e0c603dd8edd1b571ae322edf2a0c0f47fc0 Mon Sep 17 00:00:00 2001 From: Mike Leach Date: Mon, 4 Nov 2019 11:12:42 -0700 Subject: [PATCH 1564/3715] coresight: etm4x: Fix input validation for sysfs. commit 2fe6899e36aa174abefd017887f9cfe0cb60c43a upstream. A number of issues are fixed relating to sysfs input validation:- 1) bb_ctrl_store() - incorrect compare of bit select field to absolute value. Reworked per ETMv4 specification. 2) seq_event_store() - incorrect mask value - register has two event values. 3) cyc_threshold_store() - must mask with max before checking min otherwise wrapped values can set illegal value below min. 4) res_ctrl_store() - update to mask off all res0 bits. Reviewed-by: Leo Yan Reviewed-by: Mathieu Poirier Signed-off-by: Mike Leach Fixes: a77de2637c9eb ("coresight: etm4x: moving sysFS entries to a dedicated file") Cc: stable # 4.9+ Signed-off-by: Mathieu Poirier Link: https://lore.kernel.org/r/20191104181251.26732-6-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman --- .../coresight/coresight-etm4x-sysfs.c | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c index 4e6eab53e34e..2f021c1a2fa6 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c @@ -667,10 +667,13 @@ static ssize_t cyc_threshold_store(struct device *dev, if (kstrtoul(buf, 16, &val)) return -EINVAL; + + /* mask off max threshold before checking min value */ + val &= ETM_CYC_THRESHOLD_MASK; if (val < drvdata->ccitmin) return -EINVAL; - config->ccctlr = val & ETM_CYC_THRESHOLD_MASK; + config->ccctlr = val; return size; } static DEVICE_ATTR_RW(cyc_threshold); @@ -701,14 +704,16 @@ static ssize_t bb_ctrl_store(struct device *dev, return -EINVAL; if (!drvdata->nr_addr_cmp) return -EINVAL; + /* - * Bit[7:0] selects which address range comparator is used for - * branch broadcast control. + * Bit[8] controls include(1) / exclude(0), bits[0-7] select + * individual range comparators. If include then at least 1 + * range must be selected. */ - if (BMVAL(val, 0, 7) > drvdata->nr_addr_cmp) + if ((val & BIT(8)) && (BMVAL(val, 0, 7) == 0)) return -EINVAL; - config->bb_ctrl = val; + config->bb_ctrl = val & GENMASK(8, 0); return size; } static DEVICE_ATTR_RW(bb_ctrl); @@ -1341,8 +1346,8 @@ static ssize_t seq_event_store(struct device *dev, spin_lock(&drvdata->spinlock); idx = config->seq_idx; - /* RST, bits[7:0] */ - config->seq_ctrl[idx] = val & 0xFF; + /* Seq control has two masks B[15:8] F[7:0] */ + config->seq_ctrl[idx] = val & 0xFFFF; spin_unlock(&drvdata->spinlock); return size; } @@ -1597,7 +1602,7 @@ static ssize_t res_ctrl_store(struct device *dev, if (idx % 2 != 0) /* PAIRINV, bit[21] */ val &= ~BIT(21); - config->res_ctrl[idx] = val; + config->res_ctrl[idx] = val & GENMASK(21, 0); spin_unlock(&drvdata->spinlock); return size; } From 9ed71720e46c881d803bcb2403e48556841515ce Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Thu, 21 Nov 2019 14:01:11 -0600 Subject: [PATCH 1565/3715] Input: Fix memory leak in psxpad_spi_probe In the implementation of psxpad_spi_probe() the allocated memory for pdev is leaked if psxpad_spi_init_ff() or input_register_polled_device() fail. The solution is using device managed allocation, like the one used for pad. Perform the allocation using devm_input_allocate_polled_device(). Fixes: 8be193c7b1f4 ("Input: add support for PlayStation 1/2 joypads connected via SPI") Signed-off-by: Navid Emamdoost Acked-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/joystick/psxpad-spi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/joystick/psxpad-spi.c b/drivers/input/joystick/psxpad-spi.c index 28b473f6cbb6..092096ee06b9 100644 --- a/drivers/input/joystick/psxpad-spi.c +++ b/drivers/input/joystick/psxpad-spi.c @@ -292,7 +292,7 @@ static int psxpad_spi_probe(struct spi_device *spi) if (!pad) return -ENOMEM; - pdev = input_allocate_polled_device(); + pdev = devm_input_allocate_polled_device(&spi->dev); if (!pdev) { dev_err(&spi->dev, "failed to allocate input device\n"); return -ENOMEM; From f14270c56125e0a3e80d89138a4c80f1a8c461b4 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Mon, 2 Sep 2019 22:52:52 +0800 Subject: [PATCH 1566/3715] x86/PCI: Avoid AMD FCH XHCI USB PME# from D0 defect commit 7e8ce0e2b036dbc6617184317983aea4f2c52099 upstream. The AMD FCH USB XHCI Controller advertises support for generating PME# while in D0. When in D0, it does signal PME# for USB 3.0 connect events, but not for USB 2.0 or USB 1.1 connect events, which means the controller doesn't wake correctly for those events. 00:10.0 USB controller [0c03]: Advanced Micro Devices, Inc. [AMD] FCH USB XHCI Controller [1022:7914] (rev 20) (prog-if 30 [XHCI]) Subsystem: Dell FCH USB XHCI Controller [1028:087e] Capabilities: [50] Power Management version 3 Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA PME(D0+,D1-,D2-,D3hot+,D3cold+) Clear PCI_PM_CAP_PME_D0 in dev->pme_support to indicate the device will not assert PME# from D0 so we don't rely on it. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=203673 Link: https://lore.kernel.org/r/20190902145252.32111-1-kai.heng.feng@canonical.com Signed-off-by: Kai-Heng Feng Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/pci/fixup.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 4210da7b44de..33e9b4f1ce20 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -588,6 +588,17 @@ static void pci_fixup_amd_ehci_pme(struct pci_dev *dev) } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x7808, pci_fixup_amd_ehci_pme); +/* + * Device [1022:7914] + * When in D0, PME# doesn't get asserted when plugging USB 2.0 device. + */ +static void pci_fixup_amd_fch_xhci_pme(struct pci_dev *dev) +{ + dev_info(&dev->dev, "PME# does not work under D0, disabling it\n"); + dev->pme_support &= ~(PCI_PM_CAP_PME_D0 >> PCI_PM_CAP_PME_SHIFT); +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x7914, pci_fixup_amd_fch_xhci_pme); + /* * Apple MacBook Pro: Avoid [mem 0x7fa00000-0x7fbfffff] * From defbcd1f8e852b403985a6e12abea2909fbbdbaa Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 27 Nov 2019 16:18:39 -0800 Subject: [PATCH 1567/3715] CIFS: Fix NULL-pointer dereference in smb2_push_mandatory_locks commit 6f582b273ec23332074d970a7fb25bef835df71f upstream. Currently when the client creates a cifsFileInfo structure for a newly opened file, it allocates a list of byte-range locks with a pointer to the new cfile and attaches this list to the inode's lock list. The latter happens before initializing all other fields, e.g. cfile->tlink. Thus a partially initialized cifsFileInfo structure becomes available to other threads that walk through the inode's lock list. One example of such a thread may be an oplock break worker thread that tries to push all cached byte-range locks. This causes NULL-pointer dereference in smb2_push_mandatory_locks() when accessing cfile->tlink: [598428.945633] BUG: kernel NULL pointer dereference, address: 0000000000000038 ... [598428.945749] Workqueue: cifsoplockd cifs_oplock_break [cifs] [598428.945793] RIP: 0010:smb2_push_mandatory_locks+0xd6/0x5a0 [cifs] ... [598428.945834] Call Trace: [598428.945870] ? cifs_revalidate_mapping+0x45/0x90 [cifs] [598428.945901] cifs_oplock_break+0x13d/0x450 [cifs] [598428.945909] process_one_work+0x1db/0x380 [598428.945914] worker_thread+0x4d/0x400 [598428.945921] kthread+0x104/0x140 [598428.945925] ? process_one_work+0x380/0x380 [598428.945931] ? kthread_park+0x80/0x80 [598428.945937] ret_from_fork+0x35/0x40 Fix this by reordering initialization steps of the cifsFileInfo structure: initialize all the fields first and then add the new byte-range lock list to the inode's lock list. Cc: Stable Signed-off-by: Pavel Shilovsky Reviewed-by: Aurelien Aptel Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/file.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 40f22932343c..6dc0e092b0fc 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -312,9 +312,6 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, INIT_LIST_HEAD(&fdlocks->locks); fdlocks->cfile = cfile; cfile->llist = fdlocks; - cifs_down_write(&cinode->lock_sem); - list_add(&fdlocks->llist, &cinode->llist); - up_write(&cinode->lock_sem); cfile->count = 1; cfile->pid = current->tgid; @@ -338,6 +335,10 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, oplock = 0; } + cifs_down_write(&cinode->lock_sem); + list_add(&fdlocks->llist, &cinode->llist); + up_write(&cinode->lock_sem); + spin_lock(&tcon->open_file_lock); if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock) oplock = fid->pending_open->oplock; From 0082adb163f54a6aff72e456fd0b789cb7c3216c Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Thu, 31 Oct 2019 14:18:57 -0700 Subject: [PATCH 1568/3715] CIFS: Fix SMB2 oplock break processing commit fa9c2362497fbd64788063288dc4e74daf977ebb upstream. Even when mounting modern protocol version the server may be configured without supporting SMB2.1 leases and the client uses SMB2 oplock to optimize IO performance through local caching. However there is a problem in oplock break handling that leads to missing a break notification on the client who has a file opened. It latter causes big latencies to other clients that are trying to open the same file. The problem reproduces when there are multiple shares from the same server mounted on the client. The processing code tries to match persistent and volatile file ids from the break notification with an open file but it skips all share besides the first one. Fix this by looking up in all shares belonging to the server that issued the oplock break. Cc: Stable Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2misc.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 31f01f09d25a..ff2ad15f67d6 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -622,10 +622,10 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) spin_lock(&cifs_tcp_ses_lock); list_for_each(tmp, &server->smb_ses_list) { ses = list_entry(tmp, struct cifs_ses, smb_ses_list); + list_for_each(tmp1, &ses->tcon_list) { tcon = list_entry(tmp1, struct cifs_tcon, tcon_list); - cifs_stats_inc(&tcon->stats.cifs_stats.num_oplock_brks); spin_lock(&tcon->open_file_lock); list_for_each(tmp2, &tcon->openFileList) { cfile = list_entry(tmp2, struct cifsFileInfo, @@ -637,6 +637,8 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) continue; cifs_dbg(FYI, "file id match, oplock break\n"); + cifs_stats_inc( + &tcon->stats.cifs_stats.num_oplock_brks); cinode = CIFS_I(d_inode(cfile->dentry)); spin_lock(&cfile->file_info_lock); if (!CIFS_CACHE_WRITE(cinode) && @@ -669,9 +671,6 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) return true; } spin_unlock(&tcon->open_file_lock); - spin_unlock(&cifs_tcp_ses_lock); - cifs_dbg(FYI, "No matching file for oplock break\n"); - return true; } } spin_unlock(&cifs_tcp_ses_lock); From c68ab0f33eabef9c9d4206cec2fb6509d1d5b621 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 22 Nov 2019 12:42:20 -0800 Subject: [PATCH 1569/3715] tty: vt: keyboard: reject invalid keycodes commit b2b2dd71e0859436d4e05b2f61f86140250ed3f8 upstream. Do not try to handle keycodes that are too big, otherwise we risk doing out-of-bounds writes: BUG: KASAN: global-out-of-bounds in clear_bit include/asm-generic/bitops-instrumented.h:56 [inline] BUG: KASAN: global-out-of-bounds in kbd_keycode drivers/tty/vt/keyboard.c:1411 [inline] BUG: KASAN: global-out-of-bounds in kbd_event+0xe6b/0x3790 drivers/tty/vt/keyboard.c:1495 Write of size 8 at addr ffffffff89a1b2d8 by task syz-executor108/1722 ... kbd_keycode drivers/tty/vt/keyboard.c:1411 [inline] kbd_event+0xe6b/0x3790 drivers/tty/vt/keyboard.c:1495 input_to_handler+0x3b6/0x4c0 drivers/input/input.c:118 input_pass_values.part.0+0x2e3/0x720 drivers/input/input.c:145 input_pass_values drivers/input/input.c:949 [inline] input_set_keycode+0x290/0x320 drivers/input/input.c:954 evdev_handle_set_keycode_v2+0xc4/0x120 drivers/input/evdev.c:882 evdev_do_ioctl drivers/input/evdev.c:1150 [inline] In this case we were dealing with a fuzzed HID device that declared over 12K buttons, and while HID layer should not be reporting to us such big keycodes, we should also be defensive and reject invalid data ourselves as well. Reported-by: syzbot+19340dff067c2d3835c0@syzkaller.appspotmail.com Signed-off-by: Dmitry Torokhov Cc: stable Link: https://lore.kernel.org/r/20191122204220.GA129459@dtor-ws Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/keyboard.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index 7506bbcf8259..b9ec4e2828e2 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -1460,7 +1460,7 @@ static void kbd_event(struct input_handle *handle, unsigned int event_type, if (event_type == EV_MSC && event_code == MSC_RAW && HW_RAW(handle->dev)) kbd_rawcode(value); - if (event_type == EV_KEY) + if (event_type == EV_KEY && event_code <= KEY_MAX) kbd_keycode(event_code, value, HW_RAW(handle->dev)); spin_unlock(&kbd_event_lock); From fd6eec4ce9d9f027ce5ece1fa880ad4eb516bdd9 Mon Sep 17 00:00:00 2001 From: Jouni Hogander Date: Wed, 27 Nov 2019 08:40:26 +0200 Subject: [PATCH 1570/3715] can: slcan: Fix use-after-free Read in slcan_open commit 9ebd796e24008f33f06ebea5a5e6aceb68b51794 upstream. Slcan_open doesn't clean-up device which registration failed from the slcan_devs device list. On next open this list is iterated and freed device is accessed. Fix this by calling slc_free_netdev in error path. Driver/net/can/slcan.c is derived from slip.c. Use-after-free error was identified in slip_open by syzboz. Same bug is in slcan.c. Here is the trace from the Syzbot slip report: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x197/0x210 lib/dump_stack.c:118 print_address_description.constprop.0.cold+0xd4/0x30b mm/kasan/report.c:374 __kasan_report.cold+0x1b/0x41 mm/kasan/report.c:506 kasan_report+0x12/0x20 mm/kasan/common.c:634 __asan_report_load8_noabort+0x14/0x20 mm/kasan/generic_report.c:132 sl_sync drivers/net/slip/slip.c:725 [inline] slip_open+0xecd/0x11b7 drivers/net/slip/slip.c:801 tty_ldisc_open.isra.0+0xa3/0x110 drivers/tty/tty_ldisc.c:469 tty_set_ldisc+0x30e/0x6b0 drivers/tty/tty_ldisc.c:596 tiocsetd drivers/tty/tty_io.c:2334 [inline] tty_ioctl+0xe8d/0x14f0 drivers/tty/tty_io.c:2594 vfs_ioctl fs/ioctl.c:46 [inline] file_ioctl fs/ioctl.c:509 [inline] do_vfs_ioctl+0xdb6/0x13e0 fs/ioctl.c:696 ksys_ioctl+0xab/0xd0 fs/ioctl.c:713 __do_sys_ioctl fs/ioctl.c:720 [inline] __se_sys_ioctl fs/ioctl.c:718 [inline] __x64_sys_ioctl+0x73/0xb0 fs/ioctl.c:718 do_syscall_64+0xfa/0x760 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: ed50e1600b44 ("slcan: Fix memory leak in error path") Cc: Wolfgang Grandegger Cc: Marc Kleine-Budde Cc: David Miller Cc: Oliver Hartkopp Cc: Lukas Bulwahn Signed-off-by: Jouni Hogander Cc: linux-stable # >= v5.4 Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/slcan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index 49427f44dc5b..a42737b4ac79 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -613,6 +613,7 @@ err_free_chan: sl->tty = NULL; tty->disc_data = NULL; clear_bit(SLF_INUSE, &sl->flags); + slc_free_netdev(sl->dev); free_netdev(sl->dev); err_exit: From dc9a5fd2f3053eeb6ce6e0b7d9d5ec4af13a0cd3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 4 Nov 2019 15:54:29 -0800 Subject: [PATCH 1571/3715] kernfs: fix ino wrap-around detection commit e23f568aa63f64cd6b355094224cc9356c0f696b upstream. When the 32bit ino wraps around, kernfs increments the generation number to distinguish reused ino instances. The wrap-around detection tests whether the allocated ino is lower than what the cursor but the cursor is pointing to the next ino to allocate so the condition never triggers. Fix it by remembering the last ino and comparing against that. Signed-off-by: Tejun Heo Reviewed-by: Greg Kroah-Hartman Fixes: 4a3ef68acacf ("kernfs: implement i_generation") Cc: Namhyung Kim Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 5 ++--- include/linux/kernfs.h | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 9e9117533fd7..8697b750b1c9 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -623,7 +623,6 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, { struct kernfs_node *kn; u32 gen; - int cursor; int ret; name = kstrdup_const(name, GFP_KERNEL); @@ -636,11 +635,11 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, idr_preload(GFP_KERNEL); spin_lock(&kernfs_idr_lock); - cursor = idr_get_cursor(&root->ino_idr); ret = idr_alloc_cyclic(&root->ino_idr, kn, 1, 0, GFP_ATOMIC); - if (ret >= 0 && ret < cursor) + if (ret >= 0 && ret < root->last_ino) root->next_generation++; gen = root->next_generation; + root->last_ino = ret; spin_unlock(&kernfs_idr_lock); idr_preload_end(); if (ret < 0) diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index ab25c8b6d9e3..5e539e5bb70c 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -185,6 +185,7 @@ struct kernfs_root { /* private fields, do not use outside kernfs proper */ struct idr ino_idr; + u32 last_ino; u32 next_generation; struct kernfs_syscall_ops *syscall_ops; From 9d0c2dc1a3a54cecddaafe2b5aa29f4698b6d912 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 5 Nov 2019 17:44:07 +0100 Subject: [PATCH 1572/3715] jbd2: Fix possible overflow in jbd2_log_space_left() commit add3efdd78b8a0478ce423bb9d4df6bd95e8b335 upstream. When number of free space in the journal is very low, the arithmetic in jbd2_log_space_left() could underflow resulting in very high number of free blocks and thus triggering assertion failure in transaction commit code complaining there's not enough space in the journal: J_ASSERT(journal->j_free > 1); Properly check for the low number of free blocks. CC: stable@vger.kernel.org Reviewed-by: Theodore Ts'o Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20191105164437.32602-1-jack@suse.cz Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- include/linux/jbd2.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 11b3ab68f6a7..cb41329a3ee4 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1584,7 +1584,7 @@ static inline int jbd2_space_needed(journal_t *journal) static inline unsigned long jbd2_log_space_left(journal_t *journal) { /* Allow for rounding errors */ - unsigned long free = journal->j_free - 32; + long free = journal->j_free - 32; if (journal->j_committing_transaction) { unsigned long committing = atomic_read(&journal-> @@ -1593,7 +1593,7 @@ static inline unsigned long jbd2_log_space_left(journal_t *journal) /* Transaction + control blocks */ free -= committing + (committing >> JBD2_CONTROL_BLOCKS_SHIFT); } - return free; + return max_t(long, free, 0); } /* From 48612e9cc0a0c8c676c59f6a3ae0d9b46f7ccee7 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 4 Oct 2019 13:22:51 +0300 Subject: [PATCH 1573/3715] drm/i810: Prevent underflow in ioctl commit 4f69851fbaa26b155330be35ce8ac393e93e7442 upstream. The "used" variables here come from the user in the ioctl and it can be negative. It could result in an out of bounds write. Signed-off-by: Dan Carpenter Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191004102251.GC823@mwanda Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i810/i810_dma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i810/i810_dma.c b/drivers/gpu/drm/i810/i810_dma.c index 576a417690d4..128d6cfb7bbb 100644 --- a/drivers/gpu/drm/i810/i810_dma.c +++ b/drivers/gpu/drm/i810/i810_dma.c @@ -721,7 +721,7 @@ static void i810_dma_dispatch_vertex(struct drm_device *dev, if (nbox > I810_NR_SAREA_CLIPRECTS) nbox = I810_NR_SAREA_CLIPRECTS; - if (used > 4 * 1024) + if (used < 0 || used > 4 * 1024) used = 0; if (sarea_priv->dirty) @@ -1041,7 +1041,7 @@ static void i810_dma_dispatch_mc(struct drm_device *dev, struct drm_buf *buf, in if (u != I810_BUF_CLIENT) DRM_DEBUG("MC found buffer that isn't mine!\n"); - if (used > 4 * 1024) + if (used < 0 || used > 4 * 1024) used = 0; sarea_priv->dirty = 0x7f; From 69d2eb21297bffd1a2ba6807ac030b42f73dce1c Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Tue, 29 Oct 2019 15:19:19 +0800 Subject: [PATCH 1574/3715] KVM: arm/arm64: vgic: Don't rely on the wrong pending table commit ca185b260951d3b55108c0b95e188682d8a507b7 upstream. It's possible that two LPIs locate in the same "byte_offset" but target two different vcpus, where their pending status are indicated by two different pending tables. In such a scenario, using last_byte_offset optimization will lead KVM relying on the wrong pending table entry. Let us use last_ptr instead, which can be treated as a byte index into a pending table and also, can be vcpu specific. Fixes: 280771252c1b ("KVM: arm64: vgic-v3: KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES") Cc: stable@vger.kernel.org Signed-off-by: Zenghui Yu Signed-off-by: Marc Zyngier Acked-by: Eric Auger Link: https://lore.kernel.org/r/20191029071919.177-4-yuzenghui@huawei.com Signed-off-by: Greg Kroah-Hartman --- virt/kvm/arm/vgic/vgic-v3.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 084edc9dc553..f16a55012ea3 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -331,8 +331,8 @@ retry: int vgic_v3_save_pending_tables(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; - int last_byte_offset = -1; struct vgic_irq *irq; + gpa_t last_ptr = ~(gpa_t)0; int ret; u8 val; @@ -352,11 +352,11 @@ int vgic_v3_save_pending_tables(struct kvm *kvm) bit_nr = irq->intid % BITS_PER_BYTE; ptr = pendbase + byte_offset; - if (byte_offset != last_byte_offset) { + if (ptr != last_ptr) { ret = kvm_read_guest_lock(kvm, ptr, &val, 1); if (ret) return ret; - last_byte_offset = byte_offset; + last_ptr = ptr; } stored = val & (1U << bit_nr); From a5591672b05c953ec07f6aec042d25d58956e553 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 18 Nov 2019 12:23:00 -0500 Subject: [PATCH 1575/3715] KVM: x86: do not modify masked bits of shared MSRs commit de1fca5d6e0105c9d33924e1247e2f386efc3ece upstream. "Shared MSRs" are guest MSRs that are written to the host MSRs but keep their value until the next return to userspace. They support a mask, so that some bits keep the host value, but this mask is only used to skip an unnecessary MSR write and the value written to the MSR is always the guest MSR. Fix this and, while at it, do not update smsr->values[slot].curr if for whatever reason the wrmsr fails. This should only happen due to reserved bits, so the value written to smsr->values[slot].curr will not match when the user-return notifier and the host value will always be restored. However, it is untidy and in rare cases this can actually avoid spurious WRMSRs on return to userspace. Cc: stable@vger.kernel.org Reviewed-by: Jim Mattson Tested-by: Jim Mattson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1f9360320a82..08ec754561b8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -276,13 +276,14 @@ int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); int err; - if (((value ^ smsr->values[slot].curr) & mask) == 0) + value = (value & mask) | (smsr->values[slot].host & ~mask); + if (value == smsr->values[slot].curr) return 0; - smsr->values[slot].curr = value; err = wrmsrl_safe(shared_msrs_global.msrs[slot], value); if (err) return 1; + smsr->values[slot].curr = value; if (!smsr->registered) { smsr->urn.on_user_return = kvm_on_user_return; user_return_notifier_register(&smsr->urn); From cd606fb8f575ec2eaca138d9b3ba3b5131d9a875 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 18 Nov 2019 18:58:26 +0100 Subject: [PATCH 1576/3715] KVM: x86: fix presentation of TSX feature in ARCH_CAPABILITIES commit cbbaa2727aa3ae9e0a844803da7cef7fd3b94f2b upstream. KVM does not implement MSR_IA32_TSX_CTRL, so it must not be presented to the guests. It is also confusing to have !ARCH_CAP_TSX_CTRL_MSR && !RTM && ARCH_CAP_TAA_NO: lack of MSR_IA32_TSX_CTRL suggests TSX was not hidden (it actually was), yet the value says that TSX is not vulnerable to microarchitectural data sampling. Fix both. Cc: stable@vger.kernel.org Tested-by: Jim Mattson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 08ec754561b8..8a51442247c5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1113,10 +1113,15 @@ u64 kvm_get_arch_capabilities(void) * If TSX is disabled on the system, guests are also mitigated against * TAA and clear CPU buffer mitigation is not required for guests. */ - if (boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM) && - (data & ARCH_CAP_TSX_CTRL_MSR)) + if (!boot_cpu_has(X86_FEATURE_RTM)) + data &= ~ARCH_CAP_TAA_NO; + else if (!boot_cpu_has_bug(X86_BUG_TAA)) + data |= ARCH_CAP_TAA_NO; + else if (data & ARCH_CAP_TSX_CTRL_MSR) data &= ~ARCH_CAP_MDS_NO; + /* KVM does not emulate MSR_IA32_TSX_CTRL. */ + data &= ~ARCH_CAP_TSX_CTRL_MSR; return data; } From 8b161fccec2120511edc840797594cd01ecf872d Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Thu, 31 Oct 2019 17:14:38 +0100 Subject: [PATCH 1577/3715] crypto: crypto4xx - fix double-free in crypto4xx_destroy_sdr commit 746c908c4d72e49068ab216c3926d2720d71a90d upstream. This patch fixes a crash that can happen during probe when the available dma memory is not enough (this can happen if the crypto4xx is built as a module). The descriptor window mapping would end up being free'd twice, once in crypto4xx_build_pdr() and the second time in crypto4xx_destroy_sdr(). Fixes: 5d59ad6eea82 ("crypto: crypto4xx - fix crypto4xx_build_pdr, crypto4xx_build_sdr leak") Cc: Signed-off-by: Christian Lamparter Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/amcc/crypto4xx_core.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c index 8d4d8db244e9..d1d041de7f8a 100644 --- a/drivers/crypto/amcc/crypto4xx_core.c +++ b/drivers/crypto/amcc/crypto4xx_core.c @@ -399,12 +399,8 @@ static u32 crypto4xx_build_sdr(struct crypto4xx_device *dev) dma_alloc_coherent(dev->core_dev->device, dev->scatter_buffer_size * PPC4XX_NUM_SD, &dev->scatter_buffer_pa, GFP_ATOMIC); - if (!dev->scatter_buffer_va) { - dma_free_coherent(dev->core_dev->device, - sizeof(struct ce_sd) * PPC4XX_NUM_SD, - dev->sdr, dev->sdr_pa); + if (!dev->scatter_buffer_va) return -ENOMEM; - } sd_array = dev->sdr; From 9fdc6de63a205a3a469be55aa9863e3969e2c791 Mon Sep 17 00:00:00 2001 From: Ayush Sawal Date: Fri, 4 Oct 2019 10:50:58 -0700 Subject: [PATCH 1578/3715] crypto: af_alg - cast ki_complete ternary op to int commit 64e7f852c47ce99f6c324c46d6a299a5a7ebead9 upstream. when libkcapi test is executed using HW accelerator, cipher operation return -74.Since af_alg_async_cb->ki_complete treat err as unsigned int, libkcapi receive 429467222 even though it expect -ve value. Hence its required to cast resultlen to int so that proper error is returned to libkcapi. AEAD one shot non-aligned test 2(libkcapi test) ./../bin/kcapi -x 10 -c "gcm(aes)" -i 7815d4b06ae50c9c56e87bd7 -k ea38ac0c9b9998c80e28fb496a2b88d9 -a "853f98a750098bec1aa7497e979e78098155c877879556bb51ddeb6374cbaefc" -t "c4ce58985b7203094be1d134c1b8ab0b" -q "b03692f86d1b8b39baf2abb255197c98" Fixes: d887c52d6ae4 ("crypto: algif_aead - overhaul memory management") Cc: Signed-off-by: Ayush Sawal Signed-off-by: Atul Gupta Signed-off-by: Herbert Xu Signed-off-by: Ayush Sawal Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/af_alg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/af_alg.c b/crypto/af_alg.c index f816a7289104..422bba808f73 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -1086,7 +1086,7 @@ void af_alg_async_cb(struct crypto_async_request *_req, int err) af_alg_free_resources(areq); sock_put(sk); - iocb->ki_complete(iocb, err ? err : resultlen, 0); + iocb->ki_complete(iocb, err ? err : (int)resultlen, 0); } EXPORT_SYMBOL_GPL(af_alg_async_cb); From 3f59e0c6a9b1b33003207ede29e934be48e237c4 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Mon, 21 Oct 2019 11:29:49 -0400 Subject: [PATCH 1579/3715] crypto: ccp - fix uninitialized list head commit 691505a803a7f223b2af621848d581259c61f77d upstream. A NULL-pointer dereference was reported in fedora bz#1762199 while reshaping a raid6 array after adding a fifth drive to an existing array. [ 47.343549] md/raid:md0: raid level 6 active with 3 out of 5 devices, algorithm 2 [ 47.804017] md0: detected capacity change from 0 to 7885289422848 [ 47.822083] Unable to handle kernel read from unreadable memory at virtual address 0000000000000000 ... [ 47.940477] CPU: 1 PID: 14210 Comm: md0_raid6 Tainted: G W 5.2.18-200.fc30.aarch64 #1 [ 47.949594] Hardware name: AMD Overdrive/Supercharger/To be filled by O.E.M., BIOS ROD1002C 04/08/2016 [ 47.958886] pstate: 00400085 (nzcv daIf +PAN -UAO) [ 47.963668] pc : __list_del_entry_valid+0x2c/0xa8 [ 47.968366] lr : ccp_tx_submit+0x84/0x168 [ccp] [ 47.972882] sp : ffff00001369b970 [ 47.976184] x29: ffff00001369b970 x28: ffff00001369bdb8 [ 47.981483] x27: 00000000ffffffff x26: ffff8003b758af70 [ 47.986782] x25: ffff8003b758b2d8 x24: ffff8003e6245818 [ 47.992080] x23: 0000000000000000 x22: ffff8003e62450c0 [ 47.997379] x21: ffff8003dfd6add8 x20: 0000000000000003 [ 48.002678] x19: ffff8003e6245100 x18: 0000000000000000 [ 48.007976] x17: 0000000000000000 x16: 0000000000000000 [ 48.013274] x15: 0000000000000000 x14: 0000000000000000 [ 48.018572] x13: ffff7e000ef83a00 x12: 0000000000000001 [ 48.023870] x11: ffff000010eff998 x10: 00000000000019a0 [ 48.029169] x9 : 0000000000000000 x8 : ffff8003e6245180 [ 48.034467] x7 : 0000000000000000 x6 : 000000000000003f [ 48.039766] x5 : 0000000000000040 x4 : ffff8003e0145080 [ 48.045064] x3 : dead000000000200 x2 : 0000000000000000 [ 48.050362] x1 : 0000000000000000 x0 : ffff8003e62450c0 [ 48.055660] Call trace: [ 48.058095] __list_del_entry_valid+0x2c/0xa8 [ 48.062442] ccp_tx_submit+0x84/0x168 [ccp] [ 48.066615] async_tx_submit+0x224/0x368 [async_tx] [ 48.071480] async_trigger_callback+0x68/0xfc [async_tx] [ 48.076784] ops_run_biofill+0x178/0x1e8 [raid456] [ 48.081566] raid_run_ops+0x248/0x818 [raid456] [ 48.086086] handle_stripe+0x864/0x1208 [raid456] [ 48.090781] handle_active_stripes.isra.0+0xb0/0x278 [raid456] [ 48.096604] raid5d+0x378/0x618 [raid456] [ 48.100602] md_thread+0xa0/0x150 [ 48.103905] kthread+0x104/0x130 [ 48.107122] ret_from_fork+0x10/0x18 [ 48.110686] Code: d2804003 f2fbd5a3 eb03003f 54000320 (f9400021) [ 48.116766] ---[ end trace 23f390a527f7ad77 ]--- ccp_tx_submit is passed a dma_async_tx_descriptor which is contained in a ccp_dma_desc and adds it to a ccp channel's pending list: list_del(&desc->entry); list_add_tail(&desc->entry, &chan->pending); The problem is that desc->entry may be uninitialized in the async_trigger_callback path where the descriptor was gotten from ccp_prep_dma_interrupt which got it from ccp_alloc_dma_desc which doesn't initialize the desc->entry list head. So, just initialize the list head to avoid the problem. Cc: Reported-by: Sahaj Sarup Signed-off-by: Mark Salter Acked-by: Gary R Hook Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/ccp/ccp-dmaengine.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c index d608043c0280..df82af3dd970 100644 --- a/drivers/crypto/ccp/ccp-dmaengine.c +++ b/drivers/crypto/ccp/ccp-dmaengine.c @@ -341,6 +341,7 @@ static struct ccp_dma_desc *ccp_alloc_dma_desc(struct ccp_dma_chan *chan, desc->tx_desc.flags = flags; desc->tx_desc.tx_submit = ccp_tx_submit; desc->ccp = chan->ccp; + INIT_LIST_HEAD(&desc->entry); INIT_LIST_HEAD(&desc->pending); INIT_LIST_HEAD(&desc->active); desc->status = DMA_IN_PROGRESS; From cfb63e86ff0ebfd7419d1cb85a6c55d4b9380613 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 23 Oct 2019 11:50:44 +0200 Subject: [PATCH 1580/3715] crypto: ecdh - fix big endian bug in ECC library commit f398243e9fd6a3a059c1ea7b380c40628dbf0c61 upstream. The elliptic curve arithmetic library used by the EC-DH KPP implementation assumes big endian byte order, and unconditionally reverses the byte and word order of multi-limb quantities. On big endian systems, the byte reordering is not necessary, while the word ordering needs to be retained. So replace the __swab64() invocation with a call to be64_to_cpu() which should do the right thing for both little and big endian builds. Fixes: 3c4b23901a0c ("crypto: ecdh - Add ECDH software support") Cc: # v4.9+ Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/ecc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/ecc.c b/crypto/ecc.c index 3b422e24e647..65ee29dda063 100644 --- a/crypto/ecc.c +++ b/crypto/ecc.c @@ -898,10 +898,11 @@ static void ecc_point_mult(struct ecc_point *result, static inline void ecc_swap_digits(const u64 *in, u64 *out, unsigned int ndigits) { + const __be64 *src = (__force __be64 *)in; int i; for (i = 0; i < ndigits; i++) - out[i] = __swab64(in[ndigits - 1 - i]); + out[i] = be64_to_cpu(src[ndigits - 1 - i]); } static int __ecc_is_key_valid(const struct ecc_curve *curve, From 41b3b8e0775465c33083ce1998df7f15c039c9bf Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Fri, 4 Oct 2019 14:29:16 -0500 Subject: [PATCH 1581/3715] crypto: user - fix memory leak in crypto_report commit ffdde5932042600c6807d46c1550b28b0db6a3bc upstream. In crypto_report, a new skb is created via nlmsg_new(). This skb should be released if crypto_report_alg() fails. Fixes: a38f7907b926 ("crypto: Add userspace configuration API") Cc: Signed-off-by: Navid Emamdoost Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/crypto_user.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index 5e457a7dd1c9..b6899be8065d 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c @@ -288,8 +288,10 @@ static int crypto_report(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, drop_alg: crypto_mod_put(alg); - if (err) + if (err) { + kfree_skb(skb); return err; + } return nlmsg_unicast(crypto_nlsk, skb, NETLINK_CB(in_skb).portid); } From d83013e02e15826658e25857795270e12e290529 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Thu, 17 Oct 2019 16:18:41 +0200 Subject: [PATCH 1582/3715] spi: atmel: Fix CS high support commit 7cbb16b2122c09f2ae393a1542fed628505b9da6 upstream. Until a few years ago, this driver was only used with CS GPIO. The only exception is CS0 on AT91RM9200 which has to use internal CS. A limitation of the internal CS is that they don't support CS High. So by using the CS GPIO the CS high configuration was available except for the particular case CS0 on RM9200. When the support for the internal chip-select was added, the check of the CS high support was not updated. Due to this the driver accepts this configuration for all the SPI controller v2 (used by all SoCs excepting the AT91RM9200) whereas the hardware doesn't support it for infernal CS. This patch fixes the test to match the hardware capabilities. Fixes: 4820303480a1 ("spi: atmel: add support for the internal chip-select of the spi controller") Cc: Signed-off-by: Gregory CLEMENT Link: https://lore.kernel.org/r/20191017141846.7523-3-gregory.clement@bootlin.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-atmel.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c index 047875861df1..d19331b66222 100644 --- a/drivers/spi/spi-atmel.c +++ b/drivers/spi/spi-atmel.c @@ -1150,10 +1150,8 @@ static int atmel_spi_setup(struct spi_device *spi) as = spi_master_get_devdata(spi->master); /* see notes above re chipselect */ - if (!atmel_spi_is_v2(as) - && spi->chip_select == 0 - && (spi->mode & SPI_CS_HIGH)) { - dev_dbg(&spi->dev, "setup: can't be active-high\n"); + if (!as->use_cs_gpios && (spi->mode & SPI_CS_HIGH)) { + dev_warn(&spi->dev, "setup: non GPIO CS can't be active-high\n"); return -EINVAL; } From 589d00cebfc204c579c3212dd3c00a03c298d298 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 7 Nov 2019 08:50:25 +0530 Subject: [PATCH 1583/3715] RDMA/qib: Validate ->show()/store() callbacks before calling them commit 7ee23491b39259ae83899dd93b2a29ef0f22f0a7 upstream. The permissions of the read-only or write-only sysfs files can be changed (as root) and the user can then try to read a write-only file or write to a read-only file which will lead to kernel crash here. Protect against that by always validating the show/store callbacks. Link: https://lore.kernel.org/r/d45cc26361a174ae12dbb86c994ef334d257924b.1573096807.git.viresh.kumar@linaro.org Signed-off-by: Viresh Kumar Reviewed-by: Greg Kroah-Hartman Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/qib/qib_sysfs.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index ca2638d8f35e..d831f3e61ae8 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -301,6 +301,9 @@ static ssize_t qib_portattr_show(struct kobject *kobj, struct qib_pportdata *ppd = container_of(kobj, struct qib_pportdata, pport_kobj); + if (!pattr->show) + return -EIO; + return pattr->show(ppd, buf); } @@ -312,6 +315,9 @@ static ssize_t qib_portattr_store(struct kobject *kobj, struct qib_pportdata *ppd = container_of(kobj, struct qib_pportdata, pport_kobj); + if (!pattr->store) + return -EIO; + return pattr->store(ppd, buf, len); } From 44e7fd346b31527d49e923fc36f6c614fef1da49 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 21 Nov 2019 16:14:38 -0800 Subject: [PATCH 1584/3715] iomap: Fix pipe page leakage during splicing commit 419e9c38aa075ed0cd3c13d47e15954b686bcdb6 upstream. When splicing using iomap_dio_rw() to a pipe, we may leak pipe pages because bio_iov_iter_get_pages() records that the pipe will have full extent worth of data however if file size is not block size aligned iomap_dio_rw() returns less than what bio_iov_iter_get_pages() set up and splice code gets confused leaking a pipe page with the file tail. Handle the situation similarly to the old direct IO implementation and revert iter to actually returned read amount which makes iter consistent with value returned from iomap_dio_rw() and thus the splice code is happy. Fixes: ff6a9292e6f6 ("iomap: implement direct I/O") CC: stable@vger.kernel.org Reported-by: syzbot+991400e8eba7e00a26e1@syzkaller.appspotmail.com Signed-off-by: Jan Kara Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/iomap.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/iomap.c b/fs/iomap.c index 1cf160ced0d4..3f5b1655cfce 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -1053,8 +1053,15 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, } pos += ret; - if (iov_iter_rw(iter) == READ && pos >= dio->i_size) + if (iov_iter_rw(iter) == READ && pos >= dio->i_size) { + /* + * We only report that we've read data up to i_size. + * Revert iter to a state corresponding to that as + * some callers (such as splice code) rely on it. + */ + iov_iter_revert(iter, pos - dio->i_size); break; + } } while ((count = iov_iter_count(iter)) > 0); blk_finish_plug(&plug); From 3be5da34757a684c239e7164c5bd783b6e9e724f Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 12 Nov 2019 12:42:23 -0800 Subject: [PATCH 1585/3715] thermal: Fix deadlock in thermal thermal_zone_device_check commit 163b00cde7cf2206e248789d2780121ad5e6a70b upstream. 1851799e1d29 ("thermal: Fix use-after-free when unregistering thermal zone device") changed cancel_delayed_work to cancel_delayed_work_sync to avoid a use-after-free issue. However, cancel_delayed_work_sync could be called insides the WQ causing deadlock. [54109.642398] c0 1162 kworker/u17:1 D 0 11030 2 0x00000000 [54109.642437] c0 1162 Workqueue: thermal_passive_wq thermal_zone_device_check [54109.642447] c0 1162 Call trace: [54109.642456] c0 1162 __switch_to+0x138/0x158 [54109.642467] c0 1162 __schedule+0xba4/0x1434 [54109.642480] c0 1162 schedule_timeout+0xa0/0xb28 [54109.642492] c0 1162 wait_for_common+0x138/0x2e8 [54109.642511] c0 1162 flush_work+0x348/0x40c [54109.642522] c0 1162 __cancel_work_timer+0x180/0x218 [54109.642544] c0 1162 handle_thermal_trip+0x2c4/0x5a4 [54109.642553] c0 1162 thermal_zone_device_update+0x1b4/0x25c [54109.642563] c0 1162 thermal_zone_device_check+0x18/0x24 [54109.642574] c0 1162 process_one_work+0x3cc/0x69c [54109.642583] c0 1162 worker_thread+0x49c/0x7c0 [54109.642593] c0 1162 kthread+0x17c/0x1b0 [54109.642602] c0 1162 ret_from_fork+0x10/0x18 [54109.643051] c0 1162 kworker/u17:2 D 0 16245 2 0x00000000 [54109.643067] c0 1162 Workqueue: thermal_passive_wq thermal_zone_device_check [54109.643077] c0 1162 Call trace: [54109.643085] c0 1162 __switch_to+0x138/0x158 [54109.643095] c0 1162 __schedule+0xba4/0x1434 [54109.643104] c0 1162 schedule_timeout+0xa0/0xb28 [54109.643114] c0 1162 wait_for_common+0x138/0x2e8 [54109.643122] c0 1162 flush_work+0x348/0x40c [54109.643131] c0 1162 __cancel_work_timer+0x180/0x218 [54109.643141] c0 1162 handle_thermal_trip+0x2c4/0x5a4 [54109.643150] c0 1162 thermal_zone_device_update+0x1b4/0x25c [54109.643159] c0 1162 thermal_zone_device_check+0x18/0x24 [54109.643167] c0 1162 process_one_work+0x3cc/0x69c [54109.643177] c0 1162 worker_thread+0x49c/0x7c0 [54109.643186] c0 1162 kthread+0x17c/0x1b0 [54109.643195] c0 1162 ret_from_fork+0x10/0x18 [54109.644500] c0 1162 cat D 0 7766 1 0x00000001 [54109.644515] c0 1162 Call trace: [54109.644524] c0 1162 __switch_to+0x138/0x158 [54109.644536] c0 1162 __schedule+0xba4/0x1434 [54109.644546] c0 1162 schedule_preempt_disabled+0x80/0xb0 [54109.644555] c0 1162 __mutex_lock+0x3a8/0x7f0 [54109.644563] c0 1162 __mutex_lock_slowpath+0x14/0x20 [54109.644575] c0 1162 thermal_zone_get_temp+0x84/0x360 [54109.644586] c0 1162 temp_show+0x30/0x78 [54109.644609] c0 1162 dev_attr_show+0x5c/0xf0 [54109.644628] c0 1162 sysfs_kf_seq_show+0xcc/0x1a4 [54109.644636] c0 1162 kernfs_seq_show+0x48/0x88 [54109.644656] c0 1162 seq_read+0x1f4/0x73c [54109.644664] c0 1162 kernfs_fop_read+0x84/0x318 [54109.644683] c0 1162 __vfs_read+0x50/0x1bc [54109.644692] c0 1162 vfs_read+0xa4/0x140 [54109.644701] c0 1162 SyS_read+0xbc/0x144 [54109.644708] c0 1162 el0_svc_naked+0x34/0x38 [54109.845800] c0 1162 D 720.000s 1->7766->7766 cat [panic] Fixes: 1851799e1d29 ("thermal: Fix use-after-free when unregistering thermal zone device") Cc: stable@vger.kernel.org Signed-off-by: Wei Wang Signed-off-by: Zhang Rui Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/thermal_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 456ef213dc14..fcefafe7df48 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -299,7 +299,7 @@ static void thermal_zone_device_set_polling(struct thermal_zone_device *tz, mod_delayed_work(system_freezable_wq, &tz->poll_queue, msecs_to_jiffies(delay)); else - cancel_delayed_work_sync(&tz->poll_queue); + cancel_delayed_work(&tz->poll_queue); } static void monitor_thermal_zone(struct thermal_zone_device *tz) @@ -1350,7 +1350,7 @@ void thermal_zone_device_unregister(struct thermal_zone_device *tz) mutex_unlock(&thermal_list_lock); - thermal_zone_device_set_polling(tz, 0); + cancel_delayed_work_sync(&tz->poll_queue); thermal_set_governor(tz, NULL); From 1f8c097ed7fc36a8b8fba0f59ed47840b2e20db5 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 18 Oct 2019 22:56:31 +0200 Subject: [PATCH 1586/3715] binder: Handle start==NULL in binder_update_page_range() commit 2a9edd056ed4fbf9d2e797c3fc06335af35bccc4 upstream. The old loop wouldn't stop when reaching `start` if `start==NULL`, instead continuing backwards to index -1 and crashing. Luckily you need to be highly privileged to map things at NULL, so it's not a big problem. Fix it by adjusting the loop so that the loop variable is always in bounds. This patch is deliberately minimal to simplify backporting, but IMO this function could use a refactor. The jump labels in the second loop body are horrible (the error gotos should be jumping to free_range instead), and both loops would look nicer if they just iterated upwards through indices. And the up_read()+mmput() shouldn't be duplicated like that. Cc: stable@vger.kernel.org Fixes: 457b9a6f09f0 ("Staging: android: add binder driver") Signed-off-by: Jann Horn Acked-by: Christian Brauner Link: https://lore.kernel.org/r/20191018205631.248274-3-jannh@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder_alloc.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index e0b0399ff7ec..9d5cb3b7a7a2 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -289,8 +289,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate, return 0; free_range: - for (page_addr = end - PAGE_SIZE; page_addr >= start; - page_addr -= PAGE_SIZE) { + for (page_addr = end - PAGE_SIZE; 1; page_addr -= PAGE_SIZE) { bool ret; size_t index; @@ -303,6 +302,8 @@ free_range: WARN_ON(!ret); trace_binder_free_lru_end(alloc, index); + if (page_addr == start) + break; continue; err_vm_insert_page_failed: @@ -312,7 +313,8 @@ err_map_kernel_failed: page->page_ptr = NULL; err_alloc_page_failed: err_page_ptr_cleared: - ; + if (page_addr == start) + break; } err_no_vma: if (mm) { From d9792606b604b972afebc89375e1ffad763135ac Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 1 Feb 2019 16:49:30 +0900 Subject: [PATCH 1587/3715] ASoC: rsnd: fixup MIX kctrl registration commit 7aea8a9d71d54f449f49e20324df06341cc18395 upstream. Renesas sound device has many IPs and many situations. If platform/board uses MIXer, situation will be more complex. To avoid duplicate DVC kctrl registration when MIXer was used, it had original flags. But it was issue when sound card was re-binded, because no one can't cleanup this flags then. To solve this issue, commit 9c698e8481a15237a ("ASoC: rsnd: tidyup registering method for rsnd_kctrl_new()") checks registered card->controls, because if card was re-binded, these were cleanuped automatically. This patch could solve re-binding issue. But, it start to avoid MIX kctrl. To solve these issues, we need below. To avoid card re-binding issue: check registered card->controls To avoid duplicate DVC registration: check registered rsnd_kctrl_cfg To allow multiple MIX registration: check registered rsnd_kctrl_cfg This patch do it. Fixes: 9c698e8481a15237a ("ASoC: rsnd: tidyup registering method for rsnd_kctrl_new()") Reported-by: Jiada Wang Signed-off-by: Kuninori Morimoto Tested-By: Jiada Wang Signed-off-by: Mark Brown Cc: Nobuhiro Iwamatsu Signed-off-by: Greg Kroah-Hartman --- sound/soc/sh/rcar/core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index bb06dd72ca9a..f203c0878e69 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -1279,14 +1279,14 @@ int rsnd_kctrl_new(struct rsnd_mod *mod, int ret; /* - * 1) Avoid duplicate register (ex. MIXer case) - * 2) re-register if card was rebinded + * 1) Avoid duplicate register for DVC with MIX case + * 2) Allow duplicate register for MIX + * 3) re-register if card was rebinded */ list_for_each_entry(kctrl, &card->controls, list) { struct rsnd_kctrl_cfg *c = kctrl->private_data; - if (strcmp(kctrl->id.name, name) == 0 && - c->mod == mod) + if (c == cfg) return 0; } From f70609f898d63973388b36adf3650489311b13b9 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 4 Dec 2019 10:28:54 +0100 Subject: [PATCH 1588/3715] KVM: x86: fix out-of-bounds write in KVM_GET_EMULATED_CPUID (CVE-2019-19332) commit 433f4ba1904100da65a311033f17a9bf586b287e upstream. The bounds check was present in KVM_GET_SUPPORTED_CPUID but not KVM_GET_EMULATED_CPUID. Reported-by: syzbot+e3f4897236c4eeb8af4f@syzkaller.appspotmail.com Fixes: 84cffe499b94 ("kvm: Emulate MOVBE", 2013-10-29) Signed-off-by: Paolo Bonzini Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/cpuid.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 33f87b696487..38959b173a42 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -404,7 +404,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, r = -E2BIG; - if (*nent >= maxnent) + if (WARN_ON(*nent >= maxnent)) goto out; do_cpuid_1_ent(entry, function, index); @@ -707,6 +707,9 @@ out: static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 func, u32 idx, int *nent, int maxnent, unsigned int type) { + if (*nent >= maxnent) + return -E2BIG; + if (type == KVM_GET_EMULATED_CPUID) return __do_cpuid_ent_emulated(entry, func, idx, nent, maxnent); From 39de8cc3678f5cd1f4e0ae274c20acabbd85a8b1 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 14 Mar 2019 13:47:59 +0800 Subject: [PATCH 1589/3715] appletalk: Fix potential NULL pointer dereference in unregister_snap_client commit 9804501fa1228048857910a6bf23e085aade37cc upstream. register_snap_client may return NULL, all the callers check it, but only print a warning. This will result in NULL pointer dereference in unregister_snap_client and other places. It has always been used like this since v2.6 Reported-by: Dan Carpenter Signed-off-by: YueHaibing Signed-off-by: David S. Miller [bwh: Backported to <4.15: adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- include/linux/atalk.h | 2 +- net/appletalk/aarp.c | 15 ++++++++++++--- net/appletalk/ddp.c | 20 ++++++++++++-------- 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/include/linux/atalk.h b/include/linux/atalk.h index 03885e63f92b..2664b9e89f9b 100644 --- a/include/linux/atalk.h +++ b/include/linux/atalk.h @@ -108,7 +108,7 @@ static __inline__ struct elapaarp *aarp_hdr(struct sk_buff *skb) #define AARP_RESOLVE_TIME (10 * HZ) extern struct datalink_proto *ddp_dl, *aarp_dl; -extern void aarp_proto_init(void); +extern int aarp_proto_init(void); /* Inter module exports */ diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 8ad3ec2610b6..b9e85a4751a6 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -879,15 +879,24 @@ static struct notifier_block aarp_notifier = { static unsigned char aarp_snap_id[] = { 0x00, 0x00, 0x00, 0x80, 0xF3 }; -void __init aarp_proto_init(void) +int __init aarp_proto_init(void) { + int rc; + aarp_dl = register_snap_client(aarp_snap_id, aarp_rcv); - if (!aarp_dl) + if (!aarp_dl) { printk(KERN_CRIT "Unable to register AARP with SNAP.\n"); + return -ENOMEM; + } setup_timer(&aarp_timer, aarp_expire_timeout, 0); aarp_timer.expires = jiffies + sysctl_aarp_expiry_time; add_timer(&aarp_timer); - register_netdevice_notifier(&aarp_notifier); + rc = register_netdevice_notifier(&aarp_notifier); + if (rc) { + del_timer_sync(&aarp_timer); + unregister_snap_client(aarp_dl); + } + return rc; } /* Remove the AARP entries associated with a device. */ diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 475f332b1ad2..ab59c56e0513 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1911,9 +1911,6 @@ static unsigned char ddp_snap_id[] = { 0x08, 0x00, 0x07, 0x80, 0x9B }; EXPORT_SYMBOL(atrtr_get_dev); EXPORT_SYMBOL(atalk_find_dev_addr); -static const char atalk_err_snap[] __initconst = - KERN_CRIT "Unable to register DDP with SNAP.\n"; - /* Called by proto.c on kernel start up */ static int __init atalk_init(void) { @@ -1928,17 +1925,22 @@ static int __init atalk_init(void) goto out_proto; ddp_dl = register_snap_client(ddp_snap_id, atalk_rcv); - if (!ddp_dl) - printk(atalk_err_snap); + if (!ddp_dl) { + pr_crit("Unable to register DDP with SNAP.\n"); + goto out_sock; + } dev_add_pack(<alk_packet_type); dev_add_pack(&ppptalk_packet_type); rc = register_netdevice_notifier(&ddp_notifier); if (rc) - goto out_sock; + goto out_snap; + + rc = aarp_proto_init(); + if (rc) + goto out_dev; - aarp_proto_init(); rc = atalk_proc_init(); if (rc) goto out_aarp; @@ -1952,11 +1954,13 @@ out_proc: atalk_proc_exit(); out_aarp: aarp_cleanup_module(); +out_dev: unregister_netdevice_notifier(&ddp_notifier); -out_sock: +out_snap: dev_remove_pack(&ppptalk_packet_type); dev_remove_pack(<alk_packet_type); unregister_snap_client(ddp_dl); +out_sock: sock_unregister(PF_APPLETALK); out_proto: proto_unregister(&ddp_proto); From 709438c431239cc13fc9e0a0483d0c9d05984fa2 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 30 Apr 2019 19:34:08 +0800 Subject: [PATCH 1590/3715] appletalk: Set error code if register_snap_client failed commit c93ad1337ad06a718890a89cdd85188ff9a5a5cc upstream. If register_snap_client fails in atalk_init, error code should be set, otherwise it will triggers NULL pointer dereference while unloading module. Fixes: 9804501fa122 ("appletalk: Fix potential NULL pointer dereference in unregister_snap_client") Signed-off-by: YueHaibing Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/appletalk/ddp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index ab59c56e0513..b4268bd2e557 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1927,6 +1927,7 @@ static int __init atalk_init(void) ddp_dl = register_snap_client(ddp_snap_id, atalk_rcv); if (!ddp_dl) { pr_crit("Unable to register DDP with SNAP.\n"); + rc = -ENOMEM; goto out_sock; } From f777dd5b9540fe4640256615522c64219782c2c4 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 30 Oct 2019 03:40:46 +0000 Subject: [PATCH 1591/3715] usb: gadget: configfs: Fix missing spin_lock_init() commit 093edc2baad2c258b1f55d1ab9c63c2b5ae67e42 upstream. The driver allocates the spinlock but not initialize it. Use spin_lock_init() on it to initialize it correctly. This is detected by Coccinelle semantic patch. Fixes: 1a1c851bbd70 ("usb: gadget: configfs: fix concurrent issue between composite APIs") Signed-off-by: Wei Yongjun Cc: stable Reviewed-by: Peter Chen Link: https://lore.kernel.org/r/20191030034046.188808-1-weiyongjun1@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/configfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index d0143d02e2f7..78a5832c209c 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -1543,6 +1543,7 @@ static struct config_group *gadgets_make( gi->composite.resume = NULL; gi->composite.max_speed = USB_SPEED_SUPER; + spin_lock_init(&gi->spinlock); mutex_init(&gi->lock); INIT_LIST_HEAD(&gi->string_list); INIT_LIST_HEAD(&gi->available_func); From 802317c442a362ec935a41966f2de4552360fe5b Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 6 Nov 2019 14:28:21 -0600 Subject: [PATCH 1592/3715] usb: gadget: pch_udc: fix use after free commit 66d1b0c0580b7f1b1850ee4423f32ac42afa2e92 upstream. Remove pointer dereference after free. pci_pool_free doesn't care about contents of td. It's just a void* for it Addresses-Coverity-ID: 1091173 ("Use after free") Cc: stable@vger.kernel.org Acked-by: Michal Nazarewicz Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20191106202821.GA20347@embeddedor Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/pch_udc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/gadget/udc/pch_udc.c b/drivers/usb/gadget/udc/pch_udc.c index 84dcbcd756f0..08bbe2c24134 100644 --- a/drivers/usb/gadget/udc/pch_udc.c +++ b/drivers/usb/gadget/udc/pch_udc.c @@ -1523,7 +1523,6 @@ static void pch_udc_free_dma_chain(struct pch_udc_dev *dev, td = phys_to_virt(addr); addr2 = (dma_addr_t)td->next; dma_pool_free(dev->data_requests, td, addr); - td->next = 0x00; addr = addr2; } req->chain_len = 1; From c2ae20e7a5f2979d13e76e8b83ed355b5d909d8e Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Tue, 5 Nov 2019 07:06:53 -0800 Subject: [PATCH 1593/3715] scsi: qla2xxx: Fix driver unload hang commit dd322b7f3efc8cda085bb60eadc4aee6324eadd8 upstream. This patch fixes driver unload hang by removing msleep() Fixes: d74595278f4ab ("scsi: qla2xxx: Add multiple queue pair functionality.") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20191105150657.8092-5-hmadhani@marvell.com Reviewed-by: Ewan D. Milne Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_init.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 0e154fea693e..bd2421863510 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -8092,8 +8092,6 @@ int qla2xxx_delete_qpair(struct scsi_qla_host *vha, struct qla_qpair *qpair) struct qla_hw_data *ha = qpair->hw; qpair->delete_in_progress = 1; - while (atomic_read(&qpair->ref_count)) - msleep(500); ret = qla25xx_delete_req_que(vha, qpair->req); if (ret != QLA_SUCCESS) From 6c8957279aabecdcfaad925a465f02a9b469603d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 6 Nov 2019 10:06:54 +0100 Subject: [PATCH 1594/3715] media: venus: remove invalid compat_ioctl32 handler commit 4adc0423de92cf850d1ef5c0e7cb28fd7a38219e upstream. v4l2_compat_ioctl32() is the function that calls into v4l2_file_operations->compat_ioctl32(), so setting that back to the same function leads to a trivial endless loop, followed by a kernel stack overrun. Remove the incorrect assignment. Cc: stable@vger.kernel.org Fixes: 7472c1c69138 ("[media] media: venus: vdec: add video decoder files") Fixes: aaaa93eda64b ("[media] media: venus: venc: add video encoder files") Signed-off-by: Arnd Bergmann Acked-by: Stanimir Varbanov Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/qcom/venus/vdec.c | 3 --- drivers/media/platform/qcom/venus/venc.c | 3 --- 2 files changed, 6 deletions(-) diff --git a/drivers/media/platform/qcom/venus/vdec.c b/drivers/media/platform/qcom/venus/vdec.c index c9e9576bb08a..5f0965593a0d 100644 --- a/drivers/media/platform/qcom/venus/vdec.c +++ b/drivers/media/platform/qcom/venus/vdec.c @@ -1060,9 +1060,6 @@ static const struct v4l2_file_operations vdec_fops = { .unlocked_ioctl = video_ioctl2, .poll = v4l2_m2m_fop_poll, .mmap = v4l2_m2m_fop_mmap, -#ifdef CONFIG_COMPAT - .compat_ioctl32 = v4l2_compat_ioctl32, -#endif }; static int vdec_probe(struct platform_device *pdev) diff --git a/drivers/media/platform/qcom/venus/venc.c b/drivers/media/platform/qcom/venus/venc.c index 3fcf0e9b7b29..a8af4a09485e 100644 --- a/drivers/media/platform/qcom/venus/venc.c +++ b/drivers/media/platform/qcom/venus/venc.c @@ -1166,9 +1166,6 @@ static const struct v4l2_file_operations venc_fops = { .unlocked_ioctl = video_ioctl2, .poll = v4l2_m2m_fop_poll, .mmap = v4l2_m2m_fop_mmap, -#ifdef CONFIG_COMPAT - .compat_ioctl32 = v4l2_compat_ioctl32, -#endif }; static int venc_probe(struct platform_device *pdev) From e8ee2f8c4aed7191fadc5542c9d762566f231e99 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 14 Nov 2019 12:27:56 +0100 Subject: [PATCH 1595/3715] USB: uas: honor flag to avoid CAPACITY16 commit bff000cae1eec750d62e265c4ba2db9af57b17e1 upstream. Copy the support over from usb-storage to get feature parity Signed-off-by: Oliver Neukum Cc: stable Link: https://lore.kernel.org/r/20191114112758.32747-2-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/uas.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 1e62f2134b3a..1397bbe79497 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -832,6 +832,10 @@ static int uas_slave_configure(struct scsi_device *sdev) sdev->wce_default_on = 1; } + /* Some disks cannot handle READ_CAPACITY_16 */ + if (devinfo->flags & US_FL_NO_READ_CAPACITY_16) + sdev->no_read_capacity_16 = 1; + /* * Some disks return the total number of blocks in response * to READ CAPACITY rather than the highest block number. From 36808fd964c75b54b7f25d87d3d3fe27c33c14f6 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 14 Nov 2019 12:27:57 +0100 Subject: [PATCH 1596/3715] USB: uas: heed CAPACITY_HEURISTICS commit 335cbbd5762d5e5c67a8ddd6e6362c2aa42a328f upstream. There is no need to ignore this flag. We should be as close to storage in that regard as makes sense, so honor flags whose cost is tiny. Signed-off-by: Oliver Neukum Cc: stable Link: https://lore.kernel.org/r/20191114112758.32747-3-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/uas.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 1397bbe79497..8391a88cf90f 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -844,6 +844,12 @@ static int uas_slave_configure(struct scsi_device *sdev) if (devinfo->flags & US_FL_FIX_CAPACITY) sdev->fix_capacity = 1; + /* + * in some cases we have to guess + */ + if (devinfo->flags & US_FL_CAPACITY_HEURISTICS) + sdev->guess_capacity = 1; + /* * Some devices don't like MODE SENSE with page=0x3f, * which is the command used for checking if a device From 657d54ede01239ea658a436f0d3407febfbbef91 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 14 Nov 2019 12:27:58 +0100 Subject: [PATCH 1597/3715] USB: documentation: flags on usb-storage versus UAS commit 65cc8bf99349f651a0a2cee69333525fe581f306 upstream. Document which flags work storage, UAS or both Signed-off-by: Oliver Neukum Cc: stable Link: https://lore.kernel.org/r/20191114112758.32747-4-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index b0da6050a254..933465eff40e 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4693,13 +4693,13 @@ Flags is a set of characters, each corresponding to a common usb-storage quirk flag as follows: a = SANE_SENSE (collect more than 18 bytes - of sense data); + of sense data, not on uas); b = BAD_SENSE (don't collect more than 18 - bytes of sense data); + bytes of sense data, not on uas); c = FIX_CAPACITY (decrease the reported device capacity by one sector); d = NO_READ_DISC_INFO (don't use - READ_DISC_INFO command); + READ_DISC_INFO command, not on uas); e = NO_READ_CAPACITY_16 (don't use READ_CAPACITY_16 command); f = NO_REPORT_OPCODES (don't use report opcodes @@ -4714,17 +4714,18 @@ j = NO_REPORT_LUNS (don't use report luns command, uas only); l = NOT_LOCKABLE (don't try to lock and - unlock ejectable media); + unlock ejectable media, not on uas); m = MAX_SECTORS_64 (don't transfer more - than 64 sectors = 32 KB at a time); + than 64 sectors = 32 KB at a time, + not on uas); n = INITIAL_READ10 (force a retry of the - initial READ(10) command); + initial READ(10) command, not on uas); o = CAPACITY_OK (accept the capacity - reported by the device); + reported by the device, not on uas); p = WRITE_CACHE (the device cache is ON - by default); + by default, not on uas); r = IGNORE_RESIDUE (the device reports - bogus residue values); + bogus residue values, not on uas); s = SINGLE_LUN (the device has only one Logical Unit); t = NO_ATA_1X (don't allow ATA(12) and ATA(16) @@ -4733,7 +4734,8 @@ w = NO_WP_DETECT (don't test whether the medium is write-protected). y = ALWAYS_SYNC (issue a SYNCHRONIZE_CACHE - even if the device claims no cache) + even if the device claims no cache, + not on uas) Example: quirks=0419:aaf5:rl,0421:0433:rc user_debug= [KNL,ARM] From b394c7780773890ca1ad0bc50a9e5d52805f36bb Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 6 Nov 2019 14:27:10 +0800 Subject: [PATCH 1598/3715] usb: Allow USB device to be warm reset in suspended state commit e76b3bf7654c3c94554c24ba15a3d105f4006c80 upstream. On Dell WD15 dock, sometimes USB ethernet cannot be detected after plugging cable to the ethernet port, the hub and roothub get runtime resumed and runtime suspended immediately: ... [ 433.315169] xhci_hcd 0000:3a:00.0: hcd_pci_runtime_resume: 0 [ 433.315204] usb usb4: usb auto-resume [ 433.315226] hub 4-0:1.0: hub_resume [ 433.315239] xhci_hcd 0000:3a:00.0: Get port status 4-1 read: 0x10202e2, return 0x10343 [ 433.315264] usb usb4-port1: status 0343 change 0001 [ 433.315279] xhci_hcd 0000:3a:00.0: clear port1 connect change, portsc: 0x10002e2 [ 433.315293] xhci_hcd 0000:3a:00.0: Get port status 4-2 read: 0x2a0, return 0x2a0 [ 433.317012] xhci_hcd 0000:3a:00.0: xhci_hub_status_data: stopping port polling. [ 433.422282] xhci_hcd 0000:3a:00.0: Get port status 4-1 read: 0x10002e2, return 0x343 [ 433.422307] usb usb4-port1: do warm reset [ 433.422311] usb 4-1: device reset not allowed in state 8 [ 433.422339] hub 4-0:1.0: state 7 ports 2 chg 0002 evt 0000 [ 433.422346] xhci_hcd 0000:3a:00.0: Get port status 4-1 read: 0x10002e2, return 0x343 [ 433.422356] usb usb4-port1: do warm reset [ 433.422358] usb 4-1: device reset not allowed in state 8 [ 433.422428] xhci_hcd 0000:3a:00.0: set port remote wake mask, actual port 0 status = 0xf0002e2 [ 433.422455] xhci_hcd 0000:3a:00.0: set port remote wake mask, actual port 1 status = 0xe0002a0 [ 433.422465] hub 4-0:1.0: hub_suspend [ 433.422475] usb usb4: bus auto-suspend, wakeup 1 [ 433.426161] xhci_hcd 0000:3a:00.0: xhci_hub_status_data: stopping port polling. [ 433.466209] xhci_hcd 0000:3a:00.0: port 0 polling in bus suspend, waiting [ 433.510204] xhci_hcd 0000:3a:00.0: port 0 polling in bus suspend, waiting [ 433.554051] xhci_hcd 0000:3a:00.0: port 0 polling in bus suspend, waiting [ 433.598235] xhci_hcd 0000:3a:00.0: port 0 polling in bus suspend, waiting [ 433.642154] xhci_hcd 0000:3a:00.0: port 0 polling in bus suspend, waiting [ 433.686204] xhci_hcd 0000:3a:00.0: port 0 polling in bus suspend, waiting [ 433.730205] xhci_hcd 0000:3a:00.0: port 0 polling in bus suspend, waiting [ 433.774203] xhci_hcd 0000:3a:00.0: port 0 polling in bus suspend, waiting [ 433.818207] xhci_hcd 0000:3a:00.0: port 0 polling in bus suspend, waiting [ 433.862040] xhci_hcd 0000:3a:00.0: port 0 polling in bus suspend, waiting [ 433.862053] xhci_hcd 0000:3a:00.0: xhci_hub_status_data: stopping port polling. [ 433.862077] xhci_hcd 0000:3a:00.0: xhci_suspend: stopping port polling. [ 433.862096] xhci_hcd 0000:3a:00.0: // Setting command ring address to 0x8578fc001 [ 433.862312] xhci_hcd 0000:3a:00.0: hcd_pci_runtime_suspend: 0 [ 433.862445] xhci_hcd 0000:3a:00.0: PME# enabled [ 433.902376] xhci_hcd 0000:3a:00.0: restoring config space at offset 0xc (was 0x0, writing 0x20) [ 433.902395] xhci_hcd 0000:3a:00.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100403) [ 433.902490] xhci_hcd 0000:3a:00.0: PME# disabled [ 433.902504] xhci_hcd 0000:3a:00.0: enabling bus mastering [ 433.902547] xhci_hcd 0000:3a:00.0: // Setting command ring address to 0x8578fc001 [ 433.902649] pcieport 0000:00:1b.0: PME: Spurious native interrupt! [ 433.902839] xhci_hcd 0000:3a:00.0: Port change event, 4-1, id 3, portsc: 0xb0202e2 [ 433.902842] xhci_hcd 0000:3a:00.0: resume root hub [ 433.902845] xhci_hcd 0000:3a:00.0: handle_port_status: starting port polling. [ 433.902877] xhci_hcd 0000:3a:00.0: xhci_resume: starting port polling. [ 433.902889] xhci_hcd 0000:3a:00.0: xhci_hub_status_data: stopping port polling. [ 433.902891] xhci_hcd 0000:3a:00.0: hcd_pci_runtime_resume: 0 [ 433.902919] usb usb4: usb wakeup-resume [ 433.902942] usb usb4: usb auto-resume [ 433.902966] hub 4-0:1.0: hub_resume ... As Mathias pointed out, the hub enters Cold Attach Status state and requires a warm reset. However usb_reset_device() bails out early when the device is in suspended state, as its callers port_event() and hub_event() don't always resume the device. Since there's nothing wrong to reset a suspended device, allow usb_reset_device() to do so to solve the issue. Signed-off-by: Kai-Heng Feng Acked-by: Alan Stern Cc: stable Link: https://lore.kernel.org/r/20191106062710.29880-1-kai.heng.feng@canonical.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index bb20aa433e98..4efccf8bf99f 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -5630,7 +5630,7 @@ re_enumerate_no_bos: /** * usb_reset_device - warn interface drivers and perform a USB port reset - * @udev: device to reset (not in SUSPENDED or NOTATTACHED state) + * @udev: device to reset (not in NOTATTACHED state) * * Warns all drivers bound to registered interfaces (using their pre_reset * method), performs the port reset, and then lets the drivers know that @@ -5658,8 +5658,7 @@ int usb_reset_device(struct usb_device *udev) struct usb_host_config *config = udev->actconfig; struct usb_hub *hub = usb_hub_to_struct_hub(udev->parent); - if (udev->state == USB_STATE_NOTATTACHED || - udev->state == USB_STATE_SUSPENDED) { + if (udev->state == USB_STATE_NOTATTACHED) { dev_dbg(&udev->dev, "device reset not allowed in state %d\n", udev->state); return -EINVAL; From ca922fe7d466944dab40a7d93e3203f81d7110d0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 10 Dec 2019 12:47:50 +0100 Subject: [PATCH 1599/3715] staging: rtl8188eu: fix interface sanity check commit 74ca34118a0e05793935d804ccffcedd6eb56596 upstream. Make sure to use the current alternate setting when verifying the interface descriptors to avoid binding to an invalid interface. Failing to do so could cause the driver to misbehave or trigger a WARN() in usb_submit_urb() that kernels with panic_on_warn set would choke on. Fixes: c2478d39076b ("staging: r8188eu: Add files for new driver - part 20") Cc: stable # 3.12 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191210114751.5119-2-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/os_dep/usb_intf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8188eu/os_dep/usb_intf.c b/drivers/staging/rtl8188eu/os_dep/usb_intf.c index 77c339a93525..3733b73863b6 100644 --- a/drivers/staging/rtl8188eu/os_dep/usb_intf.c +++ b/drivers/staging/rtl8188eu/os_dep/usb_intf.c @@ -78,7 +78,7 @@ static struct dvobj_priv *usb_dvobj_init(struct usb_interface *usb_intf) phost_conf = pusbd->actconfig; pconf_desc = &phost_conf->desc; - phost_iface = &usb_intf->altsetting[0]; + phost_iface = usb_intf->cur_altsetting; piface_desc = &phost_iface->desc; pdvobjpriv->NumInterfaces = pconf_desc->bNumInterfaces; From 3c64cee83f105ef564348ac06648057987a879c1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 10 Dec 2019 12:47:51 +0100 Subject: [PATCH 1600/3715] staging: rtl8712: fix interface sanity check commit c724f776f048538ecfdf53a52b7a522309f5c504 upstream. Make sure to use the current alternate setting when verifying the interface descriptors to avoid binding to an invalid interface. Failing to do so could cause the driver to misbehave or trigger a WARN() in usb_submit_urb() that kernels with panic_on_warn set would choke on. Fixes: 2865d42c78a9 ("staging: r8712u: Add the new driver to the mainline kernel") Cc: stable # 2.6.37 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191210114751.5119-3-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8712/usb_intf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8712/usb_intf.c b/drivers/staging/rtl8712/usb_intf.c index b3e266bd57ab..8be4fcc54ad6 100644 --- a/drivers/staging/rtl8712/usb_intf.c +++ b/drivers/staging/rtl8712/usb_intf.c @@ -275,7 +275,7 @@ static uint r8712_usb_dvobj_init(struct _adapter *padapter) pdvobjpriv->padapter = padapter; padapter->EepromAddressSize = 6; - phost_iface = &pintf->altsetting[0]; + phost_iface = pintf->cur_altsetting; piface_desc = &phost_iface->desc; pdvobjpriv->nr_endpoint = piface_desc->bNumEndpoints; if (pusbd->speed == USB_SPEED_HIGH) { From b3891da012011da4ac8b0bd4e4453ca758633208 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 2 Dec 2019 09:56:08 +0100 Subject: [PATCH 1601/3715] staging: gigaset: fix general protection fault on probe commit 53f35a39c3860baac1e5ca80bf052751cfb24a99 upstream. Fix a general protection fault when accessing the endpoint descriptors which could be triggered by a malicious device due to missing sanity checks on the number of endpoints. Reported-by: syzbot+35b1c403a14f5c89eba7@syzkaller.appspotmail.com Fixes: 07dc1f9f2f80 ("[PATCH] isdn4linux: Siemens Gigaset drivers - M105 USB DECT adapter") Cc: stable # 2.6.17 Cc: Hansjoerg Lipp Cc: Tilman Schmidt Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191202085610.12719-2-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/isdn/gigaset/usb-gigaset.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/isdn/gigaset/usb-gigaset.c b/drivers/isdn/gigaset/usb-gigaset.c index eade36dafa34..4219455bc8a7 100644 --- a/drivers/isdn/gigaset/usb-gigaset.c +++ b/drivers/isdn/gigaset/usb-gigaset.c @@ -688,6 +688,11 @@ static int gigaset_probe(struct usb_interface *interface, return -ENODEV; } + if (hostif->desc.bNumEndpoints < 2) { + dev_err(&interface->dev, "missing endpoints\n"); + return -ENODEV; + } + dev_info(&udev->dev, "%s: Device matched ... !\n", __func__); /* allocate memory for our device state and initialize it */ From 56a6f724344677de2a4c72935b5c12f3a1216aeb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 2 Dec 2019 09:56:09 +0100 Subject: [PATCH 1602/3715] staging: gigaset: fix illegal free on probe errors commit 84f60ca7b326ed8c08582417493982fe2573a9ad upstream. The driver failed to initialise its receive-buffer pointer, something which could lead to an illegal free on late probe errors. Fix this by making sure to clear all driver data at allocation. Fixes: 2032e2c2309d ("usb_gigaset: code cleanup") Cc: stable # 2.6.33 Cc: Tilman Schmidt Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191202085610.12719-3-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/isdn/gigaset/usb-gigaset.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/isdn/gigaset/usb-gigaset.c b/drivers/isdn/gigaset/usb-gigaset.c index 4219455bc8a7..6dbd5ea9399c 100644 --- a/drivers/isdn/gigaset/usb-gigaset.c +++ b/drivers/isdn/gigaset/usb-gigaset.c @@ -574,8 +574,7 @@ static int gigaset_initcshw(struct cardstate *cs) { struct usb_cardstate *ucs; - cs->hw.usb = ucs = - kmalloc(sizeof(struct usb_cardstate), GFP_KERNEL); + cs->hw.usb = ucs = kzalloc(sizeof(struct usb_cardstate), GFP_KERNEL); if (!ucs) { pr_err("out of memory\n"); return -ENOMEM; @@ -587,9 +586,6 @@ static int gigaset_initcshw(struct cardstate *cs) ucs->bchars[3] = 0; ucs->bchars[4] = 0x11; ucs->bchars[5] = 0x13; - ucs->bulk_out_buffer = NULL; - ucs->bulk_out_urb = NULL; - ucs->read_urb = NULL; tasklet_init(&cs->write_tasklet, gigaset_modem_fill, (unsigned long) cs); From 8c4d935cc494f9ede67820fa217850a31f26b4b5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 2 Dec 2019 09:56:10 +0100 Subject: [PATCH 1603/3715] staging: gigaset: add endpoint-type sanity check commit ed9ed5a89acba51b82bdff61144d4e4a4245ec8a upstream. Add missing endpoint-type sanity checks to probe. This specifically prevents a warning in USB core on URB submission when fuzzing USB descriptors. Signed-off-by: Johan Hovold Cc: stable Link: https://lore.kernel.org/r/20191202085610.12719-4-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/isdn/gigaset/usb-gigaset.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/isdn/gigaset/usb-gigaset.c b/drivers/isdn/gigaset/usb-gigaset.c index 6dbd5ea9399c..4c239f18240d 100644 --- a/drivers/isdn/gigaset/usb-gigaset.c +++ b/drivers/isdn/gigaset/usb-gigaset.c @@ -708,6 +708,12 @@ static int gigaset_probe(struct usb_interface *interface, endpoint = &hostif->endpoint[0].desc; + if (!usb_endpoint_is_bulk_out(endpoint)) { + dev_err(&interface->dev, "missing bulk-out endpoint\n"); + retval = -ENODEV; + goto error; + } + buffer_size = le16_to_cpu(endpoint->wMaxPacketSize); ucs->bulk_out_size = buffer_size; ucs->bulk_out_epnum = usb_endpoint_num(endpoint); @@ -727,6 +733,12 @@ static int gigaset_probe(struct usb_interface *interface, endpoint = &hostif->endpoint[1].desc; + if (!usb_endpoint_is_int_in(endpoint)) { + dev_err(&interface->dev, "missing int-in endpoint\n"); + retval = -ENODEV; + goto error; + } + ucs->busy = 0; ucs->read_urb = usb_alloc_urb(0, GFP_KERNEL); From 839a996485c7159cee3c81b06ae7b3c206f4238f Mon Sep 17 00:00:00 2001 From: Henry Lin Date: Wed, 11 Dec 2019 16:20:04 +0200 Subject: [PATCH 1604/3715] usb: xhci: only set D3hot for pci device commit f2c710f7dca8457e88b4ac9de2060f011254f9dd upstream. Xhci driver cannot call pci_set_power_state() on non-pci xhci host controllers. For example, NVIDIA Tegra XHCI host controller which acts as platform device with XHCI_SPURIOUS_WAKEUP quirk set in some platform hits this issue during shutdown. Cc: Fixes: 638298dc66ea ("xhci: Fix spurious wakeups after S5 on Haswell") Signed-off-by: Henry Lin Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20191211142007.8847-4-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 13 +++++++++++++ drivers/usb/host/xhci.c | 7 ++----- drivers/usb/host/xhci.h | 1 + 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 1de006aebec5..021a2d320acc 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -499,6 +499,18 @@ static int xhci_pci_resume(struct usb_hcd *hcd, bool hibernated) } #endif /* CONFIG_PM */ +static void xhci_pci_shutdown(struct usb_hcd *hcd) +{ + struct xhci_hcd *xhci = hcd_to_xhci(hcd); + struct pci_dev *pdev = to_pci_dev(hcd->self.controller); + + xhci_shutdown(hcd); + + /* Yet another workaround for spurious wakeups at shutdown with HSW */ + if (xhci->quirks & XHCI_SPURIOUS_WAKEUP) + pci_set_power_state(pdev, PCI_D3hot); +} + /*-------------------------------------------------------------------------*/ /* PCI driver selection metadata; PCI hotplugging uses this */ @@ -534,6 +546,7 @@ static int __init xhci_pci_init(void) #ifdef CONFIG_PM xhci_pci_hc_driver.pci_suspend = xhci_pci_suspend; xhci_pci_hc_driver.pci_resume = xhci_pci_resume; + xhci_pci_hc_driver.shutdown = xhci_pci_shutdown; #endif return pci_register_driver(&xhci_pci_driver); } diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 465d7fd507ad..abc08aa9cd4e 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -717,7 +717,7 @@ static void xhci_stop(struct usb_hcd *hcd) * * This will only ever be called with the main usb_hcd (the USB3 roothub). */ -static void xhci_shutdown(struct usb_hcd *hcd) +void xhci_shutdown(struct usb_hcd *hcd) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); @@ -736,11 +736,8 @@ static void xhci_shutdown(struct usb_hcd *hcd) xhci_dbg_trace(xhci, trace_xhci_dbg_init, "xhci_shutdown completed - status = %x", readl(&xhci->op_regs->status)); - - /* Yet another workaround for spurious wakeups at shutdown with HSW */ - if (xhci->quirks & XHCI_SPURIOUS_WAKEUP) - pci_set_power_state(to_pci_dev(hcd->self.sysdev), PCI_D3hot); } +EXPORT_SYMBOL_GPL(xhci_shutdown); #ifdef CONFIG_PM static void xhci_save_registers(struct xhci_hcd *xhci) diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index cbc91536e512..db1af99d53bd 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -2022,6 +2022,7 @@ int xhci_start(struct xhci_hcd *xhci); int xhci_reset(struct xhci_hcd *xhci); int xhci_run(struct usb_hcd *hcd); int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks); +void xhci_shutdown(struct usb_hcd *hcd); void xhci_init_driver(struct hc_driver *drv, const struct xhci_driver_overrides *over); int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id); From 6b399e679da42f76abdeab9722e687a61bb581f5 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 11 Dec 2019 16:20:05 +0200 Subject: [PATCH 1605/3715] xhci: Increase STS_HALT timeout in xhci_suspend() commit 7c67cf6658cec70d8a43229f2ce74ca1443dc95e upstream. I've recently observed failed xHCI suspend attempt on AMD Raven Ridge system: kernel: xhci_hcd 0000:04:00.4: WARN: xHC CMD_RUN timeout kernel: PM: suspend_common(): xhci_pci_suspend+0x0/0xd0 returns -110 kernel: PM: pci_pm_suspend(): hcd_pci_suspend+0x0/0x30 returns -110 kernel: PM: dpm_run_callback(): pci_pm_suspend+0x0/0x150 returns -110 kernel: PM: Device 0000:04:00.4 failed to suspend async: error -110 Similar to commit ac343366846a ("xhci: Increase STS_SAVE timeout in xhci_suspend()") we also need to increase the HALT timeout to make it be able to suspend again. Cc: # 5.2+ Fixes: f7fac17ca925 ("xhci: Convert xhci_handshake() to use readl_poll_timeout_atomic()") Signed-off-by: Kai-Heng Feng Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20191211142007.8847-5-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index abc08aa9cd4e..6c0a0ca316d3 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -908,7 +908,7 @@ static bool xhci_pending_portevent(struct xhci_hcd *xhci) int xhci_suspend(struct xhci_hcd *xhci, bool do_wakeup) { int rc = 0; - unsigned int delay = XHCI_MAX_HALT_USEC; + unsigned int delay = XHCI_MAX_HALT_USEC * 2; struct usb_hcd *hcd = xhci_to_hcd(xhci); u32 command; u32 res; From 8d0b11b6ac68f280aabe962349678342f0dcc297 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Wed, 11 Dec 2019 16:20:06 +0200 Subject: [PATCH 1606/3715] xhci: handle some XHCI_TRUST_TX_LENGTH quirks cases as default behaviour. commit 7ff11162808cc2ec66353fc012c58bb449c892c3 upstream. xhci driver claims it needs XHCI_TRUST_TX_LENGTH quirk for both Broadcom/Cavium and a Renesas xHC controllers. The quirk was inteded for handling false "success" complete event for transfers that had data left untransferred. These transfers should complete with "short packet" events instead. In these two new cases the false "success" completion is reported after a "short packet" if the TD consists of several TRBs. xHCI specs 4.10.1.1.2 say remaining TRBs should report "short packet" as well after the first short packet in a TD, but this issue seems so common it doesn't make sense to add the quirk for all vendors. Turn these events into short packets automatically instead. This gets rid of the "The WARN Successful completion on short TX for slot 1 ep 1: needs XHCI_TRUST_TX_LENGTH quirk" warning in many cases. Cc: Reported-by: Eli Billauer Reported-by: Ard Biesheuvel Tested-by: Eli Billauer Tested-by: Ard Biesheuvel Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20191211142007.8847-6-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 009b6796f405..89af395cd89c 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2398,7 +2398,8 @@ static int handle_tx_event(struct xhci_hcd *xhci, case COMP_SUCCESS: if (EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)) == 0) break; - if (xhci->quirks & XHCI_TRUST_TX_LENGTH) + if (xhci->quirks & XHCI_TRUST_TX_LENGTH || + ep_ring->last_td_was_short) trb_comp_code = COMP_SHORT_PACKET; else xhci_warn_ratelimited(xhci, From f2643dec8fd5b6736f71b1cdc036982eefb04fe4 Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Thu, 7 Nov 2019 11:30:36 +0100 Subject: [PATCH 1607/3715] ARM: dts: pandora-common: define wl1251 as child node of mmc3 commit 4f9007d692017cef38baf2a9b82b7879d5b2407b upstream. Since v4.7 the dma initialization requires that there is a device tree property for "rx" and "tx" channels which is not provided by the pdata-quirks initialization. By conversion of the mmc3 setup to device tree this will finally allows to remove the OpenPandora wlan specific omap3 data-quirks. Fixes: 81eef6ca9201 ("mmc: omap_hsmmc: Use dma_request_chan() for requesting DMA channel") Signed-off-by: H. Nikolaus Schaller Cc: # v4.7+ Acked-by: Tony Lindgren Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/omap3-pandora-common.dtsi | 36 +++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/omap3-pandora-common.dtsi b/arch/arm/boot/dts/omap3-pandora-common.dtsi index 53e007abdc71..964240a0f4a9 100644 --- a/arch/arm/boot/dts/omap3-pandora-common.dtsi +++ b/arch/arm/boot/dts/omap3-pandora-common.dtsi @@ -221,6 +221,17 @@ gpio = <&gpio6 4 GPIO_ACTIVE_HIGH>; /* GPIO_164 */ }; + /* wl1251 wifi+bt module */ + wlan_en: fixed-regulator-wg7210_en { + compatible = "regulator-fixed"; + regulator-name = "vwlan"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + startup-delay-us = <50000>; + enable-active-high; + gpio = <&gpio1 23 GPIO_ACTIVE_HIGH>; + }; + /* wg7210 (wifi+bt module) 32k clock buffer */ wg7210_32k: fixed-regulator-wg7210_32k { compatible = "regulator-fixed"; @@ -514,9 +525,30 @@ /*wp-gpios = <&gpio4 31 GPIO_ACTIVE_HIGH>;*/ /* GPIO_127 */ }; -/* mmc3 is probed using pdata-quirks to pass wl1251 card data */ &mmc3 { - status = "disabled"; + vmmc-supply = <&wlan_en>; + + bus-width = <4>; + non-removable; + ti,non-removable; + cap-power-off-card; + + pinctrl-names = "default"; + pinctrl-0 = <&mmc3_pins>; + + #address-cells = <1>; + #size-cells = <0>; + + wlan: wifi@1 { + compatible = "ti,wl1251"; + + reg = <1>; + + interrupt-parent = <&gpio1>; + interrupts = <21 IRQ_TYPE_LEVEL_HIGH>; /* GPIO_21 */ + + ti,wl1251-has-eeprom; + }; }; /* bluetooth*/ From d1958757ac6fb4046d143ae9e905c3e6b0fe8b69 Mon Sep 17 00:00:00 2001 From: Chris Lesiak Date: Thu, 21 Nov 2019 20:39:42 +0000 Subject: [PATCH 1608/3715] iio: humidity: hdc100x: fix IIO_HUMIDITYRELATIVE channel reporting commit 342a6928bd5017edbdae376042d8ad6af3d3b943 upstream. The IIO_HUMIDITYRELATIVE channel was being incorrectly reported back as percent when it should have been milli percent. This is via an incorrect scale value being returned to userspace. Signed-off-by: Chris Lesiak Acked-by: Matt Ranostay Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/humidity/hdc100x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/humidity/hdc100x.c b/drivers/iio/humidity/hdc100x.c index 7851bd90ef64..b470cb8132da 100644 --- a/drivers/iio/humidity/hdc100x.c +++ b/drivers/iio/humidity/hdc100x.c @@ -237,7 +237,7 @@ static int hdc100x_read_raw(struct iio_dev *indio_dev, *val2 = 65536; return IIO_VAL_FRACTIONAL; } else { - *val = 100; + *val = 100000; *val2 = 65536; return IIO_VAL_FRACTIONAL; } From 38aca0b906833dfc48544461d3e8b2ee517e2b40 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 10 Dec 2019 12:25:58 +0100 Subject: [PATCH 1609/3715] USB: atm: ueagle-atm: add missing endpoint check commit 09068c1ad53fb077bdac288869dec2435420bdc4 upstream. Make sure that the interrupt interface has an endpoint before trying to access its endpoint descriptors to avoid dereferencing a NULL pointer. The driver binds to the interrupt interface with interface number 0, but must not assume that this interface or its current alternate setting are the first entries in the corresponding configuration arrays. Fixes: b72458a80c75 ("[PATCH] USB: Eagle and ADI 930 usb adsl modem driver") Cc: stable # 2.6.16 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191210112601.3561-2-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/atm/ueagle-atm.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c index ba7616395db2..f649b7b83200 100644 --- a/drivers/usb/atm/ueagle-atm.c +++ b/drivers/usb/atm/ueagle-atm.c @@ -2167,10 +2167,11 @@ resubmit: /* * Start the modem : init the data and start kernel thread */ -static int uea_boot(struct uea_softc *sc) +static int uea_boot(struct uea_softc *sc, struct usb_interface *intf) { - int ret, size; struct intr_pkt *intr; + int ret = -ENOMEM; + int size; uea_enters(INS_TO_USBDEV(sc)); @@ -2195,6 +2196,11 @@ static int uea_boot(struct uea_softc *sc) if (UEA_CHIP_VERSION(sc) == ADI930) load_XILINX_firmware(sc); + if (intf->cur_altsetting->desc.bNumEndpoints < 1) { + ret = -ENODEV; + goto err0; + } + intr = kmalloc(size, GFP_KERNEL); if (!intr) goto err0; @@ -2206,8 +2212,7 @@ static int uea_boot(struct uea_softc *sc) usb_fill_int_urb(sc->urb_int, sc->usb_dev, usb_rcvintpipe(sc->usb_dev, UEA_INTR_PIPE), intr, size, uea_intr, sc, - sc->usb_dev->actconfig->interface[0]->altsetting[0]. - endpoint[0].desc.bInterval); + intf->cur_altsetting->endpoint[0].desc.bInterval); ret = usb_submit_urb(sc->urb_int, GFP_KERNEL); if (ret < 0) { @@ -2222,6 +2227,7 @@ static int uea_boot(struct uea_softc *sc) sc->kthread = kthread_create(uea_kthread, sc, "ueagle-atm"); if (IS_ERR(sc->kthread)) { uea_err(INS_TO_USBDEV(sc), "failed to create thread\n"); + ret = PTR_ERR(sc->kthread); goto err2; } @@ -2236,7 +2242,7 @@ err1: kfree(intr); err0: uea_leaves(INS_TO_USBDEV(sc)); - return -ENOMEM; + return ret; } /* @@ -2597,7 +2603,7 @@ static int uea_bind(struct usbatm_data *usbatm, struct usb_interface *intf, if (ret < 0) goto error; - ret = uea_boot(sc); + ret = uea_boot(sc, intf); if (ret < 0) goto error_rm_grp; From 19401ee0fb9e199e1ba01adb3c3da13163c2ee6a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 10 Dec 2019 12:26:00 +0100 Subject: [PATCH 1610/3715] USB: idmouse: fix interface sanity checks commit 59920635b89d74b9207ea803d5e91498d39e8b69 upstream. Make sure to use the current alternate setting when verifying the interface descriptors to avoid binding to an invalid interface. Failing to do so could cause the driver to misbehave or trigger a WARN() in usb_submit_urb() that kernels with panic_on_warn set would choke on. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191210112601.3561-4-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/idmouse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/misc/idmouse.c b/drivers/usb/misc/idmouse.c index 39d8fedfaf3b..01ef2551be46 100644 --- a/drivers/usb/misc/idmouse.c +++ b/drivers/usb/misc/idmouse.c @@ -341,7 +341,7 @@ static int idmouse_probe(struct usb_interface *interface, int result; /* check if we have gotten the data or the hid interface */ - iface_desc = &interface->altsetting[0]; + iface_desc = interface->cur_altsetting; if (iface_desc->desc.bInterfaceClass != 0x0A) return -ENODEV; From 5b62b19936438532cb578a0f4060ebfb377a1d99 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 10 Dec 2019 12:26:01 +0100 Subject: [PATCH 1611/3715] USB: serial: io_edgeport: fix epic endpoint lookup commit 7c5a2df3367a2c4984f1300261345817d95b71f8 upstream. Make sure to use the current alternate setting when looking up the endpoints on epic devices to avoid binding to an invalid interface. Failing to do so could cause the driver to misbehave or trigger a WARN() in usb_submit_urb() that kernels with panic_on_warn set would choke on. Fixes: 6e8cf7751f9f ("USB: add EPIC support to the io_edgeport driver") Cc: stable # 2.6.21 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191210112601.3561-5-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/io_edgeport.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c index 01f3ac7769f3..51b61545ccf2 100644 --- a/drivers/usb/serial/io_edgeport.c +++ b/drivers/usb/serial/io_edgeport.c @@ -2918,16 +2918,18 @@ static int edge_startup(struct usb_serial *serial) response = 0; if (edge_serial->is_epic) { + struct usb_host_interface *alt; + + alt = serial->interface->cur_altsetting; + /* EPIC thing, set up our interrupt polling now and our read * urb, so that the device knows it really is connected. */ interrupt_in_found = bulk_in_found = bulk_out_found = false; - for (i = 0; i < serial->interface->altsetting[0] - .desc.bNumEndpoints; ++i) { + for (i = 0; i < alt->desc.bNumEndpoints; ++i) { struct usb_endpoint_descriptor *endpoint; int buffer_size; - endpoint = &serial->interface->altsetting[0]. - endpoint[i].desc; + endpoint = &alt->endpoint[i].desc; buffer_size = usb_endpoint_maxp(endpoint); if (!interrupt_in_found && (usb_endpoint_is_int_in(endpoint))) { From ea1570688c3f0b8bbf03218c1e94ce0251934e0d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 10 Dec 2019 12:25:59 +0100 Subject: [PATCH 1612/3715] USB: adutux: fix interface sanity check commit 3c11c4bed02b202e278c0f5c319ae435d7fb9815 upstream. Make sure to use the current alternate setting when verifying the interface descriptors to avoid binding to an invalid interface. Failing to do so could cause the driver to misbehave or trigger a WARN() in usb_submit_urb() that kernels with panic_on_warn set would choke on. Fixes: 03270634e242 ("USB: Add ADU support for Ontrak ADU devices") Cc: stable # 2.6.19 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191210112601.3561-3-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/adutux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/misc/adutux.c b/drivers/usb/misc/adutux.c index c8c8fa3f1f46..45390045c75d 100644 --- a/drivers/usb/misc/adutux.c +++ b/drivers/usb/misc/adutux.c @@ -671,7 +671,7 @@ static int adu_probe(struct usb_interface *interface, init_waitqueue_head(&dev->read_wait); init_waitqueue_head(&dev->write_wait); - res = usb_find_common_endpoints_reverse(&interface->altsetting[0], + res = usb_find_common_endpoints_reverse(interface->cur_altsetting, NULL, NULL, &dev->interrupt_in_endpoint, &dev->interrupt_out_endpoint); From 2a275fa6ad522f6b48bce59617dcce1d4ef5ecd2 Mon Sep 17 00:00:00 2001 From: Emiliano Ingrassia Date: Wed, 27 Nov 2019 17:03:55 +0100 Subject: [PATCH 1613/3715] usb: core: urb: fix URB structure initialization function commit 1cd17f7f0def31e3695501c4f86cd3faf8489840 upstream. Explicitly initialize URB structure urb_list field in usb_init_urb(). This field can be potentially accessed uninitialized and its initialization is coherent with the usage of list_del_init() in usb_hcd_unlink_urb_from_ep() and usb_giveback_urb_bh() and its explicit initialization in usb_hcd_submit_urb() error path. Signed-off-by: Emiliano Ingrassia Cc: stable Link: https://lore.kernel.org/r/20191127160355.GA27196@ingrassia.epigenesys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/urb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c index 8b800e34407b..83bd48734af5 100644 --- a/drivers/usb/core/urb.c +++ b/drivers/usb/core/urb.c @@ -45,6 +45,7 @@ void usb_init_urb(struct urb *urb) if (urb) { memset(urb, 0, sizeof(*urb)); kref_init(&urb->kref); + INIT_LIST_HEAD(&urb->urb_list); INIT_LIST_HEAD(&urb->anchor_list); } } From d41971493d28edf2b916ad5201d8301a8513ed51 Mon Sep 17 00:00:00 2001 From: Pete Zaitcev Date: Wed, 4 Dec 2019 20:39:41 -0600 Subject: [PATCH 1614/3715] usb: mon: Fix a deadlock in usbmon between mmap and read commit 19e6317d24c25ee737c65d1ffb7483bdda4bb54a upstream. The problem arises because our read() function grabs a lock of the circular buffer, finds something of interest, then invokes copy_to_user() straight from the buffer, which in turn takes mm->mmap_sem. In the same time, the callback mon_bin_vma_fault() is invoked under mm->mmap_sem. It attempts to take the fetch lock and deadlocks. This patch does away with protecting of our page list with any semaphores, and instead relies on the kernel not close the device while mmap is active in a process. In addition, we prohibit re-sizing of a buffer while mmap is active. This way, when (now unlocked) fault is processed, it works with the page that is intended to be mapped-in, and not some other random page. Note that this may have an ABI impact, but hopefully no legitimate program is this wrong. Signed-off-by: Pete Zaitcev Reported-by: syzbot+56f9673bb4cdcbeb0e92@syzkaller.appspotmail.com Reviewed-by: Alan Stern Fixes: 46eb14a6e158 ("USB: fix usbmon BUG trigger") Cc: Link: https://lore.kernel.org/r/20191204203941.3503452b@suzdal.zaitcev.lan Signed-off-by: Greg Kroah-Hartman --- drivers/usb/mon/mon_bin.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c index f932f40302df..156aebf62e61 100644 --- a/drivers/usb/mon/mon_bin.c +++ b/drivers/usb/mon/mon_bin.c @@ -1038,12 +1038,18 @@ static long mon_bin_ioctl(struct file *file, unsigned int cmd, unsigned long arg mutex_lock(&rp->fetch_lock); spin_lock_irqsave(&rp->b_lock, flags); - mon_free_buff(rp->b_vec, rp->b_size/CHUNK_SIZE); - kfree(rp->b_vec); - rp->b_vec = vec; - rp->b_size = size; - rp->b_read = rp->b_in = rp->b_out = rp->b_cnt = 0; - rp->cnt_lost = 0; + if (rp->mmap_active) { + mon_free_buff(vec, size/CHUNK_SIZE); + kfree(vec); + ret = -EBUSY; + } else { + mon_free_buff(rp->b_vec, rp->b_size/CHUNK_SIZE); + kfree(rp->b_vec); + rp->b_vec = vec; + rp->b_size = size; + rp->b_read = rp->b_in = rp->b_out = rp->b_cnt = 0; + rp->cnt_lost = 0; + } spin_unlock_irqrestore(&rp->b_lock, flags); mutex_unlock(&rp->fetch_lock); } @@ -1215,13 +1221,21 @@ mon_bin_poll(struct file *file, struct poll_table_struct *wait) static void mon_bin_vma_open(struct vm_area_struct *vma) { struct mon_reader_bin *rp = vma->vm_private_data; + unsigned long flags; + + spin_lock_irqsave(&rp->b_lock, flags); rp->mmap_active++; + spin_unlock_irqrestore(&rp->b_lock, flags); } static void mon_bin_vma_close(struct vm_area_struct *vma) { + unsigned long flags; + struct mon_reader_bin *rp = vma->vm_private_data; + spin_lock_irqsave(&rp->b_lock, flags); rp->mmap_active--; + spin_unlock_irqrestore(&rp->b_lock, flags); } /* @@ -1233,16 +1247,12 @@ static int mon_bin_vma_fault(struct vm_fault *vmf) unsigned long offset, chunk_idx; struct page *pageptr; - mutex_lock(&rp->fetch_lock); offset = vmf->pgoff << PAGE_SHIFT; - if (offset >= rp->b_size) { - mutex_unlock(&rp->fetch_lock); + if (offset >= rp->b_size) return VM_FAULT_SIGBUS; - } chunk_idx = offset / CHUNK_SIZE; pageptr = rp->b_vec[chunk_idx].pg; get_page(pageptr); - mutex_unlock(&rp->fetch_lock); vmf->page = pageptr; return 0; } From 3be0e56cd6a8ea11fd8ecfc5f52b5cc52a236213 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Mon, 7 Oct 2019 14:46:37 -0700 Subject: [PATCH 1615/3715] tpm: add check after commands attribs tab allocation commit f1689114acc5e89a196fec6d732dae3e48edb6ad upstream. devm_kcalloc() can fail and return NULL so we need to check for that. Cc: stable@vger.kernel.org Fixes: 58472f5cd4f6f ("tpm: validate TPM 2.0 commands") Signed-off-by: Tadeusz Struk Reviewed-by: Jerry Snitselaar Reviewed-by: Jarkko Sakkinen Tested-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm2-cmd.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c index 44a3d16231f6..dd64b3b37400 100644 --- a/drivers/char/tpm/tpm2-cmd.c +++ b/drivers/char/tpm/tpm2-cmd.c @@ -1029,6 +1029,10 @@ static int tpm2_get_cc_attrs_tbl(struct tpm_chip *chip) chip->cc_attrs_tbl = devm_kzalloc(&chip->dev, 4 * nr_commands, GFP_KERNEL); + if (!chip->cc_attrs_tbl) { + rc = -ENOMEM; + goto out; + } rc = tpm_buf_init(&buf, TPM2_ST_NO_SESSIONS, TPM2_CC_GET_CAPABILITY); if (rc) From 26bde72620ba28d7a979aeb23109c771b4b5ea18 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 22 Oct 2019 16:58:59 +0200 Subject: [PATCH 1616/3715] mtd: spear_smi: Fix Write Burst mode commit 69c7f4618c16b4678f8a4949b6bb5ace259c0033 upstream. Any write with either dd or flashcp to a device driven by the spear_smi.c driver will pass through the spear_smi_cpy_toio() function. This function will get called for chunks of up to 256 bytes. If the amount of data is smaller, we may have a problem if the data length is not 4-byte aligned. In this situation, the kernel panics during the memcpy: # dd if=/dev/urandom bs=1001 count=1 of=/dev/mtd6 spear_smi_cpy_toio [620] dest c9070000, src c7be8800, len 256 spear_smi_cpy_toio [620] dest c9070100, src c7be8900, len 256 spear_smi_cpy_toio [620] dest c9070200, src c7be8a00, len 256 spear_smi_cpy_toio [620] dest c9070300, src c7be8b00, len 233 Unhandled fault: external abort on non-linefetch (0x808) at 0xc90703e8 [...] PC is at memcpy+0xcc/0x330 The above error occurs because the implementation of memcpy_toio() tries to optimize the number of I/O by writing 4 bytes at a time as much as possible, until there are less than 4 bytes left and then switches to word or byte writes. Unfortunately, the specification states about the Write Burst mode: "the next AHB Write request should point to the next incremented address and should have the same size (byte, half-word or word)" This means ARM architecture implementation of memcpy_toio() cannot reliably be used blindly here. Workaround this situation by update the write path to stick to byte access when the burst length is not multiple of 4. Fixes: f18dbbb1bfe0 ("mtd: ST SPEAr: Add SMI driver for serial NOR flash") Cc: Russell King Cc: Boris Brezillon Cc: stable@vger.kernel.org Signed-off-by: Miquel Raynal Reviewed-by: Russell King Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/devices/spear_smi.c | 38 ++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/devices/spear_smi.c b/drivers/mtd/devices/spear_smi.c index ddf478976013..c2922e129db8 100644 --- a/drivers/mtd/devices/spear_smi.c +++ b/drivers/mtd/devices/spear_smi.c @@ -595,6 +595,26 @@ static int spear_mtd_read(struct mtd_info *mtd, loff_t from, size_t len, return 0; } +/* + * The purpose of this function is to ensure a memcpy_toio() with byte writes + * only. Its structure is inspired from the ARM implementation of _memcpy_toio() + * which also does single byte writes but cannot be used here as this is just an + * implementation detail and not part of the API. Not mentioning the comment + * stating that _memcpy_toio() should be optimized. + */ +static void spear_smi_memcpy_toio_b(volatile void __iomem *dest, + const void *src, size_t len) +{ + const unsigned char *from = src; + + while (len) { + len--; + writeb(*from, dest); + from++; + dest++; + } +} + static inline int spear_smi_cpy_toio(struct spear_smi *dev, u32 bank, void __iomem *dest, const void *src, size_t len) { @@ -617,7 +637,23 @@ static inline int spear_smi_cpy_toio(struct spear_smi *dev, u32 bank, ctrlreg1 = readl(dev->io_base + SMI_CR1); writel((ctrlreg1 | WB_MODE) & ~SW_MODE, dev->io_base + SMI_CR1); - memcpy_toio(dest, src, len); + /* + * In Write Burst mode (WB_MODE), the specs states that writes must be: + * - incremental + * - of the same size + * The ARM implementation of memcpy_toio() will optimize the number of + * I/O by using as much 4-byte writes as possible, surrounded by + * 2-byte/1-byte access if: + * - the destination is not 4-byte aligned + * - the length is not a multiple of 4-byte. + * Avoid this alternance of write access size by using our own 'byte + * access' helper if at least one of the two conditions above is true. + */ + if (IS_ALIGNED(len, sizeof(u32)) && + IS_ALIGNED((uintptr_t)dest, sizeof(u32))) + memcpy_toio(dest, src, len); + else + spear_smi_memcpy_toio_b(dest, src, len); writel(ctrlreg1, dev->io_base + SMI_CR1); From ef785dd3ca4407e06210645a332728a3f84b34c7 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 11 Dec 2019 12:11:52 +0100 Subject: [PATCH 1617/3715] virtio-balloon: fix managed page counts when migrating pages between zones commit 63341ab03706e11a31e3dd8ccc0fbc9beaf723f0 upstream. In case we have to migrate a ballon page to a newpage of another zone, the managed page count of both zones is wrong. Paired with memory offlining (which will adjust the managed page count), we can trigger kernel crashes and all kinds of different symptoms. One way to reproduce: 1. Start a QEMU guest with 4GB, no NUMA 2. Hotplug a 1GB DIMM and online the memory to ZONE_NORMAL 3. Inflate the balloon to 1GB 4. Unplug the DIMM (be quick, otherwise unmovable data ends up on it) 5. Observe /proc/zoneinfo Node 0, zone Normal pages free 16810 min 24848885473806 low 18471592959183339 high 36918337032892872 spanned 262144 present 262144 managed 18446744073709533486 6. Do anything that requires some memory (e.g., inflate the balloon some more). The OOM goes crazy and the system crashes [ 238.324946] Out of memory: Killed process 537 (login) total-vm:27584kB, anon-rss:860kB, file-rss:0kB, shmem-rss:00 [ 238.338585] systemd invoked oom-killer: gfp_mask=0x100cca(GFP_HIGHUSER_MOVABLE), order=0, oom_score_adj=0 [ 238.339420] CPU: 0 PID: 1 Comm: systemd Tainted: G D W 5.4.0-next-20191204+ #75 [ 238.340139] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu4 [ 238.341121] Call Trace: [ 238.341337] dump_stack+0x8f/0xd0 [ 238.341630] dump_header+0x61/0x5ea [ 238.341942] oom_kill_process.cold+0xb/0x10 [ 238.342299] out_of_memory+0x24d/0x5a0 [ 238.342625] __alloc_pages_slowpath+0xd12/0x1020 [ 238.343024] __alloc_pages_nodemask+0x391/0x410 [ 238.343407] pagecache_get_page+0xc3/0x3a0 [ 238.343757] filemap_fault+0x804/0xc30 [ 238.344083] ? ext4_filemap_fault+0x28/0x42 [ 238.344444] ext4_filemap_fault+0x30/0x42 [ 238.344789] __do_fault+0x37/0x1a0 [ 238.345087] __handle_mm_fault+0x104d/0x1ab0 [ 238.345450] handle_mm_fault+0x169/0x360 [ 238.345790] do_user_addr_fault+0x20d/0x490 [ 238.346154] do_page_fault+0x31/0x210 [ 238.346468] async_page_fault+0x43/0x50 [ 238.346797] RIP: 0033:0x7f47eba4197e [ 238.347110] Code: Bad RIP value. [ 238.347387] RSP: 002b:00007ffd7c0c1890 EFLAGS: 00010293 [ 238.347834] RAX: 0000000000000002 RBX: 000055d196a20a20 RCX: 00007f47eba4197e [ 238.348437] RDX: 0000000000000033 RSI: 00007ffd7c0c18c0 RDI: 0000000000000004 [ 238.349047] RBP: 00007ffd7c0c1c20 R08: 0000000000000000 R09: 0000000000000033 [ 238.349660] R10: 00000000ffffffff R11: 0000000000000293 R12: 0000000000000001 [ 238.350261] R13: ffffffffffffffff R14: 0000000000000000 R15: 00007ffd7c0c18c0 [ 238.350878] Mem-Info: [ 238.351085] active_anon:3121 inactive_anon:51 isolated_anon:0 [ 238.351085] active_file:12 inactive_file:7 isolated_file:0 [ 238.351085] unevictable:0 dirty:0 writeback:0 unstable:0 [ 238.351085] slab_reclaimable:5565 slab_unreclaimable:10170 [ 238.351085] mapped:3 shmem:111 pagetables:155 bounce:0 [ 238.351085] free:720717 free_pcp:2 free_cma:0 [ 238.353757] Node 0 active_anon:12484kB inactive_anon:204kB active_file:48kB inactive_file:28kB unevictable:0kB iss [ 238.355979] Node 0 DMA free:11556kB min:36kB low:48kB high:60kB reserved_highatomic:0KB active_anon:152kB inactivB [ 238.358345] lowmem_reserve[]: 0 2955 2884 2884 2884 [ 238.358761] Node 0 DMA32 free:2677864kB min:7004kB low:10028kB high:13052kB reserved_highatomic:0KB active_anon:0B [ 238.361202] lowmem_reserve[]: 0 0 72057594037927865 72057594037927865 72057594037927865 [ 238.361888] Node 0 Normal free:193448kB min:99395541895224kB low:73886371836733356kB high:147673348131571488kB reB [ 238.364765] lowmem_reserve[]: 0 0 0 0 0 [ 238.365101] Node 0 DMA: 7*4kB (U) 5*8kB (UE) 6*16kB (UME) 2*32kB (UM) 1*64kB (U) 2*128kB (UE) 3*256kB (UME) 2*512B [ 238.366379] Node 0 DMA32: 0*4kB 1*8kB (U) 2*16kB (UM) 2*32kB (UM) 2*64kB (UM) 1*128kB (U) 1*256kB (U) 1*512kB (U)B [ 238.367654] Node 0 Normal: 1985*4kB (UME) 1321*8kB (UME) 844*16kB (UME) 524*32kB (UME) 300*64kB (UME) 138*128kB (B [ 238.369184] Node 0 hugepages_total=0 hugepages_free=0 hugepages_surp=0 hugepages_size=2048kB [ 238.369915] 130 total pagecache pages [ 238.370241] 0 pages in swap cache [ 238.370533] Swap cache stats: add 0, delete 0, find 0/0 [ 238.370981] Free swap = 0kB [ 238.371239] Total swap = 0kB [ 238.371488] 1048445 pages RAM [ 238.371756] 0 pages HighMem/MovableOnly [ 238.372090] 306992 pages reserved [ 238.372376] 0 pages cma reserved [ 238.372661] 0 pages hwpoisoned In another instance (older kernel), I was able to observe this (negative page count :/): [ 180.896971] Offlined Pages 32768 [ 182.667462] Offlined Pages 32768 [ 184.408117] Offlined Pages 32768 [ 186.026321] Offlined Pages 32768 [ 187.684861] Offlined Pages 32768 [ 189.227013] Offlined Pages 32768 [ 190.830303] Offlined Pages 32768 [ 190.833071] Built 1 zonelists, mobility grouping on. Total pages: -36920272750453009 In another instance (older kernel), I was no longer able to start any process: [root@vm ~]# [ 214.348068] Offlined Pages 32768 [ 215.973009] Offlined Pages 32768 cat /proc/meminfo -bash: fork: Cannot allocate memory [root@vm ~]# cat /proc/meminfo -bash: fork: Cannot allocate memory Fix it by properly adjusting the managed page count when migrating if the zone changed. The managed page count of the zones now looks after unplug of the DIMM (and after deflating the balloon) just like before inflating the balloon (and plugging+onlining the DIMM). We'll temporarily modify the totalram page count. If this ever becomes a problem, we can fine tune by providing helpers that don't touch the totalram pages (e.g., adjust_zone_managed_page_count()). Please note that fixing up the managed page count is only necessary when we adjusted the managed page count when inflating - only if we don't have VIRTIO_BALLOON_F_DEFLATE_ON_OOM. With that feature, the managed page count is not touched when inflating/deflating. Reported-by: Yumei Huang Fixes: 3dcc0571cd64 ("mm: correctly update zone->managed_pages") Cc: # v3.11+ Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Jiang Liu Cc: Andrew Morton Cc: Igor Mammedov Cc: virtualization@lists.linux-foundation.org Signed-off-by: David Hildenbrand Signed-off-by: Michael S. Tsirkin Signed-off-by: Greg Kroah-Hartman --- drivers/virtio/virtio_balloon.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index d9873aa014a6..499531608fa2 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -492,6 +492,17 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info, get_page(newpage); /* balloon reference */ + /* + * When we migrate a page to a different zone and adjusted the + * managed page count when inflating, we have to fixup the count of + * both involved zones. + */ + if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM) && + page_zone(page) != page_zone(newpage)) { + adjust_managed_page_count(page, 1); + adjust_managed_page_count(newpage, -1); + } + /* balloon's page migration 1st step -- inflate "newpage" */ spin_lock_irqsave(&vb_dev_info->pages_lock, flags); balloon_page_insert(vb_dev_info, newpage); From c5320472be4fe909f53279d2fd5aaa422dd06f69 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Wed, 27 Nov 2019 13:10:54 -0800 Subject: [PATCH 1618/3715] usb: dwc3: ep0: Clear started flag on completion commit 2d7b78f59e020b07fc6338eefe286f54ee2d6773 upstream. Clear ep0's DWC3_EP_TRANSFER_STARTED flag if the END_TRANSFER command is completed. Otherwise, we can't start control transfer again after END_TRANSFER. Cc: stable@vger.kernel.org Signed-off-by: Thinh Nguyen Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/ep0.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 89fe53c846ef..cb50806d2459 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -1147,6 +1147,9 @@ static void dwc3_ep0_xfernotready(struct dwc3 *dwc, void dwc3_ep0_interrupt(struct dwc3 *dwc, const struct dwc3_event_depevt *event) { + struct dwc3_ep *dep = dwc->eps[event->endpoint_number]; + u8 cmd; + switch (event->endpoint_event) { case DWC3_DEPEVT_XFERCOMPLETE: dwc3_ep0_xfer_complete(dwc, event); @@ -1159,7 +1162,12 @@ void dwc3_ep0_interrupt(struct dwc3 *dwc, case DWC3_DEPEVT_XFERINPROGRESS: case DWC3_DEPEVT_RXTXFIFOEVT: case DWC3_DEPEVT_STREAMEVT: + break; case DWC3_DEPEVT_EPCMDCMPLT: + cmd = DEPEVT_PARAMETER_CMD(event->parameters); + + if (cmd == DWC3_DEPCMD_ENDTRANSFER) + dep->flags &= ~DWC3_EP_TRANSFER_STARTED; break; } } From 797200898adbea5e7ece68f92331efd26faa3efc Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 24 Sep 2019 16:50:43 -0400 Subject: [PATCH 1619/3715] btrfs: check page->mapping when loading free space cache commit 3797136b626ad4b6582223660c041efdea8f26b2 upstream. While testing 5.2 we ran into the following panic [52238.017028] BUG: kernel NULL pointer dereference, address: 0000000000000001 [52238.105608] RIP: 0010:drop_buffers+0x3d/0x150 [52238.304051] Call Trace: [52238.308958] try_to_free_buffers+0x15b/0x1b0 [52238.317503] shrink_page_list+0x1164/0x1780 [52238.325877] shrink_inactive_list+0x18f/0x3b0 [52238.334596] shrink_node_memcg+0x23e/0x7d0 [52238.342790] ? do_shrink_slab+0x4f/0x290 [52238.350648] shrink_node+0xce/0x4a0 [52238.357628] balance_pgdat+0x2c7/0x510 [52238.365135] kswapd+0x216/0x3e0 [52238.371425] ? wait_woken+0x80/0x80 [52238.378412] ? balance_pgdat+0x510/0x510 [52238.386265] kthread+0x111/0x130 [52238.392727] ? kthread_create_on_node+0x60/0x60 [52238.401782] ret_from_fork+0x1f/0x30 The page we were trying to drop had a page->private, but had no page->mapping and so called drop_buffers, assuming that we had a buffer_head on the page, and then panic'ed trying to deref 1, which is our page->private for data pages. This is happening because we're truncating the free space cache while we're trying to load the free space cache. This isn't supposed to happen, and I'll fix that in a followup patch. However we still shouldn't allow those sort of mistakes to result in messing with pages that do not belong to us. So add the page->mapping check to verify that we still own this page after dropping and re-acquiring the page lock. This page being unlocked as: btrfs_readpage extent_read_full_page __extent_read_full_page __do_readpage if (!nr) unlock_page <-- nr can be 0 only if submit_extent_page returns an error CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Filipe Manana Reviewed-by: Nikolay Borisov Signed-off-by: Josef Bacik [ add callchain ] Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/free-space-cache.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 9f31b81a5e27..abeb26d48d0a 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -398,6 +398,12 @@ static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, struct inode *inode if (uptodate && !PageUptodate(page)) { btrfs_readpage(NULL, page); lock_page(page); + if (page->mapping != inode->i_mapping) { + btrfs_err(BTRFS_I(inode)->root->fs_info, + "free space cache page truncated"); + io_ctl_drop_pages(io_ctl); + return -EIO; + } if (!PageUptodate(page)) { btrfs_err(BTRFS_I(inode)->root->fs_info, "error reading free space cache"); From ca64b008af0edf8e17e3f13c535d2bc5b2a2d506 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 26 Sep 2019 08:29:32 -0400 Subject: [PATCH 1620/3715] btrfs: use refcount_inc_not_zero in kill_all_nodes commit baf320b9d531f1cfbf64c60dd155ff80a58b3796 upstream. We hit the following warning while running down a different problem [ 6197.175850] ------------[ cut here ]------------ [ 6197.185082] refcount_t: underflow; use-after-free. [ 6197.194704] WARNING: CPU: 47 PID: 966 at lib/refcount.c:190 refcount_sub_and_test_checked+0x53/0x60 [ 6197.521792] Call Trace: [ 6197.526687] __btrfs_release_delayed_node+0x76/0x1c0 [ 6197.536615] btrfs_kill_all_delayed_nodes+0xec/0x130 [ 6197.546532] ? __btrfs_btree_balance_dirty+0x60/0x60 [ 6197.556482] btrfs_clean_one_deleted_snapshot+0x71/0xd0 [ 6197.566910] cleaner_kthread+0xfa/0x120 [ 6197.574573] kthread+0x111/0x130 [ 6197.581022] ? kthread_create_on_node+0x60/0x60 [ 6197.590086] ret_from_fork+0x1f/0x30 [ 6197.597228] ---[ end trace 424bb7ae00509f56 ]--- This is because the free side drops the ref without the lock, and then takes the lock if our refcount is 0. So you can have nodes on the tree that have a refcount of 0. Fix this by zero'ing out that element in our temporary array so we don't try to kill it again. CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Nikolay Borisov Signed-off-by: Josef Bacik Reviewed-by: David Sterba [ add comment ] Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/delayed-inode.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 04f39111fafb..87414fc9e268 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1975,12 +1975,19 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root) } inode_id = delayed_nodes[n - 1]->inode_id + 1; - - for (i = 0; i < n; i++) - refcount_inc(&delayed_nodes[i]->refs); + for (i = 0; i < n; i++) { + /* + * Don't increase refs in case the node is dead and + * about to be removed from the tree in the loop below + */ + if (!refcount_inc_not_zero(&delayed_nodes[i]->refs)) + delayed_nodes[i] = NULL; + } spin_unlock(&root->inode_lock); for (i = 0; i < n; i++) { + if (!delayed_nodes[i]) + continue; __btrfs_kill_delayed_node(delayed_nodes[i]); btrfs_release_delayed_node(delayed_nodes[i]); } From 8a2e1bc91abc04481f81ac4f9c6cb8a65bc4f867 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 11 Oct 2019 16:41:20 +0100 Subject: [PATCH 1621/3715] Btrfs: fix negative subv_writers counter and data space leak after buffered write commit a0e248bb502d5165b3314ac3819e888fdcdf7d9f upstream. When doing a buffered write it's possible to leave the subv_writers counter of the root, used for synchronization between buffered nocow writers and snapshotting. This happens in an exceptional case like the following: 1) We fail to allocate data space for the write, since there's not enough available data space nor enough unallocated space for allocating a new data block group; 2) Because of that failure, we try to go to NOCOW mode, which succeeds and therefore we set the local variable 'only_release_metadata' to true and set the root's sub_writers counter to 1 through the call to btrfs_start_write_no_snapshotting() made by check_can_nocow(); 3) The call to btrfs_copy_from_user() returns zero, which is very unlikely to happen but not impossible; 4) No pages are copied because btrfs_copy_from_user() returned zero; 5) We call btrfs_end_write_no_snapshotting() which decrements the root's subv_writers counter to 0; 6) We don't set 'only_release_metadata' back to 'false' because we do it only if 'copied', the value returned by btrfs_copy_from_user(), is greater than zero; 7) On the next iteration of the while loop, which processes the same page range, we are now able to allocate data space for the write (we got enough data space released in the meanwhile); 8) After this if we fail at btrfs_delalloc_reserve_metadata(), because now there isn't enough free metadata space, or in some other place further below (prepare_pages(), lock_and_cleanup_extent_if_need(), btrfs_dirty_pages()), we break out of the while loop with 'only_release_metadata' having a value of 'true'; 9) Because 'only_release_metadata' is 'true' we end up decrementing the root's subv_writers counter to -1 (through a call to btrfs_end_write_no_snapshotting()), and we also end up not releasing the data space previously reserved through btrfs_check_data_free_space(). As a consequence the mechanism for synchronizing NOCOW buffered writes with snapshotting gets broken. Fix this by always setting 'only_release_metadata' to false at the start of each iteration. Fixes: 8257b2dc3c1a ("Btrfs: introduce btrfs_{start, end}_nocow_write() for each subvolume") Fixes: 7ee9e4405f26 ("Btrfs: check if we can nocow if we don't have data space") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 6fbae1357644..bf654d48eb46 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1625,6 +1625,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, break; } + only_release_metadata = false; sector_offset = pos & (fs_info->sectorsize - 1); reserve_bytes = round_up(write_bytes + sector_offset, fs_info->sectorsize); @@ -1778,7 +1779,6 @@ again: set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, EXTENT_NORESERVE, NULL, NULL, GFP_NOFS); - only_release_metadata = false; } btrfs_drop_pages(pages, num_pages); From b0e1357a0a5c4e152f47c313737e6e05f8de70d1 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 24 Oct 2019 09:38:29 +0800 Subject: [PATCH 1622/3715] btrfs: Remove btrfs_bio::flags member commit 34b127aecd4fe8e6a3903e10f204a7b7ffddca22 upstream. The last user of btrfs_bio::flags was removed in commit 326e1dbb5736 ("block: remove management of bi_remaining when restoring original bi_end_io"), remove it. (Tagged for stable as the structure is heavily used and space savings are desirable.) CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/volumes.h | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index f6ae6cdf233d..07b805d08e55 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -317,7 +317,6 @@ struct btrfs_bio { u64 map_type; /* get from map_lookup->type */ bio_end_io_t *end_io; struct bio *orig_bio; - unsigned long flags; void *private; atomic_t error; int max_errors; From ebf66f5a00c3483d17261af50bf5f913586ad16f Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 30 Oct 2019 12:23:01 +0000 Subject: [PATCH 1623/3715] Btrfs: send, skip backreference walking for extents with many references commit fd0ddbe2509568b00df364156f47561e9f469f15 upstream. Backreference walking, which is used by send to figure if it can issue clone operations instead of write operations, can be very slow and use too much memory when extents have many references. This change simply skips backreference walking when an extent has more than 64 references, in which case we fallback to a write operation instead of a clone operation. This limit is conservative and in practice I observed no signicant slowdown with up to 100 references and still low memory usage up to that limit. This is a temporary workaround until there are speedups in the backref walking code, and as such it does not attempt to add extra interfaces or knobs to tweak the threshold. Reported-by: Atemu Link: https://lore.kernel.org/linux-btrfs/CAE4GHgkvqVADtS4AzcQJxo0Q1jKQgKaW3JGp3SGdoinVo=C9eQ@mail.gmail.com/T/#me55dc0987f9cc2acaa54372ce0492c65782be3fa CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/send.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index a5905f97b3db..1211fdcd425d 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -36,6 +36,14 @@ #include "transaction.h" #include "compression.h" +/* + * Maximum number of references an extent can have in order for us to attempt to + * issue clone operations instead of write operations. This currently exists to + * avoid hitting limitations of the backreference walking code (taking a lot of + * time and using too much memory for extents with large number of references). + */ +#define SEND_MAX_EXTENT_REFS 64 + /* * A fs_path is a helper to dynamically build path names with unknown size. * It reallocates the internal buffer on demand. @@ -1324,6 +1332,7 @@ static int find_extent_clone(struct send_ctx *sctx, struct clone_root *cur_clone_root; struct btrfs_key found_key; struct btrfs_path *tmp_path; + struct btrfs_extent_item *ei; int compressed; u32 i; @@ -1373,7 +1382,6 @@ static int find_extent_clone(struct send_ctx *sctx, ret = extent_from_logical(fs_info, disk_byte, tmp_path, &found_key, &flags); up_read(&fs_info->commit_root_sem); - btrfs_release_path(tmp_path); if (ret < 0) goto out; @@ -1382,6 +1390,21 @@ static int find_extent_clone(struct send_ctx *sctx, goto out; } + ei = btrfs_item_ptr(tmp_path->nodes[0], tmp_path->slots[0], + struct btrfs_extent_item); + /* + * Backreference walking (iterate_extent_inodes() below) is currently + * too expensive when an extent has a large number of references, both + * in time spent and used memory. So for now just fallback to write + * operations instead of clone operations when an extent has more than + * a certain amount of references. + */ + if (btrfs_extent_refs(tmp_path->nodes[0], ei) > SEND_MAX_EXTENT_REFS) { + ret = -ENOENT; + goto out; + } + btrfs_release_path(tmp_path); + /* * Setup the clone roots. */ From 04e23c8fced1cb2e015ace155a4dd02c32fd1227 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 15 Nov 2019 15:43:06 -0500 Subject: [PATCH 1624/3715] btrfs: record all roots for rename exchange on a subvol commit 3e1740993e43116b3bc71b0aad1e6872f6ccf341 upstream. Testing with the new fsstress support for subvolumes uncovered a pretty bad problem with rename exchange on subvolumes. We're modifying two different subvolumes, but we only start the transaction on one of them, so the other one is not added to the dirty root list. This is caught by btrfs_cow_block() with a warning because the root has not been updated, however if we do not modify this root again we'll end up pointing at an invalid root because the root item is never updated. Fix this by making sure we add the destination root to the trans list, the same as we do with normal renames. This fixes the corruption. Fixes: cdd1fedf8261 ("btrfs: add support for RENAME_EXCHANGE and RENAME_WHITEOUT") CC: stable@vger.kernel.org # 4.9+ Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ddc1d1d1a29f..739f45b04b52 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9839,6 +9839,9 @@ static int btrfs_rename_exchange(struct inode *old_dir, goto out_notrans; } + if (dest != root) + btrfs_record_root_in_trans(trans, dest); + /* * We need to find a free sequence number both in the source and * in the destination directory for the exchange. From c7ed1e7f8d8ce78338732a894da67c94a4ec22de Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 11 Nov 2019 13:40:44 -0600 Subject: [PATCH 1625/3715] rtlwifi: rtl8192de: Fix missing code to retrieve RX buffer address commit 0e531cc575c4e9e3dd52ad287b49d3c2dc74c810 upstream. In commit 38506ecefab9 ("rtlwifi: rtl_pci: Start modification for new drivers"), a callback to get the RX buffer address was added to the PCI driver. Unfortunately, driver rtl8192de was not modified appropriately and the code runs into a WARN_ONCE() call. The use of an incorrect array is also fixed. Fixes: 38506ecefab9 ("rtlwifi: rtl_pci: Start modification for new drivers") Cc: Stable # 3.18+ Signed-off-by: Larry Finger Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c index 86019f654428..5397481875b8 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c @@ -839,13 +839,15 @@ u32 rtl92de_get_desc(u8 *p_desc, bool istx, u8 desc_name) break; } } else { - struct rx_desc_92c *pdesc = (struct rx_desc_92c *)p_desc; switch (desc_name) { case HW_DESC_OWN: - ret = GET_RX_DESC_OWN(pdesc); + ret = GET_RX_DESC_OWN(p_desc); break; case HW_DESC_RXPKT_LEN: - ret = GET_RX_DESC_PKT_LEN(pdesc); + ret = GET_RX_DESC_PKT_LEN(p_desc); + break; + case HW_DESC_RXBUFF_ADDR: + ret = GET_RX_DESC_BUFF_ADDR(p_desc); break; default: WARN_ONCE(true, "rtl8192de: ERR rxdesc :%d not processed\n", From f2ab9a8520f0adb3dd3f88ca17f7b16932a67257 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 11 Nov 2019 13:40:45 -0600 Subject: [PATCH 1626/3715] rtlwifi: rtl8192de: Fix missing callback that tests for hw release of buffer commit 3155db7613edea8fb943624062baf1e4f9cfbfd6 upstream. In commit 38506ecefab9 ("rtlwifi: rtl_pci: Start modification for new drivers"), a callback needed to check if the hardware has released a buffer indicating that a DMA operation is completed was not added. Fixes: 38506ecefab9 ("rtlwifi: rtl_pci: Start modification for new drivers") Cc: Stable # v3.18+ Signed-off-by: Larry Finger Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- .../net/wireless/realtek/rtlwifi/rtl8192de/sw.c | 1 + .../wireless/realtek/rtlwifi/rtl8192de/trx.c | 17 +++++++++++++++++ .../wireless/realtek/rtlwifi/rtl8192de/trx.h | 2 ++ 3 files changed, 20 insertions(+) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c index a6549f5f6c59..3ec75032b9be 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c @@ -237,6 +237,7 @@ static struct rtl_hal_ops rtl8192de_hal_ops = { .led_control = rtl92de_led_control, .set_desc = rtl92de_set_desc, .get_desc = rtl92de_get_desc, + .is_tx_desc_closed = rtl92de_is_tx_desc_closed, .tx_polling = rtl92de_tx_polling, .enable_hw_sec = rtl92de_enable_hw_security_config, .set_key = rtl92de_set_key, diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c index 5397481875b8..d1e56e09cfe8 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c @@ -858,6 +858,23 @@ u32 rtl92de_get_desc(u8 *p_desc, bool istx, u8 desc_name) return ret; } +bool rtl92de_is_tx_desc_closed(struct ieee80211_hw *hw, + u8 hw_queue, u16 index) +{ + struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); + struct rtl8192_tx_ring *ring = &rtlpci->tx_ring[hw_queue]; + u8 *entry = (u8 *)(&ring->desc[ring->idx]); + u8 own = (u8)rtl92de_get_desc(entry, true, HW_DESC_OWN); + + /* a beacon packet will only use the first + * descriptor by defaut, and the own bit may not + * be cleared by the hardware + */ + if (own) + return false; + return true; +} + void rtl92de_tx_polling(struct ieee80211_hw *hw, u8 hw_queue) { struct rtl_priv *rtlpriv = rtl_priv(hw); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.h b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.h index 9bb6cc648590..6cf23c278953 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.h +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.h @@ -736,6 +736,8 @@ bool rtl92de_rx_query_desc(struct ieee80211_hw *hw, void rtl92de_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx, u8 desc_name, u8 *val); u32 rtl92de_get_desc(u8 *pdesc, bool istx, u8 desc_name); +bool rtl92de_is_tx_desc_closed(struct ieee80211_hw *hw, + u8 hw_queue, u16 index); void rtl92de_tx_polling(struct ieee80211_hw *hw, u8 hw_queue); void rtl92de_tx_fill_cmddesc(struct ieee80211_hw *hw, u8 *pdesc, bool b_firstseg, bool b_lastseg, From 3ee9e4b70861a4af9a947cf67fc817aaffbcdd62 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 11 Nov 2019 13:40:46 -0600 Subject: [PATCH 1627/3715] rtlwifi: rtl8192de: Fix missing enable interrupt flag commit 330bb7117101099c687e9c7f13d48068670b9c62 upstream. In commit 38506ecefab9 ("rtlwifi: rtl_pci: Start modification for new drivers"), the flag that indicates that interrupts are enabled was never set. In addition, there are several places when enable/disable interrupts were commented out are restored. A sychronize_interrupts() call is removed. Fixes: 38506ecefab9 ("rtlwifi: rtl_pci: Start modification for new drivers") Cc: Stable # v3.18+ Signed-off-by: Larry Finger Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c index cf28d25c551f..80002292cd27 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c @@ -1198,6 +1198,7 @@ void rtl92de_enable_interrupt(struct ieee80211_hw *hw) rtl_write_dword(rtlpriv, REG_HIMR, rtlpci->irq_mask[0] & 0xFFFFFFFF); rtl_write_dword(rtlpriv, REG_HIMRE, rtlpci->irq_mask[1] & 0xFFFFFFFF); + rtlpci->irq_enabled = true; } void rtl92de_disable_interrupt(struct ieee80211_hw *hw) @@ -1207,7 +1208,7 @@ void rtl92de_disable_interrupt(struct ieee80211_hw *hw) rtl_write_dword(rtlpriv, REG_HIMR, IMR8190_DISABLED); rtl_write_dword(rtlpriv, REG_HIMRE, IMR8190_DISABLED); - synchronize_irq(rtlpci->pdev->irq); + rtlpci->irq_enabled = false; } static void _rtl92de_poweroff_adapter(struct ieee80211_hw *hw) @@ -1378,7 +1379,7 @@ void rtl92de_set_beacon_related_registers(struct ieee80211_hw *hw) bcn_interval = mac->beacon_interval; atim_window = 2; - /*rtl92de_disable_interrupt(hw); */ + rtl92de_disable_interrupt(hw); rtl_write_word(rtlpriv, REG_ATIMWND, atim_window); rtl_write_word(rtlpriv, REG_BCN_INTERVAL, bcn_interval); rtl_write_word(rtlpriv, REG_BCNTCFG, 0x660f); @@ -1398,9 +1399,9 @@ void rtl92de_set_beacon_interval(struct ieee80211_hw *hw) RT_TRACE(rtlpriv, COMP_BEACON, DBG_DMESG, "beacon_interval:%d\n", bcn_interval); - /* rtl92de_disable_interrupt(hw); */ + rtl92de_disable_interrupt(hw); rtl_write_word(rtlpriv, REG_BCN_INTERVAL, bcn_interval); - /* rtl92de_enable_interrupt(hw); */ + rtl92de_enable_interrupt(hw); } void rtl92de_update_interrupt_mask(struct ieee80211_hw *hw, From 02072c31e11badb90cc5000fe240ddd2b538570c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 6 Dec 2019 16:26:00 +0100 Subject: [PATCH 1628/3715] lib: raid6: fix awk build warnings commit 702600eef73033ddd4eafcefcbb6560f3e3a90f7 upstream. Newer versions of awk spit out these fun warnings: awk: ../lib/raid6/unroll.awk:16: warning: regexp escape sequence `\#' is not a known regexp operator As commit 700c1018b86d ("x86/insn: Fix awk regexp warnings") showed, it turns out that there are a number of awk strings that do not need to be escaped and newer versions of awk now warn about this. Fix the string up so that no warning is produced. The exact same kernel module gets created before and after this patch, showing that it wasn't needed. Link: https://lore.kernel.org/r/20191206152600.GA75093@kroah.com Signed-off-by: Greg Kroah-Hartman --- lib/raid6/unroll.awk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/raid6/unroll.awk b/lib/raid6/unroll.awk index c6aa03631df8..0809805a7e23 100644 --- a/lib/raid6/unroll.awk +++ b/lib/raid6/unroll.awk @@ -13,7 +13,7 @@ BEGIN { for (i = 0; i < rep; ++i) { tmp = $0 gsub(/\$\$/, i, tmp) - gsub(/\$\#/, n, tmp) + gsub(/\$#/, n, tmp) gsub(/\$\*/, "$", tmp) print tmp } From 6890751cfea7d79d998b049647a8c49e2f288ed0 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 6 Dec 2019 08:33:36 +0200 Subject: [PATCH 1629/3715] ovl: relax WARN_ON() on rename to self commit 6889ee5a53b8d969aa542047f5ac8acdc0e79a91 upstream. In ovl_rename(), if new upper is hardlinked to old upper underneath overlayfs before upper dirs are locked, user will get an ESTALE error and a WARN_ON will be printed. Changes to underlying layers while overlayfs is mounted may result in unexpected behavior, but it shouldn't crash the kernel and it shouldn't trigger WARN_ON() either, so relax this WARN_ON(). Reported-by: syzbot+bb1836a212e69f8e201a@syzkaller.appspotmail.com Fixes: 804032fabb3b ("ovl: don't check rename to self") Cc: # v4.9+ Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index ef11fa7b869e..8c561703275a 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -1042,7 +1042,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, if (newdentry == trap) goto out_dput; - if (WARN_ON(olddentry->d_inode == newdentry->d_inode)) + if (olddentry->d_inode == newdentry->d_inode) goto out_dput; err = 0; From 7dc871b7257a7afe05d1a6f145cbd6d69efb6800 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 28 Oct 2019 11:58:03 +0100 Subject: [PATCH 1630/3715] ALSA: hda - Fix pending unsol events at shutdown [ Upstream commit ca58f55108fee41d87c9123f85ad4863e5de7f45 ] This is an alternative fix attemp for the issue reported in the commit caa8422d01e9 ("ALSA: hda: Flush interrupts on disabling") that was reverted later due to regressions. Instead of tweaking the hardware disablement order and the enforced irq flushing, do calling cancel_work_sync() of the unsol work early enough, and explicitly ignore the unsol events during the shutdown by checking the bus->shutdown flag. Fixes: caa8422d01e9 ("ALSA: hda: Flush interrupts on disabling") Cc: Chris Wilson Link: https://lore.kernel.org/r/s5h1ruxt9cz.wl-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/hda_bind.c | 4 ++++ sound/pci/hda/hda_intel.c | 3 +++ 2 files changed, 7 insertions(+) diff --git a/sound/pci/hda/hda_bind.c b/sound/pci/hda/hda_bind.c index 8db1890605f6..c175b2cf63f7 100644 --- a/sound/pci/hda/hda_bind.c +++ b/sound/pci/hda/hda_bind.c @@ -42,6 +42,10 @@ static void hda_codec_unsol_event(struct hdac_device *dev, unsigned int ev) { struct hda_codec *codec = container_of(dev, struct hda_codec, core); + /* ignore unsol events during shutdown */ + if (codec->bus->shutdown) + return; + if (codec->patch_ops.unsol_event) codec->patch_ops.unsol_event(codec, ev); } diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 96e9b3944b92..890793ad85ca 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1450,8 +1450,11 @@ static int azx_free(struct azx *chip) static int azx_dev_disconnect(struct snd_device *device) { struct azx *chip = device->device_data; + struct hdac_bus *bus = azx_bus(chip); chip->bus.shutdown = 1; + cancel_work_sync(&bus->unsol_work); + return 0; } From 8f873b8b1f78a329711b88972c1da53440d6d84a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 21 Sep 2019 09:00:31 +0300 Subject: [PATCH 1631/3715] md/raid0: Fix an error message in raid0_make_request() [ Upstream commit e3fc3f3d0943b126f76b8533960e4168412d9e5a ] The first argument to WARN() is supposed to be a condition. The original code will just print the mdname() instead of the full warning message. Fixes: c84a1372df92 ("md/raid0: avoid RAID0 data corruption due to layout confusion.") Signed-off-by: Dan Carpenter Signed-off-by: Song Liu Signed-off-by: Sasha Levin --- drivers/md/raid0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 449c4dd060fc..204adde004a3 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -616,7 +616,7 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio) tmp_dev = map_sector(mddev, zone, sector, §or); break; default: - WARN("md/raid0:%s: Invalid layout\n", mdname(mddev)); + WARN(1, "md/raid0:%s: Invalid layout\n", mdname(mddev)); bio_io_error(bio); return true; } From 6a8791ab32a79c967a21c46ddcd9f6c5da59308b Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Fri, 8 Nov 2019 13:59:05 +1030 Subject: [PATCH 1632/3715] watchdog: aspeed: Fix clock behaviour for ast2600 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c04571251b3d842096f1597f5d4badb508be016d ] The ast2600 no longer uses bit 4 in the control register to indicate a 1MHz clock (It now controls whether this watchdog is reset by a SOC reset). This means we do not want to set it. It also does not need to be set for the ast2500, as it is read-only on that SoC. The comment next to the clock rate selection wandered away from where it was set, so put it back next to the register setting it's describing. Fixes: b3528b487448 ("watchdog: aspeed: Add support for AST2600") Signed-off-by: Joel Stanley Reviewed-by: Cédric Le Goater Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20191108032905.22463-1-joel@jms.id.au Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Sasha Levin --- drivers/watchdog/aspeed_wdt.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/watchdog/aspeed_wdt.c b/drivers/watchdog/aspeed_wdt.c index cee7334b2a00..f5835cbd5d41 100644 --- a/drivers/watchdog/aspeed_wdt.c +++ b/drivers/watchdog/aspeed_wdt.c @@ -204,11 +204,6 @@ static int aspeed_wdt_probe(struct platform_device *pdev) if (IS_ERR(wdt->base)) return PTR_ERR(wdt->base); - /* - * The ast2400 wdt can run at PCLK, or 1MHz. The ast2500 only - * runs at 1MHz. We chose to always run at 1MHz, as there's no - * good reason to have a faster watchdog counter. - */ wdt->wdd.info = &aspeed_wdt_info; wdt->wdd.ops = &aspeed_wdt_ops; wdt->wdd.max_hw_heartbeat_ms = WDT_MAX_TIMEOUT_MS; @@ -224,7 +219,16 @@ static int aspeed_wdt_probe(struct platform_device *pdev) return -EINVAL; config = ofdid->data; - wdt->ctrl = WDT_CTRL_1MHZ_CLK; + /* + * On clock rates: + * - ast2400 wdt can run at PCLK, or 1MHz + * - ast2500 only runs at 1MHz, hard coding bit 4 to 1 + * - ast2600 always runs at 1MHz + * + * Set the ast2400 to run at 1MHz as it simplifies the driver. + */ + if (of_device_is_compatible(np, "aspeed,ast2400-wdt")) + wdt->ctrl = WDT_CTRL_1MHZ_CLK; /* * Control reset on a per-device basis to ensure the From 53c2427d221918fb3cc3e71a08664bc9f9132e8c Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Mon, 14 Oct 2019 17:32:45 +0530 Subject: [PATCH 1633/3715] hwrng: omap - Fix RNG wait loop timeout commit be867f987a4e1222114dd07a01838a17c26f3fff upstream. Existing RNG data read timeout is 200us but it doesn't cover EIP76 RNG data rate which takes approx. 700us to produce 16 bytes of output data as per testing results. So configure the timeout as 1000us to also take account of lack of udelay()'s reliability. Fixes: 383212425c92 ("hwrng: omap - Add device variant for SafeXcel IP-76 found in Armada 8K") Cc: Signed-off-by: Sumit Garg Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/char/hw_random/omap-rng.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c index 25173454efa3..091753765d99 100644 --- a/drivers/char/hw_random/omap-rng.c +++ b/drivers/char/hw_random/omap-rng.c @@ -66,6 +66,13 @@ #define OMAP4_RNG_OUTPUT_SIZE 0x8 #define EIP76_RNG_OUTPUT_SIZE 0x10 +/* + * EIP76 RNG takes approx. 700us to produce 16 bytes of output data + * as per testing results. And to account for the lack of udelay()'s + * reliability, we keep the timeout as 1000us. + */ +#define RNG_DATA_FILL_TIMEOUT 100 + enum { RNG_OUTPUT_0_REG = 0, RNG_OUTPUT_1_REG, @@ -175,7 +182,7 @@ static int omap_rng_do_read(struct hwrng *rng, void *data, size_t max, if (max < priv->pdata->data_size) return 0; - for (i = 0; i < 20; i++) { + for (i = 0; i < RNG_DATA_FILL_TIMEOUT; i++) { present = priv->pdata->data_present(priv); if (present || !wait) break; From 6b3a529808ab7d3a936d0253d4b3195262af8b83 Mon Sep 17 00:00:00 2001 From: Dmitry Fomichev Date: Wed, 6 Nov 2019 14:34:35 -0800 Subject: [PATCH 1634/3715] dm zoned: reduce overhead of backing device checks commit e7fad909b68aa37470d9f2d2731b5bec355ee5d6 upstream. Commit 75d66ffb48efb3 added backing device health checks and as a part of these checks, check_events() block ops template call is invoked in dm-zoned mapping path as well as in reclaim and flush path. Calling check_events() with ATA or SCSI backing devices introduces a blocking scsi_test_unit_ready() call being made in sd_check_events(). Even though the overhead of calling scsi_test_unit_ready() is small for ATA zoned devices, it is much larger for SCSI and it affects performance in a very negative way. Fix this performance regression by executing check_events() only in case of any I/O errors. The function dmz_bdev_is_dying() is modified to call only blk_queue_dying(), while calls to check_events() are made in a new helper function, dmz_check_bdev(). Reported-by: zhangxiaoxu Fixes: 75d66ffb48efb3 ("dm zoned: properly handle backing device failure") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Fomichev Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-zoned-metadata.c | 29 +++++++++++------- drivers/md/dm-zoned-reclaim.c | 8 ++--- drivers/md/dm-zoned-target.c | 54 ++++++++++++++++++++++++---------- drivers/md/dm-zoned.h | 2 ++ 4 files changed, 61 insertions(+), 32 deletions(-) diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index b322821a6323..9b78f4a74a12 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -552,6 +552,7 @@ static struct dmz_mblock *dmz_get_mblock(struct dmz_metadata *zmd, TASK_UNINTERRUPTIBLE); if (test_bit(DMZ_META_ERROR, &mblk->state)) { dmz_release_mblock(zmd, mblk); + dmz_check_bdev(zmd->dev); return ERR_PTR(-EIO); } @@ -623,6 +624,8 @@ static int dmz_rdwr_block(struct dmz_metadata *zmd, int op, sector_t block, ret = submit_bio_wait(bio); bio_put(bio); + if (ret) + dmz_check_bdev(zmd->dev); return ret; } @@ -689,6 +692,7 @@ static int dmz_write_dirty_mblocks(struct dmz_metadata *zmd, TASK_UNINTERRUPTIBLE); if (test_bit(DMZ_META_ERROR, &mblk->state)) { clear_bit(DMZ_META_ERROR, &mblk->state); + dmz_check_bdev(zmd->dev); ret = -EIO; } nr_mblks_submitted--; @@ -766,7 +770,7 @@ int dmz_flush_metadata(struct dmz_metadata *zmd) /* If there are no dirty metadata blocks, just flush the device cache */ if (list_empty(&write_list)) { ret = blkdev_issue_flush(zmd->dev->bdev, GFP_NOIO, NULL); - goto out; + goto err; } /* @@ -776,7 +780,7 @@ int dmz_flush_metadata(struct dmz_metadata *zmd) */ ret = dmz_log_dirty_mblocks(zmd, &write_list); if (ret) - goto out; + goto err; /* * The log is on disk. It is now safe to update in place @@ -784,11 +788,11 @@ int dmz_flush_metadata(struct dmz_metadata *zmd) */ ret = dmz_write_dirty_mblocks(zmd, &write_list, zmd->mblk_primary); if (ret) - goto out; + goto err; ret = dmz_write_sb(zmd, zmd->mblk_primary); if (ret) - goto out; + goto err; while (!list_empty(&write_list)) { mblk = list_first_entry(&write_list, struct dmz_mblock, link); @@ -803,16 +807,20 @@ int dmz_flush_metadata(struct dmz_metadata *zmd) zmd->sb_gen++; out: - if (ret && !list_empty(&write_list)) { - spin_lock(&zmd->mblk_lock); - list_splice(&write_list, &zmd->mblk_dirty_list); - spin_unlock(&zmd->mblk_lock); - } - dmz_unlock_flush(zmd); up_write(&zmd->mblk_sem); return ret; + +err: + if (!list_empty(&write_list)) { + spin_lock(&zmd->mblk_lock); + list_splice(&write_list, &zmd->mblk_dirty_list); + spin_unlock(&zmd->mblk_lock); + } + if (!dmz_check_bdev(zmd->dev)) + ret = -EIO; + goto out; } /* @@ -1235,6 +1243,7 @@ static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone) if (ret) { dmz_dev_err(zmd->dev, "Get zone %u report failed", dmz_id(zmd, zone)); + dmz_check_bdev(zmd->dev); return ret; } diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c index a9f84a998476..2fad512dce98 100644 --- a/drivers/md/dm-zoned-reclaim.c +++ b/drivers/md/dm-zoned-reclaim.c @@ -81,6 +81,7 @@ static int dmz_reclaim_align_wp(struct dmz_reclaim *zrc, struct dm_zone *zone, "Align zone %u wp %llu to %llu (wp+%u) blocks failed %d", dmz_id(zmd, zone), (unsigned long long)wp_block, (unsigned long long)block, nr_blocks, ret); + dmz_check_bdev(zrc->dev); return ret; } @@ -490,12 +491,7 @@ static void dmz_reclaim_work(struct work_struct *work) ret = dmz_do_reclaim(zrc); if (ret) { dmz_dev_debug(zrc->dev, "Reclaim error %d\n", ret); - if (ret == -EIO) - /* - * LLD might be performing some error handling sequence - * at the underlying device. To not interfere, do not - * attempt to schedule the next reclaim run immediately. - */ + if (!dmz_check_bdev(zrc->dev)) return; } diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index 4694763f9d40..497a2bc5da51 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -79,6 +79,8 @@ static inline void dmz_bio_endio(struct bio *bio, blk_status_t status) if (status != BLK_STS_OK && bio->bi_status == BLK_STS_OK) bio->bi_status = status; + if (bio->bi_status != BLK_STS_OK) + bioctx->target->dev->flags |= DMZ_CHECK_BDEV; if (atomic_dec_and_test(&bioctx->ref)) { struct dm_zone *zone = bioctx->zone; @@ -564,31 +566,51 @@ out: } /* - * Check the backing device availability. If it's on the way out, + * Check if the backing device is being removed. If it's on the way out, * start failing I/O. Reclaim and metadata components also call this * function to cleanly abort operation in the event of such failure. */ bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev) { - struct gendisk *disk; + if (dmz_dev->flags & DMZ_BDEV_DYING) + return true; - if (!(dmz_dev->flags & DMZ_BDEV_DYING)) { - disk = dmz_dev->bdev->bd_disk; - if (blk_queue_dying(bdev_get_queue(dmz_dev->bdev))) { - dmz_dev_warn(dmz_dev, "Backing device queue dying"); - dmz_dev->flags |= DMZ_BDEV_DYING; - } else if (disk->fops->check_events) { - if (disk->fops->check_events(disk, 0) & - DISK_EVENT_MEDIA_CHANGE) { - dmz_dev_warn(dmz_dev, "Backing device offline"); - dmz_dev->flags |= DMZ_BDEV_DYING; - } - } + if (dmz_dev->flags & DMZ_CHECK_BDEV) + return !dmz_check_bdev(dmz_dev); + + if (blk_queue_dying(bdev_get_queue(dmz_dev->bdev))) { + dmz_dev_warn(dmz_dev, "Backing device queue dying"); + dmz_dev->flags |= DMZ_BDEV_DYING; } return dmz_dev->flags & DMZ_BDEV_DYING; } +/* + * Check the backing device availability. This detects such events as + * backing device going offline due to errors, media removals, etc. + * This check is less efficient than dmz_bdev_is_dying() and should + * only be performed as a part of error handling. + */ +bool dmz_check_bdev(struct dmz_dev *dmz_dev) +{ + struct gendisk *disk; + + dmz_dev->flags &= ~DMZ_CHECK_BDEV; + + if (dmz_bdev_is_dying(dmz_dev)) + return false; + + disk = dmz_dev->bdev->bd_disk; + if (disk->fops->check_events && + disk->fops->check_events(disk, 0) & DISK_EVENT_MEDIA_CHANGE) { + dmz_dev_warn(dmz_dev, "Backing device offline"); + dmz_dev->flags |= DMZ_BDEV_DYING; + } + + return !(dmz_dev->flags & DMZ_BDEV_DYING); +} + /* * Process a new BIO. */ @@ -901,8 +923,8 @@ static int dmz_prepare_ioctl(struct dm_target *ti, { struct dmz_target *dmz = ti->private; - if (dmz_bdev_is_dying(dmz->dev)) - return -ENODEV; + if (!dmz_check_bdev(dmz->dev)) + return -EIO; *bdev = dmz->dev->bdev; diff --git a/drivers/md/dm-zoned.h b/drivers/md/dm-zoned.h index 93a64529f219..2662746ba8b9 100644 --- a/drivers/md/dm-zoned.h +++ b/drivers/md/dm-zoned.h @@ -71,6 +71,7 @@ struct dmz_dev { /* Device flags. */ #define DMZ_BDEV_DYING (1 << 0) +#define DMZ_CHECK_BDEV (2 << 0) /* * Zone descriptor. @@ -254,5 +255,6 @@ void dmz_schedule_reclaim(struct dmz_reclaim *zrc); * Functions defined in dm-zoned-target.c */ bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev); +bool dmz_check_bdev(struct dmz_dev *dmz_dev); #endif /* DM_ZONED_H */ From 05905c2f2123a80dd8bdeee7f3178303ec97d08f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 18 Sep 2019 18:43:40 -0700 Subject: [PATCH 1635/3715] workqueue: Fix spurious sanity check failures in destroy_workqueue() commit def98c84b6cdf2eeea19ec5736e90e316df5206b upstream. Before actually destrying a workqueue, destroy_workqueue() checks whether it's actually idle. If it isn't, it prints out a bunch of warning messages and leaves the workqueue dangling. It unfortunately has a couple issues. * Mayday list queueing increments pwq's refcnts which gets detected as busy and fails the sanity checks. However, because mayday list queueing is asynchronous, this condition can happen without any actual work items left in the workqueue. * Sanity check failure leaves the sysfs interface behind too which can lead to init failure of newer instances of the workqueue. This patch fixes the above two by * If a workqueue has a rescuer, disable and kill the rescuer before sanity checks. Disabling and killing is guaranteed to flush the existing mayday list. * Remove sysfs interface before sanity checks. Signed-off-by: Tejun Heo Reported-by: Marcin Pawlowski Reported-by: "Williams, Gerald S" Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- kernel/workqueue.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 08bc551976b2..7d51ef56882b 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4084,9 +4084,28 @@ void destroy_workqueue(struct workqueue_struct *wq) struct pool_workqueue *pwq; int node; + /* + * Remove it from sysfs first so that sanity check failure doesn't + * lead to sysfs name conflicts. + */ + workqueue_sysfs_unregister(wq); + /* drain it before proceeding with destruction */ drain_workqueue(wq); + /* kill rescuer, if sanity checks fail, leave it w/o rescuer */ + if (wq->rescuer) { + struct worker *rescuer = wq->rescuer; + + /* this prevents new queueing */ + spin_lock_irq(&wq_mayday_lock); + wq->rescuer = NULL; + spin_unlock_irq(&wq_mayday_lock); + + /* rescuer will empty maydays list before exiting */ + kthread_stop(rescuer->task); + } + /* sanity checks */ mutex_lock(&wq->mutex); for_each_pwq(pwq, wq) { @@ -4118,11 +4137,6 @@ void destroy_workqueue(struct workqueue_struct *wq) list_del_rcu(&wq->list); mutex_unlock(&wq_pool_mutex); - workqueue_sysfs_unregister(wq); - - if (wq->rescuer) - kthread_stop(wq->rescuer->task); - if (!(wq->flags & WQ_UNBOUND)) { /* * The base ref is never dropped on per-cpu pwqs. Directly From 9e0f33da9079a3e4e3acd5bdf163963b901d8484 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 25 Sep 2019 06:59:15 -0700 Subject: [PATCH 1636/3715] workqueue: Fix pwq ref leak in rescuer_thread() commit e66b39af00f426b3356b96433d620cb3367ba1ff upstream. 008847f66c3 ("workqueue: allow rescuer thread to do more work.") made the rescuer worker requeue the pwq immediately if there may be more work items which need rescuing instead of waiting for the next mayday timer expiration. Unfortunately, it doesn't check whether the pwq is already on the mayday list and unconditionally gets the ref and moves it onto the list. This doesn't corrupt the list but creates an additional reference to the pwq. It got queued twice but will only be removed once. This leak later can trigger pwq refcnt warning on workqueue destruction and prevent freeing of the workqueue. Signed-off-by: Tejun Heo Cc: "Williams, Gerald S" Cc: NeilBrown Cc: stable@vger.kernel.org # v3.19+ Signed-off-by: Greg Kroah-Hartman --- kernel/workqueue.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 7d51ef56882b..183ddc34fd54 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2366,8 +2366,14 @@ repeat: */ if (need_to_create_worker(pool)) { spin_lock(&wq_mayday_lock); - get_pwq(pwq); - list_move_tail(&pwq->mayday_node, &wq->maydays); + /* + * Queue iff we aren't racing destruction + * and somebody else hasn't queued it already. + */ + if (wq->rescuer && list_empty(&pwq->mayday_node)) { + get_pwq(pwq); + list_add_tail(&pwq->mayday_node, &wq->maydays); + } spin_unlock(&wq_mayday_lock); } } @@ -4413,7 +4419,8 @@ static void show_pwq(struct pool_workqueue *pwq) pr_info(" pwq %d:", pool->id); pr_cont_pool_info(pool); - pr_cont(" active=%d/%d%s\n", pwq->nr_active, pwq->max_active, + pr_cont(" active=%d/%d refcnt=%d%s\n", + pwq->nr_active, pwq->max_active, pwq->refcnt, !list_empty(&pwq->mayday_node) ? " MAYDAY" : ""); hash_for_each(pool->busy_hash, bkt, worker, hentry) { From 51fb33d0b2b1c9bb69e5612eae5b57837f767e71 Mon Sep 17 00:00:00 2001 From: Pawel Harlozinski Date: Tue, 12 Nov 2019 14:02:36 +0100 Subject: [PATCH 1637/3715] ASoC: Jack: Fix NULL pointer dereference in snd_soc_jack_report commit 8f157d4ff039e03e2ed4cb602eeed2fd4687a58f upstream. Check for existance of jack before tracing. NULL pointer dereference has been reported by KASAN while unloading machine driver (snd_soc_cnl_rt274). Signed-off-by: Pawel Harlozinski Link: https://lore.kernel.org/r/20191112130237.10141-1-pawel.harlozinski@linux.intel.com Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-jack.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/soc-jack.c b/sound/soc/soc-jack.c index 99902ae1a2d9..b04ecc633da3 100644 --- a/sound/soc/soc-jack.c +++ b/sound/soc/soc-jack.c @@ -127,10 +127,9 @@ void snd_soc_jack_report(struct snd_soc_jack *jack, int status, int mask) unsigned int sync = 0; int enable; - trace_snd_soc_jack_report(jack, mask, status); - if (!jack) return; + trace_snd_soc_jack_report(jack, mask, status); dapm = &jack->card->dapm; From 365874a0eab5478d2d4f7b30e57bfc51dde7843c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 2 Nov 2019 16:02:15 +0800 Subject: [PATCH 1638/3715] blk-mq: avoid sysfs buffer overflow with too many CPU cores commit 8962842ca5abdcf98e22ab3b2b45a103f0408b95 upstream. It is reported that sysfs buffer overflow can be triggered if the system has too many CPU cores(>841 on 4K PAGE_SIZE) when showing CPUs of hctx via /sys/block/$DEV/mq/$N/cpu_list. Use snprintf to avoid the potential buffer overflow. This version doesn't change the attribute format, and simply stops showing CPU numbers if the buffer is going to overflow. Cc: stable@vger.kernel.org Fixes: 676141e48af7("blk-mq: don't dump CPU -> hw queue map on driver load") Signed-off-by: Ming Lei Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-mq-sysfs.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 79969c3c234f..de733e8cab2e 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -145,20 +145,25 @@ static ssize_t blk_mq_hw_sysfs_nr_reserved_tags_show(struct blk_mq_hw_ctx *hctx, static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page) { + const size_t size = PAGE_SIZE - 1; unsigned int i, first = 1; - ssize_t ret = 0; + int ret = 0, pos = 0; for_each_cpu(i, hctx->cpumask) { if (first) - ret += sprintf(ret + page, "%u", i); + ret = snprintf(pos + page, size - pos, "%u", i); else - ret += sprintf(ret + page, ", %u", i); + ret = snprintf(pos + page, size - pos, ", %u", i); + + if (ret >= size - pos) + break; first = 0; + pos += ret; } - ret += sprintf(ret + page, "\n"); - return ret; + ret = snprintf(pos + page, size - pos, "\n"); + return pos + ret; } static struct attribute *default_ctx_attrs[] = { From fc992138b37d85ff426f7f0c4fb90d0eb2ec7fff Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Thu, 17 Oct 2019 02:50:01 +1100 Subject: [PATCH 1639/3715] cgroup: pids: use atomic64_t for pids->limit commit a713af394cf382a30dd28a1015cbe572f1b9ca75 upstream. Because pids->limit can be changed concurrently (but we don't want to take a lock because it would be needlessly expensive), use atomic64_ts instead. Fixes: commit 49b786ea146f ("cgroup: implement the PIDs subsystem") Cc: stable@vger.kernel.org # v4.3+ Signed-off-by: Aleksa Sarai Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- kernel/cgroup/pids.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/kernel/cgroup/pids.c b/kernel/cgroup/pids.c index c9960baaa14f..940d2e8db776 100644 --- a/kernel/cgroup/pids.c +++ b/kernel/cgroup/pids.c @@ -48,7 +48,7 @@ struct pids_cgroup { * %PIDS_MAX = (%PID_MAX_LIMIT + 1). */ atomic64_t counter; - int64_t limit; + atomic64_t limit; /* Handle for "pids.events" */ struct cgroup_file events_file; @@ -76,8 +76,8 @@ pids_css_alloc(struct cgroup_subsys_state *parent) if (!pids) return ERR_PTR(-ENOMEM); - pids->limit = PIDS_MAX; atomic64_set(&pids->counter, 0); + atomic64_set(&pids->limit, PIDS_MAX); atomic64_set(&pids->events_limit, 0); return &pids->css; } @@ -149,13 +149,14 @@ static int pids_try_charge(struct pids_cgroup *pids, int num) for (p = pids; parent_pids(p); p = parent_pids(p)) { int64_t new = atomic64_add_return(num, &p->counter); + int64_t limit = atomic64_read(&p->limit); /* * Since new is capped to the maximum number of pid_t, if * p->limit is %PIDS_MAX then we know that this test will never * fail. */ - if (new > p->limit) + if (new > limit) goto revert; } @@ -280,7 +281,7 @@ set_limit: * Limit updates don't need to be mutex'd, since it isn't * critical that any racing fork()s follow the new limit. */ - pids->limit = limit; + atomic64_set(&pids->limit, limit); return nbytes; } @@ -288,7 +289,7 @@ static int pids_max_show(struct seq_file *sf, void *v) { struct cgroup_subsys_state *css = seq_css(sf); struct pids_cgroup *pids = css_pids(css); - int64_t limit = pids->limit; + int64_t limit = atomic64_read(&pids->limit); if (limit >= PIDS_MAX) seq_printf(sf, "%s\n", PIDS_MAX_STR); From 3557094f407c744e12b38fc4ddd5ee28b3ba1cbb Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Mon, 30 Sep 2019 23:31:47 +0300 Subject: [PATCH 1640/3715] ar5523: check NULL before memcpy() in ar5523_cmd() commit 315cee426f87658a6799815845788fde965ddaad upstream. memcpy() call with "idata == NULL && ilen == 0" results in undefined behavior in ar5523_cmd(). For example, NULL is passed in callchain "ar5523_stat_work() -> ar5523_cmd_write() -> ar5523_cmd()". This patch adds ilen check before memcpy() call in ar5523_cmd() to prevent an undefined behavior. Cc: Pontus Fuchs Cc: Kalle Valo Cc: "David S. Miller" Cc: David Laight Cc: stable@vger.kernel.org Signed-off-by: Denis Efremov Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ar5523/ar5523.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c index 68f0463ed8df..ad4a1efc57c9 100644 --- a/drivers/net/wireless/ath/ar5523/ar5523.c +++ b/drivers/net/wireless/ath/ar5523/ar5523.c @@ -255,7 +255,8 @@ static int ar5523_cmd(struct ar5523 *ar, u32 code, const void *idata, if (flags & AR5523_CMD_FLAG_MAGIC) hdr->magic = cpu_to_be32(1 << 24); - memcpy(hdr + 1, idata, ilen); + if (ilen) + memcpy(hdr + 1, idata, ilen); cmd->odata = odata; cmd->olen = olen; From 6ff3dd95600168b1695d818fcf4cd3a092adfc95 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Wed, 11 Sep 2019 19:42:23 +0200 Subject: [PATCH 1641/3715] s390/mm: properly clear _PAGE_NOEXEC bit when it is not supported commit ab874f22d35a8058d8fdee5f13eb69d8867efeae upstream. On older HW or under a hypervisor, w/o the instruction-execution- protection (IEP) facility, and also w/o EDAT-1, a translation-specification exception may be recognized when bit 55 of a pte is one (_PAGE_NOEXEC). The current code tries to prevent setting _PAGE_NOEXEC in such cases, by removing it within set_pte_at(). However, ptep_set_access_flags() will modify a pte directly, w/o using set_pte_at(). There is at least one scenario where this can result in an active pte with _PAGE_NOEXEC set, which would then lead to a panic due to a translation-specification exception (write to swapped out page): do_swap_page pte = mk_pte (with _PAGE_NOEXEC bit) set_pte_at (will remove _PAGE_NOEXEC bit in page table, but keep it in local variable pte) vmf->orig_pte = pte (pte still contains _PAGE_NOEXEC bit) do_wp_page wp_page_reuse entry = vmf->orig_pte (still with _PAGE_NOEXEC bit) ptep_set_access_flags (writes entry with _PAGE_NOEXEC bit) Fix this by clearing _PAGE_NOEXEC already in mk_pte_phys(), where the pgprot value is applied, so that no pte with _PAGE_NOEXEC will ever be visible, if it is not supported. The check in set_pte_at() can then also be removed. Cc: # 4.11+ Fixes: 57d7f939e7bd ("s390: add no-execute support") Signed-off-by: Gerald Schaefer Signed-off-by: Vasily Gorbik Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/pgtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index d7fe9838084d..328710b386e3 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1126,8 +1126,6 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t entry) { - if (!MACHINE_HAS_NX) - pte_val(entry) &= ~_PAGE_NOEXEC; if (pte_present(entry)) pte_val(entry) &= ~_PAGE_UNUSED; if (mm_has_pgste(mm)) @@ -1144,6 +1142,8 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) { pte_t __pte; pte_val(__pte) = physpage + pgprot_val(pgprot); + if (!MACHINE_HAS_NX) + pte_val(__pte) &= ~_PAGE_NOEXEC; return pte_mkyoung(__pte); } From 721e7e7f3de9d00896ba902278ee4025a0ab58bb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 10 Oct 2019 10:13:31 -0300 Subject: [PATCH 1642/3715] media: bdisp: fix memleak on release commit 11609a7e21f8cea42630350aa57662928fa4dc63 upstream. If a process is interrupted while accessing the video device and the device lock is contended, release() could return early and fail to free related resources. Note that the return value of the v4l2 release file operation is ignored. Fixes: 28ffeebbb7bd ("[media] bdisp: 2D blitter driver using v4l2 mem2mem framework") Cc: stable # 4.2 Signed-off-by: Johan Hovold Reviewed-by: Fabien Dessenne Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/sti/bdisp/bdisp-v4l2.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/media/platform/sti/bdisp/bdisp-v4l2.c b/drivers/media/platform/sti/bdisp/bdisp-v4l2.c index 939da6da7644..601ca2b2ecd3 100644 --- a/drivers/media/platform/sti/bdisp/bdisp-v4l2.c +++ b/drivers/media/platform/sti/bdisp/bdisp-v4l2.c @@ -651,8 +651,7 @@ static int bdisp_release(struct file *file) dev_dbg(bdisp->dev, "%s\n", __func__); - if (mutex_lock_interruptible(&bdisp->lock)) - return -ERESTARTSYS; + mutex_lock(&bdisp->lock); v4l2_m2m_ctx_release(ctx->fh.m2m_ctx); From 62b9c0f96f80bee929027b0ac23b240c7464fe73 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 10 Oct 2019 10:13:32 -0300 Subject: [PATCH 1643/3715] media: radio: wl1273: fix interrupt masking on release commit 1091eb830627625dcf79958d99353c2391f41708 upstream. If a process is interrupted while accessing the radio device and the core lock is contended, release() could return early and fail to update the interrupt mask. Note that the return value of the v4l2 release file operation is ignored. Fixes: 87d1a50ce451 ("[media] V4L2: WL1273 FM Radio: TI WL1273 FM radio driver") Cc: stable # 2.6.38 Cc: Matti Aaltonen Signed-off-by: Johan Hovold Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/radio/radio-wl1273.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/media/radio/radio-wl1273.c b/drivers/media/radio/radio-wl1273.c index 903fcd5e99c0..fc1ae86911b9 100644 --- a/drivers/media/radio/radio-wl1273.c +++ b/drivers/media/radio/radio-wl1273.c @@ -1156,8 +1156,7 @@ static int wl1273_fm_fops_release(struct file *file) if (radio->rds_users > 0) { radio->rds_users--; if (radio->rds_users == 0) { - if (mutex_lock_interruptible(&core->lock)) - return -EINTR; + mutex_lock(&core->lock); radio->irq_flags &= ~WL1273_RDS_EVENT; From 958d6e4dc61f9bff9fffd04c9cab70772a8bf4ee Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 16 Sep 2019 02:47:41 -0300 Subject: [PATCH 1644/3715] media: cec.h: CEC_OP_REC_FLAG_ values were swapped commit 806e0cdfee0b99efbb450f9f6e69deb7118602fc upstream. CEC_OP_REC_FLAG_NOT_USED is 0 and CEC_OP_REC_FLAG_USED is 1, not the other way around. Signed-off-by: Hans Verkuil Reported-by: Jiunn Chang Cc: # for v4.10 and up Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/cec.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/cec.h b/include/uapi/linux/cec.h index c3114c989e91..f50dd34e4f7b 100644 --- a/include/uapi/linux/cec.h +++ b/include/uapi/linux/cec.h @@ -789,8 +789,8 @@ struct cec_event { #define CEC_MSG_SELECT_DIGITAL_SERVICE 0x93 #define CEC_MSG_TUNER_DEVICE_STATUS 0x07 /* Recording Flag Operand (rec_flag) */ -#define CEC_OP_REC_FLAG_USED 0 -#define CEC_OP_REC_FLAG_NOT_USED 1 +#define CEC_OP_REC_FLAG_NOT_USED 0 +#define CEC_OP_REC_FLAG_USED 1 /* Tuner Display Info Operand (tuner_display_info) */ #define CEC_OP_TUNER_DISPLAY_INFO_DIGITAL 0 #define CEC_OP_TUNER_DISPLAY_INFO_NONE 1 From 9c4aca8b2a0dd01bfd4b06fc1b5513b158669c1f Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Wed, 23 Oct 2019 09:57:14 +0800 Subject: [PATCH 1645/3715] cpuidle: Do not unset the driver if it is there already commit 918c1fe9fbbe46fcf56837ff21f0ef96424e8b29 upstream. Fix __cpuidle_set_driver() to check if any of the CPUs in the mask has a driver different from drv already and, if so, return -EBUSY before updating any cpuidle_drivers per-CPU pointers. Fixes: 82467a5a885d ("cpuidle: simplify multiple driver support") Cc: 3.11+ # 3.11+ Signed-off-by: Zhenzhong Duan [ rjw: Subject & changelog ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpuidle/driver.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index dc32f34e68d9..01acd88c4193 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -62,25 +62,24 @@ static inline void __cpuidle_unset_driver(struct cpuidle_driver *drv) * __cpuidle_set_driver - set per CPU driver variables for the given driver. * @drv: a valid pointer to a struct cpuidle_driver * - * For each CPU in the driver's cpumask, unset the registered driver per CPU - * to @drv. - * - * Returns 0 on success, -EBUSY if the CPUs have driver(s) already. + * Returns 0 on success, -EBUSY if any CPU in the cpumask have a driver + * different from drv already. */ static inline int __cpuidle_set_driver(struct cpuidle_driver *drv) { int cpu; for_each_cpu(cpu, drv->cpumask) { + struct cpuidle_driver *old_drv; - if (__cpuidle_get_cpu_driver(cpu)) { - __cpuidle_unset_driver(drv); + old_drv = __cpuidle_get_cpu_driver(cpu); + if (old_drv && old_drv != drv) return -EBUSY; - } - - per_cpu(cpuidle_drivers, cpu) = drv; } + for_each_cpu(cpu, drv->cpumask) + per_cpu(cpuidle_drivers, cpu) = drv; + return 0; } From 644d976f0b0032abc548a8a02e145988f33de47b Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Wed, 20 Nov 2019 15:08:04 +0200 Subject: [PATCH 1646/3715] intel_th: Fix a double put_device() in error path commit 512592779a337feb5905d8fcf9498dbf33672d4a upstream. Commit a753bfcfdb1f ("intel_th: Make the switch allocate its subdevices") factored out intel_th_subdevice_alloc() from intel_th_populate(), but got the error path wrong, resulting in two instances of a double put_device() on a freshly initialized, but not 'added' device. Fix this by only doing one put_device() in the error path. Signed-off-by: Alexander Shishkin Fixes: a753bfcfdb1f ("intel_th: Make the switch allocate its subdevices") Reported-by: Wen Yang Reviewed-by: Andy Shevchenko Cc: stable@vger.kernel.org # v4.14+ Link: https://lore.kernel.org/r/20191120130806.44028-2-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/core.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/hwtracing/intel_th/core.c b/drivers/hwtracing/intel_th/core.c index 757801d27604..6a451b4fc04d 100644 --- a/drivers/hwtracing/intel_th/core.c +++ b/drivers/hwtracing/intel_th/core.c @@ -628,10 +628,8 @@ intel_th_subdevice_alloc(struct intel_th *th, } err = intel_th_device_add_resources(thdev, res, subdev->nres); - if (err) { - put_device(&thdev->dev); + if (err) goto fail_put_device; - } if (subdev->type == INTEL_TH_OUTPUT) { thdev->dev.devt = MKDEV(th->major, th->num_thdevs); @@ -644,10 +642,8 @@ intel_th_subdevice_alloc(struct intel_th *th, } err = device_add(&thdev->dev); - if (err) { - put_device(&thdev->dev); + if (err) goto fail_free_res; - } /* need switch driver to be loaded to enumerate the rest */ if (subdev->type == INTEL_TH_SWITCH && !req) { From 735ef8110f629226d61f393e8254bef46e69d6c2 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Wed, 20 Nov 2019 15:08:05 +0200 Subject: [PATCH 1647/3715] intel_th: pci: Add Ice Lake CPU support commit 6a1743422a7c0fda26764a544136cac13e5ae486 upstream. This adds support for the Trace Hub in Ice Lake CPU. Signed-off-by: Alexander Shishkin Reviewed-by: Andy Shevchenko Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20191120130806.44028-3-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index 7486d5d67186..8cdd23506ba9 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -193,6 +193,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x45c5), .driver_data = (kernel_ulong_t)&intel_th_2x, }, + { + /* Ice Lake CPU */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x8a29), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, { /* Tiger Lake PCH */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa0a6), From 638327101f3e022ab4edf496f0cc8ee1c5e197e6 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Wed, 20 Nov 2019 15:08:06 +0200 Subject: [PATCH 1648/3715] intel_th: pci: Add Tiger Lake CPU support commit 6e6c18bcb78c0dc0601ebe216bed12c844492d0c upstream. This adds support for the Trace Hub in Tiger Lake CPU. Signed-off-by: Alexander Shishkin Reviewed-by: Andy Shevchenko Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20191120130806.44028-4-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index 8cdd23506ba9..c224b92a80f1 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -198,6 +198,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x8a29), .driver_data = (kernel_ulong_t)&intel_th_2x, }, + { + /* Tiger Lake CPU */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x9a33), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, { /* Tiger Lake PCH */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa0a6), From 19c513e62cc6a8b7adb8b9a98c6289bceddbd692 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Tue, 24 Sep 2019 10:52:23 +0300 Subject: [PATCH 1649/3715] PM / devfreq: Lock devfreq in trans_stat_show commit 2abb0d5268ae7b5ddf82099b1f8d5aa8414637d4 upstream. There is no locking in this sysfs show function so stats printing can race with a devfreq_update_status called as part of freq switching or with initialization. Also add an assert in devfreq_update_status to make it clear that lock must be held by caller. Fixes: 39688ce6facd ("PM / devfreq: account suspend/resume for stats") Cc: stable@vger.kernel.org Signed-off-by: Leonard Crestez Reviewed-by: Matthias Kaehlcke Reviewed-by: Chanwoo Choi Signed-off-by: Chanwoo Choi Signed-off-by: Greg Kroah-Hartman --- drivers/devfreq/devfreq.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 8a411514a7c5..dc9c0032c97b 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -133,6 +133,7 @@ int devfreq_update_status(struct devfreq *devfreq, unsigned long freq) int lev, prev_lev, ret = 0; unsigned long cur_time; + lockdep_assert_held(&devfreq->lock); cur_time = jiffies; /* Immediately exit if previous_freq is not initialized yet. */ @@ -1161,12 +1162,17 @@ static ssize_t trans_stat_show(struct device *dev, int i, j; unsigned int max_state = devfreq->profile->max_state; - if (!devfreq->stop_polling && - devfreq_update_status(devfreq, devfreq->previous_freq)) - return 0; if (max_state == 0) return sprintf(buf, "Not Supported.\n"); + mutex_lock(&devfreq->lock); + if (!devfreq->stop_polling && + devfreq_update_status(devfreq, devfreq->previous_freq)) { + mutex_unlock(&devfreq->lock); + return 0; + } + mutex_unlock(&devfreq->lock); + len = sprintf(buf, " From : To\n"); len += sprintf(buf + len, " :"); for (i = 0; i < max_state; i++) From 6dd08f64d4aec82510820f9359b8e6d37ef61728 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Wed, 30 Oct 2019 22:21:59 -0700 Subject: [PATCH 1650/3715] cpufreq: powernv: fix stack bloat and hard limit on number of CPUs commit db0d32d84031188443e25edbd50a71a6e7ac5d1d upstream. The following build warning occurred on powerpc 64-bit builds: drivers/cpufreq/powernv-cpufreq.c: In function 'init_chip_info': drivers/cpufreq/powernv-cpufreq.c:1070:1: warning: the frame size of 1040 bytes is larger than 1024 bytes [-Wframe-larger-than=] This is with a cross-compiler based on gcc 8.1.0, which I got from: https://mirrors.edge.kernel.org/pub/tools/crosstool/files/bin/x86_64/8.1.0/ The warning is due to putting 1024 bytes on the stack: unsigned int chip[256]; ...and it's also undesirable to have a hard limit on the number of CPUs here. Fix both problems by dynamically allocating based on num_possible_cpus, as recommended by Michael Ellerman. Fixes: 053819e0bf840 ("cpufreq: powernv: Handle throttling due to Pmax capping at chip level") Signed-off-by: John Hubbard Acked-by: Viresh Kumar Cc: 4.10+ # 4.10+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/powernv-cpufreq.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index a28bb8f3f395..33854bf127f9 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -1002,9 +1002,14 @@ static struct cpufreq_driver powernv_cpufreq_driver = { static int init_chip_info(void) { - unsigned int chip[256]; + unsigned int *chip; unsigned int cpu, i; unsigned int prev_chip_id = UINT_MAX; + int ret = 0; + + chip = kcalloc(num_possible_cpus(), sizeof(*chip), GFP_KERNEL); + if (!chip) + return -ENOMEM; for_each_possible_cpu(cpu) { unsigned int id = cpu_to_chip_id(cpu); @@ -1016,8 +1021,10 @@ static int init_chip_info(void) } chips = kcalloc(nr_chips, sizeof(struct chip), GFP_KERNEL); - if (!chips) - return -ENOMEM; + if (!chips) { + ret = -ENOMEM; + goto free_and_return; + } for (i = 0; i < nr_chips; i++) { chips[i].id = chip[i]; @@ -1027,7 +1034,9 @@ static int init_chip_info(void) per_cpu(chip_info, cpu) = &chips[i]; } - return 0; +free_and_return: + kfree(chip); + return ret; } static inline void clean_chip_info(void) From 9c996bfa0802a7d69387efc27e16dacdaa4fa227 Mon Sep 17 00:00:00 2001 From: Francesco Ruggeri Date: Tue, 19 Nov 2019 21:47:27 -0800 Subject: [PATCH 1651/3715] ACPI: OSL: only free map once in osl.c commit 833a426cc471b6088011b3d67f1dc4e147614647 upstream. acpi_os_map_cleanup checks map->refcount outside of acpi_ioremap_lock before freeing the map. This creates a race condition the can result in the map being freed more than once. A panic can be caused by running for ((i=0; i<10; i++)) do for ((j=0; j<100000; j++)) do cat /sys/firmware/acpi/tables/data/BERT >/dev/null done & done This patch makes sure that only the process that drops the reference to 0 does the freeing. Fixes: b7c1fadd6c2e ("ACPI: Do not use krefs under a mutex in osl.c") Signed-off-by: Francesco Ruggeri Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Cc: All applicable Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/osl.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 9da7e7d874bd..ff36b0101ff0 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -371,19 +371,21 @@ void *__ref acpi_os_map_memory(acpi_physical_address phys, acpi_size size) } EXPORT_SYMBOL_GPL(acpi_os_map_memory); -static void acpi_os_drop_map_ref(struct acpi_ioremap *map) +/* Must be called with mutex_lock(&acpi_ioremap_lock) */ +static unsigned long acpi_os_drop_map_ref(struct acpi_ioremap *map) { - if (!--map->refcount) + unsigned long refcount = --map->refcount; + + if (!refcount) list_del_rcu(&map->list); + return refcount; } static void acpi_os_map_cleanup(struct acpi_ioremap *map) { - if (!map->refcount) { - synchronize_rcu_expedited(); - acpi_unmap(map->phys, map->virt); - kfree(map); - } + synchronize_rcu_expedited(); + acpi_unmap(map->phys, map->virt); + kfree(map); } /** @@ -403,6 +405,7 @@ static void acpi_os_map_cleanup(struct acpi_ioremap *map) void __ref acpi_os_unmap_iomem(void __iomem *virt, acpi_size size) { struct acpi_ioremap *map; + unsigned long refcount; if (!acpi_permanent_mmap) { __acpi_unmap_table(virt, size); @@ -416,10 +419,11 @@ void __ref acpi_os_unmap_iomem(void __iomem *virt, acpi_size size) WARN(true, PREFIX "%s: bad address %p\n", __func__, virt); return; } - acpi_os_drop_map_ref(map); + refcount = acpi_os_drop_map_ref(map); mutex_unlock(&acpi_ioremap_lock); - acpi_os_map_cleanup(map); + if (!refcount) + acpi_os_map_cleanup(map); } EXPORT_SYMBOL_GPL(acpi_os_unmap_iomem); @@ -454,6 +458,7 @@ void acpi_os_unmap_generic_address(struct acpi_generic_address *gas) { u64 addr; struct acpi_ioremap *map; + unsigned long refcount; if (gas->space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) return; @@ -469,10 +474,11 @@ void acpi_os_unmap_generic_address(struct acpi_generic_address *gas) mutex_unlock(&acpi_ioremap_lock); return; } - acpi_os_drop_map_ref(map); + refcount = acpi_os_drop_map_ref(map); mutex_unlock(&acpi_ioremap_lock); - acpi_os_map_cleanup(map); + if (!refcount) + acpi_os_map_cleanup(map); } EXPORT_SYMBOL(acpi_os_unmap_generic_address); From a136f412dbac4718137a05f24d9bd6906e53301a Mon Sep 17 00:00:00 2001 From: Vamshi K Sthambamkadi Date: Thu, 28 Nov 2019 15:58:29 +0530 Subject: [PATCH 1652/3715] ACPI: bus: Fix NULL pointer check in acpi_bus_get_private_data() commit 627ead724eff33673597216f5020b72118827de4 upstream. kmemleak reported backtrace: [] kmem_cache_alloc_trace+0x128/0x260 [<6677f215>] i2c_acpi_install_space_handler+0x4b/0xe0 [<1180f4fc>] i2c_register_adapter+0x186/0x400 [<6083baf7>] i2c_add_adapter+0x4e/0x70 [] intel_gmbus_setup+0x1a2/0x2c0 [i915] [<84cb69ae>] i915_driver_probe+0x8d8/0x13a0 [i915] [<81911d4b>] i915_pci_probe+0x48/0x160 [i915] [<4b159af1>] pci_device_probe+0xdc/0x160 [] really_probe+0x1ee/0x450 [] driver_probe_device+0x142/0x1b0 [] device_driver_attach+0x49/0x50 [] __driver_attach+0xc9/0x150 [] bus_for_each_dev+0x56/0xa0 [<80089bba>] driver_attach+0x19/0x20 [] bus_add_driver+0x177/0x220 [<7b29d8c7>] driver_register+0x56/0xf0 In i2c_acpi_remove_space_handler(), a leak occurs whenever the "data" parameter is initialized to 0 before being passed to acpi_bus_get_private_data(). This is because the NULL pointer check in acpi_bus_get_private_data() (condition->if(!*data)) returns EINVAL and, in consequence, memory is never freed in i2c_acpi_remove_space_handler(). Fix the NULL pointer check in acpi_bus_get_private_data() to follow the analogous check in acpi_get_data_full(). Signed-off-by: Vamshi K Sthambamkadi [ rjw: Subject & changelog ] Cc: All applicable Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index f0348e388d01..1cb7c6a52f61 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -196,7 +196,7 @@ int acpi_bus_get_private_data(acpi_handle handle, void **data) { acpi_status status; - if (!*data) + if (!data) return -EINVAL; status = acpi_get_data(handle, acpi_bus_private_data_handler, data); From 63d22de1f7aa3b8a588d53f3a5ca0f4fec1cc8af Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 4 Dec 2019 02:54:27 +0100 Subject: [PATCH 1653/3715] ACPI: PM: Avoid attaching ACPI PM domain to certain devices commit b9ea0bae260f6aae546db224daa6ac1bd9d94b91 upstream. Certain ACPI-enumerated devices represented as platform devices in Linux, like fans, require special low-level power management handling implemented by their drivers that is not in agreement with the ACPI PM domain behavior. That leads to problems with managing ACPI fans during system-wide suspend and resume. For this reason, make acpi_dev_pm_attach() skip the affected devices by adding a list of device IDs to avoid to it and putting the IDs of the affected devices into that list. Fixes: e5cc8ef31267 (ACPI / PM: Provide ACPI PM callback routines for subsystems) Reported-by: Zhang Rui Tested-by: Todd Brandt Cc: 3.10+ # 3.10+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/device_pm.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 18af71057b44..fc300ce3ae8e 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -1154,9 +1154,19 @@ static void acpi_dev_pm_detach(struct device *dev, bool power_off) */ int acpi_dev_pm_attach(struct device *dev, bool power_on) { + /* + * Skip devices whose ACPI companions match the device IDs below, + * because they require special power management handling incompatible + * with the generic ACPI PM domain. + */ + static const struct acpi_device_id special_pm_ids[] = { + {"PNP0C0B", }, /* Generic ACPI fan */ + {"INT3404", }, /* Fan */ + {} + }; struct acpi_device *adev = ACPI_COMPANION(dev); - if (!adev) + if (!adev || !acpi_match_device_ids(adev, special_pm_ids)) return -ENODEV; if (dev->pm_domain) From d892c97851235087932113d7fefd818484ceba5a Mon Sep 17 00:00:00 2001 From: Nishka Dasgupta Date: Sun, 4 Aug 2019 21:32:00 +0530 Subject: [PATCH 1654/3715] pinctrl: samsung: Add of_node_put() before return in error path commit 3d2557ab75d4c568c79eefa2e550e0d80348a6bd upstream. Each iteration of for_each_child_of_node puts the previous node, but in the case of a return from the middle of the loop, there is no put, thus causing a memory leak. Hence add an of_node_put before the return of exynos_eint_wkup_init() error path. Issue found with Coccinelle. Signed-off-by: Nishka Dasgupta Cc: Fixes: 14c255d35b25 ("pinctrl: exynos: Add irq_chip instance for Exynos7 wakeup interrupts") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/samsung/pinctrl-exynos.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.c b/drivers/pinctrl/samsung/pinctrl-exynos.c index c8d0de7ea160..1c534d823fd7 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos.c +++ b/drivers/pinctrl/samsung/pinctrl-exynos.c @@ -467,8 +467,10 @@ int exynos_eint_wkup_init(struct samsung_pinctrl_drv_data *d) if (match) { irq_chip = kmemdup(match->data, sizeof(*irq_chip), GFP_KERNEL); - if (!irq_chip) + if (!irq_chip) { + of_node_put(np); return -ENOMEM; + } wkup_np = np; break; } From a5958149d5eaac72e78304a7e3d501eb59b4ee7f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 5 Aug 2019 18:27:08 +0200 Subject: [PATCH 1655/3715] pinctrl: samsung: Fix device node refcount leaks in S3C24xx wakeup controller init commit 6fbbcb050802d6ea109f387e961b1dbcc3a80c96 upstream. In s3c24xx_eint_init() the for_each_child_of_node() loop is used with a break to find a matching child node. Although each iteration of for_each_child_of_node puts the previous node, but early exit from loop misses it. This leads to leak of device node. Cc: Fixes: af99a7507469 ("pinctrl: Add pinctrl-s3c24xx driver") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/samsung/pinctrl-s3c24xx.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/samsung/pinctrl-s3c24xx.c b/drivers/pinctrl/samsung/pinctrl-s3c24xx.c index 67da1cf18b68..46b1a9b2238b 100644 --- a/drivers/pinctrl/samsung/pinctrl-s3c24xx.c +++ b/drivers/pinctrl/samsung/pinctrl-s3c24xx.c @@ -495,8 +495,10 @@ static int s3c24xx_eint_init(struct samsung_pinctrl_drv_data *d) return -ENODEV; eint_data = devm_kzalloc(dev, sizeof(*eint_data), GFP_KERNEL); - if (!eint_data) + if (!eint_data) { + of_node_put(eint_np); return -ENOMEM; + } eint_data->drvdata = d; @@ -508,12 +510,14 @@ static int s3c24xx_eint_init(struct samsung_pinctrl_drv_data *d) irq = irq_of_parse_and_map(eint_np, i); if (!irq) { dev_err(dev, "failed to get wakeup EINT IRQ %d\n", i); + of_node_put(eint_np); return -ENXIO; } eint_data->parents[i] = irq; irq_set_chained_handler_and_data(irq, handlers[i], eint_data); } + of_node_put(eint_np); bank = d->pin_banks; for (i = 0; i < d->nr_banks; ++i, ++bank) { From 3708cad610607c7b5f1d618940eb6742744385b2 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 5 Aug 2019 18:27:10 +0200 Subject: [PATCH 1656/3715] pinctrl: samsung: Fix device node refcount leaks in init code commit a322b3377f4bac32aa25fb1acb9e7afbbbbd0137 upstream. Several functions use for_each_child_of_node() loop with a break to find a matching child node. Although each iteration of for_each_child_of_node puts the previous node, but early exit from loop misses it. This leads to leak of device node. Cc: Fixes: 9a2c1c3b91aa ("pinctrl: samsung: Allow grouping multiple pinmux/pinconf nodes") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/samsung/pinctrl-samsung.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.c b/drivers/pinctrl/samsung/pinctrl-samsung.c index 26e8fab736f1..7c0f5d4e89f3 100644 --- a/drivers/pinctrl/samsung/pinctrl-samsung.c +++ b/drivers/pinctrl/samsung/pinctrl-samsung.c @@ -277,6 +277,7 @@ static int samsung_dt_node_to_map(struct pinctrl_dev *pctldev, &reserved_maps, num_maps); if (ret < 0) { samsung_dt_free_map(pctldev, *map, *num_maps); + of_node_put(np); return ret; } } @@ -761,8 +762,10 @@ static struct samsung_pmx_func *samsung_pinctrl_create_functions( if (!of_get_child_count(cfg_np)) { ret = samsung_pinctrl_create_function(dev, drvdata, cfg_np, func); - if (ret < 0) + if (ret < 0) { + of_node_put(cfg_np); return ERR_PTR(ret); + } if (ret > 0) { ++func; ++func_cnt; @@ -773,8 +776,11 @@ static struct samsung_pmx_func *samsung_pinctrl_create_functions( for_each_child_of_node(cfg_np, func_np) { ret = samsung_pinctrl_create_function(dev, drvdata, func_np, func); - if (ret < 0) + if (ret < 0) { + of_node_put(func_np); + of_node_put(cfg_np); return ERR_PTR(ret); + } if (ret > 0) { ++func; ++func_cnt; From 05ee4d44389663446ba94af894d0df183f25d67c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 5 Aug 2019 18:27:09 +0200 Subject: [PATCH 1657/3715] pinctrl: samsung: Fix device node refcount leaks in S3C64xx wakeup controller init commit 7f028caadf6c37580d0f59c6c094ed09afc04062 upstream. In s3c64xx_eint_eint0_init() the for_each_child_of_node() loop is used with a break to find a matching child node. Although each iteration of for_each_child_of_node puts the previous node, but early exit from loop misses it. This leads to leak of device node. Cc: Fixes: 61dd72613177 ("pinctrl: Add pinctrl-s3c64xx driver") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/samsung/pinctrl-s3c64xx.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/samsung/pinctrl-s3c64xx.c b/drivers/pinctrl/samsung/pinctrl-s3c64xx.c index 0bdc1e683181..cf3a3af82321 100644 --- a/drivers/pinctrl/samsung/pinctrl-s3c64xx.c +++ b/drivers/pinctrl/samsung/pinctrl-s3c64xx.c @@ -709,8 +709,10 @@ static int s3c64xx_eint_eint0_init(struct samsung_pinctrl_drv_data *d) return -ENODEV; data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); - if (!data) + if (!data) { + of_node_put(eint0_np); return -ENOMEM; + } data->drvdata = d; for (i = 0; i < NUM_EINT0_IRQ; ++i) { @@ -719,6 +721,7 @@ static int s3c64xx_eint_eint0_init(struct samsung_pinctrl_drv_data *d) irq = irq_of_parse_and_map(eint0_np, i); if (!irq) { dev_err(dev, "failed to get wakeup EINT IRQ %d\n", i); + of_node_put(eint0_np); return -ENXIO; } @@ -726,6 +729,7 @@ static int s3c64xx_eint_eint0_init(struct samsung_pinctrl_drv_data *d) s3c64xx_eint0_handlers[i], data); } + of_node_put(eint0_np); bank = d->pin_banks; for (i = 0; i < d->nr_banks; ++i, ++bank) { From c8838dba5ad776273f8472c2e760dda1973f0e42 Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Thu, 7 Nov 2019 11:30:37 +0100 Subject: [PATCH 1658/3715] mmc: host: omap_hsmmc: add code for special init of wl1251 to get rid of pandora_wl1251_init_card commit f6498b922e57aecbe3b7fa30a308d9d586c0c369 upstream. Pandora_wl1251_init_card was used to do special pdata based setup of the sdio mmc interface. This does no longer work with v4.7 and later. A fix requires a device tree based mmc3 setup. Therefore we move the special setup to omap_hsmmc.c instead of calling some pdata supplied init_card function. The new code checks for a DT child node compatible to wl1251 so it will not affect other MMC3 use cases. Generally, this code was and still is a hack and should be moved to mmc core to e.g. read such properties from optional DT child nodes. Fixes: 81eef6ca9201 ("mmc: omap_hsmmc: Use dma_request_chan() for requesting DMA channel") Signed-off-by: H. Nikolaus Schaller Cc: # v4.7+ [Ulf: Fixed up some checkpatch complaints] Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/omap_hsmmc.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index 3f3ff7530b76..ea12712bd2c3 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -1678,6 +1678,36 @@ static void omap_hsmmc_init_card(struct mmc_host *mmc, struct mmc_card *card) if (mmc_pdata(host)->init_card) mmc_pdata(host)->init_card(card); + else if (card->type == MMC_TYPE_SDIO || + card->type == MMC_TYPE_SD_COMBO) { + struct device_node *np = mmc_dev(mmc)->of_node; + + /* + * REVISIT: should be moved to sdio core and made more + * general e.g. by expanding the DT bindings of child nodes + * to provide a mechanism to provide this information: + * Documentation/devicetree/bindings/mmc/mmc-card.txt + */ + + np = of_get_compatible_child(np, "ti,wl1251"); + if (np) { + /* + * We have TI wl1251 attached to MMC3. Pass this + * information to the SDIO core because it can't be + * probed by normal methods. + */ + + dev_info(host->dev, "found wl1251\n"); + card->quirks |= MMC_QUIRK_NONSTD_SDIO; + card->cccr.wide_bus = 1; + card->cis.vendor = 0x104c; + card->cis.device = 0x9066; + card->cis.blksize = 512; + card->cis.max_dtr = 24000000; + card->ocr = 0x80; + of_node_put(np); + } + } } static void omap_hsmmc_enable_sdio_irq(struct mmc_host *mmc, int enable) From 822e1ba45f4879184fc17b4a0246da71d771f7e3 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Sat, 16 Nov 2019 17:16:51 +0200 Subject: [PATCH 1659/3715] ARM: dts: omap3-tao3530: Fix incorrect MMC card detection GPIO polarity commit 287897f9aaa2ad1c923d9875914f57c4dc9159c8 upstream. The MMC card detection GPIO polarity is active low on TAO3530, like in many other similar boards. Now the card is not detected and it is unable to mount rootfs from an SD card. Fix this by using the correct polarity. This incorrect polarity was defined already in the commit 30d95c6d7092 ("ARM: dts: omap3: Add Technexion TAO3530 SOM omap3-tao3530.dtsi") in v3.18 kernel and later changed to use defined GPIO constants in v4.4 kernel by the commit 3a637e008e54 ("ARM: dts: Use defined GPIO constants in flags cell for OMAP2+ boards"). While the latter commit did not introduce the issue I'm marking it with Fixes tag due the v4.4 kernels still being maintained. Fixes: 3a637e008e54 ("ARM: dts: Use defined GPIO constants in flags cell for OMAP2+ boards") Cc: linux-stable # 4.4+ Signed-off-by: Jarkko Nikula Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/omap3-tao3530.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/omap3-tao3530.dtsi b/arch/arm/boot/dts/omap3-tao3530.dtsi index 9a601d15247b..5b7bda74752b 100644 --- a/arch/arm/boot/dts/omap3-tao3530.dtsi +++ b/arch/arm/boot/dts/omap3-tao3530.dtsi @@ -224,7 +224,7 @@ pinctrl-0 = <&mmc1_pins>; vmmc-supply = <&vmmc1>; vqmmc-supply = <&vsim>; - cd-gpios = <&twl_gpio 0 GPIO_ACTIVE_HIGH>; + cd-gpios = <&twl_gpio 0 GPIO_ACTIVE_LOW>; bus-width = <8>; }; From 5a963f7589ba8f2af0089608e414059196f92dd7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 8 Nov 2019 21:34:30 +0100 Subject: [PATCH 1660/3715] ppdev: fix PPGETTIME/PPSETTIME ioctls commit 998174042da229e2cf5841f574aba4a743e69650 upstream. Going through the uses of timeval in the user space API, I noticed two bugs in ppdev that were introduced in the y2038 conversion: * The range check was accidentally moved from ppsettime to ppgettime * On sparc64, the microseconds are in the other half of the 64-bit word. Fix both, and mark the fix for stable backports. Cc: stable@vger.kernel.org Fixes: 3b9ab374a1e6 ("ppdev: convert to y2038 safe") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20191108203435.112759-8-arnd@arndb.de Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/char/ppdev.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c index d256110ba672..0023bde4d4ff 100644 --- a/drivers/char/ppdev.c +++ b/drivers/char/ppdev.c @@ -623,20 +623,27 @@ static int pp_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (copy_from_user(time32, argp, sizeof(time32))) return -EFAULT; + if ((time32[0] < 0) || (time32[1] < 0)) + return -EINVAL; + return pp_set_timeout(pp->pdev, time32[0], time32[1]); case PPSETTIME64: if (copy_from_user(time64, argp, sizeof(time64))) return -EFAULT; + if ((time64[0] < 0) || (time64[1] < 0)) + return -EINVAL; + + if (IS_ENABLED(CONFIG_SPARC64) && !in_compat_syscall()) + time64[1] >>= 32; + return pp_set_timeout(pp->pdev, time64[0], time64[1]); case PPGETTIME32: jiffies_to_timespec64(pp->pdev->timeout, &ts); time32[0] = ts.tv_sec; time32[1] = ts.tv_nsec / NSEC_PER_USEC; - if ((time32[0] < 0) || (time32[1] < 0)) - return -EINVAL; if (copy_to_user(argp, time32, sizeof(time32))) return -EFAULT; @@ -647,8 +654,9 @@ static int pp_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg) jiffies_to_timespec64(pp->pdev->timeout, &ts); time64[0] = ts.tv_sec; time64[1] = ts.tv_nsec / NSEC_PER_USEC; - if ((time64[0] < 0) || (time64[1] < 0)) - return -EINVAL; + + if (IS_ENABLED(CONFIG_SPARC64) && !in_compat_syscall()) + time64[1] <<= 32; if (copy_to_user(argp, time64, sizeof(time64))) return -EFAULT; From 6ff419d70d9f5bd65b3c59334acdc4e01466069b Mon Sep 17 00:00:00 2001 From: Alastair D'Silva Date: Mon, 4 Nov 2019 13:32:54 +1100 Subject: [PATCH 1661/3715] powerpc: Allow 64bit VDSO __kernel_sync_dicache to work across ranges >4GB commit f9ec11165301982585e5e5f606739b5bae5331f3 upstream. When calling __kernel_sync_dicache with a size >4GB, we were masking off the upper 32 bits, so we would incorrectly flush a range smaller than intended. This patch replaces the 32 bit shifts with 64 bit ones, so that the full size is accounted for. Signed-off-by: Alastair D'Silva Cc: stable@vger.kernel.org Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191104023305.9581-3-alastair@au1.ibm.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/vdso64/cacheflush.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S index 69c5af2b3c96..228a4a2383d6 100644 --- a/arch/powerpc/kernel/vdso64/cacheflush.S +++ b/arch/powerpc/kernel/vdso64/cacheflush.S @@ -39,7 +39,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) subf r8,r6,r4 /* compute length */ add r8,r8,r5 /* ensure we get enough */ lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10) - srw. r8,r8,r9 /* compute line count */ + srd. r8,r8,r9 /* compute line count */ crclr cr0*4+so beqlr /* nothing to do? */ mtctr r8 @@ -56,7 +56,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) subf r8,r6,r4 /* compute length */ add r8,r8,r5 lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10) - srw. r8,r8,r9 /* compute line count */ + srd. r8,r8,r9 /* compute line count */ crclr cr0*4+so beqlr /* nothing to do? */ mtctr r8 From 9aeaa898f580ed9a20de0235430149a26da65d16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Thu, 31 Oct 2019 07:31:00 +0100 Subject: [PATCH 1662/3715] powerpc/xive: Prevent page fault issues in the machine crash handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1ca3dec2b2dff9d286ce6cd64108bda0e98f9710 upstream. When the machine crash handler is invoked, all interrupts are masked but interrupts which have not been started yet do not have an ESB page mapped in the Linux address space. This crashes the 'crash kexec' sequence on sPAPR guests. To fix, force the mapping of the ESB page when an interrupt is being mapped in the Linux IRQ number space. This is done by setting the initial state of the interrupt to OFF which is not necessarily the case on PowerNV. Fixes: 243e25112d06 ("powerpc/xive: Native exploitation of the XIVE interrupt controller") Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Cédric Le Goater Reviewed-by: Greg Kurz Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191031063100.3864-1-clg@kaod.org Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/sysdev/xive/common.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 110d8bb16ebb..a820370883d9 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -967,6 +967,15 @@ static int xive_irq_alloc_data(unsigned int virq, irq_hw_number_t hw) xd->target = XIVE_INVALID_TARGET; irq_set_handler_data(virq, xd); + /* + * Turn OFF by default the interrupt being mapped. A side + * effect of this check is the mapping the ESB page of the + * interrupt in the Linux address space. This prevents page + * fault issues in the crash handler which masks all + * interrupts. + */ + xive_esb_read(xd, XIVE_ESB_SET_PQ_01); + return 0; } From 0cea5de4dbad9f821efaab09699ac36950719933 Mon Sep 17 00:00:00 2001 From: Alastair D'Silva Date: Mon, 4 Nov 2019 13:32:53 +1100 Subject: [PATCH 1663/3715] powerpc: Allow flush_icache_range to work across ranges >4GB commit 29430fae82073d39b1b881a3cd507416a56a363f upstream. When calling flush_icache_range with a size >4GB, we were masking off the upper 32 bits, so we would incorrectly flush a range smaller than intended. This patch replaces the 32 bit shifts with 64 bit ones, so that the full size is accounted for. Signed-off-by: Alastair D'Silva Cc: stable@vger.kernel.org Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191104023305.9581-2-alastair@au1.ibm.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/misc_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 09af857ca099..afe086f48e7c 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -86,7 +86,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) subf r8,r6,r4 /* compute length */ add r8,r8,r5 /* ensure we get enough */ lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of cache block size */ - srw. r8,r8,r9 /* compute line count */ + srd. r8,r8,r9 /* compute line count */ beqlr /* nothing to do? */ mtctr r8 1: dcbst 0,r6 @@ -102,7 +102,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) subf r8,r6,r4 /* compute length */ add r8,r8,r5 lwz r9,ICACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of Icache block size */ - srw. r8,r8,r9 /* compute line count */ + srd. r8,r8,r9 /* compute line count */ beqlr /* nothing to do? */ mtctr r8 2: icbi 0,r6 From 3a4c1a8952b19e2d6c39cce5de13c157cd0b8f7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Tue, 3 Dec 2019 17:36:42 +0100 Subject: [PATCH 1664/3715] powerpc/xive: Skip ioremap() of ESB pages for LSI interrupts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b67a95f2abff0c34e5667c15ab8900de73d8d087 upstream. The PCI INTx interrupts and other LSI interrupts are handled differently under a sPAPR platform. When the interrupt source characteristics are queried, the hypervisor returns an H_INT_ESB flag to inform the OS that it should be using the H_INT_ESB hcall for interrupt management and not loads and stores on the interrupt ESB pages. A default -1 value is returned for the addresses of the ESB pages. The driver ignores this condition today and performs a bogus IO mapping. Recent changes and the DEBUG_VM configuration option make the bug visible with : kernel BUG at arch/powerpc/include/asm/book3s/64/pgtable.h:612! Oops: Exception in kernel mode, sig: 5 [#1] LE PAGE_SIZE=64K MMU=Radix MMU=Hash SMP NR_CPUS=1024 NUMA pSeries Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.4.0-0.rc6.git0.1.fc32.ppc64le #1 NIP: c000000000f63294 LR: c000000000f62e44 CTR: 0000000000000000 REGS: c0000000fa45f0d0 TRAP: 0700 Not tainted (5.4.0-0.rc6.git0.1.fc32.ppc64le) ... NIP ioremap_page_range+0x4c4/0x6e0 LR ioremap_page_range+0x74/0x6e0 Call Trace: ioremap_page_range+0x74/0x6e0 (unreliable) do_ioremap+0x8c/0x120 __ioremap_caller+0x128/0x140 ioremap+0x30/0x50 xive_spapr_populate_irq_data+0x170/0x260 xive_irq_domain_map+0x8c/0x170 irq_domain_associate+0xb4/0x2d0 irq_create_mapping+0x1e0/0x3b0 irq_create_fwspec_mapping+0x27c/0x3e0 irq_create_of_mapping+0x98/0xb0 of_irq_parse_and_map_pci+0x168/0x230 pcibios_setup_device+0x88/0x250 pcibios_setup_bus_devices+0x54/0x100 __of_scan_bus+0x160/0x310 pcibios_scan_phb+0x330/0x390 pcibios_init+0x8c/0x128 do_one_initcall+0x60/0x2c0 kernel_init_freeable+0x290/0x378 kernel_init+0x2c/0x148 ret_from_kernel_thread+0x5c/0x80 Fixes: bed81ee181dd ("powerpc/xive: introduce H_INT_ESB hcall") Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: Cédric Le Goater Tested-by: Daniel Axtens Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191203163642.2428-1-clg@kaod.org Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/sysdev/xive/spapr.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index 091f1d0d0af1..7fc41bf30fd5 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -293,20 +293,28 @@ static int xive_spapr_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) data->esb_shift = esb_shift; data->trig_page = trig_page; + data->hw_irq = hw_irq; + /* * No chip-id for the sPAPR backend. This has an impact how we * pick a target. See xive_pick_irq_target(). */ data->src_chip = XIVE_INVALID_CHIP_ID; + /* + * When the H_INT_ESB flag is set, the H_INT_ESB hcall should + * be used for interrupt management. Skip the remapping of the + * ESB pages which are not available. + */ + if (data->flags & XIVE_IRQ_FLAG_H_INT_ESB) + return 0; + data->eoi_mmio = ioremap(data->eoi_page, 1u << data->esb_shift); if (!data->eoi_mmio) { pr_err("Failed to map EOI page for irq 0x%x\n", hw_irq); return -ENOMEM; } - data->hw_irq = hw_irq; - /* Full function page supports trigger */ if (flags & XIVE_SRC_TRIGGER) { data->trig_mmio = data->eoi_mmio; From 772098e8b6999656b9b28d78f52f80b224556c06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 19 Sep 2019 16:28:53 +0300 Subject: [PATCH 1665/3715] video/hdmi: Fix AVI bar unpack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6039f37dd6b76641198e290f26b31c475248f567 upstream. The bar values are little endian, not big endian. The pack function did it right but the unpack got it wrong. Fix it. Cc: stable@vger.kernel.org Cc: linux-media@vger.kernel.org Cc: Martin Bugge Cc: Hans Verkuil Cc: Thierry Reding Cc: Mauro Carvalho Chehab Fixes: 2c676f378edb ("[media] hdmi: added unpack and logging functions for InfoFrames") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190919132853.30954-1-ville.syrjala@linux.intel.com Reviewed-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- drivers/video/hdmi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/video/hdmi.c b/drivers/video/hdmi.c index 111a0ab6280a..ce7c4a269f77 100644 --- a/drivers/video/hdmi.c +++ b/drivers/video/hdmi.c @@ -1036,12 +1036,12 @@ static int hdmi_avi_infoframe_unpack(struct hdmi_avi_infoframe *frame, if (ptr[0] & 0x10) frame->active_aspect = ptr[1] & 0xf; if (ptr[0] & 0x8) { - frame->top_bar = (ptr[5] << 8) + ptr[6]; - frame->bottom_bar = (ptr[7] << 8) + ptr[8]; + frame->top_bar = (ptr[6] << 8) | ptr[5]; + frame->bottom_bar = (ptr[8] << 8) | ptr[7]; } if (ptr[0] & 0x4) { - frame->left_bar = (ptr[9] << 8) + ptr[10]; - frame->right_bar = (ptr[11] << 8) + ptr[12]; + frame->left_bar = (ptr[10] << 8) | ptr[9]; + frame->right_bar = (ptr[12] << 8) | ptr[11]; } frame->scan_mode = ptr[0] & 0x3; From f780a35182bf0c37668f734d2bbf8e5dd63d8713 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Thu, 31 Oct 2019 10:39:20 +0000 Subject: [PATCH 1666/3715] quota: Check that quota is not dirty before release commit df4bb5d128e2c44848aeb36b7ceceba3ac85080d upstream. There is a race window where quota was redirted once we drop dq_list_lock inside dqput(), but before we grab dquot->dq_lock inside dquot_release() TASK1 TASK2 (chowner) ->dqput() we_slept: spin_lock(&dq_list_lock) if (dquot_dirty(dquot)) { spin_unlock(&dq_list_lock); dquot->dq_sb->dq_op->write_dquot(dquot); goto we_slept if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) { spin_unlock(&dq_list_lock); dquot->dq_sb->dq_op->release_dquot(dquot); dqget() mark_dquot_dirty() dqput() goto we_slept; } So dquot dirty quota will be released by TASK1, but on next we_sleept loop we detect this and call ->write_dquot() for it. XFSTEST: https://github.com/dmonakhov/xfstests/commit/440a80d4cbb39e9234df4d7240aee1d551c36107 Link: https://lore.kernel.org/r/20191031103920.3919-2-dmonakhov@openvz.org CC: stable@vger.kernel.org Signed-off-by: Dmitry Monakhov Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/quota_global.c | 2 +- fs/quota/dquot.c | 2 +- include/linux/quotaops.h | 10 ++++++++++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index b39d14cbfa34..d212d09c00b1 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -727,7 +727,7 @@ static int ocfs2_release_dquot(struct dquot *dquot) mutex_lock(&dquot->dq_lock); /* Check whether we are not racing with some other dqget() */ - if (atomic_read(&dquot->dq_count) > 1) + if (dquot_is_busy(dquot)) goto out; /* Running from downconvert thread? Postpone quota processing to wq */ if (current == osb->dc_task) { diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 9c81fd973418..9d40265ccd7d 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -491,7 +491,7 @@ int dquot_release(struct dquot *dquot) mutex_lock(&dquot->dq_lock); /* Check whether we are not racing with some other dqget() */ - if (atomic_read(&dquot->dq_count) > 1) + if (dquot_is_busy(dquot)) goto out_dqlock; if (dqopt->ops[dquot->dq_id.type]->release_dqblk) { ret = dqopt->ops[dquot->dq_id.type]->release_dqblk(dquot); diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index a109e6107c06..50ab5d6ccc4e 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -51,6 +51,16 @@ static inline struct dquot *dqgrab(struct dquot *dquot) atomic_inc(&dquot->dq_count); return dquot; } + +static inline bool dquot_is_busy(struct dquot *dquot) +{ + if (test_bit(DQ_MOD_B, &dquot->dq_flags)) + return true; + if (atomic_read(&dquot->dq_count) > 1) + return true; + return false; +} + void dqput(struct dquot *dquot); int dquot_scan_active(struct super_block *sb, int (*fn)(struct dquot *dquot, unsigned long priv), From f4845e598a438c5a168084adbfe6312b5da4a2f7 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Tue, 5 Nov 2019 12:51:00 +0800 Subject: [PATCH 1667/3715] ext2: check err when partial != NULL commit e705f4b8aa27a59f8933e8f384e9752f052c469c upstream. Check err when partial == NULL is meaningless because partial == NULL means getting branch successfully without error. CC: stable@vger.kernel.org Link: https://lore.kernel.org/r/20191105045100.7104-1-cgxu519@mykernel.net Signed-off-by: Chengguang Xu Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/ext2/inode.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index a7c87d593083..31c5a7b5f1f3 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -699,10 +699,13 @@ static int ext2_get_blocks(struct inode *inode, if (!partial) { count++; mutex_unlock(&ei->truncate_mutex); - if (err) - goto cleanup; goto got_it; } + + if (err) { + mutex_unlock(&ei->truncate_mutex); + goto cleanup; + } } /* From 17da23ef719d36495b60372d552933dedca4baba Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Thu, 31 Oct 2019 10:39:19 +0000 Subject: [PATCH 1668/3715] quota: fix livelock in dquot_writeback_dquots commit 6ff33d99fc5c96797103b48b7b0902c296f09c05 upstream. Write only quotas which are dirty at entry. XFSTEST: https://github.com/dmonakhov/xfstests/commit/b10ad23566a5bf75832a6f500e1236084083cddc Link: https://lore.kernel.org/r/20191031103920.3919-1-dmonakhov@openvz.org CC: stable@vger.kernel.org Signed-off-by: Konstantin Khlebnikov Signed-off-by: Dmitry Monakhov Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/quota/dquot.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 9d40265ccd7d..3254c90fd899 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -617,7 +617,7 @@ EXPORT_SYMBOL(dquot_scan_active); /* Write all dquot structures to quota files */ int dquot_writeback_dquots(struct super_block *sb, int type) { - struct list_head *dirty; + struct list_head dirty; struct dquot *dquot; struct quota_info *dqopt = sb_dqopt(sb); int cnt; @@ -631,9 +631,10 @@ int dquot_writeback_dquots(struct super_block *sb, int type) if (!sb_has_quota_active(sb, cnt)) continue; spin_lock(&dq_list_lock); - dirty = &dqopt->info[cnt].dqi_dirty_list; - while (!list_empty(dirty)) { - dquot = list_first_entry(dirty, struct dquot, + /* Move list away to avoid livelock. */ + list_replace_init(&dqopt->info[cnt].dqi_dirty_list, &dirty); + while (!list_empty(&dirty)) { + dquot = list_first_entry(&dirty, struct dquot, dq_dirty); WARN_ON(!test_bit(DQ_ACTIVE_B, &dquot->dq_flags)); From 2c21619f86370aed2c8fb93b485ce082b1e4d646 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 5 Nov 2019 17:44:12 +0100 Subject: [PATCH 1669/3715] ext4: Fix credit estimate for final inode freeing commit 65db869c754e7c271691dd5feabf884347e694f5 upstream. Estimate for the number of credits needed for final freeing of inode in ext4_evict_inode() was to small. We may modify 4 blocks (inode & sb for orphan deletion, bitmap & group descriptor for inode freeing) and not just 3. [ Fixed minor whitespace nit. -- TYT ] Fixes: e50e5129f384 ("ext4: xattr-in-inode support") CC: stable@vger.kernel.org Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20191105164437.32602-6-jack@suse.cz Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index b3d5fd84b485..77130af378f3 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -195,7 +195,12 @@ void ext4_evict_inode(struct inode *inode) { handle_t *handle; int err; - int extra_credits = 3; + /* + * Credits for final inode cleanup and freeing: + * sb + inode (ext4_orphan_del()), block bitmap, group descriptor + * (xattr block freeing), bitmap, group descriptor (inode freeing) + */ + int extra_credits = 6; struct ext4_xattr_inode_array *ea_inode_array = NULL; trace_ext4_evict_inode(inode); @@ -251,8 +256,12 @@ void ext4_evict_inode(struct inode *inode) if (!IS_NOQUOTA(inode)) extra_credits += EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb); + /* + * Block bitmap, group descriptor, and inode are accounted in both + * ext4_blocks_for_truncate() and extra_credits. So subtract 3. + */ handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, - ext4_blocks_for_truncate(inode)+extra_credits); + ext4_blocks_for_truncate(inode) + extra_credits - 3); if (IS_ERR(handle)) { ext4_std_error(inode->i_sb, PTR_ERR(handle)); /* From 7f2e7e222144061d10e777311f327af7b73653f8 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Thu, 24 Oct 2019 10:31:27 -0400 Subject: [PATCH 1670/3715] reiserfs: fix extended attributes on the root directory commit 60e4cf67a582d64f07713eda5fcc8ccdaf7833e6 upstream. Since commit d0a5b995a308 (vfs: Add IOP_XATTR inode operations flag) extended attributes haven't worked on the root directory in reiserfs. This is due to reiserfs conditionally setting the sb->s_xattrs handler array depending on whether it located or create the internal privroot directory. It necessarily does this after the root inode is already read in. The IOP_XATTR flag is set during inode initialization, so it never gets set on the root directory. This commit unconditionally assigns sb->s_xattrs and clears IOP_XATTR on internal inodes. The old return values due to the conditional assignment are handled via open_xa_root, which now returns EOPNOTSUPP as the VFS would have done. Link: https://lore.kernel.org/r/20191024143127.17509-1-jeffm@suse.com CC: stable@vger.kernel.org Fixes: d0a5b995a308 ("vfs: Add IOP_XATTR inode operations flag") Signed-off-by: Jeff Mahoney Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/reiserfs/inode.c | 12 ++++++++++-- fs/reiserfs/namei.c | 7 +++++-- fs/reiserfs/reiserfs.h | 2 ++ fs/reiserfs/super.c | 2 ++ fs/reiserfs/xattr.c | 19 ++++++++++++------- fs/reiserfs/xattr_acl.c | 4 +--- 6 files changed, 32 insertions(+), 14 deletions(-) diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 11a48affa882..683496322aa8 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -2096,6 +2096,15 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, goto out_inserted_sd; } + /* + * Mark it private if we're creating the privroot + * or something under it. + */ + if (IS_PRIVATE(dir) || dentry == REISERFS_SB(sb)->priv_root) { + inode->i_flags |= S_PRIVATE; + inode->i_opflags &= ~IOP_XATTR; + } + if (reiserfs_posixacl(inode->i_sb)) { reiserfs_write_unlock(inode->i_sb); retval = reiserfs_inherit_default_acl(th, dir, dentry, inode); @@ -2110,8 +2119,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, reiserfs_warning(inode->i_sb, "jdm-13090", "ACLs aren't enabled in the fs, " "but vfs thinks they are!"); - } else if (IS_PRIVATE(dir)) - inode->i_flags |= S_PRIVATE; + } if (security->name) { reiserfs_write_unlock(inode->i_sb); diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 5089dac02660..14ba7a12b89d 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -377,10 +377,13 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry, /* * Propagate the private flag so we know we're - * in the priv tree + * in the priv tree. Also clear IOP_XATTR + * since we don't have xattrs on xattr files. */ - if (IS_PRIVATE(dir)) + if (IS_PRIVATE(dir)) { inode->i_flags |= S_PRIVATE; + inode->i_opflags &= ~IOP_XATTR; + } } reiserfs_write_unlock(dir->i_sb); if (retval == IO_ERROR) { diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index eabf85371ece..0efe7c7c4124 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h @@ -1168,6 +1168,8 @@ static inline int bmap_would_wrap(unsigned bmap_nr) return bmap_nr > ((1LL << 16) - 1); } +extern const struct xattr_handler *reiserfs_xattr_handlers[]; + /* * this says about version of key of all items (but stat data) the * object consists of diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 4885c7b6e44f..cc0b22c72e83 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -2052,6 +2052,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) if (replay_only(s)) goto error_unlocked; + s->s_xattr = reiserfs_xattr_handlers; + if (bdev_read_only(s->s_bdev) && !sb_rdonly(s)) { SWARN(silent, s, "clm-7000", "Detected readonly device, marking FS readonly"); diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 83423192588c..29a0c0969e91 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -122,13 +122,13 @@ static struct dentry *open_xa_root(struct super_block *sb, int flags) struct dentry *xaroot; if (d_really_is_negative(privroot)) - return ERR_PTR(-ENODATA); + return ERR_PTR(-EOPNOTSUPP); inode_lock_nested(d_inode(privroot), I_MUTEX_XATTR); xaroot = dget(REISERFS_SB(sb)->xattr_root); if (!xaroot) - xaroot = ERR_PTR(-ENODATA); + xaroot = ERR_PTR(-EOPNOTSUPP); else if (d_really_is_negative(xaroot)) { int err = -ENODATA; @@ -610,6 +610,10 @@ int reiserfs_xattr_set(struct inode *inode, const char *name, int error, error2; size_t jbegin_count = reiserfs_xattr_nblocks(inode, buffer_size); + /* Check before we start a transaction and then do nothing. */ + if (!d_really_is_positive(REISERFS_SB(inode->i_sb)->priv_root)) + return -EOPNOTSUPP; + if (!(flags & XATTR_REPLACE)) jbegin_count += reiserfs_xattr_jcreate_nblocks(inode); @@ -832,8 +836,7 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size) if (d_really_is_negative(dentry)) return -EINVAL; - if (!dentry->d_sb->s_xattr || - get_inode_sd_version(d_inode(dentry)) == STAT_DATA_V1) + if (get_inode_sd_version(d_inode(dentry)) == STAT_DATA_V1) return -EOPNOTSUPP; dir = open_xa_dir(d_inode(dentry), XATTR_REPLACE); @@ -873,6 +876,7 @@ static int create_privroot(struct dentry *dentry) } d_inode(dentry)->i_flags |= S_PRIVATE; + d_inode(dentry)->i_opflags &= ~IOP_XATTR; reiserfs_info(dentry->d_sb, "Created %s - reserved for xattr " "storage.\n", PRIVROOT_NAME); @@ -886,7 +890,7 @@ static int create_privroot(struct dentry *dentry) { return 0; } #endif /* Actual operations that are exported to VFS-land */ -static const struct xattr_handler *reiserfs_xattr_handlers[] = { +const struct xattr_handler *reiserfs_xattr_handlers[] = { #ifdef CONFIG_REISERFS_FS_XATTR &reiserfs_xattr_user_handler, &reiserfs_xattr_trusted_handler, @@ -957,8 +961,10 @@ int reiserfs_lookup_privroot(struct super_block *s) if (!IS_ERR(dentry)) { REISERFS_SB(s)->priv_root = dentry; d_set_d_op(dentry, &xattr_lookup_poison_ops); - if (d_really_is_positive(dentry)) + if (d_really_is_positive(dentry)) { d_inode(dentry)->i_flags |= S_PRIVATE; + d_inode(dentry)->i_opflags &= ~IOP_XATTR; + } } else err = PTR_ERR(dentry); inode_unlock(d_inode(s->s_root)); @@ -987,7 +993,6 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags) } if (d_really_is_positive(privroot)) { - s->s_xattr = reiserfs_xattr_handlers; inode_lock(d_inode(privroot)); if (!REISERFS_SB(s)->xattr_root) { struct dentry *dentry; diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index aa9380bac196..05f666794561 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -320,10 +320,8 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, * would be useless since permissions are ignored, and a pain because * it introduces locking cycles */ - if (IS_PRIVATE(dir)) { - inode->i_flags |= S_PRIVATE; + if (IS_PRIVATE(inode)) goto apply_umask; - } err = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl); if (err) From 9fb95b97ee1492d9e0d3e3b911d39b741f394acd Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 1 Dec 2018 00:38:18 +0800 Subject: [PATCH 1671/3715] block: fix single range discard merge commit 2a5cf35cd6c56b2924bce103413ad3381bdc31fa upstream. There are actually two kinds of discard merge: - one is the normal discard merge, just like normal read/write request, and call it single-range discard - another is the multi-range discard, queue_max_discard_segments(rq->q) > 1 For the former case, queue_max_discard_segments(rq->q) is 1, and we should handle this kind of discard merge like the normal read/write request. This patch fixes the following kernel panic issue[1], which is caused by not removing the single-range discard request from elevator queue. Guangwu has one raid discard test case, in which this issue is a bit easier to trigger, and I verified that this patch can fix the kernel panic issue in Guangwu's test case. [1] kernel panic log from Jens's report BUG: unable to handle kernel NULL pointer dereference at 0000000000000148 PGD 0 P4D 0. Oops: 0000 [#1] SMP PTI CPU: 37 PID: 763 Comm: kworker/37:1H Not tainted \ 4.20.0-rc3-00649-ge64d9a554a91-dirty #14 Hardware name: Wiwynn \ Leopard-Orv2/Leopard-DDR BW, BIOS LBM08 03/03/2017 Workqueue: kblockd \ blk_mq_run_work_fn RIP: \ 0010:blk_mq_get_driver_tag+0x81/0x120 Code: 24 \ 10 48 89 7c 24 20 74 21 83 fa ff 0f 95 c0 48 8b 4c 24 28 65 48 33 0c 25 28 00 00 00 \ 0f 85 96 00 00 00 48 83 c4 30 5b 5d c3 <48> 8b 87 48 01 00 00 8b 40 04 39 43 20 72 37 \ f6 87 b0 00 00 00 02 RSP: 0018:ffffc90004aabd30 EFLAGS: 00010246 \ RAX: 0000000000000003 RBX: ffff888465ea1300 RCX: ffffc90004aabde8 RDX: 00000000ffffffff RSI: ffffc90004aabde8 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888465ea1348 R09: 0000000000000000 R10: 0000000000001000 R11: 00000000ffffffff R12: ffff888465ea1300 R13: 0000000000000000 R14: ffff888465ea1348 R15: ffff888465d10000 FS: 0000000000000000(0000) GS:ffff88846f9c0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000148 CR3: 000000000220a003 CR4: 00000000003606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: blk_mq_dispatch_rq_list+0xec/0x480 ? elv_rb_del+0x11/0x30 blk_mq_do_dispatch_sched+0x6e/0xf0 blk_mq_sched_dispatch_requests+0xfa/0x170 __blk_mq_run_hw_queue+0x5f/0xe0 process_one_work+0x154/0x350 worker_thread+0x46/0x3c0 kthread+0xf5/0x130 ? process_one_work+0x350/0x350 ? kthread_destroy_worker+0x50/0x50 ret_from_fork+0x1f/0x30 Modules linked in: sb_edac x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel \ kvm switchtec irqbypass iTCO_wdt iTCO_vendor_support efivars cdc_ether usbnet mii \ cdc_acm i2c_i801 lpc_ich mfd_core ipmi_si ipmi_devintf ipmi_msghandler acpi_cpufreq \ button sch_fq_codel nfsd nfs_acl lockd grace auth_rpcgss oid_registry sunrpc nvme \ nvme_core fuse sg loop efivarfs autofs4 CR2: 0000000000000148 \ ---[ end trace 340a1fb996df1b9b ]--- RIP: 0010:blk_mq_get_driver_tag+0x81/0x120 Code: 24 10 48 89 7c 24 20 74 21 83 fa ff 0f 95 c0 48 8b 4c 24 28 65 48 33 0c 25 28 \ 00 00 00 0f 85 96 00 00 00 48 83 c4 30 5b 5d c3 <48> 8b 87 48 01 00 00 8b 40 04 39 43 \ 20 72 37 f6 87 b0 00 00 00 02 Fixes: 445251d0f4d329a ("blk-mq: fix discard merge with scheduler attached") Reported-by: Jens Axboe Cc: Guangwu Zhang Cc: Christoph Hellwig Cc: Jianchao Wang Signed-off-by: Ming Lei Signed-off-by: Jens Axboe Cc: Andre Tomt Cc: Jack Wang Signed-off-by: Greg Kroah-Hartman --- block/blk-merge.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index 94650cdf2924..f61b50a01bc7 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -765,7 +765,7 @@ static struct request *attempt_merge(struct request_queue *q, req->__data_len += blk_rq_bytes(next); - if (req_op(req) != REQ_OP_DISCARD) + if (!blk_discard_mergable(req)) elv_merge_requests(q, req, next); /* From f0297a3cb3eb6d271c052962e094392ba9c86d4d Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Fri, 25 Oct 2019 18:12:53 +0200 Subject: [PATCH 1672/3715] scsi: zfcp: trace channel log even for FCP command responses [ Upstream commit 100843f176109af94600e500da0428e21030ca7f ] While v2.6.26 commit b75db73159cc ("[SCSI] zfcp: Add qtcb dump to hba debug trace") is right that we don't want to flood the (payload) trace ring buffer, we don't trace successful FCP command responses by default. So we can include the channel log for problem determination with failed responses of any FSF request type. Fixes: b75db73159cc ("[SCSI] zfcp: Add qtcb dump to hba debug trace") Fixes: a54ca0f62f95 ("[SCSI] zfcp: Redesign of the debug tracing for HBA records.") Cc: #2.6.38+ Link: https://lore.kernel.org/r/e37597b5c4ae123aaa85fd86c23a9f71e994e4a9.1572018132.git.bblock@linux.ibm.com Reviewed-by: Benjamin Block Signed-off-by: Steffen Maier Signed-off-by: Benjamin Block Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/s390/scsi/zfcp_dbf.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c index 599447032e50..bc6c1d6a1c42 100644 --- a/drivers/s390/scsi/zfcp_dbf.c +++ b/drivers/s390/scsi/zfcp_dbf.c @@ -94,11 +94,9 @@ void zfcp_dbf_hba_fsf_res(char *tag, int level, struct zfcp_fsf_req *req) memcpy(rec->u.res.fsf_status_qual, &q_head->fsf_status_qual, FSF_STATUS_QUALIFIER_SIZE); - if (req->fsf_command != FSF_QTCB_FCP_CMND) { - rec->pl_len = q_head->log_length; - zfcp_dbf_pl_write(dbf, (char *)q_pref + q_head->log_start, - rec->pl_len, "fsf_res", req->req_id); - } + rec->pl_len = q_head->log_length; + zfcp_dbf_pl_write(dbf, (char *)q_pref + q_head->log_start, + rec->pl_len, "fsf_res", req->req_id); debug_event(dbf->hba, level, rec, sizeof(*rec)); spin_unlock_irqrestore(&dbf->hba_lock, flags); From 890140f2d4b3e111c99f3d97ca3c165db806f76c Mon Sep 17 00:00:00 2001 From: Himanshu Madhani Date: Fri, 26 Jul 2019 09:07:26 -0700 Subject: [PATCH 1673/3715] scsi: qla2xxx: Fix DMA unmap leak [ Upstream commit 5d328de64d89400dcf9911125844d8adc0db697f ] With debug kernel we see following wanings indicating memory leak. [28809.523959] WARNING: CPU: 3 PID: 6790 at lib/dma-debug.c:978 dma_debug_device_change+0x166/0x1d0 [28809.523964] pci 0000:0c:00.6: DMA-API: device driver has pending DMA allocations while released from device [count=5] [28809.523964] One of leaked entries details: [device address=0x00000002aefe4000] [size=8208 bytes] [mapped with DMA_BIDIRECTIONAL] [mapped as coherent] Fix this by unmapping DMA memory. Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_bsg.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c index 7472d3882ad4..6f8c7df69f66 100644 --- a/drivers/scsi/qla2xxx/qla_bsg.c +++ b/drivers/scsi/qla2xxx/qla_bsg.c @@ -342,6 +342,8 @@ qla2x00_process_els(struct bsg_job *bsg_job) dma_map_sg(&ha->pdev->dev, bsg_job->request_payload.sg_list, bsg_job->request_payload.sg_cnt, DMA_TO_DEVICE); if (!req_sg_cnt) { + dma_unmap_sg(&ha->pdev->dev, bsg_job->request_payload.sg_list, + bsg_job->request_payload.sg_cnt, DMA_TO_DEVICE); rval = -ENOMEM; goto done_free_fcport; } @@ -349,6 +351,8 @@ qla2x00_process_els(struct bsg_job *bsg_job) rsp_sg_cnt = dma_map_sg(&ha->pdev->dev, bsg_job->reply_payload.sg_list, bsg_job->reply_payload.sg_cnt, DMA_FROM_DEVICE); if (!rsp_sg_cnt) { + dma_unmap_sg(&ha->pdev->dev, bsg_job->reply_payload.sg_list, + bsg_job->reply_payload.sg_cnt, DMA_FROM_DEVICE); rval = -ENOMEM; goto done_free_fcport; } From a5caef09ebace942a12b0e17476ffb1ff2e9c784 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 8 Aug 2019 20:01:40 -0700 Subject: [PATCH 1674/3715] scsi: qla2xxx: Fix session lookup in qlt_abort_work() [ Upstream commit ac452b8e79320c9e90c78edf32ba2d42431e4daf ] Pass the correct session ID to find_sess_by_s_id() instead of passing an uninitialized variable. Cc: Himanshu Madhani Fixes: 2d70c103fd2a ("[SCSI] qla2xxx: Add LLD target-mode infrastructure for >= 24xx series") # v3.5. Signed-off-by: Bart Van Assche Tested-by: Himanshu Madhani Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_target.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 11753ed3433c..2f5658554275 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -5918,7 +5918,6 @@ static void qlt_abort_work(struct qla_tgt *tgt, struct qla_hw_data *ha = vha->hw; struct fc_port *sess = NULL; unsigned long flags = 0, flags2 = 0; - uint32_t be_s_id; uint8_t s_id[3]; int rc; @@ -5931,8 +5930,7 @@ static void qlt_abort_work(struct qla_tgt *tgt, s_id[1] = prm->abts.fcp_hdr_le.s_id[1]; s_id[2] = prm->abts.fcp_hdr_le.s_id[0]; - sess = ha->tgt.tgt_ops->find_sess_by_s_id(vha, - (unsigned char *)&be_s_id); + sess = ha->tgt.tgt_ops->find_sess_by_s_id(vha, s_id); if (!sess) { spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2); From 940774ffa457e09aac3508a515285c1ccc3144a8 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 8 Aug 2019 20:01:48 -0700 Subject: [PATCH 1675/3715] scsi: qla2xxx: Fix qla24xx_process_bidir_cmd() [ Upstream commit c29282c65d1cf54daeea63be46243d7f69d72f4d ] Set the r??_data_len variables before using these instead of after. This patch fixes the following Coverity complaint: const: At condition req_data_len != rsp_data_len, the value of req_data_len must be equal to 0. const: At condition req_data_len != rsp_data_len, the value of rsp_data_len must be equal to 0. dead_error_condition: The condition req_data_len != rsp_data_len cannot be true. Cc: Himanshu Madhani Fixes: a9b6f722f62d ("[SCSI] qla2xxx: Implementation of bidirectional.") # v3.7. Signed-off-by: Bart Van Assche Tested-by: Himanshu Madhani Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_bsg.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c index 6f8c7df69f66..c1ca21a88a09 100644 --- a/drivers/scsi/qla2xxx/qla_bsg.c +++ b/drivers/scsi/qla2xxx/qla_bsg.c @@ -1782,8 +1782,8 @@ qla24xx_process_bidir_cmd(struct bsg_job *bsg_job) uint16_t nextlid = 0; uint32_t tot_dsds; srb_t *sp = NULL; - uint32_t req_data_len = 0; - uint32_t rsp_data_len = 0; + uint32_t req_data_len; + uint32_t rsp_data_len; /* Check the type of the adapter */ if (!IS_BIDI_CAPABLE(ha)) { @@ -1888,6 +1888,9 @@ qla24xx_process_bidir_cmd(struct bsg_job *bsg_job) goto done_unmap_sg; } + req_data_len = bsg_job->request_payload.payload_len; + rsp_data_len = bsg_job->reply_payload.payload_len; + if (req_data_len != rsp_data_len) { rval = EXT_STATUS_BUSY; ql_log(ql_log_warn, vha, 0x70aa, @@ -1895,10 +1898,6 @@ qla24xx_process_bidir_cmd(struct bsg_job *bsg_job) goto done_unmap_sg; } - req_data_len = bsg_job->request_payload.payload_len; - rsp_data_len = bsg_job->reply_payload.payload_len; - - /* Alloc SRB structure */ sp = qla2x00_get_sp(vha, &(vha->bidir_fcport), GFP_KERNEL); if (!sp) { From 88a18e5f69e5cb1e072343585bd1ef5233ab1e49 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 8 Aug 2019 20:01:52 -0700 Subject: [PATCH 1676/3715] scsi: qla2xxx: Always check the qla2x00_wait_for_hba_online() return value [ Upstream commit e6803efae5acd109fad9f2f07dab674563441a53 ] This patch fixes several Coverity complaints about not always checking the qla2x00_wait_for_hba_online() return value. Cc: Himanshu Madhani Signed-off-by: Bart Van Assche Tested-by: Himanshu Madhani Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_attr.c | 3 ++- drivers/scsi/qla2xxx/qla_target.c | 7 +++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 1844c2f59460..656253285db9 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -652,7 +652,8 @@ qla2x00_sysfs_write_reset(struct file *filp, struct kobject *kobj, break; } else { /* Make sure FC side is not in reset */ - qla2x00_wait_for_hba_online(vha); + WARN_ON_ONCE(qla2x00_wait_for_hba_online(vha) != + QLA_SUCCESS); /* Issue MPI reset */ scsi_block_requests(vha->host); diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 2f5658554275..69ed544d80ef 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -6394,7 +6394,8 @@ qlt_enable_vha(struct scsi_qla_host *vha) } else { set_bit(ISP_ABORT_NEEDED, &base_vha->dpc_flags); qla2xxx_wake_dpc(base_vha); - qla2x00_wait_for_hba_online(base_vha); + WARN_ON_ONCE(qla2x00_wait_for_hba_online(base_vha) != + QLA_SUCCESS); } } EXPORT_SYMBOL(qlt_enable_vha); @@ -6424,7 +6425,9 @@ static void qlt_disable_vha(struct scsi_qla_host *vha) set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); qla2xxx_wake_dpc(vha); - qla2x00_wait_for_hba_online(vha); + if (qla2x00_wait_for_hba_online(vha) != QLA_SUCCESS) + ql_dbg(ql_dbg_tgt, vha, 0xe081, + "qla2x00_wait_for_hba_online() failed\n"); } /* From 4b4a166ce8b96199c9812af094a631e8ad2d8eb7 Mon Sep 17 00:00:00 2001 From: Himanshu Madhani Date: Fri, 30 Aug 2019 15:23:57 -0700 Subject: [PATCH 1677/3715] scsi: qla2xxx: Fix message indicating vectors used by driver [ Upstream commit da48b82425b8bf999fb9f7c220e967c4d661b5f8 ] This patch updates log message which indicates number of vectors used by the driver instead of displaying failure to get maximum requested vectors. Driver will always request maximum vectors during initialization. In the event driver is not able to get maximum requested vectors, it will adjust the allocated vectors. This is normal and does not imply failure in driver. Signed-off-by: Himanshu Madhani Reviewed-by: Ewan D. Milne Reviewed-by: Lee Duncan Link: https://lore.kernel.org/r/20190830222402.23688-2-hmadhani@marvell.com Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_isr.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 6a76d7217515..ebca1a470e9b 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -3369,10 +3369,8 @@ qla24xx_enable_msix(struct qla_hw_data *ha, struct rsp_que *rsp) ha->msix_count, ret); goto msix_out; } else if (ret < ha->msix_count) { - ql_log(ql_log_warn, vha, 0x00c6, - "MSI-X: Failed to enable support " - "with %d vectors, using %d vectors.\n", - ha->msix_count, ret); + ql_log(ql_log_info, vha, 0x00c6, + "MSI-X: Using %d vectors\n", ret); ha->msix_count = ret; /* Recalculate queue values */ if (ha->mqiobase && ql2xmqsupport) { From 1fbaac5f1827570565dd0373190e75bde878c854 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 11 Dec 2019 16:20:02 +0200 Subject: [PATCH 1678/3715] xhci: Fix memory leak in xhci_add_in_port() [ Upstream commit ce91f1a43b37463f517155bdfbd525eb43adbd1a ] When xHCI is part of Alpine or Titan Ridge Thunderbolt controller and the xHCI device is hot-removed as a result of unplugging a dock for example, the driver leaks memory it allocates for xhci->usb3_rhub.psi and xhci->usb2_rhub.psi in xhci_add_in_port() as reported by kmemleak: unreferenced object 0xffff922c24ef42f0 (size 16): comm "kworker/u16:2", pid 178, jiffies 4294711640 (age 956.620s) hex dump (first 16 bytes): 21 00 0c 00 12 00 dc 05 23 00 e0 01 00 00 00 00 !.......#....... backtrace: [<000000007ac80914>] xhci_mem_init+0xcf8/0xeb7 [<0000000001b6d775>] xhci_init+0x7c/0x160 [<00000000db443fe3>] xhci_gen_setup+0x214/0x340 [<00000000fdffd320>] xhci_pci_setup+0x48/0x110 [<00000000541e1e03>] usb_add_hcd.cold+0x265/0x747 [<00000000ca47a56b>] usb_hcd_pci_probe+0x219/0x3b4 [<0000000021043861>] xhci_pci_probe+0x24/0x1c0 [<00000000b9231f25>] local_pci_probe+0x3d/0x70 [<000000006385c9d7>] pci_device_probe+0xd0/0x150 [<0000000070241068>] really_probe+0xf5/0x3c0 [<0000000061f35c0a>] driver_probe_device+0x58/0x100 [<000000009da11198>] bus_for_each_drv+0x79/0xc0 [<000000009ce45f69>] __device_attach+0xda/0x160 [<00000000df201aaf>] pci_bus_add_device+0x46/0x70 [<0000000088a1bc48>] pci_bus_add_devices+0x27/0x60 [<00000000ad9ee708>] pci_bus_add_devices+0x52/0x60 unreferenced object 0xffff922c24ef3318 (size 8): comm "kworker/u16:2", pid 178, jiffies 4294711640 (age 956.620s) hex dump (first 8 bytes): 34 01 05 00 35 41 0a 00 4...5A.. backtrace: [<000000007ac80914>] xhci_mem_init+0xcf8/0xeb7 [<0000000001b6d775>] xhci_init+0x7c/0x160 [<00000000db443fe3>] xhci_gen_setup+0x214/0x340 [<00000000fdffd320>] xhci_pci_setup+0x48/0x110 [<00000000541e1e03>] usb_add_hcd.cold+0x265/0x747 [<00000000ca47a56b>] usb_hcd_pci_probe+0x219/0x3b4 [<0000000021043861>] xhci_pci_probe+0x24/0x1c0 [<00000000b9231f25>] local_pci_probe+0x3d/0x70 [<000000006385c9d7>] pci_device_probe+0xd0/0x150 [<0000000070241068>] really_probe+0xf5/0x3c0 [<0000000061f35c0a>] driver_probe_device+0x58/0x100 [<000000009da11198>] bus_for_each_drv+0x79/0xc0 [<000000009ce45f69>] __device_attach+0xda/0x160 [<00000000df201aaf>] pci_bus_add_device+0x46/0x70 [<0000000088a1bc48>] pci_bus_add_devices+0x27/0x60 [<00000000ad9ee708>] pci_bus_add_devices+0x52/0x60 Fix this by calling kfree() for the both psi objects in xhci_mem_cleanup(). Cc: # 4.4+ Fixes: 47189098f8be ("xhci: parse xhci protocol speed ID list for usb 3.1 usage") Signed-off-by: Mika Westerberg Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20191211142007.8847-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-mem.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index b7b55eb82714..a80a57decda1 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1866,10 +1866,14 @@ no_bw: kfree(xhci->port_array); kfree(xhci->rh_bw); kfree(xhci->ext_caps); + kfree(xhci->usb2_rhub.psi); + kfree(xhci->usb3_rhub.psi); xhci->usb2_ports = NULL; xhci->usb3_ports = NULL; xhci->port_array = NULL; + xhci->usb2_rhub.psi = NULL; + xhci->usb3_rhub.psi = NULL; xhci->rh_bw = NULL; xhci->ext_caps = NULL; From d12f592f95c956813e513a9f443f1375f5c73b28 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Wed, 11 Dec 2019 16:20:07 +0200 Subject: [PATCH 1679/3715] xhci: make sure interrupts are restored to correct state [ Upstream commit bd82873f23c9a6ad834348f8b83f3b6a5bca2c65 ] spin_unlock_irqrestore() might be called with stale flags after reading port status, possibly restoring interrupts to a incorrect state. If a usb2 port just finished resuming while the port status is read the spin lock will be temporary released and re-acquired in a separate function. The flags parameter is passed as value instead of a pointer, not updating flags properly before the final spin_unlock_irqrestore() is called. Cc: # v3.12+ Fixes: 8b3d45705e54 ("usb: Fix xHCI host issues on remote wakeup.") Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20191211142007.8847-7-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-hub.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 997ff183c9cb..95503bb9b067 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -855,7 +855,7 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd, struct xhci_bus_state *bus_state, __le32 __iomem **port_array, u16 wIndex, u32 raw_port_status, - unsigned long flags) + unsigned long *flags) __releases(&xhci->lock) __acquires(&xhci->lock) { @@ -937,12 +937,12 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd, xhci_set_link_state(xhci, port_array, wIndex, XDEV_U0); - spin_unlock_irqrestore(&xhci->lock, flags); + spin_unlock_irqrestore(&xhci->lock, *flags); time_left = wait_for_completion_timeout( &bus_state->rexit_done[wIndex], msecs_to_jiffies( XHCI_MAX_REXIT_TIMEOUT_MS)); - spin_lock_irqsave(&xhci->lock, flags); + spin_lock_irqsave(&xhci->lock, *flags); if (time_left) { slot_id = xhci_find_slot_id_by_port(hcd, @@ -1090,7 +1090,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, break; } status = xhci_get_port_status(hcd, bus_state, port_array, - wIndex, temp, flags); + wIndex, temp, &flags); if (status == 0xffffffff) goto error; From fe09dc3e5168d8b967a9e6fc66cfb421cacc6f82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Mon, 28 Oct 2019 17:33:49 +0100 Subject: [PATCH 1680/3715] iio: adis16480: Add debugfs_reg_access entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4c35b7a51e2f291471f7221d112c6a45c63e83bc ] The driver is defining debugfs entries by calling `adis16480_debugfs_init()`. However, those entries are attached to the iio_dev debugfs entry which won't exist if no debugfs_reg_access callback is provided. Fixes: 2f3abe6cbb6c ("iio:imu: Add support for the ADIS16480 and similar IMUs") Signed-off-by: Nuno Sá Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/imu/adis16480.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/imu/adis16480.c b/drivers/iio/imu/adis16480.c index 6f975538996c..c950aa10d0ae 100644 --- a/drivers/iio/imu/adis16480.c +++ b/drivers/iio/imu/adis16480.c @@ -724,6 +724,7 @@ static const struct iio_info adis16480_info = { .write_raw = &adis16480_write_raw, .update_scan_mode = adis_update_scan_mode, .driver_module = THIS_MODULE, + .debugfs_reg_access = adis_debugfs_reg_access, }; static int adis16480_stop_device(struct iio_dev *indio_dev) From 96dd9ce3d01630998200162864f256de495057cd Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 7 Oct 2019 16:55:10 +0900 Subject: [PATCH 1681/3715] phy: renesas: rcar-gen3-usb2: Fix sysfs interface of "role" [ Upstream commit 4bd5ead82d4b877ebe41daf95f28cda53205b039 ] Since the role_store() uses strncmp(), it's possible to refer out-of-memory if the sysfs data size is smaller than strlen("host"). This patch fixes it by using sysfs_streq() instead of strncmp(). Reported-by: Pavel Machek Fixes: 9bb86777fb71 ("phy: rcar-gen3-usb2: add sysfs for usb role swap") Cc: # v4.10+ Signed-off-by: Yoshihiro Shimoda Reviewed-by: Geert Uytterhoeven Acked-by: Pavel Machek Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Sasha Levin --- drivers/phy/renesas/phy-rcar-gen3-usb2.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/phy/renesas/phy-rcar-gen3-usb2.c b/drivers/phy/renesas/phy-rcar-gen3-usb2.c index 7f5e36bfeee8..f8c7ce89d8d7 100644 --- a/drivers/phy/renesas/phy-rcar-gen3-usb2.c +++ b/drivers/phy/renesas/phy-rcar-gen3-usb2.c @@ -22,6 +22,7 @@ #include #include #include +#include #include /******* USB2.0 Host registers (original offset is +0x200) *******/ @@ -234,9 +235,9 @@ static ssize_t role_store(struct device *dev, struct device_attribute *attr, */ is_b_device = rcar_gen3_check_id(ch); is_host = rcar_gen3_is_host(ch); - if (!strncmp(buf, "host", strlen("host"))) + if (sysfs_streq(buf, "host")) new_mode_is_host = true; - else if (!strncmp(buf, "peripheral", strlen("peripheral"))) + else if (sysfs_streq(buf, "peripheral")) new_mode_is_host = false; else return -EINVAL; From 0f3c518936e275eeb4a5608a171350bbd0b74ae5 Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Thu, 7 Nov 2019 11:30:39 +0100 Subject: [PATCH 1682/3715] omap: pdata-quirks: remove openpandora quirks for mmc3 and wl1251 [ Upstream commit 2398c41d64321e62af54424fd399964f3d48cdc2 ] With a wl1251 child node of mmc3 in the device tree decoded in omap_hsmmc.c to handle special wl1251 initialization, we do no longer need to instantiate the mmc3 through pdata quirks. We also can remove the wlan regulator and reset/interrupt definitions and do them through device tree. Fixes: 81eef6ca9201 ("mmc: omap_hsmmc: Use dma_request_chan() for requesting DMA channel") Signed-off-by: H. Nikolaus Schaller Cc: # v4.7+ Acked-by: Tony Lindgren Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- arch/arm/mach-omap2/pdata-quirks.c | 93 ------------------------------ 1 file changed, 93 deletions(-) diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index 6b433fce65a5..2477f6086de4 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -307,108 +307,15 @@ static void __init omap3_logicpd_torpedo_init(void) } /* omap3pandora legacy devices */ -#define PANDORA_WIFI_IRQ_GPIO 21 -#define PANDORA_WIFI_NRESET_GPIO 23 static struct platform_device pandora_backlight = { .name = "pandora-backlight", .id = -1, }; -static struct regulator_consumer_supply pandora_vmmc3_supply[] = { - REGULATOR_SUPPLY("vmmc", "omap_hsmmc.2"), -}; - -static struct regulator_init_data pandora_vmmc3 = { - .constraints = { - .valid_ops_mask = REGULATOR_CHANGE_STATUS, - }, - .num_consumer_supplies = ARRAY_SIZE(pandora_vmmc3_supply), - .consumer_supplies = pandora_vmmc3_supply, -}; - -static struct fixed_voltage_config pandora_vwlan = { - .supply_name = "vwlan", - .microvolts = 1800000, /* 1.8V */ - .gpio = PANDORA_WIFI_NRESET_GPIO, - .startup_delay = 50000, /* 50ms */ - .enable_high = 1, - .init_data = &pandora_vmmc3, -}; - -static struct platform_device pandora_vwlan_device = { - .name = "reg-fixed-voltage", - .id = 1, - .dev = { - .platform_data = &pandora_vwlan, - }, -}; - -static void pandora_wl1251_init_card(struct mmc_card *card) -{ - /* - * We have TI wl1251 attached to MMC3. Pass this information to - * SDIO core because it can't be probed by normal methods. - */ - if (card->type == MMC_TYPE_SDIO || card->type == MMC_TYPE_SD_COMBO) { - card->quirks |= MMC_QUIRK_NONSTD_SDIO; - card->cccr.wide_bus = 1; - card->cis.vendor = 0x104c; - card->cis.device = 0x9066; - card->cis.blksize = 512; - card->cis.max_dtr = 24000000; - card->ocr = 0x80; - } -} - -static struct omap2_hsmmc_info pandora_mmc3[] = { - { - .mmc = 3, - .caps = MMC_CAP_4_BIT_DATA | MMC_CAP_POWER_OFF_CARD, - .gpio_cd = -EINVAL, - .gpio_wp = -EINVAL, - .init_card = pandora_wl1251_init_card, - }, - {} /* Terminator */ -}; - -static void __init pandora_wl1251_init(void) -{ - struct wl1251_platform_data pandora_wl1251_pdata; - int ret; - - memset(&pandora_wl1251_pdata, 0, sizeof(pandora_wl1251_pdata)); - - pandora_wl1251_pdata.power_gpio = -1; - - ret = gpio_request_one(PANDORA_WIFI_IRQ_GPIO, GPIOF_IN, "wl1251 irq"); - if (ret < 0) - goto fail; - - pandora_wl1251_pdata.irq = gpio_to_irq(PANDORA_WIFI_IRQ_GPIO); - if (pandora_wl1251_pdata.irq < 0) - goto fail_irq; - - pandora_wl1251_pdata.use_eeprom = true; - ret = wl1251_set_platform_data(&pandora_wl1251_pdata); - if (ret < 0) - goto fail_irq; - - return; - -fail_irq: - gpio_free(PANDORA_WIFI_IRQ_GPIO); -fail: - pr_err("wl1251 board initialisation failed\n"); -} - static void __init omap3_pandora_legacy_init(void) { platform_device_register(&pandora_backlight); - platform_device_register(&pandora_vwlan_device); - omap_hsmmc_init(pandora_mmc3); - omap_hsmmc_late_init(pandora_mmc3); - pandora_wl1251_init(); } #endif /* CONFIG_ARCH_OMAP3 */ From 3bfe0a6aa251c88aa439e8a09fc68a858d6eecc9 Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 29 Nov 2018 16:09:33 -0800 Subject: [PATCH 1683/3715] scsi: lpfc: Cap NPIV vports to 256 [ Upstream commit 8b47ae69e049ae0b3373859d901f0334322f9fe9 ] Depending on the chipset, the number of NPIV vports may vary and be in excess of what most switches support (256). To avoid confusion with the users, limit the reported NPIV vports to 256. Additionally correct the 16G adapter which is reporting a bogus NPIV vport number if the link is down. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc.h | 3 ++- drivers/scsi/lpfc/lpfc_attr.c | 12 ++++++++++-- drivers/scsi/lpfc/lpfc_init.c | 3 +++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 03e95a3216c8..5fc41aa53ceb 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -969,7 +969,8 @@ struct lpfc_hba { struct list_head port_list; struct lpfc_vport *pport; /* physical lpfc_vport pointer */ uint16_t max_vpi; /* Maximum virtual nports */ -#define LPFC_MAX_VPI 0xFFFF /* Max number of VPI supported */ +#define LPFC_MAX_VPI 0xFF /* Max number VPI supported 0 - 0xff */ +#define LPFC_MAX_VPORTS 0x100 /* Max vports per port, with pport */ uint16_t max_vports; /* * For IOV HBAs max_vpi can change * after a reset. max_vports is max diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 82ce5d193018..f447355cc9c0 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -1478,6 +1478,9 @@ lpfc_get_hba_info(struct lpfc_hba *phba, max_vpi = (bf_get(lpfc_mbx_rd_conf_vpi_count, rd_config) > 0) ? (bf_get(lpfc_mbx_rd_conf_vpi_count, rd_config) - 1) : 0; + /* Limit the max we support */ + if (max_vpi > LPFC_MAX_VPI) + max_vpi = LPFC_MAX_VPI; if (mvpi) *mvpi = max_vpi; if (avpi) @@ -1493,8 +1496,13 @@ lpfc_get_hba_info(struct lpfc_hba *phba, *axri = pmb->un.varRdConfig.avail_xri; if (mvpi) *mvpi = pmb->un.varRdConfig.max_vpi; - if (avpi) - *avpi = pmb->un.varRdConfig.avail_vpi; + if (avpi) { + /* avail_vpi is only valid if link is up and ready */ + if (phba->link_state == LPFC_HBA_READY) + *avpi = pmb->un.varRdConfig.avail_vpi; + else + *avpi = pmb->un.varRdConfig.max_vpi; + } } mempool_free(pmboxq, phba->mbox_mem_pool); diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index c69c2a2b2ead..9fc5507ee39e 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -7643,6 +7643,9 @@ lpfc_sli4_read_config(struct lpfc_hba *phba) bf_get(lpfc_mbx_rd_conf_xri_base, rd_config); phba->sli4_hba.max_cfg_param.max_vpi = bf_get(lpfc_mbx_rd_conf_vpi_count, rd_config); + /* Limit the max we support */ + if (phba->sli4_hba.max_cfg_param.max_vpi > LPFC_MAX_VPORTS) + phba->sli4_hba.max_cfg_param.max_vpi = LPFC_MAX_VPORTS; phba->sli4_hba.max_cfg_param.vpi_base = bf_get(lpfc_mbx_rd_conf_vpi_base, rd_config); phba->sli4_hba.max_cfg_param.max_rpi = From cbe87667e31662c57c530ad3c50d2ded9157e9bf Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 29 Nov 2018 16:09:37 -0800 Subject: [PATCH 1684/3715] scsi: lpfc: Correct code setting non existent bits in sli4 ABORT WQE [ Upstream commit 1c36833d82ff24d0d54215fd956e7cc30fffce54 ] Driver is setting bits in word 10 of the SLI4 ABORT WQE (the wqid). The field was a carry over from a prior SLI revision. The field does not exist in SLI4, and the action may result in an overlap with future definition of the WQE. Remove the setting of WQID in the ABORT WQE. Also cleaned up WQE field settings - initialize to zero, don't bother to set fields to zero. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_nvme.c | 2 -- drivers/scsi/lpfc/lpfc_sli.c | 14 +++----------- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 6c4499db969c..fcf4b4175d77 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -1544,7 +1544,6 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, bf_set(abort_cmd_criteria, &abts_wqe->abort_cmd, T_XRI_TAG); /* word 7 */ - bf_set(wqe_ct, &abts_wqe->abort_cmd.wqe_com, 0); bf_set(wqe_cmnd, &abts_wqe->abort_cmd.wqe_com, CMD_ABORT_XRI_CX); bf_set(wqe_class, &abts_wqe->abort_cmd.wqe_com, nvmereq_wqe->iocb.ulpClass); @@ -1559,7 +1558,6 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, abts_buf->iotag); /* word 10 */ - bf_set(wqe_wqid, &abts_wqe->abort_cmd.wqe_com, nvmereq_wqe->hba_wqidx); bf_set(wqe_qosd, &abts_wqe->abort_cmd.wqe_com, 1); bf_set(wqe_lenloc, &abts_wqe->abort_cmd.wqe_com, LPFC_WQE_LENLOC_NONE); diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 62bea4ffdc25..d3bad0dbfaf7 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -10722,19 +10722,12 @@ lpfc_sli4_abort_nvme_io(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, /* Complete prepping the abort wqe and issue to the FW. */ abts_wqe = &abtsiocbp->wqe; - bf_set(abort_cmd_ia, &abts_wqe->abort_cmd, 0); + + /* Clear any stale WQE contents */ + memset(abts_wqe, 0, sizeof(union lpfc_wqe)); bf_set(abort_cmd_criteria, &abts_wqe->abort_cmd, T_XRI_TAG); - /* Explicitly set reserved fields to zero.*/ - abts_wqe->abort_cmd.rsrvd4 = 0; - abts_wqe->abort_cmd.rsrvd5 = 0; - - /* WQE Common - word 6. Context is XRI tag. Set 0. */ - bf_set(wqe_xri_tag, &abts_wqe->abort_cmd.wqe_com, 0); - bf_set(wqe_ctxt_tag, &abts_wqe->abort_cmd.wqe_com, 0); - /* word 7 */ - bf_set(wqe_ct, &abts_wqe->abort_cmd.wqe_com, 0); bf_set(wqe_cmnd, &abts_wqe->abort_cmd.wqe_com, CMD_ABORT_XRI_CX); bf_set(wqe_class, &abts_wqe->abort_cmd.wqe_com, cmdiocb->iocb.ulpClass); @@ -10749,7 +10742,6 @@ lpfc_sli4_abort_nvme_io(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, abtsiocbp->iotag); /* word 10 */ - bf_set(wqe_wqid, &abts_wqe->abort_cmd.wqe_com, cmdiocb->hba_wqidx); bf_set(wqe_qosd, &abts_wqe->abort_cmd.wqe_com, 1); bf_set(wqe_lenloc, &abts_wqe->abort_cmd.wqe_com, LPFC_WQE_LENLOC_NONE); From 20e106dfb16f1207308f832f0b2f7c09f4212ea2 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 20 Dec 2018 17:23:44 +0100 Subject: [PATCH 1685/3715] drbd: Change drbd_request_detach_interruptible's return type to int [ Upstream commit 5816a0932b4fd74257b8cc5785bc8067186a8723 ] Clang warns when an implicit conversion is done between enumerated types: drivers/block/drbd/drbd_state.c:708:8: warning: implicit conversion from enumeration type 'enum drbd_ret_code' to different enumeration type 'enum drbd_state_rv' [-Wenum-conversion] rv = ERR_INTR; ~ ^~~~~~~~ drbd_request_detach_interruptible's only call site is in the return statement of adm_detach, which returns an int. Change the return type of drbd_request_detach_interruptible to match, silencing Clang's warning. Reported-by: Nick Desaulniers Reviewed-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/drbd/drbd_state.c | 6 ++---- drivers/block/drbd/drbd_state.h | 3 +-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 0813c654c893..b452359b6aae 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -688,11 +688,9 @@ request_detach(struct drbd_device *device) CS_VERBOSE | CS_ORDERED | CS_INHIBIT_MD_IO); } -enum drbd_state_rv -drbd_request_detach_interruptible(struct drbd_device *device) +int drbd_request_detach_interruptible(struct drbd_device *device) { - enum drbd_state_rv rv; - int ret; + int ret, rv; drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */ wait_event_interruptible(device->state_wait, diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h index b2a390ba73a0..f87371e55e68 100644 --- a/drivers/block/drbd/drbd_state.h +++ b/drivers/block/drbd/drbd_state.h @@ -162,8 +162,7 @@ static inline int drbd_request_state(struct drbd_device *device, } /* for use in adm_detach() (drbd_adm_detach(), drbd_adm_down()) */ -enum drbd_state_rv -drbd_request_detach_interruptible(struct drbd_device *device); +int drbd_request_detach_interruptible(struct drbd_device *device); enum drbd_role conn_highest_role(struct drbd_connection *connection); enum drbd_role conn_highest_peer(struct drbd_connection *connection); From f27b638437dadd059ea182c4fb13e310241758d5 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 19 Nov 2018 20:48:19 +0800 Subject: [PATCH 1686/3715] e100: Fix passing zero to 'PTR_ERR' warning in e100_load_ucode_wait [ Upstream commit cd0d465bb697a9c7bf66a9fe940f7981232f1676 ] Fix a static code checker warning: drivers/net/ethernet/intel/e100.c:1349 e100_load_ucode_wait() warn: passing zero to 'PTR_ERR' Signed-off-by: YueHaibing Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/e100.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 4d10270ddf8f..90974462743b 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -1370,8 +1370,8 @@ static inline int e100_load_ucode_wait(struct nic *nic) fw = e100_request_firmware(nic); /* If it's NULL, then no ucode is required */ - if (!fw || IS_ERR(fw)) - return PTR_ERR(fw); + if (IS_ERR_OR_NULL(fw)) + return PTR_ERR_OR_ZERO(fw); if ((err = e100_exec_cb(nic, (void *)fw, e100_setup_ucode))) netif_err(nic, probe, nic->netdev, From 67717e5ec607db7b841f2c48eb8c030d7466a0df Mon Sep 17 00:00:00 2001 From: Shirish S Date: Thu, 10 Jan 2019 07:54:40 +0000 Subject: [PATCH 1687/3715] x86/MCE/AMD: Turn off MC4_MISC thresholding on all family 0x15 models [ Upstream commit c95b323dcd3598dd7ef5005d6723c1ba3b801093 ] MC4_MISC thresholding is not supported on all family 0x15 processors, hence skip the x86_model check when applying the quirk. [ bp: massage commit message. ] Signed-off-by: Shirish S Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Tony Luck Cc: Vishal Verma Cc: x86-ml Link: https://lkml.kernel.org/r/1547106849-3476-2-git-send-email-shirish.s@amd.com Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/mcheck/mce.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 4f3be91f0b0b..dcc11303885b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1661,11 +1661,10 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) mce_flags.overflow_recov = 1; /* - * Turn off MC4_MISC thresholding banks on those models since + * Turn off MC4_MISC thresholding banks on all models since * they're not supported there. */ - if (c->x86 == 0x15 && - (c->x86_model >= 0x10 && c->x86_model <= 0x1f)) { + if (c->x86 == 0x15) { int i; u64 hwcr; bool need_toggle; From b5b43a3dba23028b2a3ea388fe4d9d6e8ebec9ef Mon Sep 17 00:00:00 2001 From: Shirish S Date: Wed, 16 Jan 2019 15:10:40 +0000 Subject: [PATCH 1688/3715] x86/MCE/AMD: Carve out the MC4_MISC thresholding quirk [ Upstream commit 30aa3d26edb0f3d7992757287eec0ca588a5c259 ] The MC4_MISC thresholding quirk needs to be applied during S5 -> S0 and S3 -> S0 state transitions, which follow different code paths. Carve it out into a separate function and call it mce_amd_feature_init() where the two code paths of the state transitions converge. [ bp: massage commit message and the carved out function. ] Signed-off-by: Shirish S Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Kees Cook Cc: Thomas Gleixner Cc: Tony Luck Cc: Vishal Verma Cc: Yazen Ghannam Cc: x86-ml Link: https://lkml.kernel.org/r/1547651417-23583-3-git-send-email-shirish.s@amd.com Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/mcheck/mce.c | 29 ---------------------- arch/x86/kernel/cpu/mcheck/mce_amd.c | 36 ++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 29 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index dcc11303885b..c7bd2e549a6a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1660,35 +1660,6 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) if (c->x86 == 0x15 && c->x86_model <= 0xf) mce_flags.overflow_recov = 1; - /* - * Turn off MC4_MISC thresholding banks on all models since - * they're not supported there. - */ - if (c->x86 == 0x15) { - int i; - u64 hwcr; - bool need_toggle; - u32 msrs[] = { - 0x00000413, /* MC4_MISC0 */ - 0xc0000408, /* MC4_MISC1 */ - }; - - rdmsrl(MSR_K7_HWCR, hwcr); - - /* McStatusWrEn has to be set */ - need_toggle = !(hwcr & BIT(18)); - - if (need_toggle) - wrmsrl(MSR_K7_HWCR, hwcr | BIT(18)); - - /* Clear CntP bit safely */ - for (i = 0; i < ARRAY_SIZE(msrs); i++) - msr_clear_bit(msrs[i], 62); - - /* restore old settings */ - if (need_toggle) - wrmsrl(MSR_K7_HWCR, hwcr); - } } if (c->x86_vendor == X86_VENDOR_INTEL) { diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 4fa97a44e73f..b434780ae680 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -544,6 +544,40 @@ out: return offset; } +/* + * Turn off MC4_MISC thresholding banks on all family 0x15 models since + * they're not supported there. + */ +void disable_err_thresholding(struct cpuinfo_x86 *c) +{ + int i; + u64 hwcr; + bool need_toggle; + u32 msrs[] = { + 0x00000413, /* MC4_MISC0 */ + 0xc0000408, /* MC4_MISC1 */ + }; + + if (c->x86 != 0x15) + return; + + rdmsrl(MSR_K7_HWCR, hwcr); + + /* McStatusWrEn has to be set */ + need_toggle = !(hwcr & BIT(18)); + + if (need_toggle) + wrmsrl(MSR_K7_HWCR, hwcr | BIT(18)); + + /* Clear CntP bit safely */ + for (i = 0; i < ARRAY_SIZE(msrs); i++) + msr_clear_bit(msrs[i], 62); + + /* restore old settings */ + if (need_toggle) + wrmsrl(MSR_K7_HWCR, hwcr); +} + /* cpu init entry point, called from mce.c with preempt off */ void mce_amd_feature_init(struct cpuinfo_x86 *c) { @@ -551,6 +585,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) unsigned int bank, block, cpu = smp_processor_id(); int offset = -1; + disable_err_thresholding(c); + for (bank = 0; bank < mca_cfg.banks; ++bank) { if (mce_flags.smca) smca_configure(bank, cpu); From 8799fa7c7d07cb2fa5d375069a07de8f3fda33b4 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sun, 7 Apr 2019 11:12:50 -0700 Subject: [PATCH 1689/3715] power: supply: cpcap-battery: Fix signed counter sample register [ Upstream commit c68b901ac4fa969db8917b6a9f9b40524a690d20 ] The accumulator sample register is signed 32-bits wide register on droid 4. And only the earlier version of cpcap has a signed 24-bits wide register. We're currently passing it around as unsigned, so let's fix that and use sign_extend32() for the earlier revision. Signed-off-by: Tony Lindgren Acked-by: Pavel Machek Signed-off-by: Sebastian Reichel Signed-off-by: Sasha Levin --- drivers/power/supply/cpcap-battery.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/power/supply/cpcap-battery.c b/drivers/power/supply/cpcap-battery.c index fe7fcf3a2ad0..7df9d432ee42 100644 --- a/drivers/power/supply/cpcap-battery.c +++ b/drivers/power/supply/cpcap-battery.c @@ -82,7 +82,7 @@ struct cpcap_battery_config { }; struct cpcap_coulomb_counter_data { - s32 sample; /* 24-bits */ + s32 sample; /* 24 or 32 bits */ s32 accumulator; s16 offset; /* 10-bits */ }; @@ -213,7 +213,7 @@ static int cpcap_battery_get_current(struct cpcap_battery_ddata *ddata) * TI or ST coulomb counter in the PMIC. */ static int cpcap_battery_cc_raw_div(struct cpcap_battery_ddata *ddata, - u32 sample, s32 accumulator, + s32 sample, s32 accumulator, s16 offset, u32 divider) { s64 acc; @@ -224,7 +224,6 @@ static int cpcap_battery_cc_raw_div(struct cpcap_battery_ddata *ddata, if (!divider) return 0; - sample &= 0xffffff; /* 24-bits, unsigned */ offset &= 0x7ff; /* 10-bits, signed */ switch (ddata->vendor) { @@ -259,7 +258,7 @@ static int cpcap_battery_cc_raw_div(struct cpcap_battery_ddata *ddata, /* 3600000μAms = 1μAh */ static int cpcap_battery_cc_to_uah(struct cpcap_battery_ddata *ddata, - u32 sample, s32 accumulator, + s32 sample, s32 accumulator, s16 offset) { return cpcap_battery_cc_raw_div(ddata, sample, @@ -268,7 +267,7 @@ static int cpcap_battery_cc_to_uah(struct cpcap_battery_ddata *ddata, } static int cpcap_battery_cc_to_ua(struct cpcap_battery_ddata *ddata, - u32 sample, s32 accumulator, + s32 sample, s32 accumulator, s16 offset) { return cpcap_battery_cc_raw_div(ddata, sample, @@ -312,6 +311,8 @@ cpcap_battery_read_accumulated(struct cpcap_battery_ddata *ddata, /* Sample value CPCAP_REG_CCS1 & 2 */ ccd->sample = (buf[1] & 0x0fff) << 16; ccd->sample |= buf[0]; + if (ddata->vendor == CPCAP_VENDOR_TI) + ccd->sample = sign_extend32(24, ccd->sample); /* Accumulator value CPCAP_REG_CCA1 & 2 */ ccd->accumulator = ((s16)buf[3]) << 16; From 7aeb6588a4a777496834787f4f19cae9dcdfecbe Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 11 Jun 2019 10:19:41 +0300 Subject: [PATCH 1690/3715] mlxsw: spectrum_router: Refresh nexthop neighbour when it becomes dead [ Upstream commit 83d5782681cc12b3d485a83cb34c46b2445f510c ] The driver tries to periodically refresh neighbours that are used to reach nexthops. This is done by periodically calling neigh_event_send(). However, if the neighbour becomes dead, there is nothing we can do to return it to a connected state and the above function call is basically a NOP. This results in the nexthop never being written to the device's adjacency table and therefore never used to forward packets. Fix this by dropping our reference from the dead neighbour and associating the nexthop with a new neigbhour which we will try to refresh. Fixes: a7ff87acd995 ("mlxsw: spectrum_router: Implement next-hop routing") Signed-off-by: Ido Schimmel Reported-by: Alex Veber Tested-by: Alex Veber Acked-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 73 ++++++++++++++++++- 1 file changed, 70 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 5b9a5c3834d9..05a2006a20b9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1762,7 +1762,7 @@ static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work) static void mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry, - bool removing); + bool removing, bool dead); static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding) { @@ -1891,7 +1891,8 @@ static void mlxsw_sp_router_neigh_event_work(struct work_struct *work) memcpy(neigh_entry->ha, ha, ETH_ALEN); mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected); - mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected); + mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected, + dead); if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list)) mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); @@ -2535,13 +2536,79 @@ static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh, nh->update = 1; } +static int +mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + struct neighbour *n, *old_n = neigh_entry->key.n; + struct mlxsw_sp_nexthop *nh; + bool entry_connected; + u8 nud_state, dead; + int err; + + nh = list_first_entry(&neigh_entry->nexthop_list, + struct mlxsw_sp_nexthop, neigh_list_node); + + n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev); + if (!n) { + n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr, + nh->rif->dev); + if (IS_ERR(n)) + return PTR_ERR(n); + neigh_event_send(n, NULL); + } + + mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry); + neigh_entry->key.n = n; + err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry); + if (err) + goto err_neigh_entry_insert; + + read_lock_bh(&n->lock); + nud_state = n->nud_state; + dead = n->dead; + read_unlock_bh(&n->lock); + entry_connected = nud_state & NUD_VALID && !dead; + + list_for_each_entry(nh, &neigh_entry->nexthop_list, + neigh_list_node) { + neigh_release(old_n); + neigh_clone(n); + __mlxsw_sp_nexthop_neigh_update(nh, !entry_connected); + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp); + } + + neigh_release(n); + + return 0; + +err_neigh_entry_insert: + neigh_entry->key.n = old_n; + mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry); + neigh_release(n); + return err; +} + static void mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry, - bool removing) + bool removing, bool dead) { struct mlxsw_sp_nexthop *nh; + if (list_empty(&neigh_entry->nexthop_list)) + return; + + if (dead) { + int err; + + err = mlxsw_sp_nexthop_dead_neigh_replace(mlxsw_sp, + neigh_entry); + if (err) + dev_err(mlxsw_sp->bus_info->dev, "Failed to replace dead neigh\n"); + return; + } + list_for_each_entry(nh, &neigh_entry->nexthop_list, neigh_list_node) { __mlxsw_sp_nexthop_neigh_update(nh, removing); From 5e3816e76d26696acb298920e0cf9be495a9c99c Mon Sep 17 00:00:00 2001 From: Helen Koike Date: Fri, 17 May 2019 13:20:11 -0400 Subject: [PATCH 1691/3715] media: vimc: fix component match compare [ Upstream commit ee1c71a8e1456ab53fe667281d855849edf26a4d ] If the system has other devices being registered in the component framework, the compare function will be called with a device that doesn't belong to vimc. This device is not necessarily a platform_device, nor have a platform_data (which causes a NULL pointer dereference error) and if it does have a pdata, it is not necessarily type of struct vimc_platform_data. So casting to any of these types is wrong. Instead of expecting a given pdev with a given pdata, just expect for the device it self. vimc-core is the one who creates them, we know in advance exactly which object to expect in the match. Fixes: 4a29b7090749 ("[media] vimc: Subdevices as modules") Signed-off-by: Helen Koike Reviewed-by: Boris Brezillon Tested-by: Boris Brezillon Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/vimc/vimc-core.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/media/platform/vimc/vimc-core.c b/drivers/media/platform/vimc/vimc-core.c index 57e5d6a020b0..447a01ff4e23 100644 --- a/drivers/media/platform/vimc/vimc-core.c +++ b/drivers/media/platform/vimc/vimc-core.c @@ -243,10 +243,7 @@ static void vimc_comp_unbind(struct device *master) static int vimc_comp_compare(struct device *comp, void *data) { - const struct platform_device *pdev = to_platform_device(comp); - const char *name = data; - - return !strcmp(pdev->dev.platform_data, name); + return comp == data; } static struct component_match *vimc_add_subdevs(struct vimc_device *vimc) @@ -275,7 +272,7 @@ static struct component_match *vimc_add_subdevs(struct vimc_device *vimc) } component_match_add(&vimc->pdev.dev, &match, vimc_comp_compare, - (void *)vimc->pipe_cfg->ents[i].name); + &vimc->subdevs[i]->dev); } return match; From cd087b7f2a1db06185461f143f039a399f5b9e21 Mon Sep 17 00:00:00 2001 From: Miaoqing Pan Date: Fri, 24 May 2019 11:16:22 +0800 Subject: [PATCH 1692/3715] ath10k: fix fw crash by moving chip reset after napi disabled [ Upstream commit 08d80e4cd27ba19f9bee9e5f788f9a9fc440a22f ] On SMP platform, when continuously running wifi up/down, the napi poll can be scheduled during chip reset, which will call ath10k_pci_has_fw_crashed() to check the fw status. But in the reset period, the value from FW_INDICATOR_ADDRESS register will return 0xdeadbeef, which also be treated as fw crash. Fix the issue by moving chip reset after napi disabled. ath10k_pci 0000:01:00.0: firmware crashed! (guid 73b30611-5b1e-4bdd-90b4-64c81eb947b6) ath10k_pci 0000:01:00.0: qca9984/qca9994 hw1.0 target 0x01000000 chip_id 0x00000000 sub 168c:cafe ath10k_pci 0000:01:00.0: htt-ver 2.2 wmi-op 6 htt-op 4 cal otp max-sta 512 raw 0 hwcrypto 1 ath10k_pci 0000:01:00.0: failed to get memcpy hi address for firmware address 4: -16 ath10k_pci 0000:01:00.0: failed to read firmware dump area: -16 ath10k_pci 0000:01:00.0: Copy Engine register dump: ath10k_pci 0000:01:00.0: [00]: 0x0004a000 0 0 0 0 ath10k_pci 0000:01:00.0: [01]: 0x0004a400 0 0 0 0 ath10k_pci 0000:01:00.0: [02]: 0x0004a800 0 0 0 0 ath10k_pci 0000:01:00.0: [03]: 0x0004ac00 0 0 0 0 ath10k_pci 0000:01:00.0: [04]: 0x0004b000 0 0 0 0 ath10k_pci 0000:01:00.0: [05]: 0x0004b400 0 0 0 0 ath10k_pci 0000:01:00.0: [06]: 0x0004b800 0 0 0 0 ath10k_pci 0000:01:00.0: [07]: 0x0004bc00 1 0 1 0 ath10k_pci 0000:01:00.0: [08]: 0x0004c000 0 0 0 0 ath10k_pci 0000:01:00.0: [09]: 0x0004c400 0 0 0 0 ath10k_pci 0000:01:00.0: [10]: 0x0004c800 0 0 0 0 ath10k_pci 0000:01:00.0: [11]: 0x0004cc00 0 0 0 0 Tested HW: QCA9984,QCA9887,WCN3990 Signed-off-by: Miaoqing Pan Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath10k/pci.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index 0298ddc1ff06..f9e409caca68 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -1771,6 +1771,11 @@ static void ath10k_pci_hif_stop(struct ath10k *ar) ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot hif stop\n"); + ath10k_pci_irq_disable(ar); + ath10k_pci_irq_sync(ar); + napi_synchronize(&ar->napi); + napi_disable(&ar->napi); + /* Most likely the device has HTT Rx ring configured. The only way to * prevent the device from accessing (and possible corrupting) host * memory is to reset the chip now. @@ -1784,10 +1789,6 @@ static void ath10k_pci_hif_stop(struct ath10k *ar) */ ath10k_pci_safe_chip_reset(ar); - ath10k_pci_irq_disable(ar); - ath10k_pci_irq_sync(ar); - napi_synchronize(&ar->napi); - napi_disable(&ar->napi); ath10k_pci_flush(ar); spin_lock_irqsave(&ar_pci->ps_lock, flags); From c1a61364fb72a1b2c03ec9fbf9d5329e432956ea Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 18 Nov 2019 21:57:11 -0700 Subject: [PATCH 1693/3715] powerpc: Avoid clang warnings around setjmp and longjmp [ Upstream commit c9029ef9c95765e7b63c4d9aa780674447db1ec0 ] Commit aea447141c7e ("powerpc: Disable -Wbuiltin-requires-header when setjmp is used") disabled -Wbuiltin-requires-header because of a warning about the setjmp and longjmp declarations. r367387 in clang added another diagnostic around this, complaining that there is no jmp_buf declaration. In file included from ../arch/powerpc/xmon/xmon.c:47: ../arch/powerpc/include/asm/setjmp.h:10:13: error: declaration of built-in function 'setjmp' requires the declaration of the 'jmp_buf' type, commonly provided in the header . [-Werror,-Wincomplete-setjmp-declaration] extern long setjmp(long *); ^ ../arch/powerpc/include/asm/setjmp.h:11:13: error: declaration of built-in function 'longjmp' requires the declaration of the 'jmp_buf' type, commonly provided in the header . [-Werror,-Wincomplete-setjmp-declaration] extern void longjmp(long *, long); ^ 2 errors generated. We are not using the standard library's longjmp/setjmp implementations for obvious reasons; make this clear to clang by using -ffreestanding on these files. Cc: stable@vger.kernel.org # 4.14+ Suggested-by: Segher Boessenkool Reviewed-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191119045712.39633-3-natechancellor@gmail.com Signed-off-by: Sasha Levin --- arch/powerpc/kernel/Makefile | 4 ++-- arch/powerpc/xmon/Makefile | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 142b08d40642..5607ce67d178 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -5,8 +5,8 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' -# Disable clang warning for using setjmp without setjmp.h header -CFLAGS_crash.o += $(call cc-disable-warning, builtin-requires-header) +# Avoid clang warnings around longjmp/setjmp declarations +CFLAGS_crash.o += -ffreestanding subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile index ac5ee067aa51..a60c44b4a3e5 100644 --- a/arch/powerpc/xmon/Makefile +++ b/arch/powerpc/xmon/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for xmon -# Disable clang warning for using setjmp without setjmp.h header -subdir-ccflags-y := $(call cc-disable-warning, builtin-requires-header) +# Avoid clang warnings around longjmp/setjmp declarations +subdir-ccflags-y := -ffreestanding subdir-ccflags-$(CONFIG_PPC_WERROR) += -Werror From cf9e11a310348a49ed481195ea6d686d3a3bfd1c Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 2 Dec 2019 07:57:29 +0000 Subject: [PATCH 1694/3715] powerpc: Fix vDSO clock_getres() [ Upstream commit 552263456215ada7ee8700ce022d12b0cffe4802 ] clock_getres in the vDSO library has to preserve the same behaviour of posix_get_hrtimer_res(). In particular, posix_get_hrtimer_res() does: sec = 0; ns = hrtimer_resolution; and hrtimer_resolution depends on the enablement of the high resolution timers that can happen either at compile or at run time. Fix the powerpc vdso implementation of clock_getres keeping a copy of hrtimer_resolution in vdso data and using that directly. Fixes: a7f290dad32e ("[PATCH] powerpc: Merge vdso's and add vdso support to 32 bits kernel") Cc: stable@vger.kernel.org Signed-off-by: Vincenzo Frascino Reviewed-by: Christophe Leroy Acked-by: Shuah Khan [chleroy: changed CLOCK_REALTIME_RES to CLOCK_HRTIMER_RES] Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a55eca3a5e85233838c2349783bcb5164dae1d09.1575273217.git.christophe.leroy@c-s.fr Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/vdso_datapage.h | 2 ++ arch/powerpc/kernel/asm-offsets.c | 2 +- arch/powerpc/kernel/time.c | 1 + arch/powerpc/kernel/vdso32/gettimeofday.S | 7 +++++-- arch/powerpc/kernel/vdso64/gettimeofday.S | 7 +++++-- 5 files changed, 14 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h index 1afe90ade595..674c03350cd1 100644 --- a/arch/powerpc/include/asm/vdso_datapage.h +++ b/arch/powerpc/include/asm/vdso_datapage.h @@ -86,6 +86,7 @@ struct vdso_data { __s32 wtom_clock_nsec; struct timespec stamp_xtime; /* xtime as at tb_orig_stamp */ __u32 stamp_sec_fraction; /* fractional seconds of stamp_xtime */ + __u32 hrtimer_res; /* hrtimer resolution */ __u32 syscall_map_64[SYSCALL_MAP_SIZE]; /* map of syscalls */ __u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */ }; @@ -107,6 +108,7 @@ struct vdso_data { __s32 wtom_clock_nsec; struct timespec stamp_xtime; /* xtime as at tb_orig_stamp */ __u32 stamp_sec_fraction; /* fractional seconds of stamp_xtime */ + __u32 hrtimer_res; /* hrtimer resolution */ __u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */ __u32 dcache_block_size; /* L1 d-cache block size */ __u32 icache_block_size; /* L1 i-cache block size */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 2e5ea300258a..1bc761e537a9 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -373,6 +373,7 @@ int main(void) OFFSET(WTOM_CLOCK_NSEC, vdso_data, wtom_clock_nsec); OFFSET(STAMP_XTIME, vdso_data, stamp_xtime); OFFSET(STAMP_SEC_FRAC, vdso_data, stamp_sec_fraction); + OFFSET(CLOCK_HRTIMER_RES, vdso_data, hrtimer_res); OFFSET(CFG_ICACHE_BLOCKSZ, vdso_data, icache_block_size); OFFSET(CFG_DCACHE_BLOCKSZ, vdso_data, dcache_block_size); OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_data, icache_log_block_size); @@ -401,7 +402,6 @@ int main(void) DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); DEFINE(NSEC_PER_SEC, NSEC_PER_SEC); - DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); #ifdef CONFIG_BUG DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry)); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 7c7c5a16284d..14f3f28a089e 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -920,6 +920,7 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->wtom_clock_nsec = tk->wall_to_monotonic.tv_nsec; vdso_data->stamp_xtime = xt; vdso_data->stamp_sec_fraction = frac_sec; + vdso_data->hrtimer_res = hrtimer_resolution; smp_wmb(); ++(vdso_data->tb_update_count); } diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index 1e0bc5955a40..03a65fee8020 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -160,12 +160,15 @@ V_FUNCTION_BEGIN(__kernel_clock_getres) cror cr0*4+eq,cr0*4+eq,cr1*4+eq bne cr0,99f + mflr r12 + .cfi_register lr,r12 + bl __get_datapage@local /* get data page */ + lwz r5, CLOCK_HRTIMER_RES(r3) + mtlr r12 li r3,0 cmpli cr0,r4,0 crclr cr0*4+so beqlr - lis r5,CLOCK_REALTIME_RES@h - ori r5,r5,CLOCK_REALTIME_RES@l stw r3,TSPC32_TV_SEC(r4) stw r5,TSPC32_TV_NSEC(r4) blr diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S index 09b2a49f6dd5..c973378e1f2b 100644 --- a/arch/powerpc/kernel/vdso64/gettimeofday.S +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S @@ -145,12 +145,15 @@ V_FUNCTION_BEGIN(__kernel_clock_getres) cror cr0*4+eq,cr0*4+eq,cr1*4+eq bne cr0,99f + mflr r12 + .cfi_register lr,r12 + bl V_LOCAL_FUNC(__get_datapage) + lwz r5, CLOCK_HRTIMER_RES(r3) + mtlr r12 li r3,0 cmpldi cr0,r4,0 crclr cr0*4+so beqlr - lis r5,CLOCK_REALTIME_RES@h - ori r5,r5,CLOCK_REALTIME_RES@l std r3,TSPC64_TV_SEC(r4) std r5,TSPC64_TV_NSEC(r4) blr From d5a2955049171e48feba628e60f15206689bba94 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 11 Nov 2019 22:18:13 -0500 Subject: [PATCH 1695/3715] ext4: work around deleting a file with i_nlink == 0 safely commit c7df4a1ecb8579838ec8c56b2bb6a6716e974f37 upstream. If the file system is corrupted such that a file's i_links_count is too small, then it's possible that when unlinking that file, i_nlink will already be zero. Previously we were working around this kind of corruption by forcing i_nlink to one; but we were doing this before trying to delete the directory entry --- and if the file system is corrupted enough that ext4_delete_entry() fails, then we exit with i_nlink elevated, and this causes the orphan inode list handling to be FUBAR'ed, such that when we unmount the file system, the orphan inode list can get corrupted. A better way to fix this is to simply skip trying to call drop_nlink() if i_nlink is already zero, thus moving the check to the place where it makes the most sense. https://bugzilla.kernel.org/show_bug.cgi?id=205433 Link: https://lore.kernel.org/r/20191112032903.8828-1-tytso@mit.edu Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Reviewed-by: Andreas Dilger Signed-off-by: Greg Kroah-Hartman --- fs/ext4/namei.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 212b01861d94..b4e0c270def4 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -3065,18 +3065,17 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) if (IS_DIRSYNC(dir)) ext4_handle_sync(handle); - if (inode->i_nlink == 0) { - ext4_warning_inode(inode, "Deleting file '%.*s' with no links", - dentry->d_name.len, dentry->d_name.name); - set_nlink(inode, 1); - } retval = ext4_delete_entry(handle, dir, de, bh); if (retval) goto end_unlink; dir->i_ctime = dir->i_mtime = current_time(dir); ext4_update_dx_flag(dir); ext4_mark_inode_dirty(handle, dir); - drop_nlink(inode); + if (inode->i_nlink == 0) + ext4_warning_inode(inode, "Deleting file '%.*s' with no links", + dentry->d_name.len, dentry->d_name.name); + else + drop_nlink(inode); if (!inode->i_nlink) ext4_orphan_add(handle, inode); inode->i_ctime = current_time(inode); From e61a32ba9a028d0288cfb0f9ec4243086e8ec90b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 4 Nov 2019 15:58:15 +0000 Subject: [PATCH 1696/3715] firmware: qcom: scm: Ensure 'a0' status code is treated as signed commit ff34f3cce278a0982a7b66b1afaed6295141b1fc upstream. The 'a0' member of 'struct arm_smccc_res' is declared as 'unsigned long', however the Qualcomm SCM firmware interface driver expects to receive negative error codes via this field, so ensure that it's cast to 'long' before comparing to see if it is less than 0. Cc: Reviewed-by: Bjorn Andersson Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/qcom_scm-64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/qcom_scm-64.c b/drivers/firmware/qcom_scm-64.c index 6e6d561708e2..e9001075f676 100644 --- a/drivers/firmware/qcom_scm-64.c +++ b/drivers/firmware/qcom_scm-64.c @@ -158,7 +158,7 @@ static int qcom_scm_call(struct device *dev, u32 svc_id, u32 cmd_id, kfree(args_virt); } - if (res->a0 < 0) + if ((long)res->a0 < 0) return qcom_scm_remap_error(res->a0); return 0; From f49aa24d035c965146a96c2c64901b099f7fbae1 Mon Sep 17 00:00:00 2001 From: Chen Jun Date: Sat, 30 Nov 2019 17:58:11 -0800 Subject: [PATCH 1697/3715] mm/shmem.c: cast the type of unmap_start to u64 commit aa71ecd8d86500da6081a72da6b0b524007e0627 upstream. In 64bit system. sb->s_maxbytes of shmem filesystem is MAX_LFS_FILESIZE, which equal LLONG_MAX. If offset > LLONG_MAX - PAGE_SIZE, offset + len < LLONG_MAX in shmem_fallocate, which will pass the checking in vfs_fallocate. /* Check for wrap through zero too */ if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0)) return -EFBIG; loff_t unmap_start = round_up(offset, PAGE_SIZE) in shmem_fallocate causes a overflow. Syzkaller reports a overflow problem in mm/shmem: UBSAN: Undefined behaviour in mm/shmem.c:2014:10 signed integer overflow: '9223372036854775807 + 1' cannot be represented in type 'long long int' CPU: 0 PID:17076 Comm: syz-executor0 Not tainted 4.1.46+ #1 Hardware name: linux, dummy-virt (DT) Call trace: dump_backtrace+0x0/0x2c8 arch/arm64/kernel/traps.c:100 show_stack+0x20/0x30 arch/arm64/kernel/traps.c:238 __dump_stack lib/dump_stack.c:15 [inline] ubsan_epilogue+0x18/0x70 lib/ubsan.c:164 handle_overflow+0x158/0x1b0 lib/ubsan.c:195 shmem_fallocate+0x6d0/0x820 mm/shmem.c:2104 vfs_fallocate+0x238/0x428 fs/open.c:312 SYSC_fallocate fs/open.c:335 [inline] SyS_fallocate+0x54/0xc8 fs/open.c:239 The highest bit of unmap_start will be appended with sign bit 1 (overflow) when calculate shmem_falloc.start: shmem_falloc.start = unmap_start >> PAGE_SHIFT. Fix it by casting the type of unmap_start to u64, when right shifted. This bug is found in LTS Linux 4.1. It also seems to exist in mainline. Link: http://lkml.kernel.org/r/1573867464-5107-1-git-send-email-chenjun102@huawei.com Signed-off-by: Chen Jun Reviewed-by: Andrew Morton Cc: Hugh Dickins Cc: Qian Cai Cc: Kefeng Wang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/shmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/shmem.c b/mm/shmem.c index e55aa460a2c0..69106c600692 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2895,7 +2895,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, } shmem_falloc.waitq = &shmem_falloc_waitq; - shmem_falloc.start = unmap_start >> PAGE_SHIFT; + shmem_falloc.start = (u64)unmap_start >> PAGE_SHIFT; shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT; spin_lock(&inode->i_lock); inode->i_private = &shmem_falloc; From 7c112026ce3b6d215b9ceccbe339634b17c1e0f3 Mon Sep 17 00:00:00 2001 From: yangerkun Date: Thu, 19 Sep 2019 14:35:08 +0800 Subject: [PATCH 1698/3715] ext4: fix a bug in ext4_wait_for_tail_page_commit commit 565333a1554d704789e74205989305c811fd9c7a upstream. No need to wait for any commit once the page is fully truncated. Besides, it may confuse e.g. concurrent ext4_writepage() with the page still be dirty (will be cleared by truncate_pagecache() in ext4_setattr()) but buffers has been freed; and then trigger a bug show as below: [ 26.057508] ------------[ cut here ]------------ [ 26.058531] kernel BUG at fs/ext4/inode.c:2134! ... [ 26.088130] Call trace: [ 26.088695] ext4_writepage+0x914/0xb28 [ 26.089541] writeout.isra.4+0x1b4/0x2b8 [ 26.090409] move_to_new_page+0x3b0/0x568 [ 26.091338] __unmap_and_move+0x648/0x988 [ 26.092241] unmap_and_move+0x48c/0xbb8 [ 26.093096] migrate_pages+0x220/0xb28 [ 26.093945] kernel_mbind+0x828/0xa18 [ 26.094791] __arm64_sys_mbind+0xc8/0x138 [ 26.095716] el0_svc_common+0x190/0x490 [ 26.096571] el0_svc_handler+0x60/0xd0 [ 26.097423] el0_svc+0x8/0xc Run the procedure (generate by syzkaller) parallel with ext3. void main() { int fd, fd1, ret; void *addr; size_t length = 4096; int flags; off_t offset = 0; char *str = "12345"; fd = open("a", O_RDWR | O_CREAT); assert(fd >= 0); /* Truncate to 4k */ ret = ftruncate(fd, length); assert(ret == 0); /* Journal data mode */ flags = 0xc00f; ret = ioctl(fd, _IOW('f', 2, long), &flags); assert(ret == 0); /* Truncate to 0 */ fd1 = open("a", O_TRUNC | O_NOATIME); assert(fd1 >= 0); addr = mmap(NULL, length, PROT_WRITE | PROT_READ, MAP_SHARED, fd, offset); assert(addr != (void *)-1); memcpy(addr, str, 5); mbind(addr, length, 0, 0, 0, MPOL_MF_MOVE); } And the bug will be triggered once we seen the below order. reproduce1 reproduce2 ... | ... truncate to 4k | change to journal data mode | | memcpy(set page dirty) truncate to 0: | ext4_setattr: | ... | ext4_wait_for_tail_page_commit | | mbind(trigger bug) truncate_pagecache(clean dirty)| ... ... | mbind will call ext4_writepage() since the page still be dirty, and then report the bug since the buffers has been free. Fix it by return directly once offset equals to 0 which means the page has been fully truncated. Reported-by: Hulk Robot Signed-off-by: yangerkun Link: https://lore.kernel.org/r/20190919063508.1045-1-yangerkun@huawei.com Reviewed-by: Jan Kara Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 77130af378f3..11bc4c69bf16 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5305,11 +5305,15 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode) offset = inode->i_size & (PAGE_SIZE - 1); /* - * All buffers in the last page remain valid? Then there's nothing to - * do. We do the check mainly to optimize the common PAGE_SIZE == - * blocksize case + * If the page is fully truncated, we don't need to wait for any commit + * (and we even should not as __ext4_journalled_invalidatepage() may + * strip all buffers from the page but keep the page dirty which can then + * confuse e.g. concurrent ext4_writepage() seeing dirty page without + * buffers). Also we don't need to wait for any commit if all buffers in + * the page remain valid. This is most beneficial for the common case of + * blocksize == PAGESIZE. */ - if (offset > PAGE_SIZE - i_blocksize(inode)) + if (!offset || offset > (PAGE_SIZE - i_blocksize(inode))) return; while (1) { page = find_lock_page(inode->i_mapping, From dca66097c0187a3b8e301991873eb5007f6cfae6 Mon Sep 17 00:00:00 2001 From: Daniel Schultz Date: Tue, 17 Sep 2019 10:12:53 +0200 Subject: [PATCH 1699/3715] mfd: rk808: Fix RK818 ID template [ Upstream commit 37ef8c2c15bdc1322b160e38986c187de2b877b2 ] The Rockchip PMIC driver can automatically detect connected component versions by reading the ID_MSB and ID_LSB registers. The probe function will always fail with RK818 PMICs because the ID_MSK is 0xFFF0 and the RK818 template ID is 0x8181. This patch changes this value to 0x8180. Fixes: 9d6105e19f61 ("mfd: rk808: Fix up the chip id get failed") Cc: stable@vger.kernel.org Cc: Elaine Zhang Cc: Joseph Chen Signed-off-by: Daniel Schultz Signed-off-by: Heiko Stuebner Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- include/linux/mfd/rk808.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mfd/rk808.h b/include/linux/mfd/rk808.h index d3156594674c..338e0f6e2226 100644 --- a/include/linux/mfd/rk808.h +++ b/include/linux/mfd/rk808.h @@ -443,7 +443,7 @@ enum { enum { RK805_ID = 0x8050, RK808_ID = 0x0000, - RK818_ID = 0x8181, + RK818_ID = 0x8180, }; struct rk808 { From c499f898286777653deac3d47885aa95ffad1046 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 4 Nov 2019 16:26:53 +0800 Subject: [PATCH 1700/3715] blk-mq: make sure that line break can be printed commit d2c9be89f8ebe7ebcc97676ac40f8dec1cf9b43a upstream. 8962842ca5ab ("blk-mq: avoid sysfs buffer overflow with too many CPU cores") avoids sysfs buffer overflow, and reserves one character for line break. However, the last snprintf() doesn't get correct 'size' parameter passed in, so fixed it. Fixes: 8962842ca5ab ("blk-mq: avoid sysfs buffer overflow with too many CPU cores") Signed-off-by: Ming Lei Signed-off-by: Jens Axboe Cc: Nobuhiro Iwamatsu Signed-off-by: Greg Kroah-Hartman --- block/blk-mq-sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index de733e8cab2e..c97fafa1b206 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -162,7 +162,7 @@ static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page) pos += ret; } - ret = snprintf(pos + page, size - pos, "\n"); + ret = snprintf(pos + page, size + 1 - pos, "\n"); return pos + ret; } From 80797bdcc591c24d8714bbc1342595b7b171bdd4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 20 Sep 2019 13:39:57 -0700 Subject: [PATCH 1701/3715] workqueue: Fix missing kfree(rescuer) in destroy_workqueue() commit 8efe1223d73c218ce7e8b2e0e9aadb974b582d7f upstream. Signed-off-by: Tejun Heo Reported-by: Qian Cai Fixes: def98c84b6cd ("workqueue: Fix spurious sanity check failures in destroy_workqueue()") Cc: Nobuhiro Iwamatsu Signed-off-by: Greg Kroah-Hartman --- kernel/workqueue.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 183ddc34fd54..a37f5dc7cb39 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4110,6 +4110,7 @@ void destroy_workqueue(struct workqueue_struct *wq) /* rescuer will empty maydays list before exiting */ kthread_stop(rescuer->task); + kfree(rescuer); } /* sanity checks */ From cfd2194d549935af5230386aebfb6693c1770f83 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Tue, 1 Oct 2019 11:03:59 +0300 Subject: [PATCH 1702/3715] sunrpc: fix crash when cache_head become valid before update [ Upstream commit 5fcaf6982d1167f1cd9b264704f6d1ef4c505d54 ] I was investigating a crash in our Virtuozzo7 kernel which happened in in svcauth_unix_set_client. I found out that we access m_client field in ip_map structure, which was received from sunrpc_cache_lookup (we have a bit older kernel, now the code is in sunrpc_cache_add_entry), and these field looks uninitialized (m_client == 0x74 don't look like a pointer) but in the cache_head in flags we see 0x1 which is CACHE_VALID. It looks like the problem appeared from our previous fix to sunrpc (1): commit 4ecd55ea0742 ("sunrpc: fix cache_head leak due to queued request") And we've also found a patch already fixing our patch (2): commit d58431eacb22 ("sunrpc: don't mark uninitialised items as VALID.") Though the crash is eliminated, I think the core of the problem is not completely fixed: Neil in the patch (2) makes cache_head CACHE_NEGATIVE, before cache_fresh_locked which was added in (1) to fix crash. These way cache_is_valid won't say the cache is valid anymore and in svcauth_unix_set_client the function cache_check will return error instead of 0, and we don't count entry as initialized. But it looks like we need to remove cache_fresh_locked completely in sunrpc_cache_lookup: In (1) we've only wanted to make cache_fresh_unlocked->cache_dequeue so that cache_requests with no readers also release corresponding cache_head, to fix their leak. We with Vasily were not sure if cache_fresh_locked and cache_fresh_unlocked should be used in pair or not, so we've guessed to use them in pair. Now we see that we don't want the CACHE_VALID bit set here by cache_fresh_locked, as "valid" means "initialized" and there is no initialization in sunrpc_cache_add_entry. Both expiry_time and last_refresh are not used in cache_fresh_unlocked code-path and also not required for the initial fix. So to conclude cache_fresh_locked was called by mistake, and we can just safely remove it instead of crutching it with CACHE_NEGATIVE. It looks ideologically better for me. Hope I don't miss something here. Here is our crash backtrace: [13108726.326291] BUG: unable to handle kernel NULL pointer dereference at 0000000000000074 [13108726.326365] IP: [] svcauth_unix_set_client+0x2ab/0x520 [sunrpc] [13108726.326448] PGD 0 [13108726.326468] Oops: 0002 [#1] SMP [13108726.326497] Modules linked in: nbd isofs xfs loop kpatch_cumulative_81_0_r1(O) xt_physdev nfnetlink_queue bluetooth rfkill ip6table_nat nf_nat_ipv6 ip_vs_wrr ip_vs_wlc ip_vs_sh nf_conntrack_netlink ip_vs_sed ip_vs_pe_sip nf_conntrack_sip ip_vs_nq ip_vs_lc ip_vs_lblcr ip_vs_lblc ip_vs_ftp ip_vs_dh nf_nat_ftp nf_conntrack_ftp iptable_raw xt_recent nf_log_ipv6 xt_hl ip6t_rt nf_log_ipv4 nf_log_common xt_LOG xt_limit xt_TCPMSS xt_tcpmss vxlan ip6_udp_tunnel udp_tunnel xt_statistic xt_NFLOG nfnetlink_log dummy xt_mark xt_REDIRECT nf_nat_redirect raw_diag udp_diag tcp_diag inet_diag netlink_diag af_packet_diag unix_diag rpcsec_gss_krb5 xt_addrtype ip6t_rpfilter ipt_REJECT nf_reject_ipv4 ip6t_REJECT nf_reject_ipv6 ebtable_nat ebtable_broute nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_mangle ip6table_raw nfsv4 [13108726.327173] dns_resolver cls_u32 binfmt_misc arptable_filter arp_tables ip6table_filter ip6_tables devlink fuse_kio_pcs ipt_MASQUERADE nf_nat_masquerade_ipv4 xt_nat iptable_nat nf_nat_ipv4 xt_comment nf_conntrack_ipv4 nf_defrag_ipv4 xt_wdog_tmo xt_multiport bonding xt_set xt_conntrack iptable_filter iptable_mangle kpatch(O) ebtable_filter ebt_among ebtables ip_set_hash_ip ip_set nfnetlink vfat fat skx_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm_intel kvm irqbypass fuse pcspkr ses enclosure joydev sg mei_me hpwdt hpilo lpc_ich mei ipmi_si shpchp ipmi_devintf ipmi_msghandler xt_ipvs acpi_power_meter ip_vs_rr nfsv3 nfsd auth_rpcgss nfs_acl nfs lockd grace fscache nf_nat cls_fw sch_htb sch_cbq sch_sfq ip_vs em_u32 nf_conntrack tun br_netfilter veth overlay ip6_vzprivnet ip6_vznetstat ip_vznetstat [13108726.327817] ip_vzprivnet vziolimit vzevent vzlist vzstat vznetstat vznetdev vzmon vzdev bridge pio_kaio pio_nfs pio_direct pfmt_raw pfmt_ploop1 ploop ip_tables ext4 mbcache jbd2 sd_mod crc_t10dif crct10dif_generic mgag200 i2c_algo_bit drm_kms_helper scsi_transport_iscsi 8021q syscopyarea sysfillrect garp sysimgblt fb_sys_fops mrp stp ttm llc bnx2x crct10dif_pclmul crct10dif_common crc32_pclmul crc32c_intel drm dm_multipath ghash_clmulni_intel uas aesni_intel lrw gf128mul glue_helper ablk_helper cryptd tg3 smartpqi scsi_transport_sas mdio libcrc32c i2c_core usb_storage ptp pps_core wmi sunrpc dm_mirror dm_region_hash dm_log dm_mod [last unloaded: kpatch_cumulative_82_0_r1] [13108726.328403] CPU: 35 PID: 63742 Comm: nfsd ve: 51332 Kdump: loaded Tainted: G W O ------------ 3.10.0-862.20.2.vz7.73.29 #1 73.29 [13108726.328491] Hardware name: HPE ProLiant DL360 Gen10/ProLiant DL360 Gen10, BIOS U32 10/02/2018 [13108726.328554] task: ffffa0a6a41b1160 ti: ffffa0c2a74bc000 task.ti: ffffa0c2a74bc000 [13108726.328610] RIP: 0010:[] [] svcauth_unix_set_client+0x2ab/0x520 [sunrpc] [13108726.328706] RSP: 0018:ffffa0c2a74bfd80 EFLAGS: 00010246 [13108726.328750] RAX: 0000000000000001 RBX: ffffa0a6183ae000 RCX: 0000000000000000 [13108726.328811] RDX: 0000000000000074 RSI: 0000000000000286 RDI: ffffa0c2a74bfcf0 [13108726.328864] RBP: ffffa0c2a74bfe00 R08: ffffa0bab8c22960 R09: 0000000000000001 [13108726.328916] R10: 0000000000000001 R11: 0000000000000001 R12: ffffa0a32aa7f000 [13108726.328969] R13: ffffa0a6183afac0 R14: ffffa0c233d88d00 R15: ffffa0c2a74bfdb4 [13108726.329022] FS: 0000000000000000(0000) GS:ffffa0e17f9c0000(0000) knlGS:0000000000000000 [13108726.329081] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [13108726.332311] CR2: 0000000000000074 CR3: 00000026a1b28000 CR4: 00000000007607e0 [13108726.334606] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [13108726.336754] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [13108726.338908] PKRU: 00000000 [13108726.341047] Call Trace: [13108726.343074] [] ? groups_alloc+0x34/0x110 [13108726.344837] [] svc_set_client+0x24/0x30 [sunrpc] [13108726.346631] [] svc_process_common+0x241/0x710 [sunrpc] [13108726.348332] [] svc_process+0x103/0x190 [sunrpc] [13108726.350016] [] nfsd+0xdf/0x150 [nfsd] [13108726.351735] [] ? nfsd_destroy+0x80/0x80 [nfsd] [13108726.353459] [] kthread+0xd1/0xe0 [13108726.355195] [] ? create_kthread+0x60/0x60 [13108726.356896] [] ret_from_fork_nospec_begin+0x7/0x21 [13108726.358577] [] ? create_kthread+0x60/0x60 [13108726.360240] Code: 4c 8b 45 98 0f 8e 2e 01 00 00 83 f8 fe 0f 84 76 fe ff ff 85 c0 0f 85 2b 01 00 00 49 8b 50 40 b8 01 00 00 00 48 89 93 d0 1a 00 00 0f c1 02 83 c0 01 83 f8 01 0f 8e 53 02 00 00 49 8b 44 24 38 [13108726.363769] RIP [] svcauth_unix_set_client+0x2ab/0x520 [sunrpc] [13108726.365530] RSP [13108726.367179] CR2: 0000000000000074 Fixes: d58431eacb22 ("sunrpc: don't mark uninitialised items as VALID.") Signed-off-by: Pavel Tikhomirov Acked-by: NeilBrown Signed-off-by: J. Bruce Fields Signed-off-by: Sasha Levin --- net/sunrpc/cache.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 475b453dc7ae..556989b0b5fc 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -54,9 +54,6 @@ static void cache_init(struct cache_head *h, struct cache_detail *detail) h->last_refresh = now; } -static inline int cache_is_valid(struct cache_head *h); -static void cache_fresh_locked(struct cache_head *head, time_t expiry, - struct cache_detail *detail); static void cache_fresh_unlocked(struct cache_head *head, struct cache_detail *detail); @@ -101,9 +98,6 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, if (cache_is_expired(detail, tmp)) { hlist_del_init(&tmp->cache_list); detail->entries --; - if (cache_is_valid(tmp) == -EAGAIN) - set_bit(CACHE_NEGATIVE, &tmp->flags); - cache_fresh_locked(tmp, 0, detail); freeme = tmp; break; } From 6360d971e40fe56e6c945d17972d69c69194c4d8 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 5 Dec 2019 10:30:22 +0200 Subject: [PATCH 1703/3715] net/mlx5e: Fix SFF 8472 eeprom length [ Upstream commit c431f8597863a91eea6024926e0c1b179cfa4852 ] SFF 8472 eeprom length is 512 bytes. Fix module info return value to support 512 bytes read. Fixes: ace329f4ab3b ("net/mlx5e: ethtool, Remove unsupported SFP EEPROM high pages query") Signed-off-by: Eran Ben Elisha Reviewed-by: Aya Levin Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index f6beb5ef5971..c3f1e2d76a46 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1625,7 +1625,7 @@ static int mlx5e_get_module_info(struct net_device *netdev, break; case MLX5_MODULE_ID_SFP: modinfo->type = ETH_MODULE_SFF_8472; - modinfo->eeprom_len = MLX5_EEPROM_PAGE_LENGTH; + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; break; default: netdev_err(priv->netdev, "%s: cable type not recognized:0x%x\n", From cb9e778a29b803402ef7451451063d11f583c598 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 14 Nov 2019 09:49:11 -0500 Subject: [PATCH 1704/3715] gfs2: fix glock reference problem in gfs2_trans_remove_revoke [ Upstream commit fe5e7ba11fcf1d75af8173836309e8562aefedef ] Commit 9287c6452d2b fixed a situation in which gfs2 could use a glock after it had been freed. To do that, it temporarily added a new glock reference by calling gfs2_glock_hold in function gfs2_add_revoke. However, if the bd element was removed by gfs2_trans_remove_revoke, it failed to drop the additional reference. This patch adds logic to gfs2_trans_remove_revoke to properly drop the additional glock reference. Fixes: 9287c6452d2b ("gfs2: Fix occasional glock use-after-free") Cc: stable@vger.kernel.org # v5.2+ Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher Signed-off-by: Sasha Levin --- fs/gfs2/log.c | 8 ++++++++ fs/gfs2/log.h | 1 + fs/gfs2/lops.c | 5 +---- fs/gfs2/trans.c | 2 ++ 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 483b82e2be92..a3208511f35a 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -594,6 +594,14 @@ void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) list_add(&bd->bd_list, &sdp->sd_log_le_revoke); } +void gfs2_glock_remove_revoke(struct gfs2_glock *gl) +{ + if (atomic_dec_return(&gl->gl_revokes) == 0) { + clear_bit(GLF_LFLUSH, &gl->gl_flags); + gfs2_glock_queue_put(gl); + } +} + void gfs2_write_revokes(struct gfs2_sbd *sdp) { struct gfs2_trans *tr; diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index 9499a6049212..3b7b7839ec6a 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -80,6 +80,7 @@ extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc) extern void gfs2_log_shutdown(struct gfs2_sbd *sdp); extern int gfs2_logd(void *data); extern void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); +extern void gfs2_glock_remove_revoke(struct gfs2_glock *gl); extern void gfs2_write_revokes(struct gfs2_sbd *sdp); #endif /* __LOG_DOT_H__ */ diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 049f8c6721b4..a5041e6d2c0d 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -660,10 +660,7 @@ static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) bd = list_entry(head->next, struct gfs2_bufdata, bd_list); list_del_init(&bd->bd_list); gl = bd->bd_gl; - if (atomic_dec_return(&gl->gl_revokes) == 0) { - clear_bit(GLF_LFLUSH, &gl->gl_flags); - gfs2_glock_queue_put(gl); - } + gfs2_glock_remove_revoke(gl); kmem_cache_free(gfs2_bufdata_cachep, bd); } } diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index affef3c066e0..69e3402a3cc5 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -277,6 +277,8 @@ void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len) list_del_init(&bd->bd_list); gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke); sdp->sd_log_num_revoke--; + if (bd->bd_gl) + gfs2_glock_remove_revoke(bd->bd_gl); kmem_cache_free(gfs2_bufdata_cachep, bd); tr->tr_num_revoke_rm++; if (--n == 0) From 4faf1cc3dbbd85e49787f4ff3ffa21257c2256bb Mon Sep 17 00:00:00 2001 From: Konstantin Khorenko Date: Wed, 13 Nov 2019 12:29:50 +0300 Subject: [PATCH 1705/3715] kernel/module.c: wakeup processes in module_wq on module unload [ Upstream commit 5d603311615f612320bb77bd2a82553ef1ced5b7 ] Fix the race between load and unload a kernel module. sys_delete_module() try_stop_module() mod->state = _GOING add_unformed_module() old = find_module_all() (old->state == _GOING => wait_event_interruptible()) During pre-condition finished_loading() rets 0 schedule() (never gets waken up later) free_module() mod->state = _UNFORMED list_del_rcu(&mod->list) (dels mod from "modules" list) return The race above leads to modprobe hanging forever on loading a module. Error paths on loading module call wake_up_all(&module_wq) after freeing module, so let's do the same on straight module unload. Fixes: 6e6de3dee51a ("kernel/module.c: Only return -EEXIST for modules that have finished loading") Reviewed-by: Prarit Bhargava Signed-off-by: Konstantin Khorenko Signed-off-by: Jessica Yu Signed-off-by: Sasha Levin --- kernel/module.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/module.c b/kernel/module.c index 468567591241..feb1e0fbc3e8 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1020,6 +1020,8 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module)); free_module(mod); + /* someone could wait for the module in add_unformed_module() */ + wake_up_all(&module_wq); return 0; out: mutex_unlock(&module_mutex); From d6000c7fc76e02d259e9e987d960d3404115d693 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 6 Nov 2019 12:51:09 +0100 Subject: [PATCH 1706/3715] gpiolib: acpi: Add Terra Pad 1061 to the run_edge_events_on_boot_blacklist [ Upstream commit 2727315df3f5ffbebcb174eed3153944a858b66f ] The Terra Pad 1061 has the usual micro-USB-B id-pin handler, but instead of controlling the actual micro-USB-B it turns the 5V boost for the tablet's USB-A connector and its keyboard-cover connector off. The actual micro-USB-B connector on the tablet is wired for charging only, and its id pin is *not* connected to the GPIO which is used for the (broken) id-pin event handler in the DSDT. While at it not only add a comment why the Terra Pad 1061 is on the blacklist, but also fix the missing comment for the Minix Neo Z83-4 entry. Fixes: 61f7f7c8f978 ("gpiolib: acpi: Add gpiolib_acpi_run_edge_events_on_boot option and blacklist") Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Acked-by: Mika Westerberg Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/gpio/gpiolib-acpi.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 715babaa886a..3aa7fe6baf2a 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -1314,11 +1314,28 @@ late_initcall_sync(acpi_gpio_handle_deferred_request_irqs); static const struct dmi_system_id run_edge_events_on_boot_blacklist[] = { { + /* + * The Minix Neo Z83-4 has a micro-USB-B id-pin handler for + * a non existing micro-USB-B connector which puts the HDMI + * DDC pins in GPIO mode, breaking HDMI support. + */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "MINIX"), DMI_MATCH(DMI_PRODUCT_NAME, "Z83-4"), } }, + { + /* + * The Terra Pad 1061 has a micro-USB-B id-pin handler, which + * instead of controlling the actual micro-USB-B turns the 5V + * boost for its USB-A connector off. The actual micro-USB-B + * connector is wired for charging only. + */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Wortmann_AG"), + DMI_MATCH(DMI_PRODUCT_NAME, "TERRA_PAD_1061"), + } + }, {} /* Terminating entry */ }; From 2e22c56fb8993f9e7f329a29a2fb87585526859d Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Wed, 27 Nov 2019 17:57:50 +0100 Subject: [PATCH 1707/3715] raid5: need to set STRIPE_HANDLE for batch head [ Upstream commit a7ede3d16808b8f3915c8572d783530a82b2f027 ] With commit 6ce220dd2f8ea71d6afc29b9a7524c12e39f374a ("raid5: don't set STRIPE_HANDLE to stripe which is in batch list"), we don't want to set STRIPE_HANDLE flag for sh which is already in batch list. However, the stripe which is the head of batch list should set this flag, otherwise panic could happen inside init_stripe at BUG_ON(sh->batch_head), it is reproducible with raid5 on top of nvdimm devices per Xiao oberserved. Thanks for Xiao's effort to verify the change. Fixes: 6ce220dd2f8ea ("raid5: don't set STRIPE_HANDLE to stripe which is in batch list") Reported-by: Xiao Ni Tested-by: Xiao Ni Signed-off-by: Guoqing Jiang Signed-off-by: Song Liu Signed-off-by: Sasha Levin --- drivers/md/raid5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 4c49bed40f1f..d5c14d56a714 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5721,7 +5721,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi) do_flush = false; } - if (!sh->batch_head) + if (!sh->batch_head || sh == sh->batch_head) set_bit(STRIPE_HANDLE, &sh->state); clear_bit(STRIPE_DELAYED, &sh->state); if ((!sh->batch_head || sh == sh->batch_head) && From 002fffa0fb32a3bb14b6422bac2cbb263cc0476b Mon Sep 17 00:00:00 2001 From: Erhard Furtner Date: Tue, 26 Nov 2019 02:48:04 +0100 Subject: [PATCH 1708/3715] of: unittest: fix memory leak in attach_node_and_children [ Upstream commit 2aacace6dbbb6b6ce4e177e6c7ea901f389c0472 ] In attach_node_and_children memory is allocated for full_name via kasprintf. If the condition of the 1st if is not met the function returns early without freeing the memory. Add a kfree() to fix that. This has been detected with kmemleak: Link: https://bugzilla.kernel.org/show_bug.cgi?id=205327 It looks like the leak was introduced by this commit: Fixes: 5babefb7f7ab ("of: unittest: allow base devicetree to have symbol metadata") Signed-off-by: Erhard Furtner Reviewed-by: Michael Ellerman Reviewed-by: Tyrel Datwyler Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- drivers/of/unittest.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 9d204649c963..4bf6a9db6ac0 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -965,8 +965,10 @@ static void attach_node_and_children(struct device_node *np) full_name = kasprintf(GFP_KERNEL, "%pOF", np); if (!strcmp(full_name, "/__local_fixups__") || - !strcmp(full_name, "/__fixups__")) + !strcmp(full_name, "/__fixups__")) { + kfree(full_name); return; + } dup = of_find_node_by_path(full_name); kfree(full_name); From bfb9e5c03076a446b1f4f6a523ddc8d723c907a6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 17 Dec 2019 20:40:05 +0100 Subject: [PATCH 1709/3715] Linux 4.14.159 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4de172b2e1fb..e14ad8f064ec 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 158 +SUBLEVEL = 159 EXTRAVERSION = NAME = Petit Gorille From cb87cb1107c01c0e82a916330ee3347f043c07ee Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Tue, 17 Dec 2019 11:53:52 -0800 Subject: [PATCH 1710/3715] ANDROID: kbuild: disable clang-specific configs with other compilers cuttlefish_defconfig explicitly enables options that fail to compile with compilers other than clang. This change detects when a different compiler is used and disables clang-specific features after printing a warning. Bug: 145297810 Change-Id: I3371576b45c9715a63c5668ab58e996cab612f53 Signed-off-by: Sami Tolvanen --- Makefile | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Makefile b/Makefile index 8d4c1e84c819..0157f78a1f5b 100644 --- a/Makefile +++ b/Makefile @@ -596,6 +596,8 @@ ifeq ($(dot-config),1) -include include/config/auto.conf ifeq ($(KBUILD_EXTMOD),) +include/config/auto.conf.cmd: check-clang-specific-options + # Read in dependencies to all Kconfig* files, make sure to run # oldconfig if changes are detected. -include include/config/auto.conf.cmd @@ -1225,6 +1227,22 @@ else endif endif +# Disable clang-specific config options when using a different compiler +clang-specific-configs := LTO_CLANG CFI_CLANG SHADOW_CALL_STACK + +PHONY += check-clang-specific-options +check-clang-specific-options: $(KCONFIG_CONFIG) FORCE +ifneq ($(cc-name),clang) +ifneq ($(findstring y,$(shell $(CONFIG_SHELL) \ + $(srctree)/scripts/config --file $(KCONFIG_CONFIG) \ + $(foreach c,$(clang-specific-configs),-s $(c)))),) + @echo WARNING: Disabling clang-specific options with $(cc-name) >&2 + $(Q)$(srctree)/scripts/config --file $(KCONFIG_CONFIG) \ + $(foreach c,$(clang-specific-configs),-d $(c)) && \ + $(MAKE) -f $(srctree)/Makefile olddefconfig +endif +endif + # Check for CONFIG flags that require compiler support. Abort the build # after .config has been processed, but before the kernel build starts. # From fe7096ab25ac211f51857bb4803d00d4673d7419 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Fri, 13 Dec 2019 12:25:31 -0800 Subject: [PATCH 1711/3715] UPSTREAM: binder: fix incorrect calculation for num_valid commit 16981742717b04644a41052570fb502682a315d2 upstream. For BINDER_TYPE_PTR and BINDER_TYPE_FDA transactions, the num_valid local was calculated incorrectly causing the range check in binder_validate_ptr() to miss out-of-bounds offsets. Fixes: bde4a19fc04f ("binder: use userspace pointer as base of buffer space") Change-Id: Ida77db13d8e5b726f0b14513f55c2b30277338cd Signed-off-by: Todd Kjos Cc: stable Link: https://lore.kernel.org/r/20191213202531.55010-1-tkjos@google.com Signed-off-by: Greg Kroah-Hartman Bug: 145988638 Signed-off-by: Todd Kjos --- drivers/android/binder.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index f6ddec245187..2c8b629c90c3 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -3428,7 +3428,7 @@ static void binder_transaction(struct binder_proc *proc, binder_size_t parent_offset; struct binder_fd_array_object *fda = to_binder_fd_array_object(hdr); - size_t num_valid = (buffer_offset - off_start_offset) * + size_t num_valid = (buffer_offset - off_start_offset) / sizeof(binder_size_t); struct binder_buffer_object *parent = binder_validate_ptr(target_proc, t->buffer, @@ -3502,7 +3502,7 @@ static void binder_transaction(struct binder_proc *proc, t->buffer->user_data + sg_buf_offset; sg_buf_offset += ALIGN(bp->length, sizeof(u64)); - num_valid = (buffer_offset - off_start_offset) * + num_valid = (buffer_offset - off_start_offset) / sizeof(binder_size_t); ret = binder_fixup_parent(t, thread, bp, off_start_offset, From aefee87ec902e639288debd06c55218cb023374e Mon Sep 17 00:00:00 2001 From: Ram Muthiah Date: Tue, 17 Dec 2019 18:54:32 -0800 Subject: [PATCH 1712/3715] ANDROID: cuttlefish_defconfig: set BINFMT_MISC Bug: 145774383 Test: None Change-Id: I1d7310069da60d7e55a82ca2cc25331006ed6779 Signed-off-by: Ram Muthiah --- arch/arm64/configs/cuttlefish_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index 89b2521aef19..c71406e4c373 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -65,6 +65,7 @@ CONFIG_ARM64_LSE_ATOMICS=y CONFIG_RANDOMIZE_BASE=y # CONFIG_EFI is not set # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_BINFMT_MISC=y CONFIG_COMPAT=y CONFIG_PM_DEBUG=y CONFIG_CPU_IDLE=y From cdad3113fcaa17a85e6632edc4eb2215780f593c Mon Sep 17 00:00:00 2001 From: Alistair Delva Date: Fri, 13 Dec 2019 13:56:42 -0800 Subject: [PATCH 1713/3715] ANDROID: serdev: add platform device support Enables devices on a platform bus, such as serial8250 on the ISA bus, to be enumerated by the serdev subsystem. This enables further layering by e.g. the gnss subsystem. With this in change, these devices can now register with the serdev core and show up as serdev tty ports (serialX) and child devices (serialX-Y). serial8250: ttyS0 at I/O 0x3f8 (irq = 4, base_baud = 115200) is a U6_16550A serial serial0: tty port ttyS0 registered serial8250: ttyS1 at I/O 0x2f8 (irq = 3, base_baud = 115200) is a U6_16550A serial serial1: tty port ttyS1 registered serial8250: ttyS2 at I/O 0x3e8 (irq = 4, base_baud = 115200) is a U6_16550A serial serial2: tty port ttyS2 registered serial8250: ttyS3 at I/O 0x2e8 (irq = 3, base_baud = 115200) is a U6_16550A serial serial3: tty port ttyS3 registered The modalias shows up like this: # cat /sys/bus/serial/devices/serial0-0/modalias platform:serial8250 Bug: 146517987 Change-Id: I3711c9d9ecd66fad638a45a8745e97569ae01791 Signed-off-by: Alistair Delva --- drivers/tty/serdev/core.c | 66 ++++++++++++++++++++++++++++++++++----- 1 file changed, 59 insertions(+), 7 deletions(-) diff --git a/drivers/tty/serdev/core.c b/drivers/tty/serdev/core.c index 8ad3724bfe32..4fa59314640a 100644 --- a/drivers/tty/serdev/core.c +++ b/drivers/tty/serdev/core.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -49,14 +50,32 @@ static const struct device_type serdev_ctrl_type = { static int serdev_device_match(struct device *dev, struct device_driver *drv) { - /* TODO: ACPI and platform matching */ - return of_driver_match_device(dev, drv); + /* TODO: ACPI matching */ + + if (of_driver_match_device(dev, drv)) + return 1; + + if (dev->parent->parent->bus == &platform_bus_type && + dev->parent->parent->bus->match(dev, drv)) + return 1; + + return 0; } static int serdev_uevent(struct device *dev, struct kobj_uevent_env *env) { - /* TODO: ACPI and platform modalias */ - return of_device_uevent_modalias(dev, env); + int rc; + + /* TODO: ACPI modalias */ + + rc = of_device_uevent_modalias(dev, env); + if (rc != -ENODEV) + return rc; + + if (dev->parent->parent->bus == &platform_bus_type) + rc = dev->parent->parent->bus->uevent(dev, env); + + return rc; } /** @@ -406,6 +425,33 @@ static int of_serdev_register_devices(struct serdev_controller *ctrl) return 0; } +static int platform_serdev_register_devices(struct serdev_controller *ctrl) +{ + struct serdev_device *serdev; + int err; + + if (ctrl->dev.parent->bus != &platform_bus_type) + return -ENODEV; + + serdev = serdev_device_alloc(ctrl); + if (!serdev) { + dev_err(&ctrl->dev, "failed to allocate serdev device for %s\n", + dev_name(ctrl->dev.parent)); + return -ENOMEM; + } + + pm_runtime_no_callbacks(&serdev->dev); + + err = serdev_device_add(serdev); + if (err) { + dev_err(&serdev->dev, + "failure adding device. status %d\n", err); + serdev_device_put(serdev); + } + + return err; +} + /** * serdev_controller_add() - Add an serdev controller * @ctrl: controller to be registered. @@ -415,7 +461,7 @@ static int of_serdev_register_devices(struct serdev_controller *ctrl) */ int serdev_controller_add(struct serdev_controller *ctrl) { - int ret; + int ret_of, ret_platform, ret; /* Can't register until after driver model init */ if (WARN_ON(!is_registered)) @@ -425,9 +471,15 @@ int serdev_controller_add(struct serdev_controller *ctrl) if (ret) return ret; - ret = of_serdev_register_devices(ctrl); - if (ret) + ret_platform = platform_serdev_register_devices(ctrl); + ret_of = of_serdev_register_devices(ctrl); + if (ret_of && ret_platform) { + dev_dbg(&ctrl->dev, "no devices registered: of:%d " + "platform:%d\n", + ret_of, ret_platform); + ret = -ENODEV; goto out_dev_del; + } dev_dbg(&ctrl->dev, "serdev%d registered: dev:%p\n", ctrl->nr, &ctrl->dev); From 1f70bbcd4b08aae903f06dd0ab30804d8f9c2e97 Mon Sep 17 00:00:00 2001 From: Alistair Delva Date: Wed, 11 Dec 2019 16:44:45 -0800 Subject: [PATCH 1714/3715] ANDROID: gnss: Add command line test driver This driver enables the "takeover" of a serdev bus device by the GNSS subsystem. It can be used to test the GNSS subsystem without needing to write a specific hardware backend. The new module supports the following parameters: gnss_cmdline.serdev=driver/port/serdev gnss_cmdline.type= An example which allows GNSS to wrap a platform serial8250 port and advertise NMEA-0183 data is: gnss_cmdline.serdev=serial8250/serial0/serial0-0 ^ ^ ^ driver port serdev gnss_cmdline.type=0 ^ GNSS_TYPE_NMEA Bug: 146517987 Change-Id: I421386ee4f2ba8f1f0832d9c56a067a600892d3c Signed-off-by: Alistair Delva --- drivers/gnss/Kconfig | 15 +++++ drivers/gnss/Makefile | 3 + drivers/gnss/cmdline.c | 139 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 157 insertions(+) create mode 100644 drivers/gnss/cmdline.c diff --git a/drivers/gnss/Kconfig b/drivers/gnss/Kconfig index f8ee54f99a8d..db79453a975d 100644 --- a/drivers/gnss/Kconfig +++ b/drivers/gnss/Kconfig @@ -15,4 +15,19 @@ if GNSS config GNSS_SERIAL tristate +config GNSS_CMDLINE_SERIAL + tristate "Command line test driver for GNSS" + depends on SERIAL_DEV_BUS + select GNSS_SERIAL + ---help--- + Say Y here if you want to test the GNSS subsystem but do not have a + way to communicate a binding through firmware such as DT or ACPI. + The correct serdev device and protocol type must be specified on + the module command line. + + To compile this driver as a module, choose M here: the module will + be called gnss-cmdline. + + If unsure, say N. + endif # GNSS diff --git a/drivers/gnss/Makefile b/drivers/gnss/Makefile index 171aba71684d..f637e90527d6 100644 --- a/drivers/gnss/Makefile +++ b/drivers/gnss/Makefile @@ -8,3 +8,6 @@ gnss-y := core.o obj-$(CONFIG_GNSS_SERIAL) += gnss-serial.o gnss-serial-y := serial.o + +obj-$(CONFIG_GNSS_CMDLINE_SERIAL) += gnss-cmdline.o +gnss-cmdline-y := cmdline.o diff --git a/drivers/gnss/cmdline.c b/drivers/gnss/cmdline.c new file mode 100644 index 000000000000..3e1d24636b8c --- /dev/null +++ b/drivers/gnss/cmdline.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test driver for GNSS. This driver requires the serdev binding and protocol + * type to be specified on the module command line. + * + * Copyright 2019 Google LLC + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "serial.h" + +#define GNSS_CMDLINE_MODULE_NAME "gnss-cmdline" + +#define gnss_cmdline_err(...) \ + pr_err(GNSS_CMDLINE_MODULE_NAME ": " __VA_ARGS__) + +static char *serdev; +module_param(serdev, charp, 0644); +MODULE_PARM_DESC(serdev, "serial device to wrap"); + +static int type; +module_param(type, int, 0644); +MODULE_PARM_DESC(serdev, "GNSS protocol type (see 'enum gnss_type')"); + +static struct serdev_device *serdev_device; + +static int name_match(struct device *dev, void *data) +{ + return strstr(dev_name(dev), data) != NULL; +} + +static int __init gnss_cmdline_init(void) +{ + struct device *serial_dev, *port_dev, *serdev_dev; + char *driver_name, *port_name, *serdev_name; + char *serdev_dup, *serdev_dup_sep; + struct gnss_serial *gserial; + int err = -ENODEV; + + /* User did not set the serdev module parameter */ + if (!serdev) + return 0; + + if (type < 0 || type >= GNSS_TYPE_COUNT) { + gnss_cmdline_err("invalid gnss type '%d'\n", type); + return -EINVAL; + } + + serdev_dup = serdev_dup_sep = kstrdup(serdev, GFP_KERNEL); + if (!serdev_dup) + return -ENOMEM; + + driver_name = strsep(&serdev_dup_sep, "/"); + if (!driver_name) { + gnss_cmdline_err("driver name missing\n"); + goto err_free_serdev_dup; + } + + port_name = strsep(&serdev_dup_sep, "/"); + if (!port_name) { + gnss_cmdline_err("port name missing\n"); + goto err_free_serdev_dup; + } + + serdev_name = strsep(&serdev_dup_sep, "/"); + if (!serdev_name) { + gnss_cmdline_err("serdev name missing\n"); + goto err_free_serdev_dup; + } + + /* Find the driver device instance (e.g. serial8250) */ + serial_dev = bus_find_device_by_name(&platform_bus_type, + NULL, driver_name); + if (!serial_dev) { + gnss_cmdline_err("no device '%s'\n", driver_name); + goto err_free_serdev_dup; + } + + /* Find the port device instance (e.g. serial0) */ + port_dev = device_find_child(serial_dev, port_name, name_match); + if (!port_dev) { + gnss_cmdline_err("no port '%s'\n", port_name); + goto err_free_serdev_dup; + } + + /* Find the serdev device instance (e.g. serial0-0) */ + serdev_dev = device_find_child(port_dev, serdev_name, name_match); + if (!serdev_dev) { + gnss_cmdline_err("no serdev '%s'\n", serdev_name); + goto err_free_serdev_dup; + } + + gserial = gnss_serial_allocate(to_serdev_device(serdev_dev), 0); + if (IS_ERR(gserial)) { + err = PTR_ERR(gserial); + goto err_free_serdev_dup; + } + + gserial->gdev->type = type; + + err = gnss_serial_register(gserial); + if (err) { + gnss_serial_free(gserial); + goto err_free_serdev_dup; + } + + serdev_device = to_serdev_device(serdev_dev); + err = 0; +err_free_serdev_dup: + kfree(serdev_dup); + return err; +} + +static void __exit gnss_cmdline_exit(void) +{ + struct gnss_serial *gserial; + + if (!serdev_device) + return; + + gserial = serdev_device_get_drvdata(serdev_device); + + gnss_serial_deregister(gserial); + gnss_serial_free(gserial); +} + +module_init(gnss_cmdline_init); +module_exit(gnss_cmdline_exit); + +MODULE_AUTHOR("Alistair Delva "); +MODULE_DESCRIPTION("GNSS command line driver"); +MODULE_LICENSE("GPL v2"); From f0d14da4400ff9d8c9fe600c061b87697c663492 Mon Sep 17 00:00:00 2001 From: Alistair Delva Date: Wed, 18 Dec 2019 15:27:33 -0800 Subject: [PATCH 1715/3715] ANDROID: cuttlefish_defconfig: Enable CONFIG_GNSS_CMDLINE_SERIAL Enables a GNSS serial driver for cuttlefish. Bug: 146517987 Change-Id: Ife42306a3052e49365e3f0004f220424f0be7782 Signed-off-by: Alistair Delva --- arch/arm64/configs/cuttlefish_defconfig | 1 + arch/x86/configs/x86_64_cuttlefish_defconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index c71406e4c373..3346572df45f 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -208,6 +208,7 @@ CONFIG_RFKILL=y # CONFIG_ALLOW_DEV_COREDUMP is not set CONFIG_DEBUG_DEVRES=y CONFIG_GNSS=y +CONFIG_GNSS_CMDLINE_SERIAL=m CONFIG_OF_UNITTEST=y CONFIG_ZRAM=y CONFIG_BLK_DEV_LOOP=y diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 1713f383da61..38e8765c808c 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -216,6 +216,7 @@ CONFIG_RFKILL=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEBUG_DEVRES=y CONFIG_GNSS=y +CONFIG_GNSS_CMDLINE_SERIAL=m CONFIG_OF=y CONFIG_OF_UNITTEST=y # CONFIG_PNP_DEBUG_MESSAGES is not set From 8edc9ddadb63bbfe9653b7f0b387acffc63ffb0c Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 3 Dec 2019 16:48:06 +0200 Subject: [PATCH 1716/3715] net: bridge: deny dev_set_mac_address() when unregistering [ Upstream commit c4b4c421857dc7b1cf0dccbd738472360ff2cd70 ] We have an interesting memory leak in the bridge when it is being unregistered and is a slave to a master device which would change the mac of its slaves on unregister (e.g. bond, team). This is a very unusual setup but we do end up leaking 1 fdb entry because dev_set_mac_address() would cause the bridge to insert the new mac address into its table after all fdbs are flushed, i.e. after dellink() on the bridge has finished and we call NETDEV_UNREGISTER the bond/team would release it and will call dev_set_mac_address() to restore its original address and that in turn will add an fdb in the bridge. One fix is to check for the bridge dev's reg_state in its ndo_set_mac_address callback and return an error if the bridge is not in NETREG_REGISTERED. Easy steps to reproduce: 1. add bond in mode != A/B 2. add any slave to the bond 3. add bridge dev as a slave to the bond 4. destroy the bridge device Trace: unreferenced object 0xffff888035c4d080 (size 128): comm "ip", pid 4068, jiffies 4296209429 (age 1413.753s) hex dump (first 32 bytes): 41 1d c9 36 80 88 ff ff 00 00 00 00 00 00 00 00 A..6............ d2 19 c9 5e 3f d7 00 00 00 00 00 00 00 00 00 00 ...^?........... backtrace: [<00000000ddb525dc>] kmem_cache_alloc+0x155/0x26f [<00000000633ff1e0>] fdb_create+0x21/0x486 [bridge] [<0000000092b17e9c>] fdb_insert+0x91/0xdc [bridge] [<00000000f2a0f0ff>] br_fdb_change_mac_address+0xb3/0x175 [bridge] [<000000001de02dbd>] br_stp_change_bridge_id+0xf/0xff [bridge] [<00000000ac0e32b1>] br_set_mac_address+0x76/0x99 [bridge] [<000000006846a77f>] dev_set_mac_address+0x63/0x9b [<00000000d30738fc>] __bond_release_one+0x3f6/0x455 [bonding] [<00000000fc7ec01d>] bond_netdev_event+0x2f2/0x400 [bonding] [<00000000305d7795>] notifier_call_chain+0x38/0x56 [<0000000028885d4a>] call_netdevice_notifiers+0x1e/0x23 [<000000008279477b>] rollback_registered_many+0x353/0x6a4 [<0000000018ef753a>] unregister_netdevice_many+0x17/0x6f [<00000000ba854b7a>] rtnl_delete_link+0x3c/0x43 [<00000000adf8618d>] rtnl_dellink+0x1dc/0x20a [<000000009b6395fd>] rtnetlink_rcv_msg+0x23d/0x268 Fixes: 43598813386f ("bridge: add local MAC address to forwarding table (v2)") Reported-by: syzbot+2add91c08eb181fea1bf@syzkaller.appspotmail.com Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_device.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index f6b6a92f1c48..b7cc322acdc8 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -217,6 +217,12 @@ static int br_set_mac_address(struct net_device *dev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; + /* dev_set_mac_addr() can be called by a master device on bridge's + * NETDEV_UNREGISTER, but since it's being destroyed do nothing + */ + if (dev->reg_state != NETREG_REGISTERED) + return -EBUSY; + spin_lock_bh(&br->lock); if (!ether_addr_equal(dev->dev_addr, addr->sa_data)) { /* Mac address will be changed in br_stp_change_bridge_id(). */ From dd93daea8132b9c95e35f211a008e9f3ad2e4b87 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 5 Dec 2019 13:02:35 +0300 Subject: [PATCH 1717/3715] net: dsa: fix flow dissection on Tx path [ Upstream commit 8bef0af09a5415df761b04fa487a6c34acae74bc ] Commit 43e665287f93 ("net-next: dsa: fix flow dissection") added an ability to override protocol and network offset during flow dissection for DSA-enabled devices (i.e. controllers shipped as switch CPU ports) in order to fix skb hashing for RPS on Rx path. However, skb_hash() and added part of code can be invoked not only on Rx, but also on Tx path if we have a multi-queued device and: - kernel is running on UP system or - XPS is not configured. The call stack in this two cases will be like: dev_queue_xmit() -> __dev_queue_xmit() -> netdev_core_pick_tx() -> netdev_pick_tx() -> skb_tx_hash() -> skb_get_hash(). The problem is that skbs queued for Tx have both network offset and correct protocol already set up even after inserting a CPU tag by DSA tagger, so calling tag_ops->flow_dissect() on this path actually only breaks flow dissection and hashing. This can be observed by adding debug prints just before and right after tag_ops->flow_dissect() call to the related block of code: Before the patch: Rx path (RPS): [ 19.240001] Rx: proto: 0x00f8, nhoff: 0 /* ETH_P_XDSA */ [ 19.244271] tag_ops->flow_dissect() [ 19.247811] Rx: proto: 0x0800, nhoff: 8 /* ETH_P_IP */ [ 19.215435] Rx: proto: 0x00f8, nhoff: 0 /* ETH_P_XDSA */ [ 19.219746] tag_ops->flow_dissect() [ 19.223241] Rx: proto: 0x0806, nhoff: 8 /* ETH_P_ARP */ [ 18.654057] Rx: proto: 0x00f8, nhoff: 0 /* ETH_P_XDSA */ [ 18.658332] tag_ops->flow_dissect() [ 18.661826] Rx: proto: 0x8100, nhoff: 8 /* ETH_P_8021Q */ Tx path (UP system): [ 18.759560] Tx: proto: 0x0800, nhoff: 26 /* ETH_P_IP */ [ 18.763933] tag_ops->flow_dissect() [ 18.767485] Tx: proto: 0x920b, nhoff: 34 /* junk */ [ 22.800020] Tx: proto: 0x0806, nhoff: 26 /* ETH_P_ARP */ [ 22.804392] tag_ops->flow_dissect() [ 22.807921] Tx: proto: 0x920b, nhoff: 34 /* junk */ [ 16.898342] Tx: proto: 0x86dd, nhoff: 26 /* ETH_P_IPV6 */ [ 16.902705] tag_ops->flow_dissect() [ 16.906227] Tx: proto: 0x920b, nhoff: 34 /* junk */ After: Rx path (RPS): [ 16.520993] Rx: proto: 0x00f8, nhoff: 0 /* ETH_P_XDSA */ [ 16.525260] tag_ops->flow_dissect() [ 16.528808] Rx: proto: 0x0800, nhoff: 8 /* ETH_P_IP */ [ 15.484807] Rx: proto: 0x00f8, nhoff: 0 /* ETH_P_XDSA */ [ 15.490417] tag_ops->flow_dissect() [ 15.495223] Rx: proto: 0x0806, nhoff: 8 /* ETH_P_ARP */ [ 17.134621] Rx: proto: 0x00f8, nhoff: 0 /* ETH_P_XDSA */ [ 17.138895] tag_ops->flow_dissect() [ 17.142388] Rx: proto: 0x8100, nhoff: 8 /* ETH_P_8021Q */ Tx path (UP system): [ 15.499558] Tx: proto: 0x0800, nhoff: 26 /* ETH_P_IP */ [ 20.664689] Tx: proto: 0x0806, nhoff: 26 /* ETH_P_ARP */ [ 18.565782] Tx: proto: 0x86dd, nhoff: 26 /* ETH_P_IPV6 */ In order to fix that we can add the check 'proto == htons(ETH_P_XDSA)' to prevent code from calling tag_ops->flow_dissect() on Tx. I also decided to initialize 'offset' variable so tagger callbacks can now safely leave it untouched without provoking a chaos. Fixes: 43e665287f93 ("net-next: dsa: fix flow dissection") Signed-off-by: Alexander Lobakin Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/flow_dissector.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 7be5c20a93a5..071de3013364 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -450,9 +450,10 @@ bool __skb_flow_dissect(const struct sk_buff *skb, nhoff = skb_network_offset(skb); hlen = skb_headlen(skb); #if IS_ENABLED(CONFIG_NET_DSA) - if (unlikely(skb->dev && netdev_uses_dsa(skb->dev))) { + if (unlikely(skb->dev && netdev_uses_dsa(skb->dev) && + proto == htons(ETH_P_XDSA))) { const struct dsa_device_ops *ops; - int offset; + int offset = 0; ops = skb->dev->dsa_ptr->tag_ops; if (ops->flow_dissect && From 84d9373c385e882fc018fac60b4495712c3e5556 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Fri, 6 Dec 2019 14:28:20 +0200 Subject: [PATCH 1718/3715] net: ethernet: ti: cpsw: fix extra rx interrupt [ Upstream commit 51302f77bedab8768b761ed1899c08f89af9e4e2 ] Now RX interrupt is triggered twice every time, because in cpsw_rx_interrupt() it is asked first and then disabled. So there will be pending interrupt always, when RX interrupt is enabled again in NAPI handler. Fix it by first disabling IRQ and then do ask. Fixes: 870915feabdc ("drivers: net: cpsw: remove disable_irq/enable_irq as irq can be masked from cpsw itself") Signed-off-by: Grygorii Strashko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/ti/cpsw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index a44838aac97d..3189afcd5888 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -862,8 +862,8 @@ static irqreturn_t cpsw_rx_interrupt(int irq, void *dev_id) { struct cpsw_common *cpsw = dev_id; - cpdma_ctlr_eoi(cpsw->dma, CPDMA_EOI_RX); writel(0, &cpsw->wr_regs->rx_en); + cpdma_ctlr_eoi(cpsw->dma, CPDMA_EOI_RX); if (cpsw->quirk_irq) { disable_irq_nosync(cpsw->irqs_table[0]); From 0f68a211193afdf1f770ac203ff5b31fa83d1ac5 Mon Sep 17 00:00:00 2001 From: Mian Yousaf Kaukab Date: Thu, 5 Dec 2019 10:41:16 +0100 Subject: [PATCH 1719/3715] net: thunderx: start phy before starting autonegotiation [ Upstream commit a350d2e7adbb57181d33e3aa6f0565632747feaa ] Since commit 2b3e88ea6528 ("net: phy: improve phy state checking") phy_start_aneg() expects phy state to be >= PHY_UP. Call phy_start() before calling phy_start_aneg() during probe so that autonegotiation is initiated. As phy_start() takes care of calling phy_start_aneg(), drop the explicit call to phy_start_aneg(). Network fails without this patch on Octeon TX. Fixes: 2b3e88ea6528 ("net: phy: improve phy state checking") Signed-off-by: Mian Yousaf Kaukab Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 5e5c4d7796b8..586e35593310 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -915,7 +915,7 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid) phy_interface_mode(lmac->lmac_type))) return -ENODEV; - phy_start_aneg(lmac->phydev); + phy_start(lmac->phydev); return 0; } From 22f73f807e3dce32313ae7c87d81c95d4d8e7b66 Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Tue, 3 Dec 2019 16:34:13 -0500 Subject: [PATCH 1720/3715] openvswitch: support asymmetric conntrack [ Upstream commit 5d50aa83e2c8e91ced2cca77c198b468ca9210f4 ] The openvswitch module shares a common conntrack and NAT infrastructure exposed via netfilter. It's possible that a packet needs both SNAT and DNAT manipulation, due to e.g. tuple collision. Netfilter can support this because it runs through the NAT table twice - once on ingress and again after egress. The openvswitch module doesn't have such capability. Like netfilter hook infrastructure, we should run through NAT twice to keep the symmetry. Fixes: 05752523e565 ("openvswitch: Interface with NAT.") Signed-off-by: Aaron Conole Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/openvswitch/conntrack.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 48d81857961c..737e37b28d93 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -879,6 +879,17 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, } err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype); + if (err == NF_ACCEPT && + ct->status & IPS_SRC_NAT && ct->status & IPS_DST_NAT) { + if (maniptype == NF_NAT_MANIP_SRC) + maniptype = NF_NAT_MANIP_DST; + else + maniptype = NF_NAT_MANIP_SRC; + + err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, + maniptype); + } + /* Mark NAT done if successful and update the flow key. */ if (err == NF_ACCEPT) ovs_nat_update_key(key, skb, maniptype); From 3485dc1441b8037945a29217ab745f456f96450e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Dec 2019 10:10:15 -0800 Subject: [PATCH 1721/3715] tcp: md5: fix potential overestimation of TCP option space [ Upstream commit 9424e2e7ad93ffffa88f882c9bc5023570904b55 ] Back in 2008, Adam Langley fixed the corner case of packets for flows having all of the following options : MD5 TS SACK Since MD5 needs 20 bytes, and TS needs 12 bytes, no sack block can be cooked from the remaining 8 bytes. tcp_established_options() correctly sets opts->num_sack_blocks to zero, but returns 36 instead of 32. This means TCP cooks packets with 4 extra bytes at the end of options, containing unitialized bytes. Fixes: 33ad798c924b ("tcp: options clean up") Signed-off-by: Eric Dumazet Reported-by: syzbot Acked-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_output.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 6025cc509d97..e3b28140c10b 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -708,8 +708,9 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb min_t(unsigned int, eff_sacks, (remaining - TCPOLEN_SACK_BASE_ALIGNED) / TCPOLEN_SACK_PERBLOCK); - size += TCPOLEN_SACK_BASE_ALIGNED + - opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK; + if (likely(opts->num_sack_blocks)) + size += TCPOLEN_SACK_BASE_ALIGNED + + opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK; } return size; From 9fe54cf41836fe761cbc5d154dda0d45d98d784c Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Fri, 6 Dec 2019 05:25:48 +0000 Subject: [PATCH 1722/3715] tipc: fix ordering of tipc module init and exit routine [ Upstream commit 9cf1cd8ee3ee09ef2859017df2058e2f53c5347f ] In order to set/get/dump, the tipc uses the generic netlink infrastructure. So, when tipc module is inserted, init function calls genl_register_family(). After genl_register_family(), set/get/dump commands are immediately allowed and these callbacks internally use the net_generic. net_generic is allocated by register_pernet_device() but this is called after genl_register_family() in the __init function. So, these callbacks would use un-initialized net_generic. Test commands: #SHELL1 while : do modprobe tipc modprobe -rv tipc done #SHELL2 while : do tipc link list done Splat looks like: [ 59.616322][ T2788] kasan: CONFIG_KASAN_INLINE enabled [ 59.617234][ T2788] kasan: GPF could be caused by NULL-ptr deref or user memory access [ 59.618398][ T2788] general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI [ 59.619389][ T2788] CPU: 3 PID: 2788 Comm: tipc Not tainted 5.4.0+ #194 [ 59.620231][ T2788] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 59.621428][ T2788] RIP: 0010:tipc_bcast_get_broadcast_mode+0x131/0x310 [tipc] [ 59.622379][ T2788] Code: c7 c6 ef 8b 38 c0 65 ff 0d 84 83 c9 3f e8 d7 a5 f2 e3 48 8d bb 38 11 00 00 48 b8 00 00 00 00 [ 59.622550][ T2780] NET: Registered protocol family 30 [ 59.624627][ T2788] RSP: 0018:ffff88804b09f578 EFLAGS: 00010202 [ 59.624630][ T2788] RAX: dffffc0000000000 RBX: 0000000000000011 RCX: 000000008bc66907 [ 59.624631][ T2788] RDX: 0000000000000229 RSI: 000000004b3cf4cc RDI: 0000000000001149 [ 59.624633][ T2788] RBP: ffff88804b09f588 R08: 0000000000000003 R09: fffffbfff4fb3df1 [ 59.624635][ T2788] R10: fffffbfff50318f8 R11: ffff888066cadc18 R12: ffffffffa6cc2f40 [ 59.624637][ T2788] R13: 1ffff11009613eba R14: ffff8880662e9328 R15: ffff8880662e9328 [ 59.624639][ T2788] FS: 00007f57d8f7b740(0000) GS:ffff88806cc00000(0000) knlGS:0000000000000000 [ 59.624645][ T2788] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 59.625875][ T2780] tipc: Started in single node mode [ 59.626128][ T2788] CR2: 00007f57d887a8c0 CR3: 000000004b140002 CR4: 00000000000606e0 [ 59.633991][ T2788] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 59.635195][ T2788] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 59.636478][ T2788] Call Trace: [ 59.637025][ T2788] tipc_nl_add_bc_link+0x179/0x1470 [tipc] [ 59.638219][ T2788] ? lock_downgrade+0x6e0/0x6e0 [ 59.638923][ T2788] ? __tipc_nl_add_link+0xf90/0xf90 [tipc] [ 59.639533][ T2788] ? tipc_nl_node_dump_link+0x318/0xa50 [tipc] [ 59.640160][ T2788] ? mutex_lock_io_nested+0x1380/0x1380 [ 59.640746][ T2788] tipc_nl_node_dump_link+0x4fd/0xa50 [tipc] [ 59.641356][ T2788] ? tipc_nl_node_reset_link_stats+0x340/0x340 [tipc] [ 59.642088][ T2788] ? __skb_ext_del+0x270/0x270 [ 59.642594][ T2788] genl_lock_dumpit+0x85/0xb0 [ 59.643050][ T2788] netlink_dump+0x49c/0xed0 [ 59.643529][ T2788] ? __netlink_sendskb+0xc0/0xc0 [ 59.644044][ T2788] ? __netlink_dump_start+0x190/0x800 [ 59.644617][ T2788] ? __mutex_unlock_slowpath+0xd0/0x670 [ 59.645177][ T2788] __netlink_dump_start+0x5a0/0x800 [ 59.645692][ T2788] genl_rcv_msg+0xa75/0xe90 [ 59.646144][ T2788] ? __lock_acquire+0xdfe/0x3de0 [ 59.646692][ T2788] ? genl_family_rcv_msg_attrs_parse+0x320/0x320 [ 59.647340][ T2788] ? genl_lock_dumpit+0xb0/0xb0 [ 59.647821][ T2788] ? genl_unlock+0x20/0x20 [ 59.648290][ T2788] ? genl_parallel_done+0xe0/0xe0 [ 59.648787][ T2788] ? find_held_lock+0x39/0x1d0 [ 59.649276][ T2788] ? genl_rcv+0x15/0x40 [ 59.649722][ T2788] ? lock_contended+0xcd0/0xcd0 [ 59.650296][ T2788] netlink_rcv_skb+0x121/0x350 [ 59.650828][ T2788] ? genl_family_rcv_msg_attrs_parse+0x320/0x320 [ 59.651491][ T2788] ? netlink_ack+0x940/0x940 [ 59.651953][ T2788] ? lock_acquire+0x164/0x3b0 [ 59.652449][ T2788] genl_rcv+0x24/0x40 [ 59.652841][ T2788] netlink_unicast+0x421/0x600 [ ... ] Fixes: 7e4369057806 ("tipc: fix a slab object leak") Fixes: a62fbccecd62 ("tipc: make subscriber server support net namespace") Signed-off-by: Taehee Yoo Acked-by: Jon Maloy Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/core.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/net/tipc/core.c b/net/tipc/core.c index 67ac10434ba2..35f162ece2b7 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -116,14 +116,6 @@ static int __init tipc_init(void) sysctl_tipc_rmem[1] = RCVBUF_DEF; sysctl_tipc_rmem[2] = RCVBUF_MAX; - err = tipc_netlink_start(); - if (err) - goto out_netlink; - - err = tipc_netlink_compat_start(); - if (err) - goto out_netlink_compat; - err = tipc_register_sysctl(); if (err) goto out_sysctl; @@ -144,8 +136,21 @@ static int __init tipc_init(void) if (err) goto out_bearer; + err = tipc_netlink_start(); + if (err) + goto out_netlink; + + err = tipc_netlink_compat_start(); + if (err) + goto out_netlink_compat; + pr_info("Started in single node mode\n"); return 0; + +out_netlink_compat: + tipc_netlink_stop(); +out_netlink: + tipc_bearer_cleanup(); out_bearer: unregister_pernet_device(&tipc_topsrv_net_ops); out_pernet_topsrv: @@ -155,22 +160,18 @@ out_socket: out_pernet: tipc_unregister_sysctl(); out_sysctl: - tipc_netlink_compat_stop(); -out_netlink_compat: - tipc_netlink_stop(); -out_netlink: pr_err("Unable to start in single node mode\n"); return err; } static void __exit tipc_exit(void) { + tipc_netlink_compat_stop(); + tipc_netlink_stop(); tipc_bearer_cleanup(); unregister_pernet_device(&tipc_topsrv_net_ops); tipc_socket_stop(); unregister_pernet_device(&tipc_net_ops); - tipc_netlink_stop(); - tipc_netlink_compat_stop(); tipc_unregister_sysctl(); pr_info("Deactivated\n"); From 9a8f9033dde9094f069af6d1b4e0c753b40b5dc4 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 6 Dec 2019 12:38:36 +0100 Subject: [PATCH 1723/3715] tcp: fix rejected syncookies due to stale timestamps [ Upstream commit 04d26e7b159a396372646a480f4caa166d1b6720 ] If no synflood happens for a long enough period of time, then the synflood timestamp isn't refreshed and jiffies can advance so much that time_after32() can't accurately compare them any more. Therefore, we can end up in a situation where time_after32(now, last_overflow + HZ) returns false, just because these two values are too far apart. In that case, the synflood timestamp isn't updated as it should be, which can trick tcp_synq_no_recent_overflow() into rejecting valid syncookies. For example, let's consider the following scenario on a system with HZ=1000: * The synflood timestamp is 0, either because that's the timestamp of the last synflood or, more commonly, because we're working with a freshly created socket. * We receive a new SYN, which triggers synflood protection. Let's say that this happens when jiffies == 2147484649 (that is, 'synflood timestamp' + HZ + 2^31 + 1). * Then tcp_synq_overflow() doesn't update the synflood timestamp, because time_after32(2147484649, 1000) returns false. With: - 2147484649: the value of jiffies, aka. 'now'. - 1000: the value of 'last_overflow' + HZ. * A bit later, we receive the ACK completing the 3WHS. But cookie_v[46]_check() rejects it because tcp_synq_no_recent_overflow() says that we're not under synflood. That's because time_after32(2147484649, 120000) returns false. With: - 2147484649: the value of jiffies, aka. 'now'. - 120000: the value of 'last_overflow' + TCP_SYNCOOKIE_VALID. Of course, in reality jiffies would have increased a bit, but this condition will last for the next 119 seconds, which is far enough to accommodate for jiffie's growth. Fix this by updating the overflow timestamp whenever jiffies isn't within the [last_overflow, last_overflow + HZ] range. That shouldn't have any performance impact since the update still happens at most once per second. Now we're guaranteed to have fresh timestamps while under synflood, so tcp_synq_no_recent_overflow() can safely use it with time_after32() in such situations. Stale timestamps can still make tcp_synq_no_recent_overflow() return the wrong verdict when not under synflood. This will be handled in the next patch. For 64 bits architectures, the problem was introduced with the conversion of ->tw_ts_recent_stamp to 32 bits integer by commit cca9bab1b72c ("tcp: use monotonic timestamps for PAWS"). The problem has always been there on 32 bits architectures. Fixes: cca9bab1b72c ("tcp: use monotonic timestamps for PAWS") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Guillaume Nault Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/time.h | 13 +++++++++++++ include/net/tcp.h | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/include/linux/time.h b/include/linux/time.h index 87c36cf1cec2..21086c5143d9 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -301,4 +301,17 @@ static inline bool itimerspec64_valid(const struct itimerspec64 *its) */ #define time_after32(a, b) ((s32)((u32)(b) - (u32)(a)) < 0) #define time_before32(b, a) time_after32(a, b) + +/** + * time_between32 - check if a 32-bit timestamp is within a given time range + * @t: the time which may be within [l,h] + * @l: the lower bound of the range + * @h: the higher bound of the range + * + * time_before32(t, l, h) returns true if @l <= @t <= @h. All operands are + * treated as 32-bit integers. + * + * Equivalent to !(time_before32(@t, @l) || time_after32(@t, @h)). + */ +#define time_between32(t, l, h) ((u32)(h) - (u32)(l) >= (u32)(t) - (u32)(l)) #endif diff --git a/include/net/tcp.h b/include/net/tcp.h index 785c4ef4e1bf..2424026a6530 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -503,7 +503,7 @@ static inline void tcp_synq_overflow(const struct sock *sk) unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; unsigned long now = jiffies; - if (time_after(now, last_overflow + HZ)) + if (!time_between32(now, last_overflow, last_overflow + HZ)) tcp_sk(sk)->rx_opt.ts_recent_stamp = now; } From 12f1107bd7fdb15f144575ce391e7571db8098bf Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 6 Dec 2019 12:38:43 +0100 Subject: [PATCH 1724/3715] tcp: tighten acceptance of ACKs not matching a child socket [ Upstream commit cb44a08f8647fd2e8db5cc9ac27cd8355fa392d8 ] When no synflood occurs, the synflood timestamp isn't updated. Therefore it can be so old that time_after32() can consider it to be in the future. That's a problem for tcp_synq_no_recent_overflow() as it may report that a recent overflow occurred while, in fact, it's just that jiffies has grown past 'last_overflow' + TCP_SYNCOOKIE_VALID + 2^31. Spurious detection of recent overflows lead to extra syncookie verification in cookie_v[46]_check(). At that point, the verification should fail and the packet dropped. But we should have dropped the packet earlier as we didn't even send a syncookie. Let's refine tcp_synq_no_recent_overflow() to report a recent overflow only if jiffies is within the [last_overflow, last_overflow + TCP_SYNCOOKIE_VALID] interval. This way, no spurious recent overflow is reported when jiffies wraps and 'last_overflow' becomes in the future from the point of view of time_after32(). However, if jiffies wraps and enters the [last_overflow, last_overflow + TCP_SYNCOOKIE_VALID] interval (with 'last_overflow' being a stale synflood timestamp), then tcp_synq_no_recent_overflow() still erroneously reports an overflow. In such cases, we have to rely on syncookie verification to drop the packet. We unfortunately have no way to differentiate between a fresh and a stale syncookie timestamp. In practice, using last_overflow as lower bound is problematic. If the synflood timestamp is concurrently updated between the time we read jiffies and the moment we store the timestamp in 'last_overflow', then 'now' becomes smaller than 'last_overflow' and tcp_synq_no_recent_overflow() returns true, potentially dropping a valid syncookie. Reading jiffies after loading the timestamp could fix the problem, but that'd require a memory barrier. Let's just accommodate for potential timestamp growth instead and extend the interval using 'last_overflow - HZ' as lower bound. Signed-off-by: Guillaume Nault Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/tcp.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 2424026a6530..2c47947eac07 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -512,7 +512,15 @@ static inline bool tcp_synq_no_recent_overflow(const struct sock *sk) { unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; - return time_after(jiffies, last_overflow + TCP_SYNCOOKIE_VALID); + /* If last_overflow <= jiffies <= last_overflow + TCP_SYNCOOKIE_VALID, + * then we're under synflood. However, we have to use + * 'last_overflow - HZ' as lower bound. That's because a concurrent + * tcp_synq_overflow() could update .ts_recent_stamp after we read + * jiffies but before we store .ts_recent_stamp into last_overflow, + * which could lead to rejecting a valid syncookie. + */ + return !time_between32(jiffies, last_overflow - HZ, + last_overflow + TCP_SYNCOOKIE_VALID); } static inline u32 tcp_cookie_time(void) From b8a30668954f72174bb5cd007be9351bbe31f726 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 6 Dec 2019 12:38:49 +0100 Subject: [PATCH 1725/3715] tcp: Protect accesses to .ts_recent_stamp with {READ,WRITE}_ONCE() [ Upstream commit 721c8dafad26ccfa90ff659ee19755e3377b829d ] Syncookies borrow the ->rx_opt.ts_recent_stamp field to store the timestamp of the last synflood. Protect them with READ_ONCE() and WRITE_ONCE() since reads and writes aren't serialised. Use of .rx_opt.ts_recent_stamp for storing the synflood timestamp was introduced by a0f82f64e269 ("syncookies: remove last_synq_overflow from struct tcp_sock"). But unprotected accesses were already there when timestamp was stored in .last_synq_overflow. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Guillaume Nault Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/tcp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 2c47947eac07..00d10f0e1194 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -500,17 +500,17 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); */ static inline void tcp_synq_overflow(const struct sock *sk) { - unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; + unsigned long last_overflow = READ_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp); unsigned long now = jiffies; if (!time_between32(now, last_overflow, last_overflow + HZ)) - tcp_sk(sk)->rx_opt.ts_recent_stamp = now; + WRITE_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp, now); } /* syncookies: no recent synqueue overflow on this listening socket? */ static inline bool tcp_synq_no_recent_overflow(const struct sock *sk) { - unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; + unsigned long last_overflow = READ_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp); /* If last_overflow <= jiffies <= last_overflow + TCP_SYNCOOKIE_VALID, * then we're under synflood. However, we have to use From 7272e8e3bfa354a4f2c829a80180f01dc66d4861 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Dec 2019 20:43:46 -0800 Subject: [PATCH 1726/3715] inet: protect against too small mtu values. [ Upstream commit 501a90c945103e8627406763dac418f20f3837b2 ] syzbot was once again able to crash a host by setting a very small mtu on loopback device. Let's make inetdev_valid_mtu() available in include/net/ip.h, and use it in ip_setup_cork(), so that we protect both ip_append_page() and __ip_append_data() Also add a READ_ONCE() when the device mtu is read. Pairs this lockless read with one WRITE_ONCE() in __dev_set_mtu(), even if other code paths might write over this field. Add a big comment in include/linux/netdevice.h about dev->mtu needing READ_ONCE()/WRITE_ONCE() annotations. Hopefully we will add the missing ones in followup patches. [1] refcount_t: saturated; leaking memory. WARNING: CPU: 0 PID: 9464 at lib/refcount.c:22 refcount_warn_saturate+0x138/0x1f0 lib/refcount.c:22 Kernel panic - not syncing: panic_on_warn set ... CPU: 0 PID: 9464 Comm: syz-executor850 Not tainted 5.4.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x197/0x210 lib/dump_stack.c:118 panic+0x2e3/0x75c kernel/panic.c:221 __warn.cold+0x2f/0x3e kernel/panic.c:582 report_bug+0x289/0x300 lib/bug.c:195 fixup_bug arch/x86/kernel/traps.c:174 [inline] fixup_bug arch/x86/kernel/traps.c:169 [inline] do_error_trap+0x11b/0x200 arch/x86/kernel/traps.c:267 do_invalid_op+0x37/0x50 arch/x86/kernel/traps.c:286 invalid_op+0x23/0x30 arch/x86/entry/entry_64.S:1027 RIP: 0010:refcount_warn_saturate+0x138/0x1f0 lib/refcount.c:22 Code: 06 31 ff 89 de e8 c8 f5 e6 fd 84 db 0f 85 6f ff ff ff e8 7b f4 e6 fd 48 c7 c7 e0 71 4f 88 c6 05 56 a6 a4 06 01 e8 c7 a8 b7 fd <0f> 0b e9 50 ff ff ff e8 5c f4 e6 fd 0f b6 1d 3d a6 a4 06 31 ff 89 RSP: 0018:ffff88809689f550 EFLAGS: 00010286 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff815e4336 RDI: ffffed1012d13e9c RBP: ffff88809689f560 R08: ffff88809c50a3c0 R09: fffffbfff15d31b1 R10: fffffbfff15d31b0 R11: ffffffff8ae98d87 R12: 0000000000000001 R13: 0000000000040100 R14: ffff888099041104 R15: ffff888218d96e40 refcount_add include/linux/refcount.h:193 [inline] skb_set_owner_w+0x2b6/0x410 net/core/sock.c:1999 sock_wmalloc+0xf1/0x120 net/core/sock.c:2096 ip_append_page+0x7ef/0x1190 net/ipv4/ip_output.c:1383 udp_sendpage+0x1c7/0x480 net/ipv4/udp.c:1276 inet_sendpage+0xdb/0x150 net/ipv4/af_inet.c:821 kernel_sendpage+0x92/0xf0 net/socket.c:3794 sock_sendpage+0x8b/0xc0 net/socket.c:936 pipe_to_sendpage+0x2da/0x3c0 fs/splice.c:458 splice_from_pipe_feed fs/splice.c:512 [inline] __splice_from_pipe+0x3ee/0x7c0 fs/splice.c:636 splice_from_pipe+0x108/0x170 fs/splice.c:671 generic_splice_sendpage+0x3c/0x50 fs/splice.c:842 do_splice_from fs/splice.c:861 [inline] direct_splice_actor+0x123/0x190 fs/splice.c:1035 splice_direct_to_actor+0x3b4/0xa30 fs/splice.c:990 do_splice_direct+0x1da/0x2a0 fs/splice.c:1078 do_sendfile+0x597/0xd00 fs/read_write.c:1464 __do_sys_sendfile64 fs/read_write.c:1525 [inline] __se_sys_sendfile64 fs/read_write.c:1511 [inline] __x64_sys_sendfile64+0x1dd/0x220 fs/read_write.c:1511 do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x441409 Code: e8 ac e8 ff ff 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 eb 08 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fffb64c4f78 EFLAGS: 00000246 ORIG_RAX: 0000000000000028 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000441409 RDX: 0000000000000000 RSI: 0000000000000006 RDI: 0000000000000005 RBP: 0000000000073b8a R08: 0000000000000010 R09: 0000000000000010 R10: 0000000000010001 R11: 0000000000000246 R12: 0000000000402180 R13: 0000000000402210 R14: 0000000000000000 R15: 0000000000000000 Kernel Offset: disabled Rebooting in 86400 seconds.. Fixes: 1470ddf7f8ce ("inet: Remove explicit write references to sk/inet in ip_append_data") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/netdevice.h | 5 +++++ include/net/ip.h | 5 +++++ net/core/dev.c | 3 ++- net/ipv4/devinet.c | 5 ----- net/ipv4/ip_output.c | 14 +++++++++----- 5 files changed, 21 insertions(+), 11 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4725a9d9597f..8818291815bc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1718,6 +1718,11 @@ struct net_device { unsigned char if_port; unsigned char dma; + /* Note : dev->mtu is often read without holding a lock. + * Writers usually hold RTNL. + * It is recommended to use READ_ONCE() to annotate the reads, + * and to use WRITE_ONCE() to annotate the writes. + */ unsigned int mtu; unsigned int min_mtu; unsigned int max_mtu; diff --git a/include/net/ip.h b/include/net/ip.h index b8ebee43941f..666d89ca4e2e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -644,4 +644,9 @@ extern int sysctl_icmp_msgs_burst; int ip_misc_proc_init(void); #endif +static inline bool inetdev_valid_mtu(unsigned int mtu) +{ + return likely(mtu >= IPV4_MIN_MTU); +} + #endif /* _IP_H */ diff --git a/net/core/dev.c b/net/core/dev.c index 3ce68484ed5a..f9f05b3df460 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6876,7 +6876,8 @@ int __dev_set_mtu(struct net_device *dev, int new_mtu) if (ops->ndo_change_mtu) return ops->ndo_change_mtu(dev, new_mtu); - dev->mtu = new_mtu; + /* Pairs with all the lockless reads of dev->mtu in the stack */ + WRITE_ONCE(dev->mtu, new_mtu); return 0; } EXPORT_SYMBOL(__dev_set_mtu); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5264510c9983..5f020c051af9 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1426,11 +1426,6 @@ skip: } } -static bool inetdev_valid_mtu(unsigned int mtu) -{ - return mtu >= IPV4_MIN_MTU; -} - static void inetdev_send_gratuitous_arp(struct net_device *dev, struct in_device *in_dev) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 34d49f76d1a7..73cd64c7692f 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1123,13 +1123,17 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, rt = *rtp; if (unlikely(!rt)) return -EFAULT; - /* - * We steal reference to this route, caller should not release it - */ - *rtp = NULL; + cork->fragsize = ip_sk_use_pmtu(sk) ? - dst_mtu(&rt->dst) : rt->dst.dev->mtu; + dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu); + + if (!inetdev_valid_mtu(cork->fragsize)) + return -ENETUNREACH; + cork->dst = &rt->dst; + /* We stole this route, caller should not release it. */ + *rtp = NULL; + cork->length = 0; cork->ttl = ipc->ttl; cork->tos = ipc->tos; From b6cdbf0f7cda1121bd92bdfd8523cfacddbe8097 Mon Sep 17 00:00:00 2001 From: Ivan Bornyakov Date: Wed, 23 May 2018 17:56:11 +0300 Subject: [PATCH 1727/3715] nvme: host: core: fix precedence of ternary operator commit e9a9853c23c13a37546397b61b270999fd0fb759 upstream. Ternary operator have lower precedence then bitwise or, so 'cdw10' was calculated wrong. Signed-off-by: Ivan Bornyakov Reviewed-by: Max Gurtovoy Signed-off-by: Keith Busch Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 768ac752a6e3..f543b9932c83 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1331,7 +1331,7 @@ static int nvme_pr_reserve(struct block_device *bdev, u64 key, static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new, enum pr_type type, bool abort) { - u32 cdw10 = nvme_pr_type(type) << 8 | abort ? 2 : 1; + u32 cdw10 = nvme_pr_type(type) << 8 | (abort ? 2 : 1); return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire); } @@ -1343,7 +1343,7 @@ static int nvme_pr_clear(struct block_device *bdev, u64 key) static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type) { - u32 cdw10 = nvme_pr_type(type) << 8 | key ? 1 << 3 : 0; + u32 cdw10 = nvme_pr_type(type) << 8 | (key ? 1 << 3 : 0); return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release); } From 350a0cba90e1bd40b1fe5b396923d2f3a61d1056 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 18 Dec 2019 15:10:24 +0100 Subject: [PATCH 1728/3715] Revert "regulator: Defer init completion for a while after late_initcall" This reverts commit d7ce17fba6c8e316ca9a554a87edddce6f862435 which is commit 55576cf1853798e86f620766e23b604c9224c19c upstream. It's causing "odd" interactions with older kernels, so it probably isn't a good idea to cause timing changes there. This has been reported to cause oopses on Pixel devices. Reported-by: Siddharth Kapoor Cc: Mark Brown Cc: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/core.c | 42 +++++++++++----------------------------- 1 file changed, 11 insertions(+), 31 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index d2428c262b7c..b2cb4f497ef6 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -4503,7 +4503,7 @@ static int __init regulator_init(void) /* init early to allow our consumers to complete system booting */ core_initcall(regulator_init); -static int regulator_late_cleanup(struct device *dev, void *data) +static int __init regulator_late_cleanup(struct device *dev, void *data) { struct regulator_dev *rdev = dev_to_rdev(dev); const struct regulator_ops *ops = rdev->desc->ops; @@ -4552,8 +4552,17 @@ unlock: return 0; } -static void regulator_init_complete_work_function(struct work_struct *work) +static int __init regulator_init_complete(void) { + /* + * Since DT doesn't provide an idiomatic mechanism for + * enabling full constraints and since it's much more natural + * with DT to provide them just assume that a DT enabled + * system has full constraints. + */ + if (of_have_populated_dt()) + has_full_constraints = true; + /* * Regulators may had failed to resolve their input supplies * when were registered, either because the input supply was @@ -4571,35 +4580,6 @@ static void regulator_init_complete_work_function(struct work_struct *work) */ class_for_each_device(®ulator_class, NULL, NULL, regulator_late_cleanup); -} - -static DECLARE_DELAYED_WORK(regulator_init_complete_work, - regulator_init_complete_work_function); - -static int __init regulator_init_complete(void) -{ - /* - * Since DT doesn't provide an idiomatic mechanism for - * enabling full constraints and since it's much more natural - * with DT to provide them just assume that a DT enabled - * system has full constraints. - */ - if (of_have_populated_dt()) - has_full_constraints = true; - - /* - * We punt completion for an arbitrary amount of time since - * systems like distros will load many drivers from userspace - * so consumers might not always be ready yet, this is - * particularly an issue with laptops where this might bounce - * the display off then on. Ideally we'd get a notification - * from userspace when this happens but we don't so just wait - * a bit and hope we waited long enough. It'd be better if - * we'd only do this on systems that need it, and a kernel - * command line option might be useful. - */ - schedule_delayed_work(®ulator_init_complete_work, - msecs_to_jiffies(30000)); return 0; } From 6ab2e14ba709957cf43ec9fa4b36dd4dfbbc65b2 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 14 Aug 2019 01:06:55 +0000 Subject: [PATCH 1729/3715] PCI/PM: Always return devices to D0 when thawing commit f2c33ccacb2d4bbeae2a255a7ca0cbfd03017b7c upstream. pci_pm_thaw_noirq() is supposed to return the device to D0 and restore its configuration registers, but previously it only did that for devices whose drivers implemented the new power management ops. Hibernation, e.g., via "echo disk > /sys/power/state", involves freezing devices, creating a hibernation image, thawing devices, writing the image, and powering off. The fact that thawing did not return devices with legacy power management to D0 caused errors, e.g., in this path: pci_pm_thaw_noirq if (pci_has_legacy_pm_support(pci_dev)) # true for Mellanox VF driver return pci_legacy_resume_early(dev) # ... legacy PM skips the rest pci_set_power_state(pci_dev, PCI_D0) pci_restore_state(pci_dev) pci_pm_thaw if (pci_has_legacy_pm_support(pci_dev)) pci_legacy_resume drv->resume mlx4_resume ... pci_enable_msix_range ... if (dev->current_state != PCI_D0) # <--- return -EINVAL; which caused these warnings: mlx4_core a6d1:00:02.0: INTx is not supported in multi-function mode, aborting PM: dpm_run_callback(): pci_pm_thaw+0x0/0xd7 returns -95 PM: Device a6d1:00:02.0 failed to thaw: error -95 Return devices to D0 and restore config registers for all devices, not just those whose drivers support new power management. [bhelgaas: also call pci_restore_state() before pci_legacy_resume_early(), update comment, add stable tag, commit log] Link: https://lore.kernel.org/r/KU1P153MB016637CAEAD346F0AA8E3801BFAD0@KU1P153MB0166.APCP153.PROD.OUTLOOK.COM Signed-off-by: Dexuan Cui Signed-off-by: Bjorn Helgaas Reviewed-by: Rafael J. Wysocki Cc: stable@vger.kernel.org # v4.13+ Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci-driver.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 7e4bec75fcde..522e59274b5d 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -967,17 +967,22 @@ static int pci_pm_thaw_noirq(struct device *dev) return error; } - if (pci_has_legacy_pm_support(pci_dev)) - return pci_legacy_resume_early(dev); - /* - * pci_restore_state() requires the device to be in D0 (because of MSI - * restoration among other things), so force it into D0 in case the - * driver's "freeze" callbacks put it into a low-power state directly. + * Both the legacy ->resume_early() and the new pm->thaw_noirq() + * callbacks assume the device has been returned to D0 and its + * config state has been restored. + * + * In addition, pci_restore_state() restores MSI-X state in MMIO + * space, which requires the device to be in D0, so return it to D0 + * in case the driver's "freeze" callbacks put it into a low-power + * state. */ pci_set_power_state(pci_dev, PCI_D0); pci_restore_state(pci_dev); + if (pci_has_legacy_pm_support(pci_dev)) + return pci_legacy_resume_early(dev); + if (drv && drv->pm && drv->pm->thaw_noirq) error = drv->pm->thaw_noirq(dev); From 4ac3b0f14a70c16c734998dffca6ce260cfa51e9 Mon Sep 17 00:00:00 2001 From: Steffen Liebergeld Date: Wed, 18 Sep 2019 15:16:52 +0200 Subject: [PATCH 1730/3715] PCI: Fix Intel ACS quirk UPDCR register address commit d8558ac8c93d429d65d7490b512a3a67e559d0d4 upstream. According to documentation [0] the correct offset for the Upstream Peer Decode Configuration Register (UPDCR) is 0x1014. It was previously defined as 0x1114. d99321b63b1f ("PCI: Enable quirks for PCIe ACS on Intel PCH root ports") intended to enforce isolation between PCI devices allowing them to be put into separate IOMMU groups. Due to the wrong register offset the intended isolation was not fully enforced. This is fixed with this patch. Please note that I did not test this patch because I have no hardware that implements this register. [0] https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/4th-gen-core-family-mobile-i-o-datasheet.pdf (page 325) Fixes: d99321b63b1f ("PCI: Enable quirks for PCIe ACS on Intel PCH root ports") Link: https://lore.kernel.org/r/7a3505df-79ba-8a28-464c-88b83eefffa6@kernkonzept.com Signed-off-by: Steffen Liebergeld Signed-off-by: Bjorn Helgaas Reviewed-by: Andrew Murray Acked-by: Ashok Raj Cc: stable@vger.kernel.org # v3.15+ Signed-off-by: Greg Kroah-Hartman --- drivers/pci/quirks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 45c3fbd38f50..70c70da4624e 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -4600,7 +4600,7 @@ int pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags) #define INTEL_BSPR_REG_BPPD (1 << 9) /* Upstream Peer Decode Configuration Register */ -#define INTEL_UPDCR_REG 0x1114 +#define INTEL_UPDCR_REG 0x1014 /* 5:0 Peer Decode Enable bits */ #define INTEL_UPDCR_REG_MASK 0x3f From fa5da49c4f93c505e12d410cc2a13beb004ff97b Mon Sep 17 00:00:00 2001 From: Jian-Hong Pan Date: Tue, 8 Oct 2019 11:42:39 +0800 Subject: [PATCH 1731/3715] PCI/MSI: Fix incorrect MSI-X masking on resume commit e045fa29e89383c717e308609edd19d2fd29e1be upstream. When a driver enables MSI-X, msix_program_entries() reads the MSI-X Vector Control register for each vector and saves it in desc->masked. Each register is 32 bits and bit 0 is the actual Mask bit. When we restored these registers during resume, we previously set the Mask bit if *any* bit in desc->masked was set instead of when the Mask bit itself was set: pci_restore_state pci_restore_msi_state __pci_restore_msix_state for_each_pci_msi_entry msix_mask_irq(entry, entry->masked) <-- entire u32 word __pci_msix_desc_mask_irq(desc, flag) mask_bits = desc->masked & ~PCI_MSIX_ENTRY_CTRL_MASKBIT if (flag) <-- testing entire u32, not just bit 0 mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT writel(mask_bits, desc_addr + PCI_MSIX_ENTRY_VECTOR_CTRL) This means that after resume, MSI-X vectors were masked when they shouldn't be, which leads to timeouts like this: nvme nvme0: I/O 978 QID 3 timeout, completion polled On resume, set the Mask bit only when the saved Mask bit from suspend was set. This should remove the need for 19ea025e1d28 ("nvme: Add quirk for Kingston NVME SSD running FW E8FK11.T"). [bhelgaas: commit log, move fix to __pci_msix_desc_mask_irq()] Link: https://bugzilla.kernel.org/show_bug.cgi?id=204887 Link: https://lore.kernel.org/r/20191008034238.2503-1-jian-hong@endlessm.com Fixes: f2440d9acbe8 ("PCI MSI: Refactor interrupt masking code") Signed-off-by: Jian-Hong Pan Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/msi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index d66ef88e13cf..2a203055b16e 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -211,7 +211,7 @@ u32 __pci_msix_desc_mask_irq(struct msi_desc *desc, u32 flag) return 0; mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT; - if (flag) + if (flag & PCI_MSIX_ENTRY_CTRL_MASKBIT) mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT; writel(mask_bits, pci_msix_desc_addr(desc) + PCI_MSIX_ENTRY_VECTOR_CTRL); From 2da0e66be5f906e0ffc13f8af80d78de9cde5988 Mon Sep 17 00:00:00 2001 From: George Cherian Date: Mon, 11 Nov 2019 02:43:03 +0000 Subject: [PATCH 1732/3715] PCI: Apply Cavium ACS quirk to ThunderX2 and ThunderX3 commit f338bb9f0179cb959977b74e8331b312264d720b upstream. Enhance the ACS quirk for Cavium Processors. Add the root port vendor IDs for ThunderX2 and ThunderX3 series of processors. [bhelgaas: add Fixes: and stable tag] Fixes: f2ddaf8dfd4a ("PCI: Apply Cavium ThunderX ACS quirk to more Root Ports") Link: https://lore.kernel.org/r/20191111024243.GA11408@dc5-eodlnx05.marvell.com Signed-off-by: George Cherian Signed-off-by: Bjorn Helgaas Reviewed-by: Robert Richter Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Greg Kroah-Hartman --- drivers/pci/quirks.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 70c70da4624e..90df085e9f92 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -4252,15 +4252,21 @@ static int pci_quirk_amd_sb_acs(struct pci_dev *dev, u16 acs_flags) static bool pci_quirk_cavium_acs_match(struct pci_dev *dev) { + if (!pci_is_pcie(dev) || pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT) + return false; + + switch (dev->device) { /* - * Effectively selects all downstream ports for whole ThunderX 1 - * family by 0xf800 mask (which represents 8 SoCs), while the lower - * bits of device ID are used to indicate which subdevice is used - * within the SoC. + * Effectively selects all downstream ports for whole ThunderX1 + * (which represents 8 SoCs). */ - return (pci_is_pcie(dev) && - (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) && - ((dev->device & 0xf800) == 0xa000)); + case 0xa000 ... 0xa7ff: /* ThunderX1 */ + case 0xaf84: /* ThunderX2 */ + case 0xb884: /* ThunderX3 */ + return true; + default: + return false; + } } static int pci_quirk_cavium_acs(struct pci_dev *dev, u16 acs_flags) From 9d9ee2b7b643c5fa208cc4094cbbe27bee93d5bb Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Wed, 13 Nov 2019 13:18:31 -0800 Subject: [PATCH 1733/3715] xtensa: fix TLB sanity checker commit 36de10c4788efc6efe6ff9aa10d38cb7eea4c818 upstream. Virtual and translated addresses retrieved by the xtensa TLB sanity checker must be consistent, i.e. correspond to the same state of the checked TLB entry. KASAN shadow memory is mapped dynamically using auto-refill TLB entries and thus may change TLB state between the virtual and translated address retrieval, resulting in false TLB insanity report. Move read_xtlb_translation close to read_xtlb_virtual to make sure that read values are consistent. Cc: stable@vger.kernel.org Fixes: a99e07ee5e88 ("xtensa: check TLB sanity on return to userspace") Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/mm/tlb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/xtensa/mm/tlb.c b/arch/xtensa/mm/tlb.c index 35c822286bbe..3ce5ccdb054d 100644 --- a/arch/xtensa/mm/tlb.c +++ b/arch/xtensa/mm/tlb.c @@ -218,6 +218,8 @@ static int check_tlb_entry(unsigned w, unsigned e, bool dtlb) unsigned tlbidx = w | (e << PAGE_SHIFT); unsigned r0 = dtlb ? read_dtlb_virtual(tlbidx) : read_itlb_virtual(tlbidx); + unsigned r1 = dtlb ? + read_dtlb_translation(tlbidx) : read_itlb_translation(tlbidx); unsigned vpn = (r0 & PAGE_MASK) | (e << PAGE_SHIFT); unsigned pte = get_pte_for_vaddr(vpn); unsigned mm_asid = (get_rasid_register() >> 8) & ASID_MASK; @@ -233,8 +235,6 @@ static int check_tlb_entry(unsigned w, unsigned e, bool dtlb) } if (tlb_asid == mm_asid) { - unsigned r1 = dtlb ? read_dtlb_translation(tlbidx) : - read_itlb_translation(tlbidx); if ((pte ^ r1) & PAGE_MASK) { pr_err("%cTLB: way: %u, entry: %u, mapping: %08x->%08x, PTE: %08x\n", dtlb ? 'D' : 'I', w, e, r0, r1, pte); From cb622fd2aba07c1f8cdb0d67ec80c1cf3759f2d0 Mon Sep 17 00:00:00 2001 From: Chris Lew Date: Wed, 27 Jun 2018 18:19:57 -0700 Subject: [PATCH 1734/3715] rpmsg: glink: Set tail pointer to 0 at end of FIFO commit 4623e8bf1de0b86e23a56cdb39a72f054e89c3bd upstream. When wrapping around the FIFO, the remote expects the tail pointer to be reset to 0 on the edge case where the tail equals the FIFO length. Fixes: caf989c350e8 ("rpmsg: glink: Introduce glink smem based transport") Cc: stable@vger.kernel.org Signed-off-by: Chris Lew Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- drivers/rpmsg/qcom_glink_smem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rpmsg/qcom_glink_smem.c b/drivers/rpmsg/qcom_glink_smem.c index 5cdaa5f8fb61..53b3a43160f4 100644 --- a/drivers/rpmsg/qcom_glink_smem.c +++ b/drivers/rpmsg/qcom_glink_smem.c @@ -119,7 +119,7 @@ static void glink_smem_rx_advance(struct qcom_glink_pipe *np, tail = le32_to_cpu(*pipe->tail); tail += count; - if (tail > pipe->native.length) + if (tail >= pipe->native.length) tail -= pipe->native.length; *pipe->tail = cpu_to_le32(tail); From e4fbe58ca403f028ce2868d5ce9979443bb61950 Mon Sep 17 00:00:00 2001 From: Arun Kumar Neelakantam Date: Fri, 4 Oct 2019 15:26:57 -0700 Subject: [PATCH 1735/3715] rpmsg: glink: Fix reuse intents memory leak issue commit b85f6b601407347f5425c4c058d1b7871f5bf4f0 upstream. Memory allocated for re-usable intents are not freed during channel cleanup which causes memory leak in system. Check and free all re-usable memory to avoid memory leak. Fixes: 933b45da5d1d ("rpmsg: glink: Add support for TX intents") Cc: stable@vger.kernel.org Acked-By: Chris Lew Tested-by: Srinivas Kandagatla Signed-off-by: Arun Kumar Neelakantam Reported-by: Srinivas Kandagatla Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- drivers/rpmsg/qcom_glink_native.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/rpmsg/qcom_glink_native.c b/drivers/rpmsg/qcom_glink_native.c index e8e12c2b1d0e..32b8ac9a3fdc 100644 --- a/drivers/rpmsg/qcom_glink_native.c +++ b/drivers/rpmsg/qcom_glink_native.c @@ -243,10 +243,19 @@ static void qcom_glink_channel_release(struct kref *ref) { struct glink_channel *channel = container_of(ref, struct glink_channel, refcount); + struct glink_core_rx_intent *tmp; unsigned long flags; + int iid; spin_lock_irqsave(&channel->intent_lock, flags); + idr_for_each_entry(&channel->liids, tmp, iid) { + kfree(tmp->data); + kfree(tmp); + } idr_destroy(&channel->liids); + + idr_for_each_entry(&channel->riids, tmp, iid) + kfree(tmp); idr_destroy(&channel->riids); spin_unlock_irqrestore(&channel->intent_lock, flags); From ec9bacb6ca32610b89c3ac4b2646271239219920 Mon Sep 17 00:00:00 2001 From: Arun Kumar Neelakantam Date: Fri, 4 Oct 2019 15:26:58 -0700 Subject: [PATCH 1736/3715] rpmsg: glink: Fix use after free in open_ack TIMEOUT case commit ac74ea01860170699fb3b6ea80c0476774c8e94f upstream. Extra channel reference put when remote sending OPEN_ACK after timeout causes use-after-free while handling next remote CLOSE command. Remove extra reference put in timeout case to avoid use-after-free. Fixes: b4f8e52b89f6 ("rpmsg: Introduce Qualcomm RPM glink driver") Cc: stable@vger.kernel.org Tested-by: Srinivas Kandagatla Signed-off-by: Arun Kumar Neelakantam Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- drivers/rpmsg/qcom_glink_native.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/rpmsg/qcom_glink_native.c b/drivers/rpmsg/qcom_glink_native.c index 32b8ac9a3fdc..575203dfd676 100644 --- a/drivers/rpmsg/qcom_glink_native.c +++ b/drivers/rpmsg/qcom_glink_native.c @@ -1104,13 +1104,12 @@ static int qcom_glink_create_remote(struct qcom_glink *glink, close_link: /* * Send a close request to "undo" our open-ack. The close-ack will - * release the last reference. + * release qcom_glink_send_open_req() reference and the last reference + * will be relesed after receiving remote_close or transport unregister + * by calling qcom_glink_native_remove(). */ qcom_glink_send_close_req(glink, channel); - /* Release qcom_glink_send_open_req() reference */ - kref_put(&channel->refcount, qcom_glink_channel_release); - return ret; } From 907f2c940db052b5a5f371a7be456bac6d0b267c Mon Sep 17 00:00:00 2001 From: Chris Lew Date: Fri, 4 Oct 2019 15:26:59 -0700 Subject: [PATCH 1737/3715] rpmsg: glink: Put an extra reference during cleanup commit b646293e272816dd0719529dcebbd659de0722f7 upstream. In a remote processor crash scenario, there is no guarantee the remote processor sent close requests before it went into a bad state. Remove the reference that is normally handled by the close command in the so channel resources can be released. Fixes: b4f8e52b89f6 ("rpmsg: Introduce Qualcomm RPM glink driver") Cc: stable@vger.kernel.org Tested-by: Srinivas Kandagatla Signed-off-by: Chris Lew Reported-by: Srinivas Kandagatla Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- drivers/rpmsg/qcom_glink_native.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/rpmsg/qcom_glink_native.c b/drivers/rpmsg/qcom_glink_native.c index 575203dfd676..aef24a4c9826 100644 --- a/drivers/rpmsg/qcom_glink_native.c +++ b/drivers/rpmsg/qcom_glink_native.c @@ -1613,6 +1613,10 @@ void qcom_glink_native_remove(struct qcom_glink *glink) idr_for_each_entry(&glink->lcids, channel, cid) kref_put(&channel->refcount, qcom_glink_channel_release); + /* Release any defunct local channels, waiting for close-req */ + idr_for_each_entry(&glink->rcids, channel, cid) + kref_put(&channel->refcount, qcom_glink_channel_release); + idr_destroy(&glink->lcids); idr_destroy(&glink->rcids); spin_unlock_irqrestore(&glink->idr_lock, flags); From 90be4d4327fc5560b77693dd63922bcc331fa4d8 Mon Sep 17 00:00:00 2001 From: Chris Lew Date: Fri, 4 Oct 2019 15:27:00 -0700 Subject: [PATCH 1738/3715] rpmsg: glink: Fix rpmsg_register_device err handling commit f7e714988edaffe6ac578318e99501149b067ba0 upstream. The device release function is set before registering with rpmsg. If rpmsg registration fails, the framework will call device_put(), which invokes the release function. The channel create logic does not need to free rpdev if rpmsg_register_device() fails and release is called. Fixes: b4f8e52b89f6 ("rpmsg: Introduce Qualcomm RPM glink driver") Cc: stable@vger.kernel.org Tested-by: Srinivas Kandagatla Signed-off-by: Chris Lew Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- drivers/rpmsg/qcom_glink_native.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/rpmsg/qcom_glink_native.c b/drivers/rpmsg/qcom_glink_native.c index aef24a4c9826..306b3cc6ab13 100644 --- a/drivers/rpmsg/qcom_glink_native.c +++ b/drivers/rpmsg/qcom_glink_native.c @@ -1400,15 +1400,13 @@ static int qcom_glink_rx_open(struct qcom_glink *glink, unsigned int rcid, ret = rpmsg_register_device(rpdev); if (ret) - goto free_rpdev; + goto rcid_remove; channel->rpdev = rpdev; } return 0; -free_rpdev: - kfree(rpdev); rcid_remove: spin_lock_irqsave(&glink->idr_lock, flags); idr_remove(&glink->rcids, channel->rcid); From 29166ff96dad3e6b95aa30bcf2aeda6e3e69b791 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 4 Oct 2019 15:27:01 -0700 Subject: [PATCH 1739/3715] rpmsg: glink: Don't send pending rx_done during remove commit c3dadc19b7564c732598b30d637c6f275c3b77b6 upstream. Attempting to transmit rx_done messages after the GLINK instance is being torn down will cause use after free and memory leaks. So cancel the intent_work and free up the pending intents. With this there are no concurrent accessors of the channel left during qcom_glink_native_remove() and there is therefor no need to hold the spinlock during this operation - which would prohibit the use of cancel_work_sync() in the release function. So remove this. Fixes: 1d2ea36eead9 ("rpmsg: glink: Add rx done command") Cc: stable@vger.kernel.org Acked-by: Chris Lew Tested-by: Srinivas Kandagatla Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- drivers/rpmsg/qcom_glink_native.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/rpmsg/qcom_glink_native.c b/drivers/rpmsg/qcom_glink_native.c index 306b3cc6ab13..57a5d394d650 100644 --- a/drivers/rpmsg/qcom_glink_native.c +++ b/drivers/rpmsg/qcom_glink_native.c @@ -243,11 +243,23 @@ static void qcom_glink_channel_release(struct kref *ref) { struct glink_channel *channel = container_of(ref, struct glink_channel, refcount); + struct glink_core_rx_intent *intent; struct glink_core_rx_intent *tmp; unsigned long flags; int iid; + /* cancel pending rx_done work */ + cancel_work_sync(&channel->intent_work); + spin_lock_irqsave(&channel->intent_lock, flags); + /* Free all non-reuse intents pending rx_done work */ + list_for_each_entry_safe(intent, tmp, &channel->done_intents, node) { + if (!intent->reuse) { + kfree(intent->data); + kfree(intent); + } + } + idr_for_each_entry(&channel->liids, tmp, iid) { kfree(tmp->data); kfree(tmp); @@ -1597,7 +1609,6 @@ void qcom_glink_native_remove(struct qcom_glink *glink) struct glink_channel *channel; int cid; int ret; - unsigned long flags; disable_irq(glink->irq); cancel_work_sync(&glink->rx_work); @@ -1606,7 +1617,6 @@ void qcom_glink_native_remove(struct qcom_glink *glink) if (ret) dev_warn(glink->dev, "Can't remove GLINK devices: %d\n", ret); - spin_lock_irqsave(&glink->idr_lock, flags); /* Release any defunct local channels, waiting for close-ack */ idr_for_each_entry(&glink->lcids, channel, cid) kref_put(&channel->refcount, qcom_glink_channel_release); @@ -1617,7 +1627,6 @@ void qcom_glink_native_remove(struct qcom_glink *glink) idr_destroy(&glink->lcids); idr_destroy(&glink->rcids); - spin_unlock_irqrestore(&glink->idr_lock, flags); mbox_free_channel(glink->mbox_chan); } EXPORT_SYMBOL_GPL(qcom_glink_native_remove); From 50736a155f0f4953ee802699431b3f1eff9d8e6b Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 4 Oct 2019 15:27:02 -0700 Subject: [PATCH 1740/3715] rpmsg: glink: Free pending deferred work on remove commit 278bcb7300f61785dba63840bd2a8cf79f14554c upstream. By just cancelling the deferred rx worker during GLINK instance teardown any pending deferred commands are leaked, so free them. Fixes: b4f8e52b89f6 ("rpmsg: Introduce Qualcomm RPM glink driver") Cc: stable@vger.kernel.org Acked-by: Chris Lew Tested-by: Srinivas Kandagatla Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- drivers/rpmsg/qcom_glink_native.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/rpmsg/qcom_glink_native.c b/drivers/rpmsg/qcom_glink_native.c index 57a5d394d650..1e6253f1e070 100644 --- a/drivers/rpmsg/qcom_glink_native.c +++ b/drivers/rpmsg/qcom_glink_native.c @@ -1539,6 +1539,18 @@ static void qcom_glink_work(struct work_struct *work) } } +static void qcom_glink_cancel_rx_work(struct qcom_glink *glink) +{ + struct glink_defer_cmd *dcmd; + struct glink_defer_cmd *tmp; + + /* cancel any pending deferred rx_work */ + cancel_work_sync(&glink->rx_work); + + list_for_each_entry_safe(dcmd, tmp, &glink->rx_queue, node) + kfree(dcmd); +} + struct qcom_glink *qcom_glink_native_probe(struct device *dev, unsigned long features, struct qcom_glink_pipe *rx, @@ -1611,7 +1623,7 @@ void qcom_glink_native_remove(struct qcom_glink *glink) int ret; disable_irq(glink->irq); - cancel_work_sync(&glink->rx_work); + qcom_glink_cancel_rx_work(glink); ret = device_for_each_child(glink->dev, NULL, qcom_glink_remove_device); if (ret) From bf368c1377981c89cf08037322412cf421809733 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 12 Nov 2019 17:16:35 -0800 Subject: [PATCH 1741/3715] CIFS: Respect O_SYNC and O_DIRECT flags during reconnect commit 44805b0e62f15e90d233485420e1847133716bdc upstream. Currently the client translates O_SYNC and O_DIRECT flags into corresponding SMB create options when openning a file. The problem is that on reconnect when the file is being re-opened the client doesn't set those flags and it causes a server to reject re-open requests because create options don't match. The latter means that any subsequent system call against that open file fail until a share is re-mounted. Fix this by properly setting SMB create options when re-openning files after reconnects. Fixes: 1013e760d10e6: ("SMB3: Don't ignore O_SYNC/O_DSYNC and O_DIRECT flags") Cc: Stable Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/file.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 6dc0e092b0fc..5e75c5f77f4c 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -722,6 +722,13 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) if (backup_cred(cifs_sb)) create_options |= CREATE_OPEN_BACKUP_INTENT; + /* O_SYNC also has bit for O_DSYNC so following check picks up either */ + if (cfile->f_flags & O_SYNC) + create_options |= CREATE_WRITE_THROUGH; + + if (cfile->f_flags & O_DIRECT) + create_options |= CREATE_NO_BUFFER; + if (server->ops->get_lease_key) server->ops->get_lease_key(inode, &cfile->fid); From e1674fd49a9e70ae170d56efc38ee8856907ab63 Mon Sep 17 00:00:00 2001 From: Lihua Yao Date: Tue, 10 Sep 2019 13:22:28 +0000 Subject: [PATCH 1742/3715] ARM: dts: s3c64xx: Fix init order of clock providers commit d60d0cff4ab01255b25375425745c3cff69558ad upstream. fin_pll is the parent of clock-controller@7e00f000, specify the dependency to ensure proper initialization order of clock providers. without this patch: [ 0.000000] S3C6410 clocks: apll = 0, mpll = 0 [ 0.000000] epll = 0, arm_clk = 0 with this patch: [ 0.000000] S3C6410 clocks: apll = 532000000, mpll = 532000000 [ 0.000000] epll = 24000000, arm_clk = 532000000 Cc: Fixes: 3f6d439f2022 ("clk: reverse default clk provider initialization order in of_clk_init()") Signed-off-by: Lihua Yao Reviewed-by: Sylwester Nawrocki Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/s3c6410-mini6410.dts | 4 ++++ arch/arm/boot/dts/s3c6410-smdk6410.dts | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/arch/arm/boot/dts/s3c6410-mini6410.dts b/arch/arm/boot/dts/s3c6410-mini6410.dts index f4afda3594f8..de04d8764b0f 100644 --- a/arch/arm/boot/dts/s3c6410-mini6410.dts +++ b/arch/arm/boot/dts/s3c6410-mini6410.dts @@ -167,6 +167,10 @@ }; }; +&clocks { + clocks = <&fin_pll>; +}; + &sdhci0 { pinctrl-names = "default"; pinctrl-0 = <&sd0_clk>, <&sd0_cmd>, <&sd0_cd>, <&sd0_bus4>; diff --git a/arch/arm/boot/dts/s3c6410-smdk6410.dts b/arch/arm/boot/dts/s3c6410-smdk6410.dts index ecf35ec466f7..7ade1a0686d2 100644 --- a/arch/arm/boot/dts/s3c6410-smdk6410.dts +++ b/arch/arm/boot/dts/s3c6410-smdk6410.dts @@ -71,6 +71,10 @@ }; }; +&clocks { + clocks = <&fin_pll>; +}; + &sdhci0 { pinctrl-names = "default"; pinctrl-0 = <&sd0_clk>, <&sd0_cmd>, <&sd0_cd>, <&sd0_bus4>; From f6338f8a42529e6353ba8b0aaebe69fc3e8601f8 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 30 Jul 2019 20:23:39 +0300 Subject: [PATCH 1743/3715] ARM: tegra: Fix FLOW_CTLR_HALT register clobbering by tegra_resume() commit d70f7d31a9e2088e8a507194354d41ea10062994 upstream. There is an unfortunate typo in the code that results in writing to FLOW_CTLR_HALT instead of FLOW_CTLR_CSR. Cc: Acked-by: Peter De Schrijver Signed-off-by: Dmitry Osipenko Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-tegra/reset-handler.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-tegra/reset-handler.S b/arch/arm/mach-tegra/reset-handler.S index 805f306fa6f7..e31f167a8199 100644 --- a/arch/arm/mach-tegra/reset-handler.S +++ b/arch/arm/mach-tegra/reset-handler.S @@ -56,16 +56,16 @@ ENTRY(tegra_resume) cmp r6, #TEGRA20 beq 1f @ Yes /* Clear the flow controller flags for this CPU. */ - cpu_to_csr_reg r1, r0 + cpu_to_csr_reg r3, r0 mov32 r2, TEGRA_FLOW_CTRL_BASE - ldr r1, [r2, r1] + ldr r1, [r2, r3] /* Clear event & intr flag */ orr r1, r1, \ #FLOW_CTRL_CSR_INTR_FLAG | FLOW_CTRL_CSR_EVENT_FLAG movw r0, #0x3FFD @ enable, cluster_switch, immed, bitmaps @ & ext flags for CPU power mgnt bic r1, r1, r0 - str r1, [r2] + str r1, [r2, r3] 1: mov32 r9, 0xc09 From 05d1ce97c6df4ed9886669611999b33036937a36 Mon Sep 17 00:00:00 2001 From: Jiang Yi Date: Wed, 27 Nov 2019 17:49:10 +0100 Subject: [PATCH 1744/3715] vfio/pci: call irq_bypass_unregister_producer() before freeing irq commit d567fb8819162099035e546b11a736e29c2af0ea upstream. Since irq_bypass_register_producer() is called after request_irq(), we should do tear-down in reverse order: irq_bypass_unregister_producer() then free_irq(). Specifically free_irq() may release resources required by the irqbypass del_producer() callback. Notably an example provided by Marc Zyngier on arm64 with GICv4 that he indicates has the potential to wedge the hardware: free_irq(irq) __free_irq(irq) irq_domain_deactivate_irq(irq) its_irq_domain_deactivate() [unmap the VLPI from the ITS] kvm_arch_irq_bypass_del_producer(cons, prod) kvm_vgic_v4_unset_forwarding(kvm, irq, ...) its_unmap_vlpi(irq) [Unmap the VLPI from the ITS (again), remap the original LPI] Signed-off-by: Jiang Yi Cc: stable@vger.kernel.org # v4.4+ Fixes: 6d7425f109d26 ("vfio: Register/unregister irq_bypass_producer") Link: https://lore.kernel.org/kvm/20191127164910.15888-1-giangyi@amazon.com Reviewed-by: Marc Zyngier Reviewed-by: Eric Auger [aw: commit log] Signed-off-by: Alex Williamson Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/pci/vfio_pci_intrs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 1c46045b0e7f..94594dc63c41 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -297,8 +297,8 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev, irq = pci_irq_vector(pdev, vector); if (vdev->ctx[vector].trigger) { - free_irq(irq, vdev->ctx[vector].trigger); irq_bypass_unregister_producer(&vdev->ctx[vector].producer); + free_irq(irq, vdev->ctx[vector].trigger); kfree(vdev->ctx[vector].name); eventfd_ctx_put(vdev->ctx[vector].trigger); vdev->ctx[vector].trigger = NULL; From 490d242d6c725ab5e3bb45d507eb81c5dd5b418a Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Fri, 22 Nov 2019 16:09:55 -0600 Subject: [PATCH 1745/3715] dma-buf: Fix memory leak in sync_file_merge() commit 6645d42d79d33e8a9fe262660a75d5f4556bbea9 upstream. In the implementation of sync_file_merge() the allocated sync_file is leaked if number of fences overflows. Release sync_file by goto err. Fixes: a02b9dc90d84 ("dma-buf/sync_file: refactor fence storage in struct sync_file") Signed-off-by: Navid Emamdoost Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20191122220957.30427-1-navid.emamdoost@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/dma-buf/sync_file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index 03830634e141..bf65e634590b 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -230,7 +230,7 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, a_fences = get_fences(a, &a_num_fences); b_fences = get_fences(b, &b_num_fences); if (a_num_fences > INT_MAX - b_num_fences) - return NULL; + goto err; num_fences = a_num_fences + b_num_fences; From 94b8b2453d2ba64a506173dbb510934d810b861e Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Tue, 3 Dec 2019 19:42:58 +0800 Subject: [PATCH 1746/3715] dm btree: increase rebalance threshold in __rebalance2() commit 474e559567fa631dea8fb8407ab1b6090c903755 upstream. We got the following warnings from thin_check during thin-pool setup: $ thin_check /dev/vdb examining superblock examining devices tree missing devices: [1, 84] too few entries in btree_node: 41, expected at least 42 (block 138, max_entries = 126) examining mapping tree The phenomenon is the number of entries in one node of details_info tree is less than (max_entries / 3). And it can be easily reproduced by the following procedures: $ new a thin pool $ presume the max entries of details_info tree is 126 $ new 127 thin devices (e.g. 1~127) to make the root node being full and then split $ remove the first 43 (e.g. 1~43) thin devices to make the children reblance repeatedly $ stop the thin pool $ thin_check The root cause is that the B-tree removal procedure in __rebalance2() doesn't guarantee the invariance: the minimal number of entries in non-root node should be >= (max_entries / 3). Simply fix the problem by increasing the rebalance threshold to make sure the number of entries in each child will be greater than or equal to (max_entries / 3 + 1), so no matter which child is used for removal, the number will still be valid. Cc: stable@vger.kernel.org Signed-off-by: Hou Tao Acked-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/persistent-data/dm-btree-remove.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c index 21ea537bd55e..eff04fa23dfa 100644 --- a/drivers/md/persistent-data/dm-btree-remove.c +++ b/drivers/md/persistent-data/dm-btree-remove.c @@ -203,7 +203,13 @@ static void __rebalance2(struct dm_btree_info *info, struct btree_node *parent, struct btree_node *right = r->n; uint32_t nr_left = le32_to_cpu(left->header.nr_entries); uint32_t nr_right = le32_to_cpu(right->header.nr_entries); - unsigned threshold = 2 * merge_threshold(left) + 1; + /* + * Ensure the number of entries in each child will be greater + * than or equal to (max_entries / 3 + 1), so no matter which + * child is used for removal, the number will still be not + * less than (max_entries / 3). + */ + unsigned int threshold = 2 * (merge_threshold(left) + 1); if (nr_left + nr_right < threshold) { /* From 7009b5bf234a61321e3ad7cba6ffb99c30117e1f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 9 Dec 2019 09:34:57 -0800 Subject: [PATCH 1747/3715] scsi: iscsi: Fix a potential deadlock in the timeout handler commit 5480e299b5ae57956af01d4839c9fc88a465eeab upstream. Some time ago the block layer was modified such that timeout handlers are called from thread context instead of interrupt context. Make it safe to run the iSCSI timeout handler in thread context. This patch fixes the following lockdep complaint: ================================ WARNING: inconsistent lock state 5.5.1-dbg+ #11 Not tainted -------------------------------- inconsistent {IN-SOFTIRQ-W} -> {SOFTIRQ-ON-W} usage. kworker/7:1H/206 [HC0[0]:SC0[0]:HE1:SE1] takes: ffff88802d9827e8 (&(&session->frwd_lock)->rlock){+.?.}, at: iscsi_eh_cmd_timed_out+0xa6/0x6d0 [libiscsi] {IN-SOFTIRQ-W} state was registered at: lock_acquire+0x106/0x240 _raw_spin_lock+0x38/0x50 iscsi_check_transport_timeouts+0x3e/0x210 [libiscsi] call_timer_fn+0x132/0x470 __run_timers.part.0+0x39f/0x5b0 run_timer_softirq+0x63/0xc0 __do_softirq+0x12d/0x5fd irq_exit+0xb3/0x110 smp_apic_timer_interrupt+0x131/0x3d0 apic_timer_interrupt+0xf/0x20 default_idle+0x31/0x230 arch_cpu_idle+0x13/0x20 default_idle_call+0x53/0x60 do_idle+0x38a/0x3f0 cpu_startup_entry+0x24/0x30 start_secondary+0x222/0x290 secondary_startup_64+0xa4/0xb0 irq event stamp: 1383705 hardirqs last enabled at (1383705): [] _raw_spin_unlock_irq+0x2c/0x50 hardirqs last disabled at (1383704): [] _raw_spin_lock_irq+0x18/0x50 softirqs last enabled at (1383690): [] iscsi_queuecommand+0x76a/0xa20 [libiscsi] softirqs last disabled at (1383682): [] iscsi_queuecommand+0x118/0xa20 [libiscsi] other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(&session->frwd_lock)->rlock); lock(&(&session->frwd_lock)->rlock); *** DEADLOCK *** 2 locks held by kworker/7:1H/206: #0: ffff8880d57bf928 ((wq_completion)kblockd){+.+.}, at: process_one_work+0x472/0xab0 #1: ffff88802b9c7de8 ((work_completion)(&q->timeout_work)){+.+.}, at: process_one_work+0x476/0xab0 stack backtrace: CPU: 7 PID: 206 Comm: kworker/7:1H Not tainted 5.5.1-dbg+ #11 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 Workqueue: kblockd blk_mq_timeout_work Call Trace: dump_stack+0xa5/0xe6 print_usage_bug.cold+0x232/0x23b mark_lock+0x8dc/0xa70 __lock_acquire+0xcea/0x2af0 lock_acquire+0x106/0x240 _raw_spin_lock+0x38/0x50 iscsi_eh_cmd_timed_out+0xa6/0x6d0 [libiscsi] scsi_times_out+0xf4/0x440 [scsi_mod] scsi_timeout+0x1d/0x20 [scsi_mod] blk_mq_check_expired+0x365/0x3a0 bt_iter+0xd6/0xf0 blk_mq_queue_tag_busy_iter+0x3de/0x650 blk_mq_timeout_work+0x1af/0x380 process_one_work+0x56d/0xab0 worker_thread+0x7a/0x5d0 kthread+0x1bc/0x210 ret_from_fork+0x24/0x30 Fixes: 287922eb0b18 ("block: defer timeouts to a workqueue") Cc: Christoph Hellwig Cc: Keith Busch Cc: Lee Duncan Cc: Chris Leech Cc: Link: https://lore.kernel.org/r/20191209173457.187370-1-bvanassche@acm.org Signed-off-by: Bart Van Assche Reviewed-by: Lee Duncan Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/libiscsi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 5ea5d42bac76..662df16b07a4 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -1983,7 +1983,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) ISCSI_DBG_EH(session, "scsi cmd %p timedout\n", sc); - spin_lock(&session->frwd_lock); + spin_lock_bh(&session->frwd_lock); task = (struct iscsi_task *)sc->SCp.ptr; if (!task) { /* @@ -2110,7 +2110,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) done: if (task) task->last_timeout = jiffies; - spin_unlock(&session->frwd_lock); + spin_unlock_bh(&session->frwd_lock); ISCSI_DBG_EH(session, "return %s\n", rc == BLK_EH_RESET_TIMER ? "timer reset" : "shutdown or nh"); return rc; From a54bf706197be89fa2d431951cfe59b70ab8304d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 26 Nov 2019 09:41:46 -0500 Subject: [PATCH 1748/3715] drm/radeon: fix r1xx/r2xx register checker for POT textures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 008037d4d972c9c47b273e40e52ae34f9d9e33e7 upstream. Shift and mask were reversed. Noticed by chance. Tested-by: Meelis Roos Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/r100.c | 4 ++-- drivers/gpu/drm/radeon/r200.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index c31e660e35db..a4929372090d 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -1820,8 +1820,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p, track->textures[i].use_pitch = 1; } else { track->textures[i].use_pitch = 0; - track->textures[i].width = 1 << ((idx_value >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK); - track->textures[i].height = 1 << ((idx_value >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK); + track->textures[i].width = 1 << ((idx_value & RADEON_TXFORMAT_WIDTH_MASK) >> RADEON_TXFORMAT_WIDTH_SHIFT); + track->textures[i].height = 1 << ((idx_value & RADEON_TXFORMAT_HEIGHT_MASK) >> RADEON_TXFORMAT_HEIGHT_SHIFT); } if (idx_value & RADEON_TXFORMAT_CUBIC_MAP_ENABLE) track->textures[i].tex_coord_type = 2; diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c index c22321cc5a41..c2b506c707a2 100644 --- a/drivers/gpu/drm/radeon/r200.c +++ b/drivers/gpu/drm/radeon/r200.c @@ -476,8 +476,8 @@ int r200_packet0_check(struct radeon_cs_parser *p, track->textures[i].use_pitch = 1; } else { track->textures[i].use_pitch = 0; - track->textures[i].width = 1 << ((idx_value >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK); - track->textures[i].height = 1 << ((idx_value >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK); + track->textures[i].width = 1 << ((idx_value & RADEON_TXFORMAT_WIDTH_MASK) >> RADEON_TXFORMAT_WIDTH_SHIFT); + track->textures[i].height = 1 << ((idx_value & RADEON_TXFORMAT_HEIGHT_MASK) >> RADEON_TXFORMAT_HEIGHT_SHIFT); } if (idx_value & R200_TXFORMAT_LOOKUP_DISABLE) track->textures[i].lookup_disable = true; From da2bee90f5e7591aa1f949c26bf35569501d80ba Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Wed, 11 Dec 2019 16:20:03 +0200 Subject: [PATCH 1749/3715] xhci: fix USB3 device initiated resume race with roothub autosuspend commit 057d476fff778f1d3b9f861fdb5437ea1a3cfc99 upstream. A race in xhci USB3 remote wake handling may force device back to suspend after it initiated resume siganaling, causing a missed resume event or warm reset of device. When a USB3 link completes resume signaling and goes to enabled (UO) state a interrupt is issued and the interrupt handler will clear the bus_state->port_remote_wakeup resume flag, allowing bus suspend. If the USB3 roothub thread just finished reading port status before the interrupt, finding ports still in suspended (U3) state, but hasn't yet started suspending the hub, then the xhci interrupt handler will clear the flag that prevented roothub suspend and allow bus to suspend, forcing all port links back to suspended (U3) state. Example case: usb_runtime_suspend() # because all ports still show suspended U3 usb_suspend_both() hub_suspend(); # successful as hub->wakeup_bits not set yet ==> INTERRUPT xhci_irq() handle_port_status() clear bus_state->port_remote_wakeup usb_wakeup_notification() sets hub->wakeup_bits; kick_hub_wq() <== END INTERRUPT hcd_bus_suspend() xhci_bus_suspend() # success as port_remote_wakeup bits cleared Fix this by increasing roothub usage count during port resume to prevent roothub autosuspend, and by making sure bus_state->port_remote_wakeup flag is only cleared after resume completion is visible, i.e. after xhci roothub returned U0 or other non-U3 link state link on a get port status request. Issue rootcaused by Chiasheng Lee Cc: Cc: Lee, Hou-hsun Reported-by: Lee, Chiasheng Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20191211142007.8847-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 8 ++++++++ drivers/usb/host/xhci-ring.c | 6 +----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 95503bb9b067..d1363f3fabfa 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -887,6 +887,14 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd, status |= USB_PORT_STAT_C_BH_RESET << 16; if ((raw_port_status & PORT_CEC)) status |= USB_PORT_STAT_C_CONFIG_ERROR << 16; + + /* USB3 remote wake resume signaling completed */ + if (bus_state->port_remote_wakeup & (1 << wIndex) && + (raw_port_status & PORT_PLS_MASK) != XDEV_RESUME && + (raw_port_status & PORT_PLS_MASK) != XDEV_RECOVERY) { + bus_state->port_remote_wakeup &= ~(1 << wIndex); + usb_hcd_end_port_resume(&hcd->self, wIndex); + } } if (hcd->speed < HCD_USB3) { diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 89af395cd89c..61fa3007a74a 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1679,9 +1679,6 @@ static void handle_port_status(struct xhci_hcd *xhci, usb_hcd_resume_root_hub(hcd); } - if (hcd->speed >= HCD_USB3 && (portsc & PORT_PLS_MASK) == XDEV_INACTIVE) - bus_state->port_remote_wakeup &= ~(1 << faked_port_index); - if ((portsc & PORT_PLC) && (portsc & PORT_PLS_MASK) == XDEV_RESUME) { xhci_dbg(xhci, "port resume event for port %d\n", port_id); @@ -1700,6 +1697,7 @@ static void handle_port_status(struct xhci_hcd *xhci, bus_state->port_remote_wakeup |= 1 << faked_port_index; xhci_test_and_clear_bit(xhci, port_array, faked_port_index, PORT_PLC); + usb_hcd_start_port_resume(&hcd->self, faked_port_index); xhci_set_link_state(xhci, port_array, faked_port_index, XDEV_U0); /* Need to wait until the next link state change @@ -1737,8 +1735,6 @@ static void handle_port_status(struct xhci_hcd *xhci, if (slot_id && xhci->devs[slot_id]) xhci_ring_device(xhci, slot_id); if (bus_state->port_remote_wakeup & (1 << faked_port_index)) { - bus_state->port_remote_wakeup &= - ~(1 << faked_port_index); xhci_test_and_clear_bit(xhci, port_array, faked_port_index, PORT_PLC); usb_wakeup_notification(hcd->self.root_hub, From 8ebb2441b69a7f35a5131ab9cc77459f61ec4acd Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Wed, 27 Mar 2019 22:35:35 +0200 Subject: [PATCH 1750/3715] net: stmmac: use correct DMA buffer size in the RX descriptor commit 583e6361414903c5206258a30e5bd88cb03c0254 upstream. We always program the maximum DMA buffer size into the receive descriptor, although the allocated size may be less. E.g. with the default MTU size we allocate only 1536 bytes. If somebody sends us a bigger frame, then memory may get corrupted. Fix by using exact buffer sizes. Signed-off-by: Aaro Koskinen Signed-off-by: David S. Miller [acj: backport v4.14 -stable - adjust context - skipped the section modifying non-existent functions in dwxgmac2_descs.c and hwif.h ] Signed-off-by: Aviraj CJ Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/common.h | 2 +- .../net/ethernet/stmicro/stmmac/descs_com.h | 22 ++++++++++++------- .../ethernet/stmicro/stmmac/dwmac4_descs.c | 2 +- .../net/ethernet/stmicro/stmmac/enh_desc.c | 10 ++++++--- .../net/ethernet/stmicro/stmmac/norm_desc.c | 10 ++++++--- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 8 ++++--- 6 files changed, 35 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index d824bf942a8f..efc4a1a8343a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -367,7 +367,7 @@ struct dma_features { struct stmmac_desc_ops { /* DMA RX descriptor ring initialization */ void (*init_rx_desc) (struct dma_desc *p, int disable_rx_ic, int mode, - int end); + int end, int bfsize); /* DMA TX descriptor ring initialization */ void (*init_tx_desc) (struct dma_desc *p, int mode, int end); diff --git a/drivers/net/ethernet/stmicro/stmmac/descs_com.h b/drivers/net/ethernet/stmicro/stmmac/descs_com.h index 40d6356a7e73..3dfb07a78952 100644 --- a/drivers/net/ethernet/stmicro/stmmac/descs_com.h +++ b/drivers/net/ethernet/stmicro/stmmac/descs_com.h @@ -29,11 +29,13 @@ /* Specific functions used for Ring mode */ /* Enhanced descriptors */ -static inline void ehn_desc_rx_set_on_ring(struct dma_desc *p, int end) +static inline void ehn_desc_rx_set_on_ring(struct dma_desc *p, int end, + int bfsize) { - p->des1 |= cpu_to_le32((BUF_SIZE_8KiB - << ERDES1_BUFFER2_SIZE_SHIFT) - & ERDES1_BUFFER2_SIZE_MASK); + if (bfsize == BUF_SIZE_16KiB) + p->des1 |= cpu_to_le32((BUF_SIZE_8KiB + << ERDES1_BUFFER2_SIZE_SHIFT) + & ERDES1_BUFFER2_SIZE_MASK); if (end) p->des1 |= cpu_to_le32(ERDES1_END_RING); @@ -59,11 +61,15 @@ static inline void enh_set_tx_desc_len_on_ring(struct dma_desc *p, int len) } /* Normal descriptors */ -static inline void ndesc_rx_set_on_ring(struct dma_desc *p, int end) +static inline void ndesc_rx_set_on_ring(struct dma_desc *p, int end, int bfsize) { - p->des1 |= cpu_to_le32(((BUF_SIZE_2KiB - 1) - << RDES1_BUFFER2_SIZE_SHIFT) - & RDES1_BUFFER2_SIZE_MASK); + if (bfsize >= BUF_SIZE_2KiB) { + int bfsize2; + + bfsize2 = min(bfsize - BUF_SIZE_2KiB + 1, BUF_SIZE_2KiB - 1); + p->des1 |= cpu_to_le32((bfsize2 << RDES1_BUFFER2_SIZE_SHIFT) + & RDES1_BUFFER2_SIZE_MASK); + } if (end) p->des1 |= cpu_to_le32(RDES1_END_RING); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index 37b77e7da132..2896ec100c75 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -293,7 +293,7 @@ exit: } static void dwmac4_rd_init_rx_desc(struct dma_desc *p, int disable_rx_ic, - int mode, int end) + int mode, int end, int bfsize) { p->des3 = cpu_to_le32(RDES3_OWN | RDES3_BUFFER1_VALID_ADDR); diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index f2150efddc88..dfa6599ca1a7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -265,15 +265,19 @@ static int enh_desc_get_rx_status(void *data, struct stmmac_extra_stats *x, } static void enh_desc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, - int mode, int end) + int mode, int end, int bfsize) { + int bfsize1; + p->des0 |= cpu_to_le32(RDES0_OWN); - p->des1 |= cpu_to_le32(BUF_SIZE_8KiB & ERDES1_BUFFER1_SIZE_MASK); + + bfsize1 = min(bfsize, BUF_SIZE_8KiB); + p->des1 |= cpu_to_le32(bfsize1 & ERDES1_BUFFER1_SIZE_MASK); if (mode == STMMAC_CHAIN_MODE) ehn_desc_rx_set_on_chain(p); else - ehn_desc_rx_set_on_ring(p, end); + ehn_desc_rx_set_on_ring(p, end, bfsize); if (disable_rx_ic) p->des1 |= cpu_to_le32(ERDES1_DISABLE_IC); diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c index 66c17bab5997..44a4666290da 100644 --- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c @@ -133,15 +133,19 @@ static int ndesc_get_rx_status(void *data, struct stmmac_extra_stats *x, } static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode, - int end) + int end, int bfsize) { + int bfsize1; + p->des0 |= cpu_to_le32(RDES0_OWN); - p->des1 |= cpu_to_le32((BUF_SIZE_2KiB - 1) & RDES1_BUFFER1_SIZE_MASK); + + bfsize1 = min(bfsize, BUF_SIZE_2KiB - 1); + p->des1 |= cpu_to_le32(bfsize1 & RDES1_BUFFER1_SIZE_MASK); if (mode == STMMAC_CHAIN_MODE) ndesc_rx_set_on_chain(p, end); else - ndesc_rx_set_on_ring(p, end); + ndesc_rx_set_on_ring(p, end, bfsize); if (disable_rx_ic) p->des1 |= cpu_to_le32(RDES1_DISABLE_IC); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 612773b94ae3..da5b5fc99c04 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1072,11 +1072,13 @@ static void stmmac_clear_rx_descriptors(struct stmmac_priv *priv, u32 queue) if (priv->extend_desc) priv->hw->desc->init_rx_desc(&rx_q->dma_erx[i].basic, priv->use_riwt, priv->mode, - (i == DMA_RX_SIZE - 1)); + (i == DMA_RX_SIZE - 1), + priv->dma_buf_sz); else priv->hw->desc->init_rx_desc(&rx_q->dma_rx[i], priv->use_riwt, priv->mode, - (i == DMA_RX_SIZE - 1)); + (i == DMA_RX_SIZE - 1), + priv->dma_buf_sz); } /** @@ -3299,7 +3301,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue) dma_wmb(); if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00)) - priv->hw->desc->init_rx_desc(p, priv->use_riwt, 0, 0); + priv->hw->desc->init_rx_desc(p, priv->use_riwt, 0, 0, priv->dma_buf_sz); else priv->hw->desc->set_rx_owner(p); From 27e96cbce648e04dd78ae0cd0f4f34b03314dea7 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Wed, 27 Mar 2019 22:35:37 +0200 Subject: [PATCH 1751/3715] net: stmmac: don't stop NAPI processing when dropping a packet commit 07b3975352374c3f5ebb4a42ef0b253fe370542d upstream. Currently, if we drop a packet, we exit from NAPI loop before the budget is consumed. In some situations this will make the RX processing stall e.g. when flood pinging the system with oversized packets, as the errorneous packets are not dropped efficiently. If we drop a packet, we should just continue to the next one as long as the budget allows. Signed-off-by: Aaro Koskinen Signed-off-by: David S. Miller [acj: backport v4.14 -stable - adjust context] Signed-off-by: Aviraj CJ Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index da5b5fc99c04..e6d16c48ffef 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3323,9 +3323,8 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue) static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) { struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; - unsigned int entry = rx_q->cur_rx; int coe = priv->hw->rx_csum; - unsigned int next_entry; + unsigned int next_entry = rx_q->cur_rx; unsigned int count = 0; if (netif_msg_rx_status(priv)) { @@ -3340,10 +3339,12 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) priv->hw->desc->display_ring(rx_head, DMA_RX_SIZE, true); } while (count < limit) { - int status; + int entry, status; struct dma_desc *p; struct dma_desc *np; + entry = next_entry; + if (priv->extend_desc) p = (struct dma_desc *)(rx_q->dma_erx + entry); else @@ -3410,7 +3411,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) "len %d larger than size (%d)\n", frame_len, priv->dma_buf_sz); priv->dev->stats.rx_length_errors++; - break; + continue; } /* ACS is set; GMAC core strips PAD/FCS for IEEE 802.3 @@ -3446,7 +3447,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) dev_warn(priv->device, "packet dropped\n"); priv->dev->stats.rx_dropped++; - break; + continue; } dma_sync_single_for_cpu(priv->device, @@ -3471,7 +3472,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) "%s: Inconsistent Rx chain\n", priv->dev->name); priv->dev->stats.rx_dropped++; - break; + continue; } prefetch(skb->data - NET_IP_ALIGN); rx_q->rx_skbuff[entry] = NULL; @@ -3506,7 +3507,6 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) priv->dev->stats.rx_packets++; priv->dev->stats.rx_bytes += frame_len; } - entry = next_entry; } stmmac_rx_refill(priv, queue); From e1f7d50ae3a3ec342e87a9b1ce6787bfb8b3c08b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 21 Dec 2019 10:47:56 +0100 Subject: [PATCH 1752/3715] Linux 4.14.160 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e14ad8f064ec..f011cb69545f 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 159 +SUBLEVEL = 160 EXTRAVERSION = NAME = Petit Gorille From ee31b7230f0ced61bc973c3d2576bebbdbdac2b7 Mon Sep 17 00:00:00 2001 From: Alistair Delva Date: Mon, 23 Dec 2019 10:45:42 -0800 Subject: [PATCH 1753/3715] ANDROID: cuttlefish_defconfig: Disable TRANSPARENT_HUGEPAGE Fix conflict between jemalloc/scudo and MADV_NOHUGEPAGE by disabling the transparent hugepage support. It has also been suggested that this feature can make VM behavior less predictable. Bug: 131119917 Change-Id: I17556838fbf1f893e26c5658ee95b4e3b16b10ad Signed-off-by: Alistair Delva --- arch/arm64/configs/cuttlefish_defconfig | 1 - arch/x86/configs/x86_64_cuttlefish_defconfig | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index 3346572df45f..71fc12c51973 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -52,7 +52,6 @@ CONFIG_PREEMPT=y CONFIG_HZ_100=y # CONFIG_SPARSEMEM_VMEMMAP is not set CONFIG_KSM=y -CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_ZSMALLOC=y CONFIG_SECCOMP=y CONFIG_PARAVIRT=y diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 38e8765c808c..a21970185ddf 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -55,7 +55,6 @@ CONFIG_X86_MSR=y CONFIG_X86_CPUID=y CONFIG_KSM=y CONFIG_DEFAULT_MMAP_MIN_ADDR=65536 -CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_ZSMALLOC=y # CONFIG_MTRR is not set CONFIG_HZ_100=y From 665c9af8987880414e141e623bf7e6481d1c1696 Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Mon, 9 Dec 2019 21:31:25 +0800 Subject: [PATCH 1754/3715] af_packet: set defaule value for tmo [ Upstream commit b43d1f9f7067c6759b1051e8ecb84e82cef569fe ] There is softlockup when using TPACKET_V3: ... NMI watchdog: BUG: soft lockup - CPU#2 stuck for 60010ms! (__irq_svc) from [] (_raw_spin_unlock_irqrestore+0x44/0x54) (_raw_spin_unlock_irqrestore) from [] (mod_timer+0x210/0x25c) (mod_timer) from [] (prb_retire_rx_blk_timer_expired+0x68/0x11c) (prb_retire_rx_blk_timer_expired) from [] (call_timer_fn+0x90/0x17c) (call_timer_fn) from [] (run_timer_softirq+0x2d4/0x2fc) (run_timer_softirq) from [] (__do_softirq+0x218/0x318) (__do_softirq) from [] (irq_exit+0x88/0xac) (irq_exit) from [] (msa_irq_exit+0x11c/0x1d4) (msa_irq_exit) from [] (handle_IPI+0x650/0x7f4) (handle_IPI) from [] (gic_handle_irq+0x108/0x118) (gic_handle_irq) from [] (__irq_usr+0x44/0x5c) ... If __ethtool_get_link_ksettings() is failed in prb_calc_retire_blk_tmo(), msec and tmo will be zero, so tov_in_jiffies is zero and the timer expire for retire_blk_timer is turn to mod_timer(&pkc->retire_blk_timer, jiffies + 0), which will trigger cpu usage of softirq is 100%. Fixes: f6fb8f100b80 ("af-packet: TPACKET_V3 flexible buffer implementation.") Tested-by: Xiao Jiangfeng Signed-off-by: Mao Wenan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 1f86bf0d1649..4e1058159b08 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -593,7 +593,8 @@ static int prb_calc_retire_blk_tmo(struct packet_sock *po, msec = 1; div = ecmd.base.speed / 1000; } - } + } else + return DEFAULT_PRB_RETIRE_TOV; mbits = (blk_size_in_bytes * 8) / (1024 * 1024); From 377d7378a60511970b89c855e033bc523895c1e7 Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Tue, 10 Dec 2019 00:22:07 +0800 Subject: [PATCH 1755/3715] fjes: fix missed check in fjes_acpi_add [ Upstream commit a288f105a03a7e0e629a8da2b31f34ebf0343ee2 ] fjes_acpi_add() misses a check for platform_device_register_simple(). Add a check to fix it. Fixes: 658d439b2292 ("fjes: Introduce FUJITSU Extended Socket Network Device driver") Signed-off-by: Chuhong Yuan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/fjes/fjes_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index 14d6579b292a..314e3eac09b9 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -181,6 +181,9 @@ static int fjes_acpi_add(struct acpi_device *device) /* create platform_device */ plat_dev = platform_device_register_simple(DRV_NAME, 0, fjes_resource, ARRAY_SIZE(fjes_resource)); + if (IS_ERR(plat_dev)) + return PTR_ERR(plat_dev); + device->driver_data = plat_dev; return 0; From ab8224804954d8d32b46805db5e662f174fd5e10 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 19 Dec 2019 23:24:47 +0000 Subject: [PATCH 1756/3715] mod_devicetable: fix PHY module format [ Upstream commit d2ed49cf6c13e379c5819aa5ac20e1f9674ebc89 ] When a PHY is probed, if the top bit is set, we end up requesting a module with the string "mdio:-10101110000000100101000101010001" - the top bit is printed to a signed -1 value. This leads to the module not being loaded. Fix the module format string and the macro generating the values for it to ensure that we only print unsigned types and the top bit is always 0/1. We correctly end up with "mdio:10101110000000100101000101010001". Fixes: 8626d3b43280 ("phylib: Support phy module autoloading") Reviewed-by: Andrew Lunn Signed-off-by: Russell King Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/mod_devicetable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 1c2e8d6b7274..6db347b58644 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -519,9 +519,9 @@ struct platform_device_id { #define MDIO_NAME_SIZE 32 #define MDIO_MODULE_PREFIX "mdio:" -#define MDIO_ID_FMT "%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d" +#define MDIO_ID_FMT "%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u" #define MDIO_ID_ARGS(_id) \ - (_id)>>31, ((_id)>>30) & 1, ((_id)>>29) & 1, ((_id)>>28) & 1, \ + ((_id)>>31) & 1, ((_id)>>30) & 1, ((_id)>>29) & 1, ((_id)>>28) & 1, \ ((_id)>>27) & 1, ((_id)>>26) & 1, ((_id)>>25) & 1, ((_id)>>24) & 1, \ ((_id)>>23) & 1, ((_id)>>22) & 1, ((_id)>>21) & 1, ((_id)>>20) & 1, \ ((_id)>>19) & 1, ((_id)>>18) & 1, ((_id)>>17) & 1, ((_id)>>16) & 1, \ From b1990ae775117fc4311c8cacea12f3c4d9481a97 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 20 Dec 2019 14:31:40 +0100 Subject: [PATCH 1757/3715] net: dst: Force 4-byte alignment of dst_metrics [ Upstream commit 258a980d1ec23e2c786e9536a7dd260bea74bae6 ] When storing a pointer to a dst_metrics structure in dst_entry._metrics, two flags are added in the least significant bits of the pointer value. Hence this assumes all pointers to dst_metrics structures have at least 4-byte alignment. However, on m68k, the minimum alignment of 32-bit values is 2 bytes, not 4 bytes. Hence in some kernel builds, dst_default_metrics may be only 2-byte aligned, leading to obscure boot warnings like: WARNING: CPU: 0 PID: 7 at lib/refcount.c:28 refcount_warn_saturate+0x44/0x9a refcount_t: underflow; use-after-free. Modules linked in: CPU: 0 PID: 7 Comm: ksoftirqd/0 Tainted: G W 5.5.0-rc2-atari-01448-g114a1a1038af891d-dirty #261 Stack from 10835e6c: 10835e6c 0038134f 00023fa6 00394b0f 0000001c 00000009 00321560 00023fea 00394b0f 0000001c 001a70f8 00000009 00000000 10835eb4 00000001 00000000 04208040 0000000a 00394b4a 10835ed4 00043aa8 001a70f8 00394b0f 0000001c 00000009 00394b4a 0026aba8 003215a4 00000003 00000000 0026d5a8 00000001 003215a4 003a4361 003238d6 000001f0 00000000 003215a4 10aa3b00 00025e84 003ddb00 10834000 002416a8 10aa3b00 00000000 00000080 000aa038 0004854a Call Trace: [<00023fa6>] __warn+0xb2/0xb4 [<00023fea>] warn_slowpath_fmt+0x42/0x64 [<001a70f8>] refcount_warn_saturate+0x44/0x9a [<00043aa8>] printk+0x0/0x18 [<001a70f8>] refcount_warn_saturate+0x44/0x9a [<0026aba8>] refcount_sub_and_test.constprop.73+0x38/0x3e [<0026d5a8>] ipv4_dst_destroy+0x5e/0x7e [<00025e84>] __local_bh_enable_ip+0x0/0x8e [<002416a8>] dst_destroy+0x40/0xae Fix this by forcing 4-byte alignment of all dst_metrics structures. Fixes: e5fd387ad5b30ca3 ("ipv6: do not overwrite inetpeer metrics prematurely") Signed-off-by: Geert Uytterhoeven Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/dst.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/dst.h b/include/net/dst.h index 2acd670fc86b..fe230dd62c28 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -110,7 +110,7 @@ struct dst_entry { struct dst_metrics { u32 metrics[RTAX_MAX]; refcount_t refcnt; -}; +} __aligned(4); /* Low pointer bits contain DST_METRICS_FLAGS */ extern const struct dst_metrics dst_default_metrics; u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); From df6f496b6ad9b3254103484fde03b29755e83b98 Mon Sep 17 00:00:00 2001 From: Jiangfeng Xiao Date: Thu, 19 Dec 2019 10:08:07 +0800 Subject: [PATCH 1758/3715] net: hisilicon: Fix a BUG trigered by wrong bytes_compl [ Upstream commit 90b3b339364c76baa2436445401ea9ade040c216 ] When doing stress test, we get the following trace: kernel BUG at lib/dynamic_queue_limits.c:26! Internal error: Oops - BUG: 0 [#1] SMP ARM Modules linked in: hip04_eth CPU: 0 PID: 2003 Comm: tDblStackPcap0 Tainted: G O L 4.4.197 #1 Hardware name: Hisilicon A15 task: c3637668 task.stack: de3bc000 PC is at dql_completed+0x18/0x154 LR is at hip04_tx_reclaim+0x110/0x174 [hip04_eth] pc : [] lr : [] psr: 800f0313 sp : de3bdc2c ip : 00000000 fp : c020fb10 r10: 00000000 r9 : c39b4224 r8 : 00000001 r7 : 00000046 r6 : c39b4000 r5 : 0078f392 r4 : 0078f392 r3 : 00000047 r2 : 00000000 r1 : 00000046 r0 : df5d5c80 Flags: Nzcv IRQs on FIQs on Mode SVC_32 ISA ARM Segment user Control: 32c5387d Table: 1e189b80 DAC: 55555555 Process tDblStackPcap0 (pid: 2003, stack limit = 0xde3bc190) Stack: (0xde3bdc2c to 0xde3be000) [] (dql_completed) from [] (hip04_tx_reclaim+0x110/0x174 [hip04_eth]) [] (hip04_tx_reclaim [hip04_eth]) from [] (hip04_rx_poll+0x20/0x388 [hip04_eth]) [] (hip04_rx_poll [hip04_eth]) from [] (net_rx_action+0x120/0x374) [] (net_rx_action) from [] (__do_softirq+0x218/0x318) [] (__do_softirq) from [] (irq_exit+0x88/0xac) [] (irq_exit) from [] (msa_irq_exit+0x11c/0x1d4) [] (msa_irq_exit) from [] (__handle_domain_irq+0x110/0x148) [] (__handle_domain_irq) from [] (gic_handle_irq+0xd4/0x118) [] (gic_handle_irq) from [] (__irq_svc+0x40/0x58) Exception stack(0xde3bdde0 to 0xde3bde28) dde0: 00000000 00008001 c3637668 00000000 00000000 a00f0213 dd3627a0 c0af6380 de00: c086d380 a00f0213 c0a22a50 de3bde6c 00000002 de3bde30 c0558138 c055813c de20: 600f0213 ffffffff [] (__irq_svc) from [] (_raw_spin_unlock_irqrestore+0x44/0x54) Kernel panic - not syncing: Fatal exception in interrupt Pre-modification code: int hip04_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev) { [...] [1] priv->tx_head = TX_NEXT(tx_head); [2] count++; [3] netdev_sent_queue(ndev, skb->len); [...] } An rx interrupt occurs if hip04_mac_start_xmit just executes to the line 2, tx_head has been updated, but corresponding 'skb->len' has not been added to dql_queue. And then hip04_mac_interrupt->__napi_schedule->hip04_rx_poll->hip04_tx_reclaim In hip04_tx_reclaim, because tx_head has been updated, bytes_compl will plus an additional "skb-> len" which has not been added to dql_queue. And then trigger the BUG_ON(bytes_compl > num_queued - dql->num_completed). To solve the problem described above, we put "netdev_sent_queue(ndev, skb->len);" before "priv->tx_head = TX_NEXT(tx_head);" Fixes: a41ea46a9a12 ("net: hisilicon: new hip04 ethernet driver") Signed-off-by: Jiangfeng Xiao Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/hisilicon/hip04_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index 84c0f22ac2db..d5489cb0afff 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -456,9 +456,9 @@ hip04_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev) skb_tx_timestamp(skb); hip04_set_xmit_desc(priv, phys); - priv->tx_head = TX_NEXT(tx_head); count++; netdev_sent_queue(ndev, skb->len); + priv->tx_head = TX_NEXT(tx_head); stats->tx_bytes += skb->len; stats->tx_packets++; From f46b81843d9ff5f253cdbf69128c5abe7099e35f Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Wed, 18 Dec 2019 17:21:55 +0800 Subject: [PATCH 1759/3715] net: nfc: nci: fix a possible sleep-in-atomic-context bug in nci_uart_tty_receive() [ Upstream commit b7ac893652cafadcf669f78452329727e4e255cc ] The kernel may sleep while holding a spinlock. The function call path (from bottom to top) in Linux 4.19 is: net/nfc/nci/uart.c, 349: nci_skb_alloc in nci_uart_default_recv_buf net/nfc/nci/uart.c, 255: (FUNC_PTR)nci_uart_default_recv_buf in nci_uart_tty_receive net/nfc/nci/uart.c, 254: spin_lock in nci_uart_tty_receive nci_skb_alloc(GFP_KERNEL) can sleep at runtime. (FUNC_PTR) means a function pointer is called. To fix this bug, GFP_KERNEL is replaced with GFP_ATOMIC for nci_skb_alloc(). This bug is found by a static analysis tool STCheck written by myself. Signed-off-by: Jia-Ju Bai Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/nfc/nci/uart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c index 8d104c1db628..6f5addb5225c 100644 --- a/net/nfc/nci/uart.c +++ b/net/nfc/nci/uart.c @@ -348,7 +348,7 @@ static int nci_uart_default_recv_buf(struct nci_uart *nu, const u8 *data, nu->rx_packet_len = -1; nu->rx_skb = nci_skb_alloc(nu->ndev, NCI_MAX_PACKET_SIZE, - GFP_KERNEL); + GFP_ATOMIC); if (!nu->rx_skb) return -ENOMEM; } From de7e8ca720e622aae4dde1b66fd8544741f15b9c Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 17 Dec 2019 01:57:40 +0000 Subject: [PATCH 1760/3715] net: qlogic: Fix error paths in ql_alloc_large_buffers() [ Upstream commit cad46039e4c99812db067c8ac22a864960e7acc4 ] ql_alloc_large_buffers() has the usual RX buffer allocation loop where it allocates skbs and maps them for DMA. It also treats failure as a fatal error. There are (at least) three bugs in the error paths: 1. ql_free_large_buffers() assumes that the lrg_buf[] entry for the first buffer that couldn't be allocated will have .skb == NULL. But the qla_buf[] array is not zero-initialised. 2. ql_free_large_buffers() DMA-unmaps all skbs in lrg_buf[]. This is incorrect for the last allocated skb, if DMA mapping failed. 3. Commit 1acb8f2a7a9f ("net: qlogic: Fix memory leak in ql_alloc_large_buffers") added a direct call to dev_kfree_skb_any() after the skb is recorded in lrg_buf[], so ql_free_large_buffers() will double-free it. The bugs are somewhat inter-twined, so fix them all at once: * Clear each entry in qla_buf[] before attempting to allocate an skb for it. This goes half-way to fixing bug 1. * Set the .skb field only after the skb is DMA-mapped. This fixes the rest. Fixes: 1357bfcf7106 ("qla3xxx: Dynamically size the rx buffer queue ...") Fixes: 0f8ab89e825f ("qla3xxx: Check return code from pci_map_single() ...") Fixes: 1acb8f2a7a9f ("net: qlogic: Fix memory leak in ql_alloc_large_buffers") Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qla3xxx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 5fca9a75780c..cc53ee26bd3e 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -2756,6 +2756,9 @@ static int ql_alloc_large_buffers(struct ql3_adapter *qdev) int err; for (i = 0; i < qdev->num_large_buffers; i++) { + lrg_buf_cb = &qdev->lrg_buf[i]; + memset(lrg_buf_cb, 0, sizeof(struct ql_rcv_buf_cb)); + skb = netdev_alloc_skb(qdev->ndev, qdev->lrg_buffer_len); if (unlikely(!skb)) { @@ -2766,11 +2769,7 @@ static int ql_alloc_large_buffers(struct ql3_adapter *qdev) ql_free_large_buffers(qdev); return -ENOMEM; } else { - - lrg_buf_cb = &qdev->lrg_buf[i]; - memset(lrg_buf_cb, 0, sizeof(struct ql_rcv_buf_cb)); lrg_buf_cb->index = i; - lrg_buf_cb->skb = skb; /* * We save some space to copy the ethhdr from first * buffer @@ -2792,6 +2791,7 @@ static int ql_alloc_large_buffers(struct ql3_adapter *qdev) return -ENOMEM; } + lrg_buf_cb->skb = skb; dma_unmap_addr_set(lrg_buf_cb, mapaddr, map); dma_unmap_len_set(lrg_buf_cb, maplen, qdev->lrg_buffer_len - From 1e125b01106007dcfcc81dfabdd169e6db23f5fb Mon Sep 17 00:00:00 2001 From: Cristian Birsan Date: Thu, 12 Dec 2019 13:52:47 +0200 Subject: [PATCH 1761/3715] net: usb: lan78xx: Fix suspend/resume PHY register access error [ Upstream commit 20032b63586ac6c28c936dff696981159913a13f ] Lan78xx driver accesses the PHY registers through MDIO bus over USB connection. When performing a suspend/resume, the PHY registers can be accessed before the USB connection is resumed. This will generate an error and will prevent the device to resume correctly. This patch adds the dependency between the MDIO bus and USB device to allow correct handling of suspend/resume. Fixes: ce85e13ad6ef ("lan78xx: Update to use phylib instead of mii_if_info.") Signed-off-by: Cristian Birsan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/lan78xx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 2229284d16f5..7d1d5b30ecc3 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1765,6 +1765,7 @@ static int lan78xx_mdio_init(struct lan78xx_net *dev) dev->mdiobus->read = lan78xx_mdiobus_read; dev->mdiobus->write = lan78xx_mdiobus_write; dev->mdiobus->name = "lan78xx-mdiobus"; + dev->mdiobus->parent = &dev->udev->dev; snprintf(dev->mdiobus->id, MII_BUS_ID_SIZE, "usb-%03d:%03d", dev->udev->bus->busnum, dev->udev->devnum); From e717f6b4f1737f3a81f727d821b9c834a892f6fb Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Thu, 12 Dec 2019 06:49:28 -0800 Subject: [PATCH 1762/3715] qede: Fix multicast mac configuration [ Upstream commit 0af67e49b018e7280a4227bfe7b6005bc9d3e442 ] Driver doesn't accommodate the configuration for max number of multicast mac addresses, in such particular case it leaves the device with improper/invalid multicast configuration state, causing connectivity issues (in lacp bonding like scenarios). Signed-off-by: Manish Chopra Signed-off-by: Ariel Elior Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index f79e36e4060a..e7ad95de3da8 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1181,7 +1181,7 @@ qede_configure_mcast_filtering(struct net_device *ndev, netif_addr_lock_bh(ndev); mc_count = netdev_mc_count(ndev); - if (mc_count < 64) { + if (mc_count <= 64) { netdev_for_each_mc_addr(ha, ndev) { ether_addr_copy(temp, ha->addr); temp += ETH_ALEN; From 2b63584780a35b566b575a38cddedad5df0ed43f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 9 Dec 2019 13:45:54 +0800 Subject: [PATCH 1763/3715] sctp: fully initialize v4 addr in some functions [ Upstream commit b6f3320b1d5267e7b583a6d0c88dda518101740c ] Syzbot found a crash: BUG: KMSAN: uninit-value in crc32_body lib/crc32.c:112 [inline] BUG: KMSAN: uninit-value in crc32_le_generic lib/crc32.c:179 [inline] BUG: KMSAN: uninit-value in __crc32c_le_base+0x4fa/0xd30 lib/crc32.c:202 Call Trace: crc32_body lib/crc32.c:112 [inline] crc32_le_generic lib/crc32.c:179 [inline] __crc32c_le_base+0x4fa/0xd30 lib/crc32.c:202 chksum_update+0xb2/0x110 crypto/crc32c_generic.c:90 crypto_shash_update+0x4c5/0x530 crypto/shash.c:107 crc32c+0x150/0x220 lib/libcrc32c.c:47 sctp_csum_update+0x89/0xa0 include/net/sctp/checksum.h:36 __skb_checksum+0x1297/0x12a0 net/core/skbuff.c:2640 sctp_compute_cksum include/net/sctp/checksum.h:59 [inline] sctp_packet_pack net/sctp/output.c:528 [inline] sctp_packet_transmit+0x40fb/0x4250 net/sctp/output.c:597 sctp_outq_flush_transports net/sctp/outqueue.c:1146 [inline] sctp_outq_flush+0x1823/0x5d80 net/sctp/outqueue.c:1194 sctp_outq_uncork+0xd0/0xf0 net/sctp/outqueue.c:757 sctp_cmd_interpreter net/sctp/sm_sideeffect.c:1781 [inline] sctp_side_effects net/sctp/sm_sideeffect.c:1184 [inline] sctp_do_sm+0x8fe1/0x9720 net/sctp/sm_sideeffect.c:1155 sctp_primitive_REQUESTHEARTBEAT+0x175/0x1a0 net/sctp/primitive.c:185 sctp_apply_peer_addr_params+0x212/0x1d40 net/sctp/socket.c:2433 sctp_setsockopt_peer_addr_params net/sctp/socket.c:2686 [inline] sctp_setsockopt+0x189bb/0x19090 net/sctp/socket.c:4672 The issue was caused by transport->ipaddr set with uninit addr param, which was passed by: sctp_transport_init net/sctp/transport.c:47 [inline] sctp_transport_new+0x248/0xa00 net/sctp/transport.c:100 sctp_assoc_add_peer+0x5ba/0x2030 net/sctp/associola.c:611 sctp_process_param net/sctp/sm_make_chunk.c:2524 [inline] where 'addr' is set by sctp_v4_from_addr_param(), and it doesn't initialize the padding of addr->v4. Later when calling sctp_make_heartbeat(), hbinfo.daddr(=transport->ipaddr) will become the part of skb, and the issue occurs. This patch is to fix it by initializing the padding of addr->v4 in sctp_v4_from_addr_param(), as well as other functions that do the similar thing, and these functions shouldn't trust that the caller initializes the memory, as Marcelo suggested. Reported-by: syzbot+6dcbfea81cd3d4dd0b02@syzkaller.appspotmail.com Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/protocol.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 01f88e9abbc6..bf39f317953a 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -253,6 +253,7 @@ static void sctp_v4_from_skb(union sctp_addr *addr, struct sk_buff *skb, sa->sin_port = sh->dest; sa->sin_addr.s_addr = ip_hdr(skb)->daddr; } + memset(sa->sin_zero, 0, sizeof(sa->sin_zero)); } /* Initialize an sctp_addr from a socket. */ @@ -261,6 +262,7 @@ static void sctp_v4_from_sk(union sctp_addr *addr, struct sock *sk) addr->v4.sin_family = AF_INET; addr->v4.sin_port = 0; addr->v4.sin_addr.s_addr = inet_sk(sk)->inet_rcv_saddr; + memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero)); } /* Initialize sk->sk_rcv_saddr from sctp_addr. */ @@ -283,6 +285,7 @@ static void sctp_v4_from_addr_param(union sctp_addr *addr, addr->v4.sin_family = AF_INET; addr->v4.sin_port = port; addr->v4.sin_addr.s_addr = param->v4.addr.s_addr; + memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero)); } /* Initialize an address parameter from a sctp_addr and return the length @@ -307,6 +310,7 @@ static void sctp_v4_dst_saddr(union sctp_addr *saddr, struct flowi4 *fl4, saddr->v4.sin_family = AF_INET; saddr->v4.sin_port = port; saddr->v4.sin_addr.s_addr = fl4->saddr; + memset(saddr->v4.sin_zero, 0, sizeof(saddr->v4.sin_zero)); } /* Compare two addresses exactly. */ @@ -329,6 +333,7 @@ static void sctp_v4_inaddr_any(union sctp_addr *addr, __be16 port) addr->v4.sin_family = AF_INET; addr->v4.sin_addr.s_addr = htonl(INADDR_ANY); addr->v4.sin_port = port; + memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero)); } /* Is this a wildcard address? */ From af6ba22a383189b75409859dcc54211e53fbf3d4 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 19 Nov 2019 13:59:20 -0500 Subject: [PATCH 1764/3715] btrfs: don't double lock the subvol_sem for rename exchange commit 943eb3bf25f4a7b745dd799e031be276aa104d82 upstream. If we're rename exchanging two subvols we'll try to lock this lock twice, which is bad. Just lock once if either of the ino's are subvols. Fixes: cdd1fedf8261 ("btrfs: add support for RENAME_EXCHANGE and RENAME_WHITEOUT") CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 739f45b04b52..2ac361021119 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9820,9 +9820,8 @@ static int btrfs_rename_exchange(struct inode *old_dir, return -EXDEV; /* close the race window with snapshot create/destroy ioctl */ - if (old_ino == BTRFS_FIRST_FREE_OBJECTID) - down_read(&fs_info->subvol_sem); - if (new_ino == BTRFS_FIRST_FREE_OBJECTID) + if (old_ino == BTRFS_FIRST_FREE_OBJECTID || + new_ino == BTRFS_FIRST_FREE_OBJECTID) down_read(&fs_info->subvol_sem); /* @@ -10014,9 +10013,8 @@ out_fail: ret2 = btrfs_end_transaction(trans); ret = ret ? ret : ret2; out_notrans: - if (new_ino == BTRFS_FIRST_FREE_OBJECTID) - up_read(&fs_info->subvol_sem); - if (old_ino == BTRFS_FIRST_FREE_OBJECTID) + if (new_ino == BTRFS_FIRST_FREE_OBJECTID || + old_ino == BTRFS_FIRST_FREE_OBJECTID) up_read(&fs_info->subvol_sem); return ret; From 0145dc5ac7bc8b216554437cc9575e116a03abb0 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 19 Nov 2019 13:59:35 -0500 Subject: [PATCH 1765/3715] btrfs: do not call synchronize_srcu() in inode_tree_del commit f72ff01df9cf5db25c76674cac16605992d15467 upstream. Testing with the new fsstress uncovered a pretty nasty deadlock with lookup and snapshot deletion. Process A unlink -> final iput -> inode_tree_del -> synchronize_srcu(subvol_srcu) Process B btrfs_lookup <- srcu_read_lock() acquired here -> btrfs_iget -> find inode that has I_FREEING set -> __wait_on_freeing_inode() We're holding the srcu_read_lock() while doing the iget in order to make sure our fs root doesn't go away, and then we are waiting for the inode to finish freeing. However because the free'ing process is doing a synchronize_srcu() we deadlock. Fix this by dropping the synchronize_srcu() in inode_tree_del(). We don't need people to stop accessing the fs root at this point, we're only adding our empty root to the dead roots list. A larger much more invasive fix is forthcoming to address how we deal with fs roots, but this fixes the immediate problem. Fixes: 76dda93c6ae2 ("Btrfs: add snapshot/subvolume destroy ioctl") CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2ac361021119..f2dc517768f0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5729,7 +5729,6 @@ static void inode_tree_add(struct inode *inode) static void inode_tree_del(struct inode *inode) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_root *root = BTRFS_I(inode)->root; int empty = 0; @@ -5742,7 +5741,6 @@ static void inode_tree_del(struct inode *inode) spin_unlock(&root->inode_lock); if (empty && btrfs_root_refs(&root->root_item) == 0) { - synchronize_srcu(&fs_info->subvol_srcu); spin_lock(&root->inode_lock); empty = RB_EMPTY_ROOT(&root->inode_tree); spin_unlock(&root->inode_lock); From 08c0e06a7ba7ae66046f97366c10353cdf2a4d0d Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 6 Dec 2019 09:37:17 -0500 Subject: [PATCH 1766/3715] btrfs: skip log replay on orphaned roots commit 9bc574de590510eff899c3ca8dbaf013566b5efe upstream. My fsstress modifications coupled with generic/475 uncovered a failure to mount and replay the log if we hit a orphaned root. We do not want to replay the log for an orphan root, but it's completely legitimate to have an orphaned root with a log attached. Fix this by simply skipping replaying the log. We still need to pin it's root node so that we do not overwrite it while replaying other logs, as we re-read the log root at every stage of the replay. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index e35301e5fe8e..98c397eb054c 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -6018,9 +6018,28 @@ again: wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key); if (IS_ERR(wc.replay_dest)) { ret = PTR_ERR(wc.replay_dest); + + /* + * We didn't find the subvol, likely because it was + * deleted. This is ok, simply skip this log and go to + * the next one. + * + * We need to exclude the root because we can't have + * other log replays overwriting this log as we'll read + * it back in a few more times. This will keep our + * block from being modified, and we'll just bail for + * each subsequent pass. + */ + if (ret == -ENOENT) + ret = btrfs_pin_extent_for_log_replay(fs_info, + log->node->start, + log->node->len); free_extent_buffer(log->node); free_extent_buffer(log->commit_root); kfree(log); + + if (!ret) + goto next; btrfs_handle_fs_error(fs_info, ret, "Couldn't read target root for tree log recovery."); goto error; @@ -6052,7 +6071,6 @@ again: &root->highest_objectid); } - key.offset = found_key.offset - 1; wc.replay_dest->log_root = NULL; free_extent_buffer(log->node); free_extent_buffer(log->commit_root); @@ -6060,9 +6078,10 @@ again: if (ret) goto error; - +next: if (found_key.offset == 0) break; + key.offset = found_key.offset - 1; } btrfs_release_path(path); From 9cb1548707afef0d04875d34ab0eeb09874c5462 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 6 Dec 2019 09:37:18 -0500 Subject: [PATCH 1767/3715] btrfs: do not leak reloc root if we fail to read the fs root commit ca1aa2818a53875cfdd175fb5e9a2984e997cce9 upstream. If we fail to read the fs root corresponding with a reloc root we'll just break out and free the reloc roots. But we remove our current reloc_root from this list higher up, which means we'll leak this reloc_root. Fix this by adding ourselves back to the reloc_roots list so we are properly cleaned up. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Filipe Manana Reviewed-by: Johannes Thumshirn Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/relocation.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 9fa6db6a6f7d..d4c00edd16d2 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4587,6 +4587,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) fs_root = read_fs_root(fs_info, reloc_root->root_key.offset); if (IS_ERR(fs_root)) { err = PTR_ERR(fs_root); + list_add_tail(&reloc_root->root_list, &reloc_roots); goto out_free; } From 5a744f9eeea5b3216f7467aa61c3d3106e71bc4b Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 6 Dec 2019 11:39:00 -0500 Subject: [PATCH 1768/3715] btrfs: handle ENOENT in btrfs_uuid_tree_iterate commit 714cd3e8cba6841220dce9063a7388a81de03825 upstream. If we get an -ENOENT back from btrfs_uuid_iter_rem when iterating the uuid tree we'll just continue and do btrfs_next_item(). However we've done a btrfs_release_path() at this point and no longer have a valid path. So increment the key and go back and do a normal search. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Filipe Manana Reviewed-by: Johannes Thumshirn Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/uuid-tree.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c index 726f928238d0..331f3a1ad23b 100644 --- a/fs/btrfs/uuid-tree.c +++ b/fs/btrfs/uuid-tree.c @@ -336,6 +336,8 @@ again_search_slot: } if (ret < 0 && ret != -ENOENT) goto out; + key.offset++; + goto again_search_slot; } item_size -= sizeof(subid_le); offset += sizeof(subid_le); From 1622a3416b534aaa7d7f8760908c021c073954ac Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 6 Dec 2019 12:27:39 +0000 Subject: [PATCH 1769/3715] Btrfs: fix removal logic of the tree mod log that leads to use-after-free issues commit 6609fee8897ac475378388238456c84298bff802 upstream. When a tree mod log user no longer needs to use the tree it calls btrfs_put_tree_mod_seq() to remove itself from the list of users and delete all no longer used elements of the tree's red black tree, which should be all elements with a sequence number less then our equals to the caller's sequence number. However the logic is broken because it can delete and free elements from the red black tree that have a sequence number greater then the caller's sequence number: 1) At a point in time we have sequence numbers 1, 2, 3 and 4 in the tree mod log; 2) The task which got assigned the sequence number 1 calls btrfs_put_tree_mod_seq(); 3) Sequence number 1 is deleted from the list of sequence numbers; 4) The current minimum sequence number is computed to be the sequence number 2; 5) A task using sequence number 2 is at tree_mod_log_rewind() and gets a pointer to one of its elements from the red black tree through a call to tree_mod_log_search(); 6) The task with sequence number 1 iterates the red black tree of tree modification elements and deletes (and frees) all elements with a sequence number less then or equals to 2 (the computed minimum sequence number) - it ends up only leaving elements with sequence numbers of 3 and 4; 7) The task with sequence number 2 now uses the pointer to its element, already freed by the other task, at __tree_mod_log_rewind(), resulting in a use-after-free issue. When CONFIG_DEBUG_PAGEALLOC=y it produces a trace like the following: [16804.546854] general protection fault: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC PTI [16804.547451] CPU: 0 PID: 28257 Comm: pool Tainted: G W 5.4.0-rc8-btrfs-next-51 #1 [16804.548059] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014 [16804.548666] RIP: 0010:rb_next+0x16/0x50 (...) [16804.550581] RSP: 0018:ffffb948418ef9b0 EFLAGS: 00010202 [16804.551227] RAX: 6b6b6b6b6b6b6b6b RBX: ffff90e0247f6600 RCX: 6b6b6b6b6b6b6b6b [16804.551873] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff90e0247f6600 [16804.552504] RBP: ffff90dffe0d4688 R08: 0000000000000001 R09: 0000000000000000 [16804.553136] R10: ffff90dffa4a0040 R11: 0000000000000000 R12: 000000000000002e [16804.553768] R13: ffff90e0247f6600 R14: 0000000000001663 R15: ffff90dff77862b8 [16804.554399] FS: 00007f4b197ae700(0000) GS:ffff90e036a00000(0000) knlGS:0000000000000000 [16804.555039] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [16804.555683] CR2: 00007f4b10022000 CR3: 00000002060e2004 CR4: 00000000003606f0 [16804.556336] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [16804.556968] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [16804.557583] Call Trace: [16804.558207] __tree_mod_log_rewind+0xbf/0x280 [btrfs] [16804.558835] btrfs_search_old_slot+0x105/0xd00 [btrfs] [16804.559468] resolve_indirect_refs+0x1eb/0xc70 [btrfs] [16804.560087] ? free_extent_buffer.part.19+0x5a/0xc0 [btrfs] [16804.560700] find_parent_nodes+0x388/0x1120 [btrfs] [16804.561310] btrfs_check_shared+0x115/0x1c0 [btrfs] [16804.561916] ? extent_fiemap+0x59d/0x6d0 [btrfs] [16804.562518] extent_fiemap+0x59d/0x6d0 [btrfs] [16804.563112] ? __might_fault+0x11/0x90 [16804.563706] do_vfs_ioctl+0x45a/0x700 [16804.564299] ksys_ioctl+0x70/0x80 [16804.564885] ? trace_hardirqs_off_thunk+0x1a/0x20 [16804.565461] __x64_sys_ioctl+0x16/0x20 [16804.566020] do_syscall_64+0x5c/0x250 [16804.566580] entry_SYSCALL_64_after_hwframe+0x49/0xbe [16804.567153] RIP: 0033:0x7f4b1ba2add7 (...) [16804.568907] RSP: 002b:00007f4b197adc88 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [16804.569513] RAX: ffffffffffffffda RBX: 00007f4b100210d8 RCX: 00007f4b1ba2add7 [16804.570133] RDX: 00007f4b100210d8 RSI: 00000000c020660b RDI: 0000000000000003 [16804.570726] RBP: 000055de05a6cfe0 R08: 0000000000000000 R09: 00007f4b197add44 [16804.571314] R10: 0000000000000000 R11: 0000000000000246 R12: 00007f4b197add48 [16804.571905] R13: 00007f4b197add40 R14: 00007f4b100210d0 R15: 00007f4b197add50 (...) [16804.575623] ---[ end trace 87317359aad4ba50 ]--- Fix this by making btrfs_put_tree_mod_seq() skip deletion of elements that have a sequence number equals to the computed minimum sequence number, and not just elements with a sequence number greater then that minimum. Fixes: bd989ba359f2ac ("Btrfs: add tree modification log functions") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ctree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index d2263caff307..740ef428acdd 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -427,7 +427,7 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, for (node = rb_first(tm_root); node; node = next) { next = rb_next(node); tm = rb_entry(node, struct tree_mod_elem, node); - if (tm->seq > min_seq) + if (tm->seq >= min_seq) continue; rb_erase(node, tm_root); kfree(tm); From f79c61a90d46c026b83ec0c9ae7ef71796ccdb42 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 11 Dec 2019 16:57:42 +0100 Subject: [PATCH 1770/3715] ALSA: pcm: Avoid possible info leaks from PCM stream buffers commit add9d56d7b3781532208afbff5509d7382fb6efe upstream. The current PCM code doesn't initialize explicitly the buffers allocated for PCM streams, hence it might leak some uninitialized kernel data or previous stream contents by mmapping or reading the buffer before actually starting the stream. Since this is a common problem, this patch simply adds the clearance of the buffer data at hw_params callback. Although this does only zero-clear no matter which format is used, which doesn't mean the silence for some formats, but it should be OK because the intention is just to clear the previous data on the buffer. Reported-by: Lionel Koenig Cc: Link: https://lore.kernel.org/r/20191211155742.3213-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/pcm_native.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 182e4afd21eb..14b1ee29509d 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -720,6 +720,10 @@ static int snd_pcm_hw_params(struct snd_pcm_substream *substream, while (runtime->boundary * 2 <= LONG_MAX - runtime->buffer_size) runtime->boundary *= 2; + /* clear the buffer for avoiding possible kernel info leaks */ + if (runtime->dma_area && !substream->ops->copy_user) + memset(runtime->dma_area, 0, runtime->dma_bytes); + snd_pcm_timer_resolution_change(substream); snd_pcm_set_state(substream, SNDRV_PCM_STATE_SETUP); From c06539b5d420b4adc2ec9538fc59c25f5339f68e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 13 Dec 2019 09:51:09 +0100 Subject: [PATCH 1771/3715] ALSA: hda/ca0132 - Keep power on during processing DSP response commit 377bc0cfabce0244632dada19060839ced4e6949 upstream. We need to keep power on while processing the DSP response via unsol event. Each snd_hda_codec_read() call does the power management, so it should work normally, but still it's safer to keep the power up for the whole function. Fixes: a73d511c4867 ("ALSA: hda/ca0132: Add unsol handler for DSP and jack detection") Cc: Link: https://lore.kernel.org/r/20191213085111.22855-2-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_ca0132.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index 9876d8dc2ede..e0610982a170 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -4424,12 +4424,14 @@ static void ca0132_process_dsp_response(struct hda_codec *codec, struct ca0132_spec *spec = codec->spec; codec_dbg(codec, "ca0132_process_dsp_response\n"); + snd_hda_power_up_pm(codec); if (spec->wait_scp) { if (dspio_get_response_data(codec) >= 0) spec->wait_scp = 0; } dspio_clear_response_queue(codec); + snd_hda_power_down_pm(codec); } static void hp_callback(struct hda_codec *codec, struct hda_jack_callback *cb) From ea929ea14a07ae4f8d3befa1a6e4039aa3dcd89e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 13 Dec 2019 09:51:10 +0100 Subject: [PATCH 1772/3715] ALSA: hda/ca0132 - Avoid endless loop commit cb04fc3b6b076f67d228a0b7d096c69ad486c09c upstream. Introduce a timeout to dspio_clear_response_queue() so that it won't be caught in an endless loop even if the hardware doesn't respond properly. Fixes: a73d511c4867 ("ALSA: hda/ca0132: Add unsol handler for DSP and jack detection") Cc: Link: https://lore.kernel.org/r/20191213085111.22855-3-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_ca0132.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index e0610982a170..92f5f452bee2 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -1300,13 +1300,14 @@ struct scp_msg { static void dspio_clear_response_queue(struct hda_codec *codec) { + unsigned long timeout = jiffies + msecs_to_jiffies(1000); unsigned int dummy = 0; - int status = -1; + int status; /* clear all from the response queue */ do { status = dspio_read(codec, &dummy); - } while (status == 0); + } while (status == 0 && time_before(jiffies, timeout)); } static int dspio_get_response_data(struct hda_codec *codec) From 80eb7d92e253a83f090dd927b3d653de0ce7ceb6 Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Thu, 29 Aug 2019 12:52:19 -0400 Subject: [PATCH 1773/3715] drm: mst: Fix query_payload ack reply struct [ Upstream commit 268de6530aa18fe5773062367fd119f0045f6e88 ] Spec says[1] Allocated_PBN is 16 bits [1]- DisplayPort 1.2 Spec, Section 2.11.9.8, Table 2-98 Fixes: ad7f8a1f9ced ("drm/helper: add Displayport multi-stream helper (v0.6)") Cc: Lyude Paul Cc: Todd Previte Cc: Dave Airlie Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Sean Paul Cc: David Airlie Cc: Daniel Vetter Cc: dri-devel@lists.freedesktop.org Reviewed-by: Lyude Paul Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20190829165223.129662-1-sean@poorly.run Signed-off-by: Sasha Levin --- include/drm/drm_dp_mst_helper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/drm/drm_dp_mst_helper.h b/include/drm/drm_dp_mst_helper.h index d55abb75f29a..eec6cba204ea 100644 --- a/include/drm/drm_dp_mst_helper.h +++ b/include/drm/drm_dp_mst_helper.h @@ -313,7 +313,7 @@ struct drm_dp_resource_status_notify { struct drm_dp_query_payload_ack_reply { u8 port_number; - u8 allocated_pbn; + u16 allocated_pbn; }; struct drm_dp_sideband_msg_req_body { From c81f7b7c9ef00277c8cc6e364e590eab8a337e86 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Wed, 14 Aug 2019 20:48:46 -0400 Subject: [PATCH 1774/3715] drm/bridge: analogix-anx78xx: silence -EPROBE_DEFER warnings [ Upstream commit 2708e876272d89bbbff811d12834adbeef85f022 ] Silence two warning messages that occur due to -EPROBE_DEFER errors to help cleanup the system boot log. Signed-off-by: Brian Masney Reviewed-by: Linus Walleij Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20190815004854.19860-4-masneyb@onstation.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/analogix-anx78xx.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/bridge/analogix-anx78xx.c b/drivers/gpu/drm/bridge/analogix-anx78xx.c index 9385eb0b1ee4..cd2bfd7bf048 100644 --- a/drivers/gpu/drm/bridge/analogix-anx78xx.c +++ b/drivers/gpu/drm/bridge/analogix-anx78xx.c @@ -725,7 +725,9 @@ static int anx78xx_init_pdata(struct anx78xx *anx78xx) /* 1.0V digital core power regulator */ pdata->dvdd10 = devm_regulator_get(dev, "dvdd10"); if (IS_ERR(pdata->dvdd10)) { - DRM_ERROR("DVDD10 regulator not found\n"); + if (PTR_ERR(pdata->dvdd10) != -EPROBE_DEFER) + DRM_ERROR("DVDD10 regulator not found\n"); + return PTR_ERR(pdata->dvdd10); } @@ -1344,7 +1346,9 @@ static int anx78xx_i2c_probe(struct i2c_client *client, err = anx78xx_init_pdata(anx78xx); if (err) { - DRM_ERROR("Failed to initialize pdata: %d\n", err); + if (err != -EPROBE_DEFER) + DRM_ERROR("Failed to initialize pdata: %d\n", err); + return err; } From 0ea124722c685ee64e1d3d77cfe6790679ab1cba Mon Sep 17 00:00:00 2001 From: Krzysztof Wilczynski Date: Fri, 13 Sep 2019 22:24:13 +0200 Subject: [PATCH 1775/3715] iio: light: bh1750: Resolve compiler warning and make code more readable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f552fde983d378e7339f9ea74a25f918563bf0d3 ] Separate the declaration of struct bh1750_chip_info from definition of bh1750_chip_info_tbl[] in a single statement as it makes the code hard to read, and with the extra newline it makes it look as if the bh1750_chip_info_tbl[] had no explicit type. This change also resolves the following compiler warning about the unusual position of the static keyword that can be seen when building with warnings enabled (W=1): drivers/iio/light/bh1750.c:64:1: warning: ‘static’ is not at beginning of declaration [-Wold-style-declaration] Related to commit 3a11fbb037a1 ("iio: light: add support for ROHM BH1710/BH1715/BH1721/BH1750/BH1751 ambient light sensors"). Signed-off-by: Krzysztof Wilczynski Acked-by: Uwe Kleine-König Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/light/bh1750.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/light/bh1750.c b/drivers/iio/light/bh1750.c index 6c61187e630f..0b7ba02c8d16 100644 --- a/drivers/iio/light/bh1750.c +++ b/drivers/iio/light/bh1750.c @@ -62,9 +62,9 @@ struct bh1750_chip_info { u16 int_time_low_mask; u16 int_time_high_mask; -} +}; -static const bh1750_chip_info_tbl[] = { +static const struct bh1750_chip_info bh1750_chip_info_tbl[] = { [BH1710] = { 140, 1022, 300, 400, 250000000, 2, 0x001F, 0x03E0 }, [BH1721] = { 140, 1020, 300, 400, 250000000, 2, 0x0010, 0x03E0 }, [BH1750] = { 31, 254, 69, 1740, 57500000, 1, 0x001F, 0x00E0 }, From af64290527d1148a99b59a59a3b1ae1b63a563ee Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Wed, 25 Sep 2019 11:11:42 +0200 Subject: [PATCH 1776/3715] spi: Add call to spi_slave_abort() function when spidev driver is released [ Upstream commit 9f918a728cf86b2757b6a7025e1f46824bfe3155 ] This change is necessary for spidev devices (e.g. /dev/spidev3.0) working in the slave mode (like NXP's dspi driver for Vybrid SoC). When SPI HW works in this mode - the master is responsible for providing CS and CLK signals. However, when some fault happens - like for example distortion on SPI lines - the SPI Linux driver needs a chance to recover from this abnormal situation and prepare itself for next (correct) transmission. This change doesn't pose any threat on drivers working in master mode as spi_slave_abort() function checks if SPI slave mode is supported. Signed-off-by: Lukasz Majewski Link: https://lore.kernel.org/r/20190924110547.14770-2-lukma@denx.de Signed-off-by: Mark Brown Reported-by: kbuild test robot Link: https://lore.kernel.org/r/20190925091143.15468-2-lukma@denx.de Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spidev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index c5fe08bc34a0..028725573e63 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -634,6 +634,9 @@ static int spidev_release(struct inode *inode, struct file *filp) if (dofree) kfree(spidev); } +#ifdef CONFIG_SPI_SLAVE + spi_slave_abort(spidev->spi); +#endif mutex_unlock(&device_list_lock); return 0; From 21c84659c60223461d51942498087c7a8cf8e3dc Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Thu, 19 Sep 2019 21:51:33 -0500 Subject: [PATCH 1777/3715] staging: rtl8192u: fix multiple memory leaks on error path [ Upstream commit ca312438cf176a16d4b89350cade8789ba8d7133 ] In rtl8192_tx on error handling path allocated urbs and also skb should be released. Signed-off-by: Navid Emamdoost Link: https://lore.kernel.org/r/20190920025137.29407-1-navid.emamdoost@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/staging/rtl8192u/r8192U_core.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/staging/rtl8192u/r8192U_core.c b/drivers/staging/rtl8192u/r8192U_core.c index db3eb7ec5809..fbbd1b59dc11 100644 --- a/drivers/staging/rtl8192u/r8192U_core.c +++ b/drivers/staging/rtl8192u/r8192U_core.c @@ -1506,7 +1506,7 @@ short rtl8192_tx(struct net_device *dev, struct sk_buff *skb) (tx_fwinfo_819x_usb *)(skb->data + USB_HWDESC_HEADER_LEN); struct usb_device *udev = priv->udev; int pend; - int status; + int status, rt = -1; struct urb *tx_urb = NULL, *tx_urb_zero = NULL; unsigned int idx_pipe; @@ -1650,8 +1650,10 @@ short rtl8192_tx(struct net_device *dev, struct sk_buff *skb) } if (bSend0Byte) { tx_urb_zero = usb_alloc_urb(0, GFP_ATOMIC); - if (!tx_urb_zero) - return -ENOMEM; + if (!tx_urb_zero) { + rt = -ENOMEM; + goto error; + } usb_fill_bulk_urb(tx_urb_zero, udev, usb_sndbulkpipe(udev, idx_pipe), &zero, 0, tx_zero_isr, dev); @@ -1661,7 +1663,7 @@ short rtl8192_tx(struct net_device *dev, struct sk_buff *skb) "Error TX URB for zero byte %d, error %d", atomic_read(&priv->tx_pending[tcb_desc->queue_index]), status); - return -1; + goto error; } } netif_trans_update(dev); @@ -1672,7 +1674,12 @@ short rtl8192_tx(struct net_device *dev, struct sk_buff *skb) RT_TRACE(COMP_ERR, "Error TX URB %d, error %d", atomic_read(&priv->tx_pending[tcb_desc->queue_index]), status); - return -1; + +error: + dev_kfree_skb_any(skb); + usb_free_urb(tx_urb); + usb_free_urb(tx_urb_zero); + return rt; } static short rtl8192_usb_initendpoints(struct net_device *dev) From 8ac64e1a9bc6b281c115853b49e6ccbad35b264a Mon Sep 17 00:00:00 2001 From: Connor Kuehl Date: Thu, 26 Sep 2019 08:03:17 -0700 Subject: [PATCH 1778/3715] staging: rtl8188eu: fix possible null dereference [ Upstream commit 228241944a48113470d3c3b46c88ba7fbe0a274b ] Inside a nested 'else' block at the beginning of this function is a call that assigns 'psta' to the return value of 'rtw_get_stainfo()'. If 'rtw_get_stainfo()' returns NULL and the flow of control reaches the 'else if' where 'psta' is dereferenced, then we will dereference a NULL pointer. Fix this by checking if 'psta' is not NULL before reading its 'psta->qos_option' data member. Addresses-Coverity: ("Dereference null return value") Signed-off-by: Connor Kuehl Acked-by: Larry Finger Link: https://lore.kernel.org/r/20190926150317.5894-1-connor.kuehl@canonical.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/staging/rtl8188eu/core/rtw_xmit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/rtl8188eu/core/rtw_xmit.c b/drivers/staging/rtl8188eu/core/rtw_xmit.c index 904b988ecc4e..7c895af1ba31 100644 --- a/drivers/staging/rtl8188eu/core/rtw_xmit.c +++ b/drivers/staging/rtl8188eu/core/rtw_xmit.c @@ -805,7 +805,7 @@ s32 rtw_make_wlanhdr(struct adapter *padapter, u8 *hdr, struct pkt_attrib *pattr memcpy(pwlanhdr->addr2, get_bssid(pmlmepriv), ETH_ALEN); memcpy(pwlanhdr->addr3, pattrib->src, ETH_ALEN); - if (psta->qos_option) + if (psta && psta->qos_option) qos_option = true; } else if (check_fwstate(pmlmepriv, WIFI_ADHOC_STATE) || check_fwstate(pmlmepriv, WIFI_ADHOC_MASTER_STATE)) { @@ -813,7 +813,7 @@ s32 rtw_make_wlanhdr(struct adapter *padapter, u8 *hdr, struct pkt_attrib *pattr memcpy(pwlanhdr->addr2, pattrib->src, ETH_ALEN); memcpy(pwlanhdr->addr3, get_bssid(pmlmepriv), ETH_ALEN); - if (psta->qos_option) + if (psta && psta->qos_option) qos_option = true; } else { RT_TRACE(_module_rtl871x_xmit_c_, _drv_err_, ("fw_state:%x is not allowed to xmit frame\n", get_fwstate(pmlmepriv))); From 9c84ba30947a9d8ac2c67abaafc17087ead04426 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Tue, 24 Sep 2019 20:20:21 -0500 Subject: [PATCH 1779/3715] rtlwifi: prevent memory leak in rtl_usb_probe [ Upstream commit 3f93616951138a598d930dcaec40f2bfd9ce43bb ] In rtl_usb_probe if allocation for usb_data fails the allocated hw should be released. In addition the allocated rtlpriv->usb_data should be released on error handling path. Signed-off-by: Navid Emamdoost Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/realtek/rtlwifi/usb.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/usb.c b/drivers/net/wireless/realtek/rtlwifi/usb.c index 2401c8bdb211..93eda23f0123 100644 --- a/drivers/net/wireless/realtek/rtlwifi/usb.c +++ b/drivers/net/wireless/realtek/rtlwifi/usb.c @@ -1068,8 +1068,10 @@ int rtl_usb_probe(struct usb_interface *intf, rtlpriv->hw = hw; rtlpriv->usb_data = kzalloc(RTL_USB_MAX_RX_COUNT * sizeof(u32), GFP_KERNEL); - if (!rtlpriv->usb_data) + if (!rtlpriv->usb_data) { + ieee80211_free_hw(hw); return -ENOMEM; + } /* this spin lock must be initialized early */ spin_lock_init(&rtlpriv->locks.usb_lock); @@ -1130,6 +1132,7 @@ error_out2: _rtl_usb_io_handler_release(hw); usb_put_dev(udev); complete(&rtlpriv->firmware_loading_complete); + kfree(rtlpriv->usb_data); return -ENODEV; } EXPORT_SYMBOL(rtl_usb_probe); From b88d9f8b8ba5722ab4aef7d01c2a5a66b5414b83 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Wed, 18 Sep 2019 22:05:00 +0530 Subject: [PATCH 1780/3715] libertas: fix a potential NULL pointer dereference [ Upstream commit 7da413a18583baaf35dd4a8eb414fa410367d7f2 ] alloc_workqueue is not checked for errors and as a result, a potential NULL dereference could occur. Signed-off-by: Allen Pais Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/marvell/libertas/if_sdio.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/marvell/libertas/if_sdio.c b/drivers/net/wireless/marvell/libertas/if_sdio.c index 39bf85d0ade0..c7f8a29d2606 100644 --- a/drivers/net/wireless/marvell/libertas/if_sdio.c +++ b/drivers/net/wireless/marvell/libertas/if_sdio.c @@ -1183,6 +1183,10 @@ static int if_sdio_probe(struct sdio_func *func, spin_lock_init(&card->lock); card->workqueue = alloc_workqueue("libertas_sdio", WQ_MEM_RECLAIM, 0); + if (unlikely(!card->workqueue)) { + ret = -ENOMEM; + goto err_queue; + } INIT_WORK(&card->packet_worker, if_sdio_host_to_card_worker); init_waitqueue_head(&card->pwron_waitq); @@ -1234,6 +1238,7 @@ err_activate_card: lbs_remove_card(priv); free: destroy_workqueue(card->workqueue); +err_queue: while (card->packets) { packet = card->packets; card->packets = card->packets->next; From 7642460c2780aab4e66852576d1de5484de8da63 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Wed, 25 Sep 2019 00:03:47 +0300 Subject: [PATCH 1781/3715] IB/iser: bound protection_sg size by data_sg size [ Upstream commit 7718cf03c3ce4b6ebd90107643ccd01c952a1fce ] In case we don't set the sg_prot_tablesize, the scsi layer assign the default size (65535 entries). We should limit this size since we should take into consideration the underlaying device capability. This cap is considered when calculating the sg_tablesize. Otherwise, for example, we can get that /sys/block/sdb/queue/max_segments is 128 and /sys/block/sdb/queue/max_integrity_segments is 65535. Link: https://lore.kernel.org/r/1569359027-10987-1-git-send-email-maxg@mellanox.com Signed-off-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/iser/iscsi_iser.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 19624e023ebd..b5a789567b4e 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -648,6 +648,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, if (ib_conn->pi_support) { u32 sig_caps = ib_conn->device->ib_device->attrs.sig_prot_cap; + shost->sg_prot_tablesize = shost->sg_tablesize; scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps)); scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP | SHOST_DIX_GUARD_CRC); From a385d400aa50933c3e498e59cbbb33dab6e1bf6b Mon Sep 17 00:00:00 2001 From: Benoit Parrot Date: Fri, 20 Sep 2019 14:05:48 -0300 Subject: [PATCH 1782/3715] media: am437x-vpfe: Setting STD to current value is not an error [ Upstream commit 13aa21cfe92ce9ebb51824029d89f19c33f81419 ] VIDIOC_S_STD should not return an error if the value is identical to the current one. This error was highlighted by the v4l2-compliance test. Signed-off-by: Benoit Parrot Acked-by: Lad Prabhakar Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/am437x/am437x-vpfe.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/media/platform/am437x/am437x-vpfe.c b/drivers/media/platform/am437x/am437x-vpfe.c index dfcc484cab89..e92c5b56be42 100644 --- a/drivers/media/platform/am437x/am437x-vpfe.c +++ b/drivers/media/platform/am437x/am437x-vpfe.c @@ -1848,6 +1848,10 @@ static int vpfe_s_std(struct file *file, void *priv, v4l2_std_id std_id) if (!(sdinfo->inputs[0].capabilities & V4L2_IN_CAP_STD)) return -ENODATA; + /* if trying to set the same std then nothing to do */ + if (vpfe_standards[vpfe->std_index].std_id == std_id) + return 0; + /* If streaming is started, return error */ if (vb2_is_busy(&vpfe->buffer_queue)) { vpfe_err(vpfe, "%s device busy\n", __func__); From 267c32049704190c31f2e03ad41d7c4c48908a5e Mon Sep 17 00:00:00 2001 From: Benoit Parrot Date: Mon, 30 Sep 2019 10:06:40 -0300 Subject: [PATCH 1783/3715] media: i2c: ov2659: fix s_stream return value [ Upstream commit 85c4043f1d403c222d481dfc91846227d66663fb ] In ov2659_s_stream() return value for invoked function should be checked and propagated. Signed-off-by: Benoit Parrot Acked-by: Lad, Prabhakar Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/i2c/ov2659.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/media/i2c/ov2659.c b/drivers/media/i2c/ov2659.c index ce23f436e130..04d493aea45e 100644 --- a/drivers/media/i2c/ov2659.c +++ b/drivers/media/i2c/ov2659.c @@ -1203,11 +1203,15 @@ static int ov2659_s_stream(struct v4l2_subdev *sd, int on) goto unlock; } - ov2659_set_pixel_clock(ov2659); - ov2659_set_frame_size(ov2659); - ov2659_set_format(ov2659); - ov2659_set_streaming(ov2659, 1); - ov2659->streaming = on; + ret = ov2659_set_pixel_clock(ov2659); + if (!ret) + ret = ov2659_set_frame_size(ov2659); + if (!ret) + ret = ov2659_set_format(ov2659); + if (!ret) { + ov2659_set_streaming(ov2659, 1); + ov2659->streaming = on; + } unlock: mutex_unlock(&ov2659->lock); From 4580d7bfecd2e176decabd3013a21ae6f4ed6726 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Tue, 3 Sep 2019 17:11:38 -0300 Subject: [PATCH 1784/3715] media: ov6650: Fix crop rectangle alignment not passed back [ Upstream commit 7b188d6ba27a131e7934a51a14ece331c0491f18 ] Commit 4f996594ceaf ("[media] v4l2: make vidioc_s_crop const") introduced a writable copy of constified user requested crop rectangle in order to be able to perform hardware alignments on it. Later on, commit 10d5509c8d50 ("[media] v4l2: remove g/s_crop from video ops") replaced s_crop() video operation using that const argument with set_selection() pad operation which had a corresponding argument not constified, however the original behavior of the driver was not restored. Since that time, any hardware alignment applied on a user requested crop rectangle is not passed back to the user calling .set_selection() as it should be. Fix the issue by dropping the copy and replacing all references to it with references to the crop rectangle embedded in the user argument. Fixes: 10d5509c8d50 ("[media] v4l2: remove g/s_crop from video ops") Signed-off-by: Janusz Krzysztofik Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/i2c/ov6650.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/drivers/media/i2c/ov6650.c b/drivers/media/i2c/ov6650.c index 025869eec2ac..98080d420444 100644 --- a/drivers/media/i2c/ov6650.c +++ b/drivers/media/i2c/ov6650.c @@ -469,38 +469,37 @@ static int ov6650_set_selection(struct v4l2_subdev *sd, { struct i2c_client *client = v4l2_get_subdevdata(sd); struct ov6650 *priv = to_ov6650(client); - struct v4l2_rect rect = sel->r; int ret; if (sel->which != V4L2_SUBDEV_FORMAT_ACTIVE || sel->target != V4L2_SEL_TGT_CROP) return -EINVAL; - v4l_bound_align_image(&rect.width, 2, W_CIF, 1, - &rect.height, 2, H_CIF, 1, 0); - v4l_bound_align_image(&rect.left, DEF_HSTRT << 1, - (DEF_HSTRT << 1) + W_CIF - (__s32)rect.width, 1, - &rect.top, DEF_VSTRT << 1, - (DEF_VSTRT << 1) + H_CIF - (__s32)rect.height, 1, - 0); + v4l_bound_align_image(&sel->r.width, 2, W_CIF, 1, + &sel->r.height, 2, H_CIF, 1, 0); + v4l_bound_align_image(&sel->r.left, DEF_HSTRT << 1, + (DEF_HSTRT << 1) + W_CIF - (__s32)sel->r.width, 1, + &sel->r.top, DEF_VSTRT << 1, + (DEF_VSTRT << 1) + H_CIF - (__s32)sel->r.height, + 1, 0); - ret = ov6650_reg_write(client, REG_HSTRT, rect.left >> 1); + ret = ov6650_reg_write(client, REG_HSTRT, sel->r.left >> 1); if (!ret) { - priv->rect.left = rect.left; + priv->rect.left = sel->r.left; ret = ov6650_reg_write(client, REG_HSTOP, - (rect.left + rect.width) >> 1); + (sel->r.left + sel->r.width) >> 1); } if (!ret) { - priv->rect.width = rect.width; - ret = ov6650_reg_write(client, REG_VSTRT, rect.top >> 1); + priv->rect.width = sel->r.width; + ret = ov6650_reg_write(client, REG_VSTRT, sel->r.top >> 1); } if (!ret) { - priv->rect.top = rect.top; + priv->rect.top = sel->r.top; ret = ov6650_reg_write(client, REG_VSTOP, - (rect.top + rect.height) >> 1); + (sel->r.top + sel->r.height) >> 1); } if (!ret) - priv->rect.height = rect.height; + priv->rect.height = sel->r.height; return ret; } From 3ff18daf0602cf0903612aad01f0ee9b00151c30 Mon Sep 17 00:00:00 2001 From: Benoit Parrot Date: Mon, 30 Sep 2019 10:06:43 -0300 Subject: [PATCH 1785/3715] media: i2c: ov2659: Fix missing 720p register config [ Upstream commit 9d669fbfca20e6035ead814e55d9ef1a6b500540 ] The initial registers sequence is only loaded at probe time. Afterward only the resolution and format specific register are modified. Care must be taken to make sure registers modified by one resolution setting are reverted back when another resolution is programmed. This was not done properly for the 720p case. Signed-off-by: Benoit Parrot Acked-by: Lad, Prabhakar Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/i2c/ov2659.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/media/i2c/ov2659.c b/drivers/media/i2c/ov2659.c index 04d493aea45e..44b0584eb8a6 100644 --- a/drivers/media/i2c/ov2659.c +++ b/drivers/media/i2c/ov2659.c @@ -419,10 +419,14 @@ static struct sensor_register ov2659_720p[] = { { REG_TIMING_YINC, 0x11 }, { REG_TIMING_VERT_FORMAT, 0x80 }, { REG_TIMING_HORIZ_FORMAT, 0x00 }, + { 0x370a, 0x12 }, { 0x3a03, 0xe8 }, { 0x3a09, 0x6f }, { 0x3a0b, 0x5d }, { 0x3a15, 0x9a }, + { REG_VFIFO_READ_START_H, 0x00 }, + { REG_VFIFO_READ_START_L, 0x80 }, + { REG_ISP_CTRL02, 0x00 }, { REG_NULL, 0x00 }, }; From d882197149063af8cb01a55a806b32c538529cdb Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Tue, 3 Sep 2019 17:11:43 -0300 Subject: [PATCH 1786/3715] media: ov6650: Fix stored frame format not in sync with hardware [ Upstream commit 3143b459de4cdcce67b36827476c966e93c1cf01 ] The driver stores frame format settings supposed to be in line with hardware state in a device private structure. Since the driver initial submission, those settings are updated before they are actually applied on hardware. If an error occurs on device update, the stored settings my not reflect hardware state anymore and consecutive calls to .get_fmt() may return incorrect information. That in turn may affect ability of a bridge device to use correct DMA transfer settings if such incorrect informmation on active frame format returned by .get_fmt() is used. Assuming a failed device update means its state hasn't changed, update frame format related settings stored in the device private structure only after they are successfully applied so the stored values always reflect hardware state as closely as possible. Fixes: 2f6e2404799a ("[media] SoC Camera: add driver for OV6650 sensor") Signed-off-by: Janusz Krzysztofik Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/i2c/ov6650.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/media/i2c/ov6650.c b/drivers/media/i2c/ov6650.c index 98080d420444..81fa21ed2599 100644 --- a/drivers/media/i2c/ov6650.c +++ b/drivers/media/i2c/ov6650.c @@ -613,7 +613,6 @@ static int ov6650_s_fmt(struct v4l2_subdev *sd, struct v4l2_mbus_framefmt *mf) dev_err(&client->dev, "Pixel format not handled: 0x%x\n", code); return -EINVAL; } - priv->code = code; if (code == MEDIA_BUS_FMT_Y8_1X8 || code == MEDIA_BUS_FMT_SBGGR8_1X8) { @@ -639,7 +638,6 @@ static int ov6650_s_fmt(struct v4l2_subdev *sd, struct v4l2_mbus_framefmt *mf) dev_dbg(&client->dev, "max resolution: CIF\n"); coma_mask |= COMA_QCIF; } - priv->half_scale = half_scale; clkrc = CLKRC_12MHz; mclk = 12000000; @@ -657,8 +655,13 @@ static int ov6650_s_fmt(struct v4l2_subdev *sd, struct v4l2_mbus_framefmt *mf) ret = ov6650_reg_rmw(client, REG_COMA, coma_set, coma_mask); if (!ret) ret = ov6650_reg_write(client, REG_CLKRC, clkrc); - if (!ret) + if (!ret) { + priv->half_scale = half_scale; + ret = ov6650_reg_rmw(client, REG_COML, coml_set, coml_mask); + } + if (!ret) + priv->code = code; if (!ret) { mf->colorspace = priv->colorspace; From 319f54451969667fa942f701270b68749f6bcbc9 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Tue, 3 Sep 2019 17:11:44 -0300 Subject: [PATCH 1787/3715] media: ov6650: Fix stored crop rectangle not in sync with hardware [ Upstream commit 1463b371aff0682c70141f7521db13cc4bbf3016 ] The driver stores crop rectangle settings supposed to be in line with hardware state in a device private structure. Since the driver initial submission, crop rectangle width and height settings are not updated correctly when rectangle offset settings are applied on hardware. If an error occurs while the device is updated, the stored settings my no longer reflect hardware state and consecutive calls to .get_selection() as well as .get/set_fmt() may return incorrect information. That in turn may affect ability of a bridge device to use correct DMA transfer settings if such incorrect informamtion on active frame format returned by .get/set_fmt() is used. Assuming a failed update of the device means its actual settings haven't changed, update crop rectangle width and height settings stored in the device private structure correctly while the rectangle offset is successfully applied on hardware so the stored values always reflect actual hardware state to the extent possible. Fixes: 2f6e2404799a ("[media] SoC Camera: add driver for OV6650 sensor") Signed-off-by: Janusz Krzysztofik Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/i2c/ov6650.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/i2c/ov6650.c b/drivers/media/i2c/ov6650.c index 81fa21ed2599..348296be4925 100644 --- a/drivers/media/i2c/ov6650.c +++ b/drivers/media/i2c/ov6650.c @@ -485,6 +485,7 @@ static int ov6650_set_selection(struct v4l2_subdev *sd, ret = ov6650_reg_write(client, REG_HSTRT, sel->r.left >> 1); if (!ret) { + priv->rect.width += priv->rect.left - sel->r.left; priv->rect.left = sel->r.left; ret = ov6650_reg_write(client, REG_HSTOP, (sel->r.left + sel->r.width) >> 1); @@ -494,6 +495,7 @@ static int ov6650_set_selection(struct v4l2_subdev *sd, ret = ov6650_reg_write(client, REG_VSTRT, sel->r.top >> 1); } if (!ret) { + priv->rect.height += priv->rect.top - sel->r.top; priv->rect.top = sel->r.top; ret = ov6650_reg_write(client, REG_VSTOP, (sel->r.top + sel->r.height) >> 1); From 5790fca95d46eaa94a3ddb6cd163509a8b73741b Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 27 Sep 2019 09:26:42 -0700 Subject: [PATCH 1788/3715] tools/power/cpupower: Fix initializer override in hsw_ext_cstates [ Upstream commit 7e5705c635ecfccde559ebbbe1eaf05b5cc60529 ] When building cpupower with clang, the following warning appears: utils/idle_monitor/hsw_ext_idle.c:42:16: warning: initializer overrides prior initialization of this subobject [-Winitializer-overrides] .desc = N_("Processor Package C2"), ^~~~~~~~~~~~~~~~~~~~~~ ./utils/helpers/helpers.h:25:33: note: expanded from macro 'N_' #define N_(String) gettext_noop(String) ^~~~~~ ./utils/helpers/helpers.h:23:30: note: expanded from macro 'gettext_noop' #define gettext_noop(String) String ^~~~~~ utils/idle_monitor/hsw_ext_idle.c:41:16: note: previous initialization is here .desc = N_("Processor Package C9"), ^~~~~~~~~~~~~~~~~~~~~~ ./utils/helpers/helpers.h:25:33: note: expanded from macro 'N_' #define N_(String) gettext_noop(String) ^~~~~~ ./utils/helpers/helpers.h:23:30: note: expanded from macro 'gettext_noop' #define gettext_noop(String) String ^~~~~~ 1 warning generated. This appears to be a copy and paste or merge mistake because the name and id fields both have PC9 in them, not PC2. Remove the second assignment to fix the warning. Fixes: 7ee767b69b68 ("cpupower: Add Haswell family 0x45 specific idle monitor to show PC8,9,10 states") Link: https://github.com/ClangBuiltLinux/linux/issues/718 Signed-off-by: Nathan Chancellor Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c index f794d6bbb7e9..3e4ff4a1cdf4 100644 --- a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c +++ b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c @@ -40,7 +40,6 @@ static cstate_t hsw_ext_cstates[HSW_EXT_CSTATE_COUNT] = { { .name = "PC9", .desc = N_("Processor Package C9"), - .desc = N_("Processor Package C2"), .id = PC9, .range = RANGE_PACKAGE, .get_count_percent = hsw_ext_get_count_percent, From 03b7865f4cba3e1523a61192fea0d9dbe27bbdfb Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Wed, 11 Sep 2019 11:45:59 -0300 Subject: [PATCH 1789/3715] media: venus: core: Fix msm8996 frequency table [ Upstream commit c690435ed07901737e5c007a65ec59f53b33eb71 ] In downstream driver, there are two frequency tables defined, one for the encoder and one for the decoder: /* Encoders / <972000 490000000 0x55555555>, / 4k UHD @ 30 / <489600 320000000 0x55555555>, / 1080p @ 60 / <244800 150000000 0x55555555>, / 1080p @ 30 / <108000 75000000 0x55555555>, / 720p @ 30 */ /* Decoders / <1944000 490000000 0xffffffff>, / 4k UHD @ 60 / < 972000 320000000 0xffffffff>, / 4k UHD @ 30 / < 489600 150000000 0xffffffff>, / 1080p @ 60 / < 244800 75000000 0xffffffff>; / 1080p @ 30 */ It shows that encoder always needs a higher clock than decoder. In current venus driver, the unified frequency table is aligned with the downstream decoder table which causes performance issues in encoding scenarios. Fix that by aligning frequency table on worst case (encoding). Signed-off-by: Loic Poulain Signed-off-by: Stanimir Varbanov Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/qcom/venus/core.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/media/platform/qcom/venus/core.c b/drivers/media/platform/qcom/venus/core.c index 769e9e68562d..9360b36b82cd 100644 --- a/drivers/media/platform/qcom/venus/core.c +++ b/drivers/media/platform/qcom/venus/core.c @@ -345,10 +345,11 @@ static const struct venus_resources msm8916_res = { }; static const struct freq_tbl msm8996_freq_table[] = { - { 1944000, 490000000 }, /* 4k UHD @ 60 */ - { 972000, 320000000 }, /* 4k UHD @ 30 */ - { 489600, 150000000 }, /* 1080p @ 60 */ - { 244800, 75000000 }, /* 1080p @ 30 */ + { 1944000, 520000000 }, /* 4k UHD @ 60 (decode only) */ + { 972000, 520000000 }, /* 4k UHD @ 30 */ + { 489600, 346666667 }, /* 1080p @ 60 */ + { 244800, 150000000 }, /* 1080p @ 30 */ + { 108000, 75000000 }, /* 720p @ 30 */ }; static const struct reg_val msm8996_reg_preset[] = { From f29ead458d231b59d44c41310c7f76bb5023c915 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Tue, 17 Oct 2017 17:03:12 -0700 Subject: [PATCH 1790/3715] ath10k: fix offchannel tx failure when no ath10k_mac_tx_frm_has_freq [ Upstream commit cc6df017e55764ffef9819dd9554053182535ffd ] Offchannel management frames were failing: [18099.253732] ath10k_pci 0000:01:00.0: timed out waiting for offchannel skb cf0e3780 [18102.293686] ath10k_pci 0000:01:00.0: timed out waiting for offchannel skb cf0e3780 [18105.333653] ath10k_pci 0000:01:00.0: timed out waiting for offchannel skb cf0e3780 [18108.373712] ath10k_pci 0000:01:00.0: timed out waiting for offchannel skb cf0e3780 [18111.413687] ath10k_pci 0000:01:00.0: timed out waiting for offchannel skb cf0e36c0 [18114.453726] ath10k_pci 0000:01:00.0: timed out waiting for offchannel skb cf0e3f00 [18117.493773] ath10k_pci 0000:01:00.0: timed out waiting for offchannel skb cf0e36c0 [18120.533631] ath10k_pci 0000:01:00.0: timed out waiting for offchannel skb cf0e3f00 This bug appears to have been added between 4.0 (which works for us), and 4.4, which does not work. I think this is because the tx-offchannel logic gets in a loop when ath10k_mac_tx_frm_has_freq(ar) is false, so pkt is never actually sent to the firmware for transmit. This patch fixes the problem on 4.9 for me, and now HS20 clients can work again with my firmware. Antonio: tested with 10.4-3.5.3-00057 on QCA4019 and QCA9888 Signed-off-by: Ben Greear Tested-by: Antonio Quartulli [kvalo@codeaurora.org: improve commit log, remove unneeded parenthesis] Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath10k/mac.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index dff34448588f..ea47ad4b2343 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -3627,7 +3627,7 @@ static int ath10k_mac_tx(struct ath10k *ar, struct ieee80211_vif *vif, enum ath10k_hw_txrx_mode txmode, enum ath10k_mac_tx_path txpath, - struct sk_buff *skb) + struct sk_buff *skb, bool noque_offchan) { struct ieee80211_hw *hw = ar->hw; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); @@ -3655,10 +3655,10 @@ static int ath10k_mac_tx(struct ath10k *ar, } } - if (info->flags & IEEE80211_TX_CTL_TX_OFFCHAN) { + if (!noque_offchan && info->flags & IEEE80211_TX_CTL_TX_OFFCHAN) { if (!ath10k_mac_tx_frm_has_freq(ar)) { - ath10k_dbg(ar, ATH10K_DBG_MAC, "queued offchannel skb %pK\n", - skb); + ath10k_dbg(ar, ATH10K_DBG_MAC, "mac queued offchannel skb %pK len %d\n", + skb, skb->len); skb_queue_tail(&ar->offchan_tx_queue, skb); ieee80211_queue_work(hw, &ar->offchan_tx_work); @@ -3720,8 +3720,8 @@ void ath10k_offchan_tx_work(struct work_struct *work) mutex_lock(&ar->conf_mutex); - ath10k_dbg(ar, ATH10K_DBG_MAC, "mac offchannel skb %pK\n", - skb); + ath10k_dbg(ar, ATH10K_DBG_MAC, "mac offchannel skb %pK len %d\n", + skb, skb->len); hdr = (struct ieee80211_hdr *)skb->data; peer_addr = ieee80211_get_DA(hdr); @@ -3767,7 +3767,7 @@ void ath10k_offchan_tx_work(struct work_struct *work) txmode = ath10k_mac_tx_h_get_txmode(ar, vif, sta, skb); txpath = ath10k_mac_tx_h_get_txpath(ar, skb, txmode); - ret = ath10k_mac_tx(ar, vif, txmode, txpath, skb); + ret = ath10k_mac_tx(ar, vif, txmode, txpath, skb, true); if (ret) { ath10k_warn(ar, "failed to transmit offchannel frame: %d\n", ret); @@ -3777,8 +3777,8 @@ void ath10k_offchan_tx_work(struct work_struct *work) time_left = wait_for_completion_timeout(&ar->offchan_tx_completed, 3 * HZ); if (time_left == 0) - ath10k_warn(ar, "timed out waiting for offchannel skb %pK\n", - skb); + ath10k_warn(ar, "timed out waiting for offchannel skb %pK, len: %d\n", + skb, skb->len); if (!peer && tmp_peer_created) { ret = ath10k_peer_delete(ar, vdev_id, peer_addr); @@ -3957,7 +3957,7 @@ int ath10k_mac_tx_push_txq(struct ieee80211_hw *hw, spin_unlock_bh(&ar->htt.tx_lock); } - ret = ath10k_mac_tx(ar, vif, txmode, txpath, skb); + ret = ath10k_mac_tx(ar, vif, txmode, txpath, skb, false); if (unlikely(ret)) { ath10k_warn(ar, "failed to push frame: %d\n", ret); @@ -4239,7 +4239,7 @@ static void ath10k_mac_op_tx(struct ieee80211_hw *hw, spin_unlock_bh(&ar->htt.tx_lock); } - ret = ath10k_mac_tx(ar, vif, txmode, txpath, skb); + ret = ath10k_mac_tx(ar, vif, txmode, txpath, skb, false); if (ret) { ath10k_warn(ar, "failed to transmit frame: %d\n", ret); if (is_htt) { From 679c4f27b8958b65bb51d1c3dfdbf3befe4a33a3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 2 Oct 2019 13:42:06 +0100 Subject: [PATCH 1791/3715] pinctrl: devicetree: Avoid taking direct reference to device name string [ Upstream commit be4c60b563edee3712d392aaeb0943a768df7023 ] When populating the pinctrl mapping table entries for a device, the 'dev_name' field for each entry is initialised to point directly at the string returned by 'dev_name()' for the device and subsequently used by 'create_pinctrl()' when looking up the mappings for the device being probed. This is unreliable in the presence of calls to 'dev_set_name()', which may reallocate the device name string leaving the pinctrl mappings with a dangling reference. This then leads to a use-after-free every time the name is dereferenced by a device probe: | BUG: KASAN: invalid-access in strcmp+0x20/0x64 | Read of size 1 at addr 13ffffc153494b00 by task modprobe/590 | Pointer tag: [13], memory tag: [fe] | | Call trace: | __kasan_report+0x16c/0x1dc | kasan_report+0x10/0x18 | check_memory_region | __hwasan_load1_noabort+0x4c/0x54 | strcmp+0x20/0x64 | create_pinctrl+0x18c/0x7f4 | pinctrl_get+0x90/0x114 | devm_pinctrl_get+0x44/0x98 | pinctrl_bind_pins+0x5c/0x450 | really_probe+0x1c8/0x9a4 | driver_probe_device+0x120/0x1d8 Follow the example of sysfs, and duplicate the device name string before stashing it away in the pinctrl mapping entries. Cc: Linus Walleij Reported-by: Elena Petrova Tested-by: Elena Petrova Signed-off-by: Will Deacon Link: https://lore.kernel.org/r/20191002124206.22928-1-will@kernel.org Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/devicetree.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/pinctrl/devicetree.c b/drivers/pinctrl/devicetree.c index c4aa411f5935..3a7c2d6e4d5f 100644 --- a/drivers/pinctrl/devicetree.c +++ b/drivers/pinctrl/devicetree.c @@ -40,6 +40,13 @@ struct pinctrl_dt_map { static void dt_free_map(struct pinctrl_dev *pctldev, struct pinctrl_map *map, unsigned num_maps) { + int i; + + for (i = 0; i < num_maps; ++i) { + kfree_const(map[i].dev_name); + map[i].dev_name = NULL; + } + if (pctldev) { const struct pinctrl_ops *ops = pctldev->desc->pctlops; if (ops->dt_free_map) @@ -74,7 +81,13 @@ static int dt_remember_or_free_map(struct pinctrl *p, const char *statename, /* Initialize common mapping table entry fields */ for (i = 0; i < num_maps; i++) { - map[i].dev_name = dev_name(p->dev); + const char *devname; + + devname = kstrdup_const(dev_name(p->dev), GFP_KERNEL); + if (!devname) + goto err_free_map; + + map[i].dev_name = devname; map[i].name = statename; if (pctldev) map[i].ctrl_dev_name = dev_name(pctldev->dev); @@ -82,10 +95,8 @@ static int dt_remember_or_free_map(struct pinctrl *p, const char *statename, /* Remember the converted mapping table entries */ dt_map = kzalloc(sizeof(*dt_map), GFP_KERNEL); - if (!dt_map) { - dt_free_map(pctldev, map, num_maps); - return -ENOMEM; - } + if (!dt_map) + goto err_free_map; dt_map->pctldev = pctldev; dt_map->map = map; @@ -93,6 +104,10 @@ static int dt_remember_or_free_map(struct pinctrl *p, const char *statename, list_add_tail(&dt_map->node, &p->dt_maps); return pinctrl_register_map(map, num_maps, false); + +err_free_map: + dt_free_map(pctldev, map, num_maps); + return -ENOMEM; } struct pinctrl_dev *of_pinctrl_get(struct device_node *np) From 752117fccf0b886a992910b41acf7219b11a96c1 Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Wed, 2 Oct 2019 15:04:04 +0300 Subject: [PATCH 1792/3715] selftests/bpf: Correct path to include msg + path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c588146378962786ddeec817f7736a53298a7b01 ] The "path" buf is supposed to contain path + printf msg up to 24 bytes. It will be cut anyway, but compiler generates truncation warns like: " samples/bpf/../../tools/testing/selftests/bpf/cgroup_helpers.c: In function ‘setup_cgroup_environment’: samples/bpf/../../tools/testing/selftests/bpf/cgroup_helpers.c:52:34: warning: ‘/cgroup.controllers’ directive output may be truncated writing 19 bytes into a region of size between 1 and 4097 [-Wformat-truncation=] snprintf(path, sizeof(path), "%s/cgroup.controllers", cgroup_path); ^~~~~~~~~~~~~~~~~~~ samples/bpf/../../tools/testing/selftests/bpf/cgroup_helpers.c:52:2: note: ‘snprintf’ output between 20 and 4116 bytes into a destination of size 4097 snprintf(path, sizeof(path), "%s/cgroup.controllers", cgroup_path); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ samples/bpf/../../tools/testing/selftests/bpf/cgroup_helpers.c:72:34: warning: ‘/cgroup.subtree_control’ directive output may be truncated writing 23 bytes into a region of size between 1 and 4097 [-Wformat-truncation=] snprintf(path, sizeof(path), "%s/cgroup.subtree_control", ^~~~~~~~~~~~~~~~~~~~~~~ cgroup_path); samples/bpf/../../tools/testing/selftests/bpf/cgroup_helpers.c:72:2: note: ‘snprintf’ output between 24 and 4120 bytes into a destination of size 4097 snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path); " In order to avoid warns, lets decrease buf size for cgroup workdir on 24 bytes with assumption to include also "/cgroup.subtree_control" to the address. The cut will never happen anyway. Signed-off-by: Ivan Khoronzhuk Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20191002120404.26962-3-ivan.khoronzhuk@linaro.org Signed-off-by: Sasha Levin --- samples/bpf/cgroup_helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/bpf/cgroup_helpers.c b/samples/bpf/cgroup_helpers.c index 09afaddfc9ba..b5c09cd6c7bd 100644 --- a/samples/bpf/cgroup_helpers.c +++ b/samples/bpf/cgroup_helpers.c @@ -43,7 +43,7 @@ */ int setup_cgroup_environment(void) { - char cgroup_workdir[PATH_MAX + 1]; + char cgroup_workdir[PATH_MAX - 24]; format_cgroup_path(cgroup_workdir, ""); From 40ba746f3c492fbd56015f1e0a5cbb4b4f3e42bf Mon Sep 17 00:00:00 2001 From: Veeraiyan Chidambaram Date: Wed, 11 Sep 2019 15:15:56 +0200 Subject: [PATCH 1793/3715] usb: renesas_usbhs: add suspend event support in gadget mode [ Upstream commit 39abcc84846bbc0538f13c190b6a9c7e36890cd2 ] When R-Car Gen3 USB 2.0 is in Gadget mode, if host is detached an interrupt will be generated and Suspended state bit is set in interrupt status register. Interrupt handler will call driver->suspend(composite_suspend) if suspended state bit is set. composite_suspend will call ffs_func_suspend which will post FUNCTIONFS_SUSPEND and will be consumed by user space application via /dev/ep0. To be able to detect host detach, extend the DVSQ_MASK to cover the Suspended bit of the DVSQ[2:0] bitfield from the Interrupt Status Register 0 (INTSTS0) register and perform appropriate action in the DVST interrupt handler (usbhsg_irq_dev_state). Without this commit, disconnection of the phone from R-Car-H3 ES2.0 Salvator-X CN9 port is not recognized and reverse role switch does not happen. If phone is connected again it does not enumerate. With this commit, disconnection will be recognized and reverse role switch will happen by a user space application. If phone is connected again it will enumerate properly and will become visible in the output of 'lsusb'. Signed-off-by: Veeraiyan Chidambaram Signed-off-by: Eugeniu Rosca Reviewed-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/1568207756-22325-3-git-send-email-external.veeraiyan.c@de.adit-jv.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/renesas_usbhs/common.h | 3 ++- drivers/usb/renesas_usbhs/mod_gadget.c | 12 +++++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/usb/renesas_usbhs/common.h b/drivers/usb/renesas_usbhs/common.h index b8620aa6b72e..8424c165f732 100644 --- a/drivers/usb/renesas_usbhs/common.h +++ b/drivers/usb/renesas_usbhs/common.h @@ -163,11 +163,12 @@ struct usbhs_priv; #define VBSTS (1 << 7) /* VBUS_0 and VBUSIN_0 Input Status */ #define VALID (1 << 3) /* USB Request Receive */ -#define DVSQ_MASK (0x3 << 4) /* Device State */ +#define DVSQ_MASK (0x7 << 4) /* Device State */ #define POWER_STATE (0 << 4) #define DEFAULT_STATE (1 << 4) #define ADDRESS_STATE (2 << 4) #define CONFIGURATION_STATE (3 << 4) +#define SUSPENDED_STATE (4 << 4) #define CTSQ_MASK (0x7) /* Control Transfer Stage */ #define IDLE_SETUP_STAGE 0 /* Idle stage or setup stage */ diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c index 0dedb0d91dcc..b27f2135b66d 100644 --- a/drivers/usb/renesas_usbhs/mod_gadget.c +++ b/drivers/usb/renesas_usbhs/mod_gadget.c @@ -465,12 +465,18 @@ static int usbhsg_irq_dev_state(struct usbhs_priv *priv, { struct usbhsg_gpriv *gpriv = usbhsg_priv_to_gpriv(priv); struct device *dev = usbhsg_gpriv_to_dev(gpriv); + int state = usbhs_status_get_device_state(irq_state); gpriv->gadget.speed = usbhs_bus_get_speed(priv); - dev_dbg(dev, "state = %x : speed : %d\n", - usbhs_status_get_device_state(irq_state), - gpriv->gadget.speed); + dev_dbg(dev, "state = %x : speed : %d\n", state, gpriv->gadget.speed); + + if (gpriv->gadget.speed != USB_SPEED_UNKNOWN && + (state & SUSPENDED_STATE)) { + if (gpriv->driver && gpriv->driver->suspend) + gpriv->driver->suspend(&gpriv->gadget); + usb_gadget_set_state(&gpriv->gadget, USB_STATE_SUSPENDED); + } return 0; } From 1271cd8f220236db2cef1741eb56e22d11470d2a Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sat, 14 Sep 2019 14:02:56 -0700 Subject: [PATCH 1794/3715] hwrng: omap3-rom - Call clk_disable_unprepare() on exit only if not idled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit eaecce12f5f0d2c35d278e41e1bc4522393861ab ] When unloading omap3-rom-rng, we'll get the following: WARNING: CPU: 0 PID: 100 at drivers/clk/clk.c:948 clk_core_disable This is because the clock may be already disabled by omap3_rom_rng_idle(). Let's fix the issue by checking for rng_idle on exit. Cc: Aaro Koskinen Cc: Adam Ford Cc: Pali Rohár Cc: Sebastian Reichel Cc: Tero Kristo Fixes: 1c6b7c2108bd ("hwrng: OMAP3 ROM Random Number Generator support") Signed-off-by: Tony Lindgren Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/char/hw_random/omap3-rom-rng.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/char/hw_random/omap3-rom-rng.c b/drivers/char/hw_random/omap3-rom-rng.c index 38b719017186..648e39ce6bd9 100644 --- a/drivers/char/hw_random/omap3-rom-rng.c +++ b/drivers/char/hw_random/omap3-rom-rng.c @@ -121,7 +121,8 @@ static int omap3_rom_rng_remove(struct platform_device *pdev) { cancel_delayed_work_sync(&idle_work); hwrng_unregister(&omap3_rom_rng_ops); - clk_disable_unprepare(rng_clk); + if (!rng_idle) + clk_disable_unprepare(rng_clk); return 0; } From d8233bd84b2ba05aeb8739bd394c65b5f9168c02 Mon Sep 17 00:00:00 2001 From: Yizhuo Date: Thu, 3 Oct 2019 10:58:13 -0700 Subject: [PATCH 1795/3715] regulator: max8907: Fix the usage of uninitialized variable in max8907_regulator_probe() [ Upstream commit 472b39c3d1bba0616eb0e9a8fa3ad0f56927c7d7 ] Inside function max8907_regulator_probe(), variable val could be uninitialized if regmap_read() fails. However, val is used later in the if statement to decide the content written to "pmic", which is potentially unsafe. Signed-off-by: Yizhuo Link: https://lore.kernel.org/r/20191003175813.16415-1-yzhai003@ucr.edu Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/max8907-regulator.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/regulator/max8907-regulator.c b/drivers/regulator/max8907-regulator.c index 860400d2cd85..a8f2f07239fb 100644 --- a/drivers/regulator/max8907-regulator.c +++ b/drivers/regulator/max8907-regulator.c @@ -299,7 +299,10 @@ static int max8907_regulator_probe(struct platform_device *pdev) memcpy(pmic->desc, max8907_regulators, sizeof(pmic->desc)); /* Backwards compatibility with MAX8907B; SD1 uses different voltages */ - regmap_read(max8907->regmap_gen, MAX8907_REG_II2RR, &val); + ret = regmap_read(max8907->regmap_gen, MAX8907_REG_II2RR, &val); + if (ret) + return ret; + if ((val & MAX8907_II2RR_VERSION_MASK) == MAX8907_II2RR_VERSION_REV_B) { pmic->desc[MAX8907_SD1].min_uV = 637500; @@ -336,14 +339,20 @@ static int max8907_regulator_probe(struct platform_device *pdev) } if (pmic->desc[i].ops == &max8907_ldo_ops) { - regmap_read(config.regmap, pmic->desc[i].enable_reg, + ret = regmap_read(config.regmap, pmic->desc[i].enable_reg, &val); + if (ret) + return ret; + if ((val & MAX8907_MASK_LDO_SEQ) != MAX8907_MASK_LDO_SEQ) pmic->desc[i].ops = &max8907_ldo_hwctl_ops; } else if (pmic->desc[i].ops == &max8907_out5v_ops) { - regmap_read(config.regmap, pmic->desc[i].enable_reg, + ret = regmap_read(config.regmap, pmic->desc[i].enable_reg, &val); + if (ret) + return ret; + if ((val & (MAX8907_MASK_OUT5V_VINEN | MAX8907_MASK_OUT5V_ENSRC)) != MAX8907_MASK_OUT5V_ENSRC) From c6a2abe123cdbb39d90ef0938084d8cbed999d01 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 24 Sep 2019 06:49:04 -0300 Subject: [PATCH 1796/3715] media: flexcop-usb: fix NULL-ptr deref in flexcop_usb_transfer_init() [ Upstream commit 649cd16c438f51d4cd777e71ca1f47f6e0c5e65d ] If usb_set_interface() failed, iface->cur_altsetting will not be assigned and it will be used in flexcop_usb_transfer_init() It may lead a NULL pointer dereference. Check usb_set_interface() return value in flexcop_usb_init() and return failed to avoid using this NULL pointer. Signed-off-by: Yang Yingliang Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/usb/b2c2/flexcop-usb.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/media/usb/b2c2/flexcop-usb.c b/drivers/media/usb/b2c2/flexcop-usb.c index ac4fddfd0a43..f1807c16438d 100644 --- a/drivers/media/usb/b2c2/flexcop-usb.c +++ b/drivers/media/usb/b2c2/flexcop-usb.c @@ -503,7 +503,13 @@ urb_error: static int flexcop_usb_init(struct flexcop_usb *fc_usb) { /* use the alternate setting with the larges buffer */ - usb_set_interface(fc_usb->udev,0,1); + int ret = usb_set_interface(fc_usb->udev, 0, 1); + + if (ret) { + err("set interface failed."); + return ret; + } + switch (fc_usb->udev->speed) { case USB_SPEED_LOW: err("cannot handle USB speed because it is too slow."); From 9068d32b52425d2ac2031e69a42f7d3cf3912030 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 1 Oct 2019 04:56:38 -0300 Subject: [PATCH 1797/3715] media: cec-funcs.h: add status_req checks [ Upstream commit 9b211f9c5a0b67afc435b86f75d78273b97db1c5 ] The CEC_MSG_GIVE_DECK_STATUS and CEC_MSG_GIVE_TUNER_DEVICE_STATUS commands both have a status_req argument: ON, OFF, ONCE. If ON or ONCE, then the follower will reply with a STATUS message. Either once or whenever the status changes (status_req == ON). If status_req == OFF, then it will stop sending continuous status updates, but the follower will *not* send a STATUS message in that case. This means that if status_req == OFF, then msg->reply should be 0 as well since no reply is expected in that case. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- include/uapi/linux/cec-funcs.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/cec-funcs.h b/include/uapi/linux/cec-funcs.h index 28e8a2a86e16..2a114f7a24d4 100644 --- a/include/uapi/linux/cec-funcs.h +++ b/include/uapi/linux/cec-funcs.h @@ -952,7 +952,8 @@ static inline void cec_msg_give_deck_status(struct cec_msg *msg, msg->len = 3; msg->msg[1] = CEC_MSG_GIVE_DECK_STATUS; msg->msg[2] = status_req; - msg->reply = reply ? CEC_MSG_DECK_STATUS : 0; + msg->reply = (reply && status_req != CEC_OP_STATUS_REQ_OFF) ? + CEC_MSG_DECK_STATUS : 0; } static inline void cec_ops_give_deck_status(const struct cec_msg *msg, @@ -1056,7 +1057,8 @@ static inline void cec_msg_give_tuner_device_status(struct cec_msg *msg, msg->len = 3; msg->msg[1] = CEC_MSG_GIVE_TUNER_DEVICE_STATUS; msg->msg[2] = status_req; - msg->reply = reply ? CEC_MSG_TUNER_DEVICE_STATUS : 0; + msg->reply = (reply && status_req != CEC_OP_STATUS_REQ_OFF) ? + CEC_MSG_TUNER_DEVICE_STATUS : 0; } static inline void cec_ops_give_tuner_device_status(const struct cec_msg *msg, From 739c4f80e3376bfb1c5e34ac359ad6456363554d Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Wed, 2 Oct 2019 12:44:06 -0700 Subject: [PATCH 1798/3715] drm/bridge: dw-hdmi: Refuse DDC/CI transfers on the internal I2C controller [ Upstream commit bee447e224b2645911c5d06e35dc90d8433fcef6 ] The DDC/CI protocol involves sending a multi-byte request to the display via I2C, which is typically followed by a multi-byte response. The internal I2C controller only allows single byte reads/writes or reads of 8 sequential bytes, hence DDC/CI is not supported when the internal I2C controller is used. The I2C transfers complete without errors, however the data in the response is garbage. Abort transfers to/from slave address 0x37 (DDC) with -EOPNOTSUPP, to make it evident that the communication is failing. Signed-off-by: Matthias Kaehlcke Reviewed-by: Douglas Anderson Reviewed-by: Sean Paul Acked-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20191002124354.v2.1.I709dfec496f5f0b44a7b61dcd4937924da8d8382@changeid Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/synopsys/dw-hdmi.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index 4db31b89507c..0febaafb8d89 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -39,6 +39,7 @@ #include +#define DDC_CI_ADDR 0x37 #define DDC_SEGMENT_ADDR 0x30 #define HDMI_EDID_LEN 512 @@ -320,6 +321,15 @@ static int dw_hdmi_i2c_xfer(struct i2c_adapter *adap, u8 addr = msgs[0].addr; int i, ret = 0; + if (addr == DDC_CI_ADDR) + /* + * The internal I2C controller does not support the multi-byte + * read and write operations needed for DDC/CI. + * TOFIX: Blacklist the DDC/CI address until we filter out + * unsupported I2C operations. + */ + return -EOPNOTSUPP; + dev_dbg(hdmi->dev, "xfer: num: %d, addr: %#x\n", num, addr); for (i = 0; i < num; i++) { From a957513cc75f24b7414235e31c59baea03603a5a Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Sat, 5 Oct 2019 17:25:07 +0900 Subject: [PATCH 1799/3715] samples: pktgen: fix proc_cmd command result check logic [ Upstream commit 3cad8f911575191fb3b81d8ed0e061e30f922223 ] Currently, proc_cmd is used to dispatch command to 'pg_ctrl', 'pg_thread', 'pg_set'. proc_cmd is designed to check command result with grep the "Result:", but this might fail since this string is only shown in 'pg_thread' and 'pg_set'. This commit fixes this logic by grep-ing the "Result:" string only when the command is not for 'pg_ctrl'. For clarity of an execution flow, 'errexit' flag has been set. To cleanup pktgen on exit, trap has been added for EXIT signal. Signed-off-by: Daniel T. Lee Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- samples/pktgen/functions.sh | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/samples/pktgen/functions.sh b/samples/pktgen/functions.sh index 205e4cde4601..065a7e296ee3 100644 --- a/samples/pktgen/functions.sh +++ b/samples/pktgen/functions.sh @@ -5,6 +5,8 @@ # Author: Jesper Dangaaard Brouer # License: GPL +set -o errexit + ## -- General shell logging cmds -- function err() { local exitcode=$1 @@ -58,6 +60,7 @@ function pg_set() { function proc_cmd() { local result local proc_file=$1 + local status=0 # after shift, the remaining args are contained in $@ shift local proc_ctrl=${PROC_DIR}/$proc_file @@ -73,13 +76,13 @@ function proc_cmd() { echo "cmd: $@ > $proc_ctrl" fi # Quoting of "$@" is important for space expansion - echo "$@" > "$proc_ctrl" - local status=$? + echo "$@" > "$proc_ctrl" || status=$? - result=$(grep "Result: OK:" $proc_ctrl) - # Due to pgctrl, cannot use exit code $? from grep - if [[ "$result" == "" ]]; then - grep "Result:" $proc_ctrl >&2 + if [[ "$proc_file" != "pgctrl" ]]; then + result=$(grep "Result: OK:" $proc_ctrl) || true + if [[ "$result" == "" ]]; then + grep "Result:" $proc_ctrl >&2 + fi fi if (( $status != 0 )); then err 5 "Write error($status) occurred cmd: \"$@ > $proc_ctrl\"" @@ -105,6 +108,8 @@ function pgset() { fi } +[[ $EUID -eq 0 ]] && trap 'pg_ctrl "reset"' EXIT + ## -- General shell tricks -- function root_check_run_with_sudo() { From 8e1823f13a0f3c8635c64c789f9804316ad8934d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 30 Sep 2019 16:00:41 -0700 Subject: [PATCH 1800/3715] block: Fix writeback throttling W=1 compiler warnings [ Upstream commit 1d200e9d6f635ae894993a7d0f1b9e0b6e522e3b ] Fix the following compiler warnings: In file included from ./include/linux/bitmap.h:9, from ./include/linux/cpumask.h:12, from ./arch/x86/include/asm/cpumask.h:5, from ./arch/x86/include/asm/msr.h:11, from ./arch/x86/include/asm/processor.h:21, from ./arch/x86/include/asm/cpufeature.h:5, from ./arch/x86/include/asm/thread_info.h:53, from ./include/linux/thread_info.h:38, from ./arch/x86/include/asm/preempt.h:7, from ./include/linux/preempt.h:78, from ./include/linux/spinlock.h:51, from ./include/linux/mmzone.h:8, from ./include/linux/gfp.h:6, from ./include/linux/mm.h:10, from ./include/linux/bvec.h:13, from ./include/linux/blk_types.h:10, from block/blk-wbt.c:23: In function 'strncpy', inlined from 'perf_trace_wbt_stat' at ./include/trace/events/wbt.h:15:1: ./include/linux/string.h:260:9: warning: '__builtin_strncpy' specified bound 32 equals destination size [-Wstringop-truncation] return __builtin_strncpy(p, q, size); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In function 'strncpy', inlined from 'perf_trace_wbt_lat' at ./include/trace/events/wbt.h:58:1: ./include/linux/string.h:260:9: warning: '__builtin_strncpy' specified bound 32 equals destination size [-Wstringop-truncation] return __builtin_strncpy(p, q, size); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In function 'strncpy', inlined from 'perf_trace_wbt_step' at ./include/trace/events/wbt.h:87:1: ./include/linux/string.h:260:9: warning: '__builtin_strncpy' specified bound 32 equals destination size [-Wstringop-truncation] return __builtin_strncpy(p, q, size); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In function 'strncpy', inlined from 'perf_trace_wbt_timer' at ./include/trace/events/wbt.h:126:1: ./include/linux/string.h:260:9: warning: '__builtin_strncpy' specified bound 32 equals destination size [-Wstringop-truncation] return __builtin_strncpy(p, q, size); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In function 'strncpy', inlined from 'trace_event_raw_event_wbt_stat' at ./include/trace/events/wbt.h:15:1: ./include/linux/string.h:260:9: warning: '__builtin_strncpy' specified bound 32 equals destination size [-Wstringop-truncation] return __builtin_strncpy(p, q, size); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In function 'strncpy', inlined from 'trace_event_raw_event_wbt_lat' at ./include/trace/events/wbt.h:58:1: ./include/linux/string.h:260:9: warning: '__builtin_strncpy' specified bound 32 equals destination size [-Wstringop-truncation] return __builtin_strncpy(p, q, size); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In function 'strncpy', inlined from 'trace_event_raw_event_wbt_timer' at ./include/trace/events/wbt.h:126:1: ./include/linux/string.h:260:9: warning: '__builtin_strncpy' specified bound 32 equals destination size [-Wstringop-truncation] return __builtin_strncpy(p, q, size); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In function 'strncpy', inlined from 'trace_event_raw_event_wbt_step' at ./include/trace/events/wbt.h:87:1: ./include/linux/string.h:260:9: warning: '__builtin_strncpy' specified bound 32 equals destination size [-Wstringop-truncation] return __builtin_strncpy(p, q, size); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: Johannes Thumshirn Fixes: e34cbd307477 ("blk-wbt: add general throttling mechanism"; v4.10). Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- include/trace/events/wbt.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/include/trace/events/wbt.h b/include/trace/events/wbt.h index b048694070e2..37342a13c9cb 100644 --- a/include/trace/events/wbt.h +++ b/include/trace/events/wbt.h @@ -33,7 +33,8 @@ TRACE_EVENT(wbt_stat, ), TP_fast_assign( - strncpy(__entry->name, dev_name(bdi->dev), 32); + strlcpy(__entry->name, dev_name(bdi->dev), + ARRAY_SIZE(__entry->name)); __entry->rmean = stat[0].mean; __entry->rmin = stat[0].min; __entry->rmax = stat[0].max; @@ -67,7 +68,8 @@ TRACE_EVENT(wbt_lat, ), TP_fast_assign( - strncpy(__entry->name, dev_name(bdi->dev), 32); + strlcpy(__entry->name, dev_name(bdi->dev), + ARRAY_SIZE(__entry->name)); __entry->lat = div_u64(lat, 1000); ), @@ -103,7 +105,8 @@ TRACE_EVENT(wbt_step, ), TP_fast_assign( - strncpy(__entry->name, dev_name(bdi->dev), 32); + strlcpy(__entry->name, dev_name(bdi->dev), + ARRAY_SIZE(__entry->name)); __entry->msg = msg; __entry->step = step; __entry->window = div_u64(window, 1000); @@ -138,7 +141,8 @@ TRACE_EVENT(wbt_timer, ), TP_fast_assign( - strncpy(__entry->name, dev_name(bdi->dev), 32); + strlcpy(__entry->name, dev_name(bdi->dev), + ARRAY_SIZE(__entry->name)); __entry->status = status; __entry->step = step; __entry->inflight = inflight; From 1cdafe368ec4ad7e878eddc30ea0d11a0f57b222 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Fri, 4 Oct 2019 15:16:48 -0500 Subject: [PATCH 1801/3715] mwifiex: pcie: Fix memory leak in mwifiex_pcie_init_evt_ring [ Upstream commit d10dcb615c8e29d403a24d35f8310a7a53e3050c ] In mwifiex_pcie_init_evt_ring, a new skb is allocated which should be released if mwifiex_map_pci_memory() fails. The release for skb and card->evtbd_ring_vbase is added. Fixes: 0732484b47b5 ("mwifiex: separate ring initialization and ring creation routines") Signed-off-by: Navid Emamdoost Acked-by: Ganapathi Bhat Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/marvell/mwifiex/pcie.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c index 9511f5fe62f4..9d0d790a1319 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.c +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c @@ -677,8 +677,11 @@ static int mwifiex_pcie_init_evt_ring(struct mwifiex_adapter *adapter) skb_put(skb, MAX_EVENT_SIZE); if (mwifiex_map_pci_memory(adapter, skb, MAX_EVENT_SIZE, - PCI_DMA_FROMDEVICE)) + PCI_DMA_FROMDEVICE)) { + kfree_skb(skb); + kfree(card->evtbd_ring_vbase); return -1; + } buf_pa = MWIFIEX_SKB_DMA_ADDR(skb); From f69d37afeb98a2f9b0f8bfea8d4e2f88a7c02ad0 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 22 Sep 2019 04:41:23 -0300 Subject: [PATCH 1802/3715] media: cx88: Fix some error handling path in 'cx8800_initdev()' [ Upstream commit e1444e9b0424c70def6352580762d660af50e03f ] A call to 'pci_disable_device()' is missing in the error handling path. In some cases, a call to 'free_irq()' may also be missing. Reorder the error handling path, add some new labels and fix the 2 issues mentionned above. This way, the error handling path in more in line with 'cx8800_finidev()' (i.e. the remove function) Signed-off-by: Christophe JAILLET Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/pci/cx88/cx88-video.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/media/pci/cx88/cx88-video.c b/drivers/media/pci/cx88/cx88-video.c index 7d25ecd4404b..1748812bd7e5 100644 --- a/drivers/media/pci/cx88/cx88-video.c +++ b/drivers/media/pci/cx88/cx88-video.c @@ -1310,7 +1310,7 @@ static int cx8800_initdev(struct pci_dev *pci_dev, core = cx88_core_get(dev->pci); if (!core) { err = -EINVAL; - goto fail_free; + goto fail_disable; } dev->core = core; @@ -1356,7 +1356,7 @@ static int cx8800_initdev(struct pci_dev *pci_dev, cc->step, cc->default_value); if (!vc) { err = core->audio_hdl.error; - goto fail_core; + goto fail_irq; } vc->priv = (void *)cc; } @@ -1370,7 +1370,7 @@ static int cx8800_initdev(struct pci_dev *pci_dev, cc->step, cc->default_value); if (!vc) { err = core->video_hdl.error; - goto fail_core; + goto fail_irq; } vc->priv = (void *)cc; if (vc->id == V4L2_CID_CHROMA_AGC) @@ -1533,11 +1533,14 @@ static int cx8800_initdev(struct pci_dev *pci_dev, fail_unreg: cx8800_unregister_video(dev); - free_irq(pci_dev->irq, dev); mutex_unlock(&core->lock); +fail_irq: + free_irq(pci_dev->irq, dev); fail_core: core->v4ldev = NULL; cx88_core_put(core, dev->pci); +fail_disable: + pci_disable_device(pci_dev); fail_free: kfree(dev); return err; From 110df032c196ebf4df05a8b67b286192d0f1bc25 Mon Sep 17 00:00:00 2001 From: Benoit Parrot Date: Mon, 7 Oct 2019 12:09:50 -0300 Subject: [PATCH 1803/3715] media: ti-vpe: vpe: Fix Motion Vector vpdma stride [ Upstream commit 102af9b9922f658f705a4b0deaccabac409131bf ] commit 3dc2046ca78b ("[media] media: ti-vpe: vpe: allow use of user specified stride") and commit da4414eaed15 ("[media] media: ti-vpe: vpdma: add support for user specified stride") resulted in the Motion Vector stride to be the same as the image stride. This caused memory corruption in the output image as mentioned in commit 00db969964c8 ("[media] media: ti-vpe: vpe: Fix line stride for output motion vector"). Fixes: 3dc2046ca78b ("[media] media: ti-vpe: vpe: allow use of user specified stride") Fixes: da4414eaed15 ("[media] media: ti-vpe: vpdma: add support for user specified stride") Signed-off-by: Benoit Parrot Acked-by: Nikhil Devshatwar Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/ti-vpe/vpe.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/media/platform/ti-vpe/vpe.c b/drivers/media/platform/ti-vpe/vpe.c index 45bd10544189..19c0a2614635 100644 --- a/drivers/media/platform/ti-vpe/vpe.c +++ b/drivers/media/platform/ti-vpe/vpe.c @@ -1044,11 +1044,14 @@ static void add_out_dtd(struct vpe_ctx *ctx, int port) dma_addr_t dma_addr; u32 flags = 0; u32 offset = 0; + u32 stride; if (port == VPE_PORT_MV_OUT) { vpdma_fmt = &vpdma_misc_fmts[VPDMA_DATA_FMT_MV]; dma_addr = ctx->mv_buf_dma[mv_buf_selector]; q_data = &ctx->q_data[Q_DATA_SRC]; + stride = ALIGN((q_data->width * vpdma_fmt->depth) >> 3, + VPDMA_STRIDE_ALIGN); } else { /* to incorporate interleaved formats */ int plane = fmt->coplanar ? p_data->vb_part : 0; @@ -1075,6 +1078,7 @@ static void add_out_dtd(struct vpe_ctx *ctx, int port) } /* Apply the offset */ dma_addr += offset; + stride = q_data->bytesperline[VPE_LUMA]; } if (q_data->flags & Q_DATA_FRAME_1D) @@ -1086,7 +1090,7 @@ static void add_out_dtd(struct vpe_ctx *ctx, int port) MAX_W, MAX_H); vpdma_add_out_dtd(&ctx->desc_list, q_data->width, - q_data->bytesperline[VPE_LUMA], &q_data->c_rect, + stride, &q_data->c_rect, vpdma_fmt, dma_addr, MAX_OUT_WIDTH_REG1, MAX_OUT_HEIGHT_REG1, p_data->channel, flags); } @@ -1105,10 +1109,13 @@ static void add_in_dtd(struct vpe_ctx *ctx, int port) dma_addr_t dma_addr; u32 flags = 0; u32 offset = 0; + u32 stride; if (port == VPE_PORT_MV_IN) { vpdma_fmt = &vpdma_misc_fmts[VPDMA_DATA_FMT_MV]; dma_addr = ctx->mv_buf_dma[mv_buf_selector]; + stride = ALIGN((q_data->width * vpdma_fmt->depth) >> 3, + VPDMA_STRIDE_ALIGN); } else { /* to incorporate interleaved formats */ int plane = fmt->coplanar ? p_data->vb_part : 0; @@ -1135,6 +1142,7 @@ static void add_in_dtd(struct vpe_ctx *ctx, int port) } /* Apply the offset */ dma_addr += offset; + stride = q_data->bytesperline[VPE_LUMA]; if (q_data->flags & Q_DATA_INTERLACED_SEQ_TB) { /* @@ -1170,10 +1178,10 @@ static void add_in_dtd(struct vpe_ctx *ctx, int port) if (p_data->vb_part && fmt->fourcc == V4L2_PIX_FMT_NV12) frame_height /= 2; - vpdma_add_in_dtd(&ctx->desc_list, q_data->width, - q_data->bytesperline[VPE_LUMA], &q_data->c_rect, - vpdma_fmt, dma_addr, p_data->channel, field, flags, frame_width, - frame_height, 0, 0); + vpdma_add_in_dtd(&ctx->desc_list, q_data->width, stride, + &q_data->c_rect, vpdma_fmt, dma_addr, + p_data->channel, field, flags, frame_width, + frame_height, 0, 0); } /* From 5c181a4e29d2fd537e9908036d771dc63f91e098 Mon Sep 17 00:00:00 2001 From: Benoit Parrot Date: Mon, 7 Oct 2019 12:09:57 -0300 Subject: [PATCH 1804/3715] media: ti-vpe: vpe: fix a v4l2-compliance warning about invalid pixel format [ Upstream commit 06bec72b250b2cb3ba96fa45c2b8e0fb83745517 ] v4l2-compliance warns with this message: warn: v4l2-test-formats.cpp(717): \ TRY_FMT cannot handle an invalid pixelformat. warn: v4l2-test-formats.cpp(718): \ This may or may not be a problem. For more information see: warn: v4l2-test-formats.cpp(719): \ http://www.mail-archive.com/linux-media@vger.kernel.org/msg56550.html ... test VIDIOC_TRY_FMT: FAIL We need to make sure that the returns a valid pixel format in all instance. Based on the v4l2 framework convention drivers must return a valid pixel format when the requested pixel format is either invalid or not supported. Signed-off-by: Benoit Parrot Reviewed-by: Tomi Valkeinen Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/ti-vpe/vpe.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/media/platform/ti-vpe/vpe.c b/drivers/media/platform/ti-vpe/vpe.c index 19c0a2614635..4dc08f5a6081 100644 --- a/drivers/media/platform/ti-vpe/vpe.c +++ b/drivers/media/platform/ti-vpe/vpe.c @@ -352,20 +352,25 @@ enum { }; /* find our format description corresponding to the passed v4l2_format */ -static struct vpe_fmt *find_format(struct v4l2_format *f) +static struct vpe_fmt *__find_format(u32 fourcc) { struct vpe_fmt *fmt; unsigned int k; for (k = 0; k < ARRAY_SIZE(vpe_formats); k++) { fmt = &vpe_formats[k]; - if (fmt->fourcc == f->fmt.pix.pixelformat) + if (fmt->fourcc == fourcc) return fmt; } return NULL; } +static struct vpe_fmt *find_format(struct v4l2_format *f) +{ + return __find_format(f->fmt.pix.pixelformat); +} + /* * there is one vpe_dev structure in the driver, it is shared by * all instances. @@ -1608,9 +1613,9 @@ static int __vpe_try_fmt(struct vpe_ctx *ctx, struct v4l2_format *f, unsigned int stride = 0; if (!fmt || !(fmt->types & type)) { - vpe_err(ctx->dev, "Fourcc format (0x%08x) invalid.\n", + vpe_dbg(ctx->dev, "Fourcc format (0x%08x) invalid.\n", pix->pixelformat); - return -EINVAL; + fmt = __find_format(V4L2_PIX_FMT_YUYV); } if (pix->field != V4L2_FIELD_NONE && pix->field != V4L2_FIELD_ALTERNATE From cd27dd240d198d9c0fee540dc7467be5842523de Mon Sep 17 00:00:00 2001 From: Benoit Parrot Date: Mon, 7 Oct 2019 12:10:00 -0300 Subject: [PATCH 1805/3715] media: ti-vpe: vpe: fix a v4l2-compliance failure about frame sequence number [ Upstream commit 2444846c0dbfa4ead21b621e4300ec32c90fbf38 ] v4l2-compliance fails with this message: fail: v4l2-test-buffers.cpp(294): \ (int)g_sequence() < seq.last_seq + 1 fail: v4l2-test-buffers.cpp(740): \ buf.check(m2m_q, last_m2m_seq) fail: v4l2-test-buffers.cpp(974): \ captureBufs(node, q, m2m_q, frame_count, true) test MMAP: FAIL The driver is failing to update the source frame sequence number in the vb2 buffer object. Only the destination frame sequence was being updated. This is only a reporting issue if the user space app actually cares about the frame sequence number. But it is fixed nonetheless. Signed-off-by: Benoit Parrot Reviewed-by: Tomi Valkeinen Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/ti-vpe/vpe.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/platform/ti-vpe/vpe.c b/drivers/media/platform/ti-vpe/vpe.c index 4dc08f5a6081..8a5c0d3e733e 100644 --- a/drivers/media/platform/ti-vpe/vpe.c +++ b/drivers/media/platform/ti-vpe/vpe.c @@ -1448,6 +1448,7 @@ static irqreturn_t vpe_irq(int irq_vpe, void *data) d_vb->timecode = s_vb->timecode; d_vb->sequence = ctx->sequence; + s_vb->sequence = ctx->sequence; d_q_data = &ctx->q_data[Q_DATA_DST]; if (d_q_data->flags & Q_IS_INTERLACED) { From 95961770a970cad9a803da935c0ebe091eca6442 Mon Sep 17 00:00:00 2001 From: Benoit Parrot Date: Mon, 7 Oct 2019 12:09:58 -0300 Subject: [PATCH 1806/3715] media: ti-vpe: vpe: Make sure YUYV is set as default format [ Upstream commit e20b248051ca0f90d84b4d9378e4780bc31f16c6 ] v4l2-compliance fails with this message: fail: v4l2-test-formats.cpp(672): \ Video Capture Multiplanar: TRY_FMT(G_FMT) != G_FMT fail: v4l2-test-formats.cpp(672): \ Video Output Multiplanar: TRY_FMT(G_FMT) != G_FMT ... test VIDIOC_TRY_FMT: FAIL The default pixel format was setup as pointing to a specific offset in the vpe_formats table assuming it was pointing to the V4L2_PIX_FMT_YUYV entry. This became false after the addition on the NV21 format (see above commid-id) So instead of hard-coding an offset which might change over time we need to use a lookup helper instead so we know the default will always be what we intended. Signed-off-by: Benoit Parrot Fixes: 40cc823f7005 ("media: ti-vpe: Add support for NV21 format") Reviewed-by: Tomi Valkeinen Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/ti-vpe/vpe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/ti-vpe/vpe.c b/drivers/media/platform/ti-vpe/vpe.c index 8a5c0d3e733e..a136fb14bf1a 100644 --- a/drivers/media/platform/ti-vpe/vpe.c +++ b/drivers/media/platform/ti-vpe/vpe.c @@ -2322,7 +2322,7 @@ static int vpe_open(struct file *file) v4l2_ctrl_handler_setup(hdl); s_q_data = &ctx->q_data[Q_DATA_SRC]; - s_q_data->fmt = &vpe_formats[2]; + s_q_data->fmt = __find_format(V4L2_PIX_FMT_YUYV); s_q_data->width = 1920; s_q_data->height = 1080; s_q_data->nplanes = 1; From b86957058be437d5be8832a3d627c9c08298dea6 Mon Sep 17 00:00:00 2001 From: Benoit Parrot Date: Mon, 7 Oct 2019 12:09:56 -0300 Subject: [PATCH 1807/3715] media: ti-vpe: vpe: fix a v4l2-compliance failure causing a kernel panic [ Upstream commit a37980ac5be29b83da67bf7d571c6bd9f90f8e45 ] v4l2-compliance fails with this message: warn: v4l2-test-formats.cpp(717): \ TRY_FMT cannot handle an invalid pixelformat. test VIDIOC_TRY_FMT: FAIL This causes the following kernel panic: Unable to handle kernel paging request at virtual address 56595561 pgd = ecd80e00 *pgd=00000000 Internal error: Oops: 205 [#1] PREEMPT SMP ARM ... CPU: 0 PID: 930 Comm: v4l2-compliance Not tainted \ 4.14.62-01715-gc8cd67f49a19 #1 Hardware name: Generic DRA72X (Flattened Device Tree) task: ece44d80 task.stack: ecc6e000 PC is at __vpe_try_fmt+0x18c/0x2a8 [ti_vpe] LR is at 0x8 Because the driver fails to properly check the 'num_planes' values for proper ranges it ends up accessing out of bound data causing the kernel panic. Since this driver only handle single or dual plane pixel format, make sure the provided value does not exceed 2 planes. Signed-off-by: Benoit Parrot Reviewed-by: Tomi Valkeinen Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/ti-vpe/vpe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/ti-vpe/vpe.c b/drivers/media/platform/ti-vpe/vpe.c index a136fb14bf1a..5adafee98e4c 100644 --- a/drivers/media/platform/ti-vpe/vpe.c +++ b/drivers/media/platform/ti-vpe/vpe.c @@ -1663,7 +1663,7 @@ static int __vpe_try_fmt(struct vpe_ctx *ctx, struct v4l2_format *f, &pix->height, MIN_H, MAX_H, H_ALIGN, S_ALIGN); - if (!pix->num_planes) + if (!pix->num_planes || pix->num_planes > 2) pix->num_planes = fmt->coplanar ? 2 : 1; else if (pix->num_planes > 1 && !fmt->coplanar) pix->num_planes = 1; From 21d6b3c33c88dcd57a6f2d1abda980ea99d9e2bd Mon Sep 17 00:00:00 2001 From: Benoit Parrot Date: Mon, 7 Oct 2019 12:10:01 -0300 Subject: [PATCH 1808/3715] media: ti-vpe: vpe: ensure buffers are cleaned up properly in abort cases [ Upstream commit cf6acb73b050e98b5cc435fae0e8ae0157520410 ] v4l2-compliance fails with this message: fail: v4l2-test-buffers.cpp(691): ret == 0 fail: v4l2-test-buffers.cpp(974): captureBufs(node, q, m2m_q, frame_count, true) test MMAP: FAIL This caused the following Kernel Warning: WARNING: CPU: 0 PID: 961 at drivers/media/v4l2-core/videobuf2-core.c:1658 __vb2_queue_cancel+0x174/0x1d8 ... CPU: 0 PID: 961 Comm: v4l2-compliance Not tainted 4.14.62-01720-g20ecd717e87a #6 Hardware name: Generic DRA72X (Flattened Device Tree) Backtrace: [] (dump_backtrace) from [] (show_stack+0x18/0x1c) r7:00000009 r6:60070013 r5:00000000 r4:c1053824 [] (show_stack) from [] (dump_stack+0x90/0xa4) [] (dump_stack) from [] (__warn+0xec/0x104) r7:00000009 r6:c0c0ad50 r5:00000000 r4:00000000 [] (__warn) from [] (warn_slowpath_null+0x28/0x30) r9:00000008 r8:00000000 r7:eced4808 r6:edbc9bac r5:eced4844 r4:eced4808 [] (warn_slowpath_null) from [] (__vb2_queue_cancel+0x174/0x1d8) [] (__vb2_queue_cancel) from [] (vb2_core_queue_release+0x20/0x40) r10:ecc7bd70 r9:00000008 r8:00000000 r7:edb73010 r6:edbc9bac r5:eced4844 r4:eced4808 r3:00000004 [] (vb2_core_queue_release) from [] (vb2_queue_release+0x10/0x14) r5:edbc9810 r4:eced4800 [] (vb2_queue_release) from [] (v4l2_m2m_ctx_release+0x1c/0x30) [] (v4l2_m2m_ctx_release) from [] (vpe_release+0x74/0xb0 [ti_vpe]) r5:edbc9810 r4:ed67a400 [] (vpe_release [ti_vpe]) from [] (v4l2_release+0x3c/0x80) r7:edb73010 r6:ed176aa0 r5:edbc9868 r4:ed5119c0 [] (v4l2_release) from [] (__fput+0x8c/0x1dc) r5:ecc7bd70 r4:ed5119c0 [] (__fput) from [] (____fput+0x10/0x14) r10:00000000 r9:ed5119c0 r8:ece392d0 r7:c1059544 r6:ece38d80 r5:ece392b4 r4:00000000 [] (____fput) from [] (task_work_run+0x98/0xb8) [] (task_work_run) from [] (do_exit+0x170/0xa80) r9:ece351fc r8:00000000 r7:ecde3f58 r6:ffffe000 r5:ece351c0 r4:ece38d80 [] (do_exit) from [] (do_group_exit+0x48/0xc4) r7:000000f8 [] (do_group_exit) from [] (__wake_up_parent+0x0/0x28) r7:000000f8 r6:b6c6a798 r5:00000001 r4:00000001 [] (SyS_exit_group) from [] (ret_fast_syscall+0x0/0x4c) These warnings are caused by buffers which not properly cleaned up/release during an abort use case. In the abort cases the VPDMA desc buffers would still be mapped and the in-flight VB2 buffers would not be released properly causing a kernel warning from being generated by the videobuf2-core level. Signed-off-by: Benoit Parrot Reviewed-by: Tomi Valkeinen Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/ti-vpe/vpe.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/media/platform/ti-vpe/vpe.c b/drivers/media/platform/ti-vpe/vpe.c index 5adafee98e4c..7af66fe95a54 100644 --- a/drivers/media/platform/ti-vpe/vpe.c +++ b/drivers/media/platform/ti-vpe/vpe.c @@ -1435,9 +1435,6 @@ static irqreturn_t vpe_irq(int irq_vpe, void *data) /* the previous dst mv buffer becomes the next src mv buffer */ ctx->src_mv_buf_selector = !ctx->src_mv_buf_selector; - if (ctx->aborting) - goto finished; - s_vb = ctx->src_vbs[0]; d_vb = ctx->dst_vb; @@ -1502,6 +1499,9 @@ static irqreturn_t vpe_irq(int irq_vpe, void *data) ctx->src_vbs[0] = NULL; ctx->dst_vb = NULL; + if (ctx->aborting) + goto finished; + ctx->bufs_completed++; if (ctx->bufs_completed < ctx->bufs_per_job && job_ready(ctx)) { device_run(ctx); @@ -2400,6 +2400,12 @@ static int vpe_release(struct file *file) mutex_lock(&dev->dev_mutex); free_mv_buffers(ctx); + + vpdma_unmap_desc_buf(dev->vpdma, &ctx->desc_list.buf); + vpdma_unmap_desc_buf(dev->vpdma, &ctx->mmr_adb); + vpdma_unmap_desc_buf(dev->vpdma, &ctx->sc_coeff_h); + vpdma_unmap_desc_buf(dev->vpdma, &ctx->sc_coeff_v); + vpdma_free_desc_list(&ctx->desc_list); vpdma_free_desc_buf(&ctx->mmr_adb); From 23df4c40de1ebe4fbf97d1303990bf5601889bc2 Mon Sep 17 00:00:00 2001 From: Benoit Parrot Date: Mon, 7 Oct 2019 12:09:59 -0300 Subject: [PATCH 1809/3715] media: ti-vpe: vpe: fix a v4l2-compliance failure about invalid sizeimage [ Upstream commit 0bac73adea4df8d34048b38f6ff24dc3e73e90b6 ] v4l2-compliance fails with this message: fail: v4l2-test-formats.cpp(463): !pfmt.sizeimage fail: v4l2-test-formats.cpp(736): \ Video Capture Multiplanar is valid, \ but TRY_FMT failed to return a format test VIDIOC_TRY_FMT: FAIL This failure is causd by the driver failing to handle out range 'bytesperline' values from user space applications. VPDMA hardware is limited to 64k line stride (16 bytes aligned, so 65520 bytes). So make sure the provided or calculated 'bytesperline' is smaller than the maximum value. Signed-off-by: Benoit Parrot Reviewed-by: Tomi Valkeinen Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/ti-vpe/vpdma.h | 1 + drivers/media/platform/ti-vpe/vpe.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/drivers/media/platform/ti-vpe/vpdma.h b/drivers/media/platform/ti-vpe/vpdma.h index 7e611501c291..f29074c84915 100644 --- a/drivers/media/platform/ti-vpe/vpdma.h +++ b/drivers/media/platform/ti-vpe/vpdma.h @@ -60,6 +60,7 @@ struct vpdma_data_format { * line stride of source and dest * buffers should be 16 byte aligned */ +#define VPDMA_MAX_STRIDE 65520 /* Max line stride 16 byte aligned */ #define VPDMA_DTD_DESC_SIZE 32 /* 8 words */ #define VPDMA_CFD_CTD_DESC_SIZE 16 /* 4 words */ diff --git a/drivers/media/platform/ti-vpe/vpe.c b/drivers/media/platform/ti-vpe/vpe.c index 7af66fe95a54..2e8970c7e22d 100644 --- a/drivers/media/platform/ti-vpe/vpe.c +++ b/drivers/media/platform/ti-vpe/vpe.c @@ -1702,6 +1702,10 @@ static int __vpe_try_fmt(struct vpe_ctx *ctx, struct v4l2_format *f, if (stride > plane_fmt->bytesperline) plane_fmt->bytesperline = stride; + plane_fmt->bytesperline = clamp_t(u32, plane_fmt->bytesperline, + stride, + VPDMA_MAX_STRIDE); + plane_fmt->bytesperline = ALIGN(plane_fmt->bytesperline, VPDMA_STRIDE_ALIGN); From e9555e31fe7a853f18bd6ecea74b575e9262ce2c Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Thu, 10 Oct 2019 17:47:20 +0200 Subject: [PATCH 1810/3715] extcon: sm5502: Reset registers during initialization [ Upstream commit 6942635032cfd3e003e980d2dfa4e6323a3ce145 ] On some devices (e.g. Samsung Galaxy A5 (2015)), the bootloader seems to keep interrupts enabled for SM5502 when booting Linux. Changing the cable state (i.e. plugging in a cable) - until the driver is loaded - will therefore produce an interrupt that is never read. In this situation, the cable state will be stuck forever on the initial state because SM5502 stops sending interrupts. This can be avoided by clearing those pending interrupts after the driver has been loaded. One way to do this is to reset all registers to default state by writing to SM5502_REG_RESET. This ensures that we start from a clean state, with all interrupts disabled. Suggested-by: Chanwoo Choi Signed-off-by: Stephan Gerhold Signed-off-by: Chanwoo Choi Signed-off-by: Sasha Levin --- drivers/extcon/extcon-sm5502.c | 4 ++++ drivers/extcon/extcon-sm5502.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/drivers/extcon/extcon-sm5502.c b/drivers/extcon/extcon-sm5502.c index 106ef0297b53..1a1ee3db3455 100644 --- a/drivers/extcon/extcon-sm5502.c +++ b/drivers/extcon/extcon-sm5502.c @@ -69,6 +69,10 @@ struct sm5502_muic_info { /* Default value of SM5502 register to bring up MUIC device. */ static struct reg_data sm5502_reg_data[] = { { + .reg = SM5502_REG_RESET, + .val = SM5502_REG_RESET_MASK, + .invert = true, + }, { .reg = SM5502_REG_CONTROL, .val = SM5502_REG_CONTROL_MASK_INT_MASK, .invert = false, diff --git a/drivers/extcon/extcon-sm5502.h b/drivers/extcon/extcon-sm5502.h index 974b53222f56..12f8b01e5753 100644 --- a/drivers/extcon/extcon-sm5502.h +++ b/drivers/extcon/extcon-sm5502.h @@ -241,6 +241,8 @@ enum sm5502_reg { #define DM_DP_SWITCH_UART ((DM_DP_CON_SWITCH_UART < Date: Fri, 13 Sep 2019 14:14:02 -0700 Subject: [PATCH 1811/3715] x86/mm: Use the correct function type for native_set_fixmap() [ Upstream commit f53e2cd0b8ab7d9e390414470bdbd830f660133f ] We call native_set_fixmap indirectly through the function pointer struct pv_mmu_ops::set_fixmap, which expects the first parameter to be 'unsigned' instead of 'enum fixed_addresses'. This patch changes the function type for native_set_fixmap to match the pointer, which fixes indirect call mismatches with Control-Flow Integrity (CFI) checking. Signed-off-by: Sami Tolvanen Reviewed-by: Kees Cook Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H . Peter Anvin Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190913211402.193018-1-samitolvanen@google.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- arch/x86/include/asm/fixmap.h | 2 +- arch/x86/mm/pgtable.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 6390bd8c141b..5e12b2319d7a 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -159,7 +159,7 @@ extern pte_t *kmap_pte; extern pte_t *pkmap_page_table; void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); -void native_set_fixmap(enum fixed_addresses idx, +void native_set_fixmap(unsigned /* enum fixed_addresses */ idx, phys_addr_t phys, pgprot_t flags); #ifndef CONFIG_PARAVIRT diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index b4fd36271f90..55338b392221 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -590,8 +590,8 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte) fixmaps_set++; } -void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, - pgprot_t flags) +void native_set_fixmap(unsigned /* enum fixed_addresses */ idx, + phys_addr_t phys, pgprot_t flags) { __native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags)); } From 88d9e1534feb485eee5455b356308b9aedf4c42f Mon Sep 17 00:00:00 2001 From: Daniel Kurtz Date: Tue, 8 Oct 2019 18:21:45 +0800 Subject: [PATCH 1812/3715] drm/bridge: dw-hdmi: Restore audio when setting a mode [ Upstream commit fadfee3f9d8f114435a8a3e9f83a227600d89de7 ] When setting a new display mode, dw_hdmi_setup() calls dw_hdmi_enable_video_path(), which disables all hdmi clocks, including the audio clock. We should only (re-)enable the audio clock if audio was already enabled when setting the new mode. Without this patch, on RK3288, there will be HDMI audio on some monitors if i2s was played to headphone when the monitor was plugged. ACER H277HU and ASUS PB278 are two of the monitors showing this issue. Signed-off-by: Cheng-Yi Chiang Signed-off-by: Daniel Kurtz Signed-off-by: Yakir Yang Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20191008102145.55134-1-cychiang@chromium.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/synopsys/dw-hdmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index 0febaafb8d89..cc1094f90125 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -1743,7 +1743,7 @@ static int dw_hdmi_setup(struct dw_hdmi *hdmi, struct drm_display_mode *mode) /* HDMI Initialization Step E - Configure audio */ hdmi_clk_regenerator_update_pixel_clock(hdmi); - hdmi_enable_audio_clk(hdmi, true); + hdmi_enable_audio_clk(hdmi, hdmi->audio_enable); } /* not for DVI mode */ From 30e2c60c63d14c9e9c162eb854367203eb48d278 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 11 Oct 2019 17:19:41 +0800 Subject: [PATCH 1813/3715] perf test: Report failure for mmap events [ Upstream commit 6add129c5d9210ada25217abc130df0b7096ee02 ] When fail to mmap events in task exit case, it misses to set 'err' to -1; thus the testing will not report failure for it. This patch sets 'err' to -1 when fails to mmap events, thus Perf tool can report correct result. Fixes: d723a55096b8 ("perf test: Add test case for checking number of EXIT events") Signed-off-by: Leo Yan Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/20191011091942.29841-1-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/tests/task-exit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index 89c8e1604ca7..94fe5464bc6f 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -104,6 +104,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused if (perf_evlist__mmap(evlist, 128, true) < 0) { pr_debug("failed to mmap events: %d (%s)\n", errno, str_error_r(errno, sbuf, sizeof(sbuf))); + err = -1; goto out_delete_evlist; } From 3b6b823da2b4798ea48a1089f1078842d0592f3c Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 11 Oct 2019 10:21:22 +0800 Subject: [PATCH 1814/3715] perf report: Add warning when libunwind not compiled in [ Upstream commit 800d3f561659b5436f8c57e7c26dd1f6928b5615 ] We received a user report that call-graph DWARF mode was enabled in 'perf record' but 'perf report' didn't unwind the callstack correctly. The reason was, libunwind was not compiled in. We can use 'perf -vv' to check the compiled libraries but it would be valuable to report a warning to user directly (especially valuable for a perf newbie). The warning is: Warning: Please install libunwind development packages during the perf build. Both TUI and stdio are supported. Signed-off-by: Jin Yao Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191011022122.26369-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-report.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 4ddb0726eebc..fd4dd12b8f9d 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -342,6 +342,13 @@ static int report__setup_sample_type(struct report *rep) PERF_SAMPLE_BRANCH_ANY)) rep->nonany_branch_mode = true; +#ifndef HAVE_LIBUNWIND_SUPPORT + if (dwarf_callchain_users) { + ui__warning("Please install libunwind development packages " + "during the perf build.\n"); + } +#endif + return 0; } From 8877aaf6afceb442f24e962eace325e6ebd7ca95 Mon Sep 17 00:00:00 2001 From: Ingo Rohloff Date: Fri, 11 Oct 2019 13:55:18 +0200 Subject: [PATCH 1815/3715] usb: usbfs: Suppress problematic bind and unbind uevents. [ Upstream commit abb0b3d96a1f9407dd66831ae33985a386d4200d ] commit 1455cf8dbfd0 ("driver core: emit uevents when device is bound to a driver") added bind and unbind uevents when a driver is bound or unbound to a physical device. For USB devices which are handled via the generic usbfs layer (via libusb for example), this is problematic: Each time a user space program calls ioctl(usb_fd, USBDEVFS_CLAIMINTERFACE, &usb_intf_nr); and then later ioctl(usb_fd, USBDEVFS_RELEASEINTERFACE, &usb_intf_nr); The kernel will now produce a bind or unbind event, which does not really contain any useful information. This allows a user space program to run a DoS attack against programs which listen to uevents (in particular systemd/eudev/upowerd): A malicious user space program just has to call in a tight loop ioctl(usb_fd, USBDEVFS_CLAIMINTERFACE, &usb_intf_nr); ioctl(usb_fd, USBDEVFS_RELEASEINTERFACE, &usb_intf_nr); With this loop the malicious user space program floods the kernel and all programs listening to uevents with tons of bind and unbind events. This patch suppresses uevents for ioctls USBDEVFS_CLAIMINTERFACE and USBDEVFS_RELEASEINTERFACE. Signed-off-by: Ingo Rohloff Link: https://lore.kernel.org/r/20191011115518.2801-1-ingo.rohloff@lauterbach.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/core/devio.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 62b2a7105f02..4fb4cf8c2f14 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -755,8 +755,15 @@ static int claimintf(struct usb_dev_state *ps, unsigned int ifnum) intf = usb_ifnum_to_if(dev, ifnum); if (!intf) err = -ENOENT; - else + else { + unsigned int old_suppress; + + /* suppress uevents while claiming interface */ + old_suppress = dev_get_uevent_suppress(&intf->dev); + dev_set_uevent_suppress(&intf->dev, 1); err = usb_driver_claim_interface(&usbfs_driver, intf, ps); + dev_set_uevent_suppress(&intf->dev, old_suppress); + } if (err == 0) set_bit(ifnum, &ps->ifclaimed); return err; @@ -776,7 +783,13 @@ static int releaseintf(struct usb_dev_state *ps, unsigned int ifnum) if (!intf) err = -ENOENT; else if (test_and_clear_bit(ifnum, &ps->ifclaimed)) { + unsigned int old_suppress; + + /* suppress uevents while releasing interface */ + old_suppress = dev_get_uevent_suppress(&intf->dev); + dev_set_uevent_suppress(&intf->dev, 1); usb_driver_release_interface(&usbfs_driver, intf); + dev_set_uevent_suppress(&intf->dev, old_suppress); err = 0; } return err; From b94653802a0a4ca027f8d9ff3ea79b4ebd2aa52e Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 11 Oct 2019 16:43:42 +0200 Subject: [PATCH 1816/3715] iio: adc: max1027: Reset the device at probe time [ Upstream commit db033831b4f5589f9fcbadb837614a7c4eac0308 ] All the registers are configured by the driver, let's reset the chip at probe time, avoiding any conflict with a possible earlier configuration. Signed-off-by: Miquel Raynal Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/adc/max1027.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/iio/adc/max1027.c b/drivers/iio/adc/max1027.c index ebc715927e63..03af02769370 100644 --- a/drivers/iio/adc/max1027.c +++ b/drivers/iio/adc/max1027.c @@ -462,6 +462,14 @@ static int max1027_probe(struct spi_device *spi) goto fail_dev_register; } + /* Internal reset */ + st->reg = MAX1027_RST_REG; + ret = spi_write(st->spi, &st->reg, 1); + if (ret < 0) { + dev_err(&indio_dev->dev, "Failed to reset the ADC\n"); + return ret; + } + /* Disable averaging */ st->reg = MAX1027_AVG_REG; ret = spi_write(st->spi, &st->reg, 1); From 3eb99789ca2cffecb2a4c4228341ef871bca6e08 Mon Sep 17 00:00:00 2001 From: "Ben Dooks (Codethink)" Date: Wed, 16 Oct 2019 12:39:43 +0100 Subject: [PATCH 1817/3715] Bluetooth: missed cpu_to_le16 conversion in hci_init4_req [ Upstream commit 727ea61a5028f8ac96f75ab34cb1b56e63fd9227 ] It looks like in hci_init4_req() the request is being initialised from cpu-endian data but the packet is specified to be little-endian. This causes an warning from sparse due to __le16 to u16 conversion. Fix this by using cpu_to_le16() on the two fields in the packet. net/bluetooth/hci_core.c:845:27: warning: incorrect type in assignment (different base types) net/bluetooth/hci_core.c:845:27: expected restricted __le16 [usertype] tx_len net/bluetooth/hci_core.c:845:27: got unsigned short [usertype] le_max_tx_len net/bluetooth/hci_core.c:846:28: warning: incorrect type in assignment (different base types) net/bluetooth/hci_core.c:846:28: expected restricted __le16 [usertype] tx_time net/bluetooth/hci_core.c:846:28: got unsigned short [usertype] le_max_tx_time Signed-off-by: Ben Dooks Signed-off-by: Marcel Holtmann Signed-off-by: Sasha Levin --- net/bluetooth/hci_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 6bc679cd3481..d6d7364838f4 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -802,8 +802,8 @@ static int hci_init4_req(struct hci_request *req, unsigned long opt) if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) { struct hci_cp_le_write_def_data_len cp; - cp.tx_len = hdev->le_max_tx_len; - cp.tx_time = hdev->le_max_tx_time; + cp.tx_len = cpu_to_le16(hdev->le_max_tx_len); + cp.tx_time = cpu_to_le16(hdev->le_max_tx_time); hci_req_add(req, HCI_OP_LE_WRITE_DEF_DATA_LEN, sizeof(cp), &cp); } From 367028e7e7c6f17d8e039a43cd9d73b032430401 Mon Sep 17 00:00:00 2001 From: Mattijs Korpershoek Date: Wed, 16 Oct 2019 20:20:39 -0700 Subject: [PATCH 1818/3715] Bluetooth: hci_core: fix init for HCI_USER_CHANNEL [ Upstream commit eb8c101e28496888a0dcfe16ab86a1bee369e820 ] During the setup() stage, HCI device drivers expect the chip to acknowledge its setup() completion via vendor specific frames. If userspace opens() such HCI device in HCI_USER_CHANNEL [1] mode, the vendor specific frames are never tranmitted to the driver, as they are filtered in hci_rx_work(). Allow HCI devices which operate in HCI_USER_CHANNEL mode to receive frames if the HCI device is is HCI_INIT state. [1] https://www.spinics.net/lists/linux-bluetooth/msg37345.html Fixes: 23500189d7e0 ("Bluetooth: Introduce new HCI socket channel for user operation") Signed-off-by: Mattijs Korpershoek Signed-off-by: Marcel Holtmann Signed-off-by: Sasha Levin --- net/bluetooth/hci_core.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index d6d7364838f4..ff80a9d41ce1 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -4215,7 +4215,14 @@ static void hci_rx_work(struct work_struct *work) hci_send_to_sock(hdev, skb); } - if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { + /* If the device has been opened in HCI_USER_CHANNEL, + * the userspace has exclusive access to device. + * When device is HCI_INIT, we still need to process + * the data packets to the driver in order + * to complete its setup(). + */ + if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && + !test_bit(HCI_INIT, &hdev->flags)) { kfree_skb(skb); continue; } From a3f3756387d359f03a7c9544452d80b6dd21579c Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Wed, 9 Oct 2019 17:54:24 +0200 Subject: [PATCH 1819/3715] x86/mce: Lower throttling MCE messages' priority to warning [ Upstream commit 9c3bafaa1fd88e4dd2dba3735a1f1abb0f2c7bb7 ] On modern CPUs it is quite normal that the temperature limits are reached and the CPU is throttled. In fact, often the thermal design is not sufficient to cool the CPU at full load and limits can quickly be reached when a burst in load happens. This will even happen with technologies like RAPL limitting the long term power consumption of the package. Also, these limits are "softer", as Srinivas explains: "CPU temperature doesn't have to hit max(TjMax) to get these warnings. OEMs ha[ve] an ability to program a threshold where a thermal interrupt can be generated. In some systems the offset is 20C+ (Read only value). In recent systems, there is another offset on top of it which can be programmed by OS, once some agent can adjust power limits dynamically. By default this is set to low by the firmware, which I guess the prime motivation of Benjamin to submit the patch." So these messages do not usually indicate a hardware issue (e.g. insufficient cooling). Log them as warnings to avoid confusion about their severity. [ bp: Massage commit mesage. ] Signed-off-by: Benjamin Berg Signed-off-by: Borislav Petkov Reviewed-by: Hans de Goede Tested-by: Christian Kellner Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: linux-edac Cc: Peter Zijlstra Cc: Srinivas Pandruvada Cc: Thomas Gleixner Cc: Tony Luck Cc: x86-ml Link: https://lkml.kernel.org/r/20191009155424.249277-1-bberg@redhat.com Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/mcheck/therm_throt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index ee229ceee745..ec6a07b04fdb 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -185,7 +185,7 @@ static void therm_throt_process(bool new_event, int event, int level) /* if we just entered the thermal event */ if (new_event) { if (event == THERMAL_THROTTLING_EVENT) - pr_crit("CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n", + pr_warn("CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n", this_cpu, level == CORE_LEVEL ? "Core" : "Package", state->count); From 4d4d3240a7a9defaac373ca5c599dcdea9c06279 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 17 Oct 2019 23:41:50 -0500 Subject: [PATCH 1820/3715] drm/gma500: fix memory disclosures due to uninitialized bytes [ Upstream commit ec3b7b6eb8c90b52f61adff11b6db7a8db34de19 ] "clock" may be copied to "best_clock". Initializing best_clock is not sufficient. The fix initializes clock as well to avoid memory disclosures and informaiton leaks. Signed-off-by: Kangjie Lu Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20191018044150.1899-1-kjlu@umn.edu Signed-off-by: Sasha Levin --- drivers/gpu/drm/gma500/oaktrail_crtc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/gma500/oaktrail_crtc.c b/drivers/gpu/drm/gma500/oaktrail_crtc.c index 0fff269d3fe6..42785f3df60f 100644 --- a/drivers/gpu/drm/gma500/oaktrail_crtc.c +++ b/drivers/gpu/drm/gma500/oaktrail_crtc.c @@ -139,6 +139,7 @@ static bool mrst_sdvo_find_best_pll(const struct gma_limit_t *limit, s32 freq_error, min_error = 100000; memset(best_clock, 0, sizeof(*best_clock)); + memset(&clock, 0, sizeof(clock)); for (clock.m = limit->m.min; clock.m <= limit->m.max; clock.m++) { for (clock.n = limit->n.min; clock.n <= limit->n.max; @@ -195,6 +196,7 @@ static bool mrst_lvds_find_best_pll(const struct gma_limit_t *limit, int err = target; memset(best_clock, 0, sizeof(*best_clock)); + memset(&clock, 0, sizeof(clock)); for (clock.m = limit->m.min; clock.m <= limit->m.max; clock.m++) { for (clock.p1 = limit->p1.min; clock.p1 <= limit->p1.max; From bac95ac5731fe7efa43c505b3e55bded37951749 Mon Sep 17 00:00:00 2001 From: Chris Chiu Date: Wed, 16 Oct 2019 09:54:08 +0800 Subject: [PATCH 1821/3715] rtl8xxxu: fix RTL8723BU connection failure issue after warm reboot [ Upstream commit 0eeb91ade90ce06d2fa1e2fcb55e3316b64c203c ] The RTL8723BU has problems connecting to AP after each warm reboot. Sometimes it returns no scan result, and in most cases, it fails the authentication for unknown reason. However, it works totally fine after cold reboot. Compare the value of register SYS_CR and SYS_CLK_MAC_CLK_ENABLE for cold reboot and warm reboot, the registers imply that the MAC is already powered and thus some procedures are skipped during driver initialization. Double checked the vendor driver, it reads the SYS_CR and SYS_CLK_MAC_CLK_ENABLE also but doesn't skip any during initialization based on them. This commit only tells the RTL8723BU to do full initialization without checking MAC status. Signed-off-by: Chris Chiu Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h | 1 + drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c | 1 + drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 3 +++ 3 files changed, 5 insertions(+) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h index 95e3993d8a33..a895b6fd6f85 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h @@ -1349,6 +1349,7 @@ struct rtl8xxxu_fileops { u8 has_s0s1:1; u8 has_tx_report:1; u8 gen2_thermal_meter:1; + u8 needs_full_init:1; u32 adda_1t_init; u32 adda_1t_path_on; u32 adda_2t_path_on_a; diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c index c4b86a84a721..27e97df996c7 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c @@ -1673,6 +1673,7 @@ struct rtl8xxxu_fileops rtl8723bu_fops = { .has_s0s1 = 1, .has_tx_report = 1, .gen2_thermal_meter = 1, + .needs_full_init = 1, .adda_1t_init = 0x01c00014, .adda_1t_path_on = 0x01c00014, .adda_2t_path_on_a = 0x01c00014, diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 91b01ca32e75..b58bf8e2cad2 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -3905,6 +3905,9 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) else macpower = true; + if (fops->needs_full_init) + macpower = false; + ret = fops->power_on(priv); if (ret < 0) { dev_warn(dev, "%s: Failed power on\n", __func__); From c50ca0128abfd7692e72706126e357c34d752f64 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 17 Oct 2019 12:19:01 +0200 Subject: [PATCH 1822/3715] x86/ioapic: Prevent inconsistent state when moving an interrupt [ Upstream commit df4393424af3fbdcd5c404077176082a8ce459c4 ] There is an issue with threaded interrupts which are marked ONESHOT and using the fasteoi handler: if (IS_ONESHOT()) mask_irq(); .... cond_unmask_eoi_irq() chip->irq_eoi(); if (setaffinity_pending) { mask_ioapic(); ... move_affinity(); unmask_ioapic(); } So if setaffinity is pending the interrupt will be moved and then unconditionally unmasked at the ioapic level, which is wrong in two aspects: 1) It should be kept masked up to the point where the threaded handler finished. 2) The physical chip state and the software masked state are inconsistent Guard both the mask and the unmask with a check for the software masked state. If the line is marked masked then the ioapic line is also masked, so both mask_ioapic() and unmask_ioapic() can be skipped safely. Signed-off-by: Thomas Gleixner Cc: Andy Shevchenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sebastian Siewior Fixes: 3aa551c9b4c4 ("genirq: add threaded interrupt handler support") Link: https://lkml.kernel.org/r/20191017101938.321393687@linutronix.de Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- arch/x86/kernel/apic/io_apic.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 566b7bc5deaa..2271adbc3c42 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1690,9 +1690,10 @@ static bool io_apic_level_ack_pending(struct mp_chip_data *data) static inline bool ioapic_irqd_mask(struct irq_data *data) { - /* If we are moving the irq we need to mask it */ + /* If we are moving the IRQ we need to mask it */ if (unlikely(irqd_is_setaffinity_pending(data))) { - mask_ioapic_irq(data); + if (!irqd_irq_masked(data)) + mask_ioapic_irq(data); return true; } return false; @@ -1729,7 +1730,9 @@ static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked) */ if (!io_apic_level_ack_pending(data->chip_data)) irq_move_masked_irq(data); - unmask_ioapic_irq(data); + /* If the IRQ is masked in the core, leave it: */ + if (!irqd_irq_masked(data)) + unmask_ioapic_irq(data); } } #else From 893a491267c56b9f7f8917c57241aad5ae6e77f6 Mon Sep 17 00:00:00 2001 From: Yunfeng Ye Date: Mon, 21 Oct 2019 19:31:21 +0800 Subject: [PATCH 1823/3715] arm64: psci: Reduce the waiting time for cpu_psci_cpu_kill() [ Upstream commit bfcef4ab1d7ee8921bc322109b1692036cc6cbe0 ] In cases like suspend-to-disk and suspend-to-ram, a large number of CPU cores need to be shut down. At present, the CPU hotplug operation is serialised, and the CPU cores can only be shut down one by one. In this process, if PSCI affinity_info() does not return LEVEL_OFF quickly, cpu_psci_cpu_kill() needs to wait for 10ms. If hundreds of CPU cores need to be shut down, it will take a long time. Normally, there is no need to wait 10ms in cpu_psci_cpu_kill(). So change the wait interval from 10 ms to max 1 ms and use usleep_range() instead of msleep() for more accurate timer. In addition, reducing the time interval will increase the messages output, so remove the "Retry ..." message, instead, track time and output to the the sucessful message. Signed-off-by: Yunfeng Ye Reviewed-by: Sudeep Holla Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- arch/arm64/kernel/psci.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index e8edbf13302a..3856d51c645b 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -84,7 +84,8 @@ static void cpu_psci_cpu_die(unsigned int cpu) static int cpu_psci_cpu_kill(unsigned int cpu) { - int err, i; + int err; + unsigned long start, end; if (!psci_ops.affinity_info) return 0; @@ -94,16 +95,18 @@ static int cpu_psci_cpu_kill(unsigned int cpu) * while it is dying. So, try again a few times. */ - for (i = 0; i < 10; i++) { + start = jiffies; + end = start + msecs_to_jiffies(100); + do { err = psci_ops.affinity_info(cpu_logical_map(cpu), 0); if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) { - pr_info("CPU%d killed.\n", cpu); + pr_info("CPU%d killed (polled %d ms)\n", cpu, + jiffies_to_msecs(jiffies - start)); return 0; } - msleep(10); - pr_info("Retrying again to check for CPU kill\n"); - } + usleep_range(100, 1000); + } while (time_before(jiffies, end)); pr_warn("CPU%d may not have shut down cleanly (AFFINITY_INFO reports %d)\n", cpu, err); From 700ef8b5a98f5fdece9270c4ce616f147a7bb768 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Wed, 23 Oct 2019 17:48:45 +0300 Subject: [PATCH 1824/3715] net: phy: dp83867: enable robust auto-mdix [ Upstream commit 5a7f08c2abb0efc9d17aff2fc75d6d3b85e622e4 ] The link detection timeouts can be observed (or link might not be detected at all) when dp83867 PHY is configured in manual mode (speed/duplex). CFG3[9] Robust Auto-MDIX option allows to significantly improve link detection in case dp83867 is configured in manual mode and reduce link detection time. As per DM: "If link partners are configured to operational modes that are not supported by normal Auto MDI/MDIX mode (like Auto-Neg versus Force 100Base-TX or Force 100Base-TX versus Force 100Base-TX), this Robust Auto MDI/MDIX mode allows MDI/MDIX resolution and prevents deadlock." Hence, enable this option by default as there are no known reasons not to do so. Signed-off-by: Grygorii Strashko Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/phy/dp83867.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index e03e91d5f1b1..0cbcced0870e 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -84,6 +84,10 @@ #define DP83867_IO_MUX_CFG_IO_IMPEDANCE_MAX 0x0 #define DP83867_IO_MUX_CFG_IO_IMPEDANCE_MIN 0x1f +/* CFG3 bits */ +#define DP83867_CFG3_INT_OE BIT(7) +#define DP83867_CFG3_ROBUST_AUTO_MDIX BIT(9) + /* CFG4 bits */ #define DP83867_CFG4_PORT_MIRROR_EN BIT(0) @@ -320,12 +324,13 @@ static int dp83867_config_init(struct phy_device *phydev) return ret; } + val = phy_read(phydev, DP83867_CFG3); /* Enable Interrupt output INT_OE in CFG3 register */ - if (phy_interrupt_is_valid(phydev)) { - val = phy_read(phydev, DP83867_CFG3); - val |= BIT(7); - phy_write(phydev, DP83867_CFG3, val); - } + if (phy_interrupt_is_valid(phydev)) + val |= DP83867_CFG3_INT_OE; + + val |= DP83867_CFG3_ROBUST_AUTO_MDIX; + phy_write(phydev, DP83867_CFG3, val); if (dp83867->port_mirroring != DP83867_PORT_MIRROING_KEEP) dp83867_config_port_mirroring(phydev); From bd77e656941aeeb3a7ed31e33878b5df433be10c Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Sun, 27 Oct 2019 22:04:51 +0200 Subject: [PATCH 1825/3715] RDMA/qedr: Fix memory leak in user qp and mr [ Upstream commit 24e412c1e00ebfe73619e6b88cbc26c2c7d41b85 ] User QPs pbl's weren't freed properly. MR pbls weren't freed properly. Fixes: e0290cce6ac0 ("qedr: Add support for memory registeration verbs") Link: https://lore.kernel.org/r/20191027200451.28187-5-michal.kalderon@marvell.com Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/qedr/verbs.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 7f4cc9336442..656e7c1a4449 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1343,6 +1343,14 @@ static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp) if (qp->urq.umem) ib_umem_release(qp->urq.umem); qp->urq.umem = NULL; + + if (rdma_protocol_roce(&dev->ibdev, 1)) { + qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl); + qedr_free_pbl(dev, &qp->urq.pbl_info, qp->urq.pbl_tbl); + } else { + kfree(qp->usq.pbl_tbl); + kfree(qp->urq.pbl_tbl); + } } static int qedr_create_user_qp(struct qedr_dev *dev, @@ -2331,8 +2339,8 @@ int qedr_dereg_mr(struct ib_mr *ib_mr) dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); - if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR)) - qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table); + if (mr->type != QEDR_MR_DMA) + free_mr_info(dev, &mr->info); /* it could be user registered memory. */ if (mr->umem) From 23e22b632a980c26809fdc50c567de6112f45faf Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 28 Oct 2019 13:37:12 +0100 Subject: [PATCH 1826/3715] gpu: host1x: Allocate gather copy for host1x [ Upstream commit b78e70c04c149299bd210759d7c7af7c86b89ca8 ] Currently when the gather buffers are copied, they are copied to a buffer that is allocated for the host1x client that wants to execute the command streams in the buffers. However, the gather buffers will be read by the host1x device, which causes SMMU faults if the DMA API is backed by an IOMMU. Fix this by allocating the gather buffer copy for the host1x device, which makes sure that it will be mapped into the host1x's IOVA space if the DMA API is backed by an IOMMU. Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/gpu/host1x/job.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index acd99783bbca..67f3c050c4cf 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -545,7 +545,8 @@ out: return err; } -static inline int copy_gathers(struct host1x_job *job, struct device *dev) +static inline int copy_gathers(struct device *host, struct host1x_job *job, + struct device *dev) { struct host1x_firewall fw; size_t size = 0; @@ -570,12 +571,12 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev) * Try a non-blocking allocation from a higher priority pools first, * as awaiting for the allocation here is a major performance hit. */ - job->gather_copy_mapped = dma_alloc_wc(dev, size, &job->gather_copy, + job->gather_copy_mapped = dma_alloc_wc(host, size, &job->gather_copy, GFP_NOWAIT); /* the higher priority allocation failed, try the generic-blocking */ if (!job->gather_copy_mapped) - job->gather_copy_mapped = dma_alloc_wc(dev, size, + job->gather_copy_mapped = dma_alloc_wc(host, size, &job->gather_copy, GFP_KERNEL); if (!job->gather_copy_mapped) @@ -636,7 +637,7 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev) goto out; if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) { - err = copy_gathers(job, dev); + err = copy_gathers(host->dev, job, dev); if (err) goto out; } @@ -701,7 +702,7 @@ void host1x_job_unpin(struct host1x_job *job) job->num_unpins = 0; if (job->gather_copy_size) - dma_free_wc(job->channel->dev, job->gather_copy_size, + dma_free_wc(host->dev, job->gather_copy_size, job->gather_copy_mapped, job->gather_copy); } EXPORT_SYMBOL(host1x_job_unpin); From 8e08c6af8694ef89bc6be0458ec59941649c7284 Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Sat, 26 Oct 2019 10:21:39 +0800 Subject: [PATCH 1827/3715] net: dsa: LAN9303: select REGMAP when LAN9303 enable [ Upstream commit b6989d248a2d13f02895bae1a9321b3bbccc0283 ] When NET_DSA_SMSC_LAN9303=y and NET_DSA_SMSC_LAN9303_MDIO=y, below errors can be seen: drivers/net/dsa/lan9303_mdio.c:87:23: error: REGMAP_ENDIAN_LITTLE undeclared here (not in a function) .reg_format_endian = REGMAP_ENDIAN_LITTLE, drivers/net/dsa/lan9303_mdio.c:93:3: error: const struct regmap_config has no member named reg_read .reg_read = lan9303_mdio_read, It should select REGMAP in config NET_DSA_SMSC_LAN9303. Fixes: dc7005831523 ("net: dsa: LAN9303: add MDIO managed mode support") Signed-off-by: Mao Wenan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/dsa/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig index 83a9bc892a3b..6ae13f2419e3 100644 --- a/drivers/net/dsa/Kconfig +++ b/drivers/net/dsa/Kconfig @@ -55,6 +55,7 @@ config NET_DSA_QCA8K config NET_DSA_SMSC_LAN9303 tristate select NET_DSA_TAG_LAN9303 + select REGMAP ---help--- This enables support for the SMSC/Microchip LAN9303 3 port ethernet switch chips. From 17972ec7d6fff2d32e0a7022164402c25e264e2a Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Tue, 8 Oct 2019 13:52:08 +0200 Subject: [PATCH 1828/3715] phy: qcom-usb-hs: Fix extcon double register after power cycle [ Upstream commit 64f86b9978449ff05bfa6c64b4c5439e21e9c80b ] Commit f0b5c2c96370 ("phy: qcom-usb-hs: Replace the extcon API") switched from extcon_register_notifier() to the resource-managed API, i.e. devm_extcon_register_notifier(). This is problematic in this case, because the extcon notifier is dynamically registered/unregistered whenever the PHY is powered on/off. The resource-managed API does not unregister the notifier until the driver is removed, so as soon as the PHY is power cycled, attempting to register the notifier again results in: double register detected WARNING: CPU: 1 PID: 182 at kernel/notifier.c:26 notifier_chain_register+0x74/0xa0 Call trace: ... extcon_register_notifier+0x74/0xb8 devm_extcon_register_notifier+0x54/0xb8 qcom_usb_hs_phy_power_on+0x1fc/0x208 ... ... and USB stops working after plugging the cable out and in another time. The easiest way to fix this is to make a partial revert of commit f0b5c2c96370 ("phy: qcom-usb-hs: Replace the extcon API") and avoid using the resource-managed API in this case. Fixes: f0b5c2c96370 ("phy: qcom-usb-hs: Replace the extcon API") Signed-off-by: Stephan Gerhold Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Sasha Levin --- drivers/phy/qualcomm/phy-qcom-usb-hs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-usb-hs.c b/drivers/phy/qualcomm/phy-qcom-usb-hs.c index 2d0c70b5589f..643934a2a70c 100644 --- a/drivers/phy/qualcomm/phy-qcom-usb-hs.c +++ b/drivers/phy/qualcomm/phy-qcom-usb-hs.c @@ -159,8 +159,8 @@ static int qcom_usb_hs_phy_power_on(struct phy *phy) /* setup initial state */ qcom_usb_hs_phy_vbus_notifier(&uphy->vbus_notify, state, uphy->vbus_edev); - ret = devm_extcon_register_notifier(&ulpi->dev, uphy->vbus_edev, - EXTCON_USB, &uphy->vbus_notify); + ret = extcon_register_notifier(uphy->vbus_edev, EXTCON_USB, + &uphy->vbus_notify); if (ret) goto err_ulpi; } @@ -181,6 +181,9 @@ static int qcom_usb_hs_phy_power_off(struct phy *phy) { struct qcom_usb_hs_phy *uphy = phy_get_drvdata(phy); + if (uphy->vbus_edev) + extcon_unregister_notifier(uphy->vbus_edev, EXTCON_USB, + &uphy->vbus_notify); regulator_disable(uphy->v3p3); regulator_disable(uphy->v1p8); clk_disable_unprepare(uphy->sleep_clk); From de3c88b857384e393979dc9b6e85bce88e010f64 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 29 Oct 2019 14:09:47 +0100 Subject: [PATCH 1829/3715] s390/time: ensure get_clock_monotonic() returns monotonic values [ Upstream commit 011620688a71f2f1fe9901dbc2479a7c01053196 ] The current implementation of get_clock_monotonic() leaves it up to the caller to call the function with preemption disabled. The only core kernel caller (sched_clock) however does not disable preemption. In order to make sure that all callers of this function see monotonic values handle disabling preemption within the function itself. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- arch/s390/include/asm/timex.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 64539c221672..0f12a3f91282 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -10,8 +10,9 @@ #ifndef _ASM_S390_TIMEX_H #define _ASM_S390_TIMEX_H -#include +#include #include +#include /* The value of the TOD clock for 1.1.1970. */ #define TOD_UNIX_EPOCH 0x7d91048bca000000ULL @@ -186,15 +187,18 @@ extern unsigned char tod_clock_base[16] __aligned(8); /** * get_clock_monotonic - returns current time in clock rate units * - * The caller must ensure that preemption is disabled. * The clock and tod_clock_base get changed via stop_machine. - * Therefore preemption must be disabled when calling this - * function, otherwise the returned value is not guaranteed to - * be monotonic. + * Therefore preemption must be disabled, otherwise the returned + * value is not guaranteed to be monotonic. */ static inline unsigned long long get_tod_clock_monotonic(void) { - return get_tod_clock() - *(unsigned long long *) &tod_clock_base[1]; + unsigned long long tod; + + preempt_disable(); + tod = get_tod_clock() - *(unsigned long long *) &tod_clock_base[1]; + preempt_enable(); + return tod; } /** From fbd35049727bdd819508a06fc803b68e5a9d1c77 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Tue, 22 Oct 2019 14:38:08 +0200 Subject: [PATCH 1830/3715] s390/mm: add mm_pxd_folded() checks to pxd_free() [ Upstream commit 2416cefc504ba8ae9b17e3e6b40afc72708f96be ] Unlike pxd_free_tlb(), the pxd_free() functions do not check for folded page tables. This is not an issue so far, as those functions will actually never be called, since no code will reach them when page tables are folded. In order to avoid future issues, and to make the s390 code more similar to other architectures, add mm_pxd_folded() checks, similar to how it is done in pxd_free_tlb(). This was found by testing a patch from from Anshuman Khandual, which is currently discussed on LKML ("mm/debug: Add tests validating architecture page table helpers"). Signed-off-by: Gerald Schaefer Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- arch/s390/include/asm/pgalloc.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index bbe99cb8219d..11857fea993c 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -70,7 +70,12 @@ static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long address) crst_table_init(table, _REGION2_ENTRY_EMPTY); return (p4d_t *) table; } -#define p4d_free(mm, p4d) crst_table_free(mm, (unsigned long *) p4d) + +static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) +{ + if (!mm_p4d_folded(mm)) + crst_table_free(mm, (unsigned long *) p4d); +} static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) { @@ -79,7 +84,12 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) crst_table_init(table, _REGION3_ENTRY_EMPTY); return (pud_t *) table; } -#define pud_free(mm, pud) crst_table_free(mm, (unsigned long *) pud) + +static inline void pud_free(struct mm_struct *mm, pud_t *pud) +{ + if (!mm_pud_folded(mm)) + crst_table_free(mm, (unsigned long *) pud); +} static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr) { @@ -97,6 +107,8 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr) static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { + if (mm_pmd_folded(mm)) + return; pgtable_pmd_page_dtor(virt_to_page(pmd)); crst_table_free(mm, (unsigned long *) pmd); } From 6727fb4b064910866d4c292dd31838c0dca1752a Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 16 Oct 2019 18:19:52 +0800 Subject: [PATCH 1831/3715] libata: Ensure ata_port probe has completed before detach [ Upstream commit 130f4caf145c3562108b245a576db30b916199d2 ] With CONFIG_DEBUG_TEST_DRIVER_REMOVE set, we may find the following WARN: [ 23.452574] ------------[ cut here ]------------ [ 23.457190] WARNING: CPU: 59 PID: 1 at drivers/ata/libata-core.c:6676 ata_host_detach+0x15c/0x168 [ 23.466047] Modules linked in: [ 23.469092] CPU: 59 PID: 1 Comm: swapper/0 Not tainted 5.4.0-rc1-00010-g5b83fd27752b-dirty #296 [ 23.477776] Hardware name: Huawei D06 /D06, BIOS Hisilicon D06 UEFI RC0 - V1.16.01 03/15/2019 [ 23.486286] pstate: a0c00009 (NzCv daif +PAN +UAO) [ 23.491065] pc : ata_host_detach+0x15c/0x168 [ 23.495322] lr : ata_host_detach+0x88/0x168 [ 23.499491] sp : ffff800011cabb50 [ 23.502792] x29: ffff800011cabb50 x28: 0000000000000007 [ 23.508091] x27: ffff80001137f068 x26: ffff8000112c0c28 [ 23.513390] x25: 0000000000003848 x24: ffff0023ea185300 [ 23.518689] x23: 0000000000000001 x22: 00000000000014c0 [ 23.523987] x21: 0000000000013740 x20: ffff0023bdc20000 [ 23.529286] x19: 0000000000000000 x18: 0000000000000004 [ 23.534584] x17: 0000000000000001 x16: 00000000000000f0 [ 23.539883] x15: ffff0023eac13790 x14: ffff0023eb76c408 [ 23.545181] x13: 0000000000000000 x12: ffff0023eac13790 [ 23.550480] x11: ffff0023eb76c228 x10: 0000000000000000 [ 23.555779] x9 : ffff0023eac13798 x8 : 0000000040000000 [ 23.561077] x7 : 0000000000000002 x6 : 0000000000000001 [ 23.566376] x5 : 0000000000000002 x4 : 0000000000000000 [ 23.571674] x3 : ffff0023bf08a0bc x2 : 0000000000000000 [ 23.576972] x1 : 3099674201f72700 x0 : 0000000000400284 [ 23.582272] Call trace: [ 23.584706] ata_host_detach+0x15c/0x168 [ 23.588616] ata_pci_remove_one+0x10/0x18 [ 23.592615] ahci_remove_one+0x20/0x40 [ 23.596356] pci_device_remove+0x3c/0xe0 [ 23.600267] really_probe+0xdc/0x3e0 [ 23.603830] driver_probe_device+0x58/0x100 [ 23.608000] device_driver_attach+0x6c/0x90 [ 23.612169] __driver_attach+0x84/0xc8 [ 23.615908] bus_for_each_dev+0x74/0xc8 [ 23.619730] driver_attach+0x20/0x28 [ 23.623292] bus_add_driver+0x148/0x1f0 [ 23.627115] driver_register+0x60/0x110 [ 23.630938] __pci_register_driver+0x40/0x48 [ 23.635199] ahci_pci_driver_init+0x20/0x28 [ 23.639372] do_one_initcall+0x5c/0x1b0 [ 23.643199] kernel_init_freeable+0x1a4/0x24c [ 23.647546] kernel_init+0x10/0x108 [ 23.651023] ret_from_fork+0x10/0x18 [ 23.654590] ---[ end trace 634a14b675b71c13 ]--- With KASAN also enabled, we may also get many use-after-free reports. The issue is that when CONFIG_DEBUG_TEST_DRIVER_REMOVE is set, we may attempt to detach the ata_port before it has been probed. This is because the ata_ports are async probed, meaning that there is no guarantee that the ata_port has probed prior to detach. When the ata_port does probe in this scenario, we get all sorts of issues as the detach may have already happened. Fix by ensuring synchronisation with async_synchronize_full(). We could alternatively use the cookie returned from the ata_port probe async_schedule() call, but that means managing the cookie, so more complicated. Signed-off-by: John Garry Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/ata/libata-core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index cbb162b683b6..08f67c109429 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -6676,6 +6676,9 @@ void ata_host_detach(struct ata_host *host) { int i; + /* Ensure ata_port probe has completed */ + async_synchronize_full(); + for (i = 0; i < host->n_ports; i++) ata_port_detach(host->ports[i]); From 6e35f9139082ae3640d19bafcdd940de079b2503 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 30 Oct 2019 20:29:48 -0700 Subject: [PATCH 1832/3715] loop: fix no-unmap write-zeroes request behavior [ Upstream commit efcfec579f6139528c9e6925eca2bc4a36da65c6 ] Currently, if the loop device receives a WRITE_ZEROES request, it asks the underlying filesystem to punch out the range. This behavior is correct if unmapping is allowed. However, a NOUNMAP request means that the caller doesn't want us to free the storage backing the range, so punching out the range is incorrect behavior. To satisfy a NOUNMAP | WRITE_ZEROES request, loop should ask the underlying filesystem to FALLOC_FL_ZERO_RANGE, which is (according to the fallocate documentation) required to ensure that the entire range is backed by real storage, which suffices for our purposes. Fixes: 19372e2769179dd ("loop: implement REQ_OP_WRITE_ZEROES") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/loop.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index ec61dd873c93..453e3728e657 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -414,18 +414,20 @@ out_free_page: return ret; } -static int lo_discard(struct loop_device *lo, struct request *rq, loff_t pos) +static int lo_fallocate(struct loop_device *lo, struct request *rq, loff_t pos, + int mode) { /* - * We use punch hole to reclaim the free space used by the - * image a.k.a. discard. However we do not support discard if - * encryption is enabled, because it may give an attacker - * useful information. + * We use fallocate to manipulate the space mappings used by the image + * a.k.a. discard/zerorange. However we do not support this if + * encryption is enabled, because it may give an attacker useful + * information. */ struct file *file = lo->lo_backing_file; - int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE; int ret; + mode |= FALLOC_FL_KEEP_SIZE; + if ((!file->f_op->fallocate) || lo->lo_encrypt_key_size) { ret = -EOPNOTSUPP; goto out; @@ -565,9 +567,17 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq) switch (req_op(rq)) { case REQ_OP_FLUSH: return lo_req_flush(lo, rq); - case REQ_OP_DISCARD: case REQ_OP_WRITE_ZEROES: - return lo_discard(lo, rq, pos); + /* + * If the caller doesn't want deallocation, call zeroout to + * write zeroes the range. Otherwise, punch them out. + */ + return lo_fallocate(lo, rq, pos, + (rq->cmd_flags & REQ_NOUNMAP) ? + FALLOC_FL_ZERO_RANGE : + FALLOC_FL_PUNCH_HOLE); + case REQ_OP_DISCARD: + return lo_fallocate(lo, rq, pos, FALLOC_FL_PUNCH_HOLE); case REQ_OP_WRITE: if (lo->transfer) return lo_write_transfer(lo, rq, pos); From 8f143cafa6f7b620923851a7c1a5442a44503b83 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 24 Oct 2019 15:13:08 +0200 Subject: [PATCH 1833/3715] pinctrl: sh-pfc: sh7734: Fix duplicate TCLK1_B [ Upstream commit 884caadad128efad8e00c1cdc3177bc8912ee8ec ] The definitions for bit field [19:18] of the Peripheral Function Select Register 3 were accidentally copied from bit field [20], leading to duplicates for the TCLK1_B function, and missing TCLK0, CAN_CLK_B, and ET0_ETXD4 functions. Fix this by adding the missing GPIO_FN_CAN_CLK_B and GPIO_FN_ET0_ETXD4 enum values, and correcting the functions. Reported-by: Ben Dooks Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20191024131308.16659-1-geert+renesas@glider.be Signed-off-by: Sasha Levin --- arch/sh/include/cpu-sh4/cpu/sh7734.h | 2 +- drivers/pinctrl/sh-pfc/pfc-sh7734.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/sh/include/cpu-sh4/cpu/sh7734.h b/arch/sh/include/cpu-sh4/cpu/sh7734.h index 96f0246ad2f2..82b63208135a 100644 --- a/arch/sh/include/cpu-sh4/cpu/sh7734.h +++ b/arch/sh/include/cpu-sh4/cpu/sh7734.h @@ -134,7 +134,7 @@ enum { GPIO_FN_EX_WAIT1, GPIO_FN_SD1_DAT0_A, GPIO_FN_DREQ2, GPIO_FN_CAN1_TX_C, GPIO_FN_ET0_LINK_C, GPIO_FN_ET0_ETXD5_A, GPIO_FN_EX_WAIT0, GPIO_FN_TCLK1_B, - GPIO_FN_RD_WR, GPIO_FN_TCLK0, + GPIO_FN_RD_WR, GPIO_FN_TCLK0, GPIO_FN_CAN_CLK_B, GPIO_FN_ET0_ETXD4, GPIO_FN_EX_CS5, GPIO_FN_SD1_CMD_A, GPIO_FN_ATADIR, GPIO_FN_QSSL_B, GPIO_FN_ET0_ETXD3_A, GPIO_FN_EX_CS4, GPIO_FN_SD1_WP_A, GPIO_FN_ATAWR, GPIO_FN_QMI_QIO1_B, diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7734.c b/drivers/pinctrl/sh-pfc/pfc-sh7734.c index 33232041ee86..3eccc9b3ca84 100644 --- a/drivers/pinctrl/sh-pfc/pfc-sh7734.c +++ b/drivers/pinctrl/sh-pfc/pfc-sh7734.c @@ -1453,7 +1453,7 @@ static const struct pinmux_func pinmux_func_gpios[] = { GPIO_FN(ET0_ETXD2_A), GPIO_FN(EX_CS5), GPIO_FN(SD1_CMD_A), GPIO_FN(ATADIR), GPIO_FN(QSSL_B), GPIO_FN(ET0_ETXD3_A), - GPIO_FN(RD_WR), GPIO_FN(TCLK1_B), + GPIO_FN(RD_WR), GPIO_FN(TCLK0), GPIO_FN(CAN_CLK_B), GPIO_FN(ET0_ETXD4), GPIO_FN(EX_WAIT0), GPIO_FN(TCLK1_B), GPIO_FN(EX_WAIT1), GPIO_FN(SD1_DAT0_A), GPIO_FN(DREQ2), GPIO_FN(CAN1_TX_C), GPIO_FN(ET0_LINK_C), GPIO_FN(ET0_ETXD5_A), @@ -1949,7 +1949,7 @@ static const struct pinmux_cfg_reg pinmux_config_regs[] = { /* IP3_20 [1] */ FN_EX_WAIT0, FN_TCLK1_B, /* IP3_19_18 [2] */ - FN_RD_WR, FN_TCLK1_B, 0, 0, + FN_RD_WR, FN_TCLK0, FN_CAN_CLK_B, FN_ET0_ETXD4, /* IP3_17_15 [3] */ FN_EX_CS5, FN_SD1_CMD_A, FN_ATADIR, FN_QSSL_B, FN_ET0_ETXD3_A, 0, 0, 0, From 07debac18a320bd85c7b0e09f5d2daef5e4d0bb9 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Wed, 23 Oct 2019 11:26:34 +0300 Subject: [PATCH 1834/3715] iio: dln2-adc: fix iio_triggered_buffer_postenable() position [ Upstream commit a7bddfe2dfce1d8859422124abe1964e0ecd386e ] The iio_triggered_buffer_postenable() hook should be called first to attach the poll function. The iio_triggered_buffer_predisable() hook is called last (as is it should). This change moves iio_triggered_buffer_postenable() to be called first. It adds iio_triggered_buffer_predisable() on the error paths of the postenable hook. For the predisable hook, some code-paths have been changed to make sure that the iio_triggered_buffer_predisable() hook gets called in case there is an error before it. Signed-off-by: Alexandru Ardelean Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/adc/dln2-adc.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/iio/adc/dln2-adc.c b/drivers/iio/adc/dln2-adc.c index ab8d6aed5085..2a299bbd6acf 100644 --- a/drivers/iio/adc/dln2-adc.c +++ b/drivers/iio/adc/dln2-adc.c @@ -528,6 +528,10 @@ static int dln2_adc_triggered_buffer_postenable(struct iio_dev *indio_dev) u16 conflict; unsigned int trigger_chan; + ret = iio_triggered_buffer_postenable(indio_dev); + if (ret) + return ret; + mutex_lock(&dln2->mutex); /* Enable ADC */ @@ -541,6 +545,7 @@ static int dln2_adc_triggered_buffer_postenable(struct iio_dev *indio_dev) (int)conflict); ret = -EBUSY; } + iio_triggered_buffer_predisable(indio_dev); return ret; } @@ -554,6 +559,7 @@ static int dln2_adc_triggered_buffer_postenable(struct iio_dev *indio_dev) mutex_unlock(&dln2->mutex); if (ret < 0) { dev_dbg(&dln2->pdev->dev, "Problem in %s\n", __func__); + iio_triggered_buffer_predisable(indio_dev); return ret; } } else { @@ -561,12 +567,12 @@ static int dln2_adc_triggered_buffer_postenable(struct iio_dev *indio_dev) mutex_unlock(&dln2->mutex); } - return iio_triggered_buffer_postenable(indio_dev); + return 0; } static int dln2_adc_triggered_buffer_predisable(struct iio_dev *indio_dev) { - int ret; + int ret, ret2; struct dln2_adc *dln2 = iio_priv(indio_dev); mutex_lock(&dln2->mutex); @@ -581,12 +587,14 @@ static int dln2_adc_triggered_buffer_predisable(struct iio_dev *indio_dev) ret = dln2_adc_set_port_enabled(dln2, false, NULL); mutex_unlock(&dln2->mutex); - if (ret < 0) { + if (ret < 0) dev_dbg(&dln2->pdev->dev, "Problem in %s\n", __func__); - return ret; - } - return iio_triggered_buffer_predisable(indio_dev); + ret2 = iio_triggered_buffer_predisable(indio_dev); + if (ret == 0) + ret = ret2; + + return ret; } static const struct iio_buffer_setup_ops dln2_adc_buffer_setup_ops = { From ccb2bae30094db6e8a57386d96f4a1701033307e Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Sun, 3 Nov 2019 23:58:15 +0200 Subject: [PATCH 1835/3715] Bluetooth: Fix advertising duplicated flags [ Upstream commit 6012b9346d8959194c239fd60a62dfec98d43048 ] Instances may have flags set as part of its data in which case the code should not attempt to add it again otherwise it can cause duplication: < HCI Command: LE Set Extended Advertising Data (0x08|0x0037) plen 35 Handle: 0x00 Operation: Complete extended advertising data (0x03) Fragment preference: Minimize fragmentation (0x01) Data length: 0x06 Flags: 0x04 BR/EDR Not Supported Flags: 0x06 LE General Discoverable Mode BR/EDR Not Supported Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Johan Hedberg Signed-off-by: Sasha Levin --- net/bluetooth/hci_request.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index b73ac149de34..759329bec399 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1095,6 +1095,14 @@ static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) instance_flags = get_adv_instance_flags(hdev, instance); + /* If instance already has the flags set skip adding it once + * again. + */ + if (adv_instance && eir_get_data(adv_instance->adv_data, + adv_instance->adv_data_len, EIR_FLAGS, + NULL)) + goto skip_flags; + /* The Add Advertising command allows userspace to set both the general * and limited discoverable flags. */ @@ -1127,6 +1135,7 @@ static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) } } +skip_flags: if (adv_instance) { memcpy(ptr, adv_instance->adv_data, adv_instance->adv_data_len); From 4ea31ac2bc4924c167b95e338d3ce4b264a9b535 Mon Sep 17 00:00:00 2001 From: "Ben Dooks (Codethink)" Date: Tue, 22 Oct 2019 16:11:54 +0100 Subject: [PATCH 1836/3715] pinctrl: amd: fix __iomem annotation in amd_gpio_irq_handler() [ Upstream commit 10ff58aa3c2e2a093b6ad615a7e3d8bb0dc613e5 ] The regs pointer in amd_gpio_irq_handler() should have __iomem on it, so add that to fix the following sparse warnings: drivers/pinctrl/pinctrl-amd.c:555:14: warning: incorrect type in assignment (different address spaces) drivers/pinctrl/pinctrl-amd.c:555:14: expected unsigned int [usertype] *regs drivers/pinctrl/pinctrl-amd.c:555:14: got void [noderef] *base drivers/pinctrl/pinctrl-amd.c:563:34: warning: incorrect type in argument 1 (different address spaces) drivers/pinctrl/pinctrl-amd.c:563:34: expected void const volatile [noderef] *addr drivers/pinctrl/pinctrl-amd.c:563:34: got unsigned int [usertype] * drivers/pinctrl/pinctrl-amd.c:580:34: warning: incorrect type in argument 1 (different address spaces) drivers/pinctrl/pinctrl-amd.c:580:34: expected void const volatile [noderef] *addr drivers/pinctrl/pinctrl-amd.c:580:34: got unsigned int [usertype] * drivers/pinctrl/pinctrl-amd.c:587:25: warning: incorrect type in argument 2 (different address spaces) drivers/pinctrl/pinctrl-amd.c:587:25: expected void volatile [noderef] *addr drivers/pinctrl/pinctrl-amd.c:587:25: got unsigned int [usertype] * Signed-off-by: Ben Dooks (Codethink) Link: https://lore.kernel.org/r/20191022151154.5986-1-ben.dooks@codethink.co.uk Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/pinctrl-amd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index b78f42abff2f..7385cd81498c 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -509,7 +509,8 @@ static irqreturn_t amd_gpio_irq_handler(int irq, void *dev_id) irqreturn_t ret = IRQ_NONE; unsigned int i, irqnr; unsigned long flags; - u32 *regs, regval; + u32 __iomem *regs; + u32 regval; u64 status, mask; /* Read the wake status */ From bd423e4694eca922f15a40ccc862e19fe2c99b8e Mon Sep 17 00:00:00 2001 From: Manjunath Patil Date: Sat, 5 Oct 2019 08:20:03 -0700 Subject: [PATCH 1837/3715] ixgbe: protect TX timestamping from API misuse [ Upstream commit 07066d9dc3d2326fbad8f7b0cb0120cff7b7dedb ] HW timestamping can only be requested for a packet if the NIC is first setup via ioctl(SIOCSHWTSTAMP). If this step was skipped, then the ixgbe driver still allowed TX packets to request HW timestamping. In this situation, we see 'clearing Tx Timestamp hang' noise in the log. Fix this by checking that the NIC is configured for HW TX timestamping before accepting a HW TX timestamping request. Similar-to: commit 26bd4e2db06b ("igb: protect TX timestamping from API misuse") commit 0a6f2f05a2f5 ("igb: Fix a test with HWTSTAMP_TX_ON") Signed-off-by: Manjunath Patil Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 4801d96c4fa9..0edfd199937d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8379,7 +8379,8 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && adapter->ptp_clock) { - if (!test_and_set_bit_lock(__IXGBE_PTP_TX_IN_PROGRESS, + if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON && + !test_and_set_bit_lock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state)) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; tx_flags |= IXGBE_TX_FLAGS_TSTAMP; From 3feec89682118fad5139e745c3453a4cf8580ef0 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Fri, 18 Oct 2019 01:47:00 -0300 Subject: [PATCH 1838/3715] media: rcar_drif: fix a memory disclosure [ Upstream commit d39083234c60519724c6ed59509a2129fd2aed41 ] "f->fmt.sdr.reserved" is uninitialized. As other peer drivers like msi2500 and airspy do, the fix initializes it to avoid memory disclosures. Signed-off-by: Kangjie Lu Reviewed-by: Geert Uytterhoeven Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/rcar_drif.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/platform/rcar_drif.c b/drivers/media/platform/rcar_drif.c index 522364ff0d5d..3871ed6a1fcb 100644 --- a/drivers/media/platform/rcar_drif.c +++ b/drivers/media/platform/rcar_drif.c @@ -915,6 +915,7 @@ static int rcar_drif_g_fmt_sdr_cap(struct file *file, void *priv, { struct rcar_drif_sdr *sdr = video_drvdata(file); + memset(f->fmt.sdr.reserved, 0, sizeof(f->fmt.sdr.reserved)); f->fmt.sdr.pixelformat = sdr->fmt->pixelformat; f->fmt.sdr.buffersize = sdr->fmt->buffersize; From 29bfb2ee16b765b2150e03453aed33ba47578c2c Mon Sep 17 00:00:00 2001 From: Vandana BN Date: Tue, 22 Oct 2019 04:51:40 -0300 Subject: [PATCH 1839/3715] media: v4l2-core: fix touch support in v4l_g_fmt [ Upstream commit 545b618cfb5cadacd00c25066b9a36540e5ca9e9 ] v4l_s_fmt, for VFL_TYPE_TOUCH, sets unneeded members of the v4l2_pix_format structure to default values.This was missing in v4l_g_fmt, which would lead to failures in v4l2-compliance tests. Signed-off-by: Vandana BN Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/v4l2-core/v4l2-ioctl.c | 33 +++++++++++++++------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index 681eef972e63..7cafc8a57950 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -1363,10 +1363,26 @@ static int v4l_enum_fmt(const struct v4l2_ioctl_ops *ops, return ret; } +static void v4l_pix_format_touch(struct v4l2_pix_format *p) +{ + /* + * The v4l2_pix_format structure contains fields that make no sense for + * touch. Set them to default values in this case. + */ + + p->field = V4L2_FIELD_NONE; + p->colorspace = V4L2_COLORSPACE_RAW; + p->flags = 0; + p->ycbcr_enc = 0; + p->quantization = 0; + p->xfer_func = 0; +} + static int v4l_g_fmt(const struct v4l2_ioctl_ops *ops, struct file *file, void *fh, void *arg) { struct v4l2_format *p = arg; + struct video_device *vfd = video_devdata(file); int ret = check_fmt(file, p->type); if (ret) @@ -1404,6 +1420,8 @@ static int v4l_g_fmt(const struct v4l2_ioctl_ops *ops, ret = ops->vidioc_g_fmt_vid_cap(file, fh, arg); /* just in case the driver zeroed it again */ p->fmt.pix.priv = V4L2_PIX_FMT_PRIV_MAGIC; + if (vfd->vfl_type == VFL_TYPE_TOUCH) + v4l_pix_format_touch(&p->fmt.pix); return ret; case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: return ops->vidioc_g_fmt_vid_cap_mplane(file, fh, arg); @@ -1439,21 +1457,6 @@ static int v4l_g_fmt(const struct v4l2_ioctl_ops *ops, return -EINVAL; } -static void v4l_pix_format_touch(struct v4l2_pix_format *p) -{ - /* - * The v4l2_pix_format structure contains fields that make no sense for - * touch. Set them to default values in this case. - */ - - p->field = V4L2_FIELD_NONE; - p->colorspace = V4L2_COLORSPACE_RAW; - p->flags = 0; - p->ycbcr_enc = 0; - p->quantization = 0; - p->xfer_func = 0; -} - static int v4l_s_fmt(const struct v4l2_ioctl_ops *ops, struct file *file, void *fh, void *arg) { From b7a6c8ec56d804daa10879fce35843fe84da0ea4 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 24 Oct 2019 19:40:42 +0200 Subject: [PATCH 1840/3715] rfkill: allocate static minor [ Upstream commit 8670b2b8b029a6650d133486be9d2ace146fd29a ] udev has a feature of creating /dev/ device-nodes if it finds a devnode: modalias. This allows for auto-loading of modules that provide the node. This requires to use a statically allocated minor number for misc character devices. However, rfkill uses dynamic minor numbers and prevents auto-loading of the module. So allocate the next static misc minor number and use it for rfkill. Signed-off-by: Marcel Holtmann Link: https://lore.kernel.org/r/20191024174042.19851-1-marcel@holtmann.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- include/linux/miscdevice.h | 1 + net/rfkill/core.c | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 4de703d9e21f..5e1e50b8f8c4 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -56,6 +56,7 @@ #define UHID_MINOR 239 #define USERIO_MINOR 240 #define VHOST_VSOCK_MINOR 241 +#define RFKILL_MINOR 242 #define MISC_DYNAMIC_MINOR 255 struct device; diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 2064c3a35ef8..99a2e55b01cf 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -1312,10 +1312,12 @@ static const struct file_operations rfkill_fops = { .llseek = no_llseek, }; +#define RFKILL_NAME "rfkill" + static struct miscdevice rfkill_miscdev = { - .name = "rfkill", .fops = &rfkill_fops, - .minor = MISC_DYNAMIC_MINOR, + .name = RFKILL_NAME, + .minor = RFKILL_MINOR, }; static int __init rfkill_init(void) @@ -1367,3 +1369,6 @@ static void __exit rfkill_exit(void) class_unregister(&rfkill_class); } module_exit(rfkill_exit); + +MODULE_ALIAS_MISCDEV(RFKILL_MINOR); +MODULE_ALIAS("devname:" RFKILL_NAME); From 44e2712909daca5390460ffefdfaaed0d080ca2d Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Mon, 4 Nov 2019 21:51:11 -0800 Subject: [PATCH 1841/3715] bnx2x: Fix PF-VF communication over multi-cos queues. [ Upstream commit dc5a3d79c345871439ffe72550b604fcde9770e1 ] PF driver doesn't enable tx-switching for all cos queues/clients, which causes packets drop from PF to VF. Fix this by enabling tx-switching on all cos queues/clients. Signed-off-by: Manish Chopra Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 9ca994d0bab6..1977e0c552df 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -2389,15 +2389,21 @@ static int bnx2x_set_pf_tx_switching(struct bnx2x *bp, bool enable) /* send the ramrod on all the queues of the PF */ for_each_eth_queue(bp, i) { struct bnx2x_fastpath *fp = &bp->fp[i]; + int tx_idx; /* Set the appropriate Queue object */ q_params.q_obj = &bnx2x_sp_obj(bp, fp).q_obj; - /* Update the Queue state */ - rc = bnx2x_queue_state_change(bp, &q_params); - if (rc) { - BNX2X_ERR("Failed to configure Tx switching\n"); - return rc; + for (tx_idx = FIRST_TX_COS_INDEX; + tx_idx < fp->max_cos; tx_idx++) { + q_params.params.update.cid_index = tx_idx; + + /* Update the Queue state */ + rc = bnx2x_queue_state_change(bp, &q_params); + if (rc) { + BNX2X_ERR("Failed to configure Tx switching\n"); + return rc; + } } } From 6021cd87982255b7c7518e69cec1c78241e5d36d Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Wed, 6 Nov 2019 10:36:09 +0800 Subject: [PATCH 1842/3715] spi: img-spfi: fix potential double release [ Upstream commit e9a8ba9769a0e354341bc6cc01b98aadcea1dfe9 ] The channels spfi->tx_ch and spfi->rx_ch are not set to NULL after they are released. As a result, they will be released again, either on the error handling branch in the same function or in the corresponding remove function, i.e. img_spfi_remove(). This patch fixes the bug by setting the two members to NULL. Signed-off-by: Pan Bian Link: https://lore.kernel.org/r/1573007769-20131-1-git-send-email-bianpan2016@163.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-img-spfi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/spi/spi-img-spfi.c b/drivers/spi/spi-img-spfi.c index 7a37090dabbe..2e65b70c7879 100644 --- a/drivers/spi/spi-img-spfi.c +++ b/drivers/spi/spi-img-spfi.c @@ -673,6 +673,8 @@ static int img_spfi_probe(struct platform_device *pdev) dma_release_channel(spfi->tx_ch); if (spfi->rx_ch) dma_release_channel(spfi->rx_ch); + spfi->tx_ch = NULL; + spfi->rx_ch = NULL; dev_warn(spfi->dev, "Failed to get DMA channels, falling back to PIO mode\n"); } else { master->dma_tx = spfi->tx_ch; From 62c94beda69786d7e988f99e2dbabd1be96d5bd7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 6 Nov 2019 16:42:57 +0100 Subject: [PATCH 1843/3715] ALSA: timer: Limit max amount of slave instances [ Upstream commit fdea53fe5de532969a332d6e5e727f2ad8bf084d ] The fuzzer tries to open the timer instances as much as possible, and this may cause a system hiccup easily. We've already introduced the cap for the max number of available instances for the h/w timers, and we should put such a limit also to the slave timers, too. This patch introduces the limit to the multiple opened slave timers. The upper limit is hard-coded to 1000 for now, which should suffice for any practical usages up to now. Link: https://lore.kernel.org/r/20191106154257.5853-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/timer.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/core/timer.c b/sound/core/timer.c index c60dfd52e8a6..22589a073423 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -88,6 +88,9 @@ static LIST_HEAD(snd_timer_slave_list); /* lock for slave active lists */ static DEFINE_SPINLOCK(slave_active_lock); +#define MAX_SLAVE_INSTANCES 1000 +static int num_slaves; + static DEFINE_MUTEX(register_mutex); static int snd_timer_free(struct snd_timer *timer); @@ -266,6 +269,10 @@ int snd_timer_open(struct snd_timer_instance **ti, err = -EINVAL; goto unlock; } + if (num_slaves >= MAX_SLAVE_INSTANCES) { + err = -EBUSY; + goto unlock; + } timeri = snd_timer_instance_new(owner, NULL); if (!timeri) { err = -ENOMEM; @@ -275,6 +282,7 @@ int snd_timer_open(struct snd_timer_instance **ti, timeri->slave_id = tid->device; timeri->flags |= SNDRV_TIMER_IFLG_SLAVE; list_add_tail(&timeri->open_list, &snd_timer_slave_list); + num_slaves++; err = snd_timer_check_slave(timeri); if (err < 0) { snd_timer_close_locked(timeri, &card_dev_to_put); @@ -364,6 +372,8 @@ static int snd_timer_close_locked(struct snd_timer_instance *timeri, struct snd_timer_instance *slave, *tmp; list_del(&timeri->open_list); + if (timeri->flags & SNDRV_TIMER_IFLG_SLAVE) + num_slaves--; /* force to stop the timer */ snd_timer_stop(timeri); From 6cb5c8d002eadfb1a58d7db753c5d57459a81fed Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Tue, 5 Nov 2019 10:18:38 +0800 Subject: [PATCH 1844/3715] rtlwifi: fix memory leak in rtl92c_set_fw_rsvdpagepkt() [ Upstream commit 5174f1e41074b5186608badc2e89441d021e8c08 ] This leak was found by testing the EDIMAX EW-7612 on Raspberry Pi 3B+ with Linux 5.4-rc5 (multi_v7_defconfig + rtlwifi + kmemleak) and noticed a single memory leak during probe: unreferenced object 0xec13ee40 (size 176): comm "kworker/u8:1", pid 36, jiffies 4294939321 (age 5580.790s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] __netdev_alloc_skb+0x9c/0x164 [<863dfa6e>] rtl92c_set_fw_rsvdpagepkt+0x254/0x340 [rtl8192c_common] [<9572be0d>] rtl92cu_set_hw_reg+0xf48/0xfa4 [rtl8192cu] [<116df4d8>] rtl_op_bss_info_changed+0x234/0x96c [rtlwifi] [<8933575f>] ieee80211_bss_info_change_notify+0xb8/0x264 [mac80211] [] ieee80211_assoc_success+0x934/0x1798 [mac80211] [] ieee80211_rx_mgmt_assoc_resp+0x174/0x314 [mac80211] [<5974629e>] ieee80211_sta_rx_queued_mgmt+0x3f4/0x7f0 [mac80211] [] ieee80211_iface_work+0x208/0x318 [mac80211] [] process_one_work+0x22c/0x564 [] worker_thread+0x44/0x5d8 [<82c7b073>] kthread+0x150/0x154 [] ret_from_fork+0x14/0x2c [<794dff30>] 0x0 It is because 8192cu doesn't implement usb_cmd_send_packet(), and this patch just frees the skb within the function to resolve memleak problem by now. Since 8192cu doesn't turn on fwctrl_lps that needs to download command packet for firmware via the function, applying this patch doesn't affect driver behavior. Reported-by: Stefan Wahren Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c index 530e80f0ef0b..1ee7f796113b 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c @@ -1556,6 +1556,8 @@ static bool usb_cmd_send_packet(struct ieee80211_hw *hw, struct sk_buff *skb) * This is maybe necessary: * rtlpriv->cfg->ops->fill_tx_cmddesc(hw, buffer, 1, 1, skb); */ + dev_kfree_skb(skb); + return true; } From 5fceb353b924c3674a2a3ac14bdaa87040ab57f4 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 24 Oct 2019 18:12:36 +0900 Subject: [PATCH 1845/3715] perf probe: Fix to find range-only function instance [ Upstream commit b77afa1f810f37bd8a36cb1318178dfe2d7af6b6 ] Fix die_is_func_instance() to find range-only function instance. In some case, a function instance can be made without any low PC or entry PC, but only with address ranges by optimization. (e.g. cold text partially in "text.unlikely" section) To find such function instance, we have to check the range attribute too. Fixes: e1ecbbc3fa83 ("perf probe: Fix to handle optimized not-inlined functions") Signed-off-by: Masami Hiramatsu Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/157190835669.1859.8368628035930950596.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/dwarf-aux.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index f5acda13dcfa..bc52b3840706 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -331,10 +331,14 @@ bool die_is_func_def(Dwarf_Die *dw_die) bool die_is_func_instance(Dwarf_Die *dw_die) { Dwarf_Addr tmp; + Dwarf_Attribute attr_mem; /* Actually gcc optimizes non-inline as like as inlined */ - return !dwarf_func_inline(dw_die) && dwarf_entrypc(dw_die, &tmp) == 0; + return !dwarf_func_inline(dw_die) && + (dwarf_entrypc(dw_die, &tmp) == 0 || + dwarf_attr(dw_die, DW_AT_ranges, &attr_mem) != NULL); } + /** * die_get_data_member_location - Get the data-member offset * @mb_die: a DIE of a member of a data structure From 32ee45d86c5e8191d3408c2d0403fbefc29230cf Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 25 Oct 2019 17:46:52 +0900 Subject: [PATCH 1846/3715] perf probe: Fix to list probe event with correct line number [ Upstream commit 3895534dd78f0fd4d3f9e05ee52b9cdd444a743e ] Since debuginfo__find_probe_point() uses dwarf_entrypc() for finding the entry address of the function on which a probe is, it will fail when the function DIE has only ranges attribute. To fix this issue, use die_entrypc() instead of dwarf_entrypc(). Without this fix, perf probe -l shows incorrect offset: # perf probe -l probe:clear_tasks_mm_cpumask (on clear_tasks_mm_cpumask+18446744071579263632@work/linux/linux/kernel/cpu.c) probe:clear_tasks_mm_cpumask_1 (on clear_tasks_mm_cpumask+18446744071579263752@work/linux/linux/kernel/cpu.c) With this: # perf probe -l probe:clear_tasks_mm_cpumask (on clear_tasks_mm_cpumask@work/linux/linux/kernel/cpu.c) probe:clear_tasks_mm_cpumask_1 (on clear_tasks_mm_cpumask:21@work/linux/linux/kernel/cpu.c) Committer testing: Before: [root@quaco ~]# perf probe -l probe:clear_tasks_mm_cpumask (on clear_tasks_mm_cpumask+18446744071579765152@kernel/cpu.c) [root@quaco ~]# After: [root@quaco ~]# perf probe -l probe:clear_tasks_mm_cpumask (on clear_tasks_mm_cpumask@kernel/cpu.c) [root@quaco ~]# Fixes: 1d46ea2a6a40 ("perf probe: Fix listing incorrect line number with inline function") Signed-off-by: Masami Hiramatsu Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/157199321227.8075.14655572419136993015.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/probe-finder.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index a5731de0e5eb..5fee71e960a6 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -1570,7 +1570,7 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr, /* Get function entry information */ func = basefunc = dwarf_diename(&spdie); if (!func || - dwarf_entrypc(&spdie, &baseaddr) != 0 || + die_entrypc(&spdie, &baseaddr) != 0 || dwarf_decl_line(&spdie, &baseline) != 0) { lineno = 0; goto post; @@ -1587,7 +1587,7 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr, while (die_find_top_inlinefunc(&spdie, (Dwarf_Addr)addr, &indie)) { /* There is an inline function */ - if (dwarf_entrypc(&indie, &_addr) == 0 && + if (die_entrypc(&indie, &_addr) == 0 && _addr == addr) { /* * addr is at an inline function entry. From 0fe578acbe986ae6d080f5839a7ed9f0eb76b54c Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 24 Oct 2019 18:12:45 +0900 Subject: [PATCH 1847/3715] perf probe: Walk function lines in lexical blocks [ Upstream commit acb6a7047ac2146b723fef69ee1ab6b7143546bf ] Since some inlined functions are in lexical blocks of given function, we have to recursively walk through the DIE tree. Without this fix, perf-probe -L can miss the inlined functions which is in a lexical block (like if (..) { func() } case.) However, even though, to walk the lines in a given function, we don't need to follow the children DIE of inlined functions because those do not have any lines in the specified function. We need to walk though whole trees only if we walk all lines in a given file, because an inlined function can include another inlined function in the same file. Fixes: b0e9cb2802d4 ("perf probe: Fix to search nested inlined functions in CU") Signed-off-by: Masami Hiramatsu Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/157190836514.1859.15996864849678136353.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/dwarf-aux.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index bc52b3840706..e5406e5adb68 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -691,10 +691,9 @@ static int __die_walk_funclines_cb(Dwarf_Die *in_die, void *data) if (lw->retval != 0) return DIE_FIND_CB_END; } + if (!lw->recursive) + return DIE_FIND_CB_SIBLING; } - if (!lw->recursive) - /* Don't need to search recursively */ - return DIE_FIND_CB_SIBLING; if (addr) { fname = dwarf_decl_file(in_die); @@ -741,6 +740,10 @@ static int __die_walk_culines_cb(Dwarf_Die *sp_die, void *data) { struct __line_walk_param *lw = data; + /* + * Since inlined function can include another inlined function in + * the same file, we need to walk in it recursively. + */ lw->retval = __die_walk_funclines(sp_die, true, lw->callback, lw->data); if (lw->retval != 0) return DWARF_CB_ABORT; @@ -830,8 +833,9 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) */ if (rt_die != cu_die) /* - * Don't need walk functions recursively, because nested - * inlined functions don't have lines of the specified DIE. + * Don't need walk inlined functions recursively, because + * inner inlined functions don't have the lines of the + * specified function. */ ret = __die_walk_funclines(rt_die, false, callback, data); else { From cebb762e56ebab6abde72e87d5a44d54cd159e22 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 25 Oct 2019 17:46:43 +0900 Subject: [PATCH 1848/3715] perf probe: Fix to probe an inline function which has no entry pc [ Upstream commit eb6933b29d20bf2c3053883d409a53f462c1a3ac ] Fix perf probe to probe an inlne function which has no entry pc or low pc but only has ranges attribute. This seems very rare case, but I could find a few examples, as same as probe_point_search_cb(), use die_entrypc() to get the entry address in probe_point_inline_cb() too. Without this patch: # perf probe -D __amd_put_nb_event_constraints Failed to get entry address of __amd_put_nb_event_constraints. Probe point '__amd_put_nb_event_constraints' not found. Error: Failed to add events. With this patch: # perf probe -D __amd_put_nb_event_constraints p:probe/__amd_put_nb_event_constraints amd_put_event_constraints+43 Committer testing: Before: [root@quaco ~]# perf probe -D __amd_put_nb_event_constraints Failed to get entry address of __amd_put_nb_event_constraints. Probe point '__amd_put_nb_event_constraints' not found. Error: Failed to add events. [root@quaco ~]# After: [root@quaco ~]# perf probe -D __amd_put_nb_event_constraints p:probe/__amd_put_nb_event_constraints _text+33789 [root@quaco ~]# Fixes: 4ea42b181434 ("perf: Add perf probe subcommand, a kprobe-event setup helper") Signed-off-by: Masami Hiramatsu Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/157199320336.8075.16189530425277588587.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/probe-finder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 5fee71e960a6..b06b05bd488e 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -953,7 +953,7 @@ static int probe_point_inline_cb(Dwarf_Die *in_die, void *data) ret = find_probe_point_lazy(in_die, pf); else { /* Get probe address */ - if (dwarf_entrypc(in_die, &addr) != 0) { + if (die_entrypc(in_die, &addr) != 0) { pr_warning("Failed to get entry address of %s.\n", dwarf_diename(in_die)); return -ENOENT; From 41d3087d6dcc8a7973b4b60c629f6b8afe96c14e Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 25 Oct 2019 17:47:10 +0900 Subject: [PATCH 1849/3715] perf probe: Fix to show ranges of variables in functions without entry_pc [ Upstream commit af04dd2f8ebaa8fbd46f698714acbf43da14da45 ] Fix to show ranges of variables (--range and --vars option) in functions which DIE has only ranges but no entry_pc attribute. Without this fix: # perf probe --range -V clear_tasks_mm_cpumask Available variables at clear_tasks_mm_cpumask @ (No matched variables) With this fix: # perf probe --range -V clear_tasks_mm_cpumask Available variables at clear_tasks_mm_cpumask @ [VAL] int cpu @ Committer testing: Before: [root@quaco ~]# perf probe --range -V clear_tasks_mm_cpumask Available variables at clear_tasks_mm_cpumask @ (No matched variables) [root@quaco ~]# After: [root@quaco ~]# perf probe --range -V clear_tasks_mm_cpumask Available variables at clear_tasks_mm_cpumask @ [VAL] int cpu @ [root@quaco ~]# Using it: [root@quaco ~]# perf probe clear_tasks_mm_cpumask cpu Added new event: probe:clear_tasks_mm_cpumask (on clear_tasks_mm_cpumask with cpu) You can now use it in all perf tools, such as: perf record -e probe:clear_tasks_mm_cpumask -aR sleep 1 [root@quaco ~]# perf probe -l probe:clear_tasks_mm_cpumask (on clear_tasks_mm_cpumask@kernel/cpu.c with cpu) [root@quaco ~]# [root@quaco ~]# perf trace -e probe:*cpumask ^C[root@quaco ~]# Fixes: 349e8d261131 ("perf probe: Add --range option to show a variable's location range") Signed-off-by: Masami Hiramatsu Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/157199323018.8075.8179744380479673672.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/dwarf-aux.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index e5406e5adb68..21c2ed42ad6b 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -1010,7 +1010,7 @@ static int die_get_var_innermost_scope(Dwarf_Die *sp_die, Dwarf_Die *vr_die, bool first = true; const char *name; - ret = dwarf_entrypc(sp_die, &entry); + ret = die_entrypc(sp_die, &entry); if (ret) return ret; @@ -1073,7 +1073,7 @@ int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf) bool first = true; const char *name; - ret = dwarf_entrypc(sp_die, &entry); + ret = die_entrypc(sp_die, &entry); if (ret) return ret; From e19d77602f23d1d06ed6e00a498b78c7caf46ec9 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 25 Oct 2019 17:47:01 +0900 Subject: [PATCH 1850/3715] perf probe: Fix to show inlined function callsite without entry_pc [ Upstream commit 18e21eb671dc87a4f0546ba505a89ea93598a634 ] Fix 'perf probe --line' option to show inlined function callsite lines even if the function DIE has only ranges. Without this: # perf probe -L amd_put_event_constraints ... 2 { 3 if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)) __amd_put_nb_event_constraints(cpuc, event); 5 } With this patch: # perf probe -L amd_put_event_constraints ... 2 { 3 if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)) 4 __amd_put_nb_event_constraints(cpuc, event); 5 } Committer testing: Before: [root@quaco ~]# perf probe -L amd_put_event_constraints 0 static void amd_put_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) 2 { 3 if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)) __amd_put_nb_event_constraints(cpuc, event); 5 } PMU_FORMAT_ATTR(event, "config:0-7,32-35"); PMU_FORMAT_ATTR(umask, "config:8-15" ); [root@quaco ~]# After: [root@quaco ~]# perf probe -L amd_put_event_constraints 0 static void amd_put_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) 2 { 3 if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)) 4 __amd_put_nb_event_constraints(cpuc, event); 5 } PMU_FORMAT_ATTR(event, "config:0-7,32-35"); PMU_FORMAT_ATTR(umask, "config:8-15" ); [root@quaco ~]# perf probe amd_put_event_constraints:4 Added new event: probe:amd_put_event_constraints (on amd_put_event_constraints:4) You can now use it in all perf tools, such as: perf record -e probe:amd_put_event_constraints -aR sleep 1 [root@quaco ~]# [root@quaco ~]# perf probe -l probe:amd_put_event_constraints (on amd_put_event_constraints:4@arch/x86/events/amd/core.c) probe:clear_tasks_mm_cpumask (on clear_tasks_mm_cpumask@kernel/cpu.c) [root@quaco ~]# Using it: [root@quaco ~]# perf trace -e probe:* ^C[root@quaco ~]# Ok, Intel system here... :-) Fixes: 4cc9cec636e7 ("perf probe: Introduce lines walker interface") Signed-off-by: Masami Hiramatsu Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/157199322107.8075.12659099000567865708.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/dwarf-aux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 21c2ed42ad6b..0a5de865563c 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -686,7 +686,7 @@ static int __die_walk_funclines_cb(Dwarf_Die *in_die, void *data) if (dwarf_tag(in_die) == DW_TAG_inlined_subroutine) { fname = die_get_call_file(in_die); lineno = die_get_call_lineno(in_die); - if (fname && lineno > 0 && dwarf_entrypc(in_die, &addr) == 0) { + if (fname && lineno > 0 && die_entrypc(in_die, &addr) == 0) { lw->retval = lw->callback(fname, lineno, addr, lw->data); if (lw->retval != 0) return DIE_FIND_CB_END; From b41920540deb197461ef7cb479c0a3d45f356da2 Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 28 Oct 2019 11:34:01 +0000 Subject: [PATCH 1851/3715] libsubcmd: Use -O0 with DEBUG=1 [ Upstream commit 22bd8f1b5a1dd168ba4eba27cb17643a11012f5d ] When a 'make DEBUG=1' build is done, the command parser is still built with -O6 and is hard to step through, fix it making it use -O0 in that case. Signed-off-by: James Clark Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Josh Poimboeuf Cc: Namhyung Kim Cc: nd Link: http://lore.kernel.org/lkml/20191028113340.4282-1-james.clark@arm.com [ split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/lib/subcmd/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile index 5b2cd5e58df0..5dbb0dde208c 100644 --- a/tools/lib/subcmd/Makefile +++ b/tools/lib/subcmd/Makefile @@ -28,7 +28,9 @@ ifeq ($(DEBUG),0) endif endif -ifeq ($(CC_NO_CLANG), 0) +ifeq ($(DEBUG),1) + CFLAGS += -O0 +else ifeq ($(CC_NO_CLANG), 0) CFLAGS += -O3 else CFLAGS += -O6 From d622edacac43d1744d8b073daff78651028c268a Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 25 Oct 2019 17:46:34 +0900 Subject: [PATCH 1852/3715] perf probe: Fix to probe a function which has no entry pc [ Upstream commit 5d16dbcc311d91267ddb45c6da4f187be320ecee ] Fix 'perf probe' to probe a function which has no entry pc or low pc but only has ranges attribute. probe_point_search_cb() uses dwarf_entrypc() to get the probe address, but that doesn't work for the function DIE which has only ranges attribute. Use die_entrypc() instead. Without this fix: # perf probe -k ../build-x86_64/vmlinux -D clear_tasks_mm_cpumask:0 Probe point 'clear_tasks_mm_cpumask' not found. Error: Failed to add events. With this: # perf probe -k ../build-x86_64/vmlinux -D clear_tasks_mm_cpumask:0 p:probe/clear_tasks_mm_cpumask clear_tasks_mm_cpumask+0 Committer testing: Before: [root@quaco ~]# perf probe clear_tasks_mm_cpumask:0 Probe point 'clear_tasks_mm_cpumask' not found. Error: Failed to add events. [root@quaco ~]# After: [root@quaco ~]# perf probe clear_tasks_mm_cpumask:0 Added new event: probe:clear_tasks_mm_cpumask (on clear_tasks_mm_cpumask) You can now use it in all perf tools, such as: perf record -e probe:clear_tasks_mm_cpumask -aR sleep 1 [root@quaco ~]# Using it with 'perf trace': [root@quaco ~]# perf trace -e probe:clear_tasks_mm_cpumask Doesn't seem to be used in x86_64: $ find . -name "*.c" | xargs grep clear_tasks_mm_cpumask ./kernel/cpu.c: * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU ./kernel/cpu.c:void clear_tasks_mm_cpumask(int cpu) ./arch/xtensa/kernel/smp.c: clear_tasks_mm_cpumask(cpu); ./arch/csky/kernel/smp.c: clear_tasks_mm_cpumask(cpu); ./arch/sh/kernel/smp.c: clear_tasks_mm_cpumask(cpu); ./arch/arm/kernel/smp.c: clear_tasks_mm_cpumask(cpu); ./arch/powerpc/mm/nohash/mmu_context.c: clear_tasks_mm_cpumask(cpu); $ find . -name "*.h" | xargs grep clear_tasks_mm_cpumask ./include/linux/cpu.h:void clear_tasks_mm_cpumask(int cpu); $ find . -name "*.S" | xargs grep clear_tasks_mm_cpumask $ Fixes: e1ecbbc3fa83 ("perf probe: Fix to handle optimized not-inlined functions") Reported-by: Arnaldo Carvalho de Melo Tested-by: Arnaldo Carvalho de Melo Signed-off-by: Masami Hiramatsu Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/157199319438.8075.4695576954550638618.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/probe-finder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index b06b05bd488e..aa6b35726976 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -1005,7 +1005,7 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) param->retval = find_probe_point_by_line(pf); } else if (die_is_func_instance(sp_die)) { /* Instances always have the entry address */ - dwarf_entrypc(sp_die, &pf->addr); + die_entrypc(sp_die, &pf->addr); /* But in some case the entry address is 0 */ if (pf->addr == 0) { pr_debug("%s has no entry PC. Skipped\n", From e54d9ee1b1ac31fcb420c1943abc3e481ce8b6aa Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Wed, 6 Nov 2019 17:14:45 +0800 Subject: [PATCH 1853/3715] drm/amdgpu: fix potential double drop fence reference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 946ab8db6953535a3a88c957db8328beacdfed9d ] The object fence is not set to NULL after its reference is dropped. As a result, its reference may be dropped again if error occurs after that, which may lead to a use after free bug. To avoid the issue, fence is explicitly set to NULL after dropping its reference. Acked-by: Christian König Signed-off-by: Pan Bian Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_test.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index ed8c3739015b..b35b0741fd97 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c @@ -125,6 +125,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) } dma_fence_put(fence); + fence = NULL; r = amdgpu_bo_kmap(vram_obj, &vram_map); if (r) { @@ -170,6 +171,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) } dma_fence_put(fence); + fence = NULL; r = amdgpu_bo_kmap(gtt_obj[i], >t_map); if (r) { From 9dff39a5fa7728402727a901a1b34a6875f66dc1 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 30 Oct 2019 15:34:46 -0700 Subject: [PATCH 1854/3715] perf parse: If pmu configuration fails free terms [ Upstream commit 38f2c4226e6bc3e8c41c318242821ba5dc825aba ] Avoid a memory leak when the configuration fails. Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andi Kleen Cc: Daniel Borkmann Cc: Jin Yao Cc: John Garry Cc: Kan Liang Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Cc: Yonghong Song Cc: bpf@vger.kernel.org Cc: clang-built-linux@googlegroups.com Cc: netdev@vger.kernel.org Link: http://lore.kernel.org/lkml/20191030223448.12930-9-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/parse-events.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 29e2bb304168..096c52f296d7 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1253,8 +1253,15 @@ static int __parse_events_add_pmu(struct parse_events_state *parse_state, if (get_config_terms(head_config, &config_terms)) return -ENOMEM; - if (perf_pmu__config(pmu, &attr, head_config, parse_state->error)) + if (perf_pmu__config(pmu, &attr, head_config, parse_state->error)) { + struct perf_evsel_config_term *pos, *tmp; + + list_for_each_entry_safe(pos, tmp, &config_terms, list) { + list_del_init(&pos->list); + free(pos); + } return -EINVAL; + } evsel = __add_event(list, &parse_state->idx, &attr, get_config_name(head_config), pmu, From 8f664b2e9b43168b0a11b1171f0f57cdabfccc30 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 30 Oct 2019 16:09:49 +0900 Subject: [PATCH 1855/3715] perf probe: Skip overlapped location on searching variables [ Upstream commit dee36a2abb67c175265d49b9a8c7dfa564463d9a ] Since debuginfo__find_probes() callback function can be called with the location which already passed, the callback function must filter out such overlapped locations. add_probe_trace_event() has already done it by commit 1a375ae7659a ("perf probe: Skip same probe address for a given line"), but add_available_vars() doesn't. Thus perf probe -v shows same address repeatedly as below: # perf probe -V vfs_read:18 Available variables at vfs_read:18 @ char* buf loff_t* pos ssize_t ret struct file* file @ char* buf loff_t* pos ssize_t ret struct file* file @ char* buf loff_t* pos ssize_t ret struct file* file With this fix, perf probe -V shows it correctly: # perf probe -V vfs_read:18 Available variables at vfs_read:18 @ char* buf loff_t* pos ssize_t ret struct file* file @ char* buf loff_t* pos ssize_t ret struct file* file Fixes: cf6eb489e5c0 ("perf probe: Show accessible local variables") Signed-off-by: Masami Hiramatsu Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/157241938927.32002.4026859017790562751.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/probe-finder.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index aa6b35726976..e1d0bbf7735b 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -1417,6 +1417,18 @@ error: return DIE_FIND_CB_END; } +static bool available_var_finder_overlap(struct available_var_finder *af) +{ + int i; + + for (i = 0; i < af->nvls; i++) { + if (af->pf.addr == af->vls[i].point.address) + return true; + } + return false; + +} + /* Add a found vars into available variables list */ static int add_available_vars(Dwarf_Die *sc_die, struct probe_finder *pf) { @@ -1427,6 +1439,14 @@ static int add_available_vars(Dwarf_Die *sc_die, struct probe_finder *pf) Dwarf_Die die_mem; int ret; + /* + * For some reason (e.g. different column assigned to same address), + * this callback can be called with the address which already passed. + * Ignore it first. + */ + if (available_var_finder_overlap(af)) + return 0; + /* Check number of tevs */ if (af->nvls == af->max_vls) { pr_warning("Too many( > %d) probe point found.\n", af->max_vls); From 5935b5993f3f85ae26149a1c2a88da9076cc1730 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 5 Nov 2019 09:16:49 +0900 Subject: [PATCH 1856/3715] perf probe: Return a better scope DIE if there is no best scope [ Upstream commit c701636aeec4c173208697d68da6e4271125564b ] Make find_best_scope() returns innermost DIE at given address if there is no best matched scope DIE. Since Gcc sometimes generates intuitively strange line info which is out of inlined function address range, we need this fixup. Without this, sometimes perf probe failed to probe on a line inside an inlined function: # perf probe -D ksys_open:3 Failed to find scope of probe point. Error: Failed to add events. With this fix, 'perf probe' can probe it: # perf probe -D ksys_open:3 p:probe/ksys_open _text+25707308 p:probe/ksys_open_1 _text+25710596 p:probe/ksys_open_2 _text+25711114 p:probe/ksys_open_3 _text+25711343 p:probe/ksys_open_4 _text+25714058 p:probe/ksys_open_5 _text+2819653 p:probe/ksys_open_6 _text+2819701 Signed-off-by: Masami Hiramatsu Tested-by: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Ravi Bangoria Cc: Steven Rostedt (VMware) Cc: Tom Zanussi Link: http://lore.kernel.org/lkml/157291300887.19771.14936015360963292236.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/probe-finder.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index e1d0bbf7735b..30a5e92b67bd 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -767,6 +767,16 @@ static int find_best_scope_cb(Dwarf_Die *fn_die, void *data) return 0; } +/* Return innermost DIE */ +static int find_inner_scope_cb(Dwarf_Die *fn_die, void *data) +{ + struct find_scope_param *fsp = data; + + memcpy(fsp->die_mem, fn_die, sizeof(Dwarf_Die)); + fsp->found = true; + return 1; +} + /* Find an appropriate scope fits to given conditions */ static Dwarf_Die *find_best_scope(struct probe_finder *pf, Dwarf_Die *die_mem) { @@ -778,8 +788,13 @@ static Dwarf_Die *find_best_scope(struct probe_finder *pf, Dwarf_Die *die_mem) .die_mem = die_mem, .found = false, }; + int ret; - cu_walk_functions_at(&pf->cu_die, pf->addr, find_best_scope_cb, &fsp); + ret = cu_walk_functions_at(&pf->cu_die, pf->addr, find_best_scope_cb, + &fsp); + if (!ret && !fsp.found) + cu_walk_functions_at(&pf->cu_die, pf->addr, + find_inner_scope_cb, &fsp); return fsp.found ? die_mem : NULL; } From c00f4acafc9122e9c0d6e7347d316ed6d8b44284 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 30 Oct 2019 16:09:40 +0900 Subject: [PATCH 1857/3715] perf probe: Fix to show calling lines of inlined functions [ Upstream commit 86c0bf8539e7f46d91bd105e55eda96e0064caef ] Fix to show calling lines of inlined functions (where an inline function is called). die_walk_lines() filtered out the lines inside inlined functions based on the address. However this also filtered out the lines which call those inlined functions from the target function. To solve this issue, check the call_file and call_line attributes and do not filter out if it matches to the line information. Without this fix, perf probe -L doesn't show some lines correctly. (don't see the lines after 17) # perf probe -L vfs_read 0 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) 1 { 2 ssize_t ret; 4 if (!(file->f_mode & FMODE_READ)) return -EBADF; 6 if (!(file->f_mode & FMODE_CAN_READ)) return -EINVAL; 8 if (unlikely(!access_ok(buf, count))) return -EFAULT; 11 ret = rw_verify_area(READ, file, pos, count); 12 if (!ret) { 13 if (count > MAX_RW_COUNT) count = MAX_RW_COUNT; 15 ret = __vfs_read(file, buf, count, pos); 16 if (ret > 0) { fsnotify_access(file); add_rchar(current, ret); } With this fix: # perf probe -L vfs_read 0 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) 1 { 2 ssize_t ret; 4 if (!(file->f_mode & FMODE_READ)) return -EBADF; 6 if (!(file->f_mode & FMODE_CAN_READ)) return -EINVAL; 8 if (unlikely(!access_ok(buf, count))) return -EFAULT; 11 ret = rw_verify_area(READ, file, pos, count); 12 if (!ret) { 13 if (count > MAX_RW_COUNT) count = MAX_RW_COUNT; 15 ret = __vfs_read(file, buf, count, pos); 16 if (ret > 0) { 17 fsnotify_access(file); 18 add_rchar(current, ret); } 20 inc_syscr(current); } Fixes: 4cc9cec636e7 ("perf probe: Introduce lines walker interface") Signed-off-by: Masami Hiramatsu Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/157241937995.32002.17899884017011512577.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/dwarf-aux.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 0a5de865563c..49f9c65f1a9c 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -768,7 +768,7 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) Dwarf_Lines *lines; Dwarf_Line *line; Dwarf_Addr addr; - const char *fname, *decf = NULL; + const char *fname, *decf = NULL, *inf = NULL; int lineno, ret = 0; int decl = 0, inl; Dwarf_Die die_mem, *cu_die; @@ -812,13 +812,21 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) */ if (!dwarf_haspc(rt_die, addr)) continue; + if (die_find_inlinefunc(rt_die, addr, &die_mem)) { + /* Call-site check */ + inf = die_get_call_file(&die_mem); + if ((inf && !strcmp(inf, decf)) && + die_get_call_lineno(&die_mem) == lineno) + goto found; + dwarf_decl_line(&die_mem, &inl); if (inl != decl || decf != dwarf_decl_file(&die_mem)) continue; } } +found: /* Get source line */ fname = dwarf_linesrc(line, NULL, NULL); From c9560d9168aecbc6dcdbd6e84a45d68f48b20b9e Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 30 Oct 2019 16:09:21 +0900 Subject: [PATCH 1858/3715] perf probe: Skip end-of-sequence and non statement lines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f4d99bdfd124823a81878b44b5e8750b97f73902 ] Skip end-of-sequence and non-statement lines while walking through lines list. The "end-of-sequence" line information means: "the current address is that of the first byte after the end of a sequence of target machine instructions." (DWARF version 4 spec 6.2.2) This actually means out of scope and we can not probe on it. On the other hand, the statement lines (is_stmt) means: "the current instruction is a recommended breakpoint location. A recommended breakpoint location is intended to “represent” a line, a statement and/or a semantically distinct subpart of a statement." (DWARF version 4 spec 6.2.2) So, non-statement line info also should be skipped. These can reduce unneeded probe points and also avoid an error. E.g. without this patch: # perf probe -a "clear_tasks_mm_cpumask:1" Added new events: probe:clear_tasks_mm_cpumask (on clear_tasks_mm_cpumask:1) probe:clear_tasks_mm_cpumask_1 (on clear_tasks_mm_cpumask:1) probe:clear_tasks_mm_cpumask_2 (on clear_tasks_mm_cpumask:1) probe:clear_tasks_mm_cpumask_3 (on clear_tasks_mm_cpumask:1) probe:clear_tasks_mm_cpumask_4 (on clear_tasks_mm_cpumask:1) You can now use it in all perf tools, such as: perf record -e probe:clear_tasks_mm_cpumask_4 -aR sleep 1 # This puts 5 probes on one line, but acutally it's not inlined function. This is because there are many non statement instructions at the function prologue. With this patch: # perf probe -a "clear_tasks_mm_cpumask:1" Added new event: probe:clear_tasks_mm_cpumask (on clear_tasks_mm_cpumask:1) You can now use it in all perf tools, such as: perf record -e probe:clear_tasks_mm_cpumask -aR sleep 1 # Now perf-probe skips unneeded addresses. Committer testing: Slightly different results, but similar: Before: # uname -a Linux quaco 5.3.8-200.fc30.x86_64 #1 SMP Tue Oct 29 14:46:22 UTC 2019 x86_64 x86_64 x86_64 GNU/Linux # # perf probe -a "clear_tasks_mm_cpumask:1" Added new events: probe:clear_tasks_mm_cpumask (on clear_tasks_mm_cpumask:1) probe:clear_tasks_mm_cpumask_1 (on clear_tasks_mm_cpumask:1) probe:clear_tasks_mm_cpumask_2 (on clear_tasks_mm_cpumask:1) You can now use it in all perf tools, such as: perf record -e probe:clear_tasks_mm_cpumask_2 -aR sleep 1 # After: # perf probe -a "clear_tasks_mm_cpumask:1" Added new event: probe:clear_tasks_mm_cpumask (on clear_tasks_mm_cpumask:1) You can now use it in all perf tools, such as: perf record -e probe:clear_tasks_mm_cpumask -aR sleep 1 # perf probe -l probe:clear_tasks_mm_cpumask (on clear_tasks_mm_cpumask@kernel/cpu.c) # Fixes: 4cc9cec636e7 ("perf probe: Introduce lines walker interface") Signed-off-by: Masami Hiramatsu Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/157241936090.32002.12156347518596111660.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/dwarf-aux.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 49f9c65f1a9c..d48bd5eaa0f2 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -773,6 +773,7 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) int decl = 0, inl; Dwarf_Die die_mem, *cu_die; size_t nlines, i; + bool flag; /* Get the CU die */ if (dwarf_tag(rt_die) != DW_TAG_compile_unit) { @@ -803,6 +804,12 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) "Possible error in debuginfo.\n"); continue; } + /* Skip end-of-sequence */ + if (dwarf_lineendsequence(line, &flag) != 0 || flag) + continue; + /* Skip Non statement line-info */ + if (dwarf_linebeginstatement(line, &flag) != 0 || !flag) + continue; /* Filter lines based on address */ if (rt_die != cu_die) { /* From 361b2a9cd86730eb028e16b95904b415f1c8dc31 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 30 Oct 2019 16:09:30 +0900 Subject: [PATCH 1859/3715] perf probe: Filter out instances except for inlined subroutine and subprogram [ Upstream commit da6cb952a89efe24bb76c4971370d485737a2d85 ] Filter out instances except for inlined_subroutine and subprogram DIE in die_walk_instances() and die_is_func_instance(). This fixes an issue that perf probe sets some probes on calling address instead of a target function itself. When perf probe walks on instances of an abstruct origin (a kind of function prototype of inlined function), die_walk_instances() can also pass a GNU_call_site (a GNU extension for call site) to callback. Since it is not an inlined instance of target function, we have to filter out when searching a probe point. Without this patch, perf probe sets probes on call site address too.This can happen on some function which is marked "inlined", but has actual symbol. (I'm not sure why GCC mark it "inlined"): # perf probe -D vfs_read p:probe/vfs_read _text+2500017 p:probe/vfs_read_1 _text+2499468 p:probe/vfs_read_2 _text+2499563 p:probe/vfs_read_3 _text+2498876 p:probe/vfs_read_4 _text+2498512 p:probe/vfs_read_5 _text+2498627 With this patch: Slightly different results, similar tho: # perf probe -D vfs_read p:probe/vfs_read _text+2498512 Committer testing: # uname -a Linux quaco 5.3.8-200.fc30.x86_64 #1 SMP Tue Oct 29 14:46:22 UTC 2019 x86_64 x86_64 x86_64 GNU/Linux Before: # perf probe -D vfs_read p:probe/vfs_read _text+3131557 p:probe/vfs_read_1 _text+3130975 p:probe/vfs_read_2 _text+3131047 p:probe/vfs_read_3 _text+3130380 p:probe/vfs_read_4 _text+3130000 # uname -a Linux quaco 5.3.8-200.fc30.x86_64 #1 SMP Tue Oct 29 14:46:22 UTC 2019 x86_64 x86_64 x86_64 GNU/Linux # After: # perf probe -D vfs_read p:probe/vfs_read _text+3130000 # Fixes: db0d2c6420ee ("perf probe: Search concrete out-of-line instances") Signed-off-by: Masami Hiramatsu Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/157241937063.32002.11024544873990816590.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/dwarf-aux.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index d48bd5eaa0f2..50903f30289f 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -325,18 +325,22 @@ bool die_is_func_def(Dwarf_Die *dw_die) * @dw_die: a DIE * * Ensure that this DIE is an instance (which has an entry address). - * This returns true if @dw_die is a function instance. If not, you need to - * call die_walk_instances() to find actual instances. + * This returns true if @dw_die is a function instance. If not, the @dw_die + * must be a prototype. You can use die_walk_instances() to find actual + * instances. **/ bool die_is_func_instance(Dwarf_Die *dw_die) { Dwarf_Addr tmp; Dwarf_Attribute attr_mem; + int tag = dwarf_tag(dw_die); - /* Actually gcc optimizes non-inline as like as inlined */ - return !dwarf_func_inline(dw_die) && - (dwarf_entrypc(dw_die, &tmp) == 0 || - dwarf_attr(dw_die, DW_AT_ranges, &attr_mem) != NULL); + if (tag != DW_TAG_subprogram && + tag != DW_TAG_inlined_subroutine) + return false; + + return dwarf_entrypc(dw_die, &tmp) == 0 || + dwarf_attr(dw_die, DW_AT_ranges, &attr_mem) != NULL; } /** @@ -615,6 +619,9 @@ static int __die_walk_instances_cb(Dwarf_Die *inst, void *data) Dwarf_Die *origin; int tmp; + if (!die_is_func_instance(inst)) + return DIE_FIND_CB_CONTINUE; + attr = dwarf_attr(inst, DW_AT_abstract_origin, &attr_mem); if (attr == NULL) return DIE_FIND_CB_CONTINUE; From ca36cb7f4c06650f22cb9e5f3ad75afa6ca8e0a3 Mon Sep 17 00:00:00 2001 From: Miaoqing Pan Date: Wed, 6 Nov 2019 20:04:37 +0200 Subject: [PATCH 1860/3715] ath10k: fix get invalid tx rate for Mesh metric [ Upstream commit 05a11003a56507023f18d3249a4d4d119c0a3e9c ] ath10k does not provide transmit rate info per MSDU in tx completion, mark that as -1 so mac80211 will ignore the rates. This fixes mac80211 update Mesh link metric with invalid transmit rate info. Tested HW: QCA9984 Tested FW: 10.4-3.9.0.2-00035 Signed-off-by: Hou Bao Hou Signed-off-by: Anilkumar Kolli Signed-off-by: Miaoqing Pan Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath10k/txrx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/txrx.c b/drivers/net/wireless/ath/ath10k/txrx.c index d4986f626c35..9999c8c40269 100644 --- a/drivers/net/wireless/ath/ath10k/txrx.c +++ b/drivers/net/wireless/ath/ath10k/txrx.c @@ -100,6 +100,8 @@ int ath10k_txrx_tx_unref(struct ath10k_htt *htt, info = IEEE80211_SKB_CB(msdu); memset(&info->status, 0, sizeof(info->status)); + info->status.rates[0].idx = -1; + trace_ath10k_txrx_tx_unref(ar, tx_done->msdu_id); if (tx_done->status == HTT_TX_COMPL_STATE_DISCARD) { From 57c99b5a08e72a8b6086bfdb61fb6f1c8f7c4233 Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Fri, 8 Nov 2019 15:49:39 +1030 Subject: [PATCH 1861/3715] fsi: core: Fix small accesses and unaligned offsets via sysfs [ Upstream commit 9f4c2b516b4f031e3cd0e45957f4150b3c1a083d ] Subtracting the offset delta from four-byte alignment lead to wrapping of the requested length where `count` is less than `off`. Generalise the length handling to enable and optimise aligned access sizes for all offset and size combinations. The new formula produces the following results for given offset and count values: offset count | length --------------+------- 0 1 | 1 0 2 | 2 0 3 | 2 0 4 | 4 0 5 | 4 1 1 | 1 1 2 | 1 1 3 | 1 1 4 | 1 1 5 | 1 2 1 | 1 2 2 | 2 2 3 | 2 2 4 | 2 2 5 | 2 3 1 | 1 3 2 | 1 3 3 | 1 3 4 | 1 3 5 | 1 We might need something like this for the cfam chardevs as well, for example we don't currently implement any alignment restrictions / handling in the hardware master driver. Signed-off-by: Andrew Jeffery Signed-off-by: Joel Stanley Link: https://lore.kernel.org/r/20191108051945.7109-6-joel@jms.id.au Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/fsi/fsi-core.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/fsi/fsi-core.c b/drivers/fsi/fsi-core.c index 4ea63d9bd131..8feca59c1f6b 100644 --- a/drivers/fsi/fsi-core.c +++ b/drivers/fsi/fsi-core.c @@ -419,6 +419,31 @@ static int fsi_slave_scan(struct fsi_slave *slave) return 0; } +static unsigned long aligned_access_size(size_t offset, size_t count) +{ + unsigned long offset_unit, count_unit; + + /* Criteria: + * + * 1. Access size must be less than or equal to the maximum access + * width or the highest power-of-two factor of offset + * 2. Access size must be less than or equal to the amount specified by + * count + * + * The access width is optimal if we can calculate 1 to be strictly + * equal while still satisfying 2. + */ + + /* Find 1 by the bottom bit of offset (with a 4 byte access cap) */ + offset_unit = BIT(__builtin_ctzl(offset | 4)); + + /* Find 2 by the top bit of count */ + count_unit = BIT(8 * sizeof(unsigned long) - 1 - __builtin_clzl(count)); + + /* Constrain the maximum access width to the minimum of both criteria */ + return BIT(__builtin_ctzl(offset_unit | count_unit)); +} + static ssize_t fsi_slave_sysfs_raw_read(struct file *file, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) @@ -434,8 +459,7 @@ static ssize_t fsi_slave_sysfs_raw_read(struct file *file, return -EINVAL; for (total_len = 0; total_len < count; total_len += read_len) { - read_len = min_t(size_t, count, 4); - read_len -= off & 0x3; + read_len = aligned_access_size(off, count - total_len); rc = fsi_slave_read(slave, off, buf + total_len, read_len); if (rc) @@ -462,8 +486,7 @@ static ssize_t fsi_slave_sysfs_raw_write(struct file *file, return -EINVAL; for (total_len = 0; total_len < count; total_len += write_len) { - write_len = min_t(size_t, count, 4); - write_len -= off & 0x3; + write_len = aligned_access_size(off, count - total_len); rc = fsi_slave_write(slave, off, buf + total_len, write_len); if (rc) From 1efdc4adb450903c1fb2d383c189129f6dcf444a Mon Sep 17 00:00:00 2001 From: Mike Isely Date: Wed, 6 Nov 2019 12:11:14 +0100 Subject: [PATCH 1862/3715] media: pvrusb2: Fix oops on tear-down when radio support is not present [ Upstream commit 7f404ae9cf2a285f73b3c18ab9303d54b7a3d8e1 ] In some device configurations there's no radio or radio support in the driver. That's OK, as the driver sets itself up accordingly. However on tear-down in these caes it's still trying to tear down radio related context when there isn't anything there, leading to dereferences through a null pointer and chaos follows. How this bug survived unfixed for 11 years in the pvrusb2 driver is a mystery to me. [hverkuil: fix two checkpatch warnings] Signed-off-by: Mike Isely Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/usb/pvrusb2/pvrusb2-v4l2.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c b/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c index 4320bda9352d..e0413db26781 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c +++ b/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c @@ -915,8 +915,12 @@ static void pvr2_v4l2_internal_check(struct pvr2_channel *chp) pvr2_v4l2_dev_disassociate_parent(vp->dev_video); pvr2_v4l2_dev_disassociate_parent(vp->dev_radio); if (!list_empty(&vp->dev_video->devbase.fh_list) || - !list_empty(&vp->dev_radio->devbase.fh_list)) + (vp->dev_radio && + !list_empty(&vp->dev_radio->devbase.fh_list))) { + pvr2_trace(PVR2_TRACE_STRUCT, + "pvr2_v4l2 internal_check exit-empty id=%p", vp); return; + } pvr2_v4l2_destroy_no_lock(vp); } @@ -990,7 +994,8 @@ static int pvr2_v4l2_release(struct file *file) kfree(fhp); if (vp->channel.mc_head->disconnect_flag && list_empty(&vp->dev_video->devbase.fh_list) && - list_empty(&vp->dev_radio->devbase.fh_list)) { + (!vp->dev_radio || + list_empty(&vp->dev_radio->devbase.fh_list))) { pvr2_v4l2_destroy_no_lock(vp); } return 0; From b8d065728cc900d2b101b39812a5e75f66e7216c Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Sun, 10 Nov 2019 07:28:15 +0100 Subject: [PATCH 1863/3715] media: si470x-i2c: add missed operations in remove [ Upstream commit 2df200ab234a86836a8879a05a8007d6b884eb14 ] The driver misses calling v4l2_ctrl_handler_free and v4l2_device_unregister in remove like what is done in probe failure. Add the calls to fix it. Signed-off-by: Chuhong Yuan Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/radio/si470x/radio-si470x-i2c.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/radio/si470x/radio-si470x-i2c.c b/drivers/media/radio/si470x/radio-si470x-i2c.c index 8ce6f9cff746..b60fb6ed5aeb 100644 --- a/drivers/media/radio/si470x/radio-si470x-i2c.c +++ b/drivers/media/radio/si470x/radio-si470x-i2c.c @@ -455,6 +455,8 @@ static int si470x_i2c_remove(struct i2c_client *client) video_unregister_device(&radio->videodev); kfree(radio); + v4l2_ctrl_handler_free(&radio->hdl); + v4l2_device_unregister(&radio->v4l2_dev); return 0; } From b5c1ddeae6945b19cd7728f4dfcefa7fae3e4a40 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 6 Nov 2019 09:33:23 +0000 Subject: [PATCH 1864/3715] EDAC/ghes: Fix grain calculation [ Upstream commit 7088e29e0423d3195e09079b4f849ec4837e5a75 ] The current code to convert a physical address mask to a grain (defined as granularity in bytes) is: e->grain = ~(mem_err->physical_addr_mask & ~PAGE_MASK); This is broken in several ways: 1) It calculates to wrong grain values. E.g., a physical address mask of ~0xfff should give a grain of 0x1000. Without considering PAGE_MASK, there is an off-by-one. Things are worse when also filtering it with ~PAGE_MASK. This will calculate to a grain with the upper bits set. In the example it even calculates to ~0. 2) The grain does not depend on and is unrelated to the kernel's page-size. The page-size only matters when unmapping memory in memory_failure(). Smaller grains are wrongly rounded up to the page-size, on architectures with a configurable page-size (e.g. arm64) this could round up to the even bigger page-size of the hypervisor. Fix this with: e->grain = ~mem_err->physical_addr_mask + 1; The grain_bits are defined as: grain = 1 << grain_bits; Change also the grain_bits calculation accordingly, it is the same formula as in edac_mc.c now and the code can be unified. The value in ->physical_addr_mask coming from firmware is assumed to be contiguous, but this is not sanity-checked. However, in case the mask is non-contiguous, a conversion to grain_bits effectively converts the grain bit mask to a power of 2 by rounding it up. Suggested-by: James Morse Signed-off-by: Robert Richter Signed-off-by: Borislav Petkov Reviewed-by: Mauro Carvalho Chehab Cc: "linux-edac@vger.kernel.org" Cc: Tony Luck Link: https://lkml.kernel.org/r/20191106093239.25517-11-rrichter@marvell.com Signed-off-by: Sasha Levin --- drivers/edac/ghes_edac.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c index 6f80eb65c26c..acae39278669 100644 --- a/drivers/edac/ghes_edac.c +++ b/drivers/edac/ghes_edac.c @@ -187,6 +187,7 @@ void ghes_edac_report_mem_error(struct ghes *ghes, int sev, /* Cleans the error report buffer */ memset(e, 0, sizeof (*e)); e->error_count = 1; + e->grain = 1; strcpy(e->label, "unknown label"); e->msg = pvt->msg; e->other_detail = pvt->other_detail; @@ -282,7 +283,7 @@ void ghes_edac_report_mem_error(struct ghes *ghes, int sev, /* Error grain */ if (mem_err->validation_bits & CPER_MEM_VALID_PA_MASK) - e->grain = ~(mem_err->physical_addr_mask & ~PAGE_MASK); + e->grain = ~mem_err->physical_addr_mask + 1; /* Memory error location, mapped on e->location */ p = e->location; @@ -389,8 +390,13 @@ void ghes_edac_report_mem_error(struct ghes *ghes, int sev, if (p > pvt->other_detail) *(p - 1) = '\0'; + /* Sanity-check driver-supplied grain value. */ + if (WARN_ON_ONCE(!e->grain)) + e->grain = 1; + + grain_bits = fls_long(e->grain - 1); + /* Generate the trace event */ - grain_bits = fls_long(e->grain); snprintf(pvt->detail_location, sizeof(pvt->detail_location), "APEI location: %s %s", e->location, e->other_detail); trace_mc_event(type, e->msg, e->label, e->error_count, From dad729c48e9d61d03a512a36ba9947f8541e2009 Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Sat, 9 Nov 2019 16:09:43 +0800 Subject: [PATCH 1865/3715] spi: pxa2xx: Add missed security checks [ Upstream commit 5eb263ef08b5014cfc2539a838f39d2fd3531423 ] pxa2xx_spi_init_pdata misses checks for devm_clk_get and platform_get_irq. Add checks for them to fix the bugs. Since ssp->clk and ssp->irq are used in probe, they are mandatory here. So we cannot use _optional() for devm_clk_get and platform_get_irq. Signed-off-by: Chuhong Yuan Link: https://lore.kernel.org/r/20191109080943.30428-1-hslester96@gmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-pxa2xx.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index 9bf3e5f945c7..b2245cdce230 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -1559,7 +1559,13 @@ pxa2xx_spi_init_pdata(struct platform_device *pdev) } ssp->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(ssp->clk)) + return NULL; + ssp->irq = platform_get_irq(pdev, 0); + if (ssp->irq < 0) + return NULL; + ssp->type = type; ssp->pdev = pdev; ssp->port_id = pxa2xx_spi_get_port_id(adev); From e83f9a617d27418978aa294b1328580c7614596d Mon Sep 17 00:00:00 2001 From: Ben Zhang Date: Tue, 5 Nov 2019 17:13:30 -0800 Subject: [PATCH 1866/3715] ASoC: rt5677: Mark reg RT5677_PWR_ANLG2 as volatile [ Upstream commit eabf424f7b60246c76dcb0ea6f1e83ef9abbeaa6 ] The codec dies when RT5677_PWR_ANLG2(MX-64h) is set to 0xACE1 while it's streaming audio over SPI. The DSP firmware turns on PLL2 (MX-64 bit 8) when SPI streaming starts. However regmap does not believe that register can change by itself. When BST1 (bit 15) is turned on with regmap_update_bits(), it doesn't read the register first before write, so PLL2 power bit is cleared by accident. Marking MX-64h as volatile in regmap solved the issue. Signed-off-by: Ben Zhang Signed-off-by: Curtis Malainey Link: https://lore.kernel.org/r/20191106011335.223061-6-cujomalainey@chromium.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt5677.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/rt5677.c b/sound/soc/codecs/rt5677.c index 1cd20b88a3a9..82ee8f4b965b 100644 --- a/sound/soc/codecs/rt5677.c +++ b/sound/soc/codecs/rt5677.c @@ -297,6 +297,7 @@ static bool rt5677_volatile_register(struct device *dev, unsigned int reg) case RT5677_I2C_MASTER_CTRL7: case RT5677_I2C_MASTER_CTRL8: case RT5677_HAP_GENE_CTRL2: + case RT5677_PWR_ANLG2: /* Modified by DSP firmware */ case RT5677_PWR_DSP_ST: case RT5677_PRIV_DATA: case RT5677_ASRC_22: From 3c6202094a4f8a395e563b67be9f7a5923111f63 Mon Sep 17 00:00:00 2001 From: Yu-Hsuan Hsu Date: Tue, 24 Sep 2019 00:29:40 +0800 Subject: [PATCH 1867/3715] ASoC: Intel: kbl_rt5663_rt5514_max98927: Add dmic format constraint [ Upstream commit e2db787bdcb4f2722ecf410168f0583764634e45 ] On KBL platform, the microphone is attached to external codec(rt5514) instead of PCH. However, TDM slot between PCH and codec is 16 bits only. In order to avoid setting wrong format, we should add a constraint to force to use 16 bits format forever. Signed-off-by: Yu-Hsuan Hsu Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20190923162940.199580-1-yuhsuan@chromium.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c index 41cb1fefbd42..405196283688 100644 --- a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c +++ b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c @@ -422,6 +422,9 @@ static int kabylake_dmic_startup(struct snd_pcm_substream *substream) snd_pcm_hw_constraint_list(runtime, 0, SNDRV_PCM_HW_PARAM_CHANNELS, dmic_constraints); + runtime->hw.formats = SNDRV_PCM_FMTBIT_S16_LE; + snd_pcm_hw_constraint_msbits(runtime, 0, 16, 16); + return snd_pcm_hw_constraint_list(substream->runtime, 0, SNDRV_PCM_HW_PARAM_RATE, &constraints_rates); } From b391d3fe8ad79403521f866d2c7e7df7eec49dbe Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 31 Oct 2019 18:25:16 +0100 Subject: [PATCH 1868/3715] s390/disassembler: don't hide instruction addresses [ Upstream commit 544f1d62e3e6c6e6d17a5e56f6139208acb5ff46 ] Due to kptr_restrict, JITted BPF code is now displayed like this: 000000000b6ed1b2: ebdff0800024 stmg %r13,%r15,128(%r15) 000000004cde2ba0: 41d0f040 la %r13,64(%r15) 00000000fbad41b0: a7fbffa0 aghi %r15,-96 Leaking kernel addresses to dmesg is not a concern in this case, because this happens only when JIT debugging is explicitly activated, which only root can do. Use %px in this particular instance, and also to print an instruction address in show_code and PCREL (e.g. brasl) arguments in print_insn. While at present functionally equivalent to %016lx, %px is recommended by Documentation/core-api/printk-formats.rst for such cases. Signed-off-by: Ilya Leoshkevich Reviewed-by: Vasily Gorbik Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- arch/s390/kernel/dis.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 2394557653d5..6d154069c962 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -1930,10 +1930,11 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr) ptr += sprintf(ptr, "%%c%i", value); else if (operand->flags & OPERAND_VR) ptr += sprintf(ptr, "%%v%i", value); - else if (operand->flags & OPERAND_PCREL) - ptr += sprintf(ptr, "%lx", (signed int) value - + addr); - else if (operand->flags & OPERAND_SIGNED) + else if (operand->flags & OPERAND_PCREL) { + void *pcrel = (void *)((int)value + addr); + + ptr += sprintf(ptr, "%px", pcrel); + } else if (operand->flags & OPERAND_SIGNED) ptr += sprintf(ptr, "%i", value); else ptr += sprintf(ptr, "%u", value); @@ -2005,7 +2006,7 @@ void show_code(struct pt_regs *regs) else *ptr++ = ' '; addr = regs->psw.addr + start - 32; - ptr += sprintf(ptr, "%016lx: ", addr); + ptr += sprintf(ptr, "%px: ", (void *)addr); if (start + opsize >= end) break; for (i = 0; i < opsize; i++) @@ -2033,7 +2034,7 @@ void print_fn_code(unsigned char *code, unsigned long len) opsize = insn_length(*code); if (opsize > len) break; - ptr += sprintf(ptr, "%p: ", code); + ptr += sprintf(ptr, "%px: ", code); for (i = 0; i < opsize; i++) ptr += sprintf(ptr, "%02x", code[i]); *ptr++ = '\t'; From 3b524ed1fa05d4e2fef31b47da5abc053cadebd2 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Wed, 16 Oct 2019 15:45:39 +0100 Subject: [PATCH 1869/3715] parport: load lowlevel driver if ports not found [ Upstream commit 231ec2f24dad18d021b361045bbd618ba62a274e ] Usually all the distro will load the parport low level driver as part of their initialization. But we can get into a situation where all the parallel port drivers are built as module and we unload all the modules at a later time. Then if we just do "modprobe parport" it will only load the parport module and will not load the low level driver which will actually register the ports. So, check the bus if there is any parport registered, if not, load the low level driver. We can get into the above situation with all distro but only Suse has setup the alias for "parport_lowlevel" and so it only works in Suse. Users of Debian based distro will need to load the lowlevel module manually. Signed-off-by: Sudip Mukherjee Link: https://lore.kernel.org/r/20191016144540.18810-3-sudipm.mukherjee@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/parport/share.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/parport/share.c b/drivers/parport/share.c index 7b4ee33c1935..15c81cffd2de 100644 --- a/drivers/parport/share.c +++ b/drivers/parport/share.c @@ -230,6 +230,18 @@ static int port_check(struct device *dev, void *dev_drv) return 0; } +/* + * Iterates through all the devices connected to the bus and return 1 + * if the device is a parallel port. + */ + +static int port_detect(struct device *dev, void *dev_drv) +{ + if (is_parport(dev)) + return 1; + return 0; +} + /** * parport_register_driver - register a parallel port device driver * @drv: structure describing the driver @@ -282,6 +294,15 @@ int __parport_register_driver(struct parport_driver *drv, struct module *owner, if (ret) return ret; + /* + * check if bus has any parallel port registered, if + * none is found then load the lowlevel driver. + */ + ret = bus_for_each_dev(&parport_bus_type, NULL, NULL, + port_detect); + if (!ret) + get_lowlevel_driver(); + mutex_lock(®istration_lock); if (drv->match_port) bus_for_each_dev(&parport_bus_type, NULL, drv, From 7c31408136db6c826911f4a214868048f30839b5 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 14 Nov 2019 09:06:17 +0530 Subject: [PATCH 1870/3715] cpufreq: Register drivers only after CPU devices have been registered [ Upstream commit 46770be0cf94149ca48be87719bda1d951066644 ] The cpufreq core heavily depends on the availability of the struct device for CPUs and if they aren't available at the time cpufreq driver is registered, we will never succeed in making cpufreq work. This happens due to following sequence of events: - cpufreq_register_driver() - subsys_interface_register() - return 0; //successful registration of driver ... at a later point of time - register_cpu(); - device_register(); - bus_probe_device(); - sif->add_dev(); - cpufreq_add_dev(); - get_cpu_device(); //FAILS - per_cpu(cpu_sys_devices, num) = &cpu->dev; //used by get_cpu_device() - return 0; //CPU registered successfully Because the per-cpu variable cpu_sys_devices is set only after the CPU device is regsitered, cpufreq will never be able to get it when cpufreq_add_dev() is called. This patch avoids this failure by making sure device structure of at least CPU0 is available when the cpufreq driver is registered, else return -EPROBE_DEFER. Reported-by: Bjorn Andersson Co-developed-by: Amit Kucheria Signed-off-by: Viresh Kumar Tested-by: Amit Kucheria Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/cpufreq/cpufreq.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 480e8c13567c..c798a1233e6a 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2475,6 +2475,13 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) if (cpufreq_disabled()) return -ENODEV; + /* + * The cpufreq core depends heavily on the availability of device + * structure, make sure they are available before proceeding further. + */ + if (!get_cpu_device(0)) + return -EPROBE_DEFER; + if (!driver_data || !driver_data->verify || !driver_data->init || !(driver_data->setpolicy || driver_data->target_index || driver_data->target) || From 061a175c9254cecc45ff0d44a0a2ea138b387fb3 Mon Sep 17 00:00:00 2001 From: Lianbo Jiang Date: Fri, 8 Nov 2019 17:00:27 +0800 Subject: [PATCH 1871/3715] x86/crash: Add a forward declaration of struct kimage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 112eee5d06007dae561f14458bde7f2a4879ef4e ] Add a forward declaration of struct kimage to the crash.h header because future changes will invoke a crash-specific function from the realmode init path and the compiler will complain otherwise like this: In file included from arch/x86/realmode/init.c:11: ./arch/x86/include/asm/crash.h:5:32: warning: ‘struct kimage’ declared inside\ parameter list will not be visible outside of this definition or declaration 5 | int crash_load_segments(struct kimage *image); | ^~~~~~ ./arch/x86/include/asm/crash.h:6:37: warning: ‘struct kimage’ declared inside\ parameter list will not be visible outside of this definition or declaration 6 | int crash_copy_backup_region(struct kimage *image); | ^~~~~~ ./arch/x86/include/asm/crash.h:7:39: warning: ‘struct kimage’ declared inside\ parameter list will not be visible outside of this definition or declaration 7 | int crash_setup_memmap_entries(struct kimage *image, | [ bp: Rewrite the commit message. ] Reported-by: kbuild test robot Signed-off-by: Lianbo Jiang Signed-off-by: Borislav Petkov Cc: bhe@redhat.com Cc: d.hatayama@fujitsu.com Cc: dhowells@redhat.com Cc: dyoung@redhat.com Cc: ebiederm@xmission.com Cc: horms@verge.net.au Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jürgen Gross Cc: kexec@lists.infradead.org Cc: Thomas Gleixner Cc: Tom Lendacky Cc: vgoyal@redhat.com Cc: x86-ml Link: https://lkml.kernel.org/r/20191108090027.11082-4-lijiang@redhat.com Link: https://lkml.kernel.org/r/201910310233.EJRtTMWP%25lkp@intel.com Signed-off-by: Sasha Levin --- arch/x86/include/asm/crash.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/include/asm/crash.h b/arch/x86/include/asm/crash.h index a7adb2bfbf0b..6b8ad6fa3979 100644 --- a/arch/x86/include/asm/crash.h +++ b/arch/x86/include/asm/crash.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_CRASH_H #define _ASM_X86_CRASH_H +struct kimage; + int crash_load_segments(struct kimage *image); int crash_copy_backup_region(struct kimage *image); int crash_setup_memmap_entries(struct kimage *image, From 79bb64337a1d9247d657fe8b1cd9643adfcdf382 Mon Sep 17 00:00:00 2001 From: Wang Xuerui Date: Fri, 15 Nov 2019 09:28:02 +0200 Subject: [PATCH 1872/3715] iwlwifi: mvm: fix unaligned read of rx_pkt_status [ Upstream commit c5aaa8be29b25dfe1731e9a8b19fd91b7b789ee3 ] This is present since the introduction of iwlmvm. Example stack trace on MIPS: [] iwl_mvm_rx_rx_mpdu+0xa8/0xb88 [iwlmvm] [] iwl_pcie_rx_handle+0x420/0xc48 [iwlwifi] Tested with a Wireless AC 7265 for ~6 months, confirmed to fix the problem. No other unaligned accesses are spotted yet. Signed-off-by: Wang Xuerui Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/rx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c index c73e4be9bde3..c31303d13069 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c @@ -62,6 +62,7 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *****************************************************************************/ +#include #include #include #include "iwl-trans.h" @@ -290,7 +291,7 @@ void iwl_mvm_rx_rx_mpdu(struct iwl_mvm *mvm, struct napi_struct *napi, rx_res = (struct iwl_rx_mpdu_res_start *)pkt->data; hdr = (struct ieee80211_hdr *)(pkt->data + sizeof(*rx_res)); len = le16_to_cpu(rx_res->byte_count); - rx_pkt_status = le32_to_cpup((__le32 *) + rx_pkt_status = get_unaligned_le32((__le32 *) (pkt->data + sizeof(*rx_res) + len)); /* Dont use dev_alloc_skb(), we'll have enough headroom once From c4fd0e76e47195b9096f7334102d5e115cd91feb Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Fri, 15 Nov 2019 16:31:22 +0800 Subject: [PATCH 1873/3715] spi: tegra20-slink: add missed clk_unprepare [ Upstream commit 04358e40ba96d687c0811c21d9dede73f5244a98 ] The driver misses calling clk_unprepare in probe failure and remove. Add the calls to fix it. Signed-off-by: Chuhong Yuan Link: https://lore.kernel.org/r/20191115083122.12278-1-hslester96@gmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-tegra20-slink.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c index 9831c1106945..62b074b167a9 100644 --- a/drivers/spi/spi-tegra20-slink.c +++ b/drivers/spi/spi-tegra20-slink.c @@ -1078,7 +1078,7 @@ static int tegra_slink_probe(struct platform_device *pdev) ret = clk_enable(tspi->clk); if (ret < 0) { dev_err(&pdev->dev, "Clock enable failed %d\n", ret); - goto exit_free_master; + goto exit_clk_unprepare; } spi_irq = platform_get_irq(pdev, 0); @@ -1151,6 +1151,8 @@ exit_free_irq: free_irq(spi_irq, tspi); exit_clk_disable: clk_disable(tspi->clk); +exit_clk_unprepare: + clk_unprepare(tspi->clk); exit_free_master: spi_master_put(master); return ret; @@ -1164,6 +1166,7 @@ static int tegra_slink_remove(struct platform_device *pdev) free_irq(tspi->irq, tspi); clk_disable(tspi->clk); + clk_unprepare(tspi->clk); if (tspi->tx_dma_chan) tegra_slink_deinit_dma_param(tspi, false); From 50806c4aa26f941ef665bfd9c2d70a16d8d6e304 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 9 Nov 2019 18:09:27 +0100 Subject: [PATCH 1874/3715] crypto: virtio - deal with unsupported input sizes [ Upstream commit 19c5da7d4a2662e85ea67d2d81df57e038fde3ab ] Return -EINVAL for input sizes that are not a multiple of the AES block size, since they are not supported by our CBC chaining mode. While at it, remove the pr_err() that reports unsupported key sizes being used: we shouldn't spam the kernel log with that. Fixes: dbaf0624ffa5 ("crypto: add virtio-crypto driver") Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Gonglei Cc: virtualization@lists.linux-foundation.org Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/virtio/virtio_crypto_algs.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/virtio/virtio_crypto_algs.c b/drivers/crypto/virtio/virtio_crypto_algs.c index 5035b0dc1e40..e2231a1a05a1 100644 --- a/drivers/crypto/virtio/virtio_crypto_algs.c +++ b/drivers/crypto/virtio/virtio_crypto_algs.c @@ -110,8 +110,6 @@ virtio_crypto_alg_validate_key(int key_len, uint32_t *alg) *alg = VIRTIO_CRYPTO_CIPHER_AES_CBC; break; default: - pr_err("virtio_crypto: Unsupported key length: %d\n", - key_len); return -EINVAL; } return 0; @@ -485,6 +483,11 @@ static int virtio_crypto_ablkcipher_encrypt(struct ablkcipher_request *req) /* Use the first data virtqueue as default */ struct data_queue *data_vq = &vcrypto->data_vq[0]; + if (!req->nbytes) + return 0; + if (req->nbytes % AES_BLOCK_SIZE) + return -EINVAL; + vc_req->dataq = data_vq; vc_req->alg_cb = virtio_crypto_dataq_sym_callback; vc_sym_req->ablkcipher_ctx = ctx; @@ -505,6 +508,11 @@ static int virtio_crypto_ablkcipher_decrypt(struct ablkcipher_request *req) /* Use the first data virtqueue as default */ struct data_queue *data_vq = &vcrypto->data_vq[0]; + if (!req->nbytes) + return 0; + if (req->nbytes % AES_BLOCK_SIZE) + return -EINVAL; + vc_req->dataq = data_vq; vc_req->alg_cb = virtio_crypto_dataq_sym_callback; vc_sym_req->ablkcipher_ctx = ctx; From 9402dae57784ffc81e0f142647a205b13e700a10 Mon Sep 17 00:00:00 2001 From: Eugeniu Rosca Date: Fri, 15 Nov 2019 14:44:30 +0100 Subject: [PATCH 1875/3715] mmc: tmio: Add MMC_CAP_ERASE to allow erase/discard/trim requests [ Upstream commit c91843463e9e821dc3b48fe37e3155fa38299f6e ] Isolated initially to renesas_sdhi_internal_dmac [1], Ulf suggested adding MMC_CAP_ERASE to the TMIO mmc core: On Fri, Nov 15, 2019 at 10:27:25AM +0100, Ulf Hansson wrote: -- snip -- This test and due to the discussions with Wolfram and you in this thread, I would actually suggest that you enable MMC_CAP_ERASE for all tmio variants, rather than just for this particular one. In other words, set the cap in tmio_mmc_host_probe() should be fine, as it seems none of the tmio variants supports HW busy detection at this point. -- snip -- Testing on R-Car H3ULCB-KF doesn't reveal any issues (v5.4-rc7): root@rcar-gen3:~# lsblk NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT mmcblk0 179:0 0 59.2G 0 disk <--- eMMC mmcblk0boot0 179:8 0 4M 1 disk mmcblk0boot1 179:16 0 4M 1 disk mmcblk1 179:24 0 30G 0 disk <--- SD card root@rcar-gen3:~# time blkdiscard /dev/mmcblk0 real 0m8.659s user 0m0.001s sys 0m1.920s root@rcar-gen3:~# time blkdiscard /dev/mmcblk1 real 0m1.176s user 0m0.001s sys 0m0.124s [1] https://lore.kernel.org/linux-renesas-soc/20191112134808.23546-1-erosca@de.adit-jv.com/ Cc: Wolfram Sang Cc: Masahiro Yamada Cc: Andrew Gabbasov Originally-by: Harish Jenny K N Suggested-by: Ulf Hansson Signed-off-by: Eugeniu Rosca Reviewed-by: Wolfram Sang Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/tmio_mmc_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index 2fd862dc9770..a09aad9155a5 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -1220,7 +1220,7 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host, _host->start_signal_voltage_switch; mmc->ops = &tmio_mmc_ops; - mmc->caps |= MMC_CAP_4_BIT_DATA | pdata->capabilities; + mmc->caps |= MMC_CAP_ERASE | MMC_CAP_4_BIT_DATA | pdata->capabilities; mmc->caps2 |= pdata->capabilities2; mmc->max_segs = pdata->max_segs ? : 32; mmc->max_blk_size = 512; From 9e5ae20bb9b5e37d9ec07fe7933e14b4bc19f75f Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 16 Sep 2019 11:30:54 -0700 Subject: [PATCH 1876/3715] btrfs: don't prematurely free work in end_workqueue_fn() [ Upstream commit 9be490f1e15c34193b1aae17da58e14dd9f55a95 ] Currently, end_workqueue_fn() frees the end_io_wq entry (which embeds the work item) and then calls bio_endio(). This is another potential instance of the bug in "btrfs: don't prematurely free work in run_ordered_work()". In particular, the endio call may depend on other work items. For example, btrfs_end_dio_bio() can call btrfs_subio_endio_read() -> __btrfs_correct_data_nocsum() -> dio_read_error() -> submit_dio_repair_bio(), which submits a bio that is also completed through a end_workqueue_fn() work item. However, __btrfs_correct_data_nocsum() waits for the newly submitted bio to complete, thus it depends on another work item. This example currently usually works because we use different workqueue helper functions for BTRFS_WQ_ENDIO_DATA and BTRFS_WQ_ENDIO_DIO_REPAIR. However, it may deadlock with stacked filesystems and is fragile overall. The proper fix is to free the work item at the very end of the work function, so let's do that. Reviewed-by: Johannes Thumshirn Signed-off-by: Omar Sandoval Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 813834552aa1..a8ea56218d6b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1679,8 +1679,8 @@ static void end_workqueue_fn(struct btrfs_work *work) bio->bi_status = end_io_wq->status; bio->bi_private = end_io_wq->private; bio->bi_end_io = end_io_wq->end_io; - kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq); bio_endio(bio); + kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq); } static int cleaner_kthread(void *arg) From 6d52fb75cd543ae3f5ff443294ce3bef56bb12fe Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 16 Sep 2019 11:30:53 -0700 Subject: [PATCH 1877/3715] btrfs: don't prematurely free work in run_ordered_work() [ Upstream commit c495dcd6fbe1dce51811a76bb85b4675f6494938 ] We hit the following very strange deadlock on a system with Btrfs on a loop device backed by another Btrfs filesystem: 1. The top (loop device) filesystem queues an async_cow work item from cow_file_range_async(). We'll call this work X. 2. Worker thread A starts work X (normal_work_helper()). 3. Worker thread A executes the ordered work for the top filesystem (run_ordered_work()). 4. Worker thread A finishes the ordered work for work X and frees X (work->ordered_free()). 5. Worker thread A executes another ordered work and gets blocked on I/O to the bottom filesystem (still in run_ordered_work()). 6. Meanwhile, the bottom filesystem allocates and queues an async_cow work item which happens to be the recently-freed X. 7. The workqueue code sees that X is already being executed by worker thread A, so it schedules X to be executed _after_ worker thread A finishes (see the find_worker_executing_work() call in process_one_work()). Now, the top filesystem is waiting for I/O on the bottom filesystem, but the bottom filesystem is waiting for the top filesystem to finish, so we deadlock. This happens because we are breaking the workqueue assumption that a work item cannot be recycled while it still depends on other work. Fix it by waiting to free the work item until we are done with all of the related ordered work. P.S.: One might ask why the workqueue code doesn't try to detect a recycled work item. It actually does try by checking whether the work item has the same work function (find_worker_executing_work()), but in our case the function is the same. This is the only key that the workqueue code has available to compare, short of adding an additional, layer-violating "custom key". Considering that we're the only ones that have ever hit this, we should just play by the rules. Unfortunately, we haven't been able to create a minimal reproducer other than our full container setup using a compress-force=zstd filesystem on top of another compress-force=zstd filesystem. Suggested-by: Tejun Heo Reviewed-by: Johannes Thumshirn Signed-off-by: Omar Sandoval Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/async-thread.c | 56 ++++++++++++++++++++++++++++++++--------- 1 file changed, 44 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index e00c8a9fd5bb..72d7589072f5 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -265,16 +265,17 @@ out: } } -static void run_ordered_work(struct __btrfs_workqueue *wq) +static void run_ordered_work(struct __btrfs_workqueue *wq, + struct btrfs_work *self) { struct list_head *list = &wq->ordered_list; struct btrfs_work *work; spinlock_t *lock = &wq->list_lock; unsigned long flags; + void *wtag; + bool free_self = false; while (1) { - void *wtag; - spin_lock_irqsave(lock, flags); if (list_empty(list)) break; @@ -300,16 +301,47 @@ static void run_ordered_work(struct __btrfs_workqueue *wq) list_del(&work->ordered_list); spin_unlock_irqrestore(lock, flags); - /* - * We don't want to call the ordered free functions with the - * lock held though. Save the work as tag for the trace event, - * because the callback could free the structure. - */ - wtag = work; - work->ordered_free(work); - trace_btrfs_all_work_done(wq->fs_info, wtag); + if (work == self) { + /* + * This is the work item that the worker is currently + * executing. + * + * The kernel workqueue code guarantees non-reentrancy + * of work items. I.e., if a work item with the same + * address and work function is queued twice, the second + * execution is blocked until the first one finishes. A + * work item may be freed and recycled with the same + * work function; the workqueue code assumes that the + * original work item cannot depend on the recycled work + * item in that case (see find_worker_executing_work()). + * + * Note that the work of one Btrfs filesystem may depend + * on the work of another Btrfs filesystem via, e.g., a + * loop device. Therefore, we must not allow the current + * work item to be recycled until we are really done, + * otherwise we break the above assumption and can + * deadlock. + */ + free_self = true; + } else { + /* + * We don't want to call the ordered free functions with + * the lock held though. Save the work as tag for the + * trace event, because the callback could free the + * structure. + */ + wtag = work; + work->ordered_free(work); + trace_btrfs_all_work_done(wq->fs_info, wtag); + } } spin_unlock_irqrestore(lock, flags); + + if (free_self) { + wtag = self; + self->ordered_free(self); + trace_btrfs_all_work_done(wq->fs_info, wtag); + } } static void normal_work_helper(struct btrfs_work *work) @@ -337,7 +369,7 @@ static void normal_work_helper(struct btrfs_work *work) work->func(work); if (need_order) { set_bit(WORK_DONE_BIT, &work->flags); - run_ordered_work(wq); + run_ordered_work(wq, work); } if (!need_order) trace_btrfs_all_work_done(wq->fs_info, wtag); From d9d790232151e21e5871ca41134f6eecc6eaf514 Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Mon, 18 Nov 2019 10:48:48 +0800 Subject: [PATCH 1878/3715] spi: st-ssc4: add missed pm_runtime_disable [ Upstream commit cd050abeba2a95fe5374eec28ad2244617bcbab6 ] The driver forgets to call pm_runtime_disable in probe failure and remove. Add the missed calls to fix it. Signed-off-by: Chuhong Yuan Link: https://lore.kernel.org/r/20191118024848.21645-1-hslester96@gmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-st-ssc4.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/spi/spi-st-ssc4.c b/drivers/spi/spi-st-ssc4.c index a4e43fc19ece..5df01ffdef46 100644 --- a/drivers/spi/spi-st-ssc4.c +++ b/drivers/spi/spi-st-ssc4.c @@ -385,6 +385,7 @@ static int spi_st_probe(struct platform_device *pdev) return 0; clk_disable: + pm_runtime_disable(&pdev->dev); clk_disable_unprepare(spi_st->clk); put_master: spi_master_put(master); @@ -396,6 +397,8 @@ static int spi_st_remove(struct platform_device *pdev) struct spi_master *master = platform_get_drvdata(pdev); struct spi_st *spi_st = spi_master_get_devdata(master); + pm_runtime_disable(&pdev->dev); + clk_disable_unprepare(spi_st->clk); pinctrl_pm_select_sleep_state(&pdev->dev); From 21f32d7121560ed0c79fb6887682e731b9839161 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 15 Nov 2019 15:54:47 +0200 Subject: [PATCH 1879/3715] x86/insn: Add some Intel instructions to the opcode map [ Upstream commit b980be189c9badba50634671e2303e92bf28e35a ] Add to the opcode map the following instructions: cldemote tpause umonitor umwait movdiri movdir64b enqcmd enqcmds encls enclu enclv pconfig wbnoinvd For information about the instructions, refer Intel SDM May 2019 (325462-070US) and Intel Architecture Instruction Set Extensions May 2019 (319433-037). The instruction decoding can be tested using the perf tools' "x86 instruction decoder - new instructions" test as folllows: $ perf test -v "new " 2>&1 | grep -i cldemote Decoded ok: 0f 1c 00 cldemote (%eax) Decoded ok: 0f 1c 05 78 56 34 12 cldemote 0x12345678 Decoded ok: 0f 1c 84 c8 78 56 34 12 cldemote 0x12345678(%eax,%ecx,8) Decoded ok: 0f 1c 00 cldemote (%rax) Decoded ok: 41 0f 1c 00 cldemote (%r8) Decoded ok: 0f 1c 04 25 78 56 34 12 cldemote 0x12345678 Decoded ok: 0f 1c 84 c8 78 56 34 12 cldemote 0x12345678(%rax,%rcx,8) Decoded ok: 41 0f 1c 84 c8 78 56 34 12 cldemote 0x12345678(%r8,%rcx,8) $ perf test -v "new " 2>&1 | grep -i tpause Decoded ok: 66 0f ae f3 tpause %ebx Decoded ok: 66 0f ae f3 tpause %ebx Decoded ok: 66 41 0f ae f0 tpause %r8d $ perf test -v "new " 2>&1 | grep -i umonitor Decoded ok: 67 f3 0f ae f0 umonitor %ax Decoded ok: f3 0f ae f0 umonitor %eax Decoded ok: 67 f3 0f ae f0 umonitor %eax Decoded ok: f3 0f ae f0 umonitor %rax Decoded ok: 67 f3 41 0f ae f0 umonitor %r8d $ perf test -v "new " 2>&1 | grep -i umwait Decoded ok: f2 0f ae f0 umwait %eax Decoded ok: f2 0f ae f0 umwait %eax Decoded ok: f2 41 0f ae f0 umwait %r8d $ perf test -v "new " 2>&1 | grep -i movdiri Decoded ok: 0f 38 f9 03 movdiri %eax,(%ebx) Decoded ok: 0f 38 f9 88 78 56 34 12 movdiri %ecx,0x12345678(%eax) Decoded ok: 48 0f 38 f9 03 movdiri %rax,(%rbx) Decoded ok: 48 0f 38 f9 88 78 56 34 12 movdiri %rcx,0x12345678(%rax) $ perf test -v "new " 2>&1 | grep -i movdir64b Decoded ok: 66 0f 38 f8 18 movdir64b (%eax),%ebx Decoded ok: 66 0f 38 f8 88 78 56 34 12 movdir64b 0x12345678(%eax),%ecx Decoded ok: 67 66 0f 38 f8 1c movdir64b (%si),%bx Decoded ok: 67 66 0f 38 f8 8c 34 12 movdir64b 0x1234(%si),%cx Decoded ok: 66 0f 38 f8 18 movdir64b (%rax),%rbx Decoded ok: 66 0f 38 f8 88 78 56 34 12 movdir64b 0x12345678(%rax),%rcx Decoded ok: 67 66 0f 38 f8 18 movdir64b (%eax),%ebx Decoded ok: 67 66 0f 38 f8 88 78 56 34 12 movdir64b 0x12345678(%eax),%ecx $ perf test -v "new " 2>&1 | grep -i enqcmd Decoded ok: f2 0f 38 f8 18 enqcmd (%eax),%ebx Decoded ok: f2 0f 38 f8 88 78 56 34 12 enqcmd 0x12345678(%eax),%ecx Decoded ok: 67 f2 0f 38 f8 1c enqcmd (%si),%bx Decoded ok: 67 f2 0f 38 f8 8c 34 12 enqcmd 0x1234(%si),%cx Decoded ok: f3 0f 38 f8 18 enqcmds (%eax),%ebx Decoded ok: f3 0f 38 f8 88 78 56 34 12 enqcmds 0x12345678(%eax),%ecx Decoded ok: 67 f3 0f 38 f8 1c enqcmds (%si),%bx Decoded ok: 67 f3 0f 38 f8 8c 34 12 enqcmds 0x1234(%si),%cx Decoded ok: f2 0f 38 f8 18 enqcmd (%rax),%rbx Decoded ok: f2 0f 38 f8 88 78 56 34 12 enqcmd 0x12345678(%rax),%rcx Decoded ok: 67 f2 0f 38 f8 18 enqcmd (%eax),%ebx Decoded ok: 67 f2 0f 38 f8 88 78 56 34 12 enqcmd 0x12345678(%eax),%ecx Decoded ok: f3 0f 38 f8 18 enqcmds (%rax),%rbx Decoded ok: f3 0f 38 f8 88 78 56 34 12 enqcmds 0x12345678(%rax),%rcx Decoded ok: 67 f3 0f 38 f8 18 enqcmds (%eax),%ebx Decoded ok: 67 f3 0f 38 f8 88 78 56 34 12 enqcmds 0x12345678(%eax),%ecx $ perf test -v "new " 2>&1 | grep -i enqcmds Decoded ok: f3 0f 38 f8 18 enqcmds (%eax),%ebx Decoded ok: f3 0f 38 f8 88 78 56 34 12 enqcmds 0x12345678(%eax),%ecx Decoded ok: 67 f3 0f 38 f8 1c enqcmds (%si),%bx Decoded ok: 67 f3 0f 38 f8 8c 34 12 enqcmds 0x1234(%si),%cx Decoded ok: f3 0f 38 f8 18 enqcmds (%rax),%rbx Decoded ok: f3 0f 38 f8 88 78 56 34 12 enqcmds 0x12345678(%rax),%rcx Decoded ok: 67 f3 0f 38 f8 18 enqcmds (%eax),%ebx Decoded ok: 67 f3 0f 38 f8 88 78 56 34 12 enqcmds 0x12345678(%eax),%ecx $ perf test -v "new " 2>&1 | grep -i encls Decoded ok: 0f 01 cf encls Decoded ok: 0f 01 cf encls $ perf test -v "new " 2>&1 | grep -i enclu Decoded ok: 0f 01 d7 enclu Decoded ok: 0f 01 d7 enclu $ perf test -v "new " 2>&1 | grep -i enclv Decoded ok: 0f 01 c0 enclv Decoded ok: 0f 01 c0 enclv $ perf test -v "new " 2>&1 | grep -i pconfig Decoded ok: 0f 01 c5 pconfig Decoded ok: 0f 01 c5 pconfig $ perf test -v "new " 2>&1 | grep -i wbnoinvd Decoded ok: f3 0f 09 wbnoinvd Decoded ok: f3 0f 09 wbnoinvd Signed-off-by: Adrian Hunter Reviewed-by: Andi Kleen Acked-by: Masami Hiramatsu Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: x86@kernel.org Link: http://lore.kernel.org/lkml/20191115135447.6519-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- arch/x86/lib/x86-opcode-map.txt | 18 ++++++++++++------ tools/objtool/arch/x86/lib/x86-opcode-map.txt | 18 ++++++++++++------ 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index e0b85930dd77..0a0e9112f284 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -333,7 +333,7 @@ AVXcode: 1 06: CLTS 07: SYSRET (o64) 08: INVD -09: WBINVD +09: WBINVD | WBNOINVD (F3) 0a: 0b: UD2 (1B) 0c: @@ -364,7 +364,7 @@ AVXcode: 1 # a ModR/M byte. 1a: BNDCL Gv,Ev (F3) | BNDCU Gv,Ev (F2) | BNDMOV Gv,Ev (66) | BNDLDX Gv,Ev 1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv -1c: +1c: Grp20 (1A),(1C) 1d: 1e: 1f: NOP Ev @@ -792,6 +792,8 @@ f3: Grp17 (1A) f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) +f8: MOVDIR64B Gv,Mdqq (66) | ENQCMD Gv,Mdqq (F2) | ENQCMDS Gv,Mdqq (F3) +f9: MOVDIRI My,Gy EndTable Table: 3-byte opcode 2 (0x0f 0x3a) @@ -943,9 +945,9 @@ GrpTable: Grp6 EndTable GrpTable: Grp7 -0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) -1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) -2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) +0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) | PCONFIG (101),(11B) | ENCLV (000),(11B) +1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) | ENCLS (111),(11B) +2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B) 3: LIDT Ms 4: SMSW Mw/Rv 5: rdpkru (110),(11B) | wrpkru (111),(11B) @@ -1020,7 +1022,7 @@ GrpTable: Grp15 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) 4: XSAVE | ptwrite Ey (F3),(11B) 5: XRSTOR | lfence (11B) -6: XSAVEOPT | clwb (66) | mfence (11B) +6: XSAVEOPT | clwb (66) | mfence (11B) | TPAUSE Rd (66),(11B) | UMONITOR Rv (F3),(11B) | UMWAIT Rd (F2),(11B) 7: clflush | clflushopt (66) | sfence (11B) EndTable @@ -1051,6 +1053,10 @@ GrpTable: Grp19 6: vscatterpf1qps/d Wx (66),(ev) EndTable +GrpTable: Grp20 +0: cldemote Mb +EndTable + # AMD's Prefetch Group GrpTable: GrpP 0: PREFETCH diff --git a/tools/objtool/arch/x86/lib/x86-opcode-map.txt b/tools/objtool/arch/x86/lib/x86-opcode-map.txt index e0b85930dd77..0a0e9112f284 100644 --- a/tools/objtool/arch/x86/lib/x86-opcode-map.txt +++ b/tools/objtool/arch/x86/lib/x86-opcode-map.txt @@ -333,7 +333,7 @@ AVXcode: 1 06: CLTS 07: SYSRET (o64) 08: INVD -09: WBINVD +09: WBINVD | WBNOINVD (F3) 0a: 0b: UD2 (1B) 0c: @@ -364,7 +364,7 @@ AVXcode: 1 # a ModR/M byte. 1a: BNDCL Gv,Ev (F3) | BNDCU Gv,Ev (F2) | BNDMOV Gv,Ev (66) | BNDLDX Gv,Ev 1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv -1c: +1c: Grp20 (1A),(1C) 1d: 1e: 1f: NOP Ev @@ -792,6 +792,8 @@ f3: Grp17 (1A) f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) +f8: MOVDIR64B Gv,Mdqq (66) | ENQCMD Gv,Mdqq (F2) | ENQCMDS Gv,Mdqq (F3) +f9: MOVDIRI My,Gy EndTable Table: 3-byte opcode 2 (0x0f 0x3a) @@ -943,9 +945,9 @@ GrpTable: Grp6 EndTable GrpTable: Grp7 -0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) -1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) -2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) +0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) | PCONFIG (101),(11B) | ENCLV (000),(11B) +1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) | ENCLS (111),(11B) +2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B) 3: LIDT Ms 4: SMSW Mw/Rv 5: rdpkru (110),(11B) | wrpkru (111),(11B) @@ -1020,7 +1022,7 @@ GrpTable: Grp15 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) 4: XSAVE | ptwrite Ey (F3),(11B) 5: XRSTOR | lfence (11B) -6: XSAVEOPT | clwb (66) | mfence (11B) +6: XSAVEOPT | clwb (66) | mfence (11B) | TPAUSE Rd (66),(11B) | UMONITOR Rv (F3),(11B) | UMWAIT Rd (F2),(11B) 7: clflush | clflushopt (66) | sfence (11B) EndTable @@ -1051,6 +1053,10 @@ GrpTable: Grp19 6: vscatterpf1qps/d Wx (66),(ev) EndTable +GrpTable: Grp20 +0: cldemote Mb +EndTable + # AMD's Prefetch Group GrpTable: GrpP 0: PREFETCH From 7a7f55f4cff4c0723b00e9ac11a30ce407f71f53 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 5 Nov 2019 14:50:32 +0100 Subject: [PATCH 1880/3715] iwlwifi: check kasprintf() return value [ Upstream commit 5974fbb5e10b018fdbe3c3b81cb4cc54e1105ab9 ] kasprintf() can fail, we should check the return value. Fixes: 5ed540aecc2a ("iwlwifi: use mac80211 throughput trigger") Fixes: 8ca151b568b6 ("iwlwifi: add the MVM driver") Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/dvm/led.c | 3 +++ drivers/net/wireless/intel/iwlwifi/mvm/led.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/led.c b/drivers/net/wireless/intel/iwlwifi/dvm/led.c index 1bbd17ada974..20e16c423990 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/led.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/led.c @@ -185,6 +185,9 @@ void iwl_leds_init(struct iwl_priv *priv) priv->led.name = kasprintf(GFP_KERNEL, "%s-led", wiphy_name(priv->hw->wiphy)); + if (!priv->led.name) + return; + priv->led.brightness_set = iwl_led_brightness_set; priv->led.blink_set = iwl_led_blink_set; priv->led.max_brightness = 1; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/led.c b/drivers/net/wireless/intel/iwlwifi/mvm/led.c index b27269504a62..072f80c90ce4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/led.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/led.c @@ -131,6 +131,9 @@ int iwl_mvm_leds_init(struct iwl_mvm *mvm) mvm->led.name = kasprintf(GFP_KERNEL, "%s-led", wiphy_name(mvm->hw->wiphy)); + if (!mvm->led.name) + return -ENOMEM; + mvm->led.brightness_set = iwl_led_brightness_set; mvm->led.max_brightness = 1; From 5b769b8ee1960b9eeb1487f813ff107ad3a02790 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 20 Nov 2019 11:57:12 +0200 Subject: [PATCH 1881/3715] fbtft: Make sure string is NULL terminated MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 21f585480deb4bcf0d92b08879c35d066dfee030 ] New GCC warns about inappropriate use of strncpy(): drivers/staging/fbtft/fbtft-core.c: In function ‘fbtft_framebuffer_alloc’: drivers/staging/fbtft/fbtft-core.c:665:2: warning: ‘strncpy’ specified bound 16 equals destination size [-Wstringop-truncation] 665 | strncpy(info->fix.id, dev->driver->name, 16); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Later on the copy is being used with the assumption to be NULL terminated. Make sure string is NULL terminated by switching to snprintf(). Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20191120095716.26628-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/staging/fbtft/fbtft-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/fbtft/fbtft-core.c b/drivers/staging/fbtft/fbtft-core.c index 0cbcbad8f074..b81c6dfa5b24 100644 --- a/drivers/staging/fbtft/fbtft-core.c +++ b/drivers/staging/fbtft/fbtft-core.c @@ -780,7 +780,7 @@ struct fb_info *fbtft_framebuffer_alloc(struct fbtft_display *display, fbdefio->deferred_io = fbtft_deferred_io; fb_deferred_io_init(info); - strncpy(info->fix.id, dev->driver->name, 16); + snprintf(info->fix.id, sizeof(info->fix.id), "%s", dev->driver->name); info->fix.type = FB_TYPE_PACKED_PIXELS; info->fix.visual = FB_VISUAL_TRUECOLOR; info->fix.xpanstep = 0; From 08f433fca0056563812a8d5ff436d9a629e2029f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 12 Nov 2019 10:38:34 +0800 Subject: [PATCH 1882/3715] crypto: sun4i-ss - Fix 64-bit size_t warnings [ Upstream commit d6e9da21ee8246b5e556b3b153401ab045adb986 ] If you try to compile this driver on a 64-bit platform then you will get warnings because it mixes size_t with unsigned int which only works on 32-bit. This patch fixes all of the warnings. Signed-off-by: Herbert Xu Acked-by: Corentin Labbe Tested-by: Corentin Labbe Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/sunxi-ss/sun4i-ss-cipher.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c b/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c index 5cf64746731a..22e491857925 100644 --- a/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c +++ b/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c @@ -81,7 +81,8 @@ static int sun4i_ss_opti_poll(struct skcipher_request *areq) oi = 0; oo = 0; do { - todo = min3(rx_cnt, ileft, (mi.length - oi) / 4); + todo = min(rx_cnt, ileft); + todo = min_t(size_t, todo, (mi.length - oi) / 4); if (todo) { ileft -= todo; writesl(ss->base + SS_RXFIFO, mi.addr + oi, todo); @@ -96,7 +97,8 @@ static int sun4i_ss_opti_poll(struct skcipher_request *areq) rx_cnt = SS_RXFIFO_SPACES(spaces); tx_cnt = SS_TXFIFO_SPACES(spaces); - todo = min3(tx_cnt, oleft, (mo.length - oo) / 4); + todo = min(tx_cnt, oleft); + todo = min_t(size_t, todo, (mo.length - oo) / 4); if (todo) { oleft -= todo; readsl(ss->base + SS_TXFIFO, mo.addr + oo, todo); @@ -220,7 +222,8 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) * todo is the number of consecutive 4byte word that we * can read from current SG */ - todo = min3(rx_cnt, ileft / 4, (mi.length - oi) / 4); + todo = min(rx_cnt, ileft / 4); + todo = min_t(size_t, todo, (mi.length - oi) / 4); if (todo && !ob) { writesl(ss->base + SS_RXFIFO, mi.addr + oi, todo); @@ -234,8 +237,8 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) * we need to be able to write all buf in one * pass, so it is why we min() with rx_cnt */ - todo = min3(rx_cnt * 4 - ob, ileft, - mi.length - oi); + todo = min(rx_cnt * 4 - ob, ileft); + todo = min_t(size_t, todo, mi.length - oi); memcpy(buf + ob, mi.addr + oi, todo); ileft -= todo; oi += todo; @@ -255,7 +258,8 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) spaces = readl(ss->base + SS_FCSR); rx_cnt = SS_RXFIFO_SPACES(spaces); tx_cnt = SS_TXFIFO_SPACES(spaces); - dev_dbg(ss->dev, "%x %u/%u %u/%u cnt=%u %u/%u %u/%u cnt=%u %u\n", + dev_dbg(ss->dev, + "%x %u/%zu %u/%u cnt=%u %u/%zu %u/%u cnt=%u %u\n", mode, oi, mi.length, ileft, areq->cryptlen, rx_cnt, oo, mo.length, oleft, areq->cryptlen, tx_cnt, ob); @@ -263,7 +267,8 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) if (!tx_cnt) continue; /* todo in 4bytes word */ - todo = min3(tx_cnt, oleft / 4, (mo.length - oo) / 4); + todo = min(tx_cnt, oleft / 4); + todo = min_t(size_t, todo, (mo.length - oo) / 4); if (todo) { readsl(ss->base + SS_TXFIFO, mo.addr + oo, todo); oleft -= todo * 4; @@ -287,7 +292,8 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) * no more than remaining buffer * no need to test against oleft */ - todo = min(mo.length - oo, obl - obo); + todo = min_t(size_t, + mo.length - oo, obl - obo); memcpy(mo.addr + oo, bufo + obo, todo); oleft -= todo; obo += todo; From cb40551504fd78fccbb32efa3c51ce043f9b7d8d Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Thu, 14 Nov 2019 11:49:06 +0100 Subject: [PATCH 1883/3715] crypto: sun4i-ss - Fix 64-bit size_t warnings on sun4i-ss-hash.c [ Upstream commit a7126603d46fe8f01aeedf589e071c6aaa6c6c39 ] If you try to compile this driver on a 64-bit platform then you will get warnings because it mixes size_t with unsigned int which only works on 32-bit. This patch fixes all of the warnings on sun4i-ss-hash.c. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/sunxi-ss/sun4i-ss-hash.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-hash.c b/drivers/crypto/sunxi-ss/sun4i-ss-hash.c index f6936bb3b7be..1a724263761b 100644 --- a/drivers/crypto/sunxi-ss/sun4i-ss-hash.c +++ b/drivers/crypto/sunxi-ss/sun4i-ss-hash.c @@ -276,8 +276,8 @@ static int sun4i_hash(struct ahash_request *areq) */ while (op->len < 64 && i < end) { /* how many bytes we can read from current SG */ - in_r = min3(mi.length - in_i, end - i, - 64 - op->len); + in_r = min(end - i, 64 - op->len); + in_r = min_t(size_t, mi.length - in_i, in_r); memcpy(op->buf + op->len, mi.addr + in_i, in_r); op->len += in_r; i += in_r; @@ -297,8 +297,8 @@ static int sun4i_hash(struct ahash_request *areq) } if (mi.length - in_i > 3 && i < end) { /* how many bytes we can read from current SG */ - in_r = min3(mi.length - in_i, areq->nbytes - i, - ((mi.length - in_i) / 4) * 4); + in_r = min_t(size_t, mi.length - in_i, areq->nbytes - i); + in_r = min_t(size_t, ((mi.length - in_i) / 4) * 4, in_r); /* how many bytes we can write in the device*/ todo = min3((u32)(end - i) / 4, rx_cnt, (u32)in_r / 4); writesl(ss->base + SS_RXFIFO, mi.addr + in_i, todo); @@ -324,8 +324,8 @@ static int sun4i_hash(struct ahash_request *areq) if ((areq->nbytes - i) < 64) { while (i < areq->nbytes && in_i < mi.length && op->len < 64) { /* how many bytes we can read from current SG */ - in_r = min3(mi.length - in_i, areq->nbytes - i, - 64 - op->len); + in_r = min(areq->nbytes - i, 64 - op->len); + in_r = min_t(size_t, mi.length - in_i, in_r); memcpy(op->buf + op->len, mi.addr + in_i, in_r); op->len += in_r; i += in_r; From 387053b4b4367c46f685b0d4f6d4b02be86f498a Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Mon, 18 Nov 2019 21:35:38 -0800 Subject: [PATCH 1884/3715] mac80211: consider QoS Null frames for STA_NULLFUNC_ACKED [ Upstream commit 08a5bdde3812993cb8eb7aa9124703df0de28e4b ] Commit 7b6ddeaf27ec ("mac80211: use QoS NDP for AP probing") let STAs send QoS Null frames as PS triggers if the AP was a QoS STA. However, the mac80211 PS stack relies on an interface flag IEEE80211_STA_NULLFUNC_ACKED for determining trigger frame ACK, which was not being set for acked non-QoS Null frames. The effect is an inability to trigger hardware sleep via IEEE80211_CONF_PS since the QoS Null frame was seemingly never acked. This bug only applies to drivers which set both IEEE80211_HW_REPORTS_TX_ACK_STATUS and IEEE80211_HW_PS_NULLFUNC_STACK. Detect the acked QoS Null frame to restore STA power save. Fixes: 7b6ddeaf27ec ("mac80211: use QoS NDP for AP probing") Signed-off-by: Thomas Pedersen Link: https://lore.kernel.org/r/20191119053538.25979-4-thomas@adapt-ip.com Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/status.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/status.c b/net/mac80211/status.c index b18466cf466c..fbe7354aeac7 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -856,7 +856,8 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, I802_DEBUG_INC(local->dot11FailedCount); } - if (ieee80211_is_nullfunc(fc) && ieee80211_has_pm(fc) && + if ((ieee80211_is_nullfunc(fc) || ieee80211_is_qos_nullfunc(fc)) && + ieee80211_has_pm(fc) && ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS) && !(info->flags & IEEE80211_TX_CTL_INJECTED) && local->ps_sdata && !(local->scanning)) { From e46523a24db0a8c48a072c6f75184f8ff4b222ca Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 20 Nov 2019 22:27:38 +1100 Subject: [PATCH 1885/3715] crypto: vmx - Avoid weird build failures [ Upstream commit 4ee812f6143d78d8ba1399671d78c8d78bf2817c ] In the vmx crypto Makefile we assign to a variable called TARGET and pass that to the aesp8-ppc.pl and ghashp8-ppc.pl scripts. The variable is meant to describe what flavour of powerpc we're building for, eg. either 32 or 64-bit, and big or little endian. Unfortunately TARGET is a fairly common name for a make variable, and if it happens that TARGET is specified as a command line parameter to make, the value specified on the command line will override our value. In particular this can happen if the kernel Makefile is driven by an external Makefile that uses TARGET for something. This leads to weird build failures, eg: nonsense at /build/linux/drivers/crypto/vmx/ghashp8-ppc.pl line 45. /linux/drivers/crypto/vmx/Makefile:20: recipe for target 'drivers/crypto/vmx/ghashp8-ppc.S' failed Which shows that we passed an empty value for $(TARGET) to the perl script, confirmed with make V=1: perl /linux/drivers/crypto/vmx/ghashp8-ppc.pl > drivers/crypto/vmx/ghashp8-ppc.S We can avoid this confusion by using override, to tell make that we don't want anything to override our variable, even a value specified on the command line. We can also use a less common name, given the script calls it "flavour", let's use that. Signed-off-by: Michael Ellerman Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/vmx/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/vmx/Makefile b/drivers/crypto/vmx/Makefile index cab32cfec9c4..709670d2b553 100644 --- a/drivers/crypto/vmx/Makefile +++ b/drivers/crypto/vmx/Makefile @@ -3,13 +3,13 @@ obj-$(CONFIG_CRYPTO_DEV_VMX_ENCRYPT) += vmx-crypto.o vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o aes_xts.o ghash.o ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) -TARGET := linux-ppc64le +override flavour := linux-ppc64le else -TARGET := linux-ppc64 +override flavour := linux-ppc64 endif quiet_cmd_perl = PERL $@ - cmd_perl = $(PERL) $(<) $(TARGET) > $(@) + cmd_perl = $(PERL) $(<) $(flavour) > $(@) targets += aesp8-ppc.S ghashp8-ppc.S From 17dae5b25057781e09866535d50ed8d30a405d58 Mon Sep 17 00:00:00 2001 From: Hewenliang Date: Mon, 18 Nov 2019 20:44:15 -0500 Subject: [PATCH 1886/3715] libtraceevent: Fix memory leakage in copy_filter_type [ Upstream commit 10992af6bf46a2048ad964985a5b77464e5563b1 ] It is necessary to free the memory that we have allocated when error occurs. Fixes: ef3072cd1d5c ("tools lib traceevent: Get rid of die in add_filter_type()") Signed-off-by: Hewenliang Reviewed-by: Steven Rostedt (VMware) Cc: Tzvetomir Stoyanov Link: http://lore.kernel.org/lkml/20191119014415.57210-1-hewenliang4@huawei.com Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/lib/traceevent/parse-filter.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index 5e10ba796a6f..569bceff5f51 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -1492,8 +1492,10 @@ static int copy_filter_type(struct event_filter *filter, if (strcmp(str, "TRUE") == 0 || strcmp(str, "FALSE") == 0) { /* Add trivial event */ arg = allocate_arg(); - if (arg == NULL) + if (arg == NULL) { + free(str); return -1; + } arg->type = FILTER_ARG_BOOLEAN; if (strcmp(str, "TRUE") == 0) @@ -1502,8 +1504,11 @@ static int copy_filter_type(struct event_filter *filter, arg->boolean.value = 0; filter_type = add_filter_type(filter, event->id); - if (filter_type == NULL) + if (filter_type == NULL) { + free(str); + free_arg(arg); return -1; + } filter_type->filter = arg; From b4f2c6e4cfff4b987797cbda02884364c695c349 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 21 Nov 2019 18:21:31 +0200 Subject: [PATCH 1887/3715] mips: fix build when "48 bits virtual memory" is enabled [ Upstream commit 3ed6751bb8fa89c3014399bb0414348499ee202a ] With CONFIG_MIPS_VA_BITS_48=y the build fails miserably: CC arch/mips/kernel/asm-offsets.s In file included from arch/mips/include/asm/pgtable.h:644, from include/linux/mm.h:99, from arch/mips/kernel/asm-offsets.c:15: include/asm-generic/pgtable.h:16:2: error: #error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{P4D,PUD,PMD}_FOLDED #error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{P4D,PUD,PMD}_FOLDED ^~~~~ include/asm-generic/pgtable.h:390:28: error: unknown type name 'p4d_t'; did you mean 'pmd_t'? static inline int p4d_same(p4d_t p4d_a, p4d_t p4d_b) ^~~~~ pmd_t [ ... more such errors ... ] scripts/Makefile.build:99: recipe for target 'arch/mips/kernel/asm-offsets.s' failed make[2]: *** [arch/mips/kernel/asm-offsets.s] Error 1 This happens because when CONFIG_MIPS_VA_BITS_48 enables 4th level of the page tables, but neither pgtable-nop4d.h nor 5level-fixup.h are included to cope with the 5th level. Replace #ifdef conditions around includes of the pgtable-nop{m,u}d.h with explicit CONFIG_PGTABLE_LEVELS and add include of 5level-fixup.h for the case when CONFIG_PGTABLE_LEVELS==4 Signed-off-by: Mike Rapoport Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org Cc: Mike Rapoport Signed-off-by: Sasha Levin --- arch/mips/include/asm/pgtable-64.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h index a2252c2a9ded..d0b9912fb63f 100644 --- a/arch/mips/include/asm/pgtable-64.h +++ b/arch/mips/include/asm/pgtable-64.h @@ -18,10 +18,12 @@ #include #define __ARCH_USE_5LEVEL_HACK -#if defined(CONFIG_PAGE_SIZE_64KB) && !defined(CONFIG_MIPS_VA_BITS_48) +#if CONFIG_PGTABLE_LEVELS == 2 #include -#elif !(defined(CONFIG_PAGE_SIZE_4KB) && defined(CONFIG_MIPS_VA_BITS_48)) +#elif CONFIG_PGTABLE_LEVELS == 3 #include +#else +#include #endif /* @@ -222,6 +224,9 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd) return pgd_val(pgd); } +#define pgd_phys(pgd) virt_to_phys((void *)pgd_val(pgd)) +#define pgd_page(pgd) (pfn_to_page(pgd_phys(pgd) >> PAGE_SHIFT)) + static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) { return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(address); From 8ce23510d754b75b8809c88e29cd42753ce10359 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 22 Nov 2019 15:23:23 +0000 Subject: [PATCH 1888/3715] net: phy: initialise phydev speed and duplex sanely [ Upstream commit a5d66f810061e2dd70fb7a108dcd14e535bc639f ] When a phydev is created, the speed and duplex are set to zero and -1 respectively, rather than using the predefined SPEED_UNKNOWN and DUPLEX_UNKNOWN constants. There is a window at initialisation time where we may report link down using the 0/-1 values. Tidy this up and use the predefined constants, so debug doesn't complain with: "Unsupported (update phy-core.c)/Unsupported (update phy-core.c)" when the speed and duplex settings are printed. Signed-off-by: Russell King Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/phy/phy_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index ed7e3c70b511..a98c227a4c2e 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -367,8 +367,8 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, mdiodev->device_free = phy_mdio_device_free; mdiodev->device_remove = phy_mdio_device_remove; - dev->speed = 0; - dev->duplex = -1; + dev->speed = SPEED_UNKNOWN; + dev->duplex = DUPLEX_UNKNOWN; dev->pause = 0; dev->asym_pause = 0; dev->link = 1; From 4a27508240821ed75cd504cfba0abda1fcab28a1 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 16 Sep 2019 11:30:55 -0700 Subject: [PATCH 1889/3715] btrfs: don't prematurely free work in reada_start_machine_worker() [ Upstream commit e732fe95e4cad35fc1df278c23a32903341b08b3 ] Currently, reada_start_machine_worker() frees the reada_machine_work and then calls __reada_start_machine() to do readahead. This is another potential instance of the bug in "btrfs: don't prematurely free work in run_ordered_work()". There _might_ already be a deadlock here: reada_start_machine_worker() can depend on itself through stacked filesystems (__read_start_machine() -> reada_start_machine_dev() -> reada_tree_block_flagged() -> read_extent_buffer_pages() -> submit_one_bio() -> btree_submit_bio_hook() -> btrfs_map_bio() -> submit_stripe_bio() -> submit_bio() onto a loop device can trigger readahead on the lower filesystem). Either way, let's fix it by freeing the work at the end. Reviewed-by: Johannes Thumshirn Signed-off-by: Omar Sandoval Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/reada.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 3a4e15b39cc1..440c0d5d2050 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -734,21 +734,19 @@ static int reada_start_machine_dev(struct btrfs_device *dev) static void reada_start_machine_worker(struct btrfs_work *work) { struct reada_machine_work *rmw; - struct btrfs_fs_info *fs_info; int old_ioprio; rmw = container_of(work, struct reada_machine_work, work); - fs_info = rmw->fs_info; - - kfree(rmw); old_ioprio = IOPRIO_PRIO_VALUE(task_nice_ioclass(current), task_nice_ioprio(current)); set_task_ioprio(current, BTRFS_IOPRIO_READA); - __reada_start_machine(fs_info); + __reada_start_machine(rmw->fs_info); set_task_ioprio(current, old_ioprio); - atomic_dec(&fs_info->reada_works_cnt); + atomic_dec(&rmw->fs_info->reada_works_cnt); + + kfree(rmw); } static void __reada_start_machine(struct btrfs_fs_info *fs_info) From 7cfef55ff0193d9a47d558b92038a1a2fc78cb98 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 16 Sep 2019 11:30:56 -0700 Subject: [PATCH 1890/3715] btrfs: don't prematurely free work in scrub_missing_raid56_worker() [ Upstream commit 57d4f0b863272ba04ba85f86bfdc0f976f0af91c ] Currently, scrub_missing_raid56_worker() puts and potentially frees sblock (which embeds the work item) and then submits a bio through scrub_wr_submit(). This is another potential instance of the bug in "btrfs: don't prematurely free work in run_ordered_work()". Fix it by dropping the reference after we submit the bio. Reviewed-by: Johannes Thumshirn Signed-off-by: Omar Sandoval Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/scrub.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 61192c536e6c..2ebae9773978 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -2421,14 +2421,13 @@ static void scrub_missing_raid56_worker(struct btrfs_work *work) scrub_write_block_to_dev_replace(sblock); } - scrub_block_put(sblock); - if (sctx->is_dev_replace && sctx->flush_all_writes) { mutex_lock(&sctx->wr_lock); scrub_wr_submit(sctx); mutex_unlock(&sctx->wr_lock); } + scrub_block_put(sblock); scrub_pending_bio_dec(sctx); } From 75bfb048437fba5dd6cfc2535c53ff50197c3b1a Mon Sep 17 00:00:00 2001 From: Faiz Abbas Date: Thu, 28 Nov 2019 16:34:22 +0530 Subject: [PATCH 1891/3715] Revert "mmc: sdhci: Fix incorrect switch to HS mode" commit 07bcc411567cb96f9d1fc84fff8d387118a2920d upstream. This reverts commit c894e33ddc1910e14d6f2a2016f60ab613fd8b37. This commit aims to treat SD High speed and SDR25 as the same while setting UHS Timings in HOST_CONTROL2 which leads to failures with some SD cards in AM65x. Revert this commit. The issue this commit was trying to fix can be implemented in a platform specific callback instead of common sdhci code. Cc: Signed-off-by: Faiz Abbas Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20191128110422.25917-1-faiz_abbas@ti.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 5807028c8309..9b6660dd8a01 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1635,9 +1635,7 @@ void sdhci_set_uhs_signaling(struct sdhci_host *host, unsigned timing) ctrl_2 |= SDHCI_CTRL_UHS_SDR104; else if (timing == MMC_TIMING_UHS_SDR12) ctrl_2 |= SDHCI_CTRL_UHS_SDR12; - else if (timing == MMC_TIMING_SD_HS || - timing == MMC_TIMING_MMC_HS || - timing == MMC_TIMING_UHS_SDR25) + else if (timing == MMC_TIMING_UHS_SDR25) ctrl_2 |= SDHCI_CTRL_UHS_SDR25; else if (timing == MMC_TIMING_UHS_SDR50) ctrl_2 |= SDHCI_CTRL_UHS_SDR50; From 8bcc8514f821b27652d2019e0ae82f6482be56c7 Mon Sep 17 00:00:00 2001 From: Chaotian Jing Date: Wed, 4 Dec 2019 15:19:58 +0800 Subject: [PATCH 1892/3715] mmc: mediatek: fix CMD_TA to 2 for MT8173 HS200/HS400 mode commit 8f34e5bd7024d1ffebddd82d7318b1be17be9e9a upstream. there is a chance that always get response CRC error after HS200 tuning, the reason is that need set CMD_TA to 2. this modification is only for MT8173. Signed-off-by: Chaotian Jing Tested-by: Hsin-Yi Wang Cc: stable@vger.kernel.org Fixes: 1ede5cb88a29 ("mmc: mediatek: Use data tune for CMD line tune") Link: https://lore.kernel.org/r/20191204071958.18553-1-chaotian.jing@mediatek.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/mtk-sd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index a2ac9938d945..1a5d5c40324b 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -212,6 +212,8 @@ #define MSDC_PATCH_BIT_SPCPUSH (0x1 << 29) /* RW */ #define MSDC_PATCH_BIT_DECRCTMO (0x1 << 30) /* RW */ +#define MSDC_PATCH_BIT1_CMDTA (0x7 << 3) /* RW */ + #define MSDC_PAD_TUNE_DATWRDLY (0x1f << 0) /* RW */ #define MSDC_PAD_TUNE_DATRRDLY (0x1f << 8) /* RW */ #define MSDC_PAD_TUNE_CMDRDLY (0x1f << 16) /* RW */ @@ -1442,6 +1444,7 @@ static int hs400_tune_response(struct mmc_host *mmc, u32 opcode) /* select EMMC50 PAD CMD tune */ sdr_set_bits(host->base + PAD_CMD_TUNE, BIT(0)); + sdr_set_field(host->base + MSDC_PATCH_BIT1, MSDC_PATCH_BIT1_CMDTA, 2); if (mmc->ios.timing == MMC_TIMING_MMC_HS200 || mmc->ios.timing == MMC_TIMING_UHS_SDR104) From 031fbffac8ad65041b7308fd3b12f05471a980bc Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 17 Dec 2019 17:19:11 -0800 Subject: [PATCH 1893/3715] usb: xhci: Fix build warning seen with CONFIG_PM=n [ Upstream commit 6056a0f8ede27b296d10ef46f7f677cc9d715371 ] The following build warning is seen if CONFIG_PM is disabled. drivers/usb/host/xhci-pci.c:498:13: warning: unused function 'xhci_pci_shutdown' Fixes: f2c710f7dca8 ("usb: xhci: only set D3hot for pci device") Cc: Henry Lin Cc: stable@vger.kernel.org # all stable releases with f2c710f7dca8 Signed-off-by: Guenter Roeck Acked-by: Mathias Nyman Link: https://lore.kernel.org/r/20191218011911.6907-1-linux@roeck-us.net Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 021a2d320acc..09f228279c01 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -497,7 +497,6 @@ static int xhci_pci_resume(struct usb_hcd *hcd, bool hibernated) retval = xhci_resume(xhci, hibernated); return retval; } -#endif /* CONFIG_PM */ static void xhci_pci_shutdown(struct usb_hcd *hcd) { @@ -510,6 +509,7 @@ static void xhci_pci_shutdown(struct usb_hcd *hcd) if (xhci->quirks & XHCI_SPURIOUS_WAKEUP) pci_set_power_state(pdev, PCI_D3hot); } +#endif /* CONFIG_PM */ /*-------------------------------------------------------------------------*/ From 51ff11e50d8b2bbde8b790b4e7cdd5688d5aef94 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 9 Dec 2019 09:03:12 +0100 Subject: [PATCH 1894/3715] s390/ftrace: fix endless recursion in function_graph tracer [ Upstream commit 6feeee8efc53035c3195b02068b58ae947538aa4 ] The following sequence triggers a kernel stack overflow on s390x: mount -t tracefs tracefs /sys/kernel/tracing cd /sys/kernel/tracing echo function_graph > current_tracer [crash] This is because preempt_count_{add,sub} are in the list of traced functions, which can be demonstrated by: echo preempt_count_add >set_ftrace_filter echo function_graph > current_tracer [crash] The stack overflow happens because get_tod_clock_monotonic() gets called by ftrace but itself calls preempt_{disable,enable}(), which leads to a endless recursion. Fix this by using preempt_{disable,enable}_notrace(). Fixes: 011620688a71 ("s390/time: ensure get_clock_monotonic() returns monotonic values") Signed-off-by: Sven Schnelle Reviewed-by: Vasily Gorbik Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- arch/s390/include/asm/timex.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 0f12a3f91282..2dc9eb4e1acc 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -195,9 +195,9 @@ static inline unsigned long long get_tod_clock_monotonic(void) { unsigned long long tod; - preempt_disable(); + preempt_disable_notrace(); tod = get_tod_clock() - *(unsigned long long *) &tod_clock_base[1]; - preempt_enable(); + preempt_enable_notrace(); return tod; } From f8f86208bfbe7b556d108e3dae05e315cfcc34f4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 3 Dec 2019 14:24:58 +0300 Subject: [PATCH 1895/3715] btrfs: return error pointer from alloc_test_extent_buffer [ Upstream commit b6293c821ea8fa2a631a2112cd86cd435effeb8b ] Callers of alloc_test_extent_buffer have not correctly interpreted the return value as error pointer, as alloc_test_extent_buffer should behave as alloc_extent_buffer. The self-tests were unaffected but btrfs_find_create_tree_block could call both functions and that would cause problems up in the call chain. Fixes: faa2dbf004e8 ("Btrfs: add sanity tests for new qgroup accounting code") CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Dan Carpenter Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/extent_io.c | 6 ++++-- fs/btrfs/tests/free-space-tree-tests.c | 6 +++--- fs/btrfs/tests/qgroup-tests.c | 4 ++-- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 4cc534584665..fced434bbddc 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4949,12 +4949,14 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, return eb; eb = alloc_dummy_extent_buffer(fs_info, start); if (!eb) - return NULL; + return ERR_PTR(-ENOMEM); eb->fs_info = fs_info; again: ret = radix_tree_preload(GFP_NOFS); - if (ret) + if (ret) { + exists = ERR_PTR(ret); goto free_eb; + } spin_lock(&fs_info->buffer_lock); ret = radix_tree_insert(&fs_info->buffer_radix, start >> PAGE_SHIFT, eb); diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c index 8444a018cca2..f6c783e959b7 100644 --- a/fs/btrfs/tests/free-space-tree-tests.c +++ b/fs/btrfs/tests/free-space-tree-tests.c @@ -475,9 +475,9 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize, root->fs_info->tree_root = root; root->node = alloc_test_extent_buffer(root->fs_info, nodesize); - if (!root->node) { - test_msg("Couldn't allocate dummy buffer\n"); - ret = -ENOMEM; + if (IS_ERR(root->node)) { + test_msg("couldn't allocate dummy buffer\n"); + ret = PTR_ERR(root->node); goto out; } btrfs_set_header_level(root->node, 0); diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index 578fd045e859..eb72cf280546 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -487,9 +487,9 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize) * *cough*backref walking code*cough* */ root->node = alloc_test_extent_buffer(root->fs_info, nodesize); - if (!root->node) { + if (IS_ERR(root->node)) { test_msg("Couldn't allocate dummy buffer\n"); - ret = -ENOMEM; + ret = PTR_ERR(root->node); goto out; } btrfs_set_header_level(root->node, 0); From a93dd1c4626d1f61b0de64e1feaa385aa48fc1e9 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 6 Dec 2019 09:37:15 -0500 Subject: [PATCH 1896/3715] btrfs: abort transaction after failed inode updates in create_subvol [ Upstream commit c7e54b5102bf3614cadb9ca32d7be73bad6cecf0 ] We can just abort the transaction here, and in fact do that for every other failure in this function except these two cases. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Filipe Manana Reviewed-by: Johannes Thumshirn Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/ioctl.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index dd3b4820ac30..e82b4f3f490c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -580,12 +580,18 @@ static noinline int create_subvol(struct inode *dir, btrfs_i_size_write(BTRFS_I(dir), dir->i_size + namelen * 2); ret = btrfs_update_inode(trans, root, dir); - BUG_ON(ret); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto fail; + } ret = btrfs_add_root_ref(trans, fs_info, objectid, root->root_key.objectid, btrfs_ino(BTRFS_I(dir)), index, name, namelen); - BUG_ON(ret); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto fail; + } ret = btrfs_uuid_tree_add(trans, fs_info, root_item->uuid, BTRFS_UUID_KEY_SUBVOL, objectid); From 5efc690e2043e08c4052bc8a6444ca2157e54799 Mon Sep 17 00:00:00 2001 From: Suwan Kim Date: Fri, 13 Dec 2019 11:30:54 +0900 Subject: [PATCH 1897/3715] usbip: Fix receive error in vhci-hcd when using scatter-gather MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d986294ee55d719562b20aabe15a39bf8f863415 upstream. When vhci uses SG and receives data whose size is smaller than SG buffer size, it tries to receive more data even if it acutally receives all the data from the server. If then, it erroneously adds error event and triggers connection shutdown. vhci-hcd should check if it received all the data even if there are more SG entries left. So, check if it receivces all the data from the server in for_each_sg() loop. Fixes: ea44d190764b ("usbip: Implement SG support to vhci-hcd and stub driver") Reported-by: Marek Marczykowski-Górecki Tested-by: Marek Marczykowski-Górecki Signed-off-by: Suwan Kim Acked-by: Shuah Khan Cc: stable Link: https://lore.kernel.org/r/20191213023055.19933-2-suwan.kim027@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/usbip_common.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c index da03451328cd..cd01f1e278ee 100644 --- a/drivers/usb/usbip/usbip_common.c +++ b/drivers/usb/usbip/usbip_common.c @@ -742,6 +742,9 @@ int usbip_recv_xbuff(struct usbip_device *ud, struct urb *urb) copy -= recv; ret += recv; + + if (!copy) + break; } if (ret != size) From ced35178a76f255631f05cda074ec4b96786b3f8 Mon Sep 17 00:00:00 2001 From: Suwan Kim Date: Fri, 13 Dec 2019 11:30:55 +0900 Subject: [PATCH 1898/3715] usbip: Fix error path of vhci_recv_ret_submit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit aabb5b833872524eaf28f52187e5987984982264 upstream. If a transaction error happens in vhci_recv_ret_submit(), event handler closes connection and changes port status to kick hub_event. Then hub tries to flush the endpoint URBs, but that causes infinite loop between usb_hub_flush_endpoint() and vhci_urb_dequeue() because "vhci_priv" in vhci_urb_dequeue() was already released by vhci_recv_ret_submit() before a transmission error occurred. Thus, vhci_urb_dequeue() terminates early and usb_hub_flush_endpoint() continuously calls vhci_urb_dequeue(). The root cause of this issue is that vhci_recv_ret_submit() terminates early without giving back URB when transaction error occurs in vhci_recv_ret_submit(). That causes the error URB to still be linked at endpoint list without “vhci_priv". So, in the case of transaction error in vhci_recv_ret_submit(), unlink URB from the endpoint, insert proper error code in urb->status and give back URB. Reported-by: Marek Marczykowski-Górecki Tested-by: Marek Marczykowski-Górecki Signed-off-by: Suwan Kim Cc: stable Acked-by: Shuah Khan Link: https://lore.kernel.org/r/20191213023055.19933-3-suwan.kim027@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vhci_rx.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/usb/usbip/vhci_rx.c b/drivers/usb/usbip/vhci_rx.c index 3f998b605f03..c13dd526b996 100644 --- a/drivers/usb/usbip/vhci_rx.c +++ b/drivers/usb/usbip/vhci_rx.c @@ -91,16 +91,21 @@ static void vhci_recv_ret_submit(struct vhci_device *vdev, usbip_pack_pdu(pdu, urb, USBIP_RET_SUBMIT, 0); /* recv transfer buffer */ - if (usbip_recv_xbuff(ud, urb) < 0) - return; + if (usbip_recv_xbuff(ud, urb) < 0) { + urb->status = -EPROTO; + goto error; + } /* recv iso_packet_descriptor */ - if (usbip_recv_iso(ud, urb) < 0) - return; + if (usbip_recv_iso(ud, urb) < 0) { + urb->status = -EPROTO; + goto error; + } /* restore the padding in iso packets */ usbip_pad_iso(ud, urb); +error: if (usbip_dbg_flag_vhci_rx) usbip_dump_urb(urb); From 1d21868ab70ab789c0a9b12634a575b04762e190 Mon Sep 17 00:00:00 2001 From: Erkka Talvitie Date: Wed, 11 Dec 2019 10:08:39 +0200 Subject: [PATCH 1899/3715] USB: EHCI: Do not return -EPIPE when hub is disconnected commit 64cc3f12d1c7dd054a215bc1ff9cc2abcfe35832 upstream. When disconnecting a USB hub that has some child device(s) connected to it (such as a USB mouse), then the stack tries to clear halt and reset device(s) which are _already_ physically disconnected. The issue has been reproduced with: CPU: IMX6D5EYM10AD or MCIMX6D5EYM10AE. SW: U-Boot 2019.07 and kernel 4.19.40. CPU: HP Proliant Microserver Gen8. SW: Linux version 4.2.3-300.fc23.x86_64 In this situation there will be error bit for MMF active yet the CERR equals EHCI_TUNE_CERR + halt. Existing implementation interprets this as a stall [1] (chapter 8.4.5). The possible conditions when the MMF will be active + halt can be found from [2] (Table 4-13). Fix for the issue is to check whether MMF is active and PID Code is IN before checking for the stall. If these conditions are true then it is not a stall. What happens after the fix is that when disconnecting a hub with attached device(s) the situation is not interpret as a stall. [1] [https://www.usb.org/document-library/usb-20-specification, usb_20.pdf] [2] [https://www.intel.com/content/dam/www/public/us/en/documents/ technical-specifications/ehci-specification-for-usb.pdf] Signed-off-by: Erkka Talvitie Reviewed-by: Alan Stern Cc: stable Link: https://lore.kernel.org/r/ef70941d5f349767f19c0ed26b0dd9eed8ad81bb.1576050523.git.erkka.talvitie@vincit.fi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-q.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c index 8f3f055c05fa..477ba3842cc4 100644 --- a/drivers/usb/host/ehci-q.c +++ b/drivers/usb/host/ehci-q.c @@ -40,6 +40,10 @@ /*-------------------------------------------------------------------------*/ +/* PID Codes that are used here, from EHCI specification, Table 3-16. */ +#define PID_CODE_IN 1 +#define PID_CODE_SETUP 2 + /* fill a qtd, returning how much of the buffer we were able to queue up */ static int @@ -203,7 +207,7 @@ static int qtd_copy_status ( int status = -EINPROGRESS; /* count IN/OUT bytes, not SETUP (even short packets) */ - if (likely (QTD_PID (token) != 2)) + if (likely(QTD_PID(token) != PID_CODE_SETUP)) urb->actual_length += length - QTD_LENGTH (token); /* don't modify error codes */ @@ -219,6 +223,13 @@ static int qtd_copy_status ( if (token & QTD_STS_BABBLE) { /* FIXME "must" disable babbling device's port too */ status = -EOVERFLOW; + /* + * When MMF is active and PID Code is IN, queue is halted. + * EHCI Specification, Table 4-13. + */ + } else if ((token & QTD_STS_MMF) && + (QTD_PID(token) == PID_CODE_IN)) { + status = -EPROTO; /* CERR nonzero + halt --> stall */ } else if (QTD_CERR(token)) { status = -EPIPE; From 944276c573d5f5ec3767263363369f3d5335ed9a Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 17 Dec 2019 13:55:24 +0200 Subject: [PATCH 1900/3715] intel_th: pci: Add Comet Lake PCH-V support commit e4de2a5d51f97a6e720a1c0911f93e2d8c2f1c08 upstream. This adds Intel(R) Trace Hub PCI ID for Comet Lake PCH-V. Signed-off-by: Alexander Shishkin Reviewed-by: Andy Shevchenko Cc: Link: https://lore.kernel.org/r/20191217115527.74383-2-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index c224b92a80f1..00b22d1b8c33 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -188,6 +188,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x06a6), .driver_data = (kernel_ulong_t)&intel_th_2x, }, + { + /* Comet Lake PCH-V */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa3a6), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, { /* Ice Lake NNPI */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x45c5), From 77d893bb8d6f0077da879c83e482bc5d6bad4e3f Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 17 Dec 2019 13:55:25 +0200 Subject: [PATCH 1901/3715] intel_th: pci: Add Elkhart Lake SOC support commit 88385866bab8d5e18c7f45d1023052c783572e03 upstream. This adds support for Intel Trace Hub in Elkhart Lake. Signed-off-by: Alexander Shishkin Reviewed-by: Andy Shevchenko Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20191217115527.74383-3-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index 00b22d1b8c33..fc371444407d 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -218,6 +218,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4da6), .driver_data = (kernel_ulong_t)&intel_th_2x, }, + { + /* Elkhart Lake */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4b26), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, { 0 }, }; From 6cc3ecc1ac2364cddd8bf44dcfdd6123dd63d14c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 17 Dec 2019 20:06:04 +0100 Subject: [PATCH 1902/3715] platform/x86: hp-wmi: Make buffer for HPWMI_FEATURE2_QUERY 128 bytes commit 133b2acee3871ae6bf123b8fe34be14464aa3d2c upstream. At least on the HP Envy x360 15-cp0xxx model the WMI interface for HPWMI_FEATURE2_QUERY requires an outsize of at least 128 bytes, otherwise it fails with an error code 5 (HPWMI_RET_INVALID_PARAMETERS): Dec 06 00:59:38 kernel: hp_wmi: query 0xd returned error 0x5 We do not care about the contents of the buffer, we just want to know if the HPWMI_FEATURE2_QUERY command is supported. This commits bumps the buffer size, fixing the error. Fixes: 8a1513b4932 ("hp-wmi: limit hotkey enable") Cc: stable@vger.kernel.org BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1520703 Signed-off-by: Hans de Goede Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/hp-wmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index d0ffdd5d9199..06a3c1ef8eee 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -313,7 +313,7 @@ static int __init hp_wmi_bios_2008_later(void) static int __init hp_wmi_bios_2009_later(void) { - int state = 0; + u8 state[128]; int ret = hp_wmi_perform_query(HPWMI_FEATURE2_QUERY, HPWMI_READ, &state, sizeof(state), sizeof(state)); if (!ret) From c67a2906487c75e9415d9ea1c6ca622a9e63a769 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 16 Dec 2019 11:08:23 +0000 Subject: [PATCH 1903/3715] staging: comedi: gsc_hpdi: check dma_alloc_coherent() return value commit ab42b48f32d4c766420c3499ee9c0289b7028182 upstream. The "auto-attach" handler function `gsc_hpdi_auto_attach()` calls `dma_alloc_coherent()` in a loop to allocate some DMA data buffers, and also calls it to allocate a buffer for a DMA descriptor chain. However, it does not check the return value of any of these calls. Change `gsc_hpdi_auto_attach()` to return `-ENOMEM` if any of these `dma_alloc_coherent()` calls fail. This will result in the comedi core calling the "detach" handler `gsc_hpdi_detach()` as part of the clean-up, which will call `gsc_hpdi_free_dma()` to free any allocated DMA coherent memory buffers. Cc: #4.6+ Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20191216110823.216237-1-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/gsc_hpdi.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/staging/comedi/drivers/gsc_hpdi.c b/drivers/staging/comedi/drivers/gsc_hpdi.c index e5b948405fd9..a09631f9d813 100644 --- a/drivers/staging/comedi/drivers/gsc_hpdi.c +++ b/drivers/staging/comedi/drivers/gsc_hpdi.c @@ -632,6 +632,11 @@ static int gsc_hpdi_auto_attach(struct comedi_device *dev, dma_alloc_coherent(&pcidev->dev, DMA_BUFFER_SIZE, &devpriv->dio_buffer_phys_addr[i], GFP_KERNEL); + if (!devpriv->dio_buffer[i]) { + dev_warn(dev->class_dev, + "failed to allocate DMA buffer\n"); + return -ENOMEM; + } } /* allocate dma descriptors */ devpriv->dma_desc = dma_alloc_coherent(&pcidev->dev, @@ -639,6 +644,11 @@ static int gsc_hpdi_auto_attach(struct comedi_device *dev, NUM_DMA_DESCRIPTORS, &devpriv->dma_desc_phys_addr, GFP_KERNEL); + if (!devpriv->dma_desc) { + dev_warn(dev->class_dev, + "failed to allocate DMA descriptors\n"); + return -ENOMEM; + } if (devpriv->dma_desc_phys_addr & 0xf) { dev_warn(dev->class_dev, " dma descriptors not quad-word aligned (bug)\n"); From 11755d82e1adeb898976f3fb48ed6c8c67a9ed45 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 2 Dec 2019 18:02:12 +0100 Subject: [PATCH 1904/3715] ext4: fix ext4_empty_dir() for directories with holes commit 64d4ce892383b2ad6d782e080d25502f91bf2a38 upstream. Function ext4_empty_dir() doesn't correctly handle directories with holes and crashes on bh->b_data dereference when bh is NULL. Reorganize the loop to use 'offset' variable all the times instead of comparing pointers to current direntry with bh->b_data pointer. Also add more strict checking of '.' and '..' directory entries to avoid entering loop in possibly invalid state on corrupted filesystems. References: CVE-2019-19037 CC: stable@vger.kernel.org Fixes: 4e19d6b65fb4 ("ext4: allow directory holes") Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20191202170213.4761-2-jack@suse.cz Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/namei.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index b4e0c270def4..0b5c36bd5418 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2702,7 +2702,7 @@ bool ext4_empty_dir(struct inode *inode) { unsigned int offset; struct buffer_head *bh; - struct ext4_dir_entry_2 *de, *de1; + struct ext4_dir_entry_2 *de; struct super_block *sb; if (ext4_has_inline_data(inode)) { @@ -2727,19 +2727,25 @@ bool ext4_empty_dir(struct inode *inode) return true; de = (struct ext4_dir_entry_2 *) bh->b_data; - de1 = ext4_next_entry(de, sb->s_blocksize); - if (le32_to_cpu(de->inode) != inode->i_ino || - le32_to_cpu(de1->inode) == 0 || - strcmp(".", de->name) || strcmp("..", de1->name)) { - ext4_warning_inode(inode, "directory missing '.' and/or '..'"); + if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size, + 0) || + le32_to_cpu(de->inode) != inode->i_ino || strcmp(".", de->name)) { + ext4_warning_inode(inode, "directory missing '.'"); brelse(bh); return true; } - offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize) + - ext4_rec_len_from_disk(de1->rec_len, sb->s_blocksize); - de = ext4_next_entry(de1, sb->s_blocksize); + offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); + de = ext4_next_entry(de, sb->s_blocksize); + if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size, + offset) || + le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) { + ext4_warning_inode(inode, "directory missing '..'"); + brelse(bh); + return true; + } + offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); while (offset < inode->i_size) { - if ((void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { + if (!(offset & (sb->s_blocksize - 1))) { unsigned int lblock; brelse(bh); lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb); @@ -2750,12 +2756,11 @@ bool ext4_empty_dir(struct inode *inode) } if (IS_ERR(bh)) return true; - de = (struct ext4_dir_entry_2 *) bh->b_data; } + de = (struct ext4_dir_entry_2 *) (bh->b_data + + (offset & (sb->s_blocksize - 1))); if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size, offset)) { - de = (struct ext4_dir_entry_2 *)(bh->b_data + - sb->s_blocksize); offset = (offset | (sb->s_blocksize - 1)) + 1; continue; } @@ -2764,7 +2769,6 @@ bool ext4_empty_dir(struct inode *inode) return false; } offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); - de = ext4_next_entry(de, sb->s_blocksize); } brelse(bh); return true; From d3c6b57dcb7b5e500a7e9e1078530ace567014eb Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 2 Dec 2019 18:02:13 +0100 Subject: [PATCH 1905/3715] ext4: check for directory entries too close to block end commit 109ba779d6cca2d519c5dd624a3276d03e21948e upstream. ext4_check_dir_entry() currently does not catch a case when a directory entry ends so close to the block end that the header of the next directory entry would not fit in the remaining space. This can lead to directory iteration code trying to access address beyond end of current buffer head leading to oops. CC: stable@vger.kernel.org Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20191202170213.4761-3-jack@suse.cz Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/dir.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 446b6c375b6f..c17855fead7b 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -76,6 +76,11 @@ int __ext4_check_dir_entry(const char *function, unsigned int line, error_msg = "rec_len is too small for name_len"; else if (unlikely(((char *) de - buf) + rlen > size)) error_msg = "directory entry overrun"; + else if (unlikely(((char *) de - buf) + rlen > + size - EXT4_DIR_REC_LEN(1) && + ((char *) de - buf) + rlen != size)) { + error_msg = "directory entry too close to block end"; + } else if (unlikely(le32_to_cpu(de->inode) > le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count))) error_msg = "inode out of bounds"; From b7cb3fd8293ebe313ebfa4fbf29445bff84623b6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 13 Dec 2019 21:50:11 +0300 Subject: [PATCH 1906/3715] ext4: unlock on error in ext4_expand_extra_isize() commit 7f420d64a08c1dcd65b27be82a27cf2bdb2e7847 upstream. We need to unlock the xattr before returning on this error path. Cc: stable@kernel.org # 4.13 Fixes: c03b45b853f5 ("ext4, project: expand inode extra size if possible") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20191213185010.6k7yl2tck3wlsdkt@kili.mountain Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 11bc4c69bf16..c2920cbfa3bf 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5874,7 +5874,7 @@ int ext4_expand_extra_isize(struct inode *inode, error = ext4_journal_get_write_access(handle, iloc->bh); if (error) { brelse(iloc->bh); - goto out_stop; + goto out_unlock; } error = __ext4_expand_extra_isize(inode, new_extra_isize, iloc, @@ -5884,8 +5884,8 @@ int ext4_expand_extra_isize(struct inode *inode, if (!error) error = rc; +out_unlock: ext4_write_unlock_xattr(inode, &no_expand); -out_stop: ext4_journal_stop(handle); return error; } From 397d99dabadebd5f82c8ae6eebb07ba8b145686b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 12 Dec 2019 09:40:49 +0000 Subject: [PATCH 1907/3715] KVM: arm64: Ensure 'params' is initialised when looking up sys register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1ce74e96c2407df2b5867e5d45a70aacb8923c14 upstream. Commit 4b927b94d5df ("KVM: arm/arm64: vgic: Introduce find_reg_by_id()") introduced 'find_reg_by_id()', which looks up a system register only if the 'id' index parameter identifies a valid system register. As part of the patch, existing callers of 'find_reg()' were ported over to the new interface, but this breaks 'index_to_sys_reg_desc()' in the case that the initial lookup in the vCPU target table fails because we will then call into 'find_reg()' for the system register table with an uninitialised 'param' as the key to the lookup. GCC 10 is bright enough to spot this (amongst a tonne of false positives, but hey!): | arch/arm64/kvm/sys_regs.c: In function ‘index_to_sys_reg_desc.part.0.isra’: | arch/arm64/kvm/sys_regs.c:983:33: warning: ‘params.Op2’ may be used uninitialized in this function [-Wmaybe-uninitialized] | 983 | (u32)(x)->CRn, (u32)(x)->CRm, (u32)(x)->Op2); | [...] Revert the hunk of 4b927b94d5df which breaks 'index_to_sys_reg_desc()' so that the old behaviour of checking the index upfront is restored. Fixes: 4b927b94d5df ("KVM: arm/arm64: vgic: Introduce find_reg_by_id()") Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Cc: Link: https://lore.kernel.org/r/20191212094049.12437-1-will@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/sys_regs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index cfbf7bd0dfba..32ae5c9daac4 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1785,8 +1785,11 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu, if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM64_SYSREG) return NULL; + if (!index_to_params(id, ¶ms)) + return NULL; + table = get_target_table(vcpu->arch.target, true, &num); - r = find_reg_by_id(id, ¶ms, table, num); + r = find_reg(¶ms, table, num); if (!r) r = find_reg(¶ms, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); From ee1a3ec0aeb9109fae53d1662e8cb36bdd677425 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 31 Oct 2019 16:04:48 +0300 Subject: [PATCH 1908/3715] x86/MCE/AMD: Do not use rdmsr_safe_on_cpu() in smca_configure() commit 246ff09f89e54fdf740a8d496176c86743db3ec7 upstream. ... because interrupts are disabled that early and sending IPIs can deadlock: BUG: sleeping function called from invalid context at kernel/sched/completion.c:99 in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 0, name: swapper/1 no locks held by swapper/1/0. irq event stamp: 0 hardirqs last enabled at (0): [<0000000000000000>] 0x0 hardirqs last disabled at (0): [] copy_process+0x8b9/0x1ca0 softirqs last enabled at (0): [] copy_process+0x8b9/0x1ca0 softirqs last disabled at (0): [<0000000000000000>] 0x0 Preemption disabled at: [] start_secondary+0x3b/0x190 CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.5.0-rc2+ #1 Hardware name: GIGABYTE MZ01-CE1-00/MZ01-CE1-00, BIOS F02 08/29/2018 Call Trace: dump_stack ___might_sleep.cold.92 wait_for_completion ? generic_exec_single rdmsr_safe_on_cpu ? wrmsr_on_cpus mce_amd_feature_init mcheck_cpu_init identify_cpu identify_secondary_cpu smp_store_cpu_info start_secondary secondary_startup_64 The function smca_configure() is called only on the current CPU anyway, therefore replace rdmsr_safe_on_cpu() with atomic rdmsr_safe() and avoid the IPI. [ bp: Update commit message. ] Signed-off-by: Konstantin Khlebnikov Signed-off-by: Borislav Petkov Reviewed-by: Yazen Ghannam Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: linux-edac Cc: Cc: Thomas Gleixner Cc: Tony Luck Cc: x86-ml Link: https://lkml.kernel.org/r/157252708836.3876.4604398213417262402.stgit@buzz Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index b434780ae680..b7a60ead8382 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -231,7 +231,7 @@ static void smca_configure(unsigned int bank, unsigned int cpu) if (smca_banks[bank].hwid) return; - if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) { + if (rdmsr_safe(MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) { pr_warn("Failed to read MCA_IPID for bank %d\n", bank); return; } From eade3196196fc8e2ae1c2dd1aff41f621c26b5f7 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 21 Nov 2019 08:15:08 -0600 Subject: [PATCH 1909/3715] x86/MCE/AMD: Allow Reserved types to be overwritten in smca_banks[] commit 966af20929ac24360ba3fac5533eb2ab003747da upstream. Each logical CPU in Scalable MCA systems controls a unique set of MCA banks in the system. These banks are not shared between CPUs. The bank types and ordering will be the same across CPUs on currently available systems. However, some CPUs may see a bank as Reserved/Read-as-Zero (RAZ) while other CPUs do not. In this case, the bank seen as Reserved on one CPU is assumed to be the same type as the bank seen as a known type on another CPU. In general, this occurs when the hardware represented by the MCA bank is disabled, e.g. disabled memory controllers on certain models, etc. The MCA bank is disabled in the hardware, so there is no possibility of getting an MCA/MCE from it even if it is assumed to have a known type. For example: Full system: Bank | Type seen on CPU0 | Type seen on CPU1 ------------------------------------------------ 0 | LS | LS 1 | UMC | UMC 2 | CS | CS System with hardware disabled: Bank | Type seen on CPU0 | Type seen on CPU1 ------------------------------------------------ 0 | LS | LS 1 | UMC | RAZ 2 | CS | CS For this reason, there is a single, global struct smca_banks[] that is initialized at boot time. This array is initialized on each CPU as it comes online. However, the array will not be updated if an entry already exists. This works as expected when the first CPU (usually CPU0) has all possible MCA banks enabled. But if the first CPU has a subset, then it will save a "Reserved" type in smca_banks[]. Successive CPUs will then not be able to update smca_banks[] even if they encounter a known bank type. This may result in unexpected behavior. Depending on the system configuration, a user may observe issues enumerating the MCA thresholding sysfs interface. The issues may be as trivial as sysfs entries not being available, or as severe as system hangs. For example: Bank | Type seen on CPU0 | Type seen on CPU1 ------------------------------------------------ 0 | LS | LS 1 | RAZ | UMC 2 | CS | CS Extend the smca_banks[] entry check to return if the entry is a non-reserved type. Otherwise, continue so that CPUs that encounter a known bank type can update smca_banks[]. Fixes: 68627a697c19 ("x86/mce/AMD, EDAC/mce_amd: Enumerate Reserved SMCA bank type") Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: linux-edac Cc: Cc: Thomas Gleixner Cc: Tony Luck Cc: x86-ml Link: https://lkml.kernel.org/r/20191121141508.141273-1-Yazen.Ghannam@amd.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index b7a60ead8382..a8f47697276b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -228,7 +228,7 @@ static void smca_configure(unsigned int bank, unsigned int cpu) } /* Return early if this bank was already initialized. */ - if (smca_banks[bank].hwid) + if (smca_banks[bank].hwid && smca_banks[bank].hwid->hwid_mcatype != 0) return; if (rdmsr_safe(MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) { From 99b07543313d0bbde1895efa4d32ba8b06806c5c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 9 Dec 2019 06:19:08 +0000 Subject: [PATCH 1910/3715] powerpc/irq: fix stack overflow verification commit 099bc4812f09155da77eeb960a983470249c9ce1 upstream. Before commit 0366a1c70b89 ("powerpc/irq: Run softirqs off the top of the irq stack"), check_stack_overflow() was called by do_IRQ(), before switching to the irq stack. In that commit, do_IRQ() was renamed __do_irq(), and is now executing on the irq stack, so check_stack_overflow() has just become almost useless. Move check_stack_overflow() call in do_IRQ() to do the check while still on the current stack. Fixes: 0366a1c70b89 ("powerpc/irq: Run softirqs off the top of the irq stack") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e033aa8116ab12b7ca9a9c75189ad0741e3b9b5f.1575872340.git.christophe.leroy@c-s.fr Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/irq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 0ce8b0e5d7ba..207ba53a500b 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -561,8 +561,6 @@ void __do_irq(struct pt_regs *regs) trace_irq_entry(regs); - check_stack_overflow(); - /* * Query the platform PIC for the interrupt & ack it. * @@ -594,6 +592,8 @@ void do_IRQ(struct pt_regs *regs) irqtp = hardirq_ctx[raw_smp_processor_id()]; sirqtp = softirq_ctx[raw_smp_processor_id()]; + check_stack_overflow(); + /* Already there ? */ if (unlikely(curtp == irqtp || curtp == sirqtp)) { __do_irq(regs); From 5dc1cb73d0b180d65822cec79a2ea079c22f3b03 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 4 Dec 2019 09:54:46 +0100 Subject: [PATCH 1911/3715] mmc: sdhci-of-esdhc: Revert "mmc: sdhci-of-esdhc: add erratum A-009204 support" commit 8b6dc6b2d60221e90703babbc141f063b8a07e72 upstream. This reverts commit 5dd195522562542bc6ebe6e7bd47890d8b7ca93c. First, the fix seems to be plain wrong, since the erratum suggests waiting 5ms before setting setting SYSCTL[RSTD], but this msleep() happens after the call of sdhci_reset() which is where that bit gets set (if SDHCI_RESET_DATA is in mask). Second, walking the whole device tree to figure out if some node has a "fsl,p2020-esdhc" compatible string is hugely expensive - about 70 to 100 us on our mpc8309 board. Walking the device tree is done under a raw_spin_lock, so this is obviously really bad on an -rt system, and a waste of time on all. In fact, since esdhc_reset() seems to get called around 100 times per second, that mpc8309 now spends 0.8% of its time determining that it is not a p2020. Whether those 100 calls/s are normal or due to some other bug or misconfiguration, regularly hitting a 100 us non-preemptible window is unacceptable. Signed-off-by: Rasmus Villemoes Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20191204085447.27491-1-linux@rasmusvillemoes.dk Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-of-esdhc.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c index 176cbc67d08a..bf8d02a996b6 100644 --- a/drivers/mmc/host/sdhci-of-esdhc.c +++ b/drivers/mmc/host/sdhci-of-esdhc.c @@ -615,9 +615,6 @@ static void esdhc_reset(struct sdhci_host *host, u8 mask) sdhci_writel(host, host->ier, SDHCI_INT_ENABLE); sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE); - if (of_find_compatible_node(NULL, NULL, "fsl,p2020-esdhc")) - mdelay(5); - if (mask & SDHCI_RESET_ALL) { val = sdhci_readl(host, ESDHC_TBCTL); val &= ~ESDHC_TB_EN; From 041ea215a9a056f86d4cd71d542ac73ab02451fd Mon Sep 17 00:00:00 2001 From: Faiz Abbas Date: Fri, 6 Dec 2019 17:13:26 +0530 Subject: [PATCH 1912/3715] mmc: sdhci: Update the tuning failed messages to pr_debug level commit 2c92dd20304f505b6ef43d206fff21bda8f1f0ae upstream. Tuning support in DDR50 speed mode was added in SD Specifications Part1 Physical Layer Specification v3.01. Its not possible to distinguish between v3.00 and v3.01 from the SCR and that is why since commit 4324f6de6d2e ("mmc: core: enable CMD19 tuning for DDR50 mode") tuning failures are ignored in DDR50 speed mode. Cards compatible with v3.00 don't respond to CMD19 in DDR50 and this error gets printed during enumeration and also if retune is triggered at any time during operation. Update the printk level to pr_debug so that these errors don't lead to false error reports. Signed-off-by: Faiz Abbas Cc: stable@vger.kernel.org # v4.4+ Link: https://lore.kernel.org/r/20191206114326.15856-1-faiz_abbas@ti.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 9b6660dd8a01..645775dd4edb 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2162,8 +2162,8 @@ static void __sdhci_execute_tuning(struct sdhci_host *host, u32 opcode) sdhci_send_tuning(host, opcode); if (!host->tuning_done) { - pr_info("%s: Tuning timeout, falling back to fixed sampling clock\n", - mmc_hostname(host->mmc)); + pr_debug("%s: Tuning timeout, falling back to fixed sampling clock\n", + mmc_hostname(host->mmc)); sdhci_abort_tuning(host, opcode); return; } From 5a07ace7375231e6eb79667a2784c0bf023f87da Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Mon, 16 Dec 2019 11:18:42 +0800 Subject: [PATCH 1913/3715] mmc: sdhci-of-esdhc: fix P2020 errata handling commit fe0acab448f68c3146235afe03fb932e242ec94c upstream. Two previous patches introduced below quirks for P2020 platforms. - SDHCI_QUIRK_RESET_AFTER_REQUEST - SDHCI_QUIRK_BROKEN_TIMEOUT_VAL The patches made a mistake to add them in quirks2 of sdhci_host structure, while they were defined for quirks. host->quirks2 |= SDHCI_QUIRK_RESET_AFTER_REQUEST; host->quirks2 |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL; This patch is to fix them. host->quirks |= SDHCI_QUIRK_RESET_AFTER_REQUEST; host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL; Fixes: 05cb6b2a66fa ("mmc: sdhci-of-esdhc: add erratum eSDHC-A001 and A-008358 support") Fixes: a46e42712596 ("mmc: sdhci-of-esdhc: add erratum eSDHC5 support") Signed-off-by: Yangbo Lu Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20191216031842.40068-1-yangbo.lu@nxp.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-of-esdhc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c index bf8d02a996b6..9a1ab39ee35e 100644 --- a/drivers/mmc/host/sdhci-of-esdhc.c +++ b/drivers/mmc/host/sdhci-of-esdhc.c @@ -886,8 +886,8 @@ static int sdhci_esdhc_probe(struct platform_device *pdev) host->quirks &= ~SDHCI_QUIRK_NO_BUSY_IRQ; if (of_find_compatible_node(NULL, NULL, "fsl,p2020-esdhc")) { - host->quirks2 |= SDHCI_QUIRK_RESET_AFTER_REQUEST; - host->quirks2 |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL; + host->quirks |= SDHCI_QUIRK_RESET_AFTER_REQUEST; + host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL; } if (of_device_is_compatible(np, "fsl,p5040-esdhc") || From d1db913b044f0a0693d8ee283d26b81d536efcd5 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sun, 8 Dec 2019 16:51:50 -0600 Subject: [PATCH 1914/3715] nbd: fix shutdown and recv work deadlock v2 commit 1c05839aa973cfae8c3db964a21f9c0eef8fcc21 upstream. This fixes a regression added with: commit e9e006f5fcf2bab59149cb38a48a4817c1b538b4 Author: Mike Christie Date: Sun Aug 4 14:10:06 2019 -0500 nbd: fix max number of supported devs where we can deadlock during device shutdown. The problem occurs if the recv_work's nbd_config_put occurs after nbd_start_device_ioctl has returned and the userspace app has droppped its reference via closing the device and running nbd_release. The recv_work nbd_config_put call would then drop the refcount to zero and try to destroy the config which would try to do destroy_workqueue from the recv work. This patch just has nbd_start_device_ioctl do a flush_workqueue when it wakes so we know after the ioctl returns running works have exited. This also fixes a possible race where we could try to reuse the device while old recv_works are still running. Cc: stable@vger.kernel.org Fixes: e9e006f5fcf2 ("nbd: fix max number of supported devs") Signed-off-by: Mike Christie Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/nbd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 929bd255a290..4c661ad91e7d 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1234,10 +1234,10 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b mutex_unlock(&nbd->config_lock); ret = wait_event_interruptible(config->recv_wq, atomic_read(&config->recv_threads) == 0); - if (ret) { + if (ret) sock_shutdown(nbd); - flush_workqueue(nbd->recv_workq); - } + flush_workqueue(nbd->recv_workq); + mutex_lock(&nbd->config_lock); bd_set_size(bdev, 0); /* user requested, ignore socket errors */ From c2816fc40d0c6b1cfce7dffe09571ac4efc7c0d8 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 24 Oct 2019 18:12:54 +0900 Subject: [PATCH 1915/3715] perf probe: Fix to show function entry line as probe-able commit 91e2f539eeda26ab00bd03fae8dc434c128c85ed upstream. Fix die_walk_lines() to list the function entry line correctly. Since the dwarf_entrypc() does not return the entry pc if the DIE has only range attribute, __die_walk_funclines() fails to list the declaration line (entry line) in that case. To solve this issue, this introduces die_entrypc() which correctly returns the entry PC (the first address range) even if the DIE has only range attribute. With this fix die_walk_lines() shows the function entry line is able to probe correctly. Fixes: 4cc9cec636e7 ("perf probe: Introduce lines walker interface") Signed-off-by: Masami Hiramatsu Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/157190837419.1859.4619125803596816752.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo Cc: Thomas Backlund Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/dwarf-aux.c | 24 +++++++++++++++++++++++- tools/perf/util/dwarf-aux.h | 3 +++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 50903f30289f..289ef63208fb 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -320,6 +320,28 @@ bool die_is_func_def(Dwarf_Die *dw_die) dwarf_attr(dw_die, DW_AT_declaration, &attr) == NULL); } +/** + * die_entrypc - Returns entry PC (the lowest address) of a DIE + * @dw_die: a DIE + * @addr: where to store entry PC + * + * Since dwarf_entrypc() does not return entry PC if the DIE has only address + * range, we have to use this to retrieve the lowest address from the address + * range attribute. + */ +int die_entrypc(Dwarf_Die *dw_die, Dwarf_Addr *addr) +{ + Dwarf_Addr base, end; + + if (!addr) + return -EINVAL; + + if (dwarf_entrypc(dw_die, addr) == 0) + return 0; + + return dwarf_ranges(dw_die, 0, &base, addr, &end) < 0 ? -ENOENT : 0; +} + /** * die_is_func_instance - Ensure that this DIE is an instance of a subprogram * @dw_die: a DIE @@ -733,7 +755,7 @@ static int __die_walk_funclines(Dwarf_Die *sp_die, bool recursive, /* Handle function declaration line */ fname = dwarf_decl_file(sp_die); if (fname && dwarf_decl_line(sp_die, &lineno) == 0 && - dwarf_entrypc(sp_die, &addr) == 0) { + die_entrypc(sp_die, &addr) == 0) { lw.retval = callback(fname, lineno, addr, data); if (lw.retval != 0) goto done; diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index 8ac53bf1ec4e..ee15fac4e1d0 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -41,6 +41,9 @@ int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr, /* Get DW_AT_linkage_name (should be NULL for C binary) */ const char *die_get_linkage_name(Dwarf_Die *dw_die); +/* Get the lowest PC in DIE (including range list) */ +int die_entrypc(Dwarf_Die *dw_die, Dwarf_Addr *addr); + /* Ensure that this DIE is a subprogram and definition (not declaration) */ bool die_is_func_def(Dwarf_Die *dw_die); From 4c5bf01e16a7ec59e59a38a61f793c5d1d5560c7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 31 Dec 2019 12:38:09 +0100 Subject: [PATCH 1916/3715] Linux 4.14.161 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f011cb69545f..6b4528888a75 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 160 +SUBLEVEL = 161 EXTRAVERSION = NAME = Petit Gorille From aba366935db170ad8375583a770e71aaea670d58 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 2 Jan 2020 21:13:09 +0000 Subject: [PATCH 1917/3715] ANDROID: serdev: Fix platform device support After commit cdad3113fcaa ("ANDROID: serdev: add platform device support") landed, we started seeing boot panics on HiKey960/HiKey. After some debugging I found the patch is passing a serdev_device->dev pointer to match()/uevent(), which expects the dev pointer to be contained in a platform_device structure instead. When it uses container_of, it ends up miscast and we get bad values. Alistair suggested this fix which seems to avoid the issue. Signed-off-by: John Stultz Bug: 146517987 Change-Id: I6354aeb4008fff85264a3c848c6c95fb8ca5b07a --- drivers/tty/serdev/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serdev/core.c b/drivers/tty/serdev/core.c index 4fa59314640a..f513107b9ea0 100644 --- a/drivers/tty/serdev/core.c +++ b/drivers/tty/serdev/core.c @@ -56,7 +56,7 @@ static int serdev_device_match(struct device *dev, struct device_driver *drv) return 1; if (dev->parent->parent->bus == &platform_bus_type && - dev->parent->parent->bus->match(dev, drv)) + dev->parent->parent->bus->match(dev->parent->parent, drv)) return 1; return 0; @@ -73,7 +73,7 @@ static int serdev_uevent(struct device *dev, struct kobj_uevent_env *env) return rc; if (dev->parent->parent->bus == &platform_bus_type) - rc = dev->parent->parent->bus->uevent(dev, env); + rc = dev->parent->parent->bus->uevent(dev->parent->parent, env); return rc; } From 77880d107ed1c67b065e1609a5233422939fcd72 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 21 Sep 2019 20:58:55 -0700 Subject: [PATCH 1918/3715] scsi: lpfc: Fix discovery failures when target device connectivity bounces [ Upstream commit 3f97aed6117c7677eb16756c4ec8b86000fd5822 ] An issue was seen discovering all SCSI Luns when a target device undergoes link bounce. The driver currently does not qualify the FC4 support on the target. Therefore it will send a SCSI PRLI and an NVMe PRLI. The expectation is that the target will reject the PRLI if it is not supported. If a PRLI times out, the driver will retry. The driver will not proceed with the device until both SCSI and NVMe PRLIs are resolved. In the failure case, the device is FCP only and does not respond to the NVMe PRLI, thus initiating the wait/retry loop in the driver. During that time, a RSCN is received (device bounced) causing the driver to issue a GID_FT. The GID_FT response comes back before the PRLI mess is resolved and it prematurely cancels the PRLI retry logic and leaves the device in a STE_PRLI_ISSUE state. Discovery with the target never completes or resets. Fix by resetting the node state back to STE_NPR_NODE when GID_FT completes, thereby restarting the discovery process for the node. Link: https://lore.kernel.org/r/20190922035906.10977-10-jsmart2021@gmail.com Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_hbadisc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 3f88f3d79622..4a0889dd4c1d 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -5220,9 +5220,14 @@ lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did) /* If we've already received a PLOGI from this NPort * we don't need to try to discover it again. */ - if (ndlp->nlp_flag & NLP_RCV_PLOGI) + if (ndlp->nlp_flag & NLP_RCV_PLOGI && + !(ndlp->nlp_type & + (NLP_FCP_TARGET | NLP_NVME_TARGET))) return NULL; + ndlp->nlp_prev_state = ndlp->nlp_state; + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; spin_unlock_irq(shost->host_lock); From fd9e15382953d6c18aaef4b8ab7f366a27980618 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Fri, 13 Sep 2019 09:04:40 -0400 Subject: [PATCH 1919/3715] scsi: mpt3sas: Fix clear pending bit in ioctl status [ Upstream commit 782b281883caf70289ba6a186af29441a117d23e ] When user issues diag register command from application with required size, and if driver unable to allocate the memory, then it will fail the register command. While failing the register command, driver is not currently clearing MPT3_CMD_PENDING bit in ctl_cmds.status variable which was set before trying to allocate the memory. As this bit is set, subsequent register command will be failed with BUSY status even when user wants to register the trace buffer will less memory. Clear MPT3_CMD_PENDING bit in ctl_cmds.status before returning the diag register command with no memory status. Link: https://lore.kernel.org/r/1568379890-18347-4-git-send-email-sreekanth.reddy@broadcom.com Signed-off-by: Sreekanth Reddy Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/mpt3sas/mpt3sas_ctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c index bdffb692bded..622dcf2984a9 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c +++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c @@ -1502,7 +1502,8 @@ _ctl_diag_register_2(struct MPT3SAS_ADAPTER *ioc, " for diag buffers, requested size(%d)\n", ioc->name, __func__, request_data_sz); mpt3sas_base_free_smid(ioc, smid); - return -ENOMEM; + rc = -ENOMEM; + goto out; } ioc->diag_buffer[buffer_type] = request_data; ioc->diag_buffer_sz[buffer_type] = request_data_sz; From c6ac111b46aa2b90e0f56af85fcb499d86f39177 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 21 Sep 2019 20:58:53 -0700 Subject: [PATCH 1920/3715] scsi: lpfc: Fix locking on mailbox command completion [ Upstream commit 07b8582430370097238b589f4e24da7613ca6dd3 ] Symptoms were seen of the driver not having valid data for mailbox commands. After debugging, the following sequence was found: The driver maintains a port-wide pointer of the mailbox command that is currently in execution. Once finished, the port-wide pointer is cleared (done in lpfc_sli4_mq_release()). The next mailbox command issued will set the next pointer and so on. The mailbox response data is only copied if there is a valid port-wide pointer. In the failing case, it was seen that a new mailbox command was being attempted in parallel with the completion. The parallel path was seeing the mailbox no long in use (flag check under lock) and thus set the port pointer. The completion path had cleared the active flag under lock, but had not touched the port pointer. The port pointer is cleared after the lock is released. In this case, the completion path cleared the just-set value by the parallel path. Fix by making the calls that clear mbox state/port pointer while under lock. Also slightly cleaned up the error path. Link: https://lore.kernel.org/r/20190922035906.10977-8-jsmart2021@gmail.com Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_sli.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index d3bad0dbfaf7..7920b8c72caf 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -12689,13 +12689,19 @@ send_current_mbox: phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE; /* Setting active mailbox pointer need to be in sync to flag clear */ phba->sli.mbox_active = NULL; + if (bf_get(lpfc_trailer_consumed, mcqe)) + lpfc_sli4_mq_release(phba->sli4_hba.mbx_wq); spin_unlock_irqrestore(&phba->hbalock, iflags); /* Wake up worker thread to post the next pending mailbox command */ lpfc_worker_wake_up(phba); + return workposted; + out_no_mqe_complete: + spin_lock_irqsave(&phba->hbalock, iflags); if (bf_get(lpfc_trailer_consumed, mcqe)) lpfc_sli4_mq_release(phba->sli4_hba.mbx_wq); - return workposted; + spin_unlock_irqrestore(&phba->hbalock, iflags); + return false; } /** From 4ad39f44123afdaa94e1af8c2cc5c70013f863f0 Mon Sep 17 00:00:00 2001 From: Evan Green Date: Wed, 2 Oct 2019 14:00:21 -0700 Subject: [PATCH 1921/3715] Input: atmel_mxt_ts - disable IRQ across suspend [ Upstream commit 463fa44eec2fef50d111ed0199cf593235065c04 ] Across suspend and resume, we are seeing error messages like the following: atmel_mxt_ts i2c-PRP0001:00: __mxt_read_reg: i2c transfer failed (-121) atmel_mxt_ts i2c-PRP0001:00: Failed to read T44 and T5 (-121) This occurs because the driver leaves its IRQ enabled. Upon resume, there is an IRQ pending, but the interrupt is serviced before both the driver and the underlying I2C bus have been resumed. This causes EREMOTEIO errors. Disable the IRQ in suspend, and re-enable it on resume. If there are cases where the driver enters suspend with interrupts disabled, that's a bug we should fix separately. Signed-off-by: Evan Green Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/touchscreen/atmel_mxt_ts.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index 59aaac43db91..138d1f3b12b2 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -3257,6 +3257,8 @@ static int __maybe_unused mxt_suspend(struct device *dev) mutex_unlock(&input_dev->mutex); + disable_irq(data->irq); + return 0; } @@ -3269,6 +3271,8 @@ static int __maybe_unused mxt_resume(struct device *dev) if (!input_dev) return 0; + enable_irq(data->irq); + mutex_lock(&input_dev->mutex); if (input_dev->users) From 788e280d798b09e2bfbe70fff3103d240dfe4982 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 16 Oct 2019 13:50:26 +0200 Subject: [PATCH 1922/3715] iommu/tegra-smmu: Fix page tables in > 4 GiB memory [ Upstream commit 96d3ab802e4930a29a33934373157d6dff1b2c7e ] Page tables that reside in physical memory beyond the 4 GiB boundary are currently not working properly. The reason is that when the physical address for page directory entries is read, it gets truncated at 32 bits and can cause crashes when passing that address to the DMA API. Fix this by first casting the PDE value to a dma_addr_t and then using the page frame number mask for the SMMU instance to mask out the invalid bits, which are typically used for mapping attributes, etc. Signed-off-by: Thierry Reding Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/tegra-smmu.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 40eb8138546a..848dac3e4580 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -156,9 +156,9 @@ static bool smmu_dma_addr_valid(struct tegra_smmu *smmu, dma_addr_t addr) return (addr & smmu->pfn_mask) == addr; } -static dma_addr_t smmu_pde_to_dma(u32 pde) +static dma_addr_t smmu_pde_to_dma(struct tegra_smmu *smmu, u32 pde) { - return pde << 12; + return (dma_addr_t)(pde & smmu->pfn_mask) << 12; } static void smmu_flush_ptc_all(struct tegra_smmu *smmu) @@ -543,6 +543,7 @@ static u32 *tegra_smmu_pte_lookup(struct tegra_smmu_as *as, unsigned long iova, dma_addr_t *dmap) { unsigned int pd_index = iova_pd_index(iova); + struct tegra_smmu *smmu = as->smmu; struct page *pt_page; u32 *pd; @@ -551,7 +552,7 @@ static u32 *tegra_smmu_pte_lookup(struct tegra_smmu_as *as, unsigned long iova, return NULL; pd = page_address(as->pd); - *dmap = smmu_pde_to_dma(pd[pd_index]); + *dmap = smmu_pde_to_dma(smmu, pd[pd_index]); return tegra_smmu_pte_offset(pt_page, iova); } @@ -593,7 +594,7 @@ static u32 *as_get_pte(struct tegra_smmu_as *as, dma_addr_t iova, } else { u32 *pd = page_address(as->pd); - *dmap = smmu_pde_to_dma(pd[pde]); + *dmap = smmu_pde_to_dma(smmu, pd[pde]); } return tegra_smmu_pte_offset(as->pts[pde], iova); @@ -618,7 +619,7 @@ static void tegra_smmu_pte_put_use(struct tegra_smmu_as *as, unsigned long iova) if (--as->count[pde] == 0) { struct tegra_smmu *smmu = as->smmu; u32 *pd = page_address(as->pd); - dma_addr_t pte_dma = smmu_pde_to_dma(pd[pde]); + dma_addr_t pte_dma = smmu_pde_to_dma(smmu, pd[pde]); tegra_smmu_set_pde(as, iova, 0); From 05ee8b43f8e90561dc96ca5caa3219817f77b5d6 Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Thu, 12 Sep 2019 11:55:45 +0200 Subject: [PATCH 1923/3715] scsi: target: compare full CHAP_A Algorithm strings [ Upstream commit 9cef2a7955f2754257a7cddedec16edae7b587d0 ] RFC 2307 states: For CHAP [RFC1994], in the first step, the initiator MUST send: CHAP_A= Where A1,A2... are proposed algorithms, in order of preference. ... For the Algorithm, as stated in [RFC1994], one value is required to be implemented: 5 (CHAP with MD5) LIO currently checks for this value by only comparing a single byte in the tokenized Algorithm string, which means that any value starting with a '5' (e.g. "55") is interpreted as "CHAP with MD5". Fix this by comparing the entire tokenized string. Reviewed-by: Lee Duncan Reviewed-by: Mike Christie Signed-off-by: David Disseldorp Link: https://lore.kernel.org/r/20190912095547.22427-2-ddiss@suse.de Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/target/iscsi/iscsi_target_auth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c index e2fa3a3bc81d..b6bf605fa5c1 100644 --- a/drivers/target/iscsi/iscsi_target_auth.c +++ b/drivers/target/iscsi/iscsi_target_auth.c @@ -78,7 +78,7 @@ static int chap_check_algorithm(const char *a_str) if (!token) goto out; - if (!strncmp(token, "5", 1)) { + if (!strcmp(token, "5")) { pr_debug("Selected MD5 Algorithm\n"); kfree(orig); return CHAP_DIGEST_MD5; From 781f73d867961d81dbb2519dfe6f0fbdc7d20b59 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 18 Oct 2019 14:18:20 -0700 Subject: [PATCH 1924/3715] scsi: lpfc: Fix SLI3 hba in loop mode not discovering devices [ Upstream commit feff8b3d84d3d9570f893b4d83e5eab6693d6a52 ] When operating in private loop mode, PLOGI exchanges are racing and the driver tries to abort it's PLOGI. But the PLOGI abort ends up terminating the login with the other end causing the other end to abort its PLOGI as well. Discovery never fully completes. Fix by disabling the PLOGI abort when private loop and letting the state machine play out. Link: https://lore.kernel.org/r/20191018211832.7917-5-jsmart2021@gmail.com Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_nportdisc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index 043bca6449cd..96411754aa43 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -483,8 +483,10 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, * single discovery thread, this will cause a huge delay in * discovery. Also this will cause multiple state machines * running in parallel for this node. + * This only applies to a fabric environment. */ - if (ndlp->nlp_state == NLP_STE_PLOGI_ISSUE) { + if ((ndlp->nlp_state == NLP_STE_PLOGI_ISSUE) && + (vport->fc_flag & FC_FABRIC)) { /* software abort outstanding PLOGI */ lpfc_els_abort(phba, ndlp); } From b0183bc00dac582b31260eaeca6819411db979a3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 19 Oct 2019 11:59:13 +0300 Subject: [PATCH 1925/3715] scsi: csiostor: Don't enable IRQs too early [ Upstream commit d6c9b31ac3064fbedf8961f120a4c117daa59932 ] These are called with IRQs disabled from csio_mgmt_tmo_handler() so we can't call spin_unlock_irq() or it will enable IRQs prematurely. Fixes: a3667aaed569 ("[SCSI] csiostor: Chelsio FCoE offload driver") Link: https://lore.kernel.org/r/20191019085913.GA14245@mwanda Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/csiostor/csio_lnode.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/csiostor/csio_lnode.c b/drivers/scsi/csiostor/csio_lnode.c index be5ee2d37815..957767d38361 100644 --- a/drivers/scsi/csiostor/csio_lnode.c +++ b/drivers/scsi/csiostor/csio_lnode.c @@ -301,6 +301,7 @@ csio_ln_fdmi_rhba_cbfn(struct csio_hw *hw, struct csio_ioreq *fdmi_req) struct fc_fdmi_port_name *port_name; uint8_t buf[64]; uint8_t *fc4_type; + unsigned long flags; if (fdmi_req->wr_status != FW_SUCCESS) { csio_ln_dbg(ln, "WR error:%x in processing fdmi rhba cmd\n", @@ -377,13 +378,13 @@ csio_ln_fdmi_rhba_cbfn(struct csio_hw *hw, struct csio_ioreq *fdmi_req) len = (uint32_t)(pld - (uint8_t *)cmd); /* Submit FDMI RPA request */ - spin_lock_irq(&hw->lock); + spin_lock_irqsave(&hw->lock, flags); if (csio_ln_mgmt_submit_req(fdmi_req, csio_ln_fdmi_done, FCOE_CT, &fdmi_req->dma_buf, len)) { CSIO_INC_STATS(ln, n_fdmi_err); csio_ln_dbg(ln, "Failed to issue fdmi rpa req\n"); } - spin_unlock_irq(&hw->lock); + spin_unlock_irqrestore(&hw->lock, flags); } /* @@ -404,6 +405,7 @@ csio_ln_fdmi_dprt_cbfn(struct csio_hw *hw, struct csio_ioreq *fdmi_req) struct fc_fdmi_rpl *reg_pl; struct fs_fdmi_attrs *attrib_blk; uint8_t buf[64]; + unsigned long flags; if (fdmi_req->wr_status != FW_SUCCESS) { csio_ln_dbg(ln, "WR error:%x in processing fdmi dprt cmd\n", @@ -483,13 +485,13 @@ csio_ln_fdmi_dprt_cbfn(struct csio_hw *hw, struct csio_ioreq *fdmi_req) attrib_blk->numattrs = htonl(numattrs); /* Submit FDMI RHBA request */ - spin_lock_irq(&hw->lock); + spin_lock_irqsave(&hw->lock, flags); if (csio_ln_mgmt_submit_req(fdmi_req, csio_ln_fdmi_rhba_cbfn, FCOE_CT, &fdmi_req->dma_buf, len)) { CSIO_INC_STATS(ln, n_fdmi_err); csio_ln_dbg(ln, "Failed to issue fdmi rhba req\n"); } - spin_unlock_irq(&hw->lock); + spin_unlock_irqrestore(&hw->lock, flags); } /* @@ -504,6 +506,7 @@ csio_ln_fdmi_dhba_cbfn(struct csio_hw *hw, struct csio_ioreq *fdmi_req) void *cmd; struct fc_fdmi_port_name *port_name; uint32_t len; + unsigned long flags; if (fdmi_req->wr_status != FW_SUCCESS) { csio_ln_dbg(ln, "WR error:%x in processing fdmi dhba cmd\n", @@ -534,13 +537,13 @@ csio_ln_fdmi_dhba_cbfn(struct csio_hw *hw, struct csio_ioreq *fdmi_req) len += sizeof(*port_name); /* Submit FDMI request */ - spin_lock_irq(&hw->lock); + spin_lock_irqsave(&hw->lock, flags); if (csio_ln_mgmt_submit_req(fdmi_req, csio_ln_fdmi_dprt_cbfn, FCOE_CT, &fdmi_req->dma_buf, len)) { CSIO_INC_STATS(ln, n_fdmi_err); csio_ln_dbg(ln, "Failed to issue fdmi dprt req\n"); } - spin_unlock_irq(&hw->lock); + spin_unlock_irqrestore(&hw->lock, flags); } /** From ee0c6265500faf6937fcd3bfc1f85a439e7a3994 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Sun, 13 Oct 2019 21:23:51 +1100 Subject: [PATCH 1926/3715] powerpc/pseries: Mark accumulate_stolen_time() as notrace [ Upstream commit eb8e20f89093b64f48975c74ccb114e6775cee22 ] accumulate_stolen_time() is called prior to interrupt state being reconciled, which can trip the warning in arch_local_irq_restore(): WARNING: CPU: 5 PID: 1017 at arch/powerpc/kernel/irq.c:258 .arch_local_irq_restore+0x9c/0x130 ... NIP .arch_local_irq_restore+0x9c/0x130 LR .rb_start_commit+0x38/0x80 Call Trace: .ring_buffer_lock_reserve+0xe4/0x620 .trace_function+0x44/0x210 .function_trace_call+0x148/0x170 .ftrace_ops_no_ops+0x180/0x1d0 ftrace_call+0x4/0x8 .accumulate_stolen_time+0x1c/0xb0 decrementer_common+0x124/0x160 For now just mark it as notrace. We may change the ordering to call it after interrupt state has been reconciled, but that is a larger change. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191024055932.27940-1-mpe@ellerman.id.au Signed-off-by: Sasha Levin --- arch/powerpc/kernel/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 14f3f28a089e..66a9987dc0f8 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -241,7 +241,7 @@ static u64 scan_dispatch_log(u64 stop_tb) * Accumulate stolen time by scanning the dispatch trace log. * Called on entry from user mode. */ -void accumulate_stolen_time(void) +void notrace accumulate_stolen_time(void) { u64 sst, ust; u8 save_soft_enabled = local_paca->soft_enabled; From 68672ccb81448ceaf91912fc6c69216823d6abb1 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 24 Oct 2019 15:05:41 +0530 Subject: [PATCH 1927/3715] powerpc/pseries: Don't fail hash page table insert for bolted mapping [ Upstream commit 75838a3290cd4ebbd1f567f310ba04b6ef017ce4 ] If the hypervisor returned H_PTEG_FULL for H_ENTER hcall, retry a hash page table insert by removing a random entry from the group. After some runtime, it is very well possible to find all the 8 hash page table entry slot in the hpte group used for mapping. Don't fail a bolted entry insert in that case. With Storage class memory a user can find this error easily since a namespace enable/disable is equivalent to memory add/remove. This results in failures as reported below: $ ndctl create-namespace -r region1 -t pmem -m devdax -a 65536 -s 100M libndctl: ndctl_dax_enable: dax1.3: failed to enable Error: namespace1.2: failed to enable failed to create namespace: No such device or address In kernel log we find the details as below: Unable to create mapping for hot added memory 0xc000042006000000..0xc00004200d000000: -1 dax_pmem: probe of dax1.3 failed with error -14 This indicates that we failed to create a bolted hash table entry for direct-map address backing the namespace. We also observe failures such that not all namespaces will be enabled with ndctl enable-namespace all command. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191024093542.29777-2-aneesh.kumar@linux.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/mm/hash_utils_64.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 58c14749bb0c..cf1d76e03635 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -292,7 +292,14 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot, HPTE_V_BOLTED, psize, psize, ssize); - + if (ret == -1) { + /* Try to remove a non bolted entry */ + ret = mmu_hash_ops.hpte_remove(hpteg); + if (ret != -1) + ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot, + HPTE_V_BOLTED, psize, psize, + ssize); + } if (ret < 0) break; From 186e23c66a17fcb4b900c6496122a2ca67fa2b5d Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 24 Oct 2019 11:47:30 +1100 Subject: [PATCH 1928/3715] powerpc/tools: Don't quote $objdump in scripts [ Upstream commit e44ff9ea8f4c8a90c82f7b85bd4f5e497c841960 ] Some of our scripts are passed $objdump and then call it as "$objdump". This doesn't work if it contains spaces because we're using ccache, for example you get errors such as: ./arch/powerpc/tools/relocs_check.sh: line 48: ccache ppc64le-objdump: No such file or directory ./arch/powerpc/tools/unrel_branch_check.sh: line 26: ccache ppc64le-objdump: No such file or directory Fix it by not quoting the string when we expand it, allowing the shell to do the right thing for us. Fixes: a71aa05e1416 ("powerpc: Convert relocs_check to a shell script using grep") Fixes: 4ea80652dc75 ("powerpc/64s: Tool to flag direct branches from unrelocated interrupt vectors") Signed-off-by: Michael Ellerman Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191024004730.32135-1-mpe@ellerman.id.au Signed-off-by: Sasha Levin --- arch/powerpc/tools/relocs_check.sh | 2 +- arch/powerpc/tools/unrel_branch_check.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/tools/relocs_check.sh b/arch/powerpc/tools/relocs_check.sh index ec2d5c835170..d6c16e7faa38 100755 --- a/arch/powerpc/tools/relocs_check.sh +++ b/arch/powerpc/tools/relocs_check.sh @@ -23,7 +23,7 @@ objdump="$1" vmlinux="$2" bad_relocs=$( -"$objdump" -R "$vmlinux" | +$objdump -R "$vmlinux" | # Only look at relocation lines. grep -E '\:' | awk '{print $1}' ) BRANCHES=$( -"$objdump" -R "$vmlinux" -D --start-address=0xc000000000000000 \ +$objdump -R "$vmlinux" -D --start-address=0xc000000000000000 \ --stop-address=${end_intr} | grep -e "^c[0-9a-f]*:[[:space:]]*\([0-9a-f][0-9a-f][[:space:]]\)\{4\}[[:space:]]*b" | grep -v '\<__start_initialization_multiplatform>' | From 5b81bbad05fd95d3e59ffd993198154dbe7aa1c1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 28 Oct 2019 14:56:46 -0700 Subject: [PATCH 1929/3715] dma-debug: add a schedule point in debug_dma_dump_mappings() [ Upstream commit 9ff6aa027dbb98755f0265695354f2dd07c0d1ce ] debug_dma_dump_mappings() can take a lot of cpu cycles : lpk43:/# time wc -l /sys/kernel/debug/dma-api/dump 163435 /sys/kernel/debug/dma-api/dump real 0m0.463s user 0m0.003s sys 0m0.459s Let's add a cond_resched() to avoid holding cpu for too long. Signed-off-by: Eric Dumazet Cc: Corentin Labbe Cc: Christoph Hellwig Cc: Marek Szyprowski Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- lib/dma-debug.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/dma-debug.c b/lib/dma-debug.c index ea4cc3dde4f1..61e7240947f5 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -437,6 +437,7 @@ void debug_dma_dump_mappings(struct device *dev) } spin_unlock_irqrestore(&bucket->lock, flags); + cond_resched(); } } EXPORT_SYMBOL(debug_dma_dump_mappings); From 6017fdd186e7d11a8990ae3827c69efee4185848 Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Wed, 16 Oct 2019 20:43:30 +0800 Subject: [PATCH 1930/3715] clocksource/drivers/asm9260: Add a check for of_clk_get [ Upstream commit 6e001f6a4cc73cd06fc7b8c633bc4906c33dd8ad ] asm9260_timer_init misses a check for of_clk_get. Add a check for it and print errors like other clocksource drivers. Signed-off-by: Chuhong Yuan Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20191016124330.22211-1-hslester96@gmail.com Signed-off-by: Sasha Levin --- drivers/clocksource/asm9260_timer.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/clocksource/asm9260_timer.c b/drivers/clocksource/asm9260_timer.c index 38cd2feb87c4..0ce760776406 100644 --- a/drivers/clocksource/asm9260_timer.c +++ b/drivers/clocksource/asm9260_timer.c @@ -198,6 +198,10 @@ static int __init asm9260_timer_init(struct device_node *np) } clk = of_clk_get(np, 0); + if (IS_ERR(clk)) { + pr_err("Failed to get clk!\n"); + return PTR_ERR(clk); + } ret = clk_prepare_enable(clk); if (ret) { From a9b68a316bed2ff6c729d423c97bfefa3e046cdf Mon Sep 17 00:00:00 2001 From: Anthony Steinhauser Date: Tue, 29 Oct 2019 12:07:59 -0700 Subject: [PATCH 1931/3715] powerpc/security/book3s64: Report L1TF status in sysfs [ Upstream commit 8e6b6da91ac9b9ec5a925b6cb13f287a54bd547d ] Some PowerPC CPUs are vulnerable to L1TF to the same extent as to Meltdown. It is also mitigated by flushing the L1D on privilege transition. Currently the sysfs gives a false negative on L1TF on CPUs that I verified to be vulnerable, a Power9 Talos II Boston 004e 1202, PowerNV T2P9D01. Signed-off-by: Anthony Steinhauser Signed-off-by: Michael Ellerman [mpe: Just have cpu_show_l1tf() call cpu_show_meltdown() directly] Link: https://lore.kernel.org/r/20191029190759.84821-1-asteinhauser@google.com Signed-off-by: Sasha Levin --- arch/powerpc/kernel/security.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index f5d6541bf8c2..fef3f09fc238 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -160,6 +160,11 @@ ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, cha return sprintf(buf, "Vulnerable\n"); } + +ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_meltdown(dev, attr, buf); +} #endif ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) From bcac8a188673a811eeb0dbbc5cce22340e4507ce Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 1 Oct 2019 14:16:56 +0530 Subject: [PATCH 1932/3715] powerpc/book3s64/hash: Add cond_resched to avoid soft lockup warning [ Upstream commit 16f6b67cf03cb43db7104acb2ca877bdc2606c92 ] With large memory (8TB and more) hotplug, we can get soft lockup warnings as below. These were caused by a long loop without any explicit cond_resched which is a problem for !PREEMPT kernels. Avoid this using cond_resched() while inserting hash page table entries. We already do similar cond_resched() in __add_pages(), see commit f64ac5e6e306 ("mm, memory_hotplug: add scheduling point to __add_pages"). rcu: 3-....: (24002 ticks this GP) idle=13e/1/0x4000000000000002 softirq=722/722 fqs=12001 (t=24003 jiffies g=4285 q=2002) NMI backtrace for cpu 3 CPU: 3 PID: 3870 Comm: ndctl Not tainted 5.3.0-197.18-default+ #2 Call Trace: dump_stack+0xb0/0xf4 (unreliable) nmi_cpu_backtrace+0x124/0x130 nmi_trigger_cpumask_backtrace+0x1ac/0x1f0 arch_trigger_cpumask_backtrace+0x28/0x3c rcu_dump_cpu_stacks+0xf8/0x154 rcu_sched_clock_irq+0x878/0xb40 update_process_times+0x48/0x90 tick_sched_handle.isra.16+0x4c/0x80 tick_sched_timer+0x68/0xe0 __hrtimer_run_queues+0x180/0x430 hrtimer_interrupt+0x110/0x300 timer_interrupt+0x108/0x2f0 decrementer_common+0x114/0x120 --- interrupt: 901 at arch_add_memory+0xc0/0x130 LR = arch_add_memory+0x74/0x130 memremap_pages+0x494/0x650 devm_memremap_pages+0x3c/0xa0 pmem_attach_disk+0x188/0x750 nvdimm_bus_probe+0xac/0x2c0 really_probe+0x148/0x570 driver_probe_device+0x19c/0x1d0 device_driver_attach+0xcc/0x100 bind_store+0x134/0x1c0 drv_attr_store+0x44/0x60 sysfs_kf_write+0x64/0x90 kernfs_fop_write+0x1a0/0x270 __vfs_write+0x3c/0x70 vfs_write+0xd0/0x260 ksys_write+0xdc/0x130 system_call+0x5c/0x68 Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191001084656.31277-1-aneesh.kumar@linux.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/mm/hash_utils_64.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index cf1d76e03635..387600ecea60 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -303,6 +303,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, if (ret < 0) break; + cond_resched(); #ifdef CONFIG_DEBUG_PAGEALLOC if (debug_pagealloc_enabled() && (paddr >> PAGE_SHIFT) < linear_map_hash_count) From 47acc10500224436b3ea4c02b13550130ed4af33 Mon Sep 17 00:00:00 2001 From: Matthew Bobrowski Date: Tue, 5 Nov 2019 22:59:22 +1100 Subject: [PATCH 1933/3715] ext4: update direct I/O read lock pattern for IOCB_NOWAIT [ Upstream commit 548feebec7e93e58b647dba70b3303dcb569c914 ] This patch updates the lock pattern in ext4_direct_IO_read() to not block on inode lock in cases of IOCB_NOWAIT direct I/O reads. The locking condition implemented here is similar to that of 942491c9e6d6 ("xfs: fix AIM7 regression"). Fixes: 16c54688592c ("ext4: Allow parallel DIO reads") Signed-off-by: Matthew Bobrowski Reviewed-by: Jan Kara Reviewed-by: Ritesh Harjani Link: https://lore.kernel.org/r/c5d5e759f91747359fbd2c6f9a36240cf75ad79f.1572949325.git.mbobrowski@mbobrowski.org Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/inode.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c2920cbfa3bf..a91b8404d3dc 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3796,7 +3796,13 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter) * writes & truncates and since we take care of writing back page cache, * we are protected against page writeback as well. */ - inode_lock_shared(inode); + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!inode_trylock_shared(inode)) + return -EAGAIN; + } else { + inode_lock_shared(inode); + } + ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, iocb->ki_pos + count - 1); if (ret) From 18250c784845c377699fea22492e2bd19fc4b7eb Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 5 Nov 2019 17:44:19 +0100 Subject: [PATCH 1934/3715] jbd2: Fix statistics for the number of logged blocks [ Upstream commit 015c6033068208d6227612c878877919f3fcf6b6 ] jbd2 statistics counting number of blocks logged in a transaction was wrong. It didn't count the commit block and more importantly it didn't count revoke descriptor blocks. Make sure these get properly counted. Reviewed-by: Theodore Ts'o Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20191105164437.32602-13-jack@suse.cz Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/jbd2/commit.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 0567b17a970c..7dd613392592 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -726,7 +726,6 @@ start_journal_io: submit_bh(REQ_OP_WRITE, REQ_SYNC, bh); } cond_resched(); - stats.run.rs_blocks_logged += bufs; /* Force a new descriptor to be generated next time round the loop. */ @@ -813,6 +812,7 @@ start_journal_io: if (unlikely(!buffer_uptodate(bh))) err = -EIO; jbd2_unfile_log_bh(bh); + stats.run.rs_blocks_logged++; /* * The list contains temporary buffer heads created by @@ -858,6 +858,7 @@ start_journal_io: BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile"); clear_buffer_jwrite(bh); jbd2_unfile_log_bh(bh); + stats.run.rs_blocks_logged++; __brelse(bh); /* One for getblk */ /* AKPM: bforget here */ } @@ -879,6 +880,7 @@ start_journal_io: } if (cbh) err = journal_wait_on_commit_record(journal, cbh); + stats.run.rs_blocks_logged++; if (jbd2_has_feature_async_commit(journal) && journal->j_flags & JBD2_BARRIER) { blkdev_issue_flush(journal->j_dev, GFP_NOFS, NULL); From 8bded9c5d3285ce32fe700222d22c099a51526e7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 5 Nov 2019 13:55:53 -0800 Subject: [PATCH 1935/3715] scsi: tracing: Fix handling of TRANSFER LENGTH == 0 for READ(6) and WRITE(6) [ Upstream commit f6b8540f40201bff91062dd64db8e29e4ddaaa9d ] According to SBC-2 a TRANSFER LENGTH field of zero means that 256 logical blocks must be transferred. Make the SCSI tracing code follow SBC-2. Fixes: bf8162354233 ("[SCSI] add scsi trace core functions and put trace points") Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Douglas Gilbert Link: https://lore.kernel.org/r/20191105215553.185018-1-bvanassche@acm.org Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/scsi_trace.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/scsi_trace.c b/drivers/scsi/scsi_trace.c index 0ff083bbf5b1..617a60737590 100644 --- a/drivers/scsi/scsi_trace.c +++ b/drivers/scsi/scsi_trace.c @@ -30,15 +30,18 @@ static const char * scsi_trace_rw6(struct trace_seq *p, unsigned char *cdb, int len) { const char *ret = trace_seq_buffer_ptr(p); - sector_t lba = 0, txlen = 0; + u32 lba = 0, txlen; lba |= ((cdb[1] & 0x1F) << 16); lba |= (cdb[2] << 8); lba |= cdb[3]; - txlen = cdb[4]; + /* + * From SBC-2: a TRANSFER LENGTH field set to zero specifies that 256 + * logical blocks shall be read (READ(6)) or written (WRITE(6)). + */ + txlen = cdb[4] ? cdb[4] : 256; - trace_seq_printf(p, "lba=%llu txlen=%llu", - (unsigned long long)lba, (unsigned long long)txlen); + trace_seq_printf(p, "lba=%u txlen=%u", lba, txlen); trace_seq_putc(p, 0); return ret; From 58a80d83db75e0e940c3c97b1f295af212b4b10c Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 4 Nov 2019 16:56:58 -0800 Subject: [PATCH 1936/3715] scsi: lpfc: Fix duplicate unreg_rpi error in port offline flow [ Upstream commit 7cfd5639d99bec0d27af089d0c8c114330e43a72 ] If the driver receives a login that is later then LOGO'd by the remote port (aka ndlp), the driver, upon the completion of the LOGO ACC transmission, will logout the node and unregister the rpi that is being used for the node. As part of the unreg, the node's rpi value is replaced by the LPFC_RPI_ALLOC_ERROR value. If the port is subsequently offlined, the offline walks the nodes and ensures they are logged out, which possibly entails unreg'ing their rpi values. This path does not validate the node's rpi value, thus doesn't detect that it has been unreg'd already. The replaced rpi value is then used when accessing the rpi bitmask array which tracks active rpi values. As the LPFC_RPI_ALLOC_ERROR value is not a valid index for the bitmask, it may fault the system. Revise the rpi release code to detect when the rpi value is the replaced RPI_ALLOC_ERROR value and ignore further release steps. Link: https://lore.kernel.org/r/20191105005708.7399-2-jsmart2021@gmail.com Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_sli.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 7920b8c72caf..d8e0ba68879c 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -17492,6 +17492,13 @@ lpfc_sli4_alloc_rpi(struct lpfc_hba *phba) static void __lpfc_sli4_free_rpi(struct lpfc_hba *phba, int rpi) { + /* + * if the rpi value indicates a prior unreg has already + * been done, skip the unreg. + */ + if (rpi == LPFC_RPI_ALLOC_ERROR) + return; + if (test_and_clear_bit(rpi, phba->sli4_hba.rpi_bmask)) { phba->sli4_hba.rpi_count--; phba->sli4_hba.max_cfg_param.rpi_used--; From 4ff52ab7c72cacbcc4dcebd0cd04d293c709b729 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 7 Nov 2019 14:12:05 +0800 Subject: [PATCH 1937/3715] f2fs: fix to update dir's i_pino during cross_rename [ Upstream commit 2a60637f06ac94869b2e630eaf837110d39bf291 ] As Eric reported: RENAME_EXCHANGE support was just added to fsstress in xfstests: commit 65dfd40a97b6bbbd2a22538977bab355c5bc0f06 Author: kaixuxia Date: Thu Oct 31 14:41:48 2019 +0800 fsstress: add EXCHANGE renameat2 support This is causing xfstest generic/579 to fail due to fsck.f2fs reporting errors. I'm not sure what the problem is, but it still happens even with all the fs-verity stuff in the test commented out, so that the test just runs fsstress. generic/579 23s ... [10:02:25] [ 7.745370] run fstests generic/579 at 2019-11-04 10:02:25 _check_generic_filesystem: filesystem on /dev/vdc is inconsistent (see /results/f2fs/results-default/generic/579.full for details) [10:02:47] Ran: generic/579 Failures: generic/579 Failed 1 of 1 tests Xunit report: /results/f2fs/results-default/result.xml Here's the contents of 579.full: _check_generic_filesystem: filesystem on /dev/vdc is inconsistent *** fsck.f2fs output *** [ASSERT] (__chk_dots_dentries:1378) --> Bad inode number[0x24] for '..', parent parent ino is [0xd10] The root cause is that we forgot to update directory's i_pino during cross_rename, fix it. Fixes: 32f9bc25cbda0 ("f2fs: support ->rename2()") Signed-off-by: Chao Yu Tested-by: Eric Biggers Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/namei.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index b80e7db3b55b..b13383948fca 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -862,7 +862,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (!old_dir_entry || whiteout) file_lost_pino(old_inode); else - F2FS_I(old_inode)->i_pino = new_dir->i_ino; + /* adjust dir's i_pino to pass fsck check */ + f2fs_i_pino_write(old_inode, new_dir->i_ino); up_write(&F2FS_I(old_inode)->i_sem); old_inode->i_ctime = current_time(old_inode); @@ -1027,7 +1028,11 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_set_link(old_dir, old_entry, old_page, new_inode); down_write(&F2FS_I(old_inode)->i_sem); - file_lost_pino(old_inode); + if (!old_dir_entry) + file_lost_pino(old_inode); + else + /* adjust dir's i_pino to pass fsck check */ + f2fs_i_pino_write(old_inode, new_dir->i_ino); up_write(&F2FS_I(old_inode)->i_sem); old_dir->i_ctime = current_time(old_dir); @@ -1042,7 +1047,11 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_set_link(new_dir, new_entry, new_page, old_inode); down_write(&F2FS_I(new_inode)->i_sem); - file_lost_pino(new_inode); + if (!new_dir_entry) + file_lost_pino(new_inode); + else + /* adjust dir's i_pino to pass fsck check */ + f2fs_i_pino_write(new_inode, old_dir->i_ino); up_write(&F2FS_I(new_inode)->i_sem); new_dir->i_ctime = current_time(new_dir); From 1fecec01675780f80f5fe3496cd9ab956caf51ec Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Thu, 31 Oct 2019 11:57:15 -0700 Subject: [PATCH 1938/3715] clk: qcom: Allow constant ratio freq tables for rcg [ Upstream commit efd164b5520afd6fb2883b68e0d408a7de29c491 ] Some RCGs (the gfx_3d_src_clk in msm8998 for example) are basically just some constant ratio from the input across the entire frequency range. It would be great if we could specify the frequency table as a single entry constant ratio instead of a long list, ie: { .src = P_GPUPLL0_OUT_EVEN, .pre_div = 3 }, { } So, lets support that. We need to fix a corner case in qcom_find_freq() where if the freq table is non-null, but has no frequencies, we end up returning an "entry" before the table array, which is bad. Then, we need ignore the freq from the table, and instead base everything on the requested freq. Suggested-by: Stephen Boyd Signed-off-by: Jeffrey Hugo Link: https://lkml.kernel.org/r/20191031185715.15504-1-jeffrey.l.hugo@gmail.com Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/qcom/clk-rcg2.c | 2 ++ drivers/clk/qcom/common.c | 3 +++ 2 files changed, 5 insertions(+) diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c index 1a0985ae20d2..a93439242565 100644 --- a/drivers/clk/qcom/clk-rcg2.c +++ b/drivers/clk/qcom/clk-rcg2.c @@ -212,6 +212,8 @@ static int _freq_tbl_determine_rate(struct clk_hw *hw, const struct freq_tbl *f, p = clk_hw_get_parent_by_index(hw, index); if (clk_flags & CLK_SET_RATE_PARENT) { if (f->pre_div) { + if (!rate) + rate = req->rate; rate /= 2; rate *= f->pre_div + 1; } diff --git a/drivers/clk/qcom/common.c b/drivers/clk/qcom/common.c index 28ceaf1e9937..ae9352f7706d 100644 --- a/drivers/clk/qcom/common.c +++ b/drivers/clk/qcom/common.c @@ -37,6 +37,9 @@ struct freq_tbl *qcom_find_freq(const struct freq_tbl *f, unsigned long rate) if (!f) return NULL; + if (!f->freq) + return f; + for (; f->freq; f++) if (rate <= f->freq) return f; From 40d9eed1f44a53d86d4811c9ceacd898e9e8838f Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 24 Oct 2019 13:14:13 -0700 Subject: [PATCH 1939/3715] irqchip/irq-bcm7038-l1: Enable parent IRQ if necessary [ Upstream commit 27eebb60357ed5aa6659442f92907c0f7368d6ae ] If the 'brcm,irq-can-wake' property is specified, make sure we also enable the corresponding parent interrupt we are attached to. Signed-off-by: Florian Fainelli Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20191024201415.23454-4-f.fainelli@gmail.com Signed-off-by: Sasha Levin --- drivers/irqchip/irq-bcm7038-l1.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/irqchip/irq-bcm7038-l1.c b/drivers/irqchip/irq-bcm7038-l1.c index 0b9a8b709abf..b32988cac80c 100644 --- a/drivers/irqchip/irq-bcm7038-l1.c +++ b/drivers/irqchip/irq-bcm7038-l1.c @@ -284,6 +284,10 @@ static int __init bcm7038_l1_init_one(struct device_node *dn, pr_err("failed to map parent interrupt %d\n", parent_irq); return -EINVAL; } + + if (of_property_read_bool(dn, "brcm,irq-can-wake")) + enable_irq_wake(parent_irq); + irq_set_chained_handler_and_data(parent_irq, bcm7038_l1_irq_handle, intc); From 54c4eeaacbaf491e4e196d9307748c1eac40dccd Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Wed, 2 Oct 2019 19:25:22 +0800 Subject: [PATCH 1940/3715] irqchip: ingenic: Error out if IRQ domain creation failed [ Upstream commit 52ecc87642f273a599c9913b29fd179c13de457b ] If we cannot create the IRQ domain, the driver should fail to probe instead of succeeding with just a warning message. Signed-off-by: Paul Cercueil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1570015525-27018-3-git-send-email-zhouyanjie@zoho.com Signed-off-by: Sasha Levin --- drivers/irqchip/irq-ingenic.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/irqchip/irq-ingenic.c b/drivers/irqchip/irq-ingenic.c index fc5953dea509..b2e16dca76a6 100644 --- a/drivers/irqchip/irq-ingenic.c +++ b/drivers/irqchip/irq-ingenic.c @@ -117,6 +117,14 @@ static int __init ingenic_intc_of_init(struct device_node *node, goto out_unmap_irq; } + domain = irq_domain_add_legacy(node, num_chips * 32, + JZ4740_IRQ_BASE, 0, + &irq_domain_simple_ops, NULL); + if (!domain) { + err = -ENOMEM; + goto out_unmap_base; + } + for (i = 0; i < num_chips; i++) { /* Mask all irqs */ writel(0xffffffff, intc->base + (i * CHIP_SIZE) + @@ -143,14 +151,11 @@ static int __init ingenic_intc_of_init(struct device_node *node, IRQ_NOPROBE | IRQ_LEVEL); } - domain = irq_domain_add_legacy(node, num_chips * 32, JZ4740_IRQ_BASE, 0, - &irq_domain_simple_ops, NULL); - if (!domain) - pr_warn("unable to register IRQ domain\n"); - setup_irq(parent_irq, &intc_cascade_action); return 0; +out_unmap_base: + iounmap(intc->base); out_unmap_irq: irq_dispose_mapping(parent_irq); out_free: From c90b82fe7f9ac6149bf5d6e454b16af81d7e4034 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Sun, 10 Nov 2019 12:49:06 +0300 Subject: [PATCH 1941/3715] fs/quota: handle overflows of sysctl fs.quota.* and report as unsigned long [ Upstream commit 6fcbcec9cfc7b3c6a2c1f1a23ebacedff7073e0a ] Quota statistics counted as 64-bit per-cpu counter. Reading sums per-cpu fractions as signed 64-bit int, filters negative values and then reports lower half as signed 32-bit int. Result may looks like: fs.quota.allocated_dquots = 22327 fs.quota.cache_hits = -489852115 fs.quota.drops = -487288718 fs.quota.free_dquots = 22083 fs.quota.lookups = -486883485 fs.quota.reads = 22327 fs.quota.syncs = 335064 fs.quota.writes = 3088689 Values bigger than 2^31-1 reported as negative. All counters except "allocated_dquots" and "free_dquots" are monotonic, thus they should be reported as is without filtering negative values. Kernel doesn't have generic helper for 64-bit sysctl yet, let's use at least unsigned long. Link: https://lore.kernel.org/r/157337934693.2078.9842146413181153727.stgit@buzz Signed-off-by: Konstantin Khlebnikov Signed-off-by: Jan Kara Signed-off-by: Sasha Levin --- fs/quota/dquot.c | 29 +++++++++++++++++------------ include/linux/quota.h | 2 +- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 3254c90fd899..3fdbdd29702b 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2849,68 +2849,73 @@ EXPORT_SYMBOL(dquot_quotactl_sysfile_ops); static int do_proc_dqstats(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - unsigned int type = (int *)table->data - dqstats.stat; + unsigned int type = (unsigned long *)table->data - dqstats.stat; + s64 value = percpu_counter_sum(&dqstats.counter[type]); + + /* Filter negative values for non-monotonic counters */ + if (value < 0 && (type == DQST_ALLOC_DQUOTS || + type == DQST_FREE_DQUOTS)) + value = 0; /* Update global table */ - dqstats.stat[type] = - percpu_counter_sum_positive(&dqstats.counter[type]); - return proc_dointvec(table, write, buffer, lenp, ppos); + dqstats.stat[type] = value; + return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); } static struct ctl_table fs_dqstats_table[] = { { .procname = "lookups", .data = &dqstats.stat[DQST_LOOKUPS], - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "drops", .data = &dqstats.stat[DQST_DROPS], - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "reads", .data = &dqstats.stat[DQST_READS], - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "writes", .data = &dqstats.stat[DQST_WRITES], - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "cache_hits", .data = &dqstats.stat[DQST_CACHE_HITS], - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "allocated_dquots", .data = &dqstats.stat[DQST_ALLOC_DQUOTS], - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "free_dquots", .data = &dqstats.stat[DQST_FREE_DQUOTS], - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "syncs", .data = &dqstats.stat[DQST_SYNCS], - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, diff --git a/include/linux/quota.h b/include/linux/quota.h index 5ac9de4fcd6f..aa9a42eceab0 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -263,7 +263,7 @@ enum { }; struct dqstats { - int stat[_DQST_DQSTAT_LAST]; + unsigned long stat[_DQST_DQSTAT_LAST]; struct percpu_counter counter[_DQST_DQSTAT_LAST]; }; From 72b0b49aba0e465b7e673a302d3cb5267f14dedc Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 11 Nov 2019 15:03:57 -0800 Subject: [PATCH 1942/3715] scsi: lpfc: fix: Coverity: lpfc_cmpl_els_rsp(): Null pointer dereferences [ Upstream commit 6c6d59e0fe5b86cf273d6d744a6a9768c4ecc756 ] Coverity reported the following: *** CID 101747: Null pointer dereferences (FORWARD_NULL) /drivers/scsi/lpfc/lpfc_els.c: 4439 in lpfc_cmpl_els_rsp() 4433 kfree(mp); 4434 } 4435 mempool_free(mbox, phba->mbox_mem_pool); 4436 } 4437 out: 4438 if (ndlp && NLP_CHK_NODE_ACT(ndlp)) { vvv CID 101747: Null pointer dereferences (FORWARD_NULL) vvv Dereferencing null pointer "shost". 4439 spin_lock_irq(shost->host_lock); 4440 ndlp->nlp_flag &= ~(NLP_ACC_REGLOGIN | NLP_RM_DFLT_RPI); 4441 spin_unlock_irq(shost->host_lock); 4442 4443 /* If the node is not being used by another discovery thread, 4444 * and we are sending a reject, we are done with it. Fix by adding a check for non-null shost in line 4438. The scenario when shost is set to null is when ndlp is null. As such, the ndlp check present was sufficient. But better safe than sorry so add the shost check. Reported-by: coverity-bot Addresses-Coverity-ID: 101747 ("Null pointer dereferences") Fixes: 2e0fef85e098 ("[SCSI] lpfc: NPIV: split ports") CC: James Bottomley CC: "Gustavo A. R. Silva" CC: linux-next@vger.kernel.org Link: https://lore.kernel.org/r/20191111230401.12958-3-jsmart2021@gmail.com Reviewed-by: Ewan D. Milne Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_els.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index c851fd14ff3e..4c84c2ae1112 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -4102,7 +4102,7 @@ lpfc_cmpl_els_rsp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, mempool_free(mbox, phba->mbox_mem_pool); } out: - if (ndlp && NLP_CHK_NODE_ACT(ndlp)) { + if (ndlp && NLP_CHK_NODE_ACT(ndlp) && shost) { spin_lock_irq(shost->host_lock); ndlp->nlp_flag &= ~(NLP_ACC_REGLOGIN | NLP_RM_DFLT_RPI); spin_unlock_irq(shost->host_lock); From 1d75a2d82dbe59e8f17f68b8eb3d697232f8b0d6 Mon Sep 17 00:00:00 2001 From: Bean Huo Date: Tue, 12 Nov 2019 23:34:36 +0100 Subject: [PATCH 1943/3715] scsi: ufs: fix potential bug which ends in system hang [ Upstream commit cfcbae3895b86c390ede57b2a8f601dd5972b47b ] In function __ufshcd_query_descriptor(), in the event of an error happening, we directly goto out_unlock and forget to invaliate hba->dev_cmd.query.descriptor pointer. This results in this pointer still valid in ufshcd_copy_query_response() for other query requests which go through ufshcd_exec_raw_upiu_cmd(). This will cause __memcpy() crash and system hangs. Log as shown below: Unable to handle kernel paging request at virtual address ffff000012233c40 Mem abort info: ESR = 0x96000047 Exception class = DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000047 CM = 0, WnR = 1 swapper pgtable: 4k pages, 48-bit VAs, pgdp = 0000000028cc735c [ffff000012233c40] pgd=00000000bffff003, pud=00000000bfffe003, pmd=00000000ba8b8003, pte=0000000000000000 Internal error: Oops: 96000047 [#2] PREEMPT SMP ... Call trace: __memcpy+0x74/0x180 ufshcd_issue_devman_upiu_cmd+0x250/0x3c0 ufshcd_exec_raw_upiu_cmd+0xfc/0x1a8 ufs_bsg_request+0x178/0x3b0 bsg_queue_rq+0xc0/0x118 blk_mq_dispatch_rq_list+0xb0/0x538 blk_mq_sched_dispatch_requests+0x18c/0x1d8 __blk_mq_run_hw_queue+0xb4/0x118 blk_mq_run_work_fn+0x28/0x38 process_one_work+0x1ec/0x470 worker_thread+0x48/0x458 kthread+0x130/0x138 ret_from_fork+0x10/0x1c Code: 540000ab a8c12027 a88120c7 a8c12027 (a88120c7) ---[ end trace 793e1eb5dff69f2d ]--- note: kworker/0:2H[2054] exited with preempt_count 1 This patch is to move "descriptor = NULL" down to below the label "out_unlock". Fixes: d44a5f98bb49b2(ufs: query descriptor API) Link: https://lore.kernel.org/r/20191112223436.27449-3-huobean@gmail.com Reviewed-by: Alim Akhtar Reviewed-by: Bart Van Assche Signed-off-by: Bean Huo Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/ufs/ufshcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 07cae5ea608c..9feae23bfd09 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -2867,10 +2867,10 @@ static int __ufshcd_query_descriptor(struct ufs_hba *hba, goto out_unlock; } - hba->dev_cmd.query.descriptor = NULL; *buf_len = be16_to_cpu(response->upiu_res.length); out_unlock: + hba->dev_cmd.query.descriptor = NULL; mutex_unlock(&hba->dev_cmd.lock); out: ufshcd_release(hba); From e2b1506f69cb1600aa8338643b6f888ba1a3e9b3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 31 Oct 2019 15:29:22 +0100 Subject: [PATCH 1944/3715] powerpc/pseries/cmm: Implement release() function for sysfs device [ Upstream commit 7d8212747435c534c8d564fbef4541a463c976ff ] When unloading the module, one gets ------------[ cut here ]------------ Device 'cmm0' does not have a release() function, it is broken and must be fixed. See Documentation/kobject.txt. WARNING: CPU: 0 PID: 19308 at drivers/base/core.c:1244 .device_release+0xcc/0xf0 ... We only have one static fake device. There is nothing to do when releasing the device (via cmm_exit()). Signed-off-by: David Hildenbrand Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191031142933.10779-2-david@redhat.com Signed-off-by: Sasha Levin --- arch/powerpc/platforms/pseries/cmm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 4ac419c7eb4c..25224c9e1dc0 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -425,6 +425,10 @@ static struct bus_type cmm_subsys = { .dev_name = "cmm", }; +static void cmm_release_device(struct device *dev) +{ +} + /** * cmm_sysfs_register - Register with sysfs * @@ -440,6 +444,7 @@ static int cmm_sysfs_register(struct device *dev) dev->id = 0; dev->bus = &cmm_subsys; + dev->release = cmm_release_device; if ((rc = device_register(dev))) goto subsys_unregister; From d17d0199fb0730bba30859c11d1f894b3fab94ba Mon Sep 17 00:00:00 2001 From: "Gustavo L. F. Walbon" Date: Thu, 2 May 2019 18:09:07 -0300 Subject: [PATCH 1945/3715] powerpc/security: Fix wrong message when RFI Flush is disable [ Upstream commit 4e706af3cd8e1d0503c25332b30cad33c97ed442 ] The issue was showing "Mitigation" message via sysfs whatever the state of "RFI Flush", but it should show "Vulnerable" when it is disabled. If you have "L1D private" feature enabled and not "RFI Flush" you are vulnerable to meltdown attacks. "RFI Flush" is the key feature to mitigate the meltdown whatever the "L1D private" state. SEC_FTR_L1D_THREAD_PRIV is a feature for Power9 only. So the message should be as the truth table shows: CPU | L1D private | RFI Flush | sysfs ----|-------------|-----------|------------------------------------- P9 | False | False | Vulnerable P9 | False | True | Mitigation: RFI Flush P9 | True | False | Vulnerable: L1D private per thread P9 | True | True | Mitigation: RFI Flush, L1D private per thread P8 | False | False | Vulnerable P8 | False | True | Mitigation: RFI Flush Output before this fix: # cat /sys/devices/system/cpu/vulnerabilities/meltdown Mitigation: RFI Flush, L1D private per thread # echo 0 > /sys/kernel/debug/powerpc/rfi_flush # cat /sys/devices/system/cpu/vulnerabilities/meltdown Mitigation: L1D private per thread Output after fix: # cat /sys/devices/system/cpu/vulnerabilities/meltdown Mitigation: RFI Flush, L1D private per thread # echo 0 > /sys/kernel/debug/powerpc/rfi_flush # cat /sys/devices/system/cpu/vulnerabilities/meltdown Vulnerable: L1D private per thread Signed-off-by: Gustavo L. F. Walbon Signed-off-by: Mauro S. M. Rodrigues Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190502210907.42375-1-gwalbon@linux.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/kernel/security.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index fef3f09fc238..b3f540c9f410 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -134,26 +134,22 @@ ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, cha thread_priv = security_ftr_enabled(SEC_FTR_L1D_THREAD_PRIV); - if (rfi_flush || thread_priv) { + if (rfi_flush) { struct seq_buf s; seq_buf_init(&s, buf, PAGE_SIZE - 1); - seq_buf_printf(&s, "Mitigation: "); - - if (rfi_flush) - seq_buf_printf(&s, "RFI Flush"); - - if (rfi_flush && thread_priv) - seq_buf_printf(&s, ", "); - + seq_buf_printf(&s, "Mitigation: RFI Flush"); if (thread_priv) - seq_buf_printf(&s, "L1D private per thread"); + seq_buf_printf(&s, ", L1D private per thread"); seq_buf_printf(&s, "\n"); return s.len; } + if (thread_priv) + return sprintf(buf, "Vulnerable: L1D private per thread\n"); + if (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && !security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR)) return sprintf(buf, "Not affected\n"); From 43b8d08c1b09b4e0810247bf6cf0da0034c0b8e8 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 2 Nov 2019 12:06:54 +1100 Subject: [PATCH 1946/3715] scsi: atari_scsi: sun3_scsi: Set sg_tablesize to 1 instead of SG_NONE [ Upstream commit 79172ab20bfd8437b277254028efdb68484e2c21 ] Since the scsi subsystem adopted the blk-mq API, a host with zero sg_tablesize crashes with a NULL pointer dereference. blk_queue_max_segments: set to minimum 1 scsi 0:0:0:0: Direct-Access QEMU QEMU HARDDISK 2.5+ PQ: 0 ANSI: 5 scsi target0:0:0: Beginning Domain Validation scsi target0:0:0: Domain Validation skipping write tests scsi target0:0:0: Ending Domain Validation blk_queue_max_segments: set to minimum 1 scsi 0:0:1:0: Direct-Access QEMU QEMU HARDDISK 2.5+ PQ: 0 ANSI: 5 scsi target0:0:1: Beginning Domain Validation scsi target0:0:1: Domain Validation skipping write tests scsi target0:0:1: Ending Domain Validation blk_queue_max_segments: set to minimum 1 scsi 0:0:2:0: CD-ROM QEMU QEMU CD-ROM 2.5+ PQ: 0 ANSI: 5 scsi target0:0:2: Beginning Domain Validation scsi target0:0:2: Domain Validation skipping write tests scsi target0:0:2: Ending Domain Validation blk_queue_max_segments: set to minimum 1 blk_queue_max_segments: set to minimum 1 blk_queue_max_segments: set to minimum 1 blk_queue_max_segments: set to minimum 1 sr 0:0:2:0: Power-on or device reset occurred sd 0:0:0:0: Power-on or device reset occurred sd 0:0:1:0: Power-on or device reset occurred sd 0:0:0:0: [sda] 10485762 512-byte logical blocks: (5.37 GB/5.00 GiB) sd 0:0:0:0: [sda] Write Protect is off sd 0:0:0:0: [sda] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA Unable to handle kernel NULL pointer dereference at virtual address (ptrval) Oops: 00000000 Modules linked in: PC: [<001cd874>] blk_mq_free_request+0x66/0xe2 SR: 2004 SP: (ptrval) a2: 00874520 d0: 00000000 d1: 00000000 d2: 009ba800 d3: 00000000 d4: 00000000 d5: 08000002 a0: 0087be68 a1: 009a81e0 Process kworker/u2:2 (pid: 15, task=(ptrval)) Frame format=7 eff addr=0000007a ssw=0505 faddr=0000007a wb 1 stat/addr/data: 0000 00000000 00000000 wb 2 stat/addr/data: 0000 00000000 00000000 wb 3 stat/addr/data: 0000 0000007a 00000000 push data: 00000000 00000000 00000000 00000000 Stack from 0087bd98: 00000002 00000000 0087be72 009a7820 0087bdb4 001c4f6c 009a7820 0087bdd4 0024d200 009a7820 0024d0dc 0087be72 009baa00 0087be68 009a5000 0087be7c 00265d10 009a5000 0087be72 00000003 00000000 00000000 00000000 0087be68 00000bb8 00000005 00000000 00000000 00000000 00000000 00265c56 00000000 009ba60c 0036ddf4 00000002 ffffffff 009baa00 009ba600 009a50d6 0087be74 00227ba0 009baa08 00000001 009baa08 009ba60c 0036ddf4 00000000 00000000 Call Trace: [<001c4f6c>] blk_put_request+0xe/0x14 [<0024d200>] __scsi_execute+0x124/0x174 [<0024d0dc>] __scsi_execute+0x0/0x174 [<00265d10>] sd_revalidate_disk+0xba/0x1f02 [<00265c56>] sd_revalidate_disk+0x0/0x1f02 [<0036ddf4>] strlen+0x0/0x22 [<00227ba0>] device_add+0x3da/0x604 [<0036ddf4>] strlen+0x0/0x22 [<00267e64>] sd_probe+0x30c/0x4b4 [<0002da44>] process_one_work+0x0/0x402 [<0022b978>] really_probe+0x226/0x354 [<0022bc34>] driver_probe_device+0xa4/0xf0 [<0002da44>] process_one_work+0x0/0x402 [<0022bcd0>] __driver_attach_async_helper+0x50/0x70 [<00035dae>] async_run_entry_fn+0x36/0x130 [<0002db88>] process_one_work+0x144/0x402 [<0002e1aa>] worker_thread+0x0/0x570 [<0002e29a>] worker_thread+0xf0/0x570 [<0002e1aa>] worker_thread+0x0/0x570 [<003768d8>] schedule+0x0/0xb8 [<0003f58c>] __init_waitqueue_head+0x0/0x12 [<00033e92>] kthread+0xc2/0xf6 [<000331e8>] kthread_parkme+0x0/0x4e [<003768d8>] schedule+0x0/0xb8 [<00033dd0>] kthread+0x0/0xf6 [<00002c10>] ret_from_kernel_thread+0xc/0x14 Code: 0280 0006 0800 56c0 4400 0280 0000 00ff <52b4> 0c3a 082b 0006 0013 6706 2042 53a8 00c4 4ab9 0047 3374 6640 202d 000c 670c Disabling lock debugging due to kernel taint Avoid this by setting sg_tablesize = 1. Link: https://lore.kernel.org/r/4567bcae94523b47d6f3b77450ba305823bca479.1572656814.git.fthain@telegraphics.com.au Reported-and-tested-by: Michael Schmitz Reviewed-by: Michael Schmitz References: commit 68ab2d76e4be ("scsi: cxlflash: Set sg_tablesize to 1 instead of SG_NONE") Signed-off-by: Finn Thain Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/atari_scsi.c | 6 +++--- drivers/scsi/mac_scsi.c | 2 +- drivers/scsi/sun3_scsi.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/atari_scsi.c b/drivers/scsi/atari_scsi.c index 89f5154c40b6..764c46d7333e 100644 --- a/drivers/scsi/atari_scsi.c +++ b/drivers/scsi/atari_scsi.c @@ -742,7 +742,7 @@ static int __init atari_scsi_probe(struct platform_device *pdev) atari_scsi_template.sg_tablesize = SG_ALL; } else { atari_scsi_template.can_queue = 1; - atari_scsi_template.sg_tablesize = SG_NONE; + atari_scsi_template.sg_tablesize = 1; } if (setup_can_queue > 0) @@ -751,8 +751,8 @@ static int __init atari_scsi_probe(struct platform_device *pdev) if (setup_cmd_per_lun > 0) atari_scsi_template.cmd_per_lun = setup_cmd_per_lun; - /* Leave sg_tablesize at 0 on a Falcon! */ - if (ATARIHW_PRESENT(TT_SCSI) && setup_sg_tablesize >= 0) + /* Don't increase sg_tablesize on Falcon! */ + if (ATARIHW_PRESENT(TT_SCSI) && setup_sg_tablesize > 0) atari_scsi_template.sg_tablesize = setup_sg_tablesize; if (setup_hostid >= 0) { diff --git a/drivers/scsi/mac_scsi.c b/drivers/scsi/mac_scsi.c index 643321fc152d..b5050c2ede00 100644 --- a/drivers/scsi/mac_scsi.c +++ b/drivers/scsi/mac_scsi.c @@ -429,7 +429,7 @@ static int __init mac_scsi_probe(struct platform_device *pdev) mac_scsi_template.can_queue = setup_can_queue; if (setup_cmd_per_lun > 0) mac_scsi_template.cmd_per_lun = setup_cmd_per_lun; - if (setup_sg_tablesize >= 0) + if (setup_sg_tablesize > 0) mac_scsi_template.sg_tablesize = setup_sg_tablesize; if (setup_hostid >= 0) mac_scsi_template.this_id = setup_hostid & 7; diff --git a/drivers/scsi/sun3_scsi.c b/drivers/scsi/sun3_scsi.c index 9492638296c8..af8a7ef9c858 100644 --- a/drivers/scsi/sun3_scsi.c +++ b/drivers/scsi/sun3_scsi.c @@ -498,7 +498,7 @@ static struct scsi_host_template sun3_scsi_template = { .eh_host_reset_handler = sun3scsi_host_reset, .can_queue = 16, .this_id = 7, - .sg_tablesize = SG_NONE, + .sg_tablesize = 1, .cmd_per_lun = 2, .use_clustering = DISABLE_CLUSTERING, .cmd_size = NCR5380_CMD_SIZE, @@ -520,7 +520,7 @@ static int __init sun3_scsi_probe(struct platform_device *pdev) sun3_scsi_template.can_queue = setup_can_queue; if (setup_cmd_per_lun > 0) sun3_scsi_template.cmd_per_lun = setup_cmd_per_lun; - if (setup_sg_tablesize >= 0) + if (setup_sg_tablesize > 0) sun3_scsi_template.sg_tablesize = setup_sg_tablesize; if (setup_hostid >= 0) sun3_scsi_template.this_id = setup_hostid & 7; From 0884d8df84d51592016e36d5e351a9ee5666b80f Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Sat, 26 Oct 2019 21:44:20 +0200 Subject: [PATCH 1947/3715] clk: pxa: fix one of the pxa RTC clocks [ Upstream commit 46acbcb4849b2ca2e6e975e7c8130c1d61c8fd0c ] The pxa27x platforms have a single IP with 2 drivers, sa1100-rtc and rtc-pxa drivers. A previous patch fixed the sa1100-rtc case, but the pxa-rtc wasn't fixed. This patch completes the previous one. Fixes: 8b6d10345e16 ("clk: pxa: add missing pxa27x clocks for Irda and sa1100-rtc") Signed-off-by: Robert Jarzmik Link: https://lkml.kernel.org/r/20191026194420.11918-1-robert.jarzmik@free.fr Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/pxa/clk-pxa27x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/pxa/clk-pxa27x.c b/drivers/clk/pxa/clk-pxa27x.c index 25a30194d27a..b67ea86ff156 100644 --- a/drivers/clk/pxa/clk-pxa27x.c +++ b/drivers/clk/pxa/clk-pxa27x.c @@ -462,6 +462,7 @@ struct dummy_clk { }; static struct dummy_clk dummy_clks[] __initdata = { DUMMY_CLK(NULL, "pxa27x-gpio", "osc_32_768khz"), + DUMMY_CLK(NULL, "pxa-rtc", "osc_32_768khz"), DUMMY_CLK(NULL, "sa1100-rtc", "osc_32_768khz"), DUMMY_CLK("UARTCLK", "pxa2xx-ir", "STUART"), }; From 90cfabd5f3945bf1df327e78940e364af7bb6f82 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Wed, 13 Nov 2019 16:03:24 +0800 Subject: [PATCH 1948/3715] bcache: at least try to shrink 1 node in bch_mca_scan() [ Upstream commit 9fcc34b1a6dd4b8e5337e2b6ef45e428897eca6b ] In bch_mca_scan(), the number of shrinking btree node is calculated by code like this, unsigned long nr = sc->nr_to_scan; nr /= c->btree_pages; nr = min_t(unsigned long, nr, mca_can_free(c)); variable sc->nr_to_scan is number of objects (here is bcache B+tree nodes' number) to shrink, and pointer variable sc is sent from memory management code as parametr of a callback. If sc->nr_to_scan is smaller than c->btree_pages, after the above calculation, variable 'nr' will be 0 and nothing will be shrunk. It is frequeently observed that only 1 or 2 is set to sc->nr_to_scan and make nr to be zero. Then bch_mca_scan() will do nothing more then acquiring and releasing mutex c->bucket_lock. This patch checkes whether nr is 0 after the above calculation, if 0 is the result then set 1 to variable 'n'. Then at least bch_mca_scan() will try to shrink a single B+tree node. Signed-off-by: Coly Li Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/md/bcache/btree.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 9406326216f1..96a6583e7b52 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -685,6 +685,8 @@ static unsigned long bch_mca_scan(struct shrinker *shrink, * IO can always make forward progress: */ nr /= c->btree_pages; + if (nr == 0) + nr = 1; nr = min_t(unsigned long, nr, mca_can_free(c)); i = 0; From f7aff3b9282651be45e4b8cceb1386b3cc89ffb1 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 14 Nov 2019 15:30:46 +0100 Subject: [PATCH 1949/3715] HID: logitech-hidpp: Silence intermittent get_battery_capacity errors [ Upstream commit 61005d65b6c7dcf61c19516e6ebe5acc02d2cdda ] My Logitech M185 (PID:4038) 2.4 GHz wireless HID++ mouse is causing intermittent errors like these in the log: [11091.034857] logitech-hidpp-device 0003:046D:4038.0006: hidpp20_batterylevel_get_battery_capacity: received protocol error 0x09 [12388.031260] logitech-hidpp-device 0003:046D:4038.0006: hidpp20_batterylevel_get_battery_capacity: received protocol error 0x09 [16613.718543] logitech-hidpp-device 0003:046D:4038.0006: hidpp20_batterylevel_get_battery_capacity: received protocol error 0x09 [23529.938728] logitech-hidpp-device 0003:046D:4038.0006: hidpp20_batterylevel_get_battery_capacity: received protocol error 0x09 We are already silencing error-code 0x09 (HIDPP_ERROR_RESOURCE_ERROR) errors in other places, lets do the same in hidpp20_batterylevel_get_battery_capacity to remove these harmless, but scary looking errors from the dmesg output. Signed-off-by: Hans de Goede Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-logitech-hidpp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 4706fb852eaf..6ad776b4711b 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -978,6 +978,9 @@ static int hidpp20_batterylevel_get_battery_capacity(struct hidpp_device *hidpp, ret = hidpp_send_fap_command_sync(hidpp, feature_index, CMD_BATTERY_LEVEL_STATUS_GET_BATTERY_LEVEL_STATUS, NULL, 0, &response); + /* Ignore these intermittent errors */ + if (ret == HIDPP_ERROR_RESOURCE_ERROR) + return -EIO; if (ret > 0) { hid_err(hidpp->hid_dev, "%s: received protocol error 0x%02x\n", __func__, ret); From 7ceb69670c2a885bc7a2f52f19bc5a87b4522538 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 31 Oct 2019 10:05:19 -0400 Subject: [PATCH 1950/3715] libnvdimm/btt: fix variable 'rc' set but not used [ Upstream commit 4e24e37d5313edca8b4ab86f240c046c731e28d6 ] drivers/nvdimm/btt.c: In function 'btt_read_pg': drivers/nvdimm/btt.c:1264:8: warning: variable 'rc' set but not used [-Wunused-but-set-variable] int rc; ^~ Add a ratelimited message in case a storm of errors is encountered. Fixes: d9b83c756953 ("libnvdimm, btt: rework error clearing") Signed-off-by: Qian Cai Reviewed-by: Vishal Verma Link: https://lore.kernel.org/r/1572530719-32161-1-git-send-email-cai@lca.pw Signed-off-by: Dan Williams Signed-off-by: Sasha Levin --- drivers/nvdimm/btt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index b2feda35966b..471498469d0a 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1259,11 +1259,11 @@ static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip, ret = btt_data_read(arena, page, off, postmap, cur_len); if (ret) { - int rc; - /* Media error - set the e_flag */ - rc = btt_map_write(arena, premap, postmap, 0, 1, - NVDIMM_IO_ATOMIC); + if (btt_map_write(arena, premap, postmap, 0, 1, NVDIMM_IO_ATOMIC)) + dev_warn_ratelimited(to_dev(arena), + "Error persistently tracking bad blocks at %#x\n", + premap); goto out_rtt; } From 356218185edbec9e6d7c1a9c00929be53bc8775e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bla=C5=BE=20Hrastnik?= Date: Wed, 6 Nov 2019 20:02:46 +0900 Subject: [PATCH 1951/3715] HID: Improve Windows Precision Touchpad detection. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 2dbc6f113acd74c66b04bf49fb027efd830b1c5a ] Per Microsoft spec, usage 0xC5 (page 0xFF) returns a blob containing data used to verify the touchpad as a Windows Precision Touchpad. 0x85, REPORTID_PTPHQA, // REPORT_ID (PTPHQA) 0x09, 0xC5, // USAGE (Vendor Usage 0xC5) 0x15, 0x00, // LOGICAL_MINIMUM (0) 0x26, 0xff, 0x00, // LOGICAL_MAXIMUM (0xff) 0x75, 0x08, // REPORT_SIZE (8) 0x96, 0x00, 0x01, // REPORT_COUNT (0x100 (256)) 0xb1, 0x02, // FEATURE (Data,Var,Abs) However, some devices, namely Microsoft's Surface line of products instead implement a "segmented device certification report" (usage 0xC6) which returns the same report, but in smaller chunks. 0x06, 0x00, 0xff, // USAGE_PAGE (Vendor Defined) 0x85, REPORTID_PTPHQA, // REPORT_ID (PTPHQA) 0x09, 0xC6, // USAGE (Vendor usage for segment #) 0x25, 0x08, // LOGICAL_MAXIMUM (8) 0x75, 0x08, // REPORT_SIZE (8) 0x95, 0x01, // REPORT_COUNT (1) 0xb1, 0x02, // FEATURE (Data,Var,Abs) 0x09, 0xC7, // USAGE (Vendor Usage) 0x26, 0xff, 0x00, // LOGICAL_MAXIMUM (0xff) 0x95, 0x20, // REPORT_COUNT (32) 0xb1, 0x02, // FEATURE (Data,Var,Abs) By expanding Win8 touchpad detection to also look for the segmented report, all Surface touchpads are now properly recognized by hid-multitouch. Signed-off-by: Blaž Hrastnik Signed-off-by: Benjamin Tissoires Signed-off-by: Sasha Levin --- drivers/hid/hid-core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 0c547bf841f4..6a04b56d161b 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -760,6 +760,10 @@ static void hid_scan_feature_usage(struct hid_parser *parser, u32 usage) if (usage == 0xff0000c5 && parser->global.report_count == 256 && parser->global.report_size == 8) parser->scan_flags |= HID_SCAN_FLAG_MT_WIN_8; + + if (usage == 0xff0000c6 && parser->global.report_count == 1 && + parser->global.report_size == 8) + parser->scan_flags |= HID_SCAN_FLAG_MT_WIN_8; } static void hid_scan_collection(struct hid_parser *parser, unsigned type) From 369d36324d33085feba90a4236a82b2c34a0f049 Mon Sep 17 00:00:00 2001 From: peter chang Date: Thu, 14 Nov 2019 15:38:58 +0530 Subject: [PATCH 1952/3715] scsi: pm80xx: Fix for SATA device discovery [ Upstream commit ce21c63ee995b7a8b7b81245f2cee521f8c3c220 ] Driver was missing complete() call in mpi_sata_completion which result in SATA abort error handling timing out. That causes the device to be left in the in_recovery state so subsequent commands sent to the device fail and the OS removes access to it. Link: https://lore.kernel.org/r/20191114100910.6153-2-deepak.ukey@microchip.com Acked-by: Jack Wang Signed-off-by: peter chang Signed-off-by: Deepak Ukey Signed-off-by: Viswas G Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/pm8001/pm80xx_hwi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index 9edd61c063a1..df5f0bc29587 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -2368,6 +2368,8 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb) pm8001_printk("task 0x%p done with io_status 0x%x" " resp 0x%x stat 0x%x but aborted by upper layer!\n", t, status, ts->resp, ts->stat)); + if (t->slow_task) + complete(&t->slow_task->completion); pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); } else { spin_unlock_irqrestore(&t->task_state_lock, flags); From 8f000d767e32ae0c91592c94d2b908e13c607adb Mon Sep 17 00:00:00 2001 From: Subhash Jadavani Date: Thu, 14 Nov 2019 22:09:30 -0800 Subject: [PATCH 1953/3715] scsi: ufs: Fix error handing during hibern8 enter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6d303e4b19d694cdbebf76bcdb51ada664ee953d ] During clock gating (ufshcd_gate_work()), we first put the link hibern8 by calling ufshcd_uic_hibern8_enter() and if ufshcd_uic_hibern8_enter() returns success (0) then we gate all the clocks. Now let’s zoom in to what ufshcd_uic_hibern8_enter() does internally: It calls __ufshcd_uic_hibern8_enter() and if failure is encountered, link recovery shall put the link back to the highest HS gear and returns success (0) to ufshcd_uic_hibern8_enter() which is the issue as link is still in active state due to recovery! Now ufshcd_uic_hibern8_enter() returns success to ufshcd_gate_work() and hence it goes ahead with gating the UFS clock while link is still in active state hence I believe controller would raise UIC error interrupts. But when we service the interrupt, clocks might have already been disabled! This change fixes for this by returning failure from __ufshcd_uic_hibern8_enter() if recovery succeeds as link is still not in hibern8, upon receiving the error ufshcd_hibern8_enter() would initiate retry to put the link state back into hibern8. Link: https://lore.kernel.org/r/1573798172-20534-8-git-send-email-cang@codeaurora.org Reviewed-by: Avri Altman Reviewed-by: Bean Huo Signed-off-by: Subhash Jadavani Signed-off-by: Can Guo Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/ufs/ufshcd.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 9feae23bfd09..d25082e573e0 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -3684,15 +3684,24 @@ static int __ufshcd_uic_hibern8_enter(struct ufs_hba *hba) ktime_to_us(ktime_sub(ktime_get(), start)), ret); if (ret) { + int err; + dev_err(hba->dev, "%s: hibern8 enter failed. ret = %d\n", __func__, ret); /* - * If link recovery fails then return error so that caller - * don't retry the hibern8 enter again. + * If link recovery fails then return error code returned from + * ufshcd_link_recovery(). + * If link recovery succeeds then return -EAGAIN to attempt + * hibern8 enter retry again. */ - if (ufshcd_link_recovery(hba)) - ret = -ENOLINK; + err = ufshcd_link_recovery(hba); + if (err) { + dev_err(hba->dev, "%s: link recovery failed", __func__); + ret = err; + } else { + ret = -EAGAIN; + } } else ufshcd_vops_hibern8_notify(hba, UIC_CMD_DME_HIBER_ENTER, POST_CHANGE); @@ -3706,7 +3715,7 @@ static int ufshcd_uic_hibern8_enter(struct ufs_hba *hba) for (retries = UIC_HIBERN8_ENTER_RETRIES; retries > 0; retries--) { ret = __ufshcd_uic_hibern8_enter(hba); - if (!ret || ret == -ENOLINK) + if (!ret) goto out; } out: From 360153b100f892a3e3ad97dc634237333ec693e7 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Fri, 15 Nov 2019 17:37:27 +0100 Subject: [PATCH 1954/3715] scsi: scsi_debug: num_tgts must be >= 0 [ Upstream commit aa5334c4f3014940f11bf876e919c956abef4089 ] Passing the parameter "num_tgts=-1" will start an infinite loop that exhausts the system memory Link: https://lore.kernel.org/r/20191115163727.24626-1-mlombard@redhat.com Signed-off-by: Maurizio Lombardi Acked-by: Douglas Gilbert Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/scsi_debug.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 92bc5b2d24ae..ac936b5ca74e 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -4960,6 +4960,11 @@ static int __init scsi_debug_init(void) return -EINVAL; } + if (sdebug_num_tgts < 0) { + pr_err("num_tgts must be >= 0\n"); + return -EINVAL; + } + if (sdebug_guard > 1) { pr_err("guard must be 0 or 1\n"); return -EINVAL; From 3e7ebe5b2b3f9e54aecd5512a50943019a7880af Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 16 Nov 2019 14:36:57 +1100 Subject: [PATCH 1955/3715] scsi: NCR5380: Add disconnect_mask module parameter [ Upstream commit 0b7a223552d455bcfba6fb9cfc5eef2b5fce1491 ] Add a module parameter to inhibit disconnect/reselect for individual targets. This gains compatibility with Aztec PowerMonster SCSI/SATA adapters with buggy firmware. (No fix is available from the vendor.) Apparently these adapters pass-through the product/vendor of the attached SATA device. Since they can't be identified from the response to an INQUIRY command, a device blacklist flag won't work. Cc: Michael Schmitz Link: https://lore.kernel.org/r/993b17545990f31f9fa5a98202b51102a68e7594.1573875417.git.fthain@telegraphics.com.au Reviewed-and-tested-by: Michael Schmitz Signed-off-by: Finn Thain Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/NCR5380.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c index 21377ac71168..79b0b4eece19 100644 --- a/drivers/scsi/NCR5380.c +++ b/drivers/scsi/NCR5380.c @@ -129,6 +129,9 @@ #define NCR5380_release_dma_irq(x) #endif +static unsigned int disconnect_mask = ~0; +module_param(disconnect_mask, int, 0444); + static int do_abort(struct Scsi_Host *); static void do_reset(struct Scsi_Host *); static void bus_reset_cleanup(struct Scsi_Host *); @@ -946,7 +949,8 @@ static bool NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd) int err; bool ret = true; bool can_disconnect = instance->irq != NO_IRQ && - cmd->cmnd[0] != REQUEST_SENSE; + cmd->cmnd[0] != REQUEST_SENSE && + (disconnect_mask & BIT(scmd_id(cmd))); NCR5380_dprint(NDEBUG_ARBITRATION, instance); dsprintk(NDEBUG_ARBITRATION, instance, "starting arbitration, id = %d\n", From 5a6decd4bcccf23bd1fe45cf1eff1f130d240c16 Mon Sep 17 00:00:00 2001 From: Anatol Pomazau Date: Fri, 15 Nov 2019 19:47:35 -0500 Subject: [PATCH 1956/3715] scsi: iscsi: Don't send data to unbound connection [ Upstream commit 238191d65d7217982d69e21c1d623616da34b281 ] If a faulty initiator fails to bind the socket to the iSCSI connection before emitting a command, for instance, a subsequent send_pdu, it will crash the kernel due to a null pointer dereference in sock_sendmsg(), as shown in the log below. This patch makes sure the bind succeeded before trying to use the socket. BUG: kernel NULL pointer dereference, address: 0000000000000018 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI CPU: 3 PID: 7 Comm: kworker/u8:0 Not tainted 5.4.0-rc2.iscsi+ #13 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 [ 24.158246] Workqueue: iscsi_q_0 iscsi_xmitworker [ 24.158883] RIP: 0010:apparmor_socket_sendmsg+0x5/0x20 [...] [ 24.161739] RSP: 0018:ffffab6440043ca0 EFLAGS: 00010282 [ 24.162400] RAX: ffffffff891c1c00 RBX: ffffffff89d53968 RCX: 0000000000000001 [ 24.163253] RDX: 0000000000000030 RSI: ffffab6440043d00 RDI: 0000000000000000 [ 24.164104] RBP: 0000000000000030 R08: 0000000000000030 R09: 0000000000000030 [ 24.165166] R10: ffffffff893e66a0 R11: 0000000000000018 R12: ffffab6440043d00 [ 24.166038] R13: 0000000000000000 R14: 0000000000000000 R15: ffff9d5575a62e90 [ 24.166919] FS: 0000000000000000(0000) GS:ffff9d557db80000(0000) knlGS:0000000000000000 [ 24.167890] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 24.168587] CR2: 0000000000000018 CR3: 000000007a838000 CR4: 00000000000006e0 [ 24.169451] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 24.170320] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 24.171214] Call Trace: [ 24.171537] security_socket_sendmsg+0x3a/0x50 [ 24.172079] sock_sendmsg+0x16/0x60 [ 24.172506] iscsi_sw_tcp_xmit_segment+0x77/0x120 [ 24.173076] iscsi_sw_tcp_pdu_xmit+0x58/0x170 [ 24.173604] ? iscsi_dbg_trace+0x63/0x80 [ 24.174087] iscsi_tcp_task_xmit+0x101/0x280 [ 24.174666] iscsi_xmit_task+0x83/0x110 [ 24.175206] iscsi_xmitworker+0x57/0x380 [ 24.175757] ? __schedule+0x2a2/0x700 [ 24.176273] process_one_work+0x1b5/0x360 [ 24.176837] worker_thread+0x50/0x3c0 [ 24.177353] kthread+0xf9/0x130 [ 24.177799] ? process_one_work+0x360/0x360 [ 24.178401] ? kthread_park+0x90/0x90 [ 24.178915] ret_from_fork+0x35/0x40 [ 24.179421] Modules linked in: [ 24.179856] CR2: 0000000000000018 [ 24.180327] ---[ end trace b4b7674b6df5f480 ]--- Signed-off-by: Anatol Pomazau Co-developed-by: Frank Mayhar Signed-off-by: Frank Mayhar Co-developed-by: Bharath Ravi Signed-off-by: Bharath Ravi Co-developed-by: Khazhimsel Kumykov Signed-off-by: Khazhimsel Kumykov Co-developed-by: Gabriel Krisman Bertazi Signed-off-by: Gabriel Krisman Bertazi Reviewed-by: Lee Duncan Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/iscsi_tcp.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index 045207b5560e..7e3a77d3c6f0 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -372,8 +372,16 @@ static int iscsi_sw_tcp_pdu_xmit(struct iscsi_task *task) { struct iscsi_conn *conn = task->conn; unsigned int noreclaim_flag; + struct iscsi_tcp_conn *tcp_conn = conn->dd_data; + struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data; int rc = 0; + if (!tcp_sw_conn->sock) { + iscsi_conn_printk(KERN_ERR, conn, + "Transport not bound to socket!\n"); + return -EINVAL; + } + noreclaim_flag = memalloc_noreclaim_save(); while (iscsi_sw_tcp_xmit_qlen(conn)) { From 6772ac25b19c355a1af381f1e505c266c6977199 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 13 Nov 2019 14:05:08 -0800 Subject: [PATCH 1957/3715] scsi: target: iscsi: Wait for all commands to finish before freeing a session [ Upstream commit e9d3009cb936bd0faf0719f68d98ad8afb1e613b ] The iSCSI target driver is the only target driver that does not wait for ongoing commands to finish before freeing a session. Make the iSCSI target driver wait for ongoing commands to finish before freeing a session. This patch fixes the following KASAN complaint: BUG: KASAN: use-after-free in __lock_acquire+0xb1a/0x2710 Read of size 8 at addr ffff8881154eca70 by task kworker/0:2/247 CPU: 0 PID: 247 Comm: kworker/0:2 Not tainted 5.4.0-rc1-dbg+ #6 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 Workqueue: target_completion target_complete_ok_work [target_core_mod] Call Trace: dump_stack+0x8a/0xd6 print_address_description.constprop.0+0x40/0x60 __kasan_report.cold+0x1b/0x33 kasan_report+0x16/0x20 __asan_load8+0x58/0x90 __lock_acquire+0xb1a/0x2710 lock_acquire+0xd3/0x200 _raw_spin_lock_irqsave+0x43/0x60 target_release_cmd_kref+0x162/0x7f0 [target_core_mod] target_put_sess_cmd+0x2e/0x40 [target_core_mod] lio_check_stop_free+0x12/0x20 [iscsi_target_mod] transport_cmd_check_stop_to_fabric+0xd8/0xe0 [target_core_mod] target_complete_ok_work+0x1b0/0x790 [target_core_mod] process_one_work+0x549/0xa40 worker_thread+0x7a/0x5d0 kthread+0x1bc/0x210 ret_from_fork+0x24/0x30 Allocated by task 889: save_stack+0x23/0x90 __kasan_kmalloc.constprop.0+0xcf/0xe0 kasan_slab_alloc+0x12/0x20 kmem_cache_alloc+0xf6/0x360 transport_alloc_session+0x29/0x80 [target_core_mod] iscsi_target_login_thread+0xcd6/0x18f0 [iscsi_target_mod] kthread+0x1bc/0x210 ret_from_fork+0x24/0x30 Freed by task 1025: save_stack+0x23/0x90 __kasan_slab_free+0x13a/0x190 kasan_slab_free+0x12/0x20 kmem_cache_free+0x146/0x400 transport_free_session+0x179/0x2f0 [target_core_mod] transport_deregister_session+0x130/0x180 [target_core_mod] iscsit_close_session+0x12c/0x350 [iscsi_target_mod] iscsit_logout_post_handler+0x136/0x380 [iscsi_target_mod] iscsit_response_queue+0x8de/0xbe0 [iscsi_target_mod] iscsi_target_tx_thread+0x27f/0x370 [iscsi_target_mod] kthread+0x1bc/0x210 ret_from_fork+0x24/0x30 The buggy address belongs to the object at ffff8881154ec9c0 which belongs to the cache se_sess_cache of size 352 The buggy address is located 176 bytes inside of 352-byte region [ffff8881154ec9c0, ffff8881154ecb20) The buggy address belongs to the page: page:ffffea0004553b00 refcount:1 mapcount:0 mapping:ffff888101755400 index:0x0 compound_mapcount: 0 flags: 0x2fff000000010200(slab|head) raw: 2fff000000010200 dead000000000100 dead000000000122 ffff888101755400 raw: 0000000000000000 0000000080130013 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8881154ec900: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff8881154ec980: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb >ffff8881154eca00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8881154eca80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8881154ecb00: fb fb fb fb fc fc fc fc fc fc fc fc fc fc fc fc Cc: Mike Christie Link: https://lore.kernel.org/r/20191113220508.198257-3-bvanassche@acm.org Reviewed-by: Roman Bolshakov Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/target/iscsi/iscsi_target.c | 10 ++++++++-- include/scsi/iscsi_proto.h | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index fb7bd422e2e1..21ce92ee1652 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1158,7 +1158,9 @@ int iscsit_setup_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, hdr->cmdsn, be32_to_cpu(hdr->data_length), payload_length, conn->cid); - target_get_sess_cmd(&cmd->se_cmd, true); + if (target_get_sess_cmd(&cmd->se_cmd, true) < 0) + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_WAITING_FOR_LOGOUT, buf); cmd->sense_reason = transport_lookup_cmd_lun(&cmd->se_cmd, scsilun_to_int(&hdr->lun)); @@ -2004,7 +2006,9 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, conn->sess->se_sess, 0, DMA_NONE, TCM_SIMPLE_TAG, cmd->sense_buffer + 2); - target_get_sess_cmd(&cmd->se_cmd, true); + if (target_get_sess_cmd(&cmd->se_cmd, true) < 0) + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_WAITING_FOR_LOGOUT, buf); /* * TASK_REASSIGN for ERL=2 / connection stays inside of @@ -4236,6 +4240,8 @@ int iscsit_close_connection( * must wait until they have completed. */ iscsit_check_conn_usage_count(conn); + target_sess_cmd_list_set_waiting(sess->se_sess); + target_wait_for_sess_cmds(sess->se_sess); ahash_request_free(conn->conn_tx_hash); if (conn->conn_rx_hash) { diff --git a/include/scsi/iscsi_proto.h b/include/scsi/iscsi_proto.h index df156f1d50b2..f0a01a54bd15 100644 --- a/include/scsi/iscsi_proto.h +++ b/include/scsi/iscsi_proto.h @@ -638,6 +638,7 @@ struct iscsi_reject { #define ISCSI_REASON_BOOKMARK_INVALID 9 #define ISCSI_REASON_BOOKMARK_NO_RESOURCES 10 #define ISCSI_REASON_NEGOTIATION_RESET 11 +#define ISCSI_REASON_WAITING_FOR_LOGOUT 12 /* Max. number of Key=Value pairs in a text message */ #define MAX_KEY_VALUE_PAIRS 8192 From d5342c26646107b126ab2f1e0aa410006559c561 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 15 Nov 2019 14:55:51 +0200 Subject: [PATCH 1958/3715] gpio: mpc8xxx: Don't overwrite default irq_set_type callback [ Upstream commit 4e50573f39229d5e9c985fa3b4923a8b29619ade ] The per-SoC devtype structures can contain their own callbacks that overwrite mpc8xxx_gpio_devtype_default. The clear intention is that mpc8xxx_irq_set_type is used in case the SoC does not specify a more specific callback. But what happens is that if the SoC doesn't specify one, its .irq_set_type is de-facto NULL, and this overwrites mpc8xxx_irq_set_type to a no-op. This means that the following SoCs are affected: - fsl,mpc8572-gpio - fsl,ls1028a-gpio - fsl,ls1088a-gpio On these boards, the irq_set_type does exactly nothing, and the GPIO controller keeps its GPICR register in the hardware-default state. On the LS1028A, that is ACTIVE_BOTH, which means 2 interrupts are raised even if the IRQ client requests LEVEL_HIGH. Another implication is that the IRQs are not checked (e.g. level-triggered interrupts are not rejected, although they are not supported). Fixes: 82e39b0d8566 ("gpio: mpc8xxx: handle differences between incarnations at a single place") Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20191115125551.31061-1-olteanv@gmail.com Tested-by: Michael Walle Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/gpio/gpio-mpc8xxx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-mpc8xxx.c b/drivers/gpio/gpio-mpc8xxx.c index 8c93dec498fa..e7783b852d69 100644 --- a/drivers/gpio/gpio-mpc8xxx.c +++ b/drivers/gpio/gpio-mpc8xxx.c @@ -337,7 +337,8 @@ static int mpc8xxx_probe(struct platform_device *pdev) * It's assumed that only a single type of gpio controller is available * on the current machine, so overwriting global data is fine. */ - mpc8xxx_irq_chip.irq_set_type = devtype->irq_set_type; + if (devtype->irq_set_type) + mpc8xxx_irq_chip.irq_set_type = devtype->irq_set_type; if (devtype->gpio_dir_out) gc->direction_output = devtype->gpio_dir_out; From 04db2eb6686a0de1ff934656791d4225d201a6d2 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 27 Jun 2019 14:09:04 +0100 Subject: [PATCH 1959/3715] apparmor: fix unsigned len comparison with less than zero [ Upstream commit 00e0590dbaec6f1bcaa36a85467d7e3497ced522 ] The sanity check in macro update_for_len checks to see if len is less than zero, however, len is a size_t so it can never be less than zero, so this sanity check is a no-op. Fix this by making len a ssize_t so the comparison will work and add ulen that is a size_t copy of len so that the min() macro won't throw warnings about comparing different types. Addresses-Coverity: ("Macro compares unsigned to 0") Fixes: f1bd904175e8 ("apparmor: add the base fns() for domain labels") Signed-off-by: Colin Ian King Signed-off-by: John Johansen Signed-off-by: Sasha Levin --- security/apparmor/label.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/security/apparmor/label.c b/security/apparmor/label.c index c5b99b954580..ea63710442ae 100644 --- a/security/apparmor/label.c +++ b/security/apparmor/label.c @@ -1463,11 +1463,13 @@ static inline bool use_label_hname(struct aa_ns *ns, struct aa_label *label, /* helper macro for snprint routines */ #define update_for_len(total, len, size, str) \ do { \ + size_t ulen = len; \ + \ AA_BUG(len < 0); \ - total += len; \ - len = min(len, size); \ - size -= len; \ - str += len; \ + total += ulen; \ + ulen = min(ulen, size); \ + size -= ulen; \ + str += ulen; \ } while (0) /** @@ -1602,7 +1604,7 @@ int aa_label_snxprint(char *str, size_t size, struct aa_ns *ns, struct aa_ns *prev_ns = NULL; struct label_it i; int count = 0, total = 0; - size_t len; + ssize_t len; AA_BUG(!str && size != 0); AA_BUG(!label); From a06d07dd52932800258d079a156eafd3d5dcc5b2 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 24 Nov 2019 01:04:30 +0900 Subject: [PATCH 1960/3715] scripts/kallsyms: fix definitely-lost memory leak [ Upstream commit 21915eca088dc271c970e8351290e83d938114ac ] build_initial_tok_table() overwrites unused sym_entry to shrink the table size. Before the entry is overwritten, table[i].sym must be freed since it is malloc'ed data. This fixes the 'definitely lost' report from valgrind. I ran valgrind against x86_64_defconfig of v5.4-rc8 kernel, and here is the summary: [Before the fix] LEAK SUMMARY: definitely lost: 53,184 bytes in 2,874 blocks [After the fix] LEAK SUMMARY: definitely lost: 0 bytes in 0 blocks Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/kallsyms.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index b471022c8162..b43531899648 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -510,6 +510,8 @@ static void build_initial_tok_table(void) table[pos] = table[i]; learn_symbol(table[pos].sym, table[pos].len); pos++; + } else { + free(table[i].sym); } } table_cnt = pos; From 8ee2f89ed3bf58e43347d9ee47638676c03dd976 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Diego=20Elio=20Petten=C3=B2?= Date: Tue, 19 Nov 2019 21:37:08 +0000 Subject: [PATCH 1961/3715] cdrom: respect device capabilities during opening action MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 366ba7c71ef77c08d06b18ad61b26e2df7352338 ] Reading the TOC only works if the device can play audio, otherwise these commands fail (and possibly bring the device to an unhealthy state.) Similarly, cdrom_mmc3_profile() should only be called if the device supports generic packet commands. To: Jens Axboe Cc: linux-kernel@vger.kernel.org Cc: linux-scsi@vger.kernel.org Signed-off-by: Diego Elio Pettenò Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/cdrom/cdrom.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 90dd8e7291da..1c90da4af94f 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -995,6 +995,12 @@ static void cdrom_count_tracks(struct cdrom_device_info *cdi, tracktype *tracks) tracks->xa = 0; tracks->error = 0; cd_dbg(CD_COUNT_TRACKS, "entering cdrom_count_tracks\n"); + + if (!CDROM_CAN(CDC_PLAY_AUDIO)) { + tracks->error = CDS_NO_INFO; + return; + } + /* Grab the TOC header so we can see how many tracks there are */ ret = cdi->ops->audio_ioctl(cdi, CDROMREADTOCHDR, &header); if (ret) { @@ -1161,7 +1167,8 @@ int cdrom_open(struct cdrom_device_info *cdi, struct block_device *bdev, ret = open_for_data(cdi); if (ret) goto err; - cdrom_mmc3_profile(cdi); + if (CDROM_CAN(CDC_GENERIC_PACKET)) + cdrom_mmc3_profile(cdi); if (mode & FMODE_WRITE) { ret = -EROFS; if (cdrom_open_write(cdi)) @@ -2878,6 +2885,9 @@ int cdrom_get_last_written(struct cdrom_device_info *cdi, long *last_written) it doesn't give enough information or fails. then we return the toc contents. */ use_toc: + if (!CDROM_CAN(CDC_PLAY_AUDIO)) + return -ENOSYS; + toc.cdte_format = CDROM_MSF; toc.cdte_track = CDROM_LEADOUT; if ((ret = cdi->ops->audio_ioctl(cdi, CDROMREADTOCENTRY, &toc))) From 3bda1b2036e954075bd29b28626052de3d09676e Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 27 Nov 2019 11:53:21 +0200 Subject: [PATCH 1962/3715] perf script: Fix brstackinsn for AUXTRACE [ Upstream commit 0cd032d3b5fcebf5454315400ab310746a81ca53 ] brstackinsn must be allowed to be set by the user when AUX area data has been captured because, in that case, the branch stack might be synthesized on the fly. This fixes the following error: Before: $ perf record -e '{intel_pt//,cpu/mem_inst_retired.all_loads,aux-sample-size=8192/pp}:u' grep -rqs jhgjhg /boot [ perf record: Woken up 19 times to write data ] [ perf record: Captured and wrote 2.274 MB perf.data ] $ perf script -F +brstackinsn --xed --itrace=i1usl100 | head Display of branch stack assembler requested, but non all-branch filter set Hint: run 'perf record -b ...' After: $ perf record -e '{intel_pt//,cpu/mem_inst_retired.all_loads,aux-sample-size=8192/pp}:u' grep -rqs jhgjhg /boot [ perf record: Woken up 19 times to write data ] [ perf record: Captured and wrote 2.274 MB perf.data ] $ perf script -F +brstackinsn --xed --itrace=i1usl100 | head grep 13759 [002] 8091.310257: 1862 instructions:uH: 5641d58069eb bmexec+0x86b (/bin/grep) bmexec+2485: 00005641d5806b35 jnz 0x5641d5806bd0 # MISPRED 00005641d5806bd0 movzxb (%r13,%rdx,1), %eax 00005641d5806bd6 add %rdi, %rax 00005641d5806bd9 movzxb -0x1(%rax), %edx 00005641d5806bdd cmp %rax, %r14 00005641d5806be0 jnb 0x5641d58069c0 # MISPRED mismatch of LBR data and executable 00005641d58069c0 movzxb (%r13,%rdx,1), %edi Fixes: 48d02a1d5c13 ("perf script: Add 'brstackinsn' for branch stacks") Reported-by: Andi Kleen Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20191127095322.15417-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-script.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 76789523429a..09c4380bc225 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -355,7 +355,7 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, "selected. Hence, no address to lookup the source line number.\n"); return -EINVAL; } - if (PRINT_FIELD(BRSTACKINSN) && + if (PRINT_FIELD(BRSTACKINSN) && !allow_user_set && !(perf_evlist__combined_branch_type(session->evlist) & PERF_SAMPLE_BRANCH_ANY)) { pr_err("Display of branch stack assembler requested, but non all-branch filter set\n" From 9edc3a3ddb65d6291e5143c1c39e9b554597141a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 27 Nov 2019 10:13:34 -0300 Subject: [PATCH 1963/3715] perf regs: Make perf_reg_name() return "unknown" instead of NULL [ Upstream commit 5b596e0ff0e1852197d4c82d3314db5e43126bf7 ] To avoid breaking the build on arches where this is not wired up, at least all the other features should be made available and when using this specific routine, the "unknown" should point the user/developer to the need to wire this up on this particular hardware architecture. Detected in a container mipsel debian cross build environment, where it shows up as: In file included from /usr/mipsel-linux-gnu/include/stdio.h:867, from /git/linux/tools/perf/lib/include/perf/cpumap.h:6, from util/session.c:13: In function 'printf', inlined from 'regs_dump__printf' at util/session.c:1103:3, inlined from 'regs__printf' at util/session.c:1131:2: /usr/mipsel-linux-gnu/include/bits/stdio2.h:107:10: error: '%-5s' directive argument is null [-Werror=format-overflow=] 107 | return __printf_chk (__USE_FORTIFY_LEVEL - 1, __fmt, __va_arg_pack ()); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cross compiler details: mipsel-linux-gnu-gcc (Debian 9.2.1-8) 9.2.1 20190909 Also on mips64: In file included from /usr/mips64-linux-gnuabi64/include/stdio.h:867, from /git/linux/tools/perf/lib/include/perf/cpumap.h:6, from util/session.c:13: In function 'printf', inlined from 'regs_dump__printf' at util/session.c:1103:3, inlined from 'regs__printf' at util/session.c:1131:2, inlined from 'regs_user__printf' at util/session.c:1139:3, inlined from 'dump_sample' at util/session.c:1246:3, inlined from 'machines__deliver_event' at util/session.c:1421:3: /usr/mips64-linux-gnuabi64/include/bits/stdio2.h:107:10: error: '%-5s' directive argument is null [-Werror=format-overflow=] 107 | return __printf_chk (__USE_FORTIFY_LEVEL - 1, __fmt, __va_arg_pack ()); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In function 'printf', inlined from 'regs_dump__printf' at util/session.c:1103:3, inlined from 'regs__printf' at util/session.c:1131:2, inlined from 'regs_intr__printf' at util/session.c:1147:3, inlined from 'dump_sample' at util/session.c:1249:3, inlined from 'machines__deliver_event' at util/session.c:1421:3: /usr/mips64-linux-gnuabi64/include/bits/stdio2.h:107:10: error: '%-5s' directive argument is null [-Werror=format-overflow=] 107 | return __printf_chk (__USE_FORTIFY_LEVEL - 1, __fmt, __va_arg_pack ()); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cross compiler details: mips64-linux-gnuabi64-gcc (Debian 9.2.1-8) 9.2.1 20190909 Fixes: 2bcd355b71da ("perf tools: Add interface to arch registers sets") Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-95wjyv4o65nuaeweq31t7l1s@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/perf_regs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index c9319f8d17a6..f732e3af2bd4 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h @@ -34,7 +34,7 @@ int perf_reg_value(u64 *valp, struct regs_dump *regs, int id); static inline const char *perf_reg_name(int id __maybe_unused) { - return NULL; + return "unknown"; } static inline int perf_reg_value(u64 *valp __maybe_unused, From 6d7101aa1d5232a888f9fc98348974b9df04584d Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Wed, 20 Nov 2019 11:44:31 +0100 Subject: [PATCH 1964/3715] s390/zcrypt: handle new reply code FILTERED_BY_HYPERVISOR [ Upstream commit 6733775a92eacd612ac88afa0fd922e4ffeb2bc7 ] This patch introduces support for a new architectured reply code 0x8B indicating that a hypervisor layer (if any) has rejected an ap message. Linux may run as a guest on top of a hypervisor like zVM or KVM. So the crypto hardware seen by the ap bus may be restricted by the hypervisor for example only a subset like only clear key crypto requests may be supported. Other requests will be filtered out - rejected by the hypervisor. The new reply code 0x8B will appear in such cases and needs to get recognized by the ap bus and zcrypt device driver zoo. Signed-off-by: Harald Freudenberger Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- drivers/s390/crypto/zcrypt_error.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/s390/crypto/zcrypt_error.h b/drivers/s390/crypto/zcrypt_error.h index 9499cd3a05f8..02a936db0092 100644 --- a/drivers/s390/crypto/zcrypt_error.h +++ b/drivers/s390/crypto/zcrypt_error.h @@ -75,6 +75,7 @@ struct error_hdr { #define REP82_ERROR_EVEN_MOD_IN_OPND 0x85 #define REP82_ERROR_RESERVED_FIELD 0x88 #define REP82_ERROR_INVALID_DOMAIN_PENDING 0x8A +#define REP82_ERROR_FILTERED_BY_HYPERVISOR 0x8B #define REP82_ERROR_TRANSPORT_FAIL 0x90 #define REP82_ERROR_PACKET_TRUNCATED 0xA0 #define REP82_ERROR_ZERO_BUFFER_LEN 0xB0 @@ -105,6 +106,7 @@ static inline int convert_error(struct zcrypt_queue *zq, case REP82_ERROR_INVALID_DOMAIN_PRECHECK: case REP82_ERROR_INVALID_DOMAIN_PENDING: case REP82_ERROR_INVALID_SPECIAL_CMD: + case REP82_ERROR_FILTERED_BY_HYPERVISOR: // REP88_ERROR_INVALID_KEY // '82' CEX2A // REP88_ERROR_OPERAND // '84' CEX2A // REP88_ERROR_OPERAND_EVEN_MOD // '85' CEX2A From 916950d23ed457cfcf7b7107d08830c06212c337 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 13 Nov 2019 16:12:02 +0900 Subject: [PATCH 1965/3715] libfdt: define INT32_MAX and UINT32_MAX in libfdt_env.h [ Upstream commit a8de1304b7df30e3a14f2a8b9709bb4ff31a0385 ] The DTC v1.5.1 added references to (U)INT32_MAX. This is no problem for user-space programs since defines (U)INT32_MAX along with (u)int32_t. For the kernel space, libfdt_env.h needs to be adjusted before we pull in the changes. In the kernel, we usually use s/u32 instead of (u)int32_t for the fixed-width types. Accordingly, we already have S/U32_MAX for their max values. So, we should not add (U)INT32_MAX to any more. Instead, add them to the in-kernel libfdt_env.h to compile the latest libfdt. Signed-off-by: Masahiro Yamada Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- arch/arm/boot/compressed/libfdt_env.h | 4 +++- arch/powerpc/boot/libfdt_env.h | 2 ++ include/linux/libfdt_env.h | 3 +++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/compressed/libfdt_env.h b/arch/arm/boot/compressed/libfdt_env.h index b36c0289a308..6a0f1f524466 100644 --- a/arch/arm/boot/compressed/libfdt_env.h +++ b/arch/arm/boot/compressed/libfdt_env.h @@ -2,11 +2,13 @@ #ifndef _ARM_LIBFDT_ENV_H #define _ARM_LIBFDT_ENV_H +#include #include #include #include -#define INT_MAX ((int)(~0U>>1)) +#define INT32_MAX S32_MAX +#define UINT32_MAX U32_MAX typedef __be16 fdt16_t; typedef __be32 fdt32_t; diff --git a/arch/powerpc/boot/libfdt_env.h b/arch/powerpc/boot/libfdt_env.h index 39155d3b2cef..ac5d3c947e04 100644 --- a/arch/powerpc/boot/libfdt_env.h +++ b/arch/powerpc/boot/libfdt_env.h @@ -6,6 +6,8 @@ #include #define INT_MAX ((int)(~0U>>1)) +#define UINT32_MAX ((u32)~0U) +#define INT32_MAX ((s32)(UINT32_MAX >> 1)) #include "of.h" diff --git a/include/linux/libfdt_env.h b/include/linux/libfdt_env.h index 1aa707ab19bb..8b54c591678e 100644 --- a/include/linux/libfdt_env.h +++ b/include/linux/libfdt_env.h @@ -7,6 +7,9 @@ #include +#define INT32_MAX S32_MAX +#define UINT32_MAX U32_MAX + typedef __be16 fdt16_t; typedef __be32 fdt32_t; typedef __be64 fdt64_t; From 03a8e61bab3355ec9c4ded3df6b63dd1868aaae9 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 22 Nov 2019 16:43:15 +0100 Subject: [PATCH 1966/3715] s390/cpum_sf: Check for SDBT and SDB consistency [ Upstream commit 247f265fa502e7b17a0cb0cc330e055a36aafce4 ] Each SBDT is located at a 4KB page and contains 512 entries. Each entry of a SDBT points to a SDB, a 4KB page containing sampled data. The last entry is a link to another SDBT page. When an event is created the function sequence executed is: __hw_perf_event_init() +--> allocate_buffers() +--> realloc_sampling_buffers() +---> alloc_sample_data_block() Both functions realloc_sampling_buffers() and alloc_sample_data_block() allocate pages and the allocation can fail. This is handled correctly and all allocated pages are freed and error -ENOMEM is returned to the top calling function. Finally the event is not created. Once the event has been created, the amount of initially allocated SDBT and SDB can be too low. This is detected during measurement interrupt handling, where the amount of lost samples is calculated. If the number of lost samples is too high considering sampling frequency and already allocated SBDs, the number of SDBs is enlarged during the next execution of cpumsf_pmu_enable(). If more SBDs need to be allocated, functions realloc_sampling_buffers() +---> alloc-sample_data_block() are called to allocate more pages. Page allocation may fail and the returned error is ignored. A SDBT and SDB setup already exists. However the modified SDBTs and SDBs might end up in a situation where the first entry of an SDBT does not point to an SDB, but another SDBT, basicly an SBDT without payload. This can not be handled by the interrupt handler, where an SDBT must have at least one entry pointing to an SBD. Add a check to avoid SDBTs with out payload (SDBs) when enlarging the buffer setup. Signed-off-by: Thomas Richter Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- arch/s390/kernel/perf_cpum_sf.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 2e2fd9535f86..45304085b6ee 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -185,7 +185,7 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb, gfp_t gfp_flags) { int i, rc; - unsigned long *new, *tail; + unsigned long *new, *tail, *tail_prev = NULL; if (!sfb->sdbt || !sfb->tail) return -EINVAL; @@ -224,6 +224,7 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb, sfb->num_sdbt++; /* Link current page to tail of chain */ *tail = (unsigned long)(void *) new + 1; + tail_prev = tail; tail = new; } @@ -233,10 +234,22 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb, * issue, a new realloc call (if required) might succeed. */ rc = alloc_sample_data_block(tail, gfp_flags); - if (rc) + if (rc) { + /* Undo last SDBT. An SDBT with no SDB at its first + * entry but with an SDBT entry instead can not be + * handled by the interrupt handler code. + * Avoid this situation. + */ + if (tail_prev) { + sfb->num_sdbt--; + free_page((unsigned long) new); + tail = tail_prev; + } break; + } sfb->num_sdb++; tail++; + tail_prev = new = NULL; /* Allocated at least one SBD */ } /* Link sampling buffer to its origin */ From 9c2f6b5e695f78dfc8dde991c2a23276f5b7041d Mon Sep 17 00:00:00 2001 From: Ding Xiang Date: Sat, 30 Nov 2019 17:49:12 -0800 Subject: [PATCH 1967/3715] ocfs2: fix passing zero to 'PTR_ERR' warning [ Upstream commit 188c523e1c271d537f3c9f55b6b65bf4476de32f ] Fix a static code checker warning: fs/ocfs2/acl.c:331 ocfs2_acl_chmod() warn: passing zero to 'PTR_ERR' Link: http://lkml.kernel.org/r/1dee278b-6c96-eec2-ce76-fe6e07c6e20f@linux.alibaba.com Fixes: 5ee0fbd50fd ("ocfs2: revert using ocfs2_acl_chmod to avoid inode cluster lock hang") Signed-off-by: Ding Xiang Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/ocfs2/acl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 917fadca8a7b..b73b78771915 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -335,8 +335,8 @@ int ocfs2_acl_chmod(struct inode *inode, struct buffer_head *bh) down_read(&OCFS2_I(inode)->ip_xattr_sem); acl = ocfs2_get_acl_nolock(inode, ACL_TYPE_ACCESS, bh); up_read(&OCFS2_I(inode)->ip_xattr_sem); - if (IS_ERR(acl) || !acl) - return PTR_ERR(acl); + if (IS_ERR_OR_NULL(acl)) + return PTR_ERR_OR_ZERO(acl); ret = __posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); if (ret) return ret; From 5dc89b665d7278a50997a081ae546115029b2282 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Sat, 30 Nov 2019 17:56:08 -0800 Subject: [PATCH 1968/3715] kernel: sysctl: make drop_caches write-only [ Upstream commit 204cb79ad42f015312a5bbd7012d09c93d9b46fb ] Currently, the drop_caches proc file and sysctl read back the last value written, suggesting this is somehow a stateful setting instead of a one-time command. Make it write-only, like e.g. compact_memory. While mitigating a VM problem at scale in our fleet, there was confusion about whether writing to this file will permanently switch the kernel into a non-caching mode. This influences the decision making in a tense situation, where tens of people are trying to fix tens of thousands of affected machines: Do we need a rollback strategy? What are the performance implications of operating in a non-caching state for several days? It also caused confusion when the kernel team said we may need to write the file several times to make sure it's effective ("But it already reads back 3?"). Link: http://lkml.kernel.org/r/20191031221602.9375-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Chris Down Acked-by: Vlastimil Babka Acked-by: David Hildenbrand Acked-by: Michal Hocko Acked-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- kernel/sysctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index cfc2c0d1369a..74fc3a9d1923 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1397,7 +1397,7 @@ static struct ctl_table vm_table[] = { .procname = "drop_caches", .data = &sysctl_drop_caches, .maxlen = sizeof(int), - .mode = 0644, + .mode = 0200, .proc_handler = drop_caches_sysctl_handler, .extra1 = &one, .extra2 = &four, From 36d503a7b0311d989affef48b910d90cbae41057 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Sat, 30 Nov 2019 17:58:01 -0800 Subject: [PATCH 1969/3715] userfaultfd: require CAP_SYS_PTRACE for UFFD_FEATURE_EVENT_FORK [ Upstream commit 3c1c24d91ffd536de0a64688a9df7f49e58fadbc ] A while ago Andy noticed (http://lkml.kernel.org/r/CALCETrWY+5ynDct7eU_nDUqx=okQvjm=Y5wJvA4ahBja=CQXGw@mail.gmail.com) that UFFD_FEATURE_EVENT_FORK used by an unprivileged user may have security implications. As the first step of the solution the following patch limits the availably of UFFD_FEATURE_EVENT_FORK only for those having CAP_SYS_PTRACE. The usage of CAP_SYS_PTRACE ensures compatibility with CRIU. Yet, if there are other users of non-cooperative userfaultfd that run without CAP_SYS_PTRACE, they would be broken :( Current implementation of UFFD_FEATURE_EVENT_FORK modifies the file descriptor table from the read() implementation of uffd, which may have security implications for unprivileged use of the userfaultfd. Limit availability of UFFD_FEATURE_EVENT_FORK only for callers that have CAP_SYS_PTRACE. Link: http://lkml.kernel.org/r/1572967777-8812-2-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Reviewed-by: Andrea Arcangeli Cc: Daniel Colascione Cc: Jann Horn Cc: Lokesh Gidra Cc: Nick Kralevich Cc: Nosh Minwalla Cc: Pavel Emelyanov Cc: Tim Murray Cc: Aleksa Sarai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/userfaultfd.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index a609d480606d..e2b2196fd942 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1807,13 +1807,12 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx, if (copy_from_user(&uffdio_api, buf, sizeof(uffdio_api))) goto out; features = uffdio_api.features; - if (uffdio_api.api != UFFD_API || (features & ~UFFD_API_FEATURES)) { - memset(&uffdio_api, 0, sizeof(uffdio_api)); - if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api))) - goto out; - ret = -EINVAL; - goto out; - } + ret = -EINVAL; + if (uffdio_api.api != UFFD_API || (features & ~UFFD_API_FEATURES)) + goto err_out; + ret = -EPERM; + if ((features & UFFD_FEATURE_EVENT_FORK) && !capable(CAP_SYS_PTRACE)) + goto err_out; /* report all available features and ioctls to userland */ uffdio_api.features = UFFD_API_FEATURES; uffdio_api.ioctls = UFFD_API_IOCTLS; @@ -1826,6 +1825,11 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx, ret = 0; out: return ret; +err_out: + memset(&uffdio_api, 0, sizeof(uffdio_api)); + if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api))) + ret = -EFAULT; + goto out; } static long userfaultfd_ioctl(struct file *file, unsigned cmd, From b6829d2608ff945547eaaa22e76487db0c0b53f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=2E=20Sch=C3=B6nherr?= Date: Tue, 10 Dec 2019 01:07:30 +0100 Subject: [PATCH 1970/3715] x86/mce: Fix possibly incorrect severity calculation on AMD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a3a57ddad061acc90bef39635caf2b2330ce8f21 ] The function mce_severity_amd_smca() requires m->bank to be initialized for correct operation. Fix the one case, where mce_severity() is called without doing so. Fixes: 6bda529ec42e ("x86/mce: Grade uncorrected errors for SMCA-enabled systems") Fixes: d28af26faa0b ("x86/MCE: Initialize mce.bank in the case of a fatal error in mce_no_way_out()") Signed-off-by: Jan H. Schönherr Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: linux-edac Cc: Cc: Thomas Gleixner Cc: x86-ml Cc: Yazen Ghannam Link: https://lkml.kernel.org/r/20191210000733.17979-4-jschoenh@amazon.de Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/mcheck/mce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index c7bd2e549a6a..0b0e44f85393 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -802,8 +802,8 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, if (quirk_no_way_out) quirk_no_way_out(i, m, regs); + m->bank = i; if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) { - m->bank = i; mce_read_aux(m, i); *msg = tmp; return 1; From daf8f15c068fcd5f3f1a20199aeecaa01c708518 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 18 Dec 2019 12:18:21 +0300 Subject: [PATCH 1971/3715] net, sysctl: Fix compiler warning when only cBPF is present MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1148f9adbe71415836a18a36c1b4ece999ab0973 ] proc_dointvec_minmax_bpf_restricted() has been firstly introduced in commit 2e4a30983b0f ("bpf: restrict access to core bpf sysctls") under CONFIG_HAVE_EBPF_JIT. Then, this ifdef has been removed in ede95a63b5e8 ("bpf: add bpf_jit_limit knob to restrict unpriv allocations"), because a new sysctl, bpf_jit_limit, made use of it. Finally, this parameter has become long instead of integer with fdadd04931c2 ("bpf: fix bpf_jit_limit knob for PAGE_SIZE >= 64K") and thus, a new proc_dolongvec_minmax_bpf_restricted() has been added. With this last change, we got back to that proc_dointvec_minmax_bpf_restricted() is used only under CONFIG_HAVE_EBPF_JIT, but the corresponding ifdef has not been brought back. So, in configurations like CONFIG_BPF_JIT=y && CONFIG_HAVE_EBPF_JIT=n since v4.20 we have: CC net/core/sysctl_net_core.o net/core/sysctl_net_core.c:292:1: warning: ‘proc_dointvec_minmax_bpf_restricted’ defined but not used [-Wunused-function] 292 | proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Suppress this by guarding it with CONFIG_HAVE_EBPF_JIT again. Fixes: fdadd04931c2 ("bpf: fix bpf_jit_limit knob for PAGE_SIZE >= 64K") Signed-off-by: Alexander Lobakin Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20191218091821.7080-1-alobakin@dlink.ru Signed-off-by: Sasha Levin --- net/core/sysctl_net_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 144cd1acd7e3..069e3c4fcc44 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -274,6 +274,7 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write, return ret; } +# ifdef CONFIG_HAVE_EBPF_JIT static int proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, @@ -284,6 +285,7 @@ proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, return proc_dointvec_minmax(table, write, buffer, lenp, ppos); } +# endif /* CONFIG_HAVE_EBPF_JIT */ static int proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write, From b2823ed5cb58ed54b7ae1e4b270291d84d5a2b10 Mon Sep 17 00:00:00 2001 From: Marco Oliverio Date: Mon, 2 Dec 2019 19:54:30 +0100 Subject: [PATCH 1972/3715] netfilter: nf_queue: enqueue skbs with NULL dst [ Upstream commit 0b9173f4688dfa7c5d723426be1d979c24ce3d51 ] Bridge packets that are forwarded have skb->dst == NULL and get dropped by the check introduced by b60a77386b1d4868f72f6353d35dabe5fbe981f2 (net: make skb_dst_force return true when dst is refcounted). To fix this we check skb_dst() before skb_dst_force(), so we don't drop skb packet with dst == NULL. This holds also for skb at the PRE_ROUTING hook so we remove the second check. Fixes: b60a77386b1d ("net: make skb_dst_force return true when dst is refcounted") Signed-off-by: Marco Oliverio Signed-off-by: Rocco Folino Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 37efcc1c8887..b06ef4c62522 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -138,7 +138,7 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, goto err; } - if (!skb_dst_force(skb) && state->hook != NF_INET_PRE_ROUTING) { + if (skb_dst(skb) && !skb_dst_force(skb)) { status = -ENETDOWN; goto err; } From ac9d7d7de1db06bd382aa2628d5025fc02dfae87 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 16 Dec 2019 16:12:24 +0100 Subject: [PATCH 1973/3715] ALSA: hda - Downgrade error message for single-cmd fallback [ Upstream commit 475feec0c41ad71cb7d02f0310e56256606b57c5 ] We made the error message for the CORB/RIRB communication clearer by upgrading to dev_WARN() so that user can notice better. But this struck us like a boomerang: now it caught syzbot and reported back as a fatal issue although it's not really any too serious bug that worth for stopping the whole system. OK, OK, let's be softy, downgrade it to the standard dev_err() again. Fixes: dd65f7e19c69 ("ALSA: hda - Show the fatal CORB/RIRB error more clearly") Reported-by: syzbot+b3028ac3933f5c466389@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20191216151224.30013-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/hda_controller.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c index 8fcb421193e0..fa261b27d858 100644 --- a/sound/pci/hda/hda_controller.c +++ b/sound/pci/hda/hda_controller.c @@ -883,7 +883,7 @@ static int azx_rirb_get_response(struct hdac_bus *bus, unsigned int addr, return -EAGAIN; /* give a chance to retry */ } - dev_WARN(chip->card->dev, + dev_err(chip->card->dev, "azx_get_response timeout, switching to single_cmd mode: last cmd=0x%08x\n", bus->last_cmd[addr]); chip->single_cmd = 1; From 75e18a6ee17be533a335f6e47527b648116a2f39 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Fri, 6 Dec 2019 15:44:55 -0800 Subject: [PATCH 1974/3715] bonding: fix active-backup transition after link failure [ Upstream commit 5d485ed88d48f8101a2067348e267c0aaf4ed486 ] After the recent fix in commit 1899bb325149 ("bonding: fix state transition issue in link monitoring"), the active-backup mode with miimon initially come-up fine but after a link-failure, both members transition into backup state. Following steps to reproduce the scenario (eth1 and eth2 are the slaves of the bond): ip link set eth1 up ip link set eth2 down sleep 1 ip link set eth2 up ip link set eth1 down cat /sys/class/net/eth1/bonding_slave/state cat /sys/class/net/eth2/bonding_slave/state Fixes: 1899bb325149 ("bonding: fix state transition issue in link monitoring") CC: Jay Vosburgh Signed-off-by: Mahesh Bandewar Acked-by: Jay Vosburgh Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/bonding/bond_main.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 5f6602cb191f..fef599eb822b 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2186,9 +2186,6 @@ static void bond_miimon_commit(struct bonding *bond) } else if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { /* make it immediately active */ bond_set_active_slave(slave); - } else if (slave != primary) { - /* prevent it from being the active one */ - bond_set_backup_slave(slave); } netdev_info(bond->dev, "link status definitely up for interface %s, %u Mbps %s duplex\n", From 29061e0875f853c3bed1d486ccf5ce476cc51afc Mon Sep 17 00:00:00 2001 From: Mattias Jacobsson <2pi@mok.nu> Date: Sat, 29 Dec 2018 15:17:50 +0100 Subject: [PATCH 1975/3715] perf strbuf: Remove redundant va_end() in strbuf_addv() commit 099be748865eece21362aee416c350c0b1ae34df upstream. Each call to va_copy() should have one, and only one, corresponding call to va_end(). In strbuf_addv() some code paths result in va_end() getting called multiple times. Remove the superfluous va_end(). Signed-off-by: Mattias Jacobsson <2pi@mok.nu> Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Sanskriti Sharma Link: http://lkml.kernel.org/r/20181229141750.16945-1-2pi@mok.nu Fixes: ce49d8436cff ("perf strbuf: Match va_{add,copy} with va_end") Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Nobuhiro Iwamatsu Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/strbuf.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c index 9005fbe0780e..23092fd6451d 100644 --- a/tools/perf/util/strbuf.c +++ b/tools/perf/util/strbuf.c @@ -109,7 +109,6 @@ static int strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap) return ret; } len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap_saved); - va_end(ap_saved); if (len > strbuf_avail(sb)) { pr_debug("this should not happen, your vsnprintf is broken"); va_end(ap_saved); From 26b363149dd53a36bbd61901df5ac16bde39badb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 5 Oct 2019 11:32:52 -0700 Subject: [PATCH 1976/3715] Make filldir[64]() verify the directory entry filename is valid commit 8a23eb804ca4f2be909e372cf5a9e7b30ae476cd upstream. This has been discussed several times, and now filesystem people are talking about doing it individually at the filesystem layer, so head that off at the pass and just do it in getdents{64}(). This is partially based on a patch by Jann Horn, but checks for NUL bytes as well, and somewhat simplified. There's also commentary about how it might be better if invalid names due to filesystem corruption don't cause an immediate failure, but only an error at the end of the readdir(), so that people can still see the filenames that are ok. There's also been discussion about just how much POSIX strictly speaking requires this since it's about filesystem corruption. It's really more "protect user space from bad behavior" as pointed out by Jann. But since Eric Biederman looked up the POSIX wording, here it is for context: "From readdir: The readdir() function shall return a pointer to a structure representing the directory entry at the current position in the directory stream specified by the argument dirp, and position the directory stream at the next entry. It shall return a null pointer upon reaching the end of the directory stream. The structure dirent defined in the header describes a directory entry. From definitions: 3.129 Directory Entry (or Link) An object that associates a filename with a file. Several directory entries can associate names with the same file. ... 3.169 Filename A name consisting of 1 to {NAME_MAX} bytes used to name a file. The characters composing the name may be selected from the set of all character values excluding the slash character and the null byte. The filenames dot and dot-dot have special meaning. A filename is sometimes referred to as a 'pathname component'." Note that I didn't bother adding the checks to any legacy interfaces that nobody uses. Also note that if this ends up being noticeable as a performance regression, we can fix that to do a much more optimized model that checks for both NUL and '/' at the same time one word at a time. We haven't really tended to optimize 'memchr()', and it only checks for one pattern at a time anyway, and we really _should_ check for NUL too (but see the comment about "soft errors" in the code about why it currently only checks for '/') See the CONFIG_DCACHE_WORD_ACCESS case of hash_name() for how the name lookup code looks for pathname terminating characters in parallel. Link: https://lore.kernel.org/lkml/20190118161440.220134-2-jannh@google.com/ Cc: Alexander Viro Cc: Jann Horn Cc: Eric W. Biederman Signed-off-by: Linus Torvalds Signed-off-by: Siddharth Chandrasekaran Signed-off-by: Greg Kroah-Hartman --- fs/readdir.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/fs/readdir.c b/fs/readdir.c index d336db65a33e..9a3dc6620c54 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -65,6 +65,40 @@ out: } EXPORT_SYMBOL(iterate_dir); +/* + * POSIX says that a dirent name cannot contain NULL or a '/'. + * + * It's not 100% clear what we should really do in this case. + * The filesystem is clearly corrupted, but returning a hard + * error means that you now don't see any of the other names + * either, so that isn't a perfect alternative. + * + * And if you return an error, what error do you use? Several + * filesystems seem to have decided on EUCLEAN being the error + * code for EFSCORRUPTED, and that may be the error to use. Or + * just EIO, which is perhaps more obvious to users. + * + * In order to see the other file names in the directory, the + * caller might want to make this a "soft" error: skip the + * entry, and return the error at the end instead. + * + * Note that this should likely do a "memchr(name, 0, len)" + * check too, since that would be filesystem corruption as + * well. However, that case can't actually confuse user space, + * which has to do a strlen() on the name anyway to find the + * filename length, and the above "soft error" worry means + * that it's probably better left alone until we have that + * issue clarified. + */ +static int verify_dirent_name(const char *name, int len) +{ + if (WARN_ON_ONCE(!len)) + return -EIO; + if (WARN_ON_ONCE(memchr(name, '/', len))) + return -EIO; + return 0; +} + /* * Traditional linux readdir() handling.. * @@ -174,6 +208,9 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen, int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2, sizeof(long)); + buf->error = verify_dirent_name(name, namlen); + if (unlikely(buf->error)) + return buf->error; buf->error = -EINVAL; /* only used if we fail.. */ if (reclen > buf->count) return -EINVAL; @@ -260,6 +297,9 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen, int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1, sizeof(u64)); + buf->error = verify_dirent_name(name, namlen); + if (unlikely(buf->error)) + return buf->error; buf->error = -EINVAL; /* only used if we fail.. */ if (reclen > buf->count) return -EINVAL; From 9c0ffae88f7507078c7e6db73eda3b60478628eb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 18 Oct 2019 18:41:16 -0400 Subject: [PATCH 1977/3715] filldir[64]: remove WARN_ON_ONCE() for bad directory entries commit b9959c7a347d6adbb558fba7e36e9fef3cba3b07 upstream. This was always meant to be a temporary thing, just for testing and to see if it actually ever triggered. The only thing that reported it was syzbot doing disk image fuzzing, and then that warning is expected. So let's just remove it before -rc4, because the extra sanity testing should probably go to -stable, but we don't want the warning to do so. Reported-by: syzbot+3031f712c7ad5dd4d926@syzkaller.appspotmail.com Fixes: 8a23eb804ca4 ("Make filldir[64]() verify the directory entry filename is valid") Signed-off-by: Linus Torvalds Signed-off-by: Siddharth Chandrasekaran Signed-off-by: Greg Kroah-Hartman --- fs/readdir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/readdir.c b/fs/readdir.c index 9a3dc6620c54..0c357663e33a 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -92,9 +92,9 @@ EXPORT_SYMBOL(iterate_dir); */ static int verify_dirent_name(const char *name, int len) { - if (WARN_ON_ONCE(!len)) + if (!len) return -EIO; - if (WARN_ON_ONCE(memchr(name, '/', len))) + if (memchr(name, '/', len)) return -EIO; return 0; } From 69bb99133686bf9b8c7b5143c0bc2ae23d6cd63b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 15 Dec 2019 03:49:25 +0100 Subject: [PATCH 1978/3715] netfilter: ebtables: compat: reject all padding in matches/watchers commit e608f631f0ba5f1fc5ee2e260a3a35d13107cbfe upstream. syzbot reported following splat: BUG: KASAN: vmalloc-out-of-bounds in size_entry_mwt net/bridge/netfilter/ebtables.c:2063 [inline] BUG: KASAN: vmalloc-out-of-bounds in compat_copy_entries+0x128b/0x1380 net/bridge/netfilter/ebtables.c:2155 Read of size 4 at addr ffffc900004461f4 by task syz-executor267/7937 CPU: 1 PID: 7937 Comm: syz-executor267 Not tainted 5.5.0-rc1-syzkaller #0 size_entry_mwt net/bridge/netfilter/ebtables.c:2063 [inline] compat_copy_entries+0x128b/0x1380 net/bridge/netfilter/ebtables.c:2155 compat_do_replace+0x344/0x720 net/bridge/netfilter/ebtables.c:2249 compat_do_ebt_set_ctl+0x22f/0x27e net/bridge/netfilter/ebtables.c:2333 [..] Because padding isn't considered during computation of ->buf_user_offset, "total" is decremented by fewer bytes than it should. Therefore, the first part of if (*total < sizeof(*entry) || entry->next_offset < sizeof(*entry)) will pass, -- it should not have. This causes oob access: entry->next_offset is past the vmalloced size. Reject padding and check that computed user offset (sum of ebt_entry structure plus all individual matches/watchers/targets) is same value that userspace gave us as the offset of the next entry. Reported-by: syzbot+f68108fed972453a0ad4@syzkaller.appspotmail.com Fixes: 81e675c227ec ("netfilter: ebtables: add CONFIG_COMPAT support") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/bridge/netfilter/ebtables.c | 35 ++++++++++++++++----------------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 100b4f88179a..35a670ec9077 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1876,7 +1876,7 @@ static int ebt_buf_count(struct ebt_entries_buf_state *state, unsigned int sz) } static int ebt_buf_add(struct ebt_entries_buf_state *state, - void *data, unsigned int sz) + const void *data, unsigned int sz) { if (state->buf_kern_start == NULL) goto count_only; @@ -1910,7 +1910,7 @@ enum compat_mwt { EBT_COMPAT_TARGET, }; -static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt, +static int compat_mtw_from_user(const struct compat_ebt_entry_mwt *mwt, enum compat_mwt compat_mwt, struct ebt_entries_buf_state *state, const unsigned char *base) @@ -1986,22 +1986,23 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt, /* return size of all matches, watchers or target, including necessary * alignment and padding. */ -static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32, +static int ebt_size_mwt(const struct compat_ebt_entry_mwt *match32, unsigned int size_left, enum compat_mwt type, struct ebt_entries_buf_state *state, const void *base) { + const char *buf = (const char *)match32; int growth = 0; - char *buf; if (size_left == 0) return 0; - buf = (char *) match32; - - while (size_left >= sizeof(*match32)) { + do { struct ebt_entry_match *match_kern; int ret; + if (size_left < sizeof(*match32)) + return -EINVAL; + match_kern = (struct ebt_entry_match *) state->buf_kern_start; if (match_kern) { char *tmp; @@ -2038,22 +2039,18 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32, if (match_kern) match_kern->match_size = ret; - /* rule should have no remaining data after target */ - if (type == EBT_COMPAT_TARGET && size_left) - return -EINVAL; - match32 = (struct compat_ebt_entry_mwt *) buf; - } + } while (size_left); return growth; } /* called for all ebt_entry structures. */ -static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, +static int size_entry_mwt(const struct ebt_entry *entry, const unsigned char *base, unsigned int *total, struct ebt_entries_buf_state *state) { - unsigned int i, j, startoff, new_offset = 0; + unsigned int i, j, startoff, next_expected_off, new_offset = 0; /* stores match/watchers/targets & offset of next struct ebt_entry: */ unsigned int offsets[4]; unsigned int *offsets_update = NULL; @@ -2140,11 +2137,13 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, return ret; } - startoff = state->buf_user_offset - startoff; - - if (WARN_ON(*total < startoff)) + next_expected_off = state->buf_user_offset - startoff; + if (next_expected_off != entry->next_offset) return -EINVAL; - *total -= startoff; + + if (*total < entry->next_offset) + return -EINVAL; + *total -= entry->next_offset; return 0; } From 8b58905f212b4880d94b5b8ae54e5b84e311947d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 12 Dec 2019 10:32:13 -0800 Subject: [PATCH 1979/3715] 6pack,mkiss: fix possible deadlock commit 5c9934b6767b16ba60be22ec3cbd4379ad64170d upstream. We got another syzbot report [1] that tells us we must use write_lock_irq()/write_unlock_irq() to avoid possible deadlock. [1] WARNING: inconsistent lock state 5.5.0-rc1-syzkaller #0 Not tainted -------------------------------- inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-R} usage. syz-executor826/9605 [HC1[1]:SC0[0]:HE0:SE1] takes: ffffffff8a128718 (disc_data_lock){+-..}, at: sp_get.isra.0+0x1d/0xf0 drivers/net/ppp/ppp_synctty.c:138 {HARDIRQ-ON-W} state was registered at: lock_acquire+0x190/0x410 kernel/locking/lockdep.c:4485 __raw_write_lock_bh include/linux/rwlock_api_smp.h:203 [inline] _raw_write_lock_bh+0x33/0x50 kernel/locking/spinlock.c:319 sixpack_close+0x1d/0x250 drivers/net/hamradio/6pack.c:657 tty_ldisc_close.isra.0+0x119/0x1a0 drivers/tty/tty_ldisc.c:489 tty_set_ldisc+0x230/0x6b0 drivers/tty/tty_ldisc.c:585 tiocsetd drivers/tty/tty_io.c:2337 [inline] tty_ioctl+0xe8d/0x14f0 drivers/tty/tty_io.c:2597 vfs_ioctl fs/ioctl.c:47 [inline] file_ioctl fs/ioctl.c:545 [inline] do_vfs_ioctl+0x977/0x14e0 fs/ioctl.c:732 ksys_ioctl+0xab/0xd0 fs/ioctl.c:749 __do_sys_ioctl fs/ioctl.c:756 [inline] __se_sys_ioctl fs/ioctl.c:754 [inline] __x64_sys_ioctl+0x73/0xb0 fs/ioctl.c:754 do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe irq event stamp: 3946 hardirqs last enabled at (3945): [] __raw_spin_unlock_irq include/linux/spinlock_api_smp.h:168 [inline] hardirqs last enabled at (3945): [] _raw_spin_unlock_irq+0x23/0x80 kernel/locking/spinlock.c:199 hardirqs last disabled at (3946): [] trace_hardirqs_off_thunk+0x1a/0x1c arch/x86/entry/thunk_64.S:42 softirqs last enabled at (2658): [] spin_unlock_bh include/linux/spinlock.h:383 [inline] softirqs last enabled at (2658): [] clusterip_netdev_event+0x46f/0x670 net/ipv4/netfilter/ipt_CLUSTERIP.c:222 softirqs last disabled at (2656): [] spin_lock_bh include/linux/spinlock.h:343 [inline] softirqs last disabled at (2656): [] clusterip_netdev_event+0x1bb/0x670 net/ipv4/netfilter/ipt_CLUSTERIP.c:196 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(disc_data_lock); lock(disc_data_lock); *** DEADLOCK *** 5 locks held by syz-executor826/9605: #0: ffff8880a905e198 (&tty->legacy_mutex){+.+.}, at: tty_lock+0xc7/0x130 drivers/tty/tty_mutex.c:19 #1: ffffffff899a56c0 (rcu_read_lock){....}, at: mutex_spin_on_owner+0x0/0x330 kernel/locking/mutex.c:413 #2: ffff8880a496a2b0 (&(&i->lock)->rlock){-.-.}, at: spin_lock include/linux/spinlock.h:338 [inline] #2: ffff8880a496a2b0 (&(&i->lock)->rlock){-.-.}, at: serial8250_interrupt+0x2d/0x1a0 drivers/tty/serial/8250/8250_core.c:116 #3: ffffffff8c104048 (&port_lock_key){-.-.}, at: serial8250_handle_irq.part.0+0x24/0x330 drivers/tty/serial/8250/8250_port.c:1823 #4: ffff8880a905e090 (&tty->ldisc_sem){++++}, at: tty_ldisc_ref+0x22/0x90 drivers/tty/tty_ldisc.c:288 stack backtrace: CPU: 1 PID: 9605 Comm: syz-executor826 Not tainted 5.5.0-rc1-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x197/0x210 lib/dump_stack.c:118 print_usage_bug.cold+0x327/0x378 kernel/locking/lockdep.c:3101 valid_state kernel/locking/lockdep.c:3112 [inline] mark_lock_irq kernel/locking/lockdep.c:3309 [inline] mark_lock+0xbb4/0x1220 kernel/locking/lockdep.c:3666 mark_usage kernel/locking/lockdep.c:3554 [inline] __lock_acquire+0x1e55/0x4a00 kernel/locking/lockdep.c:3909 lock_acquire+0x190/0x410 kernel/locking/lockdep.c:4485 __raw_read_lock include/linux/rwlock_api_smp.h:149 [inline] _raw_read_lock+0x32/0x50 kernel/locking/spinlock.c:223 sp_get.isra.0+0x1d/0xf0 drivers/net/ppp/ppp_synctty.c:138 sixpack_write_wakeup+0x25/0x340 drivers/net/hamradio/6pack.c:402 tty_wakeup+0xe9/0x120 drivers/tty/tty_io.c:536 tty_port_default_wakeup+0x2b/0x40 drivers/tty/tty_port.c:50 tty_port_tty_wakeup+0x57/0x70 drivers/tty/tty_port.c:387 uart_write_wakeup+0x46/0x70 drivers/tty/serial/serial_core.c:104 serial8250_tx_chars+0x495/0xaf0 drivers/tty/serial/8250/8250_port.c:1761 serial8250_handle_irq.part.0+0x2a2/0x330 drivers/tty/serial/8250/8250_port.c:1834 serial8250_handle_irq drivers/tty/serial/8250/8250_port.c:1820 [inline] serial8250_default_handle_irq+0xc0/0x150 drivers/tty/serial/8250/8250_port.c:1850 serial8250_interrupt+0xf1/0x1a0 drivers/tty/serial/8250/8250_core.c:126 __handle_irq_event_percpu+0x15d/0x970 kernel/irq/handle.c:149 handle_irq_event_percpu+0x74/0x160 kernel/irq/handle.c:189 handle_irq_event+0xa7/0x134 kernel/irq/handle.c:206 handle_edge_irq+0x25e/0x8d0 kernel/irq/chip.c:830 generic_handle_irq_desc include/linux/irqdesc.h:156 [inline] do_IRQ+0xde/0x280 arch/x86/kernel/irq.c:250 common_interrupt+0xf/0xf arch/x86/entry/entry_64.S:607 RIP: 0010:cpu_relax arch/x86/include/asm/processor.h:685 [inline] RIP: 0010:mutex_spin_on_owner+0x247/0x330 kernel/locking/mutex.c:579 Code: c3 be 08 00 00 00 4c 89 e7 e8 e5 06 59 00 4c 89 e0 48 c1 e8 03 42 80 3c 38 00 0f 85 e1 00 00 00 49 8b 04 24 a8 01 75 96 f3 90 2f fe ff ff 0f 0b e8 0d 19 09 00 84 c0 0f 85 ff fd ff ff 48 c7 RSP: 0018:ffffc90001eafa20 EFLAGS: 00000246 ORIG_RAX: ffffffffffffffd7 RAX: 0000000000000000 RBX: ffff88809fd9e0c0 RCX: 1ffffffff13266dd RDX: 0000000000000000 RSI: 0000000000000008 RDI: 0000000000000000 RBP: ffffc90001eafa60 R08: 1ffff11013d22898 R09: ffffed1013d22899 R10: ffffed1013d22898 R11: ffff88809e9144c7 R12: ffff8880a905e138 R13: ffff88809e9144c0 R14: 0000000000000000 R15: dffffc0000000000 mutex_optimistic_spin kernel/locking/mutex.c:673 [inline] __mutex_lock_common kernel/locking/mutex.c:962 [inline] __mutex_lock+0x32b/0x13c0 kernel/locking/mutex.c:1106 mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:1121 tty_lock+0xc7/0x130 drivers/tty/tty_mutex.c:19 tty_release+0xb5/0xe90 drivers/tty/tty_io.c:1665 __fput+0x2ff/0x890 fs/file_table.c:280 ____fput+0x16/0x20 fs/file_table.c:313 task_work_run+0x145/0x1c0 kernel/task_work.c:113 exit_task_work include/linux/task_work.h:22 [inline] do_exit+0x8e7/0x2ef0 kernel/exit.c:797 do_group_exit+0x135/0x360 kernel/exit.c:895 __do_sys_exit_group kernel/exit.c:906 [inline] __se_sys_exit_group kernel/exit.c:904 [inline] __x64_sys_exit_group+0x44/0x50 kernel/exit.c:904 do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x43fef8 Code: Bad RIP value. RSP: 002b:00007ffdb07d2338 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 000000000043fef8 RDX: 0000000000000000 RSI: 000000000000003c RDI: 0000000000000000 RBP: 00000000004bf730 R08: 00000000000000e7 R09: ffffffffffffffd0 R10: 00000000004002c8 R11: 0000000000000246 R12: 0000000000000001 R13: 00000000006d1180 R14: 0000000000000000 R15: 0000000000000000 Fixes: 6e4e2f811bad ("6pack,mkiss: fix lock inconsistency") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Arnd Bergmann Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/hamradio/6pack.c | 4 ++-- drivers/net/hamradio/mkiss.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 021a8ec411ab..6d4742d10a78 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -665,10 +665,10 @@ static void sixpack_close(struct tty_struct *tty) { struct sixpack *sp; - write_lock_bh(&disc_data_lock); + write_lock_irq(&disc_data_lock); sp = tty->disc_data; tty->disc_data = NULL; - write_unlock_bh(&disc_data_lock); + write_unlock_irq(&disc_data_lock); if (!sp) return; diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index aec6c26563cf..9fd7dab42a53 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -783,10 +783,10 @@ static void mkiss_close(struct tty_struct *tty) { struct mkiss *ax; - write_lock_bh(&disc_data_lock); + write_lock_irq(&disc_data_lock); ax = tty->disc_data; tty->disc_data = NULL; - write_unlock_bh(&disc_data_lock); + write_unlock_irq(&disc_data_lock); if (!ax) return; From ff194a90990e8186e087387c0638ffc4d621cd57 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 7 Dec 2019 14:43:39 -0800 Subject: [PATCH 1980/3715] netfilter: bridge: make sure to pull arp header in br_nf_forward_arp() commit 5604285839aaedfb23ebe297799c6e558939334d upstream. syzbot is kind enough to remind us we need to call skb_may_pull() BUG: KMSAN: uninit-value in br_nf_forward_arp+0xe61/0x1230 net/bridge/br_netfilter_hooks.c:665 CPU: 1 PID: 11631 Comm: syz-executor.1 Not tainted 5.4.0-rc8-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x220 lib/dump_stack.c:118 kmsan_report+0x128/0x220 mm/kmsan/kmsan_report.c:108 __msan_warning+0x64/0xc0 mm/kmsan/kmsan_instr.c:245 br_nf_forward_arp+0xe61/0x1230 net/bridge/br_netfilter_hooks.c:665 nf_hook_entry_hookfn include/linux/netfilter.h:135 [inline] nf_hook_slow+0x18b/0x3f0 net/netfilter/core.c:512 nf_hook include/linux/netfilter.h:260 [inline] NF_HOOK include/linux/netfilter.h:303 [inline] __br_forward+0x78f/0xe30 net/bridge/br_forward.c:109 br_flood+0xef0/0xfe0 net/bridge/br_forward.c:234 br_handle_frame_finish+0x1a77/0x1c20 net/bridge/br_input.c:162 nf_hook_bridge_pre net/bridge/br_input.c:245 [inline] br_handle_frame+0xfb6/0x1eb0 net/bridge/br_input.c:348 __netif_receive_skb_core+0x20b9/0x51a0 net/core/dev.c:4830 __netif_receive_skb_one_core net/core/dev.c:4927 [inline] __netif_receive_skb net/core/dev.c:5043 [inline] process_backlog+0x610/0x13c0 net/core/dev.c:5874 napi_poll net/core/dev.c:6311 [inline] net_rx_action+0x7a6/0x1aa0 net/core/dev.c:6379 __do_softirq+0x4a1/0x83a kernel/softirq.c:293 do_softirq_own_stack+0x49/0x80 arch/x86/entry/entry_64.S:1091 do_softirq kernel/softirq.c:338 [inline] __local_bh_enable_ip+0x184/0x1d0 kernel/softirq.c:190 local_bh_enable+0x36/0x40 include/linux/bottom_half.h:32 rcu_read_unlock_bh include/linux/rcupdate.h:688 [inline] __dev_queue_xmit+0x38e8/0x4200 net/core/dev.c:3819 dev_queue_xmit+0x4b/0x60 net/core/dev.c:3825 packet_snd net/packet/af_packet.c:2959 [inline] packet_sendmsg+0x8234/0x9100 net/packet/af_packet.c:2984 sock_sendmsg_nosec net/socket.c:637 [inline] sock_sendmsg net/socket.c:657 [inline] __sys_sendto+0xc44/0xc70 net/socket.c:1952 __do_sys_sendto net/socket.c:1964 [inline] __se_sys_sendto+0x107/0x130 net/socket.c:1960 __x64_sys_sendto+0x6e/0x90 net/socket.c:1960 do_syscall_64+0xb6/0x160 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x45a679 Code: ad b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f0a3c9e5c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000000006 RCX: 000000000045a679 RDX: 000000000000000e RSI: 0000000020000200 RDI: 0000000000000003 RBP: 000000000075bf20 R08: 00000000200000c0 R09: 0000000000000014 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f0a3c9e66d4 R13: 00000000004c8ec1 R14: 00000000004dfe28 R15: 00000000ffffffff Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:149 [inline] kmsan_internal_poison_shadow+0x5c/0x110 mm/kmsan/kmsan.c:132 kmsan_slab_alloc+0x97/0x100 mm/kmsan/kmsan_hooks.c:86 slab_alloc_node mm/slub.c:2773 [inline] __kmalloc_node_track_caller+0xe27/0x11a0 mm/slub.c:4381 __kmalloc_reserve net/core/skbuff.c:141 [inline] __alloc_skb+0x306/0xa10 net/core/skbuff.c:209 alloc_skb include/linux/skbuff.h:1049 [inline] alloc_skb_with_frags+0x18c/0xa80 net/core/skbuff.c:5662 sock_alloc_send_pskb+0xafd/0x10a0 net/core/sock.c:2244 packet_alloc_skb net/packet/af_packet.c:2807 [inline] packet_snd net/packet/af_packet.c:2902 [inline] packet_sendmsg+0x63a6/0x9100 net/packet/af_packet.c:2984 sock_sendmsg_nosec net/socket.c:637 [inline] sock_sendmsg net/socket.c:657 [inline] __sys_sendto+0xc44/0xc70 net/socket.c:1952 __do_sys_sendto net/socket.c:1964 [inline] __se_sys_sendto+0x107/0x130 net/socket.c:1960 __x64_sys_sendto+0x6e/0x90 net/socket.c:1960 do_syscall_64+0xb6/0x160 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: c4e70a87d975 ("netfilter: bridge: rename br_netfilter.c to br_netfilter_hooks.c") Signed-off-by: Eric Dumazet Reported-by: syzbot Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_netfilter_hooks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 89936e0d55c9..6feab2279143 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -643,6 +643,9 @@ static unsigned int br_nf_forward_arp(void *priv, nf_bridge_pull_encap_header(skb); } + if (unlikely(!pskb_may_pull(skb, sizeof(struct arphdr)))) + return NF_DROP; + if (arp_hdr(skb)->ar_pln != 4) { if (IS_VLAN_ARP(skb)) nf_bridge_push_encap_header(skb); From 43b6375db5c47b0117d78525cbce4fdc26259bc8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Nov 2019 10:30:42 -0800 Subject: [PATCH 1981/3715] inetpeer: fix data-race in inet_putpeer / inet_putpeer commit 71685eb4ce80ae9c49eff82ca4dd15acab215de9 upstream. We need to explicitely forbid read/store tearing in inet_peer_gc() and inet_putpeer(). The following syzbot report reminds us about inet_putpeer() running without a lock held. BUG: KCSAN: data-race in inet_putpeer / inet_putpeer write to 0xffff888121fb2ed0 of 4 bytes by interrupt on cpu 0: inet_putpeer+0x37/0xa0 net/ipv4/inetpeer.c:240 ip4_frag_free+0x3d/0x50 net/ipv4/ip_fragment.c:102 inet_frag_destroy_rcu+0x58/0x80 net/ipv4/inet_fragment.c:228 __rcu_reclaim kernel/rcu/rcu.h:222 [inline] rcu_do_batch+0x256/0x5b0 kernel/rcu/tree.c:2157 rcu_core+0x369/0x4d0 kernel/rcu/tree.c:2377 rcu_core_si+0x12/0x20 kernel/rcu/tree.c:2386 __do_softirq+0x115/0x33f kernel/softirq.c:292 invoke_softirq kernel/softirq.c:373 [inline] irq_exit+0xbb/0xe0 kernel/softirq.c:413 exiting_irq arch/x86/include/asm/apic.h:536 [inline] smp_apic_timer_interrupt+0xe6/0x280 arch/x86/kernel/apic/apic.c:1137 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:830 native_safe_halt+0xe/0x10 arch/x86/kernel/paravirt.c:71 arch_cpu_idle+0x1f/0x30 arch/x86/kernel/process.c:571 default_idle_call+0x1e/0x40 kernel/sched/idle.c:94 cpuidle_idle_call kernel/sched/idle.c:154 [inline] do_idle+0x1af/0x280 kernel/sched/idle.c:263 write to 0xffff888121fb2ed0 of 4 bytes by interrupt on cpu 1: inet_putpeer+0x37/0xa0 net/ipv4/inetpeer.c:240 ip4_frag_free+0x3d/0x50 net/ipv4/ip_fragment.c:102 inet_frag_destroy_rcu+0x58/0x80 net/ipv4/inet_fragment.c:228 __rcu_reclaim kernel/rcu/rcu.h:222 [inline] rcu_do_batch+0x256/0x5b0 kernel/rcu/tree.c:2157 rcu_core+0x369/0x4d0 kernel/rcu/tree.c:2377 rcu_core_si+0x12/0x20 kernel/rcu/tree.c:2386 __do_softirq+0x115/0x33f kernel/softirq.c:292 run_ksoftirqd+0x46/0x60 kernel/softirq.c:603 smpboot_thread_fn+0x37d/0x4a0 kernel/smpboot.c:165 kthread+0x1d4/0x200 drivers/block/aoe/aoecmd.c:1253 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:352 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 16 Comm: ksoftirqd/1 Not tainted 5.4.0-rc3+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 4b9d9be839fd ("inetpeer: remove unused list") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/inetpeer.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index f9cef27907ed..f94881412d5b 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -159,7 +159,12 @@ static void inet_peer_gc(struct inet_peer_base *base, base->total / inet_peer_threshold * HZ; for (i = 0; i < gc_cnt; i++) { p = gc_stack[i]; - delta = (__u32)jiffies - p->dtime; + + /* The READ_ONCE() pairs with the WRITE_ONCE() + * in inet_putpeer() + */ + delta = (__u32)jiffies - READ_ONCE(p->dtime); + if (delta < ttl || !refcount_dec_if_one(&p->refcnt)) gc_stack[i] = NULL; } @@ -236,7 +241,10 @@ EXPORT_SYMBOL_GPL(inet_getpeer); void inet_putpeer(struct inet_peer *p) { - p->dtime = (__u32)jiffies; + /* The WRITE_ONCE() pairs with itself (we run lockless) + * and the READ_ONCE() in inet_peer_gc() + */ + WRITE_ONCE(p->dtime, (__u32)jiffies); if (refcount_dec_and_test(&p->refcnt)) call_rcu(&p->rcu, inetpeer_free_rcu); From 80e6b3268f7a74e34c8919defc825d600dd6ae54 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Nov 2019 18:49:43 -0800 Subject: [PATCH 1982/3715] net: add a READ_ONCE() in skb_peek_tail() commit f8cc62ca3e660ae3fdaee533b1d554297cd2ae82 upstream. skb_peek_tail() can be used without protection of a lock, as spotted by KCSAN [1] In order to avoid load-stearing, add a READ_ONCE() Note that the corresponding WRITE_ONCE() are already there. [1] BUG: KCSAN: data-race in sk_wait_data / skb_queue_tail read to 0xffff8880b36a4118 of 8 bytes by task 20426 on cpu 1: skb_peek_tail include/linux/skbuff.h:1784 [inline] sk_wait_data+0x15b/0x250 net/core/sock.c:2477 kcm_wait_data+0x112/0x1f0 net/kcm/kcmsock.c:1103 kcm_recvmsg+0xac/0x320 net/kcm/kcmsock.c:1130 sock_recvmsg_nosec net/socket.c:871 [inline] sock_recvmsg net/socket.c:889 [inline] sock_recvmsg+0x92/0xb0 net/socket.c:885 ___sys_recvmsg+0x1a0/0x3e0 net/socket.c:2480 do_recvmmsg+0x19a/0x5c0 net/socket.c:2601 __sys_recvmmsg+0x1ef/0x200 net/socket.c:2680 __do_sys_recvmmsg net/socket.c:2703 [inline] __se_sys_recvmmsg net/socket.c:2696 [inline] __x64_sys_recvmmsg+0x89/0xb0 net/socket.c:2696 do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x44/0xa9 write to 0xffff8880b36a4118 of 8 bytes by task 451 on cpu 0: __skb_insert include/linux/skbuff.h:1852 [inline] __skb_queue_before include/linux/skbuff.h:1958 [inline] __skb_queue_tail include/linux/skbuff.h:1991 [inline] skb_queue_tail+0x7e/0xc0 net/core/skbuff.c:3145 kcm_queue_rcv_skb+0x202/0x310 net/kcm/kcmsock.c:206 kcm_rcv_strparser+0x74/0x4b0 net/kcm/kcmsock.c:370 __strp_recv+0x348/0xf50 net/strparser/strparser.c:309 strp_recv+0x84/0xa0 net/strparser/strparser.c:343 tcp_read_sock+0x174/0x5c0 net/ipv4/tcp.c:1639 strp_read_sock+0xd4/0x140 net/strparser/strparser.c:366 do_strp_work net/strparser/strparser.c:414 [inline] strp_work+0x9a/0xe0 net/strparser/strparser.c:423 process_one_work+0x3d4/0x890 kernel/workqueue.c:2269 worker_thread+0xa0/0x800 kernel/workqueue.c:2415 kthread+0x1d4/0x200 drivers/block/aoe/aoecmd.c:1253 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:352 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 451 Comm: kworker/u4:3 Not tainted 5.4.0-rc3+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: kstrp strp_work Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/skbuff.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ec00d9264e5c..a9a764a17c28 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1655,7 +1655,7 @@ static inline struct sk_buff *skb_peek_next(struct sk_buff *skb, */ static inline struct sk_buff *skb_peek_tail(const struct sk_buff_head *list_) { - struct sk_buff *skb = list_->prev; + struct sk_buff *skb = READ_ONCE(list_->prev); if (skb == (struct sk_buff *)list_) skb = NULL; @@ -1723,7 +1723,9 @@ static inline void __skb_insert(struct sk_buff *newsk, struct sk_buff *prev, struct sk_buff *next, struct sk_buff_head *list) { - /* see skb_queue_empty_lockless() for the opposite READ_ONCE() */ + /* See skb_queue_empty_lockless() and skb_peek_tail() + * for the opposite READ_ONCE() + */ WRITE_ONCE(newsk->next, next); WRITE_ONCE(newsk->prev, prev); WRITE_ONCE(next->prev, newsk); From 6b307f5c03e298d85c6ed45e91925066ef6fc834 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Nov 2019 10:34:47 -0800 Subject: [PATCH 1983/3715] net: icmp: fix data-race in cmp_global_allow() commit bbab7ef235031f6733b5429ae7877bfa22339712 upstream. This code reads two global variables without protection of a lock. We need READ_ONCE()/WRITE_ONCE() pairs to avoid load/store-tearing and better document the intent. KCSAN reported : BUG: KCSAN: data-race in icmp_global_allow / icmp_global_allow read to 0xffffffff861a8014 of 4 bytes by task 11201 on cpu 0: icmp_global_allow+0x36/0x1b0 net/ipv4/icmp.c:254 icmpv6_global_allow net/ipv6/icmp.c:184 [inline] icmpv6_global_allow net/ipv6/icmp.c:179 [inline] icmp6_send+0x493/0x1140 net/ipv6/icmp.c:514 icmpv6_send+0x71/0xb0 net/ipv6/ip6_icmp.c:43 ip6_link_failure+0x43/0x180 net/ipv6/route.c:2640 dst_link_failure include/net/dst.h:419 [inline] vti_xmit net/ipv4/ip_vti.c:243 [inline] vti_tunnel_xmit+0x27f/0xa50 net/ipv4/ip_vti.c:279 __netdev_start_xmit include/linux/netdevice.h:4420 [inline] netdev_start_xmit include/linux/netdevice.h:4434 [inline] xmit_one net/core/dev.c:3280 [inline] dev_hard_start_xmit+0xef/0x430 net/core/dev.c:3296 __dev_queue_xmit+0x14c9/0x1b60 net/core/dev.c:3873 dev_queue_xmit+0x21/0x30 net/core/dev.c:3906 neigh_direct_output+0x1f/0x30 net/core/neighbour.c:1530 neigh_output include/net/neighbour.h:511 [inline] ip6_finish_output2+0x7a6/0xec0 net/ipv6/ip6_output.c:116 __ip6_finish_output net/ipv6/ip6_output.c:142 [inline] __ip6_finish_output+0x2d7/0x330 net/ipv6/ip6_output.c:127 ip6_finish_output+0x41/0x160 net/ipv6/ip6_output.c:152 NF_HOOK_COND include/linux/netfilter.h:294 [inline] ip6_output+0xf2/0x280 net/ipv6/ip6_output.c:175 dst_output include/net/dst.h:436 [inline] ip6_local_out+0x74/0x90 net/ipv6/output_core.c:179 write to 0xffffffff861a8014 of 4 bytes by task 11183 on cpu 1: icmp_global_allow+0x174/0x1b0 net/ipv4/icmp.c:272 icmpv6_global_allow net/ipv6/icmp.c:184 [inline] icmpv6_global_allow net/ipv6/icmp.c:179 [inline] icmp6_send+0x493/0x1140 net/ipv6/icmp.c:514 icmpv6_send+0x71/0xb0 net/ipv6/ip6_icmp.c:43 ip6_link_failure+0x43/0x180 net/ipv6/route.c:2640 dst_link_failure include/net/dst.h:419 [inline] vti_xmit net/ipv4/ip_vti.c:243 [inline] vti_tunnel_xmit+0x27f/0xa50 net/ipv4/ip_vti.c:279 __netdev_start_xmit include/linux/netdevice.h:4420 [inline] netdev_start_xmit include/linux/netdevice.h:4434 [inline] xmit_one net/core/dev.c:3280 [inline] dev_hard_start_xmit+0xef/0x430 net/core/dev.c:3296 __dev_queue_xmit+0x14c9/0x1b60 net/core/dev.c:3873 dev_queue_xmit+0x21/0x30 net/core/dev.c:3906 neigh_direct_output+0x1f/0x30 net/core/neighbour.c:1530 neigh_output include/net/neighbour.h:511 [inline] ip6_finish_output2+0x7a6/0xec0 net/ipv6/ip6_output.c:116 __ip6_finish_output net/ipv6/ip6_output.c:142 [inline] __ip6_finish_output+0x2d7/0x330 net/ipv6/ip6_output.c:127 ip6_finish_output+0x41/0x160 net/ipv6/ip6_output.c:152 NF_HOOK_COND include/linux/netfilter.h:294 [inline] ip6_output+0xf2/0x280 net/ipv6/ip6_output.c:175 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 11183 Comm: syz-executor.2 Not tainted 5.4.0-rc3+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 4cdf507d5452 ("icmp: add a global rate limitation") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/icmp.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index f9d790b058d2..995ef3d23368 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -254,10 +254,11 @@ bool icmp_global_allow(void) bool rc = false; /* Check if token bucket is empty and cannot be refilled - * without taking the spinlock. + * without taking the spinlock. The READ_ONCE() are paired + * with the following WRITE_ONCE() in this same function. */ - if (!icmp_global.credit) { - delta = min_t(u32, now - icmp_global.stamp, HZ); + if (!READ_ONCE(icmp_global.credit)) { + delta = min_t(u32, now - READ_ONCE(icmp_global.stamp), HZ); if (delta < HZ / 50) return false; } @@ -267,14 +268,14 @@ bool icmp_global_allow(void) if (delta >= HZ / 50) { incr = sysctl_icmp_msgs_per_sec * delta / HZ ; if (incr) - icmp_global.stamp = now; + WRITE_ONCE(icmp_global.stamp, now); } credit = min_t(u32, icmp_global.credit + incr, sysctl_icmp_msgs_burst); if (credit) { credit--; rc = true; } - icmp_global.credit = credit; + WRITE_ONCE(icmp_global.credit, credit); spin_unlock(&icmp_global.lock); return rc; } From a51afeedc6e9980d83b6a28b25b984dd83ac77f6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Nov 2019 09:48:04 -0800 Subject: [PATCH 1984/3715] hrtimer: Annotate lockless access to timer->state commit 56144737e67329c9aaed15f942d46a6302e2e3d8 upstream. syzbot reported various data-race caused by hrtimer_is_queued() reading timer->state. A READ_ONCE() is required there to silence the warning. Also add the corresponding WRITE_ONCE() when timer->state is set. In remove_hrtimer() the hrtimer_is_queued() helper is open coded to avoid loading timer->state twice. KCSAN reported these cases: BUG: KCSAN: data-race in __remove_hrtimer / tcp_pacing_check write to 0xffff8880b2a7d388 of 1 bytes by interrupt on cpu 0: __remove_hrtimer+0x52/0x130 kernel/time/hrtimer.c:991 __run_hrtimer kernel/time/hrtimer.c:1496 [inline] __hrtimer_run_queues+0x250/0x600 kernel/time/hrtimer.c:1576 hrtimer_run_softirq+0x10e/0x150 kernel/time/hrtimer.c:1593 __do_softirq+0x115/0x33f kernel/softirq.c:292 run_ksoftirqd+0x46/0x60 kernel/softirq.c:603 smpboot_thread_fn+0x37d/0x4a0 kernel/smpboot.c:165 kthread+0x1d4/0x200 drivers/block/aoe/aoecmd.c:1253 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:352 read to 0xffff8880b2a7d388 of 1 bytes by task 24652 on cpu 1: tcp_pacing_check net/ipv4/tcp_output.c:2235 [inline] tcp_pacing_check+0xba/0x130 net/ipv4/tcp_output.c:2225 tcp_xmit_retransmit_queue+0x32c/0x5a0 net/ipv4/tcp_output.c:3044 tcp_xmit_recovery+0x7c/0x120 net/ipv4/tcp_input.c:3558 tcp_ack+0x17b6/0x3170 net/ipv4/tcp_input.c:3717 tcp_rcv_established+0x37e/0xf50 net/ipv4/tcp_input.c:5696 tcp_v4_do_rcv+0x381/0x4e0 net/ipv4/tcp_ipv4.c:1561 sk_backlog_rcv include/net/sock.h:945 [inline] __release_sock+0x135/0x1e0 net/core/sock.c:2435 release_sock+0x61/0x160 net/core/sock.c:2951 sk_stream_wait_memory+0x3d7/0x7c0 net/core/stream.c:145 tcp_sendmsg_locked+0xb47/0x1f30 net/ipv4/tcp.c:1393 tcp_sendmsg+0x39/0x60 net/ipv4/tcp.c:1434 inet_sendmsg+0x6d/0x90 net/ipv4/af_inet.c:807 sock_sendmsg_nosec net/socket.c:637 [inline] sock_sendmsg+0x9f/0xc0 net/socket.c:657 BUG: KCSAN: data-race in __remove_hrtimer / __tcp_ack_snd_check write to 0xffff8880a3a65588 of 1 bytes by interrupt on cpu 0: __remove_hrtimer+0x52/0x130 kernel/time/hrtimer.c:991 __run_hrtimer kernel/time/hrtimer.c:1496 [inline] __hrtimer_run_queues+0x250/0x600 kernel/time/hrtimer.c:1576 hrtimer_run_softirq+0x10e/0x150 kernel/time/hrtimer.c:1593 __do_softirq+0x115/0x33f kernel/softirq.c:292 invoke_softirq kernel/softirq.c:373 [inline] irq_exit+0xbb/0xe0 kernel/softirq.c:413 exiting_irq arch/x86/include/asm/apic.h:536 [inline] smp_apic_timer_interrupt+0xe6/0x280 arch/x86/kernel/apic/apic.c:1137 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:830 read to 0xffff8880a3a65588 of 1 bytes by task 22891 on cpu 1: __tcp_ack_snd_check+0x415/0x4f0 net/ipv4/tcp_input.c:5265 tcp_ack_snd_check net/ipv4/tcp_input.c:5287 [inline] tcp_rcv_established+0x750/0xf50 net/ipv4/tcp_input.c:5708 tcp_v4_do_rcv+0x381/0x4e0 net/ipv4/tcp_ipv4.c:1561 sk_backlog_rcv include/net/sock.h:945 [inline] __release_sock+0x135/0x1e0 net/core/sock.c:2435 release_sock+0x61/0x160 net/core/sock.c:2951 sk_stream_wait_memory+0x3d7/0x7c0 net/core/stream.c:145 tcp_sendmsg_locked+0xb47/0x1f30 net/ipv4/tcp.c:1393 tcp_sendmsg+0x39/0x60 net/ipv4/tcp.c:1434 inet_sendmsg+0x6d/0x90 net/ipv4/af_inet.c:807 sock_sendmsg_nosec net/socket.c:637 [inline] sock_sendmsg+0x9f/0xc0 net/socket.c:657 __sys_sendto+0x21f/0x320 net/socket.c:1952 __do_sys_sendto net/socket.c:1964 [inline] __se_sys_sendto net/socket.c:1960 [inline] __x64_sys_sendto+0x89/0xb0 net/socket.c:1960 do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 24652 Comm: syz-executor.3 Not tainted 5.4.0-rc3+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 [ tglx: Added comments ] Reported-by: syzbot Signed-off-by: Eric Dumazet Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20191106174804.74723-1-edumazet@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/hrtimer.h | 14 ++++++++++---- kernel/time/hrtimer.c | 11 +++++++---- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 012c37fdb688..5511dc963dd5 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -408,12 +408,18 @@ extern u64 hrtimer_get_next_event(void); extern bool hrtimer_active(const struct hrtimer *timer); -/* - * Helper function to check, whether the timer is on one of the queues +/** + * hrtimer_is_queued = check, whether the timer is on one of the queues + * @timer: Timer to check + * + * Returns: True if the timer is queued, false otherwise + * + * The function can be used lockless, but it gives only a current snapshot. */ -static inline int hrtimer_is_queued(struct hrtimer *timer) +static inline bool hrtimer_is_queued(struct hrtimer *timer) { - return timer->state & HRTIMER_STATE_ENQUEUED; + /* The READ_ONCE pairs with the update functions of timer->state */ + return !!(READ_ONCE(timer->state) & HRTIMER_STATE_ENQUEUED); } /* diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index d00e85ac10d6..ecce9122343b 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -845,7 +845,8 @@ static int enqueue_hrtimer(struct hrtimer *timer, base->cpu_base->active_bases |= 1 << base->index; - timer->state = HRTIMER_STATE_ENQUEUED; + /* Pairs with the lockless read in hrtimer_is_queued() */ + WRITE_ONCE(timer->state, HRTIMER_STATE_ENQUEUED); return timerqueue_add(&base->active, &timer->node); } @@ -867,7 +868,8 @@ static void __remove_hrtimer(struct hrtimer *timer, struct hrtimer_cpu_base *cpu_base = base->cpu_base; u8 state = timer->state; - timer->state = newstate; + /* Pairs with the lockless read in hrtimer_is_queued() */ + WRITE_ONCE(timer->state, newstate); if (!(state & HRTIMER_STATE_ENQUEUED)) return; @@ -894,8 +896,9 @@ static void __remove_hrtimer(struct hrtimer *timer, static inline int remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool restart) { - if (hrtimer_is_queued(timer)) { - u8 state = timer->state; + u8 state = timer->state; + + if (state & HRTIMER_STATE_ENQUEUED) { int reprogram; /* From a48be2b29a08531d1dc6b6c51791704312a482c6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 9 Dec 2019 15:27:27 +0000 Subject: [PATCH 1985/3715] spi: fsl: don't map irq during probe [ Upstream commit 3194d2533efffae8b815d84729ecc58b6a9000ab ] With lastest kernel, the following warning is observed at startup: [ 1.500609] ------------[ cut here ]------------ [ 1.505225] remove_proc_entry: removing non-empty directory 'irq/22', leaking at least 'fsl_spi' [ 1.514234] WARNING: CPU: 0 PID: 1 at fs/proc/generic.c:682 remove_proc_entry+0x198/0x1c0 [ 1.522403] CPU: 0 PID: 1 Comm: swapper Not tainted 5.4.0-s3k-dev-02248-g93532430a4ff #2564 [ 1.530724] NIP: c0197694 LR: c0197694 CTR: c0050d80 [ 1.535762] REGS: df4a5af0 TRAP: 0700 Not tainted (5.4.0-02248-g93532430a4ff) [ 1.543818] MSR: 00029032 CR: 22028222 XER: 00000000 [ 1.550524] [ 1.550524] GPR00: c0197694 df4a5ba8 df4a0000 00000054 00000000 00000000 00004a38 00000010 [ 1.550524] GPR08: c07c5a30 00000800 00000000 00001032 22000208 00000000 c0004b14 00000000 [ 1.550524] GPR16: 00000000 00000000 00000000 00000000 00000000 00000000 c0830000 c07fc078 [ 1.550524] GPR24: c08e8ca0 df665d10 df60ea98 c07c9db8 00000001 df5d5ae3 df5d5a80 df43f8e3 [ 1.585327] NIP [c0197694] remove_proc_entry+0x198/0x1c0 [ 1.590628] LR [c0197694] remove_proc_entry+0x198/0x1c0 [ 1.595829] Call Trace: [ 1.598280] [df4a5ba8] [c0197694] remove_proc_entry+0x198/0x1c0 (unreliable) [ 1.605321] [df4a5bd8] [c0067acc] unregister_irq_proc+0x5c/0x70 [ 1.611238] [df4a5bf8] [c005fbc4] free_desc+0x3c/0x80 [ 1.616286] [df4a5c18] [c005fe2c] irq_free_descs+0x70/0xa8 [ 1.621778] [df4a5c38] [c033d3fc] of_fsl_spi_probe+0xdc/0x3cc [ 1.627525] [df4a5c88] [c02f0f64] platform_drv_probe+0x44/0xa4 [ 1.633350] [df4a5c98] [c02eee44] really_probe+0x1ac/0x418 [ 1.638829] [df4a5cc8] [c02ed3e8] bus_for_each_drv+0x64/0xb0 [ 1.644481] [df4a5cf8] [c02ef950] __device_attach+0xd4/0x128 [ 1.650132] [df4a5d28] [c02ed61c] bus_probe_device+0xa0/0xbc [ 1.655783] [df4a5d48] [c02ebbe8] device_add+0x544/0x74c [ 1.661096] [df4a5d88] [c0382b78] of_platform_device_create_pdata+0xa4/0x100 [ 1.668131] [df4a5da8] [c0382cf4] of_platform_bus_create+0x120/0x20c [ 1.674474] [df4a5df8] [c0382d50] of_platform_bus_create+0x17c/0x20c [ 1.680818] [df4a5e48] [c0382e88] of_platform_bus_probe+0x9c/0xf0 [ 1.686907] [df4a5e68] [c0751404] __machine_initcall_cmpcpro_cmpcpro_declare_of_platform_devices+0x74/0x1a4 [ 1.696629] [df4a5e98] [c072a4cc] do_one_initcall+0x8c/0x1d4 [ 1.702282] [df4a5ef8] [c072a768] kernel_init_freeable+0x154/0x204 [ 1.708455] [df4a5f28] [c0004b2c] kernel_init+0x18/0x110 [ 1.713769] [df4a5f38] [c00122ac] ret_from_kernel_thread+0x14/0x1c [ 1.719926] Instruction dump: [ 1.722889] 2c030000 4182004c 3863ffb0 3c80c05f 80e3005c 388436a0 3c60c06d 7fa6eb78 [ 1.730630] 7fe5fb78 38840280 38634178 4be8c611 <0fe00000> 4bffff6c 3c60c071 7fe4fb78 [ 1.738556] ---[ end trace 05d0720bf2e352e2 ]--- The problem comes from the error path which calls irq_dispose_mapping() while the IRQ has been requested with devm_request_irq(). IRQ doesn't need to be mapped with irq_of_parse_and_map(). The only need is to get the IRQ virtual number. For that, use of_irq_to_resource() instead of the irq_of_parse_and_map()/irq_dispose_mapping() pair. Fixes: 500a32abaf81 ("spi: fsl: Call irq_dispose_mapping in err path") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Link: https://lore.kernel.org/r/518cfb83347d5372748e7fe72f94e2e9443d0d4a.1575905123.git.christophe.leroy@c-s.fr Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-fsl-spi.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-fsl-spi.c b/drivers/spi/spi-fsl-spi.c index 8f2e97857e8b..fb34ba3f2b23 100644 --- a/drivers/spi/spi-fsl-spi.c +++ b/drivers/spi/spi-fsl-spi.c @@ -832,8 +832,8 @@ static int of_fsl_spi_probe(struct platform_device *ofdev) if (ret) goto err; - irq = irq_of_parse_and_map(np, 0); - if (!irq) { + irq = of_irq_to_resource(np, 0, NULL); + if (irq <= 0) { ret = -EINVAL; goto err; } @@ -847,7 +847,6 @@ static int of_fsl_spi_probe(struct platform_device *ofdev) return 0; err: - irq_dispose_mapping(irq); if (type == TYPE_FSL) of_fsl_spi_free_chipselects(dev); return ret; From 52a6ba0b4a6efb04557c64e5c90bcd1b97c1238e Mon Sep 17 00:00:00 2001 From: David Engraf Date: Mon, 16 Dec 2019 09:54:03 +0100 Subject: [PATCH 1986/3715] tty/serial: atmel: fix out of range clock divider handling [ Upstream commit cb47b9f8630ae3fa3f5fbd0c7003faba7abdf711 ] Use MCK_DIV8 when the clock divider is > 65535. Unfortunately the mode register was already written thus the clock selection is ignored. Fix by doing the baud rate calulation before setting the mode. Fixes: 5bf5635ac170 ("tty/serial: atmel: add fractional baud rate support") Signed-off-by: David Engraf Acked-by: Ludovic Desroches Acked-by: Richard Genoud Cc: stable Link: https://lore.kernel.org/r/20191216085403.17050-1-david.engraf@sysgo.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/atmel_serial.c | 43 ++++++++++++++++--------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 9ee41ba0e55b..367ce812743e 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -2183,27 +2183,6 @@ static void atmel_set_termios(struct uart_port *port, struct ktermios *termios, mode |= ATMEL_US_USMODE_NORMAL; } - /* set the mode, clock divisor, parity, stop bits and data size */ - atmel_uart_writel(port, ATMEL_US_MR, mode); - - /* - * when switching the mode, set the RTS line state according to the - * new mode, otherwise keep the former state - */ - if ((old_mode & ATMEL_US_USMODE) != (mode & ATMEL_US_USMODE)) { - unsigned int rts_state; - - if ((mode & ATMEL_US_USMODE) == ATMEL_US_USMODE_HWHS) { - /* let the hardware control the RTS line */ - rts_state = ATMEL_US_RTSDIS; - } else { - /* force RTS line to low level */ - rts_state = ATMEL_US_RTSEN; - } - - atmel_uart_writel(port, ATMEL_US_CR, rts_state); - } - /* * Set the baud rate: * Fractional baudrate allows to setup output frequency more @@ -2229,6 +2208,28 @@ static void atmel_set_termios(struct uart_port *port, struct ktermios *termios, quot = cd | fp << ATMEL_US_FP_OFFSET; atmel_uart_writel(port, ATMEL_US_BRGR, quot); + + /* set the mode, clock divisor, parity, stop bits and data size */ + atmel_uart_writel(port, ATMEL_US_MR, mode); + + /* + * when switching the mode, set the RTS line state according to the + * new mode, otherwise keep the former state + */ + if ((old_mode & ATMEL_US_USMODE) != (mode & ATMEL_US_USMODE)) { + unsigned int rts_state; + + if ((mode & ATMEL_US_USMODE) == ATMEL_US_USMODE_HWHS) { + /* let the hardware control the RTS line */ + rts_state = ATMEL_US_RTSDIS; + } else { + /* force RTS line to low level */ + rts_state = ATMEL_US_RTSEN; + } + + atmel_uart_writel(port, ATMEL_US_CR, rts_state); + } + atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_RSTSTA | ATMEL_US_RSTRX); atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXEN | ATMEL_US_RXEN); From a6d311f2e79847424ded55f81f60fc5af1c1117d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 19 Nov 2019 16:46:41 +0100 Subject: [PATCH 1987/3715] pinctrl: baytrail: Really serialize all register accesses [ Upstream commit 40ecab551232972a39cdd8b6f17ede54a3fdb296 ] Commit 39ce8150a079 ("pinctrl: baytrail: Serialize all register access") added a spinlock around all register accesses because: "There is a hardware issue in Intel Baytrail where concurrent GPIO register access might result reads of 0xffffffff and writes might get dropped completely." Testing has shown that this does not catch all cases, there are still 2 problems remaining 1) The original fix uses a spinlock per byt_gpio device / struct, additional testing has shown that this is not sufficient concurent accesses to 2 different GPIO banks also suffer from the same problem. This commit fixes this by moving to a single global lock. 2) The original fix did not add a lock around the register accesses in the suspend/resume handling. Since pinctrl-baytrail.c is using normal suspend/resume handlers, interrupts are still enabled during suspend/resume handling. Nothing should be using the GPIOs when they are being taken down, _but_ the GPIOs themselves may still cause interrupts, which are likely to use (read) the triggering GPIO. So we need to protect against concurrent GPIO register accesses in the suspend/resume handlers too. This commit fixes this by adding the missing spin_lock / unlock calls. The 2 fixes together fix the Acer Switch 10 SW5-012 getting completely confused after a suspend resume. The DSDT for this device has a bug in its _LID method which reprograms the home and power button trigger- flags requesting both high and low _level_ interrupts so the IRQs for these 2 GPIOs continuously fire. This combined with the saving of registers during suspend, triggers concurrent GPIO register accesses resulting in saving 0xffffffff as pconf0 value during suspend and then when restoring this on resume the pinmux settings get all messed up, resulting in various I2C busses being stuck, the wifi no longer working and often the tablet simply not coming out of suspend at all. Cc: stable@vger.kernel.org Fixes: 39ce8150a079 ("pinctrl: baytrail: Serialize all register access") Signed-off-by: Hans de Goede Acked-by: Mika Westerberg Signed-off-by: Andy Shevchenko Signed-off-by: Sasha Levin --- drivers/pinctrl/intel/pinctrl-baytrail.c | 81 +++++++++++++----------- 1 file changed, 44 insertions(+), 37 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c index beeb7cbb5015..9df5d29d708d 100644 --- a/drivers/pinctrl/intel/pinctrl-baytrail.c +++ b/drivers/pinctrl/intel/pinctrl-baytrail.c @@ -204,7 +204,6 @@ struct byt_gpio { struct platform_device *pdev; struct pinctrl_dev *pctl_dev; struct pinctrl_desc pctl_desc; - raw_spinlock_t lock; const struct byt_pinctrl_soc_data *soc_data; struct byt_community *communities_copy; struct byt_gpio_pin_context *saved_context; @@ -715,6 +714,8 @@ static const struct byt_pinctrl_soc_data *byt_soc_data[] = { NULL, }; +static DEFINE_RAW_SPINLOCK(byt_lock); + static struct byt_community *byt_get_community(struct byt_gpio *vg, unsigned int pin) { @@ -856,7 +857,7 @@ static void byt_set_group_simple_mux(struct byt_gpio *vg, unsigned long flags; int i; - raw_spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&byt_lock, flags); for (i = 0; i < group.npins; i++) { void __iomem *padcfg0; @@ -876,7 +877,7 @@ static void byt_set_group_simple_mux(struct byt_gpio *vg, writel(value, padcfg0); } - raw_spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&byt_lock, flags); } static void byt_set_group_mixed_mux(struct byt_gpio *vg, @@ -886,7 +887,7 @@ static void byt_set_group_mixed_mux(struct byt_gpio *vg, unsigned long flags; int i; - raw_spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&byt_lock, flags); for (i = 0; i < group.npins; i++) { void __iomem *padcfg0; @@ -906,7 +907,7 @@ static void byt_set_group_mixed_mux(struct byt_gpio *vg, writel(value, padcfg0); } - raw_spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&byt_lock, flags); } static int byt_set_mux(struct pinctrl_dev *pctldev, unsigned int func_selector, @@ -955,11 +956,11 @@ static void byt_gpio_clear_triggering(struct byt_gpio *vg, unsigned int offset) unsigned long flags; u32 value; - raw_spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&byt_lock, flags); value = readl(reg); value &= ~(BYT_TRIG_POS | BYT_TRIG_NEG | BYT_TRIG_LVL); writel(value, reg); - raw_spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&byt_lock, flags); } static int byt_gpio_request_enable(struct pinctrl_dev *pctl_dev, @@ -971,7 +972,7 @@ static int byt_gpio_request_enable(struct pinctrl_dev *pctl_dev, u32 value, gpio_mux; unsigned long flags; - raw_spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&byt_lock, flags); /* * In most cases, func pin mux 000 means GPIO function. @@ -993,7 +994,7 @@ static int byt_gpio_request_enable(struct pinctrl_dev *pctl_dev, "pin %u forcibly re-configured as GPIO\n", offset); } - raw_spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&byt_lock, flags); pm_runtime_get(&vg->pdev->dev); @@ -1021,7 +1022,7 @@ static int byt_gpio_set_direction(struct pinctrl_dev *pctl_dev, unsigned long flags; u32 value; - raw_spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&byt_lock, flags); value = readl(val_reg); value &= ~BYT_DIR_MASK; @@ -1038,7 +1039,7 @@ static int byt_gpio_set_direction(struct pinctrl_dev *pctl_dev, "Potential Error: Setting GPIO with direct_irq_en to output"); writel(value, val_reg); - raw_spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&byt_lock, flags); return 0; } @@ -1107,11 +1108,11 @@ static int byt_pin_config_get(struct pinctrl_dev *pctl_dev, unsigned int offset, u32 conf, pull, val, debounce; u16 arg = 0; - raw_spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&byt_lock, flags); conf = readl(conf_reg); pull = conf & BYT_PULL_ASSIGN_MASK; val = readl(val_reg); - raw_spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&byt_lock, flags); switch (param) { case PIN_CONFIG_BIAS_DISABLE: @@ -1138,9 +1139,9 @@ static int byt_pin_config_get(struct pinctrl_dev *pctl_dev, unsigned int offset, if (!(conf & BYT_DEBOUNCE_EN)) return -EINVAL; - raw_spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&byt_lock, flags); debounce = readl(db_reg); - raw_spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&byt_lock, flags); switch (debounce & BYT_DEBOUNCE_PULSE_MASK) { case BYT_DEBOUNCE_PULSE_375US: @@ -1192,7 +1193,7 @@ static int byt_pin_config_set(struct pinctrl_dev *pctl_dev, u32 conf, val, debounce; int i, ret = 0; - raw_spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&byt_lock, flags); conf = readl(conf_reg); val = readl(val_reg); @@ -1300,7 +1301,7 @@ static int byt_pin_config_set(struct pinctrl_dev *pctl_dev, if (!ret) writel(conf, conf_reg); - raw_spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&byt_lock, flags); return ret; } @@ -1325,9 +1326,9 @@ static int byt_gpio_get(struct gpio_chip *chip, unsigned offset) unsigned long flags; u32 val; - raw_spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&byt_lock, flags); val = readl(reg); - raw_spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&byt_lock, flags); return !!(val & BYT_LEVEL); } @@ -1342,13 +1343,13 @@ static void byt_gpio_set(struct gpio_chip *chip, unsigned offset, int value) if (!reg) return; - raw_spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&byt_lock, flags); old_val = readl(reg); if (value) writel(old_val | BYT_LEVEL, reg); else writel(old_val & ~BYT_LEVEL, reg); - raw_spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&byt_lock, flags); } static int byt_gpio_get_direction(struct gpio_chip *chip, unsigned int offset) @@ -1361,9 +1362,9 @@ static int byt_gpio_get_direction(struct gpio_chip *chip, unsigned int offset) if (!reg) return -EINVAL; - raw_spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&byt_lock, flags); value = readl(reg); - raw_spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&byt_lock, flags); if (!(value & BYT_OUTPUT_EN)) return GPIOF_DIR_OUT; @@ -1406,14 +1407,14 @@ static void byt_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip) const char *label; unsigned int pin; - raw_spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&byt_lock, flags); pin = vg->soc_data->pins[i].number; reg = byt_gpio_reg(vg, pin, BYT_CONF0_REG); if (!reg) { seq_printf(s, "Could not retrieve pin %i conf0 reg\n", pin); - raw_spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&byt_lock, flags); continue; } conf0 = readl(reg); @@ -1422,11 +1423,11 @@ static void byt_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip) if (!reg) { seq_printf(s, "Could not retrieve pin %i val reg\n", pin); - raw_spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&byt_lock, flags); continue; } val = readl(reg); - raw_spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&byt_lock, flags); comm = byt_get_community(vg, pin); if (!comm) { @@ -1510,9 +1511,9 @@ static void byt_irq_ack(struct irq_data *d) if (!reg) return; - raw_spin_lock(&vg->lock); + raw_spin_lock(&byt_lock); writel(BIT(offset % 32), reg); - raw_spin_unlock(&vg->lock); + raw_spin_unlock(&byt_lock); } static void byt_irq_mask(struct irq_data *d) @@ -1536,7 +1537,7 @@ static void byt_irq_unmask(struct irq_data *d) if (!reg) return; - raw_spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&byt_lock, flags); value = readl(reg); switch (irqd_get_trigger_type(d)) { @@ -1557,7 +1558,7 @@ static void byt_irq_unmask(struct irq_data *d) writel(value, reg); - raw_spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&byt_lock, flags); } static int byt_irq_type(struct irq_data *d, unsigned int type) @@ -1571,7 +1572,7 @@ static int byt_irq_type(struct irq_data *d, unsigned int type) if (!reg || offset >= vg->chip.ngpio) return -EINVAL; - raw_spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&byt_lock, flags); value = readl(reg); WARN(value & BYT_DIRECT_IRQ_EN, @@ -1593,7 +1594,7 @@ static int byt_irq_type(struct irq_data *d, unsigned int type) else if (type & IRQ_TYPE_LEVEL_MASK) irq_set_handler_locked(d, handle_level_irq); - raw_spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&byt_lock, flags); return 0; } @@ -1629,9 +1630,9 @@ static void byt_gpio_irq_handler(struct irq_desc *desc) continue; } - raw_spin_lock(&vg->lock); + raw_spin_lock(&byt_lock); pending = readl(reg); - raw_spin_unlock(&vg->lock); + raw_spin_unlock(&byt_lock); for_each_set_bit(pin, &pending, 32) { virq = irq_find_mapping(vg->chip.irqdomain, base + pin); generic_handle_irq(virq); @@ -1833,8 +1834,6 @@ static int byt_pinctrl_probe(struct platform_device *pdev) return PTR_ERR(vg->pctl_dev); } - raw_spin_lock_init(&vg->lock); - ret = byt_gpio_probe(vg); if (ret) return ret; @@ -1850,8 +1849,11 @@ static int byt_gpio_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct byt_gpio *vg = platform_get_drvdata(pdev); + unsigned long flags; int i; + raw_spin_lock_irqsave(&byt_lock, flags); + for (i = 0; i < vg->soc_data->npins; i++) { void __iomem *reg; u32 value; @@ -1872,6 +1874,7 @@ static int byt_gpio_suspend(struct device *dev) vg->saved_context[i].val = value; } + raw_spin_unlock_irqrestore(&byt_lock, flags); return 0; } @@ -1879,8 +1882,11 @@ static int byt_gpio_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct byt_gpio *vg = platform_get_drvdata(pdev); + unsigned long flags; int i; + raw_spin_lock_irqsave(&byt_lock, flags); + for (i = 0; i < vg->soc_data->npins; i++) { void __iomem *reg; u32 value; @@ -1918,6 +1924,7 @@ static int byt_gpio_resume(struct device *dev) } } + raw_spin_unlock_irqrestore(&byt_lock, flags); return 0; } #endif From a6585c5b85341072f861f1b15e6378ddaf3c27e1 Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Tue, 10 Dec 2019 11:27:44 +0000 Subject: [PATCH 1988/3715] net: ena: fix napi handler misbehavior when the napi budget is zero [ Upstream commit 24dee0c7478d1a1e00abdf5625b7f921467325dc ] In netpoll the napi handler could be called with budget equal to zero. Current ENA napi handler doesn't take that into consideration. The napi handler handles Rx packets in a do-while loop. Currently, the budget check happens only after decrementing the budget, therefore the napi handler, in rare cases, could run over MAX_INT packets. In addition to that, this moves all budget related variables to int calculation and stop mixing u32 to avoid ambiguity Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index db6f6a877f63..d22b138c2b09 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1196,8 +1196,8 @@ static int ena_io_poll(struct napi_struct *napi, int budget) struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi); struct ena_ring *tx_ring, *rx_ring; - u32 tx_work_done; - u32 rx_work_done; + int tx_work_done; + int rx_work_done = 0; int tx_budget; int napi_comp_call = 0; int ret; @@ -1214,7 +1214,11 @@ static int ena_io_poll(struct napi_struct *napi, int budget) } tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget); - rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget); + /* On netpoll the budget is zero and the handler should only clean the + * tx completions. + */ + if (likely(budget)) + rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget); /* If the device is about to reset or down, avoid unmask * the interrupt and return 0 so NAPI won't reschedule From b8ae180a2500819f4d19b620f112a1386ef4cd3c Mon Sep 17 00:00:00 2001 From: Vladyslav Tarasiuk Date: Thu, 26 Dec 2019 10:41:56 +0200 Subject: [PATCH 1989/3715] net/mlxfw: Fix out-of-memory error in mfa2 flash burning [ Upstream commit a5bcd72e054aabb93ddc51ed8cde36a5bfc50271 ] The burning process requires to perform internal allocations of large chunks of memory. This memory doesn't need to be contiguous and can be safely allocated by vzalloc() instead of kzalloc(). This patch changes such allocation to avoid possible out-of-memory failure. Fixes: 410ed13cae39 ("Add the mlxfw module for Mellanox firmware flash process") Signed-off-by: Vladyslav Tarasiuk Reviewed-by: Aya Levin Signed-off-by: Leon Romanovsky Tested-by: Ido Schimmel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c index 993cb5ba934e..b99169a386eb 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include "mlxfw_mfa2.h" #include "mlxfw_mfa2_file.h" @@ -579,7 +580,7 @@ mlxfw_mfa2_file_component_get(const struct mlxfw_mfa2_file *mfa2_file, comp_size = be32_to_cpu(comp->size); comp_buf_size = comp_size + mlxfw_mfa2_comp_magic_len; - comp_data = kmalloc(sizeof(*comp_data) + comp_buf_size, GFP_KERNEL); + comp_data = vzalloc(sizeof(*comp_data) + comp_buf_size); if (!comp_data) return ERR_PTR(-ENOMEM); comp_data->comp.data_size = comp_size; @@ -601,7 +602,7 @@ mlxfw_mfa2_file_component_get(const struct mlxfw_mfa2_file *mfa2_file, comp_data->comp.data = comp_data->buff + mlxfw_mfa2_comp_magic_len; return &comp_data->comp; err_out: - kfree(comp_data); + vfree(comp_data); return ERR_PTR(err); } @@ -610,7 +611,7 @@ void mlxfw_mfa2_file_component_put(struct mlxfw_mfa2_component *comp) const struct mlxfw_mfa2_comp_data *comp_data; comp_data = container_of(comp, struct mlxfw_mfa2_comp_data, comp); - kfree(comp_data); + vfree(comp_data); } void mlxfw_mfa2_file_fini(struct mlxfw_mfa2_file *mfa2_file) From 2dece4d6d13fe179ee3a5991811712725a56e2f7 Mon Sep 17 00:00:00 2001 From: Vladis Dronov Date: Fri, 27 Dec 2019 03:26:27 +0100 Subject: [PATCH 1990/3715] ptp: fix the race between the release of ptp_clock and cdev [ Upstream commit a33121e5487b424339636b25c35d3a180eaa5f5e ] In a case when a ptp chardev (like /dev/ptp0) is open but an underlying device is removed, closing this file leads to a race. This reproduces easily in a kvm virtual machine: ts# cat openptp0.c int main() { ... fp = fopen("/dev/ptp0", "r"); ... sleep(10); } ts# uname -r 5.5.0-rc3-46cf053e ts# cat /proc/cmdline ... slub_debug=FZP ts# modprobe ptp_kvm ts# ./openptp0 & [1] 670 opened /dev/ptp0, sleeping 10s... ts# rmmod ptp_kvm ts# ls /dev/ptp* ls: cannot access '/dev/ptp*': No such file or directory ts# ...woken up [ 48.010809] general protection fault: 0000 [#1] SMP [ 48.012502] CPU: 6 PID: 658 Comm: openptp0 Not tainted 5.5.0-rc3-46cf053e #25 [ 48.014624] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), ... [ 48.016270] RIP: 0010:module_put.part.0+0x7/0x80 [ 48.017939] RSP: 0018:ffffb3850073be00 EFLAGS: 00010202 [ 48.018339] RAX: 000000006b6b6b6b RBX: 6b6b6b6b6b6b6b6b RCX: ffff89a476c00ad0 [ 48.018936] RDX: fffff65a08d3ea08 RSI: 0000000000000247 RDI: 6b6b6b6b6b6b6b6b [ 48.019470] ... ^^^ a slub poison [ 48.023854] Call Trace: [ 48.024050] __fput+0x21f/0x240 [ 48.024288] task_work_run+0x79/0x90 [ 48.024555] do_exit+0x2af/0xab0 [ 48.024799] ? vfs_write+0x16a/0x190 [ 48.025082] do_group_exit+0x35/0x90 [ 48.025387] __x64_sys_exit_group+0xf/0x10 [ 48.025737] do_syscall_64+0x3d/0x130 [ 48.026056] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 48.026479] RIP: 0033:0x7f53b12082f6 [ 48.026792] ... [ 48.030945] Modules linked in: ptp i6300esb watchdog [last unloaded: ptp_kvm] [ 48.045001] Fixing recursive fault but reboot is needed! This happens in: static void __fput(struct file *file) { ... if (file->f_op->release) file->f_op->release(inode, file); <<< cdev is kfree'd here if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && !(mode & FMODE_PATH))) { cdev_put(inode->i_cdev); <<< cdev fields are accessed here Namely: __fput() posix_clock_release() kref_put(&clk->kref, delete_clock) <<< the last reference delete_clock() delete_ptp_clock() kfree(ptp) <<< cdev is embedded in ptp cdev_put module_put(p->owner) <<< *p is kfree'd, bang! Here cdev is embedded in posix_clock which is embedded in ptp_clock. The race happens because ptp_clock's lifetime is controlled by two refcounts: kref and cdev.kobj in posix_clock. This is wrong. Make ptp_clock's sysfs device a parent of cdev with cdev_device_add() created especially for such cases. This way the parent device with its ptp_clock is not released until all references to the cdev are released. This adds a requirement that an initialized but not exposed struct device should be provided to posix_clock_register() by a caller instead of a simple dev_t. This approach was adopted from the commit 72139dfa2464 ("watchdog: Fix the race between the release of watchdog_core_data and cdev"). See details of the implementation in the commit 233ed09d7fda ("chardev: add helper function to register char devs with a struct device"). Link: https://lore.kernel.org/linux-fsdevel/20191125125342.6189-1-vdronov@redhat.com/T/#u Analyzed-by: Stephen Johnston Analyzed-by: Vern Lovejoy Signed-off-by: Vladis Dronov Acked-by: Richard Cochran Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/ptp/ptp_clock.c | 31 ++++++++++++++----------------- drivers/ptp/ptp_private.h | 2 +- include/linux/posix-clock.h | 19 +++++++++++-------- kernel/time/posix-clock.c | 31 +++++++++++++------------------ 4 files changed, 39 insertions(+), 44 deletions(-) diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index c64903a5978f..b818f65480c1 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -175,9 +175,9 @@ static struct posix_clock_operations ptp_clock_ops = { .read = ptp_read, }; -static void delete_ptp_clock(struct posix_clock *pc) +static void ptp_clock_release(struct device *dev) { - struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); + struct ptp_clock *ptp = container_of(dev, struct ptp_clock, dev); mutex_destroy(&ptp->tsevq_mux); mutex_destroy(&ptp->pincfg_mux); @@ -222,7 +222,6 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, } ptp->clock.ops = ptp_clock_ops; - ptp->clock.release = delete_ptp_clock; ptp->info = info; ptp->devid = MKDEV(major, index); ptp->index = index; @@ -249,15 +248,6 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, if (err) goto no_pin_groups; - /* Create a new device in our class. */ - ptp->dev = device_create_with_groups(ptp_class, parent, ptp->devid, - ptp, ptp->pin_attr_groups, - "ptp%d", ptp->index); - if (IS_ERR(ptp->dev)) { - err = PTR_ERR(ptp->dev); - goto no_device; - } - /* Register a new PPS source. */ if (info->pps) { struct pps_source_info pps; @@ -273,8 +263,18 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, } } - /* Create a posix clock. */ - err = posix_clock_register(&ptp->clock, ptp->devid); + /* Initialize a new device of our class in our clock structure. */ + device_initialize(&ptp->dev); + ptp->dev.devt = ptp->devid; + ptp->dev.class = ptp_class; + ptp->dev.parent = parent; + ptp->dev.groups = ptp->pin_attr_groups; + ptp->dev.release = ptp_clock_release; + dev_set_drvdata(&ptp->dev, ptp); + dev_set_name(&ptp->dev, "ptp%d", ptp->index); + + /* Create a posix clock and link it to the device. */ + err = posix_clock_register(&ptp->clock, &ptp->dev); if (err) { pr_err("failed to create posix clock\n"); goto no_clock; @@ -286,8 +286,6 @@ no_clock: if (ptp->pps_source) pps_unregister_source(ptp->pps_source); no_pps: - device_destroy(ptp_class, ptp->devid); -no_device: ptp_cleanup_pin_groups(ptp); no_pin_groups: if (ptp->kworker) @@ -317,7 +315,6 @@ int ptp_clock_unregister(struct ptp_clock *ptp) if (ptp->pps_source) pps_unregister_source(ptp->pps_source); - device_destroy(ptp_class, ptp->devid); ptp_cleanup_pin_groups(ptp); posix_clock_unregister(&ptp->clock); diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h index b86f1bfecd6f..45ed9e172bb4 100644 --- a/drivers/ptp/ptp_private.h +++ b/drivers/ptp/ptp_private.h @@ -41,7 +41,7 @@ struct timestamp_event_queue { struct ptp_clock { struct posix_clock clock; - struct device *dev; + struct device dev; struct ptp_clock_info *info; dev_t devid; int index; /* index into clocks.map */ diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h index 38d8225510f1..3097b08c55cb 100644 --- a/include/linux/posix-clock.h +++ b/include/linux/posix-clock.h @@ -82,29 +82,32 @@ struct posix_clock_operations { * * @ops: Functional interface to the clock * @cdev: Character device instance for this clock - * @kref: Reference count. + * @dev: Pointer to the clock's device. * @rwsem: Protects the 'zombie' field from concurrent access. * @zombie: If 'zombie' is true, then the hardware has disappeared. - * @release: A function to free the structure when the reference count reaches - * zero. May be NULL if structure is statically allocated. * * Drivers should embed their struct posix_clock within a private * structure, obtaining a reference to it during callbacks using * container_of(). + * + * Drivers should supply an initialized but not exposed struct device + * to posix_clock_register(). It is used to manage lifetime of the + * driver's private structure. It's 'release' field should be set to + * a release function for this private structure. */ struct posix_clock { struct posix_clock_operations ops; struct cdev cdev; - struct kref kref; + struct device *dev; struct rw_semaphore rwsem; bool zombie; - void (*release)(struct posix_clock *clk); }; /** * posix_clock_register() - register a new clock - * @clk: Pointer to the clock. Caller must provide 'ops' and 'release' - * @devid: Allocated device id + * @clk: Pointer to the clock. Caller must provide 'ops' field + * @dev: Pointer to the initialized device. Caller must provide + * 'release' field * * A clock driver calls this function to register itself with the * clock device subsystem. If 'clk' points to dynamically allocated @@ -113,7 +116,7 @@ struct posix_clock { * * Returns zero on success, non-zero otherwise. */ -int posix_clock_register(struct posix_clock *clk, dev_t devid); +int posix_clock_register(struct posix_clock *clk, struct device *dev); /** * posix_clock_unregister() - unregister a clock diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index 17cdc554c9fe..e5706a826c1f 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c @@ -27,8 +27,6 @@ #include "posix-timers.h" -static void delete_clock(struct kref *kref); - /* * Returns NULL if the posix_clock instance attached to 'fp' is old and stale. */ @@ -138,7 +136,7 @@ static int posix_clock_open(struct inode *inode, struct file *fp) err = 0; if (!err) { - kref_get(&clk->kref); + get_device(clk->dev); fp->private_data = clk; } out: @@ -154,7 +152,7 @@ static int posix_clock_release(struct inode *inode, struct file *fp) if (clk->ops.release) err = clk->ops.release(clk); - kref_put(&clk->kref, delete_clock); + put_device(clk->dev); fp->private_data = NULL; @@ -174,38 +172,35 @@ static const struct file_operations posix_clock_file_operations = { #endif }; -int posix_clock_register(struct posix_clock *clk, dev_t devid) +int posix_clock_register(struct posix_clock *clk, struct device *dev) { int err; - kref_init(&clk->kref); init_rwsem(&clk->rwsem); cdev_init(&clk->cdev, &posix_clock_file_operations); + err = cdev_device_add(&clk->cdev, dev); + if (err) { + pr_err("%s unable to add device %d:%d\n", + dev_name(dev), MAJOR(dev->devt), MINOR(dev->devt)); + return err; + } clk->cdev.owner = clk->ops.owner; - err = cdev_add(&clk->cdev, devid, 1); + clk->dev = dev; - return err; + return 0; } EXPORT_SYMBOL_GPL(posix_clock_register); -static void delete_clock(struct kref *kref) -{ - struct posix_clock *clk = container_of(kref, struct posix_clock, kref); - - if (clk->release) - clk->release(clk); -} - void posix_clock_unregister(struct posix_clock *clk) { - cdev_del(&clk->cdev); + cdev_device_del(&clk->cdev, clk->dev); down_write(&clk->rwsem); clk->zombie = true; up_write(&clk->rwsem); - kref_put(&clk->kref, delete_clock); + put_device(clk->dev); } EXPORT_SYMBOL_GPL(posix_clock_unregister); From 343f3056b542cf9c64c18c43a764752067887b14 Mon Sep 17 00:00:00 2001 From: Antonio Messina Date: Thu, 19 Dec 2019 15:08:03 +0100 Subject: [PATCH 1991/3715] udp: fix integer overflow while computing available space in sk_rcvbuf [ Upstream commit feed8a4fc9d46c3126fb9fcae0e9248270c6321a ] When the size of the receive buffer for a socket is close to 2^31 when computing if we have enough space in the buffer to copy a packet from the queue to the buffer we might hit an integer overflow. When an user set net.core.rmem_default to a value close to 2^31 UDP packets are dropped because of this overflow. This can be visible, for instance, with failure to resolve hostnames. This can be fixed by casting sk_rcvbuf (which is an int) to unsigned int, similarly to how it is done in TCP. Signed-off-by: Antonio Messina Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/udp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ab3f272a0884..e33258d69246 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1338,7 +1338,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) * queue contains some other skb */ rmem = atomic_add_return(size, &sk->sk_rmem_alloc); - if (rmem > (size + sk->sk_rcvbuf)) + if (rmem > (size + (unsigned int)sk->sk_rcvbuf)) goto uncharge_drop; spin_lock(&list->lock); From 1f8b45f101ef715f5574bff6b51d5617097df0f7 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 6 Dec 2019 15:39:12 +0100 Subject: [PATCH 1992/3715] vhost/vsock: accept only packets with the right dst_cid [ Upstream commit 8a3cc29c316c17de590e3ff8b59f3d6cbfd37b0a ] When we receive a new packet from the guest, we check if the src_cid is correct, but we forgot to check the dst_cid. The host should accept only packets where dst_cid is equal to the host CID. Signed-off-by: Stefano Garzarella Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/vsock.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 491de830b8d9..6391dc5b0ebe 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -436,7 +436,9 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) virtio_transport_deliver_tap_pkt(pkt); /* Only accept correctly addressed packets */ - if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid) + if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid && + le64_to_cpu(pkt->hdr.dst_cid) == + vhost_transport_get_local_cid()) virtio_transport_recv_pkt(pkt); else virtio_transport_free_pkt(pkt); From 7ae78f9bbb51d2515c6e5abfde9461a7c51e1caf Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sun, 22 Dec 2019 10:51:09 +0800 Subject: [PATCH 1993/3715] net: add bool confirm_neigh parameter for dst_ops.update_pmtu [ Upstream commit bd085ef678b2cc8c38c105673dfe8ff8f5ec0c57 ] The MTU update code is supposed to be invoked in response to real networking events that update the PMTU. In IPv6 PMTU update function __ip6_rt_update_pmtu() we called dst_confirm_neigh() to update neighbor confirmed time. But for tunnel code, it will call pmtu before xmit, like: - tnl_update_pmtu() - skb_dst_update_pmtu() - ip6_rt_update_pmtu() - __ip6_rt_update_pmtu() - dst_confirm_neigh() If the tunnel remote dst mac address changed and we still do the neigh confirm, we will not be able to update neigh cache and ping6 remote will failed. So for this ip_tunnel_xmit() case, _EVEN_ if the MTU is changed, we should not be invoking dst_confirm_neigh() as we have no evidence of successful two-way communication at this point. On the other hand it is also important to keep the neigh reachability fresh for TCP flows, so we cannot remove this dst_confirm_neigh() call. To fix the issue, we have to add a new bool parameter for dst_ops.update_pmtu to choose whether we should do neigh update or not. I will add the parameter in this patch and set all the callers to true to comply with the previous way, and fix the tunnel code one by one on later patches. v5: No change. v4: No change. v3: Do not remove dst_confirm_neigh, but add a new bool parameter in dst_ops.update_pmtu to control whether we should do neighbor confirm. Also split the big patch to small ones for each area. v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu. Suggested-by: David Miller Reviewed-by: Guillaume Nault Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/gtp.c | 2 +- include/net/dst.h | 2 +- include/net/dst_ops.h | 3 ++- net/bridge/br_nf_core.c | 3 ++- net/decnet/dn_route.c | 6 ++++-- net/ipv4/inet_connection_sock.c | 2 +- net/ipv4/route.c | 9 ++++++--- net/ipv4/xfrm4_policy.c | 5 +++-- net/ipv6/inet6_connection_sock.c | 2 +- net/ipv6/ip6_gre.c | 2 +- net/ipv6/route.c | 22 +++++++++++++++------- net/ipv6/xfrm6_policy.c | 5 +++-- net/netfilter/ipvs/ip_vs_xmit.c | 2 +- net/sctp/transport.c | 2 +- 14 files changed, 42 insertions(+), 25 deletions(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 5de4053774b8..97bce0170c12 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -545,7 +545,7 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev, mtu = dst_mtu(&rt->dst); } - rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu); + rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu, true); if (!skb_is_gso(skb) && (iph->frag_off & htons(IP_DF)) && mtu < ntohs(iph->tot_len)) { diff --git a/include/net/dst.h b/include/net/dst.h index fe230dd62c28..0e249f48f080 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -528,7 +528,7 @@ static inline void skb_dst_update_pmtu(struct sk_buff *skb, u32 mtu) struct dst_entry *dst = skb_dst(skb); if (dst && dst->ops->update_pmtu) - dst->ops->update_pmtu(dst, NULL, skb, mtu); + dst->ops->update_pmtu(dst, NULL, skb, mtu, true); } #endif /* _NET_DST_H */ diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 5ec645f27ee3..443863c7b8da 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -27,7 +27,8 @@ struct dst_ops { struct dst_entry * (*negative_advice)(struct dst_entry *); void (*link_failure)(struct sk_buff *); void (*update_pmtu)(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu); + struct sk_buff *skb, u32 mtu, + bool confirm_neigh); void (*redirect)(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); int (*local_out)(struct net *net, struct sock *sk, struct sk_buff *skb); diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c index 20cbb727df4d..c217276bd76a 100644 --- a/net/bridge/br_nf_core.c +++ b/net/bridge/br_nf_core.c @@ -26,7 +26,8 @@ #endif static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { } diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 0bd3afd01dd2..ccc189bc3617 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -118,7 +118,8 @@ static void dn_dst_ifdown(struct dst_entry *, struct net_device *dev, int how); static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); static void dn_dst_link_failure(struct sk_buff *); static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb , u32 mtu); + struct sk_buff *skb , u32 mtu, + bool confirm_neigh); static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, @@ -259,7 +260,8 @@ static int dn_dst_gc(struct dst_ops *ops) * advertise to the other end). */ static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { struct dn_route *rt = (struct dn_route *) dst; struct neighbour *n = rt->n; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 9d6b172caf6c..f7224c4fc30f 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1088,7 +1088,7 @@ struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu) if (!dst) goto out; } - dst->ops->update_pmtu(dst, sk, NULL, mtu); + dst->ops->update_pmtu(dst, sk, NULL, mtu, true); dst = __sk_dst_check(sk, 0); if (!dst) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index de7f955ffd0a..8b855d3eec9e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -145,7 +145,8 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst); static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu); + struct sk_buff *skb, u32 mtu, + bool confirm_neigh); static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); static void ipv4_dst_destroy(struct dst_entry *dst); @@ -1042,7 +1043,8 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) } static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { struct rtable *rt = (struct rtable *) dst; struct flowi4 fl4; @@ -2529,7 +2531,8 @@ static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst) } static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { } diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 5952dca98e6b..08f00225ed1b 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -222,12 +222,13 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) } static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; - path->ops->update_pmtu(path, sk, skb, mtu); + path->ops->update_pmtu(path, sk, skb, mtu, confirm_neigh); } static void xfrm4_redirect(struct dst_entry *dst, struct sock *sk, diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 9a31d13bf180..890adadcda16 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -150,7 +150,7 @@ struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu) if (IS_ERR(dst)) return NULL; - dst->ops->update_pmtu(dst, sk, NULL, mtu); + dst->ops->update_pmtu(dst, sk, NULL, mtu, true); dst = inet6_csk_route_socket(sk, &fl6); return IS_ERR(dst) ? NULL : dst; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 4228f3b2f347..38a9c10b351e 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -527,7 +527,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, /* TooBig packet may have updated dst->dev's mtu */ if (dst && dst_mtu(dst) > dst->dev->mtu) - dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu); + dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, true); return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu, NEXTHDR_GRE); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 00f8fe8cebd5..b81522bcf223 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -93,7 +93,8 @@ static int ip6_pkt_prohibit(struct sk_buff *skb); static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb); static void ip6_link_failure(struct sk_buff *skb); static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu); + struct sk_buff *skb, u32 mtu, + bool confirm_neigh); static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); static void rt6_dst_from_metrics_check(struct rt6_info *rt); @@ -264,7 +265,8 @@ static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst) } static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { } @@ -1471,7 +1473,8 @@ static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt) } static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, - const struct ipv6hdr *iph, u32 mtu) + const struct ipv6hdr *iph, u32 mtu, + bool confirm_neigh) { const struct in6_addr *daddr, *saddr; struct rt6_info *rt6 = (struct rt6_info *)dst; @@ -1489,7 +1492,10 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, daddr = NULL; saddr = NULL; } - dst_confirm_neigh(dst, daddr); + + if (confirm_neigh) + dst_confirm_neigh(dst, daddr); + mtu = max_t(u32, mtu, IPV6_MIN_MTU); if (mtu >= dst_mtu(dst)) return; @@ -1518,9 +1524,11 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, } static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { - __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu); + __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu, + confirm_neigh); } void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, @@ -1540,7 +1548,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) - __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu)); + __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu), true); dst_release(dst); } EXPORT_SYMBOL_GPL(ip6_update_pmtu); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index d6b012295b45..b0d80cef7c2b 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -219,12 +219,13 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) } static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; - path->ops->update_pmtu(path, sk, skb, mtu); + path->ops->update_pmtu(path, sk, skb, mtu, confirm_neigh); } static void xfrm6_redirect(struct dst_entry *dst, struct sock *sk, diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 4527921b1c3a..97d411033f8a 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -209,7 +209,7 @@ static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu) struct rtable *ort = skb_rtable(skb); if (!skb->dev && sk && sk_fullsock(sk)) - ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu); + ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu, true); } static inline bool ensure_mtu_is_adequate(struct netns_ipvs *ipvs, int skb_af, diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 274df899e7bf..4c55b759a58e 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -272,7 +272,7 @@ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) pf->af->from_sk(&addr, sk); pf->to_sk_daddr(&t->ipaddr, sk); - dst->ops->update_pmtu(dst, sk, NULL, pmtu); + dst->ops->update_pmtu(dst, sk, NULL, pmtu, true); pf->to_sk_daddr(&addr, sk); dst = sctp_transport_dst_check(t); From 4e3c0a8d0f85422a9ea23bb64e26b4288f6632c6 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sun, 22 Dec 2019 10:51:10 +0800 Subject: [PATCH 1994/3715] ip6_gre: do not confirm neighbor when do pmtu update [ Upstream commit 675d76ad0ad5bf41c9a129772ef0aba8f57ea9a7 ] When we do ipv6 gre pmtu update, we will also do neigh confirm currently. This will cause the neigh cache be refreshed and set to REACHABLE before xmit. But if the remote mac address changed, e.g. device is deleted and recreated, we will not able to notice this and still use the old mac address as the neigh cache is REACHABLE. Fix this by disable neigh confirm when do pmtu update v5: No change. v4: No change. v3: Do not remove dst_confirm_neigh, but add a new bool parameter in dst_ops.update_pmtu to control whether we should do neighbor confirm. Also split the big patch to small ones for each area. v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu. Reported-by: Jianlin Shi Reviewed-by: Guillaume Nault Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_gre.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 38a9c10b351e..726ba41133a3 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -527,7 +527,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, /* TooBig packet may have updated dst->dev's mtu */ if (dst && dst_mtu(dst) > dst->dev->mtu) - dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, true); + dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, false); return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu, NEXTHDR_GRE); From 58df598b278e63b1f1d8995676c78c3e46800f62 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sun, 22 Dec 2019 10:51:11 +0800 Subject: [PATCH 1995/3715] gtp: do not confirm neighbor when do pmtu update [ Upstream commit 6e9105c73f8d2163d12d5dfd762fd75483ed30f5 ] When do IPv6 tunnel PMTU update and calls __ip6_rt_update_pmtu() in the end, we should not call dst_confirm_neigh() as there is no two-way communication. Although GTP only support ipv4 right now, and __ip_rt_update_pmtu() does not call dst_confirm_neigh(), we still set it to false to keep consistency with IPv6 code. v5: No change. v4: No change. v3: Do not remove dst_confirm_neigh, but add a new bool parameter in dst_ops.update_pmtu to control whether we should do neighbor confirm. Also split the big patch to small ones for each area. v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu. Reviewed-by: Guillaume Nault Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/gtp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 97bce0170c12..d8b2698f7f47 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -545,7 +545,7 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev, mtu = dst_mtu(&rt->dst); } - rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu, true); + rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu, false); if (!skb_is_gso(skb) && (iph->frag_off & htons(IP_DF)) && mtu < ntohs(iph->tot_len)) { From b560914e473a716533af3c24f578351558d40e4c Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sun, 22 Dec 2019 10:51:12 +0800 Subject: [PATCH 1996/3715] net/dst: add new function skb_dst_update_pmtu_no_confirm [ Upstream commit 07dc35c6e3cc3c001915d05f5bf21f80a39a0970 ] Add a new function skb_dst_update_pmtu_no_confirm() for callers who need update pmtu but should not do neighbor confirm. v5: No change. v4: No change. v3: Do not remove dst_confirm_neigh, but add a new bool parameter in dst_ops.update_pmtu to control whether we should do neighbor confirm. Also split the big patch to small ones for each area. v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu. Reviewed-by: Guillaume Nault Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/dst.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/net/dst.h b/include/net/dst.h index 0e249f48f080..5ebc7356a381 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -531,4 +531,13 @@ static inline void skb_dst_update_pmtu(struct sk_buff *skb, u32 mtu) dst->ops->update_pmtu(dst, NULL, skb, mtu, true); } +/* update dst pmtu but not do neighbor confirm */ +static inline void skb_dst_update_pmtu_no_confirm(struct sk_buff *skb, u32 mtu) +{ + struct dst_entry *dst = skb_dst(skb); + + if (dst && dst->ops->update_pmtu) + dst->ops->update_pmtu(dst, NULL, skb, mtu, false); +} + #endif /* _NET_DST_H */ From b1467e87af4ff142e2b88f6c497e50cf79f0803b Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sun, 22 Dec 2019 10:51:13 +0800 Subject: [PATCH 1997/3715] tunnel: do not confirm neighbor when do pmtu update [ Upstream commit 7a1592bcb15d71400a98632727791d1e68ea0ee8 ] When do tunnel PMTU update and calls __ip6_rt_update_pmtu() in the end, we should not call dst_confirm_neigh() as there is no two-way communication. v5: No Change. v4: Update commit description v3: Do not remove dst_confirm_neigh, but add a new bool parameter in dst_ops.update_pmtu to control whether we should do neighbor confirm. Also split the big patch to small ones for each area. v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu. Fixes: 0dec879f636f ("net: use dst_confirm_neigh for UDP, RAW, ICMP, L2TP") Reviewed-by: Guillaume Nault Tested-by: Guillaume Nault Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_tunnel.c | 2 +- net/ipv6/ip6_tunnel.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 7a31287ff123..f1784162acc2 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -521,7 +521,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, else mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; - skb_dst_update_pmtu(skb, mtu); + skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->protocol == htons(ETH_P_IP)) { if (!skb_is_gso(skb) && diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 067fc78cc529..5bc2788e6ba4 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -652,7 +652,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (rel_info > dst_mtu(skb_dst(skb2))) goto out; - skb_dst_update_pmtu(skb2, rel_info); + skb_dst_update_pmtu_no_confirm(skb2, rel_info); } if (rel_type == ICMP_REDIRECT) skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2); @@ -1138,7 +1138,7 @@ route_lookup: mtu = max(mtu, skb->protocol == htons(ETH_P_IPV6) ? IPV6_MIN_MTU : IPV4_MIN_MTU); - skb_dst_update_pmtu(skb, mtu); + skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) { *pmtu = mtu; err = -EMSGSIZE; From 7b296da1aca79471cbcc022b2e71efd65ab0eacd Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sun, 22 Dec 2019 10:51:14 +0800 Subject: [PATCH 1998/3715] vti: do not confirm neighbor when do pmtu update [ Upstream commit 8247a79efa2f28b44329f363272550c1738377de ] When do IPv6 tunnel PMTU update and calls __ip6_rt_update_pmtu() in the end, we should not call dst_confirm_neigh() as there is no two-way communication. Although vti and vti6 are immune to this problem because they are IFF_NOARP interfaces, as Guillaume pointed. There is still no sense to confirm neighbour here. v5: Update commit description. v4: No change. v3: Do not remove dst_confirm_neigh, but add a new bool parameter in dst_ops.update_pmtu to control whether we should do neighbor confirm. Also split the big patch to small ones for each area. v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu. Reviewed-by: Guillaume Nault Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_vti.c | 2 +- net/ipv6/ip6_vti.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index c07065b7e3b0..08c15dd42d93 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -244,7 +244,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, mtu = dst_mtu(dst); if (skb->len > mtu) { - skb_dst_update_pmtu(skb, mtu); + skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->protocol == htons(ETH_P_IP)) { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 6b2416b4a53e..557fe3880a3f 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -483,7 +483,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) mtu = dst_mtu(dst); if (skb->len > mtu) { - skb_dst_update_pmtu(skb, mtu); + skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->protocol == htons(ETH_P_IPV6)) { if (mtu < IPV6_MIN_MTU) From de211c95f9b107b001d8864bfc8e45f62bcb6614 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sun, 22 Dec 2019 10:51:15 +0800 Subject: [PATCH 1999/3715] sit: do not confirm neighbor when do pmtu update [ Upstream commit 4d42df46d6372ece4cb4279870b46c2ea7304a47 ] When do IPv6 tunnel PMTU update and calls __ip6_rt_update_pmtu() in the end, we should not call dst_confirm_neigh() as there is no two-way communication. v5: No change. v4: No change. v3: Do not remove dst_confirm_neigh, but add a new bool parameter in dst_ops.update_pmtu to control whether we should do neighbor confirm. Also split the big patch to small ones for each area. v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu. Reviewed-by: Guillaume Nault Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/sit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index d2529c38e7e4..fb3f917db57a 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -932,7 +932,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, } if (tunnel->parms.iph.daddr) - skb_dst_update_pmtu(skb, mtu); + skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->len > mtu && !skb_is_gso(skb)) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); From 0ddfaacfb76d8d54a1f94bfa4e4bc9a7c452d4c7 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Wed, 11 Dec 2019 08:23:00 +0000 Subject: [PATCH 2000/3715] gtp: do not allow adding duplicate tid and ms_addr pdp context [ Upstream commit 6b01b1d9b2d38dc84ac398bfe9f00baff06a31e5 ] GTP RX packet path lookups pdp context with TID. If duplicate TID pdp contexts are existing in the list, it couldn't select correct pdp context. So, TID value should be unique. GTP TX packet path lookups pdp context with ms_addr. If duplicate ms_addr pdp contexts are existing in the list, it couldn't select correct pdp context. So, ms_addr value should be unique. Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") Signed-off-by: Taehee Yoo Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/gtp.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index d8b2698f7f47..6e699cd0fbda 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -929,24 +929,31 @@ static void ipv4_pdp_fill(struct pdp_ctx *pctx, struct genl_info *info) } } -static int ipv4_pdp_add(struct gtp_dev *gtp, struct sock *sk, - struct genl_info *info) +static int gtp_pdp_add(struct gtp_dev *gtp, struct sock *sk, + struct genl_info *info) { + struct pdp_ctx *pctx, *pctx_tid = NULL; struct net_device *dev = gtp->dev; u32 hash_ms, hash_tid = 0; - struct pdp_ctx *pctx; + unsigned int version; bool found = false; __be32 ms_addr; ms_addr = nla_get_be32(info->attrs[GTPA_MS_ADDRESS]); hash_ms = ipv4_hashfn(ms_addr) % gtp->hash_size; + version = nla_get_u32(info->attrs[GTPA_VERSION]); - hlist_for_each_entry_rcu(pctx, >p->addr_hash[hash_ms], hlist_addr) { - if (pctx->ms_addr_ip4.s_addr == ms_addr) { - found = true; - break; - } - } + pctx = ipv4_pdp_find(gtp, ms_addr); + if (pctx) + found = true; + if (version == GTP_V0) + pctx_tid = gtp0_pdp_find(gtp, + nla_get_u64(info->attrs[GTPA_TID])); + else if (version == GTP_V1) + pctx_tid = gtp1_pdp_find(gtp, + nla_get_u32(info->attrs[GTPA_I_TEI])); + if (pctx_tid) + found = true; if (found) { if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) @@ -954,6 +961,11 @@ static int ipv4_pdp_add(struct gtp_dev *gtp, struct sock *sk, if (info->nlhdr->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; + if (pctx && pctx_tid) + return -EEXIST; + if (!pctx) + pctx = pctx_tid; + ipv4_pdp_fill(pctx, info); if (pctx->gtp_version == GTP_V0) @@ -1077,7 +1089,7 @@ static int gtp_genl_new_pdp(struct sk_buff *skb, struct genl_info *info) goto out_unlock; } - err = ipv4_pdp_add(gtp, sk, info); + err = gtp_pdp_add(gtp, sk, info); out_unlock: rcu_read_unlock(); From 94671cf125f8e08e093290451d511c197b005c82 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 13 Dec 2019 18:20:41 -0800 Subject: [PATCH 2001/3715] tcp/dccp: fix possible race __inet_lookup_established() commit 8dbd76e79a16b45b2ccb01d2f2e08dbf64e71e40 upstream. Michal Kubecek and Firo Yang did a very nice analysis of crashes happening in __inet_lookup_established(). Since a TCP socket can go from TCP_ESTABLISH to TCP_LISTEN (via a close()/socket()/listen() cycle) without a RCU grace period, I should not have changed listeners linkage in their hash table. They must use the nulls protocol (Documentation/RCU/rculist_nulls.txt), so that a lookup can detect a socket in a hash list was moved in another one. Since we added code in commit d296ba60d8e2 ("soreuseport: Resolve merge conflict for v4/v6 ordering fix"), we have to add hlist_nulls_add_tail_rcu() helper. Fixes: 3b24d854cb35 ("tcp/dccp: do not touch listener sk_refcnt under synflood") Signed-off-by: Eric Dumazet Reported-by: Michal Kubecek Reported-by: Firo Yang Reviewed-by: Michal Kubecek Link: https://lore.kernel.org/netdev/20191120083919.GH27852@unicorn.suse.cz/ Signed-off-by: Jakub Kicinski [stable-4.14: we also need to update code in __inet_lookup_listener() and inet6_lookup_listener() which has been removed in 5.0-rc1.] Signed-off-by: Michal Kubecek Signed-off-by: Greg Kroah-Hartman --- include/linux/rculist_nulls.h | 37 +++++++++++++++++++++++++++++++++++ include/net/inet_hashtables.h | 12 +++++++++--- include/net/sock.h | 5 +++++ net/ipv4/inet_diag.c | 3 ++- net/ipv4/inet_hashtables.c | 18 ++++++++--------- net/ipv4/tcp_ipv4.c | 7 ++++--- net/ipv6/inet6_hashtables.c | 3 ++- 7 files changed, 68 insertions(+), 17 deletions(-) diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index e4b257ff881b..a10da545b3f6 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -100,6 +100,43 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, first->pprev = &n->next; } +/** + * hlist_nulls_add_tail_rcu + * @n: the element to add to the hash list. + * @h: the list to add to. + * + * Description: + * Adds the specified element to the specified hlist_nulls, + * while permitting racing traversals. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() + * or hlist_nulls_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. Regardless of the type of CPU, the + * list-traversal primitive must be guarded by rcu_read_lock(). + */ +static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n, + struct hlist_nulls_head *h) +{ + struct hlist_nulls_node *i, *last = NULL; + + /* Note: write side code, so rcu accessors are not needed. */ + for (i = h->first; !is_a_nulls(i); i = i->next) + last = i; + + if (last) { + n->next = last->next; + n->pprev = &last->next; + rcu_assign_pointer(hlist_next_rcu(last), n); + } else { + hlist_nulls_add_head_rcu(n, h); + } +} + /** * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type * @tpos: the type * to use as a loop cursor. diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 2dbbbff5e1e3..573ab110c9ec 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -106,12 +106,18 @@ struct inet_bind_hashbucket { struct hlist_head chain; }; -/* - * Sockets can be hashed in established or listening table +/* Sockets can be hashed in established or listening table. + * We must use different 'nulls' end-of-chain value for all hash buckets : + * A socket might transition from ESTABLISH to LISTEN state without + * RCU grace period. A lookup in ehash table needs to handle this case. */ +#define LISTENING_NULLS_BASE (1U << 29) struct inet_listen_hashbucket { spinlock_t lock; - struct hlist_head head; + union { + struct hlist_head head; + struct hlist_nulls_head nulls_head; + }; }; /* This is for listening sockets, thus all sockets which possess wildcards. */ diff --git a/include/net/sock.h b/include/net/sock.h index 0af46cbd3649..c6a003bc4737 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -693,6 +693,11 @@ static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_h hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); } +static inline void __sk_nulls_add_node_tail_rcu(struct sock *sk, struct hlist_nulls_head *list) +{ + hlist_nulls_add_tail_rcu(&sk->sk_nulls_node, list); +} + static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) { sock_hold(sk); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 33edccfebc30..eb158badebc4 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -911,11 +911,12 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, for (i = s_i; i < INET_LHTABLE_SIZE; i++) { struct inet_listen_hashbucket *ilb; + struct hlist_nulls_node *node; num = 0; ilb = &hashinfo->listening_hash[i]; spin_lock(&ilb->lock); - sk_for_each(sk, &ilb->head) { + sk_nulls_for_each(sk, node, &ilb->nulls_head) { struct inet_sock *inet = inet_sk(sk); if (!net_eq(sock_net(sk), net)) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 1f26627c7fad..0af13f5bdc9a 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -219,9 +219,10 @@ struct sock *__inet_lookup_listener(struct net *net, int score, hiscore = 0, matches = 0, reuseport = 0; bool exact_dif = inet_exact_dif_match(net, skb); struct sock *sk, *result = NULL; + struct hlist_nulls_node *node; u32 phash = 0; - sk_for_each_rcu(sk, &ilb->head) { + sk_nulls_for_each_rcu(sk, node, &ilb->nulls_head) { score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif); if (score > hiscore) { @@ -442,10 +443,11 @@ static int inet_reuseport_add_sock(struct sock *sk, struct inet_listen_hashbucket *ilb) { struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash; + const struct hlist_nulls_node *node; struct sock *sk2; kuid_t uid = sock_i_uid(sk); - sk_for_each_rcu(sk2, &ilb->head) { + sk_nulls_for_each_rcu(sk2, node, &ilb->nulls_head) { if (sk2 != sk && sk2->sk_family == sk->sk_family && ipv6_only_sock(sk2) == ipv6_only_sock(sk) && @@ -480,9 +482,9 @@ int __inet_hash(struct sock *sk, struct sock *osk) } if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && sk->sk_family == AF_INET6) - hlist_add_tail_rcu(&sk->sk_node, &ilb->head); + __sk_nulls_add_node_tail_rcu(sk, &ilb->nulls_head); else - hlist_add_head_rcu(&sk->sk_node, &ilb->head); + __sk_nulls_add_node_rcu(sk, &ilb->nulls_head); sock_set_flag(sk, SOCK_RCU_FREE); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); unlock: @@ -525,10 +527,7 @@ void inet_unhash(struct sock *sk) spin_lock_bh(lock); if (rcu_access_pointer(sk->sk_reuseport_cb)) reuseport_detach_sock(sk); - if (listener) - done = __sk_del_node_init(sk); - else - done = __sk_nulls_del_node_init_rcu(sk); + done = __sk_nulls_del_node_init_rcu(sk); if (done) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); spin_unlock_bh(lock); @@ -664,7 +663,8 @@ void inet_hashinfo_init(struct inet_hashinfo *h) for (i = 0; i < INET_LHTABLE_SIZE; i++) { spin_lock_init(&h->listening_hash[i].lock); - INIT_HLIST_HEAD(&h->listening_hash[i].head); + INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].nulls_head, + i + LISTENING_NULLS_BASE); } } EXPORT_SYMBOL_GPL(inet_hashinfo_init); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 44a41ac2b0ca..b4f0fc34b0ed 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1936,13 +1936,14 @@ static void *listening_get_next(struct seq_file *seq, void *cur) struct tcp_iter_state *st = seq->private; struct net *net = seq_file_net(seq); struct inet_listen_hashbucket *ilb; + struct hlist_nulls_node *node; struct sock *sk = cur; if (!sk) { get_head: ilb = &tcp_hashinfo.listening_hash[st->bucket]; spin_lock(&ilb->lock); - sk = sk_head(&ilb->head); + sk = sk_nulls_head(&ilb->nulls_head); st->offset = 0; goto get_sk; } @@ -1950,9 +1951,9 @@ get_head: ++st->num; ++st->offset; - sk = sk_next(sk); + sk = sk_nulls_next(sk); get_sk: - sk_for_each_from(sk) { + sk_nulls_for_each_from(sk, node) { if (!net_eq(sock_net(sk), net)) continue; if (sk->sk_family == st->family) diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 228983a5531b..24a21979d7df 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -137,9 +137,10 @@ struct sock *inet6_lookup_listener(struct net *net, int score, hiscore = 0, matches = 0, reuseport = 0; bool exact_dif = inet6_exact_dif_match(net, skb); struct sock *sk, *result = NULL; + struct hlist_nulls_node *node; u32 phash = 0; - sk_for_each(sk, &ilb->head) { + sk_nulls_for_each(sk, node, &ilb->nulls_head) { score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif); if (score > hiscore) { reuseport = sk->sk_reuseport; From e4e33e48ac71512c00fcf3d489af7bb054198024 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 12 Dec 2019 12:55:29 -0800 Subject: [PATCH 2002/3715] tcp: do not send empty skb from tcp_write_xmit() [ Upstream commit 1f85e6267caca44b30c54711652b0726fadbb131 ] Backport of commit fdfc5c8594c2 ("tcp: remove empty skb from write queue in error cases") in linux-4.14 stable triggered various bugs. One of them has been fixed in commit ba2ddb43f270 ("tcp: Don't dequeue SYN/FIN-segments from write-queue"), but we still have crashes in some occasions. Root-cause is that when tcp_sendmsg() has allocated a fresh skb and could not append a fragment before being blocked in sk_stream_wait_memory(), tcp_write_xmit() might be called and decide to send this fresh and empty skb. Sending an empty packet is not only silly, it might have caused many issues we had in the past with tp->packets_out being out of sync. Fixes: c65f7f00c587 ("[TCP]: Simplify SKB data portion allocation with NETIF_F_SG.") Signed-off-by: Eric Dumazet Cc: Christoph Paasch Acked-by: Neal Cardwell Cc: Jason Baron Acked-by: Soheil Hassas Yeganeh Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_output.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e3b28140c10b..e1eb56e21dd5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2380,6 +2380,14 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, if (tcp_small_queue_check(sk, skb, 0)) break; + /* Argh, we hit an empty skb(), presumably a thread + * is sleeping in sendmsg()/sk_stream_wait_memory(). + * We do not want to send a pure-ack packet and have + * a strange looking rtx queue with empty packet(s). + */ + if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) + break; + if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp))) break; From 90cbd508fbd5adbd24c716816f2b0775efbb83bf Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Wed, 11 Dec 2019 08:23:17 +0000 Subject: [PATCH 2003/3715] gtp: fix wrong condition in gtp_genl_dump_pdp() [ Upstream commit 94a6d9fb88df43f92d943c32b84ce398d50bf49f ] gtp_genl_dump_pdp() is ->dumpit() callback of GTP module and it is used to dump pdp contexts. it would be re-executed because of dump packet size. If dump packet size is too big, it saves current dump pointer (gtp interface pointer, bucket, TID value) then it restarts dump from last pointer. Current GTP code allows adding zero TID pdp context but dump code ignores zero TID value. So, last dump pointer will not be found. In addition, this patch adds missing rcu_read_lock() in gtp_genl_dump_pdp(). Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") Signed-off-by: Taehee Yoo Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/gtp.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 6e699cd0fbda..69dba35b1222 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -42,7 +42,6 @@ struct pdp_ctx { struct hlist_node hlist_addr; union { - u64 tid; struct { u64 tid; u16 flow; @@ -1247,43 +1246,46 @@ static int gtp_genl_dump_pdp(struct sk_buff *skb, struct netlink_callback *cb) { struct gtp_dev *last_gtp = (struct gtp_dev *)cb->args[2], *gtp; + int i, j, bucket = cb->args[0], skip = cb->args[1]; struct net *net = sock_net(skb->sk); - struct gtp_net *gn = net_generic(net, gtp_net_id); - unsigned long tid = cb->args[1]; - int i, k = cb->args[0], ret; struct pdp_ctx *pctx; + struct gtp_net *gn; + + gn = net_generic(net, gtp_net_id); if (cb->args[4]) return 0; + rcu_read_lock(); list_for_each_entry_rcu(gtp, &gn->gtp_dev_list, list) { if (last_gtp && last_gtp != gtp) continue; else last_gtp = NULL; - for (i = k; i < gtp->hash_size; i++) { - hlist_for_each_entry_rcu(pctx, >p->tid_hash[i], hlist_tid) { - if (tid && tid != pctx->u.tid) - continue; - else - tid = 0; - - ret = gtp_genl_fill_info(skb, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - cb->nlh->nlmsg_type, pctx); - if (ret < 0) { + for (i = bucket; i < gtp->hash_size; i++) { + j = 0; + hlist_for_each_entry_rcu(pctx, >p->tid_hash[i], + hlist_tid) { + if (j >= skip && + gtp_genl_fill_info(skb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + cb->nlh->nlmsg_type, pctx)) { cb->args[0] = i; - cb->args[1] = pctx->u.tid; + cb->args[1] = j; cb->args[2] = (unsigned long)gtp; goto out; } + j++; } + skip = 0; } + bucket = 0; } cb->args[4] = 1; out: + rcu_read_unlock(); return skb->len; } From a29c4303930bc0c25ae6a4f365dcdef71447b4ea Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Wed, 11 Dec 2019 08:23:34 +0000 Subject: [PATCH 2004/3715] gtp: fix an use-after-free in ipv4_pdp_find() [ Upstream commit 94dc550a5062030569d4aa76e10e50c8fc001930 ] ipv4_pdp_find() is called in TX packet path of GTP. ipv4_pdp_find() internally uses gtp->tid_hash to lookup pdp context. In the current code, gtp->tid_hash and gtp->addr_hash are freed by ->dellink(), which is gtp_dellink(). But gtp_dellink() would be called while packets are processing. So, gtp_dellink() should not free gtp->tid_hash and gtp->addr_hash. Instead, dev->priv_destructor() would be used because this callback is called after all packet processing safely. Test commands: ip link add veth1 type veth peer name veth2 ip a a 172.0.0.1/24 dev veth1 ip link set veth1 up ip a a 172.99.0.1/32 dev lo gtp-link add gtp1 & gtp-tunnel add gtp1 v1 200 100 172.99.0.2 172.0.0.2 ip r a 172.99.0.2/32 dev gtp1 ip link set gtp1 mtu 1500 ip netns add ns2 ip link set veth2 netns ns2 ip netns exec ns2 ip a a 172.0.0.2/24 dev veth2 ip netns exec ns2 ip link set veth2 up ip netns exec ns2 ip a a 172.99.0.2/32 dev lo ip netns exec ns2 ip link set lo up ip netns exec ns2 gtp-link add gtp2 & ip netns exec ns2 gtp-tunnel add gtp2 v1 100 200 172.99.0.1 172.0.0.1 ip netns exec ns2 ip r a 172.99.0.1/32 dev gtp2 ip netns exec ns2 ip link set gtp2 mtu 1500 hping3 172.99.0.2 -2 --flood & ip link del gtp1 Splat looks like: [ 72.568081][ T1195] BUG: KASAN: use-after-free in ipv4_pdp_find.isra.12+0x130/0x170 [gtp] [ 72.568916][ T1195] Read of size 8 at addr ffff8880b9a35d28 by task hping3/1195 [ 72.569631][ T1195] [ 72.569861][ T1195] CPU: 2 PID: 1195 Comm: hping3 Not tainted 5.5.0-rc1 #199 [ 72.570547][ T1195] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 72.571438][ T1195] Call Trace: [ 72.571764][ T1195] dump_stack+0x96/0xdb [ 72.572171][ T1195] ? ipv4_pdp_find.isra.12+0x130/0x170 [gtp] [ 72.572761][ T1195] print_address_description.constprop.5+0x1be/0x360 [ 72.573400][ T1195] ? ipv4_pdp_find.isra.12+0x130/0x170 [gtp] [ 72.573971][ T1195] ? ipv4_pdp_find.isra.12+0x130/0x170 [gtp] [ 72.574544][ T1195] __kasan_report+0x12a/0x16f [ 72.575014][ T1195] ? ipv4_pdp_find.isra.12+0x130/0x170 [gtp] [ 72.575593][ T1195] kasan_report+0xe/0x20 [ 72.576004][ T1195] ipv4_pdp_find.isra.12+0x130/0x170 [gtp] [ 72.576577][ T1195] gtp_build_skb_ip4+0x199/0x1420 [gtp] [ ... ] [ 72.647671][ T1195] BUG: unable to handle page fault for address: ffff8880b9a35d28 [ 72.648512][ T1195] #PF: supervisor read access in kernel mode [ 72.649158][ T1195] #PF: error_code(0x0000) - not-present page [ 72.649849][ T1195] PGD a6c01067 P4D a6c01067 PUD 11fb07067 PMD 11f939067 PTE 800fffff465ca060 [ 72.652958][ T1195] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI [ 72.653834][ T1195] CPU: 2 PID: 1195 Comm: hping3 Tainted: G B 5.5.0-rc1 #199 [ 72.668062][ T1195] RIP: 0010:ipv4_pdp_find.isra.12+0x86/0x170 [gtp] [ ... ] [ 72.679168][ T1195] Call Trace: [ 72.679603][ T1195] gtp_build_skb_ip4+0x199/0x1420 [gtp] [ 72.681915][ T1195] ? ipv4_pdp_find.isra.12+0x170/0x170 [gtp] [ 72.682513][ T1195] ? lock_acquire+0x164/0x3b0 [ 72.682966][ T1195] ? gtp_dev_xmit+0x35e/0x890 [gtp] [ 72.683481][ T1195] gtp_dev_xmit+0x3c2/0x890 [gtp] [ ... ] Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") Signed-off-by: Taehee Yoo Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/gtp.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 69dba35b1222..51043a637719 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -644,9 +644,16 @@ static void gtp_link_setup(struct net_device *dev) } static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize); -static void gtp_hashtable_free(struct gtp_dev *gtp); static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[]); +static void gtp_destructor(struct net_device *dev) +{ + struct gtp_dev *gtp = netdev_priv(dev); + + kfree(gtp->addr_hash); + kfree(gtp->tid_hash); +} + static int gtp_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) @@ -681,13 +688,15 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev, gn = net_generic(dev_net(dev), gtp_net_id); list_add_rcu(>p->list, &gn->gtp_dev_list); + dev->priv_destructor = gtp_destructor; netdev_dbg(dev, "registered new GTP interface\n"); return 0; out_hashtable: - gtp_hashtable_free(gtp); + kfree(gtp->addr_hash); + kfree(gtp->tid_hash); out_encap: gtp_encap_disable(gtp); return err; @@ -696,9 +705,14 @@ out_encap: static void gtp_dellink(struct net_device *dev, struct list_head *head) { struct gtp_dev *gtp = netdev_priv(dev); + struct pdp_ctx *pctx; + int i; + + for (i = 0; i < gtp->hash_size; i++) + hlist_for_each_entry_rcu(pctx, >p->tid_hash[i], hlist_tid) + pdp_context_delete(pctx); gtp_encap_disable(gtp); - gtp_hashtable_free(gtp); list_del_rcu(>p->list); unregister_netdevice_queue(dev, head); } @@ -774,20 +788,6 @@ err1: return -ENOMEM; } -static void gtp_hashtable_free(struct gtp_dev *gtp) -{ - struct pdp_ctx *pctx; - int i; - - for (i = 0; i < gtp->hash_size; i++) - hlist_for_each_entry_rcu(pctx, >p->tid_hash[i], hlist_tid) - pdp_context_delete(pctx); - - synchronize_rcu(); - kfree(gtp->addr_hash); - kfree(gtp->tid_hash); -} - static struct sock *gtp_encap_enable_socket(int fd, int type, struct gtp_dev *gtp) { From 3e8374bd5ab1e7f6e6148fe1e0178edf049e6891 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Wed, 11 Dec 2019 08:23:48 +0000 Subject: [PATCH 2005/3715] gtp: avoid zero size hashtable [ Upstream commit 6a902c0f31993ab02e1b6ea7085002b9c9083b6a ] GTP default hashtable size is 1024 and userspace could set specific hashtable size with IFLA_GTP_PDP_HASHSIZE. If hashtable size is set to 0 from userspace, hashtable will not work and panic will occur. Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") Signed-off-by: Taehee Yoo Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/gtp.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 51043a637719..35905e9ee9ec 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -671,10 +671,13 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev, if (err < 0) return err; - if (!data[IFLA_GTP_PDP_HASHSIZE]) + if (!data[IFLA_GTP_PDP_HASHSIZE]) { hashsize = 1024; - else + } else { hashsize = nla_get_u32(data[IFLA_GTP_PDP_HASHSIZE]); + if (!hashsize) + hashsize = 1024; + } err = gtp_hashtable_new(gtp, hashsize); if (err < 0) From f8f4d3e589c7c0ece3b5e3b1bec37c208edaeee8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 12 Dec 2019 17:47:24 +0000 Subject: [PATCH 2006/3715] spi: fsl: use platform_get_irq() instead of of_irq_to_resource() commit 63aa6a692595d47a0785297b481072086b9272d2 upstream. Unlike irq_of_parse_and_map() which has a dummy definition on SPARC, of_irq_to_resource() hasn't. But as platform_get_irq() can be used instead and is generic, use it. Reported-by: kbuild test robot Suggested-by: Mark Brown Fixes: 3194d2533eff ("spi: fsl: don't map irq during probe") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Link: https://lore.kernel.org/r/091a277fd0b3356dca1e29858c1c96983fc9cb25.1576172743.git.christophe.leroy@c-s.fr Signed-off-by: Mark Brown Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-fsl-spi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-fsl-spi.c b/drivers/spi/spi-fsl-spi.c index fb34ba3f2b23..8b79e36fab21 100644 --- a/drivers/spi/spi-fsl-spi.c +++ b/drivers/spi/spi-fsl-spi.c @@ -832,9 +832,9 @@ static int of_fsl_spi_probe(struct platform_device *ofdev) if (ret) goto err; - irq = of_irq_to_resource(np, 0, NULL); - if (irq <= 0) { - ret = -EINVAL; + irq = platform_get_irq(ofdev, 0); + if (irq < 0) { + ret = irq; goto err; } From 84f5ad468100f86d70096799e4ee716a17c2962f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 4 Jan 2020 14:00:23 +0100 Subject: [PATCH 2007/3715] Linux 4.14.162 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6b4528888a75..cb57b5c58e2b 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 161 +SUBLEVEL = 162 EXTRAVERSION = NAME = Petit Gorille From 7742890e23c40a9c06ae9dc9a8bbcbb8acf4d41a Mon Sep 17 00:00:00 2001 From: chenqiwu Date: Wed, 11 Dec 2019 19:06:45 +0800 Subject: [PATCH 2008/3715] UPSTREAM: exit: panic before exit_mm() on global init exit Currently, when global init and all threads in its thread-group have exited we panic via: do_exit() -> exit_notify() -> forget_original_parent() -> find_child_reaper() This makes it hard to extract a useable coredump for global init from a kernel crashdump because by the time we panic exit_mm() will have already released global init's mm. This patch moves the panic futher up before exit_mm() is called. As was the case previously, we only panic when global init and all its threads in the thread-group have exited. Signed-off-by: chenqiwu Acked-by: Christian Brauner Acked-by: Oleg Nesterov [christian.brauner@ubuntu.com: fix typo, rewrite commit message] Link: https://lore.kernel.org/r/1576736993-10121-1-git-send-email-qiwuchen55@gmail.com Signed-off-by: Christian Brauner (cherry picked from commit 43cf75d96409a20ef06b756877a2e72b10a026fc) Bug: 146789558 Change-Id: Icff81267e8c49bf1d332773351d1b47cb8cbac4a Signed-off-by: Alistair Delva --- kernel/exit.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index f76132a085f3..7bcd9154a979 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -577,10 +577,6 @@ static struct task_struct *find_child_reaper(struct task_struct *father, } write_unlock_irq(&tasklist_lock); - if (unlikely(pid_ns == &init_pid_ns)) { - panic("Attempted to kill init! exitcode=0x%08x\n", - father->signal->group_exit_code ?: father->exit_code); - } list_for_each_entry_safe(p, n, dead, ptrace_entry) { list_del_init(&p->ptrace_entry); @@ -824,6 +820,14 @@ void __noreturn do_exit(long code) acct_update_integrals(tsk); group_dead = atomic_dec_and_test(&tsk->signal->live); if (group_dead) { + /* + * If the last thread of global init has exited, panic + * immediately to get a useable coredump. + */ + if (unlikely(is_global_init(tsk))) + panic("Attempted to kill init! exitcode=0x%08x\n", + tsk->signal->group_exit_code ?: (int)code); + #ifdef CONFIG_POSIX_TIMERS hrtimer_cancel(&tsk->signal->real_timer); exit_itimers(tsk->signal); From f05f10630a51938a8b8fb36ae326d32da0ce3cc4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 22 Jan 2018 18:05:43 +0200 Subject: [PATCH 2009/3715] UPSTREAM: seq_file: Introduce DEFINE_SHOW_ATTRIBUTE() helper macro The DEFINE_SHOW_ATTRIBUTE() helper macro would be useful for current users, which are many of them, and for new comers to decrease code duplication. Acked-by: Lee Jones Acked-by: Darren Hart (VMware) Signed-off-by: Andy Shevchenko Bug: 136497735 (cherry picked from commit a08f06bb7a0743a7fc8d571899c93d882468096e) Change-Id: Ib60cf57dc5e979915a83848919644150d82e7058 Signed-off-by: Hridya Valsaraju --- drivers/mfd/ab8500-debugfs.c | 1 - include/linux/seq_file.h | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/mfd/ab8500-debugfs.c b/drivers/mfd/ab8500-debugfs.c index c1c815241e02..28c3ee38b081 100644 --- a/drivers/mfd/ab8500-debugfs.c +++ b/drivers/mfd/ab8500-debugfs.c @@ -1258,7 +1258,6 @@ static struct ab8500_prcmu_ranges ab8540_debug_ranges[AB8500_NUM_BANKS] = { }, }; - static irqreturn_t ab8500_debug_handler(int irq, void *data) { char buf[16]; diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 09c6e28746f9..ab437dd2e3b9 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -140,6 +140,20 @@ void *__seq_open_private(struct file *, const struct seq_operations *, int); int seq_open_private(struct file *, const struct seq_operations *, int); int seq_release_private(struct inode *, struct file *); +#define DEFINE_SHOW_ATTRIBUTE(__name) \ +static int __name ## _open(struct inode *inode, struct file *file) \ +{ \ + return single_open(file, __name ## _show, inode->i_private); \ +} \ + \ +static const struct file_operations __name ## _fops = { \ + .owner = THIS_MODULE, \ + .open = __name ## _open, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = single_release, \ +} + static inline struct user_namespace *seq_user_ns(struct seq_file *seq) { #ifdef CONFIG_USER_NS From 8b2246afe1cdf86ccc925ce0dcebf784d685df82 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Fri, 30 Nov 2018 20:26:30 -0500 Subject: [PATCH 2010/3715] UPSTREAM: binder: remove BINDER_DEBUG_ENTRY() We already have the DEFINE_SHOW_ATTRIBUTE.There is no need to define such a macro,so remove BINDER_DEBUG_ENTRY. Signed-off-by: Yangtao Li Reviewed-by: Joey Pabalinas Acked-by: Todd Kjos Bug: 136497735 (cherry picked from commit c13e0a5288195aadec1e53af7a48ea8dae971416) Change-Id: I48ef17510ea0e252f747a864bd1e98951b0a81ba Signed-off-by: Hridya Valsaraju --- drivers/android/binder.c | 48 ++++++++++++++-------------------------- 1 file changed, 17 insertions(+), 31 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 2c8b629c90c3..6a1026be9af5 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -91,22 +91,8 @@ static struct dentry *binder_debugfs_dir_entry_root; static struct dentry *binder_debugfs_dir_entry_proc; static atomic_t binder_last_id; -#define BINDER_DEBUG_ENTRY(name) \ -static int binder_##name##_open(struct inode *inode, struct file *file) \ -{ \ - return single_open(file, binder_##name##_show, inode->i_private); \ -} \ -\ -static const struct file_operations binder_##name##_fops = { \ - .owner = THIS_MODULE, \ - .open = binder_##name##_open, \ - .read = seq_read, \ - .llseek = seq_lseek, \ - .release = single_release, \ -} - -static int binder_proc_show(struct seq_file *m, void *unused); -BINDER_DEBUG_ENTRY(proc); +static int proc_show(struct seq_file *m, void *unused); +DEFINE_SHOW_ATTRIBUTE(proc); /* This is only defined in include/asm-arm/sizes.h */ #ifndef SZ_1K @@ -5257,7 +5243,7 @@ static int binder_open(struct inode *nodp, struct file *filp) proc->debugfs_entry = debugfs_create_file(strbuf, 0444, binder_debugfs_dir_entry_proc, (void *)(unsigned long)proc->pid, - &binder_proc_fops); + &proc_fops); } return 0; @@ -5905,7 +5891,7 @@ static void print_binder_proc_stats(struct seq_file *m, } -static int binder_state_show(struct seq_file *m, void *unused) +static int state_show(struct seq_file *m, void *unused) { struct binder_proc *proc; struct binder_node *node; @@ -5944,7 +5930,7 @@ static int binder_state_show(struct seq_file *m, void *unused) return 0; } -static int binder_stats_show(struct seq_file *m, void *unused) +static int stats_show(struct seq_file *m, void *unused) { struct binder_proc *proc; @@ -5960,7 +5946,7 @@ static int binder_stats_show(struct seq_file *m, void *unused) return 0; } -static int binder_transactions_show(struct seq_file *m, void *unused) +static int transactions_show(struct seq_file *m, void *unused) { struct binder_proc *proc; @@ -5973,7 +5959,7 @@ static int binder_transactions_show(struct seq_file *m, void *unused) return 0; } -static int binder_proc_show(struct seq_file *m, void *unused) +static int proc_show(struct seq_file *m, void *unused) { struct binder_proc *itr; int pid = (unsigned long)m->private; @@ -6016,7 +6002,7 @@ static void print_binder_transaction_log_entry(struct seq_file *m, "\n" : " (incomplete)\n"); } -static int binder_transaction_log_show(struct seq_file *m, void *unused) +static int transaction_log_show(struct seq_file *m, void *unused) { struct binder_transaction_log *log = m->private; unsigned int log_cur = atomic_read(&log->cur); @@ -6048,10 +6034,10 @@ static const struct file_operations binder_fops = { .release = binder_release, }; -BINDER_DEBUG_ENTRY(state); -BINDER_DEBUG_ENTRY(stats); -BINDER_DEBUG_ENTRY(transactions); -BINDER_DEBUG_ENTRY(transaction_log); +DEFINE_SHOW_ATTRIBUTE(state); +DEFINE_SHOW_ATTRIBUTE(stats); +DEFINE_SHOW_ATTRIBUTE(transactions); +DEFINE_SHOW_ATTRIBUTE(transaction_log); static int __init init_binder_device(const char *name) { @@ -6105,27 +6091,27 @@ static int __init binder_init(void) 0444, binder_debugfs_dir_entry_root, NULL, - &binder_state_fops); + &state_fops); debugfs_create_file("stats", 0444, binder_debugfs_dir_entry_root, NULL, - &binder_stats_fops); + &stats_fops); debugfs_create_file("transactions", 0444, binder_debugfs_dir_entry_root, NULL, - &binder_transactions_fops); + &transactions_fops); debugfs_create_file("transaction_log", 0444, binder_debugfs_dir_entry_root, &binder_transaction_log, - &binder_transaction_log_fops); + &transaction_log_fops); debugfs_create_file("failed_transaction_log", 0444, binder_debugfs_dir_entry_root, &binder_transaction_log_failed, - &binder_transaction_log_fops); + &transaction_log_fops); } /* From 34fbd92d31235d51bc6020961368f37a263838ab Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 14 Dec 2018 13:11:14 +0100 Subject: [PATCH 2011/3715] BACKPORT: binder: implement binderfs As discussed at Linux Plumbers Conference 2018 in Vancouver [1] this is the implementation of binderfs. /* Abstract */ binderfs is a backwards-compatible filesystem for Android's binder ipc mechanism. Each ipc namespace will mount a new binderfs instance. Mounting binderfs multiple times at different locations in the same ipc namespace will not cause a new super block to be allocated and hence it will be the same filesystem instance. Each new binderfs mount will have its own set of binder devices only visible in the ipc namespace it has been mounted in. All devices in a new binderfs mount will follow the scheme binder%d and numbering will always start at 0. /* Backwards compatibility */ Devices requested in the Kconfig via CONFIG_ANDROID_BINDER_DEVICES for the initial ipc namespace will work as before. They will be registered via misc_register() and appear in the devtmpfs mount. Specifically, the standard devices binder, hwbinder, and vndbinder will all appear in their standard locations in /dev. Mounting or unmounting the binderfs mount in the initial ipc namespace will have no effect on these devices, i.e. they will neither show up in the binderfs mount nor will they disappear when the binderfs mount is gone. /* binder-control */ Each new binderfs instance comes with a binder-control device. No other devices will be present at first. The binder-control device can be used to dynamically allocate binder devices. All requests operate on the binderfs mount the binder-control device resides in. Assuming a new instance of binderfs has been mounted at /dev/binderfs via mount -t binderfs binderfs /dev/binderfs. Then a request to create a new binder device can be made as illustrated in [2]. Binderfs devices can simply be removed via unlink(). /* Implementation details */ - dynamic major number allocation: When binderfs is registered as a new filesystem it will dynamically allocate a new major number. The allocated major number will be returned in struct binderfs_device when a new binder device is allocated. - global minor number tracking: Minor are tracked in a global idr struct that is capped at BINDERFS_MAX_MINOR. The minor number tracker is protected by a global mutex. This is the only point of contention between binderfs mounts. - struct binderfs_info: Each binderfs super block has its own struct binderfs_info that tracks specific details about a binderfs instance: - ipc namespace - dentry of the binder-control device - root uid and root gid of the user namespace the binderfs instance was mounted in - mountable by user namespace root: binderfs can be mounted by user namespace root in a non-initial user namespace. The devices will be owned by user namespace root. - binderfs binder devices without misc infrastructure: New binder devices associated with a binderfs mount do not use the full misc_register() infrastructure. The misc_register() infrastructure can only create new devices in the host's devtmpfs mount. binderfs does however only make devices appear under its own mountpoint and thus allocates new character device nodes from the inode of the root dentry of the super block. This will have the side-effect that binderfs specific device nodes do not appear in sysfs. This behavior is similar to devpts allocated pts devices and has no effect on the functionality of the ipc mechanism itself. [1]: https://goo.gl/JL2tfX [2]: program to allocate a new binderfs binder device: #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include int main(int argc, char *argv[]) { int fd, ret, saved_errno; size_t len; struct binderfs_device device = { 0 }; if (argc < 2) exit(EXIT_FAILURE); len = strlen(argv[1]); if (len > BINDERFS_MAX_NAME) exit(EXIT_FAILURE); memcpy(device.name, argv[1], len); fd = open("/dev/binderfs/binder-control", O_RDONLY | O_CLOEXEC); if (fd < 0) { printf("%s - Failed to open binder-control device\n", strerror(errno)); exit(EXIT_FAILURE); } ret = ioctl(fd, BINDER_CTL_ADD, &device); saved_errno = errno; close(fd); errno = saved_errno; if (ret < 0) { printf("%s - Failed to allocate new binder device\n", strerror(errno)); exit(EXIT_FAILURE); } printf("Allocated new binder device with major %d, minor %d, and " "name %s\n", device.major, device.minor, device.name); exit(EXIT_SUCCESS); } Cc: Martijn Coenen Cc: Greg Kroah-Hartman Signed-off-by: Christian Brauner Acked-by: Todd Kjos Signed-off-by: Greg Kroah-Hartman Bug: 136497735 (cherry picked from commit 3ad20fe393b31025bebfc2d76964561f65df48aa) [ Using ida_simple_get() and ida_remove() instead of ida_alloc_max() and ida_free() as they are not available in 4.14 ] Change-Id: I145af9b0bc25b3a59a4f663c9e926889c2b41d18 Signed-off-by: Hridya Valsaraju --- drivers/android/Kconfig | 12 + drivers/android/Makefile | 1 + drivers/android/binder.c | 25 +- drivers/android/binder_internal.h | 49 +++ drivers/android/binderfs.c | 546 ++++++++++++++++++++++++ include/uapi/linux/android/binder_ctl.h | 35 ++ include/uapi/linux/magic.h | 1 + 7 files changed, 652 insertions(+), 17 deletions(-) create mode 100644 drivers/android/binder_internal.h create mode 100644 drivers/android/binderfs.c create mode 100644 include/uapi/linux/android/binder_ctl.h diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig index ee4880bfdcdc..3a90d51d8419 100644 --- a/drivers/android/Kconfig +++ b/drivers/android/Kconfig @@ -20,6 +20,18 @@ config ANDROID_BINDER_IPC Android process, using Binder to identify, invoke and pass arguments between said processes. +config ANDROID_BINDERFS + bool "Android Binderfs filesystem" + depends on ANDROID_BINDER_IPC + default n + ---help--- + Binderfs is a pseudo-filesystem for the Android Binder IPC driver + which can be mounted per-ipc namespace allowing to run multiple + instances of Android. + Each binderfs mount initially only contains a binder-control device. + It can be used to dynamically allocate new binder IPC devices via + ioctls. + config ANDROID_BINDER_DEVICES string "Android Binder devices" depends on ANDROID_BINDER_IPC diff --git a/drivers/android/Makefile b/drivers/android/Makefile index a01254c43ee3..c7856e3200da 100644 --- a/drivers/android/Makefile +++ b/drivers/android/Makefile @@ -1,4 +1,5 @@ ccflags-y += -I$(src) # needed for trace events +obj-$(CONFIG_ANDROID_BINDERFS) += binderfs.o obj-$(CONFIG_ANDROID_BINDER_IPC) += binder.o binder_alloc.o obj-$(CONFIG_ANDROID_BINDER_IPC_SELFTEST) += binder_alloc_selftest.o diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 6a1026be9af5..541957211045 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -75,6 +75,7 @@ #include #include #include "binder_alloc.h" +#include "binder_internal.h" #include "binder_trace.h" static HLIST_HEAD(binder_deferred_list); @@ -245,20 +246,6 @@ static struct binder_transaction_log_entry *binder_transaction_log_add( return e; } -struct binder_context { - struct binder_node *binder_context_mgr_node; - struct mutex context_mgr_node_lock; - - kuid_t binder_context_mgr_uid; - const char *name; -}; - -struct binder_device { - struct hlist_node hlist; - struct miscdevice miscdev; - struct binder_context context; -}; - /** * struct binder_work - work enqueued on a worklist * @entry: node enqueued on list @@ -5214,8 +5201,12 @@ static int binder_open(struct inode *nodp, struct file *filp) proc->default_priority.prio = NICE_TO_PRIO(0); } - binder_dev = container_of(filp->private_data, struct binder_device, - miscdev); + /* binderfs stashes devices in i_private */ + if (is_binderfs_device(nodp)) + binder_dev = nodp->i_private; + else + binder_dev = container_of(filp->private_data, + struct binder_device, miscdev); proc->context = &binder_dev->context; binder_alloc_init(&proc->alloc); @@ -6023,7 +6014,7 @@ static int transaction_log_show(struct seq_file *m, void *unused) return 0; } -static const struct file_operations binder_fops = { +const struct file_operations binder_fops = { .owner = THIS_MODULE, .poll = binder_poll, .unlocked_ioctl = binder_ioctl, diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h new file mode 100644 index 000000000000..7fb97f503ef2 --- /dev/null +++ b/drivers/android/binder_internal.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_BINDER_INTERNAL_H +#define _LINUX_BINDER_INTERNAL_H + +#include +#include +#include +#include +#include +#include +#include +#include + +struct binder_context { + struct binder_node *binder_context_mgr_node; + struct mutex context_mgr_node_lock; + kuid_t binder_context_mgr_uid; + const char *name; +}; + +/** + * struct binder_device - information about a binder device node + * @hlist: list of binder devices (only used for devices requested via + * CONFIG_ANDROID_BINDER_DEVICES) + * @miscdev: information about a binder character device node + * @context: binder context information + * @binderfs_inode: This is the inode of the root dentry of the super block + * belonging to a binderfs mount. + */ +struct binder_device { + struct hlist_node hlist; + struct miscdevice miscdev; + struct binder_context context; + struct inode *binderfs_inode; +}; + +extern const struct file_operations binder_fops; + +#ifdef CONFIG_ANDROID_BINDERFS +extern bool is_binderfs_device(const struct inode *inode); +#else +static inline bool is_binderfs_device(const struct inode *inode) +{ + return false; +} +#endif + +#endif /* _LINUX_BINDER_INTERNAL_H */ diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c new file mode 100644 index 000000000000..28b911fa616c --- /dev/null +++ b/drivers/android/binderfs.c @@ -0,0 +1,546 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +//#include +#include +#include +#include + +#include "binder_internal.h" + +#define FIRST_INODE 1 +#define SECOND_INODE 2 +#define INODE_OFFSET 3 +#define INTSTRLEN 21 +#define BINDERFS_MAX_MINOR (1U << MINORBITS) + +static struct vfsmount *binderfs_mnt; + +static dev_t binderfs_dev; +static DEFINE_MUTEX(binderfs_minors_mutex); +static DEFINE_IDA(binderfs_minors); + +/** + * binderfs_info - information about a binderfs mount + * @ipc_ns: The ipc namespace the binderfs mount belongs to. + * @control_dentry: This records the dentry of this binderfs mount + * binder-control device. + * @root_uid: uid that needs to be used when a new binder device is + * created. + * @root_gid: gid that needs to be used when a new binder device is + * created. + */ +struct binderfs_info { + struct ipc_namespace *ipc_ns; + struct dentry *control_dentry; + kuid_t root_uid; + kgid_t root_gid; + +}; + +static inline struct binderfs_info *BINDERFS_I(const struct inode *inode) +{ + return inode->i_sb->s_fs_info; +} + +bool is_binderfs_device(const struct inode *inode) +{ + if (inode->i_sb->s_magic == BINDERFS_SUPER_MAGIC) + return true; + + return false; +} + +/** + * binderfs_binder_device_create - allocate inode from super block of a + * binderfs mount + * @ref_inode: inode from wich the super block will be taken + * @userp: buffer to copy information about new device for userspace to + * @req: struct binderfs_device as copied from userspace + * + * This function allocated a new binder_device and reserves a new minor + * number for it. + * Minor numbers are limited and tracked globally in binderfs_minors. The + * function will stash a struct binder_device for the specific binder + * device in i_private of the inode. + * It will go on to allocate a new inode from the super block of the + * filesystem mount, stash a struct binder_device in its i_private field + * and attach a dentry to that inode. + * + * Return: 0 on success, negative errno on failure + */ +static int binderfs_binder_device_create(struct inode *ref_inode, + struct binderfs_device __user *userp, + struct binderfs_device *req) +{ + int minor, ret; + struct dentry *dentry, *dup, *root; + struct binder_device *device; + size_t name_len = BINDERFS_MAX_NAME + 1; + char *name = NULL; + struct inode *inode = NULL; + struct super_block *sb = ref_inode->i_sb; + struct binderfs_info *info = sb->s_fs_info; + + /* Reserve new minor number for the new device. */ + mutex_lock(&binderfs_minors_mutex); + minor = ida_simple_get(&binderfs_minors, 0, BINDERFS_MAX_MINOR + 1, + GFP_KERNEL); + mutex_unlock(&binderfs_minors_mutex); + if (minor < 0) + return minor; + + ret = -ENOMEM; + device = kzalloc(sizeof(*device), GFP_KERNEL); + if (!device) + goto err; + + inode = new_inode(sb); + if (!inode) + goto err; + + inode->i_ino = minor + INODE_OFFSET; + inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + init_special_inode(inode, S_IFCHR | 0600, + MKDEV(MAJOR(binderfs_dev), minor)); + inode->i_fop = &binder_fops; + inode->i_uid = info->root_uid; + inode->i_gid = info->root_gid; + + name = kmalloc(name_len, GFP_KERNEL); + if (!name) + goto err; + + strscpy(name, req->name, name_len); + + device->binderfs_inode = inode; + device->context.binder_context_mgr_uid = INVALID_UID; + device->context.name = name; + device->miscdev.name = name; + device->miscdev.minor = minor; + mutex_init(&device->context.context_mgr_node_lock); + + req->major = MAJOR(binderfs_dev); + req->minor = minor; + + ret = copy_to_user(userp, req, sizeof(*req)); + if (ret) { + ret = -EFAULT; + goto err; + } + + root = sb->s_root; + inode_lock(d_inode(root)); + dentry = d_alloc_name(root, name); + if (!dentry) { + inode_unlock(d_inode(root)); + ret = -ENOMEM; + goto err; + } + + /* Verify that the name userspace gave us is not already in use. */ + dup = d_lookup(root, &dentry->d_name); + if (dup) { + if (d_really_is_positive(dup)) { + dput(dup); + dput(dentry); + inode_unlock(d_inode(root)); + ret = -EEXIST; + goto err; + } + dput(dup); + } + + inode->i_private = device; + d_add(dentry, inode); + fsnotify_create(root->d_inode, dentry); + inode_unlock(d_inode(root)); + + return 0; + +err: + kfree(name); + kfree(device); + mutex_lock(&binderfs_minors_mutex); + ida_remove(&binderfs_minors, minor); + mutex_unlock(&binderfs_minors_mutex); + iput(inode); + + return ret; +} + +/** + * binderfs_ctl_ioctl - handle binder device node allocation requests + * + * The request handler for the binder-control device. All requests operate on + * the binderfs mount the binder-control device resides in: + * - BINDER_CTL_ADD + * Allocate a new binder device. + * + * Return: 0 on success, negative errno on failure + */ +static long binder_ctl_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + int ret = -EINVAL; + struct inode *inode = file_inode(file); + struct binderfs_device __user *device = (struct binderfs_device __user *)arg; + struct binderfs_device device_req; + + switch (cmd) { + case BINDER_CTL_ADD: + ret = copy_from_user(&device_req, device, sizeof(device_req)); + if (ret) { + ret = -EFAULT; + break; + } + + ret = binderfs_binder_device_create(inode, device, &device_req); + break; + default: + break; + } + + return ret; +} + +static void binderfs_evict_inode(struct inode *inode) +{ + struct binder_device *device = inode->i_private; + + clear_inode(inode); + + if (!device) + return; + + mutex_lock(&binderfs_minors_mutex); + ida_remove(&binderfs_minors, device->miscdev.minor); + mutex_unlock(&binderfs_minors_mutex); + + kfree(device->context.name); + kfree(device); +} + +static const struct super_operations binderfs_super_ops = { + .statfs = simple_statfs, + .evict_inode = binderfs_evict_inode, +}; + +static int binderfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + struct inode *inode = d_inode(old_dentry); + + /* binderfs doesn't support directories. */ + if (d_is_dir(old_dentry)) + return -EPERM; + + if (flags & ~RENAME_NOREPLACE) + return -EINVAL; + + if (!simple_empty(new_dentry)) + return -ENOTEMPTY; + + if (d_really_is_positive(new_dentry)) + simple_unlink(new_dir, new_dentry); + + old_dir->i_ctime = old_dir->i_mtime = new_dir->i_ctime = + new_dir->i_mtime = inode->i_ctime = current_time(old_dir); + + return 0; +} + +static int binderfs_unlink(struct inode *dir, struct dentry *dentry) +{ + /* + * The control dentry is only ever touched during mount so checking it + * here should not require us to take lock. + */ + if (BINDERFS_I(dir)->control_dentry == dentry) + return -EPERM; + + return simple_unlink(dir, dentry); +} + +static const struct file_operations binder_ctl_fops = { + .owner = THIS_MODULE, + .open = nonseekable_open, + .unlocked_ioctl = binder_ctl_ioctl, + .compat_ioctl = binder_ctl_ioctl, + .llseek = noop_llseek, +}; + +/** + * binderfs_binder_ctl_create - create a new binder-control device + * @sb: super block of the binderfs mount + * + * This function creates a new binder-control device node in the binderfs mount + * referred to by @sb. + * + * Return: 0 on success, negative errno on failure + */ +static int binderfs_binder_ctl_create(struct super_block *sb) +{ + int minor, ret; + struct dentry *dentry; + struct binder_device *device; + struct inode *inode = NULL; + struct dentry *root = sb->s_root; + struct binderfs_info *info = sb->s_fs_info; + + device = kzalloc(sizeof(*device), GFP_KERNEL); + if (!device) + return -ENOMEM; + + inode_lock(d_inode(root)); + + /* If we have already created a binder-control node, return. */ + if (info->control_dentry) { + ret = 0; + goto out; + } + + ret = -ENOMEM; + inode = new_inode(sb); + if (!inode) + goto out; + + /* Reserve a new minor number for the new device. */ + mutex_lock(&binderfs_minors_mutex); + minor = ida_simple_get(&binderfs_minors, 0, BINDERFS_MAX_MINOR + 1, + GFP_KERNEL); + mutex_unlock(&binderfs_minors_mutex); + if (minor < 0) { + ret = minor; + goto out; + } + + inode->i_ino = SECOND_INODE; + inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + init_special_inode(inode, S_IFCHR | 0600, + MKDEV(MAJOR(binderfs_dev), minor)); + inode->i_fop = &binder_ctl_fops; + inode->i_uid = info->root_uid; + inode->i_gid = info->root_gid; + + device->binderfs_inode = inode; + device->miscdev.minor = minor; + + dentry = d_alloc_name(root, "binder-control"); + if (!dentry) + goto out; + + inode->i_private = device; + info->control_dentry = dentry; + d_add(dentry, inode); + inode_unlock(d_inode(root)); + + return 0; + +out: + inode_unlock(d_inode(root)); + kfree(device); + iput(inode); + + return ret; +} + +static const struct inode_operations binderfs_dir_inode_operations = { + .lookup = simple_lookup, + .rename = binderfs_rename, + .unlink = binderfs_unlink, +}; + +static int binderfs_fill_super(struct super_block *sb, void *data, int silent) +{ + struct binderfs_info *info; + int ret = -ENOMEM; + struct inode *inode = NULL; + struct ipc_namespace *ipc_ns = sb->s_fs_info; + + get_ipc_ns(ipc_ns); + + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; + + /* + * The binderfs filesystem can be mounted by userns root in a + * non-initial userns. By default such mounts have the SB_I_NODEV flag + * set in s_iflags to prevent security issues where userns root can + * just create random device nodes via mknod() since it owns the + * filesystem mount. But binderfs does not allow to create any files + * including devices nodes. The only way to create binder devices nodes + * is through the binder-control device which userns root is explicitly + * allowed to do. So removing the SB_I_NODEV flag from s_iflags is both + * necessary and safe. + */ + sb->s_iflags &= ~SB_I_NODEV; + sb->s_iflags |= SB_I_NOEXEC; + sb->s_magic = BINDERFS_SUPER_MAGIC; + sb->s_op = &binderfs_super_ops; + sb->s_time_gran = 1; + + info = kzalloc(sizeof(struct binderfs_info), GFP_KERNEL); + if (!info) + goto err_without_dentry; + + info->ipc_ns = ipc_ns; + info->root_gid = make_kgid(sb->s_user_ns, 0); + if (!gid_valid(info->root_gid)) + info->root_gid = GLOBAL_ROOT_GID; + info->root_uid = make_kuid(sb->s_user_ns, 0); + if (!uid_valid(info->root_uid)) + info->root_uid = GLOBAL_ROOT_UID; + + sb->s_fs_info = info; + + inode = new_inode(sb); + if (!inode) + goto err_without_dentry; + + inode->i_ino = FIRST_INODE; + inode->i_fop = &simple_dir_operations; + inode->i_mode = S_IFDIR | 0755; + inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + inode->i_op = &binderfs_dir_inode_operations; + set_nlink(inode, 2); + + sb->s_root = d_make_root(inode); + if (!sb->s_root) + goto err_without_dentry; + + ret = binderfs_binder_ctl_create(sb); + if (ret) + goto err_with_dentry; + + return 0; + +err_with_dentry: + dput(sb->s_root); + sb->s_root = NULL; + +err_without_dentry: + put_ipc_ns(ipc_ns); + iput(inode); + kfree(info); + + return ret; +} + +static int binderfs_test_super(struct super_block *sb, void *data) +{ + struct binderfs_info *info = sb->s_fs_info; + + if (info) + return info->ipc_ns == data; + + return 0; +} + +static int binderfs_set_super(struct super_block *sb, void *data) +{ + sb->s_fs_info = data; + return set_anon_super(sb, NULL); +} + +static struct dentry *binderfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data) +{ + struct super_block *sb; + struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; + + if (!ns_capable(ipc_ns->user_ns, CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + + sb = sget_userns(fs_type, binderfs_test_super, binderfs_set_super, + flags, ipc_ns->user_ns, ipc_ns); + if (IS_ERR(sb)) + return ERR_CAST(sb); + + if (!sb->s_root) { + int ret = binderfs_fill_super(sb, data, flags & SB_SILENT ? 1 : 0); + if (ret) { + deactivate_locked_super(sb); + return ERR_PTR(ret); + } + + sb->s_flags |= SB_ACTIVE; + } + + return dget(sb->s_root); +} + +static void binderfs_kill_super(struct super_block *sb) +{ + struct binderfs_info *info = sb->s_fs_info; + + if (info && info->ipc_ns) + put_ipc_ns(info->ipc_ns); + + kfree(info); + kill_litter_super(sb); +} + +static struct file_system_type binder_fs_type = { + .name = "binder", + .mount = binderfs_mount, + .kill_sb = binderfs_kill_super, + .fs_flags = FS_USERNS_MOUNT, +}; + +static int __init init_binderfs(void) +{ + int ret; + + /* Allocate new major number for binderfs. */ + ret = alloc_chrdev_region(&binderfs_dev, 0, BINDERFS_MAX_MINOR, + "binder"); + if (ret) + return ret; + + ret = register_filesystem(&binder_fs_type); + if (ret) { + unregister_chrdev_region(binderfs_dev, BINDERFS_MAX_MINOR); + return ret; + } + + binderfs_mnt = kern_mount(&binder_fs_type); + if (IS_ERR(binderfs_mnt)) { + ret = PTR_ERR(binderfs_mnt); + binderfs_mnt = NULL; + unregister_filesystem(&binder_fs_type); + unregister_chrdev_region(binderfs_dev, BINDERFS_MAX_MINOR); + } + + return ret; +} + +device_initcall(init_binderfs); diff --git a/include/uapi/linux/android/binder_ctl.h b/include/uapi/linux/android/binder_ctl.h new file mode 100644 index 000000000000..65b2efd1a0a5 --- /dev/null +++ b/include/uapi/linux/android/binder_ctl.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2018 Canonical Ltd. + * + */ + +#ifndef _UAPI_LINUX_BINDER_CTL_H +#define _UAPI_LINUX_BINDER_CTL_H + +#include +#include +#include + +#define BINDERFS_MAX_NAME 255 + +/** + * struct binderfs_device - retrieve information about a new binder device + * @name: the name to use for the new binderfs binder device + * @major: major number allocated for binderfs binder devices + * @minor: minor number allocated for the new binderfs binder device + * + */ +struct binderfs_device { + char name[BINDERFS_MAX_NAME + 1]; + __u8 major; + __u8 minor; +}; + +/** + * Allocate a new binder device. + */ +#define BINDER_CTL_ADD _IOWR('b', 1, struct binderfs_device) + +#endif /* _UAPI_LINUX_BINDER_CTL_H */ + diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index d94c457845b1..de394b15079d 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -73,6 +73,7 @@ #define DAXFS_MAGIC 0x64646178 #define BINFMTFS_MAGIC 0x42494e4d #define DEVPTS_SUPER_MAGIC 0x1cd1 +#define BINDERFS_SUPER_MAGIC 0x6c6f6f70 #define FUTEXFS_SUPER_MAGIC 0xBAD1DEA #define PIPEFS_MAGIC 0x50495045 #define PROC_SUPER_MAGIC 0x9fa0 From ea40e35762bf340bb3a45db22265f16fdab224f9 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sun, 6 Jan 2019 15:05:40 +0100 Subject: [PATCH 2012/3715] UPSTREAM: binderfs: remove wrong kern_mount() call The binderfs filesystem never needs to be mounted by the kernel itself. This is conceptually wrong and should never have been done in the first place. Fixes: 3ad20fe393b ("binder: implement binderfs") Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman Bug: 136497735 (cherry picked from commit fdd94acd50d607cf6a971455307e711fd8ee16e) Change-Id: Ife722830ecb64ab75ccdd012043864ae1b10d792 Signed-off-by: Hridya Valsaraju --- drivers/android/binderfs.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 28b911fa616c..200b58fc6a1c 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -40,8 +40,6 @@ #define INTSTRLEN 21 #define BINDERFS_MAX_MINOR (1U << MINORBITS) -static struct vfsmount *binderfs_mnt; - static dev_t binderfs_dev; static DEFINE_MUTEX(binderfs_minors_mutex); static DEFINE_IDA(binderfs_minors); @@ -532,14 +530,6 @@ static int __init init_binderfs(void) return ret; } - binderfs_mnt = kern_mount(&binder_fs_type); - if (IS_ERR(binderfs_mnt)) { - ret = PTR_ERR(binderfs_mnt); - binderfs_mnt = NULL; - unregister_filesystem(&binder_fs_type); - unregister_chrdev_region(binderfs_dev, BINDERFS_MAX_MINOR); - } - return ret; } From 723aff7432f110f7c21db7cac317dfed0eee7889 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sun, 6 Jan 2019 15:05:41 +0100 Subject: [PATCH 2013/3715] UPSTREAM: binderfs: make each binderfs mount a new instance When currently mounting binderfs in the same ipc namespace twice: mount -t binder binder /A mount -t binder binder /B then the binderfs instances mounted on /A and /B will be the same, i.e. they will have the same superblock. This was the first approach that seemed reasonable. However, this leads to some problems and inconsistencies: /* private binderfs instance in same ipc namespace */ There is no way for a user to request a private binderfs instance in the same ipc namespace. This request has been made in a private mail to me by two independent people. /* bind-mounts */ If users want the same binderfs instance to appear in multiple places they can use bind mounts. So there is no value in having a request for a new binderfs mount giving them the same instance. /* unexpected behavior */ It's surprising that request to mount binderfs is not giving the user a new instance like tmpfs, devpts, ramfs, and others do. /* past mistakes */ Other pseudo-filesystems once made the same mistakes of giving back the same superblock when actually requesting a new mount (cf. devpts's deprecated "newinstance" option). We should not make the same mistake. Once we've committed to always giving back the same superblock in the same IPC namespace with the next kernel release we will not be able to make that change so better to do it now. /* kdbusfs */ It was pointed out to me that kdbusfs - which is conceptually closely related to binderfs - also allowed users to get a private kdbusfs instance in the same IPC namespace by making each mount of kdbusfs a separate instance. I think that makes a lot of sense. Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman Bug: 136497735 (cherry picked from commit b6c770d7c9dc7185b17d53a9d5ca1278c182d6fa) Change-Id: I7e341524f625802429f89966d9edf9cab9ca59f3 Signed-off-by: Hridya Valsaraju --- drivers/android/binderfs.c | 41 ++------------------------------------ 1 file changed, 2 insertions(+), 39 deletions(-) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 200b58fc6a1c..d52128cfb76d 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -381,7 +381,7 @@ static int binderfs_fill_super(struct super_block *sb, void *data, int silent) struct binderfs_info *info; int ret = -ENOMEM; struct inode *inode = NULL; - struct ipc_namespace *ipc_ns = sb->s_fs_info; + struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; get_ipc_ns(ipc_ns); @@ -452,48 +452,11 @@ err_without_dentry: return ret; } -static int binderfs_test_super(struct super_block *sb, void *data) -{ - struct binderfs_info *info = sb->s_fs_info; - - if (info) - return info->ipc_ns == data; - - return 0; -} - -static int binderfs_set_super(struct super_block *sb, void *data) -{ - sb->s_fs_info = data; - return set_anon_super(sb, NULL); -} - static struct dentry *binderfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - struct super_block *sb; - struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; - - if (!ns_capable(ipc_ns->user_ns, CAP_SYS_ADMIN)) - return ERR_PTR(-EPERM); - - sb = sget_userns(fs_type, binderfs_test_super, binderfs_set_super, - flags, ipc_ns->user_ns, ipc_ns); - if (IS_ERR(sb)) - return ERR_CAST(sb); - - if (!sb->s_root) { - int ret = binderfs_fill_super(sb, data, flags & SB_SILENT ? 1 : 0); - if (ret) { - deactivate_locked_super(sb); - return ERR_PTR(ret); - } - - sb->s_flags |= SB_ACTIVE; - } - - return dget(sb->s_root); + return mount_nodev(fs_type, flags, data, binderfs_fill_super); } static void binderfs_kill_super(struct super_block *sb) From b67b1bf771ee8f8515c364a530632812161168cb Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 2 Jan 2019 12:32:18 +0100 Subject: [PATCH 2014/3715] BACKPORT: binderfs: implement "max" mount option Since binderfs can be mounted by userns root in non-initial user namespaces some precautions are in order. First, a way to set a maximum on the number of binder devices that can be allocated per binderfs instance and second, a way to reserve a reasonable chunk of binderfs devices for the initial ipc namespace. A first approach as seen in [1] used sysctls similiar to devpts but was shown to be flawed (cf. [2] and [3]) since some aspects were unneeded. This is an alternative approach which avoids sysctls completely and instead switches to a single mount option. Starting with this commit binderfs instances can be mounted with a limit on the number of binder devices that can be allocated. The max= mount option serves as a per-instance limit. If max= is set then only number of binder devices can be allocated in this binderfs instance. This allows to safely bind-mount binderfs instances into unprivileged user namespaces since userns root in a non-initial user namespace cannot change the mount option as long as it does not own the mount namespace the binderfs mount was created in and hence cannot drain the host of minor device numbers [1]: https://lore.kernel.org/lkml/20181221133909.18794-1-christian@brauner.io/ [2]; https://lore.kernel.org/lkml/20181221163316.GA8517@kroah.com/ [3]: https://lore.kernel.org/lkml/CAHRSSEx+gDVW4fKKK8oZNAir9G5icJLyodO8hykv3O0O1jt2FQ@mail.gmail.com/ [4]: https://lore.kernel.org/lkml/20181221192044.5yvfnuri7gdop4rs@brauner.io/ Cc: Todd Kjos Cc: Greg Kroah-Hartman Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman Bug: 136497735 (cherry picked from commit 849d540ddfcd4f232f3b2cf40a2e07eccbd6212c) [ Resolved minor conflict from switch to ida_get_simple/ida_remove ]. Change-Id: Idfc17f9570d165b05779d0bfdb782117beb9c44e Signed-off-by: Hridya Valsaraju --- drivers/android/binderfs.c | 105 ++++++++++++++++++++++++++++++++++--- 1 file changed, 98 insertions(+), 7 deletions(-) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index d52128cfb76d..33905951b0b6 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -44,6 +45,24 @@ static dev_t binderfs_dev; static DEFINE_MUTEX(binderfs_minors_mutex); static DEFINE_IDA(binderfs_minors); +/** + * binderfs_mount_opts - mount options for binderfs + * @max: maximum number of allocatable binderfs binder devices + */ +struct binderfs_mount_opts { + int max; +}; + +enum { + Opt_max, + Opt_err +}; + +static const match_table_t tokens = { + { Opt_max, "max=%d" }, + { Opt_err, NULL } +}; + /** * binderfs_info - information about a binderfs mount * @ipc_ns: The ipc namespace the binderfs mount belongs to. @@ -53,13 +72,16 @@ static DEFINE_IDA(binderfs_minors); * created. * @root_gid: gid that needs to be used when a new binder device is * created. + * @mount_opts: The mount options in use. + * @device_count: The current number of allocated binder devices. */ struct binderfs_info { struct ipc_namespace *ipc_ns; struct dentry *control_dentry; kuid_t root_uid; kgid_t root_gid; - + struct binderfs_mount_opts mount_opts; + int device_count; }; static inline struct binderfs_info *BINDERFS_I(const struct inode *inode) @@ -108,11 +130,17 @@ static int binderfs_binder_device_create(struct inode *ref_inode, /* Reserve new minor number for the new device. */ mutex_lock(&binderfs_minors_mutex); - minor = ida_simple_get(&binderfs_minors, 0, BINDERFS_MAX_MINOR + 1, - GFP_KERNEL); - mutex_unlock(&binderfs_minors_mutex); - if (minor < 0) + if (++info->device_count <= info->mount_opts.max) + minor = ida_simple_get(&binderfs_minors, 0, BINDERFS_MAX_MINOR + 1, + GFP_KERNEL); + else + minor = -ENOSPC; + if (minor < 0) { + --info->device_count; + mutex_unlock(&binderfs_minors_mutex); return minor; + } + mutex_unlock(&binderfs_minors_mutex); ret = -ENOMEM; device = kzalloc(sizeof(*device), GFP_KERNEL); @@ -186,6 +214,7 @@ err: kfree(name); kfree(device); mutex_lock(&binderfs_minors_mutex); + --info->device_count; ida_remove(&binderfs_minors, minor); mutex_unlock(&binderfs_minors_mutex); iput(inode); @@ -231,6 +260,7 @@ static long binder_ctl_ioctl(struct file *file, unsigned int cmd, static void binderfs_evict_inode(struct inode *inode) { struct binder_device *device = inode->i_private; + struct binderfs_info *info = BINDERFS_I(inode); clear_inode(inode); @@ -238,6 +268,7 @@ static void binderfs_evict_inode(struct inode *inode) return; mutex_lock(&binderfs_minors_mutex); + --info->device_count; ida_remove(&binderfs_minors, device->miscdev.minor); mutex_unlock(&binderfs_minors_mutex); @@ -245,9 +276,65 @@ static void binderfs_evict_inode(struct inode *inode) kfree(device); } +/** + * binderfs_parse_mount_opts - parse binderfs mount options + * @data: options to set (can be NULL in which case defaults are used) + */ +static int binderfs_parse_mount_opts(char *data, + struct binderfs_mount_opts *opts) +{ + char *p; + opts->max = BINDERFS_MAX_MINOR; + + while ((p = strsep(&data, ",")) != NULL) { + substring_t args[MAX_OPT_ARGS]; + int token; + int max_devices; + + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + case Opt_max: + if (match_int(&args[0], &max_devices) || + (max_devices < 0 || + (max_devices > BINDERFS_MAX_MINOR))) + return -EINVAL; + + opts->max = max_devices; + break; + default: + pr_err("Invalid mount options\n"); + return -EINVAL; + } + } + + return 0; +} + +static int binderfs_remount(struct super_block *sb, int *flags, char *data) +{ + struct binderfs_info *info = sb->s_fs_info; + return binderfs_parse_mount_opts(data, &info->mount_opts); +} + +static int binderfs_show_mount_opts(struct seq_file *seq, struct dentry *root) +{ + struct binderfs_info *info; + + info = root->d_sb->s_fs_info; + if (info->mount_opts.max <= BINDERFS_MAX_MINOR) + seq_printf(seq, ",max=%d", info->mount_opts.max); + + return 0; +} + static const struct super_operations binderfs_super_ops = { - .statfs = simple_statfs, - .evict_inode = binderfs_evict_inode, + .evict_inode = binderfs_evict_inode, + .remount_fs = binderfs_remount, + .show_options = binderfs_show_mount_opts, + .statfs = simple_statfs, }; static int binderfs_rename(struct inode *old_dir, struct dentry *old_dentry, @@ -409,6 +496,10 @@ static int binderfs_fill_super(struct super_block *sb, void *data, int silent) if (!info) goto err_without_dentry; + ret = binderfs_parse_mount_opts(data, &info->mount_opts); + if (ret) + goto err_without_dentry; + info->ipc_ns = ipc_ns; info->root_gid = make_kgid(sb->s_user_ns, 0); if (!gid_valid(info->root_gid)) From 4057c1f08200a7c24ae29ff03ee6698f15f2c162 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 11 Jan 2019 00:25:41 +0100 Subject: [PATCH 2015/3715] UPSTREAM: binderfs: rename header to binderfs.h It doesn't make sense to call the header binder_ctl.h when its sole existence is tied to binderfs. So give it a sensible name. Users will far more easily remember binderfs.h than binder_ctl.h. Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman Bug: 136497735 (cherry picked from commit c13295ad219d8bb0e47942d4cfc8251de449a67e) Change-Id: Ide6275bbbaec2e25df19e11754afb7f1827888b2 Signed-off-by: Hridya Valsaraju --- drivers/android/binderfs.c | 2 +- include/uapi/linux/android/{binder_ctl.h => binderfs.h} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename include/uapi/linux/android/{binder_ctl.h => binderfs.h} (100%) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 33905951b0b6..b95b74264455 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -31,7 +31,7 @@ //#include #include #include -#include +#include #include "binder_internal.h" diff --git a/include/uapi/linux/android/binder_ctl.h b/include/uapi/linux/android/binderfs.h similarity index 100% rename from include/uapi/linux/android/binder_ctl.h rename to include/uapi/linux/android/binderfs.h From c8823b16d624817124ec12ba8aa4d9eac3b6f7a2 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 11 Jan 2019 11:19:40 +0100 Subject: [PATCH 2016/3715] BACKPORT: binderfs: reserve devices for initial mount The binderfs instance in the initial ipc namespace will always have a reserve of 4 binder devices unless explicitly capped by specifying a lower value via the "max" mount option. This ensures when binder devices are removed (on accident or on purpose) they can always be recreated without risking that all minor numbers have already been used up. Cc: Todd Kjos Cc: Greg Kroah-Hartman Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman Bug: 136497735 (cherry picked from commit 36bdf3cae09df891b191f3955c8e54a2e05d67d0) [Resolved minor conflicts due to the switch to ida_get_simple/ida_remove] Change-Id: I001f305659c37e3b631696712332ae2e21464be8 Signed-off-by: Hridya Valsaraju --- drivers/android/binderfs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index b95b74264455..c100bc1fca35 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -40,6 +40,8 @@ #define INODE_OFFSET 3 #define INTSTRLEN 21 #define BINDERFS_MAX_MINOR (1U << MINORBITS) +/* Ensure that the initial ipc namespace always has devices available. */ +#define BINDERFS_MAX_MINOR_CAPPED (BINDERFS_MAX_MINOR - 4) static dev_t binderfs_dev; static DEFINE_MUTEX(binderfs_minors_mutex); @@ -127,11 +129,14 @@ static int binderfs_binder_device_create(struct inode *ref_inode, struct inode *inode = NULL; struct super_block *sb = ref_inode->i_sb; struct binderfs_info *info = sb->s_fs_info; + bool use_reserve = (info->ipc_ns == &init_ipc_ns); /* Reserve new minor number for the new device. */ mutex_lock(&binderfs_minors_mutex); if (++info->device_count <= info->mount_opts.max) - minor = ida_simple_get(&binderfs_minors, 0, BINDERFS_MAX_MINOR + 1, + minor = ida_simple_get(&binderfs_minors, 0, + use_reserve ? BINDERFS_MAX_MINOR + 1: + BINDERFS_MAX_MINOR_CAPPED + 1, GFP_KERNEL); else minor = -ENOSPC; From efd33f2e3d00655092a92a5ba513d12c776018b8 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sat, 12 Jan 2019 01:06:03 +0100 Subject: [PATCH 2017/3715] UPSTREAM: binderfs: handle !CONFIG_IPC_NS builds kbuild reported a build faile in [1]. This is triggered when CONFIG_IPC_NS is not set. So let's make the use of init_ipc_ns conditional on CONFIG_IPC_NS being set. [1]: https://lists.01.org/pipermail/kbuild-all/2019-January/056903.html Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman Bug: 136497735 (cherry picked from commit 7fefaadd6a962987baac50e7b3c4c3d5ef9b55c6) Change-Id: I97b0a7a13a82d79d97fe340d4267795e4e6442c7 Signed-off-by: Hridya Valsaraju --- drivers/android/binderfs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index c100bc1fca35..28132bdcafc9 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -129,7 +129,11 @@ static int binderfs_binder_device_create(struct inode *ref_inode, struct inode *inode = NULL; struct super_block *sb = ref_inode->i_sb; struct binderfs_info *info = sb->s_fs_info; +#if defined(CONFIG_IPC_NS) bool use_reserve = (info->ipc_ns == &init_ipc_ns); +#else + bool use_reserve = true; +#endif /* Reserve new minor number for the new device. */ mutex_lock(&binderfs_minors_mutex); From 5cf918231a6cb0036fe765d3a0563e196a051ecf Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 16 Jan 2019 10:42:59 +0000 Subject: [PATCH 2018/3715] UPSTREAM: binderfs: fix error return code in binderfs_fill_super() Fix to return a negative error code -ENOMEM from the new_inode() and d_make_root() error handling cases instead of 0, as done elsewhere in this function. Fixes: 849d540ddfcd ("binderfs: implement "max" mount option") Signed-off-by: Wei Yongjun Reviewed-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman Bug: 136497735 (cherry picked from commit 7e7ca7744a539f1a172e3b81c29d000787e3d774) Change-Id: If9d120c4abdbc0d5528c85d2515a9d5e40addfdc Signed-off-by: Hridya Valsaraju --- drivers/android/binderfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 28132bdcafc9..ac3be5cdd5ad 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -519,6 +519,7 @@ static int binderfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_fs_info = info; + ret = -ENOMEM; inode = new_inode(sb); if (!inode) goto err_without_dentry; From f002cbca0875e1ee0f2df810984452e42339621d Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 21 Jan 2019 11:48:02 +0100 Subject: [PATCH 2019/3715] UPSTREAM: binderfs: remove outdated comment The comment stems from an early version of that patchset and is just confusing now. Cc: Al Viro Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman Bug: 136497735 (cherry picked from commit 7c4d08fc4d5aca073bd4ebecbb9eda5e4d858b71) Change-Id: I8d5376e217763d7a6203a54516d4220ccdbe268d Signed-off-by: Hridya Valsaraju --- drivers/android/binderfs.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index ac3be5cdd5ad..81f36c061174 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -373,10 +373,6 @@ static int binderfs_rename(struct inode *old_dir, struct dentry *old_dentry, static int binderfs_unlink(struct inode *dir, struct dentry *dentry) { - /* - * The control dentry is only ever touched during mount so checking it - * here should not require us to take lock. - */ if (BINDERFS_I(dir)->control_dentry == dentry) return -EPERM; From 1ec2b9efbccce8fab2287739947a2550ce872601 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 21 Jan 2019 11:48:03 +0100 Subject: [PATCH 2020/3715] UPSTREAM: binderfs: prevent renaming the control dentry - make binderfs control dentry immutable: We don't allow to unlink it since it is crucial for binderfs to be useable but if we allow to rename it we make the unlink trivial to bypass. So prevent renaming too and simply treat the control dentry as immutable. - add is_binderfs_control_device() helper: Take the opportunity and turn the check for the control dentry into a separate helper is_binderfs_control_device() since it's now used in two places. - simplify binderfs_rename(): Instead of hand-rolling our custom version of simple_rename() just dumb the whole function down to first check whether we're trying to rename the control dentry. If we do EPERM the caller and if not call simple_rename(). Suggested-by: Al Viro Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman Bug: 136497735 (cherry picked from commit e98e6fa18636609f14a7f866524950a783cf4fbf) Change-Id: I44e49a144b624c360ab8a277970625c64511da15 Signed-off-by: Hridya Valsaraju --- drivers/android/binderfs.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 81f36c061174..c9152b812f63 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -346,34 +346,26 @@ static const struct super_operations binderfs_super_ops = { .statfs = simple_statfs, }; +static inline bool is_binderfs_control_device(const struct dentry *dentry) +{ + struct binderfs_info *info = dentry->d_sb->s_fs_info; + return info->control_dentry == dentry; +} + static int binderfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { - struct inode *inode = d_inode(old_dentry); - - /* binderfs doesn't support directories. */ - if (d_is_dir(old_dentry)) + if (is_binderfs_control_device(old_dentry) || + is_binderfs_control_device(new_dentry)) return -EPERM; - if (flags & ~RENAME_NOREPLACE) - return -EINVAL; - - if (!simple_empty(new_dentry)) - return -ENOTEMPTY; - - if (d_really_is_positive(new_dentry)) - simple_unlink(new_dir, new_dentry); - - old_dir->i_ctime = old_dir->i_mtime = new_dir->i_ctime = - new_dir->i_mtime = inode->i_ctime = current_time(old_dir); - - return 0; + return simple_rename(old_dir, old_dentry, new_dir, new_dentry, flags); } static int binderfs_unlink(struct inode *dir, struct dentry *dentry) { - if (BINDERFS_I(dir)->control_dentry == dentry) + if (is_binderfs_control_device(dentry)) return -EPERM; return simple_unlink(dir, dentry); From 5f5167ab4042b11a4c0c9effb320e1d128588d96 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 21 Jan 2019 11:48:04 +0100 Subject: [PATCH 2021/3715] UPSTREAM: binderfs: rework binderfs_fill_super() Al pointed out that on binderfs_fill_super() error deactivate_locked_super() will call binderfs_kill_super() so all of the freeing and putting we currently do in binderfs_fill_super() is unnecessary and buggy. Let's simply return errors and let binderfs_fill_super() take care of cleaning up on error. Suggested-by: Al Viro Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman Bug: 136497735 (cherry picked from commit 36975fc3e5f241cc4f45df4ab4624d7d5199d9ed) Change-Id: I89cac3746d67638901e554c3ede6c0f2931e67d4 Signed-off-by: Hridya Valsaraju --- drivers/android/binderfs.c | 41 ++++++++++---------------------------- 1 file changed, 11 insertions(+), 30 deletions(-) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index c9152b812f63..25bc3af2c4cf 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -462,12 +462,9 @@ static const struct inode_operations binderfs_dir_inode_operations = { static int binderfs_fill_super(struct super_block *sb, void *data, int silent) { + int ret; struct binderfs_info *info; - int ret = -ENOMEM; struct inode *inode = NULL; - struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; - - get_ipc_ns(ipc_ns); sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; @@ -489,15 +486,17 @@ static int binderfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_op = &binderfs_super_ops; sb->s_time_gran = 1; - info = kzalloc(sizeof(struct binderfs_info), GFP_KERNEL); - if (!info) - goto err_without_dentry; + sb->s_fs_info = kzalloc(sizeof(struct binderfs_info), GFP_KERNEL); + if (!sb->s_fs_info) + return -ENOMEM; + info = sb->s_fs_info; + + info->ipc_ns = get_ipc_ns(current->nsproxy->ipc_ns); ret = binderfs_parse_mount_opts(data, &info->mount_opts); if (ret) - goto err_without_dentry; + return ret; - info->ipc_ns = ipc_ns; info->root_gid = make_kgid(sb->s_user_ns, 0); if (!gid_valid(info->root_gid)) info->root_gid = GLOBAL_ROOT_GID; @@ -505,12 +504,9 @@ static int binderfs_fill_super(struct super_block *sb, void *data, int silent) if (!uid_valid(info->root_uid)) info->root_uid = GLOBAL_ROOT_UID; - sb->s_fs_info = info; - - ret = -ENOMEM; inode = new_inode(sb); if (!inode) - goto err_without_dentry; + return -ENOMEM; inode->i_ino = FIRST_INODE; inode->i_fop = &simple_dir_operations; @@ -521,24 +517,9 @@ static int binderfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_root = d_make_root(inode); if (!sb->s_root) - goto err_without_dentry; + return -ENOMEM; - ret = binderfs_binder_ctl_create(sb); - if (ret) - goto err_with_dentry; - - return 0; - -err_with_dentry: - dput(sb->s_root); - sb->s_root = NULL; - -err_without_dentry: - put_ipc_ns(ipc_ns); - iput(inode); - kfree(info); - - return ret; + return binderfs_binder_ctl_create(sb); } static struct dentry *binderfs_mount(struct file_system_type *fs_type, From ed0703a9be7b044f3c30ea9c8c8d2be9b395ead3 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 21 Jan 2019 11:48:05 +0100 Subject: [PATCH 2022/3715] UPSTREAM: binderfs: rework binderfs_binder_device_create() - switch from d_alloc_name() + d_lookup() to lookup_one_len(): Instead of using d_alloc_name() and then doing a d_lookup() with the allocated dentry to find whether a device with the name we're trying to create already exists switch to using lookup_one_len(). The latter will either return the existing dentry or a new one. - switch from kmalloc() + strscpy() to kmemdup(): Use a more idiomatic way to copy the name for the new dentry that userspace gave us. Suggested-by: Al Viro Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman Bug: 136497735 (cherry picked from commit 01b3f1fc568352a1ffdcd3ee82a0297f16cc9bd9) Change-Id: I993a7dfa2f48bc6deb305852ff4085dc8dcaae4d Signed-off-by: Hridya Valsaraju --- drivers/android/binderfs.c | 39 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 25bc3af2c4cf..33bbe0ce60e3 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -106,7 +107,7 @@ bool is_binderfs_device(const struct inode *inode) * @userp: buffer to copy information about new device for userspace to * @req: struct binderfs_device as copied from userspace * - * This function allocated a new binder_device and reserves a new minor + * This function allocates a new binder_device and reserves a new minor * number for it. * Minor numbers are limited and tracked globally in binderfs_minors. The * function will stash a struct binder_device for the specific binder @@ -122,10 +123,10 @@ static int binderfs_binder_device_create(struct inode *ref_inode, struct binderfs_device *req) { int minor, ret; - struct dentry *dentry, *dup, *root; + struct dentry *dentry, *root; struct binder_device *device; - size_t name_len = BINDERFS_MAX_NAME + 1; char *name = NULL; + size_t name_len; struct inode *inode = NULL; struct super_block *sb = ref_inode->i_sb; struct binderfs_info *info = sb->s_fs_info; @@ -168,12 +169,13 @@ static int binderfs_binder_device_create(struct inode *ref_inode, inode->i_uid = info->root_uid; inode->i_gid = info->root_gid; - name = kmalloc(name_len, GFP_KERNEL); + req->name[BINDERFS_MAX_NAME] = '\0'; /* NUL-terminate */ + name_len = strlen(req->name); + /* Make sure to include terminating NUL byte */ + name = kmemdup(req->name, name_len + 1, GFP_KERNEL); if (!name) goto err; - strscpy(name, req->name, name_len); - device->binderfs_inode = inode; device->context.binder_context_mgr_uid = INVALID_UID; device->context.name = name; @@ -192,24 +194,21 @@ static int binderfs_binder_device_create(struct inode *ref_inode, root = sb->s_root; inode_lock(d_inode(root)); - dentry = d_alloc_name(root, name); - if (!dentry) { + + /* look it up */ + dentry = lookup_one_len(name, root, name_len); + if (IS_ERR(dentry)) { inode_unlock(d_inode(root)); - ret = -ENOMEM; + ret = PTR_ERR(dentry); goto err; } - /* Verify that the name userspace gave us is not already in use. */ - dup = d_lookup(root, &dentry->d_name); - if (dup) { - if (d_really_is_positive(dup)) { - dput(dup); - dput(dentry); - inode_unlock(d_inode(root)); - ret = -EEXIST; - goto err; - } - dput(dup); + if (d_really_is_positive(dentry)) { + /* already exists */ + dput(dentry); + inode_unlock(d_inode(root)); + ret = -EEXIST; + goto err; } inode->i_private = device; From 7166228debf4064f436241215a92c3e03325edcb Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 21 Jan 2019 11:48:06 +0100 Subject: [PATCH 2023/3715] UPSTREAM: binderfs: kill_litter_super() before cleanup Al pointed out that first calling kill_litter_super() before cleaning up info is more correct since destroying info doesn't depend on the state of the dentries and inodes. That the opposite remains true is not guaranteed. Suggested-by: Al Viro Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman Bug: 136497735 (cherry picked from commit 4198479524aeccaf53c3a4cc73784982535573fa) Change-Id: Ie2dfc2c2f17dde25b4215853b05fa8f21fb6b298 Signed-off-by: Hridya Valsaraju --- drivers/android/binderfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 33bbe0ce60e3..dba13340d228 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -532,11 +532,12 @@ static void binderfs_kill_super(struct super_block *sb) { struct binderfs_info *info = sb->s_fs_info; + kill_litter_super(sb); + if (info && info->ipc_ns) put_ipc_ns(info->ipc_ns); kfree(info); - kill_litter_super(sb); } static struct file_system_type binder_fs_type = { From 2129f8f7bc64cfb76bf09cdfc7871cc3b9508484 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 21 Jan 2019 11:48:07 +0100 Subject: [PATCH 2024/3715] UPSTREAM: binderfs: drop lock in binderfs_binder_ctl_create The binderfs_binder_ctl_create() call is a no-op on subsequent calls and the first call is done before we unlock the suberblock. Hence, there is no need to take inode_lock() in there. Let's remove it. Suggested-by: Al Viro Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman Bug: 136497735 (cherry picked from commit 29ef1c8e16aed079ac09989d752e38d412b6e1a8) Change-Id: I7c294796ac7891f62387e09dc34332ca4c3ee67b Signed-off-by: Hridya Valsaraju --- drivers/android/binderfs.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index dba13340d228..2fdaaa08a20e 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -400,8 +400,6 @@ static int binderfs_binder_ctl_create(struct super_block *sb) if (!device) return -ENOMEM; - inode_lock(d_inode(root)); - /* If we have already created a binder-control node, return. */ if (info->control_dentry) { ret = 0; @@ -441,12 +439,10 @@ static int binderfs_binder_ctl_create(struct super_block *sb) inode->i_private = device; info->control_dentry = dentry; d_add(dentry, inode); - inode_unlock(d_inode(root)); return 0; out: - inode_unlock(d_inode(root)); kfree(device); iput(inode); From d0d9ecaff08524a24458ee0d03772e037ed62a60 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 21 Jan 2019 11:48:08 +0100 Subject: [PATCH 2025/3715] UPSTREAM: binderfs: switch from d_add() to d_instantiate() In a previous commit we switched from a d_alloc_name() + d_lookup() combination to setup a new dentry and find potential duplicates to the more idiomatic lookup_one_len(). As far as I understand, this also means we need to switch from d_add() to d_instantiate() since lookup_one_len() will create a new dentry when it doesn't find an existing one and add the new dentry to the hash queues. So we only need to call d_instantiate() to connect the dentry to the inode and turn it into a positive dentry. If we were to use d_add() we sure see stack traces like the following indicating that adding the same dentry twice over the same inode: [ 744.441889] CPU: 4 PID: 2849 Comm: landscape-sysin Not tainted 5.0.0-rc1-brauner-binderfs #243 [ 744.441889] Hardware name: Dell DCS XS24-SC2 /XS24-SC2 , BIOS S59_3C20 04/07/2011 [ 744.441889] RIP: 0010:__d_lookup_rcu+0x76/0x190 [ 744.441889] Code: 89 75 c0 49 c1 e9 20 49 89 fd 45 89 ce 41 83 e6 07 42 8d 04 f5 00 00 00 00 89 45 c8 eb 0c 48 8b 1b 48 85 db 0f 84 81 00 00 00 <44> 8b 63 fc 4c 3b 6b 10 75 ea 48 83 7b 08 00 74 e3 41 83 e4 fe 41 [ 744.441889] RSP: 0018:ffffb8c984e27ad0 EFLAGS: 00000282 ORIG_RAX: ffffffffffffff13 [ 744.441889] RAX: 0000000000000038 RBX: ffff9407ef770c08 RCX: ffffb8c980011000 [ 744.441889] RDX: ffffb8c984e27b54 RSI: ffffb8c984e27ce0 RDI: ffff9407e6689600 [ 744.441889] RBP: ffffb8c984e27b28 R08: ffffb8c984e27ba4 R09: 0000000000000007 [ 744.441889] R10: ffff9407e5c4f05c R11: 973f3eb9d84a94e5 R12: 0000000000000002 [ 744.441889] R13: ffff9407e6689600 R14: 0000000000000007 R15: 00000007bfef7a13 [ 744.441889] FS: 00007f0db13bb740(0000) GS:ffff9407f3b00000(0000) knlGS:0000000000000000 [ 744.441889] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 744.441889] CR2: 00007f0dacc51024 CR3: 000000032961a000 CR4: 00000000000006e0 [ 744.441889] Call Trace: [ 744.441889] lookup_fast+0x53/0x300 [ 744.441889] walk_component+0x49/0x350 [ 744.441889] ? inode_permission+0x63/0x1a0 [ 744.441889] link_path_walk.part.33+0x1bc/0x5a0 [ 744.441889] ? path_init+0x190/0x310 [ 744.441889] path_lookupat+0x95/0x210 [ 744.441889] filename_lookup+0xb6/0x190 [ 744.441889] ? __check_object_size+0xb8/0x1b0 [ 744.441889] ? strncpy_from_user+0x50/0x1a0 [ 744.441889] user_path_at_empty+0x36/0x40 [ 744.441889] ? user_path_at_empty+0x36/0x40 [ 744.441889] vfs_statx+0x76/0xe0 [ 744.441889] __do_sys_newstat+0x3d/0x70 [ 744.441889] __x64_sys_newstat+0x16/0x20 [ 744.441889] do_syscall_64+0x5a/0x120 [ 744.441889] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 744.441889] RIP: 0033:0x7f0db0ec2775 [ 744.441889] Code: 00 00 00 75 05 48 83 c4 18 c3 e8 26 55 02 00 66 0f 1f 44 00 00 83 ff 01 48 89 f0 77 30 48 89 c7 48 89 d6 b8 04 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 03 f3 c3 90 48 8b 15 e1 b6 2d 00 f7 d8 64 89 [ 744.441889] RSP: 002b:00007ffc36bc9388 EFLAGS: 00000246 ORIG_RAX: 0000000000000004 [ 744.441889] RAX: ffffffffffffffda RBX: 00007ffc36bc9300 RCX: 00007f0db0ec2775 [ 744.441889] RDX: 00007ffc36bc9400 RSI: 00007ffc36bc9400 RDI: 00007f0dad26f050 [ 744.441889] RBP: 0000000000c0bc60 R08: 0000000000000000 R09: 0000000000000001 [ 744.441889] R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffc36bc9400 [ 744.441889] R13: 0000000000000001 R14: 00000000ffffff9c R15: 0000000000c0bc60 Cc: Al Viro Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman Bug: 136497735 (cherry picked from commit 01684db950ea2b840531ab9298a8785776b6f6e8) Change-Id: I5e7cd8df8210943ee59e1297f2eb8f81f5fa2cb5 Signed-off-by: Hridya Valsaraju --- drivers/android/binderfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 2fdaaa08a20e..2388ca5f40ae 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -212,7 +212,7 @@ static int binderfs_binder_device_create(struct inode *ref_inode, } inode->i_private = device; - d_add(dentry, inode); + d_instantiate(dentry, inode); fsnotify_create(root->d_inode, dentry); inode_unlock(d_inode(root)); From 614d6c81b36eae38594395f909349554cbeb2d01 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 23 Jan 2019 12:41:15 +0100 Subject: [PATCH 2026/3715] BACKPORT: binderfs: respect limit on binder control creation We currently adhere to the reserved devices limit when creating new binderfs devices in binderfs instances not located in the inital ipc namespace. But it is still possible to rob the host instances of their 4 reserved devices by creating the maximum allowed number of devices in a single binderfs instance located in a non-initial ipc namespace and then mounting 4 separate binderfs instances in non-initial ipc namespaces. That happens because the limit is currently not respected for the creation of the initial binder-control device node. Block this nonsense by performing the same check in binderfs_binder_ctl_create() that we perform in binderfs_binder_device_create(). Fixes: 36bdf3cae09d ("binderfs: reserve devices for initial mount") Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman Bug: 136497735 (cherry picked from commit da8ddba566ff0a883237dbc8c5dadef1ca769e19) [ Resolved minor conflicts due to the switch to ida_get_simple/ida_remove] Change-Id: I7e170260ce79fc23a034ce75450d58ff39a7b902 Signed-off-by: Hridya Valsaraju --- drivers/android/binderfs.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 2388ca5f40ae..1e84c1670c81 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -395,6 +395,11 @@ static int binderfs_binder_ctl_create(struct super_block *sb) struct inode *inode = NULL; struct dentry *root = sb->s_root; struct binderfs_info *info = sb->s_fs_info; +#if defined(CONFIG_IPC_NS) + bool use_reserve = (info->ipc_ns == &init_ipc_ns); +#else + bool use_reserve = true; +#endif device = kzalloc(sizeof(*device), GFP_KERNEL); if (!device) @@ -413,7 +418,9 @@ static int binderfs_binder_ctl_create(struct super_block *sb) /* Reserve a new minor number for the new device. */ mutex_lock(&binderfs_minors_mutex); - minor = ida_simple_get(&binderfs_minors, 0, BINDERFS_MAX_MINOR + 1, + minor = ida_simple_get(&binderfs_minors, 0, + use_reserve ? BINDERFS_MAX_MINOR + 1: + BINDERFS_MAX_MINOR_CAPPED + 1, GFP_KERNEL); mutex_unlock(&binderfs_minors_mutex); if (minor < 0) { From bf4b5dad24036e6f8450c8f965de491f60e3f45f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 31 Jan 2019 01:25:02 +0100 Subject: [PATCH 2027/3715] UPSTREAM: binderfs: remove separate device_initcall() binderfs should not have a separate device_initcall(). When a kernel is compiled with CONFIG_ANDROID_BINDERFS register the filesystem alongside CONFIG_ANDROID_IPC. This use-case is especially sensible when users specify CONFIG_ANDROID_IPC=y, CONFIG_ANDROID_BINDERFS=y and ANDROID_BINDER_DEVICES="". When CONFIG_ANDROID_BINDERFS=n then this always succeeds so there's no regression potential for legacy workloads. Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman Bug: 136497735 (cherry picked from commit 5b9633af298bfd1de650f6774d3fada546543101) Change-Id: I91892655d9d36df5218189f7874312eec7ae3c46 Signed-off-by: Hridya Valsaraju --- drivers/android/binder.c | 7 ++++++- drivers/android/binder_internal.h | 9 +++++++++ drivers/android/binderfs.c | 4 +--- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 541957211045..6875835911b7 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -6061,9 +6061,10 @@ static int __init init_binder_device(const char *name) static int __init binder_init(void) { int ret; - char *device_name, *device_names, *device_tmp; + char *device_name, *device_tmp; struct binder_device *device; struct hlist_node *tmp; + char *device_names = NULL; ret = binder_alloc_shrinker_init(); if (ret) @@ -6123,6 +6124,10 @@ static int __init binder_init(void) goto err_init_binder_device_failed; } + ret = init_binderfs(); + if (ret) + goto err_init_binder_device_failed; + return ret; err_init_binder_device_failed: diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h index 7fb97f503ef2..045b3e42d98b 100644 --- a/drivers/android/binder_internal.h +++ b/drivers/android/binder_internal.h @@ -46,4 +46,13 @@ static inline bool is_binderfs_device(const struct inode *inode) } #endif +#ifdef CONFIG_ANDROID_BINDERFS +extern int __init init_binderfs(void); +#else +static inline int __init init_binderfs(void) +{ + return 0; +} +#endif + #endif /* _LINUX_BINDER_INTERNAL_H */ diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 1e84c1670c81..247f6d54f48f 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -550,7 +550,7 @@ static struct file_system_type binder_fs_type = { .fs_flags = FS_USERNS_MOUNT, }; -static int __init init_binderfs(void) +int __init init_binderfs(void) { int ret; @@ -568,5 +568,3 @@ static int __init init_binderfs(void) return ret; } - -device_initcall(init_binderfs); From 8e94c42c87fa6b3cc7aee9d09042af4ecda99daa Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 7 Sep 2018 10:01:46 +0200 Subject: [PATCH 2028/3715] UPSTREAM: android: binder: use kstrdup instead of open-coding it Signed-off-by: Rasmus Villemoes Bug: 136497735 Change-Id: I545073facdb76ea12accfc7bfa4738f2e3bf0b28 (cherry picked from commit 6b6642dadd685af885367d6e30f18553e2a23b22) Signed-off-by: Hridya Valsaraju --- drivers/android/binder.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 6875835911b7..5678b65209af 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -6110,12 +6110,11 @@ static int __init binder_init(void) * Copy the module_parameter string, because we don't want to * tokenize it in-place. */ - device_names = kzalloc(strlen(binder_devices_param) + 1, GFP_KERNEL); + device_names = kstrdup(binder_devices_param, GFP_KERNEL); if (!device_names) { ret = -ENOMEM; goto err_alloc_device_names_failed; } - strcpy(device_names, binder_devices_param); device_tmp = device_names; while ((device_name = strsep(&device_tmp, ","))) { From 574e6b0022389e72031536a41eb982ffb50581ea Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sat, 26 Jan 2019 11:23:20 +0100 Subject: [PATCH 2029/3715] UPSTREAM: binder: fix CONFIG_ANDROID_BINDER_DEVICES Several users have tried to only rely on binderfs to provide binder devices and set CONFIG_ANDROID_BINDER_DEVICES="" empty. This is a great use-case of binderfs and one that was always intended to work. However, this is currently not possible since setting CONFIG_ANDROID_BINDER_DEVICES="" emtpy will simply panic the kernel: kobject: (00000000028c2f79): attempted to be registered with empty name! WARNING: CPU: 7 PID: 1703 at lib/kobject.c:228 kobject_add_internal+0x288/0x2b0 Modules linked in: binder_linux(+) bridge stp llc ipmi_ssif gpio_ich dcdbas coretemp kvm_intel kvm irqbypass serio_raw input_leds lpc_ich i5100_edac mac_hid ipmi_si ipmi_devintf ipmi_msghandler sch_fq_codel ib_i CPU: 7 PID: 1703 Comm: modprobe Not tainted 5.0.0-rc2-brauner-binderfs #263 Hardware name: Dell DCS XS24-SC2 /XS24-SC2 , BIOS S59_3C20 04/07/2011 RIP: 0010:kobject_add_internal+0x288/0x2b0 Code: 12 95 48 c7 c7 78 63 3b 95 e8 77 35 71 ff e9 91 fe ff ff 0f 0b eb a7 0f 0b eb 9a 48 89 de 48 c7 c7 00 63 3b 95 e8 f8 95 6a ff <0f> 0b 41 bc ea ff ff ff e9 6d fe ff ff 41 bc fe ff ff ff e9 62 fe RSP: 0018:ffff973f84237a30 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffff8b53e2472010 RCX: 0000000000000006 RDX: 0000000000000007 RSI: 0000000000000086 RDI: ffff8b53edbd63a0 RBP: ffff973f84237a60 R08: 0000000000000342 R09: 0000000000000004 R10: ffff973f84237af0 R11: 0000000000000001 R12: 0000000000000000 R13: ffff8b53e9f1a1e0 R14: 00000000e9f1a1e0 R15: 0000000000a00037 FS: 00007fbac36f7540(0000) GS:ffff8b53edbc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fbac364cfa7 CR3: 00000004a6d48000 CR4: 00000000000406e0 Call Trace: kobject_add+0x71/0xd0 ? _cond_resched+0x19/0x40 ? mutex_lock+0x12/0x40 device_add+0x12e/0x6b0 device_create_groups_vargs+0xe4/0xf0 device_create_with_groups+0x3f/0x60 ? _cond_resched+0x19/0x40 misc_register+0x140/0x180 binder_init+0x1ed/0x2d4 [binder_linux] ? trace_event_define_fields_binder_transaction_fd_send+0x8e/0x8e [binder_linux] do_one_initcall+0x4a/0x1c9 ? _cond_resched+0x19/0x40 ? kmem_cache_alloc_trace+0x151/0x1c0 do_init_module+0x5f/0x216 load_module+0x223d/0x2b20 __do_sys_finit_module+0xfc/0x120 ? __do_sys_finit_module+0xfc/0x120 __x64_sys_finit_module+0x1a/0x20 do_syscall_64+0x5a/0x120 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7fbac3202839 Code: 00 f3 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 1f f6 2c 00 f7 d8 64 89 01 48 RSP: 002b:00007ffd1494a908 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 RAX: ffffffffffffffda RBX: 000055b629ebec60 RCX: 00007fbac3202839 RDX: 0000000000000000 RSI: 000055b629c20d2e RDI: 0000000000000003 RBP: 000055b629c20d2e R08: 0000000000000000 R09: 000055b629ec2310 R10: 0000000000000003 R11: 0000000000000246 R12: 0000000000000000 R13: 000055b629ebed70 R14: 0000000000040000 R15: 000055b629ebec60 So check for the empty string since strsep() will otherwise return the emtpy string which will cause kobject_add_internal() to panic when trying to add a kobject with an emtpy name. Fixes: ac4812c5ffbb ("binder: Support multiple /dev instances") Cc: Martijn Coenen Signed-off-by: Christian Brauner Acked-by: Todd Kjos Bug: 136497735 (cherry picked from commit 793c8232937610ae00bc174b87d7fc324346eaea) Change-Id: I2ad3e2370596ced6ab2b2b197421182f9f9485e7 Signed-off-by: Hridya Valsaraju --- drivers/android/binder.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 5678b65209af..6dc9fb2e7602 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -6106,21 +6106,23 @@ static int __init binder_init(void) &transaction_log_fops); } - /* - * Copy the module_parameter string, because we don't want to - * tokenize it in-place. - */ - device_names = kstrdup(binder_devices_param, GFP_KERNEL); - if (!device_names) { - ret = -ENOMEM; - goto err_alloc_device_names_failed; - } + if (strcmp(binder_devices_param, "") != 0) { + /* + * Copy the module_parameter string, because we don't want to + * tokenize it in-place. + */ + device_names = kstrdup(binder_devices_param, GFP_KERNEL); + if (!device_names) { + ret = -ENOMEM; + goto err_alloc_device_names_failed; + } - device_tmp = device_names; - while ((device_name = strsep(&device_tmp, ","))) { - ret = init_binder_device(device_name); - if (ret) - goto err_init_binder_device_failed; + device_tmp = device_names; + while ((device_name = strsep(&device_tmp, ","))) { + ret = init_binder_device(device_name); + if (ret) + goto err_init_binder_device_failed; + } } ret = init_binderfs(); From e8fb3933b6d5ea76a3e2c66159cb3a6bb09961e5 Mon Sep 17 00:00:00 2001 From: Hridya Valsaraju Date: Thu, 8 Aug 2019 15:27:25 -0700 Subject: [PATCH 2030/3715] UPSTREAM: binder: Add default binder devices through binderfs when configured Currently, since each binderfs instance needs its own private binder devices, every time a binderfs instance is mounted, all the default binder devices need to be created via the BINDER_CTL_ADD IOCTL. This patch aims to add a solution to automatically create the default binder devices for each binderfs instance that gets mounted. To achieve this goal, when CONFIG_ANDROID_BINDERFS is set, the default binder devices specified by CONFIG_ANDROID_BINDER_DEVICES are created in each binderfs instance instead of global devices being created by the binder driver. Co-developed-by: Christian Brauner Signed-off-by: Christian Brauner Signed-off-by: Hridya Valsaraju Reviewed-by: Joel Fernandes (Google) Reviewed-by: Joel Fernandes (Google) Bug: 136497735 (cherry picked from commit ca2864c6e8965c37df97f11e6f99e83e09806b1c) Change-Id: I4f6c5d95997ffd3df182d6ec32d467b15d1f0c42 Signed-off-by: Hridya Valsaraju --- drivers/android/binder.c | 5 +++-- drivers/android/binder_internal.h | 2 ++ drivers/android/binderfs.c | 23 ++++++++++++++++++++--- 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 6dc9fb2e7602..32204c5630b7 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -127,7 +127,7 @@ static uint32_t binder_debug_mask = BINDER_DEBUG_USER_ERROR | BINDER_DEBUG_FAILED_TRANSACTION | BINDER_DEBUG_DEAD_TRANSACTION; module_param_named(debug_mask, binder_debug_mask, uint, 0644); -static char *binder_devices_param = CONFIG_ANDROID_BINDER_DEVICES; +char *binder_devices_param = CONFIG_ANDROID_BINDER_DEVICES; module_param_named(devices, binder_devices_param, charp, 0444); static DECLARE_WAIT_QUEUE_HEAD(binder_user_error_wait); @@ -6106,7 +6106,8 @@ static int __init binder_init(void) &transaction_log_fops); } - if (strcmp(binder_devices_param, "") != 0) { + if (!IS_ENABLED(CONFIG_ANDROID_BINDERFS) && + strcmp(binder_devices_param, "") != 0) { /* * Copy the module_parameter string, because we don't want to * tokenize it in-place. diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h index 045b3e42d98b..fe8c745dc8e0 100644 --- a/drivers/android/binder_internal.h +++ b/drivers/android/binder_internal.h @@ -37,6 +37,8 @@ struct binder_device { extern const struct file_operations binder_fops; +extern char *binder_devices_param; + #ifdef CONFIG_ANDROID_BINDERFS extern bool is_binderfs_device(const struct inode *inode); #else diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 247f6d54f48f..9669f2ca90f2 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -186,8 +186,7 @@ static int binderfs_binder_device_create(struct inode *ref_inode, req->major = MAJOR(binderfs_dev); req->minor = minor; - ret = copy_to_user(userp, req, sizeof(*req)); - if (ret) { + if (userp && copy_to_user(userp, req, sizeof(*req))) { ret = -EFAULT; goto err; } @@ -467,6 +466,9 @@ static int binderfs_fill_super(struct super_block *sb, void *data, int silent) int ret; struct binderfs_info *info; struct inode *inode = NULL; + struct binderfs_device device_info = { 0 }; + const char *name; + size_t len; sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; @@ -521,7 +523,22 @@ static int binderfs_fill_super(struct super_block *sb, void *data, int silent) if (!sb->s_root) return -ENOMEM; - return binderfs_binder_ctl_create(sb); + ret = binderfs_binder_ctl_create(sb); + if (ret) + return ret; + + name = binder_devices_param; + for (len = strcspn(name, ","); len > 0; len = strcspn(name, ",")) { + strscpy(device_info.name, name, len + 1); + ret = binderfs_binder_device_create(inode, NULL, &device_info); + if (ret) + return ret; + name += len; + if (*name == ',') + name++; + } + + return 0; } static struct dentry *binderfs_mount(struct file_system_type *fs_type, From fd533398bf7c38486bf150ea45d57de747039858 Mon Sep 17 00:00:00 2001 From: Hridya Valsaraju Date: Thu, 8 Aug 2019 15:27:26 -0700 Subject: [PATCH 2031/3715] UPSTREAM: binder: Validate the default binderfs device names. Length of a binderfs device name cannot exceed BINDERFS_MAX_NAME. This patch adds a check in binderfs_init() to ensure the same for the default binder devices that will be created in every binderfs instance. Co-developed-by: Christian Brauner Signed-off-by: Christian Brauner Signed-off-by: Hridya Valsaraju Reviewed-by: Joel Fernandes (Google) Signed-off-by: Hridya Valsaraju Bug: 136497735 (cherry picked from commit 028fb5822b76bc2e095b5c145d7bd263878d9e27) Change-Id: I347a427690ae35c792ce15afc90151937b879ef7 --- drivers/android/binderfs.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 9669f2ca90f2..e2fec193ed79 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -570,6 +570,18 @@ static struct file_system_type binder_fs_type = { int __init init_binderfs(void) { int ret; + const char *name; + size_t len; + + /* Verify that the default binderfs device names are valid. */ + name = binder_devices_param; + for (len = strcspn(name, ","); len > 0; len = strcspn(name, ",")) { + if (len > BINDERFS_MAX_NAME) + return -E2BIG; + name += len; + if (*name == ',') + name++; + } /* Allocate new major number for binderfs. */ ret = alloc_chrdev_region(&binderfs_dev, 0, BINDERFS_MAX_MINOR, From 4d96097431b9584aecec08004c5ca78aa00bad11 Mon Sep 17 00:00:00 2001 From: Hridya Valsaraju Date: Tue, 3 Sep 2019 09:16:52 -0700 Subject: [PATCH 2032/3715] UPSTREAM: binder: add a mount option to show global stats Currently, all binder state and statistics live in debugfs. We need this information even when debugfs is not mounted. This patch adds the mount option 'stats' to enable a binderfs instance to have binder debug information present in the same. 'stats=global' will enable the global binder statistics. In the future, 'stats=local' will enable binder statistics local to the binderfs instance. The two modes 'global' and 'local' will be mutually exclusive. 'stats=global' option is only available for a binderfs instance mounted in the initial user namespace. An attempt to use the option to mount a binderfs instance in another user namespace will return an EPERM error. Signed-off-by: Hridya Valsaraju Acked-by: Christian Brauner Link: https://lore.kernel.org/r/20190903161655.107408-2-hridya@google.com Signed-off-by: Greg Kroah-Hartman Bug: 136497735 (cherry picked from commit f00834518ed3194b866f5f3d63b71e0ed7f6bc00) Change-Id: I4c9da221e7e19729a6489436ffa6233864eac4f7 --- drivers/android/binderfs.c | 45 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index e2fec193ed79..ff5a3d73191f 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -51,18 +51,27 @@ static DEFINE_IDA(binderfs_minors); /** * binderfs_mount_opts - mount options for binderfs * @max: maximum number of allocatable binderfs binder devices + * @stats_mode: enable binder stats in binderfs. */ struct binderfs_mount_opts { int max; + int stats_mode; }; enum { Opt_max, + Opt_stats_mode, Opt_err }; +enum binderfs_stats_mode { + STATS_NONE, + STATS_GLOBAL, +}; + static const match_table_t tokens = { { Opt_max, "max=%d" }, + { Opt_stats_mode, "stats=%s" }, { Opt_err, NULL } }; @@ -290,8 +299,9 @@ static void binderfs_evict_inode(struct inode *inode) static int binderfs_parse_mount_opts(char *data, struct binderfs_mount_opts *opts) { - char *p; + char *p, *stats; opts->max = BINDERFS_MAX_MINOR; + opts->stats_mode = STATS_NONE; while ((p = strsep(&data, ",")) != NULL) { substring_t args[MAX_OPT_ARGS]; @@ -311,6 +321,22 @@ static int binderfs_parse_mount_opts(char *data, opts->max = max_devices; break; + case Opt_stats_mode: + if (!capable(CAP_SYS_ADMIN)) + return -EINVAL; + + stats = match_strdup(&args[0]); + if (!stats) + return -ENOMEM; + + if (strcmp(stats, "global") != 0) { + kfree(stats); + return -EINVAL; + } + + opts->stats_mode = STATS_GLOBAL; + kfree(stats); + break; default: pr_err("Invalid mount options\n"); return -EINVAL; @@ -322,8 +348,21 @@ static int binderfs_parse_mount_opts(char *data, static int binderfs_remount(struct super_block *sb, int *flags, char *data) { + int prev_stats_mode, ret; struct binderfs_info *info = sb->s_fs_info; - return binderfs_parse_mount_opts(data, &info->mount_opts); + + prev_stats_mode = info->mount_opts.stats_mode; + ret = binderfs_parse_mount_opts(data, &info->mount_opts); + if (ret) + return ret; + + if (prev_stats_mode != info->mount_opts.stats_mode) { + pr_err("Binderfs stats mode cannot be changed during a remount\n"); + info->mount_opts.stats_mode = prev_stats_mode; + return -EINVAL; + } + + return 0; } static int binderfs_show_mount_opts(struct seq_file *seq, struct dentry *root) @@ -333,6 +372,8 @@ static int binderfs_show_mount_opts(struct seq_file *seq, struct dentry *root) info = root->d_sb->s_fs_info; if (info->mount_opts.max <= BINDERFS_MAX_MINOR) seq_printf(seq, ",max=%d", info->mount_opts.max); + if (info->mount_opts.stats_mode == STATS_GLOBAL) + seq_printf(seq, ",stats=global"); return 0; } From a575fb2979008a272dc94684e69feaeab571004d Mon Sep 17 00:00:00 2001 From: Hridya Valsaraju Date: Fri, 1 Nov 2019 15:22:42 -0700 Subject: [PATCH 2033/3715] UPSTREAM: binder: Add stats, state and transactions files The following binder stat files currently live in debugfs. /sys/kernel/debug/binder/state /sys/kernel/debug/binder/stats /sys/kernel/debug/binder/transactions This patch makes these files available in a binderfs instance mounted with the mount option 'stats=global'. For example, if a binderfs instance is mounted at path /dev/binderfs, the above files will be available at the following locations: /dev/binderfs/binder_logs/state /dev/binderfs/binder_logs/stats /dev/binderfs/binder_logs/transactions This provides a way to access them even when debugfs is not mounted. Acked-by: Christian Brauner Signed-off-by: Hridya Valsaraju Acked-by: Christian Brauner Link: https://lore.kernel.org/r/20190903161655.107408-3-hridya@google.com Signed-off-by: Greg Kroah-Hartman Bug: 136497735 (cherry picked from commit 0e13e452dafc009049a9a5a4153e2f9e51b23915) Change-Id: Ieeb666a719fb3195133403054de7b103a358e1ae --- drivers/android/binder.c | 15 ++-- drivers/android/binder_internal.h | 8 ++ drivers/android/binderfs.c | 140 +++++++++++++++++++++++++++++- 3 files changed, 153 insertions(+), 10 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 32204c5630b7..f85cbd68a9e7 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -5882,7 +5882,7 @@ static void print_binder_proc_stats(struct seq_file *m, } -static int state_show(struct seq_file *m, void *unused) +int binder_state_show(struct seq_file *m, void *unused) { struct binder_proc *proc; struct binder_node *node; @@ -5921,7 +5921,7 @@ static int state_show(struct seq_file *m, void *unused) return 0; } -static int stats_show(struct seq_file *m, void *unused) +int binder_stats_show(struct seq_file *m, void *unused) { struct binder_proc *proc; @@ -5937,7 +5937,7 @@ static int stats_show(struct seq_file *m, void *unused) return 0; } -static int transactions_show(struct seq_file *m, void *unused) +int binder_transactions_show(struct seq_file *m, void *unused) { struct binder_proc *proc; @@ -6025,9 +6025,6 @@ const struct file_operations binder_fops = { .release = binder_release, }; -DEFINE_SHOW_ATTRIBUTE(state); -DEFINE_SHOW_ATTRIBUTE(stats); -DEFINE_SHOW_ATTRIBUTE(transactions); DEFINE_SHOW_ATTRIBUTE(transaction_log); static int __init init_binder_device(const char *name) @@ -6083,17 +6080,17 @@ static int __init binder_init(void) 0444, binder_debugfs_dir_entry_root, NULL, - &state_fops); + &binder_state_fops); debugfs_create_file("stats", 0444, binder_debugfs_dir_entry_root, NULL, - &stats_fops); + &binder_stats_fops); debugfs_create_file("transactions", 0444, binder_debugfs_dir_entry_root, NULL, - &transactions_fops); + &binder_transactions_fops); debugfs_create_file("transaction_log", 0444, binder_debugfs_dir_entry_root, diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h index fe8c745dc8e0..12ef96f256c6 100644 --- a/drivers/android/binder_internal.h +++ b/drivers/android/binder_internal.h @@ -57,4 +57,12 @@ static inline int __init init_binderfs(void) } #endif +int binder_stats_show(struct seq_file *m, void *unused); +DEFINE_SHOW_ATTRIBUTE(binder_stats); + +int binder_state_show(struct seq_file *m, void *unused); +DEFINE_SHOW_ATTRIBUTE(binder_state); + +int binder_transactions_show(struct seq_file *m, void *unused); +DEFINE_SHOW_ATTRIBUTE(binder_transactions); #endif /* _LINUX_BINDER_INTERNAL_H */ diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index ff5a3d73191f..5af4cfece336 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -280,7 +280,7 @@ static void binderfs_evict_inode(struct inode *inode) clear_inode(inode); - if (!device) + if (!S_ISCHR(inode->i_mode) || !device) return; mutex_lock(&binderfs_minors_mutex); @@ -502,6 +502,141 @@ static const struct inode_operations binderfs_dir_inode_operations = { .unlink = binderfs_unlink, }; +static struct inode *binderfs_make_inode(struct super_block *sb, int mode) +{ + struct inode *ret; + + ret = new_inode(sb); + if (ret) { + ret->i_ino = iunique(sb, BINDERFS_MAX_MINOR + INODE_OFFSET); + ret->i_mode = mode; + ret->i_atime = ret->i_mtime = ret->i_ctime = current_time(ret); + } + return ret; +} + +static struct dentry *binderfs_create_dentry(struct dentry *parent, + const char *name) +{ + struct dentry *dentry; + + dentry = lookup_one_len(name, parent, strlen(name)); + if (IS_ERR(dentry)) + return dentry; + + /* Return error if the file/dir already exists. */ + if (d_really_is_positive(dentry)) { + dput(dentry); + return ERR_PTR(-EEXIST); + } + + return dentry; +} + +static struct dentry *binderfs_create_file(struct dentry *parent, + const char *name, + const struct file_operations *fops, + void *data) +{ + struct dentry *dentry; + struct inode *new_inode, *parent_inode; + struct super_block *sb; + + parent_inode = d_inode(parent); + inode_lock(parent_inode); + + dentry = binderfs_create_dentry(parent, name); + if (IS_ERR(dentry)) + goto out; + + sb = parent_inode->i_sb; + new_inode = binderfs_make_inode(sb, S_IFREG | 0444); + if (!new_inode) { + dput(dentry); + dentry = ERR_PTR(-ENOMEM); + goto out; + } + + new_inode->i_fop = fops; + new_inode->i_private = data; + d_instantiate(dentry, new_inode); + fsnotify_create(parent_inode, dentry); + +out: + inode_unlock(parent_inode); + return dentry; +} + +static struct dentry *binderfs_create_dir(struct dentry *parent, + const char *name) +{ + struct dentry *dentry; + struct inode *new_inode, *parent_inode; + struct super_block *sb; + + parent_inode = d_inode(parent); + inode_lock(parent_inode); + + dentry = binderfs_create_dentry(parent, name); + if (IS_ERR(dentry)) + goto out; + + sb = parent_inode->i_sb; + new_inode = binderfs_make_inode(sb, S_IFDIR | 0755); + if (!new_inode) { + dput(dentry); + dentry = ERR_PTR(-ENOMEM); + goto out; + } + + new_inode->i_fop = &simple_dir_operations; + new_inode->i_op = &simple_dir_inode_operations; + + set_nlink(new_inode, 2); + d_instantiate(dentry, new_inode); + inc_nlink(parent_inode); + fsnotify_mkdir(parent_inode, dentry); + +out: + inode_unlock(parent_inode); + return dentry; +} + +static int init_binder_logs(struct super_block *sb) +{ + struct dentry *binder_logs_root_dir, *dentry; + int ret = 0; + + binder_logs_root_dir = binderfs_create_dir(sb->s_root, + "binder_logs"); + if (IS_ERR(binder_logs_root_dir)) { + ret = PTR_ERR(binder_logs_root_dir); + goto out; + } + + dentry = binderfs_create_file(binder_logs_root_dir, "stats", + &binder_stats_fops, NULL); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); + goto out; + } + + dentry = binderfs_create_file(binder_logs_root_dir, "state", + &binder_state_fops, NULL); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); + goto out; + } + + dentry = binderfs_create_file(binder_logs_root_dir, "transactions", + &binder_transactions_fops, NULL); + if (IS_ERR(dentry)) + ret = PTR_ERR(dentry); + +out: + return ret; +} + static int binderfs_fill_super(struct super_block *sb, void *data, int silent) { int ret; @@ -579,6 +714,9 @@ static int binderfs_fill_super(struct super_block *sb, void *data, int silent) name++; } + if (info->mount_opts.stats_mode == STATS_GLOBAL) + return init_binder_logs(sb); + return 0; } From 3741393c2db1ac96d820bf3e020a19792ffa174c Mon Sep 17 00:00:00 2001 From: Hridya Valsaraju Date: Tue, 3 Sep 2019 09:16:54 -0700 Subject: [PATCH 2034/3715] UPSTREAM: binder: Make transaction_log available in binderfs Currently, the binder transaction log files 'transaction_log' and 'failed_transaction_log' live in debugfs at the following locations: /sys/kernel/debug/binder/failed_transaction_log /sys/kernel/debug/binder/transaction_log This patch makes these files also available in a binderfs instance mounted with the mount option "stats=global". It does not affect the presence of these files in debugfs. If a binderfs instance is mounted at path /dev/binderfs, the location of these files will be as follows: /dev/binderfs/binder_logs/failed_transaction_log /dev/binderfs/binder_logs/transaction_log This change provides an alternate option to access these files when debugfs is not mounted. Acked-by: Christian Brauner Signed-off-by: Hridya Valsaraju Link: https://lore.kernel.org/r/20190903161655.107408-4-hridya@google.com Signed-off-by: Greg Kroah-Hartman Bug: 136497735 (cherry picked from commit c31e73121f4c1ec41143423ac6ce3ce6dafdcec1) Change-Id: I20d9e6c4c7115297f9740cc42a516c315b3a209e --- drivers/android/binder.c | 34 +++++-------------------------- drivers/android/binder_internal.h | 30 +++++++++++++++++++++++++++ drivers/android/binderfs.c | 18 ++++++++++++++++ 3 files changed, 53 insertions(+), 29 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index f85cbd68a9e7..e3b5d61e75ea 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -201,30 +201,8 @@ static inline void binder_stats_created(enum binder_stat_types type) atomic_inc(&binder_stats.obj_created[type]); } -struct binder_transaction_log_entry { - int debug_id; - int debug_id_done; - int call_type; - int from_proc; - int from_thread; - int target_handle; - int to_proc; - int to_thread; - int to_node; - int data_size; - int offsets_size; - int return_error_line; - uint32_t return_error; - uint32_t return_error_param; - const char *context_name; -}; -struct binder_transaction_log { - atomic_t cur; - bool full; - struct binder_transaction_log_entry entry[32]; -}; -static struct binder_transaction_log binder_transaction_log; -static struct binder_transaction_log binder_transaction_log_failed; +struct binder_transaction_log binder_transaction_log; +struct binder_transaction_log binder_transaction_log_failed; static struct binder_transaction_log_entry *binder_transaction_log_add( struct binder_transaction_log *log) @@ -5993,7 +5971,7 @@ static void print_binder_transaction_log_entry(struct seq_file *m, "\n" : " (incomplete)\n"); } -static int transaction_log_show(struct seq_file *m, void *unused) +int binder_transaction_log_show(struct seq_file *m, void *unused) { struct binder_transaction_log *log = m->private; unsigned int log_cur = atomic_read(&log->cur); @@ -6025,8 +6003,6 @@ const struct file_operations binder_fops = { .release = binder_release, }; -DEFINE_SHOW_ATTRIBUTE(transaction_log); - static int __init init_binder_device(const char *name) { int ret; @@ -6095,12 +6071,12 @@ static int __init binder_init(void) 0444, binder_debugfs_dir_entry_root, &binder_transaction_log, - &transaction_log_fops); + &binder_transaction_log_fops); debugfs_create_file("failed_transaction_log", 0444, binder_debugfs_dir_entry_root, &binder_transaction_log_failed, - &transaction_log_fops); + &binder_transaction_log_fops); } if (!IS_ENABLED(CONFIG_ANDROID_BINDERFS) && diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h index 12ef96f256c6..b9be42d9464c 100644 --- a/drivers/android/binder_internal.h +++ b/drivers/android/binder_internal.h @@ -65,4 +65,34 @@ DEFINE_SHOW_ATTRIBUTE(binder_state); int binder_transactions_show(struct seq_file *m, void *unused); DEFINE_SHOW_ATTRIBUTE(binder_transactions); + +int binder_transaction_log_show(struct seq_file *m, void *unused); +DEFINE_SHOW_ATTRIBUTE(binder_transaction_log); + +struct binder_transaction_log_entry { + int debug_id; + int debug_id_done; + int call_type; + int from_proc; + int from_thread; + int target_handle; + int to_proc; + int to_thread; + int to_node; + int data_size; + int offsets_size; + int return_error_line; + uint32_t return_error; + uint32_t return_error_param; + const char *context_name; +}; + +struct binder_transaction_log { + atomic_t cur; + bool full; + struct binder_transaction_log_entry entry[32]; +}; + +extern struct binder_transaction_log binder_transaction_log; +extern struct binder_transaction_log binder_transaction_log_failed; #endif /* _LINUX_BINDER_INTERNAL_H */ diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 5af4cfece336..a1cba37afbf5 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -630,6 +630,24 @@ static int init_binder_logs(struct super_block *sb) dentry = binderfs_create_file(binder_logs_root_dir, "transactions", &binder_transactions_fops, NULL); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); + goto out; + } + + dentry = binderfs_create_file(binder_logs_root_dir, + "transaction_log", + &binder_transaction_log_fops, + &binder_transaction_log); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); + goto out; + } + + dentry = binderfs_create_file(binder_logs_root_dir, + "failed_transaction_log", + &binder_transaction_log_fops, + &binder_transaction_log_failed); if (IS_ERR(dentry)) ret = PTR_ERR(dentry); From 1405bf2e0f828477e52d84720674c6e193cc7a3c Mon Sep 17 00:00:00 2001 From: Hridya Valsaraju Date: Tue, 3 Sep 2019 09:16:55 -0700 Subject: [PATCH 2035/3715] UPSTREAM: binder: Add binder_proc logging to binderfs Currently /sys/kernel/debug/binder/proc contains the debug data for every binder_proc instance. This patch makes this information also available in a binderfs instance mounted with a mount option "stats=global" in addition to debugfs. The patch does not affect the presence of the file in debugfs. If a binderfs instance is mounted at path /dev/binderfs, this file would be present at /dev/binderfs/binder_logs/proc. This change provides an alternate way to access this file when debugfs is not mounted. Acked-by: Christian Brauner Signed-off-by: Hridya Valsaraju Link: https://lore.kernel.org/r/20190903161655.107408-5-hridya@google.com Signed-off-by: Greg Kroah-Hartman Bug: 136497735 (cherry picked from commit 4feb80faf428a02d407a9ea1952004af01308765) Change-Id: I3aa974979f2d4aebbe79ea9df30ede2813826157 --- drivers/android/binder.c | 46 ++++++++++++++++++++- drivers/android/binder_internal.h | 46 +++++++++++++++++++++ drivers/android/binderfs.c | 68 ++++++++++++++----------------- 3 files changed, 121 insertions(+), 39 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index e3b5d61e75ea..920b1ca35bf0 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -488,6 +488,7 @@ struct binder_priority { * @inner_lock: can nest under outer_lock and/or node lock * @outer_lock: no nesting under innor or node lock * Lock order: 1) outer, 2) node, 3) inner + * @binderfs_entry: process-specific binderfs log file * * Bookkeeping structure for binder processes */ @@ -519,6 +520,7 @@ struct binder_proc { struct binder_context *context; spinlock_t inner_lock; spinlock_t outer_lock; + struct dentry *binderfs_entry; }; enum { @@ -5158,6 +5160,8 @@ static int binder_open(struct inode *nodp, struct file *filp) { struct binder_proc *proc; struct binder_device *binder_dev; + struct binderfs_info *info; + struct dentry *binder_binderfs_dir_entry_proc = NULL; binder_debug(BINDER_DEBUG_OPEN_CLOSE, "%s: %d:%d\n", __func__, current->group_leader->pid, current->pid); @@ -5180,11 +5184,14 @@ static int binder_open(struct inode *nodp, struct file *filp) } /* binderfs stashes devices in i_private */ - if (is_binderfs_device(nodp)) + if (is_binderfs_device(nodp)) { binder_dev = nodp->i_private; - else + info = nodp->i_sb->s_fs_info; + binder_binderfs_dir_entry_proc = info->proc_log_dir; + } else { binder_dev = container_of(filp->private_data, struct binder_device, miscdev); + } proc->context = &binder_dev->context; binder_alloc_init(&proc->alloc); @@ -5215,6 +5222,35 @@ static int binder_open(struct inode *nodp, struct file *filp) &proc_fops); } + if (binder_binderfs_dir_entry_proc) { + char strbuf[11]; + struct dentry *binderfs_entry; + + snprintf(strbuf, sizeof(strbuf), "%u", proc->pid); + /* + * Similar to debugfs, the process specific log file is shared + * between contexts. If the file has already been created for a + * process, the following binderfs_create_file() call will + * fail with error code EEXIST if another context of the same + * process invoked binder_open(). This is ok since same as + * debugfs, the log file will contain information on all + * contexts of a given PID. + */ + binderfs_entry = binderfs_create_file(binder_binderfs_dir_entry_proc, + strbuf, &proc_fops, (void *)(unsigned long)proc->pid); + if (!IS_ERR(binderfs_entry)) { + proc->binderfs_entry = binderfs_entry; + } else { + int error; + + error = PTR_ERR(binderfs_entry); + if (error != -EEXIST) { + pr_warn("Unable to create file %s in binderfs (error %d)\n", + strbuf, error); + } + } + } + return 0; } @@ -5254,6 +5290,12 @@ static int binder_release(struct inode *nodp, struct file *filp) struct binder_proc *proc = filp->private_data; debugfs_remove(proc->debugfs_entry); + + if (proc->binderfs_entry) { + binderfs_remove_file(proc->binderfs_entry); + proc->binderfs_entry = NULL; + } + binder_defer_work(proc, BINDER_DEFERRED_RELEASE); return 0; diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h index b9be42d9464c..bd47f7f72075 100644 --- a/drivers/android/binder_internal.h +++ b/drivers/android/binder_internal.h @@ -35,17 +35,63 @@ struct binder_device { struct inode *binderfs_inode; }; +/** + * binderfs_mount_opts - mount options for binderfs + * @max: maximum number of allocatable binderfs binder devices + * @stats_mode: enable binder stats in binderfs. + */ +struct binderfs_mount_opts { + int max; + int stats_mode; +}; + +/** + * binderfs_info - information about a binderfs mount + * @ipc_ns: The ipc namespace the binderfs mount belongs to. + * @control_dentry: This records the dentry of this binderfs mount + * binder-control device. + * @root_uid: uid that needs to be used when a new binder device is + * created. + * @root_gid: gid that needs to be used when a new binder device is + * created. + * @mount_opts: The mount options in use. + * @device_count: The current number of allocated binder devices. + * @proc_log_dir: Pointer to the directory dentry containing process-specific + * logs. + */ +struct binderfs_info { + struct ipc_namespace *ipc_ns; + struct dentry *control_dentry; + kuid_t root_uid; + kgid_t root_gid; + struct binderfs_mount_opts mount_opts; + int device_count; + struct dentry *proc_log_dir; +}; + extern const struct file_operations binder_fops; extern char *binder_devices_param; #ifdef CONFIG_ANDROID_BINDERFS extern bool is_binderfs_device(const struct inode *inode); +extern struct dentry *binderfs_create_file(struct dentry *dir, const char *name, + const struct file_operations *fops, + void *data); +extern void binderfs_remove_file(struct dentry *dentry); #else static inline bool is_binderfs_device(const struct inode *inode) { return false; } +static inline struct dentry *binderfs_create_file(struct dentry *dir, + const char *name, + const struct file_operations *fops, + void *data) +{ + return NULL; +} +static inline void binderfs_remove_file(struct dentry *dentry) {} #endif #ifdef CONFIG_ANDROID_BINDERFS diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index a1cba37afbf5..a4f73af4fa4e 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -48,16 +48,6 @@ static dev_t binderfs_dev; static DEFINE_MUTEX(binderfs_minors_mutex); static DEFINE_IDA(binderfs_minors); -/** - * binderfs_mount_opts - mount options for binderfs - * @max: maximum number of allocatable binderfs binder devices - * @stats_mode: enable binder stats in binderfs. - */ -struct binderfs_mount_opts { - int max; - int stats_mode; -}; - enum { Opt_max, Opt_stats_mode, @@ -75,27 +65,6 @@ static const match_table_t tokens = { { Opt_err, NULL } }; -/** - * binderfs_info - information about a binderfs mount - * @ipc_ns: The ipc namespace the binderfs mount belongs to. - * @control_dentry: This records the dentry of this binderfs mount - * binder-control device. - * @root_uid: uid that needs to be used when a new binder device is - * created. - * @root_gid: gid that needs to be used when a new binder device is - * created. - * @mount_opts: The mount options in use. - * @device_count: The current number of allocated binder devices. - */ -struct binderfs_info { - struct ipc_namespace *ipc_ns; - struct dentry *control_dentry; - kuid_t root_uid; - kgid_t root_gid; - struct binderfs_mount_opts mount_opts; - int device_count; -}; - static inline struct binderfs_info *BINDERFS_I(const struct inode *inode) { return inode->i_sb->s_fs_info; @@ -533,10 +502,24 @@ static struct dentry *binderfs_create_dentry(struct dentry *parent, return dentry; } -static struct dentry *binderfs_create_file(struct dentry *parent, - const char *name, - const struct file_operations *fops, - void *data) +void binderfs_remove_file(struct dentry *dentry) +{ + struct inode *parent_inode; + + parent_inode = d_inode(dentry->d_parent); + inode_lock(parent_inode); + if (simple_positive(dentry)) { + dget(dentry); + simple_unlink(parent_inode, dentry); + d_delete(dentry); + dput(dentry); + } + inode_unlock(parent_inode); +} + +struct dentry *binderfs_create_file(struct dentry *parent, const char *name, + const struct file_operations *fops, + void *data) { struct dentry *dentry; struct inode *new_inode, *parent_inode; @@ -604,7 +587,8 @@ out: static int init_binder_logs(struct super_block *sb) { - struct dentry *binder_logs_root_dir, *dentry; + struct dentry *binder_logs_root_dir, *dentry, *proc_log_dir; + struct binderfs_info *info; int ret = 0; binder_logs_root_dir = binderfs_create_dir(sb->s_root, @@ -648,8 +632,18 @@ static int init_binder_logs(struct super_block *sb) "failed_transaction_log", &binder_transaction_log_fops, &binder_transaction_log_failed); - if (IS_ERR(dentry)) + if (IS_ERR(dentry)) { ret = PTR_ERR(dentry); + goto out; + } + + proc_log_dir = binderfs_create_dir(binder_logs_root_dir, "proc"); + if (IS_ERR(proc_log_dir)) { + ret = PTR_ERR(proc_log_dir); + goto out; + } + info = sb->s_fs_info; + info->proc_log_dir = proc_log_dir; out: return ret; From f81151cd3afda797c1f0871e42a19606277b414b Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Mon, 14 Oct 2019 13:03:08 -0400 Subject: [PATCH 2036/3715] BACKPORT: perf_event: Add support for LSM and SELinux checks In current mainline, the degree of access to perf_event_open(2) system call depends on the perf_event_paranoid sysctl. This has a number of limitations: 1. The sysctl is only a single value. Many types of accesses are controlled based on the single value thus making the control very limited and coarse grained. 2. The sysctl is global, so if the sysctl is changed, then that means all processes get access to perf_event_open(2) opening the door to security issues. This patch adds LSM and SELinux access checking which will be used in Android to access perf_event_open(2) for the purposes of attaching BPF programs to tracepoints, perf profiling and other operations from userspace. These operations are intended for production systems. 5 new LSM hooks are added: 1. perf_event_open: This controls access during the perf_event_open(2) syscall itself. The hook is called from all the places that the perf_event_paranoid sysctl is checked to keep it consistent with the systctl. The hook gets passed a 'type' argument which controls CPU, kernel and tracepoint accesses (in this context, CPU, kernel and tracepoint have the same semantics as the perf_event_paranoid sysctl). Additionally, I added an 'open' type which is similar to perf_event_paranoid sysctl == 3 patch carried in Android and several other distros but was rejected in mainline [1] in 2016. 2. perf_event_alloc: This allocates a new security object for the event which stores the current SID within the event. It will be useful when the perf event's FD is passed through IPC to another process which may try to read the FD. Appropriate security checks will limit access. 3. perf_event_free: Called when the event is closed. 4. perf_event_read: Called from the read(2) and mmap(2) syscalls for the event. 5. perf_event_write: Called from the ioctl(2) syscalls for the event. [1] https://lwn.net/Articles/696240/ Since Peter had suggest LSM hooks in 2016 [1], I am adding his Suggested-by tag below. To use this patch, we set the perf_event_paranoid sysctl to -1 and then apply selinux checking as appropriate (default deny everything, and then add policy rules to give access to domains that need it). In the future we can remove the perf_event_paranoid sysctl altogether. Suggested-by: Peter Zijlstra Co-developed-by: Peter Zijlstra Signed-off-by: Joel Fernandes (Google) Signed-off-by: Peter Zijlstra (Intel) Acked-by: James Morris Cc: Arnaldo Carvalho de Melo Cc: rostedt@goodmis.org Cc: Yonghong Song Cc: Kees Cook Cc: Ingo Molnar Cc: Alexei Starovoitov Cc: jeffv@google.com Cc: Jiri Olsa Cc: Daniel Borkmann Cc: primiano@google.com Cc: Song Liu Cc: rsavitski@google.com Cc: Namhyung Kim Cc: Matthew Garrett Link: https://lkml.kernel.org/r/20191014170308.70668-1-joel@joelfernandes.org Bug: 137092007 Change-Id: Ibb356813b0b2f0cedab7806ee21ce4c00469be32 (cherry picked from commit da97e18458fb42d7c00fac5fd1c56a3896ec666e) [ Ryan Savitski: Adapted for older APIs, e.g. hlist -> list, removed refs to selinux_state. No new functionality. ] Signed-off-by: Ryan Savitski --- arch/powerpc/perf/core-book3s.c | 15 +++---- arch/x86/events/intel/bts.c | 8 ++-- arch/x86/events/intel/core.c | 5 ++- arch/x86/events/intel/p4.c | 5 ++- include/linux/lsm_hooks.h | 15 +++++++ include/linux/perf_event.h | 36 ++++++++++++--- include/linux/security.h | 38 +++++++++++++++- kernel/events/core.c | 57 ++++++++++++++++++----- kernel/trace/trace_event_perf.c | 15 ++++--- security/security.c | 27 +++++++++++ security/selinux/hooks.c | 70 +++++++++++++++++++++++++++++ security/selinux/include/classmap.h | 2 + security/selinux/include/objsec.h | 6 ++- 13 files changed, 261 insertions(+), 38 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 3188040022c4..1c37f08bcddd 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -95,7 +95,7 @@ static inline unsigned long perf_ip_adjust(struct pt_regs *regs) { return 0; } -static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { } +static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *regs, u64 *addrp) { } static inline u32 perf_get_misc_flags(struct pt_regs *regs) { return 0; @@ -126,7 +126,7 @@ static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw) static inline void power_pmu_bhrb_enable(struct perf_event *event) {} static inline void power_pmu_bhrb_disable(struct perf_event *event) {} static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) {} -static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {} +static inline void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw) {} static void pmao_restore_workaround(bool ebb) { } static bool use_ic(u64 event) { @@ -174,7 +174,7 @@ static inline unsigned long perf_ip_adjust(struct pt_regs *regs) * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC, the * [POWER7P_]MMCRA_SDAR_VALID bit in MMCRA, or the SDAR_VALID bit in SIER. */ -static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) +static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *regs, u64 *addrp) { unsigned long mmcra = regs->dsisr; bool sdar_valid; @@ -435,7 +435,7 @@ static __u64 power_pmu_bhrb_to(u64 addr) } /* Processing BHRB entries */ -static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) +static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw) { u64 val; u64 addr; @@ -463,8 +463,7 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) * exporting it to userspace (avoid exposure of regions * where we could have speculative execution) */ - if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) && - is_kernel_addr(addr)) + if (is_kernel_addr(addr) && perf_allow_kernel(&event->attr) != 0) continue; /* Branches are read most recent first (ie. mfbhrb 0 is @@ -2077,12 +2076,12 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (event->attr.sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) - perf_get_data_addr(regs, &data.addr); + perf_get_data_addr(event, regs, &data.addr); if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) { struct cpu_hw_events *cpuhw; cpuhw = this_cpu_ptr(&cpu_hw_events); - power_pmu_bhrb_read(cpuhw); + power_pmu_bhrb_read(event, cpuhw); data.br_stack = &cpuhw->bhrb_stack; } diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 7139f6bf27ad..68162163a05d 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -557,9 +557,11 @@ static int bts_event_init(struct perf_event *event) * Note that the default paranoia setting permits unprivileged * users to profile the kernel. */ - if (event->attr.exclude_kernel && perf_paranoid_kernel() && - !capable(CAP_SYS_ADMIN)) - return -EACCES; + if (event->attr.exclude_kernel) { + ret = perf_allow_kernel(&event->attr); + if (ret) + return ret; + } if (x86_add_exclusive(x86_lbr_exclusive_bts)) return -EBUSY; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 4a60ed8c4413..0307e34d2272 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3087,8 +3087,9 @@ static int intel_pmu_hw_config(struct perf_event *event) if (x86_pmu.version < 3) return -EINVAL; - if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) - return -EACCES; + ret = perf_allow_cpu(&event->attr); + if (ret) + return ret; event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY; diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c index d32c0eed38ca..4f9ac72968db 100644 --- a/arch/x86/events/intel/p4.c +++ b/arch/x86/events/intel/p4.c @@ -776,8 +776,9 @@ static int p4_validate_raw_event(struct perf_event *event) * the user needs special permissions to be able to use it */ if (p4_ht_active() && p4_event_bind_map[v].shared) { - if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) - return -EACCES; + v = perf_allow_cpu(&event->attr); + if (v) + return v; } /* ESCR EventMask bits may be invalid */ diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 7161d8e7ee79..7e9f59aeadb6 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1727,6 +1727,14 @@ union security_list_options { int (*bpf_prog_alloc_security)(struct bpf_prog_aux *aux); void (*bpf_prog_free_security)(struct bpf_prog_aux *aux); #endif /* CONFIG_BPF_SYSCALL */ +#ifdef CONFIG_PERF_EVENTS + int (*perf_event_open)(struct perf_event_attr *attr, int type); + int (*perf_event_alloc)(struct perf_event *event); + void (*perf_event_free)(struct perf_event *event); + int (*perf_event_read)(struct perf_event *event); + int (*perf_event_write)(struct perf_event *event); + +#endif }; struct security_hook_heads { @@ -1955,6 +1963,13 @@ struct security_hook_heads { struct list_head bpf_prog_alloc_security; struct list_head bpf_prog_free_security; #endif /* CONFIG_BPF_SYSCALL */ +#ifdef CONFIG_PERF_EVENTS + struct list_head perf_event_open; + struct list_head perf_event_alloc; + struct list_head perf_event_free; + struct list_head perf_event_read; + struct list_head perf_event_write; +#endif } __randomize_layout; /* diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ac16bac38c03..5d798eb5ac0a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -54,6 +54,7 @@ struct perf_guest_info_callbacks { #include #include #include +#include #include struct perf_callchain_entry { @@ -712,6 +713,9 @@ struct perf_event { int cgrp_defer_enabled; #endif +#ifdef CONFIG_SECURITY + void *security; +#endif struct list_head sb_list; #endif /* CONFIG_PERF_EVENTS */ }; @@ -1175,24 +1179,46 @@ extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, int perf_event_max_stack_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +/* Access to perf_event_open(2) syscall. */ +#define PERF_SECURITY_OPEN 0 + +/* Finer grained perf_event_open(2) access control. */ +#define PERF_SECURITY_CPU 1 +#define PERF_SECURITY_KERNEL 2 +#define PERF_SECURITY_TRACEPOINT 3 + static inline bool perf_paranoid_any(void) { return sysctl_perf_event_paranoid > 2; } -static inline bool perf_paranoid_tracepoint_raw(void) +static inline int perf_is_paranoid(void) { return sysctl_perf_event_paranoid > -1; } -static inline bool perf_paranoid_cpu(void) +static inline int perf_allow_kernel(struct perf_event_attr *attr) { - return sysctl_perf_event_paranoid > 0; + if (sysctl_perf_event_paranoid > 1 && !capable(CAP_SYS_ADMIN)) + return -EACCES; + + return security_perf_event_open(attr, PERF_SECURITY_KERNEL); } -static inline bool perf_paranoid_kernel(void) +static inline int perf_allow_cpu(struct perf_event_attr *attr) { - return sysctl_perf_event_paranoid > 1; + if (sysctl_perf_event_paranoid > 0 && !capable(CAP_SYS_ADMIN)) + return -EACCES; + + return security_perf_event_open(attr, PERF_SECURITY_CPU); +} + +static inline int perf_allow_tracepoint(struct perf_event_attr *attr) +{ + if (sysctl_perf_event_paranoid > -1 && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + return security_perf_event_open(attr, PERF_SECURITY_TRACEPOINT); } extern void perf_event_init(void); diff --git a/include/linux/security.h b/include/linux/security.h index 73f1ef625d40..aee15efd27ea 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1801,5 +1801,41 @@ static inline void free_secdata(void *secdata) { } #endif /* CONFIG_SECURITY */ -#endif /* ! __LINUX_SECURITY_H */ +#ifdef CONFIG_PERF_EVENTS +struct perf_event_attr; +#ifdef CONFIG_SECURITY +extern int security_perf_event_open(struct perf_event_attr *attr, int type); +extern int security_perf_event_alloc(struct perf_event *event); +extern void security_perf_event_free(struct perf_event *event); +extern int security_perf_event_read(struct perf_event *event); +extern int security_perf_event_write(struct perf_event *event); +#else +static inline int security_perf_event_open(struct perf_event_attr *attr, + int type) +{ + return 0; +} + +static inline int security_perf_event_alloc(struct perf_event *event) +{ + return 0; +} + +static inline void security_perf_event_free(struct perf_event *event) +{ +} + +static inline int security_perf_event_read(struct perf_event *event) +{ + return 0; +} + +static inline int security_perf_event_write(struct perf_event *event) +{ + return 0; +} +#endif /* CONFIG_SECURITY */ +#endif /* CONFIG_PERF_EVENTS */ + +#endif /* ! __LINUX_SECURITY_H */ diff --git a/kernel/events/core.c b/kernel/events/core.c index f87d54270076..e24e2d558cbc 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3912,8 +3912,9 @@ find_get_context(struct pmu *pmu, struct task_struct *task, if (!task) { /* Must be root to operate on a CPU event: */ - if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) - return ERR_PTR(-EACCES); + err = perf_allow_cpu(&event->attr); + if (err) + return ERR_PTR(err); cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); ctx = &cpuctx->ctx; @@ -4216,6 +4217,8 @@ static void _free_event(struct perf_event *event) unaccount_event(event); + security_perf_event_free(event); + if (event->rb) { /* * Can happen when we close an event with re-directed output. @@ -4635,6 +4638,10 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) struct perf_event_context *ctx; int ret; + ret = security_perf_event_read(event); + if (ret) + return ret; + ctx = perf_event_ctx_lock(event); ret = __perf_read(event, buf, count); perf_event_ctx_unlock(event, ctx); @@ -4880,6 +4887,11 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct perf_event_context *ctx; long ret; + /* Treat ioctl like writes as it is likely a mutating operation. */ + ret = security_perf_event_write(event); + if (ret) + return ret; + ctx = perf_event_ctx_lock(event); ret = _perf_ioctl(event, cmd, arg); perf_event_ctx_unlock(event, ctx); @@ -5340,6 +5352,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) if (!(vma->vm_flags & VM_SHARED)) return -EINVAL; + ret = security_perf_event_read(event); + if (ret) + return ret; + vma_size = vma->vm_end - vma->vm_start; if (vma->vm_pgoff == 0) { @@ -5453,7 +5469,7 @@ accounting: lock_limit >>= PAGE_SHIFT; locked = vma->vm_mm->pinned_vm + extra; - if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() && + if ((locked > lock_limit) && perf_is_paranoid() && !capable(CAP_IPC_LOCK)) { ret = -EPERM; goto unlock; @@ -9693,11 +9709,20 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, } } + err = security_perf_event_alloc(event); + if (err) + goto err_callchain_buffer; + /* symmetric to unaccount_event() in _free_event() */ account_event(event); return event; +err_callchain_buffer: + if (!event->parent) { + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) + put_callchain_buffers(); + } err_addr_filters: kfree(event->addr_filter_ranges); @@ -9815,9 +9840,11 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, attr->branch_sample_type = mask; } /* privileged levels capture (kernel, hv): check permissions */ - if ((mask & PERF_SAMPLE_BRANCH_PERM_PLM) - && perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) - return -EACCES; + if (mask & PERF_SAMPLE_BRANCH_PERM_PLM) { + ret = perf_allow_kernel(attr); + if (ret) + return ret; + } } if (attr->sample_type & PERF_SAMPLE_REGS_USER) { @@ -10030,13 +10057,19 @@ SYSCALL_DEFINE5(perf_event_open, if (perf_paranoid_any() && !capable(CAP_SYS_ADMIN)) return -EACCES; + /* Do we allow access to perf_event_open(2) ? */ + err = security_perf_event_open(&attr, PERF_SECURITY_OPEN); + if (err) + return err; + err = perf_copy_attr(attr_uptr, &attr); if (err) return err; if (!attr.exclude_kernel) { - if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) - return -EACCES; + err = perf_allow_kernel(&attr); + if (err) + return err; } if (attr.namespaces) { @@ -10053,9 +10086,11 @@ SYSCALL_DEFINE5(perf_event_open, } /* Only privileged users can get physical addresses */ - if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR) && - perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) - return -EACCES; + if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR)) { + err = perf_allow_kernel(&attr); + if (err) + return err; + } if (!attr.sample_max_stack) attr.sample_max_stack = sysctl_perf_event_max_stack; diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 13ba2d3f6a91..80b7b194c181 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -7,6 +7,7 @@ #include #include +#include #include "trace.h" static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS]; @@ -24,8 +25,10 @@ static int total_ref_count; static int perf_trace_event_perm(struct trace_event_call *tp_event, struct perf_event *p_event) { + int ret; + if (tp_event->perf_perm) { - int ret = tp_event->perf_perm(tp_event, p_event); + ret = tp_event->perf_perm(tp_event, p_event); if (ret) return ret; } @@ -44,8 +47,9 @@ static int perf_trace_event_perm(struct trace_event_call *tp_event, /* The ftrace function trace is allowed only for root. */ if (ftrace_event_is_function(tp_event)) { - if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) - return -EPERM; + ret = perf_allow_tracepoint(&p_event->attr); + if (ret) + return ret; if (!is_sampling_event(p_event)) return 0; @@ -80,8 +84,9 @@ static int perf_trace_event_perm(struct trace_event_call *tp_event, * ...otherwise raw tracepoint data can be a severe data leak, * only allow root to have these. */ - if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) - return -EPERM; + ret = perf_allow_tracepoint(&p_event->attr); + if (ret) + return ret; return 0; } diff --git a/security/security.c b/security/security.c index fb4910f0d0e2..5afd1dc81511 100644 --- a/security/security.c +++ b/security/security.c @@ -1745,3 +1745,30 @@ void security_bpf_prog_free(struct bpf_prog_aux *aux) call_void_hook(bpf_prog_free_security, aux); } #endif /* CONFIG_BPF_SYSCALL */ + +#ifdef CONFIG_PERF_EVENTS +int security_perf_event_open(struct perf_event_attr *attr, int type) +{ + return call_int_hook(perf_event_open, 0, attr, type); +} + +int security_perf_event_alloc(struct perf_event *event) +{ + return call_int_hook(perf_event_alloc, 0, event); +} + +void security_perf_event_free(struct perf_event *event) +{ + call_void_hook(perf_event_free, event); +} + +int security_perf_event_read(struct perf_event *event) +{ + return call_int_hook(perf_event_read, 0, event); +} + +int security_perf_event_write(struct perf_event *event) +{ + return call_int_hook(perf_event_write, 0, event); +} +#endif /* CONFIG_PERF_EVENTS */ diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 444739be29e9..2ad151edf5a4 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -6434,6 +6434,68 @@ static void selinux_bpf_prog_free(struct bpf_prog_aux *aux) } #endif + +#ifdef CONFIG_PERF_EVENTS +static int selinux_perf_event_open(struct perf_event_attr *attr, int type) +{ + u32 requested, sid = current_sid(); + + if (type == PERF_SECURITY_OPEN) + requested = PERF_EVENT__OPEN; + else if (type == PERF_SECURITY_CPU) + requested = PERF_EVENT__CPU; + else if (type == PERF_SECURITY_KERNEL) + requested = PERF_EVENT__KERNEL; + else if (type == PERF_SECURITY_TRACEPOINT) + requested = PERF_EVENT__TRACEPOINT; + else + return -EINVAL; + + return avc_has_perm(sid, sid, SECCLASS_PERF_EVENT, + requested, NULL); +} + +static int selinux_perf_event_alloc(struct perf_event *event) +{ + struct perf_event_security_struct *perfsec; + + perfsec = kzalloc(sizeof(*perfsec), GFP_KERNEL); + if (!perfsec) + return -ENOMEM; + + perfsec->sid = current_sid(); + event->security = perfsec; + + return 0; +} + +static void selinux_perf_event_free(struct perf_event *event) +{ + struct perf_event_security_struct *perfsec = event->security; + + event->security = NULL; + kfree(perfsec); +} + +static int selinux_perf_event_read(struct perf_event *event) +{ + struct perf_event_security_struct *perfsec = event->security; + u32 sid = current_sid(); + + return avc_has_perm(sid, perfsec->sid, + SECCLASS_PERF_EVENT, PERF_EVENT__READ, NULL); +} + +static int selinux_perf_event_write(struct perf_event *event) +{ + struct perf_event_security_struct *perfsec = event->security; + u32 sid = current_sid(); + + return avc_has_perm(sid, perfsec->sid, + SECCLASS_PERF_EVENT, PERF_EVENT__WRITE, NULL); +} +#endif + static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(binder_set_context_mgr, selinux_binder_set_context_mgr), LSM_HOOK_INIT(binder_transaction, selinux_binder_transaction), @@ -6663,6 +6725,14 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(bpf_map_free_security, selinux_bpf_map_free), LSM_HOOK_INIT(bpf_prog_free_security, selinux_bpf_prog_free), #endif + +#ifdef CONFIG_PERF_EVENTS + LSM_HOOK_INIT(perf_event_open, selinux_perf_event_open), + LSM_HOOK_INIT(perf_event_alloc, selinux_perf_event_alloc), + LSM_HOOK_INIT(perf_event_free, selinux_perf_event_free), + LSM_HOOK_INIT(perf_event_read, selinux_perf_event_read), + LSM_HOOK_INIT(perf_event_write, selinux_perf_event_write), +#endif }; static __init int selinux_init(void) diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index 5ae315ab060b..000effa857aa 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h @@ -241,6 +241,8 @@ struct security_class_mapping secclass_map[] = { { "manage_subnet", NULL } }, { "bpf", {"map_create", "map_read", "map_write", "prog_load", "prog_run"} }, + { "perf_event", + {"open", "cpu", "kernel", "tracepoint", "read", "write"} }, { NULL } }; diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index 3d54468ce334..e482f6f457f1 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -151,7 +151,11 @@ struct pkey_security_struct { }; struct bpf_security_struct { - u32 sid; /*SID of bpf obj creater*/ + u32 sid; /* SID of bpf obj creator */ +}; + +struct perf_event_security_struct { + u32 sid; /* SID of perf_event obj creator */ }; extern unsigned int selinux_checkreqprot; From 434a3aa7cfb936041c65714d9e49fe8ac8cb6fc2 Mon Sep 17 00:00:00 2001 From: Kyeongdon Kim Date: Wed, 6 Sep 2017 18:50:19 +0900 Subject: [PATCH 2037/3715] UPSTREAM: selinux: Use kmem_cache for hashtab_node During random test as own device to check slub account, we found some slack memory from hashtab_node(kmalloc-64). By using kzalloc(), middle of test result like below: allocated size 240768 request size 45144 slack size 195624 allocation count 3762 So, we want to use kmem_cache_zalloc() and that can reduce memory size 52byte(slack size/alloc count) per each struct. Signed-off-by: Kyeongdon Kim Signed-off-by: Paul Moore (cherry picked from commit 7c620ece125cbab7b5dfcb574ee1e64ab8b562cd) Change-Id: I552ed33d1d51f6c0fc37d078ed8f5cf07e9d76aa Bug: 140252993 Signed-off-by: Jeff Vander Stoep --- security/selinux/ss/hashtab.c | 17 +++++++++++++++-- security/selinux/ss/hashtab.h | 4 ++++ security/selinux/ss/services.c | 4 ++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/security/selinux/ss/hashtab.c b/security/selinux/ss/hashtab.c index 6bd6dcd954fa..aeb990f52f54 100644 --- a/security/selinux/ss/hashtab.c +++ b/security/selinux/ss/hashtab.c @@ -10,6 +10,8 @@ #include #include "hashtab.h" +static struct kmem_cache *hashtab_node_cachep; + struct hashtab *hashtab_create(u32 (*hash_value)(struct hashtab *h, const void *key), int (*keycmp)(struct hashtab *h, const void *key1, const void *key2), u32 size) @@ -58,7 +60,7 @@ int hashtab_insert(struct hashtab *h, void *key, void *datum) if (cur && (h->keycmp(h, key, cur->key) == 0)) return -EEXIST; - newnode = kzalloc(sizeof(*newnode), GFP_KERNEL); + newnode = kmem_cache_zalloc(hashtab_node_cachep, GFP_KERNEL); if (!newnode) return -ENOMEM; newnode->key = key; @@ -107,7 +109,7 @@ void hashtab_destroy(struct hashtab *h) while (cur) { temp = cur; cur = cur->next; - kfree(temp); + kmem_cache_free(hashtab_node_cachep, temp); } h->htable[i] = NULL; } @@ -167,3 +169,14 @@ void hashtab_stat(struct hashtab *h, struct hashtab_info *info) info->slots_used = slots_used; info->max_chain_len = max_chain_len; } +void hashtab_cache_init(void) +{ + hashtab_node_cachep = kmem_cache_create("hashtab_node", + sizeof(struct hashtab_node), + 0, SLAB_PANIC, NULL); +} + +void hashtab_cache_destroy(void) +{ + kmem_cache_destroy(hashtab_node_cachep); +} diff --git a/security/selinux/ss/hashtab.h b/security/selinux/ss/hashtab.h index 3e3e42bfd150..6183ee2a2e7a 100644 --- a/security/selinux/ss/hashtab.h +++ b/security/selinux/ss/hashtab.h @@ -85,4 +85,8 @@ int hashtab_map(struct hashtab *h, /* Fill info with some hash table statistics */ void hashtab_stat(struct hashtab *h, struct hashtab_info *info); +/* Use kmem_cache for hashtab_node */ +void hashtab_cache_init(void); +void hashtab_cache_destroy(void); + #endif /* _SS_HASHTAB_H */ diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index b275743e23cc..1dde563aff1d 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -2061,10 +2061,12 @@ int security_load_policy(void *data, size_t len) if (!ss_initialized) { avtab_cache_init(); ebitmap_cache_init(); + hashtab_cache_init(); rc = policydb_read(&policydb, fp); if (rc) { avtab_cache_destroy(); ebitmap_cache_destroy(); + hashtab_cache_destroy(); goto out; } @@ -2076,6 +2078,7 @@ int security_load_policy(void *data, size_t len) policydb_destroy(&policydb); avtab_cache_destroy(); ebitmap_cache_destroy(); + hashtab_cache_destroy(); goto out; } @@ -2084,6 +2087,7 @@ int security_load_policy(void *data, size_t len) policydb_destroy(&policydb); avtab_cache_destroy(); ebitmap_cache_destroy(); + hashtab_cache_destroy(); goto out; } From b86e8759d93cce8d89d227760e464fcc7f3f1370 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Thu, 1 Mar 2018 18:48:02 -0500 Subject: [PATCH 2038/3715] BACKPORT: selinux: wrap global selinux state Define a selinux state structure (struct selinux_state) for global SELinux state and pass it explicitly to all security server functions. The public portion of the structure contains state that is used throughout the SELinux code, such as the enforcing mode. The structure also contains a pointer to a selinux_ss structure whose definition is private to the security server and contains security server specific state such as the policy database and SID table. This change should have no effect on SELinux behavior or APIs (userspace or LSM). It merely wraps SELinux state and passes it explicitly as needed. Signed-off-by: Stephen Smalley [PM: minor fixups needed due to collisions with the SCTP patches] Signed-off-by: Paul Moore (cherry picked from commit aa8e712cee93d520e96a2ca8e3a20f807c937e3f) Resolved conflicts around non-backported support for sctp. Change-Id: Iedc64518daf86ce07648a4f3784c041198752857 Bug: 140252993 Signed-off-by: Jeff Vander Stoep --- security/selinux/avc.c | 16 +- security/selinux/hooks.c | 208 +++-- security/selinux/ibpkey.c | 3 +- security/selinux/include/avc.h | 6 - security/selinux/include/avc_ss.h | 6 - security/selinux/include/conditional.h | 11 +- security/selinux/include/objsec.h | 2 - security/selinux/include/security.h | 228 +++-- security/selinux/netif.c | 2 +- security/selinux/netlabel.c | 11 +- security/selinux/netnode.c | 4 +- security/selinux/netport.c | 2 +- security/selinux/selinuxfs.c | 145 ++-- security/selinux/ss/avtab.c | 9 +- security/selinux/ss/avtab.h | 3 - security/selinux/ss/ebitmap.c | 7 +- security/selinux/ss/ebitmap.h | 3 - security/selinux/ss/hashtab.c | 8 +- security/selinux/ss/hashtab.h | 4 - security/selinux/ss/mls.c | 72 +- security/selinux/ss/mls.h | 38 +- security/selinux/ss/services.c | 1082 ++++++++++++++---------- security/selinux/ss/services.h | 24 +- security/selinux/ss/status.c | 47 +- security/selinux/xfrm.c | 6 +- 25 files changed, 1152 insertions(+), 795 deletions(-) diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 2380b8d72cec..36124f48a5ff 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -149,7 +149,8 @@ static void avc_dump_query(struct audit_buffer *ab, u32 ssid, u32 tsid, u16 tcla char *scontext; u32 scontext_len; - rc = security_sid_to_context(ssid, &scontext, &scontext_len); + rc = security_sid_to_context(&selinux_state, ssid, + &scontext, &scontext_len); if (rc) audit_log_format(ab, "ssid=%d", ssid); else { @@ -157,7 +158,8 @@ static void avc_dump_query(struct audit_buffer *ab, u32 ssid, u32 tsid, u16 tcla kfree(scontext); } - rc = security_sid_to_context(tsid, &scontext, &scontext_len); + rc = security_sid_to_context(&selinux_state, tsid, + &scontext, &scontext_len); if (rc) audit_log_format(ab, " tsid=%d", tsid); else { @@ -969,7 +971,8 @@ static noinline struct avc_node *avc_compute_av(u32 ssid, u32 tsid, { rcu_read_unlock(); INIT_LIST_HEAD(&xp_node->xpd_head); - security_compute_av(ssid, tsid, tclass, avd, &xp_node->xp); + security_compute_av(&selinux_state, ssid, tsid, tclass, + avd, &xp_node->xp); rcu_read_lock(); return avc_insert(ssid, tsid, tclass, avd, xp_node); } @@ -982,7 +985,8 @@ static noinline int avc_denied(u32 ssid, u32 tsid, if (flags & AVC_STRICT) return -EACCES; - if (selinux_enforcing && !(avd->flags & AVD_FLAGS_PERMISSIVE)) + if (is_enforcing(&selinux_state) && + !(avd->flags & AVD_FLAGS_PERMISSIVE)) return -EACCES; avc_update_node(AVC_CALLBACK_GRANT, requested, driver, xperm, ssid, @@ -1043,8 +1047,8 @@ int avc_has_extended_perms(u32 ssid, u32 tsid, u16 tclass, u32 requested, goto decision; } rcu_read_unlock(); - security_compute_xperms_decision(ssid, tsid, tclass, driver, - &local_xpd); + security_compute_xperms_decision(&selinux_state, ssid, tsid, + tclass, driver, &local_xpd); rcu_read_lock(); avc_update_node(AVC_CALLBACK_ADD_XPERMS, requested, driver, xperm, ssid, tsid, tclass, avd.seqno, &local_xpd, 0); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 2ad151edf5a4..ac16028711de 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -98,20 +98,24 @@ #include "audit.h" #include "avc_ss.h" +struct selinux_state selinux_state; + /* SECMARK reference count */ static atomic_t selinux_secmark_refcount = ATOMIC_INIT(0); #ifdef CONFIG_SECURITY_SELINUX_DEVELOP -int selinux_enforcing; +static int selinux_enforcing_boot; static int __init enforcing_setup(char *str) { unsigned long enforcing; if (!kstrtoul(str, 0, &enforcing)) - selinux_enforcing = enforcing ? 1 : 0; + selinux_enforcing_boot = enforcing ? 1 : 0; return 1; } __setup("enforcing=", enforcing_setup); +#else +#define selinux_enforcing_boot 1 #endif #ifdef CONFIG_SECURITY_SELINUX_BOOTPARAM @@ -129,6 +133,19 @@ __setup("selinux=", selinux_enabled_setup); int selinux_enabled = 1; #endif +static unsigned int selinux_checkreqprot_boot = + CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE; + +static int __init checkreqprot_setup(char *str) +{ + unsigned long checkreqprot; + + if (!kstrtoul(str, 0, &checkreqprot)) + selinux_checkreqprot_boot = checkreqprot ? 1 : 0; + return 1; +} +__setup("checkreqprot=", checkreqprot_setup); + static struct kmem_cache *sel_inode_cache; static struct kmem_cache *file_security_cache; @@ -145,7 +162,8 @@ static struct kmem_cache *file_security_cache; */ static int selinux_secmark_enabled(void) { - return (selinux_policycap_alwaysnetwork || atomic_read(&selinux_secmark_refcount)); + return (selinux_policycap_alwaysnetwork() || + atomic_read(&selinux_secmark_refcount)); } /** @@ -160,7 +178,8 @@ static int selinux_secmark_enabled(void) */ static int selinux_peerlbl_enabled(void) { - return (selinux_policycap_alwaysnetwork || netlbl_enabled() || selinux_xfrm_enabled()); + return (selinux_policycap_alwaysnetwork() || + netlbl_enabled() || selinux_xfrm_enabled()); } static int selinux_netcache_avc_callback(u32 event) @@ -264,7 +283,8 @@ static int __inode_security_revalidate(struct inode *inode, might_sleep_if(may_sleep); - if (ss_initialized && isec->initialized != LABEL_INITIALIZED) { + if (selinux_state.initialized && + isec->initialized != LABEL_INITIALIZED) { if (!may_sleep) return -ECHILD; @@ -480,7 +500,7 @@ static int selinux_is_genfs_special_handling(struct super_block *sb) !strcmp(sb->s_type->name, "debugfs") || !strcmp(sb->s_type->name, "tracefs") || !strcmp(sb->s_type->name, "rootfs") || - (selinux_policycap_cgroupseclabel && + (selinux_policycap_cgroupseclabel() && (!strcmp(sb->s_type->name, "cgroup") || !strcmp(sb->s_type->name, "cgroup2"))); } @@ -608,7 +628,7 @@ static int selinux_get_mnt_opts(const struct super_block *sb, if (!(sbsec->flags & SE_SBINITIALIZED)) return -EINVAL; - if (!ss_initialized) + if (!selinux_state.initialized) return -EINVAL; /* make sure we always check enough bits to cover the mask */ @@ -639,21 +659,25 @@ static int selinux_get_mnt_opts(const struct super_block *sb, i = 0; if (sbsec->flags & FSCONTEXT_MNT) { - rc = security_sid_to_context(sbsec->sid, &context, &len); + rc = security_sid_to_context(&selinux_state, sbsec->sid, + &context, &len); if (rc) goto out_free; opts->mnt_opts[i] = context; opts->mnt_opts_flags[i++] = FSCONTEXT_MNT; } if (sbsec->flags & CONTEXT_MNT) { - rc = security_sid_to_context(sbsec->mntpoint_sid, &context, &len); + rc = security_sid_to_context(&selinux_state, + sbsec->mntpoint_sid, + &context, &len); if (rc) goto out_free; opts->mnt_opts[i] = context; opts->mnt_opts_flags[i++] = CONTEXT_MNT; } if (sbsec->flags & DEFCONTEXT_MNT) { - rc = security_sid_to_context(sbsec->def_sid, &context, &len); + rc = security_sid_to_context(&selinux_state, sbsec->def_sid, + &context, &len); if (rc) goto out_free; opts->mnt_opts[i] = context; @@ -663,7 +687,8 @@ static int selinux_get_mnt_opts(const struct super_block *sb, struct dentry *root = sbsec->sb->s_root; struct inode_security_struct *isec = backing_inode_security(root); - rc = security_sid_to_context(isec->sid, &context, &len); + rc = security_sid_to_context(&selinux_state, isec->sid, + &context, &len); if (rc) goto out_free; opts->mnt_opts[i] = context; @@ -726,7 +751,7 @@ static int selinux_set_mnt_opts(struct super_block *sb, mutex_lock(&sbsec->lock); - if (!ss_initialized) { + if (!selinux_state.initialized) { if (!num_opts) { /* Defer initialization until selinux_complete_init, after the initial policy is loaded and the security @@ -772,7 +797,9 @@ static int selinux_set_mnt_opts(struct super_block *sb, if (flags[i] == SBLABEL_MNT) continue; - rc = security_context_str_to_sid(mount_options[i], &sid, GFP_KERNEL); + rc = security_context_str_to_sid(&selinux_state, + mount_options[i], &sid, + GFP_KERNEL); if (rc) { printk(KERN_WARNING "SELinux: security_context_str_to_sid" "(%s) failed for (dev %s, type %s) errno=%d\n", @@ -848,7 +875,7 @@ static int selinux_set_mnt_opts(struct super_block *sb, * Determine the labeling behavior to use for this * filesystem type. */ - rc = security_fs_use(sb); + rc = security_fs_use(&selinux_state, sb); if (rc) { printk(KERN_WARNING "%s: security_fs_use(%s) returned %d\n", @@ -873,7 +900,9 @@ static int selinux_set_mnt_opts(struct super_block *sb, } if (sbsec->behavior == SECURITY_FS_USE_XATTR) { sbsec->behavior = SECURITY_FS_USE_MNTPOINT; - rc = security_transition_sid(current_sid(), current_sid(), + rc = security_transition_sid(&selinux_state, + current_sid(), + current_sid(), SECCLASS_FILE, NULL, &sbsec->mntpoint_sid); if (rc) @@ -1009,7 +1038,7 @@ static int selinux_sb_clone_mnt_opts(const struct super_block *oldsb, * if the parent was able to be mounted it clearly had no special lsm * mount options. thus we can safely deal with this superblock later */ - if (!ss_initialized) + if (!selinux_state.initialized) return 0; /* @@ -1039,7 +1068,7 @@ static int selinux_sb_clone_mnt_opts(const struct super_block *oldsb, if (newsbsec->behavior == SECURITY_FS_USE_NATIVE && !(kern_flags & SECURITY_LSM_NATIVE_LABELS) && !set_context) { - rc = security_fs_use(newsb); + rc = security_fs_use(&selinux_state, newsb); if (rc) goto out; } @@ -1322,7 +1351,7 @@ static inline int default_protocol_dgram(int protocol) static inline u16 socket_type_to_security_class(int family, int type, int protocol) { - int extsockclass = selinux_policycap_extsockclass; + int extsockclass = selinux_policycap_extsockclass(); switch (family) { case PF_UNIX: @@ -1496,7 +1525,8 @@ static int selinux_genfs_get_sid(struct dentry *dentry, path++; } } - rc = security_genfs_sid(sb->s_type->name, path, tclass, sid); + rc = security_genfs_sid(&selinux_state, sb->s_type->name, + path, tclass, sid); } free_page((unsigned long)buffer); return rc; @@ -1614,7 +1644,8 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent sid = sbsec->def_sid; rc = 0; } else { - rc = security_context_to_sid_default(context, rc, &sid, + rc = security_context_to_sid_default(&selinux_state, + context, rc, &sid, sbsec->def_sid, GFP_NOFS); if (rc) { @@ -1647,7 +1678,8 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent sid = sbsec->sid; /* Try to obtain a transition SID. */ - rc = security_transition_sid(task_sid, sid, sclass, NULL, &sid); + rc = security_transition_sid(&selinux_state, task_sid, sid, + sclass, NULL, &sid); if (rc) goto out; break; @@ -1908,7 +1940,8 @@ selinux_determine_inode_label(const struct task_security_struct *tsec, *_new_isid = tsec->create_sid; } else { const struct inode_security_struct *dsec = inode_security(dir); - return security_transition_sid(tsec->sid, dsec->sid, tclass, + return security_transition_sid(&selinux_state, tsec->sid, + dsec->sid, tclass, name, _new_isid); } @@ -2131,7 +2164,8 @@ static inline u32 open_file_to_av(struct file *file) u32 av = file_to_av(file); struct inode *inode = file_inode(file); - if (selinux_policycap_openperm && inode->i_sb->s_magic != SOCKFS_MAGIC) + if (selinux_policycap_openperm() && + inode->i_sb->s_magic != SOCKFS_MAGIC) av |= FILE__OPEN; return av; @@ -2376,7 +2410,7 @@ static int check_nnp_nosuid(const struct linux_binprm *bprm, * policy allows the corresponding permission between * the old and new contexts. */ - if (selinux_policycap_nnp_nosuid_transition) { + if (selinux_policycap_nnp_nosuid_transition()) { av = 0; if (nnp) av |= PROCESS2__NNP_TRANSITION; @@ -2393,7 +2427,8 @@ static int check_nnp_nosuid(const struct linux_binprm *bprm, * i.e. SIDs that are guaranteed to only be allowed a subset * of the permissions of the current SID. */ - rc = security_bounded_transition(old_tsec->sid, new_tsec->sid); + rc = security_bounded_transition(&selinux_state, old_tsec->sid, + new_tsec->sid); if (!rc) return 0; @@ -2445,8 +2480,8 @@ static int selinux_bprm_set_creds(struct linux_binprm *bprm) return rc; } else { /* Check for a default transition on this program. */ - rc = security_transition_sid(old_tsec->sid, isec->sid, - SECCLASS_PROCESS, NULL, + rc = security_transition_sid(&selinux_state, old_tsec->sid, + isec->sid, SECCLASS_PROCESS, NULL, &new_tsec->sid); if (rc) return rc; @@ -2804,7 +2839,9 @@ static int selinux_sb_remount(struct super_block *sb, void *data) if (flags[i] == SBLABEL_MNT) continue; - rc = security_context_str_to_sid(mount_options[i], &sid, GFP_KERNEL); + rc = security_context_str_to_sid(&selinux_state, + mount_options[i], &sid, + GFP_KERNEL); if (rc) { printk(KERN_WARNING "SELinux: security_context_str_to_sid" "(%s) failed for (dev %s, type %s) errno=%d\n", @@ -2929,7 +2966,8 @@ static int selinux_dentry_init_security(struct dentry *dentry, int mode, if (rc) return rc; - return security_sid_to_context(newsid, (char **)ctx, ctxlen); + return security_sid_to_context(&selinux_state, newsid, (char **)ctx, + ctxlen); } static int selinux_dentry_create_files_as(struct dentry *dentry, int mode, @@ -2984,14 +3022,15 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir, isec->initialized = LABEL_INITIALIZED; } - if (!ss_initialized || !(sbsec->flags & SBLABEL_MNT)) + if (!selinux_state.initialized || !(sbsec->flags & SBLABEL_MNT)) return -EOPNOTSUPP; if (name) *name = XATTR_SELINUX_SUFFIX; if (value && len) { - rc = security_sid_to_context_force(newsid, &context, &clen); + rc = security_sid_to_context_force(&selinux_state, newsid, + &context, &clen); if (rc) return rc; *value = context; @@ -3152,7 +3191,7 @@ static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr) ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_TIMES_SET)) return dentry_has_perm(cred, dentry, FILE__SETATTR); - if (selinux_policycap_openperm && + if (selinux_policycap_openperm() && inode->i_sb->s_magic != SOCKFS_MAGIC && (ia_valid & ATTR_SIZE) && !(ia_valid & ATTR_FILE)) @@ -3228,7 +3267,8 @@ static int selinux_inode_setxattr(struct dentry *dentry, const char *name, if (rc) return rc; - rc = security_context_to_sid(value, size, &newsid, GFP_KERNEL); + rc = security_context_to_sid(&selinux_state, value, size, &newsid, + GFP_KERNEL); if (rc == -EINVAL) { if (!has_cap_mac_admin(true)) { struct audit_buffer *ab; @@ -3254,7 +3294,8 @@ static int selinux_inode_setxattr(struct dentry *dentry, const char *name, return rc; } - rc = security_context_to_sid_force(value, size, &newsid); + rc = security_context_to_sid_force(&selinux_state, value, + size, &newsid); } if (rc) return rc; @@ -3264,8 +3305,8 @@ static int selinux_inode_setxattr(struct dentry *dentry, const char *name, if (rc) return rc; - rc = security_validate_transition(isec->sid, newsid, sid, - isec->sclass); + rc = security_validate_transition(&selinux_state, isec->sid, newsid, + sid, isec->sclass); if (rc) return rc; @@ -3290,7 +3331,8 @@ static void selinux_inode_post_setxattr(struct dentry *dentry, const char *name, return; } - rc = security_context_to_sid_force(value, size, &newsid); + rc = security_context_to_sid_force(&selinux_state, value, size, + &newsid); if (rc) { printk(KERN_ERR "SELinux: unable to map context to SID" "for (%s, %lu), rc=%d\n", @@ -3358,10 +3400,12 @@ static int selinux_inode_getsecurity(struct inode *inode, const char *name, void */ isec = inode_security(inode); if (has_cap_mac_admin(false)) - error = security_sid_to_context_force(isec->sid, &context, + error = security_sid_to_context_force(&selinux_state, + isec->sid, &context, &size); else - error = security_sid_to_context(isec->sid, &context, &size); + error = security_sid_to_context(&selinux_state, isec->sid, + &context, &size); if (error) return error; error = size; @@ -3391,7 +3435,8 @@ static int selinux_inode_setsecurity(struct inode *inode, const char *name, if (!value || !size) return -EACCES; - rc = security_context_to_sid(value, size, &newsid, GFP_KERNEL); + rc = security_context_to_sid(&selinux_state, value, size, &newsid, + GFP_KERNEL); if (rc) return rc; @@ -3653,7 +3698,7 @@ static int selinux_mmap_file(struct file *file, unsigned long reqprot, return rc; } - if (selinux_checkreqprot) + if (selinux_state.checkreqprot) prot = reqprot; return file_map_prot_check(file, prot, @@ -3667,7 +3712,7 @@ static int selinux_file_mprotect(struct vm_area_struct *vma, const struct cred *cred = current_cred(); u32 sid = cred_sid(cred); - if (selinux_checkreqprot) + if (selinux_state.checkreqprot) prot = reqprot; if (default_noexec && @@ -4325,7 +4370,8 @@ static int selinux_skb_peerlbl_sid(struct sk_buff *skb, u16 family, u32 *sid) if (unlikely(err)) return -EACCES; - err = security_net_peersid_resolve(nlbl_sid, nlbl_type, xfrm_sid, sid); + err = security_net_peersid_resolve(&selinux_state, nlbl_sid, + nlbl_type, xfrm_sid, sid); if (unlikely(err)) { printk(KERN_WARNING "SELinux: failure in selinux_skb_peerlbl_sid()," @@ -4353,7 +4399,8 @@ static int selinux_conn_sid(u32 sk_sid, u32 skb_sid, u32 *conn_sid) int err = 0; if (skb_sid != SECSID_NULL) - err = security_sid_mls_copy(sk_sid, skb_sid, conn_sid); + err = security_sid_mls_copy(&selinux_state, sk_sid, skb_sid, + conn_sid); else *conn_sid = sk_sid; @@ -4370,8 +4417,8 @@ static int socket_sockcreate_sid(const struct task_security_struct *tsec, return 0; } - return security_transition_sid(tsec->sid, tsec->sid, secclass, NULL, - socksid); + return security_transition_sid(&selinux_state, tsec->sid, tsec->sid, + secclass, NULL, socksid); } static int sock_has_perm(struct sock *sk, u32 perms) @@ -4706,8 +4753,8 @@ static int selinux_socket_unix_stream_connect(struct sock *sock, /* server child socket */ sksec_new->peer_sid = sksec_sock->sid; - err = security_sid_mls_copy(sksec_other->sid, sksec_sock->sid, - &sksec_new->sid); + err = security_sid_mls_copy(&selinux_state, sksec_other->sid, + sksec_sock->sid, &sksec_new->sid); if (err) return err; @@ -4812,7 +4859,7 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) * to the selinux_sock_rcv_skb_compat() function to deal with the * special handling. We do this in an attempt to keep this function * as fast and as clean as possible. */ - if (!selinux_policycap_netpeer) + if (!selinux_policycap_netpeer()) return selinux_sock_rcv_skb_compat(sk, skb, family); secmark_active = selinux_secmark_enabled(); @@ -4873,7 +4920,8 @@ static int selinux_socket_getpeersec_stream(struct socket *sock, char __user *op if (peer_sid == SECSID_NULL) return -ENOPROTOOPT; - err = security_sid_to_context(peer_sid, &scontext, &scontext_len); + err = security_sid_to_context(&selinux_state, peer_sid, &scontext, + &scontext_len); if (err) return err; @@ -5158,7 +5206,8 @@ static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb) sk->sk_protocol, nlh->nlmsg_type, secclass_map[sksec->sclass - 1].name, task_pid_nr(current), current->comm); - if (!selinux_enforcing || security_get_allow_unknown()) + if (!is_enforcing(&selinux_state) || + security_get_allow_unknown(&selinux_state)) err = 0; } @@ -5188,7 +5237,7 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, u8 netlbl_active; u8 peerlbl_active; - if (!selinux_policycap_netpeer) + if (!selinux_policycap_netpeer()) return NF_ACCEPT; secmark_active = selinux_secmark_enabled(); @@ -5357,7 +5406,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, * to the selinux_ip_postroute_compat() function to deal with the * special handling. We do this in an attempt to keep this function * as fast and as clean as possible. */ - if (!selinux_policycap_netpeer) + if (!selinux_policycap_netpeer()) return selinux_ip_postroute_compat(skb, ifindex, family); secmark_active = selinux_secmark_enabled(); @@ -5663,8 +5712,8 @@ static int selinux_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg, * Compute new sid based on current process and * message queue this message will be stored in */ - rc = security_transition_sid(sid, isec->sid, SECCLASS_MSG, - NULL, &msec->sid); + rc = security_transition_sid(&selinux_state, sid, isec->sid, + SECCLASS_MSG, NULL, &msec->sid); if (rc) return rc; } @@ -5973,7 +6022,7 @@ static int selinux_getprocattr(struct task_struct *p, if (!sid) return 0; - error = security_sid_to_context(sid, value, &len); + error = security_sid_to_context(&selinux_state, sid, value, &len); if (error) return error; return len; @@ -6020,7 +6069,8 @@ static int selinux_setprocattr(const char *name, void *value, size_t size) str[size-1] = 0; size--; } - error = security_context_to_sid(value, size, &sid, GFP_KERNEL); + error = security_context_to_sid(&selinux_state, value, size, + &sid, GFP_KERNEL); if (error == -EINVAL && !strcmp(name, "fscreate")) { if (!has_cap_mac_admin(true)) { struct audit_buffer *ab; @@ -6039,8 +6089,9 @@ static int selinux_setprocattr(const char *name, void *value, size_t size) return error; } - error = security_context_to_sid_force(value, size, - &sid); + error = security_context_to_sid_force( + &selinux_state, + value, size, &sid); } if (error) return error; @@ -6077,7 +6128,8 @@ static int selinux_setprocattr(const char *name, void *value, size_t size) /* Only allow single threaded processes to change context */ error = -EPERM; if (!current_is_single_threaded()) { - error = security_bounded_transition(tsec->sid, sid); + error = security_bounded_transition(&selinux_state, + tsec->sid, sid); if (error) goto abort_change; } @@ -6119,12 +6171,14 @@ static int selinux_ismaclabel(const char *name) static int selinux_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) { - return security_sid_to_context(secid, secdata, seclen); + return security_sid_to_context(&selinux_state, secid, + secdata, seclen); } static int selinux_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid) { - return security_context_to_sid(secdata, seclen, secid, GFP_KERNEL); + return security_context_to_sid(&selinux_state, secdata, seclen, + secid, GFP_KERNEL); } static void selinux_release_secctx(char *secdata, u32 seclen) @@ -6229,7 +6283,8 @@ static int selinux_key_getsecurity(struct key *key, char **_buffer) unsigned len; int rc; - rc = security_sid_to_context(ksec->sid, &context, &len); + rc = security_sid_to_context(&selinux_state, ksec->sid, + &context, &len); if (!rc) rc = len; *_buffer = context; @@ -6268,7 +6323,8 @@ static int selinux_ib_endport_manage_subnet(void *ib_sec, const char *dev_name, struct ib_security_struct *sec = ib_sec; struct lsm_ibendport_audit ibendport; - err = security_ib_endport_sid(dev_name, port_num, &sid); + err = security_ib_endport_sid(&selinux_state, dev_name, port_num, + &sid); if (err) return err; @@ -6749,6 +6805,11 @@ static __init int selinux_init(void) printk(KERN_INFO "SELinux: Initializing.\n"); + memset(&selinux_state, 0, sizeof(selinux_state)); + set_enforcing(&selinux_state, selinux_enforcing_boot); + selinux_state.checkreqprot = selinux_checkreqprot_boot; + selinux_ss_init(&selinux_state.ss); + /* Set the security state for the initial task. */ cred_init_security(); @@ -6762,6 +6823,12 @@ static __init int selinux_init(void) 0, SLAB_PANIC, NULL); avc_init(); + avtab_cache_init(); + + ebitmap_cache_init(); + + hashtab_cache_init(); + security_add_hooks(selinux_hooks, ARRAY_SIZE(selinux_hooks), "selinux"); if (avc_add_callback(selinux_netcache_avc_callback, AVC_CALLBACK_RESET)) @@ -6770,7 +6837,7 @@ static __init int selinux_init(void) if (avc_add_callback(selinux_lsm_notifier_avc_callback, AVC_CALLBACK_RESET)) panic("SELinux: Unable to register AVC LSM notifier callback\n"); - if (selinux_enforcing) + if (selinux_enforcing_boot) printk(KERN_DEBUG "SELinux: Starting in enforcing mode\n"); else printk(KERN_DEBUG "SELinux: Starting in permissive mode\n"); @@ -6891,23 +6958,22 @@ static void selinux_nf_ip_exit(void) #endif /* CONFIG_NETFILTER */ #ifdef CONFIG_SECURITY_SELINUX_DISABLE -static int selinux_disabled; - -int selinux_disable(void) +int selinux_disable(struct selinux_state *state) { - if (ss_initialized) { + if (state->initialized) { /* Not permitted after initial policy load. */ return -EINVAL; } - if (selinux_disabled) { + if (state->disabled) { /* Only do this once. */ return -EINVAL; } + state->disabled = 1; + printk(KERN_INFO "SELinux: Disabled at runtime.\n"); - selinux_disabled = 1; selinux_enabled = 0; security_delete_hooks(selinux_hooks, ARRAY_SIZE(selinux_hooks)); diff --git a/security/selinux/ibpkey.c b/security/selinux/ibpkey.c index e3614ee5f1c0..0a4b89d48297 100644 --- a/security/selinux/ibpkey.c +++ b/security/selinux/ibpkey.c @@ -152,7 +152,8 @@ static int sel_ib_pkey_sid_slow(u64 subnet_prefix, u16 pkey_num, u32 *sid) return 0; } - ret = security_ib_pkey_sid(subnet_prefix, pkey_num, sid); + ret = security_ib_pkey_sid(&selinux_state, subnet_prefix, pkey_num, + sid); if (ret) goto out; diff --git a/security/selinux/include/avc.h b/security/selinux/include/avc.h index 57d61cf36500..de33dc9034b8 100644 --- a/security/selinux/include/avc.h +++ b/security/selinux/include/avc.h @@ -20,12 +20,6 @@ #include "av_permissions.h" #include "security.h" -#ifdef CONFIG_SECURITY_SELINUX_DEVELOP -extern int selinux_enforcing; -#else -#define selinux_enforcing 1 -#endif - /* * An entry in the AVC. */ diff --git a/security/selinux/include/avc_ss.h b/security/selinux/include/avc_ss.h index 3bcc72769b87..4e2a44d0ae66 100644 --- a/security/selinux/include/avc_ss.h +++ b/security/selinux/include/avc_ss.h @@ -19,11 +19,5 @@ struct security_class_mapping { extern struct security_class_mapping secclass_map[]; -/* - * The security server must be initialized before - * any labeling or access decisions can be provided. - */ -extern int ss_initialized; - #endif /* _SELINUX_AVC_SS_H_ */ diff --git a/security/selinux/include/conditional.h b/security/selinux/include/conditional.h index ff4fddca9050..0e30eca02c48 100644 --- a/security/selinux/include/conditional.h +++ b/security/selinux/include/conditional.h @@ -13,10 +13,15 @@ #ifndef _SELINUX_CONDITIONAL_H_ #define _SELINUX_CONDITIONAL_H_ -int security_get_bools(int *len, char ***names, int **values); +#include "security.h" -int security_set_bools(int len, int *values); +int security_get_bools(struct selinux_state *state, + int *len, char ***names, int **values); -int security_get_bool_value(int index); +int security_set_bools(struct selinux_state *state, + int len, int *values); + +int security_get_bool_value(struct selinux_state *state, + int index); #endif diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index e482f6f457f1..512908b55ca3 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -158,6 +158,4 @@ struct perf_event_security_struct { u32 sid; /* SID of perf_event obj creator */ }; -extern unsigned int selinux_checkreqprot; - #endif /* _SELINUX_OBJSEC_H_ */ diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index 02f0412d42f2..c3a1ef10e710 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include "flask.h" #define SECSID_NULL 0x00000000 /* unspecified SID */ @@ -81,13 +83,6 @@ enum { extern char *selinux_policycap_names[__POLICYDB_CAPABILITY_MAX]; -extern int selinux_policycap_netpeer; -extern int selinux_policycap_openperm; -extern int selinux_policycap_extsockclass; -extern int selinux_policycap_alwaysnetwork; -extern int selinux_policycap_cgroupseclabel; -extern int selinux_policycap_nnp_nosuid_transition; - /* * type_datum properties * available at the kernel policy version >= POLICYDB_VERSION_BOUNDARY @@ -98,13 +93,95 @@ extern int selinux_policycap_nnp_nosuid_transition; /* limitation of boundary depth */ #define POLICYDB_BOUNDS_MAXDEPTH 4 -int security_mls_enabled(void); +struct selinux_ss; -int security_load_policy(void *data, size_t len); -int security_read_policy(void **data, size_t *len); -size_t security_policydb_len(void); +struct selinux_state { + bool disabled; +#ifdef CONFIG_SECURITY_SELINUX_DEVELOP + bool enforcing; +#endif + bool checkreqprot; + bool initialized; + bool policycap[__POLICYDB_CAPABILITY_MAX]; + struct selinux_ss *ss; +}; -int security_policycap_supported(unsigned int req_cap); +void selinux_ss_init(struct selinux_ss **ss); + +extern struct selinux_state selinux_state; + +#ifdef CONFIG_SECURITY_SELINUX_DEVELOP +static inline bool is_enforcing(struct selinux_state *state) +{ + return state->enforcing; +} + +static inline void set_enforcing(struct selinux_state *state, bool value) +{ + state->enforcing = value; +} +#else +static inline bool is_enforcing(struct selinux_state *state) +{ + return true; +} + +static inline void set_enforcing(struct selinux_state *state, bool value) +{ +} +#endif + +static inline bool selinux_policycap_netpeer(void) +{ + struct selinux_state *state = &selinux_state; + + return state->policycap[POLICYDB_CAPABILITY_NETPEER]; +} + +static inline bool selinux_policycap_openperm(void) +{ + struct selinux_state *state = &selinux_state; + + return state->policycap[POLICYDB_CAPABILITY_OPENPERM]; +} + +static inline bool selinux_policycap_extsockclass(void) +{ + struct selinux_state *state = &selinux_state; + + return state->policycap[POLICYDB_CAPABILITY_EXTSOCKCLASS]; +} + +static inline bool selinux_policycap_alwaysnetwork(void) +{ + struct selinux_state *state = &selinux_state; + + return state->policycap[POLICYDB_CAPABILITY_ALWAYSNETWORK]; +} + +static inline bool selinux_policycap_cgroupseclabel(void) +{ + struct selinux_state *state = &selinux_state; + + return state->policycap[POLICYDB_CAPABILITY_CGROUPSECLABEL]; +} + +static inline bool selinux_policycap_nnp_nosuid_transition(void) +{ + struct selinux_state *state = &selinux_state; + + return state->policycap[POLICYDB_CAPABILITY_NNP_NOSUID_TRANSITION]; +} + +int security_mls_enabled(struct selinux_state *state); +int security_load_policy(struct selinux_state *state, + void *data, size_t len); +int security_read_policy(struct selinux_state *state, + void **data, size_t *len); +size_t security_policydb_len(struct selinux_state *state); + +int security_policycap_supported(struct selinux_state *state, + unsigned int req_cap); #define SEL_VEC_MAX 32 struct av_decision { @@ -141,76 +218,100 @@ struct extended_perms { /* definitions of av_decision.flags */ #define AVD_FLAGS_PERMISSIVE 0x0001 -void security_compute_av(u32 ssid, u32 tsid, +void security_compute_av(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, struct av_decision *avd, struct extended_perms *xperms); -void security_compute_xperms_decision(u32 ssid, u32 tsid, u16 tclass, - u8 driver, struct extended_perms_decision *xpermd); +void security_compute_xperms_decision(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, + u8 driver, + struct extended_perms_decision *xpermd); -void security_compute_av_user(u32 ssid, u32 tsid, - u16 tclass, struct av_decision *avd); +void security_compute_av_user(struct selinux_state *state, + u32 ssid, u32 tsid, + u16 tclass, struct av_decision *avd); -int security_transition_sid(u32 ssid, u32 tsid, u16 tclass, +int security_transition_sid(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, const struct qstr *qstr, u32 *out_sid); -int security_transition_sid_user(u32 ssid, u32 tsid, u16 tclass, +int security_transition_sid_user(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, const char *objname, u32 *out_sid); -int security_member_sid(u32 ssid, u32 tsid, - u16 tclass, u32 *out_sid); +int security_member_sid(struct selinux_state *state, u32 ssid, u32 tsid, + u16 tclass, u32 *out_sid); -int security_change_sid(u32 ssid, u32 tsid, - u16 tclass, u32 *out_sid); +int security_change_sid(struct selinux_state *state, u32 ssid, u32 tsid, + u16 tclass, u32 *out_sid); -int security_sid_to_context(u32 sid, char **scontext, - u32 *scontext_len); +int security_sid_to_context(struct selinux_state *state, u32 sid, + char **scontext, u32 *scontext_len); -int security_sid_to_context_force(u32 sid, char **scontext, u32 *scontext_len); +int security_sid_to_context_force(struct selinux_state *state, + u32 sid, char **scontext, u32 *scontext_len); -int security_context_to_sid(const char *scontext, u32 scontext_len, +int security_context_to_sid(struct selinux_state *state, + const char *scontext, u32 scontext_len, u32 *out_sid, gfp_t gfp); -int security_context_str_to_sid(const char *scontext, u32 *out_sid, gfp_t gfp); +int security_context_str_to_sid(struct selinux_state *state, + const char *scontext, u32 *out_sid, gfp_t gfp); -int security_context_to_sid_default(const char *scontext, u32 scontext_len, +int security_context_to_sid_default(struct selinux_state *state, + const char *scontext, u32 scontext_len, u32 *out_sid, u32 def_sid, gfp_t gfp_flags); -int security_context_to_sid_force(const char *scontext, u32 scontext_len, +int security_context_to_sid_force(struct selinux_state *state, + const char *scontext, u32 scontext_len, u32 *sid); -int security_get_user_sids(u32 callsid, char *username, +int security_get_user_sids(struct selinux_state *state, + u32 callsid, char *username, u32 **sids, u32 *nel); -int security_port_sid(u8 protocol, u16 port, u32 *out_sid); +int security_port_sid(struct selinux_state *state, + u8 protocol, u16 port, u32 *out_sid); -int security_ib_pkey_sid(u64 subnet_prefix, u16 pkey_num, u32 *out_sid); +int security_ib_pkey_sid(struct selinux_state *state, + u64 subnet_prefix, u16 pkey_num, u32 *out_sid); -int security_ib_endport_sid(const char *dev_name, u8 port_num, u32 *out_sid); +int security_ib_endport_sid(struct selinux_state *state, + const char *dev_name, u8 port_num, u32 *out_sid); -int security_netif_sid(char *name, u32 *if_sid); +int security_netif_sid(struct selinux_state *state, + char *name, u32 *if_sid); -int security_node_sid(u16 domain, void *addr, u32 addrlen, - u32 *out_sid); +int security_node_sid(struct selinux_state *state, + u16 domain, void *addr, u32 addrlen, + u32 *out_sid); -int security_validate_transition(u32 oldsid, u32 newsid, u32 tasksid, +int security_validate_transition(struct selinux_state *state, + u32 oldsid, u32 newsid, u32 tasksid, u16 tclass); -int security_validate_transition_user(u32 oldsid, u32 newsid, u32 tasksid, +int security_validate_transition_user(struct selinux_state *state, + u32 oldsid, u32 newsid, u32 tasksid, u16 tclass); -int security_bounded_transition(u32 oldsid, u32 newsid); +int security_bounded_transition(struct selinux_state *state, + u32 oldsid, u32 newsid); -int security_sid_mls_copy(u32 sid, u32 mls_sid, u32 *new_sid); +int security_sid_mls_copy(struct selinux_state *state, + u32 sid, u32 mls_sid, u32 *new_sid); -int security_net_peersid_resolve(u32 nlbl_sid, u32 nlbl_type, +int security_net_peersid_resolve(struct selinux_state *state, + u32 nlbl_sid, u32 nlbl_type, u32 xfrm_sid, u32 *peer_sid); -int security_get_classes(char ***classes, int *nclasses); -int security_get_permissions(char *class, char ***perms, int *nperms); -int security_get_reject_unknown(void); -int security_get_allow_unknown(void); +int security_get_classes(struct selinux_state *state, + char ***classes, int *nclasses); +int security_get_permissions(struct selinux_state *state, + char *class, char ***perms, int *nperms); +int security_get_reject_unknown(struct selinux_state *state); +int security_get_allow_unknown(struct selinux_state *state); #define SECURITY_FS_USE_XATTR 1 /* use xattr */ #define SECURITY_FS_USE_TRANS 2 /* use transition SIDs, e.g. devpts/tmpfs */ @@ -221,27 +322,31 @@ int security_get_allow_unknown(void); #define SECURITY_FS_USE_NATIVE 7 /* use native label support */ #define SECURITY_FS_USE_MAX 7 /* Highest SECURITY_FS_USE_XXX */ -int security_fs_use(struct super_block *sb); +int security_fs_use(struct selinux_state *state, struct super_block *sb); -int security_genfs_sid(const char *fstype, char *name, u16 sclass, - u32 *sid); +int security_genfs_sid(struct selinux_state *state, + const char *fstype, char *name, u16 sclass, + u32 *sid); #ifdef CONFIG_NETLABEL -int security_netlbl_secattr_to_sid(struct netlbl_lsm_secattr *secattr, +int security_netlbl_secattr_to_sid(struct selinux_state *state, + struct netlbl_lsm_secattr *secattr, u32 *sid); -int security_netlbl_sid_to_secattr(u32 sid, +int security_netlbl_sid_to_secattr(struct selinux_state *state, + u32 sid, struct netlbl_lsm_secattr *secattr); #else -static inline int security_netlbl_secattr_to_sid( +static inline int security_netlbl_secattr_to_sid(struct selinux_state *state, struct netlbl_lsm_secattr *secattr, u32 *sid) { return -EIDRM; } -static inline int security_netlbl_sid_to_secattr(u32 sid, - struct netlbl_lsm_secattr *secattr) +static inline int security_netlbl_sid_to_secattr(struct selinux_state *state, + u32 sid, + struct netlbl_lsm_secattr *secattr) { return -ENOENT; } @@ -252,7 +357,7 @@ const char *security_get_initial_sid_context(u32 sid); /* * status notifier using mmap interface */ -extern struct page *selinux_kernel_status_page(void); +extern struct page *selinux_kernel_status_page(struct selinux_state *state); #define SELINUX_KERNEL_STATUS_VERSION 1 struct selinux_kernel_status { @@ -266,10 +371,12 @@ struct selinux_kernel_status { */ } __packed; -extern void selinux_status_update_setenforce(int enforcing); -extern void selinux_status_update_policyload(int seqno); +extern void selinux_status_update_setenforce(struct selinux_state *state, + int enforcing); +extern void selinux_status_update_policyload(struct selinux_state *state, + int seqno); extern void selinux_complete_init(void); -extern int selinux_disable(void); +extern int selinux_disable(struct selinux_state *state); extern void exit_sel_fs(void); extern struct path selinux_null; extern struct vfsmount *selinuxfs_mount; @@ -277,5 +384,8 @@ extern void selnl_notify_setenforce(int val); extern void selnl_notify_policyload(u32 seqno); extern int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm); -#endif /* _SELINUX_SECURITY_H_ */ +extern void avtab_cache_init(void); +extern void ebitmap_cache_init(void); +extern void hashtab_cache_init(void); +#endif /* _SELINUX_SECURITY_H_ */ diff --git a/security/selinux/netif.c b/security/selinux/netif.c index e607b4473ef6..ac65f7417413 100644 --- a/security/selinux/netif.c +++ b/security/selinux/netif.c @@ -163,7 +163,7 @@ static int sel_netif_sid_slow(struct net *ns, int ifindex, u32 *sid) ret = -ENOMEM; goto out; } - ret = security_netif_sid(dev->name, &new->nsec.sid); + ret = security_netif_sid(&selinux_state, dev->name, &new->nsec.sid); if (ret != 0) goto out; new->nsec.ns = ns; diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c index aaba6677ee2e..00544f74d6ec 100644 --- a/security/selinux/netlabel.c +++ b/security/selinux/netlabel.c @@ -60,7 +60,7 @@ static int selinux_netlbl_sidlookup_cached(struct sk_buff *skb, { int rc; - rc = security_netlbl_secattr_to_sid(secattr, sid); + rc = security_netlbl_secattr_to_sid(&selinux_state, secattr, sid); if (rc == 0 && (secattr->flags & NETLBL_SECATTR_CACHEABLE) && (secattr->flags & NETLBL_SECATTR_CACHE)) @@ -91,7 +91,8 @@ static struct netlbl_lsm_secattr *selinux_netlbl_sock_genattr(struct sock *sk) secattr = netlbl_secattr_alloc(GFP_ATOMIC); if (secattr == NULL) return NULL; - rc = security_netlbl_sid_to_secattr(sksec->sid, secattr); + rc = security_netlbl_sid_to_secattr(&selinux_state, sksec->sid, + secattr); if (rc != 0) { netlbl_secattr_free(secattr); return NULL; @@ -257,7 +258,8 @@ int selinux_netlbl_skbuff_setsid(struct sk_buff *skb, if (secattr == NULL) { secattr = &secattr_storage; netlbl_secattr_init(secattr); - rc = security_netlbl_sid_to_secattr(sid, secattr); + rc = security_netlbl_sid_to_secattr(&selinux_state, sid, + secattr); if (rc != 0) goto skbuff_setsid_return; } @@ -290,7 +292,8 @@ int selinux_netlbl_inet_conn_request(struct request_sock *req, u16 family) return 0; netlbl_secattr_init(&secattr); - rc = security_netlbl_sid_to_secattr(req->secid, &secattr); + rc = security_netlbl_sid_to_secattr(&selinux_state, req->secid, + &secattr); if (rc != 0) goto inet_conn_request_return; rc = netlbl_req_setattr(req, &secattr); diff --git a/security/selinux/netnode.c b/security/selinux/netnode.c index da923f89d2a9..6dd89b89bc1f 100644 --- a/security/selinux/netnode.c +++ b/security/selinux/netnode.c @@ -215,12 +215,12 @@ static int sel_netnode_sid_slow(void *addr, u16 family, u32 *sid) goto out; switch (family) { case PF_INET: - ret = security_node_sid(PF_INET, + ret = security_node_sid(&selinux_state, PF_INET, addr, sizeof(struct in_addr), sid); new->nsec.addr.ipv4 = *(__be32 *)addr; break; case PF_INET6: - ret = security_node_sid(PF_INET6, + ret = security_node_sid(&selinux_state, PF_INET6, addr, sizeof(struct in6_addr), sid); new->nsec.addr.ipv6 = *(struct in6_addr *)addr; break; diff --git a/security/selinux/netport.c b/security/selinux/netport.c index 3311cc393cb4..9ed4c5064a5e 100644 --- a/security/selinux/netport.c +++ b/security/selinux/netport.c @@ -161,7 +161,7 @@ static int sel_netport_sid_slow(u8 protocol, u16 pnum, u32 *sid) new = kzalloc(sizeof(*new), GFP_ATOMIC); if (new == NULL) goto out; - ret = security_port_sid(protocol, pnum, sid); + ret = security_port_sid(&selinux_state, protocol, pnum, sid); if (ret != 0) goto out; diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 00eed842c491..98492755adbf 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -41,17 +41,6 @@ #include "objsec.h" #include "conditional.h" -unsigned int selinux_checkreqprot = CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE; - -static int __init checkreqprot_setup(char *str) -{ - unsigned long checkreqprot; - if (!kstrtoul(str, 0, &checkreqprot)) - selinux_checkreqprot = checkreqprot ? 1 : 0; - return 1; -} -__setup("checkreqprot=", checkreqprot_setup); - static DEFINE_MUTEX(sel_mutex); /* global data for booleans */ @@ -108,7 +97,8 @@ static ssize_t sel_read_enforce(struct file *filp, char __user *buf, char tmpbuf[TMPBUFLEN]; ssize_t length; - length = scnprintf(tmpbuf, TMPBUFLEN, "%d", selinux_enforcing); + length = scnprintf(tmpbuf, TMPBUFLEN, "%d", + is_enforcing(&selinux_state)); return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); } @@ -119,7 +109,7 @@ static ssize_t sel_write_enforce(struct file *file, const char __user *buf, { char *page = NULL; ssize_t length; - int new_value; + int old_value, new_value; if (count >= PAGE_SIZE) return -ENOMEM; @@ -138,7 +128,9 @@ static ssize_t sel_write_enforce(struct file *file, const char __user *buf, new_value = !!new_value; - if (new_value != selinux_enforcing) { + old_value = is_enforcing(&selinux_state); + + if (new_value != old_value) { length = avc_has_perm(current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__SETENFORCE, NULL); @@ -146,15 +138,16 @@ static ssize_t sel_write_enforce(struct file *file, const char __user *buf, goto out; audit_log(current->audit_context, GFP_KERNEL, AUDIT_MAC_STATUS, "enforcing=%d old_enforcing=%d auid=%u ses=%u", - new_value, selinux_enforcing, + new_value, old_value, from_kuid(&init_user_ns, audit_get_loginuid(current)), audit_get_sessionid(current)); - selinux_enforcing = new_value; - if (selinux_enforcing) + set_enforcing(&selinux_state, new_value); + if (new_value) avc_ss_reset(0); - selnl_notify_setenforce(selinux_enforcing); - selinux_status_update_setenforce(selinux_enforcing); - if (!selinux_enforcing) + selnl_notify_setenforce(new_value); + selinux_status_update_setenforce(&selinux_state, + new_value); + if (!new_value) call_lsm_notifier(LSM_POLICY_CHANGE, NULL); } length = count; @@ -179,7 +172,8 @@ static ssize_t sel_read_handle_unknown(struct file *filp, char __user *buf, ssize_t length; ino_t ino = file_inode(filp)->i_ino; int handle_unknown = (ino == SEL_REJECT_UNKNOWN) ? - security_get_reject_unknown() : !security_get_allow_unknown(); + security_get_reject_unknown(&selinux_state) : + !security_get_allow_unknown(&selinux_state); length = scnprintf(tmpbuf, TMPBUFLEN, "%d", handle_unknown); return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); @@ -192,7 +186,7 @@ static const struct file_operations sel_handle_unknown_ops = { static int sel_open_handle_status(struct inode *inode, struct file *filp) { - struct page *status = selinux_kernel_status_page(); + struct page *status = selinux_kernel_status_page(&selinux_state); if (!status) return -ENOMEM; @@ -268,7 +262,7 @@ static ssize_t sel_write_disable(struct file *file, const char __user *buf, goto out; if (new_value) { - length = selinux_disable(); + length = selinux_disable(&selinux_state); if (length) goto out; audit_log(current->audit_context, GFP_KERNEL, AUDIT_MAC_STATUS, @@ -322,7 +316,7 @@ static ssize_t sel_read_mls(struct file *filp, char __user *buf, ssize_t length; length = scnprintf(tmpbuf, TMPBUFLEN, "%d", - security_mls_enabled()); + security_mls_enabled(&selinux_state)); return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); } @@ -359,13 +353,13 @@ static int sel_open_policy(struct inode *inode, struct file *filp) if (!plm) goto err; - if (i_size_read(inode) != security_policydb_len()) { + if (i_size_read(inode) != security_policydb_len(&selinux_state)) { inode_lock(inode); - i_size_write(inode, security_policydb_len()); + i_size_write(inode, security_policydb_len(&selinux_state)); inode_unlock(inode); } - rc = security_read_policy(&plm->data, &plm->len); + rc = security_read_policy(&selinux_state, &plm->data, &plm->len); if (rc) goto err; @@ -500,7 +494,7 @@ static ssize_t sel_write_load(struct file *file, const char __user *buf, if (copy_from_user(data, buf, count) != 0) goto out; - length = security_load_policy(data, count); + length = security_load_policy(&selinux_state, data, count); if (length) { pr_warn_ratelimited("SELinux: failed to load policy\n"); goto out; @@ -553,11 +547,12 @@ static ssize_t sel_write_context(struct file *file, char *buf, size_t size) if (length) goto out; - length = security_context_to_sid(buf, size, &sid, GFP_KERNEL); + length = security_context_to_sid(&selinux_state, buf, size, + &sid, GFP_KERNEL); if (length) goto out; - length = security_sid_to_context(sid, &canon, &len); + length = security_sid_to_context(&selinux_state, sid, &canon, &len); if (length) goto out; @@ -581,7 +576,7 @@ static ssize_t sel_read_checkreqprot(struct file *filp, char __user *buf, char tmpbuf[TMPBUFLEN]; ssize_t length; - length = scnprintf(tmpbuf, TMPBUFLEN, "%u", selinux_checkreqprot); + length = scnprintf(tmpbuf, TMPBUFLEN, "%u", selinux_state.checkreqprot); return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); } @@ -613,7 +608,7 @@ static ssize_t sel_write_checkreqprot(struct file *file, const char __user *buf, if (sscanf(page, "%u", &new_value) != 1) goto out; - selinux_checkreqprot = new_value ? 1 : 0; + selinux_state.checkreqprot = new_value ? 1 : 0; length = count; out: kfree(page); @@ -673,19 +668,23 @@ static ssize_t sel_write_validatetrans(struct file *file, if (sscanf(req, "%s %s %hu %s", oldcon, newcon, &tclass, taskcon) != 4) goto out; - rc = security_context_str_to_sid(oldcon, &osid, GFP_KERNEL); + rc = security_context_str_to_sid(&selinux_state, oldcon, &osid, + GFP_KERNEL); if (rc) goto out; - rc = security_context_str_to_sid(newcon, &nsid, GFP_KERNEL); + rc = security_context_str_to_sid(&selinux_state, newcon, &nsid, + GFP_KERNEL); if (rc) goto out; - rc = security_context_str_to_sid(taskcon, &tsid, GFP_KERNEL); + rc = security_context_str_to_sid(&selinux_state, taskcon, &tsid, + GFP_KERNEL); if (rc) goto out; - rc = security_validate_transition_user(osid, nsid, tsid, tclass); + rc = security_validate_transition_user(&selinux_state, osid, nsid, + tsid, tclass); if (!rc) rc = count; out: @@ -780,15 +779,17 @@ static ssize_t sel_write_access(struct file *file, char *buf, size_t size) if (sscanf(buf, "%s %s %hu", scon, tcon, &tclass) != 3) goto out; - length = security_context_str_to_sid(scon, &ssid, GFP_KERNEL); + length = security_context_str_to_sid(&selinux_state, scon, &ssid, + GFP_KERNEL); if (length) goto out; - length = security_context_str_to_sid(tcon, &tsid, GFP_KERNEL); + length = security_context_str_to_sid(&selinux_state, tcon, &tsid, + GFP_KERNEL); if (length) goto out; - security_compute_av_user(ssid, tsid, tclass, &avd); + security_compute_av_user(&selinux_state, ssid, tsid, tclass, &avd); length = scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%x %x %x %x %u %x", @@ -868,20 +869,23 @@ static ssize_t sel_write_create(struct file *file, char *buf, size_t size) objname = namebuf; } - length = security_context_str_to_sid(scon, &ssid, GFP_KERNEL); + length = security_context_str_to_sid(&selinux_state, scon, &ssid, + GFP_KERNEL); if (length) goto out; - length = security_context_str_to_sid(tcon, &tsid, GFP_KERNEL); + length = security_context_str_to_sid(&selinux_state, tcon, &tsid, + GFP_KERNEL); if (length) goto out; - length = security_transition_sid_user(ssid, tsid, tclass, - objname, &newsid); + length = security_transition_sid_user(&selinux_state, ssid, tsid, + tclass, objname, &newsid); if (length) goto out; - length = security_sid_to_context(newsid, &newcon, &len); + length = security_sid_to_context(&selinux_state, newsid, &newcon, + &len); if (length) goto out; @@ -931,19 +935,23 @@ static ssize_t sel_write_relabel(struct file *file, char *buf, size_t size) if (sscanf(buf, "%s %s %hu", scon, tcon, &tclass) != 3) goto out; - length = security_context_str_to_sid(scon, &ssid, GFP_KERNEL); + length = security_context_str_to_sid(&selinux_state, scon, &ssid, + GFP_KERNEL); if (length) goto out; - length = security_context_str_to_sid(tcon, &tsid, GFP_KERNEL); + length = security_context_str_to_sid(&selinux_state, tcon, &tsid, + GFP_KERNEL); if (length) goto out; - length = security_change_sid(ssid, tsid, tclass, &newsid); + length = security_change_sid(&selinux_state, ssid, tsid, tclass, + &newsid); if (length) goto out; - length = security_sid_to_context(newsid, &newcon, &len); + length = security_sid_to_context(&selinux_state, newsid, &newcon, + &len); if (length) goto out; @@ -989,18 +997,21 @@ static ssize_t sel_write_user(struct file *file, char *buf, size_t size) if (sscanf(buf, "%s %s", con, user) != 2) goto out; - length = security_context_str_to_sid(con, &sid, GFP_KERNEL); + length = security_context_str_to_sid(&selinux_state, con, &sid, + GFP_KERNEL); if (length) goto out; - length = security_get_user_sids(sid, user, &sids, &nsids); + length = security_get_user_sids(&selinux_state, sid, user, &sids, + &nsids); if (length) goto out; length = sprintf(buf, "%u", nsids) + 1; ptr = buf + length; for (i = 0; i < nsids; i++) { - rc = security_sid_to_context(sids[i], &newcon, &len); + rc = security_sid_to_context(&selinux_state, sids[i], + &newcon, &len); if (rc) { length = rc; goto out; @@ -1051,19 +1062,23 @@ static ssize_t sel_write_member(struct file *file, char *buf, size_t size) if (sscanf(buf, "%s %s %hu", scon, tcon, &tclass) != 3) goto out; - length = security_context_str_to_sid(scon, &ssid, GFP_KERNEL); + length = security_context_str_to_sid(&selinux_state, scon, &ssid, + GFP_KERNEL); if (length) goto out; - length = security_context_str_to_sid(tcon, &tsid, GFP_KERNEL); + length = security_context_str_to_sid(&selinux_state, tcon, &tsid, + GFP_KERNEL); if (length) goto out; - length = security_member_sid(ssid, tsid, tclass, &newsid); + length = security_member_sid(&selinux_state, ssid, tsid, tclass, + &newsid); if (length) goto out; - length = security_sid_to_context(newsid, &newcon, &len); + length = security_sid_to_context(&selinux_state, newsid, &newcon, + &len); if (length) goto out; @@ -1115,7 +1130,7 @@ static ssize_t sel_read_bool(struct file *filep, char __user *buf, if (!page) goto out; - cur_enforcing = security_get_bool_value(index); + cur_enforcing = security_get_bool_value(&selinux_state, index); if (cur_enforcing < 0) { ret = cur_enforcing; goto out; @@ -1226,7 +1241,8 @@ static ssize_t sel_commit_bools_write(struct file *filep, length = 0; if (new_value && bool_pending_values) - length = security_set_bools(bool_num, bool_pending_values); + length = security_set_bools(&selinux_state, bool_num, + bool_pending_values); if (!length) length = count; @@ -1279,7 +1295,7 @@ static int sel_make_bools(void) if (!page) goto out; - ret = security_get_bools(&num, &names, &values); + ret = security_get_bools(&selinux_state, &num, &names, &values); if (ret) goto out; @@ -1300,7 +1316,8 @@ static int sel_make_bools(void) goto out; isec = (struct inode_security_struct *)inode->i_security; - ret = security_genfs_sid("selinuxfs", page, SECCLASS_FILE, &sid); + ret = security_genfs_sid(&selinux_state, "selinuxfs", page, + SECCLASS_FILE, &sid); if (ret) { pr_warn_ratelimited("SELinux: no sid found, defaulting to security isid for %s\n", page); @@ -1524,7 +1541,7 @@ static ssize_t sel_read_initcon(struct file *file, char __user *buf, ssize_t ret; sid = file_inode(file)->i_ino&SEL_INO_MASK; - ret = security_sid_to_context(sid, &con, &len); + ret = security_sid_to_context(&selinux_state, sid, &con, &len); if (ret) return ret; @@ -1617,7 +1634,8 @@ static ssize_t sel_read_policycap(struct file *file, char __user *buf, ssize_t length; unsigned long i_ino = file_inode(file)->i_ino; - value = security_policycap_supported(i_ino & SEL_INO_MASK); + value = security_policycap_supported(&selinux_state, + i_ino & SEL_INO_MASK); length = scnprintf(tmpbuf, TMPBUFLEN, "%d", value); return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); @@ -1634,7 +1652,8 @@ static int sel_make_perm_files(char *objclass, int classvalue, int i, rc, nperms; char **perms; - rc = security_get_permissions(objclass, &perms, &nperms); + rc = security_get_permissions(&selinux_state, objclass, &perms, + &nperms); if (rc) return rc; @@ -1701,7 +1720,7 @@ static int sel_make_classes(void) /* delete any existing entries */ sel_remove_entries(class_dir); - rc = security_get_classes(&classes, &nclasses); + rc = security_get_classes(&selinux_state, &classes, &nclasses); if (rc) return rc; diff --git a/security/selinux/ss/avtab.c b/security/selinux/ss/avtab.c index 2c3c7d010d8a..a2c9148b0662 100644 --- a/security/selinux/ss/avtab.c +++ b/security/selinux/ss/avtab.c @@ -655,7 +655,8 @@ int avtab_write(struct policydb *p, struct avtab *a, void *fp) return rc; } -void avtab_cache_init(void) + +void __init avtab_cache_init(void) { avtab_node_cachep = kmem_cache_create("avtab_node", sizeof(struct avtab_node), @@ -664,9 +665,3 @@ void avtab_cache_init(void) sizeof(struct avtab_extended_perms), 0, SLAB_PANIC, NULL); } - -void avtab_cache_destroy(void) -{ - kmem_cache_destroy(avtab_node_cachep); - kmem_cache_destroy(avtab_xperms_cachep); -} diff --git a/security/selinux/ss/avtab.h b/security/selinux/ss/avtab.h index 725853cadc42..0d652fad5319 100644 --- a/security/selinux/ss/avtab.h +++ b/security/selinux/ss/avtab.h @@ -114,9 +114,6 @@ struct avtab_node *avtab_search_node(struct avtab *h, struct avtab_key *key); struct avtab_node *avtab_search_node_next(struct avtab_node *node, int specified); -void avtab_cache_init(void); -void avtab_cache_destroy(void); - #define MAX_AVTAB_HASH_BITS 16 #define MAX_AVTAB_HASH_BUCKETS (1 << MAX_AVTAB_HASH_BITS) diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c index b6a78b09235c..5ae8c61b75bf 100644 --- a/security/selinux/ss/ebitmap.c +++ b/security/selinux/ss/ebitmap.c @@ -523,14 +523,9 @@ int ebitmap_write(struct ebitmap *e, void *fp) return 0; } -void ebitmap_cache_init(void) +void __init ebitmap_cache_init(void) { ebitmap_node_cachep = kmem_cache_create("ebitmap_node", sizeof(struct ebitmap_node), 0, SLAB_PANIC, NULL); } - -void ebitmap_cache_destroy(void) -{ - kmem_cache_destroy(ebitmap_node_cachep); -} diff --git a/security/selinux/ss/ebitmap.h b/security/selinux/ss/ebitmap.h index edf4fa39c60a..6aa7cf6a2197 100644 --- a/security/selinux/ss/ebitmap.h +++ b/security/selinux/ss/ebitmap.h @@ -131,9 +131,6 @@ void ebitmap_destroy(struct ebitmap *e); int ebitmap_read(struct ebitmap *e, void *fp); int ebitmap_write(struct ebitmap *e, void *fp); -void ebitmap_cache_init(void); -void ebitmap_cache_destroy(void); - #ifdef CONFIG_NETLABEL int ebitmap_netlbl_export(struct ebitmap *ebmap, struct netlbl_lsm_catmap **catmap); diff --git a/security/selinux/ss/hashtab.c b/security/selinux/ss/hashtab.c index aeb990f52f54..836aa5b7d29b 100644 --- a/security/selinux/ss/hashtab.c +++ b/security/selinux/ss/hashtab.c @@ -169,14 +169,10 @@ void hashtab_stat(struct hashtab *h, struct hashtab_info *info) info->slots_used = slots_used; info->max_chain_len = max_chain_len; } -void hashtab_cache_init(void) + +void __init hashtab_cache_init(void) { hashtab_node_cachep = kmem_cache_create("hashtab_node", sizeof(struct hashtab_node), 0, SLAB_PANIC, NULL); } - -void hashtab_cache_destroy(void) -{ - kmem_cache_destroy(hashtab_node_cachep); -} diff --git a/security/selinux/ss/hashtab.h b/security/selinux/ss/hashtab.h index 6183ee2a2e7a..3e3e42bfd150 100644 --- a/security/selinux/ss/hashtab.h +++ b/security/selinux/ss/hashtab.h @@ -85,8 +85,4 @@ int hashtab_map(struct hashtab *h, /* Fill info with some hash table statistics */ void hashtab_stat(struct hashtab *h, struct hashtab_info *info); -/* Use kmem_cache for hashtab_node */ -void hashtab_cache_init(void); -void hashtab_cache_destroy(void); - #endif /* _SS_HASHTAB_H */ diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c index ad982ce8bfa4..39475fb455bc 100644 --- a/security/selinux/ss/mls.c +++ b/security/selinux/ss/mls.c @@ -33,20 +33,20 @@ * Return the length in bytes for the MLS fields of the * security context string representation of `context'. */ -int mls_compute_context_len(struct context *context) +int mls_compute_context_len(struct policydb *p, struct context *context) { int i, l, len, head, prev; char *nm; struct ebitmap *e; struct ebitmap_node *node; - if (!policydb.mls_enabled) + if (!p->mls_enabled) return 0; len = 1; /* for the beginning ":" */ for (l = 0; l < 2; l++) { int index_sens = context->range.level[l].sens; - len += strlen(sym_name(&policydb, SYM_LEVELS, index_sens - 1)); + len += strlen(sym_name(p, SYM_LEVELS, index_sens - 1)); /* categories */ head = -2; @@ -56,17 +56,17 @@ int mls_compute_context_len(struct context *context) if (i - prev > 1) { /* one or more negative bits are skipped */ if (head != prev) { - nm = sym_name(&policydb, SYM_CATS, prev); + nm = sym_name(p, SYM_CATS, prev); len += strlen(nm) + 1; } - nm = sym_name(&policydb, SYM_CATS, i); + nm = sym_name(p, SYM_CATS, i); len += strlen(nm) + 1; head = i; } prev = i; } if (prev != head) { - nm = sym_name(&policydb, SYM_CATS, prev); + nm = sym_name(p, SYM_CATS, prev); len += strlen(nm) + 1; } if (l == 0) { @@ -86,7 +86,8 @@ int mls_compute_context_len(struct context *context) * the MLS fields of `context' into the string `*scontext'. * Update `*scontext' to point to the end of the MLS fields. */ -void mls_sid_to_context(struct context *context, +void mls_sid_to_context(struct policydb *p, + struct context *context, char **scontext) { char *scontextp, *nm; @@ -94,7 +95,7 @@ void mls_sid_to_context(struct context *context, struct ebitmap *e; struct ebitmap_node *node; - if (!policydb.mls_enabled) + if (!p->mls_enabled) return; scontextp = *scontext; @@ -103,7 +104,7 @@ void mls_sid_to_context(struct context *context, scontextp++; for (l = 0; l < 2; l++) { - strcpy(scontextp, sym_name(&policydb, SYM_LEVELS, + strcpy(scontextp, sym_name(p, SYM_LEVELS, context->range.level[l].sens - 1)); scontextp += strlen(scontextp); @@ -119,7 +120,7 @@ void mls_sid_to_context(struct context *context, *scontextp++ = '.'; else *scontextp++ = ','; - nm = sym_name(&policydb, SYM_CATS, prev); + nm = sym_name(p, SYM_CATS, prev); strcpy(scontextp, nm); scontextp += strlen(nm); } @@ -127,7 +128,7 @@ void mls_sid_to_context(struct context *context, *scontextp++ = ':'; else *scontextp++ = ','; - nm = sym_name(&policydb, SYM_CATS, i); + nm = sym_name(p, SYM_CATS, i); strcpy(scontextp, nm); scontextp += strlen(nm); head = i; @@ -140,7 +141,7 @@ void mls_sid_to_context(struct context *context, *scontextp++ = '.'; else *scontextp++ = ','; - nm = sym_name(&policydb, SYM_CATS, prev); + nm = sym_name(p, SYM_CATS, prev); strcpy(scontextp, nm); scontextp += strlen(nm); } @@ -375,12 +376,13 @@ out: * the string `str'. This function will allocate temporary memory with the * given constraints of gfp_mask. */ -int mls_from_string(char *str, struct context *context, gfp_t gfp_mask) +int mls_from_string(struct policydb *p, char *str, struct context *context, + gfp_t gfp_mask) { char *tmpstr, *freestr; int rc; - if (!policydb.mls_enabled) + if (!p->mls_enabled) return -EINVAL; /* we need freestr because mls_context_to_sid will change @@ -389,7 +391,7 @@ int mls_from_string(char *str, struct context *context, gfp_t gfp_mask) if (!tmpstr) { rc = -ENOMEM; } else { - rc = mls_context_to_sid(&policydb, ':', &tmpstr, context, + rc = mls_context_to_sid(p, ':', &tmpstr, context, NULL, SECSID_NULL); kfree(freestr); } @@ -417,10 +419,11 @@ int mls_range_set(struct context *context, return rc; } -int mls_setup_user_range(struct context *fromcon, struct user_datum *user, +int mls_setup_user_range(struct policydb *p, + struct context *fromcon, struct user_datum *user, struct context *usercon) { - if (policydb.mls_enabled) { + if (p->mls_enabled) { struct mls_level *fromcon_sen = &(fromcon->range.level[0]); struct mls_level *fromcon_clr = &(fromcon->range.level[1]); struct mls_level *user_low = &(user->range.level[0]); @@ -470,7 +473,7 @@ int mls_convert_context(struct policydb *oldp, struct ebitmap_node *node; int l, i; - if (!policydb.mls_enabled) + if (!oldp->mls_enabled || !newp->mls_enabled) return 0; for (l = 0; l < 2; l++) { @@ -503,7 +506,8 @@ int mls_convert_context(struct policydb *oldp, return 0; } -int mls_compute_sid(struct context *scontext, +int mls_compute_sid(struct policydb *p, + struct context *scontext, struct context *tcontext, u16 tclass, u32 specified, @@ -515,7 +519,7 @@ int mls_compute_sid(struct context *scontext, struct class_datum *cladatum; int default_range = 0; - if (!policydb.mls_enabled) + if (!p->mls_enabled) return 0; switch (specified) { @@ -524,12 +528,12 @@ int mls_compute_sid(struct context *scontext, rtr.source_type = scontext->type; rtr.target_type = tcontext->type; rtr.target_class = tclass; - r = hashtab_search(policydb.range_tr, &rtr); + r = hashtab_search(p->range_tr, &rtr); if (r) return mls_range_set(newcontext, r); - if (tclass && tclass <= policydb.p_classes.nprim) { - cladatum = policydb.class_val_to_struct[tclass - 1]; + if (tclass && tclass <= p->p_classes.nprim) { + cladatum = p->class_val_to_struct[tclass - 1]; if (cladatum) default_range = cladatum->default_range; } @@ -551,7 +555,7 @@ int mls_compute_sid(struct context *scontext, /* Fallthrough */ case AVTAB_CHANGE: - if ((tclass == policydb.process_class) || (sock == true)) + if ((tclass == p->process_class) || (sock == true)) /* Use the process MLS attributes. */ return mls_context_cpy(newcontext, scontext); else @@ -577,10 +581,11 @@ int mls_compute_sid(struct context *scontext, * NetLabel MLS sensitivity level field. * */ -void mls_export_netlbl_lvl(struct context *context, +void mls_export_netlbl_lvl(struct policydb *p, + struct context *context, struct netlbl_lsm_secattr *secattr) { - if (!policydb.mls_enabled) + if (!p->mls_enabled) return; secattr->attr.mls.lvl = context->range.level[0].sens - 1; @@ -597,10 +602,11 @@ void mls_export_netlbl_lvl(struct context *context, * NetLabel MLS sensitivity level into the context. * */ -void mls_import_netlbl_lvl(struct context *context, +void mls_import_netlbl_lvl(struct policydb *p, + struct context *context, struct netlbl_lsm_secattr *secattr) { - if (!policydb.mls_enabled) + if (!p->mls_enabled) return; context->range.level[0].sens = secattr->attr.mls.lvl + 1; @@ -617,12 +623,13 @@ void mls_import_netlbl_lvl(struct context *context, * MLS category field. Returns zero on success, negative values on failure. * */ -int mls_export_netlbl_cat(struct context *context, +int mls_export_netlbl_cat(struct policydb *p, + struct context *context, struct netlbl_lsm_secattr *secattr) { int rc; - if (!policydb.mls_enabled) + if (!p->mls_enabled) return 0; rc = ebitmap_netlbl_export(&context->range.level[0].cat, @@ -645,12 +652,13 @@ int mls_export_netlbl_cat(struct context *context, * negative values on failure. * */ -int mls_import_netlbl_cat(struct context *context, +int mls_import_netlbl_cat(struct policydb *p, + struct context *context, struct netlbl_lsm_secattr *secattr) { int rc; - if (!policydb.mls_enabled) + if (!p->mls_enabled) return 0; rc = ebitmap_netlbl_import(&context->range.level[0].cat, diff --git a/security/selinux/ss/mls.h b/security/selinux/ss/mls.h index 131d76266ea5..9a3ff7af70ad 100644 --- a/security/selinux/ss/mls.h +++ b/security/selinux/ss/mls.h @@ -25,8 +25,9 @@ #include "context.h" #include "policydb.h" -int mls_compute_context_len(struct context *context); -void mls_sid_to_context(struct context *context, char **scontext); +int mls_compute_context_len(struct policydb *p, struct context *context); +void mls_sid_to_context(struct policydb *p, struct context *context, + char **scontext); int mls_context_isvalid(struct policydb *p, struct context *c); int mls_range_isvalid(struct policydb *p, struct mls_range *r); int mls_level_isvalid(struct policydb *p, struct mls_level *l); @@ -38,7 +39,8 @@ int mls_context_to_sid(struct policydb *p, struct sidtab *s, u32 def_sid); -int mls_from_string(char *str, struct context *context, gfp_t gfp_mask); +int mls_from_string(struct policydb *p, char *str, struct context *context, + gfp_t gfp_mask); int mls_range_set(struct context *context, struct mls_range *range); @@ -46,42 +48,52 @@ int mls_convert_context(struct policydb *oldp, struct policydb *newp, struct context *context); -int mls_compute_sid(struct context *scontext, +int mls_compute_sid(struct policydb *p, + struct context *scontext, struct context *tcontext, u16 tclass, u32 specified, struct context *newcontext, bool sock); -int mls_setup_user_range(struct context *fromcon, struct user_datum *user, +int mls_setup_user_range(struct policydb *p, + struct context *fromcon, struct user_datum *user, struct context *usercon); #ifdef CONFIG_NETLABEL -void mls_export_netlbl_lvl(struct context *context, +void mls_export_netlbl_lvl(struct policydb *p, + struct context *context, struct netlbl_lsm_secattr *secattr); -void mls_import_netlbl_lvl(struct context *context, +void mls_import_netlbl_lvl(struct policydb *p, + struct context *context, struct netlbl_lsm_secattr *secattr); -int mls_export_netlbl_cat(struct context *context, +int mls_export_netlbl_cat(struct policydb *p, + struct context *context, struct netlbl_lsm_secattr *secattr); -int mls_import_netlbl_cat(struct context *context, +int mls_import_netlbl_cat(struct policydb *p, + struct context *context, struct netlbl_lsm_secattr *secattr); #else -static inline void mls_export_netlbl_lvl(struct context *context, +static inline void mls_export_netlbl_lvl(struct policydb *p, + struct context *context, struct netlbl_lsm_secattr *secattr) { return; } -static inline void mls_import_netlbl_lvl(struct context *context, +static inline void mls_import_netlbl_lvl(struct policydb *p, + struct context *context, struct netlbl_lsm_secattr *secattr) { return; } -static inline int mls_export_netlbl_cat(struct context *context, +static inline int mls_export_netlbl_cat(struct policydb *p, + struct context *context, struct netlbl_lsm_secattr *secattr) { return -ENOMEM; } -static inline int mls_import_netlbl_cat(struct context *context, +static inline int mls_import_netlbl_cat(struct policydb *p, + struct context *context, struct netlbl_lsm_secattr *secattr) { return -ENOMEM; diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 1dde563aff1d..1989ccab19cc 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -80,53 +80,32 @@ char *selinux_policycap_names[__POLICYDB_CAPABILITY_MAX] = { "nnp_nosuid_transition" }; -int selinux_policycap_netpeer; -int selinux_policycap_openperm; -int selinux_policycap_extsockclass; -int selinux_policycap_alwaysnetwork; -int selinux_policycap_cgroupseclabel; -int selinux_policycap_nnp_nosuid_transition; +static struct selinux_ss selinux_ss; -static DEFINE_RWLOCK(policy_rwlock); - -static struct sidtab sidtab; -struct policydb policydb; -int ss_initialized; - -/* - * The largest sequence number that has been used when - * providing an access decision to the access vector cache. - * The sequence number only changes when a policy change - * occurs. - */ -static u32 latest_granting; +void selinux_ss_init(struct selinux_ss **ss) +{ + rwlock_init(&selinux_ss.policy_rwlock); + mutex_init(&selinux_ss.status_lock); + *ss = &selinux_ss; +} /* Forward declaration. */ -static int context_struct_to_string(struct context *context, char **scontext, +static int context_struct_to_string(struct policydb *policydb, + struct context *context, + char **scontext, u32 *scontext_len); -static void context_struct_compute_av(struct context *scontext, - struct context *tcontext, - u16 tclass, - struct av_decision *avd, - struct extended_perms *xperms); - -struct selinux_mapping { - u16 value; /* policy value */ - unsigned num_perms; - u32 perms[sizeof(u32) * 8]; -}; - -static struct selinux_mapping *current_mapping; -static u16 current_mapping_size; +static void context_struct_compute_av(struct policydb *policydb, + struct context *scontext, + struct context *tcontext, + u16 tclass, + struct av_decision *avd, + struct extended_perms *xperms); static int selinux_set_mapping(struct policydb *pol, struct security_class_mapping *map, - struct selinux_mapping **out_map_p, - u16 *out_map_size) + struct selinux_map *out_map) { - struct selinux_mapping *out_map = NULL; - size_t size = sizeof(struct selinux_mapping); u16 i, j; unsigned k; bool print_unknown_handle = false; @@ -139,15 +118,15 @@ static int selinux_set_mapping(struct policydb *pol, i++; /* Allocate space for the class records, plus one for class zero */ - out_map = kcalloc(++i, size, GFP_ATOMIC); - if (!out_map) + out_map->mapping = kcalloc(++i, sizeof(*out_map->mapping), GFP_ATOMIC); + if (!out_map->mapping) return -ENOMEM; /* Store the raw class and permission values */ j = 0; while (map[j].name) { struct security_class_mapping *p_in = map + (j++); - struct selinux_mapping *p_out = out_map + j; + struct selinux_mapping *p_out = out_map->mapping + j; /* An empty class string skips ahead */ if (!strcmp(p_in->name, "")) { @@ -194,11 +173,11 @@ static int selinux_set_mapping(struct policydb *pol, printk(KERN_INFO "SELinux: the above unknown classes and permissions will be %s\n", pol->allow_unknown ? "allowed" : "denied"); - *out_map_p = out_map; - *out_map_size = i; + out_map->size = i; return 0; err: - kfree(out_map); + kfree(out_map->mapping); + out_map->mapping = NULL; return -EINVAL; } @@ -206,10 +185,10 @@ err: * Get real, policy values from mapped values */ -static u16 unmap_class(u16 tclass) +static u16 unmap_class(struct selinux_map *map, u16 tclass) { - if (tclass < current_mapping_size) - return current_mapping[tclass].value; + if (tclass < map->size) + return map->mapping[tclass].value; return tclass; } @@ -217,42 +196,44 @@ static u16 unmap_class(u16 tclass) /* * Get kernel value for class from its policy value */ -static u16 map_class(u16 pol_value) +static u16 map_class(struct selinux_map *map, u16 pol_value) { u16 i; - for (i = 1; i < current_mapping_size; i++) { - if (current_mapping[i].value == pol_value) + for (i = 1; i < map->size; i++) { + if (map->mapping[i].value == pol_value) return i; } return SECCLASS_NULL; } -static void map_decision(u16 tclass, struct av_decision *avd, +static void map_decision(struct selinux_map *map, + u16 tclass, struct av_decision *avd, int allow_unknown) { - if (tclass < current_mapping_size) { - unsigned i, n = current_mapping[tclass].num_perms; + if (tclass < map->size) { + struct selinux_mapping *mapping = &map->mapping[tclass]; + unsigned int i, n = mapping->num_perms; u32 result; for (i = 0, result = 0; i < n; i++) { - if (avd->allowed & current_mapping[tclass].perms[i]) + if (avd->allowed & mapping->perms[i]) result |= 1<perms[i]) result |= 1<allowed = result; for (i = 0, result = 0; i < n; i++) - if (avd->auditallow & current_mapping[tclass].perms[i]) + if (avd->auditallow & mapping->perms[i]) result |= 1<auditallow = result; for (i = 0, result = 0; i < n; i++) { - if (avd->auditdeny & current_mapping[tclass].perms[i]) + if (avd->auditdeny & mapping->perms[i]) result |= 1<perms[i]) result |= 1<ss->policydb; + + return p->mls_enabled; } /* @@ -282,7 +265,8 @@ int security_mls_enabled(void) * of the process performing the transition. All other callers of * constraint_expr_eval should pass in NULL for xcontext. */ -static int constraint_expr_eval(struct context *scontext, +static int constraint_expr_eval(struct policydb *policydb, + struct context *scontext, struct context *tcontext, struct context *xcontext, struct constraint_expr *cexpr) @@ -326,8 +310,8 @@ static int constraint_expr_eval(struct context *scontext, case CEXPR_ROLE: val1 = scontext->role; val2 = tcontext->role; - r1 = policydb.role_val_to_struct[val1 - 1]; - r2 = policydb.role_val_to_struct[val2 - 1]; + r1 = policydb->role_val_to_struct[val1 - 1]; + r2 = policydb->role_val_to_struct[val2 - 1]; switch (e->op) { case CEXPR_DOM: s[++sp] = ebitmap_get_bit(&r1->dominates, @@ -472,7 +456,8 @@ static int dump_masked_av_helper(void *k, void *d, void *args) return 0; } -static void security_dump_masked_av(struct context *scontext, +static void security_dump_masked_av(struct policydb *policydb, + struct context *scontext, struct context *tcontext, u16 tclass, u32 permissions, @@ -492,8 +477,8 @@ static void security_dump_masked_av(struct context *scontext, if (!permissions) return; - tclass_name = sym_name(&policydb, SYM_CLASSES, tclass - 1); - tclass_dat = policydb.class_val_to_struct[tclass - 1]; + tclass_name = sym_name(policydb, SYM_CLASSES, tclass - 1); + tclass_dat = policydb->class_val_to_struct[tclass - 1]; common_dat = tclass_dat->comdatum; /* init permission_names */ @@ -507,11 +492,11 @@ static void security_dump_masked_av(struct context *scontext, goto out; /* get scontext/tcontext in text form */ - if (context_struct_to_string(scontext, + if (context_struct_to_string(policydb, scontext, &scontext_name, &length) < 0) goto out; - if (context_struct_to_string(tcontext, + if (context_struct_to_string(policydb, tcontext, &tcontext_name, &length) < 0) goto out; @@ -550,7 +535,8 @@ out: * security_boundary_permission - drops violated permissions * on boundary constraint. */ -static void type_attribute_bounds_av(struct context *scontext, +static void type_attribute_bounds_av(struct policydb *policydb, + struct context *scontext, struct context *tcontext, u16 tclass, struct av_decision *avd) @@ -562,14 +548,14 @@ static void type_attribute_bounds_av(struct context *scontext, struct type_datum *target; u32 masked = 0; - source = flex_array_get_ptr(policydb.type_val_to_struct_array, + source = flex_array_get_ptr(policydb->type_val_to_struct_array, scontext->type - 1); BUG_ON(!source); if (!source->bounds) return; - target = flex_array_get_ptr(policydb.type_val_to_struct_array, + target = flex_array_get_ptr(policydb->type_val_to_struct_array, tcontext->type - 1); BUG_ON(!target); @@ -584,7 +570,7 @@ static void type_attribute_bounds_av(struct context *scontext, tcontextp = &lo_tcontext; } - context_struct_compute_av(&lo_scontext, + context_struct_compute_av(policydb, &lo_scontext, tcontextp, tclass, &lo_avd, @@ -599,7 +585,7 @@ static void type_attribute_bounds_av(struct context *scontext, avd->allowed &= ~masked; /* audit masked permissions */ - security_dump_masked_av(scontext, tcontext, + security_dump_masked_av(policydb, scontext, tcontext, tclass, masked, "bounds"); } @@ -632,11 +618,12 @@ void services_compute_xperms_drivers( * Compute access vectors and extended permissions based on a context * structure pair for the permissions in a particular class. */ -static void context_struct_compute_av(struct context *scontext, - struct context *tcontext, - u16 tclass, - struct av_decision *avd, - struct extended_perms *xperms) +static void context_struct_compute_av(struct policydb *policydb, + struct context *scontext, + struct context *tcontext, + u16 tclass, + struct av_decision *avd, + struct extended_perms *xperms) { struct constraint_node *constraint; struct role_allow *ra; @@ -655,13 +642,13 @@ static void context_struct_compute_av(struct context *scontext, xperms->len = 0; } - if (unlikely(!tclass || tclass > policydb.p_classes.nprim)) { + if (unlikely(!tclass || tclass > policydb->p_classes.nprim)) { if (printk_ratelimit()) printk(KERN_WARNING "SELinux: Invalid class %hu\n", tclass); return; } - tclass_datum = policydb.class_val_to_struct[tclass - 1]; + tclass_datum = policydb->class_val_to_struct[tclass - 1]; /* * If a specific type enforcement rule was defined for @@ -669,15 +656,18 @@ static void context_struct_compute_av(struct context *scontext, */ avkey.target_class = tclass; avkey.specified = AVTAB_AV | AVTAB_XPERMS; - sattr = flex_array_get(policydb.type_attr_map_array, scontext->type - 1); + sattr = flex_array_get(policydb->type_attr_map_array, + scontext->type - 1); BUG_ON(!sattr); - tattr = flex_array_get(policydb.type_attr_map_array, tcontext->type - 1); + tattr = flex_array_get(policydb->type_attr_map_array, + tcontext->type - 1); BUG_ON(!tattr); ebitmap_for_each_positive_bit(sattr, snode, i) { ebitmap_for_each_positive_bit(tattr, tnode, j) { avkey.source_type = i + 1; avkey.target_type = j + 1; - for (node = avtab_search_node(&policydb.te_avtab, &avkey); + for (node = avtab_search_node(&policydb->te_avtab, + &avkey); node; node = avtab_search_node_next(node, avkey.specified)) { if (node->key.specified == AVTAB_ALLOWED) @@ -691,7 +681,7 @@ static void context_struct_compute_av(struct context *scontext, } /* Check conditional av table for additional permissions */ - cond_compute_av(&policydb.te_cond_avtab, &avkey, + cond_compute_av(&policydb->te_cond_avtab, &avkey, avd, xperms); } @@ -704,7 +694,7 @@ static void context_struct_compute_av(struct context *scontext, constraint = tclass_datum->constraints; while (constraint) { if ((constraint->permissions & (avd->allowed)) && - !constraint_expr_eval(scontext, tcontext, NULL, + !constraint_expr_eval(policydb, scontext, tcontext, NULL, constraint->expr)) { avd->allowed &= ~(constraint->permissions); } @@ -716,16 +706,16 @@ static void context_struct_compute_av(struct context *scontext, * role is changing, then check the (current_role, new_role) * pair. */ - if (tclass == policydb.process_class && - (avd->allowed & policydb.process_trans_perms) && + if (tclass == policydb->process_class && + (avd->allowed & policydb->process_trans_perms) && scontext->role != tcontext->role) { - for (ra = policydb.role_allow; ra; ra = ra->next) { + for (ra = policydb->role_allow; ra; ra = ra->next) { if (scontext->role == ra->role && tcontext->role == ra->new_role) break; } if (!ra) - avd->allowed &= ~policydb.process_trans_perms; + avd->allowed &= ~policydb->process_trans_perms; } /* @@ -733,41 +723,46 @@ static void context_struct_compute_av(struct context *scontext, * constraint, lazy checks have to mask any violated * permission and notice it to userspace via audit. */ - type_attribute_bounds_av(scontext, tcontext, + type_attribute_bounds_av(policydb, scontext, tcontext, tclass, avd); } -static int security_validtrans_handle_fail(struct context *ocontext, +static int security_validtrans_handle_fail(struct selinux_state *state, + struct context *ocontext, struct context *ncontext, struct context *tcontext, u16 tclass) { + struct policydb *p = &state->ss->policydb; char *o = NULL, *n = NULL, *t = NULL; u32 olen, nlen, tlen; - if (context_struct_to_string(ocontext, &o, &olen)) + if (context_struct_to_string(p, ocontext, &o, &olen)) goto out; - if (context_struct_to_string(ncontext, &n, &nlen)) + if (context_struct_to_string(p, ncontext, &n, &nlen)) goto out; - if (context_struct_to_string(tcontext, &t, &tlen)) + if (context_struct_to_string(p, tcontext, &t, &tlen)) goto out; audit_log(current->audit_context, GFP_ATOMIC, AUDIT_SELINUX_ERR, "op=security_validate_transition seresult=denied" " oldcontext=%s newcontext=%s taskcontext=%s tclass=%s", - o, n, t, sym_name(&policydb, SYM_CLASSES, tclass-1)); + o, n, t, sym_name(p, SYM_CLASSES, tclass-1)); out: kfree(o); kfree(n); kfree(t); - if (!selinux_enforcing) + if (!is_enforcing(state)) return 0; return -EPERM; } -static int security_compute_validatetrans(u32 oldsid, u32 newsid, u32 tasksid, +static int security_compute_validatetrans(struct selinux_state *state, + u32 oldsid, u32 newsid, u32 tasksid, u16 orig_tclass, bool user) { + struct policydb *policydb; + struct sidtab *sidtab; struct context *ocontext; struct context *ncontext; struct context *tcontext; @@ -776,23 +771,27 @@ static int security_compute_validatetrans(u32 oldsid, u32 newsid, u32 tasksid, u16 tclass; int rc = 0; - if (!ss_initialized) + + if (!state->initialized) return 0; - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); + + policydb = &state->ss->policydb; + sidtab = &state->ss->sidtab; if (!user) - tclass = unmap_class(orig_tclass); + tclass = unmap_class(&state->ss->map, orig_tclass); else tclass = orig_tclass; - if (!tclass || tclass > policydb.p_classes.nprim) { + if (!tclass || tclass > policydb->p_classes.nprim) { rc = -EINVAL; goto out; } - tclass_datum = policydb.class_val_to_struct[tclass - 1]; + tclass_datum = policydb->class_val_to_struct[tclass - 1]; - ocontext = sidtab_search(&sidtab, oldsid); + ocontext = sidtab_search(sidtab, oldsid); if (!ocontext) { printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", __func__, oldsid); @@ -800,7 +799,7 @@ static int security_compute_validatetrans(u32 oldsid, u32 newsid, u32 tasksid, goto out; } - ncontext = sidtab_search(&sidtab, newsid); + ncontext = sidtab_search(sidtab, newsid); if (!ncontext) { printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", __func__, newsid); @@ -808,7 +807,7 @@ static int security_compute_validatetrans(u32 oldsid, u32 newsid, u32 tasksid, goto out; } - tcontext = sidtab_search(&sidtab, tasksid); + tcontext = sidtab_search(sidtab, tasksid); if (!tcontext) { printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", __func__, tasksid); @@ -818,12 +817,13 @@ static int security_compute_validatetrans(u32 oldsid, u32 newsid, u32 tasksid, constraint = tclass_datum->validatetrans; while (constraint) { - if (!constraint_expr_eval(ocontext, ncontext, tcontext, - constraint->expr)) { + if (!constraint_expr_eval(policydb, ocontext, ncontext, + tcontext, constraint->expr)) { if (user) rc = -EPERM; else - rc = security_validtrans_handle_fail(ocontext, + rc = security_validtrans_handle_fail(state, + ocontext, ncontext, tcontext, tclass); @@ -833,22 +833,24 @@ static int security_compute_validatetrans(u32 oldsid, u32 newsid, u32 tasksid, } out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return rc; } -int security_validate_transition_user(u32 oldsid, u32 newsid, u32 tasksid, - u16 tclass) +int security_validate_transition_user(struct selinux_state *state, + u32 oldsid, u32 newsid, u32 tasksid, + u16 tclass) { - return security_compute_validatetrans(oldsid, newsid, tasksid, - tclass, true); + return security_compute_validatetrans(state, oldsid, newsid, tasksid, + tclass, true); } -int security_validate_transition(u32 oldsid, u32 newsid, u32 tasksid, +int security_validate_transition(struct selinux_state *state, + u32 oldsid, u32 newsid, u32 tasksid, u16 orig_tclass) { - return security_compute_validatetrans(oldsid, newsid, tasksid, - orig_tclass, false); + return security_compute_validatetrans(state, oldsid, newsid, tasksid, + orig_tclass, false); } /* @@ -860,20 +862,26 @@ int security_validate_transition(u32 oldsid, u32 newsid, u32 tasksid, * @oldsid : current security identifier * @newsid : destinated security identifier */ -int security_bounded_transition(u32 old_sid, u32 new_sid) +int security_bounded_transition(struct selinux_state *state, + u32 old_sid, u32 new_sid) { + struct policydb *policydb; + struct sidtab *sidtab; struct context *old_context, *new_context; struct type_datum *type; int index; int rc; - if (!ss_initialized) + if (!state->initialized) return 0; - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); + + policydb = &state->ss->policydb; + sidtab = &state->ss->sidtab; rc = -EINVAL; - old_context = sidtab_search(&sidtab, old_sid); + old_context = sidtab_search(sidtab, old_sid); if (!old_context) { printk(KERN_ERR "SELinux: %s: unrecognized SID %u\n", __func__, old_sid); @@ -881,7 +889,7 @@ int security_bounded_transition(u32 old_sid, u32 new_sid) } rc = -EINVAL; - new_context = sidtab_search(&sidtab, new_sid); + new_context = sidtab_search(sidtab, new_sid); if (!new_context) { printk(KERN_ERR "SELinux: %s: unrecognized SID %u\n", __func__, new_sid); @@ -895,7 +903,7 @@ int security_bounded_transition(u32 old_sid, u32 new_sid) index = new_context->type; while (true) { - type = flex_array_get_ptr(policydb.type_val_to_struct_array, + type = flex_array_get_ptr(policydb->type_val_to_struct_array, index - 1); BUG_ON(!type); @@ -917,9 +925,9 @@ int security_bounded_transition(u32 old_sid, u32 new_sid) char *new_name = NULL; u32 length; - if (!context_struct_to_string(old_context, + if (!context_struct_to_string(policydb, old_context, &old_name, &length) && - !context_struct_to_string(new_context, + !context_struct_to_string(policydb, new_context, &new_name, &length)) { audit_log(current->audit_context, GFP_ATOMIC, AUDIT_SELINUX_ERR, @@ -932,17 +940,17 @@ int security_bounded_transition(u32 old_sid, u32 new_sid) kfree(old_name); } out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return rc; } -static void avd_init(struct av_decision *avd) +static void avd_init(struct selinux_state *state, struct av_decision *avd) { avd->allowed = 0; avd->auditallow = 0; avd->auditdeny = 0xffffffff; - avd->seqno = latest_granting; + avd->seqno = state->ss->latest_granting; avd->flags = 0; } @@ -1000,12 +1008,15 @@ void services_compute_xperms_decision(struct extended_perms_decision *xpermd, } } -void security_compute_xperms_decision(u32 ssid, - u32 tsid, - u16 orig_tclass, - u8 driver, - struct extended_perms_decision *xpermd) +void security_compute_xperms_decision(struct selinux_state *state, + u32 ssid, + u32 tsid, + u16 orig_tclass, + u8 driver, + struct extended_perms_decision *xpermd) { + struct policydb *policydb; + struct sidtab *sidtab; u16 tclass; struct context *scontext, *tcontext; struct avtab_key avkey; @@ -1020,60 +1031,64 @@ void security_compute_xperms_decision(u32 ssid, memset(xpermd->auditallow->p, 0, sizeof(xpermd->auditallow->p)); memset(xpermd->dontaudit->p, 0, sizeof(xpermd->dontaudit->p)); - read_lock(&policy_rwlock); - if (!ss_initialized) + read_lock(&state->ss->policy_rwlock); + if (!state->initialized) goto allow; - scontext = sidtab_search(&sidtab, ssid); + policydb = &state->ss->policydb; + sidtab = &state->ss->sidtab; + + scontext = sidtab_search(sidtab, ssid); if (!scontext) { printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", __func__, ssid); goto out; } - tcontext = sidtab_search(&sidtab, tsid); + tcontext = sidtab_search(sidtab, tsid); if (!tcontext) { printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", __func__, tsid); goto out; } - tclass = unmap_class(orig_tclass); + tclass = unmap_class(&state->ss->map, orig_tclass); if (unlikely(orig_tclass && !tclass)) { - if (policydb.allow_unknown) + if (policydb->allow_unknown) goto allow; goto out; } - if (unlikely(!tclass || tclass > policydb.p_classes.nprim)) { + if (unlikely(!tclass || tclass > policydb->p_classes.nprim)) { pr_warn_ratelimited("SELinux: Invalid class %hu\n", tclass); goto out; } avkey.target_class = tclass; avkey.specified = AVTAB_XPERMS; - sattr = flex_array_get(policydb.type_attr_map_array, + sattr = flex_array_get(policydb->type_attr_map_array, scontext->type - 1); BUG_ON(!sattr); - tattr = flex_array_get(policydb.type_attr_map_array, + tattr = flex_array_get(policydb->type_attr_map_array, tcontext->type - 1); BUG_ON(!tattr); ebitmap_for_each_positive_bit(sattr, snode, i) { ebitmap_for_each_positive_bit(tattr, tnode, j) { avkey.source_type = i + 1; avkey.target_type = j + 1; - for (node = avtab_search_node(&policydb.te_avtab, &avkey); + for (node = avtab_search_node(&policydb->te_avtab, + &avkey); node; node = avtab_search_node_next(node, avkey.specified)) services_compute_xperms_decision(xpermd, node); - cond_compute_xperms(&policydb.te_cond_avtab, + cond_compute_xperms(&policydb->te_cond_avtab, &avkey, xpermd); } } out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return; allow: memset(xpermd->allowed->p, 0xff, sizeof(xpermd->allowed->p)); @@ -1091,22 +1106,28 @@ allow: * Compute a set of access vector decisions based on the * SID pair (@ssid, @tsid) for the permissions in @tclass. */ -void security_compute_av(u32 ssid, +void security_compute_av(struct selinux_state *state, + u32 ssid, u32 tsid, u16 orig_tclass, struct av_decision *avd, struct extended_perms *xperms) { + struct policydb *policydb; + struct sidtab *sidtab; u16 tclass; struct context *scontext = NULL, *tcontext = NULL; - read_lock(&policy_rwlock); - avd_init(avd); + read_lock(&state->ss->policy_rwlock); + avd_init(state, avd); xperms->len = 0; - if (!ss_initialized) + if (!state->initialized) goto allow; - scontext = sidtab_search(&sidtab, ssid); + policydb = &state->ss->policydb; + sidtab = &state->ss->sidtab; + + scontext = sidtab_search(sidtab, ssid); if (!scontext) { printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", __func__, ssid); @@ -1114,45 +1135,53 @@ void security_compute_av(u32 ssid, } /* permissive domain? */ - if (ebitmap_get_bit(&policydb.permissive_map, scontext->type)) + if (ebitmap_get_bit(&policydb->permissive_map, scontext->type)) avd->flags |= AVD_FLAGS_PERMISSIVE; - tcontext = sidtab_search(&sidtab, tsid); + tcontext = sidtab_search(sidtab, tsid); if (!tcontext) { printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", __func__, tsid); goto out; } - tclass = unmap_class(orig_tclass); + tclass = unmap_class(&state->ss->map, orig_tclass); if (unlikely(orig_tclass && !tclass)) { - if (policydb.allow_unknown) + if (policydb->allow_unknown) goto allow; goto out; } - context_struct_compute_av(scontext, tcontext, tclass, avd, xperms); - map_decision(orig_tclass, avd, policydb.allow_unknown); + context_struct_compute_av(policydb, scontext, tcontext, tclass, avd, + xperms); + map_decision(&state->ss->map, orig_tclass, avd, + policydb->allow_unknown); out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return; allow: avd->allowed = 0xffffffff; goto out; } -void security_compute_av_user(u32 ssid, +void security_compute_av_user(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, struct av_decision *avd) { + struct policydb *policydb; + struct sidtab *sidtab; struct context *scontext = NULL, *tcontext = NULL; - read_lock(&policy_rwlock); - avd_init(avd); - if (!ss_initialized) + read_lock(&state->ss->policy_rwlock); + avd_init(state, avd); + if (!state->initialized) goto allow; - scontext = sidtab_search(&sidtab, ssid); + policydb = &state->ss->policydb; + sidtab = &state->ss->sidtab; + + scontext = sidtab_search(sidtab, ssid); if (!scontext) { printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", __func__, ssid); @@ -1160,10 +1189,10 @@ void security_compute_av_user(u32 ssid, } /* permissive domain? */ - if (ebitmap_get_bit(&policydb.permissive_map, scontext->type)) + if (ebitmap_get_bit(&policydb->permissive_map, scontext->type)) avd->flags |= AVD_FLAGS_PERMISSIVE; - tcontext = sidtab_search(&sidtab, tsid); + tcontext = sidtab_search(sidtab, tsid); if (!tcontext) { printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", __func__, tsid); @@ -1171,14 +1200,15 @@ void security_compute_av_user(u32 ssid, } if (unlikely(!tclass)) { - if (policydb.allow_unknown) + if (policydb->allow_unknown) goto allow; goto out; } - context_struct_compute_av(scontext, tcontext, tclass, avd, NULL); + context_struct_compute_av(policydb, scontext, tcontext, tclass, avd, + NULL); out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return; allow: avd->allowed = 0xffffffff; @@ -1192,7 +1222,9 @@ allow: * to point to this string and set `*scontext_len' to * the length of the string. */ -static int context_struct_to_string(struct context *context, char **scontext, u32 *scontext_len) +static int context_struct_to_string(struct policydb *p, + struct context *context, + char **scontext, u32 *scontext_len) { char *scontextp; @@ -1211,10 +1243,10 @@ static int context_struct_to_string(struct context *context, char **scontext, u3 } /* Compute the size of the context. */ - *scontext_len += strlen(sym_name(&policydb, SYM_USERS, context->user - 1)) + 1; - *scontext_len += strlen(sym_name(&policydb, SYM_ROLES, context->role - 1)) + 1; - *scontext_len += strlen(sym_name(&policydb, SYM_TYPES, context->type - 1)) + 1; - *scontext_len += mls_compute_context_len(context); + *scontext_len += strlen(sym_name(p, SYM_USERS, context->user - 1)) + 1; + *scontext_len += strlen(sym_name(p, SYM_ROLES, context->role - 1)) + 1; + *scontext_len += strlen(sym_name(p, SYM_TYPES, context->type - 1)) + 1; + *scontext_len += mls_compute_context_len(p, context); if (!scontext) return 0; @@ -1229,11 +1261,11 @@ static int context_struct_to_string(struct context *context, char **scontext, u3 * Copy the user name, role name and type name into the context. */ scontextp += sprintf(scontextp, "%s:%s:%s", - sym_name(&policydb, SYM_USERS, context->user - 1), - sym_name(&policydb, SYM_ROLES, context->role - 1), - sym_name(&policydb, SYM_TYPES, context->type - 1)); + sym_name(p, SYM_USERS, context->user - 1), + sym_name(p, SYM_ROLES, context->role - 1), + sym_name(p, SYM_TYPES, context->type - 1)); - mls_sid_to_context(context, &scontextp); + mls_sid_to_context(p, context, &scontextp); *scontextp = 0; @@ -1249,9 +1281,12 @@ const char *security_get_initial_sid_context(u32 sid) return initial_sid_to_string[sid]; } -static int security_sid_to_context_core(u32 sid, char **scontext, +static int security_sid_to_context_core(struct selinux_state *state, + u32 sid, char **scontext, u32 *scontext_len, int force) { + struct policydb *policydb; + struct sidtab *sidtab; struct context *context; int rc = 0; @@ -1259,7 +1294,7 @@ static int security_sid_to_context_core(u32 sid, char **scontext, *scontext = NULL; *scontext_len = 0; - if (!ss_initialized) { + if (!state->initialized) { if (sid <= SECINITSID_NUM) { char *scontextp; @@ -1280,20 +1315,23 @@ static int security_sid_to_context_core(u32 sid, char **scontext, rc = -EINVAL; goto out; } - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); + policydb = &state->ss->policydb; + sidtab = &state->ss->sidtab; if (force) - context = sidtab_search_force(&sidtab, sid); + context = sidtab_search_force(sidtab, sid); else - context = sidtab_search(&sidtab, sid); + context = sidtab_search(sidtab, sid); if (!context) { printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", __func__, sid); rc = -EINVAL; goto out_unlock; } - rc = context_struct_to_string(context, scontext, scontext_len); + rc = context_struct_to_string(policydb, context, scontext, + scontext_len); out_unlock: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); out: return rc; @@ -1309,14 +1347,18 @@ out: * into a dynamically allocated string of the correct size. Set @scontext * to point to this string and set @scontext_len to the length of the string. */ -int security_sid_to_context(u32 sid, char **scontext, u32 *scontext_len) +int security_sid_to_context(struct selinux_state *state, + u32 sid, char **scontext, u32 *scontext_len) { - return security_sid_to_context_core(sid, scontext, scontext_len, 0); + return security_sid_to_context_core(state, sid, scontext, + scontext_len, 0); } -int security_sid_to_context_force(u32 sid, char **scontext, u32 *scontext_len) +int security_sid_to_context_force(struct selinux_state *state, u32 sid, + char **scontext, u32 *scontext_len) { - return security_sid_to_context_core(sid, scontext, scontext_len, 1); + return security_sid_to_context_core(state, sid, scontext, + scontext_len, 1); } /* @@ -1404,10 +1446,13 @@ out: return rc; } -static int security_context_to_sid_core(const char *scontext, u32 scontext_len, +static int security_context_to_sid_core(struct selinux_state *state, + const char *scontext, u32 scontext_len, u32 *sid, u32 def_sid, gfp_t gfp_flags, int force) { + struct policydb *policydb; + struct sidtab *sidtab; char *scontext2, *str = NULL; struct context context; int rc = 0; @@ -1421,7 +1466,7 @@ static int security_context_to_sid_core(const char *scontext, u32 scontext_len, if (!scontext2) return -ENOMEM; - if (!ss_initialized) { + if (!state->initialized) { int i; for (i = 1; i < SECINITSID_NUM; i++) { @@ -1442,9 +1487,10 @@ static int security_context_to_sid_core(const char *scontext, u32 scontext_len, if (!str) goto out; } - - read_lock(&policy_rwlock); - rc = string_to_context_struct(&policydb, &sidtab, scontext2, + read_lock(&state->ss->policy_rwlock); + policydb = &state->ss->policydb; + sidtab = &state->ss->sidtab; + rc = string_to_context_struct(policydb, sidtab, scontext2, scontext_len, &context, def_sid); if (rc == -EINVAL && force) { context.str = str; @@ -1452,10 +1498,10 @@ static int security_context_to_sid_core(const char *scontext, u32 scontext_len, str = NULL; } else if (rc) goto out_unlock; - rc = sidtab_context_to_sid(&sidtab, &context, sid); + rc = sidtab_context_to_sid(sidtab, &context, sid); context_destroy(&context); out_unlock: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); out: kfree(scontext2); kfree(str); @@ -1474,16 +1520,19 @@ out: * Returns -%EINVAL if the context is invalid, -%ENOMEM if insufficient * memory is available, or 0 on success. */ -int security_context_to_sid(const char *scontext, u32 scontext_len, u32 *sid, +int security_context_to_sid(struct selinux_state *state, + const char *scontext, u32 scontext_len, u32 *sid, gfp_t gfp) { - return security_context_to_sid_core(scontext, scontext_len, + return security_context_to_sid_core(state, scontext, scontext_len, sid, SECSID_NULL, gfp, 0); } -int security_context_str_to_sid(const char *scontext, u32 *sid, gfp_t gfp) +int security_context_str_to_sid(struct selinux_state *state, + const char *scontext, u32 *sid, gfp_t gfp) { - return security_context_to_sid(scontext, strlen(scontext), sid, gfp); + return security_context_to_sid(state, scontext, strlen(scontext), + sid, gfp); } /** @@ -1504,51 +1553,56 @@ int security_context_str_to_sid(const char *scontext, u32 *sid, gfp_t gfp) * Returns -%EINVAL if the context is invalid, -%ENOMEM if insufficient * memory is available, or 0 on success. */ -int security_context_to_sid_default(const char *scontext, u32 scontext_len, +int security_context_to_sid_default(struct selinux_state *state, + const char *scontext, u32 scontext_len, u32 *sid, u32 def_sid, gfp_t gfp_flags) { - return security_context_to_sid_core(scontext, scontext_len, + return security_context_to_sid_core(state, scontext, scontext_len, sid, def_sid, gfp_flags, 1); } -int security_context_to_sid_force(const char *scontext, u32 scontext_len, +int security_context_to_sid_force(struct selinux_state *state, + const char *scontext, u32 scontext_len, u32 *sid) { - return security_context_to_sid_core(scontext, scontext_len, + return security_context_to_sid_core(state, scontext, scontext_len, sid, SECSID_NULL, GFP_KERNEL, 1); } static int compute_sid_handle_invalid_context( + struct selinux_state *state, struct context *scontext, struct context *tcontext, u16 tclass, struct context *newcontext) { + struct policydb *policydb = &state->ss->policydb; char *s = NULL, *t = NULL, *n = NULL; u32 slen, tlen, nlen; - if (context_struct_to_string(scontext, &s, &slen)) + if (context_struct_to_string(policydb, scontext, &s, &slen)) goto out; - if (context_struct_to_string(tcontext, &t, &tlen)) + if (context_struct_to_string(policydb, tcontext, &t, &tlen)) goto out; - if (context_struct_to_string(newcontext, &n, &nlen)) + if (context_struct_to_string(policydb, newcontext, &n, &nlen)) goto out; audit_log(current->audit_context, GFP_ATOMIC, AUDIT_SELINUX_ERR, "op=security_compute_sid invalid_context=%s" " scontext=%s" " tcontext=%s" " tclass=%s", - n, s, t, sym_name(&policydb, SYM_CLASSES, tclass-1)); + n, s, t, sym_name(policydb, SYM_CLASSES, tclass-1)); out: kfree(s); kfree(t); kfree(n); - if (!selinux_enforcing) + if (!is_enforcing(state)) return 0; return -EACCES; } -static void filename_compute_type(struct policydb *p, struct context *newcontext, +static void filename_compute_type(struct policydb *policydb, + struct context *newcontext, u32 stype, u32 ttype, u16 tclass, const char *objname) { @@ -1560,7 +1614,7 @@ static void filename_compute_type(struct policydb *p, struct context *newcontext * like /dev or /var/run. This bitmap will quickly skip rule searches * if the ttype does not contain any rules. */ - if (!ebitmap_get_bit(&p->filename_trans_ttypes, ttype)) + if (!ebitmap_get_bit(&policydb->filename_trans_ttypes, ttype)) return; ft.stype = stype; @@ -1568,12 +1622,13 @@ static void filename_compute_type(struct policydb *p, struct context *newcontext ft.tclass = tclass; ft.name = objname; - otype = hashtab_search(p->filename_trans, &ft); + otype = hashtab_search(policydb->filename_trans, &ft); if (otype) newcontext->type = otype->otype; } -static int security_compute_sid(u32 ssid, +static int security_compute_sid(struct selinux_state *state, + u32 ssid, u32 tsid, u16 orig_tclass, u32 specified, @@ -1581,6 +1636,8 @@ static int security_compute_sid(u32 ssid, u32 *out_sid, bool kern) { + struct policydb *policydb; + struct sidtab *sidtab; struct class_datum *cladatum = NULL; struct context *scontext = NULL, *tcontext = NULL, newcontext; struct role_trans *roletr = NULL; @@ -1591,7 +1648,7 @@ static int security_compute_sid(u32 ssid, int rc = 0; bool sock; - if (!ss_initialized) { + if (!state->initialized) { switch (orig_tclass) { case SECCLASS_PROCESS: /* kernel value */ *out_sid = ssid; @@ -1605,24 +1662,28 @@ static int security_compute_sid(u32 ssid, context_init(&newcontext); - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); if (kern) { - tclass = unmap_class(orig_tclass); + tclass = unmap_class(&state->ss->map, orig_tclass); sock = security_is_socket_class(orig_tclass); } else { tclass = orig_tclass; - sock = security_is_socket_class(map_class(tclass)); + sock = security_is_socket_class(map_class(&state->ss->map, + tclass)); } - scontext = sidtab_search(&sidtab, ssid); + policydb = &state->ss->policydb; + sidtab = &state->ss->sidtab; + + scontext = sidtab_search(sidtab, ssid); if (!scontext) { printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", __func__, ssid); rc = -EINVAL; goto out_unlock; } - tcontext = sidtab_search(&sidtab, tsid); + tcontext = sidtab_search(sidtab, tsid); if (!tcontext) { printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", __func__, tsid); @@ -1630,8 +1691,8 @@ static int security_compute_sid(u32 ssid, goto out_unlock; } - if (tclass && tclass <= policydb.p_classes.nprim) - cladatum = policydb.class_val_to_struct[tclass - 1]; + if (tclass && tclass <= policydb->p_classes.nprim) + cladatum = policydb->class_val_to_struct[tclass - 1]; /* Set the user identity. */ switch (specified) { @@ -1657,7 +1718,7 @@ static int security_compute_sid(u32 ssid, } else if (cladatum && cladatum->default_role == DEFAULT_TARGET) { newcontext.role = tcontext->role; } else { - if ((tclass == policydb.process_class) || (sock == true)) + if ((tclass == policydb->process_class) || (sock == true)) newcontext.role = scontext->role; else newcontext.role = OBJECT_R_VAL; @@ -1669,7 +1730,7 @@ static int security_compute_sid(u32 ssid, } else if (cladatum && cladatum->default_type == DEFAULT_TARGET) { newcontext.type = tcontext->type; } else { - if ((tclass == policydb.process_class) || (sock == true)) { + if ((tclass == policydb->process_class) || (sock == true)) { /* Use the type of process. */ newcontext.type = scontext->type; } else { @@ -1683,11 +1744,11 @@ static int security_compute_sid(u32 ssid, avkey.target_type = tcontext->type; avkey.target_class = tclass; avkey.specified = specified; - avdatum = avtab_search(&policydb.te_avtab, &avkey); + avdatum = avtab_search(&policydb->te_avtab, &avkey); /* If no permanent rule, also check for enabled conditional rules */ if (!avdatum) { - node = avtab_search_node(&policydb.te_cond_avtab, &avkey); + node = avtab_search_node(&policydb->te_cond_avtab, &avkey); for (; node; node = avtab_search_node_next(node, specified)) { if (node->key.specified & AVTAB_ENABLED) { avdatum = &node->datum; @@ -1703,13 +1764,14 @@ static int security_compute_sid(u32 ssid, /* if we have a objname this is a file trans check so check those rules */ if (objname) - filename_compute_type(&policydb, &newcontext, scontext->type, + filename_compute_type(policydb, &newcontext, scontext->type, tcontext->type, tclass, objname); /* Check for class-specific changes. */ if (specified & AVTAB_TRANSITION) { /* Look for a role transition rule. */ - for (roletr = policydb.role_tr; roletr; roletr = roletr->next) { + for (roletr = policydb->role_tr; roletr; + roletr = roletr->next) { if ((roletr->role == scontext->role) && (roletr->type == tcontext->type) && (roletr->tclass == tclass)) { @@ -1722,14 +1784,14 @@ static int security_compute_sid(u32 ssid, /* Set the MLS attributes. This is done last because it may allocate memory. */ - rc = mls_compute_sid(scontext, tcontext, tclass, specified, + rc = mls_compute_sid(policydb, scontext, tcontext, tclass, specified, &newcontext, sock); if (rc) goto out_unlock; /* Check the validity of the context. */ - if (!policydb_context_isvalid(&policydb, &newcontext)) { - rc = compute_sid_handle_invalid_context(scontext, + if (!policydb_context_isvalid(policydb, &newcontext)) { + rc = compute_sid_handle_invalid_context(state, scontext, tcontext, tclass, &newcontext); @@ -1737,9 +1799,9 @@ static int security_compute_sid(u32 ssid, goto out_unlock; } /* Obtain the sid for the context. */ - rc = sidtab_context_to_sid(&sidtab, &newcontext, out_sid); + rc = sidtab_context_to_sid(sidtab, &newcontext, out_sid); out_unlock: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); context_destroy(&newcontext); out: return rc; @@ -1758,17 +1820,21 @@ out: * if insufficient memory is available, or %0 if the new SID was * computed successfully. */ -int security_transition_sid(u32 ssid, u32 tsid, u16 tclass, +int security_transition_sid(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, const struct qstr *qstr, u32 *out_sid) { - return security_compute_sid(ssid, tsid, tclass, AVTAB_TRANSITION, + return security_compute_sid(state, ssid, tsid, tclass, + AVTAB_TRANSITION, qstr ? qstr->name : NULL, out_sid, true); } -int security_transition_sid_user(u32 ssid, u32 tsid, u16 tclass, +int security_transition_sid_user(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, const char *objname, u32 *out_sid) { - return security_compute_sid(ssid, tsid, tclass, AVTAB_TRANSITION, + return security_compute_sid(state, ssid, tsid, tclass, + AVTAB_TRANSITION, objname, out_sid, false); } @@ -1785,12 +1851,14 @@ int security_transition_sid_user(u32 ssid, u32 tsid, u16 tclass, * if insufficient memory is available, or %0 if the SID was * computed successfully. */ -int security_member_sid(u32 ssid, +int security_member_sid(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, u32 *out_sid) { - return security_compute_sid(ssid, tsid, tclass, AVTAB_MEMBER, NULL, + return security_compute_sid(state, ssid, tsid, tclass, + AVTAB_MEMBER, NULL, out_sid, false); } @@ -1807,12 +1875,14 @@ int security_member_sid(u32 ssid, * if insufficient memory is available, or %0 if the SID was * computed successfully. */ -int security_change_sid(u32 ssid, +int security_change_sid(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, u32 *out_sid) { - return security_compute_sid(ssid, tsid, tclass, AVTAB_CHANGE, NULL, + return security_compute_sid(state, + ssid, tsid, tclass, AVTAB_CHANGE, NULL, out_sid, false); } @@ -1829,15 +1899,18 @@ static int clone_sid(u32 sid, return 0; } -static inline int convert_context_handle_invalid_context(struct context *context) +static inline int convert_context_handle_invalid_context( + struct selinux_state *state, + struct context *context) { + struct policydb *policydb = &state->ss->policydb; char *s; u32 len; - if (selinux_enforcing) + if (is_enforcing(state)) return -EINVAL; - if (!context_struct_to_string(context, &s, &len)) { + if (!context_struct_to_string(policydb, context, &s, &len)) { printk(KERN_WARNING "SELinux: Context %s would be invalid if enforcing\n", s); kfree(s); } @@ -1845,6 +1918,7 @@ static inline int convert_context_handle_invalid_context(struct context *context } struct convert_context_args { + struct selinux_state *state; struct policydb *oldp; struct policydb *newp; }; @@ -1971,7 +2045,8 @@ static int convert_context(u32 key, /* Check the validity of the new context. */ if (!policydb_context_isvalid(args->newp, c)) { - rc = convert_context_handle_invalid_context(&oldc); + rc = convert_context_handle_invalid_context(args->state, + &oldc); if (rc) goto bad; } @@ -1983,7 +2058,7 @@ out: return rc; bad: /* Map old representation to string and save it. */ - rc = context_struct_to_string(&oldc, &s, &len); + rc = context_struct_to_string(args->oldp, &oldc, &s, &len); if (rc) return rc; context_destroy(&oldc); @@ -1996,39 +2071,29 @@ bad: goto out; } -static void security_load_policycaps(void) +static void security_load_policycaps(struct selinux_state *state) { + struct policydb *p = &state->ss->policydb; unsigned int i; struct ebitmap_node *node; - selinux_policycap_netpeer = ebitmap_get_bit(&policydb.policycaps, - POLICYDB_CAPABILITY_NETPEER); - selinux_policycap_openperm = ebitmap_get_bit(&policydb.policycaps, - POLICYDB_CAPABILITY_OPENPERM); - selinux_policycap_extsockclass = ebitmap_get_bit(&policydb.policycaps, - POLICYDB_CAPABILITY_EXTSOCKCLASS); - selinux_policycap_alwaysnetwork = ebitmap_get_bit(&policydb.policycaps, - POLICYDB_CAPABILITY_ALWAYSNETWORK); - selinux_policycap_cgroupseclabel = - ebitmap_get_bit(&policydb.policycaps, - POLICYDB_CAPABILITY_CGROUPSECLABEL); - selinux_policycap_nnp_nosuid_transition = - ebitmap_get_bit(&policydb.policycaps, - POLICYDB_CAPABILITY_NNP_NOSUID_TRANSITION); + for (i = 0; i < ARRAY_SIZE(state->policycap); i++) + state->policycap[i] = ebitmap_get_bit(&p->policycaps, i); for (i = 0; i < ARRAY_SIZE(selinux_policycap_names); i++) pr_info("SELinux: policy capability %s=%d\n", selinux_policycap_names[i], - ebitmap_get_bit(&policydb.policycaps, i)); + ebitmap_get_bit(&p->policycaps, i)); - ebitmap_for_each_positive_bit(&policydb.policycaps, node, i) { + ebitmap_for_each_positive_bit(&p->policycaps, node, i) { if (i >= ARRAY_SIZE(selinux_policycap_names)) pr_info("SELinux: unknown policy capability %u\n", i); } } -static int security_preserve_bools(struct policydb *p); +static int security_preserve_bools(struct selinux_state *state, + struct policydb *newpolicydb); /** * security_load_policy - Load a security policy configuration. @@ -2040,14 +2105,16 @@ static int security_preserve_bools(struct policydb *p); * This function will flush the access vector cache after * loading the new policy. */ -int security_load_policy(void *data, size_t len) +int security_load_policy(struct selinux_state *state, void *data, size_t len) { + struct policydb *policydb; + struct sidtab *sidtab; struct policydb *oldpolicydb, *newpolicydb; struct sidtab oldsidtab, newsidtab; - struct selinux_mapping *oldmap, *map = NULL; + struct selinux_mapping *oldmapping; + struct selinux_map newmap; struct convert_context_args args; u32 seqno; - u16 map_size; int rc = 0; struct policy_file file = { data, len }, *fp = &file; @@ -2058,53 +2125,42 @@ int security_load_policy(void *data, size_t len) } newpolicydb = oldpolicydb + 1; - if (!ss_initialized) { - avtab_cache_init(); - ebitmap_cache_init(); - hashtab_cache_init(); - rc = policydb_read(&policydb, fp); + policydb = &state->ss->policydb; + sidtab = &state->ss->sidtab; + + if (!state->initialized) { + rc = policydb_read(policydb, fp); + if (rc) + goto out; + + policydb->len = len; + rc = selinux_set_mapping(policydb, secclass_map, + &state->ss->map); if (rc) { - avtab_cache_destroy(); - ebitmap_cache_destroy(); - hashtab_cache_destroy(); + policydb_destroy(policydb); goto out; } - policydb.len = len; - rc = selinux_set_mapping(&policydb, secclass_map, - ¤t_mapping, - ¤t_mapping_size); + rc = policydb_load_isids(policydb, sidtab); if (rc) { - policydb_destroy(&policydb); - avtab_cache_destroy(); - ebitmap_cache_destroy(); - hashtab_cache_destroy(); + policydb_destroy(policydb); goto out; } - rc = policydb_load_isids(&policydb, &sidtab); - if (rc) { - policydb_destroy(&policydb); - avtab_cache_destroy(); - ebitmap_cache_destroy(); - hashtab_cache_destroy(); - goto out; - } - - security_load_policycaps(); - ss_initialized = 1; - seqno = ++latest_granting; + security_load_policycaps(state); + state->initialized = 1; + seqno = ++state->ss->latest_granting; selinux_complete_init(); avc_ss_reset(seqno); selnl_notify_policyload(seqno); - selinux_status_update_policyload(seqno); + selinux_status_update_policyload(state, seqno); selinux_netlbl_cache_invalidate(); selinux_xfrm_notify_policyload(); goto out; } #if 0 - sidtab_hash_eval(&sidtab, "sids"); + sidtab_hash_eval(sidtab, "sids"); #endif rc = policydb_read(newpolicydb, fp); @@ -2113,9 +2169,9 @@ int security_load_policy(void *data, size_t len) newpolicydb->len = len; /* If switching between different policy types, log MLS status */ - if (policydb.mls_enabled && !newpolicydb->mls_enabled) + if (policydb->mls_enabled && !newpolicydb->mls_enabled) printk(KERN_INFO "SELinux: Disabling MLS support...\n"); - else if (!policydb.mls_enabled && newpolicydb->mls_enabled) + else if (!policydb->mls_enabled && newpolicydb->mls_enabled) printk(KERN_INFO "SELinux: Enabling MLS support...\n"); rc = policydb_load_isids(newpolicydb, &newsidtab); @@ -2125,20 +2181,20 @@ int security_load_policy(void *data, size_t len) goto out; } - rc = selinux_set_mapping(newpolicydb, secclass_map, &map, &map_size); + rc = selinux_set_mapping(newpolicydb, secclass_map, &newmap); if (rc) goto err; - rc = security_preserve_bools(newpolicydb); + rc = security_preserve_bools(state, newpolicydb); if (rc) { printk(KERN_ERR "SELinux: unable to preserve booleans\n"); goto err; } /* Clone the SID table. */ - sidtab_shutdown(&sidtab); + sidtab_shutdown(sidtab); - rc = sidtab_map(&sidtab, clone_sid, &newsidtab); + rc = sidtab_map(sidtab, clone_sid, &newsidtab); if (rc) goto err; @@ -2146,7 +2202,8 @@ int security_load_policy(void *data, size_t len) * Convert the internal representations of contexts * in the new SID table. */ - args.oldp = &policydb; + args.state = state; + args.oldp = policydb; args.newp = newpolicydb; rc = sidtab_map(&newsidtab, convert_context, &args); if (rc) { @@ -2157,28 +2214,28 @@ int security_load_policy(void *data, size_t len) } /* Save the old policydb and SID table to free later. */ - memcpy(oldpolicydb, &policydb, sizeof(policydb)); - sidtab_set(&oldsidtab, &sidtab); + memcpy(oldpolicydb, policydb, sizeof(*policydb)); + sidtab_set(&oldsidtab, sidtab); /* Install the new policydb and SID table. */ - write_lock_irq(&policy_rwlock); - memcpy(&policydb, newpolicydb, sizeof(policydb)); - sidtab_set(&sidtab, &newsidtab); - security_load_policycaps(); - oldmap = current_mapping; - current_mapping = map; - current_mapping_size = map_size; - seqno = ++latest_granting; - write_unlock_irq(&policy_rwlock); + write_lock_irq(&state->ss->policy_rwlock); + memcpy(policydb, newpolicydb, sizeof(*policydb)); + sidtab_set(sidtab, &newsidtab); + security_load_policycaps(state); + oldmapping = state->ss->map.mapping; + state->ss->map.mapping = newmap.mapping; + state->ss->map.size = newmap.size; + seqno = ++state->ss->latest_granting; + write_unlock_irq(&state->ss->policy_rwlock); /* Free the old policydb and SID table. */ policydb_destroy(oldpolicydb); sidtab_destroy(&oldsidtab); - kfree(oldmap); + kfree(oldmapping); avc_ss_reset(seqno); selnl_notify_policyload(seqno); - selinux_status_update_policyload(seqno); + selinux_status_update_policyload(state, seqno); selinux_netlbl_cache_invalidate(); selinux_xfrm_notify_policyload(); @@ -2186,7 +2243,7 @@ int security_load_policy(void *data, size_t len) goto out; err: - kfree(map); + kfree(newmap.mapping); sidtab_destroy(&newsidtab); policydb_destroy(newpolicydb); @@ -2195,13 +2252,14 @@ out: return rc; } -size_t security_policydb_len(void) +size_t security_policydb_len(struct selinux_state *state) { + struct policydb *p = &state->ss->policydb; size_t len; - read_lock(&policy_rwlock); - len = policydb.len; - read_unlock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); + len = p->len; + read_unlock(&state->ss->policy_rwlock); return len; } @@ -2212,14 +2270,20 @@ size_t security_policydb_len(void) * @port: port number * @out_sid: security identifier */ -int security_port_sid(u8 protocol, u16 port, u32 *out_sid) +int security_port_sid(struct selinux_state *state, + u8 protocol, u16 port, u32 *out_sid) { + struct policydb *policydb; + struct sidtab *sidtab; struct ocontext *c; int rc = 0; - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); - c = policydb.ocontexts[OCON_PORT]; + policydb = &state->ss->policydb; + sidtab = &state->ss->sidtab; + + c = policydb->ocontexts[OCON_PORT]; while (c) { if (c->u.port.protocol == protocol && c->u.port.low_port <= port && @@ -2230,7 +2294,7 @@ int security_port_sid(u8 protocol, u16 port, u32 *out_sid) if (c) { if (!c->sid[0]) { - rc = sidtab_context_to_sid(&sidtab, + rc = sidtab_context_to_sid(sidtab, &c->context[0], &c->sid[0]); if (rc) @@ -2242,7 +2306,7 @@ int security_port_sid(u8 protocol, u16 port, u32 *out_sid) } out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return rc; } @@ -2252,14 +2316,20 @@ out: * @pkey_num: pkey number * @out_sid: security identifier */ -int security_ib_pkey_sid(u64 subnet_prefix, u16 pkey_num, u32 *out_sid) +int security_ib_pkey_sid(struct selinux_state *state, + u64 subnet_prefix, u16 pkey_num, u32 *out_sid) { + struct policydb *policydb; + struct sidtab *sidtab; struct ocontext *c; int rc = 0; - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); - c = policydb.ocontexts[OCON_IBPKEY]; + policydb = &state->ss->policydb; + sidtab = &state->ss->sidtab; + + c = policydb->ocontexts[OCON_IBPKEY]; while (c) { if (c->u.ibpkey.low_pkey <= pkey_num && c->u.ibpkey.high_pkey >= pkey_num && @@ -2271,7 +2341,7 @@ int security_ib_pkey_sid(u64 subnet_prefix, u16 pkey_num, u32 *out_sid) if (c) { if (!c->sid[0]) { - rc = sidtab_context_to_sid(&sidtab, + rc = sidtab_context_to_sid(sidtab, &c->context[0], &c->sid[0]); if (rc) @@ -2282,7 +2352,7 @@ int security_ib_pkey_sid(u64 subnet_prefix, u16 pkey_num, u32 *out_sid) *out_sid = SECINITSID_UNLABELED; out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return rc; } @@ -2292,14 +2362,20 @@ out: * @port: port number * @out_sid: security identifier */ -int security_ib_endport_sid(const char *dev_name, u8 port_num, u32 *out_sid) +int security_ib_endport_sid(struct selinux_state *state, + const char *dev_name, u8 port_num, u32 *out_sid) { + struct policydb *policydb; + struct sidtab *sidtab; struct ocontext *c; int rc = 0; - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); - c = policydb.ocontexts[OCON_IBENDPORT]; + policydb = &state->ss->policydb; + sidtab = &state->ss->sidtab; + + c = policydb->ocontexts[OCON_IBENDPORT]; while (c) { if (c->u.ibendport.port == port_num && !strncmp(c->u.ibendport.dev_name, @@ -2312,7 +2388,7 @@ int security_ib_endport_sid(const char *dev_name, u8 port_num, u32 *out_sid) if (c) { if (!c->sid[0]) { - rc = sidtab_context_to_sid(&sidtab, + rc = sidtab_context_to_sid(sidtab, &c->context[0], &c->sid[0]); if (rc) @@ -2323,7 +2399,7 @@ int security_ib_endport_sid(const char *dev_name, u8 port_num, u32 *out_sid) *out_sid = SECINITSID_UNLABELED; out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return rc; } @@ -2332,14 +2408,20 @@ out: * @name: interface name * @if_sid: interface SID */ -int security_netif_sid(char *name, u32 *if_sid) +int security_netif_sid(struct selinux_state *state, + char *name, u32 *if_sid) { + struct policydb *policydb; + struct sidtab *sidtab; int rc = 0; struct ocontext *c; - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); - c = policydb.ocontexts[OCON_NETIF]; + policydb = &state->ss->policydb; + sidtab = &state->ss->sidtab; + + c = policydb->ocontexts[OCON_NETIF]; while (c) { if (strcmp(name, c->u.name) == 0) break; @@ -2348,12 +2430,12 @@ int security_netif_sid(char *name, u32 *if_sid) if (c) { if (!c->sid[0] || !c->sid[1]) { - rc = sidtab_context_to_sid(&sidtab, + rc = sidtab_context_to_sid(sidtab, &c->context[0], &c->sid[0]); if (rc) goto out; - rc = sidtab_context_to_sid(&sidtab, + rc = sidtab_context_to_sid(sidtab, &c->context[1], &c->sid[1]); if (rc) @@ -2364,7 +2446,7 @@ int security_netif_sid(char *name, u32 *if_sid) *if_sid = SECINITSID_NETIF; out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return rc; } @@ -2388,15 +2470,21 @@ static int match_ipv6_addrmask(u32 *input, u32 *addr, u32 *mask) * @addrlen: address length in bytes * @out_sid: security identifier */ -int security_node_sid(u16 domain, +int security_node_sid(struct selinux_state *state, + u16 domain, void *addrp, u32 addrlen, u32 *out_sid) { + struct policydb *policydb; + struct sidtab *sidtab; int rc; struct ocontext *c; - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); + + policydb = &state->ss->policydb; + sidtab = &state->ss->sidtab; switch (domain) { case AF_INET: { @@ -2408,7 +2496,7 @@ int security_node_sid(u16 domain, addr = *((u32 *)addrp); - c = policydb.ocontexts[OCON_NODE]; + c = policydb->ocontexts[OCON_NODE]; while (c) { if (c->u.node.addr == (addr & c->u.node.mask)) break; @@ -2421,7 +2509,7 @@ int security_node_sid(u16 domain, rc = -EINVAL; if (addrlen != sizeof(u64) * 2) goto out; - c = policydb.ocontexts[OCON_NODE6]; + c = policydb->ocontexts[OCON_NODE6]; while (c) { if (match_ipv6_addrmask(addrp, c->u.node6.addr, c->u.node6.mask)) @@ -2438,7 +2526,7 @@ int security_node_sid(u16 domain, if (c) { if (!c->sid[0]) { - rc = sidtab_context_to_sid(&sidtab, + rc = sidtab_context_to_sid(sidtab, &c->context[0], &c->sid[0]); if (rc) @@ -2451,7 +2539,7 @@ int security_node_sid(u16 domain, rc = 0; out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return rc; } @@ -2471,11 +2559,14 @@ out: * number of elements in the array. */ -int security_get_user_sids(u32 fromsid, +int security_get_user_sids(struct selinux_state *state, + u32 fromsid, char *username, u32 **sids, u32 *nel) { + struct policydb *policydb; + struct sidtab *sidtab; struct context *fromcon, usercon; u32 *mysids = NULL, *mysids2, sid; u32 mynel = 0, maxnel = SIDS_NEL; @@ -2487,20 +2578,23 @@ int security_get_user_sids(u32 fromsid, *sids = NULL; *nel = 0; - if (!ss_initialized) + if (!state->initialized) goto out; - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); + + policydb = &state->ss->policydb; + sidtab = &state->ss->sidtab; context_init(&usercon); rc = -EINVAL; - fromcon = sidtab_search(&sidtab, fromsid); + fromcon = sidtab_search(sidtab, fromsid); if (!fromcon) goto out_unlock; rc = -EINVAL; - user = hashtab_search(policydb.p_users.table, username); + user = hashtab_search(policydb->p_users.table, username); if (!user) goto out_unlock; @@ -2512,15 +2606,16 @@ int security_get_user_sids(u32 fromsid, goto out_unlock; ebitmap_for_each_positive_bit(&user->roles, rnode, i) { - role = policydb.role_val_to_struct[i]; + role = policydb->role_val_to_struct[i]; usercon.role = i + 1; ebitmap_for_each_positive_bit(&role->types, tnode, j) { usercon.type = j + 1; - if (mls_setup_user_range(fromcon, user, &usercon)) + if (mls_setup_user_range(policydb, fromcon, user, + &usercon)) continue; - rc = sidtab_context_to_sid(&sidtab, &usercon, &sid); + rc = sidtab_context_to_sid(sidtab, &usercon, &sid); if (rc) goto out_unlock; if (mynel < maxnel) { @@ -2540,7 +2635,7 @@ int security_get_user_sids(u32 fromsid, } rc = 0; out_unlock: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); if (rc || !mynel) { kfree(mysids); goto out; @@ -2583,11 +2678,14 @@ out: * * The caller must acquire the policy_rwlock before calling this function. */ -static inline int __security_genfs_sid(const char *fstype, +static inline int __security_genfs_sid(struct selinux_state *state, + const char *fstype, char *path, u16 orig_sclass, u32 *sid) { + struct policydb *policydb = &state->ss->policydb; + struct sidtab *sidtab = &state->ss->sidtab; int len; u16 sclass; struct genfs *genfs; @@ -2597,10 +2695,10 @@ static inline int __security_genfs_sid(const char *fstype, while (path[0] == '/' && path[1] == '/') path++; - sclass = unmap_class(orig_sclass); + sclass = unmap_class(&state->ss->map, orig_sclass); *sid = SECINITSID_UNLABELED; - for (genfs = policydb.genfs; genfs; genfs = genfs->next) { + for (genfs = policydb->genfs; genfs; genfs = genfs->next) { cmp = strcmp(fstype, genfs->fstype); if (cmp <= 0) break; @@ -2622,7 +2720,7 @@ static inline int __security_genfs_sid(const char *fstype, goto out; if (!c->sid[0]) { - rc = sidtab_context_to_sid(&sidtab, &c->context[0], &c->sid[0]); + rc = sidtab_context_to_sid(sidtab, &c->context[0], &c->sid[0]); if (rc) goto out; } @@ -2643,16 +2741,17 @@ out: * Acquire policy_rwlock before calling __security_genfs_sid() and release * it afterward. */ -int security_genfs_sid(const char *fstype, +int security_genfs_sid(struct selinux_state *state, + const char *fstype, char *path, u16 orig_sclass, u32 *sid) { int retval; - read_lock(&policy_rwlock); - retval = __security_genfs_sid(fstype, path, orig_sclass, sid); - read_unlock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); + retval = __security_genfs_sid(state, fstype, path, orig_sclass, sid); + read_unlock(&state->ss->policy_rwlock); return retval; } @@ -2660,16 +2759,21 @@ int security_genfs_sid(const char *fstype, * security_fs_use - Determine how to handle labeling for a filesystem. * @sb: superblock in question */ -int security_fs_use(struct super_block *sb) +int security_fs_use(struct selinux_state *state, struct super_block *sb) { + struct policydb *policydb; + struct sidtab *sidtab; int rc = 0; struct ocontext *c; struct superblock_security_struct *sbsec = sb->s_security; const char *fstype = sb->s_type->name; - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); - c = policydb.ocontexts[OCON_FSUSE]; + policydb = &state->ss->policydb; + sidtab = &state->ss->sidtab; + + c = policydb->ocontexts[OCON_FSUSE]; while (c) { if (strcmp(fstype, c->u.name) == 0) break; @@ -2679,14 +2783,14 @@ int security_fs_use(struct super_block *sb) if (c) { sbsec->behavior = c->v.behavior; if (!c->sid[0]) { - rc = sidtab_context_to_sid(&sidtab, &c->context[0], + rc = sidtab_context_to_sid(sidtab, &c->context[0], &c->sid[0]); if (rc) goto out; } sbsec->sid = c->sid[0]; } else { - rc = __security_genfs_sid(fstype, "/", SECCLASS_DIR, + rc = __security_genfs_sid(state, fstype, "/", SECCLASS_DIR, &sbsec->sid); if (rc) { sbsec->behavior = SECURITY_FS_USE_NONE; @@ -2697,20 +2801,25 @@ int security_fs_use(struct super_block *sb) } out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return rc; } -int security_get_bools(int *len, char ***names, int **values) +int security_get_bools(struct selinux_state *state, + int *len, char ***names, int **values) { + struct policydb *policydb; int i, rc; - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); + + policydb = &state->ss->policydb; + *names = NULL; *values = NULL; rc = 0; - *len = policydb.p_bools.nprim; + *len = policydb->p_bools.nprim; if (!*len) goto out; @@ -2725,16 +2834,17 @@ int security_get_bools(int *len, char ***names, int **values) goto err; for (i = 0; i < *len; i++) { - (*values)[i] = policydb.bool_val_to_struct[i]->state; + (*values)[i] = policydb->bool_val_to_struct[i]->state; rc = -ENOMEM; - (*names)[i] = kstrdup(sym_name(&policydb, SYM_BOOLS, i), GFP_ATOMIC); + (*names)[i] = kstrdup(sym_name(policydb, SYM_BOOLS, i), + GFP_ATOMIC); if (!(*names)[i]) goto err; } rc = 0; out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return rc; err: if (*names) { @@ -2746,90 +2856,98 @@ err: } -int security_set_bools(int len, int *values) +int security_set_bools(struct selinux_state *state, int len, int *values) { + struct policydb *policydb; int i, rc; int lenp, seqno = 0; struct cond_node *cur; - write_lock_irq(&policy_rwlock); + write_lock_irq(&state->ss->policy_rwlock); + + policydb = &state->ss->policydb; rc = -EFAULT; - lenp = policydb.p_bools.nprim; + lenp = policydb->p_bools.nprim; if (len != lenp) goto out; for (i = 0; i < len; i++) { - if (!!values[i] != policydb.bool_val_to_struct[i]->state) { + if (!!values[i] != policydb->bool_val_to_struct[i]->state) { audit_log(current->audit_context, GFP_ATOMIC, AUDIT_MAC_CONFIG_CHANGE, "bool=%s val=%d old_val=%d auid=%u ses=%u", - sym_name(&policydb, SYM_BOOLS, i), + sym_name(policydb, SYM_BOOLS, i), !!values[i], - policydb.bool_val_to_struct[i]->state, + policydb->bool_val_to_struct[i]->state, from_kuid(&init_user_ns, audit_get_loginuid(current)), audit_get_sessionid(current)); } if (values[i]) - policydb.bool_val_to_struct[i]->state = 1; + policydb->bool_val_to_struct[i]->state = 1; else - policydb.bool_val_to_struct[i]->state = 0; + policydb->bool_val_to_struct[i]->state = 0; } - for (cur = policydb.cond_list; cur; cur = cur->next) { - rc = evaluate_cond_node(&policydb, cur); + for (cur = policydb->cond_list; cur; cur = cur->next) { + rc = evaluate_cond_node(policydb, cur); if (rc) goto out; } - seqno = ++latest_granting; + seqno = ++state->ss->latest_granting; rc = 0; out: - write_unlock_irq(&policy_rwlock); + write_unlock_irq(&state->ss->policy_rwlock); if (!rc) { avc_ss_reset(seqno); selnl_notify_policyload(seqno); - selinux_status_update_policyload(seqno); + selinux_status_update_policyload(state, seqno); selinux_xfrm_notify_policyload(); } return rc; } -int security_get_bool_value(int index) +int security_get_bool_value(struct selinux_state *state, + int index) { + struct policydb *policydb; int rc; int len; - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); + + policydb = &state->ss->policydb; rc = -EFAULT; - len = policydb.p_bools.nprim; + len = policydb->p_bools.nprim; if (index >= len) goto out; - rc = policydb.bool_val_to_struct[index]->state; + rc = policydb->bool_val_to_struct[index]->state; out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return rc; } -static int security_preserve_bools(struct policydb *p) +static int security_preserve_bools(struct selinux_state *state, + struct policydb *policydb) { int rc, nbools = 0, *bvalues = NULL, i; char **bnames = NULL; struct cond_bool_datum *booldatum; struct cond_node *cur; - rc = security_get_bools(&nbools, &bnames, &bvalues); + rc = security_get_bools(state, &nbools, &bnames, &bvalues); if (rc) goto out; for (i = 0; i < nbools; i++) { - booldatum = hashtab_search(p->p_bools.table, bnames[i]); + booldatum = hashtab_search(policydb->p_bools.table, bnames[i]); if (booldatum) booldatum->state = bvalues[i]; } - for (cur = p->cond_list; cur; cur = cur->next) { - rc = evaluate_cond_node(p, cur); + for (cur = policydb->cond_list; cur; cur = cur->next) { + rc = evaluate_cond_node(policydb, cur); if (rc) goto out; } @@ -2848,8 +2966,11 @@ out: * security_sid_mls_copy() - computes a new sid based on the given * sid and the mls portion of mls_sid. */ -int security_sid_mls_copy(u32 sid, u32 mls_sid, u32 *new_sid) +int security_sid_mls_copy(struct selinux_state *state, + u32 sid, u32 mls_sid, u32 *new_sid) { + struct policydb *policydb = &state->ss->policydb; + struct sidtab *sidtab = &state->ss->sidtab; struct context *context1; struct context *context2; struct context newcon; @@ -2858,17 +2979,17 @@ int security_sid_mls_copy(u32 sid, u32 mls_sid, u32 *new_sid) int rc; rc = 0; - if (!ss_initialized || !policydb.mls_enabled) { + if (!state->initialized || !policydb->mls_enabled) { *new_sid = sid; goto out; } context_init(&newcon); - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); rc = -EINVAL; - context1 = sidtab_search(&sidtab, sid); + context1 = sidtab_search(sidtab, sid); if (!context1) { printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", __func__, sid); @@ -2876,7 +2997,7 @@ int security_sid_mls_copy(u32 sid, u32 mls_sid, u32 *new_sid) } rc = -EINVAL; - context2 = sidtab_search(&sidtab, mls_sid); + context2 = sidtab_search(sidtab, mls_sid); if (!context2) { printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", __func__, mls_sid); @@ -2891,10 +3012,11 @@ int security_sid_mls_copy(u32 sid, u32 mls_sid, u32 *new_sid) goto out_unlock; /* Check the validity of the new context. */ - if (!policydb_context_isvalid(&policydb, &newcon)) { - rc = convert_context_handle_invalid_context(&newcon); + if (!policydb_context_isvalid(policydb, &newcon)) { + rc = convert_context_handle_invalid_context(state, &newcon); if (rc) { - if (!context_struct_to_string(&newcon, &s, &len)) { + if (!context_struct_to_string(policydb, &newcon, &s, + &len)) { audit_log(current->audit_context, GFP_ATOMIC, AUDIT_SELINUX_ERR, "op=security_sid_mls_copy " @@ -2905,9 +3027,9 @@ int security_sid_mls_copy(u32 sid, u32 mls_sid, u32 *new_sid) } } - rc = sidtab_context_to_sid(&sidtab, &newcon, new_sid); + rc = sidtab_context_to_sid(sidtab, &newcon, new_sid); out_unlock: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); context_destroy(&newcon); out: return rc; @@ -2933,10 +3055,13 @@ out: * multiple, inconsistent labels | - | SECSID_NULL * */ -int security_net_peersid_resolve(u32 nlbl_sid, u32 nlbl_type, +int security_net_peersid_resolve(struct selinux_state *state, + u32 nlbl_sid, u32 nlbl_type, u32 xfrm_sid, u32 *peer_sid) { + struct policydb *policydb = &state->ss->policydb; + struct sidtab *sidtab = &state->ss->sidtab; int rc; struct context *nlbl_ctx; struct context *xfrm_ctx; @@ -2958,23 +3083,25 @@ int security_net_peersid_resolve(u32 nlbl_sid, u32 nlbl_type, return 0; } - /* we don't need to check ss_initialized here since the only way both + /* + * We don't need to check initialized here since the only way both * nlbl_sid and xfrm_sid are not equal to SECSID_NULL would be if the - * security server was initialized and ss_initialized was true */ - if (!policydb.mls_enabled) + * security server was initialized and state->initialized was true. + */ + if (!policydb->mls_enabled) return 0; - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); rc = -EINVAL; - nlbl_ctx = sidtab_search(&sidtab, nlbl_sid); + nlbl_ctx = sidtab_search(sidtab, nlbl_sid); if (!nlbl_ctx) { printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", __func__, nlbl_sid); goto out; } rc = -EINVAL; - xfrm_ctx = sidtab_search(&sidtab, xfrm_sid); + xfrm_ctx = sidtab_search(sidtab, xfrm_sid); if (!xfrm_ctx) { printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", __func__, xfrm_sid); @@ -2991,7 +3118,7 @@ int security_net_peersid_resolve(u32 nlbl_sid, u32 nlbl_type, * expressive */ *peer_sid = xfrm_sid; out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return rc; } @@ -3008,19 +3135,21 @@ static int get_classes_callback(void *k, void *d, void *args) return 0; } -int security_get_classes(char ***classes, int *nclasses) +int security_get_classes(struct selinux_state *state, + char ***classes, int *nclasses) { + struct policydb *policydb = &state->ss->policydb; int rc; - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); rc = -ENOMEM; - *nclasses = policydb.p_classes.nprim; + *nclasses = policydb->p_classes.nprim; *classes = kcalloc(*nclasses, sizeof(**classes), GFP_ATOMIC); if (!*classes) goto out; - rc = hashtab_map(policydb.p_classes.table, get_classes_callback, + rc = hashtab_map(policydb->p_classes.table, get_classes_callback, *classes); if (rc) { int i; @@ -3030,7 +3159,7 @@ int security_get_classes(char ***classes, int *nclasses) } out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return rc; } @@ -3047,15 +3176,17 @@ static int get_permissions_callback(void *k, void *d, void *args) return 0; } -int security_get_permissions(char *class, char ***perms, int *nperms) +int security_get_permissions(struct selinux_state *state, + char *class, char ***perms, int *nperms) { + struct policydb *policydb = &state->ss->policydb; int rc, i; struct class_datum *match; - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); rc = -EINVAL; - match = hashtab_search(policydb.p_classes.table, class); + match = hashtab_search(policydb->p_classes.table, class); if (!match) { printk(KERN_ERR "SELinux: %s: unrecognized class %s\n", __func__, class); @@ -3081,25 +3212,25 @@ int security_get_permissions(char *class, char ***perms, int *nperms) goto err; out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return rc; err: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); for (i = 0; i < *nperms; i++) kfree((*perms)[i]); kfree(*perms); return rc; } -int security_get_reject_unknown(void) +int security_get_reject_unknown(struct selinux_state *state) { - return policydb.reject_unknown; + return state->ss->policydb.reject_unknown; } -int security_get_allow_unknown(void) +int security_get_allow_unknown(struct selinux_state *state) { - return policydb.allow_unknown; + return state->ss->policydb.allow_unknown; } /** @@ -3112,13 +3243,15 @@ int security_get_allow_unknown(void) * supported, false (0) if it isn't supported. * */ -int security_policycap_supported(unsigned int req_cap) +int security_policycap_supported(struct selinux_state *state, + unsigned int req_cap) { + struct policydb *policydb = &state->ss->policydb; int rc; - read_lock(&policy_rwlock); - rc = ebitmap_get_bit(&policydb.policycaps, req_cap); - read_unlock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); + rc = ebitmap_get_bit(&policydb->policycaps, req_cap); + read_unlock(&state->ss->policy_rwlock); return rc; } @@ -3140,6 +3273,8 @@ void selinux_audit_rule_free(void *vrule) int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule) { + struct selinux_state *state = &selinux_state; + struct policydb *policydb = &state->ss->policydb; struct selinux_audit_rule *tmprule; struct role_datum *roledatum; struct type_datum *typedatum; @@ -3149,7 +3284,7 @@ int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule) *rule = NULL; - if (!ss_initialized) + if (!state->initialized) return -EOPNOTSUPP; switch (field) { @@ -3182,15 +3317,15 @@ int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule) context_init(&tmprule->au_ctxt); - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); - tmprule->au_seqno = latest_granting; + tmprule->au_seqno = state->ss->latest_granting; switch (field) { case AUDIT_SUBJ_USER: case AUDIT_OBJ_USER: rc = -EINVAL; - userdatum = hashtab_search(policydb.p_users.table, rulestr); + userdatum = hashtab_search(policydb->p_users.table, rulestr); if (!userdatum) goto out; tmprule->au_ctxt.user = userdatum->value; @@ -3198,7 +3333,7 @@ int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule) case AUDIT_SUBJ_ROLE: case AUDIT_OBJ_ROLE: rc = -EINVAL; - roledatum = hashtab_search(policydb.p_roles.table, rulestr); + roledatum = hashtab_search(policydb->p_roles.table, rulestr); if (!roledatum) goto out; tmprule->au_ctxt.role = roledatum->value; @@ -3206,7 +3341,7 @@ int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule) case AUDIT_SUBJ_TYPE: case AUDIT_OBJ_TYPE: rc = -EINVAL; - typedatum = hashtab_search(policydb.p_types.table, rulestr); + typedatum = hashtab_search(policydb->p_types.table, rulestr); if (!typedatum) goto out; tmprule->au_ctxt.type = typedatum->value; @@ -3215,14 +3350,15 @@ int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule) case AUDIT_SUBJ_CLR: case AUDIT_OBJ_LEV_LOW: case AUDIT_OBJ_LEV_HIGH: - rc = mls_from_string(rulestr, &tmprule->au_ctxt, GFP_ATOMIC); + rc = mls_from_string(policydb, rulestr, &tmprule->au_ctxt, + GFP_ATOMIC); if (rc) goto out; break; } rc = 0; out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); if (rc) { selinux_audit_rule_free(tmprule); @@ -3262,6 +3398,7 @@ int selinux_audit_rule_known(struct audit_krule *rule) int selinux_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule, struct audit_context *actx) { + struct selinux_state *state = &selinux_state; struct context *ctxt; struct mls_level *level; struct selinux_audit_rule *rule = vrule; @@ -3272,14 +3409,14 @@ int selinux_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule, return -ENOENT; } - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); - if (rule->au_seqno < latest_granting) { + if (rule->au_seqno < state->ss->latest_granting) { match = -ESTALE; goto out; } - ctxt = sidtab_search(&sidtab, sid); + ctxt = sidtab_search(&state->ss->sidtab, sid); if (unlikely(!ctxt)) { WARN_ONCE(1, "selinux_audit_rule_match: unrecognized SID %d\n", sid); @@ -3363,7 +3500,7 @@ int selinux_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule, } out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return match; } @@ -3437,19 +3574,22 @@ static void security_netlbl_cache_add(struct netlbl_lsm_secattr *secattr, * failure. * */ -int security_netlbl_secattr_to_sid(struct netlbl_lsm_secattr *secattr, +int security_netlbl_secattr_to_sid(struct selinux_state *state, + struct netlbl_lsm_secattr *secattr, u32 *sid) { + struct policydb *policydb = &state->ss->policydb; + struct sidtab *sidtab = &state->ss->sidtab; int rc; struct context *ctx; struct context ctx_new; - if (!ss_initialized) { + if (!state->initialized) { *sid = SECSID_NULL; return 0; } - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); if (secattr->flags & NETLBL_SECATTR_CACHE) *sid = *(u32 *)secattr->cache->data; @@ -3457,7 +3597,7 @@ int security_netlbl_secattr_to_sid(struct netlbl_lsm_secattr *secattr, *sid = secattr->attr.secid; else if (secattr->flags & NETLBL_SECATTR_MLS_LVL) { rc = -EIDRM; - ctx = sidtab_search(&sidtab, SECINITSID_NETMSG); + ctx = sidtab_search(sidtab, SECINITSID_NETMSG); if (ctx == NULL) goto out; @@ -3465,17 +3605,17 @@ int security_netlbl_secattr_to_sid(struct netlbl_lsm_secattr *secattr, ctx_new.user = ctx->user; ctx_new.role = ctx->role; ctx_new.type = ctx->type; - mls_import_netlbl_lvl(&ctx_new, secattr); + mls_import_netlbl_lvl(policydb, &ctx_new, secattr); if (secattr->flags & NETLBL_SECATTR_MLS_CAT) { - rc = mls_import_netlbl_cat(&ctx_new, secattr); + rc = mls_import_netlbl_cat(policydb, &ctx_new, secattr); if (rc) goto out; } rc = -EIDRM; - if (!mls_context_isvalid(&policydb, &ctx_new)) + if (!mls_context_isvalid(policydb, &ctx_new)) goto out_free; - rc = sidtab_context_to_sid(&sidtab, &ctx_new, sid); + rc = sidtab_context_to_sid(sidtab, &ctx_new, sid); if (rc) goto out_free; @@ -3485,12 +3625,12 @@ int security_netlbl_secattr_to_sid(struct netlbl_lsm_secattr *secattr, } else *sid = SECSID_NULL; - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return 0; out_free: ebitmap_destroy(&ctx_new.range.level[0].cat); out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return rc; } @@ -3504,33 +3644,35 @@ out: * Returns zero on success, negative values on failure. * */ -int security_netlbl_sid_to_secattr(u32 sid, struct netlbl_lsm_secattr *secattr) +int security_netlbl_sid_to_secattr(struct selinux_state *state, + u32 sid, struct netlbl_lsm_secattr *secattr) { + struct policydb *policydb = &state->ss->policydb; int rc; struct context *ctx; - if (!ss_initialized) + if (!state->initialized) return 0; - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); rc = -ENOENT; - ctx = sidtab_search(&sidtab, sid); + ctx = sidtab_search(&state->ss->sidtab, sid); if (ctx == NULL) goto out; rc = -ENOMEM; - secattr->domain = kstrdup(sym_name(&policydb, SYM_TYPES, ctx->type - 1), + secattr->domain = kstrdup(sym_name(policydb, SYM_TYPES, ctx->type - 1), GFP_ATOMIC); if (secattr->domain == NULL) goto out; secattr->attr.secid = sid; secattr->flags |= NETLBL_SECATTR_DOMAIN_CPY | NETLBL_SECATTR_SECID; - mls_export_netlbl_lvl(ctx, secattr); - rc = mls_export_netlbl_cat(ctx, secattr); + mls_export_netlbl_lvl(policydb, ctx, secattr); + rc = mls_export_netlbl_cat(policydb, ctx, secattr); out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return rc; } #endif /* CONFIG_NETLABEL */ @@ -3541,15 +3683,17 @@ out: * @len: length of data in bytes * */ -int security_read_policy(void **data, size_t *len) +int security_read_policy(struct selinux_state *state, + void **data, size_t *len) { + struct policydb *policydb = &state->ss->policydb; int rc; struct policy_file fp; - if (!ss_initialized) + if (!state->initialized) return -EINVAL; - *len = security_policydb_len(); + *len = security_policydb_len(state); *data = vmalloc_user(*len); if (!*data) @@ -3558,9 +3702,9 @@ int security_read_policy(void **data, size_t *len) fp.data = *data; fp.len = *len; - read_lock(&policy_rwlock); - rc = policydb_write(&policydb, &fp); - read_unlock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); + rc = policydb_write(policydb, &fp); + read_unlock(&state->ss->policy_rwlock); if (rc) return rc; diff --git a/security/selinux/ss/services.h b/security/selinux/ss/services.h index 356bdd36cf6d..24c7bdcc8075 100644 --- a/security/selinux/ss/services.h +++ b/security/selinux/ss/services.h @@ -10,7 +10,28 @@ #include "policydb.h" #include "sidtab.h" -extern struct policydb policydb; +/* Mapping for a single class */ +struct selinux_mapping { + u16 value; /* policy value for class */ + unsigned int num_perms; /* number of permissions in class */ + u32 perms[sizeof(u32) * 8]; /* policy values for permissions */ +}; + +/* Map for all of the classes, with array size */ +struct selinux_map { + struct selinux_mapping *mapping; /* indexed by class */ + u16 size; /* array size of mapping */ +}; + +struct selinux_ss { + struct sidtab sidtab; + struct policydb policydb; + rwlock_t policy_rwlock; + u32 latest_granting; + struct selinux_map map; + struct page *status_page; + struct mutex status_lock; +}; void services_compute_xperms_drivers(struct extended_perms *xperms, struct avtab_node *node); @@ -19,4 +40,3 @@ void services_compute_xperms_decision(struct extended_perms_decision *xpermd, struct avtab_node *node); #endif /* _SS_SERVICES_H_ */ - diff --git a/security/selinux/ss/status.c b/security/selinux/ss/status.c index d982365f9d1a..043efc59f8e4 100644 --- a/security/selinux/ss/status.c +++ b/security/selinux/ss/status.c @@ -35,8 +35,6 @@ * In most cases, application shall confirm the kernel status is not * changed without any system call invocations. */ -static struct page *selinux_status_page; -static DEFINE_MUTEX(selinux_status_lock); /* * selinux_kernel_status_page @@ -44,21 +42,21 @@ static DEFINE_MUTEX(selinux_status_lock); * It returns a reference to selinux_status_page. If the status page is * not allocated yet, it also tries to allocate it at the first time. */ -struct page *selinux_kernel_status_page(void) +struct page *selinux_kernel_status_page(struct selinux_state *state) { struct selinux_kernel_status *status; struct page *result = NULL; - mutex_lock(&selinux_status_lock); - if (!selinux_status_page) { - selinux_status_page = alloc_page(GFP_KERNEL|__GFP_ZERO); + mutex_lock(&state->ss->status_lock); + if (!state->ss->status_page) { + state->ss->status_page = alloc_page(GFP_KERNEL|__GFP_ZERO); - if (selinux_status_page) { - status = page_address(selinux_status_page); + if (state->ss->status_page) { + status = page_address(state->ss->status_page); status->version = SELINUX_KERNEL_STATUS_VERSION; status->sequence = 0; - status->enforcing = selinux_enforcing; + status->enforcing = is_enforcing(state); /* * NOTE: the next policyload event shall set * a positive value on the status->policyload, @@ -66,11 +64,12 @@ struct page *selinux_kernel_status_page(void) * So, application can know it was updated. */ status->policyload = 0; - status->deny_unknown = !security_get_allow_unknown(); + status->deny_unknown = + !security_get_allow_unknown(state); } } - result = selinux_status_page; - mutex_unlock(&selinux_status_lock); + result = state->ss->status_page; + mutex_unlock(&state->ss->status_lock); return result; } @@ -80,13 +79,14 @@ struct page *selinux_kernel_status_page(void) * * It updates status of the current enforcing/permissive mode. */ -void selinux_status_update_setenforce(int enforcing) +void selinux_status_update_setenforce(struct selinux_state *state, + int enforcing) { struct selinux_kernel_status *status; - mutex_lock(&selinux_status_lock); - if (selinux_status_page) { - status = page_address(selinux_status_page); + mutex_lock(&state->ss->status_lock); + if (state->ss->status_page) { + status = page_address(state->ss->status_page); status->sequence++; smp_wmb(); @@ -96,7 +96,7 @@ void selinux_status_update_setenforce(int enforcing) smp_wmb(); status->sequence++; } - mutex_unlock(&selinux_status_lock); + mutex_unlock(&state->ss->status_lock); } /* @@ -105,22 +105,23 @@ void selinux_status_update_setenforce(int enforcing) * It updates status of the times of policy reloaded, and current * setting of deny_unknown. */ -void selinux_status_update_policyload(int seqno) +void selinux_status_update_policyload(struct selinux_state *state, + int seqno) { struct selinux_kernel_status *status; - mutex_lock(&selinux_status_lock); - if (selinux_status_page) { - status = page_address(selinux_status_page); + mutex_lock(&state->ss->status_lock); + if (state->ss->status_page) { + status = page_address(state->ss->status_page); status->sequence++; smp_wmb(); status->policyload = seqno; - status->deny_unknown = !security_get_allow_unknown(); + status->deny_unknown = !security_get_allow_unknown(state); smp_wmb(); status->sequence++; } - mutex_unlock(&selinux_status_lock); + mutex_unlock(&state->ss->status_lock); } diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index 56e354fcdfc6..dfbe4f32ef85 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -101,7 +101,8 @@ static int selinux_xfrm_alloc_user(struct xfrm_sec_ctx **ctxp, ctx->ctx_len = str_len; memcpy(ctx->ctx_str, &uctx[1], str_len); ctx->ctx_str[str_len] = '\0'; - rc = security_context_to_sid(ctx->ctx_str, str_len, &ctx->ctx_sid, gfp); + rc = security_context_to_sid(&selinux_state, ctx->ctx_str, str_len, + &ctx->ctx_sid, gfp); if (rc) goto err; @@ -352,7 +353,8 @@ int selinux_xfrm_state_alloc_acquire(struct xfrm_state *x, if (secid == 0) return -EINVAL; - rc = security_sid_to_context(secid, &ctx_str, &str_len); + rc = security_sid_to_context(&selinux_state, secid, &ctx_str, + &str_len); if (rc) return rc; From 016d8cd33013a124e139d583d38850379afb654b Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 1 Mar 2018 17:38:30 -0500 Subject: [PATCH 2039/3715] UPSTREAM: selinux: rename the {is,set}_enforcing() functions Rename is_enforcing() to enforcing_enabled() and enforcing_set() to set_enforcing(). Signed-off-by: Paul Moore (cherry picked from commit e5a5ca96a42ca7eee19cf8694377308771350950) Change-Id: I6063be6809663000835461787d9e43df2f6fd853 Bug: 140252993 Signed-off-by: Jeff Vander Stoep --- security/selinux/avc.c | 2 +- security/selinux/hooks.c | 4 ++-- security/selinux/include/security.h | 8 ++++---- security/selinux/selinuxfs.c | 6 +++--- security/selinux/ss/services.c | 6 +++--- security/selinux/ss/status.c | 2 +- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 36124f48a5ff..54b09cc03b55 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -985,7 +985,7 @@ static noinline int avc_denied(u32 ssid, u32 tsid, if (flags & AVC_STRICT) return -EACCES; - if (is_enforcing(&selinux_state) && + if (enforcing_enabled(&selinux_state) && !(avd->flags & AVD_FLAGS_PERMISSIVE)) return -EACCES; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index ac16028711de..6aa72501e537 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -5206,7 +5206,7 @@ static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb) sk->sk_protocol, nlh->nlmsg_type, secclass_map[sksec->sclass - 1].name, task_pid_nr(current), current->comm); - if (!is_enforcing(&selinux_state) || + if (!enforcing_enabled(&selinux_state) || security_get_allow_unknown(&selinux_state)) err = 0; } @@ -6806,7 +6806,7 @@ static __init int selinux_init(void) printk(KERN_INFO "SELinux: Initializing.\n"); memset(&selinux_state, 0, sizeof(selinux_state)); - set_enforcing(&selinux_state, selinux_enforcing_boot); + enforcing_set(&selinux_state, selinux_enforcing_boot); selinux_state.checkreqprot = selinux_checkreqprot_boot; selinux_ss_init(&selinux_state.ss); diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index c3a1ef10e710..f1db09a5f521 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -111,22 +111,22 @@ void selinux_ss_init(struct selinux_ss **ss); extern struct selinux_state selinux_state; #ifdef CONFIG_SECURITY_SELINUX_DEVELOP -static inline bool is_enforcing(struct selinux_state *state) +static inline bool enforcing_enabled(struct selinux_state *state) { return state->enforcing; } -static inline void set_enforcing(struct selinux_state *state, bool value) +static inline void enforcing_set(struct selinux_state *state, bool value) { state->enforcing = value; } #else -static inline bool is_enforcing(struct selinux_state *state) +static inline bool enforcing_enabled(struct selinux_state *state) { return true; } -static inline void set_enforcing(struct selinux_state *state, bool value) +static inline void enforcing_set(struct selinux_state *state, bool value) { } #endif diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 98492755adbf..0dbd5fd6a396 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -98,7 +98,7 @@ static ssize_t sel_read_enforce(struct file *filp, char __user *buf, ssize_t length; length = scnprintf(tmpbuf, TMPBUFLEN, "%d", - is_enforcing(&selinux_state)); + enforcing_enabled(&selinux_state)); return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); } @@ -128,7 +128,7 @@ static ssize_t sel_write_enforce(struct file *file, const char __user *buf, new_value = !!new_value; - old_value = is_enforcing(&selinux_state); + old_value = enforcing_enabled(&selinux_state); if (new_value != old_value) { length = avc_has_perm(current_sid(), SECINITSID_SECURITY, @@ -141,7 +141,7 @@ static ssize_t sel_write_enforce(struct file *file, const char __user *buf, new_value, old_value, from_kuid(&init_user_ns, audit_get_loginuid(current)), audit_get_sessionid(current)); - set_enforcing(&selinux_state, new_value); + enforcing_set(&selinux_state, new_value); if (new_value) avc_ss_reset(0); selnl_notify_setenforce(new_value); diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 1989ccab19cc..70714eb56101 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -752,7 +752,7 @@ out: kfree(n); kfree(t); - if (!is_enforcing(state)) + if (!enforcing_enabled(state)) return 0; return -EPERM; } @@ -1596,7 +1596,7 @@ out: kfree(s); kfree(t); kfree(n); - if (!is_enforcing(state)) + if (!enforcing_enabled(state)) return 0; return -EACCES; } @@ -1907,7 +1907,7 @@ static inline int convert_context_handle_invalid_context( char *s; u32 len; - if (is_enforcing(state)) + if (enforcing_enabled(state)) return -EINVAL; if (!context_struct_to_string(policydb, context, &s, &len)) { diff --git a/security/selinux/ss/status.c b/security/selinux/ss/status.c index 043efc59f8e4..a121de45ac0e 100644 --- a/security/selinux/ss/status.c +++ b/security/selinux/ss/status.c @@ -56,7 +56,7 @@ struct page *selinux_kernel_status_page(struct selinux_state *state) status->version = SELINUX_KERNEL_STATUS_VERSION; status->sequence = 0; - status->enforcing = is_enforcing(state); + status->enforcing = enforcing_enabled(state); /* * NOTE: the next policyload event shall set * a positive value on the status->policyload, From c1a453d2ecdbf33abfec241898183871528aac49 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Tue, 20 Mar 2018 11:59:11 -0400 Subject: [PATCH 2040/3715] UPSTREAM: selinux: wrap selinuxfs state Move global selinuxfs state to a per-instance structure (selinux_fs_info), and include a pointer to the selinux_state in this structure. Pass this selinux_state to all security server operations, thereby ensuring that each selinuxfs instance presents a view of and acts as an interface to a particular selinux_state instance. This change should have no effect on SELinux behavior or APIs (userspace or LSM). It merely wraps the selinuxfs global state, links it to a particular selinux_state (currently always the single global selinux_state) and uses that state for all operations. Signed-off-by: Stephen Smalley Signed-off-by: Paul Moore (cherry picked from commit 0619f0f5e36f12e100ef294f5980cfe7c93ff23e) Change-Id: I62ef7f867b0c04391a1f918d516514344483b40c Bug: 140252993 Signed-off-by: Jeff Vander Stoep --- security/selinux/selinuxfs.c | 438 +++++++++++++++++++++-------------- 1 file changed, 261 insertions(+), 177 deletions(-) diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 0dbd5fd6a396..41099cc3d5e2 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -41,23 +42,6 @@ #include "objsec.h" #include "conditional.h" -static DEFINE_MUTEX(sel_mutex); - -/* global data for booleans */ -static struct dentry *bool_dir; -static int bool_num; -static char **bool_pending_names; -static int *bool_pending_values; - -/* global data for classes */ -static struct dentry *class_dir; -static unsigned long last_class_ino; - -static char policy_opened; - -/* global data for policy capabilities */ -static struct dentry *policycap_dir; - enum sel_inos { SEL_ROOT_INO = 2, SEL_LOAD, /* load policy */ @@ -82,7 +66,51 @@ enum sel_inos { SEL_INO_NEXT, /* The next inode number to use */ }; -static unsigned long sel_last_ino = SEL_INO_NEXT - 1; +struct selinux_fs_info { + struct dentry *bool_dir; + unsigned int bool_num; + char **bool_pending_names; + unsigned int *bool_pending_values; + struct dentry *class_dir; + unsigned long last_class_ino; + bool policy_opened; + struct dentry *policycap_dir; + struct mutex mutex; + unsigned long last_ino; + struct selinux_state *state; + struct super_block *sb; +}; + +static int selinux_fs_info_create(struct super_block *sb) +{ + struct selinux_fs_info *fsi; + + fsi = kzalloc(sizeof(*fsi), GFP_KERNEL); + if (!fsi) + return -ENOMEM; + + mutex_init(&fsi->mutex); + fsi->last_ino = SEL_INO_NEXT - 1; + fsi->state = &selinux_state; + fsi->sb = sb; + sb->s_fs_info = fsi; + return 0; +} + +static void selinux_fs_info_free(struct super_block *sb) +{ + struct selinux_fs_info *fsi = sb->s_fs_info; + int i; + + if (fsi) { + for (i = 0; i < fsi->bool_num; i++) + kfree(fsi->bool_pending_names[i]); + kfree(fsi->bool_pending_names); + kfree(fsi->bool_pending_values); + } + kfree(sb->s_fs_info); + sb->s_fs_info = NULL; +} #define SEL_INITCON_INO_OFFSET 0x01000000 #define SEL_BOOL_INO_OFFSET 0x02000000 @@ -94,11 +122,12 @@ static unsigned long sel_last_ino = SEL_INO_NEXT - 1; static ssize_t sel_read_enforce(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(filp)->i_sb->s_fs_info; char tmpbuf[TMPBUFLEN]; ssize_t length; length = scnprintf(tmpbuf, TMPBUFLEN, "%d", - enforcing_enabled(&selinux_state)); + enforcing_enabled(fsi->state)); return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); } @@ -107,6 +136,8 @@ static ssize_t sel_write_enforce(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; + struct selinux_state *state = fsi->state; char *page = NULL; ssize_t length; int old_value, new_value; @@ -128,8 +159,7 @@ static ssize_t sel_write_enforce(struct file *file, const char __user *buf, new_value = !!new_value; - old_value = enforcing_enabled(&selinux_state); - + old_value = enforcing_enabled(state); if (new_value != old_value) { length = avc_has_perm(current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__SETENFORCE, @@ -141,12 +171,11 @@ static ssize_t sel_write_enforce(struct file *file, const char __user *buf, new_value, old_value, from_kuid(&init_user_ns, audit_get_loginuid(current)), audit_get_sessionid(current)); - enforcing_set(&selinux_state, new_value); + enforcing_set(state, new_value); if (new_value) avc_ss_reset(0); selnl_notify_setenforce(new_value); - selinux_status_update_setenforce(&selinux_state, - new_value); + selinux_status_update_setenforce(state, new_value); if (!new_value) call_lsm_notifier(LSM_POLICY_CHANGE, NULL); } @@ -168,12 +197,14 @@ static const struct file_operations sel_enforce_ops = { static ssize_t sel_read_handle_unknown(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(filp)->i_sb->s_fs_info; + struct selinux_state *state = fsi->state; char tmpbuf[TMPBUFLEN]; ssize_t length; ino_t ino = file_inode(filp)->i_ino; int handle_unknown = (ino == SEL_REJECT_UNKNOWN) ? - security_get_reject_unknown(&selinux_state) : - !security_get_allow_unknown(&selinux_state); + security_get_reject_unknown(state) : + !security_get_allow_unknown(state); length = scnprintf(tmpbuf, TMPBUFLEN, "%d", handle_unknown); return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); @@ -186,7 +217,8 @@ static const struct file_operations sel_handle_unknown_ops = { static int sel_open_handle_status(struct inode *inode, struct file *filp) { - struct page *status = selinux_kernel_status_page(&selinux_state); + struct selinux_fs_info *fsi = file_inode(filp)->i_sb->s_fs_info; + struct page *status = selinux_kernel_status_page(fsi->state); if (!status) return -ENOMEM; @@ -242,6 +274,7 @@ static ssize_t sel_write_disable(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; char *page; ssize_t length; int new_value; @@ -262,7 +295,7 @@ static ssize_t sel_write_disable(struct file *file, const char __user *buf, goto out; if (new_value) { - length = selinux_disable(&selinux_state); + length = selinux_disable(fsi->state); if (length) goto out; audit_log(current->audit_context, GFP_KERNEL, AUDIT_MAC_STATUS, @@ -301,9 +334,9 @@ static const struct file_operations sel_policyvers_ops = { }; /* declaration for sel_write_load */ -static int sel_make_bools(void); -static int sel_make_classes(void); -static int sel_make_policycap(void); +static int sel_make_bools(struct selinux_fs_info *fsi); +static int sel_make_classes(struct selinux_fs_info *fsi); +static int sel_make_policycap(struct selinux_fs_info *fsi); /* declaration for sel_make_class_dirs */ static struct dentry *sel_make_dir(struct dentry *dir, const char *name, @@ -312,11 +345,12 @@ static struct dentry *sel_make_dir(struct dentry *dir, const char *name, static ssize_t sel_read_mls(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(filp)->i_sb->s_fs_info; char tmpbuf[TMPBUFLEN]; ssize_t length; length = scnprintf(tmpbuf, TMPBUFLEN, "%d", - security_mls_enabled(&selinux_state)); + security_mls_enabled(fsi->state)); return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); } @@ -332,12 +366,14 @@ struct policy_load_memory { static int sel_open_policy(struct inode *inode, struct file *filp) { + struct selinux_fs_info *fsi = inode->i_sb->s_fs_info; + struct selinux_state *state = fsi->state; struct policy_load_memory *plm = NULL; int rc; BUG_ON(filp->private_data); - mutex_lock(&sel_mutex); + mutex_lock(&fsi->mutex); rc = avc_has_perm(current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__READ_POLICY, NULL); @@ -345,7 +381,7 @@ static int sel_open_policy(struct inode *inode, struct file *filp) goto err; rc = -EBUSY; - if (policy_opened) + if (fsi->policy_opened) goto err; rc = -ENOMEM; @@ -353,25 +389,25 @@ static int sel_open_policy(struct inode *inode, struct file *filp) if (!plm) goto err; - if (i_size_read(inode) != security_policydb_len(&selinux_state)) { + if (i_size_read(inode) != security_policydb_len(state)) { inode_lock(inode); - i_size_write(inode, security_policydb_len(&selinux_state)); + i_size_write(inode, security_policydb_len(state)); inode_unlock(inode); } - rc = security_read_policy(&selinux_state, &plm->data, &plm->len); + rc = security_read_policy(state, &plm->data, &plm->len); if (rc) goto err; - policy_opened = 1; + fsi->policy_opened = 1; filp->private_data = plm; - mutex_unlock(&sel_mutex); + mutex_unlock(&fsi->mutex); return 0; err: - mutex_unlock(&sel_mutex); + mutex_unlock(&fsi->mutex); if (plm) vfree(plm->data); @@ -381,11 +417,12 @@ err: static int sel_release_policy(struct inode *inode, struct file *filp) { + struct selinux_fs_info *fsi = inode->i_sb->s_fs_info; struct policy_load_memory *plm = filp->private_data; BUG_ON(!plm); - policy_opened = 0; + fsi->policy_opened = 0; vfree(plm->data); kfree(plm); @@ -396,10 +433,11 @@ static int sel_release_policy(struct inode *inode, struct file *filp) static ssize_t sel_read_policy(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(filp)->i_sb->s_fs_info; struct policy_load_memory *plm = filp->private_data; int ret; - mutex_lock(&sel_mutex); + mutex_lock(&fsi->mutex); ret = avc_has_perm(current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__READ_POLICY, NULL); @@ -408,7 +446,7 @@ static ssize_t sel_read_policy(struct file *filp, char __user *buf, ret = simple_read_from_buffer(buf, count, ppos, plm->data, plm->len); out: - mutex_unlock(&sel_mutex); + mutex_unlock(&fsi->mutex); return ret; } @@ -462,14 +500,40 @@ static const struct file_operations sel_policy_ops = { .llseek = generic_file_llseek, }; +static int sel_make_policy_nodes(struct selinux_fs_info *fsi) +{ + int ret; + + ret = sel_make_bools(fsi); + if (ret) { + pr_err("SELinux: failed to load policy booleans\n"); + return ret; + } + + ret = sel_make_classes(fsi); + if (ret) { + pr_err("SELinux: failed to load policy classes\n"); + return ret; + } + + ret = sel_make_policycap(fsi); + if (ret) { + pr_err("SELinux: failed to load policy capabilities\n"); + return ret; + } + + return 0; +} + static ssize_t sel_write_load(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; ssize_t length; void *data = NULL; - mutex_lock(&sel_mutex); + mutex_lock(&fsi->mutex); length = avc_has_perm(current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__LOAD_POLICY, NULL); @@ -494,29 +558,15 @@ static ssize_t sel_write_load(struct file *file, const char __user *buf, if (copy_from_user(data, buf, count) != 0) goto out; - length = security_load_policy(&selinux_state, data, count); + length = security_load_policy(fsi->state, data, count); if (length) { pr_warn_ratelimited("SELinux: failed to load policy\n"); goto out; } - length = sel_make_bools(); - if (length) { - pr_err("SELinux: failed to load policy booleans\n"); + length = sel_make_policy_nodes(fsi); + if (length) goto out1; - } - - length = sel_make_classes(); - if (length) { - pr_err("SELinux: failed to load policy classes\n"); - goto out1; - } - - length = sel_make_policycap(); - if (length) { - pr_err("SELinux: failed to load policy capabilities\n"); - goto out1; - } length = count; @@ -526,7 +576,7 @@ out1: from_kuid(&init_user_ns, audit_get_loginuid(current)), audit_get_sessionid(current)); out: - mutex_unlock(&sel_mutex); + mutex_unlock(&fsi->mutex); vfree(data); return length; } @@ -538,6 +588,8 @@ static const struct file_operations sel_load_ops = { static ssize_t sel_write_context(struct file *file, char *buf, size_t size) { + struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; + struct selinux_state *state = fsi->state; char *canon = NULL; u32 sid, len; ssize_t length; @@ -547,12 +599,11 @@ static ssize_t sel_write_context(struct file *file, char *buf, size_t size) if (length) goto out; - length = security_context_to_sid(&selinux_state, buf, size, - &sid, GFP_KERNEL); + length = security_context_to_sid(state, buf, size, &sid, GFP_KERNEL); if (length) goto out; - length = security_sid_to_context(&selinux_state, sid, &canon, &len); + length = security_sid_to_context(state, sid, &canon, &len); if (length) goto out; @@ -573,16 +624,18 @@ out: static ssize_t sel_read_checkreqprot(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(filp)->i_sb->s_fs_info; char tmpbuf[TMPBUFLEN]; ssize_t length; - length = scnprintf(tmpbuf, TMPBUFLEN, "%u", selinux_state.checkreqprot); + length = scnprintf(tmpbuf, TMPBUFLEN, "%u", fsi->state->checkreqprot); return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); } static ssize_t sel_write_checkreqprot(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; char *page; ssize_t length; unsigned int new_value; @@ -608,7 +661,7 @@ static ssize_t sel_write_checkreqprot(struct file *file, const char __user *buf, if (sscanf(page, "%u", &new_value) != 1) goto out; - selinux_state.checkreqprot = new_value ? 1 : 0; + fsi->state->checkreqprot = new_value ? 1 : 0; length = count; out: kfree(page); @@ -624,6 +677,8 @@ static ssize_t sel_write_validatetrans(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; + struct selinux_state *state = fsi->state; char *oldcon = NULL, *newcon = NULL, *taskcon = NULL; char *req = NULL; u32 osid, nsid, tsid; @@ -668,23 +723,19 @@ static ssize_t sel_write_validatetrans(struct file *file, if (sscanf(req, "%s %s %hu %s", oldcon, newcon, &tclass, taskcon) != 4) goto out; - rc = security_context_str_to_sid(&selinux_state, oldcon, &osid, - GFP_KERNEL); + rc = security_context_str_to_sid(state, oldcon, &osid, GFP_KERNEL); if (rc) goto out; - rc = security_context_str_to_sid(&selinux_state, newcon, &nsid, - GFP_KERNEL); + rc = security_context_str_to_sid(state, newcon, &nsid, GFP_KERNEL); if (rc) goto out; - rc = security_context_str_to_sid(&selinux_state, taskcon, &tsid, - GFP_KERNEL); + rc = security_context_str_to_sid(state, taskcon, &tsid, GFP_KERNEL); if (rc) goto out; - rc = security_validate_transition_user(&selinux_state, osid, nsid, - tsid, tclass); + rc = security_validate_transition_user(state, osid, nsid, tsid, tclass); if (!rc) rc = count; out: @@ -754,6 +805,8 @@ static const struct file_operations transaction_ops = { static ssize_t sel_write_access(struct file *file, char *buf, size_t size) { + struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; + struct selinux_state *state = fsi->state; char *scon = NULL, *tcon = NULL; u32 ssid, tsid; u16 tclass; @@ -779,17 +832,15 @@ static ssize_t sel_write_access(struct file *file, char *buf, size_t size) if (sscanf(buf, "%s %s %hu", scon, tcon, &tclass) != 3) goto out; - length = security_context_str_to_sid(&selinux_state, scon, &ssid, - GFP_KERNEL); + length = security_context_str_to_sid(state, scon, &ssid, GFP_KERNEL); if (length) goto out; - length = security_context_str_to_sid(&selinux_state, tcon, &tsid, - GFP_KERNEL); + length = security_context_str_to_sid(state, tcon, &tsid, GFP_KERNEL); if (length) goto out; - security_compute_av_user(&selinux_state, ssid, tsid, tclass, &avd); + security_compute_av_user(state, ssid, tsid, tclass, &avd); length = scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%x %x %x %x %u %x", @@ -804,6 +855,8 @@ out: static ssize_t sel_write_create(struct file *file, char *buf, size_t size) { + struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; + struct selinux_state *state = fsi->state; char *scon = NULL, *tcon = NULL; char *namebuf = NULL, *objname = NULL; u32 ssid, tsid, newsid; @@ -869,23 +922,20 @@ static ssize_t sel_write_create(struct file *file, char *buf, size_t size) objname = namebuf; } - length = security_context_str_to_sid(&selinux_state, scon, &ssid, - GFP_KERNEL); + length = security_context_str_to_sid(state, scon, &ssid, GFP_KERNEL); if (length) goto out; - length = security_context_str_to_sid(&selinux_state, tcon, &tsid, - GFP_KERNEL); + length = security_context_str_to_sid(state, tcon, &tsid, GFP_KERNEL); if (length) goto out; - length = security_transition_sid_user(&selinux_state, ssid, tsid, - tclass, objname, &newsid); + length = security_transition_sid_user(state, ssid, tsid, tclass, + objname, &newsid); if (length) goto out; - length = security_sid_to_context(&selinux_state, newsid, &newcon, - &len); + length = security_sid_to_context(state, newsid, &newcon, &len); if (length) goto out; @@ -908,6 +958,8 @@ out: static ssize_t sel_write_relabel(struct file *file, char *buf, size_t size) { + struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; + struct selinux_state *state = fsi->state; char *scon = NULL, *tcon = NULL; u32 ssid, tsid, newsid; u16 tclass; @@ -935,23 +987,19 @@ static ssize_t sel_write_relabel(struct file *file, char *buf, size_t size) if (sscanf(buf, "%s %s %hu", scon, tcon, &tclass) != 3) goto out; - length = security_context_str_to_sid(&selinux_state, scon, &ssid, - GFP_KERNEL); + length = security_context_str_to_sid(state, scon, &ssid, GFP_KERNEL); if (length) goto out; - length = security_context_str_to_sid(&selinux_state, tcon, &tsid, - GFP_KERNEL); + length = security_context_str_to_sid(state, tcon, &tsid, GFP_KERNEL); if (length) goto out; - length = security_change_sid(&selinux_state, ssid, tsid, tclass, - &newsid); + length = security_change_sid(state, ssid, tsid, tclass, &newsid); if (length) goto out; - length = security_sid_to_context(&selinux_state, newsid, &newcon, - &len); + length = security_sid_to_context(state, newsid, &newcon, &len); if (length) goto out; @@ -970,6 +1018,8 @@ out: static ssize_t sel_write_user(struct file *file, char *buf, size_t size) { + struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; + struct selinux_state *state = fsi->state; char *con = NULL, *user = NULL, *ptr; u32 sid, *sids = NULL; ssize_t length; @@ -997,21 +1047,18 @@ static ssize_t sel_write_user(struct file *file, char *buf, size_t size) if (sscanf(buf, "%s %s", con, user) != 2) goto out; - length = security_context_str_to_sid(&selinux_state, con, &sid, - GFP_KERNEL); + length = security_context_str_to_sid(state, con, &sid, GFP_KERNEL); if (length) goto out; - length = security_get_user_sids(&selinux_state, sid, user, &sids, - &nsids); + length = security_get_user_sids(state, sid, user, &sids, &nsids); if (length) goto out; length = sprintf(buf, "%u", nsids) + 1; ptr = buf + length; for (i = 0; i < nsids; i++) { - rc = security_sid_to_context(&selinux_state, sids[i], - &newcon, &len); + rc = security_sid_to_context(state, sids[i], &newcon, &len); if (rc) { length = rc; goto out; @@ -1035,6 +1082,8 @@ out: static ssize_t sel_write_member(struct file *file, char *buf, size_t size) { + struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; + struct selinux_state *state = fsi->state; char *scon = NULL, *tcon = NULL; u32 ssid, tsid, newsid; u16 tclass; @@ -1062,23 +1111,19 @@ static ssize_t sel_write_member(struct file *file, char *buf, size_t size) if (sscanf(buf, "%s %s %hu", scon, tcon, &tclass) != 3) goto out; - length = security_context_str_to_sid(&selinux_state, scon, &ssid, - GFP_KERNEL); + length = security_context_str_to_sid(state, scon, &ssid, GFP_KERNEL); if (length) goto out; - length = security_context_str_to_sid(&selinux_state, tcon, &tsid, - GFP_KERNEL); + length = security_context_str_to_sid(state, tcon, &tsid, GFP_KERNEL); if (length) goto out; - length = security_member_sid(&selinux_state, ssid, tsid, tclass, - &newsid); + length = security_member_sid(state, ssid, tsid, tclass, &newsid); if (length) goto out; - length = security_sid_to_context(&selinux_state, newsid, &newcon, - &len); + length = security_sid_to_context(state, newsid, &newcon, &len); if (length) goto out; @@ -1112,6 +1157,7 @@ static struct inode *sel_make_inode(struct super_block *sb, int mode) static ssize_t sel_read_bool(struct file *filep, char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(filep)->i_sb->s_fs_info; char *page = NULL; ssize_t length; ssize_t ret; @@ -1119,10 +1165,11 @@ static ssize_t sel_read_bool(struct file *filep, char __user *buf, unsigned index = file_inode(filep)->i_ino & SEL_INO_MASK; const char *name = filep->f_path.dentry->d_name.name; - mutex_lock(&sel_mutex); + mutex_lock(&fsi->mutex); ret = -EINVAL; - if (index >= bool_num || strcmp(name, bool_pending_names[index])) + if (index >= fsi->bool_num || strcmp(name, + fsi->bool_pending_names[index])) goto out; ret = -ENOMEM; @@ -1130,16 +1177,16 @@ static ssize_t sel_read_bool(struct file *filep, char __user *buf, if (!page) goto out; - cur_enforcing = security_get_bool_value(&selinux_state, index); + cur_enforcing = security_get_bool_value(fsi->state, index); if (cur_enforcing < 0) { ret = cur_enforcing; goto out; } length = scnprintf(page, PAGE_SIZE, "%d %d", cur_enforcing, - bool_pending_values[index]); + fsi->bool_pending_values[index]); ret = simple_read_from_buffer(buf, count, ppos, page, length); out: - mutex_unlock(&sel_mutex); + mutex_unlock(&fsi->mutex); free_page((unsigned long)page); return ret; } @@ -1147,13 +1194,14 @@ out: static ssize_t sel_write_bool(struct file *filep, const char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(filep)->i_sb->s_fs_info; char *page = NULL; ssize_t length; int new_value; unsigned index = file_inode(filep)->i_ino & SEL_INO_MASK; const char *name = filep->f_path.dentry->d_name.name; - mutex_lock(&sel_mutex); + mutex_lock(&fsi->mutex); length = avc_has_perm(current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__SETBOOL, @@ -1162,7 +1210,8 @@ static ssize_t sel_write_bool(struct file *filep, const char __user *buf, goto out; length = -EINVAL; - if (index >= bool_num || strcmp(name, bool_pending_names[index])) + if (index >= fsi->bool_num || strcmp(name, + fsi->bool_pending_names[index])) goto out; length = -ENOMEM; @@ -1188,11 +1237,11 @@ static ssize_t sel_write_bool(struct file *filep, const char __user *buf, if (new_value) new_value = 1; - bool_pending_values[index] = new_value; + fsi->bool_pending_values[index] = new_value; length = count; out: - mutex_unlock(&sel_mutex); + mutex_unlock(&fsi->mutex); kfree(page); return length; } @@ -1207,11 +1256,12 @@ static ssize_t sel_commit_bools_write(struct file *filep, const char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(filep)->i_sb->s_fs_info; char *page = NULL; ssize_t length; int new_value; - mutex_lock(&sel_mutex); + mutex_lock(&fsi->mutex); length = avc_has_perm(current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__SETBOOL, @@ -1240,15 +1290,15 @@ static ssize_t sel_commit_bools_write(struct file *filep, goto out; length = 0; - if (new_value && bool_pending_values) - length = security_set_bools(&selinux_state, bool_num, - bool_pending_values); + if (new_value && fsi->bool_pending_values) + length = security_set_bools(fsi->state, fsi->bool_num, + fsi->bool_pending_values); if (!length) length = count; out: - mutex_unlock(&sel_mutex); + mutex_unlock(&fsi->mutex); kfree(page); return length; } @@ -1266,12 +1316,12 @@ static void sel_remove_entries(struct dentry *de) #define BOOL_DIR_NAME "booleans" -static int sel_make_bools(void) +static int sel_make_bools(struct selinux_fs_info *fsi) { int i, ret; ssize_t len; struct dentry *dentry = NULL; - struct dentry *dir = bool_dir; + struct dentry *dir = fsi->bool_dir; struct inode *inode = NULL; struct inode_security_struct *isec; char **names = NULL, *page; @@ -1280,13 +1330,13 @@ static int sel_make_bools(void) u32 sid; /* remove any existing files */ - for (i = 0; i < bool_num; i++) - kfree(bool_pending_names[i]); - kfree(bool_pending_names); - kfree(bool_pending_values); - bool_num = 0; - bool_pending_names = NULL; - bool_pending_values = NULL; + for (i = 0; i < fsi->bool_num; i++) + kfree(fsi->bool_pending_names[i]); + kfree(fsi->bool_pending_names); + kfree(fsi->bool_pending_values); + fsi->bool_num = 0; + fsi->bool_pending_names = NULL; + fsi->bool_pending_values = NULL; sel_remove_entries(dir); @@ -1295,7 +1345,7 @@ static int sel_make_bools(void) if (!page) goto out; - ret = security_get_bools(&selinux_state, &num, &names, &values); + ret = security_get_bools(fsi->state, &num, &names, &values); if (ret) goto out; @@ -1316,7 +1366,7 @@ static int sel_make_bools(void) goto out; isec = (struct inode_security_struct *)inode->i_security; - ret = security_genfs_sid(&selinux_state, "selinuxfs", page, + ret = security_genfs_sid(fsi->state, "selinuxfs", page, SECCLASS_FILE, &sid); if (ret) { pr_warn_ratelimited("SELinux: no sid found, defaulting to security isid for %s\n", @@ -1330,9 +1380,9 @@ static int sel_make_bools(void) inode->i_ino = i|SEL_BOOL_INO_OFFSET; d_add(dentry, inode); } - bool_num = num; - bool_pending_names = names; - bool_pending_values = values; + fsi->bool_num = num; + fsi->bool_pending_names = names; + fsi->bool_pending_values = values; free_page((unsigned long)page); return 0; @@ -1350,10 +1400,6 @@ out: return ret; } -#define NULL_FILE_NAME "null" - -struct path selinux_null; - static ssize_t sel_read_avc_cache_threshold(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { @@ -1503,6 +1549,8 @@ static const struct file_operations sel_avc_cache_stats_ops = { static int sel_make_avc_files(struct dentry *dir) { + struct super_block *sb = dir->d_sb; + struct selinux_fs_info *fsi = sb->s_fs_info; int i; static const struct tree_descr files[] = { { "cache_threshold", @@ -1526,7 +1574,7 @@ static int sel_make_avc_files(struct dentry *dir) return -ENOMEM; inode->i_fop = files[i].ops; - inode->i_ino = ++sel_last_ino; + inode->i_ino = ++fsi->last_ino; d_add(dentry, inode); } @@ -1536,12 +1584,13 @@ static int sel_make_avc_files(struct dentry *dir) static ssize_t sel_read_initcon(struct file *file, char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; char *con; u32 sid, len; ssize_t ret; sid = file_inode(file)->i_ino&SEL_INO_MASK; - ret = security_sid_to_context(&selinux_state, sid, &con, &len); + ret = security_sid_to_context(fsi->state, sid, &con, &len); if (ret) return ret; @@ -1629,13 +1678,13 @@ static const struct file_operations sel_perm_ops = { static ssize_t sel_read_policycap(struct file *file, char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; int value; char tmpbuf[TMPBUFLEN]; ssize_t length; unsigned long i_ino = file_inode(file)->i_ino; - value = security_policycap_supported(&selinux_state, - i_ino & SEL_INO_MASK); + value = security_policycap_supported(fsi->state, i_ino & SEL_INO_MASK); length = scnprintf(tmpbuf, TMPBUFLEN, "%d", value); return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); @@ -1649,11 +1698,11 @@ static const struct file_operations sel_policycap_ops = { static int sel_make_perm_files(char *objclass, int classvalue, struct dentry *dir) { + struct selinux_fs_info *fsi = dir->d_sb->s_fs_info; int i, rc, nperms; char **perms; - rc = security_get_permissions(&selinux_state, objclass, &perms, - &nperms); + rc = security_get_permissions(fsi->state, objclass, &perms, &nperms); if (rc) return rc; @@ -1687,6 +1736,8 @@ out: static int sel_make_class_dir_entries(char *classname, int index, struct dentry *dir) { + struct super_block *sb = dir->d_sb; + struct selinux_fs_info *fsi = sb->s_fs_info; struct dentry *dentry = NULL; struct inode *inode = NULL; int rc; @@ -1703,7 +1754,7 @@ static int sel_make_class_dir_entries(char *classname, int index, inode->i_ino = sel_class_to_ino(index); d_add(dentry, inode); - dentry = sel_make_dir(dir, "perms", &last_class_ino); + dentry = sel_make_dir(dir, "perms", &fsi->last_class_ino); if (IS_ERR(dentry)) return PTR_ERR(dentry); @@ -1712,26 +1763,27 @@ static int sel_make_class_dir_entries(char *classname, int index, return rc; } -static int sel_make_classes(void) +static int sel_make_classes(struct selinux_fs_info *fsi) { + int rc, nclasses, i; char **classes; /* delete any existing entries */ - sel_remove_entries(class_dir); + sel_remove_entries(fsi->class_dir); - rc = security_get_classes(&selinux_state, &classes, &nclasses); + rc = security_get_classes(fsi->state, &classes, &nclasses); if (rc) return rc; /* +2 since classes are 1-indexed */ - last_class_ino = sel_class_to_ino(nclasses + 2); + fsi->last_class_ino = sel_class_to_ino(nclasses + 2); for (i = 0; i < nclasses; i++) { struct dentry *class_name_dir; - class_name_dir = sel_make_dir(class_dir, classes[i], - &last_class_ino); + class_name_dir = sel_make_dir(fsi->class_dir, classes[i], + &fsi->last_class_ino); if (IS_ERR(class_name_dir)) { rc = PTR_ERR(class_name_dir); goto out; @@ -1751,25 +1803,25 @@ out: return rc; } -static int sel_make_policycap(void) +static int sel_make_policycap(struct selinux_fs_info *fsi) { unsigned int iter; struct dentry *dentry = NULL; struct inode *inode = NULL; - sel_remove_entries(policycap_dir); + sel_remove_entries(fsi->policycap_dir); for (iter = 0; iter <= POLICYDB_CAPABILITY_MAX; iter++) { if (iter < ARRAY_SIZE(selinux_policycap_names)) - dentry = d_alloc_name(policycap_dir, + dentry = d_alloc_name(fsi->policycap_dir, selinux_policycap_names[iter]); else - dentry = d_alloc_name(policycap_dir, "unknown"); + dentry = d_alloc_name(fsi->policycap_dir, "unknown"); if (dentry == NULL) return -ENOMEM; - inode = sel_make_inode(policycap_dir->d_sb, S_IFREG | S_IRUGO); + inode = sel_make_inode(fsi->sb, S_IFREG | 0444); if (inode == NULL) return -ENOMEM; @@ -1808,8 +1860,11 @@ static struct dentry *sel_make_dir(struct dentry *dir, const char *name, return dentry; } +#define NULL_FILE_NAME "null" + static int sel_fill_super(struct super_block *sb, void *data, int silent) { + struct selinux_fs_info *fsi; int ret; struct dentry *dentry; struct inode *inode; @@ -1837,14 +1892,20 @@ static int sel_fill_super(struct super_block *sb, void *data, int silent) S_IWUGO}, /* last one */ {""} }; + + ret = selinux_fs_info_create(sb); + if (ret) + goto err; + ret = simple_fill_super(sb, SELINUX_MAGIC, selinux_files); if (ret) goto err; - bool_dir = sel_make_dir(sb->s_root, BOOL_DIR_NAME, &sel_last_ino); - if (IS_ERR(bool_dir)) { - ret = PTR_ERR(bool_dir); - bool_dir = NULL; + fsi = sb->s_fs_info; + fsi->bool_dir = sel_make_dir(sb->s_root, BOOL_DIR_NAME, &fsi->last_ino); + if (IS_ERR(fsi->bool_dir)) { + ret = PTR_ERR(fsi->bool_dir); + fsi->bool_dir = NULL; goto err; } @@ -1858,7 +1919,7 @@ static int sel_fill_super(struct super_block *sb, void *data, int silent) if (!inode) goto err; - inode->i_ino = ++sel_last_ino; + inode->i_ino = ++fsi->last_ino; isec = (struct inode_security_struct *)inode->i_security; isec->sid = SECINITSID_DEVNULL; isec->sclass = SECCLASS_CHR_FILE; @@ -1866,9 +1927,8 @@ static int sel_fill_super(struct super_block *sb, void *data, int silent) init_special_inode(inode, S_IFCHR | S_IRUGO | S_IWUGO, MKDEV(MEM_MAJOR, 3)); d_add(dentry, inode); - selinux_null.dentry = dentry; - dentry = sel_make_dir(sb->s_root, "avc", &sel_last_ino); + dentry = sel_make_dir(sb->s_root, "avc", &fsi->last_ino); if (IS_ERR(dentry)) { ret = PTR_ERR(dentry); goto err; @@ -1878,7 +1938,7 @@ static int sel_fill_super(struct super_block *sb, void *data, int silent) if (ret) goto err; - dentry = sel_make_dir(sb->s_root, "initial_contexts", &sel_last_ino); + dentry = sel_make_dir(sb->s_root, "initial_contexts", &fsi->last_ino); if (IS_ERR(dentry)) { ret = PTR_ERR(dentry); goto err; @@ -1888,23 +1948,31 @@ static int sel_fill_super(struct super_block *sb, void *data, int silent) if (ret) goto err; - class_dir = sel_make_dir(sb->s_root, "class", &sel_last_ino); - if (IS_ERR(class_dir)) { - ret = PTR_ERR(class_dir); - class_dir = NULL; + fsi->class_dir = sel_make_dir(sb->s_root, "class", &fsi->last_ino); + if (IS_ERR(fsi->class_dir)) { + ret = PTR_ERR(fsi->class_dir); + fsi->class_dir = NULL; goto err; } - policycap_dir = sel_make_dir(sb->s_root, "policy_capabilities", &sel_last_ino); - if (IS_ERR(policycap_dir)) { - ret = PTR_ERR(policycap_dir); - policycap_dir = NULL; + fsi->policycap_dir = sel_make_dir(sb->s_root, "policy_capabilities", + &fsi->last_ino); + if (IS_ERR(fsi->policycap_dir)) { + ret = PTR_ERR(fsi->policycap_dir); + fsi->policycap_dir = NULL; goto err; } + + ret = sel_make_policy_nodes(fsi); + if (ret) + goto err; return 0; err: printk(KERN_ERR "SELinux: %s: failed while creating inodes\n", __func__); + + selinux_fs_info_free(sb); + return ret; } @@ -1914,16 +1982,25 @@ static struct dentry *sel_mount(struct file_system_type *fs_type, return mount_single(fs_type, flags, data, sel_fill_super); } +static void sel_kill_sb(struct super_block *sb) +{ + selinux_fs_info_free(sb); + kill_litter_super(sb); +} + static struct file_system_type sel_fs_type = { .name = "selinuxfs", .mount = sel_mount, - .kill_sb = kill_litter_super, + .kill_sb = sel_kill_sb, }; struct vfsmount *selinuxfs_mount; +struct path selinux_null; static int __init init_sel_fs(void) { + struct qstr null_name = QSTR_INIT(NULL_FILE_NAME, + sizeof(NULL_FILE_NAME)-1); int err; if (!selinux_enabled) @@ -1945,6 +2022,13 @@ static int __init init_sel_fs(void) err = PTR_ERR(selinuxfs_mount); selinuxfs_mount = NULL; } + selinux_null.dentry = d_hash_and_lookup(selinux_null.mnt->mnt_root, + &null_name); + if (IS_ERR(selinux_null.dentry)) { + pr_err("selinuxfs: could not lookup null!\n"); + err = PTR_ERR(selinux_null.dentry); + selinux_null.dentry = NULL; + } return err; } From 5d0939e1f59043043f1b032e09459cdda8bfd297 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Mon, 5 Mar 2018 11:47:56 -0500 Subject: [PATCH 2041/3715] BACKPORT: selinux: wrap AVC state Wrap the AVC state within the selinux_state structure and pass it explicitly to all AVC functions. The AVC private state is encapsulated in a selinux_avc structure that is referenced from the selinux_state. This change should have no effect on SELinux behavior or APIs (userspace or LSM). Signed-off-by: Stephen Smalley Reviewed-by: James Morris Signed-off-by: Paul Moore (cherry picked from commit 6b6bc6205d98796361962ee282a063f18ba8dc57) Resolved conflicts around non-backported sctp feature. Change-Id: I6412982af61be2d76e747a942d011be2f0bb2528 Bug: 140252993 Signed-off-by: Jeff Vander Stoep --- security/selinux/avc.c | 284 +++++++++++--------- security/selinux/hooks.c | 395 ++++++++++++++++++---------- security/selinux/include/avc.h | 32 ++- security/selinux/include/avc_ss.h | 3 +- security/selinux/include/security.h | 3 + security/selinux/netlabel.c | 3 +- security/selinux/selinuxfs.c | 60 +++-- security/selinux/ss/services.c | 9 +- security/selinux/xfrm.c | 17 +- 9 files changed, 510 insertions(+), 296 deletions(-) diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 54b09cc03b55..f3aedf077509 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -82,14 +82,42 @@ struct avc_callback_node { struct avc_callback_node *next; }; -/* Exported via selinufs */ -unsigned int avc_cache_threshold = AVC_DEF_CACHE_THRESHOLD; - #ifdef CONFIG_SECURITY_SELINUX_AVC_STATS DEFINE_PER_CPU(struct avc_cache_stats, avc_cache_stats) = { 0 }; #endif -static struct avc_cache avc_cache; +struct selinux_avc { + unsigned int avc_cache_threshold; + struct avc_cache avc_cache; +}; + +static struct selinux_avc selinux_avc; + +void selinux_avc_init(struct selinux_avc **avc) +{ + int i; + + selinux_avc.avc_cache_threshold = AVC_DEF_CACHE_THRESHOLD; + for (i = 0; i < AVC_CACHE_SLOTS; i++) { + INIT_HLIST_HEAD(&selinux_avc.avc_cache.slots[i]); + spin_lock_init(&selinux_avc.avc_cache.slots_lock[i]); + } + atomic_set(&selinux_avc.avc_cache.active_nodes, 0); + atomic_set(&selinux_avc.avc_cache.lru_hint, 0); + *avc = &selinux_avc; +} + +unsigned int avc_get_cache_threshold(struct selinux_avc *avc) +{ + return avc->avc_cache_threshold; +} + +void avc_set_cache_threshold(struct selinux_avc *avc, + unsigned int cache_threshold) +{ + avc->avc_cache_threshold = cache_threshold; +} + static struct avc_callback_node *avc_callbacks; static struct kmem_cache *avc_node_cachep; static struct kmem_cache *avc_xperms_data_cachep; @@ -143,14 +171,14 @@ static void avc_dump_av(struct audit_buffer *ab, u16 tclass, u32 av) * @tsid: target security identifier * @tclass: target security class */ -static void avc_dump_query(struct audit_buffer *ab, u32 ssid, u32 tsid, u16 tclass) +static void avc_dump_query(struct audit_buffer *ab, struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass) { int rc; char *scontext; u32 scontext_len; - rc = security_sid_to_context(&selinux_state, ssid, - &scontext, &scontext_len); + rc = security_sid_to_context(state, ssid, &scontext, &scontext_len); if (rc) audit_log_format(ab, "ssid=%d", ssid); else { @@ -158,8 +186,7 @@ static void avc_dump_query(struct audit_buffer *ab, u32 ssid, u32 tsid, u16 tcla kfree(scontext); } - rc = security_sid_to_context(&selinux_state, tsid, - &scontext, &scontext_len); + rc = security_sid_to_context(state, tsid, &scontext, &scontext_len); if (rc) audit_log_format(ab, " tsid=%d", tsid); else { @@ -178,15 +205,6 @@ static void avc_dump_query(struct audit_buffer *ab, u32 ssid, u32 tsid, u16 tcla */ void __init avc_init(void) { - int i; - - for (i = 0; i < AVC_CACHE_SLOTS; i++) { - INIT_HLIST_HEAD(&avc_cache.slots[i]); - spin_lock_init(&avc_cache.slots_lock[i]); - } - atomic_set(&avc_cache.active_nodes, 0); - atomic_set(&avc_cache.lru_hint, 0); - avc_node_cachep = kmem_cache_create("avc_node", sizeof(struct avc_node), 0, SLAB_PANIC, NULL); avc_xperms_cachep = kmem_cache_create("avc_xperms_node", @@ -201,7 +219,7 @@ void __init avc_init(void) 0, SLAB_PANIC, NULL); } -int avc_get_hash_stats(char *page) +int avc_get_hash_stats(struct selinux_avc *avc, char *page) { int i, chain_len, max_chain_len, slots_used; struct avc_node *node; @@ -212,7 +230,7 @@ int avc_get_hash_stats(char *page) slots_used = 0; max_chain_len = 0; for (i = 0; i < AVC_CACHE_SLOTS; i++) { - head = &avc_cache.slots[i]; + head = &avc->avc_cache.slots[i]; if (!hlist_empty(head)) { slots_used++; chain_len = 0; @@ -227,7 +245,7 @@ int avc_get_hash_stats(char *page) return scnprintf(page, PAGE_SIZE, "entries: %d\nbuckets used: %d/%d\n" "longest chain: %d\n", - atomic_read(&avc_cache.active_nodes), + atomic_read(&avc->avc_cache.active_nodes), slots_used, AVC_CACHE_SLOTS, max_chain_len); } @@ -464,11 +482,12 @@ static inline u32 avc_xperms_audit_required(u32 requested, return audited; } -static inline int avc_xperms_audit(u32 ssid, u32 tsid, u16 tclass, - u32 requested, struct av_decision *avd, - struct extended_perms_decision *xpd, - u8 perm, int result, - struct common_audit_data *ad) +static inline int avc_xperms_audit(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, + u32 requested, struct av_decision *avd, + struct extended_perms_decision *xpd, + u8 perm, int result, + struct common_audit_data *ad) { u32 audited, denied; @@ -476,7 +495,7 @@ static inline int avc_xperms_audit(u32 ssid, u32 tsid, u16 tclass, requested, avd, xpd, perm, result, &denied); if (likely(!audited)) return 0; - return slow_avc_audit(ssid, tsid, tclass, requested, + return slow_avc_audit(state, ssid, tsid, tclass, requested, audited, denied, result, ad, 0); } @@ -488,29 +507,30 @@ static void avc_node_free(struct rcu_head *rhead) avc_cache_stats_incr(frees); } -static void avc_node_delete(struct avc_node *node) +static void avc_node_delete(struct selinux_avc *avc, struct avc_node *node) { hlist_del_rcu(&node->list); call_rcu(&node->rhead, avc_node_free); - atomic_dec(&avc_cache.active_nodes); + atomic_dec(&avc->avc_cache.active_nodes); } -static void avc_node_kill(struct avc_node *node) +static void avc_node_kill(struct selinux_avc *avc, struct avc_node *node) { avc_xperms_free(node->ae.xp_node); kmem_cache_free(avc_node_cachep, node); avc_cache_stats_incr(frees); - atomic_dec(&avc_cache.active_nodes); + atomic_dec(&avc->avc_cache.active_nodes); } -static void avc_node_replace(struct avc_node *new, struct avc_node *old) +static void avc_node_replace(struct selinux_avc *avc, + struct avc_node *new, struct avc_node *old) { hlist_replace_rcu(&old->list, &new->list); call_rcu(&old->rhead, avc_node_free); - atomic_dec(&avc_cache.active_nodes); + atomic_dec(&avc->avc_cache.active_nodes); } -static inline int avc_reclaim_node(void) +static inline int avc_reclaim_node(struct selinux_avc *avc) { struct avc_node *node; int hvalue, try, ecx; @@ -519,16 +539,17 @@ static inline int avc_reclaim_node(void) spinlock_t *lock; for (try = 0, ecx = 0; try < AVC_CACHE_SLOTS; try++) { - hvalue = atomic_inc_return(&avc_cache.lru_hint) & (AVC_CACHE_SLOTS - 1); - head = &avc_cache.slots[hvalue]; - lock = &avc_cache.slots_lock[hvalue]; + hvalue = atomic_inc_return(&avc->avc_cache.lru_hint) & + (AVC_CACHE_SLOTS - 1); + head = &avc->avc_cache.slots[hvalue]; + lock = &avc->avc_cache.slots_lock[hvalue]; if (!spin_trylock_irqsave(lock, flags)) continue; rcu_read_lock(); hlist_for_each_entry(node, head, list) { - avc_node_delete(node); + avc_node_delete(avc, node); avc_cache_stats_incr(reclaims); ecx++; if (ecx >= AVC_CACHE_RECLAIM) { @@ -544,7 +565,7 @@ out: return ecx; } -static struct avc_node *avc_alloc_node(void) +static struct avc_node *avc_alloc_node(struct selinux_avc *avc) { struct avc_node *node; @@ -555,8 +576,9 @@ static struct avc_node *avc_alloc_node(void) INIT_HLIST_NODE(&node->list); avc_cache_stats_incr(allocations); - if (atomic_inc_return(&avc_cache.active_nodes) > avc_cache_threshold) - avc_reclaim_node(); + if (atomic_inc_return(&avc->avc_cache.active_nodes) > + avc->avc_cache_threshold) + avc_reclaim_node(avc); out: return node; @@ -570,14 +592,15 @@ static void avc_node_populate(struct avc_node *node, u32 ssid, u32 tsid, u16 tcl memcpy(&node->ae.avd, avd, sizeof(node->ae.avd)); } -static inline struct avc_node *avc_search_node(u32 ssid, u32 tsid, u16 tclass) +static inline struct avc_node *avc_search_node(struct selinux_avc *avc, + u32 ssid, u32 tsid, u16 tclass) { struct avc_node *node, *ret = NULL; int hvalue; struct hlist_head *head; hvalue = avc_hash(ssid, tsid, tclass); - head = &avc_cache.slots[hvalue]; + head = &avc->avc_cache.slots[hvalue]; hlist_for_each_entry_rcu(node, head, list) { if (ssid == node->ae.ssid && tclass == node->ae.tclass && @@ -602,12 +625,13 @@ static inline struct avc_node *avc_search_node(u32 ssid, u32 tsid, u16 tclass) * then this function returns the avc_node. * Otherwise, this function returns NULL. */ -static struct avc_node *avc_lookup(u32 ssid, u32 tsid, u16 tclass) +static struct avc_node *avc_lookup(struct selinux_avc *avc, + u32 ssid, u32 tsid, u16 tclass) { struct avc_node *node; avc_cache_stats_incr(lookups); - node = avc_search_node(ssid, tsid, tclass); + node = avc_search_node(avc, ssid, tsid, tclass); if (node) return node; @@ -616,7 +640,8 @@ static struct avc_node *avc_lookup(u32 ssid, u32 tsid, u16 tclass) return NULL; } -static int avc_latest_notif_update(int seqno, int is_insert) +static int avc_latest_notif_update(struct selinux_avc *avc, + int seqno, int is_insert) { int ret = 0; static DEFINE_SPINLOCK(notif_lock); @@ -624,14 +649,14 @@ static int avc_latest_notif_update(int seqno, int is_insert) spin_lock_irqsave(¬if_lock, flag); if (is_insert) { - if (seqno < avc_cache.latest_notif) { + if (seqno < avc->avc_cache.latest_notif) { printk(KERN_WARNING "SELinux: avc: seqno %d < latest_notif %d\n", - seqno, avc_cache.latest_notif); + seqno, avc->avc_cache.latest_notif); ret = -EAGAIN; } } else { - if (seqno > avc_cache.latest_notif) - avc_cache.latest_notif = seqno; + if (seqno > avc->avc_cache.latest_notif) + avc->avc_cache.latest_notif = seqno; } spin_unlock_irqrestore(¬if_lock, flag); @@ -656,18 +681,19 @@ static int avc_latest_notif_update(int seqno, int is_insert) * the access vectors into a cache entry, returns * avc_node inserted. Otherwise, this function returns NULL. */ -static struct avc_node *avc_insert(u32 ssid, u32 tsid, u16 tclass, - struct av_decision *avd, - struct avc_xperms_node *xp_node) +static struct avc_node *avc_insert(struct selinux_avc *avc, + u32 ssid, u32 tsid, u16 tclass, + struct av_decision *avd, + struct avc_xperms_node *xp_node) { struct avc_node *pos, *node = NULL; int hvalue; unsigned long flag; - if (avc_latest_notif_update(avd->seqno, 1)) + if (avc_latest_notif_update(avc, avd->seqno, 1)) goto out; - node = avc_alloc_node(); + node = avc_alloc_node(avc); if (node) { struct hlist_head *head; spinlock_t *lock; @@ -680,15 +706,15 @@ static struct avc_node *avc_insert(u32 ssid, u32 tsid, u16 tclass, kmem_cache_free(avc_node_cachep, node); return NULL; } - head = &avc_cache.slots[hvalue]; - lock = &avc_cache.slots_lock[hvalue]; + head = &avc->avc_cache.slots[hvalue]; + lock = &avc->avc_cache.slots_lock[hvalue]; spin_lock_irqsave(lock, flag); hlist_for_each_entry(pos, head, list) { if (pos->ae.ssid == ssid && pos->ae.tsid == tsid && pos->ae.tclass == tclass) { - avc_node_replace(node, pos); + avc_node_replace(avc, node, pos); goto found; } } @@ -726,9 +752,10 @@ static void avc_audit_post_callback(struct audit_buffer *ab, void *a) { struct common_audit_data *ad = a; audit_log_format(ab, " "); - avc_dump_query(ab, ad->selinux_audit_data->ssid, - ad->selinux_audit_data->tsid, - ad->selinux_audit_data->tclass); + avc_dump_query(ab, ad->selinux_audit_data->state, + ad->selinux_audit_data->ssid, + ad->selinux_audit_data->tsid, + ad->selinux_audit_data->tclass); if (ad->selinux_audit_data->denied) { audit_log_format(ab, " permissive=%u", ad->selinux_audit_data->result ? 0 : 1); @@ -736,10 +763,11 @@ static void avc_audit_post_callback(struct audit_buffer *ab, void *a) } /* This is the slow part of avc audit with big stack footprint */ -noinline int slow_avc_audit(u32 ssid, u32 tsid, u16 tclass, - u32 requested, u32 audited, u32 denied, int result, - struct common_audit_data *a, - unsigned flags) +noinline int slow_avc_audit(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, + u32 requested, u32 audited, u32 denied, int result, + struct common_audit_data *a, + unsigned int flags) { struct common_audit_data stack_data; struct selinux_audit_data sad; @@ -767,6 +795,7 @@ noinline int slow_avc_audit(u32 ssid, u32 tsid, u16 tclass, sad.audited = audited; sad.denied = denied; sad.result = result; + sad.state = state; a->selinux_audit_data = &sad; @@ -815,10 +844,11 @@ out: * otherwise, this function updates the AVC entry. The original AVC-entry object * will release later by RCU. */ -static int avc_update_node(u32 event, u32 perms, u8 driver, u8 xperm, u32 ssid, - u32 tsid, u16 tclass, u32 seqno, - struct extended_perms_decision *xpd, - u32 flags) +static int avc_update_node(struct selinux_avc *avc, + u32 event, u32 perms, u8 driver, u8 xperm, u32 ssid, + u32 tsid, u16 tclass, u32 seqno, + struct extended_perms_decision *xpd, + u32 flags) { int hvalue, rc = 0; unsigned long flag; @@ -826,7 +856,7 @@ static int avc_update_node(u32 event, u32 perms, u8 driver, u8 xperm, u32 ssid, struct hlist_head *head; spinlock_t *lock; - node = avc_alloc_node(); + node = avc_alloc_node(avc); if (!node) { rc = -ENOMEM; goto out; @@ -835,8 +865,8 @@ static int avc_update_node(u32 event, u32 perms, u8 driver, u8 xperm, u32 ssid, /* Lock the target slot */ hvalue = avc_hash(ssid, tsid, tclass); - head = &avc_cache.slots[hvalue]; - lock = &avc_cache.slots_lock[hvalue]; + head = &avc->avc_cache.slots[hvalue]; + lock = &avc->avc_cache.slots_lock[hvalue]; spin_lock_irqsave(lock, flag); @@ -852,7 +882,7 @@ static int avc_update_node(u32 event, u32 perms, u8 driver, u8 xperm, u32 ssid, if (!orig) { rc = -ENOENT; - avc_node_kill(node); + avc_node_kill(avc, node); goto out_unlock; } @@ -896,7 +926,7 @@ static int avc_update_node(u32 event, u32 perms, u8 driver, u8 xperm, u32 ssid, avc_add_xperms_decision(node, xpd); break; } - avc_node_replace(node, orig); + avc_node_replace(avc, node, orig); out_unlock: spin_unlock_irqrestore(lock, flag); out: @@ -906,7 +936,7 @@ out: /** * avc_flush - Flush the cache */ -static void avc_flush(void) +static void avc_flush(struct selinux_avc *avc) { struct hlist_head *head; struct avc_node *node; @@ -915,8 +945,8 @@ static void avc_flush(void) int i; for (i = 0; i < AVC_CACHE_SLOTS; i++) { - head = &avc_cache.slots[i]; - lock = &avc_cache.slots_lock[i]; + head = &avc->avc_cache.slots[i]; + lock = &avc->avc_cache.slots_lock[i]; spin_lock_irqsave(lock, flag); /* @@ -925,7 +955,7 @@ static void avc_flush(void) */ rcu_read_lock(); hlist_for_each_entry(node, head, list) - avc_node_delete(node); + avc_node_delete(avc, node); rcu_read_unlock(); spin_unlock_irqrestore(lock, flag); } @@ -935,12 +965,12 @@ static void avc_flush(void) * avc_ss_reset - Flush the cache and revalidate migrated permissions. * @seqno: policy sequence number */ -int avc_ss_reset(u32 seqno) +int avc_ss_reset(struct selinux_avc *avc, u32 seqno) { struct avc_callback_node *c; int rc = 0, tmprc; - avc_flush(); + avc_flush(avc); for (c = avc_callbacks; c; c = c->next) { if (c->events & AVC_CALLBACK_RESET) { @@ -952,7 +982,7 @@ int avc_ss_reset(u32 seqno) } } - avc_latest_notif_update(seqno, 0); + avc_latest_notif_update(avc, seqno, 0); return rc; } @@ -965,32 +995,34 @@ int avc_ss_reset(u32 seqno) * Don't inline this, since it's the slow-path and just * results in a bigger stack frame. */ -static noinline struct avc_node *avc_compute_av(u32 ssid, u32 tsid, - u16 tclass, struct av_decision *avd, - struct avc_xperms_node *xp_node) +static noinline +struct avc_node *avc_compute_av(struct selinux_state *state, + u32 ssid, u32 tsid, + u16 tclass, struct av_decision *avd, + struct avc_xperms_node *xp_node) { rcu_read_unlock(); INIT_LIST_HEAD(&xp_node->xpd_head); - security_compute_av(&selinux_state, ssid, tsid, tclass, - avd, &xp_node->xp); + security_compute_av(state, ssid, tsid, tclass, avd, &xp_node->xp); rcu_read_lock(); - return avc_insert(ssid, tsid, tclass, avd, xp_node); + return avc_insert(state->avc, ssid, tsid, tclass, avd, xp_node); } -static noinline int avc_denied(u32 ssid, u32 tsid, - u16 tclass, u32 requested, - u8 driver, u8 xperm, unsigned flags, - struct av_decision *avd) +static noinline int avc_denied(struct selinux_state *state, + u32 ssid, u32 tsid, + u16 tclass, u32 requested, + u8 driver, u8 xperm, unsigned int flags, + struct av_decision *avd) { if (flags & AVC_STRICT) return -EACCES; - if (enforcing_enabled(&selinux_state) && + if (enforcing_enabled(state) && !(avd->flags & AVD_FLAGS_PERMISSIVE)) return -EACCES; - avc_update_node(AVC_CALLBACK_GRANT, requested, driver, xperm, ssid, - tsid, tclass, avd->seqno, NULL, flags); + avc_update_node(state->avc, AVC_CALLBACK_GRANT, requested, driver, + xperm, ssid, tsid, tclass, avd->seqno, NULL, flags); return 0; } @@ -1001,8 +1033,9 @@ static noinline int avc_denied(u32 ssid, u32 tsid, * as-is the case with ioctls, then multiple may be chained together and the * driver field is used to specify which set contains the permission. */ -int avc_has_extended_perms(u32 ssid, u32 tsid, u16 tclass, u32 requested, - u8 driver, u8 xperm, struct common_audit_data *ad) +int avc_has_extended_perms(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, u32 requested, + u8 driver, u8 xperm, struct common_audit_data *ad) { struct avc_node *node; struct av_decision avd; @@ -1021,9 +1054,9 @@ int avc_has_extended_perms(u32 ssid, u32 tsid, u16 tclass, u32 requested, rcu_read_lock(); - node = avc_lookup(ssid, tsid, tclass); + node = avc_lookup(state->avc, ssid, tsid, tclass); if (unlikely(!node)) { - node = avc_compute_av(ssid, tsid, tclass, &avd, xp_node); + node = avc_compute_av(state, ssid, tsid, tclass, &avd, xp_node); } else { memcpy(&avd, &node->ae.avd, sizeof(avd)); xp_node = node->ae.xp_node; @@ -1047,11 +1080,12 @@ int avc_has_extended_perms(u32 ssid, u32 tsid, u16 tclass, u32 requested, goto decision; } rcu_read_unlock(); - security_compute_xperms_decision(&selinux_state, ssid, tsid, - tclass, driver, &local_xpd); + security_compute_xperms_decision(state, ssid, tsid, tclass, + driver, &local_xpd); rcu_read_lock(); - avc_update_node(AVC_CALLBACK_ADD_XPERMS, requested, driver, xperm, - ssid, tsid, tclass, avd.seqno, &local_xpd, 0); + avc_update_node(state->avc, AVC_CALLBACK_ADD_XPERMS, requested, + driver, xperm, ssid, tsid, tclass, avd.seqno, + &local_xpd, 0); } else { avc_quick_copy_xperms_decision(xperm, &local_xpd, xpd); } @@ -1063,12 +1097,12 @@ int avc_has_extended_perms(u32 ssid, u32 tsid, u16 tclass, u32 requested, decision: denied = requested & ~(avd.allowed); if (unlikely(denied)) - rc = avc_denied(ssid, tsid, tclass, requested, driver, xperm, - AVC_EXTENDED_PERMS, &avd); + rc = avc_denied(state, ssid, tsid, tclass, requested, + driver, xperm, AVC_EXTENDED_PERMS, &avd); rcu_read_unlock(); - rc2 = avc_xperms_audit(ssid, tsid, tclass, requested, + rc2 = avc_xperms_audit(state, ssid, tsid, tclass, requested, &avd, xpd, xperm, rc, ad); if (rc2) return rc2; @@ -1095,10 +1129,11 @@ decision: * auditing, e.g. in cases where a lock must be held for the check but * should be released for the auditing. */ -inline int avc_has_perm_noaudit(u32 ssid, u32 tsid, - u16 tclass, u32 requested, - unsigned flags, - struct av_decision *avd) +inline int avc_has_perm_noaudit(struct selinux_state *state, + u32 ssid, u32 tsid, + u16 tclass, u32 requested, + unsigned int flags, + struct av_decision *avd) { struct avc_node *node; struct avc_xperms_node xp_node; @@ -1109,15 +1144,16 @@ inline int avc_has_perm_noaudit(u32 ssid, u32 tsid, rcu_read_lock(); - node = avc_lookup(ssid, tsid, tclass); + node = avc_lookup(state->avc, ssid, tsid, tclass); if (unlikely(!node)) - node = avc_compute_av(ssid, tsid, tclass, avd, &xp_node); + node = avc_compute_av(state, ssid, tsid, tclass, avd, &xp_node); else memcpy(avd, &node->ae.avd, sizeof(*avd)); denied = requested & ~(avd->allowed); if (unlikely(denied)) - rc = avc_denied(ssid, tsid, tclass, requested, 0, 0, flags, avd); + rc = avc_denied(state, ssid, tsid, tclass, requested, 0, 0, + flags, avd); rcu_read_unlock(); return rc; @@ -1139,39 +1175,43 @@ inline int avc_has_perm_noaudit(u32 ssid, u32 tsid, * permissions are granted, -%EACCES if any permissions are denied, or * another -errno upon other errors. */ -int avc_has_perm(u32 ssid, u32 tsid, u16 tclass, +int avc_has_perm(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, u32 requested, struct common_audit_data *auditdata) { struct av_decision avd; int rc, rc2; - rc = avc_has_perm_noaudit(ssid, tsid, tclass, requested, 0, &avd); + rc = avc_has_perm_noaudit(state, ssid, tsid, tclass, requested, 0, + &avd); - rc2 = avc_audit(ssid, tsid, tclass, requested, &avd, rc, auditdata, 0); + rc2 = avc_audit(state, ssid, tsid, tclass, requested, &avd, rc, + auditdata, 0); if (rc2) return rc2; return rc; } -int avc_has_perm_flags(u32 ssid, u32 tsid, u16 tclass, - u32 requested, struct common_audit_data *auditdata, +int avc_has_perm_flags(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, u32 requested, + struct common_audit_data *auditdata, int flags) { struct av_decision avd; int rc, rc2; - rc = avc_has_perm_noaudit(ssid, tsid, tclass, requested, 0, &avd); + rc = avc_has_perm_noaudit(state, ssid, tsid, tclass, requested, 0, + &avd); - rc2 = avc_audit(ssid, tsid, tclass, requested, &avd, rc, + rc2 = avc_audit(state, ssid, tsid, tclass, requested, &avd, rc, auditdata, flags); if (rc2) return rc2; return rc; } -u32 avc_policy_seqno(void) +u32 avc_policy_seqno(struct selinux_state *state) { - return avc_cache.latest_notif; + return state->avc->avc_cache.latest_notif; } void avc_disable(void) @@ -1188,7 +1228,7 @@ void avc_disable(void) * the cache and get that memory back. */ if (avc_node_cachep) { - avc_flush(); + avc_flush(selinux_state.avc); /* kmem_cache_destroy(avc_node_cachep); */ } } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 6aa72501e537..38eb6cc370f7 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -466,12 +466,14 @@ static int may_context_mount_sb_relabel(u32 sid, const struct task_security_struct *tsec = cred->security; int rc; - rc = avc_has_perm(tsec->sid, sbsec->sid, SECCLASS_FILESYSTEM, + rc = avc_has_perm(&selinux_state, + tsec->sid, sbsec->sid, SECCLASS_FILESYSTEM, FILESYSTEM__RELABELFROM, NULL); if (rc) return rc; - rc = avc_has_perm(tsec->sid, sid, SECCLASS_FILESYSTEM, + rc = avc_has_perm(&selinux_state, + tsec->sid, sid, SECCLASS_FILESYSTEM, FILESYSTEM__RELABELTO, NULL); return rc; } @@ -482,12 +484,14 @@ static int may_context_mount_inode_relabel(u32 sid, { const struct task_security_struct *tsec = cred->security; int rc; - rc = avc_has_perm(tsec->sid, sbsec->sid, SECCLASS_FILESYSTEM, + rc = avc_has_perm(&selinux_state, + tsec->sid, sbsec->sid, SECCLASS_FILESYSTEM, FILESYSTEM__RELABELFROM, NULL); if (rc) return rc; - rc = avc_has_perm(sid, sbsec->sid, SECCLASS_FILESYSTEM, + rc = avc_has_perm(&selinux_state, + sid, sbsec->sid, SECCLASS_FILESYSTEM, FILESYSTEM__ASSOCIATE, NULL); return rc; } @@ -1797,9 +1801,11 @@ static int cred_has_capability(const struct cred *cred, return -EINVAL; } - rc = avc_has_perm_noaudit(sid, sid, sclass, av, 0, &avd); + rc = avc_has_perm_noaudit(&selinux_state, + sid, sid, sclass, av, 0, &avd); if (audit == SECURITY_CAP_AUDIT) { - int rc2 = avc_audit(sid, sid, sclass, av, &avd, rc, &ad, 0); + int rc2 = avc_audit(&selinux_state, + sid, sid, sclass, av, &avd, rc, &ad, 0); if (rc2) return rc2; } @@ -1825,7 +1831,8 @@ static int inode_has_perm(const struct cred *cred, sid = cred_sid(cred); isec = inode->i_security; - return avc_has_perm(sid, isec->sid, isec->sclass, perms, adp); + return avc_has_perm(&selinux_state, + sid, isec->sid, isec->sclass, perms, adp); } /* Same as inode_has_perm, but pass explicit audit data containing @@ -1898,7 +1905,8 @@ static int file_has_perm(const struct cred *cred, ad.u.file = file; if (sid != fsec->sid) { - rc = avc_has_perm(sid, fsec->sid, + rc = avc_has_perm(&selinux_state, + sid, fsec->sid, SECCLASS_FD, FD__USE, &ad); @@ -1968,7 +1976,8 @@ static int may_create(struct inode *dir, ad.type = LSM_AUDIT_DATA_DENTRY; ad.u.dentry = dentry; - rc = avc_has_perm(sid, dsec->sid, SECCLASS_DIR, + rc = avc_has_perm(&selinux_state, + sid, dsec->sid, SECCLASS_DIR, DIR__ADD_NAME | DIR__SEARCH, &ad); if (rc) @@ -1979,11 +1988,13 @@ static int may_create(struct inode *dir, if (rc) return rc; - rc = avc_has_perm(sid, newsid, tclass, FILE__CREATE, &ad); + rc = avc_has_perm(&selinux_state, + sid, newsid, tclass, FILE__CREATE, &ad); if (rc) return rc; - return avc_has_perm(newsid, sbsec->sid, + return avc_has_perm(&selinux_state, + newsid, sbsec->sid, SECCLASS_FILESYSTEM, FILESYSTEM__ASSOCIATE, &ad); } @@ -2012,7 +2023,8 @@ static int may_link(struct inode *dir, av = DIR__SEARCH; av |= (kind ? DIR__REMOVE_NAME : DIR__ADD_NAME); - rc = avc_has_perm(sid, dsec->sid, SECCLASS_DIR, av, &ad); + rc = avc_has_perm(&selinux_state, + sid, dsec->sid, SECCLASS_DIR, av, &ad); if (rc) return rc; @@ -2032,7 +2044,8 @@ static int may_link(struct inode *dir, return 0; } - rc = avc_has_perm(sid, isec->sid, isec->sclass, av, &ad); + rc = avc_has_perm(&selinux_state, + sid, isec->sid, isec->sclass, av, &ad); return rc; } @@ -2056,16 +2069,19 @@ static inline int may_rename(struct inode *old_dir, ad.type = LSM_AUDIT_DATA_DENTRY; ad.u.dentry = old_dentry; - rc = avc_has_perm(sid, old_dsec->sid, SECCLASS_DIR, + rc = avc_has_perm(&selinux_state, + sid, old_dsec->sid, SECCLASS_DIR, DIR__REMOVE_NAME | DIR__SEARCH, &ad); if (rc) return rc; - rc = avc_has_perm(sid, old_isec->sid, + rc = avc_has_perm(&selinux_state, + sid, old_isec->sid, old_isec->sclass, FILE__RENAME, &ad); if (rc) return rc; if (old_is_dir && new_dir != old_dir) { - rc = avc_has_perm(sid, old_isec->sid, + rc = avc_has_perm(&selinux_state, + sid, old_isec->sid, old_isec->sclass, DIR__REPARENT, &ad); if (rc) return rc; @@ -2075,13 +2091,15 @@ static inline int may_rename(struct inode *old_dir, av = DIR__ADD_NAME | DIR__SEARCH; if (d_is_positive(new_dentry)) av |= DIR__REMOVE_NAME; - rc = avc_has_perm(sid, new_dsec->sid, SECCLASS_DIR, av, &ad); + rc = avc_has_perm(&selinux_state, + sid, new_dsec->sid, SECCLASS_DIR, av, &ad); if (rc) return rc; if (d_is_positive(new_dentry)) { new_isec = backing_inode_security(new_dentry); new_is_dir = d_is_dir(new_dentry); - rc = avc_has_perm(sid, new_isec->sid, + rc = avc_has_perm(&selinux_state, + sid, new_isec->sid, new_isec->sclass, (new_is_dir ? DIR__RMDIR : FILE__UNLINK), &ad); if (rc) @@ -2101,7 +2119,8 @@ static int superblock_has_perm(const struct cred *cred, u32 sid = cred_sid(cred); sbsec = sb->s_security; - return avc_has_perm(sid, sbsec->sid, SECCLASS_FILESYSTEM, perms, ad); + return avc_has_perm(&selinux_state, + sid, sbsec->sid, SECCLASS_FILESYSTEM, perms, ad); } /* Convert a Linux mode and permission mask to an access vector. */ @@ -2178,7 +2197,8 @@ static int selinux_binder_set_context_mgr(struct task_struct *mgr) u32 mysid = current_sid(); u32 mgrsid = task_sid(mgr); - return avc_has_perm(mysid, mgrsid, SECCLASS_BINDER, + return avc_has_perm(&selinux_state, + mysid, mgrsid, SECCLASS_BINDER, BINDER__SET_CONTEXT_MGR, NULL); } @@ -2191,13 +2211,15 @@ static int selinux_binder_transaction(struct task_struct *from, int rc; if (mysid != fromsid) { - rc = avc_has_perm(mysid, fromsid, SECCLASS_BINDER, + rc = avc_has_perm(&selinux_state, + mysid, fromsid, SECCLASS_BINDER, BINDER__IMPERSONATE, NULL); if (rc) return rc; } - return avc_has_perm(fromsid, tosid, SECCLASS_BINDER, BINDER__CALL, + return avc_has_perm(&selinux_state, + fromsid, tosid, SECCLASS_BINDER, BINDER__CALL, NULL); } @@ -2207,7 +2229,8 @@ static int selinux_binder_transfer_binder(struct task_struct *from, u32 fromsid = task_sid(from); u32 tosid = task_sid(to); - return avc_has_perm(fromsid, tosid, SECCLASS_BINDER, BINDER__TRANSFER, + return avc_has_perm(&selinux_state, + fromsid, tosid, SECCLASS_BINDER, BINDER__TRANSFER, NULL); } @@ -2226,7 +2249,8 @@ static int selinux_binder_transfer_file(struct task_struct *from, ad.u.path = file->f_path; if (sid != fsec->sid) { - rc = avc_has_perm(sid, fsec->sid, + rc = avc_has_perm(&selinux_state, + sid, fsec->sid, SECCLASS_FD, FD__USE, &ad); @@ -2244,7 +2268,8 @@ static int selinux_binder_transfer_file(struct task_struct *from, return 0; isec = backing_inode_security(dentry); - return avc_has_perm(sid, isec->sid, isec->sclass, file_to_av(file), + return avc_has_perm(&selinux_state, + sid, isec->sid, isec->sclass, file_to_av(file), &ad); } @@ -2255,21 +2280,25 @@ static int selinux_ptrace_access_check(struct task_struct *child, u32 csid = task_sid(child); if (mode & PTRACE_MODE_READ) - return avc_has_perm(sid, csid, SECCLASS_FILE, FILE__READ, NULL); + return avc_has_perm(&selinux_state, + sid, csid, SECCLASS_FILE, FILE__READ, NULL); - return avc_has_perm(sid, csid, SECCLASS_PROCESS, PROCESS__PTRACE, NULL); + return avc_has_perm(&selinux_state, + sid, csid, SECCLASS_PROCESS, PROCESS__PTRACE, NULL); } static int selinux_ptrace_traceme(struct task_struct *parent) { - return avc_has_perm(task_sid(parent), current_sid(), SECCLASS_PROCESS, + return avc_has_perm(&selinux_state, + task_sid(parent), current_sid(), SECCLASS_PROCESS, PROCESS__PTRACE, NULL); } static int selinux_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) { - return avc_has_perm(current_sid(), task_sid(target), SECCLASS_PROCESS, + return avc_has_perm(&selinux_state, + current_sid(), task_sid(target), SECCLASS_PROCESS, PROCESS__GETCAP, NULL); } @@ -2278,7 +2307,8 @@ static int selinux_capset(struct cred *new, const struct cred *old, const kernel_cap_t *inheritable, const kernel_cap_t *permitted) { - return avc_has_perm(cred_sid(old), cred_sid(new), SECCLASS_PROCESS, + return avc_has_perm(&selinux_state, + cred_sid(old), cred_sid(new), SECCLASS_PROCESS, PROCESS__SETCAP, NULL); } @@ -2338,18 +2368,21 @@ static int selinux_syslog(int type) switch (type) { case SYSLOG_ACTION_READ_ALL: /* Read last kernel messages */ case SYSLOG_ACTION_SIZE_BUFFER: /* Return size of the log buffer */ - return avc_has_perm(current_sid(), SECINITSID_KERNEL, + return avc_has_perm(&selinux_state, + current_sid(), SECINITSID_KERNEL, SECCLASS_SYSTEM, SYSTEM__SYSLOG_READ, NULL); case SYSLOG_ACTION_CONSOLE_OFF: /* Disable logging to console */ case SYSLOG_ACTION_CONSOLE_ON: /* Enable logging to console */ /* Set level of messages printed to console */ case SYSLOG_ACTION_CONSOLE_LEVEL: - return avc_has_perm(current_sid(), SECINITSID_KERNEL, + return avc_has_perm(&selinux_state, + current_sid(), SECINITSID_KERNEL, SECCLASS_SYSTEM, SYSTEM__SYSLOG_CONSOLE, NULL); } /* All other syslog types */ - return avc_has_perm(current_sid(), SECINITSID_KERNEL, + return avc_has_perm(&selinux_state, + current_sid(), SECINITSID_KERNEL, SECCLASS_SYSTEM, SYSTEM__SYSLOG_MOD, NULL); } @@ -2416,7 +2449,8 @@ static int check_nnp_nosuid(const struct linux_binprm *bprm, av |= PROCESS2__NNP_TRANSITION; if (nosuid) av |= PROCESS2__NOSUID_TRANSITION; - rc = avc_has_perm(old_tsec->sid, new_tsec->sid, + rc = avc_has_perm(&selinux_state, + old_tsec->sid, new_tsec->sid, SECCLASS_PROCESS2, av, NULL); if (!rc) return 0; @@ -2499,25 +2533,29 @@ static int selinux_bprm_set_creds(struct linux_binprm *bprm) ad.u.file = bprm->file; if (new_tsec->sid == old_tsec->sid) { - rc = avc_has_perm(old_tsec->sid, isec->sid, + rc = avc_has_perm(&selinux_state, + old_tsec->sid, isec->sid, SECCLASS_FILE, FILE__EXECUTE_NO_TRANS, &ad); if (rc) return rc; } else { /* Check permissions for the transition. */ - rc = avc_has_perm(old_tsec->sid, new_tsec->sid, + rc = avc_has_perm(&selinux_state, + old_tsec->sid, new_tsec->sid, SECCLASS_PROCESS, PROCESS__TRANSITION, &ad); if (rc) return rc; - rc = avc_has_perm(new_tsec->sid, isec->sid, + rc = avc_has_perm(&selinux_state, + new_tsec->sid, isec->sid, SECCLASS_FILE, FILE__ENTRYPOINT, &ad); if (rc) return rc; /* Check for shared state */ if (bprm->unsafe & LSM_UNSAFE_SHARE) { - rc = avc_has_perm(old_tsec->sid, new_tsec->sid, + rc = avc_has_perm(&selinux_state, + old_tsec->sid, new_tsec->sid, SECCLASS_PROCESS, PROCESS__SHARE, NULL); if (rc) @@ -2529,7 +2567,8 @@ static int selinux_bprm_set_creds(struct linux_binprm *bprm) if (bprm->unsafe & LSM_UNSAFE_PTRACE) { u32 ptsid = ptrace_parent_sid(); if (ptsid != 0) { - rc = avc_has_perm(ptsid, new_tsec->sid, + rc = avc_has_perm(&selinux_state, + ptsid, new_tsec->sid, SECCLASS_PROCESS, PROCESS__PTRACE, NULL); if (rc) @@ -2543,7 +2582,8 @@ static int selinux_bprm_set_creds(struct linux_binprm *bprm) /* Enable secure mode for SIDs transitions unless the noatsecure permission is granted between the two SIDs, i.e. ahp returns 0. */ - rc = avc_has_perm(old_tsec->sid, new_tsec->sid, + rc = avc_has_perm(&selinux_state, + old_tsec->sid, new_tsec->sid, SECCLASS_PROCESS, PROCESS__NOATSECURE, NULL); bprm->secureexec |= !!rc; @@ -2635,7 +2675,8 @@ static void selinux_bprm_committing_creds(struct linux_binprm *bprm) * higher than the default soft limit for cases where the default is * lower than the hard limit, e.g. RLIMIT_CORE or RLIMIT_STACK. */ - rc = avc_has_perm(new_tsec->osid, new_tsec->sid, SECCLASS_PROCESS, + rc = avc_has_perm(&selinux_state, + new_tsec->osid, new_tsec->sid, SECCLASS_PROCESS, PROCESS__RLIMITINH, NULL); if (rc) { /* protect against do_prlimit() */ @@ -2675,7 +2716,8 @@ static void selinux_bprm_committed_creds(struct linux_binprm *bprm) * This must occur _after_ the task SID has been updated so that any * kill done after the flush will be checked against the new SID. */ - rc = avc_has_perm(osid, sid, SECCLASS_PROCESS, PROCESS__SIGINH, NULL); + rc = avc_has_perm(&selinux_state, + osid, sid, SECCLASS_PROCESS, PROCESS__SIGINH, NULL); if (rc) { if (IS_ENABLED(CONFIG_POSIX_TIMERS)) { memset(&itimer, 0, sizeof itimer); @@ -3105,7 +3147,8 @@ static int selinux_inode_follow_link(struct dentry *dentry, struct inode *inode, if (IS_ERR(isec)) return PTR_ERR(isec); - return avc_has_perm_flags(sid, isec->sid, isec->sclass, FILE__READ, &ad, + return avc_has_perm_flags(&selinux_state, + sid, isec->sid, isec->sclass, FILE__READ, &ad, rcu ? MAY_NOT_BLOCK : 0); } @@ -3121,7 +3164,8 @@ static noinline int audit_inode_permission(struct inode *inode, ad.type = LSM_AUDIT_DATA_INODE; ad.u.inode = inode; - rc = slow_avc_audit(current_sid(), isec->sid, isec->sclass, perms, + rc = slow_avc_audit(&selinux_state, + current_sid(), isec->sid, isec->sclass, perms, audited, denied, result, &ad, flags); if (rc) return rc; @@ -3159,7 +3203,8 @@ static int selinux_inode_permission(struct inode *inode, int mask) if (IS_ERR(isec)) return PTR_ERR(isec); - rc = avc_has_perm_noaudit(sid, isec->sid, isec->sclass, perms, 0, &avd); + rc = avc_has_perm_noaudit(&selinux_state, + sid, isec->sid, isec->sclass, perms, 0, &avd); audited = avc_audit_required(perms, &avd, rc, from_access ? FILE__AUDIT_ACCESS : 0, &denied); @@ -3262,7 +3307,8 @@ static int selinux_inode_setxattr(struct dentry *dentry, const char *name, ad.u.dentry = dentry; isec = backing_inode_security(dentry); - rc = avc_has_perm(sid, isec->sid, isec->sclass, + rc = avc_has_perm(&selinux_state, + sid, isec->sid, isec->sclass, FILE__RELABELFROM, &ad); if (rc) return rc; @@ -3300,7 +3346,8 @@ static int selinux_inode_setxattr(struct dentry *dentry, const char *name, if (rc) return rc; - rc = avc_has_perm(sid, newsid, isec->sclass, + rc = avc_has_perm(&selinux_state, + sid, newsid, isec->sclass, FILE__RELABELTO, &ad); if (rc) return rc; @@ -3310,7 +3357,8 @@ static int selinux_inode_setxattr(struct dentry *dentry, const char *name, if (rc) return rc; - return avc_has_perm(newsid, + return avc_has_perm(&selinux_state, + newsid, sbsec->sid, SECCLASS_FILESYSTEM, FILESYSTEM__ASSOCIATE, @@ -3525,7 +3573,7 @@ static int selinux_file_permission(struct file *file, int mask) isec = inode_security(inode); if (sid == fsec->sid && fsec->isid == isec->sid && - fsec->pseqno == avc_policy_seqno()) + fsec->pseqno == avc_policy_seqno(&selinux_state)) /* No change since file_open check. */ return 0; @@ -3565,7 +3613,8 @@ static int ioctl_has_perm(const struct cred *cred, struct file *file, ad.u.op->path = file->f_path; if (ssid != fsec->sid) { - rc = avc_has_perm(ssid, fsec->sid, + rc = avc_has_perm(&selinux_state, + ssid, fsec->sid, SECCLASS_FD, FD__USE, &ad); @@ -3577,8 +3626,9 @@ static int ioctl_has_perm(const struct cred *cred, struct file *file, return 0; isec = inode_security(inode); - rc = avc_has_extended_perms(ssid, isec->sid, isec->sclass, - requested, driver, xperm, &ad); + rc = avc_has_extended_perms(&selinux_state, + ssid, isec->sid, isec->sclass, + requested, driver, xperm, &ad); out: return rc; } @@ -3646,7 +3696,8 @@ static int file_map_prot_check(struct file *file, unsigned long prot, int shared * private file mapping that will also be writable. * This has an additional check. */ - rc = avc_has_perm(sid, sid, SECCLASS_PROCESS, + rc = avc_has_perm(&selinux_state, + sid, sid, SECCLASS_PROCESS, PROCESS__EXECMEM, NULL); if (rc) goto error; @@ -3676,7 +3727,8 @@ static int selinux_mmap_addr(unsigned long addr) if (addr < CONFIG_LSM_MMAP_MIN_ADDR) { u32 sid = current_sid(); - rc = avc_has_perm(sid, sid, SECCLASS_MEMPROTECT, + rc = avc_has_perm(&selinux_state, + sid, sid, SECCLASS_MEMPROTECT, MEMPROTECT__MMAP_ZERO, NULL); } @@ -3720,13 +3772,15 @@ static int selinux_file_mprotect(struct vm_area_struct *vma, int rc = 0; if (vma->vm_start >= vma->vm_mm->start_brk && vma->vm_end <= vma->vm_mm->brk) { - rc = avc_has_perm(sid, sid, SECCLASS_PROCESS, + rc = avc_has_perm(&selinux_state, + sid, sid, SECCLASS_PROCESS, PROCESS__EXECHEAP, NULL); } else if (!vma->vm_file && ((vma->vm_start <= vma->vm_mm->start_stack && vma->vm_end >= vma->vm_mm->start_stack) || vma_is_stack_for_current(vma))) { - rc = avc_has_perm(sid, sid, SECCLASS_PROCESS, + rc = avc_has_perm(&selinux_state, + sid, sid, SECCLASS_PROCESS, PROCESS__EXECSTACK, NULL); } else if (vma->vm_file && vma->anon_vma) { /* @@ -3818,7 +3872,8 @@ static int selinux_file_send_sigiotask(struct task_struct *tsk, else perm = signal_to_av(signum); - return avc_has_perm(fsec->fown_sid, sid, + return avc_has_perm(&selinux_state, + fsec->fown_sid, sid, SECCLASS_PROCESS, perm, NULL); } @@ -3844,7 +3899,7 @@ static int selinux_file_open(struct file *file, const struct cred *cred) * struct as its SID. */ fsec->isid = isec->sid; - fsec->pseqno = avc_policy_seqno(); + fsec->pseqno = avc_policy_seqno(&selinux_state); /* * Since the inode label or policy seqno may have changed * between the selinux_inode_permission check and the saving @@ -3863,7 +3918,8 @@ static int selinux_task_alloc(struct task_struct *task, { u32 sid = current_sid(); - return avc_has_perm(sid, sid, SECCLASS_PROCESS, PROCESS__FORK, NULL); + return avc_has_perm(&selinux_state, + sid, sid, SECCLASS_PROCESS, PROCESS__FORK, NULL); } /* @@ -3937,7 +3993,8 @@ static int selinux_kernel_act_as(struct cred *new, u32 secid) u32 sid = current_sid(); int ret; - ret = avc_has_perm(sid, secid, + ret = avc_has_perm(&selinux_state, + sid, secid, SECCLASS_KERNEL_SERVICE, KERNEL_SERVICE__USE_AS_OVERRIDE, NULL); @@ -3961,7 +4018,8 @@ static int selinux_kernel_create_files_as(struct cred *new, struct inode *inode) u32 sid = current_sid(); int ret; - ret = avc_has_perm(sid, isec->sid, + ret = avc_has_perm(&selinux_state, + sid, isec->sid, SECCLASS_KERNEL_SERVICE, KERNEL_SERVICE__CREATE_FILES_AS, NULL); @@ -3978,7 +4036,8 @@ static int selinux_kernel_module_request(char *kmod_name) ad.type = LSM_AUDIT_DATA_KMOD; ad.u.kmod_name = kmod_name; - return avc_has_perm(current_sid(), SECINITSID_KERNEL, SECCLASS_SYSTEM, + return avc_has_perm(&selinux_state, + current_sid(), SECINITSID_KERNEL, SECCLASS_SYSTEM, SYSTEM__MODULE_REQUEST, &ad); } @@ -3992,7 +4051,8 @@ static int selinux_kernel_module_from_file(struct file *file) /* init_module */ if (file == NULL) - return avc_has_perm(sid, sid, SECCLASS_SYSTEM, + return avc_has_perm(&selinux_state, + sid, sid, SECCLASS_SYSTEM, SYSTEM__MODULE_LOAD, NULL); /* finit_module */ @@ -4002,13 +4062,15 @@ static int selinux_kernel_module_from_file(struct file *file) fsec = file->f_security; if (sid != fsec->sid) { - rc = avc_has_perm(sid, fsec->sid, SECCLASS_FD, FD__USE, &ad); + rc = avc_has_perm(&selinux_state, + sid, fsec->sid, SECCLASS_FD, FD__USE, &ad); if (rc) return rc; } isec = inode_security(file_inode(file)); - return avc_has_perm(sid, isec->sid, SECCLASS_SYSTEM, + return avc_has_perm(&selinux_state, + sid, isec->sid, SECCLASS_SYSTEM, SYSTEM__MODULE_LOAD, &ad); } @@ -4030,19 +4092,22 @@ static int selinux_kernel_read_file(struct file *file, static int selinux_task_setpgid(struct task_struct *p, pid_t pgid) { - return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS, + return avc_has_perm(&selinux_state, + current_sid(), task_sid(p), SECCLASS_PROCESS, PROCESS__SETPGID, NULL); } static int selinux_task_getpgid(struct task_struct *p) { - return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS, + return avc_has_perm(&selinux_state, + current_sid(), task_sid(p), SECCLASS_PROCESS, PROCESS__GETPGID, NULL); } static int selinux_task_getsid(struct task_struct *p) { - return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS, + return avc_has_perm(&selinux_state, + current_sid(), task_sid(p), SECCLASS_PROCESS, PROCESS__GETSESSION, NULL); } @@ -4053,19 +4118,22 @@ static void selinux_task_getsecid(struct task_struct *p, u32 *secid) static int selinux_task_setnice(struct task_struct *p, int nice) { - return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS, + return avc_has_perm(&selinux_state, + current_sid(), task_sid(p), SECCLASS_PROCESS, PROCESS__SETSCHED, NULL); } static int selinux_task_setioprio(struct task_struct *p, int ioprio) { - return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS, + return avc_has_perm(&selinux_state, + current_sid(), task_sid(p), SECCLASS_PROCESS, PROCESS__SETSCHED, NULL); } static int selinux_task_getioprio(struct task_struct *p) { - return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS, + return avc_has_perm(&selinux_state, + current_sid(), task_sid(p), SECCLASS_PROCESS, PROCESS__GETSCHED, NULL); } @@ -4080,7 +4148,8 @@ int selinux_task_prlimit(const struct cred *cred, const struct cred *tcred, av |= PROCESS__SETRLIMIT; if (flags & LSM_PRLIMIT_READ) av |= PROCESS__GETRLIMIT; - return avc_has_perm(cred_sid(cred), cred_sid(tcred), + return avc_has_perm(&selinux_state, + cred_sid(cred), cred_sid(tcred), SECCLASS_PROCESS, av, NULL); } @@ -4094,7 +4163,8 @@ static int selinux_task_setrlimit(struct task_struct *p, unsigned int resource, later be used as a safe reset point for the soft limit upon context transitions. See selinux_bprm_committing_creds. */ if (old_rlim->rlim_max != new_rlim->rlim_max) - return avc_has_perm(current_sid(), task_sid(p), + return avc_has_perm(&selinux_state, + current_sid(), task_sid(p), SECCLASS_PROCESS, PROCESS__SETRLIMIT, NULL); return 0; @@ -4102,19 +4172,22 @@ static int selinux_task_setrlimit(struct task_struct *p, unsigned int resource, static int selinux_task_setscheduler(struct task_struct *p) { - return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS, + return avc_has_perm(&selinux_state, + current_sid(), task_sid(p), SECCLASS_PROCESS, PROCESS__SETSCHED, NULL); } static int selinux_task_getscheduler(struct task_struct *p) { - return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS, + return avc_has_perm(&selinux_state, + current_sid(), task_sid(p), SECCLASS_PROCESS, PROCESS__GETSCHED, NULL); } static int selinux_task_movememory(struct task_struct *p) { - return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS, + return avc_has_perm(&selinux_state, + current_sid(), task_sid(p), SECCLASS_PROCESS, PROCESS__SETSCHED, NULL); } @@ -4129,7 +4202,8 @@ static int selinux_task_kill(struct task_struct *p, struct siginfo *info, perm = signal_to_av(sig); if (!secid) secid = current_sid(); - return avc_has_perm(secid, task_sid(p), SECCLASS_PROCESS, perm, NULL); + return avc_has_perm(&selinux_state, + secid, task_sid(p), SECCLASS_PROCESS, perm, NULL); } static void selinux_task_to_inode(struct task_struct *p, @@ -4434,7 +4508,8 @@ static int sock_has_perm(struct sock *sk, u32 perms) ad.u.net = &net; ad.u.net->sk = sk; - return avc_has_perm(current_sid(), sksec->sid, sksec->sclass, perms, + return avc_has_perm(&selinux_state, + current_sid(), sksec->sid, sksec->sclass, perms, &ad); } @@ -4454,7 +4529,8 @@ static int selinux_socket_create(int family, int type, if (rc) return rc; - return avc_has_perm(tsec->sid, newsid, secclass, SOCKET__CREATE, NULL); + return avc_has_perm(&selinux_state, + tsec->sid, newsid, secclass, SOCKET__CREATE, NULL); } static int selinux_socket_post_create(struct socket *sock, int family, @@ -4550,7 +4626,8 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in ad.u.net = &net; ad.u.net->sport = htons(snum); ad.u.net->family = family; - err = avc_has_perm(sksec->sid, sid, + err = avc_has_perm(&selinux_state, + sksec->sid, sid, sksec->sclass, SOCKET__NAME_BIND, &ad); if (err) @@ -4590,7 +4667,8 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in else ad.u.net->v6info.saddr = addr6->sin6_addr; - err = avc_has_perm(sksec->sid, sid, + err = avc_has_perm(&selinux_state, + sksec->sid, sid, sksec->sclass, node_perm, &ad); if (err) goto out; @@ -4644,7 +4722,8 @@ static int selinux_socket_connect(struct socket *sock, struct sockaddr *address, ad.u.net = &net; ad.u.net->dport = htons(snum); ad.u.net->family = sk->sk_family; - err = avc_has_perm(sksec->sid, sid, sksec->sclass, perm, &ad); + err = avc_has_perm(&selinux_state, + sksec->sid, sid, sksec->sclass, perm, &ad); if (err) goto out; } @@ -4745,7 +4824,8 @@ static int selinux_socket_unix_stream_connect(struct sock *sock, ad.u.net = &net; ad.u.net->sk = other; - err = avc_has_perm(sksec_sock->sid, sksec_other->sid, + err = avc_has_perm(&selinux_state, + sksec_sock->sid, sksec_other->sid, sksec_other->sclass, UNIX_STREAM_SOCKET__CONNECTTO, &ad); if (err) @@ -4776,7 +4856,8 @@ static int selinux_socket_unix_may_send(struct socket *sock, ad.u.net = &net; ad.u.net->sk = other->sk; - return avc_has_perm(ssec->sid, osec->sid, osec->sclass, SOCKET__SENDTO, + return avc_has_perm(&selinux_state, + ssec->sid, osec->sid, osec->sclass, SOCKET__SENDTO, &ad); } @@ -4791,7 +4872,8 @@ static int selinux_inet_sys_rcv_skb(struct net *ns, int ifindex, err = sel_netif_sid(ns, ifindex, &if_sid); if (err) return err; - err = avc_has_perm(peer_sid, if_sid, + err = avc_has_perm(&selinux_state, + peer_sid, if_sid, SECCLASS_NETIF, NETIF__INGRESS, ad); if (err) return err; @@ -4799,7 +4881,8 @@ static int selinux_inet_sys_rcv_skb(struct net *ns, int ifindex, err = sel_netnode_sid(addrp, family, &node_sid); if (err) return err; - return avc_has_perm(peer_sid, node_sid, + return avc_has_perm(&selinux_state, + peer_sid, node_sid, SECCLASS_NODE, NODE__RECVFROM, ad); } @@ -4822,7 +4905,8 @@ static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb, return err; if (selinux_secmark_enabled()) { - err = avc_has_perm(sk_sid, skb->secmark, SECCLASS_PACKET, + err = avc_has_perm(&selinux_state, + sk_sid, skb->secmark, SECCLASS_PACKET, PACKET__RECV, &ad); if (err) return err; @@ -4887,7 +4971,8 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) selinux_netlbl_err(skb, family, err, 0); return err; } - err = avc_has_perm(sk_sid, peer_sid, SECCLASS_PEER, + err = avc_has_perm(&selinux_state, + sk_sid, peer_sid, SECCLASS_PEER, PEER__RECV, &ad); if (err) { selinux_netlbl_err(skb, family, err, 0); @@ -4896,7 +4981,8 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) } if (secmark_active) { - err = avc_has_perm(sk_sid, skb->secmark, SECCLASS_PACKET, + err = avc_has_perm(&selinux_state, + sk_sid, skb->secmark, SECCLASS_PACKET, PACKET__RECV, &ad); if (err) return err; @@ -5087,7 +5173,9 @@ static int selinux_secmark_relabel_packet(u32 sid) __tsec = current_security(); tsid = __tsec->sid; - return avc_has_perm(tsid, sid, SECCLASS_PACKET, PACKET__RELABELTO, NULL); + return avc_has_perm(&selinux_state, + tsid, sid, SECCLASS_PACKET, PACKET__RELABELTO, + NULL); } static void selinux_secmark_refcount_inc(void) @@ -5135,7 +5223,8 @@ static int selinux_tun_dev_create(void) * connections unlike traditional sockets - check the TUN driver to * get a better understanding of why this socket is special */ - return avc_has_perm(sid, sid, SECCLASS_TUN_SOCKET, TUN_SOCKET__CREATE, + return avc_has_perm(&selinux_state, + sid, sid, SECCLASS_TUN_SOCKET, TUN_SOCKET__CREATE, NULL); } @@ -5143,7 +5232,8 @@ static int selinux_tun_dev_attach_queue(void *security) { struct tun_security_struct *tunsec = security; - return avc_has_perm(current_sid(), tunsec->sid, SECCLASS_TUN_SOCKET, + return avc_has_perm(&selinux_state, + current_sid(), tunsec->sid, SECCLASS_TUN_SOCKET, TUN_SOCKET__ATTACH_QUEUE, NULL); } @@ -5171,11 +5261,13 @@ static int selinux_tun_dev_open(void *security) u32 sid = current_sid(); int err; - err = avc_has_perm(sid, tunsec->sid, SECCLASS_TUN_SOCKET, + err = avc_has_perm(&selinux_state, + sid, tunsec->sid, SECCLASS_TUN_SOCKET, TUN_SOCKET__RELABELFROM, NULL); if (err) return err; - err = avc_has_perm(sid, sid, SECCLASS_TUN_SOCKET, + err = avc_has_perm(&selinux_state, + sid, sid, SECCLASS_TUN_SOCKET, TUN_SOCKET__RELABELTO, NULL); if (err) return err; @@ -5266,7 +5358,8 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, } if (secmark_active) - if (avc_has_perm(peer_sid, skb->secmark, + if (avc_has_perm(&selinux_state, + peer_sid, skb->secmark, SECCLASS_PACKET, PACKET__FORWARD_IN, &ad)) return NF_DROP; @@ -5378,7 +5471,8 @@ static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb, return NF_DROP; if (selinux_secmark_enabled()) - if (avc_has_perm(sksec->sid, skb->secmark, + if (avc_has_perm(&selinux_state, + sksec->sid, skb->secmark, SECCLASS_PACKET, PACKET__SEND, &ad)) return NF_DROP_ERR(-ECONNREFUSED); @@ -5501,7 +5595,8 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, return NF_DROP; if (secmark_active) - if (avc_has_perm(peer_sid, skb->secmark, + if (avc_has_perm(&selinux_state, + peer_sid, skb->secmark, SECCLASS_PACKET, secmark_perm, &ad)) return NF_DROP_ERR(-ECONNREFUSED); @@ -5511,13 +5606,15 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, if (sel_netif_sid(dev_net(outdev), ifindex, &if_sid)) return NF_DROP; - if (avc_has_perm(peer_sid, if_sid, + if (avc_has_perm(&selinux_state, + peer_sid, if_sid, SECCLASS_NETIF, NETIF__EGRESS, &ad)) return NF_DROP_ERR(-ECONNREFUSED); if (sel_netnode_sid(addrp, family, &node_sid)) return NF_DROP; - if (avc_has_perm(peer_sid, node_sid, + if (avc_has_perm(&selinux_state, + peer_sid, node_sid, SECCLASS_NODE, NODE__SENDTO, &ad)) return NF_DROP_ERR(-ECONNREFUSED); } @@ -5605,7 +5702,8 @@ static int ipc_has_perm(struct kern_ipc_perm *ipc_perms, ad.type = LSM_AUDIT_DATA_IPC; ad.u.ipc_id = ipc_perms->key; - return avc_has_perm(sid, isec->sid, isec->sclass, perms, &ad); + return avc_has_perm(&selinux_state, + sid, isec->sid, isec->sclass, perms, &ad); } static int selinux_msg_msg_alloc_security(struct msg_msg *msg) @@ -5635,7 +5733,8 @@ static int selinux_msg_queue_alloc_security(struct msg_queue *msq) ad.type = LSM_AUDIT_DATA_IPC; ad.u.ipc_id = msq->q_perm.key; - rc = avc_has_perm(sid, isec->sid, SECCLASS_MSGQ, + rc = avc_has_perm(&selinux_state, + sid, isec->sid, SECCLASS_MSGQ, MSGQ__CREATE, &ad); if (rc) { ipc_free_security(&msq->q_perm); @@ -5660,7 +5759,8 @@ static int selinux_msg_queue_associate(struct msg_queue *msq, int msqflg) ad.type = LSM_AUDIT_DATA_IPC; ad.u.ipc_id = msq->q_perm.key; - return avc_has_perm(sid, isec->sid, SECCLASS_MSGQ, + return avc_has_perm(&selinux_state, + sid, isec->sid, SECCLASS_MSGQ, MSGQ__ASSOCIATE, &ad); } @@ -5673,7 +5773,8 @@ static int selinux_msg_queue_msgctl(struct msg_queue *msq, int cmd) case IPC_INFO: case MSG_INFO: /* No specific object, just general system-wide information. */ - return avc_has_perm(current_sid(), SECINITSID_KERNEL, + return avc_has_perm(&selinux_state, + current_sid(), SECINITSID_KERNEL, SECCLASS_SYSTEM, SYSTEM__IPC_INFO, NULL); case IPC_STAT: case MSG_STAT: @@ -5722,15 +5823,18 @@ static int selinux_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg, ad.u.ipc_id = msq->q_perm.key; /* Can this process write to the queue? */ - rc = avc_has_perm(sid, isec->sid, SECCLASS_MSGQ, + rc = avc_has_perm(&selinux_state, + sid, isec->sid, SECCLASS_MSGQ, MSGQ__WRITE, &ad); if (!rc) /* Can this process send the message */ - rc = avc_has_perm(sid, msec->sid, SECCLASS_MSG, + rc = avc_has_perm(&selinux_state, + sid, msec->sid, SECCLASS_MSG, MSG__SEND, &ad); if (!rc) /* Can the message be put in the queue? */ - rc = avc_has_perm(msec->sid, isec->sid, SECCLASS_MSGQ, + rc = avc_has_perm(&selinux_state, + msec->sid, isec->sid, SECCLASS_MSGQ, MSGQ__ENQUEUE, &ad); return rc; @@ -5752,10 +5856,12 @@ static int selinux_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg, ad.type = LSM_AUDIT_DATA_IPC; ad.u.ipc_id = msq->q_perm.key; - rc = avc_has_perm(sid, isec->sid, + rc = avc_has_perm(&selinux_state, + sid, isec->sid, SECCLASS_MSGQ, MSGQ__READ, &ad); if (!rc) - rc = avc_has_perm(sid, msec->sid, + rc = avc_has_perm(&selinux_state, + sid, msec->sid, SECCLASS_MSG, MSG__RECEIVE, &ad); return rc; } @@ -5777,7 +5883,8 @@ static int selinux_shm_alloc_security(struct shmid_kernel *shp) ad.type = LSM_AUDIT_DATA_IPC; ad.u.ipc_id = shp->shm_perm.key; - rc = avc_has_perm(sid, isec->sid, SECCLASS_SHM, + rc = avc_has_perm(&selinux_state, + sid, isec->sid, SECCLASS_SHM, SHM__CREATE, &ad); if (rc) { ipc_free_security(&shp->shm_perm); @@ -5802,7 +5909,8 @@ static int selinux_shm_associate(struct shmid_kernel *shp, int shmflg) ad.type = LSM_AUDIT_DATA_IPC; ad.u.ipc_id = shp->shm_perm.key; - return avc_has_perm(sid, isec->sid, SECCLASS_SHM, + return avc_has_perm(&selinux_state, + sid, isec->sid, SECCLASS_SHM, SHM__ASSOCIATE, &ad); } @@ -5816,7 +5924,8 @@ static int selinux_shm_shmctl(struct shmid_kernel *shp, int cmd) case IPC_INFO: case SHM_INFO: /* No specific object, just general system-wide information. */ - return avc_has_perm(current_sid(), SECINITSID_KERNEL, + return avc_has_perm(&selinux_state, + current_sid(), SECINITSID_KERNEL, SECCLASS_SYSTEM, SYSTEM__IPC_INFO, NULL); case IPC_STAT: case SHM_STAT: @@ -5870,7 +5979,8 @@ static int selinux_sem_alloc_security(struct sem_array *sma) ad.type = LSM_AUDIT_DATA_IPC; ad.u.ipc_id = sma->sem_perm.key; - rc = avc_has_perm(sid, isec->sid, SECCLASS_SEM, + rc = avc_has_perm(&selinux_state, + sid, isec->sid, SECCLASS_SEM, SEM__CREATE, &ad); if (rc) { ipc_free_security(&sma->sem_perm); @@ -5895,7 +6005,8 @@ static int selinux_sem_associate(struct sem_array *sma, int semflg) ad.type = LSM_AUDIT_DATA_IPC; ad.u.ipc_id = sma->sem_perm.key; - return avc_has_perm(sid, isec->sid, SECCLASS_SEM, + return avc_has_perm(&selinux_state, + sid, isec->sid, SECCLASS_SEM, SEM__ASSOCIATE, &ad); } @@ -5909,7 +6020,8 @@ static int selinux_sem_semctl(struct sem_array *sma, int cmd) case IPC_INFO: case SEM_INFO: /* No specific object, just general system-wide information. */ - return avc_has_perm(current_sid(), SECINITSID_KERNEL, + return avc_has_perm(&selinux_state, + current_sid(), SECINITSID_KERNEL, SECCLASS_SYSTEM, SYSTEM__IPC_INFO, NULL); case GETPID: case GETNCNT: @@ -5995,7 +6107,8 @@ static int selinux_getprocattr(struct task_struct *p, __tsec = __task_cred(p)->security; if (current != p) { - error = avc_has_perm(current_sid(), __tsec->sid, + error = avc_has_perm(&selinux_state, + current_sid(), __tsec->sid, SECCLASS_PROCESS, PROCESS__GETATTR, NULL); if (error) goto bad; @@ -6044,19 +6157,24 @@ static int selinux_setprocattr(const char *name, void *value, size_t size) * Basic control over ability to set these attributes at all. */ if (!strcmp(name, "exec")) - error = avc_has_perm(mysid, mysid, SECCLASS_PROCESS, + error = avc_has_perm(&selinux_state, + mysid, mysid, SECCLASS_PROCESS, PROCESS__SETEXEC, NULL); else if (!strcmp(name, "fscreate")) - error = avc_has_perm(mysid, mysid, SECCLASS_PROCESS, + error = avc_has_perm(&selinux_state, + mysid, mysid, SECCLASS_PROCESS, PROCESS__SETFSCREATE, NULL); else if (!strcmp(name, "keycreate")) - error = avc_has_perm(mysid, mysid, SECCLASS_PROCESS, + error = avc_has_perm(&selinux_state, + mysid, mysid, SECCLASS_PROCESS, PROCESS__SETKEYCREATE, NULL); else if (!strcmp(name, "sockcreate")) - error = avc_has_perm(mysid, mysid, SECCLASS_PROCESS, + error = avc_has_perm(&selinux_state, + mysid, mysid, SECCLASS_PROCESS, PROCESS__SETSOCKCREATE, NULL); else if (!strcmp(name, "current")) - error = avc_has_perm(mysid, mysid, SECCLASS_PROCESS, + error = avc_has_perm(&selinux_state, + mysid, mysid, SECCLASS_PROCESS, PROCESS__SETCURRENT, NULL); else error = -EINVAL; @@ -6113,7 +6231,8 @@ static int selinux_setprocattr(const char *name, void *value, size_t size) } else if (!strcmp(name, "fscreate")) { tsec->create_sid = sid; } else if (!strcmp(name, "keycreate")) { - error = avc_has_perm(mysid, sid, SECCLASS_KEY, KEY__CREATE, + error = avc_has_perm(&selinux_state, + mysid, sid, SECCLASS_KEY, KEY__CREATE, NULL); if (error) goto abort_change; @@ -6135,7 +6254,8 @@ static int selinux_setprocattr(const char *name, void *value, size_t size) } /* Check permissions for the transition. */ - error = avc_has_perm(tsec->sid, sid, SECCLASS_PROCESS, + error = avc_has_perm(&selinux_state, + tsec->sid, sid, SECCLASS_PROCESS, PROCESS__DYNTRANSITION, NULL); if (error) goto abort_change; @@ -6144,7 +6264,8 @@ static int selinux_setprocattr(const char *name, void *value, size_t size) Otherwise, leave SID unchanged and fail. */ ptsid = ptrace_parent_sid(); if (ptsid != 0) { - error = avc_has_perm(ptsid, sid, SECCLASS_PROCESS, + error = avc_has_perm(&selinux_state, + ptsid, sid, SECCLASS_PROCESS, PROCESS__PTRACE, NULL); if (error) goto abort_change; @@ -6273,7 +6394,8 @@ static int selinux_key_permission(key_ref_t key_ref, key = key_ref_to_ptr(key_ref); ksec = key->security; - return avc_has_perm(sid, ksec->sid, SECCLASS_KEY, perm, NULL); + return avc_has_perm(&selinux_state, + sid, ksec->sid, SECCLASS_KEY, perm, NULL); } static int selinux_key_getsecurity(struct key *key, char **_buffer) @@ -6309,7 +6431,8 @@ static int selinux_ib_pkey_access(void *ib_sec, u64 subnet_prefix, u16 pkey_val) ibpkey.subnet_prefix = subnet_prefix; ibpkey.pkey = pkey_val; ad.u.ibpkey = &ibpkey; - return avc_has_perm(sec->sid, sid, + return avc_has_perm(&selinux_state, + sec->sid, sid, SECCLASS_INFINIBAND_PKEY, INFINIBAND_PKEY__ACCESS, &ad); } @@ -6333,7 +6456,8 @@ static int selinux_ib_endport_manage_subnet(void *ib_sec, const char *dev_name, strncpy(ibendport.dev_name, dev_name, sizeof(ibendport.dev_name)); ibendport.port = port_num; ad.u.ibendport = &ibendport; - return avc_has_perm(sec->sid, sid, + return avc_has_perm(&selinux_state, + sec->sid, sid, SECCLASS_INFINIBAND_ENDPORT, INFINIBAND_ENDPORT__MANAGE_SUBNET, &ad); } @@ -6366,11 +6490,13 @@ static int selinux_bpf(int cmd, union bpf_attr *attr, switch (cmd) { case BPF_MAP_CREATE: - ret = avc_has_perm(sid, sid, SECCLASS_BPF, BPF__MAP_CREATE, + ret = avc_has_perm(&selinux_state, + sid, sid, SECCLASS_BPF, BPF__MAP_CREATE, NULL); break; case BPF_PROG_LOAD: - ret = avc_has_perm(sid, sid, SECCLASS_BPF, BPF__PROG_LOAD, + ret = avc_has_perm(&selinux_state, + sid, sid, SECCLASS_BPF, BPF__PROG_LOAD, NULL); break; default: @@ -6410,14 +6536,16 @@ static int bpf_fd_pass(struct file *file, u32 sid) if (file->f_op == &bpf_map_fops) { map = file->private_data; bpfsec = map->security; - ret = avc_has_perm(sid, bpfsec->sid, SECCLASS_BPF, + ret = avc_has_perm(&selinux_state, + sid, bpfsec->sid, SECCLASS_BPF, bpf_map_fmode_to_av(file->f_mode), NULL); if (ret) return ret; } else if (file->f_op == &bpf_prog_fops) { prog = file->private_data; bpfsec = prog->aux->security; - ret = avc_has_perm(sid, bpfsec->sid, SECCLASS_BPF, + ret = avc_has_perm(&selinux_state, + sid, bpfsec->sid, SECCLASS_BPF, BPF__PROG_RUN, NULL); if (ret) return ret; @@ -6431,7 +6559,8 @@ static int selinux_bpf_map(struct bpf_map *map, fmode_t fmode) struct bpf_security_struct *bpfsec; bpfsec = map->security; - return avc_has_perm(sid, bpfsec->sid, SECCLASS_BPF, + return avc_has_perm(&selinux_state, + sid, bpfsec->sid, SECCLASS_BPF, bpf_map_fmode_to_av(fmode), NULL); } @@ -6441,7 +6570,8 @@ static int selinux_bpf_prog(struct bpf_prog *prog) struct bpf_security_struct *bpfsec; bpfsec = prog->aux->security; - return avc_has_perm(sid, bpfsec->sid, SECCLASS_BPF, + return avc_has_perm(&selinux_state, + sid, bpfsec->sid, SECCLASS_BPF, BPF__PROG_RUN, NULL); } @@ -6809,6 +6939,7 @@ static __init int selinux_init(void) enforcing_set(&selinux_state, selinux_enforcing_boot); selinux_state.checkreqprot = selinux_checkreqprot_boot; selinux_ss_init(&selinux_state.ss); + selinux_avc_init(&selinux_state.avc); /* Set the security state for the initial task. */ cred_init_security(); diff --git a/security/selinux/include/avc.h b/security/selinux/include/avc.h index de33dc9034b8..ef899bcfd2cb 100644 --- a/security/selinux/include/avc.h +++ b/security/selinux/include/avc.h @@ -52,6 +52,7 @@ struct selinux_audit_data { u32 audited; u32 denied; int result; + struct selinux_state *state; }; /* @@ -96,7 +97,8 @@ static inline u32 avc_audit_required(u32 requested, return audited; } -int slow_avc_audit(u32 ssid, u32 tsid, u16 tclass, +int slow_avc_audit(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, u32 requested, u32 audited, u32 denied, int result, struct common_audit_data *a, unsigned flags); @@ -121,7 +123,8 @@ int slow_avc_audit(u32 ssid, u32 tsid, u16 tclass, * be performed under a lock, to allow the lock to be released * before calling the auditing code. */ -static inline int avc_audit(u32 ssid, u32 tsid, +static inline int avc_audit(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, u32 requested, struct av_decision *avd, int result, @@ -132,31 +135,35 @@ static inline int avc_audit(u32 ssid, u32 tsid, audited = avc_audit_required(requested, avd, result, 0, &denied); if (likely(!audited)) return 0; - return slow_avc_audit(ssid, tsid, tclass, + return slow_avc_audit(state, ssid, tsid, tclass, requested, audited, denied, result, a, flags); } #define AVC_STRICT 1 /* Ignore permissive mode. */ #define AVC_EXTENDED_PERMS 2 /* update extended permissions */ -int avc_has_perm_noaudit(u32 ssid, u32 tsid, +int avc_has_perm_noaudit(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, u32 requested, unsigned flags, struct av_decision *avd); -int avc_has_perm(u32 ssid, u32 tsid, +int avc_has_perm(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, u32 requested, struct common_audit_data *auditdata); -int avc_has_perm_flags(u32 ssid, u32 tsid, +int avc_has_perm_flags(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, u32 requested, struct common_audit_data *auditdata, int flags); -int avc_has_extended_perms(u32 ssid, u32 tsid, u16 tclass, u32 requested, - u8 driver, u8 perm, struct common_audit_data *ad); +int avc_has_extended_perms(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, u32 requested, + u8 driver, u8 perm, struct common_audit_data *ad); -u32 avc_policy_seqno(void); +u32 avc_policy_seqno(struct selinux_state *state); #define AVC_CALLBACK_GRANT 1 #define AVC_CALLBACK_TRY_REVOKE 2 @@ -171,8 +178,11 @@ u32 avc_policy_seqno(void); int avc_add_callback(int (*callback)(u32 event), u32 events); /* Exported to selinuxfs */ -int avc_get_hash_stats(char *page); -extern unsigned int avc_cache_threshold; +struct selinux_avc; +int avc_get_hash_stats(struct selinux_avc *avc, char *page); +unsigned int avc_get_cache_threshold(struct selinux_avc *avc); +void avc_set_cache_threshold(struct selinux_avc *avc, + unsigned int cache_threshold); /* Attempt to free avc node cache */ void avc_disable(void); diff --git a/security/selinux/include/avc_ss.h b/security/selinux/include/avc_ss.h index 4e2a44d0ae66..88c384c5c09e 100644 --- a/security/selinux/include/avc_ss.h +++ b/security/selinux/include/avc_ss.h @@ -9,7 +9,8 @@ #include "flask.h" -int avc_ss_reset(u32 seqno); +struct selinux_avc; +int avc_ss_reset(struct selinux_avc *avc, u32 seqno); /* Class/perm mapping support */ struct security_class_mapping { diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index f1db09a5f521..23e762d529fa 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -93,6 +93,7 @@ extern char *selinux_policycap_names[__POLICYDB_CAPABILITY_MAX]; /* limitation of boundary depth */ #define POLICYDB_BOUNDS_MAXDEPTH 4 +struct selinux_avc; struct selinux_ss; struct selinux_state { @@ -103,10 +104,12 @@ struct selinux_state { bool checkreqprot; bool initialized; bool policycap[__POLICYDB_CAPABILITY_MAX]; + struct selinux_avc *avc; struct selinux_ss *ss; }; void selinux_ss_init(struct selinux_ss **ss); +void selinux_avc_init(struct selinux_avc **avc); extern struct selinux_state selinux_state; diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c index 00544f74d6ec..c99884a48fc4 100644 --- a/security/selinux/netlabel.c +++ b/security/selinux/netlabel.c @@ -406,7 +406,8 @@ int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, perm = RAWIP_SOCKET__RECVFROM; } - rc = avc_has_perm(sksec->sid, nlbl_sid, sksec->sclass, perm, ad); + rc = avc_has_perm(&selinux_state, + sksec->sid, nlbl_sid, sksec->sclass, perm, ad); if (rc == 0) return 0; diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 41099cc3d5e2..4be683eeba01 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -161,7 +161,8 @@ static ssize_t sel_write_enforce(struct file *file, const char __user *buf, old_value = enforcing_enabled(state); if (new_value != old_value) { - length = avc_has_perm(current_sid(), SECINITSID_SECURITY, + length = avc_has_perm(&selinux_state, + current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__SETENFORCE, NULL); if (length) @@ -173,7 +174,7 @@ static ssize_t sel_write_enforce(struct file *file, const char __user *buf, audit_get_sessionid(current)); enforcing_set(state, new_value); if (new_value) - avc_ss_reset(0); + avc_ss_reset(state->avc, 0); selnl_notify_setenforce(new_value); selinux_status_update_setenforce(state, new_value); if (!new_value) @@ -375,7 +376,8 @@ static int sel_open_policy(struct inode *inode, struct file *filp) mutex_lock(&fsi->mutex); - rc = avc_has_perm(current_sid(), SECINITSID_SECURITY, + rc = avc_has_perm(&selinux_state, + current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__READ_POLICY, NULL); if (rc) goto err; @@ -439,7 +441,8 @@ static ssize_t sel_read_policy(struct file *filp, char __user *buf, mutex_lock(&fsi->mutex); - ret = avc_has_perm(current_sid(), SECINITSID_SECURITY, + ret = avc_has_perm(&selinux_state, + current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__READ_POLICY, NULL); if (ret) goto out; @@ -535,7 +538,8 @@ static ssize_t sel_write_load(struct file *file, const char __user *buf, mutex_lock(&fsi->mutex); - length = avc_has_perm(current_sid(), SECINITSID_SECURITY, + length = avc_has_perm(&selinux_state, + current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__LOAD_POLICY, NULL); if (length) goto out; @@ -594,7 +598,8 @@ static ssize_t sel_write_context(struct file *file, char *buf, size_t size) u32 sid, len; ssize_t length; - length = avc_has_perm(current_sid(), SECINITSID_SECURITY, + length = avc_has_perm(&selinux_state, + current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__CHECK_CONTEXT, NULL); if (length) goto out; @@ -640,7 +645,8 @@ static ssize_t sel_write_checkreqprot(struct file *file, const char __user *buf, ssize_t length; unsigned int new_value; - length = avc_has_perm(current_sid(), SECINITSID_SECURITY, + length = avc_has_perm(&selinux_state, + current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__SETCHECKREQPROT, NULL); if (length) @@ -685,7 +691,8 @@ static ssize_t sel_write_validatetrans(struct file *file, u16 tclass; int rc; - rc = avc_has_perm(current_sid(), SECINITSID_SECURITY, + rc = avc_has_perm(&selinux_state, + current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__VALIDATE_TRANS, NULL); if (rc) goto out; @@ -813,7 +820,8 @@ static ssize_t sel_write_access(struct file *file, char *buf, size_t size) struct av_decision avd; ssize_t length; - length = avc_has_perm(current_sid(), SECINITSID_SECURITY, + length = avc_has_perm(&selinux_state, + current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__COMPUTE_AV, NULL); if (length) goto out; @@ -866,7 +874,8 @@ static ssize_t sel_write_create(struct file *file, char *buf, size_t size) u32 len; int nargs; - length = avc_has_perm(current_sid(), SECINITSID_SECURITY, + length = avc_has_perm(&selinux_state, + current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__COMPUTE_CREATE, NULL); if (length) @@ -967,7 +976,8 @@ static ssize_t sel_write_relabel(struct file *file, char *buf, size_t size) char *newcon = NULL; u32 len; - length = avc_has_perm(current_sid(), SECINITSID_SECURITY, + length = avc_has_perm(&selinux_state, + current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__COMPUTE_RELABEL, NULL); if (length) @@ -1027,7 +1037,8 @@ static ssize_t sel_write_user(struct file *file, char *buf, size_t size) int i, rc; u32 len, nsids; - length = avc_has_perm(current_sid(), SECINITSID_SECURITY, + length = avc_has_perm(&selinux_state, + current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__COMPUTE_USER, NULL); if (length) @@ -1091,7 +1102,8 @@ static ssize_t sel_write_member(struct file *file, char *buf, size_t size) char *newcon = NULL; u32 len; - length = avc_has_perm(current_sid(), SECINITSID_SECURITY, + length = avc_has_perm(&selinux_state, + current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__COMPUTE_MEMBER, NULL); if (length) @@ -1203,7 +1215,8 @@ static ssize_t sel_write_bool(struct file *filep, const char __user *buf, mutex_lock(&fsi->mutex); - length = avc_has_perm(current_sid(), SECINITSID_SECURITY, + length = avc_has_perm(&selinux_state, + current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__SETBOOL, NULL); if (length) @@ -1263,7 +1276,8 @@ static ssize_t sel_commit_bools_write(struct file *filep, mutex_lock(&fsi->mutex); - length = avc_has_perm(current_sid(), SECINITSID_SECURITY, + length = avc_has_perm(&selinux_state, + current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__SETBOOL, NULL); if (length) @@ -1403,10 +1417,13 @@ out: static ssize_t sel_read_avc_cache_threshold(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(filp)->i_sb->s_fs_info; + struct selinux_state *state = fsi->state; char tmpbuf[TMPBUFLEN]; ssize_t length; - length = scnprintf(tmpbuf, TMPBUFLEN, "%u", avc_cache_threshold); + length = scnprintf(tmpbuf, TMPBUFLEN, "%u", + avc_get_cache_threshold(state->avc)); return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); } @@ -1415,11 +1432,14 @@ static ssize_t sel_write_avc_cache_threshold(struct file *file, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; + struct selinux_state *state = fsi->state; char *page; ssize_t ret; unsigned int new_value; - ret = avc_has_perm(current_sid(), SECINITSID_SECURITY, + ret = avc_has_perm(&selinux_state, + current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__SETSECPARAM, NULL); if (ret) @@ -1440,7 +1460,7 @@ static ssize_t sel_write_avc_cache_threshold(struct file *file, if (sscanf(page, "%u", &new_value) != 1) goto out; - avc_cache_threshold = new_value; + avc_set_cache_threshold(state->avc, new_value); ret = count; out: @@ -1451,6 +1471,8 @@ out: static ssize_t sel_read_avc_hash_stats(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(filp)->i_sb->s_fs_info; + struct selinux_state *state = fsi->state; char *page; ssize_t length; @@ -1458,7 +1480,7 @@ static ssize_t sel_read_avc_hash_stats(struct file *filp, char __user *buf, if (!page) return -ENOMEM; - length = avc_get_hash_stats(page); + length = avc_get_hash_stats(state->avc, page); if (length >= 0) length = simple_read_from_buffer(buf, count, ppos, page, length); free_page((unsigned long)page); diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 70714eb56101..4f401949d81c 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -2151,7 +2151,7 @@ int security_load_policy(struct selinux_state *state, void *data, size_t len) state->initialized = 1; seqno = ++state->ss->latest_granting; selinux_complete_init(); - avc_ss_reset(seqno); + avc_ss_reset(state->avc, seqno); selnl_notify_policyload(seqno); selinux_status_update_policyload(state, seqno); selinux_netlbl_cache_invalidate(); @@ -2233,7 +2233,7 @@ int security_load_policy(struct selinux_state *state, void *data, size_t len) sidtab_destroy(&oldsidtab); kfree(oldmapping); - avc_ss_reset(seqno); + avc_ss_reset(state->avc, seqno); selnl_notify_policyload(seqno); selinux_status_update_policyload(state, seqno); selinux_netlbl_cache_invalidate(); @@ -2649,7 +2649,8 @@ out_unlock: } for (i = 0, j = 0; i < mynel; i++) { struct av_decision dummy_avd; - rc = avc_has_perm_noaudit(fromsid, mysids[i], + rc = avc_has_perm_noaudit(state, + fromsid, mysids[i], SECCLASS_PROCESS, /* kernel value */ PROCESS__TRANSITION, AVC_STRICT, &dummy_avd); @@ -2900,7 +2901,7 @@ int security_set_bools(struct selinux_state *state, int len, int *values) out: write_unlock_irq(&state->ss->policy_rwlock); if (!rc) { - avc_ss_reset(seqno); + avc_ss_reset(state->avc, seqno); selnl_notify_policyload(seqno); selinux_status_update_policyload(state, seqno); selinux_xfrm_notify_policyload(); diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index dfbe4f32ef85..fa1430cfe691 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -106,7 +106,8 @@ static int selinux_xfrm_alloc_user(struct xfrm_sec_ctx **ctxp, if (rc) goto err; - rc = avc_has_perm(tsec->sid, ctx->ctx_sid, + rc = avc_has_perm(&selinux_state, + tsec->sid, ctx->ctx_sid, SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT, NULL); if (rc) goto err; @@ -142,7 +143,8 @@ static int selinux_xfrm_delete(struct xfrm_sec_ctx *ctx) if (!ctx) return 0; - return avc_has_perm(tsec->sid, ctx->ctx_sid, + return avc_has_perm(&selinux_state, + tsec->sid, ctx->ctx_sid, SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT, NULL); } @@ -164,7 +166,8 @@ int selinux_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir) if (!selinux_authorizable_ctx(ctx)) return -EINVAL; - rc = avc_has_perm(fl_secid, ctx->ctx_sid, + rc = avc_has_perm(&selinux_state, + fl_secid, ctx->ctx_sid, SECCLASS_ASSOCIATION, ASSOCIATION__POLMATCH, NULL); return (rc == -EACCES ? -ESRCH : rc); } @@ -203,7 +206,8 @@ int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, /* We don't need a separate SA Vs. policy polmatch check since the SA * is now of the same label as the flow and a flow Vs. policy polmatch * check had already happened in selinux_xfrm_policy_lookup() above. */ - return (avc_has_perm(fl->flowi_secid, state_sid, + return (avc_has_perm(&selinux_state, + fl->flowi_secid, state_sid, SECCLASS_ASSOCIATION, ASSOCIATION__SENDTO, NULL) ? 0 : 1); } @@ -422,7 +426,8 @@ int selinux_xfrm_sock_rcv_skb(u32 sk_sid, struct sk_buff *skb, /* This check even when there's no association involved is intended, * according to Trent Jaeger, to make sure a process can't engage in * non-IPsec communication unless explicitly allowed by policy. */ - return avc_has_perm(sk_sid, peer_sid, + return avc_has_perm(&selinux_state, + sk_sid, peer_sid, SECCLASS_ASSOCIATION, ASSOCIATION__RECVFROM, ad); } @@ -465,6 +470,6 @@ int selinux_xfrm_postroute_last(u32 sk_sid, struct sk_buff *skb, /* This check even when there's no association involved is intended, * according to Trent Jaeger, to make sure a process can't engage in * non-IPsec communication unless explicitly allowed by policy. */ - return avc_has_perm(sk_sid, SECINITSID_UNLABELED, + return avc_has_perm(&selinux_state, sk_sid, SECINITSID_UNLABELED, SECCLASS_ASSOCIATION, ASSOCIATION__SENDTO, ad); } From c23e7a185db69dcbacd88d0f2c3debc56f952c9b Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Tue, 13 Nov 2018 14:52:53 +0100 Subject: [PATCH 2042/3715] UPSTREAM: selinux: refactor sidtab conversion This is a purely cosmetic change that encapsulates the three-step sidtab conversion logic (shutdown -> clone -> map) into a single function defined in sidtab.c (as opposed to services.c). Signed-off-by: Ondrej Mosnacek Acked-by: Stephen Smalley [PM: whitespaces fixes to make checkpatch happy] Signed-off-by: Paul Moore (cherry picked from commit 5386e6caa6711407182573e2b0344fe908b0fbcc) Change-Id: If23e6e40bd0c920cac090c2e9379d148d3ac6ca3 Bug: 140252993 Signed-off-by: Jeff Vander Stoep --- security/selinux/ss/services.c | 22 +-------------- security/selinux/ss/sidtab.c | 50 ++++++++++++++++++++++++---------- security/selinux/ss/sidtab.h | 11 ++++---- 3 files changed, 42 insertions(+), 41 deletions(-) diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 4f401949d81c..8413ec9ac799 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -1886,19 +1886,6 @@ int security_change_sid(struct selinux_state *state, out_sid, false); } -/* Clone the SID into the new SID table. */ -static int clone_sid(u32 sid, - struct context *context, - void *arg) -{ - struct sidtab *s = arg; - - if (sid > SECINITSID_NUM) - return sidtab_insert(s, sid, context); - else - return 0; -} - static inline int convert_context_handle_invalid_context( struct selinux_state *state, struct context *context) @@ -2191,13 +2178,6 @@ int security_load_policy(struct selinux_state *state, void *data, size_t len) goto err; } - /* Clone the SID table. */ - sidtab_shutdown(sidtab); - - rc = sidtab_map(sidtab, clone_sid, &newsidtab); - if (rc) - goto err; - /* * Convert the internal representations of contexts * in the new SID table. @@ -2205,7 +2185,7 @@ int security_load_policy(struct selinux_state *state, void *data, size_t len) args.state = state; args.oldp = policydb; args.newp = newpolicydb; - rc = sidtab_map(&newsidtab, convert_context, &args); + rc = sidtab_convert(sidtab, &newsidtab, convert_context, &args); if (rc) { printk(KERN_ERR "SELinux: unable to convert the internal" " representation of contexts in the new SID" diff --git a/security/selinux/ss/sidtab.c b/security/selinux/ss/sidtab.c index 5be31b7af225..f86459434087 100644 --- a/security/selinux/ss/sidtab.c +++ b/security/selinux/ss/sidtab.c @@ -116,11 +116,11 @@ struct context *sidtab_search_force(struct sidtab *s, u32 sid) return sidtab_search_core(s, sid, 1); } -int sidtab_map(struct sidtab *s, - int (*apply) (u32 sid, - struct context *context, - void *args), - void *args) +static int sidtab_map(struct sidtab *s, + int (*apply)(u32 sid, + struct context *context, + void *args), + void *args) { int i, rc = 0; struct sidtab_node *cur; @@ -141,6 +141,37 @@ out: return rc; } +/* Clone the SID into the new SID table. */ +static int clone_sid(u32 sid, struct context *context, void *arg) +{ + struct sidtab *s = arg; + + if (sid > SECINITSID_NUM) + return sidtab_insert(s, sid, context); + else + return 0; +} + +int sidtab_convert(struct sidtab *s, struct sidtab *news, + int (*convert)(u32 sid, + struct context *context, + void *args), + void *args) +{ + unsigned long flags; + int rc; + + spin_lock_irqsave(&s->lock, flags); + s->shutdown = 1; + spin_unlock_irqrestore(&s->lock, flags); + + rc = sidtab_map(s, clone_sid, news); + if (rc) + return rc; + + return sidtab_map(news, convert, args); +} + static void sidtab_update_cache(struct sidtab *s, struct sidtab_node *n, int loc) { BUG_ON(loc >= SIDTAB_CACHE_LEN); @@ -296,12 +327,3 @@ void sidtab_set(struct sidtab *dst, struct sidtab *src) dst->cache[i] = NULL; spin_unlock_irqrestore(&src->lock, flags); } - -void sidtab_shutdown(struct sidtab *s) -{ - unsigned long flags; - - spin_lock_irqsave(&s->lock, flags); - s->shutdown = 1; - spin_unlock_irqrestore(&s->lock, flags); -} diff --git a/security/selinux/ss/sidtab.h b/security/selinux/ss/sidtab.h index a1a1d2617b6f..e1d1f0beb17c 100644 --- a/security/selinux/ss/sidtab.h +++ b/security/selinux/ss/sidtab.h @@ -37,11 +37,11 @@ int sidtab_insert(struct sidtab *s, u32 sid, struct context *context); struct context *sidtab_search(struct sidtab *s, u32 sid); struct context *sidtab_search_force(struct sidtab *s, u32 sid); -int sidtab_map(struct sidtab *s, - int (*apply) (u32 sid, - struct context *context, - void *args), - void *args); +int sidtab_convert(struct sidtab *s, struct sidtab *news, + int (*apply)(u32 sid, + struct context *context, + void *args), + void *args); int sidtab_context_to_sid(struct sidtab *s, struct context *context, @@ -50,7 +50,6 @@ int sidtab_context_to_sid(struct sidtab *s, void sidtab_hash_eval(struct sidtab *h, char *tag); void sidtab_destroy(struct sidtab *s); void sidtab_set(struct sidtab *dst, struct sidtab *src); -void sidtab_shutdown(struct sidtab *s); #endif /* _SS_SIDTAB_H_ */ From 64509e76ad57b0f817c084fa5035ebbd62ff95b2 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 24 Nov 2018 12:11:32 +0300 Subject: [PATCH 2043/3715] UPSTREAM: selinux: make "selinux_policycap_names[]" const char * Those strings aren't written. Signed-off-by: Alexey Dobriyan Signed-off-by: Paul Moore (cherry picked from commit 89f5bebcf0401dac470756869587a50dd72ff7b5) Change-Id: I968b2f6197b2d749b3786089b942655d1880258a Bug: 140252993 Signed-off-by: Jeff Vander Stoep --- security/selinux/include/security.h | 2 +- security/selinux/ss/services.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index 23e762d529fa..ba8eedf42b90 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -81,7 +81,7 @@ enum { }; #define POLICYDB_CAPABILITY_MAX (__POLICYDB_CAPABILITY_MAX - 1) -extern char *selinux_policycap_names[__POLICYDB_CAPABILITY_MAX]; +extern const char *selinux_policycap_names[__POLICYDB_CAPABILITY_MAX]; /* * type_datum properties diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 8413ec9ac799..bc4db8c3e461 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -71,7 +71,7 @@ #include "audit.h" /* Policy capability names */ -char *selinux_policycap_names[__POLICYDB_CAPABILITY_MAX] = { +const char *selinux_policycap_names[__POLICYDB_CAPABILITY_MAX] = { "network_peer_controls", "open_perms", "extended_socket_class", From b134d23d3db00b635986c06a51831748cec8c248 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Fri, 30 Nov 2018 16:24:07 +0100 Subject: [PATCH 2044/3715] BACKPORT: selinux: use separate table for initial SID lookup This moves handling of initial SIDs into a separate table. Note that the SIDs stored in the main table are now shifted by SECINITSID_NUM and converted to/from the actual SIDs transparently by helper functions. This change doesn't make much sense on its own, but it simplifies further sidtab overhaul in a succeeding patch. Signed-off-by: Ondrej Mosnacek Reviewed-by: Stephen Smalley [PM: fixed some checkpatch warnings on line length, whitespace] Signed-off-by: Paul Moore (cherry picked from commit 24ed7fdae669feda4c5e0dadba2467c4c0d297d3) Included flask.h in sidtab.h and policydb.c. Change-Id: Ie12a7095fba32bb6eaee5d959f98ab6f9bc01b9a Bug: 140252993 Signed-off-by: Jeff Vander Stoep --- security/selinux/ss/policydb.c | 11 ++- security/selinux/ss/services.c | 88 +++++++++-------- security/selinux/ss/services.h | 2 +- security/selinux/ss/sidtab.c | 170 ++++++++++++++++++++------------- security/selinux/ss/sidtab.h | 16 +++- 5 files changed, 176 insertions(+), 111 deletions(-) diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index 9d9f6bb1e56e..7c8b7f365980 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -43,6 +43,7 @@ #include "conditional.h" #include "mls.h" #include "services.h" +#include "flask.h" #define _DEBUG_HASHES @@ -912,13 +913,21 @@ int policydb_load_isids(struct policydb *p, struct sidtab *s) if (!c->context[0].user) { printk(KERN_ERR "SELinux: SID %s was never defined.\n", c->u.name); + sidtab_destroy(s); + goto out; + } + if (c->sid[0] == SECSID_NULL || c->sid[0] > SECINITSID_NUM) { + pr_err("SELinux: Initial SID %s out of range.\n", + c->u.name); + sidtab_destroy(s); goto out; } - rc = sidtab_insert(s, c->sid[0], &c->context[0]); + rc = sidtab_set_initial(s, c->sid[0], &c->context[0]); if (rc) { printk(KERN_ERR "SELinux: unable to load initial SID %s.\n", c->u.name); + sidtab_destroy(s); goto out; } } diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index bc4db8c3e461..70d05faa5da2 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -778,7 +778,7 @@ static int security_compute_validatetrans(struct selinux_state *state, read_lock(&state->ss->policy_rwlock); policydb = &state->ss->policydb; - sidtab = &state->ss->sidtab; + sidtab = state->ss->sidtab; if (!user) tclass = unmap_class(&state->ss->map, orig_tclass); @@ -878,7 +878,7 @@ int security_bounded_transition(struct selinux_state *state, read_lock(&state->ss->policy_rwlock); policydb = &state->ss->policydb; - sidtab = &state->ss->sidtab; + sidtab = state->ss->sidtab; rc = -EINVAL; old_context = sidtab_search(sidtab, old_sid); @@ -1036,7 +1036,7 @@ void security_compute_xperms_decision(struct selinux_state *state, goto allow; policydb = &state->ss->policydb; - sidtab = &state->ss->sidtab; + sidtab = state->ss->sidtab; scontext = sidtab_search(sidtab, ssid); if (!scontext) { @@ -1125,7 +1125,7 @@ void security_compute_av(struct selinux_state *state, goto allow; policydb = &state->ss->policydb; - sidtab = &state->ss->sidtab; + sidtab = state->ss->sidtab; scontext = sidtab_search(sidtab, ssid); if (!scontext) { @@ -1179,7 +1179,7 @@ void security_compute_av_user(struct selinux_state *state, goto allow; policydb = &state->ss->policydb; - sidtab = &state->ss->sidtab; + sidtab = state->ss->sidtab; scontext = sidtab_search(sidtab, ssid); if (!scontext) { @@ -1317,7 +1317,7 @@ static int security_sid_to_context_core(struct selinux_state *state, } read_lock(&state->ss->policy_rwlock); policydb = &state->ss->policydb; - sidtab = &state->ss->sidtab; + sidtab = state->ss->sidtab; if (force) context = sidtab_search_force(sidtab, sid); else @@ -1489,7 +1489,7 @@ static int security_context_to_sid_core(struct selinux_state *state, } read_lock(&state->ss->policy_rwlock); policydb = &state->ss->policydb; - sidtab = &state->ss->sidtab; + sidtab = state->ss->sidtab; rc = string_to_context_struct(policydb, sidtab, scontext2, scontext_len, &context, def_sid); if (rc == -EINVAL && force) { @@ -1674,7 +1674,7 @@ static int security_compute_sid(struct selinux_state *state, } policydb = &state->ss->policydb; - sidtab = &state->ss->sidtab; + sidtab = state->ss->sidtab; scontext = sidtab_search(sidtab, ssid); if (!scontext) { @@ -1930,10 +1930,7 @@ static int convert_context(u32 key, struct user_datum *usrdatum; char *s; u32 len; - int rc = 0; - - if (key <= SECINITSID_NUM) - goto out; + int rc; args = p; @@ -2095,9 +2092,8 @@ static int security_preserve_bools(struct selinux_state *state, int security_load_policy(struct selinux_state *state, void *data, size_t len) { struct policydb *policydb; - struct sidtab *sidtab; + struct sidtab *oldsidtab, *newsidtab; struct policydb *oldpolicydb, *newpolicydb; - struct sidtab oldsidtab, newsidtab; struct selinux_mapping *oldmapping; struct selinux_map newmap; struct convert_context_args args; @@ -2113,27 +2109,37 @@ int security_load_policy(struct selinux_state *state, void *data, size_t len) newpolicydb = oldpolicydb + 1; policydb = &state->ss->policydb; - sidtab = &state->ss->sidtab; + + newsidtab = kmalloc(sizeof(*newsidtab), GFP_KERNEL); + if (!newsidtab) { + rc = -ENOMEM; + goto out; + } if (!state->initialized) { rc = policydb_read(policydb, fp); - if (rc) + if (rc) { + kfree(newsidtab); goto out; + } policydb->len = len; rc = selinux_set_mapping(policydb, secclass_map, &state->ss->map); if (rc) { + kfree(newsidtab); policydb_destroy(policydb); goto out; } - rc = policydb_load_isids(policydb, sidtab); + rc = policydb_load_isids(policydb, newsidtab); if (rc) { + kfree(newsidtab); policydb_destroy(policydb); goto out; } + state->ss->sidtab = newsidtab; security_load_policycaps(state); state->initialized = 1; seqno = ++state->ss->latest_granting; @@ -2146,13 +2152,17 @@ int security_load_policy(struct selinux_state *state, void *data, size_t len) goto out; } + oldsidtab = state->ss->sidtab; + #if 0 - sidtab_hash_eval(sidtab, "sids"); + sidtab_hash_eval(oldsidtab, "sids"); #endif rc = policydb_read(newpolicydb, fp); - if (rc) + if (rc) { + kfree(newsidtab); goto out; + } newpolicydb->len = len; /* If switching between different policy types, log MLS status */ @@ -2161,10 +2171,11 @@ int security_load_policy(struct selinux_state *state, void *data, size_t len) else if (!policydb->mls_enabled && newpolicydb->mls_enabled) printk(KERN_INFO "SELinux: Enabling MLS support...\n"); - rc = policydb_load_isids(newpolicydb, &newsidtab); + rc = policydb_load_isids(newpolicydb, newsidtab); if (rc) { printk(KERN_ERR "SELinux: unable to load the initial SIDs\n"); policydb_destroy(newpolicydb); + kfree(newsidtab); goto out; } @@ -2185,7 +2196,7 @@ int security_load_policy(struct selinux_state *state, void *data, size_t len) args.state = state; args.oldp = policydb; args.newp = newpolicydb; - rc = sidtab_convert(sidtab, &newsidtab, convert_context, &args); + rc = sidtab_convert(oldsidtab, newsidtab, convert_context, &args); if (rc) { printk(KERN_ERR "SELinux: unable to convert the internal" " representation of contexts in the new SID" @@ -2195,12 +2206,11 @@ int security_load_policy(struct selinux_state *state, void *data, size_t len) /* Save the old policydb and SID table to free later. */ memcpy(oldpolicydb, policydb, sizeof(*policydb)); - sidtab_set(&oldsidtab, sidtab); /* Install the new policydb and SID table. */ write_lock_irq(&state->ss->policy_rwlock); memcpy(policydb, newpolicydb, sizeof(*policydb)); - sidtab_set(sidtab, &newsidtab); + state->ss->sidtab = newsidtab; security_load_policycaps(state); oldmapping = state->ss->map.mapping; state->ss->map.mapping = newmap.mapping; @@ -2210,7 +2220,8 @@ int security_load_policy(struct selinux_state *state, void *data, size_t len) /* Free the old policydb and SID table. */ policydb_destroy(oldpolicydb); - sidtab_destroy(&oldsidtab); + sidtab_destroy(oldsidtab); + kfree(oldsidtab); kfree(oldmapping); avc_ss_reset(state->avc, seqno); @@ -2224,7 +2235,8 @@ int security_load_policy(struct selinux_state *state, void *data, size_t len) err: kfree(newmap.mapping); - sidtab_destroy(&newsidtab); + sidtab_destroy(newsidtab); + kfree(newsidtab); policydb_destroy(newpolicydb); out: @@ -2261,7 +2273,7 @@ int security_port_sid(struct selinux_state *state, read_lock(&state->ss->policy_rwlock); policydb = &state->ss->policydb; - sidtab = &state->ss->sidtab; + sidtab = state->ss->sidtab; c = policydb->ocontexts[OCON_PORT]; while (c) { @@ -2307,7 +2319,7 @@ int security_ib_pkey_sid(struct selinux_state *state, read_lock(&state->ss->policy_rwlock); policydb = &state->ss->policydb; - sidtab = &state->ss->sidtab; + sidtab = state->ss->sidtab; c = policydb->ocontexts[OCON_IBPKEY]; while (c) { @@ -2353,7 +2365,7 @@ int security_ib_endport_sid(struct selinux_state *state, read_lock(&state->ss->policy_rwlock); policydb = &state->ss->policydb; - sidtab = &state->ss->sidtab; + sidtab = state->ss->sidtab; c = policydb->ocontexts[OCON_IBENDPORT]; while (c) { @@ -2399,7 +2411,7 @@ int security_netif_sid(struct selinux_state *state, read_lock(&state->ss->policy_rwlock); policydb = &state->ss->policydb; - sidtab = &state->ss->sidtab; + sidtab = state->ss->sidtab; c = policydb->ocontexts[OCON_NETIF]; while (c) { @@ -2464,7 +2476,7 @@ int security_node_sid(struct selinux_state *state, read_lock(&state->ss->policy_rwlock); policydb = &state->ss->policydb; - sidtab = &state->ss->sidtab; + sidtab = state->ss->sidtab; switch (domain) { case AF_INET: { @@ -2564,7 +2576,7 @@ int security_get_user_sids(struct selinux_state *state, read_lock(&state->ss->policy_rwlock); policydb = &state->ss->policydb; - sidtab = &state->ss->sidtab; + sidtab = state->ss->sidtab; context_init(&usercon); @@ -2666,7 +2678,7 @@ static inline int __security_genfs_sid(struct selinux_state *state, u32 *sid) { struct policydb *policydb = &state->ss->policydb; - struct sidtab *sidtab = &state->ss->sidtab; + struct sidtab *sidtab = state->ss->sidtab; int len; u16 sclass; struct genfs *genfs; @@ -2752,7 +2764,7 @@ int security_fs_use(struct selinux_state *state, struct super_block *sb) read_lock(&state->ss->policy_rwlock); policydb = &state->ss->policydb; - sidtab = &state->ss->sidtab; + sidtab = state->ss->sidtab; c = policydb->ocontexts[OCON_FSUSE]; while (c) { @@ -2951,7 +2963,7 @@ int security_sid_mls_copy(struct selinux_state *state, u32 sid, u32 mls_sid, u32 *new_sid) { struct policydb *policydb = &state->ss->policydb; - struct sidtab *sidtab = &state->ss->sidtab; + struct sidtab *sidtab = state->ss->sidtab; struct context *context1; struct context *context2; struct context newcon; @@ -3042,7 +3054,7 @@ int security_net_peersid_resolve(struct selinux_state *state, u32 *peer_sid) { struct policydb *policydb = &state->ss->policydb; - struct sidtab *sidtab = &state->ss->sidtab; + struct sidtab *sidtab = state->ss->sidtab; int rc; struct context *nlbl_ctx; struct context *xfrm_ctx; @@ -3397,7 +3409,7 @@ int selinux_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule, goto out; } - ctxt = sidtab_search(&state->ss->sidtab, sid); + ctxt = sidtab_search(state->ss->sidtab, sid); if (unlikely(!ctxt)) { WARN_ONCE(1, "selinux_audit_rule_match: unrecognized SID %d\n", sid); @@ -3560,7 +3572,7 @@ int security_netlbl_secattr_to_sid(struct selinux_state *state, u32 *sid) { struct policydb *policydb = &state->ss->policydb; - struct sidtab *sidtab = &state->ss->sidtab; + struct sidtab *sidtab = state->ss->sidtab; int rc; struct context *ctx; struct context ctx_new; @@ -3638,7 +3650,7 @@ int security_netlbl_sid_to_secattr(struct selinux_state *state, read_lock(&state->ss->policy_rwlock); rc = -ENOENT; - ctx = sidtab_search(&state->ss->sidtab, sid); + ctx = sidtab_search(state->ss->sidtab, sid); if (ctx == NULL) goto out; diff --git a/security/selinux/ss/services.h b/security/selinux/ss/services.h index 24c7bdcc8075..9a36de860368 100644 --- a/security/selinux/ss/services.h +++ b/security/selinux/ss/services.h @@ -24,7 +24,7 @@ struct selinux_map { }; struct selinux_ss { - struct sidtab sidtab; + struct sidtab *sidtab; struct policydb policydb; rwlock_t policy_rwlock; u32 latest_granting; diff --git a/security/selinux/ss/sidtab.c b/security/selinux/ss/sidtab.c index f86459434087..a8e61eb8c894 100644 --- a/security/selinux/ss/sidtab.c +++ b/security/selinux/ss/sidtab.c @@ -22,16 +22,24 @@ int sidtab_init(struct sidtab *s) s->htable = kmalloc_array(SIDTAB_SIZE, sizeof(*s->htable), GFP_ATOMIC); if (!s->htable) return -ENOMEM; + + for (i = 0; i < SECINITSID_NUM; i++) + s->isids[i].set = 0; + for (i = 0; i < SIDTAB_SIZE; i++) s->htable[i] = NULL; + + for (i = 0; i < SIDTAB_CACHE_LEN; i++) + s->cache[i] = NULL; + s->nel = 0; - s->next_sid = 1; + s->next_sid = 0; s->shutdown = 0; spin_lock_init(&s->lock); return 0; } -int sidtab_insert(struct sidtab *s, u32 sid, struct context *context) +static int sidtab_insert(struct sidtab *s, u32 sid, struct context *context) { int hvalue; struct sidtab_node *prev, *cur, *newnode; @@ -76,36 +84,64 @@ int sidtab_insert(struct sidtab *s, u32 sid, struct context *context) return 0; } -static struct context *sidtab_search_core(struct sidtab *s, u32 sid, int force) +int sidtab_set_initial(struct sidtab *s, u32 sid, struct context *context) +{ + struct sidtab_isid_entry *entry; + int rc; + + if (sid == 0 || sid > SECINITSID_NUM) + return -EINVAL; + + entry = &s->isids[sid - 1]; + + rc = context_cpy(&entry->context, context); + if (rc) + return rc; + + entry->set = 1; + return 0; +} + +static struct context *sidtab_lookup(struct sidtab *s, u32 sid) { int hvalue; struct sidtab_node *cur; - if (!s) - return NULL; - hvalue = SIDTAB_HASH(sid); cur = s->htable[hvalue]; while (cur && sid > cur->sid) cur = cur->next; - if (force && cur && sid == cur->sid && cur->context.len) - return &cur->context; - - if (!cur || sid != cur->sid || cur->context.len) { - /* Remap invalid SIDs to the unlabeled SID. */ - sid = SECINITSID_UNLABELED; - hvalue = SIDTAB_HASH(sid); - cur = s->htable[hvalue]; - while (cur && sid > cur->sid) - cur = cur->next; - if (!cur || sid != cur->sid) - return NULL; - } + if (!cur || sid != cur->sid) + return NULL; return &cur->context; } +static struct context *sidtab_lookup_initial(struct sidtab *s, u32 sid) +{ + return s->isids[sid - 1].set ? &s->isids[sid - 1].context : NULL; +} + +static struct context *sidtab_search_core(struct sidtab *s, u32 sid, int force) +{ + struct context *context; + + if (!s) + return NULL; + + if (sid != 0) { + if (sid > SECINITSID_NUM) + context = sidtab_lookup(s, sid - (SECINITSID_NUM + 1)); + else + context = sidtab_lookup_initial(s, sid); + if (context && (!context->len || force)) + return context; + } + + return sidtab_lookup_initial(s, SECINITSID_UNLABELED); +} + struct context *sidtab_search(struct sidtab *s, u32 sid) { return sidtab_search_core(s, sid, 0); @@ -145,11 +181,7 @@ out: static int clone_sid(u32 sid, struct context *context, void *arg) { struct sidtab *s = arg; - - if (sid > SECINITSID_NUM) - return sidtab_insert(s, sid, context); - else - return 0; + return sidtab_insert(s, sid, context); } int sidtab_convert(struct sidtab *s, struct sidtab *news, @@ -183,8 +215,8 @@ static void sidtab_update_cache(struct sidtab *s, struct sidtab_node *n, int loc s->cache[0] = n; } -static inline u32 sidtab_search_context(struct sidtab *s, - struct context *context) +static inline int sidtab_search_context(struct sidtab *s, + struct context *context, u32 *sid) { int i; struct sidtab_node *cur; @@ -194,15 +226,17 @@ static inline u32 sidtab_search_context(struct sidtab *s, while (cur) { if (context_cmp(&cur->context, context)) { sidtab_update_cache(s, cur, SIDTAB_CACHE_LEN - 1); - return cur->sid; + *sid = cur->sid; + return 0; } cur = cur->next; } } - return 0; + return -ENOENT; } -static inline u32 sidtab_search_cache(struct sidtab *s, struct context *context) +static inline int sidtab_search_cache(struct sidtab *s, struct context *context, + u32 *sid) { int i; struct sidtab_node *node; @@ -210,55 +244,70 @@ static inline u32 sidtab_search_cache(struct sidtab *s, struct context *context) for (i = 0; i < SIDTAB_CACHE_LEN; i++) { node = s->cache[i]; if (unlikely(!node)) - return 0; + return -ENOENT; if (context_cmp(&node->context, context)) { sidtab_update_cache(s, node, i); - return node->sid; + *sid = node->sid; + return 0; } } - return 0; + return -ENOENT; } -int sidtab_context_to_sid(struct sidtab *s, - struct context *context, - u32 *out_sid) +static int sidtab_reverse_lookup(struct sidtab *s, struct context *context, + u32 *sid) { - u32 sid; - int ret = 0; + int ret; unsigned long flags; - *out_sid = SECSID_NULL; - - sid = sidtab_search_cache(s, context); - if (!sid) - sid = sidtab_search_context(s, context); - if (!sid) { + ret = sidtab_search_cache(s, context, sid); + if (ret) + ret = sidtab_search_context(s, context, sid); + if (ret) { spin_lock_irqsave(&s->lock, flags); /* Rescan now that we hold the lock. */ - sid = sidtab_search_context(s, context); - if (sid) + ret = sidtab_search_context(s, context, sid); + if (!ret) goto unlock_out; /* No SID exists for the context. Allocate a new one. */ - if (s->next_sid == UINT_MAX || s->shutdown) { + if (s->next_sid == (UINT_MAX - SECINITSID_NUM - 1) || + s->shutdown) { ret = -ENOMEM; goto unlock_out; } - sid = s->next_sid++; + *sid = s->next_sid++; if (context->len) printk(KERN_INFO "SELinux: Context %s is not valid (left unmapped).\n", context->str); - ret = sidtab_insert(s, sid, context); + ret = sidtab_insert(s, *sid, context); if (ret) s->next_sid--; unlock_out: spin_unlock_irqrestore(&s->lock, flags); } - if (ret) - return ret; + return ret; +} - *out_sid = sid; +int sidtab_context_to_sid(struct sidtab *s, struct context *context, u32 *sid) +{ + int rc; + u32 i; + + for (i = 0; i < SECINITSID_NUM; i++) { + struct sidtab_isid_entry *entry = &s->isids[i]; + + if (entry->set && context_cmp(context, &entry->context)) { + *sid = i + 1; + return 0; + } + } + + rc = sidtab_reverse_lookup(s, context, sid); + if (rc) + return rc; + *sid += SECINITSID_NUM + 1; return 0; } @@ -297,6 +346,10 @@ void sidtab_destroy(struct sidtab *s) if (!s) return; + for (i = 0; i < SECINITSID_NUM; i++) + if (s->isids[i].set) + context_destroy(&s->isids[i].context); + for (i = 0; i < SIDTAB_SIZE; i++) { cur = s->htable[i]; while (cur) { @@ -312,18 +365,3 @@ void sidtab_destroy(struct sidtab *s) s->nel = 0; s->next_sid = 1; } - -void sidtab_set(struct sidtab *dst, struct sidtab *src) -{ - unsigned long flags; - int i; - - spin_lock_irqsave(&src->lock, flags); - dst->htable = src->htable; - dst->nel = src->nel; - dst->next_sid = src->next_sid; - dst->shutdown = 0; - for (i = 0; i < SIDTAB_CACHE_LEN; i++) - dst->cache[i] = NULL; - spin_unlock_irqrestore(&src->lock, flags); -} diff --git a/security/selinux/ss/sidtab.h b/security/selinux/ss/sidtab.h index e1d1f0beb17c..b83b4621614c 100644 --- a/security/selinux/ss/sidtab.h +++ b/security/selinux/ss/sidtab.h @@ -9,6 +9,7 @@ #define _SS_SIDTAB_H_ #include "context.h" +#include "flask.h" struct sidtab_node { u32 sid; /* security identifier */ @@ -22,6 +23,11 @@ struct sidtab_node { #define SIDTAB_SIZE SIDTAB_HASH_BUCKETS +struct sidtab_isid_entry { + int set; + struct context context; +}; + struct sidtab { struct sidtab_node **htable; unsigned int nel; /* number of elements */ @@ -30,10 +36,13 @@ struct sidtab { #define SIDTAB_CACHE_LEN 3 struct sidtab_node *cache[SIDTAB_CACHE_LEN]; spinlock_t lock; + + /* index == SID - 1 (no entry for SECSID_NULL) */ + struct sidtab_isid_entry isids[SECINITSID_NUM]; }; int sidtab_init(struct sidtab *s); -int sidtab_insert(struct sidtab *s, u32 sid, struct context *context); +int sidtab_set_initial(struct sidtab *s, u32 sid, struct context *context); struct context *sidtab_search(struct sidtab *s, u32 sid); struct context *sidtab_search_force(struct sidtab *s, u32 sid); @@ -43,13 +52,10 @@ int sidtab_convert(struct sidtab *s, struct sidtab *news, void *args), void *args); -int sidtab_context_to_sid(struct sidtab *s, - struct context *context, - u32 *sid); +int sidtab_context_to_sid(struct sidtab *s, struct context *context, u32 *sid); void sidtab_hash_eval(struct sidtab *h, char *tag); void sidtab_destroy(struct sidtab *s); -void sidtab_set(struct sidtab *dst, struct sidtab *src); #endif /* _SS_SIDTAB_H_ */ From 517071d4ae4363a448f9a5d56f3ba2ff69034d07 Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Wed, 18 Apr 2018 01:35:06 +0200 Subject: [PATCH 2045/3715] UPSTREAM: scsi: ilog2: create truly constant version for sparse Sparse emits errors about ilog2() in array indices because of the use of __ilog2_32() and __ilog2_64(), rightly so (https://www.spinics.net/lists/linux-sparse/msg03471.html). Create a const_ilog2() variant that works with sparse for this scenario. (Note: checkpatch.pl complains about missing parentheses, but that appears to be a false positive. I can get rid of the warning simply by inserting whitespace, making checkpatch "see" the whole macro). Signed-off-by: Martin Wilck Signed-off-by: Martin K. Petersen (cherry picked from commit dbef91ec5482239055dd2db8ec656fc13d382add) Change-Id: Id6d53888e2e7fc44e7fa0e4be4f87327445a6444 Bug: 140252993 Signed-off-by: Jeff Vander Stoep --- include/linux/log2.h | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/include/linux/log2.h b/include/linux/log2.h index c373295f359f..df50139a0815 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -67,16 +67,13 @@ unsigned long __rounddown_pow_of_two(unsigned long n) } /** - * ilog2 - log of base 2 of 32-bit or a 64-bit unsigned value - * @n - parameter + * const_ilog2 - log base 2 of 32-bit or a 64-bit constant unsigned value + * @n: parameter * - * constant-capable log of base 2 calculation - * - this can be used to initialise global variables from constant data, hence - * the massive ternary operator construction - * - * selects the appropriately-sized optimised version depending on sizeof(n) + * Use this where sparse expects a true constant expression, e.g. for array + * indices. */ -#define ilog2(n) \ +#define const_ilog2(n) \ ( \ __builtin_constant_p(n) ? ( \ (n) < 2 ? 0 : \ @@ -142,10 +139,26 @@ unsigned long __rounddown_pow_of_two(unsigned long n) (n) & (1ULL << 4) ? 4 : \ (n) & (1ULL << 3) ? 3 : \ (n) & (1ULL << 2) ? 2 : \ - 1 ) : \ - (sizeof(n) <= 4) ? \ - __ilog2_u32(n) : \ - __ilog2_u64(n) \ + 1) : \ + -1) + +/** + * ilog2 - log base 2 of 32-bit or a 64-bit unsigned value + * @n: parameter + * + * constant-capable log of base 2 calculation + * - this can be used to initialise global variables from constant data, hence + * the massive ternary operator construction + * + * selects the appropriately-sized optimised version depending on sizeof(n) + */ +#define ilog2(n) \ +( \ + __builtin_constant_p(n) ? \ + const_ilog2(n) : \ + (sizeof(n) <= 4) ? \ + __ilog2_u32(n) : \ + __ilog2_u64(n) \ ) /** From 1067eb5f3a1a4fef8e539e1b8b5135048fde7ae3 Mon Sep 17 00:00:00 2001 From: peter enderborg Date: Tue, 12 Jun 2018 10:09:05 +0200 Subject: [PATCH 2046/3715] UPSTREAM: selinux: Cleanup printk logging in services Replace printk with pr_* to avoid checkpatch warnings. Signed-off-by: Peter Enderborg Signed-off-by: Paul Moore (cherry picked from commit b54c85c15a7bf1a34b14f23eb186e4a737ac3447) Change-Id: Iebe62c1f94ec8dcb5c442a8f2aa3f8640029c726 Bug: 140252993 Signed-off-by: Jeff Vander Stoep --- security/selinux/ss/services.c | 71 +++++++++++++++++----------------- 1 file changed, 35 insertions(+), 36 deletions(-) diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 70d05faa5da2..85933e8db2df 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -136,8 +136,7 @@ static int selinux_set_mapping(struct policydb *pol, p_out->value = string_to_security_class(pol, p_in->name); if (!p_out->value) { - printk(KERN_INFO - "SELinux: Class %s not defined in policy.\n", + pr_info("SELinux: Class %s not defined in policy.\n", p_in->name); if (pol->reject_unknown) goto err; @@ -156,8 +155,7 @@ static int selinux_set_mapping(struct policydb *pol, p_out->perms[k] = string_to_av_perm(pol, p_out->value, p_in->perms[k]); if (!p_out->perms[k]) { - printk(KERN_INFO - "SELinux: Permission %s in class %s not defined in policy.\n", + pr_info("SELinux: Permission %s in class %s not defined in policy.\n", p_in->perms[k], p_in->name); if (pol->reject_unknown) goto err; @@ -170,7 +168,7 @@ static int selinux_set_mapping(struct policydb *pol, } if (print_unknown_handle) - printk(KERN_INFO "SELinux: the above unknown classes and permissions will be %s\n", + pr_info("SELinux: the above unknown classes and permissions will be %s\n", pol->allow_unknown ? "allowed" : "denied"); out_map->size = i; @@ -644,7 +642,7 @@ static void context_struct_compute_av(struct policydb *policydb, if (unlikely(!tclass || tclass > policydb->p_classes.nprim)) { if (printk_ratelimit()) - printk(KERN_WARNING "SELinux: Invalid class %hu\n", tclass); + pr_warn("SELinux: Invalid class %hu\n", tclass); return; } @@ -793,7 +791,7 @@ static int security_compute_validatetrans(struct selinux_state *state, ocontext = sidtab_search(sidtab, oldsid); if (!ocontext) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + pr_err("SELinux: %s: unrecognized SID %d\n", __func__, oldsid); rc = -EINVAL; goto out; @@ -801,7 +799,7 @@ static int security_compute_validatetrans(struct selinux_state *state, ncontext = sidtab_search(sidtab, newsid); if (!ncontext) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + pr_err("SELinux: %s: unrecognized SID %d\n", __func__, newsid); rc = -EINVAL; goto out; @@ -809,7 +807,7 @@ static int security_compute_validatetrans(struct selinux_state *state, tcontext = sidtab_search(sidtab, tasksid); if (!tcontext) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + pr_err("SELinux: %s: unrecognized SID %d\n", __func__, tasksid); rc = -EINVAL; goto out; @@ -883,7 +881,7 @@ int security_bounded_transition(struct selinux_state *state, rc = -EINVAL; old_context = sidtab_search(sidtab, old_sid); if (!old_context) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %u\n", + pr_err("SELinux: %s: unrecognized SID %u\n", __func__, old_sid); goto out; } @@ -891,7 +889,7 @@ int security_bounded_transition(struct selinux_state *state, rc = -EINVAL; new_context = sidtab_search(sidtab, new_sid); if (!new_context) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %u\n", + pr_err("SELinux: %s: unrecognized SID %u\n", __func__, new_sid); goto out; } @@ -1040,14 +1038,14 @@ void security_compute_xperms_decision(struct selinux_state *state, scontext = sidtab_search(sidtab, ssid); if (!scontext) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + pr_err("SELinux: %s: unrecognized SID %d\n", __func__, ssid); goto out; } tcontext = sidtab_search(sidtab, tsid); if (!tcontext) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + pr_err("SELinux: %s: unrecognized SID %d\n", __func__, tsid); goto out; } @@ -1129,7 +1127,7 @@ void security_compute_av(struct selinux_state *state, scontext = sidtab_search(sidtab, ssid); if (!scontext) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + pr_err("SELinux: %s: unrecognized SID %d\n", __func__, ssid); goto out; } @@ -1140,7 +1138,7 @@ void security_compute_av(struct selinux_state *state, tcontext = sidtab_search(sidtab, tsid); if (!tcontext) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + pr_err("SELinux: %s: unrecognized SID %d\n", __func__, tsid); goto out; } @@ -1183,7 +1181,7 @@ void security_compute_av_user(struct selinux_state *state, scontext = sidtab_search(sidtab, ssid); if (!scontext) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + pr_err("SELinux: %s: unrecognized SID %d\n", __func__, ssid); goto out; } @@ -1194,7 +1192,7 @@ void security_compute_av_user(struct selinux_state *state, tcontext = sidtab_search(sidtab, tsid); if (!tcontext) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + pr_err("SELinux: %s: unrecognized SID %d\n", __func__, tsid); goto out; } @@ -1310,7 +1308,7 @@ static int security_sid_to_context_core(struct selinux_state *state, *scontext = scontextp; goto out; } - printk(KERN_ERR "SELinux: %s: called before initial " + pr_err("SELinux: %s: called before initial " "load_policy on unknown SID %d\n", __func__, sid); rc = -EINVAL; goto out; @@ -1323,7 +1321,7 @@ static int security_sid_to_context_core(struct selinux_state *state, else context = sidtab_search(sidtab, sid); if (!context) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + pr_err("SELinux: %s: unrecognized SID %d\n", __func__, sid); rc = -EINVAL; goto out_unlock; @@ -1678,14 +1676,14 @@ static int security_compute_sid(struct selinux_state *state, scontext = sidtab_search(sidtab, ssid); if (!scontext) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + pr_err("SELinux: %s: unrecognized SID %d\n", __func__, ssid); rc = -EINVAL; goto out_unlock; } tcontext = sidtab_search(sidtab, tsid); if (!tcontext) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + pr_err("SELinux: %s: unrecognized SID %d\n", __func__, tsid); rc = -EINVAL; goto out_unlock; @@ -1898,7 +1896,8 @@ static inline int convert_context_handle_invalid_context( return -EINVAL; if (!context_struct_to_string(policydb, context, &s, &len)) { - printk(KERN_WARNING "SELinux: Context %s would be invalid if enforcing\n", s); + pr_warn("SELinux: Context %s would be invalid if enforcing\n", + s); kfree(s); } return 0; @@ -1946,7 +1945,7 @@ static int convert_context(u32 key, c->len, &ctx, SECSID_NULL); kfree(s); if (!rc) { - printk(KERN_INFO "SELinux: Context %s became valid (mapped).\n", + pr_info("SELinux: Context %s became valid (mapped).\n", c->str); /* Replace string with mapped representation. */ kfree(c->str); @@ -1958,7 +1957,7 @@ static int convert_context(u32 key, goto out; } else { /* Other error condition, e.g. ENOMEM. */ - printk(KERN_ERR "SELinux: Unable to map context %s, rc = %d.\n", + pr_err("SELinux: Unable to map context %s, rc = %d.\n", c->str, -rc); goto out; } @@ -2017,7 +2016,7 @@ static int convert_context(u32 key, oc = oc->next; rc = -EINVAL; if (!oc) { - printk(KERN_ERR "SELinux: unable to look up" + pr_err("SELinux: unable to look up" " the initial SIDs list\n"); goto bad; } @@ -2049,7 +2048,7 @@ bad: context_destroy(c); c->str = s; c->len = len; - printk(KERN_INFO "SELinux: Context %s became invalid (unmapped).\n", + pr_info("SELinux: Context %s became invalid (unmapped).\n", c->str); rc = 0; goto out; @@ -2167,13 +2166,13 @@ int security_load_policy(struct selinux_state *state, void *data, size_t len) newpolicydb->len = len; /* If switching between different policy types, log MLS status */ if (policydb->mls_enabled && !newpolicydb->mls_enabled) - printk(KERN_INFO "SELinux: Disabling MLS support...\n"); + pr_info("SELinux: Disabling MLS support...\n"); else if (!policydb->mls_enabled && newpolicydb->mls_enabled) - printk(KERN_INFO "SELinux: Enabling MLS support...\n"); + pr_info("SELinux: Enabling MLS support...\n"); rc = policydb_load_isids(newpolicydb, newsidtab); if (rc) { - printk(KERN_ERR "SELinux: unable to load the initial SIDs\n"); + pr_err("SELinux: unable to load the initial SIDs\n"); policydb_destroy(newpolicydb); kfree(newsidtab); goto out; @@ -2185,7 +2184,7 @@ int security_load_policy(struct selinux_state *state, void *data, size_t len) rc = security_preserve_bools(state, newpolicydb); if (rc) { - printk(KERN_ERR "SELinux: unable to preserve booleans\n"); + pr_err("SELinux: unable to preserve booleans\n"); goto err; } @@ -2198,7 +2197,7 @@ int security_load_policy(struct selinux_state *state, void *data, size_t len) args.newp = newpolicydb; rc = sidtab_convert(oldsidtab, newsidtab, convert_context, &args); if (rc) { - printk(KERN_ERR "SELinux: unable to convert the internal" + pr_err("SELinux: unable to convert the internal" " representation of contexts in the new SID" " table\n"); goto err; @@ -2984,7 +2983,7 @@ int security_sid_mls_copy(struct selinux_state *state, rc = -EINVAL; context1 = sidtab_search(sidtab, sid); if (!context1) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + pr_err("SELinux: %s: unrecognized SID %d\n", __func__, sid); goto out_unlock; } @@ -2992,7 +2991,7 @@ int security_sid_mls_copy(struct selinux_state *state, rc = -EINVAL; context2 = sidtab_search(sidtab, mls_sid); if (!context2) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + pr_err("SELinux: %s: unrecognized SID %d\n", __func__, mls_sid); goto out_unlock; } @@ -3089,14 +3088,14 @@ int security_net_peersid_resolve(struct selinux_state *state, rc = -EINVAL; nlbl_ctx = sidtab_search(sidtab, nlbl_sid); if (!nlbl_ctx) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + pr_err("SELinux: %s: unrecognized SID %d\n", __func__, nlbl_sid); goto out; } rc = -EINVAL; xfrm_ctx = sidtab_search(sidtab, xfrm_sid); if (!xfrm_ctx) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + pr_err("SELinux: %s: unrecognized SID %d\n", __func__, xfrm_sid); goto out; } @@ -3181,7 +3180,7 @@ int security_get_permissions(struct selinux_state *state, rc = -EINVAL; match = hashtab_search(policydb->p_classes.table, class); if (!match) { - printk(KERN_ERR "SELinux: %s: unrecognized class %s\n", + pr_err("SELinux: %s: unrecognized class %s\n", __func__, class); goto out; } From 2603870fde053e32186cfbd5ea7eeb73e9733166 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 6 Aug 2018 23:19:32 +0200 Subject: [PATCH 2047/3715] UPSTREAM: selinux: refactor mls_context_to_sid() and make it stricter The intended behavior change for this patch is to reject any MLS strings that contain (trailing) garbage if p->mls_enabled is true. As suggested by Paul Moore, change mls_context_to_sid() so that the two parts of the range are extracted before the rest of the parsing. Because now we don't have to scan for two different separators simultaneously everywhere, we can actually switch to strchr() everywhere instead of the open-coded loops that scan for two separators at once. mls_context_to_sid() used to signal how much of the input string was parsed by updating `*scontext`. However, there is actually no case in which mls_context_to_sid() only parses a subset of the input and still returns a success (other than the buggy case with a second '-' in which it incorrectly claims to have consumed the entire string). Turn `scontext` into a simple pointer argument and stop redundantly checking whether the entire input was consumed in string_to_context_struct(). This also lets us remove the `scontext_len` argument from `string_to_context_struct()`. Signed-off-by: Jann Horn [PM: minor merge fuzz in convert_context()] Signed-off-by: Paul Moore (cherry picked from commit 95ffe194204ae3cef88d0b59be209204bbe9b3be) Change-Id: I63960c9ef54cc29381f3bade53115cc6ed376045 Bug: 140252993 Signed-off-by: Jeff Vander Stoep --- security/selinux/ss/mls.c | 176 ++++++++++++++------------------- security/selinux/ss/mls.h | 2 +- security/selinux/ss/services.c | 12 +-- 3 files changed, 81 insertions(+), 109 deletions(-) diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c index 39475fb455bc..2fe459df3c85 100644 --- a/security/selinux/ss/mls.c +++ b/security/selinux/ss/mls.c @@ -218,9 +218,7 @@ int mls_context_isvalid(struct policydb *p, struct context *c) /* * Set the MLS fields in the security context structure * `context' based on the string representation in - * the string `*scontext'. Update `*scontext' to - * point to the end of the string representation of - * the MLS fields. + * the string `scontext'. * * This function modifies the string in place, inserting * NULL characters to terminate the MLS fields. @@ -235,22 +233,21 @@ int mls_context_isvalid(struct policydb *p, struct context *c) */ int mls_context_to_sid(struct policydb *pol, char oldc, - char **scontext, + char *scontext, struct context *context, struct sidtab *s, u32 def_sid) { - - char delim; - char *scontextp, *p, *rngptr; + char *sensitivity, *cur_cat, *next_cat, *rngptr; struct level_datum *levdatum; struct cat_datum *catdatum, *rngdatum; - int l, rc = -EINVAL; + int l, rc, i; + char *rangep[2]; if (!pol->mls_enabled) { - if (def_sid != SECSID_NULL && oldc) - *scontext += strlen(*scontext) + 1; - return 0; + if ((def_sid != SECSID_NULL && oldc) || (*scontext) == '\0') + return 0; + return -EINVAL; } /* @@ -261,113 +258,94 @@ int mls_context_to_sid(struct policydb *pol, struct context *defcon; if (def_sid == SECSID_NULL) - goto out; + return -EINVAL; defcon = sidtab_search(s, def_sid); if (!defcon) - goto out; + return -EINVAL; - rc = mls_context_cpy(context, defcon); - goto out; + return mls_context_cpy(context, defcon); } - /* Extract low sensitivity. */ - scontextp = p = *scontext; - while (*p && *p != ':' && *p != '-') - p++; - - delim = *p; - if (delim != '\0') - *p++ = '\0'; + /* + * If we're dealing with a range, figure out where the two parts + * of the range begin. + */ + rangep[0] = scontext; + rangep[1] = strchr(scontext, '-'); + if (rangep[1]) { + rangep[1][0] = '\0'; + rangep[1]++; + } + /* For each part of the range: */ for (l = 0; l < 2; l++) { - levdatum = hashtab_search(pol->p_levels.table, scontextp); - if (!levdatum) { - rc = -EINVAL; - goto out; - } + /* Split sensitivity and category set. */ + sensitivity = rangep[l]; + if (sensitivity == NULL) + break; + next_cat = strchr(sensitivity, ':'); + if (next_cat) + *(next_cat++) = '\0'; + /* Parse sensitivity. */ + levdatum = hashtab_search(pol->p_levels.table, sensitivity); + if (!levdatum) + return -EINVAL; context->range.level[l].sens = levdatum->level->sens; - if (delim == ':') { - /* Extract category set. */ - while (1) { - scontextp = p; - while (*p && *p != ',' && *p != '-') - p++; - delim = *p; - if (delim != '\0') - *p++ = '\0'; + /* Extract category set. */ + while (next_cat != NULL) { + cur_cat = next_cat; + next_cat = strchr(next_cat, ','); + if (next_cat != NULL) + *(next_cat++) = '\0'; - /* Separate into range if exists */ - rngptr = strchr(scontextp, '.'); - if (rngptr != NULL) { - /* Remove '.' */ - *rngptr++ = '\0'; - } + /* Separate into range if exists */ + rngptr = strchr(cur_cat, '.'); + if (rngptr != NULL) { + /* Remove '.' */ + *rngptr++ = '\0'; + } - catdatum = hashtab_search(pol->p_cats.table, - scontextp); - if (!catdatum) { - rc = -EINVAL; - goto out; - } + catdatum = hashtab_search(pol->p_cats.table, cur_cat); + if (!catdatum) + return -EINVAL; - rc = ebitmap_set_bit(&context->range.level[l].cat, - catdatum->value - 1, 1); + rc = ebitmap_set_bit(&context->range.level[l].cat, + catdatum->value - 1, 1); + if (rc) + return rc; + + /* If range, set all categories in range */ + if (rngptr == NULL) + continue; + + rngdatum = hashtab_search(pol->p_cats.table, rngptr); + if (!rngdatum) + return -EINVAL; + + if (catdatum->value >= rngdatum->value) + return -EINVAL; + + for (i = catdatum->value; i < rngdatum->value; i++) { + rc = ebitmap_set_bit(&context->range.level[l].cat, i, 1); if (rc) - goto out; - - /* If range, set all categories in range */ - if (rngptr) { - int i; - - rngdatum = hashtab_search(pol->p_cats.table, rngptr); - if (!rngdatum) { - rc = -EINVAL; - goto out; - } - - if (catdatum->value >= rngdatum->value) { - rc = -EINVAL; - goto out; - } - - for (i = catdatum->value; i < rngdatum->value; i++) { - rc = ebitmap_set_bit(&context->range.level[l].cat, i, 1); - if (rc) - goto out; - } - } - - if (delim != ',') - break; + return rc; } } - if (delim == '-') { - /* Extract high sensitivity. */ - scontextp = p; - while (*p && *p != ':') - p++; - - delim = *p; - if (delim != '\0') - *p++ = '\0'; - } else - break; } - if (l == 0) { + /* If we didn't see a '-', the range start is also the range end. */ + if (rangep[1] == NULL) { context->range.level[1].sens = context->range.level[0].sens; rc = ebitmap_cpy(&context->range.level[1].cat, &context->range.level[0].cat); if (rc) - goto out; + return rc; } - *scontext = ++p; - rc = 0; -out: - return rc; + + return 0; } /* @@ -379,21 +357,19 @@ out: int mls_from_string(struct policydb *p, char *str, struct context *context, gfp_t gfp_mask) { - char *tmpstr, *freestr; + char *tmpstr; int rc; if (!p->mls_enabled) return -EINVAL; - /* we need freestr because mls_context_to_sid will change - the value of tmpstr */ - tmpstr = freestr = kstrdup(str, gfp_mask); + tmpstr = kstrdup(str, gfp_mask); if (!tmpstr) { rc = -ENOMEM; } else { - rc = mls_context_to_sid(p, ':', &tmpstr, context, + rc = mls_context_to_sid(p, ':', tmpstr, context, NULL, SECSID_NULL); - kfree(freestr); + kfree(tmpstr); } return rc; diff --git a/security/selinux/ss/mls.h b/security/selinux/ss/mls.h index 9a3ff7af70ad..67093647576d 100644 --- a/security/selinux/ss/mls.h +++ b/security/selinux/ss/mls.h @@ -34,7 +34,7 @@ int mls_level_isvalid(struct policydb *p, struct mls_level *l); int mls_context_to_sid(struct policydb *p, char oldc, - char **scontext, + char *scontext, struct context *context, struct sidtab *s, u32 def_sid); diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 85933e8db2df..607e9a95bcbb 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -1365,7 +1365,6 @@ int security_sid_to_context_force(struct selinux_state *state, u32 sid, static int string_to_context_struct(struct policydb *pol, struct sidtab *sidtabp, char *scontext, - u32 scontext_len, struct context *ctx, u32 def_sid) { @@ -1426,15 +1425,12 @@ static int string_to_context_struct(struct policydb *pol, ctx->type = typdatum->value; - rc = mls_context_to_sid(pol, oldc, &p, ctx, sidtabp, def_sid); + rc = mls_context_to_sid(pol, oldc, p, ctx, sidtabp, def_sid); if (rc) goto out; - rc = -EINVAL; - if ((p - scontext) < scontext_len) - goto out; - /* Check the validity of the new context. */ + rc = -EINVAL; if (!policydb_context_isvalid(pol, ctx)) goto out; rc = 0; @@ -1489,7 +1485,7 @@ static int security_context_to_sid_core(struct selinux_state *state, policydb = &state->ss->policydb; sidtab = state->ss->sidtab; rc = string_to_context_struct(policydb, sidtab, scontext2, - scontext_len, &context, def_sid); + &context, def_sid); if (rc == -EINVAL && force) { context.str = str; context.len = strlen(str) + 1; @@ -1942,7 +1938,7 @@ static int convert_context(u32 key, goto out; rc = string_to_context_struct(args->newp, NULL, s, - c->len, &ctx, SECSID_NULL); + &ctx, SECSID_NULL); kfree(s); if (!rc) { pr_info("SELinux: Context %s became valid (mapped).\n", From e5ecfcc0aa06dd0e6cb34d4b050d42fbacf63c6c Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Fri, 30 Nov 2018 16:24:08 +0100 Subject: [PATCH 2048/3715] BACKPORT: selinux: overhaul sidtab to fix bug and improve performance Before this patch, during a policy reload the sidtab would become frozen and trying to map a new context to SID would be unable to add a new entry to sidtab and fail with -ENOMEM. Such failures are usually propagated into userspace, which has no way of distignuishing them from actual allocation failures and thus doesn't handle them gracefully. Such situation can be triggered e.g. by the following reproducer: while true; do load_policy; echo -n .; sleep 0.1; done & for (( i = 0; i < 1024; i++ )); do runcon -l s0:c$i echo -n x || break # or: # chcon -l s0:c$i || break done This patch overhauls the sidtab so it doesn't need to be frozen during policy reload, thus solving the above problem. The new SID table leverages the fact that SIDs are allocated sequentially and are never invalidated and stores them in linear buckets indexed by a tree structure. This brings several advantages: 1. Fast SID -> context lookup - this lookup can now be done in logarithmic time complexity (usually in less than 4 array lookups) and can still be done safely without locking. 2. No need to re-search the whole table on reverse lookup miss - after acquiring the spinlock only the newly added entries need to be searched, which means that reverse lookups that end up inserting a new entry are now about twice as fast. 3. No need to freeze sidtab during policy reload - it is now possible to handle insertion of new entries even during sidtab conversion. The tree structure of the new sidtab is able to grow automatically to up to about 2^31 entries (at which point it should not have more than about 4 tree levels). The old sidtab had a theoretical capacity of almost 2^32 entries, but half of that is still more than enough since by that point the reverse table lookups would become unusably slow anyway... The number of entries per tree node is selected automatically so that each node fits into a single page, which should be the easiest size for kmalloc() to handle. Note that the cache for reverse lookup is preserved with equivalent logic. The only difference is that instead of storing pointers to the hash table nodes it stores just the indices of the cached entries. The new cache ensures that the indices are loaded/stored atomically, but it still has the drawback that concurrent cache updates may mess up the contents of the cache. Such situation however only reduces its effectivity, not the correctness of lookups. Tested by selinux-testsuite and thoroughly tortured by this simple stress test: ``` function rand_cat() { echo $(( $RANDOM % 1024 )) } function do_work() { while true; do echo -n "system_u:system_r:kernel_t:s0:c$(rand_cat),c$(rand_cat)" \ >/sys/fs/selinux/context 2>/dev/null || true done } do_work >/dev/null & do_work >/dev/null & do_work >/dev/null & while load_policy; do echo -n .; sleep 0.1; done kill %1 kill %2 kill %3 ``` Link: https://github.com/SELinuxProject/selinux-kernel/issues/38 Reported-by: Orion Poplawski Reported-by: Li Kun Signed-off-by: Ondrej Mosnacek Reviewed-by: Stephen Smalley [PM: most of sidtab.c merged by hand due to conflicts] [PM: checkpatch fixes in mls.c, services.c, sidtab.c] Signed-off-by: Paul Moore (cherry picked from commit ee1a84fdfeedfd7362e9a8a8f15fedc3482ade2d) Manually resolved conflicts in sidtab.c. Change-Id: I681074f58e980be0a76f0e4978c5d97fae17c85e Bug: 140252993 Signed-off-by: Jeff Vander Stoep --- security/selinux/ss/mls.c | 24 +- security/selinux/ss/mls.h | 3 +- security/selinux/ss/services.c | 122 +++---- security/selinux/ss/sidtab.c | 572 ++++++++++++++++++++------------- security/selinux/ss/sidtab.h | 80 +++-- 5 files changed, 472 insertions(+), 329 deletions(-) diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c index 2fe459df3c85..18ba0c2328fb 100644 --- a/security/selinux/ss/mls.c +++ b/security/selinux/ss/mls.c @@ -436,16 +436,17 @@ int mls_setup_user_range(struct policydb *p, /* * Convert the MLS fields in the security context - * structure `c' from the values specified in the - * policy `oldp' to the values specified in the policy `newp'. + * structure `oldc' from the values specified in the + * policy `oldp' to the values specified in the policy `newp', + * storing the resulting context in `newc'. */ int mls_convert_context(struct policydb *oldp, struct policydb *newp, - struct context *c) + struct context *oldc, + struct context *newc) { struct level_datum *levdatum; struct cat_datum *catdatum; - struct ebitmap bitmap; struct ebitmap_node *node; int l, i; @@ -455,28 +456,25 @@ int mls_convert_context(struct policydb *oldp, for (l = 0; l < 2; l++) { levdatum = hashtab_search(newp->p_levels.table, sym_name(oldp, SYM_LEVELS, - c->range.level[l].sens - 1)); + oldc->range.level[l].sens - 1)); if (!levdatum) return -EINVAL; - c->range.level[l].sens = levdatum->level->sens; + newc->range.level[l].sens = levdatum->level->sens; - ebitmap_init(&bitmap); - ebitmap_for_each_positive_bit(&c->range.level[l].cat, node, i) { + ebitmap_for_each_positive_bit(&oldc->range.level[l].cat, + node, i) { int rc; catdatum = hashtab_search(newp->p_cats.table, sym_name(oldp, SYM_CATS, i)); if (!catdatum) return -EINVAL; - rc = ebitmap_set_bit(&bitmap, catdatum->value - 1, 1); + rc = ebitmap_set_bit(&newc->range.level[l].cat, + catdatum->value - 1, 1); if (rc) return rc; - - cond_resched(); } - ebitmap_destroy(&c->range.level[l].cat); - c->range.level[l].cat = bitmap; } return 0; diff --git a/security/selinux/ss/mls.h b/security/selinux/ss/mls.h index 67093647576d..7954b1e60b64 100644 --- a/security/selinux/ss/mls.h +++ b/security/selinux/ss/mls.h @@ -46,7 +46,8 @@ int mls_range_set(struct context *context, struct mls_range *range); int mls_convert_context(struct policydb *oldp, struct policydb *newp, - struct context *context); + struct context *oldc, + struct context *newc); int mls_compute_sid(struct policydb *p, struct context *scontext, diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 607e9a95bcbb..82bd87278da0 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -1907,19 +1907,16 @@ struct convert_context_args { /* * Convert the values in the security context - * structure `c' from the values specified + * structure `oldc' from the values specified * in the policy `p->oldp' to the values specified - * in the policy `p->newp'. Verify that the - * context is valid under the new policy. + * in the policy `p->newp', storing the new context + * in `newc'. Verify that the context is valid + * under the new policy. */ -static int convert_context(u32 key, - struct context *c, - void *p) +static int convert_context(struct context *oldc, struct context *newc, void *p) { struct convert_context_args *args; - struct context oldc; struct ocontext *oc; - struct mls_range *range; struct role_datum *role; struct type_datum *typdatum; struct user_datum *usrdatum; @@ -1929,76 +1926,65 @@ static int convert_context(u32 key, args = p; - if (c->str) { - struct context ctx; - - rc = -ENOMEM; - s = kstrdup(c->str, GFP_KERNEL); + if (oldc->str) { + s = kstrdup(oldc->str, GFP_KERNEL); if (!s) - goto out; + return -ENOMEM; rc = string_to_context_struct(args->newp, NULL, s, - &ctx, SECSID_NULL); - kfree(s); - if (!rc) { - pr_info("SELinux: Context %s became valid (mapped).\n", - c->str); - /* Replace string with mapped representation. */ - kfree(c->str); - memcpy(c, &ctx, sizeof(*c)); - goto out; - } else if (rc == -EINVAL) { + newc, SECSID_NULL); + if (rc == -EINVAL) { /* Retain string representation for later mapping. */ - rc = 0; - goto out; - } else { + context_init(newc); + newc->str = s; + newc->len = oldc->len; + return 0; + } + kfree(s); + if (rc) { /* Other error condition, e.g. ENOMEM. */ pr_err("SELinux: Unable to map context %s, rc = %d.\n", - c->str, -rc); - goto out; + oldc->str, -rc); + return rc; } + pr_info("SELinux: Context %s became valid (mapped).\n", + oldc->str); + return 0; } - rc = context_cpy(&oldc, c); - if (rc) - goto out; + context_init(newc); /* Convert the user. */ rc = -EINVAL; usrdatum = hashtab_search(args->newp->p_users.table, - sym_name(args->oldp, SYM_USERS, c->user - 1)); + sym_name(args->oldp, + SYM_USERS, oldc->user - 1)); if (!usrdatum) goto bad; - c->user = usrdatum->value; + newc->user = usrdatum->value; /* Convert the role. */ rc = -EINVAL; role = hashtab_search(args->newp->p_roles.table, - sym_name(args->oldp, SYM_ROLES, c->role - 1)); + sym_name(args->oldp, SYM_ROLES, oldc->role - 1)); if (!role) goto bad; - c->role = role->value; + newc->role = role->value; /* Convert the type. */ rc = -EINVAL; typdatum = hashtab_search(args->newp->p_types.table, - sym_name(args->oldp, SYM_TYPES, c->type - 1)); + sym_name(args->oldp, + SYM_TYPES, oldc->type - 1)); if (!typdatum) goto bad; - c->type = typdatum->value; + newc->type = typdatum->value; /* Convert the MLS fields if dealing with MLS policies */ if (args->oldp->mls_enabled && args->newp->mls_enabled) { - rc = mls_convert_context(args->oldp, args->newp, c); + rc = mls_convert_context(args->oldp, args->newp, oldc, newc); if (rc) goto bad; - } else if (args->oldp->mls_enabled && !args->newp->mls_enabled) { - /* - * Switching between MLS and non-MLS policy: - * free any storage used by the MLS fields in the - * context for all existing entries in the sidtab. - */ - mls_context_destroy(c); } else if (!args->oldp->mls_enabled && args->newp->mls_enabled) { /* * Switching between non-MLS and MLS policy: @@ -2016,38 +2002,30 @@ static int convert_context(u32 key, " the initial SIDs list\n"); goto bad; } - range = &oc->context[0].range; - rc = mls_range_set(c, range); + rc = mls_range_set(newc, &oc->context[0].range); if (rc) goto bad; } /* Check the validity of the new context. */ - if (!policydb_context_isvalid(args->newp, c)) { - rc = convert_context_handle_invalid_context(args->state, - &oldc); + if (!policydb_context_isvalid(args->newp, newc)) { + rc = convert_context_handle_invalid_context(args->state, oldc); if (rc) goto bad; } - context_destroy(&oldc); - - rc = 0; -out: - return rc; + return 0; bad: /* Map old representation to string and save it. */ - rc = context_struct_to_string(args->oldp, &oldc, &s, &len); + rc = context_struct_to_string(args->oldp, oldc, &s, &len); if (rc) return rc; - context_destroy(&oldc); - context_destroy(c); - c->str = s; - c->len = len; + context_destroy(newc); + newc->str = s; + newc->len = len; pr_info("SELinux: Context %s became invalid (unmapped).\n", - c->str); - rc = 0; - goto out; + newc->str); + return 0; } static void security_load_policycaps(struct selinux_state *state) @@ -2091,6 +2069,7 @@ int security_load_policy(struct selinux_state *state, void *data, size_t len) struct policydb *oldpolicydb, *newpolicydb; struct selinux_mapping *oldmapping; struct selinux_map newmap; + struct sidtab_convert_params convert_params; struct convert_context_args args; u32 seqno; int rc = 0; @@ -2147,12 +2126,6 @@ int security_load_policy(struct selinux_state *state, void *data, size_t len) goto out; } - oldsidtab = state->ss->sidtab; - -#if 0 - sidtab_hash_eval(oldsidtab, "sids"); -#endif - rc = policydb_read(newpolicydb, fp); if (rc) { kfree(newsidtab); @@ -2184,6 +2157,8 @@ int security_load_policy(struct selinux_state *state, void *data, size_t len) goto err; } + oldsidtab = state->ss->sidtab; + /* * Convert the internal representations of contexts * in the new SID table. @@ -2191,7 +2166,12 @@ int security_load_policy(struct selinux_state *state, void *data, size_t len) args.state = state; args.oldp = policydb; args.newp = newpolicydb; - rc = sidtab_convert(oldsidtab, newsidtab, convert_context, &args); + + convert_params.func = convert_context; + convert_params.args = &args; + convert_params.target = newsidtab; + + rc = sidtab_convert(oldsidtab, &convert_params); if (rc) { pr_err("SELinux: unable to convert the internal" " representation of contexts in the new SID" diff --git a/security/selinux/ss/sidtab.c b/security/selinux/ss/sidtab.c index a8e61eb8c894..e63a90ff2728 100644 --- a/security/selinux/ss/sidtab.c +++ b/security/selinux/ss/sidtab.c @@ -2,88 +2,41 @@ /* * Implementation of the SID table type. * - * Author : Stephen Smalley, + * Original author: Stephen Smalley, + * Author: Ondrej Mosnacek, + * + * Copyright (C) 2018 Red Hat, Inc. */ +#include #include #include +#include #include -#include +#include #include "flask.h" #include "security.h" #include "sidtab.h" -#define SIDTAB_HASH(sid) \ -(sid & SIDTAB_HASH_MASK) - int sidtab_init(struct sidtab *s) { - int i; + u32 i; - s->htable = kmalloc_array(SIDTAB_SIZE, sizeof(*s->htable), GFP_ATOMIC); - if (!s->htable) - return -ENOMEM; + memset(s->roots, 0, sizeof(s->roots)); + + for (i = 0; i < SIDTAB_RCACHE_SIZE; i++) + atomic_set(&s->rcache[i], -1); for (i = 0; i < SECINITSID_NUM; i++) s->isids[i].set = 0; - for (i = 0; i < SIDTAB_SIZE; i++) - s->htable[i] = NULL; + atomic_set(&s->count, 0); - for (i = 0; i < SIDTAB_CACHE_LEN; i++) - s->cache[i] = NULL; + s->convert = NULL; - s->nel = 0; - s->next_sid = 0; - s->shutdown = 0; spin_lock_init(&s->lock); return 0; } -static int sidtab_insert(struct sidtab *s, u32 sid, struct context *context) -{ - int hvalue; - struct sidtab_node *prev, *cur, *newnode; - - if (!s) - return -ENOMEM; - - hvalue = SIDTAB_HASH(sid); - prev = NULL; - cur = s->htable[hvalue]; - while (cur && sid > cur->sid) { - prev = cur; - cur = cur->next; - } - - if (cur && sid == cur->sid) - return -EEXIST; - - newnode = kmalloc(sizeof(*newnode), GFP_ATOMIC); - if (!newnode) - return -ENOMEM; - - newnode->sid = sid; - if (context_cpy(&newnode->context, context)) { - kfree(newnode); - return -ENOMEM; - } - - if (prev) { - newnode->next = prev->next; - wmb(); - prev->next = newnode; - } else { - newnode->next = s->htable[hvalue]; - wmb(); - s->htable[hvalue] = newnode; - } - - s->nel++; - if (sid >= s->next_sid) - s->next_sid = sid + 1; - return 0; -} - int sidtab_set_initial(struct sidtab *s, u32 sid, struct context *context) { struct sidtab_isid_entry *entry; @@ -102,20 +55,90 @@ int sidtab_set_initial(struct sidtab *s, u32 sid, struct context *context) return 0; } -static struct context *sidtab_lookup(struct sidtab *s, u32 sid) +static u32 sidtab_level_from_count(u32 count) { - int hvalue; - struct sidtab_node *cur; + u32 capacity = SIDTAB_LEAF_ENTRIES; + u32 level = 0; - hvalue = SIDTAB_HASH(sid); - cur = s->htable[hvalue]; - while (cur && sid > cur->sid) - cur = cur->next; + while (count > capacity) { + capacity <<= SIDTAB_INNER_SHIFT; + ++level; + } + return level; +} - if (!cur || sid != cur->sid) +static int sidtab_alloc_roots(struct sidtab *s, u32 level) +{ + u32 l; + + if (!s->roots[0].ptr_leaf) { + s->roots[0].ptr_leaf = kzalloc(SIDTAB_NODE_ALLOC_SIZE, + GFP_ATOMIC); + if (!s->roots[0].ptr_leaf) + return -ENOMEM; + } + for (l = 1; l <= level; ++l) + if (!s->roots[l].ptr_inner) { + s->roots[l].ptr_inner = kzalloc(SIDTAB_NODE_ALLOC_SIZE, + GFP_ATOMIC); + if (!s->roots[l].ptr_inner) + return -ENOMEM; + s->roots[l].ptr_inner->entries[0] = s->roots[l - 1]; + } + return 0; +} + +static struct context *sidtab_do_lookup(struct sidtab *s, u32 index, int alloc) +{ + union sidtab_entry_inner *entry; + u32 level, capacity_shift, leaf_index = index / SIDTAB_LEAF_ENTRIES; + + /* find the level of the subtree we need */ + level = sidtab_level_from_count(index + 1); + capacity_shift = level * SIDTAB_INNER_SHIFT; + + /* allocate roots if needed */ + if (alloc && sidtab_alloc_roots(s, level) != 0) return NULL; - return &cur->context; + /* lookup inside the subtree */ + entry = &s->roots[level]; + while (level != 0) { + capacity_shift -= SIDTAB_INNER_SHIFT; + --level; + + entry = &entry->ptr_inner->entries[leaf_index >> capacity_shift]; + leaf_index &= ((u32)1 << capacity_shift) - 1; + + if (!entry->ptr_inner) { + if (alloc) + entry->ptr_inner = kzalloc(SIDTAB_NODE_ALLOC_SIZE, + GFP_ATOMIC); + if (!entry->ptr_inner) + return NULL; + } + } + if (!entry->ptr_leaf) { + if (alloc) + entry->ptr_leaf = kzalloc(SIDTAB_NODE_ALLOC_SIZE, + GFP_ATOMIC); + if (!entry->ptr_leaf) + return NULL; + } + return &entry->ptr_leaf->entries[index % SIDTAB_LEAF_ENTRIES].context; +} + +static struct context *sidtab_lookup(struct sidtab *s, u32 index) +{ + u32 count = (u32)atomic_read(&s->count); + + if (index >= count) + return NULL; + + /* read entries after reading count */ + smp_rmb(); + + return sidtab_do_lookup(s, index, 0); } static struct context *sidtab_lookup_initial(struct sidtab *s, u32 sid) @@ -127,9 +150,6 @@ static struct context *sidtab_search_core(struct sidtab *s, u32 sid, int force) { struct context *context; - if (!s) - return NULL; - if (sid != 0) { if (sid > SECINITSID_NUM) context = sidtab_lookup(s, sid - (SECINITSID_NUM + 1)); @@ -152,102 +172,69 @@ struct context *sidtab_search_force(struct sidtab *s, u32 sid) return sidtab_search_core(s, sid, 1); } -static int sidtab_map(struct sidtab *s, - int (*apply)(u32 sid, - struct context *context, - void *args), - void *args) +static int sidtab_find_context(union sidtab_entry_inner entry, + u32 *pos, u32 count, u32 level, + struct context *context, u32 *index) { - int i, rc = 0; - struct sidtab_node *cur; - - if (!s) - goto out; - - for (i = 0; i < SIDTAB_SIZE; i++) { - cur = s->htable[i]; - while (cur) { - rc = apply(cur->sid, &cur->context, args); - if (rc) - goto out; - cur = cur->next; - } - } -out: - return rc; -} - -/* Clone the SID into the new SID table. */ -static int clone_sid(u32 sid, struct context *context, void *arg) -{ - struct sidtab *s = arg; - return sidtab_insert(s, sid, context); -} - -int sidtab_convert(struct sidtab *s, struct sidtab *news, - int (*convert)(u32 sid, - struct context *context, - void *args), - void *args) -{ - unsigned long flags; int rc; + u32 i; - spin_lock_irqsave(&s->lock, flags); - s->shutdown = 1; - spin_unlock_irqrestore(&s->lock, flags); + if (level != 0) { + struct sidtab_node_inner *node = entry.ptr_inner; - rc = sidtab_map(s, clone_sid, news); - if (rc) - return rc; + i = 0; + while (i < SIDTAB_INNER_ENTRIES && *pos < count) { + rc = sidtab_find_context(node->entries[i], + pos, count, level - 1, + context, index); + if (rc == 0) + return 0; + i++; + } + } else { + struct sidtab_node_leaf *node = entry.ptr_leaf; - return sidtab_map(news, convert, args); -} - -static void sidtab_update_cache(struct sidtab *s, struct sidtab_node *n, int loc) -{ - BUG_ON(loc >= SIDTAB_CACHE_LEN); - - while (loc > 0) { - s->cache[loc] = s->cache[loc - 1]; - loc--; - } - s->cache[0] = n; -} - -static inline int sidtab_search_context(struct sidtab *s, - struct context *context, u32 *sid) -{ - int i; - struct sidtab_node *cur; - - for (i = 0; i < SIDTAB_SIZE; i++) { - cur = s->htable[i]; - while (cur) { - if (context_cmp(&cur->context, context)) { - sidtab_update_cache(s, cur, SIDTAB_CACHE_LEN - 1); - *sid = cur->sid; + i = 0; + while (i < SIDTAB_LEAF_ENTRIES && *pos < count) { + if (context_cmp(&node->entries[i].context, context)) { + *index = *pos; return 0; } - cur = cur->next; + (*pos)++; + i++; } } return -ENOENT; } -static inline int sidtab_search_cache(struct sidtab *s, struct context *context, - u32 *sid) +static void sidtab_rcache_update(struct sidtab *s, u32 index, u32 pos) { - int i; - struct sidtab_node *node; + while (pos > 0) { + atomic_set(&s->rcache[pos], atomic_read(&s->rcache[pos - 1])); + --pos; + } + atomic_set(&s->rcache[0], (int)index); +} - for (i = 0; i < SIDTAB_CACHE_LEN; i++) { - node = s->cache[i]; - if (unlikely(!node)) - return -ENOENT; - if (context_cmp(&node->context, context)) { - sidtab_update_cache(s, node, i); - *sid = node->sid; +static void sidtab_rcache_push(struct sidtab *s, u32 index) +{ + sidtab_rcache_update(s, index, SIDTAB_RCACHE_SIZE - 1); +} + +static int sidtab_rcache_search(struct sidtab *s, struct context *context, + u32 *index) +{ + u32 i; + + for (i = 0; i < SIDTAB_RCACHE_SIZE; i++) { + int v = atomic_read(&s->rcache[i]); + + if (v < 0) + continue; + + if (context_cmp(sidtab_do_lookup(s, (u32)v, 0), context)) { + sidtab_rcache_update(s, (u32)v, i); + *index = (u32)v; return 0; } } @@ -255,39 +242,98 @@ static inline int sidtab_search_cache(struct sidtab *s, struct context *context, } static int sidtab_reverse_lookup(struct sidtab *s, struct context *context, - u32 *sid) + u32 *index) { - int ret; unsigned long flags; + u32 count = (u32)atomic_read(&s->count); + u32 count_locked, level, pos; + struct sidtab_convert_params *convert; + struct context *dst, *dst_convert; + int rc; - ret = sidtab_search_cache(s, context, sid); - if (ret) - ret = sidtab_search_context(s, context, sid); - if (ret) { - spin_lock_irqsave(&s->lock, flags); - /* Rescan now that we hold the lock. */ - ret = sidtab_search_context(s, context, sid); - if (!ret) - goto unlock_out; - /* No SID exists for the context. Allocate a new one. */ - if (s->next_sid == (UINT_MAX - SECINITSID_NUM - 1) || - s->shutdown) { - ret = -ENOMEM; - goto unlock_out; - } - *sid = s->next_sid++; - if (context->len) - printk(KERN_INFO - "SELinux: Context %s is not valid (left unmapped).\n", - context->str); - ret = sidtab_insert(s, *sid, context); - if (ret) - s->next_sid--; -unlock_out: - spin_unlock_irqrestore(&s->lock, flags); + rc = sidtab_rcache_search(s, context, index); + if (rc == 0) + return 0; + + level = sidtab_level_from_count(count); + + /* read entries after reading count */ + smp_rmb(); + + pos = 0; + rc = sidtab_find_context(s->roots[level], &pos, count, level, + context, index); + if (rc == 0) { + sidtab_rcache_push(s, *index); + return 0; } - return ret; + /* lock-free search failed: lock, re-search, and insert if not found */ + spin_lock_irqsave(&s->lock, flags); + + convert = s->convert; + count_locked = (u32)atomic_read(&s->count); + level = sidtab_level_from_count(count_locked); + + /* if count has changed before we acquired the lock, then catch up */ + while (count < count_locked) { + if (context_cmp(sidtab_do_lookup(s, count, 0), context)) { + sidtab_rcache_push(s, count); + *index = count; + rc = 0; + goto out_unlock; + } + ++count; + } + + /* insert context into new entry */ + rc = -ENOMEM; + dst = sidtab_do_lookup(s, count, 1); + if (!dst) + goto out_unlock; + + rc = context_cpy(dst, context); + if (rc) + goto out_unlock; + + /* + * if we are building a new sidtab, we need to convert the context + * and insert it there as well + */ + if (convert) { + rc = -ENOMEM; + dst_convert = sidtab_do_lookup(convert->target, count, 1); + if (!dst_convert) { + context_destroy(dst); + goto out_unlock; + } + + rc = convert->func(context, dst_convert, convert->args); + if (rc) { + context_destroy(dst); + goto out_unlock; + } + + /* at this point we know the insert won't fail */ + atomic_set(&convert->target->count, count + 1); + } + + if (context->len) + pr_info("SELinux: Context %s is not valid (left unmapped).\n", + context->str); + + sidtab_rcache_push(s, count); + *index = count; + + /* write entries before writing new count */ + smp_wmb(); + + atomic_set(&s->count, count + 1); + + rc = 0; +out_unlock: + spin_unlock_irqrestore(&s->lock, flags); + return rc; } int sidtab_context_to_sid(struct sidtab *s, struct context *context, u32 *sid) @@ -311,57 +357,139 @@ int sidtab_context_to_sid(struct sidtab *s, struct context *context, u32 *sid) return 0; } -void sidtab_hash_eval(struct sidtab *h, char *tag) +static int sidtab_convert_tree(union sidtab_entry_inner *edst, + union sidtab_entry_inner *esrc, + u32 *pos, u32 count, u32 level, + struct sidtab_convert_params *convert) { - int i, chain_len, slots_used, max_chain_len; - struct sidtab_node *cur; + int rc; + u32 i; - slots_used = 0; - max_chain_len = 0; - for (i = 0; i < SIDTAB_SIZE; i++) { - cur = h->htable[i]; - if (cur) { - slots_used++; - chain_len = 0; - while (cur) { - chain_len++; - cur = cur->next; - } - - if (chain_len > max_chain_len) - max_chain_len = chain_len; + if (level != 0) { + if (!edst->ptr_inner) { + edst->ptr_inner = kzalloc(SIDTAB_NODE_ALLOC_SIZE, + GFP_KERNEL); + if (!edst->ptr_inner) + return -ENOMEM; } + i = 0; + while (i < SIDTAB_INNER_ENTRIES && *pos < count) { + rc = sidtab_convert_tree(&edst->ptr_inner->entries[i], + &esrc->ptr_inner->entries[i], + pos, count, level - 1, + convert); + if (rc) + return rc; + i++; + } + } else { + if (!edst->ptr_leaf) { + edst->ptr_leaf = kzalloc(SIDTAB_NODE_ALLOC_SIZE, + GFP_KERNEL); + if (!edst->ptr_leaf) + return -ENOMEM; + } + i = 0; + while (i < SIDTAB_LEAF_ENTRIES && *pos < count) { + rc = convert->func(&esrc->ptr_leaf->entries[i].context, + &edst->ptr_leaf->entries[i].context, + convert->args); + if (rc) + return rc; + (*pos)++; + i++; + } + cond_resched(); + } + return 0; +} + +int sidtab_convert(struct sidtab *s, struct sidtab_convert_params *params) +{ + unsigned long flags; + u32 count, level, pos; + int rc; + + spin_lock_irqsave(&s->lock, flags); + + /* concurrent policy loads are not allowed */ + if (s->convert) { + spin_unlock_irqrestore(&s->lock, flags); + return -EBUSY; } - printk(KERN_DEBUG "%s: %d entries and %d/%d buckets used, longest " - "chain length %d\n", tag, h->nel, slots_used, SIDTAB_SIZE, - max_chain_len); + count = (u32)atomic_read(&s->count); + level = sidtab_level_from_count(count); + + /* allocate last leaf in the new sidtab (to avoid race with + * live convert) + */ + rc = sidtab_do_lookup(params->target, count - 1, 1) ? 0 : -ENOMEM; + if (rc) { + spin_unlock_irqrestore(&s->lock, flags); + return rc; + } + + /* set count in case no new entries are added during conversion */ + atomic_set(¶ms->target->count, count); + + /* enable live convert of new entries */ + s->convert = params; + + /* we can safely do the rest of the conversion outside the lock */ + spin_unlock_irqrestore(&s->lock, flags); + + pr_info("SELinux: Converting %u SID table entries...\n", count); + + /* convert all entries not covered by live convert */ + pos = 0; + rc = sidtab_convert_tree(¶ms->target->roots[level], + &s->roots[level], &pos, count, level, params); + if (rc) { + /* we need to keep the old table - disable live convert */ + spin_lock_irqsave(&s->lock, flags); + s->convert = NULL; + spin_unlock_irqrestore(&s->lock, flags); + } + return rc; +} + +static void sidtab_destroy_tree(union sidtab_entry_inner entry, u32 level) +{ + u32 i; + + if (level != 0) { + struct sidtab_node_inner *node = entry.ptr_inner; + + if (!node) + return; + + for (i = 0; i < SIDTAB_INNER_ENTRIES; i++) + sidtab_destroy_tree(node->entries[i], level - 1); + kfree(node); + } else { + struct sidtab_node_leaf *node = entry.ptr_leaf; + + if (!node) + return; + + for (i = 0; i < SIDTAB_LEAF_ENTRIES; i++) + context_destroy(&node->entries[i].context); + kfree(node); + } } void sidtab_destroy(struct sidtab *s) { - int i; - struct sidtab_node *cur, *temp; - - if (!s) - return; + u32 i, level; for (i = 0; i < SECINITSID_NUM; i++) if (s->isids[i].set) context_destroy(&s->isids[i].context); - for (i = 0; i < SIDTAB_SIZE; i++) { - cur = s->htable[i]; - while (cur) { - temp = cur; - cur = cur->next; - context_destroy(&temp->context); - kfree(temp); - } - s->htable[i] = NULL; - } - kfree(s->htable); - s->htable = NULL; - s->nel = 0; - s->next_sid = 1; + level = SIDTAB_MAX_LEVEL; + while (level && !s->roots[level].ptr_inner) + --level; + + sidtab_destroy_tree(s->roots[level], level); } diff --git a/security/selinux/ss/sidtab.h b/security/selinux/ss/sidtab.h index b83b4621614c..503ceb1ef3b2 100644 --- a/security/selinux/ss/sidtab.h +++ b/security/selinux/ss/sidtab.h @@ -1,42 +1,83 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * A security identifier table (sidtab) is a hash table + * A security identifier table (sidtab) is a lookup table * of security context structures indexed by SID value. * - * Author : Stephen Smalley, + * Original author: Stephen Smalley, + * Author: Ondrej Mosnacek, + * + * Copyright (C) 2018 Red Hat, Inc. */ #ifndef _SS_SIDTAB_H_ #define _SS_SIDTAB_H_ +#include +#include + #include "context.h" #include "flask.h" -struct sidtab_node { - u32 sid; /* security identifier */ - struct context context; /* security context structure */ - struct sidtab_node *next; +struct sidtab_entry_leaf { + struct context context; }; -#define SIDTAB_HASH_BITS 7 -#define SIDTAB_HASH_BUCKETS (1 << SIDTAB_HASH_BITS) -#define SIDTAB_HASH_MASK (SIDTAB_HASH_BUCKETS-1) +struct sidtab_node_inner; +struct sidtab_node_leaf; -#define SIDTAB_SIZE SIDTAB_HASH_BUCKETS +union sidtab_entry_inner { + struct sidtab_node_inner *ptr_inner; + struct sidtab_node_leaf *ptr_leaf; +}; + +/* align node size to page boundary */ +#define SIDTAB_NODE_ALLOC_SHIFT PAGE_SHIFT +#define SIDTAB_NODE_ALLOC_SIZE PAGE_SIZE + +#define size_to_shift(size) ((size) == 1 ? 1 : (const_ilog2((size) - 1) + 1)) + +#define SIDTAB_INNER_SHIFT \ + (SIDTAB_NODE_ALLOC_SHIFT - size_to_shift(sizeof(union sidtab_entry_inner))) +#define SIDTAB_INNER_ENTRIES ((size_t)1 << SIDTAB_INNER_SHIFT) +#define SIDTAB_LEAF_ENTRIES \ + (SIDTAB_NODE_ALLOC_SIZE / sizeof(struct sidtab_entry_leaf)) + +#define SIDTAB_MAX_BITS 31 /* limited to INT_MAX due to atomic_t range */ +#define SIDTAB_MAX (((u32)1 << SIDTAB_MAX_BITS) - 1) +/* ensure enough tree levels for SIDTAB_MAX entries */ +#define SIDTAB_MAX_LEVEL \ + DIV_ROUND_UP(SIDTAB_MAX_BITS - size_to_shift(SIDTAB_LEAF_ENTRIES), \ + SIDTAB_INNER_SHIFT) + +struct sidtab_node_leaf { + struct sidtab_entry_leaf entries[SIDTAB_LEAF_ENTRIES]; +}; + +struct sidtab_node_inner { + union sidtab_entry_inner entries[SIDTAB_INNER_ENTRIES]; +}; struct sidtab_isid_entry { int set; struct context context; }; +struct sidtab_convert_params { + int (*func)(struct context *oldc, struct context *newc, void *args); + void *args; + struct sidtab *target; +}; + +#define SIDTAB_RCACHE_SIZE 3 + struct sidtab { - struct sidtab_node **htable; - unsigned int nel; /* number of elements */ - unsigned int next_sid; /* next SID to allocate */ - unsigned char shutdown; -#define SIDTAB_CACHE_LEN 3 - struct sidtab_node *cache[SIDTAB_CACHE_LEN]; + union sidtab_entry_inner roots[SIDTAB_MAX_LEVEL + 1]; + atomic_t count; + struct sidtab_convert_params *convert; spinlock_t lock; + /* reverse lookup cache */ + atomic_t rcache[SIDTAB_RCACHE_SIZE]; + /* index == SID - 1 (no entry for SECSID_NULL) */ struct sidtab_isid_entry isids[SECINITSID_NUM]; }; @@ -46,15 +87,10 @@ int sidtab_set_initial(struct sidtab *s, u32 sid, struct context *context); struct context *sidtab_search(struct sidtab *s, u32 sid); struct context *sidtab_search_force(struct sidtab *s, u32 sid); -int sidtab_convert(struct sidtab *s, struct sidtab *news, - int (*apply)(u32 sid, - struct context *context, - void *args), - void *args); +int sidtab_convert(struct sidtab *s, struct sidtab_convert_params *params); int sidtab_context_to_sid(struct sidtab *s, struct context *context, u32 *sid); -void sidtab_hash_eval(struct sidtab *h, char *tag); void sidtab_destroy(struct sidtab *s); #endif /* _SS_SIDTAB_H_ */ From 66018f59c0b29838ea628d828c8db4b28c873960 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Thu, 3 Oct 2019 15:59:22 +0200 Subject: [PATCH 2049/3715] UPSTREAM: selinux: fix context string corruption in convert_context() string_to_context_struct() may garble the context string, so we need to copy back the contents again from the old context struct to avoid storing the corrupted context. Since string_to_context_struct() tokenizes (and therefore truncates) the context string and we are later potentially copying it with kstrdup(), this may eventually cause pieces of uninitialized kernel memory to be disclosed to userspace (when copying to userspace based on the stored length and not the null character). How to reproduce on Fedora and similar: # dnf install -y memcached # systemctl start memcached # semodule -d memcached # load_policy # load_policy # systemctl stop memcached # ausearch -m AVC type=AVC msg=audit(1570090572.648:313): avc: denied { signal } for pid=1 comm="systemd" scontext=system_u:system_r:init_t:s0 tcontext=system_u:object_r:unlabeled_t:s0 tclass=process permissive=0 trawcon=73797374656D5F75007400000000000070BE6E847296FFFF726F6D000096FFFF76 Cc: stable@vger.kernel.org Reported-by: Milos Malik Fixes: ee1a84fdfeed ("selinux: overhaul sidtab to fix bug and improve performance") Signed-off-by: Ondrej Mosnacek Acked-by: Stephen Smalley Signed-off-by: Paul Moore (cherry picked from commit 2a5243937c700ffe6a28e6557a4562a9ab0a17a4) Change-Id: I56add9632ba622b956adb00a2b1ecfc608a37c32 Bug: 140252993 Signed-off-by: Jeff Vander Stoep --- security/selinux/ss/services.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 82bd87278da0..5a2a4954a760 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -1934,7 +1934,14 @@ static int convert_context(struct context *oldc, struct context *newc, void *p) rc = string_to_context_struct(args->newp, NULL, s, newc, SECSID_NULL); if (rc == -EINVAL) { - /* Retain string representation for later mapping. */ + /* + * Retain string representation for later mapping. + * + * IMPORTANT: We need to copy the contents of oldc->str + * back into s again because string_to_context_struct() + * may have garbled it. + */ + memcpy(s, oldc->str, oldc->len); context_init(newc); newc->str = s; newc->len = oldc->len; From 114ca41561ab5d36b3495888b6e028876f8b3945 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Tue, 23 Jul 2019 08:50:59 +0200 Subject: [PATCH 2050/3715] UPSTREAM: selinux: check sidtab limit before adding a new entry We need to error out when trying to add an entry above SIDTAB_MAX in sidtab_reverse_lookup() to avoid overflow on the odd chance that this happens. Cc: stable@vger.kernel.org Fixes: ee1a84fdfeed ("selinux: overhaul sidtab to fix bug and improve performance") Signed-off-by: Ondrej Mosnacek Reviewed-by: Kees Cook Signed-off-by: Paul Moore (cherry picked from commit acbc372e6109c803cbee4733769d02008381740f) Change-Id: I88e6b5b1626c901e41ecbf1dfdded6f4e7301332 Bug: 140252993 Signed-off-by: Jeff Vander Stoep --- security/selinux/ss/sidtab.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/security/selinux/ss/sidtab.c b/security/selinux/ss/sidtab.c index e63a90ff2728..1f0a6eaa2d6a 100644 --- a/security/selinux/ss/sidtab.c +++ b/security/selinux/ss/sidtab.c @@ -286,6 +286,11 @@ static int sidtab_reverse_lookup(struct sidtab *s, struct context *context, ++count; } + /* bail out if we already reached max entries */ + rc = -EOVERFLOW; + if (count >= SIDTAB_MAX) + goto out_unlock; + /* insert context into new entry */ rc = -ENOMEM; dst = sidtab_do_lookup(s, count, 1); From ae114d920b0aff124c48b5a3e23a21e4ffed85da Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Wed, 14 Aug 2019 15:33:20 +0200 Subject: [PATCH 2051/3715] UPSTREAM: selinux: avoid atomic_t usage in sidtab As noted in Documentation/atomic_t.txt, if we don't need the RMW atomic operations, we should only use READ_ONCE()/WRITE_ONCE() + smp_rmb()/smp_wmb() where necessary (or the combined variants smp_load_acquire()/smp_store_release()). This patch converts the sidtab code to use regular u32 for the counter and reverse lookup cache and use the appropriate operations instead of atomic_get()/atomic_set(). Note that when reading/updating the reverse lookup cache we don't need memory barriers as it doesn't need to be consistent or accurate. We can now also replace some atomic ops with regular loads (when under spinlock) and stores (for conversion target fields that are always accessed under the master table's spinlock). We can now also bump SIDTAB_MAX to U32_MAX as we can use the full u32 range again. Suggested-by: Jann Horn Signed-off-by: Ondrej Mosnacek Reviewed-by: Jann Horn Signed-off-by: Paul Moore (cherry picked from commit 116f21bb967fcef1fa360fe591a2947481788020) Change-Id: I15ecafe9be3cc434fc91978d6621333b8a5669cb Bug: 140252993 Signed-off-by: Jeff Vander Stoep --- security/selinux/ss/sidtab.c | 48 ++++++++++++++++-------------------- security/selinux/ss/sidtab.h | 19 ++++++++++---- 2 files changed, 35 insertions(+), 32 deletions(-) diff --git a/security/selinux/ss/sidtab.c b/security/selinux/ss/sidtab.c index 1f0a6eaa2d6a..7d49994e8d5f 100644 --- a/security/selinux/ss/sidtab.c +++ b/security/selinux/ss/sidtab.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include "flask.h" #include "security.h" #include "sidtab.h" @@ -23,14 +23,14 @@ int sidtab_init(struct sidtab *s) memset(s->roots, 0, sizeof(s->roots)); + /* max count is SIDTAB_MAX so valid index is always < SIDTAB_MAX */ for (i = 0; i < SIDTAB_RCACHE_SIZE; i++) - atomic_set(&s->rcache[i], -1); + s->rcache[i] = SIDTAB_MAX; for (i = 0; i < SECINITSID_NUM; i++) s->isids[i].set = 0; - atomic_set(&s->count, 0); - + s->count = 0; s->convert = NULL; spin_lock_init(&s->lock); @@ -130,14 +130,12 @@ static struct context *sidtab_do_lookup(struct sidtab *s, u32 index, int alloc) static struct context *sidtab_lookup(struct sidtab *s, u32 index) { - u32 count = (u32)atomic_read(&s->count); + /* read entries only after reading count */ + u32 count = smp_load_acquire(&s->count); if (index >= count) return NULL; - /* read entries after reading count */ - smp_rmb(); - return sidtab_do_lookup(s, index, 0); } @@ -210,10 +208,10 @@ static int sidtab_find_context(union sidtab_entry_inner entry, static void sidtab_rcache_update(struct sidtab *s, u32 index, u32 pos) { while (pos > 0) { - atomic_set(&s->rcache[pos], atomic_read(&s->rcache[pos - 1])); + WRITE_ONCE(s->rcache[pos], READ_ONCE(s->rcache[pos - 1])); --pos; } - atomic_set(&s->rcache[0], (int)index); + WRITE_ONCE(s->rcache[0], index); } static void sidtab_rcache_push(struct sidtab *s, u32 index) @@ -227,14 +225,14 @@ static int sidtab_rcache_search(struct sidtab *s, struct context *context, u32 i; for (i = 0; i < SIDTAB_RCACHE_SIZE; i++) { - int v = atomic_read(&s->rcache[i]); + u32 v = READ_ONCE(s->rcache[i]); - if (v < 0) + if (v >= SIDTAB_MAX) continue; - if (context_cmp(sidtab_do_lookup(s, (u32)v, 0), context)) { - sidtab_rcache_update(s, (u32)v, i); - *index = (u32)v; + if (context_cmp(sidtab_do_lookup(s, v, 0), context)) { + sidtab_rcache_update(s, v, i); + *index = v; return 0; } } @@ -245,8 +243,7 @@ static int sidtab_reverse_lookup(struct sidtab *s, struct context *context, u32 *index) { unsigned long flags; - u32 count = (u32)atomic_read(&s->count); - u32 count_locked, level, pos; + u32 count, count_locked, level, pos; struct sidtab_convert_params *convert; struct context *dst, *dst_convert; int rc; @@ -255,11 +252,10 @@ static int sidtab_reverse_lookup(struct sidtab *s, struct context *context, if (rc == 0) return 0; + /* read entries only after reading count */ + count = smp_load_acquire(&s->count); level = sidtab_level_from_count(count); - /* read entries after reading count */ - smp_rmb(); - pos = 0; rc = sidtab_find_context(s->roots[level], &pos, count, level, context, index); @@ -272,7 +268,7 @@ static int sidtab_reverse_lookup(struct sidtab *s, struct context *context, spin_lock_irqsave(&s->lock, flags); convert = s->convert; - count_locked = (u32)atomic_read(&s->count); + count_locked = s->count; level = sidtab_level_from_count(count_locked); /* if count has changed before we acquired the lock, then catch up */ @@ -320,7 +316,7 @@ static int sidtab_reverse_lookup(struct sidtab *s, struct context *context, } /* at this point we know the insert won't fail */ - atomic_set(&convert->target->count, count + 1); + convert->target->count = count + 1; } if (context->len) @@ -331,9 +327,7 @@ static int sidtab_reverse_lookup(struct sidtab *s, struct context *context, *index = count; /* write entries before writing new count */ - smp_wmb(); - - atomic_set(&s->count, count + 1); + smp_store_release(&s->count, count + 1); rc = 0; out_unlock: @@ -423,7 +417,7 @@ int sidtab_convert(struct sidtab *s, struct sidtab_convert_params *params) return -EBUSY; } - count = (u32)atomic_read(&s->count); + count = s->count; level = sidtab_level_from_count(count); /* allocate last leaf in the new sidtab (to avoid race with @@ -436,7 +430,7 @@ int sidtab_convert(struct sidtab *s, struct sidtab_convert_params *params) } /* set count in case no new entries are added during conversion */ - atomic_set(¶ms->target->count, count); + params->target->count = count; /* enable live convert of new entries */ s->convert = params; diff --git a/security/selinux/ss/sidtab.h b/security/selinux/ss/sidtab.h index 503ceb1ef3b2..5bf9898a858a 100644 --- a/security/selinux/ss/sidtab.h +++ b/security/selinux/ss/sidtab.h @@ -41,8 +41,8 @@ union sidtab_entry_inner { #define SIDTAB_LEAF_ENTRIES \ (SIDTAB_NODE_ALLOC_SIZE / sizeof(struct sidtab_entry_leaf)) -#define SIDTAB_MAX_BITS 31 /* limited to INT_MAX due to atomic_t range */ -#define SIDTAB_MAX (((u32)1 << SIDTAB_MAX_BITS) - 1) +#define SIDTAB_MAX_BITS 32 +#define SIDTAB_MAX U32_MAX /* ensure enough tree levels for SIDTAB_MAX entries */ #define SIDTAB_MAX_LEVEL \ DIV_ROUND_UP(SIDTAB_MAX_BITS - size_to_shift(SIDTAB_LEAF_ENTRIES), \ @@ -70,13 +70,22 @@ struct sidtab_convert_params { #define SIDTAB_RCACHE_SIZE 3 struct sidtab { + /* + * lock-free read access only for as many items as a prior read of + * 'count' + */ union sidtab_entry_inner roots[SIDTAB_MAX_LEVEL + 1]; - atomic_t count; + /* + * access atomically via {READ|WRITE}_ONCE(); only increment under + * spinlock + */ + u32 count; + /* access only under spinlock */ struct sidtab_convert_params *convert; spinlock_t lock; - /* reverse lookup cache */ - atomic_t rcache[SIDTAB_RCACHE_SIZE]; + /* reverse lookup cache - access atomically via {READ|WRITE}_ONCE() */ + u32 rcache[SIDTAB_RCACHE_SIZE]; /* index == SID - 1 (no entry for SECSID_NULL) */ struct sidtab_isid_entry isids[SECINITSID_NUM]; From 45319d01102b06152cb38abcc1cf9d09b2fe8046 Mon Sep 17 00:00:00 2001 From: Jeff Vander Stoep Date: Fri, 22 Nov 2019 10:33:06 +0100 Subject: [PATCH 2052/3715] UPSTREAM: selinux: sidtab reverse lookup hash table This replaces the reverse table lookup and reverse cache with a hashtable which improves cache-miss reverse-lookup times from O(n) to O(1)* and maintains the same performance as a reverse cache hit. This reduces the time needed to add a new sidtab entry from ~500us to 5us on a Pixel 3 when there are ~10,000 sidtab entries. The implementation uses the kernel's generic hashtable API, It uses the context's string represtation as the hash source, and the kernels generic string hashing algorithm full_name_hash() to reduce the string to a 32 bit value. This change also maintains the improvement introduced in commit ee1a84fdfeed ("selinux: overhaul sidtab to fix bug and improve performance") which removed the need to keep the current sidtab locked during policy reload. It does however introduce periodic locking of the target sidtab while converting the hashtable. Sidtab entries are never modified or removed, so the context struct stored in the sid_to_context tree can also be used for the context_to_sid hashtable to reduce memory usage. This bug was reported by: - On the selinux bug tracker. BUG: kernel softlockup due to too many SIDs/contexts #37 https://github.com/SELinuxProject/selinux-kernel/issues/37 - Jovana Knezevic on Android's bugtracker. Bug: 140252993 "During multi-user performance testing, we create and remove users many times. selinux_android_restorecon_pkgdir goes from 1ms to over 20ms after about 200 user creations and removals. Accumulated over ~280 packages, that adds a significant time to user creation, making perf benchmarks unreliable." * Hashtable lookup is only O(1) when n < the number of buckets. Signed-off-by: Jeff Vander Stoep Reported-by: Stephen Smalley Reported-by: Jovana Knezevic Reviewed-by: Stephen Smalley Tested-by: Stephen Smalley [PM: subj tweak, removed changelog from patch description] Signed-off-by: Paul Moore (cherry picked from commit 66f8e2f03c02e812002f8e9e465681cc62edda5b) Bug: 140252993 Change-Id: Iead2a1d90731ae24fefec2a40af5ffdc457ac916 Signed-off-by: Jeff Vander Stoep (cherry picked from commit 20810a2469745210a7b2b8e0f9f3b60a28305f43) --- security/selinux/Kconfig | 12 ++ security/selinux/include/security.h | 1 + security/selinux/selinuxfs.c | 65 +++++++ security/selinux/ss/context.h | 11 +- security/selinux/ss/policydb.c | 5 + security/selinux/ss/services.c | 96 +++++++--- security/selinux/ss/services.h | 4 +- security/selinux/ss/sidtab.c | 263 ++++++++++++++-------------- security/selinux/ss/sidtab.h | 16 +- 9 files changed, 306 insertions(+), 167 deletions(-) diff --git a/security/selinux/Kconfig b/security/selinux/Kconfig index 8af7a690eb40..8297e48a283d 100644 --- a/security/selinux/Kconfig +++ b/security/selinux/Kconfig @@ -99,3 +99,15 @@ config SECURITY_SELINUX_CHECKREQPROT_VALUE via /selinux/checkreqprot if authorized by policy. If you are unsure how to answer this question, answer 0. + +config SECURITY_SELINUX_SIDTAB_HASH_BITS + int "NSA SELinux sidtab hashtable size" + depends on SECURITY_SELINUX + range 8 13 + default 9 + help + This option sets the number of buckets used in the sidtab hashtable + to 2^SECURITY_SELINUX_SIDTAB_HASH_BITS buckets. The number of hash + collisions may be viewed at /sys/fs/selinux/ss/sidtab_hash_stats. If + chain lengths are high (e.g. > 20) then selecting a higher value here + will ensure that lookups times are short and stable. diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index ba8eedf42b90..0b3f3cc0c6a7 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -390,5 +390,6 @@ extern int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm); extern void avtab_cache_init(void); extern void ebitmap_cache_init(void); extern void hashtab_cache_init(void); +extern int security_sidtab_hash_stats(struct selinux_state *state, char *page); #endif /* _SELINUX_SECURITY_H_ */ diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 4be683eeba01..dba17ea51ec0 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1488,6 +1488,32 @@ static ssize_t sel_read_avc_hash_stats(struct file *filp, char __user *buf, return length; } +static ssize_t sel_read_sidtab_hash_stats(struct file *filp, char __user *buf, + size_t count, loff_t *ppos) +{ + struct selinux_fs_info *fsi = file_inode(filp)->i_sb->s_fs_info; + struct selinux_state *state = fsi->state; + char *page; + ssize_t length; + + page = (char *)__get_free_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + length = security_sidtab_hash_stats(state, page); + if (length >= 0) + length = simple_read_from_buffer(buf, count, ppos, page, + length); + free_page((unsigned long)page); + + return length; +} + +static const struct file_operations sel_sidtab_hash_stats_ops = { + .read = sel_read_sidtab_hash_stats, + .llseek = generic_file_llseek, +}; + static const struct file_operations sel_avc_cache_threshold_ops = { .read = sel_read_avc_cache_threshold, .write = sel_write_avc_cache_threshold, @@ -1603,6 +1629,37 @@ static int sel_make_avc_files(struct dentry *dir) return 0; } +static int sel_make_ss_files(struct dentry *dir) +{ + struct super_block *sb = dir->d_sb; + struct selinux_fs_info *fsi = sb->s_fs_info; + int i; + static struct tree_descr files[] = { + { "sidtab_hash_stats", &sel_sidtab_hash_stats_ops, S_IRUGO }, + }; + + for (i = 0; i < ARRAY_SIZE(files); i++) { + struct inode *inode; + struct dentry *dentry; + + dentry = d_alloc_name(dir, files[i].name); + if (!dentry) + return -ENOMEM; + + inode = sel_make_inode(dir->d_sb, S_IFREG|files[i].mode); + if (!inode) { + dput(dentry); + return -ENOMEM; + } + + inode->i_fop = files[i].ops; + inode->i_ino = ++fsi->last_ino; + d_add(dentry, inode); + } + + return 0; +} + static ssize_t sel_read_initcon(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -1957,6 +2014,14 @@ static int sel_fill_super(struct super_block *sb, void *data, int silent) } ret = sel_make_avc_files(dentry); + + dentry = sel_make_dir(sb->s_root, "ss", &fsi->last_ino); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); + goto err; + } + + ret = sel_make_ss_files(dentry); if (ret) goto err; diff --git a/security/selinux/ss/context.h b/security/selinux/ss/context.h index 2260c44a568c..69fd1943683c 100644 --- a/security/selinux/ss/context.h +++ b/security/selinux/ss/context.h @@ -31,6 +31,7 @@ struct context { u32 len; /* length of string in bytes */ struct mls_range range; char *str; /* string representation if context cannot be mapped. */ + u32 hash; /* a hash of the string representation */ }; static inline void mls_context_init(struct context *c) @@ -136,12 +137,13 @@ static inline int context_cpy(struct context *dst, struct context *src) kfree(dst->str); return rc; } + dst->hash = src->hash; return 0; } static inline void context_destroy(struct context *c) { - c->user = c->role = c->type = 0; + c->user = c->role = c->type = c->hash = 0; kfree(c->str); c->str = NULL; c->len = 0; @@ -150,6 +152,8 @@ static inline void context_destroy(struct context *c) static inline int context_cmp(struct context *c1, struct context *c2) { + if (c1->hash && c2->hash && (c1->hash != c2->hash)) + return 0; if (c1->len && c2->len) return (c1->len == c2->len && !strcmp(c1->str, c2->str)); if (c1->len || c2->len) @@ -160,5 +164,10 @@ static inline int context_cmp(struct context *c1, struct context *c2) mls_context_cmp(c1, c2)); } +static inline unsigned int context_compute_hash(const char *s) +{ + return full_name_hash(NULL, s, strlen(s)); +} + #endif /* _SS_CONTEXT_H_ */ diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index 7c8b7f365980..57e608f8a20c 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -922,6 +922,11 @@ int policydb_load_isids(struct policydb *p, struct sidtab *s) sidtab_destroy(s); goto out; } + rc = context_add_hash(p, &c->context[0]); + if (rc) { + sidtab_destroy(s); + goto out; + } rc = sidtab_set_initial(s, c->sid[0], &c->context[0]); if (rc) { diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 5a2a4954a760..c21b0cfe0de3 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -1272,6 +1272,17 @@ static int context_struct_to_string(struct policydb *p, #include "initial_sid_to_string.h" +int security_sidtab_hash_stats(struct selinux_state *state, char *page) +{ + int rc; + + read_lock(&state->ss->policy_rwlock); + rc = sidtab_hash_stats(state->ss->sidtab, page); + read_unlock(&state->ss->policy_rwlock); + + return rc; +} + const char *security_get_initial_sid_context(u32 sid) { if (unlikely(sid > SECINITSID_NUM)) @@ -1440,6 +1451,42 @@ out: return rc; } +int context_add_hash(struct policydb *policydb, + struct context *context) +{ + int rc; + char *str; + int len; + + if (context->str) { + context->hash = context_compute_hash(context->str); + } else { + rc = context_struct_to_string(policydb, context, + &str, &len); + if (rc) + return rc; + context->hash = context_compute_hash(str); + kfree(str); + } + return 0; +} + +static int context_struct_to_sid(struct selinux_state *state, + struct context *context, u32 *sid) +{ + int rc; + struct sidtab *sidtab = state->ss->sidtab; + struct policydb *policydb = &state->ss->policydb; + + if (!context->hash) { + rc = context_add_hash(policydb, context); + if (rc) + return rc; + } + + return sidtab_context_to_sid(sidtab, context, sid); +} + static int security_context_to_sid_core(struct selinux_state *state, const char *scontext, u32 scontext_len, u32 *sid, u32 def_sid, gfp_t gfp_flags, @@ -1492,7 +1539,7 @@ static int security_context_to_sid_core(struct selinux_state *state, str = NULL; } else if (rc) goto out_unlock; - rc = sidtab_context_to_sid(sidtab, &context, sid); + rc = context_struct_to_sid(state, &context, sid); context_destroy(&context); out_unlock: read_unlock(&state->ss->policy_rwlock); @@ -1793,7 +1840,7 @@ static int security_compute_sid(struct selinux_state *state, goto out_unlock; } /* Obtain the sid for the context. */ - rc = sidtab_context_to_sid(sidtab, &newcontext, out_sid); + rc = context_struct_to_sid(state, &newcontext, out_sid); out_unlock: read_unlock(&state->ss->policy_rwlock); context_destroy(&newcontext); @@ -1945,6 +1992,7 @@ static int convert_context(struct context *oldc, struct context *newc, void *p) context_init(newc); newc->str = s; newc->len = oldc->len; + newc->hash = oldc->hash; return 0; } kfree(s); @@ -2021,6 +2069,10 @@ static int convert_context(struct context *oldc, struct context *newc, void *p) goto bad; } + rc = context_add_hash(args->newp, newc); + if (rc) + goto bad; + return 0; bad: /* Map old representation to string and save it. */ @@ -2030,6 +2082,7 @@ bad: context_destroy(newc); newc->str = s; newc->len = len; + newc->hash = context_compute_hash(s); pr_info("SELinux: Context %s became invalid (unmapped).\n", newc->str); return 0; @@ -2268,8 +2321,7 @@ int security_port_sid(struct selinux_state *state, if (c) { if (!c->sid[0]) { - rc = sidtab_context_to_sid(sidtab, - &c->context[0], + rc = context_struct_to_sid(state, &c->context[0], &c->sid[0]); if (rc) goto out; @@ -2294,14 +2346,12 @@ int security_ib_pkey_sid(struct selinux_state *state, u64 subnet_prefix, u16 pkey_num, u32 *out_sid) { struct policydb *policydb; - struct sidtab *sidtab; struct ocontext *c; int rc = 0; read_lock(&state->ss->policy_rwlock); policydb = &state->ss->policydb; - sidtab = state->ss->sidtab; c = policydb->ocontexts[OCON_IBPKEY]; while (c) { @@ -2315,7 +2365,7 @@ int security_ib_pkey_sid(struct selinux_state *state, if (c) { if (!c->sid[0]) { - rc = sidtab_context_to_sid(sidtab, + rc = context_struct_to_sid(state, &c->context[0], &c->sid[0]); if (rc) @@ -2362,8 +2412,7 @@ int security_ib_endport_sid(struct selinux_state *state, if (c) { if (!c->sid[0]) { - rc = sidtab_context_to_sid(sidtab, - &c->context[0], + rc = context_struct_to_sid(state, &c->context[0], &c->sid[0]); if (rc) goto out; @@ -2404,13 +2453,11 @@ int security_netif_sid(struct selinux_state *state, if (c) { if (!c->sid[0] || !c->sid[1]) { - rc = sidtab_context_to_sid(sidtab, - &c->context[0], - &c->sid[0]); + rc = context_struct_to_sid(state, &c->context[0], + &c->sid[0]); if (rc) goto out; - rc = sidtab_context_to_sid(sidtab, - &c->context[1], + rc = context_struct_to_sid(state, &c->context[1], &c->sid[1]); if (rc) goto out; @@ -2451,14 +2498,12 @@ int security_node_sid(struct selinux_state *state, u32 *out_sid) { struct policydb *policydb; - struct sidtab *sidtab; int rc; struct ocontext *c; read_lock(&state->ss->policy_rwlock); policydb = &state->ss->policydb; - sidtab = state->ss->sidtab; switch (domain) { case AF_INET: { @@ -2500,7 +2545,7 @@ int security_node_sid(struct selinux_state *state, if (c) { if (!c->sid[0]) { - rc = sidtab_context_to_sid(sidtab, + rc = context_struct_to_sid(state, &c->context[0], &c->sid[0]); if (rc) @@ -2584,12 +2629,17 @@ int security_get_user_sids(struct selinux_state *state, usercon.role = i + 1; ebitmap_for_each_positive_bit(&role->types, tnode, j) { usercon.type = j + 1; + /* + * The same context struct is reused here so the hash + * must be reset. + */ + usercon.hash = 0; if (mls_setup_user_range(policydb, fromcon, user, &usercon)) continue; - rc = sidtab_context_to_sid(sidtab, &usercon, &sid); + rc = context_struct_to_sid(state, &usercon, &sid); if (rc) goto out_unlock; if (mynel < maxnel) { @@ -2660,7 +2710,6 @@ static inline int __security_genfs_sid(struct selinux_state *state, u32 *sid) { struct policydb *policydb = &state->ss->policydb; - struct sidtab *sidtab = state->ss->sidtab; int len; u16 sclass; struct genfs *genfs; @@ -2695,7 +2744,7 @@ static inline int __security_genfs_sid(struct selinux_state *state, goto out; if (!c->sid[0]) { - rc = sidtab_context_to_sid(sidtab, &c->context[0], &c->sid[0]); + rc = context_struct_to_sid(state, &c->context[0], &c->sid[0]); if (rc) goto out; } @@ -2758,7 +2807,7 @@ int security_fs_use(struct selinux_state *state, struct super_block *sb) if (c) { sbsec->behavior = c->v.behavior; if (!c->sid[0]) { - rc = sidtab_context_to_sid(sidtab, &c->context[0], + rc = context_struct_to_sid(state, &c->context[0], &c->sid[0]); if (rc) goto out; @@ -3001,8 +3050,7 @@ int security_sid_mls_copy(struct selinux_state *state, goto out_unlock; } } - - rc = sidtab_context_to_sid(sidtab, &newcon, new_sid); + rc = context_struct_to_sid(state, &newcon, new_sid); out_unlock: read_unlock(&state->ss->policy_rwlock); context_destroy(&newcon); @@ -3590,7 +3638,7 @@ int security_netlbl_secattr_to_sid(struct selinux_state *state, if (!mls_context_isvalid(policydb, &ctx_new)) goto out_free; - rc = sidtab_context_to_sid(sidtab, &ctx_new, sid); + rc = context_struct_to_sid(state, &ctx_new, sid); if (rc) goto out_free; diff --git a/security/selinux/ss/services.h b/security/selinux/ss/services.h index 9a36de860368..fc40640a9725 100644 --- a/security/selinux/ss/services.h +++ b/security/selinux/ss/services.h @@ -8,7 +8,7 @@ #define _SS_SERVICES_H_ #include "policydb.h" -#include "sidtab.h" +#include "context.h" /* Mapping for a single class */ struct selinux_mapping { @@ -39,4 +39,6 @@ void services_compute_xperms_drivers(struct extended_perms *xperms, void services_compute_xperms_decision(struct extended_perms_decision *xpermd, struct avtab_node *node); +int context_add_hash(struct policydb *policydb, struct context *context); + #endif /* _SS_SERVICES_H_ */ diff --git a/security/selinux/ss/sidtab.c b/security/selinux/ss/sidtab.c index 7d49994e8d5f..d9d8599e8e63 100644 --- a/security/selinux/ss/sidtab.c +++ b/security/selinux/ss/sidtab.c @@ -17,26 +17,43 @@ #include "security.h" #include "sidtab.h" +#define index_to_sid(index) (index + SECINITSID_NUM + 1) +#define sid_to_index(sid) (sid - (SECINITSID_NUM + 1)) + int sidtab_init(struct sidtab *s) { u32 i; memset(s->roots, 0, sizeof(s->roots)); - /* max count is SIDTAB_MAX so valid index is always < SIDTAB_MAX */ - for (i = 0; i < SIDTAB_RCACHE_SIZE; i++) - s->rcache[i] = SIDTAB_MAX; - for (i = 0; i < SECINITSID_NUM; i++) s->isids[i].set = 0; s->count = 0; s->convert = NULL; + hash_init(s->context_to_sid); spin_lock_init(&s->lock); return 0; } +static u32 context_to_sid(struct sidtab *s, struct context *context) +{ + struct sidtab_entry_leaf *entry; + u32 sid = 0; + + rcu_read_lock(); + hash_for_each_possible_rcu(s->context_to_sid, entry, list, + context->hash) { + if (context_cmp(&entry->context, context)) { + sid = entry->sid; + break; + } + } + rcu_read_unlock(); + return sid; +} + int sidtab_set_initial(struct sidtab *s, u32 sid, struct context *context) { struct sidtab_isid_entry *entry; @@ -47,14 +64,60 @@ int sidtab_set_initial(struct sidtab *s, u32 sid, struct context *context) entry = &s->isids[sid - 1]; - rc = context_cpy(&entry->context, context); + rc = context_cpy(&entry->leaf.context, context); if (rc) return rc; entry->set = 1; + + /* + * Multiple initial sids may map to the same context. Check that this + * context is not already represented in the context_to_sid hashtable + * to avoid duplicate entries and long linked lists upon hash + * collision. + */ + if (!context_to_sid(s, context)) { + entry->leaf.sid = sid; + hash_add(s->context_to_sid, &entry->leaf.list, context->hash); + } + return 0; } +int sidtab_hash_stats(struct sidtab *sidtab, char *page) +{ + int i; + int chain_len = 0; + int slots_used = 0; + int entries = 0; + int max_chain_len = 0; + int cur_bucket = 0; + struct sidtab_entry_leaf *entry; + + rcu_read_lock(); + hash_for_each_rcu(sidtab->context_to_sid, i, entry, list) { + entries++; + if (i == cur_bucket) { + chain_len++; + if (chain_len == 1) + slots_used++; + } else { + cur_bucket = i; + if (chain_len > max_chain_len) + max_chain_len = chain_len; + chain_len = 0; + } + } + rcu_read_unlock(); + + if (chain_len > max_chain_len) + max_chain_len = chain_len; + + return scnprintf(page, PAGE_SIZE, "entries: %d\nbuckets used: %d/%d\n" + "longest chain: %d\n", entries, + slots_used, SIDTAB_HASH_BUCKETS, max_chain_len); +} + static u32 sidtab_level_from_count(u32 count) { u32 capacity = SIDTAB_LEAF_ENTRIES; @@ -88,7 +151,8 @@ static int sidtab_alloc_roots(struct sidtab *s, u32 level) return 0; } -static struct context *sidtab_do_lookup(struct sidtab *s, u32 index, int alloc) +static struct sidtab_entry_leaf *sidtab_do_lookup(struct sidtab *s, u32 index, + int alloc) { union sidtab_entry_inner *entry; u32 level, capacity_shift, leaf_index = index / SIDTAB_LEAF_ENTRIES; @@ -125,7 +189,7 @@ static struct context *sidtab_do_lookup(struct sidtab *s, u32 index, int alloc) if (!entry->ptr_leaf) return NULL; } - return &entry->ptr_leaf->entries[index % SIDTAB_LEAF_ENTRIES].context; + return &entry->ptr_leaf->entries[index % SIDTAB_LEAF_ENTRIES]; } static struct context *sidtab_lookup(struct sidtab *s, u32 index) @@ -136,12 +200,12 @@ static struct context *sidtab_lookup(struct sidtab *s, u32 index) if (index >= count) return NULL; - return sidtab_do_lookup(s, index, 0); + return &sidtab_do_lookup(s, index, 0)->context; } static struct context *sidtab_lookup_initial(struct sidtab *s, u32 sid) { - return s->isids[sid - 1].set ? &s->isids[sid - 1].context : NULL; + return s->isids[sid - 1].set ? &s->isids[sid - 1].leaf.context : NULL; } static struct context *sidtab_search_core(struct sidtab *s, u32 sid, int force) @@ -150,7 +214,7 @@ static struct context *sidtab_search_core(struct sidtab *s, u32 sid, int force) if (sid != 0) { if (sid > SECINITSID_NUM) - context = sidtab_lookup(s, sid - (SECINITSID_NUM + 1)); + context = sidtab_lookup(s, sid_to_index(sid)); else context = sidtab_lookup_initial(s, sid); if (context && (!context->len || force)) @@ -170,117 +234,30 @@ struct context *sidtab_search_force(struct sidtab *s, u32 sid) return sidtab_search_core(s, sid, 1); } -static int sidtab_find_context(union sidtab_entry_inner entry, - u32 *pos, u32 count, u32 level, - struct context *context, u32 *index) -{ - int rc; - u32 i; - - if (level != 0) { - struct sidtab_node_inner *node = entry.ptr_inner; - - i = 0; - while (i < SIDTAB_INNER_ENTRIES && *pos < count) { - rc = sidtab_find_context(node->entries[i], - pos, count, level - 1, - context, index); - if (rc == 0) - return 0; - i++; - } - } else { - struct sidtab_node_leaf *node = entry.ptr_leaf; - - i = 0; - while (i < SIDTAB_LEAF_ENTRIES && *pos < count) { - if (context_cmp(&node->entries[i].context, context)) { - *index = *pos; - return 0; - } - (*pos)++; - i++; - } - } - return -ENOENT; -} - -static void sidtab_rcache_update(struct sidtab *s, u32 index, u32 pos) -{ - while (pos > 0) { - WRITE_ONCE(s->rcache[pos], READ_ONCE(s->rcache[pos - 1])); - --pos; - } - WRITE_ONCE(s->rcache[0], index); -} - -static void sidtab_rcache_push(struct sidtab *s, u32 index) -{ - sidtab_rcache_update(s, index, SIDTAB_RCACHE_SIZE - 1); -} - -static int sidtab_rcache_search(struct sidtab *s, struct context *context, - u32 *index) -{ - u32 i; - - for (i = 0; i < SIDTAB_RCACHE_SIZE; i++) { - u32 v = READ_ONCE(s->rcache[i]); - - if (v >= SIDTAB_MAX) - continue; - - if (context_cmp(sidtab_do_lookup(s, v, 0), context)) { - sidtab_rcache_update(s, v, i); - *index = v; - return 0; - } - } - return -ENOENT; -} - -static int sidtab_reverse_lookup(struct sidtab *s, struct context *context, - u32 *index) +int sidtab_context_to_sid(struct sidtab *s, struct context *context, + u32 *sid) { unsigned long flags; - u32 count, count_locked, level, pos; + u32 count; struct sidtab_convert_params *convert; - struct context *dst, *dst_convert; + struct sidtab_entry_leaf *dst, *dst_convert; int rc; - rc = sidtab_rcache_search(s, context, index); - if (rc == 0) + *sid = context_to_sid(s, context); + if (*sid) return 0; - /* read entries only after reading count */ - count = smp_load_acquire(&s->count); - level = sidtab_level_from_count(count); - - pos = 0; - rc = sidtab_find_context(s->roots[level], &pos, count, level, - context, index); - if (rc == 0) { - sidtab_rcache_push(s, *index); - return 0; - } - /* lock-free search failed: lock, re-search, and insert if not found */ spin_lock_irqsave(&s->lock, flags); - convert = s->convert; - count_locked = s->count; - level = sidtab_level_from_count(count_locked); + rc = 0; + *sid = context_to_sid(s, context); + if (*sid) + goto out_unlock; - /* if count has changed before we acquired the lock, then catch up */ - while (count < count_locked) { - if (context_cmp(sidtab_do_lookup(s, count, 0), context)) { - sidtab_rcache_push(s, count); - *index = count; - rc = 0; - goto out_unlock; - } - ++count; - } + /* read entries only after reading count */ + count = smp_load_acquire(&s->count); + convert = s->convert; /* bail out if we already reached max entries */ rc = -EOVERFLOW; @@ -293,7 +270,9 @@ static int sidtab_reverse_lookup(struct sidtab *s, struct context *context, if (!dst) goto out_unlock; - rc = context_cpy(dst, context); + dst->sid = index_to_sid(count); + + rc = context_cpy(&dst->context, context); if (rc) goto out_unlock; @@ -305,29 +284,32 @@ static int sidtab_reverse_lookup(struct sidtab *s, struct context *context, rc = -ENOMEM; dst_convert = sidtab_do_lookup(convert->target, count, 1); if (!dst_convert) { - context_destroy(dst); + context_destroy(&dst->context); goto out_unlock; } - rc = convert->func(context, dst_convert, convert->args); + rc = convert->func(context, &dst_convert->context, + convert->args); if (rc) { - context_destroy(dst); + context_destroy(&dst->context); goto out_unlock; } - - /* at this point we know the insert won't fail */ + dst_convert->sid = index_to_sid(count); convert->target->count = count + 1; + + hash_add_rcu(convert->target->context_to_sid, + &dst_convert->list, dst_convert->context.hash); } if (context->len) pr_info("SELinux: Context %s is not valid (left unmapped).\n", context->str); - sidtab_rcache_push(s, count); - *index = count; + *sid = index_to_sid(count); - /* write entries before writing new count */ + /* write entries before updating count */ smp_store_release(&s->count, count + 1); + hash_add_rcu(s->context_to_sid, &dst->list, dst->context.hash); rc = 0; out_unlock: @@ -335,25 +317,19 @@ out_unlock: return rc; } -int sidtab_context_to_sid(struct sidtab *s, struct context *context, u32 *sid) +static void sidtab_convert_hashtable(struct sidtab *s, u32 count) { - int rc; + struct sidtab_entry_leaf *entry; u32 i; - for (i = 0; i < SECINITSID_NUM; i++) { - struct sidtab_isid_entry *entry = &s->isids[i]; + for (i = 0; i < count; i++) { + entry = sidtab_do_lookup(s, i, 0); + entry->sid = index_to_sid(i); + + hash_add_rcu(s->context_to_sid, &entry->list, + entry->context.hash); - if (entry->set && context_cmp(context, &entry->context)) { - *sid = i + 1; - return 0; - } } - - rc = sidtab_reverse_lookup(s, context, sid); - if (rc) - return rc; - *sid += SECINITSID_NUM + 1; - return 0; } static int sidtab_convert_tree(union sidtab_entry_inner *edst, @@ -400,6 +376,7 @@ static int sidtab_convert_tree(union sidtab_entry_inner *edst, } cond_resched(); } + return 0; } @@ -435,7 +412,7 @@ int sidtab_convert(struct sidtab *s, struct sidtab_convert_params *params) /* enable live convert of new entries */ s->convert = params; - /* we can safely do the rest of the conversion outside the lock */ + /* we can safely convert the tree outside the lock */ spin_unlock_irqrestore(&s->lock, flags); pr_info("SELinux: Converting %u SID table entries...\n", count); @@ -449,8 +426,17 @@ int sidtab_convert(struct sidtab *s, struct sidtab_convert_params *params) spin_lock_irqsave(&s->lock, flags); s->convert = NULL; spin_unlock_irqrestore(&s->lock, flags); + return rc; } - return rc; + /* + * The hashtable can also be modified in sidtab_context_to_sid() + * so we must re-acquire the lock here. + */ + spin_lock_irqsave(&s->lock, flags); + sidtab_convert_hashtable(params->target, count); + spin_unlock_irqrestore(&s->lock, flags); + + return 0; } static void sidtab_destroy_tree(union sidtab_entry_inner entry, u32 level) @@ -484,11 +470,16 @@ void sidtab_destroy(struct sidtab *s) for (i = 0; i < SECINITSID_NUM; i++) if (s->isids[i].set) - context_destroy(&s->isids[i].context); + context_destroy(&s->isids[i].leaf.context); level = SIDTAB_MAX_LEVEL; while (level && !s->roots[level].ptr_inner) --level; sidtab_destroy_tree(s->roots[level], level); + /* + * The context_to_sid hashtable's objects are all shared + * with the isids array and context tree, and so don't need + * to be cleaned up here. + */ } diff --git a/security/selinux/ss/sidtab.h b/security/selinux/ss/sidtab.h index 5bf9898a858a..8564ec0f3599 100644 --- a/security/selinux/ss/sidtab.h +++ b/security/selinux/ss/sidtab.h @@ -13,12 +13,15 @@ #include #include +#include #include "context.h" #include "flask.h" struct sidtab_entry_leaf { + u32 sid; struct context context; + struct hlist_node list; }; struct sidtab_node_inner; @@ -58,7 +61,7 @@ struct sidtab_node_inner { struct sidtab_isid_entry { int set; - struct context context; + struct sidtab_entry_leaf leaf; }; struct sidtab_convert_params { @@ -67,7 +70,8 @@ struct sidtab_convert_params { struct sidtab *target; }; -#define SIDTAB_RCACHE_SIZE 3 +#define SIDTAB_HASH_BITS CONFIG_SECURITY_SELINUX_SIDTAB_HASH_BITS +#define SIDTAB_HASH_BUCKETS (1 << SIDTAB_HASH_BITS) struct sidtab { /* @@ -84,11 +88,11 @@ struct sidtab { struct sidtab_convert_params *convert; spinlock_t lock; - /* reverse lookup cache - access atomically via {READ|WRITE}_ONCE() */ - u32 rcache[SIDTAB_RCACHE_SIZE]; - /* index == SID - 1 (no entry for SECSID_NULL) */ struct sidtab_isid_entry isids[SECINITSID_NUM]; + + /* Hash table for fast reverse context-to-sid lookups. */ + DECLARE_HASHTABLE(context_to_sid, SIDTAB_HASH_BITS); }; int sidtab_init(struct sidtab *s); @@ -102,6 +106,8 @@ int sidtab_context_to_sid(struct sidtab *s, struct context *context, u32 *sid); void sidtab_destroy(struct sidtab *s); +int sidtab_hash_stats(struct sidtab *sidtab, char *page); + #endif /* _SS_SIDTAB_H_ */ From a123233fc320ea72c048d41d348b568de1ece023 Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 14 Nov 2019 15:15:26 -0800 Subject: [PATCH 2053/3715] nvme_fc: add module to ops template to allow module references [ Upstream commit 863fbae929c7a5b64e96b8a3ffb34a29eefb9f8f ] In nvme-fc: it's possible to have connected active controllers and as no references are taken on the LLDD, the LLDD can be unloaded. The controller would enter a reconnect state and as long as the LLDD resumed within the reconnect timeout, the controller would resume. But if a namespace on the controller is the root device, allowing the driver to unload can be problematic. To reload the driver, it may require new io to the boot device, and as it's no longer connected we get into a catch-22 that eventually fails, and the system locks up. Fix this issue by taking a module reference for every connected controller (which is what the core layer did to the transport module). Reference is cleared when the controller is removed. Acked-by: Himanshu Madhani Reviewed-by: Christoph Hellwig Signed-off-by: James Smart Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/host/fc.c | 14 ++++++++++++-- drivers/nvme/target/fcloop.c | 1 + drivers/scsi/lpfc/lpfc_nvme.c | 2 ++ drivers/scsi/qla2xxx/qla_nvme.c | 1 + include/linux/nvme-fc-driver.h | 4 ++++ 5 files changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 058d542647dd..9e4d2ecf736d 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -337,7 +337,8 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo, !template->ls_req || !template->fcp_io || !template->ls_abort || !template->fcp_abort || !template->max_hw_queues || !template->max_sgl_segments || - !template->max_dif_sgl_segments || !template->dma_boundary) { + !template->max_dif_sgl_segments || !template->dma_boundary || + !template->module) { ret = -EINVAL; goto out_reghost_failed; } @@ -1762,6 +1763,7 @@ nvme_fc_ctrl_free(struct kref *ref) { struct nvme_fc_ctrl *ctrl = container_of(ref, struct nvme_fc_ctrl, ref); + struct nvme_fc_lport *lport = ctrl->lport; unsigned long flags; if (ctrl->ctrl.tagset) { @@ -1787,6 +1789,7 @@ nvme_fc_ctrl_free(struct kref *ref) if (ctrl->ctrl.opts) nvmf_free_options(ctrl->ctrl.opts); kfree(ctrl); + module_put(lport->ops->module); } static void @@ -2765,10 +2768,15 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, goto out_fail; } + if (!try_module_get(lport->ops->module)) { + ret = -EUNATCH; + goto out_free_ctrl; + } + idx = ida_simple_get(&nvme_fc_ctrl_cnt, 0, 0, GFP_KERNEL); if (idx < 0) { ret = -ENOSPC; - goto out_free_ctrl; + goto out_mod_put; } ctrl->ctrl.opts = opts; @@ -2915,6 +2923,8 @@ out_free_queues: out_free_ida: put_device(ctrl->dev); ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); +out_mod_put: + module_put(lport->ops->module); out_free_ctrl: kfree(ctrl); out_fail: diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 096523d8dd42..b8fe8702065b 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -693,6 +693,7 @@ fcloop_targetport_delete(struct nvmet_fc_target_port *targetport) #define FCLOOP_DMABOUND_4G 0xFFFFFFFF static struct nvme_fc_port_template fctemplate = { + .module = THIS_MODULE, .localport_delete = fcloop_localport_delete, .remoteport_delete = fcloop_remoteport_delete, .create_queue = fcloop_create_queue, diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index fcf4b4175d77..af937b91765e 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -1591,6 +1591,8 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, /* Declare and initialization an instance of the FC NVME template. */ static struct nvme_fc_port_template lpfc_nvme_template = { + .module = THIS_MODULE, + /* initiator-based functions */ .localport_delete = lpfc_nvme_localport_delete, .remoteport_delete = lpfc_nvme_remoteport_delete, diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c index 6b33a1f24f56..7dceed021236 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.c +++ b/drivers/scsi/qla2xxx/qla_nvme.c @@ -578,6 +578,7 @@ static void qla_nvme_remoteport_delete(struct nvme_fc_remote_port *rport) } static struct nvme_fc_port_template qla_nvme_fc_transport = { + .module = THIS_MODULE, .localport_delete = qla_nvme_localport_delete, .remoteport_delete = qla_nvme_remoteport_delete, .create_queue = qla_nvme_alloc_queue, diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index a726f96010d5..e9c3b98df3e2 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -279,6 +279,8 @@ struct nvme_fc_remote_port { * * Host/Initiator Transport Entrypoints/Parameters: * + * @module: The LLDD module using the interface + * * @localport_delete: The LLDD initiates deletion of a localport via * nvme_fc_deregister_localport(). However, the teardown is * asynchronous. This routine is called upon the completion of the @@ -392,6 +394,8 @@ struct nvme_fc_remote_port { * Value is Mandatory. Allowed to be zero. */ struct nvme_fc_port_template { + struct module *module; + /* initiator-based functions */ void (*localport_delete)(struct nvme_fc_local_port *); void (*remoteport_delete)(struct nvme_fc_remote_port *); From eb9723e635d9d3cd5ea76d599933de710b9251c2 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 2 Dec 2019 09:55:46 +0100 Subject: [PATCH 2054/3715] iio: adc: max9611: Fix too short conversion time delay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9fd229c478fbf77c41c8528aa757ef14210365f6 ] As of commit b9ddd5091160793e ("iio: adc: max9611: Fix temperature reading in probe"), max9611 initialization sometimes fails on the Salvator-X(S) development board with: max9611 4-007f: Invalid value received from ADC 0x8000: aborting max9611: probe of 4-007f failed with error -5 The max9611 driver tests communications with the chip by reading the die temperature during the probe function, which returns an invalid value. According to the datasheet, the typical ADC conversion time is 2 ms, but no minimum or maximum values are provided. Maxim Technical Support confirmed this was tested with temperature Ta=25 degreeC, and promised to inform me if a maximum/minimum value is available (they didn't get back to me, so I assume it is not). However, the driver assumes a 1 ms conversion time. Usually the usleep_range() call returns after more than 1.8 ms, hence it succeeds. When it returns earlier, the data register may be read too early, and the previous measurement value will be returned. After boot, this is the temperature POR (power-on reset) value, causing the failure above. Fix this by increasing the delay from 1000-2000 µs to 3000-3300 µs. Note that this issue has always been present, but it was exposed by the aformentioned commit. Fixes: 69780a3bbc0b1e7e ("iio: adc: Add Maxim max9611 ADC driver") Signed-off-by: Geert Uytterhoeven Reviewed-by: Jacopo Mondi Reviewed-by: Wolfram Sang Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/adc/max9611.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/iio/adc/max9611.c b/drivers/iio/adc/max9611.c index 33be07c78b96..8649a61c50bc 100644 --- a/drivers/iio/adc/max9611.c +++ b/drivers/iio/adc/max9611.c @@ -92,6 +92,12 @@ #define MAX9611_TEMP_SCALE_NUM 1000000 #define MAX9611_TEMP_SCALE_DIV 2083 +/* + * Conversion time is 2 ms (typically) at Ta=25 degreeC + * No maximum value is known, so play it safe. + */ +#define MAX9611_CONV_TIME_US_RANGE 3000, 3300 + struct max9611_dev { struct device *dev; struct i2c_client *i2c_client; @@ -239,11 +245,9 @@ static int max9611_read_single(struct max9611_dev *max9611, return ret; } - /* - * need a delay here to make register configuration - * stabilize. 1 msec at least, from empirical testing. - */ - usleep_range(1000, 2000); + /* need a delay here to make register configuration stabilize. */ + + usleep_range(MAX9611_CONV_TIME_US_RANGE); ret = i2c_smbus_read_word_swapped(max9611->i2c_client, reg_addr); if (ret < 0) { @@ -511,7 +515,7 @@ static int max9611_init(struct max9611_dev *max9611) MAX9611_REG_CTRL2, 0); return ret; } - usleep_range(1000, 2000); + usleep_range(MAX9611_CONV_TIME_US_RANGE); return 0; } From b5f565ba27cbca21432d2cb8e2a2da905715ab7e Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Thu, 14 Nov 2019 01:21:31 +0200 Subject: [PATCH 2055/3715] PM / devfreq: Don't fail devfreq_dev_release if not in list [ Upstream commit 42a6b25e67df6ee6675e8d1eaf18065bd73328ba ] Right now devfreq_dev_release will print a warning and abort the rest of the cleanup if the devfreq instance is not part of the global devfreq_list. But this is a valid scenario, for example it can happen if the governor can't be found or on any other init error that happens after device_register. Initialize devfreq->node to an empty list head in devfreq_add_device so that list_del becomes a safe noop inside devfreq_dev_release and we can continue the rest of the cleanup. Signed-off-by: Leonard Crestez Reviewed-by: Matthias Kaehlcke Reviewed-by: Chanwoo Choi Signed-off-by: Chanwoo Choi Signed-off-by: Sasha Levin --- drivers/devfreq/devfreq.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index dc9c0032c97b..7b510ef1d0dd 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -484,11 +484,6 @@ static void devfreq_dev_release(struct device *dev) struct devfreq *devfreq = to_devfreq(dev); mutex_lock(&devfreq_list_lock); - if (IS_ERR(find_device_devfreq(devfreq->dev.parent))) { - mutex_unlock(&devfreq_list_lock); - dev_warn(&devfreq->dev, "releasing devfreq which doesn't exist\n"); - return; - } list_del(&devfreq->node); mutex_unlock(&devfreq_list_lock); @@ -547,6 +542,7 @@ struct devfreq *devfreq_add_device(struct device *dev, devfreq->dev.parent = dev; devfreq->dev.class = devfreq_class; devfreq->dev.release = devfreq_dev_release; + INIT_LIST_HEAD(&devfreq->node); devfreq->profile = profile; strncpy(devfreq->governor_name, governor_name, DEVFREQ_NAME_LEN); devfreq->previous_freq = profile->initial_freq; From e47a078195d365e883a7dc36e0701552e383a47c Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Fri, 6 Dec 2019 09:24:26 +0800 Subject: [PATCH 2056/3715] RDMA/cma: add missed unregister_pernet_subsys in init failure [ Upstream commit 44a7b6759000ac51b92715579a7bba9e3f9245c2 ] The driver forgets to call unregister_pernet_subsys() in the error path of cma_init(). Add the missed call to fix it. Fixes: 4be74b42a6d0 ("IB/cma: Separate port allocation to network namespaces") Signed-off-by: Chuhong Yuan Reviewed-by: Parav Pandit Link: https://lore.kernel.org/r/20191206012426.12744-1-hslester96@gmail.com Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin --- drivers/infiniband/core/cma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index f698c6a28c14..fc4630e4acdd 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4568,6 +4568,7 @@ err: unregister_netdevice_notifier(&cma_nb); rdma_addr_unregister_client(&addr_client); ib_sa_unregister_client(&sa_client); + unregister_pernet_subsys(&cma_pernet_operations); err_wq: destroy_workqueue(cma_wq); return ret; From 5083f6c6acb213b8319a61de2e33402887a067dc Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Mon, 2 Dec 2019 20:03:20 -0600 Subject: [PATCH 2057/3715] rxe: correctly calculate iCRC for unaligned payloads [ Upstream commit 2030abddec6884aaf5892f5724c48fc340e6826f ] If RoCE PDUs being sent or received contain pad bytes, then the iCRC is miscalculated, resulting in PDUs being emitted by RXE with an incorrect iCRC, as well as ingress PDUs being dropped due to erroneously detecting a bad iCRC in the PDU. The fix is to include the pad bytes, if any, in iCRC computations. Note: This bug has caused broken on-the-wire compatibility with actual hardware RoCE devices since the soft-RoCE driver was first put into the mainstream kernel. Fixing it will create an incompatibility with the original soft-RoCE devices, but is necessary to be compatible with real hardware devices. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Steve Wise Link: https://lore.kernel.org/r/20191203020319.15036-2-larrystevenwise@gmail.com Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin --- drivers/infiniband/sw/rxe/rxe_recv.c | 2 +- drivers/infiniband/sw/rxe/rxe_req.c | 6 ++++++ drivers/infiniband/sw/rxe/rxe_resp.c | 7 +++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 83412df726a5..b7098f7bb30e 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -393,7 +393,7 @@ int rxe_rcv(struct sk_buff *skb) calc_icrc = rxe_icrc_hdr(pkt, skb); calc_icrc = rxe_crc32(rxe, calc_icrc, (u8 *)payload_addr(pkt), - payload_size(pkt)); + payload_size(pkt) + bth_pad(pkt)); calc_icrc = (__force u32)cpu_to_be32(~calc_icrc); if (unlikely(calc_icrc != pack_icrc)) { if (skb->protocol == htons(ETH_P_IPV6)) diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 9fd4f04df3b3..e6785b1ea85f 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -500,6 +500,12 @@ static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe, if (err) return err; } + if (bth_pad(pkt)) { + u8 *pad = payload_addr(pkt) + paylen; + + memset(pad, 0, bth_pad(pkt)); + crc = rxe_crc32(rxe, crc, pad, bth_pad(pkt)); + } } p = payload_addr(pkt) + paylen + bth_pad(pkt); diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 9207682b7a2e..a07a29b48863 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -738,6 +738,13 @@ static enum resp_states read_reply(struct rxe_qp *qp, if (err) pr_err("Failed copying memory\n"); + if (bth_pad(&ack_pkt)) { + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + u8 *pad = payload_addr(&ack_pkt) + payload; + + memset(pad, 0, bth_pad(&ack_pkt)); + icrc = rxe_crc32(rxe, icrc, pad, bth_pad(&ack_pkt)); + } p = payload_addr(&ack_pkt) + payload + bth_pad(&ack_pkt); *p = ~icrc; From ed07b2358eb0ab1648c005bf3bd77091333a2bd3 Mon Sep 17 00:00:00 2001 From: Bo Wu Date: Sat, 7 Dec 2019 03:22:46 +0000 Subject: [PATCH 2058/3715] scsi: lpfc: Fix memory leak on lpfc_bsg_write_ebuf_set func [ Upstream commit 9a1b0b9a6dab452fb0e39fe96880c4faf3878369 ] When phba->mbox_ext_buf_ctx.seqNum != phba->mbox_ext_buf_ctx.numBuf, dd_data should be freed before return SLI_CONFIG_HANDLED. When lpfc_sli_issue_mbox func return fails, pmboxq should be also freed in job_error tag. Link: https://lore.kernel.org/r/EDBAAA0BBBA2AC4E9C8B6B81DEEE1D6915E7A966@DGGEML525-MBS.china.huawei.com Signed-off-by: Bo Wu Reviewed-by: Zhiqiang Liu Reviewed-by: James Smart Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_bsg.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index 6dde21dc82a3..08ed27b0d4c6 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -4419,12 +4419,6 @@ lpfc_bsg_write_ebuf_set(struct lpfc_hba *phba, struct bsg_job *job, phba->mbox_ext_buf_ctx.seqNum++; nemb_tp = phba->mbox_ext_buf_ctx.nembType; - dd_data = kmalloc(sizeof(struct bsg_job_data), GFP_KERNEL); - if (!dd_data) { - rc = -ENOMEM; - goto job_error; - } - pbuf = (uint8_t *)dmabuf->virt; size = job->request_payload.payload_len; sg_copy_to_buffer(job->request_payload.sg_list, @@ -4461,6 +4455,13 @@ lpfc_bsg_write_ebuf_set(struct lpfc_hba *phba, struct bsg_job *job, "2968 SLI_CONFIG ext-buffer wr all %d " "ebuffers received\n", phba->mbox_ext_buf_ctx.numBuf); + + dd_data = kmalloc(sizeof(struct bsg_job_data), GFP_KERNEL); + if (!dd_data) { + rc = -ENOMEM; + goto job_error; + } + /* mailbox command structure for base driver */ pmboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); if (!pmboxq) { @@ -4509,6 +4510,8 @@ lpfc_bsg_write_ebuf_set(struct lpfc_hba *phba, struct bsg_job *job, return SLI_CONFIG_HANDLED; job_error: + if (pmboxq) + mempool_free(pmboxq, phba->mbox_mem_pool); lpfc_bsg_dma_page_free(phba, dmabuf); kfree(dd_data); From 28725bba2be905b00fce2533ac6f48a9f8426df7 Mon Sep 17 00:00:00 2001 From: Roman Bolshakov Date: Mon, 25 Nov 2019 19:56:56 +0300 Subject: [PATCH 2059/3715] scsi: qla2xxx: Don't call qlt_async_event twice [ Upstream commit 2c2f4bed9b6299e6430a65a29b5d27b8763fdf25 ] MBA_PORT_UPDATE generates duplicate log lines in target mode because qlt_async_event is called twice. Drop the calls within the case as the function will be called right after the switch statement. Cc: Quinn Tran Link: https://lore.kernel.org/r/20191125165702.1013-8-r.bolshakov@yadro.com Acked-by: Himanshu Madhani Reviewed-by: Hannes Reinecke Tested-by: Hannes Reinecke Acked-by: Himanshu Madhani Signed-off-by: Roman Bolshakov Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_isr.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index ebca1a470e9b..7f2da56274bd 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -1046,8 +1046,6 @@ global_port_update: ql_dbg(ql_dbg_async, vha, 0x5011, "Asynchronous PORT UPDATE ignored %04x/%04x/%04x.\n", mb[1], mb[2], mb[3]); - - qlt_async_event(mb[0], vha, mb); break; } @@ -1065,8 +1063,6 @@ global_port_update: set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags); set_bit(VP_CONFIG_OK, &vha->vp_flags); - - qlt_async_event(mb[0], vha, mb); break; case MBA_RSCN_UPDATE: /* State Change Registration */ From 5fac6ee295213de276fb0da852d773af43c24d46 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 3 Dec 2019 12:45:09 +0300 Subject: [PATCH 2060/3715] scsi: iscsi: qla4xxx: fix double free in probe [ Upstream commit fee92f25777789d73e1936b91472e9c4644457c8 ] On this error path we call qla4xxx_mem_free() and then the caller also calls qla4xxx_free_adapter() which calls qla4xxx_mem_free(). It leads to a couple double frees: drivers/scsi/qla4xxx/ql4_os.c:8856 qla4xxx_probe_adapter() warn: 'ha->chap_dma_pool' double freed drivers/scsi/qla4xxx/ql4_os.c:8856 qla4xxx_probe_adapter() warn: 'ha->fw_ddb_dma_pool' double freed Fixes: afaf5a2d341d ("[SCSI] Initial Commit of qla4xxx") Link: https://lore.kernel.org/r/20191203094421.hw7ex7qr3j2rbsmx@kili.mountain Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla4xxx/ql4_os.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index 4421f9bdfcf7..b0ad60565fe9 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -4285,7 +4285,6 @@ static int qla4xxx_mem_alloc(struct scsi_qla_host *ha) return QLA_SUCCESS; mem_alloc_error_exit: - qla4xxx_mem_free(ha); return QLA_ERROR; } From 82df1d3fa11bc7b25789efa4232bf0c188c8bc72 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 6 Dec 2019 09:11:18 +0800 Subject: [PATCH 2061/3715] scsi: libsas: stop discovering if oob mode is disconnected [ Upstream commit f70267f379b5e5e11bdc5d72a56bf17e5feed01f ] The discovering of sas port is driven by workqueue in libsas. When libsas is processing port events or phy events in workqueue, new events may rise up and change the state of some structures such as asd_sas_phy. This may cause some problems such as follows: ==>thread 1 ==>thread 2 ==>phy up ==>phy_up_v3_hw() ==>oob_mode = SATA_OOB_MODE; ==>phy down quickly ==>hisi_sas_phy_down() ==>sas_ha->notify_phy_event() ==>sas_phy_disconnected() ==>oob_mode = OOB_NOT_CONNECTED ==>workqueue wakeup ==>sas_form_port() ==>sas_discover_domain() ==>sas_get_port_device() ==>oob_mode is OOB_NOT_CONNECTED and device is wrongly taken as expander This at last lead to the panic when libsas trying to issue a command to discover the device. [183047.614035] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000058 [183047.622896] Mem abort info: [183047.625762] ESR = 0x96000004 [183047.628893] Exception class = DABT (current EL), IL = 32 bits [183047.634888] SET = 0, FnV = 0 [183047.638015] EA = 0, S1PTW = 0 [183047.641232] Data abort info: [183047.644189] ISV = 0, ISS = 0x00000004 [183047.648100] CM = 0, WnR = 0 [183047.651145] user pgtable: 4k pages, 48-bit VAs, pgdp = 00000000b7df67be [183047.657834] [0000000000000058] pgd=0000000000000000 [183047.662789] Internal error: Oops: 96000004 [#1] SMP [183047.667740] Process kworker/u16:2 (pid: 31291, stack limit = 0x00000000417c4974) [183047.675208] CPU: 0 PID: 3291 Comm: kworker/u16:2 Tainted: G W OE 4.19.36-vhulk1907.1.0.h410.eulerosv2r8.aarch64 #1 [183047.687015] Hardware name: N/A N/A/Kunpeng Desktop Board D920S10, BIOS 0.15 10/22/2019 [183047.695007] Workqueue: 0000:74:02.0_disco_q sas_discover_domain [183047.700999] pstate: 20c00009 (nzCv daif +PAN +UAO) [183047.705864] pc : prep_ata_v3_hw+0xf8/0x230 [hisi_sas_v3_hw] [183047.711510] lr : prep_ata_v3_hw+0xb0/0x230 [hisi_sas_v3_hw] [183047.717153] sp : ffff00000f28ba60 [183047.720541] x29: ffff00000f28ba60 x28: ffff8026852d7228 [183047.725925] x27: ffff8027dba3e0a8 x26: ffff8027c05fc200 [183047.731310] x25: 0000000000000000 x24: ffff8026bafa8dc0 [183047.736695] x23: ffff8027c05fc218 x22: ffff8026852d7228 [183047.742079] x21: ffff80007c2f2940 x20: ffff8027c05fc200 [183047.747464] x19: 0000000000f80800 x18: 0000000000000010 [183047.752848] x17: 0000000000000000 x16: 0000000000000000 [183047.758232] x15: ffff000089a5a4ff x14: 0000000000000005 [183047.763617] x13: ffff000009a5a50e x12: ffff8026bafa1e20 [183047.769001] x11: ffff0000087453b8 x10: ffff00000f28b870 [183047.774385] x9 : 0000000000000000 x8 : ffff80007e58f9b0 [183047.779770] x7 : 0000000000000000 x6 : 000000000000003f [183047.785154] x5 : 0000000000000040 x4 : ffffffffffffffe0 [183047.790538] x3 : 00000000000000f8 x2 : 0000000002000007 [183047.795922] x1 : 0000000000000008 x0 : 0000000000000000 [183047.801307] Call trace: [183047.803827] prep_ata_v3_hw+0xf8/0x230 [hisi_sas_v3_hw] [183047.809127] hisi_sas_task_prep+0x750/0x888 [hisi_sas_main] [183047.814773] hisi_sas_task_exec.isra.7+0x88/0x1f0 [hisi_sas_main] [183047.820939] hisi_sas_queue_command+0x28/0x38 [hisi_sas_main] [183047.826757] smp_execute_task_sg+0xec/0x218 [183047.831013] smp_execute_task+0x74/0xa0 [183047.834921] sas_discover_expander.part.7+0x9c/0x5f8 [183047.839959] sas_discover_root_expander+0x90/0x160 [183047.844822] sas_discover_domain+0x1b8/0x1e8 [183047.849164] process_one_work+0x1b4/0x3f8 [183047.853246] worker_thread+0x54/0x470 [183047.856981] kthread+0x134/0x138 [183047.860283] ret_from_fork+0x10/0x18 [183047.863931] Code: f9407a80 528000e2 39409281 72a04002 (b9405800) [183047.870097] kernel fault(0x1) notification starting on CPU 0 [183047.875828] kernel fault(0x1) notification finished on CPU 0 [183047.881559] Modules linked in: unibsp(OE) hns3(OE) hclge(OE) hnae3(OE) mem_drv(OE) hisi_sas_v3_hw(OE) hisi_sas_main(OE) [183047.892418] ---[ end trace 4cc26083fc11b783 ]--- [183047.897107] Kernel panic - not syncing: Fatal exception [183047.902403] kernel fault(0x5) notification starting on CPU 0 [183047.908134] kernel fault(0x5) notification finished on CPU 0 [183047.913865] SMP: stopping secondary CPUs [183047.917861] Kernel Offset: disabled [183047.921422] CPU features: 0x2,a2a00a38 [183047.925243] Memory Limit: none [183047.928372] kernel reboot(0x2) notification starting on CPU 0 [183047.934190] kernel reboot(0x2) notification finished on CPU 0 [183047.940008] ---[ end Kernel panic - not syncing: Fatal exception ]--- Fixes: 2908d778ab3e ("[SCSI] aic94xx: new driver") Link: https://lore.kernel.org/r/20191206011118.46909-1-yanaijie@huawei.com Reported-by: Gao Chuan Reviewed-by: John Garry Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/libsas/sas_discover.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c index 60de66252fa2..b200edc665a5 100644 --- a/drivers/scsi/libsas/sas_discover.c +++ b/drivers/scsi/libsas/sas_discover.c @@ -97,12 +97,21 @@ static int sas_get_port_device(struct asd_sas_port *port) else dev->dev_type = SAS_SATA_DEV; dev->tproto = SAS_PROTOCOL_SATA; - } else { + } else if (port->oob_mode == SAS_OOB_MODE) { struct sas_identify_frame *id = (struct sas_identify_frame *) dev->frame_rcvd; dev->dev_type = id->dev_type; dev->iproto = id->initiator_bits; dev->tproto = id->target_bits; + } else { + /* If the oob mode is OOB_NOT_CONNECTED, the port is + * disconnected due to race with PHY down. We cannot + * continue to discover this port + */ + sas_put_device(dev); + pr_warn("Port %016llx is disconnected when discovering\n", + SAS_ADDR(port->attached_sas_addr)); + return -ENODEV; } sas_init_dev(dev); From bca6d54dcc98b944bf5efb34e982a06f935b2099 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 24 Oct 2019 10:52:52 +0200 Subject: [PATCH 2062/3715] drm/nouveau: Move the declaration of struct nouveau_conn_atom up a bit [ Upstream commit 37a68eab4cd92b507c9e8afd760fdc18e4fecac6 ] Place the declaration of struct nouveau_conn_atom above that of struct nouveau_connector. This commit makes no changes to the moved block what so ever, it just moves it up a bit. This is a preparation patch to fix some issues with connector handling on pre nv50 displays (which do not use atomic modesetting). Signed-off-by: Hans de Goede Reviewed-by: Lyude Paul Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nouveau_connector.h | 110 ++++++++++---------- 1 file changed, 55 insertions(+), 55 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.h b/drivers/gpu/drm/nouveau/nouveau_connector.h index dc7454e7f19a..b46e99f7641e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.h +++ b/drivers/gpu/drm/nouveau/nouveau_connector.h @@ -29,6 +29,7 @@ #include +#include #include #include #include @@ -37,6 +38,60 @@ struct nvkm_i2c_port; +#define nouveau_conn_atom(p) \ + container_of((p), struct nouveau_conn_atom, state) + +struct nouveau_conn_atom { + struct drm_connector_state state; + + struct { + /* The enum values specifically defined here match nv50/gf119 + * hw values, and the code relies on this. + */ + enum { + DITHERING_MODE_OFF = 0x00, + DITHERING_MODE_ON = 0x01, + DITHERING_MODE_DYNAMIC2X2 = 0x10 | DITHERING_MODE_ON, + DITHERING_MODE_STATIC2X2 = 0x18 | DITHERING_MODE_ON, + DITHERING_MODE_TEMPORAL = 0x20 | DITHERING_MODE_ON, + DITHERING_MODE_AUTO + } mode; + enum { + DITHERING_DEPTH_6BPC = 0x00, + DITHERING_DEPTH_8BPC = 0x02, + DITHERING_DEPTH_AUTO + } depth; + } dither; + + struct { + int mode; /* DRM_MODE_SCALE_* */ + struct { + enum { + UNDERSCAN_OFF, + UNDERSCAN_ON, + UNDERSCAN_AUTO, + } mode; + u32 hborder; + u32 vborder; + } underscan; + bool full; + } scaler; + + struct { + int color_vibrance; + int vibrant_hue; + } procamp; + + union { + struct { + bool dither:1; + bool scaler:1; + bool procamp:1; + }; + u8 mask; + } set; +}; + struct nouveau_connector { struct drm_connector base; enum dcb_connector_type type; @@ -111,61 +166,6 @@ extern int nouveau_ignorelid; extern int nouveau_duallink; extern int nouveau_hdmimhz; -#include -#define nouveau_conn_atom(p) \ - container_of((p), struct nouveau_conn_atom, state) - -struct nouveau_conn_atom { - struct drm_connector_state state; - - struct { - /* The enum values specifically defined here match nv50/gf119 - * hw values, and the code relies on this. - */ - enum { - DITHERING_MODE_OFF = 0x00, - DITHERING_MODE_ON = 0x01, - DITHERING_MODE_DYNAMIC2X2 = 0x10 | DITHERING_MODE_ON, - DITHERING_MODE_STATIC2X2 = 0x18 | DITHERING_MODE_ON, - DITHERING_MODE_TEMPORAL = 0x20 | DITHERING_MODE_ON, - DITHERING_MODE_AUTO - } mode; - enum { - DITHERING_DEPTH_6BPC = 0x00, - DITHERING_DEPTH_8BPC = 0x02, - DITHERING_DEPTH_AUTO - } depth; - } dither; - - struct { - int mode; /* DRM_MODE_SCALE_* */ - struct { - enum { - UNDERSCAN_OFF, - UNDERSCAN_ON, - UNDERSCAN_AUTO, - } mode; - u32 hborder; - u32 vborder; - } underscan; - bool full; - } scaler; - - struct { - int color_vibrance; - int vibrant_hue; - } procamp; - - union { - struct { - bool dither:1; - bool scaler:1; - bool procamp:1; - }; - u8 mask; - } set; -}; - void nouveau_conn_attach_properties(struct drm_connector *); void nouveau_conn_reset(struct drm_connector *); struct drm_connector_state * From fbff45d2413eb11a0534b3faf9b00636b21337c3 Mon Sep 17 00:00:00 2001 From: EJ Hsu Date: Tue, 3 Dec 2019 23:34:56 -0800 Subject: [PATCH 2063/3715] usb: gadget: fix wrong endpoint desc [ Upstream commit e5b5da96da50ef30abb39cb9f694e99366404d24 ] Gadget driver should always use config_ep_by_speed() to initialize usb_ep struct according to usb device's operating speed. Otherwise, usb_ep struct may be wrong if usb devcie's operating speed is changed. The key point in this patch is that we want to make sure the desc pointer in usb_ep struct will be set to NULL when gadget is disconnected. This will force it to call config_ep_by_speed() to correctly initialize usb_ep struct based on the new operating speed when gadget is re-connected later. Reviewed-by: Peter Chen Signed-off-by: EJ Hsu Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/gadget/function/f_ecm.c | 6 +++++- drivers/usb/gadget/function/f_rndis.c | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_ecm.c b/drivers/usb/gadget/function/f_ecm.c index 4c488d15b6f6..dc99ed94f03d 100644 --- a/drivers/usb/gadget/function/f_ecm.c +++ b/drivers/usb/gadget/function/f_ecm.c @@ -625,8 +625,12 @@ static void ecm_disable(struct usb_function *f) DBG(cdev, "ecm deactivated\n"); - if (ecm->port.in_ep->enabled) + if (ecm->port.in_ep->enabled) { gether_disconnect(&ecm->port); + } else { + ecm->port.in_ep->desc = NULL; + ecm->port.out_ep->desc = NULL; + } usb_ep_disable(ecm->notify); ecm->notify->desc = NULL; diff --git a/drivers/usb/gadget/function/f_rndis.c b/drivers/usb/gadget/function/f_rndis.c index c7c5b3ce1d98..2bde68f5d246 100644 --- a/drivers/usb/gadget/function/f_rndis.c +++ b/drivers/usb/gadget/function/f_rndis.c @@ -622,6 +622,7 @@ static void rndis_disable(struct usb_function *f) gether_disconnect(&rndis->port); usb_ep_disable(rndis->notify); + rndis->notify->desc = NULL; } /*-------------------------------------------------------------------------*/ From f42504ab0aec92b757468bb3cf4fa345d6ff822d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 9 Dec 2019 20:58:56 -0700 Subject: [PATCH 2064/3715] net: make socket read/write_iter() honor IOCB_NOWAIT [ Upstream commit ebfcd8955c0b52eb793bcbc9e71140e3d0cdb228 ] The socket read/write helpers only look at the file O_NONBLOCK. not the iocb IOCB_NOWAIT flag. This breaks users like preadv2/pwritev2 and io_uring that rely on not having the file itself marked nonblocking, but rather the iocb itself. Cc: netdev@vger.kernel.org Acked-by: David Miller Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- net/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/socket.c b/net/socket.c index aab65277314d..5b134a6b6216 100644 --- a/net/socket.c +++ b/net/socket.c @@ -891,7 +891,7 @@ static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to) .msg_iocb = iocb}; ssize_t res; - if (file->f_flags & O_NONBLOCK) + if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT)) msg.msg_flags = MSG_DONTWAIT; if (iocb->ki_pos != 0) @@ -916,7 +916,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from) if (iocb->ki_pos != 0) return -ESPIPE; - if (file->f_flags & O_NONBLOCK) + if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT)) msg.msg_flags = MSG_DONTWAIT; if (sock->type == SOCK_SEQPACKET) From 34ab847d906944ecd1976ad6f49870563bb0b322 Mon Sep 17 00:00:00 2001 From: Zhiqiang Liu Date: Tue, 10 Dec 2019 10:42:25 +0800 Subject: [PATCH 2065/3715] md: raid1: check rdev before reference in raid1_sync_request func [ Upstream commit 028288df635f5a9addd48ac4677b720192747944 ] In raid1_sync_request func, rdev should be checked before reference. Signed-off-by: Zhiqiang Liu Signed-off-by: Song Liu Signed-off-by: Sasha Levin --- drivers/md/raid1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index f46ac9db9edb..0a9d623b13c2 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2749,7 +2749,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, write_targets++; } } - if (bio->bi_end_io) { + if (rdev && bio->bi_end_io) { atomic_inc(&rdev->nr_pending); bio->bi_iter.bi_sector = sector_nr + rdev->data_offset; bio_set_dev(bio, rdev->bdev); From 42c5538af3c01b79f1f06f147172a1a295376b2f Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 28 Nov 2019 10:26:41 +0100 Subject: [PATCH 2066/3715] s390/cpum_sf: Adjust sampling interval to avoid hitting sample limits [ Upstream commit 39d4a501a9ef55c57b51e3ef07fc2aeed7f30b3b ] Function perf_event_ever_overflow() and perf_event_account_interrupt() are called every time samples are processed by the interrupt handler. However function perf_event_account_interrupt() has checks to avoid being flooded with interrupts (more then 1000 samples are received per task_tick). Samples are then dropped and a PERF_RECORD_THROTTLED is added to the perf data. The perf subsystem limit calculation is: maximum sample frequency := 100000 --> 1 samples per 10 us task_tick = 10ms = 10000us --> 1000 samples per task_tick The work flow is measurement_alert() uses SDBT head and each SBDT points to 511 SDB pages, each with 126 sample entries. After processing 8 SBDs and for each valid sample calling: perf_event_overflow() perf_event_account_interrupts() there is a considerable amount of samples being dropped, especially when the sample frequency is very high and near the 100000 limit. To avoid the high amount of samples being dropped near the end of a task_tick time frame, increment the sampling interval in case of dropped events. The CPU Measurement sampling facility on the s390 supports only intervals, specifiing how many CPU cycles have to be executed before a sample is generated. Increase the interval when the samples being generated hit the task_tick limit. Signed-off-by: Thomas Richter Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- arch/s390/kernel/perf_cpum_sf.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 45304085b6ee..95c047bf4a12 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1306,6 +1306,22 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) if (sampl_overflow) OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) + sampl_overflow, 1 + num_sdb); + + /* Perf_event_overflow() and perf_event_account_interrupt() limit + * the interrupt rate to an upper limit. Roughly 1000 samples per + * task tick. + * Hitting this limit results in a large number + * of throttled REF_REPORT_THROTTLE entries and the samples + * are dropped. + * Slightly increase the interval to avoid hitting this limit. + */ + if (event_overflow) { + SAMPL_RATE(hwc) += DIV_ROUND_UP(SAMPL_RATE(hwc), 10); + debug_sprintf_event(sfdbg, 1, "%s: rate adjustment %ld\n", + __func__, + DIV_ROUND_UP(SAMPL_RATE(hwc), 10)); + } + if (sampl_overflow || event_overflow) debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: " "overflow stats: sample=%llu event=%llu\n", From 29f57767bfc3ab9b3c6f6638692ebc5158a46ead Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 29 Nov 2019 15:24:25 +0100 Subject: [PATCH 2067/3715] s390/cpum_sf: Avoid SBD overflow condition in irq handler [ Upstream commit 0539ad0b22877225095d8adef0c376f52cc23834 ] The s390 CPU Measurement sampling facility has an overflow condition which fires when all entries in a SBD are used. The measurement alert interrupt is triggered and reads out all samples in this SDB. It then tests the successor SDB, if this SBD is not full, the interrupt handler does not read any samples at all from this SDB The design waits for the hardware to fill this SBD and then trigger another meassurement alert interrupt. This scheme works nicely until an perf_event_overflow() function call discards the sample due to a too high sampling rate. The interrupt handler has logic to read out a partially filled SDB when the perf event overflow condition in linux common code is met. This causes the CPUM sampling measurement hardware and the PMU device driver to operate on the same SBD's trailer entry. This should not happen. This can be seen here using this trace: cpumsf_pmu_add: tear:0xb5286000 hw_perf_event_update: sdbt 0xb5286000 full 1 over 0 flush_all:0 hw_perf_event_update: sdbt 0xb5286008 full 0 over 0 flush_all:0 above shows 1. interrupt hw_perf_event_update: sdbt 0xb5286008 full 1 over 0 flush_all:0 hw_perf_event_update: sdbt 0xb5286008 full 0 over 0 flush_all:0 above shows 2. interrupt ... this goes on fine until... hw_perf_event_update: sdbt 0xb5286068 full 1 over 0 flush_all:0 perf_push_sample1: overflow one or more samples read from the IRQ handler are rejected by perf_event_overflow() and the IRQ handler advances to the next SDB and modifies the trailer entry of a partially filled SDB. hw_perf_event_update: sdbt 0xb5286070 full 0 over 0 flush_all:1 timestamp: 14:32:52.519953 Next time the IRQ handler is called for this SDB the trailer entry shows an overflow count of 19 missed entries. hw_perf_event_update: sdbt 0xb5286070 full 1 over 19 flush_all:1 timestamp: 14:32:52.970058 Remove access to a follow on SDB when event overflow happened. Signed-off-by: Thomas Richter Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- arch/s390/kernel/perf_cpum_sf.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 95c047bf4a12..b652593d7de6 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1294,12 +1294,6 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) */ if (flush_all && done) break; - - /* If an event overflow happened, discard samples by - * processing any remaining sample-data-blocks. - */ - if (event_overflow) - flush_all = 1; } /* Account sample overflows in the event hardware structure */ From 556f40b79de5e23fba28f5e35831d1526df52a26 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 12 Dec 2019 11:12:13 +0200 Subject: [PATCH 2068/3715] IB/mlx4: Follow mirror sequence of device add during device removal [ Upstream commit 89f988d93c62384758b19323c886db917a80c371 ] Current code device add sequence is: ib_register_device() ib_mad_init() init_sriov_init() register_netdev_notifier() Therefore, the remove sequence should be, unregister_netdev_notifier() close_sriov() mad_cleanup() ib_unregister_device() However it is not above. Hence, make do above remove sequence. Fixes: fa417f7b520ee ("IB/mlx4: Add support for IBoE") Signed-off-by: Parav Pandit Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20191212091214.315005-3-leon@kernel.org Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx4/main.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 0299c0642de8..7e73a1a6cb67 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -3073,16 +3073,17 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) ibdev->ib_active = false; flush_workqueue(wq); - mlx4_ib_close_sriov(ibdev); - mlx4_ib_mad_cleanup(ibdev); - ib_unregister_device(&ibdev->ib_dev); - mlx4_ib_diag_cleanup(ibdev); if (ibdev->iboe.nb.notifier_call) { if (unregister_netdevice_notifier(&ibdev->iboe.nb)) pr_warn("failure unregistering notifier\n"); ibdev->iboe.nb.notifier_call = NULL; } + mlx4_ib_close_sriov(ibdev); + mlx4_ib_mad_cleanup(ibdev); + ib_unregister_device(&ibdev->ib_dev); + mlx4_ib_diag_cleanup(ibdev); + mlx4_qp_release_range(dev, ibdev->steer_qpn_base, ibdev->steer_qpn_count); kfree(ibdev->ib_uc_qpns_bitmap); From 0ce254bc68edf06f93d3c0271851c619ff729d31 Mon Sep 17 00:00:00 2001 From: Paul Durrant Date: Tue, 10 Dec 2019 14:53:05 +0000 Subject: [PATCH 2069/3715] xen-blkback: prevent premature module unload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit fa2ac657f9783f0891b2935490afe9a7fd29d3fa ] Objects allocated by xen_blkif_alloc come from the 'blkif_cache' kmem cache. This cache is destoyed when xen-blkif is unloaded so it is necessary to wait for the deferred free routine used for such objects to complete. This necessity was missed in commit 14855954f636 "xen-blkback: allow module to be cleanly unloaded". This patch fixes the problem by taking/releasing extra module references in xen_blkif_alloc/free() respectively. Signed-off-by: Paul Durrant Reviewed-by: Roger Pau Monné Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/block/xen-blkback/xenbus.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index ed4e80779124..e9fa4a1fc791 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -178,6 +178,15 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) blkif->domid = domid; atomic_set(&blkif->refcnt, 1); init_completion(&blkif->drain_complete); + + /* + * Because freeing back to the cache may be deferred, it is not + * safe to unload the module (and hence destroy the cache) until + * this has completed. To prevent premature unloading, take an + * extra module reference here and release only when the object + * has been freed back to the cache. + */ + __module_get(THIS_MODULE); INIT_WORK(&blkif->free_work, xen_blkif_deferred_free); return blkif; @@ -327,6 +336,7 @@ static void xen_blkif_free(struct xen_blkif *blkif) /* Make sure everything is drained before shutting down */ kmem_cache_free(xen_blkif_cachep, blkif); + module_put(THIS_MODULE); } int __init xen_blkif_interface_init(void) From 4df4720b6c1d609cd8ecd9bc55b3528766854c16 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 12 Dec 2019 15:17:50 +0100 Subject: [PATCH 2070/3715] xen/balloon: fix ballooned page accounting without hotplug enabled [ Upstream commit c673ec61ade89bf2f417960f986bc25671762efb ] When CONFIG_XEN_BALLOON_MEMORY_HOTPLUG is not defined reserve_additional_memory() will set balloon_stats.target_pages to a wrong value in case there are still some ballooned pages allocated via alloc_xenballooned_pages(). This will result in balloon_process() no longer be triggered when ballooned pages are freed in batches. Reported-by: Nicholas Tsirakis Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/xen/balloon.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 71a6deeb4e71..3f9260af701f 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -401,7 +401,8 @@ static struct notifier_block xen_memory_nb = { #else static enum bp_state reserve_additional_memory(void) { - balloon_stats.target_pages = balloon_stats.current_pages; + balloon_stats.target_pages = balloon_stats.current_pages + + balloon_stats.target_unpopulated; return BP_ECANCELED; } #endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */ From c0fb3b1b499391d4afe4cbc805ecfaaf495e2124 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Wed, 25 Sep 2019 15:39:12 +0100 Subject: [PATCH 2071/3715] PM / hibernate: memory_bm_find_bit(): Tighten node optimisation [ Upstream commit da6043fe85eb5ec621e34a92540735dcebbea134 ] When looking for a bit by number we make use of the cached result from the preceding lookup to speed up operation. Firstly we check if the requested pfn is within the cached zone and if not lookup the new zone. We then check if the offset for that pfn falls within the existing cached node. This happens regardless of whether the node is within the zone we are now scanning. With certain memory layouts it is possible for this to false trigger creating a temporary alias for the pfn to a different bit. This leads the hibernation code to free memory which it was never allocated with the expected fallout. Ensure the zone we are scanning matches the cached zone before considering the cached node. Deep thanks go to Andrea for many, many, many hours of hacking and testing that went into cornering this bug. Reported-by: Andrea Righi Tested-by: Andrea Righi Signed-off-by: Andy Whitcroft Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- kernel/power/snapshot.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 0972a8e09d08..ff2aabb70de9 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -734,8 +734,15 @@ zone_found: * We have found the zone. Now walk the radix tree to find the leaf node * for our PFN. */ + + /* + * If the zone we wish to scan is the the current zone and the + * pfn falls into the current node then we do not need to walk + * the tree. + */ node = bm->cur.node; - if (((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn) + if (zone == bm->cur.zone && + ((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn) goto node_found; node = zone->rtree; From 54e15cac21c92a273b1163684670a807885fdd1c Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Tue, 3 Dec 2019 07:53:15 -0800 Subject: [PATCH 2072/3715] xfs: fix mount failure crash on invalid iclog memory access [ Upstream commit 798a9cada4694ca8d970259f216cec47e675bfd5 ] syzbot (via KASAN) reports a use-after-free in the error path of xlog_alloc_log(). Specifically, the iclog freeing loop doesn't handle the case of a fully initialized ->l_iclog linked list. Instead, it assumes that the list is partially constructed and NULL terminated. This bug manifested because there was no possible error scenario after iclog list setup when the original code was added. Subsequent code and associated error conditions were added some time later, while the original error handling code was never updated. Fix up the error loop to terminate either on a NULL iclog or reaching the end of the list. Reported-by: syzbot+c732f8644185de340492@syzkaller.appspotmail.com Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Sasha Levin --- fs/xfs/xfs_log.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index dc95a49d62e7..4e768e606998 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1539,6 +1539,8 @@ out_free_iclog: if (iclog->ic_bp) xfs_buf_free(iclog->ic_bp); kmem_free(iclog); + if (prev_iclog == log->l_iclog) + break; } spinlock_destroy(&log->l_icloglock); xfs_buf_free(log->l_xbuf); From 4fe7cb918a4782f2cf127b9152b7cabfa670d2bd Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 9 Oct 2019 13:48:09 +0200 Subject: [PATCH 2073/3715] taskstats: fix data-race [ Upstream commit 0b8d616fb5a8ffa307b1d3af37f55c15dae14f28 ] When assiging and testing taskstats in taskstats_exit() there's a race when setting up and reading sig->stats when a thread-group with more than one thread exits: write to 0xffff8881157bbe10 of 8 bytes by task 7951 on cpu 0: taskstats_tgid_alloc kernel/taskstats.c:567 [inline] taskstats_exit+0x6b7/0x717 kernel/taskstats.c:596 do_exit+0x2c2/0x18e0 kernel/exit.c:864 do_group_exit+0xb4/0x1c0 kernel/exit.c:983 get_signal+0x2a2/0x1320 kernel/signal.c:2734 do_signal+0x3b/0xc00 arch/x86/kernel/signal.c:815 exit_to_usermode_loop+0x250/0x2c0 arch/x86/entry/common.c:159 prepare_exit_to_usermode arch/x86/entry/common.c:194 [inline] syscall_return_slowpath arch/x86/entry/common.c:274 [inline] do_syscall_64+0x2d7/0x2f0 arch/x86/entry/common.c:299 entry_SYSCALL_64_after_hwframe+0x44/0xa9 read to 0xffff8881157bbe10 of 8 bytes by task 7949 on cpu 1: taskstats_tgid_alloc kernel/taskstats.c:559 [inline] taskstats_exit+0xb2/0x717 kernel/taskstats.c:596 do_exit+0x2c2/0x18e0 kernel/exit.c:864 do_group_exit+0xb4/0x1c0 kernel/exit.c:983 __do_sys_exit_group kernel/exit.c:994 [inline] __se_sys_exit_group kernel/exit.c:992 [inline] __x64_sys_exit_group+0x2e/0x30 kernel/exit.c:992 do_syscall_64+0xcf/0x2f0 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fix this by using smp_load_acquire() and smp_store_release(). Reported-by: syzbot+c5d03165a1bd1dead0c1@syzkaller.appspotmail.com Fixes: 34ec12349c8a ("taskstats: cleanup ->signal->stats allocation") Cc: stable@vger.kernel.org Signed-off-by: Christian Brauner Acked-by: Marco Elver Reviewed-by: Will Deacon Reviewed-by: Andrea Parri Reviewed-by: Dmitry Vyukov Link: https://lore.kernel.org/r/20191009114809.8643-1-christian.brauner@ubuntu.com Signed-off-by: Sasha Levin --- kernel/taskstats.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 4559e914452b..390c76d4503c 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -568,25 +568,33 @@ static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk) { struct signal_struct *sig = tsk->signal; - struct taskstats *stats; + struct taskstats *stats_new, *stats; - if (sig->stats || thread_group_empty(tsk)) - goto ret; + /* Pairs with smp_store_release() below. */ + stats = smp_load_acquire(&sig->stats); + if (stats || thread_group_empty(tsk)) + return stats; /* No problem if kmem_cache_zalloc() fails */ - stats = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL); + stats_new = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL); spin_lock_irq(&tsk->sighand->siglock); - if (!sig->stats) { - sig->stats = stats; - stats = NULL; + stats = sig->stats; + if (!stats) { + /* + * Pairs with smp_store_release() above and order the + * kmem_cache_zalloc(). + */ + smp_store_release(&sig->stats, stats_new); + stats = stats_new; + stats_new = NULL; } spin_unlock_irq(&tsk->sighand->siglock); - if (stats) - kmem_cache_free(taskstats_cache, stats); -ret: - return sig->stats; + if (stats_new) + kmem_cache_free(taskstats_cache, stats_new); + + return stats; } /* Send pid data out on exit */ From 2acb20ebfbdec80b432da19809e76fff32dfae04 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 4 Dec 2019 16:52:37 -0800 Subject: [PATCH 2074/3715] drm: limit to INT_MAX in create_blob ioctl [ Upstream commit 5bf8bec3f4ce044a223c40cbce92590d938f0e9c ] The hardened usercpy code is too paranoid ever since commit 6a30afa8c1fb ("uaccess: disallow > INT_MAX copy sizes") Code itself should have been fine as-is. Link: http://lkml.kernel.org/r/20191106164755.31478-1-daniel.vetter@ffwll.ch Signed-off-by: Daniel Vetter Reported-by: syzbot+fb77e97ebf0612ee6914@syzkaller.appspotmail.com Fixes: 6a30afa8c1fb ("uaccess: disallow > INT_MAX copy sizes") Cc: Kees Cook Cc: Alexander Viro Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_property.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_property.c b/drivers/gpu/drm/drm_property.c index 78e630771214..9decd981d94e 100644 --- a/drivers/gpu/drm/drm_property.c +++ b/drivers/gpu/drm/drm_property.c @@ -540,7 +540,7 @@ drm_property_create_blob(struct drm_device *dev, size_t length, struct drm_property_blob *blob; int ret; - if (!length || length > ULONG_MAX - sizeof(struct drm_property_blob)) + if (!length || length > INT_MAX - sizeof(struct drm_property_blob)) return ERR_PTR(-EINVAL); blob = kvzalloc(sizeof(struct drm_property_blob)+length, GFP_KERNEL); From 772593318160f81dddc84752aa599d2e48e0dc4d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 18 Dec 2019 20:26:06 +0100 Subject: [PATCH 2075/3715] ALSA: ice1724: Fix sleep-in-atomic in Infrasonic Quartet support code commit 0aec96f5897ac16ad9945f531b4bef9a2edd2ebd upstream. Jia-Ju Bai reported a possible sleep-in-atomic scenario in the ice1724 driver with Infrasonic Quartet support code: namely, ice->set_rate callback gets called inside ice->reg_lock spinlock, while the callback in quartet.c holds ice->gpio_mutex. This patch fixes the invalid call: it simply moves the calls of ice->set_rate and ice->set_mclk callbacks outside the spinlock. Reported-by: Jia-Ju Bai Cc: Link: https://lore.kernel.org/r/5d43135e-73b9-a46a-2155-9e91d0dcdf83@gmail.com Link: https://lore.kernel.org/r/20191218192606.12866-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/ice1712/ice1724.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sound/pci/ice1712/ice1724.c b/sound/pci/ice1712/ice1724.c index 057c2f394ea7..41ea8e7b389a 100644 --- a/sound/pci/ice1712/ice1724.c +++ b/sound/pci/ice1712/ice1724.c @@ -661,6 +661,7 @@ static int snd_vt1724_set_pro_rate(struct snd_ice1712 *ice, unsigned int rate, unsigned long flags; unsigned char mclk_change; unsigned int i, old_rate; + bool call_set_rate = false; if (rate > ice->hw_rates->list[ice->hw_rates->count - 1]) return -EINVAL; @@ -684,7 +685,7 @@ static int snd_vt1724_set_pro_rate(struct snd_ice1712 *ice, unsigned int rate, * setting clock rate for internal clock mode */ old_rate = ice->get_rate(ice); if (force || (old_rate != rate)) - ice->set_rate(ice, rate); + call_set_rate = true; else if (rate == ice->cur_rate) { spin_unlock_irqrestore(&ice->reg_lock, flags); return 0; @@ -692,12 +693,14 @@ static int snd_vt1724_set_pro_rate(struct snd_ice1712 *ice, unsigned int rate, } ice->cur_rate = rate; + spin_unlock_irqrestore(&ice->reg_lock, flags); + + if (call_set_rate) + ice->set_rate(ice, rate); /* setting master clock */ mclk_change = ice->set_mclk(ice, rate); - spin_unlock_irqrestore(&ice->reg_lock, flags); - if (mclk_change && ice->gpio.i2s_mclk_changed) ice->gpio.i2s_mclk_changed(ice); if (ice->gpio.set_pro_rate) From 016120b3ebe79152f7ad59526303d00f67037d4d Mon Sep 17 00:00:00 2001 From: Stefan Mavrodiev Date: Tue, 17 Dec 2019 14:46:32 +0200 Subject: [PATCH 2076/3715] drm/sun4i: hdmi: Remove duplicate cleanup calls commit 57177d214ee0816c4436c23d6c933ccb32c571f1 upstream. When the HDMI unbinds drm_connector_cleanup() and drm_encoder_cleanup() are called. This also happens when the connector and the encoder are destroyed. This double call triggers a NULL pointer exception. The patch fixes this by removing the cleanup calls in the unbind function. Cc: Fixes: 9c5681011a0c ("drm/sun4i: Add HDMI support") Signed-off-by: Stefan Mavrodiev Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20191217124632.20820-1-stefan@olimex.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c index 3cf1a6932fac..298d6a8bab12 100644 --- a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c +++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c @@ -438,8 +438,6 @@ static void sun4i_hdmi_unbind(struct device *dev, struct device *master, struct sun4i_hdmi *hdmi = dev_get_drvdata(dev); cec_unregister_adapter(hdmi->cec_adap); - drm_connector_cleanup(&hdmi->connector); - drm_encoder_cleanup(&hdmi->encoder); i2c_del_adapter(hdmi->i2c); clk_disable_unprepare(hdmi->mod_clk); clk_disable_unprepare(hdmi->bus_clk); From 6547dc3b6d4c01f2eca3236b013b60692d3746b8 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Wed, 1 Jan 2020 20:50:38 -0800 Subject: [PATCH 2077/3715] MIPS: Avoid VDSO ABI breakage due to global register variable commit bbcc5672b0063b0e9d65dc8787a4f09c3b5bb5cc upstream. Declaring __current_thread_info as a global register variable has the effect of preventing GCC from saving & restoring its value in cases where the ABI would typically do so. To quote GCC documentation: > If the register is a call-saved register, call ABI is affected: the > register will not be restored in function epilogue sequences after the > variable has been assigned. Therefore, functions cannot safely return > to callers that assume standard ABI. When our position independent VDSO is built for the n32 or n64 ABIs all functions it exposes should be preserving the value of $gp/$28 for their caller, but in the presence of the __current_thread_info global register variable GCC stops doing so & simply clobbers $gp/$28 when calculating the address of the GOT. In cases where the VDSO returns success this problem will typically be masked by the caller in libc returning & restoring $gp/$28 itself, but that is by no means guaranteed. In cases where the VDSO returns an error libc will typically contain a fallback path which will now fail (typically with a bad memory access) if it attempts anything which relies upon the value of $gp/$28 - eg. accessing anything via the GOT. One fix for this would be to move the declaration of __current_thread_info inside the current_thread_info() function, demoting it from global register variable to local register variable & avoiding inadvertently creating a non-standard calling ABI for the VDSO. Unfortunately this causes issues for clang, which doesn't support local register variables as pointed out by commit fe92da0f355e ("MIPS: Changed current_thread_info() to an equivalent supported by both clang and GCC") which introduced the global register variable before we had a VDSO to worry about. Instead, fix this by continuing to use the global register variable for the kernel proper but declare __current_thread_info as a simple extern variable when building the VDSO. It should never be referenced, and will cause a link error if it is. This resolves the calling convention issue for the VDSO without having any impact upon the build of the kernel itself for either clang or gcc. Signed-off-by: Paul Burton Fixes: ebb5e78cc634 ("MIPS: Initial implementation of a VDSO") Reported-by: Jason A. Donenfeld Reviewed-by: Jason A. Donenfeld Tested-by: Jason A. Donenfeld Cc: Arnd Bergmann Cc: Christian Brauner Cc: Vincenzo Frascino Cc: # v4.4+ Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/thread_info.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index 5e8927f99a76..a0338dbabeaa 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -52,8 +52,26 @@ struct thread_info { #define init_thread_info (init_thread_union.thread_info) #define init_stack (init_thread_union.stack) -/* How to get the thread information struct from C. */ +/* + * A pointer to the struct thread_info for the currently executing thread is + * held in register $28/$gp. + * + * We declare __current_thread_info as a global register variable rather than a + * local register variable within current_thread_info() because clang doesn't + * support explicit local register variables. + * + * When building the VDSO we take care not to declare the global register + * variable because this causes GCC to not preserve the value of $28/$gp in + * functions that change its value (which is common in the PIC VDSO when + * accessing the GOT). Since the VDSO shouldn't be accessing + * __current_thread_info anyway we declare it extern in order to cause a link + * failure if it's referenced. + */ +#ifdef __VDSO__ +extern struct thread_info *__current_thread_info; +#else register struct thread_info *__current_thread_info __asm__("$28"); +#endif static inline struct thread_info *current_thread_info(void) { From 981004da1a3323e19b21157d07fab8b5ca3592a1 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sat, 7 Dec 2019 23:43:23 +0100 Subject: [PATCH 2078/3715] media: pulse8-cec: fix lost cec_transmit_attempt_done() call commit e5a52a1d15c79bb48a430fb263852263ec1d3f11 upstream. The periodic PING command could interfere with the result of a CEC transmit, causing a lost cec_transmit_attempt_done() call. Signed-off-by: Hans Verkuil Cc: # for v4.10 and up Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/pulse8-cec/pulse8-cec.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/media/usb/pulse8-cec/pulse8-cec.c b/drivers/media/usb/pulse8-cec/pulse8-cec.c index 12da631c0fda..f1615fb60015 100644 --- a/drivers/media/usb/pulse8-cec/pulse8-cec.c +++ b/drivers/media/usb/pulse8-cec/pulse8-cec.c @@ -121,6 +121,7 @@ struct pulse8 { unsigned int vers; struct completion cmd_done; struct work_struct work; + u8 work_result; struct delayed_work ping_eeprom_work; struct cec_msg rx_msg; u8 data[DATA_SIZE]; @@ -142,8 +143,10 @@ static void pulse8_irq_work_handler(struct work_struct *work) { struct pulse8 *pulse8 = container_of(work, struct pulse8, work); + u8 result = pulse8->work_result; - switch (pulse8->data[0] & 0x3f) { + pulse8->work_result = 0; + switch (result & 0x3f) { case MSGCODE_FRAME_DATA: cec_received_msg(pulse8->adap, &pulse8->rx_msg); break; @@ -177,12 +180,12 @@ static irqreturn_t pulse8_interrupt(struct serio *serio, unsigned char data, pulse8->escape = false; } else if (data == MSGEND) { struct cec_msg *msg = &pulse8->rx_msg; + u8 msgcode = pulse8->buf[0]; if (debug) dev_info(pulse8->dev, "received: %*ph\n", pulse8->idx, pulse8->buf); - pulse8->data[0] = pulse8->buf[0]; - switch (pulse8->buf[0] & 0x3f) { + switch (msgcode & 0x3f) { case MSGCODE_FRAME_START: msg->len = 1; msg->msg[0] = pulse8->buf[1]; @@ -191,14 +194,20 @@ static irqreturn_t pulse8_interrupt(struct serio *serio, unsigned char data, if (msg->len == CEC_MAX_MSG_SIZE) break; msg->msg[msg->len++] = pulse8->buf[1]; - if (pulse8->buf[0] & MSGCODE_FRAME_EOM) + if (msgcode & MSGCODE_FRAME_EOM) { + WARN_ON(pulse8->work_result); + pulse8->work_result = msgcode; schedule_work(&pulse8->work); + break; + } break; case MSGCODE_TRANSMIT_SUCCEEDED: case MSGCODE_TRANSMIT_FAILED_LINE: case MSGCODE_TRANSMIT_FAILED_ACK: case MSGCODE_TRANSMIT_FAILED_TIMEOUT_DATA: case MSGCODE_TRANSMIT_FAILED_TIMEOUT_LINE: + WARN_ON(pulse8->work_result); + pulse8->work_result = msgcode; schedule_work(&pulse8->work); break; case MSGCODE_HIGH_ERROR: From d106de1b9e3670abffb2a4e18dbc88170c30b452 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 4 Dec 2019 08:52:08 +0100 Subject: [PATCH 2079/3715] media: cec: CEC 2.0-only bcast messages were ignored commit cec935ce69fc386f13959578deb40963ebbb85c3 upstream. Some messages are allowed to be a broadcast message in CEC 2.0 only, and should be ignored by CEC 1.4 devices. Unfortunately, the check was wrong, causing such messages to be marked as invalid under CEC 2.0. Signed-off-by: Hans Verkuil Cc: # for v4.10 and up Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/cec/cec-adap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/media/cec/cec-adap.c b/drivers/media/cec/cec-adap.c index 27e57915eb4d..8030485ecfd7 100644 --- a/drivers/media/cec/cec-adap.c +++ b/drivers/media/cec/cec-adap.c @@ -1031,11 +1031,11 @@ void cec_received_msg_ts(struct cec_adapter *adap, valid_la = false; else if (!cec_msg_is_broadcast(msg) && !(dir_fl & DIRECTED)) valid_la = false; - else if (cec_msg_is_broadcast(msg) && !(dir_fl & BCAST1_4)) + else if (cec_msg_is_broadcast(msg) && !(dir_fl & BCAST)) valid_la = false; else if (cec_msg_is_broadcast(msg) && - adap->log_addrs.cec_version >= CEC_OP_CEC_VERSION_2_0 && - !(dir_fl & BCAST2_0)) + adap->log_addrs.cec_version < CEC_OP_CEC_VERSION_2_0 && + !(dir_fl & BCAST1_4)) valid_la = false; } if (valid_la && min_len) { From 26e5eab93db02f82fd24ff76ce363958c6433936 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sat, 7 Dec 2019 23:48:09 +0100 Subject: [PATCH 2080/3715] media: cec: avoid decrementing transmit_queue_sz if it is 0 commit 95c29d46ab2a517e4c26d0a07300edca6768db17 upstream. WARN if transmit_queue_sz is 0 but do not decrement it. The CEC adapter will become unresponsive if it goes below 0 since then it thinks there are 4 billion messages in the queue. Obviously this should not happen, but a driver bug could cause this. Signed-off-by: Hans Verkuil Cc: # for v4.12 and up Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/cec/cec-adap.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/media/cec/cec-adap.c b/drivers/media/cec/cec-adap.c index 8030485ecfd7..0d7d687aeea0 100644 --- a/drivers/media/cec/cec-adap.c +++ b/drivers/media/cec/cec-adap.c @@ -330,7 +330,8 @@ static void cec_data_cancel(struct cec_data *data) } else { list_del_init(&data->list); if (!(data->msg.tx_status & CEC_TX_STATUS_OK)) - data->adap->transmit_queue_sz--; + if (!WARN_ON(!data->adap->transmit_queue_sz)) + data->adap->transmit_queue_sz--; } /* Mark it as an error */ @@ -377,6 +378,14 @@ static void cec_flush(struct cec_adapter *adap) * need to do anything special in that case. */ } + /* + * If something went wrong and this counter isn't what it should + * be, then this will reset it back to 0. Warn if it is not 0, + * since it indicates a bug, either in this framework or in a + * CEC driver. + */ + if (WARN_ON(adap->transmit_queue_sz)) + adap->transmit_queue_sz = 0; } /* @@ -465,7 +474,8 @@ int cec_thread_func(void *_adap) data = list_first_entry(&adap->transmit_queue, struct cec_data, list); list_del_init(&data->list); - adap->transmit_queue_sz--; + if (!WARN_ON(!data->adap->transmit_queue_sz)) + adap->transmit_queue_sz--; /* Make this the current transmitting message */ adap->transmitting = data; From 1501713aee057266a0774f757678e482a1dd047b Mon Sep 17 00:00:00 2001 From: Chanho Min Date: Sat, 4 Jan 2020 12:59:36 -0800 Subject: [PATCH 2081/3715] mm/zsmalloc.c: fix the migrated zspage statistics. commit ac8f05da5174c560de122c499ce5dfb5d0dfbee5 upstream. When zspage is migrated to the other zone, the zone page state should be updated as well, otherwise the NR_ZSPAGE for each zone shows wrong counts including proc/zoneinfo in practice. Link: http://lkml.kernel.org/r/1575434841-48009-1-git-send-email-chanho.min@lge.com Fixes: 91537fee0013 ("mm: add NR_ZSMALLOC to vmstat") Signed-off-by: Chanho Min Signed-off-by: Jinsuk Choi Reviewed-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: [4.9+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/zsmalloc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 3197de2a3896..c6df483b3751 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -2088,6 +2088,11 @@ int zs_page_migrate(struct address_space *mapping, struct page *newpage, zs_pool_dec_isolated(pool); } + if (page_zone(newpage) != page_zone(page)) { + dec_zone_page_state(page, NR_ZSPAGES); + inc_zone_page_state(newpage, NR_ZSPAGES); + } + reset_page(page); put_page(page); page = newpage; From abb59358f1c4d9490688ef7e133afcb6b5bcb7ca Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Sat, 4 Jan 2020 12:59:43 -0800 Subject: [PATCH 2082/3715] memcg: account security cred as well to kmemcg commit 84029fd04c201a4c7e0b07ba262664900f47c6f5 upstream. The cred_jar kmem_cache is already memcg accounted in the current kernel but cred->security is not. Account cred->security to kmemcg. Recently we saw high root slab usage on our production and on further inspection, we found a buggy application leaking processes. Though that buggy application was contained within its memcg but we observe much more system memory overhead, couple of GiBs, during that period. This overhead can adversely impact the isolation on the system. One source of high overhead we found was cred->security objects, which have a lifetime of at least the life of the process which allocated them. Link: http://lkml.kernel.org/r/20191205223721.40034-1-shakeelb@google.com Signed-off-by: Shakeel Butt Acked-by: Chris Down Reviewed-by: Roman Gushchin Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/cred.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/cred.c b/kernel/cred.c index 5ab1f7ec946e..a9f0f8b21d8c 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -220,7 +220,7 @@ struct cred *cred_alloc_blank(void) new->magic = CRED_MAGIC; #endif - if (security_cred_alloc_blank(new, GFP_KERNEL) < 0) + if (security_cred_alloc_blank(new, GFP_KERNEL_ACCOUNT) < 0) goto error; return new; @@ -279,7 +279,7 @@ struct cred *prepare_creds(void) new->security = NULL; #endif - if (security_prepare_creds(new, old, GFP_KERNEL) < 0) + if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0) goto error; validate_creds(new); return new; @@ -654,7 +654,7 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) #ifdef CONFIG_SECURITY new->security = NULL; #endif - if (security_prepare_creds(new, old, GFP_KERNEL) < 0) + if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0) goto error; put_cred(old); From 45b0a5affcbe3d7e3ab0dd5bc19338404a60a4b2 Mon Sep 17 00:00:00 2001 From: Aleksandr Yashkin Date: Mon, 23 Dec 2019 18:38:16 +0500 Subject: [PATCH 2083/3715] pstore/ram: Write new dumps to start of recycled zones commit 9e5f1c19800b808a37fb9815a26d382132c26c3d upstream. The ram_core.c routines treat przs as circular buffers. When writing a new crash dump, the old buffer needs to be cleared so that the new dump doesn't end up in the wrong place (i.e. at the end). The solution to this problem is to reset the circular buffer state before writing a new Oops dump. Signed-off-by: Aleksandr Yashkin Signed-off-by: Nikolay Merinov Signed-off-by: Ariel Gilman Link: https://lore.kernel.org/r/20191223133816.28155-1-n.merinov@inango-systems.com Fixes: 896fc1f0c4c6 ("pstore/ram: Switch to persistent_ram routines") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- fs/pstore/ram.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 1e675be10926..11c7a171c0a1 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -433,6 +433,17 @@ static int notrace ramoops_pstore_write(struct pstore_record *record) prz = cxt->dprzs[cxt->dump_write_cnt]; + /* + * Since this is a new crash dump, we need to reset the buffer in + * case it still has an old dump present. Without this, the new dump + * will get appended, which would seriously confuse anything trying + * to check dump file contents. Specifically, ramoops_read_kmsg_hdr() + * expects to find a dump header in the beginning of buffer data, so + * we must to reset the buffer values, in order to ensure that the + * header will be written to the beginning of the buffer. + */ + persistent_ram_zap(prz); + /* Build header and append record contents. */ hlen = ramoops_write_kmsg_hdr(prz, record); size = record->size; From ee4cdf398aeceae9560601dfe5953e93455b0f0d Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 22 Dec 2019 20:45:28 +0200 Subject: [PATCH 2084/3715] locks: print unsigned ino in /proc/locks commit 98ca480a8f22fdbd768e3dad07024c8d4856576c upstream. An ino is unsigned, so display it as such in /proc/locks. Cc: stable@vger.kernel.org Signed-off-by: Amir Goldstein Signed-off-by: Jeff Layton Signed-off-by: Greg Kroah-Hartman --- fs/locks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/locks.c b/fs/locks.c index 665e3ce9ab47..1a40e277eb5e 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2691,7 +2691,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, } if (inode) { /* userspace relies on this representation of dev_t */ - seq_printf(f, "%d %02x:%02x:%ld ", fl_pid, + seq_printf(f, "%d %02x:%02x:%lu ", fl_pid, MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), inode->i_ino); } else { From 123d28f803deb26b2ae2ac285c1b6ff44cd88e1d Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 5 Dec 2019 12:54:49 +0100 Subject: [PATCH 2085/3715] dmaengine: Fix access to uninitialized dma_slave_caps commit 53a256a9b925b47c7e67fc1f16ca41561a7b877c upstream. dmaengine_desc_set_reuse() allocates a struct dma_slave_caps on the stack, populates it using dma_get_slave_caps() and then accesses one of its members. However dma_get_slave_caps() may fail and this isn't accounted for, leading to a legitimate warning of gcc-4.9 (but not newer versions): In file included from drivers/spi/spi-bcm2835.c:19:0: drivers/spi/spi-bcm2835.c: In function 'dmaengine_desc_set_reuse': >> include/linux/dmaengine.h:1370:10: warning: 'caps.descriptor_reuse' is used uninitialized in this function [-Wuninitialized] if (caps.descriptor_reuse) { Fix it, thereby also silencing the gcc-4.9 warning. The issue has been present for 4 years but surfaces only now that the first caller of dmaengine_desc_set_reuse() has been added in spi-bcm2835.c. Another user of reusable DMA descriptors has existed for a while in pxa_camera.c, but it sets the DMA_CTRL_REUSE flag directly instead of calling dmaengine_desc_set_reuse(). Nevertheless, tag this commit for stable in case there are out-of-tree users. Fixes: 272420214d26 ("dmaengine: Add DMA_CTRL_REUSE") Reported-by: kbuild test robot Signed-off-by: Lukas Wunner Cc: stable@vger.kernel.org # v4.3+ Link: https://lore.kernel.org/r/ca92998ccc054b4f2bfd60ef3adbab2913171eac.1575546234.git.lukas@wunner.de Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- include/linux/dmaengine.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 8319101170fc..087cbe776868 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -1362,8 +1362,11 @@ static inline int dma_get_slave_caps(struct dma_chan *chan, static inline int dmaengine_desc_set_reuse(struct dma_async_tx_descriptor *tx) { struct dma_slave_caps caps; + int ret; - dma_get_slave_caps(tx->chan, &caps); + ret = dma_get_slave_caps(tx->chan, &caps); + if (ret) + return ret; if (caps.descriptor_reuse) { tx->flags |= DMA_CTRL_REUSE; From 77d19c9c3e078392810c79805a8f3f017fabbfe0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 29 Nov 2019 11:28:22 +0100 Subject: [PATCH 2086/3715] compat_ioctl: block: handle Persistent Reservations commit b2c0fcd28772f99236d261509bcd242135677965 upstream. These were added to blkdev_ioctl() in linux-5.5 but not blkdev_compat_ioctl, so add them now. Cc: # v4.4+ Fixes: bbd3e064362e ("block: add an API for Persistent Reservations") Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman Fold in followup patch from Arnd with missing pr.h header include. Signed-off-by: Jens Axboe --- block/compat_ioctl.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index 6ca015f92766..413bd5c5380b 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -401,6 +402,14 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) case BLKTRACETEARDOWN: /* compatible */ ret = blk_trace_ioctl(bdev, cmd, compat_ptr(arg)); return ret; + case IOC_PR_REGISTER: + case IOC_PR_RESERVE: + case IOC_PR_RELEASE: + case IOC_PR_PREEMPT: + case IOC_PR_PREEMPT_ABORT: + case IOC_PR_CLEAR: + return blkdev_ioctl(bdev, mode, cmd, + (unsigned long)compat_ptr(arg)); default: if (disk->fops->compat_ioctl) ret = disk->fops->compat_ioctl(bdev, mode, cmd, arg); From 238d4e749b773eb58678ea93b8f6cfdf5c454b5a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 29 Nov 2019 11:28:22 +0100 Subject: [PATCH 2087/3715] compat_ioctl: block: handle BLKREPORTZONE/BLKRESETZONE commit 673bdf8ce0a387ef585c13b69a2676096c6edfe9 upstream. These were added to blkdev_ioctl() but not blkdev_compat_ioctl, so add them now. Cc: # v4.10+ Fixes: 3ed05a987e0f ("blk-zoned: implement ioctls") Reviewed-by: Damien Le Moal Signed-off-by: Arnd Bergmann Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/compat_ioctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index 413bd5c5380b..6490b2759bcb 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -355,6 +355,8 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) * but we call blkdev_ioctl, which gets the lock for us */ case BLKRRPART: + case BLKREPORTZONE: + case BLKRESETZONE: return blkdev_ioctl(bdev, mode, cmd, (unsigned long)compat_ptr(arg)); case BLKBSZSET_32: From eb3f6286e76aa16295081db7ce0229fda6fce4a1 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 10 Dec 2019 10:53:44 -0800 Subject: [PATCH 2088/3715] ata: libahci_platform: Export again ahci_platform_able_phys() commit 84b032dbfdf1c139cd2b864e43959510646975f8 upstream. This reverts commit 6bb86fefa086faba7b60bb452300b76a47cde1a5 ("libahci_platform: Staticize ahci_platform_able_phys()") we are going to need ahci_platform_{enable,disable}_phys() in a subsequent commit for ahci_brcm.c in order to properly control the PHY initialization order. Also make sure the function prototypes are declared in include/linux/ahci_platform.h as a result. Cc: stable@vger.kernel.org Reviewed-by: Hans de Goede Signed-off-by: Florian Fainelli Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libahci_platform.c | 6 ++++-- include/linux/ahci_platform.h | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index 70cdbf1b0f9a..5929672b809e 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -46,7 +46,7 @@ EXPORT_SYMBOL_GPL(ahci_platform_ops); * RETURNS: * 0 on success otherwise a negative error code */ -static int ahci_platform_enable_phys(struct ahci_host_priv *hpriv) +int ahci_platform_enable_phys(struct ahci_host_priv *hpriv) { int rc, i; @@ -71,6 +71,7 @@ disable_phys: } return rc; } +EXPORT_SYMBOL_GPL(ahci_platform_enable_phys); /** * ahci_platform_disable_phys - Disable PHYs @@ -78,7 +79,7 @@ disable_phys: * * This function disables all PHYs found in hpriv->phys. */ -static void ahci_platform_disable_phys(struct ahci_host_priv *hpriv) +void ahci_platform_disable_phys(struct ahci_host_priv *hpriv) { int i; @@ -87,6 +88,7 @@ static void ahci_platform_disable_phys(struct ahci_host_priv *hpriv) phy_exit(hpriv->phys[i]); } } +EXPORT_SYMBOL_GPL(ahci_platform_disable_phys); /** * ahci_platform_enable_clks - Enable platform clocks diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h index 1b0a17b22cd3..d560580d9cda 100644 --- a/include/linux/ahci_platform.h +++ b/include/linux/ahci_platform.h @@ -23,6 +23,8 @@ struct ahci_host_priv; struct platform_device; struct scsi_host_template; +int ahci_platform_enable_phys(struct ahci_host_priv *hpriv); +void ahci_platform_disable_phys(struct ahci_host_priv *hpriv); int ahci_platform_enable_clks(struct ahci_host_priv *hpriv); void ahci_platform_disable_clks(struct ahci_host_priv *hpriv); int ahci_platform_enable_regulators(struct ahci_host_priv *hpriv); From 0cbbbcda858601c2e2c1535c8819fa1a606d47f1 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 1 Oct 2018 10:33:00 -0700 Subject: [PATCH 2089/3715] ata: ahci_brcm: Allow optional reset controller to be used commit 2b2c47d9e1fe90311b725125d6252a859ee87a79 upstream. On BCM63138, we need to reset the AHCI core prior to start utilizing it, grab the reset controller device cookie and do that. Signed-off-by: Florian Fainelli Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci_brcm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/ata/ahci_brcm.c b/drivers/ata/ahci_brcm.c index 5936d1679bf3..cfa9192dab29 100644 --- a/drivers/ata/ahci_brcm.c +++ b/drivers/ata/ahci_brcm.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include "ahci.h" @@ -87,6 +88,7 @@ struct brcm_ahci_priv { u32 port_mask; u32 quirks; enum brcm_ahci_version version; + struct reset_control *rcdev; }; static const struct ata_port_info ahci_brcm_port_info = { @@ -327,6 +329,11 @@ static int brcm_ahci_probe(struct platform_device *pdev) if (IS_ERR(priv->top_ctrl)) return PTR_ERR(priv->top_ctrl); + /* Reset is optional depending on platform */ + priv->rcdev = devm_reset_control_get(&pdev->dev, "ahci"); + if (!IS_ERR_OR_NULL(priv->rcdev)) + reset_control_deassert(priv->rcdev); + if ((priv->version == BRCM_SATA_BCM7425) || (priv->version == BRCM_SATA_NSP)) { priv->quirks |= BRCM_AHCI_QUIRK_NO_NCQ; From ea0b4277ea9df06e484632e92615c37e70545e65 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 10 Dec 2019 10:53:45 -0800 Subject: [PATCH 2090/3715] ata: ahci_brcm: Fix AHCI resources management commit c0cdf2ac4b5bf3e5ef2451ea29fb4104278cdabc upstream. The AHCI resources management within ahci_brcm.c is a little convoluted, largely because it historically had a dedicated clock that was managed within this file in the downstream tree. Once brough upstream though, the clock was left to be managed by libahci_platform.c which is entirely appropriate. This patch series ensures that the AHCI resources are fetched and enabled before any register access is done, thus avoiding bus errors on platforms which clock gate the controller by default. As a result we need to re-arrange the suspend() and resume() functions in order to avoid accessing registers after the clocks have been turned off respectively before the clocks have been turned on. Finally, we can refactor brcm_ahci_get_portmask() in order to fetch the number of ports from hpriv->mmio which is now accessible without jumping through hoops like we used to do. The commit pointed in the Fixes tag is both old and new enough not to require major headaches for backporting of this patch. Fixes: eba68f829794 ("ata: ahci_brcmstb: rename to support across Broadcom SoC's") Cc: stable@vger.kernel.org Reviewed-by: Hans de Goede Signed-off-by: Florian Fainelli Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci_brcm.c | 105 +++++++++++++++++++++++++++++----------- 1 file changed, 76 insertions(+), 29 deletions(-) diff --git a/drivers/ata/ahci_brcm.c b/drivers/ata/ahci_brcm.c index cfa9192dab29..8beb81b24f14 100644 --- a/drivers/ata/ahci_brcm.c +++ b/drivers/ata/ahci_brcm.c @@ -223,19 +223,12 @@ static void brcm_sata_phys_disable(struct brcm_ahci_priv *priv) brcm_sata_phy_disable(priv, i); } -static u32 brcm_ahci_get_portmask(struct platform_device *pdev, +static u32 brcm_ahci_get_portmask(struct ahci_host_priv *hpriv, struct brcm_ahci_priv *priv) { - void __iomem *ahci; - struct resource *res; u32 impl; - res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ahci"); - ahci = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(ahci)) - return 0; - - impl = readl(ahci + HOST_PORTS_IMPL); + impl = readl(hpriv->mmio + HOST_PORTS_IMPL); if (fls(impl) > SATA_TOP_MAX_PHYS) dev_warn(priv->dev, "warning: more ports than PHYs (%#x)\n", @@ -243,9 +236,6 @@ static u32 brcm_ahci_get_portmask(struct platform_device *pdev, else if (!impl) dev_info(priv->dev, "no ports found\n"); - devm_iounmap(&pdev->dev, ahci); - devm_release_mem_region(&pdev->dev, res->start, resource_size(res)); - return impl; } @@ -272,11 +262,10 @@ static int brcm_ahci_suspend(struct device *dev) struct ata_host *host = dev_get_drvdata(dev); struct ahci_host_priv *hpriv = host->private_data; struct brcm_ahci_priv *priv = hpriv->plat_data; - int ret; - ret = ahci_platform_suspend(dev); brcm_sata_phys_disable(priv); - return ret; + + return ahci_platform_suspend(dev); } static int brcm_ahci_resume(struct device *dev) @@ -284,11 +273,44 @@ static int brcm_ahci_resume(struct device *dev) struct ata_host *host = dev_get_drvdata(dev); struct ahci_host_priv *hpriv = host->private_data; struct brcm_ahci_priv *priv = hpriv->plat_data; + int ret; + + /* Make sure clocks are turned on before re-configuration */ + ret = ahci_platform_enable_clks(hpriv); + if (ret) + return ret; brcm_sata_init(priv); brcm_sata_phys_enable(priv); brcm_sata_alpm_init(hpriv); - return ahci_platform_resume(dev); + + /* Since we had to enable clocks earlier on, we cannot use + * ahci_platform_resume() as-is since a second call to + * ahci_platform_enable_resources() would bump up the resources + * (regulators, clocks, PHYs) count artificially so we copy the part + * after ahci_platform_enable_resources(). + */ + ret = ahci_platform_enable_phys(hpriv); + if (ret) + goto out_disable_phys; + + ret = ahci_platform_resume_host(dev); + if (ret) + goto out_disable_platform_phys; + + /* We resumed so update PM runtime state */ + pm_runtime_disable(dev); + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + + return 0; + +out_disable_platform_phys: + ahci_platform_disable_phys(hpriv); +out_disable_phys: + brcm_sata_phys_disable(priv); + ahci_platform_disable_clks(hpriv); + return ret; } #endif @@ -340,38 +362,63 @@ static int brcm_ahci_probe(struct platform_device *pdev) priv->quirks |= BRCM_AHCI_QUIRK_SKIP_PHY_ENABLE; } + hpriv = ahci_platform_get_resources(pdev); + if (IS_ERR(hpriv)) { + ret = PTR_ERR(hpriv); + goto out_reset; + } + + ret = ahci_platform_enable_clks(hpriv); + if (ret) + goto out_reset; + + /* Must be first so as to configure endianness including that + * of the standard AHCI register space. + */ brcm_sata_init(priv); - priv->port_mask = brcm_ahci_get_portmask(pdev, priv); - if (!priv->port_mask) - return -ENODEV; + /* Initializes priv->port_mask which is used below */ + priv->port_mask = brcm_ahci_get_portmask(hpriv, priv); + if (!priv->port_mask) { + ret = -ENODEV; + goto out_disable_clks; + } + /* Must be done before ahci_platform_enable_phys() */ brcm_sata_phys_enable(priv); - hpriv = ahci_platform_get_resources(pdev); - if (IS_ERR(hpriv)) - return PTR_ERR(hpriv); hpriv->plat_data = priv; hpriv->flags = AHCI_HFLAG_WAKE_BEFORE_STOP; brcm_sata_alpm_init(hpriv); - ret = ahci_platform_enable_resources(hpriv); - if (ret) - return ret; - if (priv->quirks & BRCM_AHCI_QUIRK_NO_NCQ) hpriv->flags |= AHCI_HFLAG_NO_NCQ; hpriv->flags |= AHCI_HFLAG_NO_WRITE_TO_RO; + ret = ahci_platform_enable_phys(hpriv); + if (ret) + goto out_disable_phys; + ret = ahci_platform_init_host(pdev, hpriv, &ahci_brcm_port_info, &ahci_platform_sht); if (ret) - return ret; + goto out_disable_platform_phys; dev_info(dev, "Broadcom AHCI SATA3 registered\n"); return 0; + +out_disable_platform_phys: + ahci_platform_disable_phys(hpriv); +out_disable_phys: + brcm_sata_phys_disable(priv); +out_disable_clks: + ahci_platform_disable_clks(hpriv); +out_reset: + if (!IS_ERR_OR_NULL(priv->rcdev)) + reset_control_assert(priv->rcdev); + return ret; } static int brcm_ahci_remove(struct platform_device *pdev) @@ -381,12 +428,12 @@ static int brcm_ahci_remove(struct platform_device *pdev) struct brcm_ahci_priv *priv = hpriv->plat_data; int ret; + brcm_sata_phys_disable(priv); + ret = ata_platform_remove_one(pdev); if (ret) return ret; - brcm_sata_phys_disable(priv); - return 0; } From 3444204d610b8ab60353d6731d1670edfa5fe911 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 7 Dec 2019 16:20:18 +0000 Subject: [PATCH 2091/3715] gpiolib: fix up emulated open drain outputs commit 256efaea1fdc4e38970489197409a26125ee0aaa upstream. gpiolib has a corner case with open drain outputs that are emulated. When such outputs are outputting a logic 1, emulation will set the hardware to input mode, which will cause gpiod_get_direction() to report that it is in input mode. This is different from the behaviour with a true open-drain output. Unify the semantics here. Cc: Suggested-by: Linus Walleij Signed-off-by: Russell King Signed-off-by: Bartosz Golaszewski Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index c7f5f0be2d74..2b75aab8b3a0 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -206,6 +206,14 @@ int gpiod_get_direction(struct gpio_desc *desc) chip = gpiod_to_chip(desc); offset = gpio_chip_hwgpio(desc); + /* + * Open drain emulation using input mode may incorrectly report + * input here, fix that up. + */ + if (test_bit(FLAG_OPEN_DRAIN, &desc->flags) && + test_bit(FLAG_IS_OUT, &desc->flags)) + return 0; + if (!chip->get_direction) return status; From af24719234048a2634edbd797e6da43869ba5293 Mon Sep 17 00:00:00 2001 From: Prateek Sood Date: Tue, 10 Dec 2019 09:15:16 +0000 Subject: [PATCH 2092/3715] tracing: Fix lock inversion in trace_event_enable_tgid_record() commit 3a53acf1d9bea11b57c1f6205e3fe73f9d8a3688 upstream. Task T2 Task T3 trace_options_core_write() subsystem_open() mutex_lock(trace_types_lock) mutex_lock(event_mutex) set_tracer_flag() trace_event_enable_tgid_record() mutex_lock(trace_types_lock) mutex_lock(event_mutex) This gives a circular dependency deadlock between trace_types_lock and event_mutex. To fix this invert the usage of trace_types_lock and event_mutex in trace_options_core_write(). This keeps the sequence of lock usage consistent. Link: http://lkml.kernel.org/r/0101016eef175e38-8ca71caf-a4eb-480d-a1e6-6f0bbc015495-000000@us-west-2.amazonses.com Cc: stable@vger.kernel.org Fixes: d914ba37d7145 ("tracing: Add support for recording tgid of tasks") Signed-off-by: Prateek Sood Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 8 ++++++++ kernel/trace/trace_events.c | 8 ++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 286bbad7681b..c456c2b06277 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4368,6 +4368,10 @@ int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set) int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled) { + if ((mask == TRACE_ITER_RECORD_TGID) || + (mask == TRACE_ITER_RECORD_CMD)) + lockdep_assert_held(&event_mutex); + /* do nothing if flag is already set */ if (!!(tr->trace_flags & mask) == !!enabled) return 0; @@ -4433,6 +4437,7 @@ static int trace_set_options(struct trace_array *tr, char *option) cmp += 2; } + mutex_lock(&event_mutex); mutex_lock(&trace_types_lock); for (i = 0; trace_options[i]; i++) { @@ -4447,6 +4452,7 @@ static int trace_set_options(struct trace_array *tr, char *option) ret = set_tracer_option(tr, cmp, neg); mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); /* * If the first trailing whitespace is replaced with '\0' by strstrip, @@ -7373,9 +7379,11 @@ trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt, if (val != 0 && val != 1) return -EINVAL; + mutex_lock(&event_mutex); mutex_lock(&trace_types_lock); ret = set_tracer_flag(tr, 1 << index, val); mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); if (ret < 0) return ret; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 654a1587f6dd..2b0a01b2be2d 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -326,7 +326,8 @@ void trace_event_enable_cmd_record(bool enable) struct trace_event_file *file; struct trace_array *tr; - mutex_lock(&event_mutex); + lockdep_assert_held(&event_mutex); + do_for_each_event_file(tr, file) { if (!(file->flags & EVENT_FILE_FL_ENABLED)) @@ -340,7 +341,6 @@ void trace_event_enable_cmd_record(bool enable) clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); } } while_for_each_event_file(); - mutex_unlock(&event_mutex); } void trace_event_enable_tgid_record(bool enable) @@ -348,7 +348,8 @@ void trace_event_enable_tgid_record(bool enable) struct trace_event_file *file; struct trace_array *tr; - mutex_lock(&event_mutex); + lockdep_assert_held(&event_mutex); + do_for_each_event_file(tr, file) { if (!(file->flags & EVENT_FILE_FL_ENABLED)) continue; @@ -362,7 +363,6 @@ void trace_event_enable_tgid_record(bool enable) &file->flags); } } while_for_each_event_file(); - mutex_unlock(&event_mutex); } static int __ftrace_event_enable_disable(struct trace_event_file *file, From bc5e8a8a58be3e7414fb5abdbe49d731cbd26f2f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 11 Dec 2019 15:44:22 -0500 Subject: [PATCH 2093/3715] tracing: Have the histogram compare functions convert to u64 first commit 106f41f5a302cb1f36c7543fae6a05de12e96fa4 upstream. The compare functions of the histogram code would be specific for the size of the value being compared (byte, short, int, long long). It would reference the value from the array via the type of the compare, but the value was stored in a 64 bit number. This is fine for little endian machines, but for big endian machines, it would end up comparing zeros or all ones (depending on the sign) for anything but 64 bit numbers. To fix this, first derference the value as a u64 then convert it to the type being compared. Link: http://lkml.kernel.org/r/20191211103557.7bed6928@gandalf.local.home Cc: stable@vger.kernel.org Fixes: 08d43a5fa063e ("tracing: Add lock-free tracing_map") Acked-by: Tom Zanussi Reported-by: Sven Schnelle Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/tracing_map.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c index 305039b122fa..35b2ba07f3c6 100644 --- a/kernel/trace/tracing_map.c +++ b/kernel/trace/tracing_map.c @@ -90,8 +90,8 @@ static int tracing_map_cmp_atomic64(void *val_a, void *val_b) #define DEFINE_TRACING_MAP_CMP_FN(type) \ static int tracing_map_cmp_##type(void *val_a, void *val_b) \ { \ - type a = *(type *)val_a; \ - type b = *(type *)val_b; \ + type a = (type)(*(u64 *)val_a); \ + type b = (type)(*(u64 *)val_b); \ \ return (a > b) ? 1 : ((a < b) ? -1 : 0); \ } From d7867fbbd4d6dccf94aee75dd1281bd95ec46863 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 22 Nov 2019 13:13:54 +0000 Subject: [PATCH 2094/3715] ALSA: cs4236: fix error return comparison of an unsigned integer commit d60229d84846a8399257006af9c5444599f64361 upstream. The return from pnp_irq is an unsigned integer type resource_size_t and hence the error check for a positive non-error code is always going to be true. A check for a non-failure return from pnp_irq should in fact be for (resource_size_t)-1 rather than >= 0. Addresses-Coverity: ("Unsigned compared against 0") Fixes: a9824c868a2c ("[ALSA] Add CS4232 PnP BIOS support") Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20191122131354.58042-1-colin.king@canonical.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/isa/cs423x/cs4236.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/isa/cs423x/cs4236.c b/sound/isa/cs423x/cs4236.c index 70559e59d18f..7d4e18cb6351 100644 --- a/sound/isa/cs423x/cs4236.c +++ b/sound/isa/cs423x/cs4236.c @@ -293,7 +293,8 @@ static int snd_cs423x_pnp_init_mpu(int dev, struct pnp_dev *pdev) } else { mpu_port[dev] = pnp_port_start(pdev, 0); if (mpu_irq[dev] >= 0 && - pnp_irq_valid(pdev, 0) && pnp_irq(pdev, 0) >= 0) { + pnp_irq_valid(pdev, 0) && + pnp_irq(pdev, 0) != (resource_size_t)-1) { mpu_irq[dev] = pnp_irq(pdev, 0); } else { mpu_irq[dev] = -1; /* disable interrupt */ From 8d70c0d57805d3e7cdef93061bef930cc019a5d8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 30 Oct 2019 11:09:21 +0100 Subject: [PATCH 2095/3715] ALSA: firewire-motu: Correct a typo in the clock proc string commit 0929249e3be3bb82ee6cfec0025f4dde952210b3 upstream. Just fix a typo of "S/PDIF" in the clock name string. Fixes: 4638ec6ede08 ("ALSA: firewire-motu: add proc node to show current statuc of clock and packet formats") Acked-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20191030100921.3826-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/firewire/motu/motu-proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/firewire/motu/motu-proc.c b/sound/firewire/motu/motu-proc.c index 4edc064999ed..706f1e982e36 100644 --- a/sound/firewire/motu/motu-proc.c +++ b/sound/firewire/motu/motu-proc.c @@ -17,7 +17,7 @@ static const char *const clock_names[] = { [SND_MOTU_CLOCK_SOURCE_SPDIF_ON_OPT] = "S/PDIF on optical interface", [SND_MOTU_CLOCK_SOURCE_SPDIF_ON_OPT_A] = "S/PDIF on optical interface A", [SND_MOTU_CLOCK_SOURCE_SPDIF_ON_OPT_B] = "S/PDIF on optical interface B", - [SND_MOTU_CLOCK_SOURCE_SPDIF_ON_COAX] = "S/PCIF on coaxial interface", + [SND_MOTU_CLOCK_SOURCE_SPDIF_ON_COAX] = "S/PDIF on coaxial interface", [SND_MOTU_CLOCK_SOURCE_AESEBU_ON_XLR] = "AESEBU on XLR interface", [SND_MOTU_CLOCK_SOURCE_WORD_ON_BNC] = "Word clock on BNC interface", }; From 66a10703d6164a0dbb3efdda66555a5570449c25 Mon Sep 17 00:00:00 2001 From: chenqiwu Date: Thu, 19 Dec 2019 14:29:53 +0800 Subject: [PATCH 2096/3715] exit: panic before exit_mm() on global init exit commit 43cf75d96409a20ef06b756877a2e72b10a026fc upstream. Currently, when global init and all threads in its thread-group have exited we panic via: do_exit() -> exit_notify() -> forget_original_parent() -> find_child_reaper() This makes it hard to extract a useable coredump for global init from a kernel crashdump because by the time we panic exit_mm() will have already released global init's mm. This patch moves the panic futher up before exit_mm() is called. As was the case previously, we only panic when global init and all its threads in the thread-group have exited. Signed-off-by: chenqiwu Acked-by: Christian Brauner Acked-by: Oleg Nesterov [christian.brauner@ubuntu.com: fix typo, rewrite commit message] Link: https://lore.kernel.org/r/1576736993-10121-1-git-send-email-qiwuchen55@gmail.com Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- kernel/exit.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index 57cb0eb1271c..d1baf9c96c3e 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -577,10 +577,6 @@ static struct task_struct *find_child_reaper(struct task_struct *father, } write_unlock_irq(&tasklist_lock); - if (unlikely(pid_ns == &init_pid_ns)) { - panic("Attempted to kill init! exitcode=0x%08x\n", - father->signal->group_exit_code ?: father->exit_code); - } list_for_each_entry_safe(p, n, dead, ptrace_entry) { list_del_init(&p->ptrace_entry); @@ -823,6 +819,14 @@ void __noreturn do_exit(long code) acct_update_integrals(tsk); group_dead = atomic_dec_and_test(&tsk->signal->live); if (group_dead) { + /* + * If the last thread of global init has exited, panic + * immediately to get a useable coredump. + */ + if (unlikely(is_global_init(tsk))) + panic("Attempted to kill init! exitcode=0x%08x\n", + tsk->signal->group_exit_code ?: (int)code); + #ifdef CONFIG_POSIX_TIMERS hrtimer_cancel(&tsk->signal->real_timer); exit_itimers(tsk->signal); From 7650b4b1df091815bbbbb837d308dd4154684f8a Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Fri, 3 Jan 2020 11:02:48 +0800 Subject: [PATCH 2097/3715] ftrace: Avoid potential division by zero in function profiler commit e31f7939c1c27faa5d0e3f14519eaf7c89e8a69d upstream. The ftrace_profile->counter is unsigned long and do_div truncates it to 32 bits, which means it can test non-zero and be truncated to zero for division. Fix this issue by using div64_ul() instead. Link: http://lkml.kernel.org/r/20200103030248.14516-1-wenyang@linux.alibaba.com Cc: stable@vger.kernel.org Fixes: e330b3bcd8319 ("tracing: Show sample std dev in function profiling") Fixes: 34886c8bc590f ("tracing: add average time in function to function profiler") Signed-off-by: Wen Yang Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ftrace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 144d982905fc..3864d2341442 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -622,8 +622,7 @@ static int function_stat_show(struct seq_file *m, void *v) } #ifdef CONFIG_FUNCTION_GRAPH_TRACER - avg = rec->time; - do_div(avg, rec->counter); + avg = div64_ul(rec->time, rec->counter); if (tracing_thresh && (avg < tracing_thresh)) goto out; #endif @@ -649,7 +648,8 @@ static int function_stat_show(struct seq_file *m, void *v) * Divide only 1000 for ns^2 -> us^2 conversion. * trace_print_graph_duration will divide 1000 again. */ - do_div(stddev, rec->counter * (rec->counter - 1) * 1000); + stddev = div64_ul(stddev, + rec->counter * (rec->counter - 1) * 1000); } trace_seq_init(&s); From d9c64efbe05e4cced2c1ffbf93b658eab5714f54 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 6 Jan 2020 14:35:39 +0000 Subject: [PATCH 2098/3715] arm64: Revert support for execute-only user mappings commit 24cecc37746393432d994c0dbc251fb9ac7c5d72 upstream. The ARMv8 64-bit architecture supports execute-only user permissions by clearing the PTE_USER and PTE_UXN bits, practically making it a mostly privileged mapping but from which user running at EL0 can still execute. The downside, however, is that the kernel at EL1 inadvertently reading such mapping would not trip over the PAN (privileged access never) protection. Revert the relevant bits from commit cab15ce604e5 ("arm64: Introduce execute-only page access permissions") so that PROT_EXEC implies PROT_READ (and therefore PTE_USER) until the architecture gains proper support for execute-only user mappings. Fixes: cab15ce604e5 ("arm64: Introduce execute-only page access permissions") Cc: # 4.9.x- Acked-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/pgtable-prot.h | 5 ++--- arch/arm64/include/asm/pgtable.h | 10 +++------- arch/arm64/mm/fault.c | 2 +- mm/mmap.c | 6 ------ 4 files changed, 6 insertions(+), 17 deletions(-) diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 20e45733afa4..26efe251f076 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -76,13 +76,12 @@ #define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE) #define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN) -#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN) #define __P000 PAGE_NONE #define __P001 PAGE_READONLY #define __P010 PAGE_READONLY #define __P011 PAGE_READONLY -#define __P100 PAGE_EXECONLY +#define __P100 PAGE_READONLY_EXEC #define __P101 PAGE_READONLY_EXEC #define __P110 PAGE_READONLY_EXEC #define __P111 PAGE_READONLY_EXEC @@ -91,7 +90,7 @@ #define __S001 PAGE_READONLY #define __S010 PAGE_SHARED #define __S011 PAGE_SHARED -#define __S100 PAGE_EXECONLY +#define __S100 PAGE_READONLY_EXEC #define __S101 PAGE_READONLY_EXEC #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index aa3b8dd8fc35..9b676c3dd3ce 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -90,12 +90,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) #define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) -/* - * Execute-only user mappings do not have the PTE_USER bit set. All valid - * kernel mappings have the PTE_UXN bit set. - */ #define pte_valid_not_user(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN)) + ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID) #define pte_valid_young(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF)) #define pte_valid_user(pte) \ @@ -111,8 +107,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; /* * p??_access_permitted() is true for valid user mappings (subject to the - * write permission check) other than user execute-only which do not have the - * PTE_USER bit set. PROT_NONE mappings do not have the PTE_VALID bit set. + * write permission check). PROT_NONE mappings do not have the PTE_VALID bit + * set. */ #define pte_access_permitted(pte, write) \ (pte_valid_user(pte) && (!(write) || pte_write(pte))) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index bf7c285d0c82..617787e4081f 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -400,7 +400,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, struct task_struct *tsk; struct mm_struct *mm; int fault, sig, code, major = 0; - unsigned long vm_flags = VM_READ | VM_WRITE; + unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; if (notify_page_fault(regs, esr)) diff --git a/mm/mmap.c b/mm/mmap.c index 59fd53b41c9c..8c6ed06983f9 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -89,12 +89,6 @@ static void unmap_region(struct mm_struct *mm, * MAP_PRIVATE r: (no) no r: (yes) yes r: (no) yes r: (no) yes * w: (no) no w: (no) no w: (copy) copy w: (no) no * x: (no) no x: (no) yes x: (no) yes x: (yes) yes - * - * On arm64, PROT_EXEC has the following behaviour for both MAP_SHARED and - * MAP_PRIVATE: - * r: (no) no - * w: (no) no - * x: (yes) yes */ pgprot_t protection_map[16] __ro_after_init = { __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111, From 05c3fa01b50856ff17abef3c42caee9879696ebb Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Tue, 24 Sep 2019 10:26:53 +0300 Subject: [PATCH 2099/3715] PM / devfreq: Check NULL governor in available_governors_show commit d68adc8f85cd757bd33c8d7b2660ad6f16f7f3dc upstream. The governor is initialized after sysfs attributes become visible so in theory the governor field can be NULL here. Fixes: bcf23c79c4e46 ("PM / devfreq: Fix available_governor sysfs") Signed-off-by: Leonard Crestez Reviewed-by: Matthias Kaehlcke Reviewed-by: Chanwoo Choi Signed-off-by: Chanwoo Choi Signed-off-by: Greg Kroah-Hartman --- drivers/devfreq/devfreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 7b510ef1d0dd..ad18de955b6c 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -974,7 +974,7 @@ static ssize_t available_governors_show(struct device *d, * The devfreq with immutable governor (e.g., passive) shows * only own governor. */ - if (df->governor->immutable) { + if (df->governor && df->governor->immutable) { count = scnprintf(&buf[count], DEVFREQ_NAME_LEN, "%s ", df->governor_name); /* From 46a3c4fb68da8d8d78c81d443d9e80084e13853e Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Wed, 9 Oct 2019 15:11:37 -0400 Subject: [PATCH 2100/3715] nfsd4: fix up replay_matches_cache() commit 6e73e92b155c868ff7fce9d108839668caf1d9be upstream. When running an nfs stress test, I see quite a few cached replies that don't match up with the actual request. The first comment in replay_matches_cache() makes sense, but the code doesn't seem to match... fix it. This isn't exactly a bugfix, as the server isn't required to catch every case of a false retry. So, we may as well do this, but if this is fixing a problem then that suggests there's a client bug. Fixes: 53da6a53e1d4 ("nfsd4: catch some false session retries") Signed-off-by: Scott Mayhew Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 87ee9cbf7dcb..fc13236d1be1 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3058,12 +3058,17 @@ static bool replay_matches_cache(struct svc_rqst *rqstp, (bool)seq->cachethis) return false; /* - * If there's an error than the reply can have fewer ops than - * the call. But if we cached a reply with *more* ops than the - * call you're sending us now, then this new call is clearly not - * really a replay of the old one: + * If there's an error then the reply can have fewer ops than + * the call. */ - if (slot->sl_opcnt < argp->opcnt) + if (slot->sl_opcnt < argp->opcnt && !slot->sl_status) + return false; + /* + * But if we cached a reply with *more* ops than the call you're + * sending us now, then this new call is clearly not really a + * replay of the old one: + */ + if (slot->sl_opcnt > argp->opcnt) return false; /* This is the only check explicitly called by spec: */ if (!same_creds(&rqstp->rq_cred, &slot->sl_cred)) From 35ddeb365a88ab87bb69cb4fc43160cbf378795d Mon Sep 17 00:00:00 2001 From: Roman Bolshakov Date: Mon, 25 Nov 2019 19:56:53 +0300 Subject: [PATCH 2101/3715] scsi: qla2xxx: Drop superfluous INIT_WORK of del_work commit 600954e6f2df695434887dfc6a99a098859990cf upstream. del_work is already initialized inside qla2x00_alloc_fcport, there's no need to overwrite it. Indeed, it might prevent complete traversal of workqueue list. Fixes: a01c77d2cbc45 ("scsi: qla2xxx: Move session delete to driver work queue") Cc: Quinn Tran Link: https://lore.kernel.org/r/20191125165702.1013-5-r.bolshakov@yadro.com Acked-by: Himanshu Madhani Reviewed-by: Hannes Reinecke Tested-by: Hannes Reinecke Reviewed-by: Bart Van Assche Signed-off-by: Roman Bolshakov Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_target.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 69ed544d80ef..55227d20496a 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1210,7 +1210,6 @@ void qlt_schedule_sess_for_deletion(struct fc_port *sess, "Scheduling sess %p for deletion %8phC\n", sess, sess->port_name); - INIT_WORK(&sess->del_work, qla24xx_delete_sess_fn); queue_work(sess->vha->hw->wq, &sess->del_work); } From fb7b53cecb818b98cd1fada81286f53b34d4bb9f Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 26 Nov 2019 16:58:08 -0800 Subject: [PATCH 2102/3715] xfs: don't check for AG deadlock for realtime files in bunmapi commit 69ffe5960df16938bccfe1b65382af0b3de51265 upstream. Commit 5b094d6dac04 ("xfs: fix multi-AG deadlock in xfs_bunmapi") added a check in __xfs_bunmapi() to stop early if we would touch multiple AGs in the wrong order. However, this check isn't applicable for realtime files. In most cases, it just makes us do unnecessary commits. However, without the fix from the previous commit ("xfs: fix realtime file data space leak"), if the last and second-to-last extents also happen to have different "AG numbers", then the break actually causes __xfs_bunmapi() to return without making any progress, which sends xfs_itruncate_extents_flags() into an infinite loop. Fixes: 5b094d6dac04 ("xfs: fix multi-AG deadlock in xfs_bunmapi") Signed-off-by: Omar Sandoval Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_bmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 7b25a88569c9..84245d210182 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5556,7 +5556,7 @@ __xfs_bunmapi( * Make sure we don't touch multiple AGF headers out of order * in a single transaction, as that could cause AB-BA deadlocks. */ - if (!wasdel) { + if (!wasdel && !isrt) { agno = XFS_FSB_TO_AGNO(mp, del.br_startblock); if (prev_agno != NULLAGNUMBER && prev_agno > agno) break; From 4d445d3e83f208fa0c8b533915e9645353100ff5 Mon Sep 17 00:00:00 2001 From: Michael Haener Date: Fri, 29 Nov 2019 10:16:49 +0100 Subject: [PATCH 2103/3715] platform/x86: pmc_atom: Add Siemens CONNECT X300 to critclk_systems DMI table commit e8796c6c69d129420ee94a1906b18d86b84644d4 upstream. The CONNECT X300 uses the PMC clock for on-board components and gets stuck during boot if the clock is disabled. Therefore, add this device to the critical systems list. Tested on CONNECT X300. Fixes: 648e921888ad ("clk: x86: Stop marking clocks as CLK_IS_CRITICAL") Signed-off-by: Michael Haener Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/pmc_atom.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/platform/x86/pmc_atom.c b/drivers/platform/x86/pmc_atom.c index 971ae892c611..74997194fd88 100644 --- a/drivers/platform/x86/pmc_atom.c +++ b/drivers/platform/x86/pmc_atom.c @@ -482,6 +482,14 @@ static const struct dmi_system_id critclk_systems[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "6ES7647-8B"), }, }, + { + .ident = "CONNECT X300", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "SIEMENS AG"), + DMI_MATCH(DMI_PRODUCT_VERSION, "A5E45074588"), + }, + }, + { /*sentinel*/ } }; From 29ea30c084917f5bde08c318e3e36dd495191030 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 14 Nov 2019 16:01:18 +0100 Subject: [PATCH 2104/3715] Bluetooth: btusb: fix PM leak in error case of setup commit 3d44a6fd0775e6215e836423e27f8eedf8c871ea upstream. If setup() fails a reference for runtime PM has already been taken. Proper use of the error handling in btusb_open()is needed. You cannot just return. Fixes: ace31982585a3 ("Bluetooth: btusb: Add setup callback for chip init on USB") Signed-off-by: Oliver Neukum Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 73561bfd95d4..424f399cc79b 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -1123,7 +1123,7 @@ static int btusb_open(struct hci_dev *hdev) if (data->setup_on_usb) { err = data->setup_on_usb(hdev); if (err < 0) - return err; + goto setup_fail; } data->intf->needs_remote_wakeup = 1; @@ -1155,6 +1155,7 @@ done: failed: clear_bit(BTUSB_INTR_RUNNING, &data->flags); +setup_fail: usb_autopm_put_interface(data->intf); return err; } From bb5a3cc04468c8de9f957b9396ec6102911d3b0a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 19 Nov 2019 09:17:05 +0300 Subject: [PATCH 2105/3715] Bluetooth: delete a stray unlock commit df66499a1fab340c167250a5743931dc50d5f0fa upstream. We used to take a lock in amp_physical_cfm() but then we moved it to the caller function. Unfortunately the unlock on this error path was overlooked so it leads to a double unlock. Fixes: a514b17fab51 ("Bluetooth: Refactor locking in amp_physical_cfm") Signed-off-by: Dan Carpenter Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/l2cap_core.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index f63d9918b15a..ebdf1b0e576a 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4918,10 +4918,8 @@ void __l2cap_physical_cfm(struct l2cap_chan *chan, int result) BT_DBG("chan %p, result %d, local_amp_id %d, remote_amp_id %d", chan, result, local_amp_id, remote_amp_id); - if (chan->state == BT_DISCONN || chan->state == BT_CLOSED) { - l2cap_chan_unlock(chan); + if (chan->state == BT_DISCONN || chan->state == BT_CLOSED) return; - } if (chan->state != BT_CONNECTED) { l2cap_do_create(chan, result, local_amp_id, remote_amp_id); From f3cb0d22715681a02dbb287bb2f7c4989744e2ee Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Thu, 21 Nov 2019 14:20:36 -0600 Subject: [PATCH 2106/3715] Bluetooth: Fix memory leak in hci_connect_le_scan commit d088337c38a5cd8f0230fbf2d514ff7672f9d0d3 upstream. In the implementation of hci_connect_le_scan() when conn is added via hci_conn_add(), if hci_explicit_conn_params_set() fails the allocated memory for conn is leaked. Use hci_conn_del() to release it. Fixes: f75113a26008 ("Bluetooth: add hci_connect_le_scan") Signed-off-by: Navid Emamdoost Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_conn.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index bd41b78d131d..1d085eed72d0 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1054,8 +1054,10 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, if (!conn) return ERR_PTR(-ENOMEM); - if (hci_explicit_conn_params_set(hdev, dst, dst_type) < 0) + if (hci_explicit_conn_params_set(hdev, dst, dst_type) < 0) { + hci_conn_del(conn); return ERR_PTR(-EBUSY); + } conn->state = BT_CONNECT; set_bit(HCI_CONN_SCANNING, &conn->flags); From 6b74fc7903ed26c81a2351682501db6afd4acd51 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 25 Oct 2019 15:33:39 +0200 Subject: [PATCH 2107/3715] media: flexcop-usb: ensure -EIO is returned on error condition commit 74a96b51a36de4d86660fbc56b05d86668162d6b upstream. An earlier commit hard coded a return 0 to function flexcop_usb_i2c_req even though the an -EIO was intended to be returned in the case where ret != buflen. Fix this by replacing the return 0 with the return of ret to return the error return code. Addresses-Coverity: ("Unused value") Fixes: b430eaba0be5 ("[media] flexcop-usb: don't use stack for DMA") Signed-off-by: Colin Ian King Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/b2c2/flexcop-usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/usb/b2c2/flexcop-usb.c b/drivers/media/usb/b2c2/flexcop-usb.c index f1807c16438d..427cda457af6 100644 --- a/drivers/media/usb/b2c2/flexcop-usb.c +++ b/drivers/media/usb/b2c2/flexcop-usb.c @@ -294,7 +294,7 @@ static int flexcop_usb_i2c_req(struct flexcop_i2c_adapter *i2c, mutex_unlock(&fc_usb->data_mutex); - return 0; + return ret; } /* actual bus specific access functions, From 0902316524bf0d0e6de17263f9e02a3dd4e1d0ae Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 6 Nov 2019 18:31:24 +0100 Subject: [PATCH 2108/3715] regulator: ab8500: Remove AB8505 USB regulator commit 99c4f70df3a6446c56ca817c2d0f9c12d85d4e7c upstream. The USB regulator was removed for AB8500 in commit 41a06aa738ad ("regulator: ab8500: Remove USB regulator"). It was then added for AB8505 in commit 547f384f33db ("regulator: ab8500: add support for ab8505"). However, there was never an entry added for it in ab8505_regulator_match. This causes all regulators after it to be initialized with the wrong device tree data, eventually leading to an out-of-bounds array read. Given that it is not used anywhere in the kernel, it seems likely that similar arguments against supporting it exist for AB8505 (it is controlled by hardware). Therefore, simply remove it like for AB8500 instead of adding an entry in ab8505_regulator_match. Fixes: 547f384f33db ("regulator: ab8500: add support for ab8505") Cc: Linus Walleij Signed-off-by: Stephan Gerhold Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20191106173125.14496-1-stephan@gerhold.net Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/ab8500.c | 17 ----------------- include/linux/regulator/ab8500.h | 1 - 2 files changed, 18 deletions(-) diff --git a/drivers/regulator/ab8500.c b/drivers/regulator/ab8500.c index 0f97514e3474..c9f20e1394e3 100644 --- a/drivers/regulator/ab8500.c +++ b/drivers/regulator/ab8500.c @@ -1099,23 +1099,6 @@ static struct ab8500_regulator_info .update_val_idle = 0x82, .update_val_normal = 0x02, }, - [AB8505_LDO_USB] = { - .desc = { - .name = "LDO-USB", - .ops = &ab8500_regulator_mode_ops, - .type = REGULATOR_VOLTAGE, - .id = AB8505_LDO_USB, - .owner = THIS_MODULE, - .n_voltages = 1, - .volt_table = fixed_3300000_voltage, - }, - .update_bank = 0x03, - .update_reg = 0x82, - .update_mask = 0x03, - .update_val = 0x01, - .update_val_idle = 0x03, - .update_val_normal = 0x01, - }, [AB8505_LDO_AUDIO] = { .desc = { .name = "LDO-AUDIO", diff --git a/include/linux/regulator/ab8500.h b/include/linux/regulator/ab8500.h index d8ecefaf63ca..260c4aa1d976 100644 --- a/include/linux/regulator/ab8500.h +++ b/include/linux/regulator/ab8500.h @@ -38,7 +38,6 @@ enum ab8505_regulator_id { AB8505_LDO_AUX6, AB8505_LDO_INTCORE, AB8505_LDO_ADC, - AB8505_LDO_USB, AB8505_LDO_AUDIO, AB8505_LDO_ANAMIC1, AB8505_LDO_ANAMIC2, From 2d7c27957cac081eeacea7c38d8c9c59049883dc Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Wed, 9 Oct 2019 12:01:47 -0300 Subject: [PATCH 2109/3715] media: usb: fix memory leak in af9005_identify_state commit 2289adbfa559050d2a38bcd9caac1c18b800e928 upstream. In af9005_identify_state when returning -EIO the allocated buffer should be released. Replace the "return -EIO" with assignment into ret and move deb_info() under a check. Fixes: af4e067e1dcf ("V4L/DVB (5625): Add support for the AF9005 demodulator from Afatech") Signed-off-by: Navid Emamdoost Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb/af9005.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/media/usb/dvb-usb/af9005.c b/drivers/media/usb/dvb-usb/af9005.c index 986763b1b2b3..c047a0bdf91f 100644 --- a/drivers/media/usb/dvb-usb/af9005.c +++ b/drivers/media/usb/dvb-usb/af9005.c @@ -985,8 +985,9 @@ static int af9005_identify_state(struct usb_device *udev, else if (reply == 0x02) *cold = 0; else - return -EIO; - deb_info("Identify state cold = %d\n", *cold); + ret = -EIO; + if (!ret) + deb_info("Identify state cold = %d\n", *cold); err: kfree(buf); From 08bb799e43a9f4eee1fc00372df0477a6a0fb570 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 16 Oct 2019 16:56:50 +0200 Subject: [PATCH 2110/3715] dt-bindings: clock: renesas: rcar-usb2-clock-sel: Fix typo in example commit 830dbce7c76ea529decac7d23b808c1e7da3d891 upstream. The documented compatible value for R-Car H3 is "renesas,r8a7795-rcar-usb2-clock-sel", not "renesas,r8a77950-rcar-usb2-clock-sel". Fixes: 311accb64570db45 ("clk: renesas: rcar-usb2-clock-sel: Add R-Car USB 2.0 clock selector PHY") Signed-off-by: Geert Uytterhoeven Reviewed-by: Yoshihiro Shimoda Acked-by: Rob Herring Link: https://lore.kernel.org/r/20191016145650.30003-1-geert+renesas@glider.be Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.txt b/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.txt index e96e085271c1..83f6c6a7c41c 100644 --- a/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.txt +++ b/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.txt @@ -46,7 +46,7 @@ Required properties: Example (R-Car H3): usb2_clksel: clock-controller@e6590630 { - compatible = "renesas,r8a77950-rcar-usb2-clock-sel", + compatible = "renesas,r8a7795-rcar-usb2-clock-sel", "renesas,rcar-gen3-usb2-clock-sel"; reg = <0 0xe6590630 0 0x02>; clocks = <&cpg CPG_MOD 703>, <&usb_extal>, <&usb_xtal>; From ee21b594af100be62eea7732b1f0114b2609c613 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 27 Nov 2019 22:15:43 +0800 Subject: [PATCH 2111/3715] tty: serial: msm_serial: Fix lockup for sysrq and oops commit 0e4f7f920a5c6bfe5e851e989f27b35a0cc7fb7e upstream. As the commit 677fe555cbfb ("serial: imx: Fix recursive locking bug") has mentioned the uart driver might cause recursive locking between normal printing and the kernel debugging facilities (e.g. sysrq and oops). In the commit it gave out suggestion for fixing recursive locking issue: "The solution is to avoid locking in the sysrq case and trylock in the oops_in_progress case." This patch follows the suggestion (also used the exactly same code with other serial drivers, e.g. amba-pl011.c) to fix the recursive locking issue, this can avoid stuck caused by deadlock and print out log for sysrq and oops. Fixes: 04896a77a97b ("msm_serial: serial driver for MSM7K onboard serial peripheral.") Signed-off-by: Leo Yan Reviewed-by: Jeffrey Hugo Link: https://lore.kernel.org/r/20191127141544.4277-2-leo.yan@linaro.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/msm_serial.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/msm_serial.c b/drivers/tty/serial/msm_serial.c index e937fb189034..77a1f00fe843 100644 --- a/drivers/tty/serial/msm_serial.c +++ b/drivers/tty/serial/msm_serial.c @@ -1588,6 +1588,7 @@ static void __msm_console_write(struct uart_port *port, const char *s, int num_newlines = 0; bool replaced = false; void __iomem *tf; + int locked = 1; if (is_uartdm) tf = port->membase + UARTDM_TF; @@ -1600,7 +1601,13 @@ static void __msm_console_write(struct uart_port *port, const char *s, num_newlines++; count += num_newlines; - spin_lock(&port->lock); + if (port->sysrq) + locked = 0; + else if (oops_in_progress) + locked = spin_trylock(&port->lock); + else + spin_lock(&port->lock); + if (is_uartdm) msm_reset_dm_count(port, count); @@ -1636,7 +1643,9 @@ static void __msm_console_write(struct uart_port *port, const char *s, iowrite32_rep(tf, buf, 1); i += num_chars; } - spin_unlock(&port->lock); + + if (locked) + spin_unlock(&port->lock); } static void msm_console_write(struct console *co, const char *s, From 973536f045d2850220c9b12871b38024fc75a276 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 21 Apr 2019 18:53:50 -0400 Subject: [PATCH 2112/3715] fix compat handling of FICLONERANGE, FIDEDUPERANGE and FS_IOC_FIEMAP commit 6b2daec19094a90435abe67d16fb43b1a5527254 upstream. Unlike FICLONE, all of those take a pointer argument; they do need compat_ptr() applied to arg. Fixes: d79bdd52d8be ("vfs: wire up compat ioctl for CLONE/CLONE_RANGE") Fixes: 54dbc1517237 ("vfs: hoist the btrfs deduplication ioctl to the vfs") Fixes: ceac204e1da9 ("fs: make fiemap work from compat_ioctl") Signed-off-by: Al Viro Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- fs/compat_ioctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 033e8e6aabb7..f445bc9cdc94 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -1577,9 +1577,10 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, #endif case FICLONE: + goto do_ioctl; case FICLONERANGE: case FIDEDUPERANGE: - goto do_ioctl; + goto found_handler; case FIBMAP: case FIGETBSZ: From 0f6ecead5cfd2bea08a61df2fe695b7e13105494 Mon Sep 17 00:00:00 2001 From: Chad Dupuis Date: Tue, 26 Mar 2019 00:38:33 -0700 Subject: [PATCH 2113/3715] scsi: qedf: Do not retry ELS request if qedf_alloc_cmd fails [ Upstream commit f1c43590365bac054d753d808dbbd207d09e088d ] If we cannot allocate an ELS middlepath request, simply fail instead of trying to delay and then reallocate. This delay logic is causing soft lockup messages: NMI watchdog: BUG: soft lockup - CPU#2 stuck for 22s! [kworker/2:1:7639] Modules linked in: xt_CHECKSUM ipt_MASQUERADE nf_nat_masquerade_ipv4 tun devlink ip6t_rpfilter ipt_REJECT nf_reject_ipv4 ip6t_REJECT nf_reject_ipv6 xt_conntrack ip_set nfnetlink ebtable_nat ebtable_broute bridge stp llc ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter dm_service_time vfat fat rpcrdma sunrpc ib_isert iscsi_target_mod ib_iser libiscsi scsi_transport_iscsi ib_srpt target_core_mod ib_srp scsi_transport_srp ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd iTCO_wdt iTCO_vendor_support qedr(OE) ib_core joydev ipmi_ssif pcspkr hpilo hpwdt sg ipmi_si ipmi_devintf ipmi_msghandler ioatdma shpchp lpc_ich wmi dca acpi_power_meter dm_multipath ip_tables xfs libcrc32c sd_mod crc_t10dif crct10dif_generic qedf(OE) libfcoe mgag200 libfc i2c_algo_bit drm_kms_helper scsi_transport_fc qede(OE) syscopyarea sysfillrect sysimgblt fb_sys_fops ttm qed(OE) drm crct10dif_pclmul e1000e crct10dif_common crc32c_intel scsi_tgt hpsa i2c_core ptp scsi_transport_sas pps_core dm_mirror dm_region_hash dm_log dm_mod CPU: 2 PID: 7639 Comm: kworker/2:1 Kdump: loaded Tainted: G OEL ------------ 3.10.0-861.el7.x86_64 #1 Hardware name: HP ProLiant DL580 Gen9/ProLiant DL580 Gen9, BIOS U17 07/21/2016 Workqueue: qedf_2_dpc qedf_handle_rrq [qedf] task: ffff959edd628fd0 ti: ffff959ed6f08000 task.ti: ffff959ed6f08000 RIP: 0010:[] [] delay_tsc+0x3a/0x60 RSP: 0018:ffff959ed6f0bd30 EFLAGS: 00000246 RAX: 000000008ef5f791 RBX: 5f646d635f666465 RCX: 0000025b8ededa2f RDX: 000000000000025b RSI: 0000000000000002 RDI: 0000000000217d1e RBP: ffff959ed6f0bd30 R08: ffffffffc079aae8 R09: 0000000000000200 R10: ffffffffc07952c6 R11: 0000000000000000 R12: 6c6c615f66646571 R13: ffff959ed6f0bcc8 R14: ffff959ed6f0bd08 R15: ffff959e00000028 FS: 0000000000000000(0000) GS:ffff959eff480000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f4117fa1eb0 CR3: 0000002039e66000 CR4: 00000000003607e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: [] __const_udelay+0x2d/0x30 [] qedf_initiate_els+0x13a/0x450 [qedf] [] ? qedf_srr_compl+0x2a0/0x2a0 [qedf] [] qedf_send_rrq+0x127/0x230 [qedf] [] qedf_handle_rrq+0x15/0x20 [qedf] [] process_one_work+0x17f/0x440 [] worker_thread+0x126/0x3c0 [] ? manage_workers.isra.24+0x2a0/0x2a0 [] kthread+0xd1/0xe0 [] ? insert_kthread_work+0x40/0x40 [] ret_from_fork_nospec_begin+0x21/0x21 [] ? insert_kthread_work+0x40/0x40 Signed-off-by: Chad Dupuis Signed-off-by: Saurav Kashyap Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qedf/qedf_els.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/qedf/qedf_els.c b/drivers/scsi/qedf/qedf_els.c index 59c18ca4cda9..e5927a09f7bc 100644 --- a/drivers/scsi/qedf/qedf_els.c +++ b/drivers/scsi/qedf/qedf_els.c @@ -23,8 +23,6 @@ static int qedf_initiate_els(struct qedf_rport *fcport, unsigned int op, int rc = 0; uint32_t did, sid; uint16_t xid; - uint32_t start_time = jiffies / HZ; - uint32_t current_time; struct fcoe_wqe *sqe; unsigned long flags; u16 sqe_idx; @@ -50,18 +48,12 @@ static int qedf_initiate_els(struct qedf_rport *fcport, unsigned int op, goto els_err; } -retry_els: els_req = qedf_alloc_cmd(fcport, QEDF_ELS); if (!els_req) { - current_time = jiffies / HZ; - if ((current_time - start_time) > 10) { - QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, - "els: Failed els 0x%x\n", op); - rc = -ENOMEM; - goto els_err; - } - mdelay(20 * USEC_PER_MSEC); - goto retry_els; + QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_ELS, + "Failed to alloc ELS request 0x%x\n", op); + rc = -ENOMEM; + goto els_err; } QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "initiate_els els_req = " From d3f3d3999307fa2566848ee3951a8afaa02076f5 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 24 May 2019 00:24:33 +0300 Subject: [PATCH 2114/3715] drm/mst: Fix MST sideband up-reply failure handling [ Upstream commit d8fd3722207f154b53c80eee2cf4977c3fc25a92 ] Fix the breakage resulting in the stacktrace below, due to tx queue being full when trying to send an up-reply. txmsg->seqno is -1 in this case leading to a corruption of the mstb object by txmsg->dst->tx_slots[txmsg->seqno] = NULL; in process_single_up_tx_qlock(). [ +0,005162] [drm:process_single_tx_qlock [drm_kms_helper]] set_hdr_from_dst_qlock: failed to find slot [ +0,000015] [drm:drm_dp_send_up_ack_reply.constprop.19 [drm_kms_helper]] failed to send msg in q -11 [ +0,000939] BUG: kernel NULL pointer dereference, address: 00000000000005a0 [ +0,006982] #PF: supervisor write access in kernel mode [ +0,005223] #PF: error_code(0x0002) - not-present page [ +0,005135] PGD 0 P4D 0 [ +0,002581] Oops: 0002 [#1] PREEMPT SMP NOPTI [ +0,004359] CPU: 1 PID: 1200 Comm: kworker/u16:3 Tainted: G U 5.2.0-rc1+ #410 [ +0,008433] Hardware name: Intel Corporation Ice Lake Client Platform/IceLake U DDR4 SODIMM PD RVP, BIOS ICLSFWR1.R00.3175.A00.1904261428 04/26/2019 [ +0,013323] Workqueue: i915-dp i915_digport_work_func [i915] [ +0,005676] RIP: 0010:queue_work_on+0x19/0x70 [ +0,004372] Code: ff ff ff 0f 1f 40 00 66 2e 0f 1f 84 00 00 00 00 00 41 56 49 89 f6 41 55 41 89 fd 41 54 55 53 48 89 d3 9c 5d fa e8 e7 81 0c 00 48 0f ba 2b 00 73 31 45 31 e4 f7 c5 00 02 00 00 74 13 e8 cf 7f [ +0,018750] RSP: 0018:ffffc900007dfc50 EFLAGS: 00010006 [ +0,005222] RAX: 0000000000000046 RBX: 00000000000005a0 RCX: 0000000000000001 [ +0,007133] RDX: 000000000001b608 RSI: 0000000000000000 RDI: ffffffff82121972 [ +0,007129] RBP: 0000000000000202 R08: 0000000000000000 R09: 0000000000000001 [ +0,007129] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88847bfa5096 [ +0,007131] R13: 0000000000000010 R14: ffff88849c08f3f8 R15: 0000000000000000 [ +0,007128] FS: 0000000000000000(0000) GS:ffff88849dc80000(0000) knlGS:0000000000000000 [ +0,008083] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ +0,005749] CR2: 00000000000005a0 CR3: 0000000005210006 CR4: 0000000000760ee0 [ +0,007128] PKRU: 55555554 [ +0,002722] Call Trace: [ +0,002458] drm_dp_mst_handle_up_req+0x517/0x540 [drm_kms_helper] [ +0,006197] ? drm_dp_mst_hpd_irq+0x5b/0x9c0 [drm_kms_helper] [ +0,005764] drm_dp_mst_hpd_irq+0x5b/0x9c0 [drm_kms_helper] [ +0,005623] ? intel_dp_hpd_pulse+0x205/0x370 [i915] [ +0,005018] intel_dp_hpd_pulse+0x205/0x370 [i915] [ +0,004836] i915_digport_work_func+0xbb/0x140 [i915] [ +0,005108] process_one_work+0x245/0x610 [ +0,004027] worker_thread+0x37/0x380 [ +0,003684] ? process_one_work+0x610/0x610 [ +0,004184] kthread+0x119/0x130 [ +0,003240] ? kthread_park+0x80/0x80 [ +0,003668] ret_from_fork+0x24/0x50 Cc: Lyude Paul Cc: Dave Airlie Signed-off-by: Imre Deak Reviewed-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20190523212433.9058-1-imre.deak@intel.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_dp_mst_topology.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index bb9a9852ec22..ef86721c06f3 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1540,7 +1540,11 @@ static void process_single_up_tx_qlock(struct drm_dp_mst_topology_mgr *mgr, if (ret != 1) DRM_DEBUG_KMS("failed to send msg in q %d\n", ret); - txmsg->dst->tx_slots[txmsg->seqno] = NULL; + if (txmsg->seqno != -1) { + WARN_ON((unsigned int)txmsg->seqno > + ARRAY_SIZE(txmsg->dst->tx_slots)); + txmsg->dst->tx_slots[txmsg->seqno] = NULL; + } } static void drm_dp_queue_down_tx(struct drm_dp_mst_topology_mgr *mgr, From 72e77ea7b5935428d816124fb5a104a777410d51 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Mon, 3 Jun 2019 16:56:57 +1000 Subject: [PATCH 2115/3715] powerpc/pseries/hvconsole: Fix stack overread via udbg [ Upstream commit 934bda59f286d0221f1a3ebab7f5156a996cc37d ] While developing KASAN for 64-bit book3s, I hit the following stack over-read. It occurs because the hypercall to put characters onto the terminal takes 2 longs (128 bits/16 bytes) of characters at a time, and so hvc_put_chars() would unconditionally copy 16 bytes from the argument buffer, regardless of supplied length. However, udbg_hvc_putc() can call hvc_put_chars() with a single-byte buffer, leading to the error. ================================================================== BUG: KASAN: stack-out-of-bounds in hvc_put_chars+0xdc/0x110 Read of size 8 at addr c0000000023e7a90 by task swapper/0 CPU: 0 PID: 0 Comm: swapper Not tainted 5.2.0-rc2-next-20190528-02824-g048a6ab4835b #113 Call Trace: dump_stack+0x104/0x154 (unreliable) print_address_description+0xa0/0x30c __kasan_report+0x20c/0x224 kasan_report+0x18/0x30 __asan_report_load8_noabort+0x24/0x40 hvc_put_chars+0xdc/0x110 hvterm_raw_put_chars+0x9c/0x110 udbg_hvc_putc+0x154/0x200 udbg_write+0xf0/0x240 console_unlock+0x868/0xd30 register_console+0x970/0xe90 register_early_udbg_console+0xf8/0x114 setup_arch+0x108/0x790 start_kernel+0x104/0x784 start_here_common+0x1c/0x534 Memory state around the buggy address: c0000000023e7980: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c0000000023e7a00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 f1 f1 >c0000000023e7a80: f1 f1 01 f2 f2 f2 00 00 00 00 00 00 00 00 00 00 ^ c0000000023e7b00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c0000000023e7b80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ================================================================== Document that a 16-byte buffer is requred, and provide it in udbg. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/platforms/pseries/hvconsole.c | 2 +- drivers/tty/hvc/hvc_vio.c | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/hvconsole.c b/arch/powerpc/platforms/pseries/hvconsole.c index 74da18de853a..73ec15cd2708 100644 --- a/arch/powerpc/platforms/pseries/hvconsole.c +++ b/arch/powerpc/platforms/pseries/hvconsole.c @@ -62,7 +62,7 @@ EXPORT_SYMBOL(hvc_get_chars); * @vtermno: The vtermno or unit_address of the adapter from which the data * originated. * @buf: The character buffer that contains the character data to send to - * firmware. + * firmware. Must be at least 16 bytes, even if count is less than 16. * @count: Send this number of characters. */ int hvc_put_chars(uint32_t vtermno, const char *buf, int count) diff --git a/drivers/tty/hvc/hvc_vio.c b/drivers/tty/hvc/hvc_vio.c index a1d272ac82bb..c33150fcd964 100644 --- a/drivers/tty/hvc/hvc_vio.c +++ b/drivers/tty/hvc/hvc_vio.c @@ -120,6 +120,14 @@ static int hvterm_raw_get_chars(uint32_t vtermno, char *buf, int count) return got; } +/** + * hvterm_raw_put_chars: send characters to firmware for given vterm adapter + * @vtermno: The virtual terminal number. + * @buf: The characters to send. Because of the underlying hypercall in + * hvc_put_chars(), this buffer must be at least 16 bytes long, even if + * you are sending fewer chars. + * @count: number of chars to send. + */ static int hvterm_raw_put_chars(uint32_t vtermno, const char *buf, int count) { struct hvterm_priv *pv = hvterm_privs[vtermno]; @@ -232,6 +240,7 @@ static const struct hv_ops hvterm_hvsi_ops = { static void udbg_hvc_putc(char c) { int count = -1; + unsigned char bounce_buffer[16]; if (!hvterm_privs[0]) return; @@ -242,7 +251,12 @@ static void udbg_hvc_putc(char c) do { switch(hvterm_privs[0]->proto) { case HV_PROTOCOL_RAW: - count = hvterm_raw_put_chars(0, &c, 1); + /* + * hvterm_raw_put_chars requires at least a 16-byte + * buffer, so go via the bounce buffer + */ + bounce_buffer[0] = c; + count = hvterm_raw_put_chars(0, bounce_buffer, 1); break; case HV_PROTOCOL_HVSI: count = hvterm_hvsi_put_chars(0, &c, 1); From 544d4b9fc42469a8dd5e05bc6cbce31d6848b0e0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 17 Jun 2019 16:02:28 +0200 Subject: [PATCH 2116/3715] selftests: rtnetlink: add addresses with fixed life time [ Upstream commit 3cfa148826e3c666da1cc2a43fbe8689e2650636 ] This exercises kernel code path that deal with addresses that have a limited lifetime. Without previous fix, this triggers following crash on net-next: BUG: KASAN: null-ptr-deref in check_lifetime+0x403/0x670 Read of size 8 at addr 0000000000000010 by task kworker [..] Signed-off-by: Florian Westphal Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- tools/testing/selftests/net/rtnetlink.sh | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 891130daac7c..8a5066d98e72 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -195,6 +195,26 @@ kci_test_route_get() echo "PASS: route get" } +kci_test_addrlft() +{ + for i in $(seq 10 100) ;do + lft=$(((RANDOM%3) + 1)) + ip addr add 10.23.11.$i/32 dev "$devdummy" preferred_lft $lft valid_lft $((lft+1)) + check_err $? + done + + sleep 5 + + ip addr show dev "$devdummy" | grep "10.23.11." + if [ $? -eq 0 ]; then + echo "FAIL: preferred_lft addresses remaining" + check_err 1 + return + fi + + echo "PASS: preferred_lft addresses have expired" +} + kci_test_addrlabel() { ret=0 @@ -245,6 +265,7 @@ kci_test_rtnl() kci_test_polrouting kci_test_route_get + kci_test_addrlft kci_test_tc kci_test_gre kci_test_bridge From 85aa8f877cc99717353089ac02cdc322491137fe Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 10 Oct 2019 15:52:34 +0100 Subject: [PATCH 2117/3715] rxrpc: Fix possible NULL pointer access in ICMP handling [ Upstream commit f0308fb0708078d6c1d8a4d533941a7a191af634 ] If an ICMP packet comes in on the UDP socket backing an AF_RXRPC socket as the UDP socket is being shut down, rxrpc_error_report() may get called to deal with it after sk_user_data on the UDP socket has been cleared, leading to a NULL pointer access when this local endpoint record gets accessed. Fix this by just returning immediately if sk_user_data was NULL. The oops looks like the following: #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page ... RIP: 0010:rxrpc_error_report+0x1bd/0x6a9 ... Call Trace: ? sock_queue_err_skb+0xbd/0xde ? __udp4_lib_err+0x313/0x34d __udp4_lib_err+0x313/0x34d icmp_unreach+0x1ee/0x207 icmp_rcv+0x25b/0x28f ip_protocol_deliver_rcu+0x95/0x10e ip_local_deliver+0xe9/0x148 __netif_receive_skb_one_core+0x52/0x6e process_backlog+0xdc/0x177 net_rx_action+0xf9/0x270 __do_softirq+0x1b6/0x39a ? smpboot_register_percpu_thread+0xce/0xce run_ksoftirqd+0x1d/0x42 smpboot_thread_fn+0x19e/0x1b3 kthread+0xf1/0xf6 ? kthread_delayed_work_timer_fn+0x83/0x83 ret_from_fork+0x24/0x30 Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both") Reported-by: syzbot+611164843bd48cc2190c@syzkaller.appspotmail.com Signed-off-by: David Howells Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/rxrpc/peer_event.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 7f749505e699..7d73e8ce6660 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -150,6 +150,9 @@ void rxrpc_error_report(struct sock *sk) struct rxrpc_peer *peer; struct sk_buff *skb; + if (unlikely(!local)) + return; + _enter("%p{%d}", sk, local->debug_id); skb = sock_dequeue_err_skb(sk); From a0758704a2d8cb58697be6d91ed3c0cab7d97d47 Mon Sep 17 00:00:00 2001 From: Masashi Honma Date: Fri, 27 Sep 2019 11:51:45 +0900 Subject: [PATCH 2118/3715] ath9k_htc: Modify byte order for an error message [ Upstream commit e01fddc19d215f6ad397894ec2a851d99bf154e2 ] rs_datalen is be16 so we need to convert it before printing. Signed-off-by: Masashi Honma Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath9k/htc_drv_txrx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c index b38a586ea59a..d913b9e9bd8f 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c @@ -986,7 +986,7 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv, (skb->len - HTC_RX_FRAME_HEADER_SIZE) != 0) { ath_err(common, "Corrupted RX data len, dropping (dlen: %d, skblen: %d)\n", - rxstatus->rs_datalen, skb->len); + be16_to_cpu(rxstatus->rs_datalen), skb->len); goto rx_next; } From a6c433bf734165adf58d742b18924381d624ca80 Mon Sep 17 00:00:00 2001 From: Masashi Honma Date: Fri, 27 Sep 2019 11:51:46 +0900 Subject: [PATCH 2119/3715] ath9k_htc: Discard undersized packets [ Upstream commit cd486e627e67ee9ab66914d36d3127ef057cc010 ] Sometimes the hardware will push small packets that trigger a WARN_ON in mac80211. Discard them early to avoid this issue. This patch ports 2 patches from ath9k to ath9k_htc. commit 3c0efb745a172bfe96459e20cbd37b0c945d5f8d "ath9k: discard undersized packets". commit df5c4150501ee7e86383be88f6490d970adcf157 "ath9k: correctly handle short radar pulses". [ 112.835889] ------------[ cut here ]------------ [ 112.835971] WARNING: CPU: 5 PID: 0 at net/mac80211/rx.c:804 ieee80211_rx_napi+0xaac/0xb40 [mac80211] [ 112.835973] Modules linked in: ath9k_htc ath9k_common ath9k_hw ath mac80211 cfg80211 libarc4 nouveau snd_hda_codec_hdmi intel_rapl_msr intel_rapl_common x86_pkg_temp_thermal intel_powerclamp coretemp snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio snd_hda_intel snd_hda_codec video snd_hda_core ttm snd_hwdep drm_kms_helper snd_pcm crct10dif_pclmul snd_seq_midi drm snd_seq_midi_event crc32_pclmul snd_rawmidi ghash_clmulni_intel snd_seq aesni_intel aes_x86_64 crypto_simd cryptd snd_seq_device glue_helper snd_timer sch_fq_codel i2c_algo_bit fb_sys_fops snd input_leds syscopyarea sysfillrect sysimgblt intel_cstate mei_me intel_rapl_perf soundcore mxm_wmi lpc_ich mei kvm_intel kvm mac_hid irqbypass parport_pc ppdev lp parport ip_tables x_tables autofs4 hid_generic usbhid hid raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear e1000e ahci libahci wmi [ 112.836022] CPU: 5 PID: 0 Comm: swapper/5 Not tainted 5.3.0-wt #1 [ 112.836023] Hardware name: MouseComputer Co.,Ltd. X99-S01/X99-S01, BIOS 1.0C-W7 04/01/2015 [ 112.836056] RIP: 0010:ieee80211_rx_napi+0xaac/0xb40 [mac80211] [ 112.836059] Code: 00 00 66 41 89 86 b0 00 00 00 e9 c8 fa ff ff 4c 89 b5 40 ff ff ff 49 89 c6 e9 c9 fa ff ff 48 c7 c7 e0 a2 a5 c0 e8 47 41 b0 e9 <0f> 0b 48 89 df e8 5a 94 2d ea e9 02 f9 ff ff 41 39 c1 44 89 85 60 [ 112.836060] RSP: 0018:ffffaa6180220da8 EFLAGS: 00010286 [ 112.836062] RAX: 0000000000000024 RBX: ffff909a20eeda00 RCX: 0000000000000000 [ 112.836064] RDX: 0000000000000000 RSI: ffff909a2f957448 RDI: ffff909a2f957448 [ 112.836065] RBP: ffffaa6180220e78 R08: 00000000000006e9 R09: 0000000000000004 [ 112.836066] R10: 000000000000000a R11: 0000000000000001 R12: 0000000000000000 [ 112.836068] R13: ffff909a261a47a0 R14: 0000000000000000 R15: 0000000000000004 [ 112.836070] FS: 0000000000000000(0000) GS:ffff909a2f940000(0000) knlGS:0000000000000000 [ 112.836071] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 112.836073] CR2: 00007f4e3ffffa08 CR3: 00000001afc0a006 CR4: 00000000001606e0 [ 112.836074] Call Trace: [ 112.836076] [ 112.836083] ? finish_td+0xb3/0xf0 [ 112.836092] ? ath9k_rx_prepare.isra.11+0x22f/0x2a0 [ath9k_htc] [ 112.836099] ath9k_rx_tasklet+0x10b/0x1d0 [ath9k_htc] [ 112.836105] tasklet_action_common.isra.22+0x63/0x110 [ 112.836108] tasklet_action+0x22/0x30 [ 112.836115] __do_softirq+0xe4/0x2da [ 112.836118] irq_exit+0xae/0xb0 [ 112.836121] do_IRQ+0x86/0xe0 [ 112.836125] common_interrupt+0xf/0xf [ 112.836126] [ 112.836130] RIP: 0010:cpuidle_enter_state+0xa9/0x440 [ 112.836133] Code: 3d bc 20 38 55 e8 f7 1d 84 ff 49 89 c7 0f 1f 44 00 00 31 ff e8 28 29 84 ff 80 7d d3 00 0f 85 e6 01 00 00 fb 66 0f 1f 44 00 00 <45> 85 ed 0f 89 ff 01 00 00 41 c7 44 24 10 00 00 00 00 48 83 c4 18 [ 112.836134] RSP: 0018:ffffaa61800e3e48 EFLAGS: 00000246 ORIG_RAX: ffffffffffffffde [ 112.836136] RAX: ffff909a2f96b340 RBX: ffffffffabb58200 RCX: 000000000000001f [ 112.836137] RDX: 0000001a458adc5d RSI: 0000000026c9b581 RDI: 0000000000000000 [ 112.836139] RBP: ffffaa61800e3e88 R08: 0000000000000002 R09: 000000000002abc0 [ 112.836140] R10: ffffaa61800e3e18 R11: 000000000000002d R12: ffffca617fb40b00 [ 112.836141] R13: 0000000000000002 R14: ffffffffabb582d8 R15: 0000001a458adc5d [ 112.836145] ? cpuidle_enter_state+0x98/0x440 [ 112.836149] ? menu_select+0x370/0x600 [ 112.836151] cpuidle_enter+0x2e/0x40 [ 112.836154] call_cpuidle+0x23/0x40 [ 112.836156] do_idle+0x204/0x280 [ 112.836159] cpu_startup_entry+0x1d/0x20 [ 112.836164] start_secondary+0x167/0x1c0 [ 112.836169] secondary_startup_64+0xa4/0xb0 [ 112.836173] ---[ end trace 9f4cd18479cc5ae5 ]--- Signed-off-by: Masashi Honma Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath9k/htc_drv_txrx.c | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c index d913b9e9bd8f..4748f557c753 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c @@ -973,6 +973,8 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv, struct ath_htc_rx_status *rxstatus; struct ath_rx_status rx_stats; bool decrypt_error = false; + __be16 rs_datalen; + bool is_phyerr; if (skb->len < HTC_RX_FRAME_HEADER_SIZE) { ath_err(common, "Corrupted RX frame, dropping (len: %d)\n", @@ -982,11 +984,24 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv, rxstatus = (struct ath_htc_rx_status *)skb->data; - if (be16_to_cpu(rxstatus->rs_datalen) - - (skb->len - HTC_RX_FRAME_HEADER_SIZE) != 0) { + rs_datalen = be16_to_cpu(rxstatus->rs_datalen); + if (unlikely(rs_datalen - + (skb->len - HTC_RX_FRAME_HEADER_SIZE) != 0)) { ath_err(common, "Corrupted RX data len, dropping (dlen: %d, skblen: %d)\n", - be16_to_cpu(rxstatus->rs_datalen), skb->len); + rs_datalen, skb->len); + goto rx_next; + } + + is_phyerr = rxstatus->rs_status & ATH9K_RXERR_PHY; + /* + * Discard zero-length packets and packets smaller than an ACK + * which are not PHY_ERROR (short radar pulses have a length of 3) + */ + if (unlikely(!rs_datalen || (rs_datalen < 10 && !is_phyerr))) { + ath_warn(common, + "Short RX data len, dropping (dlen: %d)\n", + rs_datalen); goto rx_next; } @@ -1011,7 +1026,7 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv, * Process PHY errors and return so that the packet * can be dropped. */ - if (rx_stats.rs_status & ATH9K_RXERR_PHY) { + if (unlikely(is_phyerr)) { /* TODO: Not using DFS processing now. */ if (ath_cmn_process_fft(&priv->spec_priv, hdr, &rx_stats, rx_status->mactime)) { From 55db26f2e970de124c01afe67adb40b0f99c7312 Mon Sep 17 00:00:00 2001 From: Anand Moon Date: Mon, 2 Sep 2019 05:49:35 +0000 Subject: [PATCH 2120/3715] arm64: dts: meson: odroid-c2: Disable usb_otg bus to avoid power failed warning [ Upstream commit 72c9b5f6f75fbc6c47e0a2d02bc3838a2a47c90a ] usb_otg bus needs to get initialize from the u-boot to be configured to used as power source to SBC or usb otg port will get configured as host device. Right now this support is missing in the u-boot and phy driver so to avoid power failed warning, we would disable this feature until proper fix is found. [ 2.716048] phy phy-c0000000.phy.0: USB ID detect failed! [ 2.720186] phy phy-c0000000.phy.0: phy poweron failed --> -22 [ 2.726001] ------------[ cut here ]------------ [ 2.730583] WARNING: CPU: 0 PID: 12 at drivers/regulator/core.c:2039 _regulator_put+0x3c/0xe8 [ 2.738983] Modules linked in: [ 2.742005] CPU: 0 PID: 12 Comm: kworker/0:1 Not tainted 5.2.9-1-ARCH #1 [ 2.748643] Hardware name: Hardkernel ODROID-C2 (DT) [ 2.753566] Workqueue: events deferred_probe_work_func [ 2.758649] pstate: 60000005 (nZCv daif -PAN -UAO) [ 2.763394] pc : _regulator_put+0x3c/0xe8 [ 2.767361] lr : _regulator_put+0x3c/0xe8 [ 2.771326] sp : ffff000011aa3a50 [ 2.774604] x29: ffff000011aa3a50 x28: ffff80007ed1b600 [ 2.779865] x27: ffff80007f7036a8 x26: ffff80007f7036a8 [ 2.785126] x25: 0000000000000000 x24: ffff000011a44458 [ 2.790387] x23: ffff000011344218 x22: 0000000000000009 [ 2.795649] x21: ffff000011aa3b68 x20: ffff80007ed1b500 [ 2.800910] x19: ffff80007ed1b500 x18: 0000000000000010 [ 2.806171] x17: 000000005be5943c x16: 00000000f1c73b29 [ 2.811432] x15: ffffffffffffffff x14: ffff0000117396c8 [ 2.816694] x13: ffff000091aa37a7 x12: ffff000011aa37af [ 2.821955] x11: ffff000011763000 x10: ffff000011aa3730 [ 2.827216] x9 : 00000000ffffffd0 x8 : ffff000010871760 [ 2.832477] x7 : 00000000000000d0 x6 : ffff0000119d151b [ 2.837739] x5 : 000000000000000f x4 : 0000000000000000 [ 2.843000] x3 : 0000000000000000 x2 : 38104b2678c20100 [ 2.848261] x1 : 0000000000000000 x0 : 0000000000000024 [ 2.853523] Call trace: [ 2.855940] _regulator_put+0x3c/0xe8 [ 2.859562] regulator_put+0x34/0x48 [ 2.863098] regulator_bulk_free+0x40/0x58 [ 2.867153] devm_regulator_bulk_release+0x24/0x30 [ 2.871896] release_nodes+0x1f0/0x2e0 [ 2.875604] devres_release_all+0x64/0xa4 [ 2.879571] really_probe+0x1c8/0x3e0 [ 2.883194] driver_probe_device+0xe4/0x138 [ 2.887334] __device_attach_driver+0x90/0x110 [ 2.891733] bus_for_each_drv+0x8c/0xd8 [ 2.895527] __device_attach+0xdc/0x160 [ 2.899322] device_initial_probe+0x24/0x30 [ 2.903463] bus_probe_device+0x9c/0xa8 [ 2.907258] deferred_probe_work_func+0xa0/0xf0 [ 2.911745] process_one_work+0x1b4/0x408 [ 2.915711] worker_thread+0x54/0x4b8 [ 2.919334] kthread+0x12c/0x130 [ 2.922526] ret_from_fork+0x10/0x1c [ 2.926060] ---[ end trace 51a68f4c0035d6c0 ]--- [ 2.930691] ------------[ cut here ]------------ [ 2.935242] WARNING: CPU: 0 PID: 12 at drivers/regulator/core.c:2039 _regulator_put+0x3c/0xe8 [ 2.943653] Modules linked in: [ 2.946675] CPU: 0 PID: 12 Comm: kworker/0:1 Tainted: G W 5.2.9-1-ARCH #1 [ 2.954694] Hardware name: Hardkernel ODROID-C2 (DT) [ 2.959613] Workqueue: events deferred_probe_work_func [ 2.964700] pstate: 60000005 (nZCv daif -PAN -UAO) [ 2.969445] pc : _regulator_put+0x3c/0xe8 [ 2.973412] lr : _regulator_put+0x3c/0xe8 [ 2.977377] sp : ffff000011aa3a50 [ 2.980655] x29: ffff000011aa3a50 x28: ffff80007ed1b600 [ 2.985916] x27: ffff80007f7036a8 x26: ffff80007f7036a8 [ 2.991177] x25: 0000000000000000 x24: ffff000011a44458 [ 2.996439] x23: ffff000011344218 x22: 0000000000000009 [ 3.001700] x21: ffff000011aa3b68 x20: ffff80007ed1bd00 [ 3.006961] x19: ffff80007ed1bd00 x18: 0000000000000010 [ 3.012222] x17: 000000005be5943c x16: 00000000f1c73b29 [ 3.017484] x15: ffffffffffffffff x14: ffff0000117396c8 [ 3.022745] x13: ffff000091aa37a7 x12: ffff000011aa37af [ 3.028006] x11: ffff000011763000 x10: ffff000011aa3730 [ 3.033267] x9 : 00000000ffffffd0 x8 : ffff000010871760 [ 3.038528] x7 : 00000000000000fd x6 : ffff0000119d151b [ 3.043790] x5 : 000000000000000f x4 : 0000000000000000 [ 3.049051] x3 : 0000000000000000 x2 : 38104b2678c20100 [ 3.054312] x1 : 0000000000000000 x0 : 0000000000000024 [ 3.059574] Call trace: [ 3.061991] _regulator_put+0x3c/0xe8 [ 3.065613] regulator_put+0x34/0x48 [ 3.069149] regulator_bulk_free+0x40/0x58 [ 3.073203] devm_regulator_bulk_release+0x24/0x30 [ 3.077947] release_nodes+0x1f0/0x2e0 [ 3.081655] devres_release_all+0x64/0xa4 [ 3.085622] really_probe+0x1c8/0x3e0 [ 3.089245] driver_probe_device+0xe4/0x138 [ 3.093385] __device_attach_driver+0x90/0x110 [ 3.097784] bus_for_each_drv+0x8c/0xd8 [ 3.101578] __device_attach+0xdc/0x160 [ 3.105373] device_initial_probe+0x24/0x30 [ 3.109514] bus_probe_device+0x9c/0xa8 [ 3.113309] deferred_probe_work_func+0xa0/0xf0 [ 3.117796] process_one_work+0x1b4/0x408 [ 3.121762] worker_thread+0x54/0x4b8 [ 3.125384] kthread+0x12c/0x130 [ 3.128575] ret_from_fork+0x10/0x1c [ 3.132110] ---[ end trace 51a68f4c0035d6c1 ]--- [ 3.136753] dwc2: probe of c9000000.usb failed with error -22 Fixes: 5a0803bd5ae2 ("ARM64: dts: meson-gxbb-odroidc2: Enable USB Nodes") Cc: Martin Blumenstingl Cc: Jerome Brunet Cc: Neil Armstrong Acked-by: Martin Blumenstingl Signed-off-by: Anand Moon Signed-off-by: Kevin Hilman Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts index 4ea23df81f21..5da604e5cf28 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts @@ -295,7 +295,7 @@ }; &usb0_phy { - status = "okay"; + status = "disabled"; phy-supply = <&usb_otg_pwr>; }; @@ -305,7 +305,7 @@ }; &usb0 { - status = "okay"; + status = "disabled"; }; &usb1 { From 33e1cea2dc772298f27cff484debf17d9299c916 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Nov 2019 18:29:11 -0800 Subject: [PATCH 2121/3715] net: add annotations on hh->hh_len lockless accesses [ Upstream commit c305c6ae79e2ce20c22660ceda94f0d86d639a82 ] KCSAN reported a data-race [1] While we can use READ_ONCE() on the read sides, we need to make sure hh->hh_len is written last. [1] BUG: KCSAN: data-race in eth_header_cache / neigh_resolve_output write to 0xffff8880b9dedcb8 of 4 bytes by task 29760 on cpu 0: eth_header_cache+0xa9/0xd0 net/ethernet/eth.c:247 neigh_hh_init net/core/neighbour.c:1463 [inline] neigh_resolve_output net/core/neighbour.c:1480 [inline] neigh_resolve_output+0x415/0x470 net/core/neighbour.c:1470 neigh_output include/net/neighbour.h:511 [inline] ip6_finish_output2+0x7a2/0xec0 net/ipv6/ip6_output.c:116 __ip6_finish_output net/ipv6/ip6_output.c:142 [inline] __ip6_finish_output+0x2d7/0x330 net/ipv6/ip6_output.c:127 ip6_finish_output+0x41/0x160 net/ipv6/ip6_output.c:152 NF_HOOK_COND include/linux/netfilter.h:294 [inline] ip6_output+0xf2/0x280 net/ipv6/ip6_output.c:175 dst_output include/net/dst.h:436 [inline] NF_HOOK include/linux/netfilter.h:305 [inline] ndisc_send_skb+0x459/0x5f0 net/ipv6/ndisc.c:505 ndisc_send_ns+0x207/0x430 net/ipv6/ndisc.c:647 rt6_probe_deferred+0x98/0xf0 net/ipv6/route.c:615 process_one_work+0x3d4/0x890 kernel/workqueue.c:2269 worker_thread+0xa0/0x800 kernel/workqueue.c:2415 kthread+0x1d4/0x200 drivers/block/aoe/aoecmd.c:1253 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:352 read to 0xffff8880b9dedcb8 of 4 bytes by task 29572 on cpu 1: neigh_resolve_output net/core/neighbour.c:1479 [inline] neigh_resolve_output+0x113/0x470 net/core/neighbour.c:1470 neigh_output include/net/neighbour.h:511 [inline] ip6_finish_output2+0x7a2/0xec0 net/ipv6/ip6_output.c:116 __ip6_finish_output net/ipv6/ip6_output.c:142 [inline] __ip6_finish_output+0x2d7/0x330 net/ipv6/ip6_output.c:127 ip6_finish_output+0x41/0x160 net/ipv6/ip6_output.c:152 NF_HOOK_COND include/linux/netfilter.h:294 [inline] ip6_output+0xf2/0x280 net/ipv6/ip6_output.c:175 dst_output include/net/dst.h:436 [inline] NF_HOOK include/linux/netfilter.h:305 [inline] ndisc_send_skb+0x459/0x5f0 net/ipv6/ndisc.c:505 ndisc_send_ns+0x207/0x430 net/ipv6/ndisc.c:647 rt6_probe_deferred+0x98/0xf0 net/ipv6/route.c:615 process_one_work+0x3d4/0x890 kernel/workqueue.c:2269 worker_thread+0xa0/0x800 kernel/workqueue.c:2415 kthread+0x1d4/0x200 drivers/block/aoe/aoecmd.c:1253 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:352 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 29572 Comm: kworker/1:4 Not tainted 5.4.0-rc6+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: events rt6_probe_deferred Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/firewire/net.c | 6 +++++- include/net/neighbour.h | 2 +- net/core/neighbour.c | 4 ++-- net/ethernet/eth.c | 7 ++++++- 4 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index 242359c2d1f1..215f4f71b943 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -249,7 +249,11 @@ static int fwnet_header_cache(const struct neighbour *neigh, h = (struct fwnet_header *)((u8 *)hh->hh_data + HH_DATA_OFF(sizeof(*h))); h->h_proto = type; memcpy(h->h_dest, neigh->ha, net->addr_len); - hh->hh_len = FWNET_HLEN; + + /* Pairs with the READ_ONCE() in neigh_resolve_output(), + * neigh_hh_output() and neigh_update_hhs(). + */ + smp_store_release(&hh->hh_len, FWNET_HLEN); return 0; } diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 1d6b98119a1d..e89273f9a0bc 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -458,7 +458,7 @@ static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb do { seq = read_seqbegin(&hh->hh_lock); - hh_len = hh->hh_len; + hh_len = READ_ONCE(hh->hh_len); if (likely(hh_len <= HH_DATA_MOD)) { hh_alen = HH_DATA_MOD; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 2664ad58e5c0..16ac50b1b9a7 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1094,7 +1094,7 @@ static void neigh_update_hhs(struct neighbour *neigh) if (update) { hh = &neigh->hh; - if (hh->hh_len) { + if (READ_ONCE(hh->hh_len)) { write_seqlock_bh(&hh->hh_lock); update(hh, neigh->dev, neigh->ha); write_sequnlock_bh(&hh->hh_lock); @@ -1355,7 +1355,7 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb) struct net_device *dev = neigh->dev; unsigned int seq; - if (dev->header_ops->cache && !neigh->hh.hh_len) + if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len)) neigh_hh_init(neigh); do { diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index eaeba9b99a73..7e0e5f2706ba 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -239,7 +239,12 @@ int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16 eth->h_proto = type; memcpy(eth->h_source, dev->dev_addr, ETH_ALEN); memcpy(eth->h_dest, neigh->ha, ETH_ALEN); - hh->hh_len = ETH_HLEN; + + /* Pairs with READ_ONCE() in neigh_resolve_output(), + * neigh_hh_output() and neigh_update_hhs(). + */ + smp_store_release(&hh->hh_len, ETH_HLEN); + return 0; } EXPORT_SYMBOL(eth_header_cache); From 26752e31fd64283720020bd4c82985fbaefbbd9c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 17 Nov 2019 14:55:38 +0100 Subject: [PATCH 2122/3715] s390/smp: fix physical to logical CPU map for SMT [ Upstream commit 72a81ad9d6d62dcb79f7e8ad66ffd1c768b72026 ] If an SMT capable system is not IPL'ed from the first CPU the setup of the physical to logical CPU mapping is broken: the IPL core gets CPU number 0, but then the next core gets CPU number 1. Correct would be that all SMT threads of CPU 0 get the subsequent logical CPU numbers. This is important since a lot of code (like e.g. the CPU topology code) assumes that CPU maps are setup like this. If the mapping is broken the system will not IPL due to broken topology masks: [ 1.716341] BUG: arch topology broken [ 1.716342] the SMT domain not a subset of the MC domain [ 1.716343] BUG: arch topology broken [ 1.716344] the MC domain not a subset of the BOOK domain This scenario can usually not happen since LPARs are always IPL'ed from CPU 0 and also re-IPL is intiated from CPU 0. However older kernels did initiate re-IPL on an arbitrary CPU. If therefore a re-IPL from an old kernel into a new kernel is initiated this may lead to crash. Fix this by setting up the physical to logical CPU mapping correctly. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- arch/s390/kernel/smp.c | 80 ++++++++++++++++++++++++++++-------------- 1 file changed, 54 insertions(+), 26 deletions(-) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 27258db640d7..b649a6538350 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -725,39 +725,67 @@ static void __ref smp_get_core_info(struct sclp_core_info *info, int early) static int smp_add_present_cpu(int cpu); -static int __smp_rescan_cpus(struct sclp_core_info *info, int sysfs_add) +static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail, + bool configured, bool early) { struct pcpu *pcpu; - cpumask_t avail; - int cpu, nr, i, j; + int cpu, nr, i; u16 address; nr = 0; - cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); - cpu = cpumask_first(&avail); - for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) { - if (sclp.has_core_type && info->core[i].type != boot_core_type) + if (sclp.has_core_type && core->type != boot_core_type) + return nr; + cpu = cpumask_first(avail); + address = core->core_id << smp_cpu_mt_shift; + for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) { + if (pcpu_find_address(cpu_present_mask, address + i)) continue; - address = info->core[i].core_id << smp_cpu_mt_shift; - for (j = 0; j <= smp_cpu_mtid; j++) { - if (pcpu_find_address(cpu_present_mask, address + j)) - continue; - pcpu = pcpu_devices + cpu; - pcpu->address = address + j; - pcpu->state = - (cpu >= info->configured*(smp_cpu_mtid + 1)) ? - CPU_STATE_STANDBY : CPU_STATE_CONFIGURED; - smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); - set_cpu_present(cpu, true); - if (sysfs_add && smp_add_present_cpu(cpu) != 0) - set_cpu_present(cpu, false); - else - nr++; - cpu = cpumask_next(cpu, &avail); - if (cpu >= nr_cpu_ids) + pcpu = pcpu_devices + cpu; + pcpu->address = address + i; + if (configured) + pcpu->state = CPU_STATE_CONFIGURED; + else + pcpu->state = CPU_STATE_STANDBY; + smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + set_cpu_present(cpu, true); + if (!early && smp_add_present_cpu(cpu) != 0) + set_cpu_present(cpu, false); + else + nr++; + cpumask_clear_cpu(cpu, avail); + cpu = cpumask_next(cpu, avail); + } + return nr; +} + +static int __smp_rescan_cpus(struct sclp_core_info *info, bool early) +{ + struct sclp_core_entry *core; + cpumask_t avail; + bool configured; + u16 core_id; + int nr, i; + + nr = 0; + cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); + /* + * Add IPL core first (which got logical CPU number 0) to make sure + * that all SMT threads get subsequent logical CPU numbers. + */ + if (early) { + core_id = pcpu_devices[0].address >> smp_cpu_mt_shift; + for (i = 0; i < info->configured; i++) { + core = &info->core[i]; + if (core->core_id == core_id) { + nr += smp_add_core(core, &avail, true, early); break; + } } } + for (i = 0; i < info->combined; i++) { + configured = i < info->configured; + nr += smp_add_core(&info->core[i], &avail, configured, early); + } return nr; } @@ -803,7 +831,7 @@ void __init smp_detect_cpus(void) /* Add CPUs present at boot */ get_online_cpus(); - __smp_rescan_cpus(info, 0); + __smp_rescan_cpus(info, true); put_online_cpus(); memblock_free_early((unsigned long)info, sizeof(*info)); } @@ -1156,7 +1184,7 @@ int __ref smp_rescan_cpus(void) smp_get_core_info(info, 0); get_online_cpus(); mutex_lock(&smp_cpu_state_mutex); - nr = __smp_rescan_cpus(info, 1); + nr = __smp_rescan_cpus(info, false); mutex_unlock(&smp_cpu_state_mutex); put_online_cpus(); kfree(info); From 3cc5cbcc504df58afd0213b16335db522bdf85bf Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 26 Nov 2019 16:36:05 +0100 Subject: [PATCH 2123/3715] xen/blkback: Avoid unmapping unmapped grant pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f9bd84a8a845d82f9b5a081a7ae68c98a11d2e84 ] For each I/O request, blkback first maps the foreign pages for the request to its local pages. If an allocation of a local page for the mapping fails, it should unmap every mapping already made for the request. However, blkback's handling mechanism for the allocation failure does not mark the remaining foreign pages as unmapped. Therefore, the unmap function merely tries to unmap every valid grant page for the request, including the pages not mapped due to the allocation failure. On a system that fails the allocation frequently, this problem leads to following kernel crash. [ 372.012538] BUG: unable to handle kernel NULL pointer dereference at 0000000000000001 [ 372.012546] IP: [] gnttab_unmap_refs.part.7+0x1c/0x40 [ 372.012557] PGD 16f3e9067 PUD 16426e067 PMD 0 [ 372.012562] Oops: 0002 [#1] SMP [ 372.012566] Modules linked in: act_police sch_ingress cls_u32 ... [ 372.012746] Call Trace: [ 372.012752] [] gnttab_unmap_refs+0x34/0x40 [ 372.012759] [] xen_blkbk_unmap+0x83/0x150 [xen_blkback] ... [ 372.012802] [] dispatch_rw_block_io+0x970/0x980 [xen_blkback] ... Decompressing Linux... Parsing ELF... done. Booting the kernel. [ 0.000000] Initializing cgroup subsys cpuset This commit fixes this problem by marking the grant pages of the given request that didn't mapped due to the allocation failure as invalid. Fixes: c6cc142dac52 ("xen-blkback: use balloon pages for all mappings") Reviewed-by: David Woodhouse Reviewed-by: Maximilian Heyne Reviewed-by: Paul Durrant Reviewed-by: Roger Pau Monné Signed-off-by: SeongJae Park Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/xen-blkback/blkback.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 987d665e82de..c1d1b94f71b5 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -929,6 +929,8 @@ next: out_of_memory: pr_alert("%s: out of memory\n", __func__); put_free_pages(ring, pages_to_gnt, segs_to_map); + for (i = last_map; i < num; i++) + pages[i]->handle = BLKBACK_INVALID_HANDLE; return -ENOMEM; } From c8b4d608f6efb84b12d9e98d0aed33676f893363 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Thu, 5 Dec 2019 17:28:52 +0300 Subject: [PATCH 2124/3715] perf/x86/intel/bts: Fix the use of page_private() [ Upstream commit ff61541cc6c1962957758ba433c574b76f588d23 ] Commit 8062382c8dbe2 ("perf/x86/intel/bts: Add BTS PMU driver") brought in a warning with the BTS buffer initialization that is easily tripped with (assuming KPTI is disabled): instantly throwing: > ------------[ cut here ]------------ > WARNING: CPU: 2 PID: 326 at arch/x86/events/intel/bts.c:86 bts_buffer_setup_aux+0x117/0x3d0 > Modules linked in: > CPU: 2 PID: 326 Comm: perf Not tainted 5.4.0-rc8-00291-gceb9e77324fa #904 > RIP: 0010:bts_buffer_setup_aux+0x117/0x3d0 > Call Trace: > rb_alloc_aux+0x339/0x550 > perf_mmap+0x607/0xc70 > mmap_region+0x76b/0xbd0 ... It appears to assume (for lost raisins) that PagePrivate() is set, while later it actually tests for PagePrivate() before using page_private(). Make it consistent and always check PagePrivate() before using page_private(). Fixes: 8062382c8dbe2 ("perf/x86/intel/bts: Add BTS PMU driver") Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Jiri Olsa Cc: Vince Weaver Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Link: https://lkml.kernel.org/r/20191205142853.28894-2-alexander.shishkin@linux.intel.com Signed-off-by: Sasha Levin --- arch/x86/events/intel/bts.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 24ffa1e88cf9..4d3399405d06 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -71,9 +71,17 @@ struct bts_buffer { static struct pmu bts_pmu; +static int buf_nr_pages(struct page *page) +{ + if (!PagePrivate(page)) + return 1; + + return 1 << page_private(page); +} + static size_t buf_size(struct page *page) { - return 1 << (PAGE_SHIFT + page_private(page)); + return buf_nr_pages(page) * PAGE_SIZE; } static void * @@ -89,9 +97,7 @@ bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite) /* count all the high order buffers */ for (pg = 0, nbuf = 0; pg < nr_pages;) { page = virt_to_page(pages[pg]); - if (WARN_ON_ONCE(!PagePrivate(page) && nr_pages > 1)) - return NULL; - pg += 1 << page_private(page); + pg += buf_nr_pages(page); nbuf++; } @@ -115,7 +121,7 @@ bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite) unsigned int __nr_pages; page = virt_to_page(pages[pg]); - __nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1; + __nr_pages = buf_nr_pages(page); buf->buf[nbuf].page = page; buf->buf[nbuf].offset = offset; buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0); From b0cdffaa546e24acf92ab3b0d4e917a51aff6a82 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 9 Jan 2020 10:18:00 +0100 Subject: [PATCH 2125/3715] Linux 4.14.163 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index cb57b5c58e2b..35a71a78d1d2 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 162 +SUBLEVEL = 163 EXTRAVERSION = NAME = Petit Gorille From 7753861270f417d58fef10f65eea7cd1827ece0e Mon Sep 17 00:00:00 2001 From: Jeffrey Vander Stoep Date: Thu, 9 Jan 2020 10:27:26 +0000 Subject: [PATCH 2126/3715] Revert "BACKPORT: perf_event: Add support for LSM and SELinux checks" This reverts commit f81151cd3afda797c1f0871e42a19606277b414b. Reason for revert: collides with aosp/1137243 and breaks build Change-Id: I6d0216ccaa1a759fb1732c07601f5877b81a5f03 Signed-off-by: Jeff Vander Stoep --- arch/powerpc/perf/core-book3s.c | 15 ++++--- arch/x86/events/intel/bts.c | 8 ++-- arch/x86/events/intel/core.c | 5 +-- arch/x86/events/intel/p4.c | 5 +-- include/linux/lsm_hooks.h | 15 ------- include/linux/perf_event.h | 36 +++------------ include/linux/security.h | 38 +--------------- kernel/events/core.c | 57 +++++------------------ kernel/trace/trace_event_perf.c | 15 +++---- security/security.c | 27 ----------- security/selinux/hooks.c | 70 ----------------------------- security/selinux/include/classmap.h | 2 - security/selinux/include/objsec.h | 6 +-- 13 files changed, 38 insertions(+), 261 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 1c37f08bcddd..3188040022c4 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -95,7 +95,7 @@ static inline unsigned long perf_ip_adjust(struct pt_regs *regs) { return 0; } -static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *regs, u64 *addrp) { } +static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { } static inline u32 perf_get_misc_flags(struct pt_regs *regs) { return 0; @@ -126,7 +126,7 @@ static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw) static inline void power_pmu_bhrb_enable(struct perf_event *event) {} static inline void power_pmu_bhrb_disable(struct perf_event *event) {} static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) {} -static inline void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw) {} +static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {} static void pmao_restore_workaround(bool ebb) { } static bool use_ic(u64 event) { @@ -174,7 +174,7 @@ static inline unsigned long perf_ip_adjust(struct pt_regs *regs) * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC, the * [POWER7P_]MMCRA_SDAR_VALID bit in MMCRA, or the SDAR_VALID bit in SIER. */ -static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *regs, u64 *addrp) +static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { unsigned long mmcra = regs->dsisr; bool sdar_valid; @@ -435,7 +435,7 @@ static __u64 power_pmu_bhrb_to(u64 addr) } /* Processing BHRB entries */ -static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw) +static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) { u64 val; u64 addr; @@ -463,7 +463,8 @@ static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events * * exporting it to userspace (avoid exposure of regions * where we could have speculative execution) */ - if (is_kernel_addr(addr) && perf_allow_kernel(&event->attr) != 0) + if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) && + is_kernel_addr(addr)) continue; /* Branches are read most recent first (ie. mfbhrb 0 is @@ -2076,12 +2077,12 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (event->attr.sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) - perf_get_data_addr(event, regs, &data.addr); + perf_get_data_addr(regs, &data.addr); if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) { struct cpu_hw_events *cpuhw; cpuhw = this_cpu_ptr(&cpu_hw_events); - power_pmu_bhrb_read(event, cpuhw); + power_pmu_bhrb_read(cpuhw); data.br_stack = &cpuhw->bhrb_stack; } diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 68162163a05d..7139f6bf27ad 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -557,11 +557,9 @@ static int bts_event_init(struct perf_event *event) * Note that the default paranoia setting permits unprivileged * users to profile the kernel. */ - if (event->attr.exclude_kernel) { - ret = perf_allow_kernel(&event->attr); - if (ret) - return ret; - } + if (event->attr.exclude_kernel && perf_paranoid_kernel() && + !capable(CAP_SYS_ADMIN)) + return -EACCES; if (x86_add_exclusive(x86_lbr_exclusive_bts)) return -EBUSY; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 0307e34d2272..4a60ed8c4413 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3087,9 +3087,8 @@ static int intel_pmu_hw_config(struct perf_event *event) if (x86_pmu.version < 3) return -EINVAL; - ret = perf_allow_cpu(&event->attr); - if (ret) - return ret; + if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) + return -EACCES; event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY; diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c index 4f9ac72968db..d32c0eed38ca 100644 --- a/arch/x86/events/intel/p4.c +++ b/arch/x86/events/intel/p4.c @@ -776,9 +776,8 @@ static int p4_validate_raw_event(struct perf_event *event) * the user needs special permissions to be able to use it */ if (p4_ht_active() && p4_event_bind_map[v].shared) { - v = perf_allow_cpu(&event->attr); - if (v) - return v; + if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) + return -EACCES; } /* ESCR EventMask bits may be invalid */ diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 7e9f59aeadb6..7161d8e7ee79 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1727,14 +1727,6 @@ union security_list_options { int (*bpf_prog_alloc_security)(struct bpf_prog_aux *aux); void (*bpf_prog_free_security)(struct bpf_prog_aux *aux); #endif /* CONFIG_BPF_SYSCALL */ -#ifdef CONFIG_PERF_EVENTS - int (*perf_event_open)(struct perf_event_attr *attr, int type); - int (*perf_event_alloc)(struct perf_event *event); - void (*perf_event_free)(struct perf_event *event); - int (*perf_event_read)(struct perf_event *event); - int (*perf_event_write)(struct perf_event *event); - -#endif }; struct security_hook_heads { @@ -1963,13 +1955,6 @@ struct security_hook_heads { struct list_head bpf_prog_alloc_security; struct list_head bpf_prog_free_security; #endif /* CONFIG_BPF_SYSCALL */ -#ifdef CONFIG_PERF_EVENTS - struct list_head perf_event_open; - struct list_head perf_event_alloc; - struct list_head perf_event_free; - struct list_head perf_event_read; - struct list_head perf_event_write; -#endif } __randomize_layout; /* diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 5d798eb5ac0a..ac16bac38c03 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -54,7 +54,6 @@ struct perf_guest_info_callbacks { #include #include #include -#include #include struct perf_callchain_entry { @@ -713,9 +712,6 @@ struct perf_event { int cgrp_defer_enabled; #endif -#ifdef CONFIG_SECURITY - void *security; -#endif struct list_head sb_list; #endif /* CONFIG_PERF_EVENTS */ }; @@ -1179,46 +1175,24 @@ extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, int perf_event_max_stack_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -/* Access to perf_event_open(2) syscall. */ -#define PERF_SECURITY_OPEN 0 - -/* Finer grained perf_event_open(2) access control. */ -#define PERF_SECURITY_CPU 1 -#define PERF_SECURITY_KERNEL 2 -#define PERF_SECURITY_TRACEPOINT 3 - static inline bool perf_paranoid_any(void) { return sysctl_perf_event_paranoid > 2; } -static inline int perf_is_paranoid(void) +static inline bool perf_paranoid_tracepoint_raw(void) { return sysctl_perf_event_paranoid > -1; } -static inline int perf_allow_kernel(struct perf_event_attr *attr) +static inline bool perf_paranoid_cpu(void) { - if (sysctl_perf_event_paranoid > 1 && !capable(CAP_SYS_ADMIN)) - return -EACCES; - - return security_perf_event_open(attr, PERF_SECURITY_KERNEL); + return sysctl_perf_event_paranoid > 0; } -static inline int perf_allow_cpu(struct perf_event_attr *attr) +static inline bool perf_paranoid_kernel(void) { - if (sysctl_perf_event_paranoid > 0 && !capable(CAP_SYS_ADMIN)) - return -EACCES; - - return security_perf_event_open(attr, PERF_SECURITY_CPU); -} - -static inline int perf_allow_tracepoint(struct perf_event_attr *attr) -{ - if (sysctl_perf_event_paranoid > -1 && !capable(CAP_SYS_ADMIN)) - return -EPERM; - - return security_perf_event_open(attr, PERF_SECURITY_TRACEPOINT); + return sysctl_perf_event_paranoid > 1; } extern void perf_event_init(void); diff --git a/include/linux/security.h b/include/linux/security.h index aee15efd27ea..73f1ef625d40 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1801,41 +1801,5 @@ static inline void free_secdata(void *secdata) { } #endif /* CONFIG_SECURITY */ -#ifdef CONFIG_PERF_EVENTS -struct perf_event_attr; - -#ifdef CONFIG_SECURITY -extern int security_perf_event_open(struct perf_event_attr *attr, int type); -extern int security_perf_event_alloc(struct perf_event *event); -extern void security_perf_event_free(struct perf_event *event); -extern int security_perf_event_read(struct perf_event *event); -extern int security_perf_event_write(struct perf_event *event); -#else -static inline int security_perf_event_open(struct perf_event_attr *attr, - int type) -{ - return 0; -} - -static inline int security_perf_event_alloc(struct perf_event *event) -{ - return 0; -} - -static inline void security_perf_event_free(struct perf_event *event) -{ -} - -static inline int security_perf_event_read(struct perf_event *event) -{ - return 0; -} - -static inline int security_perf_event_write(struct perf_event *event) -{ - return 0; -} -#endif /* CONFIG_SECURITY */ -#endif /* CONFIG_PERF_EVENTS */ - #endif /* ! __LINUX_SECURITY_H */ + diff --git a/kernel/events/core.c b/kernel/events/core.c index e24e2d558cbc..f87d54270076 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3912,9 +3912,8 @@ find_get_context(struct pmu *pmu, struct task_struct *task, if (!task) { /* Must be root to operate on a CPU event: */ - err = perf_allow_cpu(&event->attr); - if (err) - return ERR_PTR(err); + if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) + return ERR_PTR(-EACCES); cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); ctx = &cpuctx->ctx; @@ -4217,8 +4216,6 @@ static void _free_event(struct perf_event *event) unaccount_event(event); - security_perf_event_free(event); - if (event->rb) { /* * Can happen when we close an event with re-directed output. @@ -4638,10 +4635,6 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) struct perf_event_context *ctx; int ret; - ret = security_perf_event_read(event); - if (ret) - return ret; - ctx = perf_event_ctx_lock(event); ret = __perf_read(event, buf, count); perf_event_ctx_unlock(event, ctx); @@ -4887,11 +4880,6 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct perf_event_context *ctx; long ret; - /* Treat ioctl like writes as it is likely a mutating operation. */ - ret = security_perf_event_write(event); - if (ret) - return ret; - ctx = perf_event_ctx_lock(event); ret = _perf_ioctl(event, cmd, arg); perf_event_ctx_unlock(event, ctx); @@ -5352,10 +5340,6 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) if (!(vma->vm_flags & VM_SHARED)) return -EINVAL; - ret = security_perf_event_read(event); - if (ret) - return ret; - vma_size = vma->vm_end - vma->vm_start; if (vma->vm_pgoff == 0) { @@ -5469,7 +5453,7 @@ accounting: lock_limit >>= PAGE_SHIFT; locked = vma->vm_mm->pinned_vm + extra; - if ((locked > lock_limit) && perf_is_paranoid() && + if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() && !capable(CAP_IPC_LOCK)) { ret = -EPERM; goto unlock; @@ -9709,20 +9693,11 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, } } - err = security_perf_event_alloc(event); - if (err) - goto err_callchain_buffer; - /* symmetric to unaccount_event() in _free_event() */ account_event(event); return event; -err_callchain_buffer: - if (!event->parent) { - if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) - put_callchain_buffers(); - } err_addr_filters: kfree(event->addr_filter_ranges); @@ -9840,11 +9815,9 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, attr->branch_sample_type = mask; } /* privileged levels capture (kernel, hv): check permissions */ - if (mask & PERF_SAMPLE_BRANCH_PERM_PLM) { - ret = perf_allow_kernel(attr); - if (ret) - return ret; - } + if ((mask & PERF_SAMPLE_BRANCH_PERM_PLM) + && perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) + return -EACCES; } if (attr->sample_type & PERF_SAMPLE_REGS_USER) { @@ -10057,19 +10030,13 @@ SYSCALL_DEFINE5(perf_event_open, if (perf_paranoid_any() && !capable(CAP_SYS_ADMIN)) return -EACCES; - /* Do we allow access to perf_event_open(2) ? */ - err = security_perf_event_open(&attr, PERF_SECURITY_OPEN); - if (err) - return err; - err = perf_copy_attr(attr_uptr, &attr); if (err) return err; if (!attr.exclude_kernel) { - err = perf_allow_kernel(&attr); - if (err) - return err; + if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) + return -EACCES; } if (attr.namespaces) { @@ -10086,11 +10053,9 @@ SYSCALL_DEFINE5(perf_event_open, } /* Only privileged users can get physical addresses */ - if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR)) { - err = perf_allow_kernel(&attr); - if (err) - return err; - } + if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR) && + perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) + return -EACCES; if (!attr.sample_max_stack) attr.sample_max_stack = sysctl_perf_event_max_stack; diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 80b7b194c181..13ba2d3f6a91 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -7,7 +7,6 @@ #include #include -#include #include "trace.h" static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS]; @@ -25,10 +24,8 @@ static int total_ref_count; static int perf_trace_event_perm(struct trace_event_call *tp_event, struct perf_event *p_event) { - int ret; - if (tp_event->perf_perm) { - ret = tp_event->perf_perm(tp_event, p_event); + int ret = tp_event->perf_perm(tp_event, p_event); if (ret) return ret; } @@ -47,9 +44,8 @@ static int perf_trace_event_perm(struct trace_event_call *tp_event, /* The ftrace function trace is allowed only for root. */ if (ftrace_event_is_function(tp_event)) { - ret = perf_allow_tracepoint(&p_event->attr); - if (ret) - return ret; + if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) + return -EPERM; if (!is_sampling_event(p_event)) return 0; @@ -84,9 +80,8 @@ static int perf_trace_event_perm(struct trace_event_call *tp_event, * ...otherwise raw tracepoint data can be a severe data leak, * only allow root to have these. */ - ret = perf_allow_tracepoint(&p_event->attr); - if (ret) - return ret; + if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) + return -EPERM; return 0; } diff --git a/security/security.c b/security/security.c index 5afd1dc81511..fb4910f0d0e2 100644 --- a/security/security.c +++ b/security/security.c @@ -1745,30 +1745,3 @@ void security_bpf_prog_free(struct bpf_prog_aux *aux) call_void_hook(bpf_prog_free_security, aux); } #endif /* CONFIG_BPF_SYSCALL */ - -#ifdef CONFIG_PERF_EVENTS -int security_perf_event_open(struct perf_event_attr *attr, int type) -{ - return call_int_hook(perf_event_open, 0, attr, type); -} - -int security_perf_event_alloc(struct perf_event *event) -{ - return call_int_hook(perf_event_alloc, 0, event); -} - -void security_perf_event_free(struct perf_event *event) -{ - call_void_hook(perf_event_free, event); -} - -int security_perf_event_read(struct perf_event *event) -{ - return call_int_hook(perf_event_read, 0, event); -} - -int security_perf_event_write(struct perf_event *event) -{ - return call_int_hook(perf_event_write, 0, event); -} -#endif /* CONFIG_PERF_EVENTS */ diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 38eb6cc370f7..3c017acc3a92 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -6620,68 +6620,6 @@ static void selinux_bpf_prog_free(struct bpf_prog_aux *aux) } #endif - -#ifdef CONFIG_PERF_EVENTS -static int selinux_perf_event_open(struct perf_event_attr *attr, int type) -{ - u32 requested, sid = current_sid(); - - if (type == PERF_SECURITY_OPEN) - requested = PERF_EVENT__OPEN; - else if (type == PERF_SECURITY_CPU) - requested = PERF_EVENT__CPU; - else if (type == PERF_SECURITY_KERNEL) - requested = PERF_EVENT__KERNEL; - else if (type == PERF_SECURITY_TRACEPOINT) - requested = PERF_EVENT__TRACEPOINT; - else - return -EINVAL; - - return avc_has_perm(sid, sid, SECCLASS_PERF_EVENT, - requested, NULL); -} - -static int selinux_perf_event_alloc(struct perf_event *event) -{ - struct perf_event_security_struct *perfsec; - - perfsec = kzalloc(sizeof(*perfsec), GFP_KERNEL); - if (!perfsec) - return -ENOMEM; - - perfsec->sid = current_sid(); - event->security = perfsec; - - return 0; -} - -static void selinux_perf_event_free(struct perf_event *event) -{ - struct perf_event_security_struct *perfsec = event->security; - - event->security = NULL; - kfree(perfsec); -} - -static int selinux_perf_event_read(struct perf_event *event) -{ - struct perf_event_security_struct *perfsec = event->security; - u32 sid = current_sid(); - - return avc_has_perm(sid, perfsec->sid, - SECCLASS_PERF_EVENT, PERF_EVENT__READ, NULL); -} - -static int selinux_perf_event_write(struct perf_event *event) -{ - struct perf_event_security_struct *perfsec = event->security; - u32 sid = current_sid(); - - return avc_has_perm(sid, perfsec->sid, - SECCLASS_PERF_EVENT, PERF_EVENT__WRITE, NULL); -} -#endif - static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(binder_set_context_mgr, selinux_binder_set_context_mgr), LSM_HOOK_INIT(binder_transaction, selinux_binder_transaction), @@ -6911,14 +6849,6 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(bpf_map_free_security, selinux_bpf_map_free), LSM_HOOK_INIT(bpf_prog_free_security, selinux_bpf_prog_free), #endif - -#ifdef CONFIG_PERF_EVENTS - LSM_HOOK_INIT(perf_event_open, selinux_perf_event_open), - LSM_HOOK_INIT(perf_event_alloc, selinux_perf_event_alloc), - LSM_HOOK_INIT(perf_event_free, selinux_perf_event_free), - LSM_HOOK_INIT(perf_event_read, selinux_perf_event_read), - LSM_HOOK_INIT(perf_event_write, selinux_perf_event_write), -#endif }; static __init int selinux_init(void) diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index 000effa857aa..5ae315ab060b 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h @@ -241,8 +241,6 @@ struct security_class_mapping secclass_map[] = { { "manage_subnet", NULL } }, { "bpf", {"map_create", "map_read", "map_write", "prog_load", "prog_run"} }, - { "perf_event", - {"open", "cpu", "kernel", "tracepoint", "read", "write"} }, { NULL } }; diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index 512908b55ca3..18b3fe02b44c 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -151,11 +151,7 @@ struct pkey_security_struct { }; struct bpf_security_struct { - u32 sid; /* SID of bpf obj creator */ -}; - -struct perf_event_security_struct { - u32 sid; /* SID of perf_event obj creator */ + u32 sid; /*SID of bpf obj creater*/ }; #endif /* _SELINUX_OBJSEC_H_ */ From 17d43f2bf98c75afe28fdc5f7032b43df2fac9ce Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 9 Oct 2019 16:34:16 -0700 Subject: [PATCH 2127/3715] fscrypt: invoke crypto API for ESSIV handling Instead of open-coding the calculations for ESSIV handling, use an ESSIV skcipher which does all of this under the hood. ESSIV was added to the crypto API in v5.4. This is based on a patch from Ard Biesheuvel, but reworked to apply after all the fscrypt changes that went into v5.4. Tested with 'kvm-xfstests -c ext4,f2fs -g encrypt', including the ciphertext verification tests for v1 and v2 encryption policies. Originally-from: Ard Biesheuvel Acked-by: Ard Biesheuvel Signed-off-by: Eric Biggers --- Documentation/filesystems/fscrypt.rst | 5 +- fs/crypto/crypto.c | 4 - fs/crypto/fscrypt_private.h | 7 -- fs/crypto/keysetup.c | 111 +++----------------------- fs/crypto/keysetup_v1.c | 4 - 5 files changed, 14 insertions(+), 117 deletions(-) diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst index 8a0700af9596..6ec459be3de1 100644 --- a/Documentation/filesystems/fscrypt.rst +++ b/Documentation/filesystems/fscrypt.rst @@ -308,8 +308,9 @@ If unsure, you should use the (AES-256-XTS, AES-256-CTS-CBC) pair. AES-128-CBC was added only for low-powered embedded devices with crypto accelerators such as CAAM or CESA that do not support XTS. To -use AES-128-CBC, CONFIG_CRYPTO_SHA256 (or another SHA-256 -implementation) must be enabled so that ESSIV can be used. +use AES-128-CBC, CONFIG_CRYPTO_ESSIV and CONFIG_CRYPTO_SHA256 (or +another SHA-256 implementation) must be enabled so that ESSIV can be +used. Adiantum is a (primarily) stream cipher-based mode that is fast even on CPUs without dedicated crypto instructions. It's also a true diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 35efeae24efc..946d9bb70cf2 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include "fscrypt_private.h" @@ -142,9 +141,6 @@ void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num, if (fscrypt_is_direct_key_policy(&ci->ci_policy)) memcpy(iv->nonce, ci->ci_nonce, FS_KEY_DERIVATION_NONCE_SIZE); - - if (ci->ci_essiv_tfm != NULL) - crypto_cipher_encrypt_one(ci->ci_essiv_tfm, iv->raw, iv->raw); } /* Encrypt or decrypt a single filesystem block of file contents */ diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index e84efc01512e..76c64297ce18 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -163,12 +163,6 @@ struct fscrypt_info { /* The actual crypto transform used for encryption and decryption */ struct crypto_skcipher *ci_ctfm; - /* - * Cipher for ESSIV IV generation. Only set for CBC contents - * encryption, otherwise is NULL. - */ - struct crypto_cipher *ci_essiv_tfm; - /* * Encryption mode used for this inode. It corresponds to either the * contents or filenames encryption mode, depending on the inode type. @@ -444,7 +438,6 @@ struct fscrypt_mode { int keysize; int ivsize; bool logged_impl_name; - bool needs_essiv; }; static inline bool diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index e3ba6b45c06c..03ecd4ed713a 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -8,15 +8,11 @@ * Heavily modified since then. */ -#include -#include #include #include #include "fscrypt_private.h" -static struct crypto_shash *essiv_hash_tfm; - static struct fscrypt_mode available_modes[] = { [FSCRYPT_MODE_AES_256_XTS] = { .friendly_name = "AES-256-XTS", @@ -31,11 +27,10 @@ static struct fscrypt_mode available_modes[] = { .ivsize = 16, }, [FSCRYPT_MODE_AES_128_CBC] = { - .friendly_name = "AES-128-CBC", - .cipher_str = "cbc(aes)", + .friendly_name = "AES-128-CBC-ESSIV", + .cipher_str = "essiv(cbc(aes),sha256)", .keysize = 16, .ivsize = 16, - .needs_essiv = true, }, [FSCRYPT_MODE_AES_128_CTS] = { .friendly_name = "AES-128-CTS-CBC", @@ -111,98 +106,16 @@ err_free_tfm: return ERR_PTR(err); } -static int derive_essiv_salt(const u8 *key, int keysize, u8 *salt) -{ - struct crypto_shash *tfm = READ_ONCE(essiv_hash_tfm); - - /* init hash transform on demand */ - if (unlikely(!tfm)) { - struct crypto_shash *prev_tfm; - - tfm = crypto_alloc_shash("sha256", 0, 0); - if (IS_ERR(tfm)) { - if (PTR_ERR(tfm) == -ENOENT) { - fscrypt_warn(NULL, - "Missing crypto API support for SHA-256"); - return -ENOPKG; - } - fscrypt_err(NULL, - "Error allocating SHA-256 transform: %ld", - PTR_ERR(tfm)); - return PTR_ERR(tfm); - } - prev_tfm = cmpxchg(&essiv_hash_tfm, NULL, tfm); - if (prev_tfm) { - crypto_free_shash(tfm); - tfm = prev_tfm; - } - } - - { - SHASH_DESC_ON_STACK(desc, tfm); - desc->tfm = tfm; - desc->flags = 0; - - return crypto_shash_digest(desc, key, keysize, salt); - } -} - -static int init_essiv_generator(struct fscrypt_info *ci, const u8 *raw_key, - int keysize) -{ - int err; - struct crypto_cipher *essiv_tfm; - u8 salt[SHA256_DIGEST_SIZE]; - - if (WARN_ON(ci->ci_mode->ivsize != AES_BLOCK_SIZE)) - return -EINVAL; - - essiv_tfm = crypto_alloc_cipher("aes", 0, 0); - if (IS_ERR(essiv_tfm)) - return PTR_ERR(essiv_tfm); - - ci->ci_essiv_tfm = essiv_tfm; - - err = derive_essiv_salt(raw_key, keysize, salt); - if (err) - goto out; - - /* - * Using SHA256 to derive the salt/key will result in AES-256 being - * used for IV generation. File contents encryption will still use the - * configured keysize (AES-128) nevertheless. - */ - err = crypto_cipher_setkey(essiv_tfm, salt, sizeof(salt)); - if (err) - goto out; - -out: - memzero_explicit(salt, sizeof(salt)); - return err; -} - -/* Given the per-file key, set up the file's crypto transform object(s) */ +/* Given the per-file key, set up the file's crypto transform object */ int fscrypt_set_derived_key(struct fscrypt_info *ci, const u8 *derived_key) { - struct fscrypt_mode *mode = ci->ci_mode; - struct crypto_skcipher *ctfm; - int err; + struct crypto_skcipher *tfm; - ctfm = fscrypt_allocate_skcipher(mode, derived_key, ci->ci_inode); - if (IS_ERR(ctfm)) - return PTR_ERR(ctfm); + tfm = fscrypt_allocate_skcipher(ci->ci_mode, derived_key, ci->ci_inode); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); - ci->ci_ctfm = ctfm; - - if (mode->needs_essiv) { - err = init_essiv_generator(ci, derived_key, mode->keysize); - if (err) { - fscrypt_warn(ci->ci_inode, - "Error initializing ESSIV generator: %d", - err); - return err; - } - } + ci->ci_ctfm = tfm; return 0; } @@ -389,13 +302,11 @@ static void put_crypt_info(struct fscrypt_info *ci) if (!ci) return; - if (ci->ci_direct_key) { + if (ci->ci_direct_key) fscrypt_put_direct_key(ci->ci_direct_key); - } else if ((ci->ci_ctfm != NULL || ci->ci_essiv_tfm != NULL) && - !fscrypt_is_direct_key_policy(&ci->ci_policy)) { + else if (ci->ci_ctfm != NULL && + !fscrypt_is_direct_key_policy(&ci->ci_policy)) crypto_free_skcipher(ci->ci_ctfm); - crypto_free_cipher(ci->ci_essiv_tfm); - } key = ci->ci_master_key; if (key) { diff --git a/fs/crypto/keysetup_v1.c b/fs/crypto/keysetup_v1.c index 4ae795d61840..454fb03fc30e 100644 --- a/fs/crypto/keysetup_v1.c +++ b/fs/crypto/keysetup_v1.c @@ -270,10 +270,6 @@ static int setup_v1_file_key_direct(struct fscrypt_info *ci, return -EINVAL; } - /* ESSIV implies 16-byte IVs which implies !DIRECT_KEY */ - if (WARN_ON(mode->needs_essiv)) - return -EINVAL; - dk = fscrypt_get_direct_key(ci, raw_master_key); if (IS_ERR(dk)) return PTR_ERR(dk); From e3ba9dad312c74db8fa8d685fd6329fccfb53cda Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 9 Oct 2019 16:34:17 -0700 Subject: [PATCH 2128/3715] fscrypt: remove struct fscrypt_ctx Now that ext4 and f2fs implement their own post-read workflow that supports both fscrypt and fsverity, the fscrypt-only workflow based around struct fscrypt_ctx is no longer used. So remove the unused code. This is based on a patch from Chandan Rajendra's "Consolidate FS read I/O callbacks code" patchset, but rebased onto the latest kernel, folded __fscrypt_decrypt_bio() into fscrypt_decrypt_bio(), cleaned up fscrypt_initialize(), and updated the commit message. Originally-from: Chandan Rajendra Signed-off-by: Eric Biggers --- fs/crypto/bio.c | 29 +--------- fs/crypto/crypto.c | 110 +++--------------------------------- fs/crypto/fscrypt_private.h | 2 - include/linux/fscrypt.h | 32 ----------- 4 files changed, 10 insertions(+), 163 deletions(-) diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c index 0eb98a6d6f14..4a7f4d78ef90 100644 --- a/fs/crypto/bio.c +++ b/fs/crypto/bio.c @@ -26,7 +26,7 @@ #include #include "fscrypt_private.h" -static void __fscrypt_decrypt_bio(struct bio *bio, bool done) +void fscrypt_decrypt_bio(struct bio *bio) { struct bio_vec *bv; int i; @@ -37,37 +37,10 @@ static void __fscrypt_decrypt_bio(struct bio *bio, bool done) bv->bv_offset); if (ret) SetPageError(page); - else if (done) - SetPageUptodate(page); - if (done) - unlock_page(page); } } - -void fscrypt_decrypt_bio(struct bio *bio) -{ - __fscrypt_decrypt_bio(bio, false); -} EXPORT_SYMBOL(fscrypt_decrypt_bio); -static void completion_pages(struct work_struct *work) -{ - struct fscrypt_ctx *ctx = container_of(work, struct fscrypt_ctx, work); - struct bio *bio = ctx->bio; - - __fscrypt_decrypt_bio(bio, true); - fscrypt_release_ctx(ctx); - bio_put(bio); -} - -void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, struct bio *bio) -{ - INIT_WORK(&ctx->work, completion_pages); - ctx->bio = bio; - fscrypt_enqueue_decrypt_work(&ctx->work); -} -EXPORT_SYMBOL(fscrypt_enqueue_decrypt_bio); - int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, sector_t pblk, unsigned int len) { diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 946d9bb70cf2..9c5b59cdc63c 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -30,24 +30,16 @@ #include "fscrypt_private.h" static unsigned int num_prealloc_crypto_pages = 32; -static unsigned int num_prealloc_crypto_ctxs = 128; module_param(num_prealloc_crypto_pages, uint, 0444); MODULE_PARM_DESC(num_prealloc_crypto_pages, "Number of crypto pages to preallocate"); -module_param(num_prealloc_crypto_ctxs, uint, 0444); -MODULE_PARM_DESC(num_prealloc_crypto_ctxs, - "Number of crypto contexts to preallocate"); static mempool_t *fscrypt_bounce_page_pool = NULL; -static LIST_HEAD(fscrypt_free_ctxs); -static DEFINE_SPINLOCK(fscrypt_ctx_lock); - static struct workqueue_struct *fscrypt_read_workqueue; static DEFINE_MUTEX(fscrypt_init_mutex); -static struct kmem_cache *fscrypt_ctx_cachep; struct kmem_cache *fscrypt_info_cachep; void fscrypt_enqueue_decrypt_work(struct work_struct *work) @@ -56,62 +48,6 @@ void fscrypt_enqueue_decrypt_work(struct work_struct *work) } EXPORT_SYMBOL(fscrypt_enqueue_decrypt_work); -/** - * fscrypt_release_ctx() - Release a decryption context - * @ctx: The decryption context to release. - * - * If the decryption context was allocated from the pre-allocated pool, return - * it to that pool. Else, free it. - */ -void fscrypt_release_ctx(struct fscrypt_ctx *ctx) -{ - unsigned long flags; - - if (ctx->flags & FS_CTX_REQUIRES_FREE_ENCRYPT_FL) { - kmem_cache_free(fscrypt_ctx_cachep, ctx); - } else { - spin_lock_irqsave(&fscrypt_ctx_lock, flags); - list_add(&ctx->free_list, &fscrypt_free_ctxs); - spin_unlock_irqrestore(&fscrypt_ctx_lock, flags); - } -} -EXPORT_SYMBOL(fscrypt_release_ctx); - -/** - * fscrypt_get_ctx() - Get a decryption context - * @gfp_flags: The gfp flag for memory allocation - * - * Allocate and initialize a decryption context. - * - * Return: A new decryption context on success; an ERR_PTR() otherwise. - */ -struct fscrypt_ctx *fscrypt_get_ctx(gfp_t gfp_flags) -{ - struct fscrypt_ctx *ctx; - unsigned long flags; - - /* - * First try getting a ctx from the free list so that we don't have to - * call into the slab allocator. - */ - spin_lock_irqsave(&fscrypt_ctx_lock, flags); - ctx = list_first_entry_or_null(&fscrypt_free_ctxs, - struct fscrypt_ctx, free_list); - if (ctx) - list_del(&ctx->free_list); - spin_unlock_irqrestore(&fscrypt_ctx_lock, flags); - if (!ctx) { - ctx = kmem_cache_zalloc(fscrypt_ctx_cachep, gfp_flags); - if (!ctx) - return ERR_PTR(-ENOMEM); - ctx->flags |= FS_CTX_REQUIRES_FREE_ENCRYPT_FL; - } else { - ctx->flags &= ~FS_CTX_REQUIRES_FREE_ENCRYPT_FL; - } - return ctx; -} -EXPORT_SYMBOL(fscrypt_get_ctx); - struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags) { return mempool_alloc(fscrypt_bounce_page_pool, gfp_flags); @@ -391,17 +327,6 @@ const struct dentry_operations fscrypt_d_ops = { .d_revalidate = fscrypt_d_revalidate, }; -static void fscrypt_destroy(void) -{ - struct fscrypt_ctx *pos, *n; - - list_for_each_entry_safe(pos, n, &fscrypt_free_ctxs, free_list) - kmem_cache_free(fscrypt_ctx_cachep, pos); - INIT_LIST_HEAD(&fscrypt_free_ctxs); - mempool_destroy(fscrypt_bounce_page_pool); - fscrypt_bounce_page_pool = NULL; -} - /** * fscrypt_initialize() - allocate major buffers for fs encryption. * @cop_flags: fscrypt operations flags @@ -409,11 +334,11 @@ static void fscrypt_destroy(void) * We only call this when we start accessing encrypted files, since it * results in memory getting allocated that wouldn't otherwise be used. * - * Return: Zero on success, non-zero otherwise. + * Return: 0 on success; -errno on failure */ int fscrypt_initialize(unsigned int cop_flags) { - int i, res = -ENOMEM; + int err = 0; /* No need to allocate a bounce page pool if this FS won't use it. */ if (cop_flags & FS_CFLG_OWN_PAGES) @@ -421,29 +346,18 @@ int fscrypt_initialize(unsigned int cop_flags) mutex_lock(&fscrypt_init_mutex); if (fscrypt_bounce_page_pool) - goto already_initialized; - - for (i = 0; i < num_prealloc_crypto_ctxs; i++) { - struct fscrypt_ctx *ctx; - - ctx = kmem_cache_zalloc(fscrypt_ctx_cachep, GFP_NOFS); - if (!ctx) - goto fail; - list_add(&ctx->free_list, &fscrypt_free_ctxs); - } + goto out_unlock; + err = -ENOMEM; fscrypt_bounce_page_pool = mempool_create_page_pool(num_prealloc_crypto_pages, 0); if (!fscrypt_bounce_page_pool) - goto fail; + goto out_unlock; -already_initialized: + err = 0; +out_unlock: mutex_unlock(&fscrypt_init_mutex); - return 0; -fail: - fscrypt_destroy(); - mutex_unlock(&fscrypt_init_mutex); - return res; + return err; } void fscrypt_msg(const struct inode *inode, const char *level, @@ -489,13 +403,9 @@ static int __init fscrypt_init(void) if (!fscrypt_read_workqueue) goto fail; - fscrypt_ctx_cachep = KMEM_CACHE(fscrypt_ctx, SLAB_RECLAIM_ACCOUNT); - if (!fscrypt_ctx_cachep) - goto fail_free_queue; - fscrypt_info_cachep = KMEM_CACHE(fscrypt_info, SLAB_RECLAIM_ACCOUNT); if (!fscrypt_info_cachep) - goto fail_free_ctx; + goto fail_free_queue; err = fscrypt_init_keyring(); if (err) @@ -505,8 +415,6 @@ static int __init fscrypt_init(void) fail_free_info: kmem_cache_destroy(fscrypt_info_cachep); -fail_free_ctx: - kmem_cache_destroy(fscrypt_ctx_cachep); fail_free_queue: destroy_workqueue(fscrypt_read_workqueue); fail: diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 76c64297ce18..dacf8fcbac3b 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -203,8 +203,6 @@ typedef enum { FS_ENCRYPT, } fscrypt_direction_t; -#define FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001 - static inline bool fscrypt_valid_enc_modes(u32 contents_mode, u32 filenames_mode) { diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 72ea24ce52ab..d1013ba97ddf 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -20,7 +20,6 @@ #define FS_CRYPTO_BLOCK_SIZE 16 -struct fscrypt_ctx; struct fscrypt_info; struct fscrypt_str { @@ -64,18 +63,6 @@ struct fscrypt_operations { unsigned int max_namelen; }; -/* Decryption work */ -struct fscrypt_ctx { - union { - struct { - struct bio *bio; - struct work_struct work; - }; - struct list_head free_list; /* Free list */ - }; - u8 flags; /* Flags */ -}; - static inline bool fscrypt_has_encryption_key(const struct inode *inode) { /* pairs with cmpxchg_release() in fscrypt_get_encryption_info() */ @@ -102,8 +89,6 @@ static inline void fscrypt_handle_d_move(struct dentry *dentry) /* crypto.c */ extern void fscrypt_enqueue_decrypt_work(struct work_struct *); -extern struct fscrypt_ctx *fscrypt_get_ctx(gfp_t); -extern void fscrypt_release_ctx(struct fscrypt_ctx *); extern struct page *fscrypt_encrypt_pagecache_blocks(struct page *page, unsigned int len, @@ -244,8 +229,6 @@ static inline bool fscrypt_match_name(const struct fscrypt_name *fname, /* bio.c */ extern void fscrypt_decrypt_bio(struct bio *); -extern void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, - struct bio *bio); extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t, unsigned int); @@ -290,16 +273,6 @@ static inline void fscrypt_enqueue_decrypt_work(struct work_struct *work) { } -static inline struct fscrypt_ctx *fscrypt_get_ctx(gfp_t gfp_flags) -{ - return ERR_PTR(-EOPNOTSUPP); -} - -static inline void fscrypt_release_ctx(struct fscrypt_ctx *ctx) -{ - return; -} - static inline struct page *fscrypt_encrypt_pagecache_blocks(struct page *page, unsigned int len, unsigned int offs, @@ -479,11 +452,6 @@ static inline void fscrypt_decrypt_bio(struct bio *bio) { } -static inline void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, - struct bio *bio) -{ -} - static inline int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, sector_t pblk, unsigned int len) { From 85416094893e980b18aa20ddc0435695c3aaca64 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 9 Oct 2019 16:34:17 -0700 Subject: [PATCH 2129/3715] fscrypt: zeroize fscrypt_info before freeing memset the struct fscrypt_info to zero before freeing. This isn't really needed currently, since there's no secret key directly in the fscrypt_info. But there's a decent chance that someone will add such a field in the future, e.g. in order to use an API that takes a raw key such as siphash(). So it's good to do this as a hardening measure. Signed-off-by: Eric Biggers --- fs/crypto/keysetup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index 03ecd4ed713a..8b6072b2b84f 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -327,6 +327,7 @@ static void put_crypt_info(struct fscrypt_info *ci) key_invalidate(key); key_put(key); } + memzero_explicit(ci, sizeof(*ci)); kmem_cache_free(fscrypt_info_cachep, ci); } From 5d0a5024fd62229f65c56aac6fa2dfe01fc5ba45 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 21 Oct 2019 13:49:03 -0700 Subject: [PATCH 2130/3715] fscrypt: avoid data race on fscrypt_mode::logged_impl_name The access to logged_impl_name is technically a data race, which tools like KCSAN could complain about in the future. See: https://github.com/google/ktsan/wiki/READ_ONCE-and-WRITE_ONCE Fix by using xchg(), which also ensures that only one thread does the logging. This also required switching from bool to int, to avoid a build error on the RISC-V architecture which doesn't implement xchg on bytes. Signed-off-by: Eric Biggers --- fs/crypto/fscrypt_private.h | 2 +- fs/crypto/keysetup.c | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index dacf8fcbac3b..d9a3e8614049 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -435,7 +435,7 @@ struct fscrypt_mode { const char *cipher_str; int keysize; int ivsize; - bool logged_impl_name; + int logged_impl_name; }; static inline bool diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index 8b6072b2b84f..b634e9d71aa4 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -81,15 +81,13 @@ struct crypto_skcipher *fscrypt_allocate_skcipher(struct fscrypt_mode *mode, mode->cipher_str, PTR_ERR(tfm)); return tfm; } - if (unlikely(!mode->logged_impl_name)) { + if (!xchg(&mode->logged_impl_name, 1)) { /* * fscrypt performance can vary greatly depending on which * crypto algorithm implementation is used. Help people debug * performance problems by logging the ->cra_driver_name the - * first time a mode is used. Note that multiple threads can - * race here, but it doesn't really matter. + * first time a mode is used. */ - mode->logged_impl_name = true; pr_info("fscrypt: %s using implementation \"%s\"\n", mode->friendly_name, crypto_skcipher_alg(tfm)->base.cra_driver_name); From 959cb31e1ae85bf249a8faf142a74d3af3d27c3d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 24 Oct 2019 14:54:36 -0700 Subject: [PATCH 2131/3715] fscrypt: add support for IV_INO_LBLK_64 policies Inline encryption hardware compliant with the UFS v2.1 standard or with the upcoming version of the eMMC standard has the following properties: (1) Per I/O request, the encryption key is specified by a previously loaded keyslot. There might be only a small number of keyslots. (2) Per I/O request, the starting IV is specified by a 64-bit "data unit number" (DUN). IV bits 64-127 are assumed to be 0. The hardware automatically increments the DUN for each "data unit" of configurable size in the request, e.g. for each filesystem block. Property (1) makes it inefficient to use the traditional fscrypt per-file keys. Property (2) precludes the use of the existing DIRECT_KEY fscrypt policy flag, which needs at least 192 IV bits. Therefore, add a new fscrypt policy flag IV_INO_LBLK_64 which causes the encryption to modified as follows: - The encryption keys are derived from the master key, encryption mode number, and filesystem UUID. - The IVs are chosen as (inode_number << 32) | file_logical_block_num. For filenames encryption, file_logical_block_num is 0. Since the file nonces aren't used in the key derivation, many files may share the same encryption key. This is much more efficient on the target hardware. Including the inode number in the IVs and mixing the filesystem UUID into the keys ensures that data in different files is nevertheless still encrypted differently. Additionally, limiting the inode and block numbers to 32 bits and placing the block number in the low bits maintains compatibility with the 64-bit DUN convention (property (2) above). Since this scheme assumes that inode numbers are stable (which may preclude filesystem shrinking) and that inode and file logical block numbers are at most 32-bit, IV_INO_LBLK_64 will only be allowed on filesystems that meet these constraints. These are acceptable limitations for the cases where this format would actually be used. Note that IV_INO_LBLK_64 is an on-disk format, not an implementation. This patch just adds support for it using the existing filesystem layer encryption. A later patch will add support for inline encryption. Reviewed-by: Paul Crowley Co-developed-by: Satya Tangirala Signed-off-by: Satya Tangirala Signed-off-by: Eric Biggers --- Documentation/filesystems/fscrypt.rst | 63 +++++++++++++++++---------- fs/crypto/crypto.c | 12 +++-- fs/crypto/fscrypt_private.h | 16 +++++-- fs/crypto/keyring.c | 6 ++- fs/crypto/keysetup.c | 45 ++++++++++++++----- fs/crypto/policy.c | 41 ++++++++++++++++- include/linux/fscrypt.h | 3 ++ include/uapi/linux/fscrypt.h | 3 +- 8 files changed, 147 insertions(+), 42 deletions(-) diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst index 6ec459be3de1..471a511c7508 100644 --- a/Documentation/filesystems/fscrypt.rst +++ b/Documentation/filesystems/fscrypt.rst @@ -256,13 +256,8 @@ alternative master keys or to support rotating master keys. Instead, the master keys may be wrapped in userspace, e.g. as is done by the `fscrypt `_ tool. -Including the inode number in the IVs was considered. However, it was -rejected as it would have prevented ext4 filesystems from being -resized, and by itself still wouldn't have been sufficient to prevent -the same key from being directly reused for both XTS and CTS-CBC. - -DIRECT_KEY and per-mode keys ----------------------------- +DIRECT_KEY policies +------------------- The Adiantum encryption mode (see `Encryption modes and usage`_) is suitable for both contents and filenames encryption, and it accepts @@ -285,6 +280,21 @@ IV. Moreover: key derived using the KDF. Users may use the same master key for other v2 encryption policies. +IV_INO_LBLK_64 policies +----------------------- + +When FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64 is set in the fscrypt policy, +the encryption keys are derived from the master key, encryption mode +number, and filesystem UUID. This normally results in all files +protected by the same master key sharing a single contents encryption +key and a single filenames encryption key. To still encrypt different +files' data differently, inode numbers are included in the IVs. +Consequently, shrinking the filesystem may not be allowed. + +This format is optimized for use with inline encryption hardware +compliant with the UFS or eMMC standards, which support only 64 IV +bits per I/O request and may have only a small number of keyslots. + Key identifiers --------------- @@ -342,10 +352,16 @@ a little endian number, except that: is encrypted with AES-256 where the AES-256 key is the SHA-256 hash of the file's data encryption key. -- In the "direct key" configuration (FSCRYPT_POLICY_FLAG_DIRECT_KEY - set in the fscrypt_policy), the file's nonce is also appended to the - IV. Currently this is only allowed with the Adiantum encryption - mode. +- With `DIRECT_KEY policies`_, the file's nonce is appended to the IV. + Currently this is only allowed with the Adiantum encryption mode. + +- With `IV_INO_LBLK_64 policies`_, the logical block number is limited + to 32 bits and is placed in bits 0-31 of the IV. The inode number + (which is also limited to 32 bits) is placed in bits 32-63. + +Note that because file logical block numbers are included in the IVs, +filesystems must enforce that blocks are never shifted around within +encrypted files, e.g. via "collapse range" or "insert range". Filenames encryption -------------------- @@ -355,10 +371,10 @@ the requirements to retain support for efficient directory lookups and filenames of up to 255 bytes, the same IV is used for every filename in a directory. -However, each encrypted directory still uses a unique key; or -alternatively (for the "direct key" configuration) has the file's -nonce included in the IVs. Thus, IV reuse is limited to within a -single directory. +However, each encrypted directory still uses a unique key, or +alternatively has the file's nonce (for `DIRECT_KEY policies`_) or +inode number (for `IV_INO_LBLK_64 policies`_) included in the IVs. +Thus, IV reuse is limited to within a single directory. With CTS-CBC, the IV reuse means that when the plaintext filenames share a common prefix at least as long as the cipher block size (16 @@ -432,12 +448,15 @@ This structure must be initialized as follows: (1) for ``contents_encryption_mode`` and FSCRYPT_MODE_AES_256_CTS (4) for ``filenames_encryption_mode``. -- ``flags`` must contain a value from ```` which - identifies the amount of NUL-padding to use when encrypting - filenames. If unsure, use FSCRYPT_POLICY_FLAGS_PAD_32 (0x3). - Additionally, if the encryption modes are both - FSCRYPT_MODE_ADIANTUM, this can contain - FSCRYPT_POLICY_FLAG_DIRECT_KEY; see `DIRECT_KEY and per-mode keys`_. +- ``flags`` contains optional flags from ````: + + - FSCRYPT_POLICY_FLAGS_PAD_*: The amount of NUL padding to use when + encrypting filenames. If unsure, use FSCRYPT_POLICY_FLAGS_PAD_32 + (0x3). + - FSCRYPT_POLICY_FLAG_DIRECT_KEY: See `DIRECT_KEY policies`_. + - FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64: See `IV_INO_LBLK_64 + policies`_. This is mutually exclusive with DIRECT_KEY and is not + supported on v1 policies. - For v2 encryption policies, ``__reserved`` must be zeroed. @@ -1090,7 +1109,7 @@ policy structs (see `Setting an encryption policy`_), except that the context structs also contain a nonce. The nonce is randomly generated by the kernel and is used as KDF input or as a tweak to cause different files to be encrypted differently; see `Per-file keys`_ and -`DIRECT_KEY and per-mode keys`_. +`DIRECT_KEY policies`_. Data path changes ----------------- diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 9c5b59cdc63c..6e6f39ea18a7 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -72,11 +72,17 @@ EXPORT_SYMBOL(fscrypt_free_bounce_page); void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num, const struct fscrypt_info *ci) { - memset(iv, 0, ci->ci_mode->ivsize); - iv->lblk_num = cpu_to_le64(lblk_num); + u8 flags = fscrypt_policy_flags(&ci->ci_policy); - if (fscrypt_is_direct_key_policy(&ci->ci_policy)) + memset(iv, 0, ci->ci_mode->ivsize); + + if (flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64) { + WARN_ON_ONCE((u32)lblk_num != lblk_num); + lblk_num |= (u64)ci->ci_inode->i_ino << 32; + } else if (flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) { memcpy(iv->nonce, ci->ci_nonce, FS_KEY_DERIVATION_NONCE_SIZE); + } + iv->lblk_num = cpu_to_le64(lblk_num); } /* Encrypt or decrypt a single filesystem block of file contents */ diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index d9a3e8614049..130b50e5a011 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -163,6 +163,9 @@ struct fscrypt_info { /* The actual crypto transform used for encryption and decryption */ struct crypto_skcipher *ci_ctfm; + /* True if the key should be freed when this fscrypt_info is freed */ + bool ci_owns_key; + /* * Encryption mode used for this inode. It corresponds to either the * contents or filenames encryption mode, depending on the inode type. @@ -281,7 +284,8 @@ extern int fscrypt_init_hkdf(struct fscrypt_hkdf *hkdf, const u8 *master_key, */ #define HKDF_CONTEXT_KEY_IDENTIFIER 1 #define HKDF_CONTEXT_PER_FILE_KEY 2 -#define HKDF_CONTEXT_PER_MODE_KEY 3 +#define HKDF_CONTEXT_DIRECT_KEY 3 +#define HKDF_CONTEXT_IV_INO_LBLK_64_KEY 4 extern int fscrypt_hkdf_expand(struct fscrypt_hkdf *hkdf, u8 context, const u8 *info, unsigned int infolen, @@ -378,8 +382,14 @@ struct fscrypt_master_key { struct list_head mk_decrypted_inodes; spinlock_t mk_decrypted_inodes_lock; - /* Per-mode tfms for DIRECT_KEY policies, allocated on-demand */ - struct crypto_skcipher *mk_mode_keys[__FSCRYPT_MODE_MAX + 1]; + /* Crypto API transforms for DIRECT_KEY policies, allocated on-demand */ + struct crypto_skcipher *mk_direct_tfms[__FSCRYPT_MODE_MAX + 1]; + + /* + * Crypto API transforms for filesystem-layer implementation of + * IV_INO_LBLK_64 policies, allocated on-demand. + */ + struct crypto_skcipher *mk_iv_ino_lblk_64_tfms[__FSCRYPT_MODE_MAX + 1]; } __randomize_layout; diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c index 7f43ca5d30ae..ecbebdc1b02a 100644 --- a/fs/crypto/keyring.c +++ b/fs/crypto/keyring.c @@ -43,8 +43,10 @@ static void free_master_key(struct fscrypt_master_key *mk) wipe_master_key_secret(&mk->mk_secret); - for (i = 0; i < ARRAY_SIZE(mk->mk_mode_keys); i++) - crypto_free_skcipher(mk->mk_mode_keys[i]); + for (i = 0; i <= __FSCRYPT_MODE_MAX; i++) { + crypto_free_skcipher(mk->mk_direct_tfms[i]); + crypto_free_skcipher(mk->mk_iv_ino_lblk_64_tfms[i]); + } key_put(mk->mk_users); kzfree(mk); diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index b634e9d71aa4..2f926d3e6b5d 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -114,40 +114,54 @@ int fscrypt_set_derived_key(struct fscrypt_info *ci, const u8 *derived_key) return PTR_ERR(tfm); ci->ci_ctfm = tfm; + ci->ci_owns_key = true; return 0; } static int setup_per_mode_key(struct fscrypt_info *ci, - struct fscrypt_master_key *mk) + struct fscrypt_master_key *mk, + struct crypto_skcipher **tfms, + u8 hkdf_context, bool include_fs_uuid) { + const struct inode *inode = ci->ci_inode; + const struct super_block *sb = inode->i_sb; struct fscrypt_mode *mode = ci->ci_mode; u8 mode_num = mode - available_modes; struct crypto_skcipher *tfm, *prev_tfm; u8 mode_key[FSCRYPT_MAX_KEY_SIZE]; + u8 hkdf_info[sizeof(mode_num) + sizeof(sb->s_uuid)]; + unsigned int hkdf_infolen = 0; int err; - if (WARN_ON(mode_num >= ARRAY_SIZE(mk->mk_mode_keys))) + if (WARN_ON(mode_num > __FSCRYPT_MODE_MAX)) return -EINVAL; /* pairs with cmpxchg() below */ - tfm = READ_ONCE(mk->mk_mode_keys[mode_num]); + tfm = READ_ONCE(tfms[mode_num]); if (likely(tfm != NULL)) goto done; BUILD_BUG_ON(sizeof(mode_num) != 1); + BUILD_BUG_ON(sizeof(sb->s_uuid) != 16); + BUILD_BUG_ON(sizeof(hkdf_info) != 17); + hkdf_info[hkdf_infolen++] = mode_num; + if (include_fs_uuid) { + memcpy(&hkdf_info[hkdf_infolen], &sb->s_uuid, + sizeof(sb->s_uuid)); + hkdf_infolen += sizeof(sb->s_uuid); + } err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf, - HKDF_CONTEXT_PER_MODE_KEY, - &mode_num, sizeof(mode_num), + hkdf_context, hkdf_info, hkdf_infolen, mode_key, mode->keysize); if (err) return err; - tfm = fscrypt_allocate_skcipher(mode, mode_key, ci->ci_inode); + tfm = fscrypt_allocate_skcipher(mode, mode_key, inode); memzero_explicit(mode_key, mode->keysize); if (IS_ERR(tfm)) return PTR_ERR(tfm); /* pairs with READ_ONCE() above */ - prev_tfm = cmpxchg(&mk->mk_mode_keys[mode_num], NULL, tfm); + prev_tfm = cmpxchg(&tfms[mode_num], NULL, tfm); if (prev_tfm != NULL) { crypto_free_skcipher(tfm); tfm = prev_tfm; @@ -178,7 +192,19 @@ static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci, ci->ci_mode->friendly_name); return -EINVAL; } - return setup_per_mode_key(ci, mk); + return setup_per_mode_key(ci, mk, mk->mk_direct_tfms, + HKDF_CONTEXT_DIRECT_KEY, false); + } else if (ci->ci_policy.v2.flags & + FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64) { + /* + * IV_INO_LBLK_64: encryption keys are derived from (master_key, + * mode_num, filesystem_uuid), and inode number is included in + * the IVs. This format is optimized for use with inline + * encryption hardware compliant with the UFS or eMMC standards. + */ + return setup_per_mode_key(ci, mk, mk->mk_iv_ino_lblk_64_tfms, + HKDF_CONTEXT_IV_INO_LBLK_64_KEY, + true); } err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf, @@ -302,8 +328,7 @@ static void put_crypt_info(struct fscrypt_info *ci) if (ci->ci_direct_key) fscrypt_put_direct_key(ci->ci_direct_key); - else if (ci->ci_ctfm != NULL && - !fscrypt_is_direct_key_policy(&ci->ci_policy)) + else if (ci->ci_owns_key) crypto_free_skcipher(ci->ci_ctfm); key = ci->ci_master_key; diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index 4072ba644595..96f528071bed 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -29,6 +29,40 @@ bool fscrypt_policies_equal(const union fscrypt_policy *policy1, return !memcmp(policy1, policy2, fscrypt_policy_size(policy1)); } +static bool supported_iv_ino_lblk_64_policy( + const struct fscrypt_policy_v2 *policy, + const struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + int ino_bits = 64, lblk_bits = 64; + + if (policy->flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) { + fscrypt_warn(inode, + "The DIRECT_KEY and IV_INO_LBLK_64 flags are mutually exclusive"); + return false; + } + /* + * It's unsafe to include inode numbers in the IVs if the filesystem can + * potentially renumber inodes, e.g. via filesystem shrinking. + */ + if (!sb->s_cop->has_stable_inodes || + !sb->s_cop->has_stable_inodes(sb)) { + fscrypt_warn(inode, + "Can't use IV_INO_LBLK_64 policy on filesystem '%s' because it doesn't have stable inode numbers", + sb->s_id); + return false; + } + if (sb->s_cop->get_ino_and_lblk_bits) + sb->s_cop->get_ino_and_lblk_bits(sb, &ino_bits, &lblk_bits); + if (ino_bits > 32 || lblk_bits > 32) { + fscrypt_warn(inode, + "Can't use IV_INO_LBLK_64 policy on filesystem '%s' because it doesn't use 32-bit inode and block numbers", + sb->s_id); + return false; + } + return true; +} + /** * fscrypt_supported_policy - check whether an encryption policy is supported * @@ -55,7 +89,8 @@ bool fscrypt_supported_policy(const union fscrypt_policy *policy_u, return false; } - if (policy->flags & ~FSCRYPT_POLICY_FLAGS_VALID) { + if (policy->flags & ~(FSCRYPT_POLICY_FLAGS_PAD_MASK | + FSCRYPT_POLICY_FLAG_DIRECT_KEY)) { fscrypt_warn(inode, "Unsupported encryption flags (0x%02x)", policy->flags); @@ -83,6 +118,10 @@ bool fscrypt_supported_policy(const union fscrypt_policy *policy_u, return false; } + if ((policy->flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64) && + !supported_iv_ino_lblk_64_policy(policy, inode)) + return false; + if (memchr_inv(policy->__reserved, 0, sizeof(policy->__reserved))) { fscrypt_warn(inode, diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index d1013ba97ddf..7f302250e52e 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -61,6 +61,9 @@ struct fscrypt_operations { bool (*dummy_context)(struct inode *); bool (*empty_dir)(struct inode *); unsigned int max_namelen; + bool (*has_stable_inodes)(struct super_block *sb); + void (*get_ino_and_lblk_bits)(struct super_block *sb, + int *ino_bits_ret, int *lblk_bits_ret); }; static inline bool fscrypt_has_encryption_key(const struct inode *inode) diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h index 39ccfe9311c3..1beb174ad950 100644 --- a/include/uapi/linux/fscrypt.h +++ b/include/uapi/linux/fscrypt.h @@ -17,7 +17,8 @@ #define FSCRYPT_POLICY_FLAGS_PAD_32 0x03 #define FSCRYPT_POLICY_FLAGS_PAD_MASK 0x03 #define FSCRYPT_POLICY_FLAG_DIRECT_KEY 0x04 -#define FSCRYPT_POLICY_FLAGS_VALID 0x07 +#define FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64 0x08 +#define FSCRYPT_POLICY_FLAGS_VALID 0x0F /* Encryption algorithms */ #define FSCRYPT_MODE_AES_256_XTS 1 From b9c8fe456e8918ef476d433f09bb01b424e444e4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 24 Oct 2019 14:54:37 -0700 Subject: [PATCH 2132/3715] ext4: add support for IV_INO_LBLK_64 encryption policies IV_INO_LBLK_64 encryption policies have special requirements from the filesystem beyond those of the existing encryption policies: - Inode numbers must never change, even if the filesystem is resized. - Inode numbers must be <= 32 bits. - File logical block numbers must be <= 32 bits. ext4 has 32-bit inode and file logical block numbers. However, resize2fs can re-number inodes when shrinking an ext4 filesystem. However, typically the people who would want to use this format don't care about filesystem shrinking. They'd be fine with a solution that just prevents the filesystem from being shrunk. Therefore, add a new feature flag EXT4_FEATURE_COMPAT_STABLE_INODES that will do exactly that. Then wire up the fscrypt_operations to expose this flag to fs/crypto/, so that it allows IV_INO_LBLK_64 policies when this flag is set. Acked-by: Theodore Ts'o Signed-off-by: Eric Biggers --- fs/ext4/ext4.h | 2 ++ fs/ext4/super.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 6b1bf406cf10..8d6accd3f763 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1683,6 +1683,7 @@ static inline bool ext4_verity_in_progress(struct inode *inode) #define EXT4_FEATURE_COMPAT_RESIZE_INODE 0x0010 #define EXT4_FEATURE_COMPAT_DIR_INDEX 0x0020 #define EXT4_FEATURE_COMPAT_SPARSE_SUPER2 0x0200 +#define EXT4_FEATURE_COMPAT_STABLE_INODES 0x0800 #define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 #define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 @@ -1779,6 +1780,7 @@ EXT4_FEATURE_COMPAT_FUNCS(xattr, EXT_ATTR) EXT4_FEATURE_COMPAT_FUNCS(resize_inode, RESIZE_INODE) EXT4_FEATURE_COMPAT_FUNCS(dir_index, DIR_INDEX) EXT4_FEATURE_COMPAT_FUNCS(sparse_super2, SPARSE_SUPER2) +EXT4_FEATURE_COMPAT_FUNCS(stable_inodes, STABLE_INODES) EXT4_FEATURE_RO_COMPAT_FUNCS(sparse_super, SPARSE_SUPER) EXT4_FEATURE_RO_COMPAT_FUNCS(large_file, LARGE_FILE) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 64de90f18408..066941be4812 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1239,6 +1239,18 @@ static bool ext4_dummy_context(struct inode *inode) return DUMMY_ENCRYPTION_ENABLED(EXT4_SB(inode->i_sb)); } +static bool ext4_has_stable_inodes(struct super_block *sb) +{ + return ext4_has_feature_stable_inodes(sb); +} + +static void ext4_get_ino_and_lblk_bits(struct super_block *sb, + int *ino_bits_ret, int *lblk_bits_ret) +{ + *ino_bits_ret = 8 * sizeof(EXT4_SB(sb)->s_es->s_inodes_count); + *lblk_bits_ret = 8 * sizeof(ext4_lblk_t); +} + static const struct fscrypt_operations ext4_cryptops = { .key_prefix = "ext4:", .get_context = ext4_get_context, @@ -1246,6 +1258,8 @@ static const struct fscrypt_operations ext4_cryptops = { .dummy_context = ext4_dummy_context, .empty_dir = ext4_empty_dir, .max_namelen = EXT4_NAME_LEN, + .has_stable_inodes = ext4_has_stable_inodes, + .get_ino_and_lblk_bits = ext4_get_ino_and_lblk_bits, }; #endif From a6a7ff5b18879839e7f4b261828addcc245c16db Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 24 Oct 2019 14:54:38 -0700 Subject: [PATCH 2133/3715] f2fs: add support for IV_INO_LBLK_64 encryption policies f2fs inode numbers are stable across filesystem resizing, and f2fs inode and file logical block numbers are always 32-bit. So f2fs can always support IV_INO_LBLK_64 encryption policies. Wire up the needed fscrypt_operations to declare support. Acked-by: Jaegeuk Kim Signed-off-by: Eric Biggers --- fs/f2fs/super.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 47898dc931ef..6ed411850b1c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2324,13 +2324,27 @@ static bool f2fs_dummy_context(struct inode *inode) return DUMMY_ENCRYPTION_ENABLED(F2FS_I_SB(inode)); } +static bool f2fs_has_stable_inodes(struct super_block *sb) +{ + return true; +} + +static void f2fs_get_ino_and_lblk_bits(struct super_block *sb, + int *ino_bits_ret, int *lblk_bits_ret) +{ + *ino_bits_ret = 8 * sizeof(nid_t); + *lblk_bits_ret = 8 * sizeof(block_t); +} + static const struct fscrypt_operations f2fs_cryptops = { - .key_prefix = "f2fs:", - .get_context = f2fs_get_context, - .set_context = f2fs_set_context, - .dummy_context = f2fs_dummy_context, - .empty_dir = f2fs_empty_dir, - .max_namelen = F2FS_NAME_LEN, + .key_prefix = "f2fs:", + .get_context = f2fs_get_context, + .set_context = f2fs_set_context, + .dummy_context = f2fs_dummy_context, + .empty_dir = f2fs_empty_dir, + .max_namelen = F2FS_NAME_LEN, + .has_stable_inodes = f2fs_has_stable_inodes, + .get_ino_and_lblk_bits = f2fs_get_ino_and_lblk_bits, }; #endif From 9cccb8da2cc7fa2152597e5897bbb3c244c80f36 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 30 Oct 2019 15:19:15 -0700 Subject: [PATCH 2134/3715] docs: fs-verity: document first supported kernel version I had meant to replace these TODOs with the actual version when applying the patches, but forgot to do so. Do it now. Signed-off-by: Eric Biggers --- Documentation/filesystems/fsverity.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/filesystems/fsverity.rst b/Documentation/filesystems/fsverity.rst index 42a0b6dd9e0b..dd55204c9ef8 100644 --- a/Documentation/filesystems/fsverity.rst +++ b/Documentation/filesystems/fsverity.rst @@ -398,7 +398,7 @@ pages have been read into the pagecache. (See `Verifying data`_.) ext4 ---- -ext4 supports fs-verity since Linux TODO and e2fsprogs v1.45.2. +ext4 supports fs-verity since Linux v5.4 and e2fsprogs v1.45.2. To create verity files on an ext4 filesystem, the filesystem must have been formatted with ``-O verity`` or had ``tune2fs -O verity`` run on @@ -434,7 +434,7 @@ also only supports extent-based files. f2fs ---- -f2fs supports fs-verity since Linux TODO and f2fs-tools v1.11.0. +f2fs supports fs-verity since Linux v5.4 and f2fs-tools v1.11.0. To create verity files on an f2fs filesystem, the filesystem must have been formatted with ``-O verity``. From b5f2f63e91693d8b4739c72aa20d240b3831b3bd Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 29 Oct 2019 13:41:38 -0700 Subject: [PATCH 2135/3715] statx: define STATX_ATTR_VERITY Add a statx attribute bit STATX_ATTR_VERITY which will be set if the file has fs-verity enabled. This is the statx() equivalent of FS_VERITY_FL which is returned by FS_IOC_GETFLAGS. This is useful because it allows applications to check whether a file is a verity file without opening it. Opening a verity file can be expensive because the fsverity_info is set up on open, which involves parsing metadata and optionally verifying a cryptographic signature. This is analogous to how various other bits are exposed through both FS_IOC_GETFLAGS and statx(), e.g. the encrypt bit. Reviewed-by: Andreas Dilger Acked-by: Darrick J. Wong Signed-off-by: Eric Biggers --- include/linux/stat.h | 3 ++- include/uapi/linux/stat.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/linux/stat.h b/include/linux/stat.h index 22484e44544d..07295841fccd 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -33,7 +33,8 @@ struct kstat { STATX_ATTR_IMMUTABLE | \ STATX_ATTR_APPEND | \ STATX_ATTR_NODUMP | \ - STATX_ATTR_ENCRYPTED \ + STATX_ATTR_ENCRYPTED | \ + STATX_ATTR_VERITY \ )/* Attrs corresponding to FS_*_FL flags */ u64 ino; dev_t dev; diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index 7b35e98d3c58..ad80a5c885d5 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -167,8 +167,8 @@ struct statx { #define STATX_ATTR_APPEND 0x00000020 /* [I] File is append-only */ #define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */ #define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */ - #define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */ +#define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */ #endif /* _UAPI_LINUX_STAT_H */ From 0c17322429c1962edcac6d3703b0f7bf77ad1d64 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 29 Oct 2019 13:41:39 -0700 Subject: [PATCH 2136/3715] ext4: support STATX_ATTR_VERITY Set the STATX_ATTR_VERITY bit when the statx() system call is used on a verity file on ext4. Reviewed-by: Andreas Dilger Signed-off-by: Eric Biggers --- fs/ext4/inode.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a8b0cbc65655..4f2c73b52c83 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5510,12 +5510,15 @@ int ext4_getattr(const struct path *path, struct kstat *stat, stat->attributes |= STATX_ATTR_IMMUTABLE; if (flags & EXT4_NODUMP_FL) stat->attributes |= STATX_ATTR_NODUMP; + if (flags & EXT4_VERITY_FL) + stat->attributes |= STATX_ATTR_VERITY; stat->attributes_mask |= (STATX_ATTR_APPEND | STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED | STATX_ATTR_IMMUTABLE | - STATX_ATTR_NODUMP); + STATX_ATTR_NODUMP | + STATX_ATTR_VERITY); generic_fillattr(inode, stat); return 0; From c4948febfddaee63b867427716794ebdc41a49b8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 29 Oct 2019 13:41:40 -0700 Subject: [PATCH 2137/3715] f2fs: support STATX_ATTR_VERITY Set the STATX_ATTR_VERITY bit when the statx() system call is used on a verity file on f2fs. Signed-off-by: Eric Biggers --- fs/f2fs/file.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 70292386ed85..115763b0135c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -725,11 +725,14 @@ int f2fs_getattr(const struct path *path, struct kstat *stat, stat->attributes |= STATX_ATTR_IMMUTABLE; if (flags & F2FS_NODUMP_FL) stat->attributes |= STATX_ATTR_NODUMP; + if (IS_VERITY(inode)) + stat->attributes |= STATX_ATTR_VERITY; stat->attributes_mask |= (STATX_ATTR_APPEND | STATX_ATTR_ENCRYPTED | STATX_ATTR_IMMUTABLE | - STATX_ATTR_NODUMP); + STATX_ATTR_NODUMP | + STATX_ATTR_VERITY); generic_fillattr(inode, stat); From 84648bc446073628e6659b4bab41fc5add71b301 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 29 Oct 2019 13:41:41 -0700 Subject: [PATCH 2138/3715] docs: fs-verity: mention statx() support Document that the statx() system call can now be used to check whether a file is a verity file. Signed-off-by: Eric Biggers --- Documentation/filesystems/fsverity.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/filesystems/fsverity.rst b/Documentation/filesystems/fsverity.rst index dd55204c9ef8..a95536b6443c 100644 --- a/Documentation/filesystems/fsverity.rst +++ b/Documentation/filesystems/fsverity.rst @@ -226,6 +226,14 @@ To do so, check for FS_VERITY_FL (0x00100000) in the returned flags. The verity flag is not settable via FS_IOC_SETFLAGS. You must use FS_IOC_ENABLE_VERITY instead, since parameters must be provided. +statx +----- + +Since Linux v5.5, the statx() system call sets STATX_ATTR_VERITY if +the file has fs-verity enabled. This can perform better than +FS_IOC_GETFLAGS and FS_IOC_MEASURE_VERITY because it doesn't require +opening the file, and opening verity files can be expensive. + Accessing verity files ====================== From 5f2d37a7252e86f5734defb2336d2278a8dc0d57 Mon Sep 17 00:00:00 2001 From: Ram Muthiah Date: Thu, 9 Jan 2020 14:53:34 -0800 Subject: [PATCH 2139/3715] ANDROID: cuttlefish_defconfig: remove 80211_HWSIM Fix to re-enable wifi on cf + kernel 4.14. Remove mac80211_hwsim virtual device on cuttlefish kernels as we want to use virt_wifi now instead. Test: Local boot with cuttlefish Bug: 145836345 Change-Id: Ifac04724afef7f5ad5aff46bae9c1ea0e77f892c Signed-off-by: Ram Muthiah --- arch/x86/configs/x86_64_cuttlefish_defconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index a21970185ddf..11e1f466ddd8 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -281,7 +281,6 @@ CONFIG_USB_USBNET=y # CONFIG_WLAN_VENDOR_TI is not set # CONFIG_WLAN_VENDOR_ZYDAS is not set # CONFIG_WLAN_VENDOR_QUANTENNA is not set -CONFIG_MAC80211_HWSIM=y CONFIG_VIRT_WIFI=y CONFIG_INPUT_MOUSEDEV=y CONFIG_INPUT_EVDEV=y From ca9689826312faeee13b0b5d78653658241b8c7d Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Mon, 14 Oct 2019 13:03:08 -0400 Subject: [PATCH 2140/3715] BACKPORT: perf_event: Add support for LSM and SELinux checks In current mainline, the degree of access to perf_event_open(2) system call depends on the perf_event_paranoid sysctl. This has a number of limitations: 1. The sysctl is only a single value. Many types of accesses are controlled based on the single value thus making the control very limited and coarse grained. 2. The sysctl is global, so if the sysctl is changed, then that means all processes get access to perf_event_open(2) opening the door to security issues. This patch adds LSM and SELinux access checking which will be used in Android to access perf_event_open(2) for the purposes of attaching BPF programs to tracepoints, perf profiling and other operations from userspace. These operations are intended for production systems. 5 new LSM hooks are added: 1. perf_event_open: This controls access during the perf_event_open(2) syscall itself. The hook is called from all the places that the perf_event_paranoid sysctl is checked to keep it consistent with the systctl. The hook gets passed a 'type' argument which controls CPU, kernel and tracepoint accesses (in this context, CPU, kernel and tracepoint have the same semantics as the perf_event_paranoid sysctl). Additionally, I added an 'open' type which is similar to perf_event_paranoid sysctl == 3 patch carried in Android and several other distros but was rejected in mainline [1] in 2016. 2. perf_event_alloc: This allocates a new security object for the event which stores the current SID within the event. It will be useful when the perf event's FD is passed through IPC to another process which may try to read the FD. Appropriate security checks will limit access. 3. perf_event_free: Called when the event is closed. 4. perf_event_read: Called from the read(2) and mmap(2) syscalls for the event. 5. perf_event_write: Called from the ioctl(2) syscalls for the event. [1] https://lwn.net/Articles/696240/ Since Peter had suggest LSM hooks in 2016 [1], I am adding his Suggested-by tag below. To use this patch, we set the perf_event_paranoid sysctl to -1 and then apply selinux checking as appropriate (default deny everything, and then add policy rules to give access to domains that need it). In the future we can remove the perf_event_paranoid sysctl altogether. Suggested-by: Peter Zijlstra Co-developed-by: Peter Zijlstra Signed-off-by: Joel Fernandes (Google) Signed-off-by: Peter Zijlstra (Intel) Acked-by: James Morris Cc: Arnaldo Carvalho de Melo Cc: rostedt@goodmis.org Cc: Yonghong Song Cc: Kees Cook Cc: Ingo Molnar Cc: Alexei Starovoitov Cc: jeffv@google.com Cc: Jiri Olsa Cc: Daniel Borkmann Cc: primiano@google.com Cc: Song Liu Cc: rsavitski@google.com Cc: Namhyung Kim Cc: Matthew Garrett Link: https://lkml.kernel.org/r/20191014170308.70668-1-joel@joelfernandes.org (cherry picked from commit da97e18458fb42d7c00fac5fd1c56a3896ec666e) [ Ryan Savitski: adapted for older APIs, and folded in upstream ae79d5588a04 (perf/core: Fix !CONFIG_PERF_EVENTS build warnings and failures). This should fix the build errors from the previous backport attempt, where certain configurations would end up with functions referring to the perf_event struct prior to its declaration (and therefore declaring it with a different scope). ] Bug: 137092007 Change-Id: Iece194b3519dc5016ccbe127fc4e5c425ee7c442 Signed-off-by: Ryan Savitski --- arch/powerpc/perf/core-book3s.c | 15 +++---- arch/x86/events/intel/bts.c | 8 ++-- arch/x86/events/intel/core.c | 5 ++- arch/x86/events/intel/p4.c | 5 ++- include/linux/lsm_hooks.h | 15 +++++++ include/linux/perf_event.h | 36 ++++++++++++--- include/linux/security.h | 39 +++++++++++++++- kernel/events/core.c | 57 ++++++++++++++++++----- kernel/trace/trace_event_perf.c | 15 ++++--- security/security.c | 27 +++++++++++ security/selinux/hooks.c | 70 +++++++++++++++++++++++++++++ security/selinux/include/classmap.h | 2 + security/selinux/include/objsec.h | 6 ++- 13 files changed, 262 insertions(+), 38 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 3188040022c4..1c37f08bcddd 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -95,7 +95,7 @@ static inline unsigned long perf_ip_adjust(struct pt_regs *regs) { return 0; } -static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { } +static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *regs, u64 *addrp) { } static inline u32 perf_get_misc_flags(struct pt_regs *regs) { return 0; @@ -126,7 +126,7 @@ static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw) static inline void power_pmu_bhrb_enable(struct perf_event *event) {} static inline void power_pmu_bhrb_disable(struct perf_event *event) {} static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) {} -static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {} +static inline void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw) {} static void pmao_restore_workaround(bool ebb) { } static bool use_ic(u64 event) { @@ -174,7 +174,7 @@ static inline unsigned long perf_ip_adjust(struct pt_regs *regs) * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC, the * [POWER7P_]MMCRA_SDAR_VALID bit in MMCRA, or the SDAR_VALID bit in SIER. */ -static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) +static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *regs, u64 *addrp) { unsigned long mmcra = regs->dsisr; bool sdar_valid; @@ -435,7 +435,7 @@ static __u64 power_pmu_bhrb_to(u64 addr) } /* Processing BHRB entries */ -static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) +static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw) { u64 val; u64 addr; @@ -463,8 +463,7 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) * exporting it to userspace (avoid exposure of regions * where we could have speculative execution) */ - if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) && - is_kernel_addr(addr)) + if (is_kernel_addr(addr) && perf_allow_kernel(&event->attr) != 0) continue; /* Branches are read most recent first (ie. mfbhrb 0 is @@ -2077,12 +2076,12 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (event->attr.sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) - perf_get_data_addr(regs, &data.addr); + perf_get_data_addr(event, regs, &data.addr); if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) { struct cpu_hw_events *cpuhw; cpuhw = this_cpu_ptr(&cpu_hw_events); - power_pmu_bhrb_read(cpuhw); + power_pmu_bhrb_read(event, cpuhw); data.br_stack = &cpuhw->bhrb_stack; } diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 510f9461407e..5a1cd9c3addf 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -563,9 +563,11 @@ static int bts_event_init(struct perf_event *event) * Note that the default paranoia setting permits unprivileged * users to profile the kernel. */ - if (event->attr.exclude_kernel && perf_paranoid_kernel() && - !capable(CAP_SYS_ADMIN)) - return -EACCES; + if (event->attr.exclude_kernel) { + ret = perf_allow_kernel(&event->attr); + if (ret) + return ret; + } if (x86_add_exclusive(x86_lbr_exclusive_bts)) return -EBUSY; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 4a60ed8c4413..0307e34d2272 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3087,8 +3087,9 @@ static int intel_pmu_hw_config(struct perf_event *event) if (x86_pmu.version < 3) return -EINVAL; - if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) - return -EACCES; + ret = perf_allow_cpu(&event->attr); + if (ret) + return ret; event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY; diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c index d32c0eed38ca..4f9ac72968db 100644 --- a/arch/x86/events/intel/p4.c +++ b/arch/x86/events/intel/p4.c @@ -776,8 +776,9 @@ static int p4_validate_raw_event(struct perf_event *event) * the user needs special permissions to be able to use it */ if (p4_ht_active() && p4_event_bind_map[v].shared) { - if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) - return -EACCES; + v = perf_allow_cpu(&event->attr); + if (v) + return v; } /* ESCR EventMask bits may be invalid */ diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 7161d8e7ee79..7e9f59aeadb6 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1727,6 +1727,14 @@ union security_list_options { int (*bpf_prog_alloc_security)(struct bpf_prog_aux *aux); void (*bpf_prog_free_security)(struct bpf_prog_aux *aux); #endif /* CONFIG_BPF_SYSCALL */ +#ifdef CONFIG_PERF_EVENTS + int (*perf_event_open)(struct perf_event_attr *attr, int type); + int (*perf_event_alloc)(struct perf_event *event); + void (*perf_event_free)(struct perf_event *event); + int (*perf_event_read)(struct perf_event *event); + int (*perf_event_write)(struct perf_event *event); + +#endif }; struct security_hook_heads { @@ -1955,6 +1963,13 @@ struct security_hook_heads { struct list_head bpf_prog_alloc_security; struct list_head bpf_prog_free_security; #endif /* CONFIG_BPF_SYSCALL */ +#ifdef CONFIG_PERF_EVENTS + struct list_head perf_event_open; + struct list_head perf_event_alloc; + struct list_head perf_event_free; + struct list_head perf_event_read; + struct list_head perf_event_write; +#endif } __randomize_layout; /* diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ac16bac38c03..5d798eb5ac0a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -54,6 +54,7 @@ struct perf_guest_info_callbacks { #include #include #include +#include #include struct perf_callchain_entry { @@ -712,6 +713,9 @@ struct perf_event { int cgrp_defer_enabled; #endif +#ifdef CONFIG_SECURITY + void *security; +#endif struct list_head sb_list; #endif /* CONFIG_PERF_EVENTS */ }; @@ -1175,24 +1179,46 @@ extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, int perf_event_max_stack_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +/* Access to perf_event_open(2) syscall. */ +#define PERF_SECURITY_OPEN 0 + +/* Finer grained perf_event_open(2) access control. */ +#define PERF_SECURITY_CPU 1 +#define PERF_SECURITY_KERNEL 2 +#define PERF_SECURITY_TRACEPOINT 3 + static inline bool perf_paranoid_any(void) { return sysctl_perf_event_paranoid > 2; } -static inline bool perf_paranoid_tracepoint_raw(void) +static inline int perf_is_paranoid(void) { return sysctl_perf_event_paranoid > -1; } -static inline bool perf_paranoid_cpu(void) +static inline int perf_allow_kernel(struct perf_event_attr *attr) { - return sysctl_perf_event_paranoid > 0; + if (sysctl_perf_event_paranoid > 1 && !capable(CAP_SYS_ADMIN)) + return -EACCES; + + return security_perf_event_open(attr, PERF_SECURITY_KERNEL); } -static inline bool perf_paranoid_kernel(void) +static inline int perf_allow_cpu(struct perf_event_attr *attr) { - return sysctl_perf_event_paranoid > 1; + if (sysctl_perf_event_paranoid > 0 && !capable(CAP_SYS_ADMIN)) + return -EACCES; + + return security_perf_event_open(attr, PERF_SECURITY_CPU); +} + +static inline int perf_allow_tracepoint(struct perf_event_attr *attr) +{ + if (sysctl_perf_event_paranoid > -1 && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + return security_perf_event_open(attr, PERF_SECURITY_TRACEPOINT); } extern void perf_event_init(void); diff --git a/include/linux/security.h b/include/linux/security.h index 73f1ef625d40..666c75c2269c 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1801,5 +1801,42 @@ static inline void free_secdata(void *secdata) { } #endif /* CONFIG_SECURITY */ -#endif /* ! __LINUX_SECURITY_H */ +#ifdef CONFIG_PERF_EVENTS +struct perf_event_attr; +struct perf_event; +#ifdef CONFIG_SECURITY +extern int security_perf_event_open(struct perf_event_attr *attr, int type); +extern int security_perf_event_alloc(struct perf_event *event); +extern void security_perf_event_free(struct perf_event *event); +extern int security_perf_event_read(struct perf_event *event); +extern int security_perf_event_write(struct perf_event *event); +#else +static inline int security_perf_event_open(struct perf_event_attr *attr, + int type) +{ + return 0; +} + +static inline int security_perf_event_alloc(struct perf_event *event) +{ + return 0; +} + +static inline void security_perf_event_free(struct perf_event *event) +{ +} + +static inline int security_perf_event_read(struct perf_event *event) +{ + return 0; +} + +static inline int security_perf_event_write(struct perf_event *event) +{ + return 0; +} +#endif /* CONFIG_SECURITY */ +#endif /* CONFIG_PERF_EVENTS */ + +#endif /* ! __LINUX_SECURITY_H */ diff --git a/kernel/events/core.c b/kernel/events/core.c index f87d54270076..e24e2d558cbc 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3912,8 +3912,9 @@ find_get_context(struct pmu *pmu, struct task_struct *task, if (!task) { /* Must be root to operate on a CPU event: */ - if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) - return ERR_PTR(-EACCES); + err = perf_allow_cpu(&event->attr); + if (err) + return ERR_PTR(err); cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); ctx = &cpuctx->ctx; @@ -4216,6 +4217,8 @@ static void _free_event(struct perf_event *event) unaccount_event(event); + security_perf_event_free(event); + if (event->rb) { /* * Can happen when we close an event with re-directed output. @@ -4635,6 +4638,10 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) struct perf_event_context *ctx; int ret; + ret = security_perf_event_read(event); + if (ret) + return ret; + ctx = perf_event_ctx_lock(event); ret = __perf_read(event, buf, count); perf_event_ctx_unlock(event, ctx); @@ -4880,6 +4887,11 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct perf_event_context *ctx; long ret; + /* Treat ioctl like writes as it is likely a mutating operation. */ + ret = security_perf_event_write(event); + if (ret) + return ret; + ctx = perf_event_ctx_lock(event); ret = _perf_ioctl(event, cmd, arg); perf_event_ctx_unlock(event, ctx); @@ -5340,6 +5352,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) if (!(vma->vm_flags & VM_SHARED)) return -EINVAL; + ret = security_perf_event_read(event); + if (ret) + return ret; + vma_size = vma->vm_end - vma->vm_start; if (vma->vm_pgoff == 0) { @@ -5453,7 +5469,7 @@ accounting: lock_limit >>= PAGE_SHIFT; locked = vma->vm_mm->pinned_vm + extra; - if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() && + if ((locked > lock_limit) && perf_is_paranoid() && !capable(CAP_IPC_LOCK)) { ret = -EPERM; goto unlock; @@ -9693,11 +9709,20 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, } } + err = security_perf_event_alloc(event); + if (err) + goto err_callchain_buffer; + /* symmetric to unaccount_event() in _free_event() */ account_event(event); return event; +err_callchain_buffer: + if (!event->parent) { + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) + put_callchain_buffers(); + } err_addr_filters: kfree(event->addr_filter_ranges); @@ -9815,9 +9840,11 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, attr->branch_sample_type = mask; } /* privileged levels capture (kernel, hv): check permissions */ - if ((mask & PERF_SAMPLE_BRANCH_PERM_PLM) - && perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) - return -EACCES; + if (mask & PERF_SAMPLE_BRANCH_PERM_PLM) { + ret = perf_allow_kernel(attr); + if (ret) + return ret; + } } if (attr->sample_type & PERF_SAMPLE_REGS_USER) { @@ -10030,13 +10057,19 @@ SYSCALL_DEFINE5(perf_event_open, if (perf_paranoid_any() && !capable(CAP_SYS_ADMIN)) return -EACCES; + /* Do we allow access to perf_event_open(2) ? */ + err = security_perf_event_open(&attr, PERF_SECURITY_OPEN); + if (err) + return err; + err = perf_copy_attr(attr_uptr, &attr); if (err) return err; if (!attr.exclude_kernel) { - if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) - return -EACCES; + err = perf_allow_kernel(&attr); + if (err) + return err; } if (attr.namespaces) { @@ -10053,9 +10086,11 @@ SYSCALL_DEFINE5(perf_event_open, } /* Only privileged users can get physical addresses */ - if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR) && - perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) - return -EACCES; + if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR)) { + err = perf_allow_kernel(&attr); + if (err) + return err; + } if (!attr.sample_max_stack) attr.sample_max_stack = sysctl_perf_event_max_stack; diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 13ba2d3f6a91..80b7b194c181 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -7,6 +7,7 @@ #include #include +#include #include "trace.h" static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS]; @@ -24,8 +25,10 @@ static int total_ref_count; static int perf_trace_event_perm(struct trace_event_call *tp_event, struct perf_event *p_event) { + int ret; + if (tp_event->perf_perm) { - int ret = tp_event->perf_perm(tp_event, p_event); + ret = tp_event->perf_perm(tp_event, p_event); if (ret) return ret; } @@ -44,8 +47,9 @@ static int perf_trace_event_perm(struct trace_event_call *tp_event, /* The ftrace function trace is allowed only for root. */ if (ftrace_event_is_function(tp_event)) { - if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) - return -EPERM; + ret = perf_allow_tracepoint(&p_event->attr); + if (ret) + return ret; if (!is_sampling_event(p_event)) return 0; @@ -80,8 +84,9 @@ static int perf_trace_event_perm(struct trace_event_call *tp_event, * ...otherwise raw tracepoint data can be a severe data leak, * only allow root to have these. */ - if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) - return -EPERM; + ret = perf_allow_tracepoint(&p_event->attr); + if (ret) + return ret; return 0; } diff --git a/security/security.c b/security/security.c index fb4910f0d0e2..5afd1dc81511 100644 --- a/security/security.c +++ b/security/security.c @@ -1745,3 +1745,30 @@ void security_bpf_prog_free(struct bpf_prog_aux *aux) call_void_hook(bpf_prog_free_security, aux); } #endif /* CONFIG_BPF_SYSCALL */ + +#ifdef CONFIG_PERF_EVENTS +int security_perf_event_open(struct perf_event_attr *attr, int type) +{ + return call_int_hook(perf_event_open, 0, attr, type); +} + +int security_perf_event_alloc(struct perf_event *event) +{ + return call_int_hook(perf_event_alloc, 0, event); +} + +void security_perf_event_free(struct perf_event *event) +{ + call_void_hook(perf_event_free, event); +} + +int security_perf_event_read(struct perf_event *event) +{ + return call_int_hook(perf_event_read, 0, event); +} + +int security_perf_event_write(struct perf_event *event) +{ + return call_int_hook(perf_event_write, 0, event); +} +#endif /* CONFIG_PERF_EVENTS */ diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 3c017acc3a92..2595465dcd3b 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -6620,6 +6620,68 @@ static void selinux_bpf_prog_free(struct bpf_prog_aux *aux) } #endif + +#ifdef CONFIG_PERF_EVENTS +static int selinux_perf_event_open(struct perf_event_attr *attr, int type) +{ + u32 requested, sid = current_sid(); + + if (type == PERF_SECURITY_OPEN) + requested = PERF_EVENT__OPEN; + else if (type == PERF_SECURITY_CPU) + requested = PERF_EVENT__CPU; + else if (type == PERF_SECURITY_KERNEL) + requested = PERF_EVENT__KERNEL; + else if (type == PERF_SECURITY_TRACEPOINT) + requested = PERF_EVENT__TRACEPOINT; + else + return -EINVAL; + + return avc_has_perm(&selinux_state, sid, sid, SECCLASS_PERF_EVENT, + requested, NULL); +} + +static int selinux_perf_event_alloc(struct perf_event *event) +{ + struct perf_event_security_struct *perfsec; + + perfsec = kzalloc(sizeof(*perfsec), GFP_KERNEL); + if (!perfsec) + return -ENOMEM; + + perfsec->sid = current_sid(); + event->security = perfsec; + + return 0; +} + +static void selinux_perf_event_free(struct perf_event *event) +{ + struct perf_event_security_struct *perfsec = event->security; + + event->security = NULL; + kfree(perfsec); +} + +static int selinux_perf_event_read(struct perf_event *event) +{ + struct perf_event_security_struct *perfsec = event->security; + u32 sid = current_sid(); + + return avc_has_perm(&selinux_state, sid, perfsec->sid, + SECCLASS_PERF_EVENT, PERF_EVENT__READ, NULL); +} + +static int selinux_perf_event_write(struct perf_event *event) +{ + struct perf_event_security_struct *perfsec = event->security; + u32 sid = current_sid(); + + return avc_has_perm(&selinux_state, sid, perfsec->sid, + SECCLASS_PERF_EVENT, PERF_EVENT__WRITE, NULL); +} +#endif + static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(binder_set_context_mgr, selinux_binder_set_context_mgr), LSM_HOOK_INIT(binder_transaction, selinux_binder_transaction), @@ -6849,6 +6911,14 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(bpf_map_free_security, selinux_bpf_map_free), LSM_HOOK_INIT(bpf_prog_free_security, selinux_bpf_prog_free), #endif + +#ifdef CONFIG_PERF_EVENTS + LSM_HOOK_INIT(perf_event_open, selinux_perf_event_open), + LSM_HOOK_INIT(perf_event_alloc, selinux_perf_event_alloc), + LSM_HOOK_INIT(perf_event_free, selinux_perf_event_free), + LSM_HOOK_INIT(perf_event_read, selinux_perf_event_read), + LSM_HOOK_INIT(perf_event_write, selinux_perf_event_write), +#endif }; static __init int selinux_init(void) diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index 5ae315ab060b..000effa857aa 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h @@ -241,6 +241,8 @@ struct security_class_mapping secclass_map[] = { { "manage_subnet", NULL } }, { "bpf", {"map_create", "map_read", "map_write", "prog_load", "prog_run"} }, + { "perf_event", + {"open", "cpu", "kernel", "tracepoint", "read", "write"} }, { NULL } }; diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index 18b3fe02b44c..512908b55ca3 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -151,7 +151,11 @@ struct pkey_security_struct { }; struct bpf_security_struct { - u32 sid; /*SID of bpf obj creater*/ + u32 sid; /* SID of bpf obj creator */ +}; + +struct perf_event_security_struct { + u32 sid; /* SID of perf_event obj creator */ }; #endif /* _SELINUX_OBJSEC_H_ */ From d9d60cd6fc5cccaf639f96bc0921150c9c7589cf Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 26 Sep 2017 15:16:05 -0400 Subject: [PATCH 2141/3715] UPSTREAM: USB: dummy-hcd: bandwidth limits for non-bulk transfers (Upstream commit ffc4ea79bc06f42283da10ea06bb17b9a3e2b2b4.) Part of the emulation performed by dummy-hcd is accounting for bandwidth utilization. The total amount of data transferred in a single frame is supposed to be no larger than an actual USB connection could accommodate. Currently the driver performs bandwidth limiting only for bulk transfers; control and periodic transfers are effectively unlimited. (Presumably drivers were not expected to request extremely large control or interrupt transfers.) This patch improves the situation somewhat by restricting them as well. The emulation still isn't perfect. On a real system, even 0-length transfers use some bandwidth because of transaction overhead (IN, OUT, ACK, NACK packets) and packet overhead (SYNC, PID, bit stuffing, CRC, EOP). Adding in those factors is left as an exercise for a later patch. Signed-off-by: Alan Stern Signed-off-by: Felipe Balbi Signed-off-by: Andrey Konovalov Bug: 147413187 Change-Id: Ib3bbaf9868a76dba2795042abb0f063099f9fc98 --- drivers/usb/gadget/udc/dummy_hcd.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index 7e90f786d923..1336cadfdb5d 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -1770,6 +1770,7 @@ static void dummy_timer(unsigned long _dum_hcd) int i; /* simplistic model for one frame's bandwidth */ + /* FIXME: account for transaction and packet overhead */ switch (dum->gadget.speed) { case USB_SPEED_LOW: total = 8/*bytes*/ * 12/*packets*/; @@ -1814,7 +1815,6 @@ restart: struct dummy_request *req; u8 address; struct dummy_ep *ep = NULL; - int type; int status = -EINPROGRESS; /* stop when we reach URBs queued after the timer interrupt */ @@ -1826,14 +1826,10 @@ restart: goto return_urb; else if (dum_hcd->rh_state != DUMMY_RH_RUNNING) continue; - type = usb_pipetype(urb->pipe); - /* used up this frame's non-periodic bandwidth? - * FIXME there's infinite bandwidth for control and - * periodic transfers ... unrealistic. - */ - if (total <= 0 && type == PIPE_BULK) - continue; + /* Used up this frame's bandwidth? */ + if (total <= 0) + break; /* find the gadget's ep for this request (if configured) */ address = usb_pipeendpoint (urb->pipe); From 30b987d1afb40e6f3662ed602cd25d6c1d1080ad Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 18 Apr 2019 13:12:07 -0400 Subject: [PATCH 2142/3715] UPSTREAM: USB: dummy-hcd: Fix failure to give back unlinked URBs (Upstream commit 50896c410354432e8e7baf97fcdd7df265e683ae.) The syzkaller USB fuzzer identified a failure mode in which dummy-hcd would never give back an unlinked URB. This causes usb_kill_urb() to hang, leading to WARNINGs and unkillable threads. In dummy-hcd, all URBs are given back by the dummy_timer() routine as it scans through the list of pending URBS. Failure to give back URBs can be caused by failure to start or early exit from the scanning loop. The code currently has two such pathways: One is triggered when an unsupported bus transfer speed is encountered, and the other by exhausting the simulated bandwidth for USB transfers during a frame. This patch removes those two paths, thereby allowing all unlinked URBs to be given back in a timely manner. It adds a check for the bus speed when the gadget first starts running, so that dummy_timer() will never thereafter encounter an unsupported speed. And it prevents the loop from exiting as soon as the total bandwidth has been used up (the scanning loop continues, giving back unlinked URBs as they are found, but not transferring any more data). Thanks to Andrey Konovalov for manually running the syzkaller fuzzer to help track down the source of the bug. Signed-off-by: Alan Stern Reported-and-tested-by: syzbot+d919b0f29d7b5a4994b9@syzkaller.appspotmail.com CC: Signed-off-by: Felipe Balbi Signed-off-by: Andrey Konovalov Bug: 147413187 Change-Id: I67f054e708130e1c7f252395bc560d6ec0760b24 --- drivers/usb/gadget/udc/dummy_hcd.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index 1336cadfdb5d..f29b5b0bb0dc 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -980,8 +980,18 @@ static int dummy_udc_start(struct usb_gadget *g, struct dummy_hcd *dum_hcd = gadget_to_dummy_hcd(g); struct dummy *dum = dum_hcd->dum; - if (driver->max_speed == USB_SPEED_UNKNOWN) + switch (g->speed) { + /* All the speeds we support */ + case USB_SPEED_LOW: + case USB_SPEED_FULL: + case USB_SPEED_HIGH: + case USB_SPEED_SUPER: + break; + default: + dev_err(dummy_dev(dum_hcd), "Unsupported driver max speed %d\n", + driver->max_speed); return -EINVAL; + } /* * SLAVE side init ... the layer above hardware, which @@ -1785,9 +1795,10 @@ static void dummy_timer(unsigned long _dum_hcd) /* Bus speed is 500000 bytes/ms, so use a little less */ total = 490000; break; - default: + default: /* Can't happen */ dev_err(dummy_dev(dum_hcd), "bogus device speed\n"); - return; + total = 0; + break; } /* FIXME if HZ != 1000 this will probably misbehave ... */ @@ -1829,7 +1840,7 @@ restart: /* Used up this frame's bandwidth? */ if (total <= 0) - break; + continue; /* find the gadget's ep for this request (if configured) */ address = usb_pipeendpoint (urb->pipe); From e7da360e27600d547ee57a6561b701b912b68a14 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 21 Oct 2019 16:20:58 +0200 Subject: [PATCH 2143/3715] UPSTREAM: USB: dummy-hcd: increase max number of devices to 32 (Upstream commit 8442b02bf3c6770e0d7e7ea17be36c30e95987b6.) When fuzzing the USB subsystem with syzkaller, we currently use 8 testing processes within one VM. To isolate testing processes from one another it is desirable to assign a dedicated USB bus to each of those, which means we need at least 8 Dummy UDC/HCD devices. This patch increases the maximum number of Dummy UDC/HCD devices to 32 (more than 8 in case we need more of them in the future). Signed-off-by: Andrey Konovalov Link: https://lore.kernel.org/r/665578f904484069bb6100fb20283b22a046ad9b.1571667489.git.andreyknvl@google.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Andrey Konovalov Bug: 147413187 Change-Id: I09d161f22c639b5b4864d38dcff2c967019b2e7c --- drivers/usb/gadget/udc/dummy_hcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index f29b5b0bb0dc..4e002e650be1 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -2741,7 +2741,7 @@ static struct platform_driver dummy_hcd_driver = { }; /*-------------------------------------------------------------------------*/ -#define MAX_NUM_UDC 2 +#define MAX_NUM_UDC 32 static struct platform_device *the_udc_pdev[MAX_NUM_UDC]; static struct platform_device *the_hcd_pdev[MAX_NUM_UDC]; From 6bdf711ca1ab2423f04d6372e6061595f738144f Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 21 Oct 2019 16:20:59 +0200 Subject: [PATCH 2144/3715] UPSTREAM: USB: dummy-hcd: use usb_urb_dir_in instead of usb_pipein (Upstream commit 6dabeb891c001c592645df2f477fed9f5d959987.) Commit fea3409112a9 ("USB: add direction bit to urb->transfer_flags") has added a usb_urb_dir_in() helper function that can be used to determine the direction of the URB. With that patch USB_DIR_IN control requests with wLength == 0 are considered out requests by real USB HCDs. This patch changes dummy-hcd to use the usb_urb_dir_in() helper to match that behavior. Signed-off-by: Andrey Konovalov Link: https://lore.kernel.org/r/4ae9e68ebca02f08a93ac61fe065057c9a01f0a8.1571667489.git.andreyknvl@google.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Andrey Konovalov Bug: 147413187 Change-Id: Ia6397c15219ec53c753ce0e843603628968fc906 --- drivers/usb/gadget/udc/dummy_hcd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index 4e002e650be1..8ee76524a0b7 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -1335,7 +1335,7 @@ static int dummy_perform_transfer(struct urb *urb, struct dummy_request *req, u32 this_sg; bool next_sg; - to_host = usb_pipein(urb->pipe); + to_host = usb_urb_dir_in(urb); rbuf = req->req.buf + req->req.actual; if (!urb->num_sgs) { @@ -1423,7 +1423,7 @@ top: /* FIXME update emulated data toggle too */ - to_host = usb_pipein(urb->pipe); + to_host = usb_urb_dir_in(urb); if (unlikely(len == 0)) is_short = 1; else { @@ -1844,7 +1844,7 @@ restart: /* find the gadget's ep for this request (if configured) */ address = usb_pipeendpoint (urb->pipe); - if (usb_pipein(urb->pipe)) + if (usb_urb_dir_in(urb)) address |= USB_DIR_IN; ep = find_endpoint(dum, address); if (!ep) { @@ -2397,7 +2397,7 @@ static inline ssize_t show_urb(char *buf, size_t size, struct urb *urb) s = "?"; break; } s; }), - ep, ep ? (usb_pipein(urb->pipe) ? "in" : "out") : "", + ep, ep ? (usb_urb_dir_in(urb) ? "in" : "out") : "", ({ char *s; \ switch (usb_pipetype(urb->pipe)) { \ case PIPE_CONTROL: \ From 65c0bbac0a29de4356e814a0fed838cb4484364c Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 21 Oct 2019 16:20:59 +0200 Subject: [PATCH 2145/3715] USB: dummy-hcd: use usb_urb_dir_in instead of usb_pipein commit 6dabeb891c001c592645df2f477fed9f5d959987 upstream. Commit fea3409112a9 ("USB: add direction bit to urb->transfer_flags") has added a usb_urb_dir_in() helper function that can be used to determine the direction of the URB. With that patch USB_DIR_IN control requests with wLength == 0 are considered out requests by real USB HCDs. This patch changes dummy-hcd to use the usb_urb_dir_in() helper to match that behavior. Signed-off-by: Andrey Konovalov Link: https://lore.kernel.org/r/4ae9e68ebca02f08a93ac61fe065057c9a01f0a8.1571667489.git.andreyknvl@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/dummy_hcd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index 7e90f786d923..8218a5845ed0 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -1325,7 +1325,7 @@ static int dummy_perform_transfer(struct urb *urb, struct dummy_request *req, u32 this_sg; bool next_sg; - to_host = usb_pipein(urb->pipe); + to_host = usb_urb_dir_in(urb); rbuf = req->req.buf + req->req.actual; if (!urb->num_sgs) { @@ -1413,7 +1413,7 @@ top: /* FIXME update emulated data toggle too */ - to_host = usb_pipein(urb->pipe); + to_host = usb_urb_dir_in(urb); if (unlikely(len == 0)) is_short = 1; else { @@ -1837,7 +1837,7 @@ restart: /* find the gadget's ep for this request (if configured) */ address = usb_pipeendpoint (urb->pipe); - if (usb_pipein(urb->pipe)) + if (usb_urb_dir_in(urb)) address |= USB_DIR_IN; ep = find_endpoint(dum, address); if (!ep) { @@ -2390,7 +2390,7 @@ static inline ssize_t show_urb(char *buf, size_t size, struct urb *urb) s = "?"; break; } s; }), - ep, ep ? (usb_pipein(urb->pipe) ? "in" : "out") : "", + ep, ep ? (usb_urb_dir_in(urb) ? "in" : "out") : "", ({ char *s; \ switch (usb_pipetype(urb->pipe)) { \ case PIPE_CONTROL: \ From f0e24d683636a9193a2adcc928b78ba74bd46f1a Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 21 Oct 2019 16:20:58 +0200 Subject: [PATCH 2146/3715] USB: dummy-hcd: increase max number of devices to 32 commit 8442b02bf3c6770e0d7e7ea17be36c30e95987b6 upstream. When fuzzing the USB subsystem with syzkaller, we currently use 8 testing processes within one VM. To isolate testing processes from one another it is desirable to assign a dedicated USB bus to each of those, which means we need at least 8 Dummy UDC/HCD devices. This patch increases the maximum number of Dummy UDC/HCD devices to 32 (more than 8 in case we need more of them in the future). Signed-off-by: Andrey Konovalov Link: https://lore.kernel.org/r/665578f904484069bb6100fb20283b22a046ad9b.1571667489.git.andreyknvl@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/dummy_hcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index 8218a5845ed0..a0c1d77a7e38 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -2734,7 +2734,7 @@ static struct platform_driver dummy_hcd_driver = { }; /*-------------------------------------------------------------------------*/ -#define MAX_NUM_UDC 2 +#define MAX_NUM_UDC 32 static struct platform_device *the_udc_pdev[MAX_NUM_UDC]; static struct platform_device *the_hcd_pdev[MAX_NUM_UDC]; From 09226e5c38639437565af01e6009a9286a351d04 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Wed, 20 Nov 2019 16:57:15 +0100 Subject: [PATCH 2147/3715] locking/spinlock/debug: Fix various data races [ Upstream commit 1a365e822372ba24c9da0822bc583894f6f3d821 ] This fixes various data races in spinlock_debug. By testing with KCSAN, it is observable that the console gets spammed with data races reports, suggesting these are extremely frequent. Example data race report: read to 0xffff8ab24f403c48 of 4 bytes by task 221 on cpu 2: debug_spin_lock_before kernel/locking/spinlock_debug.c:85 [inline] do_raw_spin_lock+0x9b/0x210 kernel/locking/spinlock_debug.c:112 __raw_spin_lock include/linux/spinlock_api_smp.h:143 [inline] _raw_spin_lock+0x39/0x40 kernel/locking/spinlock.c:151 spin_lock include/linux/spinlock.h:338 [inline] get_partial_node.isra.0.part.0+0x32/0x2f0 mm/slub.c:1873 get_partial_node mm/slub.c:1870 [inline] write to 0xffff8ab24f403c48 of 4 bytes by task 167 on cpu 3: debug_spin_unlock kernel/locking/spinlock_debug.c:103 [inline] do_raw_spin_unlock+0xc9/0x1a0 kernel/locking/spinlock_debug.c:138 __raw_spin_unlock_irqrestore include/linux/spinlock_api_smp.h:159 [inline] _raw_spin_unlock_irqrestore+0x2d/0x50 kernel/locking/spinlock.c:191 spin_unlock_irqrestore include/linux/spinlock.h:393 [inline] free_debug_processing+0x1b3/0x210 mm/slub.c:1214 __slab_free+0x292/0x400 mm/slub.c:2864 As a side-effect, with KCSAN, this eventually locks up the console, most likely due to deadlock, e.g. .. -> printk lock -> spinlock_debug -> KCSAN detects data race -> kcsan_print_report() -> printk lock -> deadlock. This fix will 1) avoid the data races, and 2) allow using lock debugging together with KCSAN. Reported-by: Qian Cai Signed-off-by: Marco Elver Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: https://lkml.kernel.org/r/20191120155715.28089-1-elver@google.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/locking/spinlock_debug.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c index 9aa0fccd5d43..03595c29c566 100644 --- a/kernel/locking/spinlock_debug.c +++ b/kernel/locking/spinlock_debug.c @@ -51,19 +51,19 @@ EXPORT_SYMBOL(__rwlock_init); static void spin_dump(raw_spinlock_t *lock, const char *msg) { - struct task_struct *owner = NULL; + struct task_struct *owner = READ_ONCE(lock->owner); - if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT) - owner = lock->owner; + if (owner == SPINLOCK_OWNER_INIT) + owner = NULL; printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n", msg, raw_smp_processor_id(), current->comm, task_pid_nr(current)); printk(KERN_EMERG " lock: %pS, .magic: %08x, .owner: %s/%d, " ".owner_cpu: %d\n", - lock, lock->magic, + lock, READ_ONCE(lock->magic), owner ? owner->comm : "", owner ? task_pid_nr(owner) : -1, - lock->owner_cpu); + READ_ONCE(lock->owner_cpu)); dump_stack(); } @@ -80,16 +80,16 @@ static void spin_bug(raw_spinlock_t *lock, const char *msg) static inline void debug_spin_lock_before(raw_spinlock_t *lock) { - SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic"); - SPIN_BUG_ON(lock->owner == current, lock, "recursion"); - SPIN_BUG_ON(lock->owner_cpu == raw_smp_processor_id(), + SPIN_BUG_ON(READ_ONCE(lock->magic) != SPINLOCK_MAGIC, lock, "bad magic"); + SPIN_BUG_ON(READ_ONCE(lock->owner) == current, lock, "recursion"); + SPIN_BUG_ON(READ_ONCE(lock->owner_cpu) == raw_smp_processor_id(), lock, "cpu recursion"); } static inline void debug_spin_lock_after(raw_spinlock_t *lock) { - lock->owner_cpu = raw_smp_processor_id(); - lock->owner = current; + WRITE_ONCE(lock->owner_cpu, raw_smp_processor_id()); + WRITE_ONCE(lock->owner, current); } static inline void debug_spin_unlock(raw_spinlock_t *lock) @@ -99,8 +99,8 @@ static inline void debug_spin_unlock(raw_spinlock_t *lock) SPIN_BUG_ON(lock->owner != current, lock, "wrong owner"); SPIN_BUG_ON(lock->owner_cpu != raw_smp_processor_id(), lock, "wrong CPU"); - lock->owner = SPINLOCK_OWNER_INIT; - lock->owner_cpu = -1; + WRITE_ONCE(lock->owner, SPINLOCK_OWNER_INIT); + WRITE_ONCE(lock->owner_cpu, -1); } /* @@ -183,8 +183,8 @@ static inline void debug_write_lock_before(rwlock_t *lock) static inline void debug_write_lock_after(rwlock_t *lock) { - lock->owner_cpu = raw_smp_processor_id(); - lock->owner = current; + WRITE_ONCE(lock->owner_cpu, raw_smp_processor_id()); + WRITE_ONCE(lock->owner, current); } static inline void debug_write_unlock(rwlock_t *lock) @@ -193,8 +193,8 @@ static inline void debug_write_unlock(rwlock_t *lock) RWLOCK_BUG_ON(lock->owner != current, lock, "wrong owner"); RWLOCK_BUG_ON(lock->owner_cpu != raw_smp_processor_id(), lock, "wrong CPU"); - lock->owner = SPINLOCK_OWNER_INIT; - lock->owner_cpu = -1; + WRITE_ONCE(lock->owner, SPINLOCK_OWNER_INIT); + WRITE_ONCE(lock->owner_cpu, -1); } void do_raw_write_lock(rwlock_t *lock) From d74adafded9ceb57fca7012ae1e24d512dd8dcb6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 15 Nov 2019 12:39:23 +0100 Subject: [PATCH 2148/3715] netfilter: ctnetlink: netns exit must wait for callbacks [ Upstream commit 18a110b022a5c02e7dc9f6109d0bd93e58ac6ebb ] Curtis Taylor and Jon Maxwell reported and debugged a crash on 3.10 based kernel. Crash occurs in ctnetlink_conntrack_events because net->nfnl socket is NULL. The nfnl socket was set to NULL by netns destruction running on another cpu. The exiting network namespace calls the relevant destructors in the following order: 1. ctnetlink_net_exit_batch This nulls out the event callback pointer in struct netns. 2. nfnetlink_net_exit_batch This nulls net->nfnl socket and frees it. 3. nf_conntrack_cleanup_net_list This removes all remaining conntrack entries. This is order is correct. The only explanation for the crash so ar is: cpu1: conntrack is dying, eviction occurs: -> nf_ct_delete() -> nf_conntrack_event_report \ -> nf_conntrack_eventmask_report -> notify->fcn() (== ctnetlink_conntrack_events). cpu1: a. fetches rcu protected pointer to obtain ctnetlink event callback. b. gets interrupted. cpu2: runs netns exit handlers: a runs ctnetlink destructor, event cb pointer set to NULL. b runs nfnetlink destructor, nfnl socket is closed and set to NULL. cpu1: c. resumes and trips over NULL net->nfnl. Problem appears to be that ctnetlink_net_exit_batch only prevents future callers of nf_conntrack_eventmask_report() from obtaining the callback. It doesn't wait of other cpus that might have already obtained the callbacks address. I don't see anything in upstream kernels that would prevent similar crash: We need to wait for all cpus to have exited the event callback. Fixes: 9592a5c01e79dbc59eb56fa ("netfilter: ctnetlink: netns support") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_conntrack_netlink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index c781c9a1a697..39a32edaa92c 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -3422,6 +3422,9 @@ static void __net_exit ctnetlink_net_exit_batch(struct list_head *net_exit_list) list_for_each_entry(net, net_exit_list, exit_list) ctnetlink_net_exit(net); + + /* wait for other cpus until they are done with ctnl_notifiers */ + synchronize_rcu(); } static struct pernet_operations ctnetlink_net_ops = { From ec3bb975c6013aa2f5e8a96a0bee2c8d39618e89 Mon Sep 17 00:00:00 2001 From: qize wang Date: Fri, 29 Nov 2019 18:10:54 +0800 Subject: [PATCH 2149/3715] mwifiex: Fix heap overflow in mmwifiex_process_tdls_action_frame() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1e58252e334dc3f3756f424a157d1b7484464c40 ] mwifiex_process_tdls_action_frame() without checking the incoming tdls infomation element's vality before use it, this may cause multi heap buffer overflows. Fix them by putting vality check before use it. IE is TLV struct, but ht_cap and ht_oper aren’t TLV struct. the origin marvell driver code is wrong: memcpy(&sta_ptr->tdls_cap.ht_oper, pos,.... memcpy((u8 *)&sta_ptr->tdls_cap.ht_capb, pos,... Fix the bug by changing pos(the address of IE) to pos+2 ( the address of IE value ). Signed-off-by: qize wang Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/marvell/mwifiex/tdls.c | 70 +++++++++++++++++++-- 1 file changed, 64 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/tdls.c b/drivers/net/wireless/marvell/mwifiex/tdls.c index e76af2866a19..b5340af9fa5e 100644 --- a/drivers/net/wireless/marvell/mwifiex/tdls.c +++ b/drivers/net/wireless/marvell/mwifiex/tdls.c @@ -956,59 +956,117 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, switch (*pos) { case WLAN_EID_SUPP_RATES: + if (pos[1] > 32) + return; sta_ptr->tdls_cap.rates_len = pos[1]; for (i = 0; i < pos[1]; i++) sta_ptr->tdls_cap.rates[i] = pos[i + 2]; break; case WLAN_EID_EXT_SUPP_RATES: + if (pos[1] > 32) + return; basic = sta_ptr->tdls_cap.rates_len; + if (pos[1] > 32 - basic) + return; for (i = 0; i < pos[1]; i++) sta_ptr->tdls_cap.rates[basic + i] = pos[i + 2]; sta_ptr->tdls_cap.rates_len += pos[1]; break; case WLAN_EID_HT_CAPABILITY: - memcpy((u8 *)&sta_ptr->tdls_cap.ht_capb, pos, + if (pos > end - sizeof(struct ieee80211_ht_cap) - 2) + return; + if (pos[1] != sizeof(struct ieee80211_ht_cap)) + return; + /* copy the ie's value into ht_capb*/ + memcpy((u8 *)&sta_ptr->tdls_cap.ht_capb, pos + 2, sizeof(struct ieee80211_ht_cap)); sta_ptr->is_11n_enabled = 1; break; case WLAN_EID_HT_OPERATION: - memcpy(&sta_ptr->tdls_cap.ht_oper, pos, + if (pos > end - + sizeof(struct ieee80211_ht_operation) - 2) + return; + if (pos[1] != sizeof(struct ieee80211_ht_operation)) + return; + /* copy the ie's value into ht_oper*/ + memcpy(&sta_ptr->tdls_cap.ht_oper, pos + 2, sizeof(struct ieee80211_ht_operation)); break; case WLAN_EID_BSS_COEX_2040: + if (pos > end - 3) + return; + if (pos[1] != 1) + return; sta_ptr->tdls_cap.coex_2040 = pos[2]; break; case WLAN_EID_EXT_CAPABILITY: + if (pos > end - sizeof(struct ieee_types_header)) + return; + if (pos[1] < sizeof(struct ieee_types_header)) + return; + if (pos[1] > 8) + return; memcpy((u8 *)&sta_ptr->tdls_cap.extcap, pos, sizeof(struct ieee_types_header) + min_t(u8, pos[1], 8)); break; case WLAN_EID_RSN: + if (pos > end - sizeof(struct ieee_types_header)) + return; + if (pos[1] < sizeof(struct ieee_types_header)) + return; + if (pos[1] > IEEE_MAX_IE_SIZE - + sizeof(struct ieee_types_header)) + return; memcpy((u8 *)&sta_ptr->tdls_cap.rsn_ie, pos, sizeof(struct ieee_types_header) + min_t(u8, pos[1], IEEE_MAX_IE_SIZE - sizeof(struct ieee_types_header))); break; case WLAN_EID_QOS_CAPA: + if (pos > end - 3) + return; + if (pos[1] != 1) + return; sta_ptr->tdls_cap.qos_info = pos[2]; break; case WLAN_EID_VHT_OPERATION: - if (priv->adapter->is_hw_11ac_capable) - memcpy(&sta_ptr->tdls_cap.vhtoper, pos, + if (priv->adapter->is_hw_11ac_capable) { + if (pos > end - + sizeof(struct ieee80211_vht_operation) - 2) + return; + if (pos[1] != + sizeof(struct ieee80211_vht_operation)) + return; + /* copy the ie's value into vhtoper*/ + memcpy(&sta_ptr->tdls_cap.vhtoper, pos + 2, sizeof(struct ieee80211_vht_operation)); + } break; case WLAN_EID_VHT_CAPABILITY: if (priv->adapter->is_hw_11ac_capable) { - memcpy((u8 *)&sta_ptr->tdls_cap.vhtcap, pos, + if (pos > end - + sizeof(struct ieee80211_vht_cap) - 2) + return; + if (pos[1] != sizeof(struct ieee80211_vht_cap)) + return; + /* copy the ie's value into vhtcap*/ + memcpy((u8 *)&sta_ptr->tdls_cap.vhtcap, pos + 2, sizeof(struct ieee80211_vht_cap)); sta_ptr->is_11ac_enabled = 1; } break; case WLAN_EID_AID: - if (priv->adapter->is_hw_11ac_capable) + if (priv->adapter->is_hw_11ac_capable) { + if (pos > end - 4) + return; + if (pos[1] != 2) + return; sta_ptr->tdls_cap.aid = get_unaligned_le16((pos + 2)); + } + break; default: break; } From 9606b11726ecfb9724baf695cfa49704d370b2fb Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Fri, 15 Nov 2019 11:36:09 +0000 Subject: [PATCH 2150/3715] libtraceevent: Fix lib installation with O= [ Upstream commit 587db8ebdac2c5eb3a8851e16b26f2e2711ab797 ] When we use 'O=' with make to build libtraceevent in a separate folder it fails to install libtraceevent.a and libtraceevent.so.1.1.0 with the error: INSTALL /home/sudip/linux/obj-trace/libtraceevent.a INSTALL /home/sudip/linux/obj-trace/libtraceevent.so.1.1.0 cp: cannot stat 'libtraceevent.a': No such file or directory Makefile:225: recipe for target 'install_lib' failed make: *** [install_lib] Error 1 I used the command: make O=../../../obj-trace DESTDIR=~/test prefix==/usr install It turns out libtraceevent Makefile, even though it builds in a separate folder, searches for libtraceevent.a and libtraceevent.so.1.1.0 in its source folder. So, add the 'OUTPUT' prefix to the source path so that 'make' looks for the files in the correct place. Signed-off-by: Sudipm Mukherjee Reviewed-by: Steven Rostedt (VMware) Cc: linux-trace-devel@vger.kernel.org Link: http://lore.kernel.org/lkml/20191115113610.21493-1-sudipm.mukherjee@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/lib/traceevent/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index 8107f060fa84..a0ac01c647f5 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -115,6 +115,7 @@ EVENT_PARSE_VERSION = $(EP_VERSION).$(EP_PATCHLEVEL).$(EP_EXTRAVERSION) LIB_TARGET = libtraceevent.a libtraceevent.so.$(EVENT_PARSE_VERSION) LIB_INSTALL = libtraceevent.a libtraceevent.so* +LIB_INSTALL := $(addprefix $(OUTPUT),$(LIB_INSTALL)) INCLUDES = -I. -I $(srctree)/tools/include $(CONFIG_INCLUDES) From c2f48252f119d8363475cca0544246e566fed31d Mon Sep 17 00:00:00 2001 From: Dave Young Date: Wed, 4 Dec 2019 15:52:33 +0800 Subject: [PATCH 2151/3715] x86/efi: Update e820 with reserved EFI boot services data to fix kexec breakage [ Upstream commit af164898482817a1d487964b68f3c21bae7a1beb ] Michael Weiser reported that he got this error during a kexec rebooting: esrt: Unsupported ESRT version 2904149718861218184. The ESRT memory stays in EFI boot services data, and it was reserved in kernel via efi_mem_reserve(). The initial purpose of the reservation is to reuse the EFI boot services data across kexec reboot. For example the BGRT image data and some ESRT memory like Michael reported. But although the memory is reserved it is not updated in the X86 E820 table, and kexec_file_load() iterates system RAM in the IO resource list to find places for kernel, initramfs and other stuff. In Michael's case the kexec loaded initramfs overwrote the ESRT memory and then the failure happened. Since kexec_file_load() depends on the E820 table being updated, just fix this by updating the reserved EFI boot services memory as reserved type in E820. Originally any memory descriptors with EFI_MEMORY_RUNTIME attribute are bypassed in the reservation code path because they are assumed as reserved. But the reservation is still needed for multiple kexec reboots, and it is the only possible case we come here thus just drop the code chunk, then everything works without side effects. On my machine the ESRT memory sits in an EFI runtime data range, it does not trigger the problem, but I successfully tested with BGRT instead. both kexec_load() and kexec_file_load() work and kdump works as well. [ mingo: Edited the changelog. ] Reported-by: Michael Weiser Tested-by: Michael Weiser Signed-off-by: Dave Young Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Eric W. Biederman Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kexec@lists.infradead.org Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191204075233.GA10520@dhcp-128-65.nay.redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- arch/x86/platform/efi/quirks.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 5b513ccffde4..cadd7fd290fa 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -257,10 +257,6 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) return; } - /* No need to reserve regions that will never be freed. */ - if (md.attribute & EFI_MEMORY_RUNTIME) - return; - size += addr % EFI_PAGE_SIZE; size = round_up(size, EFI_PAGE_SIZE); addr = round_down(addr, EFI_PAGE_SIZE); @@ -290,6 +286,8 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) early_memunmap(new, new_size); efi_memmap_install(new_phys, num_entries); + e820__range_update(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); + e820__update_table(e820_table); } /* From 57bf13ee5910b446d45426f8d2ac136d2c19b39d Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Fri, 6 Dec 2019 16:55:38 +0000 Subject: [PATCH 2152/3715] efi/gop: Return EFI_NOT_FOUND if there are no usable GOPs [ Upstream commit 6fc3cec30dfeee7d3c5db8154016aff9d65503c5 ] If we don't find a usable instance of the Graphics Output Protocol (GOP) because none of them have a framebuffer (i.e. they were all PIXEL_BLT_ONLY), but all the EFI calls succeeded, we will return EFI_SUCCESS even though we didn't find a usable GOP. Fix this by explicitly returning EFI_NOT_FOUND if no usable GOPs are found, allowing the caller to probe for UGA instead. Signed-off-by: Arvind Sankar Signed-off-by: Ard Biesheuvel Cc: Andy Shevchenko Cc: Bhupesh Sharma Cc: Masayoshi Mizuma Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191206165542.31469-3-ardb@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- drivers/firmware/efi/libstub/gop.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/firmware/efi/libstub/gop.c b/drivers/firmware/efi/libstub/gop.c index 24c461dea7af..16ed61c023e8 100644 --- a/drivers/firmware/efi/libstub/gop.c +++ b/drivers/firmware/efi/libstub/gop.c @@ -121,7 +121,7 @@ setup_gop32(efi_system_table_t *sys_table_arg, struct screen_info *si, u64 fb_base; struct efi_pixel_bitmask pixel_info; int pixel_format; - efi_status_t status = EFI_NOT_FOUND; + efi_status_t status; u32 *handles = (u32 *)(unsigned long)gop_handle; int i; @@ -177,7 +177,7 @@ setup_gop32(efi_system_table_t *sys_table_arg, struct screen_info *si, /* Did we find any GOPs? */ if (!first_gop) - goto out; + return EFI_NOT_FOUND; /* EFI framebuffer */ si->orig_video_isVGA = VIDEO_TYPE_EFI; @@ -199,7 +199,7 @@ setup_gop32(efi_system_table_t *sys_table_arg, struct screen_info *si, si->lfb_size = si->lfb_linelength * si->lfb_height; si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS; -out: + return status; } @@ -239,7 +239,7 @@ setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si, u64 fb_base; struct efi_pixel_bitmask pixel_info; int pixel_format; - efi_status_t status = EFI_NOT_FOUND; + efi_status_t status; u64 *handles = (u64 *)(unsigned long)gop_handle; int i; @@ -295,7 +295,7 @@ setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si, /* Did we find any GOPs? */ if (!first_gop) - goto out; + return EFI_NOT_FOUND; /* EFI framebuffer */ si->orig_video_isVGA = VIDEO_TYPE_EFI; @@ -317,7 +317,7 @@ setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si, si->lfb_size = si->lfb_linelength * si->lfb_height; si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS; -out: + return status; } From 49277422c995e0d960381a6f67ebda03b548492e Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Fri, 6 Dec 2019 16:55:39 +0000 Subject: [PATCH 2153/3715] efi/gop: Return EFI_SUCCESS if a usable GOP was found [ Upstream commit dbd89c303b4420f6cdb689fd398349fc83b059dd ] If we've found a usable instance of the Graphics Output Protocol (GOP) with a framebuffer, it is possible that one of the later EFI calls fails while checking if any support console output. In this case status may be an EFI error code even though we found a usable GOP. Fix this by explicitly return EFI_SUCCESS if a usable GOP has been located. Signed-off-by: Arvind Sankar Signed-off-by: Ard Biesheuvel Cc: Andy Shevchenko Cc: Bhupesh Sharma Cc: Masayoshi Mizuma Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191206165542.31469-4-ardb@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- drivers/firmware/efi/libstub/gop.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/libstub/gop.c b/drivers/firmware/efi/libstub/gop.c index 16ed61c023e8..81ffda5d1e48 100644 --- a/drivers/firmware/efi/libstub/gop.c +++ b/drivers/firmware/efi/libstub/gop.c @@ -200,7 +200,7 @@ setup_gop32(efi_system_table_t *sys_table_arg, struct screen_info *si, si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS; - return status; + return EFI_SUCCESS; } static efi_status_t @@ -318,7 +318,7 @@ setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si, si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS; - return status; + return EFI_SUCCESS; } /* From 3b631b675b39d084ef4066a5642597a585357f95 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Fri, 6 Dec 2019 16:55:40 +0000 Subject: [PATCH 2154/3715] efi/gop: Fix memory leak in __gop_query32/64() [ Upstream commit ff397be685e410a59c34b21ce0c55d4daa466bb7 ] efi_graphics_output_protocol::query_mode() returns info in callee-allocated memory which must be freed by the caller, which we aren't doing. We don't actually need to call query_mode() in order to obtain the info for the current graphics mode, which is already there in gop->mode->info, so just access it directly in the setup_gop32/64() functions. Also nothing uses the size of the info structure, so don't update the passed-in size (which is the size of the gop_handle table in bytes) unnecessarily. Signed-off-by: Arvind Sankar Signed-off-by: Ard Biesheuvel Cc: Andy Shevchenko Cc: Bhupesh Sharma Cc: Masayoshi Mizuma Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191206165542.31469-5-ardb@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- drivers/firmware/efi/libstub/gop.c | 66 ++++++------------------------ 1 file changed, 12 insertions(+), 54 deletions(-) diff --git a/drivers/firmware/efi/libstub/gop.c b/drivers/firmware/efi/libstub/gop.c index 81ffda5d1e48..fd8053f9556e 100644 --- a/drivers/firmware/efi/libstub/gop.c +++ b/drivers/firmware/efi/libstub/gop.c @@ -85,30 +85,6 @@ setup_pixel_info(struct screen_info *si, u32 pixels_per_scan_line, } } -static efi_status_t -__gop_query32(efi_system_table_t *sys_table_arg, - struct efi_graphics_output_protocol_32 *gop32, - struct efi_graphics_output_mode_info **info, - unsigned long *size, u64 *fb_base) -{ - struct efi_graphics_output_protocol_mode_32 *mode; - efi_graphics_output_protocol_query_mode query_mode; - efi_status_t status; - unsigned long m; - - m = gop32->mode; - mode = (struct efi_graphics_output_protocol_mode_32 *)m; - query_mode = (void *)(unsigned long)gop32->query_mode; - - status = __efi_call_early(query_mode, (void *)gop32, mode->mode, size, - info); - if (status != EFI_SUCCESS) - return status; - - *fb_base = mode->frame_buffer_base; - return status; -} - static efi_status_t setup_gop32(efi_system_table_t *sys_table_arg, struct screen_info *si, efi_guid_t *proto, unsigned long size, void **gop_handle) @@ -130,6 +106,7 @@ setup_gop32(efi_system_table_t *sys_table_arg, struct screen_info *si, nr_gops = size / sizeof(u32); for (i = 0; i < nr_gops; i++) { + struct efi_graphics_output_protocol_mode_32 *mode; struct efi_graphics_output_mode_info *info = NULL; efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID; bool conout_found = false; @@ -147,9 +124,11 @@ setup_gop32(efi_system_table_t *sys_table_arg, struct screen_info *si, if (status == EFI_SUCCESS) conout_found = true; - status = __gop_query32(sys_table_arg, gop32, &info, &size, - ¤t_fb_base); - if (status == EFI_SUCCESS && (!first_gop || conout_found) && + mode = (void *)(unsigned long)gop32->mode; + info = (void *)(unsigned long)mode->info; + current_fb_base = mode->frame_buffer_base; + + if ((!first_gop || conout_found) && info->pixel_format != PIXEL_BLT_ONLY) { /* * Systems that use the UEFI Console Splitter may @@ -203,30 +182,6 @@ setup_gop32(efi_system_table_t *sys_table_arg, struct screen_info *si, return EFI_SUCCESS; } -static efi_status_t -__gop_query64(efi_system_table_t *sys_table_arg, - struct efi_graphics_output_protocol_64 *gop64, - struct efi_graphics_output_mode_info **info, - unsigned long *size, u64 *fb_base) -{ - struct efi_graphics_output_protocol_mode_64 *mode; - efi_graphics_output_protocol_query_mode query_mode; - efi_status_t status; - unsigned long m; - - m = gop64->mode; - mode = (struct efi_graphics_output_protocol_mode_64 *)m; - query_mode = (void *)(unsigned long)gop64->query_mode; - - status = __efi_call_early(query_mode, (void *)gop64, mode->mode, size, - info); - if (status != EFI_SUCCESS) - return status; - - *fb_base = mode->frame_buffer_base; - return status; -} - static efi_status_t setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si, efi_guid_t *proto, unsigned long size, void **gop_handle) @@ -248,6 +203,7 @@ setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si, nr_gops = size / sizeof(u64); for (i = 0; i < nr_gops; i++) { + struct efi_graphics_output_protocol_mode_64 *mode; struct efi_graphics_output_mode_info *info = NULL; efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID; bool conout_found = false; @@ -265,9 +221,11 @@ setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si, if (status == EFI_SUCCESS) conout_found = true; - status = __gop_query64(sys_table_arg, gop64, &info, &size, - ¤t_fb_base); - if (status == EFI_SUCCESS && (!first_gop || conout_found) && + mode = (void *)(unsigned long)gop64->mode; + info = (void *)(unsigned long)mode->info; + current_fb_base = mode->frame_buffer_base; + + if ((!first_gop || conout_found) && info->pixel_format != PIXEL_BLT_ONLY) { /* * Systems that use the UEFI Console Splitter may From 210670f32876544b6cb7613dc4d1c7b63dec03d0 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Wed, 27 Nov 2019 15:56:40 +0000 Subject: [PATCH 2155/3715] ARM: vexpress: Set-up shared OPP table instead of individual for each CPU [ Upstream commit 2a76352ad2cc6b78e58f737714879cc860903802 ] Currently we add individual copy of same OPP table for each CPU within the cluster. This is redundant and doesn't reflect the reality. We can't use core cpumask to set policy->cpus in ve_spc_cpufreq_init() anymore as it gets called via cpuhp_cpufreq_online()->cpufreq_online() ->cpufreq_driver->init() and the cpumask gets updated upon CPU hotplug operations. It also may cause issues when the vexpress_spc_cpufreq driver is built as a module. Since ve_spc_clk_init is built-in device initcall, we should be able to use the same topology_core_cpumask to set the opp sharing cpumask via dev_pm_opp_set_sharing_cpus and use the same later in the driver via dev_pm_opp_get_sharing_cpus. Cc: Liviu Dudau Cc: Lorenzo Pieralisi Acked-by: Viresh Kumar Tested-by: Dietmar Eggemann Signed-off-by: Sudeep Holla Signed-off-by: Sasha Levin --- arch/arm/mach-vexpress/spc.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-vexpress/spc.c b/arch/arm/mach-vexpress/spc.c index fe488523694c..635b0d549487 100644 --- a/arch/arm/mach-vexpress/spc.c +++ b/arch/arm/mach-vexpress/spc.c @@ -555,8 +555,9 @@ static struct clk *ve_spc_clk_register(struct device *cpu_dev) static int __init ve_spc_clk_init(void) { - int cpu; + int cpu, cluster; struct clk *clk; + bool init_opp_table[MAX_CLUSTERS] = { false }; if (!info) return 0; /* Continue only if SPC is initialised */ @@ -582,8 +583,17 @@ static int __init ve_spc_clk_init(void) continue; } + cluster = topology_physical_package_id(cpu_dev->id); + if (init_opp_table[cluster]) + continue; + if (ve_init_opp_table(cpu_dev)) pr_warn("failed to initialise cpu%d opp table\n", cpu); + else if (dev_pm_opp_set_sharing_cpus(cpu_dev, + topology_core_cpumask(cpu_dev->id))) + pr_warn("failed to mark OPPs shared for cpu%d\n", cpu); + else + init_opp_table[cluster] = true; } platform_device_register_simple("vexpress-spc-cpufreq", -1, NULL, 0); From 38061de32f9c9f943e5fe1685e8473ccad2d5098 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 5 Dec 2019 13:35:11 +0100 Subject: [PATCH 2156/3715] netfilter: uapi: Avoid undefined left-shift in xt_sctp.h [ Upstream commit 164166558aacea01b99c8c8ffb710d930405ba69 ] With 'bytes(__u32)' being 32, a left-shift of 31 may happen which is undefined for the signed 32-bit value 1. Avoid this by declaring 1 as unsigned. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- include/uapi/linux/netfilter/xt_sctp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/netfilter/xt_sctp.h b/include/uapi/linux/netfilter/xt_sctp.h index 4bc6d1a08781..b4d804a9fccb 100644 --- a/include/uapi/linux/netfilter/xt_sctp.h +++ b/include/uapi/linux/netfilter/xt_sctp.h @@ -41,19 +41,19 @@ struct xt_sctp_info { #define SCTP_CHUNKMAP_SET(chunkmap, type) \ do { \ (chunkmap)[type / bytes(__u32)] |= \ - 1 << (type % bytes(__u32)); \ + 1u << (type % bytes(__u32)); \ } while (0) #define SCTP_CHUNKMAP_CLEAR(chunkmap, type) \ do { \ (chunkmap)[type / bytes(__u32)] &= \ - ~(1 << (type % bytes(__u32))); \ + ~(1u << (type % bytes(__u32))); \ } while (0) #define SCTP_CHUNKMAP_IS_SET(chunkmap, type) \ ({ \ ((chunkmap)[type / bytes (__u32)] & \ - (1 << (type % bytes (__u32)))) ? 1: 0; \ + (1u << (type % bytes (__u32)))) ? 1: 0; \ }) #define SCTP_CHUNKMAP_RESET(chunkmap) \ From 4798a83039ce8dd2afbe7d9395796593991d1363 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 6 Dec 2019 21:55:20 +0100 Subject: [PATCH 2157/3715] netfilter: nf_tables: validate NFT_SET_ELEM_INTERVAL_END [ Upstream commit bffc124b6fe37d0ae9b428d104efb426403bb5c9 ] Only NFTA_SET_ELEM_KEY and NFTA_SET_ELEM_FLAGS make sense for elements whose NFT_SET_ELEM_INTERVAL_END flag is set on. Fixes: 96518518cc41 ("netfilter: add nftables") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7ef126489d4e..91490446ebb4 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3917,14 +3917,20 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (nla[NFTA_SET_ELEM_DATA] == NULL && !(flags & NFT_SET_ELEM_INTERVAL_END)) return -EINVAL; - if (nla[NFTA_SET_ELEM_DATA] != NULL && - flags & NFT_SET_ELEM_INTERVAL_END) - return -EINVAL; } else { if (nla[NFTA_SET_ELEM_DATA] != NULL) return -EINVAL; } + if ((flags & NFT_SET_ELEM_INTERVAL_END) && + (nla[NFTA_SET_ELEM_DATA] || + nla[NFTA_SET_ELEM_OBJREF] || + nla[NFTA_SET_ELEM_TIMEOUT] || + nla[NFTA_SET_ELEM_EXPIRATION] || + nla[NFTA_SET_ELEM_USERDATA] || + nla[NFTA_SET_ELEM_EXPR])) + return -EINVAL; + timeout = 0; if (nla[NFTA_SET_ELEM_TIMEOUT] != NULL) { if (!(set->flags & NFT_SET_TIMEOUT)) From 84b33e7a2364bda397e9953782e7a6de8468aa90 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 6 Dec 2019 10:19:09 -0800 Subject: [PATCH 2158/3715] ARM: dts: Cygnus: Fix MDIO node address/size cells [ Upstream commit fac2c2da3596d77c343988bb0d41a8c533b2e73c ] The MDIO node on Cygnus had an reversed #address-cells and #size-cells properties, correct those. Fixes: 40c26d3af60a ("ARM: dts: Cygnus: Add the ethernet switch and ethernet PHY") Reported-by: Simon Horman Reviewed-by: Ray Jui Reviewed-by: Simon Horman Signed-off-by: Florian Fainelli Signed-off-by: Sasha Levin --- arch/arm/boot/dts/bcm-cygnus.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/bcm-cygnus.dtsi b/arch/arm/boot/dts/bcm-cygnus.dtsi index 8b2c65cd61a2..b822952c29f8 100644 --- a/arch/arm/boot/dts/bcm-cygnus.dtsi +++ b/arch/arm/boot/dts/bcm-cygnus.dtsi @@ -165,8 +165,8 @@ mdio: mdio@18002000 { compatible = "brcm,iproc-mdio"; reg = <0x18002000 0x8>; - #size-cells = <1>; - #address-cells = <0>; + #size-cells = <0>; + #address-cells = <1>; status = "disabled"; gphy0: ethernet-phy@0 { From 0c2cabe56e1186310af9b1bb25f6ad4a429052ce Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Fri, 6 Dec 2019 15:55:00 +0800 Subject: [PATCH 2159/3715] spi: spi-cavium-thunderx: Add missing pci_release_regions() [ Upstream commit a841e2853e1afecc2ee692b8cc5bff606bc84e4c ] The driver forgets to call pci_release_regions() in probe failure and remove. Add the missed calls to fix it. Signed-off-by: Chuhong Yuan Link: https://lore.kernel.org/r/20191206075500.18525-1-hslester96@gmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-cavium-thunderx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/spi/spi-cavium-thunderx.c b/drivers/spi/spi-cavium-thunderx.c index 877937706240..828fbbebc3c4 100644 --- a/drivers/spi/spi-cavium-thunderx.c +++ b/drivers/spi/spi-cavium-thunderx.c @@ -81,6 +81,7 @@ static int thunderx_spi_probe(struct pci_dev *pdev, error: clk_disable_unprepare(p->clk); + pci_release_regions(pdev); spi_master_put(master); return ret; } @@ -95,6 +96,7 @@ static void thunderx_spi_remove(struct pci_dev *pdev) return; clk_disable_unprepare(p->clk); + pci_release_regions(pdev); /* Put everything in a known state. */ writeq(0, p->register_base + OCTEON_SPI_CFG(p)); } From 31764a701731a018ecbc2c62f950828b377c7b48 Mon Sep 17 00:00:00 2001 From: Dragos Tarcatu Date: Mon, 9 Dec 2019 18:39:39 -0600 Subject: [PATCH 2160/3715] ASoC: topology: Check return value for soc_tplg_pcm_create() [ Upstream commit a3039aef52d9ffeb67e9211899cd3e8a2953a01f ] The return value of soc_tplg_pcm_create() is currently not checked in soc_tplg_pcm_elems_load(). If an error is to occur there, the topology ignores it and continues loading. Fix that by checking the status and rejecting the topology on error. Reviewed-by: Ranjani Sridharan Signed-off-by: Dragos Tarcatu Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20191210003939.15752-3-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-topology.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 2d5cf263515b..72301bcad3bd 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -1921,6 +1921,7 @@ static int soc_tplg_pcm_elems_load(struct soc_tplg *tplg, int count = hdr->count; int i; bool abi_match; + int ret; if (tplg->pass != SOC_TPLG_PASS_PCM_DAI) return 0; @@ -1957,7 +1958,12 @@ static int soc_tplg_pcm_elems_load(struct soc_tplg *tplg, } /* create the FE DAIs and DAI links */ - soc_tplg_pcm_create(tplg, _pcm); + ret = soc_tplg_pcm_create(tplg, _pcm); + if (ret < 0) { + if (!abi_match) + kfree(_pcm); + return ret; + } /* offset by version-specific struct size and * real priv data size From d4aea1529970f3d50217913824fb93d10b98f0e2 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sat, 30 Nov 2019 13:31:13 +0100 Subject: [PATCH 2161/3715] ARM: dts: bcm283x: Fix critical trip point [ Upstream commit 30e647a764d446723a7e0fb08d209e0104f16173 ] During definition of the CPU thermal zone of BCM283x SoC family there was a misunderstanding of the meaning "criticial trip point" and the thermal throttling range of the VideoCore firmware. The latter one takes effect when the core temperature is at least 85 degree celsius or higher So the current critical trip point doesn't make sense, because the thermal shutdown appears before the firmware has a chance to throttle the ARM core(s). Fix these unwanted shutdowns by increasing the critical trip point to a value which shouldn't be reached with working thermal throttling. Fixes: 0fe4d2181cc4 ("ARM: dts: bcm283x: Add CPU thermal zone with 1 trip point") Signed-off-by: Stefan Wahren Signed-off-by: Florian Fainelli Signed-off-by: Sasha Levin --- arch/arm/boot/dts/bcm283x.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi index 4745e3c7806b..fdb018e1278f 100644 --- a/arch/arm/boot/dts/bcm283x.dtsi +++ b/arch/arm/boot/dts/bcm283x.dtsi @@ -38,7 +38,7 @@ trips { cpu-crit { - temperature = <80000>; + temperature = <90000>; hysteresis = <0>; type = "critical"; }; From ec41394252a9c4f8d7f0224f16de2b6a29704e54 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Mon, 9 Dec 2019 19:52:52 +0100 Subject: [PATCH 2162/3715] bpf, mips: Limit to 33 tail calls [ Upstream commit e49e6f6db04e915dccb494ae10fa14888fea6f89 ] All BPF JIT compilers except RISC-V's and MIPS' enforce a 33-tail calls limit at runtime. In addition, a test was recently added, in tailcalls2, to check this limit. This patch updates the tail call limit in MIPS' JIT compiler to allow 33 tail calls. Fixes: b6bd53f9c4e8 ("MIPS: Add missing file for eBPF JIT.") Reported-by: Mahshid Khezri Signed-off-by: Paul Chaignon Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/b8eb2caac1c25453c539248e56ca22f74b5316af.1575916815.git.paul.chaignon@gmail.com Signed-off-by: Sasha Levin --- arch/mips/net/ebpf_jit.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c index 42faa95ce664..57a7a9d68475 100644 --- a/arch/mips/net/ebpf_jit.c +++ b/arch/mips/net/ebpf_jit.c @@ -612,6 +612,7 @@ static void emit_const_to_reg(struct jit_ctx *ctx, int dst, u64 value) static int emit_bpf_tail_call(struct jit_ctx *ctx, int this_idx) { int off, b_off; + int tcc_reg; ctx->flags |= EBPF_SEEN_TC; /* @@ -624,14 +625,14 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx, int this_idx) b_off = b_imm(this_idx + 1, ctx); emit_instr(ctx, bne, MIPS_R_AT, MIPS_R_ZERO, b_off); /* - * if (--TCC < 0) + * if (TCC-- < 0) * goto out; */ /* Delay slot */ - emit_instr(ctx, daddiu, MIPS_R_T5, - (ctx->flags & EBPF_TCC_IN_V1) ? MIPS_R_V1 : MIPS_R_S4, -1); + tcc_reg = (ctx->flags & EBPF_TCC_IN_V1) ? MIPS_R_V1 : MIPS_R_S4; + emit_instr(ctx, daddiu, MIPS_R_T5, tcc_reg, -1); b_off = b_imm(this_idx + 1, ctx); - emit_instr(ctx, bltz, MIPS_R_T5, b_off); + emit_instr(ctx, bltz, tcc_reg, b_off); /* * prog = array->ptrs[index]; * if (prog == NULL) From 328133b1c1ee44e52f583482a191c152fb693fba Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Thu, 14 Nov 2019 11:39:48 +0200 Subject: [PATCH 2163/3715] ARM: dts: am437x-gp/epos-evm: fix panel compatible [ Upstream commit c6b16761c6908d3dc167a0a566578b4b0b972905 ] The LCD panel on AM4 GP EVMs and ePOS boards seems to be osd070t1718-19ts. The current dts files say osd057T0559-34ts. Possibly the panel has changed since the early EVMs, or there has been a mistake with the panel type. Update the DT files accordingly. Acked-by: Laurent Pinchart Signed-off-by: Tomi Valkeinen Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/boot/dts/am437x-gp-evm.dts | 2 +- arch/arm/boot/dts/am43x-epos-evm.dts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/am437x-gp-evm.dts b/arch/arm/boot/dts/am437x-gp-evm.dts index afb8eb0a0a16..051823b7e5a1 100644 --- a/arch/arm/boot/dts/am437x-gp-evm.dts +++ b/arch/arm/boot/dts/am437x-gp-evm.dts @@ -83,7 +83,7 @@ }; lcd0: display { - compatible = "osddisplays,osd057T0559-34ts", "panel-dpi"; + compatible = "osddisplays,osd070t1718-19ts", "panel-dpi"; label = "lcd"; panel-timing { diff --git a/arch/arm/boot/dts/am43x-epos-evm.dts b/arch/arm/boot/dts/am43x-epos-evm.dts index 081fa68b6f98..c4279b0b9f12 100644 --- a/arch/arm/boot/dts/am43x-epos-evm.dts +++ b/arch/arm/boot/dts/am43x-epos-evm.dts @@ -45,7 +45,7 @@ }; lcd0: display { - compatible = "osddisplays,osd057T0559-34ts", "panel-dpi"; + compatible = "osddisplays,osd070t1718-19ts", "panel-dpi"; label = "lcd"; panel-timing { From b50ba34bdba823b4bd26bd6c0ca940f3e43d93e9 Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Thu, 5 Dec 2019 17:01:13 +0900 Subject: [PATCH 2164/3715] samples: bpf: Replace symbol compare of trace_event [ Upstream commit bba1b2a890253528c45aa66cf856f289a215bfbc ] Previously, when this sample is added, commit 1c47910ef8013 ("samples/bpf: add perf_event+bpf example"), a symbol 'sys_read' and 'sys_write' has been used without no prefixes. But currently there are no exact symbols with these under kallsyms and this leads to failure. This commit changes exact compare to substring compare to keep compatible with exact symbol or prefixed symbol. Fixes: 1c47910ef8013 ("samples/bpf: add perf_event+bpf example") Signed-off-by: Daniel T. Lee Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191205080114.19766-2-danieltimlee@gmail.com Signed-off-by: Sasha Levin --- samples/bpf/trace_event_user.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c index c7d525e5696e..8c7445874662 100644 --- a/samples/bpf/trace_event_user.c +++ b/samples/bpf/trace_event_user.c @@ -34,9 +34,9 @@ static void print_ksym(__u64 addr) return; sym = ksym_search(addr); printf("%s;", sym->name); - if (!strcmp(sym->name, "sys_read")) + if (!strstr(sym->name, "sys_read")) sys_read_seen = true; - else if (!strcmp(sym->name, "sys_write")) + else if (!strstr(sym->name, "sys_write")) sys_write_seen = true; } From f0822e783ef59454cefdcd8e32468fe92386fa94 Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Thu, 5 Dec 2019 17:01:14 +0900 Subject: [PATCH 2165/3715] samples: bpf: fix syscall_tp due to unused syscall [ Upstream commit fe3300897cbfd76c6cb825776e5ac0ca50a91ca4 ] Currently, open() is called from the user program and it calls the syscall 'sys_openat', not the 'sys_open'. This leads to an error of the program of user side, due to the fact that the counter maps are zero since no function such 'sys_open' is called. This commit adds the kernel bpf program which are attached to the tracepoint 'sys_enter_openat' and 'sys_enter_openat'. Fixes: 1da236b6be963 ("bpf: add a test case for syscalls/sys_{enter|exit}_* tracepoints") Signed-off-by: Daniel T. Lee Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- samples/bpf/syscall_tp_kern.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/samples/bpf/syscall_tp_kern.c b/samples/bpf/syscall_tp_kern.c index 9149c524d279..8833aacb9c8c 100644 --- a/samples/bpf/syscall_tp_kern.c +++ b/samples/bpf/syscall_tp_kern.c @@ -50,13 +50,27 @@ static __always_inline void count(void *map) SEC("tracepoint/syscalls/sys_enter_open") int trace_enter_open(struct syscalls_enter_open_args *ctx) { - count((void *)&enter_open_map); + count(&enter_open_map); + return 0; +} + +SEC("tracepoint/syscalls/sys_enter_openat") +int trace_enter_open_at(struct syscalls_enter_open_args *ctx) +{ + count(&enter_open_map); return 0; } SEC("tracepoint/syscalls/sys_exit_open") int trace_enter_exit(struct syscalls_exit_open_args *ctx) { - count((void *)&exit_open_map); + count(&exit_open_map); + return 0; +} + +SEC("tracepoint/syscalls/sys_exit_openat") +int trace_enter_exit_at(struct syscalls_exit_open_args *ctx) +{ + count(&exit_open_map); return 0; } From bea1fe7e2f8edb86e66a56e2ebb74bc9bf3c83bd Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 4 Dec 2019 14:35:24 +0200 Subject: [PATCH 2166/3715] powerpc: Ensure that swiotlb buffer is allocated from low memory [ Upstream commit 8fabc623238e68b3ac63c0dd1657bf86c1fa33af ] Some powerpc platforms (e.g. 85xx) limit DMA-able memory way below 4G. If a system has more physical memory than this limit, the swiotlb buffer is not addressable because it is allocated from memblock using top-down mode. Force memblock to bottom-up mode before calling swiotlb_init() to ensure that the swiotlb buffer is DMA-able. Reported-by: Christian Zigotzky Signed-off-by: Mike Rapoport Reviewed-by: Christoph Hellwig Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191204123524.22919-1-rppt@kernel.org Signed-off-by: Sasha Levin --- arch/powerpc/mm/mem.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 30bf13b72e5e..3c5abfbbe60e 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -353,6 +353,14 @@ void __init mem_init(void) BUILD_BUG_ON(MMU_PAGE_COUNT > 16); #ifdef CONFIG_SWIOTLB + /* + * Some platforms (e.g. 85xx) limit DMA-able memory way below + * 4G. We force memblock to bottom-up mode to ensure that the + * memory allocated in swiotlb_init() is DMA-able. + * As it's the last memblock allocation, no need to reset it + * back to to-down. + */ + memblock_set_bottom_up(true); swiotlb_init(0); #endif From 58ee0e0b74e7e4d8dadc3597d3d721a9cd252a88 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Wed, 11 Dec 2019 09:59:55 -0800 Subject: [PATCH 2167/3715] bnx2x: Do not handle requests from VFs after parity [ Upstream commit 7113f796bbbced2470cd6d7379d50d7a7a78bf34 ] Parity error from the hardware will cause PF to lose the state of their VFs due to PF's internal reload and hardware reset following the parity error. Restrict any configuration request from the VFs after the parity as it could cause unexpected hardware behavior, only way for VFs to recover would be to trigger FLR on VFs and reload them. Signed-off-by: Manish Chopra Signed-off-by: Ariel Elior Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 12 ++++++++++-- drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h | 1 + drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c | 12 ++++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index dbe8feec456c..b0ada7eac652 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -9995,10 +9995,18 @@ static void bnx2x_recovery_failed(struct bnx2x *bp) */ static void bnx2x_parity_recover(struct bnx2x *bp) { - bool global = false; u32 error_recovered, error_unrecovered; - bool is_parity; + bool is_parity, global = false; +#ifdef CONFIG_BNX2X_SRIOV + int vf_idx; + for (vf_idx = 0; vf_idx < bp->requested_nr_virtfn; vf_idx++) { + struct bnx2x_virtf *vf = BP_VF(bp, vf_idx); + + if (vf) + vf->state = VF_LOST; + } +#endif DP(NETIF_MSG_HW, "Handling parity\n"); while (1) { switch (bp->recovery_state) { diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h index 53466f6cebab..a887bfa24c88 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h @@ -139,6 +139,7 @@ struct bnx2x_virtf { #define VF_ACQUIRED 1 /* VF acquired, but not initialized */ #define VF_ENABLED 2 /* VF Enabled */ #define VF_RESET 3 /* VF FLR'd, pending cleanup */ +#define VF_LOST 4 /* Recovery while VFs are loaded */ bool flr_clnup_stage; /* true during flr cleanup */ bool malicious; /* true if FW indicated so, until FLR */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c index 76a4668c50fe..6d5b81a971e3 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c @@ -2112,6 +2112,18 @@ static void bnx2x_vf_mbx_request(struct bnx2x *bp, struct bnx2x_virtf *vf, { int i; + if (vf->state == VF_LOST) { + /* Just ack the FW and return if VFs are lost + * in case of parity error. VFs are supposed to be timedout + * on waiting for PF response. + */ + DP(BNX2X_MSG_IOV, + "VF 0x%x lost, not handling the request\n", vf->abs_vfid); + + storm_memset_vf_mbx_ack(bp, vf->abs_vfid); + return; + } + /* check if tlv type is known */ if (bnx2x_tlv_supported(mbx->first_tlv.tl.type)) { /* Lock the per vf op mutex and note the locker's identity. From 9c914b0d4beb0af271049453d1e05ac278ebfabc Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Wed, 11 Dec 2019 09:59:56 -0800 Subject: [PATCH 2168/3715] bnx2x: Fix logic to get total no. of PFs per engine [ Upstream commit ee699f89bdbaa19c399804504241b5c531b48888 ] Driver doesn't calculate total number of PFs configured on a given engine correctly which messed up resources in the PFs loaded on that engine, leading driver to exceed configuration of resources (like vlan filters etc.) beyond the limit per engine, which ended up with asserts from the firmware. Signed-off-by: Manish Chopra Signed-off-by: Ariel Elior Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index 4e091a11daaf..52bce009d096 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -1112,7 +1112,7 @@ static inline u8 bnx2x_get_path_func_num(struct bnx2x *bp) for (i = 0; i < E1H_FUNC_MAX / 2; i++) { u32 func_config = MF_CFG_RD(bp, - func_mf_config[BP_PORT(bp) + 2 * i]. + func_mf_config[BP_PATH(bp) + 2 * i]. config); func_num += ((func_config & FUNC_MF_CFG_FUNC_HIDE) ? 0 : 1); From 4f5cf943699a6331949ce4cb319de3e23b787a04 Mon Sep 17 00:00:00 2001 From: Cristian Birsan Date: Fri, 13 Dec 2019 18:33:11 +0200 Subject: [PATCH 2169/3715] net: usb: lan78xx: Fix error message format specifier [ Upstream commit 858ce8ca62ea1530f2779d0e3f934b0176e663c3 ] Display the return code as decimal integer. Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") Signed-off-by: Cristian Birsan Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/usb/lan78xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 7d1d5b30ecc3..0aa6f3a5612d 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -497,7 +497,7 @@ static int lan78xx_read_stats(struct lan78xx_net *dev, } } else { netdev_warn(dev->net, - "Failed to read stat ret = 0x%x", ret); + "Failed to read stat ret = %d", ret); } kfree(stats); From c7a6c3d2c372a592c975cda98a479287ebd169d1 Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Sun, 15 Dec 2019 09:34:08 -0600 Subject: [PATCH 2170/3715] rfkill: Fix incorrect check to avoid NULL pointer dereference [ Upstream commit 6fc232db9e8cd50b9b83534de9cd91ace711b2d7 ] In rfkill_register, the struct rfkill pointer is first derefernced and then checked for NULL. This patch removes the BUG_ON and returns an error to the caller in case rfkill is NULL. Signed-off-by: Aditya Pakki Link: https://lore.kernel.org/r/20191215153409.21696-1-pakki001@umn.edu Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/rfkill/core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 99a2e55b01cf..e31b4288f32c 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -998,10 +998,13 @@ static void rfkill_sync_work(struct work_struct *work) int __must_check rfkill_register(struct rfkill *rfkill) { static unsigned long rfkill_no; - struct device *dev = &rfkill->dev; + struct device *dev; int error; - BUG_ON(!rfkill); + if (!rfkill) + return -EINVAL; + + dev = &rfkill->dev; mutex_lock(&rfkill_global_mutex); From ebfcc61fb719b08d63ed463d8ddea64570fb55ce Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Wed, 11 Dec 2019 19:57:22 +0800 Subject: [PATCH 2171/3715] ASoC: wm8962: fix lambda value [ Upstream commit 556672d75ff486e0b6786056da624131679e0576 ] According to user manual, it is required that FLL_LAMBDA > 0 in all cases (Integer and Franctional modes). Fixes: 9a76f1ff6e29 ("ASoC: Add initial WM8962 CODEC driver") Signed-off-by: Shengjiu Wang Acked-by: Charles Keepax Link: https://lore.kernel.org/r/1576065442-19763-1-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wm8962.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index fd2731d171dd..0e8008d38161 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -2791,7 +2791,7 @@ static int fll_factors(struct _fll_div *fll_div, unsigned int Fref, if (target % Fref == 0) { fll_div->theta = 0; - fll_div->lambda = 0; + fll_div->lambda = 1; } else { gcd_fll = gcd(target, fratio * Fref); @@ -2861,7 +2861,7 @@ static int wm8962_set_fll(struct snd_soc_codec *codec, int fll_id, int source, return -EINVAL; } - if (fll_div.theta || fll_div.lambda) + if (fll_div.theta) fll1 |= WM8962_FLL_FRAC; /* Stop the FLL while we reconfigure */ From 2dfde10dda0e0939ae4f29aff02327962f9ef9f3 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Wed, 11 Dec 2019 23:16:00 +0100 Subject: [PATCH 2172/3715] regulator: rn5t618: fix module aliases [ Upstream commit 62a1923cc8fe095912e6213ed5de27abbf1de77e ] platform device aliases were missing, preventing autoloading of module. Fixes: 811b700630ff ("regulator: rn5t618: add driver for Ricoh RN5T618 regulators") Signed-off-by: Andreas Kemnade Link: https://lore.kernel.org/r/20191211221600.29438-1-andreas@kemnade.info Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/rn5t618-regulator.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/regulator/rn5t618-regulator.c b/drivers/regulator/rn5t618-regulator.c index 790a4a73ea2c..40b74648bd31 100644 --- a/drivers/regulator/rn5t618-regulator.c +++ b/drivers/regulator/rn5t618-regulator.c @@ -154,6 +154,7 @@ static struct platform_driver rn5t618_regulator_driver = { module_platform_driver(rn5t618_regulator_driver); +MODULE_ALIAS("platform:rn5t618-regulator"); MODULE_AUTHOR("Beniamino Galvani "); MODULE_DESCRIPTION("RN5T618 regulator driver"); MODULE_LICENSE("GPL v2"); From 16b9c5c49b4131a63f1e108b23ba2399c241fd87 Mon Sep 17 00:00:00 2001 From: Thomas Hebb Date: Mon, 9 Dec 2019 00:19:17 -0800 Subject: [PATCH 2173/3715] kconfig: don't crash on NULL expressions in expr_eq() [ Upstream commit 272a72103012862e3a24ea06635253ead0b6e808 ] NULL expressions are taken to always be true, as implemented by the expr_is_yes() macro and by several other functions in expr.c. As such, they ought to be valid inputs to expr_eq(), which compares two expressions. Signed-off-by: Thomas Hebb Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/kconfig/expr.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scripts/kconfig/expr.c b/scripts/kconfig/expr.c index ed29bad1f03a..96420b620963 100644 --- a/scripts/kconfig/expr.c +++ b/scripts/kconfig/expr.c @@ -201,6 +201,13 @@ static int expr_eq(struct expr *e1, struct expr *e2) { int res, old_count; + /* + * A NULL expr is taken to be yes, but there's also a different way to + * represent yes. expr_is_yes() checks for either representation. + */ + if (!e1 || !e2) + return expr_is_yes(e1) && expr_is_yes(e2); + if (e1->type != e2->type) return 0; switch (e1->type) { From 396bf8fe4f679d8e457867b9ff60ee5fd6d99d42 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 10 Dec 2019 12:51:01 +0200 Subject: [PATCH 2174/3715] perf/x86/intel: Fix PT PMI handling [ Upstream commit 92ca7da4bdc24d63bb0bcd241c11441ddb63b80a ] Commit: ccbebba4c6bf ("perf/x86/intel/pt: Bypass PT vs. LBR exclusivity if the core supports it") skips the PT/LBR exclusivity check on CPUs where PT and LBRs coexist, but also inadvertently skips the active_events bump for PT in that case, which is a bug. If there aren't any hardware events at the same time as PT, the PMI handler will ignore PT PMIs, as active_events reads zero in that case, resulting in the "Uhhuh" spurious NMI warning and PT data loss. Fix this by always increasing active_events for PT events. Fixes: ccbebba4c6bf ("perf/x86/intel/pt: Bypass PT vs. LBR exclusivity if the core supports it") Reported-by: Vitaly Slobodskoy Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Acked-by: Alexey Budankov Cc: Jiri Olsa Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Link: https://lkml.kernel.org/r/20191210105101.77210-1-alexander.shishkin@linux.intel.com Signed-off-by: Sasha Levin --- arch/x86/events/core.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 6ed99de2ddf5..c1f7b3cb84a9 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -375,7 +375,7 @@ int x86_add_exclusive(unsigned int what) * LBR and BTS are still mutually exclusive. */ if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt) - return 0; + goto out; if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) { mutex_lock(&pmc_reserve_mutex); @@ -387,6 +387,7 @@ int x86_add_exclusive(unsigned int what) mutex_unlock(&pmc_reserve_mutex); } +out: atomic_inc(&active_events); return 0; @@ -397,11 +398,15 @@ fail_unlock: void x86_del_exclusive(unsigned int what) { + atomic_dec(&active_events); + + /* + * See the comment in x86_add_exclusive(). + */ if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt) return; atomic_dec(&x86_pmu.lbr_exclusive[what]); - atomic_dec(&active_events); } int x86_setup_perfctr(struct perf_event *event) From 61855d6805ae078ae6bba561e6e76715436d776d Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Fri, 6 Dec 2019 10:54:23 -0600 Subject: [PATCH 2175/3715] fs: avoid softlockups in s_inodes iterators [ Upstream commit 04646aebd30b99f2cfa0182435a2ec252fcb16d0 ] Anything that walks all inodes on sb->s_inodes list without rescheduling risks softlockups. Previous efforts were made in 2 functions, see: c27d82f fs/drop_caches.c: avoid softlockups in drop_pagecache_sb() ac05fbb inode: don't softlockup when evicting inodes but there hasn't been an audit of all walkers, so do that now. This also consistently moves the cond_resched() calls to the bottom of each loop in cases where it already exists. One loop remains: remove_dquot_ref(), because I'm not quite sure how to deal with that one w/o taking the i_lock. Signed-off-by: Eric Sandeen Reviewed-by: Jan Kara Signed-off-by: Al Viro Signed-off-by: Sasha Levin --- fs/drop_caches.c | 2 +- fs/inode.c | 7 +++++++ fs/notify/fsnotify.c | 1 + fs/quota/dquot.c | 1 + 4 files changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/drop_caches.c b/fs/drop_caches.c index d31b6c72b476..dc1a1d5d825b 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -35,11 +35,11 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused) spin_unlock(&inode->i_lock); spin_unlock(&sb->s_inode_list_lock); - cond_resched(); invalidate_mapping_pages(inode->i_mapping, 0, -1); iput(toput_inode); toput_inode = inode; + cond_resched(); spin_lock(&sb->s_inode_list_lock); } spin_unlock(&sb->s_inode_list_lock); diff --git a/fs/inode.c b/fs/inode.c index 76f7535fe754..d2a700c5efce 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -656,6 +656,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty) struct inode *inode, *next; LIST_HEAD(dispose); +again: spin_lock(&sb->s_inode_list_lock); list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { spin_lock(&inode->i_lock); @@ -678,6 +679,12 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty) inode_lru_list_del(inode); spin_unlock(&inode->i_lock); list_add(&inode->i_lru, &dispose); + if (need_resched()) { + spin_unlock(&sb->s_inode_list_lock); + cond_resched(); + dispose_list(&dispose); + goto again; + } } spin_unlock(&sb->s_inode_list_lock); diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 506da82ff3f1..a308f7a7e577 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -90,6 +90,7 @@ void fsnotify_unmount_inodes(struct super_block *sb) iput_inode = inode; + cond_resched(); spin_lock(&sb->s_inode_list_lock); } spin_unlock(&sb->s_inode_list_lock); diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 3fdbdd29702b..30f5da8f4aff 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -976,6 +976,7 @@ static int add_dquot_ref(struct super_block *sb, int type) * later. */ old_inode = inode; + cond_resched(); spin_lock(&sb->s_inode_list_lock); } spin_unlock(&sb->s_inode_list_lock); From aa6bf9433ef76485243428754e723e71642f4a6d Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 18 Dec 2019 11:17:37 +0100 Subject: [PATCH 2176/3715] net: stmmac: Do not accept invalid MTU values [ Upstream commit eaf4fac478077d4ed57cbca2c044c4b58a96bd98 ] The maximum MTU value is determined by the maximum size of TX FIFO so that a full packet can fit in the FIFO. Add a check for this in the MTU change callback. Also check if provided and rounded MTU does not passes the maximum limit of 16K. Changes from v2: - Align MTU before checking if its valid Fixes: 7ac6653a085b ("stmmac: Move the STMicroelectronics driver") Signed-off-by: Jose Abreu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index e6d16c48ffef..4ef923f1094a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3597,12 +3597,24 @@ static void stmmac_set_rx_mode(struct net_device *dev) static int stmmac_change_mtu(struct net_device *dev, int new_mtu) { struct stmmac_priv *priv = netdev_priv(dev); + int txfifosz = priv->plat->tx_fifo_size; + + if (txfifosz == 0) + txfifosz = priv->dma_cap.tx_fifo_size; + + txfifosz /= priv->plat->tx_queues_to_use; if (netif_running(dev)) { netdev_err(priv->dev, "must be stopped to change its MTU\n"); return -EBUSY; } + new_mtu = STMMAC_ALIGN(new_mtu); + + /* If condition true, FIFO is too small or MTU too large */ + if ((txfifosz < new_mtu) || (new_mtu > BUF_SIZE_16KiB)) + return -EINVAL; + dev->mtu = new_mtu; netdev_update_features(dev); From f1cf84a64527c94c24e009e73bf2c38ce2361596 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 18 Dec 2019 11:17:40 +0100 Subject: [PATCH 2177/3715] net: stmmac: RX buffer size must be 16 byte aligned [ Upstream commit 8d558f0294fe92e04af192e221d0d0f6a180ee7b ] We need to align the RX buffer size to at least 16 byte so that IP doesn't mis-behave. This is required by HW. Changes from v2: - Align UP and not DOWN (David) Fixes: 7ac6653a085b ("stmmac: Move the STMicroelectronics driver") Signed-off-by: Jose Abreu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 4ef923f1094a..e89466bd432d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -51,7 +51,7 @@ #include #include "dwmac1000.h" -#define STMMAC_ALIGN(x) __ALIGN_KERNEL(x, SMP_CACHE_BYTES) +#define STMMAC_ALIGN(x) ALIGN(ALIGN(x, SMP_CACHE_BYTES), 16) #define TSO_MAX_BUFF_SIZE (SZ_16K - 1) /* Module parameters */ From 29753fc2f8d0b406f2435c53f5e49c959c381ac5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=C3=B6ppner?= Date: Thu, 19 Dec 2019 09:43:50 +0100 Subject: [PATCH 2178/3715] s390/dasd/cio: Interpret ccw_device_get_mdc return value correctly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit dd4b3c83b9efac10d48a94c61372119fc555a077 ] The max data count (mdc) is an unsigned 16-bit integer value as per AR documentation and is received via ccw_device_get_mdc() for a specific path mask from the CIO layer. The function itself also always returns a positive mdc value or 0 in case mdc isn't supported or couldn't be determined. Though, the comment for this function describes a negative return value to indicate failures. As a result, the DASD device driver interprets the return value of ccw_device_get_mdc() incorrectly. The error case is essentially a dead code path. To fix this behaviour, check explicitly for a return value of 0 and change the comment for ccw_device_get_mdc() accordingly. This fix merely enables the error code path in the DASD functions get_fcx_max_data() and verify_fcx_max_data(). The actual functionality stays the same and is still correct. Reviewed-by: Cornelia Huck Signed-off-by: Jan Höppner Acked-by: Peter Oberparleiter Reviewed-by: Stefan Haberland Signed-off-by: Stefan Haberland Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/s390/block/dasd_eckd.c | 9 +++++---- drivers/s390/cio/device_ops.c | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 0d5e2d92e05b..81359312a987 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1133,7 +1133,8 @@ static u32 get_fcx_max_data(struct dasd_device *device) { struct dasd_eckd_private *private = device->private; int fcx_in_css, fcx_in_gneq, fcx_in_features; - int tpm, mdc; + unsigned int mdc; + int tpm; if (dasd_nofcx) return 0; @@ -1147,7 +1148,7 @@ static u32 get_fcx_max_data(struct dasd_device *device) return 0; mdc = ccw_device_get_mdc(device->cdev, 0); - if (mdc < 0) { + if (mdc == 0) { dev_warn(&device->cdev->dev, "Detecting the maximum supported data size for zHPF requests failed\n"); return 0; } else { @@ -1158,12 +1159,12 @@ static u32 get_fcx_max_data(struct dasd_device *device) static int verify_fcx_max_data(struct dasd_device *device, __u8 lpm) { struct dasd_eckd_private *private = device->private; - int mdc; + unsigned int mdc; u32 fcx_max_data; if (private->fcx_max_data) { mdc = ccw_device_get_mdc(device->cdev, lpm); - if ((mdc < 0)) { + if (mdc == 0) { dev_warn(&device->cdev->dev, "Detecting the maximum data size for zHPF " "requests failed (rc=%d) for a new path %x\n", diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index b22922ec32d1..474afec9ab87 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -595,7 +595,7 @@ EXPORT_SYMBOL(ccw_device_tm_start_timeout); * @mask: mask of paths to use * * Return the number of 64K-bytes blocks all paths at least support - * for a transport command. Return values <= 0 indicate failures. + * for a transport command. Return value 0 indicates failure. */ int ccw_device_get_mdc(struct ccw_device *cdev, u8 mask) { From 66aa3bf42c2c009a14c3cf0214b95a32c11abc9e Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Thu, 19 Dec 2019 09:43:51 +0100 Subject: [PATCH 2179/3715] s390/dasd: fix memleak in path handling error case [ Upstream commit 00b39f698a4f1ee897227cace2e3937fc4412270 ] If for whatever reason the dasd_eckd_check_characteristics() function exits after at least some paths have their configuration data allocated those data is never freed again. In the error case the device->private pointer is set to NULL and dasd_eckd_uncheck_device() will exit without freeing the path data because of this NULL pointer. Fix by calling dasd_eckd_clear_conf_data() for error cases. Also use dasd_eckd_clear_conf_data() in dasd_eckd_uncheck_device() to avoid code duplication. Reported-by: Qian Cai Reviewed-by: Jan Hoeppner Signed-off-by: Stefan Haberland Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/s390/block/dasd_eckd.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 81359312a987..aa651403546f 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1768,7 +1768,7 @@ out_err2: dasd_free_block(device->block); device->block = NULL; out_err1: - kfree(private->conf_data); + dasd_eckd_clear_conf_data(device); kfree(device->private); device->private = NULL; return rc; @@ -1777,7 +1777,6 @@ out_err1: static void dasd_eckd_uncheck_device(struct dasd_device *device) { struct dasd_eckd_private *private = device->private; - int i; if (!private) return; @@ -1787,21 +1786,7 @@ static void dasd_eckd_uncheck_device(struct dasd_device *device) private->sneq = NULL; private->vdsneq = NULL; private->gneq = NULL; - private->conf_len = 0; - for (i = 0; i < 8; i++) { - kfree(device->path[i].conf_data); - if ((__u8 *)device->path[i].conf_data == - private->conf_data) { - private->conf_data = NULL; - private->conf_len = 0; - } - device->path[i].conf_data = NULL; - device->path[i].cssid = 0; - device->path[i].ssid = 0; - device->path[i].chpid = 0; - } - kfree(private->conf_data); - private->conf_data = NULL; + dasd_eckd_clear_conf_data(device); } static struct dasd_ccw_req * From 62dfe5f55d2ca0b350fa76333fbb8a57b31c864a Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 18 Dec 2019 16:44:04 +0800 Subject: [PATCH 2180/3715] block: fix memleak when __blk_rq_map_user_iov() is failed [ Upstream commit 3b7995a98ad76da5597b488fa84aa5a56d43b608 ] When I doing fuzzy test, get the memleak report: BUG: memory leak unreferenced object 0xffff88837af80000 (size 4096): comm "memleak", pid 3557, jiffies 4294817681 (age 112.499s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 20 00 00 00 10 01 00 00 00 00 00 00 01 00 00 00 ............... backtrace: [<000000001c894df8>] bio_alloc_bioset+0x393/0x590 [<000000008b139a3c>] bio_copy_user_iov+0x300/0xcd0 [<00000000a998bd8c>] blk_rq_map_user_iov+0x2f1/0x5f0 [<000000005ceb7f05>] blk_rq_map_user+0xf2/0x160 [<000000006454da92>] sg_common_write.isra.21+0x1094/0x1870 [<00000000064bb208>] sg_write.part.25+0x5d9/0x950 [<000000004fc670f6>] sg_write+0x5f/0x8c [<00000000b0d05c7b>] __vfs_write+0x7c/0x100 [<000000008e177714>] vfs_write+0x1c3/0x500 [<0000000087d23f34>] ksys_write+0xf9/0x200 [<000000002c8dbc9d>] do_syscall_64+0x9f/0x4f0 [<00000000678d8e9a>] entry_SYSCALL_64_after_hwframe+0x49/0xbe If __blk_rq_map_user_iov() is failed in blk_rq_map_user_iov(), the bio(s) which is allocated before this failing will leak. The refcount of the bio(s) is init to 1 and increased to 2 by calling bio_get(), but __blk_rq_unmap_user() only decrease it to 1, so the bio cannot be freed. Fix it by calling blk_rq_unmap_user(). Reviewed-by: Bob Liu Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-map.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-map.c b/block/blk-map.c index e31be14da8ea..f72a3af689b6 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -152,7 +152,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, return 0; unmap_rq: - __blk_rq_unmap_user(bio); + blk_rq_unmap_user(bio); fail: rq->bio = NULL; return ret; From 5d5d36505ddf5ad6d2d7b3d2486778b86f53efbc Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 20 Dec 2019 21:00:19 +0100 Subject: [PATCH 2181/3715] parisc: Fix compiler warnings in debug_core.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 75cf9797006a3a9f29a3a25c1febd6842a4a9eb2 ] Fix this compiler warning: kernel/debug/debug_core.c: In function ‘kgdb_cpu_enter’: arch/parisc/include/asm/cmpxchg.h:48:3: warning: value computed is not used [-Wunused-value] 48 | ((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))) arch/parisc/include/asm/atomic.h:78:30: note: in expansion of macro ‘xchg’ 78 | #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) | ^~~~ kernel/debug/debug_core.c:596:4: note: in expansion of macro ‘atomic_xchg’ 596 | atomic_xchg(&kgdb_active, cpu); | ^~~~~~~~~~~ Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- arch/parisc/include/asm/cmpxchg.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/parisc/include/asm/cmpxchg.h b/arch/parisc/include/asm/cmpxchg.h index f627c37dad9c..ab5c215cf46c 100644 --- a/arch/parisc/include/asm/cmpxchg.h +++ b/arch/parisc/include/asm/cmpxchg.h @@ -44,8 +44,14 @@ __xchg(unsigned long x, __volatile__ void *ptr, int size) ** if (((unsigned long)p & 0xf) == 0) ** return __ldcw(p); */ -#define xchg(ptr, x) \ - ((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))) +#define xchg(ptr, x) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __typeof__(*(ptr)) _x_ = (x); \ + __ret = (__typeof__(*(ptr))) \ + __xchg((unsigned long)_x_, (ptr), sizeof(*(ptr))); \ + __ret; \ +}) /* bug catcher for when unsupported size is used - won't link */ extern void __cmpxchg_called_with_bad_pointer(void); From 3a8d4b961747e79a9d28e9f7621216045403b2bb Mon Sep 17 00:00:00 2001 From: "Chan Shu Tak, Alex" Date: Thu, 19 Dec 2019 14:16:18 +0800 Subject: [PATCH 2182/3715] llc2: Fix return statement of llc_stat_ev_rx_null_dsap_xid_c (and _test_c) [ Upstream commit af1c0e4e00f3cc76cb136ebf2e2c04e8b6446285 ] When a frame with NULL DSAP is received, llc_station_rcv is called. In turn, llc_stat_ev_rx_null_dsap_xid_c is called to check if it is a NULL XID frame. The return statement of llc_stat_ev_rx_null_dsap_xid_c returns 1 when the incoming frame is not a NULL XID frame and 0 otherwise. Hence, a NULL XID response is returned unexpectedly, e.g. when the incoming frame is a NULL TEST command. To fix the error, simply remove the conditional operator. A similar error in llc_stat_ev_rx_null_dsap_test_c is also fixed. Signed-off-by: Chan Shu Tak, Alex Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/llc/llc_station.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index 204a8351efff..c29170e767a8 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -32,7 +32,7 @@ static int llc_stat_ev_rx_null_dsap_xid_c(struct sk_buff *skb) return LLC_PDU_IS_CMD(pdu) && /* command PDU */ LLC_PDU_TYPE_IS_U(pdu) && /* U type PDU */ LLC_U_PDU_CMD(pdu) == LLC_1_PDU_CMD_XID && - !pdu->dsap ? 0 : 1; /* NULL DSAP value */ + !pdu->dsap; /* NULL DSAP value */ } static int llc_stat_ev_rx_null_dsap_test_c(struct sk_buff *skb) @@ -42,7 +42,7 @@ static int llc_stat_ev_rx_null_dsap_test_c(struct sk_buff *skb) return LLC_PDU_IS_CMD(pdu) && /* command PDU */ LLC_PDU_TYPE_IS_U(pdu) && /* U type PDU */ LLC_U_PDU_CMD(pdu) == LLC_1_PDU_CMD_TEST && - !pdu->dsap ? 0 : 1; /* NULL DSAP */ + !pdu->dsap; /* NULL DSAP */ } static int llc_station_ac_send_xid_r(struct sk_buff *skb) From 1051a28b7255e6624d379f2bd45713352f9470cf Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Thu, 19 Dec 2019 18:28:10 -0800 Subject: [PATCH 2183/3715] hv_netvsc: Fix unwanted rx_table reset [ Upstream commit b0689faa8efc5a3391402d7ae93bd373b7248e51 ] In existing code, the receive indirection table, rx_table, is in struct rndis_device, which will be reset when changing MTU, ringparam, etc. User configured receive indirection table values will be lost. To fix this, move rx_table to struct net_device_context, and check netif_is_rxfh_configured(), so rx_table will be set to default only if no user configured value. Fixes: ff4a44199012 ("netvsc: allow get/set of RSS indirection table") Signed-off-by: Haiyang Zhang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/hyperv/hyperv_net.h | 3 ++- drivers/net/hyperv/netvsc_drv.c | 4 ++-- drivers/net/hyperv/rndis_filter.c | 10 +++++++--- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 0f07b5978fa1..fc794e69e6a1 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -179,7 +179,6 @@ struct rndis_device { u8 hw_mac_adr[ETH_ALEN]; u8 rss_key[NETVSC_HASH_KEYLEN]; - u16 rx_table[ITAB_NUM]; }; @@ -741,6 +740,8 @@ struct net_device_context { u32 tx_table[VRSS_SEND_TAB_SIZE]; + u16 rx_table[ITAB_NUM]; + /* Ethtool settings */ bool udp4_l4_hash; bool udp6_l4_hash; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 5a44b9795266..a89de5752a8c 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1528,7 +1528,7 @@ static int netvsc_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, rndis_dev = ndev->extension; if (indir) { for (i = 0; i < ITAB_NUM; i++) - indir[i] = rndis_dev->rx_table[i]; + indir[i] = ndc->rx_table[i]; } if (key) @@ -1558,7 +1558,7 @@ static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir, return -EINVAL; for (i = 0; i < ITAB_NUM; i++) - rndis_dev->rx_table[i] = indir[i]; + ndc->rx_table[i] = indir[i]; } if (!key) { diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index fc1d5e14d83e..b19557c035f2 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -715,6 +715,7 @@ static int rndis_set_rss_param_msg(struct rndis_device *rdev, const u8 *rss_key, u16 flag) { struct net_device *ndev = rdev->ndev; + struct net_device_context *ndc = netdev_priv(ndev); struct rndis_request *request; struct rndis_set_request *set; struct rndis_set_complete *set_complete; @@ -754,7 +755,7 @@ static int rndis_set_rss_param_msg(struct rndis_device *rdev, /* Set indirection table entries */ itab = (u32 *)(rssp + 1); for (i = 0; i < ITAB_NUM; i++) - itab[i] = rdev->rx_table[i]; + itab[i] = ndc->rx_table[i]; /* Set hask key values */ keyp = (u8 *)((unsigned long)rssp + rssp->kashkey_offset); @@ -1204,6 +1205,7 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, struct netvsc_device_info *device_info) { struct net_device *net = hv_get_drvdata(dev); + struct net_device_context *ndc = netdev_priv(net); struct netvsc_device *net_device; struct rndis_device *rndis_device; struct ndis_recv_scale_cap rsscap; @@ -1286,9 +1288,11 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, /* We will use the given number of channels if available. */ net_device->num_chn = min(net_device->max_chn, device_info->num_chn); - for (i = 0; i < ITAB_NUM; i++) - rndis_device->rx_table[i] = ethtool_rxfh_indir_default( + if (!netif_is_rxfh_configured(net)) { + for (i = 0; i < ITAB_NUM; i++) + ndc->rx_table[i] = ethtool_rxfh_indir_default( i, net_device->num_chn); + } atomic_set(&net_device->open_chn, 1); vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open); From 7fed98f4a1e6eb77a5d66ecfdf9345e21df6ac82 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 7 Jun 2018 17:40:03 +0200 Subject: [PATCH 2184/3715] bpf: reject passing modified ctx to helper functions commit 58990d1ff3f7896ee341030e9a7c2e4002570683 upstream. As commit 28e33f9d78ee ("bpf: disallow arithmetic operations on context pointer") already describes, f1174f77b50c ("bpf/verifier: rework value tracking") removed the specific white-listed cases we had previously where we would allow for pointer arithmetic in order to further generalize it, and allow e.g. context access via modified registers. While the dereferencing of modified context pointers had been forbidden through 28e33f9d78ee, syzkaller did recently manage to trigger several KASAN splats for slab out of bounds access and use after frees by simply passing a modified context pointer to a helper function which would then do the bad access since verifier allowed it in adjust_ptr_min_max_vals(). Rejecting arithmetic on ctx pointer in adjust_ptr_min_max_vals() generally could break existing programs as there's a valid use case in tracing in combination with passing the ctx to helpers as bpf_probe_read(), where the register then becomes unknown at verification time due to adding a non-constant offset to it. An access sequence may look like the following: offset = args->filename; /* field __data_loc filename */ bpf_probe_read(&dst, len, (char *)args + offset); // args is ctx There are two options: i) we could special case the ctx and as soon as we add a constant or bounded offset to it (hence ctx type wouldn't change) we could turn the ctx into an unknown scalar, or ii) we generalize the sanity test for ctx member access into a small helper and assert it on the ctx register that was passed as a function argument. Fwiw, latter is more obvious and less complex at the same time, and one case that may potentially be legitimate in future for ctx member access at least would be for ctx to carry a const offset. Therefore, fix follows approach from ii) and adds test cases to BPF kselftests. Fixes: f1174f77b50c ("bpf/verifier: rework value tracking") Reported-by: syzbot+3d0b2441dbb71751615e@syzkaller.appspotmail.com Reported-by: syzbot+c8504affd4fdd0c1b626@syzkaller.appspotmail.com Reported-by: syzbot+e5190cb881d8660fb1a3@syzkaller.appspotmail.com Reported-by: syzbot+efae31b384d5badbd620@syzkaller.appspotmail.com Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Yonghong Song Acked-by: Edward Cree Signed-off-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 45 ++++++++++------ tools/testing/selftests/bpf/test_verifier.c | 58 ++++++++++++++++++++- 2 files changed, 87 insertions(+), 16 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a4875ff0bab1..be52b0529225 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1251,6 +1251,30 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, return check_generic_ptr_alignment(reg, pointer_desc, off, size, strict); } +static int check_ctx_reg(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, int regno) +{ + /* Access to ctx or passing it to a helper is only allowed in + * its original, unmodified form. + */ + + if (reg->off) { + verbose("dereference of modified ctx ptr R%d off=%d disallowed\n", + regno, reg->off); + return -EACCES; + } + + if (!tnum_is_const(reg->var_off) || reg->var_off.value) { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose("variable ctx access var_off=%s disallowed\n", tn_buf); + return -EACCES; + } + + return 0; +} + /* truncate register to smaller size (in bytes) * must be called with size < BPF_REG_SIZE */ @@ -1320,22 +1344,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn verbose("R%d leaks addr into ctx\n", value_regno); return -EACCES; } - /* ctx accesses must be at a fixed offset, so that we can - * determine what type of data were returned. - */ - if (reg->off) { - verbose("dereference of modified ctx ptr R%d off=%d+%d, ctx+const is allowed, ctx+const+const is not\n", - regno, reg->off, off - reg->off); - return -EACCES; - } - if (!tnum_is_const(reg->var_off) || reg->var_off.value) { - char tn_buf[48]; + err = check_ctx_reg(env, reg, regno); + if (err < 0) + return err; - tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose("variable ctx access var_off=%s off=%d size=%d", - tn_buf, off, size); - return -EACCES; - } err = check_ctx_access(env, insn_idx, off, size, t, ®_type); if (!err && t == BPF_READ && value_regno >= 0) { /* ctx access returns either a scalar, or a @@ -1573,6 +1585,9 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, expected_type = PTR_TO_CTX; if (type != expected_type) goto err_type; + err = check_ctx_reg(env, reg, regno); + if (err < 0) + return err; } else if (arg_type == ARG_PTR_TO_MEM || arg_type == ARG_PTR_TO_UNINIT_MEM) { expected_type = PTR_TO_STACK; diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 913539aea645..9babb3fef8e2 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -7281,7 +7281,7 @@ static struct bpf_test tests[] = { offsetof(struct __sk_buff, mark)), BPF_EXIT_INSN(), }, - .errstr = "dereference of modified ctx ptr R1 off=68+8, ctx+const is allowed, ctx+const+const is not", + .errstr = "dereference of modified ctx ptr", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, @@ -7944,6 +7944,62 @@ static struct bpf_test tests[] = { .errstr = "BPF_XADD stores into R2 packet", .prog_type = BPF_PROG_TYPE_XDP, }, + { + "pass unmodified ctx pointer to helper", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_csum_update), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "pass modified ctx pointer to helper, 1", + .insns = { + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_csum_update), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "dereference of modified ctx ptr", + }, + { + "pass modified ctx pointer to helper, 2", + .insns = { + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_socket_cookie), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result_unpriv = REJECT, + .result = REJECT, + .errstr_unpriv = "dereference of modified ctx ptr", + .errstr = "dereference of modified ctx ptr", + }, + { + "pass modified ctx pointer to helper, 3", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_csum_update), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "variable ctx access var_off=(0x0; 0x4)", + }, }; static int probe_filter_length(const struct bpf_insn *fp) From b454ac1b22af130c6fb8d34c344a98339f1cea9a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 6 Jan 2020 22:51:57 +0100 Subject: [PATCH 2185/3715] bpf: Fix passing modified ctx to ld/abs/ind instruction commit 6d4f151acf9a4f6fab09b615f246c717ddedcf0c upstream. Anatoly has been fuzzing with kBdysch harness and reported a KASAN slab oob in one of the outcomes: [...] [ 77.359642] BUG: KASAN: slab-out-of-bounds in bpf_skb_load_helper_8_no_cache+0x71/0x130 [ 77.360463] Read of size 4 at addr ffff8880679bac68 by task bpf/406 [ 77.361119] [ 77.361289] CPU: 2 PID: 406 Comm: bpf Not tainted 5.5.0-rc2-xfstests-00157-g2187f215eba #1 [ 77.362134] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 [ 77.362984] Call Trace: [ 77.363249] dump_stack+0x97/0xe0 [ 77.363603] print_address_description.constprop.0+0x1d/0x220 [ 77.364251] ? bpf_skb_load_helper_8_no_cache+0x71/0x130 [ 77.365030] ? bpf_skb_load_helper_8_no_cache+0x71/0x130 [ 77.365860] __kasan_report.cold+0x37/0x7b [ 77.366365] ? bpf_skb_load_helper_8_no_cache+0x71/0x130 [ 77.366940] kasan_report+0xe/0x20 [ 77.367295] bpf_skb_load_helper_8_no_cache+0x71/0x130 [ 77.367821] ? bpf_skb_load_helper_8+0xf0/0xf0 [ 77.368278] ? mark_lock+0xa3/0x9b0 [ 77.368641] ? kvm_sched_clock_read+0x14/0x30 [ 77.369096] ? sched_clock+0x5/0x10 [ 77.369460] ? sched_clock_cpu+0x18/0x110 [ 77.369876] ? bpf_skb_load_helper_8+0xf0/0xf0 [ 77.370330] ___bpf_prog_run+0x16c0/0x28f0 [ 77.370755] __bpf_prog_run32+0x83/0xc0 [ 77.371153] ? __bpf_prog_run64+0xc0/0xc0 [ 77.371568] ? match_held_lock+0x1b/0x230 [ 77.371984] ? rcu_read_lock_held+0xa1/0xb0 [ 77.372416] ? rcu_is_watching+0x34/0x50 [ 77.372826] sk_filter_trim_cap+0x17c/0x4d0 [ 77.373259] ? sock_kzfree_s+0x40/0x40 [ 77.373648] ? __get_filter+0x150/0x150 [ 77.374059] ? skb_copy_datagram_from_iter+0x80/0x280 [ 77.374581] ? do_raw_spin_unlock+0xa5/0x140 [ 77.375025] unix_dgram_sendmsg+0x33a/0xa70 [ 77.375459] ? do_raw_spin_lock+0x1d0/0x1d0 [ 77.375893] ? unix_peer_get+0xa0/0xa0 [ 77.376287] ? __fget_light+0xa4/0xf0 [ 77.376670] __sys_sendto+0x265/0x280 [ 77.377056] ? __ia32_sys_getpeername+0x50/0x50 [ 77.377523] ? lock_downgrade+0x350/0x350 [ 77.377940] ? __sys_setsockopt+0x2a6/0x2c0 [ 77.378374] ? sock_read_iter+0x240/0x240 [ 77.378789] ? __sys_socketpair+0x22a/0x300 [ 77.379221] ? __ia32_sys_socket+0x50/0x50 [ 77.379649] ? mark_held_locks+0x1d/0x90 [ 77.380059] ? trace_hardirqs_on_thunk+0x1a/0x1c [ 77.380536] __x64_sys_sendto+0x74/0x90 [ 77.380938] do_syscall_64+0x68/0x2a0 [ 77.381324] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 77.381878] RIP: 0033:0x44c070 [...] After further debugging, turns out while in case of other helper functions we disallow passing modified ctx, the special case of ld/abs/ind instruction which has similar semantics (except r6 being the ctx argument) is missing such check. Modified ctx is impossible here as bpf_skb_load_helper_8_no_cache() and others are expecting skb fields in original position, hence, add check_ctx_reg() to reject any modified ctx. Issue was first introduced back in f1174f77b50c ("bpf/verifier: rework value tracking"). Fixes: f1174f77b50c ("bpf/verifier: rework value tracking") Reported-by: Anatoly Trosinenko Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200106215157.3553-1-daniel@iogearbox.net Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index be52b0529225..615a2e44d2a0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3457,6 +3457,7 @@ static bool may_access_skb(enum bpf_prog_type type) static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) { struct bpf_reg_state *regs = cur_regs(env); + static const int ctx_reg = BPF_REG_6; u8 mode = BPF_MODE(insn->code); int i, err; @@ -3473,11 +3474,11 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) } /* check whether implicit source operand (register R6) is readable */ - err = check_reg_arg(env, BPF_REG_6, SRC_OP); + err = check_reg_arg(env, ctx_reg, SRC_OP); if (err) return err; - if (regs[BPF_REG_6].type != PTR_TO_CTX) { + if (regs[ctx_reg].type != PTR_TO_CTX) { verbose("at the time of BPF_LD_ABS|IND R6 != pointer to skb\n"); return -EINVAL; } @@ -3489,6 +3490,10 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) return err; } + err = check_ctx_reg(env, ®s[ctx_reg], ctx_reg); + if (err < 0) + return err; + /* reset caller saved regs to unreadable */ for (i = 0; i < CALLER_SAVED_REGS; i++) { mark_reg_not_init(regs, caller_saved[i]); From e9eae4143c33ebe33aa2e195c2863c6e1bf3f8cd Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Tue, 10 Sep 2019 13:58:33 -0600 Subject: [PATCH 2186/3715] PCI/switchtec: Read all 64 bits of part_event_bitmap commit 6acdf7e19b37cb3a9258603d0eab315079c19c5e upstream. The part_event_bitmap register is 64 bits wide, so read it with ioread64() instead of the 32-bit ioread32(). Fixes: 52eabba5bcdb ("switchtec: Add IOCTLs to the Switchtec driver") Link: https://lore.kernel.org/r/20190910195833.3891-1-logang@deltatee.com Reported-by: Doug Meyer Signed-off-by: Logan Gunthorpe Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # v4.12+ Cc: Kelvin Cao Signed-off-by: Greg Kroah-Hartman --- drivers/pci/switch/switchtec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c index e3aefdafae89..0941555b84a5 100644 --- a/drivers/pci/switch/switchtec.c +++ b/drivers/pci/switch/switchtec.c @@ -23,7 +23,7 @@ #include #include #include - +#include #include MODULE_DESCRIPTION("Microsemi Switchtec(tm) PCIe Management Driver"); @@ -898,7 +898,7 @@ static int ioctl_event_summary(struct switchtec_dev *stdev, u32 reg; s.global = ioread32(&stdev->mmio_sw_event->global_summary); - s.part_bitmap = ioread32(&stdev->mmio_sw_event->part_event_bitmap); + s.part_bitmap = readq(&stdev->mmio_sw_event->part_event_bitmap); s.local_part = ioread32(&stdev->mmio_part_cfg->part_event_summary); for (i = 0; i < stdev->partition_count; i++) { From 37d58689dfdd068c2f15f90d573f8e93fe28cf86 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 20 Sep 2017 10:02:00 +0200 Subject: [PATCH 2187/3715] mmc: block: Convert RPMB to a character device commit 97548575bef38abd06690a5a6f6816200c7e77f7 upstream. The RPMB partition on the eMMC devices is a special area used for storing cryptographically safe information signed by a special secret key. To write and read records from this special area, authentication is needed. The RPMB area is *only* and *exclusively* accessed using ioctl():s from userspace. It is not really a block device, as blocks cannot be read or written from the device, also the signed chunks that can be stored on the RPMB are actually 256 bytes, not 512 making a block device a real bad fit. Currently the RPMB partition spawns a separate block device named /dev/mmcblkNrpmb for each device with an RPMB partition, including the creation of a block queue with its own kernel thread and all overhead associated with this. On the Ux500 HREFv60 platform, for example, the two eMMCs means that two block queues with separate threads are created for no use whatsoever. I have concluded that this block device design for RPMB is actually pretty wrong. The RPMB area should have been designed to be accessed from /dev/mmcblkN directly, using ioctl()s on the main block device. It is however way too late to change that, since userspace expects to open an RPMB device in /dev/mmcblkNrpmb and we cannot break userspace. This patch tries to amend the situation using the following strategy: - Stop creating a block device for the RPMB partition/area - Instead create a custom, dynamic character device with the same name. - Make this new character device support exactly the same set of ioctl()s as the old block device. - Wrap the requests back to the same ioctl() handlers, but issue them on the block queue of the main partition/area, i.e. /dev/mmcblkN We need to create a special "rpmb" bus type in order to get udev and/or busybox hot/coldplug to instantiate the device node properly. Before the patch, this appears in 'ps aux': 101 root 0:00 [mmcqd/2rpmb] 123 root 0:00 [mmcqd/3rpmb] After applying the patch these surplus block queue threads are gone, but RPMB is as usable as ever using the userspace MMC tools, such as 'mmc rpmb read-counter'. We get instead those dynamice devices in /dev: brw-rw---- 1 root root 179, 0 Jan 1 2000 mmcblk0 brw-rw---- 1 root root 179, 1 Jan 1 2000 mmcblk0p1 brw-rw---- 1 root root 179, 2 Jan 1 2000 mmcblk0p2 brw-rw---- 1 root root 179, 5 Jan 1 2000 mmcblk0p5 brw-rw---- 1 root root 179, 8 Jan 1 2000 mmcblk2 brw-rw---- 1 root root 179, 16 Jan 1 2000 mmcblk2boot0 brw-rw---- 1 root root 179, 24 Jan 1 2000 mmcblk2boot1 crw-rw---- 1 root root 248, 0 Jan 1 2000 mmcblk2rpmb brw-rw---- 1 root root 179, 32 Jan 1 2000 mmcblk3 brw-rw---- 1 root root 179, 40 Jan 1 2000 mmcblk3boot0 brw-rw---- 1 root root 179, 48 Jan 1 2000 mmcblk3boot1 brw-rw---- 1 root root 179, 33 Jan 1 2000 mmcblk3p1 crw-rw---- 1 root root 248, 1 Jan 1 2000 mmcblk3rpmb Notice the (248,0) and (248,1) character devices for RPMB. Cc: Tomas Winkler Signed-off-by: Linus Walleij Signed-off-by: Ulf Hansson Cc: Jisheng Zhang Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/block.c | 283 ++++++++++++++++++++++++++++++++++++--- drivers/mmc/core/queue.h | 2 + 2 files changed, 263 insertions(+), 22 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index ce6dd49fbb98..0c29605dd829 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -87,6 +88,7 @@ static int max_devices; #define MAX_DEVICES 256 static DEFINE_IDA(mmc_blk_ida); +static DEFINE_IDA(mmc_rpmb_ida); /* * There is one mmc_blk_data per slot. @@ -97,6 +99,7 @@ struct mmc_blk_data { struct gendisk *disk; struct mmc_queue queue; struct list_head part; + struct list_head rpmbs; unsigned int flags; #define MMC_BLK_CMD23 (1 << 0) /* Can do SET_BLOCK_COUNT for multiblock */ @@ -126,6 +129,32 @@ struct mmc_blk_data { struct dentry *ext_csd_dentry; }; +/* Device type for RPMB character devices */ +static dev_t mmc_rpmb_devt; + +/* Bus type for RPMB character devices */ +static struct bus_type mmc_rpmb_bus_type = { + .name = "mmc_rpmb", +}; + +/** + * struct mmc_rpmb_data - special RPMB device type for these areas + * @dev: the device for the RPMB area + * @chrdev: character device for the RPMB area + * @id: unique device ID number + * @part_index: partition index (0 on first) + * @md: parent MMC block device + * @node: list item, so we can put this device on a list + */ +struct mmc_rpmb_data { + struct device dev; + struct cdev chrdev; + int id; + unsigned int part_index; + struct mmc_blk_data *md; + struct list_head node; +}; + static DEFINE_MUTEX(open_lock); module_param(perdev_minors, int, 0444); @@ -309,6 +338,7 @@ struct mmc_blk_ioc_data { struct mmc_ioc_cmd ic; unsigned char *buf; u64 buf_bytes; + struct mmc_rpmb_data *rpmb; }; static struct mmc_blk_ioc_data *mmc_blk_ioctl_copy_from_user( @@ -447,14 +477,25 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, struct mmc_request mrq = {}; struct scatterlist sg; int err; - bool is_rpmb = false; + unsigned int target_part; u32 status = 0; if (!card || !md || !idata) return -EINVAL; - if (md->area_type & MMC_BLK_DATA_AREA_RPMB) - is_rpmb = true; + /* + * The RPMB accesses comes in from the character device, so we + * need to target these explicitly. Else we just target the + * partition type for the block device the ioctl() was issued + * on. + */ + if (idata->rpmb) { + /* Support multiple RPMB partitions */ + target_part = idata->rpmb->part_index; + target_part |= EXT_CSD_PART_CONFIG_ACC_RPMB; + } else { + target_part = md->part_type; + } cmd.opcode = idata->ic.opcode; cmd.arg = idata->ic.arg; @@ -498,7 +539,7 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, mrq.cmd = &cmd; - err = mmc_blk_part_switch(card, md->part_type); + err = mmc_blk_part_switch(card, target_part); if (err) return err; @@ -508,7 +549,7 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, return err; } - if (is_rpmb) { + if (idata->rpmb) { err = mmc_set_blockcount(card, data.blocks, idata->ic.write_flag & (1 << 31)); if (err) @@ -566,7 +607,7 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, memcpy(&(idata->ic.response), cmd.resp, sizeof(cmd.resp)); - if (is_rpmb) { + if (idata->rpmb) { /* * Ensure RPMB command has completed by polling CMD13 * "Send Status". @@ -582,7 +623,8 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, } static int mmc_blk_ioctl_cmd(struct mmc_blk_data *md, - struct mmc_ioc_cmd __user *ic_ptr) + struct mmc_ioc_cmd __user *ic_ptr, + struct mmc_rpmb_data *rpmb) { struct mmc_blk_ioc_data *idata; struct mmc_blk_ioc_data *idatas[1]; @@ -594,6 +636,8 @@ static int mmc_blk_ioctl_cmd(struct mmc_blk_data *md, idata = mmc_blk_ioctl_copy_from_user(ic_ptr); if (IS_ERR(idata)) return PTR_ERR(idata); + /* This will be NULL on non-RPMB ioctl():s */ + idata->rpmb = rpmb; card = md->queue.card; if (IS_ERR(card)) { @@ -613,7 +657,8 @@ static int mmc_blk_ioctl_cmd(struct mmc_blk_data *md, goto cmd_done; } idatas[0] = idata; - req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_IOCTL; + req_to_mmc_queue_req(req)->drv_op = + rpmb ? MMC_DRV_OP_IOCTL_RPMB : MMC_DRV_OP_IOCTL; req_to_mmc_queue_req(req)->drv_op_data = idatas; req_to_mmc_queue_req(req)->ioc_count = 1; blk_execute_rq(mq->queue, NULL, req, 0); @@ -628,7 +673,8 @@ cmd_done: } static int mmc_blk_ioctl_multi_cmd(struct mmc_blk_data *md, - struct mmc_ioc_multi_cmd __user *user) + struct mmc_ioc_multi_cmd __user *user, + struct mmc_rpmb_data *rpmb) { struct mmc_blk_ioc_data **idata = NULL; struct mmc_ioc_cmd __user *cmds = user->cmds; @@ -659,6 +705,8 @@ static int mmc_blk_ioctl_multi_cmd(struct mmc_blk_data *md, num_of_cmds = i; goto cmd_err; } + /* This will be NULL on non-RPMB ioctl():s */ + idata[i]->rpmb = rpmb; } card = md->queue.card; @@ -679,7 +727,8 @@ static int mmc_blk_ioctl_multi_cmd(struct mmc_blk_data *md, err = PTR_ERR(req); goto cmd_err; } - req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_IOCTL; + req_to_mmc_queue_req(req)->drv_op = + rpmb ? MMC_DRV_OP_IOCTL_RPMB : MMC_DRV_OP_IOCTL; req_to_mmc_queue_req(req)->drv_op_data = idata; req_to_mmc_queue_req(req)->ioc_count = num_of_cmds; blk_execute_rq(mq->queue, NULL, req, 0); @@ -727,7 +776,8 @@ static int mmc_blk_ioctl(struct block_device *bdev, fmode_t mode, if (!md) return -EINVAL; ret = mmc_blk_ioctl_cmd(md, - (struct mmc_ioc_cmd __user *)arg); + (struct mmc_ioc_cmd __user *)arg, + NULL); mmc_blk_put(md); return ret; case MMC_IOC_MULTI_CMD: @@ -738,7 +788,8 @@ static int mmc_blk_ioctl(struct block_device *bdev, fmode_t mode, if (!md) return -EINVAL; ret = mmc_blk_ioctl_multi_cmd(md, - (struct mmc_ioc_multi_cmd __user *)arg); + (struct mmc_ioc_multi_cmd __user *)arg, + NULL); mmc_blk_put(md); return ret; default: @@ -1210,17 +1261,19 @@ static void mmc_blk_issue_drv_op(struct mmc_queue *mq, struct request *req) struct mmc_queue_req *mq_rq; struct mmc_card *card = mq->card; struct mmc_blk_data *md = mq->blkdata; - struct mmc_blk_data *main_md = dev_get_drvdata(&card->dev); struct mmc_blk_ioc_data **idata; + bool rpmb_ioctl; u8 **ext_csd; u32 status; int ret; int i; mq_rq = req_to_mmc_queue_req(req); + rpmb_ioctl = (mq_rq->drv_op == MMC_DRV_OP_IOCTL_RPMB); switch (mq_rq->drv_op) { case MMC_DRV_OP_IOCTL: + case MMC_DRV_OP_IOCTL_RPMB: idata = mq_rq->drv_op_data; for (i = 0, ret = 0; i < mq_rq->ioc_count; i++) { ret = __mmc_blk_ioctl_cmd(card, md, idata[i]); @@ -1228,8 +1281,8 @@ static void mmc_blk_issue_drv_op(struct mmc_queue *mq, struct request *req) break; } /* Always switch back to main area after RPMB access */ - if (md->area_type & MMC_BLK_DATA_AREA_RPMB) - mmc_blk_part_switch(card, main_md->part_type); + if (rpmb_ioctl) + mmc_blk_part_switch(card, 0); break; case MMC_DRV_OP_BOOT_WP: ret = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_BOOT_WP, @@ -2114,6 +2167,7 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, spin_lock_init(&md->lock); INIT_LIST_HEAD(&md->part); + INIT_LIST_HEAD(&md->rpmbs); md->usage = 1; ret = mmc_init_queue(&md->queue, card, &md->lock, subname); @@ -2232,6 +2286,154 @@ static int mmc_blk_alloc_part(struct mmc_card *card, return 0; } +/** + * mmc_rpmb_ioctl() - ioctl handler for the RPMB chardev + * @filp: the character device file + * @cmd: the ioctl() command + * @arg: the argument from userspace + * + * This will essentially just redirect the ioctl()s coming in over to + * the main block device spawning the RPMB character device. + */ +static long mmc_rpmb_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct mmc_rpmb_data *rpmb = filp->private_data; + int ret; + + switch (cmd) { + case MMC_IOC_CMD: + ret = mmc_blk_ioctl_cmd(rpmb->md, + (struct mmc_ioc_cmd __user *)arg, + rpmb); + break; + case MMC_IOC_MULTI_CMD: + ret = mmc_blk_ioctl_multi_cmd(rpmb->md, + (struct mmc_ioc_multi_cmd __user *)arg, + rpmb); + break; + default: + ret = -EINVAL; + break; + } + + return 0; +} + +#ifdef CONFIG_COMPAT +static long mmc_rpmb_ioctl_compat(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + return mmc_rpmb_ioctl(filp, cmd, (unsigned long)compat_ptr(arg)); +} +#endif + +static int mmc_rpmb_chrdev_open(struct inode *inode, struct file *filp) +{ + struct mmc_rpmb_data *rpmb = container_of(inode->i_cdev, + struct mmc_rpmb_data, chrdev); + + get_device(&rpmb->dev); + filp->private_data = rpmb; + mutex_lock(&open_lock); + rpmb->md->usage++; + mutex_unlock(&open_lock); + + return nonseekable_open(inode, filp); +} + +static int mmc_rpmb_chrdev_release(struct inode *inode, struct file *filp) +{ + struct mmc_rpmb_data *rpmb = container_of(inode->i_cdev, + struct mmc_rpmb_data, chrdev); + + put_device(&rpmb->dev); + mutex_lock(&open_lock); + rpmb->md->usage--; + mutex_unlock(&open_lock); + + return 0; +} + +static const struct file_operations mmc_rpmb_fileops = { + .release = mmc_rpmb_chrdev_release, + .open = mmc_rpmb_chrdev_open, + .owner = THIS_MODULE, + .llseek = no_llseek, + .unlocked_ioctl = mmc_rpmb_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = mmc_rpmb_ioctl_compat, +#endif +}; + + +static int mmc_blk_alloc_rpmb_part(struct mmc_card *card, + struct mmc_blk_data *md, + unsigned int part_index, + sector_t size, + const char *subname) +{ + int devidx, ret; + char rpmb_name[DISK_NAME_LEN]; + char cap_str[10]; + struct mmc_rpmb_data *rpmb; + + /* This creates the minor number for the RPMB char device */ + devidx = ida_simple_get(&mmc_rpmb_ida, 0, max_devices, GFP_KERNEL); + if (devidx < 0) + return devidx; + + rpmb = kzalloc(sizeof(*rpmb), GFP_KERNEL); + if (!rpmb) + return -ENOMEM; + + snprintf(rpmb_name, sizeof(rpmb_name), + "mmcblk%u%s", card->host->index, subname ? subname : ""); + + rpmb->id = devidx; + rpmb->part_index = part_index; + rpmb->dev.init_name = rpmb_name; + rpmb->dev.bus = &mmc_rpmb_bus_type; + rpmb->dev.devt = MKDEV(MAJOR(mmc_rpmb_devt), rpmb->id); + rpmb->dev.parent = &card->dev; + device_initialize(&rpmb->dev); + dev_set_drvdata(&rpmb->dev, rpmb); + rpmb->md = md; + + cdev_init(&rpmb->chrdev, &mmc_rpmb_fileops); + rpmb->chrdev.owner = THIS_MODULE; + ret = cdev_device_add(&rpmb->chrdev, &rpmb->dev); + if (ret) { + pr_err("%s: could not add character device\n", rpmb_name); + goto out_remove_ida; + } + + list_add(&rpmb->node, &md->rpmbs); + + string_get_size((u64)size, 512, STRING_UNITS_2, + cap_str, sizeof(cap_str)); + + pr_info("%s: %s %s partition %u %s, chardev (%d:%d)\n", + rpmb_name, mmc_card_id(card), + mmc_card_name(card), EXT_CSD_PART_CONFIG_ACC_RPMB, cap_str, + MAJOR(mmc_rpmb_devt), rpmb->id); + + return 0; + +out_remove_ida: + ida_simple_remove(&mmc_rpmb_ida, rpmb->id); + kfree(rpmb); + return ret; +} + +static void mmc_blk_remove_rpmb_part(struct mmc_rpmb_data *rpmb) +{ + cdev_device_del(&rpmb->chrdev, &rpmb->dev); + device_del(&rpmb->dev); + ida_simple_remove(&mmc_rpmb_ida, rpmb->id); + kfree(rpmb); +} + /* MMC Physical partitions consist of two boot partitions and * up to four general purpose partitions. * For each partition enabled in EXT_CSD a block device will be allocatedi @@ -2240,13 +2442,26 @@ static int mmc_blk_alloc_part(struct mmc_card *card, static int mmc_blk_alloc_parts(struct mmc_card *card, struct mmc_blk_data *md) { - int idx, ret = 0; + int idx, ret; if (!mmc_card_mmc(card)) return 0; for (idx = 0; idx < card->nr_parts; idx++) { - if (card->part[idx].size) { + if (card->part[idx].area_type & MMC_BLK_DATA_AREA_RPMB) { + /* + * RPMB partitions does not provide block access, they + * are only accessed using ioctl():s. Thus create + * special RPMB block devices that do not have a + * backing block queue for these. + */ + ret = mmc_blk_alloc_rpmb_part(card, md, + card->part[idx].part_cfg, + card->part[idx].size >> 9, + card->part[idx].name); + if (ret) + return ret; + } else if (card->part[idx].size) { ret = mmc_blk_alloc_part(card, md, card->part[idx].part_cfg, card->part[idx].size >> 9, @@ -2258,7 +2473,7 @@ static int mmc_blk_alloc_parts(struct mmc_card *card, struct mmc_blk_data *md) } } - return ret; + return 0; } static void mmc_blk_remove_req(struct mmc_blk_data *md) @@ -2295,7 +2510,15 @@ static void mmc_blk_remove_parts(struct mmc_card *card, { struct list_head *pos, *q; struct mmc_blk_data *part_md; + struct mmc_rpmb_data *rpmb; + /* Remove RPMB partitions */ + list_for_each_safe(pos, q, &md->rpmbs) { + rpmb = list_entry(pos, struct mmc_rpmb_data, node); + list_del(pos); + mmc_blk_remove_rpmb_part(rpmb); + } + /* Remove block partitions */ list_for_each_safe(pos, q, &md->part) { part_md = list_entry(pos, struct mmc_blk_data, part); list_del(pos); @@ -2649,6 +2872,17 @@ static int __init mmc_blk_init(void) { int res; + res = bus_register(&mmc_rpmb_bus_type); + if (res < 0) { + pr_err("mmcblk: could not register RPMB bus type\n"); + return res; + } + res = alloc_chrdev_region(&mmc_rpmb_devt, 0, MAX_DEVICES, "rpmb"); + if (res < 0) { + pr_err("mmcblk: failed to allocate rpmb chrdev region\n"); + goto out_bus_unreg; + } + if (perdev_minors != CONFIG_MMC_BLOCK_MINORS) pr_info("mmcblk: using %d minors per device\n", perdev_minors); @@ -2656,16 +2890,20 @@ static int __init mmc_blk_init(void) res = register_blkdev(MMC_BLOCK_MAJOR, "mmc"); if (res) - goto out; + goto out_chrdev_unreg; res = mmc_register_driver(&mmc_driver); if (res) - goto out2; + goto out_blkdev_unreg; return 0; - out2: + +out_blkdev_unreg: unregister_blkdev(MMC_BLOCK_MAJOR, "mmc"); - out: +out_chrdev_unreg: + unregister_chrdev_region(mmc_rpmb_devt, MAX_DEVICES); +out_bus_unreg: + bus_unregister(&mmc_rpmb_bus_type); return res; } @@ -2673,6 +2911,7 @@ static void __exit mmc_blk_exit(void) { mmc_unregister_driver(&mmc_driver); unregister_blkdev(MMC_BLOCK_MAJOR, "mmc"); + unregister_chrdev_region(mmc_rpmb_devt, MAX_DEVICES); } module_init(mmc_blk_init); diff --git a/drivers/mmc/core/queue.h b/drivers/mmc/core/queue.h index 6bfba32ffa66..15c80421321e 100644 --- a/drivers/mmc/core/queue.h +++ b/drivers/mmc/core/queue.h @@ -36,12 +36,14 @@ struct mmc_blk_request { /** * enum mmc_drv_op - enumerates the operations in the mmc_queue_req * @MMC_DRV_OP_IOCTL: ioctl operation + * @MMC_DRV_OP_IOCTL_RPMB: RPMB-oriented ioctl operation * @MMC_DRV_OP_BOOT_WP: write protect boot partitions * @MMC_DRV_OP_GET_CARD_STATUS: get card status * @MMC_DRV_OP_GET_EXT_CSD: get the EXT CSD from an eMMC card */ enum mmc_drv_op { MMC_DRV_OP_IOCTL, + MMC_DRV_OP_IOCTL_RPMB, MMC_DRV_OP_BOOT_WP, MMC_DRV_OP_GET_CARD_STATUS, MMC_DRV_OP_GET_EXT_CSD, From ae4e8ce0d86159bbba7cfaa44f6276d38b1f2200 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 20 Sep 2017 10:02:01 +0200 Subject: [PATCH 2188/3715] mmc: block: Delete mmc_access_rpmb() commit 14f4ca7e4d2825f9f71e22905ae177b899959f1d upstream. This function is used by the block layer queue to bail out of requests if the current request is towards an RPMB "block device". This was done to avoid boot time scanning of this "block device" which was never really a block device, thus duct-taping over the fact that it was badly engineered. This problem is now gone as we removed the offending RPMB block device in another patch and replaced it with a character device. Cc: Tomas Winkler Signed-off-by: Linus Walleij Signed-off-by: Ulf Hansson Cc: Jisheng Zhang Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/block.c | 12 ------------ drivers/mmc/core/queue.c | 2 +- drivers/mmc/core/queue.h | 2 -- 3 files changed, 1 insertion(+), 15 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 0c29605dd829..b3d8717963d8 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -1239,18 +1239,6 @@ static inline void mmc_blk_reset_success(struct mmc_blk_data *md, int type) md->reset_done &= ~type; } -int mmc_access_rpmb(struct mmc_queue *mq) -{ - struct mmc_blk_data *md = mq->blkdata; - /* - * If this is a RPMB partition access, return ture - */ - if (md && md->part_type == EXT_CSD_PART_CONFIG_ACC_RPMB) - return true; - - return false; -} - /* * The non-block commands come back from the block layer after it queued it and * processed it with all other requests and then they get issued in this diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 0a4e77a5ba33..f74f9ef460cc 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -30,7 +30,7 @@ static int mmc_prep_request(struct request_queue *q, struct request *req) { struct mmc_queue *mq = q->queuedata; - if (mq && (mmc_card_removed(mq->card) || mmc_access_rpmb(mq))) + if (mq && mmc_card_removed(mq->card)) return BLKPREP_KILL; req->rq_flags |= RQF_DONTPREP; diff --git a/drivers/mmc/core/queue.h b/drivers/mmc/core/queue.h index 15c80421321e..547b457c4251 100644 --- a/drivers/mmc/core/queue.h +++ b/drivers/mmc/core/queue.h @@ -84,6 +84,4 @@ extern void mmc_queue_resume(struct mmc_queue *); extern unsigned int mmc_queue_map_sg(struct mmc_queue *, struct mmc_queue_req *); -extern int mmc_access_rpmb(struct mmc_queue *); - #endif From 3879a509ac7f02e0ba899d22cad53d366b656f67 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 4 Oct 2017 11:10:07 +0200 Subject: [PATCH 2189/3715] mmc: block: Fix bug when removing RPMB chardev commit 1c87f73578497a6c3cc77bcbfd2e5bf15fe753c7 upstream. I forgot to account for the fact that the device core holds a reference to a device added with device_initialize() that need to be released with a corresponding put_device() to reach a 0 refcount at the end of the lifecycle. This led to a NULL pointer reference when freeing the device when e.g. unbidning the host device in sysfs. Fix this and use the device .release() callback to free the IDA and free:ing the memory used by the RPMB device. Before this patch: /sys/bus/amba/drivers/mmci-pl18x$ echo 80114000.sdi4_per2 > unbind [ 29.797332] mmc3: card 0001 removed [ 29.810791] Unable to handle kernel NULL pointer dereference at virtual address 00000050 [ 29.818878] pgd = de70c000 [ 29.821624] [00000050] *pgd=1e70a831, *pte=00000000, *ppte=00000000 [ 29.827911] Internal error: Oops: 17 [#1] PREEMPT SMP ARM [ 29.833282] Modules linked in: [ 29.836334] CPU: 1 PID: 154 Comm: sh Not tainted 4.14.0-rc3-00039-g83318e309566-dirty #736 [ 29.844604] Hardware name: ST-Ericsson Ux5x0 platform (Device Tree Support) [ 29.851562] task: de572700 task.stack: de742000 [ 29.856079] PC is at kernfs_find_ns+0x8/0x100 [ 29.860443] LR is at kernfs_find_and_get_ns+0x30/0x48 After this patch: /sys/bus/amba/drivers/mmci-pl18x$ echo 80005000.sdi4_per2 > unbind [ 20.623382] mmc3: card 0001 removed Fixes: 97548575bef3 ("mmc: block: Convert RPMB to a character device") Reported-by: Adrian Hunter Signed-off-by: Linus Walleij Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson Cc: Jisheng Zhang Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/block.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index b3d8717963d8..df9903ee1fae 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -2323,9 +2323,7 @@ static int mmc_rpmb_chrdev_open(struct inode *inode, struct file *filp) get_device(&rpmb->dev); filp->private_data = rpmb; - mutex_lock(&open_lock); - rpmb->md->usage++; - mutex_unlock(&open_lock); + mmc_blk_get(rpmb->md->disk); return nonseekable_open(inode, filp); } @@ -2336,9 +2334,7 @@ static int mmc_rpmb_chrdev_release(struct inode *inode, struct file *filp) struct mmc_rpmb_data, chrdev); put_device(&rpmb->dev); - mutex_lock(&open_lock); - rpmb->md->usage--; - mutex_unlock(&open_lock); + mmc_blk_put(rpmb->md); return 0; } @@ -2354,6 +2350,13 @@ static const struct file_operations mmc_rpmb_fileops = { #endif }; +static void mmc_blk_rpmb_device_release(struct device *dev) +{ + struct mmc_rpmb_data *rpmb = dev_get_drvdata(dev); + + ida_simple_remove(&mmc_rpmb_ida, rpmb->id); + kfree(rpmb); +} static int mmc_blk_alloc_rpmb_part(struct mmc_card *card, struct mmc_blk_data *md, @@ -2372,8 +2375,10 @@ static int mmc_blk_alloc_rpmb_part(struct mmc_card *card, return devidx; rpmb = kzalloc(sizeof(*rpmb), GFP_KERNEL); - if (!rpmb) + if (!rpmb) { + ida_simple_remove(&mmc_rpmb_ida, devidx); return -ENOMEM; + } snprintf(rpmb_name, sizeof(rpmb_name), "mmcblk%u%s", card->host->index, subname ? subname : ""); @@ -2384,6 +2389,7 @@ static int mmc_blk_alloc_rpmb_part(struct mmc_card *card, rpmb->dev.bus = &mmc_rpmb_bus_type; rpmb->dev.devt = MKDEV(MAJOR(mmc_rpmb_devt), rpmb->id); rpmb->dev.parent = &card->dev; + rpmb->dev.release = mmc_blk_rpmb_device_release; device_initialize(&rpmb->dev); dev_set_drvdata(&rpmb->dev, rpmb); rpmb->md = md; @@ -2393,7 +2399,7 @@ static int mmc_blk_alloc_rpmb_part(struct mmc_card *card, ret = cdev_device_add(&rpmb->chrdev, &rpmb->dev); if (ret) { pr_err("%s: could not add character device\n", rpmb_name); - goto out_remove_ida; + goto out_put_device; } list_add(&rpmb->node, &md->rpmbs); @@ -2408,18 +2414,16 @@ static int mmc_blk_alloc_rpmb_part(struct mmc_card *card, return 0; -out_remove_ida: - ida_simple_remove(&mmc_rpmb_ida, rpmb->id); - kfree(rpmb); +out_put_device: + put_device(&rpmb->dev); return ret; } static void mmc_blk_remove_rpmb_part(struct mmc_rpmb_data *rpmb) + { cdev_device_del(&rpmb->chrdev, &rpmb->dev); - device_del(&rpmb->dev); - ida_simple_remove(&mmc_rpmb_ida, rpmb->id); - kfree(rpmb); + put_device(&rpmb->dev); } /* MMC Physical partitions consist of two boot partitions and From a52f6b2f57b6695fa46d1b3cb37c54e16efabf53 Mon Sep 17 00:00:00 2001 From: Alexander Kappner Date: Wed, 28 Mar 2018 15:18:31 -0700 Subject: [PATCH 2190/3715] mmc: core: Prevent bus reference leak in mmc_blk_init() commit d0a0852b9f81cf5f793bf2eae7336ed40a1a1815 upstream. Upon module load, mmc_block allocates a bus with bus_registeri() in mmc_blk_init(). This reference never gets freed during module unload, which leads to subsequent re-insertions of the module fails and a WARN() splat is triggered. Fix the bug by dropping the reference for the bus in mmc_blk_exit(). Signed-off-by: Alexander Kappner Fixes: 97548575bef3 ("mmc: block: Convert RPMB to a character device") Cc: Reviewed-by: Shawn Lin Signed-off-by: Ulf Hansson Cc: Jisheng Zhang Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/block.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index df9903ee1fae..a9ce192828b8 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -2904,6 +2904,7 @@ static void __exit mmc_blk_exit(void) mmc_unregister_driver(&mmc_driver); unregister_blkdev(MMC_BLOCK_MAJOR, "mmc"); unregister_chrdev_region(mmc_rpmb_devt, MAX_DEVICES); + bus_unregister(&mmc_rpmb_bus_type); } module_init(mmc_blk_init); From 0f65291617d4117379ba702130040d2db283c2fb Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Wed, 16 May 2018 21:20:20 +0200 Subject: [PATCH 2191/3715] mmc: block: propagate correct returned value in mmc_rpmb_ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b25b750df99bcba29317d3f9d9f93c4ec58890e6 upstream. In commit 97548575bef3 ("mmc: block: Convert RPMB to a character device") a new function `mmc_rpmb_ioctl` was added. The final return is simply returning a value of `0` instead of propagating the correct return code. Discovered during a compilation with W=1, silence the following gcc warning drivers/mmc/core/block.c:2470:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable] Signed-off-by: Mathieu Malaterre Reviewed-by: Shawn Lin Fixes: 97548575bef3 ("mmc: block: Convert RPMB to a character device") Cc: stable@vger.kernel.org # v4.15+ Signed-off-by: Ulf Hansson Cc: Jisheng Zhang Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/block.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index a9ce192828b8..916b88ee2de4 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -2305,7 +2305,7 @@ static long mmc_rpmb_ioctl(struct file *filp, unsigned int cmd, break; } - return 0; + return ret; } #ifdef CONFIG_COMPAT From 887b0296a905f8d5cc090ca08d309918fc24bf24 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Jan 2020 06:45:37 -0800 Subject: [PATCH 2192/3715] gtp: fix bad unlock balance in gtp_encap_enable_socket [ Upstream commit 90d72256addff9e5f8ad645e8f632750dd1f8935 ] WARNING: bad unlock balance detected! 5.5.0-rc5-syzkaller #0 Not tainted ------------------------------------- syz-executor921/9688 is trying to release lock (sk_lock-AF_INET6) at: [] gtp_encap_enable_socket+0x146/0x400 drivers/net/gtp.c:830 but there are no more locks to release! other info that might help us debug this: 2 locks held by syz-executor921/9688: #0: ffffffff8a4d8840 (rtnl_mutex){+.+.}, at: rtnl_lock net/core/rtnetlink.c:72 [inline] #0: ffffffff8a4d8840 (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x405/0xaf0 net/core/rtnetlink.c:5421 #1: ffff88809304b560 (slock-AF_INET6){+...}, at: spin_lock_bh include/linux/spinlock.h:343 [inline] #1: ffff88809304b560 (slock-AF_INET6){+...}, at: release_sock+0x20/0x1c0 net/core/sock.c:2951 stack backtrace: CPU: 0 PID: 9688 Comm: syz-executor921 Not tainted 5.5.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x197/0x210 lib/dump_stack.c:118 print_unlock_imbalance_bug kernel/locking/lockdep.c:4008 [inline] print_unlock_imbalance_bug.cold+0x114/0x123 kernel/locking/lockdep.c:3984 __lock_release kernel/locking/lockdep.c:4242 [inline] lock_release+0x5f2/0x960 kernel/locking/lockdep.c:4503 sock_release_ownership include/net/sock.h:1496 [inline] release_sock+0x17c/0x1c0 net/core/sock.c:2961 gtp_encap_enable_socket+0x146/0x400 drivers/net/gtp.c:830 gtp_encap_enable drivers/net/gtp.c:852 [inline] gtp_newlink+0x9fc/0xc60 drivers/net/gtp.c:666 __rtnl_newlink+0x109e/0x1790 net/core/rtnetlink.c:3305 rtnl_newlink+0x69/0xa0 net/core/rtnetlink.c:3363 rtnetlink_rcv_msg+0x45e/0xaf0 net/core/rtnetlink.c:5424 netlink_rcv_skb+0x177/0x450 net/netlink/af_netlink.c:2477 rtnetlink_rcv+0x1d/0x30 net/core/rtnetlink.c:5442 netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] netlink_unicast+0x58c/0x7d0 net/netlink/af_netlink.c:1328 netlink_sendmsg+0x91c/0xea0 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:639 [inline] sock_sendmsg+0xd7/0x130 net/socket.c:659 ____sys_sendmsg+0x753/0x880 net/socket.c:2330 ___sys_sendmsg+0x100/0x170 net/socket.c:2384 __sys_sendmsg+0x105/0x1d0 net/socket.c:2417 __do_sys_sendmsg net/socket.c:2426 [inline] __se_sys_sendmsg net/socket.c:2424 [inline] __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2424 do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x445d49 Code: e8 bc b7 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 2b 12 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f8019074db8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00000000006dac38 RCX: 0000000000445d49 RDX: 0000000000000000 RSI: 0000000020000180 RDI: 0000000000000003 RBP: 00000000006dac30 R08: 0000000000000004 R09: 0000000000000000 R10: 0000000000000008 R11: 0000000000000246 R12: 00000000006dac3c R13: 00007ffea687f6bf R14: 00007f80190759c0 R15: 20c49ba5e353f7cf Fixes: e198987e7dd7 ("gtp: fix suspicious RCU usage") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Taehee Yoo Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/gtp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 35905e9ee9ec..25be27826a22 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -816,7 +816,7 @@ static struct sock *gtp_encap_enable_socket(int fd, int type, lock_sock(sock->sk); if (sock->sk->sk_user_data) { sk = ERR_PTR(-EBUSY); - goto out_sock; + goto out_rel_sock; } sk = sock->sk; @@ -829,8 +829,9 @@ static struct sock *gtp_encap_enable_socket(int fd, int type, setup_udp_tunnel_sock(sock_net(sock->sk), sock, &tuncfg); -out_sock: +out_rel_sock: release_sock(sock->sk); +out_sock: sockfd_put(sock); return sk; } From 4a953272f2d2db63bba97137b64b3f1770634e00 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Jan 2020 12:30:48 -0800 Subject: [PATCH 2193/3715] macvlan: do not assume mac_header is set in macvlan_broadcast() [ Upstream commit 96cc4b69581db68efc9749ef32e9cf8e0160c509 ] Use of eth_hdr() in tx path is error prone. Many drivers call skb_reset_mac_header() before using it, but others do not. Commit 6d1ccff62780 ("net: reset mac header in dev_start_xmit()") attempted to fix this generically, but commit d346a3fae3ff ("packet: introduce PACKET_QDISC_BYPASS socket option") brought back the macvlan bug. Lets add a new helper, so that tx paths no longer have to call skb_reset_mac_header() only to get a pointer to skb->data. Hopefully we will be able to revert 6d1ccff62780 ("net: reset mac header in dev_start_xmit()") and save few cycles in transmit fast path. BUG: KASAN: use-after-free in __get_unaligned_cpu32 include/linux/unaligned/packed_struct.h:19 [inline] BUG: KASAN: use-after-free in mc_hash drivers/net/macvlan.c:251 [inline] BUG: KASAN: use-after-free in macvlan_broadcast+0x547/0x620 drivers/net/macvlan.c:277 Read of size 4 at addr ffff8880a4932401 by task syz-executor947/9579 CPU: 0 PID: 9579 Comm: syz-executor947 Not tainted 5.5.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x197/0x210 lib/dump_stack.c:118 print_address_description.constprop.0.cold+0xd4/0x30b mm/kasan/report.c:374 __kasan_report.cold+0x1b/0x41 mm/kasan/report.c:506 kasan_report+0x12/0x20 mm/kasan/common.c:639 __asan_report_load_n_noabort+0xf/0x20 mm/kasan/generic_report.c:145 __get_unaligned_cpu32 include/linux/unaligned/packed_struct.h:19 [inline] mc_hash drivers/net/macvlan.c:251 [inline] macvlan_broadcast+0x547/0x620 drivers/net/macvlan.c:277 macvlan_queue_xmit drivers/net/macvlan.c:520 [inline] macvlan_start_xmit+0x402/0x77f drivers/net/macvlan.c:559 __netdev_start_xmit include/linux/netdevice.h:4447 [inline] netdev_start_xmit include/linux/netdevice.h:4461 [inline] dev_direct_xmit+0x419/0x630 net/core/dev.c:4079 packet_direct_xmit+0x1a9/0x250 net/packet/af_packet.c:240 packet_snd net/packet/af_packet.c:2966 [inline] packet_sendmsg+0x260d/0x6220 net/packet/af_packet.c:2991 sock_sendmsg_nosec net/socket.c:639 [inline] sock_sendmsg+0xd7/0x130 net/socket.c:659 __sys_sendto+0x262/0x380 net/socket.c:1985 __do_sys_sendto net/socket.c:1997 [inline] __se_sys_sendto net/socket.c:1993 [inline] __x64_sys_sendto+0xe1/0x1a0 net/socket.c:1993 do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x442639 Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 5b 10 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007ffc13549e08 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000442639 RDX: 000000000000000e RSI: 0000000020000080 RDI: 0000000000000003 RBP: 0000000000000004 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000403bb0 R14: 0000000000000000 R15: 0000000000000000 Allocated by task 9389: save_stack+0x23/0x90 mm/kasan/common.c:72 set_track mm/kasan/common.c:80 [inline] __kasan_kmalloc mm/kasan/common.c:513 [inline] __kasan_kmalloc.constprop.0+0xcf/0xe0 mm/kasan/common.c:486 kasan_kmalloc+0x9/0x10 mm/kasan/common.c:527 __do_kmalloc mm/slab.c:3656 [inline] __kmalloc+0x163/0x770 mm/slab.c:3665 kmalloc include/linux/slab.h:561 [inline] tomoyo_realpath_from_path+0xc5/0x660 security/tomoyo/realpath.c:252 tomoyo_get_realpath security/tomoyo/file.c:151 [inline] tomoyo_path_perm+0x230/0x430 security/tomoyo/file.c:822 tomoyo_inode_getattr+0x1d/0x30 security/tomoyo/tomoyo.c:129 security_inode_getattr+0xf2/0x150 security/security.c:1222 vfs_getattr+0x25/0x70 fs/stat.c:115 vfs_statx_fd+0x71/0xc0 fs/stat.c:145 vfs_fstat include/linux/fs.h:3265 [inline] __do_sys_newfstat+0x9b/0x120 fs/stat.c:378 __se_sys_newfstat fs/stat.c:375 [inline] __x64_sys_newfstat+0x54/0x80 fs/stat.c:375 do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 9389: save_stack+0x23/0x90 mm/kasan/common.c:72 set_track mm/kasan/common.c:80 [inline] kasan_set_free_info mm/kasan/common.c:335 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/common.c:474 kasan_slab_free+0xe/0x10 mm/kasan/common.c:483 __cache_free mm/slab.c:3426 [inline] kfree+0x10a/0x2c0 mm/slab.c:3757 tomoyo_realpath_from_path+0x1a7/0x660 security/tomoyo/realpath.c:289 tomoyo_get_realpath security/tomoyo/file.c:151 [inline] tomoyo_path_perm+0x230/0x430 security/tomoyo/file.c:822 tomoyo_inode_getattr+0x1d/0x30 security/tomoyo/tomoyo.c:129 security_inode_getattr+0xf2/0x150 security/security.c:1222 vfs_getattr+0x25/0x70 fs/stat.c:115 vfs_statx_fd+0x71/0xc0 fs/stat.c:145 vfs_fstat include/linux/fs.h:3265 [inline] __do_sys_newfstat+0x9b/0x120 fs/stat.c:378 __se_sys_newfstat fs/stat.c:375 [inline] __x64_sys_newfstat+0x54/0x80 fs/stat.c:375 do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff8880a4932000 which belongs to the cache kmalloc-4k of size 4096 The buggy address is located 1025 bytes inside of 4096-byte region [ffff8880a4932000, ffff8880a4933000) The buggy address belongs to the page: page:ffffea0002924c80 refcount:1 mapcount:0 mapping:ffff8880aa402000 index:0x0 compound_mapcount: 0 raw: 00fffe0000010200 ffffea0002846208 ffffea00028f3888 ffff8880aa402000 raw: 0000000000000000 ffff8880a4932000 0000000100000001 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8880a4932300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8880a4932380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff8880a4932400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8880a4932480: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8880a4932500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Fixes: b863ceb7ddce ("[NET]: Add macvlan driver") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/macvlan.c | 2 +- include/linux/if_ether.h | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 8d5f88a538fc..2b977655834c 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -263,7 +263,7 @@ static void macvlan_broadcast(struct sk_buff *skb, struct net_device *src, enum macvlan_mode mode) { - const struct ethhdr *eth = eth_hdr(skb); + const struct ethhdr *eth = skb_eth_hdr(skb); const struct macvlan_dev *vlan; struct sk_buff *nskb; unsigned int i; diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index 548fd535fd02..d433f5e292c9 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -28,6 +28,14 @@ static inline struct ethhdr *eth_hdr(const struct sk_buff *skb) return (struct ethhdr *)skb_mac_header(skb); } +/* Prefer this version in TX path, instead of + * skb_reset_mac_header() + eth_hdr() + */ +static inline struct ethhdr *skb_eth_hdr(const struct sk_buff *skb) +{ + return (struct ethhdr *)skb->data; +} + static inline struct ethhdr *inner_eth_hdr(const struct sk_buff *skb) { return (struct ethhdr *)skb_inner_mac_header(skb); From 19716758430e63e0cf6097cdde2a72b6ac28dc75 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 4 Jan 2020 23:14:51 +0100 Subject: [PATCH 2194/3715] net: dsa: mv88e6xxx: Preserve priority when setting CPU port. [ Upstream commit d8dc2c9676e614ef62f54a155b50076888c8a29a ] The 6390 family uses an extended register to set the port connected to the CPU. The lower 5 bits indicate the port, the upper three bits are the priority of the frames as they pass through the switch, what egress queue they should use, etc. Since frames being set to the CPU are typically management frames, BPDU, IGMP, ARP, etc set the priority to 7, the reset default, and the highest. Fixes: 33641994a676 ("net: dsa: mv88e6xxx: Monitor and Management tables") Signed-off-by: Andrew Lunn Tested-by: Chris Healy Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/mv88e6xxx/global1.c | 5 +++++ drivers/net/dsa/mv88e6xxx/global1.h | 1 + 2 files changed, 6 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx/global1.c b/drivers/net/dsa/mv88e6xxx/global1.c index d76d7c7ea819..544b6a9cc01a 100644 --- a/drivers/net/dsa/mv88e6xxx/global1.c +++ b/drivers/net/dsa/mv88e6xxx/global1.c @@ -313,6 +313,11 @@ int mv88e6390_g1_set_cpu_port(struct mv88e6xxx_chip *chip, int port) { u16 ptr = MV88E6390_G1_MONITOR_MGMT_CTL_PTR_CPU_DEST; + /* Use the default high priority for management frames sent to + * the CPU. + */ + port |= MV88E6390_G1_MONITOR_MGMT_CTL_PTR_CPU_DEST_MGMTPRI; + return mv88e6390_g1_monitor_write(chip, ptr, port); } diff --git a/drivers/net/dsa/mv88e6xxx/global1.h b/drivers/net/dsa/mv88e6xxx/global1.h index 950b914f9251..d82e8956cbd5 100644 --- a/drivers/net/dsa/mv88e6xxx/global1.h +++ b/drivers/net/dsa/mv88e6xxx/global1.h @@ -189,6 +189,7 @@ #define MV88E6390_G1_MONITOR_MGMT_CTL_PTR_INGRESS_DEST 0x2000 #define MV88E6390_G1_MONITOR_MGMT_CTL_PTR_EGRESS_DEST 0x2100 #define MV88E6390_G1_MONITOR_MGMT_CTL_PTR_CPU_DEST 0x3000 +#define MV88E6390_G1_MONITOR_MGMT_CTL_PTR_CPU_DEST_MGMTPRI 0x00e0 #define MV88E6390_G1_MONITOR_MGMT_CTL_DATA_MASK 0x00ff /* Offset 0x1C: Global Control 2 */ From fee038c31896073d7b0b7c1b1183f02579f44a4e Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Mon, 6 Jan 2020 11:09:45 +0800 Subject: [PATCH 2195/3715] net: stmmac: dwmac-sun8i: Allow all RGMII modes [ Upstream commit f1239d8aa84dad8fe4b6cc1356f40fc8e842db47 ] Allow all the RGMII modes to be used. This would allow us to represent the hardware better in the device tree with RGMII_ID where in most cases the PHY's internal delay for both RX and TX are used. Fixes: 9f93ac8d4085 ("net-next: stmmac: Add dwmac-sun8i") Signed-off-by: Chen-Yu Tsai Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index a62128a444a6..149fd0d5e069 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -724,6 +724,9 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv) /* default */ break; case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_TXID: reg |= SYSCON_EPIT | SYSCON_ETCS_INT_GMII; break; case PHY_INTERFACE_MODE_RMII: From 8835b0eff11a61f5c5df0b3c4737cfdde28b1a41 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Mon, 6 Jan 2020 11:09:22 +0800 Subject: [PATCH 2196/3715] net: stmmac: dwmac-sunxi: Allow all RGMII modes [ Upstream commit 52cc73e5404c7ba0cbfc50cb4c265108c84b3d5a ] Allow all the RGMII modes to be used. This would allow us to represent the hardware better in the device tree with RGMII_ID where in most cases the PHY's internal delay for both RX and TX are used. Fixes: af0bd4e9ba80 ("net: stmmac: sunxi platform extensions for GMAC in Allwinner A20 SoC's") Signed-off-by: Chen-Yu Tsai Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c index 62ccbd47c1db..fc1fa0f9f338 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c @@ -53,7 +53,7 @@ static int sun7i_gmac_init(struct platform_device *pdev, void *priv) * rate, which then uses the auto-reparenting feature of the * clock driver, and enabling/disabling the clock. */ - if (gmac->interface == PHY_INTERFACE_MODE_RGMII) { + if (phy_interface_mode_is_rgmii(gmac->interface)) { clk_set_rate(gmac->tx_clk, SUN7I_GMAC_GMII_RGMII_RATE); clk_prepare_enable(gmac->tx_clk); gmac->clk_enabled = 1; From a45335027cece5e979c1bb1a603604b2f34f32f8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 7 Jan 2020 10:57:01 -0800 Subject: [PATCH 2197/3715] net: usb: lan78xx: fix possible skb leak [ Upstream commit 47240ba0cd09bb6fe6db9889582048324999dfa4 ] If skb_linearize() fails, we need to free the skb. TSO makes skb bigger, and this bug might be the reason Raspberry Pi 3B+ users had to disable TSO. Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") Signed-off-by: Eric Dumazet Reported-by: RENARD Pierre-Francois Cc: Stefan Wahren Cc: Woojung Huh Cc: Microchip Linux Driver Support Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/lan78xx.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 0aa6f3a5612d..c23f35dba718 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2604,11 +2604,6 @@ static int lan78xx_stop(struct net_device *net) return 0; } -static int lan78xx_linearize(struct sk_buff *skb) -{ - return skb_linearize(skb); -} - static struct sk_buff *lan78xx_tx_prep(struct lan78xx_net *dev, struct sk_buff *skb, gfp_t flags) { @@ -2619,8 +2614,10 @@ static struct sk_buff *lan78xx_tx_prep(struct lan78xx_net *dev, return NULL; } - if (lan78xx_linearize(skb) < 0) + if (skb_linearize(skb)) { + dev_kfree_skb_any(skb); return NULL; + } tx_cmd_a = (u32)(skb->len & TX_CMD_A_LEN_MASK_) | TX_CMD_A_FCS_; From 73a6f18d8390abc233212085ba4f06088f9fb075 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Jan 2020 06:10:39 -0800 Subject: [PATCH 2198/3715] pkt_sched: fq: do not accept silly TCA_FQ_QUANTUM [ Upstream commit d9e15a2733067c9328fb56d98fe8e574fa19ec31 ] As diagnosed by Florian : If TCA_FQ_QUANTUM is set to 0x80000000, fq_deueue() can loop forever in : if (f->credit <= 0) { f->credit += q->quantum; goto begin; } ... because f->credit is either 0 or -2147483648. Let's limit TCA_FQ_QUANTUM to no more than 1 << 20 : This max value should limit risks of breaking user setups while fixing this bug. Fixes: afe4fd062416 ("pkt_sched: fq: Fair Queue packet scheduler") Signed-off-by: Eric Dumazet Diagnosed-by: Florian Westphal Reported-by: syzbot+dc9071cc5a85950bdfce@syzkaller.appspotmail.com Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_fq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index f50eb87cfe79..7a944f508cae 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -734,7 +734,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) if (tb[TCA_FQ_QUANTUM]) { u32 quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]); - if (quantum > 0) + if (quantum > 0 && quantum <= (1 << 20)) q->quantum = quantum; else err = -EINVAL; From bb275c92aaa05ba8fdf6919950cede0c03f62253 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 19 Dec 2019 17:10:16 +0100 Subject: [PATCH 2199/3715] USB: core: fix check for duplicate endpoints commit 3e4f8e21c4f27bcf30a48486b9dcc269512b79ff upstream. Amend the endpoint-descriptor sanity checks to detect all duplicate endpoint addresses in a configuration. Commit 0a8fd1346254 ("USB: fix problems with duplicate endpoint addresses") added a check for duplicate endpoint addresses within a single alternate setting, but did not look for duplicate addresses in other interfaces. The current check would also not detect all duplicate addresses when one endpoint is as a (bi-directional) control endpoint. This specifically avoids overwriting the endpoint entries in struct usb_device when enabling a duplicate endpoint, something which could potentially lead to crashes or leaks, for example, when endpoints are later disabled. Cc: stable Signed-off-by: Johan Hovold Acked-by: Alan Stern Link: https://lore.kernel.org/r/20191219161016.6695-1-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 70 ++++++++++++++++++++++++++++++++------- 1 file changed, 58 insertions(+), 12 deletions(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index cfb8f1126cf8..1f525d5f6d2d 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -203,9 +203,58 @@ static const unsigned short super_speed_maxpacket_maxes[4] = { [USB_ENDPOINT_XFER_INT] = 1024, }; -static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, - int asnum, struct usb_host_interface *ifp, int num_ep, - unsigned char *buffer, int size) +static bool endpoint_is_duplicate(struct usb_endpoint_descriptor *e1, + struct usb_endpoint_descriptor *e2) +{ + if (e1->bEndpointAddress == e2->bEndpointAddress) + return true; + + if (usb_endpoint_xfer_control(e1) || usb_endpoint_xfer_control(e2)) { + if (usb_endpoint_num(e1) == usb_endpoint_num(e2)) + return true; + } + + return false; +} + +/* + * Check for duplicate endpoint addresses in other interfaces and in the + * altsetting currently being parsed. + */ +static bool config_endpoint_is_duplicate(struct usb_host_config *config, + int inum, int asnum, struct usb_endpoint_descriptor *d) +{ + struct usb_endpoint_descriptor *epd; + struct usb_interface_cache *intfc; + struct usb_host_interface *alt; + int i, j, k; + + for (i = 0; i < config->desc.bNumInterfaces; ++i) { + intfc = config->intf_cache[i]; + + for (j = 0; j < intfc->num_altsetting; ++j) { + alt = &intfc->altsetting[j]; + + if (alt->desc.bInterfaceNumber == inum && + alt->desc.bAlternateSetting != asnum) + continue; + + for (k = 0; k < alt->desc.bNumEndpoints; ++k) { + epd = &alt->endpoint[k].desc; + + if (endpoint_is_duplicate(epd, d)) + return true; + } + } + } + + return false; +} + +static int usb_parse_endpoint(struct device *ddev, int cfgno, + struct usb_host_config *config, int inum, int asnum, + struct usb_host_interface *ifp, int num_ep, + unsigned char *buffer, int size) { unsigned char *buffer0 = buffer; struct usb_endpoint_descriptor *d; @@ -242,13 +291,10 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, goto skip_to_next_endpoint_or_interface_descriptor; /* Check for duplicate endpoint addresses */ - for (i = 0; i < ifp->desc.bNumEndpoints; ++i) { - if (ifp->endpoint[i].desc.bEndpointAddress == - d->bEndpointAddress) { - dev_warn(ddev, "config %d interface %d altsetting %d has a duplicate endpoint with address 0x%X, skipping\n", - cfgno, inum, asnum, d->bEndpointAddress); - goto skip_to_next_endpoint_or_interface_descriptor; - } + if (config_endpoint_is_duplicate(config, inum, asnum, d)) { + dev_warn(ddev, "config %d interface %d altsetting %d has a duplicate endpoint with address 0x%X, skipping\n", + cfgno, inum, asnum, d->bEndpointAddress); + goto skip_to_next_endpoint_or_interface_descriptor; } endpoint = &ifp->endpoint[ifp->desc.bNumEndpoints]; @@ -522,8 +568,8 @@ static int usb_parse_interface(struct device *ddev, int cfgno, if (((struct usb_descriptor_header *) buffer)->bDescriptorType == USB_DT_INTERFACE) break; - retval = usb_parse_endpoint(ddev, cfgno, inum, asnum, alt, - num_ep, buffer, size); + retval = usb_parse_endpoint(ddev, cfgno, config, inum, asnum, + alt, num_ep, buffer, size); if (retval < 0) return retval; ++n; From 7eff1139e85c78b0c96afe587792cabf19d0fb2a Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Fri, 13 Dec 2019 14:56:15 +0100 Subject: [PATCH 2200/3715] USB: serial: option: add Telit ME910G1 0x110a composition commit 0d3010fa442429f8780976758719af05592ff19f upstream. This patch adds the following Telit ME910G1 composition: 0x110a: tty, tty, tty, rmnet Signed-off-by: Daniele Palmas Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 8d349f2e5656..dc9a1139e7e1 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1175,6 +1175,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1102, 0xff), /* Telit ME910 (ECM) */ .driver_info = NCTRL(0) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x110a, 0xff), /* Telit ME910G1 */ + .driver_info = NCTRL(0) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4), From 2e2d29bacd3f70b13a3abfc7b7033aacdb4c2aee Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 4 Jan 2020 14:15:02 +0800 Subject: [PATCH 2201/3715] sctp: free cmd->obj.chunk for the unprocessed SCTP_CMD_REPLY [ Upstream commit be7a7729207797476b6666f046d765bdf9630407 ] This patch is to fix a memleak caused by no place to free cmd->obj.chunk for the unprocessed SCTP_CMD_REPLY. This issue occurs when failing to process a cmd while there're still SCTP_CMD_REPLY cmds on the cmd seq with an allocated chunk in cmd->obj.chunk. So fix it by freeing cmd->obj.chunk for each SCTP_CMD_REPLY cmd left on the cmd seq when any cmd returns error. While at it, also remove 'nomem' label. Reported-by: syzbot+107c4aff5f392bf1517f@syzkaller.appspotmail.com Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/sm_sideeffect.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index bf0c61adb09c..482bb0a5d4d3 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1359,8 +1359,10 @@ static int sctp_cmd_interpreter(enum sctp_event event_type, /* Generate an INIT ACK chunk. */ new_obj = sctp_make_init_ack(asoc, chunk, GFP_ATOMIC, 0); - if (!new_obj) - goto nomem; + if (!new_obj) { + error = -ENOMEM; + break; + } sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(new_obj)); @@ -1382,7 +1384,8 @@ static int sctp_cmd_interpreter(enum sctp_event event_type, if (!new_obj) { if (cmd->obj.chunk) sctp_chunk_free(cmd->obj.chunk); - goto nomem; + error = -ENOMEM; + break; } sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(new_obj)); @@ -1429,8 +1432,10 @@ static int sctp_cmd_interpreter(enum sctp_event event_type, /* Generate a SHUTDOWN chunk. */ new_obj = sctp_make_shutdown(asoc, chunk); - if (!new_obj) - goto nomem; + if (!new_obj) { + error = -ENOMEM; + break; + } sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(new_obj)); break; @@ -1760,11 +1765,17 @@ static int sctp_cmd_interpreter(enum sctp_event event_type, break; } - if (error) + if (error) { + cmd = sctp_next_cmd(commands); + while (cmd) { + if (cmd->verb == SCTP_CMD_REPLY) + sctp_chunk_free(cmd->obj.chunk); + cmd = sctp_next_cmd(commands); + } break; + } } -out: /* If this is in response to a received chunk, wait until * we are done with the packet to open the queue so that we don't * send multiple packets in response to a single request. @@ -1779,8 +1790,5 @@ out: sp->data_ready_signalled = 0; return error; -nomem: - error = -ENOMEM; - goto out; } From ab31605bdb67483047f3cc48f49b0ad63ec20465 Mon Sep 17 00:00:00 2001 From: Pengcheng Yang Date: Mon, 30 Dec 2019 17:54:41 +0800 Subject: [PATCH 2202/3715] tcp: fix "old stuff" D-SACK causing SACK to be treated as D-SACK [ Upstream commit c9655008e7845bcfdaac10a1ed8554ec167aea88 ] When we receive a D-SACK, where the sequence number satisfies: undo_marker <= start_seq < end_seq <= prior_snd_una we consider this is a valid D-SACK and tcp_is_sackblock_valid() returns true, then this D-SACK is discarded as "old stuff", but the variable first_sack_index is not marked as negative in tcp_sacktag_write_queue(). If this D-SACK also carries a SACK that needs to be processed (for example, the previous SACK segment was lost), this SACK will be treated as a D-SACK in the following processing of tcp_sacktag_write_queue(), which will eventually lead to incorrect updates of undo_retrans and reordering. Fixes: fd6dad616d4f ("[TCP]: Earlier SACK block verification & simplify access to them") Signed-off-by: Pengcheng Yang Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 55253ba0681f..d2b1c39c4223 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1750,8 +1750,11 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, } /* Ignore very old stuff early */ - if (!after(sp[used_sacks].end_seq, prior_snd_una)) + if (!after(sp[used_sacks].end_seq, prior_snd_una)) { + if (i == 0) + first_sack_index = -1; continue; + } used_sacks++; } From 36821b48f5203d5490349e514c2774ff9784bebc Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 2 Jan 2020 17:23:45 +0800 Subject: [PATCH 2203/3715] vxlan: fix tos value before xmit [ Upstream commit 71130f29979c7c7956b040673e6b9d5643003176 ] Before ip_tunnel_ecn_encap() and udp_tunnel_xmit_skb() we should filter tos value by RT_TOS() instead of using config tos directly. vxlan_get_route() would filter the tos to fl4.flowi4_tos but we didn't return it back, as geneve_get_v4_rt() did. So we have to use RT_TOS() directly in function ip_tunnel_ecn_encap(). Fixes: 206aaafcd279 ("VXLAN: Use IP Tunnels tunnel ENC encap API") Fixes: 1400615d64cf ("vxlan: allow setting ipv6 traffic class") Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vxlan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 153a81ece9fe..5aa7d5091f4d 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2216,7 +2216,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, skb_dst_update_pmtu(skb, mtu); } - tos = ip_tunnel_ecn_encap(tos, old_iph, skb); + tos = ip_tunnel_ecn_encap(RT_TOS(tos), old_iph, skb); ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr), vni, md, flags, udp_sum); @@ -2257,7 +2257,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, skb_dst_update_pmtu(skb, mtu); } - tos = ip_tunnel_ecn_encap(tos, old_iph, skb); + tos = ip_tunnel_ecn_encap(RT_TOS(tos), old_iph, skb); ttl = ttl ? : ip6_dst_hoplimit(ndst); skb_scrub_packet(skb, xnet); err = vxlan_build_skb(skb, ndst, sizeof(struct ipv6hdr), From 841d685ca1154db46d409883a48d15409819a838 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 7 Jan 2020 01:42:25 -0800 Subject: [PATCH 2204/3715] vlan: vlan_changelink() should propagate errors [ Upstream commit eb8ef2a3c50092bb018077c047b8dba1ce0e78e3 ] Both vlan_dev_change_flags() and vlan_dev_set_egress_priority() can return an error. vlan_changelink() should not ignore them. Fixes: 07b5b17e157b ("[VLAN]: Use rtnl_link API") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/8021q/vlan_netlink.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 5e831de3103e..ef7e18a27719 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -95,11 +95,13 @@ static int vlan_changelink(struct net_device *dev, struct nlattr *tb[], struct ifla_vlan_flags *flags; struct ifla_vlan_qos_mapping *m; struct nlattr *attr; - int rem; + int rem, err; if (data[IFLA_VLAN_FLAGS]) { flags = nla_data(data[IFLA_VLAN_FLAGS]); - vlan_dev_change_flags(dev, flags->flags, flags->mask); + err = vlan_dev_change_flags(dev, flags->flags, flags->mask); + if (err) + return err; } if (data[IFLA_VLAN_INGRESS_QOS]) { nla_for_each_nested(attr, data[IFLA_VLAN_INGRESS_QOS], rem) { @@ -110,7 +112,9 @@ static int vlan_changelink(struct net_device *dev, struct nlattr *tb[], if (data[IFLA_VLAN_EGRESS_QOS]) { nla_for_each_nested(attr, data[IFLA_VLAN_EGRESS_QOS], rem) { m = nla_data(attr); - vlan_dev_set_egress_priority(dev, m->from, m->to); + err = vlan_dev_set_egress_priority(dev, m->from, m->to); + if (err) + return err; } } return 0; From a56f6034450677945452a72800dc84354959026e Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 6 Jan 2020 18:01:56 +0000 Subject: [PATCH 2205/3715] net: sch_prio: When ungrafting, replace with FIFO [ Upstream commit 240ce7f6428ff5188b9eedc066e1e4d645b8635f ] When a child Qdisc is removed from one of the PRIO Qdisc's bands, it is replaced unconditionally by a NOOP qdisc. As a result, any traffic hitting that band gets dropped. That is incorrect--no Qdisc was explicitly added when PRIO was created, and after removal, none should have to be added either. Fix PRIO by first attempting to create a default Qdisc and only falling back to noop when that fails. This pattern of attempting to create an invisible FIFO, using NOOP only as a fallback, is also seen in other Qdiscs. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Petr Machata Acked-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_prio.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index c60777351de1..ff6bc7cf6cbd 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -244,8 +244,14 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, struct prio_sched_data *q = qdisc_priv(sch); unsigned long band = arg - 1; - if (new == NULL) - new = &noop_qdisc; + if (!new) { + new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + TC_H_MAKE(sch->handle, arg)); + if (!new) + new = &noop_qdisc; + else + qdisc_hash_add(new, true); + } *old = qdisc_replace(sch, new, &q->queues[band]); return 0; From 9df7257626785ede4905f8813adc78ba740d3f72 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 7 Jan 2020 01:42:24 -0800 Subject: [PATCH 2206/3715] vlan: fix memory leak in vlan_dev_set_egress_priority [ Upstream commit 9bbd917e0bec9aebdbd0c8dbc966caec15eb33e9 ] There are few cases where the ndo_uninit() handler might be not called if an error happens while device is initialized. Since vlan_newlink() calls vlan_changelink() before trying to register the netdevice, we need to make sure vlan_dev_uninit() has been called at least once, or we might leak allocated memory. BUG: memory leak unreferenced object 0xffff888122a206c0 (size 32): comm "syz-executor511", pid 7124, jiffies 4294950399 (age 32.240s) hex dump (first 32 bytes): 00 00 00 00 00 00 61 73 00 00 00 00 00 00 00 00 ......as........ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<000000000eb3bb85>] kmemleak_alloc_recursive include/linux/kmemleak.h:43 [inline] [<000000000eb3bb85>] slab_post_alloc_hook mm/slab.h:586 [inline] [<000000000eb3bb85>] slab_alloc mm/slab.c:3320 [inline] [<000000000eb3bb85>] kmem_cache_alloc_trace+0x145/0x2c0 mm/slab.c:3549 [<000000007b99f620>] kmalloc include/linux/slab.h:556 [inline] [<000000007b99f620>] vlan_dev_set_egress_priority+0xcc/0x150 net/8021q/vlan_dev.c:194 [<000000007b0cb745>] vlan_changelink+0xd6/0x140 net/8021q/vlan_netlink.c:126 [<0000000065aba83a>] vlan_newlink+0x135/0x200 net/8021q/vlan_netlink.c:181 [<00000000fb5dd7a2>] __rtnl_newlink+0x89a/0xb80 net/core/rtnetlink.c:3305 [<00000000ae4273a1>] rtnl_newlink+0x4e/0x80 net/core/rtnetlink.c:3363 [<00000000decab39f>] rtnetlink_rcv_msg+0x178/0x4b0 net/core/rtnetlink.c:5424 [<00000000accba4ee>] netlink_rcv_skb+0x61/0x170 net/netlink/af_netlink.c:2477 [<00000000319fe20f>] rtnetlink_rcv+0x1d/0x30 net/core/rtnetlink.c:5442 [<00000000d51938dc>] netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] [<00000000d51938dc>] netlink_unicast+0x223/0x310 net/netlink/af_netlink.c:1328 [<00000000e539ac79>] netlink_sendmsg+0x2c0/0x570 net/netlink/af_netlink.c:1917 [<000000006250c27e>] sock_sendmsg_nosec net/socket.c:639 [inline] [<000000006250c27e>] sock_sendmsg+0x54/0x70 net/socket.c:659 [<00000000e2a156d1>] ____sys_sendmsg+0x2d0/0x300 net/socket.c:2330 [<000000008c87466e>] ___sys_sendmsg+0x8a/0xd0 net/socket.c:2384 [<00000000110e3054>] __sys_sendmsg+0x80/0xf0 net/socket.c:2417 [<00000000d71077c8>] __do_sys_sendmsg net/socket.c:2426 [inline] [<00000000d71077c8>] __se_sys_sendmsg net/socket.c:2424 [inline] [<00000000d71077c8>] __x64_sys_sendmsg+0x23/0x30 net/socket.c:2424 Fixe: 07b5b17e157b ("[VLAN]: Use rtnl_link API") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/8021q/vlan.h | 1 + net/8021q/vlan_dev.c | 3 ++- net/8021q/vlan_netlink.c | 9 +++++---- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 0e7afdf86127..235bed825e3a 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -110,6 +110,7 @@ int vlan_check_real_dev(struct net_device *real_dev, void vlan_setup(struct net_device *dev); int register_vlan_dev(struct net_device *dev); void unregister_vlan_dev(struct net_device *dev, struct list_head *head); +void vlan_dev_uninit(struct net_device *dev); bool vlan_dev_inherit_address(struct net_device *dev, struct net_device *real_dev); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index ac4c93c999b0..ed3717dc2d20 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -610,7 +610,8 @@ static int vlan_dev_init(struct net_device *dev) return 0; } -static void vlan_dev_uninit(struct net_device *dev) +/* Note: this function might be called multiple times for the same device. */ +void vlan_dev_uninit(struct net_device *dev) { struct vlan_priority_tci_mapping *pm; struct vlan_dev_priv *vlan = vlan_dev_priv(dev); diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index ef7e18a27719..fdf39dd5e755 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -161,10 +161,11 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev, return -EINVAL; err = vlan_changelink(dev, tb, data, extack); - if (err < 0) - return err; - - return register_vlan_dev(dev); + if (!err) + err = register_vlan_dev(dev); + if (err) + vlan_dev_uninit(dev); + return err; } static inline size_t vlan_qos_map_size(unsigned int n) From 6d0c334a400db31751c787c411e7187ab59a3f1d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 12 Jan 2020 12:12:09 +0100 Subject: [PATCH 2207/3715] Linux 4.14.164 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 35a71a78d1d2..f2aa55cea457 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 163 +SUBLEVEL = 164 EXTRAVERSION = NAME = Petit Gorille From ac137d538b497aca88389fa5f45fec3c359a5dba Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 17 Nov 2017 15:30:42 -0800 Subject: [PATCH 2208/3715] UPSTREAM: kcov: remove pointless current != NULL check (Upstream commit fcf4edac049a8bca41658970292e2dfdbc9d5f62.) __sanitizer_cov_trace_pc() is a hot code, so it's worth to remove pointless '!current' check. Current is never NULL. Link: http://lkml.kernel.org/r/20170929162221.32500-1-aryabinin@virtuozzo.com Signed-off-by: Andrey Ryabinin Acked-by: Dmitry Vyukov Acked-by: Mark Rutland Cc: Andrey Konovalov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Andrey Konovalov Bug: 147413187 Change-Id: I2c07ed1a805ff2e9e569adc5aa59386e47f5bb23 --- kernel/kcov.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/kcov.c b/kernel/kcov.c index f1e060b04ef6..dd12f62cfaae 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -62,7 +62,7 @@ void notrace __sanitizer_cov_trace_pc(void) * We are interested in code coverage as a function of a syscall inputs, * so we ignore code executed in interrupts. */ - if (!t || !in_task()) + if (!in_task()) return; mode = READ_ONCE(t->kcov_mode); if (mode == KCOV_MODE_TRACE) { From fccb16d96f3ee4055a8a023edfbb1e7f00bc6c75 Mon Sep 17 00:00:00 2001 From: Victor Chibotaru Date: Fri, 17 Nov 2017 15:30:46 -0800 Subject: [PATCH 2209/3715] BACKPORT: kcov: support comparison operands collection (Upstream commit ded97d2c2b2c5f1dcced0bc57133f7753b037dfc.) Enables kcov to collect comparison operands from instrumented code. This is done by using Clang's -fsanitize=trace-cmp instrumentation (currently not available for GCC). The comparison operands help a lot in fuzz testing. E.g. they are used in Syzkaller to cover the interiors of conditional statements with way less attempts and thus make previously unreachable code reachable. To allow separate collection of coverage and comparison operands two different work modes are implemented. Mode selection is now done via a KCOV_ENABLE ioctl call with corresponding argument value. Link: http://lkml.kernel.org/r/20171011095459.70721-1-glider@google.com Signed-off-by: Victor Chibotaru Signed-off-by: Alexander Potapenko Cc: Dmitry Vyukov Cc: Andrey Konovalov Cc: Mark Rutland Cc: Alexander Popov Cc: Andrey Ryabinin Cc: Kees Cook Cc: Vegard Nossum Cc: Quentin Casasnovas Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Change-Id: I227775c812f342423102cd28fd68b235579c60d3 Signed-off-by: Andrey Konovalov Bug: 147413187 --- include/linux/kcov.h | 12 ++- include/uapi/linux/kcov.h | 24 +++++ kernel/kcov.c | 218 +++++++++++++++++++++++++++++++------- 3 files changed, 213 insertions(+), 41 deletions(-) diff --git a/include/linux/kcov.h b/include/linux/kcov.h index f5d8ce4f4f86..3ecf6f5e3a5f 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -8,19 +8,23 @@ struct task_struct; #ifdef CONFIG_KCOV -void kcov_task_init(struct task_struct *t); -void kcov_task_exit(struct task_struct *t); - enum kcov_mode { /* Coverage collection is not enabled yet. */ KCOV_MODE_DISABLED = 0, + /* KCOV was initialized, but tracing mode hasn't been chosen yet. */ + KCOV_MODE_INIT = 1, /* * Tracing coverage collection mode. * Covered PCs are collected in a per-task buffer. */ - KCOV_MODE_TRACE = 1, + KCOV_MODE_TRACE_PC = 2, + /* Collecting comparison operands mode. */ + KCOV_MODE_TRACE_CMP = 3, }; +void kcov_task_init(struct task_struct *t); +void kcov_task_exit(struct task_struct *t); + #else static inline void kcov_task_init(struct task_struct *t) {} diff --git a/include/uapi/linux/kcov.h b/include/uapi/linux/kcov.h index 33eabbb8ada1..9529867717a8 100644 --- a/include/uapi/linux/kcov.h +++ b/include/uapi/linux/kcov.h @@ -8,4 +8,28 @@ #define KCOV_ENABLE _IO('c', 100) #define KCOV_DISABLE _IO('c', 101) +enum { + /* + * Tracing coverage collection mode. + * Covered PCs are collected in a per-task buffer. + * In new KCOV version the mode is chosen by calling + * ioctl(fd, KCOV_ENABLE, mode). In older versions the mode argument + * was supposed to be 0 in such a call. So, for reasons of backward + * compatibility, we have chosen the value KCOV_TRACE_PC to be 0. + */ + KCOV_TRACE_PC = 0, + /* Collecting comparison operands mode. */ + KCOV_TRACE_CMP = 1, +}; + +/* + * The format for the types of collected comparisons. + * + * Bit 0 shows whether one of the arguments is a compile-time constant. + * Bits 1 & 2 contain log2 of the argument size, up to 8 bytes. + */ +#define KCOV_CMP_CONST (1 << 0) +#define KCOV_CMP_SIZE(n) ((n) << 1) +#define KCOV_CMP_MASK KCOV_CMP_SIZE(3) + #endif /* _LINUX_KCOV_IOCTLS_H */ diff --git a/kernel/kcov.c b/kernel/kcov.c index dd12f62cfaae..05e3f9f0f096 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -22,13 +22,21 @@ #include #include +/* Number of 64-bit words written per one comparison: */ +#define KCOV_WORDS_PER_CMP 4 + /* * kcov descriptor (one per opened debugfs file). * State transitions of the descriptor: * - initial state after open() * - then there must be a single ioctl(KCOV_INIT_TRACE) call * - then, mmap() call (several calls are allowed but not useful) - * - then, repeated enable/disable for a task (only one task a time allowed) + * - then, ioctl(KCOV_ENABLE, arg), where arg is + * KCOV_TRACE_PC - to trace only the PCs + * or + * KCOV_TRACE_CMP - to trace only the comparison operands + * - then, ioctl(KCOV_DISABLE) to disable the task. + * Enabling/disabling ioctls can be repeated (only one task a time allowed). */ struct kcov { /* @@ -48,6 +56,36 @@ struct kcov { struct task_struct *t; }; +static bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t) +{ + enum kcov_mode mode; + + /* + * We are interested in code coverage as a function of a syscall inputs, + * so we ignore code executed in interrupts. + */ + if (!in_task()) + return false; + mode = READ_ONCE(t->kcov_mode); + /* + * There is some code that runs in interrupts but for which + * in_interrupt() returns false (e.g. preempt_schedule_irq()). + * READ_ONCE()/barrier() effectively provides load-acquire wrt + * interrupts, there are paired barrier()/WRITE_ONCE() in + * kcov_ioctl_locked(). + */ + barrier(); + return mode == needed_mode; +} + +static unsigned long canonicalize_ip(unsigned long ip) +{ +#ifdef CONFIG_RANDOMIZE_BASE + ip -= kaslr_offset(); +#endif + return ip; +} + /* * Entry point from instrumented code. * This is called once per basic-block/edge. @@ -55,44 +93,139 @@ struct kcov { void notrace __sanitizer_cov_trace_pc(void) { struct task_struct *t; - enum kcov_mode mode; + unsigned long *area; + unsigned long ip = canonicalize_ip(_RET_IP_); + unsigned long pos; t = current; - /* - * We are interested in code coverage as a function of a syscall inputs, - * so we ignore code executed in interrupts. - */ - if (!in_task()) + if (!check_kcov_mode(KCOV_MODE_TRACE_PC, t)) return; - mode = READ_ONCE(t->kcov_mode); - if (mode == KCOV_MODE_TRACE) { - unsigned long *area; - unsigned long pos; - unsigned long ip = _RET_IP_; -#ifdef CONFIG_RANDOMIZE_BASE - ip -= kaslr_offset(); -#endif - - /* - * There is some code that runs in interrupts but for which - * in_interrupt() returns false (e.g. preempt_schedule_irq()). - * READ_ONCE()/barrier() effectively provides load-acquire wrt - * interrupts, there are paired barrier()/WRITE_ONCE() in - * kcov_ioctl_locked(). - */ - barrier(); - area = t->kcov_area; - /* The first word is number of subsequent PCs. */ - pos = READ_ONCE(area[0]) + 1; - if (likely(pos < t->kcov_size)) { - area[pos] = ip; - WRITE_ONCE(area[0], pos); - } + area = t->kcov_area; + /* The first 64-bit word is the number of subsequent PCs. */ + pos = READ_ONCE(area[0]) + 1; + if (likely(pos < t->kcov_size)) { + area[pos] = ip; + WRITE_ONCE(area[0], pos); } } EXPORT_SYMBOL(__sanitizer_cov_trace_pc); +#ifdef CONFIG_KCOV_ENABLE_COMPARISONS +static void write_comp_data(u64 type, u64 arg1, u64 arg2, u64 ip) +{ + struct task_struct *t; + u64 *area; + u64 count, start_index, end_pos, max_pos; + + t = current; + if (!check_kcov_mode(KCOV_MODE_TRACE_CMP, t)) + return; + + ip = canonicalize_ip(ip); + + /* + * We write all comparison arguments and types as u64. + * The buffer was allocated for t->kcov_size unsigned longs. + */ + area = (u64 *)t->kcov_area; + max_pos = t->kcov_size * sizeof(unsigned long); + + count = READ_ONCE(area[0]); + + /* Every record is KCOV_WORDS_PER_CMP 64-bit words. */ + start_index = 1 + count * KCOV_WORDS_PER_CMP; + end_pos = (start_index + KCOV_WORDS_PER_CMP) * sizeof(u64); + if (likely(end_pos <= max_pos)) { + area[start_index] = type; + area[start_index + 1] = arg1; + area[start_index + 2] = arg2; + area[start_index + 3] = ip; + WRITE_ONCE(area[0], count + 1); + } +} + +void notrace __sanitizer_cov_trace_cmp1(u8 arg1, u8 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(0), arg1, arg2, _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_cmp1); + +void notrace __sanitizer_cov_trace_cmp2(u16 arg1, u16 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(1), arg1, arg2, _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_cmp2); + +void notrace __sanitizer_cov_trace_cmp4(u16 arg1, u16 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(2), arg1, arg2, _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_cmp4); + +void notrace __sanitizer_cov_trace_cmp8(u64 arg1, u64 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(3), arg1, arg2, _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_cmp8); + +void notrace __sanitizer_cov_trace_const_cmp1(u8 arg1, u8 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(0) | KCOV_CMP_CONST, arg1, arg2, + _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp1); + +void notrace __sanitizer_cov_trace_const_cmp2(u16 arg1, u16 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(1) | KCOV_CMP_CONST, arg1, arg2, + _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp2); + +void notrace __sanitizer_cov_trace_const_cmp4(u16 arg1, u16 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(2) | KCOV_CMP_CONST, arg1, arg2, + _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp4); + +void notrace __sanitizer_cov_trace_const_cmp8(u64 arg1, u64 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(3) | KCOV_CMP_CONST, arg1, arg2, + _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp8); + +void notrace __sanitizer_cov_trace_switch(u64 val, u64 *cases) +{ + u64 i; + u64 count = cases[0]; + u64 size = cases[1]; + u64 type = KCOV_CMP_CONST; + + switch (size) { + case 8: + type |= KCOV_CMP_SIZE(0); + break; + case 16: + type |= KCOV_CMP_SIZE(1); + break; + case 32: + type |= KCOV_CMP_SIZE(2); + break; + case 64: + type |= KCOV_CMP_SIZE(3); + break; + default: + return; + } + for (i = 0; i < count; i++) + write_comp_data(type, cases[i + 2], val, _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_switch); +#endif /* ifdef CONFIG_KCOV_ENABLE_COMPARISONS */ + static void kcov_get(struct kcov *kcov) { atomic_inc(&kcov->refcount); @@ -130,6 +263,7 @@ void kcov_task_exit(struct task_struct *t) /* Just to not leave dangling references behind. */ kcov_task_init(t); kcov->t = NULL; + kcov->mode = KCOV_MODE_INIT; spin_unlock(&kcov->lock); kcov_put(kcov); } @@ -148,7 +282,7 @@ static int kcov_mmap(struct file *filep, struct vm_area_struct *vma) spin_lock(&kcov->lock); size = kcov->size * sizeof(unsigned long); - if (kcov->mode == KCOV_MODE_DISABLED || vma->vm_pgoff != 0 || + if (kcov->mode != KCOV_MODE_INIT || vma->vm_pgoff != 0 || vma->vm_end - vma->vm_start != size) { res = -EINVAL; goto exit; @@ -177,6 +311,7 @@ static int kcov_open(struct inode *inode, struct file *filep) kcov = kzalloc(sizeof(*kcov), GFP_KERNEL); if (!kcov) return -ENOMEM; + kcov->mode = KCOV_MODE_DISABLED; atomic_set(&kcov->refcount, 1); spin_lock_init(&kcov->lock); filep->private_data = kcov; @@ -212,7 +347,7 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, if (size < 2 || size > INT_MAX / sizeof(unsigned long)) return -EINVAL; kcov->size = size; - kcov->mode = KCOV_MODE_TRACE; + kcov->mode = KCOV_MODE_INIT; return 0; case KCOV_ENABLE: /* @@ -222,17 +357,25 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, * at task exit or voluntary by KCOV_DISABLE. After that it can * be enabled for another task. */ - unused = arg; - if (unused != 0 || kcov->mode == KCOV_MODE_DISABLED || - kcov->area == NULL) + if (kcov->mode != KCOV_MODE_INIT || !kcov->area) return -EINVAL; t = current; if (kcov->t != NULL || t->kcov != NULL) return -EBUSY; + if (arg == KCOV_TRACE_PC) + kcov->mode = KCOV_MODE_TRACE_PC; + else if (arg == KCOV_TRACE_CMP) +#ifdef CONFIG_KCOV_ENABLE_COMPARISONS + kcov->mode = KCOV_MODE_TRACE_CMP; +#else + return -ENOTSUPP; +#endif + else + return -EINVAL; /* Cache in task struct for performance. */ t->kcov_size = kcov->size; t->kcov_area = kcov->area; - /* See comment in __sanitizer_cov_trace_pc(). */ + /* See comment in check_kcov_mode(). */ barrier(); WRITE_ONCE(t->kcov_mode, kcov->mode); t->kcov = kcov; @@ -250,6 +393,7 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, return -EINVAL; kcov_task_init(t); kcov->t = NULL; + kcov->mode = KCOV_MODE_INIT; kcov_put(kcov); return 0; default: From 3283a3b899f3d54c89a700249d177ede43fae2d5 Mon Sep 17 00:00:00 2001 From: Victor Chibotaru Date: Fri, 17 Nov 2017 15:30:50 -0800 Subject: [PATCH 2210/3715] BACKPORT: Makefile: support flag -fsanitizer-coverage=trace-cmp (Upstream commit d677a4d6019385488e794cc47bd3d6f9c2aab874.) The flag enables Clang instrumentation of comparison operations (currently not supported by GCC). This instrumentation is needed by the new KCOV device to collect comparison operands. Link: http://lkml.kernel.org/r/20171011095459.70721-2-glider@google.com Signed-off-by: Victor Chibotaru Signed-off-by: Alexander Potapenko Cc: Dmitry Vyukov Cc: Andrey Konovalov Cc: Mark Rutland Cc: Alexander Popov Cc: Andrey Ryabinin Cc: Kees Cook Cc: Vegard Nossum Cc: Quentin Casasnovas Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Change-Id: Id294d9cc7619fb351a8929d11d45e532f86d2c36 Signed-off-by: Andrey Konovalov Bug: 147413187 --- Makefile | 4 ++-- lib/Kconfig.debug | 10 ++++++++++ scripts/Makefile.kcov | 7 +++++++ 3 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 scripts/Makefile.kcov diff --git a/Makefile b/Makefile index a4ec8f733611..e121b0021c92 100644 --- a/Makefile +++ b/Makefile @@ -650,8 +650,7 @@ KBUILD_AFLAGS += $(call cc-option,-fno-PIE) CFLAGS_GCOV := -fprofile-arcs -ftest-coverage \ $(call cc-option,-fno-tree-loop-im) \ $(call cc-disable-warning,maybe-uninitialized,) -CFLAGS_KCOV := $(call cc-option,-fsanitize-coverage=trace-pc,) -export CFLAGS_GCOV CFLAGS_KCOV +export CFLAGS_GCOV # Make toolchain changes before including arch/$(SRCARCH)/Makefile to ensure # ar/cc/ld-* macros return correct values. @@ -706,6 +705,7 @@ ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC) $(KBUILD_CFLA KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO endif +include scripts/Makefile.kcov include scripts/Makefile.gcc-plugins ifdef CONFIG_READABLE_ASM diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 81a10bd78d8d..5bc743f4d402 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -756,6 +756,16 @@ config KCOV For more details, see Documentation/dev-tools/kcov.rst. +config KCOV_ENABLE_COMPARISONS + bool "Enable comparison operands collection by KCOV" + depends on KCOV + default n + help + KCOV also exposes operands of every comparison in the instrumented + code along with operand sizes and PCs of the comparison instructions. + These operands can be used by fuzzing engines to improve the quality + of fuzzing coverage. + config KCOV_INSTRUMENT_ALL bool "Instrument all code by default" depends on KCOV diff --git a/scripts/Makefile.kcov b/scripts/Makefile.kcov new file mode 100644 index 000000000000..5cc72037e423 --- /dev/null +++ b/scripts/Makefile.kcov @@ -0,0 +1,7 @@ +ifdef CONFIG_KCOV +CFLAGS_KCOV := $(call cc-option,-fsanitize-coverage=trace-pc,) +ifeq ($(CONFIG_KCOV_ENABLE_COMPARISONS),y) +CFLAGS_KCOV += $(call cc-option,-fsanitize-coverage=trace-cmp,) +endif + +endif From 6e61cc02a75888d648e8a7221b70a19163d95eed Mon Sep 17 00:00:00 2001 From: Victor Chibotaru Date: Fri, 17 Nov 2017 15:30:53 -0800 Subject: [PATCH 2211/3715] UPSTREAM: kcov: update documentation (Upstream commit c512ac01d8a841033da8ec538a83f80fb0b4d1fe.) The updated documentation describes new KCOV mode for collecting comparison operands. Link: http://lkml.kernel.org/r/20171011095459.70721-3-glider@google.com Signed-off-by: Victor Chibotaru Signed-off-by: Alexander Potapenko Cc: Dmitry Vyukov Cc: Andrey Konovalov Cc: Mark Rutland Cc: Alexander Popov Cc: Andrey Ryabinin Cc: Kees Cook Cc: Vegard Nossum Cc: Quentin Casasnovas Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Andrey Konovalov Bug: 147413187 Change-Id: I5e50446e3c908962de518bc5131a864b0e95b082 --- Documentation/dev-tools/kcov.rst | 99 ++++++++++++++++++++++++++++++-- 1 file changed, 95 insertions(+), 4 deletions(-) diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst index 44886c91e112..c2f6452e38ed 100644 --- a/Documentation/dev-tools/kcov.rst +++ b/Documentation/dev-tools/kcov.rst @@ -12,19 +12,30 @@ To achieve this goal it does not collect coverage in soft/hard interrupts and instrumentation of some inherently non-deterministic parts of kernel is disabled (e.g. scheduler, locking). -Usage ------ +kcov is also able to collect comparison operands from the instrumented code +(this feature currently requires that the kernel is compiled with clang). + +Prerequisites +------------- Configure the kernel with:: CONFIG_KCOV=y CONFIG_KCOV requires gcc built on revision 231296 or later. + +If the comparison operands need to be collected, set:: + + CONFIG_KCOV_ENABLE_COMPARISONS=y + Profiling data will only become accessible once debugfs has been mounted:: mount -t debugfs none /sys/kernel/debug -The following program demonstrates kcov usage from within a test program: +Coverage collection +------------------- +The following program demonstrates coverage collection from within a test +program using kcov: .. code-block:: c @@ -44,6 +55,9 @@ The following program demonstrates kcov usage from within a test program: #define KCOV_DISABLE _IO('c', 101) #define COVER_SIZE (64<<10) + #define KCOV_TRACE_PC 0 + #define KCOV_TRACE_CMP 1 + int main(int argc, char **argv) { int fd; @@ -64,7 +78,7 @@ The following program demonstrates kcov usage from within a test program: if ((void*)cover == MAP_FAILED) perror("mmap"), exit(1); /* Enable coverage collection on the current thread. */ - if (ioctl(fd, KCOV_ENABLE, 0)) + if (ioctl(fd, KCOV_ENABLE, KCOV_TRACE_PC)) perror("ioctl"), exit(1); /* Reset coverage from the tail of the ioctl() call. */ __atomic_store_n(&cover[0], 0, __ATOMIC_RELAXED); @@ -111,3 +125,80 @@ The interface is fine-grained to allow efficient forking of test processes. That is, a parent process opens /sys/kernel/debug/kcov, enables trace mode, mmaps coverage buffer and then forks child processes in a loop. Child processes only need to enable coverage (disable happens automatically on thread end). + +Comparison operands collection +------------------------------ +Comparison operands collection is similar to coverage collection: + +.. code-block:: c + + /* Same includes and defines as above. */ + + /* Number of 64-bit words per record. */ + #define KCOV_WORDS_PER_CMP 4 + + /* + * The format for the types of collected comparisons. + * + * Bit 0 shows whether one of the arguments is a compile-time constant. + * Bits 1 & 2 contain log2 of the argument size, up to 8 bytes. + */ + + #define KCOV_CMP_CONST (1 << 0) + #define KCOV_CMP_SIZE(n) ((n) << 1) + #define KCOV_CMP_MASK KCOV_CMP_SIZE(3) + + int main(int argc, char **argv) + { + int fd; + uint64_t *cover, type, arg1, arg2, is_const, size; + unsigned long n, i; + + fd = open("/sys/kernel/debug/kcov", O_RDWR); + if (fd == -1) + perror("open"), exit(1); + if (ioctl(fd, KCOV_INIT_TRACE, COVER_SIZE)) + perror("ioctl"), exit(1); + /* + * Note that the buffer pointer is of type uint64_t*, because all + * the comparison operands are promoted to uint64_t. + */ + cover = (uint64_t *)mmap(NULL, COVER_SIZE * sizeof(unsigned long), + PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if ((void*)cover == MAP_FAILED) + perror("mmap"), exit(1); + /* Note KCOV_TRACE_CMP instead of KCOV_TRACE_PC. */ + if (ioctl(fd, KCOV_ENABLE, KCOV_TRACE_CMP)) + perror("ioctl"), exit(1); + __atomic_store_n(&cover[0], 0, __ATOMIC_RELAXED); + read(-1, NULL, 0); + /* Read number of comparisons collected. */ + n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED); + for (i = 0; i < n; i++) { + type = cover[i * KCOV_WORDS_PER_CMP + 1]; + /* arg1 and arg2 - operands of the comparison. */ + arg1 = cover[i * KCOV_WORDS_PER_CMP + 2]; + arg2 = cover[i * KCOV_WORDS_PER_CMP + 3]; + /* ip - caller address. */ + ip = cover[i * KCOV_WORDS_PER_CMP + 4]; + /* size of the operands. */ + size = 1 << ((type & KCOV_CMP_MASK) >> 1); + /* is_const - true if either operand is a compile-time constant.*/ + is_const = type & KCOV_CMP_CONST; + printf("ip: 0x%lx type: 0x%lx, arg1: 0x%lx, arg2: 0x%lx, " + "size: %lu, %s\n", + ip, type, arg1, arg2, size, + is_const ? "const" : "non-const"); + } + if (ioctl(fd, KCOV_DISABLE, 0)) + perror("ioctl"), exit(1); + /* Free resources. */ + if (munmap(cover, COVER_SIZE * sizeof(unsigned long))) + perror("munmap"), exit(1); + if (close(fd)) + perror("close"), exit(1); + return 0; + } + +Note that the kcov modes (coverage collection or comparison operands) are +mutually exclusive. From 13ffffd3bab5b9c0cd52b0d0f4a2a3120954f131 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Thu, 14 Dec 2017 15:33:02 -0800 Subject: [PATCH 2212/3715] UPSTREAM: kcov: fix comparison callback signature (Upstream commit 689d77f001cd22da31cc943170e1f6f2e8197035.) Fix a silly copy-paste bug. We truncated u32 args to u16. Link: http://lkml.kernel.org/r/20171207101134.107168-1-dvyukov@google.com Fixes: ded97d2c2b2c ("kcov: support comparison operands collection") Signed-off-by: Dmitry Vyukov Cc: syzkaller@googlegroups.com Cc: Alexander Potapenko Cc: Vegard Nossum Cc: Quentin Casasnovas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Andrey Konovalov Bug: 147413187 Change-Id: If15ae4d86c445e908c355975ccf5cf53e296b27d --- kernel/kcov.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/kcov.c b/kernel/kcov.c index 05e3f9f0f096..5be9a60a959f 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -157,7 +157,7 @@ void notrace __sanitizer_cov_trace_cmp2(u16 arg1, u16 arg2) } EXPORT_SYMBOL(__sanitizer_cov_trace_cmp2); -void notrace __sanitizer_cov_trace_cmp4(u16 arg1, u16 arg2) +void notrace __sanitizer_cov_trace_cmp4(u32 arg1, u32 arg2) { write_comp_data(KCOV_CMP_SIZE(2), arg1, arg2, _RET_IP_); } @@ -183,7 +183,7 @@ void notrace __sanitizer_cov_trace_const_cmp2(u16 arg1, u16 arg2) } EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp2); -void notrace __sanitizer_cov_trace_const_cmp4(u16 arg1, u16 arg2) +void notrace __sanitizer_cov_trace_const_cmp4(u32 arg1, u32 arg2) { write_comp_data(KCOV_CMP_SIZE(2) | KCOV_CMP_CONST, arg1, arg2, _RET_IP_); From 0f0fe8e7a521002f7c3b34a5907035e402db1840 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 13 Apr 2018 14:06:10 +0900 Subject: [PATCH 2213/3715] UPSTREAM: gcc-plugins: fix build condition of SANCOV plugin (Upstream commit 642ef99be932c4071274b28eaf3d3d85bbb6e78c.) Since commit d677a4d60193 ("Makefile: support flag -fsanitizer-coverage=trace-cmp"), you miss to build the SANCOV plugin under some circumstances. CONFIG_KCOV=y CONFIG_KCOV_ENABLE_COMPARISONS=y Your compiler does not support -fsanitize-coverage=trace-pc Your compiler does not support -fsanitize-coverage=trace-cmp Under this condition, $(CFLAGS_KCOV) is not empty but contains a space, so the following ifeq-conditional is false. ifeq ($(CFLAGS_KCOV),) Then, scripts/Makefile.gcc-plugins misses to add sancov_plugin.so to gcc-plugin-y while the SANCOV plugin is necessary as an alternative means. Fixes: d677a4d60193 ("Makefile: support flag -fsanitizer-coverage=trace-cmp") Signed-off-by: Masahiro Yamada Acked-by: Kees Cook Signed-off-by: Andrey Konovalov Bug: 147413187 Change-Id: Ifa747836a53f74563fcff44e9d95948e9589b552 --- scripts/Makefile.gcc-plugins | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.gcc-plugins b/scripts/Makefile.gcc-plugins index b2a95af7df18..7f5c86246138 100644 --- a/scripts/Makefile.gcc-plugins +++ b/scripts/Makefile.gcc-plugins @@ -14,7 +14,7 @@ ifdef CONFIG_GCC_PLUGINS endif ifdef CONFIG_GCC_PLUGIN_SANCOV - ifeq ($(CFLAGS_KCOV),) + ifeq ($(strip $(CFLAGS_KCOV)),) # It is needed because of the gcc-plugin.sh and gcc version checks. gcc-plugin-$(CONFIG_GCC_PLUGIN_SANCOV) += sancov_plugin.so From c093b585dac3962f3a54772bc8ef2031bd1f1990 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 28 May 2018 18:22:04 +0900 Subject: [PATCH 2214/3715] BACKPORT: kcov: test compiler capability in Kconfig and correct dependency Work around missing cc-option support in Kconfig by checking required compiler flags in Makefile. (Upstream commit 5aadfdeb8de001ca04d500586e3b033404c28617.) As Documentation/kbuild/kconfig-language.txt notes, 'select' should be be used with care - it forces a lower limit of another symbol, ignoring the dependency. Currently, KCOV can select GCC_PLUGINS even if arch does not select HAVE_GCC_PLUGINS. This could cause the unmet direct dependency. Now that Kconfig can test compiler capability, let's handle this in a more sophisticated way. There are two ways to enable KCOV; use the compiler that natively supports -fsanitize-coverage=trace-pc, or build the SANCOV plugin if the compiler has ability to build GCC plugins. Hence, the correct dependency for KCOV is: depends on CC_HAS_SANCOV_TRACE_PC || GCC_PLUGINS You do not need to build the SANCOV plugin if the compiler already supports -fsanitize-coverage=trace-pc. Hence, the select should be: select GCC_PLUGIN_SANCOV if !CC_HAS_SANCOV_TRACE_PC With this, GCC_PLUGIN_SANCOV is selected only when necessary, so scripts/Makefile.gcc-plugins can be cleaner. I also cleaned up Kconfig and scripts/Makefile.kcov as well. Signed-off-by: Masahiro Yamada Reviewed-by: Kees Cook Change-Id: Iad9110eb7b6ecef6dfcec38cf483699c1b85af01 Signed-off-by: Andrey Konovalov Bug: 147413187 --- lib/Kconfig.debug | 14 ++++++++++---- scripts/Makefile.gcc-plugins | 8 ++------ scripts/Makefile.kcov | 24 +++++++++++++++++++++--- scripts/gcc-plugins/Makefile | 4 ---- 4 files changed, 33 insertions(+), 17 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 5bc743f4d402..ec057b40021b 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -740,12 +740,17 @@ config ARCH_HAS_KCOV only for x86_64. KCOV requires testing on other archs, and most likely disabling of instrumentation for some early boot code. +# Upstream uses $(cc-option, -fsanitize-coverage=trace-pc), which requires +# cc-option support. Here we instead check CC in scripts/Makefile.kcov. +config CC_HAS_SANCOV_TRACE_PC + def_bool ARCH_HAS_KCOV + config KCOV bool "Code coverage for fuzzing" depends on ARCH_HAS_KCOV + depends on CC_HAS_SANCOV_TRACE_PC || GCC_PLUGINS select DEBUG_FS - select GCC_PLUGINS if !COMPILE_TEST - select GCC_PLUGIN_SANCOV if !COMPILE_TEST + select GCC_PLUGIN_SANCOV if !CC_HAS_SANCOV_TRACE_PC help KCOV exposes kernel code coverage information in a form suitable for coverage-guided fuzzing (randomized testing). @@ -756,10 +761,11 @@ config KCOV For more details, see Documentation/dev-tools/kcov.rst. +# Upstream uses $(cc-option, -fsanitize-coverage=trace-cmp), which requires +# cc-option support. Here we instead check CC in scripts/Makefile.kcov. config KCOV_ENABLE_COMPARISONS bool "Enable comparison operands collection by KCOV" depends on KCOV - default n help KCOV also exposes operands of every comparison in the instrumented code along with operand sizes and PCs of the comparison instructions. @@ -769,7 +775,7 @@ config KCOV_ENABLE_COMPARISONS config KCOV_INSTRUMENT_ALL bool "Instrument all code by default" depends on KCOV - default y if KCOV + default y help If you are doing generic system call fuzzing (like e.g. syzkaller), then you will want to instrument the whole kernel and you should diff --git a/scripts/Makefile.gcc-plugins b/scripts/Makefile.gcc-plugins index 7f5c86246138..708c8f6a5717 100644 --- a/scripts/Makefile.gcc-plugins +++ b/scripts/Makefile.gcc-plugins @@ -14,16 +14,12 @@ ifdef CONFIG_GCC_PLUGINS endif ifdef CONFIG_GCC_PLUGIN_SANCOV - ifeq ($(strip $(CFLAGS_KCOV)),) # It is needed because of the gcc-plugin.sh and gcc version checks. gcc-plugin-$(CONFIG_GCC_PLUGIN_SANCOV) += sancov_plugin.so - ifneq ($(PLUGINCC),) - CFLAGS_KCOV := $(SANCOV_PLUGIN) - else + ifeq ($(PLUGINCC),) $(warning warning: cannot use CONFIG_KCOV: -fsanitize-coverage=trace-pc is not supported by compiler) endif - endif endif gcc-plugin-$(CONFIG_GCC_PLUGIN_STRUCTLEAK) += structleak_plugin.so @@ -38,7 +34,7 @@ ifdef CONFIG_GCC_PLUGINS GCC_PLUGINS_CFLAGS := $(strip $(addprefix -fplugin=$(objtree)/scripts/gcc-plugins/, $(gcc-plugin-y)) $(gcc-plugin-cflags-y)) export PLUGINCC GCC_PLUGINS_CFLAGS GCC_PLUGIN GCC_PLUGIN_SUBDIR - export SANCOV_PLUGIN DISABLE_LATENT_ENTROPY_PLUGIN + export DISABLE_LATENT_ENTROPY_PLUGIN ifneq ($(PLUGINCC),) # SANCOV_PLUGIN can be only in CFLAGS_KCOV because avoid duplication. diff --git a/scripts/Makefile.kcov b/scripts/Makefile.kcov index 5cc72037e423..945724d226e8 100644 --- a/scripts/Makefile.kcov +++ b/scripts/Makefile.kcov @@ -1,7 +1,25 @@ ifdef CONFIG_KCOV -CFLAGS_KCOV := $(call cc-option,-fsanitize-coverage=trace-pc,) -ifeq ($(CONFIG_KCOV_ENABLE_COMPARISONS),y) -CFLAGS_KCOV += $(call cc-option,-fsanitize-coverage=trace-cmp,) + +ifeq ($(call cc-option, -fsanitize-coverage=trace-pc -Werror),) + ifneq ($(CONFIG_COMPILE_TEST),y) + $(error Cannot use CONFIG_KCOV: \ + -fsanitize-coverage=trace-pc is not supported by compiler) + endif endif +ifdef CONFIG_KCOV_ENABLE_COMPARISONS + ifeq ($(call cc-option, -fsanitize-coverage=trace-cmp -Werror),) + ifneq ($(CONFIG_COMPILE_TEST),y) + $(error Cannot use CONFIG_KCOV_ENABLE_COMPARISONS: \ + -fsanitize-coverage=trace-cmp is not supported by compiler) + endif + endif +endif + +kcov-flags-$(CONFIG_CC_HAS_SANCOV_TRACE_PC) += -fsanitize-coverage=trace-pc +kcov-flags-$(CONFIG_KCOV_ENABLE_COMPARISONS) += -fsanitize-coverage=trace-cmp +kcov-flags-$(CONFIG_GCC_PLUGIN_SANCOV) += -fplugin=$(objtree)/scripts/gcc-plugins/sancov_plugin.so + +export CFLAGS_KCOV := $(kcov-flags-y) + endif diff --git a/scripts/gcc-plugins/Makefile b/scripts/gcc-plugins/Makefile index e2ff425f4c7e..ea465799ced5 100644 --- a/scripts/gcc-plugins/Makefile +++ b/scripts/gcc-plugins/Makefile @@ -13,10 +13,6 @@ else export HOST_EXTRACXXFLAGS endif -ifneq ($(CFLAGS_KCOV), $(SANCOV_PLUGIN)) - GCC_PLUGIN := $(filter-out $(SANCOV_PLUGIN), $(GCC_PLUGIN)) -endif - export HOSTLIBS $(obj)/randomize_layout_plugin.o: $(objtree)/$(obj)/randomize_layout_seed.h From bef2ed2af64863db5e14c49f15cf776182a4fd90 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 14 Jun 2018 15:27:37 -0700 Subject: [PATCH 2215/3715] UPSTREAM: kcov: prefault the kcov_area (Upstream commit dc55daff9040a90adce97208e776ee0bf515ab12.) On many architectures the vmalloc area is lazily faulted in upon first access. This is problematic for KCOV, as __sanitizer_cov_trace_pc accesses the (vmalloc'd) kcov_area, and fault handling code may be instrumented. If an access to kcov_area faults, this will result in mutual recursion through the fault handling code and __sanitizer_cov_trace_pc(), eventually leading to stack corruption and/or overflow. We can avoid this by faulting in the kcov_area before __sanitizer_cov_trace_pc() is permitted to access it. Once it has been faulted in, it will remain present in the process page tables, and will not fault again. [akpm@linux-foundation.org: code cleanup] [akpm@linux-foundation.org: add comment explaining kcov_fault_in_area()] [akpm@linux-foundation.org: fancier code comment from Mark] Link: http://lkml.kernel.org/r/20180504135535.53744-3-mark.rutland@arm.com Signed-off-by: Mark Rutland Acked-by: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Ingo Molnar Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 147413187 Change-Id: Id90248e11b7a0ea2c5d28faf6e55515cd7dc4987 --- kernel/kcov.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/kernel/kcov.c b/kernel/kcov.c index 5be9a60a959f..cf250392c55c 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -324,6 +324,21 @@ static int kcov_close(struct inode *inode, struct file *filep) return 0; } +/* + * Fault in a lazily-faulted vmalloc area before it can be used by + * __santizer_cov_trace_pc(), to avoid recursion issues if any code on the + * vmalloc fault handling path is instrumented. + */ +static void kcov_fault_in_area(struct kcov *kcov) +{ + unsigned long stride = PAGE_SIZE / sizeof(unsigned long); + unsigned long *area = kcov->area; + unsigned long offset; + + for (offset = 0; offset < kcov->size; offset += stride) + READ_ONCE(area[offset]); +} + static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, unsigned long arg) { @@ -372,6 +387,7 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, #endif else return -EINVAL; + kcov_fault_in_area(kcov); /* Cache in task struct for performance. */ t->kcov_size = kcov->size; t->kcov_area = kcov->area; From cc7a64cd0df7ddc308e2d1d7dbe40f9ec611342e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 14 Jun 2018 15:27:41 -0700 Subject: [PATCH 2216/3715] BACKPORT: sched/core / kcov: avoid kcov_area during task switch (Upstream commit 0ed557aa813922f6f32adec69e266532091c895b.) During a context switch, we first switch_mm() to the next task's mm, then switch_to() that new task. This means that vmalloc'd regions which had previously been faulted in can transiently disappear in the context of the prev task. Functions instrumented by KCOV may try to access a vmalloc'd kcov_area during this window, and as the fault handling code is instrumented, this results in a recursive fault. We must avoid accessing any kcov_area during this window. We can do so with a new flag in kcov_mode, set prior to switching the mm, and cleared once the new task is live. Since task_struct::kcov_mode isn't always a specific enum kcov_mode value, this is made an unsigned int. The manipulation is hidden behind kcov_{prepare,finish}_switch() helpers, which are empty for !CONFIG_KCOV kernels. The code uses macros because I can't use static inline functions without a circular include dependency between and , since the definition of task_struct uses things defined in Link: http://lkml.kernel.org/r/20180504135535.53744-4-mark.rutland@arm.com Signed-off-by: Mark Rutland Acked-by: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Ingo Molnar Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Change-Id: I58f8a16210e6e58b3cca5b9e6976da305bc9a83a Signed-off-by: Andrey Konovalov Bug: 147413187 --- include/linux/kcov.h | 14 ++++++++++++++ include/linux/sched.h | 2 +- kernel/kcov.c | 2 +- kernel/sched/core.c | 3 +++ 4 files changed, 19 insertions(+), 2 deletions(-) diff --git a/include/linux/kcov.h b/include/linux/kcov.h index 3ecf6f5e3a5f..b76a1807028d 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -22,13 +22,27 @@ enum kcov_mode { KCOV_MODE_TRACE_CMP = 3, }; +#define KCOV_IN_CTXSW (1 << 30) + void kcov_task_init(struct task_struct *t); void kcov_task_exit(struct task_struct *t); +#define kcov_prepare_switch(t) \ +do { \ + (t)->kcov_mode |= KCOV_IN_CTXSW; \ +} while (0) + +#define kcov_finish_switch(t) \ +do { \ + (t)->kcov_mode &= ~KCOV_IN_CTXSW; \ +} while (0) + #else static inline void kcov_task_init(struct task_struct *t) {} static inline void kcov_task_exit(struct task_struct *t) {} +static inline void kcov_prepare_switch(struct task_struct *t) {} +static inline void kcov_finish_switch(struct task_struct *t) {} #endif /* CONFIG_KCOV */ #endif /* _LINUX_KCOV_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 605ed53d26b6..ebea6e14a6bf 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1185,7 +1185,7 @@ struct task_struct { #ifdef CONFIG_KCOV /* Coverage collection mode enabled for this task (0 if disabled): */ - enum kcov_mode kcov_mode; + unsigned int kcov_mode; /* Size of the kcov_area: */ unsigned int kcov_size; diff --git a/kernel/kcov.c b/kernel/kcov.c index cf250392c55c..3ebd09efe72a 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -58,7 +58,7 @@ struct kcov { static bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t) { - enum kcov_mode mode; + unsigned int mode; /* * We are interested in code coverage as a function of a syscall inputs, diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0b830e61ddde..c0b4be91cd8d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -2657,6 +2658,7 @@ static inline void prepare_task_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next) { + kcov_prepare_switch(prev); sched_info_switch(rq, prev, next); perf_event_task_sched_out(prev, next); fire_sched_out_preempt_notifiers(prev, next); @@ -2734,6 +2736,7 @@ static struct rq *finish_task_switch(struct task_struct *prev) smp_mb__after_unlock_lock(); finish_lock_switch(rq, prev); finish_arch_post_lock_switch(); + kcov_finish_switch(current); fire_sched_in_preempt_notifiers(current); if (mm) From d1ad2a37ed7e1161f9a04927853b1476e9b33f46 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Fri, 30 Nov 2018 14:10:05 -0800 Subject: [PATCH 2217/3715] UPSTREAM: kernel/kcov.c: mark funcs in __sanitizer_cov_trace_pc() as notrace (Upstream commit 903e8ff86753e6f327bb92166a0665e4ecb8e2e7.) Since __sanitizer_cov_trace_pc() is marked as notrace, function calls in __sanitizer_cov_trace_pc() shouldn't be traced either. ftrace_graph_caller() gets called for each function that isn't marked 'notrace', like canonicalize_ip(). This is the call trace from a run: [ 139.644550] ftrace_graph_caller+0x1c/0x24 [ 139.648352] canonicalize_ip+0x18/0x28 [ 139.652313] __sanitizer_cov_trace_pc+0x14/0x58 [ 139.656184] sched_clock+0x34/0x1e8 [ 139.659759] trace_clock_local+0x40/0x88 [ 139.663722] ftrace_push_return_trace+0x8c/0x1f0 [ 139.667767] prepare_ftrace_return+0xa8/0x100 [ 139.671709] ftrace_graph_caller+0x1c/0x24 Rework so that check_kcov_mode() and canonicalize_ip() that are called from __sanitizer_cov_trace_pc() are also marked as notrace. Link: http://lkml.kernel.org/r/20181128081239.18317-1-anders.roxell@linaro.org Signed-off-by: Arnd Bergmann Signen-off-by: Anders Roxell Co-developed-by: Arnd Bergmann Acked-by: Steven Rostedt (VMware) Cc: Dmitry Vyukov Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Andrey Konovalov Bug: 147413187 Change-Id: I3f07daa04361aa6020bf6843aeeb57663bd8d169 --- kernel/kcov.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/kcov.c b/kernel/kcov.c index 3ebd09efe72a..97959d7b77e2 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -56,7 +56,7 @@ struct kcov { struct task_struct *t; }; -static bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t) +static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t) { unsigned int mode; @@ -78,7 +78,7 @@ static bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t) return mode == needed_mode; } -static unsigned long canonicalize_ip(unsigned long ip) +static notrace unsigned long canonicalize_ip(unsigned long ip) { #ifdef CONFIG_RANDOMIZE_BASE ip -= kaslr_offset(); From 37638f9a197402ed0ba2c134852760114416a3e1 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Thu, 3 Jan 2019 15:28:24 -0800 Subject: [PATCH 2218/3715] UPSTREAM: kernel/kcov.c: mark write_comp_data() as notrace (Upstream commit 634724431607f6f46c495dfef801a1c8b44a96d9.) Since __sanitizer_cov_trace_const_cmp4 is marked as notrace, the function called from __sanitizer_cov_trace_const_cmp4 shouldn't be traceable either. ftrace_graph_caller() gets called every time func write_comp_data() gets called if it isn't marked 'notrace'. This is the backtrace from gdb: #0 ftrace_graph_caller () at ../arch/arm64/kernel/entry-ftrace.S:179 #1 0xffffff8010201920 in ftrace_caller () at ../arch/arm64/kernel/entry-ftrace.S:151 #2 0xffffff8010439714 in write_comp_data (type=5, arg1=0, arg2=0, ip=18446743524224276596) at ../kernel/kcov.c:116 #3 0xffffff8010439894 in __sanitizer_cov_trace_const_cmp4 (arg1=, arg2=) at ../kernel/kcov.c:188 #4 0xffffff8010201874 in prepare_ftrace_return (self_addr=18446743524226602768, parent=0xffffff801014b918, frame_pointer=18446743524223531344) at ./include/generated/atomic-instrumented.h:27 #5 0xffffff801020194c in ftrace_graph_caller () at ../arch/arm64/kernel/entry-ftrace.S:182 Rework so that write_comp_data() that are called from __sanitizer_cov_trace_*_cmp*() are marked as 'notrace'. Commit 903e8ff86753 ("kernel/kcov.c: mark funcs in __sanitizer_cov_trace_pc() as notrace") missed to mark write_comp_data() as 'notrace'. When that patch was created gcc-7 was used. In lib/Kconfig.debug config KCOV_ENABLE_COMPARISONS depends on $(cc-option,-fsanitize-coverage=trace-cmp) That code path isn't hit with gcc-7. However, it were that with gcc-8. Link: http://lkml.kernel.org/r/20181206143011.23719-1-anders.roxell@linaro.org Signed-off-by: Anders Roxell Signed-off-by: Arnd Bergmann Co-developed-by: Arnd Bergmann Acked-by: Steven Rostedt (VMware) Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Andrey Konovalov Bug: 147413187 Change-Id: I12a04221abd2f26f4943f7f36c74fdd24af1c4ad --- kernel/kcov.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/kcov.c b/kernel/kcov.c index 97959d7b77e2..c2277dbdbfb1 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -112,7 +112,7 @@ void notrace __sanitizer_cov_trace_pc(void) EXPORT_SYMBOL(__sanitizer_cov_trace_pc); #ifdef CONFIG_KCOV_ENABLE_COMPARISONS -static void write_comp_data(u64 type, u64 arg1, u64 arg2, u64 ip) +static void notrace write_comp_data(u64 type, u64 arg1, u64 arg2, u64 ip) { struct task_struct *t; u64 *area; From 6d9fd0993c21d848bcfc46eb7cfe04085b4b5c0d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 7 Mar 2019 16:29:56 -0800 Subject: [PATCH 2219/3715] UPSTREAM: kcov: no need to check return value of debugfs_create functions (Upstream commit ec9672d57670d495404f36ab8b651bfefc0ea10b.) When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. Link: http://lkml.kernel.org/r/20190122152151.16139-46-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman Cc: Andrey Ryabinin Cc: Mark Rutland Cc: Arnd Bergmann Cc: "Steven Rostedt (VMware)" Cc: Dmitry Vyukov Cc: Anders Roxell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Andrey Konovalov Bug: 147413187 Change-Id: I8ed5dc6aeba3dda8b91ceea4fed5cd9ef058461f --- kernel/kcov.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/kcov.c b/kernel/kcov.c index c2277dbdbfb1..5b0bb281c1a0 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -444,10 +444,8 @@ static int __init kcov_init(void) * there is no need to protect it against removal races. The * use of debugfs_create_file_unsafe() is actually safe here. */ - if (!debugfs_create_file_unsafe("kcov", 0600, NULL, NULL, &kcov_fops)) { - pr_err("failed to create kcov in debugfs\n"); - return -ENOMEM; - } + debugfs_create_file_unsafe("kcov", 0600, NULL, NULL, &kcov_fops); + return 0; } From 3a3cd9cd27ec7d3a7037524d14bbf615951a62ef Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Thu, 7 Mar 2019 16:30:00 -0800 Subject: [PATCH 2220/3715] UPSTREAM: kcov: convert kcov.refcount to refcount_t (Upstream commit 39e07cb60860e3162fc377380b8a60409315681e.) atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable kcov.refcount is used as pure reference counter. Convert it to refcount_t and fix up the operations. **Important note for maintainers: Some functions from refcount_t API defined in lib/refcount.c have different memory ordering guarantees than their atomic counterparts. The full comparison can be seen in https://lkml.org/lkml/2017/11/15/57 and it is hopefully soon in state to be merged to the documentation tree. Normally the differences should not matter since refcount_t provides enough guarantees to satisfy the refcounting use cases, but in some rare cases it might matter. Please double check that you don't have some undocumented memory guarantees for this variable usage. For the kcov.refcount it might make a difference in following places: - kcov_put(): decrement in refcount_dec_and_test() only provides RELEASE ordering and control dependency on success vs. fully ordered atomic counterpart Link: http://lkml.kernel.org/r/1547634429-772-1-git-send-email-elena.reshetova@intel.com Signed-off-by: Elena Reshetova Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Reviewed-by: Dmitry Vyukov Reviewed-by: Andrea Parri Cc: Mark Rutland Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Andrey Konovalov Bug: 147413187 Change-Id: Ie22524d133af5ab86dcc5cadde4bdca931625d3a --- kernel/kcov.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/kcov.c b/kernel/kcov.c index 5b0bb281c1a0..2ee38727844a 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -20,6 +20,7 @@ #include #include #include +#include #include /* Number of 64-bit words written per one comparison: */ @@ -44,7 +45,7 @@ struct kcov { * - opened file descriptor * - task with enabled coverage (we can't unwire it from another task) */ - atomic_t refcount; + refcount_t refcount; /* The lock protects mode, size, area and t. */ spinlock_t lock; enum kcov_mode mode; @@ -228,12 +229,12 @@ EXPORT_SYMBOL(__sanitizer_cov_trace_switch); static void kcov_get(struct kcov *kcov) { - atomic_inc(&kcov->refcount); + refcount_inc(&kcov->refcount); } static void kcov_put(struct kcov *kcov) { - if (atomic_dec_and_test(&kcov->refcount)) { + if (refcount_dec_and_test(&kcov->refcount)) { vfree(kcov->area); kfree(kcov); } @@ -312,7 +313,7 @@ static int kcov_open(struct inode *inode, struct file *filep) if (!kcov) return -ENOMEM; kcov->mode = KCOV_MODE_DISABLED; - atomic_set(&kcov->refcount, 1); + refcount_set(&kcov->refcount, 1); spin_lock_init(&kcov->lock); filep->private_data = kcov; return nonseekable_open(inode, filep); From 9b5f12321a8dff8aab4475b922146fa0acb0f7b3 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 18 Apr 2019 17:50:37 -0700 Subject: [PATCH 2221/3715] UPSTREAM: kcov: improve CONFIG_ARCH_HAS_KCOV help text (Upstream commit 40453c4f9bb6d166a56a102a8c51dd24b0801557.) The help text for CONFIG_ARCH_HAS_KCOV is stale, and describes the feature as being enabled only for x86_64, when it is now enabled for several architectures, including arm, arm64, powerpc, and s390. Let's remove that stale help text, and update it along the lines of hat for ARCH_HAS_FORTIFY_SOURCE, better describing when an architecture should select CONFIG_ARCH_HAS_KCOV. Link: http://lkml.kernel.org/r/20190412102733.5154-1-mark.rutland@arm.com Signed-off-by: Mark Rutland Acked-by: Dmitry Vyukov Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Andrey Konovalov Bug: 147413187 Change-Id: If1a6cce383c704fc96ea9a267459b665d32fb8bd --- lib/Kconfig.debug | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ec057b40021b..65ac0511546e 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -736,9 +736,9 @@ endmenu # "Memory Debugging" config ARCH_HAS_KCOV bool help - KCOV does not have any arch-specific code, but currently it is enabled - only for x86_64. KCOV requires testing on other archs, and most likely - disabling of instrumentation for some early boot code. + An architecture should select this when it can successfully + build and run with CONFIG_KCOV. This typically requires + disabling instrumentation for some early boot code. # Upstream uses $(cc-option, -fsanitize-coverage=trace-pc), which requires # cc-option support. Here we instead check CC in scripts/Makefile.kcov. From 5627b215f9825bf9a7fa16626fdf70727f9220d2 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 4 Dec 2019 16:52:43 -0800 Subject: [PATCH 2222/3715] BACKPORT: kcov: remote coverage support Workaround missing struct_size() support by calculating the size manually. (Upstream commit eec028c9386ed1a692aa01a85b55952202b41619.) Patch series " kcov: collect coverage from usb and vhost", v3. This patchset extends kcov to allow collecting coverage from backgound kernel threads. This extension requires custom annotations for each of the places where coverage collection is desired. This patchset implements this for hub events in the USB subsystem and for vhost workers. See the first patch description for details about the kcov extension. The other two patches apply this kcov extension to USB and vhost. Examples of other subsystems that might potentially benefit from this when custom annotations are added (the list is based on process_one_work() callers for bugs recently reported by syzbot): 1. fs: writeback wb_workfn() worker, 2. net: addrconf_dad_work()/addrconf_verify_work() workers, 3. net: neigh_periodic_work() worker, 4. net/p9: p9_write_work()/p9_read_work() workers, 5. block: blk_mq_run_work_fn() worker. These patches have been used to enable coverage-guided USB fuzzing with syzkaller for the last few years, see the details here: https://github.com/google/syzkaller/blob/master/docs/linux/external_fuzzing_usb.md This patchset has been pushed to the public Linux kernel Gerrit instance: https://linux-review.googlesource.com/c/linux/kernel/git/torvalds/linux/+/1524 This patch (of 3): Add background thread coverage collection ability to kcov. With KCOV_ENABLE coverage is collected only for syscalls that are issued from the current process. With KCOV_REMOTE_ENABLE it's possible to collect coverage for arbitrary parts of the kernel code, provided that those parts are annotated with kcov_remote_start()/kcov_remote_stop(). This allows to collect coverage from two types of kernel background threads: the global ones, that are spawned during kernel boot in a limited number of instances (e.g. one USB hub_event() worker thread is spawned per USB HCD); and the local ones, that are spawned when a user interacts with some kernel interface (e.g. vhost workers). To enable collecting coverage from a global background thread, a unique global handle must be assigned and passed to the corresponding kcov_remote_start() call. Then a userspace process can pass a list of such handles to the KCOV_REMOTE_ENABLE ioctl in the handles array field of the kcov_remote_arg struct. This will attach the used kcov device to the code sections, that are referenced by those handles. Since there might be many local background threads spawned from different userspace processes, we can't use a single global handle per annotation. Instead, the userspace process passes a non-zero handle through the common_handle field of the kcov_remote_arg struct. This common handle gets saved to the kcov_handle field in the current task_struct and needs to be passed to the newly spawned threads via custom annotations. Those threads should in turn be annotated with kcov_remote_start()/kcov_remote_stop(). Internally kcov stores handles as u64 integers. The top byte of a handle is used to denote the id of a subsystem that this handle belongs to, and the lower 4 bytes are used to denote the id of a thread instance within that subsystem. A reserved value 0 is used as a subsystem id for common handles as they don't belong to a particular subsystem. The bytes 4-7 are currently reserved and must be zero. In the future the number of bytes used for the subsystem or handle ids might be increased. When a particular userspace process collects coverage by via a common handle, kcov will collect coverage for each code section that is annotated to use the common handle obtained as kcov_handle from the current task_struct. However non common handles allow to collect coverage selectively from different subsystems. Link: http://lkml.kernel.org/r/e90e315426a384207edbec1d6aa89e43008e4caf.1572366574.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Cc: Dmitry Vyukov Cc: Greg Kroah-Hartman Cc: Alan Stern Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Arnd Bergmann Cc: Steven Rostedt Cc: David Windsor Cc: Elena Reshetova Cc: Anders Roxell Cc: Alexander Potapenko Cc: Marco Elver Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Andrey Konovalov Change-Id: I868c4846a412bfbae16086017e113813571df377 Signed-off-by: Andrey Konovalov Bug: 147413187 --- Documentation/dev-tools/kcov.rst | 129 ++++++++ include/linux/kcov.h | 23 ++ include/linux/sched.h | 8 + include/uapi/linux/kcov.h | 28 ++ kernel/kcov.c | 547 +++++++++++++++++++++++++++++-- 5 files changed, 700 insertions(+), 35 deletions(-) diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst index c2f6452e38ed..b37e8e743af1 100644 --- a/Documentation/dev-tools/kcov.rst +++ b/Documentation/dev-tools/kcov.rst @@ -34,6 +34,7 @@ Profiling data will only become accessible once debugfs has been mounted:: Coverage collection ------------------- + The following program demonstrates coverage collection from within a test program using kcov: @@ -128,6 +129,7 @@ only need to enable coverage (disable happens automatically on thread end). Comparison operands collection ------------------------------ + Comparison operands collection is similar to coverage collection: .. code-block:: c @@ -202,3 +204,130 @@ Comparison operands collection is similar to coverage collection: Note that the kcov modes (coverage collection or comparison operands) are mutually exclusive. + +Remote coverage collection +-------------------------- + +With KCOV_ENABLE coverage is collected only for syscalls that are issued +from the current process. With KCOV_REMOTE_ENABLE it's possible to collect +coverage for arbitrary parts of the kernel code, provided that those parts +are annotated with kcov_remote_start()/kcov_remote_stop(). + +This allows to collect coverage from two types of kernel background +threads: the global ones, that are spawned during kernel boot in a limited +number of instances (e.g. one USB hub_event() worker thread is spawned per +USB HCD); and the local ones, that are spawned when a user interacts with +some kernel interface (e.g. vhost workers). + +To enable collecting coverage from a global background thread, a unique +global handle must be assigned and passed to the corresponding +kcov_remote_start() call. Then a userspace process can pass a list of such +handles to the KCOV_REMOTE_ENABLE ioctl in the handles array field of the +kcov_remote_arg struct. This will attach the used kcov device to the code +sections, that are referenced by those handles. + +Since there might be many local background threads spawned from different +userspace processes, we can't use a single global handle per annotation. +Instead, the userspace process passes a non-zero handle through the +common_handle field of the kcov_remote_arg struct. This common handle gets +saved to the kcov_handle field in the current task_struct and needs to be +passed to the newly spawned threads via custom annotations. Those threads +should in turn be annotated with kcov_remote_start()/kcov_remote_stop(). + +Internally kcov stores handles as u64 integers. The top byte of a handle +is used to denote the id of a subsystem that this handle belongs to, and +the lower 4 bytes are used to denote the id of a thread instance within +that subsystem. A reserved value 0 is used as a subsystem id for common +handles as they don't belong to a particular subsystem. The bytes 4-7 are +currently reserved and must be zero. In the future the number of bytes +used for the subsystem or handle ids might be increased. + +When a particular userspace proccess collects coverage by via a common +handle, kcov will collect coverage for each code section that is annotated +to use the common handle obtained as kcov_handle from the current +task_struct. However non common handles allow to collect coverage +selectively from different subsystems. + +.. code-block:: c + + struct kcov_remote_arg { + unsigned trace_mode; + unsigned area_size; + unsigned num_handles; + uint64_t common_handle; + uint64_t handles[0]; + }; + + #define KCOV_INIT_TRACE _IOR('c', 1, unsigned long) + #define KCOV_DISABLE _IO('c', 101) + #define KCOV_REMOTE_ENABLE _IOW('c', 102, struct kcov_remote_arg) + + #define COVER_SIZE (64 << 10) + + #define KCOV_TRACE_PC 0 + + #define KCOV_SUBSYSTEM_COMMON (0x00ull << 56) + #define KCOV_SUBSYSTEM_USB (0x01ull << 56) + + #define KCOV_SUBSYSTEM_MASK (0xffull << 56) + #define KCOV_INSTANCE_MASK (0xffffffffull) + + static inline __u64 kcov_remote_handle(__u64 subsys, __u64 inst) + { + if (subsys & ~KCOV_SUBSYSTEM_MASK || inst & ~KCOV_INSTANCE_MASK) + return 0; + return subsys | inst; + } + + #define KCOV_COMMON_ID 0x42 + #define KCOV_USB_BUS_NUM 1 + + int main(int argc, char **argv) + { + int fd; + unsigned long *cover, n, i; + struct kcov_remote_arg *arg; + + fd = open("/sys/kernel/debug/kcov", O_RDWR); + if (fd == -1) + perror("open"), exit(1); + if (ioctl(fd, KCOV_INIT_TRACE, COVER_SIZE)) + perror("ioctl"), exit(1); + cover = (unsigned long*)mmap(NULL, COVER_SIZE * sizeof(unsigned long), + PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if ((void*)cover == MAP_FAILED) + perror("mmap"), exit(1); + + /* Enable coverage collection via common handle and from USB bus #1. */ + arg = calloc(1, sizeof(*arg) + sizeof(uint64_t)); + if (!arg) + perror("calloc"), exit(1); + arg->trace_mode = KCOV_TRACE_PC; + arg->area_size = COVER_SIZE; + arg->num_handles = 1; + arg->common_handle = kcov_remote_handle(KCOV_SUBSYSTEM_COMMON, + KCOV_COMMON_ID); + arg->handles[0] = kcov_remote_handle(KCOV_SUBSYSTEM_USB, + KCOV_USB_BUS_NUM); + if (ioctl(fd, KCOV_REMOTE_ENABLE, arg)) + perror("ioctl"), free(arg), exit(1); + free(arg); + + /* + * Here the user needs to trigger execution of a kernel code section + * that is either annotated with the common handle, or to trigger some + * activity on USB bus #1. + */ + sleep(2); + + n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED); + for (i = 0; i < n; i++) + printf("0x%lx\n", cover[i + 1]); + if (ioctl(fd, KCOV_DISABLE, 0)) + perror("ioctl"), exit(1); + if (munmap(cover, COVER_SIZE * sizeof(unsigned long))) + perror("munmap"), exit(1); + if (close(fd)) + perror("close"), exit(1); + return 0; + } diff --git a/include/linux/kcov.h b/include/linux/kcov.h index b76a1807028d..a10e84707d82 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -37,12 +37,35 @@ do { \ (t)->kcov_mode &= ~KCOV_IN_CTXSW; \ } while (0) +/* See Documentation/dev-tools/kcov.rst for usage details. */ +void kcov_remote_start(u64 handle); +void kcov_remote_stop(void); +u64 kcov_common_handle(void); + +static inline void kcov_remote_start_common(u64 id) +{ + kcov_remote_start(kcov_remote_handle(KCOV_SUBSYSTEM_COMMON, id)); +} + +static inline void kcov_remote_start_usb(u64 id) +{ + kcov_remote_start(kcov_remote_handle(KCOV_SUBSYSTEM_USB, id)); +} + #else static inline void kcov_task_init(struct task_struct *t) {} static inline void kcov_task_exit(struct task_struct *t) {} static inline void kcov_prepare_switch(struct task_struct *t) {} static inline void kcov_finish_switch(struct task_struct *t) {} +static inline void kcov_remote_start(u64 handle) {} +static inline void kcov_remote_stop(void) {} +static inline u64 kcov_common_handle(void) +{ + return 0; +} +static inline void kcov_remote_start_common(u64 id) {} +static inline void kcov_remote_start_usb(u64 id) {} #endif /* CONFIG_KCOV */ #endif /* _LINUX_KCOV_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index ebea6e14a6bf..445d83cc6721 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1184,6 +1184,8 @@ struct task_struct { #endif /* CONFIG_TRACING */ #ifdef CONFIG_KCOV + /* See kernel/kcov.c for more details. */ + /* Coverage collection mode enabled for this task (0 if disabled): */ unsigned int kcov_mode; @@ -1195,6 +1197,12 @@ struct task_struct { /* KCOV descriptor wired with this task or NULL: */ struct kcov *kcov; + + /* KCOV common handle for remote coverage collection: */ + u64 kcov_handle; + + /* KCOV sequence number: */ + int kcov_sequence; #endif #ifdef CONFIG_MEMCG diff --git a/include/uapi/linux/kcov.h b/include/uapi/linux/kcov.h index 9529867717a8..409d3ad1e6e2 100644 --- a/include/uapi/linux/kcov.h +++ b/include/uapi/linux/kcov.h @@ -4,9 +4,24 @@ #include +/* + * Argument for KCOV_REMOTE_ENABLE ioctl, see Documentation/dev-tools/kcov.rst + * and the comment before kcov_remote_start() for usage details. + */ +struct kcov_remote_arg { + unsigned int trace_mode; /* KCOV_TRACE_PC or KCOV_TRACE_CMP */ + unsigned int area_size; /* Length of coverage buffer in words */ + unsigned int num_handles; /* Size of handles array */ + __u64 common_handle; + __u64 handles[0]; +}; + +#define KCOV_REMOTE_MAX_HANDLES 0x100 + #define KCOV_INIT_TRACE _IOR('c', 1, unsigned long) #define KCOV_ENABLE _IO('c', 100) #define KCOV_DISABLE _IO('c', 101) +#define KCOV_REMOTE_ENABLE _IOW('c', 102, struct kcov_remote_arg) enum { /* @@ -32,4 +47,17 @@ enum { #define KCOV_CMP_SIZE(n) ((n) << 1) #define KCOV_CMP_MASK KCOV_CMP_SIZE(3) +#define KCOV_SUBSYSTEM_COMMON (0x00ull << 56) +#define KCOV_SUBSYSTEM_USB (0x01ull << 56) + +#define KCOV_SUBSYSTEM_MASK (0xffull << 56) +#define KCOV_INSTANCE_MASK (0xffffffffull) + +static inline __u64 kcov_remote_handle(__u64 subsys, __u64 inst) +{ + if (subsys & ~KCOV_SUBSYSTEM_MASK || inst & ~KCOV_INSTANCE_MASK) + return 0; + return subsys | inst; +} + #endif /* _LINUX_KCOV_IOCTLS_H */ diff --git a/kernel/kcov.c b/kernel/kcov.c index 2ee38727844a..2f0048ef4b64 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -21,8 +22,11 @@ #include #include #include +#include #include +#define kcov_debug(fmt, ...) pr_debug("%s: " fmt, __func__, ##__VA_ARGS__) + /* Number of 64-bit words written per one comparison: */ #define KCOV_WORDS_PER_CMP 4 @@ -44,19 +48,100 @@ struct kcov { * Reference counter. We keep one for: * - opened file descriptor * - task with enabled coverage (we can't unwire it from another task) + * - each code section for remote coverage collection */ refcount_t refcount; /* The lock protects mode, size, area and t. */ spinlock_t lock; enum kcov_mode mode; - /* Size of arena (in long's for KCOV_MODE_TRACE). */ - unsigned size; + /* Size of arena (in long's). */ + unsigned int size; /* Coverage buffer shared with user space. */ void *area; /* Task for which we collect coverage, or NULL. */ struct task_struct *t; + /* Collecting coverage from remote (background) threads. */ + bool remote; + /* Size of remote area (in long's). */ + unsigned int remote_size; + /* + * Sequence is incremented each time kcov is reenabled, used by + * kcov_remote_stop(), see the comment there. + */ + int sequence; }; +struct kcov_remote_area { + struct list_head list; + unsigned int size; +}; + +struct kcov_remote { + u64 handle; + struct kcov *kcov; + struct hlist_node hnode; +}; + +static DEFINE_SPINLOCK(kcov_remote_lock); +static DEFINE_HASHTABLE(kcov_remote_map, 4); +static struct list_head kcov_remote_areas = LIST_HEAD_INIT(kcov_remote_areas); + +/* Must be called with kcov_remote_lock locked. */ +static struct kcov_remote *kcov_remote_find(u64 handle) +{ + struct kcov_remote *remote; + + hash_for_each_possible(kcov_remote_map, remote, hnode, handle) { + if (remote->handle == handle) + return remote; + } + return NULL; +} + +static struct kcov_remote *kcov_remote_add(struct kcov *kcov, u64 handle) +{ + struct kcov_remote *remote; + + if (kcov_remote_find(handle)) + return ERR_PTR(-EEXIST); + remote = kmalloc(sizeof(*remote), GFP_ATOMIC); + if (!remote) + return ERR_PTR(-ENOMEM); + remote->handle = handle; + remote->kcov = kcov; + hash_add(kcov_remote_map, &remote->hnode, handle); + return remote; +} + +/* Must be called with kcov_remote_lock locked. */ +static struct kcov_remote_area *kcov_remote_area_get(unsigned int size) +{ + struct kcov_remote_area *area; + struct list_head *pos; + + kcov_debug("size = %u\n", size); + list_for_each(pos, &kcov_remote_areas) { + area = list_entry(pos, struct kcov_remote_area, list); + if (area->size == size) { + list_del(&area->list); + kcov_debug("rv = %px\n", area); + return area; + } + } + kcov_debug("rv = NULL\n"); + return NULL; +} + +/* Must be called with kcov_remote_lock locked. */ +static void kcov_remote_area_put(struct kcov_remote_area *area, + unsigned int size) +{ + kcov_debug("area = %px, size = %u\n", area, size); + INIT_LIST_HEAD(&area->list); + area->size = size; + list_add(&area->list, &kcov_remote_areas); +} + static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t) { unsigned int mode; @@ -73,7 +158,7 @@ static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_stru * in_interrupt() returns false (e.g. preempt_schedule_irq()). * READ_ONCE()/barrier() effectively provides load-acquire wrt * interrupts, there are paired barrier()/WRITE_ONCE() in - * kcov_ioctl_locked(). + * kcov_start(). */ barrier(); return mode == needed_mode; @@ -227,6 +312,78 @@ void notrace __sanitizer_cov_trace_switch(u64 val, u64 *cases) EXPORT_SYMBOL(__sanitizer_cov_trace_switch); #endif /* ifdef CONFIG_KCOV_ENABLE_COMPARISONS */ +static void kcov_start(struct task_struct *t, unsigned int size, + void *area, enum kcov_mode mode, int sequence) +{ + kcov_debug("t = %px, size = %u, area = %px\n", t, size, area); + /* Cache in task struct for performance. */ + t->kcov_size = size; + t->kcov_area = area; + /* See comment in check_kcov_mode(). */ + barrier(); + WRITE_ONCE(t->kcov_mode, mode); + t->kcov_sequence = sequence; +} + +static void kcov_stop(struct task_struct *t) +{ + WRITE_ONCE(t->kcov_mode, KCOV_MODE_DISABLED); + barrier(); + t->kcov_size = 0; + t->kcov_area = NULL; +} + +static void kcov_task_reset(struct task_struct *t) +{ + kcov_stop(t); + t->kcov = NULL; + t->kcov_sequence = 0; + t->kcov_handle = 0; +} + +void kcov_task_init(struct task_struct *t) +{ + kcov_task_reset(t); + t->kcov_handle = current->kcov_handle; +} + +static void kcov_reset(struct kcov *kcov) +{ + kcov->t = NULL; + kcov->mode = KCOV_MODE_INIT; + kcov->remote = false; + kcov->remote_size = 0; + kcov->sequence++; +} + +static void kcov_remote_reset(struct kcov *kcov) +{ + int bkt; + struct kcov_remote *remote; + struct hlist_node *tmp; + + spin_lock(&kcov_remote_lock); + hash_for_each_safe(kcov_remote_map, bkt, tmp, remote, hnode) { + if (remote->kcov != kcov) + continue; + kcov_debug("removing handle %llx\n", remote->handle); + hash_del(&remote->hnode); + kfree(remote); + } + /* Do reset before unlock to prevent races with kcov_remote_start(). */ + kcov_reset(kcov); + spin_unlock(&kcov_remote_lock); +} + +static void kcov_disable(struct task_struct *t, struct kcov *kcov) +{ + kcov_task_reset(t); + if (kcov->remote) + kcov_remote_reset(kcov); + else + kcov_reset(kcov); +} + static void kcov_get(struct kcov *kcov) { refcount_inc(&kcov->refcount); @@ -235,20 +392,12 @@ static void kcov_get(struct kcov *kcov) static void kcov_put(struct kcov *kcov) { if (refcount_dec_and_test(&kcov->refcount)) { + kcov_remote_reset(kcov); vfree(kcov->area); kfree(kcov); } } -void kcov_task_init(struct task_struct *t) -{ - WRITE_ONCE(t->kcov_mode, KCOV_MODE_DISABLED); - barrier(); - t->kcov_size = 0; - t->kcov_area = NULL; - t->kcov = NULL; -} - void kcov_task_exit(struct task_struct *t) { struct kcov *kcov; @@ -256,15 +405,36 @@ void kcov_task_exit(struct task_struct *t) kcov = t->kcov; if (kcov == NULL) return; + spin_lock(&kcov->lock); + kcov_debug("t = %px, kcov->t = %px\n", t, kcov->t); + /* + * For KCOV_ENABLE devices we want to make sure that t->kcov->t == t, + * which comes down to: + * WARN_ON(!kcov->remote && kcov->t != t); + * + * For KCOV_REMOTE_ENABLE devices, the exiting task is either: + * 2. A remote task between kcov_remote_start() and kcov_remote_stop(). + * In this case we should print a warning right away, since a task + * shouldn't be exiting when it's in a kcov coverage collection + * section. Here t points to the task that is collecting remote + * coverage, and t->kcov->t points to the thread that created the + * kcov device. Which means that to detect this case we need to + * check that t != t->kcov->t, and this gives us the following: + * WARN_ON(kcov->remote && kcov->t != t); + * + * 2. The task that created kcov exiting without calling KCOV_DISABLE, + * and then again we can make sure that t->kcov->t == t: + * WARN_ON(kcov->remote && kcov->t != t); + * + * By combining all three checks into one we get: + */ if (WARN_ON(kcov->t != t)) { spin_unlock(&kcov->lock); return; } /* Just to not leave dangling references behind. */ - kcov_task_init(t); - kcov->t = NULL; - kcov->mode = KCOV_MODE_INIT; + kcov_disable(t, kcov); spin_unlock(&kcov->lock); kcov_put(kcov); } @@ -313,6 +483,7 @@ static int kcov_open(struct inode *inode, struct file *filep) if (!kcov) return -ENOMEM; kcov->mode = KCOV_MODE_DISABLED; + kcov->sequence = 1; refcount_set(&kcov->refcount, 1); spin_lock_init(&kcov->lock); filep->private_data = kcov; @@ -325,6 +496,20 @@ static int kcov_close(struct inode *inode, struct file *filep) return 0; } +static int kcov_get_mode(unsigned long arg) +{ + if (arg == KCOV_TRACE_PC) + return KCOV_MODE_TRACE_PC; + else if (arg == KCOV_TRACE_CMP) +#ifdef CONFIG_KCOV_ENABLE_COMPARISONS + return KCOV_MODE_TRACE_CMP; +#else + return -ENOTSUPP; +#endif + else + return -EINVAL; +} + /* * Fault in a lazily-faulted vmalloc area before it can be used by * __santizer_cov_trace_pc(), to avoid recursion issues if any code on the @@ -340,14 +525,35 @@ static void kcov_fault_in_area(struct kcov *kcov) READ_ONCE(area[offset]); } +static inline bool kcov_check_handle(u64 handle, bool common_valid, + bool uncommon_valid, bool zero_valid) +{ + if (handle & ~(KCOV_SUBSYSTEM_MASK | KCOV_INSTANCE_MASK)) + return false; + switch (handle & KCOV_SUBSYSTEM_MASK) { + case KCOV_SUBSYSTEM_COMMON: + return (handle & KCOV_INSTANCE_MASK) ? + common_valid : zero_valid; + case KCOV_SUBSYSTEM_USB: + return uncommon_valid; + default: + return false; + } + return false; +} + static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, unsigned long arg) { struct task_struct *t; unsigned long size, unused; + int mode, i; + struct kcov_remote_arg *remote_arg; + struct kcov_remote *remote; switch (cmd) { case KCOV_INIT_TRACE: + kcov_debug("KCOV_INIT_TRACE\n"); /* * Enable kcov in trace mode and setup buffer size. * Must happen before anything else. @@ -366,6 +572,7 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, kcov->mode = KCOV_MODE_INIT; return 0; case KCOV_ENABLE: + kcov_debug("KCOV_ENABLE\n"); /* * Enable coverage for the current task. * At this point user must have been enabled trace mode, @@ -378,29 +585,20 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, t = current; if (kcov->t != NULL || t->kcov != NULL) return -EBUSY; - if (arg == KCOV_TRACE_PC) - kcov->mode = KCOV_MODE_TRACE_PC; - else if (arg == KCOV_TRACE_CMP) -#ifdef CONFIG_KCOV_ENABLE_COMPARISONS - kcov->mode = KCOV_MODE_TRACE_CMP; -#else - return -ENOTSUPP; -#endif - else - return -EINVAL; + mode = kcov_get_mode(arg); + if (mode < 0) + return mode; kcov_fault_in_area(kcov); - /* Cache in task struct for performance. */ - t->kcov_size = kcov->size; - t->kcov_area = kcov->area; - /* See comment in check_kcov_mode(). */ - barrier(); - WRITE_ONCE(t->kcov_mode, kcov->mode); + kcov->mode = mode; + kcov_start(t, kcov->size, kcov->area, kcov->mode, + kcov->sequence); t->kcov = kcov; kcov->t = t; - /* This is put either in kcov_task_exit() or in KCOV_DISABLE. */ + /* Put either in kcov_task_exit() or in KCOV_DISABLE. */ kcov_get(kcov); return 0; case KCOV_DISABLE: + kcov_debug("KCOV_DISABLE\n"); /* Disable coverage for the current task. */ unused = arg; if (unused != 0 || current->kcov != kcov) @@ -408,11 +606,65 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, t = current; if (WARN_ON(kcov->t != t)) return -EINVAL; - kcov_task_init(t); - kcov->t = NULL; - kcov->mode = KCOV_MODE_INIT; + kcov_disable(t, kcov); kcov_put(kcov); return 0; + case KCOV_REMOTE_ENABLE: + kcov_debug("KCOV_REMOTE_ENABLE\n"); + if (kcov->mode != KCOV_MODE_INIT || !kcov->area) + return -EINVAL; + t = current; + if (kcov->t != NULL || t->kcov != NULL) + return -EBUSY; + remote_arg = (struct kcov_remote_arg *)arg; + mode = kcov_get_mode(remote_arg->trace_mode); + if (mode < 0) + return mode; + if (remote_arg->area_size > LONG_MAX / sizeof(unsigned long)) + return -EINVAL; + kcov->mode = mode; + t->kcov = kcov; + kcov->t = t; + kcov->remote = true; + kcov->remote_size = remote_arg->area_size; + spin_lock(&kcov_remote_lock); + for (i = 0; i < remote_arg->num_handles; i++) { + kcov_debug("handle %llx\n", remote_arg->handles[i]); + if (!kcov_check_handle(remote_arg->handles[i], + false, true, false)) { + spin_unlock(&kcov_remote_lock); + kcov_disable(t, kcov); + return -EINVAL; + } + remote = kcov_remote_add(kcov, remote_arg->handles[i]); + if (IS_ERR(remote)) { + spin_unlock(&kcov_remote_lock); + kcov_disable(t, kcov); + return PTR_ERR(remote); + } + } + if (remote_arg->common_handle) { + kcov_debug("common handle %llx\n", + remote_arg->common_handle); + if (!kcov_check_handle(remote_arg->common_handle, + true, false, false)) { + spin_unlock(&kcov_remote_lock); + kcov_disable(t, kcov); + return -EINVAL; + } + remote = kcov_remote_add(kcov, + remote_arg->common_handle); + if (IS_ERR(remote)) { + spin_unlock(&kcov_remote_lock); + kcov_disable(t, kcov); + return PTR_ERR(remote); + } + t->kcov_handle = remote_arg->common_handle; + } + spin_unlock(&kcov_remote_lock); + /* Put either in kcov_task_exit() or in KCOV_DISABLE. */ + kcov_get(kcov); + return 0; default: return -ENOTTY; } @@ -422,11 +674,35 @@ static long kcov_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) { struct kcov *kcov; int res; + struct kcov_remote_arg *remote_arg = NULL; + unsigned int remote_num_handles; + unsigned long remote_arg_size; + + if (cmd == KCOV_REMOTE_ENABLE) { + if (get_user(remote_num_handles, (unsigned __user *)(arg + + offsetof(struct kcov_remote_arg, num_handles)))) + return -EFAULT; + if (remote_num_handles > KCOV_REMOTE_MAX_HANDLES) + return -EINVAL; + remote_arg_size = sizeof(*remote_arg) + + sizeof(remote_arg->handles[0]) * remote_num_handles; + remote_arg = memdup_user((void __user *)arg, remote_arg_size); + if (IS_ERR(remote_arg)) + return PTR_ERR(remote_arg); + if (remote_arg->num_handles != remote_num_handles) { + kfree(remote_arg); + return -EINVAL; + } + arg = (unsigned long)remote_arg; + } kcov = filep->private_data; spin_lock(&kcov->lock); res = kcov_ioctl_locked(kcov, cmd, arg); spin_unlock(&kcov->lock); + + kfree(remote_arg); + return res; } @@ -438,6 +714,207 @@ static const struct file_operations kcov_fops = { .release = kcov_close, }; +/* + * kcov_remote_start() and kcov_remote_stop() can be used to annotate a section + * of code in a kernel background thread to allow kcov to be used to collect + * coverage from that part of code. + * + * The handle argument of kcov_remote_start() identifies a code section that is + * used for coverage collection. A userspace process passes this handle to + * KCOV_REMOTE_ENABLE ioctl to make the used kcov device start collecting + * coverage for the code section identified by this handle. + * + * The usage of these annotations in the kernel code is different depending on + * the type of the kernel thread whose code is being annotated. + * + * For global kernel threads that are spawned in a limited number of instances + * (e.g. one USB hub_event() worker thread is spawned per USB HCD), each + * instance must be assigned a unique 4-byte instance id. The instance id is + * then combined with a 1-byte subsystem id to get a handle via + * kcov_remote_handle(subsystem_id, instance_id). + * + * For local kernel threads that are spawned from system calls handler when a + * user interacts with some kernel interface (e.g. vhost workers), a handle is + * passed from a userspace process as the common_handle field of the + * kcov_remote_arg struct (note, that the user must generate a handle by using + * kcov_remote_handle() with KCOV_SUBSYSTEM_COMMON as the subsystem id and an + * arbitrary 4-byte non-zero number as the instance id). This common handle + * then gets saved into the task_struct of the process that issued the + * KCOV_REMOTE_ENABLE ioctl. When this proccess issues system calls that spawn + * kernel threads, the common handle must be retrived via kcov_common_handle() + * and passed to the spawned threads via custom annotations. Those kernel + * threads must in turn be annotated with kcov_remote_start(common_handle) and + * kcov_remote_stop(). All of the threads that are spawned by the same process + * obtain the same handle, hence the name "common". + * + * See Documentation/dev-tools/kcov.rst for more details. + * + * Internally, this function looks up the kcov device associated with the + * provided handle, allocates an area for coverage collection, and saves the + * pointers to kcov and area into the current task_struct to allow coverage to + * be collected via __sanitizer_cov_trace_pc() + * In turns kcov_remote_stop() clears those pointers from task_struct to stop + * collecting coverage and copies all collected coverage into the kcov area. + */ +void kcov_remote_start(u64 handle) +{ + struct kcov_remote *remote; + void *area; + struct task_struct *t; + unsigned int size; + enum kcov_mode mode; + int sequence; + + if (WARN_ON(!kcov_check_handle(handle, true, true, true))) + return; + if (WARN_ON(!in_task())) + return; + t = current; + /* + * Check that kcov_remote_start is not called twice + * nor called by user tasks (with enabled kcov). + */ + if (WARN_ON(t->kcov)) + return; + + kcov_debug("handle = %llx\n", handle); + + spin_lock(&kcov_remote_lock); + remote = kcov_remote_find(handle); + if (!remote) { + kcov_debug("no remote found"); + spin_unlock(&kcov_remote_lock); + return; + } + /* Put in kcov_remote_stop(). */ + kcov_get(remote->kcov); + t->kcov = remote->kcov; + /* + * Read kcov fields before unlock to prevent races with + * KCOV_DISABLE / kcov_remote_reset(). + */ + size = remote->kcov->remote_size; + mode = remote->kcov->mode; + sequence = remote->kcov->sequence; + area = kcov_remote_area_get(size); + spin_unlock(&kcov_remote_lock); + + if (!area) { + area = vmalloc(size * sizeof(unsigned long)); + if (!area) { + t->kcov = NULL; + kcov_put(remote->kcov); + return; + } + } + /* Reset coverage size. */ + *(u64 *)area = 0; + + kcov_debug("area = %px, size = %u", area, size); + + kcov_start(t, size, area, mode, sequence); + +} +EXPORT_SYMBOL(kcov_remote_start); + +static void kcov_move_area(enum kcov_mode mode, void *dst_area, + unsigned int dst_area_size, void *src_area) +{ + u64 word_size = sizeof(unsigned long); + u64 count_size, entry_size_log; + u64 dst_len, src_len; + void *dst_entries, *src_entries; + u64 dst_occupied, dst_free, bytes_to_move, entries_moved; + + kcov_debug("%px %u <= %px %lu\n", + dst_area, dst_area_size, src_area, *(unsigned long *)src_area); + + switch (mode) { + case KCOV_MODE_TRACE_PC: + dst_len = READ_ONCE(*(unsigned long *)dst_area); + src_len = *(unsigned long *)src_area; + count_size = sizeof(unsigned long); + entry_size_log = __ilog2_u64(sizeof(unsigned long)); + break; + case KCOV_MODE_TRACE_CMP: + dst_len = READ_ONCE(*(u64 *)dst_area); + src_len = *(u64 *)src_area; + count_size = sizeof(u64); + BUILD_BUG_ON(!is_power_of_2(KCOV_WORDS_PER_CMP)); + entry_size_log = __ilog2_u64(sizeof(u64) * KCOV_WORDS_PER_CMP); + break; + default: + WARN_ON(1); + return; + } + + /* As arm can't divide u64 integers use log of entry size. */ + if (dst_len > ((dst_area_size * word_size - count_size) >> + entry_size_log)) + return; + dst_occupied = count_size + (dst_len << entry_size_log); + dst_free = dst_area_size * word_size - dst_occupied; + bytes_to_move = min(dst_free, src_len << entry_size_log); + dst_entries = dst_area + dst_occupied; + src_entries = src_area + count_size; + memcpy(dst_entries, src_entries, bytes_to_move); + entries_moved = bytes_to_move >> entry_size_log; + + switch (mode) { + case KCOV_MODE_TRACE_PC: + WRITE_ONCE(*(unsigned long *)dst_area, dst_len + entries_moved); + break; + case KCOV_MODE_TRACE_CMP: + WRITE_ONCE(*(u64 *)dst_area, dst_len + entries_moved); + break; + default: + break; + } +} + +/* See the comment before kcov_remote_start() for usage details. */ +void kcov_remote_stop(void) +{ + struct task_struct *t = current; + struct kcov *kcov = t->kcov; + void *area = t->kcov_area; + unsigned int size = t->kcov_size; + int sequence = t->kcov_sequence; + + if (!kcov) { + kcov_debug("no kcov found\n"); + return; + } + + kcov_stop(t); + t->kcov = NULL; + + spin_lock(&kcov->lock); + /* + * KCOV_DISABLE could have been called between kcov_remote_start() + * and kcov_remote_stop(), hence the check. + */ + kcov_debug("move if: %d == %d && %d\n", + sequence, kcov->sequence, (int)kcov->remote); + if (sequence == kcov->sequence && kcov->remote) + kcov_move_area(kcov->mode, kcov->area, kcov->size, area); + spin_unlock(&kcov->lock); + + spin_lock(&kcov_remote_lock); + kcov_remote_area_put(area, size); + spin_unlock(&kcov_remote_lock); + + kcov_put(kcov); +} +EXPORT_SYMBOL(kcov_remote_stop); + +/* See the comment before kcov_remote_start() for usage details. */ +u64 kcov_common_handle(void) +{ + return current->kcov_handle; +} +EXPORT_SYMBOL(kcov_common_handle); + static int __init kcov_init(void) { /* From 00ce9d07b9a6daa5e1190263ab85f55f1a522f27 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 4 Dec 2019 16:52:47 -0800 Subject: [PATCH 2223/3715] UPSTREAM: usb, kcov: collect coverage from hub_event (Upstream commit 95d23dc27bde0ab4b25f7ade5e2fddc08dd97d9b.) Add kcov_remote_start()/kcov_remote_stop() annotations to the hub_event() function, which is responsible for processing events on USB buses, in particular events that happen during USB device enumeration. Since hub_event() is run in a global background kernel thread (see Documentation/dev-tools/kcov.rst for details), each USB bus gets a unique global handle from the USB subsystem kcov handle range. As the result kcov can now be used to collect coverage from events that happen on a particular USB bus. [akpm@linux-foundation.org: avoid patch conflicts to make life easier for Andrew] Link: http://lkml.kernel.org/r/de4fe1c219db2d002d905dc1736e2a3bfa1db997.1572366574.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Greg Kroah-Hartman Cc: Alan Stern Cc: Alexander Potapenko Cc: Anders Roxell Cc: Arnd Bergmann Cc: David Windsor Cc: Dmitry Vyukov Cc: Elena Reshetova Cc: Jason Wang Cc: Marco Elver Cc: "Michael S. Tsirkin" Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Andrey Konovalov Bug: 147413187 Change-Id: I4ef39331fe5b789459938e998b1f557406627ba6 --- drivers/usb/core/hub.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 4efccf8bf99f..294b9e48d3b3 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -5183,6 +5184,8 @@ static void hub_event(struct work_struct *work) hub_dev = hub->intfdev; intf = to_usb_interface(hub_dev); + kcov_remote_start_usb((u64)hdev->bus->busnum); + dev_dbg(hub_dev, "state %d ports %d chg %04x evt %04x\n", hdev->state, hdev->maxchild, /* NOTE: expects max 15 ports... */ @@ -5289,6 +5292,8 @@ out_hdev_lock: /* Balance the stuff in kick_hub_wq() and allow autosuspend */ usb_autopm_put_interface(intf); kref_put(&hub->kref, hub_release); + + kcov_remote_stop(); } static const struct usb_device_id hub_id_table[] = { From 47ba92a2552b60441f0dc69b94a731b9c1d7bfb9 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 4 Dec 2019 16:52:50 -0800 Subject: [PATCH 2224/3715] UPSTREAM: vhost, kcov: collect coverage from vhost_worker (Upstream commit 8f6a7f96dc29cefe16ab60f06f9c3a43510b96fd.) Add kcov_remote_start()/kcov_remote_stop() annotations to the vhost_worker() function, which is responsible for processing vhost works. Since vhost_worker() threads are spawned per vhost device instance the common kcov handle is used for kcov_remote_start()/stop() annotations (see Documentation/dev-tools/kcov.rst for details). As the result kcov can now be used to collect coverage from vhost worker threads. Link: http://lkml.kernel.org/r/e49d5d154e5da6c9ada521d2b7ce10a49ce9f98b.1572366574.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Cc: Alan Stern Cc: Alexander Potapenko Cc: Anders Roxell Cc: Arnd Bergmann Cc: David Windsor Cc: Dmitry Vyukov Cc: Elena Reshetova Cc: Greg Kroah-Hartman Cc: Jason Wang Cc: Marco Elver Cc: "Michael S. Tsirkin" Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Andrey Konovalov Bug: 147413187 Change-Id: Ie99a67ede00a839a28472877e5c3263db69d1c58 --- drivers/vhost/vhost.c | 6 ++++++ drivers/vhost/vhost.h | 1 + 2 files changed, 7 insertions(+) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 3d7bea15c57b..85edacc0be47 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "vhost.h" @@ -361,7 +362,9 @@ static int vhost_worker(void *data) llist_for_each_entry_safe(work, work_next, node, node) { clear_bit(VHOST_WORK_QUEUED, &work->flags); __set_current_state(TASK_RUNNING); + kcov_remote_start_common(dev->kcov_handle); work->fn(work); + kcov_remote_stop(); if (need_resched()) schedule(); } @@ -521,6 +524,7 @@ long vhost_dev_set_owner(struct vhost_dev *dev) /* No owner, become one */ dev->mm = get_task_mm(current); + dev->kcov_handle = kcov_common_handle(); worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid); if (IS_ERR(worker)) { err = PTR_ERR(worker); @@ -546,6 +550,7 @@ err_worker: if (dev->mm) mmput(dev->mm); dev->mm = NULL; + dev->kcov_handle = 0; err_mm: return err; } @@ -665,6 +670,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev, bool locked) if (dev->worker) { kthread_stop(dev->worker); dev->worker = NULL; + dev->kcov_handle = 0; } if (dev->mm) mmput(dev->mm); diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 950c5c4e4ee3..6e8f67ff1e1c 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -175,6 +175,7 @@ struct vhost_dev { wait_queue_head_t wait; int weight; int byte_weight; + u64 kcov_handle; }; bool vhost_exceeds_weight(struct vhost_virtqueue *vq, int pkts, int total_len); From 3852ef1983e37088e8ef7f21c80bfbbb8287e493 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Sat, 4 Jan 2020 12:59:39 -0800 Subject: [PATCH 2225/3715] UPSTREAM: kcov: fix struct layout for kcov_remote_arg (Upstream commit a69b83e1ae7f6c5ff2cc310870c1708405d86be2.) Make the layout of kcov_remote_arg the same for 32-bit and 64-bit code. This makes it more convenient to write userspace apps that can be compiled into 32-bit or 64-bit binaries and still work with the same 64-bit kernel. Also use proper __u32 types in uapi headers instead of unsigned ints. Link: http://lkml.kernel.org/r/9e91020876029cfefc9211ff747685eba9536426.1575638983.git.andreyknvl@google.com Fixes: eec028c9386ed1a ("kcov: remote coverage support") Signed-off-by: Andrey Konovalov Acked-by: Marco Elver Cc: Greg Kroah-Hartman Cc: Alan Stern Cc: Felipe Balbi Cc: Chunfeng Yun Cc: "Jacky . Cao @ sony . com" Cc: Dmitry Vyukov Cc: Alexander Potapenko Cc: Marco Elver Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Andrey Konovalov Bug: 147413187 Change-Id: I25a7107841048b3735db94c89199f9de73615333 --- Documentation/dev-tools/kcov.rst | 10 +++++----- include/uapi/linux/kcov.h | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst index b37e8e743af1..f254173b180f 100644 --- a/Documentation/dev-tools/kcov.rst +++ b/Documentation/dev-tools/kcov.rst @@ -251,11 +251,11 @@ selectively from different subsystems. .. code-block:: c struct kcov_remote_arg { - unsigned trace_mode; - unsigned area_size; - unsigned num_handles; - uint64_t common_handle; - uint64_t handles[0]; + __u32 trace_mode; + __u32 area_size; + __u32 num_handles; + __aligned_u64 common_handle; + __aligned_u64 handles[0]; }; #define KCOV_INIT_TRACE _IOR('c', 1, unsigned long) diff --git a/include/uapi/linux/kcov.h b/include/uapi/linux/kcov.h index 409d3ad1e6e2..1d0350e44ae3 100644 --- a/include/uapi/linux/kcov.h +++ b/include/uapi/linux/kcov.h @@ -9,11 +9,11 @@ * and the comment before kcov_remote_start() for usage details. */ struct kcov_remote_arg { - unsigned int trace_mode; /* KCOV_TRACE_PC or KCOV_TRACE_CMP */ - unsigned int area_size; /* Length of coverage buffer in words */ - unsigned int num_handles; /* Size of handles array */ - __u64 common_handle; - __u64 handles[0]; + __u32 trace_mode; /* KCOV_TRACE_PC or KCOV_TRACE_CMP */ + __u32 area_size; /* Length of coverage buffer in words */ + __u32 num_handles; /* Size of handles array */ + __aligned_u64 common_handle; + __aligned_u64 handles[0]; }; #define KCOV_REMOTE_MAX_HANDLES 0x100 From c807f43500e14ed24599106745bb6005665f74d6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 19 Dec 2019 12:02:03 +0000 Subject: [PATCH 2226/3715] chardev: Avoid potential use-after-free in 'chrdev_open()' commit 68faa679b8be1a74e6663c21c3a9d25d32f1c079 upstream. 'chrdev_open()' calls 'cdev_get()' to obtain a reference to the 'struct cdev *' stashed in the 'i_cdev' field of the target inode structure. If the pointer is NULL, then it is initialised lazily by looking up the kobject in the 'cdev_map' and so the whole procedure is protected by the 'cdev_lock' spinlock to serialise initialisation of the shared pointer. Unfortunately, it is possible for the initialising thread to fail *after* installing the new pointer, for example if the subsequent '->open()' call on the file fails. In this case, 'cdev_put()' is called, the reference count on the kobject is dropped and, if nobody else has taken a reference, the release function is called which finally clears 'inode->i_cdev' from 'cdev_purge()' before potentially freeing the object. The problem here is that a racing thread can happily take the 'cdev_lock' and see the non-NULL pointer in the inode, which can result in a refcount increment from zero and a warning: | ------------[ cut here ]------------ | refcount_t: addition on 0; use-after-free. | WARNING: CPU: 2 PID: 6385 at lib/refcount.c:25 refcount_warn_saturate+0x6d/0xf0 | Modules linked in: | CPU: 2 PID: 6385 Comm: repro Not tainted 5.5.0-rc2+ #22 | Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 | RIP: 0010:refcount_warn_saturate+0x6d/0xf0 | Code: 05 55 9a 15 01 01 e8 9d aa c8 ff 0f 0b c3 80 3d 45 9a 15 01 00 75 ce 48 c7 c7 00 9c 62 b3 c6 08 | RSP: 0018:ffffb524c1b9bc70 EFLAGS: 00010282 | RAX: 0000000000000000 RBX: ffff9e9da1f71390 RCX: 0000000000000000 | RDX: ffff9e9dbbd27618 RSI: ffff9e9dbbd18798 RDI: ffff9e9dbbd18798 | RBP: 0000000000000000 R08: 000000000000095f R09: 0000000000000039 | R10: 0000000000000000 R11: ffffb524c1b9bb20 R12: ffff9e9da1e8c700 | R13: ffffffffb25ee8b0 R14: 0000000000000000 R15: ffff9e9da1e8c700 | FS: 00007f3b87d26700(0000) GS:ffff9e9dbbd00000(0000) knlGS:0000000000000000 | CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 | CR2: 00007fc16909c000 CR3: 000000012df9c000 CR4: 00000000000006e0 | DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 | DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 | Call Trace: | kobject_get+0x5c/0x60 | cdev_get+0x2b/0x60 | chrdev_open+0x55/0x220 | ? cdev_put.part.3+0x20/0x20 | do_dentry_open+0x13a/0x390 | path_openat+0x2c8/0x1470 | do_filp_open+0x93/0x100 | ? selinux_file_ioctl+0x17f/0x220 | do_sys_open+0x186/0x220 | do_syscall_64+0x48/0x150 | entry_SYSCALL_64_after_hwframe+0x44/0xa9 | RIP: 0033:0x7f3b87efcd0e | Code: 89 54 24 08 e8 a3 f4 ff ff 8b 74 24 0c 48 8b 3c 24 41 89 c0 44 8b 54 24 08 b8 01 01 00 00 89 f4 | RSP: 002b:00007f3b87d259f0 EFLAGS: 00000293 ORIG_RAX: 0000000000000101 | RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f3b87efcd0e | RDX: 0000000000000000 RSI: 00007f3b87d25a80 RDI: 00000000ffffff9c | RBP: 00007f3b87d25e90 R08: 0000000000000000 R09: 0000000000000000 | R10: 0000000000000000 R11: 0000000000000293 R12: 00007ffe188f504e | R13: 00007ffe188f504f R14: 00007f3b87d26700 R15: 0000000000000000 | ---[ end trace 24f53ca58db8180a ]--- Since 'cdev_get()' can already fail to obtain a reference, simply move it over to use 'kobject_get_unless_zero()' instead of 'kobject_get()', which will cause the racing thread to return -ENXIO if the initialising thread fails unexpectedly. Cc: Hillf Danton Cc: Andrew Morton Cc: Al Viro Reported-by: syzbot+82defefbbd8527e1c2cb@syzkaller.appspotmail.com Signed-off-by: Will Deacon Cc: stable Link: https://lore.kernel.org/r/20191219120203.32691-1-will@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/char_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/char_dev.c b/fs/char_dev.c index 20ce45c7c57c..715d76b00108 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -361,7 +361,7 @@ static struct kobject *cdev_get(struct cdev *p) if (owner && !try_module_get(owner)) return NULL; - kobj = kobject_get(&p->kobj); + kobj = kobject_get_unless_zero(&p->kobj); if (!kobj) module_put(owner); return kobj; From c07d275dd30f95d063ba27a83e8793bcf1afb8d7 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 26 Dec 2019 07:57:54 -0800 Subject: [PATCH 2227/3715] usb: chipidea: host: Disable port power only if previously enabled commit c1ffba305dbcf3fb9ca969c20a97acbddc38f8e9 upstream. On shutdown, ehci_power_off() is called unconditionally to power off each port, even if it was never called to power on the port. For chipidea, this results in a call to ehci_ci_portpower() with a request to power off ports even if the port was never powered on. This results in the following warning from the regulator code. WARNING: CPU: 0 PID: 182 at drivers/regulator/core.c:2596 _regulator_disable+0x1a8/0x210 unbalanced disables for usb_otg2_vbus Modules linked in: CPU: 0 PID: 182 Comm: init Not tainted 5.4.6 #1 Hardware name: Freescale i.MX7 Dual (Device Tree) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0xe0/0x10c) [] (dump_stack) from [] (__warn+0xf4/0x10c) [] (__warn) from [] (warn_slowpath_fmt+0x78/0xbc) [] (warn_slowpath_fmt) from [] (_regulator_disable+0x1a8/0x210) [] (_regulator_disable) from [] (regulator_disable+0x38/0xe8) [] (regulator_disable) from [] (ehci_ci_portpower+0x38/0xdc) [] (ehci_ci_portpower) from [] (ehci_port_power+0x50/0xa4) [] (ehci_port_power) from [] (ehci_silence_controller+0x5c/0xc4) [] (ehci_silence_controller) from [] (ehci_stop+0x3c/0xcc) [] (ehci_stop) from [] (usb_remove_hcd+0xe0/0x19c) [] (usb_remove_hcd) from [] (host_stop+0x38/0xa8) [] (host_stop) from [] (ci_hdrc_remove+0x44/0xe4) ... Keeping track of the power enable state avoids the warning and traceback. Fixes: c8679a2fb8dec ("usb: chipidea: host: add portpower override") Cc: Michael Grzeschik Cc: Peter Chen Cc: stable@vger.kernel.org Signed-off-by: Guenter Roeck Acked-by: Peter Chen Link: https://lore.kernel.org/r/20191226155754.25451-1-linux@roeck-us.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/host.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/chipidea/host.c b/drivers/usb/chipidea/host.c index 18cb8e46262d..83683a5627f3 100644 --- a/drivers/usb/chipidea/host.c +++ b/drivers/usb/chipidea/host.c @@ -37,6 +37,7 @@ static int (*orig_bus_suspend)(struct usb_hcd *hcd); struct ehci_ci_priv { struct regulator *reg_vbus; + bool enabled; }; static int ehci_ci_portpower(struct usb_hcd *hcd, int portnum, bool enable) @@ -48,7 +49,7 @@ static int ehci_ci_portpower(struct usb_hcd *hcd, int portnum, bool enable) int ret = 0; int port = HCS_N_PORTS(ehci->hcs_params); - if (priv->reg_vbus) { + if (priv->reg_vbus && enable != priv->enabled) { if (port > 1) { dev_warn(dev, "Not support multi-port regulator control\n"); @@ -64,6 +65,7 @@ static int ehci_ci_portpower(struct usb_hcd *hcd, int portnum, bool enable) enable ? "enable" : "disable", ret); return ret; } + priv->enabled = enable; } if (enable && (ci->platdata->phy_mode == USBPHY_INTERFACE_MODE_HSIC)) { From c33be6e4898f646b6ac9f6adb65c1c7670f70444 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 4 Jan 2020 12:09:36 +0100 Subject: [PATCH 2228/3715] ALSA: usb-audio: Apply the sample rate quirk for Bose Companion 5 commit 51d4efab7865e6ea6a4ebcd25b3f03c019515c4c upstream. Bose Companion 5 (with USB ID 05a7:1020) doesn't seem supporting reading back the sample rate, so the existing quirk is needed. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=206063 Cc: Link: https://lore.kernel.org/r/20200104110936.14288-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index ad14d6b78bdc..51ee7910e98c 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1143,6 +1143,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */ case USB_ID(0x0556, 0x0014): /* Phoenix Audio TMX320VC */ case USB_ID(0x05A3, 0x9420): /* ELP HD USB Camera */ + case USB_ID(0x05a7, 0x1020): /* Bose Companion 5 */ case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */ case USB_ID(0x1395, 0x740a): /* Sennheiser DECT */ case USB_ID(0x1901, 0x0191): /* GE B850V3 CP2114 audio interface */ From bbfcd088dec42f9b1b019dca6e67cc574bd7b967 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Fri, 3 Jan 2020 16:24:06 +0800 Subject: [PATCH 2229/3715] ALSA: hda/realtek - Add new codec supported for ALCS1200A commit 6d9ffcff646bbd0ede6c2a59f4cd28414ecec6e0 upstream. Add ALCS1200A supported. It was similar as ALC900. Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/r/a9bd3cdaa02d4fa197623448d5c51e50@realtek.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 41e3c77d5fb7..69877ee47740 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -396,6 +396,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) break; case 0x10ec0899: case 0x10ec0900: + case 0x10ec0b00: case 0x10ec1168: case 0x10ec1220: alc_update_coef_idx(codec, 0x7, 1<<1, 0); @@ -2389,6 +2390,7 @@ static int patch_alc882(struct hda_codec *codec) case 0x10ec0882: case 0x10ec0885: case 0x10ec0900: + case 0x10ec0b00: case 0x10ec1220: break; default: @@ -8398,6 +8400,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = { HDA_CODEC_ENTRY(0x10ec0892, "ALC892", patch_alc662), HDA_CODEC_ENTRY(0x10ec0899, "ALC898", patch_alc882), HDA_CODEC_ENTRY(0x10ec0900, "ALC1150", patch_alc882), + HDA_CODEC_ENTRY(0x10ec0b00, "ALCS1200A", patch_alc882), HDA_CODEC_ENTRY(0x10ec1168, "ALC1220", patch_alc882), HDA_CODEC_ENTRY(0x10ec1220, "ALC1220", patch_alc882), {} /* terminator */ From b2d1f611a7c46d2ae13fc9b54128eddc4bd04e8a Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 7 Jan 2020 17:22:19 +0800 Subject: [PATCH 2230/3715] ALSA: hda/realtek - Set EAPD control to default for ALC222 commit 9194a1ebbc56d7006835e2b4cacad301201fb832 upstream. Set EAPD control to verb control. Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 69877ee47740..5a7afbeb612d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -378,6 +378,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0672: alc_update_coef_idx(codec, 0xd, 0, 1<<14); /* EAPD Ctrl */ break; + case 0x10ec0222: case 0x10ec0623: alc_update_coef_idx(codec, 0x19, 1<<13, 0); break; From d51eac9941a45be04f97c886656dc853db69e160 Mon Sep 17 00:00:00 2001 From: Kaitao Cheng Date: Tue, 31 Dec 2019 05:35:30 -0800 Subject: [PATCH 2231/3715] kernel/trace: Fix do not unregister tracepoints when register sched_migrate_task fail commit 50f9ad607ea891a9308e67b81f774c71736d1098 upstream. In the function, if register_trace_sched_migrate_task() returns error, sched_switch/sched_wakeup_new/sched_wakeup won't unregister. That is why fail_deprobe_sched_switch was added. Link: http://lkml.kernel.org/r/20191231133530.2794-1-pilgrimtao@gmail.com Cc: stable@vger.kernel.org Fixes: 478142c39c8c2 ("tracing: do not grab lock in wakeup latency function tracing") Signed-off-by: Kaitao Cheng Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_sched_wakeup.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 0fa9dadf3f4f..a5a4b5663163 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -640,7 +640,7 @@ static void start_wakeup_tracer(struct trace_array *tr) if (ret) { pr_info("wakeup trace: Couldn't activate tracepoint" " probe to kernel_sched_migrate_task\n"); - return; + goto fail_deprobe_sched_switch; } wakeup_reset(tr); @@ -658,6 +658,8 @@ static void start_wakeup_tracer(struct trace_array *tr) printk(KERN_ERR "failed to start wakeup tracer\n"); return; +fail_deprobe_sched_switch: + unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL); fail_deprobe_wake_new: unregister_trace_sched_wakeup_new(probe_wakeup, NULL); fail_deprobe: From f12e2598a5a2871dbba539ec4e943246594b4256 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 2 Jan 2020 22:02:41 -0500 Subject: [PATCH 2232/3715] tracing: Have stack tracer compile when MCOUNT_INSN_SIZE is not defined commit b8299d362d0837ae39e87e9019ebe6b736e0f035 upstream. On some archs with some configurations, MCOUNT_INSN_SIZE is not defined, and this makes the stack tracer fail to compile. Just define it to zero in this case. Link: https://lore.kernel.org/r/202001020219.zvE3vsty%lkp@intel.com Cc: stable@vger.kernel.org Fixes: 4df297129f622 ("tracing: Remove most or all of stack tracer stack size from stack_max_size") Reported-by: kbuild test robot Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_stack.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 719a52a4064a..6f9091f874a9 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -196,6 +196,11 @@ check_stack(unsigned long ip, unsigned long *stack) local_irq_restore(flags); } +/* Some archs may not define MCOUNT_INSN_SIZE */ +#ifndef MCOUNT_INSN_SIZE +# define MCOUNT_INSN_SIZE 0 +#endif + static void stack_trace_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *pt_regs) From c54db442f5b2bb3de2cafb49dcc45d029e519cc6 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 10 Dec 2019 16:26:11 -0500 Subject: [PATCH 2233/3715] HID: Fix slab-out-of-bounds read in hid_field_extract commit 8ec321e96e056de84022c032ffea253431a83c3c upstream. The syzbot fuzzer found a slab-out-of-bounds bug in the HID report handler. The bug was caused by a report descriptor which included a field with size 12 bits and count 4899, for a total size of 7349 bytes. The usbhid driver uses at most a single-page 4-KB buffer for reports. In the test there wasn't any problem about overflowing the buffer, since only one byte was received from the device. Rather, the bug occurred when the HID core tried to extract the data from the report fields, which caused it to try reading data beyond the end of the allocated buffer. This patch fixes the problem by rejecting any report whose total length exceeds the HID_MAX_BUFFER_SIZE limit (minus one byte to allow for a possible report index). In theory a device could have a report longer than that, but if there was such a thing we wouldn't handle it correctly anyway. Reported-and-tested-by: syzbot+09ef48aa58261464b621@syzkaller.appspotmail.com Signed-off-by: Alan Stern CC: Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 6a04b56d161b..2d089d3954e3 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -268,6 +268,12 @@ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsign offset = report->size; report->size += parser->global.report_size * parser->global.report_count; + /* Total size check: Allow for possible report index byte */ + if (report->size > (HID_MAX_BUFFER_SIZE - 1) << 3) { + hid_err(parser->device, "report is too long\n"); + return -1; + } + if (!parser->local.usage_index) /* Ignore padding fields */ return 0; From 90a26bdc2f73c1d2a1d764d3d00e75228fc6b48e Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 4 Dec 2019 03:43:55 +0100 Subject: [PATCH 2234/3715] HID: uhid: Fix returning EPOLLOUT from uhid_char_poll commit be54e7461ffdc5809b67d2aeefc1ddc9a91470c7 upstream. Always return EPOLLOUT from uhid_char_poll to allow polling /dev/uhid for writable state. Fixes: 1f9dec1e0164 ("HID: uhid: allow poll()'ing on uhid devices") Signed-off-by: Marcel Holtmann Cc: stable@vger.kernel.org Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/uhid.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hid/uhid.c b/drivers/hid/uhid.c index 6f67d73b184e..e63b761f600a 100644 --- a/drivers/hid/uhid.c +++ b/drivers/hid/uhid.c @@ -25,6 +25,7 @@ #include #include #include +#include #define UHID_NAME "uhid" #define UHID_BUFSIZE 32 @@ -774,7 +775,7 @@ static unsigned int uhid_char_poll(struct file *file, poll_table *wait) if (uhid->head != uhid->tail) return POLLIN | POLLRDNORM; - return 0; + return EPOLLOUT | EPOLLWRNORM; } static const struct file_operations uhid_fops = { From 1df54fdd3f989a140ab3fa6c9f1c485288421a96 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 10 Dec 2019 12:32:31 +0100 Subject: [PATCH 2235/3715] can: gs_usb: gs_usb_probe(): use descriptors of current altsetting commit 2f361cd9474ab2c4ab9ac8db20faf81e66c6279b upstream. Make sure to always use the descriptors of the current alternate setting to avoid future issues when accessing fields that may differ between settings. Signed-off-by: Johan Hovold Fixes: d08e973a77d1 ("can: gs_usb: Added support for the GS_USB CAN devices") Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/gs_usb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index bfbf80949600..aed8ab6d6c5b 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -926,7 +926,7 @@ static int gs_usb_probe(struct usb_interface *intf, GS_USB_BREQ_HOST_FORMAT, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_INTERFACE, 1, - intf->altsetting[0].desc.bInterfaceNumber, + intf->cur_altsetting->desc.bInterfaceNumber, hconf, sizeof(*hconf), 1000); @@ -949,7 +949,7 @@ static int gs_usb_probe(struct usb_interface *intf, GS_USB_BREQ_DEVICE_CONFIG, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_INTERFACE, 1, - intf->altsetting[0].desc.bInterfaceNumber, + intf->cur_altsetting->desc.bInterfaceNumber, dconf, sizeof(*dconf), 1000); From e08e3dda248f115c77fc59016d26fdc1fd19cf58 Mon Sep 17 00:00:00 2001 From: Florian Faber Date: Thu, 26 Dec 2019 19:51:24 +0100 Subject: [PATCH 2236/3715] can: mscan: mscan_rx_poll(): fix rx path lockup when returning from polling to irq mode commit 2d77bd61a2927be8f4e00d9478fe6996c47e8d45 upstream. Under load, the RX side of the mscan driver can get stuck while TX still works. Restarting the interface locks up the system. This behaviour could be reproduced reliably on a MPC5121e based system. The patch fixes the return value of the NAPI polling function (should be the number of processed packets, not constant 1) and the condition under which IRQs are enabled again after polling is finished. With this patch, no more lockups were observed over a test period of ten days. Fixes: afa17a500a36 ("net/can: add driver for mscan family & mpc52xx_mscan") Signed-off-by: Florian Faber Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/mscan/mscan.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/net/can/mscan/mscan.c b/drivers/net/can/mscan/mscan.c index acb708fc1463..0a7d818a06f3 100644 --- a/drivers/net/can/mscan/mscan.c +++ b/drivers/net/can/mscan/mscan.c @@ -392,13 +392,12 @@ static int mscan_rx_poll(struct napi_struct *napi, int quota) struct net_device *dev = napi->dev; struct mscan_regs __iomem *regs = priv->reg_base; struct net_device_stats *stats = &dev->stats; - int npackets = 0; - int ret = 1; + int work_done = 0; struct sk_buff *skb; struct can_frame *frame; u8 canrflg; - while (npackets < quota) { + while (work_done < quota) { canrflg = in_8(®s->canrflg); if (!(canrflg & (MSCAN_RXF | MSCAN_ERR_IF))) break; @@ -419,18 +418,18 @@ static int mscan_rx_poll(struct napi_struct *napi, int quota) stats->rx_packets++; stats->rx_bytes += frame->can_dlc; - npackets++; + work_done++; netif_receive_skb(skb); } - if (!(in_8(®s->canrflg) & (MSCAN_RXF | MSCAN_ERR_IF))) { - napi_complete(&priv->napi); - clear_bit(F_RX_PROGRESS, &priv->flags); - if (priv->can.state < CAN_STATE_BUS_OFF) - out_8(®s->canrier, priv->shadow_canrier); - ret = 0; + if (work_done < quota) { + if (likely(napi_complete_done(&priv->napi, work_done))) { + clear_bit(F_RX_PROGRESS, &priv->flags); + if (priv->can.state < CAN_STATE_BUS_OFF) + out_8(®s->canrier, priv->shadow_canrier); + } } - return ret; + return work_done; } static irqreturn_t mscan_isr(int irq, void *dev_id) From 54a5ba5136c188c9d349236cc0a0abc5dc0a899d Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Sat, 7 Dec 2019 19:34:18 +0100 Subject: [PATCH 2237/3715] can: can_dropped_invalid_skb(): ensure an initialized headroom in outgoing CAN sk_buffs commit e7153bf70c3496bac00e7e4f395bb8d8394ac0ea upstream. KMSAN sysbot detected a read access to an untinitialized value in the headroom of an outgoing CAN related sk_buff. When using CAN sockets this area is filled appropriately - but when using a packet socket this initialization is missing. The problematic read access occurs in the CAN receive path which can only be triggered when the sk_buff is sent through a (virtual) CAN interface. So we check in the sending path whether we need to perform the missing initializations. Fixes: d3b58c47d330d ("can: replace timestamp as unique skb attribute") Reported-by: syzbot+b02ff0707a97e4e79ebb@syzkaller.appspotmail.com Signed-off-by: Oliver Hartkopp Tested-by: Oliver Hartkopp Cc: linux-stable # >= v4.1 Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- include/linux/can/dev.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index c0c0b992210e..995903c7055b 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -18,6 +18,7 @@ #include #include #include +#include #include /* @@ -90,6 +91,36 @@ struct can_priv { #define get_can_dlc(i) (min_t(__u8, (i), CAN_MAX_DLC)) #define get_canfd_dlc(i) (min_t(__u8, (i), CANFD_MAX_DLC)) +/* Check for outgoing skbs that have not been created by the CAN subsystem */ +static inline bool can_skb_headroom_valid(struct net_device *dev, + struct sk_buff *skb) +{ + /* af_packet creates a headroom of HH_DATA_MOD bytes which is fine */ + if (WARN_ON_ONCE(skb_headroom(skb) < sizeof(struct can_skb_priv))) + return false; + + /* af_packet does not apply CAN skb specific settings */ + if (skb->ip_summed == CHECKSUM_NONE) { + /* init headroom */ + can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + + /* preform proper loopback on capable devices */ + if (dev->flags & IFF_ECHO) + skb->pkt_type = PACKET_LOOPBACK; + else + skb->pkt_type = PACKET_HOST; + + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + } + + return true; +} + /* Drop a given socketbuffer if it does not contain a valid CAN frame. */ static inline bool can_dropped_invalid_skb(struct net_device *dev, struct sk_buff *skb) @@ -107,6 +138,9 @@ static inline bool can_dropped_invalid_skb(struct net_device *dev, } else goto inval_skb; + if (!can_skb_headroom_valid(dev, skb)) + goto inval_skb; + return false; inval_skb: From dfeb44f281b14c56a99203b119e4c6f4386390d6 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 5 Jan 2020 17:03:56 +0100 Subject: [PATCH 2238/3715] gpiolib: acpi: Turn dmi_system_id table into a generic quirk table commit 1ad1b54099c231aed8f6f257065c1b322583f264 upstream. Turn the existing run_edge_events_on_boot_blacklist dmi_system_id table into a generic quirk table, storing the quirks in the driver_data ptr. This is a preparation patch for adding other types of (DMI based) quirks. Cc: stable@vger.kernel.org Reviewed-by: Andy Shevchenko Acked-by: Mika Westerberg Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20200105160357.97154-2-hdegoede@redhat.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib-acpi.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 3aa7fe6baf2a..e2f933682182 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -24,6 +24,8 @@ #include "gpiolib.h" +#define QUIRK_NO_EDGE_EVENTS_ON_BOOT 0x01l + static int run_edge_events_on_boot = -1; module_param(run_edge_events_on_boot, int, 0444); MODULE_PARM_DESC(run_edge_events_on_boot, @@ -1312,7 +1314,7 @@ static int acpi_gpio_handle_deferred_request_irqs(void) /* We must use _sync so that this runs after the first deferred_probe run */ late_initcall_sync(acpi_gpio_handle_deferred_request_irqs); -static const struct dmi_system_id run_edge_events_on_boot_blacklist[] = { +static const struct dmi_system_id gpiolib_acpi_quirks[] = { { /* * The Minix Neo Z83-4 has a micro-USB-B id-pin handler for @@ -1322,7 +1324,8 @@ static const struct dmi_system_id run_edge_events_on_boot_blacklist[] = { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "MINIX"), DMI_MATCH(DMI_PRODUCT_NAME, "Z83-4"), - } + }, + .driver_data = (void *)QUIRK_NO_EDGE_EVENTS_ON_BOOT, }, { /* @@ -1334,15 +1337,23 @@ static const struct dmi_system_id run_edge_events_on_boot_blacklist[] = { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Wortmann_AG"), DMI_MATCH(DMI_PRODUCT_NAME, "TERRA_PAD_1061"), - } + }, + .driver_data = (void *)QUIRK_NO_EDGE_EVENTS_ON_BOOT, }, {} /* Terminating entry */ }; static int acpi_gpio_setup_params(void) { + const struct dmi_system_id *id; + long quirks = 0; + + id = dmi_first_match(gpiolib_acpi_quirks); + if (id) + quirks = (long)id->driver_data; + if (run_edge_events_on_boot < 0) { - if (dmi_check_system(run_edge_events_on_boot_blacklist)) + if (quirks & QUIRK_NO_EDGE_EVENTS_ON_BOOT) run_edge_events_on_boot = 0; else run_edge_events_on_boot = 1; From fbfb42b7268ec0459785d335a6fefee24c8a94b6 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 5 Jan 2020 17:03:57 +0100 Subject: [PATCH 2239/3715] gpiolib: acpi: Add honor_wakeup module-option + quirk mechanism commit aa23ca3d98f756d5b1e503fb140665fb24a41a38 upstream. On some laptops enabling wakeup on the GPIO interrupts used for ACPI _AEI event handling causes spurious wakeups. This commit adds a new honor_wakeup option, defaulting to true (our current behavior), which can be used to disable wakeup on troublesome hardware to avoid these spurious wakeups. This is a workaround for an architectural problem with s2idle under Linux where we do not have any mechanism to immediately go back to sleep after wakeup events, other then for embedded-controller events using the standard ACPI EC interface, for details see: https://lore.kernel.org/linux-acpi/61450f9b-cbc6-0c09-8b3a-aff6bf9a0b3c@redhat.com/ One series of laptops which is not able to suspend without this workaround is the HP x2 10 Cherry Trail models, this commit adds a DMI based quirk which makes sets honor_wakeup to false on these models. Cc: stable@vger.kernel.org Reviewed-by: Andy Shevchenko Acked-by: Mika Westerberg Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20200105160357.97154-3-hdegoede@redhat.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib-acpi.c | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index e2f933682182..c7b9125c8ec2 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -25,12 +25,18 @@ #include "gpiolib.h" #define QUIRK_NO_EDGE_EVENTS_ON_BOOT 0x01l +#define QUIRK_NO_WAKEUP 0x02l static int run_edge_events_on_boot = -1; module_param(run_edge_events_on_boot, int, 0444); MODULE_PARM_DESC(run_edge_events_on_boot, "Run edge _AEI event-handlers at boot: 0=no, 1=yes, -1=auto"); +static int honor_wakeup = -1; +module_param(honor_wakeup, int, 0444); +MODULE_PARM_DESC(honor_wakeup, + "Honor the ACPI wake-capable flag: 0=no, 1=yes, -1=auto"); + /** * struct acpi_gpio_event - ACPI GPIO event handler data * @@ -341,7 +347,7 @@ static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares, event->handle = evt_handle; event->handler = handler; event->irq = irq; - event->irq_is_wake = agpio->wake_capable == ACPI_WAKE_CAPABLE; + event->irq_is_wake = honor_wakeup && agpio->wake_capable == ACPI_WAKE_CAPABLE; event->pin = pin; event->desc = desc; @@ -1340,6 +1346,23 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] = { }, .driver_data = (void *)QUIRK_NO_EDGE_EVENTS_ON_BOOT, }, + { + /* + * Various HP X2 10 Cherry Trail models use an external + * embedded-controller connected via I2C + an ACPI GPIO + * event handler. The embedded controller generates various + * spurious wakeup events when suspended. So disable wakeup + * for its handler (it uses the only ACPI GPIO event handler). + * This breaks wakeup when opening the lid, the user needs + * to press the power-button to wakeup the system. The + * alternative is suspend simply not working, which is worse. + */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP x2 Detachable 10-p0XX"), + }, + .driver_data = (void *)QUIRK_NO_WAKEUP, + }, {} /* Terminating entry */ }; @@ -1359,6 +1382,13 @@ static int acpi_gpio_setup_params(void) run_edge_events_on_boot = 1; } + if (honor_wakeup < 0) { + if (quirks & QUIRK_NO_WAKEUP) + honor_wakeup = 0; + else + honor_wakeup = 1; + } + return 0; } From e77914f232115b4967491b5fb48a0dc8330e2d4a Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Fri, 20 Dec 2019 21:15:59 +0000 Subject: [PATCH 2240/3715] staging: vt6656: set usb_set_intfdata on driver fail. commit c0bcf9f3f5b661d4ace2a64a79ef661edd2a4dc8 upstream. intfdata will contain stale pointer when the device is detached after failed initialization when referenced in vt6656_disconnect Provide driver access to it here and NULL it. Cc: stable Signed-off-by: Malcolm Priestley Link: https://lore.kernel.org/r/6de448d7-d833-ef2e-dd7b-3ef9992fee0e@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vt6656/device.h | 1 + drivers/staging/vt6656/main_usb.c | 1 + drivers/staging/vt6656/wcmd.c | 1 + 3 files changed, 3 insertions(+) diff --git a/drivers/staging/vt6656/device.h b/drivers/staging/vt6656/device.h index 74715c854856..705fffa59da9 100644 --- a/drivers/staging/vt6656/device.h +++ b/drivers/staging/vt6656/device.h @@ -269,6 +269,7 @@ struct vnt_private { u8 mac_hw; /* netdev */ struct usb_device *usb; + struct usb_interface *intf; u64 tsf_time; u8 rx_rate; diff --git a/drivers/staging/vt6656/main_usb.c b/drivers/staging/vt6656/main_usb.c index cc6d8778fe5b..645ea16b53d5 100644 --- a/drivers/staging/vt6656/main_usb.c +++ b/drivers/staging/vt6656/main_usb.c @@ -954,6 +954,7 @@ vt6656_probe(struct usb_interface *intf, const struct usb_device_id *id) priv = hw->priv; priv->hw = hw; priv->usb = udev; + priv->intf = intf; vnt_set_options(priv); diff --git a/drivers/staging/vt6656/wcmd.c b/drivers/staging/vt6656/wcmd.c index b2fc17f1381b..3f6ccdeb6dec 100644 --- a/drivers/staging/vt6656/wcmd.c +++ b/drivers/staging/vt6656/wcmd.c @@ -109,6 +109,7 @@ void vnt_run_command(struct work_struct *work) if (vnt_init(priv)) { /* If fail all ends TODO retry */ dev_err(&priv->usb->dev, "failed to start\n"); + usb_set_intfdata(priv->intf, NULL); ieee80211_free_hw(priv->hw); return; } From 3a56fda03237994891658d420c35b07d053d3a3d Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Thu, 19 Dec 2019 11:07:07 +0100 Subject: [PATCH 2241/3715] USB: serial: option: add ZLP support for 0x1bc7/0x9010 commit 2438c3a19dec5e98905fd3ffcc2f24716aceda6b upstream. Telit FN980 flashing device 0x1bc7/0x9010 requires zero packet to be sent if out data size is is equal to the endpoint max size. Signed-off-by: Daniele Palmas [ johan: switch operands in conditional ] Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 8 ++++++++ drivers/usb/serial/usb-wwan.h | 1 + drivers/usb/serial/usb_wwan.c | 4 ++++ 3 files changed, 13 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index dc9a1139e7e1..e69e31539914 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -570,6 +570,9 @@ static void option_instat_callback(struct urb *urb); /* Interface must have two endpoints */ #define NUMEP2 BIT(16) +/* Device needs ZLP */ +#define ZLP BIT(17) + static const struct usb_device_id option_ids[] = { { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) }, @@ -1201,6 +1204,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1901, 0xff), /* Telit LN940 (MBIM) */ .driver_info = NCTRL(0) }, + { USB_DEVICE(TELIT_VENDOR_ID, 0x9010), /* Telit SBL FN980 flashing device */ + .driver_info = NCTRL(0) | ZLP }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0002, 0xff, 0xff, 0xff), .driver_info = RSVD(1) }, @@ -2109,6 +2114,9 @@ static int option_attach(struct usb_serial *serial) if (!(device_flags & NCTRL(iface_desc->bInterfaceNumber))) data->use_send_setup = 1; + if (device_flags & ZLP) + data->use_zlp = 1; + spin_lock_init(&data->susp_lock); usb_set_serial_data(serial, data); diff --git a/drivers/usb/serial/usb-wwan.h b/drivers/usb/serial/usb-wwan.h index d28dab4b9eff..9879773fb39e 100644 --- a/drivers/usb/serial/usb-wwan.h +++ b/drivers/usb/serial/usb-wwan.h @@ -36,6 +36,7 @@ struct usb_wwan_intf_private { spinlock_t susp_lock; unsigned int suspended:1; unsigned int use_send_setup:1; + unsigned int use_zlp:1; int in_flight; unsigned int open_ports; void *private; diff --git a/drivers/usb/serial/usb_wwan.c b/drivers/usb/serial/usb_wwan.c index 59bfcb3da116..95e9576c2fe6 100644 --- a/drivers/usb/serial/usb_wwan.c +++ b/drivers/usb/serial/usb_wwan.c @@ -492,6 +492,7 @@ static struct urb *usb_wwan_setup_urb(struct usb_serial_port *port, void (*callback) (struct urb *)) { struct usb_serial *serial = port->serial; + struct usb_wwan_intf_private *intfdata = usb_get_serial_data(serial); struct urb *urb; urb = usb_alloc_urb(0, GFP_KERNEL); /* No ISO */ @@ -502,6 +503,9 @@ static struct urb *usb_wwan_setup_urb(struct usb_serial_port *port, usb_sndbulkpipe(serial->dev, endpoint) | dir, buf, len, callback, ctx); + if (intfdata->use_zlp && dir == USB_DIR_OUT) + urb->transfer_flags |= URB_ZERO_PACKET; + return urb; } From b095f9e2e8ea53c4830ba81183a3ce8721aea0ce Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 7 Jan 2020 09:26:24 -0600 Subject: [PATCH 2242/3715] usb: musb: fix idling for suspend after disconnect interrupt commit 5fbf7a2534703fd71159d3d71504b0ad01b43394 upstream. When disconnected as USB B-device, suspend interrupt should come before diconnect interrupt, because the DP/DM pins are shorter than the VBUS/GND pins on the USB connectors. But we sometimes get a suspend interrupt after disconnect interrupt. In that case we have devctl set to 99 with VBUS still valid and musb_pm_runtime_check_session() wrongly thinks we have an active session. We have no other interrupts after disconnect coming in this case at least with the omap2430 glue. Let's fix the issue by checking the interrupt status again with delayed work for the devctl 99 case. In the suspend after disconnect case the devctl session bit has cleared by then and musb can idle. For a typical USB B-device connect case we just continue with normal interrupts. Fixes: 467d5c980709 ("usb: musb: Implement session bit based runtime PM for musb-core") Cc: Merlijn Wajer Cc: Pavel Machek Cc: Sebastian Reichel Cc: stable@vger.kernel.org Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Link: https://lore.kernel.org/r/20200107152625.857-2-b-liu@ti.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index ff17e94ef465..cc8cdeb4b7fe 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -1838,6 +1838,9 @@ static const struct attribute_group musb_attr_group = { #define MUSB_QUIRK_B_INVALID_VBUS_91 (MUSB_DEVCTL_BDEVICE | \ (2 << MUSB_DEVCTL_VBUS_SHIFT) | \ MUSB_DEVCTL_SESSION) +#define MUSB_QUIRK_B_DISCONNECT_99 (MUSB_DEVCTL_BDEVICE | \ + (3 << MUSB_DEVCTL_VBUS_SHIFT) | \ + MUSB_DEVCTL_SESSION) #define MUSB_QUIRK_A_DISCONNECT_19 ((3 << MUSB_DEVCTL_VBUS_SHIFT) | \ MUSB_DEVCTL_SESSION) @@ -1860,6 +1863,11 @@ static void musb_pm_runtime_check_session(struct musb *musb) s = MUSB_DEVCTL_FSDEV | MUSB_DEVCTL_LSDEV | MUSB_DEVCTL_HR; switch (devctl & ~s) { + case MUSB_QUIRK_B_DISCONNECT_99: + musb_dbg(musb, "Poll devctl in case of suspend after disconnect\n"); + schedule_delayed_work(&musb->irq_work, + msecs_to_jiffies(1000)); + break; case MUSB_QUIRK_B_INVALID_VBUS_91: if (musb->quirk_retries && !musb->flush_irq_work) { musb_dbg(musb, From b40e1f6761536f54e83e1ee99148ad354c43daa2 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Tue, 7 Jan 2020 09:26:25 -0600 Subject: [PATCH 2243/3715] usb: musb: Disable pullup at init commit 96a0c12843109e5c4d5eb1e09d915fdd0ce31d25 upstream. The pullup may be already enabled before the driver is initialized. This happens for instance on JZ4740. It has to be disabled at init time, as we cannot guarantee that a gadget driver will be bound to the UDC. Signed-off-by: Paul Cercueil Suggested-by: Bin Liu Cc: stable@vger.kernel.org Signed-off-by: Bin Liu Link: https://lore.kernel.org/r/20200107152625.857-3-b-liu@ti.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index cc8cdeb4b7fe..dca39c9a13b0 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -2328,6 +2328,9 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl) musb_disable_interrupts(musb); musb_writeb(musb->mregs, MUSB_DEVCTL, 0); + /* MUSB_POWER_SOFTCONN might be already set, JZ4740 does this. */ + musb_writeb(musb->mregs, MUSB_POWER, 0); + /* Init IRQ workqueue before request_irq */ INIT_DELAYED_WORK(&musb->irq_work, musb_irq_work); INIT_DELAYED_WORK(&musb->deassert_reset_work, musb_deassert_reset); From ba21819f1cdc3914bd76ff577d499424e9244c7f Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Mon, 16 Dec 2019 10:18:43 -0600 Subject: [PATCH 2244/3715] usb: musb: dma: Correct parameter passed to IRQ handler commit c80d0f4426c7fdc7efd6ae8d8b021dcfc89b4254 upstream. The IRQ handler was passed a pointer to a struct dma_controller, but the argument was then casted to a pointer to a struct musb_dma_controller. Fixes: 427c4f333474 ("usb: struct device - replace bus_id with dev_name(), dev_set_name()") Signed-off-by: Paul Cercueil Tested-by: Artur Rojek Cc: stable@vger.kernel.org Signed-off-by: Bin Liu Link: https://lore.kernel.org/r/20191216161844.772-2-b-liu@ti.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musbhsdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/musb/musbhsdma.c b/drivers/usb/musb/musbhsdma.c index 512108e22d2b..1dc35ab31275 100644 --- a/drivers/usb/musb/musbhsdma.c +++ b/drivers/usb/musb/musbhsdma.c @@ -399,7 +399,7 @@ struct dma_controller *musbhs_dma_controller_create(struct musb *musb, controller->controller.channel_abort = dma_channel_abort; if (request_irq(irq, dma_controller_irq, 0, - dev_name(musb->controller), &controller->controller)) { + dev_name(musb->controller), controller)) { dev_err(dev, "request_irq %d failed!\n", irq); musb_dma_controller_destroy(&controller->controller); From 912cb3eac58deadf051138c2021cbb3b0acb14b4 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Fri, 27 Dec 2019 17:00:54 +0000 Subject: [PATCH 2245/3715] staging: comedi: adv_pci1710: fix AI channels 16-31 for PCI-1713 commit a9d3a9cedc1330c720e0ddde1978a8e7771da5ab upstream. The Advantech PCI-1713 has 32 analog input channels, but an incorrect bit-mask in the definition of the `PCI171X_MUX_CHANH(x)` and PCI171X_MUX_CHANL(x)` macros is causing channels 16 to 31 to be aliases of channels 0 to 15. Change the bit-mask value from 0xf to 0xff to fix it. Note that the channel numbers will have been range checked already, so the bit-mask isn't really needed. Fixes: 92c65e5553ed ("staging: comedi: adv_pci1710: define the mux control register bits") Reported-by: Dmytro Fil Cc: # v4.5+ Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20191227170054.32051-1-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/adv_pci1710.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/comedi/drivers/adv_pci1710.c b/drivers/staging/comedi/drivers/adv_pci1710.c index 2c1b6de30da8..385e14269870 100644 --- a/drivers/staging/comedi/drivers/adv_pci1710.c +++ b/drivers/staging/comedi/drivers/adv_pci1710.c @@ -45,8 +45,8 @@ #define PCI171X_RANGE_UNI BIT(4) #define PCI171X_RANGE_GAIN(x) (((x) & 0x7) << 0) #define PCI171X_MUX_REG 0x04 /* W: A/D multiplexor control */ -#define PCI171X_MUX_CHANH(x) (((x) & 0xf) << 8) -#define PCI171X_MUX_CHANL(x) (((x) & 0xf) << 0) +#define PCI171X_MUX_CHANH(x) (((x) & 0xff) << 8) +#define PCI171X_MUX_CHANL(x) (((x) & 0xff) << 0) #define PCI171X_MUX_CHAN(x) (PCI171X_MUX_CHANH(x) | PCI171X_MUX_CHANL(x)) #define PCI171X_STATUS_REG 0x06 /* R: status register */ #define PCI171X_STATUS_IRQ BIT(11) /* 1=IRQ occurred */ From cb0a3edf8d00740303e5b42e9c0e72d924fc23d2 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 7 Dec 2019 13:05:18 -0800 Subject: [PATCH 2246/3715] HID: hid-input: clear unmapped usages commit 4f3882177240a1f55e45a3d241d3121341bead78 upstream. We should not be leaving half-mapped usages with potentially invalid keycodes, as that may confuse hidinput_find_key() when the key is located by index, which may end up feeding way too large keycode into the VT keyboard handler and cause OOB write there: BUG: KASAN: global-out-of-bounds in clear_bit include/asm-generic/bitops-instrumented.h:56 [inline] BUG: KASAN: global-out-of-bounds in kbd_keycode drivers/tty/vt/keyboard.c:1411 [inline] BUG: KASAN: global-out-of-bounds in kbd_event+0xe6b/0x3790 drivers/tty/vt/keyboard.c:1495 Write of size 8 at addr ffffffff89a1b2d8 by task syz-executor108/1722 ... kbd_keycode drivers/tty/vt/keyboard.c:1411 [inline] kbd_event+0xe6b/0x3790 drivers/tty/vt/keyboard.c:1495 input_to_handler+0x3b6/0x4c0 drivers/input/input.c:118 input_pass_values.part.0+0x2e3/0x720 drivers/input/input.c:145 input_pass_values drivers/input/input.c:949 [inline] input_set_keycode+0x290/0x320 drivers/input/input.c:954 evdev_handle_set_keycode_v2+0xc4/0x120 drivers/input/evdev.c:882 evdev_do_ioctl drivers/input/evdev.c:1150 [inline] Cc: stable@vger.kernel.org Reported-by: syzbot+19340dff067c2d3835c0@syzkaller.appspotmail.com Signed-off-by: Dmitry Torokhov Tested-by: Benjamin Tissoires Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-input.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 9d24fb0715ba..14e4003fde4d 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -1116,9 +1116,15 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel } mapped: - if (device->driver->input_mapped && device->driver->input_mapped(device, - hidinput, field, usage, &bit, &max) < 0) - goto ignore; + if (device->driver->input_mapped && + device->driver->input_mapped(device, hidinput, field, usage, + &bit, &max) < 0) { + /* + * The driver indicated that no further generic handling + * of the usage is desired. + */ + return; + } set_bit(usage->type, input->evbit); @@ -1176,9 +1182,11 @@ mapped: set_bit(MSC_SCAN, input->mscbit); } -ignore: return; +ignore: + usage->type = 0; + usage->code = 0; } void hidinput_hid_event(struct hid_device *hid, struct hid_field *field, struct hid_usage *usage, __s32 value) From af62c38b0f86539504dc5c0e5dcfc7613b1150a5 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 13 Dec 2019 14:56:16 -0800 Subject: [PATCH 2247/3715] Input: add safety guards to input_set_keycode() commit cb222aed03d798fc074be55e59d9a112338ee784 upstream. If we happen to have a garbage in input device's keycode table with values too big we'll end up doing clear_bit() with offset way outside of our bitmaps, damaging other objects within an input device or even outside of it. Let's add sanity checks to the returned old keycodes. Reported-by: syzbot+c769968809f9359b07aa@syzkaller.appspotmail.com Reported-by: syzbot+76f3a30e88d256644c78@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20191207212757.GA245964@dtor-ws Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/input.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/input/input.c b/drivers/input/input.c index 50d425fe6706..cadb368be8ef 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -858,16 +858,18 @@ static int input_default_setkeycode(struct input_dev *dev, } } - __clear_bit(*old_keycode, dev->keybit); - __set_bit(ke->keycode, dev->keybit); - - for (i = 0; i < dev->keycodemax; i++) { - if (input_fetch_keycode(dev, i) == *old_keycode) { - __set_bit(*old_keycode, dev->keybit); - break; /* Setting the bit twice is useless, so break */ + if (*old_keycode <= KEY_MAX) { + __clear_bit(*old_keycode, dev->keybit); + for (i = 0; i < dev->keycodemax; i++) { + if (input_fetch_keycode(dev, i) == *old_keycode) { + __set_bit(*old_keycode, dev->keybit); + /* Setting the bit twice is useless, so break */ + break; + } } } + __set_bit(ke->keycode, dev->keybit); return 0; } @@ -923,9 +925,13 @@ int input_set_keycode(struct input_dev *dev, * Simulate keyup event if keycode is not present * in the keymap anymore */ - if (test_bit(EV_KEY, dev->evbit) && - !is_event_supported(old_keycode, dev->keybit, KEY_MAX) && - __test_and_clear_bit(old_keycode, dev->key)) { + if (old_keycode > KEY_MAX) { + dev_warn(dev->dev.parent ?: &dev->dev, + "%s: got too big old keycode %#x\n", + __func__, old_keycode); + } else if (test_bit(EV_KEY, dev->evbit) && + !is_event_supported(old_keycode, dev->keybit, KEY_MAX) && + __test_and_clear_bit(old_keycode, dev->key)) { struct input_value vals[] = { { EV_KEY, old_keycode, 0 }, input_value_sync From 733463fdf7b0080e8092238a2c1f817f6aa81e4c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 30 Dec 2019 14:27:34 +0100 Subject: [PATCH 2248/3715] drm/fb-helper: Round up bits_per_pixel if possible commit f30e27779d3031a092c2a177b7fb76adccc45241 upstream. When userspace requests a video mode parameter value that is not supported, frame buffer device drivers should round it up to a supported value, if possible, instead of just rejecting it. This allows applications to quickly scan for supported video modes. Currently this rule is not followed for the number of bits per pixel, causing e.g. "fbset -depth N" to fail, if N is smaller than the current number of bits per pixel. Fix this by returning an error only if bits per pixel is too large, and setting it to the current value otherwise. See also Documentation/fb/framebuffer.rst, Section 2 (Programmer's View of /dev/fb*"). Fixes: 865afb11949e5bf4 ("drm/fb-helper: reject any changes to the fbdev") Cc: stable@vger.kernel.org Signed-off-by: Geert Uytterhoeven Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20191230132734.4538-1-geert+renesas@glider.be Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_fb_helper.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index f1259a0c2883..eb6bf881c465 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1590,7 +1590,7 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var, * Changes struct fb_var_screeninfo are currently not pushed back * to KMS, hence fail if different settings are requested. */ - if (var->bits_per_pixel != fb->format->cpp[0] * 8 || + if (var->bits_per_pixel > fb->format->cpp[0] * 8 || var->xres > fb->width || var->yres > fb->height || var->xres_virtual > fb->width || var->yres_virtual > fb->height) { DRM_DEBUG("fb requested width/height/bpp can't fit in current fb " @@ -1615,6 +1615,11 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var, drm_fb_helper_fill_pixel_fmt(var, fb->format->depth); } + /* + * Likewise, bits_per_pixel should be rounded up to a supported value. + */ + var->bits_per_pixel = fb->format->cpp[0] * 8; + /* * drm fbdev emulation doesn't support changing the pixel format at all, * so reject all pixel format changing requests. From 0c703639c11a17db7e479e71fea8778d098c95c2 Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Fri, 3 Jan 2020 13:50:01 +0800 Subject: [PATCH 2249/3715] drm/dp_mst: correct the shifting in DP_REMOTE_I2C_READ commit c4e4fccc5d52d881afaac11d3353265ef4eccb8b upstream. [Why] According to DP spec, it should shift left 4 digits for NO_STOP_BIT in REMOTE_I2C_READ message. Not 5 digits. In current code, NO_STOP_BIT is always set to zero which means I2C master is always generating a I2C stop at the end of each I2C write transaction while handling REMOTE_I2C_READ sideband message. This issue might have the generated I2C signal not meeting the requirement. Take random read in I2C for instance, I2C master should generate a repeat start to start to read data after writing the read address. This issue will cause the I2C master to generate a stop-start rather than a re-start which is not expected in I2C random read. [How] Correct the shifting value of NO_STOP_BIT for DP_REMOTE_I2C_READ case in drm_dp_encode_sideband_req(). Changes since v1:(https://patchwork.kernel.org/patch/11312667/) * Add more descriptions in commit and cc to stable Fixes: ad7f8a1f9ced ("drm/helper: add Displayport multi-stream helper (v0.6)") Reviewed-by: Harry Wentland Signed-off-by: Wayne Lin Cc: stable@vger.kernel.org Signed-off-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20200103055001.10287-1-Wayne.Lin@amd.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_mst_topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index ef86721c06f3..c8c83f84aced 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -274,7 +274,7 @@ static void drm_dp_encode_sideband_req(struct drm_dp_sideband_msg_req_body *req, memcpy(&buf[idx], req->u.i2c_read.transactions[i].bytes, req->u.i2c_read.transactions[i].num_bytes); idx += req->u.i2c_read.transactions[i].num_bytes; - buf[idx] = (req->u.i2c_read.transactions[i].no_stop_bit & 0x1) << 5; + buf[idx] = (req->u.i2c_read.transactions[i].no_stop_bit & 0x1) << 4; buf[idx] |= (req->u.i2c_read.transactions[i].i2c_transaction_delay & 0xf); idx++; } From a188bd5c9eb5294daff27cc2d8247074fee2eae2 Mon Sep 17 00:00:00 2001 From: Michael Straube Date: Sat, 28 Dec 2019 15:37:25 +0100 Subject: [PATCH 2250/3715] staging: rtl8188eu: Add device code for TP-Link TL-WN727N v5.21 commit 58dcc5bf4030cab548d5c98cd4cd3632a5444d5a upstream. This device was added to the stand-alone driver on github. Add it to the staging driver as well. Link: https://github.com/lwfinger/rtl8188eu/commit/b9b537aa25a8 Signed-off-by: Michael Straube Cc: stable Link: https://lore.kernel.org/r/20191228143725.24455-1-straube.linux@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/os_dep/usb_intf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/rtl8188eu/os_dep/usb_intf.c b/drivers/staging/rtl8188eu/os_dep/usb_intf.c index 3733b73863b6..536453358568 100644 --- a/drivers/staging/rtl8188eu/os_dep/usb_intf.c +++ b/drivers/staging/rtl8188eu/os_dep/usb_intf.c @@ -45,6 +45,7 @@ static const struct usb_device_id rtw_usb_id_tbl[] = { {USB_DEVICE(0x2001, 0x3311)}, /* DLink GO-USB-N150 REV B1 */ {USB_DEVICE(0x2001, 0x331B)}, /* D-Link DWA-121 rev B1 */ {USB_DEVICE(0x2357, 0x010c)}, /* TP-Link TL-WN722N v2 */ + {USB_DEVICE(0x2357, 0x0111)}, /* TP-Link TL-WN727N v5.21 */ {USB_DEVICE(0x0df6, 0x0076)}, /* Sitecom N150 v2 */ {USB_DEVICE(USB_VENDER_ID_REALTEK, 0xffef)}, /* Rosewill RNX-N150NUB */ {} /* Terminating entry */ From 3d7cbd45ae237bcc958b79cf6bb70d0e1052ca41 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Thu, 12 Dec 2019 13:16:02 +0000 Subject: [PATCH 2251/3715] tty: link tty and port before configuring it as console commit fb2b90014d782d80d7ebf663e50f96d8c507a73c upstream. There seems to be a race condition in tty drivers and I could see on many boot cycles a NULL pointer dereference as tty_init_dev() tries to do 'tty->port->itty = tty' even though tty->port is NULL. 'tty->port' will be set by the driver and if the driver has not yet done it before we open the tty device we can get to this situation. By adding some extra debug prints, I noticed that: 6.650130: uart_add_one_port 6.663849: register_console 6.664846: tty_open 6.674391: tty_init_dev 6.675456: tty_port_link_device uart_add_one_port() registers the console, as soon as it registers, the userspace tries to use it and that leads to tty_open() but uart_add_one_port() has not yet done tty_port_link_device() and so tty->port is not yet configured when control reaches tty_init_dev(). Further look into the code and tty_port_link_device() is done by uart_add_one_port(). After registering the console uart_add_one_port() will call tty_port_register_device_attr_serdev() and tty_port_link_device() is called from this. Call add tty_port_link_device() before uart_configure_port() is done and add a check in tty_port_link_device() so that it only links the port if it has not been done yet. Suggested-by: Jiri Slaby Signed-off-by: Sudip Mukherjee Cc: stable Link: https://lore.kernel.org/r/20191212131602.29504-1-sudipm.mukherjee@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 1 + drivers/tty/tty_port.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 38bb8f85e88d..0ff8de7725cf 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -2810,6 +2810,7 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *uport) if (uport->cons && uport->dev) of_console_check(uport->dev->of_node, uport->cons->name, uport->line); + tty_port_link_device(port, drv->tty_driver, uport->line); uart_configure_port(drv, state, uport); port->console = uart_console(uport); diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c index c93a33701d32..d5b598137211 100644 --- a/drivers/tty/tty_port.c +++ b/drivers/tty/tty_port.c @@ -88,7 +88,8 @@ void tty_port_link_device(struct tty_port *port, { if (WARN_ON(index >= driver->num)) return; - driver->ports[index] = port; + if (!driver->ports[index]) + driver->ports[index] = port; } EXPORT_SYMBOL_GPL(tty_port_link_device); From 292c7f12b2b986f84a612d9f623334e48aacce8f Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Fri, 27 Dec 2019 17:44:34 +0000 Subject: [PATCH 2252/3715] tty: always relink the port commit 273f632912f1b24b642ba5b7eb5022e43a72f3b5 upstream. If the serial device is disconnected and reconnected, it re-enumerates properly but does not link it. fwiw, linking means just saving the port index, so allow it always as there is no harm in saving the same value again even if it tries to relink with the same port. Fixes: fb2b90014d78 ("tty: link tty and port before configuring it as console") Reported-by: Kenneth R. Crudup Signed-off-by: Sudip Mukherjee Cc: stable Link: https://lore.kernel.org/r/20191227174434.12057-1-sudipm.mukherjee@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_port.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c index d5b598137211..c93a33701d32 100644 --- a/drivers/tty/tty_port.c +++ b/drivers/tty/tty_port.c @@ -88,8 +88,7 @@ void tty_port_link_device(struct tty_port *port, { if (WARN_ON(index >= driver->num)) return; - if (!driver->ports[index]) - driver->ports[index] = port; + driver->ports[index] = port; } EXPORT_SYMBOL_GPL(tty_port_link_device); From c2544fb30080aecc3fff99f2e97999ce8e625f45 Mon Sep 17 00:00:00 2001 From: Ganapathi Bhat Date: Thu, 21 Nov 2019 21:34:38 +0530 Subject: [PATCH 2253/3715] mwifiex: fix possible heap overflow in mwifiex_process_country_ie() commit 3d94a4a8373bf5f45cf5f939e88b8354dbf2311b upstream. mwifiex_process_country_ie() function parse elements of bss descriptor in beacon packet. When processing WLAN_EID_COUNTRY element, there is no upper limit check for country_ie_len before calling memcpy. The destination buffer domain_info->triplet is an array of length MWIFIEX_MAX_TRIPLET_802_11D(83). The remote attacker can build a fake AP with the same ssid as real AP, and send malicous beacon packet with long WLAN_EID_COUNTRY elemen (country_ie_len > 83). Attacker can force STA connect to fake AP on a different channel. When the victim STA connects to fake AP, will trigger the heap buffer overflow. Fix this by checking for length and if found invalid, don not connect to the AP. This fix addresses CVE-2019-14895. Reported-by: huangwen Signed-off-by: Ganapathi Bhat Signed-off-by: Kalle Valo Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/marvell/mwifiex/sta_ioctl.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c index a8043d76152a..f88a953b3cd5 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c @@ -271,6 +271,14 @@ static int mwifiex_process_country_ie(struct mwifiex_private *priv, "11D: skip setting domain info in FW\n"); return 0; } + + if (country_ie_len > + (IEEE80211_COUNTRY_STRING_LEN + MWIFIEX_MAX_TRIPLET_802_11D)) { + mwifiex_dbg(priv->adapter, ERROR, + "11D: country_ie_len overflow!, deauth AP\n"); + return -EINVAL; + } + memcpy(priv->adapter->country_code, &country_ie[2], 2); domain_info->country_code[0] = country_ie[2]; @@ -314,8 +322,9 @@ int mwifiex_bss_start(struct mwifiex_private *priv, struct cfg80211_bss *bss, priv->scan_block = false; if (bss) { - if (adapter->region_code == 0x00) - mwifiex_process_country_ie(priv, bss); + if (adapter->region_code == 0x00 && + mwifiex_process_country_ie(priv, bss)) + return -EINVAL; /* Allocate and fill new bss descriptor */ bss_desc = kzalloc(sizeof(struct mwifiex_bssdescriptor), From 6ddbe82681d911534f460e6afd297fcf7f388049 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Fri, 4 Oct 2019 15:08:52 -0500 Subject: [PATCH 2254/3715] mwifiex: pcie: Fix memory leak in mwifiex_pcie_alloc_cmdrsp_buf commit db8fd2cde93227e566a412cf53173ffa227998bc upstream. In mwifiex_pcie_alloc_cmdrsp_buf, a new skb is allocated which should be released if mwifiex_map_pci_memory() fails. The release is added. Fixes: fc3314609047 ("mwifiex: use pci_alloc/free_consistent APIs for PCIe") Signed-off-by: Navid Emamdoost Acked-by: Ganapathi Bhat Signed-off-by: Kalle Valo Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/marvell/mwifiex/pcie.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c index 9d0d790a1319..8ee9609ef974 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.c +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c @@ -1022,8 +1022,10 @@ static int mwifiex_pcie_alloc_cmdrsp_buf(struct mwifiex_adapter *adapter) } skb_put(skb, MWIFIEX_UPLD_SIZE); if (mwifiex_map_pci_memory(adapter, skb, MWIFIEX_UPLD_SIZE, - PCI_DMA_FROMDEVICE)) + PCI_DMA_FROMDEVICE)) { + kfree_skb(skb); return -1; + } card->cmdrsp_buf = skb; From f4e8c78fad1294c785de5e92562862dbef1e9c1e Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Tue, 10 Sep 2019 18:44:15 -0500 Subject: [PATCH 2255/3715] scsi: bfa: release allocated memory in case of error commit 0e62395da2bd5166d7c9e14cbc7503b256a34cb0 upstream. In bfad_im_get_stats if bfa_port_get_stats fails, allocated memory needs to be released. Link: https://lore.kernel.org/r/20190910234417.22151-1-navid.emamdoost@gmail.com Signed-off-by: Navid Emamdoost Signed-off-by: Martin K. Petersen Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/bfa/bfad_attr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/bfa/bfad_attr.c b/drivers/scsi/bfa/bfad_attr.c index d0a504af5b4f..0a70d54a4df6 100644 --- a/drivers/scsi/bfa/bfad_attr.c +++ b/drivers/scsi/bfa/bfad_attr.c @@ -283,8 +283,10 @@ bfad_im_get_stats(struct Scsi_Host *shost) rc = bfa_port_get_stats(BFA_FCPORT(&bfad->bfa), fcstats, bfad_hcb_comp, &fcomp); spin_unlock_irqrestore(&bfad->bfad_lock, flags); - if (rc != BFA_STATUS_OK) + if (rc != BFA_STATUS_OK) { + kfree(fcstats); return NULL; + } wait_for_completion(&fcomp.comp); From 32079b0c59f4620fdf7a5576af7502b0d05fcb01 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Thu, 19 Sep 2019 22:00:41 -0500 Subject: [PATCH 2256/3715] rtl8xxxu: prevent leaking urb commit a2cdd07488e666aa93a49a3fc9c9b1299e27ef3c upstream. In rtl8xxxu_submit_int_urb if usb_submit_urb fails the allocated urb should be released. Signed-off-by: Navid Emamdoost Reviewed-by: Chris Chiu Signed-off-by: Kalle Valo Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index b58bf8e2cad2..73fc5952fd37 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -5453,6 +5453,7 @@ static int rtl8xxxu_submit_int_urb(struct ieee80211_hw *hw) ret = usb_submit_urb(urb, GFP_KERNEL); if (ret) { usb_unanchor_urb(urb); + usb_free_urb(urb); goto error; } From 4af2276845448609264360e95973246f222a7d86 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Thu, 19 Sep 2019 20:36:26 -0500 Subject: [PATCH 2257/3715] ath10k: fix memory leak commit b8d17e7d93d2beb89e4f34c59996376b8b544792 upstream. In ath10k_usb_hif_tx_sg the allocated urb should be released if usb_submit_urb fails. Signed-off-by: Navid Emamdoost Signed-off-by: Kalle Valo Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath/ath10k/usb.c b/drivers/net/wireless/ath/ath10k/usb.c index f9c79e21ab22..c64a03f164c0 100644 --- a/drivers/net/wireless/ath/ath10k/usb.c +++ b/drivers/net/wireless/ath/ath10k/usb.c @@ -454,6 +454,7 @@ static int ath10k_usb_hif_tx_sg(struct ath10k *ar, u8 pipe_id, ath10k_dbg(ar, ATH10K_DBG_USB_BULK, "usb bulk transmit failed: %d\n", ret); usb_unanchor_urb(urb); + usb_free_urb(urb); ret = -EINVAL; goto err_free_urb_to_pipe; } From 66552949c83a903779830561cb9c23eabd9866ae Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 27 Mar 2018 11:51:12 +0100 Subject: [PATCH 2258/3715] arm64: cpufeature: Avoid warnings due to unused symbols commit 12eb369125abe92bfc55e9ce198200f5807b63ff upstream. An allnoconfig build complains about unused symbols due to functions that are called via conditional cpufeature and cpu_errata table entries. Annotate these as __maybe_unused if they are likely to be generic, or predicate their compilation on the same option as the table entry if they are specific to a given alternative. Signed-off-by: Will Deacon [Just a portion of the original patch] Signed-off-by: Jisheng Zhang Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpufeature.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 15ce2c8b9ee2..60066315d669 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -799,11 +799,6 @@ static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry, int _ MIDR_CPU_VAR_REV(1, MIDR_REVISION_MASK)); } -static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused) -{ - return is_kernel_in_hyp_mode(); -} - static bool hyp_offset_low(const struct arm64_cpu_capabilities *entry, int __unused) { @@ -937,6 +932,12 @@ static int __init parse_kpti(char *str) } early_param("kpti", parse_kpti); +#ifdef CONFIG_ARM64_VHE +static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused) +{ + return is_kernel_in_hyp_mode(); +} + static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused) { /* @@ -950,6 +951,7 @@ static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused) if (!alternatives_applied) write_sysreg(read_sysreg(tpidr_el1), tpidr_el2); } +#endif #ifdef CONFIG_ARM64_SSBD static int ssbs_emulation_handler(struct pt_regs *regs, u32 instr) From ac3a29ac803b5ce052f201ec7ce497bc3f30bd37 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 17 Dec 2019 14:50:21 -0800 Subject: [PATCH 2259/3715] HID: hiddev: fix mess in hiddev_open() commit 18a1b06e5b91d47dc86c0a66a762646ea7c5d141 upstream. The open method of hiddev handler fails to bring the device out of autosuspend state as was promised in 0361a28d3f9a, as it actually has 2 blocks that try to start the transport (call hid_hw_open()) with both being guarded by the "open" counter, so the 2nd block is never executed as the first block increments the counter so it is never at 0 when we check it for the second block. Additionally hiddev_open() was leaving counter incremented on errors, causing the device to never be reopened properly if there was ever an error. Let's fix all of this by factoring out code that creates client structure and powers up the device into a separate function that is being called from usbhid_open() with the "existancelock" being held. Fixes: 0361a28d3f9a ("HID: autosuspend support for USB HID") Signed-off-by: Dmitry Torokhov Signed-off-by: Benjamin Tissoires Signed-off-by: Greg Kroah-Hartman --- drivers/hid/usbhid/hiddev.c | 97 ++++++++++++++++--------------------- 1 file changed, 42 insertions(+), 55 deletions(-) diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c index ce342fd0457e..bccd97cdc53f 100644 --- a/drivers/hid/usbhid/hiddev.c +++ b/drivers/hid/usbhid/hiddev.c @@ -254,12 +254,51 @@ static int hiddev_release(struct inode * inode, struct file * file) return 0; } +static int __hiddev_open(struct hiddev *hiddev, struct file *file) +{ + struct hiddev_list *list; + int error; + + lockdep_assert_held(&hiddev->existancelock); + + list = vzalloc(sizeof(*list)); + if (!list) + return -ENOMEM; + + mutex_init(&list->thread_lock); + list->hiddev = hiddev; + + if (!hiddev->open++) { + error = hid_hw_power(hiddev->hid, PM_HINT_FULLON); + if (error < 0) + goto err_drop_count; + + error = hid_hw_open(hiddev->hid); + if (error < 0) + goto err_normal_power; + } + + spin_lock_irq(&hiddev->list_lock); + list_add_tail(&list->node, &hiddev->list); + spin_unlock_irq(&hiddev->list_lock); + + file->private_data = list; + + return 0; + +err_normal_power: + hid_hw_power(hiddev->hid, PM_HINT_NORMAL); +err_drop_count: + hiddev->open--; + vfree(list); + return error; +} + /* * open file op */ static int hiddev_open(struct inode *inode, struct file *file) { - struct hiddev_list *list; struct usb_interface *intf; struct hid_device *hid; struct hiddev *hiddev; @@ -268,66 +307,14 @@ static int hiddev_open(struct inode *inode, struct file *file) intf = usbhid_find_interface(iminor(inode)); if (!intf) return -ENODEV; + hid = usb_get_intfdata(intf); hiddev = hid->hiddev; - if (!(list = vzalloc(sizeof(struct hiddev_list)))) - return -ENOMEM; - mutex_init(&list->thread_lock); - list->hiddev = hiddev; - file->private_data = list; - - /* - * no need for locking because the USB major number - * is shared which usbcore guards against disconnect - */ - if (list->hiddev->exist) { - if (!list->hiddev->open++) { - res = hid_hw_open(hiddev->hid); - if (res < 0) - goto bail; - } - } else { - res = -ENODEV; - goto bail; - } - - spin_lock_irq(&list->hiddev->list_lock); - list_add_tail(&list->node, &hiddev->list); - spin_unlock_irq(&list->hiddev->list_lock); - mutex_lock(&hiddev->existancelock); - /* - * recheck exist with existance lock held to - * avoid opening a disconnected device - */ - if (!list->hiddev->exist) { - res = -ENODEV; - goto bail_unlock; - } - if (!list->hiddev->open++) - if (list->hiddev->exist) { - struct hid_device *hid = hiddev->hid; - res = hid_hw_power(hid, PM_HINT_FULLON); - if (res < 0) - goto bail_unlock; - res = hid_hw_open(hid); - if (res < 0) - goto bail_normal_power; - } - mutex_unlock(&hiddev->existancelock); - return 0; -bail_normal_power: - hid_hw_power(hid, PM_HINT_NORMAL); -bail_unlock: + res = hiddev->exist ? __hiddev_open(hiddev, file) : -ENODEV; mutex_unlock(&hiddev->existancelock); - spin_lock_irq(&list->hiddev->list_lock); - list_del(&list->node); - spin_unlock_irq(&list->hiddev->list_lock); -bail: - file->private_data = NULL; - vfree(list); return res; } From c51a3c85eb8c9a499e7efe51157ad21e1d83034d Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 6 Jan 2020 10:43:42 -0500 Subject: [PATCH 2260/3715] USB: Fix: Don't skip endpoint descriptors with maxpacket=0 commit 2548288b4fb059b2da9ceada172ef763077e8a59 upstream. It turns out that even though endpoints with a maxpacket length of 0 aren't useful for data transfer, the descriptors do serve other purposes. In particular, skipping them will also skip over other class-specific descriptors for classes such as UVC. This unexpected side effect has caused some UVC cameras to stop working. In addition, the USB spec requires that when isochronous endpoint descriptors are present in an interface's altsetting 0 (which is true on some devices), the maxpacket size _must_ be set to 0. Warning about such things seems like a bad idea. This patch updates an earlier commit which would log a warning and skip these endpoint descriptors. Now we only log a warning, and we don't even do that for isochronous endpoints in altsetting 0. We don't need to worry about preventing endpoints with maxpacket = 0 from ever being used for data transfers; usb_submit_urb() already checks for this. Reported-and-tested-by: Roger Whittaker Fixes: d482c7bb0541 ("USB: Skip endpoints with 0 maxpacket length") Signed-off-by: Alan Stern CC: Laurent Pinchart Link: https://marc.info/?l=linux-usb&m=157790377329882&w=2 Link: https://lore.kernel.org/r/Pine.LNX.4.44L0.2001061040270.1514-100000@iolanthe.rowland.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 1f525d5f6d2d..7df7faa3eed5 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -392,12 +392,16 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, endpoint->desc.wMaxPacketSize = cpu_to_le16(8); } - /* Validate the wMaxPacketSize field */ + /* + * Validate the wMaxPacketSize field. + * Some devices have isochronous endpoints in altsetting 0; + * the USB-2 spec requires such endpoints to have wMaxPacketSize = 0 + * (see the end of section 5.6.3), so don't warn about them. + */ maxp = usb_endpoint_maxp(&endpoint->desc); - if (maxp == 0) { - dev_warn(ddev, "config %d interface %d altsetting %d endpoint 0x%X has wMaxPacketSize 0, skipping\n", + if (maxp == 0 && !(usb_endpoint_xfer_isoc(d) && asnum == 0)) { + dev_warn(ddev, "config %d interface %d altsetting %d endpoint 0x%X has invalid wMaxPacketSize 0\n", cfgno, inum, asnum, d->bEndpointAddress); - goto skip_to_next_endpoint_or_interface_descriptor; } /* Find the highest legal maxpacket size for this endpoint */ From 1adecb749b826d4956a2691b470d806715619f90 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 20 Dec 2019 16:21:40 +0530 Subject: [PATCH 2261/3715] phy: cpcap-usb: Fix error path when no host driver is loaded commit 4acb0200ab2b07843e3ef5599add3454c7440f03 upstream. If musb_mailbox() returns an error, we must still continue to finish configuring the phy. Otherwise the phy state may end up only half initialized, and this can cause the debug serial console to stop working. And this will happen if the usb driver musb controller is not loaded. Let's fix the issue by adding helper for cpcap_usb_try_musb_mailbox(). Fixes: 6d6ce40f63af ("phy: cpcap-usb: Add CPCAP PMIC USB support") Cc: Merlijn Wajer Cc: Pavel Machek Cc: Sebastian Reichel Signed-off-by: Tony Lindgren Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Greg Kroah-Hartman --- drivers/phy/motorola/phy-cpcap-usb.c | 33 +++++++++++++++------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/phy/motorola/phy-cpcap-usb.c b/drivers/phy/motorola/phy-cpcap-usb.c index 6601ad0dfb3a..6beaf8e0449c 100644 --- a/drivers/phy/motorola/phy-cpcap-usb.c +++ b/drivers/phy/motorola/phy-cpcap-usb.c @@ -207,6 +207,19 @@ static int cpcap_phy_get_ints_state(struct cpcap_phy_ddata *ddata, static int cpcap_usb_set_uart_mode(struct cpcap_phy_ddata *ddata); static int cpcap_usb_set_usb_mode(struct cpcap_phy_ddata *ddata); +static void cpcap_usb_try_musb_mailbox(struct cpcap_phy_ddata *ddata, + enum musb_vbus_id_status status) +{ + int error; + + error = musb_mailbox(status); + if (!error) + return; + + dev_dbg(ddata->dev, "%s: musb_mailbox failed: %i\n", + __func__, error); +} + static void cpcap_usb_detect(struct work_struct *work) { struct cpcap_phy_ddata *ddata; @@ -226,9 +239,7 @@ static void cpcap_usb_detect(struct work_struct *work) if (error) goto out_err; - error = musb_mailbox(MUSB_ID_GROUND); - if (error) - goto out_err; + cpcap_usb_try_musb_mailbox(ddata, MUSB_ID_GROUND); error = regmap_update_bits(ddata->reg, CPCAP_REG_USBC3, CPCAP_BIT_VBUSSTBY_EN, @@ -255,9 +266,7 @@ static void cpcap_usb_detect(struct work_struct *work) error = cpcap_usb_set_usb_mode(ddata); if (error) goto out_err; - error = musb_mailbox(MUSB_ID_GROUND); - if (error) - goto out_err; + cpcap_usb_try_musb_mailbox(ddata, MUSB_ID_GROUND); return; } @@ -267,9 +276,7 @@ static void cpcap_usb_detect(struct work_struct *work) error = cpcap_usb_set_usb_mode(ddata); if (error) goto out_err; - error = musb_mailbox(MUSB_VBUS_VALID); - if (error) - goto out_err; + cpcap_usb_try_musb_mailbox(ddata, MUSB_VBUS_VALID); return; } @@ -279,9 +286,7 @@ static void cpcap_usb_detect(struct work_struct *work) if (error) goto out_err; - error = musb_mailbox(MUSB_VBUS_OFF); - if (error) - goto out_err; + cpcap_usb_try_musb_mailbox(ddata, MUSB_VBUS_OFF); dev_dbg(ddata->dev, "set UART mode\n"); @@ -647,9 +652,7 @@ static int cpcap_usb_phy_remove(struct platform_device *pdev) if (error) dev_err(ddata->dev, "could not set UART mode\n"); - error = musb_mailbox(MUSB_VBUS_OFF); - if (error) - dev_err(ddata->dev, "could not set mailbox\n"); + cpcap_usb_try_musb_mailbox(ddata, MUSB_VBUS_OFF); usb_remove_phy(&ddata->phy); cancel_delayed_work_sync(&ddata->detect_work); From f71f56ad53546fed4b22b783d7aecdf7dee4c71d Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sun, 22 Dec 2019 10:00:19 -0800 Subject: [PATCH 2262/3715] phy: cpcap-usb: Fix flakey host idling and enumerating of devices commit 049226b9fd7442149dcbcf55f15408f5973cceda upstream. We must let the USB host idle things properly before we switch to debug UART mode. Otherwise the USB host may never idle after disconnecting devices, and that causes the next enumeration to be flakey. Cc: Jacopo Mondi Cc: Marcel Partap Cc: Merlijn Wajer Cc: Michael Scott Cc: NeKit Cc: Pavel Machek Cc: Sebastian Reichel Acked-by: Pavel Machek Fixes: 6d6ce40f63af ("phy: cpcap-usb: Add CPCAP PMIC USB support") Signed-off-by: Tony Lindgren Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Greg Kroah-Hartman --- drivers/phy/motorola/phy-cpcap-usb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/motorola/phy-cpcap-usb.c b/drivers/phy/motorola/phy-cpcap-usb.c index 6beaf8e0449c..4ba3634009af 100644 --- a/drivers/phy/motorola/phy-cpcap-usb.c +++ b/drivers/phy/motorola/phy-cpcap-usb.c @@ -281,13 +281,13 @@ static void cpcap_usb_detect(struct work_struct *work) return; } + cpcap_usb_try_musb_mailbox(ddata, MUSB_VBUS_OFF); + /* Default to debug UART mode */ error = cpcap_usb_set_uart_mode(ddata); if (error) goto out_err; - cpcap_usb_try_musb_mailbox(ddata, MUSB_VBUS_OFF); - dev_dbg(ddata->dev, "set UART mode\n"); return; From c28aabbd643e2201a09d39e45240b661bfb61b50 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 27 Dec 2019 01:33:10 +0100 Subject: [PATCH 2263/3715] netfilter: arp_tables: init netns pointer in xt_tgchk_param struct commit 1b789577f655060d98d20ed0c6f9fbd469d6ba63 upstream. We get crash when the targets checkentry function tries to make use of the network namespace pointer for arptables. When the net pointer got added back in 2010, only ip/ip6/ebtables were changed to initialize it, so arptables has this set to NULL. This isn't a problem for normal arptables because no existing arptables target has a checkentry function that makes use of par->net. However, direct users of the setsockopt interface can provide any target they want as long as its registered for ARP or UNPSEC protocols. syzkaller managed to send a semi-valid arptables rule for RATEEST target which is enough to trigger NULL deref: kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN RIP: xt_rateest_tg_checkentry+0x11d/0xb40 net/netfilter/xt_RATEEST.c:109 [..] xt_check_target+0x283/0x690 net/netfilter/x_tables.c:1019 check_target net/ipv4/netfilter/arp_tables.c:399 [inline] find_check_entry net/ipv4/netfilter/arp_tables.c:422 [inline] translate_table+0x1005/0x1d70 net/ipv4/netfilter/arp_tables.c:572 do_replace net/ipv4/netfilter/arp_tables.c:977 [inline] do_arpt_set_ctl+0x310/0x640 net/ipv4/netfilter/arp_tables.c:1456 Fixes: add67461240c1d ("netfilter: add struct net * to target parameters") Reported-by: syzbot+d7358a458d8a81aee898@syzkaller.appspotmail.com Signed-off-by: Florian Westphal Acked-by: Cong Wang Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/ipv4/netfilter/arp_tables.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 356ae7da4f16..e288489ae3d5 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -394,10 +394,11 @@ next: ; return 1; } -static inline int check_target(struct arpt_entry *e, const char *name) +static int check_target(struct arpt_entry *e, struct net *net, const char *name) { struct xt_entry_target *t = arpt_get_target(e); struct xt_tgchk_param par = { + .net = net, .table = name, .entryinfo = e, .target = t->u.kernel.target, @@ -409,8 +410,9 @@ static inline int check_target(struct arpt_entry *e, const char *name) return xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false); } -static inline int -find_check_entry(struct arpt_entry *e, const char *name, unsigned int size, +static int +find_check_entry(struct arpt_entry *e, struct net *net, const char *name, + unsigned int size, struct xt_percpu_counter_alloc_state *alloc_state) { struct xt_entry_target *t; @@ -429,7 +431,7 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size, } t->u.kernel.target = target; - ret = check_target(e, name); + ret = check_target(e, net, name); if (ret) goto err; return 0; @@ -522,7 +524,9 @@ static inline void cleanup_entry(struct arpt_entry *e) /* Checks and translates the user-supplied table segment (held in * newinfo). */ -static int translate_table(struct xt_table_info *newinfo, void *entry0, +static int translate_table(struct net *net, + struct xt_table_info *newinfo, + void *entry0, const struct arpt_replace *repl) { struct xt_percpu_counter_alloc_state alloc_state = { 0 }; @@ -586,7 +590,7 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, /* Finally, each sanity check must pass */ i = 0; xt_entry_foreach(iter, entry0, newinfo->size) { - ret = find_check_entry(iter, repl->name, repl->size, + ret = find_check_entry(iter, net, repl->name, repl->size, &alloc_state); if (ret != 0) break; @@ -974,7 +978,7 @@ static int do_replace(struct net *net, const void __user *user, goto free_newinfo; } - ret = translate_table(newinfo, loc_cpu_entry, &tmp); + ret = translate_table(net, newinfo, loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; @@ -1149,7 +1153,8 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, } } -static int translate_compat_table(struct xt_table_info **pinfo, +static int translate_compat_table(struct net *net, + struct xt_table_info **pinfo, void **pentry0, const struct compat_arpt_replace *compatr) { @@ -1217,7 +1222,7 @@ static int translate_compat_table(struct xt_table_info **pinfo, repl.num_counters = 0; repl.counters = NULL; repl.size = newinfo->size; - ret = translate_table(newinfo, entry1, &repl); + ret = translate_table(net, newinfo, entry1, &repl); if (ret) goto free_newinfo; @@ -1270,7 +1275,7 @@ static int compat_do_replace(struct net *net, void __user *user, goto free_newinfo; } - ret = translate_compat_table(&newinfo, &loc_cpu_entry, &tmp); + ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; @@ -1546,7 +1551,7 @@ int arpt_register_table(struct net *net, loc_cpu_entry = newinfo->entries; memcpy(loc_cpu_entry, repl->entries, repl->size); - ret = translate_table(newinfo, loc_cpu_entry, repl); + ret = translate_table(net, newinfo, loc_cpu_entry, repl); if (ret != 0) goto out_free; From bd6d13a15aaab4967bedd46f216dd63c428252aa Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 8 Jan 2020 10:59:38 +0100 Subject: [PATCH 2264/3715] netfilter: ipset: avoid null deref when IPSET_ATTR_LINENO is present commit 22dad713b8a5ff488e07b821195270672f486eb2 upstream. The set uadt functions assume lineno is never NULL, but it is in case of ip_set_utest(). syzkaller managed to generate a netlink message that calls this with LINENO attr present: general protection fault: 0000 [#1] PREEMPT SMP KASAN RIP: 0010:hash_mac4_uadt+0x1bc/0x470 net/netfilter/ipset/ip_set_hash_mac.c:104 Call Trace: ip_set_utest+0x55b/0x890 net/netfilter/ipset/ip_set_core.c:1867 nfnetlink_rcv_msg+0xcf2/0xfb0 net/netfilter/nfnetlink.c:229 netlink_rcv_skb+0x177/0x450 net/netlink/af_netlink.c:2477 nfnetlink_rcv+0x1ba/0x460 net/netfilter/nfnetlink.c:563 pass a dummy lineno storage, its easier than patching all set implementations. This seems to be a day-0 bug. Cc: Jozsef Kadlecsik Reported-by: syzbot+34bd2369d38707f3f4a7@syzkaller.appspotmail.com Fixes: a7b4f989a6294 ("netfilter: ipset: IP set core support") Signed-off-by: Florian Westphal Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/ipset/ip_set_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 94d74ec61f42..c2b21c9c1229 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1639,6 +1639,7 @@ static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb, struct ip_set *set; struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; int ret = 0; + u32 lineno; if (unlikely(protocol_failed(attr) || !attr[IPSET_ATTR_SETNAME] || @@ -1655,7 +1656,7 @@ static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb, return -IPSET_ERR_PROTOCOL; rcu_read_lock_bh(); - ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0, 0); + ret = set->variant->uadt(set, tb, IPSET_TEST, &lineno, 0, 0); rcu_read_unlock_bh(); /* Userspace can't trigger element to be re-added */ if (ret == -EAGAIN) From 39465647eda707db7c7561006da3a8450ca634b9 Mon Sep 17 00:00:00 2001 From: Akeem G Abodunrin Date: Wed, 8 Jan 2020 12:37:25 -0800 Subject: [PATCH 2265/3715] drm/i915/gen9: Clear residual context state on context switch commit bc8a76a152c5f9ef3b48104154a65a68a8b76946 upstream. Intel ID: PSIRT-TA-201910-001 CVEID: CVE-2019-14615 Intel GPU Hardware prior to Gen11 does not clear EU state during a context switch. This can result in information leakage between contexts. For Gen8 and Gen9, hardware provides a mechanism for fast cleardown of the EU state, by issuing a PIPE_CONTROL with bit 27 set. We can use this in a context batch buffer to explicitly cleardown the state on every context switch. As this workaround is already in place for gen8, we can borrow the code verbatim for Gen9. Signed-off-by: Mika Kuoppala Signed-off-by: Akeem G Abodunrin Cc: Kumar Valsan Prathap Cc: Chris Wilson Cc: Balestrieri Francesco Cc: Bloomfield Jon Cc: Dutt Sudeep Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_lrc.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index e0483c068d23..baff1f01bfc7 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1101,17 +1101,14 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) *batch++ = MI_NOOP; - /* WaClearSlmSpaceAtContextSwitch:kbl */ - /* Actual scratch location is at 128 bytes offset */ - if (IS_KBL_REVID(engine->i915, 0, KBL_REVID_A0)) { - batch = gen8_emit_pipe_control(batch, - PIPE_CONTROL_FLUSH_L3 | - PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE, - i915_ggtt_offset(engine->scratch) - + 2 * CACHELINE_BYTES); - } + /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */ + batch = gen8_emit_pipe_control(batch, + PIPE_CONTROL_FLUSH_L3 | + PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE, + i915_ggtt_offset(engine->scratch) + + 2 * CACHELINE_BYTES); /* WaMediaPoolStateCmdInWABB:bxt,glk */ if (HAS_POOLED_EU(engine->i915)) { From c04fc6fa5c96ec57316527b2228fa31f26494abe Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Jan 2020 20:05:49 +0100 Subject: [PATCH 2266/3715] Linux 4.14.165 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f2aa55cea457..166e18aa9ca9 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 164 +SUBLEVEL = 165 EXTRAVERSION = NAME = Petit Gorille From 642db7c730a4f8468d5148f500735a0912f6edd6 Mon Sep 17 00:00:00 2001 From: Siarhei Vishniakou Date: Wed, 8 Jan 2020 18:19:25 -0800 Subject: [PATCH 2267/3715] ANDROID: Enable HID_STEAM as y These configs will now be required in aosp/1204022. Change-Id: Ib2961a9abf545f483cd5691f04b93cffd011ec82 Signed-off-by: Siarhei Vishniakou Bug: 136263708 Test: none --- arch/arm64/configs/cuttlefish_defconfig | 1 + arch/x86/configs/x86_64_cuttlefish_defconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index 71fc12c51973..e718b8a1860b 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -368,6 +368,7 @@ CONFIG_HID_SAITEK=y CONFIG_HID_SAMSUNG=y CONFIG_HID_SONY=y CONFIG_HID_SPEEDLINK=y +CONFIG_HID_STEAM=y CONFIG_HID_SUNPLUS=y CONFIG_HID_GREENASIA=y CONFIG_GREENASIA_FF=y diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 11e1f466ddd8..5314fe11470e 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -389,6 +389,7 @@ CONFIG_HID_SAITEK=y CONFIG_HID_SAMSUNG=y CONFIG_HID_SONY=y CONFIG_HID_SPEEDLINK=y +CONFIG_HID_STEAM=y CONFIG_HID_SUNPLUS=y CONFIG_HID_GREENASIA=y CONFIG_GREENASIA_FF=y From d3b701efacbc0d39d1da593ab3e0ec336e6d3d2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Mon, 13 Jan 2020 23:52:14 -0800 Subject: [PATCH 2268/3715] cuttlefish - enable CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG=y MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Quota logging is needed to enable data metering. Bug: 147203196 Signed-off-by: Maciej Żenczykowski Change-Id: I7219a3abd922b31249a3bccb94bc0dcff197788e --- arch/arm64/configs/cuttlefish_defconfig | 1 + arch/x86/configs/x86_64_cuttlefish_defconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index e718b8a1860b..d22e5b86216c 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -150,6 +150,7 @@ CONFIG_NETFILTER_XT_MATCH_POLICY=y CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y CONFIG_NETFILTER_XT_MATCH_QUOTA=y CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG=y CONFIG_NETFILTER_XT_MATCH_SOCKET=y CONFIG_NETFILTER_XT_MATCH_STATE=y CONFIG_NETFILTER_XT_MATCH_STATISTIC=y diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 5314fe11470e..9e92a37c20b1 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -159,6 +159,7 @@ CONFIG_NETFILTER_XT_MATCH_POLICY=y CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y CONFIG_NETFILTER_XT_MATCH_QUOTA=y CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG=y CONFIG_NETFILTER_XT_MATCH_SOCKET=y CONFIG_NETFILTER_XT_MATCH_STATE=y CONFIG_NETFILTER_XT_MATCH_STATISTIC=y From 37d5c0dbbb882c9913fe9ed2ab1a4088e6cff27a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Thu, 16 Jan 2020 01:52:49 -0800 Subject: [PATCH 2269/3715] cuttlefish: enable CONFIG_DUMMY=y MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dummy network interface required for upcoming self-tests. Bug: 147203196 Signed-off-by: Maciej Żenczykowski Change-Id: I1989ece6cb95a8fa0c8b03ed756f9962476e0577 --- arch/arm64/configs/cuttlefish_defconfig | 1 + arch/x86/configs/x86_64_cuttlefish_defconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index d22e5b86216c..c2700608d034 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -230,6 +230,7 @@ CONFIG_DM_VERITY_FEC=y CONFIG_DM_VERITY_AVB=y CONFIG_DM_BOW=y CONFIG_NETDEVICES=y +CONFIG_DUMMY=y CONFIG_NETCONSOLE=y CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_TUN=y diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 9e92a37c20b1..0cee03ee5f2c 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -246,6 +246,7 @@ CONFIG_DM_VERITY_FEC=y CONFIG_DM_ANDROID_VERITY=y CONFIG_DM_BOW=y CONFIG_NETDEVICES=y +CONFIG_DUMMY=y CONFIG_NETCONSOLE=y CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_TUN=y From 4f9ec9a986ca8f359d9128caa9d1085bac9d84da Mon Sep 17 00:00:00 2001 From: Fabian Henneke Date: Tue, 9 Jul 2019 13:03:37 +0200 Subject: [PATCH 2270/3715] hidraw: Return EPOLLOUT from hidraw_poll [ Upstream commit 378b80370aa1fe50f9c48a3ac8af3e416e73b89f ] Always return EPOLLOUT from hidraw_poll when a device is connected. This is safe since writes are always possible (but will always block). hidraw does not support non-blocking writes and instead always calls blocking backend functions on write requests. Hence, so far, a call to poll never returned EPOLLOUT, which confuses tools like socat. Signed-off-by: Fabian Henneke In-reply-to: Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hidraw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c index 5652bd0ffb4d..1ac82e194818 100644 --- a/drivers/hid/hidraw.c +++ b/drivers/hid/hidraw.c @@ -260,7 +260,7 @@ static unsigned int hidraw_poll(struct file *file, poll_table *wait) poll_wait(file, &list->hidraw->wait, wait); if (list->head != list->tail) - return POLLIN | POLLRDNORM; + return POLLIN | POLLRDNORM | POLLOUT; if (!list->hidraw->exist) return POLLERR | POLLHUP; return 0; From fa8b1126ca127c50f47553971af7b4febe220ecc Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 4 Dec 2019 03:37:13 +0100 Subject: [PATCH 2271/3715] HID: hidraw: Fix returning EPOLLOUT from hidraw_poll [ Upstream commit 9f3b61dc1dd7b81e99e7ed23776bb64a35f39e1a ] When polling a connected /dev/hidrawX device, it is useful to get the EPOLLOUT when writing is possible. Since writing is possible as soon as the device is connected, always return it. Right now EPOLLOUT is only returned when there are also input reports are available. This works if devices start sending reports when connected, but some HID devices might need an output report first before sending any input reports. This change will allow using EPOLLOUT here as well. Fixes: 378b80370aa1 ("hidraw: Return EPOLLOUT from hidraw_poll") Signed-off-by: Marcel Holtmann Cc: stable@vger.kernel.org Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hidraw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c index 1ac82e194818..1abf5008def0 100644 --- a/drivers/hid/hidraw.c +++ b/drivers/hid/hidraw.c @@ -260,10 +260,10 @@ static unsigned int hidraw_poll(struct file *file, poll_table *wait) poll_wait(file, &list->hidraw->wait, wait); if (list->head != list->tail) - return POLLIN | POLLRDNORM | POLLOUT; + return POLLIN | POLLRDNORM; if (!list->hidraw->exist) return POLLERR | POLLHUP; - return 0; + return POLLOUT | POLLWRNORM; } static int hidraw_open(struct inode *inode, struct file *file) From 2aaed8c188341c22e0b6f39f238d800936c77c01 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 10 Jan 2020 15:32:51 +0100 Subject: [PATCH 2272/3715] HID: hidraw, uhid: Always report EPOLLOUT [ Upstream commit 9e635c2851df6caee651e589fbf937b637973c91 ] hidraw and uhid device nodes are always available for writing so we should always report EPOLLOUT and EPOLLWRNORM bits, not only in the cases when there is nothing to read. Reported-by: Linus Torvalds Fixes: be54e7461ffdc ("HID: uhid: Fix returning EPOLLOUT from uhid_char_poll") Fixes: 9f3b61dc1dd7b ("HID: hidraw: Fix returning EPOLLOUT from hidraw_poll") Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hidraw.c | 7 ++++--- drivers/hid/uhid.c | 5 +++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c index 1abf5008def0..5243c4120819 100644 --- a/drivers/hid/hidraw.c +++ b/drivers/hid/hidraw.c @@ -257,13 +257,14 @@ out: static unsigned int hidraw_poll(struct file *file, poll_table *wait) { struct hidraw_list *list = file->private_data; + unsigned int mask = POLLOUT | POLLWRNORM; /* hidraw is always writable */ poll_wait(file, &list->hidraw->wait, wait); if (list->head != list->tail) - return POLLIN | POLLRDNORM; + mask |= POLLIN | POLLRDNORM; if (!list->hidraw->exist) - return POLLERR | POLLHUP; - return POLLOUT | POLLWRNORM; + mask |= POLLERR | POLLHUP; + return mask; } static int hidraw_open(struct inode *inode, struct file *file) diff --git a/drivers/hid/uhid.c b/drivers/hid/uhid.c index e63b761f600a..c749f449c7cb 100644 --- a/drivers/hid/uhid.c +++ b/drivers/hid/uhid.c @@ -769,13 +769,14 @@ unlock: static unsigned int uhid_char_poll(struct file *file, poll_table *wait) { struct uhid_device *uhid = file->private_data; + unsigned int mask = POLLOUT | POLLWRNORM; /* uhid is always writable */ poll_wait(file, &uhid->waitq, wait); if (uhid->head != uhid->tail) - return POLLIN | POLLRDNORM; + mask |= POLLIN | POLLRDNORM; - return EPOLLOUT | EPOLLWRNORM; + return mask; } static const struct file_operations uhid_fops = { From 10dbcf14b89929100d36e529e0d22de4b97d9a68 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 7 Mar 2019 16:58:35 +0100 Subject: [PATCH 2273/3715] ethtool: reduce stack usage with clang commit 3499e87ea0413ee5b2cc028f4c8ed4d424bc7f98 upstream. clang inlines the dev_ethtool() more aggressively than gcc does, leading to a larger amount of used stack space: net/core/ethtool.c:2536:24: error: stack frame size of 1216 bytes in function 'dev_ethtool' [-Werror,-Wframe-larger-than=] Marking the sub-functions that require the most stack space as noinline_for_stack gives us reasonable behavior on all compilers. Signed-off-by: Arnd Bergmann Reviewed-by: Michal Kubecek Signed-off-by: David S. Miller Signed-off-by: Miles Chen Signed-off-by: Greg Kroah-Hartman --- net/core/ethtool.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 7822defa5a5d..749d48393d06 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -2343,9 +2343,10 @@ static int ethtool_set_tunable(struct net_device *dev, void __user *useraddr) return ret; } -static int ethtool_get_per_queue_coalesce(struct net_device *dev, - void __user *useraddr, - struct ethtool_per_queue_op *per_queue_opt) +static noinline_for_stack int +ethtool_get_per_queue_coalesce(struct net_device *dev, + void __user *useraddr, + struct ethtool_per_queue_op *per_queue_opt) { u32 bit; int ret; @@ -2375,9 +2376,10 @@ static int ethtool_get_per_queue_coalesce(struct net_device *dev, return 0; } -static int ethtool_set_per_queue_coalesce(struct net_device *dev, - void __user *useraddr, - struct ethtool_per_queue_op *per_queue_opt) +static noinline_for_stack int +ethtool_set_per_queue_coalesce(struct net_device *dev, + void __user *useraddr, + struct ethtool_per_queue_op *per_queue_opt) { u32 bit; int i, ret = 0; @@ -2434,7 +2436,7 @@ roll_back: return ret; } -static int ethtool_set_per_queue(struct net_device *dev, +static int noinline_for_stack ethtool_set_per_queue(struct net_device *dev, void __user *useraddr, u32 sub_cmd) { struct ethtool_per_queue_op per_queue_opt; From f3848952e00578330e427e3008f9a099ba43985c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 May 2019 15:41:42 -0700 Subject: [PATCH 2274/3715] fs/select: avoid clang stack usage warning commit ad312f95d41c9de19313c51e388c4984451c010f upstream. The select() implementation is carefully tuned to put a sensible amount of data on the stack for holding a copy of the user space fd_set, but not too large to risk overflowing the kernel stack. When building a 32-bit kernel with clang, we need a little more space than with gcc, which often triggers a warning: fs/select.c:619:5: error: stack frame size of 1048 bytes in function 'core_sys_select' [-Werror,-Wframe-larger-than=] int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, I experimentally found that for 32-bit ARM, reducing the maximum stack usage by 64 bytes keeps us reliably under the warning limit again. Link: http://lkml.kernel.org/r/20190307090146.1874906-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Reviewed-by: Andi Kleen Cc: Nick Desaulniers Cc: Alexander Viro Cc: Christoph Hellwig Cc: Eric Dumazet Cc: "Darrick J. Wong" Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Miles Chen Signed-off-by: Greg Kroah-Hartman --- include/linux/poll.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/poll.h b/include/linux/poll.h index d384f12abdd5..c7acd7c09747 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -15,7 +15,11 @@ extern struct ctl_table epoll_table[]; /* for sysctl */ /* ~832 bytes of stack space used max in sys_select/sys_poll before allocating additional memory. */ +#ifdef __clang__ +#define MAX_STACK_ALLOC 768 +#else #define MAX_STACK_ALLOC 832 +#endif #define FRONTEND_STACK_ALLOC 256 #define SELECT_STACK_ALLOC FRONTEND_STACK_ALLOC #define POLL_STACK_ALLOC FRONTEND_STACK_ALLOC From ba8bbddaa188235df007411fdead5832f9e0cb59 Mon Sep 17 00:00:00 2001 From: Sanjay Konduri Date: Tue, 15 May 2018 14:34:30 +0530 Subject: [PATCH 2275/3715] rsi: add fix for crash during assertions commit abd39c6ded9db53aa44c2540092bdd5fb6590fa8 upstream. Observed crash in some scenarios when assertion has occurred, this is because hw structure is freed and is tried to get accessed in some functions where null check is already present. So, avoided the crash by making the hw to NULL after freeing. Signed-off-by: Sanjay Konduri Signed-off-by: Sushant Kumar Mishra Signed-off-by: Kalle Valo Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/rsi/rsi_91x_mac80211.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index fa12c05d9e23..233b2239311d 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -218,6 +218,7 @@ void rsi_mac80211_detach(struct rsi_hw *adapter) ieee80211_stop_queues(hw); ieee80211_unregister_hw(hw); ieee80211_free_hw(hw); + adapter->hw = NULL; } for (band = 0; band < NUM_NL80211_BANDS; band++) { From 8cf89b9506ebffce80d280991da845e41b9781b0 Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Wed, 13 Dec 2017 17:07:20 +0000 Subject: [PATCH 2276/3715] arm64: don't open code page table entry creation commit 193383043f14a398393dc18bae8380f7fe665ec3 upstream. Instead of open coding the generation of page table entries, use the macros/functions that exist for this - pfn_p*d and p*d_populate. Most code in the kernel already uses these macros, this patch tries to fix up the few places that don't. This is useful for the next patch in this series, which needs to change the page table entry logic, and it's better to have that logic in one place. The KVM extended ID map is special, since we're creating a level above CONFIG_PGTABLE_LEVELS and the required function isn't available. Leave it as is and add a comment to explain it. (The normal kernel ID map code doesn't need this change because its page tables are created in assembly (__create_page_tables)). Tested-by: Suzuki K Poulose Reviewed-by: Suzuki K Poulose Reviewed-by: Marc Zyngier Tested-by: Bob Picco Reviewed-by: Bob Picco Signed-off-by: Kristina Martsenko Signed-off-by: Catalin Marinas Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/kvm_mmu.h | 5 +++++ arch/arm64/include/asm/pgtable.h | 1 + arch/arm64/kernel/hibernate.c | 3 +-- arch/arm64/mm/mmu.c | 14 +++++++++----- 4 files changed, 16 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index e42c1f0ae6cf..47ba6a57dc45 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -296,6 +296,11 @@ static inline bool __kvm_cpu_uses_extended_idmap(void) return __cpu_uses_extended_idmap(); } +/* + * Can't use pgd_populate here, because the extended idmap adds an extra level + * above CONFIG_PGTABLE_LEVELS (which is 2 or 3 if we're using the extended + * idmap), and pgd_populate is only available if CONFIG_PGTABLE_LEVELS = 4. + */ static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd, pgd_t *hyp_pgd, pgd_t *merged_hyp_pgd, diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 9b676c3dd3ce..324db23b37de 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -343,6 +343,7 @@ static inline int pmd_protnone(pmd_t pmd) #define pud_write(pud) pte_write(pud_pte(pud)) #define pud_pfn(pud) (((pud_val(pud) & PUD_MASK) & PHYS_MASK) >> PAGE_SHIFT) +#define pfn_pud(pfn,prot) (__pud(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))) #define set_pmd_at(mm, addr, pmdp, pmd) set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd)) diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index bb444c693796..49f543ebd6cb 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -246,8 +246,7 @@ static int create_safe_exec_page(void *src_start, size_t length, } pte = pte_offset_kernel(pmd, dst_addr); - set_pte(pte, __pte(virt_to_phys((void *)dst) | - pgprot_val(PAGE_KERNEL_EXEC))); + set_pte(pte, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC)); /* * Load our new page tables. A strict BBM approach requires that we diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index abb9d2ecc675..045017e7148c 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -605,8 +605,8 @@ static void __init map_kernel(pgd_t *pgd) * entry instead. */ BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); - set_pud(pud_set_fixmap_offset(pgd, FIXADDR_START), - __pud(__pa_symbol(bm_pmd) | PUD_TYPE_TABLE)); + pud_populate(&init_mm, pud_set_fixmap_offset(pgd, FIXADDR_START), + lm_alias(bm_pmd)); pud_clear_fixmap(); } else { BUG(); @@ -721,7 +721,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) if (!p) return -ENOMEM; - set_pmd(pmd, __pmd(__pa(p) | PROT_SECT_NORMAL)); + pmd_set_huge(pmd, __pa(p), __pgprot(PROT_SECT_NORMAL)); } else vmemmap_verify((pte_t *)pmd, node, addr, next); } while (addr = next, addr != end); @@ -915,15 +915,19 @@ int __init arch_ioremap_pmd_supported(void) int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot) { + pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT | + pgprot_val(mk_sect_prot(prot))); BUG_ON(phys & ~PUD_MASK); - set_pud(pud, __pud(phys | PUD_TYPE_SECT | pgprot_val(mk_sect_prot(prot)))); + set_pud(pud, pfn_pud(__phys_to_pfn(phys), sect_prot)); return 1; } int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot) { + pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT | + pgprot_val(mk_sect_prot(prot))); BUG_ON(phys & ~PMD_MASK); - set_pmd(pmd, __pmd(phys | PMD_TYPE_SECT | pgprot_val(mk_sect_prot(prot)))); + set_pmd(pmd, pfn_pmd(__phys_to_pfn(phys), sect_prot)); return 1; } From 9a6baa402ee47d9e3bb20e08bbae437685652eb6 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 14 Jan 2020 15:44:11 +0000 Subject: [PATCH 2277/3715] arm64: mm: Change page table pointer name in p[md]_set_huge() This is preparation for the following backported fixes. It was done upstream as part of commit 20a004e7b017 "arm64: mm: Use READ_ONCE/WRITE_ONCE when accessing page tables", the rest of which does not seem suitable for stable. Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/mmu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 045017e7148c..7ea6acce6fce 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -913,21 +913,21 @@ int __init arch_ioremap_pmd_supported(void) return !IS_ENABLED(CONFIG_ARM64_PTDUMP_DEBUGFS); } -int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot) +int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) { pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))); BUG_ON(phys & ~PUD_MASK); - set_pud(pud, pfn_pud(__phys_to_pfn(phys), sect_prot)); + set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot)); return 1; } -int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot) +int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) { pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))); BUG_ON(phys & ~PMD_MASK); - set_pmd(pmd, pfn_pmd(__phys_to_pfn(phys), sect_prot)); + set_pmd(pmdp, pfn_pmd(__phys_to_pfn(phys), sect_prot)); return 1; } From 68a066f6ff13e7029d54da9d322ad686694c7039 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 21 Feb 2018 12:59:27 +0000 Subject: [PATCH 2278/3715] arm64: Enforce BBM for huge IO/VMAP mappings commit 15122ee2c515a253b0c66a3e618bc7ebe35105eb upstream. ioremap_page_range doesn't honour break-before-make and attempts to put down huge mappings (using p*d_set_huge) over the top of pre-existing table entries. This leads to us leaking page table memory and also gives rise to TLB conflicts and spurious aborts, which have been seen in practice on Cortex-A75. Until this has been resolved, refuse to put block mappings when the existing entry is found to be present. Fixes: 324420bf91f60 ("arm64: add support for ioremap() block mappings") Reported-by: Hanjun Guo Reported-by: Lei Li Acked-by: Ard Biesheuvel Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/mmu.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 7ea6acce6fce..2c037a123c79 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -917,6 +917,11 @@ int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) { pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))); + + /* ioremap_page_range doesn't honour BBM */ + if (pud_present(READ_ONCE(*pudp))) + return 0; + BUG_ON(phys & ~PUD_MASK); set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot)); return 1; @@ -926,6 +931,11 @@ int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) { pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))); + + /* ioremap_page_range doesn't honour BBM */ + if (pmd_present(READ_ONCE(*pmdp))) + return 0; + BUG_ON(phys & ~PMD_MASK); set_pmd(pmdp, pfn_pmd(__phys_to_pfn(phys), sect_prot)); return 1; From 4ded4a2cf506a1aa621901d1289e89a8587963bc Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Wed, 23 May 2018 11:43:46 -0700 Subject: [PATCH 2279/3715] arm64: Make sure permission updates happen for pmd/pud commit 82034c23fcbc2389c73d97737f61fa2dd6526413 upstream. Commit 15122ee2c515 ("arm64: Enforce BBM for huge IO/VMAP mappings") disallowed block mappings for ioremap since that code does not honor break-before-make. The same APIs are also used for permission updating though and the extra checks prevent the permission updates from happening, even though this should be permitted. This results in read-only permissions not being fully applied. Visibly, this can occasionaly be seen as a failure on the built in rodata test when the test data ends up in a section or as an odd RW gap on the page table dump. Fix this by using pgattr_change_is_safe instead of p*d_present for determining if the change is permitted. Reviewed-by: Kees Cook Tested-by: Peter Robinson Reported-by: Peter Robinson Fixes: 15122ee2c515 ("arm64: Enforce BBM for huge IO/VMAP mappings") Signed-off-by: Laura Abbott Signed-off-by: Will Deacon Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/mmu.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 2c037a123c79..e02a6326c800 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -917,13 +917,15 @@ int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) { pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))); + pud_t new_pud = pfn_pud(__phys_to_pfn(phys), sect_prot); - /* ioremap_page_range doesn't honour BBM */ - if (pud_present(READ_ONCE(*pudp))) + /* Only allow permission changes for now */ + if (!pgattr_change_is_safe(READ_ONCE(pud_val(*pudp)), + pud_val(new_pud))) return 0; BUG_ON(phys & ~PUD_MASK); - set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot)); + set_pud(pudp, new_pud); return 1; } @@ -931,13 +933,15 @@ int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) { pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))); + pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), sect_prot); - /* ioremap_page_range doesn't honour BBM */ - if (pmd_present(READ_ONCE(*pmdp))) + /* Only allow permission changes for now */ + if (!pgattr_change_is_safe(READ_ONCE(pmd_val(*pmdp)), + pmd_val(new_pmd))) return 0; BUG_ON(phys & ~PMD_MASK); - set_pmd(pmdp, pfn_pmd(__phys_to_pfn(phys), sect_prot)); + set_pmd(pmdp, new_pmd); return 1; } From b06e6e5c3705d14a90f30955bce7befd24adecc4 Mon Sep 17 00:00:00 2001 From: Dedy Lansky Date: Sun, 29 Jul 2018 14:59:16 +0300 Subject: [PATCH 2280/3715] cfg80211/mac80211: make ieee80211_send_layer2_update a public function commit 30ca1aa536211f5ac3de0173513a7a99a98a97f3 upstream. Make ieee80211_send_layer2_update() a common function so other drivers can re-use it. Signed-off-by: Dedy Lansky Signed-off-by: Johannes Berg [bwh: Backported to 4.14 as dependency of commit 3e493173b784 "mac80211: Do not send Layer 2 Update frame before authorization"] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- include/net/cfg80211.h | 11 ++++++++++ net/mac80211/cfg.c | 48 ++---------------------------------------- net/wireless/util.c | 45 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 46 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ea0ed58db97e..a4c8e9d7dd06 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4479,6 +4479,17 @@ static inline const u8 *cfg80211_find_ext_ie(u8 ext_eid, const u8 *ies, int len) const u8 *cfg80211_find_vendor_ie(unsigned int oui, int oui_type, const u8 *ies, int len); +/** + * cfg80211_send_layer2_update - send layer 2 update frame + * + * @dev: network device + * @addr: STA MAC address + * + * Wireless drivers can use this function to update forwarding tables in bridge + * devices upon STA association. + */ +void cfg80211_send_layer2_update(struct net_device *dev, const u8 *addr); + /** * DOC: Regulatory enforcement infrastructure * diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 8168c667d91d..f236a990638f 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1089,50 +1089,6 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) return 0; } -/* Layer 2 Update frame (802.2 Type 1 LLC XID Update response) */ -struct iapp_layer2_update { - u8 da[ETH_ALEN]; /* broadcast */ - u8 sa[ETH_ALEN]; /* STA addr */ - __be16 len; /* 6 */ - u8 dsap; /* 0 */ - u8 ssap; /* 0 */ - u8 control; - u8 xid_info[3]; -} __packed; - -static void ieee80211_send_layer2_update(struct sta_info *sta) -{ - struct iapp_layer2_update *msg; - struct sk_buff *skb; - - /* Send Level 2 Update Frame to update forwarding tables in layer 2 - * bridge devices */ - - skb = dev_alloc_skb(sizeof(*msg)); - if (!skb) - return; - msg = skb_put(skb, sizeof(*msg)); - - /* 802.2 Type 1 Logical Link Control (LLC) Exchange Identifier (XID) - * Update response frame; IEEE Std 802.2-1998, 5.4.1.2.1 */ - - eth_broadcast_addr(msg->da); - memcpy(msg->sa, sta->sta.addr, ETH_ALEN); - msg->len = htons(6); - msg->dsap = 0; - msg->ssap = 0x01; /* NULL LSAP, CR Bit: Response */ - msg->control = 0xaf; /* XID response lsb.1111F101. - * F=0 (no poll command; unsolicited frame) */ - msg->xid_info[0] = 0x81; /* XID format identifier */ - msg->xid_info[1] = 1; /* LLC types/classes: Type 1 LLC */ - msg->xid_info[2] = 0; /* XID sender's receive window size (RW) */ - - skb->dev = sta->sdata->dev; - skb->protocol = eth_type_trans(skb, sta->sdata->dev); - memset(skb->cb, 0, sizeof(skb->cb)); - netif_rx_ni(skb); -} - static int sta_apply_auth_flags(struct ieee80211_local *local, struct sta_info *sta, u32 mask, u32 set) @@ -1496,7 +1452,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, } if (layer2_update) - ieee80211_send_layer2_update(sta); + cfg80211_send_layer2_update(sta->sdata->dev, sta->sta.addr); rcu_read_unlock(); @@ -1598,7 +1554,7 @@ static int ieee80211_change_station(struct wiphy *wiphy, if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) ieee80211_vif_inc_num_mcast(sta->sdata); - ieee80211_send_layer2_update(sta); + cfg80211_send_layer2_update(sta->sdata->dev, sta->sta.addr); } err = sta_apply_parameters(local, sta, params); diff --git a/net/wireless/util.c b/net/wireless/util.c index 0f6c34ff9b55..2234817f5dbb 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1873,3 +1873,48 @@ EXPORT_SYMBOL(rfc1042_header); const unsigned char bridge_tunnel_header[] __aligned(2) = { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; EXPORT_SYMBOL(bridge_tunnel_header); + +/* Layer 2 Update frame (802.2 Type 1 LLC XID Update response) */ +struct iapp_layer2_update { + u8 da[ETH_ALEN]; /* broadcast */ + u8 sa[ETH_ALEN]; /* STA addr */ + __be16 len; /* 6 */ + u8 dsap; /* 0 */ + u8 ssap; /* 0 */ + u8 control; + u8 xid_info[3]; +} __packed; + +void cfg80211_send_layer2_update(struct net_device *dev, const u8 *addr) +{ + struct iapp_layer2_update *msg; + struct sk_buff *skb; + + /* Send Level 2 Update Frame to update forwarding tables in layer 2 + * bridge devices */ + + skb = dev_alloc_skb(sizeof(*msg)); + if (!skb) + return; + msg = skb_put(skb, sizeof(*msg)); + + /* 802.2 Type 1 Logical Link Control (LLC) Exchange Identifier (XID) + * Update response frame; IEEE Std 802.2-1998, 5.4.1.2.1 */ + + eth_broadcast_addr(msg->da); + ether_addr_copy(msg->sa, addr); + msg->len = htons(6); + msg->dsap = 0; + msg->ssap = 0x01; /* NULL LSAP, CR Bit: Response */ + msg->control = 0xaf; /* XID response lsb.1111F101. + * F=0 (no poll command; unsolicited frame) */ + msg->xid_info[0] = 0x81; /* XID format identifier */ + msg->xid_info[1] = 1; /* LLC types/classes: Type 1 LLC */ + msg->xid_info[2] = 0; /* XID sender's receive window size (RW) */ + + skb->dev = dev; + skb->protocol = eth_type_trans(skb, dev); + memset(skb->cb, 0, sizeof(skb->cb)); + netif_rx_ni(skb); +} +EXPORT_SYMBOL(cfg80211_send_layer2_update); From 57e1b5f6b55acb655358bd62c7e2f14d8af90e0a Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 11 Sep 2019 16:03:05 +0300 Subject: [PATCH 2281/3715] mac80211: Do not send Layer 2 Update frame before authorization commit 3e493173b7841259a08c5c8e5cbe90adb349da7e upstream. The Layer 2 Update frame is used to update bridges when a station roams to another AP even if that STA does not transmit any frames after the reassociation. This behavior was described in IEEE Std 802.11F-2003 as something that would happen based on MLME-ASSOCIATE.indication, i.e., before completing 4-way handshake. However, this IEEE trial-use recommended practice document was published before RSN (IEEE Std 802.11i-2004) and as such, did not consider RSN use cases. Furthermore, IEEE Std 802.11F-2003 was withdrawn in 2006 and as such, has not been maintained amd should not be used anymore. Sending out the Layer 2 Update frame immediately after association is fine for open networks (and also when using SAE, FT protocol, or FILS authentication when the station is actually authenticated by the time association completes). However, it is not appropriate for cases where RSN is used with PSK or EAP authentication since the station is actually fully authenticated only once the 4-way handshake completes after authentication and attackers might be able to use the unauthenticated triggering of Layer 2 Update frame transmission to disrupt bridge behavior. Fix this by postponing transmission of the Layer 2 Update frame from station entry addition to the point when the station entry is marked authorized. Similarly, send out the VLAN binding update only if the STA entry has already been authorized. Signed-off-by: Jouni Malinen Reviewed-by: Johannes Berg Signed-off-by: David S. Miller Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- net/mac80211/cfg.c | 14 ++++---------- net/mac80211/sta_info.c | 4 ++++ 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index f236a990638f..d437007b15bb 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1398,7 +1398,6 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, struct sta_info *sta; struct ieee80211_sub_if_data *sdata; int err; - int layer2_update; if (params->vlan) { sdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); @@ -1442,18 +1441,12 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, test_sta_flag(sta, WLAN_STA_ASSOC)) rate_control_rate_init(sta); - layer2_update = sdata->vif.type == NL80211_IFTYPE_AP_VLAN || - sdata->vif.type == NL80211_IFTYPE_AP; - err = sta_info_insert_rcu(sta); if (err) { rcu_read_unlock(); return err; } - if (layer2_update) - cfg80211_send_layer2_update(sta->sdata->dev, sta->sta.addr); - rcu_read_unlock(); return 0; @@ -1551,10 +1544,11 @@ static int ieee80211_change_station(struct wiphy *wiphy, sta->sdata = vlansdata; ieee80211_check_fast_xmit(sta); - if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) { ieee80211_vif_inc_num_mcast(sta->sdata); - - cfg80211_send_layer2_update(sta->sdata->dev, sta->sta.addr); + cfg80211_send_layer2_update(sta->sdata->dev, + sta->sta.addr); + } } err = sta_apply_parameters(local, sta, params); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 1a86974b02e3..627dc642f894 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1899,6 +1899,10 @@ int sta_info_move_state(struct sta_info *sta, ieee80211_check_fast_xmit(sta); ieee80211_check_fast_rx(sta); } + if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + sta->sdata->vif.type == NL80211_IFTYPE_AP) + cfg80211_send_layer2_update(sta->sdata->dev, + sta->sta.addr); break; default: break; From ffe76c896fbc91af8859aaea15b75b0b887960f3 Mon Sep 17 00:00:00 2001 From: Vandana BN Date: Wed, 22 May 2019 04:34:15 -0400 Subject: [PATCH 2282/3715] media: usb:zr364xx:Fix KASAN:null-ptr-deref Read in zr364xx_vidioc_querycap commit 5d2e73a5f80a5b5aff3caf1ec6d39b5b3f54b26e upstream. SyzKaller hit the null pointer deref while reading from uninitialized udev->product in zr364xx_vidioc_querycap(). ================================================================== BUG: KASAN: null-ptr-deref in read_word_at_a_time+0xe/0x20 include/linux/compiler.h:274 Read of size 1 at addr 0000000000000000 by task v4l_id/5287 CPU: 1 PID: 5287 Comm: v4l_id Not tainted 5.1.0-rc3-319004-g43151d6 #6 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xe8/0x16e lib/dump_stack.c:113 kasan_report.cold+0x5/0x3c mm/kasan/report.c:321 read_word_at_a_time+0xe/0x20 include/linux/compiler.h:274 strscpy+0x8a/0x280 lib/string.c:207 zr364xx_vidioc_querycap+0xb5/0x210 drivers/media/usb/zr364xx/zr364xx.c:706 v4l_querycap+0x12b/0x340 drivers/media/v4l2-core/v4l2-ioctl.c:1062 __video_do_ioctl+0x5bb/0xb40 drivers/media/v4l2-core/v4l2-ioctl.c:2874 video_usercopy+0x44e/0xf00 drivers/media/v4l2-core/v4l2-ioctl.c:3056 v4l2_ioctl+0x14e/0x1a0 drivers/media/v4l2-core/v4l2-dev.c:364 vfs_ioctl fs/ioctl.c:46 [inline] file_ioctl fs/ioctl.c:509 [inline] do_vfs_ioctl+0xced/0x12f0 fs/ioctl.c:696 ksys_ioctl+0xa0/0xc0 fs/ioctl.c:713 __do_sys_ioctl fs/ioctl.c:720 [inline] __se_sys_ioctl fs/ioctl.c:718 [inline] __x64_sys_ioctl+0x74/0xb0 fs/ioctl.c:718 do_syscall_64+0xcf/0x4f0 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7f3b56d8b347 Code: 90 90 90 48 8b 05 f1 fa 2a 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 90 90 90 90 90 90 90 90 90 90 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d c1 fa 2a 00 31 d2 48 29 c2 64 RSP: 002b:00007ffe005d5d68 EFLAGS: 00000202 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f3b56d8b347 RDX: 00007ffe005d5d70 RSI: 0000000080685600 RDI: 0000000000000003 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000400884 R13: 00007ffe005d5ec0 R14: 0000000000000000 R15: 0000000000000000 ================================================================== For this device udev->product is not initialized and accessing it causes a NULL pointer deref. The fix is to check for NULL before strscpy() and copy empty string, if product is NULL Reported-by: syzbot+66010012fd4c531a1a96@syzkaller.appspotmail.com Signed-off-by: Vandana BN Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab [bwh: Backported to 4.14: This function uses strlcpy() instead of strscpy()] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/zr364xx/zr364xx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/media/usb/zr364xx/zr364xx.c b/drivers/media/usb/zr364xx/zr364xx.c index 4ff8d0aed015..d30f129a9db7 100644 --- a/drivers/media/usb/zr364xx/zr364xx.c +++ b/drivers/media/usb/zr364xx/zr364xx.c @@ -706,7 +706,8 @@ static int zr364xx_vidioc_querycap(struct file *file, void *priv, struct zr364xx_camera *cam = video_drvdata(file); strlcpy(cap->driver, DRIVER_DESC, sizeof(cap->driver)); - strlcpy(cap->card, cam->udev->product, sizeof(cap->card)); + if (cam->udev->product) + strlcpy(cap->card, cam->udev->product, sizeof(cap->card)); strlcpy(cap->bus_info, dev_name(&cam->udev->dev), sizeof(cap->bus_info)); cap->device_caps = V4L2_CAP_VIDEO_CAPTURE | From 1059b758b6fb0fb6949b8c0474d5db27ea269b01 Mon Sep 17 00:00:00 2001 From: ZhangXiaoxu Date: Sat, 6 Apr 2019 15:30:38 +0800 Subject: [PATCH 2283/3715] cifs: Fix lease buffer length error commit b57a55e2200ede754e4dc9cce4ba9402544b9365 upstream. There is a KASAN slab-out-of-bounds: BUG: KASAN: slab-out-of-bounds in _copy_from_iter_full+0x783/0xaa0 Read of size 80 at addr ffff88810c35e180 by task mount.cifs/539 CPU: 1 PID: 539 Comm: mount.cifs Not tainted 4.19 #10 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0xdd/0x12a print_address_description+0xa7/0x540 kasan_report+0x1ff/0x550 check_memory_region+0x2f1/0x310 memcpy+0x2f/0x80 _copy_from_iter_full+0x783/0xaa0 tcp_sendmsg_locked+0x1840/0x4140 tcp_sendmsg+0x37/0x60 inet_sendmsg+0x18c/0x490 sock_sendmsg+0xae/0x130 smb_send_kvec+0x29c/0x520 __smb_send_rqst+0x3ef/0xc60 smb_send_rqst+0x25a/0x2e0 compound_send_recv+0x9e8/0x2af0 cifs_send_recv+0x24/0x30 SMB2_open+0x35e/0x1620 open_shroot+0x27b/0x490 smb2_open_op_close+0x4e1/0x590 smb2_query_path_info+0x2ac/0x650 cifs_get_inode_info+0x1058/0x28f0 cifs_root_iget+0x3bb/0xf80 cifs_smb3_do_mount+0xe00/0x14c0 cifs_do_mount+0x15/0x20 mount_fs+0x5e/0x290 vfs_kern_mount+0x88/0x460 do_mount+0x398/0x31e0 ksys_mount+0xc6/0x150 __x64_sys_mount+0xea/0x190 do_syscall_64+0x122/0x590 entry_SYSCALL_64_after_hwframe+0x44/0xa9 It can be reproduced by the following step: 1. samba configured with: server max protocol = SMB2_10 2. mount -o vers=default When parse the mount version parameter, the 'ops' and 'vals' was setted to smb30, if negotiate result is smb21, just update the 'ops' to smb21, but the 'vals' is still smb30. When add lease context, the iov_base is allocated with smb21 ops, but the iov_len is initiallited with the smb30. Because the iov_len is longer than iov_base, when send the message, copy array out of bounds. we need to keep the 'ops' and 'vals' consistent. Fixes: 9764c02fcbad ("SMB3: Add support for multidialect negotiate (SMB2.1 and later)") Fixes: d5c7076b772a ("smb3: add smb3.1.1 to default dialect list") Signed-off-by: ZhangXiaoxu Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky [bwh: Backported to 4.14: We never switch to SMB3.1.1 here] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 0e1c36c92f60..4eb0a9e7194b 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -575,6 +575,7 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) } else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) { /* ops set to 3.0 by default for default so update */ ses->server->ops = &smb21_operations; + ses->server->vals = &smb21_values; } } else if (le16_to_cpu(rsp->DialectRevision) != ses->server->vals->protocol_id) { From cae904fa00de645b6de57c698e5e00c0ba7e97e8 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Tue, 10 Sep 2019 18:01:40 -0500 Subject: [PATCH 2284/3715] wimax: i2400: fix memory leak commit 2507e6ab7a9a440773be476141a255934468c5ef upstream. In i2400m_op_rfkill_sw_toggle cmd buffer should be released along with skb response. Signed-off-by: Navid Emamdoost Signed-off-by: David S. Miller Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/net/wimax/i2400m/op-rfkill.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wimax/i2400m/op-rfkill.c b/drivers/net/wimax/i2400m/op-rfkill.c index b0dba35a8ad2..7c92e8ace9c2 100644 --- a/drivers/net/wimax/i2400m/op-rfkill.c +++ b/drivers/net/wimax/i2400m/op-rfkill.c @@ -142,6 +142,7 @@ int i2400m_op_rfkill_sw_toggle(struct wimax_dev *wimax_dev, "%d\n", result); result = 0; error_cmd: + kfree(cmd); kfree_skb(ack_skb); error_msg_to_dev: error_alloc: From 67a8c1b6bc85dfe2d2ae5d86d1ea8d00b1b29493 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Fri, 25 Oct 2019 23:53:30 -0500 Subject: [PATCH 2285/3715] wimax: i2400: Fix memory leak in i2400m_op_rfkill_sw_toggle commit 6f3ef5c25cc762687a7341c18cbea5af54461407 upstream. In the implementation of i2400m_op_rfkill_sw_toggle() the allocated buffer for cmd should be released before returning. The documentation for i2400m_msg_to_dev() says when it returns the buffer can be reused. Meaning cmd should be released in either case. Move kfree(cmd) before return to be reached by all execution paths. Fixes: 2507e6ab7a9a ("wimax: i2400: fix memory leak") Signed-off-by: Navid Emamdoost Signed-off-by: David S. Miller Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/net/wimax/i2400m/op-rfkill.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wimax/i2400m/op-rfkill.c b/drivers/net/wimax/i2400m/op-rfkill.c index 7c92e8ace9c2..dc6fe93ce71f 100644 --- a/drivers/net/wimax/i2400m/op-rfkill.c +++ b/drivers/net/wimax/i2400m/op-rfkill.c @@ -142,12 +142,12 @@ int i2400m_op_rfkill_sw_toggle(struct wimax_dev *wimax_dev, "%d\n", result); result = 0; error_cmd: - kfree(cmd); kfree_skb(ack_skb); error_msg_to_dev: error_alloc: d_fnend(4, dev, "(wimax_dev %p state %d) = %d\n", wimax_dev, state, result); + kfree(cmd); return result; } From db1fb5a39747a680a4cc182c8bb4648b845a841f Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Thu, 12 Sep 2019 23:23:27 -0500 Subject: [PATCH 2286/3715] iwlwifi: dbg_ini: fix memory leak in alloc_sgtable commit b4b814fec1a5a849383f7b3886b654a13abbda7d upstream. In alloc_sgtable if alloc_page fails, the alocated table should be released. Signed-off-by: Navid Emamdoost Signed-off-by: Luca Coelho Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index 4650b9e5da2b..ba9e7bfeca2c 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -532,6 +532,7 @@ static struct scatterlist *alloc_sgtable(int size) if (new_page) __free_page(new_page); } + kfree(table); return NULL; } alloc_size = min_t(int, size, PAGE_SIZE); From d0c15c1e8f9223552818fe5340b0427483b34f22 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 1 Apr 2019 09:35:54 +0800 Subject: [PATCH 2287/3715] dccp: Fix memleak in __feat_register_sp commit 1d3ff0950e2b40dc861b1739029649d03f591820 upstream. If dccp_feat_push_change fails, we forget free the mem which is alloced by kmemdup in dccp_feat_clone_sp_val. Reported-by: Hulk Robot Fixes: e8ef967a54f4 ("dccp: Registration routines for changing feature values") Reviewed-by: Mukesh Ojha Signed-off-by: YueHaibing Signed-off-by: David S. Miller Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- net/dccp/feat.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/dccp/feat.c b/net/dccp/feat.c index f227f002c73d..db87d9f58019 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -738,7 +738,12 @@ static int __feat_register_sp(struct list_head *fn, u8 feat, u8 is_local, if (dccp_feat_clone_sp_val(&fval, sp_val, sp_len)) return -ENOMEM; - return dccp_feat_push_change(fn, feat, is_local, mandatory, &fval); + if (dccp_feat_push_change(fn, feat, is_local, mandatory, &fval)) { + kfree(fval.sp.vec); + return -ENOMEM; + } + + return 0; } /** From 573e1fe003c1e2016bc40cc4f2b231e3b8c990f8 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Tue, 14 Jan 2020 18:39:37 +0000 Subject: [PATCH 2288/3715] drm/i915: Fix use-after-free when destroying GEM context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch is a simplified fix to address a use-after-free in 4.14.x and 4.19.x stable kernels. The flaw is already fixed upstream, starting in 5.2, by commit 7dc40713618c ("drm/i915: Introduce a mutex for file_priv->context_idr") as part of a more complex patch series that isn't appropriate for backporting to stable kernels. Expand mutex coverage, while destroying the GEM context, to include the GEM context lookup step. This fixes a use-after-free detected by KASAN: ================================================================== BUG: KASAN: use-after-free in i915_ppgtt_close+0x2ca/0x2f0 Write of size 1 at addr ffff8881368a8368 by task i915-poc/3124 CPU: 0 PID: 3124 Comm: i915-poc Not tainted 4.14.164 #1 Hardware name: HP HP Elite x2 1012 G1 /80FC, BIOS N85 Ver. 01.20 04/05/2017 Call Trace: dump_stack+0xcd/0x12e ? _atomic_dec_and_lock+0x1b2/0x1b2 ? i915_ppgtt_close+0x2ca/0x2f0 ? printk+0x8f/0xab ? show_regs_print_info+0x53/0x53 ? i915_ppgtt_close+0x2ca/0x2f0 print_address_description+0x65/0x270 ? i915_ppgtt_close+0x2ca/0x2f0 kasan_report+0x251/0x340 i915_ppgtt_close+0x2ca/0x2f0 ? __radix_tree_insert+0x3f0/0x3f0 ? i915_ppgtt_init_hw+0x7c0/0x7c0 context_close+0x42e/0x680 ? i915_gem_context_release+0x230/0x230 ? kasan_kmalloc+0xa0/0xd0 ? radix_tree_delete_item+0x1d4/0x250 ? radix_tree_lookup+0x10/0x10 ? inet_recvmsg+0x4b0/0x4b0 ? kasan_slab_free+0x88/0xc0 i915_gem_context_destroy_ioctl+0x236/0x300 ? i915_gem_context_create_ioctl+0x360/0x360 ? drm_dev_printk+0x1d0/0x1d0 ? memcpy+0x34/0x50 ? i915_gem_context_create_ioctl+0x360/0x360 drm_ioctl_kernel+0x1b0/0x2b0 ? drm_ioctl_permit+0x2a0/0x2a0 ? avc_ss_reset+0xd0/0xd0 drm_ioctl+0x6fe/0xa20 ? i915_gem_context_create_ioctl+0x360/0x360 ? drm_getstats+0x20/0x20 ? put_unused_fd+0x260/0x260 do_vfs_ioctl+0x189/0x12d0 ? ioctl_preallocate+0x280/0x280 ? selinux_file_ioctl+0x3a7/0x680 ? selinux_bprm_set_creds+0xe30/0xe30 ? security_file_ioctl+0x69/0xa0 ? selinux_bprm_set_creds+0xe30/0xe30 SyS_ioctl+0x6f/0x80 ? __sys_sendmmsg+0x4a0/0x4a0 ? do_vfs_ioctl+0x12d0/0x12d0 do_syscall_64+0x214/0x5f0 ? __switch_to_asm+0x31/0x60 ? __switch_to_asm+0x25/0x60 ? __switch_to_asm+0x31/0x60 ? syscall_return_slowpath+0x2c0/0x2c0 ? copy_overflow+0x20/0x20 ? __switch_to_asm+0x25/0x60 ? syscall_return_via_sysret+0x2a/0x7a ? prepare_exit_to_usermode+0x200/0x200 ? __switch_to_asm+0x31/0x60 ? __switch_to_asm+0x31/0x60 ? __switch_to_asm+0x25/0x60 ? __switch_to_asm+0x25/0x60 ? __switch_to_asm+0x31/0x60 ? __switch_to_asm+0x25/0x60 ? __switch_to_asm+0x31/0x60 ? __switch_to_asm+0x31/0x60 ? __switch_to_asm+0x25/0x60 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 RIP: 0033:0x7f7fda5115d7 RSP: 002b:00007f7eec317ec8 EFLAGS: 00000286 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f7fda5115d7 RDX: 000055b306db9188 RSI: 000000004008646e RDI: 0000000000000003 RBP: 00007f7eec317ef0 R08: 00007f7eec318700 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000286 R12: 00007f7eec317fc0 R13: 0000000000000000 R14: 0000000000000000 R15: 00007ffd8007ade0 Allocated by task 2898: save_stack+0x32/0xb0 kasan_kmalloc+0xa0/0xd0 kmem_cache_alloc_trace+0x5e/0x180 i915_ppgtt_create+0xab/0x2510 i915_gem_create_context+0x981/0xf90 i915_gem_context_create_ioctl+0x1d7/0x360 drm_ioctl_kernel+0x1b0/0x2b0 drm_ioctl+0x6fe/0xa20 do_vfs_ioctl+0x189/0x12d0 SyS_ioctl+0x6f/0x80 do_syscall_64+0x214/0x5f0 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 Freed by task 104: save_stack+0x32/0xb0 kasan_slab_free+0x72/0xc0 kfree+0x88/0x190 i915_ppgtt_release+0x24e/0x460 i915_gem_context_free+0x90/0x480 contexts_free_worker+0x54/0x80 process_one_work+0x876/0x14e0 worker_thread+0x1b8/0xfd0 kthread+0x2f8/0x3c0 ret_from_fork+0x35/0x40 The buggy address belongs to the object at ffff8881368a8000 which belongs to the cache kmalloc-8192 of size 8192 The buggy address is located 872 bytes inside of 8192-byte region [ffff8881368a8000, ffff8881368aa000) The buggy address belongs to the page: page:ffffea0004da2a00 count:1 mapcount:0 mapping: (null) index:0x0 compound_mapcount: 0 flags: 0x200000000008100(slab|head) raw: 0200000000008100 0000000000000000 0000000000000000 0000000100030003 raw: dead000000000100 dead000000000200 ffff88822a002280 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8881368a8200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8881368a8280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff8881368a8300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8881368a8380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8881368a8400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Fixes: 1acfc104cdf8 ("drm/i915: Enable rcu-only context lookups") Reported-by: 罗权 Cc: Chris Wilson Cc: Jon Bloomfield Cc: stable@vger.kernel.org # 4.14.x Cc: stable@vger.kernel.org # 4.19.x Signed-off-by: Tyler Hicks Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem_context.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 3925a63c1661..cdb67889817c 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -993,18 +993,19 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, if (args->ctx_id == DEFAULT_CONTEXT_HANDLE) return -ENOENT; - ctx = i915_gem_context_lookup(file_priv, args->ctx_id); - if (!ctx) - return -ENOENT; - - ret = mutex_lock_interruptible(&dev->struct_mutex); + ret = i915_mutex_lock_interruptible(dev); if (ret) - goto out; + return ret; + + ctx = i915_gem_context_lookup(file_priv, args->ctx_id); + if (!ctx) { + mutex_unlock(&dev->struct_mutex); + return -ENOENT; + } __destroy_hw_context(ctx, file_priv); mutex_unlock(&dev->struct_mutex); -out: i915_gem_context_put(ctx); return 0; } From f1545409a5e179e4bc94a0fc28eebb61b2e1ff33 Mon Sep 17 00:00:00 2001 From: Ran Bi Date: Wed, 11 Dec 2019 17:43:54 +0800 Subject: [PATCH 2289/3715] rtc: mt6397: fix alarm register overwrite commit 653997eeecef95c3ead4fba1b2d27e6a5854d6cd upstream. Alarm registers high byte was reserved for other functions. This add mask in alarm registers operation functions. This also fix error condition in interrupt handler. Fixes: fc2979118f3f ("rtc: mediatek: Add MT6397 RTC driver") Signed-off-by: Ran Bi Signed-off-by: Hsin-Hsiung Wang Link: https://lore.kernel.org/r/1576057435-3561-6-git-send-email-hsin-hsiung.wang@mediatek.com Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-mt6397.c | 47 ++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/drivers/rtc/rtc-mt6397.c b/drivers/rtc/rtc-mt6397.c index e82df43e5ca2..c696d9186451 100644 --- a/drivers/rtc/rtc-mt6397.c +++ b/drivers/rtc/rtc-mt6397.c @@ -55,6 +55,14 @@ #define RTC_AL_SEC 0x0018 +#define RTC_AL_SEC_MASK 0x003f +#define RTC_AL_MIN_MASK 0x003f +#define RTC_AL_HOU_MASK 0x001f +#define RTC_AL_DOM_MASK 0x001f +#define RTC_AL_DOW_MASK 0x0007 +#define RTC_AL_MTH_MASK 0x000f +#define RTC_AL_YEA_MASK 0x007f + #define RTC_PDN2 0x002e #define RTC_PDN2_PWRON_ALARM BIT(4) @@ -111,7 +119,7 @@ static irqreturn_t mtk_rtc_irq_handler_thread(int irq, void *data) irqen = irqsta & ~RTC_IRQ_EN_AL; mutex_lock(&rtc->lock); if (regmap_write(rtc->regmap, rtc->addr_base + RTC_IRQ_EN, - irqen) < 0) + irqen) == 0) mtk_rtc_write_trigger(rtc); mutex_unlock(&rtc->lock); @@ -233,12 +241,12 @@ static int mtk_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm) alm->pending = !!(pdn2 & RTC_PDN2_PWRON_ALARM); mutex_unlock(&rtc->lock); - tm->tm_sec = data[RTC_OFFSET_SEC]; - tm->tm_min = data[RTC_OFFSET_MIN]; - tm->tm_hour = data[RTC_OFFSET_HOUR]; - tm->tm_mday = data[RTC_OFFSET_DOM]; - tm->tm_mon = data[RTC_OFFSET_MTH]; - tm->tm_year = data[RTC_OFFSET_YEAR]; + tm->tm_sec = data[RTC_OFFSET_SEC] & RTC_AL_SEC_MASK; + tm->tm_min = data[RTC_OFFSET_MIN] & RTC_AL_MIN_MASK; + tm->tm_hour = data[RTC_OFFSET_HOUR] & RTC_AL_HOU_MASK; + tm->tm_mday = data[RTC_OFFSET_DOM] & RTC_AL_DOM_MASK; + tm->tm_mon = data[RTC_OFFSET_MTH] & RTC_AL_MTH_MASK; + tm->tm_year = data[RTC_OFFSET_YEAR] & RTC_AL_YEA_MASK; tm->tm_year += RTC_MIN_YEAR_OFFSET; tm->tm_mon--; @@ -259,14 +267,25 @@ static int mtk_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm) tm->tm_year -= RTC_MIN_YEAR_OFFSET; tm->tm_mon++; - data[RTC_OFFSET_SEC] = tm->tm_sec; - data[RTC_OFFSET_MIN] = tm->tm_min; - data[RTC_OFFSET_HOUR] = tm->tm_hour; - data[RTC_OFFSET_DOM] = tm->tm_mday; - data[RTC_OFFSET_MTH] = tm->tm_mon; - data[RTC_OFFSET_YEAR] = tm->tm_year; - mutex_lock(&rtc->lock); + ret = regmap_bulk_read(rtc->regmap, rtc->addr_base + RTC_AL_SEC, + data, RTC_OFFSET_COUNT); + if (ret < 0) + goto exit; + + data[RTC_OFFSET_SEC] = ((data[RTC_OFFSET_SEC] & ~(RTC_AL_SEC_MASK)) | + (tm->tm_sec & RTC_AL_SEC_MASK)); + data[RTC_OFFSET_MIN] = ((data[RTC_OFFSET_MIN] & ~(RTC_AL_MIN_MASK)) | + (tm->tm_min & RTC_AL_MIN_MASK)); + data[RTC_OFFSET_HOUR] = ((data[RTC_OFFSET_HOUR] & ~(RTC_AL_HOU_MASK)) | + (tm->tm_hour & RTC_AL_HOU_MASK)); + data[RTC_OFFSET_DOM] = ((data[RTC_OFFSET_DOM] & ~(RTC_AL_DOM_MASK)) | + (tm->tm_mday & RTC_AL_DOM_MASK)); + data[RTC_OFFSET_MTH] = ((data[RTC_OFFSET_MTH] & ~(RTC_AL_MTH_MASK)) | + (tm->tm_mon & RTC_AL_MTH_MASK)); + data[RTC_OFFSET_YEAR] = ((data[RTC_OFFSET_YEAR] & ~(RTC_AL_YEA_MASK)) | + (tm->tm_year & RTC_AL_YEA_MASK)); + if (alm->enabled) { ret = regmap_bulk_write(rtc->regmap, rtc->addr_base + RTC_AL_SEC, From d070b8d5701e91dee87603c784cfb2484e5db4e1 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Mon, 25 Nov 2019 00:39:30 -0800 Subject: [PATCH 2290/3715] RDMA/bnxt_re: Fix Send Work Entry state check while polling completions commit c5275723580922e5f3264f96751337661a153c7d upstream. Some adapters need a fence Work Entry to handle retransmission. Currently the driver checks for this condition, only if the Send queue entry is signalled. Implement the condition check, irrespective of the signalled state of the Work queue entries Failure to add the fence can result in access to memory that is already marked as completed, triggering data corruption, transmission failure, IOMMU failures, etc. Fixes: 9152e0b722b2 ("RDMA/bnxt_re: HW workarounds for handling specific conditions") Link: https://lore.kernel.org/r/1574671174-5064-3-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index e8afc47f8949..908803fe8276 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -2024,13 +2024,13 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, bnxt_qplib_mark_qp_error(qp); bnxt_qplib_unlock_buddy_cq(qp, cq); } else { + /* Before we complete, do WA 9060 */ + if (do_wa9060(qp, cq, cq_cons, sw_sq_cons, + cqe_sq_cons)) { + *lib_qp = qp; + goto out; + } if (swq->flags & SQ_SEND_FLAGS_SIGNAL_COMP) { - /* Before we complete, do WA 9060 */ - if (do_wa9060(qp, cq, cq_cons, sw_sq_cons, - cqe_sq_cons)) { - *lib_qp = qp; - goto out; - } cqe->status = CQ_REQ_STATUS_OK; cqe++; (*budget)--; From 42182bcccd7f534d462240f2d341ecba60567ecd Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Wed, 4 Dec 2019 16:43:31 +0100 Subject: [PATCH 2291/3715] ASoC: stm32: spdifrx: fix inconsistent lock state commit 2859b1784031b5709446af8f6039c467f136e67d upstream. In current spdifrx driver locks may be requested as follows: - request lock on iec capture control, when starting synchronization. - request lock in interrupt context, when spdifrx stop is called from IRQ handler. Take lock with IRQs disabled, to avoid the possible deadlock. Lockdep report: [ 74.278059] ================================ [ 74.282306] WARNING: inconsistent lock state [ 74.290120] -------------------------------- ... [ 74.314373] CPU0 [ 74.314377] ---- [ 74.314381] lock(&(&spdifrx->lock)->rlock); [ 74.314396] [ 74.314400] lock(&(&spdifrx->lock)->rlock); Fixes: 03e4d5d56fa5 ("ASoC: stm32: Add SPDIFRX support") Signed-off-by: Olivier Moysan Link: https://lore.kernel.org/r/20191204154333.7152-2-olivier.moysan@st.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/stm/stm32_spdifrx.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/sound/soc/stm/stm32_spdifrx.c b/sound/soc/stm/stm32_spdifrx.c index 84cc5678beba..60d7980104e0 100644 --- a/sound/soc/stm/stm32_spdifrx.c +++ b/sound/soc/stm/stm32_spdifrx.c @@ -313,6 +313,7 @@ static void stm32_spdifrx_dma_ctrl_stop(struct stm32_spdifrx_data *spdifrx) static int stm32_spdifrx_start_sync(struct stm32_spdifrx_data *spdifrx) { int cr, cr_mask, imr, ret; + unsigned long flags; /* Enable IRQs */ imr = SPDIFRX_IMR_IFEIE | SPDIFRX_IMR_SYNCDIE | SPDIFRX_IMR_PERRIE; @@ -320,7 +321,7 @@ static int stm32_spdifrx_start_sync(struct stm32_spdifrx_data *spdifrx) if (ret) return ret; - spin_lock(&spdifrx->lock); + spin_lock_irqsave(&spdifrx->lock, flags); spdifrx->refcount++; @@ -353,7 +354,7 @@ static int stm32_spdifrx_start_sync(struct stm32_spdifrx_data *spdifrx) "Failed to start synchronization\n"); } - spin_unlock(&spdifrx->lock); + spin_unlock_irqrestore(&spdifrx->lock, flags); return ret; } @@ -361,11 +362,12 @@ static int stm32_spdifrx_start_sync(struct stm32_spdifrx_data *spdifrx) static void stm32_spdifrx_stop(struct stm32_spdifrx_data *spdifrx) { int cr, cr_mask, reg; + unsigned long flags; - spin_lock(&spdifrx->lock); + spin_lock_irqsave(&spdifrx->lock, flags); if (--spdifrx->refcount) { - spin_unlock(&spdifrx->lock); + spin_unlock_irqrestore(&spdifrx->lock, flags); return; } @@ -384,7 +386,7 @@ static void stm32_spdifrx_stop(struct stm32_spdifrx_data *spdifrx) regmap_read(spdifrx->regmap, STM32_SPDIFRX_DR, ®); regmap_read(spdifrx->regmap, STM32_SPDIFRX_CSR, ®); - spin_unlock(&spdifrx->lock); + spin_unlock_irqrestore(&spdifrx->lock, flags); } static int stm32_spdifrx_dma_ctrl_register(struct device *dev, From 3b3c9bfa0640ab023221248460dc578dad5b356b Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Wed, 4 Dec 2019 16:43:32 +0100 Subject: [PATCH 2292/3715] ASoC: stm32: spdifrx: fix race condition in irq handler commit 86e1956af4c863d653136fd6e5694adf2054dbaa upstream. When snd_pcm_stop() is called in interrupt routine, substream context may have already been released. Add protection on substream context. Fixes: 03e4d5d56fa5 ("ASoC: stm32: Add SPDIFRX support") Signed-off-by: Olivier Moysan Link: https://lore.kernel.org/r/20191204154333.7152-3-olivier.moysan@st.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/stm/stm32_spdifrx.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/sound/soc/stm/stm32_spdifrx.c b/sound/soc/stm/stm32_spdifrx.c index 60d7980104e0..7bc57651e186 100644 --- a/sound/soc/stm/stm32_spdifrx.c +++ b/sound/soc/stm/stm32_spdifrx.c @@ -213,6 +213,7 @@ * @slave_config: dma slave channel runtime config pointer * @phys_addr: SPDIFRX registers physical base address * @lock: synchronization enabling lock + * @irq_lock: prevent race condition with IRQ on stream state * @cs: channel status buffer * @ub: user data buffer * @irq: SPDIFRX interrupt line @@ -233,6 +234,7 @@ struct stm32_spdifrx_data { struct dma_slave_config slave_config; dma_addr_t phys_addr; spinlock_t lock; /* Sync enabling lock */ + spinlock_t irq_lock; /* Prevent race condition on stream state */ unsigned char cs[SPDIFRX_CS_BYTES_NB]; unsigned char ub[SPDIFRX_UB_BYTES_NB]; int irq; @@ -646,7 +648,6 @@ static const struct regmap_config stm32_h7_spdifrx_regmap_conf = { static irqreturn_t stm32_spdifrx_isr(int irq, void *devid) { struct stm32_spdifrx_data *spdifrx = (struct stm32_spdifrx_data *)devid; - struct snd_pcm_substream *substream = spdifrx->substream; struct platform_device *pdev = spdifrx->pdev; unsigned int cr, mask, sr, imr; unsigned int flags; @@ -714,14 +715,19 @@ static irqreturn_t stm32_spdifrx_isr(int irq, void *devid) regmap_update_bits(spdifrx->regmap, STM32_SPDIFRX_CR, SPDIFRX_CR_SPDIFEN_MASK, cr); - if (substream) - snd_pcm_stop(substream, SNDRV_PCM_STATE_DISCONNECTED); + spin_lock(&spdifrx->irq_lock); + if (spdifrx->substream) + snd_pcm_stop(spdifrx->substream, + SNDRV_PCM_STATE_DISCONNECTED); + spin_unlock(&spdifrx->irq_lock); return IRQ_HANDLED; } - if (err_xrun && substream) - snd_pcm_stop_xrun(substream); + spin_lock(&spdifrx->irq_lock); + if (err_xrun && spdifrx->substream) + snd_pcm_stop_xrun(spdifrx->substream); + spin_unlock(&spdifrx->irq_lock); return IRQ_HANDLED; } @@ -730,9 +736,12 @@ static int stm32_spdifrx_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *cpu_dai) { struct stm32_spdifrx_data *spdifrx = snd_soc_dai_get_drvdata(cpu_dai); + unsigned long flags; int ret; + spin_lock_irqsave(&spdifrx->irq_lock, flags); spdifrx->substream = substream; + spin_unlock_irqrestore(&spdifrx->irq_lock, flags); ret = clk_prepare_enable(spdifrx->kclk); if (ret) @@ -804,8 +813,12 @@ static void stm32_spdifrx_shutdown(struct snd_pcm_substream *substream, struct snd_soc_dai *cpu_dai) { struct stm32_spdifrx_data *spdifrx = snd_soc_dai_get_drvdata(cpu_dai); + unsigned long flags; + spin_lock_irqsave(&spdifrx->irq_lock, flags); spdifrx->substream = NULL; + spin_unlock_irqrestore(&spdifrx->irq_lock, flags); + clk_disable_unprepare(spdifrx->kclk); } @@ -910,6 +923,7 @@ static int stm32_spdifrx_probe(struct platform_device *pdev) spdifrx->pdev = pdev; init_completion(&spdifrx->cs_completion); spin_lock_init(&spdifrx->lock); + spin_lock_init(&spdifrx->irq_lock); platform_set_drvdata(pdev, spdifrx); From a5eedf4e6bba2d4f87af4bebb0c74be90f82b54e Mon Sep 17 00:00:00 2001 From: Swapna Manupati Date: Thu, 26 Dec 2019 17:42:11 +0530 Subject: [PATCH 2293/3715] gpio: zynq: Fix for bug in zynq_gpio_restore_context API commit 36f2e7207f21a83ca0054116191f119ac64583ab upstream. This patch writes the inverse value of Interrupt Mask Status register into the Interrupt Enable register in zynq_gpio_restore_context API to fix the bug. Fixes: e11de4de28c0 ("gpio: zynq: Add support for suspend resume") Signed-off-by: Swapna Manupati Signed-off-by: Michal Simek Signed-off-by: Srinivas Neeli Link: https://lore.kernel.org/r/1577362338-28744-2-git-send-email-srinivas.neeli@xilinx.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-zynq.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-zynq.c b/drivers/gpio/gpio-zynq.c index b3cc948a2d8b..f1d7066b6637 100644 --- a/drivers/gpio/gpio-zynq.c +++ b/drivers/gpio/gpio-zynq.c @@ -639,6 +639,8 @@ static void zynq_gpio_restore_context(struct zynq_gpio *gpio) unsigned int bank_num; for (bank_num = 0; bank_num < gpio->p_data->max_bank; bank_num++) { + writel_relaxed(ZYNQ_GPIO_IXR_DISABLE_ALL, gpio->base_addr + + ZYNQ_GPIO_INTDIS_OFFSET(bank_num)); writel_relaxed(gpio->context.datalsw[bank_num], gpio->base_addr + ZYNQ_GPIO_DATA_LSW_OFFSET(bank_num)); @@ -648,9 +650,6 @@ static void zynq_gpio_restore_context(struct zynq_gpio *gpio) writel_relaxed(gpio->context.dirm[bank_num], gpio->base_addr + ZYNQ_GPIO_DIRM_OFFSET(bank_num)); - writel_relaxed(gpio->context.int_en[bank_num], - gpio->base_addr + - ZYNQ_GPIO_INTEN_OFFSET(bank_num)); writel_relaxed(gpio->context.int_type[bank_num], gpio->base_addr + ZYNQ_GPIO_INTTYPE_OFFSET(bank_num)); @@ -660,6 +659,9 @@ static void zynq_gpio_restore_context(struct zynq_gpio *gpio) writel_relaxed(gpio->context.int_any[bank_num], gpio->base_addr + ZYNQ_GPIO_INTANY_OFFSET(bank_num)); + writel_relaxed(~(gpio->context.int_en[bank_num]), + gpio->base_addr + + ZYNQ_GPIO_INTEN_OFFSET(bank_num)); } } From 1ef9c81097d0aef4ffc45aa76b9a9f25a10609a3 Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Tue, 31 Dec 2019 13:24:19 -0700 Subject: [PATCH 2294/3715] iommu: Remove device link to group on failure commit 7d4e6ccd1fb09dbfbc49746ca82bd5c25ad4bfe4 upstream. This adds the missing teardown step that removes the device link from the group when the device addition fails. Signed-off-by: Jon Derrick Fixes: 797a8b4d768c5 ("iommu: Handle default domain attach failure") Reviewed-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/iommu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 1620a6f49989..4b761678a18b 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -613,6 +613,7 @@ err_put_group: mutex_unlock(&group->mutex); dev->iommu_group = NULL; kobject_put(group->devices_kobj); + sysfs_remove_link(group->devices_kobj, device->name); err_free_name: kfree(device->name); err_remove_link: From 5371360dd2a8b29593bd8bdd6aec5847f094c075 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 27 Nov 2019 10:59:19 +0100 Subject: [PATCH 2295/3715] gpio: Fix error message on out-of-range GPIO in lookup table commit d935bd50dd14a7714cbdba9a76435dbb56edb1ae upstream. When a GPIO offset in a lookup table is out-of-range, the printed error message (1) does not include the actual out-of-range value, and (2) contains an off-by-one error in the upper bound. Avoid user confusion by also printing the actual GPIO offset, and correcting the upper bound of the range. While at it, use "%u" for unsigned int. Sample impact: -requested GPIO 0 is out of range [0..32] for chip e6052000.gpio +requested GPIO 0 (45) is out of range [0..31] for chip e6052000.gpio Fixes: 2a3cf6a3599e9015 ("gpiolib: return -ENOENT if no GPIO mapping exists") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20191127095919.4214-1-geert+renesas@glider.be Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 2b75aab8b3a0..f0777a7a4305 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -3167,8 +3167,9 @@ static struct gpio_desc *gpiod_find(struct device *dev, const char *con_id, if (chip->ngpio <= p->chip_hwnum) { dev_err(dev, - "requested GPIO %d is out of range [0..%d] for chip %s\n", - idx, chip->ngpio, chip->label); + "requested GPIO %u (%u) is out of range [0..%u] for chip %s\n", + idx, p->chip_hwnum, chip->ngpio - 1, + chip->label); return ERR_PTR(-EINVAL); } From 8496401c9de59ad878ce13a05a717c62c41b4795 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 22 Dec 2019 11:27:08 +0000 Subject: [PATCH 2296/3715] hsr: reset network header when supervision frame is created commit 3ed0a1d563903bdb4b4c36c58c4d9c1bcb23a6e6 upstream. The supervision frame is L2 frame. When supervision frame is created, hsr module doesn't set network header. If tap routine is enabled, dev_queue_xmit_nit() is called and it checks network_header. If network_header pointer wasn't set(or invalid), it resets network_header and warns. In order to avoid unnecessary warning message, resetting network_header is needed. Test commands: ip netns add nst ip link add veth0 type veth peer name veth1 ip link add veth2 type veth peer name veth3 ip link set veth1 netns nst ip link set veth3 netns nst ip link set veth0 up ip link set veth2 up ip link add hsr0 type hsr slave1 veth0 slave2 veth2 ip a a 192.168.100.1/24 dev hsr0 ip link set hsr0 up ip netns exec nst ip link set veth1 up ip netns exec nst ip link set veth3 up ip netns exec nst ip link add hsr1 type hsr slave1 veth1 slave2 veth3 ip netns exec nst ip a a 192.168.100.2/24 dev hsr1 ip netns exec nst ip link set hsr1 up tcpdump -nei veth0 Splat looks like: [ 175.852292][ C3] protocol 88fb is buggy, dev veth0 Fixes: f421436a591d ("net/hsr: Add support for the High-availability Seamless Redundancy protocol (HSRv0)") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/hsr/hsr_device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index cfe20f15f618..c962c406d7b1 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -281,6 +281,8 @@ static void send_hsr_supervision_frame(struct hsr_port *master, skb->dev->dev_addr, skb->len) <= 0) goto out; skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); if (hsrVer > 0) { hsr_tag = skb_put(skb, sizeof(struct hsr_tag)); From 7ffb1ac23fcc687db2624e09bde36a8deced6ac0 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 17 Dec 2019 20:04:51 -0700 Subject: [PATCH 2297/3715] cifs: Adjust indentation in smb2_open_file commit 7935799e041ae10d380d04ea23868240f082bd11 upstream. Clang warns: ../fs/cifs/smb2file.c:70:3: warning: misleading indentation; statement is not part of the previous 'if' [-Wmisleading-indentation] if (oparms->tcon->use_resilient) { ^ ../fs/cifs/smb2file.c:66:2: note: previous statement is here if (rc) ^ 1 warning generated. This warning occurs because there is a space after the tab on this line. Remove it so that the indentation is consistent with the Linux kernel coding style and clang no longer warns. Fixes: 592fafe644bf ("Add resilienthandles mount parm") Link: https://github.com/ClangBuiltLinux/linux/issues/826 Signed-off-by: Nathan Chancellor Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index 2c809233084b..e270812927cf 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -69,7 +69,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, goto out; - if (oparms->tcon->use_resilient) { + if (oparms->tcon->use_resilient) { nr_ioctl_req.Timeout = 0; /* use server default (120 seconds) */ nr_ioctl_req.Reserved = 0; rc = SMB2_ioctl(xid, oparms->tcon, fid->persistent_fid, From f04fb2025339f90d65c3473250c4ee8b6f482a60 Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Wed, 11 Sep 2019 11:45:15 -0500 Subject: [PATCH 2298/3715] btrfs: simplify inode locking for RWF_NOWAIT commit 9cf35f673583ccc9f3e2507498b3079d56614ad3 upstream. This is similar to 942491c9e6d6 ("xfs: fix AIM7 regression"). Apparently our current rwsem code doesn't like doing the trylock, then lock for real scheme. This causes extra contention on the lock and can be measured eg. by AIM7 benchmark. So change our read/write methods to just do the trylock for the RWF_NOWAIT case. Fixes: edf064e7c6fe ("btrfs: nowait aio support") Signed-off-by: Goldwyn Rodrigues Reviewed-by: David Sterba [ update changelog ] Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/file.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index bf654d48eb46..97be32da857a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1890,9 +1890,10 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, (iocb->ki_flags & IOCB_NOWAIT)) return -EOPNOTSUPP; - if (!inode_trylock(inode)) { - if (iocb->ki_flags & IOCB_NOWAIT) + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!inode_trylock(inode)) return -EAGAIN; + } else { inode_lock(inode); } From 4dbdf3e7c22c801e5d5b3bf489a091c43e78d37f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 29 Oct 2019 07:57:21 +0200 Subject: [PATCH 2299/3715] RDMA/mlx5: Return proper error value commit 546d30099ed204792083f043cd7e016de86016a3 upstream. Returned value from mlx5_mr_cache_alloc() is checked to be error or real pointer. Return proper error code instead of NULL which is not checked later. Fixes: 81713d3788d2 ("IB/mlx5: Add implicit MR support") Link: https://lore.kernel.org/r/20191029055721.7192-1-leon@kernel.org Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx5/mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index cfddca850cb4..fb45bfa4f845 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -460,7 +460,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry) if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) { mlx5_ib_err(dev, "cache entry %d is out of range\n", entry); - return NULL; + return ERR_PTR(-EINVAL); } ent = &cache->ent[entry]; From 56614548ff01c92cbae9870a462bde24dedaf194 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 5 Nov 2019 13:46:32 -0800 Subject: [PATCH 2300/3715] RDMA/srpt: Report the SCSI residual to the initiator commit e88982ad1bb12db699de96fbc07096359ef6176c upstream. The code added by this patch is similar to the code that already exists in ibmvscsis_determine_resid(). This patch has been tested by running the following command: strace sg_raw -r 1k /dev/sdb 12 00 00 00 60 00 -o inquiry.bin |& grep resid= Link: https://lore.kernel.org/r/20191105214632.183302-1-bvanassche@acm.org Fixes: a42d985bd5b2 ("ib_srpt: Initial SRP Target merge for v3.3-rc1") Signed-off-by: Bart Van Assche Acked-by: Honggang Li Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/srpt/ib_srpt.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 94161ca526fc..1446e1cc69ae 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1246,9 +1246,11 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch, struct srpt_send_ioctx *ioctx, u64 tag, int status) { + struct se_cmd *cmd = &ioctx->cmd; struct srp_rsp *srp_rsp; const u8 *sense_data; int sense_data_len, max_sense_len; + u32 resid = cmd->residual_count; /* * The lowest bit of all SAM-3 status codes is zero (see also @@ -1270,6 +1272,28 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch, srp_rsp->tag = tag; srp_rsp->status = status; + if (cmd->se_cmd_flags & SCF_UNDERFLOW_BIT) { + if (cmd->data_direction == DMA_TO_DEVICE) { + /* residual data from an underflow write */ + srp_rsp->flags = SRP_RSP_FLAG_DOUNDER; + srp_rsp->data_out_res_cnt = cpu_to_be32(resid); + } else if (cmd->data_direction == DMA_FROM_DEVICE) { + /* residual data from an underflow read */ + srp_rsp->flags = SRP_RSP_FLAG_DIUNDER; + srp_rsp->data_in_res_cnt = cpu_to_be32(resid); + } + } else if (cmd->se_cmd_flags & SCF_OVERFLOW_BIT) { + if (cmd->data_direction == DMA_TO_DEVICE) { + /* residual data from an overflow write */ + srp_rsp->flags = SRP_RSP_FLAG_DOOVER; + srp_rsp->data_out_res_cnt = cpu_to_be32(resid); + } else if (cmd->data_direction == DMA_FROM_DEVICE) { + /* residual data from an overflow read */ + srp_rsp->flags = SRP_RSP_FLAG_DIOVER; + srp_rsp->data_in_res_cnt = cpu_to_be32(resid); + } + } + if (sense_data_len) { BUILD_BUG_ON(MIN_MAX_RSP_SIZE <= sizeof(*srp_rsp)); max_sense_len = ch->max_ti_iu_len - sizeof(*srp_rsp); From a97fdbf0f25c17b3c296dce6c1f2731cbdb39e7c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 23 Apr 2018 11:41:33 +0100 Subject: [PATCH 2301/3715] arm64: add sentinel to kpti_safe_list commit 71c751f2a43fa03fae3cf5f0067ed3001a397013 upstream. We're missing a sentinel entry in kpti_safe_list. Thus is_midr_in_range_list() can walk past the end of kpti_safe_list. Depending on the contents of memory, this could erroneously match a CPU's MIDR, cause a data abort, or other bad outcomes. Add the sentinel entry to avoid this. Fixes: be5b299830c63ed7 ("arm64: capabilities: Add support for checks based on a list of MIDRs") Signed-off-by: Mark Rutland Reported-by: Jan Kiszka Tested-by: Jan Kiszka Reviewed-by: Suzuki K Poulose Cc: Catalin Marinas Cc: Suzuki K Poulose Cc: Will Deacon Signed-off-by: Will Deacon Signed-off-by: Juerg Haefliger Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpufeature.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 60066315d669..ae28979676c1 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -836,6 +836,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), + { /* sentinel */ } }; char const *str = "kpti command line option"; bool meltdown_safe; From 87973b285545c45b2780f59ce4efda145b024d9b Mon Sep 17 00:00:00 2001 From: Dirk Mueller Date: Wed, 25 Jul 2018 13:10:28 +0200 Subject: [PATCH 2302/3715] arm64: Check for errata before evaluating cpu features commit dc0e36581eb2da1aa3c63ceeff0f10ef1e899b2a upstream. Since commit d3aec8a28be3b8 ("arm64: capabilities: Restrict KPTI detection to boot-time CPUs") we rely on errata flags being already populated during feature enumeration. The order of errata and features was flipped as part of commit ed478b3f9e4a ("arm64: capabilities: Group handling of features and errata workarounds"). Return to the orginal order of errata and feature evaluation to ensure errata flags are present during feature evaluation. Fixes: ed478b3f9e4a ("arm64: capabilities: Group handling of features and errata workarounds") CC: Suzuki K Poulose CC: Marc Zyngier Signed-off-by: Dirk Mueller Reviewed-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Juerg Haefliger Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpufeature.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index ae28979676c1..09c6499bc500 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1278,9 +1278,9 @@ static void __update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, static void update_cpu_capabilities(u16 scope_mask) { - __update_cpu_capabilities(arm64_features, scope_mask, "detected:"); __update_cpu_capabilities(arm64_errata, scope_mask, "enabling workaround for"); + __update_cpu_capabilities(arm64_features, scope_mask, "detected:"); } static int __enable_cpu_capability(void *arg) @@ -1335,8 +1335,8 @@ __enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps, static void __init enable_cpu_capabilities(u16 scope_mask) { - __enable_cpu_capabilities(arm64_features, scope_mask); __enable_cpu_capabilities(arm64_errata, scope_mask); + __enable_cpu_capabilities(arm64_features, scope_mask); } /* From e3258fc2fc74008ff4e5713c62ec1e95c79c8e77 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 8 Jan 2020 17:21:32 -0800 Subject: [PATCH 2303/3715] scsi: enclosure: Fix stale device oops with hot replug commit 529244bd1afc102ab164429d338d310d5d65e60d upstream. Doing an add/remove/add on a SCSI device in an enclosure leads to an oops caused by poisoned values in the enclosure device list pointers. The reason is because we are keeping the enclosure device across the enclosed device add/remove/add but the current code is doing a device_add/device_del/device_add on it. This is the wrong thing to do in sysfs, so fix it by not doing a device_del on the enclosure device simply because of a hot remove of the drive in the slot. [mkp: added missing email addresses] Fixes: 43d8eb9cfd0a ("[SCSI] ses: add support for enclosure component hot removal") Link: https://lore.kernel.org/r/1578532892.3852.10.camel@HansenPartnership.com Signed-off-by: James Bottomley Reported-by: Luo Jiaxing Tested-by: John Garry Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/misc/enclosure.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/misc/enclosure.c b/drivers/misc/enclosure.c index eb29113e0bac..b11737f7bdca 100644 --- a/drivers/misc/enclosure.c +++ b/drivers/misc/enclosure.c @@ -419,10 +419,9 @@ int enclosure_remove_device(struct enclosure_device *edev, struct device *dev) cdev = &edev->component[i]; if (cdev->dev == dev) { enclosure_remove_links(cdev); - device_del(&cdev->cdev); put_device(dev); cdev->dev = NULL; - return device_add(&cdev->cdev); + return 0; } } return -ENODEV; From c301a4e96501e6f8333145efbda2aeb1ec0ba4fe Mon Sep 17 00:00:00 2001 From: Xiang Chen Date: Thu, 9 Jan 2020 09:12:24 +0800 Subject: [PATCH 2304/3715] scsi: sd: Clear sdkp->protection_type if disk is reformatted without PI commit 465f4edaecc6c37f81349233e84d46246bcac11a upstream. If an attached disk with protection information enabled is reformatted to Type 0 the revalidation code does not clear the original protection type and subsequent accesses will keep setting RDPROTECT/WRPROTECT. Set the protection type to 0 if the disk reports PROT_EN=0 in READ CAPACITY(16). [mkp: commit desc] Fixes: fe542396da73 ("[SCSI] sd: Ensure we correctly disable devices with unknown protection type") Link: https://lore.kernel.org/r/1578532344-101668-1-git-send-email-chenxiang66@hisilicon.com Signed-off-by: Xiang Chen Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 35cea5827a7a..dd7ca76c000a 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2206,8 +2206,10 @@ static int sd_read_protection_type(struct scsi_disk *sdkp, unsigned char *buffer u8 type; int ret = 0; - if (scsi_device_protection(sdp) == 0 || (buffer[12] & 1) == 0) + if (scsi_device_protection(sdp) == 0 || (buffer[12] & 1) == 0) { + sdkp->protection_type = 0; return ret; + } type = ((buffer[12] >> 1) & 7) + 1; /* P_TYPE 0 = Type 1 */ From c2d4a986f979d4004be7f5a3460bc0f0b8f41c02 Mon Sep 17 00:00:00 2001 From: Jian-Hong Pan Date: Mon, 30 Dec 2019 16:30:45 +0800 Subject: [PATCH 2305/3715] platform/x86: asus-wmi: Fix keyboard brightness cannot be set to 0 commit 176a7fca81c5090a7240664e3002c106d296bf31 upstream. Some of ASUS laptops like UX431FL keyboard backlight cannot be set to brightness 0. According to ASUS' information, the brightness should be 0x80 ~ 0x83. This patch fixes it by following the logic. Fixes: e9809c0b9670 ("asus-wmi: add keyboard backlight support") Signed-off-by: Jian-Hong Pan Reviewed-by: Daniel Drake Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/asus-wmi.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 1c1999600717..af26ca49996d 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -457,13 +457,7 @@ static void kbd_led_update(struct work_struct *work) asus = container_of(work, struct asus_wmi, kbd_led_work); - /* - * bits 0-2: level - * bit 7: light on/off - */ - if (asus->kbd_led_wk > 0) - ctrl_param = 0x80 | (asus->kbd_led_wk & 0x7F); - + ctrl_param = 0x80 | (asus->kbd_led_wk & 0x7F); asus_wmi_set_devstate(ASUS_WMI_DEVID_KBD_BACKLIGHT, ctrl_param, NULL); } From 760e1a2e4d220c7c1992bbb197790cedf0c86f3d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 3 Jan 2020 11:52:17 -0500 Subject: [PATCH 2306/3715] xprtrdma: Fix completion wait during device removal commit 13cb886c591f341a8759f175292ddf978ef903a1 upstream. I've found that on occasion, "rmmod " will hang while if an NFS is under load. Ensure that ri_remove_done is initialized only just before the transport is woken up to force a close. This avoids the completion possibly getting initialized again while the CM event handler is waiting for a wake-up. Fixes: bebd031866ca ("xprtrdma: Support unplugging an HCA from under an NFS mount") Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtrdma/verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 2aaf46599126..c5e991d14888 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -264,6 +264,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) ia->ri_device->name, sap, rpc_get_port(sap)); #endif + init_completion(&ia->ri_remove_done); set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags); ep->rep_connected = -ENODEV; xprt_force_disconnect(&xprt->rx_xprt); @@ -319,7 +320,6 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, int rc; init_completion(&ia->ri_done); - init_completion(&ia->ri_remove_done); id = rdma_create_id(&init_net, rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC); From 34ed0dfdd8f561a05bbc62aae31ac29cc9cb8d07 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 13 Nov 2019 09:39:36 +0100 Subject: [PATCH 2307/3715] NFSv4.x: Drop the slot if nfs4_delegreturn_prepare waits for layoutreturn commit 5326de9e94bedcf7366e7e7625d4deb8c1f1ca8a upstream. If nfs4_delegreturn_prepare needs to wait for a layoutreturn to complete then make sure we drop the sequence slot if we hold it. Fixes: 1c5bd76d17cc ("pNFS: Enable layoutreturn operation for return-on-close") Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f1526f65cc58..3dd403943b07 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5797,8 +5797,10 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data) d_data = (struct nfs4_delegreturndata *)data; - if (!d_data->lr.roc && nfs4_wait_on_layoutreturn(d_data->inode, task)) + if (!d_data->lr.roc && nfs4_wait_on_layoutreturn(d_data->inode, task)) { + nfs4_sequence_done(task, &d_data->res.seq_res); return; + } nfs4_setup_sequence(d_data->res.server->nfs_client, &d_data->args.seq_args, From 9d9aee7be0dbb89d6ce4e64c996110a050727d54 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Fri, 1 Nov 2019 11:35:03 +0200 Subject: [PATCH 2308/3715] iio: imu: adis16480: assign bias value only if operation succeeded commit 9b742763d9d4195e823ae6ece760c9ed0500c1dc upstream. This was found only after the whole thing with the inline functions, but the compiler actually found something. The value of the `bias` (in adis16480_get_calibbias()) should only be set if the read operation was successful. No actual known problem occurs as users of this function all ultimately check the return value. Hence probably not stable material. Fixes: 2f3abe6cbb6c9 ("iio:imu: Add support for the ADIS16480 and similar IMUs") Signed-off-by: Alexandru Ardelean Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/imu/adis16480.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/iio/imu/adis16480.c b/drivers/iio/imu/adis16480.c index c950aa10d0ae..5abe095901c8 100644 --- a/drivers/iio/imu/adis16480.c +++ b/drivers/iio/imu/adis16480.c @@ -372,12 +372,14 @@ static int adis16480_get_calibbias(struct iio_dev *indio_dev, case IIO_MAGN: case IIO_PRESSURE: ret = adis_read_reg_16(&st->adis, reg, &val16); - *bias = sign_extend32(val16, 15); + if (ret == 0) + *bias = sign_extend32(val16, 15); break; case IIO_ANGL_VEL: case IIO_ACCEL: ret = adis_read_reg_32(&st->adis, reg, &val32); - *bias = sign_extend32(val32, 31); + if (ret == 0) + *bias = sign_extend32(val32, 31); break; default: ret = -EINVAL; From 7e1a1d6e4140a3942882f245e6fe22c13ebee6ac Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Tue, 8 Oct 2019 03:57:34 +0300 Subject: [PATCH 2309/3715] mei: fix modalias documentation commit 73668309215285366c433489de70d31362987be9 upstream. mei client bus added the client protocol version to the device alias, but ABI documentation was not updated. Fixes: b26864cad1c9 (mei: bus: add client protocol version to the device alias) Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20191008005735.12707-1-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-bus-mei | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-bus-mei b/Documentation/ABI/testing/sysfs-bus-mei index 6bd45346ac7e..3f8701e8fa24 100644 --- a/Documentation/ABI/testing/sysfs-bus-mei +++ b/Documentation/ABI/testing/sysfs-bus-mei @@ -4,7 +4,7 @@ KernelVersion: 3.10 Contact: Samuel Ortiz linux-mei@linux.intel.com Description: Stores the same MODALIAS value emitted by uevent - Format: mei::: + Format: mei::: What: /sys/bus/mei/devices/.../name Date: May 2015 From d0b83984688f218f4894652d71ec1e4c633f0d18 Mon Sep 17 00:00:00 2001 From: Marian Mihailescu Date: Tue, 29 Oct 2019 11:20:25 +1030 Subject: [PATCH 2310/3715] clk: samsung: exynos5420: Preserve CPU clocks configuration during suspend/resume commit e21be0d1d7bd7f78a77613f6bcb6965e72b22fc1 upstream. Save and restore top PLL related configuration registers for big (APLL) and LITTLE (KPLL) cores during suspend/resume cycle. So far, CPU clocks were reset to default values after suspend/resume cycle and performance after system resume was affected when performance governor has been selected. Fixes: 773424326b51 ("clk: samsung: exynos5420: add more registers to restore list") Signed-off-by: Marian Mihailescu Signed-off-by: Sylwester Nawrocki Signed-off-by: Greg Kroah-Hartman --- drivers/clk/samsung/clk-exynos5420.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/samsung/clk-exynos5420.c b/drivers/clk/samsung/clk-exynos5420.c index 47a14f93f869..2f54df5bef8e 100644 --- a/drivers/clk/samsung/clk-exynos5420.c +++ b/drivers/clk/samsung/clk-exynos5420.c @@ -170,6 +170,8 @@ static const unsigned long exynos5x_clk_regs[] __initconst = { GATE_BUS_CPU, GATE_SCLK_CPU, CLKOUT_CMU_CPU, + APLL_CON0, + KPLL_CON0, CPLL_CON0, DPLL_CON0, EPLL_CON0, From 736af028aee8f168c5ac1e87b51611286a99fc19 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 20 Sep 2019 14:20:30 +0200 Subject: [PATCH 2311/3715] pinctl: ti: iodelay: fix error checking on pinctrl_count_index_with_args call commit 5ff8aca906f3a7a7db79fad92f2a4401107ef50d upstream. The call to pinctrl_count_index_with_args checks for a -EINVAL return however this function calls pinctrl_get_list_and_count and this can return -ENOENT. Rather than check for a specific error, fix this by checking for any error return to catch the -ENOENT case. Addresses-Coverity: ("Improper use of negative") Fixes: 003910ebc83b ("pinctrl: Introduce TI IOdelay configuration driver") Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20190920122030.14340-1-colin.king@canonical.com Acked-by: Tony Lindgren Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/ti/pinctrl-ti-iodelay.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/ti/pinctrl-ti-iodelay.c b/drivers/pinctrl/ti/pinctrl-ti-iodelay.c index 5c1b6325d80d..8ac1f1ce4442 100644 --- a/drivers/pinctrl/ti/pinctrl-ti-iodelay.c +++ b/drivers/pinctrl/ti/pinctrl-ti-iodelay.c @@ -496,7 +496,7 @@ static int ti_iodelay_dt_node_to_map(struct pinctrl_dev *pctldev, return -EINVAL; rows = pinctrl_count_index_with_args(np, name); - if (rows == -EINVAL) + if (rows < 0) return rows; *map = devm_kzalloc(iod->dev, sizeof(**map), GFP_KERNEL); From 6cce9e0baee21f4ac76828ae75d7783c1bdca726 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 20 Nov 2019 15:37:39 +0200 Subject: [PATCH 2312/3715] pinctrl: lewisburg: Update pin list according to v1.1v6 commit e66ff71fd0dba36a53f91f39e4da6c7b84764f2e upstream. Version 1.1v6 of pin list has some changes in pin names for Intel Lewisburg. Update the driver accordingly. Note, it reveals the bug in the driver that misses two pins in GPP_L and has rather two extra ones. That's why the ordering of some groups is changed. Fixes: e480b745386e ("pinctrl: intel: Add Intel Lewisburg GPIO support") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20191120133739.54332-1-andriy.shevchenko@linux.intel.com Acked-by: Mika Westerberg Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/intel/pinctrl-lewisburg.c | 171 +++++++++++----------- 1 file changed, 86 insertions(+), 85 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-lewisburg.c b/drivers/pinctrl/intel/pinctrl-lewisburg.c index 14d56ea6cfdc..c2164db14e9c 100644 --- a/drivers/pinctrl/intel/pinctrl-lewisburg.c +++ b/drivers/pinctrl/intel/pinctrl-lewisburg.c @@ -34,6 +34,7 @@ .npins = ((e) - (s) + 1), \ } +/* Lewisburg */ static const struct pinctrl_pin_desc lbg_pins[] = { /* GPP_A */ PINCTRL_PIN(0, "RCINB"), @@ -73,7 +74,7 @@ static const struct pinctrl_pin_desc lbg_pins[] = { PINCTRL_PIN(33, "SRCCLKREQB_4"), PINCTRL_PIN(34, "SRCCLKREQB_5"), PINCTRL_PIN(35, "GPP_B_11"), - PINCTRL_PIN(36, "GLB_RST_WARN_N"), + PINCTRL_PIN(36, "SLP_S0B"), PINCTRL_PIN(37, "PLTRSTB"), PINCTRL_PIN(38, "SPKR"), PINCTRL_PIN(39, "GPP_B_15"), @@ -186,96 +187,96 @@ static const struct pinctrl_pin_desc lbg_pins[] = { PINCTRL_PIN(141, "GBE_PCI_DIS"), PINCTRL_PIN(142, "GBE_LAN_DIS"), PINCTRL_PIN(143, "GPP_I_10"), - PINCTRL_PIN(144, "GPIO_RCOMP_3P3"), /* GPP_J */ - PINCTRL_PIN(145, "GBE_LED_0_0"), - PINCTRL_PIN(146, "GBE_LED_0_1"), - PINCTRL_PIN(147, "GBE_LED_1_0"), - PINCTRL_PIN(148, "GBE_LED_1_1"), - PINCTRL_PIN(149, "GBE_LED_2_0"), - PINCTRL_PIN(150, "GBE_LED_2_1"), - PINCTRL_PIN(151, "GBE_LED_3_0"), - PINCTRL_PIN(152, "GBE_LED_3_1"), - PINCTRL_PIN(153, "GBE_SCL_0"), - PINCTRL_PIN(154, "GBE_SDA_0"), - PINCTRL_PIN(155, "GBE_SCL_1"), - PINCTRL_PIN(156, "GBE_SDA_1"), - PINCTRL_PIN(157, "GBE_SCL_2"), - PINCTRL_PIN(158, "GBE_SDA_2"), - PINCTRL_PIN(159, "GBE_SCL_3"), - PINCTRL_PIN(160, "GBE_SDA_3"), - PINCTRL_PIN(161, "GBE_SDP_0_0"), - PINCTRL_PIN(162, "GBE_SDP_0_1"), - PINCTRL_PIN(163, "GBE_SDP_1_0"), - PINCTRL_PIN(164, "GBE_SDP_1_1"), - PINCTRL_PIN(165, "GBE_SDP_2_0"), - PINCTRL_PIN(166, "GBE_SDP_2_1"), - PINCTRL_PIN(167, "GBE_SDP_3_0"), - PINCTRL_PIN(168, "GBE_SDP_3_1"), + PINCTRL_PIN(144, "GBE_LED_0_0"), + PINCTRL_PIN(145, "GBE_LED_0_1"), + PINCTRL_PIN(146, "GBE_LED_1_0"), + PINCTRL_PIN(147, "GBE_LED_1_1"), + PINCTRL_PIN(148, "GBE_LED_2_0"), + PINCTRL_PIN(149, "GBE_LED_2_1"), + PINCTRL_PIN(150, "GBE_LED_3_0"), + PINCTRL_PIN(151, "GBE_LED_3_1"), + PINCTRL_PIN(152, "GBE_SCL_0"), + PINCTRL_PIN(153, "GBE_SDA_0"), + PINCTRL_PIN(154, "GBE_SCL_1"), + PINCTRL_PIN(155, "GBE_SDA_1"), + PINCTRL_PIN(156, "GBE_SCL_2"), + PINCTRL_PIN(157, "GBE_SDA_2"), + PINCTRL_PIN(158, "GBE_SCL_3"), + PINCTRL_PIN(159, "GBE_SDA_3"), + PINCTRL_PIN(160, "GBE_SDP_0_0"), + PINCTRL_PIN(161, "GBE_SDP_0_1"), + PINCTRL_PIN(162, "GBE_SDP_1_0"), + PINCTRL_PIN(163, "GBE_SDP_1_1"), + PINCTRL_PIN(164, "GBE_SDP_2_0"), + PINCTRL_PIN(165, "GBE_SDP_2_1"), + PINCTRL_PIN(166, "GBE_SDP_3_0"), + PINCTRL_PIN(167, "GBE_SDP_3_1"), /* GPP_K */ - PINCTRL_PIN(169, "GBE_RMIICLK"), - PINCTRL_PIN(170, "GBE_RMII_TXD_0"), - PINCTRL_PIN(171, "GBE_RMII_TXD_1"), + PINCTRL_PIN(168, "GBE_RMIICLK"), + PINCTRL_PIN(169, "GBE_RMII_RXD_0"), + PINCTRL_PIN(170, "GBE_RMII_RXD_1"), + PINCTRL_PIN(171, "GBE_RMII_CRS_DV"), PINCTRL_PIN(172, "GBE_RMII_TX_EN"), - PINCTRL_PIN(173, "GBE_RMII_CRS_DV"), - PINCTRL_PIN(174, "GBE_RMII_RXD_0"), - PINCTRL_PIN(175, "GBE_RMII_RXD_1"), - PINCTRL_PIN(176, "GBE_RMII_RX_ER"), - PINCTRL_PIN(177, "GBE_RMII_ARBIN"), - PINCTRL_PIN(178, "GBE_RMII_ARB_OUT"), - PINCTRL_PIN(179, "PE_RST_N"), - PINCTRL_PIN(180, "GPIO_RCOMP_1P8_3P3"), + PINCTRL_PIN(173, "GBE_RMII_TXD_0"), + PINCTRL_PIN(174, "GBE_RMII_TXD_1"), + PINCTRL_PIN(175, "GBE_RMII_RX_ER"), + PINCTRL_PIN(176, "GBE_RMII_ARBIN"), + PINCTRL_PIN(177, "GBE_RMII_ARB_OUT"), + PINCTRL_PIN(178, "PE_RST_N"), /* GPP_G */ - PINCTRL_PIN(181, "FAN_TACH_0"), - PINCTRL_PIN(182, "FAN_TACH_1"), - PINCTRL_PIN(183, "FAN_TACH_2"), - PINCTRL_PIN(184, "FAN_TACH_3"), - PINCTRL_PIN(185, "FAN_TACH_4"), - PINCTRL_PIN(186, "FAN_TACH_5"), - PINCTRL_PIN(187, "FAN_TACH_6"), - PINCTRL_PIN(188, "FAN_TACH_7"), - PINCTRL_PIN(189, "FAN_PWM_0"), - PINCTRL_PIN(190, "FAN_PWM_1"), - PINCTRL_PIN(191, "FAN_PWM_2"), - PINCTRL_PIN(192, "FAN_PWM_3"), - PINCTRL_PIN(193, "GSXDOUT"), - PINCTRL_PIN(194, "GSXSLOAD"), - PINCTRL_PIN(195, "GSXDIN"), - PINCTRL_PIN(196, "GSXSRESETB"), - PINCTRL_PIN(197, "GSXCLK"), - PINCTRL_PIN(198, "ADR_COMPLETE"), - PINCTRL_PIN(199, "NMIB"), - PINCTRL_PIN(200, "SMIB"), - PINCTRL_PIN(201, "SSATA_DEVSLP_0"), - PINCTRL_PIN(202, "SSATA_DEVSLP_1"), - PINCTRL_PIN(203, "SSATA_DEVSLP_2"), - PINCTRL_PIN(204, "SSATAXPCIE0_SSATAGP0"), + PINCTRL_PIN(179, "FAN_TACH_0"), + PINCTRL_PIN(180, "FAN_TACH_1"), + PINCTRL_PIN(181, "FAN_TACH_2"), + PINCTRL_PIN(182, "FAN_TACH_3"), + PINCTRL_PIN(183, "FAN_TACH_4"), + PINCTRL_PIN(184, "FAN_TACH_5"), + PINCTRL_PIN(185, "FAN_TACH_6"), + PINCTRL_PIN(186, "FAN_TACH_7"), + PINCTRL_PIN(187, "FAN_PWM_0"), + PINCTRL_PIN(188, "FAN_PWM_1"), + PINCTRL_PIN(189, "FAN_PWM_2"), + PINCTRL_PIN(190, "FAN_PWM_3"), + PINCTRL_PIN(191, "GSXDOUT"), + PINCTRL_PIN(192, "GSXSLOAD"), + PINCTRL_PIN(193, "GSXDIN"), + PINCTRL_PIN(194, "GSXSRESETB"), + PINCTRL_PIN(195, "GSXCLK"), + PINCTRL_PIN(196, "ADR_COMPLETE"), + PINCTRL_PIN(197, "NMIB"), + PINCTRL_PIN(198, "SMIB"), + PINCTRL_PIN(199, "SSATA_DEVSLP_0"), + PINCTRL_PIN(200, "SSATA_DEVSLP_1"), + PINCTRL_PIN(201, "SSATA_DEVSLP_2"), + PINCTRL_PIN(202, "SSATAXPCIE0_SSATAGP0"), /* GPP_H */ - PINCTRL_PIN(205, "SRCCLKREQB_6"), - PINCTRL_PIN(206, "SRCCLKREQB_7"), - PINCTRL_PIN(207, "SRCCLKREQB_8"), - PINCTRL_PIN(208, "SRCCLKREQB_9"), - PINCTRL_PIN(209, "SRCCLKREQB_10"), - PINCTRL_PIN(210, "SRCCLKREQB_11"), - PINCTRL_PIN(211, "SRCCLKREQB_12"), - PINCTRL_PIN(212, "SRCCLKREQB_13"), - PINCTRL_PIN(213, "SRCCLKREQB_14"), - PINCTRL_PIN(214, "SRCCLKREQB_15"), - PINCTRL_PIN(215, "SML2CLK"), - PINCTRL_PIN(216, "SML2DATA"), - PINCTRL_PIN(217, "SML2ALERTB"), - PINCTRL_PIN(218, "SML3CLK"), - PINCTRL_PIN(219, "SML3DATA"), - PINCTRL_PIN(220, "SML3ALERTB"), - PINCTRL_PIN(221, "SML4CLK"), - PINCTRL_PIN(222, "SML4DATA"), - PINCTRL_PIN(223, "SML4ALERTB"), - PINCTRL_PIN(224, "SSATAXPCIE1_SSATAGP1"), - PINCTRL_PIN(225, "SSATAXPCIE2_SSATAGP2"), - PINCTRL_PIN(226, "SSATAXPCIE3_SSATAGP3"), - PINCTRL_PIN(227, "SSATAXPCIE4_SSATAGP4"), - PINCTRL_PIN(228, "SSATAXPCIE5_SSATAGP5"), + PINCTRL_PIN(203, "SRCCLKREQB_6"), + PINCTRL_PIN(204, "SRCCLKREQB_7"), + PINCTRL_PIN(205, "SRCCLKREQB_8"), + PINCTRL_PIN(206, "SRCCLKREQB_9"), + PINCTRL_PIN(207, "SRCCLKREQB_10"), + PINCTRL_PIN(208, "SRCCLKREQB_11"), + PINCTRL_PIN(209, "SRCCLKREQB_12"), + PINCTRL_PIN(210, "SRCCLKREQB_13"), + PINCTRL_PIN(211, "SRCCLKREQB_14"), + PINCTRL_PIN(212, "SRCCLKREQB_15"), + PINCTRL_PIN(213, "SML2CLK"), + PINCTRL_PIN(214, "SML2DATA"), + PINCTRL_PIN(215, "SML2ALERTB"), + PINCTRL_PIN(216, "SML3CLK"), + PINCTRL_PIN(217, "SML3DATA"), + PINCTRL_PIN(218, "SML3ALERTB"), + PINCTRL_PIN(219, "SML4CLK"), + PINCTRL_PIN(220, "SML4DATA"), + PINCTRL_PIN(221, "SML4ALERTB"), + PINCTRL_PIN(222, "SSATAXPCIE1_SSATAGP1"), + PINCTRL_PIN(223, "SSATAXPCIE2_SSATAGP2"), + PINCTRL_PIN(224, "SSATAXPCIE3_SSATAGP3"), + PINCTRL_PIN(225, "SSATAXPCIE4_SSATAGP4"), + PINCTRL_PIN(226, "SSATAXPCIE5_SSATAGP5"), /* GPP_L */ + PINCTRL_PIN(227, "GPP_L_0"), + PINCTRL_PIN(228, "EC_CSME_INTR_OUT"), PINCTRL_PIN(229, "VISA2CH0_D0"), PINCTRL_PIN(230, "VISA2CH0_D1"), PINCTRL_PIN(231, "VISA2CH0_D2"), From e35d296079d581036d929a09fc2ee29246207d3b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 15 Mar 2019 13:46:11 +0100 Subject: [PATCH 2313/3715] scsi: sd: enable compat ioctls for sed-opal commit 142b2ac82e31c174936c5719fa12ae28f51a55b7 upstream. The sed_ioctl() function is written to be compatible between 32-bit and 64-bit processes, however compat mode is only wired up for nvme, not for sd. Add the missing call to sed_ioctl() in sd_compat_ioctl(). Fixes: d80210f25ff0 ("sd: add support for TCG OPAL self encrypting disks") Cc: linux-scsi@vger.kernel.org Cc: "James E.J. Bottomley" Cc: "Martin K. Petersen" Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sd.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index dd7ca76c000a..2955b856e9ec 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1697,20 +1697,30 @@ static void sd_rescan(struct device *dev) static int sd_compat_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { - struct scsi_device *sdev = scsi_disk(bdev->bd_disk)->device; + struct gendisk *disk = bdev->bd_disk; + struct scsi_disk *sdkp = scsi_disk(disk); + struct scsi_device *sdev = sdkp->device; + void __user *p = compat_ptr(arg); int error; + error = scsi_verify_blk_ioctl(bdev, cmd); + if (error < 0) + return error; + error = scsi_ioctl_block_when_processing_errors(sdev, cmd, (mode & FMODE_NDELAY) != 0); if (error) return error; + + if (is_sed_ioctl(cmd)) + return sed_ioctl(sdkp->opal_dev, cmd, p); /* * Let the static ioctl translation table take care of it. */ if (!sdev->host->hostt->compat_ioctl) return -ENOIOCTLCMD; - return sdev->host->hostt->compat_ioctl(sdev, cmd, (void __user *)arg); + return sdev->host->hostt->compat_ioctl(sdev, cmd, p); } #endif From d76a73388658d34ffa22a040d47be6e35f7b70ff Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Wed, 12 Dec 2018 18:13:26 +0100 Subject: [PATCH 2314/3715] arm64: dts: apq8096-db820c: Increase load on l21 for SDCARD commit e38161bd325ea541ef2f258d8e28281077dde524 upstream. In the same way as for msm8974-hammerhead, l21 load, used for SDCARD VMMC, needs to be increased in order to prevent any voltage drop issues (due to limited current) happening with some SDCARDS or during specific operations (e.g. write). Reviewed-by: Bjorn Andersson Fixes: 660a9763c6a9 (arm64: dts: qcom: db820c: Add pm8994 regulator node) Signed-off-by: Loic Poulain Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi index 789f3e87321e..7a510505e0c2 100644 --- a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi +++ b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi @@ -262,6 +262,8 @@ l21 { regulator-min-microvolt = <2950000>; regulator-max-microvolt = <2950000>; + regulator-allow-set-load; + regulator-system-load = <200000>; }; l22 { regulator-min-microvolt = <3300000>; From 6b24f8fa8f265948c2e256d7f30315854fe8735e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 3 Jun 2019 22:03:44 +0200 Subject: [PATCH 2315/3715] af_unix: add compat_ioctl support commit 5f6beb9e0f633f3cc845cdd67973c506372931b4 upstream. The af_unix protocol family has a custom ioctl command (inexplicibly based on SIOCPROTOPRIVATE), but never had a compat_ioctl handler for 32-bit applications. Since all commands are compatible here, add a trivial wrapper that performs the compat_ptr() conversion for SIOCOUTQ/SIOCINQ. SIOCUNIXFILE does not use the argument, but it doesn't hurt to also use compat_ptr() here. Fixes: ba94f3088b79 ("unix: add ioctl to open a unix socket file with O_PATH") Cc: netdev@vger.kernel.org Cc: "David S. Miller" Cc: Eric Dumazet Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- net/unix/af_unix.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 99f581a61cfa..091e93798eac 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -644,6 +644,9 @@ static unsigned int unix_poll(struct file *, struct socket *, poll_table *); static unsigned int unix_dgram_poll(struct file *, struct socket *, poll_table *); static int unix_ioctl(struct socket *, unsigned int, unsigned long); +#ifdef CONFIG_COMPAT +static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); +#endif static int unix_shutdown(struct socket *, int); static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t); static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int); @@ -685,6 +688,9 @@ static const struct proto_ops unix_stream_ops = { .getname = unix_getname, .poll = unix_poll, .ioctl = unix_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = unix_compat_ioctl, +#endif .listen = unix_listen, .shutdown = unix_shutdown, .setsockopt = sock_no_setsockopt, @@ -708,6 +714,9 @@ static const struct proto_ops unix_dgram_ops = { .getname = unix_getname, .poll = unix_dgram_poll, .ioctl = unix_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = unix_compat_ioctl, +#endif .listen = sock_no_listen, .shutdown = unix_shutdown, .setsockopt = sock_no_setsockopt, @@ -730,6 +739,9 @@ static const struct proto_ops unix_seqpacket_ops = { .getname = unix_getname, .poll = unix_dgram_poll, .ioctl = unix_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = unix_compat_ioctl, +#endif .listen = unix_listen, .shutdown = unix_shutdown, .setsockopt = sock_no_setsockopt, @@ -2650,6 +2662,13 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return err; } +#ifdef CONFIG_COMPAT +static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg)); +} +#endif + static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; From 2e3f1f153b72509c2314dea9581fe0e35e4db94c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 3 Jun 2019 23:06:00 +0200 Subject: [PATCH 2316/3715] compat_ioctl: handle SIOCOUTQNSD commit 9d7bf41fafa5b5ddd4c13eb39446b0045f0a8167 upstream. Unlike the normal SIOCOUTQ, SIOCOUTQNSD was never handled in compat mode. Add it to the common socket compat handler along with similar ones. Fixes: 2f4e1b397097 ("tcp: ioctl type SIOCOUTQNSD returns amount of data not sent") Cc: Eric Dumazet Cc: netdev@vger.kernel.org Cc: "David S. Miller" Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- net/socket.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/socket.c b/net/socket.c index 5b134a6b6216..6a5ec658fcd8 100644 --- a/net/socket.c +++ b/net/socket.c @@ -3267,6 +3267,7 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCSARP: case SIOCGARP: case SIOCDARP: + case SIOCOUTQNSD: case SIOCATMARK: return sock_do_ioctl(net, sock, cmd, arg); } From 7b7e8086030aa534245edd8d336b8d06a5591443 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 6 Nov 2019 15:30:48 -0600 Subject: [PATCH 2317/3715] PCI/PTM: Remove spurious "d" from granularity message commit 127a7709495db52a41012deaebbb7afc231dad91 upstream. The granularity message has an extra "d": pci 0000:02:00.0: PTM enabled, 4dns granularity Remove the "d" so the message is simply "PTM enabled, 4ns granularity". Fixes: 8b2ec318eece ("PCI: Add PTM clock granularity information") Link: https://lore.kernel.org/r/20191106222420.10216-2-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Reviewed-by: Andrew Murray Cc: Jonathan Yong Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pcie/ptm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pcie/ptm.c b/drivers/pci/pcie/ptm.c index bab8ac63c4f3..3008bba360f3 100644 --- a/drivers/pci/pcie/ptm.c +++ b/drivers/pci/pcie/ptm.c @@ -29,7 +29,7 @@ static void pci_ptm_info(struct pci_dev *dev) snprintf(clock_desc, sizeof(clock_desc), ">254ns"); break; default: - snprintf(clock_desc, sizeof(clock_desc), "%udns", + snprintf(clock_desc, sizeof(clock_desc), "%uns", dev->ptm_granularity); break; } From 6f0c76be8b0cb0250a68582c6bb2cc6c774ce597 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Mon, 18 Nov 2019 17:55:53 +1100 Subject: [PATCH 2318/3715] powerpc/powernv: Disable native PCIe port management commit 9d72dcef891030545f39ad386a30cf91df517fb2 upstream. On PowerNV the PCIe topology is (currently) managed by the powernv platform code in Linux in cooperation with the platform firmware. Linux's native PCIe port service drivers operate independently of both and this can cause problems. The main issue is that the portbus driver will conflict with the platform specific hotplug driver (pnv_php) over ownership of the MSI used to notify the host when a hotplug event occurs. The portbus driver claims this MSI on behalf of the individual port services because the same interrupt is used for hotplug events, PMEs (on root ports), and link bandwidth change notifications. The portbus driver will always claim the interrupt even if the individual port service drivers, such as pciehp, are compiled out. The second, bigger, problem is that the hotplug port service driver fundamentally does not work on PowerNV. The platform assumes that all PCI devices have a corresponding arch-specific handle derived from the DT node for the device (pci_dn) and without one the platform will not allow a PCI device to be enabled. This problem is largely due to historical baggage, but it can't be resolved without significant re-factoring of the platform PCI support. We can fix these problems in the interim by setting the "pcie_ports_disabled" flag during platform initialisation. The flag indicates the platform owns the PCIe ports which stops the portbus driver from being registered. This does have the side effect of disabling all port services drivers that is: AER, PME, BW notifications, hotplug, and DPC. However, this is not a huge disadvantage on PowerNV since these services are either unused or handled through other means. Fixes: 66725152fb9f ("PCI/hotplug: PowerPC PowerNV PCI hotplug driver") Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191118065553.30362-1-oohall@gmail.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powernv/pci.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index e2d031a3ec15..961c131a5b7e 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -1118,6 +1118,23 @@ void __init pnv_pci_init(void) if (!firmware_has_feature(FW_FEATURE_OPAL)) return; +#ifdef CONFIG_PCIEPORTBUS + /* + * On PowerNV PCIe devices are (currently) managed in cooperation + * with firmware. This isn't *strictly* required, but there's enough + * assumptions baked into both firmware and the platform code that + * it's unwise to allow the portbus services to be used. + * + * We need to fix this eventually, but for now set this flag to disable + * the portbus driver. The AER service isn't required since that AER + * events are handled via EEH. The pciehp hotplug driver can't work + * without kernel changes (and portbus binding breaks pnv_php). The + * other services also require some thinking about how we're going + * to integrate them. + */ + pcie_ports_disabled = true; +#endif + /* Look for IODA IO-Hubs. */ for_each_compatible_node(np, NULL, "ibm,ioda-hub") { pnv_pci_init_ioda_hub(np); From 9eee44f069bd76481598013c53e65ecd385d7438 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Thu, 7 Nov 2019 06:42:53 +0000 Subject: [PATCH 2319/3715] tty: serial: imx: use the sg count from dma_map_sg commit 596fd8dffb745afcebc0ec6968e17fe29f02044c upstream. The dmaengine_prep_slave_sg needs to use sg count returned by dma_map_sg, not use sport->dma_tx_nents, because the return value of dma_map_sg is not always same with "nents". Fixes: b4cdc8f61beb ("serial: imx: add DMA support for imx6q") Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/1573108875-26530-1-git-send-email-peng.fan@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/imx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index aae68230fb7b..a81a5be0cf7a 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -542,7 +542,7 @@ static void imx_dma_tx(struct imx_port *sport) dev_err(dev, "DMA mapping error for TX.\n"); return; } - desc = dmaengine_prep_slave_sg(chan, sgl, sport->dma_tx_nents, + desc = dmaengine_prep_slave_sg(chan, sgl, ret, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT); if (!desc) { dma_unmap_sg(dev, sgl, sport->dma_tx_nents, From 77f33d715584a86c28758a36cd796dc5e5e592c9 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 13 Nov 2019 05:37:42 +0000 Subject: [PATCH 2320/3715] tty: serial: pch_uart: correct usage of dma_unmap_sg commit 74887542fdcc92ad06a48c0cca17cdf09fc8aa00 upstream. Per Documentation/DMA-API-HOWTO.txt, To unmap a scatterlist, just call: dma_unmap_sg(dev, sglist, nents, direction); .. note:: The 'nents' argument to the dma_unmap_sg call must be the _same_ one you passed into the dma_map_sg call, it should _NOT_ be the 'count' value _returned_ from the dma_map_sg call. However in the driver, priv->nent is directly assigned with value returned from dma_map_sg, and dma_unmap_sg use priv->nent for unmap, this breaks the API usage. So introduce a new entry orig_nent to remember 'nents'. Fixes: da3564ee027e ("pch_uart: add multi-scatter processing") Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/1573623259-6339-1-git-send-email-peng.fan@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/pch_uart.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c index d9123f995705..15ddcbd1f9d2 100644 --- a/drivers/tty/serial/pch_uart.c +++ b/drivers/tty/serial/pch_uart.c @@ -247,6 +247,7 @@ struct eg20t_port { struct dma_chan *chan_rx; struct scatterlist *sg_tx_p; int nent; + int orig_nent; struct scatterlist sg_rx; int tx_dma_use; void *rx_buf_virt; @@ -801,9 +802,10 @@ static void pch_dma_tx_complete(void *arg) } xmit->tail &= UART_XMIT_SIZE - 1; async_tx_ack(priv->desc_tx); - dma_unmap_sg(port->dev, sg, priv->nent, DMA_TO_DEVICE); + dma_unmap_sg(port->dev, sg, priv->orig_nent, DMA_TO_DEVICE); priv->tx_dma_use = 0; priv->nent = 0; + priv->orig_nent = 0; kfree(priv->sg_tx_p); pch_uart_hal_enable_interrupt(priv, PCH_UART_HAL_TX_INT); } @@ -1027,6 +1029,7 @@ static unsigned int dma_handle_tx(struct eg20t_port *priv) dev_err(priv->port.dev, "%s:dma_map_sg Failed\n", __func__); return 0; } + priv->orig_nent = num; priv->nent = nent; for (i = 0; i < nent; i++, sg++) { From 308f0585150c12e078b76331d5dc300b1dc005aa Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Tue, 3 Sep 2019 17:11:39 -0300 Subject: [PATCH 2321/3715] media: ov6650: Fix incorrect use of JPEG colorspace commit 12500731895ef09afc5b66b86b76c0884fb9c7bf upstream. Since its initial submission, the driver selects V4L2_COLORSPACE_JPEG for supported formats other than V4L2_MBUS_FMT_SBGGR8_1X8. According to v4l2-compliance test program, V4L2_COLORSPACE_JPEG applies exclusively to V4L2_PIX_FMT_JPEG. Since the sensor does not support JPEG format, fix it to always select V4L2_COLORSPACE_SRGB. Fixes: 2f6e2404799a ("[media] SoC Camera: add driver for OV6650 sensor") Signed-off-by: Janusz Krzysztofik Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/ov6650.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/media/i2c/ov6650.c b/drivers/media/i2c/ov6650.c index 348296be4925..5d6a231a4163 100644 --- a/drivers/media/i2c/ov6650.c +++ b/drivers/media/i2c/ov6650.c @@ -203,7 +203,6 @@ struct ov6650 { unsigned long pclk_max; /* from resolution and format */ struct v4l2_fract tpf; /* as requested with s_parm */ u32 code; - enum v4l2_colorspace colorspace; }; @@ -520,7 +519,7 @@ static int ov6650_get_fmt(struct v4l2_subdev *sd, mf->width = priv->rect.width >> priv->half_scale; mf->height = priv->rect.height >> priv->half_scale; mf->code = priv->code; - mf->colorspace = priv->colorspace; + mf->colorspace = V4L2_COLORSPACE_SRGB; mf->field = V4L2_FIELD_NONE; return 0; @@ -627,11 +626,6 @@ static int ov6650_s_fmt(struct v4l2_subdev *sd, struct v4l2_mbus_framefmt *mf) priv->pclk_max = 8000000; } - if (code == MEDIA_BUS_FMT_SBGGR8_1X8) - priv->colorspace = V4L2_COLORSPACE_SRGB; - else if (code != 0) - priv->colorspace = V4L2_COLORSPACE_JPEG; - if (half_scale) { dev_dbg(&client->dev, "max resolution: QCIF\n"); coma_set |= COMA_QCIF; @@ -666,7 +660,6 @@ static int ov6650_s_fmt(struct v4l2_subdev *sd, struct v4l2_mbus_framefmt *mf) priv->code = code; if (!ret) { - mf->colorspace = priv->colorspace; mf->width = priv->rect.width >> half_scale; mf->height = priv->rect.height >> half_scale; } @@ -689,6 +682,7 @@ static int ov6650_set_fmt(struct v4l2_subdev *sd, &mf->height, 2, H_CIF, 1, 0); mf->field = V4L2_FIELD_NONE; + mf->colorspace = V4L2_COLORSPACE_SRGB; switch (mf->code) { case MEDIA_BUS_FMT_Y10_1X10: @@ -699,13 +693,11 @@ static int ov6650_set_fmt(struct v4l2_subdev *sd, case MEDIA_BUS_FMT_YUYV8_2X8: case MEDIA_BUS_FMT_VYUY8_2X8: case MEDIA_BUS_FMT_UYVY8_2X8: - mf->colorspace = V4L2_COLORSPACE_JPEG; break; default: mf->code = MEDIA_BUS_FMT_SBGGR8_1X8; /* fall through */ case MEDIA_BUS_FMT_SBGGR8_1X8: - mf->colorspace = V4L2_COLORSPACE_SRGB; break; } @@ -1020,7 +1012,6 @@ static int ov6650_probe(struct i2c_client *client, priv->rect.height = H_CIF; priv->half_scale = false; priv->code = MEDIA_BUS_FMT_YUYV8_2X8; - priv->colorspace = V4L2_COLORSPACE_JPEG; ret = ov6650_video_probe(client); if (ret) From d739c826a15e887efb2c72355619b87e8dfc565c Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Tue, 3 Sep 2019 17:11:40 -0300 Subject: [PATCH 2322/3715] media: ov6650: Fix some format attributes not under control commit 1c6a2b63095154bbf9e8f38d79487a728331bf65 upstream. User arguments passed to .get/set_fmt() pad operation callbacks may contain unsupported values. The driver takes control over frame size and pixel code as well as colorspace and field attributes but has never cared for remainig format attributes, i.e., ycbcr_enc, quantization and xfer_func, introduced by commit 11ff030c7365 ("[media] v4l2-mediabus: improve colorspace support"). Fix it. Set up a static v4l2_mbus_framefmt structure with attributes initialized to reasonable defaults and use it for updating content of user provided arguments. In case of V4L2_SUBDEV_FORMAT_ACTIVE, postpone frame size update, now performed from inside ov6650_s_fmt() helper, util the user argument is first updated in ov6650_set_fmt() with default frame format content. For V4L2_SUBDEV_FORMAT_TRY, don't copy all attributes to pad config, only those handled by the driver, then fill the response with the default frame format updated with resulting pad config format code and frame size. Fixes: 11ff030c7365 ("[media] v4l2-mediabus: improve colorspace support") Signed-off-by: Janusz Krzysztofik Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/ov6650.c | 51 +++++++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 12 deletions(-) diff --git a/drivers/media/i2c/ov6650.c b/drivers/media/i2c/ov6650.c index 5d6a231a4163..044ede441fd6 100644 --- a/drivers/media/i2c/ov6650.c +++ b/drivers/media/i2c/ov6650.c @@ -215,6 +215,17 @@ static u32 ov6650_codes[] = { MEDIA_BUS_FMT_Y8_1X8, }; +static const struct v4l2_mbus_framefmt ov6650_def_fmt = { + .width = W_CIF, + .height = H_CIF, + .code = MEDIA_BUS_FMT_SBGGR8_1X8, + .colorspace = V4L2_COLORSPACE_SRGB, + .field = V4L2_FIELD_NONE, + .ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT, + .quantization = V4L2_QUANTIZATION_DEFAULT, + .xfer_func = V4L2_XFER_FUNC_DEFAULT, +}; + /* read a register */ static int ov6650_reg_read(struct i2c_client *client, u8 reg, u8 *val) { @@ -516,11 +527,13 @@ static int ov6650_get_fmt(struct v4l2_subdev *sd, if (format->pad) return -EINVAL; + /* initialize response with default media bus frame format */ + *mf = ov6650_def_fmt; + + /* update media bus format code and frame size */ mf->width = priv->rect.width >> priv->half_scale; mf->height = priv->rect.height >> priv->half_scale; mf->code = priv->code; - mf->colorspace = V4L2_COLORSPACE_SRGB; - mf->field = V4L2_FIELD_NONE; return 0; } @@ -659,10 +672,6 @@ static int ov6650_s_fmt(struct v4l2_subdev *sd, struct v4l2_mbus_framefmt *mf) if (!ret) priv->code = code; - if (!ret) { - mf->width = priv->rect.width >> half_scale; - mf->height = priv->rect.height >> half_scale; - } return ret; } @@ -681,9 +690,6 @@ static int ov6650_set_fmt(struct v4l2_subdev *sd, v4l_bound_align_image(&mf->width, 2, W_CIF, 1, &mf->height, 2, H_CIF, 1, 0); - mf->field = V4L2_FIELD_NONE; - mf->colorspace = V4L2_COLORSPACE_SRGB; - switch (mf->code) { case MEDIA_BUS_FMT_Y10_1X10: mf->code = MEDIA_BUS_FMT_Y8_1X8; @@ -701,10 +707,31 @@ static int ov6650_set_fmt(struct v4l2_subdev *sd, break; } - if (format->which == V4L2_SUBDEV_FORMAT_ACTIVE) - return ov6650_s_fmt(sd, mf); - cfg->try_fmt = *mf; + if (format->which == V4L2_SUBDEV_FORMAT_TRY) { + /* store media bus format code and frame size in pad config */ + cfg->try_fmt.width = mf->width; + cfg->try_fmt.height = mf->height; + cfg->try_fmt.code = mf->code; + /* return default mbus frame format updated with pad config */ + *mf = ov6650_def_fmt; + mf->width = cfg->try_fmt.width; + mf->height = cfg->try_fmt.height; + mf->code = cfg->try_fmt.code; + + } else { + /* apply new media bus format code and frame size */ + int ret = ov6650_s_fmt(sd, mf); + + if (ret) + return ret; + + /* return default format updated with active size and code */ + *mf = ov6650_def_fmt; + mf->width = priv->rect.width >> priv->half_scale; + mf->height = priv->rect.height >> priv->half_scale; + mf->code = priv->code; + } return 0; } From 68b315b4df8f8a6ecb2345874e3e08177978734d Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Tue, 3 Sep 2019 17:11:41 -0300 Subject: [PATCH 2323/3715] media: ov6650: Fix .get_fmt() V4L2_SUBDEV_FORMAT_TRY support commit 39034bb0c26b76a2c3abc54aa28c185f18b40c2f upstream. Commit da298c6d98d5 ("[media] v4l2: replace video op g_mbus_fmt by pad op get_fmt") converted a former ov6650_g_fmt() video operation callback to an ov6650_get_fmt() pad operation callback. However, the converted function disregards a format->which flag that pad operations should obey and always returns active frame format settings. That can be fixed by always responding to V4L2_SUBDEV_FORMAT_TRY with -EINVAL, or providing the response from a pad config argument, likely updated by a former user call to V4L2_SUBDEV_FORMAT_TRY .set_fmt(). Since implementation of the latter is trivial, go for it. Fixes: da298c6d98d5 ("[media] v4l2: replace video op g_mbus_fmt by pad op get_fmt") Signed-off-by: Janusz Krzysztofik Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/ov6650.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/media/i2c/ov6650.c b/drivers/media/i2c/ov6650.c index 044ede441fd6..4f67a515bdd8 100644 --- a/drivers/media/i2c/ov6650.c +++ b/drivers/media/i2c/ov6650.c @@ -531,10 +531,16 @@ static int ov6650_get_fmt(struct v4l2_subdev *sd, *mf = ov6650_def_fmt; /* update media bus format code and frame size */ - mf->width = priv->rect.width >> priv->half_scale; - mf->height = priv->rect.height >> priv->half_scale; - mf->code = priv->code; + if (format->which == V4L2_SUBDEV_FORMAT_TRY) { + mf->width = cfg->try_fmt.width; + mf->height = cfg->try_fmt.height; + mf->code = cfg->try_fmt.code; + } else { + mf->width = priv->rect.width >> priv->half_scale; + mf->height = priv->rect.height >> priv->half_scale; + mf->code = priv->code; + } return 0; } From 4b4c9d23cf49e1aa694b042f3c5e01c291768eac Mon Sep 17 00:00:00 2001 From: Seung-Woo Kim Date: Fri, 18 Oct 2019 07:20:52 -0300 Subject: [PATCH 2324/3715] media: exynos4-is: Fix recursive locking in isp_video_release() commit 704c6c80fb471d1bb0ef0d61a94617d1d55743cd upstream. >From isp_video_release(), &isp->video_lock is held and subsequent vb2_fop_release() tries to lock vdev->lock which is same with the previous one. Replace vb2_fop_release() with _vb2_fop_release() to fix the recursive locking. Fixes: 1380f5754cb0 ("[media] videobuf2: Add missing lock held on vb2_fop_release") Signed-off-by: Seung-Woo Kim Reviewed-by: Sylwester Nawrocki Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/exynos4-is/fimc-isp-video.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/exynos4-is/fimc-isp-video.c b/drivers/media/platform/exynos4-is/fimc-isp-video.c index a920164f53f1..39340abefd14 100644 --- a/drivers/media/platform/exynos4-is/fimc-isp-video.c +++ b/drivers/media/platform/exynos4-is/fimc-isp-video.c @@ -316,7 +316,7 @@ static int isp_video_release(struct file *file) ivc->streaming = 0; } - vb2_fop_release(file); + _vb2_fop_release(file, NULL); if (v4l2_fh_is_singular_file(file)) { fimc_pipeline_call(&ivc->ve, close); From 0e08a1875bb2eb5b7ae913c35ef2886823fcdffd Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Wed, 30 Oct 2019 21:48:59 +0300 Subject: [PATCH 2325/3715] mtd: spi-nor: fix silent truncation in spi_nor_read() commit a719a75a7761e4139dd099330d9fe3589d844f9b upstream. spi_nor_read() assigns the result of 'ssize_t spi_nor_read_data()' to the 'int ret' variable, while 'ssize_t' is a 64-bit type and *int* is a 32-bit type on the 64-bit machines. This silent truncation isn't really valid, so fix up the variable's type. Fixes: 59451e1233bd ("mtd: spi-nor: change return value of read/write") Signed-off-by: Sergei Shtylyov Signed-off-by: Tudor Ambarus Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/spi-nor/spi-nor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index d550148177a0..114f75ec0088 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -1216,7 +1216,7 @@ static int spi_nor_read(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf) { struct spi_nor *nor = mtd_to_spi_nor(mtd); - int ret; + ssize_t ret; dev_dbg(nor->dev, "from 0x%08x, len %zd\n", (u32)from, len); From 45d37bd0a80f25ecac6d9d23d2240bdc1c1a83b3 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Wed, 30 Oct 2019 21:53:03 +0300 Subject: [PATCH 2326/3715] mtd: spi-nor: fix silent truncation in spi_nor_read_raw() commit 3d63ee5deb466fd66ed6ffb164a87ce36425cf36 upstream. spi_nor_read_raw() assigns the result of 'ssize_t spi_nor_read_data()' to the 'int ret' variable, while 'ssize_t' is a 64-bit type and *int* is a 32-bit type on the 64-bit machines. This silent truncation isn't really valid, so fix up the variable's type. Fixes: f384b352cbf0 ("mtd: spi-nor: parse Serial Flash Discoverable Parameters (SFDP) tables") Signed-off-by: Sergei Shtylyov Signed-off-by: Tudor Ambarus Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/spi-nor/spi-nor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 114f75ec0088..0fe3e39f870f 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -1445,7 +1445,7 @@ static int macronix_quad_enable(struct spi_nor *nor) */ static int write_sr_cr(struct spi_nor *nor, u8 *sr_cr) { - int ret; + ssize_t ret; write_enable(nor); From 9f499bd50632840c1eeebdf1ff87ef8b76f5299a Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Fri, 18 Oct 2019 17:35:04 +0200 Subject: [PATCH 2327/3715] spi: atmel: fix handling of cs_change set on non-last xfer commit fed8d8c7a6dc2a76d7764842853d81c770b0788e upstream. The driver does the wrong thing when cs_change is set on a non-last xfer in a message. When cs_change is set, the driver deactivates the CS and leaves it off until a later xfer again has cs_change set whereas it should be briefly toggling CS off and on again. This patch brings the behaviour of the driver back in line with the documentation and common sense. The delay of 10 us is the same as is used by the default spi_transfer_one_message() function in spi.c. [gregory: rebased on for-5.5 from spi tree] Fixes: 8090d6d1a415 ("spi: atmel: Refactor spi-atmel to use SPI framework queue") Signed-off-by: Mans Rullgard Acked-by: Nicolas Ferre Signed-off-by: Gregory CLEMENT Link: https://lore.kernel.org/r/20191018153504.4249-1-gregory.clement@bootlin.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-atmel.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c index d19331b66222..7b739c449227 100644 --- a/drivers/spi/spi-atmel.c +++ b/drivers/spi/spi-atmel.c @@ -301,7 +301,6 @@ struct atmel_spi { bool use_cs_gpios; bool keep_cs; - bool cs_active; u32 fifo_size; }; @@ -1338,11 +1337,9 @@ static int atmel_spi_one_transfer(struct spi_master *master, &msg->transfers)) { as->keep_cs = true; } else { - as->cs_active = !as->cs_active; - if (as->cs_active) - cs_activate(as, msg->spi); - else - cs_deactivate(as, msg->spi); + cs_deactivate(as, msg->spi); + udelay(10); + cs_activate(as, msg->spi); } } @@ -1365,7 +1362,6 @@ static int atmel_spi_transfer_one_message(struct spi_master *master, atmel_spi_lock(as); cs_activate(as, spi); - as->cs_active = true; as->keep_cs = false; msg->status = 0; From b9ffea4c1225aa5b1eba344708400a6fe6a71a1f Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 22 Oct 2019 17:47:03 -0700 Subject: [PATCH 2328/3715] rtlwifi: Remove unnecessary NULL check in rtl_regd_init commit 091c6e9c083f7ebaff00b37ad13562d51464d175 upstream. When building with Clang + -Wtautological-pointer-compare: drivers/net/wireless/realtek/rtlwifi/regd.c:389:33: warning: comparison of address of 'rtlpriv->regd' equal to a null pointer is always false [-Wtautological-pointer-compare] if (wiphy == NULL || &rtlpriv->regd == NULL) ~~~~~~~~~^~~~ ~~~~ 1 warning generated. The address of an array member is never NULL unless it is the first struct member so remove the unnecessary check. This was addressed in the staging version of the driver in commit f986978b32b3 ("Staging: rtlwifi: remove unnecessary NULL check"). While we are here, fix the following checkpatch warning: CHECK: Comparison to NULL could be written "!wiphy" 35: FILE: drivers/net/wireless/realtek/rtlwifi/regd.c:389: + if (wiphy == NULL) Fixes: 0c8173385e54 ("rtl8192ce: Add new driver") Link:https://github.com/ClangBuiltLinux/linux/issues/750 Signed-off-by: Nathan Chancellor Acked-by: Ping-Ke Shih Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtlwifi/regd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/regd.c b/drivers/net/wireless/realtek/rtlwifi/regd.c index 1bf3eb25c1da..72ca370331fb 100644 --- a/drivers/net/wireless/realtek/rtlwifi/regd.c +++ b/drivers/net/wireless/realtek/rtlwifi/regd.c @@ -427,7 +427,7 @@ int rtl_regd_init(struct ieee80211_hw *hw, struct wiphy *wiphy = hw->wiphy; struct country_code_to_enum_rd *country = NULL; - if (wiphy == NULL || &rtlpriv->regd == NULL) + if (!wiphy) return -EINVAL; /* init country_code from efuse channel plan */ From 872340aa041eb12c831abee7d26d8e8f685a6a70 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 7 Nov 2019 17:29:00 +0800 Subject: [PATCH 2329/3715] f2fs: fix potential overflow commit 1f0d5c911b64165c9754139a26c8c2fad352c132 upstream. We expect 64-bit calculation result from below statement, however in 32-bit machine, looped left shift operation on pgoff_t type variable may cause overflow issue, fix it by forcing type cast. page->index << PAGE_SHIFT; Fixes: 26de9b117130 ("f2fs: avoid unnecessary updating inode during fsync") Fixes: 0a2aa8fbb969 ("f2fs: refactor __exchange_data_block for speed up") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/data.c | 2 +- fs/f2fs/file.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ac3fa4bbed2d..afe7dcfff036 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1512,7 +1512,7 @@ static int __write_data_page(struct page *page, bool *submitted, loff_t i_size = i_size_read(inode); const pgoff_t end_index = ((unsigned long long) i_size) >> PAGE_SHIFT; - loff_t psize = (page->index + 1) << PAGE_SHIFT; + loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT; unsigned offset = 0; bool need_balance_fs = false; int err = 0; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index a90173b856f6..d98acc20a38a 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1059,7 +1059,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, } dn.ofs_in_node++; i++; - new_size = (dst + i) << PAGE_SHIFT; + new_size = (loff_t)(dst + i) << PAGE_SHIFT; if (dst_inode->i_size < new_size) f2fs_i_size_write(dst_inode, new_size); } while (--ilen && (do_replace[i] || blkaddr[i] == NULL_ADDR)); From 4e1f6dfffa11e264a669df3998f960adb8efaac8 Mon Sep 17 00:00:00 2001 From: Kars de Jong Date: Sat, 16 Nov 2019 12:05:48 +0100 Subject: [PATCH 2330/3715] rtc: msm6242: Fix reading of 10-hour digit commit e34494c8df0cd96fc432efae121db3212c46ae48 upstream. The driver was reading the wrong register as the 10-hour digit due to a misplaced ')'. It was in fact reading the 1-second digit register due to this bug. Also remove the use of a magic number for the hour mask and use the define for it which was already present. Fixes: 4f9b9bba1dd1 ("rtc: Add an RTC driver for the Oki MSM6242") Tested-by: Kars de Jong Signed-off-by: Kars de Jong Link: https://lore.kernel.org/r/20191116110548.8562-1-jongk@linux-m68k.org Reviewed-by: Geert Uytterhoeven Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-msm6242.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-msm6242.c b/drivers/rtc/rtc-msm6242.c index c1c5c4e3b3b4..c981301efbe5 100644 --- a/drivers/rtc/rtc-msm6242.c +++ b/drivers/rtc/rtc-msm6242.c @@ -132,7 +132,8 @@ static int msm6242_read_time(struct device *dev, struct rtc_time *tm) msm6242_read(priv, MSM6242_SECOND1); tm->tm_min = msm6242_read(priv, MSM6242_MINUTE10) * 10 + msm6242_read(priv, MSM6242_MINUTE1); - tm->tm_hour = (msm6242_read(priv, MSM6242_HOUR10 & 3)) * 10 + + tm->tm_hour = (msm6242_read(priv, MSM6242_HOUR10) & + MSM6242_HOUR10_HR_MASK) * 10 + msm6242_read(priv, MSM6242_HOUR1); tm->tm_mday = msm6242_read(priv, MSM6242_DAY10) * 10 + msm6242_read(priv, MSM6242_DAY1); From 749b39e7c6552de2db2b5610b22532fd3a9563fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johnson=20CH=20Chen=20=28=E9=99=B3=E6=98=AD=E5=8B=B3=29?= Date: Tue, 26 Nov 2019 06:51:11 +0000 Subject: [PATCH 2331/3715] gpio: mpc8xxx: Add platform device to gpiochip->parent [ Upstream commit 322f6a3182d42df18059a89c53b09d33919f755e ] Dear Linus Walleij, In old kernels, some APIs still try to use parent->of_node from struct gpio_chip, and it could be resulted in kernel panic because parent is NULL. Adding platform device to gpiochip->parent can fix this problem. Signed-off-by: Johnson Chen Link: https://patchwork.kernel.org/patch/11234609 Link: https://lore.kernel.org/r/HK0PR01MB3521489269F76467DFD7843FFA450@HK0PR01MB3521.apcprd01.prod.exchangelabs.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/gpio/gpio-mpc8xxx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-mpc8xxx.c b/drivers/gpio/gpio-mpc8xxx.c index e7783b852d69..d5f735ce0dd4 100644 --- a/drivers/gpio/gpio-mpc8xxx.c +++ b/drivers/gpio/gpio-mpc8xxx.c @@ -306,6 +306,7 @@ static int mpc8xxx_probe(struct platform_device *pdev) return -ENOMEM; gc = &mpc8xxx_gc->gc; + gc->parent = &pdev->dev; if (of_property_read_bool(np, "little-endian")) { ret = bgpio_init(gc, &pdev->dev, 4, From 248a7fd151393b5b5e00d3d6a4e2144f67717ba0 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Wed, 18 Dec 2019 19:15:31 +0530 Subject: [PATCH 2332/3715] scsi: libcxgbi: fix NULL pointer dereference in cxgbi_device_destroy() [ Upstream commit 71482fde704efdd8c3abe0faf34d922c61e8d76b ] If cxgb4i_ddp_init() fails then cdev->cdev2ppm will be NULL, so add a check for NULL pointer before dereferencing it. Link: https://lore.kernel.org/r/1576676731-3068-1-git-send-email-varun@chelsio.com Signed-off-by: Varun Prakash Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/cxgbi/libcxgbi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c index 902f5e03ec94..0d45658f163a 100644 --- a/drivers/scsi/cxgbi/libcxgbi.c +++ b/drivers/scsi/cxgbi/libcxgbi.c @@ -121,7 +121,8 @@ static inline void cxgbi_device_destroy(struct cxgbi_device *cdev) "cdev 0x%p, p# %u.\n", cdev, cdev->nports); cxgbi_hbas_remove(cdev); cxgbi_device_portmap_cleanup(cdev); - cxgbi_ppm_release(cdev->cdev2ppm(cdev)); + if (cdev->cdev2ppm) + cxgbi_ppm_release(cdev->cdev2ppm(cdev)); if (cdev->pmap.max_connect) cxgbi_free_big_mem(cdev->pmap.port_csk); kfree(cdev); From d3c981eb0bd7444039b5950fc06602e92f676f6c Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Wed, 11 Dec 2019 11:28:57 -0500 Subject: [PATCH 2333/3715] rseq/selftests: Turn off timeout setting [ Upstream commit af9cb29c5488381083b0b5ccdfb3cd931063384a ] As the rseq selftests can run for a long period of time, disable the timeout that the general selftests have. Signed-off-by: Mathieu Desnoyers Cc: Shuah Khan Cc: Thomas Gleixner Cc: Peter Zijlstra (Intel) Cc: "Paul E. McKenney" Cc: Boqun Feng Cc: "H . Peter Anvin" Cc: Paul Turner Cc: Dmitry Vyukov Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- tools/testing/selftests/rseq/settings | 1 + 1 file changed, 1 insertion(+) create mode 100644 tools/testing/selftests/rseq/settings diff --git a/tools/testing/selftests/rseq/settings b/tools/testing/selftests/rseq/settings new file mode 100644 index 000000000000..e7b9417537fb --- /dev/null +++ b/tools/testing/selftests/rseq/settings @@ -0,0 +1 @@ +timeout=0 From ac9951c4894ee3a6c2d8b1e110a95b1ab1e8625b Mon Sep 17 00:00:00 2001 From: Vladimir Kondratiev Date: Sun, 24 Nov 2019 16:07:31 +0200 Subject: [PATCH 2334/3715] mips: cacheinfo: report shared CPU map [ Upstream commit 3b1313eb32c499d46dc4c3e896d19d9564c879c4 ] Report L1 caches as shared per core; L2 - per cluster. This fixes "perf" that went crazy if shared_cpu_map attribute not reported on sysfs, in form of /sys/devices/system/cpu/cpu*/cache/index*/shared_cpu_list /sys/devices/system/cpu/cpu*/cache/index*/shared_cpu_map Signed-off-by: Vladimir Kondratiev Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Sasha Levin --- arch/mips/kernel/cacheinfo.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/arch/mips/kernel/cacheinfo.c b/arch/mips/kernel/cacheinfo.c index 428ef2189203..3ea95568ece4 100644 --- a/arch/mips/kernel/cacheinfo.c +++ b/arch/mips/kernel/cacheinfo.c @@ -61,6 +61,25 @@ static int __init_cache_level(unsigned int cpu) return 0; } +static void fill_cpumask_siblings(int cpu, cpumask_t *cpu_map) +{ + int cpu1; + + for_each_possible_cpu(cpu1) + if (cpus_are_siblings(cpu, cpu1)) + cpumask_set_cpu(cpu1, cpu_map); +} + +static void fill_cpumask_cluster(int cpu, cpumask_t *cpu_map) +{ + int cpu1; + int cluster = cpu_cluster(&cpu_data[cpu]); + + for_each_possible_cpu(cpu1) + if (cpu_cluster(&cpu_data[cpu1]) == cluster) + cpumask_set_cpu(cpu1, cpu_map); +} + static int __populate_cache_leaves(unsigned int cpu) { struct cpuinfo_mips *c = ¤t_cpu_data; @@ -68,14 +87,20 @@ static int __populate_cache_leaves(unsigned int cpu) struct cacheinfo *this_leaf = this_cpu_ci->info_list; if (c->icache.waysize) { + /* L1 caches are per core */ + fill_cpumask_siblings(cpu, &this_leaf->shared_cpu_map); populate_cache(dcache, this_leaf, 1, CACHE_TYPE_DATA); + fill_cpumask_siblings(cpu, &this_leaf->shared_cpu_map); populate_cache(icache, this_leaf, 1, CACHE_TYPE_INST); } else { populate_cache(dcache, this_leaf, 1, CACHE_TYPE_UNIFIED); } - if (c->scache.waysize) + if (c->scache.waysize) { + /* L2 cache is per cluster */ + fill_cpumask_cluster(cpu, &this_leaf->shared_cpu_map); populate_cache(scache, this_leaf, 2, CACHE_TYPE_UNIFIED); + } if (c->tcache.waysize) populate_cache(tcache, this_leaf, 3, CACHE_TYPE_UNIFIED); From babf91acca033201ce29fdb2914ea1ee64c186e2 Mon Sep 17 00:00:00 2001 From: Jouni Hogander Date: Mon, 9 Dec 2019 14:37:07 +0200 Subject: [PATCH 2335/3715] MIPS: Prevent link failure with kcov instrumentation [ Upstream commit a4a3893114a41e365274d5fab5d9ff5acc235ff0 ] __sanitizer_cov_trace_pc() is not linked in and causing link failure if KCOV_INSTRUMENT is enabled. Fix this by disabling instrumentation for compressed image. Signed-off-by: Jouni Hogander Signed-off-by: Paul Burton Cc: Lukas Bulwahn Cc: linux-mips@vger.kernel.org Signed-off-by: Sasha Levin --- arch/mips/boot/compressed/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile index 331b9e0a8072..baa34e4deb78 100644 --- a/arch/mips/boot/compressed/Makefile +++ b/arch/mips/boot/compressed/Makefile @@ -29,6 +29,9 @@ KBUILD_AFLAGS := $(KBUILD_AFLAGS) -D__ASSEMBLY__ \ -DBOOT_HEAP_SIZE=$(BOOT_HEAP_SIZE) \ -DKERNEL_ENTRY=$(VMLINUX_ENTRY_ADDRESS) +# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. +KCOV_INSTRUMENT := n + # decompressor objects (linked with vmlinuz) vmlinuzobjs-y := $(obj)/head.o $(obj)/decompress.o $(obj)/string.o From ed4e771283ff1f04e9f409ff4485ad0430c160b9 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 18 Dec 2019 19:09:06 +0000 Subject: [PATCH 2336/3715] dmaengine: k3dma: Avoid null pointer traversal [ Upstream commit 2f42e05b942fe2fbfb9bbc6e34e1dd8c3ce4f3a4 ] In some cases we seem to submit two transactions in a row, which causes us to lose track of the first. If we then cancel the request, we may still get an interrupt, which traverses a null ds_run value. So try to avoid starting a new transaction if the ds_run value is set. While this patch avoids the null pointer crash, I've had some reports of the k3dma driver still getting confused, which suggests the ds_run/ds_done value handling still isn't quite right. However, I've not run into an issue recently with it so I think this patch is worth pushing upstream to avoid the crash. Signed-off-by: John Stultz [add ss tag] Link: https://lore.kernel.org/r/20191218190906.6641-1-john.stultz@linaro.org Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/k3dma.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/dma/k3dma.c b/drivers/dma/k3dma.c index 219ae3b545db..803045c92f3b 100644 --- a/drivers/dma/k3dma.c +++ b/drivers/dma/k3dma.c @@ -222,9 +222,11 @@ static irqreturn_t k3_dma_int_handler(int irq, void *dev_id) c = p->vchan; if (c && (tc1 & BIT(i))) { spin_lock_irqsave(&c->vc.lock, flags); - vchan_cookie_complete(&p->ds_run->vd); - p->ds_done = p->ds_run; - p->ds_run = NULL; + if (p->ds_run != NULL) { + vchan_cookie_complete(&p->ds_run->vd); + p->ds_done = p->ds_run; + p->ds_run = NULL; + } spin_unlock_irqrestore(&c->vc.lock, flags); } if (c && (tc2 & BIT(i))) { @@ -264,6 +266,10 @@ static int k3_dma_start_txd(struct k3_dma_chan *c) if (BIT(c->phy->idx) & k3_dma_get_chan_stat(d)) return -EAGAIN; + /* Avoid losing track of ds_run if a transaction is in flight */ + if (c->phy->ds_run) + return -EAGAIN; + if (vd) { struct k3_dma_desc_sw *ds = container_of(vd, struct k3_dma_desc_sw, vd); From 8e2b251811f2848f77b3eb58344623b146633097 Mon Sep 17 00:00:00 2001 From: "Alexander.Barabash@dell.com" Date: Wed, 25 Dec 2019 17:55:30 +0000 Subject: [PATCH 2337/3715] ioat: ioat_alloc_ring() failure handling. [ Upstream commit b0b5ce1010ffc50015eaec72b0028aaae3f526bb ] If dma_alloc_coherent() returns NULL in ioat_alloc_ring(), ring allocation must not proceed. Until now, if the first call to dma_alloc_coherent() in ioat_alloc_ring() returned NULL, the processing could proceed, failing with NULL-pointer dereferencing further down the line. Signed-off-by: Alexander Barabash Acked-by: Dave Jiang Link: https://lore.kernel.org/r/75e9c0e84c3345d693c606c64f8b9ab5@x13pwhopdag1307.AMER.DELL.COM Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/ioat/dma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index f70cc74032ea..e3899ae429e0 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -388,10 +388,11 @@ ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags) descs->virt = dma_alloc_coherent(to_dev(ioat_chan), SZ_2M, &descs->hw, flags); - if (!descs->virt && (i > 0)) { + if (!descs->virt) { int idx; for (idx = 0; idx < i; idx++) { + descs = &ioat_chan->descs[idx]; dma_free_coherent(to_dev(ioat_chan), SZ_2M, descs->virt, descs->hw); descs->virt = NULL; From 42df34c76c87e797a56b67a511b7bfcff3879874 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Sat, 4 Jan 2020 12:59:59 -0800 Subject: [PATCH 2338/3715] hexagon: parenthesize registers in asm predicates [ Upstream commit 780a0cfda9006a9a22d6473c2d4c527f5c68eb2e ] Hexagon requires that register predicates in assembly be parenthesized. Link: https://github.com/ClangBuiltLinux/linux/issues/754 Link: http://lkml.kernel.org/r/20191209222956.239798-3-ndesaulniers@google.com Signed-off-by: Nick Desaulniers Suggested-by: Sid Manning Acked-by: Brian Cain Cc: Lee Jones Cc: Andy Shevchenko Cc: Tuowen Zhao Cc: Mika Westerberg Cc: Luis Chamberlain Cc: Greg Kroah-Hartman Cc: Alexios Zavras Cc: Allison Randal Cc: Will Deacon Cc: Richard Fontana Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Boqun Feng Cc: Ingo Molnar Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- arch/hexagon/include/asm/atomic.h | 8 ++++---- arch/hexagon/include/asm/bitops.h | 8 ++++---- arch/hexagon/include/asm/cmpxchg.h | 2 +- arch/hexagon/include/asm/futex.h | 6 +++--- arch/hexagon/include/asm/spinlock.h | 20 ++++++++++---------- arch/hexagon/kernel/vm_entry.S | 2 +- 6 files changed, 23 insertions(+), 23 deletions(-) diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h index fb3dfb2a667e..d4e283b4f335 100644 --- a/arch/hexagon/include/asm/atomic.h +++ b/arch/hexagon/include/asm/atomic.h @@ -105,7 +105,7 @@ static inline void atomic_##op(int i, atomic_t *v) \ "1: %0 = memw_locked(%1);\n" \ " %0 = "#op "(%0,%2);\n" \ " memw_locked(%1,P3)=%0;\n" \ - " if !P3 jump 1b;\n" \ + " if (!P3) jump 1b;\n" \ : "=&r" (output) \ : "r" (&v->counter), "r" (i) \ : "memory", "p3" \ @@ -121,7 +121,7 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ "1: %0 = memw_locked(%1);\n" \ " %0 = "#op "(%0,%2);\n" \ " memw_locked(%1,P3)=%0;\n" \ - " if !P3 jump 1b;\n" \ + " if (!P3) jump 1b;\n" \ : "=&r" (output) \ : "r" (&v->counter), "r" (i) \ : "memory", "p3" \ @@ -138,7 +138,7 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \ "1: %0 = memw_locked(%2);\n" \ " %1 = "#op "(%0,%3);\n" \ " memw_locked(%2,P3)=%1;\n" \ - " if !P3 jump 1b;\n" \ + " if (!P3) jump 1b;\n" \ : "=&r" (output), "=&r" (val) \ : "r" (&v->counter), "r" (i) \ : "memory", "p3" \ @@ -187,7 +187,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) " }" " memw_locked(%2, p3) = %1;" " {" - " if !p3 jump 1b;" + " if (!p3) jump 1b;" " }" "2:" : "=&r" (__oldval), "=&r" (tmp) diff --git a/arch/hexagon/include/asm/bitops.h b/arch/hexagon/include/asm/bitops.h index 2691a1857d20..634306cda006 100644 --- a/arch/hexagon/include/asm/bitops.h +++ b/arch/hexagon/include/asm/bitops.h @@ -52,7 +52,7 @@ static inline int test_and_clear_bit(int nr, volatile void *addr) "1: R12 = memw_locked(R10);\n" " { P0 = tstbit(R12,R11); R12 = clrbit(R12,R11); }\n" " memw_locked(R10,P1) = R12;\n" - " {if !P1 jump 1b; %0 = mux(P0,#1,#0);}\n" + " {if (!P1) jump 1b; %0 = mux(P0,#1,#0);}\n" : "=&r" (oldval) : "r" (addr), "r" (nr) : "r10", "r11", "r12", "p0", "p1", "memory" @@ -76,7 +76,7 @@ static inline int test_and_set_bit(int nr, volatile void *addr) "1: R12 = memw_locked(R10);\n" " { P0 = tstbit(R12,R11); R12 = setbit(R12,R11); }\n" " memw_locked(R10,P1) = R12;\n" - " {if !P1 jump 1b; %0 = mux(P0,#1,#0);}\n" + " {if (!P1) jump 1b; %0 = mux(P0,#1,#0);}\n" : "=&r" (oldval) : "r" (addr), "r" (nr) : "r10", "r11", "r12", "p0", "p1", "memory" @@ -102,7 +102,7 @@ static inline int test_and_change_bit(int nr, volatile void *addr) "1: R12 = memw_locked(R10);\n" " { P0 = tstbit(R12,R11); R12 = togglebit(R12,R11); }\n" " memw_locked(R10,P1) = R12;\n" - " {if !P1 jump 1b; %0 = mux(P0,#1,#0);}\n" + " {if (!P1) jump 1b; %0 = mux(P0,#1,#0);}\n" : "=&r" (oldval) : "r" (addr), "r" (nr) : "r10", "r11", "r12", "p0", "p1", "memory" @@ -237,7 +237,7 @@ static inline int ffs(int x) int r; asm("{ P0 = cmp.eq(%1,#0); %0 = ct0(%1);}\n" - "{ if P0 %0 = #0; if !P0 %0 = add(%0,#1);}\n" + "{ if (P0) %0 = #0; if (!P0) %0 = add(%0,#1);}\n" : "=&r" (r) : "r" (x) : "p0"); diff --git a/arch/hexagon/include/asm/cmpxchg.h b/arch/hexagon/include/asm/cmpxchg.h index a6e34e2acbba..db258424059f 100644 --- a/arch/hexagon/include/asm/cmpxchg.h +++ b/arch/hexagon/include/asm/cmpxchg.h @@ -44,7 +44,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, __asm__ __volatile__ ( "1: %0 = memw_locked(%1);\n" /* load into retval */ " memw_locked(%1,P0) = %2;\n" /* store into memory */ - " if !P0 jump 1b;\n" + " if (!P0) jump 1b;\n" : "=&r" (retval) : "r" (ptr), "r" (x) : "memory", "p0" diff --git a/arch/hexagon/include/asm/futex.h b/arch/hexagon/include/asm/futex.h index c889f5993ecd..e8e5e47afb37 100644 --- a/arch/hexagon/include/asm/futex.h +++ b/arch/hexagon/include/asm/futex.h @@ -16,7 +16,7 @@ /* For example: %1 = %4 */ \ insn \ "2: memw_locked(%3,p2) = %1;\n" \ - " if !p2 jump 1b;\n" \ + " if (!p2) jump 1b;\n" \ " %1 = #0;\n" \ "3:\n" \ ".section .fixup,\"ax\"\n" \ @@ -84,10 +84,10 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, "1: %1 = memw_locked(%3)\n" " {\n" " p2 = cmp.eq(%1,%4)\n" - " if !p2.new jump:NT 3f\n" + " if (!p2.new) jump:NT 3f\n" " }\n" "2: memw_locked(%3,p2) = %5\n" - " if !p2 jump 1b\n" + " if (!p2) jump 1b\n" "3:\n" ".section .fixup,\"ax\"\n" "4: %0 = #%6\n" diff --git a/arch/hexagon/include/asm/spinlock.h b/arch/hexagon/include/asm/spinlock.h index 53a8d5885887..007056263b8e 100644 --- a/arch/hexagon/include/asm/spinlock.h +++ b/arch/hexagon/include/asm/spinlock.h @@ -44,9 +44,9 @@ static inline void arch_read_lock(arch_rwlock_t *lock) __asm__ __volatile__( "1: R6 = memw_locked(%0);\n" " { P3 = cmp.ge(R6,#0); R6 = add(R6,#1);}\n" - " { if !P3 jump 1b; }\n" + " { if (!P3) jump 1b; }\n" " memw_locked(%0,P3) = R6;\n" - " { if !P3 jump 1b; }\n" + " { if (!P3) jump 1b; }\n" : : "r" (&lock->lock) : "memory", "r6", "p3" @@ -60,7 +60,7 @@ static inline void arch_read_unlock(arch_rwlock_t *lock) "1: R6 = memw_locked(%0);\n" " R6 = add(R6,#-1);\n" " memw_locked(%0,P3) = R6\n" - " if !P3 jump 1b;\n" + " if (!P3) jump 1b;\n" : : "r" (&lock->lock) : "memory", "r6", "p3" @@ -75,7 +75,7 @@ static inline int arch_read_trylock(arch_rwlock_t *lock) __asm__ __volatile__( " R6 = memw_locked(%1);\n" " { %0 = #0; P3 = cmp.ge(R6,#0); R6 = add(R6,#1);}\n" - " { if !P3 jump 1f; }\n" + " { if (!P3) jump 1f; }\n" " memw_locked(%1,P3) = R6;\n" " { %0 = P3 }\n" "1:\n" @@ -102,9 +102,9 @@ static inline void arch_write_lock(arch_rwlock_t *lock) __asm__ __volatile__( "1: R6 = memw_locked(%0)\n" " { P3 = cmp.eq(R6,#0); R6 = #-1;}\n" - " { if !P3 jump 1b; }\n" + " { if (!P3) jump 1b; }\n" " memw_locked(%0,P3) = R6;\n" - " { if !P3 jump 1b; }\n" + " { if (!P3) jump 1b; }\n" : : "r" (&lock->lock) : "memory", "r6", "p3" @@ -118,7 +118,7 @@ static inline int arch_write_trylock(arch_rwlock_t *lock) __asm__ __volatile__( " R6 = memw_locked(%1)\n" " { %0 = #0; P3 = cmp.eq(R6,#0); R6 = #-1;}\n" - " { if !P3 jump 1f; }\n" + " { if (!P3) jump 1f; }\n" " memw_locked(%1,P3) = R6;\n" " %0 = P3;\n" "1:\n" @@ -141,9 +141,9 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) __asm__ __volatile__( "1: R6 = memw_locked(%0);\n" " P3 = cmp.eq(R6,#0);\n" - " { if !P3 jump 1b; R6 = #1; }\n" + " { if (!P3) jump 1b; R6 = #1; }\n" " memw_locked(%0,P3) = R6;\n" - " { if !P3 jump 1b; }\n" + " { if (!P3) jump 1b; }\n" : : "r" (&lock->lock) : "memory", "r6", "p3" @@ -163,7 +163,7 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock) __asm__ __volatile__( " R6 = memw_locked(%1);\n" " P3 = cmp.eq(R6,#0);\n" - " { if !P3 jump 1f; R6 = #1; %0 = #0; }\n" + " { if (!P3) jump 1f; R6 = #1; %0 = #0; }\n" " memw_locked(%1,P3) = R6;\n" " %0 = P3;\n" "1:\n" diff --git a/arch/hexagon/kernel/vm_entry.S b/arch/hexagon/kernel/vm_entry.S index 67c6ccc14770..9f4a73ff7203 100644 --- a/arch/hexagon/kernel/vm_entry.S +++ b/arch/hexagon/kernel/vm_entry.S @@ -382,7 +382,7 @@ ret_from_fork: R26.L = #LO(do_work_pending); R0 = #VM_INT_DISABLE; } - if P0 jump check_work_pending + if (P0) jump check_work_pending { R0 = R25; callr R24 From ffcb1af55b18fe05089c6faf831839febeca22b1 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Sat, 4 Jan 2020 13:00:02 -0800 Subject: [PATCH 2339/3715] hexagon: work around compiler crash [ Upstream commit 63e80314ab7cf4783526d2e44ee57a90514911c9 ] Clang cannot translate the string "r30" into a valid register yet. Link: https://github.com/ClangBuiltLinux/linux/issues/755 Link: http://lkml.kernel.org/r/20191028155722.23419-1-ndesaulniers@google.com Signed-off-by: Nick Desaulniers Suggested-by: Sid Manning Reviewed-by: Brian Cain Cc: Allison Randal Cc: Greg Kroah-Hartman Cc: Richard Fontana Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- arch/hexagon/kernel/stacktrace.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/hexagon/kernel/stacktrace.c b/arch/hexagon/kernel/stacktrace.c index 41866a06adf7..ec4ef682923d 100644 --- a/arch/hexagon/kernel/stacktrace.c +++ b/arch/hexagon/kernel/stacktrace.c @@ -24,8 +24,6 @@ #include #include -register unsigned long current_frame_pointer asm("r30"); - struct stackframe { unsigned long fp; unsigned long rets; @@ -43,7 +41,7 @@ void save_stack_trace(struct stack_trace *trace) low = (unsigned long)task_stack_page(current); high = low + THREAD_SIZE; - fp = current_frame_pointer; + fp = (unsigned long)__builtin_frame_address(0); while (fp >= low && fp <= (high - sizeof(*frame))) { frame = (struct stackframe *)fp; From e6af540790d6057f9f31e693167d3bfc847af200 Mon Sep 17 00:00:00 2001 From: Kai Li Date: Sat, 4 Jan 2020 13:00:18 -0800 Subject: [PATCH 2340/3715] ocfs2: call journal flush to mark journal as empty after journal recovery when mount [ Upstream commit 397eac17f86f404f5ba31d8c3e39ec3124b39fd3 ] If journal is dirty when mount, it will be replayed but jbd2 sb log tail cannot be updated to mark a new start because journal->j_flag has already been set with JBD2_ABORT first in journal_init_common. When a new transaction is committed, it will be recored in block 1 first(journal->j_tail is set to 1 in journal_reset). If emergency restart happens again before journal super block is updated unfortunately, the new recorded trans will not be replayed in the next mount. The following steps describe this procedure in detail. 1. mount and touch some files 2. these transactions are committed to journal area but not checkpointed 3. emergency restart 4. mount again and its journals are replayed 5. journal super block's first s_start is 1, but its s_seq is not updated 6. touch a new file and its trans is committed but not checkpointed 7. emergency restart again 8. mount and journal is dirty, but trans committed in 6 will not be replayed. This exception happens easily when this lun is used by only one node. If it is used by multi-nodes, other node will replay its journal and its journal super block will be updated after recovery like what this patch does. ocfs2_recover_node->ocfs2_replay_journal. The following jbd2 journal can be generated by touching a new file after journal is replayed, and seq 15 is the first valid commit, but first seq is 13 in journal super block. logdump: Block 0: Journal Superblock Seq: 0 Type: 4 (JBD2_SUPERBLOCK_V2) Blocksize: 4096 Total Blocks: 32768 First Block: 1 First Commit ID: 13 Start Log Blknum: 1 Error: 0 Feature Compat: 0 Feature Incompat: 2 block64 Feature RO compat: 0 Journal UUID: 4ED3822C54294467A4F8E87D2BA4BC36 FS Share Cnt: 1 Dynamic Superblk Blknum: 0 Per Txn Block Limit Journal: 0 Data: 0 Block 1: Journal Commit Block Seq: 14 Type: 2 (JBD2_COMMIT_BLOCK) Block 2: Journal Descriptor Seq: 15 Type: 1 (JBD2_DESCRIPTOR_BLOCK) No. Blocknum Flags 0. 587 none UUID: 00000000000000000000000000000000 1. 8257792 JBD2_FLAG_SAME_UUID 2. 619 JBD2_FLAG_SAME_UUID 3. 24772864 JBD2_FLAG_SAME_UUID 4. 8257802 JBD2_FLAG_SAME_UUID 5. 513 JBD2_FLAG_SAME_UUID JBD2_FLAG_LAST_TAG ... Block 7: Inode Inode: 8257802 Mode: 0640 Generation: 57157641 (0x3682809) FS Generation: 2839773110 (0xa9437fb6) CRC32: 00000000 ECC: 0000 Type: Regular Attr: 0x0 Flags: Valid Dynamic Features: (0x1) InlineData User: 0 (root) Group: 0 (root) Size: 7 Links: 1 Clusters: 0 ctime: 0x5de5d870 0x11104c61 -- Tue Dec 3 11:37:20.286280801 2019 atime: 0x5de5d870 0x113181a1 -- Tue Dec 3 11:37:20.288457121 2019 mtime: 0x5de5d870 0x11104c61 -- Tue Dec 3 11:37:20.286280801 2019 dtime: 0x0 -- Thu Jan 1 08:00:00 1970 ... Block 9: Journal Commit Block Seq: 15 Type: 2 (JBD2_COMMIT_BLOCK) The following is journal recovery log when recovering the upper jbd2 journal when mount again. syslog: ocfs2: File system on device (252,1) was not unmounted cleanly, recovering it. fs/jbd2/recovery.c:(do_one_pass, 449): Starting recovery pass 0 fs/jbd2/recovery.c:(do_one_pass, 449): Starting recovery pass 1 fs/jbd2/recovery.c:(do_one_pass, 449): Starting recovery pass 2 fs/jbd2/recovery.c:(jbd2_journal_recover, 278): JBD2: recovery, exit status 0, recovered transactions 13 to 13 Due to first commit seq 13 recorded in journal super is not consistent with the value recorded in block 1(seq is 14), journal recovery will be terminated before seq 15 even though it is an unbroken commit, inode 8257802 is a new file and it will be lost. Link: http://lkml.kernel.org/r/20191217020140.2197-1-li.kai4@h3c.com Signed-off-by: Kai Li Reviewed-by: Joseph Qi Reviewed-by: Changwei Ge Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/ocfs2/journal.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 2459ae9d2234..39bb80fb2934 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -1080,6 +1080,14 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed) ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num); + if (replayed) { + jbd2_journal_lock_updates(journal->j_journal); + status = jbd2_journal_flush(journal->j_journal); + jbd2_journal_unlock_updates(journal->j_journal); + if (status < 0) + mlog_errno(status); + } + status = ocfs2_journal_toggle_dirty(osb, 1, replayed); if (status < 0) { mlog_errno(status); From c1141b3aab36eb0d9b2bcae4aff69e77d0554386 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 17 Jan 2020 19:45:55 +0100 Subject: [PATCH 2341/3715] Linux 4.14.166 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 166e18aa9ca9..7c62b4078c1b 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 165 +SUBLEVEL = 166 EXTRAVERSION = NAME = Petit Gorille From 2f1f0637838408a0e99ee443a72b74f44fb54401 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 30 Nov 2019 19:53:37 +0100 Subject: [PATCH 2342/3715] dt-bindings: reset: meson8b: fix duplicate reset IDs commit 4881873f4cc1460f63d85fa81363d56be328ccdc upstream. According to the public S805 datasheet the RESET2 register uses the following bits for the PIC_DC, PSC and NAND reset lines: - PIC_DC is at bit 3 (meaning: RESET_VD_RMEM + 3) - PSC is at bit 4 (meaning: RESET_VD_RMEM + 4) - NAND is at bit 5 (meaning: RESET_VD_RMEM + 4) Update the reset IDs of these three reset lines so they don't conflict with PIC_DC and map to the actual hardware reset lines. Fixes: 79795e20a184eb ("dt-bindings: reset: Add bindings for the Meson SoC Reset Controller") Signed-off-by: Martin Blumenstingl Signed-off-by: Kevin Hilman Signed-off-by: Greg Kroah-Hartman --- include/dt-bindings/reset/amlogic,meson8b-reset.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/dt-bindings/reset/amlogic,meson8b-reset.h b/include/dt-bindings/reset/amlogic,meson8b-reset.h index 614aff2c7aff..a03e86fe2c57 100644 --- a/include/dt-bindings/reset/amlogic,meson8b-reset.h +++ b/include/dt-bindings/reset/amlogic,meson8b-reset.h @@ -95,9 +95,9 @@ #define RESET_VD_RMEM 64 #define RESET_AUDIN 65 #define RESET_DBLK 66 -#define RESET_PIC_DC 66 -#define RESET_PSC 66 -#define RESET_NAND 66 +#define RESET_PIC_DC 67 +#define RESET_PSC 68 +#define RESET_NAND 69 #define RESET_GE2D 70 #define RESET_PARSER_REG 71 #define RESET_PARSER_FETCH 72 From 692dcea72e4aaf1d25833a1f42663bf83efd344c Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 25 Dec 2019 08:34:29 -0800 Subject: [PATCH 2343/3715] clk: Don't try to enable critical clocks if prepare failed commit 12ead77432f2ce32dea797742316d15c5800cb32 upstream. The following traceback is seen if a critical clock fails to prepare. bcm2835-clk 3f101000.cprman: plld: couldn't lock PLL ------------[ cut here ]------------ Enabling unprepared plld_per WARNING: CPU: 1 PID: 1 at drivers/clk/clk.c:1014 clk_core_enable+0xcc/0x2c0 ... Call trace: clk_core_enable+0xcc/0x2c0 __clk_register+0x5c4/0x788 devm_clk_hw_register+0x4c/0xb0 bcm2835_register_pll_divider+0xc0/0x150 bcm2835_clk_probe+0x134/0x1e8 platform_drv_probe+0x50/0xa0 really_probe+0xd4/0x308 driver_probe_device+0x54/0xe8 device_driver_attach+0x6c/0x78 __driver_attach+0x54/0xd8 ... Check return values from clk_core_prepare() and clk_core_enable() and bail out if any of those functions returns an error. Cc: Jerome Brunet Fixes: 99652a469df1 ("clk: migrate the count of orphaned clocks at init") Signed-off-by: Guenter Roeck Link: https://lkml.kernel.org/r/20191225163429.29694-1-linux@roeck-us.net Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/clk.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index a3f52f678211..8341a128dab1 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -2482,11 +2482,17 @@ static int __clk_core_init(struct clk_core *core) if (core->flags & CLK_IS_CRITICAL) { unsigned long flags; - clk_core_prepare(core); + ret = clk_core_prepare(core); + if (ret) + goto out; flags = clk_enable_lock(); - clk_core_enable(core); + ret = clk_core_enable(core); clk_enable_unlock(flags); + if (ret) { + clk_core_unprepare(core); + goto out; + } } /* From 00bbc127415f104ed0f195a994fc3892f2d5383e Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Sat, 11 Jan 2020 17:40:03 +0100 Subject: [PATCH 2344/3715] ASoC: msm8916-wcd-analog: Fix selected events for MIC BIAS External1 commit e0beec88397b163c7c4ea6fcfb67e8e07a2671dc upstream. MIC BIAS External1 sets pm8916_wcd_analog_enable_micbias_ext1() as event handler, which ends up in pm8916_wcd_analog_enable_micbias_ext(). But pm8916_wcd_analog_enable_micbias_ext() only handles the POST_PMU event, which is not specified in the event flags for MIC BIAS External1. This means that the code in the event handler is never actually run. Set SND_SOC_DAPM_POST_PMU as the only event for the handler to fix this. Fixes: 585e881e5b9e ("ASoC: codecs: Add msm8916-wcd analog codec") Cc: Srinivas Kandagatla Signed-off-by: Stephan Gerhold Link: https://lore.kernel.org/r/20200111164006.43074-2-stephan@gerhold.net Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/msm8916-wcd-analog.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/msm8916-wcd-analog.c b/sound/soc/codecs/msm8916-wcd-analog.c index 969283737787..3633eb30dd13 100644 --- a/sound/soc/codecs/msm8916-wcd-analog.c +++ b/sound/soc/codecs/msm8916-wcd-analog.c @@ -876,10 +876,10 @@ static const struct snd_soc_dapm_widget pm8916_wcd_analog_dapm_widgets[] = { SND_SOC_DAPM_SUPPLY("MIC BIAS External1", CDC_A_MICB_1_EN, 7, 0, pm8916_wcd_analog_enable_micbias_ext1, - SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), + SND_SOC_DAPM_POST_PMU), SND_SOC_DAPM_SUPPLY("MIC BIAS External2", CDC_A_MICB_2_EN, 7, 0, pm8916_wcd_analog_enable_micbias_ext2, - SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_POST_PMD), + SND_SOC_DAPM_POST_PMU), SND_SOC_DAPM_ADC_E("ADC1", NULL, CDC_A_TX_1_EN, 7, 0, pm8916_wcd_analog_enable_adc, From 43bb0a16b25d5030193935b5c292648fa9abc0fc Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 15 Jan 2020 21:37:33 +0100 Subject: [PATCH 2345/3715] ALSA: seq: Fix racy access for queue timer in proc read commit 60adcfde92fa40fcb2dbf7cc52f9b096e0cd109a upstream. snd_seq_info_timer_read() reads the information of the timer assigned for each queue, but it's done in a racy way which may lead to UAF as spotted by syzkaller. This patch applies the missing q->timer_mutex lock while accessing the timer object as well as a slight code change to adapt the standard coding style. Reported-by: syzbot+2b2ef983f973e5c40943@syzkaller.appspotmail.com Cc: Link: https://lore.kernel.org/r/20200115203733.26530-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/seq/seq_timer.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/sound/core/seq/seq_timer.c b/sound/core/seq/seq_timer.c index b80985fbc334..0e1feb597586 100644 --- a/sound/core/seq/seq_timer.c +++ b/sound/core/seq/seq_timer.c @@ -479,15 +479,19 @@ void snd_seq_info_timer_read(struct snd_info_entry *entry, q = queueptr(idx); if (q == NULL) continue; - if ((tmr = q->timer) == NULL || - (ti = tmr->timeri) == NULL) { - queuefree(q); - continue; - } + mutex_lock(&q->timer_mutex); + tmr = q->timer; + if (!tmr) + goto unlock; + ti = tmr->timeri; + if (!ti) + goto unlock; snd_iprintf(buffer, "Timer for queue %i : %s\n", q->queue, ti->timer->name); resolution = snd_timer_resolution(ti) * tmr->ticks; snd_iprintf(buffer, " Period time : %lu.%09lu\n", resolution / 1000000000, resolution % 1000000000); snd_iprintf(buffer, " Skew : %u / %u\n", tmr->skew, tmr->skew_base); +unlock: + mutex_unlock(&q->timer_mutex); queuefree(q); } } From 8085d56065edc52628efb502e5fc03c7230c8fe2 Mon Sep 17 00:00:00 2001 From: Jari Ruusu Date: Sun, 12 Jan 2020 15:00:53 +0200 Subject: [PATCH 2346/3715] Fix built-in early-load Intel microcode alignment commit f5ae2ea6347a308cfe91f53b53682ce635497d0d upstream. Intel Software Developer's Manual, volume 3, chapter 9.11.6 says: "Note that the microcode update must be aligned on a 16-byte boundary and the size of the microcode update must be 1-KByte granular" When early-load Intel microcode is loaded from initramfs, userspace tool 'iucode_tool' has already 16-byte aligned those microcode bits in that initramfs image. Image that was created something like this: iucode_tool --write-earlyfw=FOO.cpio microcode-files... However, when early-load Intel microcode is loaded from built-in firmware BLOB using CONFIG_EXTRA_FIRMWARE= kernel config option, that 16-byte alignment is not guaranteed. Fix this by forcing all built-in firmware BLOBs to 16-byte alignment. [ If we end up having other firmware with much bigger alignment requirements, we might need to introduce some method for the firmware to specify it, this is the minimal "just increase the alignment a bit to account for this one special case" patch - Linus ] Signed-off-by: Jari Ruusu Cc: Borislav Petkov Cc: Fenghua Yu Cc: Luis Chamberlain Cc: stable@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- firmware/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/firmware/Makefile b/firmware/Makefile index 168094a3fae7..30e6b738839e 100644 --- a/firmware/Makefile +++ b/firmware/Makefile @@ -19,7 +19,7 @@ quiet_cmd_fwbin = MK_FW $@ PROGBITS=$(if $(CONFIG_ARM),%,@)progbits; \ echo "/* Generated by firmware/Makefile */" > $@;\ echo " .section .rodata" >>$@;\ - echo " .p2align $${ASM_ALIGN}" >>$@;\ + echo " .p2align 4" >>$@;\ echo "_fw_$${FWSTR}_bin:" >>$@;\ echo " .incbin \"$(2)\"" >>$@;\ echo "_fw_end:" >>$@;\ From 0c7a7d8e62bd942bf8e5d80486132d3ec0173b69 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 15 Jan 2020 08:35:25 -0500 Subject: [PATCH 2347/3715] block: fix an integer overflow in logical block size commit ad6bf88a6c19a39fb3b0045d78ea880325dfcf15 upstream. Logical block size has type unsigned short. That means that it can be at most 32768. However, there are architectures that can run with 64k pages (for example arm64) and on these architectures, it may be possible to create block devices with 64k block size. For exmaple (run this on an architecture with 64k pages): Mount will fail with this error because it tries to read the superblock using 2-sector access: device-mapper: writecache: I/O is not aligned, sector 2, size 1024, block size 65536 EXT4-fs (dm-0): unable to read superblock This patch changes the logical block size from unsigned short to unsigned int to avoid the overflow. Cc: stable@vger.kernel.org Reviewed-by: Martin K. Petersen Reviewed-by: Ming Lei Signed-off-by: Mikulas Patocka Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-settings.c | 2 +- drivers/md/dm-snap-persistent.c | 2 +- drivers/md/raid0.c | 2 +- include/linux/blkdev.h | 8 ++++---- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index 474b0b95fcd1..6c2faaa38cc1 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -379,7 +379,7 @@ EXPORT_SYMBOL(blk_queue_max_segment_size); * storage device can address. The default of 512 covers most * hardware. **/ -void blk_queue_logical_block_size(struct request_queue *q, unsigned short size) +void blk_queue_logical_block_size(struct request_queue *q, unsigned int size) { q->limits.logical_block_size = size; diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index c5534d294773..00025569e807 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -17,7 +17,7 @@ #include "dm-bufio.h" #define DM_MSG_PREFIX "persistent snapshot" -#define DM_CHUNK_SIZE_DEFAULT_SECTORS 32 /* 16KB */ +#define DM_CHUNK_SIZE_DEFAULT_SECTORS 32U /* 16KB */ #define DM_PREFETCH_CHUNKS 12 diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 204adde004a3..cdafa5e0ea6d 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -94,7 +94,7 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) char b[BDEVNAME_SIZE]; char b2[BDEVNAME_SIZE]; struct r0conf *conf = kzalloc(sizeof(*conf), GFP_KERNEL); - unsigned short blksize = 512; + unsigned blksize = 512; *private_conf = ERR_PTR(-ENOMEM); if (!conf) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4d4af0e94059..ad940102451c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -343,6 +343,7 @@ struct queue_limits { unsigned int max_sectors; unsigned int max_segment_size; unsigned int physical_block_size; + unsigned int logical_block_size; unsigned int alignment_offset; unsigned int io_min; unsigned int io_opt; @@ -353,7 +354,6 @@ struct queue_limits { unsigned int discard_granularity; unsigned int discard_alignment; - unsigned short logical_block_size; unsigned short max_segments; unsigned short max_integrity_segments; unsigned short max_discard_segments; @@ -1178,7 +1178,7 @@ extern void blk_queue_max_write_same_sectors(struct request_queue *q, unsigned int max_write_same_sectors); extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q, unsigned int max_write_same_sectors); -extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); +extern void blk_queue_logical_block_size(struct request_queue *, unsigned int); extern void blk_queue_physical_block_size(struct request_queue *, unsigned int); extern void blk_queue_alignment_offset(struct request_queue *q, unsigned int alignment); @@ -1436,7 +1436,7 @@ static inline unsigned int queue_max_segment_size(struct request_queue *q) return q->limits.max_segment_size; } -static inline unsigned short queue_logical_block_size(struct request_queue *q) +static inline unsigned queue_logical_block_size(struct request_queue *q) { int retval = 512; @@ -1446,7 +1446,7 @@ static inline unsigned short queue_logical_block_size(struct request_queue *q) return retval; } -static inline unsigned short bdev_logical_block_size(struct block_device *bdev) +static inline unsigned int bdev_logical_block_size(struct block_device *bdev) { return queue_logical_block_size(bdev_get_queue(bdev)); } From f27885c16525c3e4d4c5fa79ba9fcfcf3d1ab96c Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Tue, 17 Dec 2019 14:21:23 +0530 Subject: [PATCH 2348/3715] ARM: dts: am571x-idk: Fix gpios property to have the correct gpio number commit 0c4eb2a6b3c6b0facd0a3bccda5db22e7b3b6f96 upstream. commit d23f3839fe97d8dce03d ("ARM: dts: DRA7: Add pcie1 dt node for EP mode") while adding the dt node for EP mode for DRA7 platform, added rc node for am571x-idk and populated gpios property with "gpio3 23". However the GPIO_PCIE_SWRST line is actually connected to "gpio5 18". Fix it here. (The patch adding "gpio3 23" was tested with another am57x board in EP mode which doesn't rely on reset from host). Cc: stable # 4.14+ Fixes: d23f3839fe97d8dce03d ("ARM: dts: DRA7: Add pcie1 dt node for EP mode") Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/am571x-idk.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/am571x-idk.dts b/arch/arm/boot/dts/am571x-idk.dts index debf9464403e..96a4df4109d7 100644 --- a/arch/arm/boot/dts/am571x-idk.dts +++ b/arch/arm/boot/dts/am571x-idk.dts @@ -93,7 +93,7 @@ &pcie1_rc { status = "okay"; - gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>; + gpios = <&gpio5 18 GPIO_ACTIVE_HIGH>; }; &pcie1_ep { From ca76e5b3504fc0a72168c4982053604b4d7814ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20M=C3=B6llendorf?= Date: Fri, 13 Dec 2019 14:50:55 +0100 Subject: [PATCH 2349/3715] iio: buffer: align the size of scan bytes to size of the largest element MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 883f616530692d81cb70f8a32d85c0d2afc05f69 upstream. Previous versions of `iio_compute_scan_bytes` only aligned each element to its own length (i.e. its own natural alignment). Because multiple consecutive sets of scan elements are buffered this does not work in case the computed scan bytes do not align with the natural alignment of the first scan element in the set. This commit fixes this by aligning the scan bytes to the natural alignment of the largest scan element in the set. Fixes: 959d2952d124 ("staging:iio: make iio_sw_buffer_preenable much more general.") Signed-off-by: Lars Möllendorf Reviewed-by: Lars-Peter Clausen Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/industrialio-buffer.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c index d50125766093..c3badf634378 100644 --- a/drivers/iio/industrialio-buffer.c +++ b/drivers/iio/industrialio-buffer.c @@ -570,7 +570,7 @@ static int iio_compute_scan_bytes(struct iio_dev *indio_dev, const unsigned long *mask, bool timestamp) { unsigned bytes = 0; - int length, i; + int length, i, largest = 0; /* How much space will the demuxed element take? */ for_each_set_bit(i, mask, @@ -578,13 +578,17 @@ static int iio_compute_scan_bytes(struct iio_dev *indio_dev, length = iio_storage_bytes_for_si(indio_dev, i); bytes = ALIGN(bytes, length); bytes += length; + largest = max(largest, length); } if (timestamp) { length = iio_storage_bytes_for_timestamp(indio_dev); bytes = ALIGN(bytes, length); bytes += length; + largest = max(largest, length); } + + bytes = ALIGN(bytes, largest); return bytes; } From a31be20233df25e202bfc8ed9a52a93b34385f7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jer=C3=B3nimo=20Borque?= Date: Thu, 9 Jan 2020 12:23:34 -0300 Subject: [PATCH 2350/3715] USB: serial: simple: Add Motorola Solutions TETRA MTP3xxx and MTP85xx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 260e41ac4dd3e5acb90be624c03ba7f019615b75 upstream. Add device-ids for the Motorola Solutions TETRA radios MTP3xxx series and MTP85xx series $ lsusb -vd 0cad: Bus 001 Device 009: ID 0cad:9015 Motorola CGISS TETRA PEI interface Device Descriptor: bLength 18 bDescriptorType 1 bcdUSB 2.00 bDeviceClass 0 bDeviceSubClass 0 bDeviceProtocol 0 bMaxPacketSize0 64 idVendor 0x0cad Motorola CGISS idProduct 0x9015 bcdDevice 24.16 iManufacturer 1 iProduct 2 iSerial 0 bNumConfigurations 1 Configuration Descriptor: bLength 9 bDescriptorType 2 wTotalLength 0x0037 bNumInterfaces 2 bConfigurationValue 1 iConfiguration 3 bmAttributes 0x80 (Bus Powered) MaxPower 500mA Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 0 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 0 bInterfaceProtocol 0 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x81 EP 1 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x01 EP 1 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 0 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 1 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 0 bInterfaceProtocol 0 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x82 EP 2 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x02 EP 2 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 0 Bus 001 Device 010: ID 0cad:9013 Motorola CGISS TETRA PEI interface Device Descriptor: bLength 18 bDescriptorType 1 bcdUSB 2.00 bDeviceClass 0 bDeviceSubClass 0 bDeviceProtocol 0 bMaxPacketSize0 64 idVendor 0x0cad Motorola CGISS idProduct 0x9013 bcdDevice 24.16 iManufacturer 1 iProduct 2 iSerial 0 bNumConfigurations 1 Configuration Descriptor: bLength 9 bDescriptorType 2 wTotalLength 0x0037 bNumInterfaces 2 bConfigurationValue 1 iConfiguration 3 bmAttributes 0x80 (Bus Powered) MaxPower 500mA Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 0 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 0 bInterfaceProtocol 0 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x81 EP 1 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x01 EP 1 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 1 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 0 bInterfaceProtocol 0 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x82 EP 2 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x02 EP 2 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Signed-off-by: Jerónimo Borque Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/usb-serial-simple.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c index 511242111403..15e05ebf37ac 100644 --- a/drivers/usb/serial/usb-serial-simple.c +++ b/drivers/usb/serial/usb-serial-simple.c @@ -89,6 +89,8 @@ DEVICE(moto_modem, MOTO_IDS); #define MOTOROLA_TETRA_IDS() \ { USB_DEVICE(0x0cad, 0x9011) }, /* Motorola Solutions TETRA PEI */ \ { USB_DEVICE(0x0cad, 0x9012) }, /* MTP6550 */ \ + { USB_DEVICE(0x0cad, 0x9013) }, /* MTP3xxx */ \ + { USB_DEVICE(0x0cad, 0x9015) }, /* MTP85xx */ \ { USB_DEVICE(0x0cad, 0x9016) } /* TPG2200 */ DEVICE(motorola_tetra, MOTOROLA_TETRA_IDS); From 223f97fc43cda4bce578641ffbec60aa02edfb1e Mon Sep 17 00:00:00 2001 From: Kristian Evensen Date: Mon, 13 Jan 2020 15:14:05 +0100 Subject: [PATCH 2351/3715] USB: serial: option: Add support for Quectel RM500Q commit accf227de4d211b52c830a58b2df00d5739f2389 upstream. RM500Q is a 5G module from Quectel, supporting both standalone and non-standalone modes. Unlike other recent Quectel modems, it is possible to identify the diagnostic interface (bInterfaceProtocol is unique). Thus, there is no need to check for the number of endpoints or reserve interfaces. The interface number is still dynamic though, so matching on interface number is not possible and two entries have to be added to the table. Output from usb-devices with all interfaces enabled (order is diag, nmea, at_port, modem, rmnet and adb): Bus 004 Device 007: ID 2c7c:0800 Quectel Wireless Solutions Co., Ltd. Device Descriptor: bLength 18 bDescriptorType 1 bcdUSB 3.20 bDeviceClass 0 (Defined at Interface level) bDeviceSubClass 0 bDeviceProtocol 0 bMaxPacketSize0 9 idVendor 0x2c7c Quectel Wireless Solutions Co., Ltd. idProduct 0x0800 bcdDevice 4.14 iManufacturer 1 Quectel iProduct 2 LTE-A Module iSerial 3 40046d60 bNumConfigurations 1 Configuration Descriptor: bLength 9 bDescriptorType 2 wTotalLength 328 bNumInterfaces 6 bConfigurationValue 1 iConfiguration 4 DIAG_SER_RMNET bmAttributes 0xa0 (Bus Powered) Remote Wakeup MaxPower 224mA Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 0 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 255 Vendor Specific Subclass bInterfaceProtocol 48 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x81 EP 1 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0400 1x 1024 bytes bInterval 0 bMaxBurst 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x01 EP 1 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0400 1x 1024 bytes bInterval 0 bMaxBurst 0 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 1 bAlternateSetting 0 bNumEndpoints 3 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 0 bInterfaceProtocol 0 iInterface 0 ** UNRECOGNIZED: 05 24 00 10 01 ** UNRECOGNIZED: 05 24 01 00 00 ** UNRECOGNIZED: 04 24 02 02 ** UNRECOGNIZED: 05 24 06 00 00 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x83 EP 3 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x000a 1x 10 bytes bInterval 9 bMaxBurst 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x82 EP 2 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0400 1x 1024 bytes bInterval 0 bMaxBurst 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x02 EP 2 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0400 1x 1024 bytes bInterval 0 bMaxBurst 0 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 2 bAlternateSetting 0 bNumEndpoints 3 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 0 bInterfaceProtocol 0 iInterface 0 ** UNRECOGNIZED: 05 24 00 10 01 ** UNRECOGNIZED: 05 24 01 00 00 ** UNRECOGNIZED: 04 24 02 02 ** UNRECOGNIZED: 05 24 06 00 00 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x85 EP 5 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x000a 1x 10 bytes bInterval 9 bMaxBurst 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x84 EP 4 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0400 1x 1024 bytes bInterval 0 bMaxBurst 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x03 EP 3 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0400 1x 1024 bytes bInterval 0 bMaxBurst 0 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 3 bAlternateSetting 0 bNumEndpoints 3 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 0 bInterfaceProtocol 0 iInterface 0 ** UNRECOGNIZED: 05 24 00 10 01 ** UNRECOGNIZED: 05 24 01 00 00 ** UNRECOGNIZED: 04 24 02 02 ** UNRECOGNIZED: 05 24 06 00 00 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x87 EP 7 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x000a 1x 10 bytes bInterval 9 bMaxBurst 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x86 EP 6 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0400 1x 1024 bytes bInterval 0 bMaxBurst 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x04 EP 4 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0400 1x 1024 bytes bInterval 0 bMaxBurst 0 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 4 bAlternateSetting 0 bNumEndpoints 3 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 255 Vendor Specific Subclass bInterfaceProtocol 255 Vendor Specific Protocol iInterface 5 CDEV Serial Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x88 EP 8 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x0008 1x 8 bytes bInterval 9 bMaxBurst 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x8e EP 14 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0400 1x 1024 bytes bInterval 0 bMaxBurst 6 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x0f EP 15 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0400 1x 1024 bytes bInterval 0 bMaxBurst 2 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 5 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 66 bInterfaceProtocol 1 iInterface 6 ADB Interface Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x05 EP 5 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0400 1x 1024 bytes bInterval 0 bMaxBurst 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x89 EP 9 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0400 1x 1024 bytes bInterval 0 bMaxBurst 0 Binary Object Store Descriptor: bLength 5 bDescriptorType 15 wTotalLength 42 bNumDeviceCaps 3 USB 2.0 Extension Device Capability: bLength 7 bDescriptorType 16 bDevCapabilityType 2 bmAttributes 0x00000006 Link Power Management (LPM) Supported SuperSpeed USB Device Capability: bLength 10 bDescriptorType 16 bDevCapabilityType 3 bmAttributes 0x00 wSpeedsSupported 0x000f Device can operate at Low Speed (1Mbps) Device can operate at Full Speed (12Mbps) Device can operate at High Speed (480Mbps) Device can operate at SuperSpeed (5Gbps) bFunctionalitySupport 1 Lowest fully-functional device speed is Full Speed (12Mbps) bU1DevExitLat 1 micro seconds bU2DevExitLat 500 micro seconds ** UNRECOGNIZED: 14 10 0a 00 01 00 00 00 00 11 00 00 30 40 0a 00 b0 40 0a 00 Device Status: 0x0000 (Bus Powered) Signed-off-by: Kristian Evensen Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index e69e31539914..d40f31c18030 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -251,6 +251,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_BG96 0x0296 #define QUECTEL_PRODUCT_EP06 0x0306 #define QUECTEL_PRODUCT_EM12 0x0512 +#define QUECTEL_PRODUCT_RM500Q 0x0800 #define CMOTECH_VENDOR_ID 0x16d8 #define CMOTECH_PRODUCT_6001 0x6001 @@ -1107,6 +1108,9 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0xff, 0xff), .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0, 0) }, + { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6003), From 7f61deb9c4d54dff9e005f18f319a2c356041ab6 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Jan 2020 18:22:13 +0100 Subject: [PATCH 2352/3715] USB: serial: opticon: fix control-message timeouts commit 5e28055f340275a8616eee88ef19186631b4d136 upstream. The driver was issuing synchronous uninterruptible control requests without using a timeout. This could lead to the driver hanging on open() or tiocmset() due to a malfunctioning (or malicious) device until the device is physically disconnected. The USB upper limit of five seconds per request should be more than enough. Fixes: 309a057932ab ("USB: opticon: add rts and cts support") Cc: stable # 2.6.39 Cc: Martin Jansen Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/opticon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/opticon.c b/drivers/usb/serial/opticon.c index 58657d64678b..c37572a8bb06 100644 --- a/drivers/usb/serial/opticon.c +++ b/drivers/usb/serial/opticon.c @@ -116,7 +116,7 @@ static int send_control_msg(struct usb_serial_port *port, u8 requesttype, retval = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), requesttype, USB_DIR_OUT|USB_TYPE_VENDOR|USB_RECIP_INTERFACE, - 0, 0, buffer, 1, 0); + 0, 0, buffer, 1, USB_CTRL_SET_TIMEOUT); kfree(buffer); if (retval < 0) From e19bcd176c4e844560749981221103c58e40e407 Mon Sep 17 00:00:00 2001 From: Reinhard Speyerer Date: Tue, 14 Jan 2020 14:29:23 +0100 Subject: [PATCH 2353/3715] USB: serial: option: add support for Quectel RM500Q in QDL mode commit f3eaabbfd093c93d791eb930cc68d9b15246a65e upstream. Add support for Quectel RM500Q in QDL mode. T: Bus=02 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 24 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0800 Rev= 0.00 S: Manufacturer=Qualcomm CDMA Technologies MSM S: Product=QUSB_BULK_SN:xxxxxxxx S: SerialNumber=xxxxxxxx C:* #Ifs= 1 Cfg#= 1 Atr=a0 MxPwr= 2mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=10 Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms It is assumed that the ZLP flag required for other Qualcomm-based 5G devices also applies to Quectel RM500Q. Signed-off-by: Reinhard Speyerer Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index d40f31c18030..eff353de47cd 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1110,6 +1110,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0xff, 0x10), + .driver_info = ZLP }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, From bcca7cb09cc58dabb871f4f2fca13399d344ef63 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 16 Jan 2020 17:07:05 +0100 Subject: [PATCH 2354/3715] USB: serial: suppress driver bind attributes commit fdb838efa31e1ed9a13ae6ad0b64e30fdbd00570 upstream. USB-serial drivers must not be unbound from their ports before the corresponding USB driver is unbound from the parent interface so suppress the bind and unbind attributes. Unbinding a serial driver while it's port is open is a sure way to trigger a crash as any driver state is released on unbind while port hangup is handled on the parent USB interface level. Drivers for multiport devices where ports share a resource such as an interrupt endpoint also generally cannot handle individual ports going away. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/usb-serial.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index 8115b7cccf1a..3dc3464626fb 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -1332,6 +1332,9 @@ static int usb_serial_register(struct usb_serial_driver *driver) return -EINVAL; } + /* Prevent individual ports from being unbound. */ + driver->driver.suppress_bind_attrs = true; + usb_serial_operations_init(driver); /* Add this device to our list of devices */ From 1c63fa75f0cb67928c251ac575590bda61fdcc0a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 17 Jan 2020 10:50:22 +0100 Subject: [PATCH 2355/3715] USB: serial: ch341: handle unbound port at reset_resume commit 4d5ef53f75c22d28f490bcc5c771fcc610a9afa4 upstream. Check for NULL port data in reset_resume() to avoid dereferencing a NULL pointer in case the port device isn't bound to a driver (e.g. after a failed control request at port probe). Fixes: 1ded7ea47b88 ("USB: ch341 serial: fix port number changed after resume") Cc: stable # 2.6.30 Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ch341.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index 578596d301b8..31cd798d2dac 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -592,9 +592,13 @@ static int ch341_tiocmget(struct tty_struct *tty) static int ch341_reset_resume(struct usb_serial *serial) { struct usb_serial_port *port = serial->port[0]; - struct ch341_private *priv = usb_get_serial_port_data(port); + struct ch341_private *priv; int ret; + priv = usb_get_serial_port_data(port); + if (!priv) + return 0; + /* reconfigure ch341 serial port after bus-reset */ ch341_configure(serial->dev, priv); From e804bb78f21cebc10ddfba62d1dc3ac498a109cc Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 17 Jan 2020 10:50:24 +0100 Subject: [PATCH 2356/3715] USB: serial: io_edgeport: add missing active-port sanity check commit 1568c58d11a7c851bd09341aeefd6a1c308ac40d upstream. The driver receives the active port number from the device, but never made sure that the port number was valid. This could lead to a NULL-pointer dereference or memory corruption in case a device sends data for an invalid port. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/io_edgeport.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c index 51b61545ccf2..467870f504a5 100644 --- a/drivers/usb/serial/io_edgeport.c +++ b/drivers/usb/serial/io_edgeport.c @@ -1733,7 +1733,8 @@ static void edge_break(struct tty_struct *tty, int break_state) static void process_rcvd_data(struct edgeport_serial *edge_serial, unsigned char *buffer, __u16 bufferLength) { - struct device *dev = &edge_serial->serial->dev->dev; + struct usb_serial *serial = edge_serial->serial; + struct device *dev = &serial->dev->dev; struct usb_serial_port *port; struct edgeport_port *edge_port; __u16 lastBufferLength; @@ -1838,9 +1839,8 @@ static void process_rcvd_data(struct edgeport_serial *edge_serial, /* spit this data back into the tty driver if this port is open */ - if (rxLen) { - port = edge_serial->serial->port[ - edge_serial->rxPort]; + if (rxLen && edge_serial->rxPort < serial->num_ports) { + port = serial->port[edge_serial->rxPort]; edge_port = usb_get_serial_port_data(port); if (edge_port->open) { dev_dbg(dev, "%s - Sending %d bytes to TTY for port %d\n", @@ -1850,8 +1850,8 @@ static void process_rcvd_data(struct edgeport_serial *edge_serial, rxLen); edge_port->port->icount.rx += rxLen; } - buffer += rxLen; } + buffer += rxLen; break; case EXPECT_HDR3: /* Expect 3rd byte of status header */ @@ -1886,6 +1886,8 @@ static void process_rcvd_status(struct edgeport_serial *edge_serial, __u8 code = edge_serial->rxStatusCode; /* switch the port pointer to the one being currently talked about */ + if (edge_serial->rxPort >= edge_serial->serial->num_ports) + return; port = edge_serial->serial->port[edge_serial->rxPort]; edge_port = usb_get_serial_port_data(port); if (edge_port == NULL) { From 534afe14ec5f5aec941f66e3bc4b417e442b8298 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 17 Jan 2020 10:50:25 +0100 Subject: [PATCH 2357/3715] USB: serial: keyspan: handle unbound ports commit 3018dd3fa114b13261e9599ddb5656ef97a1fa17 upstream. Check for NULL port data in the control URB completion handlers to avoid dereferencing a NULL pointer in the unlikely case where a port device isn't bound to a driver (e.g. after an allocation failure on port probe()). Fixes: 0ca1268e109a ("USB Serial Keyspan: add support for USA-49WG & USA-28XG") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/keyspan.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/serial/keyspan.c b/drivers/usb/serial/keyspan.c index 2c5a53bdccd4..55a768487990 100644 --- a/drivers/usb/serial/keyspan.c +++ b/drivers/usb/serial/keyspan.c @@ -1062,6 +1062,8 @@ static void usa49_glocont_callback(struct urb *urb) for (i = 0; i < serial->num_ports; ++i) { port = serial->port[i]; p_priv = usb_get_serial_port_data(port); + if (!p_priv) + continue; if (p_priv->resend_cont) { dev_dbg(&port->dev, "%s - sending setup\n", __func__); @@ -1463,6 +1465,8 @@ static void usa67_glocont_callback(struct urb *urb) for (i = 0; i < serial->num_ports; ++i) { port = serial->port[i]; p_priv = usb_get_serial_port_data(port); + if (!p_priv) + continue; if (p_priv->resend_cont) { dev_dbg(&port->dev, "%s - sending setup\n", __func__); From 3afe35a859aca32ad888193cf171a86a0a23eac8 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 17 Jan 2020 15:35:26 +0100 Subject: [PATCH 2358/3715] USB: serial: quatech2: handle unbound ports commit 9715a43eea77e42678a1002623f2d9a78f5b81a1 upstream. Check for NULL port data in the modem- and line-status handlers to avoid dereferencing a NULL pointer in the unlikely case where a port device isn't bound to a driver (e.g. after an allocation failure on port probe). Note that the other (stubbed) event handlers qt2_process_xmit_empty() and qt2_process_flush() would need similar sanity checks in case they are ever implemented. Fixes: f7a33e608d9a ("USB: serial: add quatech2 usb to serial driver") Cc: stable # 3.5 Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/quatech2.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/serial/quatech2.c b/drivers/usb/serial/quatech2.c index 60e17d1444c3..f16e0b8c1ed4 100644 --- a/drivers/usb/serial/quatech2.c +++ b/drivers/usb/serial/quatech2.c @@ -867,7 +867,10 @@ static void qt2_update_msr(struct usb_serial_port *port, unsigned char *ch) u8 newMSR = (u8) *ch; unsigned long flags; + /* May be called from qt2_process_read_urb() for an unbound port. */ port_priv = usb_get_serial_port_data(port); + if (!port_priv) + return; spin_lock_irqsave(&port_priv->lock, flags); port_priv->shadowMSR = newMSR; @@ -895,7 +898,10 @@ static void qt2_update_lsr(struct usb_serial_port *port, unsigned char *ch) unsigned long flags; u8 newLSR = (u8) *ch; + /* May be called from qt2_process_read_urb() for an unbound port. */ port_priv = usb_get_serial_port_data(port); + if (!port_priv) + return; if (newLSR & UART_LSR_BI) newLSR &= (u8) (UART_LSR_OE | UART_LSR_BI); From f250729c0385b4371cb6bf33c6ff48e942b3baa0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 7 Jan 2020 21:15:49 +0100 Subject: [PATCH 2359/3715] scsi: fnic: fix invalid stack access commit 42ec15ceaea74b5f7a621fc6686cbf69ca66c4cf upstream. gcc -O3 warns that some local variables are not properly initialized: drivers/scsi/fnic/vnic_dev.c: In function 'fnic_dev_hang_notify': drivers/scsi/fnic/vnic_dev.c:511:16: error: 'a0' is used uninitialized in this function [-Werror=uninitialized] vdev->args[0] = *a0; ~~~~~~~~~~~~~~^~~~~ drivers/scsi/fnic/vnic_dev.c:691:6: note: 'a0' was declared here u64 a0, a1; ^~ drivers/scsi/fnic/vnic_dev.c:512:16: error: 'a1' is used uninitialized in this function [-Werror=uninitialized] vdev->args[1] = *a1; ~~~~~~~~~~~~~~^~~~~ drivers/scsi/fnic/vnic_dev.c:691:10: note: 'a1' was declared here u64 a0, a1; ^~ drivers/scsi/fnic/vnic_dev.c: In function 'fnic_dev_mac_addr': drivers/scsi/fnic/vnic_dev.c:512:16: error: 'a1' is used uninitialized in this function [-Werror=uninitialized] vdev->args[1] = *a1; ~~~~~~~~~~~~~~^~~~~ drivers/scsi/fnic/vnic_dev.c:698:10: note: 'a1' was declared here u64 a0, a1; ^~ Apparently the code relies on the local variables occupying adjacent memory locations in the same order, but this is of course not guaranteed. Use an array of two u64 variables where needed to make it work correctly. I suspect there is also an endianness bug here, but have not digged in deep enough to be sure. Fixes: 5df6d737dd4b ("[SCSI] fnic: Add new Cisco PCI-Express FCoE HBA") Fixes: mmtom ("init/Kconfig: enable -O3 for all arches") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200107201602.4096790-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/fnic/vnic_dev.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/scsi/fnic/vnic_dev.c b/drivers/scsi/fnic/vnic_dev.c index ba69d6112fa1..c5b89a003d2a 100644 --- a/drivers/scsi/fnic/vnic_dev.c +++ b/drivers/scsi/fnic/vnic_dev.c @@ -445,26 +445,26 @@ int vnic_dev_soft_reset_done(struct vnic_dev *vdev, int *done) int vnic_dev_hang_notify(struct vnic_dev *vdev) { - u64 a0, a1; + u64 a0 = 0, a1 = 0; int wait = 1000; return vnic_dev_cmd(vdev, CMD_HANG_NOTIFY, &a0, &a1, wait); } int vnic_dev_mac_addr(struct vnic_dev *vdev, u8 *mac_addr) { - u64 a0, a1; + u64 a[2] = {}; int wait = 1000; int err, i; for (i = 0; i < ETH_ALEN; i++) mac_addr[i] = 0; - err = vnic_dev_cmd(vdev, CMD_MAC_ADDR, &a0, &a1, wait); + err = vnic_dev_cmd(vdev, CMD_MAC_ADDR, &a[0], &a[1], wait); if (err) return err; for (i = 0; i < ETH_ALEN; i++) - mac_addr[i] = ((u8 *)&a0)[i]; + mac_addr[i] = ((u8 *)&a)[i]; return 0; } @@ -489,30 +489,30 @@ void vnic_dev_packet_filter(struct vnic_dev *vdev, int directed, int multicast, void vnic_dev_add_addr(struct vnic_dev *vdev, u8 *addr) { - u64 a0 = 0, a1 = 0; + u64 a[2] = {}; int wait = 1000; int err; int i; for (i = 0; i < ETH_ALEN; i++) - ((u8 *)&a0)[i] = addr[i]; + ((u8 *)&a)[i] = addr[i]; - err = vnic_dev_cmd(vdev, CMD_ADDR_ADD, &a0, &a1, wait); + err = vnic_dev_cmd(vdev, CMD_ADDR_ADD, &a[0], &a[1], wait); if (err) pr_err("Can't add addr [%pM], %d\n", addr, err); } void vnic_dev_del_addr(struct vnic_dev *vdev, u8 *addr) { - u64 a0 = 0, a1 = 0; + u64 a[2] = {}; int wait = 1000; int err; int i; for (i = 0; i < ETH_ALEN; i++) - ((u8 *)&a0)[i] = addr[i]; + ((u8 *)&a)[i] = addr[i]; - err = vnic_dev_cmd(vdev, CMD_ADDR_DEL, &a0, &a1, wait); + err = vnic_dev_cmd(vdev, CMD_ADDR_DEL, &a[0], &a[1], wait); if (err) pr_err("Can't del addr [%pM], %d\n", addr, err); } From fedf64ea8f461ac51d9772998b277a30cbf8375e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 14 Jan 2020 15:34:14 +0300 Subject: [PATCH 2360/3715] scsi: mptfusion: Fix double fetch bug in ioctl commit 28d76df18f0ad5bcf5fa48510b225f0ed262a99b upstream. Tom Hatskevich reported that we look up "iocp" then, in the called functions we do a second copy_from_user() and look it up again. The problem that could cause is: drivers/message/fusion/mptctl.c 674 /* All of these commands require an interrupt or 675 * are unknown/illegal. 676 */ 677 if ((ret = mptctl_syscall_down(iocp, nonblock)) != 0) ^^^^ We take this lock. 678 return ret; 679 680 if (cmd == MPTFWDOWNLOAD) 681 ret = mptctl_fw_download(arg); ^^^ Then the user memory changes and we look up "iocp" again but a different one so now we are holding the incorrect lock and have a race condition. 682 else if (cmd == MPTCOMMAND) 683 ret = mptctl_mpt_command(arg); The security impact of this bug is not as bad as it could have been because these operations are all privileged and root already has enormous destructive power. But it's still worth fixing. This patch passes the "iocp" pointer to the functions to avoid the second lookup. That deletes 100 lines of code from the driver so it's a nice clean up as well. Link: https://lore.kernel.org/r/20200114123414.GA7957@kadam Reported-by: Tom Hatskevich Reviewed-by: Greg Kroah-Hartman Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/message/fusion/mptctl.c | 211 ++++++++------------------------ 1 file changed, 49 insertions(+), 162 deletions(-) diff --git a/drivers/message/fusion/mptctl.c b/drivers/message/fusion/mptctl.c index cf6ce9f600ca..f9b2e652c399 100644 --- a/drivers/message/fusion/mptctl.c +++ b/drivers/message/fusion/mptctl.c @@ -100,19 +100,19 @@ struct buflist { * Function prototypes. Called from OS entry point mptctl_ioctl. * arg contents specific to function. */ -static int mptctl_fw_download(unsigned long arg); -static int mptctl_getiocinfo(unsigned long arg, unsigned int cmd); -static int mptctl_gettargetinfo(unsigned long arg); -static int mptctl_readtest(unsigned long arg); -static int mptctl_mpt_command(unsigned long arg); -static int mptctl_eventquery(unsigned long arg); -static int mptctl_eventenable(unsigned long arg); -static int mptctl_eventreport(unsigned long arg); -static int mptctl_replace_fw(unsigned long arg); +static int mptctl_fw_download(MPT_ADAPTER *iocp, unsigned long arg); +static int mptctl_getiocinfo(MPT_ADAPTER *iocp, unsigned long arg, unsigned int cmd); +static int mptctl_gettargetinfo(MPT_ADAPTER *iocp, unsigned long arg); +static int mptctl_readtest(MPT_ADAPTER *iocp, unsigned long arg); +static int mptctl_mpt_command(MPT_ADAPTER *iocp, unsigned long arg); +static int mptctl_eventquery(MPT_ADAPTER *iocp, unsigned long arg); +static int mptctl_eventenable(MPT_ADAPTER *iocp, unsigned long arg); +static int mptctl_eventreport(MPT_ADAPTER *iocp, unsigned long arg); +static int mptctl_replace_fw(MPT_ADAPTER *iocp, unsigned long arg); -static int mptctl_do_reset(unsigned long arg); -static int mptctl_hp_hostinfo(unsigned long arg, unsigned int cmd); -static int mptctl_hp_targetinfo(unsigned long arg); +static int mptctl_do_reset(MPT_ADAPTER *iocp, unsigned long arg); +static int mptctl_hp_hostinfo(MPT_ADAPTER *iocp, unsigned long arg, unsigned int cmd); +static int mptctl_hp_targetinfo(MPT_ADAPTER *iocp, unsigned long arg); static int mptctl_probe(struct pci_dev *, const struct pci_device_id *); static void mptctl_remove(struct pci_dev *); @@ -123,8 +123,8 @@ static long compat_mpctl_ioctl(struct file *f, unsigned cmd, unsigned long arg); /* * Private function calls. */ -static int mptctl_do_mpt_command(struct mpt_ioctl_command karg, void __user *mfPtr); -static int mptctl_do_fw_download(int ioc, char __user *ufwbuf, size_t fwlen); +static int mptctl_do_mpt_command(MPT_ADAPTER *iocp, struct mpt_ioctl_command karg, void __user *mfPtr); +static int mptctl_do_fw_download(MPT_ADAPTER *iocp, char __user *ufwbuf, size_t fwlen); static MptSge_t *kbuf_alloc_2_sgl(int bytes, u32 dir, int sge_offset, int *frags, struct buflist **blp, dma_addr_t *sglbuf_dma, MPT_ADAPTER *ioc); static void kfree_sgl(MptSge_t *sgl, dma_addr_t sgl_dma, @@ -656,19 +656,19 @@ __mptctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg) * by TM and FW reloads. */ if ((cmd & ~IOCSIZE_MASK) == (MPTIOCINFO & ~IOCSIZE_MASK)) { - return mptctl_getiocinfo(arg, _IOC_SIZE(cmd)); + return mptctl_getiocinfo(iocp, arg, _IOC_SIZE(cmd)); } else if (cmd == MPTTARGETINFO) { - return mptctl_gettargetinfo(arg); + return mptctl_gettargetinfo(iocp, arg); } else if (cmd == MPTTEST) { - return mptctl_readtest(arg); + return mptctl_readtest(iocp, arg); } else if (cmd == MPTEVENTQUERY) { - return mptctl_eventquery(arg); + return mptctl_eventquery(iocp, arg); } else if (cmd == MPTEVENTENABLE) { - return mptctl_eventenable(arg); + return mptctl_eventenable(iocp, arg); } else if (cmd == MPTEVENTREPORT) { - return mptctl_eventreport(arg); + return mptctl_eventreport(iocp, arg); } else if (cmd == MPTFWREPLACE) { - return mptctl_replace_fw(arg); + return mptctl_replace_fw(iocp, arg); } /* All of these commands require an interrupt or @@ -678,15 +678,15 @@ __mptctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return ret; if (cmd == MPTFWDOWNLOAD) - ret = mptctl_fw_download(arg); + ret = mptctl_fw_download(iocp, arg); else if (cmd == MPTCOMMAND) - ret = mptctl_mpt_command(arg); + ret = mptctl_mpt_command(iocp, arg); else if (cmd == MPTHARDRESET) - ret = mptctl_do_reset(arg); + ret = mptctl_do_reset(iocp, arg); else if ((cmd & ~IOCSIZE_MASK) == (HP_GETHOSTINFO & ~IOCSIZE_MASK)) - ret = mptctl_hp_hostinfo(arg, _IOC_SIZE(cmd)); + ret = mptctl_hp_hostinfo(iocp, arg, _IOC_SIZE(cmd)); else if (cmd == HP_GETTARGETINFO) - ret = mptctl_hp_targetinfo(arg); + ret = mptctl_hp_targetinfo(iocp, arg); else ret = -EINVAL; @@ -705,11 +705,10 @@ mptctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return ret; } -static int mptctl_do_reset(unsigned long arg) +static int mptctl_do_reset(MPT_ADAPTER *iocp, unsigned long arg) { struct mpt_ioctl_diag_reset __user *urinfo = (void __user *) arg; struct mpt_ioctl_diag_reset krinfo; - MPT_ADAPTER *iocp; if (copy_from_user(&krinfo, urinfo, sizeof(struct mpt_ioctl_diag_reset))) { printk(KERN_ERR MYNAM "%s@%d::mptctl_do_reset - " @@ -718,12 +717,6 @@ static int mptctl_do_reset(unsigned long arg) return -EFAULT; } - if (mpt_verify_adapter(krinfo.hdr.iocnum, &iocp) < 0) { - printk(KERN_DEBUG MYNAM "%s@%d::mptctl_do_reset - ioc%d not found!\n", - __FILE__, __LINE__, krinfo.hdr.iocnum); - return -ENODEV; /* (-6) No such device or address */ - } - dctlprintk(iocp, printk(MYIOC_s_DEBUG_FMT "mptctl_do_reset called.\n", iocp->name)); @@ -754,7 +747,7 @@ static int mptctl_do_reset(unsigned long arg) * -ENOMSG if FW upload returned bad status */ static int -mptctl_fw_download(unsigned long arg) +mptctl_fw_download(MPT_ADAPTER *iocp, unsigned long arg) { struct mpt_fw_xfer __user *ufwdl = (void __user *) arg; struct mpt_fw_xfer kfwdl; @@ -766,7 +759,7 @@ mptctl_fw_download(unsigned long arg) return -EFAULT; } - return mptctl_do_fw_download(kfwdl.iocnum, kfwdl.bufp, kfwdl.fwlen); + return mptctl_do_fw_download(iocp, kfwdl.bufp, kfwdl.fwlen); } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ @@ -784,11 +777,10 @@ mptctl_fw_download(unsigned long arg) * -ENOMSG if FW upload returned bad status */ static int -mptctl_do_fw_download(int ioc, char __user *ufwbuf, size_t fwlen) +mptctl_do_fw_download(MPT_ADAPTER *iocp, char __user *ufwbuf, size_t fwlen) { FWDownload_t *dlmsg; MPT_FRAME_HDR *mf; - MPT_ADAPTER *iocp; FWDownloadTCSGE_t *ptsge; MptSge_t *sgl, *sgIn; char *sgOut; @@ -808,17 +800,10 @@ mptctl_do_fw_download(int ioc, char __user *ufwbuf, size_t fwlen) pFWDownloadReply_t ReplyMsg = NULL; unsigned long timeleft; - if (mpt_verify_adapter(ioc, &iocp) < 0) { - printk(KERN_DEBUG MYNAM "ioctl_fwdl - ioc%d not found!\n", - ioc); - return -ENODEV; /* (-6) No such device or address */ - } else { - - /* Valid device. Get a message frame and construct the FW download message. - */ - if ((mf = mpt_get_msg_frame(mptctl_id, iocp)) == NULL) - return -EAGAIN; - } + /* Valid device. Get a message frame and construct the FW download message. + */ + if ((mf = mpt_get_msg_frame(mptctl_id, iocp)) == NULL) + return -EAGAIN; dctlprintk(iocp, printk(MYIOC_s_DEBUG_FMT "mptctl_do_fwdl called. mptctl_id = %xh.\n", iocp->name, mptctl_id)); @@ -826,8 +811,6 @@ mptctl_do_fw_download(int ioc, char __user *ufwbuf, size_t fwlen) iocp->name, ufwbuf)); dctlprintk(iocp, printk(MYIOC_s_DEBUG_FMT "DbG: kfwdl.fwlen = %d\n", iocp->name, (int)fwlen)); - dctlprintk(iocp, printk(MYIOC_s_DEBUG_FMT "DbG: kfwdl.ioc = %04xh\n", - iocp->name, ioc)); dlmsg = (FWDownload_t*) mf; ptsge = (FWDownloadTCSGE_t *) &dlmsg->SGL; @@ -1238,13 +1221,11 @@ kfree_sgl(MptSge_t *sgl, dma_addr_t sgl_dma, struct buflist *buflist, MPT_ADAPTE * -ENODEV if no such device/adapter */ static int -mptctl_getiocinfo (unsigned long arg, unsigned int data_size) +mptctl_getiocinfo (MPT_ADAPTER *ioc, unsigned long arg, unsigned int data_size) { struct mpt_ioctl_iocinfo __user *uarg = (void __user *) arg; struct mpt_ioctl_iocinfo *karg; - MPT_ADAPTER *ioc; struct pci_dev *pdev; - int iocnum; unsigned int port; int cim_rev; struct scsi_device *sdev; @@ -1272,14 +1253,6 @@ mptctl_getiocinfo (unsigned long arg, unsigned int data_size) return PTR_ERR(karg); } - if (((iocnum = mpt_verify_adapter(karg->hdr.iocnum, &ioc)) < 0) || - (ioc == NULL)) { - printk(KERN_DEBUG MYNAM "%s::mptctl_getiocinfo() @%d - ioc%d not found!\n", - __FILE__, __LINE__, iocnum); - kfree(karg); - return -ENODEV; - } - /* Verify the data transfer size is correct. */ if (karg->hdr.maxDataSize != data_size) { printk(MYIOC_s_ERR_FMT "%s@%d::mptctl_getiocinfo - " @@ -1385,15 +1358,13 @@ mptctl_getiocinfo (unsigned long arg, unsigned int data_size) * -ENODEV if no such device/adapter */ static int -mptctl_gettargetinfo (unsigned long arg) +mptctl_gettargetinfo (MPT_ADAPTER *ioc, unsigned long arg) { struct mpt_ioctl_targetinfo __user *uarg = (void __user *) arg; struct mpt_ioctl_targetinfo karg; - MPT_ADAPTER *ioc; VirtDevice *vdevice; char *pmem; int *pdata; - int iocnum; int numDevices = 0; int lun; int maxWordsLeft; @@ -1408,13 +1379,6 @@ mptctl_gettargetinfo (unsigned long arg) return -EFAULT; } - if (((iocnum = mpt_verify_adapter(karg.hdr.iocnum, &ioc)) < 0) || - (ioc == NULL)) { - printk(KERN_DEBUG MYNAM "%s::mptctl_gettargetinfo() @%d - ioc%d not found!\n", - __FILE__, __LINE__, iocnum); - return -ENODEV; - } - dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "mptctl_gettargetinfo called.\n", ioc->name)); /* Get the port number and set the maximum number of bytes @@ -1510,12 +1474,10 @@ mptctl_gettargetinfo (unsigned long arg) * -ENODEV if no such device/adapter */ static int -mptctl_readtest (unsigned long arg) +mptctl_readtest (MPT_ADAPTER *ioc, unsigned long arg) { struct mpt_ioctl_test __user *uarg = (void __user *) arg; struct mpt_ioctl_test karg; - MPT_ADAPTER *ioc; - int iocnum; if (copy_from_user(&karg, uarg, sizeof(struct mpt_ioctl_test))) { printk(KERN_ERR MYNAM "%s@%d::mptctl_readtest - " @@ -1524,13 +1486,6 @@ mptctl_readtest (unsigned long arg) return -EFAULT; } - if (((iocnum = mpt_verify_adapter(karg.hdr.iocnum, &ioc)) < 0) || - (ioc == NULL)) { - printk(KERN_DEBUG MYNAM "%s::mptctl_readtest() @%d - ioc%d not found!\n", - __FILE__, __LINE__, iocnum); - return -ENODEV; - } - dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "mptctl_readtest called.\n", ioc->name)); /* Fill in the data and return the structure to the calling @@ -1571,12 +1526,10 @@ mptctl_readtest (unsigned long arg) * -ENODEV if no such device/adapter */ static int -mptctl_eventquery (unsigned long arg) +mptctl_eventquery (MPT_ADAPTER *ioc, unsigned long arg) { struct mpt_ioctl_eventquery __user *uarg = (void __user *) arg; struct mpt_ioctl_eventquery karg; - MPT_ADAPTER *ioc; - int iocnum; if (copy_from_user(&karg, uarg, sizeof(struct mpt_ioctl_eventquery))) { printk(KERN_ERR MYNAM "%s@%d::mptctl_eventquery - " @@ -1585,13 +1538,6 @@ mptctl_eventquery (unsigned long arg) return -EFAULT; } - if (((iocnum = mpt_verify_adapter(karg.hdr.iocnum, &ioc)) < 0) || - (ioc == NULL)) { - printk(KERN_DEBUG MYNAM "%s::mptctl_eventquery() @%d - ioc%d not found!\n", - __FILE__, __LINE__, iocnum); - return -ENODEV; - } - dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "mptctl_eventquery called.\n", ioc->name)); karg.eventEntries = MPTCTL_EVENT_LOG_SIZE; @@ -1610,12 +1556,10 @@ mptctl_eventquery (unsigned long arg) /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ static int -mptctl_eventenable (unsigned long arg) +mptctl_eventenable (MPT_ADAPTER *ioc, unsigned long arg) { struct mpt_ioctl_eventenable __user *uarg = (void __user *) arg; struct mpt_ioctl_eventenable karg; - MPT_ADAPTER *ioc; - int iocnum; if (copy_from_user(&karg, uarg, sizeof(struct mpt_ioctl_eventenable))) { printk(KERN_ERR MYNAM "%s@%d::mptctl_eventenable - " @@ -1624,13 +1568,6 @@ mptctl_eventenable (unsigned long arg) return -EFAULT; } - if (((iocnum = mpt_verify_adapter(karg.hdr.iocnum, &ioc)) < 0) || - (ioc == NULL)) { - printk(KERN_DEBUG MYNAM "%s::mptctl_eventenable() @%d - ioc%d not found!\n", - __FILE__, __LINE__, iocnum); - return -ENODEV; - } - dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "mptctl_eventenable called.\n", ioc->name)); if (ioc->events == NULL) { @@ -1658,12 +1595,10 @@ mptctl_eventenable (unsigned long arg) /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ static int -mptctl_eventreport (unsigned long arg) +mptctl_eventreport (MPT_ADAPTER *ioc, unsigned long arg) { struct mpt_ioctl_eventreport __user *uarg = (void __user *) arg; struct mpt_ioctl_eventreport karg; - MPT_ADAPTER *ioc; - int iocnum; int numBytes, maxEvents, max; if (copy_from_user(&karg, uarg, sizeof(struct mpt_ioctl_eventreport))) { @@ -1673,12 +1608,6 @@ mptctl_eventreport (unsigned long arg) return -EFAULT; } - if (((iocnum = mpt_verify_adapter(karg.hdr.iocnum, &ioc)) < 0) || - (ioc == NULL)) { - printk(KERN_DEBUG MYNAM "%s::mptctl_eventreport() @%d - ioc%d not found!\n", - __FILE__, __LINE__, iocnum); - return -ENODEV; - } dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "mptctl_eventreport called.\n", ioc->name)); @@ -1712,12 +1641,10 @@ mptctl_eventreport (unsigned long arg) /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ static int -mptctl_replace_fw (unsigned long arg) +mptctl_replace_fw (MPT_ADAPTER *ioc, unsigned long arg) { struct mpt_ioctl_replace_fw __user *uarg = (void __user *) arg; struct mpt_ioctl_replace_fw karg; - MPT_ADAPTER *ioc; - int iocnum; int newFwSize; if (copy_from_user(&karg, uarg, sizeof(struct mpt_ioctl_replace_fw))) { @@ -1727,13 +1654,6 @@ mptctl_replace_fw (unsigned long arg) return -EFAULT; } - if (((iocnum = mpt_verify_adapter(karg.hdr.iocnum, &ioc)) < 0) || - (ioc == NULL)) { - printk(KERN_DEBUG MYNAM "%s::mptctl_replace_fw() @%d - ioc%d not found!\n", - __FILE__, __LINE__, iocnum); - return -ENODEV; - } - dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "mptctl_replace_fw called.\n", ioc->name)); /* If caching FW, Free the old FW image @@ -1780,12 +1700,10 @@ mptctl_replace_fw (unsigned long arg) * -ENOMEM if memory allocation error */ static int -mptctl_mpt_command (unsigned long arg) +mptctl_mpt_command (MPT_ADAPTER *ioc, unsigned long arg) { struct mpt_ioctl_command __user *uarg = (void __user *) arg; struct mpt_ioctl_command karg; - MPT_ADAPTER *ioc; - int iocnum; int rc; @@ -1796,14 +1714,7 @@ mptctl_mpt_command (unsigned long arg) return -EFAULT; } - if (((iocnum = mpt_verify_adapter(karg.hdr.iocnum, &ioc)) < 0) || - (ioc == NULL)) { - printk(KERN_DEBUG MYNAM "%s::mptctl_mpt_command() @%d - ioc%d not found!\n", - __FILE__, __LINE__, iocnum); - return -ENODEV; - } - - rc = mptctl_do_mpt_command (karg, &uarg->MF); + rc = mptctl_do_mpt_command (ioc, karg, &uarg->MF); return rc; } @@ -1821,9 +1732,8 @@ mptctl_mpt_command (unsigned long arg) * -EPERM if SCSI I/O and target is untagged */ static int -mptctl_do_mpt_command (struct mpt_ioctl_command karg, void __user *mfPtr) +mptctl_do_mpt_command (MPT_ADAPTER *ioc, struct mpt_ioctl_command karg, void __user *mfPtr) { - MPT_ADAPTER *ioc; MPT_FRAME_HDR *mf = NULL; MPIHeader_t *hdr; char *psge; @@ -1832,7 +1742,7 @@ mptctl_do_mpt_command (struct mpt_ioctl_command karg, void __user *mfPtr) dma_addr_t dma_addr_in; dma_addr_t dma_addr_out; int sgSize = 0; /* Num SG elements */ - int iocnum, flagsLength; + int flagsLength; int sz, rc = 0; int msgContext; u16 req_idx; @@ -1847,13 +1757,6 @@ mptctl_do_mpt_command (struct mpt_ioctl_command karg, void __user *mfPtr) bufIn.kptr = bufOut.kptr = NULL; bufIn.len = bufOut.len = 0; - if (((iocnum = mpt_verify_adapter(karg.hdr.iocnum, &ioc)) < 0) || - (ioc == NULL)) { - printk(KERN_DEBUG MYNAM "%s::mptctl_do_mpt_command() @%d - ioc%d not found!\n", - __FILE__, __LINE__, iocnum); - return -ENODEV; - } - spin_lock_irqsave(&ioc->taskmgmt_lock, flags); if (ioc->ioc_reset_in_progress) { spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags); @@ -2418,17 +2321,15 @@ done_free_mem: * -ENOMEM if memory allocation error */ static int -mptctl_hp_hostinfo(unsigned long arg, unsigned int data_size) +mptctl_hp_hostinfo(MPT_ADAPTER *ioc, unsigned long arg, unsigned int data_size) { hp_host_info_t __user *uarg = (void __user *) arg; - MPT_ADAPTER *ioc; struct pci_dev *pdev; char *pbuf=NULL; dma_addr_t buf_dma; hp_host_info_t karg; CONFIGPARMS cfg; ConfigPageHeader_t hdr; - int iocnum; int rc, cim_rev; ToolboxIstwiReadWriteRequest_t *IstwiRWRequest; MPT_FRAME_HDR *mf = NULL; @@ -2452,12 +2353,6 @@ mptctl_hp_hostinfo(unsigned long arg, unsigned int data_size) return -EFAULT; } - if (((iocnum = mpt_verify_adapter(karg.hdr.iocnum, &ioc)) < 0) || - (ioc == NULL)) { - printk(KERN_DEBUG MYNAM "%s::mptctl_hp_hostinfo() @%d - ioc%d not found!\n", - __FILE__, __LINE__, iocnum); - return -ENODEV; - } dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT ": mptctl_hp_hostinfo called.\n", ioc->name)); @@ -2670,15 +2565,13 @@ retry_wait: * -ENOMEM if memory allocation error */ static int -mptctl_hp_targetinfo(unsigned long arg) +mptctl_hp_targetinfo(MPT_ADAPTER *ioc, unsigned long arg) { hp_target_info_t __user *uarg = (void __user *) arg; SCSIDevicePage0_t *pg0_alloc; SCSIDevicePage3_t *pg3_alloc; - MPT_ADAPTER *ioc; MPT_SCSI_HOST *hd = NULL; hp_target_info_t karg; - int iocnum; int data_sz; dma_addr_t page_dma; CONFIGPARMS cfg; @@ -2692,12 +2585,6 @@ mptctl_hp_targetinfo(unsigned long arg) return -EFAULT; } - if (((iocnum = mpt_verify_adapter(karg.hdr.iocnum, &ioc)) < 0) || - (ioc == NULL)) { - printk(KERN_DEBUG MYNAM "%s::mptctl_hp_targetinfo() @%d - ioc%d not found!\n", - __FILE__, __LINE__, iocnum); - return -ENODEV; - } if (karg.hdr.id >= MPT_MAX_FC_DEVICES) return -EINVAL; dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "mptctl_hp_targetinfo called.\n", @@ -2865,7 +2752,7 @@ compat_mptfwxfer_ioctl(struct file *filp, unsigned int cmd, kfw.fwlen = kfw32.fwlen; kfw.bufp = compat_ptr(kfw32.bufp); - ret = mptctl_do_fw_download(kfw.iocnum, kfw.bufp, kfw.fwlen); + ret = mptctl_do_fw_download(iocp, kfw.bufp, kfw.fwlen); mutex_unlock(&iocp->ioctl_cmds.mutex); @@ -2919,7 +2806,7 @@ compat_mpt_command(struct file *filp, unsigned int cmd, /* Pass new structure to do_mpt_command */ - ret = mptctl_do_mpt_command (karg, &uarg->MF); + ret = mptctl_do_mpt_command (iocp, karg, &uarg->MF); mutex_unlock(&iocp->ioctl_cmds.mutex); From f8812ddea4ea3b8a6ee5e4b9fb2333da7ee4022c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 15 Jan 2020 14:42:34 +0100 Subject: [PATCH 2361/3715] ptrace: reintroduce usage of subjective credentials in ptrace_has_cap() commit 6b3ad6649a4c75504edeba242d3fd36b3096a57f upstream. Commit 69f594a38967 ("ptrace: do not audit capability check when outputing /proc/pid/stat") introduced the ability to opt out of audit messages for accesses to various proc files since they are not violations of policy. While doing so it somehow switched the check from ns_capable() to has_ns_capability{_noaudit}(). That means it switched from checking the subjective credentials of the task to using the objective credentials. This is wrong since. ptrace_has_cap() is currently only used in ptrace_may_access() And is used to check whether the calling task (subject) has the CAP_SYS_PTRACE capability in the provided user namespace to operate on the target task (object). According to the cred.h comments this would mean the subjective credentials of the calling task need to be used. This switches ptrace_has_cap() to use security_capable(). Because we only call ptrace_has_cap() in ptrace_may_access() and in there we already have a stable reference to the calling task's creds under rcu_read_lock() there's no need to go through another series of dereferences and rcu locking done in ns_capable{_noaudit}(). As one example where this might be particularly problematic, Jann pointed out that in combination with the upcoming IORING_OP_OPENAT feature, this bug might allow unprivileged users to bypass the capability checks while asynchronously opening files like /proc/*/mem, because the capability checks for this would be performed against kernel credentials. To illustrate on the former point about this being exploitable: When io_uring creates a new context it records the subjective credentials of the caller. Later on, when it starts to do work it creates a kernel thread and registers a callback. The callback runs with kernel creds for ktask->real_cred and ktask->cred. To prevent this from becoming a full-blown 0-day io_uring will call override_cred() and override ktask->cred with the subjective credentials of the creator of the io_uring instance. With ptrace_has_cap() currently looking at ktask->real_cred this override will be ineffective and the caller will be able to open arbitray proc files as mentioned above. Luckily, this is currently not exploitable but will turn into a 0-day once IORING_OP_OPENAT{2} land in v5.6. Fix it now! Cc: Oleg Nesterov Cc: Eric Paris Cc: stable@vger.kernel.org Reviewed-by: Kees Cook Reviewed-by: Serge Hallyn Reviewed-by: Jann Horn Fixes: 69f594a38967 ("ptrace: do not audit capability check when outputing /proc/pid/stat") Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- kernel/ptrace.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 09fb3f58a838..43a283041296 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -258,12 +258,17 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state) return ret; } -static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode) +static bool ptrace_has_cap(const struct cred *cred, struct user_namespace *ns, + unsigned int mode) { + int ret; + if (mode & PTRACE_MODE_NOAUDIT) - return has_ns_capability_noaudit(current, ns, CAP_SYS_PTRACE); + ret = security_capable(cred, ns, CAP_SYS_PTRACE); else - return has_ns_capability(current, ns, CAP_SYS_PTRACE); + ret = security_capable(cred, ns, CAP_SYS_PTRACE); + + return ret == 0; } /* Returns 0 on success, -errno on denial. */ @@ -315,7 +320,7 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode) gid_eq(caller_gid, tcred->sgid) && gid_eq(caller_gid, tcred->gid)) goto ok; - if (ptrace_has_cap(tcred->user_ns, mode)) + if (ptrace_has_cap(cred, tcred->user_ns, mode)) goto ok; rcu_read_unlock(); return -EPERM; @@ -334,7 +339,7 @@ ok: mm = task->mm; if (mm && ((get_dumpable(mm) != SUID_DUMP_USER) && - !ptrace_has_cap(mm->user_ns, mode))) + !ptrace_has_cap(cred, mm->user_ns, mode))) return -EPERM; return security_ptrace_access_check(task, mode); From cd20d0e31c63f8c5ac1dbcb57b96a757c0dd56cd Mon Sep 17 00:00:00 2001 From: Keiya Nobuta Date: Thu, 9 Jan 2020 14:14:48 +0900 Subject: [PATCH 2362/3715] usb: core: hub: Improved device recognition on remote wakeup commit 9c06ac4c83df6d6fbdbf7488fbad822b4002ba19 upstream. If hub_activate() is called before D+ has stabilized after remote wakeup, the following situation might occur: __ ___________________ / \ / D+ __/ \__/ Hub _______________________________ | ^ ^ ^ | | | | Host _____v__|___|___________|______ | | | | | | | \-- Interrupt Transfer (*3) | | \-- ClearPortFeature (*2) | \-- GetPortStatus (*1) \-- Host detects remote wakeup - D+ goes high, Host starts running by remote wakeup - D+ is not stable, goes low - Host requests GetPortStatus at (*1) and gets the following hub status: - Current Connect Status bit is 0 - Connect Status Change bit is 1 - D+ stabilizes, goes high - Host requests ClearPortFeature and thus Connect Status Change bit is cleared at (*2) - After waiting 100 ms, Host starts the Interrupt Transfer at (*3) - Since the Connect Status Change bit is 0, Hub returns NAK. In this case, port_event() is not called in hub_event() and Host cannot recognize device. To solve this issue, flag change_bits even if only Connect Status Change bit is 1 when got in the first GetPortStatus. This issue occurs rarely because it only if D+ changes during a very short time between GetPortStatus and ClearPortFeature. However, it is fatal if it occurs in embedded system. Signed-off-by: Keiya Nobuta Cc: stable Acked-by: Alan Stern Link: https://lore.kernel.org/r/20200109051448.28150-1-nobuta.keiya@fujitsu.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 4efccf8bf99f..7d5ecf36a33c 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1164,6 +1164,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) * PORT_OVER_CURRENT is not. So check for any of them. */ if (udev || (portstatus & USB_PORT_STAT_CONNECTION) || + (portchange & USB_PORT_STAT_C_CONNECTION) || (portstatus & USB_PORT_STAT_OVERCURRENT) || (portchange & USB_PORT_STAT_C_OVERCURRENT)) set_bit(port1, hub->change_bits); From bf76318ed06c298d8464923dce81edf6ef349931 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Tue, 10 Dec 2019 22:30:42 -0500 Subject: [PATCH 2363/3715] x86/resctrl: Fix an imbalance in domain_remove_cpu() commit e278af89f1ba0a9ef20947db6afc2c9afa37e85b upstream. A system that supports resource monitoring may have multiple resources while not all of these resources are capable of monitoring. Monitoring related state is initialized only for resources that are capable of monitoring and correspondingly this state should subsequently only be removed from these resources that are capable of monitoring. domain_add_cpu() calls domain_setup_mon_state() only when r->mon_capable is true where it will initialize d->mbm_over. However, domain_remove_cpu() calls cancel_delayed_work(&d->mbm_over) without checking r->mon_capable resulting in an attempt to cancel d->mbm_over on all resources, even those that never initialized d->mbm_over because they are not capable of monitoring. Hence, it triggers a debugobjects warning when offlining CPUs because those timer debugobjects are never initialized: ODEBUG: assert_init not available (active state 0) object type: timer_list hint: 0x0 WARNING: CPU: 143 PID: 789 at lib/debugobjects.c:484 debug_print_object Hardware name: HP Synergy 680 Gen9/Synergy 680 Gen9 Compute Module, BIOS I40 05/23/2018 RIP: 0010:debug_print_object Call Trace: debug_object_assert_init del_timer try_to_grab_pending cancel_delayed_work resctrl_offline_cpu cpuhp_invoke_callback cpuhp_thread_fun smpboot_thread_fn kthread ret_from_fork Fixes: e33026831bdb ("x86/intel_rdt/mbm: Handle counter overflow") Signed-off-by: Qian Cai Signed-off-by: Borislav Petkov Acked-by: Reinette Chatre Cc: Fenghua Yu Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: john.stultz@linaro.org Cc: sboyd@kernel.org Cc: Cc: Thomas Gleixner Cc: tj@kernel.org Cc: Tony Luck Cc: Vikas Shivappa Cc: x86-ml Link: https://lkml.kernel.org/r/20191211033042.2188-1-cai@lca.pw Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/intel_rdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c index 665d0f6cd62f..3f731d7f04bf 100644 --- a/arch/x86/kernel/cpu/intel_rdt.c +++ b/arch/x86/kernel/cpu/intel_rdt.c @@ -526,7 +526,7 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) if (static_branch_unlikely(&rdt_mon_enable_key)) rmdir_mondata_subdir_allrdtgrp(r, d->id); list_del(&d->list); - if (is_mbm_enabled()) + if (r->mon_capable && is_mbm_enabled()) cancel_delayed_work(&d->mbm_over); if (is_llc_occupancy_enabled() && has_busy_rmid(r, d)) { /* From 991ef5ce9b4e3e87c9008744d58ba42ba70ab638 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 14:29:09 +0100 Subject: [PATCH 2364/3715] x86/efistub: Disable paging at mixed mode entry commit 4911ee401b7ceff8f38e0ac597cbf503d71e690c upstream. The EFI mixed mode entry code goes through the ordinary startup_32() routine before jumping into the kernel's EFI boot code in 64-bit mode. The 32-bit startup code must be entered with paging disabled, but this is not documented as a requirement for the EFI handover protocol, and so we should disable paging explicitly when entering the kernel from 32-bit EFI firmware. Signed-off-by: Ard Biesheuvel Cc: Cc: Arvind Sankar Cc: Hans de Goede Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224132909.102540-4-ardb@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/head_64.S | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 4b3d92a37c80..39fdede523f2 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -227,6 +227,11 @@ ENTRY(efi32_stub_entry) leal efi32_config(%ebp), %eax movl %eax, efi_config(%ebp) + /* Disable paging */ + movl %cr0, %eax + btrl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + jmp startup_32 ENDPROC(efi32_stub_entry) #endif From 14f820498a133d729a5782d4e502baa589cb2194 Mon Sep 17 00:00:00 2001 From: Yuya Fujita Date: Thu, 19 Dec 2019 08:08:32 +0000 Subject: [PATCH 2365/3715] perf hists: Fix variable name's inconsistency in hists__for_each() macro commit 55347ec340af401437680fd0e88df6739a967f9f upstream. Variable names are inconsistent in hists__for_each macro(). Due to this inconsistency, the macro replaces its second argument with "fmt" regardless of its original name. So far it works because only "fmt" is passed to the second argument. However, this behavior is not expected and should be fixed. Fixes: f0786af536bb ("perf hists: Introduce hists__for_each_format macro") Fixes: aa6f50af822a ("perf hists: Introduce hists__for_each_sort_list macro") Signed-off-by: Yuya Fujita Acked-by: Jiri Olsa Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/OSAPR01MB1588E1C47AC22043175DE1B2E8520@OSAPR01MB1588.jpnprd01.prod.outlook.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/hist.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index b99d68943f25..595f91f46811 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -317,10 +317,10 @@ static inline void perf_hpp__prepend_sort_field(struct perf_hpp_fmt *format) list_for_each_entry_safe(format, tmp, &(_list)->sorts, sort_list) #define hists__for_each_format(hists, format) \ - perf_hpp_list__for_each_format((hists)->hpp_list, fmt) + perf_hpp_list__for_each_format((hists)->hpp_list, format) #define hists__for_each_sort_list(hists, format) \ - perf_hpp_list__for_each_sort_list((hists)->hpp_list, fmt) + perf_hpp_list__for_each_sort_list((hists)->hpp_list, format) extern struct perf_hpp_fmt perf_hpp__format[]; From 8ce84610f581ef104a8f274b50bc775599bbb446 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 20 Dec 2019 09:37:19 +0800 Subject: [PATCH 2366/3715] perf report: Fix incorrectly added dimensions as switch perf data file commit 0feba17bd7ee3b7e03d141f119049dcc23efa94e upstream. We observed an issue that was some extra columns displayed after switching perf data file in browser. The steps to reproduce: 1. perf record -a -e cycles,instructions -- sleep 3 2. perf report --group 3. In browser, we use hotkey 's' to switch to another perf.data 4. Now in browser, the extra columns 'Self' and 'Children' are displayed. The issue is setup_sorting() executed again after repeat path, so dimensions are added again. This patch checks the last key returned from __cmd_report(). If it's K_SWITCH_INPUT_DATA, skips the setup_sorting(). Fixes: ad0de0971b7f ("perf report: Enable the runtime switching of perf data file") Signed-off-by: Jin Yao Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Feng Tang Cc: Jin Yao Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191220013722.20592-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/builtin-report.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index fd4dd12b8f9d..17b26661b2f6 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -742,6 +742,7 @@ int cmd_report(int argc, const char **argv) struct stat st; bool has_br_stack = false; int branch_mode = -1; + int last_key = 0; bool branch_call_mode = false; char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT; const char * const report_usage[] = { @@ -1048,7 +1049,8 @@ repeat: else use_browser = 0; - if (setup_sorting(session->evlist) < 0) { + if ((last_key != K_SWITCH_INPUT_DATA) && + (setup_sorting(session->evlist) < 0)) { if (sort_order) parse_options_usage(report_usage, options, "s", 1); if (field_order) @@ -1108,6 +1110,7 @@ repeat: ret = __cmd_report(&report); if (ret == K_SWITCH_INPUT_DATA) { perf_session__delete(session); + last_key = K_SWITCH_INPUT_DATA; goto repeat; } else ret = 0; From ccf53a8db73c58b1a8135ef5ea9eb99680c77a08 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 13 Jan 2020 16:29:13 -0800 Subject: [PATCH 2367/3715] mm/shmem.c: thp, shmem: fix conflict of above-47bit hint address and PMD alignment commit 991589974d9c9ecb24ee3799ec8c415c730598a2 upstream. Shmem/tmpfs tries to provide THP-friendly mappings if huge pages are enabled. But it doesn't work well with above-47bit hint address. Normally, the kernel doesn't create userspace mappings above 47-bit, even if the machine allows this (such as with 5-level paging on x86-64). Not all user space is ready to handle wide addresses. It's known that at least some JIT compilers use higher bits in pointers to encode their information. Userspace can ask for allocation from full address space by specifying hint address (with or without MAP_FIXED) above 47-bits. If the application doesn't need a particular address, but wants to allocate from whole address space it can specify -1 as a hint address. Unfortunately, this trick breaks THP alignment in shmem/tmp: shmem_get_unmapped_area() would not try to allocate PMD-aligned area if *any* hint address specified. This can be fixed by requesting the aligned area if the we failed to allocated at user-specified hint address. The request with inflated length will also take the user-specified hint address. This way we will not lose an allocation request from the full address space. [kirill@shutemov.name: fold in a fixup] Link: http://lkml.kernel.org/r/20191223231309.t6bh5hkbmokihpfu@box Link: http://lkml.kernel.org/r/20191220142548.7118-3-kirill.shutemov@linux.intel.com Fixes: b569bab78d8d ("x86/mm: Prepare to expose larger address space to userspace") Signed-off-by: Kirill A. Shutemov Cc: "Willhalm, Thomas" Cc: Dan Williams Cc: "Bruggeman, Otto G" Cc: "Aneesh Kumar K . V" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/shmem.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 69106c600692..0b6db162083c 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2052,9 +2052,10 @@ unsigned long shmem_get_unmapped_area(struct file *file, /* * Our priority is to support MAP_SHARED mapped hugely; * and support MAP_PRIVATE mapped hugely too, until it is COWed. - * But if caller specified an address hint, respect that as before. + * But if caller specified an address hint and we allocated area there + * successfully, respect that as before. */ - if (uaddr) + if (uaddr == addr) return addr; if (shmem_huge != SHMEM_HUGE_FORCE) { @@ -2088,7 +2089,7 @@ unsigned long shmem_get_unmapped_area(struct file *file, if (inflated_len < len) return addr; - inflated_addr = get_area(NULL, 0, inflated_len, 0, flags); + inflated_addr = get_area(NULL, uaddr, inflated_len, 0, flags); if (IS_ERR_VALUE(inflated_addr)) return addr; if (inflated_addr & ~PAGE_MASK) From 90515d01c05eea9475e324a6e1605251606fee74 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 8 Jan 2020 21:07:32 +0900 Subject: [PATCH 2368/3715] btrfs: fix memory leak in qgroup accounting commit 26ef8493e1ab771cb01d27defca2fa1315dc3980 upstream. When running xfstests on the current btrfs I get the following splat from kmemleak: unreferenced object 0xffff88821b2404e0 (size 32): comm "kworker/u4:7", pid 26663, jiffies 4295283698 (age 8.776s) hex dump (first 32 bytes): 01 00 00 00 00 00 00 00 10 ff fd 26 82 88 ff ff ...........&.... 10 ff fd 26 82 88 ff ff 20 ff fd 26 82 88 ff ff ...&.... ..&.... backtrace: [<00000000f94fd43f>] ulist_alloc+0x25/0x60 [btrfs] [<00000000fd023d99>] btrfs_find_all_roots_safe+0x41/0x100 [btrfs] [<000000008f17bd32>] btrfs_find_all_roots+0x52/0x70 [btrfs] [<00000000b7660afb>] btrfs_qgroup_rescan_worker+0x343/0x680 [btrfs] [<0000000058e66778>] btrfs_work_helper+0xac/0x1e0 [btrfs] [<00000000f0188930>] process_one_work+0x1cf/0x350 [<00000000af5f2f8e>] worker_thread+0x28/0x3c0 [<00000000b55a1add>] kthread+0x109/0x120 [<00000000f88cbd17>] ret_from_fork+0x35/0x40 This corresponds to: (gdb) l *(btrfs_find_all_roots_safe+0x41) 0x8d7e1 is in btrfs_find_all_roots_safe (fs/btrfs/backref.c:1413). 1408 1409 tmp = ulist_alloc(GFP_NOFS); 1410 if (!tmp) 1411 return -ENOMEM; 1412 *roots = ulist_alloc(GFP_NOFS); 1413 if (!*roots) { 1414 ulist_free(tmp); 1415 return -ENOMEM; 1416 } 1417 Following the lifetime of the allocated 'roots' ulist, it gets freed again in btrfs_qgroup_account_extent(). But this does not happen if the function is called with the 'BTRFS_FS_QUOTA_ENABLED' flag cleared, then btrfs_qgroup_account_extent() does a short leave and directly returns. Instead of directly returning we should jump to the 'out_free' in order to free all resources as expected. CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Qu Wenruo Signed-off-by: Johannes Thumshirn [ add comment ] Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/qgroup.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index cb6e8cb0de94..39a00b57ff01 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1928,8 +1928,12 @@ btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 nr_old_roots = 0; int ret = 0; + /* + * If quotas get disabled meanwhile, the resouces need to be freed and + * we can't just exit here. + */ if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) - return 0; + goto out_free; if (new_roots) { if (!maybe_fs_roots(new_roots)) From 14dc0e8ee11b7a60e6517aee2e11b8d3162b1ce6 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Mon, 13 Jan 2020 16:29:23 -0800 Subject: [PATCH 2369/3715] mm/page-writeback.c: avoid potential division by zero in wb_min_max_ratio() commit 6d9e8c651dd979aa666bee15f086745f3ea9c4b3 upstream. Patch series "use div64_ul() instead of div_u64() if the divisor is unsigned long". We were first inspired by commit b0ab99e7736a ("sched: Fix possible divide by zero in avg_atom () calculation"), then refer to the recently analyzed mm code, we found this suspicious place. 201 if (min) { 202 min *= this_bw; 203 do_div(min, tot_bw); 204 } And we also disassembled and confirmed it: /usr/src/debug/kernel-4.9.168-016.ali3000/linux-4.9.168-016.ali3000.alios7.x86_64/mm/page-writeback.c: 201 0xffffffff811c37da <__wb_calc_thresh+234>: xor %r10d,%r10d 0xffffffff811c37dd <__wb_calc_thresh+237>: test %rax,%rax 0xffffffff811c37e0 <__wb_calc_thresh+240>: je 0xffffffff811c3800 <__wb_calc_thresh+272> /usr/src/debug/kernel-4.9.168-016.ali3000/linux-4.9.168-016.ali3000.alios7.x86_64/mm/page-writeback.c: 202 0xffffffff811c37e2 <__wb_calc_thresh+242>: imul %r8,%rax /usr/src/debug/kernel-4.9.168-016.ali3000/linux-4.9.168-016.ali3000.alios7.x86_64/mm/page-writeback.c: 203 0xffffffff811c37e6 <__wb_calc_thresh+246>: mov %r9d,%r10d ---> truncates it to 32 bits here 0xffffffff811c37e9 <__wb_calc_thresh+249>: xor %edx,%edx 0xffffffff811c37eb <__wb_calc_thresh+251>: div %r10 0xffffffff811c37ee <__wb_calc_thresh+254>: imul %rbx,%rax 0xffffffff811c37f2 <__wb_calc_thresh+258>: shr $0x2,%rax 0xffffffff811c37f6 <__wb_calc_thresh+262>: mul %rcx 0xffffffff811c37f9 <__wb_calc_thresh+265>: shr $0x2,%rdx 0xffffffff811c37fd <__wb_calc_thresh+269>: mov %rdx,%r10 This series uses div64_ul() instead of div_u64() if the divisor is unsigned long, to avoid truncation to 32-bit on 64-bit platforms. This patch (of 3): The variables 'min' and 'max' are unsigned long and do_div truncates them to 32 bits, which means it can test non-zero and be truncated to zero for division. Fix this issue by using div64_ul() instead. Link: http://lkml.kernel.org/r/20200102081442.8273-2-wenyang@linux.alibaba.com Fixes: 693108a8a667 ("writeback: make bdi->min/max_ratio handling cgroup writeback aware") Signed-off-by: Wen Yang Reviewed-by: Andrew Morton Cc: Qian Cai Cc: Tejun Heo Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/page-writeback.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index a40c075fd8f1..29f9980c13ac 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -201,11 +201,11 @@ static void wb_min_max_ratio(struct bdi_writeback *wb, if (this_bw < tot_bw) { if (min) { min *= this_bw; - do_div(min, tot_bw); + min = div64_ul(min, tot_bw); } if (max < 100) { max *= this_bw; - do_div(max, tot_bw); + max = div64_ul(max, tot_bw); } } From 7e74b77cf84095e012c179952a4fc8b5689f1a75 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 18 Dec 2019 11:17:41 +0100 Subject: [PATCH 2370/3715] net: stmmac: 16KB buffer must be 16 byte aligned commit 8605131747e7e1fd8f6c9f97a00287aae2b2c640 upstream. The 16KB RX Buffer must also be 16 byte aligned. Fix it. Fixes: 7ac6653a085b ("stmmac: Move the STMicroelectronics driver") Signed-off-by: Jose Abreu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/common.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index efc4a1a8343a..e51b50d94074 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -338,9 +338,8 @@ struct dma_features { unsigned int rx_fifo_size; }; -/* GMAC TX FIFO is 8K, Rx FIFO is 16K */ -#define BUF_SIZE_16KiB 16384 -/* RX Buffer size must be < 8191 and multiple of 4/8/16 bytes */ +/* RX Buffer size must be multiple of 4/8/16 bytes */ +#define BUF_SIZE_16KiB 16368 #define BUF_SIZE_8KiB 8188 #define BUF_SIZE_4KiB 4096 #define BUF_SIZE_2KiB 2048 From 4a5bf2de396f1a310285cbcba59a3be0528140e3 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 18 Dec 2019 11:17:42 +0100 Subject: [PATCH 2371/3715] net: stmmac: Enable 16KB buffer size commit b2f3a481c4cd62f78391b836b64c0a6e72b503d2 upstream. XGMAC supports maximum MTU that can go to 16KB. Lets add this check in the calculation of RX buffer size. Fixes: 7ac6653a085b ("stmmac: Move the STMicroelectronics driver") Signed-off-by: Jose Abreu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index e89466bd432d..81d446469a35 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1043,7 +1043,9 @@ static int stmmac_set_bfsize(int mtu, int bufsize) { int ret = bufsize; - if (mtu >= BUF_SIZE_4KiB) + if (mtu >= BUF_SIZE_8KiB) + ret = BUF_SIZE_16KiB; + else if (mtu >= BUF_SIZE_4KiB) ret = BUF_SIZE_8KiB; else if (mtu >= BUF_SIZE_2KiB) ret = BUF_SIZE_4KiB; From 12b803e03d3156aa83f9446c91551da72233bf5e Mon Sep 17 00:00:00 2001 From: John Ogness Date: Sun, 24 Jun 2018 00:32:06 +0200 Subject: [PATCH 2372/3715] USB: serial: io_edgeport: use irqsave() in USB's complete callback [ Upstream commit dd1fae527612543e560e84f2eba4f6ef2006ac55 ] The USB completion callback does not disable interrupts while acquiring the lock. We want to remove the local_irq_disable() invocation from __usb_hcd_giveback_urb() and therefore it is required for the callback handler to disable the interrupts while acquiring the lock. The callback may be invoked either in IRQ or BH context depending on the USB host controller. Use the _irqsave() variant of the locking primitives. Signed-off-by: John Ogness Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Johan Hovold Signed-off-by: Sasha Levin --- drivers/usb/serial/io_edgeport.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c index 467870f504a5..8810de817095 100644 --- a/drivers/usb/serial/io_edgeport.c +++ b/drivers/usb/serial/io_edgeport.c @@ -652,6 +652,7 @@ static void edge_interrupt_callback(struct urb *urb) struct usb_serial_port *port; unsigned char *data = urb->transfer_buffer; int length = urb->actual_length; + unsigned long flags; int bytes_avail; int position; int txCredits; @@ -683,7 +684,7 @@ static void edge_interrupt_callback(struct urb *urb) if (length > 1) { bytes_avail = data[0] | (data[1] << 8); if (bytes_avail) { - spin_lock(&edge_serial->es_lock); + spin_lock_irqsave(&edge_serial->es_lock, flags); edge_serial->rxBytesAvail += bytes_avail; dev_dbg(dev, "%s - bytes_avail=%d, rxBytesAvail=%d, read_in_progress=%d\n", @@ -706,7 +707,8 @@ static void edge_interrupt_callback(struct urb *urb) edge_serial->read_in_progress = false; } } - spin_unlock(&edge_serial->es_lock); + spin_unlock_irqrestore(&edge_serial->es_lock, + flags); } } /* grab the txcredits for the ports if available */ @@ -719,9 +721,11 @@ static void edge_interrupt_callback(struct urb *urb) port = edge_serial->serial->port[portNumber]; edge_port = usb_get_serial_port_data(port); if (edge_port->open) { - spin_lock(&edge_port->ep_lock); + spin_lock_irqsave(&edge_port->ep_lock, + flags); edge_port->txCredits += txCredits; - spin_unlock(&edge_port->ep_lock); + spin_unlock_irqrestore(&edge_port->ep_lock, + flags); dev_dbg(dev, "%s - txcredits for port%d = %d\n", __func__, portNumber, edge_port->txCredits); @@ -762,6 +766,7 @@ static void edge_bulk_in_callback(struct urb *urb) int retval; __u16 raw_data_length; int status = urb->status; + unsigned long flags; if (status) { dev_dbg(&urb->dev->dev, "%s - nonzero read bulk status received: %d\n", @@ -781,7 +786,7 @@ static void edge_bulk_in_callback(struct urb *urb) usb_serial_debug_data(dev, __func__, raw_data_length, data); - spin_lock(&edge_serial->es_lock); + spin_lock_irqsave(&edge_serial->es_lock, flags); /* decrement our rxBytes available by the number that we just got */ edge_serial->rxBytesAvail -= raw_data_length; @@ -805,7 +810,7 @@ static void edge_bulk_in_callback(struct urb *urb) edge_serial->read_in_progress = false; } - spin_unlock(&edge_serial->es_lock); + spin_unlock_irqrestore(&edge_serial->es_lock, flags); } From abde18153f831447c61fbef12dd331aa9998cf1f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 17 Jan 2020 10:50:23 +0100 Subject: [PATCH 2373/3715] USB: serial: io_edgeport: handle unbound ports on URB completion [ Upstream commit e37d1aeda737a20b1846a91a3da3f8b0f00cf690 ] Check for NULL port data in the shared interrupt and bulk completion callbacks to avoid dereferencing a NULL pointer in case a device sends data for a port device which isn't bound to a driver (e.g. due to a malicious device having unexpected endpoints or after an allocation failure on port probe). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold Signed-off-by: Sasha Levin --- drivers/usb/serial/io_edgeport.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c index 8810de817095..3705b64ab948 100644 --- a/drivers/usb/serial/io_edgeport.c +++ b/drivers/usb/serial/io_edgeport.c @@ -720,7 +720,7 @@ static void edge_interrupt_callback(struct urb *urb) if (txCredits) { port = edge_serial->serial->port[portNumber]; edge_port = usb_get_serial_port_data(port); - if (edge_port->open) { + if (edge_port && edge_port->open) { spin_lock_irqsave(&edge_port->ep_lock, flags); edge_port->txCredits += txCredits; @@ -1847,7 +1847,7 @@ static void process_rcvd_data(struct edgeport_serial *edge_serial, if (rxLen && edge_serial->rxPort < serial->num_ports) { port = serial->port[edge_serial->rxPort]; edge_port = usb_get_serial_port_data(port); - if (edge_port->open) { + if (edge_port && edge_port->open) { dev_dbg(dev, "%s - Sending %d bytes to TTY for port %d\n", __func__, rxLen, edge_serial->rxPort); From 26da70406bd88d5c65d7ec79b01888771756c6a8 Mon Sep 17 00:00:00 2001 From: Bharath Vedartham Date: Mon, 13 May 2019 17:23:17 -0700 Subject: [PATCH 2374/3715] mm/huge_memory.c: make __thp_get_unmapped_area static [ Upstream commit b3b07077b01ecbbd98efede778c195567de25b71 ] __thp_get_unmapped_area is only used in mm/huge_memory.c. Make it static. Tested by building and booting the kernel. Link: http://lkml.kernel.org/r/20190504102353.GA22525@bharath12345-Inspiron-5559 Signed-off-by: Bharath Vedartham Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/huge_memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 1adc2e6c50f9..6d835535946d 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -502,7 +502,7 @@ void prep_transhuge_page(struct page *page) set_compound_page_dtor(page, TRANSHUGE_PAGE_DTOR); } -unsigned long __thp_get_unmapped_area(struct file *filp, unsigned long len, +static unsigned long __thp_get_unmapped_area(struct file *filp, unsigned long len, loff_t off, unsigned long flags, unsigned long size) { unsigned long addr; From ae6f36741273f59e0a0c9f1388461ee5d101b2b9 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 13 Jan 2020 16:29:10 -0800 Subject: [PATCH 2375/3715] mm/huge_memory.c: thp: fix conflict of above-47bit hint address and PMD alignment [ Upstream commit 97d3d0f9a1cf132c63c0b8b8bd497b8a56283dd9 ] Patch series "Fix two above-47bit hint address vs. THP bugs". The two get_unmapped_area() implementations have to be fixed to provide THP-friendly mappings if above-47bit hint address is specified. This patch (of 2): Filesystems use thp_get_unmapped_area() to provide THP-friendly mappings. For DAX in particular. Normally, the kernel doesn't create userspace mappings above 47-bit, even if the machine allows this (such as with 5-level paging on x86-64). Not all user space is ready to handle wide addresses. It's known that at least some JIT compilers use higher bits in pointers to encode their information. Userspace can ask for allocation from full address space by specifying hint address (with or without MAP_FIXED) above 47-bits. If the application doesn't need a particular address, but wants to allocate from whole address space it can specify -1 as a hint address. Unfortunately, this trick breaks thp_get_unmapped_area(): the function would not try to allocate PMD-aligned area if *any* hint address specified. Modify the routine to handle it correctly: - Try to allocate the space at the specified hint address with length padding required for PMD alignment. - If failed, retry without length padding (but with the same hint address); - If the returned address matches the hint address return it. - Otherwise, align the address as required for THP and return. The user specified hint address is passed down to get_unmapped_area() so above-47bit hint address will be taken into account without breaking alignment requirements. Link: http://lkml.kernel.org/r/20191220142548.7118-2-kirill.shutemov@linux.intel.com Fixes: b569bab78d8d ("x86/mm: Prepare to expose larger address space to userspace") Signed-off-by: Kirill A. Shutemov Reported-by: Thomas Willhalm Tested-by: Dan Williams Cc: "Aneesh Kumar K . V" Cc: "Bruggeman, Otto G" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/huge_memory.c | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 6d835535946d..92915cc87549 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -502,13 +502,13 @@ void prep_transhuge_page(struct page *page) set_compound_page_dtor(page, TRANSHUGE_PAGE_DTOR); } -static unsigned long __thp_get_unmapped_area(struct file *filp, unsigned long len, +static unsigned long __thp_get_unmapped_area(struct file *filp, + unsigned long addr, unsigned long len, loff_t off, unsigned long flags, unsigned long size) { - unsigned long addr; loff_t off_end = off + len; loff_t off_align = round_up(off, size); - unsigned long len_pad; + unsigned long len_pad, ret; if (off_end <= off_align || (off_end - off_align) < size) return 0; @@ -517,30 +517,40 @@ static unsigned long __thp_get_unmapped_area(struct file *filp, unsigned long le if (len_pad < len || (off + len_pad) < off) return 0; - addr = current->mm->get_unmapped_area(filp, 0, len_pad, + ret = current->mm->get_unmapped_area(filp, addr, len_pad, off >> PAGE_SHIFT, flags); - if (IS_ERR_VALUE(addr)) + + /* + * The failure might be due to length padding. The caller will retry + * without the padding. + */ + if (IS_ERR_VALUE(ret)) return 0; - addr += (off - addr) & (size - 1); - return addr; + /* + * Do not try to align to THP boundary if allocation at the address + * hint succeeds. + */ + if (ret == addr) + return addr; + + ret += (off - ret) & (size - 1); + return ret; } unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { + unsigned long ret; loff_t off = (loff_t)pgoff << PAGE_SHIFT; - if (addr) - goto out; if (!IS_DAX(filp->f_mapping->host) || !IS_ENABLED(CONFIG_FS_DAX_PMD)) goto out; - addr = __thp_get_unmapped_area(filp, len, off, flags, PMD_SIZE); - if (addr) - return addr; - - out: + ret = __thp_get_unmapped_area(filp, addr, len, off, flags, PMD_SIZE); + if (ret) + return ret; +out: return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags); } EXPORT_SYMBOL_GPL(thp_get_unmapped_area); From 9510f18a019aa30d50d499fdae1176183132368c Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Wed, 20 Nov 2019 09:15:17 -0600 Subject: [PATCH 2376/3715] arm64: dts: agilex/stratix10: fix pmu interrupt numbers [ Upstream commit 210de0e996aee8e360ccc9e173fe7f0a7ed2f695 ] Fix up the correct interrupt numbers for the PMU unit on Agilex and Stratix10. Fixes: 78cd6a9d8e15 ("arm64: dts: Add base stratix 10 dtsi") Cc: linux-stable Reported-by: Meng Li Signed-off-by: Dinh Nguyen Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi index e79f3defe002..c2ad4f97cef0 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi @@ -56,10 +56,10 @@ pmu { compatible = "arm,armv8-pmuv3"; - interrupts = <0 120 8>, - <0 121 8>, - <0 122 8>, - <0 123 8>; + interrupts = <0 170 4>, + <0 171 4>, + <0 172 4>, + <0 173 4>; interrupt-affinity = <&cpu0>, <&cpu1>, <&cpu2>, From 99e739733fd2794ad9b27d99294061e756046ae2 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 13 Jan 2020 19:21:07 +0100 Subject: [PATCH 2377/3715] cfg80211: fix page refcount issue in A-MSDU decap commit 81c044fc3bdc5b7be967cd3682528ea94b58c06a upstream. The fragments attached to a skb can be part of a compound page. In that case, page_ref_inc will increment the refcount for the wrong page. Fix this by using get_page instead, which calls page_ref_inc on the compound head and also checks for overflow. Fixes: 2b67f944f88c ("cfg80211: reuse existing page fragments in A-MSDU rx") Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20200113182107.20461-1-nbd@nbd.name Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/util.c b/net/wireless/util.c index 2234817f5dbb..935929b45411 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -652,7 +652,7 @@ __frame_add_frag(struct sk_buff *skb, struct page *page, struct skb_shared_info *sh = skb_shinfo(skb); int page_offset; - page_ref_inc(page); + get_page(page); page_offset = ptr - page_address(page); skb_add_rx_frag(skb, sh->nr_frags, page, page_offset, len, size); } From 071b66b3808260ab35b725fc9f7e5466637a2b9e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 10 Jan 2020 11:53:08 -0800 Subject: [PATCH 2378/3715] netfilter: fix a use-after-free in mtype_destroy() commit c120959387efa51479056fd01dc90adfba7a590c upstream. map->members is freed by ip_set_free() right before using it in mtype_ext_cleanup() again. So we just have to move it down. Reported-by: syzbot+4c3cc6dbe7259dbf9054@syzkaller.appspotmail.com Fixes: 40cd63bf33b2 ("netfilter: ipset: Support extensions which need a per data destroy function") Acked-by: Jozsef Kadlecsik Signed-off-by: Cong Wang Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/ipset/ip_set_bitmap_gen.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 8ad2b52a0b32..b0701f6259cc 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -64,9 +64,9 @@ mtype_destroy(struct ip_set *set) if (SET_WITH_TIMEOUT(set)) del_timer_sync(&map->gc); - ip_set_free(map->members); if (set->dsize && set->extensions & IPSET_EXT_DESTROY) mtype_ext_cleanup(set); + ip_set_free(map->members); ip_set_free(map); set->data = NULL; From 4130fabf9ed246d35108182bb1e444d4ceeccff7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 11 Jan 2020 23:19:53 +0100 Subject: [PATCH 2379/3715] netfilter: arp_tables: init netns pointer in xt_tgdtor_param struct commit 212e7f56605ef9688d0846db60c6c6ec06544095 upstream. An earlier commit (1b789577f655060d98d20e, "netfilter: arp_tables: init netns pointer in xt_tgchk_param struct") fixed missing net initialization for arptables, but turns out it was incomplete. We can get a very similar struct net NULL deref during error unwinding: general protection fault: 0000 [#1] PREEMPT SMP KASAN RIP: 0010:xt_rateest_put+0xa1/0x440 net/netfilter/xt_RATEEST.c:77 xt_rateest_tg_destroy+0x72/0xa0 net/netfilter/xt_RATEEST.c:175 cleanup_entry net/ipv4/netfilter/arp_tables.c:509 [inline] translate_table+0x11f4/0x1d80 net/ipv4/netfilter/arp_tables.c:587 do_replace net/ipv4/netfilter/arp_tables.c:981 [inline] do_arpt_set_ctl+0x317/0x650 net/ipv4/netfilter/arp_tables.c:1461 Also init the netns pointer in xt_tgdtor_param struct. Fixes: add67461240c1d ("netfilter: add struct net * to target parameters") Reported-by: syzbot+91bdd8eece0f6629ec8b@syzkaller.appspotmail.com Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/ipv4/netfilter/arp_tables.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index e288489ae3d5..6dd727e0a72f 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -506,12 +506,13 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, return 0; } -static inline void cleanup_entry(struct arpt_entry *e) +static void cleanup_entry(struct arpt_entry *e, struct net *net) { struct xt_tgdtor_param par; struct xt_entry_target *t; t = arpt_get_target(e); + par.net = net; par.target = t->u.kernel.target; par.targinfo = t->data; par.family = NFPROTO_ARP; @@ -601,7 +602,7 @@ static int translate_table(struct net *net, xt_entry_foreach(iter, entry0, newinfo->size) { if (i-- == 0) break; - cleanup_entry(iter); + cleanup_entry(iter, net); } return ret; } @@ -926,7 +927,7 @@ static int __do_replace(struct net *net, const char *name, /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries; xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) - cleanup_entry(iter); + cleanup_entry(iter, net); xt_free_table_info(oldinfo); if (copy_to_user(counters_ptr, counters, @@ -990,7 +991,7 @@ static int do_replace(struct net *net, const void __user *user, free_newinfo_untrans: xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) - cleanup_entry(iter); + cleanup_entry(iter, net); free_newinfo: xt_free_table_info(newinfo); return ret; @@ -1287,7 +1288,7 @@ static int compat_do_replace(struct net *net, void __user *user, free_newinfo_untrans: xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) - cleanup_entry(iter); + cleanup_entry(iter, net); free_newinfo: xt_free_table_info(newinfo); return ret; @@ -1514,7 +1515,7 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len return ret; } -static void __arpt_unregister_table(struct xt_table *table) +static void __arpt_unregister_table(struct net *net, struct xt_table *table) { struct xt_table_info *private; void *loc_cpu_entry; @@ -1526,7 +1527,7 @@ static void __arpt_unregister_table(struct xt_table *table) /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries; xt_entry_foreach(iter, loc_cpu_entry, private->size) - cleanup_entry(iter); + cleanup_entry(iter, net); if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); @@ -1566,7 +1567,7 @@ int arpt_register_table(struct net *net, ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks)); if (ret != 0) { - __arpt_unregister_table(new_table); + __arpt_unregister_table(net, new_table); *res = NULL; } @@ -1581,7 +1582,7 @@ void arpt_unregister_table(struct net *net, struct xt_table *table, const struct nf_hook_ops *ops) { nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); - __arpt_unregister_table(table); + __arpt_unregister_table(net, table); } /* The built-in targets: standard (NULL) and error. */ From 2dc70c6ea87a0df09116b8b5739944f8525d48c7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Jan 2020 18:23:58 +0100 Subject: [PATCH 2380/3715] NFC: pn533: fix bulk-message timeout commit a112adafcb47760feff959ee1ecd10b74d2c5467 upstream. The driver was doing a synchronous uninterruptible bulk-transfer without using a timeout. This could lead to the driver hanging on probe due to a malfunctioning (or malicious) device until the device is physically disconnected. While sleeping in probe the driver prevents other devices connected to the same hub from being added to (or removed from) the bus. An arbitrary limit of five seconds should be more than enough. Fixes: dbafc28955fa ("NFC: pn533: don't send USB data off of the stack") Signed-off-by: Johan Hovold Reviewed-by: Greg Kroah-Hartman Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/nfc/pn533/usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/pn533/usb.c b/drivers/nfc/pn533/usb.c index fcb57d64d97e..a2c9b3f3bc23 100644 --- a/drivers/nfc/pn533/usb.c +++ b/drivers/nfc/pn533/usb.c @@ -403,7 +403,7 @@ static int pn533_acr122_poweron_rdr(struct pn533_usb_phy *phy) cmd, sizeof(cmd), false); rc = usb_bulk_msg(phy->udev, phy->out_urb->pipe, buffer, sizeof(cmd), - &transferred, 0); + &transferred, 5000); kfree(buffer); if (rc || (transferred != sizeof(cmd))) { nfc_err(&phy->udev->dev, From 83bd51eb8f992b64711c2402dd9f78cbea35b946 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Thu, 28 Nov 2019 12:25:45 +0100 Subject: [PATCH 2381/3715] batman-adv: Fix DAT candidate selection on little endian systems commit 4cc4a1708903f404d2ca0dfde30e71e052c6cbc9 upstream. The distributed arp table is using a DHT to store and retrieve MAC address information for an IP address. This is done using unicast messages to selected peers. The potential peers are looked up using the IP address and the VID. While the IP address is always stored in big endian byte order, this is not the case of the VID. It can (depending on the host system) either be big endian or little endian. The host must therefore always convert it to big endian to ensure that all devices calculate the same peers for the same lookup data. Fixes: be1db4f6615b ("batman-adv: make the Distributed ARP Table vlan aware") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/distributed-arp-table.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 8d1d0fdb157e..1519cbf70150 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -243,6 +243,7 @@ static u32 batadv_hash_dat(const void *data, u32 size) u32 hash = 0; const struct batadv_dat_entry *dat = data; const unsigned char *key; + __be16 vid; u32 i; key = (const unsigned char *)&dat->ip; @@ -252,7 +253,8 @@ static u32 batadv_hash_dat(const void *data, u32 size) hash ^= (hash >> 6); } - key = (const unsigned char *)&dat->vid; + vid = htons(dat->vid); + key = (__force const unsigned char *)&vid; for (i = 0; i < sizeof(dat->vid); i++) { hash += key[i]; hash += (hash << 10); From 93c81624bb12329445e76a88dbb45ac0ef55d152 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 14 Jan 2020 13:00:35 -0800 Subject: [PATCH 2382/3715] macvlan: use skb_reset_mac_header() in macvlan_queue_xmit() [ Upstream commit 1712b2fff8c682d145c7889d2290696647d82dab ] I missed the fact that macvlan_broadcast() can be used both in RX and TX. skb_eth_hdr() makes only sense in TX paths, so we can not use it blindly in macvlan_broadcast() Fixes: 96cc4b69581d ("macvlan: do not assume mac_header is set in macvlan_broadcast()") Signed-off-by: Eric Dumazet Reported-by: Jurgen Van Ham Tested-by: Matteo Croce Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/macvlan.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 2b977655834c..ab539136d5bf 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -263,7 +263,7 @@ static void macvlan_broadcast(struct sk_buff *skb, struct net_device *src, enum macvlan_mode mode) { - const struct ethhdr *eth = skb_eth_hdr(skb); + const struct ethhdr *eth = eth_hdr(skb); const struct macvlan_dev *vlan; struct sk_buff *nskb; unsigned int i; @@ -515,10 +515,11 @@ static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) const struct macvlan_dev *dest; if (vlan->mode == MACVLAN_MODE_BRIDGE) { - const struct ethhdr *eth = (void *)skb->data; + const struct ethhdr *eth = skb_eth_hdr(skb); /* send to other bridge ports directly */ if (is_multicast_ether_addr(eth->h_dest)) { + skb_reset_mac_header(skb); macvlan_broadcast(skb, port, dev, MACVLAN_MODE_BRIDGE); goto xmit_world; } From a1eb61dd2b1c4d3a3b23dd824ba71c877caba18c Mon Sep 17 00:00:00 2001 From: Mohammed Gamal Date: Tue, 14 Jan 2020 15:09:50 +0200 Subject: [PATCH 2383/3715] hv_netvsc: Fix memory leak when removing rndis device [ Upstream commit 536dc5df2808efbefc5acee334d3c4f701790ec0 ] kmemleak detects the following memory leak when hot removing a network device: unreferenced object 0xffff888083f63600 (size 256): comm "kworker/0:1", pid 12, jiffies 4294831717 (age 1113.676s) hex dump (first 32 bytes): 00 40 c7 33 80 88 ff ff 00 00 00 00 10 00 00 00 .@.3............ 00 00 00 00 ad 4e ad de ff ff ff ff 00 00 00 00 .....N.......... backtrace: [<00000000d4a8f5be>] rndis_filter_device_add+0x117/0x11c0 [hv_netvsc] [<000000009c02d75b>] netvsc_probe+0x5e7/0xbf0 [hv_netvsc] [<00000000ddafce23>] vmbus_probe+0x74/0x170 [hv_vmbus] [<00000000046e64f1>] really_probe+0x22f/0xb50 [<000000005cc35eb7>] driver_probe_device+0x25e/0x370 [<0000000043c642b2>] bus_for_each_drv+0x11f/0x1b0 [<000000005e3d09f0>] __device_attach+0x1c6/0x2f0 [<00000000a72c362f>] bus_probe_device+0x1a6/0x260 [<0000000008478399>] device_add+0x10a3/0x18e0 [<00000000cf07b48c>] vmbus_device_register+0xe7/0x1e0 [hv_vmbus] [<00000000d46cf032>] vmbus_add_channel_work+0x8ab/0x1770 [hv_vmbus] [<000000002c94bb64>] process_one_work+0x919/0x17d0 [<0000000096de6781>] worker_thread+0x87/0xb40 [<00000000fbe7397e>] kthread+0x333/0x3f0 [<000000004f844269>] ret_from_fork+0x3a/0x50 rndis_filter_device_add() allocates an instance of struct rndis_device which never gets deallocated as rndis_filter_device_remove() sets net_device->extension which points to the rndis_device struct to NULL, leaving the rndis_device dangling. Since net_device->extension is eventually freed in free_netvsc_device(), we refrain from setting it to NULL inside rndis_filter_device_remove() Signed-off-by: Mohammed Gamal Reviewed-by: Haiyang Zhang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/hyperv/rndis_filter.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index b19557c035f2..aa0bbffe4900 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1331,8 +1331,6 @@ void rndis_filter_device_remove(struct hv_device *dev, /* Halt and release the rndis device */ rndis_filter_halt_device(rndis_dev); - net_dev->extension = NULL; - netvsc_device_remove(dev); } From e9a80cf49026b21729eac485fbe3dad2728c75e0 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 15 Jan 2020 11:56:52 +0300 Subject: [PATCH 2384/3715] net: dsa: tag_qca: fix doubled Tx statistics [ Upstream commit bd5874da57edd001b35cf28ae737779498c16a56 ] DSA subsystem takes care of netdev statistics since commit 4ed70ce9f01c ("net: dsa: Refactor transmit path to eliminate duplication"), so any accounting inside tagger callbacks is redundant and can lead to messing up the stats. This bug is present in Qualcomm tagger since day 0. Fixes: cafdc45c949b ("net-next: dsa: add Qualcomm tag RX/TX handler") Reviewed-by: Andrew Lunn Signed-off-by: Alexander Lobakin Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dsa/tag_qca.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index b8c05f1cf47d..af3a12a36d88 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -41,9 +41,6 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) struct dsa_slave_priv *p = netdev_priv(dev); u16 *phdr, hdr; - dev->stats.tx_packets++; - dev->stats.tx_bytes += skb->len; - if (skb_cow_head(skb, 0) < 0) return NULL; From 56cffa965c195cf64f8c1f40ac8f3f897d59f08c Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Thu, 16 Jan 2020 15:41:17 +0800 Subject: [PATCH 2385/3715] net: hns: fix soft lockup when there is not enough memory [ Upstream commit 49edd6a2c456150870ddcef5b7ed11b21d849e13 ] When there is not enough memory and napi_alloc_skb() return NULL, the HNS driver will print error message, and than try again, if the memory is not enough for a while, huge error message and the retry operation will cause soft lockup. When napi_alloc_skb() return NULL because of no memory, we can get a warn_alloc() call trace, so this patch deletes the error message. We already use polling mode to handle irq, but the retry operation will render the polling weight inactive, this patch just return budget when the rx is not completed to avoid dead loop. Fixes: 36eedfde1a36 ("net: hns: Optimize hns_nic_common_poll for better performance") Fixes: b5996f11ea54 ("net: add Hisilicon Network Subsystem basic ethernet support") Signed-off-by: Yonglong Liu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index b681c07b33fb..0733745f4be6 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -669,7 +669,6 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data, skb = *out_skb = napi_alloc_skb(&ring_data->napi, HNS_RX_HEAD_SIZE); if (unlikely(!skb)) { - netdev_err(ndev, "alloc rx skb fail\n"); ring->stats.sw_err_cnt++; return -ENOMEM; } @@ -1180,7 +1179,6 @@ static int hns_nic_common_poll(struct napi_struct *napi, int budget) container_of(napi, struct hns_nic_ring_data, napi); struct hnae_ring *ring = ring_data->ring; -try_again: clean_complete += ring_data->poll_one( ring_data, budget - clean_complete, ring_data->ex_process); @@ -1190,7 +1188,7 @@ try_again: napi_complete(napi); ring->q->handle->dev->ops->toggle_ring_irq(ring, 0); } else { - goto try_again; + return budget; } } From 7ebdc211ed5c22389a5baa3e181dd99884154b30 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Jan 2020 09:27:11 -0800 Subject: [PATCH 2386/3715] net: usb: lan78xx: limit size of local TSO packets [ Upstream commit f8d7408a4d7f60f8b2df0f81decdc882dd9c20dc ] lan78xx_tx_bh() makes sure to not exceed MAX_SINGLE_PACKET_SIZE bytes in the aggregated packets it builds, but does nothing to prevent large GSO packets being submitted. Pierre-Francois reported various hangs when/if TSO is enabled. For localy generated packets, we can use netif_set_gso_max_size() to limit the size of TSO packets. Note that forwarded packets could still hit the issue, so a complete fix might require implementing .ndo_features_check for this driver, forcing a software segmentation if the size of the TSO packet exceeds MAX_SINGLE_PACKET_SIZE. Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") Signed-off-by: Eric Dumazet Reported-by: RENARD Pierre-Francois Tested-by: RENARD Pierre-Francois Cc: Stefan Wahren Cc: Woojung Huh Cc: Microchip Linux Driver Support Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/lan78xx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index c23f35dba718..ee7194a9e231 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3612,6 +3612,7 @@ static int lan78xx_probe(struct usb_interface *intf, /* MTU range: 68 - 9000 */ netdev->max_mtu = MAX_SINGLE_PACKET_SIZE; + netif_set_gso_max_size(netdev, MAX_SINGLE_PACKET_SIZE - MAX_HEADER); dev->ep_blkin = (intf->cur_altsetting)->endpoint + 0; dev->ep_blkout = (intf->cur_altsetting)->endpoint + 1; From d14f54ff5ed255f0fae840501b1eb0be6993aced Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 14 Jan 2020 14:54:48 +0000 Subject: [PATCH 2387/3715] net/wan/fsl_ucc_hdlc: fix out of bounds write on array utdm_info [ Upstream commit ddf420390526ede3b9ff559ac89f58cb59d9db2f ] Array utdm_info is declared as an array of MAX_HDLC_NUM (4) elements however up to UCC_MAX_NUM (8) elements are potentially being written to it. Currently we have an array out-of-bounds write error on the last 4 elements. Fix this by making utdm_info UCC_MAX_NUM elements in size. Addresses-Coverity: ("Out-of-bounds write") Fixes: c19b6d246a35 ("drivers/net: support hdlc function for QE-UCC") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/wan/fsl_ucc_hdlc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index 289dff262948..571a1ff8f81f 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -76,7 +76,7 @@ static struct ucc_tdm_info utdm_primary_info = { }, }; -static struct ucc_tdm_info utdm_info[MAX_HDLC_NUM]; +static struct ucc_tdm_info utdm_info[UCC_MAX_NUM]; static int uhdlc_init(struct ucc_hdlc_private *priv) { From a05776cb7264cd3515980d034f50b7ef110239a8 Mon Sep 17 00:00:00 2001 From: Vladis Dronov Date: Mon, 13 Jan 2020 14:00:09 +0100 Subject: [PATCH 2388/3715] ptp: free ptp device pin descriptors properly [ Upstream commit 75718584cb3c64e6269109d4d54f888ac5a5fd15 ] There is a bug in ptp_clock_unregister(), where ptp_cleanup_pin_groups() first frees ptp->pin_{,dev_}attr, but then posix_clock_unregister() needs them to destroy a related sysfs device. These functions can not be just swapped, as posix_clock_unregister() frees ptp which is needed in the ptp_cleanup_pin_groups(). Fix this by calling ptp_cleanup_pin_groups() in ptp_clock_release(), right before ptp is freed. This makes this patch fix an UAF bug in a patch which fixes an UAF bug. Reported-by: Antti Laakso Fixes: a33121e5487b ("ptp: fix the race between the release of ptp_clock and cdev") Link: https://lore.kernel.org/netdev/3d2bd09735dbdaf003585ca376b7c1e5b69a19bd.camel@intel.com/ Signed-off-by: Vladis Dronov Acked-by: Richard Cochran Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/ptp/ptp_clock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index b818f65480c1..e232233beb8f 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -179,6 +179,7 @@ static void ptp_clock_release(struct device *dev) { struct ptp_clock *ptp = container_of(dev, struct ptp_clock, dev); + ptp_cleanup_pin_groups(ptp); mutex_destroy(&ptp->tsevq_mux); mutex_destroy(&ptp->pincfg_mux); ida_simple_remove(&ptp_clocks_map, ptp->index); @@ -315,9 +316,8 @@ int ptp_clock_unregister(struct ptp_clock *ptp) if (ptp->pps_source) pps_unregister_source(ptp->pps_source); - ptp_cleanup_pin_groups(ptp); - posix_clock_unregister(&ptp->clock); + return 0; } EXPORT_SYMBOL(ptp_clock_unregister); From 8b7b68bd78b45e4e77b48cab2d40d769a5f67754 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 14 Jan 2020 09:27:29 +0100 Subject: [PATCH 2389/3715] r8152: add missing endpoint sanity check [ Upstream commit 86f3f4cd53707ceeec079b83205c8d3c756eca93 ] Add missing endpoint sanity check to probe in order to prevent a NULL-pointer dereference (or slab out-of-bounds access) when retrieving the interrupt-endpoint bInterval on ndo_open() in case a device lacks the expected endpoints. Fixes: 40a82917b1d3 ("net/usb/r8152: enable interrupt transfer") Cc: hayeswang Signed-off-by: Johan Hovold Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/r8152.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 6a86a03c5e95..0083c60f5cdf 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -5158,6 +5158,9 @@ static int rtl8152_probe(struct usb_interface *intf, return -ENODEV; } + if (intf->cur_altsetting->desc.bNumEndpoints < 3) + return -ENODEV; + usb_reset_device(udev); netdev = alloc_etherdev(sizeof(struct r8152)); if (!netdev) { From 43dfcc773511174d8b1650fd594d9e667e43a47d Mon Sep 17 00:00:00 2001 From: Pengcheng Yang Date: Tue, 14 Jan 2020 17:23:40 +0800 Subject: [PATCH 2390/3715] tcp: fix marked lost packets not being retransmitted [ Upstream commit e176b1ba476cf36f723cfcc7a9e57f3cb47dec70 ] When the packet pointed to by retransmit_skb_hint is unlinked by ACK, retransmit_skb_hint will be set to NULL in tcp_clean_rtx_queue(). If packet loss is detected at this time, retransmit_skb_hint will be set to point to the current packet loss in tcp_verify_retransmit_hint(), then the packets that were previously marked lost but not retransmitted due to the restriction of cwnd will be skipped and cannot be retransmitted. To fix this, when retransmit_skb_hint is NULL, retransmit_skb_hint can be reset only after all marked lost packets are retransmitted (retrans_out >= lost_out), otherwise we need to traverse from tcp_rtx_queue_head in tcp_xmit_retransmit_queue(). Packetdrill to demonstrate: // Disable RACK and set max_reordering to keep things simple 0 `sysctl -q net.ipv4.tcp_recovery=0` +0 `sysctl -q net.ipv4.tcp_max_reordering=3` // Establish a connection +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 +0 bind(3, ..., ...) = 0 +0 listen(3, 1) = 0 +.1 < S 0:0(0) win 32792 +0 > S. 0:0(0) ack 1 <...> +.01 < . 1:1(0) ack 1 win 257 +0 accept(3, ..., ...) = 4 // Send 8 data segments +0 write(4, ..., 8000) = 8000 +0 > P. 1:8001(8000) ack 1 // Enter recovery and 1:3001 is marked lost +.01 < . 1:1(0) ack 1 win 257 +0 < . 1:1(0) ack 1 win 257 +0 < . 1:1(0) ack 1 win 257 // Retransmit 1:1001, now retransmit_skb_hint points to 1001:2001 +0 > . 1:1001(1000) ack 1 // 1001:2001 was ACKed causing retransmit_skb_hint to be set to NULL +.01 < . 1:1(0) ack 2001 win 257 // Now retransmit_skb_hint points to 4001:5001 which is now marked lost // BUG: 2001:3001 was not retransmitted +0 > . 2001:3001(1000) ack 1 Signed-off-by: Pengcheng Yang Acked-by: Neal Cardwell Tested-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d2b1c39c4223..29f3df4ddd1f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -932,9 +932,10 @@ static void tcp_update_reordering(struct sock *sk, const int metric, /* This must be called before lost_out is incremented */ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) { - if (!tp->retransmit_skb_hint || - before(TCP_SKB_CB(skb)->seq, - TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) + if ((!tp->retransmit_skb_hint && tp->retrans_out >= tp->lost_out) || + (tp->retransmit_skb_hint && + before(TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(tp->retransmit_skb_hint)->seq))) tp->retransmit_skb_hint = skb; } From 6adce8341393aa6bd7d51d23eb2c93aef91ea7cd Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 9 Dec 2019 13:14:44 -0700 Subject: [PATCH 2391/3715] xen/blkfront: Adjust indentation in xlvbd_alloc_gendisk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 589b72894f53124a39d1bb3c0cecaf9dcabac417 upstream. Clang warns: ../drivers/block/xen-blkfront.c:1117:4: warning: misleading indentation; statement is not part of the previous 'if' [-Wmisleading-indentation] nr_parts = PARTS_PER_DISK; ^ ../drivers/block/xen-blkfront.c:1115:3: note: previous statement is here if (err) ^ This is because there is a space at the beginning of this line; remove it so that the indentation is consistent according to the Linux kernel coding style and clang no longer warns. While we are here, the previous line has some trailing whitespace; clean that up as well. Fixes: c80a420995e7 ("xen-blkfront: handle Xen major numbers other than XENVBD") Link: https://github.com/ClangBuiltLinux/linux/issues/791 Signed-off-by: Nathan Chancellor Reviewed-by: Juergen Gross Acked-by: Roger Pau Monné Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- drivers/block/xen-blkfront.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 32ac5f551e55..e6887714fe0a 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1115,8 +1115,8 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, if (!VDEV_IS_EXTENDED(info->vdevice)) { err = xen_translate_vdev(info->vdevice, &minor, &offset); if (err) - return err; - nr_parts = PARTS_PER_DISK; + return err; + nr_parts = PARTS_PER_DISK; } else { minor = BLKIF_MINOR_EXT(info->vdevice); nr_parts = PARTS_PER_EXT_DISK; From f5a62384b51cc1f37d7eae781283696557f56d10 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 1 Oct 2019 14:45:01 +0300 Subject: [PATCH 2392/3715] cw1200: Fix a signedness bug in cw1200_load_firmware() commit 4a50d454502f1401171ff061a5424583f91266db upstream. The "priv->hw_type" is an enum and in this context GCC will treat it as an unsigned int so the error handling will never trigger. Fixes: a910e4a94f69 ("cw1200: add driver for the ST-E CW1100 & CW1200 WLAN chipsets") Signed-off-by: Dan Carpenter Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/st/cw1200/fwio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/st/cw1200/fwio.c b/drivers/net/wireless/st/cw1200/fwio.c index 30e7646d04af..16be7fa82a23 100644 --- a/drivers/net/wireless/st/cw1200/fwio.c +++ b/drivers/net/wireless/st/cw1200/fwio.c @@ -323,12 +323,12 @@ int cw1200_load_firmware(struct cw1200_common *priv) goto out; } - priv->hw_type = cw1200_get_hw_type(val32, &major_revision); - if (priv->hw_type < 0) { + ret = cw1200_get_hw_type(val32, &major_revision); + if (ret < 0) { pr_err("Can't deduce hardware type.\n"); - ret = -ENOTSUPP; goto out; } + priv->hw_type = ret; /* Set DPLL Reg value, and read back to confirm writes work */ ret = cw1200_reg_write_32(priv, ST90TDS_TSET_GEN_R_W_REG_ID, From bde97eaa4b3686910a066b024595e1ffac202b1a Mon Sep 17 00:00:00 2001 From: Christian Hewitt Date: Mon, 9 Sep 2019 19:01:22 +0400 Subject: [PATCH 2393/3715] arm64: dts: meson-gxl-s905x-khadas-vim: fix gpio-keys-polled node commit d5f6fa904ecbadbb8e9fa6302b0fc165bec0559a upstream. Fix DTC warnings: arch/arm/dts/meson-gxl-s905x-khadas-vim.dtb: Warning (avoid_unnecessary_addr_size): /gpio-keys-polled: unnecessary #address-cells/#size-cells without "ranges" or child "reg" property Fixes: e15d2774b8c0 ("ARM64: dts: meson-gxl: add support for the Khadas VIM board") Signed-off-by: Christian Hewitt Reviewed-by: Kevin Hilman Signed-off-by: Kevin Hilman Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts index fb5db5f33e8c..ce4a116382bf 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts @@ -33,11 +33,9 @@ gpio-keys-polled { compatible = "gpio-keys-polled"; - #address-cells = <1>; - #size-cells = <0>; poll-interval = <100>; - button@0 { + power-button { label = "power"; linux,code = ; gpios = <&gpio_ao GPIOAO_2 GPIO_ACTIVE_LOW>; From 162b3b1a0b10e5cefc4c8b83f766b54ab2e43b39 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 13 Jan 2020 12:53:59 +0100 Subject: [PATCH 2394/3715] cfg80211: check for set_wiphy_params commit 24953de0a5e31dcca7e82c8a3c79abc2dfe8fb6e upstream. Check if set_wiphy_params is assigned and return an error if not, some drivers (e.g. virt_wifi where syzbot reported it) don't have it. Reported-by: syzbot+e8a797964a4180eb57d5@syzkaller.appspotmail.com Reported-by: syzbot+34b582cf32c1db008f8e@syzkaller.appspotmail.com Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20200113125358.ac07f276efff.Ibd85ee1b12e47b9efb00a2adc5cd3fac50da791a@changeid Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/rdev-ops.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 0c06240d25af..249919bdfc64 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -537,6 +537,10 @@ static inline int rdev_set_wiphy_params(struct cfg80211_registered_device *rdev, u32 changed) { int ret; + + if (!rdev->ops->set_wiphy_params) + return -EOPNOTSUPP; + trace_rdev_set_wiphy_params(&rdev->wiphy, changed); ret = rdev->ops->set_wiphy_params(&rdev->wiphy, changed); trace_rdev_return_int(&rdev->wiphy, ret); From f32935bcfd73851138595e25351f2139ded24fc7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Dec 2019 20:56:19 -0800 Subject: [PATCH 2395/3715] tick/sched: Annotate lockless access to last_jiffies_update commit de95a991bb72e009f47e0c4bbc90fc5f594588d5 upstream. syzbot (KCSAN) reported a data-race in tick_do_update_jiffies64(): BUG: KCSAN: data-race in tick_do_update_jiffies64 / tick_do_update_jiffies64 write to 0xffffffff8603d008 of 8 bytes by interrupt on cpu 1: tick_do_update_jiffies64+0x100/0x250 kernel/time/tick-sched.c:73 tick_sched_do_timer+0xd4/0xe0 kernel/time/tick-sched.c:138 tick_sched_timer+0x43/0xe0 kernel/time/tick-sched.c:1292 __run_hrtimer kernel/time/hrtimer.c:1514 [inline] __hrtimer_run_queues+0x274/0x5f0 kernel/time/hrtimer.c:1576 hrtimer_interrupt+0x22a/0x480 kernel/time/hrtimer.c:1638 local_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1110 [inline] smp_apic_timer_interrupt+0xdc/0x280 arch/x86/kernel/apic/apic.c:1135 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:830 arch_local_irq_restore arch/x86/include/asm/paravirt.h:756 [inline] kcsan_setup_watchpoint+0x1d4/0x460 kernel/kcsan/core.c:436 check_access kernel/kcsan/core.c:466 [inline] __tsan_read1 kernel/kcsan/core.c:593 [inline] __tsan_read1+0xc2/0x100 kernel/kcsan/core.c:593 kallsyms_expand_symbol.constprop.0+0x70/0x160 kernel/kallsyms.c:79 kallsyms_lookup_name+0x7f/0x120 kernel/kallsyms.c:170 insert_report_filterlist kernel/kcsan/debugfs.c:155 [inline] debugfs_write+0x14b/0x2d0 kernel/kcsan/debugfs.c:256 full_proxy_write+0xbd/0x100 fs/debugfs/file.c:225 __vfs_write+0x67/0xc0 fs/read_write.c:494 vfs_write fs/read_write.c:558 [inline] vfs_write+0x18a/0x390 fs/read_write.c:542 ksys_write+0xd5/0x1b0 fs/read_write.c:611 __do_sys_write fs/read_write.c:623 [inline] __se_sys_write fs/read_write.c:620 [inline] __x64_sys_write+0x4c/0x60 fs/read_write.c:620 do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x44/0xa9 read to 0xffffffff8603d008 of 8 bytes by task 0 on cpu 0: tick_do_update_jiffies64+0x2b/0x250 kernel/time/tick-sched.c:62 tick_nohz_update_jiffies kernel/time/tick-sched.c:505 [inline] tick_nohz_irq_enter kernel/time/tick-sched.c:1257 [inline] tick_irq_enter+0x139/0x1c0 kernel/time/tick-sched.c:1274 irq_enter+0x4f/0x60 kernel/softirq.c:354 entering_irq arch/x86/include/asm/apic.h:517 [inline] entering_ack_irq arch/x86/include/asm/apic.h:523 [inline] smp_apic_timer_interrupt+0x55/0x280 arch/x86/kernel/apic/apic.c:1133 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:830 native_safe_halt+0xe/0x10 arch/x86/include/asm/irqflags.h:60 arch_cpu_idle+0xa/0x10 arch/x86/kernel/process.c:571 default_idle_call+0x1e/0x40 kernel/sched/idle.c:94 cpuidle_idle_call kernel/sched/idle.c:154 [inline] do_idle+0x1af/0x280 kernel/sched/idle.c:263 cpu_startup_entry+0x1b/0x20 kernel/sched/idle.c:355 rest_init+0xec/0xf6 init/main.c:452 arch_call_rest_init+0x17/0x37 start_kernel+0x838/0x85e init/main.c:786 x86_64_start_reservations+0x29/0x2b arch/x86/kernel/head64.c:490 x86_64_start_kernel+0x72/0x76 arch/x86/kernel/head64.c:471 secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.4.0-rc7+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Use READ_ONCE() and WRITE_ONCE() to annotate this expected race. Reported-by: syzbot Signed-off-by: Eric Dumazet Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20191205045619.204946-1-edumazet@google.com Signed-off-by: Greg Kroah-Hartman --- kernel/time/tick-sched.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index a8fa0a896b78..3c7b400512eb 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -60,8 +60,9 @@ static void tick_do_update_jiffies64(ktime_t now) /* * Do a quick check without holding jiffies_lock: + * The READ_ONCE() pairs with two updates done later in this function. */ - delta = ktime_sub(now, last_jiffies_update); + delta = ktime_sub(now, READ_ONCE(last_jiffies_update)); if (delta < tick_period) return; @@ -72,8 +73,9 @@ static void tick_do_update_jiffies64(ktime_t now) if (delta >= tick_period) { delta = ktime_sub(delta, tick_period); - last_jiffies_update = ktime_add(last_jiffies_update, - tick_period); + /* Pairs with the lockless read in this function. */ + WRITE_ONCE(last_jiffies_update, + ktime_add(last_jiffies_update, tick_period)); /* Slow path for long timeouts */ if (unlikely(delta >= tick_period)) { @@ -81,8 +83,10 @@ static void tick_do_update_jiffies64(ktime_t now) ticks = ktime_divns(delta, incr); - last_jiffies_update = ktime_add_ns(last_jiffies_update, - incr * ticks); + /* Pairs with the lockless read in this function. */ + WRITE_ONCE(last_jiffies_update, + ktime_add_ns(last_jiffies_update, + incr * ticks)); } do_timer(++ticks); From 4f2b5f109b247d5afe50d28c93efba9ec8f61be5 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 28 Nov 2019 15:33:57 +0000 Subject: [PATCH 2396/3715] Revert "arm64: dts: juno: add dma-ranges property" commit 54fb3fe0f211d4729a2551cf9497bd612189af9d upstream. This reverts commit 193d00a2b35ee3353813b4006a18131122087205. Commit 951d48855d86 ("of: Make of_dma_get_range() work on bus nodes") reworked the logic such that of_dma_get_range() works correctly starting from a bus node containing "dma-ranges". Since on Juno we don't have a SoC level bus node and "dma-ranges" is present only in the root node, we get the following error: OF: translation of DMA address(0) to CPU address failed node(/sram@2e000000) OF: translation of DMA address(0) to CPU address failed node(/uart@7ff80000) ... OF: translation of DMA address(0) to CPU address failed node(/mhu@2b1f0000) OF: translation of DMA address(0) to CPU address failed node(/iommu@2b600000) OF: translation of DMA address(0) to CPU address failed node(/iommu@2b600000) OF: translation of DMA address(0) to CPU address failed node(/iommu@2b600000) So let's fix it by dropping the "dma-ranges" property for now. This should be fine since it doesn't represent any kind of device-visible restriction; it was only there for completeness, and we've since given in to the assumption that missing "dma-ranges" implies a 1:1 mapping anyway. We can add it later with a proper SoC bus node and moving all the devices that belong there along with the "dma-ranges" if required. Fixes: 193d00a2b35e ("arm64: dts: juno: add dma-ranges property") Cc: Rob Herring Cc: Liviu Dudau Cc: Lorenzo Pieralisi Acked-by: Robin Murphy Signed-off-by: Sudeep Holla Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/arm/juno-base.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/arm/juno-base.dtsi b/arch/arm64/boot/dts/arm/juno-base.dtsi index f165f04db0c9..13ee8ffa9bbf 100644 --- a/arch/arm64/boot/dts/arm/juno-base.dtsi +++ b/arch/arm64/boot/dts/arm/juno-base.dtsi @@ -5,7 +5,6 @@ /* * Devices shared by all Juno boards */ - dma-ranges = <0 0 0 0 0x100 0>; memtimer: timer@2a810000 { compatible = "arm,armv7-timer-mem"; From f6c6d170f947638e45166efc3e13e97a70067460 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Wed, 15 Jan 2020 13:00:59 -0500 Subject: [PATCH 2397/3715] reiserfs: fix handling of -EOPNOTSUPP in reiserfs_for_each_xattr commit 394440d469413fa9b74f88a11f144d76017221f2 upstream. Commit 60e4cf67a58 (reiserfs: fix extended attributes on the root directory) introduced a regression open_xa_root started returning -EOPNOTSUPP but it was not handled properly in reiserfs_for_each_xattr. When the reiserfs module is built without CONFIG_REISERFS_FS_XATTR, deleting an inode would result in a warning and chowning an inode would also result in a warning and then fail to complete. With CONFIG_REISERFS_FS_XATTR enabled, the xattr root would always be present for read-write operations. This commit handles -EOPNOSUPP in the same way -ENODATA is handled. Fixes: 60e4cf67a582 ("reiserfs: fix extended attributes on the root directory") CC: stable@vger.kernel.org # Commit 60e4cf67a58 was picked up by stable Link: https://lore.kernel.org/r/20200115180059.6935-1-jeffm@suse.com Reported-by: Michael Brunnbauer Signed-off-by: Jeff Mahoney Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/reiserfs/xattr.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 29a0c0969e91..28f6daf371d3 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -319,8 +319,12 @@ static int reiserfs_for_each_xattr(struct inode *inode, out_dir: dput(dir); out: - /* -ENODATA isn't an error */ - if (err == -ENODATA) + /* + * -ENODATA: this object doesn't have any xattrs + * -EOPNOTSUPP: this file system doesn't have xattrs enabled on disk. + * Neither are errors + */ + if (err == -ENODATA || err == -EOPNOTSUPP) err = 0; return err; } From bf94a6aa2aace00d656389d44375a124892a77ed Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 22 Oct 2019 13:23:24 +0300 Subject: [PATCH 2398/3715] scsi: esas2r: unlock on error in esas2r_nvram_read_direct() commit 906ca6353ac09696c1bf0892513c8edffff5e0a6 upstream. This error path is missing an unlock. Fixes: 26780d9e12ed ("[SCSI] esas2r: ATTO Technology ExpressSAS 6G SAS/SATA RAID Adapter Driver") Link: https://lore.kernel.org/r/20191022102324.GA27540@mwanda Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/esas2r/esas2r_flash.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/esas2r/esas2r_flash.c b/drivers/scsi/esas2r/esas2r_flash.c index 7bd376d95ed5..b02ac389e6c6 100644 --- a/drivers/scsi/esas2r/esas2r_flash.c +++ b/drivers/scsi/esas2r/esas2r_flash.c @@ -1197,6 +1197,7 @@ bool esas2r_nvram_read_direct(struct esas2r_adapter *a) if (!esas2r_read_flash_block(a, a->nvram, FLS_OFFSET_NVR, sizeof(struct esas2r_sas_nvram))) { esas2r_hdebug("NVRAM read failed, using defaults"); + up(&a->nvram_semaphore); return false; } From f88c50ceca6d4cc777b3f6e0d49c4cdcfc7b67f6 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Tue, 5 Nov 2019 17:25:27 +0800 Subject: [PATCH 2399/3715] scsi: qla4xxx: fix double free bug commit 3fe3d2428b62822b7b030577cd612790bdd8c941 upstream. The variable init_fw_cb is released twice, resulting in a double free bug. The call to the function dma_free_coherent() before goto is removed to get rid of potential double free. Fixes: 2a49a78ed3c8 ("[SCSI] qla4xxx: added IPv6 support.") Link: https://lore.kernel.org/r/1572945927-27796-1-git-send-email-bianpan2016@163.com Signed-off-by: Pan Bian Acked-by: Manish Rangankar Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla4xxx/ql4_mbx.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/scsi/qla4xxx/ql4_mbx.c b/drivers/scsi/qla4xxx/ql4_mbx.c index 1da04f323d38..c402fc583da3 100644 --- a/drivers/scsi/qla4xxx/ql4_mbx.c +++ b/drivers/scsi/qla4xxx/ql4_mbx.c @@ -641,9 +641,6 @@ int qla4xxx_initialize_fw_cb(struct scsi_qla_host * ha) if (qla4xxx_get_ifcb(ha, &mbox_cmd[0], &mbox_sts[0], init_fw_cb_dma) != QLA_SUCCESS) { - dma_free_coherent(&ha->pdev->dev, - sizeof(struct addr_ctrl_blk), - init_fw_cb, init_fw_cb_dma); goto exit_init_fw_cb; } From 3bf34ef5202275b5515a5756b59e10094b4965fe Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Wed, 6 Nov 2019 20:32:21 +0800 Subject: [PATCH 2400/3715] scsi: bnx2i: fix potential use after free commit 29d28f2b8d3736ac61c28ef7e20fda63795b74d9 upstream. The member hba->pcidev may be used after its reference is dropped. Move the put function to where it is never used to avoid potential use after free issues. Fixes: a77171806515 ("[SCSI] bnx2i: Removed the reference to the netdev->base_addr") Link: https://lore.kernel.org/r/1573043541-19126-1-git-send-email-bianpan2016@163.com Signed-off-by: Pan Bian Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/bnx2i/bnx2i_iscsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/bnx2i/bnx2i_iscsi.c b/drivers/scsi/bnx2i/bnx2i_iscsi.c index 03c104b47f31..b832bd0ce202 100644 --- a/drivers/scsi/bnx2i/bnx2i_iscsi.c +++ b/drivers/scsi/bnx2i/bnx2i_iscsi.c @@ -915,12 +915,12 @@ void bnx2i_free_hba(struct bnx2i_hba *hba) INIT_LIST_HEAD(&hba->ep_ofld_list); INIT_LIST_HEAD(&hba->ep_active_list); INIT_LIST_HEAD(&hba->ep_destroy_list); - pci_dev_put(hba->pcidev); if (hba->regview) { pci_iounmap(hba->pcidev, hba->regview); hba->regview = NULL; } + pci_dev_put(hba->pcidev); bnx2i_free_mp_bdt(hba); bnx2i_release_free_cid_que(hba); iscsi_host_free(shost); From 3cb816cf0e2409d669be267a998fce3cf0e5690b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 7 Nov 2019 13:55:25 -0800 Subject: [PATCH 2401/3715] scsi: target: core: Fix a pr_debug() argument commit c941e0d172605731de9b4628bd4146d35cf2e7d6 upstream. Print the string for which conversion failed instead of printing the function name twice. Fixes: 2650d71e244f ("target: move transport ID handling to the core") Cc: Christoph Hellwig Link: https://lore.kernel.org/r/20191107215525.64415-1-bvanassche@acm.org Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_fabric_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/target_core_fabric_lib.c b/drivers/target/target_core_fabric_lib.c index 508da345b73f..95aa47ac4dcd 100644 --- a/drivers/target/target_core_fabric_lib.c +++ b/drivers/target/target_core_fabric_lib.c @@ -131,7 +131,7 @@ static int srp_get_pr_transport_id( memset(buf + 8, 0, leading_zero_bytes); rc = hex2bin(buf + 8 + leading_zero_bytes, p, count); if (rc < 0) { - pr_debug("hex2bin failed for %s: %d\n", __func__, rc); + pr_debug("hex2bin failed for %s: %d\n", p, rc); return rc; } From ba0ef168a2635cb1c80b30c185ddf06366223e11 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 21 Nov 2019 13:40:47 +0800 Subject: [PATCH 2402/3715] scsi: qla2xxx: Fix qla2x00_request_irqs() for MSI commit 45dc8f2d9c94ed74a5e31e63e9136a19a7e16081 upstream. Commit 4fa183455988 ("scsi: qla2xxx: Utilize pci_alloc_irq_vectors/ pci_free_irq_vectors calls.") use pci_alloc_irq_vectors() to replace pci_enable_msi() but it didn't handle the return value correctly. This bug make qla2x00 always fail to setup MSI if MSI-X fail, so fix it. BTW, improve the log message of return value in qla2x00_request_irqs() to avoid confusion. Fixes: 4fa183455988 ("scsi: qla2xxx: Utilize pci_alloc_irq_vectors/pci_free_irq_vectors calls.") Cc: Michael Hernandez Link: https://lore.kernel.org/r/1574314847-14280-1-git-send-email-chenhc@lemote.com Signed-off-by: Huacai Chen Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_isr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 7f2da56274bd..648916a9082c 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -3519,7 +3519,7 @@ qla2x00_request_irqs(struct qla_hw_data *ha, struct rsp_que *rsp) skip_msix: ql_log(ql_log_info, vha, 0x0037, - "Falling back-to MSI mode -%d.\n", ret); + "Falling back-to MSI mode -- ret=%d.\n", ret); if (!IS_QLA24XX(ha) && !IS_QLA2532(ha) && !IS_QLA8432(ha) && !IS_QLA8001(ha) && !IS_P3P_TYPE(ha) && !IS_QLAFX00(ha) && @@ -3527,13 +3527,13 @@ skip_msix: goto skip_msi; ret = pci_alloc_irq_vectors(ha->pdev, 1, 1, PCI_IRQ_MSI); - if (!ret) { + if (ret > 0) { ql_dbg(ql_dbg_init, vha, 0x0038, "MSI: Enabled.\n"); ha->flags.msi_enabled = 1; } else ql_log(ql_log_warn, vha, 0x0039, - "Falling back-to INTa mode -- %d.\n", ret); + "Falling back-to INTa mode -- ret=%d.\n", ret); skip_msi: /* Skip INTx on ISP82xx. */ From 12b61fb73c7c02ba89b722d95a25619473684ff8 Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Fri, 22 Nov 2019 22:19:22 +0000 Subject: [PATCH 2403/3715] scsi: qla2xxx: fix rports not being mark as lost in sync fabric scan commit d341e9a8f2cffe4000c610225c629f62c7489c74 upstream. In qla2x00_find_all_fabric_devs(), fcport->flags & FCF_LOGIN_NEEDED is a necessary condition for logging into new rports, but not for dropping lost ones. Fixes: 726b85487067 ("qla2xxx: Add framework for async fabric discovery") Link: https://lore.kernel.org/r/20191122221912.20100-2-martin.wilck@suse.com Tested-by: David Bond Signed-off-by: Martin Wilck Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_init.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index bd2421863510..a66f7cec797c 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -5145,8 +5145,7 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha) if (test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags)) break; - if ((fcport->flags & FCF_FABRIC_DEVICE) == 0 || - (fcport->flags & FCF_LOGIN_NEEDED) == 0) + if ((fcport->flags & FCF_FABRIC_DEVICE) == 0) continue; if (fcport->scan_state == QLA_FCPORT_SCAN) { @@ -5171,7 +5170,8 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha) } } - if (fcport->scan_state == QLA_FCPORT_FOUND) + if (fcport->scan_state == QLA_FCPORT_FOUND && + (fcport->flags & FCF_LOGIN_NEEDED) != 0) qla24xx_fcport_handle_login(vha, fcport); } return (rval); From 1958113a576a0d9d88f267f64fb2167dfb649d0e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 1 Nov 2019 14:14:47 -0700 Subject: [PATCH 2404/3715] scsi: core: scsi_trace: Use get_unaligned_be*() commit b1335f5b0486f61fb66b123b40f8e7a98e49605d upstream. This patch fixes an unintended sign extension on left shifts. From Colin King: "Shifting a u8 left will cause the value to be promoted to an integer. If the top bit of the u8 is set then the following conversion to an u64 will sign extend the value causing the upper 32 bits to be set in the result." Fix this by using get_unaligned_be*() instead. Fixes: bf8162354233 ("[SCSI] add scsi trace core functions and put trace points") Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Douglas Gilbert Link: https://lore.kernel.org/r/20191101211447.187151-1-bvanassche@acm.org Reported-by: Colin Ian King Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_trace.c | 101 ++++++++++---------------------------- 1 file changed, 27 insertions(+), 74 deletions(-) diff --git a/drivers/scsi/scsi_trace.c b/drivers/scsi/scsi_trace.c index 617a60737590..22472d140ef7 100644 --- a/drivers/scsi/scsi_trace.c +++ b/drivers/scsi/scsi_trace.c @@ -21,7 +21,7 @@ #include #define SERVICE_ACTION16(cdb) (cdb[1] & 0x1f) -#define SERVICE_ACTION32(cdb) ((cdb[8] << 8) | cdb[9]) +#define SERVICE_ACTION32(cdb) (get_unaligned_be16(&cdb[8])) static const char * scsi_trace_misc(struct trace_seq *, unsigned char *, int); @@ -51,17 +51,12 @@ static const char * scsi_trace_rw10(struct trace_seq *p, unsigned char *cdb, int len) { const char *ret = trace_seq_buffer_ptr(p); - sector_t lba = 0, txlen = 0; + u32 lba, txlen; - lba |= (cdb[2] << 24); - lba |= (cdb[3] << 16); - lba |= (cdb[4] << 8); - lba |= cdb[5]; - txlen |= (cdb[7] << 8); - txlen |= cdb[8]; + lba = get_unaligned_be32(&cdb[2]); + txlen = get_unaligned_be16(&cdb[7]); - trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u", - (unsigned long long)lba, (unsigned long long)txlen, + trace_seq_printf(p, "lba=%u txlen=%u protect=%u", lba, txlen, cdb[1] >> 5); if (cdb[0] == WRITE_SAME) @@ -76,19 +71,12 @@ static const char * scsi_trace_rw12(struct trace_seq *p, unsigned char *cdb, int len) { const char *ret = trace_seq_buffer_ptr(p); - sector_t lba = 0, txlen = 0; + u32 lba, txlen; - lba |= (cdb[2] << 24); - lba |= (cdb[3] << 16); - lba |= (cdb[4] << 8); - lba |= cdb[5]; - txlen |= (cdb[6] << 24); - txlen |= (cdb[7] << 16); - txlen |= (cdb[8] << 8); - txlen |= cdb[9]; + lba = get_unaligned_be32(&cdb[2]); + txlen = get_unaligned_be32(&cdb[6]); - trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u", - (unsigned long long)lba, (unsigned long long)txlen, + trace_seq_printf(p, "lba=%u txlen=%u protect=%u", lba, txlen, cdb[1] >> 5); trace_seq_putc(p, 0); @@ -99,23 +87,13 @@ static const char * scsi_trace_rw16(struct trace_seq *p, unsigned char *cdb, int len) { const char *ret = trace_seq_buffer_ptr(p); - sector_t lba = 0, txlen = 0; + u64 lba; + u32 txlen; - lba |= ((u64)cdb[2] << 56); - lba |= ((u64)cdb[3] << 48); - lba |= ((u64)cdb[4] << 40); - lba |= ((u64)cdb[5] << 32); - lba |= (cdb[6] << 24); - lba |= (cdb[7] << 16); - lba |= (cdb[8] << 8); - lba |= cdb[9]; - txlen |= (cdb[10] << 24); - txlen |= (cdb[11] << 16); - txlen |= (cdb[12] << 8); - txlen |= cdb[13]; + lba = get_unaligned_be64(&cdb[2]); + txlen = get_unaligned_be32(&cdb[10]); - trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u", - (unsigned long long)lba, (unsigned long long)txlen, + trace_seq_printf(p, "lba=%llu txlen=%u protect=%u", lba, txlen, cdb[1] >> 5); if (cdb[0] == WRITE_SAME_16) @@ -130,8 +108,8 @@ static const char * scsi_trace_rw32(struct trace_seq *p, unsigned char *cdb, int len) { const char *ret = trace_seq_buffer_ptr(p), *cmd; - sector_t lba = 0, txlen = 0; - u32 ei_lbrt = 0; + u64 lba; + u32 ei_lbrt, txlen; switch (SERVICE_ACTION32(cdb)) { case READ_32: @@ -151,26 +129,12 @@ scsi_trace_rw32(struct trace_seq *p, unsigned char *cdb, int len) goto out; } - lba |= ((u64)cdb[12] << 56); - lba |= ((u64)cdb[13] << 48); - lba |= ((u64)cdb[14] << 40); - lba |= ((u64)cdb[15] << 32); - lba |= (cdb[16] << 24); - lba |= (cdb[17] << 16); - lba |= (cdb[18] << 8); - lba |= cdb[19]; - ei_lbrt |= (cdb[20] << 24); - ei_lbrt |= (cdb[21] << 16); - ei_lbrt |= (cdb[22] << 8); - ei_lbrt |= cdb[23]; - txlen |= (cdb[28] << 24); - txlen |= (cdb[29] << 16); - txlen |= (cdb[30] << 8); - txlen |= cdb[31]; + lba = get_unaligned_be64(&cdb[12]); + ei_lbrt = get_unaligned_be32(&cdb[20]); + txlen = get_unaligned_be32(&cdb[28]); - trace_seq_printf(p, "%s_32 lba=%llu txlen=%llu protect=%u ei_lbrt=%u", - cmd, (unsigned long long)lba, - (unsigned long long)txlen, cdb[10] >> 5, ei_lbrt); + trace_seq_printf(p, "%s_32 lba=%llu txlen=%u protect=%u ei_lbrt=%u", + cmd, lba, txlen, cdb[10] >> 5, ei_lbrt); if (SERVICE_ACTION32(cdb) == WRITE_SAME_32) trace_seq_printf(p, " unmap=%u", cdb[10] >> 3 & 1); @@ -185,7 +149,7 @@ static const char * scsi_trace_unmap(struct trace_seq *p, unsigned char *cdb, int len) { const char *ret = trace_seq_buffer_ptr(p); - unsigned int regions = cdb[7] << 8 | cdb[8]; + unsigned int regions = get_unaligned_be16(&cdb[7]); trace_seq_printf(p, "regions=%u", (regions - 8) / 16); trace_seq_putc(p, 0); @@ -197,8 +161,8 @@ static const char * scsi_trace_service_action_in(struct trace_seq *p, unsigned char *cdb, int len) { const char *ret = trace_seq_buffer_ptr(p), *cmd; - sector_t lba = 0; - u32 alloc_len = 0; + u64 lba; + u32 alloc_len; switch (SERVICE_ACTION16(cdb)) { case SAI_READ_CAPACITY_16: @@ -212,21 +176,10 @@ scsi_trace_service_action_in(struct trace_seq *p, unsigned char *cdb, int len) goto out; } - lba |= ((u64)cdb[2] << 56); - lba |= ((u64)cdb[3] << 48); - lba |= ((u64)cdb[4] << 40); - lba |= ((u64)cdb[5] << 32); - lba |= (cdb[6] << 24); - lba |= (cdb[7] << 16); - lba |= (cdb[8] << 8); - lba |= cdb[9]; - alloc_len |= (cdb[10] << 24); - alloc_len |= (cdb[11] << 16); - alloc_len |= (cdb[12] << 8); - alloc_len |= cdb[13]; + lba = get_unaligned_be64(&cdb[2]); + alloc_len = get_unaligned_be32(&cdb[10]); - trace_seq_printf(p, "%s lba=%llu alloc_len=%u", cmd, - (unsigned long long)lba, alloc_len); + trace_seq_printf(p, "%s lba=%llu alloc_len=%u", cmd, lba, alloc_len); out: trace_seq_putc(p, 0); From 376bbcf2718ee48cc020771b0d3057ad6a27eb93 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 25 Oct 2019 17:46:25 +0900 Subject: [PATCH 2405/3715] perf probe: Fix wrong address verification commit 07d369857808b7e8e471bbbbb0074a6718f89b31 upstream. Since there are some DIE which has only ranges instead of the combination of entrypc/highpc, address verification must use dwarf_haspc() instead of dwarf_entrypc/dwarf_highpc. Also, the ranges only DIE will have a partial code in different section (e.g. unlikely code will be in text.unlikely as "FUNC.cold" symbol). In that case, we can not use dwarf_entrypc() or die_entrypc(), because the offset from original DIE can be a minus value. Instead, this simply gets the symbol and offset from symtab. Without this patch; # perf probe -D clear_tasks_mm_cpumask:1 Failed to get entry address of clear_tasks_mm_cpumask Error: Failed to add events. And with this patch: # perf probe -D clear_tasks_mm_cpumask:1 p:probe/clear_tasks_mm_cpumask clear_tasks_mm_cpumask+0 p:probe/clear_tasks_mm_cpumask_1 clear_tasks_mm_cpumask+5 p:probe/clear_tasks_mm_cpumask_2 clear_tasks_mm_cpumask+8 p:probe/clear_tasks_mm_cpumask_3 clear_tasks_mm_cpumask+16 p:probe/clear_tasks_mm_cpumask_4 clear_tasks_mm_cpumask+82 Committer testing: I managed to reproduce the above: [root@quaco ~]# perf probe -D clear_tasks_mm_cpumask:1 p:probe/clear_tasks_mm_cpumask _text+919968 p:probe/clear_tasks_mm_cpumask_1 _text+919973 p:probe/clear_tasks_mm_cpumask_2 _text+919976 [root@quaco ~]# But then when trying to actually put the probe in place, it fails if I use :0 as the offset: [root@quaco ~]# perf probe -L clear_tasks_mm_cpumask | head -5 0 void clear_tasks_mm_cpumask(int cpu) 1 { 2 struct task_struct *p; [root@quaco ~]# perf probe clear_tasks_mm_cpumask:0 Probe point 'clear_tasks_mm_cpumask' not found. Error: Failed to add events. [root@quaco The next patch is needed to fix this case. Fixes: 576b523721b7 ("perf probe: Fix probing symbols with optimization suffix") Reported-by: Arnaldo Carvalho de Melo Tested-by: Arnaldo Carvalho de Melo Signed-off-by: Masami Hiramatsu Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/157199318513.8075.10463906803299647907.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/probe-finder.c | 32 ++++++++++---------------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 30a5e92b67bd..893193bd28c1 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -615,38 +615,26 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwfl_Module *mod, const char *function, struct probe_trace_point *tp) { - Dwarf_Addr eaddr, highaddr; + Dwarf_Addr eaddr; GElf_Sym sym; const char *symbol; /* Verify the address is correct */ - if (dwarf_entrypc(sp_die, &eaddr) != 0) { - pr_warning("Failed to get entry address of %s\n", - dwarf_diename(sp_die)); - return -ENOENT; - } - if (dwarf_highpc(sp_die, &highaddr) != 0) { - pr_warning("Failed to get end address of %s\n", - dwarf_diename(sp_die)); - return -ENOENT; - } - if (paddr > highaddr) { - pr_warning("Offset specified is greater than size of %s\n", + if (!dwarf_haspc(sp_die, paddr)) { + pr_warning("Specified offset is out of %s\n", dwarf_diename(sp_die)); return -EINVAL; } - symbol = dwarf_diename(sp_die); + /* Try to get actual symbol name from symtab */ + symbol = dwfl_module_addrsym(mod, paddr, &sym, NULL); if (!symbol) { - /* Try to get the symbol name from symtab */ - symbol = dwfl_module_addrsym(mod, paddr, &sym, NULL); - if (!symbol) { - pr_warning("Failed to find symbol at 0x%lx\n", - (unsigned long)paddr); - return -ENOENT; - } - eaddr = sym.st_value; + pr_warning("Failed to find symbol at 0x%lx\n", + (unsigned long)paddr); + return -ENOENT; } + eaddr = sym.st_value; + tp->offset = (unsigned long)(paddr - eaddr); tp->address = (unsigned long)paddr; tp->symbol = strdup(symbol); From 9513f5a492b9e766eaac9aec0e573d070ba62af0 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 6 Nov 2019 18:31:25 +0100 Subject: [PATCH 2406/3715] regulator: ab8500: Remove SYSCLKREQ from enum ab8505_regulator_id commit 458ea3ad033fc86e291712ce50cbe60c3428cf30 upstream. Those regulators are not actually supported by the AB8500 regulator driver. There is no ab8500_regulator_info for them and no entry in ab8505_regulator_match. As such, they cannot be registered successfully, and looking them up in ab8505_regulator_match causes an out-of-bounds array read. Fixes: 547f384f33db ("regulator: ab8500: add support for ab8505") Cc: Linus Walleij Signed-off-by: Stephan Gerhold Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20191106173125.14496-2-stephan@gerhold.net Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- include/linux/regulator/ab8500.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/regulator/ab8500.h b/include/linux/regulator/ab8500.h index 260c4aa1d976..3f6b8b9ef49d 100644 --- a/include/linux/regulator/ab8500.h +++ b/include/linux/regulator/ab8500.h @@ -43,8 +43,6 @@ enum ab8505_regulator_id { AB8505_LDO_ANAMIC2, AB8505_LDO_AUX8, AB8505_LDO_ANA, - AB8505_SYSCLKREQ_2, - AB8505_SYSCLKREQ_4, AB8505_NUM_REGULATORS, }; From 8bac50406cca10a219aa899243d49c57ddaf7c5b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 23 Jan 2020 08:20:37 +0100 Subject: [PATCH 2407/3715] Linux 4.14.167 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7c62b4078c1b..3e8eaabf2bcb 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 166 +SUBLEVEL = 167 EXTRAVERSION = NAME = Petit Gorille From 6fc3e71c91ef7030e70e4e5dad135841f32b9e4b Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Mon, 14 Oct 2019 14:11:57 -0700 Subject: [PATCH 2408/3715] UPSTREAM: mm/slub.c: init_on_free=1 should wipe freelist ptr for bulk allocations Upstream commit 0f181f9fbea8bc7ea2f7e13ae7f8c256b39e254c. slab_alloc_node() already zeroed out the freelist pointer if init_on_free was on. Thibaut Sautereau noticed that the same needs to be done for kmem_cache_alloc_bulk(), which performs the allocations separately. kmem_cache_alloc_bulk() is currently used in two places in the kernel, so this change is unlikely to have a major performance impact. SLAB doesn't require a similar change, as auto-initialization makes the allocator store the freelist pointers off-slab. Link: http://lkml.kernel.org/r/20191007091605.30530-1-glider@google.com Fixes: 6471384af2a6 ("mm: security: introduce init_on_alloc=1 and init_on_free=1 boot options") Signed-off-by: Alexander Potapenko Reported-by: Thibaut Sautereau Reported-by: Kees Cook Cc: Christoph Lameter Cc: Laura Abbott Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 138435492 Test: Boot an ARM64 mobile device with and without init_on_alloc=1 Change-Id: I05281c27d830867567d47d6a52faec7dc55e82fa Signed-off-by: Alexander Potapenko --- mm/slub.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 6f4d7d869a07..f9957084042a 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2691,6 +2691,17 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, return p; } +/* + * If the object has been wiped upon free, make sure it's fully initialized by + * zeroing out freelist pointer. + */ +static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s, + void *obj) +{ + if (unlikely(slab_want_init_on_free(s)) && obj) + memset((void *)((char *)obj + s->offset), 0, sizeof(void *)); +} + /* * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc) * have the fastpath folded into their functions. So no function call @@ -2779,12 +2790,8 @@ redo: prefetch_freepointer(s, next_object); stat(s, ALLOC_FASTPATH); } - /* - * If the object has been wiped upon free, make sure it's fully - * initialized by zeroing out freelist pointer. - */ - if (unlikely(slab_want_init_on_free(s)) && object) - memset(object + s->offset, 0, sizeof(void *)); + + maybe_wipe_obj_freeptr(s, object); if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object) memset(object, 0, s->object_size); @@ -3199,10 +3206,13 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, goto error; c = this_cpu_ptr(s->cpu_slab); + maybe_wipe_obj_freeptr(s, p[i]); + continue; /* goto for-loop */ } c->freelist = get_freepointer(s, object); p[i] = object; + maybe_wipe_obj_freeptr(s, p[i]); } c->tid = next_tid(c->tid); local_irq_enable(); From 1871e5516480b7959f8570c7c105516ad39fa5d9 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 15 Nov 2019 17:34:50 -0800 Subject: [PATCH 2409/3715] UPSTREAM: mm: slub: really fix slab walking for init_on_free Upstream commit aea4df4c53f754cc229edde6c5465e481311cc49. Commit 1b7e816fc80e ("mm: slub: Fix slab walking for init_on_free") fixed one problem with the slab walking but missed a key detail: When walking the list, the head and tail pointers need to be updated since we end up reversing the list as a result. Without doing this, bulk free is broken. One way this is exposed is a NULL pointer with slub_debug=F: ============================================================================= BUG skbuff_head_cache (Tainted: G T): Object already free ----------------------------------------------------------------------------- INFO: Slab 0x000000000d2d2f8f objects=16 used=3 fp=0x0000000064309071 flags=0x3fff00000000201 BUG: kernel NULL pointer dereference, address: 0000000000000000 Oops: 0000 [#1] PREEMPT SMP PTI RIP: 0010:print_trailer+0x70/0x1d5 Call Trace: free_debug_processing.cold.37+0xc9/0x149 __slab_free+0x22a/0x3d0 kmem_cache_free_bulk+0x415/0x420 __kfree_skb_flush+0x30/0x40 net_rx_action+0x2dd/0x480 __do_softirq+0xf0/0x246 irq_exit+0x93/0xb0 do_IRQ+0xa0/0x110 common_interrupt+0xf/0xf Given we're now almost identical to the existing debugging code which correctly walks the list, combine with that. Link: https://lkml.kernel.org/r/20191104170303.GA50361@gandi.net Link: http://lkml.kernel.org/r/20191106222208.26815-1-labbott@redhat.com Fixes: 1b7e816fc80e ("mm: slub: Fix slab walking for init_on_free") Signed-off-by: Laura Abbott Reported-by: Thibaut Sautereau Acked-by: David Rientjes Tested-by: Alexander Potapenko Acked-by: Alexander Potapenko Cc: Kees Cook Cc: "David S. Miller" Cc: Vlastimil Babka Cc: Cc: Christoph Lameter Cc: Pekka Enberg Cc: Joonsoo Kim Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 138435492 Test: Boot an ARM64 mobile device with and without init_on_alloc=1 Change-Id: I33bbdadfe85ed73a70ba9edbd708105492a1b08a Signed-off-by: Alexander Potapenko --- mm/slub.c | 39 +++++++++------------------------------ 1 file changed, 9 insertions(+), 30 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index f9957084042a..22704e373480 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1416,12 +1416,15 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, void *old_tail = *tail ? *tail : *head; int rsize; - if (slab_want_init_on_free(s)) { - void *p = NULL; + /* Head and tail of the reconstructed freelist */ + *head = NULL; + *tail = NULL; - do { - object = next; - next = get_freepointer(s, object); + do { + object = next; + next = get_freepointer(s, object); + + if (slab_want_init_on_free(s)) { /* * Clear the object and the metadata, but don't touch * the redzone. @@ -1431,29 +1434,8 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, : 0; memset((char *)object + s->inuse, 0, s->size - s->inuse - rsize); - set_freepointer(s, object, p); - p = object; - } while (object != old_tail); - } -/* - * Compiler cannot detect this function can be removed if slab_free_hook() - * evaluates to nothing. Thus, catch all relevant config debug options here. - */ -#if defined(CONFIG_LOCKDEP) || \ - defined(CONFIG_DEBUG_KMEMLEAK) || \ - defined(CONFIG_DEBUG_OBJECTS_FREE) || \ - defined(CONFIG_KASAN) - - next = *head; - - /* Head and tail of the reconstructed freelist */ - *head = NULL; - *tail = NULL; - - do { - object = next; - next = get_freepointer(s, object); + } /* If object's reuse doesn't have to be delayed */ if (!slab_free_hook(s, object)) { /* Move object to the new freelist */ @@ -1468,9 +1450,6 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, *tail = NULL; return *head != NULL; -#else - return true; -#endif } static void *setup_object(struct kmem_cache *s, struct page *page, From 6cef48327f062ddc4e9978454e1d99dcbf53f401 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Mon, 14 Oct 2019 14:12:00 -0700 Subject: [PATCH 2410/3715] UPSTREAM: lib/test_meminit: add a kmem_cache_alloc_bulk() test Upstream commit 03a9349ac0e095dea6ef8b5b7b14f9c23e5fabe6. Make sure allocations from kmem_cache_alloc_bulk() and kmem_cache_free_bulk() are properly initialized. Link: http://lkml.kernel.org/r/20191007091605.30530-2-glider@google.com Signed-off-by: Alexander Potapenko Cc: Kees Cook Cc: Christoph Lameter Cc: Laura Abbott Cc: Thibaut Sautereau Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 138435492 Test: Boot an ARM64 mobile device with and without init_on_alloc=1 Change-Id: I92476545a6212483136e58008736f333b0b36217 Signed-off-by: Alexander Potapenko --- lib/test_meminit.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/lib/test_meminit.c b/lib/test_meminit.c index 9729f271d150..9742e5cb853a 100644 --- a/lib/test_meminit.c +++ b/lib/test_meminit.c @@ -297,6 +297,32 @@ out: return 1; } +static int __init do_kmem_cache_size_bulk(int size, int *total_failures) +{ + struct kmem_cache *c; + int i, iter, maxiter = 1024; + int num, bytes; + bool fail = false; + void *objects[10]; + + c = kmem_cache_create("test_cache", size, size, 0, NULL); + for (iter = 0; (iter < maxiter) && !fail; iter++) { + num = kmem_cache_alloc_bulk(c, GFP_KERNEL, ARRAY_SIZE(objects), + objects); + for (i = 0; i < num; i++) { + bytes = count_nonzero_bytes(objects[i], size); + if (bytes) + fail = true; + fill_with_garbage(objects[i], size); + } + + if (num) + kmem_cache_free_bulk(c, num, objects); + } + *total_failures += fail; + return 1; +} + /* * Test kmem_cache allocation by creating caches of different sizes, with and * without constructors, with and without SLAB_TYPESAFE_BY_RCU. @@ -318,6 +344,7 @@ static int __init test_kmemcache(int *total_failures) num_tests += do_kmem_cache_size(size, ctor, rcu, zero, &failures); } + num_tests += do_kmem_cache_size_bulk(size, &failures); } REPORT_FAILURES_IN_FN(); *total_failures += failures; From 871760779918efb0ece64890d9a5e9b0fcbda57d Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Wed, 4 Dec 2019 16:51:53 -0800 Subject: [PATCH 2411/3715] UPSTREAM: lib/test_meminit.c: add bulk alloc/free tests Upstream commit dc5c5ad79f0cc2d8756d161dbdee7b370f35f5bb. kmem_cache_alloc_bulk/kmem_cache_free_bulk are used to make multiple allocations of the same size to avoid the overhead of multiple kmalloc/kfree calls. Extend the kmem_cache tests to make some calls to these APIs. Link: http://lkml.kernel.org/r/20191107191447.23058-1-labbott@redhat.com Signed-off-by: Laura Abbott Reviewed-by: Kees Cook Tested-by: Alexander Potapenko Cc: Laura Abbott Cc: Christoph Lameter Cc: Nick Desaulniers Cc: Kostya Serebryany Cc: Dmitry Vyukov Cc: Sandeep Patil Cc: Jann Horn Cc: Marco Elver Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 138435492 Test: Boot an ARM64 mobile device with and without init_on_alloc=1 Change-Id: Ic93b19613791f2d2b485457a9bbc0437279579bd Signed-off-by: Alexander Potapenko --- lib/test_meminit.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/lib/test_meminit.c b/lib/test_meminit.c index 9742e5cb853a..e4f706a404b3 100644 --- a/lib/test_meminit.c +++ b/lib/test_meminit.c @@ -183,6 +183,9 @@ static bool __init check_buf(void *buf, int size, bool want_ctor, return fail; } +#define BULK_SIZE 100 +static void *bulk_array[BULK_SIZE]; + /* * Test kmem_cache with given parameters: * want_ctor - use a constructor; @@ -203,9 +206,24 @@ static int __init do_kmem_cache_size(size_t size, bool want_ctor, want_rcu ? SLAB_TYPESAFE_BY_RCU : 0, want_ctor ? test_ctor : NULL); for (iter = 0; iter < 10; iter++) { + /* Do a test of bulk allocations */ + if (!want_rcu && !want_ctor) { + int ret; + + ret = kmem_cache_alloc_bulk(c, alloc_mask, BULK_SIZE, bulk_array); + if (!ret) { + fail = true; + } else { + int i; + for (i = 0; i < ret; i++) + fail |= check_buf(bulk_array[i], size, want_ctor, want_rcu, want_zero); + kmem_cache_free_bulk(c, ret, bulk_array); + } + } + buf = kmem_cache_alloc(c, alloc_mask); /* Check that buf is zeroed, if it must be. */ - fail = check_buf(buf, size, want_ctor, want_rcu, want_zero); + fail |= check_buf(buf, size, want_ctor, want_rcu, want_zero); fill_with_garbage_skip(buf, size, want_ctor ? CTOR_BYTES : 0); if (!want_rcu) { From 1ed0039bde16a13c35feb3eb360d798dbc745106 Mon Sep 17 00:00:00 2001 From: Jeff Vander Stoep Date: Wed, 22 Jan 2020 11:19:58 +0100 Subject: [PATCH 2412/3715] ANDROID: selinux: modify RTM_GETLINK permission Map the permission gating RTM_GETLINK messages to a new permission so that it can be distinguished from the other netlink route permissions in selinux policy. This is a temporary Android-only patch that will be deprecated in newer kernels once the long-term solution lands as discusssed on the mailing list [1]. The maintainer's recommended solution is more general, much more complex, and likely not suitable for backporting. This patch provides the minimal change needed for Android including the userspace settable trigger which ensures that the permission change is only applied to the newest version of Android which contains the changes needed for userpace compatibility. [1]: https://lore.kernel.org/selinux/20200116142653.61738-1-jeffv@google.com/ Bug: 141455849 Bug: 148218425 Test: CtsSelinuxTargetSdkCurrentTestCases Test: atest bionic-unit-tests-static Test: atest NetworkInterfaceTest Test: Connect to Wi-Fi network Test: Set up hotspot Test: Cast from device Test: Pair Bluetooth device Test: Call getifaddrs() directly from within an app. Test: Call NetworkInterface#getNetworkInterfaces() from within an app. Change-Id: I7b44ce60ad98f858c412722d41b9842f8577151f Signed-off-by: Jeff Vander Stoep --- security/selinux/include/classmap.h | 2 +- security/selinux/include/security.h | 9 +++++++++ security/selinux/nlmsgtab.c | 26 +++++++++++++++++++++++++- security/selinux/ss/policydb.c | 4 ++++ security/selinux/ss/policydb.h | 2 ++ security/selinux/ss/services.c | 3 +++ 6 files changed, 44 insertions(+), 2 deletions(-) diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index 000effa857aa..34631690b5f9 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h @@ -115,7 +115,7 @@ struct security_class_mapping secclass_map[] = { { COMMON_IPC_PERMS, NULL } }, { "netlink_route_socket", { COMMON_SOCK_PERMS, - "nlmsg_read", "nlmsg_write", NULL } }, + "nlmsg_read", "nlmsg_write", "nlmsg_readpriv", NULL } }, { "netlink_tcpdiag_socket", { COMMON_SOCK_PERMS, "nlmsg_read", "nlmsg_write", NULL } }, diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index 0b3f3cc0c6a7..f64e33b23cd9 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -104,6 +104,7 @@ struct selinux_state { bool checkreqprot; bool initialized; bool policycap[__POLICYDB_CAPABILITY_MAX]; + bool android_netlink_route; struct selinux_avc *avc; struct selinux_ss *ss; }; @@ -176,6 +177,13 @@ static inline bool selinux_policycap_nnp_nosuid_transition(void) return state->policycap[POLICYDB_CAPABILITY_NNP_NOSUID_TRANSITION]; } +static inline bool selinux_android_nlroute_getlink(void) +{ + struct selinux_state *state = &selinux_state; + + return state->android_netlink_route; +} + int security_mls_enabled(struct selinux_state *state); int security_load_policy(struct selinux_state *state, void *data, size_t len); @@ -390,6 +398,7 @@ extern int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm); extern void avtab_cache_init(void); extern void ebitmap_cache_init(void); extern void hashtab_cache_init(void); +extern void selinux_nlmsg_init(void); extern int security_sidtab_hash_stats(struct selinux_state *state, char *page); #endif /* _SELINUX_SECURITY_H_ */ diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c index 7b7433a1a34c..6a93edf01cfb 100644 --- a/security/selinux/nlmsgtab.c +++ b/security/selinux/nlmsgtab.c @@ -28,7 +28,7 @@ struct nlmsg_perm { u32 perm; }; -static const struct nlmsg_perm nlmsg_route_perms[] = +static struct nlmsg_perm nlmsg_route_perms[] = { { RTM_NEWLINK, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELLINK, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, @@ -195,3 +195,27 @@ int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm) return err; } + +static void nlmsg_set_getlink_perm(u32 perm) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(nlmsg_route_perms); i++) { + if (nlmsg_route_perms[i].nlmsg_type == RTM_GETLINK) { + nlmsg_route_perms[i].perm = perm; + break; + } + } +} + +/** + * Use nlmsg_readpriv as the permission for RTM_GETLINK messages if the + * netlink_route_getlink policy capability is set. Otherwise use nlmsg_read. + */ +void selinux_nlmsg_init(void) +{ + if (selinux_android_nlroute_getlink()) + nlmsg_set_getlink_perm(NETLINK_ROUTE_SOCKET__NLMSG_READPRIV); + else + nlmsg_set_getlink_perm(NETLINK_ROUTE_SOCKET__NLMSG_READ); +} diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index 57e608f8a20c..2472b2a66f70 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -2400,6 +2400,10 @@ int policydb_read(struct policydb *p, void *fp) p->reject_unknown = !!(le32_to_cpu(buf[1]) & REJECT_UNKNOWN); p->allow_unknown = !!(le32_to_cpu(buf[1]) & ALLOW_UNKNOWN); + if ((le32_to_cpu(buf[1]) & POLICYDB_CONFIG_ANDROID_NETLINK_ROUTE)) { + p->android_netlink_route = 1; + } + if (p->policyvers >= POLICYDB_VERSION_POLCAP) { rc = ebitmap_read(&p->policycaps, fp); if (rc) diff --git a/security/selinux/ss/policydb.h b/security/selinux/ss/policydb.h index 215f8f30ac5a..dbb0ed57ed8b 100644 --- a/security/selinux/ss/policydb.h +++ b/security/selinux/ss/policydb.h @@ -238,6 +238,7 @@ struct genfs { /* The policy database */ struct policydb { int mls_enabled; + int android_netlink_route; /* symbol tables */ struct symtab symtab[SYM_NUM]; @@ -324,6 +325,7 @@ extern int policydb_write(struct policydb *p, void *fp); #define PERM_SYMTAB_SIZE 32 #define POLICYDB_CONFIG_MLS 1 +#define POLICYDB_CONFIG_ANDROID_NETLINK_ROUTE (1 << 31) /* the config flags related to unknown classes/perms are bits 2 and 3 */ #define REJECT_UNKNOWN 0x00000002 diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index c21b0cfe0de3..98c418060032 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -2107,6 +2107,9 @@ static void security_load_policycaps(struct selinux_state *state) pr_info("SELinux: unknown policy capability %u\n", i); } + + state->android_netlink_route = p->android_netlink_route; + selinux_nlmsg_init(); } static int security_preserve_bools(struct selinux_state *state, From 44a6aea9c219f55862776435b918c539855a9e69 Mon Sep 17 00:00:00 2001 From: Jeff Vander Stoep Date: Thu, 23 Jan 2020 09:51:14 +0100 Subject: [PATCH 2413/3715] Revert "ANDROID: security,perf: Allow further restriction of perf_event_open" Unfork Android. This reverts commit 8e5e42d5ae20f0324170d01ccf374a1571e82d9b. Perf_event_paranoid=3 is no longer needed on Android. Access control of perf events is now done by selinux. See: https://patchwork.kernel.org/patch/11185793/ Bug: 120445712 Bug: 137092007 Signed-off-by: Jeff Vander Stoep Change-Id: Iba493424174b30baff460caaa25a54a472c87bd4 --- Documentation/sysctl/kernel.txt | 4 +--- arch/arm64/configs/cuttlefish_defconfig | 1 - arch/x86/configs/x86_64_cuttlefish_defconfig | 1 - include/linux/perf_event.h | 5 ----- kernel/events/core.c | 8 -------- security/Kconfig | 9 --------- 6 files changed, 1 insertion(+), 27 deletions(-) diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index b757d6eb365b..694968c7523c 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -653,8 +653,7 @@ allowed to execute. perf_event_paranoid: Controls use of the performance events system by unprivileged -users (without CAP_SYS_ADMIN). The default value is 3 if -CONFIG_SECURITY_PERF_EVENTS_RESTRICT is set, or 2 otherwise. +users (without CAP_SYS_ADMIN). The default value is 2. -1: Allow use of (almost) all events by all users Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK @@ -662,7 +661,6 @@ CONFIG_SECURITY_PERF_EVENTS_RESTRICT is set, or 2 otherwise. Disallow raw tracepoint access by users without CAP_SYS_ADMIN >=1: Disallow CPU event access by users without CAP_SYS_ADMIN >=2: Disallow kernel profiling by users without CAP_SYS_ADMIN ->=3: Disallow all event access by users without CAP_SYS_ADMIN ============================================================== diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index c2700608d034..fe9a24b182f2 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -455,7 +455,6 @@ CONFIG_PANIC_TIMEOUT=5 CONFIG_SCHEDSTATS=y CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_ENABLE_DEFAULT_TRACERS=y -CONFIG_SECURITY_PERF_EVENTS_RESTRICT=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y CONFIG_LSM_MMAP_MIN_ADDR=65536 diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 0cee03ee5f2c..095a60dc538c 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -483,7 +483,6 @@ CONFIG_IO_DELAY_NONE=y CONFIG_DEBUG_BOOT_PARAMS=y CONFIG_OPTIMIZE_INLINING=y CONFIG_UNWINDER_FRAME_POINTER=y -CONFIG_SECURITY_PERF_EVENTS_RESTRICT=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_PATH=y diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 5d798eb5ac0a..ff924b5fd0b8 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1187,11 +1187,6 @@ int perf_event_max_stack_handler(struct ctl_table *table, int write, #define PERF_SECURITY_KERNEL 2 #define PERF_SECURITY_TRACEPOINT 3 -static inline bool perf_paranoid_any(void) -{ - return sysctl_perf_event_paranoid > 2; -} - static inline int perf_is_paranoid(void) { return sysctl_perf_event_paranoid > -1; diff --git a/kernel/events/core.c b/kernel/events/core.c index e24e2d558cbc..70d70c2db18a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -397,13 +397,8 @@ static cpumask_var_t perf_online_mask; * 0 - disallow raw tracepoint access for unpriv * 1 - disallow cpu events for unpriv * 2 - disallow kernel profiling for unpriv - * 3 - disallow all unpriv perf event use */ -#ifdef CONFIG_SECURITY_PERF_EVENTS_RESTRICT -int sysctl_perf_event_paranoid __read_mostly = 3; -#else int sysctl_perf_event_paranoid __read_mostly = 2; -#endif /* Minimum for 512 kiB + 1 user control page */ int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */ @@ -10054,9 +10049,6 @@ SYSCALL_DEFINE5(perf_event_open, if (flags & ~PERF_FLAG_ALL) return -EINVAL; - if (perf_paranoid_any() && !capable(CAP_SYS_ADMIN)) - return -EACCES; - /* Do we allow access to perf_event_open(2) ? */ err = security_perf_event_open(&attr, PERF_SECURITY_OPEN); if (err) diff --git a/security/Kconfig b/security/Kconfig index 8b6c5e9528e0..3a66cd8363c0 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -18,15 +18,6 @@ config SECURITY_DMESG_RESTRICT If you are unsure how to answer this question, answer N. -config SECURITY_PERF_EVENTS_RESTRICT - bool "Restrict unprivileged use of performance events" - depends on PERF_EVENTS - help - If you say Y here, the kernel.perf_event_paranoid sysctl - will be set to 3 by default, and no unprivileged use of the - perf_event_open syscall will be permitted unless it is - changed. - config SECURITY bool "Enable different security models" depends on SYSFS From f48fcae0ed4491875e6f98356ca5173dad3065ea Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Sat, 30 Nov 2019 17:58:29 -0800 Subject: [PATCH 2414/3715] UPSTREAM: mm/page_io.c: annotate refault stalls from swap_readpage If a block device supports rw_page operation, it doesn't submit bios so the annotation in submit_bio() for refault stall doesn't work. It happens with zram in android, especially swap read path which could consume CPU cycle for decompress. It is also a problem for zswap which uses frontswap. Annotate swap_readpage() to account the synchronous IO overhead to prevent underreport memory pressure. [akpm@linux-foundation.org: add comment, per Johannes] Link: http://lkml.kernel.org/r/20191010152134.38545-1-minchan@kernel.org Signed-off-by: Minchan Kim Acked-by: Johannes Weiner Reviewed-by: Shakeel Butt Cc: Seth Jennings Cc: Dan Streetman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 937790699be9c8100e5358625e7dfa8b32bd33f2) Bug: 142418748 Signed-off-by: Suren Baghdasaryan Change-Id: I8a63030888996b6c0a3a9abe3f2d0eca0a0d765b --- mm/page_io.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/mm/page_io.c b/mm/page_io.c index 5d882de3fbfd..9f8fd8f42b0d 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -354,10 +355,19 @@ int swap_readpage(struct page *page, bool do_poll) struct swap_info_struct *sis = page_swap_info(page); blk_qc_t qc; struct gendisk *disk; + unsigned long pflags; VM_BUG_ON_PAGE(!PageSwapCache(page), page); VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(PageUptodate(page), page); + + /* + * Count submission time as memory stall. When the device is congested, + * or the submitting cgroup IO-throttled, submission can be a + * significant part of overall IO time. + */ + psi_memstall_enter(&pflags); + if (frontswap_load(page) == 0) { SetPageUptodate(page); unlock_page(page); @@ -371,7 +381,7 @@ int swap_readpage(struct page *page, bool do_poll) ret = mapping->a_ops->readpage(swap_file, page); if (!ret) count_vm_event(PSWPIN); - return ret; + goto out; } ret = bdev_read_page(sis->bdev, swap_page_sector(page), page); @@ -382,7 +392,7 @@ int swap_readpage(struct page *page, bool do_poll) } count_vm_event(PSWPIN); - return 0; + goto out; } ret = 0; @@ -415,6 +425,7 @@ int swap_readpage(struct page *page, bool do_poll) bio_put(bio); out: + psi_memstall_leave(&pflags); return ret; } From 5bbf2879a5bddbf4e8f8a18cabaae9ea5d6ec3b8 Mon Sep 17 00:00:00 2001 From: Hridya Valsaraju Date: Sun, 8 Dec 2019 12:43:44 -0800 Subject: [PATCH 2415/3715] FROMLIST: security: selinux: allow per-file labelling for binderfs This patch allows genfscon per-file labeling for binderfs. This is required to have separate permissions to allow access to binder, hwbinder and vndbinder devices which are relocating to binderfs. Acked-by: Jeff Vander Stoep Acked-by: Mark Salyzyn Signed-off-by: Hridya Valsaraju Bug: 136497735 (cherry picked from commit 7a4b51947475a7f67e2bd06c4a4c768e2e64a975 git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux.git master) Link: https://lore.kernel.org/patchwork/patch/1175776/ Change-Id: I105cc54b30ddd4120dc23a363bddc2f9d00e4dc4 --- security/selinux/hooks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 2595465dcd3b..134c7b5f8a0b 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -870,6 +870,7 @@ static int selinux_set_mnt_opts(struct super_block *sb, !strcmp(sb->s_type->name, "tracefs") || !strcmp(sb->s_type->name, "sysfs") || !strcmp(sb->s_type->name, "pstore") || + !strcmp(sb->s_type->name, "binder") || !strcmp(sb->s_type->name, "cgroup") || !strcmp(sb->s_type->name, "cgroup2")) sbsec->flags |= SE_SBGENFS; From b5bfb6b8912d1f198208c0b1f6d3ba46621fd714 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 6 Apr 2018 15:21:13 +1000 Subject: [PATCH 2416/3715] UPSTREAM: libnvdimm: Add of_node to region and bus descriptors We want to be able to cross reference the region and bus devices with the device tree node that they were spawned from. libNVDIMM handles creating the actual devices for these internally, so we need to pass in a pointer to the relevant node in the descriptor. Signed-off-by: Oliver O'Halloran Acked-by: Dan Williams Acked-by: Balbir Singh Signed-off-by: Dan Williams (cherry picked from commit 1ff19f487a7e55bf3cebc96ea2a9a38d66fb7db7) Bug: 146400078 Bug: 148297388 Change-Id: Id56dd9b51ab5bc79d89e79a7dc2e5dfb2a503bde Signed-off-by: Alistair Delva --- drivers/nvdimm/bus.c | 1 + drivers/nvdimm/region_devs.c | 1 + include/linux/libnvdimm.h | 3 +++ 3 files changed, 5 insertions(+) diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 2f1b54fab399..4587c9773560 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -358,6 +358,7 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent, nvdimm_bus->dev.release = nvdimm_bus_release; nvdimm_bus->dev.groups = nd_desc->attr_groups; nvdimm_bus->dev.bus = &nvdimm_bus_type; + nvdimm_bus->dev.of_node = nd_desc->of_node; dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id); rc = device_register(&nvdimm_bus->dev); if (rc) { diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 708043d20d0d..bd68b7b01d08 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -999,6 +999,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, dev->parent = &nvdimm_bus->dev; dev->type = dev_type; dev->groups = ndr_desc->attr_groups; + dev->of_node = ndr_desc->of_node; nd_region->ndr_size = resource_size(ndr_desc->res); nd_region->ndr_start = ndr_desc->res->start; nd_device_register(dev); diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 3eaad2fbf284..51edfc6394f0 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -53,12 +53,14 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc); +struct device_node; struct nvdimm_bus_descriptor { const struct attribute_group **attr_groups; unsigned long bus_dsm_mask; unsigned long cmd_mask; struct module *module; char *provider_name; + struct device_node *of_node; ndctl_fn ndctl; int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc); int (*clear_to_send)(struct nvdimm_bus_descriptor *nd_desc, @@ -100,6 +102,7 @@ struct nd_region_desc { int num_lanes; int numa_node; unsigned long flags; + struct device_node *of_node; }; struct device; From 3c91fbaf6c5e131a3d9b2c0e949e39c419c4efc1 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 6 Apr 2018 15:21:14 +1000 Subject: [PATCH 2417/3715] UPSTREAM: libnvdimm: Add device-tree based driver This patch adds peliminary device-tree bindings for persistent memory regions. The driver registers a libnvdimm bus for each pmem-region node and each address range under the node is converted to a region within that bus. Signed-off-by: Oliver O'Halloran Signed-off-by: Dan Williams (cherry picked from commit 7171976089528cb3d057a6fb288e7f8f89ab7f68) Bug: 146400078 Bug: 148297388 Change-Id: I1dc76c36adecd82c2e9a248c8012c7b31c146cd7 Signed-off-by: Alistair Delva --- MAINTAINERS | 7 +++ drivers/nvdimm/Kconfig | 10 ++++ drivers/nvdimm/Makefile | 1 + drivers/nvdimm/of_pmem.c | 119 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 137 insertions(+) create mode 100644 drivers/nvdimm/of_pmem.c diff --git a/MAINTAINERS b/MAINTAINERS index 2cb45b54ed30..77c68f63e29f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7886,6 +7886,13 @@ Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ S: Supported F: drivers/nvdimm/pmem* +LIBNVDIMM: DEVICETREE BINDINGS +M: Oliver O'Halloran +L: linux-nvdimm@lists.01.org +Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ +S: Supported +F: drivers/nvdimm/of_pmem.c + LIBNVDIMM: NON-VOLATILE MEMORY DEVICE SUBSYSTEM M: Dan Williams L: linux-nvdimm@lists.01.org diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig index 5bdd499b5f4f..250471fa0fe6 100644 --- a/drivers/nvdimm/Kconfig +++ b/drivers/nvdimm/Kconfig @@ -102,4 +102,14 @@ config NVDIMM_DAX Select Y if unsure +config OF_PMEM + tristate "Device-tree support for persistent memory regions" + depends on OF + default LIBNVDIMM + help + Allows regions of persistent memory to be described in the + device-tree. + + Select Y if unsure. + endif diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile index 447e0e14f3b6..1192946e66af 100644 --- a/drivers/nvdimm/Makefile +++ b/drivers/nvdimm/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o obj-$(CONFIG_ND_BTT) += nd_btt.o obj-$(CONFIG_ND_BLK) += nd_blk.o obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o +obj-$(CONFIG_OF_PMEM) += of_pmem.o nd_pmem-y := pmem.o diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c new file mode 100644 index 000000000000..85013bad35de --- /dev/null +++ b/drivers/nvdimm/of_pmem.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#define pr_fmt(fmt) "of_pmem: " fmt + +#include +#include +#include +#include +#include +#include + +static const struct attribute_group *region_attr_groups[] = { + &nd_region_attribute_group, + &nd_device_attribute_group, + NULL, +}; + +static const struct attribute_group *bus_attr_groups[] = { + &nvdimm_bus_attribute_group, + NULL, +}; + +struct of_pmem_private { + struct nvdimm_bus_descriptor bus_desc; + struct nvdimm_bus *bus; +}; + +static int of_pmem_region_probe(struct platform_device *pdev) +{ + struct of_pmem_private *priv; + struct device_node *np; + struct nvdimm_bus *bus; + bool is_volatile; + int i; + + np = dev_of_node(&pdev->dev); + if (!np) + return -ENXIO; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->bus_desc.attr_groups = bus_attr_groups; + priv->bus_desc.provider_name = "of_pmem"; + priv->bus_desc.module = THIS_MODULE; + priv->bus_desc.of_node = np; + + priv->bus = bus = nvdimm_bus_register(&pdev->dev, &priv->bus_desc); + if (!bus) { + kfree(priv); + return -ENODEV; + } + platform_set_drvdata(pdev, priv); + + is_volatile = !!of_find_property(np, "volatile", NULL); + dev_dbg(&pdev->dev, "Registering %s regions from %pOF\n", + is_volatile ? "volatile" : "non-volatile", np); + + for (i = 0; i < pdev->num_resources; i++) { + struct nd_region_desc ndr_desc; + struct nd_region *region; + + /* + * NB: libnvdimm copies the data from ndr_desc into it's own + * structures so passing a stack pointer is fine. + */ + memset(&ndr_desc, 0, sizeof(ndr_desc)); + ndr_desc.attr_groups = region_attr_groups; + ndr_desc.numa_node = of_node_to_nid(np); + ndr_desc.res = &pdev->resource[i]; + ndr_desc.of_node = np; + set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); + + if (is_volatile) + region = nvdimm_volatile_region_create(bus, &ndr_desc); + else + region = nvdimm_pmem_region_create(bus, &ndr_desc); + + if (!region) + dev_warn(&pdev->dev, "Unable to register region %pR from %pOF\n", + ndr_desc.res, np); + else + dev_dbg(&pdev->dev, "Registered region %pR from %pOF\n", + ndr_desc.res, np); + } + + return 0; +} + +static int of_pmem_region_remove(struct platform_device *pdev) +{ + struct of_pmem_private *priv = platform_get_drvdata(pdev); + + nvdimm_bus_unregister(priv->bus); + kfree(priv); + + return 0; +} + +static const struct of_device_id of_pmem_region_match[] = { + { .compatible = "pmem-region" }, + { }, +}; + +static struct platform_driver of_pmem_region_driver = { + .probe = of_pmem_region_probe, + .remove = of_pmem_region_remove, + .driver = { + .name = "of_pmem", + .owner = THIS_MODULE, + .of_match_table = of_pmem_region_match, + }, +}; + +module_platform_driver(of_pmem_region_driver); +MODULE_DEVICE_TABLE(of, of_pmem_region_match); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("IBM Corporation"); From 9ca89392289821befd5af5e9e99637a1ecd7cf02 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 16 Apr 2018 11:58:16 -0500 Subject: [PATCH 2418/3715] UPSTREAM: libnvdimm, of_pmem: use dev_to_node() instead of of_node_to_nid() Remove the direct dependency on of_node_to_nid() by using dev_to_node() instead. Any DT platform device will have its NUMA node id set when the device is created. With this, commit 291717b6fbdb ("libnvdimm, of_pmem: workaround OF_NUMA=n build error") can be reverted. Fixes: 717197608952 ("libnvdimm: Add device-tree based driver") Cc: Dan Williams Cc: Oliver O'Halloran Cc: linux-nvdimm@lists.01.org Signed-off-by: Rob Herring Signed-off-by: Dan Williams (cherry picked from commit df3f126482dba8e00cdbfc8fc44a05a5a35b1704) Bug: 146400078 Bug: 148297388 Change-Id: I87aed3d23c38a10b0f25e23cd438ade4f8073f28 Signed-off-by: Alistair Delva --- drivers/nvdimm/of_pmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c index 85013bad35de..0a701837dfc0 100644 --- a/drivers/nvdimm/of_pmem.c +++ b/drivers/nvdimm/of_pmem.c @@ -67,7 +67,7 @@ static int of_pmem_region_probe(struct platform_device *pdev) */ memset(&ndr_desc, 0, sizeof(ndr_desc)); ndr_desc.attr_groups = region_attr_groups; - ndr_desc.numa_node = of_node_to_nid(np); + ndr_desc.numa_node = dev_to_node(&pdev->dev); ndr_desc.res = &pdev->resource[i]; ndr_desc.of_node = np; set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); From afab16336a4d08b147f87cf85b06d0af6a31c2e5 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 26 Feb 2019 01:42:53 +0000 Subject: [PATCH 2419/3715] UPSTREAM: libnvdimm/of_pmem: Fix platform_no_drv_owner.cocci warnings Remove .owner field if calls are used which set it automatically Generated by: scripts/coccinelle/api/platform_no_drv_owner.cocci Signed-off-by: YueHaibing Signed-off-by: Dan Williams (cherry picked from commit 316720b9c2341307b9a17103cdafa1ca9b2fb872) Bug: 146400078 Bug: 148297388 Change-Id: I16e7543bcb786e20e96ea4250a809bb3b7f1ec32 Signed-off-by: Alistair Delva --- drivers/nvdimm/of_pmem.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c index 0a701837dfc0..11b9821eba85 100644 --- a/drivers/nvdimm/of_pmem.c +++ b/drivers/nvdimm/of_pmem.c @@ -108,7 +108,6 @@ static struct platform_driver of_pmem_region_driver = { .remove = of_pmem_region_remove, .driver = { .name = "of_pmem", - .owner = THIS_MODULE, .of_match_table = of_pmem_region_match, }, }; From 28899aa1830df4283c7d8cef324abaad60109852 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 7 Aug 2019 09:30:29 +0530 Subject: [PATCH 2420/3715] UPSTREAM: libnvdimm/of_pmem: Provide a unique name for bus provider ndctl binaries, v66 and older, mistakenly require the ndbus to have unique names. If not while enumerating the bus in userspace it drops bus with similar names. This results in us not listing devices beneath the bus. Signed-off-by: Aneesh Kumar K.V Tested-by: Vaibhav Jain Link: https://lore.kernel.org/r/20190807040029.11344-1-aneesh.kumar@linux.ibm.com Signed-off-by: Dan Williams (cherry picked from commit 49bddc73d15c25a68e4294d76fc74519fda984cd) Bug: 146400078 Bug: 148297388 Change-Id: Ieda4557bbda63e554e2eda6b87d7ba2a6e149e3b Signed-off-by: Alistair Delva --- drivers/nvdimm/of_pmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c index 11b9821eba85..6e5cbfd8cb13 100644 --- a/drivers/nvdimm/of_pmem.c +++ b/drivers/nvdimm/of_pmem.c @@ -42,7 +42,7 @@ static int of_pmem_region_probe(struct platform_device *pdev) return -ENOMEM; priv->bus_desc.attr_groups = bus_attr_groups; - priv->bus_desc.provider_name = "of_pmem"; + priv->bus_desc.provider_name = kstrdup(pdev->name, GFP_KERNEL); priv->bus_desc.module = THIS_MODULE; priv->bus_desc.of_node = np; From 75875b04b1aeec995e2121d874e4bf62040a717b Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Fri, 5 Jul 2019 19:33:22 +0530 Subject: [PATCH 2421/3715] BACKPORT: libnvdimm: nd_region flush callback support This patch adds functionality to perform flush from guest to host over VIRTIO. We are registering a callback based on 'nd_region' type. virtio_pmem driver requires this special flush function. For rest of the region types we are registering existing flush function. Report error returned by host fsync failure to userspace. Signed-off-by: Pankaj Gupta Signed-off-by: Dan Williams (cherry picked from commit c5d4355d10d414a96ca870b731756b89d068d57a) [adelva: backport around some nvdimm refactors in >4.14] Bug: 146400078 Bug: 148297388 Change-Id: Icf6ff5327b3c74455a4d53d2d37ac7fef7fbda85 Signed-off-by: Alistair Delva --- drivers/acpi/nfit/core.c | 4 ++-- drivers/nvdimm/claim.c | 6 ++++-- drivers/nvdimm/nd.h | 1 + drivers/nvdimm/pmem.c | 13 ++++++++----- drivers/nvdimm/region_devs.c | 26 ++++++++++++++++++++++++-- include/linux/libnvdimm.h | 9 ++++++++- 6 files changed, 47 insertions(+), 12 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 05fb821c2558..7e91575496b6 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1980,7 +1980,7 @@ static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw, offset = to_interleave_offset(offset, mmio); writeq(cmd, mmio->addr.base + offset); - nvdimm_flush(nfit_blk->nd_region); + nvdimm_flush(nfit_blk->nd_region, NULL); if (nfit_blk->dimm_flags & NFIT_BLK_DCR_LATCH) readq(mmio->addr.base + offset); @@ -2029,7 +2029,7 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk, } if (rw) - nvdimm_flush(nfit_blk->nd_region); + nvdimm_flush(nfit_blk->nd_region, NULL); rc = read_blk_stat(nfit_blk, lane) ? -EIO : 0; return rc; diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index b2fc29b8279b..32f2aaf62f27 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -263,7 +263,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns, struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512); sector_t sector = offset >> 9; - int rc = 0; + int rc = 0, ret = 0; if (unlikely(!size)) return 0; @@ -299,7 +299,9 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns, } memcpy_flushcache(nsio->addr + offset, buf, size); - nvdimm_flush(to_nd_region(ndns->dev.parent)); + ret = nvdimm_flush(to_nd_region(ndns->dev.parent), NULL); + if (ret) + rc = ret; return rc; } diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index e3f060f0b83e..b79a8d0f9b48 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -170,6 +170,7 @@ struct nd_region { struct badblocks bb; struct nd_interleave_set *nd_set; struct nd_percpu_lane __percpu *lane; + int (*flush)(struct nd_region *nd_region, struct bio *bio); struct nd_mapping mapping[0]; }; diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 80f8bbf83742..76934fadca97 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -169,6 +169,7 @@ static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page, static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) { + int ret = 0; blk_status_t rc = 0; bool do_acct; unsigned long start; @@ -178,7 +179,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) struct nd_region *nd_region = to_region(pmem); if (bio->bi_opf & REQ_FLUSH) - nvdimm_flush(nd_region); + ret = nvdimm_flush(nd_region, bio); do_acct = nd_iostat_start(bio, &start); bio_for_each_segment(bvec, bio, iter) { @@ -194,7 +195,10 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) nd_iostat_end(bio, start); if (bio->bi_opf & REQ_FUA) - nvdimm_flush(nd_region); + ret = nvdimm_flush(nd_region, bio); + + if (ret) + bio->bi_status = errno_to_blk_status(ret); bio_endio(bio); return BLK_QC_T_NONE; @@ -415,7 +419,6 @@ static int pmem_attach_disk(struct device *dev, } dax_write_cache(dax_dev, wbc); pmem->dax_dev = dax_dev; - gendev = disk_to_dev(disk); gendev->groups = pmem_attribute_groups; @@ -473,14 +476,14 @@ static int nd_pmem_remove(struct device *dev) sysfs_put(pmem->bb_state); pmem->bb_state = NULL; } - nvdimm_flush(to_nd_region(dev->parent)); + nvdimm_flush(to_nd_region(dev->parent), NULL); return 0; } static void nd_pmem_shutdown(struct device *dev) { - nvdimm_flush(to_nd_region(dev->parent)); + nvdimm_flush(to_nd_region(dev->parent), NULL); } static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index bd68b7b01d08..c0e6a6d235de 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -282,7 +282,9 @@ static ssize_t deep_flush_store(struct device *dev, struct device_attribute *att return rc; if (!flush) return -EINVAL; - nvdimm_flush(nd_region); + rc = nvdimm_flush(nd_region, NULL); + if (rc) + return rc; return len; } @@ -1002,6 +1004,11 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, dev->of_node = ndr_desc->of_node; nd_region->ndr_size = resource_size(ndr_desc->res); nd_region->ndr_start = ndr_desc->res->start; + if (ndr_desc->flush) + nd_region->flush = ndr_desc->flush; + else + nd_region->flush = NULL; + nd_device_register(dev); return nd_region; @@ -1042,11 +1049,24 @@ struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus, } EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create); +int nvdimm_flush(struct nd_region *nd_region, struct bio *bio) +{ + int rc = 0; + + if (!nd_region->flush) + rc = generic_nvdimm_flush(nd_region); + else { + if (nd_region->flush(nd_region, bio)) + rc = -EIO; + } + + return rc; +} /** * nvdimm_flush - flush any posted write queues between the cpu and pmem media * @nd_region: blk or interleaved pmem region */ -void nvdimm_flush(struct nd_region *nd_region) +int generic_nvdimm_flush(struct nd_region *nd_region) { struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev); int i, idx; @@ -1070,6 +1090,8 @@ void nvdimm_flush(struct nd_region *nd_region) if (ndrd_get_flush_wpq(ndrd, i, 0)) writeq(1, ndrd_get_flush_wpq(ndrd, i, idx)); wmb(); + + return 0; } EXPORT_SYMBOL_GPL(nvdimm_flush); diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 51edfc6394f0..84284e3353ed 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -18,6 +18,7 @@ #include #include #include +#include enum { /* when a dimm supports both PMEM and BLK access a label is required */ @@ -36,6 +37,9 @@ enum { /* region flag indicating to direct-map persistent memory by default */ ND_REGION_PAGEMAP = 0, + /* Platform provides asynchronous flush mechanism */ + ND_REGION_ASYNC = 3, + /* mark newly adjusted resources as requiring a label update */ DPA_RESOURCE_ADJUSTED = 1 << 0, }; @@ -92,6 +96,7 @@ struct nd_mapping_desc { int position; }; +struct nd_region; struct nd_region_desc { struct resource *res; struct nd_mapping_desc *mapping; @@ -103,6 +108,7 @@ struct nd_region_desc { int numa_node; unsigned long flags; struct device_node *of_node; + int (*flush)(struct nd_region *nd_region, struct bio *bio); }; struct device; @@ -174,7 +180,8 @@ unsigned long nd_blk_memremap_flags(struct nd_blk_region *ndbr); unsigned int nd_region_acquire_lane(struct nd_region *nd_region); void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane); u64 nd_fletcher64(void *addr, size_t len, bool le); -void nvdimm_flush(struct nd_region *nd_region); +int nvdimm_flush(struct nd_region *nd_region, struct bio *bio); +int generic_nvdimm_flush(struct nd_region *nd_region); int nvdimm_has_flush(struct nd_region *nd_region); int nvdimm_has_cache(struct nd_region *nd_region); From e2bc3b41bd16c48f198ac7dc7a552b6b2245e469 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Fri, 5 Jul 2019 19:33:23 +0530 Subject: [PATCH 2422/3715] UPSTREAM: virtio-pmem: Add virtio pmem driver This patch adds virtio-pmem driver for KVM guest. Guest reads the persistent memory range information from Qemu over VIRTIO and registers it on nvdimm_bus. It also creates a nd_region object with the persistent memory range information so that existing 'nvdimm/pmem' driver can reserve this into system memory map. This way 'virtio-pmem' driver uses existing functionality of pmem driver to register persistent memory compatible for DAX capable filesystems. This also provides function to perform guest flush over VIRTIO from 'pmem' driver when userspace performs flush on DAX memory range. Signed-off-by: Pankaj Gupta Reviewed-by: Yuval Shaia Acked-by: Michael S. Tsirkin Acked-by: Jakub Staron Tested-by: Jakub Staron Reviewed-by: Cornelia Huck Signed-off-by: Dan Williams (cherry picked from commit 6e84200c0a2994b991259d19450eee561029bf70) Bug: 146400078 Bug: 148297388 Change-Id: Ie3457fe184f29984d181bc0afa9267e2567a2caf Signed-off-by: Alistair Delva --- drivers/nvdimm/Makefile | 1 + drivers/nvdimm/nd_virtio.c | 125 +++++++++++++++++++++++++++++++ drivers/nvdimm/virtio_pmem.c | 122 ++++++++++++++++++++++++++++++ drivers/nvdimm/virtio_pmem.h | 55 ++++++++++++++ drivers/virtio/Kconfig | 11 +++ include/uapi/linux/virtio_ids.h | 1 + include/uapi/linux/virtio_pmem.h | 34 +++++++++ 7 files changed, 349 insertions(+) create mode 100644 drivers/nvdimm/nd_virtio.c create mode 100644 drivers/nvdimm/virtio_pmem.c create mode 100644 drivers/nvdimm/virtio_pmem.h create mode 100644 include/uapi/linux/virtio_pmem.h diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile index 1192946e66af..5b4f3d50b83c 100644 --- a/drivers/nvdimm/Makefile +++ b/drivers/nvdimm/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_ND_BTT) += nd_btt.o obj-$(CONFIG_ND_BLK) += nd_blk.o obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o obj-$(CONFIG_OF_PMEM) += of_pmem.o +obj-$(CONFIG_VIRTIO_PMEM) += virtio_pmem.o nd_virtio.o nd_pmem-y := pmem.o diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c new file mode 100644 index 000000000000..8645275c08c2 --- /dev/null +++ b/drivers/nvdimm/nd_virtio.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * virtio_pmem.c: Virtio pmem Driver + * + * Discovers persistent memory range information + * from host and provides a virtio based flushing + * interface. + */ +#include "virtio_pmem.h" +#include "nd.h" + + /* The interrupt handler */ +void virtio_pmem_host_ack(struct virtqueue *vq) +{ + struct virtio_pmem *vpmem = vq->vdev->priv; + struct virtio_pmem_request *req_data, *req_buf; + unsigned long flags; + unsigned int len; + + spin_lock_irqsave(&vpmem->pmem_lock, flags); + while ((req_data = virtqueue_get_buf(vq, &len)) != NULL) { + req_data->done = true; + wake_up(&req_data->host_acked); + + if (!list_empty(&vpmem->req_list)) { + req_buf = list_first_entry(&vpmem->req_list, + struct virtio_pmem_request, list); + req_buf->wq_buf_avail = true; + wake_up(&req_buf->wq_buf); + list_del(&req_buf->list); + } + } + spin_unlock_irqrestore(&vpmem->pmem_lock, flags); +} +EXPORT_SYMBOL_GPL(virtio_pmem_host_ack); + + /* The request submission function */ +static int virtio_pmem_flush(struct nd_region *nd_region) +{ + struct virtio_device *vdev = nd_region->provider_data; + struct virtio_pmem *vpmem = vdev->priv; + struct virtio_pmem_request *req_data; + struct scatterlist *sgs[2], sg, ret; + unsigned long flags; + int err, err1; + + might_sleep(); + req_data = kmalloc(sizeof(*req_data), GFP_KERNEL); + if (!req_data) + return -ENOMEM; + + req_data->done = false; + init_waitqueue_head(&req_data->host_acked); + init_waitqueue_head(&req_data->wq_buf); + INIT_LIST_HEAD(&req_data->list); + req_data->req.type = cpu_to_virtio32(vdev, VIRTIO_PMEM_REQ_TYPE_FLUSH); + sg_init_one(&sg, &req_data->req, sizeof(req_data->req)); + sgs[0] = &sg; + sg_init_one(&ret, &req_data->resp.ret, sizeof(req_data->resp)); + sgs[1] = &ret; + + spin_lock_irqsave(&vpmem->pmem_lock, flags); + /* + * If virtqueue_add_sgs returns -ENOSPC then req_vq virtual + * queue does not have free descriptor. We add the request + * to req_list and wait for host_ack to wake us up when free + * slots are available. + */ + while ((err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req_data, + GFP_ATOMIC)) == -ENOSPC) { + + dev_info(&vdev->dev, "failed to send command to virtio pmem device, no free slots in the virtqueue\n"); + req_data->wq_buf_avail = false; + list_add_tail(&req_data->list, &vpmem->req_list); + spin_unlock_irqrestore(&vpmem->pmem_lock, flags); + + /* A host response results in "host_ack" getting called */ + wait_event(req_data->wq_buf, req_data->wq_buf_avail); + spin_lock_irqsave(&vpmem->pmem_lock, flags); + } + err1 = virtqueue_kick(vpmem->req_vq); + spin_unlock_irqrestore(&vpmem->pmem_lock, flags); + /* + * virtqueue_add_sgs failed with error different than -ENOSPC, we can't + * do anything about that. + */ + if (err || !err1) { + dev_info(&vdev->dev, "failed to send command to virtio pmem device\n"); + err = -EIO; + } else { + /* A host repsonse results in "host_ack" getting called */ + wait_event(req_data->host_acked, req_data->done); + err = virtio32_to_cpu(vdev, req_data->resp.ret); + } + + kfree(req_data); + return err; +}; + +/* The asynchronous flush callback function */ +int async_pmem_flush(struct nd_region *nd_region, struct bio *bio) +{ + /* + * Create child bio for asynchronous flush and chain with + * parent bio. Otherwise directly call nd_region flush. + */ + if (bio && bio->bi_iter.bi_sector != -1) { + struct bio *child = bio_alloc(GFP_ATOMIC, 0); + + if (!child) + return -ENOMEM; + bio_copy_dev(child, bio); + child->bi_opf = REQ_PREFLUSH; + child->bi_iter.bi_sector = -1; + bio_chain(child, bio); + submit_bio(child); + return 0; + } + if (virtio_pmem_flush(nd_region)) + return -EIO; + + return 0; +}; +EXPORT_SYMBOL_GPL(async_pmem_flush); +MODULE_LICENSE("GPL"); diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c new file mode 100644 index 000000000000..5e3d07b47e0c --- /dev/null +++ b/drivers/nvdimm/virtio_pmem.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * virtio_pmem.c: Virtio pmem Driver + * + * Discovers persistent memory range information + * from host and registers the virtual pmem device + * with libnvdimm core. + */ +#include "virtio_pmem.h" +#include "nd.h" + +static struct virtio_device_id id_table[] = { + { VIRTIO_ID_PMEM, VIRTIO_DEV_ANY_ID }, + { 0 }, +}; + + /* Initialize virt queue */ +static int init_vq(struct virtio_pmem *vpmem) +{ + /* single vq */ + vpmem->req_vq = virtio_find_single_vq(vpmem->vdev, + virtio_pmem_host_ack, "flush_queue"); + if (IS_ERR(vpmem->req_vq)) + return PTR_ERR(vpmem->req_vq); + + spin_lock_init(&vpmem->pmem_lock); + INIT_LIST_HEAD(&vpmem->req_list); + + return 0; +}; + +static int virtio_pmem_probe(struct virtio_device *vdev) +{ + struct nd_region_desc ndr_desc = {}; + int nid = dev_to_node(&vdev->dev); + struct nd_region *nd_region; + struct virtio_pmem *vpmem; + struct resource res; + int err = 0; + + if (!vdev->config->get) { + dev_err(&vdev->dev, "%s failure: config access disabled\n", + __func__); + return -EINVAL; + } + + vpmem = devm_kzalloc(&vdev->dev, sizeof(*vpmem), GFP_KERNEL); + if (!vpmem) { + err = -ENOMEM; + goto out_err; + } + + vpmem->vdev = vdev; + vdev->priv = vpmem; + err = init_vq(vpmem); + if (err) { + dev_err(&vdev->dev, "failed to initialize virtio pmem vq's\n"); + goto out_err; + } + + virtio_cread(vpmem->vdev, struct virtio_pmem_config, + start, &vpmem->start); + virtio_cread(vpmem->vdev, struct virtio_pmem_config, + size, &vpmem->size); + + res.start = vpmem->start; + res.end = vpmem->start + vpmem->size - 1; + vpmem->nd_desc.provider_name = "virtio-pmem"; + vpmem->nd_desc.module = THIS_MODULE; + + vpmem->nvdimm_bus = nvdimm_bus_register(&vdev->dev, + &vpmem->nd_desc); + if (!vpmem->nvdimm_bus) { + dev_err(&vdev->dev, "failed to register device with nvdimm_bus\n"); + err = -ENXIO; + goto out_vq; + } + + dev_set_drvdata(&vdev->dev, vpmem->nvdimm_bus); + + ndr_desc.res = &res; + ndr_desc.numa_node = nid; + ndr_desc.flush = async_pmem_flush; + set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); + set_bit(ND_REGION_ASYNC, &ndr_desc.flags); + nd_region = nvdimm_pmem_region_create(vpmem->nvdimm_bus, &ndr_desc); + if (!nd_region) { + dev_err(&vdev->dev, "failed to create nvdimm region\n"); + err = -ENXIO; + goto out_nd; + } + nd_region->provider_data = dev_to_virtio(nd_region->dev.parent->parent); + return 0; +out_nd: + nvdimm_bus_unregister(vpmem->nvdimm_bus); +out_vq: + vdev->config->del_vqs(vdev); +out_err: + return err; +} + +static void virtio_pmem_remove(struct virtio_device *vdev) +{ + struct nvdimm_bus *nvdimm_bus = dev_get_drvdata(&vdev->dev); + + nvdimm_bus_unregister(nvdimm_bus); + vdev->config->del_vqs(vdev); + vdev->config->reset(vdev); +} + +static struct virtio_driver virtio_pmem_driver = { + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .probe = virtio_pmem_probe, + .remove = virtio_pmem_remove, +}; + +module_virtio_driver(virtio_pmem_driver); +MODULE_DEVICE_TABLE(virtio, id_table); +MODULE_DESCRIPTION("Virtio pmem driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/nvdimm/virtio_pmem.h b/drivers/nvdimm/virtio_pmem.h new file mode 100644 index 000000000000..0dddefe594c4 --- /dev/null +++ b/drivers/nvdimm/virtio_pmem.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * virtio_pmem.h: virtio pmem Driver + * + * Discovers persistent memory range information + * from host and provides a virtio based flushing + * interface. + **/ + +#ifndef _LINUX_VIRTIO_PMEM_H +#define _LINUX_VIRTIO_PMEM_H + +#include +#include +#include +#include + +struct virtio_pmem_request { + struct virtio_pmem_req req; + struct virtio_pmem_resp resp; + + /* Wait queue to process deferred work after ack from host */ + wait_queue_head_t host_acked; + bool done; + + /* Wait queue to process deferred work after virt queue buffer avail */ + wait_queue_head_t wq_buf; + bool wq_buf_avail; + struct list_head list; +}; + +struct virtio_pmem { + struct virtio_device *vdev; + + /* Virtio pmem request queue */ + struct virtqueue *req_vq; + + /* nvdimm bus registers virtio pmem device */ + struct nvdimm_bus *nvdimm_bus; + struct nvdimm_bus_descriptor nd_desc; + + /* List to store deferred work if virtqueue is full */ + struct list_head req_list; + + /* Synchronize virtqueue data */ + spinlock_t pmem_lock; + + /* Memory region information */ + __u64 start; + __u64 size; +}; + +void virtio_pmem_host_ack(struct virtqueue *vq); +int async_pmem_flush(struct nd_region *nd_region, struct bio *bio); +#endif diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index cff773f15b7e..89bc931de8df 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -38,6 +38,17 @@ config VIRTIO_PCI_LEGACY If unsure, say Y. +config VIRTIO_PMEM + tristate "Support for virtio pmem driver" + depends on VIRTIO + depends on LIBNVDIMM + help + This driver provides access to virtio-pmem devices, storage devices + that are mapped into the physical address space - similar to NVDIMMs + - with a virtio-based flushing interface. + + If unsure, say Y. + config VIRTIO_BALLOON tristate "Virtio balloon driver" depends on VIRTIO diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 6d5c3b2d4f4d..32b2f94d1f58 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -43,5 +43,6 @@ #define VIRTIO_ID_INPUT 18 /* virtio input */ #define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ #define VIRTIO_ID_CRYPTO 20 /* virtio crypto */ +#define VIRTIO_ID_PMEM 27 /* virtio pmem */ #endif /* _LINUX_VIRTIO_IDS_H */ diff --git a/include/uapi/linux/virtio_pmem.h b/include/uapi/linux/virtio_pmem.h new file mode 100644 index 000000000000..efcd72f2d20d --- /dev/null +++ b/include/uapi/linux/virtio_pmem.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +/* + * Definitions for virtio-pmem devices. + * + * Copyright (C) 2019 Red Hat, Inc. + * + * Author(s): Pankaj Gupta + */ + +#ifndef _UAPI_LINUX_VIRTIO_PMEM_H +#define _UAPI_LINUX_VIRTIO_PMEM_H + +#include +#include +#include + +struct virtio_pmem_config { + __u64 start; + __u64 size; +}; + +#define VIRTIO_PMEM_REQ_TYPE_FLUSH 0 + +struct virtio_pmem_resp { + /* Host return status corresponding to flush request */ + __u32 ret; +}; + +struct virtio_pmem_req { + /* command type */ + __u32 type; +}; + +#endif From 7a6c152e34bcb88832909888a837731be7489aaa Mon Sep 17 00:00:00 2001 From: Kenny Root Date: Fri, 24 Jan 2020 14:08:53 -0800 Subject: [PATCH 2423/3715] ANDROID: cuttlefish_defconfig: enable NVDIMM/PMEM options Options needed for the virtual platform and physical platforms via DT to define PMEM regions for resume-on-reboot feature Bug: 146400078 Change-Id: Icaffbd3a7425ac3d9914378fb356d32f9393eec0 Signed-off-by: Kenny Root --- arch/arm64/configs/cuttlefish_defconfig | 4 ++++ arch/x86/configs/x86_64_cuttlefish_defconfig | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index fe9a24b182f2..22f93af4ec6e 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -403,6 +403,7 @@ CONFIG_RTC_DRV_PL030=y CONFIG_RTC_DRV_PL031=y CONFIG_VIRTIO_PCI=y # CONFIG_VIRTIO_PCI_LEGACY is not set +CONFIG_VIRTIO_PMEM=y CONFIG_VIRTIO_BALLOON=y CONFIG_VIRTIO_INPUT=y CONFIG_VIRTIO_MMIO=y @@ -418,6 +419,9 @@ CONFIG_MAILBOX=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_ANDROID=y CONFIG_ANDROID_BINDER_IPC=y +CONFIG_LIBNVDIMM=y +# CONFIG_ND_BLK is not set +# CONFIG_BTT is not set CONFIG_ARM_SCPI_PROTOCOL=y # CONFIG_ARM_SCPI_POWER_DOMAIN is not set CONFIG_EXT4_FS=y diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 095a60dc538c..10e69e97bc8f 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -419,6 +419,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_TEST=y CONFIG_SW_SYNC=y CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_PMEM=y CONFIG_VIRTIO_BALLOON=y CONFIG_VIRTIO_INPUT=y CONFIG_VIRTIO_MMIO=y @@ -432,6 +433,9 @@ CONFIG_ION_SYSTEM_HEAP=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_ANDROID=y CONFIG_ANDROID_BINDER_IPC=y +CONFIG_LIBNVDIMM=y +# CONFIG_ND_BLK is not set +# CONFIG_BTT is not set # CONFIG_FIRMWARE_MEMMAP is not set CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y From ebf4322ff50d20790f3fb77c1a9ef817a5c318bc Mon Sep 17 00:00:00 2001 From: Ram Muthiah Date: Mon, 25 Nov 2019 14:19:33 -0800 Subject: [PATCH 2424/3715] ANDROID: cuttlefish_defconfig: enable CONFIG_IKHEADERS as m Change-Id: I584b09e1565a1453567e692fdff9e92790b4a29e Signed-off-by: Ram Muthiah Bug: 143710295 Test: Treehugger --- arch/arm64/configs/cuttlefish_defconfig | 1 + arch/x86/configs/x86_64_cuttlefish_defconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index 22f93af4ec6e..d0a8c06670c1 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -9,6 +9,7 @@ CONFIG_TASK_IO_ACCOUNTING=y CONFIG_PSI=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y +CONFIG_IKHEADERS=m CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y CONFIG_RT_GROUP_SCHED=y diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 10e69e97bc8f..022325173f80 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -12,6 +12,7 @@ CONFIG_TASK_IO_ACCOUNTING=y CONFIG_PSI=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y +CONFIG_IKHEADERS=m CONFIG_CGROUPS=y CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y From 0dc39d68bf2715a87705826efb0d032ee5a7018d Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Fri, 27 Dec 2019 14:37:40 +0100 Subject: [PATCH 2425/3715] FROMGIT: ext4: Add EXT4_IOC_FSGETXATTR/EXT4_IOC_FSSETXATTR to compat_ioctl. These are backed by 'struct fsxattr' which has the same size on all architectures. Signed-off-by: Martijn Coenen Link: https://lore.kernel.org/patchwork/patch/1172430/ Signed-off-by: Martijn Coenen (cherry picked from commit a54d8d34d2354f3a2a9dda00d9dd6666a50c486b git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git dev) Change-Id: Ie7a047b94415ef7c19fa534f47741ebd79dcc909 --- fs/ext4/ioctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 3ba07ccb24d2..f852d90a1562 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -1247,6 +1247,8 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case FS_IOC_GETFSMAP: case FS_IOC_ENABLE_VERITY: case FS_IOC_MEASURE_VERITY: + case EXT4_IOC_FSGETXATTR: + case EXT4_IOC_FSSETXATTR: break; default: return -ENOIOCTLCMD; From e71ab588b09178efc05c401db29b6c8f426b9917 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 23 Oct 2019 17:00:45 -0700 Subject: [PATCH 2426/3715] xfs: Sanity check flags of Q_XQUOTARM call commit 3dd4d40b420846dd35869ccc8f8627feef2cff32 upstream. Flags passed to Q_XQUOTARM were not sanity checked for invalid values. Fix that. Fixes: 9da93f9b7cdf ("xfs: fix Q_XQUOTARM ioctl") Reported-by: Yang Xu Signed-off-by: Jan Kara Reviewed-by: Eric Sandeen Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_quotaops.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index a65108594a07..21bc6d2d23ca 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c @@ -214,6 +214,9 @@ xfs_fs_rm_xquota( if (XFS_IS_QUOTA_ON(mp)) return -EINVAL; + if (uflags & ~(FS_USER_QUOTA | FS_GROUP_QUOTA | FS_PROJ_QUOTA)) + return -EINVAL; + if (uflags & FS_USER_QUOTA) flags |= XFS_DQ_USER; if (uflags & FS_GROUP_QUOTA) From 137875d425bb04eb6fbf98f50fdae0a592dee96b Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Wed, 4 Sep 2019 08:56:25 +0300 Subject: [PATCH 2427/3715] mfd: intel-lpss: Add default I2C device properties for Gemini Lake commit 3f31bc67e4dc6a555341dffefe328ddd58e8b431 upstream. It turned out Intel Gemini Lake doesn't use the same I2C timing parameters as Broxton. I got confirmation from the Windows team that Gemini Lake systems should use updated timing parameters that differ from those used in Broxton based systems. Fixes: f80e78aa11ad ("mfd: intel-lpss: Add Intel Gemini Lake PCI IDs") Tested-by: Chris Chiu Signed-off-by: Jarkko Nikula Acked-by: Andy Shevchenko Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/intel-lpss-pci.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/mfd/intel-lpss-pci.c b/drivers/mfd/intel-lpss-pci.c index 2b7e8eeaa59e..0504761516f7 100644 --- a/drivers/mfd/intel-lpss-pci.c +++ b/drivers/mfd/intel-lpss-pci.c @@ -126,6 +126,18 @@ static const struct intel_lpss_platform_info apl_i2c_info = { .properties = apl_i2c_properties, }; +static struct property_entry glk_i2c_properties[] = { + PROPERTY_ENTRY_U32("i2c-sda-hold-time-ns", 313), + PROPERTY_ENTRY_U32("i2c-sda-falling-time-ns", 171), + PROPERTY_ENTRY_U32("i2c-scl-falling-time-ns", 290), + { }, +}; + +static const struct intel_lpss_platform_info glk_i2c_info = { + .clk_rate = 133000000, + .properties = glk_i2c_properties, +}; + static const struct intel_lpss_platform_info cnl_i2c_info = { .clk_rate = 216000000, .properties = spt_i2c_properties, @@ -165,14 +177,14 @@ static const struct pci_device_id intel_lpss_pci_ids[] = { { PCI_VDEVICE(INTEL, 0x1ac6), (kernel_ulong_t)&bxt_info }, { PCI_VDEVICE(INTEL, 0x1aee), (kernel_ulong_t)&bxt_uart_info }, /* GLK */ - { PCI_VDEVICE(INTEL, 0x31ac), (kernel_ulong_t)&bxt_i2c_info }, - { PCI_VDEVICE(INTEL, 0x31ae), (kernel_ulong_t)&bxt_i2c_info }, - { PCI_VDEVICE(INTEL, 0x31b0), (kernel_ulong_t)&bxt_i2c_info }, - { PCI_VDEVICE(INTEL, 0x31b2), (kernel_ulong_t)&bxt_i2c_info }, - { PCI_VDEVICE(INTEL, 0x31b4), (kernel_ulong_t)&bxt_i2c_info }, - { PCI_VDEVICE(INTEL, 0x31b6), (kernel_ulong_t)&bxt_i2c_info }, - { PCI_VDEVICE(INTEL, 0x31b8), (kernel_ulong_t)&bxt_i2c_info }, - { PCI_VDEVICE(INTEL, 0x31ba), (kernel_ulong_t)&bxt_i2c_info }, + { PCI_VDEVICE(INTEL, 0x31ac), (kernel_ulong_t)&glk_i2c_info }, + { PCI_VDEVICE(INTEL, 0x31ae), (kernel_ulong_t)&glk_i2c_info }, + { PCI_VDEVICE(INTEL, 0x31b0), (kernel_ulong_t)&glk_i2c_info }, + { PCI_VDEVICE(INTEL, 0x31b2), (kernel_ulong_t)&glk_i2c_info }, + { PCI_VDEVICE(INTEL, 0x31b4), (kernel_ulong_t)&glk_i2c_info }, + { PCI_VDEVICE(INTEL, 0x31b6), (kernel_ulong_t)&glk_i2c_info }, + { PCI_VDEVICE(INTEL, 0x31b8), (kernel_ulong_t)&glk_i2c_info }, + { PCI_VDEVICE(INTEL, 0x31ba), (kernel_ulong_t)&glk_i2c_info }, { PCI_VDEVICE(INTEL, 0x31bc), (kernel_ulong_t)&bxt_uart_info }, { PCI_VDEVICE(INTEL, 0x31be), (kernel_ulong_t)&bxt_uart_info }, { PCI_VDEVICE(INTEL, 0x31c0), (kernel_ulong_t)&bxt_uart_info }, From 7e4108de535ce81a68eb7418ad92ce4b87eb5e61 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 4 Dec 2019 11:50:15 +0000 Subject: [PATCH 2428/3715] powerpc/archrandom: fix arch_get_random_seed_int() commit b6afd1234cf93aa0d71b4be4788c47534905f0be upstream. Commit 01c9348c7620ec65 powerpc: Use hardware RNG for arch_get_random_seed_* not arch_get_random_* updated arch_get_random_[int|long]() to be NOPs, and moved the hardware RNG backing to arch_get_random_seed_[int|long]() instead. However, it failed to take into account that arch_get_random_int() was implemented in terms of arch_get_random_long(), and so we ended up with a version of the former that is essentially a NOP as well. Fix this by calling arch_get_random_seed_long() from arch_get_random_seed_int() instead. Fixes: 01c9348c7620ec65 ("powerpc: Use hardware RNG for arch_get_random_seed_* not arch_get_random_*") Signed-off-by: Ard Biesheuvel Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191204115015.18015-1-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/archrandom.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h index 9c63b596e6ce..a09595f00cab 100644 --- a/arch/powerpc/include/asm/archrandom.h +++ b/arch/powerpc/include/asm/archrandom.h @@ -28,7 +28,7 @@ static inline int arch_get_random_seed_int(unsigned int *v) unsigned long val; int rc; - rc = arch_get_random_long(&val); + rc = arch_get_random_seed_long(&val); if (rc) *v = val; From 3d6331c0f5b4b2b40e109ab6df11c3a880eda3be Mon Sep 17 00:00:00 2001 From: Tung Nguyen Date: Thu, 28 Nov 2019 10:10:07 +0700 Subject: [PATCH 2429/3715] tipc: fix wrong timeout input for tipc_wait_for_cond() commit 12db3c8083fcab4270866a88191933f2d9f24f89 upstream. In function __tipc_shutdown(), the timeout value passed to tipc_wait_for_cond() is not jiffies. This commit fixes it by converting that value from milliseconds to jiffies. Fixes: 365ad353c256 ("tipc: reduce risk of user starvation during link congestion") Signed-off-by: Tung Nguyen Acked-by: Jon Maloy Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 21929ba196eb..d9ec6335c7dc 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -487,7 +487,7 @@ static void __tipc_shutdown(struct socket *sock, int error) struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); struct net *net = sock_net(sk); - long timeout = CONN_TIMEOUT_DEFAULT; + long timeout = msecs_to_jiffies(CONN_TIMEOUT_DEFAULT); u32 dnode = tsk_peer_node(tsk); struct sk_buff *skb; From ea6e0910c76008e4db710d9f02b743714daad611 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 21 Sep 2019 10:44:01 +0200 Subject: [PATCH 2430/3715] mt7601u: fix bbp version check in mt7601u_wait_bbp_ready commit 15e14f76f85f4f0eab3b8146e1cd3c58ce272823 upstream. Fix bbp ready check in mt7601u_wait_bbp_ready. The issue is reported by coverity with the following error: Logical vs. bitwise operator The expression's value does not depend on the operands; inadvertent use of the wrong operator is a likely logic error. Addresses-Coverity-ID: 1309441 ("Logical vs. bitwise operator") Fixes: c869f77d6abb ("add mt7601u driver") Acked-by: Jakub Kicinski Signed-off-by: Lorenzo Bianconi Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mediatek/mt7601u/phy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt7601u/phy.c b/drivers/net/wireless/mediatek/mt7601u/phy.c index ca09a5d4305e..71a47459bf8a 100644 --- a/drivers/net/wireless/mediatek/mt7601u/phy.c +++ b/drivers/net/wireless/mediatek/mt7601u/phy.c @@ -221,7 +221,7 @@ int mt7601u_wait_bbp_ready(struct mt7601u_dev *dev) do { val = mt7601u_bbp_rr(dev, MT_BBP_REG_VERSION); - if (val && ~val) + if (val && val != 0xff) break; } while (--i); From 90225af01dc97d907e9e44e694fba55212bb228e Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Thu, 14 Nov 2019 13:58:49 +0100 Subject: [PATCH 2431/3715] crypto: sun4i-ss - fix big endian issues commit d1d787bcebfe122a5bd443ae565696661e2e9656 upstream. When testing BigEndian kernel, the sun4i-ss was failling all crypto tests. This patch fix endian issues with it. Fixes: 6298e948215f ("crypto: sunxi-ss - Add Allwinner Security System crypto accelerator") Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/sunxi-ss/sun4i-ss-hash.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-hash.c b/drivers/crypto/sunxi-ss/sun4i-ss-hash.c index 1a724263761b..2d178e013535 100644 --- a/drivers/crypto/sunxi-ss/sun4i-ss-hash.c +++ b/drivers/crypto/sunxi-ss/sun4i-ss-hash.c @@ -179,7 +179,7 @@ static int sun4i_hash(struct ahash_request *areq) */ unsigned int i = 0, end, fill, min_fill, nwait, nbw = 0, j = 0, todo; unsigned int in_i = 0; - u32 spaces, rx_cnt = SS_RX_DEFAULT, bf[32] = {0}, wb = 0, v, ivmode = 0; + u32 spaces, rx_cnt = SS_RX_DEFAULT, bf[32] = {0}, v, ivmode = 0; struct sun4i_req_ctx *op = ahash_request_ctx(areq); struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); struct sun4i_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm); @@ -188,6 +188,7 @@ static int sun4i_hash(struct ahash_request *areq) struct sg_mapping_iter mi; int in_r, err = 0; size_t copied = 0; + __le32 wb = 0; dev_dbg(ss->dev, "%s %s bc=%llu len=%u mode=%x wl=%u h0=%0x", __func__, crypto_tfm_alg_name(areq->base.tfm), @@ -399,7 +400,7 @@ hash_final: nbw = op->len - 4 * nwait; if (nbw) { - wb = *(u32 *)(op->buf + nwait * 4); + wb = cpu_to_le32(*(u32 *)(op->buf + nwait * 4)); wb &= GENMASK((nbw * 8) - 1, 0); op->byte_count += nbw; @@ -408,7 +409,7 @@ hash_final: /* write the remaining bytes of the nbw buffer */ wb |= ((1 << 7) << (nbw * 8)); - bf[j++] = wb; + bf[j++] = le32_to_cpu(wb); /* * number of space to pad to obtain 64o minus 8(size) minus 4 (final 1) @@ -427,13 +428,13 @@ hash_final: /* write the length of data */ if (op->mode == SS_OP_SHA1) { - __be64 bits = cpu_to_be64(op->byte_count << 3); - bf[j++] = lower_32_bits(bits); - bf[j++] = upper_32_bits(bits); + __be64 *bits = (__be64 *)&bf[j]; + *bits = cpu_to_be64(op->byte_count << 3); + j += 2; } else { - __le64 bits = op->byte_count << 3; - bf[j++] = lower_32_bits(bits); - bf[j++] = upper_32_bits(bits); + __le64 *bits = (__le64 *)&bf[j]; + *bits = cpu_to_le64(op->byte_count << 3); + j += 2; } writesl(ss->base + SS_RXFIFO, bf, j); @@ -475,7 +476,7 @@ hash_final: } } else { for (i = 0; i < 4; i++) { - v = readl(ss->base + SS_MD0 + i * 4); + v = cpu_to_le32(readl(ss->base + SS_MD0 + i * 4)); memcpy(areq->result + i * 4, &v, 4); } } From 204ad0b43e88bee9b46b1ac5e8c65d1bdf3da35b Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Mon, 6 Aug 2018 08:19:09 +0200 Subject: [PATCH 2432/3715] drm/sti: do not remove the drm_bridge that was never added [ Upstream commit 66e31a72dc38543b2d9d1ce267dc78ba9beebcfd ] Removing the drm_bridge_remove call should avoid a NULL dereference during list processing in drm_bridge_remove if the error path is ever taken. The more natural approach would perhaps be to add a drm_bridge_add, but there are several other bridges that never call drm_bridge_add. Just removing the drm_bridge_remove is the easier fix. Fixes: 84601dbdea36 ("drm: sti: rework init sequence") Acked-by: Daniel Vetter Signed-off-by: Peter Rosin Signed-off-by: Benjamin Gaignard Link: https://patchwork.freedesktop.org/patch/msgid/20180806061910.29914-2-peda@axentia.se Signed-off-by: Sasha Levin --- drivers/gpu/drm/sti/sti_hda.c | 1 - drivers/gpu/drm/sti/sti_hdmi.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/gpu/drm/sti/sti_hda.c b/drivers/gpu/drm/sti/sti_hda.c index cf65e32b5090..0399bb18d387 100644 --- a/drivers/gpu/drm/sti/sti_hda.c +++ b/drivers/gpu/drm/sti/sti_hda.c @@ -721,7 +721,6 @@ static int sti_hda_bind(struct device *dev, struct device *master, void *data) return 0; err_sysfs: - drm_bridge_remove(bridge); return -EINVAL; } diff --git a/drivers/gpu/drm/sti/sti_hdmi.c b/drivers/gpu/drm/sti/sti_hdmi.c index 30f02d2fdd03..bbb195a92e93 100644 --- a/drivers/gpu/drm/sti/sti_hdmi.c +++ b/drivers/gpu/drm/sti/sti_hdmi.c @@ -1314,7 +1314,6 @@ static int sti_hdmi_bind(struct device *dev, struct device *master, void *data) return 0; err_sysfs: - drm_bridge_remove(bridge); hdmi->drm_connector = NULL; return -EINVAL; } From 81f7d38650d13e431464e4935d188900bbd2ce5b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 4 Jul 2018 12:42:50 +0300 Subject: [PATCH 2433/3715] drm/virtio: fix bounds check in virtio_gpu_cmd_get_capset() [ Upstream commit 09c4b49457434fa74749ad6194ef28464d9f5df9 ] This doesn't affect runtime because in the current code "idx" is always valid. First, we read from "vgdev->capsets[idx].max_size" before checking whether "idx" is within bounds. And secondly the bounds check is off by one so we could end up reading one element beyond the end of the vgdev->capsets[] array. Fixes: 62fb7a5e1096 ("virtio-gpu: add 3d/virgl support") Signed-off-by: Dan Carpenter Link: http://patchwork.freedesktop.org/patch/msgid/20180704094250.m7sgvvzg3dhcvv3h@kili.mountain Signed-off-by: Gerd Hoffmann Signed-off-by: Sasha Levin --- drivers/gpu/drm/virtio/virtgpu_vq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c index 21c2de81f3e3..a3be65e689fd 100644 --- a/drivers/gpu/drm/virtio/virtgpu_vq.c +++ b/drivers/gpu/drm/virtio/virtgpu_vq.c @@ -648,11 +648,11 @@ int virtio_gpu_cmd_get_capset(struct virtio_gpu_device *vgdev, { struct virtio_gpu_get_capset *cmd_p; struct virtio_gpu_vbuffer *vbuf; - int max_size = vgdev->capsets[idx].max_size; + int max_size; struct virtio_gpu_drv_cap_cache *cache_ent; void *resp_buf; - if (idx > vgdev->num_capsets) + if (idx >= vgdev->num_capsets) return -EINVAL; if (version > vgdev->capsets[idx].max_version) @@ -662,6 +662,7 @@ int virtio_gpu_cmd_get_capset(struct virtio_gpu_device *vgdev, if (!cache_ent) return -ENOMEM; + max_size = vgdev->capsets[idx].max_size; cache_ent->caps_cache = kmalloc(max_size, GFP_KERNEL); if (!cache_ent->caps_cache) { kfree(cache_ent); From 961e9f394d4ce5ec9518bd2c04469da5d2d9d3b3 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Tue, 11 Sep 2018 16:18:36 +0200 Subject: [PATCH 2434/3715] ALSA: hda: fix unused variable warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5b03006d5c58ddd31caf542eef4d0269bcf265b3 ] When CONFIG_X86=n function azx_snoop doesn't use the variable chip it only returns true. sound/pci/hda/hda_intel.c: In function ‘dma_alloc_pages’: sound/pci/hda/hda_intel.c:2002:14: warning: unused variable ‘chip’ [-Wunused-variable] struct azx *chip = bus_to_azx(bus); ^~~~ Create a inline function of azx_snoop. Fixes: a41d122449be ("ALSA: hda - Embed bus into controller object") Signed-off-by: Anders Roxell Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/hda_controller.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sound/pci/hda/hda_controller.h b/sound/pci/hda/hda_controller.h index 8a9dd4767b1e..63cc10604afc 100644 --- a/sound/pci/hda/hda_controller.h +++ b/sound/pci/hda/hda_controller.h @@ -176,11 +176,10 @@ struct azx { #define azx_bus(chip) (&(chip)->bus.core) #define bus_to_azx(_bus) container_of(_bus, struct azx, bus.core) -#ifdef CONFIG_X86 -#define azx_snoop(chip) ((chip)->snoop) -#else -#define azx_snoop(chip) true -#endif +static inline bool azx_snoop(struct azx *chip) +{ + return !IS_ENABLED(CONFIG_X86) || chip->snoop; +} /* * macros for easy use From 49f3e22df7169a098a13384b51a2caabbd77604c Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 13 Sep 2018 18:12:09 +0200 Subject: [PATCH 2435/3715] apparmor: don't try to replace stale label in ptrace access check [ Upstream commit 1f8266ff58840d698a1e96d2274189de1bdf7969 ] As a comment above begin_current_label_crit_section() explains, begin_current_label_crit_section() must run in sleepable context because when label_is_stale() is true, aa_replace_current_label() runs, which uses prepare_creds(), which can sleep. Until now, the ptrace access check (which runs with a task lock held) violated this rule. Also add a might_sleep() assertion to begin_current_label_crit_section(), because asserts are less likely to be ignored than comments. Fixes: b2d09ae449ced ("apparmor: move ptrace checks to using labels") Signed-off-by: Jann Horn Signed-off-by: John Johansen Signed-off-by: Sasha Levin --- security/apparmor/include/context.h | 2 ++ security/apparmor/lsm.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/security/apparmor/include/context.h b/security/apparmor/include/context.h index 6ae07e9aaa17..812cdec9dd3b 100644 --- a/security/apparmor/include/context.h +++ b/security/apparmor/include/context.h @@ -191,6 +191,8 @@ static inline struct aa_label *begin_current_label_crit_section(void) { struct aa_label *label = aa_current_raw_label(); + might_sleep(); + if (label_is_stale(label)) { label = aa_get_newest_label(label); if (aa_replace_current_label(label) == 0) diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 1346ee5be04f..4f08023101f3 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -108,12 +108,12 @@ static int apparmor_ptrace_access_check(struct task_struct *child, struct aa_label *tracer, *tracee; int error; - tracer = begin_current_label_crit_section(); + tracer = __begin_current_label_crit_section(); tracee = aa_get_task_label(child); error = aa_may_ptrace(tracer, tracee, mode == PTRACE_MODE_READ ? AA_PTRACE_READ : AA_PTRACE_TRACE); aa_put_label(tracee); - end_current_label_crit_section(tracer); + __end_current_label_crit_section(tracer); return error; } From 7f4a33710523bb7be8b2d5c3c64ee737b957021f Mon Sep 17 00:00:00 2001 From: Jitendra Bhivare Date: Tue, 28 Aug 2018 10:22:58 -0700 Subject: [PATCH 2436/3715] PCI: iproc: Remove PAXC slot check to allow VF support [ Upstream commit 4da6b4480766e5bc9c4d7bc14bf1d0939a1a5fa7 ] Fix previous incorrect logic that limits PAXC slot number to zero only. In order for SRIOV/VF to work, we need to allow the slot number to be greater than zero. Fixes: 46560388c476c ("PCI: iproc: Allow multiple devices except on PAXC") Signed-off-by: Jitendra Bhivare Signed-off-by: Ray Jui Signed-off-by: Lorenzo Pieralisi Reviewed-by: Andy Gospodarek Signed-off-by: Sasha Levin --- drivers/pci/host/pcie-iproc.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/pci/host/pcie-iproc.c b/drivers/pci/host/pcie-iproc.c index c0ecc9f35667..8f8dac0155d6 100644 --- a/drivers/pci/host/pcie-iproc.c +++ b/drivers/pci/host/pcie-iproc.c @@ -573,14 +573,6 @@ static void __iomem *iproc_pcie_map_cfg_bus(struct iproc_pcie *pcie, return (pcie->base + offset); } - /* - * PAXC is connected to an internally emulated EP within the SoC. It - * allows only one device. - */ - if (pcie->ep_is_internal) - if (slot > 0) - return NULL; - return iproc_pcie_map_ep_cfg_reg(pcie, busno, slot, fn, where); } From 0442e745fbbc4678fbd341ebe9876b4041d7fd12 Mon Sep 17 00:00:00 2001 From: John Garry Date: Sat, 22 Sep 2018 01:25:26 +0800 Subject: [PATCH 2437/3715] drm/hisilicon: hibmc: Don't overwrite fb helper surface depth [ Upstream commit 0ff9f49646353ce31312411e7e7bd2281492a40e ] Currently the driver overwrites the surface depth provided by the fb helper to give an invalid bpp/surface depth combination. This has been exposed by commit 70109354fed2 ("drm: Reject unknown legacy bpp and depth for drm_mode_addfb ioctl"), which now causes the driver to fail to probe. Fix by not overwriting the surface depth. Fixes: d1667b86795a ("drm/hisilicon/hibmc: Add support for frame buffer") Signed-off-by: John Garry Reviewed-by: Xinliang Liu Signed-off-by: Xinliang Liu Signed-off-by: Sasha Levin --- drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c index 8bd29075ae4e..edcca1761500 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c @@ -71,7 +71,6 @@ static int hibmc_drm_fb_create(struct drm_fb_helper *helper, DRM_DEBUG_DRIVER("surface width(%d), height(%d) and bpp(%d)\n", sizes->surface_width, sizes->surface_height, sizes->surface_bpp); - sizes->surface_depth = 32; bytes_per_pixel = DIV_ROUND_UP(sizes->surface_bpp, 8); From 19f75f6e5d3df311c0845c88cb60143f1621febd Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Sun, 30 Sep 2018 01:57:42 -0400 Subject: [PATCH 2438/3715] IB/rxe: replace kvfree with vfree [ Upstream commit 721ad7e643f7002efa398838693f90284ea216d1 ] The buf is allocated by vmalloc_user in the function rxe_queue_init. So it is better to free it by vfree. Fixes: 8700e3e7c485 ("Soft RoCE driver") Reviewed-by: Leon Romanovsky Signed-off-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/sw/rxe/rxe_cq.c | 4 ++-- drivers/infiniband/sw/rxe/rxe_qp.c | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c index c4aabf78dc90..f6e036ded046 100644 --- a/drivers/infiniband/sw/rxe/rxe_cq.c +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -30,7 +30,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ - +#include #include "rxe.h" #include "rxe_loc.h" #include "rxe_queue.h" @@ -97,7 +97,7 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, err = do_mmap_info(rxe, udata, false, context, cq->queue->buf, cq->queue->buf_size, &cq->queue->ip); if (err) { - kvfree(cq->queue->buf); + vfree(cq->queue->buf); kfree(cq->queue); return err; } diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index aeea994b04c4..25055a68a2c0 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "rxe.h" #include "rxe_loc.h" @@ -255,7 +256,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, qp->sq.queue->buf_size, &qp->sq.queue->ip); if (err) { - kvfree(qp->sq.queue->buf); + vfree(qp->sq.queue->buf); kfree(qp->sq.queue); return err; } @@ -308,7 +309,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, qp->rq.queue->buf_size, &qp->rq.queue->ip); if (err) { - kvfree(qp->rq.queue->buf); + vfree(qp->rq.queue->buf); kfree(qp->rq.queue); return err; } From 805d9b456cea5b0c09e2a2cab291bd6291f8ed85 Mon Sep 17 00:00:00 2001 From: Alex Estrin Date: Wed, 26 Sep 2018 10:56:03 -0700 Subject: [PATCH 2439/3715] IB/hfi1: Add mtu check for operational data VLs [ Upstream commit eb50130964e8c1379f37c3d3bab33a411ec62e98 ] Since Virtual Lanes BCT credits and MTU are set through separate MADs, we have to ensure both are valid, and data VLs are ready for transmission before we allow port transition to Armed state. Fixes: 5e2d6764a729 ("IB/hfi1: Verify port data VLs credits on transition to Armed") Reviewed-by: Mike Marciniszyn Reviewed-by: Michael J. Ruhl Signed-off-by: Alex Estrin Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/chip.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 9f78bb07744c..4a0b7c003477 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -10552,12 +10552,29 @@ void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason, } } -/* - * Verify if BCT for data VLs is non-zero. +/** + * data_vls_operational() - Verify if data VL BCT credits and MTU + * are both set. + * @ppd: pointer to hfi1_pportdata structure + * + * Return: true - Ok, false -otherwise. */ static inline bool data_vls_operational(struct hfi1_pportdata *ppd) { - return !!ppd->actual_vls_operational; + int i; + u64 reg; + + if (!ppd->actual_vls_operational) + return false; + + for (i = 0; i < ppd->vls_supported; i++) { + reg = read_csr(ppd->dd, SEND_CM_CREDIT_VL + (8 * i)); + if ((reg && !ppd->dd->vld[i].mtu) || + (!reg && ppd->dd->vld[i].mtu)) + return false; + } + + return true; } /* @@ -10662,7 +10679,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) if (!data_vls_operational(ppd)) { dd_dev_err(dd, - "%s: data VLs not operational\n", __func__); + "%s: Invalid data VL credits or mtu\n", + __func__); ret = -EINVAL; break; } From 48eba57c43a1ee1810a257152c89cf7dfa812d56 Mon Sep 17 00:00:00 2001 From: Nicolas Huaman Date: Thu, 4 Oct 2018 16:42:05 +0200 Subject: [PATCH 2440/3715] ALSA: usb-audio: update quirk for B&W PX to remove microphone [ Upstream commit c369c8db15d51fa175d2ba85928f79d16af6b562 ] A quirk in snd-usb-audio was added to automate setting sample rate to 4800k and remove the previously exposed nonfunctional microphone for the Bowers & Wilkins PX: commit 240a8af929c7c57dcde28682725b29cf8474e8e5 https://lore.kernel.org/patchwork/patch/919689/ However the headphones where updated shortly after that to remove the unintentional microphone functionality. I guess because of this the headphones now crash when connecting them via USB while the quirk is active. Dmesg: snd-usb-audio: probe of 2-3:1.0 failed with error -22 usb 2-3: 2:1: cannot get min/max values for control 2 (id 2) This patch removes the microfone and allows the headphones to connect and work out of the box. It is based on the current mainline kernel and successfully applied an tested on my machine (4.18.10.arch1-1). Fixes: 240a8af929c7 ("ALSA: usb-audio: Add a quirck for B&W PX headphones") Signed-off-by: Nicolas Huaman Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/quirks-table.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index d32727c74a16..c892b4d1e733 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -3293,19 +3293,14 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"), .ifnum = 0, .type = QUIRK_AUDIO_STANDARD_MIXER, }, - /* Capture */ - { - .ifnum = 1, - .type = QUIRK_IGNORE_INTERFACE, - }, /* Playback */ { - .ifnum = 2, + .ifnum = 1, .type = QUIRK_AUDIO_FIXED_ENDPOINT, .data = &(const struct audioformat) { .formats = SNDRV_PCM_FMTBIT_S16_LE, .channels = 2, - .iface = 2, + .iface = 1, .altsetting = 1, .altset_idx = 1, .attributes = UAC_EP_CS_ATTR_FILL_MAX | From 27db95dfd2045e39226feba0610f67063f8f6e82 Mon Sep 17 00:00:00 2001 From: "Spencer E. Olson" Date: Wed, 3 Oct 2018 14:54:16 -0600 Subject: [PATCH 2441/3715] staging: comedi: ni_mio_common: protect register write overflow [ Upstream commit 1cbca5852d6c16e85a21487a15d211195aacd4a1 ] Fixes two problems introduced as early as commit 03aef4b6dc12 ("Staging: comedi: add ni_mio_common code"): (1) Ensures that the last four bits of NISTC_RTSI_TRIGB_OUT_REG register is not unduly overwritten on e-series devices. On e-series devices, the first three of the last four bits are reserved. The last bit defines the output selection of the RGOUT0 pin, otherwise known as RTSI_Sub_Selection. For m-series devices, these last four bits are indeed used as the output selection of the RTSI7 pin (and the RTSI_Sub_Selection bit for the RGOUT0 pin is moved to the RTSI_Trig_Direction register. (2) Allows all 4 RTSI_BRD lines to be treated as valid sources for RTSI lines. This patch also cleans up the ni_get_rtsi_routing command for readability. Fixes: 03aef4b6dc12 ("Staging: comedi: add ni_mio_common code") Signed-off-by: Spencer E. Olson Reviewed-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- .../staging/comedi/drivers/ni_mio_common.c | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/staging/comedi/drivers/ni_mio_common.c b/drivers/staging/comedi/drivers/ni_mio_common.c index 36361bdf934a..2f82dcb1fd06 100644 --- a/drivers/staging/comedi/drivers/ni_mio_common.c +++ b/drivers/staging/comedi/drivers/ni_mio_common.c @@ -4991,7 +4991,10 @@ static int ni_valid_rtsi_output_source(struct comedi_device *dev, case NI_RTSI_OUTPUT_G_SRC0: case NI_RTSI_OUTPUT_G_GATE0: case NI_RTSI_OUTPUT_RGOUT0: - case NI_RTSI_OUTPUT_RTSI_BRD_0: + case NI_RTSI_OUTPUT_RTSI_BRD(0): + case NI_RTSI_OUTPUT_RTSI_BRD(1): + case NI_RTSI_OUTPUT_RTSI_BRD(2): + case NI_RTSI_OUTPUT_RTSI_BRD(3): return 1; case NI_RTSI_OUTPUT_RTSI_OSC: return (devpriv->is_m_series) ? 1 : 0; @@ -5012,11 +5015,18 @@ static int ni_set_rtsi_routing(struct comedi_device *dev, devpriv->rtsi_trig_a_output_reg |= NISTC_RTSI_TRIG(chan, src); ni_stc_writew(dev, devpriv->rtsi_trig_a_output_reg, NISTC_RTSI_TRIGA_OUT_REG); - } else if (chan < 8) { + } else if (chan < NISTC_RTSI_TRIG_NUM_CHAN(devpriv->is_m_series)) { devpriv->rtsi_trig_b_output_reg &= ~NISTC_RTSI_TRIG_MASK(chan); devpriv->rtsi_trig_b_output_reg |= NISTC_RTSI_TRIG(chan, src); ni_stc_writew(dev, devpriv->rtsi_trig_b_output_reg, NISTC_RTSI_TRIGB_OUT_REG); + } else if (chan != NISTC_RTSI_TRIG_OLD_CLK_CHAN) { + /* probably should never reach this, since the + * ni_valid_rtsi_output_source above errors out if chan is too + * high + */ + dev_err(dev->class_dev, "%s: unknown rtsi channel\n", __func__); + return -EINVAL; } return 2; } @@ -5032,12 +5042,12 @@ static unsigned int ni_get_rtsi_routing(struct comedi_device *dev, } else if (chan < NISTC_RTSI_TRIG_NUM_CHAN(devpriv->is_m_series)) { return NISTC_RTSI_TRIG_TO_SRC(chan, devpriv->rtsi_trig_b_output_reg); - } else { - if (chan == NISTC_RTSI_TRIG_OLD_CLK_CHAN) - return NI_RTSI_OUTPUT_RTSI_OSC; - dev_err(dev->class_dev, "bug! should never get here?\n"); - return 0; + } else if (chan == NISTC_RTSI_TRIG_OLD_CLK_CHAN) { + return NI_RTSI_OUTPUT_RTSI_OSC; } + + dev_err(dev->class_dev, "%s: unknown rtsi channel\n", __func__); + return -EINVAL; } static int ni_rtsi_insn_config(struct comedi_device *dev, From bfd5e35ba3b4c33dd846d521fdf7933a52c8c543 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 12 Oct 2018 12:12:28 +0200 Subject: [PATCH 2442/3715] pwm: lpss: Release runtime-pm reference from the driver's remove callback [ Upstream commit 42885551cedb45961879d2fc3dc3c4dc545cc23e ] For each pwm output which gets enabled through pwm_lpss_apply(), we do a pm_runtime_get_sync(). This commit adds pm_runtime_put() calls to pwm_lpss_remove() to balance these when the driver gets removed with some of the outputs still enabled. Fixes: f080be27d7d9 ("pwm: lpss: Add support for runtime PM") Acked-by: Andy Shevchenko Signed-off-by: Hans de Goede Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/pwm/pwm-lpss.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c index 1e69c1c9ec09..7a4a6406cf69 100644 --- a/drivers/pwm/pwm-lpss.c +++ b/drivers/pwm/pwm-lpss.c @@ -216,6 +216,12 @@ EXPORT_SYMBOL_GPL(pwm_lpss_probe); int pwm_lpss_remove(struct pwm_lpss_chip *lpwm) { + int i; + + for (i = 0; i < lpwm->info->npwm; i++) { + if (pwm_is_enabled(&lpwm->chip.pwms[i])) + pm_runtime_put(lpwm->chip.dev); + } return pwmchip_remove(&lpwm->chip); } EXPORT_SYMBOL_GPL(pwm_lpss_remove); From 5a5aee840210828e511cf45a94692d2854836361 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Sun, 21 Oct 2018 18:34:46 +0200 Subject: [PATCH 2443/3715] drm/sun4i: hdmi: Fix double flag assignation [ Upstream commit 1e0ff648940e603cab6c52cf3723017d30d78f30 ] The is_double flag is a boolean currently assigned to the value of the d variable, that is either 1 or 2. It means that this is_double variable is always set to true, even though the initial intent was to have it set to true when d is 2. Fix this. Fixes: 9c5681011a0c ("drm/sun4i: Add HDMI support") Reported-by: Dan Carpenter Signed-off-by: Maxime Ripard Reviewed-by: Giulio Benetti Link: https://patchwork.freedesktop.org/patch/msgid/20181021163446.29135-2-maxime.ripard@bootlin.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c index 5cf2527bffc8..d7a8fea94557 100644 --- a/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c +++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c @@ -50,7 +50,7 @@ static unsigned long sun4i_tmds_calc_divider(unsigned long rate, (rate - tmp_rate) < (rate - best_rate)) { best_rate = tmp_rate; best_m = m; - is_double = d; + is_double = (d == 2) ? true : false; } } } From b92c7db5c89f62989cdd5049b7f4a9b83082324f Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 31 Oct 2018 09:56:42 +0000 Subject: [PATCH 2444/3715] mlxsw: reg: QEEC: Add minimum shaper fields [ Upstream commit 8b931821aa04823e2e5df0ae93937baabbd23286 ] Add QEEC.mise (minimum shaper enable) and QEEC.min_shaper_rate to enable configuration of minimum shaper. Increase the QEEC length to 0x20 as well: that's the length that the register has had for a long time now, but with the configurations that mlxsw typically exercises, the firmware tolerated 0x1C-sized packets. With mise=true however, FW rejects packets unless they have the full required length. Fixes: b9b7cee40579 ("mlxsw: reg: Add QoS ETS Element Configuration register") Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 8ab7a4f98a07..e7974ba06432 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -2452,7 +2452,7 @@ static inline void mlxsw_reg_qtct_pack(char *payload, u8 local_port, * Configures the ETS elements. */ #define MLXSW_REG_QEEC_ID 0x400D -#define MLXSW_REG_QEEC_LEN 0x1C +#define MLXSW_REG_QEEC_LEN 0x20 MLXSW_REG_DEFINE(qeec, MLXSW_REG_QEEC_ID, MLXSW_REG_QEEC_LEN); @@ -2494,6 +2494,15 @@ MLXSW_ITEM32(reg, qeec, element_index, 0x04, 0, 8); */ MLXSW_ITEM32(reg, qeec, next_element_index, 0x08, 0, 8); +/* reg_qeec_mise + * Min shaper configuration enable. Enables configuration of the min + * shaper on this ETS element + * 0 - Disable + * 1 - Enable + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, mise, 0x0C, 31, 1); + enum { MLXSW_REG_QEEC_BYTES_MODE, MLXSW_REG_QEEC_PACKETS_MODE, @@ -2510,6 +2519,17 @@ enum { */ MLXSW_ITEM32(reg, qeec, pb, 0x0C, 28, 1); +/* The smallest permitted min shaper rate. */ +#define MLXSW_REG_QEEC_MIS_MIN 200000 /* Kbps */ + +/* reg_qeec_min_shaper_rate + * Min shaper information rate. + * For CPU port, can only be configured for port hierarchy. + * When in bytes mode, value is specified in units of 1000bps. + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, min_shaper_rate, 0x0C, 0, 28); + /* reg_qeec_mase * Max shaper configuration enable. Enables configuration of the max * shaper on this ETS element. From e3708b8b2762296d7dde4fcac127c5bfffc3b8e5 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 27 Aug 2018 17:13:06 -0500 Subject: [PATCH 2445/3715] NTB: ntb_hw_idt: replace IS_ERR_OR_NULL with regular NULL checks [ Upstream commit 1b7619828d0c341612f58683e73f279c37e70bbc ] Both devm_kcalloc() and devm_kzalloc() return NULL on error. They never return error pointers. The use of IS_ERR_OR_NULL is currently applied to the wrong context. Fix this by replacing IS_ERR_OR_NULL with regular NULL checks. Fixes: bf2a952d31d2 ("NTB: Add IDT 89HPESxNTx PCIe-switches support") Signed-off-by: Gustavo A. R. Silva Signed-off-by: Jon Mason Signed-off-by: Sasha Levin --- drivers/ntb/hw/idt/ntb_hw_idt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.c b/drivers/ntb/hw/idt/ntb_hw_idt.c index d44d7ef38fe8..b68e2cad74cc 100644 --- a/drivers/ntb/hw/idt/ntb_hw_idt.c +++ b/drivers/ntb/hw/idt/ntb_hw_idt.c @@ -1105,9 +1105,9 @@ static struct idt_mw_cfg *idt_scan_mws(struct idt_ntb_dev *ndev, int port, } /* Allocate memory for memory window descriptors */ - ret_mws = devm_kcalloc(&ndev->ntb.pdev->dev, *mw_cnt, - sizeof(*ret_mws), GFP_KERNEL); - if (IS_ERR_OR_NULL(ret_mws)) + ret_mws = devm_kcalloc(&ndev->ntb.pdev->dev, *mw_cnt, sizeof(*ret_mws), + GFP_KERNEL); + if (!ret_mws) return ERR_PTR(-ENOMEM); /* Copy the info of detected memory windows */ @@ -2393,7 +2393,7 @@ static struct idt_ntb_dev *idt_create_dev(struct pci_dev *pdev, /* Allocate memory for the IDT PCIe-device descriptor */ ndev = devm_kzalloc(&pdev->dev, sizeof(*ndev), GFP_KERNEL); - if (IS_ERR_OR_NULL(ndev)) { + if (!ndev) { dev_err(&pdev->dev, "Memory allocation failed for descriptor"); return ERR_PTR(-ENOMEM); } From 8a0c3bc2cf9e08a6abed982fdeba0e8aca48fbe6 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 27 Oct 2018 15:49:26 +0100 Subject: [PATCH 2446/3715] pcrypt: use format specifier in kobject_add [ Upstream commit b1e3874c75ab15288f573b3532e507c37e8e7656 ] Passing string 'name' as the format specifier is potentially hazardous because name could (although very unlikely to) have a format specifier embedded in it causing issues when parsing the non-existent arguments to these. Follow best practice by using the "%s" format string for the string 'name'. Cleans up clang warning: crypto/pcrypt.c:397:40: warning: format string is not a string literal (potentially insecure) [-Wformat-security] Fixes: a3fb1e330dd2 ("pcrypt: Added sysfs interface to pcrypt") Signed-off-by: Colin Ian King Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/pcrypt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index f8ec3d4ba4a8..a5718c0a3dc4 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c @@ -394,7 +394,7 @@ static int pcrypt_sysfs_add(struct padata_instance *pinst, const char *name) int ret; pinst->kobj.kset = pcrypt_kset; - ret = kobject_add(&pinst->kobj, NULL, name); + ret = kobject_add(&pinst->kobj, NULL, "%s", name); if (!ret) kobject_uevent(&pinst->kobj, KOBJ_ADD); From b6e209a13a61b65848e214319e3348e5e23346cb Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 19 Nov 2018 11:32:41 +0800 Subject: [PATCH 2447/3715] exportfs: fix 'passing zero to ERR_PTR()' warning [ Upstream commit 909e22e05353a783c526829427e9a8de122fba9c ] Fix a static code checker warning: fs/exportfs/expfs.c:171 reconnect_one() warn: passing zero to 'ERR_PTR' The error path for lookup_one_len_unlocked failure should set err to PTR_ERR. Fixes: bbf7a8a3562f ("exportfs: move most of reconnect_path to helper function") Signed-off-by: YueHaibing Signed-off-by: Al Viro Signed-off-by: Sasha Levin --- fs/exportfs/expfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index a561ae17cf43..c08960040dd0 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -147,6 +147,7 @@ static struct dentry *reconnect_one(struct vfsmount *mnt, tmp = lookup_one_len_unlocked(nbuf, parent, strlen(nbuf)); if (IS_ERR(tmp)) { dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp)); + err = PTR_ERR(tmp); goto out_err; } if (tmp != dentry) { From fc27e03fc4769daeeb17947f99001eb94221922c Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Tue, 13 Nov 2018 17:46:14 -0500 Subject: [PATCH 2448/3715] drm/dp_mst: Skip validating ports during destruction, just ref [ Upstream commit c54c7374ff44de5e609506aca7c0deae4703b6d1 ] Jerry Zuo pointed out a rather obscure hotplugging issue that it seems I accidentally introduced into DRM two years ago. Pretend we have a topology like this: |- DP-1: mst_primary |- DP-4: active display |- DP-5: disconnected |- DP-6: active hub |- DP-7: active display |- DP-8: disconnected |- DP-9: disconnected If we unplug DP-6, the topology starting at DP-7 will be destroyed but it's payloads will live on in DP-1's VCPI allocations and thus require removal. However, this removal currently fails because drm_dp_update_payload_part1() will (rightly so) try to validate the port before accessing it, fail then abort. If we keep going, eventually we run the MST hub out of bandwidth and all new allocations will start to fail (or in my case; all new displays just start flickering a ton). We could just teach drm_dp_update_payload_part1() not to drop the port ref in this case, but then we also need to teach drm_dp_destroy_payload_step1() to do the same thing, then hope no one ever adds anything to the that requires a validated port reference in drm_dp_destroy_connector_work(). Kind of sketchy. So let's go with a more clever solution: any port that drm_dp_destroy_connector_work() interacts with is guaranteed to still exist in memory until we say so. While said port might not be valid we don't really care: that's the whole reason we're destroying it in the first place! So, teach drm_dp_get_validated_port_ref() to use the all mighty current_work() function to avoid attempting to validate ports from the context of mgr->destroy_connector_work. I can't see any situation where this wouldn't be safe, and this avoids having to play whack-a-mole in the future of trying to work around port validation. Signed-off-by: Lyude Paul Fixes: 263efde31f97 ("drm/dp/mst: Get validated port ref in drm_dp_update_payload_part1()") Reported-by: Jerry Zuo Cc: Jerry Zuo Cc: Harry Wentland Cc: # v4.6+ Reviewed-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20181113224613.28809-1-lyude@redhat.com Signed-off-by: Sean Paul Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_dp_mst_topology.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index c8c83f84aced..9d94c306c8ca 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -982,9 +982,20 @@ static struct drm_dp_mst_port *drm_dp_mst_get_port_ref_locked(struct drm_dp_mst_ static struct drm_dp_mst_port *drm_dp_get_validated_port_ref(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_port *port) { struct drm_dp_mst_port *rport = NULL; + mutex_lock(&mgr->lock); - if (mgr->mst_primary) - rport = drm_dp_mst_get_port_ref_locked(mgr->mst_primary, port); + /* + * Port may or may not be 'valid' but we don't care about that when + * destroying the port and we are guaranteed that the port pointer + * will be valid until we've finished + */ + if (current_work() == &mgr->destroy_connector_work) { + kref_get(&port->kref); + rport = port; + } else if (mgr->mst_primary) { + rport = drm_dp_mst_get_port_ref_locked(mgr->mst_primary, + port); + } mutex_unlock(&mgr->lock); return rport; } From 9b1cee5e5f1fb7d9938066acd36f8be60e017742 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 28 Nov 2018 09:02:41 +0000 Subject: [PATCH 2449/3715] net: phy: Fix not to call phy_resume() if PHY is not attached [ Upstream commit ef1b5bf506b1f0ee3edc98533e1f3ecb105eb46a ] This patch fixes an issue that mdio_bus_phy_resume() doesn't call phy_resume() if the PHY is not attached. Fixes: 803dd9c77ac3 ("net: phy: avoid suspending twice a PHY") Signed-off-by: Yoshihiro Shimoda Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/phy/phy_device.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index a98c227a4c2e..99dae55cd334 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -76,7 +76,7 @@ static LIST_HEAD(phy_fixup_list); static DEFINE_MUTEX(phy_fixup_lock); #ifdef CONFIG_PM -static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) +static bool mdio_bus_phy_may_suspend(struct phy_device *phydev, bool suspend) { struct device_driver *drv = phydev->mdio.dev.driver; struct phy_driver *phydrv = to_phy_driver(drv); @@ -88,10 +88,11 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) /* PHY not attached? May suspend if the PHY has not already been * suspended as part of a prior call to phy_disconnect() -> * phy_detach() -> phy_suspend() because the parent netdev might be the - * MDIO bus driver and clock gated at this point. + * MDIO bus driver and clock gated at this point. Also may resume if + * PHY is not attached. */ if (!netdev) - return !phydev->suspended; + return suspend ? !phydev->suspended : phydev->suspended; /* Don't suspend PHY if the attached netdev parent may wakeup. * The parent may point to a PCI device, as in tg3 driver. @@ -121,7 +122,7 @@ static int mdio_bus_phy_suspend(struct device *dev) if (phydev->attached_dev && phydev->adjust_link) phy_stop_machine(phydev); - if (!mdio_bus_phy_may_suspend(phydev)) + if (!mdio_bus_phy_may_suspend(phydev, true)) return 0; return phy_suspend(phydev); @@ -132,7 +133,7 @@ static int mdio_bus_phy_resume(struct device *dev) struct phy_device *phydev = to_phy_device(dev); int ret; - if (!mdio_bus_phy_may_suspend(phydev)) + if (!mdio_bus_phy_may_suspend(phydev, false)) goto no_resume; ret = phy_resume(phydev); From c7c34c31038087b6ba47c193f38a055d3adcd71d Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Sun, 9 Dec 2018 15:53:49 +0200 Subject: [PATCH 2450/3715] IB/rxe: Fix incorrect cache cleanup in error flow [ Upstream commit 6db21d8986e14e2e86573a3b055b05296188bd2c ] Array iterator stays at the same slot, fix it. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yuval Shaia Reviewed-by: Bart Van Assche Reviewed-by: Zhu Yanjun Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/sw/rxe/rxe_pool.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index b4a8acc7bb7d..0e2425f28233 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -112,6 +112,18 @@ static inline struct kmem_cache *pool_cache(struct rxe_pool *pool) return rxe_type_info[pool->type].cache; } +static void rxe_cache_clean(size_t cnt) +{ + int i; + struct rxe_type_info *type; + + for (i = 0; i < cnt; i++) { + type = &rxe_type_info[i]; + kmem_cache_destroy(type->cache); + type->cache = NULL; + } +} + int rxe_cache_init(void) { int err; @@ -136,24 +148,14 @@ int rxe_cache_init(void) return 0; err1: - while (--i >= 0) { - kmem_cache_destroy(type->cache); - type->cache = NULL; - } + rxe_cache_clean(i); return err; } void rxe_cache_exit(void) { - int i; - struct rxe_type_info *type; - - for (i = 0; i < RXE_NUM_TYPES; i++) { - type = &rxe_type_info[i]; - kmem_cache_destroy(type->cache); - type->cache = NULL; - } + rxe_cache_clean(RXE_NUM_TYPES); } static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min) From 0c52e16253a5d6d810696a647107ea55909708f2 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Thu, 6 Dec 2018 19:28:51 +0100 Subject: [PATCH 2451/3715] staging: bcm2835-camera: Abort probe if there is no camera [ Upstream commit 7566f39dfdc11f8a97d5810c6e6295a88f97ef91 ] Abort the probing of the camera driver in case there isn't a camera actually connected to the Raspberry Pi. This solution also avoids a NULL ptr dereference of mmal instance on driver unload. Fixes: 7b3ad5abf027 ("staging: Import the BCM2835 MMAL-based V4L2 camera driver.") Signed-off-by: Stefan Wahren Reviewed-by: Nicolas Saenz Julienne Reviewed-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- .../vc04_services/bcm2835-camera/bcm2835-camera.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c b/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c index 377da037f31c..b521752d9aa0 100644 --- a/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c +++ b/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c @@ -1849,6 +1849,12 @@ static int __init bm2835_mmal_init(void) num_cameras = get_num_cameras(instance, resolutions, MAX_BCM2835_CAMERAS); + + if (num_cameras < 1) { + ret = -ENODEV; + goto cleanup_mmal; + } + if (num_cameras > MAX_BCM2835_CAMERAS) num_cameras = MAX_BCM2835_CAMERAS; @@ -1948,6 +1954,9 @@ cleanup_gdev: pr_info("%s: error %d while loading driver\n", BM2835_MMAL_MODULE_NAME, ret); +cleanup_mmal: + vchiq_mmal_finalise(instance); + return ret; } From 336384d871a9797a620063158c3d80988ea0cc47 Mon Sep 17 00:00:00 2001 From: Kelvin Cao Date: Mon, 10 Dec 2018 17:12:20 +0800 Subject: [PATCH 2452/3715] switchtec: Remove immediate status check after submitting MRPC command [ Upstream commit 526180408b815aa7b96fd48bd23cdd33ef04e38e ] After submitting a Firmware Download MRPC command, Switchtec firmware will delay Management EP BAR MemRd TLP responses by more than 10ms. This is a firmware limitation. Delayed MemRd completions are a problem for systems with a low Completion Timeout (CTO). The current driver checks the MRPC status immediately after submitting an MRPC command, which results in a delayed MemRd completion that may cause a Completion Timeout. Remove the immediate status check and rely on the check after receiving an interrupt or timing out. This is only a software workaround to the READ issue and a proper fix of this should be done in firmware. Fixes: 080b47def5e5 ("MicroSemi Switchtec management interface driver") Signed-off-by: Kelvin Cao Signed-off-by: Wesley Sheng Signed-off-by: Bjorn Helgaas Reviewed-by: Logan Gunthorpe Signed-off-by: Sasha Levin --- drivers/pci/switch/switchtec.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c index 0941555b84a5..73dba2739849 100644 --- a/drivers/pci/switch/switchtec.c +++ b/drivers/pci/switch/switchtec.c @@ -399,10 +399,6 @@ static void mrpc_cmd_submit(struct switchtec_dev *stdev) stuser->data, stuser->data_len); iowrite32(stuser->cmd, &stdev->mmio_mrpc->cmd); - stuser->status = ioread32(&stdev->mmio_mrpc->status); - if (stuser->status != SWITCHTEC_MRPC_STATUS_INPROGRESS) - mrpc_complete_cmd(stdev); - schedule_delayed_work(&stdev->mrpc_timeout, msecs_to_jiffies(500)); } From da93a64fe66594d356097b09d590cfe188117222 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 12 Dec 2018 10:57:27 +0100 Subject: [PATCH 2453/3715] pinctrl: sh-pfc: r8a7740: Add missing REF125CK pin to gether_gmii group [ Upstream commit 1ebc589a7786f17f97b9e87b44e0fb4d0290d8f8 ] The gether_gmii_mux[] array contains the REF125CK pin mark, but the gether_gmii_pins[] array lacks the corresponding pin number. Fixes: bae11d30d0cafdc5 ("sh-pfc: r8a7740: Add GETHER pin groups and functions") Signed-off-by: Geert Uytterhoeven Reviewed-by: Simon Horman Signed-off-by: Sasha Levin --- drivers/pinctrl/sh-pfc/pfc-r8a7740.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7740.c b/drivers/pinctrl/sh-pfc/pfc-r8a7740.c index 35f436bcb849..d8077065636e 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a7740.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a7740.c @@ -1982,7 +1982,7 @@ static const unsigned int gether_gmii_pins[] = { */ 185, 186, 187, 188, 189, 190, 191, 192, 174, 161, 204, 171, 170, 169, 168, 167, 166, 173, 172, 176, 184, 183, 203, - 205, 163, 206, 207, + 205, 163, 206, 207, 158, }; static const unsigned int gether_gmii_mux[] = { ET_ERXD0_MARK, ET_ERXD1_MARK, ET_ERXD2_MARK, ET_ERXD3_MARK, From da8432d949db61082b75154272e37dbafa29e4d7 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 12 Dec 2018 11:00:27 +0100 Subject: [PATCH 2454/3715] pinctrl: sh-pfc: r8a7740: Add missing LCD0 marks to lcd0_data24_1 group [ Upstream commit 96bb2a6ab4eca10e5b6490b3f0738e9f7ec22c2b ] The lcd0_data24_1_pins[] array contains the LCD0 D1[2-5] pin numbers, but the lcd0_data24_1_mux[] array lacks the corresponding pin marks. Fixes: 06c7dd866da70f6c ("sh-pfc: r8a7740: Add LCDC0 and LCDC1 pin groups and functions") Signed-off-by: Geert Uytterhoeven Reviewed-by: Simon Horman Signed-off-by: Sasha Levin --- drivers/pinctrl/sh-pfc/pfc-r8a7740.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7740.c b/drivers/pinctrl/sh-pfc/pfc-r8a7740.c index d8077065636e..e9739dbcb356 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a7740.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a7740.c @@ -2154,6 +2154,7 @@ static const unsigned int lcd0_data24_1_mux[] = { LCD0_D0_MARK, LCD0_D1_MARK, LCD0_D2_MARK, LCD0_D3_MARK, LCD0_D4_MARK, LCD0_D5_MARK, LCD0_D6_MARK, LCD0_D7_MARK, LCD0_D8_MARK, LCD0_D9_MARK, LCD0_D10_MARK, LCD0_D11_MARK, + LCD0_D12_MARK, LCD0_D13_MARK, LCD0_D14_MARK, LCD0_D15_MARK, LCD0_D16_MARK, LCD0_D17_MARK, LCD0_D18_PORT163_MARK, LCD0_D19_PORT162_MARK, LCD0_D20_PORT161_MARK, LCD0_D21_PORT158_MARK, LCD0_D22_PORT160_MARK, LCD0_D23_PORT159_MARK, From 550ba4ad0da3d94a0ee5a1016e23467f854ba603 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 12 Dec 2018 11:05:57 +0100 Subject: [PATCH 2455/3715] pinctrl: sh-pfc: r8a7791: Remove bogus ctrl marks from qspi_data4_b group [ Upstream commit 884fa25fb6e5e63ab970d612a628313bb68f37cc ] The qspi_data4_b_mux[] array contains pin marks for the clock and chip select pins. The qspi_data4_b_pins[] array rightfully does not contain the corresponding pin numbers, as the control pins are provided by a separate group (qspi_ctrl_b). Fixes: 2d0c386f135e4186 ("pinctrl: sh-pfc: r8a7791: Add QSPI pin groups") Signed-off-by: Geert Uytterhoeven Reviewed-by: Simon Horman Signed-off-by: Sasha Levin --- drivers/pinctrl/sh-pfc/pfc-r8a7791.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7791.c b/drivers/pinctrl/sh-pfc/pfc-r8a7791.c index c01ef02d326b..8600ba82f59c 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a7791.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a7791.c @@ -3220,8 +3220,7 @@ static const unsigned int qspi_data4_b_pins[] = { RCAR_GP_PIN(6, 4), }; static const unsigned int qspi_data4_b_mux[] = { - SPCLK_B_MARK, MOSI_IO0_B_MARK, MISO_IO1_B_MARK, - IO2_B_MARK, IO3_B_MARK, SSL_B_MARK, + MOSI_IO0_B_MARK, MISO_IO1_B_MARK, IO2_B_MARK, IO3_B_MARK, }; /* - SCIF0 ------------------------------------------------------------------ */ static const unsigned int scif0_data_pins[] = { From c6e7548315d703b5194f793c346c7fd93bd0d42f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 12 Dec 2018 11:12:20 +0100 Subject: [PATCH 2456/3715] pinctrl: sh-pfc: r8a7791: Remove bogus marks from vin1_b_data18 group [ Upstream commit 0d6256cb880166a4111bebce35790019e56b6e1b ] The vin1_b_data18_mux[] arrays contains pin marks for the 2 LSB bits of the color components. The vin1_b_data18_pins[] array rightfully does not include the corresponding pin numbers, as RGB18 is subset of RGB24, containing only the 6 MSB bits of each component. Fixes: 8e32c9671f84acd8 ("pinctrl: sh-pfc: r8a7791: Add VIN pins") Signed-off-by: Geert Uytterhoeven Reviewed-by: Simon Horman Signed-off-by: Sasha Levin --- drivers/pinctrl/sh-pfc/pfc-r8a7791.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7791.c b/drivers/pinctrl/sh-pfc/pfc-r8a7791.c index 8600ba82f59c..d34982ea66bf 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a7791.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a7791.c @@ -4348,17 +4348,14 @@ static const unsigned int vin1_b_data18_pins[] = { }; static const unsigned int vin1_b_data18_mux[] = { /* B */ - VI1_DATA0_B_MARK, VI1_DATA1_B_MARK, VI1_DATA2_B_MARK, VI1_DATA3_B_MARK, VI1_DATA4_B_MARK, VI1_DATA5_B_MARK, VI1_DATA6_B_MARK, VI1_DATA7_B_MARK, /* G */ - VI1_G0_B_MARK, VI1_G1_B_MARK, VI1_G2_B_MARK, VI1_G3_B_MARK, VI1_G4_B_MARK, VI1_G5_B_MARK, VI1_G6_B_MARK, VI1_G7_B_MARK, /* R */ - VI1_R0_B_MARK, VI1_R1_B_MARK, VI1_R2_B_MARK, VI1_R3_B_MARK, VI1_R4_B_MARK, VI1_R5_B_MARK, VI1_R6_B_MARK, VI1_R7_B_MARK, From 7dcf563d22795fef5d4a3476bed486fc1f013efe Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 12 Dec 2018 11:20:14 +0100 Subject: [PATCH 2457/3715] pinctrl: sh-pfc: sh73a0: Add missing TO pin to tpu4_to3 group [ Upstream commit 124cde98f856b6206b804acbdec3b7c80f8c3427 ] The tpu4_to3_mux[] array contains the TPU4TO3 pin mark, but the tpu4_to3_pins[] array lacks the corresponding pin number. Add the missing pin number, for non-GPIO pin F26. Fixes: 5da4eb049de803c7 ("sh-pfc: sh73a0: Add TPU pin groups and functions") Signed-off-by: Geert Uytterhoeven Reviewed-by: Simon Horman Signed-off-by: Sasha Levin --- drivers/pinctrl/sh-pfc/pfc-sh73a0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pinctrl/sh-pfc/pfc-sh73a0.c b/drivers/pinctrl/sh-pfc/pfc-sh73a0.c index d25e6f674d0a..f8fbedb46585 100644 --- a/drivers/pinctrl/sh-pfc/pfc-sh73a0.c +++ b/drivers/pinctrl/sh-pfc/pfc-sh73a0.c @@ -3086,6 +3086,7 @@ static const unsigned int tpu4_to2_mux[] = { }; static const unsigned int tpu4_to3_pins[] = { /* TO */ + PIN_NUMBER(6, 26), }; static const unsigned int tpu4_to3_mux[] = { TPU4TO3_MARK, From 8c387ed6cc6d234d3c0eafa9ecadaaac6aa27b30 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 12 Dec 2018 14:21:16 +0100 Subject: [PATCH 2458/3715] pinctrl: sh-pfc: r8a7794: Remove bogus IPSR9 field [ Upstream commit 6a6c195d98a1a5e70faa87f594d7564af1dd1bed ] The Peripheral Function Select Register 9 contains 12 fields, but the variable field descriptor contains a 13th bogus field of 3 bits. Fixes: 43c4436e2f1890a7 ("pinctrl: sh-pfc: add R8A7794 PFC support") Signed-off-by: Geert Uytterhoeven Reviewed-by: Simon Horman Signed-off-by: Sasha Levin --- drivers/pinctrl/sh-pfc/pfc-r8a7794.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7794.c b/drivers/pinctrl/sh-pfc/pfc-r8a7794.c index a0ed220071f5..93bdd3e8fb67 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a7794.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a7794.c @@ -4742,7 +4742,7 @@ static const struct pinmux_cfg_reg pinmux_config_regs[] = { FN_AVB_MDC, FN_SSI_SDATA6_B, 0, 0, } }, { PINMUX_CFG_REG_VAR("IPSR9", 0xE6060044, 32, - 1, 3, 3, 3, 3, 2, 2, 3, 3, 3, 3, 3, 3) { + 1, 3, 3, 3, 3, 2, 2, 3, 3, 3, 3, 3) { /* IP9_31 [1] */ 0, 0, /* IP9_30_28 [3] */ From 7fb9bf82e2a0e8271110498a029f8092bc2debc4 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 12 Dec 2018 14:42:16 +0100 Subject: [PATCH 2459/3715] pinctrl: sh-pfc: sh7734: Add missing IPSR11 field [ Upstream commit 94482af7055e1ffa211c1135256b85590ebcac99 ] The Peripheral Function Select Register 11 contains 3 reserved bits and 15 variable-width fields, but the variable field descriptor does not contain the 3-bit field IP11[25:23]. Fixes: 856cb4bb337ee504 ("sh: Add support pinmux for SH7734") Signed-off-by: Geert Uytterhoeven Reviewed-by: Simon Horman Signed-off-by: Sasha Levin --- drivers/pinctrl/sh-pfc/pfc-sh7734.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7734.c b/drivers/pinctrl/sh-pfc/pfc-sh7734.c index 3eccc9b3ca84..05ccb27f7781 100644 --- a/drivers/pinctrl/sh-pfc/pfc-sh7734.c +++ b/drivers/pinctrl/sh-pfc/pfc-sh7734.c @@ -2237,7 +2237,7 @@ static const struct pinmux_cfg_reg pinmux_config_regs[] = { FN_LCD_DATA15_B, 0, 0, 0 } }, { PINMUX_CFG_REG_VAR("IPSR11", 0xFFFC0048, 32, - 3, 1, 2, 2, 2, 3, 3, 1, 2, 3, 3, 1, 1, 1, 1) { + 3, 1, 2, 3, 2, 2, 3, 3, 1, 2, 3, 3, 1, 1, 1, 1) { /* IP11_31_29 [3] */ 0, 0, 0, 0, 0, 0, 0, 0, /* IP11_28 [1] */ From 5a442c5c032d6afcc26225ef5180b8ac4c85696e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 13 Dec 2018 13:59:42 +0100 Subject: [PATCH 2460/3715] pinctrl: sh-pfc: r8a77995: Remove bogus SEL_PWM[0-3]_3 configurations [ Upstream commit e28dc3f09c9d2555a9bd982f0847988591052226 ] While the SEL_PWM[0-3] fields in the Module Select Register 0 support 4 possible configurations per PWM pin, only the first 3 are valid. Replace the invalid and unused configurations for SEL_PWM[0-3]_3 by dummies. Fixes: 794a6711764658a1 ("pinctrl: sh-pfc: Initial R8A77995 PFC support") Signed-off-by: Geert Uytterhoeven Reviewed-by: Simon Horman Signed-off-by: Sasha Levin --- drivers/pinctrl/sh-pfc/pfc-r8a77995.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a77995.c b/drivers/pinctrl/sh-pfc/pfc-r8a77995.c index 4f5ee1d7317d..36421df1b326 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a77995.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a77995.c @@ -391,10 +391,10 @@ FM(IP12_31_28) IP12_31_28 \ #define MOD_SEL0_27 FM(SEL_MSIOF3_0) FM(SEL_MSIOF3_1) #define MOD_SEL0_26 FM(SEL_HSCIF3_0) FM(SEL_HSCIF3_1) #define MOD_SEL0_25 FM(SEL_SCIF4_0) FM(SEL_SCIF4_1) -#define MOD_SEL0_24_23 FM(SEL_PWM0_0) FM(SEL_PWM0_1) FM(SEL_PWM0_2) FM(SEL_PWM0_3) -#define MOD_SEL0_22_21 FM(SEL_PWM1_0) FM(SEL_PWM1_1) FM(SEL_PWM1_2) FM(SEL_PWM1_3) -#define MOD_SEL0_20_19 FM(SEL_PWM2_0) FM(SEL_PWM2_1) FM(SEL_PWM2_2) FM(SEL_PWM2_3) -#define MOD_SEL0_18_17 FM(SEL_PWM3_0) FM(SEL_PWM3_1) FM(SEL_PWM3_2) FM(SEL_PWM3_3) +#define MOD_SEL0_24_23 FM(SEL_PWM0_0) FM(SEL_PWM0_1) FM(SEL_PWM0_2) F_(0, 0) +#define MOD_SEL0_22_21 FM(SEL_PWM1_0) FM(SEL_PWM1_1) FM(SEL_PWM1_2) F_(0, 0) +#define MOD_SEL0_20_19 FM(SEL_PWM2_0) FM(SEL_PWM2_1) FM(SEL_PWM2_2) F_(0, 0) +#define MOD_SEL0_18_17 FM(SEL_PWM3_0) FM(SEL_PWM3_1) FM(SEL_PWM3_2) F_(0, 0) #define MOD_SEL0_15 FM(SEL_IRQ_0_0) FM(SEL_IRQ_0_1) #define MOD_SEL0_14 FM(SEL_IRQ_1_0) FM(SEL_IRQ_1_1) #define MOD_SEL0_13 FM(SEL_IRQ_2_0) FM(SEL_IRQ_2_1) From d2e6f04f6ee3fe5e4f72eb1763f228c4a0c2c701 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 13 Dec 2018 14:27:56 +0100 Subject: [PATCH 2461/3715] pinctrl: sh-pfc: sh7269: Add missing PCIOR0 field [ Upstream commit 9540cbdfcd861caf67a6f0e4bb7f46d41c4aad86 ] The Port C I/O Register 0 contains 7 reserved bits, but the descriptor contains only dummy configuration values for 6 reserved bits, thus breaking the configuration of all subsequent fields in the register. Fix this by adding the two missing configuration values. Fixes: f5e811f2a43117b2 ("sh-pfc: Add sh7269 pinmux support") Signed-off-by: Geert Uytterhoeven Reviewed-by: Simon Horman Signed-off-by: Sasha Levin --- drivers/pinctrl/sh-pfc/pfc-sh7269.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7269.c b/drivers/pinctrl/sh-pfc/pfc-sh7269.c index a50d22bef1f4..cfdb4fc177c3 100644 --- a/drivers/pinctrl/sh-pfc/pfc-sh7269.c +++ b/drivers/pinctrl/sh-pfc/pfc-sh7269.c @@ -2119,7 +2119,7 @@ static const struct pinmux_cfg_reg pinmux_config_regs[] = { }, { PINMUX_CFG_REG("PCIOR0", 0xfffe3852, 16, 1) { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, PC8_IN, PC8_OUT, PC7_IN, PC7_OUT, PC6_IN, PC6_OUT, From 311550b992d4694a223a7f0c2a5aaeac00f4249d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 13 Dec 2018 14:32:34 +0100 Subject: [PATCH 2462/3715] pinctrl: sh-pfc: sh7734: Remove bogus IPSR10 value [ Upstream commit 4d374bacd7c9665179f9752a52d5d602c45d8190 ] The IP10[5:3] field in Peripheral Function Select Register 10 has a width of 3 bits, i.e. it allows programming one out of 8 different configurations. However, 9 values are provided instead of 8, overflowing into the subsequent field in the register, and thus breaking the configuration of the latter. Fix this by dropping a bogus zero value. Fixes: ac1ebc2190f575fc ("sh-pfc: Add sh7734 pinmux support") Signed-off-by: Geert Uytterhoeven Reviewed-by: Simon Horman Signed-off-by: Sasha Levin --- drivers/pinctrl/sh-pfc/pfc-sh7734.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7734.c b/drivers/pinctrl/sh-pfc/pfc-sh7734.c index 05ccb27f7781..c691e5e9d9de 100644 --- a/drivers/pinctrl/sh-pfc/pfc-sh7734.c +++ b/drivers/pinctrl/sh-pfc/pfc-sh7734.c @@ -2231,7 +2231,7 @@ static const struct pinmux_cfg_reg pinmux_config_regs[] = { FN_LCD_CL1_B, 0, 0, /* IP10_5_3 [3] */ FN_SSI_WS23, FN_VI1_5_B, FN_TX1_D, FN_HSCK0_C, FN_FALE_B, - FN_LCD_DON_B, 0, 0, 0, + FN_LCD_DON_B, 0, 0, /* IP10_2_0 [3] */ FN_SSI_SCK23, FN_VI1_4_B, FN_RX1_D, FN_FCLE_B, FN_LCD_DATA15_B, 0, 0, 0 } From 43c3e957462b3f13dab55197bb3bcb9155b301b8 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 18 Dec 2018 13:16:02 +0000 Subject: [PATCH 2463/3715] vxlan: changelink: Fix handling of default remotes [ Upstream commit ce5e098f7a10b4bf8e948c12fa350320c5c3afad ] Default remotes are stored as FDB entries with an Ethernet address of 00:00:00:00:00:00. When a request is made to change a remote address of a VXLAN device, vxlan_changelink() first deletes the existing default remote, and then creates a new FDB entry. This works well as long as the list of default remotes matches exactly the configuration of a VXLAN remote address. Thus when the VXLAN device has a remote of X, there should be exactly one default remote FDB entry X. If the VXLAN device has no remote address, there should be no such entry. Besides using "ip link set", it is possible to manipulate the list of default remotes by using the "bridge fdb". It is therefore easy to break the above condition. Under such circumstances, the __vxlan_fdb_delete() call doesn't delete the FDB entry itself, but just one remote. The following vxlan_fdb_create() then creates a new FDB entry, leading to a situation where two entries exist for the address 00:00:00:00:00:00, each with a different subset of default remotes. An even more obvious breakage rooted in the same cause can be observed when a remote address is configured for a VXLAN device that did not have one before. In that case vxlan_changelink() doesn't remove any remote, and just creates a new FDB entry for the new address: $ ip link add name vx up type vxlan id 2000 dstport 4789 $ bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.20 self permanent $ bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.30 self permanent $ ip link set dev vx type vxlan remote 192.0.2.30 $ bridge fdb sh dev vx | grep 00:00:00:00:00:00 00:00:00:00:00:00 dst 192.0.2.30 self permanent <- new entry, 1 rdst 00:00:00:00:00:00 dst 192.0.2.20 self permanent <- orig. entry, 2 rdsts 00:00:00:00:00:00 dst 192.0.2.30 self permanent To fix this, instead of calling vxlan_fdb_create() directly, defer to vxlan_fdb_update(). That has logic to handle the duplicates properly. Additionally, it also handles notifications, so drop that call from changelink as well. Fixes: 0241b836732f ("vxlan: fix default fdb entry netlink notify ordering during netdev create") Signed-off-by: Petr Machata Acked-by: Roopa Prabhu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/vxlan.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 5aa7d5091f4d..4d97a7b5fe3c 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -3494,7 +3494,6 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], struct vxlan_rdst *dst = &vxlan->default_dst; struct vxlan_rdst old_dst; struct vxlan_config conf; - struct vxlan_fdb *f = NULL; int err; err = vxlan_nl2conf(tb, data, @@ -3520,19 +3519,19 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], old_dst.remote_ifindex, 0); if (!vxlan_addr_any(&dst->remote_ip)) { - err = vxlan_fdb_create(vxlan, all_zeros_mac, + err = vxlan_fdb_update(vxlan, all_zeros_mac, &dst->remote_ip, NUD_REACHABLE | NUD_PERMANENT, + NLM_F_APPEND | NLM_F_CREATE, vxlan->cfg.dst_port, dst->remote_vni, dst->remote_vni, dst->remote_ifindex, - NTF_SELF, &f); + NTF_SELF); if (err) { spin_unlock_bh(&vxlan->hash_lock); return err; } - vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH); } spin_unlock_bh(&vxlan->hash_lock); } From 15751bde70756fb2e84e6f9fb96c183fe4b31954 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 21 Dec 2018 00:38:30 -0800 Subject: [PATCH 2464/3715] Input: nomadik-ske-keypad - fix a loop timeout test [ Upstream commit 4d8f727b83bcd6702c2d210330872c9122d2d360 ] The loop exits with "timeout" set to -1 not to 0. Fixes: 1158f0f16224 ("Input: add support for Nomadik SKE keypad controller") Signed-off-by: Dan Carpenter Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/keyboard/nomadik-ske-keypad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/keyboard/nomadik-ske-keypad.c b/drivers/input/keyboard/nomadik-ske-keypad.c index 8567ee47761e..ae3b04557074 100644 --- a/drivers/input/keyboard/nomadik-ske-keypad.c +++ b/drivers/input/keyboard/nomadik-ske-keypad.c @@ -100,7 +100,7 @@ static int __init ske_keypad_chip_init(struct ske_keypad *keypad) while ((readl(keypad->reg_base + SKE_RIS) != 0x00000000) && timeout--) cpu_relax(); - if (!timeout) + if (timeout == -1) return -EINVAL; /* From 4483d0e723477f76b59fe0b8213fd35d6d0a9ecb Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 26 Dec 2018 08:10:01 -0500 Subject: [PATCH 2465/3715] clk: highbank: fix refcount leak in hb_clk_init() [ Upstream commit 5eb8ba90958de1285120dae5d3a5d2b1a360b3b4 ] The of_find_compatible_node() returns a node pointer with refcount incremented, but there is the lack of use of the of_node_put() when done. Add the missing of_node_put() to release the refcount. Signed-off-by: Yangtao Li Fixes: 26cae166cff9 ("ARM: highbank: remove custom .init_time hook") Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/clk-highbank.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/clk-highbank.c b/drivers/clk/clk-highbank.c index 727ed8e1bb72..8e4581004695 100644 --- a/drivers/clk/clk-highbank.c +++ b/drivers/clk/clk-highbank.c @@ -293,6 +293,7 @@ static __init struct clk *hb_clk_init(struct device_node *node, const struct clk /* Map system registers */ srnp = of_find_compatible_node(NULL, NULL, "calxeda,hb-sregs"); hb_clk->reg = of_iomap(srnp, 0); + of_node_put(srnp); BUG_ON(!hb_clk->reg); hb_clk->reg += reg; From 6c3e2bc4635e31b48147b34c7810c187e351b8d6 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 26 Dec 2018 08:14:42 -0500 Subject: [PATCH 2466/3715] clk: qoriq: fix refcount leak in clockgen_init() [ Upstream commit 70af6c5b5270e8101f318c4b69cc98a726edfab9 ] The of_find_compatible_node() returns a node pointer with refcount incremented, but there is the lack of use of the of_node_put() when done. Add the missing of_node_put() to release the refcount. Signed-off-by: Yangtao Li Fixes: 0dfc86b3173f ("clk: qoriq: Move chip-specific knowledge into driver") Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/clk-qoriq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/clk-qoriq.c b/drivers/clk/clk-qoriq.c index 1a292519d84f..999a90a16609 100644 --- a/drivers/clk/clk-qoriq.c +++ b/drivers/clk/clk-qoriq.c @@ -1382,6 +1382,7 @@ static void __init clockgen_init(struct device_node *np) pr_err("%s: Couldn't map %pOF regs\n", __func__, guts); } + of_node_put(guts); } } From 0e5dbea8fdaa1b4f64e00bac5ec5844c429f324d Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 26 Dec 2018 08:29:02 -0500 Subject: [PATCH 2467/3715] clk: socfpga: fix refcount leak [ Upstream commit 7f9705beeb3759e69165e7aff588f6488ff6c1ac ] The of_find_compatible_node() returns a node pointer with refcount incremented, but there is the lack of use of the of_node_put() when done. Add the missing of_node_put() to release the refcount. Signed-off-by: Yangtao Li Fixes: 5343325ff3dd ("clk: socfpga: add a clock driver for the Arria 10 platform") Fixes: a30d27ed739b ("clk: socfpga: fix clock driver for 3.15") Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/socfpga/clk-pll-a10.c | 1 + drivers/clk/socfpga/clk-pll.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/clk/socfpga/clk-pll-a10.c b/drivers/clk/socfpga/clk-pll-a10.c index 35fabe1a32c3..269467e8e07e 100644 --- a/drivers/clk/socfpga/clk-pll-a10.c +++ b/drivers/clk/socfpga/clk-pll-a10.c @@ -95,6 +95,7 @@ static struct clk * __init __socfpga_pll_init(struct device_node *node, clkmgr_np = of_find_compatible_node(NULL, NULL, "altr,clk-mgr"); clk_mgr_a10_base_addr = of_iomap(clkmgr_np, 0); + of_node_put(clkmgr_np); BUG_ON(!clk_mgr_a10_base_addr); pll_clk->hw.reg = clk_mgr_a10_base_addr + reg; diff --git a/drivers/clk/socfpga/clk-pll.c b/drivers/clk/socfpga/clk-pll.c index c7f463172e4b..b4b44e9b5901 100644 --- a/drivers/clk/socfpga/clk-pll.c +++ b/drivers/clk/socfpga/clk-pll.c @@ -100,6 +100,7 @@ static __init struct clk *__socfpga_pll_init(struct device_node *node, clkmgr_np = of_find_compatible_node(NULL, NULL, "altr,clk-mgr"); clk_mgr_base_addr = of_iomap(clkmgr_np, 0); + of_node_put(clkmgr_np); BUG_ON(!clk_mgr_base_addr); pll_clk->hw.reg = clk_mgr_base_addr + reg; From 3e080a42d5011fd0a6bf1fdbbe4b7476ecaeb47c Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 26 Dec 2018 08:32:15 -0500 Subject: [PATCH 2468/3715] clk: samsung: exynos4: fix refcount leak in exynos4_get_xom() [ Upstream commit cee82eb9532090cd1dc953e845d71f9b1445c84e ] The of_find_compatible_node() returns a node pointer with refcount incremented, but there is the lack of use of the of_node_put() when done. Add the missing of_node_put() to release the refcount. Signed-off-by: Yangtao Li Fixes: e062b571777f ("clk: exynos4: register clocks using common clock framework") Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/samsung/clk-exynos4.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/samsung/clk-exynos4.c b/drivers/clk/samsung/clk-exynos4.c index d8d3cb67b402..3d3026221927 100644 --- a/drivers/clk/samsung/clk-exynos4.c +++ b/drivers/clk/samsung/clk-exynos4.c @@ -1240,6 +1240,7 @@ static unsigned long __init exynos4_get_xom(void) xom = readl(chipid_base + 8); iounmap(chipid_base); + of_node_put(np); } return xom; From f55851dbf0ee077452b79678d795943e5e1f5121 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 26 Dec 2018 08:53:00 -0500 Subject: [PATCH 2469/3715] clk: imx6q: fix refcount leak in imx6q_clocks_init() [ Upstream commit c9ec1d8fef31b5fc9e90e99f9bd685db5caa7c5e ] The of_find_compatible_node() returns a node pointer with refcount incremented, but there is the lack of use of the of_node_put() when done. Add the missing of_node_put() to release the refcount. Signed-off-by: Yangtao Li Fixes: 2acd1b6f889c ("ARM: i.MX6: implement clocks using common clock framework") Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/imx/clk-imx6q.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/imx/clk-imx6q.c b/drivers/clk/imx/clk-imx6q.c index 8eb93eb2f857..e0547654cb7b 100644 --- a/drivers/clk/imx/clk-imx6q.c +++ b/drivers/clk/imx/clk-imx6q.c @@ -431,6 +431,7 @@ static void __init imx6q_clocks_init(struct device_node *ccm_node) np = of_find_compatible_node(NULL, NULL, "fsl,imx6q-anatop"); anatop_base = base = of_iomap(np, 0); WARN_ON(!base); + of_node_put(np); /* Audio/video PLL post dividers do not work on i.MX6q revision 1.0 */ if (clk_on_imx6q() && imx_get_soc_revision() == IMX_CHIP_REVISION_1_0) { From b88284ee4364acac121148fed4649f2214fd1b94 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 26 Dec 2018 08:55:10 -0500 Subject: [PATCH 2470/3715] clk: imx6sx: fix refcount leak in imx6sx_clocks_init() [ Upstream commit 1731e14fb30212dd8c1e9f8fc1af061e56498c55 ] The of_find_compatible_node() returns a node pointer with refcount incremented, but there is the lack of use of the of_node_put() when done. Add the missing of_node_put() to release the refcount. Signed-off-by: Yangtao Li Fixes: d55135689019 ("ARM: imx: add clock driver for imx6sx") Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/imx/clk-imx6sx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/imx/clk-imx6sx.c b/drivers/clk/imx/clk-imx6sx.c index e6d389e333d7..baa07553a0dd 100644 --- a/drivers/clk/imx/clk-imx6sx.c +++ b/drivers/clk/imx/clk-imx6sx.c @@ -164,6 +164,7 @@ static void __init imx6sx_clocks_init(struct device_node *ccm_node) np = of_find_compatible_node(NULL, NULL, "fsl,imx6sx-anatop"); base = of_iomap(np, 0); WARN_ON(!base); + of_node_put(np); clks[IMX6SX_PLL1_BYPASS_SRC] = imx_clk_mux("pll1_bypass_src", base + 0x00, 14, 1, pll_bypass_src_sels, ARRAY_SIZE(pll_bypass_src_sels)); clks[IMX6SX_PLL2_BYPASS_SRC] = imx_clk_mux("pll2_bypass_src", base + 0x30, 14, 1, pll_bypass_src_sels, ARRAY_SIZE(pll_bypass_src_sels)); From 006495b2ecd3c677cdfc54332b102e4165ce7e62 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 26 Dec 2018 08:57:16 -0500 Subject: [PATCH 2471/3715] clk: imx7d: fix refcount leak in imx7d_clocks_init() [ Upstream commit 5f8c183a996b76bb09748073c856e4246fd4ce95 ] The of_find_compatible_node() returns a node pointer with refcount incremented, but there is the lack of use of the of_node_put() when done. Add the missing of_node_put() to release the refcount. Signed-off-by: Yangtao Li Fixes: 8f6d8094b215 ("ARM: imx: add imx7d clk tree support") Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/imx/clk-imx7d.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/imx/clk-imx7d.c b/drivers/clk/imx/clk-imx7d.c index 0ac9b30c8b90..9f5e5b9d4a25 100644 --- a/drivers/clk/imx/clk-imx7d.c +++ b/drivers/clk/imx/clk-imx7d.c @@ -416,6 +416,7 @@ static void __init imx7d_clocks_init(struct device_node *ccm_node) np = of_find_compatible_node(NULL, NULL, "fsl,imx7d-anatop"); base = of_iomap(np, 0); WARN_ON(!base); + of_node_put(np); clks[IMX7D_PLL_ARM_MAIN_SRC] = imx_clk_mux("pll_arm_main_src", base + 0x60, 14, 2, pll_bypass_src_sel, ARRAY_SIZE(pll_bypass_src_sel)); clks[IMX7D_PLL_DRAM_MAIN_SRC] = imx_clk_mux("pll_dram_main_src", base + 0x70, 14, 2, pll_bypass_src_sel, ARRAY_SIZE(pll_bypass_src_sel)); From ddbaa5cac637e24b77f2a9b28507599cac6006f5 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 26 Dec 2018 08:59:36 -0500 Subject: [PATCH 2472/3715] clk: vf610: fix refcount leak in vf610_clocks_init() [ Upstream commit 567177024e0313e4f0dcba7ba10c0732e50e655d ] The of_find_compatible_node() returns a node pointer with refcount incremented, but there is the lack of use of the of_node_put() when done. Add the missing of_node_put() to release the refcount. Signed-off-by: Yangtao Li Fixes: 1f2c5fd5f048 ("ARM: imx: add VF610 clock support") Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/imx/clk-vf610.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/imx/clk-vf610.c b/drivers/clk/imx/clk-vf610.c index 6dae54325a91..a334667c450a 100644 --- a/drivers/clk/imx/clk-vf610.c +++ b/drivers/clk/imx/clk-vf610.c @@ -203,6 +203,7 @@ static void __init vf610_clocks_init(struct device_node *ccm_node) np = of_find_compatible_node(NULL, NULL, "fsl,vf610-anatop"); anatop_base = of_iomap(np, 0); BUG_ON(!anatop_base); + of_node_put(np); np = ccm_node; ccm_base = of_iomap(np, 0); From a924a933d8b493a1eda0c1aa4d5b864a3dd415c3 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 26 Dec 2018 08:36:58 -0500 Subject: [PATCH 2473/3715] clk: armada-370: fix refcount leak in a370_clk_init() [ Upstream commit a3c24050bdf70c958a8d98c2823b66ea761e6a31 ] The of_find_compatible_node() returns a node pointer with refcount incremented, but there is the lack of use of the of_node_put() when done. Add the missing of_node_put() to release the refcount. Signed-off-by: Yangtao Li Reviewed-by: Gregory CLEMENT Fixes: 07ad6836fa21 ("clk: mvebu: armada-370: maintain clock init order") Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/mvebu/armada-370.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/clk/mvebu/armada-370.c b/drivers/clk/mvebu/armada-370.c index 2c7c1085f883..8fdfa97900cd 100644 --- a/drivers/clk/mvebu/armada-370.c +++ b/drivers/clk/mvebu/armada-370.c @@ -177,8 +177,10 @@ static void __init a370_clk_init(struct device_node *np) mvebu_coreclk_setup(np, &a370_coreclks); - if (cgnp) + if (cgnp) { mvebu_clk_gating_setup(cgnp, a370_gating_desc); + of_node_put(cgnp); + } } CLK_OF_DECLARE(a370_clk, "marvell,armada-370-core-clock", a370_clk_init); From 0843f4fac91a3c0a9249f378a97b53e8f325d2ad Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 26 Dec 2018 08:40:19 -0500 Subject: [PATCH 2474/3715] clk: kirkwood: fix refcount leak in kirkwood_clk_init() [ Upstream commit e7beeab9c61591cd0e690d8733d534c3f4278ff8 ] The of_find_compatible_node() returns a node pointer with refcount incremented, but there is the lack of use of the of_node_put() when done. Add the missing of_node_put() to release the refcount. Signed-off-by: Yangtao Li Reviewed-by: Gregory CLEMENT Fixes: 58d516ae95cb ("clk: mvebu: kirkwood: maintain clock init order") Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/mvebu/kirkwood.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/mvebu/kirkwood.c b/drivers/clk/mvebu/kirkwood.c index a2a8d614039d..890ebf623261 100644 --- a/drivers/clk/mvebu/kirkwood.c +++ b/drivers/clk/mvebu/kirkwood.c @@ -333,6 +333,8 @@ static void __init kirkwood_clk_init(struct device_node *np) if (cgnp) { mvebu_clk_gating_setup(cgnp, kirkwood_gating_desc); kirkwood_clk_muxing_setup(cgnp, kirkwood_mux_desc); + + of_node_put(cgnp); } } CLK_OF_DECLARE(kirkwood_clk, "marvell,kirkwood-core-clock", From 3af85e1c58892fb137f9cf722bf5ef777f62fcaf Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 26 Dec 2018 08:42:26 -0500 Subject: [PATCH 2475/3715] clk: armada-xp: fix refcount leak in axp_clk_init() [ Upstream commit db20a90a4b6745dad62753f8bd2f66afdd5abc84 ] The of_find_compatible_node() returns a node pointer with refcount incremented, but there is the lack of use of the of_node_put() when done. Add the missing of_node_put() to release the refcount. Signed-off-by: Yangtao Li Reviewed-by: Gregory CLEMENT Fixes: 0a11a6ae9437 ("clk: mvebu: armada-xp: maintain clock init order") Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/mvebu/armada-xp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/clk/mvebu/armada-xp.c b/drivers/clk/mvebu/armada-xp.c index 0ec44ae9a2a2..df529982adc9 100644 --- a/drivers/clk/mvebu/armada-xp.c +++ b/drivers/clk/mvebu/armada-xp.c @@ -228,7 +228,9 @@ static void __init axp_clk_init(struct device_node *np) mvebu_coreclk_setup(np, &axp_coreclks); - if (cgnp) + if (cgnp) { mvebu_clk_gating_setup(cgnp, axp_gating_desc); + of_node_put(cgnp); + } } CLK_OF_DECLARE(axp_clk, "marvell,armada-xp-core-clock", axp_clk_init); From 938021a99d967a9310505d3b33b5fcb258cdd08c Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 26 Dec 2018 08:48:05 -0500 Subject: [PATCH 2476/3715] clk: mv98dx3236: fix refcount leak in mv98dx3236_clk_init() [ Upstream commit 9b4eedf627045ae5ddcff60a484200cdd554c413 ] The of_find_compatible_node() returns a node pointer with refcount incremented, but there is the lack of use of the of_node_put() when done. Add the missing of_node_put() to release the refcount. Signed-off-by: Yangtao Li Reviewed-by: Gregory CLEMENT Fixes: 337072604224 ("clk: mvebu: Expand mv98dx3236-core-clock support") Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/mvebu/mv98dx3236.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/clk/mvebu/mv98dx3236.c b/drivers/clk/mvebu/mv98dx3236.c index 6e203af73cac..c8a0d03d2cd6 100644 --- a/drivers/clk/mvebu/mv98dx3236.c +++ b/drivers/clk/mvebu/mv98dx3236.c @@ -174,7 +174,9 @@ static void __init mv98dx3236_clk_init(struct device_node *np) mvebu_coreclk_setup(np, &mv98dx3236_core_clocks); - if (cgnp) + if (cgnp) { mvebu_clk_gating_setup(cgnp, mv98dx3236_gating_desc); + of_node_put(cgnp); + } } CLK_OF_DECLARE(mv98dx3236_clk, "marvell,mv98dx3236-core-clock", mv98dx3236_clk_init); From c9585b6996e002e55ba9c5db257e8bcc60574443 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 26 Dec 2018 08:50:13 -0500 Subject: [PATCH 2477/3715] clk: dove: fix refcount leak in dove_clk_init() [ Upstream commit 8d726c5128298386b907963033be93407b0c4275 ] The of_find_compatible_node() returns a node pointer with refcount incremented, but there is the lack of use of the of_node_put() when done. Add the missing of_node_put() to release the refcount. Signed-off-by: Yangtao Li Reviewed-by: Gregory CLEMENT Fixes: 8f7fc5450b64 ("clk: mvebu: dove: maintain clock init order") Fixes: 63b8d92c793f ("clk: add Dove PLL divider support for GPU, VMeta and AXI clocks") Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/mvebu/dove.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/clk/mvebu/dove.c b/drivers/clk/mvebu/dove.c index 59fad9546c84..5f258c9bb68b 100644 --- a/drivers/clk/mvebu/dove.c +++ b/drivers/clk/mvebu/dove.c @@ -190,10 +190,14 @@ static void __init dove_clk_init(struct device_node *np) mvebu_coreclk_setup(np, &dove_coreclks); - if (ddnp) + if (ddnp) { dove_divider_clk_init(ddnp); + of_node_put(ddnp); + } - if (cgnp) + if (cgnp) { mvebu_clk_gating_setup(cgnp, dove_gating_desc); + of_node_put(cgnp); + } } CLK_OF_DECLARE(dove_clk, "marvell,dove-core-clock", dove_clk_init); From 97f78f43c0fa18b80fd60a6c6f9fabc1a62faaf9 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 30 Dec 2018 12:55:09 +0100 Subject: [PATCH 2478/3715] MIPS: BCM63XX: drop unused and broken DSP platform device [ Upstream commit 682fee802843b332f9c51ffc8e062de5ff773f2e ] Trying to register the DSP platform device results in a null pointer access: [ 0.124184] CPU 0 Unable to handle kernel paging request at virtual address 00000000, epc == 804e305c, ra == 804e6f20 [ 0.135208] Oops[#1]: [ 0.137514] CPU: 0 PID: 1 Comm: swapper Not tainted 4.14.87 ... [ 0.197117] epc : 804e305c bcm63xx_dsp_register+0x80/0xa4 [ 0.202838] ra : 804e6f20 board_register_devices+0x258/0x390 ... This happens because it tries to copy the passed platform data over the platform_device's unpopulated platform_data. Since this code has been broken since its submission, no driver was ever submitted for it, and apparently nobody was using it, just remove it instead of trying to fix it. Fixes: e7300d04bd08 ("MIPS: BCM63xx: Add support for the Broadcom BCM63xx family of SOCs.") Signed-off-by: Jonas Gorski Signed-off-by: Paul Burton Acked-by: Florian Fainelli Cc: linux-mips@linux-mips.org Cc: Ralf Baechle Cc: James Hogan Signed-off-by: Sasha Levin --- arch/mips/bcm63xx/Makefile | 6 +- arch/mips/bcm63xx/boards/board_bcm963xx.c | 20 ------- arch/mips/bcm63xx/dev-dsp.c | 56 ------------------- .../asm/mach-bcm63xx/bcm63xx_dev_dsp.h | 14 ----- .../include/asm/mach-bcm63xx/board_bcm963xx.h | 5 -- 5 files changed, 3 insertions(+), 98 deletions(-) delete mode 100644 arch/mips/bcm63xx/dev-dsp.c delete mode 100644 arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_dsp.h diff --git a/arch/mips/bcm63xx/Makefile b/arch/mips/bcm63xx/Makefile index c69f297fc1df..d89651e538f6 100644 --- a/arch/mips/bcm63xx/Makefile +++ b/arch/mips/bcm63xx/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 obj-y += clk.o cpu.o cs.o gpio.o irq.o nvram.o prom.o reset.o \ - setup.o timer.o dev-dsp.o dev-enet.o dev-flash.o \ - dev-pcmcia.o dev-rng.o dev-spi.o dev-hsspi.o dev-uart.o \ - dev-wdt.o dev-usb-usbd.o + setup.o timer.o dev-enet.o dev-flash.o dev-pcmcia.o \ + dev-rng.o dev-spi.o dev-hsspi.o dev-uart.o dev-wdt.o \ + dev-usb-usbd.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-y += boards/ diff --git a/arch/mips/bcm63xx/boards/board_bcm963xx.c b/arch/mips/bcm63xx/boards/board_bcm963xx.c index b2097c0d2ed7..36ec3dc2c999 100644 --- a/arch/mips/bcm63xx/boards/board_bcm963xx.c +++ b/arch/mips/bcm63xx/boards/board_bcm963xx.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -289,14 +288,6 @@ static struct board_info __initdata board_96348gw_10 = { .has_pccard = 1, .has_ehci0 = 1, - .has_dsp = 1, - .dsp = { - .gpio_rst = 6, - .gpio_int = 34, - .cs = 2, - .ext_irq = 2, - }, - .leds = { { .name = "adsl-fail", @@ -401,14 +392,6 @@ static struct board_info __initdata board_96348gw = { .has_ohci0 = 1, - .has_dsp = 1, - .dsp = { - .gpio_rst = 6, - .gpio_int = 34, - .ext_irq = 2, - .cs = 2, - }, - .leds = { { .name = "adsl-fail", @@ -898,9 +881,6 @@ int __init board_register_devices(void) if (board.has_usbd) bcm63xx_usbd_register(&board.usbd); - if (board.has_dsp) - bcm63xx_dsp_register(&board.dsp); - /* Generate MAC address for WLAN and register our SPROM, * do this after registering enet devices */ diff --git a/arch/mips/bcm63xx/dev-dsp.c b/arch/mips/bcm63xx/dev-dsp.c deleted file mode 100644 index 5bb5b154c9bd..000000000000 --- a/arch/mips/bcm63xx/dev-dsp.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Broadcom BCM63xx VoIP DSP registration - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2009 Florian Fainelli - */ - -#include -#include -#include - -#include -#include -#include -#include - -static struct resource voip_dsp_resources[] = { - { - .start = -1, /* filled at runtime */ - .end = -1, /* filled at runtime */ - .flags = IORESOURCE_MEM, - }, - { - .start = -1, /* filled at runtime */ - .flags = IORESOURCE_IRQ, - }, -}; - -static struct platform_device bcm63xx_voip_dsp_device = { - .name = "bcm63xx-voip-dsp", - .id = -1, - .num_resources = ARRAY_SIZE(voip_dsp_resources), - .resource = voip_dsp_resources, -}; - -int __init bcm63xx_dsp_register(const struct bcm63xx_dsp_platform_data *pd) -{ - struct bcm63xx_dsp_platform_data *dpd; - u32 val; - - /* Get the memory window */ - val = bcm_mpi_readl(MPI_CSBASE_REG(pd->cs - 1)); - val &= MPI_CSBASE_BASE_MASK; - voip_dsp_resources[0].start = val; - voip_dsp_resources[0].end = val + 0xFFFFFFF; - voip_dsp_resources[1].start = pd->ext_irq; - - /* copy given platform data */ - dpd = bcm63xx_voip_dsp_device.dev.platform_data; - memcpy(dpd, pd, sizeof (*pd)); - - return platform_device_register(&bcm63xx_voip_dsp_device); -} diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_dsp.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_dsp.h deleted file mode 100644 index 4e4970787371..000000000000 --- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_dsp.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __BCM63XX_DSP_H -#define __BCM63XX_DSP_H - -struct bcm63xx_dsp_platform_data { - unsigned gpio_rst; - unsigned gpio_int; - unsigned cs; - unsigned ext_irq; -}; - -int __init bcm63xx_dsp_register(const struct bcm63xx_dsp_platform_data *pd); - -#endif /* __BCM63XX_DSP_H */ diff --git a/arch/mips/include/asm/mach-bcm63xx/board_bcm963xx.h b/arch/mips/include/asm/mach-bcm63xx/board_bcm963xx.h index 5e5b1bc4a324..830f53f28e3f 100644 --- a/arch/mips/include/asm/mach-bcm63xx/board_bcm963xx.h +++ b/arch/mips/include/asm/mach-bcm63xx/board_bcm963xx.h @@ -7,7 +7,6 @@ #include #include #include -#include /* * flash mapping @@ -31,7 +30,6 @@ struct board_info { unsigned int has_ohci0:1; unsigned int has_ehci0:1; unsigned int has_usbd:1; - unsigned int has_dsp:1; unsigned int has_uart0:1; unsigned int has_uart1:1; @@ -43,9 +41,6 @@ struct board_info { /* USB config */ struct bcm63xx_usbd_platform_data usbd; - /* DSP config */ - struct bcm63xx_dsp_platform_data dsp; - /* GPIO LEDs */ struct gpio_led leds[5]; From 4963f23081cc55c4e92269c8e0c13f85c63e9094 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 7 Jan 2019 17:27:54 +0200 Subject: [PATCH 2479/3715] IB/usnic: Fix out of bounds index check in query pkey [ Upstream commit 4959d5da5737dd804255c75b8cea0a2929ce279a ] The pkey table size is one element, index should be tested for > 0 instead of > 1. Fixes: e3cf00d0a87f ("IB/usnic: Add Cisco VIC low-level hardware driver") Signed-off-by: Gal Pressman Acked-by: Parvi Kaustubhi Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index fdfa25059723..2602c7375d58 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -423,7 +423,7 @@ struct net_device *usnic_get_netdev(struct ib_device *device, u8 port_num) int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { - if (index > 1) + if (index > 0) return -EINVAL; *pkey = 0xffff; From 499cd1060357131a4348534f8cf35b8b4e0efb5f Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 7 Jan 2019 17:27:55 +0200 Subject: [PATCH 2480/3715] RDMA/ocrdma: Fix out of bounds index check in query pkey [ Upstream commit b188940796c7be31c1b8c25a9a0e0842c2e7a49e ] The pkey table size is one element, index should be tested for > 0 instead of > 1. Fixes: fe2caefcdf58 ("RDMA/ocrdma: Add driver for Emulex OneConnect IBoE RDMA adapter") Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 27d5e8d9f08d..7683d13dad3d 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -55,7 +55,7 @@ int ocrdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { - if (index > 1) + if (index > 0) return -EINVAL; *pkey = 0xffff; From 346e59489df79cb0e1dd2647560b6e64a3c0bbb2 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 7 Jan 2019 17:27:56 +0200 Subject: [PATCH 2481/3715] RDMA/qedr: Fix out of bounds index check in query pkey [ Upstream commit dbe30dae487e1a232158c24b432d45281c2805b7 ] The pkey table size is QEDR_ROCE_PKEY_TABLE_LEN, index should be tested for >= QEDR_ROCE_PKEY_TABLE_LEN instead of > QEDR_ROCE_PKEY_TABLE_LEN. Fixes: a7efd7773e31 ("qedr: Add support for PD,PKEY and CQ verbs") Signed-off-by: Gal Pressman Acked-by: Michal Kalderon Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/qedr/verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 656e7c1a4449..8bfe9073da78 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -63,7 +63,7 @@ static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src, int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { - if (index > QEDR_ROCE_PKEY_TABLE_LEN) + if (index >= QEDR_ROCE_PKEY_TABLE_LEN) return -EINVAL; *pkey = QEDR_ROCE_PKEY_DEFAULT; From f5ed3f2b4b72f584e639863ecbac090c622f0ffc Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 17 Dec 2018 17:18:30 +0800 Subject: [PATCH 2482/3715] drm/shmob: Fix return value check in shmob_drm_probe [ Upstream commit 06c3bbd3c12737a50c2e981821b5585e1786e73d ] In case of error, the function devm_ioremap_resource() returns ERR_PTR() and never returns NULL. The NULL test in the return value check should be replaced with IS_ERR(). Fixes: 8f1597c8f1a5 ("drm: shmobile: Perform initialization/cleanup at probe/remove time") Signed-off-by: YueHaibing Reviewed-by: Simon Horman Reviewed-by: Kieran Bingham Reviewed-by: Laurent Pinchart Signed-off-by: Laurent Pinchart Signed-off-by: Sasha Levin --- drivers/gpu/drm/shmobile/shmob_drm_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/shmobile/shmob_drm_drv.c b/drivers/gpu/drm/shmobile/shmob_drm_drv.c index 592572554eb0..58d8a98c749b 100644 --- a/drivers/gpu/drm/shmobile/shmob_drm_drv.c +++ b/drivers/gpu/drm/shmobile/shmob_drm_drv.c @@ -233,8 +233,8 @@ static int shmob_drm_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); sdev->mmio = devm_ioremap_resource(&pdev->dev, res); - if (sdev->mmio == NULL) - return -ENOMEM; + if (IS_ERR(sdev->mmio)) + return PTR_ERR(sdev->mmio); ret = shmob_drm_setup_clocks(sdev, pdata->clk_source); if (ret < 0) From 91eebda6ad45d30fc46c910c090272693a8ef99d Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Fri, 14 Dec 2018 12:01:02 +0100 Subject: [PATCH 2483/3715] arm64: dts: apq8016-sbc: Increase load on l11 for SDCARD [ Upstream commit af61bef513ba179559e56908b8c465e587bc3890 ] In the same way as for msm8974-hammerhead, l11 load, used for SDCARD VMMC, needs to be increased in order to prevent any voltage drop issues (due to limited current) happening with some SDCARDS or during specific operations (e.g. write). Tested on Dragonboard-410c and DART-SD410 boards. Fixes: 4c7d53d16d77 (arm64: dts: apq8016-sbc: add regulators support) Reported-by: Manabu Igusa Signed-off-by: Loic Poulain Signed-off-by: Bjorn Andersson Signed-off-by: Andy Gross Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi b/arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi index b6b44fdf7fac..c1028b47edde 100644 --- a/arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi +++ b/arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi @@ -458,6 +458,8 @@ l11 { regulator-min-microvolt = <1750000>; regulator-max-microvolt = <3337000>; + regulator-allow-set-load; + regulator-system-load = <200000>; }; l12 { From 45ad6d87fd90814aee96d9d6cd2dd76fe0e1d5e5 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 4 Jan 2019 18:08:09 +0000 Subject: [PATCH 2484/3715] spi: cadence: Correct initialisation of runtime PM [ Upstream commit 734882a8bf984c2ac8a57d8ac3ee53230bd0bed8 ] Currently the driver calls pm_runtime_put_autosuspend but without ever having done a pm_runtime_get, this causes the reference count in the pm runtime core to become -1. The bad reference count causes the core to sometimes suspend whilst an active SPI transfer is in progress. arizona spi0.1: SPI transfer timed out spi_master spi0: failed to transfer one message from queue The correct proceedure is to do all the initialisation that requires the hardware to be powered up before enabling the PM runtime, then enable the PM runtime having called pm_runtime_set_active to inform it that the hardware is currently powered up. The core will then power it down at it's leisure and no explicit pm_runtime_put is required. Fixes: d36ccd9f7ea4 ("spi: cadence: Runtime pm adaptation") Signed-off-by: Charles Keepax Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-cadence.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c index 02bd1eba045b..d08ad93d97a1 100644 --- a/drivers/spi/spi-cadence.c +++ b/drivers/spi/spi-cadence.c @@ -584,11 +584,6 @@ static int cdns_spi_probe(struct platform_device *pdev) goto clk_dis_apb; } - pm_runtime_use_autosuspend(&pdev->dev); - pm_runtime_set_autosuspend_delay(&pdev->dev, SPI_AUTOSUSPEND_TIMEOUT); - pm_runtime_set_active(&pdev->dev); - pm_runtime_enable(&pdev->dev); - ret = of_property_read_u32(pdev->dev.of_node, "num-cs", &num_cs); if (ret < 0) master->num_chipselect = CDNS_SPI_DEFAULT_NUM_CS; @@ -603,8 +598,10 @@ static int cdns_spi_probe(struct platform_device *pdev) /* SPI controller initializations */ cdns_spi_init_hw(xspi); - pm_runtime_mark_last_busy(&pdev->dev); - pm_runtime_put_autosuspend(&pdev->dev); + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + pm_runtime_use_autosuspend(&pdev->dev); + pm_runtime_set_autosuspend_delay(&pdev->dev, SPI_AUTOSUSPEND_TIMEOUT); irq = platform_get_irq(pdev, 0); if (irq <= 0) { From 70da6cce28e018bf19d0ace8d2e34cfbe14fe159 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Fri, 11 Jan 2019 20:27:18 +0530 Subject: [PATCH 2485/3715] RDMA/iw_cxgb4: Fix the unchecked ep dereference [ Upstream commit 3352976c892301fd576a2e9ff0ac7337b2e2ca48 ] The patch 944661dd97f4: "RDMA/iw_cxgb4: atomically lookup ep and get a reference" from May 6, 2016, leads to the following Smatch complaint: drivers/infiniband/hw/cxgb4/cm.c:2953 terminate() error: we previously assumed 'ep' could be null (see line 2945) Fixes: 944661dd97f4 ("RDMA/iw_cxgb4: atomically lookup ep and get a reference") Reported-by: Dan Carpenter Signed-off-by: Raju Rangoju Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/cxgb4/cm.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index bb36cdf82a8d..3668cc71b47e 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2923,15 +2923,18 @@ static int terminate(struct c4iw_dev *dev, struct sk_buff *skb) ep = get_ep_from_tid(dev, tid); BUG_ON(!ep); - if (ep && ep->com.qp) { - pr_warn("TERM received tid %u qpid %u\n", - tid, ep->com.qp->wq.sq.qid); - attrs.next_state = C4IW_QP_STATE_TERMINATE; - c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, - C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + if (ep) { + if (ep->com.qp) { + pr_warn("TERM received tid %u qpid %u\n", tid, + ep->com.qp->wq.sq.qid); + attrs.next_state = C4IW_QP_STATE_TERMINATE; + c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + } + + c4iw_put_ep(&ep->com); } else pr_warn("TERM received tid %u no ep/qp\n", tid); - c4iw_put_ep(&ep->com); return 0; } From 199b745a0bda8f0f5bf73c5538c91b4a644047d1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 14 Jan 2019 13:49:46 +0300 Subject: [PATCH 2486/3715] drm/etnaviv: NULL vs IS_ERR() buf in etnaviv_core_dump() [ Upstream commit f8261c376e7f8cb9024af5a6c54be540c7f9108e ] The etnaviv_gem_get_pages() never returns NULL. It returns error pointers on error. Fixes: a8c21a5451d8 ("drm/etnaviv: add initial etnaviv DRM driver") Signed-off-by: Dan Carpenter Signed-off-by: Lucas Stach Signed-off-by: Sasha Levin --- drivers/gpu/drm/etnaviv/etnaviv_dump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c index 2d955d7d7b6d..e154e6fb64da 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c @@ -207,7 +207,7 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu) mutex_lock(&obj->lock); pages = etnaviv_gem_get_pages(obj); mutex_unlock(&obj->lock); - if (pages) { + if (!IS_ERR(pages)) { int j; iter.hdr->data[0] = bomap - bomap_start; From 69984b65b6ea27570a65c13a81ad735476aeb473 Mon Sep 17 00:00:00 2001 From: Pawe? Chmiel Date: Wed, 9 Jan 2019 13:00:41 -0500 Subject: [PATCH 2487/3715] media: s5p-jpeg: Correct step and max values for V4L2_CID_JPEG_RESTART_INTERVAL [ Upstream commit 19c624c6b29e244c418f8b44a711cbf5e82e3cd4 ] This commit corrects max and step values for v4l2 control for V4L2_CID_JPEG_RESTART_INTERVAL. Max should be 0xffff and step should be 1. It was found by using v4l2-compliance tool and checking result of VIDIOC_QUERY_EXT_CTRL/QUERYMENU test. Previously it was complaining that step was bigger than difference between max and min. Fixes: 15f4bc3b1f42 ("[media] s5p-jpeg: Add JPEG controls support") Signed-off-by: Pawe? Chmiel Reviewed-by: Jacek Anaszewski Reviewed-by: Sylwester Nawrocki Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/s5p-jpeg/jpeg-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/s5p-jpeg/jpeg-core.c b/drivers/media/platform/s5p-jpeg/jpeg-core.c index 4568e68e15fa..85a5e33600c0 100644 --- a/drivers/media/platform/s5p-jpeg/jpeg-core.c +++ b/drivers/media/platform/s5p-jpeg/jpeg-core.c @@ -2005,7 +2005,7 @@ static int s5p_jpeg_controls_create(struct s5p_jpeg_ctx *ctx) v4l2_ctrl_new_std(&ctx->ctrl_handler, &s5p_jpeg_ctrl_ops, V4L2_CID_JPEG_RESTART_INTERVAL, - 0, 3, 0xffff, 0); + 0, 0xffff, 1, 0); if (ctx->jpeg->variant->version == SJPEG_S5P) mask = ~0x06; /* 422, 420 */ } From f3691b5e7d89c4367493887902d8b64a1266c360 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 15 Jan 2019 16:19:00 +0900 Subject: [PATCH 2488/3715] kbuild: mark prepare0 as PHONY to fix external module build [ Upstream commit e00d8880481497474792d28c14479a9fb6752046 ] Commit c3ff2a5193fa ("powerpc/32: add stack protector support") caused kernel panic on PowerPC when an external module is used with CONFIG_STACKPROTECTOR because the 'prepare' target was not executed for the external module build. Commit e07db28eea38 ("kbuild: fix single target build for external module") turned it into a build error because the 'prepare' target is now executed but the 'prepare0' target is missing for the external module build. External module on arm/arm64 with CONFIG_STACKPROTECTOR_PER_TASK is also broken in the same way. Move 'PHONY += prepare0' to the common place. GNU Make is fine with missing rule for phony targets. I also removed the comment which is wrong irrespective of this commit. I minimize the change so it can be easily backported to 4.20.x To fix v4.20, please backport e07db28eea38 ("kbuild: fix single target build for external module"), and then this commit. Link: https://bugzilla.kernel.org/show_bug.cgi?id=201891 Fixes: e07db28eea38 ("kbuild: fix single target build for external module") Fixes: c3ff2a5193fa ("powerpc/32: add stack protector support") Fixes: 189af4657186 ("ARM: smp: add support for per-task stack canaries") Fixes: 0a1213fa7432 ("arm64: enable per-task stack canaries") Cc: linux-stable # v4.20 Reported-by: Samuel Holland Reported-by: Alexey Kardashevskiy Signed-off-by: Masahiro Yamada Acked-by: Ard Biesheuvel Tested-by: Alexey Kardashevskiy Signed-off-by: Sasha Levin --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 3e8eaabf2bcb..b538e6170f73 100644 --- a/Makefile +++ b/Makefile @@ -971,6 +971,7 @@ ifdef CONFIG_STACK_VALIDATION endif endif +PHONY += prepare0 ifeq ($(KBUILD_EXTMOD),) core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/ @@ -1065,8 +1066,7 @@ include/config/kernel.release: include/config/auto.conf FORCE # archprepare is used in arch Makefiles and when processed asm symlink, # version.h and scripts_basic is processed / created. -# Listed in dependency order -PHONY += prepare archprepare prepare0 prepare1 prepare2 prepare3 +PHONY += prepare archprepare prepare1 prepare2 prepare3 # prepare3 is used to check if we are building in a separate output directory, # and if so do: From bc757cbe47a3e4b8e83c0b471890eeb7a3cb986a Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 9 Jan 2019 06:11:18 +0000 Subject: [PATCH 2489/3715] crypto: brcm - Fix some set-but-not-used warning [ Upstream commit 707d0cf8f7cff6dfee9197002859912310532c4f ] Fixes gcc '-Wunused-but-set-variable' warning: drivers/crypto/bcm/cipher.c: In function 'handle_ahash_req': drivers/crypto/bcm/cipher.c:720:15: warning: variable 'chunk_start' set but not used [-Wunused-but-set-variable] drivers/crypto/bcm/cipher.c: In function 'spu_rx_callback': drivers/crypto/bcm/cipher.c:1679:31: warning: variable 'areq' set but not used [-Wunused-but-set-variable] drivers/crypto/bcm/cipher.c:1678:22: warning: variable 'ctx' set but not used [-Wunused-but-set-variable] Fixes: 9d12ba86f818 ("crypto: brcm - Add Broadcom SPU driver") Signed-off-by: YueHaibing Reviewed-by: Raveendra Padasalagi Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/bcm/cipher.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/crypto/bcm/cipher.c b/drivers/crypto/bcm/cipher.c index 84422435f39b..279e907590e9 100644 --- a/drivers/crypto/bcm/cipher.c +++ b/drivers/crypto/bcm/cipher.c @@ -718,7 +718,7 @@ static int handle_ahash_req(struct iproc_reqctx_s *rctx) */ unsigned int new_data_len; - unsigned int chunk_start = 0; + unsigned int __maybe_unused chunk_start = 0; u32 db_size; /* Length of data field, incl gcm and hash padding */ int pad_len = 0; /* total pad len, including gcm, hash, stat padding */ u32 data_pad_len = 0; /* length of GCM/CCM padding */ @@ -1676,8 +1676,6 @@ static void spu_rx_callback(struct mbox_client *cl, void *msg) struct spu_hw *spu = &iproc_priv.spu; struct brcm_message *mssg = msg; struct iproc_reqctx_s *rctx; - struct iproc_ctx_s *ctx; - struct crypto_async_request *areq; int err = 0; rctx = mssg->ctx; @@ -1687,8 +1685,6 @@ static void spu_rx_callback(struct mbox_client *cl, void *msg) err = -EFAULT; goto cb_finish; } - areq = rctx->parent; - ctx = rctx->ctx; /* process the SPU status */ err = spu->spu_status_process(rctx->msg_buf.rx_stat); From 478428c99780eae874e04d07a1ce32fa99a9a484 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 10 Jan 2019 12:17:58 -0800 Subject: [PATCH 2490/3715] crypto: tgr192 - fix unaligned memory access [ Upstream commit f990f7fb58ac8ac9a43316f09a48cff1a49dda42 ] Fix an unaligned memory access in tgr192_transform() by using the unaligned access helpers. Fixes: 06ace7a9bafe ("[CRYPTO] Use standard byte order macros wherever possible") Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/tgr192.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crypto/tgr192.c b/crypto/tgr192.c index 321bc6ff2a9d..904c8444aa0a 100644 --- a/crypto/tgr192.c +++ b/crypto/tgr192.c @@ -25,8 +25,9 @@ #include #include #include -#include #include +#include +#include #define TGR192_DIGEST_SIZE 24 #define TGR160_DIGEST_SIZE 20 @@ -468,10 +469,9 @@ static void tgr192_transform(struct tgr192_ctx *tctx, const u8 * data) u64 a, b, c, aa, bb, cc; u64 x[8]; int i; - const __le64 *ptr = (const __le64 *)data; for (i = 0; i < 8; i++) - x[i] = le64_to_cpu(ptr[i]); + x[i] = get_unaligned_le64(data + i * sizeof(__le64)); /* save */ a = aa = tctx->a; From 886969f813beca055d92fa106a7dafa3702819d0 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Fri, 18 Jan 2019 10:06:52 +0100 Subject: [PATCH 2491/3715] ASoC: imx-sgtl5000: put of nodes if finding codec fails [ Upstream commit d9866572486802bc598a3e8576a5231378d190de ] Make sure to properly put the of node in case finding the codec fails. Fixes: 81e8e4926167 ("ASoC: fsl: add sgtl5000 clock support for imx-sgtl5000") Signed-off-by: Stefan Agner Reviewed-by: Daniel Baluta Acked-by: Nicolin Chen Reviewed-by: Fabio Estevam Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/fsl/imx-sgtl5000.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/fsl/imx-sgtl5000.c b/sound/soc/fsl/imx-sgtl5000.c index 8e525f7ac08d..3d99a8579c99 100644 --- a/sound/soc/fsl/imx-sgtl5000.c +++ b/sound/soc/fsl/imx-sgtl5000.c @@ -119,7 +119,8 @@ static int imx_sgtl5000_probe(struct platform_device *pdev) codec_dev = of_find_i2c_device_by_node(codec_np); if (!codec_dev) { dev_err(&pdev->dev, "failed to find codec platform device\n"); - return -EPROBE_DEFER; + ret = -EPROBE_DEFER; + goto fail; } data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); From fafa1309c2a4926a8744906d0adc2edc164dd4e3 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Thu, 17 Jan 2019 15:45:45 +0000 Subject: [PATCH 2492/3715] IB/iser: Pass the correct number of entries for dma mapped SGL [ Upstream commit 57b26497fabe1b9379b59fbc7e35e608e114df16 ] ib_dma_map_sg() augments the SGL into a 'dma mapped SGL'. This process may change the number of entries and the lengths of each entry. Code that touches dma_address is iterating over the 'dma mapped SGL' and must use dma_nents which returned from ib_dma_map_sg(). ib_sg_to_pages() and ib_map_mr_sg() are using dma_address so they must use dma_nents. Fixes: 39405885005a ("IB/iser: Port to new fast registration API") Fixes: bfe066e256d5 ("IB/iser: Reuse ib_sg_to_pages") Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Acked-by: Sagi Grimberg Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/iser/iser_memory.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 322209d5ff58..19883169e7b7 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -240,8 +240,8 @@ int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task, page_vec->npages = 0; page_vec->fake_mr.page_size = SIZE_4K; plen = ib_sg_to_pages(&page_vec->fake_mr, mem->sg, - mem->size, NULL, iser_set_page); - if (unlikely(plen < mem->size)) { + mem->dma_nents, NULL, iser_set_page); + if (unlikely(plen < mem->dma_nents)) { iser_err("page vec too short to hold this SG\n"); iser_data_buf_dump(mem, device->ib_device); iser_dump_page_vec(page_vec); @@ -450,10 +450,10 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey)); - n = ib_map_mr_sg(mr, mem->sg, mem->size, NULL, SIZE_4K); - if (unlikely(n != mem->size)) { + n = ib_map_mr_sg(mr, mem->sg, mem->dma_nents, NULL, SIZE_4K); + if (unlikely(n != mem->dma_nents)) { iser_err("failed to map sg (%d/%d)\n", - n, mem->size); + n, mem->dma_nents); return n < 0 ? n : -EINVAL; } From 50e12e2be9c0739c472a4b37f2ef1e5ddbfb9f68 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 6 Jan 2019 08:21:03 +0000 Subject: [PATCH 2493/3715] rtc: cmos: ignore bogus century byte [ Upstream commit 2a4daadd4d3e507138f8937926e6a4df49c6bfdc ] Older versions of Libreboot and Coreboot had an invalid value (`3' in my case) in the century byte affecting the GM45 in the Thinkpad X200. Not everybody's updated their firmwares, and Linux <= 4.2 was able to read the RTC without problems, so workaround this by ignoring invalid values. Fixes: 3c217e51d8a272b9 ("rtc: cmos: century support") Cc: Alexandre Belloni Cc: Alessandro Zummo Cc: Sylvain Chouleur Cc: Patrick McDermott Cc: linux-rtc@vger.kernel.org Signed-off-by: Eric Wong Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- drivers/rtc/rtc-mc146818-lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c index 2f1772a358ca..18a6f15e313d 100644 --- a/drivers/rtc/rtc-mc146818-lib.c +++ b/drivers/rtc/rtc-mc146818-lib.c @@ -82,7 +82,7 @@ unsigned int mc146818_get_time(struct rtc_time *time) time->tm_year += real_year - 72; #endif - if (century) + if (century > 20) time->tm_year += (century - 19) * 100; /* From 07e548c4126bda5687c3961ff07de9fb57028c2b Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 23 Jan 2019 20:00:22 +0800 Subject: [PATCH 2494/3715] spi/topcliff_pch: Fix potential NULL dereference on allocation error [ Upstream commit e902cdcb5112b89ee445588147964723fd69ffb4 ] In pch_spi_handle_dma, it doesn't check for NULL returns of kcalloc so it would result in an Oops. Fixes: c37f3c2749b5 ("spi/topcliff_pch: DMA support") Signed-off-by: YueHaibing Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-topcliff-pch.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/spi/spi-topcliff-pch.c b/drivers/spi/spi-topcliff-pch.c index 4389ab80c23e..fa730a871d25 100644 --- a/drivers/spi/spi-topcliff-pch.c +++ b/drivers/spi/spi-topcliff-pch.c @@ -1008,6 +1008,9 @@ static void pch_spi_handle_dma(struct pch_spi_data *data, int *bpw) /* RX */ dma->sg_rx_p = kcalloc(num, sizeof(*dma->sg_rx_p), GFP_ATOMIC); + if (!dma->sg_rx_p) + return; + sg_init_table(dma->sg_rx_p, num); /* Initialize SG table */ /* offset, length setting */ sg = dma->sg_rx_p; @@ -1068,6 +1071,9 @@ static void pch_spi_handle_dma(struct pch_spi_data *data, int *bpw) } dma->sg_tx_p = kcalloc(num, sizeof(*dma->sg_tx_p), GFP_ATOMIC); + if (!dma->sg_tx_p) + return; + sg_init_table(dma->sg_tx_p, num); /* Initialize SG table */ /* offset, length setting */ sg = dma->sg_tx_p; From 0bac5b8392da768ffe67ccc0310f35e4a64a277f Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 25 Jan 2019 11:23:04 +0800 Subject: [PATCH 2495/3715] clk: sunxi-ng: sun8i-a23: Enable PLL-MIPI LDOs when ungating it [ Upstream commit 108a459ef4cd17a28711d81092044e597b5c7618 ] The PLL-MIPI clock is somewhat special as it has its own LDOs which need to be turned on for this PLL to actually work and output a clock signal. Add the 2 LDO enable bits to the gate bits. Fixes: 5690879d93e8 ("clk: sunxi-ng: Add A23 CCU") Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard Signed-off-by: Sasha Levin --- drivers/clk/sunxi-ng/ccu-sun8i-a23.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-a23.c b/drivers/clk/sunxi-ng/ccu-sun8i-a23.c index d93b452f0df9..1cef040ebe82 100644 --- a/drivers/clk/sunxi-ng/ccu-sun8i-a23.c +++ b/drivers/clk/sunxi-ng/ccu-sun8i-a23.c @@ -132,7 +132,7 @@ static SUNXI_CCU_NKM_WITH_GATE_LOCK(pll_mipi_clk, "pll-mipi", 8, 4, /* N */ 4, 2, /* K */ 0, 4, /* M */ - BIT(31), /* gate */ + BIT(31) | BIT(23) | BIT(22), /* gate */ BIT(28), /* lock */ CLK_SET_RATE_UNGATE); From 61c39d4a8938e8a537e466e11cb005d51004e0e6 Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Sun, 21 Oct 2018 18:27:26 +0300 Subject: [PATCH 2496/3715] iwlwifi: mvm: avoid possible access out of array. [ Upstream commit b0d795a9ae558209656b18930c2b4def5f8fdfb8 ] The value in txq_id can be out of array scope, validate it before accessing the array. Signed-off-by: Mordechay Goodstein Fixes: cf961e16620f ("iwlwifi: mvm: support dqa-mode agg on non-shared queue") Signed-off-by: Luca Coelho Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 0cfdbaa2af3a..684c0f65a052 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -2417,7 +2417,7 @@ int iwl_mvm_sta_tx_agg_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); struct iwl_mvm_tid_data *tid_data; u16 normalized_ssn; - int txq_id; + u16 txq_id; int ret; if (WARN_ON_ONCE(tid >= IWL_MAX_TID_COUNT)) @@ -2452,17 +2452,24 @@ int iwl_mvm_sta_tx_agg_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif, */ txq_id = mvmsta->tid_data[tid].txq_id; if (txq_id == IWL_MVM_INVALID_QUEUE) { - txq_id = iwl_mvm_find_free_queue(mvm, mvmsta->sta_id, - IWL_MVM_DQA_MIN_DATA_QUEUE, - IWL_MVM_DQA_MAX_DATA_QUEUE); - if (txq_id < 0) { - ret = txq_id; + ret = iwl_mvm_find_free_queue(mvm, mvmsta->sta_id, + IWL_MVM_DQA_MIN_DATA_QUEUE, + IWL_MVM_DQA_MAX_DATA_QUEUE); + if (ret < 0) { IWL_ERR(mvm, "Failed to allocate agg queue\n"); goto release_locks; } + txq_id = ret; + /* TXQ hasn't yet been enabled, so mark it only as reserved */ mvm->queue_info[txq_id].status = IWL_MVM_QUEUE_RESERVED; + } else if (WARN_ON(txq_id >= IWL_MAX_HW_QUEUES)) { + ret = -ENXIO; + IWL_ERR(mvm, "tid_id %d out of range (0, %d)!\n", + tid, IWL_MAX_HW_QUEUES - 1); + goto out; + } else if (unlikely(mvm->queue_info[txq_id].status == IWL_MVM_QUEUE_SHARED)) { ret = -ENXIO; From 1abaee5e625d202274ed9bb32431d9958fae82cd Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Wed, 26 Dec 2018 19:21:21 +0200 Subject: [PATCH 2497/3715] net/mlx5: Take lock with IRQs disabled to avoid deadlock [ Upstream commit 33814e5d127e21f53b52e17b0722c1b57d4f4d29 ] The lock in qp_table might be taken from process context or from interrupt context. This may lead to a deadlock unless it is taken with IRQs disabled. Discovered by lockdep ================================ WARNING: inconsistent lock state 4.20.0-rc6 -------------------------------- inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} python/12572 [HC1[1]:SC0[0]:HE0:SE1] takes: 00000000052a4df4 (&(&table->lock)->rlock#2){?.+.}, /0x50 [mlx5_core] {HARDIRQ-ON-W} state was registered at: _raw_spin_lock+0x33/0x70 mlx5_get_rsc+0x1a/0x50 [mlx5_core] mlx5_ib_eqe_pf_action+0x493/0x1be0 [mlx5_ib] process_one_work+0x90c/0x1820 worker_thread+0x87/0xbb0 kthread+0x320/0x3e0 ret_from_fork+0x24/0x30 irq event stamp: 103928 hardirqs last enabled at (103927): [] nk+0x1a/0x1c hardirqs last disabled at (103928): [] unk+0x1a/0x1c softirqs last enabled at (103924): [] tcp_sendmsg+0x31/0x40 softirqs last disabled at (103922): [] 80 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(&table->lock)->rlock#2); lock(&(&table->lock)->rlock#2); *** DEADLOCK *** Fixes: 032080ab43ac ("IB/mlx5: Lock QP during page fault handling") Signed-off-by: Moni Shoua Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/qp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index 5f091c6ea049..b92d5690287b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -44,14 +44,15 @@ static struct mlx5_core_rsc_common *mlx5_get_rsc(struct mlx5_core_dev *dev, { struct mlx5_qp_table *table = &dev->priv.qp_table; struct mlx5_core_rsc_common *common; + unsigned long flags; - spin_lock(&table->lock); + spin_lock_irqsave(&table->lock, flags); common = radix_tree_lookup(&table->tree, rsn); if (common) atomic_inc(&common->refcount); - spin_unlock(&table->lock); + spin_unlock_irqrestore(&table->lock, flags); if (!common) { mlx5_core_warn(dev, "Async event for bogus resource 0x%x\n", From f401472074c6900cb0a87aa91fc4d86b34df0374 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 8 Nov 2018 09:51:56 +0100 Subject: [PATCH 2498/3715] iwlwifi: mvm: fix A-MPDU reference assignment [ Upstream commit 1f7698abedeeb3fef3cbcf78e16f925df675a179 ] The current code assigns the reference, and then goes to increment it if the toggle bit has changed. That way, we get Toggle 0 0 0 0 1 1 1 1 ID 1 1 1 1 1 2 2 2 Fix that by assigning the post-toggle ID to get Toggle 0 0 0 0 1 1 1 1 ID 1 1 1 1 2 2 2 2 Reported-by: Danny Alexander Signed-off-by: Johannes Berg Fixes: fbe4112791b8 ("iwlwifi: mvm: update mpdu metadata API") Signed-off-by: Luca Coelho Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 7fb8bbaf2142..1a12e829e98b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -871,12 +871,12 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, bool toggle_bit = phy_info & IWL_RX_MPDU_PHY_AMPDU_TOGGLE; rx_status->flag |= RX_FLAG_AMPDU_DETAILS; - rx_status->ampdu_reference = mvm->ampdu_ref; /* toggle is switched whenever new aggregation starts */ if (toggle_bit != mvm->ampdu_toggle) { mvm->ampdu_ref++; mvm->ampdu_toggle = toggle_bit; } + rx_status->ampdu_reference = mvm->ampdu_ref; } rcu_read_lock(); From 35eb06fa70302bc971342646ee3a0012e6d2e401 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 30 Jan 2019 18:30:51 +0800 Subject: [PATCH 2499/3715] tty: ipwireless: Fix potential NULL pointer dereference [ Upstream commit 7dd50e205b3348dc7784efbdf85723551de64a25 ] There is a potential NULL pointer dereference in case alloc_ctrl_packet() fails and returns NULL. Fixes: 099dc4fb6265 ("ipwireless: driver for PC Card 3G/UMTS modem") Signed-off-by: YueHaibing Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/ipwireless/hardware.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/ipwireless/hardware.c b/drivers/tty/ipwireless/hardware.c index a6b8240af6cd..960e9375a1a9 100644 --- a/drivers/tty/ipwireless/hardware.c +++ b/drivers/tty/ipwireless/hardware.c @@ -1516,6 +1516,8 @@ static void ipw_send_setup_packet(struct ipw_hardware *hw) sizeof(struct ipw_setup_get_version_query_packet), ADDR_SETUP_PROT, TL_PROTOCOLID_SETUP, TL_SETUP_SIGNO_GET_VERSION_QRY); + if (!ver_packet) + return; ver_packet->header.length = sizeof(struct tl_setup_get_version_qry); /* From 1b97b03ace9e5f2d84dba9e8e347e76cfda40dbf Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Wed, 23 Jan 2019 06:45:37 +0800 Subject: [PATCH 2500/3715] driver: uio: fix possible memory leak in __uio_register_device [ Upstream commit 1a392b3de7c5747506b38fc14b2e79977d3c7770 ] 'idev' is malloced in __uio_register_device() and leak free it before leaving from the uio_get_minor() error handing case, it will cause memory leak. Fixes: a93e7b331568 ("uio: Prevent device destruction while fds are open") Signed-off-by: Liu Jian Reviewed-by: Hamish Martin Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/uio/uio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index fb5c9701b1fb..4e9b0ff79b13 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -939,8 +939,10 @@ int __uio_register_device(struct module *owner, atomic_set(&idev->event, 0); ret = uio_get_minor(idev); - if (ret) + if (ret) { + kfree(idev); return ret; + } idev->dev.devt = MKDEV(uio_major, idev->minor); idev->dev.class = &uio_class; From 548752cabfaaf3945f2da9d202e70015c2991e9a Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Wed, 23 Jan 2019 06:45:38 +0800 Subject: [PATCH 2501/3715] driver: uio: fix possible use-after-free in __uio_register_device [ Upstream commit 221a1f4ac12d2ab46246c160b2e00d1b1160d5d9 ] In uio_dev_add_attributes() error handing case, idev is used after device_unregister(), in which 'idev' has been released, touch idev cause use-after-free. Fixes: a93e7b331568 ("uio: Prevent device destruction while fds are open") Signed-off-by: Liu Jian Reviewed-by: Hamish Martin Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/uio/uio.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index 4e9b0ff79b13..7c18536a3742 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -944,6 +944,7 @@ int __uio_register_device(struct module *owner, return ret; } + device_initialize(&idev->dev); idev->dev.devt = MKDEV(uio_major, idev->minor); idev->dev.class = &uio_class; idev->dev.parent = parent; @@ -954,7 +955,7 @@ int __uio_register_device(struct module *owner, if (ret) goto err_device_create; - ret = device_register(&idev->dev); + ret = device_add(&idev->dev); if (ret) goto err_device_create; @@ -986,9 +987,10 @@ int __uio_register_device(struct module *owner, err_request_irq: uio_dev_del_attributes(idev); err_uio_dev_add_attributes: - device_unregister(&idev->dev); + device_del(&idev->dev); err_device_create: uio_free_minor(idev); + put_device(&idev->dev); return ret; } EXPORT_SYMBOL_GPL(__uio_register_device); From d7a7f04e10017bf95eea0cb294287b4ab26e541a Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Wed, 23 Jan 2019 11:24:18 +0000 Subject: [PATCH 2502/3715] crypto: crypto4xx - Fix wrong ppc4xx_trng_probe()/ppc4xx_trng_remove() arguments [ Upstream commit 6e88098ca43a3d80ae86908f7badba683c8a0d84 ] When building without CONFIG_HW_RANDOM_PPC4XX, I hit the following build failure: drivers/crypto/amcc/crypto4xx_core.c: In function 'crypto4xx_probe': drivers/crypto/amcc/crypto4xx_core.c:1407:20: error: passing argument 1 of 'ppc4xx_trng_probe' from incompatible pointer type [-Werror=incompatible-pointer-types] In file included from drivers/crypto/amcc/crypto4xx_core.c:50:0: drivers/crypto/amcc/crypto4xx_trng.h:28:20: note: expected 'struct crypto4xx_device *' but argument is of type 'struct crypto4xx_core_device *' drivers/crypto/amcc/crypto4xx_core.c: In function 'crypto4xx_remove': drivers/crypto/amcc/crypto4xx_core.c:1434:21: error: passing argument 1 of 'ppc4xx_trng_remove' from incompatible pointer type [-Werror=incompatible-pointer-types] In file included from drivers/crypto/amcc/crypto4xx_core.c:50:0: drivers/crypto/amcc/crypto4xx_trng.h:30:20: note: expected 'struct crypto4xx_device *' but argument is of type 'struct crypto4xx_core_device *' This patch fix the needed argument of ppc4xx_trng_probe()/ppc4xx_trng_remove() in that case. Fixes: 5343e674f32f ("crypto4xx: integrate ppc4xx-rng into crypto4xx") Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/amcc/crypto4xx_trng.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/amcc/crypto4xx_trng.h b/drivers/crypto/amcc/crypto4xx_trng.h index 931d22531f51..7bbda51b7337 100644 --- a/drivers/crypto/amcc/crypto4xx_trng.h +++ b/drivers/crypto/amcc/crypto4xx_trng.h @@ -26,9 +26,9 @@ void ppc4xx_trng_probe(struct crypto4xx_core_device *core_dev); void ppc4xx_trng_remove(struct crypto4xx_core_device *core_dev); #else static inline void ppc4xx_trng_probe( - struct crypto4xx_device *dev __maybe_unused) { } + struct crypto4xx_core_device *dev __maybe_unused) { } static inline void ppc4xx_trng_remove( - struct crypto4xx_device *dev __maybe_unused) { } + struct crypto4xx_core_device *dev __maybe_unused) { } #endif #endif From d7ee5bfb5541b2d8b652f1026c12a5a631d14b8e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 1 Feb 2019 01:47:53 +0100 Subject: [PATCH 2503/3715] driver core: Do not resume suppliers under device_links_write_lock() [ Upstream commit 5db25c9eb893df8f6b93c1d97b8006d768e1b6f5 ] It is incorrect to call pm_runtime_get_sync() under device_links_write_lock(), because it may end up trying to take device_links_read_lock() while resuming the target device and that will deadlock in the non-SRCU case, so avoid that by resuming the supplier device in device_link_add() before calling device_links_write_lock(). Fixes: 21d5c57b3726 ("PM / runtime: Use device links") Fixes: baa8809f6097 ("PM / runtime: Optimize the use of device links") Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/base/core.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 2b0a1054535c..93c2fc58013e 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -180,11 +180,20 @@ struct device_link *device_link_add(struct device *consumer, struct device *supplier, u32 flags) { struct device_link *link; + bool rpm_put_supplier = false; if (!consumer || !supplier || ((flags & DL_FLAG_STATELESS) && (flags & DL_FLAG_AUTOREMOVE))) return NULL; + if (flags & DL_FLAG_PM_RUNTIME && flags & DL_FLAG_RPM_ACTIVE) { + if (pm_runtime_get_sync(supplier) < 0) { + pm_runtime_put_noidle(supplier); + return NULL; + } + rpm_put_supplier = true; + } + device_links_write_lock(); device_pm_lock(); @@ -209,13 +218,8 @@ struct device_link *device_link_add(struct device *consumer, if (flags & DL_FLAG_PM_RUNTIME) { if (flags & DL_FLAG_RPM_ACTIVE) { - if (pm_runtime_get_sync(supplier) < 0) { - pm_runtime_put_noidle(supplier); - kfree(link); - link = NULL; - goto out; - } link->rpm_active = true; + rpm_put_supplier = false; } pm_runtime_new_link(consumer); /* @@ -286,6 +290,10 @@ struct device_link *device_link_add(struct device *consumer, out: device_pm_unlock(); device_links_write_unlock(); + + if (rpm_put_supplier) + pm_runtime_put(supplier); + return link; } EXPORT_SYMBOL_GPL(device_link_add); From 25b8cd12a15c73acf75dcb0483d3857cfbae4c48 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Sat, 26 Jan 2019 16:29:20 +0200 Subject: [PATCH 2504/3715] ARM: dts: lpc32xx: add required clocks property to keypad device node [ Upstream commit 3e88bc38b9f6fe4b69cecf81badd3c19fde97f97 ] NXP LPC32xx keypad controller requires a clock property to be defined. The change fixes the driver initialization problem: lpc32xx_keys 40050000.key: failed to get clock lpc32xx_keys: probe of 40050000.key failed with error -2 Fixes: 93898eb775e5 ("arm: dts: lpc32xx: add clock properties to device nodes") Signed-off-by: Vladimir Zapolskiy Signed-off-by: Sasha Levin --- arch/arm/boot/dts/lpc32xx.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/lpc32xx.dtsi b/arch/arm/boot/dts/lpc32xx.dtsi index d077bd2b9583..2ca881055ef0 100644 --- a/arch/arm/boot/dts/lpc32xx.dtsi +++ b/arch/arm/boot/dts/lpc32xx.dtsi @@ -462,6 +462,7 @@ key: key@40050000 { compatible = "nxp,lpc3220-key"; reg = <0x40050000 0x1000>; + clocks = <&clk LPC32XX_CLK_KEY>; interrupts = <54 IRQ_TYPE_LEVEL_HIGH>; status = "disabled"; }; From 53e4e15bb1539a2fa74333f01d7f3cd2a9778a18 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Sat, 26 Jan 2019 16:29:21 +0200 Subject: [PATCH 2505/3715] ARM: dts: lpc32xx: reparent keypad controller to SIC1 [ Upstream commit 489261c45f0ebbc1c2813f337bbdf858267f5033 ] After switching to a new interrupt controller scheme by separating SIC1 and SIC2 from MIC interrupt controller just one SoC keypad controller was not taken into account, fix it now: WARNING: CPU: 0 PID: 1 at kernel/irq/irqdomain.c:524 irq_domain_associate+0x50/0x1b0 error: hwirq 0x36 is too large for interrupt-controller@40008000 ... lpc32xx_keys 40050000.key: failed to get platform irq lpc32xx_keys: probe of 40050000.key failed with error -22 Fixes: 9b8ad3fb81ae ("ARM: dts: lpc32xx: reparent SIC1 and SIC2 interrupts from MIC") Signed-off-by: Vladimir Zapolskiy Signed-off-by: Sasha Levin --- arch/arm/boot/dts/lpc32xx.dtsi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/lpc32xx.dtsi b/arch/arm/boot/dts/lpc32xx.dtsi index 2ca881055ef0..9f9386c926d1 100644 --- a/arch/arm/boot/dts/lpc32xx.dtsi +++ b/arch/arm/boot/dts/lpc32xx.dtsi @@ -463,7 +463,8 @@ compatible = "nxp,lpc3220-key"; reg = <0x40050000 0x1000>; clocks = <&clk LPC32XX_CLK_KEY>; - interrupts = <54 IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&sic1>; + interrupts = <22 IRQ_TYPE_LEVEL_HIGH>; status = "disabled"; }; From e3064d2f21f4a684539384055956605e4dfdc97b Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Tue, 29 Jan 2019 21:20:39 +0200 Subject: [PATCH 2506/3715] ARM: dts: lpc32xx: fix ARM PrimeCell LCD controller variant [ Upstream commit 7a0790a4121cbcd111cc537cdc801c46ccb789ee ] ARM PrimeCell PL111 LCD controller is found on On NXP LPC3230 and LPC3250 SoCs variants, the original reference in compatible property to an older one ARM PrimeCell PL110 is invalid. Fixes: e04920d9efcb3 ("ARM: LPC32xx: DTS files for device tree conversion") Signed-off-by: Vladimir Zapolskiy Signed-off-by: Sasha Levin --- arch/arm/boot/dts/lpc32xx.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/lpc32xx.dtsi b/arch/arm/boot/dts/lpc32xx.dtsi index 9f9386c926d1..a08ebc950923 100644 --- a/arch/arm/boot/dts/lpc32xx.dtsi +++ b/arch/arm/boot/dts/lpc32xx.dtsi @@ -139,7 +139,7 @@ }; clcd: clcd@31040000 { - compatible = "arm,pl110", "arm,primecell"; + compatible = "arm,pl111", "arm,primecell"; reg = <0x31040000 0x1000>; interrupts = <14 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clk LPC32XX_CLK_LCD>; From f68e0cc797f7813608f934496c4db4276d1073e8 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Tue, 29 Jan 2019 21:20:40 +0200 Subject: [PATCH 2507/3715] ARM: dts: lpc32xx: fix ARM PrimeCell LCD controller clocks property [ Upstream commit 30fc01bae3cda747e7d9c352b1aa51ca113c8a9d ] The originally added ARM PrimeCell PL111 clocks property misses the required "clcdclk" clock, which is the same as a clock to enable the LCD controller on NXP LPC3230 and NXP LPC3250 SoCs. Fixes: 93898eb775e5 ("arm: dts: lpc32xx: add clock properties to device nodes") Signed-off-by: Vladimir Zapolskiy Signed-off-by: Sasha Levin --- arch/arm/boot/dts/lpc32xx.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/lpc32xx.dtsi b/arch/arm/boot/dts/lpc32xx.dtsi index a08ebc950923..c5b119ddb70b 100644 --- a/arch/arm/boot/dts/lpc32xx.dtsi +++ b/arch/arm/boot/dts/lpc32xx.dtsi @@ -142,8 +142,8 @@ compatible = "arm,pl111", "arm,primecell"; reg = <0x31040000 0x1000>; interrupts = <14 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clk LPC32XX_CLK_LCD>; - clock-names = "apb_pclk"; + clocks = <&clk LPC32XX_CLK_LCD>, <&clk LPC32XX_CLK_LCD>; + clock-names = "clcdclk", "apb_pclk"; status = "disabled"; }; From 505672b687af62983b8bd9d1c5c243cd3e518fad Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Tue, 29 Jan 2019 21:20:41 +0200 Subject: [PATCH 2508/3715] ARM: dts: lpc32xx: phy3250: fix SD card regulator voltage [ Upstream commit dc141b99fc36cf910a1d8d5ee30f43f2442fd1bd ] The fixed voltage regulator on Phytec phyCORE-LPC3250 board, which supplies SD/MMC card's power, has a constant output voltage level of either 3.15V or 3.3V, the actual value depends on JP4 position, the power rail is referenced as VCC_SDIO in the board hardware manual. Fixes: d06670e96267 ("arm: dts: phy3250: add SD fixed regulator") Signed-off-by: Vladimir Zapolskiy Signed-off-by: Sasha Levin --- arch/arm/boot/dts/lpc3250-phy3250.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/lpc3250-phy3250.dts b/arch/arm/boot/dts/lpc3250-phy3250.dts index b7bd3a110a8d..dd0bdf765599 100644 --- a/arch/arm/boot/dts/lpc3250-phy3250.dts +++ b/arch/arm/boot/dts/lpc3250-phy3250.dts @@ -49,8 +49,8 @@ sd_reg: regulator@2 { compatible = "regulator-fixed"; regulator-name = "sd_reg"; - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; gpio = <&gpio 5 5 0>; enable-active-high; }; From 5cd9f229dd3e4980580406f5a47230ec5ee836d7 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Wed, 12 Dec 2018 09:45:11 +0200 Subject: [PATCH 2509/3715] iwlwifi: mvm: fix RSS config command [ Upstream commit 608dce95db10b8ee1a26dbce3f60204bb69812a5 ] The hash mask is a bitmap, so we should use BIT() on the enum values. Signed-off-by: Sara Sharon Fixes: 43413a975d06 ("iwlwifi: mvm: support rss queues configuration command") Signed-off-by: Luca Coelho Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 78228f870f8f..754dcc1c1f40 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -107,12 +107,12 @@ static int iwl_send_rss_cfg_cmd(struct iwl_mvm *mvm) int i; struct iwl_rss_config_cmd cmd = { .flags = cpu_to_le32(IWL_RSS_ENABLE), - .hash_mask = IWL_RSS_HASH_TYPE_IPV4_TCP | - IWL_RSS_HASH_TYPE_IPV4_UDP | - IWL_RSS_HASH_TYPE_IPV4_PAYLOAD | - IWL_RSS_HASH_TYPE_IPV6_TCP | - IWL_RSS_HASH_TYPE_IPV6_UDP | - IWL_RSS_HASH_TYPE_IPV6_PAYLOAD, + .hash_mask = BIT(IWL_RSS_HASH_TYPE_IPV4_TCP) | + BIT(IWL_RSS_HASH_TYPE_IPV4_UDP) | + BIT(IWL_RSS_HASH_TYPE_IPV4_PAYLOAD) | + BIT(IWL_RSS_HASH_TYPE_IPV6_TCP) | + BIT(IWL_RSS_HASH_TYPE_IPV6_UDP) | + BIT(IWL_RSS_HASH_TYPE_IPV6_PAYLOAD), }; if (mvm->trans->num_rx_queues == 1) From 526eb7a254e9ffabfe567438dc091ad5ebb5c7ee Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 2 Feb 2019 22:34:49 +0000 Subject: [PATCH 2510/3715] staging: most: cdev: add missing check for cdev_add failure [ Upstream commit 5ae890780e1b4d08f2c0c5d4ea96fc3928fc0ee9 ] Currently the call to cdev_add is missing a check for failure. Fix this by checking for failure and exiting via a new error path that ensures the allocated comp_channel struct is kfree'd. Detected by CoverityScan, CID#1462359 ("Unchecked return value") Fixes: 9bc79bbcd0c5 ("Staging: most: add MOST driver's aim-cdev module") Signed-off-by: Colin Ian King Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/staging/most/aim-cdev/cdev.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/staging/most/aim-cdev/cdev.c b/drivers/staging/most/aim-cdev/cdev.c index 1e5cbc893496..d000b6ff8a7d 100644 --- a/drivers/staging/most/aim-cdev/cdev.c +++ b/drivers/staging/most/aim-cdev/cdev.c @@ -455,7 +455,9 @@ static int aim_probe(struct most_interface *iface, int channel_id, c->devno = MKDEV(major, current_minor); cdev_init(&c->cdev, &channel_fops); c->cdev.owner = THIS_MODULE; - cdev_add(&c->cdev, c->devno, 1); + retval = cdev_add(&c->cdev, c->devno, 1); + if (retval < 0) + goto err_free_c; c->iface = iface; c->cfg = cfg; c->channel_id = channel_id; @@ -491,6 +493,7 @@ error_create_device: list_del(&c->list); error_alloc_kfifo: cdev_del(&c->cdev); +err_free_c: kfree(c); error_alloc_channel: ida_simple_remove(&minor_id, current_minor); From 7d7e9d2378899ee8e7abf8b74561d18958739042 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 5 Feb 2019 18:04:49 +0000 Subject: [PATCH 2511/3715] rtc: ds1672: fix unintended sign extension [ Upstream commit f0c04c276739ed8acbb41b4868e942a55b128dca ] Shifting a u8 by 24 will cause the value to be promoted to an integer. If the top bit of the u8 is set then the following conversion to an unsigned long will sign extend the value causing the upper 32 bits to be set in the result. Fix this by casting the u8 value to an unsigned long before the shift. Detected by CoverityScan, CID#138801 ("Unintended sign extension") Fixes: edf1aaa31fc5 ("[PATCH] RTC subsystem: DS1672 driver") Signed-off-by: Colin Ian King Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- drivers/rtc/rtc-ds1672.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-ds1672.c b/drivers/rtc/rtc-ds1672.c index 9caaccccaa57..b1ebca099b0d 100644 --- a/drivers/rtc/rtc-ds1672.c +++ b/drivers/rtc/rtc-ds1672.c @@ -58,7 +58,8 @@ static int ds1672_get_datetime(struct i2c_client *client, struct rtc_time *tm) "%s: raw read data - counters=%02x,%02x,%02x,%02x\n", __func__, buf[0], buf[1], buf[2], buf[3]); - time = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; + time = ((unsigned long)buf[3] << 24) | (buf[2] << 16) | + (buf[1] << 8) | buf[0]; rtc_time_to_tm(time, tm); From 032c10fb504cf15575a69e2f7e673c72c720ade7 Mon Sep 17 00:00:00 2001 From: Michael Kao Date: Fri, 1 Feb 2019 15:38:07 +0800 Subject: [PATCH 2512/3715] thermal: mediatek: fix register index error [ Upstream commit eb9aecd90d1a39601e91cd08b90d5fee51d321a6 ] The index of msr and adcpnp should match the sensor which belongs to the selected bank in the for loop. Fixes: b7cf0053738c ("thermal: Add Mediatek thermal driver for mt2701.") Signed-off-by: Michael Kao Signed-off-by: Eduardo Valentin Signed-off-by: Sasha Levin --- drivers/thermal/mtk_thermal.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/mtk_thermal.c b/drivers/thermal/mtk_thermal.c index 1e61c09153c9..76b92083744c 100644 --- a/drivers/thermal/mtk_thermal.c +++ b/drivers/thermal/mtk_thermal.c @@ -407,7 +407,8 @@ static int mtk_thermal_bank_temperature(struct mtk_thermal_bank *bank) u32 raw; for (i = 0; i < conf->bank_data[bank->id].num_sensors; i++) { - raw = readl(mt->thermal_base + conf->msr[i]); + raw = readl(mt->thermal_base + + conf->msr[conf->bank_data[bank->id].sensors[i]]); temp = raw_to_mcelsius(mt, conf->bank_data[bank->id].sensors[i], @@ -544,7 +545,8 @@ static void mtk_thermal_init_bank(struct mtk_thermal *mt, int num, for (i = 0; i < conf->bank_data[num].num_sensors; i++) writel(conf->sensor_mux_values[conf->bank_data[num].sensors[i]], - mt->thermal_base + conf->adcpnp[i]); + mt->thermal_base + + conf->adcpnp[conf->bank_data[num].sensors[i]]); writel((1 << conf->bank_data[num].num_sensors) - 1, mt->thermal_base + TEMP_MONCTL0); From 391fd358568db5b98464b26c8fca0b2c03f6fef4 Mon Sep 17 00:00:00 2001 From: Moritz Fischer Date: Wed, 6 Feb 2019 21:45:29 -0800 Subject: [PATCH 2513/3715] net: phy: fixed_phy: Fix fixed_phy not checking GPIO [ Upstream commit 8f289805616e81f7c1690931aa8a586c76f4fa88 ] Fix fixed_phy not checking GPIO if no link_update callback is registered. In the original version all users registered a link_update callback so the issue was masked. Fixes: a5597008dbc2 ("phy: fixed_phy: Add gpio to determine link up/down.") Reviewed-by: Andrew Lunn Signed-off-by: Moritz Fischer Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/phy/fixed_phy.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index eb5167210681..3ab2eb677a59 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -67,11 +67,11 @@ static int fixed_mdio_read(struct mii_bus *bus, int phy_addr, int reg_num) do { s = read_seqcount_begin(&fp->seqcount); /* Issue callback if user registered it. */ - if (fp->link_update) { + if (fp->link_update) fp->link_update(fp->phydev->attached_dev, &fp->status); - fixed_phy_update(fp); - } + /* Check the GPIO for change in status */ + fixed_phy_update(fp); state = fp->status; } while (read_seqcount_retry(&fp->seqcount, s)); From 0b9eaf82c2a711fcd2c6d011bac59bbc7d26be83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 25 Jan 2019 15:35:58 +0100 Subject: [PATCH 2514/3715] rtc: ds1307: rx8130: Fix alarm handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3f929cad943380370b6db31fcb7a38d898d91089 ] When the EXTENSION.WADA bit is set, register 0x19 contains a bitmap of week days, not a day of month. As Linux only handles a single alarm without repetition using day of month is more flexible, so clear this bit. (Otherwise a value depending on time.tm_wday would have to be written to register 0x19.) Also optimize setting the AIE bit to use a single register write instead of a bulk write of three registers. Fixes: ee0981be7704 ("rtc: ds1307: Add support for Epson RX8130CE") Signed-off-by: Uwe Kleine-König Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- drivers/rtc/rtc-ds1307.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index e7d9215c9201..8d45d93b1db6 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -733,8 +733,8 @@ static int rx8130_set_alarm(struct device *dev, struct rtc_wkalrm *t) if (ret < 0) return ret; - ctl[0] &= ~RX8130_REG_EXTENSION_WADA; - ctl[1] |= RX8130_REG_FLAG_AF; + ctl[0] &= RX8130_REG_EXTENSION_WADA; + ctl[1] &= ~RX8130_REG_FLAG_AF; ctl[2] &= ~RX8130_REG_CONTROL0_AIE; ret = regmap_bulk_write(ds1307->regmap, RX8130_REG_EXTENSION, ctl, @@ -757,8 +757,7 @@ static int rx8130_set_alarm(struct device *dev, struct rtc_wkalrm *t) ctl[2] |= RX8130_REG_CONTROL0_AIE; - return regmap_bulk_write(ds1307->regmap, RX8130_REG_EXTENSION, ctl, - sizeof(ctl)); + return regmap_write(ds1307->regmap, RX8130_REG_CONTROL0, ctl[2]); } static int rx8130_alarm_irq_enable(struct device *dev, unsigned int enabled) From 3726373c3f243b1c41d35a2cec03c3149679bc4e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 6 Feb 2019 09:50:53 +0000 Subject: [PATCH 2515/3715] rtc: 88pm860x: fix unintended sign extension [ Upstream commit dc9e47160626cdb58d5c39a4f43dcfdb27a5c004 ] Shifting a u8 by 24 will cause the value to be promoted to an integer. If the top bit of the u8 is set then the following conversion to an unsigned long will sign extend the value causing the upper 32 bits to be set in the result. Fix this by casting the u8 value to an unsigned long before the shift. Detected by CoverityScan, CID#144925-144928 ("Unintended sign extension") Fixes: 008b30408c40 ("mfd: Add rtc support to 88pm860x") Signed-off-by: Colin Ian King Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- drivers/rtc/rtc-88pm860x.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/rtc/rtc-88pm860x.c b/drivers/rtc/rtc-88pm860x.c index 166faae3a59c..7d3e5168fcef 100644 --- a/drivers/rtc/rtc-88pm860x.c +++ b/drivers/rtc/rtc-88pm860x.c @@ -115,11 +115,13 @@ static int pm860x_rtc_read_time(struct device *dev, struct rtc_time *tm) pm860x_page_bulk_read(info->i2c, REG0_ADDR, 8, buf); dev_dbg(info->dev, "%x-%x-%x-%x-%x-%x-%x-%x\n", buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]); - base = (buf[1] << 24) | (buf[3] << 16) | (buf[5] << 8) | buf[7]; + base = ((unsigned long)buf[1] << 24) | (buf[3] << 16) | + (buf[5] << 8) | buf[7]; /* load 32-bit read-only counter */ pm860x_bulk_read(info->i2c, PM8607_RTC_COUNTER1, 4, buf); - data = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; + data = ((unsigned long)buf[3] << 24) | (buf[2] << 16) | + (buf[1] << 8) | buf[0]; ticks = base + data; dev_dbg(info->dev, "get base:0x%lx, RO count:0x%lx, ticks:0x%lx\n", base, data, ticks); @@ -145,7 +147,8 @@ static int pm860x_rtc_set_time(struct device *dev, struct rtc_time *tm) /* load 32-bit read-only counter */ pm860x_bulk_read(info->i2c, PM8607_RTC_COUNTER1, 4, buf); - data = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; + data = ((unsigned long)buf[3] << 24) | (buf[2] << 16) | + (buf[1] << 8) | buf[0]; base = ticks - data; dev_dbg(info->dev, "set base:0x%lx, RO count:0x%lx, ticks:0x%lx\n", base, data, ticks); @@ -170,10 +173,12 @@ static int pm860x_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) pm860x_page_bulk_read(info->i2c, REG0_ADDR, 8, buf); dev_dbg(info->dev, "%x-%x-%x-%x-%x-%x-%x-%x\n", buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]); - base = (buf[1] << 24) | (buf[3] << 16) | (buf[5] << 8) | buf[7]; + base = ((unsigned long)buf[1] << 24) | (buf[3] << 16) | + (buf[5] << 8) | buf[7]; pm860x_bulk_read(info->i2c, PM8607_RTC_EXPIRE1, 4, buf); - data = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; + data = ((unsigned long)buf[3] << 24) | (buf[2] << 16) | + (buf[1] << 8) | buf[0]; ticks = base + data; dev_dbg(info->dev, "get base:0x%lx, RO count:0x%lx, ticks:0x%lx\n", base, data, ticks); @@ -198,11 +203,13 @@ static int pm860x_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) pm860x_page_bulk_read(info->i2c, REG0_ADDR, 8, buf); dev_dbg(info->dev, "%x-%x-%x-%x-%x-%x-%x-%x\n", buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]); - base = (buf[1] << 24) | (buf[3] << 16) | (buf[5] << 8) | buf[7]; + base = ((unsigned long)buf[1] << 24) | (buf[3] << 16) | + (buf[5] << 8) | buf[7]; /* load 32-bit read-only counter */ pm860x_bulk_read(info->i2c, PM8607_RTC_COUNTER1, 4, buf); - data = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; + data = ((unsigned long)buf[3] << 24) | (buf[2] << 16) | + (buf[1] << 8) | buf[0]; ticks = base + data; dev_dbg(info->dev, "get base:0x%lx, RO count:0x%lx, ticks:0x%lx\n", base, data, ticks); From 6518484b920e1a409184c29d9db5f8e39be4e861 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 6 Feb 2019 10:08:11 +0000 Subject: [PATCH 2516/3715] rtc: 88pm80x: fix unintended sign extension [ Upstream commit fb0b322537a831b5b0cb948c56f8f958ce493d3a ] Shifting a u8 by 24 will cause the value to be promoted to an integer. If the top bit of the u8 is set then the following conversion to an unsigned long will sign extend the value causing the upper 32 bits to be set in the result. Fix this by casting the u8 value to an unsigned long before the shift. Detected by CoverityScan, CID#714646-714649 ("Unintended sign extension") Fixes: 2985c29c1964 ("rtc: Add rtc support to 88PM80X PMIC") Signed-off-by: Colin Ian King Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- drivers/rtc/rtc-88pm80x.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/rtc/rtc-88pm80x.c b/drivers/rtc/rtc-88pm80x.c index 466bf7f9a285..7da2a1fb50f8 100644 --- a/drivers/rtc/rtc-88pm80x.c +++ b/drivers/rtc/rtc-88pm80x.c @@ -116,12 +116,14 @@ static int pm80x_rtc_read_time(struct device *dev, struct rtc_time *tm) unsigned char buf[4]; unsigned long ticks, base, data; regmap_raw_read(info->map, PM800_RTC_EXPIRE2_1, buf, 4); - base = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; + base = ((unsigned long)buf[3] << 24) | (buf[2] << 16) | + (buf[1] << 8) | buf[0]; dev_dbg(info->dev, "%x-%x-%x-%x\n", buf[0], buf[1], buf[2], buf[3]); /* load 32-bit read-only counter */ regmap_raw_read(info->map, PM800_RTC_COUNTER1, buf, 4); - data = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; + data = ((unsigned long)buf[3] << 24) | (buf[2] << 16) | + (buf[1] << 8) | buf[0]; ticks = base + data; dev_dbg(info->dev, "get base:0x%lx, RO count:0x%lx, ticks:0x%lx\n", base, data, ticks); @@ -144,7 +146,8 @@ static int pm80x_rtc_set_time(struct device *dev, struct rtc_time *tm) /* load 32-bit read-only counter */ regmap_raw_read(info->map, PM800_RTC_COUNTER1, buf, 4); - data = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; + data = ((unsigned long)buf[3] << 24) | (buf[2] << 16) | + (buf[1] << 8) | buf[0]; base = ticks - data; dev_dbg(info->dev, "set base:0x%lx, RO count:0x%lx, ticks:0x%lx\n", base, data, ticks); @@ -165,11 +168,13 @@ static int pm80x_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) int ret; regmap_raw_read(info->map, PM800_RTC_EXPIRE2_1, buf, 4); - base = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; + base = ((unsigned long)buf[3] << 24) | (buf[2] << 16) | + (buf[1] << 8) | buf[0]; dev_dbg(info->dev, "%x-%x-%x-%x\n", buf[0], buf[1], buf[2], buf[3]); regmap_raw_read(info->map, PM800_RTC_EXPIRE1_1, buf, 4); - data = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; + data = ((unsigned long)buf[3] << 24) | (buf[2] << 16) | + (buf[1] << 8) | buf[0]; ticks = base + data; dev_dbg(info->dev, "get base:0x%lx, RO count:0x%lx, ticks:0x%lx\n", base, data, ticks); @@ -192,12 +197,14 @@ static int pm80x_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) regmap_update_bits(info->map, PM800_RTC_CONTROL, PM800_ALARM1_EN, 0); regmap_raw_read(info->map, PM800_RTC_EXPIRE2_1, buf, 4); - base = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; + base = ((unsigned long)buf[3] << 24) | (buf[2] << 16) | + (buf[1] << 8) | buf[0]; dev_dbg(info->dev, "%x-%x-%x-%x\n", buf[0], buf[1], buf[2], buf[3]); /* load 32-bit read-only counter */ regmap_raw_read(info->map, PM800_RTC_COUNTER1, buf, 4); - data = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; + data = ((unsigned long)buf[3] << 24) | (buf[2] << 16) | + (buf[1] << 8) | buf[0]; ticks = base + data; dev_dbg(info->dev, "get base:0x%lx, RO count:0x%lx, ticks:0x%lx\n", base, data, ticks); From 814b711f62dc28f367f7d636501b2aa725961cd1 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 6 Feb 2019 10:31:02 +0000 Subject: [PATCH 2517/3715] rtc: pm8xxx: fix unintended sign extension [ Upstream commit e42280886018c6f77f0a90190f7cba344b0df3e0 ] Shifting a u8 by 24 will cause the value to be promoted to an integer. If the top bit of the u8 is set then the following conversion to an unsigned long will sign extend the value causing the upper 32 bits to be set in the result. Fix this by casting the u8 value to an unsigned long before the shift. Detected by CoverityScan, CID#1309693 ("Unintended sign extension") Fixes: 9a9a54ad7aa2 ("drivers/rtc: add support for Qualcomm PMIC8xxx RTC") Signed-off-by: Colin Ian King Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- drivers/rtc/rtc-pm8xxx.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-pm8xxx.c b/drivers/rtc/rtc-pm8xxx.c index fac835530671..a1b4b0ed1f19 100644 --- a/drivers/rtc/rtc-pm8xxx.c +++ b/drivers/rtc/rtc-pm8xxx.c @@ -186,7 +186,8 @@ static int pm8xxx_rtc_read_time(struct device *dev, struct rtc_time *tm) } } - secs = value[0] | (value[1] << 8) | (value[2] << 16) | (value[3] << 24); + secs = value[0] | (value[1] << 8) | (value[2] << 16) | + ((unsigned long)value[3] << 24); rtc_time_to_tm(secs, tm); @@ -267,7 +268,8 @@ static int pm8xxx_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) return rc; } - secs = value[0] | (value[1] << 8) | (value[2] << 16) | (value[3] << 24); + secs = value[0] | (value[1] << 8) | (value[2] << 16) | + ((unsigned long)value[3] << 24); rtc_time_to_tm(secs, &alarm->time); From 5bcb908399dc48093396dba7fe5f7a93e13cbad8 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 8 Feb 2019 19:24:45 +0100 Subject: [PATCH 2518/3715] fbdev: chipsfb: remove set but not used variable 'size' [ Upstream commit 8e71fa5e4d86bedfd26df85381d65d6b4c860020 ] Fixes gcc '-Wunused-but-set-variable' warning: drivers/video/fbdev/chipsfb.c: In function 'chipsfb_pci_init': drivers/video/fbdev/chipsfb.c:352:22: warning: variable 'size' set but not used [-Wunused-but-set-variable] Fixes: 8c8709334cec ("[PATCH] ppc32: Remove CONFIG_PMAC_PBOOK"). Signed-off-by: YueHaibing Acked-by: Michael Ellerman Cc: Daniel Vetter Cc: Christophe Leroy [b.zolnierkie: minor commit summary and description fixups] Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Sasha Levin --- drivers/video/fbdev/chipsfb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/video/fbdev/chipsfb.c b/drivers/video/fbdev/chipsfb.c index f103665cad43..f9b366d17587 100644 --- a/drivers/video/fbdev/chipsfb.c +++ b/drivers/video/fbdev/chipsfb.c @@ -350,7 +350,7 @@ static void init_chips(struct fb_info *p, unsigned long addr) static int chipsfb_pci_init(struct pci_dev *dp, const struct pci_device_id *ent) { struct fb_info *p; - unsigned long addr, size; + unsigned long addr; unsigned short cmd; int rc = -ENODEV; @@ -362,7 +362,6 @@ static int chipsfb_pci_init(struct pci_dev *dp, const struct pci_device_id *ent) if ((dp->resource[0].flags & IORESOURCE_MEM) == 0) goto err_disable; addr = pci_resource_start(dp, 0); - size = pci_resource_len(dp, 0); if (addr == 0) goto err_disable; From d53fea06ecdcf579c2ef4f285734e4964d72a3f2 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 1 Feb 2019 12:44:41 -0800 Subject: [PATCH 2519/3715] iw_cxgb4: use tos when importing the endpoint [ Upstream commit cb3ba0bde881f0cb7e3945d2a266901e2bd18c92 ] import_ep() is passed the correct tos, but doesn't use it correctly. Fixes: ac8e4c69a021 ("cxgb4/iw_cxgb4: TOS support") Signed-off-by: Steve Wise Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/cxgb4/cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 3668cc71b47e..942403e42dd0 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2056,7 +2056,7 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, } else { pdev = get_real_dev(n->dev); ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, - n, pdev, 0); + n, pdev, rt_tos2priority(tos)); if (!ep->l2t) goto out; ep->mtu = dst_mtu(dst); From d47846800bcc9883f2f0f954f16dcd1ed8dacd75 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 1 Feb 2019 12:44:53 -0800 Subject: [PATCH 2520/3715] iw_cxgb4: use tos when finding ipv6 routes [ Upstream commit c8a7eb554a83214c3d8ee5cb322da8c72810d2dc ] When IPv6 support was added, the correct tos was not passed to cxgb_find_route6(). This potentially results in the wrong route entry. Fixes: 830662f6f032 ("RDMA/cxgb4: Add support for active and passive open connection with IPv6 address") Signed-off-by: Steve Wise Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/cxgb4/cm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 942403e42dd0..7eb1cc1b1aa0 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2147,7 +2147,8 @@ static int c4iw_reconnect(struct c4iw_ep *ep) laddr6->sin6_addr.s6_addr, raddr6->sin6_addr.s6_addr, laddr6->sin6_port, - raddr6->sin6_port, 0, + raddr6->sin6_port, + ep->com.cm_id->tos, raddr6->sin6_scope_id); iptype = 6; ra = (__u8 *)&raddr6->sin6_addr; @@ -3298,7 +3299,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) laddr6->sin6_addr.s6_addr, raddr6->sin6_addr.s6_addr, laddr6->sin6_port, - raddr6->sin6_port, 0, + raddr6->sin6_port, cm_id->tos, raddr6->sin6_scope_id); } if (!ep->dst) { From a580884bdb09f906d4eb7be5c2387b6d803dce95 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 5 Feb 2019 12:08:19 +0300 Subject: [PATCH 2521/3715] drm/etnaviv: potential NULL dereference [ Upstream commit 9e05352340d3a3e68c144136db9810b26ebb88c3 ] The etnaviv_gem_prime_get_sg_table() is supposed to return error pointers. Otherwise it can lead to a NULL dereference when it's called from drm_gem_map_dma_buf(). Fixes: 5f4a4a73f437 ("drm/etnaviv: fix gem_prime_get_sg_table to return new SG table") Signed-off-by: Dan Carpenter Reviewed-by: Christian Gmeiner Signed-off-by: Lucas Stach Signed-off-by: Sasha Levin --- drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c index ae884723e9b1..880b95511b98 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c @@ -26,7 +26,7 @@ struct sg_table *etnaviv_gem_prime_get_sg_table(struct drm_gem_object *obj) int npages = obj->size >> PAGE_SHIFT; if (WARN_ON(!etnaviv_obj->pages)) /* should have already pinned! */ - return NULL; + return ERR_PTR(-EINVAL); return drm_prime_pages_to_sg(etnaviv_obj->pages, npages); } From 27aa08b95d655306cc88647412595f8a4e9f86a8 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 24 Jan 2019 13:04:52 +0100 Subject: [PATCH 2522/3715] pinctrl: sh-pfc: emev2: Add missing pinmux functions [ Upstream commit 1ecd8c9cb899ae277e6986ae134635cb1a50f5de ] The err_rst_reqb, ext_clki, lowpwr, and ref_clko pin groups are present, but no pinmux functions refer to them, hence they can not be selected. Fixes: 1e7d5d849cf4f0c5 ("sh-pfc: Add emev2 pinmux support") Signed-off-by: Geert Uytterhoeven Reviewed-by: Simon Horman Signed-off-by: Sasha Levin --- drivers/pinctrl/sh-pfc/pfc-emev2.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/pinctrl/sh-pfc/pfc-emev2.c b/drivers/pinctrl/sh-pfc/pfc-emev2.c index 1cbbe04d7df6..eafd8edbcbe9 100644 --- a/drivers/pinctrl/sh-pfc/pfc-emev2.c +++ b/drivers/pinctrl/sh-pfc/pfc-emev2.c @@ -1263,6 +1263,14 @@ static const char * const dtv_groups[] = { "dtv_b", }; +static const char * const err_rst_reqb_groups[] = { + "err_rst_reqb", +}; + +static const char * const ext_clki_groups[] = { + "ext_clki", +}; + static const char * const iic0_groups[] = { "iic0", }; @@ -1285,6 +1293,10 @@ static const char * const lcd_groups[] = { "yuv3", }; +static const char * const lowpwr_groups[] = { + "lowpwr", +}; + static const char * const ntsc_groups[] = { "ntsc_clk", "ntsc_data", @@ -1298,6 +1310,10 @@ static const char * const pwm1_groups[] = { "pwm1", }; +static const char * const ref_clko_groups[] = { + "ref_clko", +}; + static const char * const sd_groups[] = { "sd_cki", }; @@ -1391,13 +1407,17 @@ static const struct sh_pfc_function pinmux_functions[] = { SH_PFC_FUNCTION(cam), SH_PFC_FUNCTION(cf), SH_PFC_FUNCTION(dtv), + SH_PFC_FUNCTION(err_rst_reqb), + SH_PFC_FUNCTION(ext_clki), SH_PFC_FUNCTION(iic0), SH_PFC_FUNCTION(iic1), SH_PFC_FUNCTION(jtag), SH_PFC_FUNCTION(lcd), + SH_PFC_FUNCTION(lowpwr), SH_PFC_FUNCTION(ntsc), SH_PFC_FUNCTION(pwm0), SH_PFC_FUNCTION(pwm1), + SH_PFC_FUNCTION(ref_clko), SH_PFC_FUNCTION(sd), SH_PFC_FUNCTION(sdi0), SH_PFC_FUNCTION(sdi1), From 8ca523cc81fbc0775efda6fe25456af78770d6af Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 23 Jan 2019 17:07:43 +0100 Subject: [PATCH 2523/3715] pinctrl: sh-pfc: r8a7791: Fix scifb2_data_c pin group [ Upstream commit a4b0350047f1b10207e25e72d7cd3f7826e93769 ] The entry for "scifb2_data_c" in the SCIFB2 pin group array contains a typo, thus the group cannot be selected. Fixes: 5088451962389924 ("pinctrl: sh-pfc: r8a7791 PFC support") Signed-off-by: Geert Uytterhoeven Reviewed-by: Simon Horman Signed-off-by: Sasha Levin --- drivers/pinctrl/sh-pfc/pfc-r8a7791.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7791.c b/drivers/pinctrl/sh-pfc/pfc-r8a7791.c index d34982ea66bf..e4774b220040 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a7791.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a7791.c @@ -5209,7 +5209,7 @@ static const char * const scifb2_groups[] = { "scifb2_data_b", "scifb2_clk_b", "scifb2_ctrl_b", - "scifb0_data_c", + "scifb2_data_c", "scifb2_clk_c", "scifb2_data_d", }; From b95aa104952712b8db2263e9da99e59775cbd104 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 23 Jan 2019 17:14:07 +0100 Subject: [PATCH 2524/3715] pinctrl: sh-pfc: r8a7792: Fix vin1_data18_b pin group [ Upstream commit b9fd50488b4939ce5b3a026d29e752e17c2d1800 ] The vin1_data18_b pin group itself is present, but it is not listed in the VIN1 pin group array, and thus cannot be selected. Fixes: 7dd74bb1f058786e ("pinctrl: sh-pfc: r8a7792: Add VIN pin groups") Signed-off-by: Geert Uytterhoeven Reviewed-by: Simon Horman Signed-off-by: Sasha Levin --- drivers/pinctrl/sh-pfc/pfc-r8a7792.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7792.c b/drivers/pinctrl/sh-pfc/pfc-r8a7792.c index cc3597f66605..46c41ca6ea38 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a7792.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a7792.c @@ -1916,6 +1916,7 @@ static const char * const vin1_groups[] = { "vin1_data8", "vin1_data24_b", "vin1_data20_b", + "vin1_data18_b", "vin1_data16_b", "vin1_sync", "vin1_field", From 36f3abf8c0fccd453e45e9ab6638cbaa9ad48586 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 23 Jan 2019 16:51:21 +0100 Subject: [PATCH 2525/3715] pinctrl: sh-pfc: sh73a0: Fix fsic_spdif pin groups [ Upstream commit 0e6e448bdcf896d001a289a6112a704542d51516 ] There are two pin groups for the FSIC SPDIF signal, but the FSIC pin group array lists only one, and it refers to a nonexistent group. Fixes: 2ecd4154c906b7d6 ("sh-pfc: sh73a0: Add FSI pin groups and functions") Signed-off-by: Geert Uytterhoeven Reviewed-by: Simon Horman Signed-off-by: Sasha Levin --- drivers/pinctrl/sh-pfc/pfc-sh73a0.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/sh-pfc/pfc-sh73a0.c b/drivers/pinctrl/sh-pfc/pfc-sh73a0.c index f8fbedb46585..6dca760f9f28 100644 --- a/drivers/pinctrl/sh-pfc/pfc-sh73a0.c +++ b/drivers/pinctrl/sh-pfc/pfc-sh73a0.c @@ -3367,7 +3367,8 @@ static const char * const fsic_groups[] = { "fsic_sclk_out", "fsic_data_in", "fsic_data_out", - "fsic_spdif", + "fsic_spdif_0", + "fsic_spdif_1", }; static const char * const fsid_groups[] = { From 7020a36d86bc76f49fffaf3cdf668b4a781a7b4f Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Mon, 11 Feb 2019 17:04:00 +0800 Subject: [PATCH 2526/3715] PCI: endpoint: functions: Use memcpy_fromio()/memcpy_toio() [ Upstream commit 726dabfde6aa35a4f1508e235ae37edbbf9fbc65 ] Functions copying from/to IO addresses should use the memcpy_fromio()/memcpy_toio() API rather than plain memcpy(). Fix the issue detected through the sparse tool. Fixes: 349e7a85b25f ("PCI: endpoint: functions: Add an EP function to test PCI") Suggested-by: Kishon Vijay Abraham I Signed-off-by: Wen Yang [lorenzo.pieralisi@arm.com: updated log] Signed-off-by: Lorenzo Pieralisi Acked-by: Kishon Vijay Abraham I CC: Lorenzo Pieralisi CC: Bjorn Helgaas CC: Gustavo Pimentel CC: Niklas Cassel CC: Greg Kroah-Hartman CC: Cyrille Pitchen CC: linux-pci@vger.kernel.org CC: linux-kernel@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/pci/endpoint/functions/pci-epf-test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c index f9308c2f22e6..c2541a772abc 100644 --- a/drivers/pci/endpoint/functions/pci-epf-test.c +++ b/drivers/pci/endpoint/functions/pci-epf-test.c @@ -177,7 +177,7 @@ static int pci_epf_test_read(struct pci_epf_test *epf_test) goto err_map_addr; } - memcpy(buf, src_addr, reg->size); + memcpy_fromio(buf, src_addr, reg->size); crc32 = crc32_le(~0, buf, reg->size); if (crc32 != reg->checksum) @@ -231,7 +231,7 @@ static int pci_epf_test_write(struct pci_epf_test *epf_test) get_random_bytes(buf, reg->size); reg->checksum = crc32_le(~0, buf, reg->size); - memcpy(dst_addr, buf, reg->size); + memcpy_toio(dst_addr, buf, reg->size); /* * wait 1ms inorder for the write to complete. Without this delay L3 From 30cf71254f9f661906f5c3eac379723899a5633e Mon Sep 17 00:00:00 2001 From: Sven Van Asbroeck Date: Mon, 11 Feb 2019 10:04:26 -0500 Subject: [PATCH 2527/3715] usb: phy: twl6030-usb: fix possible use-after-free on remove [ Upstream commit 5895d311d28f2605e2f71c1a3e043ed38f3ac9d2 ] In remove(), use cancel_delayed_work_sync() to cancel the delayed work. Otherwise there's a chance that this work will continue to run until after the device has been removed. This issue was detected with the help of Coccinelle. Cc: Tony Lindgren Cc: Bin Liu Fixes: b6a619a883c3 ("usb: phy: Check initial state for twl6030") Signed-off-by: Sven Van Asbroeck Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/phy/phy-twl6030-usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/phy/phy-twl6030-usb.c b/drivers/usb/phy/phy-twl6030-usb.c index b5dc077ed7d3..8e14fa221191 100644 --- a/drivers/usb/phy/phy-twl6030-usb.c +++ b/drivers/usb/phy/phy-twl6030-usb.c @@ -413,7 +413,7 @@ static int twl6030_usb_remove(struct platform_device *pdev) { struct twl6030_usb *twl = platform_get_drvdata(pdev); - cancel_delayed_work(&twl->get_status_work); + cancel_delayed_work_sync(&twl->get_status_work); twl6030_interrupt_mask(TWL6030_USBOTG_INT_MASK, REG_INT_MSK_LINE_C); twl6030_interrupt_mask(TWL6030_USBOTG_INT_MASK, From fa8552dae83acba4c9442fadbddab1d278168531 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 15 Feb 2019 19:13:08 +0800 Subject: [PATCH 2528/3715] block: don't use bio->bi_vcnt to figure out segment number [ Upstream commit 1a67356e9a4829da2935dd338630a550c59c8489 ] It is wrong to use bio->bi_vcnt to figure out how many segments there are in the bio even though CLONED flag isn't set on this bio, because this bio may be splitted or advanced. So always use bio_segments() in blk_recount_segments(), and it shouldn't cause any performance loss now because the physical segment number is figured out in blk_queue_split() and BIO_SEG_VALID is set meantime since bdced438acd83ad83a6c ("block: setup bi_phys_segments after splitting"). Reviewed-by: Omar Sandoval Reviewed-by: Christoph Hellwig Fixes: 76d8137a3113 ("blk-merge: recaculate segment if it isn't less than max segments") Signed-off-by: Ming Lei Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-merge.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index f61b50a01bc7..415b5dafd9e6 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -299,13 +299,7 @@ void blk_recalc_rq_segments(struct request *rq) void blk_recount_segments(struct request_queue *q, struct bio *bio) { - unsigned short seg_cnt; - - /* estimate segment number by bi_vcnt for non-cloned bio */ - if (bio_flagged(bio, BIO_CLONED)) - seg_cnt = bio_segments(bio); - else - seg_cnt = bio->bi_vcnt; + unsigned short seg_cnt = bio_segments(bio); if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags) && (seg_cnt < queue_max_segments(q))) From b0515d58763c039d326172e1aeed5c6a96a52b4c Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 14 Feb 2019 16:20:37 +0000 Subject: [PATCH 2529/3715] keys: Timestamp new keys [ Upstream commit 7c1857bdbdf1e4c541e45eab477ee23ed4333ea4 ] Set the timestamp on new keys rather than leaving it unset. Fixes: 31d5a79d7f3d ("KEYS: Do LRU discard in full keyrings") Signed-off-by: David Howells Signed-off-by: James Morris Signed-off-by: Sasha Levin --- security/keys/key.c | 1 + 1 file changed, 1 insertion(+) diff --git a/security/keys/key.c b/security/keys/key.c index 87172f99f73e..17244f5f54c6 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -297,6 +297,7 @@ struct key *key_alloc(struct key_type *type, const char *desc, key->gid = gid; key->perm = perm; key->restrict_link = restrict_link; + key->last_used_at = ktime_get_real_seconds(); if (!(flags & KEY_ALLOC_NOT_IN_QUOTA)) key->flags |= 1 << KEY_FLAG_IN_QUOTA; From 6de29266dd3971d38956ee4d3b1fefee6cb4cbe5 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 15 Feb 2019 17:16:06 +0100 Subject: [PATCH 2530/3715] vfio_pci: Enable memory accesses before calling pci_map_rom [ Upstream commit 0cfd027be1d6def4a462cdc180c055143af24069 ] pci_map_rom/pci_get_rom_size() performs memory access in the ROM. In case the Memory Space accesses were disabled, readw() is likely to trigger a synchronous external abort on some platforms. In case memory accesses were disabled, re-enable them before the call and disable them back again just after. Fixes: 89e1f7d4c66d ("vfio: Add PCI device driver") Signed-off-by: Eric Auger Suggested-by: Alex Williamson Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin --- drivers/vfio/pci/vfio_pci.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 9bd3e7911af2..550ab7707b57 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -717,6 +717,7 @@ static long vfio_pci_ioctl(void *device_data, { void __iomem *io; size_t size; + u16 orig_cmd; info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); info.flags = 0; @@ -732,15 +733,23 @@ static long vfio_pci_ioctl(void *device_data, break; } - /* Is it really there? */ - io = pci_map_rom(pdev, &size); - if (!io || !size) { - info.size = 0; - break; - } - pci_unmap_rom(pdev, io); + /* + * Is it really there? Enable memory decode for + * implicit access in pci_map_rom(). + */ + pci_read_config_word(pdev, PCI_COMMAND, &orig_cmd); + pci_write_config_word(pdev, PCI_COMMAND, + orig_cmd | PCI_COMMAND_MEMORY); - info.flags = VFIO_REGION_INFO_FLAG_READ; + io = pci_map_rom(pdev, &size); + if (io) { + info.flags = VFIO_REGION_INFO_FLAG_READ; + pci_unmap_rom(pdev, io); + } else { + info.size = 0; + } + + pci_write_config_word(pdev, PCI_COMMAND, orig_cmd); break; } case VFIO_PCI_VGA_REGION_INDEX: From be889c947784c83f8eb69156c23cc36067fb8226 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Mon, 18 Feb 2019 19:54:40 +0000 Subject: [PATCH 2531/3715] hwmon: (pmbus/tps53679) Fix driver info initialization in probe routine [ Upstream commit ff066653aeed8ee2d4dadb1e35774dd91ecbb19f ] Fix tps53679_probe() by using dynamically allocated "pmbus_driver_info" structure instead of static. Usage of static structures causes overwritten of the field "vrm_version", in case the system is equipped with several tps53679 devices with the different "vrm_version". In such case the last probed device overwrites this field for all others. Fixes: 610526527a13 ("hwmon: (pmbus) Add support for Texas Instruments tps53679 device") Signed-off-by: Vadim Pasternak Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/pmbus/tps53679.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/pmbus/tps53679.c b/drivers/hwmon/pmbus/tps53679.c index 85b515cd9df0..2bc352c5357f 100644 --- a/drivers/hwmon/pmbus/tps53679.c +++ b/drivers/hwmon/pmbus/tps53679.c @@ -80,7 +80,14 @@ static struct pmbus_driver_info tps53679_info = { static int tps53679_probe(struct i2c_client *client, const struct i2c_device_id *id) { - return pmbus_do_probe(client, id, &tps53679_info); + struct pmbus_driver_info *info; + + info = devm_kmemdup(&client->dev, &tps53679_info, sizeof(*info), + GFP_KERNEL); + if (!info) + return -ENOMEM; + + return pmbus_do_probe(client, id, info); } static const struct i2c_device_id tps53679_id[] = { From 3af53fe7cffc149f38f93ad8da77b99c0425ef06 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 12 Feb 2019 15:37:45 +1100 Subject: [PATCH 2532/3715] KVM: PPC: Release all hardware TCE tables attached to a group [ Upstream commit a67614cc05a5052b265ea48196dab2fce11f5f2e ] The SPAPR TCE KVM device references all hardware IOMMU tables assigned to some IOMMU group to ensure that in-kernel KVM acceleration of H_PUT_TCE can work. The tables are references when an IOMMU group gets registered with the VFIO KVM device by the KVM_DEV_VFIO_GROUP_ADD ioctl; KVM_DEV_VFIO_GROUP_DEL calls into the dereferencing code in kvm_spapr_tce_release_iommu_group() which walks through the list of LIOBNs, finds a matching IOMMU table and calls kref_put() when found. However that code stops after the very first successful derefencing leaving other tables referenced till the SPAPR TCE KVM device is destroyed which normally happens on guest reboot or termination so if we do hotplug and unplug in a loop, we are leaking IOMMU tables here. This removes a premature return to let kvm_spapr_tce_release_iommu_group() find and dereference all attached tables. Fixes: 121f80ba68f ("KVM: PPC: VFIO: Add in-kernel acceleration for VFIO") Signed-off-by: Alexey Kardashevskiy Signed-off-by: Paul Mackerras Signed-off-by: Sasha Levin --- arch/powerpc/kvm/book3s_64_vio.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 5e4446296021..ef6a58838e7c 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -134,7 +134,6 @@ extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, continue; kref_put(&stit->kref, kvm_spapr_tce_liobn_put); - return; } } } From 1e4d478a2878419f981631bea5521d59e083768e Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Fri, 15 Feb 2019 10:24:22 +0100 Subject: [PATCH 2533/3715] staging: r8822be: check kzalloc return or bail [ Upstream commit e4b08e16b7d9d030b6475ef48f94d734a39f3c81 ] The kzalloc() in halmac_parse_psd_data_88xx() can fail and return NULL so check the psd_set->data after allocation and if allocation failed return HALMAC_CMD_PROCESS_ERROR. Signed-off-by: Nicholas Mc Guire Fixes: 938a0447f094 ("staging: r8822be: Add code for halmac sub-drive") Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- .../staging/rtlwifi/halmac/halmac_88xx/halmac_func_88xx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/staging/rtlwifi/halmac/halmac_88xx/halmac_func_88xx.c b/drivers/staging/rtlwifi/halmac/halmac_88xx/halmac_func_88xx.c index 544f638ed3ef..15091ee587db 100644 --- a/drivers/staging/rtlwifi/halmac/halmac_88xx/halmac_func_88xx.c +++ b/drivers/staging/rtlwifi/halmac/halmac_88xx/halmac_func_88xx.c @@ -2492,8 +2492,11 @@ halmac_parse_psd_data_88xx(struct halmac_adapter *halmac_adapter, u8 *c2h_buf, segment_size = (u8)PSD_DATA_GET_SEGMENT_SIZE(c2h_buf); psd_set->data_size = total_size; - if (!psd_set->data) + if (!psd_set->data) { psd_set->data = kzalloc(psd_set->data_size, GFP_KERNEL); + if (!psd_set->data) + return HALMAC_CMD_PROCESS_ERROR; + } if (segment_id == 0) psd_set->segment_size = segment_size; From 11d671a2d81df342d448322cbf2935d2a45a45a1 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 18 Feb 2019 18:27:06 +0000 Subject: [PATCH 2534/3715] dmaengine: mv_xor: Use correct device for DMA API [ Upstream commit 3e5daee5ecf314da33a890fabaa2404244cd2a36 ] Using dma_dev->dev for mappings before it's assigned with the correct device is unlikely to work as expected, and with future dma-direct changes, passing a NULL device may end up crashing entirely. I don't know enough about this hardware or the mv_xor_prep_dma_interrupt() operation to implement the appropriate error-handling logic that would have revealed those dma_map_single() calls failing on arm64 for as long as the driver has been enabled there, but moving the assignment earlier will at least make the current code operate as intended. Fixes: 22843545b200 ("dma: mv_xor: Add support for DMA_INTERRUPT") Reported-by: John David Anglin Tested-by: John David Anglin Signed-off-by: Robin Murphy Acked-by: Thomas Petazzoni Tested-by: Thomas Petazzoni Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/mv_xor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index 1993889003fd..1c57577f49fe 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -1059,6 +1059,7 @@ mv_xor_channel_add(struct mv_xor_device *xordev, mv_chan->op_in_desc = XOR_MODE_IN_DESC; dma_dev = &mv_chan->dmadev; + dma_dev->dev = &pdev->dev; mv_chan->xordev = xordev; /* @@ -1091,7 +1092,6 @@ mv_xor_channel_add(struct mv_xor_device *xordev, dma_dev->device_free_chan_resources = mv_xor_free_chan_resources; dma_dev->device_tx_status = mv_xor_status; dma_dev->device_issue_pending = mv_xor_issue_pending; - dma_dev->dev = &pdev->dev; /* set prep routines based on capability */ if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask)) From 2792ec5b8af1b98a0e9488860178088106f198fa Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 18 Feb 2019 22:34:51 +0800 Subject: [PATCH 2535/3715] cdc-wdm: pass return value of recover_from_urb_loss [ Upstream commit 0742a338f5b3446a26de551ad8273fb41b2787f2 ] 'rv' is the correct return value, pass it upstream instead of 0 Fixes: 17d80d562fd7 ("USB: autosuspend for cdc-wdm") Signed-off-by: YueHaibing Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/class/cdc-wdm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index a593cdfc897f..d5d42dccda10 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -1085,7 +1085,7 @@ static int wdm_post_reset(struct usb_interface *intf) rv = recover_from_urb_loss(desc); mutex_unlock(&desc->wlock); mutex_unlock(&desc->rlock); - return 0; + return rv; } static struct usb_driver wdm_driver = { From ea988ebd9f2a774d9c9b0f996a88e0890643d773 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 19 Feb 2019 18:00:00 +0800 Subject: [PATCH 2536/3715] regulator: pv88060: Fix array out-of-bounds access [ Upstream commit 7cd415f875591bc66c5ecb49bf84ef97e80d7b0e ] Fix off-by-one while iterating current_limits array. The valid index should be 0 ~ n_current_limits -1. Fixes: f307a7e9b7af ("regulator: pv88060: new regulator driver") Signed-off-by: Axel Lin Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/pv88060-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/pv88060-regulator.c b/drivers/regulator/pv88060-regulator.c index a9446056435f..1f2d8180506b 100644 --- a/drivers/regulator/pv88060-regulator.c +++ b/drivers/regulator/pv88060-regulator.c @@ -135,7 +135,7 @@ static int pv88060_set_current_limit(struct regulator_dev *rdev, int min, int i; /* search for closest to maximum */ - for (i = info->n_current_limits; i >= 0; i--) { + for (i = info->n_current_limits - 1; i >= 0; i--) { if (min <= info->current_limits[i] && max >= info->current_limits[i]) { return regmap_update_bits(rdev->regmap, From ecd54f78e7156ec2ee15c6f017e1f0e10cbc7bf5 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 19 Feb 2019 18:00:01 +0800 Subject: [PATCH 2537/3715] regulator: pv88080: Fix array out-of-bounds access [ Upstream commit 3c413f594c4f9df40061445667ca11a12bc8ee34 ] Fix off-by-one while iterating current_limits array. The valid index should be 0 ~ n_current_limits -1. Fixes: 99cf3af5e2d5 ("regulator: pv88080: new regulator driver") Signed-off-by: Axel Lin Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/pv88080-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/pv88080-regulator.c b/drivers/regulator/pv88080-regulator.c index 9a08cb2de501..6770e4de2097 100644 --- a/drivers/regulator/pv88080-regulator.c +++ b/drivers/regulator/pv88080-regulator.c @@ -279,7 +279,7 @@ static int pv88080_set_current_limit(struct regulator_dev *rdev, int min, int i; /* search for closest to maximum */ - for (i = info->n_current_limits; i >= 0; i--) { + for (i = info->n_current_limits - 1; i >= 0; i--) { if (min <= info->current_limits[i] && max >= info->current_limits[i]) { return regmap_update_bits(rdev->regmap, From 2797d17a7a22aa4480733df4b983bd8cc94aac0a Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 19 Feb 2019 18:00:02 +0800 Subject: [PATCH 2538/3715] regulator: pv88090: Fix array out-of-bounds access [ Upstream commit a5455c9159414748bed4678184bf69989a4f7ba3 ] Fix off-by-one while iterating current_limits array. The valid index should be 0 ~ n_current_limits -1. Fixes: c90456e36d9c ("regulator: pv88090: new regulator driver") Signed-off-by: Axel Lin Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/pv88090-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/pv88090-regulator.c b/drivers/regulator/pv88090-regulator.c index 7a0c15957bd0..2302b0df7630 100644 --- a/drivers/regulator/pv88090-regulator.c +++ b/drivers/regulator/pv88090-regulator.c @@ -157,7 +157,7 @@ static int pv88090_set_current_limit(struct regulator_dev *rdev, int min, int i; /* search for closest to maximum */ - for (i = info->n_current_limits; i >= 0; i--) { + for (i = info->n_current_limits - 1; i >= 0; i--) { if (min <= info->current_limits[i] && max >= info->current_limits[i]) { return regmap_update_bits(rdev->regmap, From f4ece3516ea926242fdeed6975a5751a203656ab Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Tue, 19 Feb 2019 12:29:43 +0530 Subject: [PATCH 2539/3715] net: dsa: qca8k: Enable delay for RGMII_ID mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a968b5e9d5879f9535d6099505f9e14abcafb623 ] RGMII_ID specifies that we should have internal delay, so resurrect the delay addition routine but under the RGMII_ID mode. Fixes: 40269aa9f40a ("net: dsa: qca8k: disable delay for RGMII mode") Tested-by: Michal Vokáč Signed-off-by: Vinod Koul Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/dsa/qca8k.c | 12 ++++++++++++ drivers/net/dsa/qca8k.h | 1 + 2 files changed, 13 insertions(+) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 8e49974ffa0e..8ee59b20b47a 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -459,6 +459,18 @@ qca8k_set_pad_ctrl(struct qca8k_priv *priv, int port, int mode) qca8k_write(priv, QCA8K_REG_PORT5_PAD_CTRL, QCA8K_PORT_PAD_RGMII_RX_DELAY_EN); break; + case PHY_INTERFACE_MODE_RGMII_ID: + /* RGMII_ID needs internal delay. This is enabled through + * PORT5_PAD_CTRL for all ports, rather than individual port + * registers + */ + qca8k_write(priv, reg, + QCA8K_PORT_PAD_RGMII_EN | + QCA8K_PORT_PAD_RGMII_TX_DELAY(QCA8K_MAX_DELAY) | + QCA8K_PORT_PAD_RGMII_RX_DELAY(QCA8K_MAX_DELAY)); + qca8k_write(priv, QCA8K_REG_PORT5_PAD_CTRL, + QCA8K_PORT_PAD_RGMII_RX_DELAY_EN); + break; case PHY_INTERFACE_MODE_SGMII: qca8k_write(priv, reg, QCA8K_PORT_PAD_SGMII_EN); break; diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h index 613fe5c50236..d146e54c8a6c 100644 --- a/drivers/net/dsa/qca8k.h +++ b/drivers/net/dsa/qca8k.h @@ -40,6 +40,7 @@ ((0x8 + (x & 0x3)) << 22) #define QCA8K_PORT_PAD_RGMII_RX_DELAY(x) \ ((0x10 + (x & 0x3)) << 20) +#define QCA8K_MAX_DELAY 3 #define QCA8K_PORT_PAD_RGMII_RX_DELAY_EN BIT(24) #define QCA8K_PORT_PAD_SGMII_EN BIT(7) #define QCA8K_REG_MODULE_EN 0x030 From 0fd24a6a8a063c064a664797d4913d5f365f56a2 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 25 Nov 2018 17:09:18 +0000 Subject: [PATCH 2540/3715] drm/nouveau/bios/ramcfg: fix missing parentheses when calculating RON [ Upstream commit 13649101a25c53c87f4ab98a076dfe61f3636ab1 ] Currently, the expression for calculating RON is always going to result in zero no matter the value of ram->mr[1] because the ! operator has higher precedence than the shift >> operator. I believe the missing parentheses around the expression before appying the ! operator will result in the desired result. [ Note, not tested ] Detected by CoveritScan, CID#1324005 ("Operands don't affect result") Fixes: c25bf7b6155c ("drm/nouveau/bios/ramcfg: Separate out RON pull value") Signed-off-by: Colin Ian King Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nvkm/subdev/fb/gddr3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gddr3.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gddr3.c index 60ece0a8a2e1..1d2d6bae73cd 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gddr3.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gddr3.c @@ -87,7 +87,7 @@ nvkm_gddr3_calc(struct nvkm_ram *ram) WR = (ram->next->bios.timing[2] & 0x007f0000) >> 16; /* XXX: Get these values from the VBIOS instead */ DLL = !(ram->mr[1] & 0x1); - RON = !(ram->mr[1] & 0x300) >> 8; + RON = !((ram->mr[1] & 0x300) >> 8); break; default: return -ENOSYS; From 1cdadf8223915491fd3b774581b199b76fcc48e0 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 19 Dec 2018 15:29:49 +0000 Subject: [PATCH 2541/3715] drm/nouveau/pmu: don't print reply values if exec is false [ Upstream commit b1d03fc36ec9834465a08c275c8d563e07f6f6bf ] Currently the uninitialized values in the array reply are printed out when exec is false and nvkm_pmu_send has not updated the array. Avoid confusion by only dumping out these values if they have been actually updated. Detected by CoverityScan, CID#1271291 ("Uninitialized scaler variable") Fixes: ebb58dc2ef8c ("drm/nouveau/pmu: rename from pwr (no binary change)") Signed-off-by: Colin Ian King Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nvkm/subdev/pmu/memx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/memx.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/memx.c index 11b28b086a06..7b052879af72 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/memx.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/memx.c @@ -88,10 +88,10 @@ nvkm_memx_fini(struct nvkm_memx **pmemx, bool exec) if (exec) { nvkm_pmu_send(pmu, reply, PROC_MEMX, MEMX_MSG_EXEC, memx->base, finish); + nvkm_debug(subdev, "Exec took %uns, PMU_IN %08x\n", + reply[0], reply[1]); } - nvkm_debug(subdev, "Exec took %uns, PMU_IN %08x\n", - reply[0], reply[1]); kfree(memx); return 0; } From 23e85b26ea102e64d39e1d12e109d65b51d27575 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 19 Feb 2019 16:46:50 +0100 Subject: [PATCH 2542/3715] ASoC: qcom: Fix of-node refcount unbalance in apq8016_sbc_parse_of() [ Upstream commit 8d1667200850f8753c0265fa4bd25c9a6e5f94ce ] The apq8016 driver leaves the of-node refcount at aborting from the loop of for_each_child_of_node() in the error path. Not only the iterator node of for_each_child_of_node(), the children nodes referred from it for codec and cpu have to be properly unreferenced. Fixes: bdb052e81f62 ("ASoC: qcom: add apq8016 sound card support") Cc: Patrick Lai Cc: Banajit Goswami Signed-off-by: Takashi Iwai Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/qcom/apq8016_sbc.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/sound/soc/qcom/apq8016_sbc.c b/sound/soc/qcom/apq8016_sbc.c index d49adc822a11..8e6b88d68ca6 100644 --- a/sound/soc/qcom/apq8016_sbc.c +++ b/sound/soc/qcom/apq8016_sbc.c @@ -163,41 +163,52 @@ static struct apq8016_sbc_data *apq8016_sbc_parse_of(struct snd_soc_card *card) if (!cpu || !codec) { dev_err(dev, "Can't find cpu/codec DT node\n"); - return ERR_PTR(-EINVAL); + ret = -EINVAL; + goto error; } link->cpu_of_node = of_parse_phandle(cpu, "sound-dai", 0); if (!link->cpu_of_node) { dev_err(card->dev, "error getting cpu phandle\n"); - return ERR_PTR(-EINVAL); + ret = -EINVAL; + goto error; } ret = snd_soc_of_get_dai_name(cpu, &link->cpu_dai_name); if (ret) { dev_err(card->dev, "error getting cpu dai name\n"); - return ERR_PTR(ret); + goto error; } ret = snd_soc_of_get_dai_link_codecs(dev, codec, link); if (ret < 0) { dev_err(card->dev, "error getting codec dai name\n"); - return ERR_PTR(ret); + goto error; } link->platform_of_node = link->cpu_of_node; ret = of_property_read_string(np, "link-name", &link->name); if (ret) { dev_err(card->dev, "error getting codec dai_link name\n"); - return ERR_PTR(ret); + goto error; } link->stream_name = link->name; link->init = apq8016_sbc_dai_init; link++; + + of_node_put(cpu); + of_node_put(codec); } return data; + + error: + of_node_put(np); + of_node_put(cpu); + of_node_put(codec); + return ERR_PTR(ret); } static const struct snd_soc_dapm_widget apq8016_sbc_dapm_widgets[] = { From 69c5a33b72bd84a36a158c7a0401c67d776ddeec Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 30 Jan 2019 07:58:38 -0600 Subject: [PATCH 2543/3715] fs/nfs: Fix nfs_parse_devname to not modify it's argument [ Upstream commit 40cc394be1aa18848b8757e03bd8ed23281f572e ] In the rare and unsupported case of a hostname list nfs_parse_devname will modify dev_name. There is no need to modify dev_name as the all that is being computed is the length of the hostname, so the computed length can just be shorted. Fixes: dc04589827f7 ("NFS: Use common device name parsing logic for NFSv4 and NFSv2/v3") Signed-off-by: "Eric W. Biederman" Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index f464f8d9060c..470b761839a5 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1925,7 +1925,7 @@ static int nfs_parse_devname(const char *dev_name, /* kill possible hostname list: not supported */ comma = strchr(dev_name, ','); if (comma != NULL && comma < end) - *comma = 0; + len = comma - dev_name; } if (len > maxnamlen) From 5ada2bd122e42038dd0df468d1642095a05a33be Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 20 Feb 2019 22:25:24 -0700 Subject: [PATCH 2544/3715] staging: rtlwifi: Use proper enum for return in halmac_parse_psd_data_88xx [ Upstream commit e8edc32d70a4e09160835792eb5d1af71a0eec14 ] Clang warns: drivers/staging/rtlwifi/halmac/halmac_88xx/halmac_func_88xx.c:2472:11: warning: implicit conversion from enumeration type 'enum halmac_cmd_process_status' to different enumeration type 'enum halmac_ret_status' [-Wenum-conversion] return HALMAC_CMD_PROCESS_ERROR; ~~~~~~ ^~~~~~~~~~~~~~~~~~~~~~~~ 1 warning generated. Fix this by using the proper enum for allocation failures, HALMAC_RET_MALLOC_FAIL, which is used in the rest of this file. Fixes: e4b08e16b7d9 ("staging: r8822be: check kzalloc return or bail") Link: https://github.com/ClangBuiltLinux/linux/issues/375 Signed-off-by: Nathan Chancellor Reviewed-by: Nicholas Mc Guire Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/staging/rtlwifi/halmac/halmac_88xx/halmac_func_88xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtlwifi/halmac/halmac_88xx/halmac_func_88xx.c b/drivers/staging/rtlwifi/halmac/halmac_88xx/halmac_func_88xx.c index 15091ee587db..65edd14a1147 100644 --- a/drivers/staging/rtlwifi/halmac/halmac_88xx/halmac_func_88xx.c +++ b/drivers/staging/rtlwifi/halmac/halmac_88xx/halmac_func_88xx.c @@ -2495,7 +2495,7 @@ halmac_parse_psd_data_88xx(struct halmac_adapter *halmac_adapter, u8 *c2h_buf, if (!psd_set->data) { psd_set->data = kzalloc(psd_set->data_size, GFP_KERNEL); if (!psd_set->data) - return HALMAC_CMD_PROCESS_ERROR; + return HALMAC_RET_MALLOC_FAIL; } if (segment_id == 0) From afdb6f57d778dff7b58e7e1cba5d12ca9baff795 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 11 Feb 2019 11:20:01 +1100 Subject: [PATCH 2545/3715] powerpc/64s: Fix logic when handling unknown CPU features MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8cfaf106918a8c13abb24c641556172afbb9545c ] In cpufeatures_process_feature(), if a provided CPU feature is unknown and enable_unknown is false, we erroneously print that the feature is being enabled and return true, even though no feature has been enabled, and may also set feature bits based on the last entry in the match table. Fix this so that we only set feature bits from the match table if we have actually enabled a feature from that table, and when failing to enable an unknown feature, always print the "not enabling" message and return false. Coincidentally, some older gccs (cpu_ftr_bit_mask) An upcoming patch will enable support for kcov, which requires this option. This patch avoids the warning. Fixes: 5a61ef74f269 ("powerpc/64s: Support new device tree binding for discovering CPU features") Reported-by: Segher Boessenkool Signed-off-by: Michael Ellerman [ajd: add commit message] Signed-off-by: Andrew Donnellan Signed-off-by: Sasha Levin --- arch/powerpc/kernel/dt_cpu_ftrs.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 2357df60de95..7ed2b1b6643c 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -705,8 +705,10 @@ static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f) m = &dt_cpu_feature_match_table[i]; if (!strcmp(f->name, m->name)) { known = true; - if (m->enable(f)) + if (m->enable(f)) { + cur_cpu_spec->cpu_features |= m->cpu_ftr_bit_mask; break; + } pr_info("not enabling: %s (disabled or unsupported by kernel)\n", f->name); @@ -714,17 +716,12 @@ static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f) } } - if (!known && enable_unknown) { - if (!feat_try_enable_unknown(f)) { - pr_info("not enabling: %s (unknown and unsupported by kernel)\n", - f->name); - return false; - } + if (!known && (!enable_unknown || !feat_try_enable_unknown(f))) { + pr_info("not enabling: %s (unknown and unsupported by kernel)\n", + f->name); + return false; } - if (m->cpu_ftr_bit_mask) - cur_cpu_spec->cpu_features |= m->cpu_ftr_bit_mask; - if (known) pr_debug("enabling: %s\n", f->name); else From a155d39d1e4c1b7e7edcc8885742a87c2c479cd6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 21 Feb 2019 14:51:25 -0500 Subject: [PATCH 2546/3715] NFS: Fix a soft lockup in the delegation recovery code [ Upstream commit 6f9449be53f3ce383caed797708b332ede8d952c ] Fix a soft lockup when NFS client delegation recovery is attempted but the inode is in the process of being freed. When the igrab(inode) call fails, and we have to restart the recovery process, we need to ensure that we won't attempt to recover the same delegation again. Fixes: 45870d6909d5a ("NFSv4.1: Test delegation stateids when server...") Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/delegation.c | 20 ++++++++++++-------- fs/nfs/delegation.h | 1 + 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 04d57e11577e..09b3bcb86d32 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -234,6 +234,8 @@ static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation spin_lock(&delegation->lock); if (delegation->inode != NULL) inode = igrab(delegation->inode); + if (!inode) + set_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags); spin_unlock(&delegation->lock); return inode; } @@ -863,10 +865,11 @@ restart: list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { list_for_each_entry_rcu(delegation, &server->delegations, super_list) { - if (test_bit(NFS_DELEGATION_RETURNING, - &delegation->flags)) - continue; - if (test_bit(NFS_DELEGATION_NEED_RECLAIM, + if (test_bit(NFS_DELEGATION_INODE_FREEING, + &delegation->flags) || + test_bit(NFS_DELEGATION_RETURNING, + &delegation->flags) || + test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) == 0) continue; if (!nfs_sb_active(server->super)) @@ -971,10 +974,11 @@ restart: list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { list_for_each_entry_rcu(delegation, &server->delegations, super_list) { - if (test_bit(NFS_DELEGATION_RETURNING, - &delegation->flags)) - continue; - if (test_bit(NFS_DELEGATION_TEST_EXPIRED, + if (test_bit(NFS_DELEGATION_INODE_FREEING, + &delegation->flags) || + test_bit(NFS_DELEGATION_RETURNING, + &delegation->flags) || + test_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags) == 0) continue; if (!nfs_sb_active(server->super)) diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index df41d16dc6ab..510c9edcc712 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -34,6 +34,7 @@ enum { NFS_DELEGATION_RETURNING, NFS_DELEGATION_REVOKED, NFS_DELEGATION_TEST_EXPIRED, + NFS_DELEGATION_INODE_FREEING, }; int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); From cc423232fb93e234e8e43edc0ca1cda830e48e21 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Thu, 10 Jan 2019 14:22:07 +0800 Subject: [PATCH 2547/3715] clocksource/drivers/sun5i: Fail gracefully when clock rate is unavailable [ Upstream commit e7e7e0d7beafebd11b0c065cd5fbc1e5759c5aab ] If the clock tree is not fully populated when the timer-sun5i init code is called, attempts to get the clock rate for the timer would fail and return 0. Make the init code for both clock events and clocksource check the returned clock rate and fail gracefully if the result is 0, instead of causing a divide by 0 exception later on. Fixes: 4a59058f0b09 ("clocksource/drivers/sun5i: Refactor the current code") Signed-off-by: Chen-Yu Tsai Acked-by: Maxime Ripard Signed-off-by: Daniel Lezcano Signed-off-by: Sasha Levin --- drivers/clocksource/timer-sun5i.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c index 2a3fe83ec337..6f4a9a8faccc 100644 --- a/drivers/clocksource/timer-sun5i.c +++ b/drivers/clocksource/timer-sun5i.c @@ -202,6 +202,11 @@ static int __init sun5i_setup_clocksource(struct device_node *node, } rate = clk_get_rate(clk); + if (!rate) { + pr_err("Couldn't get parent clock rate\n"); + ret = -EINVAL; + goto err_disable_clk; + } cs->timer.base = base; cs->timer.clk = clk; @@ -275,6 +280,11 @@ static int __init sun5i_setup_clockevent(struct device_node *node, void __iomem } rate = clk_get_rate(clk); + if (!rate) { + pr_err("Couldn't get parent clock rate\n"); + ret = -EINVAL; + goto err_disable_clk; + } ce->timer.base = base; ce->timer.ticks_per_jiffy = DIV_ROUND_UP(rate, HZ); From ba95507ae390062f5af1e8de76c8161fcc69eade Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 18 Oct 2018 11:57:04 +0200 Subject: [PATCH 2548/3715] clocksource/drivers/exynos_mct: Fix error path in timer resources initialization [ Upstream commit b9307420196009cdf18bad55e762ac49fb9a80f4 ] While freeing interrupt handlers in error path, don't assume that all requested interrupts are per-processor interrupts and properly release standard interrupts too. Reported-by: Krzysztof Kozlowski Fixes: 56a94f13919c ("clocksource: exynos_mct: Avoid blocking calls in the cpu hotplug notifier") Signed-off-by: Marek Szyprowski Reviewed-by: Krzysztof Kozlowski Reviewed-by: Chanwoo Choi Signed-off-by: Daniel Lezcano Signed-off-by: Sasha Levin --- drivers/clocksource/exynos_mct.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c index aaf5bfa9bd9c..e3ae041ac30e 100644 --- a/drivers/clocksource/exynos_mct.c +++ b/drivers/clocksource/exynos_mct.c @@ -563,7 +563,19 @@ static int __init exynos4_timer_resources(struct device_node *np, void __iomem * return 0; out_irq: - free_percpu_irq(mct_irqs[MCT_L0_IRQ], &percpu_mct_tick); + if (mct_int_type == MCT_INT_PPI) { + free_percpu_irq(mct_irqs[MCT_L0_IRQ], &percpu_mct_tick); + } else { + for_each_possible_cpu(cpu) { + struct mct_clock_event_device *pcpu_mevt = + per_cpu_ptr(&percpu_mct_tick, cpu); + + if (pcpu_mevt->evt.irq != -1) { + free_irq(pcpu_mevt->evt.irq, pcpu_mevt); + pcpu_mevt->evt.irq = -1; + } + } + } return err; } From 6fdf4970d3f8832525554a4f0a79d71ecc469e1b Mon Sep 17 00:00:00 2001 From: Mattias Jacobsson <2pi@mok.nu> Date: Wed, 30 Jan 2019 16:14:24 +0100 Subject: [PATCH 2549/3715] platform/x86: wmi: fix potential null pointer dereference [ Upstream commit c355ec651a8941864549f2586f969d0eb7bf499a ] In the function wmi_dev_match() the variable id is dereferenced without first performing a NULL check. The variable can for example be NULL if a WMI driver is registered without specifying the id_table field in struct wmi_driver. Add a NULL check and return that the driver can't handle the device if the variable is NULL. Fixes: 844af950da94 ("platform/x86: wmi: Turn WMI into a bus driver") Signed-off-by: Mattias Jacobsson <2pi@mok.nu> Signed-off-by: Darren Hart (VMware) Signed-off-by: Sasha Levin --- drivers/platform/x86/wmi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index 7f8fa42a1084..a56e997816b2 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -748,6 +748,9 @@ static int wmi_dev_match(struct device *dev, struct device_driver *driver) struct wmi_block *wblock = dev_to_wblock(dev); const struct wmi_device_id *id = wmi_driver->id_table; + if (id == NULL) + return 0; + while (id->guid_string) { uuid_le driver_guid; From 0cf0f51cc94c3e29ffc345f82468fe7c29df3b5b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 22 Feb 2019 14:20:27 -0500 Subject: [PATCH 2550/3715] NFS/pnfs: Bulk destroy of layouts needs to be safe w.r.t. umount [ Upstream commit 5085607d209102b37b169bc94d0aa39566a9842a ] If a bulk layout recall or a metadata server reboot coincides with a umount, then holding a reference to an inode is unsafe unless we also hold a reference to the super block. Fixes: fd9a8d7160937 ("NFSv4.1: Fix bulk recall and destroy of layouts") Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/pnfs.c | 33 +++++++++++++++++++++++---------- fs/nfs/pnfs.h | 1 + 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index ec04cce31814..83abf3dd7351 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -725,22 +725,35 @@ static int pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp, struct nfs_server *server, struct list_head *layout_list) + __must_hold(&clp->cl_lock) + __must_hold(RCU) { struct pnfs_layout_hdr *lo, *next; struct inode *inode; list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) { - if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) + if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) || + test_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags) || + !list_empty(&lo->plh_bulk_destroy)) continue; + /* If the sb is being destroyed, just bail */ + if (!nfs_sb_active(server->super)) + break; inode = igrab(lo->plh_inode); - if (inode == NULL) - continue; - list_del_init(&lo->plh_layouts); - if (pnfs_layout_add_bulk_destroy_list(inode, layout_list)) - continue; - rcu_read_unlock(); - spin_unlock(&clp->cl_lock); - iput(inode); + if (inode != NULL) { + list_del_init(&lo->plh_layouts); + if (pnfs_layout_add_bulk_destroy_list(inode, + layout_list)) + continue; + rcu_read_unlock(); + spin_unlock(&clp->cl_lock); + iput(inode); + } else { + rcu_read_unlock(); + spin_unlock(&clp->cl_lock); + set_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags); + } + nfs_sb_deactive(server->super); spin_lock(&clp->cl_lock); rcu_read_lock(); return -EAGAIN; @@ -778,7 +791,7 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list, /* Free all lsegs that are attached to commit buckets */ nfs_commit_inode(inode, 0); pnfs_put_layout_hdr(lo); - iput(inode); + nfs_iput_and_deactive(inode); } return ret; } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 87f144f14d1e..965d657086c8 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -99,6 +99,7 @@ enum { NFS_LAYOUT_RETURN_REQUESTED, /* Return this layout ASAP */ NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */ NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */ + NFS_LAYOUT_INODE_FREEING, /* The inode is being freed */ }; enum layoutdriver_policy_flags { From d95167b06b8696044c516891e0c51be7dd34e631 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sun, 23 Dec 2018 21:59:18 +0100 Subject: [PATCH 2551/3715] mmc: sdhci-brcmstb: handle mmc_of_parse() errors during probe [ Upstream commit 1e20186e706da8446f9435f2924cd65ab1397e73 ] We need to handle mmc_of_parse() errors during probe otherwise the MMC driver could start without proper initialization (e.g. power sequence). Fixes: 476bf3d62d5c ("mmc: sdhci-brcmstb: Add driver for Broadcom BRCMSTB SoCs") Signed-off-by: Stefan Wahren Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/sdhci-brcmstb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-brcmstb.c b/drivers/mmc/host/sdhci-brcmstb.c index 552bddc5096c..1cd10356fc14 100644 --- a/drivers/mmc/host/sdhci-brcmstb.c +++ b/drivers/mmc/host/sdhci-brcmstb.c @@ -55,7 +55,9 @@ static int sdhci_brcmstb_probe(struct platform_device *pdev) } sdhci_get_of_property(pdev); - mmc_of_parse(host->mmc); + res = mmc_of_parse(host->mmc); + if (res) + goto err; /* * Supply the existing CAPS, but clear the UHS modes. This From feb8ad9508f6fbba81b99cf25af1de7bee72c7b3 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 18 Feb 2019 09:31:41 +0100 Subject: [PATCH 2552/3715] ARM: 8847/1: pm: fix HYP/SVC mode mismatch when MCPM is used [ Upstream commit ca70ea43f80c98582f5ffbbd1e6f4da2742da0c4 ] MCPM does a soft reset of the CPUs and uses common cpu_resume() routine to perform low-level platform initialization. This results in a try to install HYP stubs for the second time for each CPU and results in false HYP/SVC mode mismatch detection. The HYP stubs are already installed at the beginning of the kernel initialization on the boot CPU (head.S) or in the secondary_startup() for other CPUs. To fix this issue MCPM code should use a cpu_resume() routine without HYP stubs installation. This change fixes HYP/SVC mode mismatch on Samsung Exynos5422-based Odroid XU3/XU4/HC1 boards. Fixes: 3721924c8154 ("ARM: 8081/1: MCPM: provide infrastructure to allow for MCPM loopback") Signed-off-by: Marek Szyprowski Acked-by: Nicolas Pitre Tested-by: Anand Moon Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/common/mcpm_entry.c | 2 +- arch/arm/include/asm/suspend.h | 1 + arch/arm/kernel/sleep.S | 12 ++++++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c index 2b913f17d50f..c24a55b0deac 100644 --- a/arch/arm/common/mcpm_entry.c +++ b/arch/arm/common/mcpm_entry.c @@ -379,7 +379,7 @@ static int __init nocache_trampoline(unsigned long _arg) unsigned int cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); phys_reset_t phys_reset; - mcpm_set_entry_vector(cpu, cluster, cpu_resume); + mcpm_set_entry_vector(cpu, cluster, cpu_resume_no_hyp); setup_mm_for_reboot(); __mcpm_cpu_going_down(cpu, cluster); diff --git a/arch/arm/include/asm/suspend.h b/arch/arm/include/asm/suspend.h index 452bbdcbcc83..506314265c6f 100644 --- a/arch/arm/include/asm/suspend.h +++ b/arch/arm/include/asm/suspend.h @@ -10,6 +10,7 @@ struct sleep_save_sp { }; extern void cpu_resume(void); +extern void cpu_resume_no_hyp(void); extern void cpu_resume_arm(void); extern int cpu_suspend(unsigned long, int (*)(unsigned long)); diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index a8257fc9cf2a..5dc8b80bb693 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S @@ -120,6 +120,14 @@ ENDPROC(cpu_resume_after_mmu) .text .align +#ifdef CONFIG_MCPM + .arm +THUMB( .thumb ) +ENTRY(cpu_resume_no_hyp) +ARM_BE8(setend be) @ ensure we are in BE mode + b no_hyp +#endif + #ifdef CONFIG_MMU .arm ENTRY(cpu_resume_arm) @@ -135,6 +143,7 @@ ARM_BE8(setend be) @ ensure we are in BE mode bl __hyp_stub_install_secondary #endif safe_svcmode_maskall r1 +no_hyp: mov r1, #0 ALT_SMP(mrc p15, 0, r0, c0, c0, 5) ALT_UP_B(1f) @@ -163,6 +172,9 @@ ENDPROC(cpu_resume) #ifdef CONFIG_MMU ENDPROC(cpu_resume_arm) +#endif +#ifdef CONFIG_MCPM +ENDPROC(cpu_resume_no_hyp) #endif .align 2 From 1a65ea1ea865e74490b2cb186d3ce3f934b9550d Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Wed, 20 Feb 2019 15:00:13 +0100 Subject: [PATCH 2553/3715] ARM: 8848/1: virt: Align GIC version check with arm64 counterpart [ Upstream commit 9db043d36bd379f4cc99054c079de0dabfc38d03 ] arm64 has got relaxation on GIC version check at early boot stage due to update of the GIC architecture let's align ARM with that. To help backports (even though the code was correct at the time of writing) Fixes: e59941b9b381 ("ARM: 8527/1: virt: enable GICv3 system registers") Signed-off-by: Vladimir Murzin Reviewed-by: Marc Zyngier Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/kernel/hyp-stub.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S index 60146e32619a..82a942894fc0 100644 --- a/arch/arm/kernel/hyp-stub.S +++ b/arch/arm/kernel/hyp-stub.S @@ -180,8 +180,8 @@ ARM_BE8(orr r7, r7, #(1 << 25)) @ HSCTLR.EE @ Check whether GICv3 system registers are available mrc p15, 0, r7, c0, c1, 1 @ ID_PFR1 ubfx r7, r7, #28, #4 - cmp r7, #1 - bne 2f + teq r7, #0 + beq 2f @ Enable system register accesses mrc p15, 4, r7, c12, c9, 5 @ ICC_HSRE From 79db752d9004b3a77528078b68de18861805b800 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 24 Feb 2019 21:16:51 +0800 Subject: [PATCH 2554/3715] regulator: wm831x-dcdc: Fix list of wm831x_dcdc_ilim from mA to uA [ Upstream commit c25d47888f0fb3d836d68322d4aea2caf31a75a6 ] The wm831x_dcdc_ilim entries needs to be uA because it is used to compare with min_uA and max_uA. While at it also make the array const and change to use unsigned int. Fixes: e4ee831f949a ("regulator: Add WM831x DC-DC buck convertor support") Signed-off-by: Axel Lin Acked-by: Charles Keepax Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/wm831x-dcdc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/regulator/wm831x-dcdc.c b/drivers/regulator/wm831x-dcdc.c index 5a5bc4bb08d2..df591435d12a 100644 --- a/drivers/regulator/wm831x-dcdc.c +++ b/drivers/regulator/wm831x-dcdc.c @@ -327,8 +327,8 @@ static int wm831x_buckv_get_voltage_sel(struct regulator_dev *rdev) } /* Current limit options */ -static u16 wm831x_dcdc_ilim[] = { - 125, 250, 375, 500, 625, 750, 875, 1000 +static const unsigned int wm831x_dcdc_ilim[] = { + 125000, 250000, 375000, 500000, 625000, 750000, 875000, 1000000 }; static int wm831x_buckv_set_current_limit(struct regulator_dev *rdev, From 223b167f01e30201e5de6db87b273ba5a36992a4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 25 Feb 2019 14:13:41 +0100 Subject: [PATCH 2555/3715] netfilter: nft_set_hash: fix lookups with fixed size hash on big endian [ Upstream commit 3b02b0adc242a72b5e46019b6a9e4f84823592f6 ] Call jhash_1word() for the 4-bytes key case from the insertion and deactivation path, otherwise big endian arch set lookups fail. Fixes: 446a8268b7f5 ("netfilter: nft_set_hash: add lookup variant for fixed size hashtable") Reported-by: Florian Westphal Tested-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_set_hash.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 33aa2ac3a62e..73f8f99b1193 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -442,6 +442,23 @@ static bool nft_hash_lookup_fast(const struct net *net, return false; } +static u32 nft_jhash(const struct nft_set *set, const struct nft_hash *priv, + const struct nft_set_ext *ext) +{ + const struct nft_data *key = nft_set_ext_key(ext); + u32 hash, k1; + + if (set->klen == 4) { + k1 = *(u32 *)key; + hash = jhash_1word(k1, priv->seed); + } else { + hash = jhash(key, set->klen, priv->seed); + } + hash = reciprocal_scale(hash, priv->buckets); + + return hash; +} + static int nft_hash_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, struct nft_set_ext **ext) @@ -451,8 +468,7 @@ static int nft_hash_insert(const struct net *net, const struct nft_set *set, u8 genmask = nft_genmask_next(net); u32 hash; - hash = jhash(nft_set_ext_key(&this->ext), set->klen, priv->seed); - hash = reciprocal_scale(hash, priv->buckets); + hash = nft_jhash(set, priv, &this->ext); hlist_for_each_entry(he, &priv->table[hash], node) { if (!memcmp(nft_set_ext_key(&this->ext), nft_set_ext_key(&he->ext), set->klen) && @@ -491,8 +507,7 @@ static void *nft_hash_deactivate(const struct net *net, u8 genmask = nft_genmask_next(net); u32 hash; - hash = jhash(nft_set_ext_key(&this->ext), set->klen, priv->seed); - hash = reciprocal_scale(hash, priv->buckets); + hash = nft_jhash(set, priv, &this->ext); hlist_for_each_entry(he, &priv->table[hash], node) { if (!memcmp(nft_set_ext_key(&this->ext), &elem->key.val, set->klen) || From 76cc6d437e1349c34ef08fb428d1e4ef0c912c9d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 26 Feb 2019 11:19:46 -0500 Subject: [PATCH 2556/3715] NFSv4/flexfiles: Fix invalid deref in FF_LAYOUT_DEVID_NODE() [ Upstream commit 108bb4afd351d65826648a47f11fa3104e250d9b ] If the attempt to instantiate the mirror's layout DS pointer failed, then that pointer may hold a value of type ERR_PTR(), so we need to check that before we dereference it. Fixes: 65990d1afbd2d ("pNFS/flexfiles: Fix a deadlock on LAYOUTGET") Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/flexfilelayout/flexfilelayout.h | 32 +++++++++++++++----------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index d6515f1584f3..d78ec99b6c4c 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -131,16 +131,6 @@ FF_LAYOUT_LSEG(struct pnfs_layout_segment *lseg) generic_hdr); } -static inline struct nfs4_deviceid_node * -FF_LAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg, u32 idx) -{ - if (idx >= FF_LAYOUT_LSEG(lseg)->mirror_array_cnt || - FF_LAYOUT_LSEG(lseg)->mirror_array[idx] == NULL || - FF_LAYOUT_LSEG(lseg)->mirror_array[idx]->mirror_ds == NULL) - return NULL; - return &FF_LAYOUT_LSEG(lseg)->mirror_array[idx]->mirror_ds->id_node; -} - static inline struct nfs4_ff_layout_ds * FF_LAYOUT_MIRROR_DS(struct nfs4_deviceid_node *node) { @@ -150,9 +140,25 @@ FF_LAYOUT_MIRROR_DS(struct nfs4_deviceid_node *node) static inline struct nfs4_ff_layout_mirror * FF_LAYOUT_COMP(struct pnfs_layout_segment *lseg, u32 idx) { - if (idx >= FF_LAYOUT_LSEG(lseg)->mirror_array_cnt) - return NULL; - return FF_LAYOUT_LSEG(lseg)->mirror_array[idx]; + struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg); + + if (idx < fls->mirror_array_cnt) + return fls->mirror_array[idx]; + return NULL; +} + +static inline struct nfs4_deviceid_node * +FF_LAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg, u32 idx) +{ + struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, idx); + + if (mirror != NULL) { + struct nfs4_ff_layout_ds *mirror_ds = mirror->mirror_ds; + + if (!IS_ERR_OR_NULL(mirror_ds)) + return &mirror_ds->id_node; + } + return NULL; } static inline u32 From 3b24a4144fbd078e6665ba8c64e6611eb14cecc5 Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Wed, 27 Feb 2019 12:10:09 +0000 Subject: [PATCH 2557/3715] net: aquantia: fixed instack structure overflow [ Upstream commit 8006e3730b6e900319411e35cee85b4513d298df ] This is a real stack undercorruption found by kasan build. The issue did no harm normally because it only overflowed 2 bytes after `bitary` array which on most architectures were mapped into `err` local. Fixes: bab6de8fd180 ("net: ethernet: aquantia: Atlantic A0 and B0 specific functions.") Signed-off-by: Nikita Danilov Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c | 4 ++-- drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c index b0abd187cead..b83ee74d2839 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c @@ -182,8 +182,8 @@ static int hw_atl_a0_hw_rss_set(struct aq_hw_s *self, u32 i = 0U; u32 num_rss_queues = max(1U, self->aq_nic_cfg->num_rss_queues); int err = 0; - u16 bitary[(HW_ATL_A0_RSS_REDIRECTION_MAX * - HW_ATL_A0_RSS_REDIRECTION_BITS / 16U)]; + u16 bitary[1 + (HW_ATL_A0_RSS_REDIRECTION_MAX * + HW_ATL_A0_RSS_REDIRECTION_BITS / 16U)]; memset(bitary, 0, sizeof(bitary)); diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 236325f48ec9..1c1bb074f664 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -183,8 +183,8 @@ static int hw_atl_b0_hw_rss_set(struct aq_hw_s *self, u32 i = 0U; u32 num_rss_queues = max(1U, self->aq_nic_cfg->num_rss_queues); int err = 0; - u16 bitary[(HW_ATL_B0_RSS_REDIRECTION_MAX * - HW_ATL_B0_RSS_REDIRECTION_BITS / 16U)]; + u16 bitary[1 + (HW_ATL_B0_RSS_REDIRECTION_MAX * + HW_ATL_B0_RSS_REDIRECTION_BITS / 16U)]; memset(bitary, 0, sizeof(bitary)); From 43f5a75119f6d03f88b6c59ca2745934d72ea698 Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Wed, 13 Feb 2019 10:29:49 +1100 Subject: [PATCH 2558/3715] powerpc/mm: Check secondary hash page table [ Upstream commit 790845e2f12709d273d08ea7a2af7c2593689519 ] We were always calling base_hpte_find() with primary = true, even when we wanted to check the secondary table. mpe: I broke this when refactoring Rashmica's original patch. Fixes: 1515ab932156 ("powerpc/mm: Dump hash table") Signed-off-by: Rashmica Gupta Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/mm/dump_hashpagetable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/dump_hashpagetable.c b/arch/powerpc/mm/dump_hashpagetable.c index 5c4c93dcff19..f666d74f05f5 100644 --- a/arch/powerpc/mm/dump_hashpagetable.c +++ b/arch/powerpc/mm/dump_hashpagetable.c @@ -343,7 +343,7 @@ static unsigned long hpte_find(struct pg_state *st, unsigned long ea, int psize) /* Look in secondary table */ if (slot == -1) - slot = base_hpte_find(ea, psize, true, &v, &r); + slot = base_hpte_find(ea, psize, false, &v, &r); /* No entry found */ if (slot == -1) From a3323a093f3af8a715ce76c7d0783b1ac3a0cf93 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 7 Nov 2018 10:36:10 +0800 Subject: [PATCH 2559/3715] nios2: ksyms: Add missing symbol exports [ Upstream commit 0f8ed994575429d6042cf5d7ef70081c94091587 ] Building nios2:allmodconfig fails as follows (each symbol is only listed once). ERROR: "__ashldi3" [drivers/md/dm-writecache.ko] undefined! ERROR: "__ashrdi3" [fs/xfs/xfs.ko] undefined! ERROR: "__ucmpdi2" [drivers/media/i2c/adv7842.ko] undefined! ERROR: "__lshrdi3" [drivers/md/dm-zoned.ko] undefined! ERROR: "flush_icache_range" [drivers/misc/lkdtm/lkdtm.ko] undefined! ERROR: "empty_zero_page" [drivers/md/dm-mod.ko] undefined! The problem is seen with gcc 7.3.0. Export the missing symbols. Fixes: 2fc8483fdcde ("nios2: Build infrastructure") Signed-off-by: Guenter Roeck Signed-off-by: Ley Foon Tan Signed-off-by: Sasha Levin --- arch/nios2/kernel/nios2_ksyms.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/nios2/kernel/nios2_ksyms.c b/arch/nios2/kernel/nios2_ksyms.c index bf2f55d10a4d..4e704046a150 100644 --- a/arch/nios2/kernel/nios2_ksyms.c +++ b/arch/nios2/kernel/nios2_ksyms.c @@ -9,12 +9,20 @@ #include #include +#include +#include + /* string functions */ EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memmove); +/* memory management */ + +EXPORT_SYMBOL(empty_zero_page); +EXPORT_SYMBOL(flush_icache_range); + /* * libgcc functions - functions that are used internally by the * compiler... (prototypes are not correct though, but that @@ -31,3 +39,7 @@ DECLARE_EXPORT(__udivsi3); DECLARE_EXPORT(__umoddi3); DECLARE_EXPORT(__umodsi3); DECLARE_EXPORT(__muldi3); +DECLARE_EXPORT(__ucmpdi2); +DECLARE_EXPORT(__lshrdi3); +DECLARE_EXPORT(__ashldi3); +DECLARE_EXPORT(__ashrdi3); From ba0c43da8f5d7cb2942ccf2780ad9c67ed7ba042 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 28 Feb 2019 17:01:55 -0500 Subject: [PATCH 2560/3715] x86/mm: Remove unused variable 'cpu' [ Upstream commit 3609e31bc8dc03b701390f79c74fc7fe92b95039 ] The commit a2055abe9c67 ("x86/mm: Pass flush_tlb_info to flush_tlb_others() etc") removed the unnecessary cpu parameter from uv_flush_tlb_others() but left an unused variable. arch/x86/mm/tlb.c: In function 'native_flush_tlb_others': arch/x86/mm/tlb.c:688:16: warning: variable 'cpu' set but not used [-Wunused-but-set-variable] unsigned int cpu; ^~~ Fixes: a2055abe9c67 ("x86/mm: Pass flush_tlb_info to flush_tlb_others() etc") Signed-off-by: Qian Cai Signed-off-by: Thomas Gleixner Acked-by: Andyt Lutomirski Cc: dave.hansen@linux.intel.com Cc: peterz@infradead.org Cc: bp@alien8.de Cc: hpa@zytor.com Link: https://lkml.kernel.org/r/20190228220155.88124-1-cai@lca.pw Signed-off-by: Sasha Levin --- arch/x86/mm/tlb.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 5400a24e1a8c..c5d7b4ae17ca 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -651,9 +651,6 @@ void native_flush_tlb_others(const struct cpumask *cpumask, * that UV should be updated so that smp_call_function_many(), * etc, are optimal on UV. */ - unsigned int cpu; - - cpu = smp_processor_id(); cpumask = uv_flush_tlb_others(cpumask, info); if (cpumask) smp_call_function_many(cpumask, flush_tlb_func_remote, From d6bf70766f60c396a8e4cb3ff0385cca93a4b1c9 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Fri, 1 Mar 2019 06:46:28 -0800 Subject: [PATCH 2561/3715] scsi: megaraid_sas: reduce module load time [ Upstream commit 31b6a05f86e690e1818116fd23c3be915cc9d9ed ] megaraid_sas takes 1+ seconds to load while waiting for firmware: [2.822603] megaraid_sas 0000:03:00.0: Waiting for FW to come to ready state [3.871003] megaraid_sas 0000:03:00.0: FW now in Ready state This is due to the following loop in megasas_transition_to_ready(), which waits a minimum of 1 second, even though the FW becomes ready in tens of millisecs: /* * The cur_state should not last for more than max_wait secs */ for (i = 0; i < max_wait; i++) { ... msleep(1000); ... dev_info(&instance->pdev->dev, "FW now in Ready state\n"); This is a regression, caused by a change of the msleep granularity from 1 to 1000 due to concern about waiting too long on systems with coarse jiffies. To fix, increase iterations and use msleep(20), which results in: [2.670627] megaraid_sas 0000:03:00.0: Waiting for FW to come to ready state [2.739386] megaraid_sas 0000:03:00.0: FW now in Ready state Fixes: fb2f3e96d80f ("scsi: megaraid_sas: Fix msleep granularity") Signed-off-by: Steve Sistare Acked-by: Sumit Saxena Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/megaraid/megaraid_sas_base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 577513649afb..6abad63b127a 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -3823,12 +3823,12 @@ megasas_transition_to_ready(struct megasas_instance *instance, int ocr) /* * The cur_state should not last for more than max_wait secs */ - for (i = 0; i < max_wait; i++) { + for (i = 0; i < max_wait * 50; i++) { curr_abs_state = instance->instancet-> read_fw_status_reg(instance->reg_set); if (abs_state == curr_abs_state) { - msleep(1000); + msleep(20); } else break; } From 5ba5c4da88ae802ba8f43710d2965aa0ecba6492 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 7 Mar 2019 16:29:33 -0800 Subject: [PATCH 2562/3715] drivers/rapidio/rio_cm.c: fix potential oops in riocm_ch_listen() [ Upstream commit 5ac188b12e7cbdd92dee60877d1fac913fc1d074 ] If riocm_get_channel() fails, then we should just return -EINVAL. Calling riocm_put_channel() will trigger a NULL dereference and generally we should call put() if the get() didn't succeed. Link: http://lkml.kernel.org/r/20190110130230.GB27017@kadam Fixes: b6e8d4aa1110 ("rapidio: add RapidIO channelized messaging driver") Signed-off-by: Dan Carpenter Reviewed-by: Andrew Morton Cc: Matt Porter Cc: Alexandre Bounine Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- drivers/rapidio/rio_cm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/rapidio/rio_cm.c b/drivers/rapidio/rio_cm.c index ef989a15aefc..b29fc258eeba 100644 --- a/drivers/rapidio/rio_cm.c +++ b/drivers/rapidio/rio_cm.c @@ -1215,7 +1215,9 @@ static int riocm_ch_listen(u16 ch_id) riocm_debug(CHOP, "(ch_%d)", ch_id); ch = riocm_get_channel(ch_id); - if (!ch || !riocm_cmp_exch(ch, RIO_CM_CHAN_BOUND, RIO_CM_LISTEN)) + if (!ch) + return -EINVAL; + if (!riocm_cmp_exch(ch, RIO_CM_CHAN_BOUND, RIO_CM_LISTEN)) ret = -EINVAL; riocm_put_channel(ch); return ret; From 2cd5e08b9af2b40ca7537ec2c66f1459cd95ad8b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 7 Mar 2019 08:41:22 +0300 Subject: [PATCH 2563/3715] xen, cpu_hotplug: Prevent an out of bounds access [ Upstream commit 201676095dda7e5b31a5e1d116d10fc22985075e ] The "cpu" variable comes from the sscanf() so Smatch marks it as untrusted data. We can't pass a higher value than "nr_cpu_ids" to cpu_possible() or it results in an out of bounds access. Fixes: d68d82afd4c8 ("xen: implement CPU hotplugging") Signed-off-by: Dan Carpenter Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/xen/cpu_hotplug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c index b1357aa4bc55..f192b6f42da9 100644 --- a/drivers/xen/cpu_hotplug.c +++ b/drivers/xen/cpu_hotplug.c @@ -54,7 +54,7 @@ static int vcpu_online(unsigned int cpu) } static void vcpu_hotplug(unsigned int cpu) { - if (!cpu_possible(cpu)) + if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) return; switch (vcpu_online(cpu)) { From 9ef50cb11df1163b45ac989df6b235a09a42c772 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Tue, 12 Mar 2019 02:43:18 -0500 Subject: [PATCH 2564/3715] net: sh_eth: fix a missing check of of_get_phy_mode [ Upstream commit 035a14e71f27eefa50087963b94cbdb3580d08bf ] of_get_phy_mode may fail and return a negative error code; the fix checks the return value of of_get_phy_mode and returns NULL of it fails. Fixes: b356e978e92f ("sh_eth: add device tree support") Signed-off-by: Kangjie Lu Reviewed-by: Sergei Shtylyov Reviewed-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/renesas/sh_eth.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 9b1906a65e11..25f3b2ad26e9 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -3046,12 +3046,16 @@ static struct sh_eth_plat_data *sh_eth_parse_dt(struct device *dev) struct device_node *np = dev->of_node; struct sh_eth_plat_data *pdata; const char *mac_addr; + int ret; pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL); if (!pdata) return NULL; - pdata->phy_interface = of_get_phy_mode(np); + ret = of_get_phy_mode(np); + if (ret < 0) + return NULL; + pdata->phy_interface = ret; mac_addr = of_get_mac_address(np); if (mac_addr) From d0d7c1cbd50bc1d32e37ea9b92c30b66075979b5 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Fri, 1 Mar 2019 14:16:40 +0800 Subject: [PATCH 2565/3715] regulator: lp87565: Fix missing register for LP87565_BUCK_0 [ Upstream commit d1a6cbdf1e597917cb642c655512d91b71a35d22 ] LP87565_BUCK_0 is missed, fix it. Fixes: f0168a9bf ("regulator: lp87565: Add support for lp87565 PMIC regulators") Signed-off-by: Axel Lin Reviewed-by: Keerthy Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/lp87565-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/lp87565-regulator.c b/drivers/regulator/lp87565-regulator.c index cfdbe294fb6a..32d4e6ec2e19 100644 --- a/drivers/regulator/lp87565-regulator.c +++ b/drivers/regulator/lp87565-regulator.c @@ -188,7 +188,7 @@ static int lp87565_regulator_probe(struct platform_device *pdev) struct lp87565 *lp87565 = dev_get_drvdata(pdev->dev.parent); struct regulator_config config = { }; struct regulator_dev *rdev; - int i, min_idx = LP87565_BUCK_1, max_idx = LP87565_BUCK_3; + int i, min_idx = LP87565_BUCK_0, max_idx = LP87565_BUCK_3; platform_set_drvdata(pdev, lp87565); From b9ce28072898cafd5a06ac7a7b29c2827ae7125b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 22 Feb 2019 01:36:41 -0500 Subject: [PATCH 2566/3715] media: ivtv: update *pos correctly in ivtv_read_pos() [ Upstream commit f8e579f3ca0973daef263f513da5edff520a6c0d ] We had intended to update *pos, but the current code is a no-op. Fixes: 1a0adaf37c30 ("V4L/DVB (5345): ivtv driver for Conexant cx23416/cx23415 MPEG encoder/decoder") Signed-off-by: Dan Carpenter Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/pci/ivtv/ivtv-fileops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/pci/ivtv/ivtv-fileops.c b/drivers/media/pci/ivtv/ivtv-fileops.c index c9bd018e53de..e2b19c3eaa87 100644 --- a/drivers/media/pci/ivtv/ivtv-fileops.c +++ b/drivers/media/pci/ivtv/ivtv-fileops.c @@ -420,7 +420,7 @@ static ssize_t ivtv_read_pos(struct ivtv_stream *s, char __user *ubuf, size_t co IVTV_DEBUG_HI_FILE("read %zd from %s, got %zd\n", count, s->name, rc); if (rc > 0) - pos += rc; + *pos += rc; return rc; } From 6ae3b318c93ee18af1144fb11df995ba7bd7a190 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 22 Feb 2019 01:37:02 -0500 Subject: [PATCH 2567/3715] media: cx18: update *pos correctly in cx18_read_pos() [ Upstream commit 7afb0df554292dca7568446f619965fb8153085d ] We should be updating *pos. The current code is a no-op. Fixes: 1c1e45d17b66 ("V4L/DVB (7786): cx18: new driver for the Conexant CX23418 MPEG encoder chip") Signed-off-by: Dan Carpenter Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/pci/cx18/cx18-fileops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/pci/cx18/cx18-fileops.c b/drivers/media/pci/cx18/cx18-fileops.c index 98467b2089fa..099d59b992c1 100644 --- a/drivers/media/pci/cx18/cx18-fileops.c +++ b/drivers/media/pci/cx18/cx18-fileops.c @@ -484,7 +484,7 @@ static ssize_t cx18_read_pos(struct cx18_stream *s, char __user *ubuf, CX18_DEBUG_HI_FILE("read %zd from %s, got %zd\n", count, s->name, rc); if (rc > 0) - pos += rc; + *pos += rc; return rc; } From 8d53d5a2de14502ff1a62764e67568a68a558f1b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 6 Mar 2019 02:27:43 -0500 Subject: [PATCH 2568/3715] media: wl128x: Fix an error code in fm_download_firmware() [ Upstream commit ef4bb63dc1f7213c08e13f6943c69cd27f69e4a3 ] We forgot to set "ret" on this error path. Fixes: e8454ff7b9a4 ("[media] drivers:media:radio: wl128x: FM Driver Common sources") Signed-off-by: Dan Carpenter Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/radio/wl128x/fmdrv_common.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/media/radio/wl128x/fmdrv_common.c b/drivers/media/radio/wl128x/fmdrv_common.c index 26895ae42fcf..2d20d908e280 100644 --- a/drivers/media/radio/wl128x/fmdrv_common.c +++ b/drivers/media/radio/wl128x/fmdrv_common.c @@ -1271,8 +1271,9 @@ static int fm_download_firmware(struct fmdev *fmdev, const u8 *fw_name) switch (action->type) { case ACTION_SEND_COMMAND: /* Send */ - if (fmc_send_cmd(fmdev, 0, 0, action->data, - action->size, NULL, NULL)) + ret = fmc_send_cmd(fmdev, 0, 0, action->data, + action->size, NULL, NULL); + if (ret) goto rel_fw; cmd_cnt++; From 8f51a1bbe9fecdd7f7bdf7e58287bc0beda5d404 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Sat, 19 Jan 2019 22:52:23 -0500 Subject: [PATCH 2569/3715] media: cx23885: check allocation return [ Upstream commit a3d7f22ef34ec4206b50ee121384d5c8bebd5591 ] Checking of kmalloc() seems to have been committed - as cx23885_dvb_register() is checking for != 0 return, returning -ENOMEM should be fine here. While at it address the coccicheck suggestion to move to kmemdup rather than using kmalloc+memcpy. Fixes: 46b21bbaa8a8 ("[media] Add support for DViCO FusionHDTV DVB-T Dual Express2") Signed-off-by: Nicholas Mc Guire Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/pci/cx23885/cx23885-dvb.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/media/pci/cx23885/cx23885-dvb.c b/drivers/media/pci/cx23885/cx23885-dvb.c index e795ddeb7fe2..60f122edaefb 100644 --- a/drivers/media/pci/cx23885/cx23885-dvb.c +++ b/drivers/media/pci/cx23885/cx23885-dvb.c @@ -1460,8 +1460,9 @@ static int dvb_register(struct cx23885_tsport *port) if (fe0->dvb.frontend != NULL) { struct i2c_adapter *tun_i2c; - fe0->dvb.frontend->sec_priv = kmalloc(sizeof(dib7000p_ops), GFP_KERNEL); - memcpy(fe0->dvb.frontend->sec_priv, &dib7000p_ops, sizeof(dib7000p_ops)); + fe0->dvb.frontend->sec_priv = kmemdup(&dib7000p_ops, sizeof(dib7000p_ops), GFP_KERNEL); + if (!fe0->dvb.frontend->sec_priv) + return -ENOMEM; tun_i2c = dib7000p_ops.get_i2c_master(fe0->dvb.frontend, DIBX000_I2C_INTERFACE_TUNER, 1); if (!dvb_attach(dib0070_attach, fe0->dvb.frontend, tun_i2c, &dib7070p_dib0070_config)) return -ENODEV; From 96ef352408a62a567c38b02b39bb87a5efb4c375 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 4 Mar 2019 16:57:52 +0800 Subject: [PATCH 2570/3715] regulator: tps65086: Fix tps65086_ldoa1_ranges for selector 0xB [ Upstream commit e69b394703e032e56a140172440ec4f9890b536d ] selector 0xB (1011) should be 2.6V rather than 2.7V, fit ix. Table 5-4. LDOA1 Output Voltage Options VID Bits VOUT VID Bits VOUT VID Bits VOUT VID Bits VOUT 0000 1.35 0100 1.8 1000 2.3 1100 2.85 0001 1.5 0101 1.9 1001 2.4 1101 3.0 0010 1.6 0110 2.0 1010 2.5 1110 3.3 0011 1.7 0111 2.1 1011 2.6 1111 Not Used Fixes: d2a2e729a666 ("regulator: tps65086: Add regulator driver for the TPS65086 PMIC") Signed-off-by: Axel Lin Acked-by: Andrew F. Davis Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/tps65086-regulator.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/regulator/tps65086-regulator.c b/drivers/regulator/tps65086-regulator.c index 45e96e154690..5a5e9b5bf4be 100644 --- a/drivers/regulator/tps65086-regulator.c +++ b/drivers/regulator/tps65086-regulator.c @@ -90,8 +90,8 @@ static const struct regulator_linear_range tps65086_buck345_25mv_ranges[] = { static const struct regulator_linear_range tps65086_ldoa1_ranges[] = { REGULATOR_LINEAR_RANGE(1350000, 0x0, 0x0, 0), REGULATOR_LINEAR_RANGE(1500000, 0x1, 0x7, 100000), - REGULATOR_LINEAR_RANGE(2300000, 0x8, 0xA, 100000), - REGULATOR_LINEAR_RANGE(2700000, 0xB, 0xD, 150000), + REGULATOR_LINEAR_RANGE(2300000, 0x8, 0xB, 100000), + REGULATOR_LINEAR_RANGE(2850000, 0xC, 0xD, 150000), REGULATOR_LINEAR_RANGE(3300000, 0xE, 0xE, 0), }; From a2f301a5a385581a38e740379126a3800417924c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 22 Mar 2019 15:19:16 +0100 Subject: [PATCH 2571/3715] jfs: fix bogus variable self-initialization [ Upstream commit a5fdd713d256887b5f012608701149fa939e5645 ] A statement was originally added in 2006 to shut up a gcc warning, now but now clang warns about it: fs/jfs/jfs_txnmgr.c:1932:15: error: variable 'pxd' is uninitialized when used within its own initialization [-Werror,-Wuninitialized] pxd_t pxd = pxd; /* truncated extent of xad */ ~~~ ^~~ Modern versions of gcc are fine without the silly assignment, so just drop it. Tested with gcc-4.6 (released 2011), 4.7, 4.8, and 4.9. Fixes: c9e3ad6021e5 ("JFS: Get rid of "may be used uninitialized" warnings") Signed-off-by: Arnd Bergmann Signed-off-by: Dave Kleikamp Signed-off-by: Sasha Levin --- fs/jfs/jfs_txnmgr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index 4d973524c887..224ef034004b 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -1928,8 +1928,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, * header ? */ if (tlck->type & tlckTRUNCATE) { - /* This odd declaration suppresses a bogus gcc warning */ - pxd_t pxd = pxd; /* truncated extent of xad */ + pxd_t pxd; /* truncated extent of xad */ int twm; /* From 34bb4eab1942bc1d54e4d14a8ddfc3be7904fe6e Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Fri, 22 Mar 2019 15:03:51 +0100 Subject: [PATCH 2572/3715] tipc: tipc clang warning [ Upstream commit 737889efe9713a0f20a75fd0de952841d9275e6b ] When checking the code with clang -Wsometimes-uninitialized we get the following warning: if (!tipc_link_is_establishing(l)) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ net/tipc/node.c:847:46: note: uninitialized use occurs here tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr); net/tipc/node.c:831:2: note: remove the 'if' if its condition is always true if (!tipc_link_is_establishing(l)) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ net/tipc/node.c:821:31: note: initialize the variable 'maddr' to silence this warning struct tipc_media_addr *maddr; We fix this by initializing 'maddr' to NULL. For the matter of clarity, we also test if 'xmitq' is non-empty before we use it and 'maddr' further down in the function. It will never happen that 'xmitq' is non- empty at the same time as 'maddr' is NULL, so this is a sufficient test. Fixes: 598411d70f85 ("tipc: make resetting of links non-atomic") Reported-by: Nathan Chancellor Signed-off-by: Jon Maloy Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/tipc/node.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 42e9bdcc4bb6..82f8f69f4d6b 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -688,10 +688,10 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) { struct tipc_link_entry *le = &n->links[bearer_id]; + struct tipc_media_addr *maddr = NULL; struct tipc_link *l = le->link; - struct tipc_media_addr *maddr; - struct sk_buff_head xmitq; int old_bearer_id = bearer_id; + struct sk_buff_head xmitq; if (!l) return; @@ -713,7 +713,8 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) tipc_node_write_unlock(n); if (delete) tipc_mon_remove_peer(n->net, n->addr, old_bearer_id); - tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr); + if (!skb_queue_empty(&xmitq)) + tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr); tipc_sk_rcv(n->net, &le->inputq); } From 5bd4bd3e35d156dda9c0da69ebdef0e83c3dd360 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 1 Dec 2018 11:53:10 +1100 Subject: [PATCH 2573/3715] m68k: mac: Fix VIA timer counter accesses [ Upstream commit 0ca7ce7db771580433bf24454f7a1542bd326078 ] This resolves some bugs that affect VIA timer counter accesses. Avoid lost interrupts caused by reading the counter low byte register. Make allowance for the fact that the counter will be decremented to 0xFFFF before being reloaded. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Finn Thain Signed-off-by: Geert Uytterhoeven Signed-off-by: Sasha Levin --- arch/m68k/mac/via.c | 102 +++++++++++++++++++++++--------------------- 1 file changed, 53 insertions(+), 49 deletions(-) diff --git a/arch/m68k/mac/via.c b/arch/m68k/mac/via.c index 9f59a662ace5..7334245abf26 100644 --- a/arch/m68k/mac/via.c +++ b/arch/m68k/mac/via.c @@ -54,16 +54,6 @@ static __u8 rbv_clear; static int gIER,gIFR,gBufA,gBufB; -/* - * Timer defs. - */ - -#define TICK_SIZE 10000 -#define MAC_CLOCK_TICK (783300/HZ) /* ticks per HZ */ -#define MAC_CLOCK_LOW (MAC_CLOCK_TICK&0xFF) -#define MAC_CLOCK_HIGH (MAC_CLOCK_TICK>>8) - - /* * On Macs with a genuine VIA chip there is no way to mask an individual slot * interrupt. This limitation also seems to apply to VIA clone logic cores in @@ -278,22 +268,6 @@ void __init via_init(void) } } -/* - * Start the 100 Hz clock - */ - -void __init via_init_clock(irq_handler_t func) -{ - via1[vACR] |= 0x40; - via1[vT1LL] = MAC_CLOCK_LOW; - via1[vT1LH] = MAC_CLOCK_HIGH; - via1[vT1CL] = MAC_CLOCK_LOW; - via1[vT1CH] = MAC_CLOCK_HIGH; - - if (request_irq(IRQ_MAC_TIMER_1, func, 0, "timer", func)) - pr_err("Couldn't register %s interrupt\n", "timer"); -} - /* * Debugging dump, used in various places to see what's going on. */ @@ -321,29 +295,6 @@ void via_debug_dump(void) } } -/* - * This is always executed with interrupts disabled. - * - * TBI: get time offset between scheduling timer ticks - */ - -u32 mac_gettimeoffset(void) -{ - unsigned long ticks, offset = 0; - - /* read VIA1 timer 2 current value */ - ticks = via1[vT1CL] | (via1[vT1CH] << 8); - /* The probability of underflow is less than 2% */ - if (ticks > MAC_CLOCK_TICK - MAC_CLOCK_TICK / 50) - /* Check for pending timer interrupt in VIA1 IFR */ - if (via1[vIFR] & 0x40) offset = TICK_SIZE; - - ticks = MAC_CLOCK_TICK - ticks; - ticks = ticks * 10000L / MAC_CLOCK_TICK; - - return (ticks + offset) * 1000; -} - /* * Flush the L2 cache on Macs that have it by flipping * the system into 24-bit mode for an instant. @@ -612,3 +563,56 @@ int via2_scsi_drq_pending(void) return via2[gIFR] & (1 << IRQ_IDX(IRQ_MAC_SCSIDRQ)); } EXPORT_SYMBOL(via2_scsi_drq_pending); + +/* timer and clock source */ + +#define VIA_CLOCK_FREQ 783360 /* VIA "phase 2" clock in Hz */ +#define VIA_TIMER_INTERVAL (1000000 / HZ) /* microseconds per jiffy */ +#define VIA_TIMER_CYCLES (VIA_CLOCK_FREQ / HZ) /* clock cycles per jiffy */ + +#define VIA_TC (VIA_TIMER_CYCLES - 2) /* including 0 and -1 */ +#define VIA_TC_LOW (VIA_TC & 0xFF) +#define VIA_TC_HIGH (VIA_TC >> 8) + +void __init via_init_clock(irq_handler_t timer_routine) +{ + if (request_irq(IRQ_MAC_TIMER_1, timer_routine, 0, "timer", NULL)) { + pr_err("Couldn't register %s interrupt\n", "timer"); + return; + } + + via1[vT1LL] = VIA_TC_LOW; + via1[vT1LH] = VIA_TC_HIGH; + via1[vT1CL] = VIA_TC_LOW; + via1[vT1CH] = VIA_TC_HIGH; + via1[vACR] |= 0x40; +} + +u32 mac_gettimeoffset(void) +{ + unsigned long flags; + u8 count_high; + u16 count, offset = 0; + + /* + * Timer counter wrap-around is detected with the timer interrupt flag + * but reading the counter low byte (vT1CL) would reset the flag. + * Also, accessing both counter registers is essentially a data race. + * These problems are avoided by ignoring the low byte. Clock accuracy + * is 256 times worse (error can reach 0.327 ms) but CPU overhead is + * reduced by avoiding slow VIA register accesses. + */ + + local_irq_save(flags); + count_high = via1[vT1CH]; + if (count_high == 0xFF) + count_high = 0; + if (count_high > 0 && (via1[vIFR] & VIA_TIMER_1_INT)) + offset = VIA_TIMER_CYCLES; + local_irq_restore(flags); + + count = count_high << 8; + count = VIA_TIMER_CYCLES - count + offset; + + return ((count * VIA_TIMER_INTERVAL) / VIA_TIMER_CYCLES) * 1000; +} From 9a8e28387cf4797860080bfeb09076540b4b4e53 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 25 Mar 2019 14:52:50 +0100 Subject: [PATCH 2574/3715] arm64: dts: allwinner: a64: Add missing PIO clocks [ Upstream commit 562bf19611c000cb7219431c3cc78aa60c2b371e ] The pinctrl binding mandates that we have the three clocks fed into the PIO described. Even though the old case is still supported for backward compatibility, we should update our DTs to fix this. Fixes: 6bc37fac30cf ("arm64: dts: add Allwinner A64 SoC .dtsi") Acked-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi index 8c8db1b057df..788a6f8c5994 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi @@ -274,7 +274,8 @@ interrupts = , , ; - clocks = <&ccu 58>; + clocks = <&ccu 58>, <&osc24M>, <&rtc 0>; + clock-names = "apb", "hosc", "losc"; gpio-controller; #gpio-cells = <3>; interrupt-controller; From 7ea5302d4890fa1fd2277f9514c633c4c6ab5354 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 21 Mar 2019 11:00:21 -0700 Subject: [PATCH 2575/3715] ARM: OMAP2+: Fix potentially uninitialized return value for _setup_reset() [ Upstream commit 7f0d078667a494466991aa7133f49594f32ff6a2 ] Commit 747834ab8347 ("ARM: OMAP2+: hwmod: revise hardreset behavior") made the call to _enable() conditional based on no oh->rst_lines_cnt. This caused the return value to be potentially uninitialized. Curiously we see no compiler warnings for this, probably as this gets inlined. We call _setup_reset() from _setup() and only _setup_postsetup() if the return value is zero. Currently the return value can be uninitialized for cases where oh->rst_lines_cnt is set and HWMOD_INIT_NO_RESET is not set. Fixes: 747834ab8347 ("ARM: OMAP2+: hwmod: revise hardreset behavior") Cc: Paul Walmsley Cc: Tero Kristo Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/mach-omap2/omap_hwmod.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 45c8f2ef4e23..9274a484c6a3 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -2530,7 +2530,7 @@ static void _setup_iclk_autoidle(struct omap_hwmod *oh) */ static int _setup_reset(struct omap_hwmod *oh) { - int r; + int r = 0; if (oh->_state != _HWMOD_STATE_INITIALIZED) return -EINVAL; From 9162cb9cf6d9ed5e3b8d0fd6cdae4048b008e645 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 22 Mar 2019 10:34:22 -0400 Subject: [PATCH 2576/3715] media: davinci-isif: avoid uninitialized variable use [ Upstream commit 0e633f97162c1c74c68e2eb20bbd9259dce87cd9 ] clang warns about a possible variable use that gcc never complained about: drivers/media/platform/davinci/isif.c:982:32: error: variable 'frame_size' is uninitialized when used here [-Werror,-Wuninitialized] dm365_vpss_set_pg_frame_size(frame_size); ^~~~~~~~~~ drivers/media/platform/davinci/isif.c:887:2: note: variable 'frame_size' is declared here struct vpss_pg_frame_size frame_size; ^ 1 error generated. There is no initialization for this variable at all, and there has never been one in the mainline kernel, so we really should not put that stack data into an mmio register. On the other hand, I suspect that gcc checks the condition more closely and notices that the global isif_cfg.bayer.config_params.test_pat_gen flag is initialized to zero and never written to from any code path, so anything depending on it can be eliminated. To shut up the clang warning, just remove the dead code manually, it has probably never been used because any attempt to do so would have resulted in undefined behavior. Fixes: 63e3ab142fa3 ("V4L/DVB: V4L - vpfe capture - source for ISIF driver on DM365") Signed-off-by: Arnd Bergmann Reviewed-by: Nathan Chancellor Acked-by: Lad, Prabhakar Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/davinci/isif.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/media/platform/davinci/isif.c b/drivers/media/platform/davinci/isif.c index 90d0f13283ae..12065ad1ac45 100644 --- a/drivers/media/platform/davinci/isif.c +++ b/drivers/media/platform/davinci/isif.c @@ -886,9 +886,7 @@ static int isif_set_hw_if_params(struct vpfe_hw_if_param *params) static int isif_config_ycbcr(void) { struct isif_ycbcr_config *params = &isif_cfg.ycbcr; - struct vpss_pg_frame_size frame_size; u32 modeset = 0, ccdcfg = 0; - struct vpss_sync_pol sync; dev_dbg(isif_cfg.dev, "\nStarting isif_config_ycbcr..."); @@ -976,13 +974,6 @@ static int isif_config_ycbcr(void) /* two fields are interleaved in memory */ regw(0x00000249, SDOFST); - /* Setup test pattern if enabled */ - if (isif_cfg.bayer.config_params.test_pat_gen) { - sync.ccdpg_hdpol = params->hd_pol; - sync.ccdpg_vdpol = params->vd_pol; - dm365_vpss_set_sync_pol(sync); - dm365_vpss_set_pg_frame_size(frame_size); - } return 0; } From f4c8d9e5ea524d7f53e54da2920a7d1250822ec5 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 14 Mar 2019 22:01:24 -0400 Subject: [PATCH 2577/3715] media: tw5864: Fix possible NULL pointer dereference in tw5864_handle_frame [ Upstream commit 2e7682ebfc750177a4944eeb56e97a3f05734528 ] 'vb' null check should be done before dereferencing it in tw5864_handle_frame, otherwise a NULL pointer dereference may occur. Fixes: 34d1324edd31 ("[media] pci: Add tw5864 driver") Signed-off-by: YueHaibing Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/pci/tw5864/tw5864-video.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/pci/tw5864/tw5864-video.c b/drivers/media/pci/tw5864/tw5864-video.c index e7bd2b8484e3..ee1230440b39 100644 --- a/drivers/media/pci/tw5864/tw5864-video.c +++ b/drivers/media/pci/tw5864/tw5864-video.c @@ -1395,13 +1395,13 @@ static void tw5864_handle_frame(struct tw5864_h264_frame *frame) input->vb = NULL; spin_unlock_irqrestore(&input->slock, flags); - v4l2_buf = to_vb2_v4l2_buffer(&vb->vb.vb2_buf); - if (!vb) { /* Gone because of disabling */ dev_dbg(&dev->pci->dev, "vb is empty, dropping frame\n"); return; } + v4l2_buf = to_vb2_v4l2_buffer(&vb->vb.vb2_buf); + /* * Check for space. * Mind the overhead of startcode emulation prevention. From 58ac2bc9ce809985c78bf4855dfa69e8505e6bfc Mon Sep 17 00:00:00 2001 From: Sowjanya Komatineni Date: Tue, 26 Mar 2019 22:56:23 -0700 Subject: [PATCH 2578/3715] spi: tegra114: clear packed bit for unpacked mode [ Upstream commit 7b3d10cdf54b8bc1dc0da21faed9789ac4da3684 ] Fixes: Clear packed bit when not using packed mode. Packed bit is not cleared when not using packed mode. This results in transfer timeouts for the unpacked mode transfers followed by the packed mode transfers. Signed-off-by: Sowjanya Komatineni Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-tegra114.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c index 2ad04796ef29..3a6b202dfffe 100644 --- a/drivers/spi/spi-tegra114.c +++ b/drivers/spi/spi-tegra114.c @@ -730,6 +730,8 @@ static int tegra_spi_start_transfer_one(struct spi_device *spi, if (tspi->is_packed) command1 |= SPI_PACKED; + else + command1 &= ~SPI_PACKED; command1 &= ~(SPI_CS_SEL_MASK | SPI_TX_EN | SPI_RX_EN); tspi->cur_direction = 0; From ffd39bb17e492dc878ed4b72201d115f7522a015 Mon Sep 17 00:00:00 2001 From: Sowjanya Komatineni Date: Tue, 26 Mar 2019 22:56:24 -0700 Subject: [PATCH 2579/3715] spi: tegra114: fix for unpacked mode transfers [ Upstream commit 1a89ac5b91895127f7c586ec5075c3753ca25501 ] Fixes: computation of actual bytes to fill/receive in/from FIFO in unpacked mode when transfer length is not a multiple of requested bits per word. unpacked mode transfers fails when the transfer includes partial bytes in the last word. Total words to be written/read to/from FIFO is computed based on transfer length and bits per word. Unpacked mode includes 0 padding bytes for partial words to align with bits per word and these extra bytes are also accounted for calculating bytes left to transfer in the current driver. This causes extra bytes access of tx/rx buffers along with buffer index position crossing actual length where remain_len becomes negative and due to unsigned type, negative value is a 32 bit representation of signed value and transferred bytes never meets the actual transfer length resulting in transfer timeout and a hang. This patch fixes this with proper computation of the actual bytes to fill in FIFO during transmit and the actual bytes to read from FIFO during receive ignoring 0 padded bytes. Signed-off-by: Sowjanya Komatineni Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-tegra114.c | 43 +++++++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c index 3a6b202dfffe..c6674b01e0fd 100644 --- a/drivers/spi/spi-tegra114.c +++ b/drivers/spi/spi-tegra114.c @@ -307,10 +307,16 @@ static unsigned tegra_spi_fill_tx_fifo_from_client_txbuf( x |= (u32)(*tx_buf++) << (i * 8); tegra_spi_writel(tspi, x, SPI_TX_FIFO); } + + tspi->cur_tx_pos += written_words * tspi->bytes_per_word; } else { + unsigned int write_bytes; max_n_32bit = min(tspi->curr_dma_words, tx_empty_count); written_words = max_n_32bit; nbytes = written_words * tspi->bytes_per_word; + if (nbytes > t->len - tspi->cur_pos) + nbytes = t->len - tspi->cur_pos; + write_bytes = nbytes; for (count = 0; count < max_n_32bit; count++) { u32 x = 0; @@ -319,8 +325,10 @@ static unsigned tegra_spi_fill_tx_fifo_from_client_txbuf( x |= (u32)(*tx_buf++) << (i * 8); tegra_spi_writel(tspi, x, SPI_TX_FIFO); } + + tspi->cur_tx_pos += write_bytes; } - tspi->cur_tx_pos += written_words * tspi->bytes_per_word; + return written_words; } @@ -344,20 +352,27 @@ static unsigned int tegra_spi_read_rx_fifo_to_client_rxbuf( for (i = 0; len && (i < 4); i++, len--) *rx_buf++ = (x >> i*8) & 0xFF; } - tspi->cur_rx_pos += tspi->curr_dma_words * tspi->bytes_per_word; read_words += tspi->curr_dma_words; + tspi->cur_rx_pos += tspi->curr_dma_words * tspi->bytes_per_word; } else { u32 rx_mask = ((u32)1 << t->bits_per_word) - 1; + u8 bytes_per_word = tspi->bytes_per_word; + unsigned int read_bytes; + len = rx_full_count * bytes_per_word; + if (len > t->len - tspi->cur_pos) + len = t->len - tspi->cur_pos; + read_bytes = len; for (count = 0; count < rx_full_count; count++) { u32 x = tegra_spi_readl(tspi, SPI_RX_FIFO) & rx_mask; - for (i = 0; (i < tspi->bytes_per_word); i++) + for (i = 0; len && (i < bytes_per_word); i++, len--) *rx_buf++ = (x >> (i*8)) & 0xFF; } - tspi->cur_rx_pos += rx_full_count * tspi->bytes_per_word; read_words += rx_full_count; + tspi->cur_rx_pos += read_bytes; } + return read_words; } @@ -372,12 +387,17 @@ static void tegra_spi_copy_client_txbuf_to_spi_txbuf( unsigned len = tspi->curr_dma_words * tspi->bytes_per_word; memcpy(tspi->tx_dma_buf, t->tx_buf + tspi->cur_pos, len); + tspi->cur_tx_pos += tspi->curr_dma_words * tspi->bytes_per_word; } else { unsigned int i; unsigned int count; u8 *tx_buf = (u8 *)t->tx_buf + tspi->cur_tx_pos; unsigned consume = tspi->curr_dma_words * tspi->bytes_per_word; + unsigned int write_bytes; + if (consume > t->len - tspi->cur_pos) + consume = t->len - tspi->cur_pos; + write_bytes = consume; for (count = 0; count < tspi->curr_dma_words; count++) { u32 x = 0; @@ -386,8 +406,9 @@ static void tegra_spi_copy_client_txbuf_to_spi_txbuf( x |= (u32)(*tx_buf++) << (i * 8); tspi->tx_dma_buf[count] = x; } + + tspi->cur_tx_pos += write_bytes; } - tspi->cur_tx_pos += tspi->curr_dma_words * tspi->bytes_per_word; /* Make the dma buffer to read by dma */ dma_sync_single_for_device(tspi->dev, tspi->tx_dma_phys, @@ -405,20 +426,28 @@ static void tegra_spi_copy_spi_rxbuf_to_client_rxbuf( unsigned len = tspi->curr_dma_words * tspi->bytes_per_word; memcpy(t->rx_buf + tspi->cur_rx_pos, tspi->rx_dma_buf, len); + tspi->cur_rx_pos += tspi->curr_dma_words * tspi->bytes_per_word; } else { unsigned int i; unsigned int count; unsigned char *rx_buf = t->rx_buf + tspi->cur_rx_pos; u32 rx_mask = ((u32)1 << t->bits_per_word) - 1; + unsigned consume = tspi->curr_dma_words * tspi->bytes_per_word; + unsigned int read_bytes; + if (consume > t->len - tspi->cur_pos) + consume = t->len - tspi->cur_pos; + read_bytes = consume; for (count = 0; count < tspi->curr_dma_words; count++) { u32 x = tspi->rx_dma_buf[count] & rx_mask; - for (i = 0; (i < tspi->bytes_per_word); i++) + for (i = 0; consume && (i < tspi->bytes_per_word); + i++, consume--) *rx_buf++ = (x >> (i*8)) & 0xFF; } + + tspi->cur_rx_pos += read_bytes; } - tspi->cur_rx_pos += tspi->curr_dma_words * tspi->bytes_per_word; /* Make the dma buffer to read by dma */ dma_sync_single_for_device(tspi->dev, tspi->rx_dma_phys, From f24affc80580feff6a7517f8f716cfff04b63740 Mon Sep 17 00:00:00 2001 From: Sowjanya Komatineni Date: Tue, 26 Mar 2019 22:56:27 -0700 Subject: [PATCH 2580/3715] spi: tegra114: terminate dma and reset on transfer timeout [ Upstream commit 32bd1a9551cae34e6889afa235c7afdfede9aeac ] Fixes: terminate DMA and perform controller reset on transfer timeout to clear the FIFO's and errors. Signed-off-by: Sowjanya Komatineni Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-tegra114.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c index c6674b01e0fd..4878d5e00c66 100644 --- a/drivers/spi/spi-tegra114.c +++ b/drivers/spi/spi-tegra114.c @@ -869,7 +869,16 @@ static int tegra_spi_transfer_one_message(struct spi_master *master, if (WARN_ON(ret == 0)) { dev_err(tspi->dev, "spi transfer timeout, err %d\n", ret); + if (tspi->is_curr_dma_xfer && + (tspi->cur_direction & DATA_DIR_TX)) + dmaengine_terminate_all(tspi->tx_dma_chan); + if (tspi->is_curr_dma_xfer && + (tspi->cur_direction & DATA_DIR_RX)) + dmaengine_terminate_all(tspi->rx_dma_chan); ret = -EIO; + reset_control_assert(tspi->rst); + udelay(2); + reset_control_deassert(tspi->rst); goto complete_xfer; } From 938f5d5d4802b99fcca2ae846f73e16cd77740ab Mon Sep 17 00:00:00 2001 From: Sowjanya Komatineni Date: Tue, 26 Mar 2019 22:56:28 -0700 Subject: [PATCH 2581/3715] spi: tegra114: flush fifos [ Upstream commit c4fc9e5b28ff787e35137c2cc13316bb11d7657b ] Fixes: Flush TX and RX FIFOs before start of new transfer and on FIFO overflow or underrun errors. Signed-off-by: Sowjanya Komatineni Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-tegra114.c | 39 +++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c index 4878d5e00c66..18dfbd57c61f 100644 --- a/drivers/spi/spi-tegra114.c +++ b/drivers/spi/spi-tegra114.c @@ -499,21 +499,36 @@ static int tegra_spi_start_rx_dma(struct tegra_spi_data *tspi, int len) return 0; } +static int tegra_spi_flush_fifos(struct tegra_spi_data *tspi) +{ + unsigned long timeout = jiffies + HZ; + u32 status; + + status = tegra_spi_readl(tspi, SPI_FIFO_STATUS); + if ((status & SPI_FIFO_EMPTY) != SPI_FIFO_EMPTY) { + status |= SPI_RX_FIFO_FLUSH | SPI_TX_FIFO_FLUSH; + tegra_spi_writel(tspi, status, SPI_FIFO_STATUS); + while ((status & SPI_FIFO_EMPTY) != SPI_FIFO_EMPTY) { + status = tegra_spi_readl(tspi, SPI_FIFO_STATUS); + if (time_after(jiffies, timeout)) { + dev_err(tspi->dev, + "timeout waiting for fifo flush\n"); + return -EIO; + } + + udelay(1); + } + } + + return 0; +} + static int tegra_spi_start_dma_based_transfer( struct tegra_spi_data *tspi, struct spi_transfer *t) { u32 val; unsigned int len; int ret = 0; - u32 status; - - /* Make sure that Rx and Tx fifo are empty */ - status = tegra_spi_readl(tspi, SPI_FIFO_STATUS); - if ((status & SPI_FIFO_EMPTY) != SPI_FIFO_EMPTY) { - dev_err(tspi->dev, "Rx/Tx fifo are not empty status 0x%08x\n", - (unsigned)status); - return -EIO; - } val = SPI_DMA_BLK_SET(tspi->curr_dma_words - 1); tegra_spi_writel(tspi, val, SPI_DMA_BLK); @@ -779,6 +794,9 @@ static int tegra_spi_start_transfer_one(struct spi_device *spi, dev_dbg(tspi->dev, "The def 0x%x and written 0x%x\n", tspi->def_command1_reg, (unsigned)command1); + ret = tegra_spi_flush_fifos(tspi); + if (ret < 0) + return ret; if (total_fifo_words > SPI_FIFO_DEPTH) ret = tegra_spi_start_dma_based_transfer(tspi, t); else @@ -876,6 +894,7 @@ static int tegra_spi_transfer_one_message(struct spi_master *master, (tspi->cur_direction & DATA_DIR_RX)) dmaengine_terminate_all(tspi->rx_dma_chan); ret = -EIO; + tegra_spi_flush_fifos(tspi); reset_control_assert(tspi->rst); udelay(2); reset_control_deassert(tspi->rst); @@ -929,6 +948,7 @@ static irqreturn_t handle_cpu_based_xfer(struct tegra_spi_data *tspi) tspi->status_reg); dev_err(tspi->dev, "CpuXfer 0x%08x:0x%08x\n", tspi->command1_reg, tspi->dma_control_reg); + tegra_spi_flush_fifos(tspi); reset_control_assert(tspi->rst); udelay(2); reset_control_deassert(tspi->rst); @@ -1001,6 +1021,7 @@ static irqreturn_t handle_dma_based_xfer(struct tegra_spi_data *tspi) tspi->status_reg); dev_err(tspi->dev, "DmaXfer 0x%08x:0x%08x\n", tspi->command1_reg, tspi->dma_control_reg); + tegra_spi_flush_fifos(tspi); reset_control_assert(tspi->rst); udelay(2); reset_control_deassert(tspi->rst); From bd9f7b6a0a4ffe6ff00454085c4f0abef44d221f Mon Sep 17 00:00:00 2001 From: Sowjanya Komatineni Date: Tue, 26 Mar 2019 22:56:29 -0700 Subject: [PATCH 2582/3715] spi: tegra114: configure dma burst size to fifo trig level [ Upstream commit f4ce428c41fb22e3ed55496dded94df44cb920fa ] Fixes: Configure DMA burst size to be same as SPI TX/RX trigger levels to avoid mismatch. SPI FIFO trigger levels are calculated based on the transfer length. So this patch moves DMA slave configuration to happen before start of DMAs. Signed-off-by: Sowjanya Komatineni Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-tegra114.c | 52 ++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c index 18dfbd57c61f..84ff0c507f0b 100644 --- a/drivers/spi/spi-tegra114.c +++ b/drivers/spi/spi-tegra114.c @@ -529,6 +529,8 @@ static int tegra_spi_start_dma_based_transfer( u32 val; unsigned int len; int ret = 0; + u8 dma_burst; + struct dma_slave_config dma_sconfig = {0}; val = SPI_DMA_BLK_SET(tspi->curr_dma_words - 1); tegra_spi_writel(tspi, val, SPI_DMA_BLK); @@ -540,12 +542,16 @@ static int tegra_spi_start_dma_based_transfer( len = tspi->curr_dma_words * 4; /* Set attention level based on length of transfer */ - if (len & 0xF) + if (len & 0xF) { val |= SPI_TX_TRIG_1 | SPI_RX_TRIG_1; - else if (((len) >> 4) & 0x1) + dma_burst = 1; + } else if (((len) >> 4) & 0x1) { val |= SPI_TX_TRIG_4 | SPI_RX_TRIG_4; - else + dma_burst = 4; + } else { val |= SPI_TX_TRIG_8 | SPI_RX_TRIG_8; + dma_burst = 8; + } if (tspi->cur_direction & DATA_DIR_TX) val |= SPI_IE_TX; @@ -556,7 +562,18 @@ static int tegra_spi_start_dma_based_transfer( tegra_spi_writel(tspi, val, SPI_DMA_CTL); tspi->dma_control_reg = val; + dma_sconfig.device_fc = true; if (tspi->cur_direction & DATA_DIR_TX) { + dma_sconfig.dst_addr = tspi->phys + SPI_TX_FIFO; + dma_sconfig.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + dma_sconfig.dst_maxburst = dma_burst; + ret = dmaengine_slave_config(tspi->tx_dma_chan, &dma_sconfig); + if (ret < 0) { + dev_err(tspi->dev, + "DMA slave config failed: %d\n", ret); + return ret; + } + tegra_spi_copy_client_txbuf_to_spi_txbuf(tspi, t); ret = tegra_spi_start_tx_dma(tspi, len); if (ret < 0) { @@ -567,6 +584,16 @@ static int tegra_spi_start_dma_based_transfer( } if (tspi->cur_direction & DATA_DIR_RX) { + dma_sconfig.src_addr = tspi->phys + SPI_RX_FIFO; + dma_sconfig.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + dma_sconfig.src_maxburst = dma_burst; + ret = dmaengine_slave_config(tspi->rx_dma_chan, &dma_sconfig); + if (ret < 0) { + dev_err(tspi->dev, + "DMA slave config failed: %d\n", ret); + return ret; + } + /* Make the dma buffer to read by dma */ dma_sync_single_for_device(tspi->dev, tspi->rx_dma_phys, tspi->dma_buf_size, DMA_FROM_DEVICE); @@ -626,7 +653,6 @@ static int tegra_spi_init_dma_param(struct tegra_spi_data *tspi, u32 *dma_buf; dma_addr_t dma_phys; int ret; - struct dma_slave_config dma_sconfig; dma_chan = dma_request_slave_channel_reason(tspi->dev, dma_to_memory ? "rx" : "tx"); @@ -646,19 +672,6 @@ static int tegra_spi_init_dma_param(struct tegra_spi_data *tspi, return -ENOMEM; } - if (dma_to_memory) { - dma_sconfig.src_addr = tspi->phys + SPI_RX_FIFO; - dma_sconfig.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; - dma_sconfig.src_maxburst = 0; - } else { - dma_sconfig.dst_addr = tspi->phys + SPI_TX_FIFO; - dma_sconfig.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; - dma_sconfig.dst_maxburst = 0; - } - - ret = dmaengine_slave_config(dma_chan, &dma_sconfig); - if (ret) - goto scrub; if (dma_to_memory) { tspi->rx_dma_chan = dma_chan; tspi->rx_dma_buf = dma_buf; @@ -669,11 +682,6 @@ static int tegra_spi_init_dma_param(struct tegra_spi_data *tspi, tspi->tx_dma_phys = dma_phys; } return 0; - -scrub: - dma_free_coherent(tspi->dev, tspi->dma_buf_size, dma_buf, dma_phys); - dma_release_channel(dma_chan); - return ret; } static void tegra_spi_deinit_dma_param(struct tegra_spi_data *tspi, From a96770dea1ba612fe7210bd7ad1dfa6b86ca948b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 28 Mar 2019 17:18:41 +0300 Subject: [PATCH 2583/3715] soc/fsl/qe: Fix an error code in qe_pin_request() [ Upstream commit 5674a92ca4b7e5a6a19231edd10298d30324cd27 ] We forgot to set "err" on this error path. Fixes: 1a2d397a6eb5 ("gpio/powerpc: Eliminate duplication of of_get_named_gpio_flags()") Signed-off-by: Dan Carpenter Signed-off-by: Li Yang Signed-off-by: Sasha Levin --- drivers/soc/fsl/qe/gpio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/soc/fsl/qe/gpio.c b/drivers/soc/fsl/qe/gpio.c index 3b27075c21a7..5cbc5ce5ac15 100644 --- a/drivers/soc/fsl/qe/gpio.c +++ b/drivers/soc/fsl/qe/gpio.c @@ -152,8 +152,10 @@ struct qe_pin *qe_pin_request(struct device_node *np, int index) if (err < 0) goto err0; gc = gpio_to_chip(err); - if (WARN_ON(!gc)) + if (WARN_ON(!gc)) { + err = -ENODEV; goto err0; + } if (!of_device_is_compatible(gc->of_node, "fsl,mpc8323-qe-pario-bank")) { pr_debug("%s: tried to get a non-qe pin\n", __func__); From a040d2bf4437e0efd75a811207e79be3001cc5b9 Mon Sep 17 00:00:00 2001 From: Martin Sperl Date: Sat, 30 Mar 2019 09:31:02 +0000 Subject: [PATCH 2584/3715] spi: bcm2835aux: fix driver to not allow 65535 (=-1) cs-gpios [ Upstream commit 509c583620e9053e43d611bf1614fc3d3abafa96 ] The original driver by default defines num_chipselects as -1. This actually allicates an array of 65535 entries in of_spi_register_master. There is a side-effect for buggy device trees that (contrary to dt-binding documentation) have no cs-gpio defined. This mode was never supported by the driver due to limitations of native cs and additional code complexity and is explicitly not stated to be implemented. To keep backwards compatibility with such buggy DTs we limit the number of chip_selects to 1, as for all practical purposes it is only ever realistic to use a single chip select in native cs mode without negative side-effects. Fixes: 1ea29b39f4c812ec ("spi: bcm2835aux: add bcm2835 auxiliary spi device...") Signed-off-by: Martin Sperl Acked-by: Stefan Wahren Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-bcm2835aux.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-bcm2835aux.c b/drivers/spi/spi-bcm2835aux.c index 5c89bbb05441..e075712c501e 100644 --- a/drivers/spi/spi-bcm2835aux.c +++ b/drivers/spi/spi-bcm2835aux.c @@ -416,7 +416,18 @@ static int bcm2835aux_spi_probe(struct platform_device *pdev) platform_set_drvdata(pdev, master); master->mode_bits = (SPI_CPOL | SPI_CS_HIGH | SPI_NO_CS); master->bits_per_word_mask = SPI_BPW_MASK(8); - master->num_chipselect = -1; + /* even though the driver never officially supported native CS + * allow a single native CS for legacy DT support purposes when + * no cs-gpio is configured. + * Known limitations for native cs are: + * * multiple chip-selects: cs0-cs2 are all simultaniously asserted + * whenever there is a transfer - this even includes SPI_NO_CS + * * SPI_CS_HIGH: is ignores - cs are always asserted low + * * cs_change: cs is deasserted after each spi_transfer + * * cs_delay_usec: cs is always deasserted one SCK cycle after + * a spi_transfer + */ + master->num_chipselect = 1; master->transfer_one = bcm2835aux_spi_transfer_one; master->handle_err = bcm2835aux_spi_handle_err; master->prepare_message = bcm2835aux_spi_prepare_message; From 105f6b1ab3d04b6477cb1d45b4d8dba5e64c210e Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 3 Apr 2019 15:47:59 +0800 Subject: [PATCH 2585/3715] ehea: Fix a copy-paste err in ehea_init_port_res [ Upstream commit c8f191282f819ab4e9b47b22a65c6c29734cefce ] pr->tx_bytes should be assigned to tx_bytes other than rx_bytes. Reported-by: Hulk Robot Fixes: ce45b873028f ("ehea: Fixing statistics") Signed-off-by: YueHaibing Reviewed-by: Mukesh Ojha Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/ibm/ehea/ehea_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index 30cbdf0fed59..373deb247ac0 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -1475,7 +1475,7 @@ static int ehea_init_port_res(struct ehea_port *port, struct ehea_port_res *pr, memset(pr, 0, sizeof(struct ehea_port_res)); - pr->tx_bytes = rx_bytes; + pr->tx_bytes = tx_bytes; pr->tx_packets = tx_packets; pr->rx_bytes = rx_bytes; pr->rx_packets = rx_packets; From 770d2807dc789f27f21c5f6555f2b1c4e6db3a6d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 4 Apr 2019 12:44:46 -0700 Subject: [PATCH 2586/3715] scsi: qla2xxx: Unregister chrdev if module initialization fails [ Upstream commit c794d24ec9eb6658909955772e70f34bef5b5b91 ] If module initialization fails after the character device has been registered, unregister the character device. Additionally, avoid duplicating error path code. Cc: Himanshu Madhani Cc: Giridhar Malavali Fixes: 6a03b4cd78f3 ("[SCSI] qla2xxx: Add char device to increase driver use count") # v2.6.35. Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_os.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 5617bb18c233..5f9d4dbc4a98 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -6714,8 +6714,7 @@ qla2x00_module_init(void) /* Initialize target kmem_cache and mem_pools */ ret = qlt_init(); if (ret < 0) { - kmem_cache_destroy(srb_cachep); - return ret; + goto destroy_cache; } else if (ret > 0) { /* * If initiator mode is explictly disabled by qlt_init(), @@ -6736,11 +6735,10 @@ qla2x00_module_init(void) qla2xxx_transport_template = fc_attach_transport(&qla2xxx_transport_functions); if (!qla2xxx_transport_template) { - kmem_cache_destroy(srb_cachep); ql_log(ql_log_fatal, NULL, 0x0002, "fc_attach_transport failed...Failing load!.\n"); - qlt_exit(); - return -ENODEV; + ret = -ENODEV; + goto qlt_exit; } apidev_major = register_chrdev(0, QLA2XXX_APIDEV, &apidev_fops); @@ -6752,27 +6750,37 @@ qla2x00_module_init(void) qla2xxx_transport_vport_template = fc_attach_transport(&qla2xxx_transport_vport_functions); if (!qla2xxx_transport_vport_template) { - kmem_cache_destroy(srb_cachep); - qlt_exit(); - fc_release_transport(qla2xxx_transport_template); ql_log(ql_log_fatal, NULL, 0x0004, "fc_attach_transport vport failed...Failing load!.\n"); - return -ENODEV; + ret = -ENODEV; + goto unreg_chrdev; } ql_log(ql_log_info, NULL, 0x0005, "QLogic Fibre Channel HBA Driver: %s.\n", qla2x00_version_str); ret = pci_register_driver(&qla2xxx_pci_driver); if (ret) { - kmem_cache_destroy(srb_cachep); - qlt_exit(); - fc_release_transport(qla2xxx_transport_template); - fc_release_transport(qla2xxx_transport_vport_template); ql_log(ql_log_fatal, NULL, 0x0006, "pci_register_driver failed...ret=%d Failing load!.\n", ret); + goto release_vport_transport; } return ret; + +release_vport_transport: + fc_release_transport(qla2xxx_transport_vport_template); + +unreg_chrdev: + if (apidev_major >= 0) + unregister_chrdev(apidev_major, QLA2XXX_APIDEV); + fc_release_transport(qla2xxx_transport_template); + +qlt_exit: + qlt_exit(); + +destroy_cache: + kmem_cache_destroy(srb_cachep); + return ret; } /** From e03671470aebcb8f7b4f19997e047fbdc7f5b261 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 2 Apr 2019 12:58:05 -0700 Subject: [PATCH 2587/3715] scsi: target/core: Fix a race condition in the LUN lookup code [ Upstream commit 63f7479439c95bcd49b7dd4af809862c316c71a3 ] The rcu_dereference(deve->se_lun) expression occurs twice in the LUN lookup functions. Since these expressions are not serialized against deve->se_lun assignments each of these expressions may yield a different result. Avoid that the wrong LUN pointer is stored in se_cmd by reading deve->se_lun only once. Cc: Mike Christie Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Nicholas Bellinger Fixes: 29a05deebf6c ("target: Convert se_node_acl->device_list[] to RCU hlist") # v4.10 Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/target/target_core_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 92b52d2314b5..cebef8e5a43d 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -85,7 +85,7 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u64 unpacked_lun) goto out_unlock; } - se_cmd->se_lun = rcu_dereference(deve->se_lun); + se_cmd->se_lun = se_lun; se_cmd->pr_res_key = deve->pr_res_key; se_cmd->orig_fe_lun = unpacked_lun; se_cmd->se_cmd_flags |= SCF_SE_LUN_CMD; @@ -176,7 +176,7 @@ int transport_lookup_tmr_lun(struct se_cmd *se_cmd, u64 unpacked_lun) goto out_unlock; } - se_cmd->se_lun = rcu_dereference(deve->se_lun); + se_cmd->se_lun = se_lun; se_cmd->pr_res_key = deve->pr_res_key; se_cmd->orig_fe_lun = unpacked_lun; se_cmd->se_cmd_flags |= SCF_SE_LUN_CMD; From f7919eec49098a9c5172dd67132227d12e08b155 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 29 Jan 2019 16:03:24 +0800 Subject: [PATCH 2588/3715] ARM: pxa: ssp: Fix "WARNING: invalid free of devm_ allocated data" [ Upstream commit 9ee8578d953023cc57e7e736ae48502c707c0210 ] Since commit 1c459de1e645 ("ARM: pxa: ssp: use devm_ functions") kfree, iounmap, clk_put etc are not needed anymore in remove path. Fixes: 1c459de1e645 ("ARM: pxa: ssp: use devm_ functions") Signed-off-by: YueHaibing [ commit message spelling fix ] Signed-off-by: Robert Jarzmik Signed-off-by: Sasha Levin --- arch/arm/plat-pxa/ssp.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/arm/plat-pxa/ssp.c b/arch/arm/plat-pxa/ssp.c index b92673efffff..97bd43c16cd8 100644 --- a/arch/arm/plat-pxa/ssp.c +++ b/arch/arm/plat-pxa/ssp.c @@ -230,18 +230,12 @@ static int pxa_ssp_probe(struct platform_device *pdev) static int pxa_ssp_remove(struct platform_device *pdev) { - struct resource *res; struct ssp_device *ssp; ssp = platform_get_drvdata(pdev); if (ssp == NULL) return -ENODEV; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - release_mem_region(res->start, resource_size(res)); - - clk_put(ssp->clk); - mutex_lock(&ssp_lock); list_del(&ssp->node); mutex_unlock(&ssp_lock); From d2a71849205a6746f6013e71062c8e15e8ccdafd Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Mon, 15 Apr 2019 21:48:39 +0800 Subject: [PATCH 2589/3715] net: hns3: fix for vport->bw_limit overflow problem [ Upstream commit 2566f10676ba996b745e138f54f3e2f974311692 ] When setting vport->bw_limit to hdev->tm_info.pg_info[0].bw_limit in hclge_tm_vport_tc_info_update, vport->bw_limit can be as big as HCLGE_ETHER_MAX_RATE (100000), which can not fit into u16 (65535). So this patch fixes it by using u32 for vport->bw_limit. Fixes: 848440544b41 ("net: hns3: Add support of TX Scheduler & Shaper to HNS3 driver") Reported-by: Dan Carpenter Signed-off-by: Yunsheng Lin Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 9fcfd9395424..a4c5e72d6012 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -480,7 +480,7 @@ struct hclge_vport { u16 alloc_rss_size; u16 qs_offset; - u16 bw_limit; /* VSI BW Limit (0 = disabled) */ + u32 bw_limit; /* VSI BW Limit (0 = disabled) */ u8 dwrr; int vport_id; From ec16a5a2cc3059b2d6571b7db1e9ad98cf7882ee Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 5 Apr 2019 08:44:41 -0700 Subject: [PATCH 2590/3715] hwmon: (w83627hf) Use request_muxed_region for Super-IO accesses [ Upstream commit e95fd518d05bfc087da6fcdea4900a57cfb083bd ] Super-IO accesses may fail on a system with no or unmapped LPC bus. Also, other drivers may attempt to access the LPC bus at the same time, resulting in undefined behavior. Use request_muxed_region() to ensure that IO access on the requested address space is supported, and to ensure that access by multiple drivers is synchronized. Fixes: b72656dbc491 ("hwmon: (w83627hf) Stop using globals for I/O port numbers") Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/w83627hf.c | 42 +++++++++++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/drivers/hwmon/w83627hf.c b/drivers/hwmon/w83627hf.c index 8ac89d0781cc..a575e1cdb81a 100644 --- a/drivers/hwmon/w83627hf.c +++ b/drivers/hwmon/w83627hf.c @@ -130,17 +130,23 @@ superio_select(struct w83627hf_sio_data *sio, int ld) outb(ld, sio->sioaddr + 1); } -static inline void +static inline int superio_enter(struct w83627hf_sio_data *sio) { + if (!request_muxed_region(sio->sioaddr, 2, DRVNAME)) + return -EBUSY; + outb(0x87, sio->sioaddr); outb(0x87, sio->sioaddr); + + return 0; } static inline void superio_exit(struct w83627hf_sio_data *sio) { outb(0xAA, sio->sioaddr); + release_region(sio->sioaddr, 2); } #define W627_DEVID 0x52 @@ -1278,7 +1284,7 @@ static DEVICE_ATTR_RO(name); static int __init w83627hf_find(int sioaddr, unsigned short *addr, struct w83627hf_sio_data *sio_data) { - int err = -ENODEV; + int err; u16 val; static __initconst char *const names[] = { @@ -1290,7 +1296,11 @@ static int __init w83627hf_find(int sioaddr, unsigned short *addr, }; sio_data->sioaddr = sioaddr; - superio_enter(sio_data); + err = superio_enter(sio_data); + if (err) + return err; + + err = -ENODEV; val = force_id ? force_id : superio_inb(sio_data, DEVID); switch (val) { case W627_DEVID: @@ -1644,9 +1654,21 @@ static int w83627thf_read_gpio5(struct platform_device *pdev) struct w83627hf_sio_data *sio_data = dev_get_platdata(&pdev->dev); int res = 0xff, sel; - superio_enter(sio_data); + if (superio_enter(sio_data)) { + /* + * Some other driver reserved the address space for itself. + * We don't want to fail driver instantiation because of that, + * so display a warning and keep going. + */ + dev_warn(&pdev->dev, + "Can not read VID data: Failed to enable SuperIO access\n"); + return res; + } + superio_select(sio_data, W83627HF_LD_GPIO5); + res = 0xff; + /* Make sure these GPIO pins are enabled */ if (!(superio_inb(sio_data, W83627THF_GPIO5_EN) & (1<<3))) { dev_dbg(&pdev->dev, "GPIO5 disabled, no VID function\n"); @@ -1677,7 +1699,17 @@ static int w83687thf_read_vid(struct platform_device *pdev) struct w83627hf_sio_data *sio_data = dev_get_platdata(&pdev->dev); int res = 0xff; - superio_enter(sio_data); + if (superio_enter(sio_data)) { + /* + * Some other driver reserved the address space for itself. + * We don't want to fail driver instantiation because of that, + * so display a warning and keep going. + */ + dev_warn(&pdev->dev, + "Can not read VID data: Failed to enable SuperIO access\n"); + return res; + } + superio_select(sio_data, W83627HF_LD_HWM); /* Make sure these GPIO pins are enabled */ From 16d61aeda20bb6cd5c88f0c6c4bdc78ee9436d70 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 30 Mar 2019 00:17:12 +0000 Subject: [PATCH 2591/3715] platform/x86: alienware-wmi: fix kfree on potentially uninitialized pointer [ Upstream commit 98e2630284ab741804bd0713e932e725466f2f84 ] Currently the kfree of output.pointer can be potentially freeing an uninitalized pointer in the case where out_data is NULL. Fix this by reworking the case where out_data is not-null to perform the ACPI status check and also the kfree of outpoint.pointer in one block and hence ensuring the pointer is only freed when it has been used. Also replace the if (ptr != NULL) idiom with just if (ptr). Fixes: ff0e9f26288d ("platform/x86: alienware-wmi: Correct a memory leak") Signed-off-by: Colin Ian King Signed-off-by: Darren Hart (VMware) Signed-off-by: Sasha Levin --- drivers/platform/x86/alienware-wmi.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/platform/x86/alienware-wmi.c b/drivers/platform/x86/alienware-wmi.c index e335b18da20f..cbd84e2e3bd4 100644 --- a/drivers/platform/x86/alienware-wmi.c +++ b/drivers/platform/x86/alienware-wmi.c @@ -505,23 +505,22 @@ static acpi_status alienware_wmax_command(struct wmax_basic_args *in_args, input.length = (acpi_size) sizeof(*in_args); input.pointer = in_args; - if (out_data != NULL) { + if (out_data) { output.length = ACPI_ALLOCATE_BUFFER; output.pointer = NULL; status = wmi_evaluate_method(WMAX_CONTROL_GUID, 0, command, &input, &output); - } else + if (ACPI_SUCCESS(status)) { + obj = (union acpi_object *)output.pointer; + if (obj && obj->type == ACPI_TYPE_INTEGER) + *out_data = (u32)obj->integer.value; + } + kfree(output.pointer); + } else { status = wmi_evaluate_method(WMAX_CONTROL_GUID, 0, command, &input, NULL); - - if (ACPI_SUCCESS(status) && out_data != NULL) { - obj = (union acpi_object *)output.pointer; - if (obj && obj->type == ACPI_TYPE_INTEGER) - *out_data = (u32) obj->integer.value; } - kfree(output.pointer); return status; - } /* From edefec3a6d59fad0902d8d2209442a593a6af6e6 Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Tue, 16 Apr 2019 13:10:09 +0800 Subject: [PATCH 2592/3715] tipc: set sysctl_tipc_rmem and named_timeout right range [ Upstream commit 4bcd4ec1017205644a2697bccbc3b5143f522f5f ] We find that sysctl_tipc_rmem and named_timeout do not have the right minimum setting. sysctl_tipc_rmem should be larger than zero, like sysctl_tcp_rmem. And named_timeout as a timeout setting should be not less than zero. Fixes: cc79dd1ba9c10 ("tipc: change socket buffer overflow control to respect sk_rcvbuf") Fixes: a5325ae5b8bff ("tipc: add name distributor resiliency queue") Signed-off-by: Jie Liu Reported-by: Qiang Ning Reviewed-by: Zhiqiang Liu Reviewed-by: Miaohe Lin Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/tipc/sysctl.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c index 1a779b1e8510..40f6d82083d7 100644 --- a/net/tipc/sysctl.c +++ b/net/tipc/sysctl.c @@ -37,6 +37,8 @@ #include +static int zero; +static int one = 1; static struct ctl_table_header *tipc_ctl_hdr; static struct ctl_table tipc_table[] = { @@ -45,14 +47,16 @@ static struct ctl_table tipc_table[] = { .data = &sysctl_tipc_rmem, .maxlen = sizeof(sysctl_tipc_rmem), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, }, { .procname = "named_timeout", .data = &sysctl_tipc_named_timeout, .maxlen = sizeof(sysctl_tipc_named_timeout), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, }, {} }; From c0d4d3bdf73f868e6afbec16395d1edc448651b0 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 8 Apr 2019 10:13:44 -0700 Subject: [PATCH 2593/3715] selftests/ipc: Fix msgque compiler warnings [ Upstream commit a147faa96f832f76e772b1e448e94ea84c774081 ] This fixes the various compiler warnings when building the msgque selftest. The primary change is using sys/msg.h instead of linux/msg.h directly to gain the API declarations. Fixes: 3a665531a3b7 ("selftests: IPC message queue copy feature test") Signed-off-by: Kees Cook Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- tools/testing/selftests/ipc/msgque.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c index ee9382bdfadc..c5587844fbb8 100644 --- a/tools/testing/selftests/ipc/msgque.c +++ b/tools/testing/selftests/ipc/msgque.c @@ -1,9 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE #include #include #include #include -#include +#include #include #include "../kselftest.h" @@ -73,7 +74,7 @@ int restore_queue(struct msgque_data *msgque) return 0; destroy: - if (msgctl(id, IPC_RMID, 0)) + if (msgctl(id, IPC_RMID, NULL)) printf("Failed to destroy queue: %d\n", -errno); return ret; } @@ -120,7 +121,7 @@ int check_and_destroy_queue(struct msgque_data *msgque) ret = 0; err: - if (msgctl(msgque->msq_id, IPC_RMID, 0)) { + if (msgctl(msgque->msq_id, IPC_RMID, NULL)) { printf("Failed to destroy queue: %d\n", -errno); return -errno; } @@ -129,7 +130,7 @@ err: int dump_queue(struct msgque_data *msgque) { - struct msqid64_ds ds; + struct msqid_ds ds; int kern_id; int i, ret; @@ -246,7 +247,7 @@ int main(int argc, char **argv) return ksft_exit_pass(); err_destroy: - if (msgctl(msgque.msq_id, IPC_RMID, 0)) { + if (msgctl(msgque.msq_id, IPC_RMID, NULL)) { printf("Failed to destroy queue: %d\n", -errno); return ksft_exit_fail(); } From c707b68465f91759515e09783119a8cc2f802530 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 22 Mar 2019 04:24:37 +0000 Subject: [PATCH 2594/3715] powerpc: vdso: Make vdso32 installation conditional in vdso_install [ Upstream commit ff6d27823f619892ab96f7461764840e0d786b15 ] The 32-bit vDSO is not needed and not normally built for 64-bit little-endian configurations. However, the vdso_install target still builds and installs it. Add the same config condition as is normally used for the build. Fixes: e0d005916994 ("powerpc/vdso: Disable building the 32-bit VDSO ...") Signed-off-by: Ben Hutchings Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 0f04c878113e..9c78ef298257 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -385,7 +385,9 @@ vdso_install: ifeq ($(CONFIG_PPC64),y) $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso64 $@ endif +ifdef CONFIG_VDSO32 $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso32 $@ +endif archclean: $(Q)$(MAKE) $(clean)=$(boot) From 78f7e2aa4e4549429bb5d4ca8fdfca8c8cd237dd Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 12 Apr 2019 02:23:14 +0300 Subject: [PATCH 2595/3715] ARM: dts: ls1021: Fix SGMII PCS link remaining down after PHY disconnect [ Upstream commit c7861adbe37f576931650ad8ef805e0c47564b9a ] Each eTSEC MAC has its own TBI (SGMII) PCS and private MDIO bus. But due to a DTS oversight, both SGMII-compatible MACs of the LS1021 SoC are pointing towards the same internal PCS. Therefore nobody is controlling the internal PCS of eTSEC0. Upon initial ndo_open, the SGMII link is ok by virtue of U-boot initialization. But upon an ifdown/ifup sequence, the code path from ndo_open -> init_phy -> gfar_configure_serdes does not get executed for the PCS of eTSEC0 (and is executed twice for MAC eTSEC1). So the SGMII link remains down for eTSEC0. On the LS1021A-TWR board, to signal this failure condition, the PHY driver keeps printing '803x_aneg_done: SGMII link is not ok'. Also, it changes compatible of mdio0 to "fsl,etsec2-mdio" to match mdio1 device. Fixes: 055223d4d22d ("ARM: dts: ls1021a: Enable the eTSEC ports on QDS and TWR") Signed-off-by: Vladimir Oltean Reviewed-by: Claudiu Manoil Acked-by: Li Yang Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/ls1021a-twr.dts | 9 ++++++++- arch/arm/boot/dts/ls1021a.dtsi | 11 ++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/ls1021a-twr.dts b/arch/arm/boot/dts/ls1021a-twr.dts index 44715c8ef756..72a3fc63d0ec 100644 --- a/arch/arm/boot/dts/ls1021a-twr.dts +++ b/arch/arm/boot/dts/ls1021a-twr.dts @@ -143,7 +143,7 @@ }; &enet0 { - tbi-handle = <&tbi1>; + tbi-handle = <&tbi0>; phy-handle = <&sgmii_phy2>; phy-connection-type = "sgmii"; status = "okay"; @@ -222,6 +222,13 @@ sgmii_phy2: ethernet-phy@2 { reg = <0x2>; }; + tbi0: tbi-phy@1f { + reg = <0x1f>; + device_type = "tbi-phy"; + }; +}; + +&mdio1 { tbi1: tbi-phy@1f { reg = <0x1f>; device_type = "tbi-phy"; diff --git a/arch/arm/boot/dts/ls1021a.dtsi b/arch/arm/boot/dts/ls1021a.dtsi index 2d20f60947b9..1343c86988c5 100644 --- a/arch/arm/boot/dts/ls1021a.dtsi +++ b/arch/arm/boot/dts/ls1021a.dtsi @@ -562,13 +562,22 @@ }; mdio0: mdio@2d24000 { - compatible = "gianfar"; + compatible = "fsl,etsec2-mdio"; device_type = "mdio"; #address-cells = <1>; #size-cells = <0>; reg = <0x0 0x2d24000 0x0 0x4000>; }; + mdio1: mdio@2d64000 { + compatible = "fsl,etsec2-mdio"; + device_type = "mdio"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x0 0x2d64000 0x0 0x4000>, + <0x0 0x2d50030 0x0 0x4>; + }; + ptp_clock@2d10e00 { compatible = "fsl,etsec-ptp"; reg = <0x0 0x2d10e00 0x0 0xb0>; From 62ca24f153245509e1278c8fe62b6ec01f8a3739 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sat, 30 Mar 2019 10:01:32 -0400 Subject: [PATCH 2596/3715] media: ov2659: fix unbalanced mutex_lock/unlock [ Upstream commit 384538bda10913e5c94ec5b5d34bd3075931bcf4 ] Avoid returning with mutex locked. Fixes: fa8cb6444c32 ("[media] ov2659: Don't depend on subdev API") Cc: "Lad, Prabhakar" Signed-off-by: Akinobu Mita Acked-by: Lad, Prabhakar Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/i2c/ov2659.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/i2c/ov2659.c b/drivers/media/i2c/ov2659.c index 44b0584eb8a6..e7768ed1ff9c 100644 --- a/drivers/media/i2c/ov2659.c +++ b/drivers/media/i2c/ov2659.c @@ -1136,7 +1136,7 @@ static int ov2659_set_fmt(struct v4l2_subdev *sd, mf = v4l2_subdev_get_try_format(sd, cfg, fmt->pad); *mf = fmt->format; #else - return -ENOTTY; + ret = -ENOTTY; #endif } else { s64 val; From 0d5d07b98b7dbfa0b87c02b5925498d5fd667993 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 3 Apr 2019 08:34:16 +0300 Subject: [PATCH 2597/3715] 6lowpan: Off by one handling ->nexthdr [ Upstream commit f57c4bbf34439531adccd7d3a4ecc14f409c1399 ] NEXTHDR_MAX is 255. What happens here is that we take a u8 value "hdr->nexthdr" from the network and then look it up in lowpan_nexthdr_nhcs[]. The problem is that if hdr->nexthdr is 0xff then we read one element beyond the end of the array so the array needs to be one element larger. Fixes: 92aa7c65d295 ("6lowpan: add generic nhc layer interface") Signed-off-by: Dan Carpenter Acked-by: Jukka Rissanen Acked-by: Alexander Aring Signed-off-by: Marcel Holtmann Signed-off-by: Sasha Levin --- net/6lowpan/nhc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/6lowpan/nhc.c b/net/6lowpan/nhc.c index 4fa2fdda174d..9e56fb98f33c 100644 --- a/net/6lowpan/nhc.c +++ b/net/6lowpan/nhc.c @@ -18,7 +18,7 @@ #include "nhc.h" static struct rb_root rb_root = RB_ROOT; -static struct lowpan_nhc *lowpan_nexthdr_nhcs[NEXTHDR_MAX]; +static struct lowpan_nhc *lowpan_nexthdr_nhcs[NEXTHDR_MAX + 1]; static DEFINE_SPINLOCK(lowpan_nhc_lock); static int lowpan_nhc_insert(struct lowpan_nhc *nhc) From 2722d16445fab8dde6166f12875e3b93a366a1a8 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Tue, 26 Mar 2019 16:05:20 +0200 Subject: [PATCH 2598/3715] dmaengine: axi-dmac: Don't check the number of frames for alignment [ Upstream commit 648865a79d8ee3d1aa64aab5eb2a9d12eeed14f9 ] In 2D transfers (for the AXI DMAC), the number of frames (numf) represents Y_LENGTH, and the length of a frame is X_LENGTH. 2D transfers are useful for video transfers where screen resolutions ( X * Y ) are typically aligned for X, but not for Y. There is no requirement for Y_LENGTH to be aligned to the bus-width (or anything), and this is also true for AXI DMAC. Checking the Y_LENGTH for alignment causes false errors when initiating DMA transfers. This change fixes this by checking only that the Y_LENGTH is non-zero. Fixes: 0e3b67b348b8 ("dmaengine: Add support for the Analog Devices AXI-DMAC DMA controller") Signed-off-by: Alexandru Ardelean Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/dma-axi-dmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/dma-axi-dmac.c b/drivers/dma/dma-axi-dmac.c index 7f0b9aa15867..9887f2a14aa9 100644 --- a/drivers/dma/dma-axi-dmac.c +++ b/drivers/dma/dma-axi-dmac.c @@ -451,7 +451,7 @@ static struct dma_async_tx_descriptor *axi_dmac_prep_interleaved( if (chan->hw_2d) { if (!axi_dmac_check_len(chan, xt->sgl[0].size) || - !axi_dmac_check_len(chan, xt->numf)) + xt->numf == 0) return NULL; if (xt->sgl[0].size + dst_icg > chan->max_length || xt->sgl[0].size + src_icg > chan->max_length) From a1a19d86859db27a62762d9f6b49c80b85b29316 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 24 Apr 2019 13:00:03 +0200 Subject: [PATCH 2599/3715] ALSA: usb-audio: Handle the error from snd_usb_mixer_apply_create_quirk() [ Upstream commit 328e9f6973be2ee67862cb17bf6c0c5c5918cd72 ] The error from snd_usb_mixer_apply_create_quirk() is ignored in the current usb-audio driver code, which will continue the probing even after the error. Let's take it more serious. Fixes: 7b1eda223deb ("ALSA: usb-mixer: factor out quirks") Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/mixer.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 044193b2364d..e6e4c3b9d9d3 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -2632,7 +2632,9 @@ int snd_usb_create_mixer(struct snd_usb_audio *chip, int ctrlif, (err = snd_usb_mixer_status_create(mixer)) < 0) goto _error; - snd_usb_mixer_apply_create_quirk(mixer); + err = snd_usb_mixer_apply_create_quirk(mixer); + if (err < 0) + goto _error; err = snd_device_new(chip->card, SNDRV_DEV_CODEC, mixer, &dev_ops); if (err < 0) From a5553f27402b64e7164d7ddf7b87bc5d48042433 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 7 Apr 2019 13:59:02 -0400 Subject: [PATCH 2600/3715] NFS: Don't interrupt file writeout due to fatal errors [ Upstream commit 14bebe3c90b326d2a0df78aed5e9de090c71d878 ] When flushing out dirty pages, the fact that we may hit fatal errors is not a reason to stop writeback. Those errors are reported through fsync(), not through the flush mechanism. Fixes: a6598813a4c5b ("NFS: Don't write back further requests if there...") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin --- fs/nfs/write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 01b9d9341b54..ed3f5afc4ff7 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -643,7 +643,7 @@ out: return ret; out_launder: nfs_write_error_remove_page(req); - return ret; + return 0; } static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, From ded647fdda63e9dd50d51f1303768e673e6339c1 Mon Sep 17 00:00:00 2001 From: Hongbo Yao Date: Mon, 8 Apr 2019 22:01:03 +0800 Subject: [PATCH 2601/3715] irqchip/gic-v3-its: fix some definitions of inner cacheability attributes [ Upstream commit 0f29456d08042134aff6e562d07a6365c841c4ad ] Some definitions of Inner Cacheability attibutes need to be corrected. Fixes: 8c828a535e29f ("irqchip/gicv3-its: Restore all cacheability attributes") Signed-off-by: Hongbo Yao Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin --- include/linux/irqchip/arm-gic-v3.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 845ff8c51564..0fe1fdedb8a1 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -152,7 +152,7 @@ #define GICR_PROPBASER_nCnB GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nCnB) #define GICR_PROPBASER_nC GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nC) #define GICR_PROPBASER_RaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWt) -#define GICR_PROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWt) +#define GICR_PROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWb) #define GICR_PROPBASER_WaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWt) #define GICR_PROPBASER_WaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWb) #define GICR_PROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWt) @@ -179,7 +179,7 @@ #define GICR_PENDBASER_nCnB GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nCnB) #define GICR_PENDBASER_nC GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nC) #define GICR_PENDBASER_RaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWt) -#define GICR_PENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWt) +#define GICR_PENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWb) #define GICR_PENDBASER_WaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWt) #define GICR_PENDBASER_WaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWb) #define GICR_PENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWt) @@ -238,7 +238,7 @@ #define GICR_VPROPBASER_nCnB GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nCnB) #define GICR_VPROPBASER_nC GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nC) #define GICR_VPROPBASER_RaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWt) -#define GICR_VPROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWt) +#define GICR_VPROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWb) #define GICR_VPROPBASER_WaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWt) #define GICR_VPROPBASER_WaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWb) #define GICR_VPROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWt) @@ -264,7 +264,7 @@ #define GICR_VPENDBASER_nCnB GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nCnB) #define GICR_VPENDBASER_nC GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nC) #define GICR_VPENDBASER_RaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWt) -#define GICR_VPENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWt) +#define GICR_VPENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWb) #define GICR_VPENDBASER_WaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWt) #define GICR_VPENDBASER_WaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWb) #define GICR_VPENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWt) @@ -337,7 +337,7 @@ #define GITS_CBASER_nCnB GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nCnB) #define GITS_CBASER_nC GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nC) #define GITS_CBASER_RaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWt) -#define GITS_CBASER_RaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWt) +#define GITS_CBASER_RaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWb) #define GITS_CBASER_WaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWt) #define GITS_CBASER_WaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWb) #define GITS_CBASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWt) @@ -361,7 +361,7 @@ #define GITS_BASER_nCnB GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nCnB) #define GITS_BASER_nC GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nC) #define GITS_BASER_RaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWt) -#define GITS_BASER_RaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWt) +#define GITS_BASER_RaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb) #define GITS_BASER_WaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWt) #define GITS_BASER_WaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWb) #define GITS_BASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWt) From ab353598ceed8af45a9a83bf50b5e2056733742c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 17 Apr 2019 14:44:11 -0700 Subject: [PATCH 2602/3715] scsi: qla2xxx: Fix a format specifier [ Upstream commit 19ce192cd718e02f880197c0983404ca48236807 ] Since mcmd->sess->port_name is eight bytes long, use %8phC to format that port name instead of %phC. Cc: Himanshu Madhani Cc: Giridhar Malavali Fixes: 726b85487067 ("qla2xxx: Add framework for async fabric discovery") # v4.11. Signed-off-by: Bart Van Assche Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_target.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 55227d20496a..1000422ef4f8 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -2179,7 +2179,7 @@ void qlt_xmit_tm_rsp(struct qla_tgt_mgmt_cmd *mcmd) mcmd->orig_iocb.imm_ntfy.u.isp24.status_subcode == ELS_TPRLO) { ql_dbg(ql_dbg_disc, vha, 0x2106, - "TM response logo %phC status %#x state %#x", + "TM response logo %8phC status %#x state %#x", mcmd->sess->port_name, mcmd->fc_tm_rsp, mcmd->flags); qlt_schedule_sess_for_deletion_lock(mcmd->sess); From 6a75405369e9cdb7509dd760c0213154e819c353 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 17 Apr 2019 14:44:42 -0700 Subject: [PATCH 2603/3715] scsi: qla2xxx: Avoid that qlt_send_resp_ctio() corrupts memory [ Upstream commit a861b49273578e255426a499842cf7f465456351 ] The "(&ctio->u.status1.sense_data)[i]" where i >= 0 expressions in qlt_send_resp_ctio() are probably typos and should have been "(&ctio->u.status1.sense_data[4 * i])" instead. Instead of only fixing these typos, modify the code for storing sense data such that it becomes easy to read. This patch fixes a Coverity complaint about accessing an array outside its bounds. Cc: Himanshu Madhani Cc: Giridhar Malavali Fixes: be25152c0d9e ("qla2xxx: Improve T10-DIF/PI handling in driver.") # v4.11. Signed-off-by: Bart Van Assche Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_target.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 1000422ef4f8..21011c5fddeb 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -2122,14 +2122,14 @@ void qlt_send_resp_ctio(struct qla_qpair *qpair, struct qla_tgt_cmd *cmd, ctio->u.status1.scsi_status |= cpu_to_le16(SS_RESIDUAL_UNDER); - /* Response code and sense key */ - put_unaligned_le32(((0x70 << 24) | (sense_key << 8)), - (&ctio->u.status1.sense_data)[0]); + /* Fixed format sense data. */ + ctio->u.status1.sense_data[0] = 0x70; + ctio->u.status1.sense_data[2] = sense_key; /* Additional sense length */ - put_unaligned_le32(0x0a, (&ctio->u.status1.sense_data)[1]); + ctio->u.status1.sense_data[7] = 0xa; /* ASC and ASCQ */ - put_unaligned_le32(((asc << 24) | (ascq << 16)), - (&ctio->u.status1.sense_data)[3]); + ctio->u.status1.sense_data[12] = asc; + ctio->u.status1.sense_data[13] = ascq; /* Memory Barrier */ wmb(); From dd1ce3f27ba05819659b28a41d6885cac1cdca0d Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 29 Apr 2019 11:46:55 -0400 Subject: [PATCH 2604/3715] packet: in recvmsg msg_name return at least sizeof sockaddr_ll [ Upstream commit b2cf86e1563e33a14a1c69b3e508d15dc12f804c ] Packet send checks that msg_name is at least sizeof sockaddr_ll. Packet recv must return at least this length, so that its output can be passed unmodified to packet send. This ceased to be true since adding support for lladdr longer than sll_addr. Since, the return value uses true address length. Always return at least sizeof sockaddr_ll, even if address length is shorter. Zero the padding bytes. Change v1->v2: do not overwrite zeroed padding again. use copy_len. Fixes: 0fb375fb9b93 ("[AF_PACKET]: Allow for > 8 byte hardware addresses.") Suggested-by: David Laight Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/packet/af_packet.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 4e1058159b08..e788f9c7c398 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3407,20 +3407,29 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, sock_recv_ts_and_drops(msg, sk, skb); if (msg->msg_name) { + int copy_len; + /* If the address length field is there to be filled * in, we fill it in now. */ if (sock->type == SOCK_PACKET) { __sockaddr_check_size(sizeof(struct sockaddr_pkt)); msg->msg_namelen = sizeof(struct sockaddr_pkt); + copy_len = msg->msg_namelen; } else { struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr); + copy_len = msg->msg_namelen; + if (msg->msg_namelen < sizeof(struct sockaddr_ll)) { + memset(msg->msg_name + + offsetof(struct sockaddr_ll, sll_addr), + 0, sizeof(sll->sll_addr)); + msg->msg_namelen = sizeof(struct sockaddr_ll); + } } - memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, - msg->msg_namelen); + memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len); } if (pkt_sk(sk)->auxdata) { From 0075f99fb42ddcf77c527740fa9503bbad0f98d0 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Mon, 29 Apr 2019 11:47:49 +0200 Subject: [PATCH 2605/3715] ASoC: fix valid stream condition [ Upstream commit 6a7c59c6d9f3b280e81d7a04bbe4e55e90152dce ] A stream may specify a rate range using 'rate_min' and 'rate_max', so a stream may be valid and not specify any rates. However, as stream cannot be valid and not have any channel. Let's use this condition instead to determine if a stream is valid or not. Fixes: cde79035c6cf ("ASoC: Handle multiple codecs with split playback / capture") Signed-off-by: Jerome Brunet Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-pcm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 24047375c2fb..70e1a60a2e98 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -48,8 +48,8 @@ static bool snd_soc_dai_stream_valid(struct snd_soc_dai *dai, int stream) else codec_stream = &dai->driver->capture; - /* If the codec specifies any rate at all, it supports the stream. */ - return codec_stream->rates; + /* If the codec specifies any channels at all, it supports the stream */ + return codec_stream->channels_min; } /** From 152920f6c6fe9fa91e837dc99b34b607b10d4958 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 16 Apr 2019 14:25:32 +0200 Subject: [PATCH 2606/3715] usb: gadget: fsl: fix link error against usb-gadget module [ Upstream commit 2100e3ca3676e894fa48b8f6f01d01733387fe81 ] The dependency to ensure this driver links correctly fails since it can not be a loadable module: drivers/usb/phy/phy-fsl-usb.o: In function `fsl_otg_set_peripheral': phy-fsl-usb.c:(.text+0x2224): undefined reference to `usb_gadget_vbus_disconnect' Make the option 'tristate' so it can work correctly. Fixes: 5a8d651a2bde ("usb: gadget: move gadget API functions to udc-core") Signed-off-by: Arnd Bergmann Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/phy/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig index 85a92d0813dd..440238061edd 100644 --- a/drivers/usb/phy/Kconfig +++ b/drivers/usb/phy/Kconfig @@ -20,7 +20,7 @@ config AB8500_USB in host mode, low speed. config FSL_USB2_OTG - bool "Freescale USB OTG Transceiver Driver" + tristate "Freescale USB OTG Transceiver Driver" depends on USB_EHCI_FSL && USB_FSL_USB2 && USB_OTG_FSM=y && PM depends on USB_GADGET || !USB_GADGET # if USB_GADGET=m, this can't be 'y' select USB_PHY From 1e5c78fb5b18af8d51433cb74358979a0f13a36e Mon Sep 17 00:00:00 2001 From: Minas Harutyunyan Date: Fri, 22 Feb 2019 15:49:19 +0400 Subject: [PATCH 2607/3715] dwc2: gadget: Fix completed transfer size calculation in DDMA [ Upstream commit 5acb4b970184d189d901192d075997c933b82260 ] Fix calculation of transfer size on completion in function dwc2_gadget_get_xfersize_ddma(). Added increment of descriptor pointer to move to next descriptor in the loop. Fixes: aa3e8bc81311 ("usb: dwc2: gadget: DDMA transfer start and complete") Signed-off-by: Minas Harutyunyan Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/dwc2/gadget.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index e164439b2154..4af9a1c652ed 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -2276,6 +2276,7 @@ static unsigned int dwc2_gadget_get_xfersize_ddma(struct dwc2_hsotg_ep *hs_ep) if (status & DEV_DMA_STS_MASK) dev_err(hsotg->dev, "descriptor %d closed with %x\n", i, status & DEV_DMA_STS_MASK); + desc++; } return bytes_rem; From 35fe8691b18701ac0ff6fc7efd601f7bb930bab0 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 1 May 2019 08:38:30 +0300 Subject: [PATCH 2608/3715] IB/mlx5: Add missing XRC options to QP optional params mask [ Upstream commit 8f4426aa19fcdb9326ac44154a117b1a3a5ae126 ] The QP transition optional parameters for the various transition for XRC QPs are identical to those for RC QPs. Many of the XRC QP transition optional parameter bits are missing from the QP optional mask table. These omissions caused failures when doing XRC QP state transitions. For example, when trying to change the response timer of an XRC receive QP via the RTS2RTS transition, the new timer value was ignored because MLX5_QP_OPTPAR_RNR_TIMEOUT bit was missing from the optional params mask for XRC qps for the RTS2RTS transition. Fix this by adding the missing XRC optional parameters for all QP transitions to the opt_mask table. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Fixes: a4774e9095de ("IB/mlx5: Fix opt param mask according to firmware spec") Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx5/qp.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 5a7dcb5afe6e..84c962820aa2 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2357,6 +2357,11 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX | MLX5_QP_OPTPAR_Q_KEY | MLX5_QP_OPTPAR_PRI_PORT, + [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_RRE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PKEY_INDEX | + MLX5_QP_OPTPAR_PRI_PORT, }, [MLX5_QP_STATE_RTR] = { [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | @@ -2390,6 +2395,12 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_PM_STATE, [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY, + [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | + MLX5_QP_OPTPAR_RRE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PM_STATE | + MLX5_QP_OPTPAR_RNR_TIMEOUT, }, }, [MLX5_QP_STATE_RTS] = { @@ -2406,6 +2417,12 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY | MLX5_QP_OPTPAR_SRQN | MLX5_QP_OPTPAR_CQN_RCV, + [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_RRE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_RNR_TIMEOUT | + MLX5_QP_OPTPAR_PM_STATE | + MLX5_QP_OPTPAR_ALT_ADDR_PATH, }, }, [MLX5_QP_STATE_SQER] = { @@ -2417,6 +2434,10 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_RAE | MLX5_QP_OPTPAR_RRE, + [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_RNR_TIMEOUT | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RRE, }, }, }; From feed4aa653cf0bdf4f1b5cb8ae80ee5a8186840b Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 2 May 2019 09:34:26 +0800 Subject: [PATCH 2609/3715] iommu/vt-d: Make kernel parameter igfx_off work with vIOMMU [ Upstream commit 5daab58043ee2bca861068e2595564828f3bc663 ] The kernel parameter igfx_off is used by users to disable DMA remapping for the Intel integrated graphic device. It was designed for bare metal cases where a dedicated IOMMU is used for graphic. This doesn't apply to virtual IOMMU case where an include-all IOMMU is used. This makes the kernel parameter work with virtual IOMMU as well. Cc: Ashok Raj Cc: Jacob Pan Suggested-by: Kevin Tian Fixes: c0771df8d5297 ("intel-iommu: Export a flag indicating that the IOMMU is used for iGFX.") Signed-off-by: Lu Baolu Tested-by: Zhenyu Wang Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/intel-iommu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 523d0889c2a4..4fbd183d973a 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3361,9 +3361,12 @@ static int __init init_dmars(void) iommu_identity_mapping |= IDENTMAP_ALL; #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA - iommu_identity_mapping |= IDENTMAP_GFX; + dmar_map_gfx = 0; #endif + if (!dmar_map_gfx) + iommu_identity_mapping |= IDENTMAP_GFX; + check_tylersburg_isoch(); if (iommu_identity_mapping) { From 44d443842c6a5c4b8a5ca3cd1748a63f13541052 Mon Sep 17 00:00:00 2001 From: Sameeh Jubran Date: Wed, 1 May 2019 16:47:03 +0300 Subject: [PATCH 2610/3715] net: ena: fix swapped parameters when calling ena_com_indirect_table_fill_entry [ Upstream commit 3c6eeff295f01bdf1c6c3addcb0a04c0c6c029e9 ] second parameter should be the index of the table rather than the value. Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Saeed Bshara Signed-off-by: Sameeh Jubran Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index 967020fb26ee..a2f02c23fe14 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -694,8 +694,8 @@ static int ena_set_rxfh(struct net_device *netdev, const u32 *indir, if (indir) { for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) { rc = ena_com_indirect_table_fill_entry(ena_dev, - ENA_IO_RXQ_IDX(indir[i]), - i); + i, + ENA_IO_RXQ_IDX(indir[i])); if (unlikely(rc)) { netif_err(adapter, drv, netdev, "Cannot fill indirect table (index is too large)\n"); From 7799cd5d6b0efea6dbad635738b9049362810457 Mon Sep 17 00:00:00 2001 From: Sameeh Jubran Date: Wed, 1 May 2019 16:47:05 +0300 Subject: [PATCH 2611/3715] net: ena: fix: Free napi resources when ena_up() fails [ Upstream commit b287cdbd1cedfc9606682c6e02b58d00ff3a33ae ] ena_up() calls ena_init_napi() but does not call ena_del_napi() in case of failure. This causes a segmentation fault upon rmmod when netif_napi_del() is called. Fix this bug by calling ena_del_napi() before returning error from ena_up(). Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Arthur Kiyanovski Signed-off-by: Sameeh Jubran Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index d22b138c2b09..518ff393a026 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1796,6 +1796,7 @@ err_setup_rx: err_setup_tx: ena_free_io_irq(adapter); err_req_irq: + ena_del_napi(adapter); return rc; } From 086815f78499514e5d532df6343aadd65ed2a247 Mon Sep 17 00:00:00 2001 From: Sameeh Jubran Date: Wed, 1 May 2019 16:47:06 +0300 Subject: [PATCH 2612/3715] net: ena: fix incorrect test of supported hash function [ Upstream commit d3cfe7ddbc3dfbb9b201615b7fef8fd66d1b5fe8 ] ena_com_set_hash_function() tests if a hash function is supported by the device before setting it. The test returns the opposite result than needed. Reverse the condition to return the correct value. Also use the BIT macro instead of inline shift. Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Arthur Kiyanovski Signed-off-by: Sameeh Jubran Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_com.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 1a4ffc5d3da4..011b54c541aa 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -2002,7 +2002,7 @@ int ena_com_set_hash_function(struct ena_com_dev *ena_dev) if (unlikely(ret)) return ret; - if (get_resp.u.flow_hash_func.supported_func & (1 << rss->hash_func)) { + if (!(get_resp.u.flow_hash_func.supported_func & BIT(rss->hash_func))) { pr_err("Func hash %d isn't supported by device, abort\n", rss->hash_func); return -EOPNOTSUPP; From 709f31c4b346c16ff25829e908bde81f9d7f3f7d Mon Sep 17 00:00:00 2001 From: Sameeh Jubran Date: Wed, 1 May 2019 16:47:09 +0300 Subject: [PATCH 2613/3715] net: ena: fix ena_com_fill_hash_function() implementation [ Upstream commit 11bd7a00c0d8ffe33d1e926f8e789b4aea787186 ] ena_com_fill_hash_function() didn't configure the rss->hash_func. Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Netanel Belgazal Signed-off-by: Sameeh Jubran Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_com.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 011b54c541aa..10e6053f6671 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -2087,6 +2087,7 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, return -EINVAL; } + rss->hash_func = func; rc = ena_com_set_hash_function(ena_dev); /* Restore the old function */ From 5131126c746631b34fcd001594b4c1c73dbc425a Mon Sep 17 00:00:00 2001 From: Sameer Pujar Date: Thu, 2 May 2019 18:25:17 +0530 Subject: [PATCH 2614/3715] dmaengine: tegra210-adma: restore channel status [ Upstream commit f33e7bb3eb922618612a90f0a828c790e8880773 ] Status of ADMA channel registers is not saved and restored during system suspend. During active playback if system enters suspend, this results in wrong state of channel registers during system resume and playback fails to resume properly. Fix this by saving following channel registers in runtime suspend and restore during runtime resume. * ADMA_CH_LOWER_SRC_ADDR * ADMA_CH_LOWER_TRG_ADDR * ADMA_CH_FIFO_CTRL * ADMA_CH_CONFIG * ADMA_CH_CTRL * ADMA_CH_CMD * ADMA_CH_TC Runtime PM calls will be inovked during system resume path if a playback or capture needs to be resumed. Hence above changes work fine for system suspend case. Fixes: f46b195799b5 ("dmaengine: tegra-adma: Add support for Tegra210 ADMA") Signed-off-by: Sameer Pujar Reviewed-by: Jon Hunter Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/tegra210-adma.c | 46 ++++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c index 09b6756366c3..ac2a6b800db3 100644 --- a/drivers/dma/tegra210-adma.c +++ b/drivers/dma/tegra210-adma.c @@ -98,6 +98,7 @@ struct tegra_adma_chan_regs { unsigned int src_addr; unsigned int trg_addr; unsigned int fifo_ctrl; + unsigned int cmd; unsigned int tc; }; @@ -127,6 +128,7 @@ struct tegra_adma_chan { enum dma_transfer_direction sreq_dir; unsigned int sreq_index; bool sreq_reserved; + struct tegra_adma_chan_regs ch_regs; /* Transfer count and position info */ unsigned int tx_buf_count; @@ -635,8 +637,30 @@ static struct dma_chan *tegra_dma_of_xlate(struct of_phandle_args *dma_spec, static int tegra_adma_runtime_suspend(struct device *dev) { struct tegra_adma *tdma = dev_get_drvdata(dev); + struct tegra_adma_chan_regs *ch_reg; + struct tegra_adma_chan *tdc; + int i; tdma->global_cmd = tdma_read(tdma, ADMA_GLOBAL_CMD); + if (!tdma->global_cmd) + goto clk_disable; + + for (i = 0; i < tdma->nr_channels; i++) { + tdc = &tdma->channels[i]; + ch_reg = &tdc->ch_regs; + ch_reg->cmd = tdma_ch_read(tdc, ADMA_CH_CMD); + /* skip if channel is not active */ + if (!ch_reg->cmd) + continue; + ch_reg->tc = tdma_ch_read(tdc, ADMA_CH_TC); + ch_reg->src_addr = tdma_ch_read(tdc, ADMA_CH_LOWER_SRC_ADDR); + ch_reg->trg_addr = tdma_ch_read(tdc, ADMA_CH_LOWER_TRG_ADDR); + ch_reg->ctrl = tdma_ch_read(tdc, ADMA_CH_CTRL); + ch_reg->fifo_ctrl = tdma_ch_read(tdc, ADMA_CH_FIFO_CTRL); + ch_reg->config = tdma_ch_read(tdc, ADMA_CH_CONFIG); + } + +clk_disable: clk_disable_unprepare(tdma->ahub_clk); return 0; @@ -645,7 +669,9 @@ static int tegra_adma_runtime_suspend(struct device *dev) static int tegra_adma_runtime_resume(struct device *dev) { struct tegra_adma *tdma = dev_get_drvdata(dev); - int ret; + struct tegra_adma_chan_regs *ch_reg; + struct tegra_adma_chan *tdc; + int ret, i; ret = clk_prepare_enable(tdma->ahub_clk); if (ret) { @@ -654,6 +680,24 @@ static int tegra_adma_runtime_resume(struct device *dev) } tdma_write(tdma, ADMA_GLOBAL_CMD, tdma->global_cmd); + if (!tdma->global_cmd) + return 0; + + for (i = 0; i < tdma->nr_channels; i++) { + tdc = &tdma->channels[i]; + ch_reg = &tdc->ch_regs; + /* skip if channel was not active earlier */ + if (!ch_reg->cmd) + continue; + tdma_ch_write(tdc, ADMA_CH_TC, ch_reg->tc); + tdma_ch_write(tdc, ADMA_CH_LOWER_SRC_ADDR, ch_reg->src_addr); + tdma_ch_write(tdc, ADMA_CH_LOWER_TRG_ADDR, ch_reg->trg_addr); + tdma_ch_write(tdc, ADMA_CH_CTRL, ch_reg->ctrl); + tdma_ch_write(tdc, ADMA_CH_FIFO_CTRL, ch_reg->fifo_ctrl); + tdma_ch_write(tdc, ADMA_CH_CONFIG, ch_reg->config); + tdma_ch_write(tdc, ADMA_CH_CMD, ch_reg->cmd); + } + return 0; } From acab21ffa8c0951f2180f80184c921fa4799c786 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Wed, 17 Apr 2019 16:28:37 +0800 Subject: [PATCH 2615/3715] mmc: core: fix possible use after free of host [ Upstream commit 8e1943af2986db42bee2b8dddf49a36cdb2e9219 ] In the function mmc_alloc_host, the function put_device is called to release allocated resources when mmc_gpio_alloc fails. Finally, the function pointed by host->class_dev.class->dev_release (i.e., mmc_host_classdev_release) is used to release resources including the host structure. However, after put_device, host is used and released again. Resulting in a use-after-free bug. Fixes: 1ed217194488 ("mmc: core: fix error path in mmc_host_alloc") Signed-off-by: Pan Bian Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/core/host.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index ad88deb2e8f3..3740fb0052a4 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -376,8 +376,6 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev) if (mmc_gpio_alloc(host)) { put_device(&host->class_dev); - ida_simple_remove(&mmc_host_ida, host->index); - kfree(host); return NULL; } From 1dc93fb4fe6690531f499e99f7eb2031ad1202c5 Mon Sep 17 00:00:00 2001 From: Igor Konopko Date: Sat, 4 May 2019 20:37:59 +0200 Subject: [PATCH 2616/3715] lightnvm: pblk: fix lock order in pblk_rb_tear_down_check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 486b5aac85f6ec0b2df3e82a6a629d5eb7804db5 ] In pblk_rb_tear_down_check() the spinlock functions are not called in proper order. Fixes: a4bd217 ("lightnvm: physical block device (pblk) target") Signed-off-by: Igor Konopko Reviewed-by: Javier González Reviewed-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/lightnvm/pblk-rb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c index c0dd17a82170..73de2deaba67 100644 --- a/drivers/lightnvm/pblk-rb.c +++ b/drivers/lightnvm/pblk-rb.c @@ -825,8 +825,8 @@ int pblk_rb_tear_down_check(struct pblk_rb *rb) } out: - spin_unlock(&rb->w_lock); spin_unlock_irq(&rb->s_lock); + spin_unlock(&rb->w_lock); return ret; } From ae04bb451d966c993d7afe9ed0e8655810805801 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 1 May 2019 13:27:09 +0100 Subject: [PATCH 2617/3715] afs: Fix the afs.cell and afs.volume xattr handlers [ Upstream commit c73aa4102f5b9f261a907c3b3df94cd2c478504d ] Fix the ->get handlers for the afs.cell and afs.volume xattrs to pass the source data size to memcpy() rather than target buffer size. Overcopying the source data occasionally causes the kernel to oops. Fixes: d3e3b7eac886 ("afs: Add metadata xattrs") Signed-off-by: David Howells Signed-off-by: Sasha Levin --- fs/afs/xattr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c index 2830e4f48d85..7c6b62a94e7e 100644 --- a/fs/afs/xattr.c +++ b/fs/afs/xattr.c @@ -50,7 +50,7 @@ static int afs_xattr_get_cell(const struct xattr_handler *handler, return namelen; if (namelen > size) return -ERANGE; - memcpy(buffer, cell->name, size); + memcpy(buffer, cell->name, namelen); return namelen; } @@ -104,7 +104,7 @@ static int afs_xattr_get_volume(const struct xattr_handler *handler, return namelen; if (namelen > size) return -ERANGE; - memcpy(buffer, volname, size); + memcpy(buffer, volname, namelen); return namelen; } From 2d464b0246a08f2a03c4c5c3e7fc8cc226f77064 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 30 Apr 2019 17:49:28 -0500 Subject: [PATCH 2618/3715] vfio/mdev: Avoid release parent reference during error path [ Upstream commit 60e7f2c3fe9919cee9534b422865eed49f4efb15 ] During mdev parent registration in mdev_register_device(), if parent device is duplicate, it releases the reference of existing parent device. This is incorrect. Existing parent device should not be touched. Fixes: 7b96953bc640 ("vfio: Mediated device Core driver") Reviewed-by: Cornelia Huck Reviewed-by: Kirti Wankhede Reviewed-by: Maxim Levitsky Signed-off-by: Parav Pandit Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin --- drivers/vfio/mdev/mdev_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c index 0212f0ee8aea..8cfa71230877 100644 --- a/drivers/vfio/mdev/mdev_core.c +++ b/drivers/vfio/mdev/mdev_core.c @@ -182,6 +182,7 @@ int mdev_register_device(struct device *dev, const struct mdev_parent_ops *ops) /* Check for duplicate */ parent = __find_parent_device(dev); if (parent) { + parent = NULL; ret = -EEXIST; goto add_dev_err; } From f736690af39473dce29705e4a75a44c9fe242e21 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 30 Apr 2019 17:49:33 -0500 Subject: [PATCH 2619/3715] vfio/mdev: Fix aborting mdev child device removal if one fails [ Upstream commit 6093e348a5e2475c5bb2e571346460f939998670 ] device_for_each_child() stops executing callback function for remaining child devices, if callback hits an error. Each child mdev device is independent of each other. While unregistering parent device, mdev core must remove all child mdev devices. Therefore, mdev_device_remove_cb() always returns success so that device_for_each_child doesn't abort if one child removal hits error. While at it, improve remove and unregister functions for below simplicity. There isn't need to pass forced flag pointer during mdev parent removal which invokes mdev_device_remove(). So simplify the flow. mdev_device_remove() is called from two paths. 1. mdev_unregister_driver() mdev_device_remove_cb() mdev_device_remove() 2. remove_store() mdev_device_remove() Fixes: 7b96953bc640 ("vfio: Mediated device Core driver") Reviewed-by: Maxim Levitsky Signed-off-by: Parav Pandit Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin --- drivers/vfio/mdev/mdev_core.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c index 8cfa71230877..e052f62fdea7 100644 --- a/drivers/vfio/mdev/mdev_core.c +++ b/drivers/vfio/mdev/mdev_core.c @@ -150,10 +150,10 @@ static int mdev_device_remove_ops(struct mdev_device *mdev, bool force_remove) static int mdev_device_remove_cb(struct device *dev, void *data) { - if (!dev_is_mdev(dev)) - return 0; + if (dev_is_mdev(dev)) + mdev_device_remove(dev, true); - return mdev_device_remove(dev, data ? *(bool *)data : true); + return 0; } /* @@ -241,7 +241,6 @@ EXPORT_SYMBOL(mdev_register_device); void mdev_unregister_device(struct device *dev) { struct mdev_parent *parent; - bool force_remove = true; mutex_lock(&parent_list_lock); parent = __find_parent_device(dev); @@ -255,8 +254,7 @@ void mdev_unregister_device(struct device *dev) list_del(&parent->next); class_compat_remove_link(mdev_bus_compat_class, dev, NULL); - device_for_each_child(dev, (void *)&force_remove, - mdev_device_remove_cb); + device_for_each_child(dev, NULL, mdev_device_remove_cb); parent_remove_sysfs_files(parent); From cbd028fdf2ad4edaab71243346a5ba11814d8e3f Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 6 May 2019 22:44:04 +0800 Subject: [PATCH 2620/3715] l2tp: Fix possible NULL pointer dereference [ Upstream commit 638a3a1e349ddf5b82f222ff5cb3b4f266e7c278 ] BUG: unable to handle kernel NULL pointer dereference at 0000000000000128 PGD 0 P4D 0 Oops: 0000 [#1 CPU: 0 PID: 5697 Comm: modprobe Tainted: G W 5.1.0-rc7+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org 04/01/2014 RIP: 0010:__lock_acquire+0x53/0x10b0 Code: 8b 1c 25 40 5e 01 00 4c 8b 6d 10 45 85 e4 0f 84 bd 06 00 00 44 8b 1d 7c d2 09 02 49 89 fe 41 89 d2 45 85 db 0f 84 47 02 00 00 <48> 81 3f a0 05 70 83 b8 00 00 00 00 44 0f 44 c0 83 fe 01 0f 86 3a RSP: 0018:ffffc90001c07a28 EFLAGS: 00010002 RAX: 0000000000000000 RBX: ffff88822f038440 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000128 RBP: ffffc90001c07a88 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000001 R13: 0000000000000000 R14: 0000000000000128 R15: 0000000000000000 FS: 00007fead0811540(0000) GS:ffff888237a00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000128 CR3: 00000002310da000 CR4: 00000000000006f0 Call Trace: ? __lock_acquire+0x24e/0x10b0 lock_acquire+0xdf/0x230 ? flush_workqueue+0x71/0x530 flush_workqueue+0x97/0x530 ? flush_workqueue+0x71/0x530 l2tp_exit_net+0x170/0x2b0 [l2tp_core ? l2tp_exit_net+0x93/0x2b0 [l2tp_core ops_exit_list.isra.6+0x36/0x60 unregister_pernet_operations+0xb8/0x110 unregister_pernet_device+0x25/0x40 l2tp_init+0x55/0x1000 [l2tp_core ? 0xffffffffa018d000 do_one_initcall+0x6c/0x3cc ? do_init_module+0x22/0x1f1 ? rcu_read_lock_sched_held+0x97/0xb0 ? kmem_cache_alloc_trace+0x325/0x3b0 do_init_module+0x5b/0x1f1 load_module+0x1db1/0x2690 ? m_show+0x1d0/0x1d0 __do_sys_finit_module+0xc5/0xd0 __x64_sys_finit_module+0x15/0x20 do_syscall_64+0x6b/0x1d0 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7fead031a839 Code: 00 f3 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 1f f6 2c 00 f7 d8 64 89 01 48 RSP: 002b:00007ffe8d9acca8 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 RAX: ffffffffffffffda RBX: 0000560078398b80 RCX: 00007fead031a839 RDX: 0000000000000000 RSI: 000056007659dc2e RDI: 0000000000000003 RBP: 000056007659dc2e R08: 0000000000000000 R09: 0000560078398b80 R10: 0000000000000003 R11: 0000000000000246 R12: 0000000000000000 R13: 00005600783a04a0 R14: 0000000000040000 R15: 0000560078398b80 Modules linked in: l2tp_core(+) e1000 ip_tables ipv6 [last unloaded: l2tp_core CR2: 0000000000000128 ---[ end trace 8322b2b8bf83f8e1 If alloc_workqueue fails in l2tp_init, l2tp_net_ops is unregistered on failure path. Then l2tp_exit_net is called which will flush NULL workqueue, this patch add a NULL check to fix it. Fixes: 67e04c29ec0d ("l2tp: unregister l2tp_net_ops on failure path") Signed-off-by: YueHaibing Acked-by: Guillaume Nault Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/l2tp/l2tp_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index e494f04819e9..b9be0360ab94 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1892,7 +1892,8 @@ static __net_exit void l2tp_exit_net(struct net *net) } rcu_read_unlock_bh(); - flush_workqueue(l2tp_wq); + if (l2tp_wq) + flush_workqueue(l2tp_wq); rcu_barrier(); } From ecad62adbecc3dde19a5ed239e16dbb76b958bcd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 11 Apr 2019 05:01:57 -0400 Subject: [PATCH 2621/3715] media: omap_vout: potential buffer overflow in vidioc_dqbuf() [ Upstream commit dd6e2a981bfe83aa4a493143fd8cf1edcda6c091 ] The "b->index" is a u32 the comes from the user in the ioctl. It hasn't been checked. We aren't supposed to use it but we're instead supposed to use the value that gets written to it when we call videobuf_dqbuf(). The videobuf_dqbuf() first memsets it to zero and then re-initializes it inside the videobuf_status() function. It's this final value which we want. Hans Verkuil pointed out that we need to check the return from videobuf_dqbuf(). I ended up doing a little cleanup related to that as well. Fixes: 72915e851da9 ("[media] V4L2: OMAP: VOUT: dma map and unmap v4l2 buffers in qbuf and dqbuf") Signed-off-by: Dan Carpenter Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/omap/omap_vout.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/media/platform/omap/omap_vout.c b/drivers/media/platform/omap/omap_vout.c index 4d29860d27b4..18604b608ab2 100644 --- a/drivers/media/platform/omap/omap_vout.c +++ b/drivers/media/platform/omap/omap_vout.c @@ -1527,23 +1527,20 @@ static int vidioc_dqbuf(struct file *file, void *fh, struct v4l2_buffer *b) unsigned long size; struct videobuf_buffer *vb; - vb = q->bufs[b->index]; - if (!vout->streaming) return -EINVAL; - if (file->f_flags & O_NONBLOCK) - /* Call videobuf_dqbuf for non blocking mode */ - ret = videobuf_dqbuf(q, (struct v4l2_buffer *)b, 1); - else - /* Call videobuf_dqbuf for blocking mode */ - ret = videobuf_dqbuf(q, (struct v4l2_buffer *)b, 0); + ret = videobuf_dqbuf(q, b, !!(file->f_flags & O_NONBLOCK)); + if (ret) + return ret; + + vb = q->bufs[b->index]; addr = (unsigned long) vout->buf_phy_addr[vb->i]; size = (unsigned long) vb->size; dma_unmap_single(vout->vid_dev->v4l2_dev.dev, addr, size, DMA_TO_DEVICE); - return ret; + return 0; } static int vidioc_streamon(struct file *file, void *fh, enum v4l2_buf_type i) From a2b94c244825fb80e03a83c1d29522e6265cddef Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 24 Apr 2019 05:46:27 -0400 Subject: [PATCH 2622/3715] media: davinci/vpbe: array underflow in vpbe_enum_outputs() [ Upstream commit b72845ee5577b227131b1fef23f9d9a296621d7b ] In vpbe_enum_outputs() we check if (temp_index >= cfg->num_outputs) but the problem is that "temp_index" can be negative. This patch changes the types to unsigned to address this array underflow bug. Fixes: 66715cdc3224 ("[media] davinci vpbe: VPBE display driver") Signed-off-by: Dan Carpenter Acked-by: "Lad, Prabhakar" Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/davinci/vpbe.c | 2 +- include/media/davinci/vpbe.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/platform/davinci/vpbe.c b/drivers/media/platform/davinci/vpbe.c index 1d3c13e36904..915af9ca4711 100644 --- a/drivers/media/platform/davinci/vpbe.c +++ b/drivers/media/platform/davinci/vpbe.c @@ -126,7 +126,7 @@ static int vpbe_enum_outputs(struct vpbe_device *vpbe_dev, struct v4l2_output *output) { struct vpbe_config *cfg = vpbe_dev->cfg; - int temp_index = output->index; + unsigned int temp_index = output->index; if (temp_index >= cfg->num_outputs) return -EINVAL; diff --git a/include/media/davinci/vpbe.h b/include/media/davinci/vpbe.h index 79a566d7defd..180a05e91497 100644 --- a/include/media/davinci/vpbe.h +++ b/include/media/davinci/vpbe.h @@ -92,7 +92,7 @@ struct vpbe_config { struct encoder_config_info *ext_encoders; /* amplifier information goes here */ struct amp_config_info *amp; - int num_outputs; + unsigned int num_outputs; /* Order is venc outputs followed by LCD and then external encoders */ struct vpbe_output *outputs; }; From a21d4ba48183c5c498cbdea1f64e4dbb2bad5a7f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 24 Apr 2019 12:44:18 +0300 Subject: [PATCH 2623/3715] platform/x86: alienware-wmi: printing the wrong error code [ Upstream commit 6d1f8b3d75419a8659ac916a1e9543bb3513a882 ] The "out_data" variable is uninitialized at the point. Originally, this used to print "status" instead and that seems like the correct thing to print. Fixes: bc2ef884320b ("alienware-wmi: For WMAX HDMI method, introduce a way to query HDMI cable status") Signed-off-by: Dan Carpenter Reviewed-by: Mario Limonciello Signed-off-by: Andy Shevchenko Signed-off-by: Sasha Levin --- drivers/platform/x86/alienware-wmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/alienware-wmi.c b/drivers/platform/x86/alienware-wmi.c index cbd84e2e3bd4..2c82188f8486 100644 --- a/drivers/platform/x86/alienware-wmi.c +++ b/drivers/platform/x86/alienware-wmi.c @@ -570,7 +570,7 @@ static ssize_t show_hdmi_source(struct device *dev, return scnprintf(buf, PAGE_SIZE, "input [gpu] unknown\n"); } - pr_err("alienware-wmi: unknown HDMI source status: %d\n", out_data); + pr_err("alienware-wmi: unknown HDMI source status: %u\n", status); return scnprintf(buf, PAGE_SIZE, "input gpu [unknown]\n"); } From b98958074cae36b6bc6b4267b06af266bc3ea511 Mon Sep 17 00:00:00 2001 From: Iuliana Prodan Date: Tue, 7 May 2019 16:37:03 +0300 Subject: [PATCH 2624/3715] crypto: caam - fix caam_dump_sg that iterates through scatterlist MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8c65d35435e8cbfdf953cafe5ebe3648ee9276a2 ] Fix caam_dump_sg by correctly determining the next scatterlist entry in the list. Fixes: 5ecf8ef9103c ("crypto: caam - fix sg dump") Signed-off-by: Iuliana Prodan Reviewed-by: Horia Geantă Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/caam/error.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/caam/error.c b/drivers/crypto/caam/error.c index 8da88beb1abb..832ba2afdcd5 100644 --- a/drivers/crypto/caam/error.c +++ b/drivers/crypto/caam/error.c @@ -22,7 +22,7 @@ void caam_dump_sg(const char *level, const char *prefix_str, int prefix_type, size_t len; void *buf; - for (it = sg; it && tlen > 0 ; it = sg_next(sg)) { + for (it = sg; it && tlen > 0 ; it = sg_next(it)) { /* * make sure the scatterlist's page * has a valid virtual memory mapping From eba68981c6eb2246198d6fe8f5f959ab2ca982ff Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 5 May 2019 18:47:33 +0200 Subject: [PATCH 2625/3715] netfilter: ebtables: CONFIG_COMPAT: reject trailing data after last rule [ Upstream commit 680f6af5337c98d116e4f127cea7845339dba8da ] If userspace provides a rule blob with trailing data after last target, we trigger a splat, then convert ruleset to 64bit format (with trailing data), then pass that to do_replace_finish() which then returns -EINVAL. Erroring out right away avoids the splat plus unneeded translation and error unwind. Fixes: 81e675c227ec ("netfilter: ebtables: add CONFIG_COMPAT support") Reported-by: Tetsuo Handa Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/bridge/netfilter/ebtables.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 35a670ec9077..a1834ad7422c 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -2164,7 +2164,9 @@ static int compat_copy_entries(unsigned char *data, unsigned int size_user, if (ret < 0) return ret; - WARN_ON(size_remaining); + if (size_remaining) + return -EINVAL; + return state->buf_kern_offset; } From 220461f5537b050ee0fc4ff129e0e68dd60fd7f6 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Mon, 1 Apr 2019 20:18:16 +0200 Subject: [PATCH 2626/3715] pwm: meson: Consider 128 a valid pre-divider MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 51496e4446875726d50a5617a6e0e0dabbc2e6da ] The pre-divider allows configuring longer PWM periods compared to using the input clock directly. The pre-divider is 7 bit wide, meaning it's maximum value is 128 (the register value is off-by-one: 0x7f or 127). Change the loop to also allow for the maximum possible value to be considered valid. Fixes: 211ed630753d2f ("pwm: Add support for Meson PWM Controller") Signed-off-by: Martin Blumenstingl Acked-by: Uwe Kleine-König Reviewed-by: Neil Armstrong Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/pwm/pwm-meson.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c index 9b79cbc7a715..9551f896dd6f 100644 --- a/drivers/pwm/pwm-meson.c +++ b/drivers/pwm/pwm-meson.c @@ -188,7 +188,7 @@ static int meson_pwm_calc(struct meson_pwm *meson, do_div(fin_ps, fin_freq); /* Calc pre_div with the period */ - for (pre_div = 0; pre_div < MISC_CLK_DIV_MASK; pre_div++) { + for (pre_div = 0; pre_div <= MISC_CLK_DIV_MASK; pre_div++) { cnt = DIV_ROUND_CLOSEST_ULL((u64)period * 1000, fin_ps * (pre_div + 1)); dev_dbg(meson->chip.dev, "fin_ps=%llu pre_div=%u cnt=%u\n", @@ -197,7 +197,7 @@ static int meson_pwm_calc(struct meson_pwm *meson, break; } - if (pre_div == MISC_CLK_DIV_MASK) { + if (pre_div > MISC_CLK_DIV_MASK) { dev_err(meson->chip.dev, "unable to get period pre_div\n"); return -EINVAL; } From 6f6200cb6dc2fb7a55a218e23806c140fc89ad2e Mon Sep 17 00:00:00 2001 From: Bichao Zheng Date: Mon, 1 Apr 2019 20:18:17 +0200 Subject: [PATCH 2627/3715] pwm: meson: Don't disable PWM when setting duty repeatedly [ Upstream commit a279345807e1e0ae79567a52cfdd9d30c9174a3c ] There is an abnormally low about 20ms,when setting duty repeatedly. Because setting the duty will disable PWM and then enable. Delete this operation now. Fixes: 211ed630753d2f ("pwm: Add support for Meson PWM Controller") Signed-off-by: Bichao Zheng [ Dropped code instead of hiding it behind a comment ] Signed-off-by: Martin Blumenstingl Reviewed-by: Neil Armstrong Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/pwm/pwm-meson.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c index 9551f896dd6f..3d2c36963a4f 100644 --- a/drivers/pwm/pwm-meson.c +++ b/drivers/pwm/pwm-meson.c @@ -325,11 +325,6 @@ static int meson_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, if (state->period != channel->state.period || state->duty_cycle != channel->state.duty_cycle || state->polarity != channel->state.polarity) { - if (channel->state.enabled) { - meson_pwm_disable(meson, pwm->hwpwm); - channel->state.enabled = false; - } - if (state->polarity != channel->state.polarity) { if (state->polarity == PWM_POLARITY_NORMAL) meson->inverter_mask |= BIT(pwm->hwpwm); From 1229ccad5380c4b97874bdb4d9e6c7d639417151 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 27 Apr 2019 22:43:49 +0100 Subject: [PATCH 2628/3715] ARM: riscpc: fix lack of keyboard interrupts after irq conversion [ Upstream commit 63a0666bca9311f35017be454587f3ba903644b8 ] Fix lack of keyboard interrupts for RiscPC due to incorrect conversion. Fixes: e8d36d5dbb6a ("ARM: kill off set_irq_flags usage") Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/mach-rpc/irq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-rpc/irq.c b/arch/arm/mach-rpc/irq.c index b8a61cb11207..7f0f40178634 100644 --- a/arch/arm/mach-rpc/irq.c +++ b/arch/arm/mach-rpc/irq.c @@ -118,7 +118,7 @@ extern unsigned char rpc_default_fiq_start, rpc_default_fiq_end; void __init rpc_init_irq(void) { - unsigned int irq, clr, set = 0; + unsigned int irq, clr, set; iomd_writeb(0, IOMD_IRQMASKA); iomd_writeb(0, IOMD_IRQMASKB); @@ -130,6 +130,7 @@ void __init rpc_init_irq(void) for (irq = 0; irq < NR_IRQS; irq++) { clr = IRQ_NOREQUEST; + set = 0; if (irq <= 6 || (irq >= 9 && irq <= 15)) clr |= IRQ_NOPROBE; From 67b69c426a1c8d5a5e771c7ff1348e1bd3803719 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 6 May 2019 15:50:18 +0300 Subject: [PATCH 2629/3715] kdb: do a sanity check on the cpu in kdb_per_cpu() [ Upstream commit b586627e10f57ee3aa8f0cfab0d6f7dc4ae63760 ] The "whichcpu" comes from argv[3]. The cpu_online() macro looks up the cpu in a bitmap of online cpus, but if the value is too high then it could read beyond the end of the bitmap and possibly Oops. Fixes: 5d5314d6795f ("kdb: core for kgdb back end (1 of 2)") Signed-off-by: Dan Carpenter Reviewed-by: Douglas Anderson Signed-off-by: Daniel Thompson Signed-off-by: Sasha Levin --- kernel/debug/kdb/kdb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 993db6b2348e..15d902daeef6 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -2634,7 +2634,7 @@ static int kdb_per_cpu(int argc, const char **argv) diag = kdbgetularg(argv[3], &whichcpu); if (diag) return diag; - if (!cpu_online(whichcpu)) { + if (whichcpu >= nr_cpu_ids || !cpu_online(whichcpu)) { kdb_printf("cpu %ld is not online\n", whichcpu); return KDB_BADCPUNUM; } From 616d12400dc2cf57969978fd96e257d6559ea046 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Wed, 24 Apr 2019 05:25:03 -0400 Subject: [PATCH 2630/3715] backlight: lm3630a: Return 0 on success in update_status functions [ Upstream commit d3f48ec0954c6aac736ab21c34a35d7554409112 ] lm3630a_bank_a_update_status() and lm3630a_bank_b_update_status() both return the brightness value if the brightness was successfully updated. Writing to these attributes via sysfs would cause a 'Bad address' error to be returned. These functions should return 0 on success, so let's change it to correct that error. Fixes: 28e64a68a2ef ("backlight: lm3630: apply chip revision") Signed-off-by: Brian Masney Acked-by: Pavel Machek Acked-by: Daniel Thompson Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/video/backlight/lm3630a_bl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/backlight/lm3630a_bl.c b/drivers/video/backlight/lm3630a_bl.c index 2030a6b77a09..ef2553f452ca 100644 --- a/drivers/video/backlight/lm3630a_bl.c +++ b/drivers/video/backlight/lm3630a_bl.c @@ -201,7 +201,7 @@ static int lm3630a_bank_a_update_status(struct backlight_device *bl) LM3630A_LEDA_ENABLE, LM3630A_LEDA_ENABLE); if (ret < 0) goto out_i2c_err; - return bl->props.brightness; + return 0; out_i2c_err: dev_err(pchip->dev, "i2c failed to access\n"); @@ -278,7 +278,7 @@ static int lm3630a_bank_b_update_status(struct backlight_device *bl) LM3630A_LEDB_ENABLE, LM3630A_LEDB_ENABLE); if (ret < 0) goto out_i2c_err; - return bl->props.brightness; + return 0; out_i2c_err: dev_err(pchip->dev, "i2c failed to access REG_CTRL\n"); From 0450254e832c40401c074c84e664d2f37bae7155 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Thu, 2 May 2019 11:32:38 -0700 Subject: [PATCH 2631/3715] thermal: cpu_cooling: Actually trace CPU load in thermal_power_cpu_get_power [ Upstream commit bf45ac18b78038e43af3c1a273cae4ab5704d2ce ] The CPU load values passed to the thermal_power_cpu_get_power tracepoint are zero for all CPUs, unless, unless the thermal_power_cpu_limit tracepoint is enabled too: irq/41-rockchip-98 [000] .... 290.972410: thermal_power_cpu_get_power: cpus=0000000f freq=1800000 load={{0x0,0x0,0x0,0x0}} dynamic_power=4815 vs irq/41-rockchip-96 [000] .... 95.773585: thermal_power_cpu_get_power: cpus=0000000f freq=1800000 load={{0x56,0x64,0x64,0x5e}} dynamic_power=4959 irq/41-rockchip-96 [000] .... 95.773596: thermal_power_cpu_limit: cpus=0000000f freq=408000 cdev_state=10 power=416 There seems to be no good reason for omitting the CPU load information depending on another tracepoint. My guess is that the intention was to check whether thermal_power_cpu_get_power is (still) enabled, however 'load_cpu != NULL' already indicates that it was at least enabled when cpufreq_get_requested_power() was entered, there seems little gain from omitting the assignment if the tracepoint was just disabled, so just remove the check. Fixes: 6828a4711f99 ("thermal: add trace events to the power allocator governor") Signed-off-by: Matthias Kaehlcke Reviewed-by: Daniel Lezcano Acked-by: Javi Merino Acked-by: Viresh Kumar Signed-off-by: Eduardo Valentin Signed-off-by: Sasha Levin --- drivers/thermal/cpu_cooling.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 908a8014cf76..aed995ec2c90 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -514,7 +514,7 @@ static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev, load = 0; total_load += load; - if (trace_thermal_power_cpu_limit_enabled() && load_cpu) + if (load_cpu) load_cpu[i] = load; i++; From 857247a21385e45dbffbc87b95f3761faa04fa81 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 14 May 2019 10:49:09 +0000 Subject: [PATCH 2632/3715] EDAC/mc: Fix edac_mc_find() in case no device is found [ Upstream commit 29a0c843973bc385918158c6976e4dbe891df969 ] The function should return NULL in case no device is found, but it always returns the last checked mc device from the list even if the index did not match. Fix that. I did some analysis why this did not raise any issues for about 3 years and the reason is that edac_mc_find() is mostly used to search for existing devices. Thus, the bug is not triggered. [ bp: Drop the if (mci->mc_idx > idx) test in favor of readability. ] Fixes: c73e8833bec5 ("EDAC, mc: Fix locking around mc_devices list") Signed-off-by: Robert Richter Signed-off-by: Borislav Petkov Cc: "linux-edac@vger.kernel.org" Cc: James Morse Cc: Mauro Carvalho Chehab Link: https://lkml.kernel.org/r/20190514104838.15065-1-rrichter@marvell.com Signed-off-by: Sasha Levin --- drivers/edac/edac_mc.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index f7fa05fee45a..329021189c38 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -680,22 +680,18 @@ static int del_mc_from_global_list(struct mem_ctl_info *mci) struct mem_ctl_info *edac_mc_find(int idx) { - struct mem_ctl_info *mci = NULL; + struct mem_ctl_info *mci; struct list_head *item; mutex_lock(&mem_ctls_mutex); list_for_each(item, &mc_devices) { mci = list_entry(item, struct mem_ctl_info, link); - - if (mci->mc_idx >= idx) { - if (mci->mc_idx == idx) { - goto unlock; - } - break; - } + if (mci->mc_idx == idx) + goto unlock; } + mci = NULL; unlock: mutex_unlock(&mem_ctls_mutex); return mci; From 4f531be6de4085e1e61fb01ef02c4278ce263d60 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Sat, 18 May 2019 17:40:14 +0200 Subject: [PATCH 2633/3715] ARM: dts: sun8i-h3: Fix wifi in Beelink X2 DT [ Upstream commit ca0961011db57e39880df0b5708df8aa3339dc6f ] mmc1 node where wifi module is connected doesn't have properly defined power supplies so wifi module is never powered up. Fix that by specifying additional power supplies. Additionally, this STB may have either Realtek or Broadcom based wifi module. One based on Broadcom module also needs external clock to work properly. Fix that by adding clock property to wifi_pwrseq node. Fixes: e582b47a9252 ("ARM: dts: sun8i-h3: Add dts for the Beelink X2 STB") Signed-off-by: Jernej Skrabec Signed-off-by: Maxime Ripard Signed-off-by: Sasha Levin --- arch/arm/boot/dts/sun8i-h3-beelink-x2.dts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/sun8i-h3-beelink-x2.dts b/arch/arm/boot/dts/sun8i-h3-beelink-x2.dts index 10da56e86ab8..21b38c386f1b 100644 --- a/arch/arm/boot/dts/sun8i-h3-beelink-x2.dts +++ b/arch/arm/boot/dts/sun8i-h3-beelink-x2.dts @@ -79,6 +79,8 @@ wifi_pwrseq: wifi_pwrseq { compatible = "mmc-pwrseq-simple"; reset-gpios = <&r_pio 0 7 GPIO_ACTIVE_LOW>; /* PL7 */ + clocks = <&rtc 1>; + clock-names = "ext_clock"; }; sound_spdif { @@ -128,6 +130,8 @@ pinctrl-names = "default"; pinctrl-0 = <&mmc1_pins_a>; vmmc-supply = <®_vcc3v3>; + vqmmc-supply = <®_vcc3v3>; + mmc-pwrseq = <&wifi_pwrseq>; bus-width = <4>; non-removable; status = "okay"; From 71bc62b08059915218d3fbc78092f40cedbfe446 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Thu, 16 May 2019 16:53:52 +0100 Subject: [PATCH 2634/3715] dmaengine: tegra210-adma: Fix crash during probe [ Upstream commit b53611fb1ce9b1786bd18205473e0c1d6bfa8934 ] Commit f33e7bb3eb92 ("dmaengine: tegra210-adma: restore channel status") added support to save and restore the DMA channel registers when runtime suspending the ADMA. This change is causing the kernel to crash when probing the ADMA, if the device is probed deferred when looking up the channel interrupts. The crash occurs because not all of the channel base addresses have been setup at this point and in the clean-up path of the probe, pm_runtime_suspend() is called invoking its callback which expects all the channel base addresses to be initialised. Although this could be fixed by simply checking for a NULL address, on further review of the driver it seems more appropriate that we only call pm_runtime_get_sync() after all the channel interrupts and base addresses have been configured. Therefore, fix this crash by moving the calls to pm_runtime_enable(), pm_runtime_get_sync() and tegra_adma_init() after the DMA channels have been initialised. Fixes: f33e7bb3eb92 ("dmaengine: tegra210-adma: restore channel status") Signed-off-by: Jon Hunter Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/tegra210-adma.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c index ac2a6b800db3..4f4733d831a1 100644 --- a/drivers/dma/tegra210-adma.c +++ b/drivers/dma/tegra210-adma.c @@ -744,16 +744,6 @@ static int tegra_adma_probe(struct platform_device *pdev) return PTR_ERR(tdma->ahub_clk); } - pm_runtime_enable(&pdev->dev); - - ret = pm_runtime_get_sync(&pdev->dev); - if (ret < 0) - goto rpm_disable; - - ret = tegra_adma_init(tdma); - if (ret) - goto rpm_put; - INIT_LIST_HEAD(&tdma->dma_dev.channels); for (i = 0; i < tdma->nr_channels; i++) { struct tegra_adma_chan *tdc = &tdma->channels[i]; @@ -771,6 +761,16 @@ static int tegra_adma_probe(struct platform_device *pdev) tdc->tdma = tdma; } + pm_runtime_enable(&pdev->dev); + + ret = pm_runtime_get_sync(&pdev->dev); + if (ret < 0) + goto rpm_disable; + + ret = tegra_adma_init(tdma); + if (ret) + goto rpm_put; + dma_cap_set(DMA_SLAVE, tdma->dma_dev.cap_mask); dma_cap_set(DMA_PRIVATE, tdma->dma_dev.cap_mask); dma_cap_set(DMA_CYCLIC, tdma->dma_dev.cap_mask); @@ -812,13 +812,13 @@ static int tegra_adma_probe(struct platform_device *pdev) dma_remove: dma_async_device_unregister(&tdma->dma_dev); -irq_dispose: - while (--i >= 0) - irq_dispose_mapping(tdma->channels[i].irq); rpm_put: pm_runtime_put_sync(&pdev->dev); rpm_disable: pm_runtime_disable(&pdev->dev); +irq_dispose: + while (--i >= 0) + irq_dispose_mapping(tdma->channels[i].irq); return ret; } From 733ab5b38187791b0cc9e7de8f0f795be5cd48d9 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Thu, 18 Apr 2019 14:27:09 +0200 Subject: [PATCH 2635/3715] arm64: dts: meson: libretech-cc: set eMMC as removable [ Upstream commit 9f72e321d5506fe3e162a6308a4a295d7f10bb5d ] The eMMC on this board is add-on module which is not mandatory. Removing 'non-removable' property should prevent some errors when booting a board w/o an eMMC module present. Fixes: 72fb2c852188 ("ARM64: dts: meson-gxl-s905x-libretech-cc: fixup board definition") Signed-off-by: Jerome Brunet Reviewed-by: Martin Blumenstingl Signed-off-by: Kevin Hilman Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts index e2c71753e327..407d32f4fe73 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts @@ -226,7 +226,6 @@ cap-mmc-highspeed; mmc-ddr-3_3v; max-frequency = <50000000>; - non-removable; disable-wp; mmc-pwrseq = <&emmc_pwrseq>; From 6a494caf49a7fc9946d87bbd347a8965e9200b39 Mon Sep 17 00:00:00 2001 From: Sagiv Ozeri Date: Mon, 20 May 2019 12:33:20 +0300 Subject: [PATCH 2636/3715] RDMA/qedr: Fix incorrect device rate. [ Upstream commit 69054666df0a9b4e8331319f98b6b9a88bc3fcc4 ] Use the correct enum value introduced in commit 12113a35ada6 ("IB/core: Add HDR speed enum") Prior to this change a 50Gbps port would show 40Gbps. This patch also cleaned up the redundant redefiniton of ib speeds for qedr. Fixes: 12113a35ada6 ("IB/core: Add HDR speed enum") Signed-off-by: Sagiv Ozeri Signed-off-by: Michal Kalderon Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/qedr/verbs.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 8bfe9073da78..6ae72accae3d 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -178,54 +178,47 @@ int qedr_query_device(struct ib_device *ibdev, return 0; } -#define QEDR_SPEED_SDR (1) -#define QEDR_SPEED_DDR (2) -#define QEDR_SPEED_QDR (4) -#define QEDR_SPEED_FDR10 (8) -#define QEDR_SPEED_FDR (16) -#define QEDR_SPEED_EDR (32) - static inline void get_link_speed_and_width(int speed, u8 *ib_speed, u8 *ib_width) { switch (speed) { case 1000: - *ib_speed = QEDR_SPEED_SDR; + *ib_speed = IB_SPEED_SDR; *ib_width = IB_WIDTH_1X; break; case 10000: - *ib_speed = QEDR_SPEED_QDR; + *ib_speed = IB_SPEED_QDR; *ib_width = IB_WIDTH_1X; break; case 20000: - *ib_speed = QEDR_SPEED_DDR; + *ib_speed = IB_SPEED_DDR; *ib_width = IB_WIDTH_4X; break; case 25000: - *ib_speed = QEDR_SPEED_EDR; + *ib_speed = IB_SPEED_EDR; *ib_width = IB_WIDTH_1X; break; case 40000: - *ib_speed = QEDR_SPEED_QDR; + *ib_speed = IB_SPEED_QDR; *ib_width = IB_WIDTH_4X; break; case 50000: - *ib_speed = QEDR_SPEED_QDR; - *ib_width = IB_WIDTH_4X; + *ib_speed = IB_SPEED_HDR; + *ib_width = IB_WIDTH_1X; break; case 100000: - *ib_speed = QEDR_SPEED_EDR; + *ib_speed = IB_SPEED_EDR; *ib_width = IB_WIDTH_4X; break; default: /* Unsupported */ - *ib_speed = QEDR_SPEED_SDR; + *ib_speed = IB_SPEED_SDR; *ib_width = IB_WIDTH_1X; } } From 0d56b73c7e727b3a1de39035a504637b800adccd Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 22 May 2019 11:00:36 +0000 Subject: [PATCH 2637/3715] spi: spi-fsl-spi: call spi_finalize_current_message() at the end [ Upstream commit 44a042182cb1e9f7916e015c836967bf638b33c4 ] spi_finalize_current_message() shall be called once all actions are finished, otherwise the last actions might step over a newly started transfer. Fixes: c592becbe704 ("spi: fsl-(e)spi: migrate to generic master queueing") Signed-off-by: Christophe Leroy Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-fsl-spi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-fsl-spi.c b/drivers/spi/spi-fsl-spi.c index 8b79e36fab21..cd784552de7f 100644 --- a/drivers/spi/spi-fsl-spi.c +++ b/drivers/spi/spi-fsl-spi.c @@ -407,7 +407,6 @@ static int fsl_spi_do_one_msg(struct spi_master *master, } m->status = status; - spi_finalize_current_message(master); if (status || !cs_change) { ndelay(nsecs); @@ -415,6 +414,7 @@ static int fsl_spi_do_one_msg(struct spi_master *master, } fsl_spi_setup_transfer(spi, NULL); + spi_finalize_current_message(master); return 0; } From a90e4a8e08333eaae2767a766c4fd6d165af10d4 Mon Sep 17 00:00:00 2001 From: "Hook, Gary" Date: Tue, 14 May 2019 21:53:23 +0000 Subject: [PATCH 2638/3715] crypto: ccp - fix AES CFB error exposed by new test vectors [ Upstream commit c3b359d6567c0b8f413e924feb37cf025067d55a ] Updated testmgr will exhibit this error message when loading the ccp-crypto module: alg: skcipher: cfb-aes-ccp encryption failed with err -22 on test vector 3, cfg="in-place" Update the CCP crypto driver to correctly treat CFB as a streaming mode cipher (instead of block mode). Update the configuration for CFB to specify the block size as a single byte; Fixes: 2b789435d7f3 ('crypto: ccp - CCP AES crypto API support') Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/ccp/ccp-crypto-aes.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/ccp/ccp-crypto-aes.c b/drivers/crypto/ccp/ccp-crypto-aes.c index 89291c15015c..3f768699332b 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes.c +++ b/drivers/crypto/ccp/ccp-crypto-aes.c @@ -1,7 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 /* * AMD Cryptographic Coprocessor (CCP) AES crypto API support * - * Copyright (C) 2013,2016 Advanced Micro Devices, Inc. + * Copyright (C) 2013-2019 Advanced Micro Devices, Inc. * * Author: Tom Lendacky * @@ -79,8 +80,7 @@ static int ccp_aes_crypt(struct ablkcipher_request *req, bool encrypt) return -EINVAL; if (((ctx->u.aes.mode == CCP_AES_MODE_ECB) || - (ctx->u.aes.mode == CCP_AES_MODE_CBC) || - (ctx->u.aes.mode == CCP_AES_MODE_CFB)) && + (ctx->u.aes.mode == CCP_AES_MODE_CBC)) && (req->nbytes & (AES_BLOCK_SIZE - 1))) return -EINVAL; @@ -291,7 +291,7 @@ static struct ccp_aes_def aes_algs[] = { .version = CCP_VERSION(3, 0), .name = "cfb(aes)", .driver_name = "cfb-aes-ccp", - .blocksize = AES_BLOCK_SIZE, + .blocksize = 1, .ivsize = AES_BLOCK_SIZE, .alg_defaults = &ccp_aes_defaults, }, From 9080a21773ca3f60d98b0126951a6135c2d96f66 Mon Sep 17 00:00:00 2001 From: "Hook, Gary" Date: Tue, 14 May 2019 21:53:30 +0000 Subject: [PATCH 2639/3715] crypto: ccp - Fix 3DES complaint from ccp-crypto module [ Upstream commit 89646fdda4cae203185444ac7988835f36a21ee1 ] Crypto self-tests reveal an error: alg: skcipher: cbc-des3-ccp encryption test failed (wrong output IV) on test vector 0, cfg="in-place" The offset value should not be recomputed when retrieving the context. Also, a code path exists which makes decisions based on older (version 3) hardware; a v3 device deosn't support 3DES so remove this check. Fixes: 990672d48515 ('crypto: ccp - Enable 3DES function on v5 CCPs') Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/ccp/ccp-ops.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c index 1e2e42106dee..4b48b8523a40 100644 --- a/drivers/crypto/ccp/ccp-ops.c +++ b/drivers/crypto/ccp/ccp-ops.c @@ -1293,6 +1293,9 @@ static int ccp_run_des3_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) int ret; /* Error checks */ + if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) + return -EINVAL; + if (!cmd_q->ccp->vdata->perform->des3) return -EINVAL; @@ -1375,8 +1378,6 @@ static int ccp_run_des3_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) * passthru option to convert from big endian to little endian. */ if (des3->mode != CCP_DES3_MODE_ECB) { - u32 load_mode; - op.sb_ctx = cmd_q->sb_ctx; ret = ccp_init_dm_workarea(&ctx, cmd_q, @@ -1392,12 +1393,8 @@ static int ccp_run_des3_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) if (ret) goto e_ctx; - if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) - load_mode = CCP_PASSTHRU_BYTESWAP_NOOP; - else - load_mode = CCP_PASSTHRU_BYTESWAP_256BIT; ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, - load_mode); + CCP_PASSTHRU_BYTESWAP_256BIT); if (ret) { cmd->engine_error = cmd_q->cmd_error; goto e_ctx; @@ -1459,10 +1456,6 @@ static int ccp_run_des3_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) } /* ...but we only need the last DES3_EDE_BLOCK_SIZE bytes */ - if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) - dm_offset = CCP_SB_BYTES - des3->iv_len; - else - dm_offset = 0; ccp_get_dm_area(&ctx, dm_offset, des3->iv, 0, DES3_EDE_BLOCK_SIZE); } From 58828c8a7ba68580c5683781cfcc644d43c21778 Mon Sep 17 00:00:00 2001 From: Erwan Le Ray Date: Tue, 21 May 2019 17:45:42 +0200 Subject: [PATCH 2640/3715] serial: stm32: fix rx error handling [ Upstream commit 4f01d833fdcdd6f9b85d9e5d5d7568eb683626a7 ] - Fixes parity and framing error bit by clearing parity and framing error flag. The current implementation doesn't clear the error bits when an error is detected. - Fixes the incorrect name of framing error clearing flag in header file. - Fixes misalignement between data frame and errors status. The status read for "n" frame was the status of "n+1" frame". - Fixes break detection was not triggered by the expected register. Fixes: 48a6092fb41f ("serial: stm32-usart: Add STM32 USART Driver") Signed-off-by: Erwan Le Ray Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/stm32-usart.c | 54 +++++++++++++++++++++----------- drivers/tty/serial/stm32-usart.h | 10 ++---- 2 files changed, 37 insertions(+), 27 deletions(-) diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c index 03a583264d9e..c43590077372 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -118,35 +118,51 @@ static void stm32_receive_chars(struct uart_port *port, bool threaded) while (stm32_pending_rx(port, &sr, &stm32_port->last_res, threaded)) { sr |= USART_SR_DUMMY_RX; - c = stm32_get_char(port, &sr, &stm32_port->last_res); flag = TTY_NORMAL; - port->icount.rx++; + /* + * Status bits has to be cleared before reading the RDR: + * In FIFO mode, reading the RDR will pop the next data + * (if any) along with its status bits into the SR. + * Not doing so leads to misalignement between RDR and SR, + * and clear status bits of the next rx data. + * + * Clear errors flags for stm32f7 and stm32h7 compatible + * devices. On stm32f4 compatible devices, the error bit is + * cleared by the sequence [read SR - read DR]. + */ + if ((sr & USART_SR_ERR_MASK) && ofs->icr != UNDEF_REG) + stm32_clr_bits(port, ofs->icr, USART_ICR_ORECF | + USART_ICR_PECF | USART_ICR_FECF); + + c = stm32_get_char(port, &sr, &stm32_port->last_res); + port->icount.rx++; if (sr & USART_SR_ERR_MASK) { - if (sr & USART_SR_LBD) { - port->icount.brk++; - if (uart_handle_break(port)) - continue; - } else if (sr & USART_SR_ORE) { - if (ofs->icr != UNDEF_REG) - writel_relaxed(USART_ICR_ORECF, - port->membase + - ofs->icr); + if (sr & USART_SR_ORE) { port->icount.overrun++; } else if (sr & USART_SR_PE) { port->icount.parity++; } else if (sr & USART_SR_FE) { - port->icount.frame++; + /* Break detection if character is null */ + if (!c) { + port->icount.brk++; + if (uart_handle_break(port)) + continue; + } else { + port->icount.frame++; + } } sr &= port->read_status_mask; - if (sr & USART_SR_LBD) - flag = TTY_BREAK; - else if (sr & USART_SR_PE) + if (sr & USART_SR_PE) { flag = TTY_PARITY; - else if (sr & USART_SR_FE) - flag = TTY_FRAME; + } else if (sr & USART_SR_FE) { + if (!c) + flag = TTY_BREAK; + else + flag = TTY_FRAME; + } } if (uart_handle_sysrq_char(port, c)) @@ -569,14 +585,14 @@ static void stm32_set_termios(struct uart_port *port, struct ktermios *termios, if (termios->c_iflag & INPCK) port->read_status_mask |= USART_SR_PE | USART_SR_FE; if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) - port->read_status_mask |= USART_SR_LBD; + port->read_status_mask |= USART_SR_FE; /* Characters to ignore */ port->ignore_status_mask = 0; if (termios->c_iflag & IGNPAR) port->ignore_status_mask = USART_SR_PE | USART_SR_FE; if (termios->c_iflag & IGNBRK) { - port->ignore_status_mask |= USART_SR_LBD; + port->ignore_status_mask |= USART_SR_FE; /* * If we're ignoring parity and break indicators, * ignore overruns too (for real raw support). diff --git a/drivers/tty/serial/stm32-usart.h b/drivers/tty/serial/stm32-usart.h index ffc0c5285e51..9d087881913a 100644 --- a/drivers/tty/serial/stm32-usart.h +++ b/drivers/tty/serial/stm32-usart.h @@ -108,7 +108,6 @@ struct stm32_usart_info stm32h7_info = { #define USART_SR_RXNE BIT(5) #define USART_SR_TC BIT(6) #define USART_SR_TXE BIT(7) -#define USART_SR_LBD BIT(8) #define USART_SR_CTSIF BIT(9) #define USART_SR_CTS BIT(10) /* F7 */ #define USART_SR_RTOF BIT(11) /* F7 */ @@ -120,8 +119,7 @@ struct stm32_usart_info stm32h7_info = { #define USART_SR_SBKF BIT(18) /* F7 */ #define USART_SR_WUF BIT(20) /* H7 */ #define USART_SR_TEACK BIT(21) /* F7 */ -#define USART_SR_ERR_MASK (USART_SR_LBD | USART_SR_ORE | \ - USART_SR_FE | USART_SR_PE) +#define USART_SR_ERR_MASK (USART_SR_ORE | USART_SR_FE | USART_SR_PE) /* Dummy bits */ #define USART_SR_DUMMY_RX BIT(16) @@ -166,8 +164,6 @@ struct stm32_usart_info stm32h7_info = { /* USART_CR2 */ #define USART_CR2_ADD_MASK GENMASK(3, 0) /* F4 */ #define USART_CR2_ADDM7 BIT(4) /* F7 */ -#define USART_CR2_LBDL BIT(5) -#define USART_CR2_LBDIE BIT(6) #define USART_CR2_LBCL BIT(8) #define USART_CR2_CPHA BIT(9) #define USART_CR2_CPOL BIT(10) @@ -224,12 +220,10 @@ struct stm32_usart_info stm32h7_info = { /* USART_ICR */ #define USART_ICR_PECF BIT(0) /* F7 */ -#define USART_ICR_FFECF BIT(1) /* F7 */ -#define USART_ICR_NCF BIT(2) /* F7 */ +#define USART_ICR_FECF BIT(1) /* F7 */ #define USART_ICR_ORECF BIT(3) /* F7 */ #define USART_ICR_IDLECF BIT(4) /* F7 */ #define USART_ICR_TCCF BIT(6) /* F7 */ -#define USART_ICR_LBDCF BIT(8) /* F7 */ #define USART_ICR_CTSCF BIT(9) /* F7 */ #define USART_ICR_RTOCF BIT(11) /* F7 */ #define USART_ICR_EOBCF BIT(12) /* F7 */ From 2e0f6b8569f2a2b447a13dfecacd35a6bc3aa421 Mon Sep 17 00:00:00 2001 From: Erwan Le Ray Date: Tue, 21 May 2019 17:45:44 +0200 Subject: [PATCH 2641/3715] serial: stm32: fix transmit_chars when tx is stopped [ Upstream commit b83b957c91f68e53f0dc596e129e8305761f2a32 ] Disables the tx irq when the transmission is ended and updates stop_tx conditions for code cleanup. Fixes: 48a6092fb41f ("serial: stm32-usart: Add STM32 USART Driver") Signed-off-by: Erwan Le Ray Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/stm32-usart.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c index c43590077372..a1e31913bcf9 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -310,13 +310,8 @@ static void stm32_transmit_chars(struct uart_port *port) return; } - if (uart_tx_stopped(port)) { - stm32_stop_tx(port); - return; - } - - if (uart_circ_empty(xmit)) { - stm32_stop_tx(port); + if (uart_circ_empty(xmit) || uart_tx_stopped(port)) { + stm32_clr_bits(port, ofs->cr1, USART_CR1_TXEIE); return; } @@ -329,7 +324,7 @@ static void stm32_transmit_chars(struct uart_port *port) uart_write_wakeup(port); if (uart_circ_empty(xmit)) - stm32_stop_tx(port); + stm32_clr_bits(port, ofs->cr1, USART_CR1_TXEIE); } static irqreturn_t stm32_interrupt(int irq, void *ptr) From 6859cf2284a7c4be2e8678bf75ffd7c8f51a0147 Mon Sep 17 00:00:00 2001 From: Erwan Le Ray Date: Tue, 21 May 2019 17:45:45 +0200 Subject: [PATCH 2642/3715] serial: stm32: Add support of TC bit status check [ Upstream commit 64c32eab660386f9904bb295a104c9c425e9f8b2 ] Adds a check on the Transmission Complete bit status before closing the com port. Prevents the port closure before the end of the transmission. TC poll loop is moved from stm32_tx_dma_complete to stm32_shutdown routine, in order to check TC before shutdown in both dma and PIO tx modes. TC clear is added in stm32_transmit_char routine, in order to be cleared before transmitting in both dma and PIO tx modes. Fixes: 3489187204eb ("serial: stm32: adding dma support") Signed-off-by: Erwan Le Ray Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/stm32-usart.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c index a1e31913bcf9..2384f786b76d 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -180,21 +180,6 @@ static void stm32_tx_dma_complete(void *arg) struct uart_port *port = arg; struct stm32_port *stm32port = to_stm32_port(port); struct stm32_usart_offsets *ofs = &stm32port->info->ofs; - unsigned int isr; - int ret; - - ret = readl_relaxed_poll_timeout_atomic(port->membase + ofs->isr, - isr, - (isr & USART_SR_TC), - 10, 100000); - - if (ret) - dev_err(port->dev, "terminal count not set\n"); - - if (ofs->icr == UNDEF_REG) - stm32_clr_bits(port, ofs->isr, USART_SR_TC); - else - stm32_set_bits(port, ofs->icr, USART_CR_TC); stm32_clr_bits(port, ofs->cr3, USART_CR3_DMAT); stm32port->tx_dma_busy = false; @@ -286,7 +271,6 @@ static void stm32_transmit_chars_dma(struct uart_port *port) /* Issue pending DMA TX requests */ dma_async_issue_pending(stm32port->tx_ch); - stm32_clr_bits(port, ofs->isr, USART_SR_TC); stm32_set_bits(port, ofs->cr3, USART_CR3_DMAT); xmit->tail = (xmit->tail + count) & (UART_XMIT_SIZE - 1); @@ -315,6 +299,11 @@ static void stm32_transmit_chars(struct uart_port *port) return; } + if (ofs->icr == UNDEF_REG) + stm32_clr_bits(port, ofs->isr, USART_SR_TC); + else + stm32_set_bits(port, ofs->icr, USART_ICR_TCCF); + if (stm32_port->tx_ch) stm32_transmit_chars_dma(port); else @@ -491,12 +480,21 @@ static void stm32_shutdown(struct uart_port *port) struct stm32_port *stm32_port = to_stm32_port(port); struct stm32_usart_offsets *ofs = &stm32_port->info->ofs; struct stm32_usart_config *cfg = &stm32_port->info->cfg; - u32 val; + u32 val, isr; + int ret; val = USART_CR1_TXEIE | USART_CR1_RXNEIE | USART_CR1_TE | USART_CR1_RE; val |= BIT(cfg->uart_enable_bit); if (stm32_port->fifoen) val |= USART_CR1_FIFOEN; + + ret = readl_relaxed_poll_timeout(port->membase + ofs->isr, + isr, (isr & USART_SR_TC), + 10, 100000); + + if (ret) + dev_err(port->dev, "transmission complete not set\n"); + stm32_clr_bits(port, ofs->cr1, val); dev_pm_clear_wake_irq(port->dev); From c837323da42905c0dd26bf062ed4d3a2eedc41f5 Mon Sep 17 00:00:00 2001 From: Erwan Le Ray Date: Tue, 21 May 2019 17:45:46 +0200 Subject: [PATCH 2643/3715] serial: stm32: fix wakeup source initialization [ Upstream commit 5297f274e8b61ceb9676cba6649d3de9d03387ad ] Fixes dedicated_irq_wakeup issue and deactivated uart as wakeup source by default. Fixes: 270e5a74fe4c ("serial: stm32: add wakeup mechanism") Signed-off-by: Erwan Le Ray Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/stm32-usart.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c index 2384f786b76d..f8f3f8fafd9f 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -447,7 +447,6 @@ static int stm32_startup(struct uart_port *port) { struct stm32_port *stm32_port = to_stm32_port(port); struct stm32_usart_offsets *ofs = &stm32_port->info->ofs; - struct stm32_usart_config *cfg = &stm32_port->info->cfg; const char *name = to_platform_device(port->dev)->name; u32 val; int ret; @@ -458,15 +457,6 @@ static int stm32_startup(struct uart_port *port) if (ret) return ret; - if (cfg->has_wakeup && stm32_port->wakeirq >= 0) { - ret = dev_pm_set_dedicated_wake_irq(port->dev, - stm32_port->wakeirq); - if (ret) { - free_irq(port->irq, port); - return ret; - } - } - val = USART_CR1_RXNEIE | USART_CR1_TE | USART_CR1_RE; if (stm32_port->fifoen) val |= USART_CR1_FIFOEN; @@ -497,7 +487,6 @@ static void stm32_shutdown(struct uart_port *port) stm32_clr_bits(port, ofs->cr1, val); - dev_pm_clear_wake_irq(port->dev); free_irq(port->irq, port); } @@ -904,11 +893,18 @@ static int stm32_serial_probe(struct platform_device *pdev) ret = device_init_wakeup(&pdev->dev, true); if (ret) goto err_uninit; + + ret = dev_pm_set_dedicated_wake_irq(&pdev->dev, + stm32port->wakeirq); + if (ret) + goto err_nowup; + + device_set_wakeup_enable(&pdev->dev, false); } ret = uart_add_one_port(&stm32_usart_driver, &stm32port->port); if (ret) - goto err_nowup; + goto err_wirq; ret = stm32_of_dma_rx_probe(stm32port, pdev); if (ret) @@ -922,6 +918,10 @@ static int stm32_serial_probe(struct platform_device *pdev) return 0; +err_wirq: + if (stm32port->info->cfg.has_wakeup && stm32port->wakeirq >= 0) + dev_pm_clear_wake_irq(&pdev->dev); + err_nowup: if (stm32port->info->cfg.has_wakeup && stm32port->wakeirq >= 0) device_init_wakeup(&pdev->dev, false); @@ -959,8 +959,10 @@ static int stm32_serial_remove(struct platform_device *pdev) TX_BUF_L, stm32_port->tx_buf, stm32_port->tx_dma_buf); - if (cfg->has_wakeup && stm32_port->wakeirq >= 0) + if (cfg->has_wakeup && stm32_port->wakeirq >= 0) { + dev_pm_clear_wake_irq(&pdev->dev); device_init_wakeup(&pdev->dev, false); + } clk_disable_unprepare(stm32_port->clk); From 50eafd6938decd0693085d0b713060bd0e539d8b Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 24 May 2019 09:15:17 -0700 Subject: [PATCH 2644/3715] misc: sgi-xp: Properly initialize buf in xpc_get_rsvd_page_pa [ Upstream commit b0576f9ecb5c51e9932531d23c447b2739261841 ] Clang warns: drivers/misc/sgi-xp/xpc_partition.c:73:14: warning: variable 'buf' is uninitialized when used within its own initialization [-Wuninitialized] void *buf = buf; ~~~ ^~~ 1 warning generated. Arnd's explanation during review: /* * Returns the physical address of the partition's reserved page through * an iterative number of calls. * * On first call, 'cookie' and 'len' should be set to 0, and 'addr' * set to the nasid of the partition whose reserved page's address is * being sought. * On subsequent calls, pass the values, that were passed back on the * previous call. * * While the return status equals SALRET_MORE_PASSES, keep calling * this function after first copying 'len' bytes starting at 'addr' * into 'buf'. Once the return status equals SALRET_OK, 'addr' will * be the physical address of the partition's reserved page. If the * return status equals neither of these, an error as occurred. */ static inline s64 sn_partition_reserved_page_pa(u64 buf, u64 *cookie, u64 *addr, u64 *len) so *len is set to zero on the first call and tells the bios how many bytes are accessible at 'buf', and it does get updated by the BIOS to tell us how many bytes it needs, and then we allocate that and try again. Fixes: 279290294662 ("[IA64-SGI] cleanup the way XPC locates the reserved page") Link: https://github.com/ClangBuiltLinux/linux/issues/466 Suggested-by: Stephen Hines Reviewed-by: Arnd Bergmann Reviewed-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/misc/sgi-xp/xpc_partition.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c index 6956f7e7d439..ca5f0102daef 100644 --- a/drivers/misc/sgi-xp/xpc_partition.c +++ b/drivers/misc/sgi-xp/xpc_partition.c @@ -70,7 +70,7 @@ xpc_get_rsvd_page_pa(int nasid) unsigned long rp_pa = nasid; /* seed with nasid */ size_t len = 0; size_t buf_len = 0; - void *buf = buf; + void *buf = NULL; void *buf_base = NULL; enum xp_retval (*get_partition_rsvd_page_pa) (void *, u64 *, unsigned long *, size_t *) = From a31ef6b36d7703379d46914eae56f65b6d6e108b Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 21 May 2019 15:27:35 +0800 Subject: [PATCH 2645/3715] iommu: Use right function to get group for device [ Upstream commit 57274ea25736496ee019a5c40479855b21888839 ] The iommu_group_get_for_dev() will allocate a group for a device if it isn't in any group. This isn't the use case in iommu_request_dm_for_dev(). Let's use iommu_group_get() instead. Fixes: d290f1e70d85a ("iommu: Introduce iommu_request_dm_for_dev()") Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/iommu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 4b761678a18b..2c48a9d6d91e 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1856,9 +1856,9 @@ int iommu_request_dm_for_dev(struct device *dev) int ret; /* Device must already be in a group before calling this function */ - group = iommu_group_get_for_dev(dev); - if (IS_ERR(group)) - return PTR_ERR(group); + group = iommu_group_get(dev); + if (!group) + return -EINVAL; mutex_lock(&group->mutex); From 9360b13308b085c57430e8b28fa5ea1ecf6e6645 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 15 May 2019 12:33:50 -0500 Subject: [PATCH 2646/3715] signal/cifs: Fix cifs_put_tcp_session to call send_sig instead of force_sig [ Upstream commit 72abe3bcf0911d69b46c1e8bdb5612675e0ac42c ] The locking in force_sig_info is not prepared to deal with a task that exits or execs (as sighand may change). The is not a locking problem in force_sig as force_sig is only built to handle synchronous exceptions. Further the function force_sig_info changes the signal state if the signal is ignored, or blocked or if SIGNAL_UNKILLABLE will prevent the delivery of the signal. The signal SIGKILL can not be ignored and can not be blocked and SIGNAL_UNKILLABLE won't prevent it from being delivered. So using force_sig rather than send_sig for SIGKILL is confusing and pointless. Because it won't impact the sending of the signal and and because using force_sig is wrong, replace force_sig with send_sig. Cc: Namjae Jeon Cc: Jeff Layton Cc: Steve French Fixes: a5c3e1c725af ("Revert "cifs: No need to send SIGKILL to demux_thread during umount"") Fixes: e7ddee9037e7 ("cifs: disable sharing session and tcon and add new TCP sharing code") Signed-off-by: "Eric W. Biederman" Signed-off-by: Sasha Levin --- fs/cifs/connect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index f523a9ca9574..51bbb1c0b71a 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2320,7 +2320,7 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect) task = xchg(&server->tsk, NULL); if (task) - force_sig(SIGKILL, task); + send_sig(SIGKILL, task, 1); } static struct TCP_Server_Info * From 4f80b033f61bafc56239ae6507d944aa4d13ddd6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 27 May 2019 16:56:48 -0700 Subject: [PATCH 2647/3715] inet: frags: call inet_frags_fini() after unregister_pernet_subsys() [ Upstream commit ae7352d384a552d8c799c242e74a934809990a71 ] Both IPv6 and 6lowpan are calling inet_frags_fini() too soon. inet_frags_fini() is dismantling a kmem_cache, that might be needed later when unregister_pernet_subsys() eventually has to remove frags queues from hash tables and free them. This fixes potential use-after-free, and is a prereq for the following patch. Fixes: d4ad4d22e7ac ("inet: frags: use kmem_cache for inet_frag_queue") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ieee802154/6lowpan/reassembly.c | 2 +- net/ipv6/reassembly.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index ec7a5da56129..e873a6a007f2 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -634,7 +634,7 @@ err_sysctl: void lowpan_net_frag_exit(void) { - inet_frags_fini(&lowpan_frags); lowpan_frags_sysctl_unregister(); unregister_pernet_subsys(&lowpan_frags_ops); + inet_frags_fini(&lowpan_frags); } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index fe797b29ca89..6dea6e92e686 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -593,8 +593,8 @@ err_protocol: void ipv6_frag_exit(void) { - inet_frags_fini(&ip6_frags); ip6_frags_sysctl_unregister(); unregister_pernet_subsys(&ip6_frags_ops); inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT); + inet_frags_fini(&ip6_frags); } From 7313729555616597f2ca79cb72de28778298abeb Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 28 May 2019 11:47:30 -0700 Subject: [PATCH 2648/3715] netvsc: unshare skb in VF rx handler [ Upstream commit 996ed04741467f6d1552440c92988b132a9487ec ] The netvsc VF skb handler should make sure that skb is not shared. Similar logic already exists in bonding and team device drivers. This is not an issue in practice because the VF devicex does not send up shared skb's. But the netvsc driver should do the right thing if it did. Fixes: 0c195567a8f6 ("netvsc: transparent VF management") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/hyperv/netvsc_drv.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index a89de5752a8c..9e48855f6407 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1840,6 +1840,12 @@ static rx_handler_result_t netvsc_vf_handle_frame(struct sk_buff **pskb) struct netvsc_vf_pcpu_stats *pcpu_stats = this_cpu_ptr(ndev_ctx->vf_stats); + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(!skb)) + return RX_HANDLER_CONSUMED; + + *pskb = skb; + skb->dev = ndev; u64_stats_update_begin(&pcpu_stats->syncp); From 0ba089779082da6e53b4a57d478091a94e94abb4 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 22 May 2019 11:45:46 -0700 Subject: [PATCH 2649/3715] cpufreq: brcmstb-avs-cpufreq: Fix initial command check [ Upstream commit 22a26cc6a51ef73dcfeb64c50513903f6b2d53d8 ] There is a logical error in brcm_avs_is_firmware_loaded() whereby if the firmware returns -EINVAL, we will be reporting this as an error. The comment is correct, the code was not. Fixes: de322e085995 ("cpufreq: brcmstb-avs-cpufreq: AVS CPUfreq driver for Broadcom STB SoCs") Signed-off-by: Florian Fainelli Acked-by: Markus Mayer Signed-off-by: Viresh Kumar Signed-off-by: Sasha Levin --- drivers/cpufreq/brcmstb-avs-cpufreq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c index 7281a2c19c36..bae319037658 100644 --- a/drivers/cpufreq/brcmstb-avs-cpufreq.c +++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c @@ -762,8 +762,8 @@ static bool brcm_avs_is_firmware_loaded(struct private_data *priv) rc = brcm_avs_get_pmap(priv, NULL); magic = readl(priv->base + AVS_MBOX_MAGIC); - return (magic == AVS_FIRMWARE_MAGIC) && (rc != -ENOTSUPP) && - (rc != -EINVAL); + return (magic == AVS_FIRMWARE_MAGIC) && ((rc != -ENOTSUPP) || + (rc != -EINVAL)); } static unsigned int brcm_avs_cpufreq_get(unsigned int cpu) From d4fa8aa0ef869c05559653c23359c68c5bb921b7 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 22 May 2019 11:45:47 -0700 Subject: [PATCH 2650/3715] cpufreq: brcmstb-avs-cpufreq: Fix types for voltage/frequency [ Upstream commit 4c5681fcc684c762b09435de3e82ffeee7769d21 ] What we read back from the register is going to be capped at 32-bits, and cpufreq_freq_table.frequency is an unsigned int. Avoid any possible value truncation by using the appropriate return value. Fixes: de322e085995 ("cpufreq: brcmstb-avs-cpufreq: AVS CPUfreq driver for Broadcom STB SoCs") Signed-off-by: Florian Fainelli Acked-by: Markus Mayer Signed-off-by: Viresh Kumar Signed-off-by: Sasha Levin --- drivers/cpufreq/brcmstb-avs-cpufreq.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c index bae319037658..39c462711eae 100644 --- a/drivers/cpufreq/brcmstb-avs-cpufreq.c +++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c @@ -468,12 +468,12 @@ static int brcm_avs_set_pstate(struct private_data *priv, unsigned int pstate) return __issue_avs_command(priv, AVS_CMD_SET_PSTATE, true, args); } -static unsigned long brcm_avs_get_voltage(void __iomem *base) +static u32 brcm_avs_get_voltage(void __iomem *base) { return readl(base + AVS_MBOX_VOLTAGE1); } -static unsigned long brcm_avs_get_frequency(void __iomem *base) +static u32 brcm_avs_get_frequency(void __iomem *base) { return readl(base + AVS_MBOX_FREQUENCY) * 1000; /* in kHz */ } @@ -973,14 +973,14 @@ static ssize_t show_brcm_avs_voltage(struct cpufreq_policy *policy, char *buf) { struct private_data *priv = policy->driver_data; - return sprintf(buf, "0x%08lx\n", brcm_avs_get_voltage(priv->base)); + return sprintf(buf, "0x%08x\n", brcm_avs_get_voltage(priv->base)); } static ssize_t show_brcm_avs_frequency(struct cpufreq_policy *policy, char *buf) { struct private_data *priv = policy->driver_data; - return sprintf(buf, "0x%08lx\n", brcm_avs_get_frequency(priv->base)); + return sprintf(buf, "0x%08x\n", brcm_avs_get_frequency(priv->base)); } cpufreq_freq_attr_ro(brcm_avs_pstate); From 317357415bef2cc2ec618d4c4f274abbcdc20bab Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 4 Jun 2019 10:55:15 -0400 Subject: [PATCH 2651/3715] media: vivid: fix incorrect assignment operation when setting video mode [ Upstream commit d4ec9550e4b2d2e357a46fdc65d8ef3d4d15984c ] The assigment of FB_VMODE_NONINTERLACE to var->vmode should be a bit-wise or of FB_VMODE_NONINTERLACE instead of an assignment, otherwise the previous clearing of the FB_VMODE_MASK bits of var->vmode makes no sense and is redundant. Addresses-Coverity: ("Unused value") Fixes: ad4e02d5081d ("[media] vivid: add a simple framebuffer device for overlay testing") Signed-off-by: Colin Ian King Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/vivid/vivid-osd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/vivid/vivid-osd.c b/drivers/media/platform/vivid/vivid-osd.c index bdc380b14e0c..a95b7c56569e 100644 --- a/drivers/media/platform/vivid/vivid-osd.c +++ b/drivers/media/platform/vivid/vivid-osd.c @@ -167,7 +167,7 @@ static int _vivid_fb_check_var(struct fb_var_screeninfo *var, struct vivid_dev * var->nonstd = 0; var->vmode &= ~FB_VMODE_MASK; - var->vmode = FB_VMODE_NONINTERLACED; + var->vmode |= FB_VMODE_NONINTERLACED; /* Dummy values */ var->hsync_len = 24; From 21b0815f71553fe95ddc16caf5b33eecad2bd56d Mon Sep 17 00:00:00 2001 From: George Wilkie Date: Fri, 7 Jun 2019 11:49:41 +0100 Subject: [PATCH 2652/3715] mpls: fix warning with multi-label encap [ Upstream commit 2f3f7d1fa0d1039b24a55d127ed190f196fc3e79 ] If you configure a route with multiple labels, e.g. ip route add 10.10.3.0/24 encap mpls 16/100 via 10.10.2.2 dev ens4 A warning is logged: kernel: [ 130.561819] netlink: 'ip': attribute type 1 has an invalid length. This happens because mpls_iptunnel_policy has set the type of MPLS_IPTUNNEL_DST to fixed size NLA_U32. Change it to a minimum size. nla_get_labels() does the remaining validation. Fixes: e3e4712ec096 ("mpls: ip tunnel support") Signed-off-by: George Wilkie Reviewed-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/mpls/mpls_iptunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index 6e558a419f60..6c01166f972b 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -28,7 +28,7 @@ #include "internal.h" static const struct nla_policy mpls_iptunnel_policy[MPLS_IPTUNNEL_MAX + 1] = { - [MPLS_IPTUNNEL_DST] = { .type = NLA_U32 }, + [MPLS_IPTUNNEL_DST] = { .len = sizeof(u32) }, [MPLS_IPTUNNEL_TTL] = { .type = NLA_U8 }, }; From f7edbb6415098817533b9024694b2a6c05c74fc1 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Mon, 3 Jun 2019 08:53:31 +0200 Subject: [PATCH 2653/3715] iommu/vt-d: Duplicate iommu_resv_region objects per device list [ Upstream commit 5f64ce5411b467f1cfea6c63e2494c22b773582b ] intel_iommu_get_resv_regions() aims to return the list of reserved regions accessible by a given @device. However several devices can access the same reserved memory region and when building the list it is not safe to use a single iommu_resv_region object, whose container is the RMRR. This iommu_resv_region must be duplicated per device reserved region list. Let's remove the struct iommu_resv_region from the RMRR unit and allocate the iommu_resv_region directly in intel_iommu_get_resv_regions(). We hold the dmar_global_lock instead of the rcu-lock to allow sleeping. Fixes: 0659b8dc45a6 ("iommu/vt-d: Implement reserved region get/put callbacks") Signed-off-by: Eric Auger Reviewed-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/intel-iommu.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 4fbd183d973a..b48666849dbe 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -442,7 +442,6 @@ struct dmar_rmrr_unit { u64 end_address; /* reserved end address */ struct dmar_dev_scope *devices; /* target devices */ int devices_cnt; /* target device count */ - struct iommu_resv_region *resv; /* reserved region handle */ }; struct dmar_atsr_unit { @@ -4171,7 +4170,6 @@ static inline void init_iommu_pm_ops(void) {} int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg) { struct acpi_dmar_reserved_memory *rmrr; - int prot = DMA_PTE_READ|DMA_PTE_WRITE; struct dmar_rmrr_unit *rmrru; size_t length; @@ -4185,22 +4183,16 @@ int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg) rmrru->end_address = rmrr->end_address; length = rmrr->end_address - rmrr->base_address + 1; - rmrru->resv = iommu_alloc_resv_region(rmrr->base_address, length, prot, - IOMMU_RESV_DIRECT); - if (!rmrru->resv) - goto free_rmrru; rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1), ((void *)rmrr) + rmrr->header.length, &rmrru->devices_cnt); if (rmrru->devices_cnt && rmrru->devices == NULL) - goto free_all; + goto free_rmrru; list_add(&rmrru->list, &dmar_rmrr_units); return 0; -free_all: - kfree(rmrru->resv); free_rmrru: kfree(rmrru); out: @@ -4418,7 +4410,6 @@ static void intel_iommu_free_dmars(void) list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) { list_del(&rmrru->list); dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt); - kfree(rmrru->resv); kfree(rmrru); } @@ -5186,22 +5177,33 @@ static void intel_iommu_remove_device(struct device *dev) static void intel_iommu_get_resv_regions(struct device *device, struct list_head *head) { + int prot = DMA_PTE_READ | DMA_PTE_WRITE; struct iommu_resv_region *reg; struct dmar_rmrr_unit *rmrr; struct device *i_dev; int i; - rcu_read_lock(); + down_read(&dmar_global_lock); for_each_rmrr_units(rmrr) { for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, i, i_dev) { + struct iommu_resv_region *resv; + size_t length; + if (i_dev != device) continue; - list_add_tail(&rmrr->resv->list, head); + length = rmrr->end_address - rmrr->base_address + 1; + resv = iommu_alloc_resv_region(rmrr->base_address, + length, prot, + IOMMU_RESV_DIRECT); + if (!resv) + break; + + list_add_tail(&resv->list, head); } } - rcu_read_unlock(); + up_read(&dmar_global_lock); reg = iommu_alloc_resv_region(IOAPIC_RANGE_START, IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1, @@ -5216,10 +5218,8 @@ static void intel_iommu_put_resv_regions(struct device *dev, { struct iommu_resv_region *entry, *next; - list_for_each_entry_safe(entry, next, head, list) { - if (entry->type == IOMMU_RESV_MSI) - kfree(entry); - } + list_for_each_entry_safe(entry, next, head, list) + kfree(entry); } #ifdef CONFIG_INTEL_IOMMU_SVM From e0f944d4cb3a9e4e8ebebf5379ede31cc3a3fbe2 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Thu, 13 Jun 2019 11:29:40 +0300 Subject: [PATCH 2654/3715] qed: iWARP - Use READ_ONCE and smp_store_release to access ep->state [ Upstream commit 6117561e1bb30b2fe7f51e1961f34dbedd0bec8a ] Destroy QP waits for it's ep object state to be set to CLOSED before proceeding. ep->state can be updated from a different context. Add smp_store_release/READ_ONCE to synchronize. Fixes: fc4c6065e661 ("qed: iWARP implement disconnect flows") Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index bb09f5a9846f..38d0f62bf037 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -509,7 +509,8 @@ int qed_iwarp_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp) /* Make sure ep is closed before returning and freeing memory. */ if (ep) { - while (ep->state != QED_IWARP_EP_CLOSED && wait_count++ < 200) + while (READ_ONCE(ep->state) != QED_IWARP_EP_CLOSED && + wait_count++ < 200) msleep(100); if (ep->state != QED_IWARP_EP_CLOSED) @@ -991,8 +992,6 @@ qed_iwarp_mpa_complete(struct qed_hwfn *p_hwfn, params.ep_context = ep; - ep->state = QED_IWARP_EP_CLOSED; - switch (fw_return_code) { case RDMA_RETURN_OK: ep->qp->max_rd_atomic_req = ep->cm_info.ord; @@ -1052,6 +1051,10 @@ qed_iwarp_mpa_complete(struct qed_hwfn *p_hwfn, break; } + if (fw_return_code != RDMA_RETURN_OK) + /* paired with READ_ONCE in destroy_qp */ + smp_store_release(&ep->state, QED_IWARP_EP_CLOSED); + ep->event_cb(ep->cb_context, ¶ms); /* on passive side, if there is no associated QP (REJECT) we need to @@ -2069,7 +2072,9 @@ void qed_iwarp_qp_in_error(struct qed_hwfn *p_hwfn, params.status = (fw_return_code == IWARP_QP_IN_ERROR_GOOD_CLOSE) ? 0 : -ECONNRESET; - ep->state = QED_IWARP_EP_CLOSED; + /* paired with READ_ONCE in destroy_qp */ + smp_store_release(&ep->state, QED_IWARP_EP_CLOSED); + spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock); list_del(&ep->list_entry); spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock); @@ -2157,7 +2162,8 @@ qed_iwarp_tcp_connect_unsuccessful(struct qed_hwfn *p_hwfn, params.event = QED_IWARP_EVENT_ACTIVE_COMPLETE; params.ep_context = ep; params.cm_info = &ep->cm_info; - ep->state = QED_IWARP_EP_CLOSED; + /* paired with READ_ONCE in destroy_qp */ + smp_store_release(&ep->state, QED_IWARP_EP_CLOSED); switch (fw_return_code) { case IWARP_CONN_ERROR_TCP_CONNECT_INVALID_PACKET: From 55221779bb3255b1d8e9fd20ed5d5998d65cd6f4 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Tue, 11 Jun 2019 23:45:04 -0500 Subject: [PATCH 2655/3715] powerpc/cacheinfo: add cacheinfo_teardown, cacheinfo_rebuild [ Upstream commit d4aa219a074a5abaf95a756b9f0d190b5c03a945 ] Allow external callers to force the cacheinfo code to release all its references to cache nodes, e.g. before processing device tree updates post-migration, and to rebuild the hierarchy afterward. CPU online/offline must be blocked by callers; enforce this. Fixes: 410bccf97881 ("powerpc/pseries: Partition migration in the kernel") Signed-off-by: Nathan Lynch Reviewed-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/kernel/cacheinfo.c | 21 +++++++++++++++++++++ arch/powerpc/kernel/cacheinfo.h | 4 ++++ 2 files changed, 25 insertions(+) diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index a8f20e5928e1..9edb45430133 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -865,4 +865,25 @@ void cacheinfo_cpu_offline(unsigned int cpu_id) if (cache) cache_cpu_clear(cache, cpu_id); } + +void cacheinfo_teardown(void) +{ + unsigned int cpu; + + lockdep_assert_cpus_held(); + + for_each_online_cpu(cpu) + cacheinfo_cpu_offline(cpu); +} + +void cacheinfo_rebuild(void) +{ + unsigned int cpu; + + lockdep_assert_cpus_held(); + + for_each_online_cpu(cpu) + cacheinfo_cpu_online(cpu); +} + #endif /* (CONFIG_PPC_PSERIES && CONFIG_SUSPEND) || CONFIG_HOTPLUG_CPU */ diff --git a/arch/powerpc/kernel/cacheinfo.h b/arch/powerpc/kernel/cacheinfo.h index 955f5e999f1b..52bd3fc6642d 100644 --- a/arch/powerpc/kernel/cacheinfo.h +++ b/arch/powerpc/kernel/cacheinfo.h @@ -6,4 +6,8 @@ extern void cacheinfo_cpu_online(unsigned int cpu_id); extern void cacheinfo_cpu_offline(unsigned int cpu_id); +/* Allow migration/suspend to tear down and rebuild the hierarchy. */ +extern void cacheinfo_teardown(void); +extern void cacheinfo_rebuild(void); + #endif /* _PPC_CACHEINFO_H */ From 6a275c6ff76d87d9dd4d59c35413aaa4a6a5c0d4 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Tue, 11 Jun 2019 23:45:06 -0500 Subject: [PATCH 2656/3715] powerpc/pseries/mobility: rebuild cacheinfo hierarchy post-migration [ Upstream commit e610a466d16a086e321f0bd421e2fc75cff28605 ] It's common for the platform to replace the cache device nodes after a migration. Since the cacheinfo code is never informed about this, it never drops its references to the source system's cache nodes, causing it to wind up in an inconsistent state resulting in warnings and oopses as soon as CPU online/offline occurs after the migration, e.g. cache for /cpus/l3-cache@3113(Unified) refers to cache for /cpus/l2-cache@200d(Unified) WARNING: CPU: 15 PID: 86 at arch/powerpc/kernel/cacheinfo.c:176 release_cache+0x1bc/0x1d0 [...] NIP release_cache+0x1bc/0x1d0 LR release_cache+0x1b8/0x1d0 Call Trace: release_cache+0x1b8/0x1d0 (unreliable) cacheinfo_cpu_offline+0x1c4/0x2c0 unregister_cpu_online+0x1b8/0x260 cpuhp_invoke_callback+0x114/0xf40 cpuhp_thread_fun+0x270/0x310 smpboot_thread_fn+0x2c8/0x390 kthread+0x1b8/0x1c0 ret_from_kernel_thread+0x5c/0x68 Using device tree notifiers won't work since we want to rebuild the hierarchy only after all the removals and additions have occurred and the device tree is in a consistent state. Call cacheinfo_teardown() before processing device tree updates, and rebuild the hierarchy afterward. Fixes: 410bccf97881 ("powerpc/pseries: Partition migration in the kernel") Signed-off-by: Nathan Lynch Reviewed-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/platforms/pseries/mobility.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 9739a055e5f7..2d3668acb6ef 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -23,6 +23,7 @@ #include #include #include "pseries.h" +#include "../../kernel/cacheinfo.h" static struct kobject *mobility_kobj; @@ -359,11 +360,20 @@ void post_mobility_fixup(void) */ cpus_read_lock(); + /* + * It's common for the destination firmware to replace cache + * nodes. Release all of the cacheinfo hierarchy's references + * before updating the device tree. + */ + cacheinfo_teardown(); + rc = pseries_devicetree_update(MIGRATION_SCOPE); if (rc) printk(KERN_ERR "Post-mobility device tree update " "failed: %d\n", rc); + cacheinfo_rebuild(); + cpus_read_unlock(); /* Possibly switch to a new RFI flush type */ From 68925f1521a2f57f7785a9377fe6baeb6ef0f986 Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Tue, 21 May 2019 08:00:30 -0700 Subject: [PATCH 2657/3715] drm/msm/mdp5: Fix mdp5_cfg_init error return [ Upstream commit fc19cbb785d7bbd1a1af26229b5240a3ab332744 ] If mdp5_cfg_init fails because of an unknown major version, a null pointer dereference occurs. This is because the caller of init expects error pointers, but init returns NULL on error. Fix this by returning the expected values on error. Fixes: 2e362e1772b8 (drm/msm/mdp5: introduce mdp5_cfg module) Signed-off-by: Jeffrey Hugo Reviewed-by: Bjorn Andersson Signed-off-by: Rob Clark Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c index 824067d2d427..42f0ecb0cf35 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c @@ -635,7 +635,7 @@ fail: if (cfg_handler) mdp5_cfg_destroy(cfg_handler); - return NULL; + return ERR_PTR(ret); } static struct mdp5_cfg_platform *mdp5_get_config(struct platform_device *dev) From 9085504b3b57fca32dc54ce56c8499512c60ddeb Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 17 Jun 2019 11:11:10 -0700 Subject: [PATCH 2658/3715] net: netem: fix backlog accounting for corrupted GSO frames [ Upstream commit 177b8007463c4f36c9a2c7ce7aa9875a4cad9bd5 ] When GSO frame has to be corrupted netem uses skb_gso_segment() to produce the list of frames, and re-enqueues the segments one by one. The backlog length has to be adjusted to account for new frames. The current calculation is incorrect, leading to wrong backlog lengths in the parent qdisc (both bytes and packets), and incorrect packet backlog count in netem itself. Parent backlog goes negative, netem's packet backlog counts all non-first segments twice (thus remaining non-zero even after qdisc is emptied). Move the variables used to count the adjustment into local scope to make 100% sure they aren't used at any stage in backports. Fixes: 6071bd1aa13e ("netem: Segment GSO packets on enqueue") Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Acked-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/sched/sch_netem.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 6266121a03f9..ede0a24e67eb 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -431,8 +431,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct netem_skb_cb *cb; struct sk_buff *skb2; struct sk_buff *segs = NULL; - unsigned int len = 0, last_len, prev_len = qdisc_pkt_len(skb); - int nb = 0; + unsigned int prev_len = qdisc_pkt_len(skb); int count = 1; int rc = NET_XMIT_SUCCESS; int rc_drop = NET_XMIT_DROP; @@ -489,6 +488,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, segs = netem_segment(skb, sch, to_free); if (!segs) return rc_drop; + qdisc_skb_cb(segs)->pkt_len = segs->len; } else { segs = skb; } @@ -579,6 +579,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, finish_segs: if (segs) { + unsigned int len, last_len; + int nb = 0; + + len = skb->len; + while (segs) { skb2 = segs->next; segs->next = NULL; @@ -594,9 +599,7 @@ finish_segs: } segs = skb2; } - sch->q.qlen += nb; - if (nb > 1) - qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len); + qdisc_tree_reduce_backlog(sch, -nb, prev_len - len); } return NET_XMIT_SUCCESS; } From b6668f5207180535ae8ca2e33a80464c9e291c5b Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 18 Jun 2019 20:43:01 +0200 Subject: [PATCH 2659/3715] net/af_iucv: always register net_device notifier [ Upstream commit 06996c1d4088a0d5f3e7789d7f96b4653cc947cc ] Even when running as VM guest (ie pr_iucv != NULL), af_iucv can still open HiperTransport-based connections. For robust operation these connections require the af_iucv_netdev_notifier, so register it unconditionally. Also handle any error that register_netdevice_notifier() returns. Fixes: 9fbd87d41392 ("af_iucv: handle netdev events") Signed-off-by: Julian Wiedmann Reviewed-by: Ursula Braun Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/iucv/af_iucv.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index ca98276c2709..7a9cbc9502d9 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -2446,6 +2446,13 @@ out: return err; } +static void afiucv_iucv_exit(void) +{ + device_unregister(af_iucv_dev); + driver_unregister(&af_iucv_driver); + pr_iucv->iucv_unregister(&af_iucv_handler, 0); +} + static int __init afiucv_init(void) { int err; @@ -2479,11 +2486,18 @@ static int __init afiucv_init(void) err = afiucv_iucv_init(); if (err) goto out_sock; - } else - register_netdevice_notifier(&afiucv_netdev_notifier); + } + + err = register_netdevice_notifier(&afiucv_netdev_notifier); + if (err) + goto out_notifier; + dev_add_pack(&iucv_packet_type); return 0; +out_notifier: + if (pr_iucv) + afiucv_iucv_exit(); out_sock: sock_unregister(PF_IUCV); out_proto: @@ -2497,12 +2511,11 @@ out: static void __exit afiucv_exit(void) { if (pr_iucv) { - device_unregister(af_iucv_dev); - driver_unregister(&af_iucv_driver); - pr_iucv->iucv_unregister(&af_iucv_handler, 0); + afiucv_iucv_exit(); symbol_put(iucv_if); - } else - unregister_netdevice_notifier(&afiucv_netdev_notifier); + } + + unregister_netdevice_notifier(&afiucv_netdev_notifier); dev_remove_pack(&iucv_packet_type); sock_unregister(PF_IUCV); proto_unregister(&iucv_proto); From d999896bec7cd5cedb3d9afc4ea1ff6db041be5b Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 20 Jun 2019 12:20:02 +0300 Subject: [PATCH 2660/3715] ASoC: ti: davinci-mcasp: Fix slot mask settings when using multiple AXRs [ Upstream commit fd14f4436fd47d5418023c90e933e66d3645552e ] If multiple serializers are connected in the system and the number of channels will need to use more than one serializer the mask to enable the serializers were left to 0 if tdm_mask is provided Fixes: dd55ff8346a97 ("ASoC: davinci-mcasp: Add set_tdm_slots() support") Signed-off-by: Peter Ujfalusi Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/davinci/davinci-mcasp.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/sound/soc/davinci/davinci-mcasp.c b/sound/soc/davinci/davinci-mcasp.c index 07bac9ea65c4..e10e03800cce 100644 --- a/sound/soc/davinci/davinci-mcasp.c +++ b/sound/soc/davinci/davinci-mcasp.c @@ -882,14 +882,13 @@ static int mcasp_i2s_hw_param(struct davinci_mcasp *mcasp, int stream, active_slots = hweight32(mcasp->tdm_mask[stream]); active_serializers = (channels + active_slots - 1) / active_slots; - if (active_serializers == 1) { + if (active_serializers == 1) active_slots = channels; - for (i = 0; i < total_slots; i++) { - if ((1 << i) & mcasp->tdm_mask[stream]) { - mask |= (1 << i); - if (--active_slots <= 0) - break; - } + for (i = 0; i < total_slots; i++) { + if ((1 << i) & mcasp->tdm_mask[stream]) { + mask |= (1 << i); + if (--active_slots <= 0) + break; } } } else { From 59010f8ca3b284b9da05f5c2721a95b5275ce639 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 4 Jun 2019 12:23:35 +0800 Subject: [PATCH 2661/3715] rtc: pcf8563: Fix interrupt trigger method [ Upstream commit 65f662cbf829834fa4d94190eb7691e5a9cb92d8 ] The PCF8563 datasheet says the interrupt line is active low and stays active until the events are cleared, i.e. a level trigger interrupt. Fix the flags used to request the interrupt. Fixes: ede3e9d47cca ("drivers/rtc/rtc-pcf8563.c: add alarm support") Signed-off-by: Chen-Yu Tsai Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- drivers/rtc/rtc-pcf8563.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c index 8c836c51a508..ef04472dde1d 100644 --- a/drivers/rtc/rtc-pcf8563.c +++ b/drivers/rtc/rtc-pcf8563.c @@ -605,7 +605,7 @@ static int pcf8563_probe(struct i2c_client *client, if (client->irq > 0) { err = devm_request_threaded_irq(&client->dev, client->irq, NULL, pcf8563_irq, - IRQF_SHARED|IRQF_ONESHOT|IRQF_TRIGGER_FALLING, + IRQF_SHARED | IRQF_ONESHOT | IRQF_TRIGGER_LOW, pcf8563_driver.driver.name, client); if (err) { dev_err(&client->dev, "unable to request IRQ %d\n", From 9a473d3bd98c7c1f717f5bdaab64b74e5b1e8b1b Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 4 Jun 2019 12:23:36 +0800 Subject: [PATCH 2662/3715] rtc: pcf8563: Clear event flags and disable interrupts before requesting irq [ Upstream commit 3572e8aea3bf925dac1dbf86127657c39fe5c254 ] Besides the alarm, the PCF8563 also has a timer triggered interrupt. In cases where the previous system left the timer and interrupts on, or somehow the bits got enabled, the interrupt would keep triggering as the kernel doesn't know about it. Clear both the alarm and timer event flags, and disable the interrupts, before requesting the interrupt line. Fixes: ede3e9d47cca ("drivers/rtc/rtc-pcf8563.c: add alarm support") Fixes: a45d528aab8b ("rtc: pcf8563: clear expired alarm at boot time") Signed-off-by: Chen-Yu Tsai Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- drivers/rtc/rtc-pcf8563.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c index ef04472dde1d..4d0b81f9805f 100644 --- a/drivers/rtc/rtc-pcf8563.c +++ b/drivers/rtc/rtc-pcf8563.c @@ -563,7 +563,6 @@ static int pcf8563_probe(struct i2c_client *client, struct pcf8563 *pcf8563; int err; unsigned char buf; - unsigned char alm_pending; dev_dbg(&client->dev, "%s\n", __func__); @@ -587,13 +586,13 @@ static int pcf8563_probe(struct i2c_client *client, return err; } - err = pcf8563_get_alarm_mode(client, NULL, &alm_pending); - if (err) { - dev_err(&client->dev, "%s: read error\n", __func__); + /* Clear flags and disable interrupts */ + buf = 0; + err = pcf8563_write_block_data(client, PCF8563_REG_ST2, 1, &buf); + if (err < 0) { + dev_err(&client->dev, "%s: write error\n", __func__); return err; } - if (alm_pending) - pcf8563_set_alarm_mode(client, 0); pcf8563->rtc = devm_rtc_device_register(&client->dev, pcf8563_driver.driver.name, From bc99903915ab5d06368c97058e017ae4580b13e5 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 24 Jun 2019 09:09:47 -0700 Subject: [PATCH 2663/3715] drm/msm/a3xx: remove TPL1 regs from snapshot [ Upstream commit f47bee2ba447bebc304111c16ef1e1a73a9744dd ] These regs are write-only, and the hw throws a hissy-fit (ie. reboots) when we try to read them for GPU state snapshot, in response to a GPU hang. It is rather impolite when GPU recovery triggers an insta- reboot, so lets remove the TPL1 registers from the snapshot. Fixes: 7198e6b03155 drm/msm: add a3xx gpu support Signed-off-by: Rob Clark Reviewed-by: Jordan Crouse Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/adreno/a3xx_gpu.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c index 7791313405b5..c8671b1578c6 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c @@ -394,19 +394,17 @@ static const unsigned int a3xx_registers[] = { 0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e, 0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8, 0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7, - 0x22ff, 0x22ff, 0x2340, 0x2343, 0x2348, 0x2349, 0x2350, 0x2356, - 0x2360, 0x2360, 0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d, - 0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472, - 0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef, - 0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511, - 0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed, - 0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a, - 0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce, - 0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec, - 0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, 0x2748, 0x2749, - 0x2750, 0x2756, 0x2760, 0x2760, 0x300c, 0x300e, 0x301c, 0x301d, - 0x302a, 0x302a, 0x302c, 0x302d, 0x3030, 0x3031, 0x3034, 0x3036, - 0x303c, 0x303c, 0x305e, 0x305f, + 0x22ff, 0x22ff, 0x2340, 0x2343, 0x2440, 0x2440, 0x2444, 0x2444, + 0x2448, 0x244d, 0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, + 0x2472, 0x2472, 0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, + 0x24e4, 0x24ef, 0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, + 0x2510, 0x2511, 0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, + 0x25ec, 0x25ed, 0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617, + 0x261a, 0x261a, 0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, + 0x26c4, 0x26ce, 0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, + 0x26ec, 0x26ec, 0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, + 0x300c, 0x300e, 0x301c, 0x301d, 0x302a, 0x302a, 0x302c, 0x302d, + 0x3030, 0x3031, 0x3034, 0x3036, 0x303c, 0x303c, 0x305e, 0x305f, ~0 /* sentinel */ }; From 232ac0764ac76ff4f8eef9a8c24a661dbd0a6382 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 4 Jun 2019 09:59:53 +0530 Subject: [PATCH 2664/3715] perf/ioctl: Add check for the sample_period value [ Upstream commit 913a90bc5a3a06b1f04c337320e9aeee2328dd77 ] perf_event_open() limits the sample_period to 63 bits. See: 0819b2e30ccb ("perf: Limit perf_event_attr::sample_period to 63 bits") Make ioctl() consistent with it. Also on PowerPC, negative sample_period could cause a recursive PMIs leading to a hang (reported when running perf-fuzzer). Signed-off-by: Ravi Bangoria Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: maddy@linux.vnet.ibm.com Cc: mpe@ellerman.id.au Fixes: 0819b2e30ccb ("perf: Limit perf_event_attr::sample_period to 63 bits") Link: https://lkml.kernel.org/r/20190604042953.914-1-ravi.bangoria@linux.ibm.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/events/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index ea4f3f7a0c6f..2ac73b4cb8a9 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4762,6 +4762,9 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg) if (perf_event_check_period(event, value)) return -EINVAL; + if (!event->attr.freq && (value & (1ULL << 63))) + return -EINVAL; + event_function_call(event, __perf_event_period, &value); return 0; From f08cd3c7db421905dd1c15b2b2f9a4d0a3c5ab8f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 13 Jun 2019 16:32:32 +0300 Subject: [PATCH 2665/3715] dmaengine: hsu: Revert "set HSU_CH_MTSR to memory width" [ Upstream commit c24a5c735f87d0549060de31367c095e8810b895 ] The commit 080edf75d337 ("dmaengine: hsu: set HSU_CH_MTSR to memory width") has been mistakenly submitted. The further investigations show that the original code does better job since the memory side transfer size has never been configured by DMA users. As per latest revision of documentation: "Channel minimum transfer size (CHnMTSR)... For IOSF UART, maximum value that can be programmed is 64 and minimum value that can be programmed is 1." This reverts commit 080edf75d337d35faa6fc3df99342b10d2848d16. Fixes: 080edf75d337 ("dmaengine: hsu: set HSU_CH_MTSR to memory width") Signed-off-by: Andy Shevchenko Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/hsu/hsu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/hsu/hsu.c b/drivers/dma/hsu/hsu.c index 29d04ca71d52..15525a2b8ebd 100644 --- a/drivers/dma/hsu/hsu.c +++ b/drivers/dma/hsu/hsu.c @@ -64,10 +64,10 @@ static void hsu_dma_chan_start(struct hsu_dma_chan *hsuc) if (hsuc->direction == DMA_MEM_TO_DEV) { bsr = config->dst_maxburst; - mtsr = config->src_addr_width; + mtsr = config->dst_addr_width; } else if (hsuc->direction == DMA_DEV_TO_MEM) { bsr = config->src_maxburst; - mtsr = config->dst_addr_width; + mtsr = config->src_addr_width; } hsu_chan_disable(hsuc); From de2970e92948b7de60ba41f36b05aa1740e77b6a Mon Sep 17 00:00:00 2001 From: Nathan Huckleberry Date: Tue, 11 Jun 2019 14:11:34 -0700 Subject: [PATCH 2666/3715] clk: qcom: Fix -Wunused-const-variable [ Upstream commit da642427bd7710ec4f4140f693f59aa8521a358c ] Clang produces the following warning drivers/clk/qcom/gcc-msm8996.c:133:32: warning: unused variable 'gcc_xo_gpll0_gpll2_gpll3_gpll0_early_div_map' [-Wunused-const-variable] static const struct parent_map gcc_xo_gpll0_gpll2_gpll3_gpll0_early_div_map[] = { ^drivers/clk/qcom/gcc-msm8996.c:141:27: warning: unused variable 'gcc_xo_gpll0_gpll2_gpll3_gpll0_early_div' [-Wunused-const-variable] static const char * const gcc_xo_gpll0_gpll2_gpll3_gpll0_early_div[] = { ^ drivers/clk/qcom/gcc-msm8996.c:187:32: warning: unused variable 'gcc_xo_gpll0_gpll2_gpll3_gpll1_gpll4_gpll0_early_div_map' [-Wunused-const-variable] static const struct parent_map gcc_xo_gpll0_gpll2_gpll3_gpll1_gpll4_gpll0_early_div_map[] = { ^ drivers/clk/qcom/gcc-msm8996.c:197:27: warning: unused variable 'gcc_xo_gpll0_gpll2_gpll3_gpll1_gpll4_gpll0_early_div' [-Wunused-const-variable] static const char * const gcc_xo_gpll0_gpll2_gpll3_gpll1_gpll4_gpll0_early_div[] = { It looks like these were never used. Fixes: b1e010c0730a ("clk: qcom: Add MSM8996 Global Clock Control (GCC) driver") Cc: clang-built-linux@googlegroups.com Link: https://github.com/ClangBuiltLinux/linux/issues/518 Suggested-by: Nathan Chancellor Signed-off-by: Nathan Huckleberry Reviewed-by: Nathan Chancellor Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/qcom/gcc-msm8996.c | 36 ---------------------------------- 1 file changed, 36 deletions(-) diff --git a/drivers/clk/qcom/gcc-msm8996.c b/drivers/clk/qcom/gcc-msm8996.c index 7ddec886fcd3..c0b043b1bd24 100644 --- a/drivers/clk/qcom/gcc-msm8996.c +++ b/drivers/clk/qcom/gcc-msm8996.c @@ -140,22 +140,6 @@ static const char * const gcc_xo_gpll0_gpll4_gpll0_early_div[] = { "gpll0_early_div" }; -static const struct parent_map gcc_xo_gpll0_gpll2_gpll3_gpll0_early_div_map[] = { - { P_XO, 0 }, - { P_GPLL0, 1 }, - { P_GPLL2, 2 }, - { P_GPLL3, 3 }, - { P_GPLL0_EARLY_DIV, 6 } -}; - -static const char * const gcc_xo_gpll0_gpll2_gpll3_gpll0_early_div[] = { - "xo", - "gpll0", - "gpll2", - "gpll3", - "gpll0_early_div" -}; - static const struct parent_map gcc_xo_gpll0_gpll1_early_div_gpll1_gpll4_gpll0_early_div_map[] = { { P_XO, 0 }, { P_GPLL0, 1 }, @@ -194,26 +178,6 @@ static const char * const gcc_xo_gpll0_gpll2_gpll3_gpll1_gpll2_early_gpll0_early "gpll0_early_div" }; -static const struct parent_map gcc_xo_gpll0_gpll2_gpll3_gpll1_gpll4_gpll0_early_div_map[] = { - { P_XO, 0 }, - { P_GPLL0, 1 }, - { P_GPLL2, 2 }, - { P_GPLL3, 3 }, - { P_GPLL1, 4 }, - { P_GPLL4, 5 }, - { P_GPLL0_EARLY_DIV, 6 } -}; - -static const char * const gcc_xo_gpll0_gpll2_gpll3_gpll1_gpll4_gpll0_early_div[] = { - "xo", - "gpll0", - "gpll2", - "gpll3", - "gpll1", - "gpll4", - "gpll0_early_div" -}; - static struct clk_fixed_factor xo = { .mult = 1, .div = 1, From 383019e9d2dcb8e8e0cc67f5f0c8552ff8ad67b1 Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Wed, 26 Jun 2019 11:27:29 +0100 Subject: [PATCH 2667/3715] nvmem: imx-ocotp: Ensure WAIT bits are preserved when setting timing [ Upstream commit 0493c4792b4eb260441e57f52cc11a9ded48b5a7 ] The i.MX6 and i.MX8 both have a bit-field spanning bits 27:22 called the WAIT field. The WAIT field according to the documentation for both parts "specifies time interval between auto read and write access in one time program. It is given in number of ipg_clk periods." This patch ensures that the relevant field is read and written back to the timing register. Fixes: 0642bac7da42 ("nvmem: imx-ocotp: add write support") Signed-off-by: Bryan O'Donoghue Reviewed-by: Leonard Crestez Signed-off-by: Srinivas Kandagatla Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/nvmem/imx-ocotp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvmem/imx-ocotp.c b/drivers/nvmem/imx-ocotp.c index 193ca8fd350a..0c8c3b9bb6a7 100644 --- a/drivers/nvmem/imx-ocotp.c +++ b/drivers/nvmem/imx-ocotp.c @@ -199,7 +199,8 @@ static int imx_ocotp_write(void *context, unsigned int offset, void *val, strobe_prog = clk_rate / (1000000000 / 10000) + 2 * (DEF_RELAX + 1) - 1; strobe_read = clk_rate / (1000000000 / 40) + 2 * (DEF_RELAX + 1) - 1; - timing = strobe_prog & 0x00000FFF; + timing = readl(priv->base + IMX_OCOTP_ADDR_TIMING) & 0x0FC00000; + timing |= strobe_prog & 0x00000FFF; timing |= (relax << 12) & 0x0000F000; timing |= (strobe_read << 16) & 0x003F0000; From 46942fb18681b3d70441353af7bb39d856ab931f Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sat, 29 Jun 2019 11:16:45 -0400 Subject: [PATCH 2668/3715] bnxt_en: Fix ethtool selftest crash under error conditions. [ Upstream commit d27e2ca1166aefd54d9c48fb6647dee8115a5dfc ] After ethtool loopback packet tests, we re-open the nic for the next IRQ test. If the open fails, we must not proceed with the IRQ test or we will crash with NULL pointer dereference. Fix it by checking the bnxt_open_nic() return code before proceeding. Reported-by: Somasundaram Krishnasamy Fixes: 67fea463fd87 ("bnxt_en: Add interrupt test to ethtool -t selftest.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index fc8e185718a1..963beaa8fabb 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2463,7 +2463,7 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, bool offline = false; u8 test_results = 0; u8 test_mask = 0; - int rc, i; + int rc = 0, i; if (!bp->num_tests || !BNXT_SINGLE_PF(bp)) return; @@ -2521,9 +2521,9 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, } bnxt_hwrm_phy_loopback(bp, false); bnxt_half_close_nic(bp); - bnxt_open_nic(bp, false, true); + rc = bnxt_open_nic(bp, false, true); } - if (bnxt_test_irq(bp)) { + if (rc || bnxt_test_irq(bp)) { buf[BNXT_IRQ_TEST_IDX] = 1; etest->flags |= ETH_TEST_FL_FAILED; } From be4a793b10fd11c2a519d6222799b53d8af2c05d Mon Sep 17 00:00:00 2001 From: Kevin Mitchell Date: Wed, 12 Jun 2019 14:52:03 -0700 Subject: [PATCH 2669/3715] iommu/amd: Make iommu_disable safer [ Upstream commit 3ddbe913e55516d3e2165d43d4d5570761769878 ] Make it safe to call iommu_disable during early init error conditions before mmio_base is set, but after the struct amd_iommu has been added to the amd_iommu_list. For example, this happens if firmware fails to fill in mmio_phys in the ACPI table leading to a NULL pointer dereference in iommu_feature_disable. Fixes: 2c0ae1720c09c ('iommu/amd: Convert iommu initialization to state machine') Signed-off-by: Kevin Mitchell Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/amd_iommu_init.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 6a3cf4d0bd5e..4d2920988d60 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -420,6 +420,9 @@ static void iommu_enable(struct amd_iommu *iommu) static void iommu_disable(struct amd_iommu *iommu) { + if (!iommu->mmio_base) + return; + /* Disable command buffer */ iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); From 92cbf173ea692cd2111d3cedebe408a68a96d0f6 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 21 Jun 2019 15:56:31 +0300 Subject: [PATCH 2670/3715] mfd: intel-lpss: Release IDA resources [ Upstream commit 02f36911c1b41fcd8779fa0c135aab0554333fa5 ] ida instances allocate some internal memory for ->free_bitmap in addition to the base 'struct ida'. Use ida_destroy() to release that memory at module_exit(). Fixes: 4b45efe85263 ("mfd: Add support for Intel Sunrisepoint LPSS devices") Signed-off-by: Andy Shevchenko Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/mfd/intel-lpss.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mfd/intel-lpss.c b/drivers/mfd/intel-lpss.c index b5c4f8f974aa..9ed573e232c0 100644 --- a/drivers/mfd/intel-lpss.c +++ b/drivers/mfd/intel-lpss.c @@ -541,6 +541,7 @@ module_init(intel_lpss_init); static void __exit intel_lpss_exit(void) { + ida_destroy(&intel_lpss_devid_ida); debugfs_remove(intel_lpss_debugfs); } module_exit(intel_lpss_exit); From 37624e6350ed8f773e814311e482fbe6c4d77ca2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Jul 2019 15:55:28 +0100 Subject: [PATCH 2671/3715] rxrpc: Fix uninitialized error code in rxrpc_send_data_packet() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3427beb6375d04e9627c67343872e79341a684ea ] With gcc 4.1: net/rxrpc/output.c: In function ‘rxrpc_send_data_packet’: net/rxrpc/output.c:338: warning: ‘ret’ may be used uninitialized in this function Indeed, if the first jump to the send_fragmentable label is made, and the address family is not handled in the switch() statement, ret will be used uninitialized. Fix this by BUG()'ing as is done in other places in rxrpc where internal support for future address families will need adding. It should not be possible to reach this normally as the address families are checked up-front. Fixes: 5a924b8951f835b5 ("rxrpc: Don't store the rxrpc header in the Tx queue sk_buffs") Reported-by: Geert Uytterhoeven Signed-off-by: David Howells Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/rxrpc/output.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 5b67cb5d47f0..edddbacf33bc 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -404,6 +404,9 @@ send_fragmentable: } break; #endif + + default: + BUG(); } up_write(&conn->params.local->defrag_sem); From 97464364e3e1642315a9f194258b50e6781be139 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 28 Jun 2019 16:59:45 +0200 Subject: [PATCH 2672/3715] devres: allow const resource arguments [ Upstream commit 9dea44c91469512d346e638694c22c30a5273992 ] devm_ioremap_resource() does not currently take 'const' arguments, which results in a warning from the first driver trying to do it anyway: drivers/gpio/gpio-amd-fch.c: In function 'amd_fch_gpio_probe': drivers/gpio/gpio-amd-fch.c:171:49: error: passing argument 2 of 'devm_ioremap_resource' discards 'const' qualifier from pointer target type [-Werror=discarded-qualifiers] priv->base = devm_ioremap_resource(&pdev->dev, &amd_fch_gpio_iores); ^~~~~~~~~~~~~~~~~~~ Change the prototype to allow it, as there is no real reason not to. Fixes: 9bb2e0452508 ("gpio: amd: Make resource struct const") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20190628150049.1108048-1-arnd@arndb.de Acked-by: Greg Kroah-Hartman Reviwed-By: Enrico Weigelt Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- include/linux/device.h | 3 ++- lib/devres.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/device.h b/include/linux/device.h index 66fe271c2544..0b2e67014a83 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -682,7 +682,8 @@ extern unsigned long devm_get_free_pages(struct device *dev, gfp_t gfp_mask, unsigned int order); extern void devm_free_pages(struct device *dev, unsigned long addr); -void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res); +void __iomem *devm_ioremap_resource(struct device *dev, + const struct resource *res); /* allows to add/remove a custom action to devres stack */ int devm_add_action(struct device *dev, void (*action)(void *), void *data); diff --git a/lib/devres.c b/lib/devres.c index 5f2aedd58bc5..40a8b12a8b6b 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -132,7 +132,8 @@ EXPORT_SYMBOL(devm_iounmap); * if (IS_ERR(base)) * return PTR_ERR(base); */ -void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res) +void __iomem *devm_ioremap_resource(struct device *dev, + const struct resource *res) { resource_size_t size; const char *name; From 192a3c131e58c13987dfdb8e5c4a5d2d03114dd4 Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Mon, 24 Jun 2019 19:47:51 +0800 Subject: [PATCH 2673/3715] RDMA/hns: Fixs hw access invalid dma memory error [ Upstream commit ec5bc2cc69b4fc494e04d10fc5226f6f9cf67c56 ] When smmu is enable, if execute the perftest command and then use 'kill -9' to exit, follow this operation repeatedly, the kernel will have a high probability to print the following smmu event: arm-smmu-v3 arm-smmu-v3.1.auto: event 0x10 received: arm-smmu-v3 arm-smmu-v3.1.auto: 0x00007d0000000010 arm-smmu-v3 arm-smmu-v3.1.auto: 0x0000020900000080 arm-smmu-v3 arm-smmu-v3.1.auto: 0x00000000f47cf000 arm-smmu-v3 arm-smmu-v3.1.auto: 0x00000000f47cf000 This is because the hw will periodically refresh the qpc cache until the next reset. This patch fixed it by removing the action that release qpc memory in the 'hns_roce_qp_free' function. Fixes: 9a4435375cd1 ("IB/hns: Add driver files for hns RoCE driver") Signed-off-by: Xi Wang Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hns/hns_roce_qp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 3a37d26889df..281e9987ffc8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -241,7 +241,6 @@ void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) if ((hr_qp->ibqp.qp_type) != IB_QPT_GSI) { hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn); - hns_roce_table_put(hr_dev, &qp_table->qp_table, hr_qp->qpn); } } From 4b8e04d87ab02f5769b0912780cf0c198c4257e9 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Sat, 6 Jul 2019 12:23:41 +0800 Subject: [PATCH 2674/3715] net: pasemi: fix an use-after-free in pasemi_mac_phy_init() [ Upstream commit faf5577f2498cea23011b5c785ef853ded22700b ] The phy_dn variable is still being used in of_phy_connect() after the of_node_put() call, which may result in use-after-free. Fixes: 1dd2d06c0459 ("net: Rework pasemi_mac driver to use of_mdio infrastructure") Signed-off-by: Wen Yang Cc: "David S. Miller" Cc: Thomas Gleixner Cc: Luis Chamberlain Cc: Michael Ellerman Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/pasemi/pasemi_mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pasemi/pasemi_mac.c b/drivers/net/ethernet/pasemi/pasemi_mac.c index 49591d9c2e1b..c9b4ac9d3330 100644 --- a/drivers/net/ethernet/pasemi/pasemi_mac.c +++ b/drivers/net/ethernet/pasemi/pasemi_mac.c @@ -1053,7 +1053,6 @@ static int pasemi_mac_phy_init(struct net_device *dev) dn = pci_device_to_OF_node(mac->pdev); phy_dn = of_parse_phandle(dn, "phy-handle", 0); - of_node_put(phy_dn); mac->link = 0; mac->speed = 0; @@ -1062,6 +1061,7 @@ static int pasemi_mac_phy_init(struct net_device *dev) phydev = of_phy_connect(dev, phy_dn, &pasemi_adjust_link, 0, PHY_INTERFACE_MODE_SGMII); + of_node_put(phy_dn); if (!phydev) { printk(KERN_ERR "%s: Could not attach to phy\n", dev->name); return -ENODEV; From 4077713e39ed3b1672fa833375453800c431b6c2 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 2 Jul 2019 10:18:35 +0100 Subject: [PATCH 2675/3715] scsi: libfc: fix null pointer dereference on a null lport [ Upstream commit 41a6bf6529edd10a6def42e3b2c34a7474bcc2f5 ] Currently if lport is null then the null lport pointer is dereference when printing out debug via the FC_LPORT_DB macro. Fix this by using the more generic FC_LIBFC_DBG debug macro instead that does not use lport. Addresses-Coverity: ("Dereference after null check") Fixes: 7414705ea4ae ("libfc: Add runtime debugging with debug_logging module parameter") Signed-off-by: Colin Ian King Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/libfc/fc_exch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c index 42bcf7f3a0f9..6ba257cbc6d9 100644 --- a/drivers/scsi/libfc/fc_exch.c +++ b/drivers/scsi/libfc/fc_exch.c @@ -2603,7 +2603,7 @@ void fc_exch_recv(struct fc_lport *lport, struct fc_frame *fp) /* lport lock ? */ if (!lport || lport->state == LPORT_ST_DISABLED) { - FC_LPORT_DBG(lport, "Receiving frames for an lport that " + FC_LIBFC_DBG("Receiving frames for an lport that " "has not been initialized correctly\n"); fc_frame_free(fp); return; From ed45a2de36f8ecadebabe6cbc09607c9fc144aef Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Sat, 13 Jul 2019 11:46:28 +0800 Subject: [PATCH 2676/3715] clk: sunxi-ng: v3s: add the missing PLL_DDR1 [ Upstream commit c5ed9475c22c89d5409402055142372e35d26a3f ] The user manual of V3/V3s/S3 declares a PLL_DDR1, however it's forgot when developing the V3s CCU driver. Add back the missing PLL_DDR1. Fixes: d0f11d14b0bc ("clk: sunxi-ng: add support for V3s CCU") Signed-off-by: Icenowy Zheng Signed-off-by: Maxime Ripard Signed-off-by: Sasha Levin --- drivers/clk/sunxi-ng/ccu-sun8i-v3s.c | 19 +++++++++++++++---- drivers/clk/sunxi-ng/ccu-sun8i-v3s.h | 6 ++++-- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c index 9e3f4088724b..c7f9d974b10d 100644 --- a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c +++ b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c @@ -84,7 +84,7 @@ static SUNXI_CCU_NM_WITH_FRAC_GATE_LOCK(pll_ve_clk, "pll-ve", BIT(28), /* lock */ 0); -static SUNXI_CCU_NKM_WITH_GATE_LOCK(pll_ddr_clk, "pll-ddr", +static SUNXI_CCU_NKM_WITH_GATE_LOCK(pll_ddr0_clk, "pll-ddr0", "osc24M", 0x020, 8, 5, /* N */ 4, 2, /* K */ @@ -123,6 +123,14 @@ static SUNXI_CCU_NK_WITH_GATE_LOCK_POSTDIV(pll_periph1_clk, "pll-periph1", 2, /* post-div */ 0); +static SUNXI_CCU_NM_WITH_GATE_LOCK(pll_ddr1_clk, "pll-ddr1", + "osc24M", 0x04c, + 8, 7, /* N */ + 0, 2, /* M */ + BIT(31), /* gate */ + BIT(28), /* lock */ + 0); + static const char * const cpu_parents[] = { "osc32k", "osc24M", "pll-cpu", "pll-cpu" }; static SUNXI_CCU_MUX(cpu_clk, "cpu", cpu_parents, @@ -310,7 +318,8 @@ static SUNXI_CCU_GATE(usb_phy0_clk, "usb-phy0", "osc24M", static SUNXI_CCU_GATE(usb_ohci0_clk, "usb-ohci0", "osc24M", 0x0cc, BIT(16), 0); -static const char * const dram_parents[] = { "pll-ddr", "pll-periph0-2x" }; +static const char * const dram_parents[] = { "pll-ddr0", "pll-ddr1", + "pll-periph0-2x" }; static SUNXI_CCU_M_WITH_MUX(dram_clk, "dram", dram_parents, 0x0f4, 0, 4, 20, 2, CLK_IS_CRITICAL); @@ -369,10 +378,11 @@ static struct ccu_common *sun8i_v3s_ccu_clks[] = { &pll_audio_base_clk.common, &pll_video_clk.common, &pll_ve_clk.common, - &pll_ddr_clk.common, + &pll_ddr0_clk.common, &pll_periph0_clk.common, &pll_isp_clk.common, &pll_periph1_clk.common, + &pll_ddr1_clk.common, &cpu_clk.common, &axi_clk.common, &ahb1_clk.common, @@ -457,11 +467,12 @@ static struct clk_hw_onecell_data sun8i_v3s_hw_clks = { [CLK_PLL_AUDIO_8X] = &pll_audio_8x_clk.hw, [CLK_PLL_VIDEO] = &pll_video_clk.common.hw, [CLK_PLL_VE] = &pll_ve_clk.common.hw, - [CLK_PLL_DDR] = &pll_ddr_clk.common.hw, + [CLK_PLL_DDR0] = &pll_ddr0_clk.common.hw, [CLK_PLL_PERIPH0] = &pll_periph0_clk.common.hw, [CLK_PLL_PERIPH0_2X] = &pll_periph0_2x_clk.hw, [CLK_PLL_ISP] = &pll_isp_clk.common.hw, [CLK_PLL_PERIPH1] = &pll_periph1_clk.common.hw, + [CLK_PLL_DDR1] = &pll_ddr1_clk.common.hw, [CLK_CPU] = &cpu_clk.common.hw, [CLK_AXI] = &axi_clk.common.hw, [CLK_AHB1] = &ahb1_clk.common.hw, diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.h b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.h index 4a4d36fdad96..a091b7217dfd 100644 --- a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.h +++ b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.h @@ -29,7 +29,7 @@ #define CLK_PLL_AUDIO_8X 5 #define CLK_PLL_VIDEO 6 #define CLK_PLL_VE 7 -#define CLK_PLL_DDR 8 +#define CLK_PLL_DDR0 8 #define CLK_PLL_PERIPH0 9 #define CLK_PLL_PERIPH0_2X 10 #define CLK_PLL_ISP 11 @@ -58,6 +58,8 @@ /* And the GPU module clock is exported */ -#define CLK_NUMBER (CLK_MIPI_CSI + 1) +#define CLK_PLL_DDR1 74 + +#define CLK_NUMBER (CLK_PLL_DDR1 + 1) #endif /* _CCU_SUN8I_H3_H_ */ From 07f604c86825735985d4f1c8a4bd01e76132baa1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Jul 2019 13:03:48 +0200 Subject: [PATCH 2677/3715] PM: sleep: Fix possible overflow in pm_system_cancel_wakeup() [ Upstream commit 2933954b71f10d392764f95eec0f0aa2d103054b ] It is not actually guaranteed that pm_abort_suspend will be nonzero when pm_system_cancel_wakeup() is called which may lead to subtle issues, so make it use atomic_dec_if_positive() instead of atomic_dec() for the safety sake. Fixes: 33e4f80ee69b ("ACPI / PM: Ignore spurious SCI wakeups from suspend-to-idle") Signed-off-by: Rafael J. Wysocki Acked-by: Thomas Gleixner Signed-off-by: Sasha Levin --- drivers/base/power/wakeup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index df53e2b3296b..877b2a1767a5 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -877,7 +877,7 @@ EXPORT_SYMBOL_GPL(pm_system_wakeup); void pm_system_cancel_wakeup(void) { - atomic_dec(&pm_abort_suspend); + atomic_dec_if_positive(&pm_abort_suspend); } void pm_wakeup_clear(bool reset) From cade65b0a4545b71843dcea2764931f64c5e24bf Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 16 Jul 2019 22:42:18 +0800 Subject: [PATCH 2678/3715] libertas_tf: Use correct channel range in lbtf_geo_init [ Upstream commit 2ec4ad49b98e4a14147d04f914717135eca7c8b1 ] It seems we should use 'range' instead of 'priv->range' in lbtf_geo_init(), because 'range' is the corret one related to current regioncode. Reported-by: Hulk Robot Fixes: 691cdb49388b ("libertas_tf: command helper functions for libertas_tf") Signed-off-by: YueHaibing Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/marvell/libertas_tf/cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/libertas_tf/cmd.c b/drivers/net/wireless/marvell/libertas_tf/cmd.c index 909ac3685010..2b193f1257a5 100644 --- a/drivers/net/wireless/marvell/libertas_tf/cmd.c +++ b/drivers/net/wireless/marvell/libertas_tf/cmd.c @@ -69,7 +69,7 @@ static void lbtf_geo_init(struct lbtf_private *priv) break; } - for (ch = priv->range.start; ch < priv->range.end; ch++) + for (ch = range->start; ch < range->end; ch++) priv->channels[CHAN_TO_IDX(ch)].flags = 0; } From 8f08d7d4acf11febbb125489c0e123c1d22e8744 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 22 Jul 2019 17:01:23 +0200 Subject: [PATCH 2679/3715] qed: reduce maximum stack frame size [ Upstream commit 7c116e02a4a7575c8c62bfd2106e3e3ec8fb99dc ] clang warns about an overly large stack frame in one function when it decides to inline all __qed_get_vport_*() functions into __qed_get_vport_stats(): drivers/net/ethernet/qlogic/qed/qed_l2.c:1889:13: error: stack frame size of 1128 bytes in function '_qed_get_vport_stats' [-Werror,-Wframe-larger-than=] Use a noinline_for_stack annotation to prevent clang from inlining these, which keeps the maximum stack usage at around half of that in the worst case, similar to what we get with gcc. Fixes: 86622ee75312 ("qed: Move statistics to L2 code") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_l2.c | 34 +++++++++++------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 62cde3854a5c..5d7adedac68d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -1629,10 +1629,9 @@ static void __qed_get_vport_pstats_addrlen(struct qed_hwfn *p_hwfn, } } -static void __qed_get_vport_pstats(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - struct qed_eth_stats *p_stats, - u16 statistics_bin) +static noinline_for_stack void +__qed_get_vport_pstats(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, + struct qed_eth_stats *p_stats, u16 statistics_bin) { struct eth_pstorm_per_queue_stat pstats; u32 pstats_addr = 0, pstats_len = 0; @@ -1659,10 +1658,9 @@ static void __qed_get_vport_pstats(struct qed_hwfn *p_hwfn, HILO_64_REGPAIR(pstats.error_drop_pkts); } -static void __qed_get_vport_tstats(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - struct qed_eth_stats *p_stats, - u16 statistics_bin) +static noinline_for_stack void +__qed_get_vport_tstats(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, + struct qed_eth_stats *p_stats, u16 statistics_bin) { struct tstorm_per_port_stat tstats; u32 tstats_addr, tstats_len; @@ -1705,10 +1703,9 @@ static void __qed_get_vport_ustats_addrlen(struct qed_hwfn *p_hwfn, } } -static void __qed_get_vport_ustats(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - struct qed_eth_stats *p_stats, - u16 statistics_bin) +static noinline_for_stack +void __qed_get_vport_ustats(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, + struct qed_eth_stats *p_stats, u16 statistics_bin) { struct eth_ustorm_per_queue_stat ustats; u32 ustats_addr = 0, ustats_len = 0; @@ -1747,10 +1744,9 @@ static void __qed_get_vport_mstats_addrlen(struct qed_hwfn *p_hwfn, } } -static void __qed_get_vport_mstats(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - struct qed_eth_stats *p_stats, - u16 statistics_bin) +static noinline_for_stack void +__qed_get_vport_mstats(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, + struct qed_eth_stats *p_stats, u16 statistics_bin) { struct eth_mstorm_per_queue_stat mstats; u32 mstats_addr = 0, mstats_len = 0; @@ -1776,9 +1772,9 @@ static void __qed_get_vport_mstats(struct qed_hwfn *p_hwfn, HILO_64_REGPAIR(mstats.tpa_coalesced_bytes); } -static void __qed_get_vport_port_stats(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - struct qed_eth_stats *p_stats) +static noinline_for_stack void +__qed_get_vport_port_stats(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, + struct qed_eth_stats *p_stats) { struct qed_eth_stats_common *p_common = &p_stats->common; struct port_stats port_stats; From ccb5bd32b590aa439a8376228d0b6b6f883d69d8 Mon Sep 17 00:00:00 2001 From: Ruslan Bilovol Date: Sun, 7 Jul 2019 15:17:19 +0300 Subject: [PATCH 2680/3715] usb: host: xhci-hub: fix extra endianness conversion [ Upstream commit 6269e4c76eacabaea0d0099200ae1a455768d208 ] Don't do extra cpu_to_le32 conversion for put_unaligned_le32 because it is already implemented in this function. Fixes sparse error: xhci-hub.c:1152:44: warning: incorrect type in argument 1 (different base types) xhci-hub.c:1152:44: expected unsigned int [usertype] val xhci-hub.c:1152:44: got restricted __le32 [usertype] Fixes: 395f540 "xhci: support new USB 3.1 hub request to get extended port status" Cc: Mathias Nyman Signed-off-by: Ruslan Bilovol Link: https://lore.kernel.org/r/1562501839-26522-1-git-send-email-ruslan.bilovol@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-hub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index d1363f3fabfa..3bb38d9dc45b 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1118,7 +1118,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, } port_li = readl(port_array[wIndex] + PORTLI); status = xhci_get_ext_port_status(temp, port_li); - put_unaligned_le32(cpu_to_le32(status), &buf[4]); + put_unaligned_le32(status, &buf[4]); } break; case SetPortFeature: From c8d45c212e5066ec20efca6dbe47f88096c33161 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 12 Jul 2019 11:24:09 +0200 Subject: [PATCH 2681/3715] mic: avoid statically declaring a 'struct device'. [ Upstream commit bc83f79bd2119230888fb8574639d5a51b38f903 ] Generally, declaring a platform device as a static variable is a bad idea and can cause all kinds of problems, in particular with the DMA configuration and lifetime rules. A specific problem we hit here is from a bug in clang that warns about certain (otherwise valid) macros when used in static variables: drivers/misc/mic/card/mic_x100.c:285:27: warning: shift count >= width of type [-Wshift-count-overflow] static u64 mic_dma_mask = DMA_BIT_MASK(64); ^~~~~~~~~~~~~~~~ include/linux/dma-mapping.h:141:54: note: expanded from macro 'DMA_BIT_MASK' #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) ^ ~~~ A slightly better way here is to create the platform device dynamically and set the dma mask in the probe function. This avoids the warning and some other problems, but is still not ideal because the device creation should really be separated from the driver, and the fact that the device has no parent means we have to force the dma mask rather than having it set up from the bus that the device is actually on. Fixes: dd8d8d44df64 ("misc: mic: MIC card driver specific changes to enable SCIF") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20190712092426.872625-1-arnd@arndb.de Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/misc/mic/card/mic_x100.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/drivers/misc/mic/card/mic_x100.c b/drivers/misc/mic/card/mic_x100.c index b9f0710ffa6b..4007adc666f3 100644 --- a/drivers/misc/mic/card/mic_x100.c +++ b/drivers/misc/mic/card/mic_x100.c @@ -249,6 +249,9 @@ static int __init mic_probe(struct platform_device *pdev) mdrv->dev = &pdev->dev; snprintf(mdrv->name, sizeof(mic_driver_name), mic_driver_name); + /* FIXME: use dma_set_mask_and_coherent() and check result */ + dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + mdev->mmio.pa = MIC_X100_MMIO_BASE; mdev->mmio.len = MIC_X100_MMIO_LEN; mdev->mmio.va = devm_ioremap(&pdev->dev, MIC_X100_MMIO_BASE, @@ -294,18 +297,6 @@ static void mic_platform_shutdown(struct platform_device *pdev) mic_remove(pdev); } -static u64 mic_dma_mask = DMA_BIT_MASK(64); - -static struct platform_device mic_platform_dev = { - .name = mic_driver_name, - .id = 0, - .num_resources = 0, - .dev = { - .dma_mask = &mic_dma_mask, - .coherent_dma_mask = DMA_BIT_MASK(64), - }, -}; - static struct platform_driver __refdata mic_platform_driver = { .probe = mic_probe, .remove = mic_remove, @@ -315,6 +306,8 @@ static struct platform_driver __refdata mic_platform_driver = { }, }; +static struct platform_device *mic_platform_dev; + static int __init mic_init(void) { int ret; @@ -328,9 +321,12 @@ static int __init mic_init(void) request_module("mic_x100_dma"); mic_init_card_debugfs(); - ret = platform_device_register(&mic_platform_dev); + + mic_platform_dev = platform_device_register_simple(mic_driver_name, + 0, NULL, 0); + ret = PTR_ERR_OR_ZERO(mic_platform_dev); if (ret) { - pr_err("platform_device_register ret %d\n", ret); + pr_err("platform_device_register_full ret %d\n", ret); goto cleanup_debugfs; } ret = platform_driver_register(&mic_platform_driver); @@ -341,7 +337,7 @@ static int __init mic_init(void) return ret; device_unregister: - platform_device_unregister(&mic_platform_dev); + platform_device_unregister(mic_platform_dev); cleanup_debugfs: mic_exit_card_debugfs(); done: @@ -351,7 +347,7 @@ done: static void __exit mic_exit(void) { platform_driver_unregister(&mic_platform_driver); - platform_device_unregister(&mic_platform_dev); + platform_device_unregister(mic_platform_dev); mic_exit_card_debugfs(); } From 8d3b98c7a9b6019ef93c3838db68f31f4d3c5f60 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Jul 2019 20:47:06 +0200 Subject: [PATCH 2682/3715] x86/kgbd: Use NMI_VECTOR not APIC_DM_NMI [ Upstream commit 2591bc4e8d70b4e1330d327fb7e3921f4e070a51 ] apic->send_IPI_allbutself() takes a vector number as argument. APIC_DM_NMI is clearly not a vector number. It's defined to 0x400 which is outside the vector space. Use NMI_VECTOR instead as that's what it is intended to be. Fixes: 82da3ff89dc2 ("x86: kgdb support") Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190722105218.855189979@linutronix.de Signed-off-by: Sasha Levin --- arch/x86/kernel/kgdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 8e36f249646e..904e18bb38c5 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -438,7 +438,7 @@ static void kgdb_disable_hw_debug(struct pt_regs *regs) */ void kgdb_roundup_cpus(unsigned long flags) { - apic->send_IPI_allbutself(APIC_DM_NMI); + apic->send_IPI_allbutself(NMI_VECTOR); } #endif From c877153dc53dc4abf4d85ce587735bb284fd7f28 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 12 Jul 2019 10:59:24 +0200 Subject: [PATCH 2683/3715] crypto: ccp - Reduce maximum stack usage [ Upstream commit 72c8117adfced37df101c8c0b3f363e0906f83f0 ] Each of the operations in ccp_run_cmd() needs several hundred bytes of kernel stack. Depending on the inlining, these may need separate stack slots that add up to more than the warning limit, as shown in this clang based build: drivers/crypto/ccp/ccp-ops.c:871:12: error: stack frame size of 1164 bytes in function 'ccp_run_aes_cmd' [-Werror,-Wframe-larger-than=] static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) The problem may also happen when there is no warning, e.g. in the ccp_run_cmd()->ccp_run_aes_cmd()->ccp_run_aes_gcm_cmd() call chain with over 2000 bytes. Mark each individual function as 'noinline_for_stack' to prevent this from happening, and move the calls to the two special cases for aes into the top-level function. This will keep the actual combined stack usage to the mimimum: 828 bytes for ccp_run_aes_gcm_cmd() and at most 524 bytes for each of the other cases. Fixes: 63b945091a07 ("crypto: ccp - CCP device driver and interface support") Signed-off-by: Arnd Bergmann Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/ccp/ccp-ops.c | 52 +++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c index 4b48b8523a40..330853a2702f 100644 --- a/drivers/crypto/ccp/ccp-ops.c +++ b/drivers/crypto/ccp/ccp-ops.c @@ -458,8 +458,8 @@ static int ccp_copy_from_sb(struct ccp_cmd_queue *cmd_q, return ccp_copy_to_from_sb(cmd_q, wa, jobid, sb, byte_swap, true); } -static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q, - struct ccp_cmd *cmd) +static noinline_for_stack int +ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) { struct ccp_aes_engine *aes = &cmd->u.aes; struct ccp_dm_workarea key, ctx; @@ -614,8 +614,8 @@ e_key: return ret; } -static int ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, - struct ccp_cmd *cmd) +static noinline_for_stack int +ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) { struct ccp_aes_engine *aes = &cmd->u.aes; struct ccp_dm_workarea key, ctx, final_wa, tag; @@ -897,7 +897,8 @@ e_key: return ret; } -static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) +static noinline_for_stack int +ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) { struct ccp_aes_engine *aes = &cmd->u.aes; struct ccp_dm_workarea key, ctx; @@ -907,12 +908,6 @@ static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) bool in_place = false; int ret; - if (aes->mode == CCP_AES_MODE_CMAC) - return ccp_run_aes_cmac_cmd(cmd_q, cmd); - - if (aes->mode == CCP_AES_MODE_GCM) - return ccp_run_aes_gcm_cmd(cmd_q, cmd); - if (!((aes->key_len == AES_KEYSIZE_128) || (aes->key_len == AES_KEYSIZE_192) || (aes->key_len == AES_KEYSIZE_256))) @@ -1080,8 +1075,8 @@ e_key: return ret; } -static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q, - struct ccp_cmd *cmd) +static noinline_for_stack int +ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) { struct ccp_xts_aes_engine *xts = &cmd->u.xts; struct ccp_dm_workarea key, ctx; @@ -1280,7 +1275,8 @@ e_key: return ret; } -static int ccp_run_des3_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) +static noinline_for_stack int +ccp_run_des3_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) { struct ccp_des3_engine *des3 = &cmd->u.des3; @@ -1476,7 +1472,8 @@ e_key: return ret; } -static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) +static noinline_for_stack int +ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) { struct ccp_sha_engine *sha = &cmd->u.sha; struct ccp_dm_workarea ctx; @@ -1820,7 +1817,8 @@ e_ctx: return ret; } -static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) +static noinline_for_stack int +ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) { struct ccp_rsa_engine *rsa = &cmd->u.rsa; struct ccp_dm_workarea exp, src, dst; @@ -1951,8 +1949,8 @@ e_sb: return ret; } -static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q, - struct ccp_cmd *cmd) +static noinline_for_stack int +ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) { struct ccp_passthru_engine *pt = &cmd->u.passthru; struct ccp_dm_workarea mask; @@ -2083,7 +2081,8 @@ e_mask: return ret; } -static int ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q, +static noinline_for_stack int +ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) { struct ccp_passthru_nomap_engine *pt = &cmd->u.passthru_nomap; @@ -2424,7 +2423,8 @@ e_src: return ret; } -static int ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) +static noinline_for_stack int +ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) { struct ccp_ecc_engine *ecc = &cmd->u.ecc; @@ -2461,7 +2461,17 @@ int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) switch (cmd->engine) { case CCP_ENGINE_AES: - ret = ccp_run_aes_cmd(cmd_q, cmd); + switch (cmd->u.aes.mode) { + case CCP_AES_MODE_CMAC: + ret = ccp_run_aes_cmac_cmd(cmd_q, cmd); + break; + case CCP_AES_MODE_GCM: + ret = ccp_run_aes_gcm_cmd(cmd_q, cmd); + break; + default: + ret = ccp_run_aes_cmd(cmd_q, cmd); + break; + } break; case CCP_ENGINE_XTS_AES_128: ret = ccp_run_xts_aes_cmd(cmd_q, cmd); From 183eebd4efa7904e6d9e95b8e482f2969f743f66 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jul 2019 09:14:22 +0200 Subject: [PATCH 2684/3715] ALSA: aoa: onyx: always initialize register read value [ Upstream commit f474808acb3c4b30552d9c59b181244e0300d218 ] A lot of places in the driver use onyx_read_register() without checking the return value, and it's been working OK for ~10 years or so, so probably never fails ... Rather than trying to check the return value everywhere, which would be relatively intrusive, at least make sure we don't use an uninitialized value. Fixes: f3d9478b2ce4 ("[ALSA] snd-aoa: add snd-aoa") Reported-by: Stephen Rothwell Signed-off-by: Johannes Berg Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/aoa/codecs/onyx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/aoa/codecs/onyx.c b/sound/aoa/codecs/onyx.c index d2d96ca082b7..6224fd3bbf7c 100644 --- a/sound/aoa/codecs/onyx.c +++ b/sound/aoa/codecs/onyx.c @@ -74,8 +74,10 @@ static int onyx_read_register(struct onyx *onyx, u8 reg, u8 *value) return 0; } v = i2c_smbus_read_byte_data(onyx->i2c, reg); - if (v < 0) + if (v < 0) { + *value = 0; return -1; + } *value = (u8)v; onyx->cache[ONYX_REG_CONTROL-FIRSTREGISTER] = *value; return 0; From 801dafc3218c23ba1ea39f409b962113604f64bc Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Tue, 30 Jul 2019 16:23:18 +0200 Subject: [PATCH 2685/3715] tipc: reduce risk of wakeup queue starvation [ Upstream commit 7c5b42055964f587e55bd87ef334c3a27e95d144 ] In commit 365ad353c256 ("tipc: reduce risk of user starvation during link congestion") we allowed senders to add exactly one list of extra buffers to the link backlog queues during link congestion (aka "oversubscription"). However, the criteria for when to stop adding wakeup messages to the input queue when the overload abates is inaccurate, and may cause starvation problems during very high load. Currently, we stop adding wakeup messages after 10 total failed attempts where we find that there is no space left in the backlog queue for a certain importance level. The counter for this is accumulated across all levels, which may lead the algorithm to leave the loop prematurely, although there may still be plenty of space available at some levels. The result is sometimes that messages near the wakeup queue tail are not added to the input queue as they should be. We now introduce a more exact algorithm, where we keep adding wakeup messages to a level as long as the backlog queue has free slots for the corresponding level, and stop at the moment there are no more such slots or when there are no more wakeup messages to dequeue. Fixes: 365ad35 ("tipc: reduce risk of user starvation during link congestion") Reported-by: Tung Nguyen Acked-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/tipc/link.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index da749916faac..82e4e0e152d1 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -811,18 +811,31 @@ static int link_schedule_user(struct tipc_link *l, struct tipc_msg *hdr) */ void link_prepare_wakeup(struct tipc_link *l) { + struct sk_buff_head *wakeupq = &l->wakeupq; + struct sk_buff_head *inputq = l->inputq; struct sk_buff *skb, *tmp; - int imp, i = 0; + struct sk_buff_head tmpq; + int avail[5] = {0,}; + int imp = 0; - skb_queue_walk_safe(&l->wakeupq, skb, tmp) { + __skb_queue_head_init(&tmpq); + + for (; imp <= TIPC_SYSTEM_IMPORTANCE; imp++) + avail[imp] = l->backlog[imp].limit - l->backlog[imp].len; + + skb_queue_walk_safe(wakeupq, skb, tmp) { imp = TIPC_SKB_CB(skb)->chain_imp; - if (l->backlog[imp].len < l->backlog[imp].limit) { - skb_unlink(skb, &l->wakeupq); - skb_queue_tail(l->inputq, skb); - } else if (i++ > 10) { - break; - } + if (avail[imp] <= 0) + continue; + avail[imp]--; + __skb_unlink(skb, wakeupq); + __skb_queue_tail(&tmpq, skb); } + + spin_lock_bh(&inputq->lock); + skb_queue_splice_tail(&tmpq, inputq); + spin_unlock_bh(&inputq->lock); + } void tipc_link_reset(struct tipc_link *l) From 9abc2bb43a47a30ea1ccbf8d27bc8d92604404bd Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Wed, 19 Jun 2019 14:29:58 +0200 Subject: [PATCH 2686/3715] ARM: dts: stm32: add missing vdda-supply to adc on stm32h743i-eval [ Upstream commit 493e84c5dc4d703d976b5875f5db22dae08a0782 ] Add missing vdda-supply required by STM32 ADC. Fixes: 090992a9ca54 ("ARM: dts: stm32: enable ADC on stm32h743i-eval board") Signed-off-by: Fabrice Gasnier Signed-off-by: Alexandre Torgue Signed-off-by: Sasha Levin --- arch/arm/boot/dts/stm32h743i-eval.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/stm32h743i-eval.dts b/arch/arm/boot/dts/stm32h743i-eval.dts index 6c07786e7ddb..0d98b2865bd7 100644 --- a/arch/arm/boot/dts/stm32h743i-eval.dts +++ b/arch/arm/boot/dts/stm32h743i-eval.dts @@ -71,6 +71,7 @@ }; &adc_12 { + vdda-supply = <&vdda>; vref-supply = <&vdda>; status = "okay"; adc1: adc@0 { From ed1929a76537c41b212adc93013e46dd3f7b46f9 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Wed, 31 Jul 2019 14:40:13 +0300 Subject: [PATCH 2687/3715] net/mlx5: Fix mlx5_ifc_query_lag_out_bits [ Upstream commit ea77388b02270b0af8dc57f668f311235ea068f0 ] Remove the "reserved_at_40" field to match the device specification. Fixes: 84df61ebc69b ("net/mlx5: Add HW interfaces used by LAG") Signed-off-by: Mark Zhang Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- include/linux/mlx5/mlx5_ifc.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 1d793d86d55f..6ffa181598e6 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -8671,8 +8671,6 @@ struct mlx5_ifc_query_lag_out_bits { u8 syndrome[0x20]; - u8 reserved_at_40[0x40]; - struct mlx5_ifc_lagc_bits ctx; }; From f996f9ee615bebddcab67e4ebaeb5ce56956d256 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 23 Jul 2019 22:14:29 -0500 Subject: [PATCH 2688/3715] cifs: fix rmmod regression in cifs.ko caused by force_sig changes [ Upstream commit 247bc9470b1eeefc7b58cdf2c39f2866ba651509 ] Fixes: 72abe3bcf091 ("signal/cifs: Fix cifs_put_tcp_session to call send_sig instead of force_sig") The global change from force_sig caused module unloading of cifs.ko to fail (since the cifsd process could not be killed, "rmmod cifs" now would always fail) Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg CC: Eric W. Biederman Signed-off-by: Sasha Levin --- fs/cifs/connect.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 51bbb1c0b71a..ed4a0352ea90 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -921,6 +921,7 @@ cifs_demultiplex_thread(void *p) mempool_resize(cifs_req_poolp, length + cifs_min_rcv); set_freezable(); + allow_signal(SIGKILL); while (server->tcpStatus != CifsExiting) { if (try_to_freeze()) continue; From 5ea904f9954ee9072114a8d52c951b13bb8ed7bd Mon Sep 17 00:00:00 2001 From: Iuliana Prodan Date: Wed, 31 Jul 2019 16:08:12 +0300 Subject: [PATCH 2689/3715] crypto: caam - free resources in case caam_rng registration failed [ Upstream commit c59a1d41672a89b5cac49db1a472ff889e35a2d2 ] Check the return value of the hardware registration for caam_rng and free resources in case of failure. Fixes: e24f7c9e87d4 ("crypto: caam - hwrng support") Signed-off-by: Iuliana Prodan Reviewed-by: Horia Geanta Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/caam/caamrng.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c index fde07d4ff019..ff6718a11e9e 100644 --- a/drivers/crypto/caam/caamrng.c +++ b/drivers/crypto/caam/caamrng.c @@ -353,7 +353,10 @@ static int __init caam_rng_init(void) goto free_rng_ctx; dev_info(dev, "registering rng-caam\n"); - return hwrng_register(&caam_rng); + + err = hwrng_register(&caam_rng); + if (!err) + return err; free_rng_ctx: kfree(rng_ctx); From 25f9e3e502a92782b5f9f270ba15b25490f65a77 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 12 Aug 2019 14:29:38 -0400 Subject: [PATCH 2690/3715] ext4: set error return correctly when ext4_htree_store_dirent fails [ Upstream commit 7a14826ede1d714f0bb56de8167c0e519041eeda ] Currently when the call to ext4_htree_store_dirent fails the error return variable 'ret' is is not being set to the error code and variable count is instead, hence the error code is not being returned. Fix this by assigning ret to the error return code. Addresses-Coverity: ("Unused value") Fixes: 8af0f0822797 ("ext4: fix readdir error in the case of inline_data+dir_index") Signed-off-by: Colin Ian King Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/inline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 137c752ab985..6064bcb8572b 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1425,7 +1425,7 @@ int htree_inlinedir_to_tree(struct file *dir_file, err = ext4_htree_store_dirent(dir_file, hinfo->hash, hinfo->minor_hash, de, &tmp_str); if (err) { - count = err; + ret = err; goto out; } count++; From 02a996446ae1b0f09ed9d006578e0ca33196f2c7 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 15 Aug 2019 17:23:00 +0800 Subject: [PATCH 2691/3715] ASoC: es8328: Fix copy-paste error in es8328_right_line_controls [ Upstream commit 630742c296341a8cfe00dfd941392025ba8dd4e8 ] It seems 'es8328_rline_enum' should be used in es8328_right_line_controls Fixes: 567e4f98922c ("ASoC: add es8328 codec driver") Signed-off-by: YueHaibing Link: https://lore.kernel.org/r/20190815092300.68712-1-yuehaibing@huawei.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/es8328.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/es8328.c b/sound/soc/codecs/es8328.c index bcdb8914ec16..e2f44fa46262 100644 --- a/sound/soc/codecs/es8328.c +++ b/sound/soc/codecs/es8328.c @@ -231,7 +231,7 @@ static const struct soc_enum es8328_rline_enum = ARRAY_SIZE(es8328_line_texts), es8328_line_texts); static const struct snd_kcontrol_new es8328_right_line_controls = - SOC_DAPM_ENUM("Route", es8328_lline_enum); + SOC_DAPM_ENUM("Route", es8328_rline_enum); /* Left Mixer */ static const struct snd_kcontrol_new es8328_left_mixer_controls[] = { From 41c4947ed34f7d9e2ad460c84bd677f178e6f13c Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 15 Aug 2019 17:01:57 +0800 Subject: [PATCH 2692/3715] ASoC: cs4349: Use PM ops 'cs4349_runtime_pm' [ Upstream commit 9b4275c415acca6264a3d7f1182589959c93d530 ] sound/soc/codecs/cs4349.c:358:32: warning: cs4349_runtime_pm defined but not used [-Wunused-const-variable=] cs4349_runtime_pm ops already defined, it seems we should enable it. Reported-by: Hulk Robot Fixes: e40da86 ("ASoC: cs4349: Add support for Cirrus Logic CS4349") Signed-off-by: YueHaibing Link: https://lore.kernel.org/r/20190815090157.70036-1-yuehaibing@huawei.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/cs4349.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/cs4349.c b/sound/soc/codecs/cs4349.c index 0a749c79ef57..1d38e53dc95c 100644 --- a/sound/soc/codecs/cs4349.c +++ b/sound/soc/codecs/cs4349.c @@ -380,6 +380,7 @@ static struct i2c_driver cs4349_i2c_driver = { .driver = { .name = "cs4349", .of_match_table = cs4349_of_match, + .pm = &cs4349_runtime_pm, }, .id_table = cs4349_i2c_id, .probe = cs4349_i2c_probe, From b569bda1c5c0b3edfaabf155ec5a239676f86b79 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 15 Aug 2019 17:19:20 +0800 Subject: [PATCH 2693/3715] ASoC: wm8737: Fix copy-paste error in wm8737_snd_controls [ Upstream commit 554b75bde64bcad9662530726d1483f7ef012069 ] sound/soc/codecs/wm8737.c:112:29: warning: high_3d defined but not used [-Wunused-const-variable=] 'high_3d' should be used for 3D High Cut-off. Reported-by: Hulk Robot Fixes: 2a9ae13a2641 ("ASoC: Add initial WM8737 driver") Signed-off-by: YueHaibing Acked-by: Charles Keepax Link: https://lore.kernel.org/r/20190815091920.64480-1-yuehaibing@huawei.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wm8737.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm8737.c b/sound/soc/codecs/wm8737.c index f0cb1c4afe3c..c5a8d758f58b 100644 --- a/sound/soc/codecs/wm8737.c +++ b/sound/soc/codecs/wm8737.c @@ -170,7 +170,7 @@ SOC_DOUBLE("Polarity Invert Switch", WM8737_ADC_CONTROL, 5, 6, 1, 0), SOC_SINGLE("3D Switch", WM8737_3D_ENHANCE, 0, 1, 0), SOC_SINGLE("3D Depth", WM8737_3D_ENHANCE, 1, 15, 0), SOC_ENUM("3D Low Cut-off", low_3d), -SOC_ENUM("3D High Cut-off", low_3d), +SOC_ENUM("3D High Cut-off", high_3d), SOC_SINGLE_TLV("3D ADC Volume", WM8737_3D_ENHANCE, 7, 1, 1, adc_tlv), SOC_SINGLE("Noise Gate Switch", WM8737_NOISE_GATE, 0, 1, 0), From 060fc2173afad7e1f13c9b47b3a2d43e2d2afe6c Mon Sep 17 00:00:00 2001 From: Gerd Rausch Date: Thu, 11 Jul 2019 12:15:50 -0700 Subject: [PATCH 2694/3715] net/rds: Add a few missing rds_stat_names entries [ Upstream commit 55c70ca00c982fbc0df4c4d3e31747fb73f4ddb5 ] In a previous commit, fields were added to "struct rds_statistics" but array "rds_stat_names" was not updated accordingly. Please note the inconsistent naming of the string representations that is done in the name of compatibility with the Oracle internal code-base. s_recv_bytes_added_to_socket -> "recv_bytes_added_to_sock" s_recv_bytes_removed_from_socket -> "recv_bytes_freed_fromsock" Fixes: 192a798f5299 ("RDS: add stat for socket recv memory usage") Signed-off-by: Gerd Rausch Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/rds/stats.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/rds/stats.c b/net/rds/stats.c index 73be187d389e..6bbab4d74c4f 100644 --- a/net/rds/stats.c +++ b/net/rds/stats.c @@ -76,6 +76,8 @@ static const char *const rds_stat_names[] = { "cong_update_received", "cong_send_error", "cong_send_blocked", + "recv_bytes_added_to_sock", + "recv_bytes_freed_fromsock", }; void rds_stats_info_copy(struct rds_info_iterator *iter, From 1c5e0766089dceeeff6dde7dec7bb9e25d064a9c Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Sat, 17 Aug 2019 17:04:49 -0400 Subject: [PATCH 2695/3715] bnxt_en: Fix handling FRAG_ERR when NVM_INSTALL_UPDATE cmd fails [ Upstream commit dd2ebf3404c7c295014bc025dea23960960ceb1a ] If FW returns FRAG_ERR in response error code, driver is resending the command only when HWRM command returns success. Fix the code to resend NVM_INSTALL_UPDATE command with DEFRAG install flags, if FW returns FRAG_ERR in its response error code. Fixes: cb4d1d626145 ("bnxt_en: Retry failed NVM_INSTALL_UPDATE with defragmentation flag enabled.") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 963beaa8fabb..3c78cd1cdd6f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1667,21 +1667,19 @@ static int bnxt_flash_package_from_file(struct net_device *dev, mutex_lock(&bp->hwrm_cmd_lock); hwrm_err = _hwrm_send_message(bp, &install, sizeof(install), INSTALL_PACKAGE_TIMEOUT); - if (hwrm_err) - goto flash_pkg_exit; - - if (resp->error_code) { + if (hwrm_err) { u8 error_code = ((struct hwrm_err_output *)resp)->cmd_err; - if (error_code == NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR) { + if (resp->error_code && error_code == + NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR) { install.flags |= cpu_to_le16( NVM_INSTALL_UPDATE_REQ_FLAGS_ALLOWED_TO_DEFRAG); hwrm_err = _hwrm_send_message(bp, &install, sizeof(install), INSTALL_PACKAGE_TIMEOUT); - if (hwrm_err) - goto flash_pkg_exit; } + if (hwrm_err) + goto flash_pkg_exit; } if (resp->result) { From cde0dc52e7d462332bdcf7dc22ab6ccc865b4b52 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 16 Aug 2019 12:33:54 -0500 Subject: [PATCH 2696/3715] signal: Allow cifs and drbd to receive their terminating signals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 33da8e7c814f77310250bb54a9db36a44c5de784 ] My recent to change to only use force_sig for a synchronous events wound up breaking signal reception cifs and drbd. I had overlooked the fact that by default kthreads start out with all signals set to SIG_IGN. So a change I thought was safe turned out to have made it impossible for those kernel thread to catch their signals. Reverting the work on force_sig is a bad idea because what the code was doing was very much a misuse of force_sig. As the way force_sig ultimately allowed the signal to happen was to change the signal handler to SIG_DFL. Which after the first signal will allow userspace to send signals to these kernel threads. At least for wake_ack_receiver in drbd that does not appear actively wrong. So correct this problem by adding allow_kernel_signal that will allow signals whose siginfo reports they were sent by the kernel through, but will not allow userspace generated signals, and update cifs and drbd to call allow_kernel_signal in an appropriate place so that their thread can receive this signal. Fixing things this way ensures that userspace won't be able to send signals and cause problems, that it is clear which signals the threads are expecting to receive, and it guarantees that nothing else in the system will be affected. This change was partly inspired by similar cifs and drbd patches that added allow_signal. Reported-by: ronnie sahlberg Reported-by: Christoph Böhmwalder Tested-by: Christoph Böhmwalder Cc: Steve French Cc: Philipp Reisner Cc: David Laight Fixes: 247bc9470b1e ("cifs: fix rmmod regression in cifs.ko caused by force_sig changes") Fixes: 72abe3bcf091 ("signal/cifs: Fix cifs_put_tcp_session to call send_sig instead of force_sig") Fixes: fee109901f39 ("signal/drbd: Use send_sig not force_sig") Fixes: 3cf5d076fb4d ("signal: Remove task parameter from force_sig") Signed-off-by: "Eric W. Biederman" Signed-off-by: Sasha Levin --- drivers/block/drbd/drbd_main.c | 2 ++ fs/cifs/connect.c | 2 +- include/linux/signal.h | 15 ++++++++++++++- kernel/signal.c | 5 +++++ 4 files changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 7ea13b5497fd..b998e3abca7a 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -334,6 +334,8 @@ static int drbd_thread_setup(void *arg) thi->name[0], resource->name); + allow_kernel_signal(DRBD_SIGKILL); + allow_kernel_signal(SIGXCPU); restart: retval = thi->function(thi); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index ed4a0352ea90..f0b1279a7de6 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -921,7 +921,7 @@ cifs_demultiplex_thread(void *p) mempool_resize(cifs_req_poolp, length + cifs_min_rcv); set_freezable(); - allow_signal(SIGKILL); + allow_kernel_signal(SIGKILL); while (server->tcpStatus != CifsExiting) { if (try_to_freeze()) continue; diff --git a/include/linux/signal.h b/include/linux/signal.h index 843bd62b1ead..c4e3eb89a622 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -268,6 +268,9 @@ extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping); extern void exit_signals(struct task_struct *tsk); extern void kernel_sigaction(int, __sighandler_t); +#define SIG_KTHREAD ((__force __sighandler_t)2) +#define SIG_KTHREAD_KERNEL ((__force __sighandler_t)3) + static inline void allow_signal(int sig) { /* @@ -275,7 +278,17 @@ static inline void allow_signal(int sig) * know it'll be handled, so that they don't get converted to * SIGKILL or just silently dropped. */ - kernel_sigaction(sig, (__force __sighandler_t)2); + kernel_sigaction(sig, SIG_KTHREAD); +} + +static inline void allow_kernel_signal(int sig) +{ + /* + * Kernel threads handle their own signals. Let the signal code + * know signals sent by the kernel will be handled, so that they + * don't get silently dropped. + */ + kernel_sigaction(sig, SIG_KTHREAD_KERNEL); } static inline void disallow_signal(int sig) diff --git a/kernel/signal.c b/kernel/signal.c index c9b203875001..8fee1f2eba2f 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -85,6 +85,11 @@ static int sig_task_ignored(struct task_struct *t, int sig, bool force) handler == SIG_DFL && !(force && sig_kernel_only(sig))) return 1; + /* Only allow kernel generated signals to this kthread */ + if (unlikely((t->flags & PF_KTHREAD) && + (handler == SIG_KTHREAD_KERNEL) && !force)) + return true; + return sig_handler_ignored(handler, sig); } From e62abd5b37521c7020d8650dcb753cc32a82df69 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 19 Aug 2019 21:25:17 +0200 Subject: [PATCH 2697/3715] ASoC: sun4i-i2s: RX and TX counter registers are swapped [ Upstream commit cf2c0e1ce9544df42170fb921f12da82dc0cc8d6 ] The RX and TX counters registers offset have been swapped, fix that. Fixes: fa7c0d13cb26 ("ASoC: sunxi: Add Allwinner A10 Digital Audio driver") Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/8b26477560ad5fd8f69e037b167c5e61de5c26a3.1566242458.git-series.maxime.ripard@bootlin.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/sunxi/sun4i-i2s.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/sunxi/sun4i-i2s.c b/sound/soc/sunxi/sun4i-i2s.c index da0a2083e12a..d2802fd8c1dd 100644 --- a/sound/soc/sunxi/sun4i-i2s.c +++ b/sound/soc/sunxi/sun4i-i2s.c @@ -80,8 +80,8 @@ #define SUN4I_I2S_CLK_DIV_MCLK_MASK GENMASK(3, 0) #define SUN4I_I2S_CLK_DIV_MCLK(mclk) ((mclk) << 0) -#define SUN4I_I2S_RX_CNT_REG 0x28 -#define SUN4I_I2S_TX_CNT_REG 0x2c +#define SUN4I_I2S_TX_CNT_REG 0x28 +#define SUN4I_I2S_RX_CNT_REG 0x2c #define SUN4I_I2S_TX_CHAN_SEL_REG 0x30 #define SUN4I_I2S_CHAN_SEL(num_chan) (((num_chan) - 1) << 0) From 1b394b564c08798124b9b41b50019d1cf6ec9714 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 20 Aug 2019 16:15:43 +0300 Subject: [PATCH 2698/3715] dmaengine: dw: platform: Switch to acpi_dma_controller_register() [ Upstream commit e7b8514e4d68bec21fc6385fa0a66797ddc34ac9 ] There is a possibility to have registered ACPI DMA controller while it has been gone already. To avoid the potential crash, move to non-managed acpi_dma_controller_register(). Fixes: 42c91ee71d6d ("dw_dmac: add ACPI support") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20190820131546.75744-8-andriy.shevchenko@linux.intel.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/dw/platform.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c index 46a519e07195..b408c07662f5 100644 --- a/drivers/dma/dw/platform.c +++ b/drivers/dma/dw/platform.c @@ -87,13 +87,20 @@ static void dw_dma_acpi_controller_register(struct dw_dma *dw) dma_cap_set(DMA_SLAVE, info->dma_cap); info->filter_fn = dw_dma_acpi_filter; - ret = devm_acpi_dma_controller_register(dev, acpi_dma_simple_xlate, - info); + ret = acpi_dma_controller_register(dev, acpi_dma_simple_xlate, info); if (ret) dev_err(dev, "could not register acpi_dma_controller\n"); } + +static void dw_dma_acpi_controller_free(struct dw_dma *dw) +{ + struct device *dev = dw->dma.dev; + + acpi_dma_controller_free(dev); +} #else /* !CONFIG_ACPI */ static inline void dw_dma_acpi_controller_register(struct dw_dma *dw) {} +static inline void dw_dma_acpi_controller_free(struct dw_dma *dw) {} #endif /* !CONFIG_ACPI */ #ifdef CONFIG_OF @@ -249,6 +256,9 @@ static int dw_remove(struct platform_device *pdev) { struct dw_dma_chip *chip = platform_get_drvdata(pdev); + if (ACPI_HANDLE(&pdev->dev)) + dw_dma_acpi_controller_free(chip->dw); + if (pdev->dev.of_node) of_dma_controller_free(pdev->dev.of_node); From 9878718005a17512f6fcdcbfc64a2a3837e07bc7 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 20 Aug 2019 11:54:46 +0200 Subject: [PATCH 2699/3715] mac80211: minstrel_ht: fix per-group max throughput rate initialization [ Upstream commit 56dd918ff06e3ee24d8067e93ed12b2a39e71394 ] The group number needs to be multiplied by the number of rates per group to get the full rate index Fixes: 5935839ad735 ("mac80211: improve minstrel_ht rate sorting by throughput & probability") Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20190820095449.45255-1-nbd@nbd.name Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/rc80211_minstrel_ht.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index e57811e4b91f..7ba4272642c9 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -529,7 +529,7 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) /* (re)Initialize group rate indexes */ for(j = 0; j < MAX_THR_RATES; j++) - tmp_group_tp_rate[j] = group; + tmp_group_tp_rate[j] = MCS_GROUP_RATES * group; for (i = 0; i < MCS_GROUP_RATES; i++) { if (!(mi->supported[group] & BIT(i))) From 4cd97e29c3dd3ef05c92dbae84bb45bceb9ea14b Mon Sep 17 00:00:00 2001 From: Alexandre Kroupski Date: Tue, 20 Aug 2019 08:37:45 -0300 Subject: [PATCH 2700/3715] media: atmel: atmel-isi: fix timeout value for stop streaming [ Upstream commit 623fd246bb40234fe68dd4e7c1f1f081f9c45a3d ] In case of sensor malfunction, stop streaming timeout takes much longer than expected. This is due to conversion of time to jiffies: milliseconds multiplied with HZ (ticks/second) gives out a value of jiffies with 10^3 greater. We need to also divide by 10^3 to obtain the right jiffies value. In other words FRAME_INTERVAL_MILLI_SEC must be in seconds in order to multiply by HZ and get the right jiffies value to add to the current jiffies for the timeout expire time. Fixes: 195ebc43bf76 ("[media] V4L: at91: add Atmel Image Sensor Interface (ISI) support") Signed-off-by: Alexandre Kroupski Reviewed-by: Eugen Hristev Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/atmel/atmel-isi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/atmel/atmel-isi.c b/drivers/media/platform/atmel/atmel-isi.c index 891fa2505efa..2f962a3418f6 100644 --- a/drivers/media/platform/atmel/atmel-isi.c +++ b/drivers/media/platform/atmel/atmel-isi.c @@ -496,7 +496,7 @@ static void stop_streaming(struct vb2_queue *vq) spin_unlock_irq(&isi->irqlock); if (!isi->enable_preview_path) { - timeout = jiffies + FRAME_INTERVAL_MILLI_SEC * HZ; + timeout = jiffies + (FRAME_INTERVAL_MILLI_SEC * HZ) / 1000; /* Wait until the end of the current frame. */ while ((isi_readl(isi, ISI_STATUS) & ISI_CTRL_CDC) && time_before(jiffies, timeout)) From 6698015a135887a55621e35f65f6b4a448f19e1f Mon Sep 17 00:00:00 2001 From: Bruno Thomsen Date: Thu, 22 Aug 2019 15:19:34 +0200 Subject: [PATCH 2701/3715] rtc: pcf2127: bugfix: read rtc disables watchdog [ Upstream commit 7f43020e3bdb63d65661ed377682702f8b34d3ea ] The previous fix listed bulk read of registers as root cause of accendential disabling of watchdog, since the watchdog counter register (WD_VAL) was zeroed. Fixes: 3769a375ab83 rtc: pcf2127: bulk read only date and time registers. Tested with the same PCF2127 chip as Sean reveled root cause of WD_VAL register value zeroing was caused by reading CTRL2 register which is one of the watchdog feature control registers. So the solution is to not read the first two control registers (CTRL1 and CTRL2) in pcf2127_rtc_read_time as they are not needed anyway. Size of local buf variable is kept to allow easy usage of register defines to improve readability of code. Debug trace line was updated after CTRL1 and CTRL2 are no longer read from the chip. Also replaced magic numbers in buf access with register defines. Signed-off-by: Bruno Thomsen Link: https://lore.kernel.org/r/20190822131936.18772-3-bruno.thomsen@gmail.com Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- drivers/rtc/rtc-pcf2127.c | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c index 9f1b14bf91ae..367e0f803440 100644 --- a/drivers/rtc/rtc-pcf2127.c +++ b/drivers/rtc/rtc-pcf2127.c @@ -52,20 +52,14 @@ static int pcf2127_rtc_read_time(struct device *dev, struct rtc_time *tm) struct pcf2127 *pcf2127 = dev_get_drvdata(dev); unsigned char buf[10]; int ret; - int i; - for (i = 0; i <= PCF2127_REG_CTRL3; i++) { - ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL1 + i, - (unsigned int *)(buf + i)); - if (ret) { - dev_err(dev, "%s: read error\n", __func__); - return ret; - } - } - - ret = regmap_bulk_read(pcf2127->regmap, PCF2127_REG_SC, - (buf + PCF2127_REG_SC), - ARRAY_SIZE(buf) - PCF2127_REG_SC); + /* + * Avoid reading CTRL2 register as it causes WD_VAL register + * value to reset to 0 which means watchdog is stopped. + */ + ret = regmap_bulk_read(pcf2127->regmap, PCF2127_REG_CTRL3, + (buf + PCF2127_REG_CTRL3), + ARRAY_SIZE(buf) - PCF2127_REG_CTRL3); if (ret) { dev_err(dev, "%s: read error\n", __func__); return ret; @@ -86,14 +80,12 @@ static int pcf2127_rtc_read_time(struct device *dev, struct rtc_time *tm) } dev_dbg(dev, - "%s: raw data is cr1=%02x, cr2=%02x, cr3=%02x, " - "sec=%02x, min=%02x, hr=%02x, " + "%s: raw data is cr3=%02x, sec=%02x, min=%02x, hr=%02x, " "mday=%02x, wday=%02x, mon=%02x, year=%02x\n", - __func__, - buf[0], buf[1], buf[2], - buf[3], buf[4], buf[5], - buf[6], buf[7], buf[8], buf[9]); - + __func__, buf[PCF2127_REG_CTRL3], buf[PCF2127_REG_SC], + buf[PCF2127_REG_MN], buf[PCF2127_REG_HR], + buf[PCF2127_REG_DM], buf[PCF2127_REG_DW], + buf[PCF2127_REG_MO], buf[PCF2127_REG_YR]); tm->tm_sec = bcd2bin(buf[PCF2127_REG_SC] & 0x7F); tm->tm_min = bcd2bin(buf[PCF2127_REG_MN] & 0x7F); From 0972c51f346f0be5581620d61afdea6857c08503 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 29 Jul 2019 14:10:12 -0700 Subject: [PATCH 2702/3715] mips: avoid explicit UB in assignment of mips_io_port_base [ Upstream commit 12051b318bc3ce5b42d6d786191008284b067d83 ] The code in question is modifying a variable declared const through pointer manipulation. Such code is explicitly undefined behavior, and is the lone issue preventing malta_defconfig from booting when built with Clang: If an attempt is made to modify an object defined with a const-qualified type through use of an lvalue with non-const-qualified type, the behavior is undefined. LLVM is removing such assignments. A simple fix is to not declare variables const that you plan on modifying. Limiting the scope would be a better method of preventing unwanted writes to such a variable. Further, the code in question mentions "compiler bugs" without any links to bug reports, so it is difficult to know if the issue is resolved in GCC. The patch was authored in 2006, which would have been GCC 4.0.3 or 4.1.1. The minimal supported version of GCC in the Linux kernel is currently 4.6. For what its worth, there was UB before the commit in question, it just added a barrier and got lucky IRT codegen. I don't think there's any actual compiler bugs related, just runtime bugs due to UB. Link: https://github.com/ClangBuiltLinux/linux/issues/610 Fixes: 966f4406d903 ("[MIPS] Work around bad code generation for .") Reported-by: Nathan Chancellor Debugged-by: Nathan Chancellor Suggested-by: Eli Friedman Signed-off-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Signed-off-by: Paul Burton Cc: ralf@linux-mips.org Cc: jhogan@kernel.org Cc: Maciej W. Rozycki Cc: Hassan Naveed Cc: Stephen Kitt Cc: Serge Semin Cc: Mike Rapoport Cc: Andrew Morton Cc: Michal Hocko Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: clang-built-linux@googlegroups.com Signed-off-by: Sasha Levin --- arch/mips/include/asm/io.h | 14 ++------------ arch/mips/kernel/setup.c | 2 +- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h index 57b34257be2b..98eb15b0524c 100644 --- a/arch/mips/include/asm/io.h +++ b/arch/mips/include/asm/io.h @@ -60,21 +60,11 @@ * instruction, so the lower 16 bits must be zero. Should be true on * on any sane architecture; generic code does not use this assumption. */ -extern const unsigned long mips_io_port_base; +extern unsigned long mips_io_port_base; -/* - * Gcc will generate code to load the value of mips_io_port_base after each - * function call which may be fairly wasteful in some cases. So we don't - * play quite by the book. We tell gcc mips_io_port_base is a long variable - * which solves the code generation issue. Now we need to violate the - * aliasing rules a little to make initialization possible and finally we - * will need the barrier() to fight side effects of the aliasing chat. - * This trickery will eventually collapse under gcc's optimizer. Oh well. - */ static inline void set_io_port_base(unsigned long base) { - * (unsigned long *) &mips_io_port_base = base; - barrier(); + mips_io_port_base = base; } /* diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 795caa763da3..05ed4ed411c7 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -75,7 +75,7 @@ static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; * mips_io_port_base is the begin of the address space to which x86 style * I/O ports are mapped. */ -const unsigned long mips_io_port_base = -1; +unsigned long mips_io_port_base = -1; EXPORT_SYMBOL(mips_io_port_base); static struct resource code_resource = { .name = "Kernel code", }; From 91f098c085ceb891302fa9707d3da94be6a88524 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Sat, 24 Aug 2019 11:01:50 +0800 Subject: [PATCH 2703/3715] iommu/mediatek: Fix iova_to_phys PA start for 4GB mode [ Upstream commit 76ce65464fcd2c21db84391572b7938b716aceb0 ] In M4U 4GB mode, the physical address is remapped as below: CPU Physical address: ==================== 0 1G 2G 3G 4G 5G |---A---|---B---|---C---|---D---|---E---| +--I/O--+------------Memory-------------+ IOMMU output physical address: ============================= 4G 5G 6G 7G 8G |---E---|---B---|---C---|---D---| +------------Memory-------------+ The Region 'A'(I/O) can not be mapped by M4U; For Region 'B'/'C'/'D', the bit32 of the CPU physical address always is needed to set, and for Region 'E', the CPU physical address keep as is. something looks like this: CPU PA -> M4U OUTPUT PA 0x4000_0000 0x1_4000_0000 (Add bit32) 0x8000_0000 0x1_8000_0000 ... 0xc000_0000 0x1_c000_0000 ... 0x1_0000_0000 0x1_0000_0000 (No change) Additionally, the iommu consumers always use the CPU phyiscal address. The PA in the iova_to_phys that is got from v7s always is u32, But from the CPU point of view, PA only need add BIT(32) when PA < 0x4000_0000. Fixes: 30e2fccf9512 ("iommu/mediatek: Enlarge the validate PA range for 4GB mode") Signed-off-by: Yong Wu Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/mtk_iommu.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index c30f62700431..0f99e95a1a73 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -115,6 +115,30 @@ struct mtk_iommu_domain { static struct iommu_ops mtk_iommu_ops; +/* + * In M4U 4GB mode, the physical address is remapped as below: + * + * CPU Physical address: + * ==================== + * + * 0 1G 2G 3G 4G 5G + * |---A---|---B---|---C---|---D---|---E---| + * +--I/O--+------------Memory-------------+ + * + * IOMMU output physical address: + * ============================= + * + * 4G 5G 6G 7G 8G + * |---E---|---B---|---C---|---D---| + * +------------Memory-------------+ + * + * The Region 'A'(I/O) can NOT be mapped by M4U; For Region 'B'/'C'/'D', the + * bit32 of the CPU physical address always is needed to set, and for Region + * 'E', the CPU physical address keep as is. + * Additionally, The iommu consumers always use the CPU phyiscal address. + */ +#define MTK_IOMMU_4GB_MODE_REMAP_BASE 0x40000000 + static LIST_HEAD(m4ulist); /* List all the M4U HWs */ #define for_each_m4u(data) list_for_each_entry(data, &m4ulist, list) @@ -404,7 +428,7 @@ static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, pa = dom->iop->iova_to_phys(dom->iop, iova); spin_unlock_irqrestore(&dom->pgtlock, flags); - if (data->enable_4GB) + if (data->enable_4GB && pa < MTK_IOMMU_4GB_MODE_REMAP_BASE) pa |= BIT_ULL(32); return pa; From e37950799ad018334e321c6fcbccdfbc1a909c85 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 30 Aug 2019 22:42:55 +0300 Subject: [PATCH 2704/3715] ahci: Do not export local variable ahci_em_messages [ Upstream commit 60fc35f327e0a9e60b955c0f3c3ed623608d1baa ] The commit ed08d40cdec4 ("ahci: Changing two module params with static and __read_mostly") moved ahci_em_messages to be static while missing the fact of exporting it. WARNING: "ahci_em_messages" [vmlinux] is a static EXPORT_SYMBOL_GPL Drop export for the local variable ahci_em_messages. Fixes: ed08d40cdec4 ("ahci: Changing two module params with static and __read_mostly") Cc: Chuansheng Liu Signed-off-by: Andy Shevchenko Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/ata/libahci.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index cda9a0b5bdaa..7473ff46de66 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -191,7 +191,6 @@ struct ata_port_operations ahci_pmp_retry_srst_ops = { EXPORT_SYMBOL_GPL(ahci_pmp_retry_srst_ops); static bool ahci_em_messages __read_mostly = true; -EXPORT_SYMBOL_GPL(ahci_em_messages); module_param(ahci_em_messages, bool, 0444); /* add other LED protocol types when they become supported */ MODULE_PARM_DESC(ahci_em_messages, From abb78b8d946459494af91302e6da94a9f3f9dab2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 30 Aug 2019 18:47:15 -0700 Subject: [PATCH 2705/3715] Partially revert "kfifo: fix kfifo_alloc() and kfifo_init()" [ Upstream commit ab9bb6318b0967671e0c9b6537c1537d51ca4f45 ] Commit dfe2a77fd243 ("kfifo: fix kfifo_alloc() and kfifo_init()") made the kfifo code round the number of elements up. That was good for __kfifo_alloc(), but it's actually wrong for __kfifo_init(). The difference? __kfifo_alloc() will allocate the rounded-up number of elements, but __kfifo_init() uses an allocation done by the caller. We can't just say "use more elements than the caller allocated", and have to round down. The good news? All the normal cases will be using power-of-two arrays anyway, and most users of kfifo's don't use kfifo_init() at all, but one of the helper macros to declare a KFIFO that enforce the proper power-of-two behavior. But it looks like at least ibmvscsis might be affected. The bad news? Will Deacon refers to an old thread and points points out that the memory ordering in kfifo's is questionable. See https://lore.kernel.org/lkml/20181211034032.32338-1-yuleixzhang@tencent.com/ for more. Fixes: dfe2a77fd243 ("kfifo: fix kfifo_alloc() and kfifo_init()") Reported-by: laokz Cc: Stefani Seibold Cc: Andrew Morton Cc: Dan Carpenter Cc: Greg KH Cc: Kees Cook Cc: Will Deacon Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- lib/kfifo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/kfifo.c b/lib/kfifo.c index 90ba1eb1df06..a94227c55551 100644 --- a/lib/kfifo.c +++ b/lib/kfifo.c @@ -82,7 +82,8 @@ int __kfifo_init(struct __kfifo *fifo, void *buffer, { size /= esize; - size = roundup_pow_of_two(size); + if (!is_power_of_2(size)) + size = rounddown_pow_of_two(size); fifo->in = 0; fifo->out = 0; From 8cf42a20d95d975fb46841413ecb6670fe791e3d Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 8 Aug 2019 12:00:18 -0700 Subject: [PATCH 2706/3715] hwmon: (lm75) Fix write operations for negative temperatures [ Upstream commit 7d82fcc9d9e81241778aaa22fda7be753e237d86 ] Writes into limit registers fail if the temperature written is negative. The regmap write operation checks the value range, regmap_write accepts an unsigned int as parameter, and the temperature value passed to regmap_write is kept in a variable declared as long. Negative values are converted large unsigned integers, which fails the range check. Fix by type casting the temperature to u16 when calling regmap_write(). Cc: Iker Perez del Palomar Sustatxa Fixes: e65365fed87f ("hwmon: (lm75) Convert to use regmap") Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/lm75.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c index 005ffb5ffa92..1737bb5fbaaf 100644 --- a/drivers/hwmon/lm75.c +++ b/drivers/hwmon/lm75.c @@ -165,7 +165,7 @@ static int lm75_write(struct device *dev, enum hwmon_sensor_types type, temp = DIV_ROUND_CLOSEST(temp << (resolution - 8), 1000) << (16 - resolution); - return regmap_write(data->regmap, reg, temp); + return regmap_write(data->regmap, reg, (u16)temp); } static umode_t lm75_is_visible(const void *data, enum hwmon_sensor_types type, From 0d9754bd7419b466b47723781dd98b3a4c56fbe1 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 1 Aug 2019 14:33:30 -0700 Subject: [PATCH 2707/3715] power: supply: Init device wakeup after device_add() [ Upstream commit 8288022284859acbcc3cf1a073a1e2692d6c2543 ] We may want to use the device pointer in device_init_wakeup() with functions that expect the device to already be added with device_add(). For example, if we were to link the device initializing wakeup to something in sysfs such as a class for wakeups we'll run into an error. It looks like this code was written with the assumption that the device would be added before initializing wakeup due to the order of operations in power_supply_unregister(). Let's change the order of operations so we don't run into problems here. Fixes: 948dcf966228 ("power_supply: Prevent suspend until power supply events are processed") Cc: Greg Kroah-Hartman Cc: Tri Vo Cc: Kalesh Singh Cc: Ravi Chandra Sadineni Cc: Viresh Kumar Signed-off-by: Stephen Boyd Acked-by: Rafael J. Wysocki Signed-off-by: Sebastian Reichel Signed-off-by: Sasha Levin --- drivers/power/supply/power_supply_core.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index 3226faebe0a0..0f1a0efd5926 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -891,14 +891,14 @@ __power_supply_register(struct device *parent, } spin_lock_init(&psy->changed_lock); - rc = device_init_wakeup(dev, ws); - if (rc) - goto wakeup_init_failed; - rc = device_add(dev); if (rc) goto device_add_failed; + rc = device_init_wakeup(dev, ws); + if (rc) + goto wakeup_init_failed; + rc = psy_register_thermal(psy); if (rc) goto register_thermal_failed; @@ -935,8 +935,8 @@ register_cooler_failed: psy_unregister_thermal(psy); register_thermal_failed: device_del(dev); -device_add_failed: wakeup_init_failed: +device_add_failed: check_supplies_failed: dev_set_name_failed: put_device(dev); From 236a45c27006a0c73f763240531a19a5fd6a5255 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sun, 1 Sep 2019 12:03:08 +0900 Subject: [PATCH 2708/3715] x86, perf: Fix the dependency of the x86 insn decoder selftest [ Upstream commit 7720804a2ae46c90265a32c81c45fb6f8d2f4e8b ] Since x86 instruction decoder is not only for kprobes, it should be tested when the insn.c is compiled. (e.g. perf is enabled but kprobes is disabled) Signed-off-by: Masami Hiramatsu Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: cbe5c34c8c1f ("x86: Compile insn.c and inat.c only for KPROBES") Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- arch/x86/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 6293a8768a91..bec0952c5595 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -189,7 +189,7 @@ config HAVE_MMIOTRACE_SUPPORT config X86_DECODER_SELFTEST bool "x86 instruction decoder selftest" - depends on DEBUG_KERNEL && KPROBES + depends on DEBUG_KERNEL && INSTRUCTION_DECODER depends on !COMPILE_TEST ---help--- Perform x86 instruction decoder selftests at build time. From b6cda623b5b86879d05fae8eb0e2b562a27dc3ce Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 29 Aug 2019 15:28:39 +0300 Subject: [PATCH 2709/3715] staging: greybus: light: fix a couple double frees [ Upstream commit 329101244f214952606359d254ae883b7109e1a5 ] The problem is in gb_lights_request_handler(). If we get a request to change the config then we release the light with gb_lights_light_release() and re-allocated it. However, if the allocation fails part way through then we call gb_lights_light_release() again. This can lead to a couple different double frees where we haven't cleared out the original values: gb_lights_light_v4l2_unregister(light); ... kfree(light->channels); kfree(light->name); I also made a small change to how we set "light->channels_count = 0;". The original code handled this part fine and did not cause a use after free but it was sort of complicated to read. Fixes: 2870b52bae4c ("greybus: lights: add lights implementation") Signed-off-by: Dan Carpenter Acked-by: Rui Miguel Silva Link: https://lore.kernel.org/r/20190829122839.GA20116@mwanda Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/staging/greybus/light.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/staging/greybus/light.c b/drivers/staging/greybus/light.c index 0f538b8c3a07..4e7575147775 100644 --- a/drivers/staging/greybus/light.c +++ b/drivers/staging/greybus/light.c @@ -1103,21 +1103,21 @@ static void gb_lights_channel_release(struct gb_channel *channel) static void gb_lights_light_release(struct gb_light *light) { int i; - int count; light->ready = false; - count = light->channels_count; - if (light->has_flash) gb_lights_light_v4l2_unregister(light); + light->has_flash = false; - for (i = 0; i < count; i++) { + for (i = 0; i < light->channels_count; i++) gb_lights_channel_release(&light->channels[i]); - light->channels_count--; - } + light->channels_count = 0; + kfree(light->channels); + light->channels = NULL; kfree(light->name); + light->name = NULL; } static void gb_lights_release(struct gb_lights *glights) From 4a3d966299aa7009cad9230107b4b0a2188c84e0 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Mon, 2 Sep 2019 23:14:56 +0000 Subject: [PATCH 2710/3715] irqdomain: Add the missing assignment of domain->fwnode for named fwnode [ Upstream commit 711419e504ebd68c8f03656616829c8ad7829389 ] Recently device pass-through stops working for Linux VM running on Hyper-V. git-bisect shows the regression is caused by the recent commit 467a3bb97432 ("PCI: hv: Allocate a named fwnode ..."), but the root cause is that the commit d59f6617eef0 forgets to set the domain->fwnode for IRQCHIP_FWNODE_NAMED*, and as a result: 1. The domain->fwnode remains to be NULL. 2. irq_find_matching_fwspec() returns NULL since "h->fwnode == fwnode" is false, and pci_set_bus_msi_domain() sets the Hyper-V PCI root bus's msi_domain to NULL. 3. When the device is added onto the root bus, the device's dev->msi_domain is set to NULL in pci_set_msi_domain(). 4. When a device driver tries to enable MSI-X, pci_msi_setup_msi_irqs() calls arch_setup_msi_irqs(), which uses the native MSI chip (i.e. arch/x86/kernel/apic/msi.c: pci_msi_controller) to set up the irqs, but actually pci_msi_setup_msi_irqs() is supposed to call msi_domain_alloc_irqs() with the hbus->irq_domain, which is created in hv_pcie_init_irq_domain() and is associated with the Hyper-V chip hv_msi_irq_chip. Consequently, the irq line is not properly set up, and the device driver can not receive any interrupt. Fixes: d59f6617eef0 ("genirq: Allow fwnode to carry name information only") Fixes: 467a3bb97432 ("PCI: hv: Allocate a named fwnode instead of an address-based one") Reported-by: Lili Deng Signed-off-by: Dexuan Cui Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/PU1P153MB01694D9AF625AC335C600C5FBFBE0@PU1P153MB0169.APCP153.PROD.OUTLOOK.COM Signed-off-by: Sasha Levin --- kernel/irq/irqdomain.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index ac4644e92b49..0f0e7975a309 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -147,6 +147,7 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size, switch (fwid->type) { case IRQCHIP_FWNODE_NAMED: case IRQCHIP_FWNODE_NAMED_ID: + domain->fwnode = fwnode; domain->name = kstrdup(fwid->name, GFP_KERNEL); if (!domain->name) { kfree(domain); From 950f6f8492deeb68cfe71799b5e1531e35c19463 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 27 Aug 2019 09:16:20 +0100 Subject: [PATCH 2711/3715] bcma: fix incorrect update of BCMA_CORE_PCI_MDIO_DATA [ Upstream commit 420c20be08a4597404d272ae9793b642401146eb ] An earlier commit re-worked the setting of the bitmask and is now assigning v with some bit flags rather than bitwise or-ing them into v, consequently the earlier bit-settings of v are being lost. Fix this by replacing an assignment with the bitwise or instead. Addresses-Coverity: ("Unused value") Fixes: 2be25cac8402 ("bcma: add constants for PCI and use them") Signed-off-by: Colin Ian King Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/bcma/driver_pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/bcma/driver_pci.c b/drivers/bcma/driver_pci.c index f499a469e66d..12b2cc9a3fbe 100644 --- a/drivers/bcma/driver_pci.c +++ b/drivers/bcma/driver_pci.c @@ -78,7 +78,7 @@ static u16 bcma_pcie_mdio_read(struct bcma_drv_pci *pc, u16 device, u8 address) v |= (address << BCMA_CORE_PCI_MDIODATA_REGADDR_SHF_OLD); } - v = BCMA_CORE_PCI_MDIODATA_START; + v |= BCMA_CORE_PCI_MDIODATA_START; v |= BCMA_CORE_PCI_MDIODATA_READ; v |= BCMA_CORE_PCI_MDIODATA_TA; @@ -121,7 +121,7 @@ static void bcma_pcie_mdio_write(struct bcma_drv_pci *pc, u16 device, v |= (address << BCMA_CORE_PCI_MDIODATA_REGADDR_SHF_OLD); } - v = BCMA_CORE_PCI_MDIODATA_START; + v |= BCMA_CORE_PCI_MDIODATA_START; v |= BCMA_CORE_PCI_MDIODATA_WRITE; v |= BCMA_CORE_PCI_MDIODATA_TA; v |= data; From c0d6177fda727f29889ad9bab6b9bb7e867d8fed Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 15 Aug 2019 12:58:46 +0100 Subject: [PATCH 2712/3715] iio: dac: ad5380: fix incorrect assignment to val [ Upstream commit b1e18768ef1214c0a8048327918a182cabe09f9d ] Currently the pointer val is being incorrectly incremented instead of the value pointed to by val. Fix this by adding in the missing * indirection operator. Addresses-Coverity: ("Unused value") Fixes: c03f2c536818 ("staging:iio:dac: Add AD5380 driver") Signed-off-by: Colin Ian King Reviewed-by: Alexandru Ardelean Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/dac/ad5380.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/dac/ad5380.c b/drivers/iio/dac/ad5380.c index 97d2c5111f43..8bf7fc626a9d 100644 --- a/drivers/iio/dac/ad5380.c +++ b/drivers/iio/dac/ad5380.c @@ -221,7 +221,7 @@ static int ad5380_read_raw(struct iio_dev *indio_dev, if (ret) return ret; *val >>= chan->scan_type.shift; - val -= (1 << chan->scan_type.realbits) / 2; + *val -= (1 << chan->scan_type.realbits) / 2; return IIO_VAL_INT; case IIO_CHAN_INFO_SCALE: *val = 2 * st->vref; From 763ce5f99060cd589174f3394a83343c8e21f87b Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 19 Aug 2019 09:41:39 +0200 Subject: [PATCH 2713/3715] ath9k: dynack: fix possible deadlock in ath_dynack_node_{de}init [ Upstream commit e1aa1a1db3b01c9890e82cf065cee99962ba1ed9 ] Fix following lockdep warning disabling bh in ath_dynack_node_init/ath_dynack_node_deinit [ 75.955878] -------------------------------- [ 75.955880] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. [ 75.955884] swapper/0/0 [HC0[0]:SC1[3]:HE1:SE0] takes: [ 75.955888] 00000000792a7ee0 (&(&da->qlock)->rlock){+.?.}, at: ath_dynack_sample_ack_ts+0x4d/0xa0 [ath9k_hw] [ 75.955905] {SOFTIRQ-ON-W} state was registered at: [ 75.955912] lock_acquire+0x9a/0x160 [ 75.955917] _raw_spin_lock+0x2c/0x70 [ 75.955927] ath_dynack_node_init+0x2a/0x60 [ath9k_hw] [ 75.955934] ath9k_sta_state+0xec/0x160 [ath9k] [ 75.955976] drv_sta_state+0xb2/0x740 [mac80211] [ 75.956008] sta_info_insert_finish+0x21a/0x420 [mac80211] [ 75.956039] sta_info_insert_rcu+0x12b/0x2c0 [mac80211] [ 75.956069] sta_info_insert+0x7/0x70 [mac80211] [ 75.956093] ieee80211_prep_connection+0x42e/0x730 [mac80211] [ 75.956120] ieee80211_mgd_auth.cold+0xb9/0x15c [mac80211] [ 75.956152] cfg80211_mlme_auth+0x143/0x350 [cfg80211] [ 75.956169] nl80211_authenticate+0x25e/0x2b0 [cfg80211] [ 75.956172] genl_family_rcv_msg+0x198/0x400 [ 75.956174] genl_rcv_msg+0x42/0x90 [ 75.956176] netlink_rcv_skb+0x35/0xf0 [ 75.956178] genl_rcv+0x1f/0x30 [ 75.956180] netlink_unicast+0x154/0x200 [ 75.956182] netlink_sendmsg+0x1bf/0x3d0 [ 75.956186] ___sys_sendmsg+0x2c2/0x2f0 [ 75.956187] __sys_sendmsg+0x44/0x80 [ 75.956190] do_syscall_64+0x55/0x1a0 [ 75.956192] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 75.956194] irq event stamp: 2357092 [ 75.956196] hardirqs last enabled at (2357092): [] _raw_spin_unlock_irqrestore+0x3e/0x50 [ 75.956199] hardirqs last disabled at (2357091): [] _raw_spin_lock_irqsave+0x11/0x80 [ 75.956202] softirqs last enabled at (2357072): [] irq_enter+0x59/0x60 [ 75.956204] softirqs last disabled at (2357073): [] irq_exit+0xae/0xc0 [ 75.956206] other info that might help us debug this: [ 75.956207] Possible unsafe locking scenario: [ 75.956208] CPU0 [ 75.956209] ---- [ 75.956210] lock(&(&da->qlock)->rlock); [ 75.956213] [ 75.956214] lock(&(&da->qlock)->rlock); [ 75.956216] *** DEADLOCK *** [ 75.956217] 1 lock held by swapper/0/0: [ 75.956219] #0: 000000003bb5675c (&(&sc->sc_pcu_lock)->rlock){+.-.}, at: ath9k_tasklet+0x55/0x240 [ath9k] [ 75.956225] stack backtrace: [ 75.956228] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.3.0-rc1-wdn+ #13 [ 75.956229] Hardware name: Dell Inc. Studio XPS 1340/0K183D, BIOS A11 09/08/2009 [ 75.956231] Call Trace: [ 75.956233] [ 75.956236] dump_stack+0x67/0x90 [ 75.956239] mark_lock+0x4c1/0x640 [ 75.956242] ? check_usage_backwards+0x130/0x130 [ 75.956245] ? sched_clock_local+0x12/0x80 [ 75.956247] __lock_acquire+0x484/0x7a0 [ 75.956250] ? __lock_acquire+0x3b9/0x7a0 [ 75.956252] lock_acquire+0x9a/0x160 [ 75.956259] ? ath_dynack_sample_ack_ts+0x4d/0xa0 [ath9k_hw] [ 75.956262] _raw_spin_lock_bh+0x34/0x80 [ 75.956268] ? ath_dynack_sample_ack_ts+0x4d/0xa0 [ath9k_hw] [ 75.956275] ath_dynack_sample_ack_ts+0x4d/0xa0 [ath9k_hw] [ 75.956280] ath_rx_tasklet+0xd09/0xe90 [ath9k] [ 75.956286] ath9k_tasklet+0x102/0x240 [ath9k] [ 75.956288] tasklet_action_common.isra.0+0x6d/0x170 [ 75.956291] __do_softirq+0xcc/0x425 [ 75.956294] irq_exit+0xae/0xc0 [ 75.956296] do_IRQ+0x8a/0x110 [ 75.956298] common_interrupt+0xf/0xf [ 75.956300] [ 75.956303] RIP: 0010:cpuidle_enter_state+0xb2/0x400 [ 75.956308] RSP: 0018:ffffffff82203e70 EFLAGS: 00000202 ORIG_RAX: ffffffffffffffd7 [ 75.956310] RAX: ffffffff82219800 RBX: ffffffff822bd0a0 RCX: 0000000000000000 [ 75.956312] RDX: 0000000000000046 RSI: 0000000000000006 RDI: ffffffff82219800 [ 75.956314] RBP: ffff888155a01c00 R08: 00000011af51aabe R09: 0000000000000000 [ 75.956315] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000002 [ 75.956317] R13: 00000011af51aabe R14: 0000000000000003 R15: ffffffff82219800 [ 75.956321] cpuidle_enter+0x24/0x40 [ 75.956323] do_idle+0x1ac/0x220 [ 75.956326] cpu_startup_entry+0x14/0x20 [ 75.956329] start_kernel+0x482/0x489 [ 75.956332] secondary_startup_64+0xa4/0xb0 Fixes: c774d57fd47c ("ath9k: add dynamic ACK timeout estimation") Signed-off-by: Lorenzo Bianconi Tested-by: Koen Vandeputte Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath9k/dynack.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/dynack.c b/drivers/net/wireless/ath/ath9k/dynack.c index 6e236a485431..71b4888b30e7 100644 --- a/drivers/net/wireless/ath/ath9k/dynack.c +++ b/drivers/net/wireless/ath/ath9k/dynack.c @@ -300,9 +300,9 @@ void ath_dynack_node_init(struct ath_hw *ah, struct ath_node *an) an->ackto = ackto; - spin_lock(&da->qlock); + spin_lock_bh(&da->qlock); list_add_tail(&an->list, &da->nodes); - spin_unlock(&da->qlock); + spin_unlock_bh(&da->qlock); } EXPORT_SYMBOL(ath_dynack_node_init); @@ -316,9 +316,9 @@ void ath_dynack_node_deinit(struct ath_hw *ah, struct ath_node *an) { struct ath_dynack *da = &ah->dynack; - spin_lock(&da->qlock); + spin_lock_bh(&da->qlock); list_del(&an->list); - spin_unlock(&da->qlock); + spin_unlock_bh(&da->qlock); } EXPORT_SYMBOL(ath_dynack_node_deinit); From 36803fe500521550b631de05717d5ce22a5a62a2 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Mon, 29 Jul 2019 12:52:15 -0700 Subject: [PATCH 2714/3715] tty: serial: fsl_lpuart: Use appropriate lpuart32_* I/O funcs [ Upstream commit 1da17d7cf8e2c4b60163d54300f72c02f510327c ] When dealing with 32-bit variant of LPUART IP block appropriate I/O helpers have to be used to properly deal with endianness differences. Change all of the offending code to do that. Fixes: a5fa2660d787 ("tty/serial/fsl_lpuart: Add CONSOLE_POLL support for lpuart32.") Signed-off-by: Andrey Smirnov Cc: Stefan Agner Cc: Bhuvanchandra DV Cc: Chris Healy Cc: Cory Tusar Cc: Lucas Stach Cc: Greg Kroah-Hartman Cc: Jiri Slaby Cc: linux-imx@nxp.com Cc: linux-serial@vger.kernel.org Cc: linux-kernel@vger.kernel.org Link: https://lore.kernel.org/r/20190729195226.8862-14-andrew.smirnov@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/fsl_lpuart.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index fb2dcb3f8591..16422987ab0f 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -532,26 +532,26 @@ static int lpuart32_poll_init(struct uart_port *port) spin_lock_irqsave(&sport->port.lock, flags); /* Disable Rx & Tx */ - writel(0, sport->port.membase + UARTCTRL); + lpuart32_write(&sport->port, UARTCTRL, 0); - temp = readl(sport->port.membase + UARTFIFO); + temp = lpuart32_read(&sport->port, UARTFIFO); /* Enable Rx and Tx FIFO */ - writel(temp | UARTFIFO_RXFE | UARTFIFO_TXFE, - sport->port.membase + UARTFIFO); + lpuart32_write(&sport->port, UARTFIFO, + temp | UARTFIFO_RXFE | UARTFIFO_TXFE); /* flush Tx and Rx FIFO */ - writel(UARTFIFO_TXFLUSH | UARTFIFO_RXFLUSH, - sport->port.membase + UARTFIFO); + lpuart32_write(&sport->port, UARTFIFO, + UARTFIFO_TXFLUSH | UARTFIFO_RXFLUSH); /* explicitly clear RDRF */ - if (readl(sport->port.membase + UARTSTAT) & UARTSTAT_RDRF) { - readl(sport->port.membase + UARTDATA); - writel(UARTFIFO_RXUF, sport->port.membase + UARTFIFO); + if (lpuart32_read(&sport->port, UARTSTAT) & UARTSTAT_RDRF) { + lpuart32_read(&sport->port, UARTDATA); + lpuart32_write(&sport->port, UARTFIFO, UARTFIFO_RXUF); } /* Enable Rx and Tx */ - writel(UARTCTRL_RE | UARTCTRL_TE, sport->port.membase + UARTCTRL); + lpuart32_write(&sport->port, UARTCTRL, UARTCTRL_RE | UARTCTRL_TE); spin_unlock_irqrestore(&sport->port.lock, flags); return 0; @@ -559,18 +559,18 @@ static int lpuart32_poll_init(struct uart_port *port) static void lpuart32_poll_put_char(struct uart_port *port, unsigned char c) { - while (!(readl(port->membase + UARTSTAT) & UARTSTAT_TDRE)) + while (!(lpuart32_read(port, UARTSTAT) & UARTSTAT_TDRE)) barrier(); - writel(c, port->membase + UARTDATA); + lpuart32_write(port, UARTDATA, c); } static int lpuart32_poll_get_char(struct uart_port *port) { - if (!(readl(port->membase + UARTSTAT) & UARTSTAT_RDRF)) + if (!(lpuart32_read(port, UARTSTAT) & UARTSTAT_RDRF)) return NO_POLL_CHAR; - return readl(port->membase + UARTDATA); + return lpuart32_read(port, UARTDATA); } #endif From 8b71eb46f20c8d6f235d71ca21db58c4ed5cf503 Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Thu, 5 Sep 2019 09:57:12 +0800 Subject: [PATCH 2715/3715] net: sonic: return NETDEV_TX_OK if failed to map buffer [ Upstream commit 6e1cdedcf0362fed3aedfe051d46bd7ee2a85fe1 ] NETDEV_TX_BUSY really should only be used by drivers that call netif_tx_stop_queue() at the wrong moment. If dma_map_single() is failed to map tx DMA buffer, it might trigger an infinite loop. This patch use NETDEV_TX_OK instead of NETDEV_TX_BUSY, and change printk to pr_err_ratelimited. Fixes: d9fb9f384292 ("*sonic/natsemi/ns83829: Move the National Semi-conductor drivers") Signed-off-by: Mao Wenan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/natsemi/sonic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index 23821540ab07..11f472fd5d47 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -221,9 +221,9 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) laddr = dma_map_single(lp->device, skb->data, length, DMA_TO_DEVICE); if (!laddr) { - printk(KERN_ERR "%s: failed to map tx DMA buffer.\n", dev->name); + pr_err_ratelimited("%s: failed to map tx DMA buffer.\n", dev->name); dev_kfree_skb(skb); - return NETDEV_TX_BUSY; + return NETDEV_TX_OK; } sonic_tda_put(dev, entry, SONIC_TD_STATUS, 0); /* clear status */ From caa4dd3ae27bc135274c39ea5de64e521875e848 Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan Date: Tue, 27 Aug 2019 14:13:40 -0700 Subject: [PATCH 2716/3715] scsi: fnic: fix msix interrupt allocation [ Upstream commit 3ec24fb4c035e9cbb2f02a48640a09aa913442a2 ] pci_alloc_irq_vectors() returns number of vectors allocated. Fix the check for error condition. Fixes: cca678dfbad49 ("scsi: fnic: switch to pci_alloc_irq_vectors") Link: https://lore.kernel.org/r/20190827211340.1095-1-gvaradar@cisco.com Signed-off-by: Govindarajulu Varadarajan Acked-by: Satish Kharat Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/fnic/fnic_isr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/fnic/fnic_isr.c b/drivers/scsi/fnic/fnic_isr.c index 4e3a50202e8c..d28088218c36 100644 --- a/drivers/scsi/fnic/fnic_isr.c +++ b/drivers/scsi/fnic/fnic_isr.c @@ -254,7 +254,7 @@ int fnic_set_intr_mode(struct fnic *fnic) int vecs = n + m + o + 1; if (pci_alloc_irq_vectors(fnic->pdev, vecs, vecs, - PCI_IRQ_MSIX) < 0) { + PCI_IRQ_MSIX) == vecs) { fnic->rq_count = n; fnic->raw_wq_count = m; fnic->wq_copy_count = o; @@ -280,7 +280,7 @@ int fnic_set_intr_mode(struct fnic *fnic) fnic->wq_copy_count >= 1 && fnic->cq_count >= 3 && fnic->intr_count >= 1 && - pci_alloc_irq_vectors(fnic->pdev, 1, 1, PCI_IRQ_MSI) < 0) { + pci_alloc_irq_vectors(fnic->pdev, 1, 1, PCI_IRQ_MSI) == 1) { fnic->rq_count = 1; fnic->raw_wq_count = 1; fnic->wq_copy_count = 1; From 0d479ec44e1c4257e69b400bf9ba429105d9e7aa Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 4 Jul 2019 16:24:09 +0100 Subject: [PATCH 2717/3715] Btrfs: fix hang when loading existing inode cache off disk [ Upstream commit 7764d56baa844d7f6206394f21a0e8c1f303c476 ] If we are able to load an existing inode cache off disk, we set the state of the cache to BTRFS_CACHE_FINISHED, but we don't wake up any one waiting for the cache to be available. This means that anyone waiting for the cache to be available, waiting on the condition that either its state is BTRFS_CACHE_FINISHED or its available free space is greather than zero, can hang forever. This could be observed running fstests with MOUNT_OPTIONS="-o inode_cache", in particular test case generic/161 triggered it very frequently for me, producing a trace like the following: [63795.739712] BTRFS info (device sdc): enabling inode map caching [63795.739714] BTRFS info (device sdc): disk space caching is enabled [63795.739716] BTRFS info (device sdc): has skinny extents [64036.653886] INFO: task btrfs-transacti:3917 blocked for more than 120 seconds. [64036.654079] Not tainted 5.2.0-rc4-btrfs-next-50 #1 [64036.654143] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [64036.654232] btrfs-transacti D 0 3917 2 0x80004000 [64036.654239] Call Trace: [64036.654258] ? __schedule+0x3ae/0x7b0 [64036.654271] schedule+0x3a/0xb0 [64036.654325] btrfs_commit_transaction+0x978/0xae0 [btrfs] [64036.654339] ? remove_wait_queue+0x60/0x60 [64036.654395] transaction_kthread+0x146/0x180 [btrfs] [64036.654450] ? btrfs_cleanup_transaction+0x620/0x620 [btrfs] [64036.654456] kthread+0x103/0x140 [64036.654464] ? kthread_create_worker_on_cpu+0x70/0x70 [64036.654476] ret_from_fork+0x3a/0x50 [64036.654504] INFO: task xfs_io:3919 blocked for more than 120 seconds. [64036.654568] Not tainted 5.2.0-rc4-btrfs-next-50 #1 [64036.654617] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [64036.654685] xfs_io D 0 3919 3633 0x00000000 [64036.654691] Call Trace: [64036.654703] ? __schedule+0x3ae/0x7b0 [64036.654716] schedule+0x3a/0xb0 [64036.654756] btrfs_find_free_ino+0xa9/0x120 [btrfs] [64036.654764] ? remove_wait_queue+0x60/0x60 [64036.654809] btrfs_create+0x72/0x1f0 [btrfs] [64036.654822] lookup_open+0x6bc/0x790 [64036.654849] path_openat+0x3bc/0xc00 [64036.654854] ? __lock_acquire+0x331/0x1cb0 [64036.654869] do_filp_open+0x99/0x110 [64036.654884] ? __alloc_fd+0xee/0x200 [64036.654895] ? do_raw_spin_unlock+0x49/0xc0 [64036.654909] ? do_sys_open+0x132/0x220 [64036.654913] do_sys_open+0x132/0x220 [64036.654926] do_syscall_64+0x60/0x1d0 [64036.654933] entry_SYSCALL_64_after_hwframe+0x49/0xbe Fix this by adding a wake_up() call right after setting the cache state to BTRFS_CACHE_FINISHED, at start_caching(), when we are able to load the cache from disk. Fixes: 82d5902d9c681b ("Btrfs: Support reading/writing on disk free ino cache") Reviewed-by: Nikolay Borisov Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/inode-map.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index d02019747d00..7dc2923655d9 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -159,6 +159,7 @@ static void start_caching(struct btrfs_root *root) spin_lock(&root->ino_cache_lock); root->ino_cache_state = BTRFS_CACHE_FINISHED; spin_unlock(&root->ino_cache_lock); + wake_up(&root->ino_cache_wait); return; } From 2162f5aae4a7feb883739ca92054715f8151bd7a Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 4 Jul 2019 16:24:32 +0100 Subject: [PATCH 2718/3715] Btrfs: fix inode cache waiters hanging on failure to start caching thread [ Upstream commit a68ebe0790fc88b4314d17984a2cf99ce2361901 ] If we fail to start the inode caching thread, we print an error message and disable the inode cache, however we never wake up any waiters, so they hang forever waiting for the caching to finish. Fix this by waking them up and have them fallback to a call to btrfs_find_free_objectid(). Fixes: e60efa84252c05 ("Btrfs: avoid triggering bug_on() when we fail to start inode caching task") Reviewed-by: Nikolay Borisov Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/inode-map.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 7dc2923655d9..b1c3a4ec76c8 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -26,6 +26,19 @@ #include "inode-map.h" #include "transaction.h" +static void fail_caching_thread(struct btrfs_root *root) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + + btrfs_warn(fs_info, "failed to start inode caching task"); + btrfs_clear_pending_and_info(fs_info, INODE_MAP_CACHE, + "disabling inode map caching"); + spin_lock(&root->ino_cache_lock); + root->ino_cache_state = BTRFS_CACHE_ERROR; + spin_unlock(&root->ino_cache_lock); + wake_up(&root->ino_cache_wait); +} + static int caching_kthread(void *data) { struct btrfs_root *root = data; @@ -178,11 +191,8 @@ static void start_caching(struct btrfs_root *root) tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu", root->root_key.objectid); - if (IS_ERR(tsk)) { - btrfs_warn(fs_info, "failed to start inode caching task"); - btrfs_clear_pending_and_info(fs_info, INODE_MAP_CACHE, - "disabling inode map caching"); - } + if (IS_ERR(tsk)) + fail_caching_thread(root); } int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid) @@ -200,11 +210,14 @@ again: wait_event(root->ino_cache_wait, root->ino_cache_state == BTRFS_CACHE_FINISHED || + root->ino_cache_state == BTRFS_CACHE_ERROR || root->free_ino_ctl->free_space > 0); if (root->ino_cache_state == BTRFS_CACHE_FINISHED && root->free_ino_ctl->free_space == 0) return -ENOSPC; + else if (root->ino_cache_state == BTRFS_CACHE_ERROR) + return btrfs_find_free_objectid(root, objectid); else goto again; } From 510cd98350463b841fdc01065eac4cec180ba9d5 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 4 Jul 2019 16:24:44 +0100 Subject: [PATCH 2719/3715] Btrfs: fix inode cache waiters hanging on path allocation failure [ Upstream commit 9d123a35d7e97bb2139747b16127c9b22b6a593e ] If the caching thread fails to allocate a path, it returns without waking up any cache waiters, leaving them hang forever. Fix this by following the same approach as when we fail to start the caching thread: print an error message, disable inode caching and make the wakers fallback to non-caching mode behaviour (calling btrfs_find_free_objectid()). Fixes: 581bb050941b4f ("Btrfs: Cache free inode numbers in memory") Reviewed-by: Nikolay Borisov Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/inode-map.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index b1c3a4ec76c8..2ae32451fb5b 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -55,8 +55,10 @@ static int caching_kthread(void *data) return 0; path = btrfs_alloc_path(); - if (!path) + if (!path) { + fail_caching_thread(root); return -ENOMEM; + } /* Since the commit root is read-only, we can safely skip locking. */ path->skip_locking = 1; From a7a67b4e8e8d68d6301d233645a3bfbfba40bc35 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 15 Aug 2019 14:04:02 -0700 Subject: [PATCH 2720/3715] btrfs: use correct count in btrfs_file_write_iter() [ Upstream commit c09767a8960ca0500fb636bf73686723337debf4 ] generic_write_checks() may modify iov_iter_count(), so we must get the count after the call, not before. Using the wrong one has a couple of consequences: 1. We check a longer range in check_can_nocow() for nowait than we're actually writing. 2. We create extra hole extent maps in btrfs_cont_expand(). As far as I can tell, this is harmless, but I might be missing something. These issues are pretty minor, but let's fix it before something more important trips on it. Fixes: edf064e7c6fe ("btrfs: nowait aio support") Reviewed-by: Josef Bacik Signed-off-by: Omar Sandoval Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 97be32da857a..c68ce3412dc1 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1882,7 +1882,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host); ssize_t err; loff_t pos; - size_t count = iov_iter_count(from); + size_t count; loff_t oldsize; int clean_page = 0; @@ -1904,6 +1904,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, } pos = iocb->ki_pos; + count = iov_iter_count(from); if (iocb->ki_flags & IOCB_NOWAIT) { /* * We will allocate space in case nodatacow is not set, From 22437abfe204d1797245896a701bc2dc5a90c7ca Mon Sep 17 00:00:00 2001 From: Firo Yang Date: Thu, 8 Aug 2019 04:03:49 +0000 Subject: [PATCH 2721/3715] ixgbe: sync the first fragment unconditionally [ Upstream commit e7ba676c6188d394a0133fc4b9bcd7ee50d54b7f ] In Xen environment, if Xen-swiotlb is enabled, ixgbe driver could possibly allocate a page, DMA memory buffer, for the first fragment which is not suitable for Xen-swiotlb to do DMA operations. Xen-swiotlb have to internally allocate another page for doing DMA operations. This mechanism requires syncing the data from the internal page to the page which ixgbe sends to upper network stack. However, since commit f3213d932173 ("ixgbe: Update driver to make use of DMA attributes in Rx path"), the unmap operation is performed with DMA_ATTR_SKIP_CPU_SYNC. As a result, the sync is not performed. Since the sync isn't performed, the upper network stack could receive a incomplete network packet. By incomplete, it means the linear data on the first fragment(between skb->head and skb->end) is invalid. So we have to copy the data from the internal xen-swiotlb page to the page which ixgbe sends to upper network stack through the sync operation. More details from Alexander Duyck: Specifically since we are mapping the frame with DMA_ATTR_SKIP_CPU_SYNC we have to unmap with that as well. As a result a sync is not performed on an unmap and must be done manually as we skipped it for the first frag. As such we need to always sync before possibly performing a page unmap operation. Fixes: f3213d932173 ("ixgbe: Update driver to make use of DMA attributes in Rx path") Signed-off-by: Firo Yang Reviewed-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 0edfd199937d..e4c1e6345edd 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1871,13 +1871,7 @@ static void ixgbe_pull_tail(struct ixgbe_ring *rx_ring, static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring, struct sk_buff *skb) { - /* if the page was released unmap it, else just sync our portion */ - if (unlikely(IXGBE_CB(skb)->page_released)) { - dma_unmap_page_attrs(rx_ring->dev, IXGBE_CB(skb)->dma, - ixgbe_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, - IXGBE_RX_DMA_ATTR); - } else if (ring_uses_build_skb(rx_ring)) { + if (ring_uses_build_skb(rx_ring)) { unsigned long offset = (unsigned long)(skb->data) & ~PAGE_MASK; dma_sync_single_range_for_cpu(rx_ring->dev, @@ -1894,6 +1888,14 @@ static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring, skb_frag_size(frag), DMA_FROM_DEVICE); } + + /* If the page was released, just unmap it. */ + if (unlikely(IXGBE_CB(skb)->page_released)) { + dma_unmap_page_attrs(rx_ring->dev, IXGBE_CB(skb)->dma, + ixgbe_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, + IXGBE_RX_DMA_ATTR); + } } /** From 2c565cc955e6b5f07b97ab074bfbbc37b7682a35 Mon Sep 17 00:00:00 2001 From: Dan Robertson Date: Thu, 5 Sep 2019 01:45:54 +0000 Subject: [PATCH 2722/3715] hwmon: (shtc1) fix shtc1 and shtw1 id mask [ Upstream commit fdc7d8e829ec755c5cfb2f5a8d8c0cdfb664f895 ] Fix an error in the bitmaskfor the shtc1 and shtw1 bitmask used to retrieve the chip ID from the ID register. See section 5.7 of the shtw1 or shtc1 datasheet for details. Fixes: 1a539d372edd9832444e7a3daa710c444c014dc9 ("hwmon: add support for Sensirion SHTC1 sensor") Signed-off-by: Dan Robertson Link: https://lore.kernel.org/r/20190905014554.21658-3-dan@dlrobertson.com [groeck: Reordered to be first in series and adjusted accordingly] Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/shtc1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/shtc1.c b/drivers/hwmon/shtc1.c index decd7df995ab..2a18539591ea 100644 --- a/drivers/hwmon/shtc1.c +++ b/drivers/hwmon/shtc1.c @@ -38,7 +38,7 @@ static const unsigned char shtc1_cmd_read_id_reg[] = { 0xef, 0xc8 }; /* constants for reading the ID register */ #define SHTC1_ID 0x07 -#define SHTC1_ID_REG_MASK 0x1f +#define SHTC1_ID_REG_MASK 0x3f /* delays for non-blocking i2c commands, both in us */ #define SHTC1_NONBLOCKING_WAIT_TIME_HPM 14400 From 742ca82d31dfaac9bcdf552ca9b555658687c5a0 Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Wed, 11 Sep 2019 09:36:23 +0800 Subject: [PATCH 2723/3715] net: sonic: replace dev_kfree_skb in sonic_send_packet [ Upstream commit 49f6c90bf6805948b597eabb499e500a47cf24be ] sonic_send_packet will be processed in irq or non-irq context, so it would better use dev_kfree_skb_any instead of dev_kfree_skb. Fixes: d9fb9f384292 ("*sonic/natsemi/ns83829: Move the National Semi-conductor drivers") Signed-off-by: Mao Wenan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/natsemi/sonic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index 11f472fd5d47..a051dddcbd76 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -222,7 +222,7 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) laddr = dma_map_single(lp->device, skb->data, length, DMA_TO_DEVICE); if (!laddr) { pr_err_ratelimited("%s: failed to map tx DMA buffer.\n", dev->name); - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); return NETDEV_TX_OK; } From 5c09015c8bcb8599e711bb3db13aa1fc2d0ecdda Mon Sep 17 00:00:00 2001 From: Li Jin Date: Thu, 29 Aug 2019 10:22:27 +0530 Subject: [PATCH 2724/3715] pinctrl: iproc-gpio: Fix incorrect pinconf configurations [ Upstream commit 398a1f50e3c731586182fd52b834103b0aa2f826 ] Fix drive strength for AON/CRMU controller; fix pull-up/down setting for CCM/CDRU controller. Fixes: 616043d58a89 ("pinctrl: Rename gpio driver from cygnus to iproc") Signed-off-by: Li Jin Link: https://lore.kernel.org/r/1567054348-19685-2-git-send-email-srinath.mannam@broadcom.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/bcm/pinctrl-iproc-gpio.c | 94 +++++++++++++++++++----- 1 file changed, 76 insertions(+), 18 deletions(-) diff --git a/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c b/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c index 85a8c97d9dfe..5fe419e468ec 100644 --- a/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c +++ b/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c @@ -54,8 +54,12 @@ /* drive strength control for ASIU GPIO */ #define IPROC_GPIO_ASIU_DRV0_CTRL_OFFSET 0x58 -/* drive strength control for CCM/CRMU (AON) GPIO */ -#define IPROC_GPIO_DRV0_CTRL_OFFSET 0x00 +/* pinconf for CCM GPIO */ +#define IPROC_GPIO_PULL_DN_OFFSET 0x10 +#define IPROC_GPIO_PULL_UP_OFFSET 0x14 + +/* pinconf for CRMU(aon) GPIO and CCM GPIO*/ +#define IPROC_GPIO_DRV_CTRL_OFFSET 0x00 #define GPIO_BANK_SIZE 0x200 #define NGPIOS_PER_BANK 32 @@ -76,6 +80,12 @@ enum iproc_pinconf_param { IPROC_PINCON_MAX, }; +enum iproc_pinconf_ctrl_type { + IOCTRL_TYPE_AON = 1, + IOCTRL_TYPE_CDRU, + IOCTRL_TYPE_INVALID, +}; + /* * Iproc GPIO core * @@ -100,6 +110,7 @@ struct iproc_gpio { void __iomem *base; void __iomem *io_ctrl; + enum iproc_pinconf_ctrl_type io_ctrl_type; raw_spinlock_t lock; @@ -461,20 +472,44 @@ static const struct pinctrl_ops iproc_pctrl_ops = { static int iproc_gpio_set_pull(struct iproc_gpio *chip, unsigned gpio, bool disable, bool pull_up) { + void __iomem *base; unsigned long flags; + unsigned int shift; + u32 val_1, val_2; raw_spin_lock_irqsave(&chip->lock, flags); + if (chip->io_ctrl_type == IOCTRL_TYPE_CDRU) { + base = chip->io_ctrl; + shift = IPROC_GPIO_SHIFT(gpio); - if (disable) { - iproc_set_bit(chip, IPROC_GPIO_RES_EN_OFFSET, gpio, false); + val_1 = readl(base + IPROC_GPIO_PULL_UP_OFFSET); + val_2 = readl(base + IPROC_GPIO_PULL_DN_OFFSET); + if (disable) { + /* no pull-up or pull-down */ + val_1 &= ~BIT(shift); + val_2 &= ~BIT(shift); + } else if (pull_up) { + val_1 |= BIT(shift); + val_2 &= ~BIT(shift); + } else { + val_1 &= ~BIT(shift); + val_2 |= BIT(shift); + } + writel(val_1, base + IPROC_GPIO_PULL_UP_OFFSET); + writel(val_2, base + IPROC_GPIO_PULL_DN_OFFSET); } else { - iproc_set_bit(chip, IPROC_GPIO_PAD_RES_OFFSET, gpio, - pull_up); - iproc_set_bit(chip, IPROC_GPIO_RES_EN_OFFSET, gpio, true); + if (disable) { + iproc_set_bit(chip, IPROC_GPIO_RES_EN_OFFSET, gpio, + false); + } else { + iproc_set_bit(chip, IPROC_GPIO_PAD_RES_OFFSET, gpio, + pull_up); + iproc_set_bit(chip, IPROC_GPIO_RES_EN_OFFSET, gpio, + true); + } } raw_spin_unlock_irqrestore(&chip->lock, flags); - dev_dbg(chip->dev, "gpio:%u set pullup:%d\n", gpio, pull_up); return 0; @@ -483,14 +518,35 @@ static int iproc_gpio_set_pull(struct iproc_gpio *chip, unsigned gpio, static void iproc_gpio_get_pull(struct iproc_gpio *chip, unsigned gpio, bool *disable, bool *pull_up) { + void __iomem *base; unsigned long flags; + unsigned int shift; + u32 val_1, val_2; raw_spin_lock_irqsave(&chip->lock, flags); - *disable = !iproc_get_bit(chip, IPROC_GPIO_RES_EN_OFFSET, gpio); - *pull_up = iproc_get_bit(chip, IPROC_GPIO_PAD_RES_OFFSET, gpio); + if (chip->io_ctrl_type == IOCTRL_TYPE_CDRU) { + base = chip->io_ctrl; + shift = IPROC_GPIO_SHIFT(gpio); + + val_1 = readl(base + IPROC_GPIO_PULL_UP_OFFSET) & BIT(shift); + val_2 = readl(base + IPROC_GPIO_PULL_DN_OFFSET) & BIT(shift); + + *pull_up = val_1 ? true : false; + *disable = (val_1 | val_2) ? false : true; + + } else { + *disable = !iproc_get_bit(chip, IPROC_GPIO_RES_EN_OFFSET, gpio); + *pull_up = iproc_get_bit(chip, IPROC_GPIO_PAD_RES_OFFSET, gpio); + } raw_spin_unlock_irqrestore(&chip->lock, flags); } +#define DRV_STRENGTH_OFFSET(gpio, bit, type) ((type) == IOCTRL_TYPE_AON ? \ + ((2 - (bit)) * 4 + IPROC_GPIO_DRV_CTRL_OFFSET) : \ + ((type) == IOCTRL_TYPE_CDRU) ? \ + ((bit) * 4 + IPROC_GPIO_DRV_CTRL_OFFSET) : \ + ((bit) * 4 + IPROC_GPIO_REG(gpio, IPROC_GPIO_ASIU_DRV0_CTRL_OFFSET))) + static int iproc_gpio_set_strength(struct iproc_gpio *chip, unsigned gpio, unsigned strength) { @@ -505,11 +561,8 @@ static int iproc_gpio_set_strength(struct iproc_gpio *chip, unsigned gpio, if (chip->io_ctrl) { base = chip->io_ctrl; - offset = IPROC_GPIO_DRV0_CTRL_OFFSET; } else { base = chip->base; - offset = IPROC_GPIO_REG(gpio, - IPROC_GPIO_ASIU_DRV0_CTRL_OFFSET); } shift = IPROC_GPIO_SHIFT(gpio); @@ -520,11 +573,11 @@ static int iproc_gpio_set_strength(struct iproc_gpio *chip, unsigned gpio, raw_spin_lock_irqsave(&chip->lock, flags); strength = (strength / 2) - 1; for (i = 0; i < GPIO_DRV_STRENGTH_BITS; i++) { + offset = DRV_STRENGTH_OFFSET(gpio, i, chip->io_ctrl_type); val = readl(base + offset); val &= ~BIT(shift); val |= ((strength >> i) & 0x1) << shift; writel(val, base + offset); - offset += 4; } raw_spin_unlock_irqrestore(&chip->lock, flags); @@ -541,11 +594,8 @@ static int iproc_gpio_get_strength(struct iproc_gpio *chip, unsigned gpio, if (chip->io_ctrl) { base = chip->io_ctrl; - offset = IPROC_GPIO_DRV0_CTRL_OFFSET; } else { base = chip->base; - offset = IPROC_GPIO_REG(gpio, - IPROC_GPIO_ASIU_DRV0_CTRL_OFFSET); } shift = IPROC_GPIO_SHIFT(gpio); @@ -553,10 +603,10 @@ static int iproc_gpio_get_strength(struct iproc_gpio *chip, unsigned gpio, raw_spin_lock_irqsave(&chip->lock, flags); *strength = 0; for (i = 0; i < GPIO_DRV_STRENGTH_BITS; i++) { + offset = DRV_STRENGTH_OFFSET(gpio, i, chip->io_ctrl_type); val = readl(base + offset) & BIT(shift); val >>= shift; *strength += (val << i); - offset += 4; } /* convert to mA */ @@ -734,6 +784,7 @@ static int iproc_gpio_probe(struct platform_device *pdev) u32 ngpios, pinconf_disable_mask = 0; int irq, ret; bool no_pinconf = false; + enum iproc_pinconf_ctrl_type io_ctrl_type = IOCTRL_TYPE_INVALID; /* NSP does not support drive strength config */ if (of_device_is_compatible(dev->of_node, "brcm,iproc-nsp-gpio")) @@ -764,8 +815,15 @@ static int iproc_gpio_probe(struct platform_device *pdev) dev_err(dev, "unable to map I/O memory\n"); return PTR_ERR(chip->io_ctrl); } + if (of_device_is_compatible(dev->of_node, + "brcm,cygnus-ccm-gpio")) + io_ctrl_type = IOCTRL_TYPE_CDRU; + else + io_ctrl_type = IOCTRL_TYPE_AON; } + chip->io_ctrl_type = io_ctrl_type; + if (of_property_read_u32(dev->of_node, "ngpios", &ngpios)) { dev_err(&pdev->dev, "missing ngpios DT property\n"); return -ENODEV; From a4f56e03bf2a69548cedaeaa46a691da520cc7d3 Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Tue, 10 Sep 2019 16:46:17 +0300 Subject: [PATCH 2725/3715] ath10k: adjust skb length in ath10k_sdio_mbox_rx_packet [ Upstream commit b7139960832eb56fa15d390a4b5c8c5739bd0d1a ] When the FW bundles multiple packets, pkt->act_len may be incorrect as it refers to the first packet only (however, the FW will only bundle packets that fit into the same pkt->alloc_len). Before this patch, the skb length would be set (incorrectly) to pkt->act_len in ath10k_sdio_mbox_rx_packet, and then later manually adjusted in ath10k_sdio_mbox_rx_process_packet. The first problem is that ath10k_sdio_mbox_rx_process_packet does not use proper skb_put commands to adjust the length (it directly changes skb->len), so we end up with a mismatch between skb->head + skb->tail and skb->data + skb->len. This is quite serious, and causes corruptions in the TCP stack, as the stack tries to coalesce packets, and relies on skb->tail being correct (that is, skb_tail_pointer must point to the first byte_after_ the data). Instead of re-adjusting the size in ath10k_sdio_mbox_rx_process_packet, this moves the code to ath10k_sdio_mbox_rx_packet, and also add a bounds check, as skb_put would crash the kernel if not enough space is available. Tested with QCA6174 SDIO with firmware WLAN.RMH.4.4.1-00007-QCARMSWP-1. Fixes: 8530b4e7b22bc3b ("ath10k: sdio: set skb len for all rx packets") Signed-off-by: Nicolas Boichat Signed-off-by: Wen Gong Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath10k/sdio.c | 29 +++++++++++++++++++------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/sdio.c b/drivers/net/wireless/ath/ath10k/sdio.c index 0a1248ebccf5..f49b21b137c1 100644 --- a/drivers/net/wireless/ath/ath10k/sdio.c +++ b/drivers/net/wireless/ath/ath10k/sdio.c @@ -392,16 +392,11 @@ static int ath10k_sdio_mbox_rx_process_packet(struct ath10k *ar, struct ath10k_htc_hdr *htc_hdr = (struct ath10k_htc_hdr *)skb->data; bool trailer_present = htc_hdr->flags & ATH10K_HTC_FLAG_TRAILER_PRESENT; enum ath10k_htc_ep_id eid; - u16 payload_len; u8 *trailer; int ret; - payload_len = le16_to_cpu(htc_hdr->len); - skb->len = payload_len + sizeof(struct ath10k_htc_hdr); - if (trailer_present) { - trailer = skb->data + sizeof(*htc_hdr) + - payload_len - htc_hdr->trailer_len; + trailer = skb->data + skb->len - htc_hdr->trailer_len; eid = pipe_id_to_eid(htc_hdr->eid); @@ -635,13 +630,31 @@ static int ath10k_sdio_mbox_rx_packet(struct ath10k *ar, { struct ath10k_sdio *ar_sdio = ath10k_sdio_priv(ar); struct sk_buff *skb = pkt->skb; + struct ath10k_htc_hdr *htc_hdr; int ret; ret = ath10k_sdio_readsb(ar, ar_sdio->mbox_info.htc_addr, skb->data, pkt->alloc_len); + if (ret) + goto out; + + /* Update actual length. The original length may be incorrect, + * as the FW will bundle multiple packets as long as their sizes + * fit within the same aligned length (pkt->alloc_len). + */ + htc_hdr = (struct ath10k_htc_hdr *)skb->data; + pkt->act_len = le16_to_cpu(htc_hdr->len) + sizeof(*htc_hdr); + if (pkt->act_len > pkt->alloc_len) { + ath10k_warn(ar, "rx packet too large (%zu > %zu)\n", + pkt->act_len, pkt->alloc_len); + ret = -EMSGSIZE; + goto out; + } + + skb_put(skb, pkt->act_len); + +out: pkt->status = ret; - if (!ret) - skb_put(skb, pkt->act_len); return ret; } From 7f746a04e45c0b7baa0facd3ebba1b02693e33f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20Bugge?= Date: Mon, 2 Sep 2019 11:27:31 +0200 Subject: [PATCH 2726/3715] RDMA/cma: Fix false error message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a6e4d254c19b541a58caced322111084b27a7788 ] In addr_handler(), assuming status == 0 and the device already has been acquired (id_priv->cma_dev != NULL), we get the following incorrect "error" message: RDMA CM: ADDR_ERROR: failed to resolve IP. status 0 Fixes: 498683c6a7ee ("IB/cma: Add debug messages to error flows") Link: https://lore.kernel.org/r/20190902092731.1055757-1-haakon.bugge@oracle.com Signed-off-by: Håkon Bugge Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/cma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index fc4630e4acdd..1614f6f3677c 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2789,7 +2789,7 @@ static void addr_handler(int status, struct sockaddr *src_addr, if (status) pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n", status); - } else { + } else if (status) { pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to resolve IP. status %d\n", status); } From 8bfb051a68c2b0f08ac685296993574e5861bd9b Mon Sep 17 00:00:00 2001 From: Gerd Rausch Date: Thu, 12 Sep 2019 13:49:41 -0700 Subject: [PATCH 2727/3715] net/rds: Fix 'ib_evt_handler_call' element in 'rds_ib_stat_names' [ Upstream commit 05a82481a3024b94db00b8c816bb3d526b5209e0 ] All entries in 'rds_ib_stat_names' are stringified versions of the corresponding "struct rds_ib_statistics" element without the "s_"-prefix. Fix entry 'ib_evt_handler_call' to do the same. Fixes: f4f943c958a2 ("RDS: IB: ack more receive completions to improve performance") Signed-off-by: Gerd Rausch Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/rds/ib_stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rds/ib_stats.c b/net/rds/ib_stats.c index 9252ad126335..ac46d8961b61 100644 --- a/net/rds/ib_stats.c +++ b/net/rds/ib_stats.c @@ -42,7 +42,7 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_ib_statistics, rds_ib_stats); static const char *const rds_ib_stat_names[] = { "ib_connect_raced", "ib_listen_closed_stale", - "s_ib_evt_handler_call", + "ib_evt_handler_call", "ib_tasklet_call", "ib_tx_cq_event", "ib_tx_ring_full", From 91586b4d9cae8fd12c6e6ae1b1c2c342c88cc95d Mon Sep 17 00:00:00 2001 From: Filippo Sironi Date: Tue, 10 Sep 2019 19:49:21 +0200 Subject: [PATCH 2728/3715] iommu/amd: Wait for completion of IOTLB flush in attach_device [ Upstream commit 0b15e02f0cc4fb34a9160de7ba6db3a4013dc1b7 ] To make sure the domain tlb flush completes before the function returns, explicitly wait for its completion. Signed-off-by: Filippo Sironi Fixes: 42a49f965a8d ("amd-iommu: flush domain tlb when attaching a new device") [joro: Added commit message and fixes tag] Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/amd_iommu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index d09c24825734..778f167be2d3 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2160,6 +2160,8 @@ skip_ats_check: */ domain_flush_tlb_pde(domain); + domain_flush_complete(domain); + return ret; } From 39479c6c4695855c5d09fc2290723c8f00098110 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 25 Sep 2019 13:54:30 +0300 Subject: [PATCH 2729/3715] net: aquantia: Fix aq_vec_isr_legacy() return value [ Upstream commit 31aefe14bc9f56566041303d733fda511d3a1c3e ] The irqreturn_t type is an enum or an unsigned int in GCC. That creates to problems because it can't detect if the self->aq_hw_ops->hw_irq_read() call fails and at the end the function always returns IRQ_HANDLED. drivers/net/ethernet/aquantia/atlantic/aq_vec.c:316 aq_vec_isr_legacy() warn: unsigned 'err' is never less than zero. drivers/net/ethernet/aquantia/atlantic/aq_vec.c:329 aq_vec_isr_legacy() warn: always true condition '(err >= 0) => (0-u32max >= 0)' Fixes: 970a2e9864b0 ("net: ethernet: aquantia: Vector operations") Signed-off-by: Dan Carpenter Reviewed-by: Igor Russkikh Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/aquantia/atlantic/aq_vec.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c index 5fecc9a099ef..bb2894a333f2 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c @@ -310,15 +310,13 @@ irqreturn_t aq_vec_isr_legacy(int irq, void *private) { struct aq_vec_s *self = private; u64 irq_mask = 0U; - irqreturn_t err = 0; + int err; - if (!self) { - err = -EINVAL; - goto err_exit; - } + if (!self) + return IRQ_NONE; err = self->aq_hw_ops->hw_irq_read(self->aq_hw, &irq_mask); if (err < 0) - goto err_exit; + return IRQ_NONE; if (irq_mask) { self->aq_hw_ops->hw_irq_disable(self->aq_hw, @@ -326,11 +324,10 @@ irqreturn_t aq_vec_isr_legacy(int irq, void *private) napi_schedule(&self->napi); } else { self->aq_hw_ops->hw_irq_enable(self->aq_hw, 1U); - err = IRQ_NONE; + return IRQ_NONE; } -err_exit: - return err >= 0 ? IRQ_HANDLED : IRQ_NONE; + return IRQ_HANDLED; } cpumask_t *aq_vec_get_affinity_mask(struct aq_vec_s *self) From cb6066a342cd90eb5d2bcd438ccb242bb67e278e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 25 Sep 2019 13:55:32 +0300 Subject: [PATCH 2730/3715] net: hisilicon: Fix signedness bug in hix5hd2_dev_probe() [ Upstream commit 002dfe8085255b7bf1e0758c3d195c5412d35be9 ] The "priv->phy_mode" variable is an enum and in this context GCC will treat it as unsigned to the error handling will never trigger. Fixes: 57c5bc9ad7d7 ("net: hisilicon: add hix5hd2 mac driver") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hix5hd2_gmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c index aab6fb10af94..6adf6831d120 100644 --- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c +++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c @@ -1202,7 +1202,7 @@ static int hix5hd2_dev_probe(struct platform_device *pdev) goto err_free_mdio; priv->phy_mode = of_get_phy_mode(node); - if (priv->phy_mode < 0) { + if ((int)priv->phy_mode < 0) { netdev_err(ndev, "not find phy-mode\n"); ret = -EINVAL; goto err_mdiobus; From ba54da52df0d948f9965f2d054df87e72b30dca0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 25 Sep 2019 13:56:04 +0300 Subject: [PATCH 2731/3715] net: broadcom/bcmsysport: Fix signedness in bcm_sysport_probe() [ Upstream commit 25a584955f020d6ec499c513923fb220f3112d2b ] The "priv->phy_interface" variable is an enum and in this context GCC will treat it as unsigned so the error handling will never be triggered. Fixes: 80105befdb4b ("net: systemport: add Broadcom SYSTEMPORT Ethernet MAC driver") Signed-off-by: Dan Carpenter Acked-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bcmsysport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 79018fea7be2..69b2f99b0c19 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -2116,7 +2116,7 @@ static int bcm_sysport_probe(struct platform_device *pdev) priv->phy_interface = of_get_phy_mode(dn); /* Default to GMII interface mode */ - if (priv->phy_interface < 0) + if ((int)priv->phy_interface < 0) priv->phy_interface = PHY_INTERFACE_MODE_GMII; /* In the case of a fixed PHY, the DT node associated From cd885e8726baad6e386c182272bd52ed1612cb7a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 25 Sep 2019 13:58:22 +0300 Subject: [PATCH 2732/3715] net: stmmac: dwmac-meson8b: Fix signedness bug in probe [ Upstream commit f10210517a2f37feea2edf85eb34c98977265c16 ] The "dwmac->phy_mode" is an enum and in this context GCC treats it as an unsigned int so the error handling is never triggered. Fixes: 566e82516253 ("net: stmmac: add a glue driver for the Amlogic Meson 8b / GXBB DWMAC") Signed-off-by: Dan Carpenter Reviewed-by: Martin Blumenstingl Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c index 8be4b32544ef..d71d3c1c85ee 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c @@ -285,7 +285,7 @@ static int meson8b_dwmac_probe(struct platform_device *pdev) dwmac->pdev = pdev; dwmac->phy_mode = of_get_phy_mode(pdev->dev.of_node); - if (dwmac->phy_mode < 0) { + if ((int)dwmac->phy_mode < 0) { dev_err(&pdev->dev, "missing phy-mode property\n"); ret = -EINVAL; goto err_remove_config_dt; From 168de4c8560e2ee278709557bf74d1418b18ddf4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 25 Sep 2019 13:59:11 +0300 Subject: [PATCH 2733/3715] net: axienet: fix a signedness bug in probe [ Upstream commit 73e211e11be86715d66bd3c9d38b3c34b05fca9a ] The "lp->phy_mode" is an enum but in this context GCC treats it as an unsigned int so the error handling is never triggered. Fixes: ee06b1728b95 ("net: axienet: add support for standard phy-mode binding") Signed-off-by: Dan Carpenter Reviewed-by: Radhey Shyam Pandey Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 9ccd08a051f6..1152d74433f6 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1574,7 +1574,7 @@ static int axienet_probe(struct platform_device *pdev) } } else { lp->phy_mode = of_get_phy_mode(pdev->dev.of_node); - if (lp->phy_mode < 0) { + if ((int)lp->phy_mode < 0) { ret = -EINVAL; goto free_netdev; } From ed3f1423a1b303aac7aa3f38ab3535397e1e1e42 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 25 Sep 2019 14:01:00 +0300 Subject: [PATCH 2734/3715] of: mdio: Fix a signedness bug in of_phy_get_and_connect() [ Upstream commit d7eb651212fdbafa82d485d8e76095ac3b14c193 ] The "iface" variable is an enum and in this context GCC treats it as an unsigned int so the error handling is never triggered. Fixes: b78624125304 ("of_mdio: Abstract a general interface for phy connect") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/of/of_mdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index 8c1819230ed2..fe26697d3bd7 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -358,7 +358,7 @@ struct phy_device *of_phy_get_and_connect(struct net_device *dev, struct phy_device *phy; iface = of_get_phy_mode(np); - if (iface < 0) + if ((int)iface < 0) return NULL; phy_np = of_parse_phandle(np, "phy-handle", 0); From 3dc236296a368bb3c5571207176d8e547589aadf Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 25 Sep 2019 14:05:54 +0300 Subject: [PATCH 2735/3715] net: ethernet: stmmac: Fix signedness bug in ipq806x_gmac_of_parse() [ Upstream commit 231042181dc9d6122c6faba64e99ccb25f13cc6c ] The "gmac->phy_mode" variable is an enum and in this context GCC will treat it as an unsigned int so the error handling will never be triggered. Fixes: b1c17215d718 ("stmmac: add ipq806x glue layer") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c index 866444b6c82f..11a4a81b0397 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c @@ -203,7 +203,7 @@ static int ipq806x_gmac_of_parse(struct ipq806x_gmac *gmac) struct device *dev = &gmac->pdev->dev; gmac->phy_mode = of_get_phy_mode(dev->of_node); - if (gmac->phy_mode < 0) { + if ((int)gmac->phy_mode < 0) { dev_err(dev, "missing phy mode property\n"); return -EINVAL; } From ae79af838296f399eb53f555e76e86217695d6da Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 3 Oct 2019 13:57:29 +0200 Subject: [PATCH 2736/3715] nvme: retain split access workaround for capability reads [ Upstream commit 3a8ecc935efabdad106b5e06d07b150c394b4465 ] Commit 7fd8930f26be4 "nvme: add a common helper to read Identify Controller data" has re-introduced an issue that we have attempted to work around in the past, in commit a310acd7a7ea ("NVMe: use split lo_hi_{read,write}q"). The problem is that some PCIe NVMe controllers do not implement 64-bit outbound accesses correctly, which is why the commit above switched to using lo_hi_[read|write]q for all 64-bit BAR accesses occuring in the code. In the mean time, the NVMe subsystem has been refactored, and now calls into the PCIe support layer for NVMe via a .reg_read64() method, which fails to use lo_hi_readq(), and thus reintroduces the problem that the workaround above aimed to address. Given that, at the moment, .reg_read64() is only used to read the capability register [which is known to tolerate split reads], let's switch .reg_read64() to lo_hi_readq() as well. This fixes a boot issue on some ARM boxes with NVMe behind a Synopsys DesignWare PCIe host controller. Fixes: 7fd8930f26be4 ("nvme: add a common helper to read Identify Controller data") Signed-off-by: Ard Biesheuvel Signed-off-by: Sagi Grimberg Signed-off-by: Sasha Levin --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index cd11cced3678..3788c053a0b1 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2274,7 +2274,7 @@ static int nvme_pci_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val) static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val) { - *val = readq(to_nvme_dev(ctrl)->bar + off); + *val = lo_hi_readq(to_nvme_dev(ctrl)->bar + off); return 0; } From fe89f528b519c29e392a4acd71c00753702f27e9 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Sun, 6 Oct 2019 13:08:56 +0200 Subject: [PATCH 2737/3715] net: stmmac: gmac4+: Not all Unicast addresses may be available [ Upstream commit 25683bab09a70542b9f8e3e28f79b3369e56701f ] Some setups may not have all Unicast addresses filters available. Check the number of available filters before trying to setup it. Fixes: 477286b53f55 ("stmmac: add GMAC4 core support") Signed-off-by: Jose Abreu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 8445af580cb6..e5566c121525 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -438,7 +438,7 @@ static void dwmac4_set_filter(struct mac_device_info *hw, } /* Handle multiple unicast addresses */ - if (netdev_uc_count(dev) > GMAC_MAX_PERFECT_ADDRESSES) { + if (netdev_uc_count(dev) > hw->unicast_filter_entries) { /* Switch to promiscuous mode if more than 128 addrs * are required */ From 05c2aa29d3a4e6d88015cecf04085d4b13ec1ae2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 4 Oct 2019 15:37:05 +0300 Subject: [PATCH 2738/3715] mac80211: accept deauth frames in IBSS mode [ Upstream commit 95697f9907bfe3eab0ef20265a766b22e27dde64 ] We can process deauth frames and all, but we drop them very early in the RX path today - this could never have worked. Fixes: 2cc59e784b54 ("mac80211: reply to AUTH with DEAUTH if sta allocation fails in IBSS") Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/20191004123706.15768-2-luca@coelho.fi Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/rx.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 4a6b3c7b35e3..31000622376d 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3227,9 +3227,18 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) case cpu_to_le16(IEEE80211_STYPE_PROBE_RESP): /* process for all: mesh, mlme, ibss */ break; + case cpu_to_le16(IEEE80211_STYPE_DEAUTH): + if (is_multicast_ether_addr(mgmt->da) && + !is_broadcast_ether_addr(mgmt->da)) + return RX_DROP_MONITOR; + + /* process only for station/IBSS */ + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_ADHOC) + return RX_DROP_MONITOR; + break; case cpu_to_le16(IEEE80211_STYPE_ASSOC_RESP): case cpu_to_le16(IEEE80211_STYPE_REASSOC_RESP): - case cpu_to_le16(IEEE80211_STYPE_DEAUTH): case cpu_to_le16(IEEE80211_STYPE_DISASSOC): if (is_multicast_ether_addr(mgmt->da) && !is_broadcast_ether_addr(mgmt->da)) From e39340636dd980978224755f7bb641eeb08b88f4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 6 Oct 2019 14:24:26 -0700 Subject: [PATCH 2739/3715] llc: fix another potential sk_buff leak in llc_ui_sendmsg() [ Upstream commit fc8d5db10cbe1338a52ebc74e7feab9276721774 ] All callers of llc_conn_state_process() except llc_build_and_send_pkt() (via llc_ui_sendmsg() -> llc_ui_send_data()) assume that it always consumes a reference to the skb. Fix this caller to do the same. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Biggers Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/llc/af_llc.c | 34 ++++++++++++++++++++-------------- net/llc/llc_conn.c | 2 ++ net/llc/llc_if.c | 12 ++++++++---- 3 files changed, 30 insertions(+), 18 deletions(-) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 2e472d5c3ea4..d552e8819713 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -113,22 +113,26 @@ static inline u8 llc_ui_header_len(struct sock *sk, struct sockaddr_llc *addr) * * Send data via reliable llc2 connection. * Returns 0 upon success, non-zero if action did not succeed. + * + * This function always consumes a reference to the skb. */ static int llc_ui_send_data(struct sock* sk, struct sk_buff *skb, int noblock) { struct llc_sock* llc = llc_sk(sk); - int rc = 0; if (unlikely(llc_data_accept_state(llc->state) || llc->remote_busy_flag || llc->p_flag)) { long timeout = sock_sndtimeo(sk, noblock); + int rc; rc = llc_ui_wait_for_busy_core(sk, timeout); + if (rc) { + kfree_skb(skb); + return rc; + } } - if (unlikely(!rc)) - rc = llc_build_and_send_pkt(sk, skb); - return rc; + return llc_build_and_send_pkt(sk, skb); } static void llc_ui_sk_init(struct socket *sock, struct sock *sk) @@ -900,7 +904,7 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name); int flags = msg->msg_flags; int noblock = flags & MSG_DONTWAIT; - struct sk_buff *skb; + struct sk_buff *skb = NULL; size_t size = 0; int rc = -EINVAL, copied = 0, hdrlen; @@ -909,10 +913,10 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) lock_sock(sk); if (addr) { if (msg->msg_namelen < sizeof(*addr)) - goto release; + goto out; } else { if (llc_ui_addr_null(&llc->addr)) - goto release; + goto out; addr = &llc->addr; } /* must bind connection to sap if user hasn't done it. */ @@ -920,7 +924,7 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) /* bind to sap with null dev, exclusive. */ rc = llc_ui_autobind(sock, addr); if (rc) - goto release; + goto out; } hdrlen = llc->dev->hard_header_len + llc_ui_header_len(sk, addr); size = hdrlen + len; @@ -929,12 +933,12 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) copied = size - hdrlen; rc = -EINVAL; if (copied < 0) - goto release; + goto out; release_sock(sk); skb = sock_alloc_send_skb(sk, size, noblock, &rc); lock_sock(sk); if (!skb) - goto release; + goto out; skb->dev = llc->dev; skb->protocol = llc_proto_type(addr->sllc_arphrd); skb_reserve(skb, hdrlen); @@ -944,29 +948,31 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (sk->sk_type == SOCK_DGRAM || addr->sllc_ua) { llc_build_and_send_ui_pkt(llc->sap, skb, addr->sllc_mac, addr->sllc_sap); + skb = NULL; goto out; } if (addr->sllc_test) { llc_build_and_send_test_pkt(llc->sap, skb, addr->sllc_mac, addr->sllc_sap); + skb = NULL; goto out; } if (addr->sllc_xid) { llc_build_and_send_xid_pkt(llc->sap, skb, addr->sllc_mac, addr->sllc_sap); + skb = NULL; goto out; } rc = -ENOPROTOOPT; if (!(sk->sk_type == SOCK_STREAM && !addr->sllc_ua)) goto out; rc = llc_ui_send_data(sk, skb, noblock); + skb = NULL; out: - if (rc) { - kfree_skb(skb); -release: + kfree_skb(skb); + if (rc) dprintk("%s: failed sending from %02X to %02X: %d\n", __func__, llc->laddr.lsap, llc->daddr.lsap, rc); - } release_sock(sk); return rc ? : copied; } diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 444c13e752a0..7340f23e16de 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -55,6 +55,8 @@ int sysctl_llc2_busy_timeout = LLC2_BUSY_TIME * HZ; * (executing it's actions and changing state), upper layer will be * indicated or confirmed, if needed. Returns 0 for success, 1 for * failure. The socket lock has to be held before calling this function. + * + * This function always consumes a reference to the skb. */ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) { diff --git a/net/llc/llc_if.c b/net/llc/llc_if.c index 6daf391b3e84..fc4d2bd8816f 100644 --- a/net/llc/llc_if.c +++ b/net/llc/llc_if.c @@ -38,6 +38,8 @@ * closed and -EBUSY when sending data is not permitted in this state or * LLC has send an I pdu with p bit set to 1 and is waiting for it's * response. + * + * This function always consumes a reference to the skb. */ int llc_build_and_send_pkt(struct sock *sk, struct sk_buff *skb) { @@ -46,20 +48,22 @@ int llc_build_and_send_pkt(struct sock *sk, struct sk_buff *skb) struct llc_sock *llc = llc_sk(sk); if (unlikely(llc->state == LLC_CONN_STATE_ADM)) - goto out; + goto out_free; rc = -EBUSY; if (unlikely(llc_data_accept_state(llc->state) || /* data_conn_refuse */ llc->p_flag)) { llc->failed_data_req = 1; - goto out; + goto out_free; } ev = llc_conn_ev(skb); ev->type = LLC_CONN_EV_TYPE_PRIM; ev->prim = LLC_DATA_PRIM; ev->prim_type = LLC_PRIM_TYPE_REQ; skb->dev = llc->dev; - rc = llc_conn_state_process(sk, skb); -out: + return llc_conn_state_process(sk, skb); + +out_free: + kfree_skb(skb); return rc; } From a8cfe559cb47108cc97353b65d79115cf6bf1b57 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 6 Oct 2019 14:24:27 -0700 Subject: [PATCH 2740/3715] llc: fix sk_buff refcounting in llc_conn_state_process() [ Upstream commit 36453c852816f19947ca482a595dffdd2efa4965 ] If llc_conn_state_process() sees that llc_conn_service() put the skb on a list, it will drop one fewer references to it. This is wrong because the current behavior is that llc_conn_service() never consumes a reference to the skb. The code also makes the number of skb references being dropped conditional on which of ind_prim and cfm_prim are nonzero, yet neither of these affects how many references are *acquired*. So there is extra code that tries to fix this up by sometimes taking another reference. Remove the unnecessary/broken refcounting logic and instead just add an skb_get() before the only two places where an extra reference is actually consumed. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Biggers Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/llc/llc_conn.c | 33 ++++++--------------------------- 1 file changed, 6 insertions(+), 27 deletions(-) diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 7340f23e16de..7fbc682aff04 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -64,12 +64,6 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) struct llc_sock *llc = llc_sk(skb->sk); struct llc_conn_state_ev *ev = llc_conn_ev(skb); - /* - * We have to hold the skb, because llc_conn_service will kfree it in - * the sending path and we need to look at the skb->cb, where we encode - * llc_conn_state_ev. - */ - skb_get(skb); ev->ind_prim = ev->cfm_prim = 0; /* * Send event to state machine @@ -77,21 +71,12 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) rc = llc_conn_service(skb->sk, skb); if (unlikely(rc != 0)) { printk(KERN_ERR "%s: llc_conn_service failed\n", __func__); - goto out_kfree_skb; - } - - if (unlikely(!ev->ind_prim && !ev->cfm_prim)) { - /* indicate or confirm not required */ - if (!skb->next) - goto out_kfree_skb; goto out_skb_put; } - if (unlikely(ev->ind_prim && ev->cfm_prim)) /* Paranoia */ - skb_get(skb); - switch (ev->ind_prim) { case LLC_DATA_PRIM: + skb_get(skb); llc_save_primitive(sk, skb, LLC_DATA_PRIM); if (unlikely(sock_queue_rcv_skb(sk, skb))) { /* @@ -108,6 +93,7 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) * skb->sk pointing to the newly created struct sock in * llc_conn_handler. -acme */ + skb_get(skb); skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_state_change(sk); break; @@ -123,7 +109,6 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) sk->sk_state_change(sk); } } - kfree_skb(skb); sock_put(sk); break; case LLC_RESET_PRIM: @@ -132,14 +117,11 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) * RESET is not being notified to upper layers for now */ printk(KERN_INFO "%s: received a reset ind!\n", __func__); - kfree_skb(skb); break; default: - if (ev->ind_prim) { + if (ev->ind_prim) printk(KERN_INFO "%s: received unknown %d prim!\n", __func__, ev->ind_prim); - kfree_skb(skb); - } /* No indication */ break; } @@ -181,15 +163,12 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) printk(KERN_INFO "%s: received a reset conf!\n", __func__); break; default: - if (ev->cfm_prim) { + if (ev->cfm_prim) printk(KERN_INFO "%s: received unknown %d prim!\n", __func__, ev->cfm_prim); - break; - } - goto out_skb_put; /* No confirmation */ + /* No confirmation */ + break; } -out_kfree_skb: - kfree_skb(skb); out_skb_put: kfree_skb(skb); return rc; From 3134607b52605e595781f5083621bc7eb60e1242 Mon Sep 17 00:00:00 2001 From: Antonio Borneo Date: Mon, 7 Oct 2019 17:43:04 +0200 Subject: [PATCH 2741/3715] net: stmmac: fix length of PTP clock's name string [ Upstream commit 5da202c88f8c355ad79bc2e8eb582e6d433060e7 ] The field "name" in struct ptp_clock_info has a fixed size of 16 chars and is used as zero terminated string by clock_name_show() in drivers/ptp/ptp_sysfs.c The current initialization value requires 17 chars to fit also the null termination, and this causes overflow to the next bytes in the struct when the string is read as null terminated: hexdump -C /sys/class/ptp/ptp0/clock_name 00000000 73 74 6d 6d 61 63 5f 70 74 70 5f 63 6c 6f 63 6b |stmmac_ptp_clock| 00000010 a0 ac b9 03 0a |.....| where the extra 4 bytes (excluding the newline) after the string represent the integer 0x03b9aca0 = 62500000 assigned to the field "max_adj" that follows "name" in the same struct. There is no strict requirement for the "name" content and in the comment in ptp_clock_kernel.h it's reported it should just be 'A short "friendly name" to identify the clock'. Replace it with "stmmac ptp". Signed-off-by: Antonio Borneo Fixes: 92ba6888510c ("stmmac: add the support for PTP hw clock driver") Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index e471a903c654..1c1d6a942822 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -154,7 +154,7 @@ static int stmmac_enable(struct ptp_clock_info *ptp, /* structure describing a PTP hardware clock */ static const struct ptp_clock_info stmmac_ptp_clock_ops = { .owner = THIS_MODULE, - .name = "stmmac_ptp_clock", + .name = "stmmac ptp", .max_adj = 62500000, .n_alarm = 0, .n_ext_ts = 0, From e891a20643c751e07f1404c6e9e2267391a37f04 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 9 Oct 2019 11:10:52 +0800 Subject: [PATCH 2742/3715] act_mirred: Fix mirred_init_module error handling [ Upstream commit 11c9a7d38af524217efb7a176ad322b97ac2f163 ] If tcf_register_action failed, mirred_device_notifier should be unregistered. Fixes: 3b87956ea645 ("net sched: fix race in mirred device removal") Signed-off-by: YueHaibing Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sched/act_mirred.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 529bb064c4a4..dcfaa4f9c7c5 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -371,7 +371,11 @@ static int __init mirred_init_module(void) return err; pr_info("Mirror/redirect action on\n"); - return tcf_register_action(&act_mirred_ops, &mirred_net_ops); + err = tcf_register_action(&act_mirred_ops, &mirred_net_ops); + if (err) + unregister_netdevice_notifier(&mirred_device_notifier); + + return err; } static void __exit mirred_cleanup_module(void) From 2ffb5a9b396d7857600fdf94432a94490c0cae71 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Oct 2019 12:55:53 -0700 Subject: [PATCH 2743/3715] net: avoid possible false sharing in sk_leave_memory_pressure() [ Upstream commit 503978aca46124cd714703e180b9c8292ba50ba7 ] As mentioned in https://github.com/google/ktsan/wiki/READ_ONCE-and-WRITE_ONCE#it-may-improve-performance a C compiler can legally transform : if (memory_pressure && *memory_pressure) *memory_pressure = 0; to : if (memory_pressure) *memory_pressure = 0; Fixes: 0604475119de ("tcp: add TCPMemoryPressuresChrono counter") Fixes: 180d8cd942ce ("foundations of per-cgroup memory pressure controlling.") Fixes: 3ab224be6d69 ("[NET] CORE: Introducing new memory accounting interface.") Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/core/sock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 90ccbbf9e6b0..03ca2f638eb4 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2165,8 +2165,8 @@ static void sk_leave_memory_pressure(struct sock *sk) } else { unsigned long *memory_pressure = sk->sk_prot->memory_pressure; - if (memory_pressure && *memory_pressure) - *memory_pressure = 0; + if (memory_pressure && READ_ONCE(*memory_pressure)) + WRITE_ONCE(*memory_pressure, 0); } } From 681c8c92c4b78e80e0853f8e18861e93069d68e6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Oct 2019 14:51:20 -0700 Subject: [PATCH 2744/3715] net: add {READ|WRITE}_ONCE() annotations on ->rskq_accept_head [ Upstream commit 60b173ca3d1cd1782bd0096dc17298ec242f6fb1 ] reqsk_queue_empty() is called from inet_csk_listen_poll() while other cpus might write ->rskq_accept_head value. Use {READ|WRITE}_ONCE() to avoid compiler tricks and potential KCSAN splats. Fixes: fff1f3001cc5 ("tcp: add a spinlock to protect struct request_sock_queue") Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/xen/pvcalls-back.c | 2 +- include/net/request_sock.h | 4 ++-- net/ipv4/inet_connection_sock.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index abd6dbc29ac2..58be15c27b6d 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -792,7 +792,7 @@ static int pvcalls_back_poll(struct xenbus_device *dev, mappass->reqcopy = *req; icsk = inet_csk(mappass->sock->sk); queue = &icsk->icsk_accept_queue; - data = queue->rskq_accept_head != NULL; + data = READ_ONCE(queue->rskq_accept_head) != NULL; if (data) { mappass->reqcopy.cmd = 0; ret = 0; diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 23e22054aa60..04aa2c7d35c4 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -181,7 +181,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, static inline bool reqsk_queue_empty(const struct request_sock_queue *queue) { - return queue->rskq_accept_head == NULL; + return READ_ONCE(queue->rskq_accept_head) == NULL; } static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue, @@ -193,7 +193,7 @@ static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue req = queue->rskq_accept_head; if (req) { sk_acceptq_removed(parent); - queue->rskq_accept_head = req->dl_next; + WRITE_ONCE(queue->rskq_accept_head, req->dl_next); if (queue->rskq_accept_head == NULL) queue->rskq_accept_tail = NULL; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index f7224c4fc30f..da55ce62fe50 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -936,7 +936,7 @@ struct sock *inet_csk_reqsk_queue_add(struct sock *sk, req->sk = child; req->dl_next = NULL; if (queue->rskq_accept_head == NULL) - queue->rskq_accept_head = req; + WRITE_ONCE(queue->rskq_accept_head, req); else queue->rskq_accept_tail->dl_next = req; queue->rskq_accept_tail = req; From a8e920b22026c4717daee0e16ecee828aecfcb84 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Oct 2019 15:10:15 -0700 Subject: [PATCH 2745/3715] tcp: annotate lockless access to tcp_memory_pressure [ Upstream commit 1f142c17d19a5618d5a633195a46f2c8be9bf232 ] tcp_memory_pressure is read without holding any lock, and its value could be changed on other cpus. Use READ_ONCE() to annotate these lockless reads. The write side is already using atomic ops. Fixes: b8da51ebb1aa ("tcp: introduce tcp_under_memory_pressure()") Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/net/tcp.h | 2 +- net/ipv4/tcp.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 00d10f0e1194..c96302310314 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -289,7 +289,7 @@ static inline bool tcp_under_memory_pressure(const struct sock *sk) mem_cgroup_under_socket_pressure(sk->sk_memcg)) return true; - return tcp_memory_pressure; + return READ_ONCE(tcp_memory_pressure); } /* * The next routines deal with comparing 32 bit unsigned ints diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8f07655718f3..db1eceda2359 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -328,7 +328,7 @@ void tcp_enter_memory_pressure(struct sock *sk) { unsigned long val; - if (tcp_memory_pressure) + if (READ_ONCE(tcp_memory_pressure)) return; val = jiffies; @@ -343,7 +343,7 @@ void tcp_leave_memory_pressure(struct sock *sk) { unsigned long val; - if (!tcp_memory_pressure) + if (!READ_ONCE(tcp_memory_pressure)) return; val = xchg(&tcp_memory_pressure, 0); if (val) From 35ff594b0da23f442a881154356deadf86e2d3ca Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Fri, 11 Oct 2019 06:39:39 -0700 Subject: [PATCH 2746/3715] drm/msm/dsi: Implement reset correctly [ Upstream commit 78e31c42261779a01bc73472d0f65f15378e9de3 ] On msm8998, vblank timeouts are observed because the DSI controller is not reset properly, which ends up stalling the MDP. This is because the reset logic is not correct per the hardware documentation. The documentation states that after asserting reset, software should wait some time (no indication of how long), or poll the status register until it returns 0 before deasserting reset. wmb() is insufficient for this purpose since it just ensures ordering, not timing between writes. Since asserting and deasserting reset occurs on the same register, ordering is already guaranteed by the architecture, making the wmb extraneous. Since we would define a timeout for polling the status register to avoid a possible infinite loop, lets just use a static delay of 20 ms, since 16.666 ms is the time available to process one frame at 60 fps. Fixes: a689554ba6ed ("drm/msm: Initial add DSI connector support") Cc: Hai Li Cc: Rob Clark Signed-off-by: Jeffrey Hugo Reviewed-by: Sean Paul [seanpaul renamed RESET_DELAY to DSI_RESET_TOGGLE_DELAY_MS] Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20191011133939.16551-1-jeffrey.l.hugo@gmail.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/dsi/dsi_host.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index a9a0b56f1fbc..b9cb7c09e05a 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -34,6 +34,8 @@ #include "dsi_cfg.h" #include "msm_kms.h" +#define DSI_RESET_TOGGLE_DELAY_MS 20 + static int dsi_get_version(const void __iomem *base, u32 *major, u32 *minor) { u32 ver; @@ -906,7 +908,7 @@ static void dsi_sw_reset(struct msm_dsi_host *msm_host) wmb(); /* clocks need to be enabled before reset */ dsi_write(msm_host, REG_DSI_RESET, 1); - wmb(); /* make sure reset happen */ + msleep(DSI_RESET_TOGGLE_DELAY_MS); /* make sure reset happen */ dsi_write(msm_host, REG_DSI_RESET, 0); } @@ -1288,7 +1290,7 @@ static void dsi_sw_reset_restore(struct msm_dsi_host *msm_host) /* dsi controller can only be reset while clocks are running */ dsi_write(msm_host, REG_DSI_RESET, 1); - wmb(); /* make sure reset happen */ + msleep(DSI_RESET_TOGGLE_DELAY_MS); /* make sure reset happen */ dsi_write(msm_host, REG_DSI_RESET, 0); wmb(); /* controller out of reset */ dsi_write(msm_host, REG_DSI_CTRL, data0); From 1ca124f27e5083048bb9d8c5a87deca63b1a027e Mon Sep 17 00:00:00 2001 From: Robin Gong Date: Tue, 24 Sep 2019 09:49:18 +0000 Subject: [PATCH 2747/3715] dmaengine: imx-sdma: fix size check for sdma script_number [ Upstream commit bd73dfabdda280fc5f05bdec79b6721b4b2f035f ] Illegal memory will be touch if SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V3 (41) exceed the size of structure sdma_script_start_addrs(40), thus cause memory corrupt such as slob block header so that kernel trap into while() loop forever in slob_free(). Please refer to below code piece in imx-sdma.c: for (i = 0; i < sdma->script_number; i++) if (addr_arr[i] > 0) saddr_arr[i] = addr_arr[i]; /* memory corrupt here */ That issue was brought by commit a572460be9cf ("dmaengine: imx-sdma: Add support for version 3 firmware") because SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V3 (38->41 3 scripts added) not align with script number added in sdma_script_start_addrs(2 scripts). Fixes: a572460be9cf ("dmaengine: imx-sdma: Add support for version 3 firmware") Cc: stable@vger.kernel Link: https://www.spinics.net/lists/arm-kernel/msg754895.html Signed-off-by: Robin Gong Reported-by: Jurgen Lambrecht Link: https://lore.kernel.org/r/1569347584-3478-1-git-send-email-yibin.gong@nxp.com [vkoul: update the patch title] Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/imx-sdma.c | 8 ++++++++ include/linux/platform_data/dma-imx-sdma.h | 3 +++ 2 files changed, 11 insertions(+) diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index 0fc12a8783e3..99f3f22ed647 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -1441,6 +1441,14 @@ static void sdma_add_scripts(struct sdma_engine *sdma, if (!sdma->script_number) sdma->script_number = SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1; + if (sdma->script_number > sizeof(struct sdma_script_start_addrs) + / sizeof(s32)) { + dev_err(sdma->dev, + "SDMA script number %d not match with firmware.\n", + sdma->script_number); + return; + } + for (i = 0; i < sdma->script_number; i++) if (addr_arr[i] > 0) saddr_arr[i] = addr_arr[i]; diff --git a/include/linux/platform_data/dma-imx-sdma.h b/include/linux/platform_data/dma-imx-sdma.h index 6eaa53cef0bd..30e676b36b24 100644 --- a/include/linux/platform_data/dma-imx-sdma.h +++ b/include/linux/platform_data/dma-imx-sdma.h @@ -51,7 +51,10 @@ struct sdma_script_start_addrs { /* End of v2 array */ s32 zcanfd_2_mcu_addr; s32 zqspi_2_mcu_addr; + s32 mcu_2_ecspi_addr; /* End of v3 array */ + s32 mcu_2_zqspi_addr; + /* End of v4 array */ }; /** From e0f8a42943c46030a40737a02e871186968f0123 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 18 Oct 2019 09:16:57 -0700 Subject: [PATCH 2748/3715] net: netem: fix error path for corrupted GSO frames [ Upstream commit a7fa12d15855904aff1716e1fc723c03ba38c5cc ] To corrupt a GSO frame we first perform segmentation. We then proceed using the first segment instead of the full GSO skb and requeue the rest of the segments as separate packets. If there are any issues with processing the first segment we still want to process the rest, therefore we jump to the finish_segs label. Commit 177b8007463c ("net: netem: fix backlog accounting for corrupted GSO frames") started using the pointer to the first segment in the "rest of segments processing", but as mentioned above the first segment may had already been freed at this point. Backlog corrections for parent qdiscs have to be adjusted. Fixes: 177b8007463c ("net: netem: fix backlog accounting for corrupted GSO frames") Reported-by: kbuild test robot Reported-by: Dan Carpenter Reported-by: Ben Hutchings Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/sched/sch_netem.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index ede0a24e67eb..64c3cfa35736 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -504,6 +504,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb)) { qdisc_drop(skb, sch, to_free); + skb = NULL; goto finish_segs; } @@ -580,9 +581,10 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, finish_segs: if (segs) { unsigned int len, last_len; - int nb = 0; + int nb; - len = skb->len; + len = skb ? skb->len : 0; + nb = skb ? 1 : 0; while (segs) { skb2 = segs->next; @@ -599,7 +601,8 @@ finish_segs: } segs = skb2; } - qdisc_tree_reduce_backlog(sch, -nb, prev_len - len); + /* Parent qdiscs accounted for 1 skb of size @prev_len */ + qdisc_tree_reduce_backlog(sch, -(nb - 1), -(len - prev_len)); } return NET_XMIT_SUCCESS; } From b630744138cd00215befad571428b38b4b9f219d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 18 Oct 2019 09:16:58 -0700 Subject: [PATCH 2749/3715] net: netem: correct the parent's backlog when corrupted packet was dropped [ Upstream commit e0ad032e144731a5928f2d75e91c2064ba1a764c ] If packet corruption failed we jump to finish_segs and return NET_XMIT_SUCCESS. Seeing success will make the parent qdisc increment its backlog, that's incorrect - we need to return NET_XMIT_DROP. Fixes: 6071bd1aa13e ("netem: Segment GSO packets on enqueue") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/sched/sch_netem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 64c3cfa35736..328b043edf07 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -603,6 +603,8 @@ finish_segs: } /* Parent qdiscs accounted for 1 skb of size @prev_len */ qdisc_tree_reduce_backlog(sch, -(nb - 1), -(len - prev_len)); + } else if (!skb) { + return NET_XMIT_DROP; } return NET_XMIT_SUCCESS; } From fde9261b5217d48dc19f93610759d7a658237c9a Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 20 Nov 2019 18:29:13 +0100 Subject: [PATCH 2750/3715] net: qca_spi: Move reset_count to struct qcaspi [ Upstream commit bc19c32904e36548335b35fdce6ce734e20afc0a ] The reset counter is specific for every QCA700x chip. So move this into the private driver struct. Otherwise we get unpredictable reset behavior in setups with multiple QCA700x chips. Fixes: 291ab06ecf67 (net: qualcomm: new Ethernet over SPI driver for QCA7000) Signed-off-by: Stefan Wahren Signed-off-by: Stefan Wahren Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qualcomm/qca_spi.c | 9 ++++----- drivers/net/ethernet/qualcomm/qca_spi.h | 1 + 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index 275fc6f154a7..1c87178fc485 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -475,7 +475,6 @@ qcaspi_qca7k_sync(struct qcaspi *qca, int event) u16 signature = 0; u16 spi_config; u16 wrbuf_space = 0; - static u16 reset_count; if (event == QCASPI_EVENT_CPUON) { /* Read signature twice, if not valid @@ -528,13 +527,13 @@ qcaspi_qca7k_sync(struct qcaspi *qca, int event) qca->sync = QCASPI_SYNC_RESET; qca->stats.trig_reset++; - reset_count = 0; + qca->reset_count = 0; break; case QCASPI_SYNC_RESET: - reset_count++; + qca->reset_count++; netdev_dbg(qca->net_dev, "sync: waiting for CPU on, count %u.\n", - reset_count); - if (reset_count >= QCASPI_RESET_TIMEOUT) { + qca->reset_count); + if (qca->reset_count >= QCASPI_RESET_TIMEOUT) { /* reset did not seem to take place, try again */ qca->sync = QCASPI_SYNC_UNKNOWN; qca->stats.reset_timeout++; diff --git a/drivers/net/ethernet/qualcomm/qca_spi.h b/drivers/net/ethernet/qualcomm/qca_spi.h index fc0e98726b36..719c41227f22 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.h +++ b/drivers/net/ethernet/qualcomm/qca_spi.h @@ -92,6 +92,7 @@ struct qcaspi { unsigned int intr_req; unsigned int intr_svc; + u16 reset_count; #ifdef CONFIG_DEBUG_FS struct dentry *device_root; From 495d767ec747d62fb16b246832b545db1c8f1f8f Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Thu, 21 Nov 2019 15:37:26 +0000 Subject: [PATCH 2751/3715] afs: Fix large file support [ Upstream commit b485275f1aca8a9da37fd35e4fad673935e827da ] By default s_maxbytes is set to MAX_NON_LFS, which limits the usable file size to 2GB, enforced by the vfs. Commit b9b1f8d5930a ("AFS: write support fixes") added support for the 64-bit fetch and store server operations, but did not change this value. As a result, attempts to write past the 2G mark result in EFBIG errors: $ dd if=/dev/zero of=foo bs=1M count=1 seek=2048 dd: error writing 'foo': File too large Set s_maxbytes to MAX_LFS_FILESIZE. Fixes: b9b1f8d5930a ("AFS: write support fixes") Signed-off-by: Marc Dionne Signed-off-by: David Howells Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/afs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/afs/super.c b/fs/afs/super.c index 689173c0a682..f8529ddbd587 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -359,6 +359,7 @@ static int afs_fill_super(struct super_block *sb, /* fill in the superblock */ sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; + sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_magic = AFS_FS_MAGIC; sb->s_op = &afs_super_ops; sb->s_xattr = afs_xattr_handlers; From d9d56b8e366ae31bcb4e74056cda11432dc251c1 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Mon, 4 Nov 2019 21:33:50 +0800 Subject: [PATCH 2752/3715] MIPS: Loongson: Fix return value of loongson_hwmon_init [ Upstream commit dece3c2a320b0a6d891da6ff774ab763969b6860 ] When call function hwmon_device_register failed, use the actual return value instead of always -ENOMEM. Fixes: 64f09aa967e1 ("MIPS: Loongson-3: Add CPU Hwmon platform driver") Signed-off-by: Tiezhu Yang Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/platform/mips/cpu_hwmon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/mips/cpu_hwmon.c b/drivers/platform/mips/cpu_hwmon.c index 322de58eebaf..02484ae9a116 100644 --- a/drivers/platform/mips/cpu_hwmon.c +++ b/drivers/platform/mips/cpu_hwmon.c @@ -158,7 +158,7 @@ static int __init loongson_hwmon_init(void) cpu_hwmon_dev = hwmon_device_register(NULL); if (IS_ERR(cpu_hwmon_dev)) { - ret = -ENOMEM; + ret = PTR_ERR(cpu_hwmon_dev); pr_err("hwmon_device_register fail!\n"); goto fail_hwmon_device_register; } From de67fb88a153da82174e8496166fcb72e337d962 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 1 Nov 2019 16:42:37 -0700 Subject: [PATCH 2753/3715] hv_netvsc: flag software created hash value [ Upstream commit df9f540ca74297a84bafacfa197e9347b20beea5 ] When the driver needs to create a hash value because it was not done at higher level, then the hash should be marked as a software not hardware hash. Fixes: f72860afa2e3 ("hv_netvsc: Exclude non-TCP port numbers from vRSS hashing") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/hyperv/netvsc_drv.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 9e48855f6407..14451e14d99d 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -282,9 +282,9 @@ static inline u32 netvsc_get_hash( else if (flow.basic.n_proto == htons(ETH_P_IPV6)) hash = jhash2((u32 *)&flow.addrs.v6addrs, 8, hashrnd); else - hash = 0; + return 0; - skb_set_hash(skb, hash, PKT_HASH_TYPE_L3); + __skb_set_sw_hash(skb, hash, false); } return hash; @@ -802,8 +802,7 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net, skb->protocol == htons(ETH_P_IP)) netvsc_comp_ipcsum(skb); - /* Do L4 checksum offload if enabled and present. - */ + /* Do L4 checksum offload if enabled and present. */ if (csum_info && (net->features & NETIF_F_RXCSUM)) { if (csum_info->receive.tcp_checksum_succeeded || csum_info->receive.udp_checksum_succeeded) From 00719495ea54c6d40a11ee1785ea2cedc8573042 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Nov 2019 14:11:49 -0800 Subject: [PATCH 2754/3715] net: neigh: use long type to store jiffies delta [ Upstream commit 9d027e3a83f39b819e908e4e09084277a2e45e95 ] A difference of two unsigned long needs long storage. Fixes: c7fb64db001f ("[NETLINK]: Neighbour table configuration and statistics via rtnetlink") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/core/neighbour.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 16ac50b1b9a7..567e431813e5 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1877,8 +1877,8 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, goto nla_put_failure; { unsigned long now = jiffies; - unsigned int flush_delta = now - tbl->last_flush; - unsigned int rand_delta = now - tbl->last_rand; + long flush_delta = now - tbl->last_flush; + long rand_delta = now - tbl->last_rand; struct neigh_hash_table *nht; struct ndt_config ndc = { .ndtc_key_len = tbl->key_len, From 68b7234958105858d85a5130d73e3f5fe5e70c22 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Nov 2019 05:07:46 -0800 Subject: [PATCH 2755/3715] packet: fix data-race in fanout_flow_is_huge() [ Upstream commit b756ad928d98e5ef0b74af7546a6a31a8dadde00 ] KCSAN reported the following data-race [1] Adding a couple of READ_ONCE()/WRITE_ONCE() should silence it. Since the report hinted about multiple cpus using the history concurrently, I added a test avoiding writing on it if the victim slot already contains the desired value. [1] BUG: KCSAN: data-race in fanout_demux_rollover / fanout_demux_rollover read to 0xffff8880b01786cc of 4 bytes by task 18921 on cpu 1: fanout_flow_is_huge net/packet/af_packet.c:1303 [inline] fanout_demux_rollover+0x33e/0x3f0 net/packet/af_packet.c:1353 packet_rcv_fanout+0x34e/0x490 net/packet/af_packet.c:1453 deliver_skb net/core/dev.c:1888 [inline] dev_queue_xmit_nit+0x15b/0x540 net/core/dev.c:1958 xmit_one net/core/dev.c:3195 [inline] dev_hard_start_xmit+0x3f5/0x430 net/core/dev.c:3215 __dev_queue_xmit+0x14ab/0x1b40 net/core/dev.c:3792 dev_queue_xmit+0x21/0x30 net/core/dev.c:3825 neigh_direct_output+0x1f/0x30 net/core/neighbour.c:1530 neigh_output include/net/neighbour.h:511 [inline] ip6_finish_output2+0x7a2/0xec0 net/ipv6/ip6_output.c:116 __ip6_finish_output net/ipv6/ip6_output.c:142 [inline] __ip6_finish_output+0x2d7/0x330 net/ipv6/ip6_output.c:127 ip6_finish_output+0x41/0x160 net/ipv6/ip6_output.c:152 NF_HOOK_COND include/linux/netfilter.h:294 [inline] ip6_output+0xf2/0x280 net/ipv6/ip6_output.c:175 dst_output include/net/dst.h:436 [inline] ip6_local_out+0x74/0x90 net/ipv6/output_core.c:179 ip6_send_skb+0x53/0x110 net/ipv6/ip6_output.c:1795 udp_v6_send_skb.isra.0+0x3ec/0xa70 net/ipv6/udp.c:1173 udpv6_sendmsg+0x1906/0x1c20 net/ipv6/udp.c:1471 inet6_sendmsg+0x6d/0x90 net/ipv6/af_inet6.c:576 sock_sendmsg_nosec net/socket.c:637 [inline] sock_sendmsg+0x9f/0xc0 net/socket.c:657 ___sys_sendmsg+0x2b7/0x5d0 net/socket.c:2311 __sys_sendmmsg+0x123/0x350 net/socket.c:2413 __do_sys_sendmmsg net/socket.c:2442 [inline] __se_sys_sendmmsg net/socket.c:2439 [inline] __x64_sys_sendmmsg+0x64/0x80 net/socket.c:2439 do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x44/0xa9 write to 0xffff8880b01786cc of 4 bytes by task 18922 on cpu 0: fanout_flow_is_huge net/packet/af_packet.c:1306 [inline] fanout_demux_rollover+0x3a4/0x3f0 net/packet/af_packet.c:1353 packet_rcv_fanout+0x34e/0x490 net/packet/af_packet.c:1453 deliver_skb net/core/dev.c:1888 [inline] dev_queue_xmit_nit+0x15b/0x540 net/core/dev.c:1958 xmit_one net/core/dev.c:3195 [inline] dev_hard_start_xmit+0x3f5/0x430 net/core/dev.c:3215 __dev_queue_xmit+0x14ab/0x1b40 net/core/dev.c:3792 dev_queue_xmit+0x21/0x30 net/core/dev.c:3825 neigh_direct_output+0x1f/0x30 net/core/neighbour.c:1530 neigh_output include/net/neighbour.h:511 [inline] ip6_finish_output2+0x7a2/0xec0 net/ipv6/ip6_output.c:116 __ip6_finish_output net/ipv6/ip6_output.c:142 [inline] __ip6_finish_output+0x2d7/0x330 net/ipv6/ip6_output.c:127 ip6_finish_output+0x41/0x160 net/ipv6/ip6_output.c:152 NF_HOOK_COND include/linux/netfilter.h:294 [inline] ip6_output+0xf2/0x280 net/ipv6/ip6_output.c:175 dst_output include/net/dst.h:436 [inline] ip6_local_out+0x74/0x90 net/ipv6/output_core.c:179 ip6_send_skb+0x53/0x110 net/ipv6/ip6_output.c:1795 udp_v6_send_skb.isra.0+0x3ec/0xa70 net/ipv6/udp.c:1173 udpv6_sendmsg+0x1906/0x1c20 net/ipv6/udp.c:1471 inet6_sendmsg+0x6d/0x90 net/ipv6/af_inet6.c:576 sock_sendmsg_nosec net/socket.c:637 [inline] sock_sendmsg+0x9f/0xc0 net/socket.c:657 ___sys_sendmsg+0x2b7/0x5d0 net/socket.c:2311 __sys_sendmmsg+0x123/0x350 net/socket.c:2413 __do_sys_sendmmsg net/socket.c:2442 [inline] __se_sys_sendmmsg net/socket.c:2439 [inline] __x64_sys_sendmmsg+0x64/0x80 net/socket.c:2439 do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 18922 Comm: syz-executor.3 Not tainted 5.4.0-rc6+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 3b3a5b0aab5b ("packet: rollover huge flows before small flows") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/packet/af_packet.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index e788f9c7c398..46b7fac82775 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1337,15 +1337,21 @@ static void packet_sock_destruct(struct sock *sk) static bool fanout_flow_is_huge(struct packet_sock *po, struct sk_buff *skb) { - u32 rxhash; + u32 *history = po->rollover->history; + u32 victim, rxhash; int i, count = 0; rxhash = skb_get_hash(skb); for (i = 0; i < ROLLOVER_HLEN; i++) - if (po->rollover->history[i] == rxhash) + if (READ_ONCE(history[i]) == rxhash) count++; - po->rollover->history[prandom_u32() % ROLLOVER_HLEN] = rxhash; + victim = prandom_u32() % ROLLOVER_HLEN; + + /* Avoid dirtying the cache line if possible */ + if (READ_ONCE(history[victim]) != rxhash) + WRITE_ONCE(history[victim], rxhash); + return count > (ROLLOVER_HLEN >> 1); } From 7effc7cf857317e78fdaa9111243ee9d24a8e80f Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Thu, 7 Nov 2019 11:30:42 +0100 Subject: [PATCH 2756/3715] mmc: sdio: fix wl1251 vendor id [ Upstream commit e5db673e7fe2f971ec82039a28dc0811c2100e87 ] v4.11-rc1 did introduce a patch series that rearranged the sdio quirks into a header file. Unfortunately this did forget to handle SDIO_VENDOR_ID_TI differently between wl1251 and wl1271 with the result that although the wl1251 was found on the sdio bus, the firmware did not load any more and there was no interface registration. This patch defines separate constants to be used by sdio quirks and drivers. Fixes: 884f38607897 ("mmc: core: move some sdio IDs out of quirks file") Signed-off-by: H. Nikolaus Schaller Cc: # v4.11+ Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- include/linux/mmc/sdio_ids.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 0a7abe8a407f..68bbbd9edc08 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -67,6 +67,8 @@ #define SDIO_VENDOR_ID_TI 0x0097 #define SDIO_DEVICE_ID_TI_WL1271 0x4076 +#define SDIO_VENDOR_ID_TI_WL1251 0x104c +#define SDIO_DEVICE_ID_TI_WL1251 0x9066 #define SDIO_VENDOR_ID_STE 0x0020 #define SDIO_DEVICE_ID_STE_CW1200 0x2280 From 8c17dd4b587bed444f1ea58bfc9bd90f44bf9db3 Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Thu, 7 Nov 2019 11:30:43 +0100 Subject: [PATCH 2757/3715] mmc: core: fix wl1251 sdio quirks [ Upstream commit 16568b4a4f0c34bd35cfadac63303c7af7812764 ] wl1251 and wl1271 have different vendor id and device id. So we need to handle both with sdio quirks. Fixes: 884f38607897 ("mmc: core: move some sdio IDs out of quirks file") Signed-off-by: H. Nikolaus Schaller Cc: # v4.11+ Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/core/quirks.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h index 5153577754f0..09897abb79ed 100644 --- a/drivers/mmc/core/quirks.h +++ b/drivers/mmc/core/quirks.h @@ -119,7 +119,14 @@ static const struct mmc_fixup mmc_ext_csd_fixups[] = { END_FIXUP }; + static const struct mmc_fixup sdio_fixup_methods[] = { + SDIO_FIXUP(SDIO_VENDOR_ID_TI_WL1251, SDIO_DEVICE_ID_TI_WL1251, + add_quirk, MMC_QUIRK_NONSTD_FUNC_IF), + + SDIO_FIXUP(SDIO_VENDOR_ID_TI_WL1251, SDIO_DEVICE_ID_TI_WL1251, + add_quirk, MMC_QUIRK_DISABLE_CD), + SDIO_FIXUP(SDIO_VENDOR_ID_TI, SDIO_DEVICE_ID_TI_WL1271, add_quirk, MMC_QUIRK_NONSTD_FUNC_IF), From beb22d9386c3eab7938f6920e01768e98a3924e9 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Wed, 2 Oct 2019 16:52:37 -0500 Subject: [PATCH 2758/3715] affs: fix a memory leak in affs_remount [ Upstream commit 450c3d4166837c496ebce03650c08800991f2150 ] In affs_remount if data is provided it is duplicated into new_opts. The allocated memory for new_opts is only released if parse_options fails. There's a bit of history behind new_options, originally there was save/replace options on the VFS layer so the 'data' passed must not change (thus strdup), this got cleaned up in later patches. But not completely. There's no reason to do the strdup in cases where the filesystem does not need to reuse the 'data' again, because strsep would modify it directly. Fixes: c8f33d0bec99 ("affs: kstrdup() memory handling") Signed-off-by: Navid Emamdoost [ update changelog ] Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/affs/super.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/affs/super.c b/fs/affs/super.c index 884bedab7266..789a1c7db5d8 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -559,14 +559,9 @@ affs_remount(struct super_block *sb, int *flags, char *data) int root_block; unsigned long mount_flags; int res = 0; - char *new_opts; char volume[32]; char *prefix = NULL; - new_opts = kstrdup(data, GFP_KERNEL); - if (data && !new_opts) - return -ENOMEM; - pr_debug("%s(flags=0x%x,opts=\"%s\")\n", __func__, *flags, data); sync_filesystem(sb); @@ -577,7 +572,6 @@ affs_remount(struct super_block *sb, int *flags, char *data) &blocksize, &prefix, volume, &mount_flags)) { kfree(prefix); - kfree(new_opts); return -EINVAL; } From e3ba33cc5a94f5469865263bbb99c8ad3631ea3f Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Mon, 18 Nov 2019 15:38:02 +0800 Subject: [PATCH 2759/3715] dmaengine: ti: edma: fix missed failure handling [ Upstream commit 340049d453682a9fe8d91fe794dd091730f4bb25 ] When devm_kcalloc fails, it forgets to call edma_free_slot. Replace direct return with failure handler to fix it. Fixes: 1be5336bc7ba ("dmaengine: edma: New device tree binding") Signed-off-by: Chuhong Yuan Link: https://lore.kernel.org/r/20191118073802.28424-1-hslester96@gmail.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/edma.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index 519c24465dea..57a49fe713fd 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -2340,8 +2340,10 @@ static int edma_probe(struct platform_device *pdev) ecc->tc_list = devm_kcalloc(dev, ecc->num_tc, sizeof(*ecc->tc_list), GFP_KERNEL); - if (!ecc->tc_list) - return -ENOMEM; + if (!ecc->tc_list) { + ret = -ENOMEM; + goto err_reg1; + } for (i = 0;; i++) { ret = of_parse_phandle_with_fixed_args(node, "ti,tptcs", From 75262c4cc870f6ccf29d33d8758a826dc735e3cf Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Mon, 18 Nov 2019 10:53:53 +1100 Subject: [PATCH 2760/3715] drm/radeon: fix bad DMA from INTERRUPT_CNTL2 [ Upstream commit 62d91dd2851e8ae2ca552f1b090a3575a4edf759 ] The INTERRUPT_CNTL2 register expects a valid DMA address, but is currently set with a GPU MC address. This can cause problems on systems that detect the resulting DMA read from an invalid address (found on a Power8 guest). Instead, use the DMA address of the dummy page because it will always be safe. Fixes: d8f60cfc9345 ("drm/radeon/kms: Add support for interrupts on r6xx/r7xx chips (v3)") Fixes: 25a857fbe973 ("drm/radeon/kms: add support for interrupts on SI") Fixes: a59781bbe528 ("drm/radeon: add support for interrupts on CIK (v5)") Signed-off-by: Sam Bobroff Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/cik.c | 4 ++-- drivers/gpu/drm/radeon/r600.c | 4 ++-- drivers/gpu/drm/radeon/si.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index ce8b353b5753..ba31c7674fcd 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -7012,8 +7012,8 @@ static int cik_irq_init(struct radeon_device *rdev) } /* setup interrupt control */ - /* XXX this should actually be a bus address, not an MC address. same on older asics */ - WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8); + /* set dummy read address to dummy page address */ + WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8); interrupt_cntl = RREG32(INTERRUPT_CNTL); /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index e06e2d8feab3..a724bb87cfad 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3690,8 +3690,8 @@ int r600_irq_init(struct radeon_device *rdev) } /* setup interrupt control */ - /* set dummy read address to ring address */ - WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8); + /* set dummy read address to dummy page address */ + WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8); interrupt_cntl = RREG32(INTERRUPT_CNTL); /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 1907c950d76f..1144cafea9ac 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -5993,8 +5993,8 @@ static int si_irq_init(struct radeon_device *rdev) } /* setup interrupt control */ - /* set dummy read address to ring address */ - WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8); + /* set dummy read address to dummy page address */ + WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8); interrupt_cntl = RREG32(INTERRUPT_CNTL); /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN From 602cfd269e9cf2fa1c2242985ac0406834389402 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 19 Nov 2019 12:03:31 +0000 Subject: [PATCH 2761/3715] arm64: dts: juno: Fix UART frequency [ Upstream commit 39a1a8941b27c37f79508426e27a2ec29829d66c ] Older versions of the Juno *SoC* TRM [1] recommended that the UART clock source should be 7.2738 MHz, whereas the *system* TRM [2] stated a more correct value of 7.3728 MHz. Somehow the wrong value managed to end up in our DT. Doing a prime factorisation, a modulo divide by 115200 and trying to buy a 7.2738 MHz crystal at your favourite electronics dealer suggest that the old value was actually a typo. The actual UART clock is driven by a PLL, configured via a parameter in some board.txt file in the firmware, which reads 7.37 MHz (sic!). Fix this to correct the baud rate divisor calculation on the Juno board. [1] http://infocenter.arm.com/help/topic/com.arm.doc.ddi0515b.b/DDI0515B_b_juno_arm_development_platform_soc_trm.pdf [2] http://infocenter.arm.com/help/topic/com.arm.doc.100113_0000_07_en/arm_versatile_express_juno_development_platform_(v2m_juno)_technical_reference_manual_100113_0000_07_en.pdf Fixes: 71f867ec130e ("arm64: Add Juno board device tree.") Signed-off-by: Andre Przywara Acked-by: Liviu Dudau Signed-off-by: Sudeep Holla Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/arm/juno-clocks.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/arm/juno-clocks.dtsi b/arch/arm64/boot/dts/arm/juno-clocks.dtsi index e5e265dfa902..2870b5eeb198 100644 --- a/arch/arm64/boot/dts/arm/juno-clocks.dtsi +++ b/arch/arm64/boot/dts/arm/juno-clocks.dtsi @@ -8,10 +8,10 @@ */ / { /* SoC fixed clocks */ - soc_uartclk: refclk7273800hz { + soc_uartclk: refclk7372800hz { compatible = "fixed-clock"; #clock-cells = <0>; - clock-frequency = <7273800>; + clock-frequency = <7372800>; clock-output-names = "juno:uartclk"; }; From 30987a4fd901be1a688feaf4b672d825ceffcd1c Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 26 Feb 2019 12:22:11 +0200 Subject: [PATCH 2762/3715] IB/iser: Fix dma_nents type definition [ Upstream commit c1545f1a200f4adc4ef8dd534bf33e2f1aa22c2f ] The retured value from ib_dma_map_sg saved in dma_nents variable. To avoid future mismatch between types, define dma_nents as an integer instead of unsigned. Fixes: 57b26497fabe ("IB/iser: Pass the correct number of entries for dma mapped SGL") Reported-by: Dan Carpenter Reviewed-by: Israel Rukshin Signed-off-by: Max Gurtovoy Acked-by: Sagi Grimberg Reviewed-by: Dan Carpenter Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/iser/iscsi_iser.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index c1ae4aeae2f9..46dfc6ae9d1c 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -197,7 +197,7 @@ struct iser_data_buf { struct scatterlist *sg; int size; unsigned long data_len; - unsigned int dma_nents; + int dma_nents; }; /* fwd declarations */ From 7c662630d94fc07da206a8423bfbf9c2330bb2c8 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Thu, 21 Nov 2019 09:10:49 +0100 Subject: [PATCH 2763/3715] serial: stm32: fix clearing interrupt error flags [ Upstream commit 1250ed7114a977cdc2a67a0c09d6cdda63970eb9 ] The interrupt clear flag register is a "write 1 to clear" register. So, only writing ones allows to clear flags: - Replace buggy stm32_clr_bits() by a simple write to clear error flags - Replace useless read/modify/write stm32_set_bits() routine by a simple write to clear TC (transfer complete) flag. Fixes: 4f01d833fdcd ("serial: stm32: fix rx error handling") Signed-off-by: Fabrice Gasnier Cc: stable Link: https://lore.kernel.org/r/1574323849-1909-1-git-send-email-fabrice.gasnier@st.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/stm32-usart.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c index f8f3f8fafd9f..1e854e1851fb 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -132,8 +132,8 @@ static void stm32_receive_chars(struct uart_port *port, bool threaded) * cleared by the sequence [read SR - read DR]. */ if ((sr & USART_SR_ERR_MASK) && ofs->icr != UNDEF_REG) - stm32_clr_bits(port, ofs->icr, USART_ICR_ORECF | - USART_ICR_PECF | USART_ICR_FECF); + writel_relaxed(sr & USART_SR_ERR_MASK, + port->membase + ofs->icr); c = stm32_get_char(port, &sr, &stm32_port->last_res); port->icount.rx++; @@ -302,7 +302,7 @@ static void stm32_transmit_chars(struct uart_port *port) if (ofs->icr == UNDEF_REG) stm32_clr_bits(port, ofs->isr, USART_SR_TC); else - stm32_set_bits(port, ofs->icr, USART_ICR_TCCF); + writel_relaxed(USART_ICR_TCCF, port->membase + ofs->icr); if (stm32_port->tx_ch) stm32_transmit_chars_dma(port); From 4127249099b796b1fd5c74562ce94b94cda38553 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 1 Dec 2018 11:53:10 +1100 Subject: [PATCH 2764/3715] m68k: Call timer_interrupt() with interrupts disabled [ Upstream commit 1efdd4bd254311498123a15fa0acd565f454da97 ] Some platforms execute their timer handler with the interrupt priority level set below 6. That means the handler could be interrupted by another driver and this could lead to re-entry of the timer core. Avoid this by use of local_irq_save/restore for timer interrupt dispatch. This provides mutual exclusion around the timer interrupt flag access which is needed later in this series for the clocksource conversion. Reported-by: Thomas Gleixner Link: http://lkml.kernel.org/r/alpine.DEB.2.21.1811131407120.2697@nanos.tec.linutronix.de Signed-off-by: Finn Thain Signed-off-by: Geert Uytterhoeven Signed-off-by: Sasha Levin --- arch/m68k/amiga/cia.c | 9 +++++++++ arch/m68k/atari/ataints.c | 4 ++-- arch/m68k/atari/time.c | 15 ++++++++++++++- arch/m68k/bvme6000/config.c | 20 ++++++++++---------- arch/m68k/hp300/time.c | 10 ++++++++-- arch/m68k/mac/via.c | 17 +++++++++++++++++ arch/m68k/mvme147/config.c | 18 ++++++++++-------- arch/m68k/mvme16x/config.c | 21 +++++++++++---------- arch/m68k/q40/q40ints.c | 19 +++++++++++-------- arch/m68k/sun3/sun3ints.c | 3 +++ arch/m68k/sun3x/time.c | 16 ++++++++++------ 11 files changed, 105 insertions(+), 47 deletions(-) diff --git a/arch/m68k/amiga/cia.c b/arch/m68k/amiga/cia.c index 2081b8cd5591..b9aee983e6f4 100644 --- a/arch/m68k/amiga/cia.c +++ b/arch/m68k/amiga/cia.c @@ -88,10 +88,19 @@ static irqreturn_t cia_handler(int irq, void *dev_id) struct ciabase *base = dev_id; int mach_irq; unsigned char ints; + unsigned long flags; + /* Interrupts get disabled while the timer irq flag is cleared and + * the timer interrupt serviced. + */ mach_irq = base->cia_irq; + local_irq_save(flags); ints = cia_set_irq(base, CIA_ICR_ALL); amiga_custom.intreq = base->int_mask; + if (ints & 1) + generic_handle_irq(mach_irq); + local_irq_restore(flags); + mach_irq++, ints >>= 1; for (; ints; mach_irq++, ints >>= 1) { if (ints & 1) generic_handle_irq(mach_irq); diff --git a/arch/m68k/atari/ataints.c b/arch/m68k/atari/ataints.c index 3d2b63bedf05..56f02ea2c248 100644 --- a/arch/m68k/atari/ataints.c +++ b/arch/m68k/atari/ataints.c @@ -142,7 +142,7 @@ struct mfptimerbase { .name = "MFP Timer D" }; -static irqreturn_t mfptimer_handler(int irq, void *dev_id) +static irqreturn_t mfp_timer_d_handler(int irq, void *dev_id) { struct mfptimerbase *base = dev_id; int mach_irq; @@ -344,7 +344,7 @@ void __init atari_init_IRQ(void) st_mfp.tim_ct_cd = (st_mfp.tim_ct_cd & 0xf0) | 0x6; /* request timer D dispatch handler */ - if (request_irq(IRQ_MFP_TIMD, mfptimer_handler, IRQF_SHARED, + if (request_irq(IRQ_MFP_TIMD, mfp_timer_d_handler, IRQF_SHARED, stmfp_base.name, &stmfp_base)) pr_err("Couldn't register %s interrupt\n", stmfp_base.name); diff --git a/arch/m68k/atari/time.c b/arch/m68k/atari/time.c index c549b48174ec..972181c1fe4b 100644 --- a/arch/m68k/atari/time.c +++ b/arch/m68k/atari/time.c @@ -24,6 +24,18 @@ DEFINE_SPINLOCK(rtc_lock); EXPORT_SYMBOL_GPL(rtc_lock); +static irqreturn_t mfp_timer_c_handler(int irq, void *dev_id) +{ + irq_handler_t timer_routine = dev_id; + unsigned long flags; + + local_irq_save(flags); + timer_routine(0, NULL); + local_irq_restore(flags); + + return IRQ_HANDLED; +} + void __init atari_sched_init(irq_handler_t timer_routine) { @@ -32,7 +44,8 @@ atari_sched_init(irq_handler_t timer_routine) /* start timer C, div = 1:100 */ st_mfp.tim_ct_cd = (st_mfp.tim_ct_cd & 15) | 0x60; /* install interrupt service routine for MFP Timer C */ - if (request_irq(IRQ_MFP_TIMC, timer_routine, 0, "timer", timer_routine)) + if (request_irq(IRQ_MFP_TIMC, mfp_timer_c_handler, 0, "timer", + timer_routine)) pr_err("Couldn't register timer interrupt\n"); } diff --git a/arch/m68k/bvme6000/config.c b/arch/m68k/bvme6000/config.c index 2cfff4765040..0e602c32b246 100644 --- a/arch/m68k/bvme6000/config.c +++ b/arch/m68k/bvme6000/config.c @@ -45,11 +45,6 @@ extern int bvme6000_set_clock_mmss (unsigned long); extern void bvme6000_reset (void); void bvme6000_set_vectors (void); -/* Save tick handler routine pointer, will point to xtime_update() in - * kernel/timer/timekeeping.c, called via bvme6000_process_int() */ - -static irq_handler_t tick_handler; - int __init bvme6000_parse_bootinfo(const struct bi_record *bi) { @@ -159,12 +154,18 @@ irqreturn_t bvme6000_abort_int (int irq, void *dev_id) static irqreturn_t bvme6000_timer_int (int irq, void *dev_id) { + irq_handler_t timer_routine = dev_id; + unsigned long flags; volatile RtcPtr_t rtc = (RtcPtr_t)BVME_RTC_BASE; - unsigned char msr = rtc->msr & 0xc0; + unsigned char msr; + local_irq_save(flags); + msr = rtc->msr & 0xc0; rtc->msr = msr | 0x20; /* Ack the interrupt */ + timer_routine(0, NULL); + local_irq_restore(flags); - return tick_handler(irq, dev_id); + return IRQ_HANDLED; } /* @@ -183,9 +184,8 @@ void bvme6000_sched_init (irq_handler_t timer_routine) rtc->msr = 0; /* Ensure timer registers accessible */ - tick_handler = timer_routine; - if (request_irq(BVME_IRQ_RTC, bvme6000_timer_int, 0, - "timer", bvme6000_timer_int)) + if (request_irq(BVME_IRQ_RTC, bvme6000_timer_int, 0, "timer", + timer_routine)) panic ("Couldn't register timer int"); rtc->t1cr_omr = 0x04; /* Mode 2, ext clk */ diff --git a/arch/m68k/hp300/time.c b/arch/m68k/hp300/time.c index 289d928a46cb..d30b03ea93a2 100644 --- a/arch/m68k/hp300/time.c +++ b/arch/m68k/hp300/time.c @@ -38,13 +38,19 @@ static irqreturn_t hp300_tick(int irq, void *dev_id) { + irq_handler_t timer_routine = dev_id; + unsigned long flags; unsigned long tmp; - irq_handler_t vector = dev_id; + + local_irq_save(flags); in_8(CLOCKBASE + CLKSR); asm volatile ("movpw %1@(5),%0" : "=d" (tmp) : "a" (CLOCKBASE)); + timer_routine(0, NULL); + local_irq_restore(flags); + /* Turn off the network and SCSI leds */ blinken_leds(0, 0xe0); - return vector(irq, NULL); + return IRQ_HANDLED; } u32 hp300_gettimeoffset(void) diff --git a/arch/m68k/mac/via.c b/arch/m68k/mac/via.c index 7334245abf26..863806e6775a 100644 --- a/arch/m68k/mac/via.c +++ b/arch/m68k/mac/via.c @@ -398,6 +398,8 @@ void via_nubus_irq_shutdown(int irq) * via6522.c :-), disable/pending masks added. */ +#define VIA_TIMER_1_INT BIT(6) + void via1_irq(struct irq_desc *desc) { int irq_num; @@ -407,6 +409,21 @@ void via1_irq(struct irq_desc *desc) if (!events) return; + irq_num = IRQ_MAC_TIMER_1; + irq_bit = VIA_TIMER_1_INT; + if (events & irq_bit) { + unsigned long flags; + + local_irq_save(flags); + via1[vIFR] = irq_bit; + generic_handle_irq(irq_num); + local_irq_restore(flags); + + events &= ~irq_bit; + if (!events) + return; + } + irq_num = VIA1_SOURCE_BASE; irq_bit = 1; do { diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c index 8778612d1f31..78ae803c833e 100644 --- a/arch/m68k/mvme147/config.c +++ b/arch/m68k/mvme147/config.c @@ -46,11 +46,6 @@ extern void mvme147_reset (void); static int bcd2int (unsigned char b); -/* Save tick handler routine pointer, will point to xtime_update() in - * kernel/time/timekeeping.c, called via mvme147_process_int() */ - -irq_handler_t tick_handler; - int __init mvme147_parse_bootinfo(const struct bi_record *bi) { @@ -106,16 +101,23 @@ void __init config_mvme147(void) static irqreturn_t mvme147_timer_int (int irq, void *dev_id) { + irq_handler_t timer_routine = dev_id; + unsigned long flags; + + local_irq_save(flags); m147_pcc->t1_int_cntrl = PCC_TIMER_INT_CLR; m147_pcc->t1_int_cntrl = PCC_INT_ENAB|PCC_LEVEL_TIMER1; - return tick_handler(irq, dev_id); + timer_routine(0, NULL); + local_irq_restore(flags); + + return IRQ_HANDLED; } void mvme147_sched_init (irq_handler_t timer_routine) { - tick_handler = timer_routine; - if (request_irq(PCC_IRQ_TIMER1, mvme147_timer_int, 0, "timer 1", NULL)) + if (request_irq(PCC_IRQ_TIMER1, mvme147_timer_int, 0, "timer 1", + timer_routine)) pr_err("Couldn't register timer interrupt\n"); /* Init the clock with a value */ diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c index 6fa06d4d16bf..3116dd576bb3 100644 --- a/arch/m68k/mvme16x/config.c +++ b/arch/m68k/mvme16x/config.c @@ -51,11 +51,6 @@ extern void mvme16x_reset (void); int bcd2int (unsigned char b); -/* Save tick handler routine pointer, will point to xtime_update() in - * kernel/time/timekeeping.c, called via mvme16x_process_int() */ - -static irq_handler_t tick_handler; - unsigned short mvme16x_config; EXPORT_SYMBOL(mvme16x_config); @@ -354,8 +349,15 @@ static irqreturn_t mvme16x_abort_int (int irq, void *dev_id) static irqreturn_t mvme16x_timer_int (int irq, void *dev_id) { - *(volatile unsigned char *)0xfff4201b |= 8; - return tick_handler(irq, dev_id); + irq_handler_t timer_routine = dev_id; + unsigned long flags; + + local_irq_save(flags); + *(volatile unsigned char *)0xfff4201b |= 8; + timer_routine(0, NULL); + local_irq_restore(flags); + + return IRQ_HANDLED; } void mvme16x_sched_init (irq_handler_t timer_routine) @@ -363,14 +365,13 @@ void mvme16x_sched_init (irq_handler_t timer_routine) uint16_t brdno = be16_to_cpu(mvme_bdid.brdno); int irq; - tick_handler = timer_routine; /* Using PCCchip2 or MC2 chip tick timer 1 */ *(volatile unsigned long *)0xfff42008 = 0; *(volatile unsigned long *)0xfff42004 = 10000; /* 10ms */ *(volatile unsigned char *)0xfff42017 |= 3; *(volatile unsigned char *)0xfff4201b = 0x16; - if (request_irq(MVME16x_IRQ_TIMER, mvme16x_timer_int, 0, - "timer", mvme16x_timer_int)) + if (request_irq(MVME16x_IRQ_TIMER, mvme16x_timer_int, 0, "timer", + timer_routine)) panic ("Couldn't register timer int"); if (brdno == 0x0162 || brdno == 0x172) diff --git a/arch/m68k/q40/q40ints.c b/arch/m68k/q40/q40ints.c index 3e7603202977..1c696906c159 100644 --- a/arch/m68k/q40/q40ints.c +++ b/arch/m68k/q40/q40ints.c @@ -127,10 +127,10 @@ void q40_mksound(unsigned int hz, unsigned int ticks) sound_ticks = ticks << 1; } -static irq_handler_t q40_timer_routine; - -static irqreturn_t q40_timer_int (int irq, void * dev) +static irqreturn_t q40_timer_int(int irq, void *dev_id) { + irq_handler_t timer_routine = dev_id; + ql_ticks = ql_ticks ? 0 : 1; if (sound_ticks) { unsigned char sval=(sound_ticks & 1) ? 128-SVOL : 128+SVOL; @@ -139,8 +139,13 @@ static irqreturn_t q40_timer_int (int irq, void * dev) *DAC_RIGHT=sval; } - if (!ql_ticks) - q40_timer_routine(irq, dev); + if (!ql_ticks) { + unsigned long flags; + + local_irq_save(flags); + timer_routine(0, NULL); + local_irq_restore(flags); + } return IRQ_HANDLED; } @@ -148,11 +153,9 @@ void q40_sched_init (irq_handler_t timer_routine) { int timer_irq; - q40_timer_routine = timer_routine; timer_irq = Q40_IRQ_FRAME; - if (request_irq(timer_irq, q40_timer_int, 0, - "timer", q40_timer_int)) + if (request_irq(timer_irq, q40_timer_int, 0, "timer", timer_routine)) panic("Couldn't register timer int"); master_outb(-1, FRAME_CLEAR_REG); diff --git a/arch/m68k/sun3/sun3ints.c b/arch/m68k/sun3/sun3ints.c index 6bbca30c9188..a5824abb4a39 100644 --- a/arch/m68k/sun3/sun3ints.c +++ b/arch/m68k/sun3/sun3ints.c @@ -61,8 +61,10 @@ static irqreturn_t sun3_int7(int irq, void *dev_id) static irqreturn_t sun3_int5(int irq, void *dev_id) { + unsigned long flags; unsigned int cnt; + local_irq_save(flags); #ifdef CONFIG_SUN3 intersil_clear(); #endif @@ -76,6 +78,7 @@ static irqreturn_t sun3_int5(int irq, void *dev_id) cnt = kstat_irqs_cpu(irq, 0); if (!(cnt % 20)) sun3_leds(led_pattern[cnt % 160 / 20]); + local_irq_restore(flags); return IRQ_HANDLED; } diff --git a/arch/m68k/sun3x/time.c b/arch/m68k/sun3x/time.c index 7a2c53d9f779..48b43903253e 100644 --- a/arch/m68k/sun3x/time.c +++ b/arch/m68k/sun3x/time.c @@ -78,15 +78,19 @@ u32 sun3x_gettimeoffset(void) } #if 0 -static void sun3x_timer_tick(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t sun3x_timer_tick(int irq, void *dev_id) { - void (*vector)(int, void *, struct pt_regs *) = dev_id; + irq_handler_t timer_routine = dev_id; + unsigned long flags; - /* Clear the pending interrupt - pulse the enable line low */ - disable_irq(5); - enable_irq(5); + local_irq_save(flags); + /* Clear the pending interrupt - pulse the enable line low */ + disable_irq(5); + enable_irq(5); + timer_routine(0, NULL); + local_irq_restore(flags); - vector(irq, NULL, regs); + return IRQ_HANDLED; } #endif From 9a95f25269bd9257ab9fba7bb14355d50b5f39ec Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 27 Jan 2020 14:46:54 +0100 Subject: [PATCH 2765/3715] Linux 4.14.168 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b538e6170f73..1e74ba09cdda 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 167 +SUBLEVEL = 168 EXTRAVERSION = NAME = Petit Gorille From 9abd1aede4b9e0bce0b62158f18977933390f06c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 26 Nov 2019 15:01:42 -0800 Subject: [PATCH 2766/3715] f2fs: preallocate DIO blocks when forcing buffered_io MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous preallocation and DIO decision like below. allow_outplace_dio !allow_outplace_dio f2fs_force_buffered_io (*) No_Prealloc / Buffered_IO Prealloc / Buffered_IO !f2fs_force_buffered_io No_Prealloc / DIO Prealloc / DIO But, Javier reported Case (*) where zoned device bypassed preallocation but fell back to buffered writes in f2fs_direct_IO(), resulting in stale data being read. In order to fix the issue, actually we need to preallocate blocks whenever we fall back to buffered IO like this. No change is made in the other cases. allow_outplace_dio !allow_outplace_dio f2fs_force_buffered_io (*) Prealloc / Buffered_IO Prealloc / Buffered_IO !f2fs_force_buffered_io No_Prealloc / DIO Prealloc / DIO Reported-and-tested-by: Javier Gonzalez Signed-off-by: Damien Le Moal Tested-by: Shin'ichiro Kawasaki Reviewed-by: Chao Yu Reviewed-by: Javier González Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 13 ------------- fs/f2fs/file.c | 45 ++++++++++++++++++++++++++++++++++----------- 2 files changed, 34 insertions(+), 24 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1546afc1f407..212dda069b6a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1180,19 +1180,6 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) int err = 0; bool direct_io = iocb->ki_flags & IOCB_DIRECT; - /* convert inline data for Direct I/O*/ - if (direct_io) { - err = f2fs_convert_inline_inode(inode); - if (err) - return err; - } - - if (direct_io && allow_outplace_dio(inode, iocb, from)) - return 0; - - if (is_inode_flag_set(inode, FI_NO_PREALLOC)) - return 0; - map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos); map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from)); if (map.m_len > map.m_lblk) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 115763b0135c..7fa780001cba 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3319,18 +3319,41 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ret = -EAGAIN; goto out; } - } else { - preallocated = true; - target_size = iocb->ki_pos + iov_iter_count(from); - - err = f2fs_preallocate_blocks(iocb, from); - if (err) { - clear_inode_flag(inode, FI_NO_PREALLOC); - inode_unlock(inode); - ret = err; - goto out; - } + goto write; } + + if (is_inode_flag_set(inode, FI_NO_PREALLOC)) + goto write; + + if (iocb->ki_flags & IOCB_DIRECT) { + /* + * Convert inline data for Direct I/O before entering + * f2fs_direct_IO(). + */ + err = f2fs_convert_inline_inode(inode); + if (err) + goto out_err; + /* + * If force_buffere_io() is true, we have to allocate + * blocks all the time, since f2fs_direct_IO will fall + * back to buffered IO. + */ + if (!f2fs_force_buffered_io(inode, iocb, from) && + allow_outplace_dio(inode, iocb, from)) + goto write; + } + preallocated = true; + target_size = iocb->ki_pos + iov_iter_count(from); + + err = f2fs_preallocate_blocks(iocb, from); + if (err) { +out_err: + clear_inode_flag(inode, FI_NO_PREALLOC); + inode_unlock(inode); + ret = err; + goto out; + } +write: ret = __generic_file_write_iter(iocb, from); clear_inode_flag(inode, FI_NO_PREALLOC); From 6ec5762bf1ab8427cb880f791666726dd7141e13 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 3 Dec 2019 15:53:16 -0800 Subject: [PATCH 2767/3715] f2fs: call f2fs_balance_fs outside of locked page Otherwise, we can hit deadlock by waiting for the locked page in move_data_block in GC. Thread A Thread B - do_page_mkwrite - f2fs_vm_page_mkwrite - lock_page - f2fs_balance_fs - mutex_lock(gc_mutex) - f2fs_gc - do_garbage_collect - ra_data_block - grab_cache_page - f2fs_balance_fs - mutex_lock(gc_mutex) Fixes: 39a8695824510 ("f2fs: refactor ->page_mkwrite() flow") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 7fa780001cba..eaba5e34d579 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -49,7 +49,7 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) struct page *page = vmf->page; struct inode *inode = file_inode(vmf->vma->vm_file); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct dnode_of_data dn = { .node_changed = false }; + struct dnode_of_data dn; int err; if (unlikely(f2fs_cp_error(sbi))) { @@ -62,6 +62,9 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) goto err; } + /* should do out of any locked page */ + f2fs_balance_fs(sbi, true); + sb_start_pagefault(inode->i_sb); f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); @@ -119,8 +122,6 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) out_sem: up_read(&F2FS_I(inode)->i_mmap_sem); - f2fs_balance_fs(sbi, dn.node_changed); - sb_end_pagefault(inode->i_sb); err: return block_page_mkwrite_return(err); From 2e2eb2311d16efedd0950035b6820a05820af58b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 3 Dec 2019 18:54:29 -0800 Subject: [PATCH 2768/3715] f2fs: keep quota data on write_begin failure This patch avoids some unnecessary locks for quota files when write_begin fails. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 212dda069b6a..86d4c56a79a8 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2600,14 +2600,16 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to) struct inode *inode = mapping->host; loff_t i_size = i_size_read(inode); + if (IS_NOQUOTA(inode)) + return; + /* In the fs-verity case, f2fs_end_enable_verity() does the truncate */ if (to > i_size && !f2fs_verity_in_progress(inode)) { down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); down_write(&F2FS_I(inode)->i_mmap_sem); truncate_pagecache(inode, i_size); - if (!IS_NOQUOTA(inode)) - f2fs_truncate_blocks(inode, i_size, true); + f2fs_truncate_blocks(inode, i_size, true); up_write(&F2FS_I(inode)->i_mmap_sem); up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); From 396a9ec3a75f54b1614bb43b9192a54ebce122e4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 3 Dec 2019 19:02:15 -0800 Subject: [PATCH 2769/3715] f2fs: should avoid recursive filesystem ops We need to use GFP_NOFS, since we did f2fs_lock_op(). Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index eaba5e34d579..8112af220836 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1187,13 +1187,13 @@ static int __exchange_data_block(struct inode *src_inode, src_blkaddr = f2fs_kvzalloc(F2FS_I_SB(src_inode), array_size(olen, sizeof(block_t)), - GFP_KERNEL); + GFP_NOFS); if (!src_blkaddr) return -ENOMEM; do_replace = f2fs_kvzalloc(F2FS_I_SB(src_inode), array_size(olen, sizeof(int)), - GFP_KERNEL); + GFP_NOFS); if (!do_replace) { kvfree(src_blkaddr); return -ENOMEM; From d5d3da042b690488154a7ff3381f62d22e1d642a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 3 Dec 2019 19:07:53 -0800 Subject: [PATCH 2770/3715] f2fs: set GFP_NOFS when moving inline dentries Otherwise, it can cause circular locking dependency reported by mm. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 35f7f972d209..0f8d0c171918 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -368,7 +368,7 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, struct f2fs_dentry_ptr src, dst; int err; - page = f2fs_grab_cache_page(dir->i_mapping, 0, false); + page = f2fs_grab_cache_page(dir->i_mapping, 0, true); if (!page) { f2fs_put_page(ipage, 1); return -ENOMEM; From 8eb795d4848acfa04a7643d1799330472eec9ad0 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Thu, 5 Dec 2019 08:52:39 +0530 Subject: [PATCH 2771/3715] f2fs: cleanup duplicate stats for atomic files Remove duplicate sbi->aw_cnt stats counter that tracks the number of atomic files currently opened (it also shows incorrect value sometimes). Use more relit lable sbi->atomic_files to show in the stats. Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 3 +-- fs/f2fs/f2fs.h | 7 +------ fs/f2fs/file.c | 1 - fs/f2fs/segment.c | 1 - 4 files changed, 2 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 244c5bb23fe2..dbf6eb329c03 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -56,7 +56,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->nquota_files = sbi->nquota_files; si->ndirty_all = sbi->ndirty_inode[DIRTY_META]; si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); - si->aw_cnt = atomic_read(&sbi->aw_cnt); + si->aw_cnt = sbi->atomic_files; si->vw_cnt = atomic_read(&sbi->vw_cnt); si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt); si->max_vw_cnt = atomic_read(&sbi->max_vw_cnt); @@ -506,7 +506,6 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) for (i = META_CP; i < META_MAX; i++) atomic_set(&sbi->meta_count[i], 0); - atomic_set(&sbi->aw_cnt, 0); atomic_set(&sbi->vw_cnt, 0); atomic_set(&sbi->max_aw_cnt, 0); atomic_set(&sbi->max_vw_cnt, 0); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3a17f3ba954d..dbfe63857f11 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1325,7 +1325,6 @@ struct f2fs_sb_info { atomic_t inline_xattr; /* # of inline_xattr inodes */ atomic_t inline_inode; /* # of inline_data inodes */ atomic_t inline_dir; /* # of inline_dentry inodes */ - atomic_t aw_cnt; /* # of atomic writes */ atomic_t vw_cnt; /* # of volatile writes */ atomic_t max_aw_cnt; /* max # of atomic writes */ atomic_t max_vw_cnt; /* max # of volatile writes */ @@ -3385,13 +3384,9 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) ((sbi)->block_count[(curseg)->alloc_type]++) #define stat_inc_inplace_blocks(sbi) \ (atomic_inc(&(sbi)->inplace_count)) -#define stat_inc_atomic_write(inode) \ - (atomic_inc(&F2FS_I_SB(inode)->aw_cnt)) -#define stat_dec_atomic_write(inode) \ - (atomic_dec(&F2FS_I_SB(inode)->aw_cnt)) #define stat_update_max_atomic_write(inode) \ do { \ - int cur = atomic_read(&F2FS_I_SB(inode)->aw_cnt); \ + int cur = F2FS_I_SB(inode)->atomic_files; \ int max = atomic_read(&F2FS_I_SB(inode)->max_aw_cnt); \ if (cur > max) \ atomic_set(&F2FS_I_SB(inode)->max_aw_cnt, cur); \ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 8112af220836..e3e08c23482d 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1932,7 +1932,6 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); F2FS_I(inode)->inmem_task = current; - stat_inc_atomic_write(inode); stat_update_max_atomic_write(inode); out: inode_unlock(inode); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index fa32ce92ed65..d3480c8d25a0 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -334,7 +334,6 @@ void f2fs_drop_inmem_pages(struct inode *inode) } fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0; - stat_dec_atomic_write(inode); spin_lock(&sbi->inode_lock[ATOMIC_FILE]); if (!list_empty(&fi->inmem_ilist)) From 22bdd23cd682ce95c1a890bd206935dc48aa7e1d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 4 Dec 2019 09:52:58 +0800 Subject: [PATCH 2772/3715] f2fs: introduce private bioset In low memory scenario, we can allocate multiple bios without submitting any of them. - f2fs_write_checkpoint() - block_operations() - f2fs_sync_node_pages() step 1) flush cold nodes, allocate new bio from mempool - bio_alloc() - mempool_alloc() step 2) flush hot nodes, allocate a bio from mempool - bio_alloc() - mempool_alloc() step 3) flush warm nodes, be stuck in below call path - bio_alloc() - mempool_alloc() - loop to wait mempool element release, as we only reserved memory for two bio allocation, however above allocated two bios may never be submitted. So we need avoid using default bioset, in this patch we introduce a private bioset, in where we enlarg mempool element count to total number of log header, so that we can make sure we have enough backuped memory pool in scenario of allocating/holding multiple bios. Signed-off-by: Gao Xiang Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 44 +++++++++++++++++++++++++++++++++++++++++++- fs/f2fs/f2fs.h | 23 +++-------------------- fs/f2fs/super.c | 7 ++++++- 3 files changed, 52 insertions(+), 22 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 86d4c56a79a8..b132dffa5c7a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -31,6 +31,48 @@ static struct kmem_cache *bio_post_read_ctx_cache; static struct kmem_cache *bio_entry_slab; static mempool_t *bio_post_read_ctx_pool; +static struct bio_set *f2fs_bioset; + +#define F2FS_BIO_POOL_SIZE NR_CURSEG_TYPE + +int __init f2fs_init_bioset(void) +{ + f2fs_bioset = bioset_create(F2FS_BIO_POOL_SIZE, + 0, BIOSET_NEED_BVECS); + if (!f2fs_bioset) + return -ENOMEM; + return 0; +} + +void f2fs_destroy_bioset(void) +{ + bioset_free(f2fs_bioset); +} + +static inline struct bio *__f2fs_bio_alloc(gfp_t gfp_mask, + unsigned int nr_iovecs) +{ + return bio_alloc_bioset(gfp_mask, nr_iovecs, f2fs_bioset); +} + +struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, int npages, bool no_fail) +{ + struct bio *bio; + + if (no_fail) { + /* No failure on bio allocation */ + bio = __f2fs_bio_alloc(GFP_NOIO, npages); + if (!bio) + bio = __f2fs_bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages); + return bio; + } + if (time_to_inject(sbi, FAULT_ALLOC_BIO)) { + f2fs_show_injection_info(sbi, FAULT_ALLOC_BIO); + return NULL; + } + + return __f2fs_bio_alloc(GFP_KERNEL, npages); +} static bool __is_cp_guaranteed(struct page *page) { @@ -3317,7 +3359,7 @@ int __init f2fs_init_bio_entry_cache(void) return 0; } -void __exit f2fs_destroy_bio_entry_cache(void) +void f2fs_destroy_bio_entry_cache(void) { kmem_cache_destroy(bio_entry_slab); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index dbfe63857f11..7e0631cdf5bd 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2220,26 +2220,6 @@ static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep, return entry; } -static inline struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, - int npages, bool no_fail) -{ - struct bio *bio; - - if (no_fail) { - /* No failure on bio allocation */ - bio = bio_alloc(GFP_NOIO, npages); - if (!bio) - bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages); - return bio; - } - if (time_to_inject(sbi, FAULT_ALLOC_BIO)) { - f2fs_show_injection_info(sbi, FAULT_ALLOC_BIO); - return NULL; - } - - return bio_alloc(GFP_KERNEL, npages); -} - static inline bool is_idle(struct f2fs_sb_info *sbi, int type) { if (sbi->gc_mode == GC_URGENT) @@ -3203,6 +3183,9 @@ void f2fs_destroy_checkpoint_caches(void); /* * data.c */ +int __init f2fs_init_bioset(void); +void f2fs_destroy_bioset(void); +struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, int npages, bool no_fail); int f2fs_init_post_read_processing(void); void f2fs_destroy_post_read_processing(void); int f2fs_init_bio_entry_cache(void); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 6ed411850b1c..6f450082168c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3790,8 +3790,12 @@ static int __init init_f2fs_fs(void) err = f2fs_init_bio_entry_cache(); if (err) goto free_post_read; + err = f2fs_init_bioset(); + if (err) + goto free_bio_enrty_cache; return 0; - +free_bio_enrty_cache: + f2fs_destroy_bio_entry_cache(); free_post_read: f2fs_destroy_post_read_processing(); free_root_stats: @@ -3817,6 +3821,7 @@ fail: static void __exit exit_f2fs_fs(void) { + f2fs_destroy_bioset(); f2fs_destroy_bio_entry_cache(); f2fs_destroy_post_read_processing(); f2fs_destroy_root_stats(); From ff032520431f9a4dfb49e121cbba62b4114a0c16 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Dec 2019 12:00:55 -0800 Subject: [PATCH 2773/3715] f2fs: don't keep META_MAPPING pages used for moving verity file blocks META_MAPPING is used to move blocks for both encrypted and verity files. So the META_MAPPING invalidation condition in do_checkpoint() should consider verity too, not just encrypt. Signed-off-by: Eric Biggers Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 3299ee97f220..0cbd5e7b13f9 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1509,10 +1509,10 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_wait_on_all_pages_writeback(sbi); /* - * invalidate intermediate page cache borrowed from meta inode - * which are used for migration of encrypted inode's blocks. + * invalidate intermediate page cache borrowed from meta inode which are + * used for migration of encrypted or verity inode's blocks. */ - if (f2fs_sb_has_encrypt(sbi)) + if (f2fs_sb_has_encrypt(sbi) || f2fs_sb_has_verity(sbi)) invalidate_mapping_pages(META_MAPPING(sbi), MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1); From c643e2b9c949963a086cc6e4e69680bdc2df1081 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 6 Dec 2019 16:59:58 -0800 Subject: [PATCH 2774/3715] f2fs: set I_LINKABLE early to avoid wrong access by vfs This patch moves setting I_LINKABLE early in rename2(whiteout) to avoid the below warning. [ 3189.163385] WARNING: CPU: 3 PID: 59523 at fs/inode.c:358 inc_nlink+0x32/0x40 [ 3189.246979] Call Trace: [ 3189.248707] f2fs_init_inode_metadata+0x2d6/0x440 [f2fs] [ 3189.251399] f2fs_add_inline_entry+0x162/0x8c0 [f2fs] [ 3189.254010] f2fs_add_dentry+0x69/0xe0 [f2fs] [ 3189.256353] f2fs_do_add_link+0xc5/0x100 [f2fs] [ 3189.258774] f2fs_rename2+0xabf/0x1010 [f2fs] [ 3189.261079] vfs_rename+0x3f8/0xaa0 [ 3189.263056] ? tomoyo_path_rename+0x44/0x60 [ 3189.265283] ? do_renameat2+0x49b/0x550 [ 3189.267324] do_renameat2+0x49b/0x550 [ 3189.269316] __x64_sys_renameat2+0x20/0x30 [ 3189.271441] do_syscall_64+0x5a/0x230 [ 3189.273410] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 3189.275848] RIP: 0033:0x7f270b4d9a49 Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 452394990121..24d116190530 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -801,6 +801,7 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, if (whiteout) { f2fs_i_links_write(inode, false); + inode->i_state |= I_LINKABLE; *whiteout = inode; } else { d_tmpfile(dentry, inode); @@ -871,6 +872,12 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, F2FS_I(old_dentry->d_inode)->i_projid))) return -EXDEV; + if (flags & RENAME_WHITEOUT) { + err = f2fs_create_whiteout(old_dir, &whiteout); + if (err) + return err; + } + err = dquot_initialize(old_dir); if (err) goto out; @@ -902,17 +909,11 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, } } - if (flags & RENAME_WHITEOUT) { - err = f2fs_create_whiteout(old_dir, &whiteout); - if (err) - goto out_dir; - } - if (new_inode) { err = -ENOTEMPTY; if (old_dir_entry && !f2fs_empty_dir(new_inode)) - goto out_whiteout; + goto out_dir; err = -ENOENT; new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, @@ -920,7 +921,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (!new_entry) { if (IS_ERR(new_page)) err = PTR_ERR(new_page); - goto out_whiteout; + goto out_dir; } f2fs_balance_fs(sbi, true); @@ -952,7 +953,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, err = f2fs_add_link(new_dentry, old_inode); if (err) { f2fs_unlock_op(sbi); - goto out_whiteout; + goto out_dir; } if (old_dir_entry) @@ -976,7 +977,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (IS_ERR(old_page)) err = PTR_ERR(old_page); f2fs_unlock_op(sbi); - goto out_whiteout; + goto out_dir; } } } @@ -995,7 +996,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_delete_entry(old_entry, old_page, old_dir, NULL); if (whiteout) { - whiteout->i_state |= I_LINKABLE; set_inode_flag(whiteout, FI_INC_LINK); err = f2fs_add_link(old_dentry, whiteout); if (err) @@ -1031,15 +1031,14 @@ put_out_dir: f2fs_unlock_op(sbi); if (new_page) f2fs_put_page(new_page, 0); -out_whiteout: - if (whiteout) - iput(whiteout); out_dir: if (old_dir_entry) f2fs_put_page(old_dir_page, 0); out_old: f2fs_put_page(old_page, 0); out: + if (whiteout) + iput(whiteout); return err; } From a068719285ffd08a836339e52e397da51f74dc1e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 11 Dec 2019 15:10:47 -0800 Subject: [PATCH 2775/3715] f2fs: don't put new_page twice in f2fs_rename In f2fs_rename(), new_page is gone after f2fs_set_link(), but it tries to put again when whiteout is failed and jumped to put_out_dir. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 24d116190530..a8ef5aeafe44 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -854,7 +854,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *old_inode = d_inode(old_dentry); struct inode *new_inode = d_inode(new_dentry); struct inode *whiteout = NULL; - struct page *old_dir_page; + struct page *old_dir_page = NULL; struct page *old_page, *new_page = NULL; struct f2fs_dir_entry *old_dir_entry = NULL; struct f2fs_dir_entry *old_entry; @@ -933,6 +933,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, goto put_out_dir; f2fs_set_link(new_dir, new_entry, new_page, old_inode); + new_page = NULL; new_inode->i_ctime = current_time(new_inode); down_write(&F2FS_I(new_inode)->i_sem); @@ -994,6 +995,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_mark_inode_dirty_sync(old_inode, false); f2fs_delete_entry(old_entry, old_page, old_dir, NULL); + old_page = NULL; if (whiteout) { set_inode_flag(whiteout, FI_INC_LINK); @@ -1029,8 +1031,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, put_out_dir: f2fs_unlock_op(sbi); - if (new_page) - f2fs_put_page(new_page, 0); + f2fs_put_page(new_page, 0); out_dir: if (old_dir_entry) f2fs_put_page(old_dir_page, 0); From 859f5e47353f2cbdfed42c6da3bc2942b6e86ca5 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 7 Jun 2018 17:09:40 -0700 Subject: [PATCH 2776/3715] mm: kvmalloc does not fallback to vmalloc for incompatible gfp flags kvmalloc warned about incompatible gfp_mask to catch abusers (mostly GFP_NOFS) with an intention that this will motivate authors of the code to fix those. Linus argues that this just motivates people to do even more hacks like if (gfp == GFP_KERNEL) kvmalloc else kmalloc I haven't seen this happening much (Linus pointed to bucket_lock special cases an atomic allocation but my git foo hasn't found much more) but it is true that we can grow those in future. Therefore Linus suggested to simply not fallback to vmalloc for incompatible gfp flags and rather stick with the kmalloc path. Link: http://lkml.kernel.org/r/20180601115329.27807-1-mhocko@kernel.org Signed-off-by: Michal Hocko Suggested-by: Linus Torvalds Cc: Tom Herbert Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/util.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/util.c b/mm/util.c index 34e57fae959d..8dabac7514fe 100644 --- a/mm/util.c +++ b/mm/util.c @@ -367,7 +367,8 @@ EXPORT_SYMBOL(vm_mmap); * __GFP_RETRY_MAYFAIL is supported, and it should be used only if kmalloc is * preferable to the vmalloc fallback, due to visible performance drawbacks. * - * Any use of gfp flags outside of GFP_KERNEL should be consulted with mm people. + * Please note that any use of gfp flags outside of GFP_KERNEL is careful to not + * fall back to vmalloc. */ void *kvmalloc_node(size_t size, gfp_t flags, int node) { @@ -378,7 +379,8 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node) * vmalloc uses GFP_KERNEL for some internal allocations (e.g page tables) * so the given set of flags has to be compatible. */ - WARN_ON_ONCE((flags & GFP_KERNEL) != GFP_KERNEL); + if ((flags & GFP_KERNEL) != GFP_KERNEL) + return kmalloc_node(size, flags, node); /* * We want to attempt a large physically contiguous block first because From 6f8658482819dcbd3aa95cb7c2a95674ff4d1379 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 3 Dec 2019 17:31:00 -0800 Subject: [PATCH 2777/3715] f2fs: declare nested quota_sem and remove unnecessary sems 1. f2fs_quota_sync -> down_read(&sbi->quota_sem) -> dquot_writeback_dquots -> f2fs_dquot_commit -> down_read(&sbi->quota_sem) 2. f2fs_quota_sync -> down_read(&sbi->quota_sem) -> f2fs_write_data_pages -> f2fs_write_single_data_page -> down_write(&F2FS_I(inode)->i_sem) f2fs_mkdir -> f2fs_do_add_link -> down_write(&F2FS_I(inode)->i_sem) -> f2fs_init_inode_metadata -> f2fs_new_node_page -> dquot_alloc_inode -> f2fs_dquot_mark_dquot_dirty -> down_read(&sbi->quota_sem) Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 6f450082168c..7d7d416b41cf 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2166,7 +2166,7 @@ static int f2fs_dquot_commit(struct dquot *dquot) struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb); int ret; - down_read(&sbi->quota_sem); + down_read_nested(&sbi->quota_sem, SINGLE_DEPTH_NESTING); ret = dquot_commit(dquot); if (ret < 0) set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); @@ -2190,13 +2190,10 @@ static int f2fs_dquot_acquire(struct dquot *dquot) static int f2fs_dquot_release(struct dquot *dquot) { struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb); - int ret; + int ret = dquot_release(dquot); - down_read(&sbi->quota_sem); - ret = dquot_release(dquot); if (ret < 0) set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); - up_read(&sbi->quota_sem); return ret; } @@ -2204,29 +2201,22 @@ static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot) { struct super_block *sb = dquot->dq_sb; struct f2fs_sb_info *sbi = F2FS_SB(sb); - int ret; - - down_read(&sbi->quota_sem); - ret = dquot_mark_dquot_dirty(dquot); + int ret = dquot_mark_dquot_dirty(dquot); /* if we are using journalled quota */ if (is_journalled_quota(sbi)) set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH); - up_read(&sbi->quota_sem); return ret; } static int f2fs_dquot_commit_info(struct super_block *sb, int type) { struct f2fs_sb_info *sbi = F2FS_SB(sb); - int ret; + int ret = dquot_commit_info(sb, type); - down_read(&sbi->quota_sem); - ret = dquot_commit_info(sb, type); if (ret < 0) set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); - up_read(&sbi->quota_sem); return ret; } From 749e5b5b3264d5ba34005c18164947aa3db08706 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 13 Dec 2019 18:32:16 -0800 Subject: [PATCH 2778/3715] f2fs: free sysfs kobject Detected kmemleak. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 9ed632b9c9bb..e75c5921d750 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -802,4 +802,5 @@ void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi) remove_proc_entry(sbi->sb->s_id, f2fs_proc_root); } kobject_del(&sbi->s_kobj); + kobject_put(&sbi->s_kobj); } From f6574fbf6578e47cfa3cace486ca852979a1e433 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 1 Nov 2019 18:07:14 +0800 Subject: [PATCH 2779/3715] f2fs: support data compression This patch tries to support compression in f2fs. - New term named cluster is defined as basic unit of compression, file can be divided into multiple clusters logically. One cluster includes 4 << n (n >= 0) logical pages, compression size is also cluster size, each of cluster can be compressed or not. - In cluster metadata layout, one special flag is used to indicate cluster is compressed one or normal one, for compressed cluster, following metadata maps cluster to [1, 4 << n - 1] physical blocks, in where f2fs stores data including compress header and compressed data. - In order to eliminate write amplification during overwrite, F2FS only support compression on write-once file, data can be compressed only when all logical blocks in file are valid and cluster compress ratio is lower than specified threshold. - To enable compression on regular inode, there are three ways: * chattr +c file * chattr +c dir; touch dir/file * mount w/ -o compress_extension=ext; touch file.ext Compress metadata layout: [Dnode Structure] +-----------------------------------------------+ | cluster 1 | cluster 2 | ......... | cluster N | +-----------------------------------------------+ . . . . . . . . . Compressed Cluster . . Normal Cluster . +----------+---------+---------+---------+ +---------+---------+---------+---------+ |compr flag| block 1 | block 2 | block 3 | | block 1 | block 2 | block 3 | block 4 | +----------+---------+---------+---------+ +---------+---------+---------+---------+ . . . . . . +-------------+-------------+----------+----------------------------+ | data length | data chksum | reserved | compressed data | +-------------+-------------+----------+----------------------------+ Changelog: 20190326: - fix error handling of read_end_io(). - remove unneeded comments in f2fs_encrypt_one_page(). 20190327: - fix wrong use of f2fs_cluster_is_full() in f2fs_mpage_readpages(). - don't jump into loop directly to avoid uninitialized variables. - add TODO tag in error path of f2fs_write_cache_pages(). 20190328: - fix wrong merge condition in f2fs_read_multi_pages(). - check compressed file in f2fs_post_read_required(). 20190401 - allow overwrite on non-compressed cluster. - check cluster meta before writing compressed data. 20190402 - don't preallocate blocks for compressed file. - add lz4 compress algorithm - process multiple post read works in one workqueue Now f2fs supports processing post read work in multiple workqueue, it shows low performance due to schedule overhead of multiple workqueue executing orderly. 20190921 - compress: support buffered overwrite C: compress cluster flag V: valid block address N: NEW_ADDR One cluster contain 4 blocks before overwrite after overwrite - VVVV -> CVNN - CVNN -> VVVV - CVNN -> CVNN - CVNN -> CVVV - CVVV -> CVNN - CVVV -> CVVV 20191029 - add kconfig F2FS_FS_COMPRESSION to isolate compression related codes, add kconfig F2FS_FS_{LZO,LZ4} to cover backend algorithm. note that: will remove lzo backend if Jaegeuk agreed that too. - update codes according to Eric's comments. 20191101 - apply fixes from Jaegeuk 20191113 - apply fixes from Jaegeuk - split workqueue for fsverity 20191216 - apply fixes from Jaegeuk [Jaegeuk Kim] - add tracepoint for f2fs_{,de}compress_pages() - fix many bugs and add some compression stats - fix overwrite/mmap bugs - address 32bit build error, reported by Geert. - bug fixes when handling errors and i_compressed_blocks Reported-by: Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 52 ++ fs/f2fs/Kconfig | 25 + fs/f2fs/Makefile | 1 + fs/f2fs/compress.c | 1176 ++++++++++++++++++++++++++++ fs/f2fs/data.c | 630 +++++++++++++-- fs/f2fs/debug.c | 6 + fs/f2fs/f2fs.h | 280 ++++++- fs/f2fs/file.c | 186 ++++- fs/f2fs/inode.c | 41 + fs/f2fs/namei.c | 51 ++ fs/f2fs/segment.c | 5 +- fs/f2fs/segment.h | 12 - fs/f2fs/super.c | 112 ++- fs/f2fs/sysfs.c | 7 + include/linux/f2fs_fs.h | 5 + include/trace/events/f2fs.h | 100 +++ 16 files changed, 2569 insertions(+), 120 deletions(-) create mode 100644 fs/f2fs/compress.c diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index dc2cecdaa7dd..f211561b6e63 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -235,6 +235,17 @@ checkpoint=%s[:%u[%]] Set to "disable" to turn off checkpointing. Set to "en hide up to all remaining free space. The actual space that would be unusable can be viewed at /sys/fs/f2fs//unusable This space is reclaimed once checkpoint=enable. +compress_algorithm=%s Control compress algorithm, currently f2fs supports "lzo" + and "lz4" algorithm. +compress_log_size=%u Support configuring compress cluster size, the size will + be 4KB * (1 << %u), 16KB is minimum size, also it's + default size. +compress_extension=%s Support adding specified extension, so that f2fs can enable + compression on those corresponding files, e.g. if all files + with '.ext' has high compression rate, we can set the '.ext' + on compression extension list and enable compression on + these file by default rather than to enable it via ioctl. + For other files, we can still enable compression via ioctl. ================================================================================ DEBUGFS ENTRIES @@ -780,3 +791,44 @@ zero or random data, which is useful to the below scenario where: 4. address = fibmap(fd, offset) 5. open(blkdev) 6. write(blkdev, address) + +Compression implementation +-------------------------- + +- New term named cluster is defined as basic unit of compression, file can +be divided into multiple clusters logically. One cluster includes 4 << n +(n >= 0) logical pages, compression size is also cluster size, each of +cluster can be compressed or not. + +- In cluster metadata layout, one special block address is used to indicate +cluster is compressed one or normal one, for compressed cluster, following +metadata maps cluster to [1, 4 << n - 1] physical blocks, in where f2fs +stores data including compress header and compressed data. + +- In order to eliminate write amplification during overwrite, F2FS only +support compression on write-once file, data can be compressed only when +all logical blocks in file are valid and cluster compress ratio is lower +than specified threshold. + +- To enable compression on regular inode, there are three ways: +* chattr +c file +* chattr +c dir; touch dir/file +* mount w/ -o compress_extension=ext; touch file.ext + +Compress metadata layout: + [Dnode Structure] + +-----------------------------------------------+ + | cluster 1 | cluster 2 | ......... | cluster N | + +-----------------------------------------------+ + . . . . + . . . . + . Compressed Cluster . . Normal Cluster . ++----------+---------+---------+---------+ +---------+---------+---------+---------+ +|compr flag| block 1 | block 2 | block 3 | | block 1 | block 2 | block 3 | block 4 | ++----------+---------+---------+---------+ +---------+---------+---------+---------+ + . . + . . + . . + +-------------+-------------+----------+----------------------------+ + | data length | data chksum | reserved | compressed data | + +-------------+-------------+----------+----------------------------+ diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index d4519801745e..cf52194f37e5 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -104,3 +104,28 @@ config F2FS_FAULT_INJECTION Test F2FS to inject faults such as ENOMEM, ENOSPC, and so on. If unsure, say N. + +config F2FS_FS_COMPRESSION + bool "F2FS compression feature" + depends on F2FS_FS + help + Enable filesystem-level compression on f2fs regular files, + multiple back-end compression algorithms are supported. + +config F2FS_FS_LZO + bool "LZO compression support" + depends on F2FS_FS_COMPRESSION + select LZO_COMPRESS + select LZO_DECOMPRESS + default y + help + Support LZO compress algorithm, if unsure, say Y. + +config F2FS_FS_LZ4 + bool "LZ4 compression support" + depends on F2FS_FS_COMPRESSION + select LZ4_COMPRESS + select LZ4_DECOMPRESS + default y + help + Support LZ4 compress algorithm, if unsure, say Y. diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile index 2aaecc63834f..ee7316b42f69 100644 --- a/fs/f2fs/Makefile +++ b/fs/f2fs/Makefile @@ -9,3 +9,4 @@ f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o f2fs-$(CONFIG_F2FS_IO_TRACE) += trace.o f2fs-$(CONFIG_FS_VERITY) += verity.o +f2fs-$(CONFIG_F2FS_FS_COMPRESSION) += compress.o diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c new file mode 100644 index 000000000000..d8a64be90a50 --- /dev/null +++ b/fs/f2fs/compress.c @@ -0,0 +1,1176 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * f2fs compress support + * + * Copyright (c) 2019 Chao Yu + */ + +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "node.h" +#include + +struct f2fs_compress_ops { + int (*init_compress_ctx)(struct compress_ctx *cc); + void (*destroy_compress_ctx)(struct compress_ctx *cc); + int (*compress_pages)(struct compress_ctx *cc); + int (*decompress_pages)(struct decompress_io_ctx *dic); +}; + +static unsigned int offset_in_cluster(struct compress_ctx *cc, pgoff_t index) +{ + return index & (cc->cluster_size - 1); +} + +static pgoff_t cluster_idx(struct compress_ctx *cc, pgoff_t index) +{ + return index >> cc->log_cluster_size; +} + +static pgoff_t start_idx_of_cluster(struct compress_ctx *cc) +{ + return cc->cluster_idx << cc->log_cluster_size; +} + +bool f2fs_is_compressed_page(struct page *page) +{ + if (!PagePrivate(page)) + return false; + if (!page_private(page)) + return false; + if (IS_ATOMIC_WRITTEN_PAGE(page) || IS_DUMMY_WRITTEN_PAGE(page)) + return false; + f2fs_bug_on(F2FS_M_SB(page->mapping), + *((u32 *)page_private(page)) != F2FS_COMPRESSED_PAGE_MAGIC); + return true; +} + +static void f2fs_set_compressed_page(struct page *page, + struct inode *inode, pgoff_t index, void *data, refcount_t *r) +{ + SetPagePrivate(page); + set_page_private(page, (unsigned long)data); + + /* i_crypto_info and iv index */ + page->index = index; + page->mapping = inode->i_mapping; + if (r) + refcount_inc(r); +} + +static void f2fs_put_compressed_page(struct page *page) +{ + set_page_private(page, (unsigned long)NULL); + ClearPagePrivate(page); + page->mapping = NULL; + unlock_page(page); + put_page(page); +} + +static void f2fs_drop_rpages(struct compress_ctx *cc, int len, bool unlock) +{ + int i; + + for (i = 0; i < len; i++) { + if (!cc->rpages[i]) + continue; + if (unlock) + unlock_page(cc->rpages[i]); + else + put_page(cc->rpages[i]); + } +} + +static void f2fs_put_rpages(struct compress_ctx *cc) +{ + f2fs_drop_rpages(cc, cc->cluster_size, false); +} + +static void f2fs_unlock_rpages(struct compress_ctx *cc, int len) +{ + f2fs_drop_rpages(cc, len, true); +} + +static void f2fs_put_rpages_mapping(struct compress_ctx *cc, + struct address_space *mapping, + pgoff_t start, int len) +{ + int i; + + for (i = 0; i < len; i++) { + struct page *page = find_get_page(mapping, start + i); + + put_page(page); + put_page(page); + } +} + +static void f2fs_put_rpages_wbc(struct compress_ctx *cc, + struct writeback_control *wbc, bool redirty, int unlock) +{ + unsigned int i; + + for (i = 0; i < cc->cluster_size; i++) { + if (!cc->rpages[i]) + continue; + if (redirty) + redirty_page_for_writepage(wbc, cc->rpages[i]); + f2fs_put_page(cc->rpages[i], unlock); + } +} + +struct page *f2fs_compress_control_page(struct page *page) +{ + return ((struct compress_io_ctx *)page_private(page))->rpages[0]; +} + +int f2fs_init_compress_ctx(struct compress_ctx *cc) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode); + + if (cc->nr_rpages) + return 0; + + cc->rpages = f2fs_kzalloc(sbi, sizeof(struct page *) << + cc->log_cluster_size, GFP_NOFS); + return cc->rpages ? 0 : -ENOMEM; +} + +void f2fs_destroy_compress_ctx(struct compress_ctx *cc) +{ + kfree(cc->rpages); + cc->rpages = NULL; + cc->nr_rpages = 0; + cc->nr_cpages = 0; + cc->cluster_idx = NULL_CLUSTER; +} + +void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page) +{ + unsigned int cluster_ofs; + + if (!f2fs_cluster_can_merge_page(cc, page->index)) + f2fs_bug_on(F2FS_I_SB(cc->inode), 1); + + cluster_ofs = offset_in_cluster(cc, page->index); + cc->rpages[cluster_ofs] = page; + cc->nr_rpages++; + cc->cluster_idx = cluster_idx(cc, page->index); +} + +#ifdef CONFIG_F2FS_FS_LZO +static int lzo_init_compress_ctx(struct compress_ctx *cc) +{ + cc->private = f2fs_kvmalloc(F2FS_I_SB(cc->inode), + LZO1X_MEM_COMPRESS, GFP_NOFS); + if (!cc->private) + return -ENOMEM; + + cc->clen = lzo1x_worst_compress(PAGE_SIZE << cc->log_cluster_size); + return 0; +} + +static void lzo_destroy_compress_ctx(struct compress_ctx *cc) +{ + kvfree(cc->private); + cc->private = NULL; +} + +static int lzo_compress_pages(struct compress_ctx *cc) +{ + int ret; + + ret = lzo1x_1_compress(cc->rbuf, cc->rlen, cc->cbuf->cdata, + &cc->clen, cc->private); + if (ret != LZO_E_OK) { + printk_ratelimited("%sF2FS-fs (%s): lzo compress failed, ret:%d\n", + KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, ret); + return -EIO; + } + return 0; +} + +static int lzo_decompress_pages(struct decompress_io_ctx *dic) +{ + int ret; + + ret = lzo1x_decompress_safe(dic->cbuf->cdata, dic->clen, + dic->rbuf, &dic->rlen); + if (ret != LZO_E_OK) { + printk_ratelimited("%sF2FS-fs (%s): lzo decompress failed, ret:%d\n", + KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id, ret); + return -EIO; + } + + if (dic->rlen != PAGE_SIZE << dic->log_cluster_size) { + printk_ratelimited("%sF2FS-fs (%s): lzo invalid rlen:%zu, " + "expected:%lu\n", KERN_ERR, + F2FS_I_SB(dic->inode)->sb->s_id, + dic->rlen, + PAGE_SIZE << dic->log_cluster_size); + return -EIO; + } + return 0; +} + +static const struct f2fs_compress_ops f2fs_lzo_ops = { + .init_compress_ctx = lzo_init_compress_ctx, + .destroy_compress_ctx = lzo_destroy_compress_ctx, + .compress_pages = lzo_compress_pages, + .decompress_pages = lzo_decompress_pages, +}; +#endif + +#ifdef CONFIG_F2FS_FS_LZ4 +static int lz4_init_compress_ctx(struct compress_ctx *cc) +{ + cc->private = f2fs_kvmalloc(F2FS_I_SB(cc->inode), + LZ4_MEM_COMPRESS, GFP_NOFS); + if (!cc->private) + return -ENOMEM; + + cc->clen = LZ4_compressBound(PAGE_SIZE << cc->log_cluster_size); + return 0; +} + +static void lz4_destroy_compress_ctx(struct compress_ctx *cc) +{ + kvfree(cc->private); + cc->private = NULL; +} + +static int lz4_compress_pages(struct compress_ctx *cc) +{ + int len; + + len = LZ4_compress_default(cc->rbuf, cc->cbuf->cdata, cc->rlen, + cc->clen, cc->private); + if (!len) { + printk_ratelimited("%sF2FS-fs (%s): lz4 compress failed\n", + KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id); + return -EIO; + } + cc->clen = len; + return 0; +} + +static int lz4_decompress_pages(struct decompress_io_ctx *dic) +{ + int ret; + + ret = LZ4_decompress_safe(dic->cbuf->cdata, dic->rbuf, + dic->clen, dic->rlen); + if (ret < 0) { + printk_ratelimited("%sF2FS-fs (%s): lz4 decompress failed, ret:%d\n", + KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id, ret); + return -EIO; + } + + if (ret != PAGE_SIZE << dic->log_cluster_size) { + printk_ratelimited("%sF2FS-fs (%s): lz4 invalid rlen:%zu, " + "expected:%lu\n", KERN_ERR, + F2FS_I_SB(dic->inode)->sb->s_id, + dic->rlen, + PAGE_SIZE << dic->log_cluster_size); + return -EIO; + } + return 0; +} + +static const struct f2fs_compress_ops f2fs_lz4_ops = { + .init_compress_ctx = lz4_init_compress_ctx, + .destroy_compress_ctx = lz4_destroy_compress_ctx, + .compress_pages = lz4_compress_pages, + .decompress_pages = lz4_decompress_pages, +}; +#endif + +static const struct f2fs_compress_ops *f2fs_cops[COMPRESS_MAX] = { +#ifdef CONFIG_F2FS_FS_LZO + &f2fs_lzo_ops, +#else + NULL, +#endif +#ifdef CONFIG_F2FS_FS_LZ4 + &f2fs_lz4_ops, +#else + NULL, +#endif +}; + +bool f2fs_is_compress_backend_ready(struct inode *inode) +{ + if (!f2fs_compressed_file(inode)) + return true; + return f2fs_cops[F2FS_I(inode)->i_compress_algorithm]; +} + +static struct page *f2fs_grab_page(void) +{ + struct page *page; + + page = alloc_page(GFP_NOFS); + if (!page) + return NULL; + lock_page(page); + return page; +} + +static int f2fs_compress_pages(struct compress_ctx *cc) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode); + struct f2fs_inode_info *fi = F2FS_I(cc->inode); + const struct f2fs_compress_ops *cops = + f2fs_cops[fi->i_compress_algorithm]; + unsigned int max_len, nr_cpages; + int i, ret; + + trace_f2fs_compress_pages_start(cc->inode, cc->cluster_idx, + cc->cluster_size, fi->i_compress_algorithm); + + ret = cops->init_compress_ctx(cc); + if (ret) + goto out; + + max_len = COMPRESS_HEADER_SIZE + cc->clen; + cc->nr_cpages = DIV_ROUND_UP(max_len, PAGE_SIZE); + + cc->cpages = f2fs_kzalloc(sbi, sizeof(struct page *) * + cc->nr_cpages, GFP_NOFS); + if (!cc->cpages) { + ret = -ENOMEM; + goto destroy_compress_ctx; + } + + for (i = 0; i < cc->nr_cpages; i++) { + cc->cpages[i] = f2fs_grab_page(); + if (!cc->cpages[i]) { + ret = -ENOMEM; + goto out_free_cpages; + } + } + + cc->rbuf = vmap(cc->rpages, cc->cluster_size, VM_MAP, PAGE_KERNEL_RO); + if (!cc->rbuf) { + ret = -ENOMEM; + goto out_free_cpages; + } + + cc->cbuf = vmap(cc->cpages, cc->nr_cpages, VM_MAP, PAGE_KERNEL); + if (!cc->cbuf) { + ret = -ENOMEM; + goto out_vunmap_rbuf; + } + + ret = cops->compress_pages(cc); + if (ret) + goto out_vunmap_cbuf; + + max_len = PAGE_SIZE * (cc->cluster_size - 1) - COMPRESS_HEADER_SIZE; + + if (cc->clen > max_len) { + ret = -EAGAIN; + goto out_vunmap_cbuf; + } + + cc->cbuf->clen = cpu_to_le32(cc->clen); + cc->cbuf->chksum = cpu_to_le32(0); + + for (i = 0; i < COMPRESS_DATA_RESERVED_SIZE; i++) + cc->cbuf->reserved[i] = cpu_to_le32(0); + + vunmap(cc->cbuf); + vunmap(cc->rbuf); + + nr_cpages = DIV_ROUND_UP(cc->clen + COMPRESS_HEADER_SIZE, PAGE_SIZE); + + for (i = nr_cpages; i < cc->nr_cpages; i++) { + f2fs_put_compressed_page(cc->cpages[i]); + cc->cpages[i] = NULL; + } + + cc->nr_cpages = nr_cpages; + + trace_f2fs_compress_pages_end(cc->inode, cc->cluster_idx, + cc->clen, ret); + return 0; + +out_vunmap_cbuf: + vunmap(cc->cbuf); +out_vunmap_rbuf: + vunmap(cc->rbuf); +out_free_cpages: + for (i = 0; i < cc->nr_cpages; i++) { + if (cc->cpages[i]) + f2fs_put_compressed_page(cc->cpages[i]); + } + kfree(cc->cpages); + cc->cpages = NULL; +destroy_compress_ctx: + cops->destroy_compress_ctx(cc); +out: + trace_f2fs_compress_pages_end(cc->inode, cc->cluster_idx, + cc->clen, ret); + return ret; +} + +void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity) +{ + struct decompress_io_ctx *dic = + (struct decompress_io_ctx *)page_private(page); + struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode); + struct f2fs_inode_info *fi= F2FS_I(dic->inode); + const struct f2fs_compress_ops *cops = + f2fs_cops[fi->i_compress_algorithm]; + int ret; + + dec_page_count(sbi, F2FS_RD_DATA); + + if (bio->bi_status || PageError(page)) + dic->failed = true; + + if (refcount_dec_not_one(&dic->ref)) + return; + + trace_f2fs_decompress_pages_start(dic->inode, dic->cluster_idx, + dic->cluster_size, fi->i_compress_algorithm); + + /* submit partial compressed pages */ + if (dic->failed) { + ret = -EIO; + goto out_free_dic; + } + + dic->rbuf = vmap(dic->tpages, dic->cluster_size, VM_MAP, PAGE_KERNEL); + if (!dic->rbuf) { + ret = -ENOMEM; + goto out_free_dic; + } + + dic->cbuf = vmap(dic->cpages, dic->nr_cpages, VM_MAP, PAGE_KERNEL_RO); + if (!dic->cbuf) { + ret = -ENOMEM; + goto out_vunmap_rbuf; + } + + dic->clen = le32_to_cpu(dic->cbuf->clen); + dic->rlen = PAGE_SIZE << dic->log_cluster_size; + + if (dic->clen > PAGE_SIZE * dic->nr_cpages - COMPRESS_HEADER_SIZE) { + ret = -EFSCORRUPTED; + goto out_vunmap_cbuf; + } + + ret = cops->decompress_pages(dic); + +out_vunmap_cbuf: + vunmap(dic->cbuf); +out_vunmap_rbuf: + vunmap(dic->rbuf); +out_free_dic: + if (!verity) + f2fs_decompress_end_io(dic->rpages, dic->cluster_size, + ret, false); + + trace_f2fs_decompress_pages_end(dic->inode, dic->cluster_idx, + dic->clen, ret); + if (!verity) + f2fs_free_dic(dic); +} + +static bool is_page_in_cluster(struct compress_ctx *cc, pgoff_t index) +{ + if (cc->cluster_idx == NULL_CLUSTER) + return true; + return cc->cluster_idx == cluster_idx(cc, index); +} + +bool f2fs_cluster_is_empty(struct compress_ctx *cc) +{ + return cc->nr_rpages == 0; +} + +static bool f2fs_cluster_is_full(struct compress_ctx *cc) +{ + return cc->cluster_size == cc->nr_rpages; +} + +bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index) +{ + if (f2fs_cluster_is_empty(cc)) + return true; + return is_page_in_cluster(cc, index); +} + +static bool __cluster_may_compress(struct compress_ctx *cc) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode); + loff_t i_size = i_size_read(cc->inode); + unsigned nr_pages = DIV_ROUND_UP(i_size, PAGE_SIZE); + int i; + + for (i = 0; i < cc->cluster_size; i++) { + struct page *page = cc->rpages[i]; + + f2fs_bug_on(sbi, !page); + + if (unlikely(f2fs_cp_error(sbi))) + return false; + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) + return false; + + /* beyond EOF */ + if (page->index >= nr_pages) + return false; + } + return true; +} + +/* return # of compressed block addresses */ +static int f2fs_compressed_blocks(struct compress_ctx *cc) +{ + struct dnode_of_data dn; + int ret; + + set_new_dnode(&dn, cc->inode, NULL, NULL, 0); + ret = f2fs_get_dnode_of_data(&dn, start_idx_of_cluster(cc), + LOOKUP_NODE); + if (ret) { + if (ret == -ENOENT) + ret = 0; + goto fail; + } + + if (dn.data_blkaddr == COMPRESS_ADDR) { + int i; + + ret = 1; + for (i = 1; i < cc->cluster_size; i++) { + block_t blkaddr; + + blkaddr = datablock_addr(dn.inode, + dn.node_page, dn.ofs_in_node + i); + if (blkaddr != NULL_ADDR) + ret++; + } + } +fail: + f2fs_put_dnode(&dn); + return ret; +} + +int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index) +{ + struct compress_ctx cc = { + .inode = inode, + .log_cluster_size = F2FS_I(inode)->i_log_cluster_size, + .cluster_size = F2FS_I(inode)->i_cluster_size, + .cluster_idx = index >> F2FS_I(inode)->i_log_cluster_size, + }; + + return f2fs_compressed_blocks(&cc); +} + +static bool cluster_may_compress(struct compress_ctx *cc) +{ + if (!f2fs_compressed_file(cc->inode)) + return false; + if (f2fs_is_atomic_file(cc->inode)) + return false; + if (f2fs_is_mmap_file(cc->inode)) + return false; + if (!f2fs_cluster_is_full(cc)) + return false; + return __cluster_may_compress(cc); +} + +static void set_cluster_writeback(struct compress_ctx *cc) +{ + int i; + + for (i = 0; i < cc->cluster_size; i++) { + if (cc->rpages[i]) + set_page_writeback(cc->rpages[i]); + } +} + +static void set_cluster_dirty(struct compress_ctx *cc) +{ + int i; + + for (i = 0; i < cc->cluster_size; i++) + if (cc->rpages[i]) + set_page_dirty(cc->rpages[i]); +} + +static int prepare_compress_overwrite(struct compress_ctx *cc, + struct page **pagep, pgoff_t index, void **fsdata) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode); + struct address_space *mapping = cc->inode->i_mapping; + struct page *page; + struct dnode_of_data dn; + sector_t last_block_in_bio; + unsigned fgp_flag = FGP_LOCK | FGP_WRITE | FGP_CREAT; + pgoff_t start_idx = start_idx_of_cluster(cc); + int i, ret; + bool prealloc; + +retry: + ret = f2fs_compressed_blocks(cc); + if (ret <= 0) + return ret; + + /* compressed case */ + prealloc = (ret < cc->cluster_size); + + ret = f2fs_init_compress_ctx(cc); + if (ret) + return ret; + + /* keep page reference to avoid page reclaim */ + for (i = 0; i < cc->cluster_size; i++) { + page = f2fs_pagecache_get_page(mapping, start_idx + i, + fgp_flag, GFP_NOFS); + if (!page) { + ret = -ENOMEM; + goto unlock_pages; + } + + if (PageUptodate(page)) + unlock_page(page); + else + f2fs_compress_ctx_add_page(cc, page); + } + + if (!f2fs_cluster_is_empty(cc)) { + struct bio *bio = NULL; + + ret = f2fs_read_multi_pages(cc, &bio, cc->cluster_size, + &last_block_in_bio, false); + f2fs_destroy_compress_ctx(cc); + if (ret) + goto release_pages; + if (bio) + f2fs_submit_bio(sbi, bio, DATA); + + ret = f2fs_init_compress_ctx(cc); + if (ret) + goto release_pages; + } + + for (i = 0; i < cc->cluster_size; i++) { + f2fs_bug_on(sbi, cc->rpages[i]); + + page = find_lock_page(mapping, start_idx + i); + f2fs_bug_on(sbi, !page); + + f2fs_wait_on_page_writeback(page, DATA, true, true); + + f2fs_compress_ctx_add_page(cc, page); + f2fs_put_page(page, 0); + + if (!PageUptodate(page)) { + f2fs_unlock_rpages(cc, i + 1); + f2fs_put_rpages_mapping(cc, mapping, start_idx, + cc->cluster_size); + f2fs_destroy_compress_ctx(cc); + goto retry; + } + } + + if (prealloc) { + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); + + set_new_dnode(&dn, cc->inode, NULL, NULL, 0); + + for (i = cc->cluster_size - 1; i > 0; i--) { + ret = f2fs_get_block(&dn, start_idx + i); + if (ret) { + i = cc->cluster_size; + break; + } + + if (dn.data_blkaddr != NEW_ADDR) + break; + } + + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); + } + + if (likely(!ret)) { + *fsdata = cc->rpages; + *pagep = cc->rpages[offset_in_cluster(cc, index)]; + return cc->cluster_size; + } + +unlock_pages: + f2fs_unlock_rpages(cc, i); +release_pages: + f2fs_put_rpages_mapping(cc, mapping, start_idx, i); + f2fs_destroy_compress_ctx(cc); + return ret; +} + +int f2fs_prepare_compress_overwrite(struct inode *inode, + struct page **pagep, pgoff_t index, void **fsdata) +{ + struct compress_ctx cc = { + .inode = inode, + .log_cluster_size = F2FS_I(inode)->i_log_cluster_size, + .cluster_size = F2FS_I(inode)->i_cluster_size, + .cluster_idx = index >> F2FS_I(inode)->i_log_cluster_size, + .rpages = NULL, + .nr_rpages = 0, + }; + + return prepare_compress_overwrite(&cc, pagep, index, fsdata); +} + +bool f2fs_compress_write_end(struct inode *inode, void *fsdata, + pgoff_t index, unsigned copied) + +{ + struct compress_ctx cc = { + .log_cluster_size = F2FS_I(inode)->i_log_cluster_size, + .cluster_size = F2FS_I(inode)->i_cluster_size, + .rpages = fsdata, + }; + bool first_index = (index == cc.rpages[0]->index); + + if (copied) + set_cluster_dirty(&cc); + + f2fs_put_rpages_wbc(&cc, NULL, false, 1); + f2fs_destroy_compress_ctx(&cc); + + return first_index; +} + +static int f2fs_write_compressed_pages(struct compress_ctx *cc, + int *submitted, + struct writeback_control *wbc, + enum iostat_type io_type) +{ + struct inode *inode = cc->inode; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_io_info fio = { + .sbi = sbi, + .ino = cc->inode->i_ino, + .type = DATA, + .op = REQ_OP_WRITE, + .op_flags = wbc_to_write_flags(wbc), + .old_blkaddr = NEW_ADDR, + .page = NULL, + .encrypted_page = NULL, + .compressed_page = NULL, + .submitted = false, + .need_lock = LOCK_RETRY, + .io_type = io_type, + .io_wbc = wbc, + .encrypted = f2fs_encrypted_file(cc->inode), + }; + struct dnode_of_data dn; + struct node_info ni; + struct compress_io_ctx *cic; + pgoff_t start_idx = start_idx_of_cluster(cc); + unsigned int last_index = cc->cluster_size - 1; + loff_t psize; + int i, err; + + set_new_dnode(&dn, cc->inode, NULL, NULL, 0); + + f2fs_lock_op(sbi); + + err = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE); + if (err) + goto out_unlock_op; + + for (i = 0; i < cc->cluster_size; i++) { + if (datablock_addr(dn.inode, dn.node_page, + dn.ofs_in_node + i) == NULL_ADDR) + goto out_put_dnode; + } + + psize = (loff_t)(cc->rpages[last_index]->index + 1) << PAGE_SHIFT; + + err = f2fs_get_node_info(fio.sbi, dn.nid, &ni); + if (err) + goto out_put_dnode; + + fio.version = ni.version; + + cic = f2fs_kzalloc(sbi, sizeof(struct compress_io_ctx), GFP_NOFS); + if (!cic) + goto out_put_dnode; + + cic->magic = F2FS_COMPRESSED_PAGE_MAGIC; + cic->inode = inode; + refcount_set(&cic->ref, 1); + cic->rpages = f2fs_kzalloc(sbi, sizeof(struct page *) << + cc->log_cluster_size, GFP_NOFS); + if (!cic->rpages) + goto out_put_cic; + + cic->nr_rpages = cc->cluster_size; + + for (i = 0; i < cc->nr_cpages; i++) { + f2fs_set_compressed_page(cc->cpages[i], inode, + cc->rpages[i + 1]->index, + cic, i ? &cic->ref : NULL); + fio.compressed_page = cc->cpages[i]; + if (fio.encrypted) { + fio.page = cc->rpages[i + 1]; + err = f2fs_encrypt_one_page(&fio); + if (err) + goto out_destroy_crypt; + cc->cpages[i] = fio.encrypted_page; + } + } + + set_cluster_writeback(cc); + + for (i = 0; i < cc->cluster_size; i++) + cic->rpages[i] = cc->rpages[i]; + + for (i = 0; i < cc->cluster_size; i++, dn.ofs_in_node++) { + block_t blkaddr; + + blkaddr = datablock_addr(dn.inode, dn.node_page, + dn.ofs_in_node); + fio.page = cic->rpages[i]; + fio.old_blkaddr = blkaddr; + + /* cluster header */ + if (i == 0) { + if (blkaddr == COMPRESS_ADDR) + fio.compr_blocks++; + if (__is_valid_data_blkaddr(blkaddr)) + f2fs_invalidate_blocks(sbi, blkaddr); + f2fs_update_data_blkaddr(&dn, COMPRESS_ADDR); + goto unlock_continue; + } + + if (fio.compr_blocks && __is_valid_data_blkaddr(blkaddr)) + fio.compr_blocks++; + + if (i > cc->nr_cpages) { + if (__is_valid_data_blkaddr(blkaddr)) { + f2fs_invalidate_blocks(sbi, blkaddr); + f2fs_update_data_blkaddr(&dn, NEW_ADDR); + } + goto unlock_continue; + } + + f2fs_bug_on(fio.sbi, blkaddr == NULL_ADDR); + + if (fio.encrypted) + fio.encrypted_page = cc->cpages[i - 1]; + else + fio.compressed_page = cc->cpages[i - 1]; + + cc->cpages[i - 1] = NULL; + f2fs_outplace_write_data(&dn, &fio); + (*submitted)++; +unlock_continue: + inode_dec_dirty_pages(cc->inode); + unlock_page(fio.page); + } + + if (fio.compr_blocks) + f2fs_i_compr_blocks_update(inode, fio.compr_blocks - 1, false); + f2fs_i_compr_blocks_update(inode, cc->nr_cpages, true); + + set_inode_flag(cc->inode, FI_APPEND_WRITE); + if (cc->cluster_idx == 0) + set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); + + f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); + + down_write(&fi->i_sem); + if (fi->last_disk_size < psize) + fi->last_disk_size = psize; + up_write(&fi->i_sem); + + f2fs_put_rpages(cc); + f2fs_destroy_compress_ctx(cc); + return 0; + +out_destroy_crypt: + kfree(cic->rpages); + + for (--i; i >= 0; i--) + fscrypt_finalize_bounce_page(&cc->cpages[i]); + for (i = 0; i < cc->nr_cpages; i++) { + if (!cc->cpages[i]) + continue; + f2fs_put_page(cc->cpages[i], 1); + } +out_put_cic: + kfree(cic); +out_put_dnode: + f2fs_put_dnode(&dn); +out_unlock_op: + f2fs_unlock_op(sbi); + return -EAGAIN; +} + +void f2fs_compress_write_end_io(struct bio *bio, struct page *page) +{ + struct f2fs_sb_info *sbi = bio->bi_private; + struct compress_io_ctx *cic = + (struct compress_io_ctx *)page_private(page); + int i; + + if (unlikely(bio->bi_status)) + mapping_set_error(cic->inode->i_mapping, -EIO); + + f2fs_put_compressed_page(page); + + dec_page_count(sbi, F2FS_WB_DATA); + + if (refcount_dec_not_one(&cic->ref)) + return; + + for (i = 0; i < cic->nr_rpages; i++) { + WARN_ON(!cic->rpages[i]); + clear_cold_data(cic->rpages[i]); + end_page_writeback(cic->rpages[i]); + } + + kfree(cic->rpages); + kfree(cic); +} + +static int f2fs_write_raw_pages(struct compress_ctx *cc, + int *submitted, + struct writeback_control *wbc, + enum iostat_type io_type) +{ + struct address_space *mapping = cc->inode->i_mapping; + int _submitted, compr_blocks, ret; + int i = -1, err = 0; + + compr_blocks = f2fs_compressed_blocks(cc); + if (compr_blocks < 0) { + err = compr_blocks; + goto out_err; + } + + for (i = 0; i < cc->cluster_size; i++) { + if (!cc->rpages[i]) + continue; +retry_write: + if (cc->rpages[i]->mapping != mapping) { + unlock_page(cc->rpages[i]); + continue; + } + + BUG_ON(!PageLocked(cc->rpages[i])); + + ret = f2fs_write_single_data_page(cc->rpages[i], &_submitted, + NULL, NULL, wbc, io_type, + compr_blocks); + if (ret) { + if (ret == AOP_WRITEPAGE_ACTIVATE) { + unlock_page(cc->rpages[i]); + ret = 0; + } else if (ret == -EAGAIN) { + ret = 0; + cond_resched(); + congestion_wait(BLK_RW_ASYNC, HZ/50); + lock_page(cc->rpages[i]); + clear_page_dirty_for_io(cc->rpages[i]); + goto retry_write; + } + err = ret; + goto out_fail; + } + + *submitted += _submitted; + } + return 0; + +out_fail: + /* TODO: revoke partially updated block addresses */ + BUG_ON(compr_blocks); +out_err: + for (++i; i < cc->cluster_size; i++) { + if (!cc->rpages[i]) + continue; + redirty_page_for_writepage(wbc, cc->rpages[i]); + unlock_page(cc->rpages[i]); + } + return err; +} + +int f2fs_write_multi_pages(struct compress_ctx *cc, + int *submitted, + struct writeback_control *wbc, + enum iostat_type io_type) +{ + struct f2fs_inode_info *fi = F2FS_I(cc->inode); + const struct f2fs_compress_ops *cops = + f2fs_cops[fi->i_compress_algorithm]; + int err; + + *submitted = 0; + if (cluster_may_compress(cc)) { + err = f2fs_compress_pages(cc); + if (err == -EAGAIN) { + goto write; + } else if (err) { + f2fs_put_rpages_wbc(cc, wbc, true, 1); + goto destroy_out; + } + + err = f2fs_write_compressed_pages(cc, submitted, + wbc, io_type); + cops->destroy_compress_ctx(cc); + if (!err) + return 0; + f2fs_bug_on(F2FS_I_SB(cc->inode), err != -EAGAIN); + } +write: + f2fs_bug_on(F2FS_I_SB(cc->inode), *submitted); + + err = f2fs_write_raw_pages(cc, submitted, wbc, io_type); + f2fs_put_rpages_wbc(cc, wbc, false, 0); +destroy_out: + f2fs_destroy_compress_ctx(cc); + return err; +} + +struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode); + struct decompress_io_ctx *dic; + pgoff_t start_idx = start_idx_of_cluster(cc); + int i; + + dic = f2fs_kzalloc(sbi, sizeof(struct decompress_io_ctx), GFP_NOFS); + if (!dic) + return ERR_PTR(-ENOMEM); + + dic->rpages = f2fs_kzalloc(sbi, sizeof(struct page *) << + cc->log_cluster_size, GFP_NOFS); + if (!dic->rpages) { + kfree(dic); + return ERR_PTR(-ENOMEM); + } + + dic->magic = F2FS_COMPRESSED_PAGE_MAGIC; + dic->inode = cc->inode; + refcount_set(&dic->ref, 1); + dic->cluster_idx = cc->cluster_idx; + dic->cluster_size = cc->cluster_size; + dic->log_cluster_size = cc->log_cluster_size; + dic->nr_cpages = cc->nr_cpages; + dic->failed = false; + + for (i = 0; i < dic->cluster_size; i++) + dic->rpages[i] = cc->rpages[i]; + dic->nr_rpages = cc->cluster_size; + + dic->cpages = f2fs_kzalloc(sbi, sizeof(struct page *) * + dic->nr_cpages, GFP_NOFS); + if (!dic->cpages) + goto out_free; + + for (i = 0; i < dic->nr_cpages; i++) { + struct page *page; + + page = f2fs_grab_page(); + if (!page) + goto out_free; + + f2fs_set_compressed_page(page, cc->inode, + start_idx + i + 1, + dic, i ? &dic->ref : NULL); + dic->cpages[i] = page; + } + + dic->tpages = f2fs_kzalloc(sbi, sizeof(struct page *) * + dic->cluster_size, GFP_NOFS); + if (!dic->tpages) + goto out_free; + + for (i = 0; i < dic->cluster_size; i++) { + if (cc->rpages[i]) + continue; + + dic->tpages[i] = f2fs_grab_page(); + if (!dic->tpages[i]) + goto out_free; + } + + for (i = 0; i < dic->cluster_size; i++) { + if (dic->tpages[i]) + continue; + dic->tpages[i] = cc->rpages[i]; + } + + return dic; + +out_free: + f2fs_free_dic(dic); + return ERR_PTR(-ENOMEM); +} + +void f2fs_free_dic(struct decompress_io_ctx *dic) +{ + int i; + + if (dic->tpages) { + for (i = 0; i < dic->cluster_size; i++) { + if (dic->rpages[i]) + continue; + f2fs_put_page(dic->tpages[i], 1); + } + kfree(dic->tpages); + } + + if (dic->cpages) { + for (i = 0; i < dic->nr_cpages; i++) { + if (!dic->cpages[i]) + continue; + f2fs_put_compressed_page(dic->cpages[i]); + } + kfree(dic->cpages); + } + + kfree(dic->rpages); + kfree(dic); +} + +void f2fs_decompress_end_io(struct page **rpages, + unsigned int cluster_size, bool err, bool verity) +{ + int i; + + for (i = 0; i < cluster_size; i++) { + struct page *rpage = rpages[i]; + + if (!rpage) + continue; + + if (err || PageError(rpage)) { + ClearPageUptodate(rpage); + ClearPageError(rpage); + } else { + if (!verity || fsverity_verify_page(rpage)) + SetPageUptodate(rpage); + else + SetPageError(rpage); + } + unlock_page(rpage); + } +} diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b132dffa5c7a..8ebaad834930 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -83,6 +83,9 @@ static bool __is_cp_guaranteed(struct page *page) if (!mapping) return false; + if (f2fs_is_compressed_page(page)) + return false; + inode = mapping->host; sbi = F2FS_I_SB(inode); @@ -115,19 +118,19 @@ static enum count_type __read_io_type(struct page *page) /* postprocessing steps for read bios */ enum bio_post_read_step { - STEP_INITIAL = 0, STEP_DECRYPT, + STEP_DECOMPRESS, STEP_VERITY, }; struct bio_post_read_ctx { struct bio *bio; + struct f2fs_sb_info *sbi; struct work_struct work; - unsigned int cur_step; unsigned int enabled_steps; }; -static void __read_end_io(struct bio *bio) +static void __read_end_io(struct bio *bio, bool compr, bool verity) { struct page *page; struct bio_vec *bv; @@ -136,6 +139,13 @@ static void __read_end_io(struct bio *bio) bio_for_each_segment_all(bv, bio, i) { page = bv->bv_page; +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (compr && f2fs_is_compressed_page(page)) { + f2fs_decompress_pages(bio, page, verity); + continue; + } +#endif + /* PG_error was set if any post_read step failed */ if (bio->bi_status || PageError(page)) { ClearPageUptodate(page); @@ -147,31 +157,94 @@ static void __read_end_io(struct bio *bio) dec_page_count(F2FS_P_SB(page), __read_io_type(page)); unlock_page(page); } - if (bio->bi_private) - mempool_free(bio->bi_private, bio_post_read_ctx_pool); - bio_put(bio); +} + +static void f2fs_release_read_bio(struct bio *bio); +static void __f2fs_read_end_io(struct bio *bio, bool compr, bool verity) +{ + if (!compr) + __read_end_io(bio, false, verity); + f2fs_release_read_bio(bio); +} + +static void f2fs_decompress_bio(struct bio *bio, bool verity) +{ + __read_end_io(bio, true, verity); } static void bio_post_read_processing(struct bio_post_read_ctx *ctx); -static void decrypt_work(struct work_struct *work) +static void f2fs_decrypt_work(struct bio_post_read_ctx *ctx) { - struct bio_post_read_ctx *ctx = - container_of(work, struct bio_post_read_ctx, work); - fscrypt_decrypt_bio(ctx->bio); - - bio_post_read_processing(ctx); } -static void verity_work(struct work_struct *work) +static void f2fs_decompress_work(struct bio_post_read_ctx *ctx) +{ + f2fs_decompress_bio(ctx->bio, ctx->enabled_steps & (1 << STEP_VERITY)); +} + +#ifdef CONFIG_F2FS_FS_COMPRESSION +static void f2fs_verify_pages(struct page **rpages, unsigned int cluster_size) +{ + f2fs_decompress_end_io(rpages, cluster_size, false, true); +} + +static void f2fs_verify_bio(struct bio *bio) +{ + struct page *page = bio->bi_io_vec[0].bv_page; + struct decompress_io_ctx *dic = + (struct decompress_io_ctx *)page_private(page); + + f2fs_verify_pages(dic->rpages, dic->cluster_size); + f2fs_free_dic(dic); +} +#endif + +static void f2fs_verity_work(struct work_struct *work) { struct bio_post_read_ctx *ctx = container_of(work, struct bio_post_read_ctx, work); - fsverity_verify_bio(ctx->bio); +#ifdef CONFIG_F2FS_FS_COMPRESSION + /* previous step is decompression */ + if (ctx->enabled_steps & (1 << STEP_DECOMPRESS)) { - bio_post_read_processing(ctx); + f2fs_verify_bio(ctx->bio); + f2fs_release_read_bio(ctx->bio); + return; + } +#endif + + fsverity_verify_bio(ctx->bio); + __f2fs_read_end_io(ctx->bio, false, false); +} + +static void f2fs_post_read_work(struct work_struct *work) +{ + struct bio_post_read_ctx *ctx = + container_of(work, struct bio_post_read_ctx, work); + + if (ctx->enabled_steps & (1 << STEP_DECRYPT)) + f2fs_decrypt_work(ctx); + + if (ctx->enabled_steps & (1 << STEP_DECOMPRESS)) + f2fs_decompress_work(ctx); + + if (ctx->enabled_steps & (1 << STEP_VERITY)) { + INIT_WORK(&ctx->work, f2fs_verity_work); + fsverity_enqueue_verify_work(&ctx->work); + return; + } + + __f2fs_read_end_io(ctx->bio, + ctx->enabled_steps & (1 << STEP_DECOMPRESS), false); +} + +static void f2fs_enqueue_post_read_work(struct f2fs_sb_info *sbi, + struct work_struct *work) +{ + queue_work(sbi->post_read_wq, work); } static void bio_post_read_processing(struct bio_post_read_ctx *ctx) @@ -181,31 +254,26 @@ static void bio_post_read_processing(struct bio_post_read_ctx *ctx) * verity may require reading metadata pages that need decryption, and * we shouldn't recurse to the same workqueue. */ - switch (++ctx->cur_step) { - case STEP_DECRYPT: - if (ctx->enabled_steps & (1 << STEP_DECRYPT)) { - INIT_WORK(&ctx->work, decrypt_work); - fscrypt_enqueue_decrypt_work(&ctx->work); - return; - } - ctx->cur_step++; - /* fall-through */ - case STEP_VERITY: - if (ctx->enabled_steps & (1 << STEP_VERITY)) { - INIT_WORK(&ctx->work, verity_work); - fsverity_enqueue_verify_work(&ctx->work); - return; - } - ctx->cur_step++; - /* fall-through */ - default: - __read_end_io(ctx->bio); + + if (ctx->enabled_steps & (1 << STEP_DECRYPT) || + ctx->enabled_steps & (1 << STEP_DECOMPRESS)) { + INIT_WORK(&ctx->work, f2fs_post_read_work); + f2fs_enqueue_post_read_work(ctx->sbi, &ctx->work); + return; } + + if (ctx->enabled_steps & (1 << STEP_VERITY)) { + INIT_WORK(&ctx->work, f2fs_verity_work); + fsverity_enqueue_verify_work(&ctx->work); + return; + } + + __f2fs_read_end_io(ctx->bio, false, false); } static bool f2fs_bio_post_read_required(struct bio *bio) { - return bio->bi_private && !bio->bi_status; + return bio->bi_private; } static void f2fs_read_end_io(struct bio *bio) @@ -220,12 +288,11 @@ static void f2fs_read_end_io(struct bio *bio) if (f2fs_bio_post_read_required(bio)) { struct bio_post_read_ctx *ctx = bio->bi_private; - ctx->cur_step = STEP_INITIAL; bio_post_read_processing(ctx); return; } - __read_end_io(bio); + __f2fs_read_end_io(bio, false, false); } static void f2fs_write_end_io(struct bio *bio) @@ -256,6 +323,13 @@ static void f2fs_write_end_io(struct bio *bio) fscrypt_finalize_bounce_page(&page); +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (f2fs_is_compressed_page(page)) { + f2fs_compress_write_end_io(bio, page); + continue; + } +#endif + if (unlikely(bio->bi_status)) { mapping_set_error(page->mapping, -EIO); if (type == F2FS_WB_CP_DATA) @@ -400,6 +474,12 @@ submit_io: submit_bio(bio); } +void f2fs_submit_bio(struct f2fs_sb_info *sbi, + struct bio *bio, enum page_type type) +{ + __submit_bio(sbi, bio, type); +} + static void __submit_merged_bio(struct f2fs_bio_info *io) { struct f2fs_io_info *fio = &io->fio; @@ -422,7 +502,6 @@ static bool __has_merged_page(struct bio *bio, struct inode *inode, struct page *page, nid_t ino) { struct bio_vec *bvec; - struct page *target; int i; if (!bio) @@ -432,10 +511,18 @@ static bool __has_merged_page(struct bio *bio, struct inode *inode, return true; bio_for_each_segment_all(bvec, bio, i) { + struct page *target = bvec->bv_page; - target = bvec->bv_page; - if (fscrypt_is_bounce_page(target)) + if (fscrypt_is_bounce_page(target)) { target = fscrypt_pagecache_page(target); + if (IS_ERR(target)) + continue; + } + if (f2fs_is_compressed_page(target)) { + target = f2fs_compress_control_page(target); + if (IS_ERR(target)) + continue; + } if (inode && inode == target->mapping->host) return true; @@ -630,7 +717,8 @@ static int add_ipu_page(struct f2fs_sb_info *sbi, struct bio **bio, found = true; - if (bio_add_page(*bio, page, PAGE_SIZE, 0) == PAGE_SIZE) { + if (bio_add_page(*bio, page, PAGE_SIZE, 0) == + PAGE_SIZE) { ret = 0; break; } @@ -770,7 +858,12 @@ next: verify_fio_blkaddr(fio); - bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page; + if (fio->encrypted_page) + bio_page = fio->encrypted_page; + else if (fio->compressed_page) + bio_page = fio->compressed_page; + else + bio_page = fio->page; /* set submitted = true as a return value */ fio->submitted = true; @@ -839,7 +932,8 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, if (f2fs_encrypted_file(inode)) post_read_steps |= 1 << STEP_DECRYPT; - + if (f2fs_compressed_file(inode)) + post_read_steps |= 1 << STEP_DECOMPRESS; if (f2fs_need_verity(inode, first_idx)) post_read_steps |= 1 << STEP_VERITY; @@ -850,6 +944,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, return ERR_PTR(-ENOMEM); } ctx->bio = bio; + ctx->sbi = sbi; ctx->enabled_steps = post_read_steps; bio->bi_private = ctx; } @@ -857,6 +952,13 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, return bio; } +static void f2fs_release_read_bio(struct bio *bio) +{ + if (bio->bi_private) + mempool_free(bio->bi_private, bio_post_read_ctx_pool); + bio_put(bio); +} + /* This can handle encryption stuffs */ static int f2fs_submit_page_read(struct inode *inode, struct page *page, block_t blkaddr) @@ -1901,6 +2003,144 @@ out: return ret; } +#ifdef CONFIG_F2FS_FS_COMPRESSION +int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, + unsigned nr_pages, sector_t *last_block_in_bio, + bool is_readahead) +{ + struct dnode_of_data dn; + struct inode *inode = cc->inode; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct bio *bio = *bio_ret; + unsigned int start_idx = cc->cluster_idx << cc->log_cluster_size; + sector_t last_block_in_file; + const unsigned blkbits = inode->i_blkbits; + const unsigned blocksize = 1 << blkbits; + struct decompress_io_ctx *dic = NULL; + int i; + int ret = 0; + + f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc)); + + last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits; + + /* get rid of pages beyond EOF */ + for (i = 0; i < cc->cluster_size; i++) { + struct page *page = cc->rpages[i]; + + if (!page) + continue; + if ((sector_t)page->index >= last_block_in_file) { + zero_user_segment(page, 0, PAGE_SIZE); + if (!PageUptodate(page)) + SetPageUptodate(page); + } else if (!PageUptodate(page)) { + continue; + } + unlock_page(page); + cc->rpages[i] = NULL; + cc->nr_rpages--; + } + + /* we are done since all pages are beyond EOF */ + if (f2fs_cluster_is_empty(cc)) + goto out; + + set_new_dnode(&dn, inode, NULL, NULL, 0); + ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE); + if (ret) + goto out; + + /* cluster was overwritten as normal cluster */ + if (dn.data_blkaddr != COMPRESS_ADDR) + goto out; + + for (i = 1; i < cc->cluster_size; i++) { + block_t blkaddr; + + blkaddr = datablock_addr(dn.inode, dn.node_page, + dn.ofs_in_node + i); + + if (!__is_valid_data_blkaddr(blkaddr)) + break; + + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) { + ret = -EFAULT; + goto out_put_dnode; + } + cc->nr_cpages++; + } + + /* nothing to decompress */ + if (cc->nr_cpages == 0) { + ret = 0; + goto out_put_dnode; + } + + dic = f2fs_alloc_dic(cc); + if (IS_ERR(dic)) { + ret = PTR_ERR(dic); + goto out_put_dnode; + } + + for (i = 0; i < dic->nr_cpages; i++) { + struct page *page = dic->cpages[i]; + block_t blkaddr; + + blkaddr = datablock_addr(dn.inode, dn.node_page, + dn.ofs_in_node + i + 1); + + if (bio && !page_is_mergeable(sbi, bio, + *last_block_in_bio, blkaddr)) { +submit_and_realloc: + __submit_bio(sbi, bio, DATA); + bio = NULL; + } + + if (!bio) { + bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages, + is_readahead ? REQ_RAHEAD : 0, + page->index); + if (IS_ERR(bio)) { + ret = PTR_ERR(bio); + bio = NULL; + dic->failed = true; + if (refcount_sub_and_test(dic->nr_cpages - i, + &dic->ref)) + f2fs_decompress_end_io(dic->rpages, + cc->cluster_size, true, + false); + f2fs_free_dic(dic); + f2fs_put_dnode(&dn); + *bio_ret = bio; + return ret; + } + } + + f2fs_wait_on_block_writeback(inode, blkaddr); + + if (bio_add_page(bio, page, blocksize, 0) < blocksize) + goto submit_and_realloc; + + inc_page_count(sbi, F2FS_RD_DATA); + ClearPageError(page); + *last_block_in_bio = blkaddr; + } + + f2fs_put_dnode(&dn); + + *bio_ret = bio; + return 0; + +out_put_dnode: + f2fs_put_dnode(&dn); +out: + f2fs_decompress_end_io(cc->rpages, cc->cluster_size, true, false); + *bio_ret = bio; + return ret; +} +#endif + /* * This function was originally taken from fs/mpage.c, and customized for f2fs. * Major change was from block_size == page_size in f2fs by default. @@ -1910,7 +2150,7 @@ out: * use ->readpage() or do the necessary surgery to decouple ->readpages() * from read-ahead. */ -static int f2fs_mpage_readpages(struct address_space *mapping, +int f2fs_mpage_readpages(struct address_space *mapping, struct list_head *pages, struct page *page, unsigned nr_pages, bool is_readahead) { @@ -1918,6 +2158,19 @@ static int f2fs_mpage_readpages(struct address_space *mapping, sector_t last_block_in_bio = 0; struct inode *inode = mapping->host; struct f2fs_map_blocks map; +#ifdef CONFIG_F2FS_FS_COMPRESSION + struct compress_ctx cc = { + .inode = inode, + .log_cluster_size = F2FS_I(inode)->i_log_cluster_size, + .cluster_size = F2FS_I(inode)->i_cluster_size, + .cluster_idx = NULL_CLUSTER, + .rpages = NULL, + .cpages = NULL, + .nr_rpages = 0, + .nr_cpages = 0, + }; +#endif + unsigned max_nr_pages = nr_pages; int ret = 0; map.m_pblk = 0; @@ -1941,9 +2194,41 @@ static int f2fs_mpage_readpages(struct address_space *mapping, goto next_page; } - ret = f2fs_read_single_page(inode, page, nr_pages, &map, &bio, - &last_block_in_bio, is_readahead); +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (f2fs_compressed_file(inode)) { + /* there are remained comressed pages, submit them */ + if (!f2fs_cluster_can_merge_page(&cc, page->index)) { + ret = f2fs_read_multi_pages(&cc, &bio, + max_nr_pages, + &last_block_in_bio, + is_readahead); + f2fs_destroy_compress_ctx(&cc); + if (ret) + goto set_error_page; + } + ret = f2fs_is_compressed_cluster(inode, page->index); + if (ret < 0) + goto set_error_page; + else if (!ret) + goto read_single_page; + + ret = f2fs_init_compress_ctx(&cc); + if (ret) + goto set_error_page; + + f2fs_compress_ctx_add_page(&cc, page); + + goto next_page; + } +read_single_page: +#endif + + ret = f2fs_read_single_page(inode, page, max_nr_pages, &map, + &bio, &last_block_in_bio, is_readahead); if (ret) { +#ifdef CONFIG_F2FS_FS_COMPRESSION +set_error_page: +#endif SetPageError(page); zero_user_segment(page, 0, PAGE_SIZE); unlock_page(page); @@ -1951,6 +2236,19 @@ static int f2fs_mpage_readpages(struct address_space *mapping, next_page: if (pages) put_page(page); + +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (f2fs_compressed_file(inode)) { + /* last page */ + if (nr_pages == 1 && !f2fs_cluster_is_empty(&cc)) { + ret = f2fs_read_multi_pages(&cc, &bio, + max_nr_pages, + &last_block_in_bio, + is_readahead); + f2fs_destroy_compress_ctx(&cc); + } + } +#endif } BUG_ON(pages && !list_empty(pages)); if (bio) @@ -1965,6 +2263,11 @@ static int f2fs_read_data_page(struct file *file, struct page *page) trace_f2fs_readpage(page, DATA); + if (!f2fs_is_compress_backend_ready(inode)) { + unlock_page(page); + return -EOPNOTSUPP; + } + /* If the file has inline data, try to read it directly */ if (f2fs_has_inline_data(inode)) ret = f2fs_read_inline_data(inode, page); @@ -1983,6 +2286,9 @@ static int f2fs_read_data_pages(struct file *file, trace_f2fs_readpages(inode, page, nr_pages); + if (!f2fs_is_compress_backend_ready(inode)) + return 0; + /* If the file has inline data, skip readpages */ if (f2fs_has_inline_data(inode)) return 0; @@ -1990,22 +2296,23 @@ static int f2fs_read_data_pages(struct file *file, return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages, true); } -static int encrypt_one_page(struct f2fs_io_info *fio) +int f2fs_encrypt_one_page(struct f2fs_io_info *fio) { struct inode *inode = fio->page->mapping->host; - struct page *mpage; + struct page *mpage, *page; gfp_t gfp_flags = GFP_NOFS; if (!f2fs_encrypted_file(inode)) return 0; + page = fio->compressed_page ? fio->compressed_page : fio->page; + /* wait for GCed page writeback via META_MAPPING */ f2fs_wait_on_block_writeback(inode, fio->old_blkaddr); retry_encrypt: - fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(fio->page, - PAGE_SIZE, 0, - gfp_flags); + fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page, + PAGE_SIZE, 0, gfp_flags); if (IS_ERR(fio->encrypted_page)) { /* flush pending IOs and wait for a while in the ENOMEM case */ if (PTR_ERR(fio->encrypted_page) == -ENOMEM) { @@ -2165,7 +2472,7 @@ got_it: if (ipu_force || (__is_valid_data_blkaddr(fio->old_blkaddr) && need_inplace_update(fio))) { - err = encrypt_one_page(fio); + err = f2fs_encrypt_one_page(fio); if (err) goto out_writepage; @@ -2201,13 +2508,16 @@ got_it: fio->version = ni.version; - err = encrypt_one_page(fio); + err = f2fs_encrypt_one_page(fio); if (err) goto out_writepage; set_page_writeback(page); ClearPageError(page); + if (fio->compr_blocks && fio->old_blkaddr == COMPRESS_ADDR) + f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false); + /* LFS mode write path */ f2fs_outplace_write_data(&dn, fio); trace_f2fs_do_write_data_page(page, OPU); @@ -2222,16 +2532,17 @@ out: return err; } -static int __write_data_page(struct page *page, bool *submitted, +int f2fs_write_single_data_page(struct page *page, int *submitted, struct bio **bio, sector_t *last_block, struct writeback_control *wbc, - enum iostat_type io_type) + enum iostat_type io_type, + int compr_blocks) { struct inode *inode = page->mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); loff_t i_size = i_size_read(inode); - const pgoff_t end_index = ((unsigned long long) i_size) + const pgoff_t end_index = ((unsigned long long)i_size) >> PAGE_SHIFT; loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT; unsigned offset = 0; @@ -2247,6 +2558,7 @@ static int __write_data_page(struct page *page, bool *submitted, .page = page, .encrypted_page = NULL, .submitted = false, + .compr_blocks = compr_blocks, .need_lock = LOCK_RETRY, .io_type = io_type, .io_wbc = wbc, @@ -2271,7 +2583,9 @@ static int __write_data_page(struct page *page, bool *submitted, if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; - if (page->index < end_index || f2fs_verity_in_progress(inode)) + if (page->index < end_index || + f2fs_verity_in_progress(inode) || + compr_blocks) goto write; /* @@ -2347,7 +2661,6 @@ out: f2fs_remove_dirty_inode(inode); submitted = NULL; } - unlock_page(page); if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) && !F2FS_I(inode)->cp_task) @@ -2360,7 +2673,7 @@ out: } if (submitted) - *submitted = fio.submitted; + *submitted = fio.submitted ? 1 : 0; return 0; @@ -2381,7 +2694,23 @@ redirty_out: static int f2fs_write_data_page(struct page *page, struct writeback_control *wbc) { - return __write_data_page(page, NULL, NULL, NULL, wbc, FS_DATA_IO); +#ifdef CONFIG_F2FS_FS_COMPRESSION + struct inode *inode = page->mapping->host; + + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + goto out; + + if (f2fs_compressed_file(inode)) { + if (f2fs_is_compressed_cluster(inode, page->index)) { + redirty_page_for_writepage(wbc, page); + return AOP_WRITEPAGE_ACTIVATE; + } + } +out: +#endif + + return f2fs_write_single_data_page(page, NULL, NULL, NULL, + wbc, FS_DATA_IO, 0); } /* @@ -2394,11 +2723,27 @@ static int f2fs_write_cache_pages(struct address_space *mapping, enum iostat_type io_type) { int ret = 0; - int done = 0; + int done = 0, retry = 0; struct pagevec pvec; struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); struct bio *bio = NULL; sector_t last_block; +#ifdef CONFIG_F2FS_FS_COMPRESSION + struct inode *inode = mapping->host; + struct compress_ctx cc = { + .inode = inode, + .log_cluster_size = F2FS_I(inode)->i_log_cluster_size, + .cluster_size = F2FS_I(inode)->i_cluster_size, + .cluster_idx = NULL_CLUSTER, + .rpages = NULL, + .nr_rpages = 0, + .cpages = NULL, + .rbuf = NULL, + .cbuf = NULL, + .rlen = PAGE_SIZE * F2FS_I(inode)->i_cluster_size, + .private = NULL, + }; +#endif int nr_pages; pgoff_t uninitialized_var(writeback_index); pgoff_t index; @@ -2408,6 +2753,8 @@ static int f2fs_write_cache_pages(struct address_space *mapping, int range_whole = 0; int tag; int nwritten = 0; + int submitted = 0; + int i; pagevec_init(&pvec, 0); @@ -2437,12 +2784,11 @@ static int f2fs_write_cache_pages(struct address_space *mapping, else tag = PAGECACHE_TAG_DIRTY; retry: + retry = 0; if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) tag_pages_for_writeback(mapping, index, end); done_index = index; - while (!done && (index <= end)) { - int i; - + while (!done && !retry && (index <= end)) { nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end, tag); if (nr_pages == 0) @@ -2450,15 +2796,62 @@ retry: for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; - bool submitted = false; + bool need_readd; +readd: + need_readd = false; +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (f2fs_compressed_file(inode)) { + ret = f2fs_init_compress_ctx(&cc); + if (ret) { + done = 1; + break; + } + if (!f2fs_cluster_can_merge_page(&cc, + page->index)) { + ret = f2fs_write_multi_pages(&cc, + &submitted, wbc, io_type); + if (!ret) + need_readd = true; + goto result; + } + + if (unlikely(f2fs_cp_error(sbi))) + goto lock_page; + + if (f2fs_cluster_is_empty(&cc)) { + void *fsdata = NULL; + struct page *pagep; + int ret2; + + ret2 = f2fs_prepare_compress_overwrite( + inode, &pagep, + page->index, &fsdata); + if (ret2 < 0) { + ret = ret2; + done = 1; + break; + } else if (ret2 && + !f2fs_compress_write_end(inode, + fsdata, page->index, + 1)) { + retry = 1; + break; + } + } else { + goto lock_page; + } + } +#endif /* give a priority to WB_SYNC threads */ if (atomic_read(&sbi->wb_sync_req[DATA]) && wbc->sync_mode == WB_SYNC_NONE) { done = 1; break; } - +#ifdef CONFIG_F2FS_FS_COMPRESSION +lock_page: +#endif done_index = page->index; retry_write: lock_page(page); @@ -2485,45 +2878,71 @@ continue_unlock: if (!clear_page_dirty_for_io(page)) goto continue_unlock; - ret = __write_data_page(page, &submitted, &bio, - &last_block, wbc, io_type); +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (f2fs_compressed_file(inode)) { + get_page(page); + f2fs_compress_ctx_add_page(&cc, page); + continue; + } +#endif + ret = f2fs_write_single_data_page(page, &submitted, + &bio, &last_block, wbc, io_type, 0); + if (ret == AOP_WRITEPAGE_ACTIVATE) + unlock_page(page); +#ifdef CONFIG_F2FS_FS_COMPRESSION +result: +#endif + nwritten += submitted; + wbc->nr_to_write -= submitted; + if (unlikely(ret)) { /* * keep nr_to_write, since vfs uses this to * get # of written pages. */ if (ret == AOP_WRITEPAGE_ACTIVATE) { - unlock_page(page); ret = 0; - continue; + goto next; } else if (ret == -EAGAIN) { ret = 0; if (wbc->sync_mode == WB_SYNC_ALL) { cond_resched(); congestion_wait(BLK_RW_ASYNC, - HZ/50); + HZ/50); goto retry_write; } - continue; + goto next; } done_index = page->index + 1; done = 1; break; - } else if (submitted) { - nwritten++; } - if (--wbc->nr_to_write <= 0 && + if (wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE) { done = 1; break; } +next: + if (need_readd) + goto readd; } pagevec_release(&pvec); cond_resched(); } - - if (!cycled && !done) { +#ifdef CONFIG_F2FS_FS_COMPRESSION + /* flush remained pages in compress cluster */ + if (f2fs_compressed_file(inode) && !f2fs_cluster_is_empty(&cc)) { + ret = f2fs_write_multi_pages(&cc, &submitted, wbc, io_type); + nwritten += submitted; + wbc->nr_to_write -= submitted; + if (ret) { + done = 1; + retry = 0; + } + } +#endif + if ((!cycled && !done) || retry) { cycled = 1; index = 0; end = writeback_index - 1; @@ -2547,6 +2966,8 @@ static inline bool __should_serialize_io(struct inode *inode, { if (!S_ISREG(inode->i_mode)) return false; + if (f2fs_compressed_file(inode)) + return true; if (IS_NOQUOTA(inode)) return false; /* to avoid deadlock in path of data flush */ @@ -2691,6 +3112,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, __do_map_lock(sbi, flag, true); locked = true; } + restart: /* check inline_data */ ipage = f2fs_get_node_page(sbi, inode->i_ino); @@ -2781,6 +3203,24 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, if (err) goto fail; } + +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (f2fs_compressed_file(inode)) { + int ret; + + *fsdata = NULL; + + ret = f2fs_prepare_compress_overwrite(inode, pagep, + index, fsdata); + if (ret < 0) { + err = ret; + goto fail; + } else if (ret) { + return 0; + } + } +#endif + repeat: /* * Do not use grab_cache_page_write_begin() to avoid deadlock due to @@ -2793,6 +3233,8 @@ repeat: goto fail; } + /* TODO: cluster can be compressed due to race with .writepage */ + *pagep = page; err = prepare_write_begin(sbi, page, pos, len, @@ -2876,6 +3318,16 @@ static int f2fs_write_end(struct file *file, else SetPageUptodate(page); } + +#ifdef CONFIG_F2FS_FS_COMPRESSION + /* overwrite compressed file */ + if (f2fs_compressed_file(inode) && fsdata) { + f2fs_compress_write_end(inode, fsdata, page->index, copied); + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); + return copied; + } +#endif + if (!copied) goto unlock_out; @@ -3266,6 +3718,9 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, if (ret) return ret; + if (f2fs_disable_compressed_file(inode)) + return -EINVAL; + ret = check_swap_activate(file, sis->max); if (ret) return ret; @@ -3350,6 +3805,27 @@ void f2fs_destroy_post_read_processing(void) kmem_cache_destroy(bio_post_read_ctx_cache); } +int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi) +{ + if (!f2fs_sb_has_encrypt(sbi) && + !f2fs_sb_has_verity(sbi) && + !f2fs_sb_has_compression(sbi)) + return 0; + + sbi->post_read_wq = alloc_workqueue("f2fs_post_read_wq", + WQ_UNBOUND | WQ_HIGHPRI, + num_online_cpus()); + if (!sbi->post_read_wq) + return -ENOMEM; + return 0; +} + +void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi) +{ + if (sbi->post_read_wq) + destroy_workqueue(sbi->post_read_wq); +} + int __init f2fs_init_bio_entry_cache(void) { bio_entry_slab = f2fs_kmem_cache_create("bio_entry_slab", diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index dbf6eb329c03..28e202b76874 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -94,6 +94,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->inline_xattr = atomic_read(&sbi->inline_xattr); si->inline_inode = atomic_read(&sbi->inline_inode); si->inline_dir = atomic_read(&sbi->inline_dir); + si->compr_inode = atomic_read(&sbi->compr_inode); + si->compr_blocks = atomic_read(&sbi->compr_blocks); si->append = sbi->im[APPEND_INO].ino_num; si->update = sbi->im[UPDATE_INO].ino_num; si->orphans = sbi->im[ORPHAN_INO].ino_num; @@ -315,6 +317,8 @@ static int stat_show(struct seq_file *s, void *v) si->inline_inode); seq_printf(s, " - Inline_dentry Inode: %u\n", si->inline_dir); + seq_printf(s, " - Compressed Inode: %u, Blocks: %u\n", + si->compr_inode, si->compr_blocks); seq_printf(s, " - Orphan/Append/Update Inode: %u, %u, %u\n", si->orphans, si->append, si->update); seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", @@ -502,6 +506,8 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) atomic_set(&sbi->inline_xattr, 0); atomic_set(&sbi->inline_inode, 0); atomic_set(&sbi->inline_dir, 0); + atomic_set(&sbi->compr_inode, 0); + atomic_set(&sbi->compr_blocks, 0); atomic_set(&sbi->inplace_count, 0); for (i = META_CP; i < META_MAX; i++) atomic_set(&sbi->meta_count[i], 0); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7e0631cdf5bd..2725951b4e73 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -117,6 +117,8 @@ typedef u32 block_t; /* */ typedef u32 nid_t; +#define COMPRESS_EXT_NUM 16 + struct f2fs_mount_info { unsigned int opt; int write_io_size_bits; /* Write IO size bits */ @@ -141,6 +143,12 @@ struct f2fs_mount_info { block_t unusable_cap; /* Amount of space allowed to be * unusable when disabling checkpoint */ + + /* For compression */ + unsigned char compress_algorithm; /* algorithm type */ + unsigned compress_log_size; /* cluster log size */ + unsigned char compress_ext_cnt; /* extension count */ + unsigned char extensions[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN]; /* extensions */ }; #define F2FS_FEATURE_ENCRYPT 0x0001 @@ -156,6 +164,7 @@ struct f2fs_mount_info { #define F2FS_FEATURE_VERITY 0x0400 #define F2FS_FEATURE_SB_CHKSUM 0x0800 #define F2FS_FEATURE_CASEFOLD 0x1000 +#define F2FS_FEATURE_COMPRESSION 0x2000 #define __F2FS_HAS_FEATURE(raw_super, mask) \ ((raw_super->feature & cpu_to_le32(mask)) != 0) @@ -710,6 +719,12 @@ struct f2fs_inode_info { int i_inline_xattr_size; /* inline xattr size */ struct timespec i_crtime; /* inode creation time */ struct timespec i_disk_time[4]; /* inode disk times */ + + /* for file compress */ + u64 i_compr_blocks; /* # of compressed blocks */ + unsigned char i_compress_algorithm; /* algorithm type */ + unsigned char i_log_cluster_size; /* log of cluster size */ + unsigned int i_cluster_size; /* cluster size */ }; static inline void get_extent_info(struct extent_info *ext, @@ -1016,6 +1031,7 @@ enum need_lock_type { enum cp_reason_type { CP_NO_NEEDED, CP_NON_REGULAR, + CP_COMPRESSED, CP_HARDLINK, CP_SB_NEED_CP, CP_WRONG_PINO, @@ -1054,12 +1070,15 @@ struct f2fs_io_info { block_t old_blkaddr; /* old block address before Cow */ struct page *page; /* page to be written */ struct page *encrypted_page; /* encrypted page */ + struct page *compressed_page; /* compressed page */ struct list_head list; /* serialize IOs */ bool submitted; /* indicate IO submission */ int need_lock; /* indicate we need to lock cp_rwsem */ bool in_list; /* indicate fio is in io_list */ bool is_por; /* indicate IO is from recovery or not */ bool retry; /* need to reallocate block address */ + int compr_blocks; /* # of compressed block addresses */ + bool encrypted; /* indicate file is encrypted */ enum iostat_type io_type; /* io type */ struct writeback_control *io_wbc; /* writeback control */ struct bio **bio; /* bio for ipu */ @@ -1167,6 +1186,18 @@ enum fsync_mode { FSYNC_MODE_NOBARRIER, /* fsync behaves nobarrier based on posix */ }; +/* + * this value is set in page as a private data which indicate that + * the page is atomically written, and it is in inmem_pages list. + */ +#define ATOMIC_WRITTEN_PAGE ((unsigned long)-1) +#define DUMMY_WRITTEN_PAGE ((unsigned long)-2) + +#define IS_ATOMIC_WRITTEN_PAGE(page) \ + (page_private(page) == (unsigned long)ATOMIC_WRITTEN_PAGE) +#define IS_DUMMY_WRITTEN_PAGE(page) \ + (page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE) + #ifdef CONFIG_FS_ENCRYPTION #define DUMMY_ENCRYPTION_ENABLED(sbi) \ (unlikely(F2FS_OPTION(sbi).test_dummy_encryption)) @@ -1174,6 +1205,75 @@ enum fsync_mode { #define DUMMY_ENCRYPTION_ENABLED(sbi) (0) #endif +/* For compression */ +enum compress_algorithm_type { + COMPRESS_LZO, + COMPRESS_LZ4, + COMPRESS_MAX, +}; + +#define COMPRESS_DATA_RESERVED_SIZE 4 +struct compress_data { + __le32 clen; /* compressed data size */ + __le32 chksum; /* checksum of compressed data */ + __le32 reserved[COMPRESS_DATA_RESERVED_SIZE]; /* reserved */ + u8 cdata[]; /* compressed data */ +}; + +#define COMPRESS_HEADER_SIZE (sizeof(struct compress_data)) + +#define F2FS_COMPRESSED_PAGE_MAGIC 0xF5F2C000 + +/* compress context */ +struct compress_ctx { + struct inode *inode; /* inode the context belong to */ + pgoff_t cluster_idx; /* cluster index number */ + unsigned int cluster_size; /* page count in cluster */ + unsigned int log_cluster_size; /* log of cluster size */ + struct page **rpages; /* pages store raw data in cluster */ + unsigned int nr_rpages; /* total page number in rpages */ + struct page **cpages; /* pages store compressed data in cluster */ + unsigned int nr_cpages; /* total page number in cpages */ + void *rbuf; /* virtual mapped address on rpages */ + struct compress_data *cbuf; /* virtual mapped address on cpages */ + size_t rlen; /* valid data length in rbuf */ + size_t clen; /* valid data length in cbuf */ + void *private; /* payload buffer for specified compression algorithm */ +}; + +/* compress context for write IO path */ +struct compress_io_ctx { + u32 magic; /* magic number to indicate page is compressed */ + struct inode *inode; /* inode the context belong to */ + struct page **rpages; /* pages store raw data in cluster */ + unsigned int nr_rpages; /* total page number in rpages */ + refcount_t ref; /* referrence count of raw page */ +}; + +/* decompress io context for read IO path */ +struct decompress_io_ctx { + u32 magic; /* magic number to indicate page is compressed */ + struct inode *inode; /* inode the context belong to */ + pgoff_t cluster_idx; /* cluster index number */ + unsigned int cluster_size; /* page count in cluster */ + unsigned int log_cluster_size; /* log of cluster size */ + struct page **rpages; /* pages store raw data in cluster */ + unsigned int nr_rpages; /* total page number in rpages */ + struct page **cpages; /* pages store compressed data in cluster */ + unsigned int nr_cpages; /* total page number in cpages */ + struct page **tpages; /* temp pages to pad holes in cluster */ + void *rbuf; /* virtual mapped address on rpages */ + struct compress_data *cbuf; /* virtual mapped address on cpages */ + size_t rlen; /* valid data length in rbuf */ + size_t clen; /* valid data length in cbuf */ + refcount_t ref; /* referrence count of compressed page */ + bool failed; /* indicate IO error during decompression */ +}; + +#define NULL_CLUSTER ((unsigned int)(~0)) +#define MIN_COMPRESS_LOG_SIZE 2 +#define MAX_COMPRESS_LOG_SIZE 8 + struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ @@ -1325,6 +1425,8 @@ struct f2fs_sb_info { atomic_t inline_xattr; /* # of inline_xattr inodes */ atomic_t inline_inode; /* # of inline_data inodes */ atomic_t inline_dir; /* # of inline_dentry inodes */ + atomic_t compr_inode; /* # of compressed inodes */ + atomic_t compr_blocks; /* # of compressed blocks */ atomic_t vw_cnt; /* # of volatile writes */ atomic_t max_aw_cnt; /* max # of atomic writes */ atomic_t max_vw_cnt; /* max # of volatile writes */ @@ -1362,6 +1464,8 @@ struct f2fs_sb_info { /* Precomputed FS UUID checksum for seeding other checksums */ __u32 s_chksum_seed; + + struct workqueue_struct *post_read_wq; /* post read workqueue */ }; struct f2fs_private_dio { @@ -2356,11 +2460,13 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) /* * On-disk inode flags (f2fs_inode::i_flags) */ +#define F2FS_COMPR_FL 0x00000004 /* Compress file */ #define F2FS_SYNC_FL 0x00000008 /* Synchronous updates */ #define F2FS_IMMUTABLE_FL 0x00000010 /* Immutable file */ #define F2FS_APPEND_FL 0x00000020 /* writes to file may only append */ #define F2FS_NODUMP_FL 0x00000040 /* do not dump file */ #define F2FS_NOATIME_FL 0x00000080 /* do not update atime */ +#define F2FS_NOCOMP_FL 0x00000400 /* Don't compress */ #define F2FS_INDEX_FL 0x00001000 /* hash-indexed directory */ #define F2FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ #define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ @@ -2369,7 +2475,7 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) /* Flags that should be inherited by new inodes from their parent. */ #define F2FS_FL_INHERITED (F2FS_SYNC_FL | F2FS_NODUMP_FL | F2FS_NOATIME_FL | \ F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL | \ - F2FS_CASEFOLD_FL) + F2FS_CASEFOLD_FL | F2FS_COMPR_FL | F2FS_NOCOMP_FL) /* Flags that are appropriate for regular files (all but dir-specific ones). */ #define F2FS_REG_FLMASK (~(F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL | \ @@ -2421,6 +2527,8 @@ enum { FI_PIN_FILE, /* indicate file should not be gced */ FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */ FI_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */ + FI_COMPRESSED_FILE, /* indicate file's data can be compressed */ + FI_MMAP_FILE, /* indicate file was mmapped */ }; static inline void __mark_inode_dirty_flag(struct inode *inode, @@ -2437,6 +2545,7 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, case FI_DATA_EXIST: case FI_INLINE_DOTS: case FI_PIN_FILE: + case FI_COMPRESSED_FILE: f2fs_mark_inode_dirty_sync(inode, true); } } @@ -2592,16 +2701,27 @@ static inline int f2fs_has_inline_xattr(struct inode *inode) return is_inode_flag_set(inode, FI_INLINE_XATTR); } +static inline int f2fs_compressed_file(struct inode *inode) +{ + return S_ISREG(inode->i_mode) && + is_inode_flag_set(inode, FI_COMPRESSED_FILE); +} + static inline unsigned int addrs_per_inode(struct inode *inode) { unsigned int addrs = CUR_ADDRS_PER_INODE(inode) - get_inline_xattr_addrs(inode); - return ALIGN_DOWN(addrs, 1); + + if (!f2fs_compressed_file(inode)) + return addrs; + return ALIGN_DOWN(addrs, F2FS_I(inode)->i_cluster_size); } static inline unsigned int addrs_per_block(struct inode *inode) { - return ALIGN_DOWN(DEF_ADDRS_PER_BLOCK, 1); + if (!f2fs_compressed_file(inode)) + return DEF_ADDRS_PER_BLOCK; + return ALIGN_DOWN(DEF_ADDRS_PER_BLOCK, F2FS_I(inode)->i_cluster_size); } static inline void *inline_xattr_addr(struct inode *inode, struct page *page) @@ -2634,6 +2754,11 @@ static inline int f2fs_has_inline_dots(struct inode *inode) return is_inode_flag_set(inode, FI_INLINE_DOTS); } +static inline int f2fs_is_mmap_file(struct inode *inode) +{ + return is_inode_flag_set(inode, FI_MMAP_FILE); +} + static inline bool f2fs_is_pinned_file(struct inode *inode) { return is_inode_flag_set(inode, FI_PIN_FILE); @@ -2761,7 +2886,8 @@ static inline bool f2fs_may_extent_tree(struct inode *inode) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); if (!test_opt(sbi, EXTENT_CACHE) || - is_inode_flag_set(inode, FI_NO_EXTENT)) + is_inode_flag_set(inode, FI_NO_EXTENT) || + is_inode_flag_set(inode, FI_COMPRESSED_FILE)) return false; /* @@ -2881,7 +3007,8 @@ static inline void verify_blkaddr(struct f2fs_sb_info *sbi, static inline bool __is_valid_data_blkaddr(block_t blkaddr) { - if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) + if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR || + blkaddr == COMPRESS_ADDR) return false; return true; } @@ -3186,10 +3313,10 @@ void f2fs_destroy_checkpoint_caches(void); int __init f2fs_init_bioset(void); void f2fs_destroy_bioset(void); struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, int npages, bool no_fail); -int f2fs_init_post_read_processing(void); -void f2fs_destroy_post_read_processing(void); int f2fs_init_bio_entry_cache(void); void f2fs_destroy_bio_entry_cache(void); +void f2fs_submit_bio(struct f2fs_sb_info *sbi, + struct bio *bio, enum page_type type); void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type); void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, struct inode *inode, struct page *page, @@ -3210,6 +3337,9 @@ int f2fs_reserve_new_block(struct dnode_of_data *dn); int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index); int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from); int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index); +int f2fs_mpage_readpages(struct address_space *mapping, + struct list_head *pages, struct page *page, + unsigned nr_pages, bool is_readahead); struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, int op_flags, bool for_write); struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index); @@ -3223,8 +3353,14 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int create, int flag); int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len); +int f2fs_encrypt_one_page(struct f2fs_io_info *fio); bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio); bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio); +int f2fs_write_single_data_page(struct page *page, int *submitted, + struct bio **bio, sector_t *last_block, + struct writeback_control *wbc, + enum iostat_type io_type, + int compr_blocks); void f2fs_invalidate_page(struct page *page, unsigned int offset, unsigned int length); int f2fs_release_page(struct page *page, gfp_t wait); @@ -3234,6 +3370,10 @@ int f2fs_migrate_page(struct address_space *mapping, struct page *newpage, #endif bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len); void f2fs_clear_radix_tree_dirty_tag(struct page *page); +int f2fs_init_post_read_processing(void); +void f2fs_destroy_post_read_processing(void); +int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi); +void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi); /* * gc.c @@ -3280,6 +3420,7 @@ struct f2fs_stat_info { int nr_discard_cmd; unsigned int undiscard_blks; int inline_xattr, inline_inode, inline_dir, append, update, orphans; + int compr_inode, compr_blocks; int aw_cnt, max_aw_cnt, vw_cnt, max_vw_cnt; unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks; unsigned int bimodal, avg_vblocks; @@ -3350,6 +3491,20 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) if (f2fs_has_inline_dentry(inode)) \ (atomic_dec(&F2FS_I_SB(inode)->inline_dir)); \ } while (0) +#define stat_inc_compr_inode(inode) \ + do { \ + if (f2fs_compressed_file(inode)) \ + (atomic_inc(&F2FS_I_SB(inode)->compr_inode)); \ + } while (0) +#define stat_dec_compr_inode(inode) \ + do { \ + if (f2fs_compressed_file(inode)) \ + (atomic_dec(&F2FS_I_SB(inode)->compr_inode)); \ + } while (0) +#define stat_add_compr_blocks(inode, blocks) \ + (atomic_add(blocks, &F2FS_I_SB(inode)->compr_blocks)) +#define stat_sub_compr_blocks(inode, blocks) \ + (atomic_sub(blocks, &F2FS_I_SB(inode)->compr_blocks)) #define stat_inc_meta_count(sbi, blkaddr) \ do { \ if (blkaddr < SIT_I(sbi)->sit_base_addr) \ @@ -3440,6 +3595,10 @@ void f2fs_destroy_root_stats(void); #define stat_dec_inline_inode(inode) do { } while (0) #define stat_inc_inline_dir(inode) do { } while (0) #define stat_dec_inline_dir(inode) do { } while (0) +#define stat_inc_compr_inode(inode) do { } while (0) +#define stat_dec_compr_inode(inode) do { } while (0) +#define stat_add_compr_blocks(inode, blocks) do { } while (0) +#define stat_sub_compr_blocks(inode, blocks) do { } while (0) #define stat_inc_atomic_write(inode) do { } while (0) #define stat_dec_atomic_write(inode) do { } while (0) #define stat_update_max_atomic_write(inode) do { } while (0) @@ -3579,7 +3738,85 @@ static inline void f2fs_set_encrypted_inode(struct inode *inode) */ static inline bool f2fs_post_read_required(struct inode *inode) { - return f2fs_encrypted_file(inode) || fsverity_active(inode); + return f2fs_encrypted_file(inode) || fsverity_active(inode) || + f2fs_compressed_file(inode); +} + +/* + * compress.c + */ +#ifdef CONFIG_F2FS_FS_COMPRESSION +bool f2fs_is_compressed_page(struct page *page); +struct page *f2fs_compress_control_page(struct page *page); +int f2fs_prepare_compress_overwrite(struct inode *inode, + struct page **pagep, pgoff_t index, void **fsdata); +bool f2fs_compress_write_end(struct inode *inode, void *fsdata, + pgoff_t index, unsigned copied); +void f2fs_compress_write_end_io(struct bio *bio, struct page *page); +bool f2fs_is_compress_backend_ready(struct inode *inode); +void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity); +bool f2fs_cluster_is_empty(struct compress_ctx *cc); +bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index); +void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page); +int f2fs_write_multi_pages(struct compress_ctx *cc, + int *submitted, + struct writeback_control *wbc, + enum iostat_type io_type); +int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index); +int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, + unsigned nr_pages, sector_t *last_block_in_bio, + bool is_readahead); +struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc); +void f2fs_free_dic(struct decompress_io_ctx *dic); +void f2fs_decompress_end_io(struct page **rpages, + unsigned int cluster_size, bool err, bool verity); +int f2fs_init_compress_ctx(struct compress_ctx *cc); +void f2fs_destroy_compress_ctx(struct compress_ctx *cc); +void f2fs_init_compress_info(struct f2fs_sb_info *sbi); +#else +static inline bool f2fs_is_compressed_page(struct page *page) { return false; } +static inline bool f2fs_is_compress_backend_ready(struct inode *inode) +{ + if (!f2fs_compressed_file(inode)) + return true; + /* not support compression */ + return false; +} +static inline struct page *f2fs_compress_control_page(struct page *page) +{ + WARN_ON_ONCE(1); + return ERR_PTR(-EINVAL); +} +#endif + +static inline void set_compress_context(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + F2FS_I(inode)->i_compress_algorithm = + F2FS_OPTION(sbi).compress_algorithm; + F2FS_I(inode)->i_log_cluster_size = + F2FS_OPTION(sbi).compress_log_size; + F2FS_I(inode)->i_cluster_size = + 1 << F2FS_I(inode)->i_log_cluster_size; + F2FS_I(inode)->i_flags |= F2FS_COMPR_FL; + set_inode_flag(inode, FI_COMPRESSED_FILE); + stat_inc_compr_inode(inode); +} + +static inline u64 f2fs_disable_compressed_file(struct inode *inode) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + + if (!f2fs_compressed_file(inode)) + return 0; + if (fi->i_compr_blocks) + return fi->i_compr_blocks; + + fi->i_flags &= ~F2FS_COMPR_FL; + clear_inode_flag(inode, FI_COMPRESSED_FILE); + stat_dec_compr_inode(inode); + return 0; } #define F2FS_FEATURE_FUNCS(name, flagname) \ @@ -3600,6 +3837,7 @@ F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND); F2FS_FEATURE_FUNCS(verity, VERITY); F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM); F2FS_FEATURE_FUNCS(casefold, CASEFOLD); +F2FS_FEATURE_FUNCS(compression, COMPRESSION); #ifdef CONFIG_BLK_DEV_ZONED static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi, @@ -3681,6 +3919,30 @@ static inline bool f2fs_may_encrypt(struct inode *inode) #endif } +static inline bool f2fs_may_compress(struct inode *inode) +{ + if (IS_SWAPFILE(inode) || f2fs_is_pinned_file(inode) || + f2fs_is_atomic_file(inode) || + f2fs_is_volatile_file(inode)) + return false; + return S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode); +} + +static inline void f2fs_i_compr_blocks_update(struct inode *inode, + u64 blocks, bool add) +{ + int diff = F2FS_I(inode)->i_cluster_size - blocks; + + if (add) { + F2FS_I(inode)->i_compr_blocks += diff; + stat_add_compr_blocks(inode, diff); + } else { + F2FS_I(inode)->i_compr_blocks -= diff; + stat_sub_compr_blocks(inode, diff); + } + f2fs_mark_inode_dirty_sync(inode, true); +} + static inline int block_unaligned_IO(struct inode *inode, struct kiocb *iocb, struct iov_iter *iter) { @@ -3712,6 +3974,8 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, return true; if (f2fs_is_multi_device(sbi)) return true; + if (f2fs_compressed_file(inode)) + return true; /* * for blkzoned device, fallback direct IO to buffered IO, so * all IOs can be serialized by log-structured write. diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index e3e08c23482d..788f5cf1561f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -50,7 +50,8 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) struct inode *inode = file_inode(vmf->vma->vm_file); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; - int err; + bool need_alloc = true; + int err = 0; if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; @@ -62,8 +63,25 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) goto err; } +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (f2fs_compressed_file(inode)) { + int ret = f2fs_is_compressed_cluster(inode, page->index); + + if (ret < 0) { + err = ret; + goto err; + } else if (ret) { + if (ret < F2FS_I(inode)->i_cluster_size) { + err = -EAGAIN; + goto err; + } + need_alloc = false; + } + } +#endif /* should do out of any locked page */ - f2fs_balance_fs(sbi, true); + if (need_alloc) + f2fs_balance_fs(sbi, true); sb_start_pagefault(inode->i_sb); @@ -80,15 +98,17 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) goto out_sem; } - /* block allocation */ - __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = f2fs_get_block(&dn, page->index); - f2fs_put_dnode(&dn); - __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); - if (err) { - unlock_page(page); - goto out_sem; + if (need_alloc) { + /* block allocation */ + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = f2fs_get_block(&dn, page->index); + f2fs_put_dnode(&dn); + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); + if (err) { + unlock_page(page); + goto out_sem; + } } /* fill the page */ @@ -155,6 +175,8 @@ static inline enum cp_reason_type need_do_checkpoint(struct inode *inode) if (!S_ISREG(inode->i_mode)) cp_reason = CP_NON_REGULAR; + else if (f2fs_compressed_file(inode)) + cp_reason = CP_COMPRESSED; else if (inode->i_nlink != 1) cp_reason = CP_HARDLINK; else if (is_sbi_flag_set(sbi, SBI_NEED_CP)) @@ -485,6 +507,9 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) return -EIO; + if (!f2fs_is_compress_backend_ready(inode)) + return -EOPNOTSUPP; + /* we don't need to use inline_data strictly */ err = f2fs_convert_inline_inode(inode); if (err) @@ -492,6 +517,7 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) file_accessed(file); vma->vm_ops = &f2fs_file_vm_ops; + set_inode_flag(inode, FI_MMAP_FILE); return 0; } @@ -502,6 +528,9 @@ static int f2fs_file_open(struct inode *inode, struct file *filp) if (err) return err; + if (!f2fs_is_compress_backend_ready(inode)) + return -EOPNOTSUPP; + err = fsverity_file_open(inode, filp); if (err) return err; @@ -518,6 +547,9 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) int nr_free = 0, ofs = dn->ofs_in_node, len = count; __le32 *addr; int base = 0; + bool compressed_cluster = false; + int cluster_index = 0, valid_blocks = 0; + int cluster_size = F2FS_I(dn->inode)->i_cluster_size; if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode)) base = get_extra_isize(dn->inode); @@ -525,26 +557,43 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) raw_node = F2FS_NODE(dn->node_page); addr = blkaddr_in_node(raw_node) + base + ofs; - for (; count > 0; count--, addr++, dn->ofs_in_node++) { + /* Assumption: truncateion starts with cluster */ + for (; count > 0; count--, addr++, dn->ofs_in_node++, cluster_index++) { block_t blkaddr = le32_to_cpu(*addr); + if (f2fs_compressed_file(dn->inode) && + !(cluster_index & (cluster_size - 1))) { + if (compressed_cluster) + f2fs_i_compr_blocks_update(dn->inode, + valid_blocks, false); + compressed_cluster = (blkaddr == COMPRESS_ADDR); + valid_blocks = 0; + } + if (blkaddr == NULL_ADDR) continue; dn->data_blkaddr = NULL_ADDR; f2fs_set_data_blkaddr(dn); - if (__is_valid_data_blkaddr(blkaddr) && - !f2fs_is_valid_blkaddr(sbi, blkaddr, + if (__is_valid_data_blkaddr(blkaddr)) { + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) - continue; + continue; + if (compressed_cluster) + valid_blocks++; + } - f2fs_invalidate_blocks(sbi, blkaddr); if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page)) clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN); + + f2fs_invalidate_blocks(sbi, blkaddr); nr_free++; } + if (compressed_cluster) + f2fs_i_compr_blocks_update(dn->inode, valid_blocks, false); + if (nr_free) { pgoff_t fofs; /* @@ -587,6 +636,9 @@ static int truncate_partial_data_page(struct inode *inode, u64 from, return 0; } + if (f2fs_compressed_file(inode)) + return 0; + page = f2fs_get_lock_data_page(inode, index, true); if (IS_ERR(page)) return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page); @@ -602,7 +654,7 @@ truncate_out: return 0; } -int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock) +static int do_truncate_blocks(struct inode *inode, u64 from, bool lock) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; @@ -667,6 +719,28 @@ free_partial: return err; } +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock) +{ + u64 free_from = from; + + /* + * for compressed file, only support cluster size + * aligned truncation. + */ + if (f2fs_compressed_file(inode)) { + size_t cluster_shift = PAGE_SHIFT + + F2FS_I(inode)->i_log_cluster_size; + size_t cluster_mask = (1 << cluster_shift) - 1; + + free_from = from >> cluster_shift; + if (from & cluster_mask) + free_from++; + free_from <<= cluster_shift; + } + + return do_truncate_blocks(inode, free_from, lock); +} + int f2fs_truncate(struct inode *inode) { int err; @@ -783,6 +857,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) return -EIO; + if ((attr->ia_valid & ATTR_SIZE) && + !f2fs_is_compress_backend_ready(inode)) + return -EOPNOTSUPP; + err = setattr_prepare(dentry, attr); if (err) return err; @@ -1023,8 +1101,8 @@ next_dnode: } else if (ret == -ENOENT) { if (dn.max_level == 0) return -ENOENT; - done = min((pgoff_t)ADDRS_PER_BLOCK(inode) - dn.ofs_in_node, - len); + done = min((pgoff_t)ADDRS_PER_BLOCK(inode) - + dn.ofs_in_node, len); blkaddr += done; do_replace += done; goto next; @@ -1618,6 +1696,8 @@ static long f2fs_fallocate(struct file *file, int mode, return -EIO; if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode))) return -ENOSPC; + if (!f2fs_is_compress_backend_ready(inode)) + return -EOPNOTSUPP; /* f2fs only support ->fallocate for regular file */ if (!S_ISREG(inode->i_mode)) @@ -1627,6 +1707,11 @@ static long f2fs_fallocate(struct file *file, int mode, (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE))) return -EOPNOTSUPP; + if (f2fs_compressed_file(inode) && + (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | + FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE))) + return -EOPNOTSUPP; + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE)) @@ -1716,7 +1801,40 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) return -ENOTEMPTY; } + if (iflags & (F2FS_COMPR_FL | F2FS_NOCOMP_FL)) { + if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) + return -EOPNOTSUPP; + if ((iflags & F2FS_COMPR_FL) && (iflags & F2FS_NOCOMP_FL)) + return -EINVAL; + } + + if ((iflags ^ fi->i_flags) & F2FS_COMPR_FL) { + if (S_ISREG(inode->i_mode) && + (fi->i_flags & F2FS_COMPR_FL || i_size_read(inode) || + F2FS_HAS_BLOCKS(inode))) + return -EINVAL; + if (iflags & F2FS_NOCOMP_FL) + return -EINVAL; + if (iflags & F2FS_COMPR_FL) { + int err = f2fs_convert_inline_inode(inode); + + if (err) + return err; + + if (!f2fs_may_compress(inode)) + return -EINVAL; + + set_compress_context(inode); + } + } + if ((iflags ^ fi->i_flags) & F2FS_NOCOMP_FL) { + if (fi->i_flags & F2FS_COMPR_FL) + return -EINVAL; + } + fi->i_flags = iflags | (fi->i_flags & ~mask); + f2fs_bug_on(F2FS_I_SB(inode), (fi->i_flags & F2FS_COMPR_FL) && + (fi->i_flags & F2FS_NOCOMP_FL)); if (fi->i_flags & F2FS_PROJINHERIT_FL) set_inode_flag(inode, FI_PROJ_INHERIT); @@ -1742,11 +1860,13 @@ static const struct { u32 iflag; u32 fsflag; } f2fs_fsflags_map[] = { + { F2FS_COMPR_FL, FS_COMPR_FL }, { F2FS_SYNC_FL, FS_SYNC_FL }, { F2FS_IMMUTABLE_FL, FS_IMMUTABLE_FL }, { F2FS_APPEND_FL, FS_APPEND_FL }, { F2FS_NODUMP_FL, FS_NODUMP_FL }, { F2FS_NOATIME_FL, FS_NOATIME_FL }, + { F2FS_NOCOMP_FL, FS_NOCOMP_FL }, { F2FS_INDEX_FL, FS_INDEX_FL }, { F2FS_DIRSYNC_FL, FS_DIRSYNC_FL }, { F2FS_PROJINHERIT_FL, FS_PROJINHERIT_FL }, @@ -1754,11 +1874,13 @@ static const struct { }; #define F2FS_GETTABLE_FS_FL ( \ + FS_COMPR_FL | \ FS_SYNC_FL | \ FS_IMMUTABLE_FL | \ FS_APPEND_FL | \ FS_NODUMP_FL | \ FS_NOATIME_FL | \ + FS_NOCOMP_FL | \ FS_INDEX_FL | \ FS_DIRSYNC_FL | \ FS_PROJINHERIT_FL | \ @@ -1769,11 +1891,13 @@ static const struct { FS_CASEFOLD_FL) #define F2FS_SETTABLE_FS_FL ( \ + FS_COMPR_FL | \ FS_SYNC_FL | \ FS_IMMUTABLE_FL | \ FS_APPEND_FL | \ FS_NODUMP_FL | \ FS_NOATIME_FL | \ + FS_NOCOMP_FL | \ FS_DIRSYNC_FL | \ FS_PROJINHERIT_FL | \ FS_CASEFOLD_FL) @@ -1894,6 +2018,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) inode_lock(inode); + f2fs_disable_compressed_file(inode); + if (f2fs_is_atomic_file(inode)) { if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) ret = -EINVAL; @@ -3094,10 +3220,16 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) ret = -EAGAIN; goto out; } + ret = f2fs_convert_inline_inode(inode); if (ret) goto out; + if (f2fs_disable_compressed_file(inode)) { + ret = -EOPNOTSUPP; + goto out; + } + set_inode_flag(inode, FI_PIN_FILE); ret = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]; done: @@ -3280,6 +3412,17 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) } } +static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file_inode(file); + + if (!f2fs_is_compress_backend_ready(inode)) + return -EOPNOTSUPP; + + return generic_file_read_iter(iocb, iter); +} + static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; @@ -3291,6 +3434,9 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) goto out; } + if (!f2fs_is_compress_backend_ready(inode)) + return -EOPNOTSUPP; + if (iocb->ki_flags & IOCB_NOWAIT) { if (!inode_trylock(inode)) { ret = -EAGAIN; @@ -3425,7 +3571,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) const struct file_operations f2fs_file_operations = { .llseek = f2fs_llseek, - .read_iter = generic_file_read_iter, + .read_iter = f2fs_file_read_iter, .write_iter = f2fs_file_write_iter, .open = f2fs_file_open, .release = f2fs_release_file, diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 502bd491336a..78c3f1d70f1d 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -200,6 +200,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_inode *ri = F2FS_INODE(node_page); unsigned long long iblocks; iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks); @@ -286,6 +287,19 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) return false; } + if (f2fs_has_extra_attr(inode) && f2fs_sb_has_compression(sbi) && + fi->i_flags & F2FS_COMPR_FL && + F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, + i_log_cluster_size)) { + if (ri->i_compress_algorithm >= COMPRESS_MAX) + return false; + if (le64_to_cpu(ri->i_compr_blocks) > inode->i_blocks) + return false; + if (ri->i_log_cluster_size < MIN_COMPRESS_LOG_SIZE || + ri->i_log_cluster_size > MAX_COMPRESS_LOG_SIZE) + return false; + } + return true; } @@ -407,6 +421,18 @@ static int do_read_inode(struct inode *inode) fi->i_crtime.tv_nsec = le32_to_cpu(ri->i_crtime_nsec); } + if (f2fs_has_extra_attr(inode) && f2fs_sb_has_compression(sbi) && + (fi->i_flags & F2FS_COMPR_FL)) { + if (F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, + i_log_cluster_size)) { + fi->i_compr_blocks = le64_to_cpu(ri->i_compr_blocks); + fi->i_compress_algorithm = ri->i_compress_algorithm; + fi->i_log_cluster_size = ri->i_log_cluster_size; + fi->i_cluster_size = 1 << fi->i_log_cluster_size; + set_inode_flag(inode, FI_COMPRESSED_FILE); + } + } + F2FS_I(inode)->i_disk_time[0] = inode->i_atime; F2FS_I(inode)->i_disk_time[1] = inode->i_ctime; F2FS_I(inode)->i_disk_time[2] = inode->i_mtime; @@ -416,6 +442,8 @@ static int do_read_inode(struct inode *inode) stat_inc_inline_xattr(inode); stat_inc_inline_inode(inode); stat_inc_inline_dir(inode); + stat_inc_compr_inode(inode); + stat_add_compr_blocks(inode, F2FS_I(inode)->i_compr_blocks); return 0; } @@ -569,6 +597,17 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) ri->i_crtime_nsec = cpu_to_le32(F2FS_I(inode)->i_crtime.tv_nsec); } + + if (f2fs_sb_has_compression(F2FS_I_SB(inode)) && + F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize, + i_log_cluster_size)) { + ri->i_compr_blocks = + cpu_to_le64(F2FS_I(inode)->i_compr_blocks); + ri->i_compress_algorithm = + F2FS_I(inode)->i_compress_algorithm; + ri->i_log_cluster_size = + F2FS_I(inode)->i_log_cluster_size; + } } __set_inode_rdev(inode, ri); @@ -711,6 +750,8 @@ no_delete: stat_dec_inline_xattr(inode); stat_dec_inline_dir(inode); stat_dec_inline_inode(inode); + stat_dec_compr_inode(inode); + stat_sub_compr_blocks(inode, F2FS_I(inode)->i_compr_blocks); if (likely(!f2fs_cp_error(sbi) && !is_sbi_flag_set(sbi, SBI_CP_DISABLED))) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index a8ef5aeafe44..22b031f70e75 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -119,6 +119,13 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL) set_inode_flag(inode, FI_PROJ_INHERIT); + if (f2fs_sb_has_compression(sbi)) { + /* Inherit the compression flag in directory */ + if ((F2FS_I(dir)->i_flags & F2FS_COMPR_FL) && + f2fs_may_compress(inode)) + set_compress_context(inode); + } + f2fs_set_inode_flags(inode); trace_f2fs_new_inode(inode, 0); @@ -149,6 +156,9 @@ static inline int is_extension_exist(const unsigned char *s, const char *sub) size_t sublen = strlen(sub); int i; + if (sublen == 1 && *sub == '*') + return 1; + /* * filename format of multimedia file should be defined as: * "filename + '.' + extension + (optional: '.' + temp extension)". @@ -262,6 +272,45 @@ int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name, return 0; } +static void set_compress_inode(struct f2fs_sb_info *sbi, struct inode *inode, + const unsigned char *name) +{ + __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; + unsigned char (*ext)[F2FS_EXTENSION_LEN]; + unsigned int ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt; + int i, cold_count, hot_count; + + if (!f2fs_sb_has_compression(sbi) || + is_inode_flag_set(inode, FI_COMPRESSED_FILE) || + F2FS_I(inode)->i_flags & F2FS_NOCOMP_FL || + !f2fs_may_compress(inode)) + return; + + down_read(&sbi->sb_lock); + + cold_count = le32_to_cpu(sbi->raw_super->extension_count); + hot_count = sbi->raw_super->hot_ext_count; + + for (i = cold_count; i < cold_count + hot_count; i++) { + if (is_extension_exist(name, extlist[i])) { + up_read(&sbi->sb_lock); + return; + } + } + + up_read(&sbi->sb_lock); + + ext = F2FS_OPTION(sbi).extensions; + + for (i = 0; i < ext_cnt; i++) { + if (!is_extension_exist(name, ext[i])) + continue; + + set_compress_context(inode); + return; + } +} + static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { @@ -286,6 +335,8 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, if (!test_opt(sbi, DISABLE_EXT_IDENTIFY)) set_file_temperature(sbi, inode, dentry->d_name.name); + set_compress_inode(sbi, inode, dentry->d_name.name); + inode->i_op = &f2fs_file_inode_operations; inode->i_fop = &f2fs_file_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d3480c8d25a0..7633aade0e84 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2223,7 +2223,7 @@ void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) struct sit_info *sit_i = SIT_I(sbi); f2fs_bug_on(sbi, addr == NULL_ADDR); - if (addr == NEW_ADDR) + if (addr == NEW_ADDR || addr == COMPRESS_ADDR) return; invalidate_mapping_pages(META_MAPPING(sbi), addr, addr); @@ -3034,7 +3034,8 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) if (fio->type == DATA) { struct inode *inode = fio->page->mapping->host; - if (is_cold_data(fio->page) || file_is_cold(inode)) + if (is_cold_data(fio->page) || file_is_cold(inode) || + f2fs_compressed_file(inode)) return CURSEG_COLD_DATA; if (file_is_hot(inode) || is_inode_flag_set(inode, FI_HOT_DATA) || diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index a95467b202ea..a1b3951367cd 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -200,18 +200,6 @@ struct segment_allocation { void (*allocate_segment)(struct f2fs_sb_info *, int, bool); }; -/* - * this value is set in page as a private data which indicate that - * the page is atomically written, and it is in inmem_pages list. - */ -#define ATOMIC_WRITTEN_PAGE ((unsigned long)-1) -#define DUMMY_WRITTEN_PAGE ((unsigned long)-2) - -#define IS_ATOMIC_WRITTEN_PAGE(page) \ - (page_private(page) == (unsigned long)ATOMIC_WRITTEN_PAGE) -#define IS_DUMMY_WRITTEN_PAGE(page) \ - (page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE) - #define MAX_SKIP_GC_COUNT 16 struct inmem_pages { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 7d7d416b41cf..44afb10cbcb8 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -141,6 +141,9 @@ enum { Opt_checkpoint_disable_cap, Opt_checkpoint_disable_cap_perc, Opt_checkpoint_enable, + Opt_compress_algorithm, + Opt_compress_log_size, + Opt_compress_extension, Opt_err, }; @@ -203,6 +206,9 @@ static match_table_t f2fs_tokens = { {Opt_checkpoint_disable_cap, "checkpoint=disable:%u"}, {Opt_checkpoint_disable_cap_perc, "checkpoint=disable:%u%%"}, {Opt_checkpoint_enable, "checkpoint=enable"}, + {Opt_compress_algorithm, "compress_algorithm=%s"}, + {Opt_compress_log_size, "compress_log_size=%u"}, + {Opt_compress_extension, "compress_extension=%s"}, {Opt_err, NULL}, }; @@ -391,8 +397,9 @@ static int parse_options(struct super_block *sb, char *options) { struct f2fs_sb_info *sbi = F2FS_SB(sb); substring_t args[MAX_OPT_ARGS]; + unsigned char (*ext)[F2FS_EXTENSION_LEN]; char *p, *name; - int arg = 0; + int arg = 0, ext_cnt; kuid_t uid; kgid_t gid; #ifdef CONFIG_QUOTA @@ -810,6 +817,66 @@ static int parse_options(struct super_block *sb, char *options) case Opt_checkpoint_enable: clear_opt(sbi, DISABLE_CHECKPOINT); break; + case Opt_compress_algorithm: + if (!f2fs_sb_has_compression(sbi)) { + f2fs_err(sbi, "Compression feature if off"); + return -EINVAL; + } + name = match_strdup(&args[0]); + if (!name) + return -ENOMEM; + if (strlen(name) == 3 && !strcmp(name, "lzo")) { + F2FS_OPTION(sbi).compress_algorithm = + COMPRESS_LZO; + } else if (strlen(name) == 3 && + !strcmp(name, "lz4")) { + F2FS_OPTION(sbi).compress_algorithm = + COMPRESS_LZ4; + } else { + kfree(name); + return -EINVAL; + } + kfree(name); + break; + case Opt_compress_log_size: + if (!f2fs_sb_has_compression(sbi)) { + f2fs_err(sbi, "Compression feature is off"); + return -EINVAL; + } + if (args->from && match_int(args, &arg)) + return -EINVAL; + if (arg < MIN_COMPRESS_LOG_SIZE || + arg > MAX_COMPRESS_LOG_SIZE) { + f2fs_err(sbi, + "Compress cluster log size is out of range"); + return -EINVAL; + } + F2FS_OPTION(sbi).compress_log_size = arg; + break; + case Opt_compress_extension: + if (!f2fs_sb_has_compression(sbi)) { + f2fs_err(sbi, "Compression feature is off"); + return -EINVAL; + } + name = match_strdup(&args[0]); + if (!name) + return -ENOMEM; + + ext = F2FS_OPTION(sbi).extensions; + ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt; + + if (strlen(name) >= F2FS_EXTENSION_LEN || + ext_cnt >= COMPRESS_EXT_NUM) { + f2fs_err(sbi, + "invalid extension length/number"); + kfree(name); + return -EINVAL; + } + + strcpy(ext[ext_cnt], name); + F2FS_OPTION(sbi).compress_ext_cnt++; + kfree(name); + break; default: f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value", p); @@ -1133,6 +1200,8 @@ static void f2fs_put_super(struct super_block *sb) f2fs_destroy_node_manager(sbi); f2fs_destroy_segment_manager(sbi); + f2fs_destroy_post_read_wq(sbi); + kvfree(sbi->ckpt); f2fs_unregister_sysfs(sbi); @@ -1348,6 +1417,35 @@ static inline void f2fs_show_quota_options(struct seq_file *seq, #endif } +static inline void f2fs_show_compress_options(struct seq_file *seq, + struct super_block *sb) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + char *algtype = ""; + int i; + + if (!f2fs_sb_has_compression(sbi)) + return; + + switch (F2FS_OPTION(sbi).compress_algorithm) { + case COMPRESS_LZO: + algtype = "lzo"; + break; + case COMPRESS_LZ4: + algtype = "lz4"; + break; + } + seq_printf(seq, ",compress_algorithm=%s", algtype); + + seq_printf(seq, ",compress_log_size=%u", + F2FS_OPTION(sbi).compress_log_size); + + for (i = 0; i < F2FS_OPTION(sbi).compress_ext_cnt; i++) { + seq_printf(seq, ",compress_extension=%s", + F2FS_OPTION(sbi).extensions[i]); + } +} + static int f2fs_show_options(struct seq_file *seq, struct dentry *root) { struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb); @@ -1470,6 +1568,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",fsync_mode=%s", "strict"); else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_NOBARRIER) seq_printf(seq, ",fsync_mode=%s", "nobarrier"); + + f2fs_show_compress_options(seq, sbi->sb); return 0; } @@ -1484,6 +1584,9 @@ static void default_options(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).test_dummy_encryption = false; F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID); F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID); + F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZO; + F2FS_OPTION(sbi).compress_log_size = MIN_COMPRESS_LOG_SIZE; + F2FS_OPTION(sbi).compress_ext_cnt = 0; set_opt(sbi, BG_GC); set_opt(sbi, INLINE_XATTR); @@ -3419,6 +3522,12 @@ try_onemore: goto free_devices; } + err = f2fs_init_post_read_wq(sbi); + if (err) { + f2fs_err(sbi, "Failed to initialize post read workqueue"); + goto free_devices; + } + sbi->total_valid_node_count = le32_to_cpu(sbi->ckpt->valid_node_count); percpu_counter_set(&sbi->total_valid_inode_count, @@ -3639,6 +3748,7 @@ free_nm: f2fs_destroy_node_manager(sbi); free_sm: f2fs_destroy_segment_manager(sbi); + f2fs_destroy_post_read_wq(sbi); free_devices: destroy_device_list(sbi); kvfree(sbi->ckpt); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index e75c5921d750..74e1dc129ac2 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -154,6 +154,9 @@ static ssize_t features_show(struct f2fs_attr *a, if (f2fs_sb_has_casefold(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "casefold"); + if (f2fs_sb_has_compression(sbi)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "compression"); len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "pin_file"); len += snprintf(buf + len, PAGE_SIZE - len, "\n"); @@ -389,6 +392,7 @@ enum feat_id { FEAT_VERITY, FEAT_SB_CHECKSUM, FEAT_CASEFOLD, + FEAT_COMPRESSION, }; static ssize_t f2fs_feature_show(struct f2fs_attr *a, @@ -408,6 +412,7 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a, case FEAT_VERITY: case FEAT_SB_CHECKSUM: case FEAT_CASEFOLD: + case FEAT_COMPRESSION: return snprintf(buf, PAGE_SIZE, "supported\n"); } return 0; @@ -503,6 +508,7 @@ F2FS_FEATURE_RO_ATTR(verity, FEAT_VERITY); #endif F2FS_FEATURE_RO_ATTR(sb_checksum, FEAT_SB_CHECKSUM); F2FS_FEATURE_RO_ATTR(casefold, FEAT_CASEFOLD); +F2FS_FEATURE_RO_ATTR(compression, FEAT_COMPRESSION); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -572,6 +578,7 @@ static struct attribute *f2fs_feat_attrs[] = { #endif ATTR_LIST(sb_checksum), ATTR_LIST(casefold), + ATTR_LIST(compression), NULL, }; diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 284738996028..ac3f4888b3df 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -23,6 +23,7 @@ #define NULL_ADDR ((block_t)0) /* used as block_t addresses */ #define NEW_ADDR ((block_t)-1) /* used as block_t addresses */ +#define COMPRESS_ADDR ((block_t)-2) /* used as compressed data flag */ #define F2FS_BYTES_TO_BLK(bytes) ((bytes) >> F2FS_BLKSIZE_BITS) #define F2FS_BLK_TO_BYTES(blk) ((blk) << F2FS_BLKSIZE_BITS) @@ -271,6 +272,10 @@ struct f2fs_inode { __le32 i_inode_checksum;/* inode meta checksum */ __le64 i_crtime; /* creation time */ __le32 i_crtime_nsec; /* creation time in nano scale */ + __le64 i_compr_blocks; /* # of compressed blocks */ + __u8 i_compress_algorithm; /* compress algorithm */ + __u8 i_log_cluster_size; /* log of cluster size */ + __le16 i_padding; /* padding */ __le32 i_extra_end[0]; /* for attribute size calculation */ } __packed; __le32 i_addr[DEF_ADDRS_PER_INODE]; /* Pointers to data blocks */ diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 4c282573437d..b39e8e90b6ea 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -141,6 +141,7 @@ TRACE_DEFINE_ENUM(CP_TRIMMED); __print_symbolic(type, \ { CP_NO_NEEDED, "no needed" }, \ { CP_NON_REGULAR, "non regular" }, \ + { CP_COMPRESSED, "compressed" }, \ { CP_HARDLINK, "hardlink" }, \ { CP_SB_NEED_CP, "sb needs cp" }, \ { CP_WRONG_PINO, "wrong pino" }, \ @@ -158,6 +159,11 @@ TRACE_DEFINE_ENUM(CP_TRIMMED); { F2FS_GOING_DOWN_METAFLUSH, "meta flush" }, \ { F2FS_GOING_DOWN_NEED_FSCK, "need fsck" }) +#define show_compress_algorithm(type) \ + __print_symbolic(type, \ + { COMPRESS_LZO, "LZO" }, \ + { COMPRESS_LZ4, "LZ4" }) + struct f2fs_sb_info; struct f2fs_io_info; struct extent_info; @@ -1720,6 +1726,100 @@ TRACE_EVENT(f2fs_shutdown, __entry->ret) ); +DECLARE_EVENT_CLASS(f2fs_zip_start, + + TP_PROTO(struct inode *inode, pgoff_t cluster_idx, + unsigned int cluster_size, unsigned char algtype), + + TP_ARGS(inode, cluster_idx, cluster_size, algtype), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(pgoff_t, idx) + __field(unsigned int, size) + __field(unsigned int, algtype) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->idx = cluster_idx; + __entry->size = cluster_size; + __entry->algtype = algtype; + ), + + TP_printk("dev = (%d,%d), ino = %lu, cluster_idx:%lu, " + "cluster_size = %u, algorithm = %s", + show_dev_ino(__entry), + __entry->idx, + __entry->size, + show_compress_algorithm(__entry->algtype)) +); + +DECLARE_EVENT_CLASS(f2fs_zip_end, + + TP_PROTO(struct inode *inode, pgoff_t cluster_idx, + unsigned int compressed_size, int ret), + + TP_ARGS(inode, cluster_idx, compressed_size, ret), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(pgoff_t, idx) + __field(unsigned int, size) + __field(unsigned int, ret) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->idx = cluster_idx; + __entry->size = compressed_size; + __entry->ret = ret; + ), + + TP_printk("dev = (%d,%d), ino = %lu, cluster_idx:%lu, " + "compressed_size = %u, ret = %d", + show_dev_ino(__entry), + __entry->idx, + __entry->size, + __entry->ret) +); + +DEFINE_EVENT(f2fs_zip_start, f2fs_compress_pages_start, + + TP_PROTO(struct inode *inode, pgoff_t cluster_idx, + unsigned int cluster_size, unsigned char algtype), + + TP_ARGS(inode, cluster_idx, cluster_size, algtype) +); + +DEFINE_EVENT(f2fs_zip_start, f2fs_decompress_pages_start, + + TP_PROTO(struct inode *inode, pgoff_t cluster_idx, + unsigned int cluster_size, unsigned char algtype), + + TP_ARGS(inode, cluster_idx, cluster_size, algtype) +); + +DEFINE_EVENT(f2fs_zip_end, f2fs_compress_pages_end, + + TP_PROTO(struct inode *inode, pgoff_t cluster_idx, + unsigned int compressed_size, int ret), + + TP_ARGS(inode, cluster_idx, compressed_size, ret) +); + +DEFINE_EVENT(f2fs_zip_end, f2fs_decompress_pages_end, + + TP_PROTO(struct inode *inode, pgoff_t cluster_idx, + unsigned int compressed_size, int ret), + + TP_ARGS(inode, cluster_idx, compressed_size, ret) +); + #endif /* _TRACE_F2FS_H */ /* This part must be outside protection */ From f61ddc1e8ed3c42787c7a00ed4413f2c2f202149 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 20 Dec 2019 17:20:05 -0800 Subject: [PATCH 2780/3715] f2fs: run fsck when getting bad inode during GC This is to avoid inifinite GC when trying to disable checkpoint. Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index bd94068647ff..1decb25002e2 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1049,8 +1049,10 @@ next_step: if (phase == 3) { inode = f2fs_iget(sb, dni.ino); - if (IS_ERR(inode) || is_bad_inode(inode)) + if (IS_ERR(inode) || is_bad_inode(inode)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); continue; + } if (!down_write_trylock( &F2FS_I(inode)->i_gc_rwsem[WRITE])) { From 3c72860b34bb48bc107bf246a932feeebaa34134 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 26 Oct 2018 15:10:55 -0700 Subject: [PATCH 2781/3715] mm: export add_swap_extent() Btrfs currently does not support swap files because swap's use of bmap does not work with copy-on-write and multiple devices. See 35054394c4b3 ("Btrfs: stop providing a bmap operation to avoid swapfile corruptions"). However, the swap code has a mechanism for the filesystem to manually add swap extents using add_swap_extent() from the ->swap_activate() aop. iomap has done this since 67482129cdab ("iomap: add a swapfile activation function"). Btrfs will do the same in a later patch, so export add_swap_extent(). Link: http://lkml.kernel.org/r/bb1208575e02829aae51b538709476964f97b1ea.1536704650.git.osandov@fb.com Signed-off-by: Omar Sandoval Reviewed-by: Andrew Morton Cc: David Sterba Cc: Johannes Weiner Cc: Nikolay Borisov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/swapfile.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/swapfile.c b/mm/swapfile.c index 2f68455f7d55..d2bd49059c3a 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2374,6 +2374,7 @@ add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, list_add_tail(&new_se->list, &sis->first_swap_extent.list); return 1; } +EXPORT_SYMBOL_GPL(add_swap_extent); /* * A `swap extent' is a simple thing which maps a contiguous range of pages From bcfd305ad4050102bd401e1a4f8db26d3863d06b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 27 Dec 2019 18:44:56 +0800 Subject: [PATCH 2782/3715] f2fs: fix to add swap extent correctly As Youling reported in mailing list: https://www.linuxquestions.org/questions/linux-newbie-8/the-file-system-f2fs-is-broken-4175666043/ https://www.linux.org/threads/the-file-system-f2fs-is-broken.26490/ There is a test case can corrupt f2fs image: - dd if=/dev/zero of=/swapfile bs=1M count=4096 - chmod 600 /swapfile - mkswap /swapfile - swapon --discard /swapfile The root cause is f2fs_swap_activate() intends to return zero value to setup_swap_extents() to enable SWP_FS mode (swap file goes through fs), in this flow, setup_swap_extents() setups swap extent with wrong block address range, result in discard_swap() erasing incorrect address. Because f2fs_swap_activate() has pinned swapfile, its data block address will not change, it's safe to let swap to handle IO through raw device, so we can get rid of SWAP_FS mode and initial swap extents inside f2fs_swap_activate(), by this way, later discard_swap() can trim in right address range. Fixes: 4969c06a0d83 ("f2fs: support swap file w/ DIO") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8ebaad834930..cd454876cf6e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3628,7 +3628,8 @@ int f2fs_migrate_page(struct address_space *mapping, #ifdef CONFIG_SWAP /* Copied from generic_swapfile_activate() to check any holes */ -static int check_swap_activate(struct file *swap_file, unsigned int max) +static int check_swap_activate(struct swap_info_struct *sis, + struct file *swap_file, sector_t *span) { struct address_space *mapping = swap_file->f_mapping; struct inode *inode = mapping->host; @@ -3639,6 +3640,8 @@ static int check_swap_activate(struct file *swap_file, unsigned int max) sector_t last_block; sector_t lowest_block = -1; sector_t highest_block = 0; + int nr_extents = 0; + int ret; blkbits = inode->i_blkbits; blocks_per_page = PAGE_SIZE >> blkbits; @@ -3650,7 +3653,8 @@ static int check_swap_activate(struct file *swap_file, unsigned int max) probe_block = 0; page_no = 0; last_block = i_size_read(inode) >> blkbits; - while ((probe_block + blocks_per_page) <= last_block && page_no < max) { + while ((probe_block + blocks_per_page) <= last_block && + page_no < sis->max) { unsigned block_in_page; sector_t first_block; @@ -3690,13 +3694,27 @@ static int check_swap_activate(struct file *swap_file, unsigned int max) highest_block = first_block; } + /* + * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks + */ + ret = add_swap_extent(sis, page_no, 1, first_block); + if (ret < 0) + goto out; + nr_extents += ret; page_no++; probe_block += blocks_per_page; reprobe: continue; } - return 0; - + ret = nr_extents; + *span = 1 + highest_block - lowest_block; + if (page_no == 0) + page_no = 1; /* force Empty message */ + sis->max = page_no; + sis->pages = page_no - 1; + sis->highest_bit = page_no - 1; +out: + return ret; bad_bmap: pr_err("swapon: swapfile has holes\n"); return -EINVAL; @@ -3721,14 +3739,14 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, if (f2fs_disable_compressed_file(inode)) return -EINVAL; - ret = check_swap_activate(file, sis->max); - if (ret) + ret = check_swap_activate(sis, file, span); + if (ret < 0) return ret; set_inode_flag(inode, FI_PIN_FILE); f2fs_precache_extents(inode); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); - return 0; + return ret; } static void f2fs_swap_deactivate(struct file *file) From 30f5d05f3c936560d1c00414206310d4436974f5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 30 Dec 2019 17:41:41 +0800 Subject: [PATCH 2783/3715] f2fs: fix memleak of kobject If kobject_init_and_add() failed, caller needs to invoke kobject_put() to release kobject explicitly. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 74e1dc129ac2..e347f87eecb9 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -756,10 +756,12 @@ int __init f2fs_init_sysfs(void) ret = kobject_init_and_add(&f2fs_feat, &f2fs_feat_ktype, NULL, "features"); - if (ret) + if (ret) { + kobject_put(&f2fs_feat); kset_unregister(&f2fs_kset); - else + } else { f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); + } return ret; } @@ -780,8 +782,11 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi) init_completion(&sbi->s_kobj_unregister); err = kobject_init_and_add(&sbi->s_kobj, &f2fs_sb_ktype, NULL, "%s", sb->s_id); - if (err) + if (err) { + kobject_put(&sbi->s_kobj); + wait_for_completion(&sbi->s_kobj_unregister); return err; + } if (f2fs_proc_root) sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); From a44ac57a0b370ba7f248d2334b31492ca05afbae Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 9 Dec 2019 19:03:05 -0800 Subject: [PATCH 2784/3715] f2fs: convert inline_dir early before starting rename If we hit an error during rename, we'll get two dentries in different directories. Chao adds to check the room in inline_dir which can avoid needless inversion. This should be done by inode_lock(&old_dir). Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 14 ++++++++++++++ fs/f2fs/f2fs.h | 3 +++ fs/f2fs/inline.c | 42 ++++++++++++++++++++++++++++++++++++++++-- fs/f2fs/namei.c | 37 ++++++++++++++----------------------- 4 files changed, 71 insertions(+), 25 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index dbb9f5a4d7a6..1373b46a7085 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -578,6 +578,20 @@ next: goto next; } +bool f2fs_has_enough_room(struct inode *dir, struct page *ipage, + struct fscrypt_name *fname) +{ + struct f2fs_dentry_ptr d; + unsigned int bit_pos; + int slots = GET_DENTRY_SLOTS(fname_len(fname)); + + make_dentry_ptr_inline(dir, &d, inline_data_addr(dir, ipage)); + + bit_pos = f2fs_room_for_filename(d.bitmap, slots, d.max); + + return bit_pos < d.max; +} + void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, const struct qstr *name, f2fs_hash_t name_hash, unsigned int bit_pos) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2725951b4e73..19c967c5b6a7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3106,6 +3106,8 @@ ino_t f2fs_inode_by_name(struct inode *dir, const struct qstr *qstr, struct page **page); void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, struct page *page, struct inode *inode); +bool f2fs_has_enough_room(struct inode *dir, struct page *ipage, + struct fscrypt_name *fname); void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, const struct qstr *name, f2fs_hash_t name_hash, unsigned int bit_pos); @@ -3646,6 +3648,7 @@ void f2fs_truncate_inline_inode(struct inode *inode, int f2fs_read_inline_data(struct inode *inode, struct page *page); int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page); int f2fs_convert_inline_inode(struct inode *inode); +int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry); int f2fs_write_inline_data(struct inode *inode, struct page *page); bool f2fs_recover_inline_data(struct inode *inode, struct page *npage); struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 0f8d0c171918..3b387675d05f 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -530,7 +530,7 @@ recover: return err; } -static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, +static int do_convert_inline_dir(struct inode *dir, struct page *ipage, void *inline_dentry) { if (!F2FS_I(dir)->i_dir_level) @@ -539,6 +539,44 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, return f2fs_move_rehashed_dirents(dir, ipage, inline_dentry); } +int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + struct page *ipage; + struct fscrypt_name fname; + void *inline_dentry = NULL; + int err = 0; + + if (!f2fs_has_inline_dentry(dir)) + return 0; + + f2fs_lock_op(sbi); + + err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &fname); + if (err) + goto out; + + ipage = f2fs_get_node_page(sbi, dir->i_ino); + if (IS_ERR(ipage)) { + err = PTR_ERR(ipage); + goto out; + } + + if (f2fs_has_enough_room(dir, ipage, &fname)) { + f2fs_put_page(ipage, 1); + goto out; + } + + inline_dentry = inline_data_addr(dir, ipage); + + err = do_convert_inline_dir(dir, ipage, inline_dentry); + if (!err) + f2fs_put_page(ipage, 1); +out: + f2fs_unlock_op(sbi); + return err; +} + int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, const struct qstr *orig_name, struct inode *inode, nid_t ino, umode_t mode) @@ -562,7 +600,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, bit_pos = f2fs_room_for_filename(d.bitmap, slots, d.max); if (bit_pos >= d.max) { - err = f2fs_convert_inline_dir(dir, ipage, inline_dentry); + err = do_convert_inline_dir(dir, ipage, inline_dentry); if (err) return err; err = -EAGAIN; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 22b031f70e75..efdab303d94a 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -910,7 +910,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, struct f2fs_dir_entry *old_dir_entry = NULL; struct f2fs_dir_entry *old_entry; struct f2fs_dir_entry *new_entry; - bool is_old_inline = f2fs_has_inline_dentry(old_dir); int err; if (unlikely(f2fs_cp_error(sbi))) @@ -923,6 +922,20 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, F2FS_I(old_dentry->d_inode)->i_projid))) return -EXDEV; + /* + * If new_inode is null, the below renaming flow will + * add a link in old_dir which can conver inline_dir. + * After then, if we failed to get the entry due to other + * reasons like ENOMEM, we had to remove the new entry. + * Instead of adding such the error handling routine, let's + * simply convert first here. + */ + if (old_dir == new_dir && !new_inode) { + err = f2fs_try_convert_inline_dir(old_dir, new_dentry); + if (err) + return err; + } + if (flags & RENAME_WHITEOUT) { err = f2fs_create_whiteout(old_dir, &whiteout); if (err) @@ -1010,28 +1023,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (old_dir_entry) f2fs_i_links_write(new_dir, true); - - /* - * old entry and new entry can locate in the same inline - * dentry in inode, when attaching new entry in inline dentry, - * it could force inline dentry conversion, after that, - * old_entry and old_page will point to wrong address, in - * order to avoid this, let's do the check and update here. - */ - if (is_old_inline && !f2fs_has_inline_dentry(old_dir)) { - f2fs_put_page(old_page, 0); - old_page = NULL; - - old_entry = f2fs_find_entry(old_dir, - &old_dentry->d_name, &old_page); - if (!old_entry) { - err = -ENOENT; - if (IS_ERR(old_page)) - err = PTR_ERR(old_page); - f2fs_unlock_op(sbi); - goto out_dir; - } - } } down_write(&F2FS_I(old_inode)->i_sem); From 9daea8e96e2737a58fa2e61f7c09e4e582deb484 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 31 Dec 2019 12:14:56 -0600 Subject: [PATCH 2785/3715] f2fs: remove unneeded check for error allocating bio_post_read_ctx Since allocating an object from a mempool never fails when __GFP_DIRECT_RECLAIM (which is included in GFP_NOFS) is set, the check for failure to allocate a bio_post_read_ctx is unnecessary. Remove it. Signed-off-by: Eric Biggers Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index cd454876cf6e..47b8c6b78cb2 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -938,11 +938,8 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, post_read_steps |= 1 << STEP_VERITY; if (post_read_steps) { + /* Due to the mempool, this never fails. */ ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS); - if (!ctx) { - bio_put(bio); - return ERR_PTR(-ENOMEM); - } ctx->bio = bio; ctx->sbi = sbi; ctx->enabled_steps = post_read_steps; From 79493bf183df834b219f678aceb7748224719644 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 31 Dec 2019 12:14:16 -0600 Subject: [PATCH 2786/3715] f2fs: fix deadlock allocating bio_post_read_ctx from mempool Without any form of coordination, any case where multiple allocations from the same mempool are needed at a time to make forward progress can deadlock under memory pressure. This is the case for struct bio_post_read_ctx, as one can be allocated to decrypt a Merkle tree page during fsverity_verify_bio(), which itself is running from a post-read callback for a data bio which has its own struct bio_post_read_ctx. Fix this by freeing first bio_post_read_ctx before calling fsverity_verify_bio(). This works because verity (if enabled) is always the last post-read step. This deadlock can be reproduced by trying to read from an encrypted verity file after reducing NUM_PREALLOC_POST_READ_CTXS to 1 and patching mempool_alloc() to pretend that pool->alloc() always fails. Note that since NUM_PREALLOC_POST_READ_CTXS is actually 128, to actually hit this bug in practice would require reading from lots of encrypted verity files at the same time. But it's theoretically possible, as N available objects doesn't guarantee forward progress when > N/2 threads each need 2 objects at a time. Fixes: 95ae251fe828 ("f2fs: add fs-verity support") Signed-off-by: Eric Biggers Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 47b8c6b78cb2..2dfeeda3fea9 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -205,19 +205,32 @@ static void f2fs_verity_work(struct work_struct *work) { struct bio_post_read_ctx *ctx = container_of(work, struct bio_post_read_ctx, work); + struct bio *bio = ctx->bio; +#ifdef CONFIG_F2FS_FS_COMPRESSION + unsigned int enabled_steps = ctx->enabled_steps; +#endif + + /* + * fsverity_verify_bio() may call readpages() again, and while verity + * will be disabled for this, decryption may still be needed, resulting + * in another bio_post_read_ctx being allocated. So to prevent + * deadlocks we need to release the current ctx to the mempool first. + * This assumes that verity is the last post-read step. + */ + mempool_free(ctx, bio_post_read_ctx_pool); + bio->bi_private = NULL; #ifdef CONFIG_F2FS_FS_COMPRESSION /* previous step is decompression */ - if (ctx->enabled_steps & (1 << STEP_DECOMPRESS)) { - - f2fs_verify_bio(ctx->bio); - f2fs_release_read_bio(ctx->bio); + if (enabled_steps & (1 << STEP_DECOMPRESS)) { + f2fs_verify_bio(bio); + f2fs_release_read_bio(bio); return; } #endif - fsverity_verify_bio(ctx->bio); - __f2fs_read_end_io(ctx->bio, false, false); + fsverity_verify_bio(bio); + __f2fs_read_end_io(bio, false, false); } static void f2fs_post_read_work(struct work_struct *work) From 3b80b2b0b177b8663e3a4f340515f9a37756963d Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Fri, 3 Jan 2020 08:49:28 +0530 Subject: [PATCH 2787/3715] f2fs: show the CP_PAUSE reason in checkpoint traces Remove the duplicate CP_UMOUNT enum and add the new CP_PAUSE enum to show the checkpoint reason in the trace prints. Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/trace/events/f2fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index b39e8e90b6ea..ed0d788495fc 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -49,6 +49,7 @@ TRACE_DEFINE_ENUM(CP_SYNC); TRACE_DEFINE_ENUM(CP_RECOVERY); TRACE_DEFINE_ENUM(CP_DISCARD); TRACE_DEFINE_ENUM(CP_TRIMMED); +TRACE_DEFINE_ENUM(CP_PAUSE); #define show_block_type(type) \ __print_symbolic(type, \ @@ -134,7 +135,7 @@ TRACE_DEFINE_ENUM(CP_TRIMMED); { CP_SYNC, "Sync" }, \ { CP_RECOVERY, "Recovery" }, \ { CP_DISCARD, "Discard" }, \ - { CP_UMOUNT, "Umount" }, \ + { CP_PAUSE, "Pause" }, \ { CP_TRIMMED, "Trimmed" }) #define show_fsync_cpreason(type) \ From de0207330245697498b7f7addb312d587d1d09c8 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Sat, 4 Jan 2020 22:20:03 +0800 Subject: [PATCH 2788/3715] f2fs: fix miscounted block limit in f2fs_statfs_project() statfs calculates Total/Used/Avail disk space in block unit, so we should translate soft/hard prjquota limit to block unit as well. Below testing result shows the block/inode numbers of Total/Used/Avail from df command are all correct afer applying this patch. [root@localhost quota-tools]\# ./repquota -P /dev/sdb1 *** Report for project quotas on device /dev/sdb1 Block grace time: 7days; Inode grace time: 7days Block limits File limits Project used soft hard grace used soft hard grace ----------------------------------------------------------- \#0 -- 4 0 0 1 0 0 \#101 -- 0 0 0 2 0 0 \#102 -- 0 10240 0 2 10 0 \#103 -- 0 0 20480 2 0 20 \#104 -- 0 10240 20480 2 10 20 \#105 -- 0 20480 10240 2 20 10 [root@localhost sdb1]\# lsattr -p t{1,2,3,4,5} 101 ----------------N-- t1/a1 102 ----------------N-- t2/a2 103 ----------------N-- t3/a3 104 ----------------N-- t4/a4 105 ----------------N-- t5/a5 [root@localhost sdb1]\# df -hi t{1,2,3,4,5} Filesystem Inodes IUsed IFree IUse% Mounted on /dev/sdb1 2.4M 21 2.4M 1% /mnt/sdb1 /dev/sdb1 10 2 8 20% /mnt/sdb1 /dev/sdb1 20 2 18 10% /mnt/sdb1 /dev/sdb1 10 2 8 20% /mnt/sdb1 /dev/sdb1 10 2 8 20% /mnt/sdb1 [root@localhost sdb1]\# df -h t{1,2,3,4,5} Filesystem Size Used Avail Use% Mounted on /dev/sdb1 10G 489M 9.6G 5% /mnt/sdb1 /dev/sdb1 10M 0 10M 0% /mnt/sdb1 /dev/sdb1 20M 0 20M 0% /mnt/sdb1 /dev/sdb1 10M 0 10M 0% /mnt/sdb1 /dev/sdb1 10M 0 10M 0% /mnt/sdb1 Fixes: 909110c060f2 ("f2fs: choose hardlimit when softlimit is larger than hardlimit in f2fs_statfs_project()") Signed-off-by: Chengguang Xu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 44afb10cbcb8..6c8198e02d75 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1296,6 +1296,8 @@ static int f2fs_statfs_project(struct super_block *sb, if (dquot->dq_dqb.dqb_bhardlimit && (!limit || dquot->dq_dqb.dqb_bhardlimit < limit)) limit = dquot->dq_dqb.dqb_bhardlimit; + if (limit) + limit >>= sb->s_blocksize_bits; if (limit && buf->f_blocks > limit) { curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits; From 124544e89ca3b9b61786871dfafca1615a518c46 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Sat, 4 Jan 2020 22:20:04 +0800 Subject: [PATCH 2789/3715] f2fs: code cleanup for f2fs_statfs_project() Calling min_not_zero() to simplify complicated prjquota limit comparison in f2fs_statfs_project(). Signed-off-by: Chengguang Xu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 6c8198e02d75..3b82af81812b 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1290,12 +1290,8 @@ static int f2fs_statfs_project(struct super_block *sb, return PTR_ERR(dquot); spin_lock(&dquot->dq_dqb_lock); - limit = 0; - if (dquot->dq_dqb.dqb_bsoftlimit) - limit = dquot->dq_dqb.dqb_bsoftlimit; - if (dquot->dq_dqb.dqb_bhardlimit && - (!limit || dquot->dq_dqb.dqb_bhardlimit < limit)) - limit = dquot->dq_dqb.dqb_bhardlimit; + limit = min_not_zero(dquot->dq_dqb.dqb_bsoftlimit, + dquot->dq_dqb.dqb_bhardlimit); if (limit) limit >>= sb->s_blocksize_bits; @@ -1307,12 +1303,8 @@ static int f2fs_statfs_project(struct super_block *sb, (buf->f_blocks - curblock) : 0; } - limit = 0; - if (dquot->dq_dqb.dqb_isoftlimit) - limit = dquot->dq_dqb.dqb_isoftlimit; - if (dquot->dq_dqb.dqb_ihardlimit && - (!limit || dquot->dq_dqb.dqb_ihardlimit < limit)) - limit = dquot->dq_dqb.dqb_ihardlimit; + limit = min_not_zero(dquot->dq_dqb.dqb_isoftlimit, + dquot->dq_dqb.dqb_ihardlimit); if (limit && buf->f_files > limit) { buf->f_files = limit; From 75d1a8c497365d61e1d776e68a11d33652c2720c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 6 Jan 2020 16:43:09 -0800 Subject: [PATCH 2790/3715] f2fs: add a way to turn off ipu bio cache Setting 0x40 in /sys/fs/f2fs/dev/ipu_policy gives a way to turn off bio cache, which is useufl to check whether block layer using hardware encryption engine merges IOs correctly. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 5 +++-- fs/f2fs/segment.c | 2 +- fs/f2fs/segment.h | 2 ++ 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index f211561b6e63..7f42dc504d6a 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -331,10 +331,11 @@ Files in /sys/fs/f2fs/ conducts. 32 sections is set by default. ipu_policy This parameter controls the policy of in-place - updates in f2fs. There are five policies: + updates in f2fs. User can set: 0x01: F2FS_IPU_FORCE, 0x02: F2FS_IPU_SSR, 0x04: F2FS_IPU_UTIL, 0x08: F2FS_IPU_SSR_UTIL, - 0x10: F2FS_IPU_FSYNC. + 0x10: F2FS_IPU_FSYNC, 0x40: F2FS_IPU_NOCACHE. + Refer segment.h for details. min_ipu_util This parameter controls the threshold to trigger in-place-updates. The number indicates percentage diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 7633aade0e84..03d3541cfd93 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3288,7 +3288,7 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) stat_inc_inplace_blocks(fio->sbi); - if (fio->bio) + if (fio->bio && !(SM_I(sbi)->ipu_policy & (1 << F2FS_IPU_NOCACHE))) err = f2fs_merge_page_bio(fio); else err = f2fs_submit_page_bio(fio); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index a1b3951367cd..5e6cd8d8411d 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -608,6 +608,7 @@ static inline int utilization(struct f2fs_sb_info *sbi) * F2FS_IPU_FSYNC - activated in fsync path only for high performance flash * storages. IPU will be triggered only if the # of dirty * pages over min_fsync_blocks. + * F2FS_IPU_NOCACHE - disable IPU bio cache. * F2FS_IPUT_DISABLE - disable IPU. (=default option) */ #define DEF_MIN_IPU_UTIL 70 @@ -623,6 +624,7 @@ enum { F2FS_IPU_SSR_UTIL, F2FS_IPU_FSYNC, F2FS_IPU_ASYNC, + F2FS_IPU_NOCACHE, }; static inline unsigned int curseg_segno(struct f2fs_sb_info *sbi, From 52e959c6db2cce20288aa770ae87680eb22091d7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 8 Jan 2020 15:10:02 -0800 Subject: [PATCH 2791/3715] f2fs: update f2fs document regarding to fsync_mode This patch adds missing fsync_mode entry in f2fs document. Fixes: 04485987f053 ("f2fs: introduce async IPU policy") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 3 ++- fs/f2fs/segment.h | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 7f42dc504d6a..36799a38e32d 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -334,7 +334,8 @@ Files in /sys/fs/f2fs/ updates in f2fs. User can set: 0x01: F2FS_IPU_FORCE, 0x02: F2FS_IPU_SSR, 0x04: F2FS_IPU_UTIL, 0x08: F2FS_IPU_SSR_UTIL, - 0x10: F2FS_IPU_FSYNC, 0x40: F2FS_IPU_NOCACHE. + 0x10: F2FS_IPU_FSYNC, 0x20: F2FS_IPU_ASYNC, + 0x40: F2FS_IPU_NOCACHE. Refer segment.h for details. min_ipu_util This parameter controls the threshold to trigger diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 5e6cd8d8411d..459dc3901a57 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -607,9 +607,10 @@ static inline int utilization(struct f2fs_sb_info *sbi) * threashold, * F2FS_IPU_FSYNC - activated in fsync path only for high performance flash * storages. IPU will be triggered only if the # of dirty - * pages over min_fsync_blocks. + * pages over min_fsync_blocks. (=default option) + * F2FS_IPU_ASYNC - do IPU given by asynchronous write requests. * F2FS_IPU_NOCACHE - disable IPU bio cache. - * F2FS_IPUT_DISABLE - disable IPU. (=default option) + * F2FS_IPUT_DISABLE - disable IPU. (=default option in LFS mode) */ #define DEF_MIN_IPU_UTIL 70 #define DEF_MIN_FSYNC_BLOCKS 8 From a0574e03c824a7ae082756f11710463de0fe4579 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 14 Jan 2020 19:36:50 +0800 Subject: [PATCH 2792/3715] f2fs: change to use rwsem for gc_mutex Mutex lock won't serialize callers, in order to avoid starving of unlucky caller, let's use rwsem lock instead. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 5 ++++- fs/f2fs/file.c | 12 ++++++------ fs/f2fs/gc.c | 12 ++++++------ fs/f2fs/segment.c | 6 +++--- fs/f2fs/super.c | 16 ++++++++-------- 5 files changed, 27 insertions(+), 24 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 19c967c5b6a7..5dc10adb0ee1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1389,7 +1389,10 @@ struct f2fs_sb_info { struct f2fs_mount_info mount_opt; /* mount options */ /* for cleaning operations */ - struct mutex gc_mutex; /* mutex for GC */ + struct rw_semaphore gc_lock; /* + * semaphore for GC, avoid + * race between GC and GC or CP + */ struct f2fs_gc_kthread *gc_thread; /* GC thread */ unsigned int cur_victim_sec; /* current victim section num */ unsigned int gc_mode; /* current GC state */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 788f5cf1561f..b5e87d0403e8 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1638,7 +1638,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset, next_alloc: if (has_not_enough_free_secs(sbi, 0, GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) { - mutex_lock(&sbi->gc_mutex); + down_write(&sbi->gc_lock); err = f2fs_gc(sbi, true, false, NULL_SEGNO); if (err && err != -ENODATA && err != -EAGAIN) goto out_err; @@ -2446,12 +2446,12 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg) return ret; if (!sync) { - if (!mutex_trylock(&sbi->gc_mutex)) { + if (!down_write_trylock(&sbi->gc_lock)) { ret = -EBUSY; goto out; } } else { - mutex_lock(&sbi->gc_mutex); + down_write(&sbi->gc_lock); } ret = f2fs_gc(sbi, sync, true, NULL_SEGNO); @@ -2489,12 +2489,12 @@ static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg) do_more: if (!range.sync) { - if (!mutex_trylock(&sbi->gc_mutex)) { + if (!down_write_trylock(&sbi->gc_lock)) { ret = -EBUSY; goto out; } } else { - mutex_lock(&sbi->gc_mutex); + down_write(&sbi->gc_lock); } ret = f2fs_gc(sbi, range.sync, true, GET_SEGNO(sbi, range.start)); @@ -2925,7 +2925,7 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) end_segno = min(start_segno + range.segments, dev_end_segno); while (start_segno < end_segno) { - if (!mutex_trylock(&sbi->gc_mutex)) { + if (!down_write_trylock(&sbi->gc_lock)) { ret = -EBUSY; goto out; } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 1decb25002e2..05b3448dec52 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -78,18 +78,18 @@ static int gc_thread_func(void *data) */ if (sbi->gc_mode == GC_URGENT) { wait_ms = gc_th->urgent_sleep_time; - mutex_lock(&sbi->gc_mutex); + down_write(&sbi->gc_lock); goto do_gc; } - if (!mutex_trylock(&sbi->gc_mutex)) { + if (!down_write_trylock(&sbi->gc_lock)) { stat_other_skip_bggc_count(sbi); goto next; } if (!is_idle(sbi, GC_TIME)) { increase_sleep_time(gc_th, &wait_ms); - mutex_unlock(&sbi->gc_mutex); + up_write(&sbi->gc_lock); stat_io_skip_bggc_count(sbi); goto next; } @@ -1370,7 +1370,7 @@ stop: reserved_segments(sbi), prefree_segments(sbi)); - mutex_unlock(&sbi->gc_mutex); + up_write(&sbi->gc_lock); put_gc_inode(&gc_list); @@ -1409,9 +1409,9 @@ static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start, .iroot = RADIX_TREE_INIT(GFP_NOFS), }; - mutex_lock(&sbi->gc_mutex); + down_write(&sbi->gc_lock); do_garbage_collect(sbi, segno, &gc_list, FG_GC); - mutex_unlock(&sbi->gc_mutex); + up_write(&sbi->gc_lock); put_gc_inode(&gc_list); if (get_valid_blocks(sbi, segno, true)) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 03d3541cfd93..dea5e265c8cc 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -504,7 +504,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) * dir/node pages without enough free segments. */ if (has_not_enough_free_secs(sbi, 0, 0)) { - mutex_lock(&sbi->gc_mutex); + down_write(&sbi->gc_lock); f2fs_gc(sbi, false, false, NULL_SEGNO); } } @@ -2859,9 +2859,9 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) if (sbi->discard_blks == 0) goto out; - mutex_lock(&sbi->gc_mutex); + down_write(&sbi->gc_lock); err = f2fs_write_checkpoint(sbi, &cpc); - mutex_unlock(&sbi->gc_mutex); + up_write(&sbi->gc_lock); if (err) goto out; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 3b82af81812b..1ee49e33f480 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1246,9 +1246,9 @@ int f2fs_sync_fs(struct super_block *sb, int sync) cpc.reason = __get_cp_reason(sbi); - mutex_lock(&sbi->gc_mutex); + down_write(&sbi->gc_lock); err = f2fs_write_checkpoint(sbi, &cpc); - mutex_unlock(&sbi->gc_mutex); + up_write(&sbi->gc_lock); } f2fs_trace_ios(NULL, 1); @@ -1629,7 +1629,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) f2fs_update_time(sbi, DISABLE_TIME); while (!f2fs_time_over(sbi, DISABLE_TIME)) { - mutex_lock(&sbi->gc_mutex); + down_write(&sbi->gc_lock); err = f2fs_gc(sbi, true, false, NULL_SEGNO); if (err == -ENODATA) { err = 0; @@ -1651,7 +1651,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) goto restore_flag; } - mutex_lock(&sbi->gc_mutex); + down_write(&sbi->gc_lock); cpc.reason = CP_PAUSE; set_sbi_flag(sbi, SBI_CP_DISABLED); err = f2fs_write_checkpoint(sbi, &cpc); @@ -1663,7 +1663,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) spin_unlock(&sbi->stat_lock); out_unlock: - mutex_unlock(&sbi->gc_mutex); + up_write(&sbi->gc_lock); restore_flag: sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */ return err; @@ -1671,12 +1671,12 @@ restore_flag: static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi) { - mutex_lock(&sbi->gc_mutex); + down_write(&sbi->gc_lock); f2fs_dirty_to_prefree(sbi); clear_sbi_flag(sbi, SBI_CP_DISABLED); set_sbi_flag(sbi, SBI_IS_DIRTY); - mutex_unlock(&sbi->gc_mutex); + up_write(&sbi->gc_lock); f2fs_sync_fs(sbi->sb, 1); } @@ -3427,7 +3427,7 @@ try_onemore: /* init f2fs-specific super block info */ sbi->valid_super_block = valid_super_block; - mutex_init(&sbi->gc_mutex); + init_rwsem(&sbi->gc_lock); mutex_init(&sbi->writepages); mutex_init(&sbi->cp_mutex); mutex_init(&sbi->resize_mutex); From 608333b10ba9d12be168dfee1b90e9b5590a8cc4 Mon Sep 17 00:00:00 2001 From: Hridya Valsaraju Date: Wed, 22 Jan 2020 10:49:20 -0800 Subject: [PATCH 2793/3715] f2fs: delete duplicate information on sysfs nodes This patch merges the sysfs node documentation present in Documentation/filesystems/f2fs.txt and Documentation/ABI/testing/sysfs-fs-f2fs and deletes the duplicate information from Documentation/filesystems/f2fs.txt. This is to prevent having to update both files when a new sysfs node is added for f2fs. The patch also makes minor formatting changes to Documentation/ABI/testing/sysfs-fs-f2fs. Signed-off-by: Hridya Valsaraju Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 231 ++++++++++++------------ Documentation/filesystems/f2fs.txt | 166 ----------------- 2 files changed, 119 insertions(+), 278 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 0cd608e3ed12..48c387d9fd87 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -1,37 +1,40 @@ What: /sys/fs/f2fs//gc_max_sleep_time Date: July 2013 Contact: "Namjae Jeon" -Description: - Controls the maximun sleep time for gc_thread. Time - is in milliseconds. +Description: Controls the maximum sleep time for gc_thread. Time + is in milliseconds. What: /sys/fs/f2fs//gc_min_sleep_time Date: July 2013 Contact: "Namjae Jeon" -Description: - Controls the minimum sleep time for gc_thread. Time - is in milliseconds. +Description: Controls the minimum sleep time for gc_thread. Time + is in milliseconds. What: /sys/fs/f2fs//gc_no_gc_sleep_time Date: July 2013 Contact: "Namjae Jeon" -Description: - Controls the default sleep time for gc_thread. Time - is in milliseconds. +Description: Controls the default sleep time for gc_thread. Time + is in milliseconds. What: /sys/fs/f2fs//gc_idle Date: July 2013 Contact: "Namjae Jeon" -Description: - Controls the victim selection policy for garbage collection. +Description: Controls the victim selection policy for garbage collection. + Setting gc_idle = 0(default) will disable this option. Setting + gc_idle = 1 will select the Cost Benefit approach & setting + gc_idle = 2 will select the greedy approach. What: /sys/fs/f2fs//reclaim_segments Date: October 2013 Contact: "Jaegeuk Kim" -Description: - Controls the issue rate of segment discard commands. +Description: This parameter controls the number of prefree segments to be + reclaimed. If the number of prefree segments is larger than + the number of segments in the proportion to the percentage + over total volume size, f2fs tries to conduct checkpoint to + reclaim the prefree segments to free segments. + By default, 5% over total # of segments. -What: /sys/fs/f2fs//max_blkaddr +What: /sys/fs/f2fs//main_blkaddr Date: November 2019 Contact: "Ramon Pantin" Description: @@ -40,227 +43,231 @@ Description: What: /sys/fs/f2fs//ipu_policy Date: November 2013 Contact: "Jaegeuk Kim" -Description: - Controls the in-place-update policy. +Description: Controls the in-place-update policy. + updates in f2fs. User can set: + 0x01: F2FS_IPU_FORCE, 0x02: F2FS_IPU_SSR, + 0x04: F2FS_IPU_UTIL, 0x08: F2FS_IPU_SSR_UTIL, + 0x10: F2FS_IPU_FSYNC, 0x20: F2FS_IPU_ASYNC, + 0x40: F2FS_IPU_NOCACHE. + Refer segment.h for details. What: /sys/fs/f2fs//min_ipu_util Date: November 2013 Contact: "Jaegeuk Kim" -Description: - Controls the FS utilization condition for the in-place-update - policies. +Description: Controls the FS utilization condition for the in-place-update + policies. It is used by F2FS_IPU_UTIL and F2FS_IPU_SSR_UTIL policies. What: /sys/fs/f2fs//min_fsync_blocks Date: September 2014 Contact: "Jaegeuk Kim" -Description: - Controls the dirty page count condition for the in-place-update - policies. +Description: Controls the dirty page count condition for the in-place-update + policies. What: /sys/fs/f2fs//min_seq_blocks Date: August 2018 Contact: "Jaegeuk Kim" -Description: - Controls the dirty page count condition for batched sequential - writes in ->writepages. - +Description: Controls the dirty page count condition for batched sequential + writes in writepages. What: /sys/fs/f2fs//min_hot_blocks Date: March 2017 Contact: "Jaegeuk Kim" -Description: - Controls the dirty page count condition for redefining hot data. +Description: Controls the dirty page count condition for redefining hot data. What: /sys/fs/f2fs//min_ssr_sections Date: October 2017 Contact: "Chao Yu" -Description: - Controls the fee section threshold to trigger SSR allocation. +Description: Controls the free section threshold to trigger SSR allocation. + If this is large, SSR mode will be enabled early. What: /sys/fs/f2fs//max_small_discards Date: November 2013 Contact: "Jaegeuk Kim" -Description: - Controls the issue rate of small discard commands. +Description: Controls the issue rate of discard commands that consist of small + blocks less than 2MB. The candidates to be discarded are cached until + checkpoint is triggered, and issued during the checkpoint. + By default, it is disabled with 0. -What: /sys/fs/f2fs//discard_granularity -Date: July 2017 -Contact: "Chao Yu" -Description: - Controls discard granularity of inner discard thread, inner thread +What: /sys/fs/f2fs//discard_granularity +Date: July 2017 +Contact: "Chao Yu" +Description: Controls discard granularity of inner discard thread. Inner thread will not issue discards with size that is smaller than granularity. - The unit size is one block, now only support configuring in range - of [1, 512]. + The unit size is one block(4KB), now only support configuring + in range of [1, 512]. Default value is 4(=16KB). -What: /sys/fs/f2fs//umount_discard_timeout -Date: January 2019 -Contact: "Jaegeuk Kim" -Description: - Set timeout to issue discard commands during umount. - Default: 5 secs +What: /sys/fs/f2fs//umount_discard_timeout +Date: January 2019 +Contact: "Jaegeuk Kim" +Description: Set timeout to issue discard commands during umount. + Default: 5 secs What: /sys/fs/f2fs//max_victim_search Date: January 2014 Contact: "Jaegeuk Kim" -Description: - Controls the number of trials to find a victim segment. +Description: Controls the number of trials to find a victim segment + when conducting SSR and cleaning operations. The default value + is 4096 which covers 8GB block address range. What: /sys/fs/f2fs//migration_granularity Date: October 2018 Contact: "Chao Yu" -Description: - Controls migration granularity of garbage collection on large - section, it can let GC move partial segment{s} of one section - in one GC cycle, so that dispersing heavy overhead GC to - multiple lightweight one. +Description: Controls migration granularity of garbage collection on large + section, it can let GC move partial segment{s} of one section + in one GC cycle, so that dispersing heavy overhead GC to + multiple lightweight one. What: /sys/fs/f2fs//dir_level Date: March 2014 Contact: "Jaegeuk Kim" -Description: - Controls the directory level for large directory. +Description: Controls the directory level for large directory. If a + directory has a number of files, it can reduce the file lookup + latency by increasing this dir_level value. Otherwise, it + needs to decrease this value to reduce the space overhead. + The default value is 0. What: /sys/fs/f2fs//ram_thresh Date: March 2014 Contact: "Jaegeuk Kim" -Description: - Controls the memory footprint used by f2fs. +Description: Controls the memory footprint used by free nids and cached + nat entries. By default, 1 is set, which indicates + 10 MB / 1 GB RAM. What: /sys/fs/f2fs//batched_trim_sections Date: February 2015 Contact: "Jaegeuk Kim" -Description: - Controls the trimming rate in batch mode. - +Description: Controls the trimming rate in batch mode. + What: /sys/fs/f2fs//cp_interval Date: October 2015 Contact: "Jaegeuk Kim" -Description: - Controls the checkpoint timing. +Description: Controls the checkpoint timing, set to 60 seconds by default. What: /sys/fs/f2fs//idle_interval Date: January 2016 Contact: "Jaegeuk Kim" -Description: - Controls the idle timing for all paths other than - discard and gc path. +Description: Controls the idle timing of system, if there is no FS operation + during given interval. + Set to 5 seconds by default. What: /sys/fs/f2fs//discard_idle_interval Date: September 2018 Contact: "Chao Yu" Contact: "Sahitya Tummala" -Description: - Controls the idle timing for discard path. +Description: Controls the idle timing of discard thread given + this time interval. + Default is 5 secs. What: /sys/fs/f2fs//gc_idle_interval Date: September 2018 Contact: "Chao Yu" Contact: "Sahitya Tummala" -Description: - Controls the idle timing for gc path. +Description: Controls the idle timing for gc path. Set to 5 seconds by default. What: /sys/fs/f2fs//iostat_enable Date: August 2017 Contact: "Chao Yu" -Description: - Controls to enable/disable IO stat. +Description: Controls to enable/disable IO stat. What: /sys/fs/f2fs//ra_nid_pages Date: October 2015 Contact: "Chao Yu" -Description: - Controls the count of nid pages to be readaheaded. +Description: Controls the count of nid pages to be readaheaded. + When building free nids, F2FS reads NAT blocks ahead for + speed up. Default is 0. What: /sys/fs/f2fs//dirty_nats_ratio Date: January 2016 Contact: "Chao Yu" -Description: - Controls dirty nat entries ratio threshold, if current - ratio exceeds configured threshold, checkpoint will - be triggered for flushing dirty nat entries. +Description: Controls dirty nat entries ratio threshold, if current + ratio exceeds configured threshold, checkpoint will + be triggered for flushing dirty nat entries. What: /sys/fs/f2fs//lifetime_write_kbytes Date: January 2016 Contact: "Shuoran Liu" -Description: - Shows total written kbytes issued to disk. +Description: Shows total written kbytes issued to disk. What: /sys/fs/f2fs//feature Date: July 2017 Contact: "Jaegeuk Kim" -Description: - Shows all enabled features in current device. +Description: Shows all enabled features in current device. What: /sys/fs/f2fs//inject_rate Date: May 2016 Contact: "Sheng Yong" -Description: - Controls the injection rate. +Description: Controls the injection rate of arbitrary faults. What: /sys/fs/f2fs//inject_type Date: May 2016 Contact: "Sheng Yong" -Description: - Controls the injection type. +Description: Controls the injection type of arbitrary faults. + +What: /sys/fs/f2fs//dirty_segments +Date: October 2017 +Contact: "Jaegeuk Kim" +Description: Shows the number of dirty segments. What: /sys/fs/f2fs//reserved_blocks Date: June 2017 Contact: "Chao Yu" -Description: - Controls target reserved blocks in system, the threshold - is soft, it could exceed current available user space. +Description: Controls target reserved blocks in system, the threshold + is soft, it could exceed current available user space. What: /sys/fs/f2fs//current_reserved_blocks Date: October 2017 Contact: "Yunlong Song" Contact: "Chao Yu" -Description: - Shows current reserved blocks in system, it may be temporarily - smaller than target_reserved_blocks, but will gradually - increase to target_reserved_blocks when more free blocks are - freed by user later. +Description: Shows current reserved blocks in system, it may be temporarily + smaller than target_reserved_blocks, but will gradually + increase to target_reserved_blocks when more free blocks are + freed by user later. What: /sys/fs/f2fs//gc_urgent Date: August 2017 Contact: "Jaegeuk Kim" -Description: - Do background GC agressively +Description: Do background GC agressively when set. When gc_urgent = 1, + background thread starts to do GC by given gc_urgent_sleep_time + interval. It is set to 0 by default. What: /sys/fs/f2fs//gc_urgent_sleep_time Date: August 2017 Contact: "Jaegeuk Kim" -Description: - Controls sleep time of GC urgent mode +Description: Controls sleep time of GC urgent mode. Set to 500ms by default. What: /sys/fs/f2fs//readdir_ra Date: November 2017 Contact: "Sheng Yong" -Description: - Controls readahead inode block in readdir. +Description: Controls readahead inode block in readdir. Enabled by default. + +What: /sys/fs/f2fs//gc_pin_file_thresh +Date: January 2018 +Contact: Jaegeuk Kim +Description: This indicates how many GC can be failed for the pinned + file. If it exceeds this, F2FS doesn't guarantee its pinning + state. 2048 trials is set by default. What: /sys/fs/f2fs//extension_list Date: Feburary 2018 Contact: "Chao Yu" -Description: - Used to control configure extension list: - - Query: cat /sys/fs/f2fs//extension_list - - Add: echo '[h/c]extension' > /sys/fs/f2fs//extension_list - - Del: echo '[h/c]!extension' > /sys/fs/f2fs//extension_list - - [h] means add/del hot file extension - - [c] means add/del cold file extension +Description: Used to control configure extension list: + - Query: cat /sys/fs/f2fs//extension_list + - Add: echo '[h/c]extension' > /sys/fs/f2fs//extension_list + - Del: echo '[h/c]!extension' > /sys/fs/f2fs//extension_list + - [h] means add/del hot file extension + - [c] means add/del cold file extension What: /sys/fs/f2fs//unusable Date April 2019 Contact: "Daniel Rosenberg" -Description: - If checkpoint=disable, it displays the number of blocks that are unusable. - If checkpoint=enable it displays the enumber of blocks that would be unusable - if checkpoint=disable were to be set. +Description: If checkpoint=disable, it displays the number of blocks that + are unusable. + If checkpoint=enable it displays the enumber of blocks that + would be unusable if checkpoint=disable were to be set. What: /sys/fs/f2fs//encoding Date July 2019 Contact: "Daniel Rosenberg" -Description: - Displays name and version of the encoding set for the filesystem. - If no encoding is set, displays (none) +Description: Displays name and version of the encoding set for the filesystem. + If no encoding is set, displays (none) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 36799a38e32d..b4de21867b96 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -270,172 +270,6 @@ The files in each per-device directory are shown in table below. Files in /sys/fs/f2fs/ (see also Documentation/ABI/testing/sysfs-fs-f2fs) -.............................................................................. - File Content - - gc_urgent_sleep_time This parameter controls sleep time for gc_urgent. - 500 ms is set by default. See above gc_urgent. - - gc_min_sleep_time This tuning parameter controls the minimum sleep - time for the garbage collection thread. Time is - in milliseconds. - - gc_max_sleep_time This tuning parameter controls the maximum sleep - time for the garbage collection thread. Time is - in milliseconds. - - gc_no_gc_sleep_time This tuning parameter controls the default sleep - time for the garbage collection thread. Time is - in milliseconds. - - gc_idle This parameter controls the selection of victim - policy for garbage collection. Setting gc_idle = 0 - (default) will disable this option. Setting - gc_idle = 1 will select the Cost Benefit approach - & setting gc_idle = 2 will select the greedy approach. - - gc_urgent This parameter controls triggering background GCs - urgently or not. Setting gc_urgent = 0 [default] - makes back to default behavior, while if it is set - to 1, background thread starts to do GC by given - gc_urgent_sleep_time interval. - - reclaim_segments This parameter controls the number of prefree - segments to be reclaimed. If the number of prefree - segments is larger than the number of segments - in the proportion to the percentage over total - volume size, f2fs tries to conduct checkpoint to - reclaim the prefree segments to free segments. - By default, 5% over total # of segments. - - main_blkaddr This value gives the first block address of - MAIN area in the partition. - - max_small_discards This parameter controls the number of discard - commands that consist small blocks less than 2MB. - The candidates to be discarded are cached until - checkpoint is triggered, and issued during the - checkpoint. By default, it is disabled with 0. - - discard_granularity This parameter controls the granularity of discard - command size. It will issue discard commands iif - the size is larger than given granularity. Its - unit size is 4KB, and 4 (=16KB) is set by default. - The maximum value is 128 (=512KB). - - reserved_blocks This parameter indicates the number of blocks that - f2fs reserves internally for root. - - batched_trim_sections This parameter controls the number of sections - to be trimmed out in batch mode when FITRIM - conducts. 32 sections is set by default. - - ipu_policy This parameter controls the policy of in-place - updates in f2fs. User can set: - 0x01: F2FS_IPU_FORCE, 0x02: F2FS_IPU_SSR, - 0x04: F2FS_IPU_UTIL, 0x08: F2FS_IPU_SSR_UTIL, - 0x10: F2FS_IPU_FSYNC, 0x20: F2FS_IPU_ASYNC, - 0x40: F2FS_IPU_NOCACHE. - Refer segment.h for details. - - min_ipu_util This parameter controls the threshold to trigger - in-place-updates. The number indicates percentage - of the filesystem utilization, and used by - F2FS_IPU_UTIL and F2FS_IPU_SSR_UTIL policies. - - min_fsync_blocks This parameter controls the threshold to trigger - in-place-updates when F2FS_IPU_FSYNC mode is set. - The number indicates the number of dirty pages - when fsync needs to flush on its call path. If - the number is less than this value, it triggers - in-place-updates. - - min_seq_blocks This parameter controls the threshold to serialize - write IOs issued by multiple threads in parallel. - - min_hot_blocks This parameter controls the threshold to allocate - a hot data log for pending data blocks to write. - - min_ssr_sections This parameter adds the threshold when deciding - SSR block allocation. If this is large, SSR mode - will be enabled early. - - ram_thresh This parameter controls the memory footprint used - by free nids and cached nat entries. By default, - 1 is set, which indicates 10 MB / 1 GB RAM. - - ra_nid_pages When building free nids, F2FS reads NAT blocks - ahead for speed up. Default is 0. - - dirty_nats_ratio Given dirty ratio of cached nat entries, F2FS - determines flushing them in background. - - max_victim_search This parameter controls the number of trials to - find a victim segment when conducting SSR and - cleaning operations. The default value is 4096 - which covers 8GB block address range. - - migration_granularity For large-sized sections, F2FS can stop GC given - this granularity instead of reclaiming entire - section. - - dir_level This parameter controls the directory level to - support large directory. If a directory has a - number of files, it can reduce the file lookup - latency by increasing this dir_level value. - Otherwise, it needs to decrease this value to - reduce the space overhead. The default value is 0. - - cp_interval F2FS tries to do checkpoint periodically, 60 secs - by default. - - idle_interval F2FS detects system is idle, if there's no F2FS - operations during given interval, 5 secs by - default. - - discard_idle_interval F2FS detects the discard thread is idle, given - time interval. Default is 5 secs. - - gc_idle_interval F2FS detects the GC thread is idle, given time - interval. Default is 5 secs. - - umount_discard_timeout When unmounting the disk, F2FS waits for finishing - queued discard commands which can take huge time. - This gives time out for it, 5 secs by default. - - iostat_enable This controls to enable/disable iostat in F2FS. - - readdir_ra This enables/disabled readahead of inode blocks - in readdir, and default is enabled. - - gc_pin_file_thresh This indicates how many GC can be failed for the - pinned file. If it exceeds this, F2FS doesn't - guarantee its pinning state. 2048 trials is set - by default. - - extension_list This enables to change extension_list for hot/cold - files in runtime. - - inject_rate This controls injection rate of arbitrary faults. - - inject_type This controls injection type of arbitrary faults. - - dirty_segments This shows # of dirty segments. - - lifetime_write_kbytes This shows # of data written to the disk. - - features This shows current features enabled on F2FS. - - current_reserved_blocks This shows # of blocks currently reserved. - - unusable If checkpoint=disable, this shows the number of - blocks that are unusable. - If checkpoint=enable it shows the number of blocks - that would be unusable if checkpoint=disable were - to be set. - -encoding This shows the encoding used for casefolding. - If casefolding is not enabled, returns (none) ================================================================================ USAGE From 89df19dc0d40c3d583eab4fc1299ab721b276cbf Mon Sep 17 00:00:00 2001 From: Hridya Valsaraju Date: Wed, 22 Jan 2020 10:51:16 -0800 Subject: [PATCH 2794/3715] f2fs: Add f2fs stats to sysfs Currently f2fs stats are only available from /d/f2fs/status. This patch adds some of the f2fs stats to sysfs so that they are accessible even when debugfs is not mounted. The following sysfs nodes are added: -/sys/fs/f2fs//free_segments -/sys/fs/f2fs//cp_foreground_calls -/sys/fs/f2fs//cp_background_calls -/sys/fs/f2fs//gc_foreground_calls -/sys/fs/f2fs//gc_background_calls -/sys/fs/f2fs//moved_blocks_foreground -/sys/fs/f2fs//moved_blocks_background -/sys/fs/f2fs//avg_vblocks Signed-off-by: Hridya Valsaraju [Jaegeuk Kim: allow STAT_FS without DEBUG_FS] Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 47 ++++++++ fs/f2fs/Kconfig | 2 +- fs/f2fs/debug.c | 79 ++++++++------ fs/f2fs/f2fs.h | 9 +- fs/f2fs/gc.c | 2 +- fs/f2fs/sysfs.c | 139 ++++++++++++++++++------ 6 files changed, 204 insertions(+), 74 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 48c387d9fd87..e0a8a2c2c0c1 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -271,3 +271,50 @@ Date July 2019 Contact: "Daniel Rosenberg" Description: Displays name and version of the encoding set for the filesystem. If no encoding is set, displays (none) + +What: /sys/fs/f2fs//free_segments +Date: September 2019 +Contact: "Hridya Valsaraju" +Description: Number of free segments in disk. + +What: /sys/fs/f2fs//cp_foreground_calls +Date: September 2019 +Contact: "Hridya Valsaraju" +Description: Number of checkpoint operations performed on demand. Available when + CONFIG_F2FS_STAT_FS=y. + +What: /sys/fs/f2fs//cp_background_calls +Date: September 2019 +Contact: "Hridya Valsaraju" +Description: Number of checkpoint operations performed in the background to + free segments. Available when CONFIG_F2FS_STAT_FS=y. + +What: /sys/fs/f2fs//gc_foreground_calls +Date: September 2019 +Contact: "Hridya Valsaraju" +Description: Number of garbage collection operations performed on demand. + Available when CONFIG_F2FS_STAT_FS=y. + +What: /sys/fs/f2fs//gc_background_calls +Date: September 2019 +Contact: "Hridya Valsaraju" +Description: Number of garbage collection operations triggered in background. + Available when CONFIG_F2FS_STAT_FS=y. + +What: /sys/fs/f2fs//moved_blocks_foreground +Date: September 2019 +Contact: "Hridya Valsaraju" +Description: Number of blocks moved by garbage collection in foreground. + Available when CONFIG_F2FS_STAT_FS=y. + +What: /sys/fs/f2fs//moved_blocks_background +Date: September 2019 +Contact: "Hridya Valsaraju" +Description: Number of blocks moved by garbage collection in background. + Available when CONFIG_F2FS_STAT_FS=y. + +What: /sys/fs/f2fs//avg_vblocks +Date: September 2019 +Contact: "Hridya Valsaraju" +Description: Average number of valid blocks. + Available when CONFIG_F2FS_STAT_FS=y. diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index cf52194f37e5..9bb02d446d44 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -20,7 +20,7 @@ config F2FS_FS config F2FS_STAT_FS bool "F2FS Status Information" - depends on F2FS_FS && DEBUG_FS + depends on F2FS_FS default y help /sys/kernel/debug/f2fs/ contains information about all the partitions diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 28e202b76874..77a82f93b54c 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -21,9 +21,45 @@ #include "gc.h" static LIST_HEAD(f2fs_stat_list); -static struct dentry *f2fs_debugfs_root; static DEFINE_MUTEX(f2fs_stat_mutex); +#ifdef CONFIG_DEBUG_FS +static struct dentry *f2fs_debugfs_root; +#endif +/* + * This function calculates BDF of every segments + */ +void f2fs_update_sit_info(struct f2fs_sb_info *sbi) +{ + struct f2fs_stat_info *si = F2FS_STAT(sbi); + unsigned long long blks_per_sec, hblks_per_sec, total_vblocks; + unsigned long long bimodal, dist; + unsigned int segno, vblocks; + int ndirty = 0; + + bimodal = 0; + total_vblocks = 0; + blks_per_sec = BLKS_PER_SEC(sbi); + hblks_per_sec = blks_per_sec / 2; + for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { + vblocks = get_valid_blocks(sbi, segno, true); + dist = abs(vblocks - hblks_per_sec); + bimodal += dist * dist; + + if (vblocks > 0 && vblocks < blks_per_sec) { + total_vblocks += vblocks; + ndirty++; + } + } + dist = div_u64(MAIN_SECS(sbi) * hblks_per_sec * hblks_per_sec, 100); + si->bimodal = div64_u64(bimodal, dist); + if (si->dirty_count) + si->avg_vblocks = div_u64(total_vblocks, ndirty); + else + si->avg_vblocks = 0; +} + +#ifdef CONFIG_DEBUG_FS static void update_general_status(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); @@ -116,7 +152,6 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID]; si->avail_nids = NM_I(sbi)->available_nids; si->alloc_nids = NM_I(sbi)->nid_cnt[PREALLOC_NID]; - si->bg_gc = sbi->bg_gc; si->io_skip_bggc = sbi->io_skip_bggc; si->other_skip_bggc = sbi->other_skip_bggc; si->skipped_atomic_files[BG_GC] = sbi->skipped_atomic_files[BG_GC]; @@ -147,39 +182,6 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->inplace_count = atomic_read(&sbi->inplace_count); } -/* - * This function calculates BDF of every segments - */ -static void update_sit_info(struct f2fs_sb_info *sbi) -{ - struct f2fs_stat_info *si = F2FS_STAT(sbi); - unsigned long long blks_per_sec, hblks_per_sec, total_vblocks; - unsigned long long bimodal, dist; - unsigned int segno, vblocks; - int ndirty = 0; - - bimodal = 0; - total_vblocks = 0; - blks_per_sec = BLKS_PER_SEC(sbi); - hblks_per_sec = blks_per_sec / 2; - for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { - vblocks = get_valid_blocks(sbi, segno, true); - dist = abs(vblocks - hblks_per_sec); - bimodal += dist * dist; - - if (vblocks > 0 && vblocks < blks_per_sec) { - total_vblocks += vblocks; - ndirty++; - } - } - dist = div_u64(MAIN_SECS(sbi) * hblks_per_sec * hblks_per_sec, 100); - si->bimodal = div64_u64(bimodal, dist); - if (si->dirty_count) - si->avg_vblocks = div_u64(total_vblocks, ndirty); - else - si->avg_vblocks = 0; -} - /* * This function calculates memory footprint. */ @@ -445,7 +447,7 @@ static int stat_show(struct seq_file *s, void *v) si->block_count[LFS], si->segment_count[LFS]); /* segment usage info */ - update_sit_info(si->sbi); + f2fs_update_sit_info(si->sbi); seq_printf(s, "\nBDF: %u, avg. vblocks: %u\n", si->bimodal, si->avg_vblocks); @@ -476,6 +478,7 @@ static const struct file_operations stat_fops = { .llseek = seq_lseek, .release = single_release, }; +#endif int f2fs_build_stats(struct f2fs_sb_info *sbi) { @@ -536,14 +539,18 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi) void __init f2fs_create_root_stats(void) { +#ifdef CONFIG_DEBUG_FS f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL); debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root, NULL, &stat_fops); +#endif } void f2fs_destroy_root_stats(void) { +#ifdef CONFIG_DEBUG_FS debugfs_remove_recursive(f2fs_debugfs_root); f2fs_debugfs_root = NULL; +#endif } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5dc10adb0ee1..720f2f44ed66 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1433,7 +1433,6 @@ struct f2fs_sb_info { atomic_t vw_cnt; /* # of volatile writes */ atomic_t max_aw_cnt; /* max # of atomic writes */ atomic_t max_vw_cnt; /* max # of volatile writes */ - int bg_gc; /* background gc calls */ unsigned int io_skip_bggc; /* skip background gc for in-flight IO */ unsigned int other_skip_bggc; /* skip background gc for other reasons */ unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */ @@ -3457,7 +3456,7 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) #define stat_inc_cp_count(si) ((si)->cp_count++) #define stat_inc_bg_cp_count(si) ((si)->bg_cp_count++) #define stat_inc_call_count(si) ((si)->call_count++) -#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++) +#define stat_inc_bggc_count(si) ((si)->bg_gc++) #define stat_io_skip_bggc_count(sbi) ((sbi)->io_skip_bggc++) #define stat_other_skip_bggc_count(sbi) ((sbi)->other_skip_bggc++) #define stat_inc_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]++) @@ -3581,6 +3580,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi); void f2fs_destroy_stats(struct f2fs_sb_info *sbi); void __init f2fs_create_root_stats(void); void f2fs_destroy_root_stats(void); +void f2fs_update_sit_info(struct f2fs_sb_info *sbi); #else #define stat_inc_cp_count(si) do { } while (0) #define stat_inc_bg_cp_count(si) do { } while (0) @@ -3590,8 +3590,8 @@ void f2fs_destroy_root_stats(void); #define stat_other_skip_bggc_count(sbi) do { } while (0) #define stat_inc_dirty_inode(sbi, type) do { } while (0) #define stat_dec_dirty_inode(sbi, type) do { } while (0) -#define stat_inc_total_hit(sb) do { } while (0) -#define stat_inc_rbtree_node_hit(sb) do { } while (0) +#define stat_inc_total_hit(sbi) do { } while (0) +#define stat_inc_rbtree_node_hit(sbi) do { } while (0) #define stat_inc_largest_node_hit(sbi) do { } while (0) #define stat_inc_cached_node_hit(sbi) do { } while (0) #define stat_inc_inline_xattr(inode) do { } while (0) @@ -3623,6 +3623,7 @@ static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } static inline void __init f2fs_create_root_stats(void) { } static inline void f2fs_destroy_root_stats(void) { } +static inline void update_sit_info(struct f2fs_sb_info *sbi) {} #endif extern const struct file_operations f2fs_dir_operations; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 05b3448dec52..7c28b794e438 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -99,7 +99,7 @@ static int gc_thread_func(void *data) else increase_sleep_time(gc_th, &wait_ms); do_gc: - stat_inc_bggc_count(sbi); + stat_inc_bggc_count(sbi->stat_info); /* if return value is not zero, no victim was selected */ if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC), true, NULL_SEGNO)) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index e347f87eecb9..227d3db5c989 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -25,6 +25,9 @@ enum { DCC_INFO, /* struct discard_cmd_control */ NM_INFO, /* struct f2fs_nm_info */ F2FS_SBI, /* struct f2fs_sb_info */ +#ifdef CONFIG_F2FS_STAT_FS + STAT_INFO, /* struct f2fs_stat_info */ +#endif #ifdef CONFIG_F2FS_FAULT_INJECTION FAULT_INFO_RATE, /* struct f2fs_fault_info */ FAULT_INFO_TYPE, /* struct f2fs_fault_info */ @@ -42,6 +45,9 @@ struct f2fs_attr { int id; }; +static ssize_t f2fs_sbi_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf); + static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) { if (struct_type == GC_THREAD) @@ -58,6 +64,10 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) else if (struct_type == FAULT_INFO_RATE || struct_type == FAULT_INFO_TYPE) return (unsigned char *)&F2FS_OPTION(sbi).fault_info; +#endif +#ifdef CONFIG_F2FS_STAT_FS + else if (struct_type == STAT_INFO) + return (unsigned char *)F2FS_STAT(sbi); #endif return NULL; } @@ -65,35 +75,15 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) static ssize_t dirty_segments_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)(dirty_segments(sbi))); + return sprintf(buf, "%llu\n", + (unsigned long long)(dirty_segments(sbi))); } -static ssize_t unusable_show(struct f2fs_attr *a, +static ssize_t free_segments_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - block_t unusable; - - if (test_opt(sbi, DISABLE_CHECKPOINT)) - unusable = sbi->unusable_block_count; - else - unusable = f2fs_get_unusable_blocks(sbi); - return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)unusable); -} - -static ssize_t encoding_show(struct f2fs_attr *a, - struct f2fs_sb_info *sbi, char *buf) -{ -#ifdef CONFIG_UNICODE - if (f2fs_sb_has_casefold(sbi)) - return snprintf(buf, PAGE_SIZE, "%s (%d.%d.%d)\n", - sbi->s_encoding->charset, - (sbi->s_encoding->version >> 16) & 0xff, - (sbi->s_encoding->version >> 8) & 0xff, - sbi->s_encoding->version & 0xff); -#endif - return snprintf(buf, PAGE_SIZE, "(none)"); + return sprintf(buf, "%llu\n", + (unsigned long long)(free_segments(sbi))); } static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, @@ -102,10 +92,10 @@ static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, struct super_block *sb = sbi->sb; if (!sb->s_bdev->bd_part) - return snprintf(buf, PAGE_SIZE, "0\n"); + return sprintf(buf, "0\n"); - return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)(sbi->kbytes_written + + return sprintf(buf, "%llu\n", + (unsigned long long)(sbi->kbytes_written + BD_PART_WRITTEN(sbi))); } @@ -116,7 +106,7 @@ static ssize_t features_show(struct f2fs_attr *a, int len = 0; if (!sb->s_bdev->bd_part) - return snprintf(buf, PAGE_SIZE, "0\n"); + return sprintf(buf, "0\n"); if (f2fs_sb_has_encrypt(sbi)) len += snprintf(buf, PAGE_SIZE - len, "%s", @@ -166,9 +156,66 @@ static ssize_t features_show(struct f2fs_attr *a, static ssize_t current_reserved_blocks_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - return snprintf(buf, PAGE_SIZE, "%u\n", sbi->current_reserved_blocks); + return sprintf(buf, "%u\n", sbi->current_reserved_blocks); } +static ssize_t unusable_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + block_t unusable; + + if (test_opt(sbi, DISABLE_CHECKPOINT)) + unusable = sbi->unusable_block_count; + else + unusable = f2fs_get_unusable_blocks(sbi); + return sprintf(buf, "%llu\n", (unsigned long long)unusable); +} + +static ssize_t encoding_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ +#ifdef CONFIG_UNICODE + if (f2fs_sb_has_casefold(sbi)) + return snprintf(buf, PAGE_SIZE, "%s (%d.%d.%d)\n", + sbi->s_encoding->charset, + (sbi->s_encoding->version >> 16) & 0xff, + (sbi->s_encoding->version >> 8) & 0xff, + sbi->s_encoding->version & 0xff); +#endif + return sprintf(buf, "(none)"); +} + +#ifdef CONFIG_F2FS_STAT_FS +static ssize_t moved_blocks_foreground_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + struct f2fs_stat_info *si = F2FS_STAT(sbi); + + return sprintf(buf, "%llu\n", + (unsigned long long)(si->tot_blks - + (si->bg_data_blks + si->bg_node_blks))); +} + +static ssize_t moved_blocks_background_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + struct f2fs_stat_info *si = F2FS_STAT(sbi); + + return sprintf(buf, "%llu\n", + (unsigned long long)(si->bg_data_blks + si->bg_node_blks)); +} + +static ssize_t avg_vblocks_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + struct f2fs_stat_info *si = F2FS_STAT(sbi); + + si->dirty_count = dirty_segments(sbi); + f2fs_update_sit_info(sbi); + return sprintf(buf, "%llu\n", (unsigned long long)(si->avg_vblocks)); +} +#endif + static ssize_t f2fs_sbi_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { @@ -202,7 +249,7 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, ui = (unsigned int *)(ptr + a->offset); - return snprintf(buf, PAGE_SIZE, "%u\n", *ui); + return sprintf(buf, "%u\n", *ui); } static ssize_t __sbi_store(struct f2fs_attr *a, @@ -413,7 +460,7 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a, case FEAT_SB_CHECKSUM: case FEAT_CASEFOLD: case FEAT_COMPRESSION: - return snprintf(buf, PAGE_SIZE, "supported\n"); + return sprintf(buf, "supported\n"); } return 0; } @@ -442,6 +489,14 @@ static struct f2fs_attr f2fs_attr_##_name = { \ .id = _id, \ } +#define F2FS_STAT_ATTR(_struct_type, _struct_name, _name, _elname) \ +static struct f2fs_attr f2fs_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = 0444 }, \ + .show = f2fs_sbi_show, \ + .struct_type = _struct_type, \ + .offset = offsetof(struct _struct_name, _elname), \ +} + F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_urgent_sleep_time, urgent_sleep_time); F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time); @@ -483,11 +538,21 @@ F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate); F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); #endif F2FS_GENERAL_RO_ATTR(dirty_segments); +F2FS_GENERAL_RO_ATTR(free_segments); F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes); F2FS_GENERAL_RO_ATTR(features); F2FS_GENERAL_RO_ATTR(current_reserved_blocks); F2FS_GENERAL_RO_ATTR(unusable); F2FS_GENERAL_RO_ATTR(encoding); +#ifdef CONFIG_F2FS_STAT_FS +F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_foreground_calls, cp_count); +F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_background_calls, bg_cp_count); +F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, gc_foreground_calls, call_count); +F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, gc_background_calls, bg_gc); +F2FS_GENERAL_RO_ATTR(moved_blocks_background); +F2FS_GENERAL_RO_ATTR(moved_blocks_foreground); +F2FS_GENERAL_RO_ATTR(avg_vblocks); +#endif #ifdef CONFIG_FS_ENCRYPTION F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO); @@ -549,12 +614,22 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(inject_type), #endif ATTR_LIST(dirty_segments), + ATTR_LIST(free_segments), ATTR_LIST(unusable), ATTR_LIST(lifetime_write_kbytes), ATTR_LIST(features), ATTR_LIST(reserved_blocks), ATTR_LIST(current_reserved_blocks), ATTR_LIST(encoding), +#ifdef CONFIG_F2FS_STAT_FS + ATTR_LIST(cp_foreground_calls), + ATTR_LIST(cp_background_calls), + ATTR_LIST(gc_foreground_calls), + ATTR_LIST(gc_background_calls), + ATTR_LIST(moved_blocks_foreground), + ATTR_LIST(moved_blocks_background), + ATTR_LIST(avg_vblocks), +#endif NULL, }; From 86c6acb2330a81e3032a0b3460ac9e7b707aa17c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 23 Jan 2020 20:15:48 -0800 Subject: [PATCH 2795/3715] f2fs: fix dcache lookup of !casefolded directories Do the name comparison for non-casefolded directories correctly. This is analogous to ext4's commit 66883da1eee8 ("ext4: fix dcache lookup of !casefolded directories"). Fixes: 2c2eb7a300cd ("f2fs: Support case-insensitive file name lookups") Cc: # v5.4+ Signed-off-by: Eric Biggers Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 1373b46a7085..9fc79cf30056 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -1087,7 +1087,7 @@ static int f2fs_d_compare(const struct dentry *dentry, unsigned int len, if (!IS_CASEFOLDED(dentry->d_parent->d_inode)) { if (len != name->len) return -1; - return memcmp(str, name, len); + return memcmp(str, name->name, len); } return f2fs_ci_compare(dentry->d_parent->d_inode, name, &qstr, false); From 2945d197414d9732c680ea0b709735d3b0d8ea57 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 23 Jan 2020 20:15:49 -0800 Subject: [PATCH 2796/3715] f2fs: fix race conditions in ->d_compare() and ->d_hash() Since ->d_compare() and ->d_hash() can be called in RCU-walk mode, ->d_parent and ->d_inode can be concurrently modified, and in particular, ->d_inode may be changed to NULL. For f2fs_d_hash() this resulted in a reproducible NULL dereference if a lookup is done in a directory being deleted, e.g. with: int main() { if (fork()) { for (;;) { mkdir("subdir", 0700); rmdir("subdir"); } } else { for (;;) access("subdir/file", 0); } } ... or by running the 't_encrypted_d_revalidate' program from xfstests. Both repros work in any directory on a filesystem with the encoding feature, even if the directory doesn't actually have the casefold flag. I couldn't reproduce a crash in f2fs_d_compare(), but it appears that a similar crash is possible there. Fix these bugs by reading ->d_parent and ->d_inode using READ_ONCE() and falling back to the case sensitive behavior if the inode is NULL. Reported-by: Al Viro Fixes: 2c2eb7a300cd ("f2fs: Support case-insensitive file name lookups") Cc: # v5.4+ Signed-off-by: Eric Biggers Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 9fc79cf30056..1972638165fd 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -1083,24 +1083,27 @@ static int f2fs_d_compare(const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { struct qstr qstr = {.name = str, .len = len }; + const struct dentry *parent = READ_ONCE(dentry->d_parent); + const struct inode *inode = READ_ONCE(parent->d_inode); - if (!IS_CASEFOLDED(dentry->d_parent->d_inode)) { + if (!inode || !IS_CASEFOLDED(inode)) { if (len != name->len) return -1; return memcmp(str, name->name, len); } - return f2fs_ci_compare(dentry->d_parent->d_inode, name, &qstr, false); + return f2fs_ci_compare(inode, name, &qstr, false); } static int f2fs_d_hash(const struct dentry *dentry, struct qstr *str) { struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); const struct unicode_map *um = sbi->s_encoding; + const struct inode *inode = READ_ONCE(dentry->d_inode); unsigned char *norm; int len, ret = 0; - if (!IS_CASEFOLDED(dentry->d_inode)) + if (!inode || !IS_CASEFOLDED(inode)) return 0; norm = f2fs_kmalloc(sbi, PATH_MAX, GFP_ATOMIC); From a637b674ce4beb5f9d34b68cb0eb3e009c1e8b88 Mon Sep 17 00:00:00 2001 From: Alistair Delva Date: Tue, 28 Jan 2020 12:35:12 -0800 Subject: [PATCH 2797/3715] ANDROID: cuttlefish_defconfig: Enable CONFIG_BTT Without this, the virtio_pmem driver does not seem to work. Bug: 146400078 Change-Id: I2689aa4739b83a0a5a0e9930dc50b57e4f612525 Signed-off-by: Alistair Delva --- arch/arm64/configs/cuttlefish_defconfig | 1 - arch/x86/configs/x86_64_cuttlefish_defconfig | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index d0a8c06670c1..7fd46d9c1629 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -422,7 +422,6 @@ CONFIG_ANDROID=y CONFIG_ANDROID_BINDER_IPC=y CONFIG_LIBNVDIMM=y # CONFIG_ND_BLK is not set -# CONFIG_BTT is not set CONFIG_ARM_SCPI_PROTOCOL=y # CONFIG_ARM_SCPI_POWER_DOMAIN is not set CONFIG_EXT4_FS=y diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 022325173f80..ca27bedba3bf 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -436,7 +436,6 @@ CONFIG_ANDROID=y CONFIG_ANDROID_BINDER_IPC=y CONFIG_LIBNVDIMM=y # CONFIG_ND_BLK is not set -# CONFIG_BTT is not set # CONFIG_FIRMWARE_MEMMAP is not set CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y From 318babce66a62d8bc8d5434909a044b89ab3a53e Mon Sep 17 00:00:00 2001 From: Eugene Zemtsov Date: Mon, 18 Nov 2019 20:21:06 -0800 Subject: [PATCH 2798/3715] ANDROID: Initial commit of Incremental FS Fully working incremental fs filesystem Signed-off-by: Eugene Zemtsov Signed-off-by: Paul Lawrence Bug: 133435829 Change-Id: I14741a61ce7891a0f9054e70f026917712cbef78 --- fs/Kconfig | 1 + fs/Makefile | 1 + fs/incfs/Kconfig | 18 + fs/incfs/Makefile | 9 + fs/incfs/compat.h | 33 + fs/incfs/data_mgmt.c | 1142 ++++++++ fs/incfs/data_mgmt.h | 339 +++ fs/incfs/format.c | 696 +++++ fs/incfs/format.h | 349 +++ fs/incfs/integrity.c | 217 ++ fs/incfs/integrity.h | 72 + fs/incfs/internal.h | 21 + fs/incfs/main.c | 103 + fs/incfs/vfs.c | 2203 +++++++++++++++ fs/incfs/vfs.h | 13 + include/uapi/linux/incrementalfs.h | 244 ++ .../selftests/filesystems/incfs/Makefile | 16 + .../selftests/filesystems/incfs/config | 1 + .../selftests/filesystems/incfs/incfs_test.c | 2420 +++++++++++++++++ .../selftests/filesystems/incfs/utils.c | 377 +++ .../selftests/filesystems/incfs/utils.h | 59 + 21 files changed, 8334 insertions(+) create mode 100644 fs/incfs/Kconfig create mode 100644 fs/incfs/Makefile create mode 100644 fs/incfs/compat.h create mode 100644 fs/incfs/data_mgmt.c create mode 100644 fs/incfs/data_mgmt.h create mode 100644 fs/incfs/format.c create mode 100644 fs/incfs/format.h create mode 100644 fs/incfs/integrity.c create mode 100644 fs/incfs/integrity.h create mode 100644 fs/incfs/internal.h create mode 100644 fs/incfs/main.c create mode 100644 fs/incfs/vfs.c create mode 100644 fs/incfs/vfs.h create mode 100644 include/uapi/linux/incrementalfs.h create mode 100644 tools/testing/selftests/filesystems/incfs/Makefile create mode 100644 tools/testing/selftests/filesystems/incfs/config create mode 100644 tools/testing/selftests/filesystems/incfs/incfs_test.c create mode 100644 tools/testing/selftests/filesystems/incfs/utils.c create mode 100644 tools/testing/selftests/filesystems/incfs/utils.h diff --git a/fs/Kconfig b/fs/Kconfig index 6da435421744..1c1ed36c36c2 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -106,6 +106,7 @@ source "fs/quota/Kconfig" source "fs/autofs4/Kconfig" source "fs/fuse/Kconfig" source "fs/overlayfs/Kconfig" +source "fs/incfs/Kconfig" menu "Caches" diff --git a/fs/Makefile b/fs/Makefile index 3204be31dad1..580e49be1219 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -111,6 +111,7 @@ obj-$(CONFIG_ADFS_FS) += adfs/ obj-$(CONFIG_FUSE_FS) += fuse/ obj-$(CONFIG_OVERLAY_FS) += overlayfs/ obj-$(CONFIG_ORANGEFS_FS) += orangefs/ +obj-$(CONFIG_INCREMENTAL_FS) += incfs/ obj-$(CONFIG_UDF_FS) += udf/ obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ obj-$(CONFIG_OMFS_FS) += omfs/ diff --git a/fs/incfs/Kconfig b/fs/incfs/Kconfig new file mode 100644 index 000000000000..d860c07664c3 --- /dev/null +++ b/fs/incfs/Kconfig @@ -0,0 +1,18 @@ +config INCREMENTAL_FS + tristate "Incremental file system support" + depends on BLOCK + select DECOMPRESS_LZ4 + select CRC32 + select CRYPTO + select CRYPTO_SHA256 + select X509_CERTIFICATE_PARSER + select ASYMMETRIC_KEY_TYPE + select ASYMMETRIC_PUBLIC_KEY_SUBTYPE + select PKCS7_MESSAGE_PARSER + help + Incremental FS is a read-only virtual file system that facilitates execution + of programs while their binaries are still being lazily downloaded over the + network, USB or pigeon post. + + To compile this file system support as a module, choose M here: the + module will be called incrementalfs. diff --git a/fs/incfs/Makefile b/fs/incfs/Makefile new file mode 100644 index 000000000000..8d734bf91ecd --- /dev/null +++ b/fs/incfs/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_INCREMENTAL_FS) += incrementalfs.o + +incrementalfs-y := \ + data_mgmt.o \ + format.o \ + integrity.o \ + main.o \ + vfs.o diff --git a/fs/incfs/compat.h b/fs/incfs/compat.h new file mode 100644 index 000000000000..f6fd9b2b3cb2 --- /dev/null +++ b/fs/incfs/compat.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2019 Google LLC + */ +#ifndef _INCFS_COMPAT_H +#define _INCFS_COMPAT_H + +#include +#include + +typedef unsigned int __poll_t; + +#ifndef u64_to_user_ptr +#define u64_to_user_ptr(x) ( \ +{ \ + typecheck(u64, x); \ + (void __user *)(uintptr_t)x; \ +} \ +) +#endif + +#ifndef lru_to_page +#define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) +#endif + +#define readahead_gfp_mask(x) \ + (mapping_gfp_mask(x) | __GFP_NORETRY | __GFP_NOWARN) + +#ifndef SB_ACTIVE +#define SB_ACTIVE MS_ACTIVE +#endif + +#endif /* _INCFS_COMPAT_H */ diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c new file mode 100644 index 000000000000..25ea1099946d --- /dev/null +++ b/fs/incfs/data_mgmt.c @@ -0,0 +1,1142 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2019 Google LLC + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "data_mgmt.h" +#include "format.h" +#include "integrity.h" + +struct mount_info *incfs_alloc_mount_info(struct super_block *sb, + struct mount_options *options, + struct path *backing_dir_path) +{ + struct mount_info *mi = NULL; + int error = 0; + + mi = kzalloc(sizeof(*mi), GFP_NOFS); + if (!mi) + return ERR_PTR(-ENOMEM); + + mi->mi_sb = sb; + mi->mi_options = *options; + mi->mi_backing_dir_path = *backing_dir_path; + mi->mi_owner = get_current_cred(); + path_get(&mi->mi_backing_dir_path); + mutex_init(&mi->mi_dir_struct_mutex); + mutex_init(&mi->mi_pending_reads_mutex); + init_waitqueue_head(&mi->mi_pending_reads_notif_wq); + INIT_LIST_HEAD(&mi->mi_reads_list_head); + + if (options->read_log_pages != 0) { + size_t buf_size = PAGE_SIZE * options->read_log_pages; + + spin_lock_init(&mi->mi_log.rl_writer_lock); + init_waitqueue_head(&mi->mi_log.ml_notif_wq); + + mi->mi_log.rl_size = buf_size / sizeof(*mi->mi_log.rl_ring_buf); + mi->mi_log.rl_ring_buf = kzalloc(buf_size, GFP_NOFS); + if (!mi->mi_log.rl_ring_buf) { + error = -ENOMEM; + goto err; + } + } + + return mi; + +err: + incfs_free_mount_info(mi); + return ERR_PTR(error); +} + +void incfs_free_mount_info(struct mount_info *mi) +{ + if (!mi) + return; + + dput(mi->mi_index_dir); + path_put(&mi->mi_backing_dir_path); + mutex_destroy(&mi->mi_dir_struct_mutex); + mutex_destroy(&mi->mi_pending_reads_mutex); + put_cred(mi->mi_owner); + kfree(mi->mi_log.rl_ring_buf); + kfree(mi); +} + +static void data_file_segment_init(struct data_file_segment *segment) +{ + init_waitqueue_head(&segment->new_data_arrival_wq); + mutex_init(&segment->blockmap_mutex); + INIT_LIST_HEAD(&segment->reads_list_head); +} + +static void data_file_segment_destroy(struct data_file_segment *segment) +{ + mutex_destroy(&segment->blockmap_mutex); +} + +struct data_file *incfs_open_data_file(struct mount_info *mi, struct file *bf) +{ + struct data_file *df = NULL; + struct backing_file_context *bfc = NULL; + int md_records; + u64 size; + int error = 0; + int i; + + if (!bf || !mi) + return ERR_PTR(-EFAULT); + + if (!S_ISREG(bf->f_inode->i_mode)) + return ERR_PTR(-EBADF); + + bfc = incfs_alloc_bfc(bf); + if (IS_ERR(bfc)) + return ERR_CAST(bfc); + + df = kzalloc(sizeof(*df), GFP_NOFS); + if (!df) { + error = -ENOMEM; + goto out; + } + + df->df_backing_file_context = bfc; + df->df_mount_info = mi; + for (i = 0; i < ARRAY_SIZE(df->df_segments); i++) + data_file_segment_init(&df->df_segments[i]); + + error = mutex_lock_interruptible(&bfc->bc_mutex); + if (error) + goto out; + error = incfs_read_file_header(bfc, &df->df_metadata_off, + &df->df_id, &size); + mutex_unlock(&bfc->bc_mutex); + + if (error) + goto out; + + df->df_size = size; + if (size > 0) + df->df_block_count = get_blocks_count_for_size(size); + + md_records = incfs_scan_metadata_chain(df); + if (md_records < 0) + error = md_records; + +out: + if (error) { + incfs_free_bfc(bfc); + df->df_backing_file_context = NULL; + incfs_free_data_file(df); + return ERR_PTR(error); + } + return df; +} + +void incfs_free_data_file(struct data_file *df) +{ + int i; + + if (!df) + return; + + incfs_free_mtree(df->df_hash_tree); + for (i = 0; i < ARRAY_SIZE(df->df_segments); i++) + data_file_segment_destroy(&df->df_segments[i]); + incfs_free_bfc(df->df_backing_file_context); + kfree(df); +} + +int make_inode_ready_for_data_ops(struct mount_info *mi, + struct inode *inode, + struct file *backing_file) +{ + struct inode_info *node = get_incfs_node(inode); + struct data_file *df = NULL; + int err = 0; + + inode_lock(inode); + if (S_ISREG(inode->i_mode)) { + if (!node->n_file) { + df = incfs_open_data_file(mi, backing_file); + + if (IS_ERR(df)) + err = PTR_ERR(df); + else + node->n_file = df; + } + } else + err = -EBADF; + inode_unlock(inode); + return err; +} + +struct dir_file *incfs_open_dir_file(struct mount_info *mi, struct file *bf) +{ + struct dir_file *dir = NULL; + + if (!S_ISDIR(bf->f_inode->i_mode)) + return ERR_PTR(-EBADF); + + dir = kzalloc(sizeof(*dir), GFP_NOFS); + if (!dir) + return ERR_PTR(-ENOMEM); + + dir->backing_dir = get_file(bf); + dir->mount_info = mi; + return dir; +} + +void incfs_free_dir_file(struct dir_file *dir) +{ + if (!dir) + return; + if (dir->backing_dir) + fput(dir->backing_dir); + kfree(dir); +} + +static ssize_t decompress(struct mem_range src, struct mem_range dst) +{ + int result = LZ4_decompress_safe(src.data, dst.data, src.len, dst.len); + + if (result < 0) + return -EBADMSG; + + return result; +} + +static void log_block_read(struct mount_info *mi, incfs_uuid_t *id, + int block_index, bool timed_out) +{ + struct read_log *log = &mi->mi_log; + struct read_log_state state; + s64 now_us = ktime_to_us(ktime_get()); + struct read_log_record record = { + .file_id = *id, + .block_index = block_index, + .timed_out = timed_out, + .timestamp_us = now_us + }; + + if (log->rl_size == 0) + return; + + spin_lock(&log->rl_writer_lock); + state = READ_ONCE(log->rl_state); + log->rl_ring_buf[state.next_index] = record; + if (++state.next_index == log->rl_size) { + state.next_index = 0; + ++state.current_pass_no; + } + WRITE_ONCE(log->rl_state, state); + spin_unlock(&log->rl_writer_lock); + + wake_up_all(&log->ml_notif_wq); +} + +static int validate_hash_tree(struct file *bf, struct data_file *df, + int block_index, struct mem_range data, u8 *buf) +{ + u8 digest[INCFS_MAX_HASH_SIZE] = {}; + struct mtree *tree = NULL; + struct ondisk_signature *sig = NULL; + struct mem_range calc_digest_rng; + struct mem_range saved_digest_rng; + struct mem_range root_hash_rng; + int digest_size; + int hash_block_index = block_index; + int hash_per_block; + int lvl = 0; + int res; + + tree = df->df_hash_tree; + sig = df->df_signature; + if (!tree || !sig) + return 0; + + digest_size = tree->alg->digest_size; + hash_per_block = INCFS_DATA_FILE_BLOCK_SIZE / digest_size; + calc_digest_rng = range(digest, digest_size); + res = incfs_calc_digest(tree->alg, data, calc_digest_rng); + if (res) + return res; + + for (lvl = 0; lvl < tree->depth; lvl++) { + loff_t lvl_off = tree->hash_level_suboffset[lvl] + + sig->mtree_offset; + loff_t hash_block_off = lvl_off + + round_down(hash_block_index * digest_size, + INCFS_DATA_FILE_BLOCK_SIZE); + size_t hash_off_in_block = hash_block_index * digest_size + % INCFS_DATA_FILE_BLOCK_SIZE; + struct mem_range buf_range = range(buf, + INCFS_DATA_FILE_BLOCK_SIZE); + ssize_t read_res = incfs_kread(bf, buf, + INCFS_DATA_FILE_BLOCK_SIZE, hash_block_off); + + if (read_res < 0) + return read_res; + if (read_res != INCFS_DATA_FILE_BLOCK_SIZE) + return -EIO; + + saved_digest_rng = range(buf + hash_off_in_block, digest_size); + if (!incfs_equal_ranges(calc_digest_rng, saved_digest_rng)) { + int i; + bool zero = true; + + pr_debug("incfs: Hash mismatch lvl:%d blk:%d\n", + lvl, block_index); + for (i = 0; i < saved_digest_rng.len; ++i) + if (saved_digest_rng.data[i]) { + zero = false; + break; + } + + if (zero) + pr_debug("incfs: Note saved_digest all zero - did you forget to load the hashes?\n"); + return -EBADMSG; + } + + res = incfs_calc_digest(tree->alg, buf_range, calc_digest_rng); + if (res) + return res; + hash_block_index /= hash_per_block; + } + + root_hash_rng = range(tree->root_hash, digest_size); + if (!incfs_equal_ranges(calc_digest_rng, root_hash_rng)) { + pr_debug("incfs: Root hash mismatch blk:%d\n", block_index); + return -EBADMSG; + } + return 0; +} + +static int revalidate_signature(struct file *bf, struct data_file *df) +{ + struct ondisk_signature *sig = df->df_signature; + struct mem_range root_hash = {}; + int result = 0; + u8 *sig_buf = NULL; + u8 *add_data_buf = NULL; + ssize_t read_res; + + /* File has no signature. */ + if (!sig || !df->df_hash_tree || sig->sig_size == 0) + return 0; + + /* Signature has already been validated. */ + if (df->df_signature_validated) + return 0; + + add_data_buf = kzalloc(sig->add_data_size, GFP_NOFS); + if (!add_data_buf) { + result = -ENOMEM; + goto out; + } + + read_res = incfs_kread(bf, add_data_buf, sig->add_data_size, + sig->add_data_offset); + if (read_res < 0) { + result = read_res; + goto out; + } + if (read_res != sig->add_data_size) { + result = -EIO; + goto out; + } + + sig_buf = kzalloc(sig->sig_size, GFP_NOFS); + if (!sig_buf) { + result = -ENOMEM; + goto out; + } + + read_res = incfs_kread(bf, sig_buf, sig->sig_size, sig->sig_offset); + if (read_res < 0) { + result = read_res; + goto out; + } + if (read_res != sig->sig_size) { + result = -EIO; + goto out; + } + + root_hash = range(df->df_hash_tree->root_hash, + df->df_hash_tree->alg->digest_size); + + result = incfs_validate_pkcs7_signature( + range(sig_buf, sig->sig_size), + root_hash, + range(add_data_buf, sig->add_data_size)); + + if (result == 0) + df->df_signature_validated = true; +out: + kfree(sig_buf); + kfree(add_data_buf); + return result; +} + +static struct data_file_segment *get_file_segment(struct data_file *df, + int block_index) +{ + int seg_idx = block_index % ARRAY_SIZE(df->df_segments); + + return &df->df_segments[seg_idx]; +} + +static bool is_data_block_present(struct data_file_block *block) +{ + return (block->db_backing_file_data_offset != 0) && + (block->db_stored_size != 0); +} + +static int get_data_file_block(struct data_file *df, int index, + struct data_file_block *res_block) +{ + struct incfs_blockmap_entry bme = {}; + struct backing_file_context *bfc = NULL; + loff_t blockmap_off = 0; + u16 flags = 0; + int error = 0; + + if (!df || !res_block) + return -EFAULT; + + blockmap_off = df->df_blockmap_off; + bfc = df->df_backing_file_context; + + if (index < 0 || index >= df->df_block_count || blockmap_off == 0) + return -EINVAL; + + error = incfs_read_blockmap_entry(bfc, index, blockmap_off, &bme); + if (error) + return error; + + flags = le16_to_cpu(bme.me_flags); + res_block->db_backing_file_data_offset = + le16_to_cpu(bme.me_data_offset_hi); + res_block->db_backing_file_data_offset <<= 32; + res_block->db_backing_file_data_offset |= + le32_to_cpu(bme.me_data_offset_lo); + res_block->db_stored_size = le16_to_cpu(bme.me_data_size); + res_block->db_comp_alg = (flags & INCFS_BLOCK_COMPRESSED_LZ4) ? + COMPRESSION_LZ4 : + COMPRESSION_NONE; + return 0; +} + +static bool is_read_done(struct pending_read *read) +{ + return atomic_read_acquire(&read->done) != 0; +} + +static void set_read_done(struct pending_read *read) +{ + atomic_set_release(&read->done, 1); +} + +/* + * Notifies a given data file about pending read from a given block. + * Returns a new pending read entry. + */ +static struct pending_read *add_pending_read(struct data_file *df, + int block_index) +{ + struct pending_read *result = NULL; + struct data_file_segment *segment = NULL; + struct mount_info *mi = NULL; + + WARN_ON(!df); + segment = get_file_segment(df, block_index); + mi = df->df_mount_info; + + WARN_ON(!segment); + WARN_ON(!mi); + + result = kzalloc(sizeof(*result), GFP_NOFS); + if (!result) + return NULL; + + result->file_id = df->df_id; + result->block_index = block_index; + result->timestamp_us = ktime_to_us(ktime_get()); + + mutex_lock(&mi->mi_pending_reads_mutex); + + result->serial_number = ++mi->mi_last_pending_read_number; + mi->mi_pending_reads_count++; + + list_add(&result->mi_reads_list, &mi->mi_reads_list_head); + list_add(&result->segment_reads_list, &segment->reads_list_head); + mutex_unlock(&mi->mi_pending_reads_mutex); + + wake_up_all(&mi->mi_pending_reads_notif_wq); + return result; +} + +/* Notifies a given data file that pending read is completed. */ +static void remove_pending_read(struct data_file *df, struct pending_read *read) +{ + struct mount_info *mi = NULL; + + if (!df || !read) { + WARN_ON(!df); + WARN_ON(!read); + return; + } + + mi = df->df_mount_info; + + mutex_lock(&mi->mi_pending_reads_mutex); + list_del(&read->mi_reads_list); + list_del(&read->segment_reads_list); + + mi->mi_pending_reads_count--; + mutex_unlock(&mi->mi_pending_reads_mutex); + + kfree(read); +} + +static void notify_pending_reads(struct mount_info *mi, + struct data_file_segment *segment, + int index) +{ + struct pending_read *entry = NULL; + + /* Notify pending reads waiting for this block. */ + mutex_lock(&mi->mi_pending_reads_mutex); + list_for_each_entry(entry, &segment->reads_list_head, + segment_reads_list) { + if (entry->block_index == index) + set_read_done(entry); + } + mutex_unlock(&mi->mi_pending_reads_mutex); + wake_up_all(&segment->new_data_arrival_wq); +} + +static int wait_for_data_block(struct data_file *df, int block_index, + int timeout_ms, + struct data_file_block *res_block) +{ + struct data_file_block block = {}; + struct data_file_segment *segment = NULL; + struct pending_read *read = NULL; + struct mount_info *mi = NULL; + int error = 0; + int wait_res = 0; + + if (!df || !res_block) + return -EFAULT; + + if (block_index < 0 || block_index >= df->df_block_count) + return -EINVAL; + + if (df->df_blockmap_off <= 0) + return -ENODATA; + + segment = get_file_segment(df, block_index); + WARN_ON(!segment); + + error = mutex_lock_interruptible(&segment->blockmap_mutex); + if (error) + return error; + + /* Look up the given block */ + error = get_data_file_block(df, block_index, &block); + + /* If it's not found, create a pending read */ + if (!error && !is_data_block_present(&block) && timeout_ms != 0) + read = add_pending_read(df, block_index); + + mutex_unlock(&segment->blockmap_mutex); + if (error) + return error; + + /* If the block was found, just return it. No need to wait. */ + if (is_data_block_present(&block)) { + *res_block = block; + return 0; + } + + mi = df->df_mount_info; + + if (timeout_ms == 0) { + log_block_read(mi, &df->df_id, block_index, + true /*timed out*/); + return -ETIME; + } + + if (!read) + return -ENOMEM; + + /* Wait for notifications about block's arrival */ + wait_res = + wait_event_interruptible_timeout(segment->new_data_arrival_wq, + (is_read_done(read)), + msecs_to_jiffies(timeout_ms)); + + /* Woke up, the pending read is no longer needed. */ + remove_pending_read(df, read); + read = NULL; + + if (wait_res == 0) { + /* Wait has timed out */ + log_block_read(mi, &df->df_id, block_index, + true /*timed out*/); + return -ETIME; + } + if (wait_res < 0) { + /* + * Only ERESTARTSYS is really expected here when a signal + * comes while we wait. + */ + return wait_res; + } + + error = mutex_lock_interruptible(&segment->blockmap_mutex); + if (error) + return error; + + /* + * Re-read block's info now, it has just arrived and + * should be available. + */ + error = get_data_file_block(df, block_index, &block); + if (!error) { + if (is_data_block_present(&block)) + *res_block = block; + else { + /* + * Somehow wait finished successfully bug block still + * can't be found. It's not normal. + */ + pr_warn("incfs:Wait succeeded, but block not found.\n"); + error = -ENODATA; + } + } + + mutex_unlock(&segment->blockmap_mutex); + return error; +} + +ssize_t incfs_read_data_file_block(struct mem_range dst, struct data_file *df, + int index, int timeout_ms, + struct mem_range tmp) +{ + loff_t pos; + ssize_t result; + size_t bytes_to_read; + struct mount_info *mi = NULL; + struct file *bf = NULL; + struct data_file_block block = {}; + + if (!dst.data || !df) + return -EFAULT; + + if (tmp.len < 2 * INCFS_DATA_FILE_BLOCK_SIZE) + return -ERANGE; + + mi = df->df_mount_info; + bf = df->df_backing_file_context->bc_file; + + result = wait_for_data_block(df, index, timeout_ms, &block); + if (result < 0) + goto out; + + pos = block.db_backing_file_data_offset; + if (block.db_comp_alg == COMPRESSION_NONE) { + bytes_to_read = min(dst.len, block.db_stored_size); + result = incfs_kread(bf, dst.data, bytes_to_read, pos); + + /* Some data was read, but not enough */ + if (result >= 0 && result != bytes_to_read) + result = -EIO; + } else { + bytes_to_read = min(tmp.len, block.db_stored_size); + result = incfs_kread(bf, tmp.data, bytes_to_read, pos); + if (result == bytes_to_read) { + result = + decompress(range(tmp.data, bytes_to_read), dst); + if (result < 0) { + const char *name = + bf->f_path.dentry->d_name.name; + + pr_warn_once("incfs: Decompression error. %s", + name); + } + } else if (result >= 0) { + /* Some data was read, but not enough */ + result = -EIO; + } + } + + if (result > 0) { + int err = validate_hash_tree(bf, df, index, dst, tmp.data); + + if (err < 0) + result = err; + } + + if (result > 0) { + int err = revalidate_signature(bf, df); + + if (err < 0) + result = err; + } + + if (result >= 0) + log_block_read(mi, &df->df_id, index, false /*timed out*/); + +out: + return result; +} + +int incfs_process_new_data_block(struct data_file *df, + struct incfs_new_data_block *block, u8 *data) +{ + struct mount_info *mi = NULL; + struct backing_file_context *bfc = NULL; + struct data_file_segment *segment = NULL; + struct data_file_block existing_block = {}; + u16 flags = 0; + int error = 0; + + if (!df || !block) + return -EFAULT; + + bfc = df->df_backing_file_context; + mi = df->df_mount_info; + + if (block->block_index >= df->df_block_count) + return -ERANGE; + + segment = get_file_segment(df, block->block_index); + if (!segment) + return -EFAULT; + if (block->compression == COMPRESSION_LZ4) + flags |= INCFS_BLOCK_COMPRESSED_LZ4; + + error = mutex_lock_interruptible(&segment->blockmap_mutex); + if (error) + return error; + + error = get_data_file_block(df, block->block_index, &existing_block); + if (error) + goto unlock; + if (is_data_block_present(&existing_block)) { + /* Block is already present, nothing to do here */ + goto unlock; + } + + error = mutex_lock_interruptible(&bfc->bc_mutex); + if (!error) { + error = incfs_write_data_block_to_backing_file( + bfc, range(data, block->data_len), block->block_index, + df->df_blockmap_off, flags); + mutex_unlock(&bfc->bc_mutex); + } + if (!error) + notify_pending_reads(mi, segment, block->block_index); + +unlock: + mutex_unlock(&segment->blockmap_mutex); + if (error) + pr_debug("incfs: %s %d error: %d\n", __func__, + block->block_index, error); + return error; +} + +int incfs_read_file_signature(struct data_file *df, struct mem_range dst) +{ + struct file *bf = df->df_backing_file_context->bc_file; + struct ondisk_signature *sig; + int read_res = 0; + + if (!dst.data) + return -EFAULT; + + sig = df->df_signature; + if (!sig) + return 0; + + if (dst.len < sig->sig_size) + return -E2BIG; + + read_res = incfs_kread(bf, dst.data, sig->sig_size, sig->sig_offset); + + if (read_res < 0) + return read_res; + + if (read_res != sig->sig_size) + return -EIO; + + return read_res; +} + +int incfs_process_new_hash_block(struct data_file *df, + struct incfs_new_data_block *block, u8 *data) +{ + struct backing_file_context *bfc = NULL; + struct mount_info *mi = NULL; + struct mtree *hash_tree = NULL; + struct ondisk_signature *sig = NULL; + loff_t hash_area_base = 0; + loff_t hash_area_size = 0; + int error = 0; + + if (!df || !block) + return -EFAULT; + + if (!(block->flags & INCFS_BLOCK_FLAGS_HASH)) + return -EINVAL; + + bfc = df->df_backing_file_context; + mi = df->df_mount_info; + + if (!df) + return -ENOENT; + + hash_tree = df->df_hash_tree; + sig = df->df_signature; + if (!hash_tree || !sig || sig->mtree_offset == 0) + return -ENOTSUPP; + + hash_area_base = sig->mtree_offset; + hash_area_size = sig->mtree_size; + if (hash_area_size < block->block_index * INCFS_DATA_FILE_BLOCK_SIZE + + block->data_len) { + /* Hash block goes beyond dedicated hash area of this file. */ + return -ERANGE; + } + + error = mutex_lock_interruptible(&bfc->bc_mutex); + if (!error) + error = incfs_write_hash_block_to_backing_file( + bfc, range(data, block->data_len), block->block_index, + hash_area_base); + mutex_unlock(&bfc->bc_mutex); + return error; +} + +static int process_blockmap_md(struct incfs_blockmap *bm, + struct metadata_handler *handler) +{ + struct data_file *df = handler->context; + int error = 0; + loff_t base_off = le64_to_cpu(bm->m_base_offset); + u32 block_count = le32_to_cpu(bm->m_block_count); + + if (!df) + return -EFAULT; + + if (df->df_block_count != block_count) + return -EBADMSG; + + df->df_blockmap_off = base_off; + return error; +} + +static int process_file_attr_md(struct incfs_file_attr *fa, + struct metadata_handler *handler) +{ + struct data_file *df = handler->context; + u16 attr_size = le16_to_cpu(fa->fa_size); + + if (!df) + return -EFAULT; + + if (attr_size > INCFS_MAX_FILE_ATTR_SIZE) + return -E2BIG; + + df->n_attr.fa_value_offset = le64_to_cpu(fa->fa_offset); + df->n_attr.fa_value_size = attr_size; + df->n_attr.fa_crc = le32_to_cpu(fa->fa_crc); + + return 0; +} + +static int process_file_signature_md(struct incfs_file_signature *sg, + struct metadata_handler *handler) +{ + struct data_file *df = handler->context; + struct mtree *hash_tree = NULL; + struct ondisk_signature *signature = NULL; + int error = 0; + loff_t base_tree_off = le64_to_cpu(sg->sg_hash_tree_offset); + u32 tree_size = le32_to_cpu(sg->sg_hash_tree_size); + loff_t sig_off = le64_to_cpu(sg->sg_sig_offset); + u32 sig_size = le32_to_cpu(sg->sg_sig_size); + loff_t add_data_off = le64_to_cpu(sg->sg_add_data_offset); + u32 add_data_size = le32_to_cpu(sg->sg_add_data_size); + + if (!df) + return -ENOENT; + + signature = kzalloc(sizeof(*signature), GFP_NOFS); + if (!signature) { + error = -ENOMEM; + goto out; + } + + signature->add_data_offset = add_data_off; + signature->add_data_size = add_data_size; + signature->sig_offset = sig_off; + signature->sig_size = sig_size; + signature->mtree_offset = base_tree_off; + signature->mtree_size = tree_size; + + hash_tree = incfs_alloc_mtree(sg->sg_hash_alg, df->df_block_count, + range(sg->sg_root_hash, sizeof(sg->sg_root_hash))); + if (IS_ERR(hash_tree)) { + error = PTR_ERR(hash_tree); + hash_tree = NULL; + goto out; + } + if (hash_tree->hash_tree_area_size != tree_size) { + error = -EINVAL; + goto out; + } + if (tree_size > 0 && handler->md_record_offset <= base_tree_off) { + error = -EINVAL; + goto out; + } + if (handler->md_record_offset <= signature->add_data_offset || + handler->md_record_offset <= signature->sig_offset) { + error = -EINVAL; + goto out; + } + df->df_hash_tree = hash_tree; + df->df_signature = signature; +out: + if (error) { + incfs_free_mtree(hash_tree); + kfree(signature); + } + + return error; +} + +int incfs_scan_metadata_chain(struct data_file *df) +{ + struct metadata_handler *handler = NULL; + int result = 0; + int records_count = 0; + int error = 0; + struct backing_file_context *bfc = NULL; + + if (!df || !df->df_backing_file_context) + return -EFAULT; + + bfc = df->df_backing_file_context; + + handler = kzalloc(sizeof(*handler), GFP_NOFS); + if (!handler) + return -ENOMEM; + + /* No writing to the backing file while it's being scanned. */ + error = mutex_lock_interruptible(&bfc->bc_mutex); + if (error) + goto out; + + /* Reading superblock */ + handler->md_record_offset = df->df_metadata_off; + handler->context = df; + handler->handle_blockmap = process_blockmap_md; + handler->handle_file_attr = process_file_attr_md; + handler->handle_signature = process_file_signature_md; + + pr_debug("incfs: Starting reading incfs-metadata records at offset %lld\n", + handler->md_record_offset); + while (handler->md_record_offset > 0) { + error = incfs_read_next_metadata_record(bfc, handler); + if (error) { + pr_warn("incfs: Error during reading incfs-metadata record. Offset: %lld Record #%d Error code: %d\n", + handler->md_record_offset, records_count + 1, + -error); + break; + } + records_count++; + } + if (error) { + pr_debug("incfs: Error %d after reading %d incfs-metadata records.\n", + -error, records_count); + result = error; + } else { + pr_debug("incfs: Finished reading %d incfs-metadata records.\n", + records_count); + result = records_count; + } + mutex_unlock(&bfc->bc_mutex); +out: + kfree(handler); + return result; +} + +/* + * Quickly checks if there are pending reads with a serial number larger + * than a given one. + */ +bool incfs_fresh_pending_reads_exist(struct mount_info *mi, int last_number) +{ + bool result = false; + + mutex_lock(&mi->mi_pending_reads_mutex); + result = (mi->mi_last_pending_read_number > last_number) && + (mi->mi_pending_reads_count > 0); + mutex_unlock(&mi->mi_pending_reads_mutex); + return result; +} + +int incfs_collect_pending_reads(struct mount_info *mi, int sn_lowerbound, + struct incfs_pending_read_info *reads, + int reads_size) +{ + int reported_reads = 0; + struct pending_read *entry = NULL; + + if (!mi) + return -EFAULT; + + if (reads_size <= 0) + return 0; + + mutex_lock(&mi->mi_pending_reads_mutex); + + if (mi->mi_last_pending_read_number <= sn_lowerbound + || mi->mi_pending_reads_count == 0) + goto unlock; + + list_for_each_entry(entry, &mi->mi_reads_list_head, mi_reads_list) { + if (entry->serial_number <= sn_lowerbound) + continue; + + reads[reported_reads].file_id = entry->file_id; + reads[reported_reads].block_index = entry->block_index; + reads[reported_reads].serial_number = entry->serial_number; + reads[reported_reads].timestamp_us = entry->timestamp_us; + /* reads[reported_reads].kind = INCFS_READ_KIND_PENDING; */ + + reported_reads++; + if (reported_reads >= reads_size) + break; + } + +unlock: + mutex_unlock(&mi->mi_pending_reads_mutex); + + return reported_reads; +} + +struct read_log_state incfs_get_log_state(struct mount_info *mi) +{ + struct read_log *log = &mi->mi_log; + struct read_log_state result; + + spin_lock(&log->rl_writer_lock); + result = READ_ONCE(log->rl_state); + spin_unlock(&log->rl_writer_lock); + return result; +} + +static u64 calc_record_count(const struct read_log_state *state, int rl_size) +{ + return state->current_pass_no * (u64)rl_size + state->next_index; +} + +int incfs_get_uncollected_logs_count(struct mount_info *mi, + struct read_log_state state) +{ + struct read_log *log = &mi->mi_log; + + u64 count = calc_record_count(&log->rl_state, log->rl_size) - + calc_record_count(&state, log->rl_size); + return min_t(int, count, log->rl_size); +} + +static void fill_pending_read_from_log_record( + struct incfs_pending_read_info *dest, const struct read_log_record *src, + struct read_log_state *state, u64 log_size) +{ + dest->file_id = src->file_id; + dest->block_index = src->block_index; + dest->serial_number = + state->current_pass_no * log_size + state->next_index; + dest->timestamp_us = src->timestamp_us; +} + +int incfs_collect_logged_reads(struct mount_info *mi, + struct read_log_state *reader_state, + struct incfs_pending_read_info *reads, + int reads_size) +{ + struct read_log *log = &mi->mi_log; + struct read_log_state live_state = incfs_get_log_state(mi); + u64 read_count = calc_record_count(reader_state, log->rl_size); + u64 written_count = calc_record_count(&live_state, log->rl_size); + int dst_idx; + + if (reader_state->next_index >= log->rl_size || + read_count > written_count) + return -ERANGE; + + if (read_count == written_count) + return 0; + + if (read_count > written_count) { + /* This reader is somehow ahead of the writer. */ + pr_debug("incfs: Log reader is ahead of writer\n"); + *reader_state = live_state; + } + + if (written_count - read_count > log->rl_size) { + /* + * Reading pointer is too far behind, + * start from the record following the write pointer. + */ + pr_debug("incfs: read pointer is behind, moving: %u/%u -> %u/%u / %u\n", + (u32)reader_state->next_index, + (u32)reader_state->current_pass_no, + (u32)live_state.next_index, + (u32)live_state.current_pass_no - 1, (u32)log->rl_size); + + *reader_state = (struct read_log_state){ + .next_index = live_state.next_index, + .current_pass_no = live_state.current_pass_no - 1, + }; + } + + for (dst_idx = 0; dst_idx < reads_size; dst_idx++) { + if (reader_state->next_index == live_state.next_index && + reader_state->current_pass_no == live_state.current_pass_no) + break; + + fill_pending_read_from_log_record( + &reads[dst_idx], + &log->rl_ring_buf[reader_state->next_index], + reader_state, log->rl_size); + + reader_state->next_index++; + if (reader_state->next_index == log->rl_size) { + reader_state->next_index = 0; + reader_state->current_pass_no++; + } + } + return dst_idx; +} + +bool incfs_equal_ranges(struct mem_range lhs, struct mem_range rhs) +{ + if (lhs.len != rhs.len) + return false; + return memcmp(lhs.data, rhs.data, lhs.len) == 0; +} diff --git a/fs/incfs/data_mgmt.h b/fs/incfs/data_mgmt.h new file mode 100644 index 000000000000..82ccab3be4bb --- /dev/null +++ b/fs/incfs/data_mgmt.h @@ -0,0 +1,339 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2019 Google LLC + */ +#ifndef _INCFS_DATA_MGMT_H +#define _INCFS_DATA_MGMT_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "internal.h" + +#define SEGMENTS_PER_FILE 3 + +struct read_log_record { + u32 block_index : 31; + + u32 timed_out : 1; + + u64 timestamp_us; + + incfs_uuid_t file_id; +} __packed; + +struct read_log_state { + /* Next slot in rl_ring_buf to write to. */ + u32 next_index; + + /* Current number of writer pass over rl_ring_buf */ + u32 current_pass_no; +}; + +/* A ring buffer to save records about data blocks which were recently read. */ +struct read_log { + struct read_log_record *rl_ring_buf; + + struct read_log_state rl_state; + + spinlock_t rl_writer_lock; + + int rl_size; + + /* + * A queue of waiters who want to be notified about reads. + */ + wait_queue_head_t ml_notif_wq; +}; + +struct mount_options { + unsigned int read_timeout_ms; + unsigned int readahead_pages; + unsigned int read_log_pages; + unsigned int read_log_wakeup_count; + bool no_backing_file_cache; + bool no_backing_file_readahead; +}; + +struct mount_info { + struct super_block *mi_sb; + + struct path mi_backing_dir_path; + + struct dentry *mi_index_dir; + + const struct cred *mi_owner; + + struct mount_options mi_options; + + /* This mutex is to be taken before create, rename, delete */ + struct mutex mi_dir_struct_mutex; + + /* + * A queue of waiters who want to be notified about new pending reads. + */ + wait_queue_head_t mi_pending_reads_notif_wq; + + /* + * Protects: + * - reads_list_head + * - mi_pending_reads_count + * - mi_last_pending_read_number + * - data_file_segment.reads_list_head + */ + struct mutex mi_pending_reads_mutex; + + /* List of active pending_read objects */ + struct list_head mi_reads_list_head; + + /* Total number of items in reads_list_head */ + int mi_pending_reads_count; + + /* + * Last serial number that was assigned to a pending read. + * 0 means no pending reads have been seen yet. + */ + int mi_last_pending_read_number; + + /* Temporary buffer for read logger. */ + struct read_log mi_log; +}; + +struct data_file_block { + loff_t db_backing_file_data_offset; + + size_t db_stored_size; + + enum incfs_compression_alg db_comp_alg; +}; + +struct pending_read { + incfs_uuid_t file_id; + + s64 timestamp_us; + + atomic_t done; + + int block_index; + + int serial_number; + + struct list_head mi_reads_list; + + struct list_head segment_reads_list; +}; + +struct data_file_segment { + wait_queue_head_t new_data_arrival_wq; + + /* Protects reads and writes from the blockmap */ + /* Good candidate for read/write mutex */ + struct mutex blockmap_mutex; + + /* List of active pending_read objects belonging to this segment */ + /* Protected by mount_info.pending_reads_mutex */ + struct list_head reads_list_head; +}; + +/* + * Extra info associated with a file. Just a few bytes set by a user. + */ +struct file_attr { + loff_t fa_value_offset; + + size_t fa_value_size; + + u32 fa_crc; +}; + + +struct data_file { + struct backing_file_context *df_backing_file_context; + + struct mount_info *df_mount_info; + + incfs_uuid_t df_id; + + /* + * Array of segments used to reduce lock contention for the file. + * Segment is chosen for a block depends on the block's index. + */ + struct data_file_segment df_segments[SEGMENTS_PER_FILE]; + + /* Base offset of the first metadata record. */ + loff_t df_metadata_off; + + /* Base offset of the block map. */ + loff_t df_blockmap_off; + + /* File size in bytes */ + loff_t df_size; + + int df_block_count; /* File size in DATA_FILE_BLOCK_SIZE blocks */ + + struct file_attr n_attr; + + struct mtree *df_hash_tree; + + struct ondisk_signature *df_signature; + + /* True, if file signature has already been validated. */ + bool df_signature_validated; +}; + +struct dir_file { + struct mount_info *mount_info; + + struct file *backing_dir; +}; + +struct inode_info { + struct mount_info *n_mount_info; /* A mount, this file belongs to */ + + struct inode *n_backing_inode; + + struct data_file *n_file; + + struct inode n_vfs_inode; +}; + +struct dentry_info { + struct path backing_path; +}; + +struct mount_info *incfs_alloc_mount_info(struct super_block *sb, + struct mount_options *options, + struct path *backing_dir_path); + +void incfs_free_mount_info(struct mount_info *mi); + +struct data_file *incfs_open_data_file(struct mount_info *mi, struct file *bf); +void incfs_free_data_file(struct data_file *df); + +int incfs_scan_metadata_chain(struct data_file *df); + +struct dir_file *incfs_open_dir_file(struct mount_info *mi, struct file *bf); +void incfs_free_dir_file(struct dir_file *dir); + +ssize_t incfs_read_data_file_block(struct mem_range dst, struct data_file *df, + int index, int timeout_ms, + struct mem_range tmp); + +int incfs_read_file_signature(struct data_file *df, struct mem_range dst); + +int incfs_process_new_data_block(struct data_file *df, + struct incfs_new_data_block *block, u8 *data); + +int incfs_process_new_hash_block(struct data_file *df, + struct incfs_new_data_block *block, u8 *data); + + +bool incfs_fresh_pending_reads_exist(struct mount_info *mi, int last_number); + +/* + * Collects pending reads and saves them into the array (reads/reads_size). + * Only reads with serial_number > sn_lowerbound are reported. + * Returns how many reads were saved into the array. + */ +int incfs_collect_pending_reads(struct mount_info *mi, int sn_lowerbound, + struct incfs_pending_read_info *reads, + int reads_size); + +int incfs_collect_logged_reads(struct mount_info *mi, + struct read_log_state *start_state, + struct incfs_pending_read_info *reads, + int reads_size); +struct read_log_state incfs_get_log_state(struct mount_info *mi); +int incfs_get_uncollected_logs_count(struct mount_info *mi, + struct read_log_state state); + +static inline struct inode_info *get_incfs_node(struct inode *inode) +{ + if (!inode) + return NULL; + + if (inode->i_sb->s_magic != INCFS_MAGIC_NUMBER) { + /* This inode doesn't belong to us. */ + pr_warn_once("incfs: %s on an alien inode.", __func__); + return NULL; + } + + return container_of(inode, struct inode_info, n_vfs_inode); +} + +static inline struct data_file *get_incfs_data_file(struct file *f) +{ + struct inode_info *node = NULL; + + if (!f) + return NULL; + + if (!S_ISREG(f->f_inode->i_mode)) + return NULL; + + node = get_incfs_node(f->f_inode); + if (!node) + return NULL; + + return node->n_file; +} + +static inline struct dir_file *get_incfs_dir_file(struct file *f) +{ + if (!f) + return NULL; + + if (!S_ISDIR(f->f_inode->i_mode)) + return NULL; + + return (struct dir_file *)f->private_data; +} + +/* + * Make sure that inode_info.n_file is initialized and inode can be used + * for reading and writing data from/to the backing file. + */ +int make_inode_ready_for_data_ops(struct mount_info *mi, + struct inode *inode, + struct file *backing_file); + +static inline struct dentry_info *get_incfs_dentry(const struct dentry *d) +{ + if (!d) + return NULL; + + return (struct dentry_info *)d->d_fsdata; +} + +static inline void get_incfs_backing_path(const struct dentry *d, + struct path *path) +{ + struct dentry_info *di = get_incfs_dentry(d); + + if (!di) { + *path = (struct path) {}; + return; + } + + *path = di->backing_path; + path_get(path); +} + +static inline int get_blocks_count_for_size(u64 size) +{ + if (size == 0) + return 0; + return 1 + (size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; +} + +bool incfs_equal_ranges(struct mem_range lhs, struct mem_range rhs); + +#endif /* _INCFS_DATA_MGMT_H */ diff --git a/fs/incfs/format.c b/fs/incfs/format.c new file mode 100644 index 000000000000..27498b9c3d34 --- /dev/null +++ b/fs/incfs/format.c @@ -0,0 +1,696 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2018 Google LLC + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "compat.h" +#include "format.h" + +struct backing_file_context *incfs_alloc_bfc(struct file *backing_file) +{ + struct backing_file_context *result = NULL; + + result = kzalloc(sizeof(*result), GFP_NOFS); + if (!result) + return ERR_PTR(-ENOMEM); + + result->bc_file = get_file(backing_file); + mutex_init(&result->bc_mutex); + return result; +} + +void incfs_free_bfc(struct backing_file_context *bfc) +{ + if (!bfc) + return; + + if (bfc->bc_file) + fput(bfc->bc_file); + + mutex_destroy(&bfc->bc_mutex); + kfree(bfc); +} + +loff_t incfs_get_end_offset(struct file *f) +{ + /* + * This function assumes that file size and the end-offset + * are the same. This is not always true. + */ + return i_size_read(file_inode(f)); +} + +/* + * Truncate the tail of the file to the given length. + * Used to rollback partially successful multistep writes. + */ +static int truncate_backing_file(struct backing_file_context *bfc, + loff_t new_end) +{ + struct inode *inode = NULL; + struct dentry *dentry = NULL; + loff_t old_end = 0; + struct iattr attr; + int result = 0; + + if (!bfc) + return -EFAULT; + + LOCK_REQUIRED(bfc->bc_mutex); + + if (!bfc->bc_file) + return -EFAULT; + + old_end = incfs_get_end_offset(bfc->bc_file); + if (old_end == new_end) + return 0; + if (old_end < new_end) + return -EINVAL; + + inode = bfc->bc_file->f_inode; + dentry = bfc->bc_file->f_path.dentry; + + attr.ia_size = new_end; + attr.ia_valid = ATTR_SIZE; + + inode_lock(inode); + result = notify_change(dentry, &attr, NULL); + inode_unlock(inode); + + return result; +} + +/* Append a given number of zero bytes to the end of the backing file. */ +static int append_zeros(struct backing_file_context *bfc, size_t len) +{ + loff_t file_size = 0; + loff_t new_last_byte_offset = 0; + int res = 0; + + if (!bfc) + return -EFAULT; + + if (len == 0) + return 0; + + LOCK_REQUIRED(bfc->bc_mutex); + + /* + * Allocate only one byte at the new desired end of the file. + * It will increase file size and create a zeroed area of + * a given size. + */ + file_size = incfs_get_end_offset(bfc->bc_file); + new_last_byte_offset = file_size + len - 1; + res = vfs_fallocate(bfc->bc_file, 0, new_last_byte_offset, 1); + if (res) + return res; + + res = vfs_fsync_range(bfc->bc_file, file_size, file_size + len, 1); + return res; +} + +static int write_to_bf(struct backing_file_context *bfc, const void *buf, + size_t count, loff_t pos, bool sync) +{ + ssize_t res = 0; + + res = incfs_kwrite(bfc->bc_file, buf, count, pos); + if (res < 0) + return res; + if (res != count) + return -EIO; + + if (sync) + return vfs_fsync_range(bfc->bc_file, pos, pos + count, 1); + + return 0; +} + +static u32 calc_md_crc(struct incfs_md_header *record) +{ + u32 result = 0; + __le32 saved_crc = record->h_record_crc; + __le64 saved_md_offset = record->h_next_md_offset; + size_t record_size = min_t(size_t, le16_to_cpu(record->h_record_size), + INCFS_MAX_METADATA_RECORD_SIZE); + + /* Zero fields which needs to be excluded from CRC calculation. */ + record->h_record_crc = 0; + record->h_next_md_offset = 0; + result = crc32(0, record, record_size); + + /* Restore excluded fields. */ + record->h_record_crc = saved_crc; + record->h_next_md_offset = saved_md_offset; + + return result; +} + +/* + * Append a given metadata record to the backing file and update a previous + * record to add the new record the the metadata list. + */ +static int append_md_to_backing_file(struct backing_file_context *bfc, + struct incfs_md_header *record) +{ + int result = 0; + loff_t record_offset; + loff_t file_pos; + __le64 new_md_offset; + size_t record_size; + + if (!bfc || !record) + return -EFAULT; + + if (bfc->bc_last_md_record_offset < 0) + return -EINVAL; + + LOCK_REQUIRED(bfc->bc_mutex); + + record_size = le16_to_cpu(record->h_record_size); + file_pos = incfs_get_end_offset(bfc->bc_file); + record->h_prev_md_offset = bfc->bc_last_md_record_offset; + record->h_next_md_offset = 0; + record->h_record_crc = cpu_to_le32(calc_md_crc(record)); + + /* Write the metadata record to the end of the backing file */ + record_offset = file_pos; + new_md_offset = cpu_to_le64(record_offset); + result = write_to_bf(bfc, record, record_size, file_pos, true); + if (result) + return result; + + /* Update next metadata offset in a previous record or a superblock. */ + if (bfc->bc_last_md_record_offset) { + /* + * Find a place in the previous md record where new record's + * offset needs to be saved. + */ + file_pos = bfc->bc_last_md_record_offset + + offsetof(struct incfs_md_header, h_next_md_offset); + } else { + /* + * No metadata yet, file a place to update in the + * file_header. + */ + file_pos = offsetof(struct incfs_file_header, + fh_first_md_offset); + } + result = write_to_bf(bfc, &new_md_offset, sizeof(new_md_offset), + file_pos, true); + if (result) + return result; + + bfc->bc_last_md_record_offset = record_offset; + return result; +} + +/* + * Reserve 0-filled space for the blockmap body, and append + * incfs_blockmap metadata record pointing to it. + */ +int incfs_write_blockmap_to_backing_file(struct backing_file_context *bfc, + u32 block_count, loff_t *map_base_off) +{ + struct incfs_blockmap blockmap = {}; + int result = 0; + loff_t file_end = 0; + size_t map_size = block_count * sizeof(struct incfs_blockmap_entry); + + if (!bfc) + return -EFAULT; + + blockmap.m_header.h_md_entry_type = INCFS_MD_BLOCK_MAP; + blockmap.m_header.h_record_size = cpu_to_le16(sizeof(blockmap)); + blockmap.m_header.h_next_md_offset = cpu_to_le64(0); + blockmap.m_block_count = cpu_to_le32(block_count); + + LOCK_REQUIRED(bfc->bc_mutex); + + /* Reserve 0-filled space for the blockmap body in the backing file. */ + file_end = incfs_get_end_offset(bfc->bc_file); + result = append_zeros(bfc, map_size); + if (result) + return result; + + /* Write blockmap metadata record pointing to the body written above. */ + blockmap.m_base_offset = cpu_to_le64(file_end); + result = append_md_to_backing_file(bfc, &blockmap.m_header); + if (result) { + /* Error, rollback file changes */ + truncate_backing_file(bfc, file_end); + } else if (map_base_off) { + *map_base_off = file_end; + } + + return result; +} + +/* + * Write file attribute data and metadata record to the backing file. + */ +int incfs_write_file_attr_to_backing_file(struct backing_file_context *bfc, + struct mem_range value, struct incfs_file_attr *attr) +{ + struct incfs_file_attr file_attr = {}; + int result = 0; + u32 crc = 0; + loff_t value_offset = 0; + + if (!bfc) + return -EFAULT; + + if (value.len > INCFS_MAX_FILE_ATTR_SIZE) + return -ENOSPC; + + LOCK_REQUIRED(bfc->bc_mutex); + + crc = crc32(0, value.data, value.len); + value_offset = incfs_get_end_offset(bfc->bc_file); + file_attr.fa_header.h_md_entry_type = INCFS_MD_FILE_ATTR; + file_attr.fa_header.h_record_size = cpu_to_le16(sizeof(file_attr)); + file_attr.fa_header.h_next_md_offset = cpu_to_le64(0); + file_attr.fa_size = cpu_to_le16((u16)value.len); + file_attr.fa_offset = cpu_to_le64(value_offset); + file_attr.fa_crc = cpu_to_le64(crc); + + result = write_to_bf(bfc, value.data, value.len, value_offset, true); + if (result) + return result; + + result = append_md_to_backing_file(bfc, &file_attr.fa_header); + if (result) { + /* Error, rollback file changes */ + truncate_backing_file(bfc, value_offset); + } else if (attr) { + *attr = file_attr; + } + + return result; +} + +int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, + u8 hash_alg, u32 tree_size, + struct mem_range root_hash, struct mem_range add_data, + struct mem_range sig) +{ + struct incfs_file_signature sg = {}; + int result = 0; + loff_t rollback_pos = 0; + loff_t tree_area_pos = 0; + size_t alignment = 0; + + if (!bfc) + return -EFAULT; + if (root_hash.len > sizeof(sg.sg_root_hash)) + return -E2BIG; + + LOCK_REQUIRED(bfc->bc_mutex); + + rollback_pos = incfs_get_end_offset(bfc->bc_file); + + sg.sg_header.h_md_entry_type = INCFS_MD_SIGNATURE; + sg.sg_header.h_record_size = cpu_to_le16(sizeof(sg)); + sg.sg_header.h_next_md_offset = cpu_to_le64(0); + sg.sg_hash_alg = hash_alg; + if (sig.data != NULL && sig.len > 0) { + loff_t pos = incfs_get_end_offset(bfc->bc_file); + + sg.sg_sig_size = cpu_to_le32(sig.len); + sg.sg_sig_offset = cpu_to_le64(pos); + + result = write_to_bf(bfc, sig.data, sig.len, pos, false); + if (result) + goto err; + } + + if (add_data.len > 0) { + loff_t pos = incfs_get_end_offset(bfc->bc_file); + + sg.sg_add_data_size = cpu_to_le32(add_data.len); + sg.sg_add_data_offset = cpu_to_le64(pos); + + result = write_to_bf(bfc, add_data.data, + add_data.len, pos, false); + if (result) + goto err; + } + + tree_area_pos = incfs_get_end_offset(bfc->bc_file); + if (hash_alg && tree_size > 0) { + if (tree_size > 5 * INCFS_DATA_FILE_BLOCK_SIZE) { + /* + * If hash tree is big enough, it makes sense to + * align in the backing file for faster access. + */ + loff_t offset = round_up(tree_area_pos, PAGE_SIZE); + + alignment = offset - tree_area_pos; + tree_area_pos = offset; + } + + /* + * If root hash is not the only hash in the tree. + * reserve 0-filled space for the tree. + */ + result = append_zeros(bfc, tree_size + alignment); + if (result) + goto err; + + sg.sg_hash_tree_size = cpu_to_le32(tree_size); + sg.sg_hash_tree_offset = cpu_to_le64(tree_area_pos); + } + memcpy(sg.sg_root_hash, root_hash.data, root_hash.len); + + /* Write a hash tree metadata record pointing to the hash tree above. */ + result = append_md_to_backing_file(bfc, &sg.sg_header); +err: + if (result) { + /* Error, rollback file changes */ + truncate_backing_file(bfc, rollback_pos); + } + return result; +} + +/* + * Write a backing file header + * It should always be called only on empty file. + * incfs_super_block.s_first_md_offset is 0 for now, but will be updated + * once first metadata record is added. + */ +int incfs_write_fh_to_backing_file(struct backing_file_context *bfc, + incfs_uuid_t *uuid, u64 file_size) +{ + struct incfs_file_header fh = {}; + loff_t file_pos = 0; + + if (!bfc) + return -EFAULT; + + fh.fh_magic = cpu_to_le64(INCFS_MAGIC_NUMBER); + fh.fh_version = cpu_to_le64(INCFS_FORMAT_CURRENT_VER); + fh.fh_header_size = cpu_to_le16(sizeof(fh)); + fh.fh_first_md_offset = cpu_to_le64(0); + fh.fh_data_block_size = cpu_to_le16(INCFS_DATA_FILE_BLOCK_SIZE); + + fh.fh_file_size = cpu_to_le64(file_size); + fh.fh_uuid = *uuid; + + LOCK_REQUIRED(bfc->bc_mutex); + + file_pos = incfs_get_end_offset(bfc->bc_file); + if (file_pos != 0) + return -EEXIST; + + return write_to_bf(bfc, &fh, sizeof(fh), file_pos, true); +} + +/* Write a given data block and update file's blockmap to point it. */ +int incfs_write_data_block_to_backing_file(struct backing_file_context *bfc, + struct mem_range block, int block_index, + loff_t bm_base_off, u16 flags) +{ + struct incfs_blockmap_entry bm_entry = {}; + int result = 0; + loff_t data_offset = 0; + loff_t bm_entry_off = + bm_base_off + sizeof(struct incfs_blockmap_entry) * block_index; + + if (!bfc) + return -EFAULT; + + if (block.len >= (1 << 16) || block_index < 0) + return -EINVAL; + + LOCK_REQUIRED(bfc->bc_mutex); + + data_offset = incfs_get_end_offset(bfc->bc_file); + if (data_offset <= bm_entry_off) { + /* Blockmap entry is beyond the file's end. It is not normal. */ + return -EINVAL; + } + + /* Write the block data at the end of the backing file. */ + result = write_to_bf(bfc, block.data, block.len, data_offset, false); + if (result) + return result; + + /* Update the blockmap to point to the newly written data. */ + bm_entry.me_data_offset_lo = cpu_to_le32((u32)data_offset); + bm_entry.me_data_offset_hi = cpu_to_le16((u16)(data_offset >> 32)); + bm_entry.me_data_size = cpu_to_le16((u16)block.len); + bm_entry.me_flags = cpu_to_le16(flags); + + result = write_to_bf(bfc, &bm_entry, sizeof(bm_entry), + bm_entry_off, false); + return result; +} + +int incfs_write_hash_block_to_backing_file(struct backing_file_context *bfc, + struct mem_range block, + int block_index, loff_t hash_area_off) +{ + loff_t data_offset = 0; + loff_t file_end = 0; + + + if (!bfc) + return -EFAULT; + + LOCK_REQUIRED(bfc->bc_mutex); + + data_offset = hash_area_off + block_index * INCFS_DATA_FILE_BLOCK_SIZE; + file_end = incfs_get_end_offset(bfc->bc_file); + if (data_offset + block.len > file_end) { + /* Block is located beyond the file's end. It is not normal. */ + return -EINVAL; + } + + return write_to_bf(bfc, block.data, block.len, data_offset, false); +} + +/* Initialize a new image in a given backing file. */ +int incfs_make_empty_backing_file(struct backing_file_context *bfc, + incfs_uuid_t *uuid, u64 file_size) +{ + int result = 0; + + if (!bfc || !bfc->bc_file) + return -EFAULT; + + result = mutex_lock_interruptible(&bfc->bc_mutex); + if (result) + goto out; + + result = truncate_backing_file(bfc, 0); + if (result) + goto out; + + result = incfs_write_fh_to_backing_file(bfc, uuid, file_size); +out: + mutex_unlock(&bfc->bc_mutex); + return result; +} + +int incfs_read_blockmap_entry(struct backing_file_context *bfc, int block_index, + loff_t bm_base_off, + struct incfs_blockmap_entry *bm_entry) +{ + return incfs_read_blockmap_entries(bfc, bm_entry, block_index, 1, + bm_base_off); +} + +int incfs_read_blockmap_entries(struct backing_file_context *bfc, + struct incfs_blockmap_entry *entries, + int start_index, int blocks_number, + loff_t bm_base_off) +{ + loff_t bm_entry_off = + bm_base_off + sizeof(struct incfs_blockmap_entry) * start_index; + const size_t bytes_to_read = sizeof(struct incfs_blockmap_entry) + * blocks_number; + int result = 0; + + if (!bfc || !entries) + return -EFAULT; + + if (start_index < 0 || bm_base_off <= 0) + return -ENODATA; + + result = incfs_kread(bfc->bc_file, entries, bytes_to_read, + bm_entry_off); + if (result < 0) + return result; + if (result < bytes_to_read) + return -EIO; + return 0; +} + + +int incfs_read_file_header(struct backing_file_context *bfc, + loff_t *first_md_off, incfs_uuid_t *uuid, + u64 *file_size) +{ + ssize_t bytes_read = 0; + struct incfs_file_header fh = {}; + + if (!bfc || !first_md_off) + return -EFAULT; + + LOCK_REQUIRED(bfc->bc_mutex); + bytes_read = incfs_kread(bfc->bc_file, &fh, sizeof(fh), 0); + if (bytes_read < 0) + return bytes_read; + + if (bytes_read < sizeof(fh)) + return -EBADMSG; + + if (le64_to_cpu(fh.fh_magic) != INCFS_MAGIC_NUMBER) + return -EILSEQ; + + if (le64_to_cpu(fh.fh_version) > INCFS_FORMAT_CURRENT_VER) + return -EILSEQ; + + if (le16_to_cpu(fh.fh_data_block_size) != INCFS_DATA_FILE_BLOCK_SIZE) + return -EILSEQ; + + if (le16_to_cpu(fh.fh_header_size) != sizeof(fh)) + return -EILSEQ; + + if (first_md_off) + *first_md_off = le64_to_cpu(fh.fh_first_md_offset); + if (uuid) + *uuid = fh.fh_uuid; + if (file_size) + *file_size = le64_to_cpu(fh.fh_file_size); + return 0; +} + +/* + * Read through metadata records from the backing file one by one + * and call provided metadata handlers. + */ +int incfs_read_next_metadata_record(struct backing_file_context *bfc, + struct metadata_handler *handler) +{ + const ssize_t max_md_size = INCFS_MAX_METADATA_RECORD_SIZE; + ssize_t bytes_read = 0; + size_t md_record_size = 0; + loff_t next_record = 0; + loff_t prev_record = 0; + int res = 0; + struct incfs_md_header *md_hdr = NULL; + + if (!bfc || !handler) + return -EFAULT; + + LOCK_REQUIRED(bfc->bc_mutex); + + if (handler->md_record_offset == 0) + return -EPERM; + + memset(&handler->md_buffer, 0, max_md_size); + bytes_read = incfs_kread(bfc->bc_file, &handler->md_buffer, + max_md_size, handler->md_record_offset); + if (bytes_read < 0) + return bytes_read; + if (bytes_read < sizeof(*md_hdr)) + return -EBADMSG; + + md_hdr = &handler->md_buffer.md_header; + next_record = le64_to_cpu(md_hdr->h_next_md_offset); + prev_record = le64_to_cpu(md_hdr->h_prev_md_offset); + md_record_size = le16_to_cpu(md_hdr->h_record_size); + + if (md_record_size > max_md_size) { + pr_warn("incfs: The record is too large. Size: %ld", + md_record_size); + return -EBADMSG; + } + + if (bytes_read < md_record_size) { + pr_warn("incfs: The record hasn't been fully read."); + return -EBADMSG; + } + + if (next_record <= handler->md_record_offset && next_record != 0) { + pr_warn("incfs: Next record (%lld) points back in file.", + next_record); + return -EBADMSG; + } + + if (prev_record != handler->md_prev_record_offset) { + pr_warn("incfs: Metadata chain has been corrupted."); + return -EBADMSG; + } + + if (le32_to_cpu(md_hdr->h_record_crc) != calc_md_crc(md_hdr)) { + pr_warn("incfs: Metadata CRC mismatch."); + return -EBADMSG; + } + + switch (md_hdr->h_md_entry_type) { + case INCFS_MD_NONE: + break; + case INCFS_MD_BLOCK_MAP: + if (handler->handle_blockmap) + res = handler->handle_blockmap( + &handler->md_buffer.blockmap, handler); + break; + case INCFS_MD_FILE_ATTR: + if (handler->handle_file_attr) + res = handler->handle_file_attr( + &handler->md_buffer.file_attr, handler); + break; + case INCFS_MD_SIGNATURE: + if (handler->handle_signature) + res = handler->handle_signature( + &handler->md_buffer.signature, handler); + break; + default: + res = -ENOTSUPP; + break; + } + + if (!res) { + if (next_record == 0) { + /* + * Zero offset for the next record means that the last + * metadata record has just been processed. + */ + bfc->bc_last_md_record_offset = + handler->md_record_offset; + } + handler->md_prev_record_offset = handler->md_record_offset; + handler->md_record_offset = next_record; + } + return res; +} + +ssize_t incfs_kread(struct file *f, void *buf, size_t size, loff_t pos) +{ +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0) + return kernel_read(f, pos, (char *)buf, size); +#else + return kernel_read(f, buf, size, &pos); +#endif +} + +ssize_t incfs_kwrite(struct file *f, const void *buf, size_t size, loff_t pos) +{ +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0) + return kernel_write(f, buf, size, pos); +#else + return kernel_write(f, buf, size, &pos); +#endif +} diff --git a/fs/incfs/format.h b/fs/incfs/format.h new file mode 100644 index 000000000000..a86881482e19 --- /dev/null +++ b/fs/incfs/format.h @@ -0,0 +1,349 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2018 Google LLC + */ + +/* + * Overview + * -------- + * The backbone of the incremental-fs ondisk format is an append only linked + * list of metadata blocks. Each metadata block contains an offset of the next + * one. These blocks describe files and directories on the + * file system. They also represent actions of adding and removing file names + * (hard links). + * + * Every time incremental-fs instance is mounted, it reads through this list + * to recreate filesystem's state in memory. An offset of the first record in + * the metadata list is stored in the superblock at the beginning of the backing + * file. + * + * Most of the backing file is taken by data areas and blockmaps. + * Since data blocks can be compressed and have different sizes, + * single per-file data area can't be pre-allocated. That's why blockmaps are + * needed in order to find a location and size of each data block in + * the backing file. Each time a file is created, a corresponding block map is + * allocated to store future offsets of data blocks. + * + * Whenever a data block is given by data loader to incremental-fs: + * - A data area with the given block is appended to the end of + * the backing file. + * - A record in the blockmap for the given block index is updated to reflect + * its location, size, and compression algorithm. + + * Metadata records + * ---------------- + * incfs_blockmap - metadata record that specifies size and location + * of a blockmap area for a given file. This area + * contains an array of incfs_blockmap_entry-s. + * incfs_file_signature - metadata record that specifies where file signature + * and its hash tree can be found in the backing file. + * + * incfs_file_attr - metadata record that specifies where additional file + * attributes blob can be found. + * + * Metadata header + * --------------- + * incfs_md_header - header of a metadata record. It's always a part + * of other structures and served purpose of metadata + * bookkeeping. + * + * +-----------------------------------------------+ ^ + * | incfs_md_header | | + * | 1. type of body(BLOCKMAP, FILE_ATTR..) | | + * | 2. size of the whole record header + body | | + * | 3. CRC the whole record header + body | | + * | 4. offset of the previous md record |]------+ + * | 5. offset of the next md record (md link) |]---+ + * +-----------------------------------------------+ | + * | Metadata record body with useful data | | + * +-----------------------------------------------+ | + * +---> + * + * Other ondisk structures + * ----------------------- + * incfs_super_block - backing file header + * incfs_blockmap_entry - a record in a blockmap area that describes size + * and location of a data block. + * Data blocks dont have any particular structure, they are written to the + * backing file in a raw form as they come from a data loader. + * + * Backing file layout + * ------------------- + * + * + * +-------------------------------------------+ + * | incfs_super_block |]---+ + * +-------------------------------------------+ | + * | metadata |<---+ + * | incfs_file_signature |]---+ + * +-------------------------------------------+ | + * ......................... | + * +-------------------------------------------+ | metadata + * +------->| blockmap area | | list links + * | | [incfs_blockmap_entry] | | + * | | [incfs_blockmap_entry] | | + * | | [incfs_blockmap_entry] | | + * | +--[| [incfs_blockmap_entry] | | + * | | | [incfs_blockmap_entry] | | + * | | | [incfs_blockmap_entry] | | + * | | +-------------------------------------------+ | + * | | ......................... | + * | | +-------------------------------------------+ | + * | | | metadata |<---+ + * +----|--[| incfs_blockmap |]---+ + * | +-------------------------------------------+ | + * | ......................... | + * | +-------------------------------------------+ | + * +-->| data block | | + * +-------------------------------------------+ | + * ......................... | + * +-------------------------------------------+ | + * | metadata |<---+ + * | incfs_file_attr | + * +-------------------------------------------+ + */ +#ifndef _INCFS_FORMAT_H +#define _INCFS_FORMAT_H +#include +#include +#include + +#include "internal.h" + +#define INCFS_MAX_NAME_LEN 255 +#define INCFS_FORMAT_V1 1 +#define INCFS_FORMAT_CURRENT_VER INCFS_FORMAT_V1 + +enum incfs_metadata_type { + INCFS_MD_NONE = 0, + INCFS_MD_BLOCK_MAP = 1, + INCFS_MD_FILE_ATTR = 2, + INCFS_MD_SIGNATURE = 3 +}; + +/* Header included at the beginning of all metadata records on the disk. */ +struct incfs_md_header { + __u8 h_md_entry_type; + + /* + * Size of the metadata record. + * (e.g. inode, dir entry etc) not just this struct. + */ + __le16 h_record_size; + + /* + * CRC32 of the metadata record. + * (e.g. inode, dir entry etc) not just this struct. + */ + __le32 h_record_crc; + + /* Offset of the next metadata entry if any */ + __le64 h_next_md_offset; + + /* Offset of the previous metadata entry if any */ + __le64 h_prev_md_offset; + +} __packed; + +/* Backing file header */ +struct incfs_file_header { + /* Magic number: INCFS_MAGIC_NUMBER */ + __le64 fh_magic; + + /* Format version: INCFS_FORMAT_CURRENT_VER */ + __le64 fh_version; + + /* sizeof(incfs_file_header) */ + __le16 fh_header_size; + + /* INCFS_DATA_FILE_BLOCK_SIZE */ + __le16 fh_data_block_size; + + /* Padding, also reserved for future use. */ + __le32 fh_dummy; + + /* Offset of the first metadata record */ + __le64 fh_first_md_offset; + + /* + * Put file specific information after this point + */ + + /* Full size of the file's content */ + __le64 fh_file_size; + + /* File uuid */ + incfs_uuid_t fh_uuid; +} __packed; + +enum incfs_block_map_entry_flags { + INCFS_BLOCK_COMPRESSED_LZ4 = (1 << 0), +}; + +/* Block map entry pointing to an actual location of the data block. */ +struct incfs_blockmap_entry { + /* Offset of the actual data block. Lower 32 bits */ + __le32 me_data_offset_lo; + + /* Offset of the actual data block. Higher 16 bits */ + __le16 me_data_offset_hi; + + /* How many bytes the data actually occupies in the backing file */ + __le16 me_data_size; + + /* Block flags from incfs_block_map_entry_flags */ + __le16 me_flags; +} __packed; + +/* Metadata record for locations of file blocks. Type = INCFS_MD_BLOCK_MAP */ +struct incfs_blockmap { + struct incfs_md_header m_header; + + /* Base offset of the array of incfs_blockmap_entry */ + __le64 m_base_offset; + + /* Size of the map entry array in blocks */ + __le32 m_block_count; +} __packed; + +/* Metadata record for file attribute. Type = INCFS_MD_FILE_ATTR */ +struct incfs_file_attr { + struct incfs_md_header fa_header; + + __le64 fa_offset; + + __le16 fa_size; + + __le32 fa_crc; +} __packed; + +/* Metadata record for file attribute. Type = INCFS_MD_SIGNATURE */ +struct incfs_file_signature { + struct incfs_md_header sg_header; + + __u8 sg_hash_alg; /* Value from incfs_hash_tree_algorithm */ + + __le32 sg_hash_tree_size; /* The size of the hash tree. */ + + __le64 sg_hash_tree_offset; /* Hash tree offset in the backing file */ + + __u8 sg_root_hash[INCFS_MAX_HASH_SIZE]; + + __le32 sg_sig_size; /* The size of the pkcs7 signature. */ + + __le64 sg_sig_offset; /* pkcs7 signature's offset in the backing file */ + + __le32 sg_add_data_size; /* The size of the additional data. */ + + __le64 sg_add_data_offset; /* Additional data's offset */ +} __packed; + +/* State of the backing file. */ +struct backing_file_context { + /* Protects writes to bc_file */ + struct mutex bc_mutex; + + /* File object to read data from */ + struct file *bc_file; + + /* + * Offset of the last known metadata record in the backing file. + * 0 means there are no metadata records. + */ + loff_t bc_last_md_record_offset; +}; + + +/* Backing file locations of things required for signature validation. */ +struct ondisk_signature { + + loff_t add_data_offset; /* Additional data's offset */ + + loff_t sig_offset; /* pkcs7 signature's offset in the backing file */ + + loff_t mtree_offset; /* Backing file offset of the hash tree. */ + + u32 add_data_size; /* The size of the additional data. */ + + u32 sig_size; /* The size of the pkcs7 signature. */ + + u32 mtree_size; /* The size of the hash tree. */ +}; + +struct metadata_handler { + loff_t md_record_offset; + loff_t md_prev_record_offset; + void *context; + + union { + struct incfs_md_header md_header; + struct incfs_blockmap blockmap; + struct incfs_file_attr file_attr; + struct incfs_file_signature signature; + } md_buffer; + + int (*handle_blockmap)(struct incfs_blockmap *bm, + struct metadata_handler *handler); + int (*handle_file_attr)(struct incfs_file_attr *fa, + struct metadata_handler *handler); + int (*handle_signature)(struct incfs_file_signature *sig, + struct metadata_handler *handler); +}; +#define INCFS_MAX_METADATA_RECORD_SIZE \ + FIELD_SIZEOF(struct metadata_handler, md_buffer) + +loff_t incfs_get_end_offset(struct file *f); + +/* Backing file context management */ +struct backing_file_context *incfs_alloc_bfc(struct file *backing_file); + +void incfs_free_bfc(struct backing_file_context *bfc); + +/* Writing stuff */ +int incfs_write_blockmap_to_backing_file(struct backing_file_context *bfc, + u32 block_count, loff_t *map_base_off); + +int incfs_write_fh_to_backing_file(struct backing_file_context *bfc, + incfs_uuid_t *uuid, u64 file_size); + +int incfs_write_data_block_to_backing_file(struct backing_file_context *bfc, + struct mem_range block, + int block_index, loff_t bm_base_off, + u16 flags); + +int incfs_write_hash_block_to_backing_file(struct backing_file_context *bfc, + struct mem_range block, + int block_index, loff_t hash_area_off); + +int incfs_write_file_attr_to_backing_file(struct backing_file_context *bfc, + struct mem_range value, struct incfs_file_attr *attr); + +int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, + u8 hash_alg, u32 tree_size, + struct mem_range root_hash, struct mem_range add_data, + struct mem_range sig); + +int incfs_make_empty_backing_file(struct backing_file_context *bfc, + incfs_uuid_t *uuid, u64 file_size); + +/* Reading stuff */ +int incfs_read_file_header(struct backing_file_context *bfc, + loff_t *first_md_off, incfs_uuid_t *uuid, + u64 *file_size); + +int incfs_read_blockmap_entry(struct backing_file_context *bfc, int block_index, + loff_t bm_base_off, + struct incfs_blockmap_entry *bm_entry); + +int incfs_read_blockmap_entries(struct backing_file_context *bfc, + struct incfs_blockmap_entry *entries, + int start_index, int blocks_number, + loff_t bm_base_off); + +int incfs_read_next_metadata_record(struct backing_file_context *bfc, + struct metadata_handler *handler); + +ssize_t incfs_kread(struct file *f, void *buf, size_t size, loff_t pos); +ssize_t incfs_kwrite(struct file *f, const void *buf, size_t size, loff_t pos); + +#endif /* _INCFS_FORMAT_H */ diff --git a/fs/incfs/integrity.c b/fs/incfs/integrity.c new file mode 100644 index 000000000000..c6444e73e4d8 --- /dev/null +++ b/fs/incfs/integrity.c @@ -0,0 +1,217 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2019 Google LLC + */ +#include +#include +#include +#include +#include + +#include "integrity.h" + +int incfs_validate_pkcs7_signature(struct mem_range pkcs7_blob, + struct mem_range root_hash, struct mem_range add_data) +{ + struct pkcs7_message *pkcs7 = NULL; + const void *data = NULL; + size_t data_len = 0; + const char *p; + int err; + + pkcs7 = pkcs7_parse_message(pkcs7_blob.data, pkcs7_blob.len); + if (IS_ERR(pkcs7)) { + pr_debug("PKCS#7 parsing error. ptr=%p size=%ld err=%ld\n", + pkcs7_blob.data, pkcs7_blob.len, -PTR_ERR(pkcs7)); + return PTR_ERR(pkcs7); + } + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0) + err = pkcs7_get_content_data(pkcs7, &data, &data_len, false); +#else + err = pkcs7_get_content_data(pkcs7, &data, &data_len, NULL); +#endif + if (err || data_len == 0 || data == NULL) { + pr_debug("PKCS#7 message does not contain data\n"); + err = -EBADMSG; + goto out; + } + + if (root_hash.len == 0) { + pr_debug("Root hash is empty.\n"); + err = -EBADMSG; + goto out; + } + + if (data_len != root_hash.len + add_data.len) { + pr_debug("PKCS#7 data size doesn't match arguments.\n"); + err = -EKEYREJECTED; + goto out; + } + + p = data; + if (memcmp(p, root_hash.data, root_hash.len) != 0) { + pr_debug("Root hash mismatch.\n"); + err = -EKEYREJECTED; + goto out; + } + p += root_hash.len; + if (memcmp(p, add_data.data, add_data.len) != 0) { + pr_debug("Additional data mismatch.\n"); + err = -EKEYREJECTED; + goto out; + } + + err = pkcs7_verify(pkcs7, VERIFYING_UNSPECIFIED_SIGNATURE); + if (err) + pr_debug("PKCS#7 signature verification error: %d\n", -err); + + /* + * RSA signature verification sometimes returns unexpected error codes + * when signature doesn't match. + */ + if (err == -ERANGE || err == -EINVAL) + err = -EBADMSG; + +out: + pkcs7_free_message(pkcs7); + return err; +} + +struct incfs_hash_alg *incfs_get_hash_alg(enum incfs_hash_tree_algorithm id) +{ + static struct incfs_hash_alg sha256 = { + .name = "sha256", + .digest_size = SHA256_DIGEST_SIZE, + .id = INCFS_HASH_TREE_SHA256 + }; + struct incfs_hash_alg *result = NULL; + struct crypto_shash *shash; + + if (id == INCFS_HASH_TREE_SHA256) { + BUILD_BUG_ON(INCFS_MAX_HASH_SIZE < SHA256_DIGEST_SIZE); + result = &sha256; + } + + if (result == NULL) + return ERR_PTR(-ENOENT); + + /* pairs with cmpxchg_release() below */ + shash = smp_load_acquire(&result->shash); + if (shash) + return result; + + shash = crypto_alloc_shash(result->name, 0, 0); + if (IS_ERR(shash)) { + int err = PTR_ERR(shash); + + pr_err("Can't allocate hash alg %s, error code:%d", + result->name, err); + return ERR_PTR(err); + } + + /* pairs with smp_load_acquire() above */ + if (cmpxchg_release(&result->shash, NULL, shash) != NULL) + crypto_free_shash(shash); + + return result; +} + + +struct mtree *incfs_alloc_mtree(enum incfs_hash_tree_algorithm id, + int data_block_count, + struct mem_range root_hash) +{ + struct mtree *result = NULL; + struct incfs_hash_alg *hash_alg = NULL; + int hash_per_block; + int lvl; + int total_blocks = 0; + int blocks_in_level[INCFS_MAX_MTREE_LEVELS]; + int blocks = data_block_count; + + if (data_block_count <= 0) + return ERR_PTR(-EINVAL); + + hash_alg = incfs_get_hash_alg(id); + if (IS_ERR(hash_alg)) + return ERR_PTR(PTR_ERR(hash_alg)); + + if (root_hash.len < hash_alg->digest_size) + return ERR_PTR(-EINVAL); + + result = kzalloc(sizeof(*result), GFP_NOFS); + if (!result) + return ERR_PTR(-ENOMEM); + + result->alg = hash_alg; + hash_per_block = INCFS_DATA_FILE_BLOCK_SIZE / result->alg->digest_size; + + /* Calculating tree geometry. */ + /* First pass: calculate how many blocks in each tree level. */ + for (lvl = 0; blocks > 1; lvl++) { + if (lvl >= INCFS_MAX_MTREE_LEVELS) { + pr_err("incfs: too much data in mtree"); + goto err; + } + + blocks = (blocks + hash_per_block - 1) / hash_per_block; + blocks_in_level[lvl] = blocks; + total_blocks += blocks; + } + result->depth = lvl; + result->hash_tree_area_size = total_blocks * INCFS_DATA_FILE_BLOCK_SIZE; + if (result->hash_tree_area_size > INCFS_MAX_HASH_AREA_SIZE) + goto err; + + blocks = 0; + /* Second pass: calculate offset of each level. 0th level goes last. */ + for (lvl = 0; lvl < result->depth; lvl++) { + u32 suboffset; + + blocks += blocks_in_level[lvl]; + suboffset = (total_blocks - blocks) + * INCFS_DATA_FILE_BLOCK_SIZE; + + result->hash_level_suboffset[lvl] = suboffset; + } + + /* Root hash is stored separately from the rest of the tree. */ + memcpy(result->root_hash, root_hash.data, hash_alg->digest_size); + return result; + +err: + kfree(result); + return ERR_PTR(-E2BIG); +} + +void incfs_free_mtree(struct mtree *tree) +{ + kfree(tree); +} + +int incfs_calc_digest(struct incfs_hash_alg *alg, struct mem_range data, + struct mem_range digest) +{ + SHASH_DESC_ON_STACK(desc, alg->shash); + + if (!alg || !alg->shash || !data.data || !digest.data) + return -EFAULT; + + if (alg->digest_size > digest.len) + return -EINVAL; + + desc->tfm = alg->shash; + return crypto_shash_digest(desc, data.data, data.len, digest.data); +} + +void incfs_free_signature_info(struct signature_info *si) +{ + if (!si) + return; + kfree(si->root_hash.data); + kfree(si->additional_data.data); + kfree(si->signature.data); + kfree(si); +} + diff --git a/fs/incfs/integrity.h b/fs/incfs/integrity.h new file mode 100644 index 000000000000..da1c38486b2f --- /dev/null +++ b/fs/incfs/integrity.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2019 Google LLC + */ +#ifndef _INCFS_INTEGRITY_H +#define _INCFS_INTEGRITY_H +#include +#include +#include + +#include + +#include "internal.h" + +#define INCFS_MAX_MTREE_LEVELS 8 +#define INCFS_MAX_HASH_AREA_SIZE (1280 * 1024 * 1024) + +struct incfs_hash_alg { + const char *name; + int digest_size; + enum incfs_hash_tree_algorithm id; + + struct crypto_shash *shash; +}; + +/* Merkle tree structure. */ +struct mtree { + struct incfs_hash_alg *alg; + + u8 root_hash[INCFS_MAX_HASH_SIZE]; + + /* Offset of each hash level in the hash area. */ + u32 hash_level_suboffset[INCFS_MAX_MTREE_LEVELS]; + + u32 hash_tree_area_size; + + /* Number of levels in hash_level_suboffset */ + int depth; +}; + +struct signature_info { + struct mem_range root_hash; + + struct mem_range additional_data; + + struct mem_range signature; + + enum incfs_hash_tree_algorithm hash_alg; +}; + +struct incfs_hash_alg *incfs_get_hash_alg(enum incfs_hash_tree_algorithm id); + +struct mtree *incfs_alloc_mtree(enum incfs_hash_tree_algorithm id, + int data_block_count, + struct mem_range root_hash); + +void incfs_free_mtree(struct mtree *tree); + +size_t incfs_get_mtree_depth(enum incfs_hash_tree_algorithm alg, loff_t size); + +size_t incfs_get_mtree_hash_count(enum incfs_hash_tree_algorithm alg, + loff_t size); + +int incfs_calc_digest(struct incfs_hash_alg *alg, struct mem_range data, + struct mem_range digest); + +int incfs_validate_pkcs7_signature(struct mem_range pkcs7_blob, + struct mem_range root_hash, struct mem_range add_data); + +void incfs_free_signature_info(struct signature_info *si); + +#endif /* _INCFS_INTEGRITY_H */ diff --git a/fs/incfs/internal.h b/fs/incfs/internal.h new file mode 100644 index 000000000000..0a85eaed41d3 --- /dev/null +++ b/fs/incfs/internal.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2018 Google LLC + */ +#ifndef _INCFS_INTERNAL_H +#define _INCFS_INTERNAL_H +#include + +struct mem_range { + u8 *data; + size_t len; +}; + +static inline struct mem_range range(u8 *data, size_t len) +{ + return (struct mem_range){ .data = data, .len = len }; +} + +#define LOCK_REQUIRED(lock) WARN_ON_ONCE(!mutex_is_locked(&lock)) + +#endif /* _INCFS_INTERNAL_H */ diff --git a/fs/incfs/main.c b/fs/incfs/main.c new file mode 100644 index 000000000000..d9eec7496846 --- /dev/null +++ b/fs/incfs/main.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2018 Google LLC + */ +#include +#include +#include + +#include + +#include "vfs.h" + +#define INCFS_NODE_FEATURES "features" + +struct file_system_type incfs_fs_type = { + .owner = THIS_MODULE, + .name = INCFS_NAME, + .mount = incfs_mount_fs, + .kill_sb = incfs_kill_sb, + .fs_flags = 0 +}; + +static struct kobject *sysfs_root, *featurefs_root; + +static ssize_t corefs_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) +{ + return snprintf(buff, PAGE_SIZE, "supported\n"); +} + +static struct kobj_attribute corefs_attr = __ATTR_RO(corefs); + +static struct attribute *attributes[] = { + &corefs_attr.attr, + NULL, +}; + +static const struct attribute_group attr_group = { + .attrs = attributes, +}; + +static int __init init_sysfs(void) +{ + int res = 0; + + sysfs_root = kobject_create_and_add(INCFS_NAME, fs_kobj); + if (!sysfs_root) + return -ENOMEM; + + featurefs_root = kobject_create_and_add(INCFS_NODE_FEATURES, + sysfs_root); + if (!featurefs_root) + return -ENOMEM; + + res = sysfs_create_group(featurefs_root, &attr_group); + if (res) { + kobject_put(sysfs_root); + sysfs_root = NULL; + } + return res; +} + +static void cleanup_sysfs(void) +{ + if (featurefs_root) { + sysfs_remove_group(featurefs_root, &attr_group); + kobject_put(featurefs_root); + featurefs_root = NULL; + } + + if (sysfs_root) { + kobject_put(sysfs_root); + sysfs_root = NULL; + } +} + +static int __init init_incfs_module(void) +{ + int err = 0; + + err = init_sysfs(); + if (err) + return err; + + err = register_filesystem(&incfs_fs_type); + if (err) + cleanup_sysfs(); + + return err; +} + +static void __exit cleanup_incfs_module(void) +{ + cleanup_sysfs(); + unregister_filesystem(&incfs_fs_type); +} + +module_init(init_incfs_module); +module_exit(cleanup_incfs_module); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Eugene Zemtsov "); +MODULE_DESCRIPTION("Incremental File System"); diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c new file mode 100644 index 000000000000..41efd70af8e1 --- /dev/null +++ b/fs/incfs/vfs.c @@ -0,0 +1,2203 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2018 Google LLC + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "compat.h" +#include "data_mgmt.h" +#include "format.h" +#include "integrity.h" +#include "internal.h" + +#define INCFS_PENDING_READS_INODE 2 +#define INCFS_LOG_INODE 3 +#define INCFS_START_INO_RANGE 10 +#define READ_FILE_MODE 0444 +#define READ_EXEC_FILE_MODE 0555 +#define READ_WRITE_FILE_MODE 0666 + +static int incfs_remount_fs(struct super_block *sb, int *flags, char *data); + +static int dentry_revalidate(struct dentry *dentry, unsigned int flags); +static void dentry_release(struct dentry *d); + +static int iterate_incfs_dir(struct file *file, struct dir_context *ctx); +static struct dentry *dir_lookup(struct inode *dir_inode, + struct dentry *dentry, unsigned int flags); +static int dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); +static int dir_unlink(struct inode *dir, struct dentry *dentry); +static int dir_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *new_dentry); +static int dir_rmdir(struct inode *dir, struct dentry *dentry); +static int dir_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry); + +static int file_open(struct inode *inode, struct file *file); +static int file_release(struct inode *inode, struct file *file); +static ssize_t file_write(struct file *f, const char __user *buf, + size_t size, loff_t *offset); +static int read_single_page(struct file *f, struct page *page); +static long dispatch_ioctl(struct file *f, unsigned int req, unsigned long arg); + +static ssize_t pending_reads_read(struct file *f, char __user *buf, size_t len, + loff_t *ppos); +static __poll_t pending_reads_poll(struct file *file, poll_table *wait); +static int pending_reads_open(struct inode *inode, struct file *file); +static int pending_reads_release(struct inode *, struct file *); + +static ssize_t log_read(struct file *f, char __user *buf, size_t len, + loff_t *ppos); +static __poll_t log_poll(struct file *file, poll_table *wait); +static int log_open(struct inode *inode, struct file *file); +static int log_release(struct inode *, struct file *); + +static struct inode *alloc_inode(struct super_block *sb); +static void free_inode(struct inode *inode); +static void evict_inode(struct inode *inode); + +static ssize_t incfs_getxattr(struct dentry *d, const char *name, + void *value, size_t size); +static ssize_t incfs_listxattr(struct dentry *d, char *list, size_t size); + +static int show_options(struct seq_file *, struct dentry *); + +static const struct super_operations incfs_super_ops = { + .statfs = simple_statfs, + .remount_fs = incfs_remount_fs, + .alloc_inode = alloc_inode, + .destroy_inode = free_inode, + .evict_inode = evict_inode, + .show_options = show_options +}; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) +#define dir_rename_wrap dir_rename +#else +static int dir_rename_wrap(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + return dir_rename(old_dir, old_dentry, new_dir, new_dentry); +} +#endif + +static const struct inode_operations incfs_dir_inode_ops = { + .lookup = dir_lookup, + .mkdir = dir_mkdir, + .rename = dir_rename_wrap, + .unlink = dir_unlink, + .link = dir_link, + .rmdir = dir_rmdir +}; + +static const struct file_operations incfs_dir_fops = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .iterate = iterate_incfs_dir, + .open = file_open, + .release = file_release, + .unlocked_ioctl = dispatch_ioctl, + .compat_ioctl = dispatch_ioctl +}; + +static const struct dentry_operations incfs_dentry_ops = { + .d_revalidate = dentry_revalidate, + .d_release = dentry_release +}; + +static const struct address_space_operations incfs_address_space_ops = { + .readpage = read_single_page, + /* .readpages = readpages */ +}; + +static const struct file_operations incfs_file_ops = { + .open = file_open, + .release = file_release, + .write = file_write, + .read_iter = generic_file_read_iter, + .mmap = generic_file_mmap, + .splice_read = generic_file_splice_read, + .llseek = generic_file_llseek, + .unlocked_ioctl = dispatch_ioctl, + .compat_ioctl = dispatch_ioctl +}; + +static const struct file_operations incfs_pending_read_file_ops = { + .read = pending_reads_read, + .poll = pending_reads_poll, + .open = pending_reads_open, + .release = pending_reads_release, + .llseek = noop_llseek, + .unlocked_ioctl = dispatch_ioctl, + .compat_ioctl = dispatch_ioctl +}; + +static const struct file_operations incfs_log_file_ops = { + .read = log_read, + .poll = log_poll, + .open = log_open, + .release = log_release, + .llseek = noop_llseek, + .unlocked_ioctl = dispatch_ioctl, + .compat_ioctl = dispatch_ioctl +}; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4,9,0) + +static const struct inode_operations incfs_file_inode_ops = { + .setattr = simple_setattr, + .getattr = simple_getattr, + .getxattr = incfs_getxattr, + .listxattr = incfs_listxattr +}; + +#else + +static const struct inode_operations incfs_file_inode_ops = { + .setattr = simple_setattr, + .getattr = simple_getattr, + .listxattr = incfs_listxattr +}; + +static int incfs_handler_getxattr(const struct xattr_handler *xh, + struct dentry *d, struct inode *inode, + const char *name, void *buffer, size_t size) +{ + return incfs_getxattr(d, name, buffer, size); +} + +static const struct xattr_handler incfs_xattr_handler = { + .prefix = "", /* AKA all attributes */ + .get = incfs_handler_getxattr, +}; + +const struct xattr_handler *incfs_xattr_ops[] = { + &incfs_xattr_handler, + NULL, +}; + + +#endif + +/* State of an open .pending_reads file, unique for each file descriptor. */ +struct pending_reads_state { + /* A serial number of the last pending read obtained from this file. */ + int last_pending_read_sn; +}; + +/* State of an open .log file, unique for each file descriptor. */ +struct log_file_state { + struct read_log_state state; +}; + +struct inode_search { + unsigned long ino; + + struct dentry *backing_dentry; +}; + +enum parse_parameter { + Opt_read_timeout, + Opt_readahead_pages, + Opt_no_backing_file_cache, + Opt_no_backing_file_readahead, + Opt_rlog_pages, + Opt_rlog_wakeup_cnt, + Opt_err +}; + +static const char pending_reads_file_name[] = INCFS_PENDING_READS_FILENAME; +static struct mem_range pending_reads_file_name_range = { + .data = (u8 *)pending_reads_file_name, + .len = ARRAY_SIZE(pending_reads_file_name) - 1 +}; + +static const char log_file_name[] = INCFS_LOG_FILENAME; +static struct mem_range log_file_name_range = { + .data = (u8 *)log_file_name, + .len = ARRAY_SIZE(log_file_name) - 1 +}; + +static const match_table_t option_tokens = { + { Opt_read_timeout, "read_timeout_ms=%u" }, + { Opt_readahead_pages, "readahead=%u" }, + { Opt_no_backing_file_cache, "no_bf_cache=%u" }, + { Opt_no_backing_file_readahead, "no_bf_readahead=%u" }, + { Opt_rlog_pages, "rlog_pages=%u" }, + { Opt_rlog_wakeup_cnt, "rlog_wakeup_cnt=%u" }, + { Opt_err, NULL } +}; + +static int parse_options(struct mount_options *opts, char *str) +{ + substring_t args[MAX_OPT_ARGS]; + int value; + char *position; + + if (opts == NULL) + return -EFAULT; + + opts->read_timeout_ms = 1000; /* Default: 1s */ + opts->readahead_pages = 10; + opts->read_log_pages = 2; + opts->read_log_wakeup_count = 10; + opts->no_backing_file_cache = false; + opts->no_backing_file_readahead = false; + if (str == NULL || *str == 0) + return 0; + + while ((position = strsep(&str, ",")) != NULL) { + int token; + + if (!*position) + continue; + + token = match_token(position, option_tokens, args); + + switch (token) { + case Opt_read_timeout: + if (match_int(&args[0], &value)) + return -EINVAL; + opts->read_timeout_ms = value; + break; + case Opt_readahead_pages: + if (match_int(&args[0], &value)) + return -EINVAL; + opts->readahead_pages = value; + break; + case Opt_no_backing_file_cache: + if (match_int(&args[0], &value)) + return -EINVAL; + opts->no_backing_file_cache = (value != 0); + break; + case Opt_no_backing_file_readahead: + if (match_int(&args[0], &value)) + return -EINVAL; + opts->no_backing_file_readahead = (value != 0); + break; + case Opt_rlog_pages: + if (match_int(&args[0], &value)) + return -EINVAL; + opts->read_log_pages = value; + break; + case Opt_rlog_wakeup_cnt: + if (match_int(&args[0], &value)) + return -EINVAL; + opts->read_log_wakeup_count = value; + break; + default: + return -EINVAL; + } + } + + return 0; +} + +static struct super_block *file_superblock(struct file *f) +{ + struct inode *inode = file_inode(f); + + return inode->i_sb; +} + +static struct mount_info *get_mount_info(struct super_block *sb) +{ + struct mount_info *result = sb->s_fs_info; + + WARN_ON(!result); + return result; +} + +/* Read file size from the attribute. Quicker than reading the header */ +static u64 read_size_attr(struct dentry *backing_dentry) +{ + __le64 attr_value; + ssize_t bytes_read; + + bytes_read = vfs_getxattr(backing_dentry, INCFS_XATTR_SIZE_NAME, + (char *)&attr_value, sizeof(attr_value)); + + if (bytes_read != sizeof(attr_value)) + return 0; + + return le64_to_cpu(attr_value); +} + +static int inode_test(struct inode *inode, void *opaque) +{ + struct inode_search *search = opaque; + struct inode_info *node = get_incfs_node(inode); + + if (!node) + return 0; + + if (search->backing_dentry) { + struct inode *backing_inode = d_inode(search->backing_dentry); + + return (node->n_backing_inode == backing_inode) && + inode->i_ino == search->ino; + } + return 1; +} + +static int inode_set(struct inode *inode, void *opaque) +{ + struct inode_search *search = opaque; + struct inode_info *node = get_incfs_node(inode); + + if (search->backing_dentry) { + /* It's a regular inode that has corresponding backing inode */ + struct dentry *backing_dentry = search->backing_dentry; + struct inode *backing_inode = d_inode(backing_dentry); + + inode_init_owner(inode, NULL, backing_inode->i_mode); + fsstack_copy_attr_all(inode, backing_inode); + if (S_ISREG(inode->i_mode)) { + u64 size = read_size_attr(backing_dentry); + + inode->i_size = size; + inode->i_blocks = get_blocks_count_for_size(size); + inode->i_mapping->a_ops = &incfs_address_space_ops; + inode->i_op = &incfs_file_inode_ops; + inode->i_fop = &incfs_file_ops; + } else if (S_ISDIR(inode->i_mode)) { + inode->i_size = 0; + inode->i_blocks = 1; + inode->i_mapping->a_ops = &incfs_address_space_ops; + inode->i_op = &incfs_dir_inode_ops; + inode->i_fop = &incfs_dir_fops; + } else { + pr_warn_once("incfs: Unexpected inode type\n"); + return -EBADF; + } + + ihold(backing_inode); + node->n_backing_inode = backing_inode; + node->n_mount_info = get_mount_info(inode->i_sb); + inode->i_ctime = backing_inode->i_ctime; + inode->i_mtime = backing_inode->i_mtime; + inode->i_atime = backing_inode->i_atime; + inode->i_ino = backing_inode->i_ino; + if (backing_inode->i_ino < INCFS_START_INO_RANGE) { + pr_warn("incfs: ino conflict with backing FS %ld\n", + backing_inode->i_ino); + } + return 0; + } else if (search->ino == INCFS_PENDING_READS_INODE) { + /* It's an inode for .pending_reads pseudo file. */ + + inode->i_ctime = (struct timespec){}; + inode->i_mtime = inode->i_ctime; + inode->i_atime = inode->i_ctime; + inode->i_size = 0; + inode->i_ino = INCFS_PENDING_READS_INODE; + inode->i_private = NULL; + + inode_init_owner(inode, NULL, S_IFREG | READ_WRITE_FILE_MODE); + + inode->i_op = &incfs_file_inode_ops; + inode->i_fop = &incfs_pending_read_file_ops; + + } else if (search->ino == INCFS_LOG_INODE) { + /* It's an inode for .log pseudo file. */ + + inode->i_ctime = (struct timespec){}; + inode->i_mtime = inode->i_ctime; + inode->i_atime = inode->i_ctime; + inode->i_size = 0; + inode->i_ino = INCFS_LOG_INODE; + inode->i_private = NULL; + + inode_init_owner(inode, NULL, S_IFREG | READ_WRITE_FILE_MODE); + + inode->i_op = &incfs_file_inode_ops; + inode->i_fop = &incfs_log_file_ops; + + } else { + /* Unknown inode requested. */ + return -EINVAL; + } + + return 0; +} + +static struct inode *fetch_regular_inode(struct super_block *sb, + struct dentry *backing_dentry) +{ + struct inode *backing_inode = d_inode(backing_dentry); + struct inode_search search = { + .ino = backing_inode->i_ino, + .backing_dentry = backing_dentry + }; + struct inode *inode = iget5_locked(sb, search.ino, inode_test, + inode_set, &search); + + if (!inode) + return ERR_PTR(-ENOMEM); + + if (inode->i_state & I_NEW) + unlock_new_inode(inode); + + return inode; +} + +static ssize_t pending_reads_read(struct file *f, char __user *buf, size_t len, + loff_t *ppos) +{ + struct pending_reads_state *pr_state = f->private_data; + struct mount_info *mi = get_mount_info(file_superblock(f)); + struct incfs_pending_read_info *reads_buf = NULL; + size_t reads_to_collect = len / sizeof(*reads_buf); + int last_known_read_sn = READ_ONCE(pr_state->last_pending_read_sn); + int new_max_sn = last_known_read_sn; + int reads_collected = 0; + ssize_t result = 0; + int i = 0; + + if (!access_ok(VERIFY_WRITE, buf, len)) + return -EFAULT; + + if (!incfs_fresh_pending_reads_exist(mi, last_known_read_sn)) + return 0; + + reads_buf = (struct incfs_pending_read_info *)get_zeroed_page(GFP_NOFS); + if (!reads_buf) + return -ENOMEM; + + reads_to_collect = + min_t(size_t, PAGE_SIZE / sizeof(*reads_buf), reads_to_collect); + + reads_collected = incfs_collect_pending_reads( + mi, last_known_read_sn, reads_buf, reads_to_collect); + if (reads_collected < 0) { + result = reads_collected; + goto out; + } + + for (i = 0; i < reads_collected; i++) + if (reads_buf[i].serial_number > new_max_sn) + new_max_sn = reads_buf[i].serial_number; + + /* + * Just to make sure that we don't accidentally copy more data + * to reads buffer than userspace can handle. + */ + reads_collected = min_t(size_t, reads_collected, reads_to_collect); + result = reads_collected * sizeof(*reads_buf); + + /* Copy reads info to the userspace buffer */ + if (copy_to_user(buf, reads_buf, result)) { + result = -EFAULT; + goto out; + } + + WRITE_ONCE(pr_state->last_pending_read_sn, new_max_sn); + *ppos = 0; +out: + if (reads_buf) + free_page((unsigned long)reads_buf); + return result; +} + + +static __poll_t pending_reads_poll(struct file *file, poll_table *wait) +{ + struct pending_reads_state *state = file->private_data; + struct mount_info *mi = get_mount_info(file_superblock(file)); + __poll_t ret = 0; + + poll_wait(file, &mi->mi_pending_reads_notif_wq, wait); + if (incfs_fresh_pending_reads_exist(mi, + state->last_pending_read_sn)) + ret = EPOLLIN | EPOLLRDNORM; + + return ret; +} + +static int pending_reads_open(struct inode *inode, struct file *file) +{ + struct pending_reads_state *state = NULL; + + state = kzalloc(sizeof(*state), GFP_NOFS); + if (!state) + return -ENOMEM; + + file->private_data = state; + return 0; +} + +static int pending_reads_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +static struct inode *fetch_pending_reads_inode(struct super_block *sb) +{ + struct inode_search search = { + .ino = INCFS_PENDING_READS_INODE + }; + struct inode *inode = iget5_locked(sb, search.ino, inode_test, + inode_set, &search); + + if (!inode) + return ERR_PTR(-ENOMEM); + + if (inode->i_state & I_NEW) + unlock_new_inode(inode); + + return inode; +} + +static int log_open(struct inode *inode, struct file *file) +{ + struct log_file_state *log_state = NULL; + struct mount_info *mi = get_mount_info(file_superblock(file)); + + log_state = kzalloc(sizeof(*log_state), GFP_NOFS); + if (!log_state) + return -ENOMEM; + + log_state->state = incfs_get_log_state(mi); + file->private_data = log_state; + return 0; +} + +static int log_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +static ssize_t log_read(struct file *f, char __user *buf, size_t len, + loff_t *ppos) +{ + struct log_file_state *log_state = f->private_data; + struct mount_info *mi = get_mount_info(file_superblock(f)); + struct incfs_pending_read_info *reads_buf = + (struct incfs_pending_read_info *)__get_free_page(GFP_NOFS); + size_t reads_to_collect = len / sizeof(*reads_buf); + size_t reads_per_page = PAGE_SIZE / sizeof(*reads_buf); + int total_reads_collected = 0; + ssize_t result = 0; + + if (!reads_buf) + return -ENOMEM; + + reads_to_collect = min_t(size_t, mi->mi_log.rl_size, reads_to_collect); + while (reads_to_collect > 0) { + struct read_log_state next_state = READ_ONCE(log_state->state); + int reads_collected = incfs_collect_logged_reads( + mi, &next_state, reads_buf, + min_t(size_t, reads_to_collect, reads_per_page)); + if (reads_collected <= 0) { + result = total_reads_collected ? + total_reads_collected * + sizeof(*reads_buf) : + reads_collected; + goto out; + } + if (copy_to_user(buf, reads_buf, + reads_collected * sizeof(*reads_buf))) { + result = total_reads_collected ? + total_reads_collected * + sizeof(*reads_buf) : + -EFAULT; + goto out; + } + + WRITE_ONCE(log_state->state, next_state); + total_reads_collected += reads_collected; + buf += reads_collected * sizeof(*reads_buf); + reads_to_collect -= reads_collected; + } + + result = total_reads_collected * sizeof(*reads_buf); + *ppos = 0; +out: + if (reads_buf) + free_page((unsigned long)reads_buf); + return result; +} + +static __poll_t log_poll(struct file *file, poll_table *wait) +{ + struct log_file_state *log_state = file->private_data; + struct mount_info *mi = get_mount_info(file_superblock(file)); + int count; + __poll_t ret = 0; + + poll_wait(file, &mi->mi_log.ml_notif_wq, wait); + count = incfs_get_uncollected_logs_count(mi, log_state->state); + if (count >= mi->mi_options.read_log_wakeup_count) + ret = EPOLLIN | EPOLLRDNORM; + + return ret; +} + +static struct inode *fetch_log_inode(struct super_block *sb) +{ + struct inode_search search = { + .ino = INCFS_LOG_INODE + }; + struct inode *inode = iget5_locked(sb, search.ino, inode_test, + inode_set, &search); + + if (!inode) + return ERR_PTR(-ENOMEM); + + if (inode->i_state & I_NEW) + unlock_new_inode(inode); + + return inode; +} + +static int iterate_incfs_dir(struct file *file, struct dir_context *ctx) +{ + struct dir_file *dir = get_incfs_dir_file(file); + int error = 0; + struct mount_info *mi = get_mount_info(file_superblock(file)); + bool root; + + if (!dir) { + error = -EBADF; + goto out; + } + + root = dir->backing_dir->f_inode + == d_inode(mi->mi_backing_dir_path.dentry); + + if (root && ctx->pos == 0) { + if (!dir_emit(ctx, pending_reads_file_name, + ARRAY_SIZE(pending_reads_file_name) - 1, + INCFS_PENDING_READS_INODE, DT_REG)) { + error = -EINVAL; + goto out; + } + ctx->pos++; + } + + if (root && ctx->pos == 1) { + if (!dir_emit(ctx, log_file_name, + ARRAY_SIZE(log_file_name) - 1, + INCFS_LOG_INODE, DT_REG)) { + error = -EINVAL; + goto out; + } + ctx->pos++; + } + + ctx->pos -= 2; + error = iterate_dir(dir->backing_dir, ctx); + ctx->pos += 2; + file->f_pos = dir->backing_dir->f_pos; +out: + if (error) + pr_warn("incfs: %s %s %d\n", __func__, + file->f_path.dentry->d_name.name, error); + return error; +} + +static int incfs_init_dentry(struct dentry *dentry, struct path *path) +{ + struct dentry_info *d_info = NULL; + + if (!dentry || !path) + return -EFAULT; + + d_info = kzalloc(sizeof(*d_info), GFP_NOFS); + if (!d_info) + return -ENOMEM; + + d_info->backing_path = *path; + path_get(path); + + dentry->d_fsdata = d_info; + return 0; +} + +static struct dentry *incfs_lookup_dentry(struct dentry *parent, + const char *name) +{ + struct inode *inode; + struct dentry *result = NULL; + + if (!parent) + return ERR_PTR(-EFAULT); + + inode = d_inode(parent); + inode_lock_nested(inode, I_MUTEX_PARENT); + result = lookup_one_len(name, parent, strlen(name)); + inode_unlock(inode); + + if (IS_ERR(result)) + pr_warn("%s err:%ld\n", __func__, PTR_ERR(result)); + + return result; +} + +static struct dentry *open_or_create_index_dir(struct dentry *backing_dir) +{ + static const char name[] = ".index"; + struct dentry *index_dentry; + struct inode *backing_inode = d_inode(backing_dir); + int err = 0; + + index_dentry = incfs_lookup_dentry(backing_dir, name); + if (!index_dentry) { + return ERR_PTR(-EINVAL); + } else if (IS_ERR(index_dentry)) { + return index_dentry; + } else if (d_really_is_positive(index_dentry)) { + /* Index already exists. */ + return index_dentry; + } + + /* Index needs to be created. */ + inode_lock_nested(backing_inode, I_MUTEX_PARENT); + err = vfs_mkdir(backing_inode, index_dentry, 0777); + inode_unlock(backing_inode); + + if (err) + return ERR_PTR(err); + + if (!d_really_is_positive(index_dentry)) { + dput(index_dentry); + return ERR_PTR(-EINVAL); + } + + return index_dentry; +} + +static int read_single_page(struct file *f, struct page *page) +{ + loff_t offset = 0; + loff_t size = 0; + ssize_t bytes_to_read = 0; + ssize_t read_result = 0; + struct data_file *df = get_incfs_data_file(f); + int result = 0; + void *page_start = kmap(page); + int block_index; + int timeout_ms; + + if (!df) + return -EBADF; + + offset = page_offset(page); + block_index = offset / INCFS_DATA_FILE_BLOCK_SIZE; + size = df->df_size; + timeout_ms = df->df_mount_info->mi_options.read_timeout_ms; + + pr_debug("incfs: %s %s %lld\n", __func__, + f->f_path.dentry->d_name.name, offset); + + if (offset < size) { + struct mem_range tmp = { + .len = 2 * INCFS_DATA_FILE_BLOCK_SIZE + }; + + tmp.data = (u8 *)__get_free_pages(GFP_NOFS, get_order(tmp.len)); + bytes_to_read = min_t(loff_t, size - offset, PAGE_SIZE); + read_result = incfs_read_data_file_block( + range(page_start, bytes_to_read), df, block_index, + timeout_ms, tmp); + + free_pages((unsigned long)tmp.data, get_order(tmp.len)); + } else { + bytes_to_read = 0; + read_result = 0; + } + + if (read_result < 0) + result = read_result; + else if (read_result < PAGE_SIZE) + zero_user(page, read_result, PAGE_SIZE - read_result); + + if (result == 0) + SetPageUptodate(page); + else + SetPageError(page); + + flush_dcache_page(page); + kunmap(page); + unlock_page(page); + return result; +} + +static char *file_id_to_str(incfs_uuid_t id) +{ + char *result = kmalloc(1 + sizeof(id.bytes) * 2, GFP_NOFS); + char *end; + + if (!result) + return NULL; + + end = bin2hex(result, id.bytes, sizeof(id.bytes)); + *end = 0; + return result; +} + +static struct signature_info *incfs_copy_signature_info_from_user( + struct incfs_file_signature_info __user *original) +{ + struct incfs_file_signature_info usr_si; + struct signature_info *result; + int error; + + if (!original) + return NULL; + + if (!access_ok(VERIFY_READ, original, sizeof(usr_si))) + return ERR_PTR(-EFAULT); + + if (copy_from_user(&usr_si, original, sizeof(usr_si)) > 0) + return ERR_PTR(-EFAULT); + + result = kzalloc(sizeof(*result), GFP_NOFS); + if (!result) + return ERR_PTR(-ENOMEM); + + result->hash_alg = usr_si.hash_tree_alg; + + if (result->hash_alg) { + void *p = kzalloc(INCFS_MAX_HASH_SIZE, GFP_NOFS); + + if (!p) { + error = -ENOMEM; + goto err; + } + + // TODO this sets the root_hash length to MAX_HASH_SIZE not + // the actual size. Fix, then set INCFS_MAX_HASH_SIZE back + // to 64 + result->root_hash = range(p, INCFS_MAX_HASH_SIZE); + if (copy_from_user(p, u64_to_user_ptr(usr_si.root_hash), + result->root_hash.len) > 0) { + error = -EFAULT; + goto err; + } + } + + if (usr_si.additional_data_size > INCFS_MAX_FILE_ATTR_SIZE) { + error = -E2BIG; + goto err; + } + + if (usr_si.additional_data && usr_si.additional_data_size) { + void *p = kzalloc(usr_si.additional_data_size, GFP_NOFS); + + if (!p) { + error = -ENOMEM; + goto err; + } + result->additional_data = range(p, + usr_si.additional_data_size); + if (copy_from_user(p, u64_to_user_ptr(usr_si.additional_data), + result->additional_data.len) > 0) { + error = -EFAULT; + goto err; + } + } + + if (usr_si.signature_size > INCFS_MAX_SIGNATURE_SIZE) { + error = -E2BIG; + goto err; + } + + if (usr_si.signature && usr_si.signature_size) { + void *p = kzalloc(usr_si.signature_size, GFP_NOFS); + + if (!p) { + error = -ENOMEM; + goto err; + } + result->signature = range(p, usr_si.signature_size); + if (copy_from_user(p, u64_to_user_ptr(usr_si.signature), + result->signature.len) > 0) { + error = -EFAULT; + goto err; + } + } + + return result; + +err: + incfs_free_signature_info(result); + return ERR_PTR(-error); +} + +static int init_new_file(struct mount_info *mi, struct dentry *dentry, + incfs_uuid_t *uuid, u64 size, struct mem_range attr, + struct incfs_file_signature_info __user *fsi) +{ + struct path path = {}; + struct file *new_file; + int error = 0; + struct backing_file_context *bfc = 0; + u32 block_count; + struct mem_range mem_range = {0}; + struct signature_info *si = 0; + struct mtree *hash_tree = 0; + + if (!mi || !dentry || !uuid) + return -EFAULT; + + /* Resize newly created file to its true size. */ + path = (struct path) { + .mnt = mi->mi_backing_dir_path.mnt, + .dentry = dentry + }; + new_file = dentry_open(&path, O_RDWR | O_NOATIME, mi->mi_owner); + + if (IS_ERR(new_file)) { + error = PTR_ERR(new_file); + goto out; + } + + bfc = incfs_alloc_bfc(new_file); + if (IS_ERR(bfc)) { + error = PTR_ERR(bfc); + bfc = NULL; + goto out; + } + + mutex_lock(&bfc->bc_mutex); + error = incfs_write_fh_to_backing_file(bfc, uuid, size); + if (error) + goto out; + + block_count = (u32)get_blocks_count_for_size(size); + error = incfs_write_blockmap_to_backing_file(bfc, block_count, NULL); + if (error) + goto out; + + /* This fill has data, reserve space for the block map. */ + if (block_count > 0) { + error = incfs_write_blockmap_to_backing_file( + bfc, block_count, NULL); + if (error) + goto out; + } + + if (attr.data && attr.len) { + error = incfs_write_file_attr_to_backing_file(bfc, + attr, NULL); + if (error) + goto out; + } + + if (fsi) { + si = incfs_copy_signature_info_from_user(fsi); + + if (IS_ERR(si)) { + error = PTR_ERR(si); + si = NULL; + goto out; + } + + if (si->hash_alg) { + hash_tree = incfs_alloc_mtree(si->hash_alg, block_count, + si->root_hash); + if (IS_ERR(hash_tree)) { + error = PTR_ERR(hash_tree); + hash_tree = NULL; + goto out; + } + + // TODO This code seems wrong when len is zero - we + // should error out?? + if (si->signature.len > 0) + error = incfs_validate_pkcs7_signature( + si->signature, + si->root_hash, + si->additional_data); + if (error) + goto out; + + error = incfs_write_signature_to_backing_file(bfc, + si->hash_alg, + hash_tree->hash_tree_area_size, + si->root_hash, si->additional_data, + si->signature); + + if (error) + goto out; + } + } + +out: + if (bfc) { + mutex_unlock(&bfc->bc_mutex); + incfs_free_bfc(bfc); + } + incfs_free_mtree(hash_tree); + incfs_free_signature_info(si); + kfree(mem_range.data); + + if (error) + pr_debug("incfs: %s error: %d\n", __func__, error); + return error; +} + +static int incfs_link(struct dentry *what, struct dentry *where) +{ + struct dentry *parent_dentry = dget_parent(where); + struct inode *pinode = d_inode(parent_dentry); + int error = 0; + + inode_lock_nested(pinode, I_MUTEX_PARENT); + error = vfs_link(what, pinode, where, NULL); + inode_unlock(pinode); + + dput(parent_dentry); + return error; +} + +static int incfs_unlink(struct dentry *dentry) +{ + struct dentry *parent_dentry = dget_parent(dentry); + struct inode *pinode = d_inode(parent_dentry); + int error = 0; + + inode_lock_nested(pinode, I_MUTEX_PARENT); + error = vfs_unlink(pinode, dentry, NULL); + inode_unlock(pinode); + + dput(parent_dentry); + return error; +} + +static int incfs_rmdir(struct dentry *dentry) +{ + struct dentry *parent_dentry = dget_parent(dentry); + struct inode *pinode = d_inode(parent_dentry); + int error = 0; + + inode_lock_nested(pinode, I_MUTEX_PARENT); + error = vfs_rmdir(pinode, dentry); + inode_unlock(pinode); + + dput(parent_dentry); + return error; +} + +static int dir_relative_path_resolve( + struct mount_info *mi, + const char __user *relative_path, + struct path *result_path) +{ + struct path *base_path = &mi->mi_backing_dir_path; + int dir_fd = get_unused_fd_flags(0); + struct file *dir_f = NULL; + int error = 0; + + if (dir_fd < 0) + return dir_fd; + + dir_f = dentry_open(base_path, O_RDONLY | O_NOATIME, mi->mi_owner); + + if (IS_ERR(dir_f)) { + error = PTR_ERR(dir_f); + goto out; + } + fd_install(dir_fd, dir_f); + + if (!relative_path) { + /* No relative path given, just return the base dir. */ + *result_path = *base_path; + path_get(result_path); + goto out; + } + + error = user_path_at_empty(dir_fd, relative_path, + LOOKUP_FOLLOW | LOOKUP_DIRECTORY, result_path, NULL); + +out: + // TODO sys_close should be replaced with ksys_close on later kernel + // Add to compat or some such? + sys_close(dir_fd); + if (error) + pr_debug("incfs: %s %d\n", __func__, error); + return error; +} + +static int validate_name(char *file_name) +{ + struct mem_range name = range(file_name, strlen(file_name)); + int i = 0; + + if (name.len > INCFS_MAX_NAME_LEN) + return -ENAMETOOLONG; + + if (incfs_equal_ranges(pending_reads_file_name_range, name)) + return -EINVAL; + + for (i = 0; i < name.len; i++) + if (name.data[i] == '/') + return -EINVAL; + + return 0; +} + +static long ioctl_create_file(struct mount_info *mi, + struct incfs_new_file_args __user *usr_args) +{ + struct incfs_new_file_args args; + char *file_id_str = NULL; + struct dentry *index_file_dentry = NULL; + struct dentry *named_file_dentry = NULL; + struct path parent_dir_path = {}; + struct inode *index_dir_inode = NULL; + __le64 size_attr_value = 0; + char *file_name = NULL; + char *attr_value = NULL; + int error = 0; + bool locked = false; + + if (!mi || !mi->mi_index_dir) { + error = -EFAULT; + goto out; + } + if (!access_ok(VERIFY_READ, usr_args, sizeof(args))) { + error = -EFAULT; + goto out; + } + if (copy_from_user(&args, usr_args, sizeof(args)) > 0) { + error = -EFAULT; + goto out; + } + + file_name = strndup_user(u64_to_user_ptr(args.file_name), PATH_MAX); + if (IS_ERR(file_name)) { + error = PTR_ERR(file_name); + file_name = NULL; + goto out; + } + + error = validate_name(file_name); + if (error) + goto out; + + file_id_str = file_id_to_str(args.file_id); + if (!file_id_str) { + error = -ENOMEM; + goto out; + } + + error = mutex_lock_interruptible(&mi->mi_dir_struct_mutex); + if (error) + goto out; + locked = true; + + /* Find a directory to put the file into. */ + error = dir_relative_path_resolve(mi, + u64_to_user_ptr(args.directory_path), + &parent_dir_path); + if (error) + goto out; + + if (parent_dir_path.dentry == mi->mi_index_dir) { + /* Can't create a file directly inside .index */ + error = -EBUSY; + goto out; + } + + /* Look up a dentry in the parent dir. It should be negative. */ + named_file_dentry = incfs_lookup_dentry(parent_dir_path.dentry, + file_name); + if (!named_file_dentry) { + error = -EFAULT; + goto out; + } + if (IS_ERR(named_file_dentry)) { + error = PTR_ERR(named_file_dentry); + named_file_dentry = NULL; + goto out; + } + if (d_really_is_positive(named_file_dentry)) { + /* File with this path already exists. */ + error = -EEXIST; + goto out; + } + /* Look up a dentry in the .index dir. It should be negative. */ + index_file_dentry = incfs_lookup_dentry(mi->mi_index_dir, file_id_str); + if (!index_file_dentry) { + error = -EFAULT; + goto out; + } + if (IS_ERR(index_file_dentry)) { + error = PTR_ERR(index_file_dentry); + index_file_dentry = NULL; + goto out; + } + if (d_really_is_positive(index_file_dentry)) { + /* File with this ID already exists in index. */ + error = -EEXIST; + goto out; + } + + /* Creating a file in the .index dir. */ + index_dir_inode = d_inode(mi->mi_index_dir); + inode_lock_nested(index_dir_inode, I_MUTEX_PARENT); + error = vfs_create(index_dir_inode, index_file_dentry, + args.mode, true); + inode_unlock(index_dir_inode); + + if (error) + goto out; + if (!d_really_is_positive(index_file_dentry)) { + error = -EINVAL; + goto out; + } + + /* Save the file's ID as an xattr for easy fetching in future. */ + error = vfs_setxattr(index_file_dentry, INCFS_XATTR_ID_NAME, + file_id_str, strlen(file_id_str), XATTR_CREATE); + if (error) { + pr_debug("incfs: vfs_setxattr err:%d\n", error); + goto delete_index_file; + } + + /* Save the file's size as an xattr for easy fetching in future. */ + size_attr_value = cpu_to_le64(args.size); + error = vfs_setxattr(index_file_dentry, INCFS_XATTR_SIZE_NAME, + (char *)&size_attr_value, sizeof(size_attr_value), + XATTR_CREATE); + if (error) { + pr_debug("incfs: vfs_setxattr err:%d\n", error); + goto delete_index_file; + } + + /* Save the file's attrubute as an xattr */ + if (args.file_attr_len && args.file_attr) { + if (args.file_attr_len > INCFS_MAX_FILE_ATTR_SIZE) { + error = -E2BIG; + goto delete_index_file; + } + + attr_value = kmalloc(args.file_attr_len, GFP_NOFS); + if (!attr_value) { + error = -ENOMEM; + goto delete_index_file; + } + + if (!access_ok(VERIFY_READ, u64_to_user_ptr(args.file_attr), + args.file_attr_len)) { + error = -EFAULT; + goto delete_index_file; + } + + if (copy_from_user(attr_value, + u64_to_user_ptr(args.file_attr), + args.file_attr_len) > 0) { + error = -EFAULT; + goto delete_index_file; + } + + error = vfs_setxattr(index_file_dentry, + INCFS_XATTR_METADATA_NAME, + attr_value, args.file_attr_len, + XATTR_CREATE); + + if (error) + goto delete_index_file; + } + + /* Initializing a newly created file. */ + error = init_new_file(mi, index_file_dentry, &args.file_id, args.size, + range(attr_value, args.file_attr_len), + (struct incfs_file_signature_info __user *) + args.signature_info); + if (error) + goto delete_index_file; + + /* Linking a file with it's real name from the requested dir. */ + error = incfs_link(index_file_dentry, named_file_dentry); + + if (!error) + goto out; + +delete_index_file: + incfs_unlink(index_file_dentry); + +out: + if (error) + pr_debug("incfs: %s err:%d\n", __func__, error); + + kfree(file_id_str); + kfree(file_name); + kfree(attr_value); + dput(named_file_dentry); + dput(index_file_dentry); + path_put(&parent_dir_path); + if (locked) + mutex_unlock(&mi->mi_dir_struct_mutex); + return error; +} + +static long ioctl_read_file_signature(struct file *f, void __user *arg) +{ + struct incfs_get_file_sig_args __user *args_usr_ptr = arg; + struct incfs_get_file_sig_args args = {}; + u8 *sig_buffer = NULL; + size_t sig_buf_size = 0; + int error = 0; + int read_result = 0; + struct data_file *df = get_incfs_data_file(f); + + if (!df) + return -EINVAL; + + if (!access_ok(VERIFY_READ, args_usr_ptr, sizeof(args))) + return -EFAULT; + if (copy_from_user(&args, args_usr_ptr, sizeof(args)) > 0) + return -EINVAL; + + if (!access_ok(VERIFY_WRITE, u64_to_user_ptr(args.file_signature), + args.file_signature_buf_size)) + return -EFAULT; + + sig_buf_size = args.file_signature_buf_size; + if (sig_buf_size > INCFS_MAX_SIGNATURE_SIZE) + return -E2BIG; + + sig_buffer = kzalloc(sig_buf_size, GFP_NOFS); + if (!sig_buffer) + return -ENOMEM; + + read_result = incfs_read_file_signature(df, + range(sig_buffer, sig_buf_size)); + + if (read_result < 0) { + error = read_result; + goto out; + } + + if (copy_to_user(u64_to_user_ptr(args.file_signature), sig_buffer, + read_result)) { + error = -EFAULT; + goto out; + } + + args.file_signature_len_out = read_result; + if (copy_to_user(args_usr_ptr, &args, sizeof(args))) + error = -EFAULT; + +out: + kfree(sig_buffer); + + return error; +} + +static long dispatch_ioctl(struct file *f, unsigned int req, unsigned long arg) +{ + struct mount_info *mi = get_mount_info(file_superblock(f)); + + switch (req) { + case INCFS_IOC_CREATE_FILE: + return ioctl_create_file(mi, (void __user *)arg); + case INCFS_IOC_READ_FILE_SIGNATURE: + return ioctl_read_file_signature(f, (void __user *)arg); + default: + return -EINVAL; + } +} + +static struct dentry *dir_lookup(struct inode *dir_inode, struct dentry *dentry, + unsigned int flags) +{ + struct mount_info *mi = get_mount_info(dir_inode->i_sb); + struct dentry *dir_dentry = NULL; + struct dentry *backing_dentry = NULL; + struct path dir_backing_path = {}; + struct inode_info *dir_info = get_incfs_node(dir_inode); + struct mem_range name_range = + range((u8 *)dentry->d_name.name, dentry->d_name.len); + int err = 0; + + if (d_inode(mi->mi_backing_dir_path.dentry) == + dir_info->n_backing_inode) { + /* We do lookup in the FS root. Show pseudo files. */ + + if (incfs_equal_ranges(pending_reads_file_name_range, + name_range)) { + struct inode *inode = fetch_pending_reads_inode( + dir_inode->i_sb); + + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out; + } + + d_add(dentry, inode); + goto out; + } + + if (incfs_equal_ranges(log_file_name_range, name_range)) { + struct inode *inode = fetch_log_inode( + dir_inode->i_sb); + + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out; + } + + d_add(dentry, inode); + goto out; + } + } + + dir_dentry = dget_parent(dentry); + get_incfs_backing_path(dir_dentry, &dir_backing_path); + backing_dentry = incfs_lookup_dentry(dir_backing_path.dentry, + dentry->d_name.name); + + if (!backing_dentry || IS_ERR(backing_dentry)) { + err = IS_ERR(backing_dentry) + ? PTR_ERR(backing_dentry) + : -EFAULT; + goto out; + } else { + struct inode *inode = NULL; + struct path backing_path = { + .mnt = dir_backing_path.mnt, + .dentry = backing_dentry + }; + + err = incfs_init_dentry(dentry, &backing_path); + if (err) + goto out; + + if (!d_really_is_positive(backing_dentry)) { + /* + * No such entry found in the backing dir. + * Create a negative entry. + */ + d_add(dentry, NULL); + err = 0; + goto out; + } + + if (d_inode(backing_dentry)->i_sb != + dir_info->n_backing_inode->i_sb) { + /* + * Somehow after the path lookup we ended up in a + * different fs mount. If we keep going it's going + * to end badly. + */ + err = -EXDEV; + goto out; + } + + inode = fetch_regular_inode(dir_inode->i_sb, backing_dentry); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out; + } + + d_add(dentry, inode); + } + +out: + dput(dir_dentry); + dput(backing_dentry); + path_put(&dir_backing_path); + if (err) + pr_debug("incfs: %s %s %d\n", __func__, + dentry->d_name.name, err); + return ERR_PTR(err); +} + +static int dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + struct mount_info *mi = get_mount_info(dir->i_sb); + struct inode_info *dir_node = get_incfs_node(dir); + struct dentry *backing_dentry = NULL; + struct path backing_path = {}; + int err = 0; + + + if (!mi || !dir_node || !dir_node->n_backing_inode) + return -EBADF; + + err = mutex_lock_interruptible(&mi->mi_dir_struct_mutex); + if (err) + return err; + + get_incfs_backing_path(dentry, &backing_path); + backing_dentry = backing_path.dentry; + + if (!backing_dentry) { + err = -EBADF; + goto out; + } + + if (backing_dentry->d_parent == mi->mi_index_dir) { + /* Can't create a subdir inside .index */ + err = -EBUSY; + goto out; + } + + inode_lock_nested(dir_node->n_backing_inode, I_MUTEX_PARENT); + err = vfs_mkdir(dir_node->n_backing_inode, backing_dentry, mode); + inode_unlock(dir_node->n_backing_inode); + if (!err) { + struct inode *inode = NULL; + + if (d_really_is_negative(backing_dentry)) { + err = -EINVAL; + goto out; + } + + inode = fetch_regular_inode(dir->i_sb, backing_dentry); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out; + } + d_instantiate(dentry, inode); + } + +out: + if (d_really_is_negative(dentry)) + d_drop(dentry); + path_put(&backing_path); + mutex_unlock(&mi->mi_dir_struct_mutex); + if (err) + pr_debug("incfs: %s err:%d\n", __func__, err); + return err; +} + +/* Delete file referenced by backing_dentry and also its hardlink from .index */ +static int final_file_delete(struct mount_info *mi, + struct dentry *backing_dentry) +{ + struct dentry *index_file_dentry = NULL; + /* 2 chars per byte of file ID + 1 char for \0 */ + char file_id_str[2 * sizeof(incfs_uuid_t) + 1] = {0}; + ssize_t uuid_size = 0; + int error = 0; + + WARN_ON(!mutex_is_locked(&mi->mi_dir_struct_mutex)); + uuid_size = vfs_getxattr(backing_dentry, INCFS_XATTR_ID_NAME, + file_id_str, 2 * sizeof(incfs_uuid_t)); + if (uuid_size < 0) { + error = uuid_size; + goto out; + } + + if (uuid_size != 2 * sizeof(incfs_uuid_t)) { + error = -EBADMSG; + goto out; + } + + index_file_dentry = incfs_lookup_dentry(mi->mi_index_dir, file_id_str); + if (IS_ERR(index_file_dentry)) { + error = PTR_ERR(index_file_dentry); + goto out; + } + + error = incfs_unlink(backing_dentry); + if (error) + goto out; + + if (d_really_is_positive(index_file_dentry)) + error = incfs_unlink(index_file_dentry); +out: + if (error) + pr_debug("incfs: delete_file_from_index err:%d\n", error); + return error; +} + +static int dir_unlink(struct inode *dir, struct dentry *dentry) +{ + struct mount_info *mi = get_mount_info(dir->i_sb); + struct path backing_path = {}; + struct kstat stat; + int err = 0; + + err = mutex_lock_interruptible(&mi->mi_dir_struct_mutex); + if (err) + return err; + + get_incfs_backing_path(dentry, &backing_path); + if (!backing_path.dentry) { + err = -EBADF; + goto out; + } + + if (backing_path.dentry->d_parent == mi->mi_index_dir) { + /* Direct unlink from .index are not allowed. */ + err = -EBUSY; + goto out; + } + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) + err = vfs_getattr(&backing_path, &stat); +#else + err = vfs_getattr(&backing_path, &stat, STATX_NLINK, + AT_STATX_SYNC_AS_STAT); +#endif + if (err) + goto out; + + if (stat.nlink == 2) { + /* + * This is the last named link to this file. The only one left + * is in .index. Remove them both now. + */ + err = final_file_delete(mi, backing_path.dentry); + } else { + /* There are other links to this file. Remove just this one. */ + err = incfs_unlink(backing_path.dentry); + } + + d_drop(dentry); +out: + path_put(&backing_path); + if (err) + pr_debug("incfs: %s err:%d\n", __func__, err); + mutex_unlock(&mi->mi_dir_struct_mutex); + return err; +} + +static int dir_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *new_dentry) +{ + struct mount_info *mi = get_mount_info(dir->i_sb); + struct path backing_old_path = {}; + struct path backing_new_path = {}; + int error = 0; + + error = mutex_lock_interruptible(&mi->mi_dir_struct_mutex); + if (error) + return error; + + get_incfs_backing_path(old_dentry, &backing_old_path); + get_incfs_backing_path(new_dentry, &backing_new_path); + + if (backing_new_path.dentry->d_parent == mi->mi_index_dir) { + /* Can't link to .index */ + error = -EBUSY; + goto out; + } + + error = incfs_link(backing_old_path.dentry, backing_new_path.dentry); + if (!error) { + struct inode *inode = NULL; + struct dentry *bdentry = backing_new_path.dentry; + + if (d_really_is_negative(bdentry)) { + error = -EINVAL; + goto out; + } + + inode = fetch_regular_inode(dir->i_sb, bdentry); + if (IS_ERR(inode)) { + error = PTR_ERR(inode); + goto out; + } + d_instantiate(new_dentry, inode); + } + +out: + path_put(&backing_old_path); + path_put(&backing_new_path); + if (error) + pr_debug("incfs: %s err:%d\n", __func__, error); + mutex_unlock(&mi->mi_dir_struct_mutex); + return error; +} + +static int dir_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct mount_info *mi = get_mount_info(dir->i_sb); + struct path backing_path = {}; + int err = 0; + + err = mutex_lock_interruptible(&mi->mi_dir_struct_mutex); + if (err) + return err; + + get_incfs_backing_path(dentry, &backing_path); + if (!backing_path.dentry) { + err = -EBADF; + goto out; + } + + if (backing_path.dentry == mi->mi_index_dir) { + /* Can't delete .index */ + err = -EBUSY; + goto out; + } + + err = incfs_rmdir(backing_path.dentry); + if (!err) + d_drop(dentry); +out: + path_put(&backing_path); + if (err) + pr_debug("incfs: %s err:%d\n", __func__, err); + mutex_unlock(&mi->mi_dir_struct_mutex); + return err; +} + +static int dir_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + struct mount_info *mi = get_mount_info(old_dir->i_sb); + struct dentry *backing_old_dentry; + struct dentry *backing_new_dentry; + struct dentry *backing_old_dir_dentry; + struct dentry *backing_new_dir_dentry; + struct inode *target_inode; + struct dentry *trap; + int error = 0; + + error = mutex_lock_interruptible(&mi->mi_dir_struct_mutex); + if (error) + return error; + + backing_old_dentry = get_incfs_dentry(old_dentry)->backing_path.dentry; + backing_new_dentry = get_incfs_dentry(new_dentry)->backing_path.dentry; + dget(backing_old_dentry); + dget(backing_new_dentry); + + backing_old_dir_dentry = dget_parent(backing_old_dentry); + backing_new_dir_dentry = dget_parent(backing_new_dentry); + target_inode = d_inode(new_dentry); + + if (backing_old_dir_dentry == mi->mi_index_dir) { + /* Direct moves from .index are not allowed. */ + error = -EBUSY; + goto out; + } + + trap = lock_rename(backing_old_dir_dentry, backing_new_dir_dentry); + + if (trap == backing_old_dentry) { + error = -EINVAL; + goto unlock_out; + } + if (trap == backing_new_dentry) { + error = -ENOTEMPTY; + goto unlock_out; + } + + error = vfs_rename(d_inode(backing_old_dir_dentry), backing_old_dentry, + d_inode(backing_new_dir_dentry), backing_new_dentry, + NULL, 0); + if (error) + goto unlock_out; + if (target_inode) + fsstack_copy_attr_all(target_inode, + get_incfs_node(target_inode)->n_backing_inode); + fsstack_copy_attr_all(new_dir, d_inode(backing_new_dir_dentry)); + if (new_dir != old_dir) + fsstack_copy_attr_all(old_dir, d_inode(backing_old_dir_dentry)); + +unlock_out: + unlock_rename(backing_old_dir_dentry, backing_new_dir_dentry); + +out: + dput(backing_new_dir_dentry); + dput(backing_old_dir_dentry); + dput(backing_new_dentry); + dput(backing_old_dentry); + + mutex_unlock(&mi->mi_dir_struct_mutex); + if (error) + pr_debug("incfs: %s err:%d\n", __func__, error); + return error; +} + + +static int file_open(struct inode *inode, struct file *file) +{ + struct mount_info *mi = get_mount_info(inode->i_sb); + struct file *backing_file = NULL; + struct path backing_path = {}; + int err = 0; + + get_incfs_backing_path(file->f_path.dentry, &backing_path); + backing_file = dentry_open(&backing_path, O_RDWR | O_NOATIME, + mi->mi_owner); + path_put(&backing_path); + + if (IS_ERR(backing_file)) { + err = PTR_ERR(backing_file); + backing_file = NULL; + goto out; + } + + if (S_ISREG(inode->i_mode)) + err = make_inode_ready_for_data_ops(mi, inode, backing_file); + else if (S_ISDIR(inode->i_mode)) { + struct dir_file *dir = NULL; + + dir = incfs_open_dir_file(mi, backing_file); + if (IS_ERR(dir)) + err = PTR_ERR(dir); + else + file->private_data = dir; + } else + err = -EBADF; + +out: + if (err) + pr_debug("incfs: %s name:%s err: %d\n", __func__, + file->f_path.dentry->d_name.name, err); + if (backing_file) + fput(backing_file); + return err; +} + +static int file_release(struct inode *inode, struct file *file) +{ + if (S_ISREG(inode->i_mode)) { + /* Do nothing. + * data_file is released only by inode eviction. + */ + } else if (S_ISDIR(inode->i_mode)) { + struct dir_file *dir = get_incfs_dir_file(file); + + incfs_free_dir_file(dir); + } + + return 0; +} + +static ssize_t file_write(struct file *f, const char __user *buf, + size_t size, loff_t *offset) +{ + struct data_file *df = get_incfs_data_file(f); + const ssize_t data_buf_size = 2 * INCFS_DATA_FILE_BLOCK_SIZE; + size_t block_count = size / sizeof(struct incfs_new_data_block); + struct incfs_new_data_block __user *usr_blocks = + (struct incfs_new_data_block __user *)buf; + u8 *data_buf = NULL; + ssize_t error = 0; + int i = 0; + + if (!df) + return -EBADF; + + if (!access_ok(VERIFY_READ, usr_blocks, size)) + return -EFAULT; + + data_buf = (u8 *)__get_free_pages(GFP_NOFS, get_order(data_buf_size)); + if (!data_buf) + return -ENOMEM; + + for (i = 0; i < block_count; i++) { + struct incfs_new_data_block block = {}; + + if (copy_from_user(&block, &usr_blocks[i], sizeof(block)) > 0) { + error = -EFAULT; + break; + } + + if (block.data_len > data_buf_size) { + error = -E2BIG; + break; + } + if (!access_ok(VERIFY_READ, u64_to_user_ptr(block.data), + block.data_len)) { + error = -EFAULT; + break; + } + if (copy_from_user(data_buf, u64_to_user_ptr(block.data), + block.data_len) > 0) { + error = -EFAULT; + break; + } + block.data = 0; /* To make sure nobody uses it. */ + if (block.flags & INCFS_BLOCK_FLAGS_HASH) { + error = incfs_process_new_hash_block(df, &block, + data_buf); + } else { + error = incfs_process_new_data_block(df, &block, + data_buf); + } + if (error) + break; + } + + if (data_buf) + free_pages((unsigned long)data_buf, get_order(data_buf_size)); + *offset = 0; + + /* + * Only report the error if no records were processed, otherwise + * just return how many were processed successfully. + */ + if (i == 0) + return error; + + return i * sizeof(struct incfs_new_data_block); +} + + +static int dentry_revalidate(struct dentry *d, unsigned int flags) +{ + struct path backing_path = {}; + struct inode_info *info = get_incfs_node(d_inode(d)); + struct inode *binode = (info == NULL) ? NULL : info->n_backing_inode; + struct dentry *backing_dentry = NULL; + int result = 0; + + if (flags & LOOKUP_RCU) + return -ECHILD; + + get_incfs_backing_path(d, &backing_path); + backing_dentry = backing_path.dentry; + if (!backing_dentry) + goto out; + + if (d_inode(backing_dentry) != binode) { + /* + * Backing inodes obtained via dentry and inode don't match. + * It indicates that most likely backing dir has changed + * directly bypassing Incremental FS interface. + */ + goto out; + } + + if (backing_dentry->d_flags & DCACHE_OP_REVALIDATE) { + result = backing_dentry->d_op->d_revalidate(backing_dentry, + flags); + } else + result = 1; + +out: + path_put(&backing_path); + return result; +} + +static void dentry_release(struct dentry *d) +{ + struct dentry_info *di = get_incfs_dentry(d); + + if (di) + path_put(&di->backing_path); + d->d_fsdata = NULL; +} + +static struct inode *alloc_inode(struct super_block *sb) +{ + struct inode_info *node = kzalloc(sizeof(*node), GFP_NOFS); + + /* TODO: add a slab-based cache here. */ + if (!node) + return NULL; + inode_init_once(&node->n_vfs_inode); + return &node->n_vfs_inode; +} + +static void free_inode(struct inode *inode) +{ + struct inode_info *node = get_incfs_node(inode); + + kfree(node); +} + +static void evict_inode(struct inode *inode) +{ + struct inode_info *node = get_incfs_node(inode); + + if (node) { + if (node->n_backing_inode) { + iput(node->n_backing_inode); + node->n_backing_inode = NULL; + } + if (node->n_file) { + incfs_free_data_file(node->n_file); + node->n_file = NULL; + } + } + + truncate_inode_pages(&inode->i_data, 0); + clear_inode(inode); +} + +static ssize_t incfs_getxattr(struct dentry *d, const char *name, + void *value, size_t size) +{ + struct dentry_info *di = get_incfs_dentry(d); + + if (!di || !di->backing_path.dentry) + return -ENODATA; + + return vfs_getxattr(di->backing_path.dentry, name, value, size); +} + +static ssize_t incfs_listxattr(struct dentry *d, char *list, size_t size) +{ + struct dentry_info *di = get_incfs_dentry(d); + + if (!di || !di->backing_path.dentry) + return -ENODATA; + + return vfs_listxattr(di->backing_path.dentry, list, size); +} + +struct dentry *incfs_mount_fs(struct file_system_type *type, int flags, + const char *dev_name, void *data) +{ + struct mount_options options = {}; + struct mount_info *mi = NULL; + struct path backing_dir_path = {}; + struct dentry *index_dir; + struct super_block *src_fs_sb = NULL; + struct inode *root_inode = NULL; + struct super_block *sb = sget(type, NULL, set_anon_super, flags, NULL); + int error = 0; + + if (IS_ERR(sb)) + return ERR_CAST(sb); + + sb->s_op = &incfs_super_ops; + sb->s_d_op = &incfs_dentry_ops; + sb->s_flags |= S_NOATIME; + sb->s_magic = INCFS_MAGIC_NUMBER; + sb->s_time_gran = 1; + sb->s_blocksize = INCFS_DATA_FILE_BLOCK_SIZE; + sb->s_blocksize_bits = blksize_bits(sb->s_blocksize); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,9,0) + sb->s_xattr = incfs_xattr_ops; +#endif + + BUILD_BUG_ON(PAGE_SIZE != INCFS_DATA_FILE_BLOCK_SIZE); + + error = parse_options(&options, (char *)data); + if (error != 0) { + pr_err("incfs: Options parsing error. %d\n", error); + goto err; + } + + sb->s_bdi->ra_pages = options.readahead_pages; + if (!dev_name) { + pr_err("incfs: Backing dir is not set, filesystem can't be mounted.\n"); + error = -ENOENT; + goto err; + } + + error = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, + &backing_dir_path); + if (error || backing_dir_path.dentry == NULL || + !d_really_is_positive(backing_dir_path.dentry)) { + pr_err("incfs: Error accessing: %s.\n", + dev_name); + goto err; + } + src_fs_sb = backing_dir_path.dentry->d_sb; + sb->s_maxbytes = src_fs_sb->s_maxbytes; + + mi = incfs_alloc_mount_info(sb, &options, &backing_dir_path); + + if (IS_ERR_OR_NULL(mi)) { + error = PTR_ERR(mi); + pr_err("incfs: Error allocating mount info. %d\n", error); + mi = NULL; + goto err; + } + + index_dir = open_or_create_index_dir(backing_dir_path.dentry); + if (IS_ERR_OR_NULL(index_dir)) { + error = PTR_ERR(index_dir); + pr_err("incfs: Can't find or create .index dir in %s\n", + dev_name); + goto err; + } + mi->mi_index_dir = index_dir; + + sb->s_fs_info = mi; + root_inode = fetch_regular_inode(sb, backing_dir_path.dentry); + if (IS_ERR(root_inode)) { + error = PTR_ERR(root_inode); + goto err; + } + + sb->s_root = d_make_root(root_inode); + if (!sb->s_root) { + error = -ENOMEM; + goto err; + } + error = incfs_init_dentry(sb->s_root, &backing_dir_path); + if (error) + goto err; + + path_put(&backing_dir_path); + sb->s_flags |= SB_ACTIVE; + + pr_debug("infs: mount\n"); + return dget(sb->s_root); +err: + sb->s_fs_info = NULL; + path_put(&backing_dir_path); + incfs_free_mount_info(mi); + deactivate_locked_super(sb); + return ERR_PTR(error); +} + +static int incfs_remount_fs(struct super_block *sb, int *flags, char *data) +{ + struct mount_options options; + struct mount_info *mi = get_mount_info(sb); + int err = 0; + + sync_filesystem(sb); + err = parse_options(&options, (char *)data); + if (err) + return err; + + if (mi->mi_options.read_timeout_ms != options.read_timeout_ms) { + mi->mi_options.read_timeout_ms = options.read_timeout_ms; + pr_debug("incfs: new timeout_ms=%d", options.read_timeout_ms); + } + + pr_debug("infs: remount\n"); + return 0; +} + +void incfs_kill_sb(struct super_block *sb) +{ + struct mount_info *mi = sb->s_fs_info; + + pr_debug("infs: unmount\n"); + incfs_free_mount_info(mi); + generic_shutdown_super(sb); +} + +static int show_options(struct seq_file *m, struct dentry *root) +{ + struct mount_info *mi = get_mount_info(root->d_sb); + + seq_printf(m, ",read_timeout_ms=%u", mi->mi_options.read_timeout_ms); + seq_printf(m, ",readahead=%u", mi->mi_options.readahead_pages); + if (mi->mi_options.read_log_pages != 0) { + seq_printf(m, ",rlog_pages=%u", mi->mi_options.read_log_pages); + seq_printf(m, ",rlog_wakeup_cnt=%u", + mi->mi_options.read_log_wakeup_count); + } + if (mi->mi_options.no_backing_file_cache) + seq_puts(m, ",no_bf_cache"); + if (mi->mi_options.no_backing_file_readahead) + seq_puts(m, ",no_bf_readahead"); + return 0; +} diff --git a/fs/incfs/vfs.h b/fs/incfs/vfs.h new file mode 100644 index 000000000000..eaa490e19072 --- /dev/null +++ b/fs/incfs/vfs.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2018 Google LLC + */ + +#ifndef _INCFS_VFS_H +#define _INCFS_VFS_H + +void incfs_kill_sb(struct super_block *sb); +struct dentry *incfs_mount_fs(struct file_system_type *type, int flags, + const char *dev_name, void *data); + +#endif diff --git a/include/uapi/linux/incrementalfs.h b/include/uapi/linux/incrementalfs.h new file mode 100644 index 000000000000..8a06e2e48fc4 --- /dev/null +++ b/include/uapi/linux/incrementalfs.h @@ -0,0 +1,244 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Userspace interface for Incremental FS. + * + * Incremental FS is special-purpose Linux virtual file system that allows + * execution of a program while its binary and resource files are still being + * lazily downloaded over the network, USB etc. + * + * Copyright 2019 Google LLC + */ +#ifndef _UAPI_LINUX_INCREMENTALFS_H +#define _UAPI_LINUX_INCREMENTALFS_H + +#include +#include +#include +#include + +/* ===== constants ===== */ +#define INCFS_NAME "incremental-fs" +#define INCFS_MAGIC_NUMBER (0x5346434e49ul) +#define INCFS_DATA_FILE_BLOCK_SIZE 4096 +#define INCFS_HEADER_VER 1 + +// TODO: This value is assumed in incfs_copy_signature_info_from_user to be the +// actual signature length. Set back to 64 when fixed. +#define INCFS_MAX_HASH_SIZE 32 +#define INCFS_MAX_FILE_ATTR_SIZE 512 + +#define INCFS_PENDING_READS_FILENAME ".pending_reads" +#define INCFS_LOG_FILENAME ".log" +#define INCFS_XATTR_ID_NAME (XATTR_USER_PREFIX "incfs.id") +#define INCFS_XATTR_SIZE_NAME (XATTR_USER_PREFIX "incfs.size") +#define INCFS_XATTR_METADATA_NAME (XATTR_USER_PREFIX "incfs.metadata") + +#define INCFS_MAX_SIGNATURE_SIZE 8096 + +#define INCFS_IOCTL_BASE_CODE 'g' + +/* ===== ioctl requests on the command dir ===== */ + +/* Create a new file */ +#define INCFS_IOC_CREATE_FILE \ + _IOWR(INCFS_IOCTL_BASE_CODE, 30, struct incfs_new_file_args) + +/* Read file signature */ +#define INCFS_IOC_READ_FILE_SIGNATURE \ + _IOWR(INCFS_IOCTL_BASE_CODE, 31, struct incfs_get_file_sig_args) + +enum incfs_compression_alg { + COMPRESSION_NONE = 0, + COMPRESSION_LZ4 = 1 +}; + +enum incfs_block_flags { + INCFS_BLOCK_FLAGS_NONE = 0, + INCFS_BLOCK_FLAGS_HASH = 1, +}; + +typedef struct { + __u8 bytes[16]; +} incfs_uuid_t __attribute__((aligned (8))); + +/* + * Description of a pending read. A pending read - a read call by + * a userspace program for which the filesystem currently doesn't have data. + */ +struct incfs_pending_read_info { + /* Id of a file that is being read from. */ + incfs_uuid_t file_id; + + /* A number of microseconds since system boot to the read. */ + __aligned_u64 timestamp_us; + + /* Index of a file block that is being read. */ + __u32 block_index; + + /* A serial number of this pending read. */ + __u32 serial_number; +}; + +/* + * A struct to be written into a control file to load a data or hash + * block to a data file. + */ +struct incfs_new_data_block { + /* Index of a data block. */ + __u32 block_index; + + /* Length of data */ + __u32 data_len; + + /* + * A pointer to an actual data for the block. + * + * Equivalent to: __u8 *data; + */ + __aligned_u64 data; + + /* + * Compression algorithm used to compress the data block. + * Values from enum incfs_compression_alg. + */ + __u8 compression; + + /* Values from enum incfs_block_flags */ + __u8 flags; + + __u16 reserved1; + + __u32 reserved2; + + __aligned_u64 reserved3; +}; + +enum incfs_hash_tree_algorithm { + INCFS_HASH_TREE_NONE = 0, + INCFS_HASH_TREE_SHA256 = 1 +}; + +struct incfs_file_signature_info { + /* + * A pointer to file's root hash (if determined != 0) + * Actual hash size determined by hash_tree_alg. + * Size of the buffer should be at least INCFS_MAX_HASH_SIZE + * + * Equivalent to: u8 *root_hash; + */ + __aligned_u64 root_hash; + + /* + * A pointer to additional data that was attached to the root hash + * before signing. + * + * Equivalent to: u8 *additional_data; + */ + __aligned_u64 additional_data; + + /* Size of additional data. */ + __u32 additional_data_size; + + __u32 reserved1; + + /* + * A pointer to pkcs7 signature DER blob. + * + * Equivalent to: u8 *signature; + */ + __aligned_u64 signature; + + + /* Size of pkcs7 signature DER blob */ + __u32 signature_size; + + __u32 reserved2; + + /* Value from incfs_hash_tree_algorithm */ + __u8 hash_tree_alg; +}; + +/* + * Create a new file or directory. + */ +struct incfs_new_file_args { + /* Id of a file to create. */ + incfs_uuid_t file_id; + + /* + * Total size of the new file. Ignored if S_ISDIR(mode). + */ + __aligned_u64 size; + + /* + * File mode. Permissions and dir flag. + */ + __u16 mode; + + __u16 reserved1; + + __u32 reserved2; + + /* + * A pointer to a null-terminated relative path to the file's parent + * dir. + * Max length: PATH_MAX + * + * Equivalent to: char *directory_path; + */ + __aligned_u64 directory_path; + + /* + * A pointer to a null-terminated file's name. + * Max length: PATH_MAX + * + * Equivalent to: char *file_name; + */ + __aligned_u64 file_name; + + /* + * A pointer to a file attribute to be set on creation. + * + * Equivalent to: u8 *file_attr; + */ + __aligned_u64 file_attr; + + /* + * Length of the data buffer specfied by file_attr. + * Max value: INCFS_MAX_FILE_ATTR_SIZE + */ + __u32 file_attr_len; + + __u32 reserved4; + + /* struct incfs_file_signature_info *signature_info; */ + __aligned_u64 signature_info; + + __aligned_u64 reserved5; + + __aligned_u64 reserved6; +}; + +/* + * Request a digital signature blob for a given file. + * Argument for INCFS_IOC_READ_FILE_SIGNATURE ioctl + */ +struct incfs_get_file_sig_args { + /* + * A pointer to the data buffer to save an signature blob to. + * + * Equivalent to: u8 *file_signature; + */ + __aligned_u64 file_signature; + + /* Size of the buffer at file_signature. */ + __u32 file_signature_buf_size; + + /* + * Number of bytes save file_signature buffer. + * It is set after ioctl done. + */ + __u32 file_signature_len_out; +}; + +#endif /* _UAPI_LINUX_INCREMENTALFS_H */ diff --git a/tools/testing/selftests/filesystems/incfs/Makefile b/tools/testing/selftests/filesystems/incfs/Makefile new file mode 100644 index 000000000000..7cff78cf5131 --- /dev/null +++ b/tools/testing/selftests/filesystems/incfs/Makefile @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS += -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -lssl -lcrypto -llz4 +CFLAGS += -I../../../../../usr/include/ +CFLAGS += -I../../../../include/uapi/ +CFLAGS += -I../../../../lib + +EXTRA_SOURCES := utils.c +TEST_GEN_PROGS := incfs_test + +include ../../lib.mk + +$(OUTPUT)incfs_test: incfs_test.c $(EXTRA_SOURCES) +all: $(OUTPUT)incfs_test + +clean: + rm -rf $(OUTPUT)incfs_test *.o diff --git a/tools/testing/selftests/filesystems/incfs/config b/tools/testing/selftests/filesystems/incfs/config new file mode 100644 index 000000000000..b6749837a318 --- /dev/null +++ b/tools/testing/selftests/filesystems/incfs/config @@ -0,0 +1 @@ +CONFIG_INCREMENTAL_FS=y \ No newline at end of file diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c new file mode 100644 index 000000000000..f1e9f86605e7 --- /dev/null +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -0,0 +1,2420 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2018 Google LLC + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../kselftest.h" + +#include "lz4.h" +#include "utils.h" + +#define __packed __attribute__((__packed__)) + +#define TEST_FAILURE 1 +#define TEST_SUCCESS 0 +#define INCFS_MAX_MTREE_LEVELS 8 + +#define INCFS_ROOT_INODE 0 + +struct hash_block { + char data[INCFS_DATA_FILE_BLOCK_SIZE]; +}; + +struct test_signature { + void *data; + size_t size; + + char add_data[100]; + size_t add_data_size; +}; + +struct test_file { + int index; + incfs_uuid_t id; + char *name; + off_t size; + char root_hash[INCFS_MAX_HASH_SIZE]; + struct hash_block *mtree; + int mtree_block_count; + struct test_signature sig; +}; + +struct test_files_set { + struct test_file *files; + int files_count; +}; + +struct linux_dirent64 { + uint64_t d_ino; + int64_t d_off; + unsigned short d_reclen; + unsigned char d_type; + char d_name[0]; +} __packed; + +/* + * The certificate below and the private key were created by calling: + * openssl req -x509 -newkey rsa:4096 -keyout private.key -out cert.crt + * -days 1000 -sha256 -nodes -outform PEM -subj + * "/C=US/ST=WA/L=Kirkland/O=Example/OU=Org/CN=www.example.com" + */ +char x509_cert[] = +"-----BEGIN CERTIFICATE-----\n" +"MIIFvzCCA6egAwIBAgIUXpwqelEljm6BBllRQGHLrls2MYgwDQYJKoZIhvcNAQEL\n" +"BQAwbzELMAkGA1UEBhMCVVMxEzARBgNVBAgMCldhc2hpbmd0b24xETAPBgNVBAcM\n" +"CEtpcmtsYW5kMRAwDgYDVQQKDAdFeGFtcGxlMQwwCgYDVQQLDANPcmcxGDAWBgNV\n" +"BAMMD3d3dy5leGFtcGxlLmNvbTAeFw0xOTA4MDgyMzA3MDZaFw0yMjA1MDQyMzA3\n" +"MDZaMG8xCzAJBgNVBAYTAlVTMRMwEQYDVQQIDApXYXNoaW5ndG9uMREwDwYDVQQH\n" +"DAhLaXJrbGFuZDEQMA4GA1UECgwHRXhhbXBsZTEMMAoGA1UECwwDT3JnMRgwFgYD\n" +"VQQDDA93d3cuZXhhbXBsZS5jb20wggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIK\n" +"AoICAQC1LuFW/lDV/GflqFMz7RDvFFgWld982ZuDJRaK55JNj+MI4RZNL61PDw43\n" +"NeeJtqUoVxSLS9wHURjSjD/CV5GudUOnzGfbwFlLko+jhYRT4HNFS+5ys1FEJLtA\n" +"uYcY4P9GHQEXYUX+ue82A2kJ91oY6G3vCQYJFiGteb6TRDICmug31x4pBfB8rOdt\n" +"4/NXS/Dn+S0/mJlxw34IKfqrlFjzUziRZtAWWqDcfxFDUizSggkdXIUq4GY38RAD\n" +"qGewNNCab3ClJDP7/M32BhSNgsIKhgtSTM2+ocfvBhwup+BjV6UbL21DPAshlolV\n" +"gSL1HM2jin5bi4bpFMreY0LXwFih87/6AVSfQHY9TZrombVZnMxvB7NG1NCSwDBT\n" +"qjjFb3oiSMugJzY+MhISM754m46fwUyHZ1ylWCLJEU8kQ5A1q9vvqMcaDa4uTGP3\n" +"UgC6SyVmZxG2o+AO6m8TRTCtqHN41mPTM9HK4T1UyuzVpykSc2LlYkKE517SyEiV\n" +"XDmotNb2myXNYHHTjRYNxkq75Lbii2I4Q4z8XtDngaIrhZqACKSqIt2CocGjx61S\n" +"oxKWi+LGa7B4NaCMjz1LnaOIsXn1rJDRnUWL49T42g4kOi/5QaC2JDygfefw1hAb\n" +"uxkq9EYUDg+w9broltiBf4rKAnw8JMySARnyPZbj0lhZK3va5wIDAQABo1MwUTAd\n" +"BgNVHQ4EFgQUo6JN3gY2yGbzOTNj8Al7hNB3rw0wHwYDVR0jBBgwFoAUo6JN3gY2\n" +"yGbzOTNj8Al7hNB3rw0wDwYDVR0TAQH/BAUwAwEB/zANBgkqhkiG9w0BAQsFAAOC\n" +"AgEAQb3pJqOzM4whfNVdpEOswd1EApcWNM1ps9iTlEEjDoRv9F7F1PW0uXCIpk3B\n" +"j5JgCmIxAcPnzj42rduRSx421hHMZhbAIWI/JL4ZSF64qlG0YrmJDXlJgSMoyst5\n" +"biUqeWgO7Js5udPt3zhkeA62z3hGM6dE5B3k7gHTaKKtK17+UeR9imZKsOK8GBnM\n" +"rxMPI6XghxxAK2OQ/r09DHDiyf/GxgOE46oknfXfMPx3HaSvDKrZUTZ+UvVbM5c2\n" +"5eXOgH5UO/e4llLknJK7CoP/R6G7pV44iT4t4t9FMnvCYvavAHwfR+6z5vTF3o8a\n" +"wd80fC8z1vfLsIPLROdzBl9rGCvv536fPiEA677CM1AZkjfT0a9DVzrE1NDvuCUF\n" +"0KgEdiNwux+hO6dbTyiS38yPT6TbpoWJptJmFhFkC4hGvUgoX/TI0covSyf74VRH\n" +"k3BHojOBMYiX1K66xoN7fhlGK8cith3L0XXPB8CgSEUPWURvm8RCaGuX2T3FZomF\n" +"BCnNpN+WNnN3Yf4OkjtuvtxxktUU7pfVLsUxrdpo/ph4rWm6U83VT/Zlq92aF4vW\n" +"QJ+7uraQFip7e+Gy9g3UJINm3B7b1C4ch/Z/upCZESOI/23sVGzkfTgOrS+23i6/\n" +"Vi9YW75zySC2FCa1AWMS1NmS5qfDSycJUgD6YvOUg0C54ZI=\n" +"-----END CERTIFICATE-----"; + +char private_key[] = +"-----BEGIN PRIVATE KEY-----\n" +"MIIJQwIBADANBgkqhkiG9w0BAQEFAASCCS0wggkpAgEAAoICAQC1LuFW/lDV/Gfl\n" +"qFMz7RDvFFgWld982ZuDJRaK55JNj+MI4RZNL61PDw43NeeJtqUoVxSLS9wHURjS\n" +"jD/CV5GudUOnzGfbwFlLko+jhYRT4HNFS+5ys1FEJLtAuYcY4P9GHQEXYUX+ue82\n" +"A2kJ91oY6G3vCQYJFiGteb6TRDICmug31x4pBfB8rOdt4/NXS/Dn+S0/mJlxw34I\n" +"KfqrlFjzUziRZtAWWqDcfxFDUizSggkdXIUq4GY38RADqGewNNCab3ClJDP7/M32\n" +"BhSNgsIKhgtSTM2+ocfvBhwup+BjV6UbL21DPAshlolVgSL1HM2jin5bi4bpFMre\n" +"Y0LXwFih87/6AVSfQHY9TZrombVZnMxvB7NG1NCSwDBTqjjFb3oiSMugJzY+MhIS\n" +"M754m46fwUyHZ1ylWCLJEU8kQ5A1q9vvqMcaDa4uTGP3UgC6SyVmZxG2o+AO6m8T\n" +"RTCtqHN41mPTM9HK4T1UyuzVpykSc2LlYkKE517SyEiVXDmotNb2myXNYHHTjRYN\n" +"xkq75Lbii2I4Q4z8XtDngaIrhZqACKSqIt2CocGjx61SoxKWi+LGa7B4NaCMjz1L\n" +"naOIsXn1rJDRnUWL49T42g4kOi/5QaC2JDygfefw1hAbuxkq9EYUDg+w9broltiB\n" +"f4rKAnw8JMySARnyPZbj0lhZK3va5wIDAQABAoICAQCMKul/0J2e/ncub6t2t4dr\n" +"PnTrfCT6xKqPqciny4Ee6hr9So1jR2gvink380bd/mQFMmEdZqGhM3cdpAzLf82f\n" +"hu7BSNxsYIF0er0PB4MZFMJ4sMaXC+zp5/TJnP5MG/zBND0c5k8tQpEyWy8O28Jj\n" +"FKW/0F5P90Q0ncP20EJUS50tXgniOMsU2Prtw/UE6yZDgD0mPxsurMu66ycXSFwM\n" +"WqyfqEeBk7lw/AjR6Sft71W31lTbl+DclG0MN2OIKUPcxiwCRmDFKI36MDgERk1x\n" +"sMPfdrWRLj2ryDFTUuLAWBTOVEGWS0RdRsWWVaJCuHbKd6FLl0TW2xQbOfWDTjYC\n" +"Ps31ejh163qdbk7OGOZIbd83fP3jsyL+4eNzhUpeXMKhfG58mFIv4yhdZIUOpuL6\n" +"aqnoU9z9wEsJKj/SrKr3nw6tuTnmbXgNjun9LfTFmqqDRBYd0Okiprw6jHNM1jgA\n" +"GG0kC/K7r89jKymVDABwGMFCS33ynR1Tb6zG+cqgNMPw19Fy3uQuW21CjqSzCOyP\n" +"aEVCEUZeP+ofql5+7ZKi6Dj+EdTfeKt2ihgheHZZoaYSINb8tsnKbdJhwBfW9PFT\n" +"aT/hu3bnO2FPC8H2NGOqxOEeel9ALU4SFu1pOknEhiL3/mNfOQ+KgrSRDtNRlcL0\n" +"cto05J90u0cmqwWKlshfaQKCAQEA5dcklxs4ezyzt28NcsiyS02oZ+9TkQp6pCXV\n" +"kx7AwhivAmVTlJ+c6BegA5EPd7A1gknM3+EKzGpoBOqmlF45G57phVIAphAp4oCH\n" +"UOVtIQgM8p4EU2gtX+uNOopdYlpBQnWimXaHA2sOD9/yTbZ03j/McRH6D15+iCld\n" +"3880GHdZaYYbQmHoSDg39LRRO1bdS3WC0oKBD2gPi3K0b9RaZSwKzuVrmlvrLURj\n" +"WMZfmkGl4BsITfuoTxbWFVncG3Kb9eYkYUFZy4M2G/s849PS/HjrN7BvgpanjtVp\n" +"1/39APQfAYfUuBPbKYnb6F8dE0pb5cVd4uMZklAeTb3bXjOO9QKCAQEAyc4CxWXr\n" +"bG6Do5dGpWudQ7ucq00MR0T3MHQIu5XTn6BsPHAJ9ZgrQw9C24PXm2VEjjsrMs5T\n" +"rHNF9oeO39s25Za1iyJ+893icqA3h3ivCUOOoVE54BkuJK6REhkXPD5G1ubmxeBz\n" +"MKNehlpd/eSbJJArkzKFZ8sBtLt8i9VFhRnXSpDAbiMpCbjW+bem9MWdLmkenSnu\n" +"OUbnqYcJhFBCvOT7ZCHFCDNUNPfHcaReSY2EYjw0ZqtqAZD0Q+DL+RkLz7l1+/bF\n" +"eEwNjmjFTcwRyawqf38D4miU0H6ca16FkeSlbmM5p3HdwZK2HVYYz3FSwhox6Ebd\n" +"n6in42qfL4Ug6wKCAQAh9IDRWhIkErmyNdPUy1WbzmM8x5ye5t9rdLNywq5TfnYM\n" +"co/AezwhBax8GmgglIWzM9fykzqXLHklkMz/SlRBgl6ZdZ3m6qhlb/uNtfdDU/8l\n" +"sLaO4+sgKpp4tYxKRW8ytFJLPbmAhcZUDg+r73KgiuhXJAK/VoR29TWLJP9bRfaN\n" +"omRQkEpSsQuDOUhu7cxPo5KqKuGKNyNkxJNnmgWowLLwEfCtozrBO0M6EER7c4tf\n" +"6l51tuIMnSEPknD0FSB5WYCyZYcwi7fotlsuhVK8PdjyJzyyHDOw5FJ4uGsyQt55\n" +"yWlhsH1GS7mTQMn42Zlt/pR6OnbCqNdxQMUxy4gpAoIBAFvMbs5E0pb8nr0n72cI\n" +"UP2itl3mKpOw95D+94n9WcrfOt0zShSCKAvVQWCB1O5HXqwklj4CRWXI+iZu+7sx\n" +"CQPfTq3//ygH4x6paxkg+N6J8LPJMz6Rtb/R+QP2je9FlQvk9U1GEKArcLBFI0R/\n" +"XWOAgZHwBWd1nU0NjFY/qeQmIR02Q5LWQ7C8eG4X8MafriSShO6RSGCdtHwVhWq+\n" +"59ztfL3L7skQMFn37K3xS0LCMVpOcLfTeeFEgxjthVvG3OydPOJlGubiEbiaSEZf\n" +"cif/PUXKDYZMdIVzUsw0ryXykJ5qXKuizHFlv5oQtDCJKFBLgjBbLC2YluaIdekz\n" +"8gkCggEBAJWxS7EuB/qL7fOz0o3HRy0plR3qbwZ0pLoCz0Ii7WxraBS1yQwmxif1\n" +"Rgv89GyFqg1yQl3CSrMiw7oC9WxxxuiEZDO18c4KO3NTv9K4itN9OPQVBTHmEhod\n" +"KWcyP4/W/Sfuae77PyclSqUsAARRrKYn2fpLTS5ibaU0QZgHmdPgYDUrPr+6PHKK\n" +"ZfQKU2uBfuo6zoMbMmFi3UYG49j9rv4d6v+44vS1MPHV9JK/LD8YfBhgx8Pg/u6D\n" +"nUgipS48pkGjJr2u2Vu7Mx70vqz0Yf2neyyDbdLtkYauC4w7YKPTD0yzDJyGuAeB\n" +"GyPbW1yZa5vE302a1Cr0Cd7RC4AFAAw=\n" +"-----END PRIVATE KEY-----"; + +struct test_files_set get_test_files_set(void) +{ + static struct test_file files[] = { + { .index = 0, .name = "file_one_byte", .size = 1 }, + { .index = 1, + .name = "file_one_block", + .size = INCFS_DATA_FILE_BLOCK_SIZE }, + { .index = 2, + .name = "file_one_and_a_half_blocks", + .size = INCFS_DATA_FILE_BLOCK_SIZE + + INCFS_DATA_FILE_BLOCK_SIZE / 2 }, + { .index = 3, + .name = "file_three", + .size = 300 * INCFS_DATA_FILE_BLOCK_SIZE + 3 }, + { .index = 4, + .name = "file_four", + .size = 400 * INCFS_DATA_FILE_BLOCK_SIZE + 7 }, + { .index = 5, + .name = "file_five", + .size = 500 * INCFS_DATA_FILE_BLOCK_SIZE + 7 }, + { .index = 6, + .name = "file_six", + .size = 600 * INCFS_DATA_FILE_BLOCK_SIZE + 7 }, + { .index = 7, + .name = "file_seven", + .size = 700 * INCFS_DATA_FILE_BLOCK_SIZE + 7 }, + { .index = 8, + .name = "file_eight", + .size = 800 * INCFS_DATA_FILE_BLOCK_SIZE + 7 }, + { .index = 9, + .name = "file_nine", + .size = 900 * INCFS_DATA_FILE_BLOCK_SIZE + 7 }, + { .index = 10, .name = "file_big", .size = 500 * 1024 * 1024 } + }; + return (struct test_files_set){ .files = files, + .files_count = ARRAY_SIZE(files) }; +} + +struct test_files_set get_small_test_files_set(void) +{ + static struct test_file files[] = { + { .index = 0, .name = "file_one_byte", .size = 1 }, + { .index = 1, + .name = "file_one_block", + .size = INCFS_DATA_FILE_BLOCK_SIZE }, + { .index = 2, + .name = "file_one_and_a_half_blocks", + .size = INCFS_DATA_FILE_BLOCK_SIZE + + INCFS_DATA_FILE_BLOCK_SIZE / 2 }, + { .index = 3, + .name = "file_three", + .size = 300 * INCFS_DATA_FILE_BLOCK_SIZE + 3 }, + { .index = 4, + .name = "file_four", + .size = 400 * INCFS_DATA_FILE_BLOCK_SIZE + 7 } + }; + return (struct test_files_set){ .files = files, + .files_count = ARRAY_SIZE(files) }; +} + +static int get_file_block_seed(int file, int block) +{ + return 7919 * file + block; +} + +static loff_t min(loff_t a, loff_t b) +{ + return a < b ? a : b; +} + +static pid_t flush_and_fork(void) +{ + fflush(stdout); + return fork(); +} + +static void print_error(char *msg) +{ + ksft_print_msg("%s: %s\n", msg, strerror(errno)); +} + +static int wait_for_process(pid_t pid) +{ + int status; + int wait_res; + + wait_res = waitpid(pid, &status, 0); + if (wait_res <= 0) { + print_error("Can't wait for the child"); + return -EINVAL; + } + if (!WIFEXITED(status)) { + ksft_print_msg("Unexpected child status pid=%d\n", pid); + return -EINVAL; + } + status = WEXITSTATUS(status); + if (status != 0) + return status; + return 0; +} + +static void rnd_buf(uint8_t *data, size_t len, unsigned int seed) +{ + int i; + + for (i = 0; i < len; i++) { + seed = 1103515245 * seed + 12345; + data[i] = (uint8_t)(seed >> (i % 13)); + } +} + +char *bin2hex(char *dst, const void *src, size_t count) +{ + const unsigned char *_src = src; + static const char hex_asc[] = "0123456789abcdef"; + + while (count--) { + unsigned char x = *_src++; + + *dst++ = hex_asc[(x & 0xf0) >> 4]; + *dst++ = hex_asc[(x & 0x0f)]; + } + *dst = 0; + return dst; +} + +static char *get_index_filename(char *mnt_dir, incfs_uuid_t id) +{ + char path[FILENAME_MAX]; + char str_id[1 + 2 * sizeof(id)]; + + bin2hex(str_id, id.bytes, sizeof(id.bytes)); + snprintf(path, ARRAY_SIZE(path), "%s/.index/%s", mnt_dir, str_id); + + return strdup(path); +} + +int open_file_by_id(char *mnt_dir, incfs_uuid_t id) +{ + char *path = get_index_filename(mnt_dir, id); + int fd = open(path, O_RDWR); + + free(path); + if (fd < 0) { + print_error("Can't open file by id."); + return -errno; + } + + return fd; +} + +int get_file_attr(char *mnt_dir, incfs_uuid_t id, char *value, int size) +{ + char *path = get_index_filename(mnt_dir, id); + int res; + + res = getxattr(path, INCFS_XATTR_METADATA_NAME, value, size); + if (res < 0) + res = -errno; + + free(path); + return res; +} + +static bool same_id(incfs_uuid_t *id1, incfs_uuid_t *id2) +{ + return !memcmp(id1->bytes, id2->bytes, sizeof(id1->bytes)); +} + +static int emit_test_blocks(char *mnt_dir, struct test_file *file, + int blocks[], int count) +{ + uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE]; + uint8_t comp_data[2 * INCFS_DATA_FILE_BLOCK_SIZE]; + int block_count = (count > 32) ? 32 : count; + int data_buf_size = 2 * INCFS_DATA_FILE_BLOCK_SIZE * block_count; + uint8_t *data_buf = malloc(data_buf_size); + uint8_t *current_data = data_buf; + uint8_t *data_end = data_buf + data_buf_size; + struct incfs_new_data_block *block_buf = + calloc(block_count, sizeof(*block_buf)); + ssize_t write_res = 0; + int fd; + int error = 0; + int i = 0; + int blocks_written = 0; + + fd = open_file_by_id(mnt_dir, file->id); + if (fd <= 0) { + error = -errno; + goto out; + } + + for (i = 0; i < block_count; i++) { + int block_index = blocks[i]; + bool compress = (file->index + block_index) % 2 == 0; + int seed = get_file_block_seed(file->index, block_index); + off_t block_offset = + ((off_t)block_index) * INCFS_DATA_FILE_BLOCK_SIZE; + size_t block_size = 0; + + if (block_offset > file->size) { + error = -EINVAL; + break; + } + if (file->size - block_offset > + INCFS_DATA_FILE_BLOCK_SIZE) + block_size = INCFS_DATA_FILE_BLOCK_SIZE; + else + block_size = file->size - block_offset; + + rnd_buf(data, block_size, seed); + if (compress) { + size_t comp_size = LZ4_compress_default( + (char *)data, (char *)comp_data, block_size, + ARRAY_SIZE(comp_data)); + + if (comp_size <= 0) { + error = -EBADMSG; + break; + } + if (current_data + comp_size > data_end) { + error = -ENOMEM; + break; + } + memcpy(current_data, comp_data, comp_size); + block_size = comp_size; + block_buf[i].compression = COMPRESSION_LZ4; + } else { + if (current_data + block_size > data_end) { + error = -ENOMEM; + break; + } + memcpy(current_data, data, block_size); + block_buf[i].compression = COMPRESSION_NONE; + } + + block_buf[i].block_index = block_index; + block_buf[i].data_len = block_size; + block_buf[i].data = ptr_to_u64(current_data); + block_buf[i].compression = + compress ? COMPRESSION_LZ4 : COMPRESSION_NONE; + current_data += block_size; + } + + if (!error) { + write_res = write(fd, block_buf, sizeof(*block_buf) * i); + if (write_res < 0) + error = -errno; + else + blocks_written = write_res / sizeof(*block_buf); + } + if (error) { + ksft_print_msg( + "Writing data block error. Write returned: %d. Error:%s\n", + write_res, strerror(-error)); + } + +out: + free(block_buf); + free(data_buf); + close(fd); + return (error < 0) ? error : blocks_written; +} + +static int emit_test_block(char *mnt_dir, struct test_file *file, + int block_index) +{ + int res = emit_test_blocks(mnt_dir, file, &block_index, 1); + + if (res == 0) + return -EINVAL; + if (res == 1) + return 0; + return res; +} + +static void shuffle(int array[], int count, unsigned int seed) +{ + int i; + + for (i = 0; i < count - 1; i++) { + int items_left = count - i; + int shuffle_index; + int v; + + seed = 1103515245 * seed + 12345; + shuffle_index = i + seed % items_left; + + v = array[shuffle_index]; + array[shuffle_index] = array[i]; + array[i] = v; + } +} + +static int emit_test_file_data(char *mount_dir, struct test_file *file) +{ + int i; + int block_cnt = 1 + (file->size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; + int *block_indexes = NULL; + int result = 0; + int blocks_written = 0; + + if (file->size == 0) + return 0; + + block_indexes = calloc(block_cnt, sizeof(*block_indexes)); + for (i = 0; i < block_cnt; i++) + block_indexes[i] = i; + shuffle(block_indexes, block_cnt, file->index); + + for (i = 0; i < block_cnt; i += blocks_written) { + blocks_written = emit_test_blocks(mount_dir, file, + block_indexes + i, block_cnt - i); + if (blocks_written < 0) { + result = blocks_written; + goto out; + } + if (blocks_written == 0) { + result = -EIO; + goto out; + } + } +out: + free(block_indexes); + return result; +} + +static loff_t read_whole_file(char *filename) +{ + int fd = -1; + loff_t result; + loff_t bytes_read = 0; + uint8_t buff[16 * 1024]; + + fd = open(filename, O_RDONLY); + if (fd <= 0) + return fd; + + while (1) { + int read_result = read(fd, buff, ARRAY_SIZE(buff)); + + if (read_result < 0) { + print_error("Error during reading from a file."); + result = -errno; + goto cleanup; + } else if (read_result == 0) + break; + + bytes_read += read_result; + } + result = bytes_read; + +cleanup: + close(fd); + return result; +} + +static int read_test_file(uint8_t *buf, size_t len, char *filename, + int block_idx) +{ + int fd = -1; + int result; + int bytes_read = 0; + size_t bytes_to_read = len; + off_t offset = ((off_t)block_idx) * INCFS_DATA_FILE_BLOCK_SIZE; + + fd = open(filename, O_RDONLY); + if (fd <= 0) + return fd; + + if (lseek(fd, offset, SEEK_SET) != offset) { + print_error("Seek error"); + return -errno; + } + + while (bytes_read < bytes_to_read) { + int read_result = + read(fd, buf + bytes_read, bytes_to_read - bytes_read); + if (read_result < 0) { + result = -errno; + goto cleanup; + } else if (read_result == 0) + break; + + bytes_read += read_result; + } + result = bytes_read; + +cleanup: + close(fd); + return result; +} + +static char *create_backing_dir(char *mount_dir) +{ + struct stat st; + char backing_dir_name[255]; + + snprintf(backing_dir_name, ARRAY_SIZE(backing_dir_name), "%s-src", + mount_dir); + + if (stat(backing_dir_name, &st) == 0) { + if (S_ISDIR(st.st_mode)) { + int error = delete_dir_tree(backing_dir_name); + + if (error) { + ksft_print_msg( + "Can't delete existing backing dir. %d\n", + error); + return NULL; + } + } else { + if (unlink(backing_dir_name)) { + print_error("Can't clear backing dir"); + return NULL; + } + } + } + + if (mkdir(backing_dir_name, 0777)) { + if (errno != EEXIST) { + print_error("Can't open/create backing dir"); + return NULL; + } + } + + return strdup(backing_dir_name); +} + +static int validate_test_file_content_with_seed(char *mount_dir, + struct test_file *file, + unsigned int shuffle_seed) +{ + int error = -1; + char *filename = concat_file_name(mount_dir, file->name); + off_t size = file->size; + loff_t actual_size = get_file_size(filename); + int block_cnt = 1 + (size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; + int *block_indexes = NULL; + int i; + + block_indexes = alloca(sizeof(int) * block_cnt); + for (i = 0; i < block_cnt; i++) + block_indexes[i] = i; + + if (shuffle_seed != 0) + shuffle(block_indexes, block_cnt, shuffle_seed); + + if (actual_size != size) { + ksft_print_msg( + "File size doesn't match. name: %s expected size:%ld actual size:%ld\n", + filename, size, actual_size); + error = -1; + goto failure; + } + + for (i = 0; i < block_cnt; i++) { + int block_idx = block_indexes[i]; + uint8_t expected_block[INCFS_DATA_FILE_BLOCK_SIZE]; + uint8_t actual_block[INCFS_DATA_FILE_BLOCK_SIZE]; + int seed = get_file_block_seed(file->index, block_idx); + size_t bytes_to_compare = min( + (off_t)INCFS_DATA_FILE_BLOCK_SIZE, + size - ((off_t)block_idx) * INCFS_DATA_FILE_BLOCK_SIZE); + int read_result = + read_test_file(actual_block, INCFS_DATA_FILE_BLOCK_SIZE, + filename, block_idx); + if (read_result < 0) { + ksft_print_msg( + "Error reading block %d from file %s. Error: %s\n", + block_idx, filename, strerror(-read_result)); + error = read_result; + goto failure; + } + rnd_buf(expected_block, INCFS_DATA_FILE_BLOCK_SIZE, seed); + if (memcmp(expected_block, actual_block, bytes_to_compare)) { + ksft_print_msg( + "File contents don't match. name: %s block:%d\n", + file->name, block_idx); + error = -2; + goto failure; + } + } + free(filename); + return 0; + +failure: + free(filename); + return error; +} + +static int validate_test_file_content(char *mount_dir, struct test_file *file) +{ + return validate_test_file_content_with_seed(mount_dir, file, 0); +} + +static int data_producer(char *mount_dir, struct test_files_set *test_set) +{ + int ret = 0; + int timeout_ms = 1000; + struct incfs_pending_read_info prs[100] = {}; + int prs_size = ARRAY_SIZE(prs); + int fd = open_commands_file(mount_dir); + + if (fd < 0) + return -errno; + + while ((ret = wait_for_pending_reads(fd, timeout_ms, prs, prs_size)) > + 0) { + int read_count = ret; + int i; + + for (i = 0; i < read_count; i++) { + int j = 0; + struct test_file *file = NULL; + + for (j = 0; j < test_set->files_count; j++) { + bool same = same_id(&(test_set->files[j].id), + &(prs[i].file_id)); + + if (same) { + file = &test_set->files[j]; + break; + } + } + if (!file) { + ksft_print_msg( + "Unknown file in pending reads.\n"); + break; + } + + ret = emit_test_block(mount_dir, file, + prs[i].block_index); + if (ret < 0) { + ksft_print_msg("Emitting test data error: %s\n", + strerror(-ret)); + break; + } + } + } + close(fd); + return ret; +} + +static int build_mtree(struct test_file *file) +{ + char data[INCFS_DATA_FILE_BLOCK_SIZE] = {}; + const int digest_size = SHA256_DIGEST_SIZE; + const int hash_per_block = INCFS_DATA_FILE_BLOCK_SIZE / digest_size; + int block_count = 0; + int hash_block_count = 0; + int total_tree_block_count = 0; + int tree_lvl_index[INCFS_MAX_MTREE_LEVELS] = {}; + int tree_lvl_count[INCFS_MAX_MTREE_LEVELS] = {}; + int levels_count = 0; + char data_to_sign[256] = {}; + int sig_data_size; + int i, level; + + if (file->size == 0) + return 0; + + block_count = 1 + (file->size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; + hash_block_count = block_count; + for (i = 0; hash_block_count > 1; i++) { + hash_block_count = (hash_block_count + hash_per_block - 1) + / hash_per_block; + tree_lvl_count[i] = hash_block_count; + total_tree_block_count += hash_block_count; + } + levels_count = i; + + for (i = 0; i < levels_count; i++) { + int prev_lvl_base = (i == 0) ? total_tree_block_count : + tree_lvl_index[i - 1]; + + tree_lvl_index[i] = prev_lvl_base - tree_lvl_count[i]; + } + + file->mtree_block_count = total_tree_block_count; + if (block_count == 1) { + int seed = get_file_block_seed(file->index, 0); + + rnd_buf((uint8_t *)data, file->size, seed); + sha256(data, file->size, file->root_hash); + return 0; + } + + file->mtree = calloc(total_tree_block_count, sizeof(*file->mtree)); + /* Build level 0 hashes. */ + for (i = 0; i < block_count; i++) { + off_t offset = i * INCFS_DATA_FILE_BLOCK_SIZE; + size_t block_size = INCFS_DATA_FILE_BLOCK_SIZE; + int block_index = tree_lvl_index[0] + + i / hash_per_block; + int block_off = (i % hash_per_block) * digest_size; + int seed = get_file_block_seed(file->index, i); + char *hash_ptr = file->mtree[block_index].data + block_off; + + if (file->size - offset < block_size) + block_size = file->size - offset; + + rnd_buf((uint8_t *)data, block_size, seed); + sha256(data, block_size, hash_ptr); + } + + /* Build higher levels of hash tree. */ + for (level = 1; level < levels_count; level++) { + int prev_lvl_base = tree_lvl_index[level - 1]; + int prev_lvl_count = tree_lvl_count[level - 1]; + + for (i = 0; i < prev_lvl_count; i++) { + int block_index = + i / hash_per_block + tree_lvl_index[level]; + int block_off = (i % hash_per_block) * digest_size; + char *hash_ptr = + file->mtree[block_index].data + block_off; + + sha256(file->mtree[i + prev_lvl_base].data, + INCFS_DATA_FILE_BLOCK_SIZE, hash_ptr); + } + } + + /* Calculate root hash from the top block */ + sha256(file->mtree[0].data, + INCFS_DATA_FILE_BLOCK_SIZE, file->root_hash); + + /* Calculating digital signature */ + snprintf(file->sig.add_data, sizeof(file->sig.add_data), "%ld", + file->size); + memcpy(data_to_sign, file->root_hash, SHA256_DIGEST_SIZE); + memcpy(data_to_sign + SHA256_DIGEST_SIZE, file->sig.add_data, + strlen(file->sig.add_data)); + sig_data_size = SHA256_DIGEST_SIZE + strlen(file->sig.add_data); + if (!sign_pkcs7(data_to_sign, sig_data_size, private_key, x509_cert, + &file->sig.data, &file->sig.size)) { + ksft_print_msg("Signing failed.\n"); + return -EINVAL; + } + + return 0; +} + +static int load_hash_tree(const char *mount_dir, struct test_file *file) +{ + int err; + int i; + int fd; + + size_t blocks_size = + file->mtree_block_count * sizeof(struct incfs_new_data_block); + struct incfs_new_data_block *blocks = NULL; + char *file_path; + + if (blocks_size == 0) + return 0; + + blocks = malloc(blocks_size); + if (!blocks) + return -ENOMEM; + + for (i = 0; i < file->mtree_block_count; i++) { + blocks[i] = (struct incfs_new_data_block){ + .block_index = i, + .data_len = INCFS_DATA_FILE_BLOCK_SIZE, + .data = ptr_to_u64(file->mtree[i].data), + .flags = INCFS_BLOCK_FLAGS_HASH + }; + } + + file_path = concat_file_name(mount_dir, file->name); + fd = open(file_path, O_RDWR); + free(file_path); + if (fd < 0) { + err = errno; + goto failure; + } + + err = write(fd, blocks, blocks_size); + close(fd); + + if (err < blocks_size) + err = errno; + else { + err = 0; + free(file->mtree); + } + +failure: + free(blocks); + return err; +} + +static int cant_touch_index_test(char *mount_dir) +{ + char *file_name = "test_file"; + int file_size = 123; + incfs_uuid_t file_id; + char *index_path = concat_file_name(mount_dir, ".index"); + char *subdir = concat_file_name(index_path, "subdir"); + char *dst_name = concat_file_name(mount_dir, "something"); + char *filename_in_index = NULL; + char *file_path = concat_file_name(mount_dir, file_name); + char *backing_dir; + int cmd_fd = -1; + int err; + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + /* Mount FS and release the backing file. */ + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + free(backing_dir); + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + + err = mkdir(subdir, 0777); + if (err == 0 || errno != EBUSY) { + print_error("Shouldn't be able to crate subdir in index\n"); + goto failure; + } + + err = emit_file(cmd_fd, ".index", file_name, &file_id, + file_size, NULL); + if (err != -EBUSY) { + print_error("Shouldn't be able to crate a file in index\n"); + goto failure; + } + + err = emit_file(cmd_fd, NULL, file_name, &file_id, + file_size, NULL); + if (err < 0) + goto failure; + filename_in_index = get_index_filename(mount_dir, file_id); + + err = unlink(filename_in_index); + if (err == 0 || errno != EBUSY) { + print_error("Shouldn't be delete from index\n"); + goto failure; + } + + + err = rename(filename_in_index, dst_name); + if (err == 0 || errno != EBUSY) { + print_error("Shouldn't be able to move from index\n"); + goto failure; + } + + free(filename_in_index); + filename_in_index = concat_file_name(index_path, "abc"); + err = link(file_path, filename_in_index); + if (err == 0 || errno != EBUSY) { + print_error("Shouldn't be able to link inside index\n"); + goto failure; + } + + close(cmd_fd); + free(subdir); + free(index_path); + free(dst_name); + free(filename_in_index); + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + return TEST_SUCCESS; + +failure: + free(subdir); + free(dst_name); + free(index_path); + free(filename_in_index); + close(cmd_fd); + umount(mount_dir); + return TEST_FAILURE; +} + +static bool iterate_directory(char *dir_to_iterate, bool root, int file_count) +{ + struct expected_name { + const char *name; + bool root_only; + bool found; + } names[] = { + {INCFS_LOG_FILENAME, true, false}, + {INCFS_PENDING_READS_FILENAME, true, false}, + {".index", true, false}, + {"..", false, false}, + {".", false, false}, + }; + + bool pass = true, found; + int i; + + /* Test directory iteration */ + int fd = open(dir_to_iterate, O_RDONLY | O_DIRECTORY); + + if (fd < 0) { + print_error("Can't open directory\n"); + return false; + } + + for (;;) { + /* Enough space for one dirent - no name over 30 */ + char buf[sizeof(struct linux_dirent64) + NAME_MAX]; + struct linux_dirent64 *dirent = (struct linux_dirent64 *) buf; + int nread; + int i; + + for (i = 0; i < NAME_MAX; ++i) { + nread = syscall(__NR_getdents64, fd, buf, + sizeof(struct linux_dirent64) + i); + + if (nread >= 0) + break; + if (errno != EINVAL) + break; + } + + if (nread == 0) + break; + if (nread < 0) { + print_error("Error iterating directory\n"); + pass = false; + goto failure; + } + + /* Expected size is rounded up to 8 byte boundary. Not sure if + * this is universal truth or just happenstance, but useful test + * for the moment + */ + if (nread != (((sizeof(struct linux_dirent64) + + strlen(dirent->d_name) + 1) + 7) & ~7)) { + print_error("Wrong dirent size"); + pass = false; + goto failure; + } + + found = false; + for (i = 0; i < sizeof(names) / sizeof(*names); ++i) + if (!strcmp(dirent->d_name, names[i].name)) { + if (names[i].root_only && !root) { + print_error("Root file error"); + pass = false; + goto failure; + } + + if (names[i].found) { + print_error("File appears twice"); + pass = false; + goto failure; + } + + names[i].found = true; + found = true; + break; + } + + if (!found) + --file_count; + } + + for (i = 0; i < sizeof(names) / sizeof(*names); ++i) { + if (!names[i].found) + if (root || !names[i].root_only) { + print_error("Expected file not present"); + pass = false; + goto failure; + } + } + + if (file_count) { + print_error("Wrong number of files\n"); + pass = false; + goto failure; + } + +failure: + close(fd); + return pass; +} + +static int basic_file_ops_test(char *mount_dir) +{ + struct test_files_set test = get_test_files_set(); + const int file_num = test.files_count; + char *subdir1 = concat_file_name(mount_dir, "subdir1"); + char *subdir2 = concat_file_name(mount_dir, "subdir2"); + char *backing_dir; + int cmd_fd = -1; + int i, err; + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + /* Mount FS and release the backing file. */ + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + free(backing_dir); + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + err = mkdir(subdir1, 0777); + if (err < 0 && errno != EEXIST) { + print_error("Can't create subdir1\n"); + goto failure; + } + + err = mkdir(subdir2, 0777); + if (err < 0 && errno != EEXIST) { + print_error("Can't create subdir2\n"); + goto failure; + } + + /* Create all test files in subdir1 directory */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + loff_t size; + char *file_path = concat_file_name(subdir1, file->name); + + err = emit_file(cmd_fd, "subdir1", file->name, &file->id, + file->size, NULL); + if (err < 0) + goto failure; + + size = get_file_size(file_path); + free(file_path); + if (size != file->size) { + ksft_print_msg("Wrong size %lld of %s.\n", + size, file->name); + goto failure; + } + } + + if (!iterate_directory(subdir1, false, file_num)) + goto failure; + + /* Link the files to subdir2 */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + char *src_name = concat_file_name(subdir1, file->name); + char *dst_name = concat_file_name(subdir2, file->name); + loff_t size; + + err = link(src_name, dst_name); + if (err < 0) { + print_error("Can't move file\n"); + goto failure; + } + + size = get_file_size(dst_name); + if (size != file->size) { + ksft_print_msg("Wrong size %lld of %s.\n", + size, file->name); + goto failure; + } + free(src_name); + free(dst_name); + } + + /* Move the files from subdir2 to the mount dir */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + char *src_name = concat_file_name(subdir2, file->name); + char *dst_name = concat_file_name(mount_dir, file->name); + loff_t size; + + err = rename(src_name, dst_name); + if (err < 0) { + print_error("Can't move file\n"); + goto failure; + } + + size = get_file_size(dst_name); + if (size != file->size) { + ksft_print_msg("Wrong size %lld of %s.\n", + size, file->name); + goto failure; + } + free(src_name); + free(dst_name); + } + + /* +2 because there are 2 subdirs */ + if (!iterate_directory(mount_dir, true, file_num + 2)) + goto failure; + + /* Open and close all files from the mount dir */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + char *path = concat_file_name(mount_dir, file->name); + int fd; + + fd = open(path, O_RDWR); + free(path); + if (fd <= 0) { + print_error("Can't open file"); + goto failure; + } + if (close(fd)) { + print_error("Can't close file"); + goto failure; + } + } + + /* Delete all files from the mount dir */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + char *path = concat_file_name(mount_dir, file->name); + + err = unlink(path); + free(path); + if (err < 0) { + print_error("Can't unlink file"); + goto failure; + } + } + + err = delete_dir_tree(subdir1); + if (err) { + ksft_print_msg("Error deleting subdir1 %d", err); + goto failure; + } + + err = rmdir(subdir2); + if (err) { + print_error("Error deleting subdir2"); + goto failure; + } + + close(cmd_fd); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + return TEST_SUCCESS; + +failure: + close(cmd_fd); + umount(mount_dir); + return TEST_FAILURE; +} + +static int dynamic_files_and_data_test(char *mount_dir) +{ + struct test_files_set test = get_test_files_set(); + const int file_num = test.files_count; + const int missing_file_idx = 5; + int cmd_fd = -1; + char *backing_dir; + int i; + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + /* Mount FS and release the backing file. */ + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + free(backing_dir); + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Check that test files don't exist in the filesystem. */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + char *filename = concat_file_name(mount_dir, file->name); + + if (access(filename, F_OK) != -1) { + ksft_print_msg( + "File %s somehow already exists in a clean FS.\n", + filename); + goto failure; + } + free(filename); + } + + /* Write test data into the command file. */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + int res; + + build_mtree(file); + res = emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, NULL); + if (res < 0) { + ksft_print_msg("Error %s emiting file %s.\n", + strerror(-res), file->name); + goto failure; + } + + /* Skip writing data to one file so we can check */ + /* that it's missing later. */ + if (i == missing_file_idx) + continue; + + res = load_hash_tree(mount_dir, file); + if (res) { + ksft_print_msg("Can't load hashes for %s. error: %s\n", + file->name, strerror(-res)); + goto failure; + } + + res = emit_test_file_data(mount_dir, file); + if (res) { + ksft_print_msg("Error %s emiting data for %s.\n", + strerror(-res), file->name); + goto failure; + } + } + + /* Validate contents of the FS */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + if (i == missing_file_idx) { + /* No data has been written to this file. */ + /* Check for read error; */ + uint8_t buf; + char *filename = + concat_file_name(mount_dir, file->name); + int res = read_test_file(&buf, 1, filename, 0); + + free(filename); + if (res > 0) { + ksft_print_msg( + "Data present, even though never writtern.\n"); + goto failure; + } + if (res != -ETIME) { + ksft_print_msg("Wrong error code: %d.\n", res); + goto failure; + } + } else { + if (validate_test_file_content(mount_dir, file) < 0) + goto failure; + } + } + + close(cmd_fd); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + return TEST_SUCCESS; + +failure: + close(cmd_fd); + umount(mount_dir); + return TEST_FAILURE; +} + +static int concurrent_reads_and_writes_test(char *mount_dir) +{ + struct test_files_set test = get_test_files_set(); + const int file_num = test.files_count; + /* Validate each file from that many child processes. */ + const int child_multiplier = 3; + int cmd_fd = -1; + char *backing_dir; + int status; + int i; + pid_t producer_pid; + pid_t *child_pids = alloca(child_multiplier * file_num * sizeof(pid_t)); + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + /* Mount FS and release the backing file. */ + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + free(backing_dir); + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Tell FS about the files, without actually providing the data. */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + int res; + + res = emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, NULL); + if (res) + goto failure; + } + + /* Start child processes acessing data in the files */ + for (i = 0; i < file_num * child_multiplier; i++) { + struct test_file *file = &test.files[i / child_multiplier]; + pid_t child_pid = flush_and_fork(); + + if (child_pid == 0) { + /* This is a child process, do the data validation. */ + int ret = validate_test_file_content_with_seed( + mount_dir, file, i); + if (ret >= 0) { + /* Zero exit status if data is valid. */ + exit(0); + } + + /* Positive status if validation error found. */ + exit(-ret); + } else if (child_pid > 0) { + child_pids[i] = child_pid; + } else { + print_error("Fork error"); + goto failure; + } + } + + producer_pid = flush_and_fork(); + if (producer_pid == 0) { + int ret; + /* + * This is a child that should provide data to + * pending reads. + */ + + ret = data_producer(mount_dir, &test); + exit(-ret); + } else { + status = wait_for_process(producer_pid); + if (status != 0) { + ksft_print_msg("Data produces failed. %d(%s) ", status, + strerror(status)); + goto failure; + } + } + + /* Check that all children has finished with 0 exit status */ + for (i = 0; i < file_num * child_multiplier; i++) { + struct test_file *file = &test.files[i / child_multiplier]; + + status = wait_for_process(child_pids[i]); + if (status != 0) { + ksft_print_msg( + "Validation for the file %s failed with code %d (%s)\n", + file->name, status, strerror(status)); + goto failure; + } + } + + /* Check that there are no pending reads left */ + { + struct incfs_pending_read_info prs[1] = {}; + int timeout = 0; + int read_count = wait_for_pending_reads(cmd_fd, timeout, prs, + ARRAY_SIZE(prs)); + + if (read_count) { + ksft_print_msg( + "Pending reads pending when all data written\n"); + goto failure; + } + } + + close(cmd_fd); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + return TEST_SUCCESS; + +failure: + close(cmd_fd); + umount(mount_dir); + return TEST_FAILURE; +} + +static int work_after_remount_test(char *mount_dir) +{ + struct test_files_set test = get_test_files_set(); + const int file_num = test.files_count; + const int file_num_stage1 = file_num / 2; + const int file_num_stage2 = file_num; + char *backing_dir = NULL; + int i = 0; + int cmd_fd = -1; + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + /* Mount FS and release the backing file. */ + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Write first half of the data into the command file. (stage 1) */ + for (i = 0; i < file_num_stage1; i++) { + struct test_file *file = &test.files[i]; + int res; + + build_mtree(file); + if (emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, NULL)) + goto failure; + + if (emit_test_file_data(mount_dir, file)) + goto failure; + + res = load_hash_tree(mount_dir, file); + if (res) { + ksft_print_msg("Can't load hashes for %s. error: %s\n", + file->name, strerror(-res)); + goto failure; + } +} + + /* Unmount and mount again, to see that data is persistent. */ + close(cmd_fd); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Write the second half of the data into the command file. (stage 2) */ + for (; i < file_num_stage2; i++) { + struct test_file *file = &test.files[i]; + int res = emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, NULL); + + if (res) + goto failure; + + if (emit_test_file_data(mount_dir, file)) + goto failure; + } + + /* Validate contents of the FS */ + for (i = 0; i < file_num_stage2; i++) { + struct test_file *file = &test.files[i]; + + if (validate_test_file_content(mount_dir, file) < 0) + goto failure; + } + + /* Delete all files */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + char *filename = concat_file_name(mount_dir, file->name); + char *filename_in_index = get_index_filename(mount_dir, + file->id); + + if (access(filename, F_OK) != 0) { + ksft_print_msg("File %s is not visible.\n", filename); + goto failure; + } + + if (access(filename_in_index, F_OK) != 0) { + ksft_print_msg("File %s is not visible.\n", + filename_in_index); + goto failure; + } + + unlink(filename); + + if (access(filename, F_OK) != -1) { + ksft_print_msg("File %s is still present.\n", filename); + goto failure; + } + + if (access(filename_in_index, F_OK) != 0) { + ksft_print_msg("File %s is still present.\n", + filename_in_index); + goto failure; + } + free(filename); + free(filename_in_index); + } + + /* Unmount and mount again, to see that deleted files stay deleted. */ + close(cmd_fd); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Validate all deleted files are still deleted. */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + char *filename = concat_file_name(mount_dir, file->name); + + if (access(filename, F_OK) != -1) { + ksft_print_msg("File %s is still visible.\n", filename); + goto failure; + } + free(filename); + } + + /* Final unmount */ + close(cmd_fd); + free(backing_dir); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + return TEST_SUCCESS; + +failure: + close(cmd_fd); + free(backing_dir); + umount(mount_dir); + return TEST_FAILURE; +} + +static int attribute_test(char *mount_dir) +{ + char file_attr[] = "metadata123123"; + char attr_buf[INCFS_MAX_FILE_ATTR_SIZE] = {}; + int cmd_fd = -1; + incfs_uuid_t file_id; + int attr_res = 0; + char *backing_dir; + + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + /* Mount FS and release the backing file. */ + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + if (emit_file(cmd_fd, NULL, "file", &file_id, 12, file_attr)) + goto failure; + + /* Test attribute values */ + attr_res = get_file_attr(mount_dir, file_id, attr_buf, + ARRAY_SIZE(attr_buf)); + if (attr_res != strlen(file_attr)) { + ksft_print_msg("Get file attr error: %d\n", attr_res); + goto failure; + } + if (strcmp(attr_buf, file_attr) != 0) { + ksft_print_msg("Incorrect file attr value: '%s'", attr_buf); + goto failure; + } + + /* Unmount and mount again, to see that attributes are persistent. */ + close(cmd_fd); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Test attribute values again after remount*/ + attr_res = get_file_attr(mount_dir, file_id, attr_buf, + ARRAY_SIZE(attr_buf)); + if (attr_res != strlen(file_attr)) { + ksft_print_msg("Get dir attr error: %d\n", attr_res); + goto failure; + } + if (strcmp(attr_buf, file_attr) != 0) { + ksft_print_msg("Incorrect file attr value: '%s'", attr_buf); + goto failure; + } + + /* Final unmount */ + close(cmd_fd); + free(backing_dir); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + return TEST_SUCCESS; + +failure: + close(cmd_fd); + free(backing_dir); + umount(mount_dir); + return TEST_FAILURE; +} + +static int child_procs_waiting_for_data_test(char *mount_dir) +{ + struct test_files_set test = get_test_files_set(); + const int file_num = test.files_count; + int cmd_fd = -1; + int i; + pid_t *child_pids = alloca(file_num * sizeof(pid_t)); + char *backing_dir; + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + /* Mount FS and release the backing file. (10s wait time) */ + if (mount_fs(mount_dir, backing_dir, 10000) != 0) + goto failure; + + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Tell FS about the files, without actually providing the data. */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, NULL); + } + + /* Start child processes acessing data in the files */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + pid_t child_pid = flush_and_fork(); + + if (child_pid == 0) { + /* This is a child process, do the data validation. */ + int ret = validate_test_file_content(mount_dir, file); + + if (ret >= 0) { + /* Zero exit status if data is valid. */ + exit(0); + } + + /* Positive status if validation error found. */ + exit(-ret); + } else if (child_pid > 0) { + child_pids[i] = child_pid; + } else { + print_error("Fork error"); + goto failure; + } + } + + /* Write test data into the command file. */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + if (emit_test_file_data(mount_dir, file)) + goto failure; + } + + /* Check that all children has finished with 0 exit status */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + int status = wait_for_process(child_pids[i]); + + if (status != 0) { + ksft_print_msg( + "Validation for the file %s failed with code %d (%s)\n", + file->name, status, strerror(status)); + goto failure; + } + } + + close(cmd_fd); + free(backing_dir); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + return TEST_SUCCESS; + +failure: + close(cmd_fd); + free(backing_dir); + umount(mount_dir); + return TEST_FAILURE; +} + +static int multiple_providers_test(char *mount_dir) +{ + struct test_files_set test = get_test_files_set(); + const int file_num = test.files_count; + const int producer_count = 5; + int cmd_fd = -1; + int status; + int i; + pid_t *producer_pids = alloca(producer_count * sizeof(pid_t)); + char *backing_dir; + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + /* Mount FS and release the backing file. (10s wait time) */ + if (mount_fs(mount_dir, backing_dir, 10000) != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Tell FS about the files, without actually providing the data. */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + if (emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, NULL) < 0) + goto failure; + } + + /* Start producer processes */ + for (i = 0; i < producer_count; i++) { + pid_t producer_pid = flush_and_fork(); + + if (producer_pid == 0) { + int ret; + /* + * This is a child that should provide data to + * pending reads. + */ + + ret = data_producer(mount_dir, &test); + exit(-ret); + } else if (producer_pid > 0) { + producer_pids[i] = producer_pid; + } else { + print_error("Fork error"); + goto failure; + } + } + + /* Validate FS content */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + char *filename = concat_file_name(mount_dir, file->name); + loff_t read_result = read_whole_file(filename); + + free(filename); + if (read_result != file->size) { + ksft_print_msg( + "Error validating file %s. Result: %ld\n", + file->name, read_result); + goto failure; + } + } + + /* Check that all producers has finished with 0 exit status */ + for (i = 0; i < producer_count; i++) { + status = wait_for_process(producer_pids[i]); + if (status != 0) { + ksft_print_msg("Producer %d failed with code (%s)\n", i, + strerror(status)); + goto failure; + } + } + + close(cmd_fd); + free(backing_dir); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + return TEST_SUCCESS; + +failure: + close(cmd_fd); + free(backing_dir); + umount(mount_dir); + return TEST_FAILURE; +} + +static int signature_test(char *mount_dir) +{ + struct test_files_set test = get_test_files_set(); + const int file_num = test.files_count; + int i = 0; + unsigned char sig_buf[INCFS_MAX_SIGNATURE_SIZE]; + char *backing_dir; + int cmd_fd = -1; + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + /* Mount FS and release the backing file. (10s wait time) */ + if (mount_fs(mount_dir, backing_dir, 10000) != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Write hashes and data. */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + int res; + + build_mtree(file); + + res = crypto_emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, file->root_hash, + file->sig.data, file->sig.size, file->sig.add_data); + + if (res) { + ksft_print_msg("Emit failed for %s. error: %s\n", + file->name, strerror(-res)); + goto failure; + } + + if (emit_test_file_data(mount_dir, file)) + goto failure; + + res = load_hash_tree(mount_dir, file); + if (res) { + ksft_print_msg("Can't load hashes for %s. error: %s\n", + file->name, strerror(-res)); + goto failure; + } + } + + /* Validate data */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + int sig_len; + char *path; + int fd; + + if (validate_test_file_content(mount_dir, file) < 0) + goto failure; + + path = concat_file_name(mount_dir, file->name); + fd = open(path, O_RDWR); + free(path); + if (fd < 0) { + print_error("Can't open file"); + goto failure; + } + + sig_len = get_file_signature(fd, sig_buf, ARRAY_SIZE(sig_buf)); + + if (close(fd)) { + print_error("Can't close file"); + goto failure; + } + + if (sig_len < 0) { + ksft_print_msg("Can't load signature %s. error: %s\n", + file->name, strerror(-sig_len)); + goto failure; + } + + if (sig_len != file->sig.size || + memcmp(sig_buf, file->sig.data, sig_len)) { + ksft_print_msg("Signature mismatch %s.\n", + file->name); + goto failure; + } + } + + /* Unmount and mount again, to make sure the signature is persistent. */ + close(cmd_fd); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Validate data again */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + int sig_len; + char *path; + int fd; + + if (validate_test_file_content(mount_dir, file) < 0) + goto failure; + + path = concat_file_name(mount_dir, file->name); + fd = open(path, O_RDWR); + free(path); + if (fd < 0) { + print_error("Can't open file"); + goto failure; + } + + sig_len = get_file_signature(fd, sig_buf, ARRAY_SIZE(sig_buf)); + + if (close(fd)) { + print_error("Can't close file"); + goto failure; + } + + if (sig_len < 0) { + ksft_print_msg("Can't load signature %s. error: %s\n", + file->name, strerror(-sig_len)); + goto failure; + } + if (sig_len != file->sig.size || + memcmp(sig_buf, file->sig.data, sig_len)) { + ksft_print_msg("Signature mismatch %s.\n", + file->name); + goto failure; + } + } + + /* Final unmount */ + close(cmd_fd); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + return TEST_SUCCESS; + +failure: + close(cmd_fd); + free(backing_dir); + umount(mount_dir); + return TEST_FAILURE; +} + +static int hash_tree_test(char *mount_dir) +{ + char *backing_dir; + struct test_files_set test = get_test_files_set(); + const int file_num = test.files_count; + const int corrupted_file_idx = 5; + int i = 0; + int cmd_fd = -1; + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + /* Mount FS and release the backing file. */ + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Write hashes and data. */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + int res; + + build_mtree(file); + res = crypto_emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, file->root_hash, + file->sig.data, file->sig.size, file->sig.add_data); + + if (i == corrupted_file_idx) { + /* Corrupt third blocks hash */ + file->mtree[0].data[2 * SHA256_DIGEST_SIZE] ^= 0xff; + } + if (emit_test_file_data(mount_dir, file)) + goto failure; + + res = load_hash_tree(mount_dir, file); + if (res) { + ksft_print_msg("Can't load hashes for %s. error: %s\n", + file->name, strerror(-res)); + goto failure; + } + } + + /* Validate data */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + if (i == corrupted_file_idx) { + uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE]; + char *filename = + concat_file_name(mount_dir, file->name); + int res; + + res = read_test_file(data, INCFS_DATA_FILE_BLOCK_SIZE, + filename, 2); + free(filename); + if (res != -EBADMSG) { + ksft_print_msg("Hash violation missed1. %d\n", + res); + goto failure; + } + } else if (validate_test_file_content(mount_dir, file) < 0) + goto failure; + } + + /* Unmount and mount again, to that hashes are persistent. */ + close(cmd_fd); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Validate data again */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + if (i == corrupted_file_idx) { + uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE]; + char *filename = + concat_file_name(mount_dir, file->name); + int res; + + res = read_test_file(data, INCFS_DATA_FILE_BLOCK_SIZE, + filename, 2); + free(filename); + if (res != -EBADMSG) { + ksft_print_msg("Hash violation missed2. %d\n", + res); + goto failure; + } + } else if (validate_test_file_content(mount_dir, file) < 0) + goto failure; + } + + /* Final unmount */ + close(cmd_fd); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + return TEST_SUCCESS; + +failure: + close(cmd_fd); + free(backing_dir); + umount(mount_dir); + return TEST_FAILURE; +} + +static int validate_logs(char *mount_dir, int log_fd, struct test_file *file) +{ + uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE]; + struct incfs_pending_read_info prs[100] = {}; + int prs_size = ARRAY_SIZE(prs); + int block_cnt = 1 + (file->size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; + int res; + int read_count; + int i; + char *filename = concat_file_name(mount_dir, file->name); + int fd; + + fd = open(filename, O_RDONLY); + free(filename); + if (fd <= 0) + return TEST_FAILURE; + + if (block_cnt > prs_size) + block_cnt = prs_size; + + for (i = 0; i < block_cnt; i++) { + res = pread(fd, data, sizeof(data), + INCFS_DATA_FILE_BLOCK_SIZE * i); + if (res <= 0) + goto failure; + } + + read_count = wait_for_pending_reads(log_fd, 0, prs, prs_size); + if (read_count < 0) { + ksft_print_msg("Error reading logged reads %s.\n", + strerror(-read_count)); + goto failure; + } + + if (read_count != block_cnt) { + ksft_print_msg("Bad log read count %s %d %d.\n", file->name, + read_count, block_cnt); + goto failure; + } + + for (i = 0; i < read_count; i++) { + struct incfs_pending_read_info *read = &prs[i]; + + if (!same_id(&read->file_id, &file->id)) { + ksft_print_msg("Bad log read ino %s\n", file->name); + goto failure; + } + + if (read->block_index != i) { + ksft_print_msg("Bad log read ino %s %d %d.\n", + file->name, read->block_index, i); + goto failure; + } + + if (i != 0) { + unsigned long psn = prs[i - 1].serial_number; + + if (read->serial_number != psn + 1) { + ksft_print_msg("Bad log read sn %s %d %d.\n", + file->name, read->serial_number, + psn); + goto failure; + } + } + + if (read->timestamp_us == 0) { + ksft_print_msg("Bad log read timestamp %s.\n", + file->name); + goto failure; + } + } + close(fd); + return TEST_SUCCESS; + +failure: + close(fd); + return TEST_FAILURE; +} + +static int read_log_test(char *mount_dir) +{ + struct test_files_set test = get_test_files_set(); + const int file_num = test.files_count; + int i = 0; + int cmd_fd = -1, log_fd = -1; + char *backing_dir; + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + if (mount_fs_opt(mount_dir, backing_dir, "readahead=0") != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + log_fd = open_log_file(mount_dir); + if (cmd_fd < 0) + ksft_print_msg("Can't open log file.\n"); + + /* Write data. */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + if (emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, NULL)) + goto failure; + + if (emit_test_file_data(mount_dir, file)) + goto failure; + } + + /* Validate data */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + if (validate_logs(mount_dir, log_fd, file)) + goto failure; + } + + /* Unmount and mount again, to see that logs work after remount. */ + close(cmd_fd); + close(log_fd); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + if (mount_fs_opt(mount_dir, backing_dir, "readahead=0") != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + log_fd = open_log_file(mount_dir); + if (cmd_fd < 0) + ksft_print_msg("Can't open log file.\n"); + + /* Validate data again */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + if (validate_logs(mount_dir, log_fd, file)) + goto failure; + } + + /* Final unmount */ + close(cmd_fd); + close(log_fd); + free(backing_dir); + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + return TEST_SUCCESS; + +failure: + close(cmd_fd); + close(log_fd); + free(backing_dir); + umount(mount_dir); + return TEST_FAILURE; +} + +static char *setup_mount_dir() +{ + struct stat st; + char *current_dir = getcwd(NULL, 0); + char *mount_dir = concat_file_name(current_dir, "incfs-mount-dir"); + + free(current_dir); + if (stat(mount_dir, &st) == 0) { + if (S_ISDIR(st.st_mode)) + return mount_dir; + + ksft_print_msg("%s is a file, not a dir.\n", mount_dir); + return NULL; + } + + if (mkdir(mount_dir, 0777)) { + print_error("Can't create mount dir."); + return NULL; + } + + return mount_dir; +} + +int main(int argc, char *argv[]) +{ + char *mount_dir = NULL; + int fails = 0; + int i; + int fd, count; + + // Seed randomness pool for testing on QEMU + // NOTE - this abuses the concept of randomness - do *not* ever do this + // on a machine for production use - the device will think it has good + // randomness when it does not. + fd = open("/dev/urandom", O_WRONLY); + count = 4096; + for (int i = 0; i < 128; ++i) + ioctl(fd, RNDADDTOENTCNT, &count); + close(fd); + + ksft_print_header(); + + if (geteuid() != 0) + ksft_print_msg("Not a root, might fail to mount.\n"); + + mount_dir = setup_mount_dir(); + if (mount_dir == NULL) + ksft_exit_fail_msg("Can't create a mount dir\n"); + +#define MAKE_TEST(test) \ + { \ + test, #test \ + } + struct { + int (*pfunc)(char *dir); + const char *name; + } cases[] = { + MAKE_TEST(basic_file_ops_test), + MAKE_TEST(cant_touch_index_test), + MAKE_TEST(dynamic_files_and_data_test), + MAKE_TEST(concurrent_reads_and_writes_test), + MAKE_TEST(attribute_test), + MAKE_TEST(work_after_remount_test), + MAKE_TEST(child_procs_waiting_for_data_test), + MAKE_TEST(multiple_providers_test), + MAKE_TEST(signature_test), + MAKE_TEST(hash_tree_test), + MAKE_TEST(read_log_test), + }; +#undef MAKE_TEST + + ksft_set_plan(ARRAY_SIZE(cases)); + for (i = 0; i < ARRAY_SIZE(cases); ++i) { + ksft_print_msg("Running %s\n", cases[i].name); + if (cases[i].pfunc(mount_dir) == TEST_SUCCESS) + ksft_test_result_pass("%s\n", cases[i].name); + else { + ksft_test_result_fail("%s\n", cases[i].name); + fails++; + } + } + + umount2(mount_dir, MNT_FORCE); + rmdir(mount_dir); + + if (fails > 0) + ksft_exit_pass(); + else + ksft_exit_pass(); + return 0; +} diff --git a/tools/testing/selftests/filesystems/incfs/utils.c b/tools/testing/selftests/filesystems/incfs/utils.c new file mode 100644 index 000000000000..08b8452ad0bc --- /dev/null +++ b/tools/testing/selftests/filesystems/incfs/utils.c @@ -0,0 +1,377 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2018 Google LLC + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" + +int mount_fs(char *mount_dir, char *backing_dir, int read_timeout_ms) +{ + static const char fs_name[] = INCFS_NAME; + char mount_options[512]; + int result; + + snprintf(mount_options, ARRAY_SIZE(mount_options), + "read_timeout_ms=%u", + read_timeout_ms); + + result = mount(backing_dir, mount_dir, fs_name, 0, mount_options); + if (result != 0) + perror("Error mounting fs."); + return result; +} + +int mount_fs_opt(char *mount_dir, char *backing_dir, char *opt) +{ + static const char fs_name[] = INCFS_NAME; + int result; + + result = mount(backing_dir, mount_dir, fs_name, 0, opt); + if (result != 0) + perror("Error mounting fs."); + return result; +} + +int unlink_node(int fd, int parent_ino, char *filename) +{ + return 0; +} + + +static EVP_PKEY *deserialize_private_key(const char *pem_key) +{ + BIO *bio = NULL; + EVP_PKEY *pkey = NULL; + int len = strlen(pem_key); + + bio = BIO_new_mem_buf(pem_key, len); + if (!bio) + return NULL; + + pkey = PEM_read_bio_PrivateKey(bio, NULL, NULL, NULL); + BIO_free(bio); + return pkey; +} + +static X509 *deserialize_cert(const char *pem_cert) +{ + BIO *bio = NULL; + X509 *cert = NULL; + int len = strlen(pem_cert); + + bio = BIO_new_mem_buf(pem_cert, len); + if (!bio) + return NULL; + + cert = PEM_read_bio_X509(bio, NULL, NULL, NULL); + BIO_free(bio); + return cert; +} + +bool sign_pkcs7(const void *data_to_sign, size_t data_size, + char *pkey_pem, char *cert_pem, + void **sig_ret, size_t *sig_size_ret) +{ + /* + * PKCS#7 signing flags: + * + * - PKCS7_BINARY signing binary data, so skip MIME translation + * + * - PKCS7_NOATTR omit extra authenticated attributes, such as + * SMIMECapabilities + * + * - PKCS7_PARTIAL PKCS7_sign() creates a handle only, then + * PKCS7_sign_add_signer() can add a signer later. + * This is necessary to change the message digest + * algorithm from the default of SHA-1. Requires + * OpenSSL 1.0.0 or later. + */ + int pkcs7_flags = PKCS7_BINARY | PKCS7_NOATTR | PKCS7_PARTIAL; + void *sig; + size_t sig_size; + BIO *bio = NULL; + PKCS7 *p7 = NULL; + EVP_PKEY *pkey = NULL; + X509 *cert = NULL; + bool ok = false; + + const EVP_MD *md = EVP_sha256(); + + pkey = deserialize_private_key(pkey_pem); + if (!pkey) { + printf("deserialize_private_key failed\n"); + goto out; + } + + cert = deserialize_cert(cert_pem); + if (!cert) { + printf("deserialize_cert failed\n"); + goto out; + } + + bio = BIO_new_mem_buf(data_to_sign, data_size); + if (!bio) + goto out; + + p7 = PKCS7_sign(NULL, NULL, NULL, bio, pkcs7_flags); + if (!p7) { + printf("failed to initialize PKCS#7 signature object\n"); + goto out; + } + + if (!PKCS7_sign_add_signer(p7, cert, pkey, md, pkcs7_flags)) { + printf("failed to add signer to PKCS#7 signature object\n"); + goto out; + } + + if (PKCS7_final(p7, bio, pkcs7_flags) != 1) { + printf("failed to finalize PKCS#7 signature\n"); + goto out; + } + + BIO_free(bio); + bio = BIO_new(BIO_s_mem()); + if (!bio) { + printf("out of memory\n"); + goto out; + } + + if (i2d_PKCS7_bio(bio, p7) != 1) { + printf("failed to DER-encode PKCS#7 signature object\n"); + goto out; + } + + sig_size = BIO_get_mem_data(bio, &sig); + *sig_ret = malloc(sig_size); + memcpy(*sig_ret, sig, sig_size); + *sig_size_ret = sig_size; + ok = true; +out: + PKCS7_free(p7); + BIO_free(bio); + return ok; +} + +int crypto_emit_file(int fd, char *dir, char *filename, incfs_uuid_t *id_out, + size_t size, const char *root_hash, char *sig, size_t sig_size, + char *add_data) +{ + int mode = __S_IFREG | 0555; + struct incfs_file_signature_info sig_info = { + .hash_tree_alg = root_hash + ? INCFS_HASH_TREE_SHA256 + : 0, + .root_hash = ptr_to_u64(root_hash), + .additional_data = ptr_to_u64(add_data), + .additional_data_size = strlen(add_data), + .signature = ptr_to_u64(sig), + .signature_size = sig_size, + }; + + struct incfs_new_file_args args = { + .size = size, + .mode = mode, + .file_name = ptr_to_u64(filename), + .directory_path = ptr_to_u64(dir), + .signature_info = ptr_to_u64(&sig_info), + .file_attr = 0, + .file_attr_len = 0 + }; + + md5(filename, strlen(filename), (char *)args.file_id.bytes); + + if (ioctl(fd, INCFS_IOC_CREATE_FILE, &args) != 0) + return -errno; + + *id_out = args.file_id; + return 0; +} + + +int emit_file(int fd, char *dir, char *filename, incfs_uuid_t *id_out, + size_t size, char *attr) +{ + int mode = __S_IFREG | 0555; + struct incfs_file_signature_info sig_info = { + .hash_tree_alg = 0, + .root_hash = ptr_to_u64(NULL) + }; + struct incfs_new_file_args args = { + .size = size, + .mode = mode, + .file_name = ptr_to_u64(filename), + .directory_path = ptr_to_u64(dir), + .signature_info = ptr_to_u64(&sig_info), + .file_attr = ptr_to_u64(attr), + .file_attr_len = attr ? strlen(attr) : 0 + }; + + md5(filename, strlen(filename), (char *)args.file_id.bytes); + + if (ioctl(fd, INCFS_IOC_CREATE_FILE, &args) != 0) + return -errno; + + *id_out = args.file_id; + return 0; +} + +int get_file_bmap(int cmd_fd, int ino, unsigned char *buf, int buf_size) +{ + return 0; +} + +int get_file_signature(int fd, unsigned char *buf, int buf_size) +{ + struct incfs_get_file_sig_args args = { + .file_signature = ptr_to_u64(buf), + .file_signature_buf_size = buf_size + }; + + if (ioctl(fd, INCFS_IOC_READ_FILE_SIGNATURE, &args) == 0) + return args.file_signature_len_out; + return -errno; +} + +loff_t get_file_size(char *name) +{ + struct stat st; + + if (stat(name, &st) == 0) + return st.st_size; + return -ENOENT; +} + +int open_commands_file(char *mount_dir) +{ + char cmd_file[255]; + int cmd_fd; + + snprintf(cmd_file, ARRAY_SIZE(cmd_file), + "%s/%s", mount_dir, INCFS_PENDING_READS_FILENAME); + cmd_fd = open(cmd_file, O_RDONLY); + + if (cmd_fd < 0) + perror("Can't open commands file"); + return cmd_fd; +} + +int open_log_file(char *mount_dir) +{ + char cmd_file[255]; + int cmd_fd; + + snprintf(cmd_file, ARRAY_SIZE(cmd_file), "%s/.log", mount_dir); + cmd_fd = open(cmd_file, O_RDWR); + if (cmd_fd < 0) + perror("Can't open log file"); + return cmd_fd; +} + +int wait_for_pending_reads(int fd, int timeout_ms, + struct incfs_pending_read_info *prs, int prs_count) +{ + ssize_t read_res = 0; + + if (timeout_ms > 0) { + int poll_res = 0; + struct pollfd pollfd = { + .fd = fd, + .events = POLLIN + }; + + poll_res = poll(&pollfd, 1, timeout_ms); + if (poll_res < 0) + return -errno; + if (poll_res == 0) + return 0; + if (!(pollfd.revents | POLLIN)) + return 0; + } + + read_res = read(fd, prs, prs_count * sizeof(*prs)); + if (read_res < 0) + return -errno; + + return read_res / sizeof(*prs); +} + +char *concat_file_name(const char *dir, char *file) +{ + char full_name[FILENAME_MAX] = ""; + + if (snprintf(full_name, ARRAY_SIZE(full_name), "%s/%s", dir, file) < 0) + return NULL; + return strdup(full_name); +} + +int delete_dir_tree(const char *dir_path) +{ + DIR *dir = NULL; + struct dirent *dp; + int result = 0; + + dir = opendir(dir_path); + if (!dir) { + result = -errno; + goto out; + } + + while ((dp = readdir(dir))) { + char *full_path; + + if (!strcmp(dp->d_name, ".") || !strcmp(dp->d_name, "..")) + continue; + + full_path = concat_file_name(dir_path, dp->d_name); + if (dp->d_type == DT_DIR) + result = delete_dir_tree(full_path); + else + result = unlink(full_path); + free(full_path); + if (result) + goto out; + } + +out: + if (dir) + closedir(dir); + if (!result) + rmdir(dir_path); + return result; +} + +void sha256(char *data, size_t dsize, char *hash) +{ + SHA256_CTX ctx; + + SHA256_Init(&ctx); + SHA256_Update(&ctx, data, dsize); + SHA256_Final((unsigned char *)hash, &ctx); +} + +void md5(char *data, size_t dsize, char *hash) +{ + MD5_CTX ctx; + + MD5_Init(&ctx); + MD5_Update(&ctx, data, dsize); + MD5_Final((unsigned char *)hash, &ctx); +} diff --git a/tools/testing/selftests/filesystems/incfs/utils.h b/tools/testing/selftests/filesystems/incfs/utils.h new file mode 100644 index 000000000000..9c9ba3c5f70a --- /dev/null +++ b/tools/testing/selftests/filesystems/incfs/utils.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2019 Google LLC + */ +#include +#include + +#include "../../include/uapi/linux/incrementalfs.h" + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) + +#ifdef __LP64__ +#define ptr_to_u64(p) ((__u64)p) +#else +#define ptr_to_u64(p) ((__u64)(__u32)p) +#endif + +#define SHA256_DIGEST_SIZE 32 + +int mount_fs(char *mount_dir, char *backing_dir, int read_timeout_ms); + +int mount_fs_opt(char *mount_dir, char *backing_dir, char *opt); + +int get_file_bmap(int cmd_fd, int ino, unsigned char *buf, int buf_size); + +int get_file_signature(int fd, unsigned char *buf, int buf_size); + +int emit_node(int fd, char *filename, int *ino_out, int parent_ino, + size_t size, mode_t mode, char *attr); + +int emit_file(int fd, char *dir, char *filename, incfs_uuid_t *id_out, + size_t size, char *attr); + +int crypto_emit_file(int fd, char *dir, char *filename, incfs_uuid_t *id_out, + size_t size, const char *root_hash, char *sig, size_t sig_size, + char *add_data); + +int unlink_node(int fd, int parent_ino, char *filename); + +loff_t get_file_size(char *name); + +int open_commands_file(char *mount_dir); + +int open_log_file(char *mount_dir); + +int wait_for_pending_reads(int fd, int timeout_ms, + struct incfs_pending_read_info *prs, int prs_count); + +char *concat_file_name(const char *dir, char *file); + +void sha256(char *data, size_t dsize, char *hash); + +void md5(char *data, size_t dsize, char *hash); + +bool sign_pkcs7(const void *data_to_sign, size_t data_size, + char *pkey_pem, char *cert_pem, + void **sig_ret, size_t *sig_size_ret); + +int delete_dir_tree(const char *path); From aa40e0ce6db23acbc1bfcca0f09a64758b512a74 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Mon, 27 Jan 2020 14:48:38 -0800 Subject: [PATCH 2799/3715] ANDROID: Make incfs selftests pass Fixed incfs_test build errors Fixed Kconfig errors Readded .gitignore Test: With just enabling CONFIG_INCREMENTAL_FS, both defconfig and cuttlefish_defconfig build and incfs_test runs and passes Bug: 133435829 Change-Id: Id3247ffcc63a095f66dcedf554017a06c5a9ce4a Signed-off-by: Paul Lawrence --- fs/incfs/Kconfig | 1 + tools/testing/selftests/filesystems/incfs/.gitignore | 1 + tools/testing/selftests/filesystems/incfs/Makefile | 2 ++ tools/testing/selftests/filesystems/incfs/incfs_test.c | 4 +++- 4 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/filesystems/incfs/.gitignore diff --git a/fs/incfs/Kconfig b/fs/incfs/Kconfig index d860c07664c3..a655d599ea46 100644 --- a/fs/incfs/Kconfig +++ b/fs/incfs/Kconfig @@ -4,6 +4,7 @@ config INCREMENTAL_FS select DECOMPRESS_LZ4 select CRC32 select CRYPTO + select CRYPTO_RSA select CRYPTO_SHA256 select X509_CERTIFICATE_PARSER select ASYMMETRIC_KEY_TYPE diff --git a/tools/testing/selftests/filesystems/incfs/.gitignore b/tools/testing/selftests/filesystems/incfs/.gitignore new file mode 100644 index 000000000000..4cba9c219a92 --- /dev/null +++ b/tools/testing/selftests/filesystems/incfs/.gitignore @@ -0,0 +1 @@ +incfs_test \ No newline at end of file diff --git a/tools/testing/selftests/filesystems/incfs/Makefile b/tools/testing/selftests/filesystems/incfs/Makefile index 7cff78cf5131..1f13573d3617 100644 --- a/tools/testing/selftests/filesystems/incfs/Makefile +++ b/tools/testing/selftests/filesystems/incfs/Makefile @@ -5,6 +5,8 @@ CFLAGS += -I../../../../include/uapi/ CFLAGS += -I../../../../lib EXTRA_SOURCES := utils.c +CFLAGS += $(EXTRA_SOURCES) + TEST_GEN_PROGS := incfs_test include ../../lib.mk diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index f1e9f86605e7..dd70e019dc4c 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -2398,7 +2398,9 @@ int main(int argc, char *argv[]) }; #undef MAKE_TEST - ksft_set_plan(ARRAY_SIZE(cases)); + /* Bring back for kernel 5.x */ + /* ksft_set_plan(ARRAY_SIZE(cases)); */ + for (i = 0; i < ARRAY_SIZE(cases); ++i) { ksft_print_msg("Running %s\n", cases[i].name); if (cases[i].pfunc(mount_dir) == TEST_SUCCESS) From 4a1f984742c7b4ebfd2d77f48bf24b4ffcb069fd Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Tue, 28 Jan 2020 14:21:12 -0800 Subject: [PATCH 2800/3715] ANDROID: Fixing incremental fs style issues Removed WARN_ONs Removed compatibilty code Fixed tab issue Bug: 133435829 Signed-off-by: Paul Lawrence Change-Id: I8a9e9ead48a65fd09c2d01d22f65d9a352f118e2 --- fs/incfs/compat.h | 33 --------------------------------- fs/incfs/data_mgmt.c | 14 ++++---------- fs/incfs/format.c | 9 --------- fs/incfs/integrity.c | 4 ---- fs/incfs/vfs.c | 28 +++------------------------- 5 files changed, 7 insertions(+), 81 deletions(-) delete mode 100644 fs/incfs/compat.h diff --git a/fs/incfs/compat.h b/fs/incfs/compat.h deleted file mode 100644 index f6fd9b2b3cb2..000000000000 --- a/fs/incfs/compat.h +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright 2019 Google LLC - */ -#ifndef _INCFS_COMPAT_H -#define _INCFS_COMPAT_H - -#include -#include - -typedef unsigned int __poll_t; - -#ifndef u64_to_user_ptr -#define u64_to_user_ptr(x) ( \ -{ \ - typecheck(u64, x); \ - (void __user *)(uintptr_t)x; \ -} \ -) -#endif - -#ifndef lru_to_page -#define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) -#endif - -#define readahead_gfp_mask(x) \ - (mapping_gfp_mask(x) | __GFP_NORETRY | __GFP_NOWARN) - -#ifndef SB_ACTIVE -#define SB_ACTIVE MS_ACTIVE -#endif - -#endif /* _INCFS_COMPAT_H */ diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index 25ea1099946d..109329e0a180 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -456,13 +456,9 @@ static struct pending_read *add_pending_read(struct data_file *df, struct data_file_segment *segment = NULL; struct mount_info *mi = NULL; - WARN_ON(!df); segment = get_file_segment(df, block_index); mi = df->df_mount_info; - WARN_ON(!segment); - WARN_ON(!mi); - result = kzalloc(sizeof(*result), GFP_NOFS); if (!result) return NULL; @@ -545,8 +541,6 @@ static int wait_for_data_block(struct data_file *df, int block_index, return -ENODATA; segment = get_file_segment(df, block_index); - WARN_ON(!segment); - error = mutex_lock_interruptible(&segment->blockmap_mutex); if (error) return error; @@ -596,10 +590,10 @@ static int wait_for_data_block(struct data_file *df, int block_index, return -ETIME; } if (wait_res < 0) { - /* - * Only ERESTARTSYS is really expected here when a signal - * comes while we wait. - */ + /* + * Only ERESTARTSYS is really expected here when a signal + * comes while we wait. + */ return wait_res; } diff --git a/fs/incfs/format.c b/fs/incfs/format.c index 27498b9c3d34..247e1b4ec563 100644 --- a/fs/incfs/format.c +++ b/fs/incfs/format.c @@ -12,7 +12,6 @@ #include #include -#include "compat.h" #include "format.h" struct backing_file_context *incfs_alloc_bfc(struct file *backing_file) @@ -679,18 +678,10 @@ int incfs_read_next_metadata_record(struct backing_file_context *bfc, ssize_t incfs_kread(struct file *f, void *buf, size_t size, loff_t pos) { -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0) - return kernel_read(f, pos, (char *)buf, size); -#else return kernel_read(f, buf, size, &pos); -#endif } ssize_t incfs_kwrite(struct file *f, const void *buf, size_t size, loff_t pos) { -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0) - return kernel_write(f, buf, size, pos); -#else return kernel_write(f, buf, size, &pos); -#endif } diff --git a/fs/incfs/integrity.c b/fs/incfs/integrity.c index c6444e73e4d8..feb212c38945 100644 --- a/fs/incfs/integrity.c +++ b/fs/incfs/integrity.c @@ -26,11 +26,7 @@ int incfs_validate_pkcs7_signature(struct mem_range pkcs7_blob, return PTR_ERR(pkcs7); } -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0) - err = pkcs7_get_content_data(pkcs7, &data, &data_len, false); -#else err = pkcs7_get_content_data(pkcs7, &data, &data_len, NULL); -#endif if (err || data_len == 0 || data == NULL) { pr_debug("PKCS#7 message does not contain data\n"); err = -EBADMSG; diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index 41efd70af8e1..0c2f23e5ca55 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -18,7 +18,6 @@ #include -#include "compat.h" #include "data_mgmt.h" #include "format.h" #include "integrity.h" @@ -31,6 +30,9 @@ #define READ_EXEC_FILE_MODE 0555 #define READ_WRITE_FILE_MODE 0666 +/* Needed for kernel 4.14 - remove for later kernels */ +typedef unsigned int __poll_t; + static int incfs_remount_fs(struct super_block *sb, int *flags, char *data); static int dentry_revalidate(struct dentry *dentry, unsigned int flags); @@ -85,16 +87,12 @@ static const struct super_operations incfs_super_ops = { .show_options = show_options }; -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) -#define dir_rename_wrap dir_rename -#else static int dir_rename_wrap(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { return dir_rename(old_dir, old_dentry, new_dir, new_dentry); } -#endif static const struct inode_operations incfs_dir_inode_ops = { .lookup = dir_lookup, @@ -157,17 +155,6 @@ static const struct file_operations incfs_log_file_ops = { .compat_ioctl = dispatch_ioctl }; -#if LINUX_VERSION_CODE < KERNEL_VERSION(4,9,0) - -static const struct inode_operations incfs_file_inode_ops = { - .setattr = simple_setattr, - .getattr = simple_getattr, - .getxattr = incfs_getxattr, - .listxattr = incfs_listxattr -}; - -#else - static const struct inode_operations incfs_file_inode_ops = { .setattr = simple_setattr, .getattr = simple_getattr, @@ -191,9 +178,6 @@ const struct xattr_handler *incfs_xattr_ops[] = { NULL, }; - -#endif - /* State of an open .pending_reads file, unique for each file descriptor. */ struct pending_reads_state { /* A serial number of the last pending read obtained from this file. */ @@ -1647,12 +1631,8 @@ static int dir_unlink(struct inode *dir, struct dentry *dentry) goto out; } -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) - err = vfs_getattr(&backing_path, &stat); -#else err = vfs_getattr(&backing_path, &stat, STATX_NLINK, AT_STATX_SYNC_AS_STAT); -#endif if (err) goto out; @@ -2078,9 +2058,7 @@ struct dentry *incfs_mount_fs(struct file_system_type *type, int flags, sb->s_time_gran = 1; sb->s_blocksize = INCFS_DATA_FILE_BLOCK_SIZE; sb->s_blocksize_bits = blksize_bits(sb->s_blocksize); -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,9,0) sb->s_xattr = incfs_xattr_ops; -#endif BUILD_BUG_ON(PAGE_SIZE != INCFS_DATA_FILE_BLOCK_SIZE); From 641dfeb61d45e3c3a841bcaaa5389adc037d733e Mon Sep 17 00:00:00 2001 From: Andrey Shvetsov Date: Thu, 16 Jan 2020 18:22:39 +0100 Subject: [PATCH 2801/3715] UPSTREAM: staging: most: net: fix buffer overflow If the length of the socket buffer is 0xFFFFFFFF (max size for an unsigned int), then payload_len becomes 0xFFFFFFF1 after subtracting 14 (ETH_HLEN). Then, mdp_len is set to payload_len + 16 (MDP_HDR_LEN) which overflows and results in a value of 2. These values for payload_len and mdp_len will pass current buffer size checks. This patch checks if derived from skb->len sum may overflow. The check is based on the following idea: For any `unsigned V1, V2` and derived `unsigned SUM = V1 + V2`, `V1 + V2` overflows iif `SUM < V1`. Bug: 143560807 Reported-by: Greg Kroah-Hartman Signed-off-by: Andrey Shvetsov Cc: stable Link: https://lore.kernel.org/r/20200116172238.6046-1-andrey.shvetsov@microchip.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 4d1356ac12f4d5180d0df345d85ff0ee42b89c72) Signed-off-by: Greg Kroah-Hartman Change-Id: I71197b2963735ba181314332737fc0c1ca2cab96 --- drivers/staging/most/aim-network/networking.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/staging/most/aim-network/networking.c b/drivers/staging/most/aim-network/networking.c index 936f013c350e..6398c27563c9 100644 --- a/drivers/staging/most/aim-network/networking.c +++ b/drivers/staging/most/aim-network/networking.c @@ -85,6 +85,11 @@ static int skb_to_mamac(const struct sk_buff *skb, struct mbo *mbo) unsigned int payload_len = skb->len - ETH_HLEN; unsigned int mdp_len = payload_len + MDP_HDR_LEN; + if (mdp_len < skb->len) { + pr_err("drop: too large packet! (%u)\n", skb->len); + return -EINVAL; + } + if (mbo->buffer_length < mdp_len) { pr_err("drop: too small buffer! (%d for %d)\n", mbo->buffer_length, mdp_len); @@ -132,6 +137,11 @@ static int skb_to_mep(const struct sk_buff *skb, struct mbo *mbo) u8 *buff = mbo->virt_address; unsigned int mep_len = skb->len + MEP_HDR_LEN; + if (mep_len < skb->len) { + pr_err("drop: too large packet! (%u)\n", skb->len); + return -EINVAL; + } + if (mbo->buffer_length < mep_len) { pr_err("drop: too small buffer! (%d for %d)\n", mbo->buffer_length, mep_len); From c39c4e9116943faf30fb7fb9cc1e739c732b4443 Mon Sep 17 00:00:00 2001 From: Richard Palethorpe Date: Tue, 21 Jan 2020 14:42:58 +0100 Subject: [PATCH 2802/3715] can, slip: Protect tty->disc_data in write_wakeup and close with RCU [ Upstream commit 0ace17d56824165c7f4c68785d6b58971db954dd ] write_wakeup can happen in parallel with close/hangup where tty->disc_data is set to NULL and the netdevice is freed thus also freeing disc_data. write_wakeup accesses disc_data so we must prevent close from freeing the netdev while write_wakeup has a non-NULL view of tty->disc_data. We also need to make sure that accesses to disc_data are atomic. Which can all be done with RCU. This problem was found by Syzkaller on SLCAN, but the same issue is reproducible with the SLIP line discipline using an LTP test based on the Syzkaller reproducer. A fix which didn't use RCU was posted by Hillf Danton. Fixes: 661f7fda21b1 ("slip: Fix deadlock in write_wakeup") Fixes: a8e83b17536a ("slcan: Port write_wakeup deadlock fix from slip") Reported-by: syzbot+017e491ae13c0068598a@syzkaller.appspotmail.com Signed-off-by: Richard Palethorpe Cc: Wolfgang Grandegger Cc: Marc Kleine-Budde Cc: "David S. Miller" Cc: Tyler Hall Cc: linux-can@vger.kernel.org Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: syzkaller@googlegroups.com Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/slcan.c | 12 ++++++++++-- drivers/net/slip/slip.c | 12 ++++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index a42737b4ac79..35564a9561b7 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -343,9 +343,16 @@ static void slcan_transmit(struct work_struct *work) */ static void slcan_write_wakeup(struct tty_struct *tty) { - struct slcan *sl = tty->disc_data; + struct slcan *sl; + + rcu_read_lock(); + sl = rcu_dereference(tty->disc_data); + if (!sl) + goto out; schedule_work(&sl->tx_work); +out: + rcu_read_unlock(); } /* Send a can_frame to a TTY queue. */ @@ -640,10 +647,11 @@ static void slcan_close(struct tty_struct *tty) return; spin_lock_bh(&sl->lock); - tty->disc_data = NULL; + rcu_assign_pointer(tty->disc_data, NULL); sl->tty = NULL; spin_unlock_bh(&sl->lock); + synchronize_rcu(); flush_work(&sl->tx_work); /* Flush network side */ diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index d6dc00b4ba55..b07f367abd91 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -452,9 +452,16 @@ static void slip_transmit(struct work_struct *work) */ static void slip_write_wakeup(struct tty_struct *tty) { - struct slip *sl = tty->disc_data; + struct slip *sl; + + rcu_read_lock(); + sl = rcu_dereference(tty->disc_data); + if (!sl) + goto out; schedule_work(&sl->tx_work); +out: + rcu_read_unlock(); } static void sl_tx_timeout(struct net_device *dev) @@ -886,10 +893,11 @@ static void slip_close(struct tty_struct *tty) return; spin_lock_bh(&sl->lock); - tty->disc_data = NULL; + rcu_assign_pointer(tty->disc_data, NULL); sl->tty = NULL; spin_unlock_bh(&sl->lock); + synchronize_rcu(); flush_work(&sl->tx_work); /* VSV = very important to remove timers */ From 70a985445d62b014970304080551c3697e7bd00e Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Sat, 25 Jan 2020 14:33:29 +0000 Subject: [PATCH 2803/3715] firestream: fix memory leaks [ Upstream commit fa865ba183d61c1ec8cbcab8573159c3b72b89a4 ] In fs_open(), 'vcc' is allocated through kmalloc() and assigned to 'atm_vcc->dev_data.' In the following execution, if an error occurs, e.g., there is no more free channel, an error code EBUSY or ENOMEM will be returned. However, 'vcc' is not deallocated, leading to memory leaks. Note that, in normal cases where fs_open() returns 0, 'vcc' will be deallocated in fs_close(). But, if fs_open() fails, there is no guarantee that fs_close() will be invoked. To fix this issue, deallocate 'vcc' before the error code is returned. Signed-off-by: Wenwen Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/atm/firestream.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c index 6b6368a56526..0e449ee11ac7 100644 --- a/drivers/atm/firestream.c +++ b/drivers/atm/firestream.c @@ -927,6 +927,7 @@ static int fs_open(struct atm_vcc *atm_vcc) } if (!to) { printk ("No more free channels for FS50..\n"); + kfree(vcc); return -EBUSY; } vcc->channo = dev->channo; @@ -937,6 +938,7 @@ static int fs_open(struct atm_vcc *atm_vcc) if (((DO_DIRECTION(rxtp) && dev->atm_vccs[vcc->channo])) || ( DO_DIRECTION(txtp) && test_bit (vcc->channo, dev->tx_inuse))) { printk ("Channel is in use for FS155.\n"); + kfree(vcc); return -EBUSY; } } @@ -950,6 +952,7 @@ static int fs_open(struct atm_vcc *atm_vcc) tc, sizeof (struct fs_transmit_config)); if (!tc) { fs_dprintk (FS_DEBUG_OPEN, "fs: can't alloc transmit_config.\n"); + kfree(vcc); return -ENOMEM; } From 4f0996db42deebaf7e58dc01a6e197dfa562aa9d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 21 Jan 2020 23:17:14 -0800 Subject: [PATCH 2804/3715] gtp: make sure only SOCK_DGRAM UDP sockets are accepted [ Upstream commit 940ba14986657a50c15f694efca1beba31fa568f ] A malicious user could use RAW sockets and fool GTP using them as standard SOCK_DGRAM UDP sockets. BUG: KMSAN: uninit-value in udp_tunnel_encap_enable include/net/udp_tunnel.h:174 [inline] BUG: KMSAN: uninit-value in setup_udp_tunnel_sock+0x45e/0x6f0 net/ipv4/udp_tunnel.c:85 CPU: 0 PID: 11262 Comm: syz-executor613 Not tainted 5.5.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x220 lib/dump_stack.c:118 kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:118 __msan_warning+0x58/0xa0 mm/kmsan/kmsan_instr.c:215 udp_tunnel_encap_enable include/net/udp_tunnel.h:174 [inline] setup_udp_tunnel_sock+0x45e/0x6f0 net/ipv4/udp_tunnel.c:85 gtp_encap_enable_socket+0x37f/0x5a0 drivers/net/gtp.c:827 gtp_encap_enable drivers/net/gtp.c:844 [inline] gtp_newlink+0xfb/0x1e50 drivers/net/gtp.c:666 __rtnl_newlink net/core/rtnetlink.c:3305 [inline] rtnl_newlink+0x2973/0x3920 net/core/rtnetlink.c:3363 rtnetlink_rcv_msg+0x1153/0x1570 net/core/rtnetlink.c:5424 netlink_rcv_skb+0x451/0x650 net/netlink/af_netlink.c:2477 rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:5442 netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] netlink_unicast+0xf9e/0x1100 net/netlink/af_netlink.c:1328 netlink_sendmsg+0x1248/0x14d0 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:639 [inline] sock_sendmsg net/socket.c:659 [inline] ____sys_sendmsg+0x12b6/0x1350 net/socket.c:2330 ___sys_sendmsg net/socket.c:2384 [inline] __sys_sendmsg+0x451/0x5f0 net/socket.c:2417 __do_sys_sendmsg net/socket.c:2426 [inline] __se_sys_sendmsg+0x97/0xb0 net/socket.c:2424 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2424 do_syscall_64+0xb8/0x160 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x441359 Code: e8 ac e8 ff ff 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 eb 08 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fff1cd0ac28 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000441359 RDX: 0000000000000000 RSI: 0000000020000100 RDI: 0000000000000003 RBP: 00000000006cb018 R08: 00000000004002c8 R09: 00000000004002c8 R10: 00000000004002c8 R11: 0000000000000246 R12: 00000000004020d0 R13: 0000000000402160 R14: 0000000000000000 R15: 0000000000000000 Uninit was created at: kmsan_save_stack_with_flags+0x3c/0x90 mm/kmsan/kmsan.c:144 kmsan_internal_alloc_meta_for_pages mm/kmsan/kmsan_shadow.c:307 [inline] kmsan_alloc_page+0x12a/0x310 mm/kmsan/kmsan_shadow.c:336 __alloc_pages_nodemask+0x57f2/0x5f60 mm/page_alloc.c:4800 alloc_pages_current+0x67d/0x990 mm/mempolicy.c:2207 alloc_pages include/linux/gfp.h:534 [inline] alloc_slab_page+0x111/0x12f0 mm/slub.c:1511 allocate_slab mm/slub.c:1656 [inline] new_slab+0x2bc/0x1130 mm/slub.c:1722 new_slab_objects mm/slub.c:2473 [inline] ___slab_alloc+0x1533/0x1f30 mm/slub.c:2624 __slab_alloc mm/slub.c:2664 [inline] slab_alloc_node mm/slub.c:2738 [inline] slab_alloc mm/slub.c:2783 [inline] kmem_cache_alloc+0xb23/0xd70 mm/slub.c:2788 sk_prot_alloc+0xf2/0x620 net/core/sock.c:1597 sk_alloc+0xf0/0xbe0 net/core/sock.c:1657 inet_create+0x7c7/0x1370 net/ipv4/af_inet.c:321 __sock_create+0x8eb/0xf00 net/socket.c:1420 sock_create net/socket.c:1471 [inline] __sys_socket+0x1a1/0x600 net/socket.c:1513 __do_sys_socket net/socket.c:1522 [inline] __se_sys_socket+0x8d/0xb0 net/socket.c:1520 __x64_sys_socket+0x4a/0x70 net/socket.c:1520 do_syscall_64+0xb8/0x160 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") Signed-off-by: Eric Dumazet Cc: Pablo Neira Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/gtp.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 25be27826a22..3840f21dd635 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -807,19 +807,21 @@ static struct sock *gtp_encap_enable_socket(int fd, int type, return NULL; } - if (sock->sk->sk_protocol != IPPROTO_UDP) { + sk = sock->sk; + if (sk->sk_protocol != IPPROTO_UDP || + sk->sk_type != SOCK_DGRAM || + (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)) { pr_debug("socket fd=%d not UDP\n", fd); sk = ERR_PTR(-EINVAL); goto out_sock; } - lock_sock(sock->sk); - if (sock->sk->sk_user_data) { + lock_sock(sk); + if (sk->sk_user_data) { sk = ERR_PTR(-EBUSY); goto out_rel_sock; } - sk = sock->sk; sock_hold(sk); tuncfg.sk_user_data = gtp; From 8dbd5ab8ff84311023712f8aec21937e2d36a527 Mon Sep 17 00:00:00 2001 From: Yuki Taguchi Date: Mon, 20 Jan 2020 13:48:37 +0900 Subject: [PATCH 2805/3715] ipv6: sr: remove SKB_GSO_IPXIP6 on End.D* actions [ Upstream commit 62ebaeaedee7591c257543d040677a60e35c7aec ] After LRO/GRO is applied, SRv6 encapsulated packets have SKB_GSO_IPXIP6 feature flag, and this flag must be removed right after decapulation procedure. Currently, SKB_GSO_IPXIP6 flag is not removed on End.D* actions, which creates inconsistent packet state, that is, a normal TCP/IP packets have the SKB_GSO_IPXIP6 flag. This behavior can cause unexpected fallback to GSO on routing to netdevices that do not support SKB_GSO_IPXIP6. For example, on inter-VRF forwarding, decapsulated packets separated into small packets by GSO because VRF devices do not support TSO for packets with SKB_GSO_IPXIP6 flag, and this degrades forwarding performance. This patch removes encapsulation related GSO flags from the skb right after the End.D* action is applied. Fixes: d7a669dd2f8b ("ipv6: sr: add helper functions for seg6local") Signed-off-by: Yuki Taguchi Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/seg6_local.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 825b8e01f947..9a01f72d907f 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -27,6 +27,7 @@ #include #include #include +#include #ifdef CONFIG_IPV6_SEG6_HMAC #include #endif @@ -126,7 +127,8 @@ static bool decap_and_validate(struct sk_buff *skb, int proto) skb_reset_network_header(skb); skb_reset_transport_header(skb); - skb->encapsulation = 0; + if (iptunnel_pull_offloads(skb)) + return false; return true; } From 4f26a3a8f99951d961424b7d241a73d53691dcd1 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 24 Jan 2020 20:41:44 +1100 Subject: [PATCH 2806/3715] net: cxgb3_main: Add CAP_NET_ADMIN check to CHELSIO_GET_MEM [ Upstream commit 3546d8f1bbe992488ed91592cf6bf76e7114791a = The cxgb3 driver for "Chelsio T3-based gigabit and 10Gb Ethernet adapters" implements a custom ioctl as SIOCCHIOCTL/SIOCDEVPRIVATE in cxgb_extension_ioctl(). One of the subcommands of the ioctl is CHELSIO_GET_MEM, which appears to read memory directly out of the adapter and return it to userspace. It's not entirely clear what the contents of the adapter memory contains, but the assumption is that it shouldn't be accessible to all users. So add a CAP_NET_ADMIN check to the CHELSIO_GET_MEM case. Put it after the is_offload() check, which matches two of the other subcommands in the same function which also check for is_offload() and CAP_NET_ADMIN. Found by Ilja by code inspection, not tested as I don't have the required hardware. Reported-by: Ilja Van Sprundel Signed-off-by: Michael Ellerman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index 338683e5ef1e..b8779afb8550 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -2449,6 +2449,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) if (!is_offload(adapter)) return -EOPNOTSUPP; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; if (!(adapter->flags & FULL_INIT_DONE)) return -EIO; /* need the memory controllers */ if (copy_from_user(&t, useraddr, sizeof(t))) From 1cc40dfad03c1dbf4b716d2d1615573964f502ab Mon Sep 17 00:00:00 2001 From: William Dauchy Date: Tue, 21 Jan 2020 21:49:54 +0100 Subject: [PATCH 2807/3715] net, ip6_tunnel: fix namespaces move [ Upstream commit 5311a69aaca30fa849c3cc46fb25f75727fb72d0 ] in the same manner as commit d0f418516022 ("net, ip_tunnel: fix namespaces move"), fix namespace moving as it was broken since commit 8d79266bc48c ("ip6_tunnel: add collect_md mode to IPv6 tunnel"), but for ipv6 this time; there is no reason to keep it for ip6_tunnel. Fixes: 8d79266bc48c ("ip6_tunnel: add collect_md mode to IPv6 tunnel") Signed-off-by: William Dauchy Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_tunnel.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 5bc2788e6ba4..c2644405bab1 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1878,10 +1878,8 @@ static int ip6_tnl_dev_init(struct net_device *dev) if (err) return err; ip6_tnl_link_config(t); - if (t->parms.collect_md) { - dev->features |= NETIF_F_NETNS_LOCAL; + if (t->parms.collect_md) netif_keep_dst(dev); - } return 0; } From 426d5d62459db84634490bfeeb8a13dbc266e845 Mon Sep 17 00:00:00 2001 From: William Dauchy Date: Tue, 21 Jan 2020 15:26:24 +0100 Subject: [PATCH 2808/3715] net, ip_tunnel: fix namespaces move [ Upstream commit d0f418516022c32ecceaf4275423e5bd3f8743a9 ] in the same manner as commit 690afc165bb3 ("net: ip6_gre: fix moving ip6gre between namespaces"), fix namespace moving as it was broken since commit 2e15ea390e6f ("ip_gre: Add support to collect tunnel metadata."). Indeed, the ip6_gre commit removed the local flag for collect_md condition, so there is no reason to keep it for ip_gre/ip_tunnel. this patch will fix both ip_tunnel and ip_gre modules. Fixes: 2e15ea390e6f ("ip_gre: Add support to collect tunnel metadata.") Signed-off-by: William Dauchy Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_tunnel.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index f1784162acc2..404dc765f2bf 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -1202,10 +1202,8 @@ int ip_tunnel_init(struct net_device *dev) iph->version = 4; iph->ihl = 5; - if (tunnel->collect_md) { - dev->features |= NETIF_F_NETNS_LOCAL; + if (tunnel->collect_md) netif_keep_dst(dev); - } return 0; } EXPORT_SYMBOL_GPL(ip_tunnel_init); From 24ac271a627ff257265bcd061b33b513260018af Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 22 Jan 2020 15:42:02 -0800 Subject: [PATCH 2809/3715] net_sched: fix datalen for ematch [ Upstream commit 61678d28d4a45ef376f5d02a839cc37509ae9281 ] syzbot reported an out-of-bound access in em_nbyte. As initially analyzed by Eric, this is because em_nbyte sets its own em->datalen in em_nbyte_change() other than the one specified by user, but this value gets overwritten later by its caller tcf_em_validate(). We should leave em->datalen untouched to respect their choices. I audit all the in-tree ematch users, all of those implement ->change() set em->datalen, so we can just avoid setting it twice in this case. Reported-and-tested-by: syzbot+5af9a90dad568aa9f611@syzkaller.appspotmail.com Reported-by: syzbot+2f07903a5b05e7f36410@syzkaller.appspotmail.com Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: Eric Dumazet Signed-off-by: Cong Wang Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/ematch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 03b677bc0700..60f2354c1789 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -267,12 +267,12 @@ static int tcf_em_validate(struct tcf_proto *tp, } em->data = (unsigned long) v; } + em->datalen = data_len; } } em->matchid = em_hdr->matchid; em->flags = em_hdr->flags; - em->datalen = data_len; em->net = net; err = 0; From 7ac7cc5e78444a84e5786e822ca6643ad4cd55f7 Mon Sep 17 00:00:00 2001 From: Jouni Hogander Date: Wed, 20 Nov 2019 09:08:16 +0200 Subject: [PATCH 2810/3715] net-sysfs: Fix reference count leak in rx|netdev_queue_add_kobject commit b8eb718348b8fb30b5a7d0a8fce26fb3f4ac741b upstream. kobject_init_and_add takes reference even when it fails. This has to be given up by the caller in error handling. Otherwise memory allocated by kobject_init_and_add is never freed. Originally found by Syzkaller: BUG: memory leak unreferenced object 0xffff8880679f8b08 (size 8): comm "netdev_register", pid 269, jiffies 4294693094 (age 12.132s) hex dump (first 8 bytes): 72 78 2d 30 00 36 20 d4 rx-0.6 . backtrace: [<000000008c93818e>] __kmalloc_track_caller+0x16e/0x290 [<000000001f2e4e49>] kvasprintf+0xb1/0x140 [<000000007f313394>] kvasprintf_const+0x56/0x160 [<00000000aeca11c8>] kobject_set_name_vargs+0x5b/0x140 [<0000000073a0367c>] kobject_init_and_add+0xd8/0x170 [<0000000088838e4b>] net_rx_queue_update_kobjects+0x152/0x560 [<000000006be5f104>] netdev_register_kobject+0x210/0x380 [<00000000e31dab9d>] register_netdevice+0xa1b/0xf00 [<00000000f68b2465>] __tun_chr_ioctl+0x20d5/0x3dd0 [<000000004c50599f>] tun_chr_ioctl+0x2f/0x40 [<00000000bbd4c317>] do_vfs_ioctl+0x1c7/0x1510 [<00000000d4c59e8f>] ksys_ioctl+0x99/0xb0 [<00000000946aea81>] __x64_sys_ioctl+0x78/0xb0 [<0000000038d946e5>] do_syscall_64+0x16f/0x580 [<00000000e0aa5d8f>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [<00000000285b3d1a>] 0xffffffffffffffff Cc: David Miller Cc: Lukas Bulwahn Signed-off-by: Jouni Hogander Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/net-sysfs.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index dee57c5ff738..3f015e736fa4 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -915,21 +915,23 @@ static int rx_queue_add_kobject(struct net_device *dev, int index) error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL, "rx-%u", index); if (error) - return error; + goto err; dev_hold(queue->dev); if (dev->sysfs_rx_queue_group) { error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group); - if (error) { - kobject_put(kobj); - return error; - } + if (error) + goto err; } kobject_uevent(kobj, KOBJ_ADD); return error; + +err: + kobject_put(kobj); + return error; } #endif /* CONFIG_SYSFS */ @@ -1326,21 +1328,21 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index) error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL, "tx-%u", index); if (error) - return error; + goto err; dev_hold(queue->dev); #ifdef CONFIG_BQL error = sysfs_create_group(kobj, &dql_group); - if (error) { - kobject_put(kobj); - return error; - } + if (error) + goto err; #endif kobject_uevent(kobj, KOBJ_ADD); - return 0; +err: + kobject_put(kobj); + return error; } #endif /* CONFIG_SYSFS */ From 5f36336849edd9c3294adc4f93141c0261b98034 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 20 Nov 2019 19:19:07 -0800 Subject: [PATCH 2811/3715] net-sysfs: fix netdev_queue_add_kobject() breakage commit 48a322b6f9965b2f1e4ce81af972f0e287b07ed0 upstream. kobject_put() should only be called in error path. Fixes: b8eb718348b8 ("net-sysfs: Fix reference count leak in rx|netdev_queue_add_kobject") Signed-off-by: Eric Dumazet Cc: Jouni Hogander Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/net-sysfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 3f015e736fa4..3a22fa4ec7ff 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1339,6 +1339,7 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index) #endif kobject_uevent(kobj, KOBJ_ADD); + return 0; err: kobject_put(kobj); From 8ba773a2866a929542574b4578a0433bc3d6f0ac Mon Sep 17 00:00:00 2001 From: Jouni Hogander Date: Thu, 5 Dec 2019 15:57:07 +0200 Subject: [PATCH 2812/3715] net-sysfs: Call dev_hold always in netdev_queue_add_kobject commit e0b60903b434a7ee21ba8d8659f207ed84101e89 upstream. Dev_hold has to be called always in netdev_queue_add_kobject. Otherwise usage count drops below 0 in case of failure in kobject_init_and_add. Fixes: b8eb718348b8 ("net-sysfs: Fix reference count leak in rx|netdev_queue_add_kobject") Reported-by: Hulk Robot Cc: Tetsuo Handa Cc: David Miller Cc: Lukas Bulwahn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/net-sysfs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 3a22fa4ec7ff..7a042847ec9b 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1324,14 +1324,17 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index) struct kobject *kobj = &queue->kobj; int error = 0; + /* Kobject_put later will trigger netdev_queue_release call + * which decreases dev refcount: Take that reference here + */ + dev_hold(queue->dev); + kobj->kset = dev->queues_kset; error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL, "tx-%u", index); if (error) goto err; - dev_hold(queue->dev); - #ifdef CONFIG_BQL error = sysfs_create_group(kobj, &dql_group); if (error) From 8aca069fb05e2a65a264070efb9989cc72ab1694 Mon Sep 17 00:00:00 2001 From: Jouni Hogander Date: Tue, 17 Dec 2019 13:46:34 +0200 Subject: [PATCH 2813/3715] net-sysfs: Call dev_hold always in rx_queue_add_kobject commit ddd9b5e3e765d8ed5a35786a6cb00111713fe161 upstream. Dev_hold has to be called always in rx_queue_add_kobject. Otherwise usage count drops below 0 in case of failure in kobject_init_and_add. Fixes: b8eb718348b8 ("net-sysfs: Fix reference count leak in rx|netdev_queue_add_kobject") Reported-by: syzbot Cc: Tetsuo Handa Cc: David Miller Cc: Lukas Bulwahn Signed-off-by: Jouni Hogander Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/net-sysfs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 7a042847ec9b..baf771d2d088 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -911,14 +911,17 @@ static int rx_queue_add_kobject(struct net_device *dev, int index) struct kobject *kobj = &queue->kobj; int error = 0; + /* Kobject_put later will trigger rx_queue_release call which + * decreases dev refcount: Take that reference here + */ + dev_hold(queue->dev); + kobj->kset = dev->queues_kset; error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL, "rx-%u", index); if (error) goto err; - dev_hold(queue->dev); - if (dev->sysfs_rx_queue_group) { error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group); if (error) From c5fd8a37e97100254a2178e470e9641c51e91dbb Mon Sep 17 00:00:00 2001 From: Jouni Hogander Date: Mon, 20 Jan 2020 09:51:03 +0200 Subject: [PATCH 2814/3715] net-sysfs: Fix reference count leak [ Upstream commit cb626bf566eb4433318d35681286c494f04fedcc ] Netdev_register_kobject is calling device_initialize. In case of error reference taken by device_initialize is not given up. Drivers are supposed to call free_netdev in case of error. In non-error case the last reference is given up there and device release sequence is triggered. In error case this reference is kept and the release sequence is never started. Fix this by setting reg_state as NETREG_UNREGISTERED if registering fails. This is the rootcause for couple of memory leaks reported by Syzkaller: BUG: memory leak unreferenced object 0xffff8880675ca008 (size 256): comm "netdev_register", pid 281, jiffies 4294696663 (age 6.808s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<0000000058ca4711>] kmem_cache_alloc_trace+0x167/0x280 [<000000002340019b>] device_add+0x882/0x1750 [<000000001d588c3a>] netdev_register_kobject+0x128/0x380 [<0000000011ef5535>] register_netdevice+0xa1b/0xf00 [<000000007fcf1c99>] __tun_chr_ioctl+0x20d5/0x3dd0 [<000000006a5b7b2b>] tun_chr_ioctl+0x2f/0x40 [<00000000f30f834a>] do_vfs_ioctl+0x1c7/0x1510 [<00000000fba062ea>] ksys_ioctl+0x99/0xb0 [<00000000b1c1b8d2>] __x64_sys_ioctl+0x78/0xb0 [<00000000984cabb9>] do_syscall_64+0x16f/0x580 [<000000000bde033d>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [<00000000e6ca2d9f>] 0xffffffffffffffff BUG: memory leak unreferenced object 0xffff8880668ba588 (size 8): comm "kobject_set_nam", pid 286, jiffies 4294725297 (age 9.871s) hex dump (first 8 bytes): 6e 72 30 00 cc be df 2b nr0....+ backtrace: [<00000000a322332a>] __kmalloc_track_caller+0x16e/0x290 [<00000000236fd26b>] kstrdup+0x3e/0x70 [<00000000dd4a2815>] kstrdup_const+0x3e/0x50 [<0000000049a377fc>] kvasprintf_const+0x10e/0x160 [<00000000627fc711>] kobject_set_name_vargs+0x5b/0x140 [<0000000019eeab06>] dev_set_name+0xc0/0xf0 [<0000000069cb12bc>] netdev_register_kobject+0xc8/0x320 [<00000000f2e83732>] register_netdevice+0xa1b/0xf00 [<000000009e1f57cc>] __tun_chr_ioctl+0x20d5/0x3dd0 [<000000009c560784>] tun_chr_ioctl+0x2f/0x40 [<000000000d759e02>] do_vfs_ioctl+0x1c7/0x1510 [<00000000351d7c31>] ksys_ioctl+0x99/0xb0 [<000000008390040a>] __x64_sys_ioctl+0x78/0xb0 [<0000000052d196b7>] do_syscall_64+0x16f/0x580 [<0000000019af9236>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [<00000000bc384531>] 0xffffffffffffffff v3 -> v4: Set reg_state to NETREG_UNREGISTERED if registering fails v2 -> v3: * Replaced BUG_ON with WARN_ON in free_netdev and netdev_release v1 -> v2: * Relying on driver calling free_netdev rather than calling put_device directly in error path Reported-by: syzbot+ad8ca40ecd77896d51e2@syzkaller.appspotmail.com Cc: David Miller Cc: Greg Kroah-Hartman Cc: Lukas Bulwahn Signed-off-by: Jouni Hogander Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index f9f05b3df460..737211f1b29c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7667,8 +7667,10 @@ int register_netdevice(struct net_device *dev) goto err_uninit; ret = netdev_register_kobject(dev); - if (ret) + if (ret) { + dev->reg_state = NETREG_UNREGISTERED; goto err_uninit; + } dev->reg_state = NETREG_REGISTERED; __netdev_update_features(dev); From d6502fc298460df8e72f525778cff3dd40daaab3 Mon Sep 17 00:00:00 2001 From: James Hughes Date: Mon, 20 Jan 2020 11:12:40 +0000 Subject: [PATCH 2815/3715] net: usb: lan78xx: Add .ndo_features_check [ Upstream commit ce896476c65d72b4b99fa09c2f33436b4198f034 ] As reported by Eric Dumazet, there are still some outstanding cases where the driver does not handle TSO correctly when skb's are over a certain size. Most cases have been fixed, this patch should ensure that forwarded SKB's that are greater than MAX_SINGLE_PACKET_SIZE - TX_OVERHEAD are software segmented and handled correctly. Signed-off-by: James Hughes Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/lan78xx.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index ee7194a9e231..b179a96ea08c 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -3525,6 +3526,19 @@ static void lan78xx_tx_timeout(struct net_device *net) tasklet_schedule(&dev->bh); } +static netdev_features_t lan78xx_features_check(struct sk_buff *skb, + struct net_device *netdev, + netdev_features_t features) +{ + if (skb->len + TX_OVERHEAD > MAX_SINGLE_PACKET_SIZE) + features &= ~NETIF_F_GSO_MASK; + + features = vlan_features_check(skb, features); + features = vxlan_features_check(skb, features); + + return features; +} + static const struct net_device_ops lan78xx_netdev_ops = { .ndo_open = lan78xx_open, .ndo_stop = lan78xx_stop, @@ -3538,6 +3552,7 @@ static const struct net_device_ops lan78xx_netdev_ops = { .ndo_set_features = lan78xx_set_features, .ndo_vlan_rx_add_vid = lan78xx_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = lan78xx_vlan_rx_kill_vid, + .ndo_features_check = lan78xx_features_check, }; static void lan78xx_stat_monitor(unsigned long param) From 7e70784f1702cd9f438e23168ae937397c2d323a Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Mon, 20 Jan 2020 18:04:56 +0800 Subject: [PATCH 2816/3715] tcp_bbr: improve arithmetic division in bbr_update_bw() [ Upstream commit 5b2f1f3070b6447b76174ea8bfb7390dc6253ebd ] do_div() does a 64-by-32 division. Use div64_long() instead of it if the divisor is long, to avoid truncation to 32-bit. And as a nice side effect also cleans up the function a bit. Signed-off-by: Wen Yang Cc: Eric Dumazet Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: Hideaki YOSHIFUJI Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_bbr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 06f247ca9197..434ad1e72447 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -678,8 +678,7 @@ static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs) * bandwidth sample. Delivered is in packets and interval_us in uS and * ratio will be <<1 for most connections. So delivered is first scaled. */ - bw = (u64)rs->delivered * BW_UNIT; - do_div(bw, rs->interval_us); + bw = div64_long((u64)rs->delivered * BW_UNIT, rs->interval_us); /* If this sample is application-limited, it is likely to have a very * low delivered count that represents application behavior rather than From e841252840c48e9a0e5add9d82796b1d55c0f653 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 21 Jan 2020 22:47:29 -0800 Subject: [PATCH 2817/3715] net: rtnetlink: validate IFLA_MTU attribute in rtnl_create_link() [ Upstream commit d836f5c69d87473ff65c06a6123e5b2cf5e56f5b ] rtnl_create_link() needs to apply dev->min_mtu and dev->max_mtu checks that we apply in do_setlink() Otherwise malicious users can crash the kernel, for example after an integer overflow : BUG: KASAN: use-after-free in memset include/linux/string.h:365 [inline] BUG: KASAN: use-after-free in __alloc_skb+0x37b/0x5e0 net/core/skbuff.c:238 Write of size 32 at addr ffff88819f20b9c0 by task swapper/0/0 CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.5.0-rc1-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x197/0x210 lib/dump_stack.c:118 print_address_description.constprop.0.cold+0xd4/0x30b mm/kasan/report.c:374 __kasan_report.cold+0x1b/0x41 mm/kasan/report.c:506 kasan_report+0x12/0x20 mm/kasan/common.c:639 check_memory_region_inline mm/kasan/generic.c:185 [inline] check_memory_region+0x134/0x1a0 mm/kasan/generic.c:192 memset+0x24/0x40 mm/kasan/common.c:108 memset include/linux/string.h:365 [inline] __alloc_skb+0x37b/0x5e0 net/core/skbuff.c:238 alloc_skb include/linux/skbuff.h:1049 [inline] alloc_skb_with_frags+0x93/0x590 net/core/skbuff.c:5664 sock_alloc_send_pskb+0x7ad/0x920 net/core/sock.c:2242 sock_alloc_send_skb+0x32/0x40 net/core/sock.c:2259 mld_newpack+0x1d7/0x7f0 net/ipv6/mcast.c:1609 add_grhead.isra.0+0x299/0x370 net/ipv6/mcast.c:1713 add_grec+0x7db/0x10b0 net/ipv6/mcast.c:1844 mld_send_cr net/ipv6/mcast.c:1970 [inline] mld_ifc_timer_expire+0x3d3/0x950 net/ipv6/mcast.c:2477 call_timer_fn+0x1ac/0x780 kernel/time/timer.c:1404 expire_timers kernel/time/timer.c:1449 [inline] __run_timers kernel/time/timer.c:1773 [inline] __run_timers kernel/time/timer.c:1740 [inline] run_timer_softirq+0x6c3/0x1790 kernel/time/timer.c:1786 __do_softirq+0x262/0x98c kernel/softirq.c:292 invoke_softirq kernel/softirq.c:373 [inline] irq_exit+0x19b/0x1e0 kernel/softirq.c:413 exiting_irq arch/x86/include/asm/apic.h:536 [inline] smp_apic_timer_interrupt+0x1a3/0x610 arch/x86/kernel/apic/apic.c:1137 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:829 RIP: 0010:native_safe_halt+0xe/0x10 arch/x86/include/asm/irqflags.h:61 Code: 98 6b ea f9 eb 8a cc cc cc cc cc cc e9 07 00 00 00 0f 00 2d 44 1c 60 00 f4 c3 66 90 e9 07 00 00 00 0f 00 2d 34 1c 60 00 fb f4 cc 55 48 89 e5 41 57 41 56 41 55 41 54 53 e8 4e 5d 9a f9 e8 79 RSP: 0018:ffffffff89807ce8 EFLAGS: 00000286 ORIG_RAX: ffffffffffffff13 RAX: 1ffffffff13266ae RBX: ffffffff8987a1c0 RCX: 0000000000000000 RDX: dffffc0000000000 RSI: 0000000000000006 RDI: ffffffff8987aa54 RBP: ffffffff89807d18 R08: ffffffff8987a1c0 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: dffffc0000000000 R13: ffffffff8a799980 R14: 0000000000000000 R15: 0000000000000000 arch_cpu_idle+0xa/0x10 arch/x86/kernel/process.c:690 default_idle_call+0x84/0xb0 kernel/sched/idle.c:94 cpuidle_idle_call kernel/sched/idle.c:154 [inline] do_idle+0x3c8/0x6e0 kernel/sched/idle.c:269 cpu_startup_entry+0x1b/0x20 kernel/sched/idle.c:361 rest_init+0x23b/0x371 init/main.c:451 arch_call_rest_init+0xe/0x1b start_kernel+0x904/0x943 init/main.c:784 x86_64_start_reservations+0x29/0x2b arch/x86/kernel/head64.c:490 x86_64_start_kernel+0x77/0x7b arch/x86/kernel/head64.c:471 secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:242 The buggy address belongs to the page: page:ffffea00067c82c0 refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 raw: 057ffe0000000000 ffffea00067c82c8 ffffea00067c82c8 0000000000000000 raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88819f20b880: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff88819f20b900: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff >ffff88819f20b980: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ^ ffff88819f20ba00: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff88819f20ba80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff Fixes: 61e84623ace3 ("net: centralize net_device min/max MTU checking") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/netdevice.h | 1 + net/core/dev.c | 32 ++++++++++++++++++++------------ net/core/rtnetlink.c | 13 +++++++++++-- 3 files changed, 32 insertions(+), 14 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8818291815bc..31fc54757bf2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3313,6 +3313,7 @@ int dev_set_alias(struct net_device *, const char *, size_t); int dev_change_net_namespace(struct net_device *, struct net *, const char *); int __dev_set_mtu(struct net_device *, int); int dev_set_mtu(struct net_device *, int); +int dev_validate_mtu(struct net_device *dev, int mtu); void dev_set_group(struct net_device *, int); int dev_set_mac_address(struct net_device *, struct sockaddr *); int dev_change_carrier(struct net_device *, bool new_carrier); diff --git a/net/core/dev.c b/net/core/dev.c index 737211f1b29c..36d926d2d5f0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6896,18 +6896,9 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) if (new_mtu == dev->mtu) return 0; - /* MTU must be positive, and in range */ - if (new_mtu < 0 || new_mtu < dev->min_mtu) { - net_err_ratelimited("%s: Invalid MTU %d requested, hw min %d\n", - dev->name, new_mtu, dev->min_mtu); - return -EINVAL; - } - - if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) { - net_err_ratelimited("%s: Invalid MTU %d requested, hw max %d\n", - dev->name, new_mtu, dev->max_mtu); - return -EINVAL; - } + err = dev_validate_mtu(dev, new_mtu); + if (err) + return err; if (!netif_device_present(dev)) return -ENODEV; @@ -7769,6 +7760,23 @@ int init_dummy_netdev(struct net_device *dev) EXPORT_SYMBOL_GPL(init_dummy_netdev); +int dev_validate_mtu(struct net_device *dev, int new_mtu) +{ + /* MTU must be positive, and in range */ + if (new_mtu < 0 || new_mtu < dev->min_mtu) { + net_err_ratelimited("%s: Invalid MTU %d requested, hw min %d\n", + dev->name, new_mtu, dev->min_mtu); + return -EINVAL; + } + + if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) { + net_err_ratelimited("%s: Invalid MTU %d requested, hw max %d\n", + dev->name, new_mtu, dev->max_mtu); + return -EINVAL; + } + return 0; +} + /** * register_netdev - register a network device * @dev: device to register diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index b598e9909fec..7c479c1ffd77 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2466,8 +2466,17 @@ struct net_device *rtnl_create_link(struct net *net, dev->rtnl_link_ops = ops; dev->rtnl_link_state = RTNL_LINK_INITIALIZING; - if (tb[IFLA_MTU]) - dev->mtu = nla_get_u32(tb[IFLA_MTU]); + if (tb[IFLA_MTU]) { + u32 mtu = nla_get_u32(tb[IFLA_MTU]); + int err; + + err = dev_validate_mtu(dev, mtu); + if (err) { + free_netdev(dev); + return ERR_PTR(err); + } + dev->mtu = mtu; + } if (tb[IFLA_ADDRESS]) { memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]), nla_len(tb[IFLA_ADDRESS])); From 6090ac18fcc58ed264ffdd00f6fdd6042475b6a4 Mon Sep 17 00:00:00 2001 From: Luuk Paulussen Date: Fri, 6 Dec 2019 12:16:59 +1300 Subject: [PATCH 2818/3715] hwmon: (adt7475) Make volt2reg return same reg as reg2volt input commit cf3ca1877574a306c0207cbf7fdf25419d9229df upstream. reg2volt returns the voltage that matches a given register value. Converting this back the other way with volt2reg didn't return the same register value because it used truncation instead of rounding. This meant that values read from sysfs could not be written back to sysfs to set back the same register value. With this change, volt2reg will return the same value for every voltage previously returned by reg2volt (for the set of possible input values) Signed-off-by: Luuk Paulussen Link: https://lore.kernel.org/r/20191205231659.1301-1-luuk.paulussen@alliedtelesis.co.nz cc: stable@vger.kernel.org Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/adt7475.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/adt7475.c b/drivers/hwmon/adt7475.c index 37db2eb66ed7..d7d1f2467100 100644 --- a/drivers/hwmon/adt7475.c +++ b/drivers/hwmon/adt7475.c @@ -297,9 +297,10 @@ static inline u16 volt2reg(int channel, long volt, u8 bypass_attn) long reg; if (bypass_attn & (1 << channel)) - reg = (volt * 1024) / 2250; + reg = DIV_ROUND_CLOSEST(volt * 1024, 2250); else - reg = (volt * r[1] * 1024) / ((r[0] + r[1]) * 2250); + reg = DIV_ROUND_CLOSEST(volt * r[1] * 1024, + (r[0] + r[1]) * 2250); return clamp_val(reg, 0, 1023) & (0xff << 2); } From 4c7b99b4c03b546c4ea2e7562ee083e5f3a2c0e6 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 5 Dec 2017 09:36:14 +0100 Subject: [PATCH 2819/3715] hwmon: Deal with errors from the thermal subsystem commit 47c332deb8e89f6c59b0bb2615945c6e7fad1a60 upstream. If the thermal subsystem returne -EPROBE_DEFER or any other error when hwmon calls devm_thermal_zone_of_sensor_register(), this is silently ignored. I ran into this with an incorrectly defined thermal zone, making it non-existing and thus this call failed with -EPROBE_DEFER assuming it would appear later. The sensor was still added which is incorrect: sensors must strictly be added after the thermal zones, so deferred probe must be respected. Fixes: d560168b5d0f ("hwmon: (core) New hwmon registration API") Signed-off-by: Linus Walleij Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/hwmon.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c index 7b53065e9882..b7f9e2adc2a2 100644 --- a/drivers/hwmon/hwmon.c +++ b/drivers/hwmon/hwmon.c @@ -143,6 +143,7 @@ static int hwmon_thermal_add_sensor(struct device *dev, struct hwmon_device *hwdev, int index) { struct hwmon_thermal_data *tdata; + struct thermal_zone_device *tzd; tdata = devm_kzalloc(dev, sizeof(*tdata), GFP_KERNEL); if (!tdata) @@ -151,8 +152,14 @@ static int hwmon_thermal_add_sensor(struct device *dev, tdata->hwdev = hwdev; tdata->index = index; - devm_thermal_zone_of_sensor_register(&hwdev->dev, index, tdata, - &hwmon_thermal_ops); + tzd = devm_thermal_zone_of_sensor_register(&hwdev->dev, index, tdata, + &hwmon_thermal_ops); + /* + * If CONFIG_THERMAL_OF is disabled, this returns -ENODEV, + * so ignore that error but forward any other error. + */ + if (IS_ERR(tzd) && (PTR_ERR(tzd) != -ENODEV)) + return PTR_ERR(tzd); return 0; } @@ -621,14 +628,20 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata, if (!chip->ops->is_visible(drvdata, hwmon_temp, hwmon_temp_input, j)) continue; - if (info[i]->config[j] & HWMON_T_INPUT) - hwmon_thermal_add_sensor(dev, hwdev, j); + if (info[i]->config[j] & HWMON_T_INPUT) { + err = hwmon_thermal_add_sensor(dev, + hwdev, j); + if (err) + goto free_device; + } } } } return hdev; +free_device: + device_unregister(hdev); free_hwmon: kfree(hwdev); ida_remove: From ffea8daac4c58e21e0196e72a84b53e3fbc363f7 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 24 Oct 2018 22:37:13 +0300 Subject: [PATCH 2820/3715] hwmon: (core) Fix double-free in __hwmon_device_register() commit 74e3512731bd5c9673176425a76a7cc5efa8ddb6 upstream. Fix double-free that happens when thermal zone setup fails, see KASAN log below. ================================================================== BUG: KASAN: double-free or invalid-free in __hwmon_device_register+0x5dc/0xa7c CPU: 0 PID: 132 Comm: kworker/0:2 Tainted: G B 4.19.0-rc8-next-20181016-00042-gb52cd80401e9-dirty #41 Hardware name: NVIDIA Tegra SoC (Flattened Device Tree) Workqueue: events deferred_probe_work_func Backtrace: [] (dump_backtrace) from [] (show_stack+0x20/0x24) [] (show_stack) from [] (dump_stack+0x9c/0xb0) [] (dump_stack) from [] (print_address_description+0x68/0x250) [] (print_address_description) from [] (kasan_report_invalid_free+0x68/0x88) [] (kasan_report_invalid_free) from [] (__kasan_slab_free+0x1f4/0x200) [] (__kasan_slab_free) from [] (kasan_slab_free+0x14/0x18) [] (kasan_slab_free) from [] (kfree+0x90/0x294) [] (kfree) from [] (__hwmon_device_register+0x5dc/0xa7c) [] (__hwmon_device_register) from [] (hwmon_device_register_with_info+0xa0/0xa8) [] (hwmon_device_register_with_info) from [] (devm_hwmon_device_register_with_info+0x74/0xb4) [] (devm_hwmon_device_register_with_info) from [] (lm90_probe+0x414/0x578) [] (lm90_probe) from [] (i2c_device_probe+0x35c/0x384) [] (i2c_device_probe) from [] (really_probe+0x290/0x3e4) [] (really_probe) from [] (driver_probe_device+0x80/0x1c4) [] (driver_probe_device) from [] (__device_attach_driver+0x104/0x11c) [] (__device_attach_driver) from [] (bus_for_each_drv+0xa4/0xc8) [] (bus_for_each_drv) from [] (__device_attach+0xf0/0x15c) [] (__device_attach) from [] (device_initial_probe+0x1c/0x20) [] (device_initial_probe) from [] (bus_probe_device+0xdc/0xec) [] (bus_probe_device) from [] (deferred_probe_work_func+0xa8/0xd4) [] (deferred_probe_work_func) from [] (process_one_work+0x3dc/0x96c) [] (process_one_work) from [] (worker_thread+0x4ec/0x8bc) [] (worker_thread) from [] (kthread+0x230/0x240) [] (kthread) from [] (ret_from_fork+0x14/0x38) Exception stack(0xcf743fb0 to 0xcf743ff8) 3fa0: 00000000 00000000 00000000 00000000 3fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 3fe0: 00000000 00000000 00000000 00000000 00000013 00000000 Allocated by task 132: kasan_kmalloc.part.1+0x58/0xf4 kasan_kmalloc+0x90/0xa4 kmem_cache_alloc_trace+0x90/0x2a0 __hwmon_device_register+0xbc/0xa7c hwmon_device_register_with_info+0xa0/0xa8 devm_hwmon_device_register_with_info+0x74/0xb4 lm90_probe+0x414/0x578 i2c_device_probe+0x35c/0x384 really_probe+0x290/0x3e4 driver_probe_device+0x80/0x1c4 __device_attach_driver+0x104/0x11c bus_for_each_drv+0xa4/0xc8 __device_attach+0xf0/0x15c device_initial_probe+0x1c/0x20 bus_probe_device+0xdc/0xec deferred_probe_work_func+0xa8/0xd4 process_one_work+0x3dc/0x96c worker_thread+0x4ec/0x8bc kthread+0x230/0x240 ret_from_fork+0x14/0x38 (null) Freed by task 132: __kasan_slab_free+0x12c/0x200 kasan_slab_free+0x14/0x18 kfree+0x90/0x294 hwmon_dev_release+0x1c/0x20 device_release+0x4c/0xe8 kobject_put+0xac/0x11c device_unregister+0x2c/0x30 __hwmon_device_register+0xa58/0xa7c hwmon_device_register_with_info+0xa0/0xa8 devm_hwmon_device_register_with_info+0x74/0xb4 lm90_probe+0x414/0x578 i2c_device_probe+0x35c/0x384 really_probe+0x290/0x3e4 driver_probe_device+0x80/0x1c4 __device_attach_driver+0x104/0x11c bus_for_each_drv+0xa4/0xc8 __device_attach+0xf0/0x15c device_initial_probe+0x1c/0x20 bus_probe_device+0xdc/0xec deferred_probe_work_func+0xa8/0xd4 process_one_work+0x3dc/0x96c worker_thread+0x4ec/0x8bc kthread+0x230/0x240 ret_from_fork+0x14/0x38 (null) Cc: # v4.15+ Fixes: 47c332deb8e8 ("hwmon: Deal with errors from the thermal subsystem") Signed-off-by: Dmitry Osipenko Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/hwmon.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c index b7f9e2adc2a2..6b2c72f4a834 100644 --- a/drivers/hwmon/hwmon.c +++ b/drivers/hwmon/hwmon.c @@ -631,8 +631,10 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata, if (info[i]->config[j] & HWMON_T_INPUT) { err = hwmon_thermal_add_sensor(dev, hwdev, j); - if (err) - goto free_device; + if (err) { + device_unregister(hdev); + goto ida_remove; + } } } } @@ -640,8 +642,6 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata, return hdev; -free_device: - device_unregister(hdev); free_hwmon: kfree(hwdev); ida_remove: From 0a36cb84e2f4250d92be7e92920128474e49850d Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 16 Jan 2020 10:44:17 -0800 Subject: [PATCH 2821/3715] hwmon: (core) Do not use device managed functions for memory allocations commit 3bf8bdcf3bada771eb12b57f2a30caee69e8ab8d upstream. The hwmon core uses device managed functions, tied to the hwmon parent device, for various internal memory allocations. This is problematic since hwmon device lifetime does not necessarily match its parent's device lifetime. If there is a mismatch, memory leaks will accumulate until the parent device is released. Fix the problem by managing all memory allocations internally. The only exception is memory allocation for thermal device registration, which can be tied to the hwmon device, along with thermal device registration itself. Fixes: d560168b5d0f ("hwmon: (core) New hwmon registration API") Cc: stable@vger.kernel.org # v4.14.x: 47c332deb8e8: hwmon: Deal with errors from the thermal subsystem Cc: stable@vger.kernel.org # v4.14.x: 74e3512731bd: hwmon: (core) Fix double-free in __hwmon_device_register() Cc: stable@vger.kernel.org # v4.9.x: 3a412d5e4a1c: hwmon: (core) Simplify sysfs attribute name allocation Cc: stable@vger.kernel.org # v4.9.x: 47c332deb8e8: hwmon: Deal with errors from the thermal subsystem Cc: stable@vger.kernel.org # v4.9.x: 74e3512731bd: hwmon: (core) Fix double-free in __hwmon_device_register() Cc: stable@vger.kernel.org # v4.9+ Cc: Martin K. Petersen Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/hwmon.c | 68 ++++++++++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 27 deletions(-) diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c index 6b2c72f4a834..652973d83a07 100644 --- a/drivers/hwmon/hwmon.c +++ b/drivers/hwmon/hwmon.c @@ -51,6 +51,7 @@ struct hwmon_device_attribute { #define to_hwmon_attr(d) \ container_of(d, struct hwmon_device_attribute, dev_attr) +#define to_dev_attr(a) container_of(a, struct device_attribute, attr) /* * Thermal zone information @@ -58,7 +59,7 @@ struct hwmon_device_attribute { * also provides the sensor index. */ struct hwmon_thermal_data { - struct hwmon_device *hwdev; /* Reference to hwmon device */ + struct device *dev; /* Reference to hwmon device */ int index; /* sensor index */ }; @@ -95,9 +96,27 @@ static const struct attribute_group *hwmon_dev_attr_groups[] = { NULL }; +static void hwmon_free_attrs(struct attribute **attrs) +{ + int i; + + for (i = 0; attrs[i]; i++) { + struct device_attribute *dattr = to_dev_attr(attrs[i]); + struct hwmon_device_attribute *hattr = to_hwmon_attr(dattr); + + kfree(hattr); + } + kfree(attrs); +} + static void hwmon_dev_release(struct device *dev) { - kfree(to_hwmon_device(dev)); + struct hwmon_device *hwdev = to_hwmon_device(dev); + + if (hwdev->group.attrs) + hwmon_free_attrs(hwdev->group.attrs); + kfree(hwdev->groups); + kfree(hwdev); } static struct class hwmon_class = { @@ -121,11 +140,11 @@ static DEFINE_IDA(hwmon_ida); static int hwmon_thermal_get_temp(void *data, int *temp) { struct hwmon_thermal_data *tdata = data; - struct hwmon_device *hwdev = tdata->hwdev; + struct hwmon_device *hwdev = to_hwmon_device(tdata->dev); int ret; long t; - ret = hwdev->chip->ops->read(&hwdev->dev, hwmon_temp, hwmon_temp_input, + ret = hwdev->chip->ops->read(tdata->dev, hwmon_temp, hwmon_temp_input, tdata->index, &t); if (ret < 0) return ret; @@ -139,8 +158,7 @@ static const struct thermal_zone_of_device_ops hwmon_thermal_ops = { .get_temp = hwmon_thermal_get_temp, }; -static int hwmon_thermal_add_sensor(struct device *dev, - struct hwmon_device *hwdev, int index) +static int hwmon_thermal_add_sensor(struct device *dev, int index) { struct hwmon_thermal_data *tdata; struct thermal_zone_device *tzd; @@ -149,10 +167,10 @@ static int hwmon_thermal_add_sensor(struct device *dev, if (!tdata) return -ENOMEM; - tdata->hwdev = hwdev; + tdata->dev = dev; tdata->index = index; - tzd = devm_thermal_zone_of_sensor_register(&hwdev->dev, index, tdata, + tzd = devm_thermal_zone_of_sensor_register(dev, index, tdata, &hwmon_thermal_ops); /* * If CONFIG_THERMAL_OF is disabled, this returns -ENODEV, @@ -164,8 +182,7 @@ static int hwmon_thermal_add_sensor(struct device *dev, return 0; } #else -static int hwmon_thermal_add_sensor(struct device *dev, - struct hwmon_device *hwdev, int index) +static int hwmon_thermal_add_sensor(struct device *dev, int index) { return 0; } @@ -242,8 +259,7 @@ static bool is_string_attr(enum hwmon_sensor_types type, u32 attr) (type == hwmon_fan && attr == hwmon_fan_label); } -static struct attribute *hwmon_genattr(struct device *dev, - const void *drvdata, +static struct attribute *hwmon_genattr(const void *drvdata, enum hwmon_sensor_types type, u32 attr, int index, @@ -271,7 +287,7 @@ static struct attribute *hwmon_genattr(struct device *dev, if ((mode & S_IWUGO) && !ops->write) return ERR_PTR(-EINVAL); - hattr = devm_kzalloc(dev, sizeof(*hattr), GFP_KERNEL); + hattr = kzalloc(sizeof(*hattr), GFP_KERNEL); if (!hattr) return ERR_PTR(-ENOMEM); @@ -474,8 +490,7 @@ static int hwmon_num_channel_attrs(const struct hwmon_channel_info *info) return n; } -static int hwmon_genattrs(struct device *dev, - const void *drvdata, +static int hwmon_genattrs(const void *drvdata, struct attribute **attrs, const struct hwmon_ops *ops, const struct hwmon_channel_info *info) @@ -501,7 +516,7 @@ static int hwmon_genattrs(struct device *dev, attr_mask &= ~BIT(attr); if (attr >= template_size) return -EINVAL; - a = hwmon_genattr(dev, drvdata, info->type, attr, i, + a = hwmon_genattr(drvdata, info->type, attr, i, templates[attr], ops); if (IS_ERR(a)) { if (PTR_ERR(a) != -ENOENT) @@ -515,8 +530,7 @@ static int hwmon_genattrs(struct device *dev, } static struct attribute ** -__hwmon_create_attrs(struct device *dev, const void *drvdata, - const struct hwmon_chip_info *chip) +__hwmon_create_attrs(const void *drvdata, const struct hwmon_chip_info *chip) { int ret, i, aindex = 0, nattrs = 0; struct attribute **attrs; @@ -527,15 +541,17 @@ __hwmon_create_attrs(struct device *dev, const void *drvdata, if (nattrs == 0) return ERR_PTR(-EINVAL); - attrs = devm_kcalloc(dev, nattrs + 1, sizeof(*attrs), GFP_KERNEL); + attrs = kcalloc(nattrs + 1, sizeof(*attrs), GFP_KERNEL); if (!attrs) return ERR_PTR(-ENOMEM); for (i = 0; chip->info[i]; i++) { - ret = hwmon_genattrs(dev, drvdata, &attrs[aindex], chip->ops, + ret = hwmon_genattrs(drvdata, &attrs[aindex], chip->ops, chip->info[i]); - if (ret < 0) + if (ret < 0) { + hwmon_free_attrs(attrs); return ERR_PTR(ret); + } aindex += ret; } @@ -577,14 +593,13 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata, for (i = 0; groups[i]; i++) ngroups++; - hwdev->groups = devm_kcalloc(dev, ngroups, sizeof(*groups), - GFP_KERNEL); + hwdev->groups = kcalloc(ngroups, sizeof(*groups), GFP_KERNEL); if (!hwdev->groups) { err = -ENOMEM; goto free_hwmon; } - attrs = __hwmon_create_attrs(dev, drvdata, chip); + attrs = __hwmon_create_attrs(drvdata, chip); if (IS_ERR(attrs)) { err = PTR_ERR(attrs); goto free_hwmon; @@ -629,8 +644,7 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata, hwmon_temp_input, j)) continue; if (info[i]->config[j] & HWMON_T_INPUT) { - err = hwmon_thermal_add_sensor(dev, - hwdev, j); + err = hwmon_thermal_add_sensor(hdev, j); if (err) { device_unregister(hdev); goto ida_remove; @@ -643,7 +657,7 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata, return hdev; free_hwmon: - kfree(hwdev); + hwmon_dev_release(hdev); ida_remove: ida_simple_remove(&hwmon_ida, id); return ERR_PTR(err); From 68c538b4a1cb84906fbcbffc62d4d6064ec8c9f8 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Jan 2020 10:38:57 -0800 Subject: [PATCH 2822/3715] Input: keyspan-remote - fix control-message timeouts commit ba9a103f40fc4a3ec7558ec9b0b97d4f92034249 upstream. The driver was issuing synchronous uninterruptible control requests without using a timeout. This could lead to the driver hanging on probe due to a malfunctioning (or malicious) device until the device is physically disconnected. While sleeping in probe the driver prevents other devices connected to the same hub from being added to (or removed from) the bus. The USB upper limit of five seconds per request should be more than enough. Fixes: 99f83c9c9ac9 ("[PATCH] USB: add driver for Keyspan Digital Remote") Signed-off-by: Johan Hovold Reviewed-by: Greg Kroah-Hartman Cc: stable # 2.6.13 Link: https://lore.kernel.org/r/20200113171715.30621-1-johan@kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/misc/keyspan_remote.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/input/misc/keyspan_remote.c b/drivers/input/misc/keyspan_remote.c index 77c47d6325fe..a9ee813eef10 100644 --- a/drivers/input/misc/keyspan_remote.c +++ b/drivers/input/misc/keyspan_remote.c @@ -344,7 +344,8 @@ static int keyspan_setup(struct usb_device* dev) int retval = 0; retval = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), - 0x11, 0x40, 0x5601, 0x0, NULL, 0, 0); + 0x11, 0x40, 0x5601, 0x0, NULL, 0, + USB_CTRL_SET_TIMEOUT); if (retval) { dev_dbg(&dev->dev, "%s - failed to set bit rate due to error: %d\n", __func__, retval); @@ -352,7 +353,8 @@ static int keyspan_setup(struct usb_device* dev) } retval = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), - 0x44, 0x40, 0x0, 0x0, NULL, 0, 0); + 0x44, 0x40, 0x0, 0x0, NULL, 0, + USB_CTRL_SET_TIMEOUT); if (retval) { dev_dbg(&dev->dev, "%s - failed to set resume sensitivity due to error: %d\n", __func__, retval); @@ -360,7 +362,8 @@ static int keyspan_setup(struct usb_device* dev) } retval = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), - 0x22, 0x40, 0x0, 0x0, NULL, 0, 0); + 0x22, 0x40, 0x0, 0x0, NULL, 0, + USB_CTRL_SET_TIMEOUT); if (retval) { dev_dbg(&dev->dev, "%s - failed to turn receive on due to error: %d\n", __func__, retval); From ac390c982915393d87a1e52229865c17ae2458e2 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 16 Jan 2020 20:12:27 -0800 Subject: [PATCH 2823/3715] Revert "Input: synaptics-rmi4 - don't increment rmiaddr for SMBus transfers" commit 8ff771f8c8d55d95f102cf88a970e541a8bd6bcf upstream. This reverts commit a284e11c371e446371675668d8c8120a27227339. This causes problems (drifting cursor) with at least the F11 function that reads more than 32 bytes. The real issue is in the F54 driver, and so this should be fixed there, and not in rmi_smbus.c. So first revert this bad commit, then fix the real problem in F54 in another patch. Signed-off-by: Hans Verkuil Reported-by: Timo Kaufmann Fixes: a284e11c371e ("Input: synaptics-rmi4 - don't increment rmiaddr for SMBus transfers") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200115124819.3191024-2-hverkuil-cisco@xs4all.nl Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/rmi4/rmi_smbus.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/rmi4/rmi_smbus.c b/drivers/input/rmi4/rmi_smbus.c index 4b2466cf2fb1..b6ccf39c6a7b 100644 --- a/drivers/input/rmi4/rmi_smbus.c +++ b/drivers/input/rmi4/rmi_smbus.c @@ -166,6 +166,7 @@ static int rmi_smb_write_block(struct rmi_transport_dev *xport, u16 rmiaddr, /* prepare to write next block of bytes */ cur_len -= SMB_MAX_COUNT; databuff += SMB_MAX_COUNT; + rmiaddr += SMB_MAX_COUNT; } exit: mutex_unlock(&rmi_smb->page_mutex); @@ -217,6 +218,7 @@ static int rmi_smb_read_block(struct rmi_transport_dev *xport, u16 rmiaddr, /* prepare to read next block of bytes */ cur_len -= SMB_MAX_COUNT; databuff += SMB_MAX_COUNT; + rmiaddr += SMB_MAX_COUNT; } retval = 0; From bb4768b0583e717aae6c19ab7d6dd3244d7ccc2d Mon Sep 17 00:00:00 2001 From: Alex Sverdlin Date: Wed, 8 Jan 2020 15:57:47 +0100 Subject: [PATCH 2824/3715] ARM: 8950/1: ftrace/recordmcount: filter relocation types commit 927d780ee371d7e121cea4fc7812f6ef2cea461c upstream. Scenario 1, ARMv7 ================= If code in arch/arm/kernel/ftrace.c would operate on mcount() pointer the following may be generated: 00000230 : 230: b5f8 push {r3, r4, r5, r6, r7, lr} 232: b500 push {lr} 234: f7ff fffe bl 0 <__gnu_mcount_nc> 234: R_ARM_THM_CALL __gnu_mcount_nc 238: f240 0600 movw r6, #0 238: R_ARM_THM_MOVW_ABS_NC __gnu_mcount_nc 23c: f8d0 1180 ldr.w r1, [r0, #384] ; 0x180 FTRACE currently is not able to deal with it: WARNING: CPU: 0 PID: 0 at .../kernel/trace/ftrace.c:1979 ftrace_bug+0x1ad/0x230() ... CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.4.116-... #1 ... [] (unwind_backtrace) from [] (show_stack+0x11/0x14) [] (show_stack) from [] (dump_stack+0x81/0xa8) [] (dump_stack) from [] (warn_slowpath_common+0x69/0x90) [] (warn_slowpath_common) from [] (warn_slowpath_null+0x17/0x1c) [] (warn_slowpath_null) from [] (ftrace_bug+0x1ad/0x230) [] (ftrace_bug) from [] (ftrace_process_locs+0x27d/0x444) [] (ftrace_process_locs) from [] (ftrace_init+0x91/0xe8) [] (ftrace_init) from [] (start_kernel+0x34b/0x358) [] (start_kernel) from [<00308095>] (0x308095) ---[ end trace cb88537fdc8fa200 ]--- ftrace failed to modify [] prealloc_fixed_plts+0x8/0x60 actual: 44:f2:e1:36 ftrace record flags: 0 (0) expected tramp: c03143e9 Scenario 2, ARMv4T ================== ftrace: allocating 14435 entries in 43 pages ------------[ cut here ]------------ WARNING: CPU: 0 PID: 0 at kernel/trace/ftrace.c:2029 ftrace_bug+0x204/0x310 CPU: 0 PID: 0 Comm: swapper Not tainted 4.19.5 #1 Hardware name: Cirrus Logic EDB9302 Evaluation Board [] (unwind_backtrace) from [] (show_stack+0x20/0x2c) [] (show_stack) from [] (dump_stack+0x20/0x30) [] (dump_stack) from [] (__warn+0xdc/0x104) [] (__warn) from [] (warn_slowpath_null+0x4c/0x5c) [] (warn_slowpath_null) from [] (ftrace_bug+0x204/0x310) [] (ftrace_bug) from [] (ftrace_init+0x3b4/0x4d4) [] (ftrace_init) from [] (start_kernel+0x20c/0x410) [] (start_kernel) from [<00000000>] ( (null)) ---[ end trace 0506a2f5dae6b341 ]--- ftrace failed to modify [] perf_trace_sys_exit+0x5c/0xe8 actual: 1e:ff:2f:e1 Initializing ftrace call sites ftrace record flags: 0 (0) expected tramp: c000fb24 The analysis for this problem has been already performed previously, refer to the link below. Fix the above problems by allowing only selected reloc types in __mcount_loc. The list itself comes from the legacy recordmcount.pl script. Link: https://lore.kernel.org/lkml/56961010.6000806@pengutronix.de/ Cc: stable@vger.kernel.org Fixes: ed60453fa8f8 ("ARM: 6511/1: ftrace: add ARM support for C version of recordmcount") Signed-off-by: Alexander Sverdlin Acked-by: Steven Rostedt (VMware) Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- scripts/recordmcount.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index 16e086dcc567..a4888e955466 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -53,6 +53,10 @@ #define R_AARCH64_ABS64 257 #endif +#define R_ARM_PC24 1 +#define R_ARM_THM_CALL 10 +#define R_ARM_CALL 28 + static int fd_map; /* File descriptor for file being modified. */ static int mmap_failed; /* Boolean flag. */ static char gpfx; /* prefix for global symbol name (sometimes '_') */ @@ -428,6 +432,18 @@ is_mcounted_section_name(char const *const txtname) #define RECORD_MCOUNT_64 #include "recordmcount.h" +static int arm_is_fake_mcount(Elf32_Rel const *rp) +{ + switch (ELF32_R_TYPE(w(rp->r_info))) { + case R_ARM_THM_CALL: + case R_ARM_CALL: + case R_ARM_PC24: + return 0; + } + + return 1; +} + /* 64-bit EM_MIPS has weird ELF64_Rela.r_info. * http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf * We interpret Table 29 Relocation Operation (Elf64_Rel, Elf64_Rela) [p.40] @@ -529,6 +545,7 @@ do_file(char const *const fname) altmcount = "__gnu_mcount_nc"; make_nop = make_nop_arm; rel_type_nop = R_ARM_NONE; + is_fake_mcount32 = arm_is_fake_mcount; break; case EM_AARCH64: reltype = R_AARCH64_ABS64; From dd5d5e77f725b32a93eb4d760736f4a47a680c5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Tue, 7 Jan 2020 10:47:34 +0100 Subject: [PATCH 2825/3715] mmc: tegra: fix SDR50 tuning override MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f571389c0b015e76f91c697c4c1700aba860d34f upstream. Commit 7ad2ed1dfcbe inadvertently mixed up a quirk flag's name and broke SDR50 tuning override. Use correct NVQUIRK_ name. Fixes: 7ad2ed1dfcbe ("mmc: tegra: enable UHS-I modes") Cc: Acked-by: Adrian Hunter Reviewed-by: Thierry Reding Tested-by: Thierry Reding Signed-off-by: Michał Mirosław Link: https://lore.kernel.org/r/9aff1d859935e59edd81e4939e40d6c55e0b55f6.1578390388.git.mirq-linux@rere.qmqm.pl Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-tegra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c index ce3f344d2b66..d2b0a62bfce1 100644 --- a/drivers/mmc/host/sdhci-tegra.c +++ b/drivers/mmc/host/sdhci-tegra.c @@ -177,7 +177,7 @@ static void tegra_sdhci_reset(struct sdhci_host *host, u8 mask) misc_ctrl |= SDHCI_MISC_CTRL_ENABLE_DDR50; if (soc_data->nvquirks & NVQUIRK_ENABLE_SDR104) misc_ctrl |= SDHCI_MISC_CTRL_ENABLE_SDR104; - if (soc_data->nvquirks & SDHCI_MISC_CTRL_ENABLE_SDR50) + if (soc_data->nvquirks & NVQUIRK_ENABLE_SDR50) clk_ctrl |= SDHCI_CLOCK_CTRL_SDR50_TUNING_OVERRIDE; } From da6b467e112957eea049c400eff84e3cfb89008d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Wed, 15 Jan 2020 10:54:35 +0100 Subject: [PATCH 2826/3715] mmc: sdhci: fix minimum clock rate for v3 controller MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2a187d03352086e300daa2044051db00044cd171 upstream. For SDHCIv3+ with programmable clock mode, minimal clock frequency is still base clock / max(divider). Minimal programmable clock frequency is always greater than minimal divided clock frequency. Without this patch, SDHCI uses out-of-spec initial frequency when multiplier is big enough: mmc1: mmc_rescan_try_freq: trying to init card at 468750 Hz [for 480 MHz source clock divided by 1024] The code in sdhci_calc_clk() already chooses a correct SDCLK clock mode. Fixes: c3ed3877625f ("mmc: sdhci: add support for programmable clock mode") Cc: # 4f6aa3264af4: mmc: tegra: Only advertise UHS modes if IO regulator is present Cc: Signed-off-by: Michał Mirosław Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/ffb489519a446caffe7a0a05c4b9372bd52397bb.1579082031.git.mirq-linux@rere.qmqm.pl Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 645775dd4edb..4f1c884c0b50 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -3592,11 +3592,13 @@ int sdhci_setup_host(struct sdhci_host *host) if (host->ops->get_min_clock) mmc->f_min = host->ops->get_min_clock(host); else if (host->version >= SDHCI_SPEC_300) { - if (host->clk_mul) { - mmc->f_min = (host->max_clk * host->clk_mul) / 1024; + if (host->clk_mul) max_clk = host->max_clk * host->clk_mul; - } else - mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_300; + /* + * Divided Clock Mode minimum clock rate is always less than + * Programmable Clock Mode minimum clock rate. + */ + mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_300; } else mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_200; From c57b0f88fce8b4f697f323cfb3b7acc195b7560f Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Fri, 25 Jan 2019 12:07:00 -0600 Subject: [PATCH 2827/3715] Documentation: Document arm64 kpti control commit de19055564c8f8f9d366f8db3395836da0b2176c upstream. For a while Arm64 has been capable of force enabling or disabling the kpti mitigations. Lets make sure the documentation reflects that. Signed-off-by: Jeremy Linton Reviewed-by: Andre Przywara Signed-off-by: Jonathan Corbet Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/kernel-parameters.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 933465eff40e..7e0a4be3503d 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1845,6 +1845,12 @@ Built with CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y, the default is off. + kpti= [ARM64] Control page table isolation of user + and kernel address spaces. + Default: enabled on cores which need mitigation. + 0: force disabled + 1: force enabled + kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs. Default is 0 (don't ignore, but inject #GP) From 1130377fb5a8095a13c23018edadbabbec7f7fef Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Fri, 17 Jan 2020 13:40:36 -0800 Subject: [PATCH 2828/3715] Input: pm8xxx-vib - fix handling of separate enable register commit 996d5d5f89a558a3608a46e73ccd1b99f1b1d058 upstream. Setting the vibrator enable_mask is not implemented correctly: For regmap_update_bits(map, reg, mask, val) we give in either regs->enable_mask or 0 (= no-op) as mask and "val" as value. But "val" actually refers to the vibrator voltage control register, which has nothing to do with the enable_mask. So we usually end up doing nothing when we really wanted to enable the vibrator. We want to set or clear the enable_mask (to enable/disable the vibrator). Therefore, change the call to always modify the enable_mask and set the bits only if we want to enable the vibrator. Fixes: d4c7c5c96c92 ("Input: pm8xxx-vib - handle separate enable register") Signed-off-by: Stephan Gerhold Link: https://lore.kernel.org/r/20200114183442.45720-1-stephan@gerhold.net Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/misc/pm8xxx-vibrator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/misc/pm8xxx-vibrator.c b/drivers/input/misc/pm8xxx-vibrator.c index 7dd1c1fbe42a..27b3db154a33 100644 --- a/drivers/input/misc/pm8xxx-vibrator.c +++ b/drivers/input/misc/pm8xxx-vibrator.c @@ -98,7 +98,7 @@ static int pm8xxx_vib_set(struct pm8xxx_vib *vib, bool on) if (regs->enable_mask) rc = regmap_update_bits(vib->regmap, regs->enable_addr, - on ? regs->enable_mask : 0, val); + regs->enable_mask, on ? ~0 : 0); return rc; } From 0411b242274fabe83298c0242916216b40b2350f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 10 Jan 2020 12:01:27 -0800 Subject: [PATCH 2829/3715] Input: sur40 - fix interface sanity checks commit 6b32391ed675827f8425a414abbc6fbd54ea54fe upstream. Make sure to use the current alternate setting when verifying the interface descriptors to avoid binding to an invalid interface. This in turn could cause the driver to misbehave or trigger a WARN() in usb_submit_urb() that kernels with panic_on_warn set would choke on. Fixes: bdb5c57f209c ("Input: add sur40 driver for Samsung SUR40 (aka MS Surface 2.0/Pixelsense)") Signed-off-by: Johan Hovold Acked-by: Vladis Dronov Link: https://lore.kernel.org/r/20191210113737.4016-8-johan@kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/touchscreen/sur40.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/sur40.c b/drivers/input/touchscreen/sur40.c index f16f8358c70a..98e03d0ca03c 100644 --- a/drivers/input/touchscreen/sur40.c +++ b/drivers/input/touchscreen/sur40.c @@ -537,7 +537,7 @@ static int sur40_probe(struct usb_interface *interface, int error; /* Check if we really have the right interface. */ - iface_desc = &interface->altsetting[0]; + iface_desc = interface->cur_altsetting; if (iface_desc->desc.bInterfaceClass != 0xFF) return -ENODEV; From e11d045f564dec4d26e1db28c7e121ae3b69b29e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 10 Jan 2020 12:00:18 -0800 Subject: [PATCH 2830/3715] Input: gtco - fix endpoint sanity check commit a8eeb74df5a6bdb214b2b581b14782c5f5a0cf83 upstream. The driver was checking the number of endpoints of the first alternate setting instead of the current one, something which could lead to the driver binding to an invalid interface. This in turn could cause the driver to misbehave or trigger a WARN() in usb_submit_urb() that kernels with panic_on_warn set would choke on. Fixes: 162f98dea487 ("Input: gtco - fix crash on detecting device without endpoints") Signed-off-by: Johan Hovold Acked-by: Vladis Dronov Link: https://lore.kernel.org/r/20191210113737.4016-5-johan@kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/tablet/gtco.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/input/tablet/gtco.c b/drivers/input/tablet/gtco.c index 35031228a6d0..799c94dda651 100644 --- a/drivers/input/tablet/gtco.c +++ b/drivers/input/tablet/gtco.c @@ -875,18 +875,14 @@ static int gtco_probe(struct usb_interface *usbinterface, } /* Sanity check that a device has an endpoint */ - if (usbinterface->altsetting[0].desc.bNumEndpoints < 1) { + if (usbinterface->cur_altsetting->desc.bNumEndpoints < 1) { dev_err(&usbinterface->dev, "Invalid number of endpoints\n"); error = -EINVAL; goto err_free_urb; } - /* - * The endpoint is always altsetting 0, we know this since we know - * this device only has one interrupt endpoint - */ - endpoint = &usbinterface->altsetting[0].endpoint[0].desc; + endpoint = &usbinterface->cur_altsetting->endpoint[0].desc; /* Some debug */ dev_dbg(&usbinterface->dev, "gtco # interfaces: %d\n", usbinterface->num_altsetting); @@ -973,7 +969,7 @@ static int gtco_probe(struct usb_interface *usbinterface, input_dev->dev.parent = &usbinterface->dev; /* Setup the URB, it will be posted later on open of input device */ - endpoint = &usbinterface->altsetting[0].endpoint[0].desc; + endpoint = &usbinterface->cur_altsetting->endpoint[0].desc; usb_fill_int_urb(gtco->urbinfo, udev, From c2764d4449e8634c0d020dcf68aabae2f5ffd85d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 10 Jan 2020 11:59:32 -0800 Subject: [PATCH 2831/3715] Input: aiptek - fix endpoint sanity check commit 3111491fca4f01764e0c158c5e0f7ced808eef51 upstream. The driver was checking the number of endpoints of the first alternate setting instead of the current one, something which could lead to the driver binding to an invalid interface. This in turn could cause the driver to misbehave or trigger a WARN() in usb_submit_urb() that kernels with panic_on_warn set would choke on. Fixes: 8e20cf2bce12 ("Input: aiptek - fix crash on detecting device without endpoints") Signed-off-by: Johan Hovold Acked-by: Vladis Dronov Link: https://lore.kernel.org/r/20191210113737.4016-3-johan@kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/tablet/aiptek.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/input/tablet/aiptek.c b/drivers/input/tablet/aiptek.c index 0b55e1f375b3..fbe2df91aad3 100644 --- a/drivers/input/tablet/aiptek.c +++ b/drivers/input/tablet/aiptek.c @@ -1822,14 +1822,14 @@ aiptek_probe(struct usb_interface *intf, const struct usb_device_id *id) input_set_abs_params(inputdev, ABS_WHEEL, AIPTEK_WHEEL_MIN, AIPTEK_WHEEL_MAX - 1, 0, 0); /* Verify that a device really has an endpoint */ - if (intf->altsetting[0].desc.bNumEndpoints < 1) { + if (intf->cur_altsetting->desc.bNumEndpoints < 1) { dev_err(&intf->dev, "interface has %d endpoints, but must have minimum 1\n", - intf->altsetting[0].desc.bNumEndpoints); + intf->cur_altsetting->desc.bNumEndpoints); err = -EINVAL; goto fail3; } - endpoint = &intf->altsetting[0].endpoint[0].desc; + endpoint = &intf->cur_altsetting->endpoint[0].desc; /* Go set up our URB, which is called when the tablet receives * input. From f4c64034ef354509f80e7038924eda37c763af6d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 10 Jan 2020 11:55:47 -0800 Subject: [PATCH 2832/3715] Input: pegasus_notetaker - fix endpoint sanity check commit bcfcb7f9b480dd0be8f0df2df17340ca92a03b98 upstream. The driver was checking the number of endpoints of the first alternate setting instead of the current one, something which could be used by a malicious device (or USB descriptor fuzzer) to trigger a NULL-pointer dereference. Fixes: 1afca2b66aac ("Input: add Pegasus Notetaker tablet driver") Signed-off-by: Johan Hovold Acked-by: Martin Kepplinger Acked-by: Vladis Dronov Link: https://lore.kernel.org/r/20191210113737.4016-2-johan@kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/tablet/pegasus_notetaker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/tablet/pegasus_notetaker.c b/drivers/input/tablet/pegasus_notetaker.c index 47de5a81172f..2319144802c9 100644 --- a/drivers/input/tablet/pegasus_notetaker.c +++ b/drivers/input/tablet/pegasus_notetaker.c @@ -260,7 +260,7 @@ static int pegasus_probe(struct usb_interface *intf, return -ENODEV; /* Sanity check that the device has an endpoint */ - if (intf->altsetting[0].desc.bNumEndpoints < 1) { + if (intf->cur_altsetting->desc.bNumEndpoints < 1) { dev_err(&intf->dev, "Invalid number of endpoints\n"); return -EINVAL; } From 59b27a9f7ee3645df0c3b4c763c77eb0e607504c Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Fri, 10 Jan 2020 10:30:04 -0800 Subject: [PATCH 2833/3715] Input: sun4i-ts - add a check for devm_thermal_zone_of_sensor_register commit 97e24b095348a15ec08c476423c3b3b939186ad7 upstream. The driver misses a check for devm_thermal_zone_of_sensor_register(). Add a check to fix it. Fixes: e28d0c9cd381 ("input: convert sun4i-ts to use devm_thermal_zone_of_sensor_register") Signed-off-by: Chuhong Yuan Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/touchscreen/sun4i-ts.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/sun4i-ts.c b/drivers/input/touchscreen/sun4i-ts.c index d2e14d9e5975..ab44eb0352d0 100644 --- a/drivers/input/touchscreen/sun4i-ts.c +++ b/drivers/input/touchscreen/sun4i-ts.c @@ -246,6 +246,7 @@ static int sun4i_ts_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct device_node *np = dev->of_node; struct device *hwmon; + struct thermal_zone_device *thermal; int error; u32 reg; bool ts_attached; @@ -365,7 +366,10 @@ static int sun4i_ts_probe(struct platform_device *pdev) if (IS_ERR(hwmon)) return PTR_ERR(hwmon); - devm_thermal_zone_of_sensor_register(ts->dev, 0, ts, &sun4i_ts_tz_ops); + thermal = devm_thermal_zone_of_sensor_register(ts->dev, 0, ts, + &sun4i_ts_tz_ops); + if (IS_ERR(thermal)) + return PTR_ERR(thermal); writel(TEMP_IRQ_EN(1), ts->base + TP_INT_FIFOC); From 788a56f8907560f9814b2894b9094a3272abd4a4 Mon Sep 17 00:00:00 2001 From: Gilles Buloz Date: Wed, 27 Nov 2019 18:09:34 +0100 Subject: [PATCH 2834/3715] hwmon: (nct7802) Fix voltage limits to wrong registers commit 7713e62c8623c54dac88d1fa724aa487a38c3efb upstream. in0 thresholds are written to the in2 thresholds registers in2 thresholds to in3 thresholds in3 thresholds to in4 thresholds in4 thresholds to in0 thresholds Signed-off-by: Gilles Buloz Link: https://lore.kernel.org/r/5de0f509.rc0oEvPOMjbfPW1w%gilles.buloz@kontron.com Fixes: 3434f3783580 ("hwmon: Driver for Nuvoton NCT7802Y") Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/nct7802.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/nct7802.c b/drivers/hwmon/nct7802.c index 38ffbdb0a85f..779ec8fdfae0 100644 --- a/drivers/hwmon/nct7802.c +++ b/drivers/hwmon/nct7802.c @@ -32,8 +32,8 @@ static const u8 REG_VOLTAGE[5] = { 0x09, 0x0a, 0x0c, 0x0d, 0x0e }; static const u8 REG_VOLTAGE_LIMIT_LSB[2][5] = { - { 0x40, 0x00, 0x42, 0x44, 0x46 }, - { 0x3f, 0x00, 0x41, 0x43, 0x45 }, + { 0x46, 0x00, 0x40, 0x42, 0x44 }, + { 0x45, 0x00, 0x3f, 0x41, 0x43 }, }; static const u8 REG_VOLTAGE_LIMIT_MSB[5] = { 0x48, 0x00, 0x47, 0x47, 0x48 }; From 1ff739768a3d524184306fb84f616bc9672db50d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 15 Jan 2020 20:47:37 -0800 Subject: [PATCH 2835/3715] scsi: RDMA/isert: Fix a recently introduced regression related to logout commit 04060db41178c7c244f2c7dcd913e7fd331de915 upstream. iscsit_close_connection() calls isert_wait_conn(). Due to commit e9d3009cb936 both functions call target_wait_for_sess_cmds() although that last function should be called only once. Fix this by removing the target_wait_for_sess_cmds() call from isert_wait_conn() and by only calling isert_wait_conn() after target_wait_for_sess_cmds(). Fixes: e9d3009cb936 ("scsi: target: iscsi: Wait for all commands to finish before freeing a session"). Link: https://lore.kernel.org/r/20200116044737.19507-1-bvanassche@acm.org Reported-by: Rahul Kundu Signed-off-by: Bart Van Assche Tested-by: Mike Marciniszyn Acked-by: Sagi Grimberg Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/isert/ib_isert.c | 12 ------------ drivers/target/iscsi/iscsi_target.c | 6 +++--- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index ee3f630c9217..9b5691f306a2 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -2582,17 +2582,6 @@ isert_wait4logout(struct isert_conn *isert_conn) } } -static void -isert_wait4cmds(struct iscsi_conn *conn) -{ - isert_info("iscsi_conn %p\n", conn); - - if (conn->sess) { - target_sess_cmd_list_set_waiting(conn->sess->se_sess); - target_wait_for_sess_cmds(conn->sess->se_sess); - } -} - /** * isert_put_unsol_pending_cmds() - Drop commands waiting for * unsolicitate dataout @@ -2640,7 +2629,6 @@ static void isert_wait_conn(struct iscsi_conn *conn) ib_drain_qp(isert_conn->qp); isert_put_unsol_pending_cmds(conn); - isert_wait4cmds(conn); isert_wait4logout(isert_conn); queue_work(isert_release_wq, &isert_conn->release_work); diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 21ce92ee1652..37d64acea5e1 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -4155,9 +4155,6 @@ int iscsit_close_connection( iscsit_stop_nopin_response_timer(conn); iscsit_stop_nopin_timer(conn); - if (conn->conn_transport->iscsit_wait_conn) - conn->conn_transport->iscsit_wait_conn(conn); - /* * During Connection recovery drop unacknowledged out of order * commands for this connection, and prepare the other commands @@ -4243,6 +4240,9 @@ int iscsit_close_connection( target_sess_cmd_list_set_waiting(sess->se_sess); target_wait_for_sess_cmds(sess->se_sess); + if (conn->conn_transport->iscsit_wait_conn) + conn->conn_transport->iscsit_wait_conn(conn); + ahash_request_free(conn->conn_tx_hash); if (conn->conn_rx_hash) { struct crypto_ahash *tfm; From 09efdaaca8c613bf7c23ec48dc014906b6b6d296 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sun, 12 Jan 2020 11:42:31 +0800 Subject: [PATCH 2836/3715] tracing: xen: Ordered comparison of function pointers commit d0695e2351102affd8efae83989056bc4b275917 upstream. Just as commit 0566e40ce7 ("tracing: initcall: Ordered comparison of function pointers"), this patch fixes another remaining one in xen.h found by clang-9. In file included from arch/x86/xen/trace.c:21: In file included from ./include/trace/events/xen.h:475: In file included from ./include/trace/define_trace.h:102: In file included from ./include/trace/trace_events.h:473: ./include/trace/events/xen.h:69:7: warning: ordered comparison of function \ pointers ('xen_mc_callback_fn_t' (aka 'void (*)(void *)') and 'xen_mc_callback_fn_t') [-Wordered-compare-function-pointers] __field(xen_mc_callback_fn_t, fn) ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ./include/trace/trace_events.h:421:29: note: expanded from macro '__field' ^ ./include/trace/trace_events.h:407:6: note: expanded from macro '__field_ext' is_signed_type(type), filter_type); \ ^ ./include/linux/trace_events.h:554:44: note: expanded from macro 'is_signed_type' ^ Fixes: c796f213a6934 ("xen/trace: add multicall tracing") Signed-off-by: Changbin Du Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- include/trace/events/xen.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/trace/events/xen.h b/include/trace/events/xen.h index 2ec9064a2bb7..e5150fc67e91 100644 --- a/include/trace/events/xen.h +++ b/include/trace/events/xen.h @@ -66,7 +66,11 @@ TRACE_EVENT(xen_mc_callback, TP_PROTO(xen_mc_callback_fn_t fn, void *data), TP_ARGS(fn, data), TP_STRUCT__entry( - __field(xen_mc_callback_fn_t, fn) + /* + * Use field_struct to avoid is_signed_type() + * comparison of a function pointer. + */ + __field_struct(xen_mc_callback_fn_t, fn) __field(void *, data) ), TP_fast_assign( From 778de9db9ec2036f0bd82572bbb7b35ec402089c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 26 Jan 2020 09:29:34 -0500 Subject: [PATCH 2837/3715] do_last(): fetch directory ->i_mode and ->i_uid before it's too late commit d0cb50185ae942b03c4327be322055d622dc79f6 upstream. may_create_in_sticky() call is done when we already have dropped the reference to dir. Fixes: 30aba6656f61e (namei: allow restricted O_CREAT of FIFOs and regular files) Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/namei.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index d1e467b7b9de..d648d6d2b635 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1023,7 +1023,8 @@ static int may_linkat(struct path *link) * may_create_in_sticky - Check whether an O_CREAT open in a sticky directory * should be allowed, or not, on files that already * exist. - * @dir: the sticky parent directory + * @dir_mode: mode bits of directory + * @dir_uid: owner of directory * @inode: the inode of the file to open * * Block an O_CREAT open of a FIFO (or a regular file) when: @@ -1039,18 +1040,18 @@ static int may_linkat(struct path *link) * * Returns 0 if the open is allowed, -ve on error. */ -static int may_create_in_sticky(struct dentry * const dir, +static int may_create_in_sticky(umode_t dir_mode, kuid_t dir_uid, struct inode * const inode) { if ((!sysctl_protected_fifos && S_ISFIFO(inode->i_mode)) || (!sysctl_protected_regular && S_ISREG(inode->i_mode)) || - likely(!(dir->d_inode->i_mode & S_ISVTX)) || - uid_eq(inode->i_uid, dir->d_inode->i_uid) || + likely(!(dir_mode & S_ISVTX)) || + uid_eq(inode->i_uid, dir_uid) || uid_eq(current_fsuid(), inode->i_uid)) return 0; - if (likely(dir->d_inode->i_mode & 0002) || - (dir->d_inode->i_mode & 0020 && + if (likely(dir_mode & 0002) || + (dir_mode & 0020 && ((sysctl_protected_fifos >= 2 && S_ISFIFO(inode->i_mode)) || (sysctl_protected_regular >= 2 && S_ISREG(inode->i_mode))))) { return -EACCES; @@ -3265,6 +3266,8 @@ static int do_last(struct nameidata *nd, int *opened) { struct dentry *dir = nd->path.dentry; + kuid_t dir_uid = dir->d_inode->i_uid; + umode_t dir_mode = dir->d_inode->i_mode; int open_flag = op->open_flag; bool will_truncate = (open_flag & O_TRUNC) != 0; bool got_write = false; @@ -3400,7 +3403,7 @@ finish_open: error = -EISDIR; if (d_is_dir(nd->path.dentry)) goto out; - error = may_create_in_sticky(dir, + error = may_create_in_sticky(dir_mode, dir_uid, d_backing_inode(nd->path.dentry)); if (unlikely(error)) goto out; From 3e0151deb2872230e2a05c11d56f3a80cd5698f7 Mon Sep 17 00:00:00 2001 From: Masato Suzuki Date: Mon, 27 Jan 2020 14:07:46 +0900 Subject: [PATCH 2838/3715] sd: Fix REQ_OP_ZONE_REPORT completion handling ZBC/ZAC report zones command may return less bytes than requested if the number of matching zones for the report request is small. However, unlike read or write commands, the remainder of incomplete report zones commands cannot be automatically requested by the block layer: the start sector of the next report cannot be known, and the report reply may not be 512B aligned for SAS drives (a report zone reply size is always a multiple of 64B). The regular request completion code executing bio_advance() and restart of the command remainder part currently causes invalid zone descriptor data to be reported to the caller if the report zone size is smaller than 512B (a case that can happen easily for a report of the last zones of a SAS drive for example). Since blkdev_report_zones() handles report zone command processing in a loop until completion (no more zones are being reported), we can safely avoid that the block layer performs an incorrect bio_advance() call and restart of the remainder of incomplete report zone BIOs. To do so, always indicate a full completion of REQ_OP_ZONE_REPORT by setting good_bytes to the request buffer size and by setting the command resid to 0. This does not affect the post processing of the report zone reply done by sd_zbc_complete() since the reply header indicates the number of zones reported. Fixes: 89d947561077 ("sd: Implement support for ZBC devices") Cc: # 4.19 Cc: # 4.14 Signed-off-by: Masato Suzuki Reviewed-by: Damien Le Moal Acked-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sd.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 2955b856e9ec..e8c2afbb82e9 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1981,9 +1981,13 @@ static int sd_done(struct scsi_cmnd *SCpnt) } break; case REQ_OP_ZONE_REPORT: + /* To avoid that the block layer performs an incorrect + * bio_advance() call and restart of the remainder of + * incomplete report zone BIOs, always indicate a full + * completion of REQ_OP_ZONE_REPORT. + */ if (!result) { - good_bytes = scsi_bufflen(SCpnt) - - scsi_get_resid(SCpnt); + good_bytes = scsi_bufflen(SCpnt); scsi_set_resid(SCpnt, 0); } else { good_bytes = 0; From a4681849419e18f0592961f7aa88bef19eaa66f3 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Jun 2019 16:12:36 -0600 Subject: [PATCH 2839/3715] coresight: etb10: Do not call smp_processor_id from preemptible commit 730766bae3280a25d40ea76a53dc6342e84e6513 upstream. During a perf session we try to allocate buffers on the "node" associated with the CPU the event is bound to. If it is not bound to a CPU, we use the current CPU node, using smp_processor_id(). However this is unsafe in a pre-emptible context and could generate the splats as below : BUG: using smp_processor_id() in preemptible [00000000] code: perf/2544 Use NUMA_NO_NODE hint instead of using the current node for events not bound to CPUs. Fixes: 2997aa4063d97fdb39 ("coresight: etb10: implementing AUX API") Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Cc: stable # 4.6+ Signed-off-by: Mathieu Poirier Link: https://lore.kernel.org/r/20190620221237.3536-5-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/coresight-etb10.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index d14a9cb7959a..cb675a596302 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -287,9 +287,7 @@ static void *etb_alloc_buffer(struct coresight_device *csdev, int cpu, int node; struct cs_buffers *buf; - if (cpu == -1) - cpu = smp_processor_id(); - node = cpu_to_node(cpu); + node = (cpu == -1) ? NUMA_NO_NODE : cpu_to_node(cpu); buf = kzalloc_node(sizeof(struct cs_buffers), GFP_KERNEL, node); if (!buf) From 308856261df4828afbe00abbb7b226ec80555479 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Jun 2019 16:12:35 -0600 Subject: [PATCH 2840/3715] coresight: tmc-etf: Do not call smp_processor_id from preemptible commit 024c1fd9dbcc1d8a847f1311f999d35783921b7f upstream. During a perf session we try to allocate buffers on the "node" associated with the CPU the event is bound to. If it is not bound to a CPU, we use the current CPU node, using smp_processor_id(). However this is unsafe in a pre-emptible context and could generate the splats as below : BUG: using smp_processor_id() in preemptible [00000000] code: perf/2544 caller is tmc_alloc_etf_buffer+0x5c/0x60 CPU: 2 PID: 2544 Comm: perf Not tainted 5.1.0-rc6-147786-g116841e #344 Hardware name: ARM LTD ARM Juno Development Platform/ARM Juno Development Platform, BIOS EDK II Feb 1 2019 Call trace: dump_backtrace+0x0/0x150 show_stack+0x14/0x20 dump_stack+0x9c/0xc4 debug_smp_processor_id+0x10c/0x110 tmc_alloc_etf_buffer+0x5c/0x60 etm_setup_aux+0x1c4/0x230 rb_alloc_aux+0x1b8/0x2b8 perf_mmap+0x35c/0x478 mmap_region+0x34c/0x4f0 do_mmap+0x2d8/0x418 vm_mmap_pgoff+0xd0/0xf8 ksys_mmap_pgoff+0x88/0xf8 __arm64_sys_mmap+0x28/0x38 el0_svc_handler+0xd8/0x138 el0_svc+0x8/0xc Use NUMA_NO_NODE hint instead of using the current node for events not bound to CPUs. Fixes: 2e499bbc1a929ac ("coresight: tmc: implementing TMC-ETF AUX space API") Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Cc: stable # 4.7+ Signed-off-by: Mathieu Poirier Link: https://lore.kernel.org/r/20190620221237.3536-4-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/coresight-tmc-etf.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index 336194d059fe..0a00f4e941fb 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -308,9 +308,7 @@ static void *tmc_alloc_etf_buffer(struct coresight_device *csdev, int cpu, int node; struct cs_buffers *buf; - if (cpu == -1) - cpu = smp_processor_id(); - node = cpu_to_node(cpu); + node = (cpu == -1) ? NUMA_NO_NODE : cpu_to_node(cpu); /* Allocate memory structure for interaction with Perf */ buf = kzalloc_node(sizeof(struct cs_buffers), GFP_KERNEL, node); From 5cdd9e0e7ee99caf59ad54fa833eeb6033386875 Mon Sep 17 00:00:00 2001 From: Wen Huang Date: Thu, 28 Nov 2019 18:51:04 +0800 Subject: [PATCH 2841/3715] libertas: Fix two buffer overflows at parsing bss descriptor commit e5e884b42639c74b5b57dc277909915c0aefc8bb upstream. add_ie_rates() copys rates without checking the length in bss descriptor from remote AP.when victim connects to remote attacker, this may trigger buffer overflow. lbs_ibss_join_existing() copys rates without checking the length in bss descriptor from remote IBSS node.when victim connects to remote attacker, this may trigger buffer overflow. Fix them by putting the length check before performing copy. This fix addresses CVE-2019-14896 and CVE-2019-14897. This also fix build warning of mixed declarations and code. Reported-by: kbuild test robot Signed-off-by: Wen Huang Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/marvell/libertas/cfg.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/marvell/libertas/cfg.c b/drivers/net/wireless/marvell/libertas/cfg.c index 9f3a7b512673..4ffc188d2ffd 100644 --- a/drivers/net/wireless/marvell/libertas/cfg.c +++ b/drivers/net/wireless/marvell/libertas/cfg.c @@ -273,6 +273,10 @@ add_ie_rates(u8 *tlv, const u8 *ie, int *nrates) int hw, ap, ap_max = ie[1]; u8 hw_rate; + if (ap_max > MAX_RATES) { + lbs_deb_assoc("invalid rates\n"); + return tlv; + } /* Advance past IE header */ ie += 2; @@ -1720,6 +1724,9 @@ static int lbs_ibss_join_existing(struct lbs_private *priv, struct cmd_ds_802_11_ad_hoc_join cmd; u8 preamble = RADIO_PREAMBLE_SHORT; int ret = 0; + int hw, i; + u8 rates_max; + u8 *rates; /* TODO: set preamble based on scan result */ ret = lbs_set_radio(priv, preamble, 1); @@ -1778,9 +1785,12 @@ static int lbs_ibss_join_existing(struct lbs_private *priv, if (!rates_eid) { lbs_add_rates(cmd.bss.rates); } else { - int hw, i; - u8 rates_max = rates_eid[1]; - u8 *rates = cmd.bss.rates; + rates_max = rates_eid[1]; + if (rates_max > MAX_RATES) { + lbs_deb_join("invalid rates"); + goto out; + } + rates = cmd.bss.rates; for (hw = 0; hw < ARRAY_SIZE(lbs_rates); hw++) { u8 hw_rate = lbs_rates[hw].bitrate / 5; for (i = 0; i < rates_max; i++) { From bef0dc84c65d057f3bbdb577028b73692ca1556f Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sun, 10 Nov 2019 07:27:04 +0100 Subject: [PATCH 2842/3715] media: v4l2-ioctl.c: zero reserved fields for S/TRY_FMT commit ee8951e56c0f960b9621636603a822811cef3158 upstream. v4l2_vbi_format, v4l2_sliced_vbi_format and v4l2_sdr_format have a reserved array at the end that should be zeroed by drivers as per the V4L2 spec. Older drivers often do not do this, so just handle this in the core. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-ioctl.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index 7cafc8a57950..8eb52139684a 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -1496,12 +1496,12 @@ static int v4l_s_fmt(const struct v4l2_ioctl_ops *ops, case V4L2_BUF_TYPE_VBI_CAPTURE: if (unlikely(!ops->vidioc_s_fmt_vbi_cap)) break; - CLEAR_AFTER_FIELD(p, fmt.vbi); + CLEAR_AFTER_FIELD(p, fmt.vbi.flags); return ops->vidioc_s_fmt_vbi_cap(file, fh, arg); case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE: if (unlikely(!ops->vidioc_s_fmt_sliced_vbi_cap)) break; - CLEAR_AFTER_FIELD(p, fmt.sliced); + CLEAR_AFTER_FIELD(p, fmt.sliced.io_size); return ops->vidioc_s_fmt_sliced_vbi_cap(file, fh, arg); case V4L2_BUF_TYPE_VIDEO_OUTPUT: if (unlikely(!ops->vidioc_s_fmt_vid_out)) @@ -1524,22 +1524,22 @@ static int v4l_s_fmt(const struct v4l2_ioctl_ops *ops, case V4L2_BUF_TYPE_VBI_OUTPUT: if (unlikely(!ops->vidioc_s_fmt_vbi_out)) break; - CLEAR_AFTER_FIELD(p, fmt.vbi); + CLEAR_AFTER_FIELD(p, fmt.vbi.flags); return ops->vidioc_s_fmt_vbi_out(file, fh, arg); case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT: if (unlikely(!ops->vidioc_s_fmt_sliced_vbi_out)) break; - CLEAR_AFTER_FIELD(p, fmt.sliced); + CLEAR_AFTER_FIELD(p, fmt.sliced.io_size); return ops->vidioc_s_fmt_sliced_vbi_out(file, fh, arg); case V4L2_BUF_TYPE_SDR_CAPTURE: if (unlikely(!ops->vidioc_s_fmt_sdr_cap)) break; - CLEAR_AFTER_FIELD(p, fmt.sdr); + CLEAR_AFTER_FIELD(p, fmt.sdr.buffersize); return ops->vidioc_s_fmt_sdr_cap(file, fh, arg); case V4L2_BUF_TYPE_SDR_OUTPUT: if (unlikely(!ops->vidioc_s_fmt_sdr_out)) break; - CLEAR_AFTER_FIELD(p, fmt.sdr); + CLEAR_AFTER_FIELD(p, fmt.sdr.buffersize); return ops->vidioc_s_fmt_sdr_out(file, fh, arg); case V4L2_BUF_TYPE_META_CAPTURE: if (unlikely(!ops->vidioc_s_fmt_meta_cap)) @@ -1583,12 +1583,12 @@ static int v4l_try_fmt(const struct v4l2_ioctl_ops *ops, case V4L2_BUF_TYPE_VBI_CAPTURE: if (unlikely(!ops->vidioc_try_fmt_vbi_cap)) break; - CLEAR_AFTER_FIELD(p, fmt.vbi); + CLEAR_AFTER_FIELD(p, fmt.vbi.flags); return ops->vidioc_try_fmt_vbi_cap(file, fh, arg); case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE: if (unlikely(!ops->vidioc_try_fmt_sliced_vbi_cap)) break; - CLEAR_AFTER_FIELD(p, fmt.sliced); + CLEAR_AFTER_FIELD(p, fmt.sliced.io_size); return ops->vidioc_try_fmt_sliced_vbi_cap(file, fh, arg); case V4L2_BUF_TYPE_VIDEO_OUTPUT: if (unlikely(!ops->vidioc_try_fmt_vid_out)) @@ -1611,22 +1611,22 @@ static int v4l_try_fmt(const struct v4l2_ioctl_ops *ops, case V4L2_BUF_TYPE_VBI_OUTPUT: if (unlikely(!ops->vidioc_try_fmt_vbi_out)) break; - CLEAR_AFTER_FIELD(p, fmt.vbi); + CLEAR_AFTER_FIELD(p, fmt.vbi.flags); return ops->vidioc_try_fmt_vbi_out(file, fh, arg); case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT: if (unlikely(!ops->vidioc_try_fmt_sliced_vbi_out)) break; - CLEAR_AFTER_FIELD(p, fmt.sliced); + CLEAR_AFTER_FIELD(p, fmt.sliced.io_size); return ops->vidioc_try_fmt_sliced_vbi_out(file, fh, arg); case V4L2_BUF_TYPE_SDR_CAPTURE: if (unlikely(!ops->vidioc_try_fmt_sdr_cap)) break; - CLEAR_AFTER_FIELD(p, fmt.sdr); + CLEAR_AFTER_FIELD(p, fmt.sdr.buffersize); return ops->vidioc_try_fmt_sdr_cap(file, fh, arg); case V4L2_BUF_TYPE_SDR_OUTPUT: if (unlikely(!ops->vidioc_try_fmt_sdr_out)) break; - CLEAR_AFTER_FIELD(p, fmt.sdr); + CLEAR_AFTER_FIELD(p, fmt.sdr.buffersize); return ops->vidioc_try_fmt_sdr_out(file, fh, arg); case V4L2_BUF_TYPE_META_CAPTURE: if (unlikely(!ops->vidioc_try_fmt_meta_cap)) From 20c0aa965935903656ec476ca3ffcfbee10d07e8 Mon Sep 17 00:00:00 2001 From: Bo Wu Date: Wed, 20 Nov 2019 13:26:17 +0000 Subject: [PATCH 2843/3715] scsi: iscsi: Avoid potential deadlock in iscsi_if_rx func commit bba340c79bfe3644829db5c852fdfa9e33837d6d upstream. In iscsi_if_rx func, after receiving one request through iscsi_if_recv_msg func, iscsi_if_send_reply will be called to try to reply to the request in a do-while loop. If the iscsi_if_send_reply function keeps returning -EAGAIN, a deadlock will occur. For example, a client only send msg without calling recvmsg func, then it will result in the watchdog soft lockup. The details are given as follows: sock_fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ISCSI); retval = bind(sock_fd, (struct sock addr*) & src_addr, sizeof(src_addr); while (1) { state_msg = sendmsg(sock_fd, &msg, 0); //Note: recvmsg(sock_fd, &msg, 0) is not processed here. } close(sock_fd); watchdog: BUG: soft lockup - CPU#7 stuck for 22s! [netlink_test:253305] Sample time: 4000897528 ns(HZ: 250) Sample stat: curr: user: 675503481560, nice: 321724050, sys: 448689506750, idle: 4654054240530, iowait: 40885550700, irq: 14161174020, softirq: 8104324140, st: 0 deta: user: 0, nice: 0, sys: 3998210100, idle: 0, iowait: 0, irq: 1547170, softirq: 242870, st: 0 Sample softirq: TIMER: 992 SCHED: 8 Sample irqstat: irq 2: delta 1003, curr: 3103802, arch_timer CPU: 7 PID: 253305 Comm: netlink_test Kdump: loaded Tainted: G OE Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 pstate: 40400005 (nZcv daif +PAN -UAO) pc : __alloc_skb+0x104/0x1b0 lr : __alloc_skb+0x9c/0x1b0 sp : ffff000033603a30 x29: ffff000033603a30 x28: 00000000000002dd x27: ffff800b34ced810 x26: ffff800ba7569f00 x25: 00000000ffffffff x24: 0000000000000000 x23: ffff800f7c43f600 x22: 0000000000480020 x21: ffff0000091d9000 x20: ffff800b34eff200 x19: ffff800ba7569f00 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 x14: 0001000101000100 x13: 0000000101010000 x12: 0101000001010100 x11: 0001010101010001 x10: 00000000000002dd x9 : ffff000033603d58 x8 : ffff800b34eff400 x7 : ffff800ba7569200 x6 : ffff800b34eff400 x5 : 0000000000000000 x4 : 00000000ffffffff x3 : 0000000000000000 x2 : 0000000000000001 x1 : ffff800b34eff2c0 x0 : 0000000000000300 Call trace: __alloc_skb+0x104/0x1b0 iscsi_if_rx+0x144/0x12bc [scsi_transport_iscsi] netlink_unicast+0x1e0/0x258 netlink_sendmsg+0x310/0x378 sock_sendmsg+0x4c/0x70 sock_write_iter+0x90/0xf0 __vfs_write+0x11c/0x190 vfs_write+0xac/0x1c0 ksys_write+0x6c/0xd8 __arm64_sys_write+0x24/0x30 el0_svc_common+0x78/0x130 el0_svc_handler+0x38/0x78 el0_svc+0x8/0xc Link: https://lore.kernel.org/r/EDBAAA0BBBA2AC4E9C8B6B81DEEE1D6915E3D4D2@dggeml505-mbx.china.huawei.com Signed-off-by: Bo Wu Reviewed-by: Zhiqiang Liu Reviewed-by: Lee Duncan Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_transport_iscsi.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index b4d06bd9ed51..95d71e301a53 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -37,6 +37,8 @@ #define ISCSI_TRANSPORT_VERSION "2.0-870" +#define ISCSI_SEND_MAX_ALLOWED 10 + static int dbg_session; module_param_named(debug_session, dbg_session, int, S_IRUGO | S_IWUSR); @@ -3680,6 +3682,7 @@ iscsi_if_rx(struct sk_buff *skb) struct nlmsghdr *nlh; struct iscsi_uevent *ev; uint32_t group; + int retries = ISCSI_SEND_MAX_ALLOWED; nlh = nlmsg_hdr(skb); if (nlh->nlmsg_len < sizeof(*nlh) + sizeof(*ev) || @@ -3710,6 +3713,10 @@ iscsi_if_rx(struct sk_buff *skb) break; err = iscsi_if_send_reply(portid, nlh->nlmsg_type, ev, sizeof(*ev)); + if (err == -EAGAIN && --retries < 0) { + printk(KERN_WARNING "Send reply failed, error %d\n", err); + break; + } } while (err < 0 && err != -ECONNREFUSED && err != -ESRCH); skb_pull(skb, rlen); } From e9a80d43d9b50198256eb75c92a1341c6169ec1f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 28 Jan 2020 12:49:12 +0100 Subject: [PATCH 2844/3715] md: Avoid namespace collision with bitmap API commit e64e4018d572710c44f42c923d4ac059f0a23320 upstream. bitmap API (include/linux/bitmap.h) has 'bitmap' prefix for its methods. On the other hand MD bitmap API is special case. Adding 'md' prefix to it to avoid name space collision. No functional changes intended. Signed-off-by: Andy Shevchenko Acked-by: Shaohua Li Signed-off-by: Dmitry Torokhov [only take the bitmap_free change for stable - gregkh] Signed-off-by: Greg Kroah-Hartman --- drivers/md/bitmap.c | 10 +++++----- drivers/md/bitmap.h | 2 +- drivers/md/md-cluster.c | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 0cabf31fb163..7eb76a1a2505 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1729,7 +1729,7 @@ void bitmap_flush(struct mddev *mddev) /* * free memory that was allocated */ -void bitmap_free(struct bitmap *bitmap) +void md_bitmap_free(struct bitmap *bitmap) { unsigned long k, pages; struct bitmap_page *bp; @@ -1763,7 +1763,7 @@ void bitmap_free(struct bitmap *bitmap) kfree(bp); kfree(bitmap); } -EXPORT_SYMBOL(bitmap_free); +EXPORT_SYMBOL(md_bitmap_free); void bitmap_wait_behind_writes(struct mddev *mddev) { @@ -1796,7 +1796,7 @@ void bitmap_destroy(struct mddev *mddev) if (mddev->thread) mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; - bitmap_free(bitmap); + md_bitmap_free(bitmap); } /* @@ -1887,7 +1887,7 @@ struct bitmap *bitmap_create(struct mddev *mddev, int slot) return bitmap; error: - bitmap_free(bitmap); + md_bitmap_free(bitmap); return ERR_PTR(err); } @@ -1958,7 +1958,7 @@ struct bitmap *get_bitmap_from_slot(struct mddev *mddev, int slot) rv = bitmap_init_from_disk(bitmap, 0); if (rv) { - bitmap_free(bitmap); + md_bitmap_free(bitmap); return ERR_PTR(rv); } diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h index 5df35ca90f58..dd53a978c5f2 100644 --- a/drivers/md/bitmap.h +++ b/drivers/md/bitmap.h @@ -271,7 +271,7 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks, struct bitmap *get_bitmap_from_slot(struct mddev *mddev, int slot); int bitmap_copy_from_slot(struct mddev *mddev, int slot, sector_t *lo, sector_t *hi, bool clear_bits); -void bitmap_free(struct bitmap *bitmap); +void md_bitmap_free(struct bitmap *bitmap); void bitmap_wait_behind_writes(struct mddev *mddev); #endif diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 717aaffc227d..10057ac85476 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -1128,7 +1128,7 @@ int cluster_check_sync_size(struct mddev *mddev) bm_lockres = lockres_init(mddev, str, NULL, 1); if (!bm_lockres) { pr_err("md-cluster: Cannot initialize %s\n", str); - bitmap_free(bitmap); + md_bitmap_free(bitmap); return -1; } bm_lockres->flags |= DLM_LKF_NOQUEUE; @@ -1142,11 +1142,11 @@ int cluster_check_sync_size(struct mddev *mddev) sync_size = sb->sync_size; else if (sync_size != sb->sync_size) { kunmap_atomic(sb); - bitmap_free(bitmap); + md_bitmap_free(bitmap); return -1; } kunmap_atomic(sb); - bitmap_free(bitmap); + md_bitmap_free(bitmap); } return (my_sync_size == sync_size) ? 0 : -1; From 8f715caa52eae8a31704cb398a2d9fe5250a37bf Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 1 Aug 2018 15:42:56 -0700 Subject: [PATCH 2845/3715] bitmap: Add bitmap_alloc(), bitmap_zalloc() and bitmap_free() commit c42b65e363ce97a828f81b59033c3558f8fa7f70 upstream. A lot of code become ugly because of open coding allocations for bitmaps. Introduce three helpers to allow users be more clear of intention and keep their code neat. Note, due to multiple circular dependencies we may not provide the helpers as inliners. For now we keep them exported and, perhaps, at some point in the future we will sort out header inclusion and inheritance. Signed-off-by: Andy Shevchenko Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- include/linux/bitmap.h | 8 ++++++++ lib/bitmap.c | 20 ++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index aec255fb62aa..10a4dd02221d 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -86,6 +86,14 @@ * contain all bit positions from 0 to 'bits' - 1. */ +/* + * Allocation and deallocation of bitmap. + * Provided in lib/bitmap.c to avoid circular dependency. + */ +extern unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags); +extern unsigned long *bitmap_zalloc(unsigned int nbits, gfp_t flags); +extern void bitmap_free(const unsigned long *bitmap); + /* * lib/bitmap.c provides these functions: */ diff --git a/lib/bitmap.c b/lib/bitmap.c index 2a9373ef4054..fbe38a83acb3 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -1212,3 +1213,22 @@ void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int n } EXPORT_SYMBOL(bitmap_copy_le); #endif + +unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags) +{ + return kmalloc_array(BITS_TO_LONGS(nbits), sizeof(unsigned long), + flags); +} +EXPORT_SYMBOL(bitmap_alloc); + +unsigned long *bitmap_zalloc(unsigned int nbits, gfp_t flags) +{ + return bitmap_alloc(nbits, flags | __GFP_ZERO); +} +EXPORT_SYMBOL(bitmap_zalloc); + +void bitmap_free(const unsigned long *bitmap) +{ + kfree(bitmap); +} +EXPORT_SYMBOL(bitmap_free); From f3439dd7ee8662c4f8558b5f41676e15c31776c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kadlecsik=20J=C3=B3zsef?= Date: Sun, 19 Jan 2020 22:06:49 +0100 Subject: [PATCH 2846/3715] netfilter: ipset: use bitmap infrastructure completely commit 32c72165dbd0e246e69d16a3ad348a4851afd415 upstream. The bitmap allocation did not use full unsigned long sizes when calculating the required size and that was triggered by KASAN as slab-out-of-bounds read in several places. The patch fixes all of them. Reported-by: syzbot+fabca5cbf5e54f3fe2de@syzkaller.appspotmail.com Reported-by: syzbot+827ced406c9a1d9570ed@syzkaller.appspotmail.com Reported-by: syzbot+190d63957b22ef673ea5@syzkaller.appspotmail.com Reported-by: syzbot+dfccdb2bdb4a12ad425e@syzkaller.appspotmail.com Reported-by: syzbot+df0d0f5895ef1f41a65b@syzkaller.appspotmail.com Reported-by: syzbot+b08bd19bb37513357fd4@syzkaller.appspotmail.com Reported-by: syzbot+53cdd0ec0bbabd53370a@syzkaller.appspotmail.com Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- include/linux/netfilter/ipset/ip_set.h | 7 ------- net/netfilter/ipset/ip_set_bitmap_gen.h | 2 +- net/netfilter/ipset/ip_set_bitmap_ip.c | 6 +++--- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 6 +++--- net/netfilter/ipset/ip_set_bitmap_port.c | 6 +++--- 5 files changed, 10 insertions(+), 17 deletions(-) diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 91a533bd3eb1..b7246b7e0bf4 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -445,13 +445,6 @@ ip6addrptr(const struct sk_buff *skb, bool src, struct in6_addr *addr) sizeof(*addr)); } -/* Calculate the bytes required to store the inclusive range of a-b */ -static inline int -bitmap_bytes(u32 a, u32 b) -{ - return 4 * ((((b - a + 8) / 8) + 3) / 4); -} - #include #include #include diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index b0701f6259cc..3c0e345367a5 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -79,7 +79,7 @@ mtype_flush(struct ip_set *set) if (set->extensions & IPSET_EXT_DESTROY) mtype_ext_cleanup(set); - memset(map->members, 0, map->memsize); + bitmap_zero(map->members, map->elements); set->elements = 0; set->ext_size = 0; } diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index 4783efff0bde..a4c104a4977f 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -40,7 +40,7 @@ MODULE_ALIAS("ip_set_bitmap:ip"); /* Type structure */ struct bitmap_ip { - void *members; /* the set members */ + unsigned long *members; /* the set members */ u32 first_ip; /* host byte order, included in range */ u32 last_ip; /* host byte order, included in range */ u32 elements; /* number of max elements in the set */ @@ -222,7 +222,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map, u32 first_ip, u32 last_ip, u32 elements, u32 hosts, u8 netmask) { - map->members = ip_set_alloc(map->memsize); + map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN); if (!map->members) return false; map->first_ip = first_ip; @@ -315,7 +315,7 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[], if (!map) return -ENOMEM; - map->memsize = bitmap_bytes(0, elements - 1); + map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long); set->variant = &bitmap_ip; if (!init_map_ip(set, map, first_ip, last_ip, elements, hosts, netmask)) { diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 9a065f672d3a..8e58e7e34981 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -46,7 +46,7 @@ enum { /* Type structure */ struct bitmap_ipmac { - void *members; /* the set members */ + unsigned long *members; /* the set members */ u32 first_ip; /* host byte order, included in range */ u32 last_ip; /* host byte order, included in range */ u32 elements; /* number of max elements in the set */ @@ -299,7 +299,7 @@ static bool init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, u32 first_ip, u32 last_ip, u32 elements) { - map->members = ip_set_alloc(map->memsize); + map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN); if (!map->members) return false; map->first_ip = first_ip; @@ -363,7 +363,7 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[], if (!map) return -ENOMEM; - map->memsize = bitmap_bytes(0, elements - 1); + map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long); set->variant = &bitmap_ipmac; if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { kfree(map); diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 7f0c733358a4..6771b362a123 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -34,7 +34,7 @@ MODULE_ALIAS("ip_set_bitmap:port"); /* Type structure */ struct bitmap_port { - void *members; /* the set members */ + unsigned long *members; /* the set members */ u16 first_port; /* host byte order, included in range */ u16 last_port; /* host byte order, included in range */ u32 elements; /* number of max elements in the set */ @@ -207,7 +207,7 @@ static bool init_map_port(struct ip_set *set, struct bitmap_port *map, u16 first_port, u16 last_port) { - map->members = ip_set_alloc(map->memsize); + map->members = bitmap_zalloc(map->elements, GFP_KERNEL | __GFP_NOWARN); if (!map->members) return false; map->first_port = first_port; @@ -250,7 +250,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[], return -ENOMEM; map->elements = elements; - map->memsize = bitmap_bytes(0, map->elements); + map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long); set->variant = &bitmap_port; if (!init_map_port(set, map, first_port, last_port)) { kfree(map); From 94868d28db84f25e316e1d9d914263df4496e70b Mon Sep 17 00:00:00 2001 From: Martin Schiller Date: Thu, 9 Jan 2020 07:31:14 +0100 Subject: [PATCH 2847/3715] net/x25: fix nonblocking connect commit e21dba7a4df4d93da237da65a096084b4f2e87b4 upstream. This patch fixes 2 issues in x25_connect(): 1. It makes absolutely no sense to reset the neighbour and the connection state after a (successful) nonblocking call of x25_connect. This prevents any connection from being established, since the response (call accept) cannot be processed. 2. Any further calls to x25_connect() while a call is pending should simply return, instead of creating new Call Request (on different logical channels). This patch should also fix the "KASAN: null-ptr-deref Write in x25_connect" and "BUG: unable to handle kernel NULL pointer dereference in x25_connect" bugs reported by syzbot. Signed-off-by: Martin Schiller Reported-by: syzbot+429c200ffc8772bfe070@syzkaller.appspotmail.com Reported-by: syzbot+eec0c87f31a7c3b66f7b@syzkaller.appspotmail.com Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/x25/af_x25.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index a156b6dc3a72..f4fa33b84cde 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -764,6 +764,10 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr, if (sk->sk_state == TCP_ESTABLISHED) goto out; + rc = -EALREADY; /* Do nothing if call is already in progress */ + if (sk->sk_state == TCP_SYN_SENT) + goto out; + sk->sk_state = TCP_CLOSE; sock->state = SS_UNCONNECTED; @@ -810,7 +814,7 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr, /* Now the loop */ rc = -EINPROGRESS; if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) - goto out_put_neigh; + goto out; rc = x25_wait_for_connection_establishment(sk); if (rc) From 9fa690a2a016e1b55356835f047b952e67d3d73a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 29 Jan 2020 15:02:39 +0100 Subject: [PATCH 2848/3715] Linux 4.14.169 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1e74ba09cdda..795d93bfe156 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 168 +SUBLEVEL = 169 EXTRAVERSION = NAME = Petit Gorille From 8f92c773d6e715fe7b8ad23bf7ba148b74401ca9 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Wed, 29 Jan 2020 09:15:17 -0800 Subject: [PATCH 2849/3715] ANDROID: Incremental fs: Fix sparse errors Fix all sparse errors in fs/incfs except fs/incfs/integrity.c:192:9: warning: Variable length array is used Test: incfs_test passes Bug: 133435829 Change-Id: I9c2e26e4e1a06a894977f11a3c8559b968dd115e Signed-off-by: Paul Lawrence --- fs/incfs/format.c | 4 ++-- fs/incfs/main.c | 2 +- fs/incfs/vfs.c | 11 ++++++----- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/incfs/format.c b/fs/incfs/format.c index 247e1b4ec563..db71f527cf36 100644 --- a/fs/incfs/format.c +++ b/fs/incfs/format.c @@ -178,7 +178,7 @@ static int append_md_to_backing_file(struct backing_file_context *bfc, record_size = le16_to_cpu(record->h_record_size); file_pos = incfs_get_end_offset(bfc->bc_file); - record->h_prev_md_offset = bfc->bc_last_md_record_offset; + record->h_prev_md_offset = cpu_to_le64(bfc->bc_last_md_record_offset); record->h_next_md_offset = 0; record->h_record_crc = cpu_to_le32(calc_md_crc(record)); @@ -281,7 +281,7 @@ int incfs_write_file_attr_to_backing_file(struct backing_file_context *bfc, file_attr.fa_header.h_next_md_offset = cpu_to_le64(0); file_attr.fa_size = cpu_to_le16((u16)value.len); file_attr.fa_offset = cpu_to_le64(value_offset); - file_attr.fa_crc = cpu_to_le64(crc); + file_attr.fa_crc = cpu_to_le32(crc); result = write_to_bf(bfc, value.data, value.len, value_offset, true); if (result) diff --git a/fs/incfs/main.c b/fs/incfs/main.c index d9eec7496846..e65d0d895128 100644 --- a/fs/incfs/main.c +++ b/fs/incfs/main.c @@ -12,7 +12,7 @@ #define INCFS_NODE_FEATURES "features" -struct file_system_type incfs_fs_type = { +static struct file_system_type incfs_fs_type = { .owner = THIS_MODULE, .name = INCFS_NAME, .mount = incfs_mount_fs, diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index 0c2f23e5ca55..e4790189abd3 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -18,6 +18,7 @@ #include +#include "vfs.h" #include "data_mgmt.h" #include "format.h" #include "integrity.h" @@ -173,7 +174,7 @@ static const struct xattr_handler incfs_xattr_handler = { .get = incfs_handler_getxattr, }; -const struct xattr_handler *incfs_xattr_ops[] = { +static const struct xattr_handler *incfs_xattr_ops[] = { &incfs_xattr_handler, NULL, }; @@ -933,11 +934,11 @@ static int init_new_file(struct mount_info *mi, struct dentry *dentry, struct path path = {}; struct file *new_file; int error = 0; - struct backing_file_context *bfc = 0; + struct backing_file_context *bfc = NULL; u32 block_count; - struct mem_range mem_range = {0}; - struct signature_info *si = 0; - struct mtree *hash_tree = 0; + struct mem_range mem_range = {NULL}; + struct signature_info *si = NULL; + struct mtree *hash_tree = NULL; if (!mi || !dentry || !uuid) return -EFAULT; From 3a88df7f17ab1671ff2e68a5305451d251d870cd Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 1 Feb 2018 21:27:22 -0800 Subject: [PATCH 2850/3715] UPSTREAM: libnvdimm, namespace: make min namespace size 4K The arbitrary 4MB minimum namespace size turns out to be too large for some environments. Quoting Cheng-mean Liu: In the case of emulated NVDIMM devices in the VM environment, there are scenarios that NVDIMM device with much smaller sizes are desired, for example, we might use a single enumerated NVDIMM DAX device for representing each container layer, which in some cases could be just a few KBs size. PAGE_SIZE is the minimum where we can still support DAX of at least a single page. Cc: Matthew Wilcox Reported-by: Cheng-mean Liu Signed-off-by: Dan Williams (cherry picked from commit f2ba5a5baecf795c2150826bd0c95fc3f7f3d226) Bug: 146400078 Bug: 148297388 Change-Id: I362c3d1bf27921f69e78a9c34674176e85e391f7 Signed-off-by: Alistair Delva --- include/uapi/linux/ndctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 145f242c7c90..c58bf7b7d906 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -257,7 +257,7 @@ enum nd_driver_flags { }; enum { - ND_MIN_NAMESPACE_SIZE = 0x00400000, + ND_MIN_NAMESPACE_SIZE = PAGE_SIZE, }; enum ars_masks { From 153e6e252a37429cafb850d285a36a60bc76fd89 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Sep 2018 10:19:30 +0100 Subject: [PATCH 2851/3715] UPSTREAM: UAPI: ndctl: Remove use of PAGE_SIZE The macro PAGE_SIZE isn't valid outside of the kernel, so it should not appear in UAPI headers. Furthermore, the actual machine page size could theoretically change from an application's point of view if it's running in a container that gets migrated to another machine (say 4K/ppc64 to 64K/ppc64). Fixes: f2ba5a5baecf ("libnvdimm, namespace: make min namespace size 4K") Signed-off-by: David Howells Signed-off-by: Dan Williams (cherry picked from commit f366d322aea782cf786aa821d5accdc1609f9e10) Bug: 146400078 Bug: 148297388 Change-Id: I9eda3e848190b5bd26e5fc7f4d3cfdcb648fd815 Signed-off-by: Alistair Delva --- include/linux/ndctl.h | 22 ++++++++++++++++++++++ include/uapi/linux/ndctl.h | 4 ---- 2 files changed, 22 insertions(+), 4 deletions(-) create mode 100644 include/linux/ndctl.h diff --git a/include/linux/ndctl.h b/include/linux/ndctl.h new file mode 100644 index 000000000000..cd5a293ce3ae --- /dev/null +++ b/include/linux/ndctl.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2014-2016, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU Lesser General Public License, + * version 2.1, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + */ +#ifndef _LINUX_NDCTL_H +#define _LINUX_NDCTL_H + +#include + +enum { + ND_MIN_NAMESPACE_SIZE = PAGE_SIZE, +}; + +#endif /* _LINUX_NDCTL_H */ diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index c58bf7b7d906..0303ad623ab4 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -256,10 +256,6 @@ enum nd_driver_flags { ND_DRIVER_DAX_PMEM = 1 << ND_DEVICE_DAX_PMEM, }; -enum { - ND_MIN_NAMESPACE_SIZE = PAGE_SIZE, -}; - enum ars_masks { ARS_STATUS_MASK = 0x0000FFFF, ARS_EXT_STATUS_SHIFT = 16, From 8497411066cc5f74872cd81d4b1cb0ea4bb4f839 Mon Sep 17 00:00:00 2001 From: Ram Muthiah Date: Tue, 28 Jan 2020 14:15:46 -0800 Subject: [PATCH 2852/3715] ANDROID: cuttlefish_defconfig: set CONFIG_IKHEADERS to y Change-Id: I0ed529c1e2fee8890c7e97d35097197f7d9f0e6a Signed-off-by: Ram Muthiah Bug: 143710295 Test: Treehugger --- arch/arm64/configs/cuttlefish_defconfig | 2 +- arch/x86/configs/x86_64_cuttlefish_defconfig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index 7fd46d9c1629..6765af189582 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -9,7 +9,7 @@ CONFIG_TASK_IO_ACCOUNTING=y CONFIG_PSI=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y -CONFIG_IKHEADERS=m +CONFIG_IKHEADERS=y CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y CONFIG_RT_GROUP_SCHED=y diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index ca27bedba3bf..cbb844f4b3d6 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -12,7 +12,7 @@ CONFIG_TASK_IO_ACCOUNTING=y CONFIG_PSI=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y -CONFIG_IKHEADERS=m +CONFIG_IKHEADERS=y CONFIG_CGROUPS=y CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y From 141c248217d7556cc1a2d7c31ad1a9aecaef754c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Wed, 29 Jan 2020 06:45:56 -0800 Subject: [PATCH 2853/3715] ANDROID: fix bpf jit + cfi interactions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit change from: https://android-review.googlesource.com/c/kernel/common/+/1126406 ANDROID: bpf: validate bpf_func when BPF_JIT is enabled with CFI was incorrectly reverted in: https://android-review.googlesource.com/c/kernel/common/+/1184358 UPSTREAM: bpf: multi program support for cgroup+bpf Test: builds Bug: 121213201 Bug: 138317270 Signed-off-by: Maciej Żenczykowski Change-Id: I2b238de61340e58eb71aaa6cf6b59945a8740a08 --- include/linux/filter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 5a5786240006..f33f80ee9dc6 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -536,7 +536,7 @@ static inline void bpf_jit_set_header_magic(struct bpf_binary_header *hdr) } #endif -#define BPF_PROG_RUN(filter, ctx) (*(filter)->bpf_func)(ctx, (filter)->insnsi) +#define BPF_PROG_RUN(filter, ctx) bpf_call_func(filter, ctx) #define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN From 98c326e94fad8628bae2bd62e6ae0c179972bbd1 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Thu, 9 Jan 2020 11:48:58 -0800 Subject: [PATCH 2854/3715] ANDROID: Incremental fs: Enable incremental-fs in cuttlefish_defconfig Bug: 133435829 Signed-off-by: Paul Lawrence Change-Id: Iaf3926ae0bdf2363d61f83254f4150288173911f --- arch/arm64/configs/cuttlefish_defconfig | 1 + arch/x86/configs/x86_64_cuttlefish_defconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index 6765af189582..d0d8ee4a9c24 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -437,6 +437,7 @@ CONFIG_QUOTA=y CONFIG_QFMT_V2=y CONFIG_FUSE_FS=y CONFIG_OVERLAY_FS=y +CONFIG_INCREMENTAL_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index cbb844f4b3d6..ba51925cfe19 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -453,6 +453,7 @@ CONFIG_QFMT_V2=y CONFIG_AUTOFS4_FS=y CONFIG_FUSE_FS=y CONFIG_OVERLAY_FS=y +CONFIG_INCREMENTAL_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y From 75f80b3632ccbc11b48cb164b5ef0a49b5663a57 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 29 Jan 2020 16:04:42 -0800 Subject: [PATCH 2855/3715] ANDROID: kallsyms: strip hashes from function names with ThinLTO With CONFIG_THINLTO and CFI both enabled, LLVM appends a hash to the names of all static functions. This breaks userspace tools, so strip out the hash from output. Bug: 147422318 Change-Id: Ibea6be089d530e92dcd191481cb02549041203f6 Signed-off-by: Sami Tolvanen --- kernel/kallsyms.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 06432e1a792a..4ad367a05274 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -314,6 +314,12 @@ static inline void cleanup_symbol_name(char *s) { char *res; +#ifdef CONFIG_THINLTO + /* Filter out hashes from static functions */ + res = strrchr(s, '$'); + if (res) + *res = '\0'; +#endif res = strrchr(s, '.'); if (res && !strcmp(res, ".cfi")) *res = '\0'; From 5fc0809078fd047fdd840cfa4d1f7f81612b1f7c Mon Sep 17 00:00:00 2001 From: Ram Muthiah Date: Thu, 30 Jan 2020 21:39:15 -0800 Subject: [PATCH 2856/3715] ANDROID: cf_defconfig: removed old VIRTIO configs SCSI_VIRTIO and VIRTIO_BALLOON are not needed to run cuttlefish. Test: Treehugger Bug: 135937364 Change-Id: Ie65598c1adc834d2a88799d73eee55bc86d329ae Signed-off-by: Ram Muthiah --- arch/arm64/configs/cuttlefish_defconfig | 2 -- arch/x86/configs/x86_64_cuttlefish_defconfig | 2 -- 2 files changed, 4 deletions(-) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index d0d8ee4a9c24..cebf887fabcc 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -220,7 +220,6 @@ CONFIG_UID_SYS_STATS=y CONFIG_SCSI=y # CONFIG_SCSI_PROC_FS is not set CONFIG_BLK_DEV_SD=y -CONFIG_SCSI_VIRTIO=y CONFIG_MD=y CONFIG_BLK_DEV_DM=y CONFIG_DM_CRYPT=y @@ -405,7 +404,6 @@ CONFIG_RTC_DRV_PL031=y CONFIG_VIRTIO_PCI=y # CONFIG_VIRTIO_PCI_LEGACY is not set CONFIG_VIRTIO_PMEM=y -CONFIG_VIRTIO_BALLOON=y CONFIG_VIRTIO_INPUT=y CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index ba51925cfe19..1f8f9ebca55e 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -234,7 +234,6 @@ CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SPI_ATTRS=y -CONFIG_SCSI_VIRTIO=y CONFIG_MD=y CONFIG_BLK_DEV_DM=y CONFIG_DM_CRYPT=y @@ -421,7 +420,6 @@ CONFIG_RTC_DRV_TEST=y CONFIG_SW_SYNC=y CONFIG_VIRTIO_PCI=y CONFIG_VIRTIO_PMEM=y -CONFIG_VIRTIO_BALLOON=y CONFIG_VIRTIO_INPUT=y CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y From 19a55f5defbc82dee173cc91f480cb8d5dcc3bc6 Mon Sep 17 00:00:00 2001 From: Ram Muthiah Date: Fri, 31 Jan 2020 18:03:10 -0800 Subject: [PATCH 2857/3715] ANDROID: cf_defconfig: removed CONFIG_CAN Test: Treehugger Change-Id: Ib973c53d0c58906fcbf551f6cda5a2c915119742 Signed-off-by: Ram Muthiah --- arch/arm64/configs/cuttlefish_defconfig | 4 ---- arch/x86/configs/x86_64_cuttlefish_defconfig | 4 ---- 2 files changed, 8 deletions(-) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index cebf887fabcc..7b49457d4b61 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -196,10 +196,6 @@ CONFIG_NET_CLS_ACT=y CONFIG_VSOCKETS=y CONFIG_VIRTIO_VSOCKETS=y CONFIG_BPF_JIT=y -CONFIG_CAN=y -# CONFIG_CAN_BCM is not set -# CONFIG_CAN_GW is not set -CONFIG_CAN_VCAN=y CONFIG_CFG80211=y # CONFIG_CFG80211_DEFAULT_PS is not set CONFIG_MAC80211=y diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 1f8f9ebca55e..35ae5541007f 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -207,10 +207,6 @@ CONFIG_NET_CLS_ACT=y CONFIG_VSOCKETS=y CONFIG_VIRTIO_VSOCKETS=y CONFIG_BPF_JIT=y -CONFIG_CAN=y -# CONFIG_CAN_BCM is not set -# CONFIG_CAN_GW is not set -CONFIG_CAN_VCAN=y CONFIG_CFG80211=y CONFIG_MAC80211=y CONFIG_RFKILL=y From 24ac4314b915c8c6687717b5db4ff004ad034456 Mon Sep 17 00:00:00 2001 From: Ram Muthiah Date: Fri, 31 Jan 2020 18:26:35 -0800 Subject: [PATCH 2858/3715] ANDROID: cf_defconfig: Remove VIRTIO_CONSOLE Test: Treehugger Change-Id: Icd7ab67ef6da790bec5895bb2a6a137e2a14845f Signed-off-by: Ram Muthiah --- arch/arm64/configs/cuttlefish_defconfig | 1 - arch/x86/configs/x86_64_cuttlefish_defconfig | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index 7b49457d4b61..930d43759d06 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -297,7 +297,6 @@ CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y CONFIG_SERIAL_DEV_BUS=y -CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y # CONFIG_HW_RANDOM_CAVIUM is not set diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 35ae5541007f..81950eb19c4f 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -311,7 +311,6 @@ CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_MANY_PORTS=y CONFIG_SERIAL_8250_SHARE_IRQ=y CONFIG_SERIAL_DEV_BUS=y -CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM=y # CONFIG_HW_RANDOM_INTEL is not set # CONFIG_HW_RANDOM_AMD is not set From a79def80c7a478ac5b07d16998cf2fa52affa13b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 10 Dec 2019 12:44:23 +0100 Subject: [PATCH 2859/3715] orinoco_usb: fix interface sanity check commit b73e05aa543cf8db4f4927e36952360d71291d41 upstream. Make sure to use the current alternate setting when verifying the interface descriptors to avoid binding to an invalid interface. Failing to do so could cause the driver to misbehave or trigger a WARN() in usb_submit_urb() that kernels with panic_on_warn set would choke on. Fixes: 9afac70a7305 ("orinoco: add orinoco_usb driver") Cc: stable # 2.6.35 Signed-off-by: Johan Hovold Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intersil/orinoco/orinoco_usb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c index 56f6e3b71f48..95015d74b1c0 100644 --- a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c +++ b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c @@ -1613,9 +1613,9 @@ static int ezusb_probe(struct usb_interface *interface, /* set up the endpoint information */ /* check out the endpoints */ - iface_desc = &interface->altsetting[0].desc; + iface_desc = &interface->cur_altsetting->desc; for (i = 0; i < iface_desc->bNumEndpoints; ++i) { - ep = &interface->altsetting[0].endpoint[i].desc; + ep = &interface->cur_altsetting->endpoint[i].desc; if (usb_endpoint_is_bulk_in(ep)) { /* we found a bulk in endpoint */ From 98a23baf0794352313aeda5d7ebcfb867db05929 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 10 Dec 2019 12:44:25 +0100 Subject: [PATCH 2860/3715] rsi_91x_usb: fix interface sanity check commit 3139b180906af43bc09bd3373fc2338a8271d9d9 upstream. Make sure to use the current alternate setting when verifying the interface descriptors to avoid binding to an invalid interface. Failing to do so could cause the driver to misbehave or trigger a WARN() in usb_submit_urb() that kernels with panic_on_warn set would choke on. Fixes: dad0d04fa7ba ("rsi: Add RS9113 wireless driver") Cc: stable # 3.15 Cc: Fariya Fatima Signed-off-by: Johan Hovold Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/rsi/rsi_91x_usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_usb.c b/drivers/net/wireless/rsi/rsi_91x_usb.c index f90c10b3c921..786a330bc470 100644 --- a/drivers/net/wireless/rsi/rsi_91x_usb.c +++ b/drivers/net/wireless/rsi/rsi_91x_usb.c @@ -105,7 +105,7 @@ static int rsi_find_bulk_in_and_out_endpoints(struct usb_interface *interface, __le16 buffer_size; int ii, bep_found = 0; - iface_desc = &(interface->altsetting[0]); + iface_desc = interface->cur_altsetting; for (ii = 0; ii < iface_desc->desc.bNumEndpoints; ++ii) { endpoint = &(iface_desc->endpoint[ii].desc); From 172b7f71488cfcb1426790bdefe334b1487fb65c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 22 Jan 2020 11:15:26 +0100 Subject: [PATCH 2861/3715] USB: serial: ir-usb: add missing endpoint sanity check commit 2988a8ae7476fe9535ab620320790d1714bdad1d upstream. Add missing endpoint sanity check to avoid dereferencing a NULL-pointer on open() in case a device lacks a bulk-out endpoint. Note that prior to commit f4a4cbb2047e ("USB: ir-usb: reimplement using generic framework") the oops would instead happen on open() if the device lacked a bulk-in endpoint and on write() if it lacked a bulk-out endpoint. Fixes: f4a4cbb2047e ("USB: ir-usb: reimplement using generic framework") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ir-usb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/ir-usb.c b/drivers/usb/serial/ir-usb.c index f9734a96d516..069e34a1f8e3 100644 --- a/drivers/usb/serial/ir-usb.c +++ b/drivers/usb/serial/ir-usb.c @@ -199,6 +199,9 @@ static int ir_startup(struct usb_serial *serial) struct usb_irda_cs_descriptor *irda_desc; int rates; + if (serial->num_bulk_in < 1 || serial->num_bulk_out < 1) + return -ENODEV; + irda_desc = irda_usb_find_class_desc(serial, 0); if (!irda_desc) { dev_err(&serial->dev->dev, From dc7692a8975be3e97a5e0df02ab619430eb3ead8 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 22 Jan 2020 11:15:27 +0100 Subject: [PATCH 2862/3715] USB: serial: ir-usb: fix link-speed handling commit 17a0184ca17e288decdca8b2841531e34d49285f upstream. Commit e0d795e4f36c ("usb: irda: cleanup on ir-usb module") added a USB IrDA header with common defines, but mistakingly switched to using the class-descriptor baud-rate bitmask values for the outbound header. This broke link-speed handling for rates above 9600 baud, but a device would also be able to operate at the default 9600 baud until a link-speed request was issued (e.g. using the TCGETS ioctl). Fixes: e0d795e4f36c ("usb: irda: cleanup on ir-usb module") Cc: stable # 2.6.27 Cc: Felipe Balbi Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ir-usb.c | 20 ++++++++++---------- include/linux/usb/irda.h | 13 ++++++++++++- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/drivers/usb/serial/ir-usb.c b/drivers/usb/serial/ir-usb.c index 069e34a1f8e3..e1c4a81a556c 100644 --- a/drivers/usb/serial/ir-usb.c +++ b/drivers/usb/serial/ir-usb.c @@ -339,34 +339,34 @@ static void ir_set_termios(struct tty_struct *tty, switch (baud) { case 2400: - ir_baud = USB_IRDA_BR_2400; + ir_baud = USB_IRDA_LS_2400; break; case 9600: - ir_baud = USB_IRDA_BR_9600; + ir_baud = USB_IRDA_LS_9600; break; case 19200: - ir_baud = USB_IRDA_BR_19200; + ir_baud = USB_IRDA_LS_19200; break; case 38400: - ir_baud = USB_IRDA_BR_38400; + ir_baud = USB_IRDA_LS_38400; break; case 57600: - ir_baud = USB_IRDA_BR_57600; + ir_baud = USB_IRDA_LS_57600; break; case 115200: - ir_baud = USB_IRDA_BR_115200; + ir_baud = USB_IRDA_LS_115200; break; case 576000: - ir_baud = USB_IRDA_BR_576000; + ir_baud = USB_IRDA_LS_576000; break; case 1152000: - ir_baud = USB_IRDA_BR_1152000; + ir_baud = USB_IRDA_LS_1152000; break; case 4000000: - ir_baud = USB_IRDA_BR_4000000; + ir_baud = USB_IRDA_LS_4000000; break; default: - ir_baud = USB_IRDA_BR_9600; + ir_baud = USB_IRDA_LS_9600; baud = 9600; } diff --git a/include/linux/usb/irda.h b/include/linux/usb/irda.h index 396d2b043e64..556a801efce3 100644 --- a/include/linux/usb/irda.h +++ b/include/linux/usb/irda.h @@ -119,11 +119,22 @@ struct usb_irda_cs_descriptor { * 6 - 115200 bps * 7 - 576000 bps * 8 - 1.152 Mbps - * 9 - 5 mbps + * 9 - 4 Mbps * 10..15 - Reserved */ #define USB_IRDA_STATUS_LINK_SPEED 0x0f +#define USB_IRDA_LS_NO_CHANGE 0 +#define USB_IRDA_LS_2400 1 +#define USB_IRDA_LS_9600 2 +#define USB_IRDA_LS_19200 3 +#define USB_IRDA_LS_38400 4 +#define USB_IRDA_LS_57600 5 +#define USB_IRDA_LS_115200 6 +#define USB_IRDA_LS_576000 7 +#define USB_IRDA_LS_1152000 8 +#define USB_IRDA_LS_4000000 9 + /* The following is a 4-bit value used only for * outbound header: * From 2cb7f8d0e7512189b3b7ea287ffce36d7831897f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 22 Jan 2020 11:15:28 +0100 Subject: [PATCH 2863/3715] USB: serial: ir-usb: fix IrLAP framing commit 38c0d5bdf4973f9f5a888166e9d3e9ed0d32057a upstream. Commit f4a4cbb2047e ("USB: ir-usb: reimplement using generic framework") switched to using the generic write implementation which may combine multiple write requests into larger transfers. This can break the IrLAP protocol where end-of-frame is determined using the USB short packet mechanism, for example, if multiple frames are sent in rapid succession. Fixes: f4a4cbb2047e ("USB: ir-usb: reimplement using generic framework") Cc: stable # 2.6.35 Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ir-usb.c | 113 +++++++++++++++++++++++++++++------- 1 file changed, 91 insertions(+), 22 deletions(-) diff --git a/drivers/usb/serial/ir-usb.c b/drivers/usb/serial/ir-usb.c index e1c4a81a556c..a3e3b4703f38 100644 --- a/drivers/usb/serial/ir-usb.c +++ b/drivers/usb/serial/ir-usb.c @@ -49,9 +49,10 @@ static int buffer_size; static int xbof = -1; static int ir_startup (struct usb_serial *serial); -static int ir_open(struct tty_struct *tty, struct usb_serial_port *port); -static int ir_prepare_write_buffer(struct usb_serial_port *port, - void *dest, size_t size); +static int ir_write(struct tty_struct *tty, struct usb_serial_port *port, + const unsigned char *buf, int count); +static int ir_write_room(struct tty_struct *tty); +static void ir_write_bulk_callback(struct urb *urb); static void ir_process_read_urb(struct urb *urb); static void ir_set_termios(struct tty_struct *tty, struct usb_serial_port *port, struct ktermios *old_termios); @@ -81,8 +82,9 @@ static struct usb_serial_driver ir_device = { .num_ports = 1, .set_termios = ir_set_termios, .attach = ir_startup, - .open = ir_open, - .prepare_write_buffer = ir_prepare_write_buffer, + .write = ir_write, + .write_room = ir_write_room, + .write_bulk_callback = ir_write_bulk_callback, .process_read_urb = ir_process_read_urb, }; @@ -258,35 +260,102 @@ static int ir_startup(struct usb_serial *serial) return 0; } -static int ir_open(struct tty_struct *tty, struct usb_serial_port *port) +static int ir_write(struct tty_struct *tty, struct usb_serial_port *port, + const unsigned char *buf, int count) { - int i; + struct urb *urb = NULL; + unsigned long flags; + int ret; - for (i = 0; i < ARRAY_SIZE(port->write_urbs); ++i) - port->write_urbs[i]->transfer_flags = URB_ZERO_PACKET; + if (port->bulk_out_size == 0) + return -EINVAL; - /* Start reading from the device */ - return usb_serial_generic_open(tty, port); -} + if (count == 0) + return 0; -static int ir_prepare_write_buffer(struct usb_serial_port *port, - void *dest, size_t size) -{ - unsigned char *buf = dest; - int count; + count = min(count, port->bulk_out_size - 1); + + spin_lock_irqsave(&port->lock, flags); + if (__test_and_clear_bit(0, &port->write_urbs_free)) { + urb = port->write_urbs[0]; + port->tx_bytes += count; + } + spin_unlock_irqrestore(&port->lock, flags); + + if (!urb) + return 0; /* * The first byte of the packet we send to the device contains an - * inbound header which indicates an additional number of BOFs and + * outbound header which indicates an additional number of BOFs and * a baud rate change. * * See section 5.4.2.2 of the USB IrDA spec. */ - *buf = ir_xbof | ir_baud; + *(u8 *)urb->transfer_buffer = ir_xbof | ir_baud; - count = kfifo_out_locked(&port->write_fifo, buf + 1, size - 1, - &port->lock); - return count + 1; + memcpy(urb->transfer_buffer + 1, buf, count); + + urb->transfer_buffer_length = count + 1; + urb->transfer_flags = URB_ZERO_PACKET; + + ret = usb_submit_urb(urb, GFP_ATOMIC); + if (ret) { + dev_err(&port->dev, "failed to submit write urb: %d\n", ret); + + spin_lock_irqsave(&port->lock, flags); + __set_bit(0, &port->write_urbs_free); + port->tx_bytes -= count; + spin_unlock_irqrestore(&port->lock, flags); + + return ret; + } + + return count; +} + +static void ir_write_bulk_callback(struct urb *urb) +{ + struct usb_serial_port *port = urb->context; + int status = urb->status; + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + __set_bit(0, &port->write_urbs_free); + port->tx_bytes -= urb->transfer_buffer_length - 1; + spin_unlock_irqrestore(&port->lock, flags); + + switch (status) { + case 0: + break; + case -ENOENT: + case -ECONNRESET: + case -ESHUTDOWN: + dev_dbg(&port->dev, "write urb stopped: %d\n", status); + return; + case -EPIPE: + dev_err(&port->dev, "write urb stopped: %d\n", status); + return; + default: + dev_err(&port->dev, "nonzero write-urb status: %d\n", status); + break; + } + + usb_serial_port_softint(port); +} + +static int ir_write_room(struct tty_struct *tty) +{ + struct usb_serial_port *port = tty->driver_data; + int count = 0; + + if (port->bulk_out_size == 0) + return 0; + + if (test_bit(0, &port->write_urbs_free)) + count = port->bulk_out_size - 1; + + return count; } static void ir_process_read_urb(struct urb *urb) From e1b4f5461f8b7f11d3882e2725f0fed2632ef02e Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Wed, 11 Dec 2019 10:10:03 -0600 Subject: [PATCH 2864/3715] usb: dwc3: turn off VBUS when leaving host mode commit 09ed259fac621634d51cd986aa8d65f035662658 upstream. VBUS should be turned off when leaving the host mode. Set GCTL_PRTCAP to device mode in teardown to de-assert DRVVBUS pin to turn off VBUS power. Fixes: 5f94adfeed97 ("usb: dwc3: core: refactor mode initialization to its own function") Cc: stable@vger.kernel.org Signed-off-by: Bin Liu Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index a497b878c3e2..021899c58028 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1031,6 +1031,9 @@ static void dwc3_core_exit_mode(struct dwc3 *dwc) /* do nothing */ break; } + + /* de-assert DRVVBUS for HOST and OTG mode */ + dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_DEVICE); } static void dwc3_get_properties(struct dwc3 *dwc) From 6560fd66278fec045902010ff7b07878a13e501f Mon Sep 17 00:00:00 2001 From: Andrey Shvetsov Date: Thu, 16 Jan 2020 18:22:39 +0100 Subject: [PATCH 2865/3715] staging: most: net: fix buffer overflow commit 4d1356ac12f4d5180d0df345d85ff0ee42b89c72 upstream. If the length of the socket buffer is 0xFFFFFFFF (max size for an unsigned int), then payload_len becomes 0xFFFFFFF1 after subtracting 14 (ETH_HLEN). Then, mdp_len is set to payload_len + 16 (MDP_HDR_LEN) which overflows and results in a value of 2. These values for payload_len and mdp_len will pass current buffer size checks. This patch checks if derived from skb->len sum may overflow. The check is based on the following idea: For any `unsigned V1, V2` and derived `unsigned SUM = V1 + V2`, `V1 + V2` overflows iif `SUM < V1`. Reported-by: Greg Kroah-Hartman Signed-off-by: Andrey Shvetsov Cc: stable Link: https://lore.kernel.org/r/20200116172238.6046-1-andrey.shvetsov@microchip.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/most/aim-network/networking.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/staging/most/aim-network/networking.c b/drivers/staging/most/aim-network/networking.c index 936f013c350e..6398c27563c9 100644 --- a/drivers/staging/most/aim-network/networking.c +++ b/drivers/staging/most/aim-network/networking.c @@ -85,6 +85,11 @@ static int skb_to_mamac(const struct sk_buff *skb, struct mbo *mbo) unsigned int payload_len = skb->len - ETH_HLEN; unsigned int mdp_len = payload_len + MDP_HDR_LEN; + if (mdp_len < skb->len) { + pr_err("drop: too large packet! (%u)\n", skb->len); + return -EINVAL; + } + if (mbo->buffer_length < mdp_len) { pr_err("drop: too small buffer! (%d for %d)\n", mbo->buffer_length, mdp_len); @@ -132,6 +137,11 @@ static int skb_to_mep(const struct sk_buff *skb, struct mbo *mbo) u8 *buff = mbo->virt_address; unsigned int mep_len = skb->len + MEP_HDR_LEN; + if (mep_len < skb->len) { + pr_err("drop: too large packet! (%u)\n", skb->len); + return -EINVAL; + } + if (mbo->buffer_length < mep_len) { pr_err("drop: too small buffer! (%d for %d)\n", mbo->buffer_length, mep_len); From 07219a4cf2fca988405fb835ef950191ec972dd8 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 14 Jan 2020 18:16:04 +0000 Subject: [PATCH 2866/3715] staging: wlan-ng: ensure error return is actually returned commit 4cc41cbce536876678b35e03c4a8a7bb72c78fa9 upstream. Currently when the call to prism2sta_ifst fails a netdev_err error is reported, error return variable result is set to -1 but the function always returns 0 for success. Fix this by returning the error value in variable result rather than 0. Addresses-Coverity: ("Unused value") Fixes: 00b3ed168508 ("Staging: add wlan-ng prism2 usb driver") Signed-off-by: Colin Ian King Cc: stable Link: https://lore.kernel.org/r/20200114181604.390235-1-colin.king@canonical.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wlan-ng/prism2mgmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/wlan-ng/prism2mgmt.c b/drivers/staging/wlan-ng/prism2mgmt.c index c4aa9e7e7003..be89a0ee44bf 100644 --- a/drivers/staging/wlan-ng/prism2mgmt.c +++ b/drivers/staging/wlan-ng/prism2mgmt.c @@ -945,7 +945,7 @@ int prism2mgmt_flashdl_state(struct wlandevice *wlandev, void *msgp) } } - return 0; + return result; } /*---------------------------------------------------------------- From abb33ca718674b7bc027dd16377b3553b44f4ee2 Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Wed, 8 Jan 2020 21:40:58 +0000 Subject: [PATCH 2867/3715] staging: vt6656: correct packet types for CTS protect, mode. commit d971fdd3412f8342747778fb59b8803720ed82b1 upstream. It appears that the driver still transmits in CTS protect mode even though it is not enabled in mac80211. That is both packet types PK_TYPE_11GA and PK_TYPE_11GB both use CTS protect. The only difference between them GA does not use B rates. Find if only B rate in GB or GA in protect mode otherwise transmit packets as PK_TYPE_11A. Cc: stable Signed-off-by: Malcolm Priestley Link: https://lore.kernel.org/r/9c1323ff-dbb3-0eaa-43e1-9453f7390dc0@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vt6656/device.h | 2 ++ drivers/staging/vt6656/rxtx.c | 12 ++++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/staging/vt6656/device.h b/drivers/staging/vt6656/device.h index 705fffa59da9..41a4f9555d07 100644 --- a/drivers/staging/vt6656/device.h +++ b/drivers/staging/vt6656/device.h @@ -62,6 +62,8 @@ #define RATE_AUTO 12 #define MAX_RATE 12 +#define VNT_B_RATES (BIT(RATE_1M) | BIT(RATE_2M) |\ + BIT(RATE_5M) | BIT(RATE_11M)) /* * device specific diff --git a/drivers/staging/vt6656/rxtx.c b/drivers/staging/vt6656/rxtx.c index a44abcce6fb4..f15990491ce4 100644 --- a/drivers/staging/vt6656/rxtx.c +++ b/drivers/staging/vt6656/rxtx.c @@ -825,10 +825,14 @@ int vnt_tx_packet(struct vnt_private *priv, struct sk_buff *skb) if (info->band == NL80211_BAND_5GHZ) { pkt_type = PK_TYPE_11A; } else { - if (tx_rate->flags & IEEE80211_TX_RC_USE_CTS_PROTECT) - pkt_type = PK_TYPE_11GB; - else - pkt_type = PK_TYPE_11GA; + if (tx_rate->flags & IEEE80211_TX_RC_USE_CTS_PROTECT) { + if (priv->basic_rates & VNT_B_RATES) + pkt_type = PK_TYPE_11GB; + else + pkt_type = PK_TYPE_11GA; + } else { + pkt_type = PK_TYPE_11A; + } } } else { pkt_type = PK_TYPE_11B; From bba758d3f3b9d3295f191e42edc7bd9820f183a1 Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Wed, 8 Jan 2020 21:41:20 +0000 Subject: [PATCH 2868/3715] staging: vt6656: use NULLFUCTION stack on mac80211 commit d579c43c82f093e63639151625b2139166c730fd upstream. It appears that the drivers does not go into power save correctly the NULL data packets are not being transmitted because it not enabled in mac80211. The driver needs to capture ieee80211_is_nullfunc headers and copy the duration_id to it's own duration data header. Cc: stable Signed-off-by: Malcolm Priestley Link: https://lore.kernel.org/r/610971ae-555b-a6c3-61b3-444a0c1e35b4@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vt6656/main_usb.c | 1 + drivers/staging/vt6656/rxtx.c | 14 +++++--------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/staging/vt6656/main_usb.c b/drivers/staging/vt6656/main_usb.c index 645ea16b53d5..e8ccd800c94f 100644 --- a/drivers/staging/vt6656/main_usb.c +++ b/drivers/staging/vt6656/main_usb.c @@ -977,6 +977,7 @@ vt6656_probe(struct usb_interface *intf, const struct usb_device_id *id) ieee80211_hw_set(priv->hw, RX_INCLUDES_FCS); ieee80211_hw_set(priv->hw, REPORTS_TX_ACK_STATUS); ieee80211_hw_set(priv->hw, SUPPORTS_PS); + ieee80211_hw_set(priv->hw, PS_NULLFUNC_STACK); priv->hw->max_signal = 100; diff --git a/drivers/staging/vt6656/rxtx.c b/drivers/staging/vt6656/rxtx.c index f15990491ce4..f78f31ce6443 100644 --- a/drivers/staging/vt6656/rxtx.c +++ b/drivers/staging/vt6656/rxtx.c @@ -288,11 +288,9 @@ static u16 vnt_rxtx_datahead_g(struct vnt_usb_send_context *tx_context, PK_TYPE_11B, &buf->b); /* Get Duration and TimeStamp */ - if (ieee80211_is_pspoll(hdr->frame_control)) { - __le16 dur = cpu_to_le16(priv->current_aid | BIT(14) | BIT(15)); - - buf->duration_a = dur; - buf->duration_b = dur; + if (ieee80211_is_nullfunc(hdr->frame_control)) { + buf->duration_a = hdr->duration_id; + buf->duration_b = hdr->duration_id; } else { buf->duration_a = vnt_get_duration_le(priv, tx_context->pkt_type, need_ack); @@ -381,10 +379,8 @@ static u16 vnt_rxtx_datahead_ab(struct vnt_usb_send_context *tx_context, tx_context->pkt_type, &buf->ab); /* Get Duration and TimeStampOff */ - if (ieee80211_is_pspoll(hdr->frame_control)) { - __le16 dur = cpu_to_le16(priv->current_aid | BIT(14) | BIT(15)); - - buf->duration = dur; + if (ieee80211_is_nullfunc(hdr->frame_control)) { + buf->duration = hdr->duration_id; } else { buf->duration = vnt_get_duration_le(priv, tx_context->pkt_type, need_ack); From c6c4999a16f2af38ca4fb8cb57a86062d15961d1 Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Wed, 8 Jan 2020 21:41:36 +0000 Subject: [PATCH 2869/3715] staging: vt6656: Fix false Tx excessive retries reporting. commit 9dd631fa99dc0a0dfbd191173bf355ba30ea786a upstream. The driver reporting IEEE80211_TX_STAT_ACK is not being handled correctly. The driver should only report on TSR_TMO flag is not set indicating no transmission errors and when not IEEE80211_TX_CTL_NO_ACK is being requested. Cc: stable Signed-off-by: Malcolm Priestley Link: https://lore.kernel.org/r/340f1f7f-c310-dca5-476f-abc059b9cd97@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vt6656/int.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/staging/vt6656/int.c b/drivers/staging/vt6656/int.c index c6ffbe0e2728..c521729c4192 100644 --- a/drivers/staging/vt6656/int.c +++ b/drivers/staging/vt6656/int.c @@ -107,9 +107,11 @@ static int vnt_int_report_rate(struct vnt_private *priv, u8 pkt_no, u8 tsr) info->status.rates[0].count = tx_retry; - if (!(tsr & (TSR_TMO | TSR_RETRYTMO))) { + if (!(tsr & TSR_TMO)) { info->status.rates[0].idx = idx; - info->flags |= IEEE80211_TX_STAT_ACK; + + if (!(info->flags & IEEE80211_TX_CTL_NO_ACK)) + info->flags |= IEEE80211_TX_STAT_ACK; } ieee80211_tx_status_irqsafe(priv->hw, context->skb); From a2e73efbb3125dfe1e19cccd883f1721b342a760 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 16 Jan 2020 13:14:01 +0100 Subject: [PATCH 2870/3715] serial: 8250_bcm2835aux: Fix line mismatch on driver unbind commit dc76697d7e933d5e299116f219c890568785ea15 upstream. Unbinding the bcm2835aux UART driver raises the following error if the maximum number of 8250 UARTs is set to 1 (via the 8250.nr_uarts module parameter or CONFIG_SERIAL_8250_RUNTIME_UARTS): (NULL device *): Removing wrong port: a6f80333 != fa20408b That's because bcm2835aux_serial_probe() retrieves UART line number 1 from the devicetree and stores it in data->uart.port.line, while serial8250_register_8250_port() instead uses UART line number 0, which is stored in data->line. On driver unbind, bcm2835aux_serial_remove() uses data->uart.port.line, which contains the wrong number. Fix it. The issue does not occur if the maximum number of 8250 UARTs is >= 2. Fixes: bdc5f3009580 ("serial: bcm2835: add driver for bcm2835-aux-uart") Signed-off-by: Lukas Wunner Cc: stable@vger.kernel.org # v4.6+ Cc: Martin Sperl Reviewed-by: Nicolas Saenz Julienne Tested-by: Nicolas Saenz Julienne Link: https://lore.kernel.org/r/912ccf553c5258135c6d7e8f404a101ef320f0f4.1579175223.git.lukas@wunner.de Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_bcm2835aux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_bcm2835aux.c b/drivers/tty/serial/8250/8250_bcm2835aux.c index a23c7da42ea8..7bbcae75e651 100644 --- a/drivers/tty/serial/8250/8250_bcm2835aux.c +++ b/drivers/tty/serial/8250/8250_bcm2835aux.c @@ -119,7 +119,7 @@ static int bcm2835aux_serial_remove(struct platform_device *pdev) { struct bcm2835aux_data *data = platform_get_drvdata(pdev); - serial8250_unregister_port(data->uart.port.line); + serial8250_unregister_port(data->line); clk_disable_unprepare(data->clk); return 0; From 6a35190cc2ed47344e719694f1a99255ef0dc20a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 30 Dec 2019 21:19:31 -0600 Subject: [PATCH 2871/3715] crypto: chelsio - fix writing tfm flags to wrong place commit bd56cea012fc2d6381e8cd3209510ce09f9de8c9 upstream. The chelsio crypto driver is casting 'struct crypto_aead' directly to 'struct crypto_tfm', which is incorrect because the crypto_tfm isn't the first field of 'struct crypto_aead'. Consequently, the calls to crypto_tfm_set_flags() are modifying some other field in the struct. Also, the driver is setting CRYPTO_TFM_RES_BAD_KEY_LEN in ->setauthsize(), not just in ->setkey(). This is incorrect since this flag is for bad key lengths, not for bad authentication tag lengths. Fix these bugs by removing the broken crypto_tfm_set_flags() calls from ->setauthsize() and by fixing them in ->setkey(). Fixes: 324429d74127 ("chcr: Support for Chelsio's Crypto Hardware") Cc: # v4.9+ Cc: Atul Gupta Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/chelsio/chcr_algo.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c index bb7b59fc5c08..8d39f3a07bf8 100644 --- a/drivers/crypto/chelsio/chcr_algo.c +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -2693,9 +2693,6 @@ static int chcr_gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize) aeadctx->mayverify = VERIFY_SW; break; default: - - crypto_tfm_set_flags((struct crypto_tfm *) tfm, - CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } return crypto_aead_setauthsize(aeadctx->sw_cipher, authsize); @@ -2720,8 +2717,6 @@ static int chcr_4106_4309_setauthsize(struct crypto_aead *tfm, aeadctx->mayverify = VERIFY_HW; break; default: - crypto_tfm_set_flags((struct crypto_tfm *)tfm, - CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } return crypto_aead_setauthsize(aeadctx->sw_cipher, authsize); @@ -2762,8 +2757,6 @@ static int chcr_ccm_setauthsize(struct crypto_aead *tfm, aeadctx->mayverify = VERIFY_HW; break; default: - crypto_tfm_set_flags((struct crypto_tfm *)tfm, - CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } return crypto_aead_setauthsize(aeadctx->sw_cipher, authsize); @@ -2790,8 +2783,7 @@ static int chcr_ccm_common_setkey(struct crypto_aead *aead, ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256; mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_256; } else { - crypto_tfm_set_flags((struct crypto_tfm *)aead, - CRYPTO_TFM_RES_BAD_KEY_LEN); + crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); aeadctx->enckey_len = 0; return -EINVAL; } @@ -2831,8 +2823,7 @@ static int chcr_aead_rfc4309_setkey(struct crypto_aead *aead, const u8 *key, int error; if (keylen < 3) { - crypto_tfm_set_flags((struct crypto_tfm *)aead, - CRYPTO_TFM_RES_BAD_KEY_LEN); + crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); aeadctx->enckey_len = 0; return -EINVAL; } @@ -2883,8 +2874,7 @@ static int chcr_gcm_setkey(struct crypto_aead *aead, const u8 *key, } else if (keylen == AES_KEYSIZE_256) { ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256; } else { - crypto_tfm_set_flags((struct crypto_tfm *)aead, - CRYPTO_TFM_RES_BAD_KEY_LEN); + crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); pr_err("GCM: Invalid key length %d\n", keylen); ret = -EINVAL; goto out; From 2f4c65c23fcd208138836e1bd0aa3198a43d4c08 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 10 Dec 2019 12:44:20 +0100 Subject: [PATCH 2872/3715] ath9k: fix storage endpoint lookup commit 0ef332951e856efa89507cdd13ba8f4fb8d4db12 upstream. Make sure to use the current alternate setting when verifying the storage interface descriptors to avoid submitting an URB to an invalid endpoint. Failing to do so could cause the driver to misbehave or trigger a WARN() in usb_submit_urb() that kernels with panic_on_warn set would choke on. Fixes: 36bcce430657 ("ath9k_htc: Handle storage devices") Cc: stable # 2.6.39 Signed-off-by: Johan Hovold Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/hif_usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.c b/drivers/net/wireless/ath/ath9k/hif_usb.c index c5f4dd808745..6f669166c263 100644 --- a/drivers/net/wireless/ath/ath9k/hif_usb.c +++ b/drivers/net/wireless/ath/ath9k/hif_usb.c @@ -1214,7 +1214,7 @@ err_fw: static int send_eject_command(struct usb_interface *interface) { struct usb_device *udev = interface_to_usbdev(interface); - struct usb_host_interface *iface_desc = &interface->altsetting[0]; + struct usb_host_interface *iface_desc = interface->cur_altsetting; struct usb_endpoint_descriptor *endpoint; unsigned char *cmd; u8 bulk_out_ep; From 684cf943bb4782b49a28de5f5d0d3bf97c152c4b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 10 Dec 2019 12:44:22 +0100 Subject: [PATCH 2873/3715] brcmfmac: fix interface sanity check commit 3428fbcd6e6c0850b1a8b2a12082b7b2aabb3da3 upstream. Make sure to use the current alternate setting when verifying the interface descriptors to avoid binding to an invalid interface. Failing to do so could cause the driver to misbehave or trigger a WARN() in usb_submit_urb() that kernels with panic_on_warn set would choke on. Fixes: 71bb244ba2fd ("brcm80211: fmac: add USB support for bcm43235/6/8 chipsets") Cc: stable # 3.4 Cc: Arend van Spriel Signed-off-by: Johan Hovold Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c index be855aa32154..2eb5fe7367c6 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c @@ -1333,7 +1333,7 @@ brcmf_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) goto fail; } - desc = &intf->altsetting[0].desc; + desc = &intf->cur_altsetting->desc; if ((desc->bInterfaceClass != USB_CLASS_VENDOR_SPEC) || (desc->bInterfaceSubClass != 2) || (desc->bInterfaceProtocol != 0xff)) { @@ -1346,7 +1346,7 @@ brcmf_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) num_of_eps = desc->bNumEndpoints; for (ep = 0; ep < num_of_eps; ep++) { - endpoint = &intf->altsetting[0].endpoint[ep].desc; + endpoint = &intf->cur_altsetting->endpoint[ep].desc; endpoint_num = usb_endpoint_num(endpoint); if (!usb_endpoint_xfer_bulk(endpoint)) continue; From 419d0f93ac3c043c73b6387090cb3bee416e53fe Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 10 Dec 2019 12:44:24 +0100 Subject: [PATCH 2874/3715] rtl8xxxu: fix interface sanity check commit 39a4281c312f2d226c710bc656ce380c621a2b16 upstream. Make sure to use the current alternate setting when verifying the interface descriptors to avoid binding to an invalid interface. Failing to do so could cause the driver to misbehave or trigger a WARN() in usb_submit_urb() that kernels with panic_on_warn set would choke on. Fixes: 26f1fad29ad9 ("New driver: rtl8xxxu (mac80211)") Cc: stable # 4.4 Cc: Jes Sorensen Signed-off-by: Johan Hovold Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 73fc5952fd37..63f37fa72e4b 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -5921,7 +5921,7 @@ static int rtl8xxxu_parse_usb(struct rtl8xxxu_priv *priv, u8 dir, xtype, num; int ret = 0; - host_interface = &interface->altsetting[0]; + host_interface = interface->cur_altsetting; interface_desc = &host_interface->desc; endpoints = interface_desc->bNumEndpoints; From 108b4537f1a450a4b979857f3ad5113e485917ba Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 10 Dec 2019 12:44:26 +0100 Subject: [PATCH 2875/3715] zd1211rw: fix storage endpoint lookup commit 2d68bb2687abb747558b933e80845ff31570a49c upstream. Make sure to use the current alternate setting when verifying the storage interface descriptors to avoid submitting an URB to an invalid endpoint. Failing to do so could cause the driver to misbehave or trigger a WARN() in usb_submit_urb() that kernels with panic_on_warn set would choke on. Fixes: a1030e92c150 ("[PATCH] zd1211rw: Convert installer CDROM device into WLAN device") Cc: stable # 2.6.19 Signed-off-by: Johan Hovold Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/zydas/zd1211rw/zd_usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c index c30bf118c67d..1e396eb26ccf 100644 --- a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c +++ b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c @@ -1272,7 +1272,7 @@ static void print_id(struct usb_device *udev) static int eject_installer(struct usb_interface *intf) { struct usb_device *udev = interface_to_usbdev(intf); - struct usb_host_interface *iface_desc = &intf->altsetting[0]; + struct usb_host_interface *iface_desc = intf->cur_altsetting; struct usb_endpoint_descriptor *endpoint; unsigned char *cmd; u8 bulk_out_ep; From 0f6f0693493719ff84c7ff5ae161605d86aed619 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 26 Nov 2019 17:54:09 -0800 Subject: [PATCH 2876/3715] arc: eznps: fix allmodconfig kconfig warning [ Upstream commit 1928b36cfa4df1aeedf5f2644d0c33f3a1fcfd7b ] Fix kconfig warning for arch/arc/plat-eznps/Kconfig allmodconfig: WARNING: unmet direct dependencies detected for CLKSRC_NPS Depends on [n]: GENERIC_CLOCKEVENTS [=y] && !PHYS_ADDR_T_64BIT [=y] Selected by [y]: - ARC_PLAT_EZNPS [=y] Signed-off-by: Randy Dunlap Cc: Vineet Gupta Cc: Ofer Levi Cc: linux-snps-arc@lists.infradead.org Signed-off-by: Vineet Gupta Signed-off-by: Sasha Levin --- arch/arc/plat-eznps/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/plat-eznps/Kconfig b/arch/arc/plat-eznps/Kconfig index 8eff057efcae..ce908e2c5282 100644 --- a/arch/arc/plat-eznps/Kconfig +++ b/arch/arc/plat-eznps/Kconfig @@ -7,7 +7,7 @@ menuconfig ARC_PLAT_EZNPS bool "\"EZchip\" ARC dev platform" select CPU_BIG_ENDIAN - select CLKSRC_NPS + select CLKSRC_NPS if !PHYS_ADDR_T_64BIT select EZNPS_GIC select EZCHIP_NPS_MANAGEMENT_ENET if ETHERNET help From a30b232c18fa4736efa685f70cd00454a6e998d4 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 19 Nov 2019 15:57:11 +0100 Subject: [PATCH 2877/3715] HID: ite: Add USB id match for Acer SW5-012 keyboard dock [ Upstream commit 8f18eca9ebc57d6b150237033f6439242907e0ba ] The Acer SW5-012 2-in-1 keyboard dock uses a Synaptics S91028 touchpad which is connected to an ITE 8595 USB keyboard controller chip. This keyboard has the same quirk for its rfkill / airplane mode hotkey as other keyboards with the ITE 8595 chip, it only sends a single release event when pressed and released, it never sends a press event. This commit adds this keyboards USB id to the hid-ite id-table, fixing the rfkill key not working on this keyboard. Signed-off-by: Hans de Goede Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-ite.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 1e2e6e58256a..9d372fa7c298 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -1024,6 +1024,7 @@ #define USB_DEVICE_ID_SYNAPTICS_LTS2 0x1d10 #define USB_DEVICE_ID_SYNAPTICS_HD 0x0ac3 #define USB_DEVICE_ID_SYNAPTICS_QUAD_HD 0x1ac3 +#define USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012 0x2968 #define USB_DEVICE_ID_SYNAPTICS_TP_V103 0x5710 #define USB_VENDOR_ID_TEXAS_INSTRUMENTS 0x2047 diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c index 98b059d79bc8..2ce1eb0c9212 100644 --- a/drivers/hid/hid-ite.c +++ b/drivers/hid/hid-ite.c @@ -43,6 +43,9 @@ static int ite_event(struct hid_device *hdev, struct hid_field *field, static const struct hid_device_id ite_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ITE, USB_DEVICE_ID_ITE8595) }, { HID_USB_DEVICE(USB_VENDOR_ID_258A, USB_DEVICE_ID_258A_6A88) }, + /* ITE8595 USB kbd ctlr, with Synaptics touchpad connected to it. */ + { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, + USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) }, { } }; MODULE_DEVICE_TABLE(hid, ite_devices); From aba29b46dfea2367977a7bd06a2ccdead8bf48f9 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sun, 22 Dec 2019 10:17:02 -0800 Subject: [PATCH 2878/3715] phy: cpcap-usb: Prevent USB line glitches from waking up modem [ Upstream commit 63078b6ba09e842f09df052c5728857389fddcd2 ] The micro-USB connector on Motorola Mapphone devices can be muxed between the SoC and the mdm6600 modem. But even when used for the SoC, configuring the PHY with ID pin grounded will wake up the modem from idle state. Looks like the issue is probably caused by line glitches. We can prevent the glitches by using a previously unknown mode of the GPIO mux to prevent the USB lines from being connected to the moden while configuring the USB PHY, and enable the USB lines after configuring the PHY. Note that this only prevents waking up mdm6600 as regular USB A-host mode, and does not help when connected to a lapdock. The lapdock specific issue still needs to be debugged separately. Cc: Merlijn Wajer Cc: Pavel Machek Cc: Sebastian Reichel Acked-by: Pavel Machek Signed-off-by: Tony Lindgren Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Sasha Levin --- drivers/phy/motorola/phy-cpcap-usb.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/phy/motorola/phy-cpcap-usb.c b/drivers/phy/motorola/phy-cpcap-usb.c index 4ba3634009af..593c77dbde2e 100644 --- a/drivers/phy/motorola/phy-cpcap-usb.c +++ b/drivers/phy/motorola/phy-cpcap-usb.c @@ -115,7 +115,7 @@ struct cpcap_usb_ints_state { enum cpcap_gpio_mode { CPCAP_DM_DP, CPCAP_MDM_RX_TX, - CPCAP_UNKNOWN, + CPCAP_UNKNOWN_DISABLED, /* Seems to disable USB lines */ CPCAP_OTG_DM_DP, }; @@ -379,7 +379,8 @@ static int cpcap_usb_set_uart_mode(struct cpcap_phy_ddata *ddata) { int error; - error = cpcap_usb_gpio_set_mode(ddata, CPCAP_DM_DP); + /* Disable lines to prevent glitches from waking up mdm6600 */ + error = cpcap_usb_gpio_set_mode(ddata, CPCAP_UNKNOWN_DISABLED); if (error) goto out_err; @@ -406,6 +407,11 @@ static int cpcap_usb_set_uart_mode(struct cpcap_phy_ddata *ddata) if (error) goto out_err; + /* Enable UART mode */ + error = cpcap_usb_gpio_set_mode(ddata, CPCAP_DM_DP); + if (error) + goto out_err; + return 0; out_err: @@ -418,7 +424,8 @@ static int cpcap_usb_set_usb_mode(struct cpcap_phy_ddata *ddata) { int error; - error = cpcap_usb_gpio_set_mode(ddata, CPCAP_OTG_DM_DP); + /* Disable lines to prevent glitches from waking up mdm6600 */ + error = cpcap_usb_gpio_set_mode(ddata, CPCAP_UNKNOWN_DISABLED); if (error) return error; @@ -458,6 +465,11 @@ static int cpcap_usb_set_usb_mode(struct cpcap_phy_ddata *ddata) if (error) goto out_err; + /* Enable USB mode */ + error = cpcap_usb_gpio_set_mode(ddata, CPCAP_OTG_DM_DP); + if (error) + goto out_err; + return 0; out_err: From d2b24c965f8f91a8d4d284808201e8c1eb2c27dc Mon Sep 17 00:00:00 2001 From: David Engraf Date: Wed, 27 Nov 2019 09:46:17 +0100 Subject: [PATCH 2879/3715] watchdog: max77620_wdt: fix potential build errors [ Upstream commit da9e3f4e30a53cd420cf1e6961c3b4110f0f21f0 ] max77620_wdt uses watchdog core functions. Enable CONFIG_WATCHDOG_CORE to fix potential build errors. Signed-off-by: David Engraf Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20191127084617.16937-1-david.engraf@sysgo.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Sasha Levin --- drivers/watchdog/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index f55328a31629..fa15a683ae2d 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -563,6 +563,7 @@ config MAX63XX_WATCHDOG config MAX77620_WATCHDOG tristate "Maxim Max77620 Watchdog Timer" depends on MFD_MAX77620 || COMPILE_TEST + select WATCHDOG_CORE help This is the driver for the Max77620 watchdog timer. Say 'Y' here to enable the watchdog timer support for From 60cf76ecde8c765bc7721a390bb59d08d129efe1 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Fri, 13 Dec 2019 22:48:02 +0100 Subject: [PATCH 2880/3715] watchdog: rn5t618_wdt: fix module aliases [ Upstream commit a76dfb859cd42df6e3d1910659128ffcd2fb6ba2 ] Platform device aliases were missing so module autoloading did not work. Signed-off-by: Andreas Kemnade Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20191213214802.22268-1-andreas@kemnade.info Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Sasha Levin --- drivers/watchdog/rn5t618_wdt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/watchdog/rn5t618_wdt.c b/drivers/watchdog/rn5t618_wdt.c index e60f55702ab7..d2e79cf70e77 100644 --- a/drivers/watchdog/rn5t618_wdt.c +++ b/drivers/watchdog/rn5t618_wdt.c @@ -193,6 +193,7 @@ static struct platform_driver rn5t618_wdt_driver = { module_platform_driver(rn5t618_wdt_driver); +MODULE_ALIAS("platform:rn5t618-wdt"); MODULE_AUTHOR("Beniamino Galvani "); MODULE_DESCRIPTION("RN5T618 watchdog driver"); MODULE_LICENSE("GPL v2"); From b56f2a4a4327f609e30872e37eda6d73a6433ed2 Mon Sep 17 00:00:00 2001 From: "wuxu.wu" Date: Wed, 1 Jan 2020 11:39:41 +0800 Subject: [PATCH 2881/3715] spi: spi-dw: Add lock protect dw_spi rx/tx to prevent concurrent calls [ Upstream commit 19b61392c5a852b4e8a0bf35aecb969983c5932d ] dw_spi_irq() and dw_spi_transfer_one concurrent calls. I find a panic in dw_writer(): txw = *(u8 *)(dws->tx), when dw->tx==null, dw->len==4, and dw->tx_end==1. When tpm driver's message overtime dw_spi_irq() and dw_spi_transfer_one may concurrent visit dw_spi, so I think dw_spi structure lack of protection. Otherwise dw_spi_transfer_one set dw rx/tx buffer and then open irq, store dw rx/tx instructions and other cores handle irq load dw rx/tx instructions may out of order. [ 1025.321302] Call trace: ... [ 1025.321319] __crash_kexec+0x98/0x148 [ 1025.321323] panic+0x17c/0x314 [ 1025.321329] die+0x29c/0x2e8 [ 1025.321334] die_kernel_fault+0x68/0x78 [ 1025.321337] __do_kernel_fault+0x90/0xb0 [ 1025.321346] do_page_fault+0x88/0x500 [ 1025.321347] do_translation_fault+0xa8/0xb8 [ 1025.321349] do_mem_abort+0x68/0x118 [ 1025.321351] el1_da+0x20/0x8c [ 1025.321362] dw_writer+0xc8/0xd0 [ 1025.321364] interrupt_transfer+0x60/0x110 [ 1025.321365] dw_spi_irq+0x48/0x70 ... Signed-off-by: wuxu.wu Link: https://lore.kernel.org/r/1577849981-31489-1-git-send-email-wuxu.wu@huawei.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-dw.c | 15 ++++++++++++--- drivers/spi/spi-dw.h | 1 + 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-dw.c b/drivers/spi/spi-dw.c index b217c22ff72f..b461200871f8 100644 --- a/drivers/spi/spi-dw.c +++ b/drivers/spi/spi-dw.c @@ -180,9 +180,11 @@ static inline u32 rx_max(struct dw_spi *dws) static void dw_writer(struct dw_spi *dws) { - u32 max = tx_max(dws); + u32 max; u16 txw = 0; + spin_lock(&dws->buf_lock); + max = tx_max(dws); while (max--) { /* Set the tx word if the transfer's original "tx" is not null */ if (dws->tx_end - dws->len) { @@ -194,13 +196,16 @@ static void dw_writer(struct dw_spi *dws) dw_write_io_reg(dws, DW_SPI_DR, txw); dws->tx += dws->n_bytes; } + spin_unlock(&dws->buf_lock); } static void dw_reader(struct dw_spi *dws) { - u32 max = rx_max(dws); + u32 max; u16 rxw; + spin_lock(&dws->buf_lock); + max = rx_max(dws); while (max--) { rxw = dw_read_io_reg(dws, DW_SPI_DR); /* Care rx only if the transfer's original "rx" is not null */ @@ -212,6 +217,7 @@ static void dw_reader(struct dw_spi *dws) } dws->rx += dws->n_bytes; } + spin_unlock(&dws->buf_lock); } static void int_error_stop(struct dw_spi *dws, const char *msg) @@ -284,18 +290,20 @@ static int dw_spi_transfer_one(struct spi_master *master, { struct dw_spi *dws = spi_master_get_devdata(master); struct chip_data *chip = spi_get_ctldata(spi); + unsigned long flags; u8 imask = 0; u16 txlevel = 0; u32 cr0; int ret; dws->dma_mapped = 0; - + spin_lock_irqsave(&dws->buf_lock, flags); dws->tx = (void *)transfer->tx_buf; dws->tx_end = dws->tx + transfer->len; dws->rx = transfer->rx_buf; dws->rx_end = dws->rx + transfer->len; dws->len = transfer->len; + spin_unlock_irqrestore(&dws->buf_lock, flags); spi_enable_chip(dws, 0); @@ -486,6 +494,7 @@ int dw_spi_add_host(struct device *dev, struct dw_spi *dws) dws->type = SSI_MOTO_SPI; dws->dma_inited = 0; dws->dma_addr = (dma_addr_t)(dws->paddr + DW_SPI_DR); + spin_lock_init(&dws->buf_lock); ret = request_irq(dws->irq, dw_spi_irq, IRQF_SHARED, dev_name(dev), master); diff --git a/drivers/spi/spi-dw.h b/drivers/spi/spi-dw.h index 5c07cf8f19e0..45fbf3ad591c 100644 --- a/drivers/spi/spi-dw.h +++ b/drivers/spi/spi-dw.h @@ -117,6 +117,7 @@ struct dw_spi { size_t len; void *tx; void *tx_end; + spinlock_t buf_lock; void *rx; void *rx_end; int dma_mapped; From d24cfcdb6285470316c71558722d30aa73c55be7 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 2 Jan 2020 13:27:06 -0800 Subject: [PATCH 2882/3715] drivers/net/b44: Change to non-atomic bit operations on pwol_mask [ Upstream commit f11421ba4af706cb4f5703de34fa77fba8472776 ] Atomic operations that span cache lines are super-expensive on x86 (not just to the current processor, but also to other processes as all memory operations are blocked until the operation completes). Upcoming x86 processors have a switch to cause such operations to generate a #AC trap. It is expected that some real time systems will enable this mode in BIOS. In preparation for this, it is necessary to fix code that may execute atomic instructions with operands that cross cachelines because the #AC trap will crash the kernel. Since "pwol_mask" is local and never exposed to concurrency, there is no need to set bits in pwol_mask using atomic operations. Directly operate on the byte which contains the bit instead of using __set_bit() to avoid any big endian concern due to type cast to unsigned long in __set_bit(). Suggested-by: Peter Zijlstra Signed-off-by: Fenghua Yu Signed-off-by: Tony Luck Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/b44.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index a1125d10c825..8b9a0ce1d29f 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -1521,8 +1521,10 @@ static int b44_magic_pattern(u8 *macaddr, u8 *ppattern, u8 *pmask, int offset) int ethaddr_bytes = ETH_ALEN; memset(ppattern + offset, 0xff, magicsync); - for (j = 0; j < magicsync; j++) - set_bit(len++, (unsigned long *) pmask); + for (j = 0; j < magicsync; j++) { + pmask[len >> 3] |= BIT(len & 7); + len++; + } for (j = 0; j < B44_MAX_PATTERNS; j++) { if ((B44_PATTERN_SIZE - len) >= ETH_ALEN) @@ -1534,7 +1536,8 @@ static int b44_magic_pattern(u8 *macaddr, u8 *ppattern, u8 *pmask, int offset) for (k = 0; k< ethaddr_bytes; k++) { ppattern[offset + magicsync + (j * ETH_ALEN) + k] = macaddr[k]; - set_bit(len++, (unsigned long *) pmask); + pmask[len >> 3] |= BIT(len & 7); + len++; } } return len - 1; From e52f8ff34eaa49047c225a42c0a4d2976c4ab20c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 4 Jan 2020 15:31:43 +0100 Subject: [PATCH 2883/3715] net: wan: sdla: Fix cast from pointer to integer of different size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 00c0688cecadbf7ac2f5b4cdb36d912a2d3f0cca ] Since net_device.mem_start is unsigned long, it should not be cast to int right before casting to pointer. This fixes warning (compile testing on alpha architecture): drivers/net/wan/sdla.c: In function ‘sdla_transmit’: drivers/net/wan/sdla.c:711:13: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/wan/sdla.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wan/sdla.c b/drivers/net/wan/sdla.c index 236c62538036..1eb329fc7241 100644 --- a/drivers/net/wan/sdla.c +++ b/drivers/net/wan/sdla.c @@ -711,7 +711,7 @@ static netdev_tx_t sdla_transmit(struct sk_buff *skb, spin_lock_irqsave(&sdla_lock, flags); SDLA_WINDOW(dev, addr); - pbuf = (void *)(((int) dev->mem_start) + (addr & SDLA_ADDR_MASK)); + pbuf = (void *)(dev->mem_start + (addr & SDLA_ADDR_MASK)); __sdla_write(dev, pbuf->buf_addr, skb->data, skb->len); SDLA_WINDOW(dev, addr); pbuf->opp_flag = 1; From c698d678854c08590c3d1964ecca1bfabc138950 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 6 Jan 2020 04:51:54 +0300 Subject: [PATCH 2884/3715] gpio: max77620: Add missing dependency on GPIOLIB_IRQCHIP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c5706c7defc79de68a115b5536376298a8fef111 ] Driver fails to compile in a minimized kernel's configuration because of the missing dependency on GPIOLIB_IRQCHIP. error: ‘struct gpio_chip’ has no member named ‘irq’ 44 | virq = irq_find_mapping(gpio->gpio_chip.irq.domain, offset); Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20200106015154.12040-1-digetx@gmail.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/gpio/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 2357d2f73c1a..8d2ab77c6581 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -990,6 +990,7 @@ config GPIO_LP87565 config GPIO_MAX77620 tristate "GPIO support for PMIC MAX77620 and MAX20024" depends on MFD_MAX77620 + select GPIOLIB_IRQCHIP help GPIO driver for MAX77620 and MAX20024 PMIC from Maxim Semiconductor. MAX77620 PMIC has 8 pins that can be configured as GPIOs. The From 5be2654a3fd996d423ca26fc6a48b9f1aa7199d2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 7 Jan 2020 21:43:59 +0100 Subject: [PATCH 2885/3715] atm: eni: fix uninitialized variable warning [ Upstream commit 30780d086a83332adcd9362281201cee7c3d9d19 ] With -O3, gcc has found an actual unintialized variable stored into an mmio register in two instances: drivers/atm/eni.c: In function 'discard': drivers/atm/eni.c:465:13: error: 'dma[1]' is used uninitialized in this function [-Werror=uninitialized] writel(dma[i*2+1],eni_dev->rx_dma+dma_wr*8+4); ^ drivers/atm/eni.c:465:13: error: 'dma[3]' is used uninitialized in this function [-Werror=uninitialized] Change the code to always write zeroes instead. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/atm/eni.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index ce47eb17901d..a106d15f6def 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -372,7 +372,7 @@ static int do_rx_dma(struct atm_vcc *vcc,struct sk_buff *skb, here = (eni_vcc->descr+skip) & (eni_vcc->words-1); dma[j++] = (here << MID_DMA_COUNT_SHIFT) | (vcc->vci << MID_DMA_VCI_SHIFT) | MID_DT_JK; - j++; + dma[j++] = 0; } here = (eni_vcc->descr+size+skip) & (eni_vcc->words-1); if (!eff) size += skip; @@ -445,7 +445,7 @@ static int do_rx_dma(struct atm_vcc *vcc,struct sk_buff *skb, if (size != eff) { dma[j++] = (here << MID_DMA_COUNT_SHIFT) | (vcc->vci << MID_DMA_VCI_SHIFT) | MID_DT_JK; - j++; + dma[j++] = 0; } if (!j || j > 2*RX_DMA_BUF) { printk(KERN_CRIT DEV_LABEL "!j or j too big!!!\n"); From 5ed8ea1798f5585f81252fbbf49ddf50029de2a4 Mon Sep 17 00:00:00 2001 From: Slawomir Pawlowski Date: Tue, 17 Sep 2019 09:20:48 +0000 Subject: [PATCH 2886/3715] PCI: Add DMA alias quirk for Intel VCA NTB [ Upstream commit 56b4cd4b7da9ee95778eb5c8abea49f641ebfd91 ] Intel Visual Compute Accelerator (VCA) is a family of PCIe add-in devices exposing computational units via Non Transparent Bridges (NTB, PEX 87xx). Similarly to MIC x200, we need to add DMA aliases to allow buffer access when IOMMU is enabled. Add aliases to allow computational unit access to host memory. These aliases mark the whole VCA device as one IOMMU group. All possible slot numbers (0x20) are used, since we are unable to tell what slot is used on other side. This quirk is intended for both host and computational unit sides. The VCA devices have up to five functions: four for DMA channels and one additional. Link: https://lore.kernel.org/r/5683A335CC8BE1438C3C30C49DCC38DF637CED8E@IRSMSX102.ger.corp.intel.com Signed-off-by: Slawomir Pawlowski Signed-off-by: Przemek Kitszel Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin --- drivers/pci/quirks.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 90df085e9f92..e7ed051ec125 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -4019,6 +4019,40 @@ static void quirk_mic_x200_dma_alias(struct pci_dev *pdev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2260, quirk_mic_x200_dma_alias); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2264, quirk_mic_x200_dma_alias); +/* + * Intel Visual Compute Accelerator (VCA) is a family of PCIe add-in devices + * exposing computational units via Non Transparent Bridges (NTB, PEX 87xx). + * + * Similarly to MIC x200, we need to add DMA aliases to allow buffer access + * when IOMMU is enabled. These aliases allow computational unit access to + * host memory. These aliases mark the whole VCA device as one IOMMU + * group. + * + * All possible slot numbers (0x20) are used, since we are unable to tell + * what slot is used on other side. This quirk is intended for both host + * and computational unit sides. The VCA devices have up to five functions + * (four for DMA channels and one additional). + */ +static void quirk_pex_vca_alias(struct pci_dev *pdev) +{ + const unsigned int num_pci_slots = 0x20; + unsigned int slot; + + for (slot = 0; slot < num_pci_slots; slot++) { + pci_add_dma_alias(pdev, PCI_DEVFN(slot, 0x0)); + pci_add_dma_alias(pdev, PCI_DEVFN(slot, 0x1)); + pci_add_dma_alias(pdev, PCI_DEVFN(slot, 0x2)); + pci_add_dma_alias(pdev, PCI_DEVFN(slot, 0x3)); + pci_add_dma_alias(pdev, PCI_DEVFN(slot, 0x4)); + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2954, quirk_pex_vca_alias); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2955, quirk_pex_vca_alias); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2956, quirk_pex_vca_alias); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2958, quirk_pex_vca_alias); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2959, quirk_pex_vca_alias); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x295A, quirk_pex_vca_alias); + /* * The IOMMU and interrupt controller on Broadcom Vulcan/Cavium ThunderX2 are * associated not at the root bus, but at a bridge below. This quirk avoids From ebb7fb7dfd657b65fd7d315b0c8709af94db535f Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 8 Sep 2015 09:53:38 -0700 Subject: [PATCH 2887/3715] usb-storage: Disable UAS on JMicron SATA enclosure [ Upstream commit bc3bdb12bbb3492067c8719011576370e959a2e6 ] Steve Ellis reported incorrect block sizes and alignement offsets with a SATA enclosure. Adding a quirk to disable UAS fixes the problems. Reported-by: Steven Ellis Cc: Pacho Ramos Signed-off-by: Laura Abbott Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/storage/unusual_uas.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index f15aa47c54a9..0eb8c67ee138 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -163,12 +163,15 @@ UNUSUAL_DEV(0x2537, 0x1068, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_IGNORE_UAS), -/* Reported-by: Takeo Nakayama */ +/* + * Initially Reported-by: Takeo Nakayama + * UAS Ignore Reported by Steven Ellis + */ UNUSUAL_DEV(0x357d, 0x7788, 0x0000, 0x9999, "JMicron", "JMS566", USB_SC_DEVICE, USB_PR_DEVICE, NULL, - US_FL_NO_REPORT_OPCODES), + US_FL_NO_REPORT_OPCODES | US_FL_IGNORE_UAS), /* Reported-by: Hans de Goede */ UNUSUAL_DEV(0x4971, 0x1012, 0x0000, 0x9999, From b4cdf5066ce23d1cc23c1dd4c71438e762c82581 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 24 Jan 2020 14:57:20 -0800 Subject: [PATCH 2888/3715] net_sched: ematch: reject invalid TCF_EM_SIMPLE [ Upstream commit 55cd9f67f1e45de8517cdaab985fb8e56c0bc1d8 ] It is possible for malicious userspace to set TCF_EM_SIMPLE bit even for matches that should not have this bit set. This can fool two places using tcf_em_is_simple() 1) tcf_em_tree_destroy() -> memory leak of em->data if ops->destroy() is NULL 2) tcf_em_tree_dump() wrongly report/leak 4 low-order bytes of a kernel pointer. BUG: memory leak unreferenced object 0xffff888121850a40 (size 32): comm "syz-executor927", pid 7193, jiffies 4294941655 (age 19.840s) hex dump (first 32 bytes): 00 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000f67036ea>] kmemleak_alloc_recursive include/linux/kmemleak.h:43 [inline] [<00000000f67036ea>] slab_post_alloc_hook mm/slab.h:586 [inline] [<00000000f67036ea>] slab_alloc mm/slab.c:3320 [inline] [<00000000f67036ea>] __do_kmalloc mm/slab.c:3654 [inline] [<00000000f67036ea>] __kmalloc_track_caller+0x165/0x300 mm/slab.c:3671 [<00000000fab0cc8e>] kmemdup+0x27/0x60 mm/util.c:127 [<00000000d9992e0a>] kmemdup include/linux/string.h:453 [inline] [<00000000d9992e0a>] em_nbyte_change+0x5b/0x90 net/sched/em_nbyte.c:32 [<000000007e04f711>] tcf_em_validate net/sched/ematch.c:241 [inline] [<000000007e04f711>] tcf_em_tree_validate net/sched/ematch.c:359 [inline] [<000000007e04f711>] tcf_em_tree_validate+0x332/0x46f net/sched/ematch.c:300 [<000000007a769204>] basic_set_parms net/sched/cls_basic.c:157 [inline] [<000000007a769204>] basic_change+0x1d7/0x5f0 net/sched/cls_basic.c:219 [<00000000e57a5997>] tc_new_tfilter+0x566/0xf70 net/sched/cls_api.c:2104 [<0000000074b68559>] rtnetlink_rcv_msg+0x3b2/0x4b0 net/core/rtnetlink.c:5415 [<00000000b7fe53fb>] netlink_rcv_skb+0x61/0x170 net/netlink/af_netlink.c:2477 [<00000000e83a40d0>] rtnetlink_rcv+0x1d/0x30 net/core/rtnetlink.c:5442 [<00000000d62ba933>] netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] [<00000000d62ba933>] netlink_unicast+0x223/0x310 net/netlink/af_netlink.c:1328 [<0000000088070f72>] netlink_sendmsg+0x2c0/0x570 net/netlink/af_netlink.c:1917 [<00000000f70b15ea>] sock_sendmsg_nosec net/socket.c:639 [inline] [<00000000f70b15ea>] sock_sendmsg+0x54/0x70 net/socket.c:659 [<00000000ef95a9be>] ____sys_sendmsg+0x2d0/0x300 net/socket.c:2330 [<00000000b650f1ab>] ___sys_sendmsg+0x8a/0xd0 net/socket.c:2384 [<0000000055bfa74a>] __sys_sendmsg+0x80/0xf0 net/socket.c:2417 [<000000002abac183>] __do_sys_sendmsg net/socket.c:2426 [inline] [<000000002abac183>] __se_sys_sendmsg net/socket.c:2424 [inline] [<000000002abac183>] __x64_sys_sendmsg+0x23/0x30 net/socket.c:2424 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: syzbot+03c4738ed29d5d366ddf@syzkaller.appspotmail.com Cc: Cong Wang Acked-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/ematch.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 60f2354c1789..a48dca26f178 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -242,6 +242,9 @@ static int tcf_em_validate(struct tcf_proto *tp, goto errout; if (em->ops->change) { + err = -EINVAL; + if (em_hdr->flags & TCF_EM_SIMPLE) + goto errout; err = em->ops->change(net, data, data_len, em); if (err < 0) goto errout; From c662ea4fab81c530590c2be3e86e26313d047e3b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 28 Nov 2019 18:22:01 +0100 Subject: [PATCH 2889/3715] rsi: fix use-after-free on probe errors commit 92aafe77123ab478e5f5095878856ab0424910da upstream. The driver would fail to stop the command timer in most error paths, something which specifically could lead to the timer being freed while still active on I/O errors during probe. Fix this by making sure that each function starting the timer also stops it in all relevant error paths. Reported-by: syzbot+1d1597a5aa3679c65b9f@syzkaller.appspotmail.com Fixes: b78e91bcfb33 ("rsi: Add new firmware loading method") Cc: stable # 4.12 Cc: Prameela Rani Garnepudi Cc: Amitkumar Karwar Signed-off-by: Johan Hovold Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/rsi/rsi_91x_hal.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_hal.c b/drivers/net/wireless/rsi/rsi_91x_hal.c index 120b0ff545c1..d205947c4c55 100644 --- a/drivers/net/wireless/rsi/rsi_91x_hal.c +++ b/drivers/net/wireless/rsi/rsi_91x_hal.c @@ -541,6 +541,7 @@ static int bl_cmd(struct rsi_hw *adapter, u8 cmd, u8 exp_resp, char *str) bl_start_cmd_timer(adapter, timeout); status = bl_write_cmd(adapter, cmd, exp_resp, ®out_val); if (status < 0) { + bl_stop_cmd_timer(adapter); rsi_dbg(ERR_ZONE, "%s: Command %s (%0x) writing failed..\n", __func__, str, cmd); @@ -656,10 +657,9 @@ static int ping_pong_write(struct rsi_hw *adapter, u8 cmd, u8 *addr, u32 size) } status = bl_cmd(adapter, cmd_req, cmd_resp, str); - if (status) { - bl_stop_cmd_timer(adapter); + if (status) return status; - } + return 0; } @@ -749,10 +749,9 @@ static int auto_fw_upgrade(struct rsi_hw *adapter, u8 *flash_content, status = bl_cmd(adapter, EOF_REACHED, FW_LOADING_SUCCESSFUL, "EOF_REACHED"); - if (status) { - bl_stop_cmd_timer(adapter); + if (status) return status; - } + rsi_dbg(INFO_ZONE, "FW loading is done and FW is running..\n"); return 0; } @@ -773,6 +772,7 @@ static int rsi_load_firmware(struct rsi_hw *adapter) status = hif_ops->master_reg_read(adapter, SWBL_REGOUT, ®out_val, 2); if (status < 0) { + bl_stop_cmd_timer(adapter); rsi_dbg(ERR_ZONE, "%s: REGOUT read failed\n", __func__); return status; From 713ff7e4d605c4dd1efd838e3f0092cd93733f0c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 5 Dec 2019 13:45:05 +0800 Subject: [PATCH 2890/3715] crypto: af_alg - Use bh_lock_sock in sk_destruct commit 37f96694cf73ba116993a9d2d99ad6a75fa7fdb0 upstream. As af_alg_release_parent may be called from BH context (most notably due to an async request that only completes after socket closure, or as reported here because of an RCU-delayed sk_destruct call), we must use bh_lock_sock instead of lock_sock. Reported-by: syzbot+c2f1558d49e25cc36e5e@syzkaller.appspotmail.com Reported-by: Eric Dumazet Fixes: c840ac6af3f8 ("crypto: af_alg - Disallow bind/setkey/...") Cc: Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/af_alg.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 422bba808f73..0679c35adf55 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -139,11 +139,13 @@ void af_alg_release_parent(struct sock *sk) sk = ask->parent; ask = alg_sk(sk); - lock_sock(sk); + local_bh_disable(); + bh_lock_sock(sk); ask->nokey_refcnt -= nokey; if (!last) last = !--ask->refcnt; - release_sock(sk); + bh_unlock_sock(sk); + local_bh_enable(); if (last) sock_put(sk); From 40642747dd9feab4912157882166c05722cec7b0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 1 Feb 2020 16:26:45 +0000 Subject: [PATCH 2891/3715] vfs: fix do_last() regression commit 6404674acd596de41fd3ad5f267b4525494a891a upstream. Brown paperbag time: fetching ->i_uid/->i_mode really should've been done from nd->inode. I even suggested that, but the reason for that has slipped through the cracks and I went for dir->d_inode instead - made for more "obvious" patch. Analysis: - at the entry into do_last() and all the way to step_into(): dir (aka nd->path.dentry) is known not to have been freed; so's nd->inode and it's equal to dir->d_inode unless we are already doomed to -ECHILD. inode of the file to get opened is not known. - after step_into(): inode of the file to get opened is known; dir might be pointing to freed memory/be negative/etc. - at the call of may_create_in_sticky(): guaranteed to be out of RCU mode; inode of the file to get opened is known and pinned; dir might be garbage. The last was the reason for the original patch. Except that at the do_last() entry we can be in RCU mode and it is possible that nd->path.dentry->d_inode has already changed under us. In that case we are going to fail with -ECHILD, but we need to be careful; nd->inode is pointing to valid struct inode and it's the same as nd->path.dentry->d_inode in "won't fail with -ECHILD" case, so we should use that. Reported-by: "Rantala, Tommi T. (Nokia - FI/Espoo)" Reported-by: syzbot+190005201ced78a74ad6@syzkaller.appspotmail.com Wearing-brown-paperbag: Al Viro Cc: stable@kernel.org Fixes: d0cb50185ae9 ("do_last(): fetch directory ->i_mode and ->i_uid before it's too late") Signed-off-by: Al Viro Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/namei.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index d648d6d2b635..f421f8d80f4d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3266,8 +3266,8 @@ static int do_last(struct nameidata *nd, int *opened) { struct dentry *dir = nd->path.dentry; - kuid_t dir_uid = dir->d_inode->i_uid; - umode_t dir_mode = dir->d_inode->i_mode; + kuid_t dir_uid = nd->inode->i_uid; + umode_t dir_mode = nd->inode->i_mode; int open_flag = op->open_flag; bool will_truncate = (open_flag & O_TRUNC) != 0; bool got_write = false; From d20edc0bca5577bab38acb5b190619c922ddebf8 Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Sun, 2 Feb 2020 05:59:30 +0800 Subject: [PATCH 2892/3715] x86/resctrl: Fix use-after-free when deleting resource groups commit b8511ccc75c033f6d54188ea4df7bf1e85778740 upstream. A resource group (rdtgrp) contains a reference count (rdtgrp->waitcount) that indicates how many waiters expect this rdtgrp to exist. Waiters could be waiting on rdtgroup_mutex or some work sitting on a task's workqueue for when the task returns from kernel mode or exits. The deletion of a rdtgrp is intended to have two phases: (1) while holding rdtgroup_mutex the necessary cleanup is done and rdtgrp->flags is set to RDT_DELETED, (2) after releasing the rdtgroup_mutex, the rdtgrp structure is freed only if there are no waiters and its flag is set to RDT_DELETED. Upon gaining access to rdtgroup_mutex or rdtgrp, a waiter is required to check for the RDT_DELETED flag. When unmounting the resctrl file system or deleting ctrl_mon groups, all of the subdirectories are removed and the data structure of rdtgrp is forcibly freed without checking rdtgrp->waitcount. If at this point there was a waiter on rdtgrp then a use-after-free issue occurs when the waiter starts running and accesses the rdtgrp structure it was waiting on. See kfree() calls in [1], [2] and [3] in these two call paths in following scenarios: (1) rdt_kill_sb() -> rmdir_all_sub() -> free_all_child_rdtgrp() (2) rdtgroup_rmdir() -> rdtgroup_rmdir_ctrl() -> free_all_child_rdtgrp() There are several scenarios that result in use-after-free issue in following: Scenario 1: ----------- In Thread 1, rdtgroup_tasks_write() adds a task_work callback move_myself(). If move_myself() is scheduled to execute after Thread 2 rdt_kill_sb() is finished, referring to earlier rdtgrp memory (rdtgrp->waitcount) which was already freed in Thread 2 results in use-after-free issue. Thread 1 (rdtgroup_tasks_write) Thread 2 (rdt_kill_sb) ------------------------------- ---------------------- rdtgroup_kn_lock_live atomic_inc(&rdtgrp->waitcount) mutex_lock rdtgroup_move_task __rdtgroup_move_task /* * Take an extra refcount, so rdtgrp cannot be freed * before the call back move_myself has been invoked */ atomic_inc(&rdtgrp->waitcount) /* Callback move_myself will be scheduled for later */ task_work_add(move_myself) rdtgroup_kn_unlock mutex_unlock atomic_dec_and_test(&rdtgrp->waitcount) && (flags & RDT_DELETED) mutex_lock rmdir_all_sub /* * sentry and rdtgrp are freed * without checking refcount */ free_all_child_rdtgrp kfree(sentry)*[1] kfree(rdtgrp)*[2] mutex_unlock /* * Callback is scheduled to execute * after rdt_kill_sb is finished */ move_myself /* * Use-after-free: refer to earlier rdtgrp * memory which was freed in [1] or [2]. */ atomic_dec_and_test(&rdtgrp->waitcount) && (flags & RDT_DELETED) kfree(rdtgrp) Scenario 2: ----------- In Thread 1, rdtgroup_tasks_write() adds a task_work callback move_myself(). If move_myself() is scheduled to execute after Thread 2 rdtgroup_rmdir() is finished, referring to earlier rdtgrp memory (rdtgrp->waitcount) which was already freed in Thread 2 results in use-after-free issue. Thread 1 (rdtgroup_tasks_write) Thread 2 (rdtgroup_rmdir) ------------------------------- ------------------------- rdtgroup_kn_lock_live atomic_inc(&rdtgrp->waitcount) mutex_lock rdtgroup_move_task __rdtgroup_move_task /* * Take an extra refcount, so rdtgrp cannot be freed * before the call back move_myself has been invoked */ atomic_inc(&rdtgrp->waitcount) /* Callback move_myself will be scheduled for later */ task_work_add(move_myself) rdtgroup_kn_unlock mutex_unlock atomic_dec_and_test(&rdtgrp->waitcount) && (flags & RDT_DELETED) rdtgroup_kn_lock_live atomic_inc(&rdtgrp->waitcount) mutex_lock rdtgroup_rmdir_ctrl free_all_child_rdtgrp /* * sentry is freed without * checking refcount */ kfree(sentry)*[3] rdtgroup_ctrl_remove rdtgrp->flags = RDT_DELETED rdtgroup_kn_unlock mutex_unlock atomic_dec_and_test( &rdtgrp->waitcount) && (flags & RDT_DELETED) kfree(rdtgrp) /* * Callback is scheduled to execute * after rdt_kill_sb is finished */ move_myself /* * Use-after-free: refer to earlier rdtgrp * memory which was freed in [3]. */ atomic_dec_and_test(&rdtgrp->waitcount) && (flags & RDT_DELETED) kfree(rdtgrp) If CONFIG_DEBUG_SLAB=y, Slab corruption on kmalloc-2k can be observed like following. Note that "0x6b" is POISON_FREE after kfree(). The corrupted bits "0x6a", "0x64" at offset 0x424 correspond to waitcount member of struct rdtgroup which was freed: Slab corruption (Not tainted): kmalloc-2k start=ffff9504c5b0d000, len=2048 420: 6b 6b 6b 6b 6a 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkjkkkkkkkkkkk Single bit error detected. Probably bad RAM. Run memtest86+ or a similar memory test tool. Next obj: start=ffff9504c5b0d800, len=2048 000: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk 010: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk Slab corruption (Not tainted): kmalloc-2k start=ffff9504c58ab800, len=2048 420: 6b 6b 6b 6b 64 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkdkkkkkkkkkkk Prev obj: start=ffff9504c58ab000, len=2048 000: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk 010: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk Fix this by taking reference count (waitcount) of rdtgrp into account in the two call paths that currently do not do so. Instead of always freeing the resource group it will only be freed if there are no waiters on it. If there are waiters, the resource group will have its flags set to RDT_DELETED. It will be left to the waiter to free the resource group when it starts running and finding that it was the last waiter and the resource group has been removed (rdtgrp->flags & RDT_DELETED) since. (1) rdt_kill_sb() -> rmdir_all_sub() -> free_all_child_rdtgrp() (2) rdtgroup_rmdir() -> rdtgroup_rmdir_ctrl() -> free_all_child_rdtgrp() Backporting notes: Since upstream commit fa7d949337cc ("x86/resctrl: Rename and move rdt files to a separate directory"), the file arch/x86/kernel/cpu/intel_rdt_rdtgroup.c has been renamed and moved to arch/x86/kernel/cpu/resctrl/rdtgroup.c. Apply the change against file arch/x86/kernel/cpu/intel_rdt_rdtgroup.c in older stable trees. Fixes: f3cbeacaa06e ("x86/intel_rdt/cqm: Add rmdir support") Fixes: 60cf5e101fd4 ("x86/intel_rdt: Add mkdir to resctrl file system") Suggested-by: Reinette Chatre Signed-off-by: Xiaochen Shen Signed-off-by: Borislav Petkov Reviewed-by: Reinette Chatre Reviewed-by: Tony Luck Acked-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1578500886-21771-2-git-send-email-xiaochen.shen@intel.com Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index 2dae1b3c42fc..734996904dc3 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -1260,7 +1260,11 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp) list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) { free_rmid(sentry->mon.rmid); list_del(&sentry->mon.crdtgrp_list); - kfree(sentry); + + if (atomic_read(&sentry->waitcount) != 0) + sentry->flags = RDT_DELETED; + else + kfree(sentry); } } @@ -1294,7 +1298,11 @@ static void rmdir_all_sub(void) kernfs_remove(rdtgrp->kn); list_del(&rdtgrp->rdtgroup_list); - kfree(rdtgrp); + + if (atomic_read(&rdtgrp->waitcount) != 0) + rdtgrp->flags = RDT_DELETED; + else + kfree(rdtgrp); } /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */ update_closid_rmid(cpu_online_mask, &rdtgroup_default); From df57e8ba374ca0e3e6dec6633ce3ed2adfe0d9a8 Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Sun, 2 Feb 2020 06:00:23 +0800 Subject: [PATCH 2893/3715] x86/resctrl: Fix use-after-free due to inaccurate refcount of rdtgroup commit 074fadee59ee7a9d2b216e9854bd4efb5dad679f upstream. There is a race condition in the following scenario which results in an use-after-free issue when reading a monitoring file and deleting the parent ctrl_mon group concurrently: Thread 1 calls atomic_inc() to take refcount of rdtgrp and then calls kernfs_break_active_protection() to drop the active reference of kernfs node in rdtgroup_kn_lock_live(). In Thread 2, kernfs_remove() is a blocking routine. It waits on all sub kernfs nodes to drop the active reference when removing all subtree kernfs nodes recursively. Thread 2 could block on kernfs_remove() until Thread 1 calls kernfs_break_active_protection(). Only after kernfs_remove() completes the refcount of rdtgrp could be trusted. Before Thread 1 calls atomic_inc() and kernfs_break_active_protection(), Thread 2 could call kfree() when the refcount of rdtgrp (sentry) is 0 instead of 1 due to the race. In Thread 1, in rdtgroup_kn_unlock(), referring to earlier rdtgrp memory (rdtgrp->waitcount) which was already freed in Thread 2 results in use-after-free issue. Thread 1 (rdtgroup_mondata_show) Thread 2 (rdtgroup_rmdir) -------------------------------- ------------------------- rdtgroup_kn_lock_live /* * kn active protection until * kernfs_break_active_protection(kn) */ rdtgrp = kernfs_to_rdtgroup(kn) rdtgroup_kn_lock_live atomic_inc(&rdtgrp->waitcount) mutex_lock rdtgroup_rmdir_ctrl free_all_child_rdtgrp /* * sentry->waitcount should be 1 * but is 0 now due to the race. */ kfree(sentry)*[1] /* * Only after kernfs_remove() * completes, the refcount of * rdtgrp could be trusted. */ atomic_inc(&rdtgrp->waitcount) /* kn->active-- */ kernfs_break_active_protection(kn) rdtgroup_ctrl_remove rdtgrp->flags = RDT_DELETED /* * Blocking routine, wait for * all sub kernfs nodes to drop * active reference in * kernfs_break_active_protection. */ kernfs_remove(rdtgrp->kn) rdtgroup_kn_unlock mutex_unlock atomic_dec_and_test( &rdtgrp->waitcount) && (flags & RDT_DELETED) kernfs_unbreak_active_protection(kn) kfree(rdtgrp) mutex_lock mon_event_read rdtgroup_kn_unlock mutex_unlock /* * Use-after-free: refer to earlier rdtgrp * memory which was freed in [1]. */ atomic_dec_and_test(&rdtgrp->waitcount) && (flags & RDT_DELETED) /* kn->active++ */ kernfs_unbreak_active_protection(kn) kfree(rdtgrp) Fix it by moving free_all_child_rdtgrp() to after kernfs_remove() in rdtgroup_rmdir_ctrl() to ensure it has the accurate refcount of rdtgrp. Backporting notes: Since upstream commit fa7d949337cc ("x86/resctrl: Rename and move rdt files to a separate directory"), the file arch/x86/kernel/cpu/intel_rdt_rdtgroup.c has been renamed and moved to arch/x86/kernel/cpu/resctrl/rdtgroup.c. Apply the change against file arch/x86/kernel/cpu/intel_rdt_rdtgroup.c for older stable trees. Upstream commit 17eafd076291 ("x86/intel_rdt: Split resource group removal in two") moved part of resource group removal code from rdtgroup_rmdir_mon() into a separate function rdtgroup_ctrl_remove(). Apply the change against original code base of rdtgroup_rmdir_mon() for older stable trees. Fixes: f3cbeacaa06e ("x86/intel_rdt/cqm: Add rmdir support") Suggested-by: Reinette Chatre Signed-off-by: Xiaochen Shen Signed-off-by: Borislav Petkov Reviewed-by: Reinette Chatre Reviewed-by: Tony Luck Acked-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1578500886-21771-3-git-send-email-xiaochen.shen@intel.com Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index 734996904dc3..01574966d91f 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -1800,11 +1800,6 @@ static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp, closid_free(rdtgrp->closid); free_rmid(rdtgrp->mon.rmid); - /* - * Free all the child monitor group rmids. - */ - free_all_child_rdtgrp(rdtgrp); - list_del(&rdtgrp->rdtgroup_list); /* @@ -1814,6 +1809,11 @@ static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp, kernfs_get(kn); kernfs_remove(rdtgrp->kn); + /* + * Free all the child monitor group rmids. + */ + free_all_child_rdtgrp(rdtgrp); + return 0; } From e3f5c2e99092f2e57886ac40c6fa62ffcf87ab1e Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Sun, 2 Feb 2020 06:00:53 +0800 Subject: [PATCH 2894/3715] x86/resctrl: Fix a deadlock due to inaccurate reference commit 334b0f4e9b1b4a1d475f803419d202f6c5e4d18e upstream. There is a race condition which results in a deadlock when rmdir and mkdir execute concurrently: $ ls /sys/fs/resctrl/c1/mon_groups/m1/ cpus cpus_list mon_data tasks Thread 1: rmdir /sys/fs/resctrl/c1 Thread 2: mkdir /sys/fs/resctrl/c1/mon_groups/m1 3 locks held by mkdir/48649: #0: (sb_writers#17){.+.+}, at: [] mnt_want_write+0x20/0x50 #1: (&type->i_mutex_dir_key#8/1){+.+.}, at: [] filename_create+0x7b/0x170 #2: (rdtgroup_mutex){+.+.}, at: [] rdtgroup_kn_lock_live+0x3d/0x70 4 locks held by rmdir/48652: #0: (sb_writers#17){.+.+}, at: [] mnt_want_write+0x20/0x50 #1: (&type->i_mutex_dir_key#8/1){+.+.}, at: [] do_rmdir+0x13f/0x1e0 #2: (&type->i_mutex_dir_key#8){++++}, at: [] vfs_rmdir+0x4d/0x120 #3: (rdtgroup_mutex){+.+.}, at: [] rdtgroup_kn_lock_live+0x3d/0x70 Thread 1 is deleting control group "c1". Holding rdtgroup_mutex, kernfs_remove() removes all kernfs nodes under directory "c1" recursively, then waits for sub kernfs node "mon_groups" to drop active reference. Thread 2 is trying to create a subdirectory "m1" in the "mon_groups" directory. The wrapper kernfs_iop_mkdir() takes an active reference to the "mon_groups" directory but the code drops the active reference to the parent directory "c1" instead. As a result, Thread 1 is blocked on waiting for active reference to drop and never release rdtgroup_mutex, while Thread 2 is also blocked on trying to get rdtgroup_mutex. Thread 1 (rdtgroup_rmdir) Thread 2 (rdtgroup_mkdir) (rmdir /sys/fs/resctrl/c1) (mkdir /sys/fs/resctrl/c1/mon_groups/m1) ------------------------- ------------------------- kernfs_iop_mkdir /* * kn: "m1", parent_kn: "mon_groups", * prgrp_kn: parent_kn->parent: "c1", * * "mon_groups", parent_kn->active++: 1 */ kernfs_get_active(parent_kn) kernfs_iop_rmdir /* "c1", kn->active++ */ kernfs_get_active(kn) rdtgroup_kn_lock_live atomic_inc(&rdtgrp->waitcount) /* "c1", kn->active-- */ kernfs_break_active_protection(kn) mutex_lock rdtgroup_rmdir_ctrl free_all_child_rdtgrp sentry->flags = RDT_DELETED rdtgroup_ctrl_remove rdtgrp->flags = RDT_DELETED kernfs_get(kn) kernfs_remove(rdtgrp->kn) __kernfs_remove /* "mon_groups", sub_kn */ atomic_add(KN_DEACTIVATED_BIAS, &sub_kn->active) kernfs_drain(sub_kn) /* * sub_kn->active == KN_DEACTIVATED_BIAS + 1, * waiting on sub_kn->active to drop, but it * never drops in Thread 2 which is blocked * on getting rdtgroup_mutex. */ Thread 1 hangs here ----> wait_event(sub_kn->active == KN_DEACTIVATED_BIAS) ... rdtgroup_mkdir rdtgroup_mkdir_mon(parent_kn, prgrp_kn) mkdir_rdt_prepare(parent_kn, prgrp_kn) rdtgroup_kn_lock_live(prgrp_kn) atomic_inc(&rdtgrp->waitcount) /* * "c1", prgrp_kn->active-- * * The active reference on "c1" is * dropped, but not matching the * actual active reference taken * on "mon_groups", thus causing * Thread 1 to wait forever while * holding rdtgroup_mutex. */ kernfs_break_active_protection( prgrp_kn) /* * Trying to get rdtgroup_mutex * which is held by Thread 1. */ Thread 2 hangs here ----> mutex_lock ... The problem is that the creation of a subdirectory in the "mon_groups" directory incorrectly releases the active protection of its parent directory instead of itself before it starts waiting for rdtgroup_mutex. This is triggered by the rdtgroup_mkdir() flow calling rdtgroup_kn_lock_live()/rdtgroup_kn_unlock() with kernfs node of the parent control group ("c1") as argument. It should be called with kernfs node "mon_groups" instead. What is currently missing is that the kn->priv of "mon_groups" is NULL instead of pointing to the rdtgrp. Fix it by pointing kn->priv to rdtgrp when "mon_groups" is created. Then it could be passed to rdtgroup_kn_lock_live()/rdtgroup_kn_unlock() instead. And then it operates on the same rdtgroup structure but handles the active reference of kernfs node "mon_groups" to prevent deadlock. The same changes are also made to the "mon_data" directories. This results in some unused function parameters that will be cleaned up in follow-up patch as the focus here is on the fix only in support of backporting efforts. Backporting notes: Since upstream commit fa7d949337cc ("x86/resctrl: Rename and move rdt files to a separate directory"), the file arch/x86/kernel/cpu/intel_rdt_rdtgroup.c has been renamed and moved to arch/x86/kernel/cpu/resctrl/rdtgroup.c. Apply the change against file arch/x86/kernel/cpu/intel_rdt_rdtgroup.c for older stable trees. Fixes: c7d9aac61311 ("x86/intel_rdt/cqm: Add mkdir support for RDT monitoring") Suggested-by: Reinette Chatre Signed-off-by: Xiaochen Shen Signed-off-by: Borislav Petkov Reviewed-by: Reinette Chatre Reviewed-by: Tony Luck Acked-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1578500886-21771-4-git-send-email-xiaochen.shen@intel.com Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index 01574966d91f..0ec30b2384c0 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -1107,7 +1107,7 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type, if (rdt_mon_capable) { ret = mongroup_create_dir(rdtgroup_default.kn, - NULL, "mon_groups", + &rdtgroup_default, "mon_groups", &kn_mongrp); if (ret) { dentry = ERR_PTR(ret); @@ -1499,7 +1499,7 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn, /* * Create the mon_data directory first. */ - ret = mongroup_create_dir(parent_kn, NULL, "mon_data", &kn); + ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn); if (ret) return ret; @@ -1533,7 +1533,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, uint files = 0; int ret; - prdtgrp = rdtgroup_kn_lock_live(prgrp_kn); + prdtgrp = rdtgroup_kn_lock_live(parent_kn); if (!prdtgrp) { ret = -ENODEV; goto out_unlock; @@ -1589,7 +1589,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, kernfs_activate(kn); /* - * The caller unlocks the prgrp_kn upon success. + * The caller unlocks the parent_kn upon success. */ return 0; @@ -1600,7 +1600,7 @@ out_destroy: out_free_rgrp: kfree(rdtgrp); out_unlock: - rdtgroup_kn_unlock(prgrp_kn); + rdtgroup_kn_unlock(parent_kn); return ret; } @@ -1638,7 +1638,7 @@ static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn, */ list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list); - rdtgroup_kn_unlock(prgrp_kn); + rdtgroup_kn_unlock(parent_kn); return ret; } @@ -1675,7 +1675,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, * Create an empty mon_groups directory to hold the subset * of tasks and cpus to monitor. */ - ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL); + ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL); if (ret) goto out_id_free; } @@ -1688,7 +1688,7 @@ out_id_free: out_common_fail: mkdir_rdt_prepare_clean(rdtgrp); out_unlock: - rdtgroup_kn_unlock(prgrp_kn); + rdtgroup_kn_unlock(parent_kn); return ret; } From db4d8e42eca72d4c29192e793475c1e6a42908cf Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 19 Nov 2019 17:41:31 +0800 Subject: [PATCH 2895/3715] crypto: pcrypt - Fix user-after-free on module unload [ Upstream commit 07bfd9bdf568a38d9440c607b72342036011f727 ] On module unload of pcrypt we must unregister the crypto algorithms first and then tear down the padata structure. As otherwise the crypto algorithms are still alive and can be used while the padata structure is being freed. Fixes: 5068c7a883d1 ("crypto: pcrypt - Add pcrypt crypto...") Cc: Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/pcrypt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index a5718c0a3dc4..1348541da463 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c @@ -505,11 +505,12 @@ err: static void __exit pcrypt_exit(void) { + crypto_unregister_template(&pcrypt_tmpl); + pcrypt_fini_padata(&pencrypt); pcrypt_fini_padata(&pdecrypt); kset_unregister(pcrypt_kset); - crypto_unregister_template(&pcrypt_tmpl); } module_init(pcrypt_init); From 806dbe2dfa4855c97ec1da876fbd2fdfb61426f5 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Wed, 8 Jan 2020 20:30:30 -0800 Subject: [PATCH 2896/3715] perf c2c: Fix return type for histogram sorting comparision functions commit c1c8013ec34d7163431d18367808ea40b2e305f8 upstream. Commit 722ddfde366f ("perf tools: Fix time sorting") changed - correctly so - hist_entry__sort to return int64. Unfortunately several of the builtin-c2c.c comparison routines only happened to work due the cast caused by the wrong return type. This causes meaningless ordering of both the cacheline list, and the cacheline details page. E.g a simple: perf c2c record -a sleep 3 perf c2c report will result in cacheline table like ================================================= Shared Data Cache Line Table ================================================= # # ------- Cacheline ---------- Total Tot - LLC Load Hitm - - Store Reference - - Load Dram - LLC Total - Core Load Hit - - LLC Load Hit - # Index Address Node PA cnt records Hitm Total Lcl Rmt Total L1Hit L1Miss Lcl Rmt Ld Miss Loads FB L1 L2 Llc Rmt # ..... .............. .... ...... ....... ...... ..... ..... ... .... ..... ...... ...... .... ...... ..... ..... ..... ... .... ....... 0 0x7f0d27ffba00 N/A 0 52 0.12% 13 6 7 12 12 0 0 7 14 40 4 16 0 0 0 1 0x7f0d27ff61c0 N/A 0 6353 14.04% 1475 801 674 779 779 0 0 718 1392 5574 1299 1967 0 115 0 2 0x7f0d26d3ec80 N/A 0 71 0.15% 16 4 12 13 13 0 0 12 24 58 1 20 0 9 0 3 0x7f0d26d3ec00 N/A 0 98 0.22% 23 17 6 19 19 0 0 6 12 79 0 40 0 10 0 i.e. with the list not being ordered by Total Hitm. Fixes: 722ddfde366f ("perf tools: Fix time sorting") Signed-off-by: Andres Freund Tested-by: Michael Petlan Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Namhyung Kim Cc: Peter Zijlstra Cc: stable@vger.kernel.org # v3.16+ Link: http://lore.kernel.org/lkml/20200109043030.233746-1-andres@anarazel.de Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/builtin-c2c.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index bec7a2f1fb4d..264d458bfe2a 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -528,8 +528,8 @@ tot_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused, { struct c2c_hist_entry *c2c_left; struct c2c_hist_entry *c2c_right; - unsigned int tot_hitm_left; - unsigned int tot_hitm_right; + uint64_t tot_hitm_left; + uint64_t tot_hitm_right; c2c_left = container_of(left, struct c2c_hist_entry, he); c2c_right = container_of(right, struct c2c_hist_entry, he); @@ -562,7 +562,8 @@ __f ## _cmp(struct perf_hpp_fmt *fmt __maybe_unused, \ \ c2c_left = container_of(left, struct c2c_hist_entry, he); \ c2c_right = container_of(right, struct c2c_hist_entry, he); \ - return c2c_left->stats.__f - c2c_right->stats.__f; \ + return (uint64_t) c2c_left->stats.__f - \ + (uint64_t) c2c_right->stats.__f; \ } #define STAT_FN(__f) \ @@ -615,7 +616,8 @@ ld_llcmiss_cmp(struct perf_hpp_fmt *fmt __maybe_unused, c2c_left = container_of(left, struct c2c_hist_entry, he); c2c_right = container_of(right, struct c2c_hist_entry, he); - return llc_miss(&c2c_left->stats) - llc_miss(&c2c_right->stats); + return (uint64_t) llc_miss(&c2c_left->stats) - + (uint64_t) llc_miss(&c2c_right->stats); } static uint64_t total_records(struct c2c_stats *stats) From 1fa12145cd03de637939c6bc1d4df9fbb6116e33 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Tue, 5 Nov 2019 18:18:03 +0900 Subject: [PATCH 2897/3715] PM / devfreq: Add new name attribute for sysfs commit 2fee1a7cc6b1ce6634bb0f025be2c94a58dfa34d upstream. The commit 4585fbcb5331 ("PM / devfreq: Modify the device name as devfreq(X) for sysfs") changed the node name to devfreq(x). After this commit, it is not possible to get the device name through /sys/class/devfreq/devfreq(X)/*. Add new name attribute in order to get device name. Cc: stable@vger.kernel.org Fixes: 4585fbcb5331 ("PM / devfreq: Modify the device name as devfreq(X) for sysfs") Signed-off-by: Chanwoo Choi Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-class-devfreq | 7 +++++++ drivers/devfreq/devfreq.c | 9 +++++++++ 2 files changed, 16 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq index ee39acacf6f8..335595a79866 100644 --- a/Documentation/ABI/testing/sysfs-class-devfreq +++ b/Documentation/ABI/testing/sysfs-class-devfreq @@ -7,6 +7,13 @@ Description: The name of devfreq object denoted as ... is same as the name of device using devfreq. +What: /sys/class/devfreq/.../name +Date: November 2019 +Contact: Chanwoo Choi +Description: + The /sys/class/devfreq/.../name shows the name of device + of the corresponding devfreq object. + What: /sys/class/devfreq/.../governor Date: September 2011 Contact: MyungJoo Ham diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index ad18de955b6c..58ec3abfd321 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -902,6 +902,14 @@ err_out: } EXPORT_SYMBOL(devfreq_remove_governor); +static ssize_t name_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct devfreq *devfreq = to_devfreq(dev); + return sprintf(buf, "%s\n", dev_name(devfreq->dev.parent)); +} +static DEVICE_ATTR_RO(name); + static ssize_t governor_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1200,6 +1208,7 @@ static ssize_t trans_stat_show(struct device *dev, static DEVICE_ATTR_RO(trans_stat); static struct attribute *devfreq_attrs[] = { + &dev_attr_name.attr, &dev_attr_governor.attr, &dev_attr_available_governors.attr, &dev_attr_cur_freq.attr, From 44d8703769f363593b41d51aeaac6ddeee8bc7da Mon Sep 17 00:00:00 2001 From: Vitaly Chikunov Date: Tue, 24 Dec 2019 20:20:29 +0300 Subject: [PATCH 2898/3715] tools lib: Fix builds when glibc contains strlcpy() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6c4798d3f08b81c2c52936b10e0fa872590c96ae upstream. Disable a couple of compilation warnings (which are treated as errors) on strlcpy() definition and declaration, allowing users to compile perf and kernel (objtool) when: 1. glibc have strlcpy() (such as in ALT Linux since 2004) objtool and perf build fails with this (in gcc): In file included from exec-cmd.c:3: tools/include/linux/string.h:20:15: error: redundant redeclaration of ‘strlcpy’ [-Werror=redundant-decls] 20 | extern size_t strlcpy(char *dest, const char *src, size_t size); 2. clang ignores `-Wredundant-decls', but produces another warning when building perf: CC util/string.o ../lib/string.c:99:8: error: attribute declaration must precede definition [-Werror,-Wignored-attributes] size_t __weak strlcpy(char *dest, const char *src, size_t size) ../../tools/include/linux/compiler.h:66:34: note: expanded from macro '__weak' # define __weak __attribute__((weak)) /usr/include/bits/string_fortified.h:151:8: note: previous definition is here __NTH (strlcpy (char *__restrict __dest, const char *__restrict __src, Committer notes: The #pragma GCC diagnostic directive was introduced in gcc 4.6, so check for that as well. Fixes: ce99091 ("perf tools: Move strlcpy() from perf to tools/lib/string.c") Fixes: 0215d59 ("tools lib: Reinstate strlcpy() header guard with __UCLIBC__") Resolves: https://bugzilla.kernel.org/show_bug.cgi?id=118481 Signed-off-by: Vitaly Chikunov Reviewed-by: Dmitry Levin Cc: Dmitry Levin Cc: Josh Poimboeuf Cc: kbuild test robot Cc: Peter Zijlstra Cc: stable@vger.kernel.org Cc: Vineet Gupta Link: http://lore.kernel.org/lkml/20191224172029.19690-1-vt@altlinux.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/include/linux/string.h | 8 ++++++++ tools/lib/string.c | 7 +++++++ 2 files changed, 15 insertions(+) diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h index 6c3e2cc274c5..0ec646f127dc 100644 --- a/tools/include/linux/string.h +++ b/tools/include/linux/string.h @@ -14,7 +14,15 @@ int strtobool(const char *s, bool *res); * However uClibc headers also define __GLIBC__ hence the hack below */ #if defined(__GLIBC__) && !defined(__UCLIBC__) +// pragma diagnostic was introduced in gcc 4.6 +#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wredundant-decls" +#endif extern size_t strlcpy(char *dest, const char *src, size_t size); +#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) +#pragma GCC diagnostic pop +#endif #endif char *str_error_r(int errnum, char *buf, size_t buflen); diff --git a/tools/lib/string.c b/tools/lib/string.c index 93b3d4b6feac..ee0afcbdd696 100644 --- a/tools/lib/string.c +++ b/tools/lib/string.c @@ -95,6 +95,10 @@ int strtobool(const char *s, bool *res) * If libc has strlcpy() then that version will override this * implementation: */ +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wignored-attributes" +#endif size_t __weak strlcpy(char *dest, const char *src, size_t size) { size_t ret = strlen(src); @@ -106,3 +110,6 @@ size_t __weak strlcpy(char *dest, const char *src, size_t size) } return ret; } +#ifdef __clang__ +#pragma clang diagnostic pop +#endif From 64700ad97eab8b533afd5dbfff22266b68900e35 Mon Sep 17 00:00:00 2001 From: Dirk Behme Date: Tue, 21 Jan 2020 16:54:39 +0100 Subject: [PATCH 2899/3715] arm64: kbuild: remove compressed images on 'make ARCH=arm64 (dist)clean' commit d7bbd6c1b01cb5dd13c245d4586a83145c1d5f52 upstream. Since v4.3-rc1 commit 0723c05fb75e44 ("arm64: enable more compressed Image formats"), it is possible to build Image.{bz2,lz4,lzma,lzo} AArch64 images. However, the commit missed adding support for removing those images on 'make ARCH=arm64 (dist)clean'. Fix this by adding them to the target list. Make sure to match the order of the recipes in the makefile. Cc: stable@vger.kernel.org # v4.3+ Fixes: 0723c05fb75e44 ("arm64: enable more compressed Image formats") Signed-off-by: Dirk Behme Signed-off-by: Eugeniu Rosca Reviewed-by: Masahiro Yamada Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile index 1f012c506434..cd3414898d10 100644 --- a/arch/arm64/boot/Makefile +++ b/arch/arm64/boot/Makefile @@ -16,7 +16,7 @@ OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S -targets := Image Image.gz +targets := Image Image.bz2 Image.gz Image.lz4 Image.lzma Image.lzo $(obj)/Image: vmlinux FORCE $(call if_changed,objcopy) From 08e4a312439c294b9753166537baf3cc0bd6bb07 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 15 Dec 2019 01:09:03 -0500 Subject: [PATCH 2900/3715] ext4: validate the debug_want_extra_isize mount option at parse time commit 9803387c55f7d2ce69aa64340c5fdc6b3027dbc8 upstream. Instead of setting s_want_extra_size and then making sure that it is a valid value afterwards, validate the field before we set it. This avoids races and other problems when remounting the file system. Link: https://lore.kernel.org/r/20191215063020.GA11512@mit.edu Cc: stable@kernel.org Signed-off-by: Theodore Ts'o Reported-and-tested-by: syzbot+4a39a025912b265cacef@syzkaller.appspotmail.com Signed-off-by: Zubin Mithra Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 127 +++++++++++++++++++++++++----------------------- 1 file changed, 66 insertions(+), 61 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 1a0a56647974..93d8aa6ef661 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1782,6 +1782,13 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, arg = JBD2_DEFAULT_MAX_COMMIT_AGE; sbi->s_commit_interval = HZ * arg; } else if (token == Opt_debug_want_extra_isize) { + if ((arg & 1) || + (arg < 4) || + (arg > (sbi->s_inode_size - EXT4_GOOD_OLD_INODE_SIZE))) { + ext4_msg(sb, KERN_ERR, + "Invalid want_extra_isize %d", arg); + return -1; + } sbi->s_want_extra_isize = arg; } else if (token == Opt_max_batch_time) { sbi->s_max_batch_time = arg; @@ -3454,40 +3461,6 @@ int ext4_calculate_overhead(struct super_block *sb) return 0; } -static void ext4_clamp_want_extra_isize(struct super_block *sb) -{ - struct ext4_sb_info *sbi = EXT4_SB(sb); - struct ext4_super_block *es = sbi->s_es; - unsigned def_extra_isize = sizeof(struct ext4_inode) - - EXT4_GOOD_OLD_INODE_SIZE; - - if (sbi->s_inode_size == EXT4_GOOD_OLD_INODE_SIZE) { - sbi->s_want_extra_isize = 0; - return; - } - if (sbi->s_want_extra_isize < 4) { - sbi->s_want_extra_isize = def_extra_isize; - if (ext4_has_feature_extra_isize(sb)) { - if (sbi->s_want_extra_isize < - le16_to_cpu(es->s_want_extra_isize)) - sbi->s_want_extra_isize = - le16_to_cpu(es->s_want_extra_isize); - if (sbi->s_want_extra_isize < - le16_to_cpu(es->s_min_extra_isize)) - sbi->s_want_extra_isize = - le16_to_cpu(es->s_min_extra_isize); - } - } - /* Check if enough inode space is available */ - if ((sbi->s_want_extra_isize > sbi->s_inode_size) || - (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > - sbi->s_inode_size)) { - sbi->s_want_extra_isize = def_extra_isize; - ext4_msg(sb, KERN_INFO, - "required extra inode space not available"); - } -} - static void ext4_set_resv_clusters(struct super_block *sb) { ext4_fsblk_t resv_clusters; @@ -3695,6 +3668,65 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) */ sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; + if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { + sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; + sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO; + } else { + sbi->s_inode_size = le16_to_cpu(es->s_inode_size); + sbi->s_first_ino = le32_to_cpu(es->s_first_ino); + if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) { + ext4_msg(sb, KERN_ERR, "invalid first ino: %u", + sbi->s_first_ino); + goto failed_mount; + } + if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || + (!is_power_of_2(sbi->s_inode_size)) || + (sbi->s_inode_size > blocksize)) { + ext4_msg(sb, KERN_ERR, + "unsupported inode size: %d", + sbi->s_inode_size); + goto failed_mount; + } + /* + * i_atime_extra is the last extra field available for + * [acm]times in struct ext4_inode. Checking for that + * field should suffice to ensure we have extra space + * for all three. + */ + if (sbi->s_inode_size >= offsetof(struct ext4_inode, i_atime_extra) + + sizeof(((struct ext4_inode *)0)->i_atime_extra)) { + sb->s_time_gran = 1; + } else { + sb->s_time_gran = NSEC_PER_SEC; + } + } + if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { + sbi->s_want_extra_isize = sizeof(struct ext4_inode) - + EXT4_GOOD_OLD_INODE_SIZE; + if (ext4_has_feature_extra_isize(sb)) { + unsigned v, max = (sbi->s_inode_size - + EXT4_GOOD_OLD_INODE_SIZE); + + v = le16_to_cpu(es->s_want_extra_isize); + if (v > max) { + ext4_msg(sb, KERN_ERR, + "bad s_want_extra_isize: %d", v); + goto failed_mount; + } + if (sbi->s_want_extra_isize < v) + sbi->s_want_extra_isize = v; + + v = le16_to_cpu(es->s_min_extra_isize); + if (v > max) { + ext4_msg(sb, KERN_ERR, + "bad s_min_extra_isize: %d", v); + goto failed_mount; + } + if (sbi->s_want_extra_isize < v) + sbi->s_want_extra_isize = v; + } + } + if (sbi->s_es->s_mount_opts[0]) { char *s_mount_opts = kstrndup(sbi->s_es->s_mount_opts, sizeof(sbi->s_es->s_mount_opts), @@ -3893,29 +3925,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) has_huge_files); sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); - if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { - sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; - sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO; - } else { - sbi->s_inode_size = le16_to_cpu(es->s_inode_size); - sbi->s_first_ino = le32_to_cpu(es->s_first_ino); - if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) { - ext4_msg(sb, KERN_ERR, "invalid first ino: %u", - sbi->s_first_ino); - goto failed_mount; - } - if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || - (!is_power_of_2(sbi->s_inode_size)) || - (sbi->s_inode_size > blocksize)) { - ext4_msg(sb, KERN_ERR, - "unsupported inode size: %d", - sbi->s_inode_size); - goto failed_mount; - } - if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) - sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); - } - sbi->s_desc_size = le16_to_cpu(es->s_desc_size); if (ext4_has_feature_64bit(sb)) { if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || @@ -4354,8 +4363,6 @@ no_journal: if (ext4_setup_super(sb, es, sb_rdonly(sb))) sb->s_flags |= MS_RDONLY; - ext4_clamp_want_extra_isize(sb); - ext4_set_resv_clusters(sb); err = ext4_setup_system_zone(sb); @@ -5139,8 +5146,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } - ext4_clamp_want_extra_isize(sb); - if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^ test_opt(sb, JOURNAL_CHECKSUM)) { ext4_msg(sb, KERN_ERR, "changing journal_checksum " From 569ae81e2ed8eab6c3b99d7364ef129f8c21f193 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 30 Jan 2020 22:11:07 -0800 Subject: [PATCH 2901/3715] mm/mempolicy.c: fix out of bounds write in mpol_parse_str() commit c7a91bc7c2e17e0a9c8b9745a2cb118891218fd1 upstream. What we are trying to do is change the '=' character to a NUL terminator and then at the end of the function we restore it back to an '='. The problem is there are two error paths where we jump to the end of the function before we have replaced the '=' with NUL. We end up putting the '=' in the wrong place (possibly one element before the start of the buffer). Link: http://lkml.kernel.org/r/20200115055426.vdjwvry44nfug7yy@kili.mountain Reported-by: syzbot+e64a13c5369a194d67df@syzkaller.appspotmail.com Fixes: 095f1fc4ebf3 ("mempolicy: rework shmem mpol parsing and display") Signed-off-by: Dan Carpenter Acked-by: Vlastimil Babka Dmitry Vyukov Cc: Michal Hocko Cc: Dan Carpenter Cc: Lee Schermerhorn Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/mempolicy.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index a37cfa88669e..1b34f2e35951 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2724,6 +2724,9 @@ int mpol_parse_str(char *str, struct mempolicy **mpol) char *flags = strchr(str, '='); int err = 1; + if (flags) + *flags++ = '\0'; /* terminate mode string */ + if (nodelist) { /* NUL-terminate mode or flags string */ *nodelist++ = '\0'; @@ -2734,9 +2737,6 @@ int mpol_parse_str(char *str, struct mempolicy **mpol) } else nodes_clear(nodes); - if (flags) - *flags++ = '\0'; /* terminate mode string */ - for (mode = 0; mode < MPOL_MAX; mode++) { if (!strcmp(str, policy_modes[mode])) { break; From 4397069f236d9d9888f23c0ed814c403f80bfd1c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 12 Dec 2019 11:30:03 +0100 Subject: [PATCH 2902/3715] reiserfs: Fix memory leak of journal device string commit 5474ca7da6f34fa95e82edc747d5faa19cbdfb5c upstream. When a filesystem is mounted with jdev mount option, we store the journal device name in an allocated string in superblock. However we fail to ever free that string. Fix it. Reported-by: syzbot+1c6756baf4b16b94d2a6@syzkaller.appspotmail.com Fixes: c3aa077648e1 ("reiserfs: Properly display mount options in /proc/mounts") CC: stable@vger.kernel.org Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/reiserfs/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index cc0b22c72e83..5208d85dd30c 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -629,6 +629,7 @@ static void reiserfs_put_super(struct super_block *s) reiserfs_write_unlock(s); mutex_destroy(&REISERFS_SB(s)->lock); destroy_workqueue(REISERFS_SB(s)->commit_wq); + kfree(REISERFS_SB(s)->s_jdev); kfree(s->s_fs_info); s->s_fs_info = NULL; } @@ -2243,6 +2244,7 @@ error_unlocked: kfree(qf_names[j]); } #endif + kfree(sbi->s_jdev); kfree(sbi); s->s_fs_info = NULL; From 2e0ebd897abc719448d9d10e3a7bbeac6294c30e Mon Sep 17 00:00:00 2001 From: Sean Young Date: Sun, 10 Nov 2019 11:04:40 +0100 Subject: [PATCH 2903/3715] media: digitv: don't continue if remote control state can't be read commit eecc70d22ae51225de1ef629c1159f7116476b2e upstream. This results in an uninitialized variable read. Reported-by: syzbot+6bf9606ee955b646c0e1@syzkaller.appspotmail.com Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb/digitv.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/media/usb/dvb-usb/digitv.c b/drivers/media/usb/dvb-usb/digitv.c index 475a3c0cdee7..20d33f0544ed 100644 --- a/drivers/media/usb/dvb-usb/digitv.c +++ b/drivers/media/usb/dvb-usb/digitv.c @@ -233,18 +233,22 @@ static struct rc_map_table rc_map_digitv_table[] = { static int digitv_rc_query(struct dvb_usb_device *d, u32 *event, int *state) { - int i; + int ret, i; u8 key[5]; u8 b[4] = { 0 }; *event = 0; *state = REMOTE_NO_KEY_PRESSED; - digitv_ctrl_msg(d,USB_READ_REMOTE,0,NULL,0,&key[1],4); + ret = digitv_ctrl_msg(d, USB_READ_REMOTE, 0, NULL, 0, &key[1], 4); + if (ret) + return ret; /* Tell the device we've read the remote. Not sure how necessary this is, but the Nebula SDK does it. */ - digitv_ctrl_msg(d,USB_WRITE_REMOTE,0,b,4,NULL,0); + ret = digitv_ctrl_msg(d, USB_WRITE_REMOTE, 0, b, 4, NULL, 0); + if (ret) + return ret; /* if something is inside the buffer, simulate key press */ if (key[1] != 0) From b7fae41e420f3db83466b9dcd26e06ae706b85ab Mon Sep 17 00:00:00 2001 From: Sean Young Date: Sun, 10 Nov 2019 11:15:37 +0100 Subject: [PATCH 2904/3715] media: af9005: uninitialized variable printked commit 51d0c99b391f0cac61ad7b827c26f549ee55672c upstream. If usb_bulk_msg() fails, actual_length can be uninitialized. Reported-by: syzbot+9d42b7773d2fecd983ab@syzkaller.appspotmail.com Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb/af9005.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/usb/dvb-usb/af9005.c b/drivers/media/usb/dvb-usb/af9005.c index c047a0bdf91f..66990a193bc5 100644 --- a/drivers/media/usb/dvb-usb/af9005.c +++ b/drivers/media/usb/dvb-usb/af9005.c @@ -563,7 +563,7 @@ static int af9005_boot_packet(struct usb_device *udev, int type, u8 *reply, u8 *buf, int size) { u16 checksum; - int act_len, i, ret; + int act_len = 0, i, ret; memset(buf, 0, size); buf[0] = (u8) (FW_BULKOUT_SIZE & 0xff); From 03a8533d9d06b029dcc8bb818a6ca30db7782ab3 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 12 Nov 2019 10:22:24 +0100 Subject: [PATCH 2905/3715] media: gspca: zero usb_buf commit de89d0864f66c2a1b75becfdd6bf3793c07ce870 upstream. Allocate gspca_dev->usb_buf with kzalloc instead of kmalloc to ensure it is property zeroed. This fixes various syzbot errors about uninitialized data. Syzbot links: https://syzkaller.appspot.com/bug?extid=32310fc2aea76898d074 https://syzkaller.appspot.com/bug?extid=99706d6390be1ac542a2 https://syzkaller.appspot.com/bug?extid=64437af5c781a7f0e08e Reported-and-tested-by: syzbot+32310fc2aea76898d074@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+99706d6390be1ac542a2@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+64437af5c781a7f0e08e@syzkaller.appspotmail.com Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/gspca/gspca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/usb/gspca/gspca.c b/drivers/media/usb/gspca/gspca.c index 0f141762abf1..87582be4a39d 100644 --- a/drivers/media/usb/gspca/gspca.c +++ b/drivers/media/usb/gspca/gspca.c @@ -2038,7 +2038,7 @@ int gspca_dev_probe2(struct usb_interface *intf, pr_err("couldn't kzalloc gspca struct\n"); return -ENOMEM; } - gspca_dev->usb_buf = kmalloc(USB_BUF_SZ, GFP_KERNEL); + gspca_dev->usb_buf = kzalloc(USB_BUF_SZ, GFP_KERNEL); if (!gspca_dev->usb_buf) { pr_err("out of memory\n"); ret = -ENOMEM; From fb5e3b56c4c4cc7a83a5f8bd6e9869e53015e41c Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 12 Nov 2019 10:22:28 +0100 Subject: [PATCH 2906/3715] media: dvb-usb/dvb-usb-urb.c: initialize actlen to 0 commit 569bc8d6a6a50acb5fcf07fb10b8d2d461fdbf93 upstream. This fixes a syzbot failure since actlen could be uninitialized, but it was still used. Syzbot link: https://syzkaller.appspot.com/bug?extid=6bf9606ee955b646c0e1 Reported-and-tested-by: syzbot+6bf9606ee955b646c0e1@syzkaller.appspotmail.com Signed-off-by: Hans Verkuil Acked-by: Sean Young Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb/dvb-usb-urb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/usb/dvb-usb/dvb-usb-urb.c b/drivers/media/usb/dvb-usb/dvb-usb-urb.c index c1b4e94a37f8..2aabf90d8697 100644 --- a/drivers/media/usb/dvb-usb/dvb-usb-urb.c +++ b/drivers/media/usb/dvb-usb/dvb-usb-urb.c @@ -12,7 +12,7 @@ int dvb_usb_generic_rw(struct dvb_usb_device *d, u8 *wbuf, u16 wlen, u8 *rbuf, u16 rlen, int delay_ms) { - int actlen,ret = -ENOMEM; + int actlen = 0, ret = -ENOMEM; if (!d || wbuf == NULL || wlen == 0) return -EINVAL; From ab84fd0d3dc83277d6ab7246a6b2cd45ba924367 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Mon, 13 Jan 2020 11:48:42 +0800 Subject: [PATCH 2907/3715] ttyprintk: fix a potential deadlock in interrupt context issue commit 9a655c77ff8fc65699a3f98e237db563b37c439b upstream. tpk_write()/tpk_close() could be interrupted when holding a mutex, then in timer handler tpk_write() may be called again trying to acquire same mutex, lead to deadlock. Google syzbot reported this issue with CONFIG_DEBUG_ATOMIC_SLEEP enabled: BUG: sleeping function called from invalid context at kernel/locking/mutex.c:938 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 0, name: swapper/1 1 lock held by swapper/1/0: ... Call Trace: dump_stack+0x197/0x210 ___might_sleep.cold+0x1fb/0x23e __might_sleep+0x95/0x190 __mutex_lock+0xc5/0x13c0 mutex_lock_nested+0x16/0x20 tpk_write+0x5d/0x340 resync_tnc+0x1b6/0x320 call_timer_fn+0x1ac/0x780 run_timer_softirq+0x6c3/0x1790 __do_softirq+0x262/0x98c irq_exit+0x19b/0x1e0 smp_apic_timer_interrupt+0x1a3/0x610 apic_timer_interrupt+0xf/0x20 See link https://syzkaller.appspot.com/bug?extid=2eeef62ee31f9460ad65 for more details. Fix it by using spinlock in process context instead of mutex and having interrupt disabled in critical section. Reported-by: syzbot+2eeef62ee31f9460ad65@syzkaller.appspotmail.com Signed-off-by: Zhenzhong Duan Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20200113034842.435-1-zhenzhong.duan@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/char/ttyprintk.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/char/ttyprintk.c b/drivers/char/ttyprintk.c index 67549ce88cc9..774748497ace 100644 --- a/drivers/char/ttyprintk.c +++ b/drivers/char/ttyprintk.c @@ -18,10 +18,11 @@ #include #include #include +#include struct ttyprintk_port { struct tty_port port; - struct mutex port_write_mutex; + spinlock_t spinlock; }; static struct ttyprintk_port tpk_port; @@ -100,11 +101,12 @@ static int tpk_open(struct tty_struct *tty, struct file *filp) static void tpk_close(struct tty_struct *tty, struct file *filp) { struct ttyprintk_port *tpkp = tty->driver_data; + unsigned long flags; - mutex_lock(&tpkp->port_write_mutex); + spin_lock_irqsave(&tpkp->spinlock, flags); /* flush tpk_printk buffer */ tpk_printk(NULL, 0); - mutex_unlock(&tpkp->port_write_mutex); + spin_unlock_irqrestore(&tpkp->spinlock, flags); tty_port_close(&tpkp->port, tty, filp); } @@ -116,13 +118,14 @@ static int tpk_write(struct tty_struct *tty, const unsigned char *buf, int count) { struct ttyprintk_port *tpkp = tty->driver_data; + unsigned long flags; int ret; /* exclusive use of tpk_printk within this tty */ - mutex_lock(&tpkp->port_write_mutex); + spin_lock_irqsave(&tpkp->spinlock, flags); ret = tpk_printk(buf, count); - mutex_unlock(&tpkp->port_write_mutex); + spin_unlock_irqrestore(&tpkp->spinlock, flags); return ret; } @@ -172,7 +175,7 @@ static int __init ttyprintk_init(void) { int ret = -ENOMEM; - mutex_init(&tpk_port.port_write_mutex); + spin_lock_init(&tpk_port.spinlock); ttyprintk_driver = tty_alloc_driver(1, TTY_DRIVER_RESET_TERMIOS | From 58e957b9c7c22e89188558c27437b6e9a2ddae3d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 15 Jan 2020 20:49:04 +0300 Subject: [PATCH 2908/3715] Bluetooth: Fix race condition in hci_release_sock() commit 11eb85ec42dc8c7a7ec519b90ccf2eeae9409de8 upstream. Syzbot managed to trigger a use after free "KASAN: use-after-free Write in hci_sock_bind". I have reviewed the code manually and one possibly cause I have found is that we are not holding lock_sock(sk) when we do the hci_dev_put(hdev) in hci_sock_release(). My theory is that the bind and the release are racing against each other which results in this use after free. Reported-by: syzbot+eba992608adf3d796bcc@syzkaller.appspotmail.com Signed-off-by: Dan Carpenter Signed-off-by: Johan Hedberg Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_sock.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 4a05235929b9..93093d7c3824 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -826,6 +826,8 @@ static int hci_sock_release(struct socket *sock) if (!sk) return 0; + lock_sock(sk); + switch (hci_pi(sk)->channel) { case HCI_CHANNEL_MONITOR: atomic_dec(&monitor_promisc); @@ -873,6 +875,7 @@ static int hci_sock_release(struct socket *sock) skb_queue_purge(&sk->sk_receive_queue); skb_queue_purge(&sk->sk_write_queue); + release_sock(sk); sock_put(sk); return 0; } From 060af799ef5a17a4a6dc3e95c1bacb51fa6ebec7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Koutn=C3=BD?= Date: Thu, 9 Jan 2020 16:05:59 +0100 Subject: [PATCH 2909/3715] cgroup: Prevent double killing of css when enabling threaded cgroup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3bc0bb36fa30e95ca829e9cf480e1ef7f7638333 upstream. The test_cgcore_no_internal_process_constraint_on_threads selftest when running with subsystem controlling noise triggers two warnings: > [ 597.443115] WARNING: CPU: 1 PID: 28167 at kernel/cgroup/cgroup.c:3131 cgroup_apply_control_enable+0xe0/0x3f0 > [ 597.443413] WARNING: CPU: 1 PID: 28167 at kernel/cgroup/cgroup.c:3177 cgroup_apply_control_disable+0xa6/0x160 Both stem from a call to cgroup_type_write. The first warning was also triggered by syzkaller. When we're switching cgroup to threaded mode shortly after a subsystem was disabled on it, we can see the respective subsystem css dying there. The warning in cgroup_apply_control_enable is harmless in this case since we're not adding new subsys anyway. The warning in cgroup_apply_control_disable indicates an attempt to kill css of recently disabled subsystem repeatedly. The commit prevents these situations by making cgroup_type_write wait for all dying csses to go away before re-applying subtree controls. When at it, the locations of WARN_ON_ONCE calls are moved so that warning is triggered only when we are about to misuse the dying css. Reported-by: syzbot+5493b2a54d31d6aea629@syzkaller.appspotmail.com Reported-by: Christian Brauner Signed-off-by: Michal Koutný Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- kernel/cgroup/cgroup.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 2c57030f54aa..829943aad7be 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2884,8 +2884,6 @@ static int cgroup_apply_control_enable(struct cgroup *cgrp) for_each_subsys(ss, ssid) { struct cgroup_subsys_state *css = cgroup_css(dsct, ss); - WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt)); - if (!(cgroup_ss_mask(dsct) & (1 << ss->id))) continue; @@ -2895,6 +2893,8 @@ static int cgroup_apply_control_enable(struct cgroup *cgrp) return PTR_ERR(css); } + WARN_ON_ONCE(percpu_ref_is_dying(&css->refcnt)); + if (css_visible(css)) { ret = css_populate_dir(css); if (ret) @@ -2930,11 +2930,11 @@ static void cgroup_apply_control_disable(struct cgroup *cgrp) for_each_subsys(ss, ssid) { struct cgroup_subsys_state *css = cgroup_css(dsct, ss); - WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt)); - if (!css) continue; + WARN_ON_ONCE(percpu_ref_is_dying(&css->refcnt)); + if (css->parent && !(cgroup_ss_mask(dsct) & (1 << ss->id))) { kill_css(css); @@ -3221,7 +3221,8 @@ static ssize_t cgroup_type_write(struct kernfs_open_file *of, char *buf, if (strcmp(strstrip(buf), "threaded")) return -EINVAL; - cgrp = cgroup_kn_lock_live(of->kn, false); + /* drain dying csses before we re-apply (threaded) subtree control */ + cgrp = cgroup_kn_lock_live(of->kn, true); if (!cgrp) return -ENOENT; From c7d812223d2241b331e60df738918f05b93173ac Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 3 Feb 2020 13:21:30 +0000 Subject: [PATCH 2910/3715] media: si470x-i2c: Move free() past last use of 'radio' A pointer to 'struct si470x_device' is currently used after free: drivers/media/radio/si470x/radio-si470x-i2c.c:462:25-30: ERROR: reference preceded by free on line 460 Shift the call to free() down past its final use. NB: Not sending to Mainline, since the problem does not exist there, it was caused by the backport of 2df200ab234a ("media: si470x-i2c: add missed operations in remove") to the stable trees. Cc: # v3.18+ Reported-by: kbuild test robot Reported-by: Julia Lawall Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/media/radio/si470x/radio-si470x-i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/radio/si470x/radio-si470x-i2c.c b/drivers/media/radio/si470x/radio-si470x-i2c.c index b60fb6ed5aeb..527535614342 100644 --- a/drivers/media/radio/si470x/radio-si470x-i2c.c +++ b/drivers/media/radio/si470x/radio-si470x-i2c.c @@ -453,10 +453,10 @@ static int si470x_i2c_remove(struct i2c_client *client) free_irq(client->irq, radio); video_unregister_device(&radio->videodev); - kfree(radio); v4l2_ctrl_handler_free(&radio->hdl); v4l2_device_unregister(&radio->v4l2_dev); + kfree(radio); return 0; } From 719e8e93e81e4398af2881388fb88000d3b76a1a Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 11 Dec 2019 15:52:17 +0100 Subject: [PATCH 2911/3715] ARM: dts: sun8i: a83t: Correct USB3503 GPIOs polarity [ Upstream commit 1c226017d3ec93547b58082bdf778d9db7401c95 ] Current USB3503 driver ignores GPIO polarity and always operates as if the GPIO lines were flagged as ACTIVE_HIGH. Fix the polarity for the existing USB3503 chip applications to match the chip specification and common convention for naming the pins. The only pin, which has to be ACTIVE_LOW is the reset pin. The remaining are ACTIVE_HIGH. This change allows later to fix the USB3503 driver to properly use generic GPIO bindings and read polarity from DT. Signed-off-by: Marek Szyprowski Signed-off-by: Maxime Ripard Signed-off-by: Sasha Levin --- arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts b/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts index 716a205c6dbb..1fed3231f5c1 100644 --- a/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts +++ b/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts @@ -90,7 +90,7 @@ initial-mode = <1>; /* initialize in HUB mode */ disabled-ports = <1>; intn-gpios = <&pio 7 5 GPIO_ACTIVE_HIGH>; /* PH5 */ - reset-gpios = <&pio 4 16 GPIO_ACTIVE_HIGH>; /* PE16 */ + reset-gpios = <&pio 4 16 GPIO_ACTIVE_LOW>; /* PE16 */ connect-gpios = <&pio 4 17 GPIO_ACTIVE_HIGH>; /* PE17 */ refclk-frequency = <19200000>; }; From f795e1f7b43490c482c9762287167aa64ce7f6f1 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Tue, 17 Dec 2019 14:21:24 +0530 Subject: [PATCH 2912/3715] ARM: dts: beagle-x15-common: Model 5V0 regulator [ Upstream commit e17e7c498d4f734df93c300441e100818ed58168 ] On am57xx-beagle-x15, 5V0 is connected to P16, P17, P18 and P19 connectors. On am57xx-evm, 5V0 regulator is used to get 3V6 regulator which is connected to the COMQ port. Model 5V0 regulator here in order for it to be used in am57xx-evm to model 3V6 regulator. Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- .../boot/dts/am57xx-beagle-x15-common.dtsi | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi b/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi index 49aeecd312b4..d578a9f7e1a0 100644 --- a/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi +++ b/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi @@ -32,6 +32,27 @@ reg = <0x0 0x80000000 0x0 0x80000000>; }; + main_12v0: fixedregulator-main_12v0 { + /* main supply */ + compatible = "regulator-fixed"; + regulator-name = "main_12v0"; + regulator-min-microvolt = <12000000>; + regulator-max-microvolt = <12000000>; + regulator-always-on; + regulator-boot-on; + }; + + evm_5v0: fixedregulator-evm_5v0 { + /* Output of TPS54531D */ + compatible = "regulator-fixed"; + regulator-name = "evm_5v0"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + vin-supply = <&main_12v0>; + regulator-always-on; + regulator-boot-on; + }; + vdd_3v3: fixedregulator-vdd_3v3 { compatible = "regulator-fixed"; regulator-name = "vdd_3v3"; From 852c2bb979257581311ed3baa7911d012532951f Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Wed, 11 Dec 2019 22:03:14 -0600 Subject: [PATCH 2913/3715] soc: ti: wkup_m3_ipc: Fix race condition with rproc_boot [ Upstream commit 03729cfa0d543bc996bf959e762ec999afc8f3d2 ] Any user of wkup_m3_ipc calls wkup_m3_ipc_get to get a handle and this checks the value of the static variable m3_ipc_state to see if the wkup_m3 is ready. Currently this is populated during probe before rproc_boot has been called, meaning there is a window of time that wkup_m3_ipc_get can return a valid handle but the wkup_m3 itself is not ready, leading to invalid IPC calls to the wkup_m3 and system instability. To avoid this, move the population of the m3_ipc_state variable until after rproc_boot has succeeded to guarantee a valid and usable handle is always returned. Reported-by: Suman Anna Signed-off-by: Dave Gerlach Acked-by: Santosh Shilimkar Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- drivers/soc/ti/wkup_m3_ipc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/soc/ti/wkup_m3_ipc.c b/drivers/soc/ti/wkup_m3_ipc.c index 369aef5e7228..651827c6ee6f 100644 --- a/drivers/soc/ti/wkup_m3_ipc.c +++ b/drivers/soc/ti/wkup_m3_ipc.c @@ -375,6 +375,8 @@ static void wkup_m3_rproc_boot_thread(struct wkup_m3_ipc *m3_ipc) ret = rproc_boot(m3_ipc->rproc); if (ret) dev_err(dev, "rproc_boot failed\n"); + else + m3_ipc_state = m3_ipc; do_exit(0); } @@ -461,8 +463,6 @@ static int wkup_m3_ipc_probe(struct platform_device *pdev) goto err_put_rproc; } - m3_ipc_state = m3_ipc; - return 0; err_put_rproc: From 89f54ffd507359db9aef5e59e32312773fc72747 Mon Sep 17 00:00:00 2001 From: Markus Theil Date: Tue, 3 Dec 2019 19:06:44 +0100 Subject: [PATCH 2914/3715] mac80211: mesh: restrict airtime metric to peered established plinks [ Upstream commit 02a614499600af836137c3fbc4404cd96365fff2 ] The following warning is triggered every time an unestablished mesh peer gets dumped. Checks if a peer link is established before retrieving the airtime link metric. [ 9563.022567] WARNING: CPU: 0 PID: 6287 at net/mac80211/mesh_hwmp.c:345 airtime_link_metric_get+0xa2/0xb0 [mac80211] [ 9563.022697] Hardware name: PC Engines apu2/apu2, BIOS v4.10.0.3 [ 9563.022756] RIP: 0010:airtime_link_metric_get+0xa2/0xb0 [mac80211] [ 9563.022838] Call Trace: [ 9563.022897] sta_set_sinfo+0x936/0xa10 [mac80211] [ 9563.022964] ieee80211_dump_station+0x6d/0x90 [mac80211] [ 9563.023062] nl80211_dump_station+0x154/0x2a0 [cfg80211] [ 9563.023120] netlink_dump+0x17b/0x370 [ 9563.023130] netlink_recvmsg+0x2a4/0x480 [ 9563.023140] ____sys_recvmsg+0xa6/0x160 [ 9563.023154] ___sys_recvmsg+0x93/0xe0 [ 9563.023169] __sys_recvmsg+0x7e/0xd0 [ 9563.023210] do_syscall_64+0x4e/0x140 [ 9563.023217] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Signed-off-by: Markus Theil Link: https://lore.kernel.org/r/20191203180644.70653-1-markus.theil@tu-ilmenau.de [rewrite commit message] Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/mesh_hwmp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index fab0764c315f..994dde6e5f9d 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -326,6 +326,9 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local, unsigned long fail_avg = ewma_mesh_fail_avg_read(&sta->mesh->fail_avg); + if (sta->mesh->plink_state != NL80211_PLINK_ESTAB) + return MAX_METRIC; + /* Try to get rate based on HW/SW RC algorithm. * Rate is returned in units of Kbps, correct this * to comply with airtime calculation units From aecd1fe0edacfd5d3ff3d4c0752bc89b83d13d72 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Wed, 18 Dec 2019 20:04:54 +0100 Subject: [PATCH 2915/3715] clk: mmp2: Fix the order of timer mux parents [ Upstream commit 8bea5ac0fbc5b2103f8779ddff216122e3c2e1ad ] Determined empirically, no documentation is available. The OLPC XO-1.75 laptop used parent 1, that one being VCTCXO/4 (65MHz), but thought it's a VCTCXO/2 (130MHz). The mmp2 timer driver, not knowing what is going on, ended up just dividing the rate as of commit f36797ee4380 ("ARM: mmp/mmp2: dt: enable the clock")' Link: https://lore.kernel.org/r/20191218190454.420358-3-lkundrak@v3.sk Signed-off-by: Lubomir Rintel Acked-by: Stephen Boyd Signed-off-by: Olof Johansson Signed-off-by: Sasha Levin --- drivers/clk/mmp/clk-of-mmp2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/mmp/clk-of-mmp2.c b/drivers/clk/mmp/clk-of-mmp2.c index d083b860f083..10689d8cd386 100644 --- a/drivers/clk/mmp/clk-of-mmp2.c +++ b/drivers/clk/mmp/clk-of-mmp2.c @@ -134,7 +134,7 @@ static DEFINE_SPINLOCK(ssp3_lock); static const char *ssp_parent_names[] = {"vctcxo_4", "vctcxo_2", "vctcxo", "pll1_16"}; static DEFINE_SPINLOCK(timer_lock); -static const char *timer_parent_names[] = {"clk32", "vctcxo_2", "vctcxo_4", "vctcxo"}; +static const char *timer_parent_names[] = {"clk32", "vctcxo_4", "vctcxo_2", "vctcxo"}; static DEFINE_SPINLOCK(reset_lock); From ca60e5ca55b860dafda4937c5a9631e149cbbb90 Mon Sep 17 00:00:00 2001 From: Radoslaw Tyl Date: Mon, 25 Nov 2019 15:24:52 +0100 Subject: [PATCH 2916/3715] ixgbevf: Remove limit of 10 entries for unicast filter list [ Upstream commit aa604651d523b1493988d0bf6710339f3ee60272 ] Currently, though the FDB entry is added to VF, it does not appear in RAR filters. VF driver only allows to add 10 entries. Attempting to add another causes an error. This patch removes limitation and allows use of all free RAR entries for the FDB if needed. Fixes: 46ec20ff7d ("ixgbevf: Add macvlan support in the set rx mode op") Signed-off-by: Radoslaw Tyl Acked-by: Paul Menzel Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index e238f6e85ab6..a7708e14aa5c 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1858,11 +1858,6 @@ static int ixgbevf_write_uc_addr_list(struct net_device *netdev) struct ixgbe_hw *hw = &adapter->hw; int count = 0; - if ((netdev_uc_count(netdev)) > 10) { - pr_err("Too many unicast filters - No Space\n"); - return -ENOSPC; - } - if (!netdev_uc_empty(netdev)) { struct netdev_hw_addr *ha; From 0350ed7bccd87c9ce86c55f2c39b69d9f4c9a799 Mon Sep 17 00:00:00 2001 From: Cambda Zhu Date: Wed, 27 Nov 2019 17:03:55 +0800 Subject: [PATCH 2917/3715] ixgbe: Fix calculation of queue with VFs and flow director on interface flap [ Upstream commit 4fad78ad6422d9bca62135bbed8b6abc4cbb85b8 ] This patch fixes the calculation of queue when we restore flow director filters after resetting adapter. In ixgbe_fdir_filter_restore(), filter's vf may be zero which makes the queue outside of the rx_ring array. The calculation is changed to the same as ixgbe_add_ethtool_fdir_entry(). Signed-off-by: Cambda Zhu Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 37 ++++++++++++++----- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index e4c1e6345edd..ba184287e11f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -5131,7 +5131,7 @@ static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; struct hlist_node *node2; struct ixgbe_fdir_filter *filter; - u64 action; + u8 queue; spin_lock(&adapter->fdir_perfect_lock); @@ -5140,17 +5140,34 @@ static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter) hlist_for_each_entry_safe(filter, node2, &adapter->fdir_filter_list, fdir_node) { - action = filter->action; - if (action != IXGBE_FDIR_DROP_QUEUE && action != 0) - action = - (action >> ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF) - 1; + if (filter->action == IXGBE_FDIR_DROP_QUEUE) { + queue = IXGBE_FDIR_DROP_QUEUE; + } else { + u32 ring = ethtool_get_flow_spec_ring(filter->action); + u8 vf = ethtool_get_flow_spec_ring_vf(filter->action); + + if (!vf && (ring >= adapter->num_rx_queues)) { + e_err(drv, "FDIR restore failed without VF, ring: %u\n", + ring); + continue; + } else if (vf && + ((vf > adapter->num_vfs) || + ring >= adapter->num_rx_queues_per_pool)) { + e_err(drv, "FDIR restore failed with VF, vf: %hhu, ring: %u\n", + vf, ring); + continue; + } + + /* Map the ring onto the absolute queue index */ + if (!vf) + queue = adapter->rx_ring[ring]->reg_idx; + else + queue = ((vf - 1) * + adapter->num_rx_queues_per_pool) + ring; + } ixgbe_fdir_write_perfect_filter_82599(hw, - &filter->filter, - filter->sw_idx, - (action == IXGBE_FDIR_DROP_QUEUE) ? - IXGBE_FDIR_DROP_QUEUE : - adapter->rx_ring[action]->reg_idx); + &filter->filter, filter->sw_idx, queue); } spin_unlock(&adapter->fdir_perfect_lock); From 5c273c3a8bb00213dc6bfb3a1e941355bcdaa2d9 Mon Sep 17 00:00:00 2001 From: Manfred Rudigier Date: Wed, 4 Dec 2019 11:40:26 +0100 Subject: [PATCH 2918/3715] igb: Fix SGMII SFP module discovery for 100FX/LX. [ Upstream commit 5365ec1aeff5b9f2962a9c9b31d63f9dad7e0e2d ] Changing the link mode should also be done for 100BaseFX SGMII modules, otherwise they just don't work when the default link mode in CTRL_EXT coming from the EEPROM is SERDES. Additionally 100Base-LX SGMII SFP modules are also supported now, which was not the case before. Tested with an i210 using Flexoptix S.1303.2M.G 100FX and S.1303.10.G 100LX SGMII SFP modules. Signed-off-by: Manfred Rudigier Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igb/e1000_82575.c | 8 ++------ drivers/net/ethernet/intel/igb/igb_ethtool.c | 2 +- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c index c37cc8bccf47..158c277ec353 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.c +++ b/drivers/net/ethernet/intel/igb/e1000_82575.c @@ -562,7 +562,7 @@ static s32 igb_set_sfp_media_type_82575(struct e1000_hw *hw) dev_spec->module_plugged = true; if (eth_flags->e1000_base_lx || eth_flags->e1000_base_sx) { hw->phy.media_type = e1000_media_type_internal_serdes; - } else if (eth_flags->e100_base_fx) { + } else if (eth_flags->e100_base_fx || eth_flags->e100_base_lx) { dev_spec->sgmii_active = true; hw->phy.media_type = e1000_media_type_internal_serdes; } else if (eth_flags->e1000_base_t) { @@ -689,14 +689,10 @@ static s32 igb_get_invariants_82575(struct e1000_hw *hw) break; } - /* do not change link mode for 100BaseFX */ - if (dev_spec->eth_flags.e100_base_fx) - break; - /* change current link mode setting */ ctrl_ext &= ~E1000_CTRL_EXT_LINK_MODE_MASK; - if (hw->phy.media_type == e1000_media_type_copper) + if (dev_spec->sgmii_active) ctrl_ext |= E1000_CTRL_EXT_LINK_MODE_SGMII; else ctrl_ext |= E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES; diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index d06a8db514d4..82028ce355fb 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -201,7 +201,7 @@ static int igb_get_link_ksettings(struct net_device *netdev, advertising &= ~ADVERTISED_1000baseKX_Full; } } - if (eth_flags->e100_base_fx) { + if (eth_flags->e100_base_fx || eth_flags->e100_base_lx) { supported |= SUPPORTED_100baseT_Full; advertising |= ADVERTISED_100baseT_Full; } From 3403f8652c453536da61efba3236b531d63701b9 Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Mon, 13 Jan 2020 11:04:00 +0100 Subject: [PATCH 2919/3715] ASoC: sti: fix possible sleep-in-atomic [ Upstream commit ce780a47c3c01e1e179d0792df6b853a913928f1 ] Change mutex and spinlock management to avoid sleep in atomic issue. Signed-off-by: Arnaud Pouliquen Link: https://lore.kernel.org/r/20200113100400.30472-1-arnaud.pouliquen@st.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/sti/uniperif_player.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sound/soc/sti/uniperif_player.c b/sound/soc/sti/uniperif_player.c index d8b6936e544e..908f13623f8c 100644 --- a/sound/soc/sti/uniperif_player.c +++ b/sound/soc/sti/uniperif_player.c @@ -226,7 +226,6 @@ static void uni_player_set_channel_status(struct uniperif *player, * sampling frequency. If no sample rate is already specified, then * set one. */ - mutex_lock(&player->ctrl_lock); if (runtime) { switch (runtime->rate) { case 22050: @@ -303,7 +302,6 @@ static void uni_player_set_channel_status(struct uniperif *player, player->stream_settings.iec958.status[3 + (n * 4)] << 24; SET_UNIPERIF_CHANNEL_STA_REGN(player, n, status); } - mutex_unlock(&player->ctrl_lock); /* Update the channel status */ if (player->ver < SND_ST_UNIPERIF_VERSION_UNI_PLR_TOP_1_0) @@ -365,8 +363,10 @@ static int uni_player_prepare_iec958(struct uniperif *player, SET_UNIPERIF_CTRL_ZERO_STUFF_HW(player); + mutex_lock(&player->ctrl_lock); /* Update the channel status */ uni_player_set_channel_status(player, runtime); + mutex_unlock(&player->ctrl_lock); /* Clear the user validity user bits */ SET_UNIPERIF_USER_VALIDITY_VALIDITY_LR(player, 0); @@ -598,7 +598,6 @@ static int uni_player_ctl_iec958_put(struct snd_kcontrol *kcontrol, iec958->status[1] = ucontrol->value.iec958.status[1]; iec958->status[2] = ucontrol->value.iec958.status[2]; iec958->status[3] = ucontrol->value.iec958.status[3]; - mutex_unlock(&player->ctrl_lock); spin_lock_irqsave(&player->irq_lock, flags); if (player->substream && player->substream->runtime) @@ -608,6 +607,8 @@ static int uni_player_ctl_iec958_put(struct snd_kcontrol *kcontrol, uni_player_set_channel_status(player, NULL); spin_unlock_irqrestore(&player->irq_lock, flags); + mutex_unlock(&player->ctrl_lock); + return 0; } From f5929c9f3a52a6137b8c5ed2007a4cd725ab7aaf Mon Sep 17 00:00:00 2001 From: Kristian Evensen Date: Mon, 13 Jan 2020 14:57:40 +0100 Subject: [PATCH 2920/3715] qmi_wwan: Add support for Quectel RM500Q MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a9ff44f0e61d074f29770413fef6a5452be7b83e ] RM500Q is a 5G module from Quectel, supporting both standalone and non-standalone modes. The normal Quectel quirks apply (DTR and dynamic interface numbers). Signed-off-by: Kristian Evensen Acked-by: Bjørn Mork Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 4a984b76a60e..db70d4c5778a 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -999,6 +999,7 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0125)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */ {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0306)}, /* Quectel EP06/EG06/EM06 */ {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0512)}, /* Quectel EG12/EM12 */ + {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0800)}, /* Quectel RM500Q-GL */ /* 3. Combined interface devices matching on interface number */ {QMI_FIXED_INTF(0x0408, 0xea42, 4)}, /* Yota / Megafon M100-1 */ From 8f6b2439a2b939d92b25201e456c7949a1f8ba79 Mon Sep 17 00:00:00 2001 From: Ganapathi Bhat Date: Fri, 20 Dec 2019 10:14:32 +0000 Subject: [PATCH 2921/3715] wireless: fix enabling channel 12 for custom regulatory domain [ Upstream commit c4b9d655e445a8be0bff624aedea190606b5ebbc ] Commit e33e2241e272 ("Revert "cfg80211: Use 5MHz bandwidth by default when checking usable channels"") fixed a broken regulatory (leaving channel 12 open for AP where not permitted). Apply a similar fix to custom regulatory domain processing. Signed-off-by: Cathy Luo Signed-off-by: Ganapathi Bhat Link: https://lore.kernel.org/r/1576836859-8945-1-git-send-email-ganapathi.bhat@nxp.com [reword commit message, fix coding style, add a comment] Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/reg.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 804eac073b6b..e60a7dedfbf1 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1718,14 +1718,15 @@ static void update_all_wiphy_regulatory(enum nl80211_reg_initiator initiator) static void handle_channel_custom(struct wiphy *wiphy, struct ieee80211_channel *chan, - const struct ieee80211_regdomain *regd) + const struct ieee80211_regdomain *regd, + u32 min_bw) { u32 bw_flags = 0; const struct ieee80211_reg_rule *reg_rule = NULL; const struct ieee80211_power_rule *power_rule = NULL; u32 bw; - for (bw = MHZ_TO_KHZ(20); bw >= MHZ_TO_KHZ(5); bw = bw / 2) { + for (bw = MHZ_TO_KHZ(20); bw >= min_bw; bw = bw / 2) { reg_rule = freq_reg_info_regd(MHZ_TO_KHZ(chan->center_freq), regd, bw); if (!IS_ERR(reg_rule)) @@ -1781,8 +1782,14 @@ static void handle_band_custom(struct wiphy *wiphy, if (!sband) return; + /* + * We currently assume that you always want at least 20 MHz, + * otherwise channel 12 might get enabled if this rule is + * compatible to US, which permits 2402 - 2472 MHz. + */ for (i = 0; i < sband->n_channels; i++) - handle_channel_custom(wiphy, &sband->channels[i], regd); + handle_channel_custom(wiphy, &sband->channels[i], regd, + MHZ_TO_KHZ(20)); } /* Used by drivers prior to wiphy registration */ From 2dbb6faebb94d6d5ae87e5ea6be9280c366393e1 Mon Sep 17 00:00:00 2001 From: Orr Mazor Date: Sun, 22 Dec 2019 14:55:31 +0000 Subject: [PATCH 2922/3715] cfg80211: Fix radar event during another phy CAC [ Upstream commit 26ec17a1dc5ecdd8d91aba63ead6f8b5ad5dea0d ] In case a radar event of CAC_FINISHED or RADAR_DETECTED happens during another phy is during CAC we might need to cancel that CAC. If we got a radar in a channel that another phy is now doing CAC on then the CAC should be canceled there. If, for example, 2 phys doing CAC on the same channels, or on comptable channels, once on of them will finish his CAC the other might need to cancel his CAC, since it is no longer relevant. To fix that the commit adds an callback and implement it in mac80211 to end CAC. This commit also adds a call to said callback if after a radar event we see the CAC is no longer relevant Signed-off-by: Orr Mazor Reviewed-by: Sergey Matyukevich Link: https://lore.kernel.org/r/20191222145449.15792-1-Orr.Mazor@tandemg.com [slightly reformat/reword commit message] Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- include/net/cfg80211.h | 5 +++++ net/mac80211/cfg.c | 23 +++++++++++++++++++++++ net/wireless/rdev-ops.h | 10 ++++++++++ net/wireless/reg.c | 23 ++++++++++++++++++++++- net/wireless/trace.h | 5 +++++ 5 files changed, 65 insertions(+), 1 deletion(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a4c8e9d7dd06..030eea38f258 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2843,6 +2843,9 @@ struct cfg80211_pmk_conf { * * @start_radar_detection: Start radar detection in the driver. * + * @end_cac: End running CAC, probably because a related CAC + * was finished on another phy. + * * @update_ft_ies: Provide updated Fast BSS Transition information to the * driver. If the SME is in the driver/firmware, this information can be * used in building Authentication and Reassociation Request frames. @@ -3148,6 +3151,8 @@ struct cfg80211_ops { struct net_device *dev, struct cfg80211_chan_def *chandef, u32 cac_time_ms); + void (*end_cac)(struct wiphy *wiphy, + struct net_device *dev); int (*update_ft_ies)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_update_ft_ies_params *ftie); int (*crit_proto_start)(struct wiphy *wiphy, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index d437007b15bb..b1484b8316e8 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2800,6 +2800,28 @@ static int ieee80211_start_radar_detection(struct wiphy *wiphy, return err; } +static void ieee80211_end_cac(struct wiphy *wiphy, + struct net_device *dev) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + + mutex_lock(&local->mtx); + list_for_each_entry(sdata, &local->interfaces, list) { + /* it might be waiting for the local->mtx, but then + * by the time it gets it, sdata->wdev.cac_started + * will no longer be true + */ + cancel_delayed_work(&sdata->dfs_cac_timer_work); + + if (sdata->wdev.cac_started) { + ieee80211_vif_release_channel(sdata); + sdata->wdev.cac_started = false; + } + } + mutex_unlock(&local->mtx); +} + static struct cfg80211_beacon_data * cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon) { @@ -3730,6 +3752,7 @@ const struct cfg80211_ops mac80211_config_ops = { #endif .get_channel = ieee80211_cfg_get_channel, .start_radar_detection = ieee80211_start_radar_detection, + .end_cac = ieee80211_end_cac, .channel_switch = ieee80211_channel_switch, .set_qos_map = ieee80211_set_qos_map, .set_ap_chanwidth = ieee80211_set_ap_chanwidth, diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 249919bdfc64..4077bb3af440 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1143,6 +1143,16 @@ rdev_start_radar_detection(struct cfg80211_registered_device *rdev, return ret; } +static inline void +rdev_end_cac(struct cfg80211_registered_device *rdev, + struct net_device *dev) +{ + trace_rdev_end_cac(&rdev->wiphy, dev); + if (rdev->ops->end_cac) + rdev->ops->end_cac(&rdev->wiphy, dev); + trace_rdev_return_void(&rdev->wiphy); +} + static inline int rdev_set_mcast_rate(struct cfg80211_registered_device *rdev, struct net_device *dev, diff --git a/net/wireless/reg.c b/net/wireless/reg.c index e60a7dedfbf1..a520f433d476 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -3303,6 +3303,25 @@ bool regulatory_pre_cac_allowed(struct wiphy *wiphy) return pre_cac_allowed; } +static void cfg80211_check_and_end_cac(struct cfg80211_registered_device *rdev) +{ + struct wireless_dev *wdev; + /* If we finished CAC or received radar, we should end any + * CAC running on the same channels. + * the check !cfg80211_chandef_dfs_usable contain 2 options: + * either all channels are available - those the CAC_FINISHED + * event has effected another wdev state, or there is a channel + * in unavailable state in wdev chandef - those the RADAR_DETECTED + * event has effected another wdev state. + * In both cases we should end the CAC on the wdev. + */ + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { + if (wdev->cac_started && + !cfg80211_chandef_dfs_usable(&rdev->wiphy, &wdev->chandef)) + rdev_end_cac(rdev, wdev->netdev); + } +} + void regulatory_propagate_dfs_state(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, enum nl80211_dfs_state dfs_state, @@ -3329,8 +3348,10 @@ void regulatory_propagate_dfs_state(struct wiphy *wiphy, cfg80211_set_dfs_state(&rdev->wiphy, chandef, dfs_state); if (event == NL80211_RADAR_DETECTED || - event == NL80211_RADAR_CAC_FINISHED) + event == NL80211_RADAR_CAC_FINISHED) { cfg80211_sched_dfs_chan_update(rdev); + cfg80211_check_and_end_cac(rdev); + } nl80211_radar_notify(rdev, chandef, event, NULL, GFP_KERNEL); } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index f3353fe5b35b..cd0a1c7c185d 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -607,6 +607,11 @@ DEFINE_EVENT(wiphy_netdev_evt, rdev_flush_pmksa, TP_ARGS(wiphy, netdev) ); +DEFINE_EVENT(wiphy_netdev_evt, rdev_end_cac, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev), + TP_ARGS(wiphy, netdev) +); + DECLARE_EVENT_CLASS(station_add_change, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 *mac, struct station_parameters *params), From 0a4b3ef53f0a237672b842b639284ac6ca6ef9b7 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Tue, 7 Jan 2020 17:35:45 +0200 Subject: [PATCH 2923/3715] mac80211: Fix TKIP replay protection immediately after key setup [ Upstream commit 6f601265215a421f425ba3a4850a35861d024643 ] TKIP replay protection was skipped for the very first frame received after a new key is configured. While this is potentially needed to avoid dropping a frame in some cases, this does leave a window for replay attacks with group-addressed frames at the station side. Any earlier frame sent by the AP using the same key would be accepted as a valid frame and the internal RSC would then be updated to the TSC from that frame. This would allow multiple previously transmitted group-addressed frames to be replayed until the next valid new group-addressed frame from the AP is received by the station. Fix this by limiting the no-replay-protection exception to apply only for the case where TSC=0, i.e., when this is for the very first frame protected using the new key, and the local RSC had not been set to a higher value when configuring the key (which may happen with GTK). Signed-off-by: Jouni Malinen Link: https://lore.kernel.org/r/20200107153545.10934-1-j@w1.fi Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/tkip.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c index b3622823bad2..ebd66e8f46b3 100644 --- a/net/mac80211/tkip.c +++ b/net/mac80211/tkip.c @@ -266,9 +266,21 @@ int ieee80211_tkip_decrypt_data(struct crypto_cipher *tfm, if ((keyid >> 6) != key->conf.keyidx) return TKIP_DECRYPT_INVALID_KEYIDX; - if (rx_ctx->ctx.state != TKIP_STATE_NOT_INIT && - (iv32 < rx_ctx->iv32 || - (iv32 == rx_ctx->iv32 && iv16 <= rx_ctx->iv16))) + /* Reject replays if the received TSC is smaller than or equal to the + * last received value in a valid message, but with an exception for + * the case where a new key has been set and no valid frame using that + * key has yet received and the local RSC was initialized to 0. This + * exception allows the very first frame sent by the transmitter to be + * accepted even if that transmitter were to use TSC 0 (IEEE 802.11 + * described TSC to be initialized to 1 whenever a new key is taken into + * use). + */ + if (iv32 < rx_ctx->iv32 || + (iv32 == rx_ctx->iv32 && + (iv16 < rx_ctx->iv16 || + (iv16 == rx_ctx->iv16 && + (rx_ctx->iv32 || rx_ctx->iv16 || + rx_ctx->ctx.state != TKIP_STATE_NOT_INIT))))) return TKIP_DECRYPT_REPLAY; if (only_iv) { From 95f5057bb1a300e840e697463003cb242fa491bd Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 7 Jan 2020 21:07:35 +0100 Subject: [PATCH 2924/3715] wireless: wext: avoid gcc -O3 warning [ Upstream commit e16119655c9e6c4aa5767cd971baa9c491f41b13 ] After the introduction of CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3, the wext code produces a bogus warning: In function 'iw_handler_get_iwstats', inlined from 'ioctl_standard_call' at net/wireless/wext-core.c:1015:9, inlined from 'wireless_process_ioctl' at net/wireless/wext-core.c:935:10, inlined from 'wext_ioctl_dispatch.part.8' at net/wireless/wext-core.c:986:8, inlined from 'wext_handle_ioctl': net/wireless/wext-core.c:671:3: error: argument 1 null where non-null expected [-Werror=nonnull] memcpy(extra, stats, sizeof(struct iw_statistics)); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In file included from arch/x86/include/asm/string.h:5, net/wireless/wext-core.c: In function 'wext_handle_ioctl': arch/x86/include/asm/string_64.h:14:14: note: in a call to function 'memcpy' declared here The problem is that ioctl_standard_call() sometimes calls the handler with a NULL argument that would cause a problem for iw_handler_get_iwstats. However, iw_handler_get_iwstats never actually gets called that way. Marking that function as noinline avoids the warning and leads to slightly smaller object code as well. Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20200107200741.3588770-1-arnd@arndb.de Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/wext-core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 6cdb054484d6..5236a3c2c0cc 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -659,7 +659,8 @@ struct iw_statistics *get_wireless_stats(struct net_device *dev) return NULL; } -static int iw_handler_get_iwstats(struct net_device * dev, +/* noinline to avoid a bogus warning with -O3 */ +static noinline int iw_handler_get_iwstats(struct net_device * dev, struct iw_request_info * info, union iwreq_data * wrqu, char * extra) From dc2e1d86500c904f7a4f730ad0481bb42f2f6188 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 16 Jan 2020 12:55:48 -0800 Subject: [PATCH 2925/3715] net: dsa: bcm_sf2: Configure IMP port for 2Gb/sec [ Upstream commit 8f1880cbe8d0d49ebb7e9ae409b3b96676e5aa97 ] With the implementation of the system reset controller we lost a setting that is currently applied by the bootloader and which configures the IMP port for 2Gb/sec, the default is 1Gb/sec. This is needed given the number of ports and applications we expect to run so bring back that setting. Fixes: 01b0ac07589e ("net: dsa: bcm_sf2: Add support for optional reset controller line") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/dsa/bcm_sf2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 94ad2fdd6ef0..05440b727261 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -137,7 +137,7 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port) /* Force link status for IMP port */ reg = core_readl(priv, offset); - reg |= (MII_SW_OR | LINK_STS); + reg |= (MII_SW_OR | LINK_STS | GMII_SPEED_UP_2G); core_writel(priv, reg, offset); /* Enable Broadcast, Multicast, Unicast forwarding to IMP port */ From e99b5648c7a719e5bf4431ed59ab6e98485268e9 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 17 Jan 2020 00:32:46 -0500 Subject: [PATCH 2926/3715] bnxt_en: Fix ipv6 RFS filter matching logic. [ Upstream commit 6fc7caa84e713f7627e171ab1e7c4b5be0dc9b3d ] Fix bnxt_fltr_match() to match ipv6 source and destination addresses. The function currently only checks ipv4 addresses and will not work corrently on ipv6 filters. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 38ee7692132c..7461e7b9eaae 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7402,11 +7402,23 @@ static bool bnxt_fltr_match(struct bnxt_ntuple_filter *f1, struct flow_keys *keys1 = &f1->fkeys; struct flow_keys *keys2 = &f2->fkeys; - if (keys1->addrs.v4addrs.src == keys2->addrs.v4addrs.src && - keys1->addrs.v4addrs.dst == keys2->addrs.v4addrs.dst && - keys1->ports.ports == keys2->ports.ports && - keys1->basic.ip_proto == keys2->basic.ip_proto && - keys1->basic.n_proto == keys2->basic.n_proto && + if (keys1->basic.n_proto != keys2->basic.n_proto || + keys1->basic.ip_proto != keys2->basic.ip_proto) + return false; + + if (keys1->basic.n_proto == htons(ETH_P_IP)) { + if (keys1->addrs.v4addrs.src != keys2->addrs.v4addrs.src || + keys1->addrs.v4addrs.dst != keys2->addrs.v4addrs.dst) + return false; + } else { + if (memcmp(&keys1->addrs.v6addrs.src, &keys2->addrs.v6addrs.src, + sizeof(keys1->addrs.v6addrs.src)) || + memcmp(&keys1->addrs.v6addrs.dst, &keys2->addrs.v6addrs.dst, + sizeof(keys1->addrs.v6addrs.dst))) + return false; + } + + if (keys1->ports.ports == keys2->ports.ports && keys1->control.flags == keys2->control.flags && ether_addr_equal(f1->src_mac_addr, f2->src_mac_addr) && ether_addr_equal(f1->dst_mac_addr, f2->dst_mac_addr)) From adafa3c54b4ad81622a4508ed2c29cb1da46801b Mon Sep 17 00:00:00 2001 From: "Matwey V. Kornilov" Date: Mon, 6 Jan 2020 16:09:08 +0300 Subject: [PATCH 2927/3715] ARM: dts: am335x-boneblack-common: fix memory size [ Upstream commit 5abd45ea0fc3060f7805e131753fdcbafd6c6618 ] BeagleBone Black series is equipped with 512MB RAM whereas only 256MB is included from am335x-bone-common.dtsi This leads to an issue with unusual setups when devicetree is loaded by GRUB2 directly. Signed-off-by: Matwey V. Kornilov Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/boot/dts/am335x-boneblack-common.dtsi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/boot/dts/am335x-boneblack-common.dtsi b/arch/arm/boot/dts/am335x-boneblack-common.dtsi index 325daae40278..485c27f039f5 100644 --- a/arch/arm/boot/dts/am335x-boneblack-common.dtsi +++ b/arch/arm/boot/dts/am335x-boneblack-common.dtsi @@ -131,6 +131,11 @@ }; / { + memory@80000000 { + device_type = "memory"; + reg = <0x80000000 0x20000000>; /* 512 MB */ + }; + clk_mcasp0_fixed: clk_mcasp0_fixed { #clock-cells = <0>; compatible = "fixed-clock"; From 9c8c51176e9d3035f6e89b6664b4dd67a0873cbf Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 13 Jan 2020 09:32:46 +0100 Subject: [PATCH 2928/3715] vti[6]: fix packet tx through bpf_redirect() [ Upstream commit 95224166a9032ff5d08fca633d37113078ce7d01 ] With an ebpf program that redirects packets through a vti[6] interface, the packets are dropped because no dst is attached. This could also be reproduced with an AF_PACKET socket, with the following python script (vti1 is an ip_vti interface): import socket send_s = socket.socket(socket.AF_PACKET, socket.SOCK_RAW, 0) # scapy # p = IP(src='10.100.0.2', dst='10.200.0.1')/ICMP(type='echo-request') # raw(p) req = b'E\x00\x00\x1c\x00\x01\x00\x00@\x01e\xb2\nd\x00\x02\n\xc8\x00\x01\x08\x00\xf7\xff\x00\x00\x00\x00' send_s.sendto(req, ('vti1', 0x800, 0, 0)) Signed-off-by: Nicolas Dichtel Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/ipv4/ip_vti.c | 13 +++++++++++-- net/ipv6/ip6_vti.c | 13 +++++++++++-- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 08c15dd42d93..59384ffe89f7 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -208,8 +208,17 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, int mtu; if (!dst) { - dev->stats.tx_carrier_errors++; - goto tx_error_icmp; + struct rtable *rt; + + fl->u.ip4.flowi4_oif = dev->ifindex; + fl->u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC; + rt = __ip_route_output_key(dev_net(dev), &fl->u.ip4); + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error_icmp; + } + dst = &rt->dst; + skb_dst_set(skb, dst); } dst_hold(dst); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 557fe3880a3f..396a0f61f5f8 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -453,8 +453,17 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) int err = -1; int mtu; - if (!dst) - goto tx_err_link_failure; + if (!dst) { + fl->u.ip6.flowi6_oif = dev->ifindex; + fl->u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC; + dst = ip6_route_output(dev_net(dev), NULL, &fl->u.ip6); + if (dst->error) { + dst_release(dst); + dst = NULL; + goto tx_err_link_failure; + } + skb_dst_set(skb, dst); + } dst_hold(dst); dst = xfrm_lookup(t->net, dst, fl, NULL, 0); From 49e509787b77e4991f89703f81a0f53d7739fc58 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 16 Jan 2020 11:20:53 +0100 Subject: [PATCH 2929/3715] scsi: fnic: do not queue commands during fwreset [ Upstream commit 0e2209629fec427ba75a6351486153a9feddd36b ] When a link is going down the driver will be calling fnic_cleanup_io(), which will traverse all commands and calling 'done' for each found command. While the traversal is handled under the host_lock, calling 'done' happens after the host_lock is being dropped. As fnic_queuecommand_lck() is being called with the host_lock held, it might well be that it will pick the command being selected for abortion from the above routine and enqueue it for sending, but then 'done' is being called on that very command from the above routine. Which of course confuses the hell out of the scsi midlayer. So fix this by not queueing commands when fnic_cleanup_io is active. Link: https://lore.kernel.org/r/20200116102053.62755-1-hare@suse.de Signed-off-by: Hannes Reinecke Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/fnic/fnic_scsi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c index 242e2ee494a1..d79ac0b24f5a 100644 --- a/drivers/scsi/fnic/fnic_scsi.c +++ b/drivers/scsi/fnic/fnic_scsi.c @@ -446,6 +446,9 @@ static int fnic_queuecommand_lck(struct scsi_cmnd *sc, void (*done)(struct scsi_ if (unlikely(fnic_chk_state_flags_locked(fnic, FNIC_FLAGS_IO_BLOCKED))) return SCSI_MLQUEUE_HOST_BUSY; + if (unlikely(fnic_chk_state_flags_locked(fnic, FNIC_FLAGS_FWRESET))) + return SCSI_MLQUEUE_HOST_BUSY; + rport = starget_to_rport(scsi_target(sc->device)); if (!rport) { FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, From 9da4de4d00cc926d98fb13d284104210122ae1fa Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 20 Jan 2020 15:07:46 +0100 Subject: [PATCH 2930/3715] ARM: 8955/1: virt: Relax arch timer version check during early boot [ Upstream commit 6849b5eba1965ceb0cad3a75877ef4569dd3638e ] Updates to the Generic Timer architecture allow ID_PFR1.GenTimer to have values other than 0 or 1 while still preserving backward compatibility. At the moment, Linux is quite strict in the way it handles this field at early boot and will not configure arch timer if it doesn't find the value 1. Since here use ubfx for arch timer version extraction (hyb-stub build with -march=armv7-a, so it is safe) To help backports (even though the code was correct at the time of writing) Fixes: 8ec58be9f3ff ("ARM: virt: arch_timers: enable access to physical timers") Acked-by: Marc Zyngier Signed-off-by: Vladimir Murzin Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/kernel/hyp-stub.S | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S index 82a942894fc0..83e463c05dcd 100644 --- a/arch/arm/kernel/hyp-stub.S +++ b/arch/arm/kernel/hyp-stub.S @@ -159,10 +159,9 @@ ARM_BE8(orr r7, r7, #(1 << 25)) @ HSCTLR.EE #if !defined(ZIMAGE) && defined(CONFIG_ARM_ARCH_TIMER) @ make CNTP_* and CNTPCT accessible from PL1 mrc p15, 0, r7, c0, c1, 1 @ ID_PFR1 - lsr r7, #16 - and r7, #0xf - cmp r7, #1 - bne 1f + ubfx r7, r7, #16, #4 + teq r7, #0 + beq 1f mrc p15, 4, r7, c14, c1, 0 @ CNTHCTL orr r7, r7, #3 @ PL1PCEN | PL1PCTEN mcr p15, 4, r7, c14, c1, 0 @ CNTHCTL From ffc1f3076eda033f86e9b0cb873788a01292f737 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 10 Jan 2020 12:28:07 +0000 Subject: [PATCH 2931/3715] tee: optee: Fix compilation issue with nommu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9e0caab8e0f96f0af7d1dd388e62f44184a75372 ] The optee driver uses specific page table types to verify if a memory region is normal. These types are not defined in nommu systems. Trying to compile the driver in these systems results in a build error: linux/drivers/tee/optee/call.c: In function ‘is_normal_memory’: linux/drivers/tee/optee/call.c:533:26: error: ‘L_PTE_MT_MASK’ undeclared (first use in this function); did you mean ‘PREEMPT_MASK’? return (pgprot_val(p) & L_PTE_MT_MASK) == L_PTE_MT_WRITEALLOC; ^~~~~~~~~~~~~ PREEMPT_MASK linux/drivers/tee/optee/call.c:533:26: note: each undeclared identifier is reported only once for each function it appears in linux/drivers/tee/optee/call.c:533:44: error: ‘L_PTE_MT_WRITEALLOC’ undeclared (first use in this function) return (pgprot_val(p) & L_PTE_MT_MASK) == L_PTE_MT_WRITEALLOC; ^~~~~~~~~~~~~~~~~~~ Make the optee driver depend on MMU to fix the compilation issue. Signed-off-by: Vincenzo Frascino [jw: update commit title] Signed-off-by: Jens Wiklander Signed-off-by: Sasha Levin --- drivers/tee/optee/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tee/optee/Kconfig b/drivers/tee/optee/Kconfig index 0126de898036..108600c6eb56 100644 --- a/drivers/tee/optee/Kconfig +++ b/drivers/tee/optee/Kconfig @@ -2,6 +2,7 @@ config OPTEE tristate "OP-TEE" depends on HAVE_ARM_SMCCC + depends on MMU help This implements the OP-TEE Trusted Execution Environment (TEE) driver. From 33fc8d611378dc649f8c342596f7d3be52bdc941 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 22 Jan 2020 15:07:27 +1100 Subject: [PATCH 2932/3715] airo: Fix possible info leak in AIROOLDIOCTL/SIOCDEVPRIVATE [ Upstream commit d6bce2137f5d6bb1093e96d2f801479099b28094 ] The driver for Cisco Aironet 4500 and 4800 series cards (airo.c), implements AIROOLDIOCTL/SIOCDEVPRIVATE in airo_ioctl(). The ioctl handler copies an aironet_ioctl struct from userspace, which includes a command and a length. Some of the commands are handled in readrids(), which kmalloc()'s a buffer of RIDSIZE (2048) bytes. That buffer is then passed to PC4500_readrid(), which has two cases. The else case does some setup and then reads up to RIDSIZE bytes from the hardware into the kmalloc()'ed buffer. Here len == RIDSIZE, pBuf is the kmalloc()'ed buffer: // read the rid length field bap_read(ai, pBuf, 2, BAP1); // length for remaining part of rid len = min(len, (int)le16_to_cpu(*(__le16*)pBuf)) - 2; ... // read remainder of the rid rc = bap_read(ai, ((__le16*)pBuf)+1, len, BAP1); PC4500_readrid() then returns to readrids() which does: len = comp->len; if (copy_to_user(comp->data, iobuf, min(len, (int)RIDSIZE))) { Where comp->len is the user controlled length field. So if the "rid length field" returned by the hardware is < 2048, and the user requests 2048 bytes in comp->len, we will leak the previous contents of the kmalloc()'ed buffer to userspace. Fix it by kzalloc()'ing the buffer. Found by Ilja by code inspection, not tested as I don't have the required hardware. Reported-by: Ilja Van Sprundel Signed-off-by: Michael Ellerman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/wireless/cisco/airo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c index fc49255bab00..c9ffbdd42e67 100644 --- a/drivers/net/wireless/cisco/airo.c +++ b/drivers/net/wireless/cisco/airo.c @@ -7811,7 +7811,7 @@ static int readrids(struct net_device *dev, aironet_ioctl *comp) { return -EINVAL; } - if ((iobuf = kmalloc(RIDSIZE, GFP_KERNEL)) == NULL) + if ((iobuf = kzalloc(RIDSIZE, GFP_KERNEL)) == NULL) return -ENOMEM; PC4500_readrid(ai,ridcode,iobuf,RIDSIZE, 1); From 6f0f284b770a29d0094b4671e37ac0ee521903b6 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 22 Jan 2020 15:07:28 +1100 Subject: [PATCH 2933/3715] airo: Add missing CAP_NET_ADMIN check in AIROOLDIOCTL/SIOCDEVPRIVATE [ Upstream commit 78f7a7566f5eb59321e99b55a6fdb16ea05b37d1 ] The driver for Cisco Aironet 4500 and 4800 series cards (airo.c), implements AIROOLDIOCTL/SIOCDEVPRIVATE in airo_ioctl(). The ioctl handler copies an aironet_ioctl struct from userspace, which includes a command. Some of the commands are handled in readrids(), where the user controlled command is converted into a driver-internal value called "ridcode". There are two command values, AIROGWEPKTMP and AIROGWEPKNV, which correspond to ridcode values of RID_WEP_TEMP and RID_WEP_PERM respectively. These commands both have checks that the user has CAP_NET_ADMIN, with the comment that "Only super-user can read WEP keys", otherwise they return -EPERM. However there is another command value, AIRORRID, that lets the user specify the ridcode value directly, with no other checks. This means the user can bypass the CAP_NET_ADMIN check on AIROGWEPKTMP and AIROGWEPKNV. Fix it by moving the CAP_NET_ADMIN check out of the command handling and instead do it later based on the ridcode. That way regardless of whether the ridcode is set via AIROGWEPKTMP or AIROGWEPKNV, or passed in using AIRORID, we always do the CAP_NET_ADMIN check. Found by Ilja by code inspection, not tested as I don't have the required hardware. Reported-by: Ilja Van Sprundel Signed-off-by: Michael Ellerman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/wireless/cisco/airo.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c index c9ffbdd42e67..f3f20abbe269 100644 --- a/drivers/net/wireless/cisco/airo.c +++ b/drivers/net/wireless/cisco/airo.c @@ -7788,16 +7788,8 @@ static int readrids(struct net_device *dev, aironet_ioctl *comp) { case AIROGVLIST: ridcode = RID_APLIST; break; case AIROGDRVNAM: ridcode = RID_DRVNAME; break; case AIROGEHTENC: ridcode = RID_ETHERENCAP; break; - case AIROGWEPKTMP: ridcode = RID_WEP_TEMP; - /* Only super-user can read WEP keys */ - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - break; - case AIROGWEPKNV: ridcode = RID_WEP_PERM; - /* Only super-user can read WEP keys */ - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - break; + case AIROGWEPKTMP: ridcode = RID_WEP_TEMP; break; + case AIROGWEPKNV: ridcode = RID_WEP_PERM; break; case AIROGSTAT: ridcode = RID_STATUS; break; case AIROGSTATSD32: ridcode = RID_STATSDELTA; break; case AIROGSTATSC32: ridcode = RID_STATS; break; @@ -7811,6 +7803,12 @@ static int readrids(struct net_device *dev, aironet_ioctl *comp) { return -EINVAL; } + if (ridcode == RID_WEP_TEMP || ridcode == RID_WEP_PERM) { + /* Only super-user can read WEP keys */ + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + } + if ((iobuf = kzalloc(RIDSIZE, GFP_KERNEL)) == NULL) return -ENOMEM; From ce9ba09d135bb93652811783bce421be074fa5e4 Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Wed, 22 Jan 2020 16:02:07 +0800 Subject: [PATCH 2934/3715] r8152: get default setting of WOL before initializing [ Upstream commit 9583a3638dc07cc1878f41265e85ed497f72efcb ] Initailization would reset runtime suspend by tp->saved_wolopts, so the tp->saved_wolopts should be set before initializing. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/r8152.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 0083c60f5cdf..a7f9c1886bd4 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -5244,6 +5244,11 @@ static int rtl8152_probe(struct usb_interface *intf, intf->needs_remote_wakeup = 1; + if (!rtl_can_wakeup(tp)) + __rtl_set_wol(tp, 0); + else + tp->saved_wolopts = __rtl_get_wol(tp); + tp->rtl_ops.init(tp); queue_delayed_work(system_long_wq, &tp->hw_phy_work, 0); set_ethernet_addr(tp); @@ -5257,10 +5262,6 @@ static int rtl8152_probe(struct usb_interface *intf, goto out1; } - if (!rtl_can_wakeup(tp)) - __rtl_set_wol(tp, 0); - - tp->saved_wolopts = __rtl_get_wol(tp); if (tp->saved_wolopts) device_set_wakeup_enable(&udev->dev, true); else From fcd0c0d36a58b9d99856de8aa6cde011e1134630 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Wed, 22 Jan 2020 01:43:38 -0800 Subject: [PATCH 2935/3715] qlcnic: Fix CPU soft lockup while collecting firmware dump [ Upstream commit 22e984493a41bf8081f13d9ed84def3ca8cfd427 ] Driver while collecting firmware dump takes longer time to collect/process some of the firmware dump entries/memories. Bigger capture masks makes it worse as it results in larger amount of data being collected and results in CPU soft lockup. Place cond_resched() in some of the driver flows that are expectedly time consuming to relinquish the CPU to avoid CPU soft lockup panic. Signed-off-by: Shahed Shaikh Tested-by: Yonggen Xu Signed-off-by: Manish Chopra Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c | 1 + drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c index a496390b8632..07f9067affc6 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c @@ -2043,6 +2043,7 @@ static void qlcnic_83xx_exec_template_cmd(struct qlcnic_adapter *p_dev, break; } entry += p_hdr->size; + cond_resched(); } p_dev->ahw->reset.seq_index = index; } diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c index afa10a163da1..f34ae8c75bc5 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c @@ -703,6 +703,7 @@ static u32 qlcnic_read_memory_test_agent(struct qlcnic_adapter *adapter, addr += 16; reg_read -= 16; ret += 16; + cond_resched(); } out: mutex_unlock(&adapter->ahw->mem_lock); @@ -1383,6 +1384,7 @@ int qlcnic_dump_fw(struct qlcnic_adapter *adapter) buf_offset += entry->hdr.cap_size; entry_offset += entry->hdr.offset; buffer = fw_dump->data + buf_offset; + cond_resched(); } fw_dump->clr = 1; From 5d45e8aa20fbb1097d6d1d6866c91aeb81941663 Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Wed, 22 Jan 2020 15:20:28 +0200 Subject: [PATCH 2936/3715] powerpc/fsl/dts: add fsl,erratum-a011043 [ Upstream commit 73d527aef68f7644e59f22ce7f9ac75e7b533aea ] Add fsl,erratum-a011043 to internal MDIO buses. Software may get false read error when reading internal PCS registers through MDIO. As a workaround, all internal MDIO accesses should ignore the MDIO_CFG[MDIO_RD_ER] bit. Signed-off-by: Madalin Bucur Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi | 1 + arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi | 1 + 18 files changed, 18 insertions(+) diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi index e1a961f05dcd..baa0c503e741 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi @@ -63,6 +63,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy0: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi index c288f3c6c637..93095600e808 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi @@ -60,6 +60,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xf1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy6: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi index 94f3e7175012..ff4bd38f0645 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi @@ -63,6 +63,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy1: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi index 94a76982d214..1fa38ed6f59e 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi @@ -60,6 +60,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xf3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy7: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi index b5ff5f71c6b8..a8cc9780c0c4 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy0: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi index ee44182c6348..8b8bd70c9382 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy1: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi index f05f0d775039..619c880b54d8 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe5000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy2: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi index a9114ec51075..d7ebb73a400d 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe7000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy3: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi index 44dd00ac7367..b151d696a069 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe9000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy4: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi index 5b1b84b58602..adc0ae0013a3 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xeb000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy5: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi index 0e1daaef9e74..435047e0e250 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi @@ -60,6 +60,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xf1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy14: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi index 68c5ef779266..c098657cca0a 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi @@ -60,6 +60,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xf3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy15: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi index 605363cc1117..9d06824815f3 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy8: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi index 1955dfa13634..70e947730c4b 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy9: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi index 2c1476454ee0..ad96e6529595 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe5000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy10: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi index b8b541ff5fb0..034bc4b71f7a 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe7000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy11: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi index 4b2cfddd1b15..93ca23d82b39 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe9000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy12: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi index 0a52ddf7cc17..23b3117a2fd2 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xeb000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy13: ethernet-phy@0 { reg = <0x0>; From 743bc2ab644f8d2c85f4021e1d2d72dd39d1ace8 Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Wed, 22 Jan 2020 15:20:29 +0200 Subject: [PATCH 2937/3715] net/fsl: treat fsl,erratum-a011043 [ Upstream commit 1d3ca681b9d9575ccf696ebc2840a1ebb1fd4074 ] When fsl,erratum-a011043 is set, adjust for erratum A011043: MDIO reads to internal PCS registers may result in having the MDIO_CFG[MDIO_RD_ER] bit set, even when there is no error and read data (MDIO_DATA[MDIO_DATA]) is correct. Software may get false read error when reading internal PCS registers through MDIO. As a workaround, all internal MDIO accesses should ignore the MDIO_CFG[MDIO_RD_ER] bit. Signed-off-by: Madalin Bucur Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/xgmac_mdio.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c index e03b30c60dcf..c82c85ef5fb3 100644 --- a/drivers/net/ethernet/freescale/xgmac_mdio.c +++ b/drivers/net/ethernet/freescale/xgmac_mdio.c @@ -49,6 +49,7 @@ struct tgec_mdio_controller { struct mdio_fsl_priv { struct tgec_mdio_controller __iomem *mdio_base; bool is_little_endian; + bool has_a011043; }; static u32 xgmac_read32(void __iomem *regs, @@ -226,7 +227,8 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum) return ret; /* Return all Fs if nothing was there */ - if (xgmac_read32(®s->mdio_stat, endian) & MDIO_STAT_RD_ER) { + if ((xgmac_read32(®s->mdio_stat, endian) & MDIO_STAT_RD_ER) && + !priv->has_a011043) { dev_err(&bus->dev, "Error while reading PHY%d reg at %d.%hhu\n", phy_id, dev_addr, regnum); @@ -274,6 +276,9 @@ static int xgmac_mdio_probe(struct platform_device *pdev) priv->is_little_endian = of_property_read_bool(pdev->dev.of_node, "little-endian"); + priv->has_a011043 = of_property_read_bool(pdev->dev.of_node, + "fsl,erratum-a011043"); + ret = of_mdiobus_register(bus, np); if (ret) { dev_err(&pdev->dev, "cannot register MDIO bus\n"); From 5d47f5ed5a1a4d08cd889f5b5fcacc1c0285b5bd Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Wed, 22 Jan 2020 16:15:14 +0200 Subject: [PATCH 2938/3715] net: fsl/fman: rename IF_MODE_XGMII to IF_MODE_10G [ Upstream commit 457bfc0a4bf531487ecc3cf82ec728a5e114fb1e ] As the only 10G PHY interface type defined at the moment the code was developed was XGMII, although the PHY interface mode used was not XGMII, XGMII was used in the code to denote 10G. This patch renames the 10G interface mode to remove the ambiguity. Signed-off-by: Madalin Bucur Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/fman/fman_memac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c index 75ce773c21a6..b33650a897f1 100644 --- a/drivers/net/ethernet/freescale/fman/fman_memac.c +++ b/drivers/net/ethernet/freescale/fman/fman_memac.c @@ -110,7 +110,7 @@ do { \ /* Interface Mode Register (IF_MODE) */ #define IF_MODE_MASK 0x00000003 /* 30-31 Mask on i/f mode bits */ -#define IF_MODE_XGMII 0x00000000 /* 30-31 XGMII (10G) interface */ +#define IF_MODE_10G 0x00000000 /* 30-31 10G interface */ #define IF_MODE_GMII 0x00000002 /* 30-31 GMII (1G) interface */ #define IF_MODE_RGMII 0x00000004 #define IF_MODE_RGMII_AUTO 0x00008000 @@ -439,7 +439,7 @@ static int init(struct memac_regs __iomem *regs, struct memac_cfg *cfg, tmp = 0; switch (phy_if) { case PHY_INTERFACE_MODE_XGMII: - tmp |= IF_MODE_XGMII; + tmp |= IF_MODE_10G; break; default: tmp |= IF_MODE_GMII; From 3c8a4483d801cb8fd8eaf74faacfd6eebece7139 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 23 Jan 2020 09:07:26 +1100 Subject: [PATCH 2939/3715] net/sonic: Add mutual exclusion for accessing shared state [ Upstream commit 865ad2f2201dc18685ba2686f13217f8b3a9c52c ] The netif_stop_queue() call in sonic_send_packet() races with the netif_wake_queue() call in sonic_interrupt(). This causes issues like "NETDEV WATCHDOG: eth0 (macsonic): transmit queue 0 timed out". Fix this by disabling interrupts when accessing tx_skb[] and next_tx. Update a comment to clarify the synchronization properties. Fixes: efcce839360f ("[PATCH] macsonic/jazzsonic network drivers update") Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/natsemi/sonic.c | 49 ++++++++++++++++++++-------- drivers/net/ethernet/natsemi/sonic.h | 1 + 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index a051dddcbd76..7aa7f8050d44 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -50,6 +50,8 @@ static int sonic_open(struct net_device *dev) if (sonic_debug > 2) printk("sonic_open: initializing sonic driver.\n"); + spin_lock_init(&lp->lock); + for (i = 0; i < SONIC_NUM_RRS; i++) { struct sk_buff *skb = netdev_alloc_skb(dev, SONIC_RBSIZE + 2); if (skb == NULL) { @@ -194,8 +196,6 @@ static void sonic_tx_timeout(struct net_device *dev) * wake the tx queue * Concurrently with all of this, the SONIC is potentially writing to * the status flags of the TDs. - * Until some mutual exclusion is added, this code will not work with SMP. However, - * MIPS Jazz machines and m68k Macs were all uni-processor machines. */ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) @@ -203,7 +203,8 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) struct sonic_local *lp = netdev_priv(dev); dma_addr_t laddr; int length; - int entry = lp->next_tx; + int entry; + unsigned long flags; if (sonic_debug > 2) printk("sonic_send_packet: skb=%p, dev=%p\n", skb, dev); @@ -226,6 +227,10 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } + spin_lock_irqsave(&lp->lock, flags); + + entry = lp->next_tx; + sonic_tda_put(dev, entry, SONIC_TD_STATUS, 0); /* clear status */ sonic_tda_put(dev, entry, SONIC_TD_FRAG_COUNT, 1); /* single fragment */ sonic_tda_put(dev, entry, SONIC_TD_PKTSIZE, length); /* length of packet */ @@ -235,10 +240,6 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) sonic_tda_put(dev, entry, SONIC_TD_LINK, sonic_tda_get(dev, entry, SONIC_TD_LINK) | SONIC_EOL); - /* - * Must set tx_skb[entry] only after clearing status, and - * before clearing EOL and before stopping queue - */ wmb(); lp->tx_len[entry] = length; lp->tx_laddr[entry] = laddr; @@ -263,6 +264,8 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) SONIC_WRITE(SONIC_CMD, SONIC_CR_TXP); + spin_unlock_irqrestore(&lp->lock, flags); + return NETDEV_TX_OK; } @@ -275,9 +278,21 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) struct net_device *dev = dev_id; struct sonic_local *lp = netdev_priv(dev); int status; + unsigned long flags; + + /* The lock has two purposes. Firstly, it synchronizes sonic_interrupt() + * with sonic_send_packet() so that the two functions can share state. + * Secondly, it makes sonic_interrupt() re-entrant, as that is required + * by macsonic which must use two IRQs with different priority levels. + */ + spin_lock_irqsave(&lp->lock, flags); + + status = SONIC_READ(SONIC_ISR) & SONIC_IMR_DEFAULT; + if (!status) { + spin_unlock_irqrestore(&lp->lock, flags); - if (!(status = SONIC_READ(SONIC_ISR) & SONIC_IMR_DEFAULT)) return IRQ_NONE; + } do { if (status & SONIC_INT_PKTRX) { @@ -292,11 +307,12 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) int td_status; int freed_some = 0; - /* At this point, cur_tx is the index of a TD that is one of: - * unallocated/freed (status set & tx_skb[entry] clear) - * allocated and sent (status set & tx_skb[entry] set ) - * allocated and not yet sent (status clear & tx_skb[entry] set ) - * still being allocated by sonic_send_packet (status clear & tx_skb[entry] clear) + /* The state of a Transmit Descriptor may be inferred + * from { tx_skb[entry], td_status } as follows. + * { clear, clear } => the TD has never been used + * { set, clear } => the TD was handed to SONIC + * { set, set } => the TD was handed back + * { clear, set } => the TD is available for re-use */ if (sonic_debug > 2) @@ -398,7 +414,12 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) /* load CAM done */ if (status & SONIC_INT_LCD) SONIC_WRITE(SONIC_ISR, SONIC_INT_LCD); /* clear the interrupt */ - } while((status = SONIC_READ(SONIC_ISR) & SONIC_IMR_DEFAULT)); + + status = SONIC_READ(SONIC_ISR) & SONIC_IMR_DEFAULT; + } while (status); + + spin_unlock_irqrestore(&lp->lock, flags); + return IRQ_HANDLED; } diff --git a/drivers/net/ethernet/natsemi/sonic.h b/drivers/net/ethernet/natsemi/sonic.h index 421b1a283fed..944f4830c4a1 100644 --- a/drivers/net/ethernet/natsemi/sonic.h +++ b/drivers/net/ethernet/natsemi/sonic.h @@ -321,6 +321,7 @@ struct sonic_local { unsigned int next_tx; /* next free TD */ struct device *device; /* generic device */ struct net_device_stats stats; + spinlock_t lock; }; #define TX_TIMEOUT (3 * HZ) From 3a8debf8b1ee9b3d87df1bdd86883717b793b9be Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 23 Jan 2020 09:07:26 +1100 Subject: [PATCH 2940/3715] net/sonic: Use MMIO accessors [ Upstream commit e3885f576196ddfc670b3d53e745de96ffcb49ab ] The driver accesses descriptor memory which is simultaneously accessed by the chip, so the compiler must not be allowed to re-order CPU accesses. sonic_buf_get() used 'volatile' to prevent that. sonic_buf_put() should have done so too but was overlooked. Fixes: efcce839360f ("[PATCH] macsonic/jazzsonic network drivers update") Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/natsemi/sonic.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/natsemi/sonic.h b/drivers/net/ethernet/natsemi/sonic.h index 944f4830c4a1..7057760cb55c 100644 --- a/drivers/net/ethernet/natsemi/sonic.h +++ b/drivers/net/ethernet/natsemi/sonic.h @@ -343,30 +343,30 @@ static void sonic_tx_timeout(struct net_device *dev); as far as we can tell. */ /* OpenBSD calls this "SWO". I'd like to think that sonic_buf_put() is a much better name. */ -static inline void sonic_buf_put(void* base, int bitmode, +static inline void sonic_buf_put(u16 *base, int bitmode, int offset, __u16 val) { if (bitmode) #ifdef __BIG_ENDIAN - ((__u16 *) base + (offset*2))[1] = val; + __raw_writew(val, base + (offset * 2) + 1); #else - ((__u16 *) base + (offset*2))[0] = val; + __raw_writew(val, base + (offset * 2) + 0); #endif else - ((__u16 *) base)[offset] = val; + __raw_writew(val, base + (offset * 1) + 0); } -static inline __u16 sonic_buf_get(void* base, int bitmode, +static inline __u16 sonic_buf_get(u16 *base, int bitmode, int offset) { if (bitmode) #ifdef __BIG_ENDIAN - return ((volatile __u16 *) base + (offset*2))[1]; + return __raw_readw(base + (offset * 2) + 1); #else - return ((volatile __u16 *) base + (offset*2))[0]; + return __raw_readw(base + (offset * 2) + 0); #endif else - return ((volatile __u16 *) base)[offset]; + return __raw_readw(base + (offset * 1) + 0); } /* Inlines that you should actually use for reading/writing DMA buffers */ From 36db400a662d4651a7987f0ec3598de03eb951a8 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 23 Jan 2020 09:07:26 +1100 Subject: [PATCH 2941/3715] net/sonic: Fix receive buffer handling [ Upstream commit 9e311820f67e740f4fb8dcb82b4c4b5b05bdd1a5 ] The SONIC can sometimes advance its rx buffer pointer (RRP register) without advancing its rx descriptor pointer (CRDA register). As a result the index of the current rx descriptor may not equal that of the current rx buffer. The driver mistakenly assumes that they are always equal. This assumption leads to incorrect packet lengths and possible packet duplication. Avoid this by calling a new function to locate the buffer corresponding to a given descriptor. Fixes: efcce839360f ("[PATCH] macsonic/jazzsonic network drivers update") Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/natsemi/sonic.c | 35 ++++++++++++++++++++++++---- drivers/net/ethernet/natsemi/sonic.h | 5 ++-- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index 7aa7f8050d44..b6599aa22504 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -423,6 +423,21 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } +/* Return the array index corresponding to a given Receive Buffer pointer. */ +static int index_from_addr(struct sonic_local *lp, dma_addr_t addr, + unsigned int last) +{ + unsigned int i = last; + + do { + i = (i + 1) & SONIC_RRS_MASK; + if (addr == lp->rx_laddr[i]) + return i; + } while (i != last); + + return -ENOENT; +} + /* * We have a good packet(s), pass it/them up the network stack. */ @@ -442,6 +457,16 @@ static void sonic_rx(struct net_device *dev) status = sonic_rda_get(dev, entry, SONIC_RD_STATUS); if (status & SONIC_RCR_PRX) { + u32 addr = (sonic_rda_get(dev, entry, + SONIC_RD_PKTPTR_H) << 16) | + sonic_rda_get(dev, entry, SONIC_RD_PKTPTR_L); + int i = index_from_addr(lp, addr, entry); + + if (i < 0) { + WARN_ONCE(1, "failed to find buffer!\n"); + break; + } + /* Malloc up new buffer. */ new_skb = netdev_alloc_skb(dev, SONIC_RBSIZE + 2); if (new_skb == NULL) { @@ -463,7 +488,7 @@ static void sonic_rx(struct net_device *dev) /* now we have a new skb to replace it, pass the used one up the stack */ dma_unmap_single(lp->device, lp->rx_laddr[entry], SONIC_RBSIZE, DMA_FROM_DEVICE); - used_skb = lp->rx_skb[entry]; + used_skb = lp->rx_skb[i]; pkt_len = sonic_rda_get(dev, entry, SONIC_RD_PKTLEN); skb_trim(used_skb, pkt_len); used_skb->protocol = eth_type_trans(used_skb, dev); @@ -472,13 +497,13 @@ static void sonic_rx(struct net_device *dev) lp->stats.rx_bytes += pkt_len; /* and insert the new skb */ - lp->rx_laddr[entry] = new_laddr; - lp->rx_skb[entry] = new_skb; + lp->rx_laddr[i] = new_laddr; + lp->rx_skb[i] = new_skb; bufadr_l = (unsigned long)new_laddr & 0xffff; bufadr_h = (unsigned long)new_laddr >> 16; - sonic_rra_put(dev, entry, SONIC_RR_BUFADR_L, bufadr_l); - sonic_rra_put(dev, entry, SONIC_RR_BUFADR_H, bufadr_h); + sonic_rra_put(dev, i, SONIC_RR_BUFADR_L, bufadr_l); + sonic_rra_put(dev, i, SONIC_RR_BUFADR_H, bufadr_h); } else { /* This should only happen, if we enable accepting broken packets. */ lp->stats.rx_errors++; diff --git a/drivers/net/ethernet/natsemi/sonic.h b/drivers/net/ethernet/natsemi/sonic.h index 7057760cb55c..83905eee6960 100644 --- a/drivers/net/ethernet/natsemi/sonic.h +++ b/drivers/net/ethernet/natsemi/sonic.h @@ -274,8 +274,9 @@ #define SONIC_NUM_RDS SONIC_NUM_RRS /* number of receive descriptors */ #define SONIC_NUM_TDS 16 /* number of transmit descriptors */ -#define SONIC_RDS_MASK (SONIC_NUM_RDS-1) -#define SONIC_TDS_MASK (SONIC_NUM_TDS-1) +#define SONIC_RRS_MASK (SONIC_NUM_RRS - 1) +#define SONIC_RDS_MASK (SONIC_NUM_RDS - 1) +#define SONIC_TDS_MASK (SONIC_NUM_TDS - 1) #define SONIC_RBSIZE 1520 /* size of one resource buffer */ From 772c96a03b0d0e45170cf8352078fcbf9ba9dd7c Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 23 Jan 2020 09:07:26 +1100 Subject: [PATCH 2942/3715] net/sonic: Quiesce SONIC before re-initializing descriptor memory [ Upstream commit 3f4b7e6a2be982fd8820a2b54d46dd9c351db899 ] Make sure the SONIC's DMA engine is idle before altering the transmit and receive descriptors. Add a helper for this as it will be needed again. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/natsemi/sonic.c | 25 +++++++++++++++++++++++++ drivers/net/ethernet/natsemi/sonic.h | 3 +++ 2 files changed, 28 insertions(+) diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index b6599aa22504..254e6dbc4c6a 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -103,6 +103,24 @@ static int sonic_open(struct net_device *dev) return 0; } +/* Wait for the SONIC to become idle. */ +static void sonic_quiesce(struct net_device *dev, u16 mask) +{ + struct sonic_local * __maybe_unused lp = netdev_priv(dev); + int i; + u16 bits; + + for (i = 0; i < 1000; ++i) { + bits = SONIC_READ(SONIC_CMD) & mask; + if (!bits) + return; + if (irqs_disabled() || in_interrupt()) + udelay(20); + else + usleep_range(100, 200); + } + WARN_ONCE(1, "command deadline expired! 0x%04x\n", bits); +} /* * Close the SONIC device @@ -120,6 +138,9 @@ static int sonic_close(struct net_device *dev) /* * stop the SONIC, disable interrupts */ + SONIC_WRITE(SONIC_CMD, SONIC_CR_RXDIS); + sonic_quiesce(dev, SONIC_CR_ALL); + SONIC_WRITE(SONIC_IMR, 0); SONIC_WRITE(SONIC_ISR, 0x7fff); SONIC_WRITE(SONIC_CMD, SONIC_CR_RST); @@ -159,6 +180,9 @@ static void sonic_tx_timeout(struct net_device *dev) * put the Sonic into software-reset mode and * disable all interrupts before releasing DMA buffers */ + SONIC_WRITE(SONIC_CMD, SONIC_CR_RXDIS); + sonic_quiesce(dev, SONIC_CR_ALL); + SONIC_WRITE(SONIC_IMR, 0); SONIC_WRITE(SONIC_ISR, 0x7fff); SONIC_WRITE(SONIC_CMD, SONIC_CR_RST); @@ -638,6 +662,7 @@ static int sonic_init(struct net_device *dev) */ SONIC_WRITE(SONIC_CMD, 0); SONIC_WRITE(SONIC_CMD, SONIC_CR_RXDIS); + sonic_quiesce(dev, SONIC_CR_ALL); /* * initialize the receive resource area diff --git a/drivers/net/ethernet/natsemi/sonic.h b/drivers/net/ethernet/natsemi/sonic.h index 83905eee6960..7dc011655e70 100644 --- a/drivers/net/ethernet/natsemi/sonic.h +++ b/drivers/net/ethernet/natsemi/sonic.h @@ -110,6 +110,9 @@ #define SONIC_CR_TXP 0x0002 #define SONIC_CR_HTX 0x0001 +#define SONIC_CR_ALL (SONIC_CR_LCAM | SONIC_CR_RRRA | \ + SONIC_CR_RXEN | SONIC_CR_TXP) + /* * SONIC data configuration bits */ From b2fd13e9ced42f35deb3faa0b65966ec9fd237a4 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 23 Jan 2020 10:11:08 +0300 Subject: [PATCH 2943/3715] seq_tab_next() should increase position index [ Upstream commit 70a87287c821e9721b62463777f55ba588ac4623 ] if seq_file .next fuction does not change position index, read after some lseek can generate unexpected output. https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index 9e5cd18e7358..8bd90ad15607 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -66,8 +66,7 @@ static void *seq_tab_start(struct seq_file *seq, loff_t *pos) static void *seq_tab_next(struct seq_file *seq, void *v, loff_t *pos) { v = seq_tab_get_idx(seq->private, *pos + 1); - if (v) - ++*pos; + ++(*pos); return v; } From 964646964ef4028148b5916f6ad5b55a2d9c9152 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 23 Jan 2020 10:11:13 +0300 Subject: [PATCH 2944/3715] l2t_seq_next should increase position index [ Upstream commit 66018a102f7756cf72db4d2704e1b93969d9d332 ] if seq_file .next fuction does not change position index, read after some lseek can generate unexpected output. https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/chelsio/cxgb4/l2t.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c index f7ef8871dd0b..67aa3c997417 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c +++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c @@ -682,8 +682,7 @@ static void *l2t_seq_start(struct seq_file *seq, loff_t *pos) static void *l2t_seq_next(struct seq_file *seq, void *v, loff_t *pos) { v = l2t_get_idx(seq, *pos); - if (v) - ++*pos; + ++(*pos); return v; } From 9b7a4bed4c7894442fd0f339b84e022a02174c5d Mon Sep 17 00:00:00 2001 From: Praveen Chaudhary Date: Thu, 23 Jan 2020 12:33:28 -0800 Subject: [PATCH 2945/3715] net: Fix skb->csum update in inet_proto_csum_replace16(). [ Upstream commit 189c9b1e94539b11c80636bc13e9cf47529e7bba ] skb->csum is updated incorrectly, when manipulation for NF_NAT_MANIP_SRC\DST is done on IPV6 packet. Fix: There is no need to update skb->csum in inet_proto_csum_replace16(), because update in two fields a.) IPv6 src/dst address and b.) L4 header checksum cancels each other for skb->csum calculation. Whereas inet_proto_csum_replace4 function needs to update skb->csum, because update in 3 fields a.) IPv4 src/dst address, b.) IPv4 Header checksum and c.) L4 header checksum results in same diff as L4 Header checksum for skb->csum calculation. [ pablo@netfilter.org: a few comestic documentation edits ] Signed-off-by: Praveen Chaudhary Signed-off-by: Zhenggen Xu Signed-off-by: Andy Stracner Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/core/utils.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/net/core/utils.c b/net/core/utils.c index 93066bd0305a..b1823e76b877 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -419,6 +419,23 @@ void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, } EXPORT_SYMBOL(inet_proto_csum_replace4); +/** + * inet_proto_csum_replace16 - update layer 4 header checksum field + * @sum: Layer 4 header checksum field + * @skb: sk_buff for the packet + * @from: old IPv6 address + * @to: new IPv6 address + * @pseudohdr: True if layer 4 header checksum includes pseudoheader + * + * Update layer 4 header as per the update in IPv6 src/dst address. + * + * There is no need to update skb->csum in this function, because update in two + * fields a.) IPv6 src/dst address and b.) L4 header checksum cancels each other + * for skb->csum calculation. Whereas inet_proto_csum_replace4 function needs to + * update skb->csum, because update in 3 fields a.) IPv4 src/dst address, + * b.) IPv4 Header checksum and c.) L4 header checksum results in same diff as + * L4 Header checksum for skb->csum calculation. + */ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, const __be32 *from, const __be32 *to, bool pseudohdr) @@ -430,9 +447,6 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, if (skb->ip_summed != CHECKSUM_PARTIAL) { *sum = csum_fold(csum_partial(diff, sizeof(diff), ~csum_unfold(*sum))); - if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) - skb->csum = ~csum_partial(diff, sizeof(diff), - ~skb->csum); } else if (pseudohdr) *sum = ~csum_fold(csum_partial(diff, sizeof(diff), csum_unfold(*sum))); From e1404bf5369466ec41eb57852ccf5c24d5c69fe4 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 31 Jan 2020 09:31:05 -0500 Subject: [PATCH 2946/3715] btrfs: do not zero f_bavail if we have available space commit d55966c4279bfc6a0cf0b32bf13f5df228a1eeb6 upstream. There was some logic added a while ago to clear out f_bavail in statfs() if we did not have enough free metadata space to satisfy our global reserve. This was incorrect at the time, however didn't really pose a problem for normal file systems because we would often allocate chunks if we got this low on free metadata space, and thus wouldn't really hit this case unless we were actually full. Fast forward to today and now we are much better about not allocating metadata chunks all of the time. Couple this with d792b0f19711 ("btrfs: always reserve our entire size for the global reserve") which now means we'll easily have a larger global reserve than our free space, we are now more likely to trip over this while still having plenty of space. Fix this by skipping this logic if the global rsv's space_info is not full. space_info->full is 0 unless we've attempted to allocate a chunk for that space_info and that has failed. If this happens then the space for the global reserve is definitely sacred and we need to report b_avail == 0, but before then we can just use our calculated b_avail. Reported-by: Martin Steigerwald Fixes: ca8a51b3a979 ("btrfs: statfs: report zero available if metadata are exhausted") CC: stable@vger.kernel.org # 4.5+ Reviewed-by: Qu Wenruo Tested-By: Martin Steigerwald Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/super.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 204d585e012a..3ab79fa00dc7 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2114,7 +2114,15 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) */ thresh = 4 * 1024 * 1024; - if (!mixed && total_free_meta - thresh < block_rsv->size) + /* + * We only want to claim there's no available space if we can no longer + * allocate chunks for our metadata profile and our global reserve will + * not fit in the free metadata space. If we aren't ->full then we + * still can allocate chunks and thus are fine using the currently + * calculated f_bavail. + */ + if (!mixed && block_rsv->space_info->full && + total_free_meta - thresh < block_rsv->size) buf->f_bavail = 0; buf->f_type = BTRFS_SUPER_MAGIC; From 5008c125d5535e7ff13085553b0ec55547ef72a3 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Wed, 8 Jan 2020 03:17:45 +0800 Subject: [PATCH 2947/3715] perf report: Fix no libunwind compiled warning break s390 issue [ Upstream commit c3314a74f86dc00827e0945c8e5039fc3aebaa3c ] Commit 800d3f561659 ("perf report: Add warning when libunwind not compiled in") breaks the s390 platform. S390 uses libdw-dwarf-unwind for call chain unwinding and had no support for libunwind. So the warning "Please install libunwind development packages during the perf build." caused the confusion even if the call-graph is displayed correctly. This patch adds checking for HAVE_DWARF_SUPPORT, which is set when libdw-dwarf-unwind is compiled in. Fixes: 800d3f561659 ("perf report: Add warning when libunwind not compiled in") Signed-off-by: Jin Yao Reviewed-by: Thomas Richter Tested-by: Thomas Richter Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200107191745.18415-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-report.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 17b26661b2f6..429c3e140dc3 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -342,10 +342,10 @@ static int report__setup_sample_type(struct report *rep) PERF_SAMPLE_BRANCH_ANY)) rep->nonany_branch_mode = true; -#ifndef HAVE_LIBUNWIND_SUPPORT +#if !defined(HAVE_LIBUNWIND_SUPPORT) && !defined(HAVE_DWARF_SUPPORT) if (dwarf_callchain_users) { - ui__warning("Please install libunwind development packages " - "during the perf build.\n"); + ui__warning("Please install libunwind or libdw " + "development packages during the perf build.\n"); } #endif From e0f8b8a65a473a8baa439cf865a694bbeb83fe90 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 5 Feb 2020 14:18:29 +0000 Subject: [PATCH 2948/3715] Linux 4.14.170 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 795d93bfe156..b614291199f8 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 169 +SUBLEVEL = 170 EXTRAVERSION = NAME = Petit Gorille From d0d1db1b77fa7e29868657268111518b99567afd Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Wed, 5 Feb 2020 09:30:55 -0800 Subject: [PATCH 2949/3715] ANDROID: Incremental fs: Remove C++-style comments Change-Id: I89e1dc6020e596fb36694f8646f78b98f7ad4a7f Bug: 133435829 Signed-off-by: Paul Lawrence --- fs/incfs/vfs.c | 14 +++++++------- include/uapi/linux/incrementalfs.h | 5 +++-- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index e4790189abd3..dfe4ec44f3c4 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -868,9 +868,10 @@ static struct signature_info *incfs_copy_signature_info_from_user( goto err; } - // TODO this sets the root_hash length to MAX_HASH_SIZE not - // the actual size. Fix, then set INCFS_MAX_HASH_SIZE back - // to 64 + /* TODO this sets the root_hash length to MAX_HASH_SIZE not + * the actual size. Fix, then set INCFS_MAX_HASH_SIZE back + * to 64 + */ result->root_hash = range(p, INCFS_MAX_HASH_SIZE); if (copy_from_user(p, u64_to_user_ptr(usr_si.root_hash), result->root_hash.len) > 0) { @@ -1005,8 +1006,9 @@ static int init_new_file(struct mount_info *mi, struct dentry *dentry, goto out; } - // TODO This code seems wrong when len is zero - we - // should error out?? + /* TODO This code seems wrong when len is zero - we + * should error out?? + */ if (si->signature.len > 0) error = incfs_validate_pkcs7_signature( si->signature, @@ -1114,8 +1116,6 @@ static int dir_relative_path_resolve( LOOKUP_FOLLOW | LOOKUP_DIRECTORY, result_path, NULL); out: - // TODO sys_close should be replaced with ksys_close on later kernel - // Add to compat or some such? sys_close(dir_fd); if (error) pr_debug("incfs: %s %d\n", __func__, error); diff --git a/include/uapi/linux/incrementalfs.h b/include/uapi/linux/incrementalfs.h index 8a06e2e48fc4..b257b9f0ec3f 100644 --- a/include/uapi/linux/incrementalfs.h +++ b/include/uapi/linux/incrementalfs.h @@ -22,8 +22,9 @@ #define INCFS_DATA_FILE_BLOCK_SIZE 4096 #define INCFS_HEADER_VER 1 -// TODO: This value is assumed in incfs_copy_signature_info_from_user to be the -// actual signature length. Set back to 64 when fixed. +/* TODO: This value is assumed in incfs_copy_signature_info_from_user to be the + * actual signature length. Set back to 64 when fixed. + */ #define INCFS_MAX_HASH_SIZE 32 #define INCFS_MAX_FILE_ATTR_SIZE 512 From bc5e5bc1d007e99228ca0717daa12639627819ba Mon Sep 17 00:00:00 2001 From: Yurii Zubrytskyi Date: Fri, 31 Jan 2020 11:36:29 -0800 Subject: [PATCH 2950/3715] ANDROID: Incremental fs: Make files writeable - added chmod() to +0222 to make all backing files and dirs writable. vold/system_server have a umask that clears those flags, making incfs unusable Signed-off-by: Yurii Zubrytskyi Signed-off-by: Paul Lawrence Bug: 133435829 Change-Id: Id9258401570cc2cc7cd5735aace89d379a9b043d --- fs/incfs/vfs.c | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index dfe4ec44f3c4..1f7e49fba755 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -350,7 +350,6 @@ static int inode_set(struct inode *inode, void *opaque) struct dentry *backing_dentry = search->backing_dentry; struct inode *backing_inode = d_inode(backing_dentry); - inode_init_owner(inode, NULL, backing_inode->i_mode); fsstack_copy_attr_all(inode, backing_inode); if (S_ISREG(inode->i_mode)) { u64 size = read_size_attr(backing_dentry); @@ -382,6 +381,7 @@ static int inode_set(struct inode *inode, void *opaque) pr_warn("incfs: ino conflict with backing FS %ld\n", backing_inode->i_ino); } + return 0; } else if (search->ino == INCFS_PENDING_READS_INODE) { /* It's an inode for .pending_reads pseudo file. */ @@ -1140,6 +1140,27 @@ static int validate_name(char *file_name) return 0; } +static int chmod(struct dentry *dentry, umode_t mode) +{ + struct inode *inode = dentry->d_inode; + struct inode *delegated_inode = NULL; + struct iattr newattrs; + int error; + +retry_deleg: + inode_lock(inode); + newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); + newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; + error = notify_change(dentry, &newattrs, &delegated_inode); + inode_unlock(inode); + if (delegated_inode) { + error = break_deleg_wait(&delegated_inode); + if (!error) + goto retry_deleg; + } + return error; +} + static long ioctl_create_file(struct mount_info *mi, struct incfs_new_file_args __user *usr_args) { @@ -1240,8 +1261,8 @@ static long ioctl_create_file(struct mount_info *mi, /* Creating a file in the .index dir. */ index_dir_inode = d_inode(mi->mi_index_dir); inode_lock_nested(index_dir_inode, I_MUTEX_PARENT); - error = vfs_create(index_dir_inode, index_file_dentry, - args.mode, true); + error = vfs_create(index_dir_inode, index_file_dentry, args.mode | 0222, + true); inode_unlock(index_dir_inode); if (error) @@ -1251,6 +1272,12 @@ static long ioctl_create_file(struct mount_info *mi, goto out; } + error = chmod(index_file_dentry, args.mode | 0222); + if (error) { + pr_debug("incfs: chmod err: %d\n", error); + goto delete_index_file; + } + /* Save the file's ID as an xattr for easy fetching in future. */ error = vfs_setxattr(index_file_dentry, INCFS_XATTR_ID_NAME, file_id_str, strlen(file_id_str), XATTR_CREATE); @@ -1540,7 +1567,7 @@ static int dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) } inode_lock_nested(dir_node->n_backing_inode, I_MUTEX_PARENT); - err = vfs_mkdir(dir_node->n_backing_inode, backing_dentry, mode); + err = vfs_mkdir(dir_node->n_backing_inode, backing_dentry, mode | 0222); inode_unlock(dir_node->n_backing_inode); if (!err) { struct inode *inode = NULL; From 334164ca0f18ea89a922b90020f5e3840a928503 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Wed, 5 Feb 2020 08:50:12 -0800 Subject: [PATCH 2951/3715] ANDROID: Incremental fs: Fix crash on failed lookup Don't call dput on error code Change-Id: Ie63645c9ed67fa231829917ae8ca154e049b4921 Signed-off-by: Paul Lawrence Bug: 133435829 --- fs/incfs/vfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index 1f7e49fba755..aebd2b02bd83 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -1484,6 +1484,7 @@ static struct dentry *dir_lookup(struct inode *dir_inode, struct dentry *dentry, err = IS_ERR(backing_dentry) ? PTR_ERR(backing_dentry) : -EFAULT; + backing_dentry = NULL; goto out; } else { struct inode *inode = NULL; From 9771d9b3779d1f8149f04e70f89fd04f5600051e Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Tue, 12 Jun 2018 18:16:20 +0200 Subject: [PATCH 2952/3715] BACKPORT: atomic: Add irqsave variant of atomic_dec_and_lock() There are in-tree users of atomic_dec_and_lock() which must acquire the spin lock with interrupts disabled. To workaround the lack of an irqsave variant of atomic_dec_and_lock() they use local_irq_save() at the call site. This causes extra code and creates in some places unneeded long interrupt disabled times. These places need also extra treatment for PREEMPT_RT due to the disconnect of the irq disabling and the lock function. Implement the missing irqsave variant of the function. Signed-off-by: Anna-Maria Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r20180612161621.22645-3-bigeasy@linutronix.de (cherry picked from commit ccfbb5bed407053b27492a9adc06064d949a9aa6) BUG: 137270441 Change-Id: I49051801a73ac0de11d71bfa23930ce4826f7c61 Signed-off-by: Satya Tangirala --- include/linux/spinlock.h | 5 +++++ lib/dec_and_lock.c | 16 ++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 341e1a12bfc7..b5a6719c5b04 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -421,4 +421,9 @@ extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock); #define atomic_dec_and_lock(atomic, lock) \ __cond_lock(lock, _atomic_dec_and_lock(atomic, lock)) +extern int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock, + unsigned long *flags); +#define atomic_dec_and_lock_irqsave(atomic, lock, flags) \ + __cond_lock(lock, _atomic_dec_and_lock_irqsave(atomic, lock, &(flags))) + #endif /* __LINUX_SPINLOCK_H */ diff --git a/lib/dec_and_lock.c b/lib/dec_and_lock.c index 347fa7ac2e8a..9555b68bb774 100644 --- a/lib/dec_and_lock.c +++ b/lib/dec_and_lock.c @@ -33,3 +33,19 @@ int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) } EXPORT_SYMBOL(_atomic_dec_and_lock); + +int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock, + unsigned long *flags) +{ + /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ + if (atomic_add_unless(atomic, -1, 1)) + return 0; + + /* Otherwise do it the slow way */ + spin_lock_irqsave(lock, *flags); + if (atomic_dec_and_test(atomic)) + return 1; + spin_unlock_irqrestore(lock, *flags); + return 0; +} +EXPORT_SYMBOL(_atomic_dec_and_lock_irqsave); From 66b5609826d60f80623643f1a7a1d865b5233f19 Mon Sep 17 00:00:00 2001 From: Satya Tangirala Date: Thu, 24 Oct 2019 14:44:23 -0700 Subject: [PATCH 2953/3715] BACKPORT: FROMLIST: block: Keyslot Manager for Inline Encryption Inline Encryption hardware allows software to specify an encryption context (an encryption key, crypto algorithm, data unit num, data unit size, etc.) along with a data transfer request to a storage device, and the inline encryption hardware will use that context to en/decrypt the data. The inline encryption hardware is part of the storage device, and it conceptually sits on the data path between system memory and the storage device. Inline Encryption hardware implementations often function around the concept of "keyslots". These implementations often have a limited number of "keyslots", each of which can hold an encryption context (we say that an encryption context can be "programmed" into a keyslot). Requests made to the storage device may have a keyslot associated with them, and the inline encryption hardware will en/decrypt the data in the requests using the encryption context programmed into that associated keyslot. As keyslots are limited, and programming keys may be expensive in many implementations, and multiple requests may use exactly the same encryption contexts, we introduce a Keyslot Manager to efficiently manage keyslots. The keyslot manager also functions as the interface that upper layers will use to program keys into inline encryption hardware. For more information on the Keyslot Manager, refer to documentation found in block/keyslot-manager.c and linux/keyslot-manager.h. Bug: 137270441 Test: tested as series; see Ie1b77f7615d6a7a60fdc9105c7ab2200d17636a8 Change-Id: Iea1ee5a7eec46cb50d33cf1e2d20dfb7335af4ed Signed-off-by: Satya Tangirala Link: https://lore.kernel.org/linux-fscrypt/20191028072032.6911-2-satyat@google.com/ --- block/Kconfig | 8 + block/Makefile | 1 + block/keyslot-manager.c | 353 ++++++++++++++++++++++++++++++++ include/linux/bio.h | 5 + include/linux/blkdev.h | 6 + include/linux/keyslot-manager.h | 98 +++++++++ 6 files changed, 471 insertions(+) create mode 100644 block/keyslot-manager.c create mode 100644 include/linux/keyslot-manager.h diff --git a/block/Kconfig b/block/Kconfig index 28ec55752b68..960e7fdf6214 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -184,6 +184,14 @@ config BLK_SED_OPAL Enabling this option enables users to setup/unlock/lock Locking ranges for SED devices using the Opal protocol. +config BLK_INLINE_ENCRYPTION + bool "Enable inline encryption support in block layer" + help + Build the blk-crypto subsystem. + Enabling this lets the block layer handle encryption, + so users can take advantage of inline encryption + hardware if present. + menu "Partition Types" source "block/partitions/Kconfig" diff --git a/block/Makefile b/block/Makefile index 6a56303b9925..a52d8d22077c 100644 --- a/block/Makefile +++ b/block/Makefile @@ -35,3 +35,4 @@ obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o obj-$(CONFIG_BLK_WBT) += blk-wbt.o obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o obj-$(CONFIG_BLK_SED_OPAL) += sed-opal.o +obj-$(CONFIG_BLK_INLINE_ENCRYPTION) += keyslot-manager.o diff --git a/block/keyslot-manager.c b/block/keyslot-manager.c new file mode 100644 index 000000000000..bcc0c9e77e59 --- /dev/null +++ b/block/keyslot-manager.c @@ -0,0 +1,353 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * keyslot-manager.c + * + * Copyright 2019 Google LLC + */ + +/** + * DOC: The Keyslot Manager + * + * Many devices with inline encryption support have a limited number of "slots" + * into which encryption contexts may be programmed, and requests can be tagged + * with a slot number to specify the key to use for en/decryption. + * + * As the number of slots are limited, and programming keys is expensive on + * many inline encryption hardware, we don't want to program the same key into + * multiple slots - if multiple requests are using the same key, we want to + * program just one slot with that key and use that slot for all requests. + * + * The keyslot manager manages these keyslots appropriately, and also acts as + * an abstraction between the inline encryption hardware and the upper layers. + * + * Lower layer devices will set up a keyslot manager in their request queue + * and tell it how to perform device specific operations like programming/ + * evicting keys from keyslots. + * + * Upper layers will call keyslot_manager_get_slot_for_key() to program a + * key into some slot in the inline encryption hardware. + */ +#include +#include +#include +#include +#include +#include + +struct keyslot { + atomic_t slot_refs; + struct list_head idle_slot_node; +}; + +struct keyslot_manager { + unsigned int num_slots; + atomic_t num_idle_slots; + struct keyslot_mgmt_ll_ops ksm_ll_ops; + void *ll_priv_data; + + /* Protects programming and evicting keys from the device */ + struct rw_semaphore lock; + + /* List of idle slots, with least recently used slot at front */ + wait_queue_head_t idle_slots_wait_queue; + struct list_head idle_slots; + spinlock_t idle_slots_lock; + + /* Per-keyslot data */ + struct keyslot slots[]; +}; + +/** + * keyslot_manager_create() - Create a keyslot manager + * @num_slots: The number of key slots to manage. + * @ksm_ll_ops: The struct keyslot_mgmt_ll_ops for the device that this keyslot + * manager will use to perform operations like programming and + * evicting keys. + * @ll_priv_data: Private data passed as is to the functions in ksm_ll_ops. + * + * Allocate memory for and initialize a keyslot manager. Called by e.g. + * storage drivers to set up a keyslot manager in their request_queue. + * + * Context: May sleep + * Return: Pointer to constructed keyslot manager or NULL on error. + */ +struct keyslot_manager *keyslot_manager_create(unsigned int num_slots, + const struct keyslot_mgmt_ll_ops *ksm_ll_ops, + void *ll_priv_data) +{ + struct keyslot_manager *ksm; + int slot; + + if (num_slots == 0) + return NULL; + + /* Check that all ops are specified */ + if (ksm_ll_ops->keyslot_program == NULL || + ksm_ll_ops->keyslot_evict == NULL || + ksm_ll_ops->crypto_mode_supported == NULL || + ksm_ll_ops->keyslot_find == NULL) + return NULL; + + ksm = kvzalloc(struct_size(ksm, slots, num_slots), GFP_KERNEL); + if (!ksm) + return NULL; + + ksm->num_slots = num_slots; + atomic_set(&ksm->num_idle_slots, num_slots); + ksm->ksm_ll_ops = *ksm_ll_ops; + ksm->ll_priv_data = ll_priv_data; + + init_rwsem(&ksm->lock); + + init_waitqueue_head(&ksm->idle_slots_wait_queue); + INIT_LIST_HEAD(&ksm->idle_slots); + + for (slot = 0; slot < num_slots; slot++) { + list_add_tail(&ksm->slots[slot].idle_slot_node, + &ksm->idle_slots); + } + + spin_lock_init(&ksm->idle_slots_lock); + + return ksm; +} +EXPORT_SYMBOL(keyslot_manager_create); + +static void remove_slot_from_lru_list(struct keyslot_manager *ksm, int slot) +{ + unsigned long flags; + + spin_lock_irqsave(&ksm->idle_slots_lock, flags); + list_del(&ksm->slots[slot].idle_slot_node); + spin_unlock_irqrestore(&ksm->idle_slots_lock, flags); + + atomic_dec(&ksm->num_idle_slots); +} + +static int find_and_grab_keyslot(struct keyslot_manager *ksm, const u8 *key, + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size) +{ + int slot; + + slot = ksm->ksm_ll_ops.keyslot_find(ksm->ll_priv_data, key, + crypto_mode, data_unit_size); + if (slot < 0) + return slot; + if (WARN_ON(slot >= ksm->num_slots)) + return -EINVAL; + if (atomic_inc_return(&ksm->slots[slot].slot_refs) == 1) { + /* Took first reference to this slot; remove it from LRU list */ + remove_slot_from_lru_list(ksm, slot); + } + return slot; +} + +/** + * keyslot_manager_get_slot_for_key() - Program a key into a keyslot. + * @ksm: The keyslot manager to program the key into. + * @key: Pointer to the bytes of the key to program. Must be the correct length + * for the chosen @crypto_mode; see blk_crypto_modes in blk-crypto.c. + * @crypto_mode: Identifier for the encryption algorithm to use. + * @data_unit_size: The data unit size to use for en/decryption. + * + * Get a keyslot that's been programmed with the specified key, crypto_mode, and + * data_unit_size. If one already exists, return it with incremented refcount. + * Otherwise, wait for a keyslot to become idle and program it. + * + * Context: Process context. Takes and releases ksm->lock. + * Return: The keyslot on success, else a -errno value. + */ +int keyslot_manager_get_slot_for_key(struct keyslot_manager *ksm, + const u8 *key, + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size) +{ + int slot; + int err; + struct keyslot *idle_slot; + + down_read(&ksm->lock); + slot = find_and_grab_keyslot(ksm, key, crypto_mode, data_unit_size); + up_read(&ksm->lock); + if (slot != -ENOKEY) + return slot; + + for (;;) { + down_write(&ksm->lock); + slot = find_and_grab_keyslot(ksm, key, crypto_mode, + data_unit_size); + if (slot != -ENOKEY) { + up_write(&ksm->lock); + return slot; + } + + /* + * If we're here, that means there wasn't a slot that was + * already programmed with the key. So try to program it. + */ + if (atomic_read(&ksm->num_idle_slots) > 0) + break; + + up_write(&ksm->lock); + wait_event(ksm->idle_slots_wait_queue, + (atomic_read(&ksm->num_idle_slots) > 0)); + } + + idle_slot = list_first_entry(&ksm->idle_slots, struct keyslot, + idle_slot_node); + slot = idle_slot - ksm->slots; + + err = ksm->ksm_ll_ops.keyslot_program(ksm->ll_priv_data, key, + crypto_mode, + data_unit_size, + slot); + + if (err) { + wake_up(&ksm->idle_slots_wait_queue); + up_write(&ksm->lock); + return err; + } + + atomic_set(&ksm->slots[slot].slot_refs, 1); + remove_slot_from_lru_list(ksm, slot); + + up_write(&ksm->lock); + return slot; + +} +EXPORT_SYMBOL(keyslot_manager_get_slot_for_key); + +/** + * keyslot_manager_get_slot() - Increment the refcount on the specified slot. + * @ksm - The keyslot manager that we want to modify. + * @slot - The slot to increment the refcount of. + * + * This function assumes that there is already an active reference to that slot + * and simply increments the refcount. This is useful when cloning a bio that + * already has a reference to a keyslot, and we want the cloned bio to also have + * its own reference. + * + * Context: Any context. + */ +void keyslot_manager_get_slot(struct keyslot_manager *ksm, unsigned int slot) +{ + if (WARN_ON(slot >= ksm->num_slots)) + return; + + WARN_ON(atomic_inc_return(&ksm->slots[slot].slot_refs) < 2); +} +EXPORT_SYMBOL(keyslot_manager_get_slot); + +/** + * keyslot_manager_put_slot() - Release a reference to a slot + * @ksm: The keyslot manager to release the reference from. + * @slot: The slot to release the reference from. + * + * Context: Any context. + */ +void keyslot_manager_put_slot(struct keyslot_manager *ksm, unsigned int slot) +{ + unsigned long flags; + + if (WARN_ON(slot >= ksm->num_slots)) + return; + + if (atomic_dec_and_lock_irqsave(&ksm->slots[slot].slot_refs, + &ksm->idle_slots_lock, flags)) { + list_add_tail(&ksm->slots[slot].idle_slot_node, + &ksm->idle_slots); + spin_unlock_irqrestore(&ksm->idle_slots_lock, flags); + atomic_inc(&ksm->num_idle_slots); + wake_up(&ksm->idle_slots_wait_queue); + } +} +EXPORT_SYMBOL(keyslot_manager_put_slot); + +/** + * keyslot_manager_crypto_mode_supported() - Find out if a crypto_mode/data + * unit size combination is supported + * by a ksm. + * @ksm - The keyslot manager to check + * @crypto_mode - The crypto mode to check for. + * @data_unit_size - The data_unit_size for the mode. + * + * Calls and returns the result of the crypto_mode_supported function specified + * by the ksm. + * + * Context: Process context. + * Return: Whether or not this ksm supports the specified crypto_mode/ + * data_unit_size combo. + */ +bool keyslot_manager_crypto_mode_supported(struct keyslot_manager *ksm, + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size) +{ + if (!ksm) + return false; + return ksm->ksm_ll_ops.crypto_mode_supported(ksm->ll_priv_data, + crypto_mode, + data_unit_size); +} +EXPORT_SYMBOL(keyslot_manager_crypto_mode_supported); + +bool keyslot_manager_rq_crypto_mode_supported(struct request_queue *q, + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size) +{ + return keyslot_manager_crypto_mode_supported(q->ksm, crypto_mode, + data_unit_size); +} +EXPORT_SYMBOL(keyslot_manager_rq_crypto_mode_supported); + +/** + * keyslot_manager_evict_key() - Evict a key from the lower layer device. + * @ksm - The keyslot manager to evict from + * @key - The key to evict + * @crypto_mode - The crypto algorithm the key was programmed with. + * @data_unit_size - The data_unit_size the key was programmed with. + * + * Finds the slot that the specified key, crypto_mode, data_unit_size combo + * was programmed into, and evicts that slot from the lower layer device if + * the refcount on the slot is 0. Returns -EBUSY if the refcount is not 0, and + * -errno on error. + * + * Context: Process context. Takes and releases ksm->lock. + */ +int keyslot_manager_evict_key(struct keyslot_manager *ksm, + const u8 *key, + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size) +{ + int slot; + int err = 0; + + down_write(&ksm->lock); + slot = ksm->ksm_ll_ops.keyslot_find(ksm->ll_priv_data, key, + crypto_mode, + data_unit_size); + + if (slot < 0) { + up_write(&ksm->lock); + return slot; + } + + if (atomic_read(&ksm->slots[slot].slot_refs) == 0) { + err = ksm->ksm_ll_ops.keyslot_evict(ksm->ll_priv_data, key, + crypto_mode, + data_unit_size, + slot); + } else { + err = -EBUSY; + } + + up_write(&ksm->lock); + return err; +} +EXPORT_SYMBOL(keyslot_manager_evict_key); + +void keyslot_manager_destroy(struct keyslot_manager *ksm) +{ + kvfree(ksm); +} +EXPORT_SYMBOL(keyslot_manager_destroy); diff --git a/include/linux/bio.h b/include/linux/bio.h index e260f000b9ac..990cdd236b34 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -595,6 +595,11 @@ static inline char *__bio_kmap_irq(struct bio *bio, struct bvec_iter iter, __bio_kmap_irq((bio), (bio)->bi_iter, (flags)) #define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags) +enum blk_crypto_mode_num { + BLK_ENCRYPTION_MODE_INVALID = 0, + BLK_ENCRYPTION_MODE_AES_256_XTS = 1, +}; + /* * BIO list management for use by remapping drivers (e.g. DM or MD) and loop. * diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ad940102451c..909af748ea1a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -43,6 +43,7 @@ struct pr_ops; struct rq_wb; struct blk_queue_stats; struct blk_stat_callback; +struct keyslot_manager; #define BLKDEV_MIN_RQ 4 #define BLKDEV_MAX_RQ 128 /* Default maximum */ @@ -542,6 +543,11 @@ struct request_queue { */ unsigned int request_fn_active; +#ifdef CONFIG_BLK_INLINE_ENCRYPTION + /* Inline crypto capabilities */ + struct keyslot_manager *ksm; +#endif + unsigned int rq_timeout; int poll_nsec; diff --git a/include/linux/keyslot-manager.h b/include/linux/keyslot-manager.h new file mode 100644 index 000000000000..0777ade7907c --- /dev/null +++ b/include/linux/keyslot-manager.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2019 Google LLC + */ + +#include + +#ifdef CONFIG_BLOCK + +#ifndef __LINUX_KEYSLOT_MANAGER_H +#define __LINUX_KEYSLOT_MANAGER_H + +/** + * struct keyslot_mgmt_ll_ops - functions to manage keyslots in hardware + * @keyslot_program: Program the specified key and algorithm into the + * specified slot in the inline encryption hardware. + * @keyslot_evict: Evict key from the specified keyslot in the hardware. + * The key, crypto_mode and data_unit_size are also passed + * down so that e.g. dm layers can evict keys from + * the devices that they map over. + * Returns 0 on success, -errno otherwise. + * @crypto_mode_supported: Check whether a crypto_mode and data_unit_size + * combo is supported. + * @keyslot_find: Returns the slot number that matches the key, + * or -ENOKEY if no match found, or -errno on + * error. + * + * This structure should be provided by storage device drivers when they set up + * a keyslot manager - this structure holds the function ptrs that the keyslot + * manager will use to manipulate keyslots in the hardware. + */ +struct keyslot_mgmt_ll_ops { + int (*keyslot_program)(void *ll_priv_data, const u8 *key, + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size, + unsigned int slot); + int (*keyslot_evict)(void *ll_priv_data, const u8 *key, + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size, + unsigned int slot); + bool (*crypto_mode_supported)(void *ll_priv_data, + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size); + int (*keyslot_find)(void *ll_priv_data, const u8 *key, + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size); +}; + +#ifdef CONFIG_BLK_INLINE_ENCRYPTION +struct keyslot_manager; + +extern struct keyslot_manager *keyslot_manager_create(unsigned int num_slots, + const struct keyslot_mgmt_ll_ops *ksm_ops, + void *ll_priv_data); + +extern int +keyslot_manager_get_slot_for_key(struct keyslot_manager *ksm, + const u8 *key, + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size); + +extern void keyslot_manager_get_slot(struct keyslot_manager *ksm, + unsigned int slot); + +extern void keyslot_manager_put_slot(struct keyslot_manager *ksm, + unsigned int slot); + +extern bool +keyslot_manager_crypto_mode_supported(struct keyslot_manager *ksm, + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size); + +extern bool +keyslot_manager_rq_crypto_mode_supported(struct request_queue *q, + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size); + +extern int keyslot_manager_evict_key(struct keyslot_manager *ksm, + const u8 *key, + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size); + +extern void keyslot_manager_destroy(struct keyslot_manager *ksm); + +#else /* CONFIG_BLK_INLINE_ENCRYPTION */ + +static inline bool +keyslot_manager_rq_crypto_mode_supported(struct request_queue *q, + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size) +{ + return false; +} +#endif /* CONFIG_BLK_INLINE_ENCRYPTION */ + +#endif /* __LINUX_KEYSLOT_MANAGER_H */ + +#endif /* CONFIG_BLOCK */ From 138adbbe5e4bfb6dee0571261f4d96a98f71d228 Mon Sep 17 00:00:00 2001 From: Satya Tangirala Date: Thu, 24 Oct 2019 14:44:24 -0700 Subject: [PATCH 2954/3715] BACKPORT: FROMLIST: block: Add encryption context to struct bio We must have some way of letting a storage device driver know what encryption context it should use for en/decrypting a request. However, it's the filesystem/fscrypt that knows about and manages encryption contexts. As such, when the filesystem layer submits a bio to the block layer, and this bio eventually reaches a device driver with support for inline encryption, the device driver will need to have been told the encryption context for that bio. We want to communicate the encryption context from the filesystem layer to the storage device along with the bio, when the bio is submitted to the block layer. To do this, we add a struct bio_crypt_ctx to struct bio, which can represent an encryption context (note that we can't use the bi_private field in struct bio to do this because that field does not function to pass information across layers in the storage stack). We also introduce various functions to manipulate the bio_crypt_ctx and make the bio/request merging logic aware of the bio_crypt_ctx. Bug: 137270441 Test: tested as series; see Ie1b77f7615d6a7a60fdc9105c7ab2200d17636a8 Change-Id: I479de9ec13758f1978b34d897e6956e680caeb92 Signed-off-by: Satya Tangirala Link: https://lore.kernel.org/linux-fscrypt/20191028072032.6911-3-satyat@google.com/ --- block/Makefile | 2 +- block/bio-crypt-ctx.c | 137 +++++++++++++++++++++ block/bio.c | 23 ++-- block/blk-core.c | 3 + block/blk-merge.c | 35 +++++- drivers/md/dm.c | 15 ++- include/linux/bio-crypt-ctx.h | 219 ++++++++++++++++++++++++++++++++++ include/linux/bio.h | 6 +- include/linux/blk_types.h | 6 + 9 files changed, 423 insertions(+), 23 deletions(-) create mode 100644 block/bio-crypt-ctx.c create mode 100644 include/linux/bio-crypt-ctx.h diff --git a/block/Makefile b/block/Makefile index a52d8d22077c..437a897c99e4 100644 --- a/block/Makefile +++ b/block/Makefile @@ -35,4 +35,4 @@ obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o obj-$(CONFIG_BLK_WBT) += blk-wbt.o obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o obj-$(CONFIG_BLK_SED_OPAL) += sed-opal.o -obj-$(CONFIG_BLK_INLINE_ENCRYPTION) += keyslot-manager.o +obj-$(CONFIG_BLK_INLINE_ENCRYPTION) += keyslot-manager.o bio-crypt-ctx.o diff --git a/block/bio-crypt-ctx.c b/block/bio-crypt-ctx.c new file mode 100644 index 000000000000..aa3571f72ee7 --- /dev/null +++ b/block/bio-crypt-ctx.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2019 Google LLC + */ + +#include +#include +#include +#include + +static int num_prealloc_crypt_ctxs = 128; +static struct kmem_cache *bio_crypt_ctx_cache; +static mempool_t *bio_crypt_ctx_pool; + +int bio_crypt_ctx_init(void) +{ + bio_crypt_ctx_cache = KMEM_CACHE(bio_crypt_ctx, 0); + if (!bio_crypt_ctx_cache) + return -ENOMEM; + + bio_crypt_ctx_pool = mempool_create_slab_pool( + num_prealloc_crypt_ctxs, + bio_crypt_ctx_cache); + + if (!bio_crypt_ctx_pool) + return -ENOMEM; + + return 0; +} + +struct bio_crypt_ctx *bio_crypt_alloc_ctx(gfp_t gfp_mask) +{ + return mempool_alloc(bio_crypt_ctx_pool, gfp_mask); +} +EXPORT_SYMBOL(bio_crypt_alloc_ctx); + +void bio_crypt_free_ctx(struct bio *bio) +{ + mempool_free(bio->bi_crypt_context, bio_crypt_ctx_pool); + bio->bi_crypt_context = NULL; +} +EXPORT_SYMBOL(bio_crypt_free_ctx); + +int bio_crypt_clone(struct bio *dst, struct bio *src, gfp_t gfp_mask) +{ + if (!bio_has_crypt_ctx(src)) + return 0; + + dst->bi_crypt_context = bio_crypt_alloc_ctx(gfp_mask); + if (!dst->bi_crypt_context) + return -ENOMEM; + + *dst->bi_crypt_context = *src->bi_crypt_context; + + if (bio_crypt_has_keyslot(src)) + keyslot_manager_get_slot(src->bi_crypt_context->processing_ksm, + src->bi_crypt_context->keyslot); + + return 0; +} +EXPORT_SYMBOL(bio_crypt_clone); + +bool bio_crypt_should_process(struct bio *bio, struct request_queue *q) +{ + if (!bio_has_crypt_ctx(bio)) + return false; + + WARN_ON(!bio_crypt_has_keyslot(bio)); + return q->ksm == bio->bi_crypt_context->processing_ksm; +} +EXPORT_SYMBOL(bio_crypt_should_process); + +/* + * Checks that two bio crypt contexts are compatible - i.e. that + * they are mergeable except for data_unit_num continuity. + */ +bool bio_crypt_ctx_compatible(struct bio *b_1, struct bio *b_2) +{ + struct bio_crypt_ctx *bc1 = b_1->bi_crypt_context; + struct bio_crypt_ctx *bc2 = b_2->bi_crypt_context; + + if (bio_has_crypt_ctx(b_1) != bio_has_crypt_ctx(b_2)) + return false; + + if (!bio_has_crypt_ctx(b_1)) + return true; + + return bc1->keyslot == bc2->keyslot && + bc1->data_unit_size_bits == bc2->data_unit_size_bits; +} + +/* + * Checks that two bio crypt contexts are compatible, and also + * that their data_unit_nums are continuous (and can hence be merged) + */ +bool bio_crypt_ctx_back_mergeable(struct bio *b_1, + unsigned int b1_sectors, + struct bio *b_2) +{ + struct bio_crypt_ctx *bc1 = b_1->bi_crypt_context; + struct bio_crypt_ctx *bc2 = b_2->bi_crypt_context; + + if (!bio_crypt_ctx_compatible(b_1, b_2)) + return false; + + return !bio_has_crypt_ctx(b_1) || + (bc1->data_unit_num + + (b1_sectors >> (bc1->data_unit_size_bits - 9)) == + bc2->data_unit_num); +} + +void bio_crypt_ctx_release_keyslot(struct bio *bio) +{ + struct bio_crypt_ctx *crypt_ctx = bio->bi_crypt_context; + + keyslot_manager_put_slot(crypt_ctx->processing_ksm, crypt_ctx->keyslot); + bio->bi_crypt_context->processing_ksm = NULL; + bio->bi_crypt_context->keyslot = -1; +} + +int bio_crypt_ctx_acquire_keyslot(struct bio *bio, struct keyslot_manager *ksm) +{ + int slot; + enum blk_crypto_mode_num crypto_mode = bio_crypto_mode(bio); + + if (!ksm) + return -ENOMEM; + + slot = keyslot_manager_get_slot_for_key(ksm, + bio_crypt_raw_key(bio), crypto_mode, + 1 << bio->bi_crypt_context->data_unit_size_bits); + if (slot < 0) + return slot; + + bio_crypt_set_keyslot(bio, slot, ksm); + return 0; +} diff --git a/block/bio.c b/block/bio.c index a3c4fd9ec478..b2746f89f8f9 100644 --- a/block/bio.c +++ b/block/bio.c @@ -251,6 +251,7 @@ static void bio_free(struct bio *bio) struct bio_set *bs = bio->bi_pool; void *p; + bio_crypt_free_ctx(bio); bio_uninit(bio); if (bs) { @@ -628,15 +629,15 @@ struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs) __bio_clone_fast(b, bio); - if (bio_integrity(bio)) { - int ret; + if (bio_crypt_clone(b, bio, gfp_mask) < 0) { + bio_put(b); + return NULL; + } - ret = bio_integrity_clone(b, bio, gfp_mask); - - if (ret < 0) { - bio_put(b); - return NULL; - } + if (bio_integrity(bio) && + bio_integrity_clone(b, bio, gfp_mask) < 0) { + bio_put(b); + return NULL; } return b; @@ -704,6 +705,11 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask, break; } + if (bio_crypt_clone(bio, bio_src, gfp_mask) < 0) { + bio_put(bio); + return NULL; + } + if (bio_integrity(bio_src)) { int ret; @@ -1035,6 +1041,7 @@ void bio_advance(struct bio *bio, unsigned bytes) if (bio_integrity(bio)) bio_integrity_advance(bio, bytes); + bio_crypt_advance(bio, bytes); bio_advance_iter(bio, &bio->bi_iter, bytes); } EXPORT_SYMBOL(bio_advance); diff --git a/block/blk-core.c b/block/blk-core.c index 79352e101172..9471bbeb238a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -3667,5 +3667,8 @@ int __init blk_dev_init(void) blk_debugfs_root = debugfs_create_dir("block", NULL); #endif + if (bio_crypt_ctx_init() < 0) + panic("Failed to allocate mem for bio crypt ctxs\n"); + return 0; } diff --git a/block/blk-merge.c b/block/blk-merge.c index 415b5dafd9e6..bc8da688525f 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -479,6 +479,9 @@ static inline int ll_new_hw_segment(struct request_queue *q, if (blk_integrity_merge_bio(q, req, bio) == false) goto no_merge; + if (WARN_ON_ONCE(!bio_crypt_ctx_compatible(bio, req->bio))) + goto no_merge; + /* * This will form the start of a new hw segment. Bump both * counters. @@ -673,8 +676,14 @@ enum elv_merge blk_try_req_merge(struct request *req, struct request *next) { if (blk_discard_mergable(req)) return ELEVATOR_DISCARD_MERGE; - else if (blk_rq_pos(req) + blk_rq_sectors(req) == blk_rq_pos(next)) + else if (blk_rq_pos(req) + blk_rq_sectors(req) == blk_rq_pos(next)) { + if (!bio_crypt_ctx_back_mergeable(req->bio, + blk_rq_sectors(req), + next->bio)) { + return ELEVATOR_NO_MERGE; + } return ELEVATOR_BACK_MERGE; + } return ELEVATOR_NO_MERGE; } @@ -711,6 +720,9 @@ static struct request *attempt_merge(struct request_queue *q, if (req->write_hint != next->write_hint) return NULL; + if (!bio_crypt_ctx_compatible(req->bio, next->bio)) + return NULL; + /* * If we are allowed to merge, then append bio list * from next to rq and release next. merge_requests_fn @@ -850,16 +862,31 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) if (rq->write_hint != bio->bi_write_hint) return false; + /* Only merge if the crypt contexts are compatible */ + if (!bio_crypt_ctx_compatible(bio, rq->bio)) + return false; + return true; } enum elv_merge blk_try_merge(struct request *rq, struct bio *bio) { - if (blk_discard_mergable(rq)) + if (blk_discard_mergable(rq)) { return ELEVATOR_DISCARD_MERGE; - else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector) + } else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == + bio->bi_iter.bi_sector) { + if (!bio_crypt_ctx_back_mergeable(rq->bio, + blk_rq_sectors(rq), bio)) { + return ELEVATOR_NO_MERGE; + } return ELEVATOR_BACK_MERGE; - else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector) + } else if (blk_rq_pos(rq) - bio_sectors(bio) == + bio->bi_iter.bi_sector) { + if (!bio_crypt_ctx_back_mergeable(bio, + bio_sectors(bio), rq->bio)) { + return ELEVATOR_NO_MERGE; + } return ELEVATOR_FRONT_MERGE; + } return ELEVATOR_NO_MERGE; } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index a56008b2e7c2..1ca1a512bc2a 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1246,12 +1246,15 @@ static int clone_bio(struct dm_target_io *tio, struct bio *bio, sector_t sector, unsigned len) { struct bio *clone = &tio->clone; + int ret; __bio_clone_fast(clone, bio); - if (unlikely(bio_integrity(bio) != NULL)) { - int r; + ret = bio_crypt_clone(clone, bio, GFP_NOIO); + if (ret < 0) + return ret; + if (unlikely(bio_integrity(bio) != NULL)) { if (unlikely(!dm_target_has_integrity(tio->ti->type) && !dm_target_passes_integrity(tio->ti->type))) { DMWARN("%s: the target %s doesn't support integrity data.", @@ -1260,9 +1263,11 @@ static int clone_bio(struct dm_target_io *tio, struct bio *bio, return -EIO; } - r = bio_integrity_clone(clone, bio, GFP_NOIO); - if (r < 0) - return r; + ret = bio_integrity_clone(clone, bio, GFP_NOIO); + if (ret < 0) { + bio_crypt_free_ctx(clone); + return ret; + } } if (bio_op(bio) != REQ_OP_ZONE_REPORT) diff --git a/include/linux/bio-crypt-ctx.h b/include/linux/bio-crypt-ctx.h new file mode 100644 index 000000000000..5cd569f77c31 --- /dev/null +++ b/include/linux/bio-crypt-ctx.h @@ -0,0 +1,219 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2019 Google LLC + */ +#ifndef __LINUX_BIO_CRYPT_CTX_H +#define __LINUX_BIO_CRYPT_CTX_H + +enum blk_crypto_mode_num { + BLK_ENCRYPTION_MODE_INVALID = 0, + BLK_ENCRYPTION_MODE_AES_256_XTS = 1, +}; + +#ifdef CONFIG_BLOCK +#include + +#ifdef CONFIG_BLK_INLINE_ENCRYPTION +struct bio_crypt_ctx { + int keyslot; + const u8 *raw_key; + enum blk_crypto_mode_num crypto_mode; + u64 data_unit_num; + unsigned int data_unit_size_bits; + + /* + * The keyslot manager where the key has been programmed + * with keyslot. + */ + struct keyslot_manager *processing_ksm; + + /* + * Copy of the bvec_iter when this bio was submitted. + * We only want to en/decrypt the part of the bio + * as described by the bvec_iter upon submission because + * bio might be split before being resubmitted + */ + struct bvec_iter crypt_iter; + u64 sw_data_unit_num; +}; + +extern int bio_crypt_clone(struct bio *dst, struct bio *src, + gfp_t gfp_mask); + +static inline bool bio_has_crypt_ctx(struct bio *bio) +{ + return bio->bi_crypt_context; +} + +static inline void bio_crypt_advance(struct bio *bio, unsigned int bytes) +{ + if (bio_has_crypt_ctx(bio)) { + bio->bi_crypt_context->data_unit_num += + bytes >> bio->bi_crypt_context->data_unit_size_bits; + } +} + +static inline bool bio_crypt_has_keyslot(struct bio *bio) +{ + return bio->bi_crypt_context->keyslot >= 0; +} + +extern int bio_crypt_ctx_init(void); + +extern struct bio_crypt_ctx *bio_crypt_alloc_ctx(gfp_t gfp_mask); + +extern void bio_crypt_free_ctx(struct bio *bio); + +static inline int bio_crypt_set_ctx(struct bio *bio, + const u8 *raw_key, + enum blk_crypto_mode_num crypto_mode, + u64 dun, + unsigned int dun_bits, + gfp_t gfp_mask) +{ + struct bio_crypt_ctx *crypt_ctx; + + crypt_ctx = bio_crypt_alloc_ctx(gfp_mask); + if (!crypt_ctx) + return -ENOMEM; + + crypt_ctx->raw_key = raw_key; + crypt_ctx->data_unit_num = dun; + crypt_ctx->data_unit_size_bits = dun_bits; + crypt_ctx->crypto_mode = crypto_mode; + crypt_ctx->processing_ksm = NULL; + crypt_ctx->keyslot = -1; + bio->bi_crypt_context = crypt_ctx; + + return 0; +} + +static inline void bio_set_data_unit_num(struct bio *bio, u64 dun) +{ + bio->bi_crypt_context->data_unit_num = dun; +} + +static inline int bio_crypt_get_keyslot(struct bio *bio) +{ + return bio->bi_crypt_context->keyslot; +} + +static inline void bio_crypt_set_keyslot(struct bio *bio, + unsigned int keyslot, + struct keyslot_manager *ksm) +{ + bio->bi_crypt_context->keyslot = keyslot; + bio->bi_crypt_context->processing_ksm = ksm; +} + +extern void bio_crypt_ctx_release_keyslot(struct bio *bio); + +extern int bio_crypt_ctx_acquire_keyslot(struct bio *bio, + struct keyslot_manager *ksm); + +static inline const u8 *bio_crypt_raw_key(struct bio *bio) +{ + return bio->bi_crypt_context->raw_key; +} + +static inline enum blk_crypto_mode_num bio_crypto_mode(struct bio *bio) +{ + return bio->bi_crypt_context->crypto_mode; +} + +static inline u64 bio_crypt_data_unit_num(struct bio *bio) +{ + return bio->bi_crypt_context->data_unit_num; +} + +static inline u64 bio_crypt_sw_data_unit_num(struct bio *bio) +{ + return bio->bi_crypt_context->sw_data_unit_num; +} + +extern bool bio_crypt_should_process(struct bio *bio, struct request_queue *q); + +extern bool bio_crypt_ctx_compatible(struct bio *b_1, struct bio *b_2); + +extern bool bio_crypt_ctx_back_mergeable(struct bio *b_1, + unsigned int b1_sectors, + struct bio *b_2); + +#else /* CONFIG_BLK_INLINE_ENCRYPTION */ +struct keyslot_manager; + +static inline int bio_crypt_ctx_init(void) +{ + return 0; +} + +static inline int bio_crypt_clone(struct bio *dst, struct bio *src, + gfp_t gfp_mask) +{ + return 0; +} + +static inline void bio_crypt_advance(struct bio *bio, + unsigned int bytes) { } + +static inline bool bio_has_crypt_ctx(struct bio *bio) +{ + return false; +} + +static inline void bio_crypt_free_ctx(struct bio *bio) { } + +static inline void bio_crypt_set_ctx(struct bio *bio, + u8 *raw_key, + enum blk_crypto_mode_num crypto_mode, + u64 dun, + unsigned int dun_bits, + gfp_t gfp_mask) { } + +static inline void bio_set_data_unit_num(struct bio *bio, u64 dun) { } + +static inline bool bio_crypt_has_keyslot(struct bio *bio) +{ + return false; +} + +static inline void bio_crypt_set_keyslot(struct bio *bio, + unsigned int keyslot, + struct keyslot_manager *ksm) { } + +static inline int bio_crypt_get_keyslot(struct bio *bio) +{ + return -1; +} + +static inline u8 *bio_crypt_raw_key(struct bio *bio) +{ + return NULL; +} + +static inline u64 bio_crypt_data_unit_num(struct bio *bio) +{ + return 0; +} + +static inline bool bio_crypt_should_process(struct bio *bio, + struct request_queue *q) +{ + return false; +} + +static inline bool bio_crypt_ctx_compatible(struct bio *b_1, struct bio *b_2) +{ + return true; +} + +static inline bool bio_crypt_ctx_back_mergeable(struct bio *b_1, + unsigned int b1_sectors, + struct bio *b_2) +{ + return true; +} + +#endif /* CONFIG_BLK_INLINE_ENCRYPTION */ +#endif /* CONFIG_BLOCK */ +#endif /* __LINUX_BIO_CRYPT_CTX_H */ diff --git a/include/linux/bio.h b/include/linux/bio.h index 990cdd236b34..2e08e3731376 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -22,6 +22,7 @@ #include #include #include +#include #ifdef CONFIG_BLOCK @@ -595,11 +596,6 @@ static inline char *__bio_kmap_irq(struct bio *bio, struct bvec_iter iter, __bio_kmap_irq((bio), (bio)->bi_iter, (flags)) #define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags) -enum blk_crypto_mode_num { - BLK_ENCRYPTION_MODE_INVALID = 0, - BLK_ENCRYPTION_MODE_AES_256_XTS = 1, -}; - /* * BIO list management for use by remapping drivers (e.g. DM or MD) and loop. * diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 1cc3d4d3bc43..32dc18c5cef3 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -17,6 +17,7 @@ struct block_device; struct io_context; struct cgroup_subsys_state; typedef void (bio_end_io_t) (struct bio *); +struct bio_crypt_ctx; /* * Block error status values. See block/blk-core:blk_errors for the details. @@ -95,6 +96,11 @@ struct bio { struct blk_issue_stat bi_issue_stat; #endif #endif + +#ifdef CONFIG_BLK_INLINE_ENCRYPTION + struct bio_crypt_ctx *bi_crypt_context; +#endif + union { #if defined(CONFIG_BLK_DEV_INTEGRITY) struct bio_integrity_payload *bi_integrity; /* data integrity */ From 760b3283e8056ffa6382722457c2e0cf08328629 Mon Sep 17 00:00:00 2001 From: Satya Tangirala Date: Wed, 30 Oct 2019 14:14:33 -0700 Subject: [PATCH 2955/3715] ANDROID: block: Fix bio_crypt_should_process WARN_ON bio_crypt_should_process would WARN that the bio did not have a keyslot in any keyslot manager even when we were on the decrypt path of blk-crypto, which is a bug. The WARN is now conditional on the caller being responible for handling encryption rather than blk-crypto (i.e. the WARN happens only if this function return true). Bug: 137270441 Test: tested as series; see Ie1b77f7615d6a7a60fdc9105c7ab2200d17636a8 Change-Id: I01aa7a04a5ab9c9e579bde8a06d095916d880e2c Signed-off-by: Satya Tangirala --- block/bio-crypt-ctx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/block/bio-crypt-ctx.c b/block/bio-crypt-ctx.c index aa3571f72ee7..95fd57896466 100644 --- a/block/bio-crypt-ctx.c +++ b/block/bio-crypt-ctx.c @@ -65,8 +65,11 @@ bool bio_crypt_should_process(struct bio *bio, struct request_queue *q) if (!bio_has_crypt_ctx(bio)) return false; + if (q->ksm != bio->bi_crypt_context->processing_ksm) + return false; + WARN_ON(!bio_crypt_has_keyslot(bio)); - return q->ksm == bio->bi_crypt_context->processing_ksm; + return true; } EXPORT_SYMBOL(bio_crypt_should_process); From ec0b569b5cc89391d9d6c90d2f76dc0a4db03e57 Mon Sep 17 00:00:00 2001 From: Satya Tangirala Date: Thu, 24 Oct 2019 14:44:25 -0700 Subject: [PATCH 2956/3715] BACKPORT: FROMLIST: block: blk-crypto for Inline Encryption We introduce blk-crypto, which manages programming keyslots for struct bios. With blk-crypto, filesystems only need to call bio_crypt_set_ctx with the encryption key, algorithm and data_unit_num; they don't have to worry about getting a keyslot for each encryption context, as blk-crypto handles that. Blk-crypto also makes it possible for layered devices like device mapper to make use of inline encryption hardware. Blk-crypto delegates crypto operations to inline encryption hardware when available, and also contains a software fallback to the kernel crypto API. For more details, refer to Documentation/block/inline-encryption.rst. Bug: 137270441 Test: tested as series; see Ie1b77f7615d6a7a60fdc9105c7ab2200d17636a8 Change-Id: I7df59fef0c1e90043b1899c5a95973e23afac0c5 Signed-off-by: Satya Tangirala Link: https://lore.kernel.org/linux-fscrypt/20191028072032.6911-4-satyat@google.com/ --- Documentation/block/00-INDEX | 2 + Documentation/block/index.rst | 26 + Documentation/block/inline-encryption.rst | 183 +++++ block/Kconfig | 2 + block/Makefile | 3 +- block/bio-crypt-ctx.c | 7 +- block/bio.c | 5 + block/blk-core.c | 8 +- block/blk-crypto.c | 797 ++++++++++++++++++++++ include/linux/bio-crypt-ctx.h | 7 + include/linux/blk-crypto.h | 64 ++ 11 files changed, 1101 insertions(+), 3 deletions(-) create mode 100644 Documentation/block/index.rst create mode 100644 Documentation/block/inline-encryption.rst create mode 100644 block/blk-crypto.c create mode 100644 include/linux/blk-crypto.h diff --git a/Documentation/block/00-INDEX b/Documentation/block/00-INDEX index 8d55b4bbb5e2..7300403c5a0d 100644 --- a/Documentation/block/00-INDEX +++ b/Documentation/block/00-INDEX @@ -16,6 +16,8 @@ data-integrity.txt - Block data integrity deadline-iosched.txt - Deadline IO scheduler tunables +inline-encryption.rst + - Blk-crypto internals and inline encryption ioprio.txt - Block io priorities (in CFQ scheduler) pr.txt diff --git a/Documentation/block/index.rst b/Documentation/block/index.rst new file mode 100644 index 000000000000..026addfc69bc --- /dev/null +++ b/Documentation/block/index.rst @@ -0,0 +1,26 @@ +.. SPDX-License-Identifier: GPL-2.0 + +===== +Block +===== + +.. toctree:: + :maxdepth: 1 + + bfq-iosched + biodoc + biovecs + capability + cmdline-partition + data-integrity + deadline-iosched + inline-encryption + ioprio + kyber-iosched + null_blk + pr + queue-sysfs + request + stat + switching-sched + writeback_cache_control diff --git a/Documentation/block/inline-encryption.rst b/Documentation/block/inline-encryption.rst new file mode 100644 index 000000000000..202826cee95e --- /dev/null +++ b/Documentation/block/inline-encryption.rst @@ -0,0 +1,183 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================= +Inline Encryption +================= + +Objective +========= + +We want to support inline encryption (IE) in the kernel. +To allow for testing, we also want a crypto API fallback when actual +IE hardware is absent. We also want IE to work with layered devices +like dm and loopback (i.e. we want to be able to use the IE hardware +of the underlying devices if present, or else fall back to crypto API +en/decryption). + + +Constraints and notes +===================== + +- IE hardware have a limited number of "keyslots" that can be programmed + with an encryption context (key, algorithm, data unit size, etc.) at any time. + One can specify a keyslot in a data request made to the device, and the + device will en/decrypt the data using the encryption context programmed into + that specified keyslot. When possible, we want to make multiple requests with + the same encryption context share the same keyslot. + +- We need a way for filesystems to specify an encryption context to use for + en/decrypting a struct bio, and a device driver (like UFS) needs to be able + to use that encryption context when it processes the bio. + +- We need a way for device drivers to expose their capabilities in a unified + way to the upper layers. + + +Design +====== + +We add a struct bio_crypt_ctx to struct bio that can represent an +encryption context, because we need to be able to pass this encryption +context from the FS layer to the device driver to act upon. + +While IE hardware works on the notion of keyslots, the FS layer has no +knowledge of keyslots - it simply wants to specify an encryption context to +use while en/decrypting a bio. + +We introduce a keyslot manager (KSM) that handles the translation from +encryption contexts specified by the FS to keyslots on the IE hardware. +This KSM also serves as the way IE hardware can expose their capabilities to +upper layers. The generic mode of operation is: each device driver that wants +to support IE will construct a KSM and set it up in its struct request_queue. +Upper layers that want to use IE on this device can then use this KSM in +the device's struct request_queue to translate an encryption context into +a keyslot. The presence of the KSM in the request queue shall be used to mean +that the device supports IE. + +On the device driver end of the interface, the device driver needs to tell the +KSM how to actually manipulate the IE hardware in the device to do things like +programming the crypto key into the IE hardware into a particular keyslot. All +this is achieved through the :c:type:`struct keyslot_mgmt_ll_ops` that the +device driver passes to the KSM when creating it. + +It uses refcounts to track which keyslots are idle (either they have no +encryption context programmed, or there are no in-flight struct bios +referencing that keyslot). When a new encryption context needs a keyslot, it +tries to find a keyslot that has already been programmed with the same +encryption context, and if there is no such keyslot, it evicts the least +recently used idle keyslot and programs the new encryption context into that +one. If no idle keyslots are available, then the caller will sleep until there +is at least one. + + +Blk-crypto +========== + +The above is sufficient for simple cases, but does not work if there is a +need for a crypto API fallback, or if we are want to use IE with layered +devices. To these ends, we introduce blk-crypto. Blk-crypto allows us to +present a unified view of encryption to the FS (so FS only needs to specify +an encryption context and not worry about keyslots at all), and blk-crypto +can decide whether to delegate the en/decryption to IE hardware or to the +crypto API. Blk-crypto maintains an internal KSM that serves as the crypto +API fallback. + +Blk-crypto needs to ensure that the encryption context is programmed into the +"correct" keyslot manager for IE. If a bio is submitted to a layered device +that eventually passes the bio down to a device that really does support IE, we +want the encryption context to be programmed into a keyslot for the KSM of the +device with IE support. However, blk-crypto does not know a priori whether a +particular device is the final device in the layering structure for a bio or +not. So in the case that a particular device does not support IE, since it is +possibly the final destination device for the bio, if the bio requires +encryption (i.e. the bio is doing a write operation), blk-crypto must fallback +to the crypto API *before* sending the bio to the device. + +Blk-crypto ensures that: + +- The bio's encryption context is programmed into a keyslot in the KSM of the + request queue that the bio is being submitted to (or the crypto API fallback + KSM if the request queue doesn't have a KSM), and that the ``processing_ksm`` + in the ``bi_crypt_context`` is set to this KSM + +- That the bio has its own individual reference to the keyslot in this KSM. + Once the bio passes through blk-crypto, its encryption context is programmed + in some KSM. The "its own individual reference to the keyslot" ensures that + keyslots can be released by each bio independently of other bios while + ensuring that the bio has a valid reference to the keyslot when, for e.g., the + crypto API fallback KSM in blk-crypto performs crypto on the device's behalf. + The individual references are ensured by increasing the refcount for the + keyslot in the ``processing_ksm`` when a bio with a programmed encryption + context is cloned. + + +What blk-crypto does on bio submission +-------------------------------------- + +**Case 1:** blk-crypto is given a bio with only an encryption context that hasn't +been programmed into any keyslot in any KSM (for e.g. a bio from the FS). + In this case, blk-crypto will program the encryption context into the KSM of the + request queue the bio is being submitted to (and if this KSM does not exist, + then it will program it into blk-crypto's internal KSM for crypto API + fallback). The KSM that this encryption context was programmed into is stored + as the ``processing_ksm`` in the bio's ``bi_crypt_context``. + +**Case 2:** blk-crypto is given a bio whose encryption context has already been +programmed into a keyslot in the *crypto API fallback* KSM. + In this case, blk-crypto does nothing; it treats the bio as not having + specified an encryption context. Note that we cannot do here what we will do + in Case 3 because we would have already encrypted the bio via the crypto API + by this point. + +**Case 3:** blk-crypto is given a bio whose encryption context has already been +programmed into a keyslot in some KSM (that is *not* the crypto API fallback +KSM). + In this case, blk-crypto first releases that keyslot from that KSM and then + treats the bio as in Case 1. + +This way, when a device driver is processing a bio, it can be sure that +the bio's encryption context has been programmed into some KSM (either the +device driver's request queue's KSM, or blk-crypto's crypto API fallback KSM). +It then simply needs to check if the bio's processing_ksm is the device's +request queue's KSM. If so, then it should proceed with IE. If not, it should +simply do nothing with respect to crypto, because some other KSM (perhaps the +blk-crypto crypto API fallback KSM) is handling the en/decryption. + +Blk-crypto will release the keyslot that is being held by the bio (and also +decrypt it if the bio is using the crypto API fallback KSM) once +``bio_remaining_done`` returns true for the bio. + + +Layered Devices +=============== + +Layered devices that wish to support IE need to create their own keyslot +manager for their request queue, and expose whatever functionality they choose. +When a layered device wants to pass a bio to another layer (either by +resubmitting the same bio, or by submitting a clone), it doesn't need to do +anything special because the bio (or the clone) will once again pass through +blk-crypto, which will work as described in Case 3. If a layered device wants +for some reason to do the IO by itself instead of passing it on to a child +device, but it also chose to expose IE capabilities by setting up a KSM in its +request queue, it is then responsible for en/decrypting the data itself. In +such cases, the device can choose to call the blk-crypto function +``blk_crypto_fallback_to_kernel_crypto_api`` (TODO: Not yet implemented), which will +cause the en/decryption to be done via the crypto API fallback. + + +Future Optimizations for layered devices +======================================== + +Creating a keyslot manager for the layered device uses up memory for each +keyslot, and in general, a layered device (like dm-linear) merely passes the +request on to a "child" device, so the keyslots in the layered device itself +might be completely unused. We can instead define a new type of KSM; the +"passthrough KSM", that layered devices can use to let blk-crypto know that +this layered device *will* pass the bio to some child device (and hence +through blk-crypto again, at which point blk-crypto can program the encryption +context, instead of programming it into the layered device's KSM). Again, if +the device "lies" and decides to do the IO itself instead of passing it on to +a child device, it is responsible for doing the en/decryption (and can choose +to call ``blk_crypto_fallback_to_kernel_crypto_api``). Another use case for the +"passthrough KSM" is for IE devices that want to manage their own keyslots/do +not have a limited number of keyslots. diff --git a/block/Kconfig b/block/Kconfig index 960e7fdf6214..c4334605377f 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -186,6 +186,8 @@ config BLK_SED_OPAL config BLK_INLINE_ENCRYPTION bool "Enable inline encryption support in block layer" + select CRYPTO + select CRYPTO_BLKCIPHER help Build the blk-crypto subsystem. Enabling this lets the block layer handle encryption, diff --git a/block/Makefile b/block/Makefile index 437a897c99e4..a2618d3d1df4 100644 --- a/block/Makefile +++ b/block/Makefile @@ -35,4 +35,5 @@ obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o obj-$(CONFIG_BLK_WBT) += blk-wbt.o obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o obj-$(CONFIG_BLK_SED_OPAL) += sed-opal.o -obj-$(CONFIG_BLK_INLINE_ENCRYPTION) += keyslot-manager.o bio-crypt-ctx.o +obj-$(CONFIG_BLK_INLINE_ENCRYPTION) += keyslot-manager.o bio-crypt-ctx.o \ + blk-crypto.o diff --git a/block/bio-crypt-ctx.c b/block/bio-crypt-ctx.c index 95fd57896466..0f7641b875e9 100644 --- a/block/bio-crypt-ctx.c +++ b/block/bio-crypt-ctx.c @@ -43,7 +43,12 @@ EXPORT_SYMBOL(bio_crypt_free_ctx); int bio_crypt_clone(struct bio *dst, struct bio *src, gfp_t gfp_mask) { - if (!bio_has_crypt_ctx(src)) + /* + * If a bio is swhandled, then it will be decrypted when bio_endio + * is called. As we only want the data to be decrypted once, copies + * of the bio must not have have a crypt context. + */ + if (!bio_has_crypt_ctx(src) || bio_crypt_swhandled(src)) return 0; dst->bi_crypt_context = bio_crypt_alloc_ctx(gfp_mask); diff --git a/block/bio.c b/block/bio.c index b2746f89f8f9..9b5630b67a55 100644 --- a/block/bio.c +++ b/block/bio.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "blk.h" @@ -1899,6 +1900,10 @@ void bio_endio(struct bio *bio) again: if (!bio_remaining_done(bio)) return; + + if (!blk_crypto_endio(bio)) + return; + if (!bio_integrity_endio(bio)) return; diff --git a/block/blk-core.c b/block/blk-core.c index 9471bbeb238a..2395bd140ae3 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -35,6 +35,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -2225,7 +2226,9 @@ blk_qc_t generic_make_request(struct bio *bio) /* Create a fresh bio_list for all subordinate requests */ bio_list_on_stack[1] = bio_list_on_stack[0]; bio_list_init(&bio_list_on_stack[0]); - ret = q->make_request_fn(q, bio); + + if (!blk_crypto_submit_bio(&bio)) + ret = q->make_request_fn(q, bio); blk_queue_exit(q); @@ -3670,5 +3673,8 @@ int __init blk_dev_init(void) if (bio_crypt_ctx_init() < 0) panic("Failed to allocate mem for bio crypt ctxs\n"); + if (blk_crypto_init() < 0) + panic("Failed to init blk-crypto\n"); + return 0; } diff --git a/block/blk-crypto.c b/block/blk-crypto.c new file mode 100644 index 000000000000..3cb3412665b2 --- /dev/null +++ b/block/blk-crypto.c @@ -0,0 +1,797 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2019 Google LLC + */ + +/* + * Refer to Documentation/block/inline-encryption.rst for detailed explanation. + */ + +#define pr_fmt(fmt) "blk-crypto: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Represents a crypto mode supported by blk-crypto */ +struct blk_crypto_mode { + const char *cipher_str; /* crypto API name (for fallback case) */ + size_t keysize; /* key size in bytes */ +}; + +static const struct blk_crypto_mode blk_crypto_modes[] = { + [BLK_ENCRYPTION_MODE_AES_256_XTS] = { + .cipher_str = "xts(aes)", + .keysize = 64, + }, +}; + +static unsigned int num_prealloc_bounce_pg = 32; +module_param(num_prealloc_bounce_pg, uint, 0); +MODULE_PARM_DESC(num_prealloc_bounce_pg, + "Number of preallocated bounce pages for blk-crypto to use during crypto API fallback encryption"); + +#define BLK_CRYPTO_MAX_KEY_SIZE 64 +static int blk_crypto_num_keyslots = 100; +module_param_named(num_keyslots, blk_crypto_num_keyslots, int, 0); +MODULE_PARM_DESC(num_keyslots, + "Number of keyslots for crypto API fallback in blk-crypto."); + +static struct blk_crypto_keyslot { + struct crypto_skcipher *tfm; + enum blk_crypto_mode_num crypto_mode; + u8 key[BLK_CRYPTO_MAX_KEY_SIZE]; + struct crypto_skcipher *tfms[ARRAY_SIZE(blk_crypto_modes)]; +} *blk_crypto_keyslots; + +/* + * Allocating a crypto tfm during I/O can deadlock, so we have to preallocate + * all of a mode's tfms when that mode starts being used. Since each mode may + * need all the keyslots at some point, each mode needs its own tfm for each + * keyslot; thus, a keyslot may contain tfms for multiple modes. However, to + * match the behavior of real inline encryption hardware (which only supports a + * single encryption context per keyslot), we only allow one tfm per keyslot to + * be used at a time - the rest of the unused tfms have their keys cleared. + */ +static struct mutex tfms_lock[ARRAY_SIZE(blk_crypto_modes)]; +static bool tfms_inited[ARRAY_SIZE(blk_crypto_modes)]; + +struct work_mem { + struct work_struct crypto_work; + struct bio *bio; +}; + +/* The following few vars are only used during the crypto API fallback */ +static struct keyslot_manager *blk_crypto_ksm; +static struct workqueue_struct *blk_crypto_wq; +static mempool_t *blk_crypto_page_pool; +static struct kmem_cache *blk_crypto_work_mem_cache; + +bool bio_crypt_swhandled(struct bio *bio) +{ + return bio_has_crypt_ctx(bio) && + bio->bi_crypt_context->processing_ksm == blk_crypto_ksm; +} + +static u8 blank_key[BLK_CRYPTO_MAX_KEY_SIZE]; +static void evict_keyslot(unsigned int slot) +{ + struct blk_crypto_keyslot *slotp = &blk_crypto_keyslots[slot]; + enum blk_crypto_mode_num crypto_mode = slotp->crypto_mode; + int err; + + WARN_ON(slotp->crypto_mode == BLK_ENCRYPTION_MODE_INVALID); + + /* Clear the key in the skcipher */ + err = crypto_skcipher_setkey(slotp->tfms[crypto_mode], blank_key, + blk_crypto_modes[crypto_mode].keysize); + WARN_ON(err); + memzero_explicit(slotp->key, BLK_CRYPTO_MAX_KEY_SIZE); + slotp->crypto_mode = BLK_ENCRYPTION_MODE_INVALID; +} + +static int blk_crypto_keyslot_program(void *priv, const u8 *key, + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size, + unsigned int slot) +{ + struct blk_crypto_keyslot *slotp = &blk_crypto_keyslots[slot]; + const struct blk_crypto_mode *mode = &blk_crypto_modes[crypto_mode]; + size_t keysize = mode->keysize; + int err; + + if (crypto_mode != slotp->crypto_mode && + slotp->crypto_mode != BLK_ENCRYPTION_MODE_INVALID) { + evict_keyslot(slot); + } + + if (!slotp->tfms[crypto_mode]) + return -ENOMEM; + slotp->crypto_mode = crypto_mode; + err = crypto_skcipher_setkey(slotp->tfms[crypto_mode], key, keysize); + + if (err) { + evict_keyslot(slot); + return err; + } + + memcpy(slotp->key, key, keysize); + + return 0; +} + +static int blk_crypto_keyslot_evict(void *priv, const u8 *key, + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size, + unsigned int slot) +{ + evict_keyslot(slot); + return 0; +} + +static int blk_crypto_keyslot_find(void *priv, + const u8 *key, + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size_bytes) +{ + int slot; + const size_t keysize = blk_crypto_modes[crypto_mode].keysize; + + for (slot = 0; slot < blk_crypto_num_keyslots; slot++) { + if (blk_crypto_keyslots[slot].crypto_mode == crypto_mode && + !crypto_memneq(blk_crypto_keyslots[slot].key, key, keysize)) + return slot; + } + + return -ENOKEY; +} + +static bool blk_crypto_mode_supported(void *priv, + enum blk_crypto_mode_num crypt_mode, + unsigned int data_unit_size) +{ + /* All blk_crypto_modes are required to have a crypto API fallback. */ + return true; +} + +/* + * The crypto API fallback KSM ops - only used for a bio when it specifies a + * blk_crypto_mode for which we failed to get a keyslot in the device's inline + * encryption hardware (which probably means the device doesn't have inline + * encryption hardware that supports that crypto mode). + */ +static const struct keyslot_mgmt_ll_ops blk_crypto_ksm_ll_ops = { + .keyslot_program = blk_crypto_keyslot_program, + .keyslot_evict = blk_crypto_keyslot_evict, + .keyslot_find = blk_crypto_keyslot_find, + .crypto_mode_supported = blk_crypto_mode_supported, +}; + +static void blk_crypto_encrypt_endio(struct bio *enc_bio) +{ + struct bio *src_bio = enc_bio->bi_private; + int i; + + for (i = 0; i < enc_bio->bi_vcnt; i++) + mempool_free(enc_bio->bi_io_vec[i].bv_page, + blk_crypto_page_pool); + + src_bio->bi_status = enc_bio->bi_status; + + bio_put(enc_bio); + bio_endio(src_bio); +} + +static struct bio *blk_crypto_clone_bio(struct bio *bio_src) +{ + struct bvec_iter iter; + struct bio_vec bv; + struct bio *bio; + + bio = bio_alloc_bioset(GFP_NOIO, bio_segments(bio_src), NULL); + if (!bio) + return NULL; + bio->bi_disk = bio_src->bi_disk; + bio->bi_opf = bio_src->bi_opf; + bio->bi_ioprio = bio_src->bi_ioprio; + bio->bi_write_hint = bio_src->bi_write_hint; + bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector; + bio->bi_iter.bi_size = bio_src->bi_iter.bi_size; + + bio_for_each_segment(bv, bio_src, iter) + bio->bi_io_vec[bio->bi_vcnt++] = bv; + + if (bio_integrity(bio_src) && + bio_integrity_clone(bio, bio_src, GFP_NOIO) < 0) { + bio_put(bio); + return NULL; + } + + bio_clone_blkcg_association(bio, bio_src); + + return bio; +} + +/* Check that all I/O segments are data unit aligned */ +static int bio_crypt_check_alignment(struct bio *bio) +{ + int data_unit_size = 1 << bio->bi_crypt_context->data_unit_size_bits; + struct bvec_iter iter; + struct bio_vec bv; + + bio_for_each_segment(bv, bio, iter) { + if (!IS_ALIGNED(bv.bv_len | bv.bv_offset, data_unit_size)) + return -EIO; + } + return 0; +} + +static int blk_crypto_alloc_cipher_req(struct bio *src_bio, + struct skcipher_request **ciph_req_ptr, + struct crypto_wait *wait) +{ + int slot; + struct skcipher_request *ciph_req; + struct blk_crypto_keyslot *slotp; + + slot = bio_crypt_get_keyslot(src_bio); + slotp = &blk_crypto_keyslots[slot]; + ciph_req = skcipher_request_alloc(slotp->tfms[slotp->crypto_mode], + GFP_NOIO); + if (!ciph_req) { + src_bio->bi_status = BLK_STS_RESOURCE; + return -ENOMEM; + } + + skcipher_request_set_callback(ciph_req, + CRYPTO_TFM_REQ_MAY_BACKLOG | + CRYPTO_TFM_REQ_MAY_SLEEP, + crypto_req_done, wait); + *ciph_req_ptr = ciph_req; + return 0; +} + +static int blk_crypto_split_bio_if_needed(struct bio **bio_ptr) +{ + struct bio *bio = *bio_ptr; + unsigned int i = 0; + unsigned int num_sectors = 0; + struct bio_vec bv; + struct bvec_iter iter; + + bio_for_each_segment(bv, bio, iter) { + num_sectors += bv.bv_len >> SECTOR_SHIFT; + if (++i == BIO_MAX_PAGES) + break; + } + if (num_sectors < bio_sectors(bio)) { + struct bio *split_bio; + + split_bio = bio_split(bio, num_sectors, GFP_NOIO, NULL); + if (!split_bio) { + bio->bi_status = BLK_STS_RESOURCE; + return -ENOMEM; + } + bio_chain(split_bio, bio); + generic_make_request(bio); + *bio_ptr = split_bio; + } + return 0; +} + +/* + * The crypto API fallback's encryption routine. + * Allocate a bounce bio for encryption, encrypt the input bio using + * crypto API, and replace *bio_ptr with the bounce bio. May split input + * bio if it's too large. + */ +static int blk_crypto_encrypt_bio(struct bio **bio_ptr) +{ + struct bio *src_bio; + struct skcipher_request *ciph_req = NULL; + DECLARE_CRYPTO_WAIT(wait); + int err = 0; + u64 curr_dun; + union { + __le64 dun; + u8 bytes[16]; + } iv; + struct scatterlist src, dst; + struct bio *enc_bio; + struct bio_vec *enc_bvec; + int i, j; + int data_unit_size; + + /* Split the bio if it's too big for single page bvec */ + err = blk_crypto_split_bio_if_needed(bio_ptr); + if (err) + return err; + + src_bio = *bio_ptr; + data_unit_size = 1 << src_bio->bi_crypt_context->data_unit_size_bits; + + /* Allocate bounce bio for encryption */ + enc_bio = blk_crypto_clone_bio(src_bio); + if (!enc_bio) { + src_bio->bi_status = BLK_STS_RESOURCE; + return -ENOMEM; + } + + /* + * Use the crypto API fallback keyslot manager to get a crypto_skcipher + * for the algorithm and key specified for this bio. + */ + err = bio_crypt_ctx_acquire_keyslot(src_bio, blk_crypto_ksm); + if (err) { + src_bio->bi_status = BLK_STS_IOERR; + goto out_put_enc_bio; + } + + /* and then allocate an skcipher_request for it */ + err = blk_crypto_alloc_cipher_req(src_bio, &ciph_req, &wait); + if (err) + goto out_release_keyslot; + + curr_dun = bio_crypt_data_unit_num(src_bio); + sg_init_table(&src, 1); + sg_init_table(&dst, 1); + + skcipher_request_set_crypt(ciph_req, &src, &dst, + data_unit_size, iv.bytes); + + /* Encrypt each page in the bounce bio */ + for (i = 0, enc_bvec = enc_bio->bi_io_vec; i < enc_bio->bi_vcnt; + enc_bvec++, i++) { + struct page *plaintext_page = enc_bvec->bv_page; + struct page *ciphertext_page = + mempool_alloc(blk_crypto_page_pool, GFP_NOIO); + + enc_bvec->bv_page = ciphertext_page; + + if (!ciphertext_page) { + src_bio->bi_status = BLK_STS_RESOURCE; + err = -ENOMEM; + goto out_free_bounce_pages; + } + + sg_set_page(&src, plaintext_page, data_unit_size, + enc_bvec->bv_offset); + sg_set_page(&dst, ciphertext_page, data_unit_size, + enc_bvec->bv_offset); + + /* Encrypt each data unit in this page */ + for (j = 0; j < enc_bvec->bv_len; j += data_unit_size) { + memset(&iv, 0, sizeof(iv)); + iv.dun = cpu_to_le64(curr_dun); + + err = crypto_wait_req(crypto_skcipher_encrypt(ciph_req), + &wait); + if (err) { + i++; + src_bio->bi_status = BLK_STS_RESOURCE; + goto out_free_bounce_pages; + } + curr_dun++; + src.offset += data_unit_size; + dst.offset += data_unit_size; + } + } + + enc_bio->bi_private = src_bio; + enc_bio->bi_end_io = blk_crypto_encrypt_endio; + *bio_ptr = enc_bio; + + enc_bio = NULL; + err = 0; + goto out_free_ciph_req; + +out_free_bounce_pages: + while (i > 0) + mempool_free(enc_bio->bi_io_vec[--i].bv_page, + blk_crypto_page_pool); +out_free_ciph_req: + skcipher_request_free(ciph_req); +out_release_keyslot: + bio_crypt_ctx_release_keyslot(src_bio); +out_put_enc_bio: + if (enc_bio) + bio_put(enc_bio); + + return err; +} + +/* + * The crypto API fallback's main decryption routine. + * Decrypts input bio in place. + */ +static void blk_crypto_decrypt_bio(struct work_struct *w) +{ + struct work_mem *work_mem = + container_of(w, struct work_mem, crypto_work); + struct bio *bio = work_mem->bio; + struct skcipher_request *ciph_req = NULL; + DECLARE_CRYPTO_WAIT(wait); + struct bio_vec bv; + struct bvec_iter iter; + u64 curr_dun; + union { + __le64 dun; + u8 bytes[16]; + } iv; + struct scatterlist sg; + int data_unit_size = 1 << bio->bi_crypt_context->data_unit_size_bits; + int i; + int err; + + /* + * Use the crypto API fallback keyslot manager to get a crypto_skcipher + * for the algorithm and key specified for this bio. + */ + if (bio_crypt_ctx_acquire_keyslot(bio, blk_crypto_ksm)) { + bio->bi_status = BLK_STS_RESOURCE; + goto out_no_keyslot; + } + + /* and then allocate an skcipher_request for it */ + err = blk_crypto_alloc_cipher_req(bio, &ciph_req, &wait); + if (err) + goto out; + + curr_dun = bio_crypt_sw_data_unit_num(bio); + sg_init_table(&sg, 1); + skcipher_request_set_crypt(ciph_req, &sg, &sg, data_unit_size, + iv.bytes); + + /* Decrypt each segment in the bio */ + __bio_for_each_segment(bv, bio, iter, + bio->bi_crypt_context->crypt_iter) { + struct page *page = bv.bv_page; + + sg_set_page(&sg, page, data_unit_size, bv.bv_offset); + + /* Decrypt each data unit in the segment */ + for (i = 0; i < bv.bv_len; i += data_unit_size) { + memset(&iv, 0, sizeof(iv)); + iv.dun = cpu_to_le64(curr_dun); + if (crypto_wait_req(crypto_skcipher_decrypt(ciph_req), + &wait)) { + bio->bi_status = BLK_STS_IOERR; + goto out; + } + curr_dun++; + sg.offset += data_unit_size; + } + } + +out: + skcipher_request_free(ciph_req); + bio_crypt_ctx_release_keyslot(bio); +out_no_keyslot: + kmem_cache_free(blk_crypto_work_mem_cache, work_mem); + bio_endio(bio); +} + +/* Queue bio for decryption */ +static void blk_crypto_queue_decrypt_bio(struct bio *bio) +{ + struct work_mem *work_mem = + kmem_cache_zalloc(blk_crypto_work_mem_cache, GFP_ATOMIC); + + if (!work_mem) { + bio->bi_status = BLK_STS_RESOURCE; + bio_endio(bio); + return; + } + + INIT_WORK(&work_mem->crypto_work, blk_crypto_decrypt_bio); + work_mem->bio = bio; + queue_work(blk_crypto_wq, &work_mem->crypto_work); +} + +/** + * blk_crypto_submit_bio - handle submitting bio for inline encryption + * + * @bio_ptr: pointer to original bio pointer + * + * If the bio doesn't have inline encryption enabled or the submitter already + * specified a keyslot for the target device, do nothing. Else, a raw key must + * have been provided, so acquire a device keyslot for it if supported. Else, + * use the crypto API fallback. + * + * When the crypto API fallback is used for encryption, blk-crypto may choose to + * split the bio into 2 - the first one that will continue to be processed and + * the second one that will be resubmitted via generic_make_request. + * A bounce bio will be allocated to encrypt the contents of the aforementioned + * "first one", and *bio_ptr will be updated to this bounce bio. + * + * Return: 0 if bio submission should continue; nonzero if bio_endio() was + * already called so bio submission should abort. + */ +int blk_crypto_submit_bio(struct bio **bio_ptr) +{ + struct bio *bio = *bio_ptr; + struct request_queue *q; + int err; + struct bio_crypt_ctx *crypt_ctx; + + if (!bio_has_crypt_ctx(bio) || !bio_has_data(bio)) + return 0; + + /* + * When a read bio is marked for sw decryption, its bi_iter is saved + * so that when we decrypt the bio later, we know what part of it was + * marked for sw decryption (when the bio is passed down after + * blk_crypto_submit bio, it may be split or advanced so we cannot rely + * on the bi_iter while decrypting in blk_crypto_endio) + */ + if (bio_crypt_swhandled(bio)) + return 0; + + err = bio_crypt_check_alignment(bio); + if (err) { + bio->bi_status = BLK_STS_IOERR; + goto out; + } + + crypt_ctx = bio->bi_crypt_context; + q = bio->bi_disk->queue; + + if (bio_crypt_has_keyslot(bio)) { + /* Key already programmed into device? */ + if (q->ksm == crypt_ctx->processing_ksm) + return 0; + + /* Nope, release the existing keyslot. */ + bio_crypt_ctx_release_keyslot(bio); + } + + /* Get device keyslot if supported */ + if (q->ksm) { + err = bio_crypt_ctx_acquire_keyslot(bio, q->ksm); + if (!err) + return 0; + + pr_warn_once("Failed to acquire keyslot for %s (err=%d). Falling back to crypto API.\n", + bio->bi_disk->disk_name, err); + } + + /* Fallback to crypto API */ + if (!READ_ONCE(tfms_inited[bio->bi_crypt_context->crypto_mode])) { + err = -EIO; + bio->bi_status = BLK_STS_IOERR; + goto out; + } + + if (bio_data_dir(bio) == WRITE) { + /* Encrypt the data now */ + err = blk_crypto_encrypt_bio(bio_ptr); + if (err) + goto out; + } else { + /* Mark bio as swhandled */ + bio->bi_crypt_context->processing_ksm = blk_crypto_ksm; + bio->bi_crypt_context->crypt_iter = bio->bi_iter; + bio->bi_crypt_context->sw_data_unit_num = + bio->bi_crypt_context->data_unit_num; + } + return 0; +out: + bio_endio(*bio_ptr); + return err; +} + +/** + * blk_crypto_endio - clean up bio w.r.t inline encryption during bio_endio + * + * @bio - the bio to clean up + * + * If blk_crypto_submit_bio decided to fallback to crypto API for this + * bio, we queue the bio for decryption into a workqueue and return false, + * and call bio_endio(bio) at a later time (after the bio has been decrypted). + * + * If the bio is not to be decrypted by the crypto API, this function releases + * the reference to the keyslot that blk_crypto_submit_bio got. + * + * Return: true if bio_endio should continue; false otherwise (bio_endio will + * be called again when bio has been decrypted). + */ +bool blk_crypto_endio(struct bio *bio) +{ + if (!bio_has_crypt_ctx(bio)) + return true; + + if (bio_crypt_swhandled(bio)) { + /* + * The only bios that are swhandled when they reach here + * are those with bio_data_dir(bio) == READ, since WRITE + * bios that are encrypted by the crypto API fallback are + * handled by blk_crypto_encrypt_endio. + */ + + /* If there was an IO error, don't decrypt. */ + if (bio->bi_status) + return true; + + blk_crypto_queue_decrypt_bio(bio); + return false; + } + + if (bio_crypt_has_keyslot(bio)) + bio_crypt_ctx_release_keyslot(bio); + + return true; +} + +/** + * blk_crypto_start_using_mode() - Allocate skciphers for a + * mode_num for all keyslots + * @mode_num - the blk_crypto_mode we want to allocate ciphers for. + * + * Upper layers (filesystems) should call this function to ensure that a + * the crypto API fallback has transforms for this algorithm, if they become + * necessary. + * + * Return: 0 on success and -err on error. + */ +int blk_crypto_start_using_mode(enum blk_crypto_mode_num mode_num, + unsigned int data_unit_size, + struct request_queue *q) +{ + struct blk_crypto_keyslot *slotp; + int err = 0; + int i; + + /* + * Fast path + * Ensure that updates to blk_crypto_keyslots[i].tfms[mode_num] + * for each i are visible before we try to access them. + */ + if (likely(smp_load_acquire(&tfms_inited[mode_num]))) + return 0; + + /* + * If the keyslot manager of the request queue supports this + * crypto mode, then we don't need to allocate this mode. + */ + if (keyslot_manager_crypto_mode_supported(q->ksm, mode_num, + data_unit_size)) { + return 0; + } + + mutex_lock(&tfms_lock[mode_num]); + if (likely(tfms_inited[mode_num])) + goto out; + + for (i = 0; i < blk_crypto_num_keyslots; i++) { + slotp = &blk_crypto_keyslots[i]; + slotp->tfms[mode_num] = crypto_alloc_skcipher( + blk_crypto_modes[mode_num].cipher_str, + 0, 0); + if (IS_ERR(slotp->tfms[mode_num])) { + err = PTR_ERR(slotp->tfms[mode_num]); + slotp->tfms[mode_num] = NULL; + goto out_free_tfms; + } + + crypto_skcipher_set_flags(slotp->tfms[mode_num], + CRYPTO_TFM_REQ_WEAK_KEY); + } + + /* + * Ensure that updates to blk_crypto_keyslots[i].tfms[mode_num] + * for each i are visible before we set tfms_inited[mode_num]. + */ + smp_store_release(&tfms_inited[mode_num], true); + goto out; + +out_free_tfms: + for (i = 0; i < blk_crypto_num_keyslots; i++) { + slotp = &blk_crypto_keyslots[i]; + crypto_free_skcipher(slotp->tfms[mode_num]); + slotp->tfms[mode_num] = NULL; + } +out: + mutex_unlock(&tfms_lock[mode_num]); + return err; +} +EXPORT_SYMBOL(blk_crypto_start_using_mode); + +/** + * blk_crypto_evict_key() - Evict a key from any inline encryption hardware + * it may have been programmed into + * @q - The request queue who's keyslot manager this key might have been + * programmed into + * @key - The key to evict + * @mode - The blk_crypto_mode_num used with this key + * @data_unit_size - The data unit size used with this key + * + * Upper layers (filesystems) should call this function to ensure that a key + * is evicted from hardware that it might have been programmed into. This + * will call keyslot_manager_evict_key on the queue's keyslot manager, if one + * exists, and supports the crypto algorithm with the specified data unit size. + * Otherwise, it will evict the key from the blk_crypto_ksm. + * + * Return: 0 on success, -err on error. + */ +int blk_crypto_evict_key(struct request_queue *q, const u8 *key, + enum blk_crypto_mode_num mode, + unsigned int data_unit_size) +{ + struct keyslot_manager *ksm = blk_crypto_ksm; + + if (q && q->ksm && keyslot_manager_crypto_mode_supported(q->ksm, mode, + data_unit_size)) { + ksm = q->ksm; + } + + return keyslot_manager_evict_key(ksm, key, mode, data_unit_size); +} +EXPORT_SYMBOL(blk_crypto_evict_key); + +int __init blk_crypto_init(void) +{ + int i; + int err = -ENOMEM; + + prandom_bytes(blank_key, BLK_CRYPTO_MAX_KEY_SIZE); + + blk_crypto_ksm = keyslot_manager_create(blk_crypto_num_keyslots, + &blk_crypto_ksm_ll_ops, + NULL); + if (!blk_crypto_ksm) + goto out; + + blk_crypto_wq = alloc_workqueue("blk_crypto_wq", + WQ_UNBOUND | WQ_HIGHPRI | + WQ_MEM_RECLAIM, + num_online_cpus()); + if (!blk_crypto_wq) + goto out_free_ksm; + + blk_crypto_keyslots = kcalloc(blk_crypto_num_keyslots, + sizeof(*blk_crypto_keyslots), + GFP_KERNEL); + if (!blk_crypto_keyslots) + goto out_free_workqueue; + + for (i = 0; i < blk_crypto_num_keyslots; i++) { + blk_crypto_keyslots[i].crypto_mode = + BLK_ENCRYPTION_MODE_INVALID; + } + + for (i = 0; i < ARRAY_SIZE(blk_crypto_modes); i++) + mutex_init(&tfms_lock[i]); + + blk_crypto_page_pool = + mempool_create_page_pool(num_prealloc_bounce_pg, 0); + if (!blk_crypto_page_pool) + goto out_free_keyslots; + + blk_crypto_work_mem_cache = KMEM_CACHE(work_mem, SLAB_RECLAIM_ACCOUNT); + if (!blk_crypto_work_mem_cache) + goto out_free_page_pool; + + return 0; + +out_free_page_pool: + mempool_destroy(blk_crypto_page_pool); + blk_crypto_page_pool = NULL; +out_free_keyslots: + kzfree(blk_crypto_keyslots); + blk_crypto_keyslots = NULL; +out_free_workqueue: + destroy_workqueue(blk_crypto_wq); + blk_crypto_wq = NULL; +out_free_ksm: + keyslot_manager_destroy(blk_crypto_ksm); + blk_crypto_ksm = NULL; +out: + pr_warn("No memory for blk-crypto crypto API fallback."); + return err; +} diff --git a/include/linux/bio-crypt-ctx.h b/include/linux/bio-crypt-ctx.h index 5cd569f77c31..7c389f310bab 100644 --- a/include/linux/bio-crypt-ctx.h +++ b/include/linux/bio-crypt-ctx.h @@ -53,6 +53,8 @@ static inline void bio_crypt_advance(struct bio *bio, unsigned int bytes) } } +extern bool bio_crypt_swhandled(struct bio *bio); + static inline bool bio_crypt_has_keyslot(struct bio *bio) { return bio->bi_crypt_context->keyslot >= 0; @@ -170,6 +172,11 @@ static inline void bio_crypt_set_ctx(struct bio *bio, unsigned int dun_bits, gfp_t gfp_mask) { } +static inline bool bio_crypt_swhandled(struct bio *bio) +{ + return false; +} + static inline void bio_set_data_unit_num(struct bio *bio, u64 dun) { } static inline bool bio_crypt_has_keyslot(struct bio *bio) diff --git a/include/linux/blk-crypto.h b/include/linux/blk-crypto.h new file mode 100644 index 000000000000..9ce39b1d8c7c --- /dev/null +++ b/include/linux/blk-crypto.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2019 Google LLC + */ + +#ifndef __LINUX_BLK_CRYPTO_H +#define __LINUX_BLK_CRYPTO_H + +#include +#include + +#define SECTOR_SHIFT 9 + +#ifdef CONFIG_BLK_INLINE_ENCRYPTION + +int blk_crypto_init(void); + +int blk_crypto_submit_bio(struct bio **bio_ptr); + +bool blk_crypto_endio(struct bio *bio); + +int blk_crypto_start_using_mode(enum blk_crypto_mode_num mode_num, + unsigned int data_unit_size, + struct request_queue *q); + +int blk_crypto_evict_key(struct request_queue *q, const u8 *key, + enum blk_crypto_mode_num mode, + unsigned int data_unit_size); + +#else /* CONFIG_BLK_INLINE_ENCRYPTION */ + +static inline int blk_crypto_init(void) +{ + return 0; +} + +static inline int blk_crypto_submit_bio(struct bio **bio_ptr) +{ + return 0; +} + +static inline bool blk_crypto_endio(struct bio *bio) +{ + return true; +} + +static inline int +blk_crypto_start_using_mode(enum blk_crypto_mode_num mode_num, + unsigned int data_unit_size, + struct request_queue *q) +{ + return -EOPNOTSUPP; +} + +static inline int blk_crypto_evict_key(struct request_queue *q, const u8 *key, + enum blk_crypto_mode_num mode, + unsigned int data_unit_size) +{ + return 0; +} + +#endif /* CONFIG_BLK_INLINE_ENCRYPTION */ + +#endif /* __LINUX_BLK_CRYPTO_H */ From 011b8344c36d39255b8057c63d98e593e364ed7f Mon Sep 17 00:00:00 2001 From: Satya Tangirala Date: Thu, 24 Oct 2019 14:44:26 -0700 Subject: [PATCH 2957/3715] BACKPORT: FROMLIST: scsi: ufs: UFS driver v2.1 spec crypto additions Add the crypto registers and structs defined in v2.1 of the JEDEC UFSHCI specification in preparation to add support for inline encryption to UFS. Bug: 137270441 Test: tested as series; see Ie1b77f7615d6a7a60fdc9105c7ab2200d17636a8 Change-Id: I8e4006bfd69692f7d5c1c37f660d90b9e2fa1274 Signed-off-by: Satya Tangirala Link: https://lore.kernel.org/linux-fscrypt/20191028072032.6911-5-satyat@google.com/ --- drivers/scsi/ufs/ufshcd.c | 2 ++ drivers/scsi/ufs/ufshcd.h | 5 +++ drivers/scsi/ufs/ufshci.h | 67 +++++++++++++++++++++++++++++++++++++-- 3 files changed, 72 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index d25082e573e0..e21cbc790da1 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -4570,6 +4570,8 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) case OCS_MISMATCH_RESP_UPIU_SIZE: case OCS_PEER_COMM_FAILURE: case OCS_FATAL_ERROR: + case OCS_INVALID_CRYPTO_CONFIG: + case OCS_GENERAL_CRYPTO_ERROR: default: result |= DID_ERROR << 16; dev_err(hba->dev, diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index cdc8bd05f7df..cdb6f97c1d13 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -667,6 +667,11 @@ struct ufs_hba { * the performance of ongoing read/write operations. */ #define UFSHCD_CAP_KEEP_AUTO_BKOPS_ENABLED_EXCEPT_SUSPEND (1 << 5) + /* + * This capability allows the host controller driver to use the + * inline crypto engine, if it is present + */ +#define UFSHCD_CAP_CRYPTO (1 << 6) struct devfreq *devfreq; struct ufs_clk_scaling clk_scaling; diff --git a/drivers/scsi/ufs/ufshci.h b/drivers/scsi/ufs/ufshci.h index f60145d4a66e..97006b324ee7 100644 --- a/drivers/scsi/ufs/ufshci.h +++ b/drivers/scsi/ufs/ufshci.h @@ -89,6 +89,7 @@ enum { MASK_64_ADDRESSING_SUPPORT = 0x01000000, MASK_OUT_OF_ORDER_DATA_DELIVERY_SUPPORT = 0x02000000, MASK_UIC_DME_TEST_MODE_SUPPORT = 0x04000000, + MASK_CRYPTO_SUPPORT = 0x10000000, }; #define UFS_MASK(mask, offset) ((mask) << (offset)) @@ -135,6 +136,7 @@ enum { #define DEVICE_FATAL_ERROR UFS_BIT(11) #define CONTROLLER_FATAL_ERROR UFS_BIT(16) #define SYSTEM_BUS_FATAL_ERROR UFS_BIT(17) +#define CRYPTO_ENGINE_FATAL_ERROR UFS_BIT(18) #define UFSHCD_UIC_PWR_MASK (UIC_HIBERNATE_ENTER |\ UIC_HIBERNATE_EXIT |\ @@ -145,11 +147,13 @@ enum { #define UFSHCD_ERROR_MASK (UIC_ERROR |\ DEVICE_FATAL_ERROR |\ CONTROLLER_FATAL_ERROR |\ - SYSTEM_BUS_FATAL_ERROR) + SYSTEM_BUS_FATAL_ERROR |\ + CRYPTO_ENGINE_FATAL_ERROR) #define INT_FATAL_ERRORS (DEVICE_FATAL_ERROR |\ CONTROLLER_FATAL_ERROR |\ - SYSTEM_BUS_FATAL_ERROR) + SYSTEM_BUS_FATAL_ERROR |\ + CRYPTO_ENGINE_FATAL_ERROR) /* HCS - Host Controller Status 30h */ #define DEVICE_PRESENT UFS_BIT(0) @@ -293,6 +297,61 @@ enum { INTERRUPT_MASK_ALL_VER_21 = 0x71FFF, }; +/* CCAP - Crypto Capability 100h */ +union ufs_crypto_capabilities { + __le32 reg_val; + struct { + u8 num_crypto_cap; + u8 config_count; + u8 reserved; + u8 config_array_ptr; + }; +}; + +enum ufs_crypto_key_size { + UFS_CRYPTO_KEY_SIZE_INVALID = 0x0, + UFS_CRYPTO_KEY_SIZE_128 = 0x1, + UFS_CRYPTO_KEY_SIZE_192 = 0x2, + UFS_CRYPTO_KEY_SIZE_256 = 0x3, + UFS_CRYPTO_KEY_SIZE_512 = 0x4, +}; + +enum ufs_crypto_alg { + UFS_CRYPTO_ALG_AES_XTS = 0x0, + UFS_CRYPTO_ALG_BITLOCKER_AES_CBC = 0x1, + UFS_CRYPTO_ALG_AES_ECB = 0x2, + UFS_CRYPTO_ALG_ESSIV_AES_CBC = 0x3, +}; + +/* x-CRYPTOCAP - Crypto Capability X */ +union ufs_crypto_cap_entry { + __le32 reg_val; + struct { + u8 algorithm_id; + u8 sdus_mask; /* Supported data unit size mask */ + u8 key_size; + u8 reserved; + }; +}; + +#define UFS_CRYPTO_CONFIGURATION_ENABLE (1 << 7) +#define UFS_CRYPTO_KEY_MAX_SIZE 64 +/* x-CRYPTOCFG - Crypto Configuration X */ +union ufs_crypto_cfg_entry { + __le32 reg_val[32]; + struct { + u8 crypto_key[UFS_CRYPTO_KEY_MAX_SIZE]; + u8 data_unit_size; + u8 crypto_cap_idx; + u8 reserved_1; + u8 config_enable; + u8 reserved_multi_host; + u8 reserved_2; + u8 vsb[2]; + u8 reserved_3[56]; + }; +}; + /* * Request Descriptor Definitions */ @@ -314,6 +373,7 @@ enum { UTP_NATIVE_UFS_COMMAND = 0x10000000, UTP_DEVICE_MANAGEMENT_FUNCTION = 0x20000000, UTP_REQ_DESC_INT_CMD = 0x01000000, + UTP_REQ_DESC_CRYPTO_ENABLE_CMD = 0x00800000, }; /* UTP Transfer Request Data Direction (DD) */ @@ -333,6 +393,9 @@ enum { OCS_PEER_COMM_FAILURE = 0x5, OCS_ABORTED = 0x6, OCS_FATAL_ERROR = 0x7, + OCS_DEVICE_FATAL_ERROR = 0x8, + OCS_INVALID_CRYPTO_CONFIG = 0x9, + OCS_GENERAL_CRYPTO_ERROR = 0xA, OCS_INVALID_COMMAND_STATUS = 0x0F, MASK_OCS = 0x0F, }; From f858a9981a94a4e1d1b77b00bc05ab61b8431bce Mon Sep 17 00:00:00 2001 From: Satya Tangirala Date: Thu, 24 Oct 2019 14:44:27 -0700 Subject: [PATCH 2958/3715] BACKPORT: FROMLIST: scsi: ufs: UFS crypto API Introduce functions to manipulate UFS inline encryption hardware in line with the JEDEC UFSHCI v2.1 specification and to work with the block keyslot manager. Bug: 137270441 Test: tested as series; see Ie1b77f7615d6a7a60fdc9105c7ab2200d17636a8 Change-Id: I98d2512858a231f11a6bf868198446378d3aa491 Signed-off-by: Satya Tangirala Link: https://lore.kernel.org/linux-fscrypt/20191028072032.6911-6-satyat@google.com/ --- drivers/scsi/ufs/Kconfig | 9 + drivers/scsi/ufs/Makefile | 4 +- drivers/scsi/ufs/ufshcd-crypto.c | 391 +++++++++++++++++++++++++++++++ drivers/scsi/ufs/ufshcd-crypto.h | 86 +++++++ drivers/scsi/ufs/ufshcd.h | 14 ++ 5 files changed, 503 insertions(+), 1 deletion(-) create mode 100644 drivers/scsi/ufs/ufshcd-crypto.c create mode 100644 drivers/scsi/ufs/ufshcd-crypto.h diff --git a/drivers/scsi/ufs/Kconfig b/drivers/scsi/ufs/Kconfig index e27b4d4e6ae2..a2ec629bc466 100644 --- a/drivers/scsi/ufs/Kconfig +++ b/drivers/scsi/ufs/Kconfig @@ -100,3 +100,12 @@ config SCSI_UFS_QCOM Select this if you have UFS controller on QCOM chipset. If unsure, say N. + +config SCSI_UFS_CRYPTO + bool "UFS Crypto Engine Support" + depends on SCSI_UFSHCD && BLK_INLINE_ENCRYPTION + help + Enable Crypto Engine Support in UFS. + Enabling this makes it possible for the kernel to use the crypto + capabilities of the UFS device (if present) to perform crypto + operations on data being transferred to/from the device. diff --git a/drivers/scsi/ufs/Makefile b/drivers/scsi/ufs/Makefile index 9310c6c83041..9e94ee6d5289 100644 --- a/drivers/scsi/ufs/Makefile +++ b/drivers/scsi/ufs/Makefile @@ -3,6 +3,8 @@ obj-$(CONFIG_SCSI_UFS_DWC_TC_PCI) += tc-dwc-g210-pci.o ufshcd-dwc.o tc-dwc-g210.o obj-$(CONFIG_SCSI_UFS_DWC_TC_PLATFORM) += tc-dwc-g210-pltfrm.o ufshcd-dwc.o tc-dwc-g210.o obj-$(CONFIG_SCSI_UFS_QCOM) += ufs-qcom.o -obj-$(CONFIG_SCSI_UFSHCD) += ufshcd.o +obj-$(CONFIG_SCSI_UFSHCD) += ufshcd-core.o +ufshcd-core-y := ufshcd.o obj-$(CONFIG_SCSI_UFSHCD_PCI) += ufshcd-pci.o obj-$(CONFIG_SCSI_UFSHCD_PLATFORM) += ufshcd-pltfrm.o +ufshcd-core-$(CONFIG_SCSI_UFS_CRYPTO) += ufshcd-crypto.o diff --git a/drivers/scsi/ufs/ufshcd-crypto.c b/drivers/scsi/ufs/ufshcd-crypto.c new file mode 100644 index 000000000000..3900a07a7e9b --- /dev/null +++ b/drivers/scsi/ufs/ufshcd-crypto.c @@ -0,0 +1,391 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2019 Google LLC + */ + +#include + +#include "ufshcd.h" +#include "ufshcd-crypto.h" + +static bool ufshcd_cap_idx_valid(struct ufs_hba *hba, unsigned int cap_idx) +{ + return cap_idx < hba->crypto_capabilities.num_crypto_cap; +} + +static u8 get_data_unit_size_mask(unsigned int data_unit_size) +{ + if (data_unit_size < 512 || data_unit_size > 65536 || + !is_power_of_2(data_unit_size)) + return 0; + + return data_unit_size / 512; +} + +static size_t get_keysize_bytes(enum ufs_crypto_key_size size) +{ + switch (size) { + case UFS_CRYPTO_KEY_SIZE_128: return 16; + case UFS_CRYPTO_KEY_SIZE_192: return 24; + case UFS_CRYPTO_KEY_SIZE_256: return 32; + case UFS_CRYPTO_KEY_SIZE_512: return 64; + default: return 0; + } +} + +static int ufshcd_crypto_cap_find(void *hba_p, + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size) +{ + struct ufs_hba *hba = hba_p; + enum ufs_crypto_alg ufs_alg; + u8 data_unit_mask; + int cap_idx; + enum ufs_crypto_key_size ufs_key_size; + union ufs_crypto_cap_entry *ccap_array = hba->crypto_cap_array; + + if (!ufshcd_hba_is_crypto_supported(hba)) + return -EINVAL; + + switch (crypto_mode) { + case BLK_ENCRYPTION_MODE_AES_256_XTS: + ufs_alg = UFS_CRYPTO_ALG_AES_XTS; + ufs_key_size = UFS_CRYPTO_KEY_SIZE_256; + break; + default: return -EINVAL; + } + + data_unit_mask = get_data_unit_size_mask(data_unit_size); + + for (cap_idx = 0; cap_idx < hba->crypto_capabilities.num_crypto_cap; + cap_idx++) { + if (ccap_array[cap_idx].algorithm_id == ufs_alg && + (ccap_array[cap_idx].sdus_mask & data_unit_mask) && + ccap_array[cap_idx].key_size == ufs_key_size) + return cap_idx; + } + + return -EINVAL; +} + +/** + * ufshcd_crypto_cfg_entry_write_key - Write a key into a crypto_cfg_entry + * + * Writes the key with the appropriate format - for AES_XTS, + * the first half of the key is copied as is, the second half is + * copied with an offset halfway into the cfg->crypto_key array. + * For the other supported crypto algs, the key is just copied. + * + * @cfg: The crypto config to write to + * @key: The key to write + * @cap: The crypto capability (which specifies the crypto alg and key size) + * + * Returns 0 on success, or -EINVAL + */ +static int ufshcd_crypto_cfg_entry_write_key(union ufs_crypto_cfg_entry *cfg, + const u8 *key, + union ufs_crypto_cap_entry cap) +{ + size_t key_size_bytes = get_keysize_bytes(cap.key_size); + + if (key_size_bytes == 0) + return -EINVAL; + + switch (cap.algorithm_id) { + case UFS_CRYPTO_ALG_AES_XTS: + key_size_bytes *= 2; + if (key_size_bytes > UFS_CRYPTO_KEY_MAX_SIZE) + return -EINVAL; + + memcpy(cfg->crypto_key, key, key_size_bytes/2); + memcpy(cfg->crypto_key + UFS_CRYPTO_KEY_MAX_SIZE/2, + key + key_size_bytes/2, key_size_bytes/2); + return 0; + case UFS_CRYPTO_ALG_BITLOCKER_AES_CBC: // fallthrough + case UFS_CRYPTO_ALG_AES_ECB: // fallthrough + case UFS_CRYPTO_ALG_ESSIV_AES_CBC: + memcpy(cfg->crypto_key, key, key_size_bytes); + return 0; + } + + return -EINVAL; +} + +static void program_key(struct ufs_hba *hba, + const union ufs_crypto_cfg_entry *cfg, + int slot) +{ + int i; + u32 slot_offset = hba->crypto_cfg_register + slot * sizeof(*cfg); + + /* Clear the dword 16 */ + ufshcd_writel(hba, 0, slot_offset + 16 * sizeof(cfg->reg_val[0])); + /* Ensure that CFGE is cleared before programming the key */ + wmb(); + for (i = 0; i < 16; i++) { + ufshcd_writel(hba, le32_to_cpu(cfg->reg_val[i]), + slot_offset + i * sizeof(cfg->reg_val[0])); + /* Spec says each dword in key must be written sequentially */ + wmb(); + } + /* Write dword 17 */ + ufshcd_writel(hba, le32_to_cpu(cfg->reg_val[17]), + slot_offset + 17 * sizeof(cfg->reg_val[0])); + /* Dword 16 must be written last */ + wmb(); + /* Write dword 16 */ + ufshcd_writel(hba, le32_to_cpu(cfg->reg_val[16]), + slot_offset + 16 * sizeof(cfg->reg_val[0])); + wmb(); +} + +static int ufshcd_crypto_keyslot_program(void *hba_p, const u8 *key, + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size, + unsigned int slot) +{ + struct ufs_hba *hba = hba_p; + int err = 0; + u8 data_unit_mask; + union ufs_crypto_cfg_entry cfg; + union ufs_crypto_cfg_entry *cfg_arr = hba->crypto_cfgs; + int cap_idx; + + cap_idx = ufshcd_crypto_cap_find(hba_p, crypto_mode, + data_unit_size); + + if (!ufshcd_is_crypto_enabled(hba) || + !ufshcd_keyslot_valid(hba, slot) || + !ufshcd_cap_idx_valid(hba, cap_idx)) + return -EINVAL; + + data_unit_mask = get_data_unit_size_mask(data_unit_size); + + if (!(data_unit_mask & hba->crypto_cap_array[cap_idx].sdus_mask)) + return -EINVAL; + + memset(&cfg, 0, sizeof(cfg)); + cfg.data_unit_size = data_unit_mask; + cfg.crypto_cap_idx = cap_idx; + cfg.config_enable |= UFS_CRYPTO_CONFIGURATION_ENABLE; + + err = ufshcd_crypto_cfg_entry_write_key(&cfg, key, + hba->crypto_cap_array[cap_idx]); + if (err) + return err; + + program_key(hba, &cfg, slot); + + memcpy(&cfg_arr[slot], &cfg, sizeof(cfg)); + memzero_explicit(&cfg, sizeof(cfg)); + + return 0; +} + +static int ufshcd_crypto_keyslot_find(void *hba_p, + const u8 *key, + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size) +{ + struct ufs_hba *hba = hba_p; + int err = 0; + int slot; + u8 data_unit_mask; + union ufs_crypto_cfg_entry cfg; + union ufs_crypto_cfg_entry *cfg_arr = hba->crypto_cfgs; + int cap_idx; + + cap_idx = ufshcd_crypto_cap_find(hba_p, crypto_mode, + data_unit_size); + + if (!ufshcd_is_crypto_enabled(hba) || + !ufshcd_cap_idx_valid(hba, cap_idx)) + return -EINVAL; + + data_unit_mask = get_data_unit_size_mask(data_unit_size); + + if (!(data_unit_mask & hba->crypto_cap_array[cap_idx].sdus_mask)) + return -EINVAL; + + memset(&cfg, 0, sizeof(cfg)); + err = ufshcd_crypto_cfg_entry_write_key(&cfg, key, + hba->crypto_cap_array[cap_idx]); + + if (err) + return -EINVAL; + + for (slot = 0; slot < NUM_KEYSLOTS(hba); slot++) { + if ((cfg_arr[slot].config_enable & + UFS_CRYPTO_CONFIGURATION_ENABLE) && + data_unit_mask == cfg_arr[slot].data_unit_size && + cap_idx == cfg_arr[slot].crypto_cap_idx && + !crypto_memneq(&cfg.crypto_key, cfg_arr[slot].crypto_key, + UFS_CRYPTO_KEY_MAX_SIZE)) { + memzero_explicit(&cfg, sizeof(cfg)); + return slot; + } + } + + memzero_explicit(&cfg, sizeof(cfg)); + return -ENOKEY; +} + +static int ufshcd_crypto_keyslot_evict(void *hba_p, const u8 *key, + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size, + unsigned int slot) +{ + struct ufs_hba *hba = hba_p; + int i = 0; + u32 reg_base; + union ufs_crypto_cfg_entry *cfg_arr = hba->crypto_cfgs; + + if (!ufshcd_is_crypto_enabled(hba) || + !ufshcd_keyslot_valid(hba, slot)) + return -EINVAL; + + memset(&cfg_arr[slot], 0, sizeof(cfg_arr[slot])); + reg_base = hba->crypto_cfg_register + slot * sizeof(cfg_arr[0]); + + /* + * Clear the crypto cfg on the device. Clearing CFGE + * might not be sufficient, so just clear the entire cfg. + */ + for (i = 0; i < sizeof(cfg_arr[0]); i += sizeof(__le32)) + ufshcd_writel(hba, 0, reg_base + i); + wmb(); + + return 0; +} + +static bool ufshcd_crypto_mode_supported(void *hba_p, + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size) +{ + return ufshcd_crypto_cap_find(hba_p, crypto_mode, data_unit_size) >= 0; +} + +void ufshcd_crypto_enable(struct ufs_hba *hba) +{ + union ufs_crypto_cfg_entry *cfg_arr = hba->crypto_cfgs; + int slot; + + if (!ufshcd_hba_is_crypto_supported(hba)) + return; + + hba->caps |= UFSHCD_CAP_CRYPTO; + /* + * Reset might clear all keys, so reprogram all the keys. + * Also serves to clear keys on driver init. + */ + for (slot = 0; slot < NUM_KEYSLOTS(hba); slot++) + program_key(hba, &cfg_arr[slot], slot); +} + +void ufshcd_crypto_disable(struct ufs_hba *hba) +{ + hba->caps &= ~UFSHCD_CAP_CRYPTO; +} + +static const struct keyslot_mgmt_ll_ops ufshcd_ksm_ops = { + .keyslot_program = ufshcd_crypto_keyslot_program, + .keyslot_evict = ufshcd_crypto_keyslot_evict, + .keyslot_find = ufshcd_crypto_keyslot_find, + .crypto_mode_supported = ufshcd_crypto_mode_supported, +}; + +/** + * ufshcd_hba_init_crypto - Read crypto capabilities, init crypto fields in hba + * @hba: Per adapter instance + * + * Returns 0 on success. Returns -ENODEV if such capabilities don't exist, and + * -ENOMEM upon OOM. + */ +int ufshcd_hba_init_crypto(struct ufs_hba *hba) +{ + int cap_idx = 0; + int err = 0; + + /* Default to disabling crypto */ + hba->caps &= ~UFSHCD_CAP_CRYPTO; + + if (!(hba->capabilities & MASK_CRYPTO_SUPPORT)) { + err = -ENODEV; + goto out; + } + + /* + * Crypto Capabilities should never be 0, because the + * config_array_ptr > 04h. So we use a 0 value to indicate that + * crypto init failed, and can't be enabled. + */ + hba->crypto_capabilities.reg_val = + cpu_to_le32(ufshcd_readl(hba, REG_UFS_CCAP)); + hba->crypto_cfg_register = + (u32)hba->crypto_capabilities.config_array_ptr * 0x100; + hba->crypto_cap_array = + devm_kcalloc(hba->dev, + hba->crypto_capabilities.num_crypto_cap, + sizeof(hba->crypto_cap_array[0]), + GFP_KERNEL); + if (!hba->crypto_cap_array) { + err = -ENOMEM; + goto out; + } + + hba->crypto_cfgs = + devm_kcalloc(hba->dev, + NUM_KEYSLOTS(hba), + sizeof(hba->crypto_cfgs[0]), + GFP_KERNEL); + if (!hba->crypto_cfgs) { + err = -ENOMEM; + goto out_free_cfg_mem; + } + + /* + * Store all the capabilities now so that we don't need to repeatedly + * access the device each time we want to know its capabilities + */ + for (cap_idx = 0; cap_idx < hba->crypto_capabilities.num_crypto_cap; + cap_idx++) { + hba->crypto_cap_array[cap_idx].reg_val = + cpu_to_le32(ufshcd_readl(hba, + REG_UFS_CRYPTOCAP + + cap_idx * sizeof(__le32))); + } + + hba->ksm = keyslot_manager_create(NUM_KEYSLOTS(hba), &ufshcd_ksm_ops, + hba); + + if (!hba->ksm) { + err = -ENOMEM; + goto out_free_crypto_cfgs; + } + + return 0; +out_free_crypto_cfgs: + devm_kfree(hba->dev, hba->crypto_cfgs); +out_free_cfg_mem: + devm_kfree(hba->dev, hba->crypto_cap_array); +out: + // TODO: print error? + /* Indicate that init failed by setting crypto_capabilities to 0 */ + hba->crypto_capabilities.reg_val = 0; + return err; +} + +void ufshcd_crypto_setup_rq_keyslot_manager(struct ufs_hba *hba, + struct request_queue *q) +{ + if (!ufshcd_hba_is_crypto_supported(hba) || !q) + return; + + q->ksm = hba->ksm; +} + +void ufshcd_crypto_destroy_rq_keyslot_manager(struct ufs_hba *hba, + struct request_queue *q) +{ + keyslot_manager_destroy(hba->ksm); +} diff --git a/drivers/scsi/ufs/ufshcd-crypto.h b/drivers/scsi/ufs/ufshcd-crypto.h new file mode 100644 index 000000000000..73ddc8e493fb --- /dev/null +++ b/drivers/scsi/ufs/ufshcd-crypto.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2019 Google LLC + */ + +#ifndef _UFSHCD_CRYPTO_H +#define _UFSHCD_CRYPTO_H + +struct ufs_hba; + +#ifdef CONFIG_SCSI_UFS_CRYPTO +#include + +#include "ufshci.h" + +#define NUM_KEYSLOTS(hba) (hba->crypto_capabilities.config_count + 1) + +static inline bool ufshcd_keyslot_valid(struct ufs_hba *hba, unsigned int slot) +{ + /* + * The actual number of configurations supported is (CFGC+1), so slot + * numbers range from 0 to config_count inclusive. + */ + return slot < NUM_KEYSLOTS(hba); +} + +static inline bool ufshcd_hba_is_crypto_supported(struct ufs_hba *hba) +{ + return hba->crypto_capabilities.reg_val != 0; +} + +static inline bool ufshcd_is_crypto_enabled(struct ufs_hba *hba) +{ + return hba->caps & UFSHCD_CAP_CRYPTO; +} + +void ufshcd_crypto_enable(struct ufs_hba *hba); + +void ufshcd_crypto_disable(struct ufs_hba *hba); + +int ufshcd_hba_init_crypto(struct ufs_hba *hba); + +void ufshcd_crypto_setup_rq_keyslot_manager(struct ufs_hba *hba, + struct request_queue *q); + +void ufshcd_crypto_destroy_rq_keyslot_manager(struct ufs_hba *hba, + struct request_queue *q); + +#else /* CONFIG_SCSI_UFS_CRYPTO */ + +static inline bool ufshcd_keyslot_valid(struct ufs_hba *hba, + unsigned int slot) +{ + return false; +} + +static inline bool ufshcd_hba_is_crypto_supported(struct ufs_hba *hba) +{ + return false; +} + +static inline bool ufshcd_is_crypto_enabled(struct ufs_hba *hba) +{ + return false; +} + +static inline void ufshcd_crypto_enable(struct ufs_hba *hba) { } + +static inline void ufshcd_crypto_disable(struct ufs_hba *hba) { } + +static inline int ufshcd_hba_init_crypto(struct ufs_hba *hba) +{ + return 0; +} + +static inline void ufshcd_crypto_setup_rq_keyslot_manager( + struct ufs_hba *hba, + struct request_queue *q) { } + +static inline void ufshcd_crypto_destroy_rq_keyslot_manager( + struct ufs_hba *hba, + struct request_queue *q) { } + +#endif /* CONFIG_SCSI_UFS_CRYPTO */ + +#endif /* _UFSHCD_CRYPTO_H */ diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index cdb6f97c1d13..10a228ca260e 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -496,6 +496,11 @@ struct ufs_stats { * @urgent_bkops_lvl: keeps track of urgent bkops level for device * @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level for * device is known or not. + * @crypto_capabilities: Content of crypto capabilities register (0x100) + * @crypto_cap_array: Array of crypto capabilities + * @crypto_cfg_register: Start of the crypto cfg array + * @crypto_cfgs: Array of crypto configurations (i.e. config for each slot) + * @ksm: the keyslot manager tied to this hba */ struct ufs_hba { void __iomem *mmio_base; @@ -682,6 +687,15 @@ struct ufs_hba { struct rw_semaphore clk_scaling_lock; struct ufs_desc_size desc_size; + +#ifdef CONFIG_SCSI_UFS_CRYPTO + /* crypto */ + union ufs_crypto_capabilities crypto_capabilities; + union ufs_crypto_cap_entry *crypto_cap_array; + u32 crypto_cfg_register; + union ufs_crypto_cfg_entry *crypto_cfgs; + struct keyslot_manager *ksm; +#endif /* CONFIG_SCSI_UFS_CRYPTO */ }; /* Returns true if clocks can be gated. Otherwise false */ From 19c3c62836e5dbc9ceb620ecef0aa0c81578ed43 Mon Sep 17 00:00:00 2001 From: Satya Tangirala Date: Thu, 24 Oct 2019 14:44:28 -0700 Subject: [PATCH 2959/3715] BACKPORT: FROMLIST: scsi: ufs: Add inline encryption support to UFS Wire up ufshcd.c with the UFS Crypto API, the block layer inline encryption additions and the keyslot manager. Bug: 137270441 Test: tested as series; see Ie1b77f7615d6a7a60fdc9105c7ab2200d17636a8 Change-Id: Idc703c9617497e368ce83af36587708968660b23 Signed-off-by: Satya Tangirala Link: https://lore.kernel.org/linux-fscrypt/20191028072032.6911-7-satyat@google.com/ --- drivers/scsi/ufs/ufshcd.c | 82 ++++++++++++++++++++++++++++++++++++--- drivers/scsi/ufs/ufshcd.h | 6 +++ 2 files changed, 83 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index e21cbc790da1..765cb331ccb7 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -44,6 +44,7 @@ #include "ufshcd.h" #include "ufs_quirks.h" #include "unipro.h" +#include "ufshcd-crypto.h" #define CREATE_TRACE_POINTS #include @@ -796,7 +797,14 @@ static void ufshcd_enable_run_stop_reg(struct ufs_hba *hba) */ static inline void ufshcd_hba_start(struct ufs_hba *hba) { - ufshcd_writel(hba, CONTROLLER_ENABLE, REG_CONTROLLER_ENABLE); + u32 val = CONTROLLER_ENABLE; + + if (ufshcd_hba_is_crypto_supported(hba)) { + ufshcd_crypto_enable(hba); + val |= CRYPTO_GENERAL_ENABLE; + } + + ufshcd_writel(hba, val, REG_CONTROLLER_ENABLE); } /** @@ -2080,9 +2088,21 @@ static void ufshcd_prepare_req_desc_hdr(struct ufshcd_lrb *lrbp, dword_0 |= UTP_REQ_DESC_INT_CMD; /* Transfer request descriptor header fields */ + if (lrbp->crypto_enable) { + dword_0 |= UTP_REQ_DESC_CRYPTO_ENABLE_CMD; + dword_0 |= lrbp->crypto_key_slot; + req_desc->header.dword_1 = + cpu_to_le32((u32)lrbp->data_unit_num); + req_desc->header.dword_3 = + cpu_to_le32((u32)(lrbp->data_unit_num >> 32)); + } else { + /* dword_1 and dword_3 are reserved, hence they are set to 0 */ + req_desc->header.dword_1 = 0; + req_desc->header.dword_3 = 0; + } + req_desc->header.dword_0 = cpu_to_le32(dword_0); - /* dword_1 is reserved, hence it is set to 0 */ - req_desc->header.dword_1 = 0; + /* * assigning invalid value for command status. Controller * updates OCS on command completion, with the command @@ -2090,8 +2110,6 @@ static void ufshcd_prepare_req_desc_hdr(struct ufshcd_lrb *lrbp, */ req_desc->header.dword_2 = cpu_to_le32(OCS_INVALID_COMMAND_STATUS); - /* dword_3 is reserved, hence it is set to 0 */ - req_desc->header.dword_3 = 0; req_desc->prd_table_length = 0; } @@ -2267,6 +2285,37 @@ static inline u16 ufshcd_upiu_wlun_to_scsi_wlun(u8 upiu_wlun_id) return (upiu_wlun_id & ~UFS_UPIU_WLUN_ID) | SCSI_W_LUN_BASE; } +static inline int ufshcd_prepare_lrbp_crypto(struct ufs_hba *hba, + struct scsi_cmnd *cmd, + struct ufshcd_lrb *lrbp) +{ + int key_slot; + + if (!cmd->request->bio || + !bio_crypt_should_process(cmd->request->bio, cmd->request->q)) { + lrbp->crypto_enable = false; + return 0; + } + + if (WARN_ON(!ufshcd_is_crypto_enabled(hba))) { + /* + * Upper layer asked us to do inline encryption + * but that isn't enabled, so we fail this request. + */ + return -EINVAL; + } + key_slot = bio_crypt_get_keyslot(cmd->request->bio); + if (!ufshcd_keyslot_valid(hba, key_slot)) + return -EINVAL; + + lrbp->crypto_enable = true; + lrbp->crypto_key_slot = key_slot; + lrbp->data_unit_num = bio_crypt_data_unit_num(cmd->request->bio); + + return 0; +} + + /** * ufshcd_queuecommand - main entry point for SCSI requests * @cmd: command from SCSI Midlayer @@ -2354,6 +2403,13 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) lrbp->task_tag = tag; lrbp->lun = ufshcd_scsi_to_upiu_lun(cmd->device->lun); lrbp->intr_cmd = !ufshcd_is_intr_aggr_allowed(hba) ? true : false; + + err = ufshcd_prepare_lrbp_crypto(hba, cmd, lrbp); + if (err) { + lrbp->cmd = NULL; + clear_bit_unlock(tag, &hba->lrb_in_use); + goto out; + } lrbp->req_abort_skip = false; ufshcd_comp_scsi_upiu(hba, lrbp); @@ -2387,6 +2443,7 @@ static int ufshcd_compose_dev_cmd(struct ufs_hba *hba, lrbp->task_tag = tag; lrbp->lun = 0; /* device management cmd is not specific to any LUN */ lrbp->intr_cmd = true; /* No interrupt aggregation */ + lrbp->crypto_enable = false; /* No crypto operations */ hba->dev_cmd.type = cmd_type; return ufshcd_comp_devman_upiu(hba, lrbp); @@ -4024,6 +4081,8 @@ static inline void ufshcd_hba_stop(struct ufs_hba *hba, bool can_sleep) { int err; + ufshcd_crypto_disable(hba); + ufshcd_writel(hba, CONTROLLER_DISABLE, REG_CONTROLLER_ENABLE); err = ufshcd_wait_for_register(hba, REG_CONTROLLER_ENABLE, CONTROLLER_ENABLE, CONTROLLER_DISABLE, @@ -4392,10 +4451,13 @@ static int ufshcd_change_queue_depth(struct scsi_device *sdev, int depth) static int ufshcd_slave_configure(struct scsi_device *sdev) { struct request_queue *q = sdev->request_queue; + struct ufs_hba *hba = shost_priv(sdev->host); blk_queue_update_dma_pad(q, PRDT_DATA_BYTE_COUNT_PAD - 1); blk_queue_max_segment_size(q, PRDT_DATA_BYTE_COUNT_MAX); + ufshcd_crypto_setup_rq_keyslot_manager(hba, q); + return 0; } @@ -4406,6 +4468,7 @@ static int ufshcd_slave_configure(struct scsi_device *sdev) static void ufshcd_slave_destroy(struct scsi_device *sdev) { struct ufs_hba *hba; + struct request_queue *q = sdev->request_queue; hba = shost_priv(sdev->host); /* Drop the reference as it won't be needed anymore */ @@ -4416,6 +4479,8 @@ static void ufshcd_slave_destroy(struct scsi_device *sdev) hba->sdev_ufs_device = NULL; spin_unlock_irqrestore(hba->host->host_lock, flags); } + + ufshcd_crypto_destroy_rq_keyslot_manager(hba, q); } /** @@ -7993,6 +8058,13 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) goto exit_gating; } + /* Init crypto */ + err = ufshcd_hba_init_crypto(hba); + if (err) { + dev_err(hba->dev, "crypto setup failed\n"); + goto out_remove_scsi_host; + } + /* Host controller enable */ err = ufshcd_hba_enable(hba); if (err) { diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 10a228ca260e..d0882b1fde05 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -166,6 +166,9 @@ struct ufs_pm_lvl_states { * @lun: LUN of the command * @intr_cmd: Interrupt command (doesn't participate in interrupt aggregation) * @issue_time_stamp: time stamp for debug purposes + * @crypto_enable: whether or not the request needs inline crypto operations + * @crypto_key_slot: the key slot to use for inline crypto + * @data_unit_num: the data unit number for the first block for inline crypto * @req_abort_skip: skip request abort task flag */ struct ufshcd_lrb { @@ -189,6 +192,9 @@ struct ufshcd_lrb { u8 lun; /* UPIU LUN id field is only 8-bit wide */ bool intr_cmd; ktime_t issue_time_stamp; + bool crypto_enable; + u8 crypto_key_slot; + u64 data_unit_num; bool req_abort_skip; }; From a0dc8da519ccf2040af2dbbd6b4f688b50eb1755 Mon Sep 17 00:00:00 2001 From: Satya Tangirala Date: Thu, 24 Oct 2019 14:44:29 -0700 Subject: [PATCH 2960/3715] BACKPORT: FROMLIST: fscrypt: add inline encryption support Add support for inline encryption to fs/crypto/. With "inline encryption", the block layer handles the decryption/encryption as part of the bio, instead of the filesystem doing the crypto itself via Linux's crypto API. This model is needed in order to take advantage of the inline encryption hardware present on most modern mobile SoCs. To use inline encryption, the filesystem needs to be mounted with '-o inlinecrypt'. The contents of any AES-256-XTS encrypted files will then be encrypted using blk-crypto, instead of using the traditional filesystem-layer crypto. fscrypt still provides the key and IV to use, and the actual ciphertext on-disk is still the same; therefore it's testable using the existing fscrypt ciphertext verification tests. Note that since blk-crypto has a fallack to Linux's crypto API, this feature is usable and testable even without actual inline encryption hardware. Per-filesystem changes will be needed to set encryption contexts when submitting bios and to implement the 'inlinecrypt' mount option. This patch just adds the common code. Bug: 137270441 Test: tested as series; see Ie1b77f7615d6a7a60fdc9105c7ab2200d17636a8 Change-Id: I72e7e29db017404cdf7b5125718b5ba9590d31b4 Co-developed-by: Eric Biggers Signed-off-by: Eric Biggers Signed-off-by: Satya Tangirala Link: https://lore.kernel.org/linux-fscrypt/20191028072032.6911-8-satyat@google.com/ --- fs/crypto/Kconfig | 6 + fs/crypto/Makefile | 1 + fs/crypto/bio.c | 31 ++- fs/crypto/fscrypt_private.h | 72 +++++++ fs/crypto/inline_crypt.c | 390 ++++++++++++++++++++++++++++++++++++ fs/crypto/keyring.c | 2 + fs/crypto/keysetup.c | 18 +- include/linux/fscrypt.h | 60 ++++++ 8 files changed, 566 insertions(+), 14 deletions(-) create mode 100644 fs/crypto/inline_crypt.c diff --git a/fs/crypto/Kconfig b/fs/crypto/Kconfig index 4bc66f2c571e..0701bb90f99c 100644 --- a/fs/crypto/Kconfig +++ b/fs/crypto/Kconfig @@ -15,3 +15,9 @@ config FS_ENCRYPTION efficient since it avoids caching the encrypted and decrypted pages in the page cache. Currently Ext4, F2FS and UBIFS make use of this feature. + +config FS_ENCRYPTION_INLINE_CRYPT + bool "Enable fscrypt to use inline crypto" + depends on FS_ENCRYPTION && BLK_INLINE_ENCRYPTION + help + Enable fscrypt to use inline encryption hardware if available. diff --git a/fs/crypto/Makefile b/fs/crypto/Makefile index 0a78543f6cec..1a6b0774f3ff 100644 --- a/fs/crypto/Makefile +++ b/fs/crypto/Makefile @@ -10,3 +10,4 @@ fscrypto-y := crypto.o \ policy.o fscrypto-$(CONFIG_BLOCK) += bio.o +fscrypto-$(CONFIG_FS_ENCRYPTION_INLINE_CRYPT) += inline_crypt.o diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c index 4a7f4d78ef90..9dcb57089f4d 100644 --- a/fs/crypto/bio.c +++ b/fs/crypto/bio.c @@ -46,26 +46,38 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, { const unsigned int blockbits = inode->i_blkbits; const unsigned int blocksize = 1 << blockbits; + const bool inlinecrypt = fscrypt_inode_uses_inline_crypto(inode); struct page *ciphertext_page; struct bio *bio; int ret, err = 0; - ciphertext_page = fscrypt_alloc_bounce_page(GFP_NOWAIT); - if (!ciphertext_page) - return -ENOMEM; + if (inlinecrypt) { + ciphertext_page = ZERO_PAGE(0); + } else { + ciphertext_page = fscrypt_alloc_bounce_page(GFP_NOWAIT); + if (!ciphertext_page) + return -ENOMEM; + } while (len--) { - err = fscrypt_crypt_block(inode, FS_ENCRYPT, lblk, - ZERO_PAGE(0), ciphertext_page, - blocksize, 0, GFP_NOFS); - if (err) - goto errout; + if (!inlinecrypt) { + err = fscrypt_crypt_block(inode, FS_ENCRYPT, lblk, + ZERO_PAGE(0), ciphertext_page, + blocksize, 0, GFP_NOFS); + if (err) + goto errout; + } bio = bio_alloc(GFP_NOWAIT, 1); if (!bio) { err = -ENOMEM; goto errout; } + err = fscrypt_set_bio_crypt_ctx(bio, inode, lblk, GFP_NOIO); + if (err) { + bio_put(bio); + goto errout; + } bio_set_dev(bio, inode->i_sb->s_bdev); bio->bi_iter.bi_sector = pblk << (blockbits - 9); bio_set_op_attrs(bio, REQ_OP_WRITE, 0); @@ -87,7 +99,8 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, } err = 0; errout: - fscrypt_free_bounce_page(ciphertext_page); + if (!inlinecrypt) + fscrypt_free_bounce_page(ciphertext_page); return err; } EXPORT_SYMBOL(fscrypt_zeroout_range); diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 130b50e5a011..466fa2b038ec 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -13,6 +13,9 @@ #include #include +#include + +struct fscrypt_master_key; #define CONST_STRLEN(str) (sizeof(str) - 1) @@ -163,6 +166,14 @@ struct fscrypt_info { /* The actual crypto transform used for encryption and decryption */ struct crypto_skcipher *ci_ctfm; +#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT + /* + * The raw key for inline encryption, if this file is using inline + * encryption rather than the traditional filesystem layer encryption. + */ + const u8 *ci_inline_crypt_key; +#endif + /* True if the key should be freed when this fscrypt_info is freed */ bool ci_owns_key; @@ -293,6 +304,54 @@ extern int fscrypt_hkdf_expand(struct fscrypt_hkdf *hkdf, u8 context, extern void fscrypt_destroy_hkdf(struct fscrypt_hkdf *hkdf); +/* inline_crypt.c */ +#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT +extern bool fscrypt_should_use_inline_encryption(const struct fscrypt_info *ci); + +extern int fscrypt_set_inline_crypt_key(struct fscrypt_info *ci, + const u8 *derived_key); + +extern void fscrypt_free_inline_crypt_key(struct fscrypt_info *ci); + +extern int fscrypt_setup_per_mode_inline_crypt_key( + struct fscrypt_info *ci, + struct fscrypt_master_key *mk); + +extern void fscrypt_evict_inline_crypt_keys(struct fscrypt_master_key *mk); + +#else /* CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ + +static inline bool fscrypt_should_use_inline_encryption( + const struct fscrypt_info *ci) +{ + return false; +} + +static inline int fscrypt_set_inline_crypt_key(struct fscrypt_info *ci, + const u8 *derived_key) +{ + WARN_ON(1); + return -EOPNOTSUPP; +} + +static inline void fscrypt_free_inline_crypt_key(struct fscrypt_info *ci) +{ +} + +static inline int fscrypt_setup_per_mode_inline_crypt_key( + struct fscrypt_info *ci, + struct fscrypt_master_key *mk) +{ + WARN_ON(1); + return -EOPNOTSUPP; +} + +static inline void fscrypt_evict_inline_crypt_keys( + struct fscrypt_master_key *mk) +{ +} +#endif /* !CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ + /* keyring.c */ /* @@ -391,6 +450,16 @@ struct fscrypt_master_key { */ struct crypto_skcipher *mk_iv_ino_lblk_64_tfms[__FSCRYPT_MODE_MAX + 1]; +#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT + /* Raw keys for IV_INO_LBLK_64 policies, allocated on-demand */ + u8 *mk_iv_ino_lblk_64_raw_keys[__FSCRYPT_MODE_MAX + 1]; + + /* The data unit size being used for inline encryption */ + unsigned int mk_data_unit_size; + + /* The filesystem's block device */ + struct block_device *mk_bdev; +#endif } __randomize_layout; static inline bool @@ -445,9 +514,12 @@ struct fscrypt_mode { const char *cipher_str; int keysize; int ivsize; + enum blk_crypto_mode_num blk_crypto_mode; int logged_impl_name; }; +extern struct fscrypt_mode fscrypt_modes[]; + static inline bool fscrypt_mode_supports_direct_key(const struct fscrypt_mode *mode) { diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c new file mode 100644 index 000000000000..e41c6d66ff0d --- /dev/null +++ b/fs/crypto/inline_crypt.c @@ -0,0 +1,390 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Inline encryption support for fscrypt + * + * Copyright 2019 Google LLC + */ + +/* + * With "inline encryption", the block layer handles the decryption/encryption + * as part of the bio, instead of the filesystem doing the crypto itself via + * crypto API. See Documentation/block/inline-encryption.rst. fscrypt still + * provides the key and IV to use. + */ + +#include +#include +#include +#include + +#include "fscrypt_private.h" + +/* Return true iff inline encryption should be used for this file */ +bool fscrypt_should_use_inline_encryption(const struct fscrypt_info *ci) +{ + const struct inode *inode = ci->ci_inode; + struct super_block *sb = inode->i_sb; + + /* The file must need contents encryption, not filenames encryption */ + if (!S_ISREG(inode->i_mode)) + return false; + + /* blk-crypto must implement the needed encryption algorithm */ + if (ci->ci_mode->blk_crypto_mode == BLK_ENCRYPTION_MODE_INVALID) + return false; + + /* DIRECT_KEY needs a 24+ byte IV, so it can't work with 8-byte DUNs */ + if (fscrypt_is_direct_key_policy(&ci->ci_policy)) + return false; + + /* The filesystem must be mounted with -o inlinecrypt */ + if (!sb->s_cop->inline_crypt_enabled || + !sb->s_cop->inline_crypt_enabled(sb)) + return false; + + return true; +} + +/* Set a per-file inline encryption key (for passing to blk-crypto) */ +int fscrypt_set_inline_crypt_key(struct fscrypt_info *ci, const u8 *derived_key) +{ + const struct fscrypt_mode *mode = ci->ci_mode; + const struct super_block *sb = ci->ci_inode->i_sb; + + ci->ci_inline_crypt_key = kmemdup(derived_key, mode->keysize, GFP_NOFS); + if (!ci->ci_inline_crypt_key) + return -ENOMEM; + ci->ci_owns_key = true; + + return blk_crypto_start_using_mode(mode->blk_crypto_mode, + sb->s_blocksize, + sb->s_bdev->bd_queue); +} + +/* Free a per-file inline encryption key and evict it from blk-crypto */ +void fscrypt_free_inline_crypt_key(struct fscrypt_info *ci) +{ + if (ci->ci_inline_crypt_key != NULL) { + const struct fscrypt_mode *mode = ci->ci_mode; + const struct super_block *sb = ci->ci_inode->i_sb; + + blk_crypto_evict_key(sb->s_bdev->bd_queue, + ci->ci_inline_crypt_key, + mode->blk_crypto_mode, sb->s_blocksize); + kzfree(ci->ci_inline_crypt_key); + } +} + +/* + * Set up ->inline_crypt_key (for passing to blk-crypto) for inodes which use an + * IV_INO_LBLK_64 encryption policy. + * + * Return: 0 on success, -errno on failure + */ +int fscrypt_setup_per_mode_inline_crypt_key(struct fscrypt_info *ci, + struct fscrypt_master_key *mk) +{ + static DEFINE_MUTEX(inline_crypt_setup_mutex); + const struct super_block *sb = ci->ci_inode->i_sb; + struct block_device *bdev = sb->s_bdev; + const struct fscrypt_mode *mode = ci->ci_mode; + const u8 mode_num = mode - fscrypt_modes; + u8 *raw_key; + u8 hkdf_info[sizeof(mode_num) + sizeof(sb->s_uuid)]; + int err; + + if (WARN_ON(mode_num > __FSCRYPT_MODE_MAX)) + return -EINVAL; + + /* pairs with smp_store_release() below */ + raw_key = smp_load_acquire(&mk->mk_iv_ino_lblk_64_raw_keys[mode_num]); + if (raw_key) { + err = 0; + goto out; + } + + mutex_lock(&inline_crypt_setup_mutex); + + raw_key = mk->mk_iv_ino_lblk_64_raw_keys[mode_num]; + if (raw_key) { + err = 0; + goto out_unlock; + } + + raw_key = kmalloc(mode->keysize, GFP_NOFS); + if (!raw_key) { + err = -ENOMEM; + goto out_unlock; + } + + BUILD_BUG_ON(sizeof(mode_num) != 1); + BUILD_BUG_ON(sizeof(sb->s_uuid) != 16); + BUILD_BUG_ON(sizeof(hkdf_info) != 17); + hkdf_info[0] = mode_num; + memcpy(&hkdf_info[1], &sb->s_uuid, sizeof(sb->s_uuid)); + + err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf, + HKDF_CONTEXT_IV_INO_LBLK_64_KEY, + hkdf_info, sizeof(hkdf_info), + raw_key, mode->keysize); + if (err) + goto out_unlock; + + err = blk_crypto_start_using_mode(mode->blk_crypto_mode, + sb->s_blocksize, bdev->bd_queue); + if (err) + goto out_unlock; + + /* + * When a master key's first inline encryption key is set up, save a + * reference to the filesystem's block device so that the inline + * encryption keys can be evicted when the master key is destroyed. + */ + if (!mk->mk_bdev) { + mk->mk_bdev = bdgrab(bdev); + mk->mk_data_unit_size = sb->s_blocksize; + } + + /* pairs with smp_load_acquire() above */ + smp_store_release(&mk->mk_iv_ino_lblk_64_raw_keys[mode_num], raw_key); + err = 0; +out_unlock: + mutex_unlock(&inline_crypt_setup_mutex); +out: + if (err == 0) { + ci->ci_inline_crypt_key = raw_key; + /* + * Since each struct fscrypt_master_key belongs to a particular + * filesystem (a struct super_block), there should be only one + * block device, and only one data unit size as it should equal + * the filesystem's blocksize (i.e. s_blocksize). + */ + if (WARN_ON(mk->mk_bdev != bdev)) + err = -EINVAL; + if (WARN_ON(mk->mk_data_unit_size != sb->s_blocksize)) + err = -EINVAL; + } else { + kzfree(raw_key); + } + return err; +} + +/* + * Evict per-mode inline encryption keys from blk-crypto when a master key is + * destroyed. + */ +void fscrypt_evict_inline_crypt_keys(struct fscrypt_master_key *mk) +{ + struct block_device *bdev = mk->mk_bdev; + size_t i; + + if (!bdev) /* No inline encryption keys? */ + return; + + for (i = 0; i < ARRAY_SIZE(mk->mk_iv_ino_lblk_64_raw_keys); i++) { + u8 *raw_key = mk->mk_iv_ino_lblk_64_raw_keys[i]; + + if (raw_key != NULL) { + blk_crypto_evict_key(bdev->bd_queue, raw_key, + fscrypt_modes[i].blk_crypto_mode, + mk->mk_data_unit_size); + kzfree(raw_key); + } + } + bdput(bdev); +} + +/** + * fscrypt_inode_uses_inline_crypto - test whether an inode uses inline encryption + * @inode: an inode + * + * Return: true if the inode requires file contents encryption and if the + * encryption should be done in the block layer via blk-crypto rather + * than in the filesystem layer. + */ +bool fscrypt_inode_uses_inline_crypto(const struct inode *inode) +{ + return IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode) && + inode->i_crypt_info->ci_inline_crypt_key != NULL; +} +EXPORT_SYMBOL_GPL(fscrypt_inode_uses_inline_crypto); + +/** + * fscrypt_inode_uses_fs_layer_crypto - test whether an inode uses fs-layer encryption + * @inode: an inode + * + * Return: true if the inode requires file contents encryption and if the + * encryption should be done in the filesystem layer rather than in the + * block layer via blk-crypto. + */ +bool fscrypt_inode_uses_fs_layer_crypto(const struct inode *inode) +{ + return IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode) && + inode->i_crypt_info->ci_inline_crypt_key == NULL; +} +EXPORT_SYMBOL_GPL(fscrypt_inode_uses_fs_layer_crypto); + +static inline u64 fscrypt_generate_dun(const struct fscrypt_info *ci, + u64 lblk_num) +{ + union fscrypt_iv iv; + + fscrypt_generate_iv(&iv, lblk_num, ci); + /* + * fscrypt_should_use_inline_encryption() ensures we never get here if + * more than the first 8 bytes of the IV are nonzero. + */ + BUG_ON(memchr_inv(&iv.raw[8], 0, ci->ci_mode->ivsize - 8)); + return le64_to_cpu(iv.lblk_num); +} + +/** + * fscrypt_set_bio_crypt_ctx - prepare a file contents bio for inline encryption + * @bio: a bio which will eventually be submitted to the file + * @inode: the file's inode + * @first_lblk: the first file logical block number in the I/O + * @gfp_mask: memory allocation flags + * + * If the contents of the file should be encrypted (or decrypted) with inline + * encryption, then assign the appropriate encryption context to the bio. + * + * Normally the bio should be newly allocated (i.e. no pages added yet), as + * otherwise fscrypt_mergeable_bio() won't work as intended. + * + * The encryption context will be freed automatically when the bio is freed. + * + * Return: 0 on success, -errno on failure. If __GFP_NOFAIL is specified, this + * is guaranteed to succeed. + */ +int fscrypt_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode, + u64 first_lblk, gfp_t gfp_mask) +{ + const struct fscrypt_info *ci = inode->i_crypt_info; + u64 dun; + + if (!fscrypt_inode_uses_inline_crypto(inode)) + return 0; + + dun = fscrypt_generate_dun(ci, first_lblk); + + return bio_crypt_set_ctx(bio, ci->ci_inline_crypt_key, + ci->ci_mode->blk_crypto_mode, + dun, inode->i_blkbits, gfp_mask); +} +EXPORT_SYMBOL_GPL(fscrypt_set_bio_crypt_ctx); + +/* Extract the inode and logical block number from a buffer_head. */ +static bool bh_get_inode_and_lblk_num(const struct buffer_head *bh, + const struct inode **inode_ret, + u64 *lblk_num_ret) +{ + struct page *page = bh->b_page; + const struct address_space *mapping; + const struct inode *inode; + + /* + * The ext4 journal (jbd2) can submit a buffer_head it directly created + * for a non-pagecache page. fscrypt doesn't care about these. + */ + mapping = page_mapping(page); + if (!mapping) + return false; + inode = mapping->host; + + *inode_ret = inode; + *lblk_num_ret = ((u64)page->index << (PAGE_SHIFT - inode->i_blkbits)) + + (bh_offset(bh) >> inode->i_blkbits); + return true; +} + +/** + * fscrypt_set_bio_crypt_ctx_bh - prepare a file contents bio for inline encryption + * @bio: a bio which will eventually be submitted to the file + * @first_bh: the first buffer_head for which I/O will be submitted + * @gfp_mask: memory allocation flags + * + * Same as fscrypt_set_bio_crypt_ctx(), except this takes a buffer_head instead + * of an inode and block number directly. + * + * Return: 0 on success, -errno on failure + */ +int fscrypt_set_bio_crypt_ctx_bh(struct bio *bio, + const struct buffer_head *first_bh, + gfp_t gfp_mask) +{ + const struct inode *inode; + u64 first_lblk; + + if (!bh_get_inode_and_lblk_num(first_bh, &inode, &first_lblk)) + return 0; + + return fscrypt_set_bio_crypt_ctx(bio, inode, first_lblk, gfp_mask); +} +EXPORT_SYMBOL_GPL(fscrypt_set_bio_crypt_ctx_bh); + +/** + * fscrypt_mergeable_bio - test whether data can be added to a bio + * @bio: the bio being built up + * @inode: the inode for the next part of the I/O + * @next_lblk: the next file logical block number in the I/O + * + * When building a bio which may contain data which should undergo inline + * encryption (or decryption) via fscrypt, filesystems should call this function + * to ensure that the resulting bio contains only logically contiguous data. + * This will return false if the next part of the I/O cannot be merged with the + * bio because either the encryption key would be different or the encryption + * data unit numbers would be discontiguous. + * + * fscrypt_set_bio_crypt_ctx() must have already been called on the bio. + * + * Return: true iff the I/O is mergeable + */ +bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode, + u64 next_lblk) +{ + const struct bio_crypt_ctx *bc; + const u8 *next_key; + u64 next_dun; + + if (bio_has_crypt_ctx(bio) != fscrypt_inode_uses_inline_crypto(inode)) + return false; + if (!bio_has_crypt_ctx(bio)) + return true; + bc = bio->bi_crypt_context; + next_key = inode->i_crypt_info->ci_inline_crypt_key; + next_dun = fscrypt_generate_dun(inode->i_crypt_info, next_lblk); + + /* + * Comparing the key pointers is good enough, as all I/O for each key + * uses the same pointer. I.e., there's currently no need to support + * merging requests where the keys are the same but the pointers differ. + */ + return next_key == bc->raw_key && + next_dun == bc->data_unit_num + + (bio_sectors(bio) >> + (bc->data_unit_size_bits - SECTOR_SHIFT)); +} +EXPORT_SYMBOL_GPL(fscrypt_mergeable_bio); + +/** + * fscrypt_mergeable_bio_bh - test whether data can be added to a bio + * @bio: the bio being built up + * @next_bh: the next buffer_head for which I/O will be submitted + * + * Same as fscrypt_mergeable_bio(), except this takes a buffer_head instead of + * an inode and block number directly. + * + * Return: true iff the I/O is mergeable + */ +bool fscrypt_mergeable_bio_bh(struct bio *bio, + const struct buffer_head *next_bh) +{ + const struct inode *inode; + u64 next_lblk; + + if (!bh_get_inode_and_lblk_num(next_bh, &inode, &next_lblk)) + return !bio_has_crypt_ctx(bio); + + return fscrypt_mergeable_bio(bio, inode, next_lblk); +} +EXPORT_SYMBOL_GPL(fscrypt_mergeable_bio_bh); diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c index cdc9c0c74878..48d3a0c9a415 100644 --- a/fs/crypto/keyring.c +++ b/fs/crypto/keyring.c @@ -48,6 +48,8 @@ static void free_master_key(struct fscrypt_master_key *mk) crypto_free_skcipher(mk->mk_iv_ino_lblk_64_tfms[i]); } + fscrypt_evict_inline_crypt_keys(mk); + key_put(mk->mk_users); kzfree(mk); } diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index 2f926d3e6b5d..309c23bf87e1 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -13,12 +13,13 @@ #include "fscrypt_private.h" -static struct fscrypt_mode available_modes[] = { +struct fscrypt_mode fscrypt_modes[] = { [FSCRYPT_MODE_AES_256_XTS] = { .friendly_name = "AES-256-XTS", .cipher_str = "xts(aes)", .keysize = 64, .ivsize = 16, + .blk_crypto_mode = BLK_ENCRYPTION_MODE_AES_256_XTS, }, [FSCRYPT_MODE_AES_256_CTS] = { .friendly_name = "AES-256-CTS-CBC", @@ -51,10 +52,10 @@ select_encryption_mode(const union fscrypt_policy *policy, const struct inode *inode) { if (S_ISREG(inode->i_mode)) - return &available_modes[fscrypt_policy_contents_mode(policy)]; + return &fscrypt_modes[fscrypt_policy_contents_mode(policy)]; if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - return &available_modes[fscrypt_policy_fnames_mode(policy)]; + return &fscrypt_modes[fscrypt_policy_fnames_mode(policy)]; WARN_ONCE(1, "fscrypt: filesystem tried to load encryption info for inode %lu, which is not encryptable (file type %d)\n", inode->i_ino, (inode->i_mode & S_IFMT)); @@ -109,6 +110,9 @@ int fscrypt_set_derived_key(struct fscrypt_info *ci, const u8 *derived_key) { struct crypto_skcipher *tfm; + if (fscrypt_should_use_inline_encryption(ci)) + return fscrypt_set_inline_crypt_key(ci, derived_key); + tfm = fscrypt_allocate_skcipher(ci->ci_mode, derived_key, ci->ci_inode); if (IS_ERR(tfm)) return PTR_ERR(tfm); @@ -126,7 +130,7 @@ static int setup_per_mode_key(struct fscrypt_info *ci, const struct inode *inode = ci->ci_inode; const struct super_block *sb = inode->i_sb; struct fscrypt_mode *mode = ci->ci_mode; - u8 mode_num = mode - available_modes; + const u8 mode_num = mode - fscrypt_modes; struct crypto_skcipher *tfm, *prev_tfm; u8 mode_key[FSCRYPT_MAX_KEY_SIZE]; u8 hkdf_info[sizeof(mode_num) + sizeof(sb->s_uuid)]; @@ -202,6 +206,8 @@ static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci, * the IVs. This format is optimized for use with inline * encryption hardware compliant with the UFS or eMMC standards. */ + if (fscrypt_should_use_inline_encryption(ci)) + return fscrypt_setup_per_mode_inline_crypt_key(ci, mk); return setup_per_mode_key(ci, mk, mk->mk_iv_ino_lblk_64_tfms, HKDF_CONTEXT_IV_INO_LBLK_64_KEY, true); @@ -328,8 +334,10 @@ static void put_crypt_info(struct fscrypt_info *ci) if (ci->ci_direct_key) fscrypt_put_direct_key(ci->ci_direct_key); - else if (ci->ci_owns_key) + else if (ci->ci_owns_key) { crypto_free_skcipher(ci->ci_ctfm); + fscrypt_free_inline_crypt_key(ci); + } key = ci->ci_master_key; if (key) { diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 81ec35256aed..6febaa570160 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -64,6 +64,7 @@ struct fscrypt_operations { bool (*has_stable_inodes)(struct super_block *sb); void (*get_ino_and_lblk_bits)(struct super_block *sb, int *ino_bits_ret, int *lblk_bits_ret); + bool (*inline_crypt_enabled)(struct super_block *sb); }; static inline bool fscrypt_has_encryption_key(const struct inode *inode) @@ -532,6 +533,65 @@ static inline const char *fscrypt_get_symlink(struct inode *inode, } #endif /* !CONFIG_FS_ENCRYPTION */ +/* inline_crypt.c */ +#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT +extern bool fscrypt_inode_uses_inline_crypto(const struct inode *inode); + +extern bool fscrypt_inode_uses_fs_layer_crypto(const struct inode *inode); + +extern int fscrypt_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode, + u64 first_lblk, gfp_t gfp_mask); + +extern int fscrypt_set_bio_crypt_ctx_bh(struct bio *bio, + const struct buffer_head *first_bh, + gfp_t gfp_mask); + +extern bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode, + u64 next_lblk); + +extern bool fscrypt_mergeable_bio_bh(struct bio *bio, + const struct buffer_head *next_bh); + +#else /* CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ +static inline bool fscrypt_inode_uses_inline_crypto(const struct inode *inode) +{ + return false; +} + +static inline bool fscrypt_inode_uses_fs_layer_crypto(const struct inode *inode) +{ + return IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode); +} + +static inline int fscrypt_set_bio_crypt_ctx(struct bio *bio, + const struct inode *inode, + u64 first_lblk, gfp_t gfp_mask) +{ + return 0; +} + +static inline int fscrypt_set_bio_crypt_ctx_bh( + struct bio *bio, + const struct buffer_head *first_bh, + gfp_t gfp_mask) +{ + return 0; +} + +static inline bool fscrypt_mergeable_bio(struct bio *bio, + const struct inode *inode, + u64 next_lblk) +{ + return true; +} + +static inline bool fscrypt_mergeable_bio_bh(struct bio *bio, + const struct buffer_head *next_bh) +{ + return true; +} +#endif /* !CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ + /** * fscrypt_require_key - require an inode's encryption key * @inode: the inode we need the key for From e64327f5719b4a41e0de341ead7d48ed73216a23 Mon Sep 17 00:00:00 2001 From: Satya Tangirala Date: Thu, 24 Oct 2019 14:44:30 -0700 Subject: [PATCH 2961/3715] BACKPORT: FROMLIST: f2fs: add inline encryption support Wire up f2fs to support inline encryption via the helper functions which fs/crypto/ now provides. This includes: - Adding a mount option 'inlinecrypt' which enables inline encryption on encrypted files where it can be used. - Setting the bio_crypt_ctx on bios that will be submitted to an inline-encrypted file. - Not adding logically discontiguous data to bios that will be submitted to an inline-encrypted file. - Not doing filesystem-layer crypto on inline-encrypted files. Bug: 137270441 Test: tested as series; see Ie1b77f7615d6a7a60fdc9105c7ab2200d17636a8 Change-Id: I2ea4da9e251e668b908408ecc091f09ebf8be8f4 Co-developed-by: Eric Biggers Signed-off-by: Eric Biggers Signed-off-by: Satya Tangirala Link: https://lore.kernel.org/linux-fscrypt/20191028072032.6911-9-satyat@google.com/ --- fs/f2fs/data.c | 76 +++++++++++++++++++++++++++++++++++++++++++------ fs/f2fs/f2fs.h | 3 ++ fs/f2fs/super.c | 20 +++++++++++++ 3 files changed, 91 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a1c57acefc78..bdd472783795 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -317,6 +317,35 @@ static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages) return bio; } +static int f2fs_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode, + pgoff_t first_idx, + const struct f2fs_io_info *fio, + gfp_t gfp_mask) +{ + /* + * The f2fs garbage collector sets ->encrypted_page when it wants to + * read/write raw data without encryption. + */ + if (fio && fio->encrypted_page) + return 0; + + return fscrypt_set_bio_crypt_ctx(bio, inode, first_idx, gfp_mask); +} + +static bool f2fs_crypt_mergeable_bio(struct bio *bio, const struct inode *inode, + pgoff_t next_idx, + const struct f2fs_io_info *fio) +{ + /* + * The f2fs garbage collector sets ->encrypted_page when it wants to + * read/write raw data without encryption. + */ + if (fio && fio->encrypted_page) + return true; + + return fscrypt_mergeable_bio(bio, inode, next_idx); +} + static inline void __submit_bio(struct f2fs_sb_info *sbi, struct bio *bio, enum page_type type) { @@ -514,6 +543,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) struct bio *bio; struct page *page = fio->encrypted_page ? fio->encrypted_page : fio->page; + int err; if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, fio->is_por ? META_POR : (__is_meta_io(fio) ? @@ -526,6 +556,13 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) /* Allocate a new bio */ bio = __bio_alloc(fio, 1); + err = f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host, + fio->page->index, fio, GFP_NOIO); + if (err) { + bio_put(bio); + return err; + } + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { bio_put(bio); return -EFAULT; @@ -716,12 +753,17 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio) trace_f2fs_submit_page_bio(page, fio); f2fs_trace_ios(fio, 0); - if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block, - fio->new_blkaddr)) + if (bio && (!page_is_mergeable(fio->sbi, bio, *fio->last_block, + fio->new_blkaddr) || + !f2fs_crypt_mergeable_bio(bio, fio->page->mapping->host, + fio->page->index, fio))) f2fs_submit_merged_ipu_write(fio->sbi, &bio, NULL); alloc_new: if (!bio) { bio = __bio_alloc(fio, BIO_MAX_PAGES); + f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host, + fio->page->index, fio, + GFP_NOIO | __GFP_NOFAIL); bio_set_op_attrs(bio, fio->op, fio->op_flags); add_bio_entry(fio->sbi, bio, page, fio->temp); @@ -773,8 +815,11 @@ next: inc_page_count(sbi, WB_DATA_TYPE(bio_page)); - if (io->bio && !io_is_mergeable(sbi, io->bio, io, fio, - io->last_block_in_bio, fio->new_blkaddr)) + if (io->bio && + (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio, + fio->new_blkaddr) || + !f2fs_crypt_mergeable_bio(io->bio, fio->page->mapping->host, + fio->page->index, fio))) __submit_merged_bio(io); alloc_new: if (io->bio == NULL) { @@ -786,6 +831,9 @@ alloc_new: goto skip; } io->bio = __bio_alloc(fio, BIO_MAX_PAGES); + f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host, + fio->page->index, fio, + GFP_NOIO | __GFP_NOFAIL); io->fio = *fio; } @@ -825,15 +873,23 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, struct bio *bio; struct bio_post_read_ctx *ctx; unsigned int post_read_steps = 0; + int err; bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false); if (!bio) return ERR_PTR(-ENOMEM); + + err = f2fs_set_bio_crypt_ctx(bio, inode, first_idx, NULL, GFP_NOFS); + if (err) { + bio_put(bio); + return ERR_PTR(err); + } + f2fs_target_device(sbi, blkaddr, bio); bio->bi_end_io = f2fs_read_end_io; bio_set_op_attrs(bio, REQ_OP_READ, op_flag); - if (f2fs_encrypted_file(inode)) + if (fscrypt_inode_uses_fs_layer_crypto(inode)) post_read_steps |= 1 << STEP_DECRYPT; if (f2fs_need_verity(inode, first_idx)) @@ -1870,8 +1926,9 @@ zero_out: * This page will go to BIO. Do we need to send this * BIO off first? */ - if (bio && !page_is_mergeable(F2FS_I_SB(inode), bio, - *last_block_in_bio, block_nr)) { + if (bio && (!page_is_mergeable(F2FS_I_SB(inode), bio, + *last_block_in_bio, block_nr) || + !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) { submit_and_realloc: __f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA); bio = NULL; @@ -2011,6 +2068,9 @@ static int encrypt_one_page(struct f2fs_io_info *fio) /* wait for GCed page writeback via META_MAPPING */ f2fs_wait_on_block_writeback(inode, fio->old_blkaddr); + if (fscrypt_inode_uses_inline_crypto(inode)) + return 0; + retry_encrypt: fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(fio->page, PAGE_SIZE, 0, @@ -2185,7 +2245,7 @@ got_it: f2fs_unlock_op(fio->sbi); err = f2fs_inplace_write_data(fio); if (err) { - if (f2fs_encrypted_file(inode)) + if (fscrypt_inode_uses_fs_layer_crypto(inode)) fscrypt_finalize_bounce_page(&fio->encrypted_page); if (PageWriteback(page)) end_page_writeback(page); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3a17f3ba954d..2c181a4bec9d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -138,6 +138,9 @@ struct f2fs_mount_info { int alloc_mode; /* segment allocation policy */ int fsync_mode; /* fsync policy */ bool test_dummy_encryption; /* test dummy encryption */ +#ifdef CONFIG_FS_ENCRYPTION + bool inlinecrypt; /* inline encryption enabled */ +#endif block_t unusable_cap; /* Amount of space allowed to be * unusable when disabling checkpoint */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 6ed411850b1c..f3d0e9375e53 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -137,6 +137,7 @@ enum { Opt_alloc, Opt_fsync, Opt_test_dummy_encryption, + Opt_inlinecrypt, Opt_checkpoint_disable, Opt_checkpoint_disable_cap, Opt_checkpoint_disable_cap_perc, @@ -199,6 +200,7 @@ static match_table_t f2fs_tokens = { {Opt_alloc, "alloc_mode=%s"}, {Opt_fsync, "fsync_mode=%s"}, {Opt_test_dummy_encryption, "test_dummy_encryption"}, + {Opt_inlinecrypt, "inlinecrypt"}, {Opt_checkpoint_disable, "checkpoint=disable"}, {Opt_checkpoint_disable_cap, "checkpoint=disable:%u"}, {Opt_checkpoint_disable_cap_perc, "checkpoint=disable:%u%%"}, @@ -783,6 +785,13 @@ static int parse_options(struct super_block *sb, char *options) f2fs_info(sbi, "Test dummy encryption mode enabled"); #else f2fs_info(sbi, "Test dummy encryption mount option ignored"); +#endif + break; + case Opt_inlinecrypt: +#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT + F2FS_OPTION(sbi).inlinecrypt = true; +#else + f2fs_info(sbi, "inline encryption not supported"); #endif break; case Opt_checkpoint_disable_cap_perc: @@ -1454,6 +1463,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) #ifdef CONFIG_FS_ENCRYPTION if (F2FS_OPTION(sbi).test_dummy_encryption) seq_puts(seq, ",test_dummy_encryption"); + if (F2FS_OPTION(sbi).inlinecrypt) + seq_puts(seq, ",inlinecrypt"); #endif if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_DEFAULT) @@ -1482,6 +1493,9 @@ static void default_options(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT; F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX; F2FS_OPTION(sbi).test_dummy_encryption = false; +#ifdef CONFIG_FS_ENCRYPTION + F2FS_OPTION(sbi).inlinecrypt = false; +#endif F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID); F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID); @@ -2336,6 +2350,11 @@ static void f2fs_get_ino_and_lblk_bits(struct super_block *sb, *lblk_bits_ret = 8 * sizeof(block_t); } +static bool f2fs_inline_crypt_enabled(struct super_block *sb) +{ + return F2FS_OPTION(F2FS_SB(sb)).inlinecrypt; +} + static const struct fscrypt_operations f2fs_cryptops = { .key_prefix = "f2fs:", .get_context = f2fs_get_context, @@ -2345,6 +2364,7 @@ static const struct fscrypt_operations f2fs_cryptops = { .max_namelen = F2FS_NAME_LEN, .has_stable_inodes = f2fs_has_stable_inodes, .get_ino_and_lblk_bits = f2fs_get_ino_and_lblk_bits, + .inline_crypt_enabled = f2fs_inline_crypt_enabled, }; #endif From b8f7b236748261bec545b69b39d7fb75e519f4ed Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 24 Oct 2019 14:44:31 -0700 Subject: [PATCH 2962/3715] BACKPORT: FROMLIST: ext4: add inline encryption support Wire up ext4 to support inline encryption via the helper functions which fs/crypto/ now provides. This includes: - Adding a mount option 'inlinecrypt' which enables inline encryption on encrypted files where it can be used. - Setting the bio_crypt_ctx on bios that will be submitted to an inline-encrypted file. Note: submit_bh_wbc() in fs/buffer.c also needed to be patched for this part, since ext4 sometimes uses ll_rw_block() on file data. - Not adding logically discontiguous data to bios that will be submitted to an inline-encrypted file. - Not doing filesystem-layer crypto on inline-encrypted files. Bug: 137270441 Test: tested as series; see Ie1b77f7615d6a7a60fdc9105c7ab2200d17636a8 Change-Id: I73dac46ff1eba56a13975c387b20554416ddbad8 Signed-off-by: Eric Biggers Signed-off-by: Satya Tangirala Link: https://lore.kernel.org/linux-fscrypt/20191028072032.6911-10-satyat@google.com/ --- fs/buffer.c | 3 +++ fs/ext4/ext4.h | 1 + fs/ext4/inode.c | 4 ++-- fs/ext4/page-io.c | 11 +++++++++-- fs/ext4/readpage.c | 15 ++++++++++++--- fs/ext4/super.c | 13 +++++++++++++ 6 files changed, 40 insertions(+), 7 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index bdca7b10e239..30315edcfd91 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -46,6 +46,7 @@ #include #include #include +#include static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, @@ -3125,6 +3126,8 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, */ bio = bio_alloc(GFP_NOIO, 1); + fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO | __GFP_NOFAIL); + if (wbc) { wbc_init_bio(wbc, bio); wbc_account_io(wbc, bh->b_page, bh->b_size); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index b993e4df016a..9decc3e73ca4 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1155,6 +1155,7 @@ struct ext4_inode_info { #define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */ #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ +#define EXT4_MOUNT_INLINECRYPT 0x4000000 /* Inline encryption support */ #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e2b466fd7f3a..0aefed560d91 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1232,7 +1232,7 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, (block_start < from || block_end > to)) { ll_rw_block(REQ_OP_READ, 0, 1, &bh); *wait_bh++ = bh; - decrypt = IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode); + decrypt = fscrypt_inode_uses_fs_layer_crypto(inode); } } /* @@ -4061,7 +4061,7 @@ static int __ext4_block_zero_page_range(handle_t *handle, /* Uhhuh. Read error. Complain and punt. */ if (!buffer_uptodate(bh)) goto unlock; - if (S_ISREG(inode->i_mode) && IS_ENCRYPTED(inode)) { + if (fscrypt_inode_uses_fs_layer_crypto(inode)) { /* We expect the key to be set. */ BUG_ON(!fscrypt_has_encryption_key(inode)); BUG_ON(blocksize != PAGE_SIZE); diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index b028265a1fbb..9d547bace7f2 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -362,10 +362,16 @@ static int io_submit_init_bio(struct ext4_io_submit *io, struct buffer_head *bh) { struct bio *bio; + int err; bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); if (!bio) return -ENOMEM; + err = fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO); + if (err) { + bio_put(bio); + return err; + } wbc_init_bio(io->io_wbc, bio); bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); bio_set_dev(bio, bh->b_bdev); @@ -383,7 +389,8 @@ static int io_submit_add_bh(struct ext4_io_submit *io, { int ret; - if (io->io_bio && bh->b_blocknr != io->io_next_block) { + if (io->io_bio && (bh->b_blocknr != io->io_next_block || + !fscrypt_mergeable_bio_bh(io->io_bio, bh))) { submit_and_retry: ext4_io_submit(io); } @@ -469,7 +476,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, bh = head = page_buffers(page); - if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode) && nr_to_submit) { + if (fscrypt_inode_uses_fs_layer_crypto(inode) && nr_to_submit) { gfp_t gfp_flags = GFP_NOFS; retry_encrypt: diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index f74dead3fcc0..a92409f1d0ae 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -198,7 +198,7 @@ static struct bio_post_read_ctx *get_bio_post_read_ctx(struct inode *inode, unsigned int post_read_steps = 0; struct bio_post_read_ctx *ctx = NULL; - if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode)) + if (fscrypt_inode_uses_fs_layer_crypto(inode)) post_read_steps |= 1 << STEP_DECRYPT; if (ext4_need_verity(inode, first_idx)) @@ -259,6 +259,7 @@ int ext4_mpage_readpages(struct address_space *mapping, const unsigned blkbits = inode->i_blkbits; const unsigned blocks_per_page = PAGE_SIZE >> blkbits; const unsigned blocksize = 1 << blkbits; + sector_t next_block; sector_t block_in_file; sector_t last_block; sector_t last_block_in_file; @@ -290,7 +291,8 @@ int ext4_mpage_readpages(struct address_space *mapping, if (page_has_buffers(page)) goto confused; - block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits); + block_in_file = next_block = + (sector_t)page->index << (PAGE_SHIFT - blkbits); last_block = block_in_file + nr_pages * blocks_per_page; last_block_in_file = (ext4_readpage_limit(inode) + blocksize - 1) >> blkbits; @@ -390,7 +392,8 @@ int ext4_mpage_readpages(struct address_space *mapping, * This page will go to BIO. Do we need to send this * BIO off first? */ - if (bio && (last_block_in_bio != blocks[0] - 1)) { + if (bio && (last_block_in_bio != blocks[0] - 1 || + !fscrypt_mergeable_bio(bio, inode, next_block))) { submit_and_realloc: ext4_submit_bio_read(bio); bio = NULL; @@ -402,6 +405,12 @@ int ext4_mpage_readpages(struct address_space *mapping, min_t(int, nr_pages, BIO_MAX_PAGES)); if (!bio) goto set_error_page; + if (fscrypt_set_bio_crypt_ctx(bio, inode, next_block, + GFP_KERNEL) != 0) { + bio_put(bio); + bio = NULL; + goto set_error_page; + } ctx = get_bio_post_read_ctx(inode, bio, page->index); if (IS_ERR(ctx)) { bio_put(bio); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 96235d0cc01c..25fe536638d0 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1297,6 +1297,11 @@ static void ext4_get_ino_and_lblk_bits(struct super_block *sb, *lblk_bits_ret = 8 * sizeof(ext4_lblk_t); } +static bool ext4_inline_crypt_enabled(struct super_block *sb) +{ + return test_opt(sb, INLINECRYPT); +} + static const struct fscrypt_operations ext4_cryptops = { .key_prefix = "ext4:", .get_context = ext4_get_context, @@ -1306,6 +1311,7 @@ static const struct fscrypt_operations ext4_cryptops = { .max_namelen = EXT4_NAME_LEN, .has_stable_inodes = ext4_has_stable_inodes, .get_ino_and_lblk_bits = ext4_get_ino_and_lblk_bits, + .inline_crypt_enabled = ext4_inline_crypt_enabled, }; #endif @@ -1400,6 +1406,7 @@ enum { Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, Opt_data_err_abort, Opt_data_err_ignore, Opt_test_dummy_encryption, + Opt_inlinecrypt, Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, @@ -1493,6 +1500,7 @@ static const match_table_t tokens = { {Opt_noinit_itable, "noinit_itable"}, {Opt_max_dir_size_kb, "max_dir_size_kb=%u"}, {Opt_test_dummy_encryption, "test_dummy_encryption"}, + {Opt_inlinecrypt, "inlinecrypt"}, {Opt_nombcache, "nombcache"}, {Opt_nombcache, "no_mbcache"}, /* for backward compatibility */ {Opt_removed, "check=none"}, /* mount option from ext2/3 */ @@ -1702,6 +1710,11 @@ static const struct mount_opts { {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT}, {Opt_max_dir_size_kb, 0, MOPT_GTE0}, {Opt_test_dummy_encryption, 0, MOPT_GTE0}, +#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT + {Opt_inlinecrypt, EXT4_MOUNT_INLINECRYPT, MOPT_SET}, +#else + {Opt_inlinecrypt, EXT4_MOUNT_INLINECRYPT, MOPT_NOSUPPORT}, +#endif {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET}, {Opt_err, 0, 0} }; From a69516d0913e7f2c9bdde17c2ea6a793bb474830 Mon Sep 17 00:00:00 2001 From: Satya Tangirala Date: Mon, 27 Jan 2020 17:36:43 -0800 Subject: [PATCH 2963/3715] ANDROID: cuttlefish_defconfig: enable inline encryption enable CONFIG_BLK_INLINE_ENCRYPTION and CONFIG_FS_ENCRYPTION_INLINE_CRYPT Bug: 137270441 Test: Test cuttlefish boots both with and without inlinecrypt mount option specified in fstab, while using both F2FS and EXT4 for userdata.img. Also tested by running gce-xfstests on both the auto and encrypt test groups on EXT4 and F2FS both with and without the inlinecrypt mount option. The UFS changes were tested on a Pixel 4 device. Change-Id: Ie1b77f7615d6a7a60fdc9105c7ab2200d17636a8 Signed-off-by: Satya Tangirala --- arch/arm64/configs/cuttlefish_defconfig | 2 ++ arch/x86/configs/x86_64_cuttlefish_defconfig | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index 930d43759d06..99cb9b35a849 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -47,6 +47,7 @@ CONFIG_REFCOUNT_FULL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y +CONFIG_BLK_INLINE_ENCRYPTION=y CONFIG_PCI=y CONFIG_PCI_HOST_GENERIC=y CONFIG_PREEMPT=y @@ -423,6 +424,7 @@ CONFIG_EXT4_ENCRYPTION=y CONFIG_F2FS_FS=y CONFIG_F2FS_FS_SECURITY=y CONFIG_F2FS_FS_ENCRYPTION=y +CONFIG_FS_ENCRYPTION_INLINE_CRYPT=y CONFIG_FS_VERITY=y CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y # CONFIG_DNOTIFY is not set diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 81950eb19c4f..a0434800549d 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -41,6 +41,7 @@ CONFIG_REFCOUNT_FULL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y +CONFIG_BLK_INLINE_ENCRYPTION=y CONFIG_PARTITION_ADVANCED=y CONFIG_SMP=y CONFIG_HYPERVISOR_GUEST=y @@ -437,6 +438,7 @@ CONFIG_EXT4_ENCRYPTION=y CONFIG_F2FS_FS=y CONFIG_F2FS_FS_SECURITY=y CONFIG_F2FS_FS_ENCRYPTION=y +CONFIG_FS_ENCRYPTION_INLINE_CRYPT=y CONFIG_FS_VERITY=y CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y CONFIG_QUOTA=y From dae9899044f320bb119e02b45d816a493b1488ae Mon Sep 17 00:00:00 2001 From: Satya Tangirala Date: Wed, 8 May 2019 03:44:29 -0700 Subject: [PATCH 2964/3715] ANDROID: scsi: ufs: UFS crypto variant operations API Introduce UFS crypto variant operations to handle quirks in individual UFS inline encryption hardware. We also expose a default implementation for crypto operations that conforms to the UFSHCI v2.1 specification, so that any user of crypto variant operations can fall back on the default implementation whenever there aren't any special quirks to handle. Bug: 137270441 Change-Id: I8bd9bced66b50cfdd0483bbc3e999b00c0f11386 Signed-off-by: Satya Tangirala --- drivers/scsi/ufs/ufshcd-crypto.c | 152 +++++++++++++++++++++++++++++-- drivers/scsi/ufs/ufshcd-crypto.h | 83 +++++++++++++++-- drivers/scsi/ufs/ufshcd.c | 48 ++++------ drivers/scsi/ufs/ufshcd.h | 23 +++++ 4 files changed, 256 insertions(+), 50 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd-crypto.c b/drivers/scsi/ufs/ufshcd-crypto.c index 3900a07a7e9b..988d8df8f394 100644 --- a/drivers/scsi/ufs/ufshcd-crypto.c +++ b/drivers/scsi/ufs/ufshcd-crypto.c @@ -34,8 +34,8 @@ static size_t get_keysize_bytes(enum ufs_crypto_key_size size) } static int ufshcd_crypto_cap_find(void *hba_p, - enum blk_crypto_mode_num crypto_mode, - unsigned int data_unit_size) + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size) { struct ufs_hba *hba = hba_p; enum ufs_crypto_alg ufs_alg; @@ -265,7 +265,8 @@ static bool ufshcd_crypto_mode_supported(void *hba_p, return ufshcd_crypto_cap_find(hba_p, crypto_mode, data_unit_size) >= 0; } -void ufshcd_crypto_enable(struct ufs_hba *hba) +/* Functions implementing UFSHCI v2.1 specification behaviour */ +void ufshcd_crypto_enable_spec(struct ufs_hba *hba) { union ufs_crypto_cfg_entry *cfg_arr = hba->crypto_cfgs; int slot; @@ -281,11 +282,13 @@ void ufshcd_crypto_enable(struct ufs_hba *hba) for (slot = 0; slot < NUM_KEYSLOTS(hba); slot++) program_key(hba, &cfg_arr[slot], slot); } +EXPORT_SYMBOL(ufshcd_crypto_enable_spec); -void ufshcd_crypto_disable(struct ufs_hba *hba) +void ufshcd_crypto_disable_spec(struct ufs_hba *hba) { hba->caps &= ~UFSHCD_CAP_CRYPTO; } +EXPORT_SYMBOL(ufshcd_crypto_disable_spec); static const struct keyslot_mgmt_ll_ops ufshcd_ksm_ops = { .keyslot_program = ufshcd_crypto_keyslot_program, @@ -301,7 +304,8 @@ static const struct keyslot_mgmt_ll_ops ufshcd_ksm_ops = { * Returns 0 on success. Returns -ENODEV if such capabilities don't exist, and * -ENOMEM upon OOM. */ -int ufshcd_hba_init_crypto(struct ufs_hba *hba) +int ufshcd_hba_init_crypto_spec(struct ufs_hba *hba, + const struct keyslot_mgmt_ll_ops *ksm_ops) { int cap_idx = 0; int err = 0; @@ -355,8 +359,7 @@ int ufshcd_hba_init_crypto(struct ufs_hba *hba) cap_idx * sizeof(__le32))); } - hba->ksm = keyslot_manager_create(NUM_KEYSLOTS(hba), &ufshcd_ksm_ops, - hba); + hba->ksm = keyslot_manager_create(NUM_KEYSLOTS(hba), ksm_ops, hba); if (!hba->ksm) { err = -ENOMEM; @@ -374,18 +377,147 @@ out: hba->crypto_capabilities.reg_val = 0; return err; } +EXPORT_SYMBOL(ufshcd_hba_init_crypto_spec); -void ufshcd_crypto_setup_rq_keyslot_manager(struct ufs_hba *hba, - struct request_queue *q) +void ufshcd_crypto_setup_rq_keyslot_manager_spec(struct ufs_hba *hba, + struct request_queue *q) { if (!ufshcd_hba_is_crypto_supported(hba) || !q) return; q->ksm = hba->ksm; } +EXPORT_SYMBOL(ufshcd_crypto_setup_rq_keyslot_manager_spec); + +void ufshcd_crypto_destroy_rq_keyslot_manager_spec(struct ufs_hba *hba, + struct request_queue *q) +{ + keyslot_manager_destroy(hba->ksm); +} +EXPORT_SYMBOL(ufshcd_crypto_destroy_rq_keyslot_manager_spec); + +int ufshcd_prepare_lrbp_crypto_spec(struct ufs_hba *hba, + struct scsi_cmnd *cmd, + struct ufshcd_lrb *lrbp) +{ + int key_slot; + + if (!cmd->request->bio || + !bio_crypt_should_process(cmd->request->bio, cmd->request->q)) { + lrbp->crypto_enable = false; + return 0; + } + + if (WARN_ON(!ufshcd_is_crypto_enabled(hba))) { + /* + * Upper layer asked us to do inline encryption + * but that isn't enabled, so we fail this request. + */ + return -EINVAL; + } + key_slot = bio_crypt_get_keyslot(cmd->request->bio); + if (!ufshcd_keyslot_valid(hba, key_slot)) + return -EINVAL; + + lrbp->crypto_enable = true; + lrbp->crypto_key_slot = key_slot; + lrbp->data_unit_num = bio_crypt_data_unit_num(cmd->request->bio); + + return 0; +} +EXPORT_SYMBOL(ufshcd_prepare_lrbp_crypto_spec); + +/* Crypto Variant Ops Support */ + +void ufshcd_crypto_enable(struct ufs_hba *hba) +{ + if (hba->crypto_vops && hba->crypto_vops->enable) + return hba->crypto_vops->enable(hba); + + return ufshcd_crypto_enable_spec(hba); +} + +void ufshcd_crypto_disable(struct ufs_hba *hba) +{ + if (hba->crypto_vops && hba->crypto_vops->disable) + return hba->crypto_vops->disable(hba); + + return ufshcd_crypto_disable_spec(hba); +} + +int ufshcd_hba_init_crypto(struct ufs_hba *hba) +{ + if (hba->crypto_vops && hba->crypto_vops->hba_init_crypto) + return hba->crypto_vops->hba_init_crypto(hba, + &ufshcd_ksm_ops); + + return ufshcd_hba_init_crypto_spec(hba, &ufshcd_ksm_ops); +} + +void ufshcd_crypto_setup_rq_keyslot_manager(struct ufs_hba *hba, + struct request_queue *q) +{ + if (hba->crypto_vops && hba->crypto_vops->setup_rq_keyslot_manager) + return hba->crypto_vops->setup_rq_keyslot_manager(hba, q); + + return ufshcd_crypto_setup_rq_keyslot_manager_spec(hba, q); +} void ufshcd_crypto_destroy_rq_keyslot_manager(struct ufs_hba *hba, struct request_queue *q) { - keyslot_manager_destroy(hba->ksm); + if (hba->crypto_vops && hba->crypto_vops->destroy_rq_keyslot_manager) + return hba->crypto_vops->destroy_rq_keyslot_manager(hba, q); + + return ufshcd_crypto_destroy_rq_keyslot_manager_spec(hba, q); +} + +int ufshcd_prepare_lrbp_crypto(struct ufs_hba *hba, + struct scsi_cmnd *cmd, + struct ufshcd_lrb *lrbp) +{ + if (hba->crypto_vops && hba->crypto_vops->prepare_lrbp_crypto) + return hba->crypto_vops->prepare_lrbp_crypto(hba, cmd, lrbp); + + return ufshcd_prepare_lrbp_crypto_spec(hba, cmd, lrbp); +} + +int ufshcd_complete_lrbp_crypto(struct ufs_hba *hba, + struct scsi_cmnd *cmd, + struct ufshcd_lrb *lrbp) +{ + if (hba->crypto_vops && hba->crypto_vops->complete_lrbp_crypto) + return hba->crypto_vops->complete_lrbp_crypto(hba, cmd, lrbp); + + return 0; +} + +void ufshcd_crypto_debug(struct ufs_hba *hba) +{ + if (hba->crypto_vops && hba->crypto_vops->debug) + hba->crypto_vops->debug(hba); +} + +int ufshcd_crypto_suspend(struct ufs_hba *hba, + enum ufs_pm_op pm_op) +{ + if (hba->crypto_vops && hba->crypto_vops->suspend) + return hba->crypto_vops->suspend(hba, pm_op); + + return 0; +} + +int ufshcd_crypto_resume(struct ufs_hba *hba, + enum ufs_pm_op pm_op) +{ + if (hba->crypto_vops && hba->crypto_vops->resume) + return hba->crypto_vops->resume(hba, pm_op); + + return 0; +} + +void ufshcd_crypto_set_vops(struct ufs_hba *hba, + struct ufs_hba_crypto_variant_ops *crypto_vops) +{ + hba->crypto_vops = crypto_vops; } diff --git a/drivers/scsi/ufs/ufshcd-crypto.h b/drivers/scsi/ufs/ufshcd-crypto.h index 73ddc8e493fb..3c03d0e23e87 100644 --- a/drivers/scsi/ufs/ufshcd-crypto.h +++ b/drivers/scsi/ufs/ufshcd-crypto.h @@ -6,11 +6,9 @@ #ifndef _UFSHCD_CRYPTO_H #define _UFSHCD_CRYPTO_H -struct ufs_hba; - #ifdef CONFIG_SCSI_UFS_CRYPTO #include - +#include "ufshcd.h" #include "ufshci.h" #define NUM_KEYSLOTS(hba) (hba->crypto_capabilities.config_count + 1) @@ -34,6 +32,26 @@ static inline bool ufshcd_is_crypto_enabled(struct ufs_hba *hba) return hba->caps & UFSHCD_CAP_CRYPTO; } +/* Functions implementing UFSHCI v2.1 specification behaviour */ +int ufshcd_prepare_lrbp_crypto_spec(struct ufs_hba *hba, + struct scsi_cmnd *cmd, + struct ufshcd_lrb *lrbp); + +void ufshcd_crypto_enable_spec(struct ufs_hba *hba); + +void ufshcd_crypto_disable_spec(struct ufs_hba *hba); + +struct keyslot_mgmt_ll_ops; +int ufshcd_hba_init_crypto_spec(struct ufs_hba *hba, + const struct keyslot_mgmt_ll_ops *ksm_ops); + +void ufshcd_crypto_setup_rq_keyslot_manager_spec(struct ufs_hba *hba, + struct request_queue *q); + +void ufshcd_crypto_destroy_rq_keyslot_manager_spec(struct ufs_hba *hba, + struct request_queue *q); + +/* Crypto Variant Ops Support */ void ufshcd_crypto_enable(struct ufs_hba *hba); void ufshcd_crypto_disable(struct ufs_hba *hba); @@ -46,6 +64,23 @@ void ufshcd_crypto_setup_rq_keyslot_manager(struct ufs_hba *hba, void ufshcd_crypto_destroy_rq_keyslot_manager(struct ufs_hba *hba, struct request_queue *q); +int ufshcd_prepare_lrbp_crypto(struct ufs_hba *hba, + struct scsi_cmnd *cmd, + struct ufshcd_lrb *lrbp); + +int ufshcd_complete_lrbp_crypto(struct ufs_hba *hba, + struct scsi_cmnd *cmd, + struct ufshcd_lrb *lrbp); + +void ufshcd_crypto_debug(struct ufs_hba *hba); + +int ufshcd_crypto_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op); + +int ufshcd_crypto_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op); + +void ufshcd_crypto_set_vops(struct ufs_hba *hba, + struct ufs_hba_crypto_variant_ops *crypto_vops); + #else /* CONFIG_SCSI_UFS_CRYPTO */ static inline bool ufshcd_keyslot_valid(struct ufs_hba *hba, @@ -73,13 +108,43 @@ static inline int ufshcd_hba_init_crypto(struct ufs_hba *hba) return 0; } -static inline void ufshcd_crypto_setup_rq_keyslot_manager( - struct ufs_hba *hba, - struct request_queue *q) { } +static inline void ufshcd_crypto_setup_rq_keyslot_manager(struct ufs_hba *hba, + struct request_queue *q) { } -static inline void ufshcd_crypto_destroy_rq_keyslot_manager( - struct ufs_hba *hba, - struct request_queue *q) { } +static inline void ufshcd_crypto_destroy_rq_keyslot_manager(struct ufs_hba *hba, + struct request_queue *q) { } + +static inline int ufshcd_prepare_lrbp_crypto(struct ufs_hba *hba, + struct scsi_cmnd *cmd, + struct ufshcd_lrb *lrbp) +{ + lrbp->crypto_enable = false; + return 0; +} + +static inline int ufshcd_complete_lrbp_crypto(struct ufs_hba *hba, + struct scsi_cmnd *cmd, + struct ufshcd_lrb *lrbp) +{ + return 0; +} + +static inline void ufshcd_crypto_debug(struct ufs_hba *hba) { } + +static inline int ufshcd_crypto_suspend(struct ufs_hba *hba, + enum ufs_pm_op pm_op) +{ + return 0; +} + +static inline int ufshcd_crypto_resume(struct ufs_hba *hba, + enum ufs_pm_op pm_op) +{ + return 0; +} + +static inline void ufshcd_crypto_set_vops(struct ufs_hba *hba, + struct ufs_hba_crypto_variant_ops *crypto_vops) { } #endif /* CONFIG_SCSI_UFS_CRYPTO */ diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 765cb331ccb7..8253a3ee6148 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -372,6 +372,8 @@ static void ufshcd_print_host_regs(struct ufs_hba *hba) if (hba->vops && hba->vops->dbg_register_dump) hba->vops->dbg_register_dump(hba); + + ufshcd_crypto_debug(hba); } static @@ -2285,37 +2287,6 @@ static inline u16 ufshcd_upiu_wlun_to_scsi_wlun(u8 upiu_wlun_id) return (upiu_wlun_id & ~UFS_UPIU_WLUN_ID) | SCSI_W_LUN_BASE; } -static inline int ufshcd_prepare_lrbp_crypto(struct ufs_hba *hba, - struct scsi_cmnd *cmd, - struct ufshcd_lrb *lrbp) -{ - int key_slot; - - if (!cmd->request->bio || - !bio_crypt_should_process(cmd->request->bio, cmd->request->q)) { - lrbp->crypto_enable = false; - return 0; - } - - if (WARN_ON(!ufshcd_is_crypto_enabled(hba))) { - /* - * Upper layer asked us to do inline encryption - * but that isn't enabled, so we fail this request. - */ - return -EINVAL; - } - key_slot = bio_crypt_get_keyslot(cmd->request->bio); - if (!ufshcd_keyslot_valid(hba, key_slot)) - return -EINVAL; - - lrbp->crypto_enable = true; - lrbp->crypto_key_slot = key_slot; - lrbp->data_unit_num = bio_crypt_data_unit_num(cmd->request->bio); - - return 0; -} - - /** * ufshcd_queuecommand - main entry point for SCSI requests * @cmd: command from SCSI Midlayer @@ -4692,6 +4663,7 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba, result = ufshcd_transfer_rsp_status(hba, lrbp); scsi_dma_unmap(cmd); cmd->result = result; + ufshcd_complete_lrbp_crypto(hba, cmd, lrbp); /* Mark completed command as NULL in LRB */ lrbp->cmd = NULL; clear_bit_unlock(index, &hba->lrb_in_use); @@ -7330,6 +7302,10 @@ static int ufshcd_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) req_link_state = UIC_LINK_OFF_STATE; } + ret = ufshcd_crypto_suspend(hba, pm_op); + if (ret) + goto out; + /* * If we can't transition into any of the low power modes * just gate the clocks. @@ -7433,6 +7409,7 @@ enable_gating: ufshcd_resume_clkscaling(hba); hba->clk_gating.is_suspended = false; ufshcd_release(hba); + ufshcd_crypto_resume(hba, pm_op); out: hba->pm_op_in_progress = 0; return ret; @@ -7452,9 +7429,11 @@ static int ufshcd_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op) { int ret; enum uic_link_state old_link_state; + enum ufs_dev_pwr_mode old_pwr_mode; hba->pm_op_in_progress = 1; old_link_state = hba->uic_link_state; + old_pwr_mode = hba->curr_dev_pwr_mode; ufshcd_hba_vreg_set_hpm(hba); /* Make sure clocks are enabled before accessing controller */ @@ -7502,6 +7481,10 @@ static int ufshcd_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op) goto set_old_link_state; } + ret = ufshcd_crypto_resume(hba, pm_op); + if (ret) + goto set_old_dev_pwr_mode; + if (ufshcd_keep_autobkops_enabled_except_suspend(hba)) ufshcd_enable_auto_bkops(hba); else @@ -7520,6 +7503,9 @@ static int ufshcd_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op) ufshcd_release(hba); goto out; +set_old_dev_pwr_mode: + if (old_pwr_mode != hba->curr_dev_pwr_mode) + ufshcd_set_dev_pwr_mode(hba, old_pwr_mode); set_old_link_state: ufshcd_link_state_transition(hba, old_link_state, 0); vendor_suspend: diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index d0882b1fde05..ad24d4aed313 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -331,6 +331,28 @@ struct ufs_hba_variant_ops { int (*phy_initialization)(struct ufs_hba *); }; +struct keyslot_mgmt_ll_ops; +struct ufs_hba_crypto_variant_ops { + void (*setup_rq_keyslot_manager)(struct ufs_hba *hba, + struct request_queue *q); + void (*destroy_rq_keyslot_manager)(struct ufs_hba *hba, + struct request_queue *q); + int (*hba_init_crypto)(struct ufs_hba *hba, + const struct keyslot_mgmt_ll_ops *ksm_ops); + void (*enable)(struct ufs_hba *hba); + void (*disable)(struct ufs_hba *hba); + int (*suspend)(struct ufs_hba *hba, enum ufs_pm_op pm_op); + int (*resume)(struct ufs_hba *hba, enum ufs_pm_op pm_op); + int (*debug)(struct ufs_hba *hba); + int (*prepare_lrbp_crypto)(struct ufs_hba *hba, + struct scsi_cmnd *cmd, + struct ufshcd_lrb *lrbp); + int (*complete_lrbp_crypto)(struct ufs_hba *hba, + struct scsi_cmnd *cmd, + struct ufshcd_lrb *lrbp); + void *priv; +}; + /* clock gating state */ enum clk_gating_state { CLKS_OFF, @@ -551,6 +573,7 @@ struct ufs_hba { u32 ufs_version; struct ufs_hba_variant_ops *vops; void *priv; + const struct ufs_hba_crypto_variant_ops *crypto_vops; unsigned int irq; bool is_irq_enabled; From 8e8f55d1a7e865562d2e3e022a7fcf13753a9c8e Mon Sep 17 00:00:00 2001 From: Satya Tangirala Date: Thu, 7 Nov 2019 21:16:46 -0800 Subject: [PATCH 2965/3715] ANDROID: scsi: ufs: UFS init should not require inline crypto UFS initialization should carry on even if inline crypto support is absent, instead of just erroring out. Bug: 137270441 Change-Id: I4a508640f803dc8aaff1033b5e1d5c229a0b03de Signed-off-by: Satya Tangirala --- drivers/scsi/ufs/ufshcd-crypto.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd-crypto.c b/drivers/scsi/ufs/ufshcd-crypto.c index 988d8df8f394..7599d77725e4 100644 --- a/drivers/scsi/ufs/ufshcd-crypto.c +++ b/drivers/scsi/ufs/ufshcd-crypto.c @@ -301,8 +301,7 @@ static const struct keyslot_mgmt_ll_ops ufshcd_ksm_ops = { * ufshcd_hba_init_crypto - Read crypto capabilities, init crypto fields in hba * @hba: Per adapter instance * - * Returns 0 on success. Returns -ENODEV if such capabilities don't exist, and - * -ENOMEM upon OOM. + * Return: 0 if crypto was initialized or is not supported, else a -errno value. */ int ufshcd_hba_init_crypto_spec(struct ufs_hba *hba, const struct keyslot_mgmt_ll_ops *ksm_ops) @@ -313,10 +312,9 @@ int ufshcd_hba_init_crypto_spec(struct ufs_hba *hba, /* Default to disabling crypto */ hba->caps &= ~UFSHCD_CAP_CRYPTO; - if (!(hba->capabilities & MASK_CRYPTO_SUPPORT)) { - err = -ENODEV; + /* Return 0 if crypto support isn't present */ + if (!(hba->capabilities & MASK_CRYPTO_SUPPORT)) goto out; - } /* * Crypto Capabilities should never be 0, because the @@ -372,7 +370,6 @@ out_free_crypto_cfgs: out_free_cfg_mem: devm_kfree(hba->dev, hba->crypto_cap_array); out: - // TODO: print error? /* Indicate that init failed by setting crypto_capabilities to 0 */ hba->crypto_capabilities.reg_val = 0; return err; From e12563c18d484e6379d03105b4565db7bb3a7975 Mon Sep 17 00:00:00 2001 From: Satya Tangirala Date: Tue, 17 Dec 2019 14:26:29 -0800 Subject: [PATCH 2966/3715] BACKPORT: FROMLIST: Update Inline Encryption from v5 to v6 of patch series Changes v5 => v6: - Blk-crypto's kernel crypto API fallback is no longer restricted to 8-byte DUNs. It's also now separately configurable from blk-crypto, and can be disabled entirely, while still allowing the kernel to use inline encryption hardware. Further, struct bio_crypt_ctx takes up less space, and no longer contains the information needed by the crypto API fallback - the fallback allocates the required memory when necessary. - Blk-crypto now supports all file content encryption modes supported by fscrypt. - Fixed bio merging logic in blk-merge.c - Fscrypt now supports inline encryption with the direct key policy, since blk-crypto now has support for larger DUNs. - Keyslot manager now uses a hashtable to lookup which keyslot contains any particular key (thanks Eric!) - Fscrypt support for inline encryption now handles filesystems with multiple underlying block devices (thanks Eric!) - Numerous cleanups Backport notes: In the time between the update from v5 to v6, "scsi: ufs: override auto suspend tunables for ufs" was merged in upstream, and as a result, UFSHCD_CAP_RPM_AUTOSUSPEND took up the 7th bit in the ufs crypto caps - however, that patch has not been backported to 4.14 yet, so we manually change UFSHCD_CAP_CRYPTO to use the 8th bit (to reflect what's in v6 in android-mainline). Bug: 137270441 Test: refer to I26376479ee38259b8c35732cb3a1d7e15f9b05a3 Change-Id: I13e2e327e0b4784b394cb1e7cf32a04856d95f01 Link: https://lore.kernel.org/linux-block/20191218145136.172774-1-satyat@google.com/ Signed-off-by: Satya Tangirala --- Documentation/block/inline-encryption.rst | 8 +- block/Kconfig | 15 +- block/Makefile | 1 + block/bio-crypt-ctx.c | 117 ++-- block/bio.c | 13 +- block/blk-core.c | 4 +- block/blk-crypto-fallback.c | 647 +++++++++++++++++++ block/blk-crypto-internal.h | 58 ++ block/blk-crypto.c | 747 +++------------------- block/blk-merge.c | 38 +- block/keyslot-manager.c | 256 +++++--- drivers/md/dm.c | 14 +- drivers/scsi/ufs/ufshcd-crypto.c | 235 +++---- drivers/scsi/ufs/ufshcd-crypto.h | 18 +- drivers/scsi/ufs/ufshcd.c | 12 +- drivers/scsi/ufs/ufshcd.h | 6 +- fs/buffer.c | 2 +- fs/crypto/bio.c | 7 +- fs/crypto/crypto.c | 2 +- fs/crypto/fname.c | 4 +- fs/crypto/fscrypt_private.h | 136 ++-- fs/crypto/inline_crypt.c | 302 ++++----- fs/crypto/keyring.c | 6 +- fs/crypto/keysetup.c | 102 +-- fs/crypto/keysetup_v1.c | 16 +- fs/ext4/page-io.c | 7 +- fs/ext4/readpage.c | 8 +- fs/f2fs/data.c | 30 +- fs/f2fs/super.c | 21 + include/linux/bio-crypt-ctx.h | 291 ++++----- include/linux/blk-crypto.h | 39 +- include/linux/fscrypt.h | 34 +- include/linux/keyslot-manager.h | 94 +-- 33 files changed, 1686 insertions(+), 1604 deletions(-) create mode 100644 block/blk-crypto-fallback.c create mode 100644 block/blk-crypto-internal.h diff --git a/Documentation/block/inline-encryption.rst b/Documentation/block/inline-encryption.rst index 202826cee95e..330106b23c09 100644 --- a/Documentation/block/inline-encryption.rst +++ b/Documentation/block/inline-encryption.rst @@ -97,7 +97,7 @@ Blk-crypto ensures that: - The bio's encryption context is programmed into a keyslot in the KSM of the request queue that the bio is being submitted to (or the crypto API fallback - KSM if the request queue doesn't have a KSM), and that the ``processing_ksm`` + KSM if the request queue doesn't have a KSM), and that the ``bc_ksm`` in the ``bi_crypt_context`` is set to this KSM - That the bio has its own individual reference to the keyslot in this KSM. @@ -107,7 +107,7 @@ Blk-crypto ensures that: ensuring that the bio has a valid reference to the keyslot when, for e.g., the crypto API fallback KSM in blk-crypto performs crypto on the device's behalf. The individual references are ensured by increasing the refcount for the - keyslot in the ``processing_ksm`` when a bio with a programmed encryption + keyslot in the ``bc_ksm`` when a bio with a programmed encryption context is cloned. @@ -120,7 +120,7 @@ been programmed into any keyslot in any KSM (for e.g. a bio from the FS). request queue the bio is being submitted to (and if this KSM does not exist, then it will program it into blk-crypto's internal KSM for crypto API fallback). The KSM that this encryption context was programmed into is stored - as the ``processing_ksm`` in the bio's ``bi_crypt_context``. + as the ``bc_ksm`` in the bio's ``bi_crypt_context``. **Case 2:** blk-crypto is given a bio whose encryption context has already been programmed into a keyslot in the *crypto API fallback* KSM. @@ -138,7 +138,7 @@ KSM). This way, when a device driver is processing a bio, it can be sure that the bio's encryption context has been programmed into some KSM (either the device driver's request queue's KSM, or blk-crypto's crypto API fallback KSM). -It then simply needs to check if the bio's processing_ksm is the device's +It then simply needs to check if the bio's ``bc_ksm`` is the device's request queue's KSM. If so, then it should proceed with IE. If not, it should simply do nothing with respect to crypto, because some other KSM (perhaps the blk-crypto crypto API fallback KSM) is handling the en/decryption. diff --git a/block/Kconfig b/block/Kconfig index c4334605377f..4d9bcb951d83 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -186,13 +186,20 @@ config BLK_SED_OPAL config BLK_INLINE_ENCRYPTION bool "Enable inline encryption support in block layer" + help + Build the blk-crypto subsystem. Enabling this lets the + block layer handle encryption, so users can take + advantage of inline encryption hardware if present. + +config BLK_INLINE_ENCRYPTION_FALLBACK + bool "Enable crypto API fallback for blk-crypto" + depends on BLK_INLINE_ENCRYPTION select CRYPTO select CRYPTO_BLKCIPHER help - Build the blk-crypto subsystem. - Enabling this lets the block layer handle encryption, - so users can take advantage of inline encryption - hardware if present. + Enabling this lets the block layer handle inline encryption + by falling back to the kernel crypto API when inline + encryption hardware is not present. menu "Partition Types" diff --git a/block/Makefile b/block/Makefile index a2618d3d1df4..ab14055d8222 100644 --- a/block/Makefile +++ b/block/Makefile @@ -37,3 +37,4 @@ obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o obj-$(CONFIG_BLK_SED_OPAL) += sed-opal.o obj-$(CONFIG_BLK_INLINE_ENCRYPTION) += keyslot-manager.o bio-crypt-ctx.o \ blk-crypto.o +obj-$(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) += blk-crypto-fallback.o \ No newline at end of file diff --git a/block/bio-crypt-ctx.c b/block/bio-crypt-ctx.c index 0f7641b875e9..75982dabc7a3 100644 --- a/block/bio-crypt-ctx.c +++ b/block/bio-crypt-ctx.c @@ -5,26 +5,43 @@ #include #include -#include #include +#include +#include + +#include "blk-crypto-internal.h" static int num_prealloc_crypt_ctxs = 128; + +module_param(num_prealloc_crypt_ctxs, int, 0444); +MODULE_PARM_DESC(num_prealloc_crypt_ctxs, + "Number of bio crypto contexts to preallocate"); + static struct kmem_cache *bio_crypt_ctx_cache; static mempool_t *bio_crypt_ctx_pool; -int bio_crypt_ctx_init(void) +int __init bio_crypt_ctx_init(void) { + size_t i; + bio_crypt_ctx_cache = KMEM_CACHE(bio_crypt_ctx, 0); if (!bio_crypt_ctx_cache) return -ENOMEM; - bio_crypt_ctx_pool = mempool_create_slab_pool( - num_prealloc_crypt_ctxs, - bio_crypt_ctx_cache); - + bio_crypt_ctx_pool = mempool_create_slab_pool(num_prealloc_crypt_ctxs, + bio_crypt_ctx_cache); if (!bio_crypt_ctx_pool) return -ENOMEM; + /* This is assumed in various places. */ + BUILD_BUG_ON(BLK_ENCRYPTION_MODE_INVALID != 0); + + /* Sanity check that no algorithm exceeds the defined limits. */ + for (i = 0; i < BLK_ENCRYPTION_MODE_MAX; i++) { + BUG_ON(blk_crypto_modes[i].keysize > BLK_CRYPTO_MAX_KEY_SIZE); + BUG_ON(blk_crypto_modes[i].ivsize > BLK_CRYPTO_MAX_IV_SIZE); + } + return 0; } @@ -32,51 +49,43 @@ struct bio_crypt_ctx *bio_crypt_alloc_ctx(gfp_t gfp_mask) { return mempool_alloc(bio_crypt_ctx_pool, gfp_mask); } -EXPORT_SYMBOL(bio_crypt_alloc_ctx); void bio_crypt_free_ctx(struct bio *bio) { mempool_free(bio->bi_crypt_context, bio_crypt_ctx_pool); bio->bi_crypt_context = NULL; } -EXPORT_SYMBOL(bio_crypt_free_ctx); -int bio_crypt_clone(struct bio *dst, struct bio *src, gfp_t gfp_mask) +void bio_crypt_clone(struct bio *dst, struct bio *src, gfp_t gfp_mask) { + const struct bio_crypt_ctx *src_bc = src->bi_crypt_context; + /* - * If a bio is swhandled, then it will be decrypted when bio_endio - * is called. As we only want the data to be decrypted once, copies - * of the bio must not have have a crypt context. + * If a bio is fallback_crypted, then it will be decrypted when + * bio_endio is called. As we only want the data to be decrypted once, + * copies of the bio must not have have a crypt context. */ - if (!bio_has_crypt_ctx(src) || bio_crypt_swhandled(src)) - return 0; + if (!src_bc || bio_crypt_fallback_crypted(src_bc)) + return; dst->bi_crypt_context = bio_crypt_alloc_ctx(gfp_mask); - if (!dst->bi_crypt_context) - return -ENOMEM; + *dst->bi_crypt_context = *src_bc; - *dst->bi_crypt_context = *src->bi_crypt_context; - - if (bio_crypt_has_keyslot(src)) - keyslot_manager_get_slot(src->bi_crypt_context->processing_ksm, - src->bi_crypt_context->keyslot); - - return 0; + if (src_bc->bc_keyslot >= 0) + keyslot_manager_get_slot(src_bc->bc_ksm, src_bc->bc_keyslot); } -EXPORT_SYMBOL(bio_crypt_clone); +EXPORT_SYMBOL_GPL(bio_crypt_clone); -bool bio_crypt_should_process(struct bio *bio, struct request_queue *q) +bool bio_crypt_should_process(struct request *rq) { - if (!bio_has_crypt_ctx(bio)) + struct bio *bio = rq->bio; + + if (!bio || !bio->bi_crypt_context) return false; - if (q->ksm != bio->bi_crypt_context->processing_ksm) - return false; - - WARN_ON(!bio_crypt_has_keyslot(bio)); - return true; + return rq->q->ksm == bio->bi_crypt_context->bc_ksm; } -EXPORT_SYMBOL(bio_crypt_should_process); +EXPORT_SYMBOL_GPL(bio_crypt_should_process); /* * Checks that two bio crypt contexts are compatible - i.e. that @@ -87,23 +96,19 @@ bool bio_crypt_ctx_compatible(struct bio *b_1, struct bio *b_2) struct bio_crypt_ctx *bc1 = b_1->bi_crypt_context; struct bio_crypt_ctx *bc2 = b_2->bi_crypt_context; - if (bio_has_crypt_ctx(b_1) != bio_has_crypt_ctx(b_2)) + if (bc1 != bc2) return false; - if (!bio_has_crypt_ctx(b_1)) - return true; - - return bc1->keyslot == bc2->keyslot && - bc1->data_unit_size_bits == bc2->data_unit_size_bits; + return !bc1 || bc1->bc_key == bc2->bc_key; } /* * Checks that two bio crypt contexts are compatible, and also * that their data_unit_nums are continuous (and can hence be merged) + * in the order b_1 followed by b_2. */ -bool bio_crypt_ctx_back_mergeable(struct bio *b_1, - unsigned int b1_sectors, - struct bio *b_2) +bool bio_crypt_ctx_mergeable(struct bio *b_1, unsigned int b1_bytes, + struct bio *b_2) { struct bio_crypt_ctx *bc1 = b_1->bi_crypt_context; struct bio_crypt_ctx *bc2 = b_2->bi_crypt_context; @@ -111,35 +116,25 @@ bool bio_crypt_ctx_back_mergeable(struct bio *b_1, if (!bio_crypt_ctx_compatible(b_1, b_2)) return false; - return !bio_has_crypt_ctx(b_1) || - (bc1->data_unit_num + - (b1_sectors >> (bc1->data_unit_size_bits - 9)) == - bc2->data_unit_num); + return !bc1 || bio_crypt_dun_is_contiguous(bc1, b1_bytes, bc2->bc_dun); } -void bio_crypt_ctx_release_keyslot(struct bio *bio) +void bio_crypt_ctx_release_keyslot(struct bio_crypt_ctx *bc) { - struct bio_crypt_ctx *crypt_ctx = bio->bi_crypt_context; - - keyslot_manager_put_slot(crypt_ctx->processing_ksm, crypt_ctx->keyslot); - bio->bi_crypt_context->processing_ksm = NULL; - bio->bi_crypt_context->keyslot = -1; + keyslot_manager_put_slot(bc->bc_ksm, bc->bc_keyslot); + bc->bc_ksm = NULL; + bc->bc_keyslot = -1; } -int bio_crypt_ctx_acquire_keyslot(struct bio *bio, struct keyslot_manager *ksm) +int bio_crypt_ctx_acquire_keyslot(struct bio_crypt_ctx *bc, + struct keyslot_manager *ksm) { - int slot; - enum blk_crypto_mode_num crypto_mode = bio_crypto_mode(bio); + int slot = keyslot_manager_get_slot_for_key(ksm, bc->bc_key); - if (!ksm) - return -ENOMEM; - - slot = keyslot_manager_get_slot_for_key(ksm, - bio_crypt_raw_key(bio), crypto_mode, - 1 << bio->bi_crypt_context->data_unit_size_bits); if (slot < 0) return slot; - bio_crypt_set_keyslot(bio, slot, ksm); + bc->bc_keyslot = slot; + bc->bc_ksm = ksm; return 0; } diff --git a/block/bio.c b/block/bio.c index 9b5630b67a55..6ef2e22d2bf3 100644 --- a/block/bio.c +++ b/block/bio.c @@ -244,6 +244,8 @@ fallback: void bio_uninit(struct bio *bio) { bio_disassociate_task(bio); + + bio_crypt_free_ctx(bio); } EXPORT_SYMBOL(bio_uninit); @@ -252,7 +254,6 @@ static void bio_free(struct bio *bio) struct bio_set *bs = bio->bi_pool; void *p; - bio_crypt_free_ctx(bio); bio_uninit(bio); if (bs) { @@ -630,10 +631,7 @@ struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs) __bio_clone_fast(b, bio); - if (bio_crypt_clone(b, bio, gfp_mask) < 0) { - bio_put(b); - return NULL; - } + bio_crypt_clone(b, bio, gfp_mask); if (bio_integrity(bio) && bio_integrity_clone(b, bio, gfp_mask) < 0) { @@ -706,10 +704,7 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask, break; } - if (bio_crypt_clone(bio, bio_src, gfp_mask) < 0) { - bio_put(bio); - return NULL; - } + bio_crypt_clone(bio, bio_src, gfp_mask); if (bio_integrity(bio_src)) { int ret; diff --git a/block/blk-core.c b/block/blk-core.c index 2395bd140ae3..4322a1895e32 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -3673,8 +3673,8 @@ int __init blk_dev_init(void) if (bio_crypt_ctx_init() < 0) panic("Failed to allocate mem for bio crypt ctxs\n"); - if (blk_crypto_init() < 0) - panic("Failed to init blk-crypto\n"); + if (blk_crypto_fallback_init() < 0) + panic("Failed to init blk-crypto-fallback\n"); return 0; } diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c new file mode 100644 index 000000000000..3f2a7d3be07b --- /dev/null +++ b/block/blk-crypto-fallback.c @@ -0,0 +1,647 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2019 Google LLC + */ + +/* + * Refer to Documentation/block/inline-encryption.rst for detailed explanation. + */ + +#define pr_fmt(fmt) "blk-crypto-fallback: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "blk-crypto-internal.h" + +static unsigned int num_prealloc_bounce_pg = 32; +module_param(num_prealloc_bounce_pg, uint, 0); +MODULE_PARM_DESC(num_prealloc_bounce_pg, + "Number of preallocated bounce pages for the blk-crypto crypto API fallback"); + +static unsigned int blk_crypto_num_keyslots = 100; +module_param_named(num_keyslots, blk_crypto_num_keyslots, uint, 0); +MODULE_PARM_DESC(num_keyslots, + "Number of keyslots for the blk-crypto crypto API fallback"); + +static unsigned int num_prealloc_fallback_crypt_ctxs = 128; +module_param(num_prealloc_fallback_crypt_ctxs, uint, 0); +MODULE_PARM_DESC(num_prealloc_crypt_fallback_ctxs, + "Number of preallocated bio fallback crypto contexts for blk-crypto to use during crypto API fallback"); + +struct bio_fallback_crypt_ctx { + struct bio_crypt_ctx crypt_ctx; + /* + * Copy of the bvec_iter when this bio was submitted. + * We only want to en/decrypt the part of the bio as described by the + * bvec_iter upon submission because bio might be split before being + * resubmitted + */ + struct bvec_iter crypt_iter; + u64 fallback_dun[BLK_CRYPTO_DUN_ARRAY_SIZE]; +}; + +/* The following few vars are only used during the crypto API fallback */ +static struct kmem_cache *bio_fallback_crypt_ctx_cache; +static mempool_t *bio_fallback_crypt_ctx_pool; + +/* + * Allocating a crypto tfm during I/O can deadlock, so we have to preallocate + * all of a mode's tfms when that mode starts being used. Since each mode may + * need all the keyslots at some point, each mode needs its own tfm for each + * keyslot; thus, a keyslot may contain tfms for multiple modes. However, to + * match the behavior of real inline encryption hardware (which only supports a + * single encryption context per keyslot), we only allow one tfm per keyslot to + * be used at a time - the rest of the unused tfms have their keys cleared. + */ +static DEFINE_MUTEX(tfms_init_lock); +static bool tfms_inited[BLK_ENCRYPTION_MODE_MAX]; + +struct blk_crypto_decrypt_work { + struct work_struct work; + struct bio *bio; +}; + +static struct blk_crypto_keyslot { + struct crypto_skcipher *tfm; + enum blk_crypto_mode_num crypto_mode; + struct crypto_skcipher *tfms[BLK_ENCRYPTION_MODE_MAX]; +} *blk_crypto_keyslots; + +/* The following few vars are only used during the crypto API fallback */ +static struct keyslot_manager *blk_crypto_ksm; +static struct workqueue_struct *blk_crypto_wq; +static mempool_t *blk_crypto_bounce_page_pool; +static struct kmem_cache *blk_crypto_decrypt_work_cache; + +bool bio_crypt_fallback_crypted(const struct bio_crypt_ctx *bc) +{ + return bc && bc->bc_ksm == blk_crypto_ksm; +} + +/* + * This is the key we set when evicting a keyslot. This *should* be the all 0's + * key, but AES-XTS rejects that key, so we use some random bytes instead. + */ +static u8 blank_key[BLK_CRYPTO_MAX_KEY_SIZE]; + +static void blk_crypto_evict_keyslot(unsigned int slot) +{ + struct blk_crypto_keyslot *slotp = &blk_crypto_keyslots[slot]; + enum blk_crypto_mode_num crypto_mode = slotp->crypto_mode; + int err; + + WARN_ON(slotp->crypto_mode == BLK_ENCRYPTION_MODE_INVALID); + + /* Clear the key in the skcipher */ + err = crypto_skcipher_setkey(slotp->tfms[crypto_mode], blank_key, + blk_crypto_modes[crypto_mode].keysize); + WARN_ON(err); + slotp->crypto_mode = BLK_ENCRYPTION_MODE_INVALID; +} + +static int blk_crypto_keyslot_program(struct keyslot_manager *ksm, + const struct blk_crypto_key *key, + unsigned int slot) +{ + struct blk_crypto_keyslot *slotp = &blk_crypto_keyslots[slot]; + const enum blk_crypto_mode_num crypto_mode = key->crypto_mode; + int err; + + if (crypto_mode != slotp->crypto_mode && + slotp->crypto_mode != BLK_ENCRYPTION_MODE_INVALID) { + blk_crypto_evict_keyslot(slot); + } + + if (!slotp->tfms[crypto_mode]) + return -ENOMEM; + slotp->crypto_mode = crypto_mode; + err = crypto_skcipher_setkey(slotp->tfms[crypto_mode], key->raw, + key->size); + if (err) { + blk_crypto_evict_keyslot(slot); + return err; + } + return 0; +} + +static int blk_crypto_keyslot_evict(struct keyslot_manager *ksm, + const struct blk_crypto_key *key, + unsigned int slot) +{ + blk_crypto_evict_keyslot(slot); + return 0; +} + +/* + * The crypto API fallback KSM ops - only used for a bio when it specifies a + * blk_crypto_mode for which we failed to get a keyslot in the device's inline + * encryption hardware (which probably means the device doesn't have inline + * encryption hardware that supports that crypto mode). + */ +static const struct keyslot_mgmt_ll_ops blk_crypto_ksm_ll_ops = { + .keyslot_program = blk_crypto_keyslot_program, + .keyslot_evict = blk_crypto_keyslot_evict, +}; + +static void blk_crypto_encrypt_endio(struct bio *enc_bio) +{ + struct bio *src_bio = enc_bio->bi_private; + int i; + + for (i = 0; i < enc_bio->bi_vcnt; i++) + mempool_free(enc_bio->bi_io_vec[i].bv_page, + blk_crypto_bounce_page_pool); + + src_bio->bi_status = enc_bio->bi_status; + + bio_put(enc_bio); + bio_endio(src_bio); +} + +static struct bio *blk_crypto_clone_bio(struct bio *bio_src) +{ + struct bvec_iter iter; + struct bio_vec bv; + struct bio *bio; + + bio = bio_alloc_bioset(GFP_NOIO, bio_segments(bio_src), NULL); + if (!bio) + return NULL; + bio->bi_disk = bio_src->bi_disk; + bio->bi_opf = bio_src->bi_opf; + bio->bi_ioprio = bio_src->bi_ioprio; + bio->bi_write_hint = bio_src->bi_write_hint; + bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector; + bio->bi_iter.bi_size = bio_src->bi_iter.bi_size; + + bio_for_each_segment(bv, bio_src, iter) + bio->bi_io_vec[bio->bi_vcnt++] = bv; + + if (bio_integrity(bio_src) && + bio_integrity_clone(bio, bio_src, GFP_NOIO) < 0) { + bio_put(bio); + return NULL; + } + + bio_clone_blkcg_association(bio, bio_src); + + return bio; +} + +static int blk_crypto_alloc_cipher_req(struct bio *src_bio, + struct skcipher_request **ciph_req_ret, + struct crypto_wait *wait) +{ + struct skcipher_request *ciph_req; + const struct blk_crypto_keyslot *slotp; + + slotp = &blk_crypto_keyslots[src_bio->bi_crypt_context->bc_keyslot]; + ciph_req = skcipher_request_alloc(slotp->tfms[slotp->crypto_mode], + GFP_NOIO); + if (!ciph_req) { + src_bio->bi_status = BLK_STS_RESOURCE; + return -ENOMEM; + } + + skcipher_request_set_callback(ciph_req, + CRYPTO_TFM_REQ_MAY_BACKLOG | + CRYPTO_TFM_REQ_MAY_SLEEP, + crypto_req_done, wait); + *ciph_req_ret = ciph_req; + return 0; +} + +static int blk_crypto_split_bio_if_needed(struct bio **bio_ptr) +{ + struct bio *bio = *bio_ptr; + unsigned int i = 0; + unsigned int num_sectors = 0; + struct bio_vec bv; + struct bvec_iter iter; + + bio_for_each_segment(bv, bio, iter) { + num_sectors += bv.bv_len >> SECTOR_SHIFT; + if (++i == BIO_MAX_PAGES) + break; + } + if (num_sectors < bio_sectors(bio)) { + struct bio *split_bio; + + split_bio = bio_split(bio, num_sectors, GFP_NOIO, NULL); + if (!split_bio) { + bio->bi_status = BLK_STS_RESOURCE; + return -ENOMEM; + } + bio_chain(split_bio, bio); + generic_make_request(bio); + *bio_ptr = split_bio; + } + return 0; +} + +union blk_crypto_iv { + __le64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE]; + u8 bytes[BLK_CRYPTO_MAX_IV_SIZE]; +}; + +static void blk_crypto_dun_to_iv(const u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE], + union blk_crypto_iv *iv) +{ + int i; + + for (i = 0; i < BLK_CRYPTO_DUN_ARRAY_SIZE; i++) + iv->dun[i] = cpu_to_le64(dun[i]); +} + +/* + * The crypto API fallback's encryption routine. + * Allocate a bounce bio for encryption, encrypt the input bio using crypto API, + * and replace *bio_ptr with the bounce bio. May split input bio if it's too + * large. + */ +static int blk_crypto_encrypt_bio(struct bio **bio_ptr) +{ + struct bio *src_bio; + struct skcipher_request *ciph_req = NULL; + DECLARE_CRYPTO_WAIT(wait); + u64 curr_dun[BLK_CRYPTO_DUN_ARRAY_SIZE]; + union blk_crypto_iv iv; + struct scatterlist src, dst; + struct bio *enc_bio; + unsigned int i, j; + int data_unit_size; + struct bio_crypt_ctx *bc; + int err = 0; + + /* Split the bio if it's too big for single page bvec */ + err = blk_crypto_split_bio_if_needed(bio_ptr); + if (err) + return err; + + src_bio = *bio_ptr; + bc = src_bio->bi_crypt_context; + data_unit_size = bc->bc_key->data_unit_size; + + /* Allocate bounce bio for encryption */ + enc_bio = blk_crypto_clone_bio(src_bio); + if (!enc_bio) { + src_bio->bi_status = BLK_STS_RESOURCE; + return -ENOMEM; + } + + /* + * Use the crypto API fallback keyslot manager to get a crypto_skcipher + * for the algorithm and key specified for this bio. + */ + err = bio_crypt_ctx_acquire_keyslot(bc, blk_crypto_ksm); + if (err) { + src_bio->bi_status = BLK_STS_IOERR; + goto out_put_enc_bio; + } + + /* and then allocate an skcipher_request for it */ + err = blk_crypto_alloc_cipher_req(src_bio, &ciph_req, &wait); + if (err) + goto out_release_keyslot; + + memcpy(curr_dun, bc->bc_dun, sizeof(curr_dun)); + sg_init_table(&src, 1); + sg_init_table(&dst, 1); + + skcipher_request_set_crypt(ciph_req, &src, &dst, data_unit_size, + iv.bytes); + + /* Encrypt each page in the bounce bio */ + for (i = 0; i < enc_bio->bi_vcnt; i++) { + struct bio_vec *enc_bvec = &enc_bio->bi_io_vec[i]; + struct page *plaintext_page = enc_bvec->bv_page; + struct page *ciphertext_page = + mempool_alloc(blk_crypto_bounce_page_pool, GFP_NOIO); + + enc_bvec->bv_page = ciphertext_page; + + if (!ciphertext_page) { + src_bio->bi_status = BLK_STS_RESOURCE; + err = -ENOMEM; + goto out_free_bounce_pages; + } + + sg_set_page(&src, plaintext_page, data_unit_size, + enc_bvec->bv_offset); + sg_set_page(&dst, ciphertext_page, data_unit_size, + enc_bvec->bv_offset); + + /* Encrypt each data unit in this page */ + for (j = 0; j < enc_bvec->bv_len; j += data_unit_size) { + blk_crypto_dun_to_iv(curr_dun, &iv); + err = crypto_wait_req(crypto_skcipher_encrypt(ciph_req), + &wait); + if (err) { + i++; + src_bio->bi_status = BLK_STS_RESOURCE; + goto out_free_bounce_pages; + } + bio_crypt_dun_increment(curr_dun, 1); + src.offset += data_unit_size; + dst.offset += data_unit_size; + } + } + + enc_bio->bi_private = src_bio; + enc_bio->bi_end_io = blk_crypto_encrypt_endio; + *bio_ptr = enc_bio; + + enc_bio = NULL; + err = 0; + goto out_free_ciph_req; + +out_free_bounce_pages: + while (i > 0) + mempool_free(enc_bio->bi_io_vec[--i].bv_page, + blk_crypto_bounce_page_pool); +out_free_ciph_req: + skcipher_request_free(ciph_req); +out_release_keyslot: + bio_crypt_ctx_release_keyslot(bc); +out_put_enc_bio: + if (enc_bio) + bio_put(enc_bio); + + return err; +} + +static void blk_crypto_free_fallback_crypt_ctx(struct bio *bio) +{ + mempool_free(container_of(bio->bi_crypt_context, + struct bio_fallback_crypt_ctx, + crypt_ctx), + bio_fallback_crypt_ctx_pool); + bio->bi_crypt_context = NULL; +} + +/* + * The crypto API fallback's main decryption routine. + * Decrypts input bio in place. + */ +static void blk_crypto_decrypt_bio(struct work_struct *work) +{ + struct blk_crypto_decrypt_work *decrypt_work = + container_of(work, struct blk_crypto_decrypt_work, work); + struct bio *bio = decrypt_work->bio; + struct skcipher_request *ciph_req = NULL; + DECLARE_CRYPTO_WAIT(wait); + struct bio_vec bv; + struct bvec_iter iter; + u64 curr_dun[BLK_CRYPTO_DUN_ARRAY_SIZE]; + union blk_crypto_iv iv; + struct scatterlist sg; + struct bio_crypt_ctx *bc = bio->bi_crypt_context; + struct bio_fallback_crypt_ctx *f_ctx = + container_of(bc, struct bio_fallback_crypt_ctx, crypt_ctx); + const int data_unit_size = bc->bc_key->data_unit_size; + unsigned int i; + int err; + + /* + * Use the crypto API fallback keyslot manager to get a crypto_skcipher + * for the algorithm and key specified for this bio. + */ + if (bio_crypt_ctx_acquire_keyslot(bc, blk_crypto_ksm)) { + bio->bi_status = BLK_STS_RESOURCE; + goto out_no_keyslot; + } + + /* and then allocate an skcipher_request for it */ + err = blk_crypto_alloc_cipher_req(bio, &ciph_req, &wait); + if (err) + goto out; + + memcpy(curr_dun, f_ctx->fallback_dun, sizeof(curr_dun)); + sg_init_table(&sg, 1); + skcipher_request_set_crypt(ciph_req, &sg, &sg, data_unit_size, + iv.bytes); + + /* Decrypt each segment in the bio */ + __bio_for_each_segment(bv, bio, iter, f_ctx->crypt_iter) { + struct page *page = bv.bv_page; + + sg_set_page(&sg, page, data_unit_size, bv.bv_offset); + + /* Decrypt each data unit in the segment */ + for (i = 0; i < bv.bv_len; i += data_unit_size) { + blk_crypto_dun_to_iv(curr_dun, &iv); + if (crypto_wait_req(crypto_skcipher_decrypt(ciph_req), + &wait)) { + bio->bi_status = BLK_STS_IOERR; + goto out; + } + bio_crypt_dun_increment(curr_dun, 1); + sg.offset += data_unit_size; + } + } + +out: + skcipher_request_free(ciph_req); + bio_crypt_ctx_release_keyslot(bc); +out_no_keyslot: + kmem_cache_free(blk_crypto_decrypt_work_cache, decrypt_work); + blk_crypto_free_fallback_crypt_ctx(bio); + bio_endio(bio); +} + +/* + * Queue bio for decryption. + * Returns true iff bio was queued for decryption. + */ +bool blk_crypto_queue_decrypt_bio(struct bio *bio) +{ + struct blk_crypto_decrypt_work *decrypt_work; + + /* If there was an IO error, don't queue for decrypt. */ + if (bio->bi_status) + goto out; + + decrypt_work = kmem_cache_zalloc(blk_crypto_decrypt_work_cache, + GFP_ATOMIC); + if (!decrypt_work) { + bio->bi_status = BLK_STS_RESOURCE; + goto out; + } + + INIT_WORK(&decrypt_work->work, blk_crypto_decrypt_bio); + decrypt_work->bio = bio; + queue_work(blk_crypto_wq, &decrypt_work->work); + + return true; +out: + blk_crypto_free_fallback_crypt_ctx(bio); + return false; +} + +/** + * blk_crypto_start_using_mode() - Start using a crypto algorithm on a device + * @mode_num: the blk_crypto_mode we want to allocate ciphers for. + * @data_unit_size: the data unit size that will be used + * @q: the request queue for the device + * + * Upper layers must call this function to ensure that a the crypto API fallback + * has transforms for this algorithm, if they become necessary. + * + * Return: 0 on success and -err on error. + */ +int blk_crypto_start_using_mode(enum blk_crypto_mode_num mode_num, + unsigned int data_unit_size, + struct request_queue *q) +{ + struct blk_crypto_keyslot *slotp; + unsigned int i; + int err = 0; + + /* + * Fast path + * Ensure that updates to blk_crypto_keyslots[i].tfms[mode_num] + * for each i are visible before we try to access them. + */ + if (likely(smp_load_acquire(&tfms_inited[mode_num]))) + return 0; + + /* + * If the keyslot manager of the request queue supports this + * crypto mode, then we don't need to allocate this mode. + */ + if (keyslot_manager_crypto_mode_supported(q->ksm, mode_num, + data_unit_size)) + return 0; + + mutex_lock(&tfms_init_lock); + if (likely(tfms_inited[mode_num])) + goto out; + + for (i = 0; i < blk_crypto_num_keyslots; i++) { + slotp = &blk_crypto_keyslots[i]; + slotp->tfms[mode_num] = crypto_alloc_skcipher( + blk_crypto_modes[mode_num].cipher_str, + 0, 0); + if (IS_ERR(slotp->tfms[mode_num])) { + err = PTR_ERR(slotp->tfms[mode_num]); + slotp->tfms[mode_num] = NULL; + goto out_free_tfms; + } + + crypto_skcipher_set_flags(slotp->tfms[mode_num], + CRYPTO_TFM_REQ_WEAK_KEY); + } + + /* + * Ensure that updates to blk_crypto_keyslots[i].tfms[mode_num] + * for each i are visible before we set tfms_inited[mode_num]. + */ + smp_store_release(&tfms_inited[mode_num], true); + goto out; + +out_free_tfms: + for (i = 0; i < blk_crypto_num_keyslots; i++) { + slotp = &blk_crypto_keyslots[i]; + crypto_free_skcipher(slotp->tfms[mode_num]); + slotp->tfms[mode_num] = NULL; + } +out: + mutex_unlock(&tfms_init_lock); + return err; +} + +int blk_crypto_fallback_evict_key(const struct blk_crypto_key *key) +{ + return keyslot_manager_evict_key(blk_crypto_ksm, key); +} + +int blk_crypto_fallback_submit_bio(struct bio **bio_ptr) +{ + struct bio *bio = *bio_ptr; + struct bio_crypt_ctx *bc = bio->bi_crypt_context; + struct bio_fallback_crypt_ctx *f_ctx; + + if (WARN_ON_ONCE(!tfms_inited[bc->bc_key->crypto_mode])) { + bio->bi_status = BLK_STS_IOERR; + return -EIO; + } + + if (bio_data_dir(bio) == WRITE) + return blk_crypto_encrypt_bio(bio_ptr); + + /* + * Mark bio as fallback crypted and replace the bio_crypt_ctx with + * another one contained in a bio_fallback_crypt_ctx, so that the + * fallback has space to store the info it needs for decryption. + */ + bc->bc_ksm = blk_crypto_ksm; + f_ctx = mempool_alloc(bio_fallback_crypt_ctx_pool, GFP_NOIO); + f_ctx->crypt_ctx = *bc; + memcpy(f_ctx->fallback_dun, bc->bc_dun, sizeof(f_ctx->fallback_dun)); + f_ctx->crypt_iter = bio->bi_iter; + + bio_crypt_free_ctx(bio); + bio->bi_crypt_context = &f_ctx->crypt_ctx; + + return 0; +} + +int __init blk_crypto_fallback_init(void) +{ + int i; + unsigned int crypto_mode_supported[BLK_ENCRYPTION_MODE_MAX]; + + prandom_bytes(blank_key, BLK_CRYPTO_MAX_KEY_SIZE); + + /* All blk-crypto modes have a crypto API fallback. */ + for (i = 0; i < BLK_ENCRYPTION_MODE_MAX; i++) + crypto_mode_supported[i] = 0xFFFFFFFF; + crypto_mode_supported[BLK_ENCRYPTION_MODE_INVALID] = 0; + + blk_crypto_ksm = keyslot_manager_create(blk_crypto_num_keyslots, + &blk_crypto_ksm_ll_ops, + crypto_mode_supported, NULL); + if (!blk_crypto_ksm) + return -ENOMEM; + + blk_crypto_wq = alloc_workqueue("blk_crypto_wq", + WQ_UNBOUND | WQ_HIGHPRI | + WQ_MEM_RECLAIM, num_online_cpus()); + if (!blk_crypto_wq) + return -ENOMEM; + + blk_crypto_keyslots = kcalloc(blk_crypto_num_keyslots, + sizeof(blk_crypto_keyslots[0]), + GFP_KERNEL); + if (!blk_crypto_keyslots) + return -ENOMEM; + + blk_crypto_bounce_page_pool = + mempool_create_page_pool(num_prealloc_bounce_pg, 0); + if (!blk_crypto_bounce_page_pool) + return -ENOMEM; + + blk_crypto_decrypt_work_cache = KMEM_CACHE(blk_crypto_decrypt_work, + SLAB_RECLAIM_ACCOUNT); + if (!blk_crypto_decrypt_work_cache) + return -ENOMEM; + + bio_fallback_crypt_ctx_cache = KMEM_CACHE(bio_fallback_crypt_ctx, 0); + if (!bio_fallback_crypt_ctx_cache) + return -ENOMEM; + + bio_fallback_crypt_ctx_pool = + mempool_create_slab_pool(num_prealloc_fallback_crypt_ctxs, + bio_fallback_crypt_ctx_cache); + if (!bio_fallback_crypt_ctx_pool) + return -ENOMEM; + + return 0; +} diff --git a/block/blk-crypto-internal.h b/block/blk-crypto-internal.h new file mode 100644 index 000000000000..43351eecc97a --- /dev/null +++ b/block/blk-crypto-internal.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2019 Google LLC + */ + +#ifndef __LINUX_BLK_CRYPTO_INTERNAL_H +#define __LINUX_BLK_CRYPTO_INTERNAL_H + +#include + +/* Represents a crypto mode supported by blk-crypto */ +struct blk_crypto_mode { + const char *cipher_str; /* crypto API name (for fallback case) */ + unsigned int keysize; /* key size in bytes */ + unsigned int ivsize; /* iv size in bytes */ +}; + +extern const struct blk_crypto_mode blk_crypto_modes[]; + +#ifdef CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK + +int blk_crypto_fallback_submit_bio(struct bio **bio_ptr); + +bool blk_crypto_queue_decrypt_bio(struct bio *bio); + +int blk_crypto_fallback_evict_key(const struct blk_crypto_key *key); + +bool bio_crypt_fallback_crypted(const struct bio_crypt_ctx *bc); + +#else /* CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK */ + +static inline bool bio_crypt_fallback_crypted(const struct bio_crypt_ctx *bc) +{ + return false; +} + +static inline int blk_crypto_fallback_submit_bio(struct bio **bio_ptr) +{ + pr_warn_once("blk-crypto crypto API fallback disabled; failing request"); + (*bio_ptr)->bi_status = BLK_STS_NOTSUPP; + return -EIO; +} + +static inline bool blk_crypto_queue_decrypt_bio(struct bio *bio) +{ + WARN_ON(1); + return false; +} + +static inline int +blk_crypto_fallback_evict_key(const struct blk_crypto_key *key) +{ + return 0; +} + +#endif /* CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK */ + +#endif /* __LINUX_BLK_CRYPTO_INTERNAL_H */ diff --git a/block/blk-crypto.c b/block/blk-crypto.c index 3cb3412665b2..2c47e8eec865 100644 --- a/block/blk-crypto.c +++ b/block/blk-crypto.c @@ -10,218 +10,36 @@ #define pr_fmt(fmt) "blk-crypto: " fmt #include +#include #include -#include -#include -#include -#include -#include -#include -#include +#include +#include -/* Represents a crypto mode supported by blk-crypto */ -struct blk_crypto_mode { - const char *cipher_str; /* crypto API name (for fallback case) */ - size_t keysize; /* key size in bytes */ -}; +#include "blk-crypto-internal.h" -static const struct blk_crypto_mode blk_crypto_modes[] = { +const struct blk_crypto_mode blk_crypto_modes[] = { [BLK_ENCRYPTION_MODE_AES_256_XTS] = { .cipher_str = "xts(aes)", .keysize = 64, + .ivsize = 16, + }, + [BLK_ENCRYPTION_MODE_AES_128_CBC_ESSIV] = { + .cipher_str = "essiv(cbc(aes),sha256)", + .keysize = 16, + .ivsize = 16, + }, + [BLK_ENCRYPTION_MODE_ADIANTUM] = { + .cipher_str = "adiantum(xchacha12,aes)", + .keysize = 32, + .ivsize = 32, }, }; -static unsigned int num_prealloc_bounce_pg = 32; -module_param(num_prealloc_bounce_pg, uint, 0); -MODULE_PARM_DESC(num_prealloc_bounce_pg, - "Number of preallocated bounce pages for blk-crypto to use during crypto API fallback encryption"); - -#define BLK_CRYPTO_MAX_KEY_SIZE 64 -static int blk_crypto_num_keyslots = 100; -module_param_named(num_keyslots, blk_crypto_num_keyslots, int, 0); -MODULE_PARM_DESC(num_keyslots, - "Number of keyslots for crypto API fallback in blk-crypto."); - -static struct blk_crypto_keyslot { - struct crypto_skcipher *tfm; - enum blk_crypto_mode_num crypto_mode; - u8 key[BLK_CRYPTO_MAX_KEY_SIZE]; - struct crypto_skcipher *tfms[ARRAY_SIZE(blk_crypto_modes)]; -} *blk_crypto_keyslots; - -/* - * Allocating a crypto tfm during I/O can deadlock, so we have to preallocate - * all of a mode's tfms when that mode starts being used. Since each mode may - * need all the keyslots at some point, each mode needs its own tfm for each - * keyslot; thus, a keyslot may contain tfms for multiple modes. However, to - * match the behavior of real inline encryption hardware (which only supports a - * single encryption context per keyslot), we only allow one tfm per keyslot to - * be used at a time - the rest of the unused tfms have their keys cleared. - */ -static struct mutex tfms_lock[ARRAY_SIZE(blk_crypto_modes)]; -static bool tfms_inited[ARRAY_SIZE(blk_crypto_modes)]; - -struct work_mem { - struct work_struct crypto_work; - struct bio *bio; -}; - -/* The following few vars are only used during the crypto API fallback */ -static struct keyslot_manager *blk_crypto_ksm; -static struct workqueue_struct *blk_crypto_wq; -static mempool_t *blk_crypto_page_pool; -static struct kmem_cache *blk_crypto_work_mem_cache; - -bool bio_crypt_swhandled(struct bio *bio) -{ - return bio_has_crypt_ctx(bio) && - bio->bi_crypt_context->processing_ksm == blk_crypto_ksm; -} - -static u8 blank_key[BLK_CRYPTO_MAX_KEY_SIZE]; -static void evict_keyslot(unsigned int slot) -{ - struct blk_crypto_keyslot *slotp = &blk_crypto_keyslots[slot]; - enum blk_crypto_mode_num crypto_mode = slotp->crypto_mode; - int err; - - WARN_ON(slotp->crypto_mode == BLK_ENCRYPTION_MODE_INVALID); - - /* Clear the key in the skcipher */ - err = crypto_skcipher_setkey(slotp->tfms[crypto_mode], blank_key, - blk_crypto_modes[crypto_mode].keysize); - WARN_ON(err); - memzero_explicit(slotp->key, BLK_CRYPTO_MAX_KEY_SIZE); - slotp->crypto_mode = BLK_ENCRYPTION_MODE_INVALID; -} - -static int blk_crypto_keyslot_program(void *priv, const u8 *key, - enum blk_crypto_mode_num crypto_mode, - unsigned int data_unit_size, - unsigned int slot) -{ - struct blk_crypto_keyslot *slotp = &blk_crypto_keyslots[slot]; - const struct blk_crypto_mode *mode = &blk_crypto_modes[crypto_mode]; - size_t keysize = mode->keysize; - int err; - - if (crypto_mode != slotp->crypto_mode && - slotp->crypto_mode != BLK_ENCRYPTION_MODE_INVALID) { - evict_keyslot(slot); - } - - if (!slotp->tfms[crypto_mode]) - return -ENOMEM; - slotp->crypto_mode = crypto_mode; - err = crypto_skcipher_setkey(slotp->tfms[crypto_mode], key, keysize); - - if (err) { - evict_keyslot(slot); - return err; - } - - memcpy(slotp->key, key, keysize); - - return 0; -} - -static int blk_crypto_keyslot_evict(void *priv, const u8 *key, - enum blk_crypto_mode_num crypto_mode, - unsigned int data_unit_size, - unsigned int slot) -{ - evict_keyslot(slot); - return 0; -} - -static int blk_crypto_keyslot_find(void *priv, - const u8 *key, - enum blk_crypto_mode_num crypto_mode, - unsigned int data_unit_size_bytes) -{ - int slot; - const size_t keysize = blk_crypto_modes[crypto_mode].keysize; - - for (slot = 0; slot < blk_crypto_num_keyslots; slot++) { - if (blk_crypto_keyslots[slot].crypto_mode == crypto_mode && - !crypto_memneq(blk_crypto_keyslots[slot].key, key, keysize)) - return slot; - } - - return -ENOKEY; -} - -static bool blk_crypto_mode_supported(void *priv, - enum blk_crypto_mode_num crypt_mode, - unsigned int data_unit_size) -{ - /* All blk_crypto_modes are required to have a crypto API fallback. */ - return true; -} - -/* - * The crypto API fallback KSM ops - only used for a bio when it specifies a - * blk_crypto_mode for which we failed to get a keyslot in the device's inline - * encryption hardware (which probably means the device doesn't have inline - * encryption hardware that supports that crypto mode). - */ -static const struct keyslot_mgmt_ll_ops blk_crypto_ksm_ll_ops = { - .keyslot_program = blk_crypto_keyslot_program, - .keyslot_evict = blk_crypto_keyslot_evict, - .keyslot_find = blk_crypto_keyslot_find, - .crypto_mode_supported = blk_crypto_mode_supported, -}; - -static void blk_crypto_encrypt_endio(struct bio *enc_bio) -{ - struct bio *src_bio = enc_bio->bi_private; - int i; - - for (i = 0; i < enc_bio->bi_vcnt; i++) - mempool_free(enc_bio->bi_io_vec[i].bv_page, - blk_crypto_page_pool); - - src_bio->bi_status = enc_bio->bi_status; - - bio_put(enc_bio); - bio_endio(src_bio); -} - -static struct bio *blk_crypto_clone_bio(struct bio *bio_src) -{ - struct bvec_iter iter; - struct bio_vec bv; - struct bio *bio; - - bio = bio_alloc_bioset(GFP_NOIO, bio_segments(bio_src), NULL); - if (!bio) - return NULL; - bio->bi_disk = bio_src->bi_disk; - bio->bi_opf = bio_src->bi_opf; - bio->bi_ioprio = bio_src->bi_ioprio; - bio->bi_write_hint = bio_src->bi_write_hint; - bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector; - bio->bi_iter.bi_size = bio_src->bi_iter.bi_size; - - bio_for_each_segment(bv, bio_src, iter) - bio->bi_io_vec[bio->bi_vcnt++] = bv; - - if (bio_integrity(bio_src) && - bio_integrity_clone(bio, bio_src, GFP_NOIO) < 0) { - bio_put(bio); - return NULL; - } - - bio_clone_blkcg_association(bio, bio_src); - - return bio; -} - /* Check that all I/O segments are data unit aligned */ static int bio_crypt_check_alignment(struct bio *bio) { - int data_unit_size = 1 << bio->bi_crypt_context->data_unit_size_bits; + const unsigned int data_unit_size = + bio->bi_crypt_context->bc_key->data_unit_size; struct bvec_iter iter; struct bio_vec bv; @@ -232,268 +50,6 @@ static int bio_crypt_check_alignment(struct bio *bio) return 0; } -static int blk_crypto_alloc_cipher_req(struct bio *src_bio, - struct skcipher_request **ciph_req_ptr, - struct crypto_wait *wait) -{ - int slot; - struct skcipher_request *ciph_req; - struct blk_crypto_keyslot *slotp; - - slot = bio_crypt_get_keyslot(src_bio); - slotp = &blk_crypto_keyslots[slot]; - ciph_req = skcipher_request_alloc(slotp->tfms[slotp->crypto_mode], - GFP_NOIO); - if (!ciph_req) { - src_bio->bi_status = BLK_STS_RESOURCE; - return -ENOMEM; - } - - skcipher_request_set_callback(ciph_req, - CRYPTO_TFM_REQ_MAY_BACKLOG | - CRYPTO_TFM_REQ_MAY_SLEEP, - crypto_req_done, wait); - *ciph_req_ptr = ciph_req; - return 0; -} - -static int blk_crypto_split_bio_if_needed(struct bio **bio_ptr) -{ - struct bio *bio = *bio_ptr; - unsigned int i = 0; - unsigned int num_sectors = 0; - struct bio_vec bv; - struct bvec_iter iter; - - bio_for_each_segment(bv, bio, iter) { - num_sectors += bv.bv_len >> SECTOR_SHIFT; - if (++i == BIO_MAX_PAGES) - break; - } - if (num_sectors < bio_sectors(bio)) { - struct bio *split_bio; - - split_bio = bio_split(bio, num_sectors, GFP_NOIO, NULL); - if (!split_bio) { - bio->bi_status = BLK_STS_RESOURCE; - return -ENOMEM; - } - bio_chain(split_bio, bio); - generic_make_request(bio); - *bio_ptr = split_bio; - } - return 0; -} - -/* - * The crypto API fallback's encryption routine. - * Allocate a bounce bio for encryption, encrypt the input bio using - * crypto API, and replace *bio_ptr with the bounce bio. May split input - * bio if it's too large. - */ -static int blk_crypto_encrypt_bio(struct bio **bio_ptr) -{ - struct bio *src_bio; - struct skcipher_request *ciph_req = NULL; - DECLARE_CRYPTO_WAIT(wait); - int err = 0; - u64 curr_dun; - union { - __le64 dun; - u8 bytes[16]; - } iv; - struct scatterlist src, dst; - struct bio *enc_bio; - struct bio_vec *enc_bvec; - int i, j; - int data_unit_size; - - /* Split the bio if it's too big for single page bvec */ - err = blk_crypto_split_bio_if_needed(bio_ptr); - if (err) - return err; - - src_bio = *bio_ptr; - data_unit_size = 1 << src_bio->bi_crypt_context->data_unit_size_bits; - - /* Allocate bounce bio for encryption */ - enc_bio = blk_crypto_clone_bio(src_bio); - if (!enc_bio) { - src_bio->bi_status = BLK_STS_RESOURCE; - return -ENOMEM; - } - - /* - * Use the crypto API fallback keyslot manager to get a crypto_skcipher - * for the algorithm and key specified for this bio. - */ - err = bio_crypt_ctx_acquire_keyslot(src_bio, blk_crypto_ksm); - if (err) { - src_bio->bi_status = BLK_STS_IOERR; - goto out_put_enc_bio; - } - - /* and then allocate an skcipher_request for it */ - err = blk_crypto_alloc_cipher_req(src_bio, &ciph_req, &wait); - if (err) - goto out_release_keyslot; - - curr_dun = bio_crypt_data_unit_num(src_bio); - sg_init_table(&src, 1); - sg_init_table(&dst, 1); - - skcipher_request_set_crypt(ciph_req, &src, &dst, - data_unit_size, iv.bytes); - - /* Encrypt each page in the bounce bio */ - for (i = 0, enc_bvec = enc_bio->bi_io_vec; i < enc_bio->bi_vcnt; - enc_bvec++, i++) { - struct page *plaintext_page = enc_bvec->bv_page; - struct page *ciphertext_page = - mempool_alloc(blk_crypto_page_pool, GFP_NOIO); - - enc_bvec->bv_page = ciphertext_page; - - if (!ciphertext_page) { - src_bio->bi_status = BLK_STS_RESOURCE; - err = -ENOMEM; - goto out_free_bounce_pages; - } - - sg_set_page(&src, plaintext_page, data_unit_size, - enc_bvec->bv_offset); - sg_set_page(&dst, ciphertext_page, data_unit_size, - enc_bvec->bv_offset); - - /* Encrypt each data unit in this page */ - for (j = 0; j < enc_bvec->bv_len; j += data_unit_size) { - memset(&iv, 0, sizeof(iv)); - iv.dun = cpu_to_le64(curr_dun); - - err = crypto_wait_req(crypto_skcipher_encrypt(ciph_req), - &wait); - if (err) { - i++; - src_bio->bi_status = BLK_STS_RESOURCE; - goto out_free_bounce_pages; - } - curr_dun++; - src.offset += data_unit_size; - dst.offset += data_unit_size; - } - } - - enc_bio->bi_private = src_bio; - enc_bio->bi_end_io = blk_crypto_encrypt_endio; - *bio_ptr = enc_bio; - - enc_bio = NULL; - err = 0; - goto out_free_ciph_req; - -out_free_bounce_pages: - while (i > 0) - mempool_free(enc_bio->bi_io_vec[--i].bv_page, - blk_crypto_page_pool); -out_free_ciph_req: - skcipher_request_free(ciph_req); -out_release_keyslot: - bio_crypt_ctx_release_keyslot(src_bio); -out_put_enc_bio: - if (enc_bio) - bio_put(enc_bio); - - return err; -} - -/* - * The crypto API fallback's main decryption routine. - * Decrypts input bio in place. - */ -static void blk_crypto_decrypt_bio(struct work_struct *w) -{ - struct work_mem *work_mem = - container_of(w, struct work_mem, crypto_work); - struct bio *bio = work_mem->bio; - struct skcipher_request *ciph_req = NULL; - DECLARE_CRYPTO_WAIT(wait); - struct bio_vec bv; - struct bvec_iter iter; - u64 curr_dun; - union { - __le64 dun; - u8 bytes[16]; - } iv; - struct scatterlist sg; - int data_unit_size = 1 << bio->bi_crypt_context->data_unit_size_bits; - int i; - int err; - - /* - * Use the crypto API fallback keyslot manager to get a crypto_skcipher - * for the algorithm and key specified for this bio. - */ - if (bio_crypt_ctx_acquire_keyslot(bio, blk_crypto_ksm)) { - bio->bi_status = BLK_STS_RESOURCE; - goto out_no_keyslot; - } - - /* and then allocate an skcipher_request for it */ - err = blk_crypto_alloc_cipher_req(bio, &ciph_req, &wait); - if (err) - goto out; - - curr_dun = bio_crypt_sw_data_unit_num(bio); - sg_init_table(&sg, 1); - skcipher_request_set_crypt(ciph_req, &sg, &sg, data_unit_size, - iv.bytes); - - /* Decrypt each segment in the bio */ - __bio_for_each_segment(bv, bio, iter, - bio->bi_crypt_context->crypt_iter) { - struct page *page = bv.bv_page; - - sg_set_page(&sg, page, data_unit_size, bv.bv_offset); - - /* Decrypt each data unit in the segment */ - for (i = 0; i < bv.bv_len; i += data_unit_size) { - memset(&iv, 0, sizeof(iv)); - iv.dun = cpu_to_le64(curr_dun); - if (crypto_wait_req(crypto_skcipher_decrypt(ciph_req), - &wait)) { - bio->bi_status = BLK_STS_IOERR; - goto out; - } - curr_dun++; - sg.offset += data_unit_size; - } - } - -out: - skcipher_request_free(ciph_req); - bio_crypt_ctx_release_keyslot(bio); -out_no_keyslot: - kmem_cache_free(blk_crypto_work_mem_cache, work_mem); - bio_endio(bio); -} - -/* Queue bio for decryption */ -static void blk_crypto_queue_decrypt_bio(struct bio *bio) -{ - struct work_mem *work_mem = - kmem_cache_zalloc(blk_crypto_work_mem_cache, GFP_ATOMIC); - - if (!work_mem) { - bio->bi_status = BLK_STS_RESOURCE; - bio_endio(bio); - return; - } - - INIT_WORK(&work_mem->crypto_work, blk_crypto_decrypt_bio); - work_mem->bio = bio; - queue_work(blk_crypto_wq, &work_mem->crypto_work); -} - /** * blk_crypto_submit_bio - handle submitting bio for inline encryption * @@ -517,20 +73,20 @@ int blk_crypto_submit_bio(struct bio **bio_ptr) { struct bio *bio = *bio_ptr; struct request_queue *q; + struct bio_crypt_ctx *bc = bio->bi_crypt_context; int err; - struct bio_crypt_ctx *crypt_ctx; - if (!bio_has_crypt_ctx(bio) || !bio_has_data(bio)) + if (!bc || !bio_has_data(bio)) return 0; /* - * When a read bio is marked for sw decryption, its bi_iter is saved - * so that when we decrypt the bio later, we know what part of it was - * marked for sw decryption (when the bio is passed down after + * When a read bio is marked for fallback decryption, its bi_iter is + * saved so that when we decrypt the bio later, we know what part of it + * was marked for fallback decryption (when the bio is passed down after * blk_crypto_submit bio, it may be split or advanced so we cannot rely * on the bi_iter while decrypting in blk_crypto_endio) */ - if (bio_crypt_swhandled(bio)) + if (bio_crypt_fallback_crypted(bc)) return 0; err = bio_crypt_check_alignment(bio); @@ -539,21 +95,22 @@ int blk_crypto_submit_bio(struct bio **bio_ptr) goto out; } - crypt_ctx = bio->bi_crypt_context; q = bio->bi_disk->queue; - if (bio_crypt_has_keyslot(bio)) { + if (bc->bc_ksm) { /* Key already programmed into device? */ - if (q->ksm == crypt_ctx->processing_ksm) + if (q->ksm == bc->bc_ksm) return 0; /* Nope, release the existing keyslot. */ - bio_crypt_ctx_release_keyslot(bio); + bio_crypt_ctx_release_keyslot(bc); } /* Get device keyslot if supported */ - if (q->ksm) { - err = bio_crypt_ctx_acquire_keyslot(bio, q->ksm); + if (keyslot_manager_crypto_mode_supported(q->ksm, + bc->bc_key->crypto_mode, + bc->bc_key->data_unit_size)) { + err = bio_crypt_ctx_acquire_keyslot(bc, q->ksm); if (!err) return 0; @@ -562,24 +119,10 @@ int blk_crypto_submit_bio(struct bio **bio_ptr) } /* Fallback to crypto API */ - if (!READ_ONCE(tfms_inited[bio->bi_crypt_context->crypto_mode])) { - err = -EIO; - bio->bi_status = BLK_STS_IOERR; + err = blk_crypto_fallback_submit_bio(bio_ptr); + if (err) goto out; - } - if (bio_data_dir(bio) == WRITE) { - /* Encrypt the data now */ - err = blk_crypto_encrypt_bio(bio_ptr); - if (err) - goto out; - } else { - /* Mark bio as swhandled */ - bio->bi_crypt_context->processing_ksm = blk_crypto_ksm; - bio->bi_crypt_context->crypt_iter = bio->bi_iter; - bio->bi_crypt_context->sw_data_unit_num = - bio->bi_crypt_context->data_unit_num; - } return 0; out: bio_endio(*bio_ptr); @@ -589,10 +132,10 @@ out: /** * blk_crypto_endio - clean up bio w.r.t inline encryption during bio_endio * - * @bio - the bio to clean up + * @bio: the bio to clean up * - * If blk_crypto_submit_bio decided to fallback to crypto API for this - * bio, we queue the bio for decryption into a workqueue and return false, + * If blk_crypto_submit_bio decided to fallback to crypto API for this bio, + * we queue the bio for decryption into a workqueue and return false, * and call bio_endio(bio) at a later time (after the bio has been decrypted). * * If the bio is not to be decrypted by the crypto API, this function releases @@ -603,195 +146,97 @@ out: */ bool blk_crypto_endio(struct bio *bio) { - if (!bio_has_crypt_ctx(bio)) + struct bio_crypt_ctx *bc = bio->bi_crypt_context; + + if (!bc) return true; - if (bio_crypt_swhandled(bio)) { + if (bio_crypt_fallback_crypted(bc)) { /* - * The only bios that are swhandled when they reach here - * are those with bio_data_dir(bio) == READ, since WRITE - * bios that are encrypted by the crypto API fallback are - * handled by blk_crypto_encrypt_endio. + * The only bios who's crypto is handled by the blk-crypto + * fallback when they reach here are those with + * bio_data_dir(bio) == READ, since WRITE bios that are + * encrypted by the crypto API fallback are handled by + * blk_crypto_encrypt_endio(). */ - - /* If there was an IO error, don't decrypt. */ - if (bio->bi_status) - return true; - - blk_crypto_queue_decrypt_bio(bio); - return false; + return !blk_crypto_queue_decrypt_bio(bio); } - if (bio_crypt_has_keyslot(bio)) - bio_crypt_ctx_release_keyslot(bio); + if (bc->bc_keyslot >= 0) + bio_crypt_ctx_release_keyslot(bc); return true; } /** - * blk_crypto_start_using_mode() - Allocate skciphers for a - * mode_num for all keyslots - * @mode_num - the blk_crypto_mode we want to allocate ciphers for. + * blk_crypto_init_key() - Prepare a key for use with blk-crypto + * @blk_key: Pointer to the blk_crypto_key to initialize. + * @raw_key: Pointer to the raw key. Must be the correct length for the chosen + * @crypto_mode; see blk_crypto_modes[]. + * @crypto_mode: identifier for the encryption algorithm to use + * @data_unit_size: the data unit size to use for en/decryption * - * Upper layers (filesystems) should call this function to ensure that a - * the crypto API fallback has transforms for this algorithm, if they become - * necessary. - * - * Return: 0 on success and -err on error. + * Return: The blk_crypto_key that was prepared, or an ERR_PTR() on error. When + * done using the key, it must be freed with blk_crypto_free_key(). */ -int blk_crypto_start_using_mode(enum blk_crypto_mode_num mode_num, - unsigned int data_unit_size, - struct request_queue *q) +int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key, + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size) { - struct blk_crypto_keyslot *slotp; - int err = 0; - int i; + const struct blk_crypto_mode *mode; + static siphash_key_t hash_key; + + memset(blk_key, 0, sizeof(*blk_key)); + + if (crypto_mode >= ARRAY_SIZE(blk_crypto_modes)) + return -EINVAL; + + mode = &blk_crypto_modes[crypto_mode]; + if (mode->keysize == 0) + return -EINVAL; + + if (!is_power_of_2(data_unit_size)) + return -EINVAL; + + blk_key->crypto_mode = crypto_mode; + blk_key->data_unit_size = data_unit_size; + blk_key->data_unit_size_bits = ilog2(data_unit_size); + blk_key->size = mode->keysize; + memcpy(blk_key->raw, raw_key, mode->keysize); /* - * Fast path - * Ensure that updates to blk_crypto_keyslots[i].tfms[mode_num] - * for each i are visible before we try to access them. + * The keyslot manager uses the SipHash of the key to implement O(1) key + * lookups while avoiding leaking information about the keys. It's + * precomputed here so that it only needs to be computed once per key. */ - if (likely(smp_load_acquire(&tfms_inited[mode_num]))) - return 0; + get_random_once(&hash_key, sizeof(hash_key)); + blk_key->hash = siphash(raw_key, mode->keysize, &hash_key); - /* - * If the keyslot manager of the request queue supports this - * crypto mode, then we don't need to allocate this mode. - */ - if (keyslot_manager_crypto_mode_supported(q->ksm, mode_num, - data_unit_size)) { - return 0; - } - - mutex_lock(&tfms_lock[mode_num]); - if (likely(tfms_inited[mode_num])) - goto out; - - for (i = 0; i < blk_crypto_num_keyslots; i++) { - slotp = &blk_crypto_keyslots[i]; - slotp->tfms[mode_num] = crypto_alloc_skcipher( - blk_crypto_modes[mode_num].cipher_str, - 0, 0); - if (IS_ERR(slotp->tfms[mode_num])) { - err = PTR_ERR(slotp->tfms[mode_num]); - slotp->tfms[mode_num] = NULL; - goto out_free_tfms; - } - - crypto_skcipher_set_flags(slotp->tfms[mode_num], - CRYPTO_TFM_REQ_WEAK_KEY); - } - - /* - * Ensure that updates to blk_crypto_keyslots[i].tfms[mode_num] - * for each i are visible before we set tfms_inited[mode_num]. - */ - smp_store_release(&tfms_inited[mode_num], true); - goto out; - -out_free_tfms: - for (i = 0; i < blk_crypto_num_keyslots; i++) { - slotp = &blk_crypto_keyslots[i]; - crypto_free_skcipher(slotp->tfms[mode_num]); - slotp->tfms[mode_num] = NULL; - } -out: - mutex_unlock(&tfms_lock[mode_num]); - return err; + return 0; } -EXPORT_SYMBOL(blk_crypto_start_using_mode); /** * blk_crypto_evict_key() - Evict a key from any inline encryption hardware * it may have been programmed into - * @q - The request queue who's keyslot manager this key might have been - * programmed into - * @key - The key to evict - * @mode - The blk_crypto_mode_num used with this key - * @data_unit_size - The data unit size used with this key + * @q: The request queue who's keyslot manager this key might have been + * programmed into + * @key: The key to evict * * Upper layers (filesystems) should call this function to ensure that a key * is evicted from hardware that it might have been programmed into. This * will call keyslot_manager_evict_key on the queue's keyslot manager, if one * exists, and supports the crypto algorithm with the specified data unit size. - * Otherwise, it will evict the key from the blk_crypto_ksm. + * Otherwise, it will evict the key from the blk-crypto-fallback's ksm. * * Return: 0 on success, -err on error. */ -int blk_crypto_evict_key(struct request_queue *q, const u8 *key, - enum blk_crypto_mode_num mode, - unsigned int data_unit_size) +int blk_crypto_evict_key(struct request_queue *q, + const struct blk_crypto_key *key) { - struct keyslot_manager *ksm = blk_crypto_ksm; + if (q->ksm && + keyslot_manager_crypto_mode_supported(q->ksm, key->crypto_mode, + key->data_unit_size)) + return keyslot_manager_evict_key(q->ksm, key); - if (q && q->ksm && keyslot_manager_crypto_mode_supported(q->ksm, mode, - data_unit_size)) { - ksm = q->ksm; - } - - return keyslot_manager_evict_key(ksm, key, mode, data_unit_size); -} -EXPORT_SYMBOL(blk_crypto_evict_key); - -int __init blk_crypto_init(void) -{ - int i; - int err = -ENOMEM; - - prandom_bytes(blank_key, BLK_CRYPTO_MAX_KEY_SIZE); - - blk_crypto_ksm = keyslot_manager_create(blk_crypto_num_keyslots, - &blk_crypto_ksm_ll_ops, - NULL); - if (!blk_crypto_ksm) - goto out; - - blk_crypto_wq = alloc_workqueue("blk_crypto_wq", - WQ_UNBOUND | WQ_HIGHPRI | - WQ_MEM_RECLAIM, - num_online_cpus()); - if (!blk_crypto_wq) - goto out_free_ksm; - - blk_crypto_keyslots = kcalloc(blk_crypto_num_keyslots, - sizeof(*blk_crypto_keyslots), - GFP_KERNEL); - if (!blk_crypto_keyslots) - goto out_free_workqueue; - - for (i = 0; i < blk_crypto_num_keyslots; i++) { - blk_crypto_keyslots[i].crypto_mode = - BLK_ENCRYPTION_MODE_INVALID; - } - - for (i = 0; i < ARRAY_SIZE(blk_crypto_modes); i++) - mutex_init(&tfms_lock[i]); - - blk_crypto_page_pool = - mempool_create_page_pool(num_prealloc_bounce_pg, 0); - if (!blk_crypto_page_pool) - goto out_free_keyslots; - - blk_crypto_work_mem_cache = KMEM_CACHE(work_mem, SLAB_RECLAIM_ACCOUNT); - if (!blk_crypto_work_mem_cache) - goto out_free_page_pool; - - return 0; - -out_free_page_pool: - mempool_destroy(blk_crypto_page_pool); - blk_crypto_page_pool = NULL; -out_free_keyslots: - kzfree(blk_crypto_keyslots); - blk_crypto_keyslots = NULL; -out_free_workqueue: - destroy_workqueue(blk_crypto_wq); - blk_crypto_wq = NULL; -out_free_ksm: - keyslot_manager_destroy(blk_crypto_ksm); - blk_crypto_ksm = NULL; -out: - pr_warn("No memory for blk-crypto crypto API fallback."); - return err; + return blk_crypto_fallback_evict_key(key); } diff --git a/block/blk-merge.c b/block/blk-merge.c index bc8da688525f..71768dc75602 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -479,9 +479,6 @@ static inline int ll_new_hw_segment(struct request_queue *q, if (blk_integrity_merge_bio(q, req, bio) == false) goto no_merge; - if (WARN_ON_ONCE(!bio_crypt_ctx_compatible(bio, req->bio))) - goto no_merge; - /* * This will form the start of a new hw segment. Bump both * counters. @@ -507,6 +504,8 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req, req_set_nomerge(q, req); return 0; } + if (!bio_crypt_ctx_mergeable(req->bio, blk_rq_bytes(req), bio)) + return 0; if (!bio_flagged(req->biotail, BIO_SEG_VALID)) blk_recount_segments(q, req->biotail); if (!bio_flagged(bio, BIO_SEG_VALID)) @@ -529,6 +528,8 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req, req_set_nomerge(q, req); return 0; } + if (!bio_crypt_ctx_mergeable(bio, bio->bi_iter.bi_size, req->bio)) + return 0; if (!bio_flagged(bio, BIO_SEG_VALID)) blk_recount_segments(q, bio); if (!bio_flagged(req->bio, BIO_SEG_VALID)) @@ -605,6 +606,9 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, if (blk_integrity_merge_rq(q, req, next) == false) return 0; + if (!bio_crypt_ctx_mergeable(req->bio, blk_rq_bytes(req), next->bio)) + return 0; + /* Merge is OK... */ req->nr_phys_segments = total_phys_segments; return 1; @@ -676,14 +680,8 @@ enum elv_merge blk_try_req_merge(struct request *req, struct request *next) { if (blk_discard_mergable(req)) return ELEVATOR_DISCARD_MERGE; - else if (blk_rq_pos(req) + blk_rq_sectors(req) == blk_rq_pos(next)) { - if (!bio_crypt_ctx_back_mergeable(req->bio, - blk_rq_sectors(req), - next->bio)) { - return ELEVATOR_NO_MERGE; - } + else if (blk_rq_pos(req) + blk_rq_sectors(req) == blk_rq_pos(next)) return ELEVATOR_BACK_MERGE; - } return ELEVATOR_NO_MERGE; } @@ -720,9 +718,6 @@ static struct request *attempt_merge(struct request_queue *q, if (req->write_hint != next->write_hint) return NULL; - if (!bio_crypt_ctx_compatible(req->bio, next->bio)) - return NULL; - /* * If we are allowed to merge, then append bio list * from next to rq and release next. merge_requests_fn @@ -871,22 +866,11 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) enum elv_merge blk_try_merge(struct request *rq, struct bio *bio) { - if (blk_discard_mergable(rq)) { + if (blk_discard_mergable(rq)) return ELEVATOR_DISCARD_MERGE; - } else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == - bio->bi_iter.bi_sector) { - if (!bio_crypt_ctx_back_mergeable(rq->bio, - blk_rq_sectors(rq), bio)) { - return ELEVATOR_NO_MERGE; - } + else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector) return ELEVATOR_BACK_MERGE; - } else if (blk_rq_pos(rq) - bio_sectors(bio) == - bio->bi_iter.bi_sector) { - if (!bio_crypt_ctx_back_mergeable(bio, - bio_sectors(bio), rq->bio)) { - return ELEVATOR_NO_MERGE; - } + else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector) return ELEVATOR_FRONT_MERGE; - } return ELEVATOR_NO_MERGE; } diff --git a/block/keyslot-manager.c b/block/keyslot-manager.c index bcc0c9e77e59..b19d8a47225d 100644 --- a/block/keyslot-manager.c +++ b/block/keyslot-manager.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 /* - * keyslot-manager.c - * * Copyright 2019 Google LLC */ @@ -27,6 +25,7 @@ * Upper layers will call keyslot_manager_get_slot_for_key() to program a * key into some slot in the inline encryption hardware. */ +#include #include #include #include @@ -37,12 +36,14 @@ struct keyslot { atomic_t slot_refs; struct list_head idle_slot_node; + struct hlist_node hash_node; + struct blk_crypto_key key; }; struct keyslot_manager { unsigned int num_slots; - atomic_t num_idle_slots; struct keyslot_mgmt_ll_ops ksm_ll_ops; + unsigned int crypto_mode_supported[BLK_ENCRYPTION_MODE_MAX]; void *ll_priv_data; /* Protects programming and evicting keys from the device */ @@ -53,6 +54,15 @@ struct keyslot_manager { struct list_head idle_slots; spinlock_t idle_slots_lock; + /* + * Hash table which maps key hashes to keyslots, so that we can find a + * key's keyslot in O(1) time rather than O(num_slots). Protected by + * 'lock'. A cryptographic hash function is used so that timing attacks + * can't leak information about the raw keys. + */ + struct hlist_head *slot_hashtable; + unsigned int slot_hashtable_size; + /* Per-keyslot data */ struct keyslot slots[]; }; @@ -63,6 +73,13 @@ struct keyslot_manager { * @ksm_ll_ops: The struct keyslot_mgmt_ll_ops for the device that this keyslot * manager will use to perform operations like programming and * evicting keys. + * @crypto_mode_supported: Array of size BLK_ENCRYPTION_MODE_MAX of + * bitmasks that represents whether a crypto mode + * and data unit size are supported. The i'th bit + * of crypto_mode_supported[crypto_mode] is set iff + * a data unit size of (1 << i) is supported. We + * only support data unit sizes that are powers of + * 2. * @ll_priv_data: Private data passed as is to the functions in ksm_ll_ops. * * Allocate memory for and initialize a keyslot manager. Called by e.g. @@ -72,20 +89,20 @@ struct keyslot_manager { * Return: Pointer to constructed keyslot manager or NULL on error. */ struct keyslot_manager *keyslot_manager_create(unsigned int num_slots, - const struct keyslot_mgmt_ll_ops *ksm_ll_ops, - void *ll_priv_data) + const struct keyslot_mgmt_ll_ops *ksm_ll_ops, + const unsigned int crypto_mode_supported[BLK_ENCRYPTION_MODE_MAX], + void *ll_priv_data) { struct keyslot_manager *ksm; - int slot; + unsigned int slot; + unsigned int i; if (num_slots == 0) return NULL; /* Check that all ops are specified */ if (ksm_ll_ops->keyslot_program == NULL || - ksm_ll_ops->keyslot_evict == NULL || - ksm_ll_ops->crypto_mode_supported == NULL || - ksm_ll_ops->keyslot_find == NULL) + ksm_ll_ops->keyslot_evict == NULL) return NULL; ksm = kvzalloc(struct_size(ksm, slots, num_slots), GFP_KERNEL); @@ -93,8 +110,9 @@ struct keyslot_manager *keyslot_manager_create(unsigned int num_slots, return NULL; ksm->num_slots = num_slots; - atomic_set(&ksm->num_idle_slots, num_slots); ksm->ksm_ll_ops = *ksm_ll_ops; + memcpy(ksm->crypto_mode_supported, crypto_mode_supported, + sizeof(ksm->crypto_mode_supported)); ksm->ll_priv_data = ll_priv_data; init_rwsem(&ksm->lock); @@ -109,9 +127,29 @@ struct keyslot_manager *keyslot_manager_create(unsigned int num_slots, spin_lock_init(&ksm->idle_slots_lock); + ksm->slot_hashtable_size = roundup_pow_of_two(num_slots); + ksm->slot_hashtable = kvmalloc_array(ksm->slot_hashtable_size, + sizeof(ksm->slot_hashtable[0]), + GFP_KERNEL); + if (!ksm->slot_hashtable) + goto err_free_ksm; + for (i = 0; i < ksm->slot_hashtable_size; i++) + INIT_HLIST_HEAD(&ksm->slot_hashtable[i]); + return ksm; + +err_free_ksm: + keyslot_manager_destroy(ksm); + return NULL; +} +EXPORT_SYMBOL_GPL(keyslot_manager_create); + +static inline struct hlist_head * +hash_bucket_for_key(struct keyslot_manager *ksm, + const struct blk_crypto_key *key) +{ + return &ksm->slot_hashtable[key->hash & (ksm->slot_hashtable_size - 1)]; } -EXPORT_SYMBOL(keyslot_manager_create); static void remove_slot_from_lru_list(struct keyslot_manager *ksm, int slot) { @@ -120,22 +158,32 @@ static void remove_slot_from_lru_list(struct keyslot_manager *ksm, int slot) spin_lock_irqsave(&ksm->idle_slots_lock, flags); list_del(&ksm->slots[slot].idle_slot_node); spin_unlock_irqrestore(&ksm->idle_slots_lock, flags); - - atomic_dec(&ksm->num_idle_slots); } -static int find_and_grab_keyslot(struct keyslot_manager *ksm, const u8 *key, - enum blk_crypto_mode_num crypto_mode, - unsigned int data_unit_size) +static int find_keyslot(struct keyslot_manager *ksm, + const struct blk_crypto_key *key) +{ + const struct hlist_head *head = hash_bucket_for_key(ksm, key); + const struct keyslot *slotp; + + hlist_for_each_entry(slotp, head, hash_node) { + if (slotp->key.hash == key->hash && + slotp->key.crypto_mode == key->crypto_mode && + slotp->key.data_unit_size == key->data_unit_size && + !crypto_memneq(slotp->key.raw, key->raw, key->size)) + return slotp - ksm->slots; + } + return -ENOKEY; +} + +static int find_and_grab_keyslot(struct keyslot_manager *ksm, + const struct blk_crypto_key *key) { int slot; - slot = ksm->ksm_ll_ops.keyslot_find(ksm->ll_priv_data, key, - crypto_mode, data_unit_size); + slot = find_keyslot(ksm, key); if (slot < 0) return slot; - if (WARN_ON(slot >= ksm->num_slots)) - return -EINVAL; if (atomic_inc_return(&ksm->slots[slot].slot_refs) == 1) { /* Took first reference to this slot; remove it from LRU list */ remove_slot_from_lru_list(ksm, slot); @@ -146,37 +194,32 @@ static int find_and_grab_keyslot(struct keyslot_manager *ksm, const u8 *key, /** * keyslot_manager_get_slot_for_key() - Program a key into a keyslot. * @ksm: The keyslot manager to program the key into. - * @key: Pointer to the bytes of the key to program. Must be the correct length - * for the chosen @crypto_mode; see blk_crypto_modes in blk-crypto.c. - * @crypto_mode: Identifier for the encryption algorithm to use. - * @data_unit_size: The data unit size to use for en/decryption. + * @key: Pointer to the key object to program, including the raw key, crypto + * mode, and data unit size. * - * Get a keyslot that's been programmed with the specified key, crypto_mode, and - * data_unit_size. If one already exists, return it with incremented refcount. - * Otherwise, wait for a keyslot to become idle and program it. + * Get a keyslot that's been programmed with the specified key. If one already + * exists, return it with incremented refcount. Otherwise, wait for a keyslot + * to become idle and program it. * * Context: Process context. Takes and releases ksm->lock. * Return: The keyslot on success, else a -errno value. */ int keyslot_manager_get_slot_for_key(struct keyslot_manager *ksm, - const u8 *key, - enum blk_crypto_mode_num crypto_mode, - unsigned int data_unit_size) + const struct blk_crypto_key *key) { int slot; int err; struct keyslot *idle_slot; down_read(&ksm->lock); - slot = find_and_grab_keyslot(ksm, key, crypto_mode, data_unit_size); + slot = find_and_grab_keyslot(ksm, key); up_read(&ksm->lock); if (slot != -ENOKEY) return slot; for (;;) { down_write(&ksm->lock); - slot = find_and_grab_keyslot(ksm, key, crypto_mode, - data_unit_size); + slot = find_and_grab_keyslot(ksm, key); if (slot != -ENOKEY) { up_write(&ksm->lock); return slot; @@ -186,42 +229,43 @@ int keyslot_manager_get_slot_for_key(struct keyslot_manager *ksm, * If we're here, that means there wasn't a slot that was * already programmed with the key. So try to program it. */ - if (atomic_read(&ksm->num_idle_slots) > 0) + if (!list_empty(&ksm->idle_slots)) break; up_write(&ksm->lock); wait_event(ksm->idle_slots_wait_queue, - (atomic_read(&ksm->num_idle_slots) > 0)); + !list_empty(&ksm->idle_slots)); } idle_slot = list_first_entry(&ksm->idle_slots, struct keyslot, idle_slot_node); slot = idle_slot - ksm->slots; - err = ksm->ksm_ll_ops.keyslot_program(ksm->ll_priv_data, key, - crypto_mode, - data_unit_size, - slot); - + err = ksm->ksm_ll_ops.keyslot_program(ksm, key, slot); if (err) { wake_up(&ksm->idle_slots_wait_queue); up_write(&ksm->lock); return err; } - atomic_set(&ksm->slots[slot].slot_refs, 1); + /* Move this slot to the hash list for the new key. */ + if (idle_slot->key.crypto_mode != BLK_ENCRYPTION_MODE_INVALID) + hlist_del(&idle_slot->hash_node); + hlist_add_head(&idle_slot->hash_node, hash_bucket_for_key(ksm, key)); + + atomic_set(&idle_slot->slot_refs, 1); + idle_slot->key = *key; + remove_slot_from_lru_list(ksm, slot); up_write(&ksm->lock); return slot; - } -EXPORT_SYMBOL(keyslot_manager_get_slot_for_key); /** * keyslot_manager_get_slot() - Increment the refcount on the specified slot. - * @ksm - The keyslot manager that we want to modify. - * @slot - The slot to increment the refcount of. + * @ksm: The keyslot manager that we want to modify. + * @slot: The slot to increment the refcount of. * * This function assumes that there is already an active reference to that slot * and simply increments the refcount. This is useful when cloning a bio that @@ -237,7 +281,6 @@ void keyslot_manager_get_slot(struct keyslot_manager *ksm, unsigned int slot) WARN_ON(atomic_inc_return(&ksm->slots[slot].slot_refs) < 2); } -EXPORT_SYMBOL(keyslot_manager_get_slot); /** * keyslot_manager_put_slot() - Release a reference to a slot @@ -258,19 +301,17 @@ void keyslot_manager_put_slot(struct keyslot_manager *ksm, unsigned int slot) list_add_tail(&ksm->slots[slot].idle_slot_node, &ksm->idle_slots); spin_unlock_irqrestore(&ksm->idle_slots_lock, flags); - atomic_inc(&ksm->num_idle_slots); wake_up(&ksm->idle_slots_wait_queue); } } -EXPORT_SYMBOL(keyslot_manager_put_slot); /** * keyslot_manager_crypto_mode_supported() - Find out if a crypto_mode/data * unit size combination is supported * by a ksm. - * @ksm - The keyslot manager to check - * @crypto_mode - The crypto mode to check for. - * @data_unit_size - The data_unit_size for the mode. + * @ksm: The keyslot manager to check + * @crypto_mode: The crypto mode to check for. + * @data_unit_size: The data_unit_size for the mode. * * Calls and returns the result of the crypto_mode_supported function specified * by the ksm. @@ -285,69 +326,102 @@ bool keyslot_manager_crypto_mode_supported(struct keyslot_manager *ksm, { if (!ksm) return false; - return ksm->ksm_ll_ops.crypto_mode_supported(ksm->ll_priv_data, - crypto_mode, - data_unit_size); + if (WARN_ON(crypto_mode >= BLK_ENCRYPTION_MODE_MAX)) + return false; + if (WARN_ON(!is_power_of_2(data_unit_size))) + return false; + return ksm->crypto_mode_supported[crypto_mode] & data_unit_size; } -EXPORT_SYMBOL(keyslot_manager_crypto_mode_supported); - -bool keyslot_manager_rq_crypto_mode_supported(struct request_queue *q, - enum blk_crypto_mode_num crypto_mode, - unsigned int data_unit_size) -{ - return keyslot_manager_crypto_mode_supported(q->ksm, crypto_mode, - data_unit_size); -} -EXPORT_SYMBOL(keyslot_manager_rq_crypto_mode_supported); /** * keyslot_manager_evict_key() - Evict a key from the lower layer device. - * @ksm - The keyslot manager to evict from - * @key - The key to evict - * @crypto_mode - The crypto algorithm the key was programmed with. - * @data_unit_size - The data_unit_size the key was programmed with. + * @ksm: The keyslot manager to evict from + * @key: The key to evict * - * Finds the slot that the specified key, crypto_mode, data_unit_size combo - * was programmed into, and evicts that slot from the lower layer device if - * the refcount on the slot is 0. Returns -EBUSY if the refcount is not 0, and - * -errno on error. + * Find the keyslot that the specified key was programmed into, and evict that + * slot from the lower layer device if that slot is not currently in use. * * Context: Process context. Takes and releases ksm->lock. + * Return: 0 on success, -EBUSY if the key is still in use, or another + * -errno value on other error. */ int keyslot_manager_evict_key(struct keyslot_manager *ksm, - const u8 *key, - enum blk_crypto_mode_num crypto_mode, - unsigned int data_unit_size) + const struct blk_crypto_key *key) { int slot; - int err = 0; + int err; + struct keyslot *slotp; down_write(&ksm->lock); - slot = ksm->ksm_ll_ops.keyslot_find(ksm->ll_priv_data, key, - crypto_mode, - data_unit_size); - + slot = find_keyslot(ksm, key); if (slot < 0) { - up_write(&ksm->lock); - return slot; + err = slot; + goto out_unlock; } + slotp = &ksm->slots[slot]; - if (atomic_read(&ksm->slots[slot].slot_refs) == 0) { - err = ksm->ksm_ll_ops.keyslot_evict(ksm->ll_priv_data, key, - crypto_mode, - data_unit_size, - slot); - } else { + if (atomic_read(&slotp->slot_refs) != 0) { err = -EBUSY; + goto out_unlock; } + err = ksm->ksm_ll_ops.keyslot_evict(ksm, key, slot); + if (err) + goto out_unlock; + hlist_del(&slotp->hash_node); + memzero_explicit(&slotp->key, sizeof(slotp->key)); + err = 0; +out_unlock: up_write(&ksm->lock); return err; } -EXPORT_SYMBOL(keyslot_manager_evict_key); + +/** + * keyslot_manager_reprogram_all_keys() - Re-program all keyslots. + * @ksm: The keyslot manager + * + * Re-program all keyslots that are supposed to have a key programmed. This is + * intended only for use by drivers for hardware that loses its keys on reset. + * + * Context: Process context. Takes and releases ksm->lock. + */ +void keyslot_manager_reprogram_all_keys(struct keyslot_manager *ksm) +{ + unsigned int slot; + + down_write(&ksm->lock); + for (slot = 0; slot < ksm->num_slots; slot++) { + const struct keyslot *slotp = &ksm->slots[slot]; + int err; + + if (slotp->key.crypto_mode == BLK_ENCRYPTION_MODE_INVALID) + continue; + + err = ksm->ksm_ll_ops.keyslot_program(ksm, &slotp->key, slot); + WARN_ON(err); + } + up_write(&ksm->lock); +} +EXPORT_SYMBOL_GPL(keyslot_manager_reprogram_all_keys); + +/** + * keyslot_manager_private() - return the private data stored with ksm + * @ksm: The keyslot manager + * + * Returns the private data passed to the ksm when it was created. + */ +void *keyslot_manager_private(struct keyslot_manager *ksm) +{ + return ksm->ll_priv_data; +} +EXPORT_SYMBOL_GPL(keyslot_manager_private); void keyslot_manager_destroy(struct keyslot_manager *ksm) { - kvfree(ksm); + if (ksm) { + kvfree(ksm->slot_hashtable); + memzero_explicit(ksm, struct_size(ksm, slots, ksm->num_slots)); + kvfree(ksm); + } } -EXPORT_SYMBOL(keyslot_manager_destroy); +EXPORT_SYMBOL_GPL(keyslot_manager_destroy); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 1ca1a512bc2a..85e594d3329c 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1246,15 +1246,13 @@ static int clone_bio(struct dm_target_io *tio, struct bio *bio, sector_t sector, unsigned len) { struct bio *clone = &tio->clone; - int ret; __bio_clone_fast(clone, bio); - ret = bio_crypt_clone(clone, bio, GFP_NOIO); - if (ret < 0) - return ret; + bio_crypt_clone(clone, bio, GFP_NOIO); if (unlikely(bio_integrity(bio) != NULL)) { + int r; if (unlikely(!dm_target_has_integrity(tio->ti->type) && !dm_target_passes_integrity(tio->ti->type))) { DMWARN("%s: the target %s doesn't support integrity data.", @@ -1263,11 +1261,9 @@ static int clone_bio(struct dm_target_io *tio, struct bio *bio, return -EIO; } - ret = bio_integrity_clone(clone, bio, GFP_NOIO); - if (ret < 0) { - bio_crypt_free_ctx(clone); - return ret; - } + r = bio_integrity_clone(clone, bio, GFP_NOIO); + if (r < 0) + return r; } if (bio_op(bio) != REQ_OP_ZONE_REPORT) diff --git a/drivers/scsi/ufs/ufshcd-crypto.c b/drivers/scsi/ufs/ufshcd-crypto.c index 7599d77725e4..acd63ef15be5 100644 --- a/drivers/scsi/ufs/ufshcd-crypto.c +++ b/drivers/scsi/ufs/ufshcd-crypto.c @@ -3,8 +3,7 @@ * Copyright 2019 Google LLC */ -#include - +#include #include "ufshcd.h" #include "ufshcd-crypto.h" @@ -25,19 +24,23 @@ static u8 get_data_unit_size_mask(unsigned int data_unit_size) static size_t get_keysize_bytes(enum ufs_crypto_key_size size) { switch (size) { - case UFS_CRYPTO_KEY_SIZE_128: return 16; - case UFS_CRYPTO_KEY_SIZE_192: return 24; - case UFS_CRYPTO_KEY_SIZE_256: return 32; - case UFS_CRYPTO_KEY_SIZE_512: return 64; - default: return 0; + case UFS_CRYPTO_KEY_SIZE_128: + return 16; + case UFS_CRYPTO_KEY_SIZE_192: + return 24; + case UFS_CRYPTO_KEY_SIZE_256: + return 32; + case UFS_CRYPTO_KEY_SIZE_512: + return 64; + default: + return 0; } } -static int ufshcd_crypto_cap_find(void *hba_p, +static int ufshcd_crypto_cap_find(struct ufs_hba *hba, enum blk_crypto_mode_num crypto_mode, unsigned int data_unit_size) { - struct ufs_hba *hba = hba_p; enum ufs_crypto_alg ufs_alg; u8 data_unit_mask; int cap_idx; @@ -52,7 +55,8 @@ static int ufshcd_crypto_cap_find(void *hba_p, ufs_alg = UFS_CRYPTO_ALG_AES_XTS; ufs_key_size = UFS_CRYPTO_KEY_SIZE_256; break; - default: return -EINVAL; + default: + return -EINVAL; } data_unit_mask = get_data_unit_size_mask(data_unit_size); @@ -101,8 +105,10 @@ static int ufshcd_crypto_cfg_entry_write_key(union ufs_crypto_cfg_entry *cfg, memcpy(cfg->crypto_key + UFS_CRYPTO_KEY_MAX_SIZE/2, key + key_size_bytes/2, key_size_bytes/2); return 0; - case UFS_CRYPTO_ALG_BITLOCKER_AES_CBC: // fallthrough - case UFS_CRYPTO_ALG_AES_ECB: // fallthrough + case UFS_CRYPTO_ALG_BITLOCKER_AES_CBC: + /* fall through */ + case UFS_CRYPTO_ALG_AES_ECB: + /* fall through */ case UFS_CRYPTO_ALG_ESSIV_AES_CBC: memcpy(cfg->crypto_key, key, key_size_bytes); return 0; @@ -111,13 +117,15 @@ static int ufshcd_crypto_cfg_entry_write_key(union ufs_crypto_cfg_entry *cfg, return -EINVAL; } -static void program_key(struct ufs_hba *hba, - const union ufs_crypto_cfg_entry *cfg, - int slot) +static void ufshcd_program_key(struct ufs_hba *hba, + const union ufs_crypto_cfg_entry *cfg, + int slot) { int i; u32 slot_offset = hba->crypto_cfg_register + slot * sizeof(*cfg); + pm_runtime_get_sync(hba->dev); + ufshcd_hold(hba, false); /* Clear the dword 16 */ ufshcd_writel(hba, 0, slot_offset + 16 * sizeof(cfg->reg_val[0])); /* Ensure that CFGE is cleared before programming the key */ @@ -137,29 +145,45 @@ static void program_key(struct ufs_hba *hba, ufshcd_writel(hba, le32_to_cpu(cfg->reg_val[16]), slot_offset + 16 * sizeof(cfg->reg_val[0])); wmb(); + ufshcd_release(hba); + pm_runtime_put_sync(hba->dev); } -static int ufshcd_crypto_keyslot_program(void *hba_p, const u8 *key, - enum blk_crypto_mode_num crypto_mode, - unsigned int data_unit_size, +static void ufshcd_clear_keyslot(struct ufs_hba *hba, int slot) +{ + union ufs_crypto_cfg_entry cfg = { 0 }; + + ufshcd_program_key(hba, &cfg, slot); +} + +/* Clear all keyslots at driver init time */ +static void ufshcd_clear_all_keyslots(struct ufs_hba *hba) +{ + int slot; + + for (slot = 0; slot < ufshcd_num_keyslots(hba); slot++) + ufshcd_clear_keyslot(hba, slot); +} + +static int ufshcd_crypto_keyslot_program(struct keyslot_manager *ksm, + const struct blk_crypto_key *key, unsigned int slot) { - struct ufs_hba *hba = hba_p; + struct ufs_hba *hba = keyslot_manager_private(ksm); int err = 0; u8 data_unit_mask; union ufs_crypto_cfg_entry cfg; - union ufs_crypto_cfg_entry *cfg_arr = hba->crypto_cfgs; int cap_idx; - cap_idx = ufshcd_crypto_cap_find(hba_p, crypto_mode, - data_unit_size); + cap_idx = ufshcd_crypto_cap_find(hba, key->crypto_mode, + key->data_unit_size); if (!ufshcd_is_crypto_enabled(hba) || !ufshcd_keyslot_valid(hba, slot) || !ufshcd_cap_idx_valid(hba, cap_idx)) return -EINVAL; - data_unit_mask = get_data_unit_size_mask(data_unit_size); + data_unit_mask = get_data_unit_size_mask(key->data_unit_size); if (!(data_unit_mask & hba->crypto_cap_array[cap_idx].sdus_mask)) return -EINVAL; @@ -169,134 +193,74 @@ static int ufshcd_crypto_keyslot_program(void *hba_p, const u8 *key, cfg.crypto_cap_idx = cap_idx; cfg.config_enable |= UFS_CRYPTO_CONFIGURATION_ENABLE; - err = ufshcd_crypto_cfg_entry_write_key(&cfg, key, - hba->crypto_cap_array[cap_idx]); + err = ufshcd_crypto_cfg_entry_write_key(&cfg, key->raw, + hba->crypto_cap_array[cap_idx]); if (err) return err; - program_key(hba, &cfg, slot); + ufshcd_program_key(hba, &cfg, slot); - memcpy(&cfg_arr[slot], &cfg, sizeof(cfg)); memzero_explicit(&cfg, sizeof(cfg)); - return 0; } -static int ufshcd_crypto_keyslot_find(void *hba_p, - const u8 *key, - enum blk_crypto_mode_num crypto_mode, - unsigned int data_unit_size) -{ - struct ufs_hba *hba = hba_p; - int err = 0; - int slot; - u8 data_unit_mask; - union ufs_crypto_cfg_entry cfg; - union ufs_crypto_cfg_entry *cfg_arr = hba->crypto_cfgs; - int cap_idx; - - cap_idx = ufshcd_crypto_cap_find(hba_p, crypto_mode, - data_unit_size); - - if (!ufshcd_is_crypto_enabled(hba) || - !ufshcd_cap_idx_valid(hba, cap_idx)) - return -EINVAL; - - data_unit_mask = get_data_unit_size_mask(data_unit_size); - - if (!(data_unit_mask & hba->crypto_cap_array[cap_idx].sdus_mask)) - return -EINVAL; - - memset(&cfg, 0, sizeof(cfg)); - err = ufshcd_crypto_cfg_entry_write_key(&cfg, key, - hba->crypto_cap_array[cap_idx]); - - if (err) - return -EINVAL; - - for (slot = 0; slot < NUM_KEYSLOTS(hba); slot++) { - if ((cfg_arr[slot].config_enable & - UFS_CRYPTO_CONFIGURATION_ENABLE) && - data_unit_mask == cfg_arr[slot].data_unit_size && - cap_idx == cfg_arr[slot].crypto_cap_idx && - !crypto_memneq(&cfg.crypto_key, cfg_arr[slot].crypto_key, - UFS_CRYPTO_KEY_MAX_SIZE)) { - memzero_explicit(&cfg, sizeof(cfg)); - return slot; - } - } - - memzero_explicit(&cfg, sizeof(cfg)); - return -ENOKEY; -} - -static int ufshcd_crypto_keyslot_evict(void *hba_p, const u8 *key, - enum blk_crypto_mode_num crypto_mode, - unsigned int data_unit_size, +static int ufshcd_crypto_keyslot_evict(struct keyslot_manager *ksm, + const struct blk_crypto_key *key, unsigned int slot) { - struct ufs_hba *hba = hba_p; - int i = 0; - u32 reg_base; - union ufs_crypto_cfg_entry *cfg_arr = hba->crypto_cfgs; + struct ufs_hba *hba = keyslot_manager_private(ksm); if (!ufshcd_is_crypto_enabled(hba) || !ufshcd_keyslot_valid(hba, slot)) return -EINVAL; - memset(&cfg_arr[slot], 0, sizeof(cfg_arr[slot])); - reg_base = hba->crypto_cfg_register + slot * sizeof(cfg_arr[0]); - /* * Clear the crypto cfg on the device. Clearing CFGE * might not be sufficient, so just clear the entire cfg. */ - for (i = 0; i < sizeof(cfg_arr[0]); i += sizeof(__le32)) - ufshcd_writel(hba, 0, reg_base + i); - wmb(); + ufshcd_clear_keyslot(hba, slot); return 0; } -static bool ufshcd_crypto_mode_supported(void *hba_p, - enum blk_crypto_mode_num crypto_mode, - unsigned int data_unit_size) -{ - return ufshcd_crypto_cap_find(hba_p, crypto_mode, data_unit_size) >= 0; -} - /* Functions implementing UFSHCI v2.1 specification behaviour */ void ufshcd_crypto_enable_spec(struct ufs_hba *hba) { - union ufs_crypto_cfg_entry *cfg_arr = hba->crypto_cfgs; - int slot; - if (!ufshcd_hba_is_crypto_supported(hba)) return; hba->caps |= UFSHCD_CAP_CRYPTO; - /* - * Reset might clear all keys, so reprogram all the keys. - * Also serves to clear keys on driver init. - */ - for (slot = 0; slot < NUM_KEYSLOTS(hba); slot++) - program_key(hba, &cfg_arr[slot], slot); + + /* Reset might clear all keys, so reprogram all the keys. */ + keyslot_manager_reprogram_all_keys(hba->ksm); } -EXPORT_SYMBOL(ufshcd_crypto_enable_spec); +EXPORT_SYMBOL_GPL(ufshcd_crypto_enable_spec); void ufshcd_crypto_disable_spec(struct ufs_hba *hba) { hba->caps &= ~UFSHCD_CAP_CRYPTO; } -EXPORT_SYMBOL(ufshcd_crypto_disable_spec); +EXPORT_SYMBOL_GPL(ufshcd_crypto_disable_spec); static const struct keyslot_mgmt_ll_ops ufshcd_ksm_ops = { .keyslot_program = ufshcd_crypto_keyslot_program, .keyslot_evict = ufshcd_crypto_keyslot_evict, - .keyslot_find = ufshcd_crypto_keyslot_find, - .crypto_mode_supported = ufshcd_crypto_mode_supported, }; +enum blk_crypto_mode_num ufshcd_blk_crypto_mode_num_for_alg_dusize( + enum ufs_crypto_alg ufs_crypto_alg, + enum ufs_crypto_key_size key_size) +{ + /* + * This is currently the only mode that UFS and blk-crypto both support. + */ + if (ufs_crypto_alg == UFS_CRYPTO_ALG_AES_XTS && + key_size == UFS_CRYPTO_KEY_SIZE_256) + return BLK_ENCRYPTION_MODE_AES_256_XTS; + + return BLK_ENCRYPTION_MODE_INVALID; +} + /** * ufshcd_hba_init_crypto - Read crypto capabilities, init crypto fields in hba * @hba: Per adapter instance @@ -308,6 +272,8 @@ int ufshcd_hba_init_crypto_spec(struct ufs_hba *hba, { int cap_idx = 0; int err = 0; + unsigned int crypto_modes_supported[BLK_ENCRYPTION_MODE_MAX]; + enum blk_crypto_mode_num blk_mode_num; /* Default to disabling crypto */ hba->caps &= ~UFSHCD_CAP_CRYPTO; @@ -335,16 +301,7 @@ int ufshcd_hba_init_crypto_spec(struct ufs_hba *hba, goto out; } - hba->crypto_cfgs = - devm_kcalloc(hba->dev, - NUM_KEYSLOTS(hba), - sizeof(hba->crypto_cfgs[0]), - GFP_KERNEL); - if (!hba->crypto_cfgs) { - err = -ENOMEM; - goto out_free_cfg_mem; - } - + memset(crypto_modes_supported, 0, sizeof(crypto_modes_supported)); /* * Store all the capabilities now so that we don't need to repeatedly * access the device each time we want to know its capabilities @@ -355,26 +312,35 @@ int ufshcd_hba_init_crypto_spec(struct ufs_hba *hba, cpu_to_le32(ufshcd_readl(hba, REG_UFS_CRYPTOCAP + cap_idx * sizeof(__le32))); + blk_mode_num = ufshcd_blk_crypto_mode_num_for_alg_dusize( + hba->crypto_cap_array[cap_idx].algorithm_id, + hba->crypto_cap_array[cap_idx].key_size); + if (blk_mode_num == BLK_ENCRYPTION_MODE_INVALID) + continue; + crypto_modes_supported[blk_mode_num] |= + hba->crypto_cap_array[cap_idx].sdus_mask * 512; } - hba->ksm = keyslot_manager_create(NUM_KEYSLOTS(hba), ksm_ops, hba); + ufshcd_clear_all_keyslots(hba); + + hba->ksm = keyslot_manager_create(ufshcd_num_keyslots(hba), ksm_ops, + crypto_modes_supported, hba); if (!hba->ksm) { err = -ENOMEM; - goto out_free_crypto_cfgs; + goto out_free_caps; } return 0; -out_free_crypto_cfgs: - devm_kfree(hba->dev, hba->crypto_cfgs); -out_free_cfg_mem: + +out_free_caps: devm_kfree(hba->dev, hba->crypto_cap_array); out: /* Indicate that init failed by setting crypto_capabilities to 0 */ hba->crypto_capabilities.reg_val = 0; return err; } -EXPORT_SYMBOL(ufshcd_hba_init_crypto_spec); +EXPORT_SYMBOL_GPL(ufshcd_hba_init_crypto_spec); void ufshcd_crypto_setup_rq_keyslot_manager_spec(struct ufs_hba *hba, struct request_queue *q) @@ -384,26 +350,26 @@ void ufshcd_crypto_setup_rq_keyslot_manager_spec(struct ufs_hba *hba, q->ksm = hba->ksm; } -EXPORT_SYMBOL(ufshcd_crypto_setup_rq_keyslot_manager_spec); +EXPORT_SYMBOL_GPL(ufshcd_crypto_setup_rq_keyslot_manager_spec); void ufshcd_crypto_destroy_rq_keyslot_manager_spec(struct ufs_hba *hba, struct request_queue *q) { keyslot_manager_destroy(hba->ksm); } -EXPORT_SYMBOL(ufshcd_crypto_destroy_rq_keyslot_manager_spec); +EXPORT_SYMBOL_GPL(ufshcd_crypto_destroy_rq_keyslot_manager_spec); int ufshcd_prepare_lrbp_crypto_spec(struct ufs_hba *hba, struct scsi_cmnd *cmd, struct ufshcd_lrb *lrbp) { - int key_slot; + struct bio_crypt_ctx *bc; - if (!cmd->request->bio || - !bio_crypt_should_process(cmd->request->bio, cmd->request->q)) { + if (!bio_crypt_should_process(cmd->request)) { lrbp->crypto_enable = false; return 0; } + bc = cmd->request->bio->bi_crypt_context; if (WARN_ON(!ufshcd_is_crypto_enabled(hba))) { /* @@ -412,17 +378,16 @@ int ufshcd_prepare_lrbp_crypto_spec(struct ufs_hba *hba, */ return -EINVAL; } - key_slot = bio_crypt_get_keyslot(cmd->request->bio); - if (!ufshcd_keyslot_valid(hba, key_slot)) + if (!ufshcd_keyslot_valid(hba, bc->bc_keyslot)) return -EINVAL; lrbp->crypto_enable = true; - lrbp->crypto_key_slot = key_slot; - lrbp->data_unit_num = bio_crypt_data_unit_num(cmd->request->bio); + lrbp->crypto_key_slot = bc->bc_keyslot; + lrbp->data_unit_num = bc->bc_dun[0]; return 0; } -EXPORT_SYMBOL(ufshcd_prepare_lrbp_crypto_spec); +EXPORT_SYMBOL_GPL(ufshcd_prepare_lrbp_crypto_spec); /* Crypto Variant Ops Support */ diff --git a/drivers/scsi/ufs/ufshcd-crypto.h b/drivers/scsi/ufs/ufshcd-crypto.h index 3c03d0e23e87..24a587c7a94e 100644 --- a/drivers/scsi/ufs/ufshcd-crypto.h +++ b/drivers/scsi/ufs/ufshcd-crypto.h @@ -11,7 +11,10 @@ #include "ufshcd.h" #include "ufshci.h" -#define NUM_KEYSLOTS(hba) (hba->crypto_capabilities.config_count + 1) +static inline int ufshcd_num_keyslots(struct ufs_hba *hba) +{ + return hba->crypto_capabilities.config_count + 1; +} static inline bool ufshcd_keyslot_valid(struct ufs_hba *hba, unsigned int slot) { @@ -19,7 +22,7 @@ static inline bool ufshcd_keyslot_valid(struct ufs_hba *hba, unsigned int slot) * The actual number of configurations supported is (CFGC+1), so slot * numbers range from 0 to config_count inclusive. */ - return slot < NUM_KEYSLOTS(hba); + return slot < ufshcd_num_keyslots(hba); } static inline bool ufshcd_hba_is_crypto_supported(struct ufs_hba *hba) @@ -51,6 +54,11 @@ void ufshcd_crypto_setup_rq_keyslot_manager_spec(struct ufs_hba *hba, void ufshcd_crypto_destroy_rq_keyslot_manager_spec(struct ufs_hba *hba, struct request_queue *q); +static inline bool ufshcd_lrbp_crypto_enabled(struct ufshcd_lrb *lrbp) +{ + return lrbp->crypto_enable; +} + /* Crypto Variant Ops Support */ void ufshcd_crypto_enable(struct ufs_hba *hba); @@ -118,10 +126,14 @@ static inline int ufshcd_prepare_lrbp_crypto(struct ufs_hba *hba, struct scsi_cmnd *cmd, struct ufshcd_lrb *lrbp) { - lrbp->crypto_enable = false; return 0; } +static inline bool ufshcd_lrbp_crypto_enabled(struct ufshcd_lrb *lrbp) +{ + return false; +} + static inline int ufshcd_complete_lrbp_crypto(struct ufs_hba *hba, struct scsi_cmnd *cmd, struct ufshcd_lrb *lrbp) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 8253a3ee6148..f2aa21ba4421 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -2090,13 +2090,15 @@ static void ufshcd_prepare_req_desc_hdr(struct ufshcd_lrb *lrbp, dword_0 |= UTP_REQ_DESC_INT_CMD; /* Transfer request descriptor header fields */ - if (lrbp->crypto_enable) { + if (ufshcd_lrbp_crypto_enabled(lrbp)) { +#if IS_ENABLED(CONFIG_SCSI_UFS_CRYPTO) dword_0 |= UTP_REQ_DESC_CRYPTO_ENABLE_CMD; dword_0 |= lrbp->crypto_key_slot; req_desc->header.dword_1 = - cpu_to_le32((u32)lrbp->data_unit_num); + cpu_to_le32(lower_32_bits(lrbp->data_unit_num)); req_desc->header.dword_3 = - cpu_to_le32((u32)(lrbp->data_unit_num >> 32)); + cpu_to_le32(upper_32_bits(lrbp->data_unit_num)); +#endif /* CONFIG_SCSI_UFS_CRYPTO */ } else { /* dword_1 and dword_3 are reserved, hence they are set to 0 */ req_desc->header.dword_1 = 0; @@ -2414,7 +2416,9 @@ static int ufshcd_compose_dev_cmd(struct ufs_hba *hba, lrbp->task_tag = tag; lrbp->lun = 0; /* device management cmd is not specific to any LUN */ lrbp->intr_cmd = true; /* No interrupt aggregation */ +#if IS_ENABLED(CONFIG_SCSI_UFS_CRYPTO) lrbp->crypto_enable = false; /* No crypto operations */ +#endif hba->dev_cmd.type = cmd_type; return ufshcd_comp_devman_upiu(hba, lrbp); @@ -4421,8 +4425,8 @@ static int ufshcd_change_queue_depth(struct scsi_device *sdev, int depth) */ static int ufshcd_slave_configure(struct scsi_device *sdev) { - struct request_queue *q = sdev->request_queue; struct ufs_hba *hba = shost_priv(sdev->host); + struct request_queue *q = sdev->request_queue; blk_queue_update_dma_pad(q, PRDT_DATA_BYTE_COUNT_PAD - 1); blk_queue_max_segment_size(q, PRDT_DATA_BYTE_COUNT_MAX); diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index ad24d4aed313..159c98149474 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -192,9 +192,11 @@ struct ufshcd_lrb { u8 lun; /* UPIU LUN id field is only 8-bit wide */ bool intr_cmd; ktime_t issue_time_stamp; +#if IS_ENABLED(CONFIG_SCSI_UFS_CRYPTO) bool crypto_enable; u8 crypto_key_slot; u64 data_unit_num; +#endif /* CONFIG_SCSI_UFS_CRYPTO */ bool req_abort_skip; }; @@ -527,7 +529,6 @@ struct ufs_stats { * @crypto_capabilities: Content of crypto capabilities register (0x100) * @crypto_cap_array: Array of crypto capabilities * @crypto_cfg_register: Start of the crypto cfg array - * @crypto_cfgs: Array of crypto configurations (i.e. config for each slot) * @ksm: the keyslot manager tied to this hba */ struct ufs_hba { @@ -705,7 +706,7 @@ struct ufs_hba { * This capability allows the host controller driver to use the * inline crypto engine, if it is present */ -#define UFSHCD_CAP_CRYPTO (1 << 6) +#define UFSHCD_CAP_CRYPTO (1 << 7) struct devfreq *devfreq; struct ufs_clk_scaling clk_scaling; @@ -722,7 +723,6 @@ struct ufs_hba { union ufs_crypto_capabilities crypto_capabilities; union ufs_crypto_cap_entry *crypto_cap_array; u32 crypto_cfg_register; - union ufs_crypto_cfg_entry *crypto_cfgs; struct keyslot_manager *ksm; #endif /* CONFIG_SCSI_UFS_CRYPTO */ }; diff --git a/fs/buffer.c b/fs/buffer.c index 30315edcfd91..fff033d48f45 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3126,7 +3126,7 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, */ bio = bio_alloc(GFP_NOIO, 1); - fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO | __GFP_NOFAIL); + fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO); if (wbc) { wbc_init_bio(wbc, bio); diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c index 9dcb57089f4d..f62375d39b0f 100644 --- a/fs/crypto/bio.c +++ b/fs/crypto/bio.c @@ -73,11 +73,8 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, err = -ENOMEM; goto errout; } - err = fscrypt_set_bio_crypt_ctx(bio, inode, lblk, GFP_NOIO); - if (err) { - bio_put(bio); - goto errout; - } + fscrypt_set_bio_crypt_ctx(bio, inode, lblk, GFP_NOIO); + bio_set_dev(bio, inode->i_sb->s_bdev); bio->bi_iter.bi_sector = pblk << (blockbits - 9); bio_set_op_attrs(bio, REQ_OP_WRITE, 0); diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 6e6f39ea18a7..41b4fe15b4b6 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -96,7 +96,7 @@ int fscrypt_crypt_block(const struct inode *inode, fscrypt_direction_t rw, DECLARE_CRYPTO_WAIT(wait); struct scatterlist dst, src; struct fscrypt_info *ci = inode->i_crypt_info; - struct crypto_skcipher *tfm = ci->ci_ctfm; + struct crypto_skcipher *tfm = ci->ci_key.tfm; int res = 0; if (WARN_ON_ONCE(len <= 0)) diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 3da3707c10e3..3aafddaab703 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -40,7 +40,7 @@ int fname_encrypt(struct inode *inode, const struct qstr *iname, struct skcipher_request *req = NULL; DECLARE_CRYPTO_WAIT(wait); struct fscrypt_info *ci = inode->i_crypt_info; - struct crypto_skcipher *tfm = ci->ci_ctfm; + struct crypto_skcipher *tfm = ci->ci_key.tfm; union fscrypt_iv iv; struct scatterlist sg; int res; @@ -93,7 +93,7 @@ static int fname_decrypt(struct inode *inode, DECLARE_CRYPTO_WAIT(wait); struct scatterlist src_sg, dst_sg; struct fscrypt_info *ci = inode->i_crypt_info; - struct crypto_skcipher *tfm = ci->ci_ctfm; + struct crypto_skcipher *tfm = ci->ci_key.tfm; union fscrypt_iv iv; int res; diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 466fa2b038ec..7005dbe6bfec 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -15,8 +15,6 @@ #include #include -struct fscrypt_master_key; - #define CONST_STRLEN(str) (sizeof(str) - 1) #define FS_KEY_DERIVATION_NONCE_SIZE 16 @@ -154,6 +152,20 @@ struct fscrypt_symlink_data { char encrypted_path[1]; } __packed; +/** + * struct fscrypt_prepared_key - a key prepared for actual encryption/decryption + * @tfm: crypto API transform object + * @blk_key: key for blk-crypto + * + * Normally only one of the fields will be non-NULL. + */ +struct fscrypt_prepared_key { + struct crypto_skcipher *tfm; +#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT + struct fscrypt_blk_crypto_key *blk_key; +#endif +}; + /* * fscrypt_info - the "encryption key" for an inode * @@ -163,20 +175,20 @@ struct fscrypt_symlink_data { */ struct fscrypt_info { - /* The actual crypto transform used for encryption and decryption */ - struct crypto_skcipher *ci_ctfm; - -#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT - /* - * The raw key for inline encryption, if this file is using inline - * encryption rather than the traditional filesystem layer encryption. - */ - const u8 *ci_inline_crypt_key; -#endif + /* The key in a form prepared for actual encryption/decryption */ + struct fscrypt_prepared_key ci_key; /* True if the key should be freed when this fscrypt_info is freed */ bool ci_owns_key; +#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT + /* + * True if this inode will use inline encryption (blk-crypto) instead of + * the traditional filesystem-layer encryption. + */ + bool ci_inlinecrypt; +#endif + /* * Encryption mode used for this inode. It corresponds to either the * contents or filenames encryption mode, depending on the inode type. @@ -201,7 +213,7 @@ struct fscrypt_info { /* * If non-NULL, then encryption is done using the master key directly - * and ci_ctfm will equal ci_direct_key->dk_ctfm. + * and ci_key will equal ci_direct_key->dk_key. */ struct fscrypt_direct_key *ci_direct_key; @@ -265,6 +277,7 @@ union fscrypt_iv { u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE]; }; u8 raw[FSCRYPT_MAX_IV_SIZE]; + __le64 dun[FSCRYPT_MAX_IV_SIZE / sizeof(__le64)]; }; void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num, @@ -306,49 +319,71 @@ extern void fscrypt_destroy_hkdf(struct fscrypt_hkdf *hkdf); /* inline_crypt.c */ #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT -extern bool fscrypt_should_use_inline_encryption(const struct fscrypt_info *ci); +extern void fscrypt_select_encryption_impl(struct fscrypt_info *ci); -extern int fscrypt_set_inline_crypt_key(struct fscrypt_info *ci, - const u8 *derived_key); +static inline bool +fscrypt_using_inline_encryption(const struct fscrypt_info *ci) +{ + return ci->ci_inlinecrypt; +} -extern void fscrypt_free_inline_crypt_key(struct fscrypt_info *ci); +extern int fscrypt_prepare_inline_crypt_key( + struct fscrypt_prepared_key *prep_key, + const u8 *raw_key, + const struct fscrypt_info *ci); -extern int fscrypt_setup_per_mode_inline_crypt_key( - struct fscrypt_info *ci, - struct fscrypt_master_key *mk); +extern void fscrypt_destroy_inline_crypt_key( + struct fscrypt_prepared_key *prep_key); -extern void fscrypt_evict_inline_crypt_keys(struct fscrypt_master_key *mk); +/* + * Check whether the crypto transform or blk-crypto key has been allocated in + * @prep_key, depending on which encryption implementation the file will use. + */ +static inline bool +fscrypt_is_key_prepared(struct fscrypt_prepared_key *prep_key, + const struct fscrypt_info *ci) +{ + /* + * The READ_ONCE() here pairs with the smp_store_release() in + * fscrypt_prepare_key(). (This only matters for the per-mode keys, + * which are shared by multiple inodes.) + */ + if (fscrypt_using_inline_encryption(ci)) + return READ_ONCE(prep_key->blk_key) != NULL; + return READ_ONCE(prep_key->tfm) != NULL; +} #else /* CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ -static inline bool fscrypt_should_use_inline_encryption( +static inline void fscrypt_select_encryption_impl(struct fscrypt_info *ci) +{ +} + +static inline bool fscrypt_using_inline_encryption( const struct fscrypt_info *ci) { return false; } -static inline int fscrypt_set_inline_crypt_key(struct fscrypt_info *ci, - const u8 *derived_key) +static inline int +fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, + const u8 *raw_key, + const struct fscrypt_info *ci) { WARN_ON(1); return -EOPNOTSUPP; } -static inline void fscrypt_free_inline_crypt_key(struct fscrypt_info *ci) +static inline void +fscrypt_destroy_inline_crypt_key(struct fscrypt_prepared_key *prep_key) { } -static inline int fscrypt_setup_per_mode_inline_crypt_key( - struct fscrypt_info *ci, - struct fscrypt_master_key *mk) -{ - WARN_ON(1); - return -EOPNOTSUPP; -} - -static inline void fscrypt_evict_inline_crypt_keys( - struct fscrypt_master_key *mk) +static inline bool +fscrypt_is_key_prepared(struct fscrypt_prepared_key *prep_key, + const struct fscrypt_info *ci) { + return READ_ONCE(prep_key->tfm) != NULL; } #endif /* !CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ @@ -441,25 +476,12 @@ struct fscrypt_master_key { struct list_head mk_decrypted_inodes; spinlock_t mk_decrypted_inodes_lock; - /* Crypto API transforms for DIRECT_KEY policies, allocated on-demand */ - struct crypto_skcipher *mk_direct_tfms[__FSCRYPT_MODE_MAX + 1]; + /* Per-mode keys for DIRECT_KEY policies, allocated on-demand */ + struct fscrypt_prepared_key mk_direct_keys[__FSCRYPT_MODE_MAX + 1]; - /* - * Crypto API transforms for filesystem-layer implementation of - * IV_INO_LBLK_64 policies, allocated on-demand. - */ - struct crypto_skcipher *mk_iv_ino_lblk_64_tfms[__FSCRYPT_MODE_MAX + 1]; + /* Per-mode keys for IV_INO_LBLK_64 policies, allocated on-demand */ + struct fscrypt_prepared_key mk_iv_ino_lblk_64_keys[__FSCRYPT_MODE_MAX + 1]; -#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT - /* Raw keys for IV_INO_LBLK_64 policies, allocated on-demand */ - u8 *mk_iv_ino_lblk_64_raw_keys[__FSCRYPT_MODE_MAX + 1]; - - /* The data unit size being used for inline encryption */ - unsigned int mk_data_unit_size; - - /* The filesystem's block device */ - struct block_device *mk_bdev; -#endif } __randomize_layout; static inline bool @@ -514,8 +536,8 @@ struct fscrypt_mode { const char *cipher_str; int keysize; int ivsize; - enum blk_crypto_mode_num blk_crypto_mode; int logged_impl_name; + enum blk_crypto_mode_num blk_crypto_mode; }; extern struct fscrypt_mode fscrypt_modes[]; @@ -526,9 +548,11 @@ fscrypt_mode_supports_direct_key(const struct fscrypt_mode *mode) return mode->ivsize >= offsetofend(union fscrypt_iv, nonce); } -extern struct crypto_skcipher * -fscrypt_allocate_skcipher(struct fscrypt_mode *mode, const u8 *raw_key, - const struct inode *inode); +extern int fscrypt_prepare_key(struct fscrypt_prepared_key *prep_key, + const u8 *raw_key, + const struct fscrypt_info *ci); + +extern void fscrypt_destroy_prepared_key(struct fscrypt_prepared_key *prep_key); extern int fscrypt_set_derived_key(struct fscrypt_info *ci, const u8 *derived_key); diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c index e41c6d66ff0d..af54d1fed3f6 100644 --- a/fs/crypto/inline_crypt.c +++ b/fs/crypto/inline_crypt.c @@ -15,187 +15,127 @@ #include #include #include -#include +#include #include "fscrypt_private.h" -/* Return true iff inline encryption should be used for this file */ -bool fscrypt_should_use_inline_encryption(const struct fscrypt_info *ci) +struct fscrypt_blk_crypto_key { + struct blk_crypto_key base; + int num_devs; + struct request_queue *devs[]; +}; + +/* Enable inline encryption for this file if supported. */ +void fscrypt_select_encryption_impl(struct fscrypt_info *ci) { const struct inode *inode = ci->ci_inode; struct super_block *sb = inode->i_sb; /* The file must need contents encryption, not filenames encryption */ if (!S_ISREG(inode->i_mode)) - return false; + return; /* blk-crypto must implement the needed encryption algorithm */ if (ci->ci_mode->blk_crypto_mode == BLK_ENCRYPTION_MODE_INVALID) - return false; - - /* DIRECT_KEY needs a 24+ byte IV, so it can't work with 8-byte DUNs */ - if (fscrypt_is_direct_key_policy(&ci->ci_policy)) - return false; + return; /* The filesystem must be mounted with -o inlinecrypt */ if (!sb->s_cop->inline_crypt_enabled || !sb->s_cop->inline_crypt_enabled(sb)) - return false; + return; - return true; + ci->ci_inlinecrypt = true; } -/* Set a per-file inline encryption key (for passing to blk-crypto) */ -int fscrypt_set_inline_crypt_key(struct fscrypt_info *ci, const u8 *derived_key) +int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, + const u8 *raw_key, + const struct fscrypt_info *ci) { - const struct fscrypt_mode *mode = ci->ci_mode; - const struct super_block *sb = ci->ci_inode->i_sb; - - ci->ci_inline_crypt_key = kmemdup(derived_key, mode->keysize, GFP_NOFS); - if (!ci->ci_inline_crypt_key) - return -ENOMEM; - ci->ci_owns_key = true; - - return blk_crypto_start_using_mode(mode->blk_crypto_mode, - sb->s_blocksize, - sb->s_bdev->bd_queue); -} - -/* Free a per-file inline encryption key and evict it from blk-crypto */ -void fscrypt_free_inline_crypt_key(struct fscrypt_info *ci) -{ - if (ci->ci_inline_crypt_key != NULL) { - const struct fscrypt_mode *mode = ci->ci_mode; - const struct super_block *sb = ci->ci_inode->i_sb; - - blk_crypto_evict_key(sb->s_bdev->bd_queue, - ci->ci_inline_crypt_key, - mode->blk_crypto_mode, sb->s_blocksize); - kzfree(ci->ci_inline_crypt_key); - } -} - -/* - * Set up ->inline_crypt_key (for passing to blk-crypto) for inodes which use an - * IV_INO_LBLK_64 encryption policy. - * - * Return: 0 on success, -errno on failure - */ -int fscrypt_setup_per_mode_inline_crypt_key(struct fscrypt_info *ci, - struct fscrypt_master_key *mk) -{ - static DEFINE_MUTEX(inline_crypt_setup_mutex); - const struct super_block *sb = ci->ci_inode->i_sb; - struct block_device *bdev = sb->s_bdev; - const struct fscrypt_mode *mode = ci->ci_mode; - const u8 mode_num = mode - fscrypt_modes; - u8 *raw_key; - u8 hkdf_info[sizeof(mode_num) + sizeof(sb->s_uuid)]; + const struct inode *inode = ci->ci_inode; + struct super_block *sb = inode->i_sb; + enum blk_crypto_mode_num crypto_mode = ci->ci_mode->blk_crypto_mode; + int num_devs = 1; + int queue_refs = 0; + struct fscrypt_blk_crypto_key *blk_key; int err; + int i; - if (WARN_ON(mode_num > __FSCRYPT_MODE_MAX)) + if (sb->s_cop->get_num_devices) + num_devs = sb->s_cop->get_num_devices(sb); + if (WARN_ON(num_devs < 1)) return -EINVAL; - /* pairs with smp_store_release() below */ - raw_key = smp_load_acquire(&mk->mk_iv_ino_lblk_64_raw_keys[mode_num]); - if (raw_key) { - err = 0; - goto out; + blk_key = kzalloc(struct_size(blk_key, devs, num_devs), GFP_NOFS); + if (!blk_key) + return -ENOMEM; + + blk_key->num_devs = num_devs; + if (num_devs == 1) + blk_key->devs[0] = bdev_get_queue(sb->s_bdev); + else + sb->s_cop->get_devices(sb, blk_key->devs); + + err = blk_crypto_init_key(&blk_key->base, raw_key, crypto_mode, + sb->s_blocksize); + if (err) { + fscrypt_err(inode, "error %d initializing blk-crypto key", err); + goto fail; } - mutex_lock(&inline_crypt_setup_mutex); - - raw_key = mk->mk_iv_ino_lblk_64_raw_keys[mode_num]; - if (raw_key) { - err = 0; - goto out_unlock; - } - - raw_key = kmalloc(mode->keysize, GFP_NOFS); - if (!raw_key) { - err = -ENOMEM; - goto out_unlock; - } - - BUILD_BUG_ON(sizeof(mode_num) != 1); - BUILD_BUG_ON(sizeof(sb->s_uuid) != 16); - BUILD_BUG_ON(sizeof(hkdf_info) != 17); - hkdf_info[0] = mode_num; - memcpy(&hkdf_info[1], &sb->s_uuid, sizeof(sb->s_uuid)); - - err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf, - HKDF_CONTEXT_IV_INO_LBLK_64_KEY, - hkdf_info, sizeof(hkdf_info), - raw_key, mode->keysize); - if (err) - goto out_unlock; - - err = blk_crypto_start_using_mode(mode->blk_crypto_mode, - sb->s_blocksize, bdev->bd_queue); - if (err) - goto out_unlock; - /* - * When a master key's first inline encryption key is set up, save a - * reference to the filesystem's block device so that the inline - * encryption keys can be evicted when the master key is destroyed. + * We have to start using blk-crypto on all the filesystem's devices. + * We also have to save all the request_queue's for later so that the + * key can be evicted from them. This is needed because some keys + * aren't destroyed until after the filesystem was already unmounted + * (namely, the per-mode keys in struct fscrypt_master_key). */ - if (!mk->mk_bdev) { - mk->mk_bdev = bdgrab(bdev); - mk->mk_data_unit_size = sb->s_blocksize; - } + for (i = 0; i < num_devs; i++) { + if (!blk_get_queue(blk_key->devs[i])) { + fscrypt_err(inode, "couldn't get request_queue"); + err = -EAGAIN; + goto fail; + } + queue_refs++; - /* pairs with smp_load_acquire() above */ - smp_store_release(&mk->mk_iv_ino_lblk_64_raw_keys[mode_num], raw_key); - err = 0; -out_unlock: - mutex_unlock(&inline_crypt_setup_mutex); -out: - if (err == 0) { - ci->ci_inline_crypt_key = raw_key; - /* - * Since each struct fscrypt_master_key belongs to a particular - * filesystem (a struct super_block), there should be only one - * block device, and only one data unit size as it should equal - * the filesystem's blocksize (i.e. s_blocksize). - */ - if (WARN_ON(mk->mk_bdev != bdev)) - err = -EINVAL; - if (WARN_ON(mk->mk_data_unit_size != sb->s_blocksize)) - err = -EINVAL; - } else { - kzfree(raw_key); + err = blk_crypto_start_using_mode(crypto_mode, sb->s_blocksize, + blk_key->devs[i]); + if (err) { + fscrypt_err(inode, + "error %d starting to use blk-crypto", err); + goto fail; + } } + /* + * Pairs with READ_ONCE() in fscrypt_is_key_prepared(). (Only matters + * for the per-mode keys, which are shared by multiple inodes.) + */ + smp_store_release(&prep_key->blk_key, blk_key); + return 0; + +fail: + for (i = 0; i < queue_refs; i++) + blk_put_queue(blk_key->devs[i]); + kzfree(blk_key); return err; } -/* - * Evict per-mode inline encryption keys from blk-crypto when a master key is - * destroyed. - */ -void fscrypt_evict_inline_crypt_keys(struct fscrypt_master_key *mk) +void fscrypt_destroy_inline_crypt_key(struct fscrypt_prepared_key *prep_key) { - struct block_device *bdev = mk->mk_bdev; - size_t i; + struct fscrypt_blk_crypto_key *blk_key = prep_key->blk_key; + int i; - if (!bdev) /* No inline encryption keys? */ - return; - - for (i = 0; i < ARRAY_SIZE(mk->mk_iv_ino_lblk_64_raw_keys); i++) { - u8 *raw_key = mk->mk_iv_ino_lblk_64_raw_keys[i]; - - if (raw_key != NULL) { - blk_crypto_evict_key(bdev->bd_queue, raw_key, - fscrypt_modes[i].blk_crypto_mode, - mk->mk_data_unit_size); - kzfree(raw_key); + if (blk_key) { + for (i = 0; i < blk_key->num_devs; i++) { + blk_crypto_evict_key(blk_key->devs[i], &blk_key->base); + blk_put_queue(blk_key->devs[i]); } + kzfree(blk_key); } - bdput(bdev); } /** - * fscrypt_inode_uses_inline_crypto - test whether an inode uses inline encryption + * fscrypt_inode_uses_inline_crypto - test whether an inode uses inline + * encryption * @inode: an inode * * Return: true if the inode requires file contents encryption and if the @@ -205,12 +145,13 @@ void fscrypt_evict_inline_crypt_keys(struct fscrypt_master_key *mk) bool fscrypt_inode_uses_inline_crypto(const struct inode *inode) { return IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode) && - inode->i_crypt_info->ci_inline_crypt_key != NULL; + inode->i_crypt_info->ci_inlinecrypt; } EXPORT_SYMBOL_GPL(fscrypt_inode_uses_inline_crypto); /** - * fscrypt_inode_uses_fs_layer_crypto - test whether an inode uses fs-layer encryption + * fscrypt_inode_uses_fs_layer_crypto - test whether an inode uses fs-layer + * encryption * @inode: an inode * * Return: true if the inode requires file contents encryption and if the @@ -220,22 +161,22 @@ EXPORT_SYMBOL_GPL(fscrypt_inode_uses_inline_crypto); bool fscrypt_inode_uses_fs_layer_crypto(const struct inode *inode) { return IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode) && - inode->i_crypt_info->ci_inline_crypt_key == NULL; + !inode->i_crypt_info->ci_inlinecrypt; } EXPORT_SYMBOL_GPL(fscrypt_inode_uses_fs_layer_crypto); -static inline u64 fscrypt_generate_dun(const struct fscrypt_info *ci, - u64 lblk_num) +static void fscrypt_generate_dun(const struct fscrypt_info *ci, u64 lblk_num, + u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE]) { union fscrypt_iv iv; + int i; fscrypt_generate_iv(&iv, lblk_num, ci); - /* - * fscrypt_should_use_inline_encryption() ensures we never get here if - * more than the first 8 bytes of the IV are nonzero. - */ - BUG_ON(memchr_inv(&iv.raw[8], 0, ci->ci_mode->ivsize - 8)); - return le64_to_cpu(iv.lblk_num); + + BUILD_BUG_ON(FSCRYPT_MAX_IV_SIZE > BLK_CRYPTO_MAX_IV_SIZE); + memset(dun, 0, BLK_CRYPTO_MAX_IV_SIZE); + for (i = 0; i < ci->ci_mode->ivsize/sizeof(dun[0]); i++) + dun[i] = le64_to_cpu(iv.dun[i]); } /** @@ -243,7 +184,8 @@ static inline u64 fscrypt_generate_dun(const struct fscrypt_info *ci, * @bio: a bio which will eventually be submitted to the file * @inode: the file's inode * @first_lblk: the first file logical block number in the I/O - * @gfp_mask: memory allocation flags + * @gfp_mask: memory allocation flags - these must be a waiting mask so that + * bio_crypt_set_ctx can't fail. * * If the contents of the file should be encrypted (or decrypted) with inline * encryption, then assign the appropriate encryption context to the bio. @@ -252,24 +194,18 @@ static inline u64 fscrypt_generate_dun(const struct fscrypt_info *ci, * otherwise fscrypt_mergeable_bio() won't work as intended. * * The encryption context will be freed automatically when the bio is freed. - * - * Return: 0 on success, -errno on failure. If __GFP_NOFAIL is specified, this - * is guaranteed to succeed. */ -int fscrypt_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode, - u64 first_lblk, gfp_t gfp_mask) +void fscrypt_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode, + u64 first_lblk, gfp_t gfp_mask) { const struct fscrypt_info *ci = inode->i_crypt_info; - u64 dun; + u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE]; if (!fscrypt_inode_uses_inline_crypto(inode)) - return 0; + return; - dun = fscrypt_generate_dun(ci, first_lblk); - - return bio_crypt_set_ctx(bio, ci->ci_inline_crypt_key, - ci->ci_mode->blk_crypto_mode, - dun, inode->i_blkbits, gfp_mask); + fscrypt_generate_dun(ci, first_lblk, dun); + bio_crypt_set_ctx(bio, &ci->ci_key.blk_key->base, dun, gfp_mask); } EXPORT_SYMBOL_GPL(fscrypt_set_bio_crypt_ctx); @@ -298,27 +234,24 @@ static bool bh_get_inode_and_lblk_num(const struct buffer_head *bh, } /** - * fscrypt_set_bio_crypt_ctx_bh - prepare a file contents bio for inline encryption + * fscrypt_set_bio_crypt_ctx_bh - prepare a file contents bio for inline + * encryption * @bio: a bio which will eventually be submitted to the file * @first_bh: the first buffer_head for which I/O will be submitted * @gfp_mask: memory allocation flags * * Same as fscrypt_set_bio_crypt_ctx(), except this takes a buffer_head instead * of an inode and block number directly. - * - * Return: 0 on success, -errno on failure */ -int fscrypt_set_bio_crypt_ctx_bh(struct bio *bio, +void fscrypt_set_bio_crypt_ctx_bh(struct bio *bio, const struct buffer_head *first_bh, gfp_t gfp_mask) { const struct inode *inode; u64 first_lblk; - if (!bh_get_inode_and_lblk_num(first_bh, &inode, &first_lblk)) - return 0; - - return fscrypt_set_bio_crypt_ctx(bio, inode, first_lblk, gfp_mask); + if (bh_get_inode_and_lblk_num(first_bh, &inode, &first_lblk)) + fscrypt_set_bio_crypt_ctx(bio, inode, first_lblk, gfp_mask); } EXPORT_SYMBOL_GPL(fscrypt_set_bio_crypt_ctx_bh); @@ -342,27 +275,24 @@ EXPORT_SYMBOL_GPL(fscrypt_set_bio_crypt_ctx_bh); bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode, u64 next_lblk) { - const struct bio_crypt_ctx *bc; - const u8 *next_key; - u64 next_dun; + const struct bio_crypt_ctx *bc = bio->bi_crypt_context; + u64 next_dun[BLK_CRYPTO_DUN_ARRAY_SIZE]; - if (bio_has_crypt_ctx(bio) != fscrypt_inode_uses_inline_crypto(inode)) + if (!!bc != fscrypt_inode_uses_inline_crypto(inode)) return false; - if (!bio_has_crypt_ctx(bio)) + if (!bc) return true; - bc = bio->bi_crypt_context; - next_key = inode->i_crypt_info->ci_inline_crypt_key; - next_dun = fscrypt_generate_dun(inode->i_crypt_info, next_lblk); /* * Comparing the key pointers is good enough, as all I/O for each key * uses the same pointer. I.e., there's currently no need to support * merging requests where the keys are the same but the pointers differ. */ - return next_key == bc->raw_key && - next_dun == bc->data_unit_num + - (bio_sectors(bio) >> - (bc->data_unit_size_bits - SECTOR_SHIFT)); + if (bc->bc_key != &inode->i_crypt_info->ci_key.blk_key->base) + return false; + + fscrypt_generate_dun(inode->i_crypt_info, next_lblk, next_dun); + return bio_crypt_dun_is_contiguous(bc, bio->bi_iter.bi_size, next_dun); } EXPORT_SYMBOL_GPL(fscrypt_mergeable_bio); @@ -383,7 +313,7 @@ bool fscrypt_mergeable_bio_bh(struct bio *bio, u64 next_lblk; if (!bh_get_inode_and_lblk_num(next_bh, &inode, &next_lblk)) - return !bio_has_crypt_ctx(bio); + return !bio->bi_crypt_context; return fscrypt_mergeable_bio(bio, inode, next_lblk); } diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c index 48d3a0c9a415..9052197d8f5a 100644 --- a/fs/crypto/keyring.c +++ b/fs/crypto/keyring.c @@ -44,12 +44,10 @@ static void free_master_key(struct fscrypt_master_key *mk) wipe_master_key_secret(&mk->mk_secret); for (i = 0; i <= __FSCRYPT_MODE_MAX; i++) { - crypto_free_skcipher(mk->mk_direct_tfms[i]); - crypto_free_skcipher(mk->mk_iv_ino_lblk_64_tfms[i]); + fscrypt_destroy_prepared_key(&mk->mk_direct_keys[i]); + fscrypt_destroy_prepared_key(&mk->mk_iv_ino_lblk_64_keys[i]); } - fscrypt_evict_inline_crypt_keys(mk); - key_put(mk->mk_users); kzfree(mk); } diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index 309c23bf87e1..8c2675d0b2d0 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -32,6 +32,7 @@ struct fscrypt_mode fscrypt_modes[] = { .cipher_str = "essiv(cbc(aes),sha256)", .keysize = 16, .ivsize = 16, + .blk_crypto_mode = BLK_ENCRYPTION_MODE_AES_128_CBC_ESSIV, }, [FSCRYPT_MODE_AES_128_CTS] = { .friendly_name = "AES-128-CTS-CBC", @@ -44,6 +45,7 @@ struct fscrypt_mode fscrypt_modes[] = { .cipher_str = "adiantum(xchacha12,aes)", .keysize = 32, .ivsize = 32, + .blk_crypto_mode = BLK_ENCRYPTION_MODE_ADIANTUM, }, }; @@ -63,9 +65,9 @@ select_encryption_mode(const union fscrypt_policy *policy, } /* Create a symmetric cipher object for the given encryption mode and key */ -struct crypto_skcipher *fscrypt_allocate_skcipher(struct fscrypt_mode *mode, - const u8 *raw_key, - const struct inode *inode) +static struct crypto_skcipher * +fscrypt_allocate_skcipher(struct fscrypt_mode *mode, const u8 *raw_key, + const struct inode *inode) { struct crypto_skcipher *tfm; int err; @@ -105,33 +107,55 @@ err_free_tfm: return ERR_PTR(err); } -/* Given the per-file key, set up the file's crypto transform object */ -int fscrypt_set_derived_key(struct fscrypt_info *ci, const u8 *derived_key) +/* + * Prepare the crypto transform object or blk-crypto key in @prep_key, given the + * raw key, encryption mode, and flag indicating which encryption implementation + * (fs-layer or blk-crypto) will be used. + */ +int fscrypt_prepare_key(struct fscrypt_prepared_key *prep_key, + const u8 *raw_key, const struct fscrypt_info *ci) { struct crypto_skcipher *tfm; - if (fscrypt_should_use_inline_encryption(ci)) - return fscrypt_set_inline_crypt_key(ci, derived_key); + if (fscrypt_using_inline_encryption(ci)) + return fscrypt_prepare_inline_crypt_key(prep_key, raw_key, ci); - tfm = fscrypt_allocate_skcipher(ci->ci_mode, derived_key, ci->ci_inode); + tfm = fscrypt_allocate_skcipher(ci->ci_mode, raw_key, ci->ci_inode); if (IS_ERR(tfm)) return PTR_ERR(tfm); - - ci->ci_ctfm = tfm; - ci->ci_owns_key = true; + /* + * Pairs with READ_ONCE() in fscrypt_is_key_prepared(). (Only matters + * for the per-mode keys, which are shared by multiple inodes.) + */ + smp_store_release(&prep_key->tfm, tfm); return 0; } +/* Destroy a crypto transform object and/or blk-crypto key. */ +void fscrypt_destroy_prepared_key(struct fscrypt_prepared_key *prep_key) +{ + crypto_free_skcipher(prep_key->tfm); + fscrypt_destroy_inline_crypt_key(prep_key); +} + +/* Given the per-file key, set up the file's crypto transform object */ +int fscrypt_set_derived_key(struct fscrypt_info *ci, const u8 *derived_key) +{ + ci->ci_owns_key = true; + return fscrypt_prepare_key(&ci->ci_key, derived_key, ci); +} + static int setup_per_mode_key(struct fscrypt_info *ci, struct fscrypt_master_key *mk, - struct crypto_skcipher **tfms, + struct fscrypt_prepared_key *keys, u8 hkdf_context, bool include_fs_uuid) { + static DEFINE_MUTEX(mode_key_setup_mutex); const struct inode *inode = ci->ci_inode; const struct super_block *sb = inode->i_sb; struct fscrypt_mode *mode = ci->ci_mode; const u8 mode_num = mode - fscrypt_modes; - struct crypto_skcipher *tfm, *prev_tfm; + struct fscrypt_prepared_key *prep_key; u8 mode_key[FSCRYPT_MAX_KEY_SIZE]; u8 hkdf_info[sizeof(mode_num) + sizeof(sb->s_uuid)]; unsigned int hkdf_infolen = 0; @@ -140,10 +164,16 @@ static int setup_per_mode_key(struct fscrypt_info *ci, if (WARN_ON(mode_num > __FSCRYPT_MODE_MAX)) return -EINVAL; - /* pairs with cmpxchg() below */ - tfm = READ_ONCE(tfms[mode_num]); - if (likely(tfm != NULL)) - goto done; + prep_key = &keys[mode_num]; + if (fscrypt_is_key_prepared(prep_key, ci)) { + ci->ci_key = *prep_key; + return 0; + } + + mutex_lock(&mode_key_setup_mutex); + + if (fscrypt_is_key_prepared(prep_key, ci)) + goto done_unlock; BUILD_BUG_ON(sizeof(mode_num) != 1); BUILD_BUG_ON(sizeof(sb->s_uuid) != 16); @@ -158,21 +188,17 @@ static int setup_per_mode_key(struct fscrypt_info *ci, hkdf_context, hkdf_info, hkdf_infolen, mode_key, mode->keysize); if (err) - return err; - tfm = fscrypt_allocate_skcipher(mode, mode_key, inode); + goto out_unlock; + err = fscrypt_prepare_key(prep_key, mode_key, ci); memzero_explicit(mode_key, mode->keysize); - if (IS_ERR(tfm)) - return PTR_ERR(tfm); - - /* pairs with READ_ONCE() above */ - prev_tfm = cmpxchg(&tfms[mode_num], NULL, tfm); - if (prev_tfm != NULL) { - crypto_free_skcipher(tfm); - tfm = prev_tfm; - } -done: - ci->ci_ctfm = tfm; - return 0; + if (err) + goto out_unlock; +done_unlock: + ci->ci_key = *prep_key; + err = 0; +out_unlock: + mutex_unlock(&mode_key_setup_mutex); + return err; } static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci, @@ -196,7 +222,7 @@ static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci, ci->ci_mode->friendly_name); return -EINVAL; } - return setup_per_mode_key(ci, mk, mk->mk_direct_tfms, + return setup_per_mode_key(ci, mk, mk->mk_direct_keys, HKDF_CONTEXT_DIRECT_KEY, false); } else if (ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64) { @@ -206,9 +232,7 @@ static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci, * the IVs. This format is optimized for use with inline * encryption hardware compliant with the UFS or eMMC standards. */ - if (fscrypt_should_use_inline_encryption(ci)) - return fscrypt_setup_per_mode_inline_crypt_key(ci, mk); - return setup_per_mode_key(ci, mk, mk->mk_iv_ino_lblk_64_tfms, + return setup_per_mode_key(ci, mk, mk->mk_iv_ino_lblk_64_keys, HKDF_CONTEXT_IV_INO_LBLK_64_KEY, true); } @@ -243,6 +267,8 @@ static int setup_file_encryption_key(struct fscrypt_info *ci, struct fscrypt_key_specifier mk_spec; int err; + fscrypt_select_encryption_impl(ci); + switch (ci->ci_policy.version) { case FSCRYPT_POLICY_V1: mk_spec.type = FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR; @@ -334,10 +360,8 @@ static void put_crypt_info(struct fscrypt_info *ci) if (ci->ci_direct_key) fscrypt_put_direct_key(ci->ci_direct_key); - else if (ci->ci_owns_key) { - crypto_free_skcipher(ci->ci_ctfm); - fscrypt_free_inline_crypt_key(ci); - } + else if (ci->ci_owns_key) + fscrypt_destroy_prepared_key(&ci->ci_key); key = ci->ci_master_key; if (key) { diff --git a/fs/crypto/keysetup_v1.c b/fs/crypto/keysetup_v1.c index 454fb03fc30e..8cafdbf47002 100644 --- a/fs/crypto/keysetup_v1.c +++ b/fs/crypto/keysetup_v1.c @@ -146,7 +146,7 @@ struct fscrypt_direct_key { struct hlist_node dk_node; refcount_t dk_refcount; const struct fscrypt_mode *dk_mode; - struct crypto_skcipher *dk_ctfm; + struct fscrypt_prepared_key dk_key; u8 dk_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE]; u8 dk_raw[FSCRYPT_MAX_KEY_SIZE]; }; @@ -154,7 +154,7 @@ struct fscrypt_direct_key { static void free_direct_key(struct fscrypt_direct_key *dk) { if (dk) { - crypto_free_skcipher(dk->dk_ctfm); + fscrypt_destroy_prepared_key(&dk->dk_key); kzfree(dk); } } @@ -199,6 +199,8 @@ find_or_insert_direct_key(struct fscrypt_direct_key *to_insert, continue; if (ci->ci_mode != dk->dk_mode) continue; + if (!fscrypt_is_key_prepared(&dk->dk_key, ci)) + continue; if (crypto_memneq(raw_key, dk->dk_raw, ci->ci_mode->keysize)) continue; /* using existing tfm with same (descriptor, mode, raw_key) */ @@ -231,13 +233,9 @@ fscrypt_get_direct_key(const struct fscrypt_info *ci, const u8 *raw_key) return ERR_PTR(-ENOMEM); refcount_set(&dk->dk_refcount, 1); dk->dk_mode = ci->ci_mode; - dk->dk_ctfm = fscrypt_allocate_skcipher(ci->ci_mode, raw_key, - ci->ci_inode); - if (IS_ERR(dk->dk_ctfm)) { - err = PTR_ERR(dk->dk_ctfm); - dk->dk_ctfm = NULL; + err = fscrypt_prepare_key(&dk->dk_key, raw_key, ci); + if (err) goto err_free_dk; - } memcpy(dk->dk_descriptor, ci->ci_policy.v1.master_key_descriptor, FSCRYPT_KEY_DESCRIPTOR_SIZE); memcpy(dk->dk_raw, raw_key, ci->ci_mode->keysize); @@ -274,7 +272,7 @@ static int setup_v1_file_key_direct(struct fscrypt_info *ci, if (IS_ERR(dk)) return PTR_ERR(dk); ci->ci_direct_key = dk; - ci->ci_ctfm = dk->dk_ctfm; + ci->ci_key = dk->dk_key; return 0; } diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 9d547bace7f2..18509ea3d7c7 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -362,16 +362,11 @@ static int io_submit_init_bio(struct ext4_io_submit *io, struct buffer_head *bh) { struct bio *bio; - int err; bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); if (!bio) return -ENOMEM; - err = fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO); - if (err) { - bio_put(bio); - return err; - } + fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO); wbc_init_bio(io->io_wbc, bio); bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); bio_set_dev(bio, bh->b_bdev); diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index a92409f1d0ae..405b91f258a4 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -405,12 +405,8 @@ int ext4_mpage_readpages(struct address_space *mapping, min_t(int, nr_pages, BIO_MAX_PAGES)); if (!bio) goto set_error_page; - if (fscrypt_set_bio_crypt_ctx(bio, inode, next_block, - GFP_KERNEL) != 0) { - bio_put(bio); - bio = NULL; - goto set_error_page; - } + fscrypt_set_bio_crypt_ctx(bio, inode, next_block, + GFP_KERNEL); ctx = get_bio_post_read_ctx(inode, bio, page->index); if (IS_ERR(ctx)) { bio_put(bio); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index bdd472783795..59296091afc0 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -317,7 +317,7 @@ static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages) return bio; } -static int f2fs_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode, +static void f2fs_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode, pgoff_t first_idx, const struct f2fs_io_info *fio, gfp_t gfp_mask) @@ -326,10 +326,8 @@ static int f2fs_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode, * The f2fs garbage collector sets ->encrypted_page when it wants to * read/write raw data without encryption. */ - if (fio && fio->encrypted_page) - return 0; - - return fscrypt_set_bio_crypt_ctx(bio, inode, first_idx, gfp_mask); + if (!fio || !fio->encrypted_page) + fscrypt_set_bio_crypt_ctx(bio, inode, first_idx, gfp_mask); } static bool f2fs_crypt_mergeable_bio(struct bio *bio, const struct inode *inode, @@ -341,7 +339,7 @@ static bool f2fs_crypt_mergeable_bio(struct bio *bio, const struct inode *inode, * read/write raw data without encryption. */ if (fio && fio->encrypted_page) - return true; + return !bio_has_crypt_ctx(bio); return fscrypt_mergeable_bio(bio, inode, next_idx); } @@ -543,7 +541,6 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) struct bio *bio; struct page *page = fio->encrypted_page ? fio->encrypted_page : fio->page; - int err; if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, fio->is_por ? META_POR : (__is_meta_io(fio) ? @@ -556,12 +553,8 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) /* Allocate a new bio */ bio = __bio_alloc(fio, 1); - err = f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host, - fio->page->index, fio, GFP_NOIO); - if (err) { - bio_put(bio); - return err; - } + f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host, + fio->page->index, fio, GFP_NOIO); if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { bio_put(bio); @@ -763,7 +756,7 @@ alloc_new: bio = __bio_alloc(fio, BIO_MAX_PAGES); f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host, fio->page->index, fio, - GFP_NOIO | __GFP_NOFAIL); + GFP_NOIO); bio_set_op_attrs(bio, fio->op, fio->op_flags); add_bio_entry(fio->sbi, bio, page, fio->temp); @@ -833,7 +826,7 @@ alloc_new: io->bio = __bio_alloc(fio, BIO_MAX_PAGES); f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host, fio->page->index, fio, - GFP_NOIO | __GFP_NOFAIL); + GFP_NOIO); io->fio = *fio; } @@ -873,17 +866,12 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, struct bio *bio; struct bio_post_read_ctx *ctx; unsigned int post_read_steps = 0; - int err; bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false); if (!bio) return ERR_PTR(-ENOMEM); - err = f2fs_set_bio_crypt_ctx(bio, inode, first_idx, NULL, GFP_NOFS); - if (err) { - bio_put(bio); - return ERR_PTR(err); - } + f2fs_set_bio_crypt_ctx(bio, inode, first_idx, NULL, GFP_NOFS); f2fs_target_device(sbi, blkaddr, bio); bio->bi_end_io = f2fs_read_end_io; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f3d0e9375e53..1aba3eb96c40 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2355,6 +2355,25 @@ static bool f2fs_inline_crypt_enabled(struct super_block *sb) return F2FS_OPTION(F2FS_SB(sb)).inlinecrypt; } +static int f2fs_get_num_devices(struct super_block *sb) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + + if (f2fs_is_multi_device(sbi)) + return sbi->s_ndevs; + return 1; +} + +static void f2fs_get_devices(struct super_block *sb, + struct request_queue **devs) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + int i; + + for (i = 0; i < sbi->s_ndevs; i++) + devs[i] = bdev_get_queue(FDEV(i).bdev); +} + static const struct fscrypt_operations f2fs_cryptops = { .key_prefix = "f2fs:", .get_context = f2fs_get_context, @@ -2365,6 +2384,8 @@ static const struct fscrypt_operations f2fs_cryptops = { .has_stable_inodes = f2fs_has_stable_inodes, .get_ino_and_lblk_bits = f2fs_get_ino_and_lblk_bits, .inline_crypt_enabled = f2fs_inline_crypt_enabled, + .get_num_devices = f2fs_get_num_devices, + .get_devices = f2fs_get_devices, }; #endif diff --git a/include/linux/bio-crypt-ctx.h b/include/linux/bio-crypt-ctx.h index 7c389f310bab..4535df0a6349 100644 --- a/include/linux/bio-crypt-ctx.h +++ b/include/linux/bio-crypt-ctx.h @@ -6,221 +6,188 @@ #define __LINUX_BIO_CRYPT_CTX_H enum blk_crypto_mode_num { - BLK_ENCRYPTION_MODE_INVALID = 0, - BLK_ENCRYPTION_MODE_AES_256_XTS = 1, + BLK_ENCRYPTION_MODE_INVALID, + BLK_ENCRYPTION_MODE_AES_256_XTS, + BLK_ENCRYPTION_MODE_AES_128_CBC_ESSIV, + BLK_ENCRYPTION_MODE_ADIANTUM, + BLK_ENCRYPTION_MODE_MAX, }; #ifdef CONFIG_BLOCK #include #ifdef CONFIG_BLK_INLINE_ENCRYPTION -struct bio_crypt_ctx { - int keyslot; - const u8 *raw_key; + +#define BLK_CRYPTO_MAX_KEY_SIZE 64 + +/** + * struct blk_crypto_key - an inline encryption key + * @crypto_mode: encryption algorithm this key is for + * @data_unit_size: the data unit size for all encryption/decryptions with this + * key. This is the size in bytes of each individual plaintext and + * ciphertext. This is always a power of 2. It might be e.g. the + * filesystem block size or the disk sector size. + * @data_unit_size_bits: log2 of data_unit_size + * @size: size of this key in bytes (determined by @crypto_mode) + * @hash: hash of this key, for keyslot manager use only + * @raw: the raw bytes of this key. Only the first @size bytes are used. + * + * A blk_crypto_key is immutable once created, and many bios can reference it at + * the same time. It must not be freed until all bios using it have completed. + */ +struct blk_crypto_key { enum blk_crypto_mode_num crypto_mode; - u64 data_unit_num; + unsigned int data_unit_size; unsigned int data_unit_size_bits; + unsigned int size; + unsigned int hash; + u8 raw[BLK_CRYPTO_MAX_KEY_SIZE]; +}; + +#define BLK_CRYPTO_MAX_IV_SIZE 32 +#define BLK_CRYPTO_DUN_ARRAY_SIZE (BLK_CRYPTO_MAX_IV_SIZE/sizeof(u64)) + +/** + * struct bio_crypt_ctx - an inline encryption context + * @bc_key: the key, algorithm, and data unit size to use + * @bc_keyslot: the keyslot that has been assigned for this key in @bc_ksm, + * or -1 if no keyslot has been assigned yet. + * @bc_dun: the data unit number (starting IV) to use + * @bc_ksm: the keyslot manager into which the key has been programmed with + * @bc_keyslot, or NULL if this key hasn't yet been programmed. + * + * A bio_crypt_ctx specifies that the contents of the bio will be encrypted (for + * write requests) or decrypted (for read requests) inline by the storage device + * or controller, or by the crypto API fallback. + */ +struct bio_crypt_ctx { + const struct blk_crypto_key *bc_key; + int bc_keyslot; + + /* Data unit number */ + u64 bc_dun[BLK_CRYPTO_DUN_ARRAY_SIZE]; /* * The keyslot manager where the key has been programmed * with keyslot. */ - struct keyslot_manager *processing_ksm; - - /* - * Copy of the bvec_iter when this bio was submitted. - * We only want to en/decrypt the part of the bio - * as described by the bvec_iter upon submission because - * bio might be split before being resubmitted - */ - struct bvec_iter crypt_iter; - u64 sw_data_unit_num; + struct keyslot_manager *bc_ksm; }; -extern int bio_crypt_clone(struct bio *dst, struct bio *src, - gfp_t gfp_mask); +int bio_crypt_ctx_init(void); + +struct bio_crypt_ctx *bio_crypt_alloc_ctx(gfp_t gfp_mask); + +void bio_crypt_free_ctx(struct bio *bio); static inline bool bio_has_crypt_ctx(struct bio *bio) { return bio->bi_crypt_context; } -static inline void bio_crypt_advance(struct bio *bio, unsigned int bytes) +void bio_crypt_clone(struct bio *dst, struct bio *src, gfp_t gfp_mask); + +static inline void bio_crypt_set_ctx(struct bio *bio, + const struct blk_crypto_key *key, + u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE], + gfp_t gfp_mask) { - if (bio_has_crypt_ctx(bio)) { - bio->bi_crypt_context->data_unit_num += - bytes >> bio->bi_crypt_context->data_unit_size_bits; + struct bio_crypt_ctx *bc = bio_crypt_alloc_ctx(gfp_mask); + + bc->bc_key = key; + memcpy(bc->bc_dun, dun, sizeof(bc->bc_dun)); + bc->bc_ksm = NULL; + bc->bc_keyslot = -1; + + bio->bi_crypt_context = bc; +} + +void bio_crypt_ctx_release_keyslot(struct bio_crypt_ctx *bc); + +int bio_crypt_ctx_acquire_keyslot(struct bio_crypt_ctx *bc, + struct keyslot_manager *ksm); + +struct request; +bool bio_crypt_should_process(struct request *rq); + +static inline bool bio_crypt_dun_is_contiguous(const struct bio_crypt_ctx *bc, + unsigned int bytes, + u64 next_dun[BLK_CRYPTO_DUN_ARRAY_SIZE]) +{ + int i = 0; + unsigned int inc = bytes >> bc->bc_key->data_unit_size_bits; + + while (inc && i < BLK_CRYPTO_DUN_ARRAY_SIZE) { + if (bc->bc_dun[i] + inc != next_dun[i]) + return false; + inc = ((bc->bc_dun[i] + inc) < inc); + i++; + } + + return true; +} + + +static inline void bio_crypt_dun_increment(u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE], + unsigned int inc) +{ + int i = 0; + + while (inc && i < BLK_CRYPTO_DUN_ARRAY_SIZE) { + dun[i] += inc; + inc = (dun[i] < inc); + i++; } } -extern bool bio_crypt_swhandled(struct bio *bio); - -static inline bool bio_crypt_has_keyslot(struct bio *bio) +static inline void bio_crypt_advance(struct bio *bio, unsigned int bytes) { - return bio->bi_crypt_context->keyslot >= 0; + struct bio_crypt_ctx *bc = bio->bi_crypt_context; + + if (!bc) + return; + + bio_crypt_dun_increment(bc->bc_dun, + bytes >> bc->bc_key->data_unit_size_bits); } -extern int bio_crypt_ctx_init(void); +bool bio_crypt_ctx_compatible(struct bio *b_1, struct bio *b_2); -extern struct bio_crypt_ctx *bio_crypt_alloc_ctx(gfp_t gfp_mask); - -extern void bio_crypt_free_ctx(struct bio *bio); - -static inline int bio_crypt_set_ctx(struct bio *bio, - const u8 *raw_key, - enum blk_crypto_mode_num crypto_mode, - u64 dun, - unsigned int dun_bits, - gfp_t gfp_mask) -{ - struct bio_crypt_ctx *crypt_ctx; - - crypt_ctx = bio_crypt_alloc_ctx(gfp_mask); - if (!crypt_ctx) - return -ENOMEM; - - crypt_ctx->raw_key = raw_key; - crypt_ctx->data_unit_num = dun; - crypt_ctx->data_unit_size_bits = dun_bits; - crypt_ctx->crypto_mode = crypto_mode; - crypt_ctx->processing_ksm = NULL; - crypt_ctx->keyslot = -1; - bio->bi_crypt_context = crypt_ctx; - - return 0; -} - -static inline void bio_set_data_unit_num(struct bio *bio, u64 dun) -{ - bio->bi_crypt_context->data_unit_num = dun; -} - -static inline int bio_crypt_get_keyslot(struct bio *bio) -{ - return bio->bi_crypt_context->keyslot; -} - -static inline void bio_crypt_set_keyslot(struct bio *bio, - unsigned int keyslot, - struct keyslot_manager *ksm) -{ - bio->bi_crypt_context->keyslot = keyslot; - bio->bi_crypt_context->processing_ksm = ksm; -} - -extern void bio_crypt_ctx_release_keyslot(struct bio *bio); - -extern int bio_crypt_ctx_acquire_keyslot(struct bio *bio, - struct keyslot_manager *ksm); - -static inline const u8 *bio_crypt_raw_key(struct bio *bio) -{ - return bio->bi_crypt_context->raw_key; -} - -static inline enum blk_crypto_mode_num bio_crypto_mode(struct bio *bio) -{ - return bio->bi_crypt_context->crypto_mode; -} - -static inline u64 bio_crypt_data_unit_num(struct bio *bio) -{ - return bio->bi_crypt_context->data_unit_num; -} - -static inline u64 bio_crypt_sw_data_unit_num(struct bio *bio) -{ - return bio->bi_crypt_context->sw_data_unit_num; -} - -extern bool bio_crypt_should_process(struct bio *bio, struct request_queue *q); - -extern bool bio_crypt_ctx_compatible(struct bio *b_1, struct bio *b_2); - -extern bool bio_crypt_ctx_back_mergeable(struct bio *b_1, - unsigned int b1_sectors, - struct bio *b_2); +bool bio_crypt_ctx_mergeable(struct bio *b_1, unsigned int b1_bytes, + struct bio *b_2); #else /* CONFIG_BLK_INLINE_ENCRYPTION */ -struct keyslot_manager; - static inline int bio_crypt_ctx_init(void) { return 0; } -static inline int bio_crypt_clone(struct bio *dst, struct bio *src, - gfp_t gfp_mask) -{ - return 0; -} - -static inline void bio_crypt_advance(struct bio *bio, - unsigned int bytes) { } - static inline bool bio_has_crypt_ctx(struct bio *bio) { return false; } +static inline void bio_crypt_clone(struct bio *dst, struct bio *src, + gfp_t gfp_mask) { } + static inline void bio_crypt_free_ctx(struct bio *bio) { } -static inline void bio_crypt_set_ctx(struct bio *bio, - u8 *raw_key, - enum blk_crypto_mode_num crypto_mode, - u64 dun, - unsigned int dun_bits, - gfp_t gfp_mask) { } - -static inline bool bio_crypt_swhandled(struct bio *bio) -{ - return false; -} - -static inline void bio_set_data_unit_num(struct bio *bio, u64 dun) { } - -static inline bool bio_crypt_has_keyslot(struct bio *bio) -{ - return false; -} - -static inline void bio_crypt_set_keyslot(struct bio *bio, - unsigned int keyslot, - struct keyslot_manager *ksm) { } - -static inline int bio_crypt_get_keyslot(struct bio *bio) -{ - return -1; -} - -static inline u8 *bio_crypt_raw_key(struct bio *bio) -{ - return NULL; -} - -static inline u64 bio_crypt_data_unit_num(struct bio *bio) -{ - return 0; -} - -static inline bool bio_crypt_should_process(struct bio *bio, - struct request_queue *q) -{ - return false; -} +static inline void bio_crypt_advance(struct bio *bio, unsigned int bytes) { } static inline bool bio_crypt_ctx_compatible(struct bio *b_1, struct bio *b_2) { return true; } -static inline bool bio_crypt_ctx_back_mergeable(struct bio *b_1, - unsigned int b1_sectors, - struct bio *b_2) +static inline bool bio_crypt_ctx_mergeable(struct bio *b_1, + unsigned int b1_bytes, + struct bio *b_2) { return true; } #endif /* CONFIG_BLK_INLINE_ENCRYPTION */ + #endif /* CONFIG_BLOCK */ + #endif /* __LINUX_BIO_CRYPT_CTX_H */ diff --git a/include/linux/blk-crypto.h b/include/linux/blk-crypto.h index 9ce39b1d8c7c..f2b2390b4236 100644 --- a/include/linux/blk-crypto.h +++ b/include/linux/blk-crypto.h @@ -6,34 +6,25 @@ #ifndef __LINUX_BLK_CRYPTO_H #define __LINUX_BLK_CRYPTO_H -#include #include #define SECTOR_SHIFT 9 #ifdef CONFIG_BLK_INLINE_ENCRYPTION -int blk_crypto_init(void); - int blk_crypto_submit_bio(struct bio **bio_ptr); bool blk_crypto_endio(struct bio *bio); -int blk_crypto_start_using_mode(enum blk_crypto_mode_num mode_num, - unsigned int data_unit_size, - struct request_queue *q); +int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key, + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size); -int blk_crypto_evict_key(struct request_queue *q, const u8 *key, - enum blk_crypto_mode_num mode, - unsigned int data_unit_size); +int blk_crypto_evict_key(struct request_queue *q, + const struct blk_crypto_key *key); #else /* CONFIG_BLK_INLINE_ENCRYPTION */ -static inline int blk_crypto_init(void) -{ - return 0; -} - static inline int blk_crypto_submit_bio(struct bio **bio_ptr) { return 0; @@ -44,21 +35,31 @@ static inline bool blk_crypto_endio(struct bio *bio) return true; } +#endif /* CONFIG_BLK_INLINE_ENCRYPTION */ + +#ifdef CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK + +int blk_crypto_start_using_mode(enum blk_crypto_mode_num mode_num, + unsigned int data_unit_size, + struct request_queue *q); + +int blk_crypto_fallback_init(void); + +#else /* CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK */ + static inline int blk_crypto_start_using_mode(enum blk_crypto_mode_num mode_num, unsigned int data_unit_size, struct request_queue *q) { - return -EOPNOTSUPP; + return 0; } -static inline int blk_crypto_evict_key(struct request_queue *q, const u8 *key, - enum blk_crypto_mode_num mode, - unsigned int data_unit_size) +static inline int blk_crypto_fallback_init(void) { return 0; } -#endif /* CONFIG_BLK_INLINE_ENCRYPTION */ +#endif /* CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK */ #endif /* __LINUX_BLK_CRYPTO_H */ diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 6febaa570160..841dcbec26b5 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -65,6 +65,9 @@ struct fscrypt_operations { void (*get_ino_and_lblk_bits)(struct super_block *sb, int *ino_bits_ret, int *lblk_bits_ret); bool (*inline_crypt_enabled)(struct super_block *sb); + int (*get_num_devices)(struct super_block *sb); + void (*get_devices)(struct super_block *sb, + struct request_queue **devs); }; static inline bool fscrypt_has_encryption_key(const struct inode *inode) @@ -539,12 +542,13 @@ extern bool fscrypt_inode_uses_inline_crypto(const struct inode *inode); extern bool fscrypt_inode_uses_fs_layer_crypto(const struct inode *inode); -extern int fscrypt_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode, - u64 first_lblk, gfp_t gfp_mask); +extern void fscrypt_set_bio_crypt_ctx(struct bio *bio, + const struct inode *inode, + u64 first_lblk, gfp_t gfp_mask); -extern int fscrypt_set_bio_crypt_ctx_bh(struct bio *bio, - const struct buffer_head *first_bh, - gfp_t gfp_mask); +extern void fscrypt_set_bio_crypt_ctx_bh(struct bio *bio, + const struct buffer_head *first_bh, + gfp_t gfp_mask); extern bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode, u64 next_lblk); @@ -563,20 +567,14 @@ static inline bool fscrypt_inode_uses_fs_layer_crypto(const struct inode *inode) return IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode); } -static inline int fscrypt_set_bio_crypt_ctx(struct bio *bio, - const struct inode *inode, - u64 first_lblk, gfp_t gfp_mask) -{ - return 0; -} +static inline void fscrypt_set_bio_crypt_ctx(struct bio *bio, + const struct inode *inode, + u64 first_lblk, gfp_t gfp_mask) { } -static inline int fscrypt_set_bio_crypt_ctx_bh( - struct bio *bio, - const struct buffer_head *first_bh, - gfp_t gfp_mask) -{ - return 0; -} +static inline void fscrypt_set_bio_crypt_ctx_bh( + struct bio *bio, + const struct buffer_head *first_bh, + gfp_t gfp_mask) { } static inline bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode, diff --git a/include/linux/keyslot-manager.h b/include/linux/keyslot-manager.h index 0777ade7907c..fbc423fe5cd5 100644 --- a/include/linux/keyslot-manager.h +++ b/include/linux/keyslot-manager.h @@ -3,96 +3,58 @@ * Copyright 2019 Google LLC */ -#include - -#ifdef CONFIG_BLOCK - #ifndef __LINUX_KEYSLOT_MANAGER_H #define __LINUX_KEYSLOT_MANAGER_H +#include + +struct keyslot_manager; + /** * struct keyslot_mgmt_ll_ops - functions to manage keyslots in hardware - * @keyslot_program: Program the specified key and algorithm into the - * specified slot in the inline encryption hardware. + * @keyslot_program: Program the specified key into the specified slot in the + * inline encryption hardware. * @keyslot_evict: Evict key from the specified keyslot in the hardware. - * The key, crypto_mode and data_unit_size are also passed - * down so that e.g. dm layers can evict keys from - * the devices that they map over. + * The key is provided so that e.g. dm layers can evict + * keys from the devices that they map over. * Returns 0 on success, -errno otherwise. - * @crypto_mode_supported: Check whether a crypto_mode and data_unit_size - * combo is supported. - * @keyslot_find: Returns the slot number that matches the key, - * or -ENOKEY if no match found, or -errno on - * error. * * This structure should be provided by storage device drivers when they set up * a keyslot manager - this structure holds the function ptrs that the keyslot * manager will use to manipulate keyslots in the hardware. */ struct keyslot_mgmt_ll_ops { - int (*keyslot_program)(void *ll_priv_data, const u8 *key, - enum blk_crypto_mode_num crypto_mode, - unsigned int data_unit_size, + int (*keyslot_program)(struct keyslot_manager *ksm, + const struct blk_crypto_key *key, unsigned int slot); - int (*keyslot_evict)(void *ll_priv_data, const u8 *key, - enum blk_crypto_mode_num crypto_mode, - unsigned int data_unit_size, + int (*keyslot_evict)(struct keyslot_manager *ksm, + const struct blk_crypto_key *key, unsigned int slot); - bool (*crypto_mode_supported)(void *ll_priv_data, - enum blk_crypto_mode_num crypto_mode, - unsigned int data_unit_size); - int (*keyslot_find)(void *ll_priv_data, const u8 *key, - enum blk_crypto_mode_num crypto_mode, - unsigned int data_unit_size); }; -#ifdef CONFIG_BLK_INLINE_ENCRYPTION -struct keyslot_manager; +struct keyslot_manager *keyslot_manager_create(unsigned int num_slots, + const struct keyslot_mgmt_ll_ops *ksm_ops, + const unsigned int crypto_mode_supported[BLK_ENCRYPTION_MODE_MAX], + void *ll_priv_data); -extern struct keyslot_manager *keyslot_manager_create(unsigned int num_slots, - const struct keyslot_mgmt_ll_ops *ksm_ops, - void *ll_priv_data); +int keyslot_manager_get_slot_for_key(struct keyslot_manager *ksm, + const struct blk_crypto_key *key); -extern int -keyslot_manager_get_slot_for_key(struct keyslot_manager *ksm, - const u8 *key, - enum blk_crypto_mode_num crypto_mode, - unsigned int data_unit_size); +void keyslot_manager_get_slot(struct keyslot_manager *ksm, unsigned int slot); -extern void keyslot_manager_get_slot(struct keyslot_manager *ksm, - unsigned int slot); +void keyslot_manager_put_slot(struct keyslot_manager *ksm, unsigned int slot); -extern void keyslot_manager_put_slot(struct keyslot_manager *ksm, - unsigned int slot); +bool keyslot_manager_crypto_mode_supported(struct keyslot_manager *ksm, + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size); -extern bool -keyslot_manager_crypto_mode_supported(struct keyslot_manager *ksm, - enum blk_crypto_mode_num crypto_mode, - unsigned int data_unit_size); +int keyslot_manager_evict_key(struct keyslot_manager *ksm, + const struct blk_crypto_key *key); -extern bool -keyslot_manager_rq_crypto_mode_supported(struct request_queue *q, - enum blk_crypto_mode_num crypto_mode, - unsigned int data_unit_size); +void keyslot_manager_reprogram_all_keys(struct keyslot_manager *ksm); -extern int keyslot_manager_evict_key(struct keyslot_manager *ksm, - const u8 *key, - enum blk_crypto_mode_num crypto_mode, - unsigned int data_unit_size); +void *keyslot_manager_private(struct keyslot_manager *ksm); -extern void keyslot_manager_destroy(struct keyslot_manager *ksm); - -#else /* CONFIG_BLK_INLINE_ENCRYPTION */ - -static inline bool -keyslot_manager_rq_crypto_mode_supported(struct request_queue *q, - enum blk_crypto_mode_num crypto_mode, - unsigned int data_unit_size) -{ - return false; -} -#endif /* CONFIG_BLK_INLINE_ENCRYPTION */ +void keyslot_manager_destroy(struct keyslot_manager *ksm); #endif /* __LINUX_KEYSLOT_MANAGER_H */ - -#endif /* CONFIG_BLOCK */ From ea09b9954cc40b3088b8b2778b2daab12820a7e6 Mon Sep 17 00:00:00 2001 From: Satya Tangirala Date: Mon, 27 Jan 2020 16:14:18 -0800 Subject: [PATCH 2967/3715] ANDROID: cuttlefish_defconfig: Enable blk-crypto fallback Enable blk-crypto's kernel crypto API fallback, so that devices without inline encryption hardware can continue to use the inlinecrypt mount option and have file content crypto handled by the block layer. Bug: 137270441 Test: xfstests, cuttlefish boot and stress test, pixel 4 boot and stress test Change-Id: I26376479ee38259b8c35732cb3a1d7e15f9b05a3 Signed-off-by: Satya Tangirala --- arch/arm64/configs/cuttlefish_defconfig | 1 + arch/x86/configs/x86_64_cuttlefish_defconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index 99cb9b35a849..89ce97cf812c 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -48,6 +48,7 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_BLK_INLINE_ENCRYPTION=y +CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y CONFIG_PCI=y CONFIG_PCI_HOST_GENERIC=y CONFIG_PREEMPT=y diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index a0434800549d..e6874f1f68cf 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -42,6 +42,7 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_BLK_INLINE_ENCRYPTION=y +CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y CONFIG_PARTITION_ADVANCED=y CONFIG_SMP=y CONFIG_HYPERVISOR_GUEST=y From c266a1311e74b3ae1047a9d6abd6c6044059995c Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 8 Nov 2019 04:42:58 +0000 Subject: [PATCH 2968/3715] ANDROID: scsi: ufs: Add quirk bit for controllers that don't play well with inline crypto A number of devices (hikey960 and db845c at least) don't work well with the inline crypto enablement, causing them to crash in early boot. In order to allow those boards to continue booting, add a BROKEN_CRYPTO quirk flag that the drivers can enable until we sort out how/if they can be fixed. Bug: 137270441 Change-Id: I9f2c3d75412e0aaa22fe6e7c9929cd18b1efa9ba Signed-off-by: John Stultz Signed-off-by: Satya Tangirala --- drivers/scsi/ufs/ufshcd-crypto.c | 3 ++- drivers/scsi/ufs/ufshcd.h | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufshcd-crypto.c b/drivers/scsi/ufs/ufshcd-crypto.c index acd63ef15be5..1cfd9a327049 100644 --- a/drivers/scsi/ufs/ufshcd-crypto.c +++ b/drivers/scsi/ufs/ufshcd-crypto.c @@ -279,7 +279,8 @@ int ufshcd_hba_init_crypto_spec(struct ufs_hba *hba, hba->caps &= ~UFSHCD_CAP_CRYPTO; /* Return 0 if crypto support isn't present */ - if (!(hba->capabilities & MASK_CRYPTO_SUPPORT)) + if (!(hba->capabilities & MASK_CRYPTO_SUPPORT) || + (hba->quirks & UFSHCD_QUIRK_BROKEN_CRYPTO)) goto out; /* diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 159c98149474..78ec1c588f1a 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -624,6 +624,12 @@ struct ufs_hba { */ #define UFSHCD_QUIRK_PRDT_BYTE_GRAN UFS_BIT(7) + /* + * This quirk needs to be enabled if the host controller advertises + * inline encryption support but it doesn't work correctly. + */ + #define UFSHCD_QUIRK_BROKEN_CRYPTO UFS_BIT(11) + unsigned int quirks; /* Deviations from standard UFSHCI spec. */ /* Device deviations from standard UFS device spec. */ From 83bc20ed4ba7dbf76964fd68905fde591b5de8b2 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 13 Nov 2019 14:17:25 -0800 Subject: [PATCH 2969/3715] ANDROID: scsi: ufs-qcom: Enable BROKEN_CRYPTO quirk flag DragonBoard 845c is currently crashing at boot time because the device tree doesn't include the UFS crypto registers. There are likely to be other issues with the crypto support that will need to be addressed too. Disable crypto support in ufs-qcom until we can get it working properly. Bug: 137270441 Change-Id: I54e32fa14431bbbe39f054cda20c646164c687f7 Signed-off-by: Eric Biggers Signed-off-by: Satya Tangirala --- drivers/scsi/ufs/ufs-qcom.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c index c87d770b519a..55463471651c 100644 --- a/drivers/scsi/ufs/ufs-qcom.c +++ b/drivers/scsi/ufs/ufs-qcom.c @@ -1103,6 +1103,13 @@ static void ufs_qcom_advertise_quirks(struct ufs_hba *hba) | UFSHCD_QUIRK_DME_PEER_ACCESS_AUTO_MODE | UFSHCD_QUIRK_BROKEN_PA_RXHSUNTERMCAP); } + + /* + * Inline crypto is currently broken with ufs-qcom at least because the + * device tree doesn't include the crypto registers. There are likely + * to be other issues that will need to be addressed too. + */ + hba->quirks |= UFSHCD_QUIRK_BROKEN_CRYPTO; } static void ufs_qcom_set_caps(struct ufs_hba *hba) From 86646ebb1742a663c4c9c39c06d58dcb3f8f89e5 Mon Sep 17 00:00:00 2001 From: Barani Muthukumaran Date: Thu, 2 Jan 2020 11:40:53 -0800 Subject: [PATCH 2970/3715] ANDROID: ufshcd-crypto: export cap find API Export symbol ufshcd_crypto_cap_find to find the crypto capabilities from the crypto engine in the storage controller. This is used to validate that the crypto_mode and data_unit_size provided is supported by the inline encryption hardware. This can be used by all vops. Bug: 147209885 Change-Id: I1020f88a35664dd1a3829750ba805ea76c93dc89 Signed-off-by: Barani Muthukumaran Signed-off-by: Eric Biggers Signed-off-by: Satya Tangirala --- drivers/scsi/ufs/ufshcd-crypto.c | 7 ++++--- drivers/scsi/ufs/ufshcd-crypto.h | 4 ++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd-crypto.c b/drivers/scsi/ufs/ufshcd-crypto.c index 1cfd9a327049..2c88398b03c7 100644 --- a/drivers/scsi/ufs/ufshcd-crypto.c +++ b/drivers/scsi/ufs/ufshcd-crypto.c @@ -37,9 +37,9 @@ static size_t get_keysize_bytes(enum ufs_crypto_key_size size) } } -static int ufshcd_crypto_cap_find(struct ufs_hba *hba, - enum blk_crypto_mode_num crypto_mode, - unsigned int data_unit_size) +int ufshcd_crypto_cap_find(struct ufs_hba *hba, + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size) { enum ufs_crypto_alg ufs_alg; u8 data_unit_mask; @@ -71,6 +71,7 @@ static int ufshcd_crypto_cap_find(struct ufs_hba *hba, return -EINVAL; } +EXPORT_SYMBOL(ufshcd_crypto_cap_find); /** * ufshcd_crypto_cfg_entry_write_key - Write a key into a crypto_cfg_entry diff --git a/drivers/scsi/ufs/ufshcd-crypto.h b/drivers/scsi/ufs/ufshcd-crypto.h index 24a587c7a94e..95f37c9f7672 100644 --- a/drivers/scsi/ufs/ufshcd-crypto.h +++ b/drivers/scsi/ufs/ufshcd-crypto.h @@ -36,6 +36,10 @@ static inline bool ufshcd_is_crypto_enabled(struct ufs_hba *hba) } /* Functions implementing UFSHCI v2.1 specification behaviour */ +int ufshcd_crypto_cap_find(struct ufs_hba *hba, + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size); + int ufshcd_prepare_lrbp_crypto_spec(struct ufs_hba *hba, struct scsi_cmnd *cmd, struct ufshcd_lrb *lrbp); From d42ba87e29ab44aac446b5434298d1369c44fe3c Mon Sep 17 00:00:00 2001 From: Barani Muthukumaran Date: Thu, 2 Jan 2020 11:57:39 -0800 Subject: [PATCH 2971/3715] ANDROID: block: provide key size as input to inline crypto APIs Currently, blk-crypto uses the algorithm to determine the size of keys. However, some inline encryption hardware supports protecting keys from software by wrapping the storage keys with an ephemeral key. Since these wrapped keys are not of a fixed size, add the capability to provide the key size when initializing a blk_crypto_key, and update the keyslot manager to take size into account when comparing keys. Bug: 147209885 Change-Id: I9bf26d06d18a2d671c51111b4896abe4df303988 Co-developed-by: Gaurav Kashyap Signed-off-by: Gaurav Kashyap Signed-off-by: Barani Muthukumaran Signed-off-by: Eric Biggers Signed-off-by: Satya Tangirala --- block/blk-crypto.c | 21 ++++++++++++++------- block/keyslot-manager.c | 1 + fs/crypto/inline_crypt.c | 4 ++-- include/linux/bio-crypt-ctx.h | 3 ++- include/linux/blk-crypto.h | 3 ++- 5 files changed, 21 insertions(+), 11 deletions(-) diff --git a/block/blk-crypto.c b/block/blk-crypto.c index 2c47e8eec865..3e1bb4192498 100644 --- a/block/blk-crypto.c +++ b/block/blk-crypto.c @@ -171,15 +171,19 @@ bool blk_crypto_endio(struct bio *bio) /** * blk_crypto_init_key() - Prepare a key for use with blk-crypto * @blk_key: Pointer to the blk_crypto_key to initialize. - * @raw_key: Pointer to the raw key. Must be the correct length for the chosen - * @crypto_mode; see blk_crypto_modes[]. + * @raw_key: Pointer to the raw key. + * @raw_key_size: Size of raw key. Must be at least the required size for the + * chosen @crypto_mode; see blk_crypto_modes[]. (It's allowed + * to be longer than the mode's actual key size, in order to + * support inline encryption hardware that accepts wrapped keys.) * @crypto_mode: identifier for the encryption algorithm to use * @data_unit_size: the data unit size to use for en/decryption * * Return: The blk_crypto_key that was prepared, or an ERR_PTR() on error. When * done using the key, it must be freed with blk_crypto_free_key(). */ -int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key, +int blk_crypto_init_key(struct blk_crypto_key *blk_key, + const u8 *raw_key, unsigned int raw_key_size, enum blk_crypto_mode_num crypto_mode, unsigned int data_unit_size) { @@ -191,8 +195,11 @@ int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key, if (crypto_mode >= ARRAY_SIZE(blk_crypto_modes)) return -EINVAL; + BUILD_BUG_ON(BLK_CRYPTO_MAX_WRAPPED_KEY_SIZE < BLK_CRYPTO_MAX_KEY_SIZE); + mode = &blk_crypto_modes[crypto_mode]; - if (mode->keysize == 0) + if (raw_key_size < mode->keysize || + raw_key_size > BLK_CRYPTO_MAX_WRAPPED_KEY_SIZE) return -EINVAL; if (!is_power_of_2(data_unit_size)) @@ -201,8 +208,8 @@ int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key, blk_key->crypto_mode = crypto_mode; blk_key->data_unit_size = data_unit_size; blk_key->data_unit_size_bits = ilog2(data_unit_size); - blk_key->size = mode->keysize; - memcpy(blk_key->raw, raw_key, mode->keysize); + blk_key->size = raw_key_size; + memcpy(blk_key->raw, raw_key, raw_key_size); /* * The keyslot manager uses the SipHash of the key to implement O(1) key @@ -210,7 +217,7 @@ int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key, * precomputed here so that it only needs to be computed once per key. */ get_random_once(&hash_key, sizeof(hash_key)); - blk_key->hash = siphash(raw_key, mode->keysize, &hash_key); + blk_key->hash = siphash(raw_key, raw_key_size, &hash_key); return 0; } diff --git a/block/keyslot-manager.c b/block/keyslot-manager.c index b19d8a47225d..ade50bcde2c2 100644 --- a/block/keyslot-manager.c +++ b/block/keyslot-manager.c @@ -169,6 +169,7 @@ static int find_keyslot(struct keyslot_manager *ksm, hlist_for_each_entry(slotp, head, hash_node) { if (slotp->key.hash == key->hash && slotp->key.crypto_mode == key->crypto_mode && + slotp->key.size == key->size && slotp->key.data_unit_size == key->data_unit_size && !crypto_memneq(slotp->key.raw, key->raw, key->size)) return slotp - ksm->slots; diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c index af54d1fed3f6..a1bd550f668b 100644 --- a/fs/crypto/inline_crypt.c +++ b/fs/crypto/inline_crypt.c @@ -75,8 +75,8 @@ int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, else sb->s_cop->get_devices(sb, blk_key->devs); - err = blk_crypto_init_key(&blk_key->base, raw_key, crypto_mode, - sb->s_blocksize); + err = blk_crypto_init_key(&blk_key->base, raw_key, ci->ci_mode->keysize, + crypto_mode, sb->s_blocksize); if (err) { fscrypt_err(inode, "error %d initializing blk-crypto key", err); goto fail; diff --git a/include/linux/bio-crypt-ctx.h b/include/linux/bio-crypt-ctx.h index 4535df0a6349..2e06b06fce47 100644 --- a/include/linux/bio-crypt-ctx.h +++ b/include/linux/bio-crypt-ctx.h @@ -19,6 +19,7 @@ enum blk_crypto_mode_num { #ifdef CONFIG_BLK_INLINE_ENCRYPTION #define BLK_CRYPTO_MAX_KEY_SIZE 64 +#define BLK_CRYPTO_MAX_WRAPPED_KEY_SIZE 128 /** * struct blk_crypto_key - an inline encryption key @@ -41,7 +42,7 @@ struct blk_crypto_key { unsigned int data_unit_size_bits; unsigned int size; unsigned int hash; - u8 raw[BLK_CRYPTO_MAX_KEY_SIZE]; + u8 raw[BLK_CRYPTO_MAX_WRAPPED_KEY_SIZE]; }; #define BLK_CRYPTO_MAX_IV_SIZE 32 diff --git a/include/linux/blk-crypto.h b/include/linux/blk-crypto.h index f2b2390b4236..485cee0b92dd 100644 --- a/include/linux/blk-crypto.h +++ b/include/linux/blk-crypto.h @@ -16,7 +16,8 @@ int blk_crypto_submit_bio(struct bio **bio_ptr); bool blk_crypto_endio(struct bio *bio); -int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key, +int blk_crypto_init_key(struct blk_crypto_key *blk_key, + const u8 *raw_key, unsigned int raw_key_size, enum blk_crypto_mode_num crypto_mode, unsigned int data_unit_size); From bb5a65771a206ae39086af1a9e78afeaf654cf03 Mon Sep 17 00:00:00 2001 From: Barani Muthukumaran Date: Thu, 2 Jan 2020 12:01:34 -0800 Subject: [PATCH 2972/3715] ANDROID: block: add KSM op to derive software secret from wrapped key Some inline encryption hardware supports protecting the keys in hardware and only exposing wrapped keys to software. To use this capability, userspace must provide a hardware-wrapped key rather than a raw key. However, users of inline encryption in the kernel won't necessarily use the user-specified key directly for inline encryption. E.g. with fscrypt with IV_INO_LBLK_64 policies, each user-provided key is used to derive a file contents encryption key, filenames encryption key, and key identifier. Since inline encryption can only be used with file contents, if the user were to provide a wrapped key there would (naively) be no way to encrypt filenames or derive the key identifier. This problem is solved by designing the hardware to internally use the unwrapped key as input to a KDF from which multiple cryptographically isolated keys can be derived, including both the inline crypto key (not exposed to software) and a secret that *is* exposed to software. Add a function to the keyslot manager to allow upper layers to request this software secret from a hardware-wrapped key. Bug: 147209885 Change-Id: I32f3aa4f25bcf6b9d6f7d8890260533fad00dd1d Co-developed-by: Gaurav Kashyap Signed-off-by: Gaurav Kashyap Signed-off-by: Barani Muthukumaran Signed-off-by: Eric Biggers Signed-off-by: Satya Tangirala --- block/keyslot-manager.c | 38 +++++++++++++++++++++++++++++++++ include/linux/keyslot-manager.h | 12 +++++++++++ 2 files changed, 50 insertions(+) diff --git a/block/keyslot-manager.c b/block/keyslot-manager.c index ade50bcde2c2..6fad96a855b3 100644 --- a/block/keyslot-manager.c +++ b/block/keyslot-manager.c @@ -426,3 +426,41 @@ void keyslot_manager_destroy(struct keyslot_manager *ksm) } } EXPORT_SYMBOL_GPL(keyslot_manager_destroy); + +/** + * keyslot_manager_derive_raw_secret() - Derive software secret from wrapped key + * @ksm: The keyslot manager + * @wrapped_key: The wrapped key + * @wrapped_key_size: Size of the wrapped key in bytes + * @secret: (output) the software secret + * @secret_size: (output) the number of secret bytes to derive + * + * Given a hardware-wrapped key, ask the hardware to derive a secret which + * software can use for cryptographic tasks other than inline encryption. The + * derived secret is guaranteed to be cryptographically isolated from the key + * with which any inline encryption with this wrapped key would actually be + * done. I.e., both will be derived from the unwrapped key. + * + * Return: 0 on success, -EOPNOTSUPP if hardware-wrapped keys are unsupported, + * or another -errno code. + */ +int keyslot_manager_derive_raw_secret(struct keyslot_manager *ksm, + const u8 *wrapped_key, + unsigned int wrapped_key_size, + u8 *secret, unsigned int secret_size) +{ + int err; + + down_write(&ksm->lock); + if (ksm->ksm_ll_ops.derive_raw_secret) { + err = ksm->ksm_ll_ops.derive_raw_secret(ksm, wrapped_key, + wrapped_key_size, + secret, secret_size); + } else { + err = -EOPNOTSUPP; + } + up_write(&ksm->lock); + + return err; +} +EXPORT_SYMBOL_GPL(keyslot_manager_derive_raw_secret); diff --git a/include/linux/keyslot-manager.h b/include/linux/keyslot-manager.h index fbc423fe5cd5..17dfcaf208fb 100644 --- a/include/linux/keyslot-manager.h +++ b/include/linux/keyslot-manager.h @@ -18,6 +18,9 @@ struct keyslot_manager; * The key is provided so that e.g. dm layers can evict * keys from the devices that they map over. * Returns 0 on success, -errno otherwise. + * @derive_raw_secret: (Optional) Derive a software secret from a + * hardware-wrapped key. Returns 0 on success, -EOPNOTSUPP + * if unsupported on the hardware, or another -errno code. * * This structure should be provided by storage device drivers when they set up * a keyslot manager - this structure holds the function ptrs that the keyslot @@ -30,6 +33,10 @@ struct keyslot_mgmt_ll_ops { int (*keyslot_evict)(struct keyslot_manager *ksm, const struct blk_crypto_key *key, unsigned int slot); + int (*derive_raw_secret)(struct keyslot_manager *ksm, + const u8 *wrapped_key, + unsigned int wrapped_key_size, + u8 *secret, unsigned int secret_size); }; struct keyslot_manager *keyslot_manager_create(unsigned int num_slots, @@ -57,4 +64,9 @@ void *keyslot_manager_private(struct keyslot_manager *ksm); void keyslot_manager_destroy(struct keyslot_manager *ksm); +int keyslot_manager_derive_raw_secret(struct keyslot_manager *ksm, + const u8 *wrapped_key, + unsigned int wrapped_key_size, + u8 *secret, unsigned int secret_size); + #endif /* __LINUX_KEYSLOT_MANAGER_H */ From 2871f731940165ed4042001a36bbe7d58f9d983b Mon Sep 17 00:00:00 2001 From: Barani Muthukumaran Date: Wed, 15 Jan 2020 18:41:54 -0800 Subject: [PATCH 2973/3715] ANDROID: fscrypt: add support for hardware-wrapped keys To prevent keys from being compromised if an attacker acquires read access to kernel memory, some inline encryption hardware supports protecting the keys in hardware without software having access to or the ability to set the plaintext keys. Instead, software only sees "wrapped keys", which may differ on every boot. The keys can be initially generated either by software (in which case they need to be imported to hardware to be wrapped), or directly by the hardware. Add support for this type of hardware by allowing keys to be flagged as hardware-wrapped and encryption policies to be flagged as needing a hardware-wrapped key. When used, fscrypt will pass the wrapped key directly to the inline encryption hardware to encrypt file contents. The hardware is responsible for internally unwrapping the key and deriving the actual file contents encryption key. fscrypt also asks the inline encryption hardware to derive a cryptographically isolated software "secret", which fscrypt then uses as the master key for all other purposes besides file contents encryption, e.g. to derive filenames encryption keys and the key identifier. Bug: 147209885 Change-Id: I58d1a37f5ba8cf178b80036b813e0bc99512ef3b Co-developed-by: Gaurav Kashyap Signed-off-by: Gaurav Kashyap Signed-off-by: Barani Muthukumaran Signed-off-by: Eric Biggers Signed-off-by: Satya Tangirala --- fs/crypto/fscrypt_private.h | 34 ++++++++++++++-- fs/crypto/inline_crypt.c | 23 ++++++++++- fs/crypto/keyring.c | 50 ++++++++++++++++++----- fs/crypto/keysetup.c | 77 ++++++++++++++++++++++++++---------- fs/crypto/keysetup_v1.c | 3 +- include/uapi/linux/fscrypt.h | 5 ++- 6 files changed, 155 insertions(+), 37 deletions(-) diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 7005dbe6bfec..5d7b20fead86 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -20,6 +20,7 @@ #define FS_KEY_DERIVATION_NONCE_SIZE 16 #define FSCRYPT_MIN_KEY_SIZE 16 +#define FSCRYPT_MAX_HW_WRAPPED_KEY_SIZE 128 #define FSCRYPT_CONTEXT_V1 1 #define FSCRYPT_CONTEXT_V2 2 @@ -330,11 +331,18 @@ fscrypt_using_inline_encryption(const struct fscrypt_info *ci) extern int fscrypt_prepare_inline_crypt_key( struct fscrypt_prepared_key *prep_key, const u8 *raw_key, + unsigned int raw_key_size, const struct fscrypt_info *ci); extern void fscrypt_destroy_inline_crypt_key( struct fscrypt_prepared_key *prep_key); +extern int fscrypt_derive_raw_secret(struct super_block *sb, + const u8 *wrapped_key, + unsigned int wrapped_key_size, + u8 *raw_secret, + unsigned int raw_secret_size); + /* * Check whether the crypto transform or blk-crypto key has been allocated in * @prep_key, depending on which encryption implementation the file will use. @@ -367,7 +375,7 @@ static inline bool fscrypt_using_inline_encryption( static inline int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, - const u8 *raw_key, + const u8 *raw_key, unsigned int raw_key_size, const struct fscrypt_info *ci) { WARN_ON(1); @@ -379,6 +387,17 @@ fscrypt_destroy_inline_crypt_key(struct fscrypt_prepared_key *prep_key) { } +static inline int fscrypt_derive_raw_secret(struct super_block *sb, + const u8 *wrapped_key, + unsigned int wrapped_key_size, + u8 *raw_secret, + unsigned int raw_secret_size) +{ + fscrypt_warn(NULL, + "kernel built without support for hardware-wrapped keys"); + return -EOPNOTSUPP; +} + static inline bool fscrypt_is_key_prepared(struct fscrypt_prepared_key *prep_key, const struct fscrypt_info *ci) @@ -403,8 +422,15 @@ struct fscrypt_master_key_secret { /* Size of the raw key in bytes. Set even if ->raw isn't set. */ u32 size; - /* For v1 policy keys: the raw key. Wiped for v2 policy keys. */ - u8 raw[FSCRYPT_MAX_KEY_SIZE]; + /* True if the key in ->raw is a hardware-wrapped key. */ + bool is_hw_wrapped; + + /* + * For v1 policy keys: the raw key. Wiped for v2 policy keys, unless + * ->is_hw_wrapped is true, in which case this contains the wrapped key + * rather than the key with which 'hkdf' was keyed. + */ + u8 raw[FSCRYPT_MAX_HW_WRAPPED_KEY_SIZE]; } __randomize_layout; @@ -549,7 +575,7 @@ fscrypt_mode_supports_direct_key(const struct fscrypt_mode *mode) } extern int fscrypt_prepare_key(struct fscrypt_prepared_key *prep_key, - const u8 *raw_key, + const u8 *raw_key, unsigned int raw_key_size, const struct fscrypt_info *ci); extern void fscrypt_destroy_prepared_key(struct fscrypt_prepared_key *prep_key); diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c index a1bd550f668b..e7e8e9a27e8b 100644 --- a/fs/crypto/inline_crypt.c +++ b/fs/crypto/inline_crypt.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "fscrypt_private.h" @@ -49,6 +50,7 @@ void fscrypt_select_encryption_impl(struct fscrypt_info *ci) int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, const u8 *raw_key, + unsigned int raw_key_size, const struct fscrypt_info *ci) { const struct inode *inode = ci->ci_inode; @@ -75,7 +77,10 @@ int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, else sb->s_cop->get_devices(sb, blk_key->devs); - err = blk_crypto_init_key(&blk_key->base, raw_key, ci->ci_mode->keysize, + BUILD_BUG_ON(FSCRYPT_MAX_HW_WRAPPED_KEY_SIZE > + BLK_CRYPTO_MAX_WRAPPED_KEY_SIZE); + + err = blk_crypto_init_key(&blk_key->base, raw_key, raw_key_size, crypto_mode, sb->s_blocksize); if (err) { fscrypt_err(inode, "error %d initializing blk-crypto key", err); @@ -133,6 +138,22 @@ void fscrypt_destroy_inline_crypt_key(struct fscrypt_prepared_key *prep_key) } } +int fscrypt_derive_raw_secret(struct super_block *sb, + const u8 *wrapped_key, + unsigned int wrapped_key_size, + u8 *raw_secret, unsigned int raw_secret_size) +{ + struct request_queue *q; + + q = sb->s_bdev->bd_queue; + if (!q->ksm) + return -EOPNOTSUPP; + + return keyslot_manager_derive_raw_secret(q->ksm, + wrapped_key, wrapped_key_size, + raw_secret, raw_secret_size); +} + /** * fscrypt_inode_uses_inline_crypto - test whether an inode uses inline * encryption diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c index 9052197d8f5a..7facb5993b03 100644 --- a/fs/crypto/keyring.c +++ b/fs/crypto/keyring.c @@ -465,6 +465,9 @@ out_unlock: return err; } +/* Size of software "secret" derived from hardware-wrapped key */ +#define RAW_SECRET_SIZE 32 + /* * Add a master encryption key to the filesystem, causing all files which were * encrypted with it to appear "unlocked" (decrypted) when accessed. @@ -495,6 +498,9 @@ int fscrypt_ioctl_add_key(struct file *filp, void __user *_uarg) struct fscrypt_add_key_arg __user *uarg = _uarg; struct fscrypt_add_key_arg arg; struct fscrypt_master_key_secret secret; + u8 _kdf_key[RAW_SECRET_SIZE]; + u8 *kdf_key; + unsigned int kdf_key_size; int err; if (copy_from_user(&arg, uarg, sizeof(arg))) @@ -503,11 +509,16 @@ int fscrypt_ioctl_add_key(struct file *filp, void __user *_uarg) if (!valid_key_spec(&arg.key_spec)) return -EINVAL; - if (arg.raw_size < FSCRYPT_MIN_KEY_SIZE || - arg.raw_size > FSCRYPT_MAX_KEY_SIZE) + if (memchr_inv(arg.__reserved, 0, sizeof(arg.__reserved))) return -EINVAL; - if (memchr_inv(arg.__reserved, 0, sizeof(arg.__reserved))) + BUILD_BUG_ON(FSCRYPT_MAX_HW_WRAPPED_KEY_SIZE < + FSCRYPT_MAX_KEY_SIZE); + + if (arg.raw_size < FSCRYPT_MIN_KEY_SIZE || + arg.raw_size > + ((arg.__flags & __FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED) ? + FSCRYPT_MAX_HW_WRAPPED_KEY_SIZE : FSCRYPT_MAX_KEY_SIZE)) return -EINVAL; memset(&secret, 0, sizeof(secret)); @@ -526,18 +537,37 @@ int fscrypt_ioctl_add_key(struct file *filp, void __user *_uarg) err = -EACCES; if (!capable(CAP_SYS_ADMIN)) goto out_wipe_secret; + + err = -EINVAL; + if (arg.__flags) + goto out_wipe_secret; break; case FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER: - err = fscrypt_init_hkdf(&secret.hkdf, secret.raw, secret.size); + err = -EINVAL; + if (arg.__flags & ~__FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED) + goto out_wipe_secret; + if (arg.__flags & __FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED) { + kdf_key = _kdf_key; + kdf_key_size = RAW_SECRET_SIZE; + err = fscrypt_derive_raw_secret(sb, secret.raw, + secret.size, + kdf_key, kdf_key_size); + if (err) + goto out_wipe_secret; + secret.is_hw_wrapped = true; + } else { + kdf_key = secret.raw; + kdf_key_size = secret.size; + } + err = fscrypt_init_hkdf(&secret.hkdf, kdf_key, kdf_key_size); + /* + * Now that the HKDF context is initialized, the raw HKDF + * key is no longer needed. + */ + memzero_explicit(kdf_key, kdf_key_size); if (err) goto out_wipe_secret; - /* - * Now that the HKDF context is initialized, the raw key is no - * longer needed. - */ - memzero_explicit(secret.raw, secret.size); - /* Calculate the key identifier and return it to userspace. */ err = fscrypt_hkdf_expand(&secret.hkdf, HKDF_CONTEXT_KEY_IDENTIFIER, diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index 8c2675d0b2d0..f87daf215ac9 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -113,12 +113,17 @@ err_free_tfm: * (fs-layer or blk-crypto) will be used. */ int fscrypt_prepare_key(struct fscrypt_prepared_key *prep_key, - const u8 *raw_key, const struct fscrypt_info *ci) + const u8 *raw_key, unsigned int raw_key_size, + const struct fscrypt_info *ci) { struct crypto_skcipher *tfm; if (fscrypt_using_inline_encryption(ci)) - return fscrypt_prepare_inline_crypt_key(prep_key, raw_key, ci); + return fscrypt_prepare_inline_crypt_key(prep_key, + raw_key, raw_key_size, ci); + + if (WARN_ON(raw_key_size != ci->ci_mode->keysize)) + return -EINVAL; tfm = fscrypt_allocate_skcipher(ci->ci_mode, raw_key, ci->ci_inode); if (IS_ERR(tfm)) @@ -142,7 +147,8 @@ void fscrypt_destroy_prepared_key(struct fscrypt_prepared_key *prep_key) int fscrypt_set_derived_key(struct fscrypt_info *ci, const u8 *derived_key) { ci->ci_owns_key = true; - return fscrypt_prepare_key(&ci->ci_key, derived_key, ci); + return fscrypt_prepare_key(&ci->ci_key, derived_key, + ci->ci_mode->keysize, ci); } static int setup_per_mode_key(struct fscrypt_info *ci, @@ -175,24 +181,48 @@ static int setup_per_mode_key(struct fscrypt_info *ci, if (fscrypt_is_key_prepared(prep_key, ci)) goto done_unlock; - BUILD_BUG_ON(sizeof(mode_num) != 1); - BUILD_BUG_ON(sizeof(sb->s_uuid) != 16); - BUILD_BUG_ON(sizeof(hkdf_info) != 17); - hkdf_info[hkdf_infolen++] = mode_num; - if (include_fs_uuid) { - memcpy(&hkdf_info[hkdf_infolen], &sb->s_uuid, - sizeof(sb->s_uuid)); - hkdf_infolen += sizeof(sb->s_uuid); + if (mk->mk_secret.is_hw_wrapped && S_ISREG(inode->i_mode)) { + int i; + + if (!fscrypt_using_inline_encryption(ci)) { + fscrypt_warn(ci->ci_inode, + "Hardware-wrapped keys require inline encryption (-o inlinecrypt)"); + err = -EINVAL; + goto out_unlock; + } + for (i = 0; i <= __FSCRYPT_MODE_MAX; i++) { + if (fscrypt_is_key_prepared(&keys[i], ci)) { + fscrypt_warn(ci->ci_inode, + "Each hardware-wrapped key can only be used with one encryption mode"); + err = -EINVAL; + goto out_unlock; + } + } + err = fscrypt_prepare_key(prep_key, mk->mk_secret.raw, + mk->mk_secret.size, ci); + if (err) + goto out_unlock; + } else { + BUILD_BUG_ON(sizeof(mode_num) != 1); + BUILD_BUG_ON(sizeof(sb->s_uuid) != 16); + BUILD_BUG_ON(sizeof(hkdf_info) != 17); + hkdf_info[hkdf_infolen++] = mode_num; + if (include_fs_uuid) { + memcpy(&hkdf_info[hkdf_infolen], &sb->s_uuid, + sizeof(sb->s_uuid)); + hkdf_infolen += sizeof(sb->s_uuid); + } + err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf, + hkdf_context, hkdf_info, hkdf_infolen, + mode_key, mode->keysize); + if (err) + goto out_unlock; + err = fscrypt_prepare_key(prep_key, mode_key, mode->keysize, + ci); + memzero_explicit(mode_key, mode->keysize); + if (err) + goto out_unlock; } - err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf, - hkdf_context, hkdf_info, hkdf_infolen, - mode_key, mode->keysize); - if (err) - goto out_unlock; - err = fscrypt_prepare_key(prep_key, mode_key, ci); - memzero_explicit(mode_key, mode->keysize); - if (err) - goto out_unlock; done_unlock: ci->ci_key = *prep_key; err = 0; @@ -207,6 +237,13 @@ static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci, u8 derived_key[FSCRYPT_MAX_KEY_SIZE]; int err; + if (mk->mk_secret.is_hw_wrapped && + !(ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64)) { + fscrypt_warn(ci->ci_inode, + "Hardware-wrapped keys are only supported with IV_INO_LBLK_64 policies"); + return -EINVAL; + } + if (ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) { /* * DIRECT_KEY: instead of deriving per-file keys, the per-file diff --git a/fs/crypto/keysetup_v1.c b/fs/crypto/keysetup_v1.c index 8cafdbf47002..47591c54dc3d 100644 --- a/fs/crypto/keysetup_v1.c +++ b/fs/crypto/keysetup_v1.c @@ -233,7 +233,8 @@ fscrypt_get_direct_key(const struct fscrypt_info *ci, const u8 *raw_key) return ERR_PTR(-ENOMEM); refcount_set(&dk->dk_refcount, 1); dk->dk_mode = ci->ci_mode; - err = fscrypt_prepare_key(&dk->dk_key, raw_key, ci); + err = fscrypt_prepare_key(&dk->dk_key, raw_key, ci->ci_mode->keysize, + ci); if (err) goto err_free_dk; memcpy(dk->dk_descriptor, ci->ci_policy.v1.master_key_descriptor, diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h index 1beb174ad950..7d150d800abc 100644 --- a/include/uapi/linux/fscrypt.h +++ b/include/uapi/linux/fscrypt.h @@ -113,7 +113,10 @@ struct fscrypt_key_specifier { struct fscrypt_add_key_arg { struct fscrypt_key_specifier key_spec; __u32 raw_size; - __u32 __reserved[9]; + __u32 __reserved[8]; + /* N.B.: "temporary" flag, not reserved upstream */ +#define __FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED 0x00000001 + __u32 __flags; __u8 raw[]; }; From 75fea5f6057df78af1655f2f79a9c66a94bc838f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 21 Jan 2020 09:39:22 -0800 Subject: [PATCH 2974/3715] ANDROID: block: fix some inline crypto bugs While we're waiting for v7 of the inline crypto patchset, fix some bugs that made it into the v6 patchset, including one that caused bios with an encryption context to never be merged, and one that could cause non-contiguous pages to incorrectly added to a bio. Bug: 137270441 Change-Id: I3911fcd6c76b5c9063b86d6af6267ad990a46718 Signed-off-by: Eric Biggers Signed-off-by: Satya Tangirala --- block/bio-crypt-ctx.c | 7 +++---- block/blk-crypto-fallback.c | 2 +- block/blk-crypto-internal.h | 2 +- include/linux/bio-crypt-ctx.h | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/block/bio-crypt-ctx.c b/block/bio-crypt-ctx.c index 75982dabc7a3..d02d2bff991a 100644 --- a/block/bio-crypt-ctx.c +++ b/block/bio-crypt-ctx.c @@ -96,10 +96,9 @@ bool bio_crypt_ctx_compatible(struct bio *b_1, struct bio *b_2) struct bio_crypt_ctx *bc1 = b_1->bi_crypt_context; struct bio_crypt_ctx *bc2 = b_2->bi_crypt_context; - if (bc1 != bc2) - return false; - - return !bc1 || bc1->bc_key == bc2->bc_key; + if (!bc1) + return !bc2; + return bc2 && bc1->bc_key == bc2->bc_key; } /* diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c index 3f2a7d3be07b..d349b904d95d 100644 --- a/block/blk-crypto-fallback.c +++ b/block/blk-crypto-fallback.c @@ -568,7 +568,7 @@ int blk_crypto_fallback_submit_bio(struct bio **bio_ptr) struct bio_crypt_ctx *bc = bio->bi_crypt_context; struct bio_fallback_crypt_ctx *f_ctx; - if (WARN_ON_ONCE(!tfms_inited[bc->bc_key->crypto_mode])) { + if (!tfms_inited[bc->bc_key->crypto_mode]) { bio->bi_status = BLK_STS_IOERR; return -EIO; } diff --git a/block/blk-crypto-internal.h b/block/blk-crypto-internal.h index 43351eecc97a..40d826b743da 100644 --- a/block/blk-crypto-internal.h +++ b/block/blk-crypto-internal.h @@ -36,7 +36,7 @@ static inline bool bio_crypt_fallback_crypted(const struct bio_crypt_ctx *bc) static inline int blk_crypto_fallback_submit_bio(struct bio **bio_ptr) { - pr_warn_once("blk-crypto crypto API fallback disabled; failing request"); + pr_warn_once("crypto API fallback disabled; failing request\n"); (*bio_ptr)->bi_status = BLK_STS_NOTSUPP; return -EIO; } diff --git a/include/linux/bio-crypt-ctx.h b/include/linux/bio-crypt-ctx.h index 2e06b06fce47..652f92ff75fd 100644 --- a/include/linux/bio-crypt-ctx.h +++ b/include/linux/bio-crypt-ctx.h @@ -118,7 +118,7 @@ static inline bool bio_crypt_dun_is_contiguous(const struct bio_crypt_ctx *bc, int i = 0; unsigned int inc = bytes >> bc->bc_key->data_unit_size_bits; - while (inc && i < BLK_CRYPTO_DUN_ARRAY_SIZE) { + while (i < BLK_CRYPTO_DUN_ARRAY_SIZE) { if (bc->bc_dun[i] + inc != next_dun[i]) return false; inc = ((bc->bc_dun[i] + inc) < inc); From f9a8e4a5c5455a6bada70ed6d2f0af8900a872cb Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 22 Jan 2020 12:32:33 -0800 Subject: [PATCH 2975/3715] ANDROID: block: export symbols needed for modules to use inline crypto Export the blk-crypto symbols needed for modules to use inline crypto. These would have already been exported, except that so far they've only been used by fs/crypto/, which is no longer modular. Bug: 137270441 Bug: 147814592 Change-Id: I64bf98aecabe891c188b30dd50124aacb1e008ca Signed-off-by: Eric Biggers Signed-off-by: Satya Tangirala --- block/bio-crypt-ctx.c | 1 + block/blk-crypto-fallback.c | 1 + block/blk-crypto.c | 2 ++ 3 files changed, 4 insertions(+) diff --git a/block/bio-crypt-ctx.c b/block/bio-crypt-ctx.c index d02d2bff991a..b6df3dcf28cc 100644 --- a/block/bio-crypt-ctx.c +++ b/block/bio-crypt-ctx.c @@ -49,6 +49,7 @@ struct bio_crypt_ctx *bio_crypt_alloc_ctx(gfp_t gfp_mask) { return mempool_alloc(bio_crypt_ctx_pool, gfp_mask); } +EXPORT_SYMBOL_GPL(bio_crypt_alloc_ctx); void bio_crypt_free_ctx(struct bio *bio) { diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c index d349b904d95d..ef293547c56b 100644 --- a/block/blk-crypto-fallback.c +++ b/block/blk-crypto-fallback.c @@ -556,6 +556,7 @@ out: mutex_unlock(&tfms_init_lock); return err; } +EXPORT_SYMBOL_GPL(blk_crypto_start_using_mode); int blk_crypto_fallback_evict_key(const struct blk_crypto_key *key) { diff --git a/block/blk-crypto.c b/block/blk-crypto.c index 3e1bb4192498..a8de0d9680e0 100644 --- a/block/blk-crypto.c +++ b/block/blk-crypto.c @@ -221,6 +221,7 @@ int blk_crypto_init_key(struct blk_crypto_key *blk_key, return 0; } +EXPORT_SYMBOL_GPL(blk_crypto_init_key); /** * blk_crypto_evict_key() - Evict a key from any inline encryption hardware @@ -247,3 +248,4 @@ int blk_crypto_evict_key(struct request_queue *q, return blk_crypto_fallback_evict_key(key); } +EXPORT_SYMBOL_GPL(blk_crypto_evict_key); From c91db466b51479ae761becc233d79c50ca3748a5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 9 Jan 2020 21:10:06 -0800 Subject: [PATCH 2976/3715] BACKPORT: FROMLIST: scsi: ufs: add program_key() variant op On Snapdragon SoCs, the Linux kernel isn't permitted to directly access the standard UFS crypto configuration registers. Instead, programming and evicting keys must be done through vendor-specific SMC calls. To support this hardware, add a ->program_key() method to 'struct ufs_hba_variant_ops'. This allows overriding the UFS standard key programming procedure. Link: https://lore.kernel.org/r/20200110061634.46742-5-ebiggers@kernel.org Bug: 137270441 Bug: 147259927 Change-Id: Ia561d5a51421baaf78de52a1eaec496093a0d0ad Signed-off-by: Eric Biggers Signed-off-by: Satya Tangirala --- drivers/scsi/ufs/ufshcd-crypto.c | 24 ++++++++++++++++++------ drivers/scsi/ufs/ufshcd.h | 5 +++++ 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd-crypto.c b/drivers/scsi/ufs/ufshcd-crypto.c index 2c88398b03c7..276b49ad13be 100644 --- a/drivers/scsi/ufs/ufshcd-crypto.c +++ b/drivers/scsi/ufs/ufshcd-crypto.c @@ -118,15 +118,21 @@ static int ufshcd_crypto_cfg_entry_write_key(union ufs_crypto_cfg_entry *cfg, return -EINVAL; } -static void ufshcd_program_key(struct ufs_hba *hba, - const union ufs_crypto_cfg_entry *cfg, - int slot) +static int ufshcd_program_key(struct ufs_hba *hba, + const union ufs_crypto_cfg_entry *cfg, int slot) { int i; u32 slot_offset = hba->crypto_cfg_register + slot * sizeof(*cfg); + int err; pm_runtime_get_sync(hba->dev); ufshcd_hold(hba, false); + + if (hba->vops->program_key) { + err = hba->vops->program_key(hba, cfg, slot); + goto out; + } + /* Clear the dword 16 */ ufshcd_writel(hba, 0, slot_offset + 16 * sizeof(cfg->reg_val[0])); /* Ensure that CFGE is cleared before programming the key */ @@ -146,15 +152,20 @@ static void ufshcd_program_key(struct ufs_hba *hba, ufshcd_writel(hba, le32_to_cpu(cfg->reg_val[16]), slot_offset + 16 * sizeof(cfg->reg_val[0])); wmb(); + err = 0; +out: ufshcd_release(hba); pm_runtime_put_sync(hba->dev); + return err; } static void ufshcd_clear_keyslot(struct ufs_hba *hba, int slot) { union ufs_crypto_cfg_entry cfg = { 0 }; + int err; - ufshcd_program_key(hba, &cfg, slot); + err = ufshcd_program_key(hba, &cfg, slot); + WARN_ON_ONCE(err); } /* Clear all keyslots at driver init time */ @@ -199,10 +210,11 @@ static int ufshcd_crypto_keyslot_program(struct keyslot_manager *ksm, if (err) return err; - ufshcd_program_key(hba, &cfg, slot); + err = ufshcd_program_key(hba, &cfg, slot); memzero_explicit(&cfg, sizeof(cfg)); - return 0; + + return err; } static int ufshcd_crypto_keyslot_evict(struct keyslot_manager *ksm, diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 78ec1c588f1a..38381217762c 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -277,6 +277,8 @@ struct ufs_pwr_mode_info { struct ufs_pa_layer_attr info; }; +union ufs_crypto_cfg_entry; + /** * struct ufs_hba_variant_ops - variant specific callbacks * @name: variant name @@ -303,6 +305,7 @@ struct ufs_pwr_mode_info { * @resume: called during host controller PM callback * @dbg_register_dump: used to dump controller debug information * @phy_initialization: used to initialize phys + * @program_key: program an inline encryption key into a keyslot */ struct ufs_hba_variant_ops { const char *name; @@ -331,6 +334,8 @@ struct ufs_hba_variant_ops { int (*resume)(struct ufs_hba *, enum ufs_pm_op); void (*dbg_register_dump)(struct ufs_hba *hba); int (*phy_initialization)(struct ufs_hba *); + int (*program_key)(struct ufs_hba *hba, + const union ufs_crypto_cfg_entry *cfg, int slot); }; struct keyslot_mgmt_ll_ops; From 4f27c8b90bd223e967c98dc658961e67b9b864ae Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 21 Jan 2020 09:25:59 -0800 Subject: [PATCH 2977/3715] ANDROID: ext4, f2fs: enable direct I/O with inline encryption ext4 and f2fs have traditionally not supported direct I/O on encrypted files, since it's difficult to implement with the traditional filesystem-layer encryption. But when inline encryption is used instead, it's straightforward to support direct I/O, as long as the I/O is fully filesystem-block-aligned. Add support for it by: - Making the two generic direct I/O implementations in the kernel, __blockdev_direct_IO() and iomap_dio_rw(), set the encryption context on bios for inline-encrypted files. __blockdev_direct_IO() is used by f2fs, and was used by ext4 in kernel v5.4 and earlier. iomap_dio_rw() is used by ext4 in kernel v5.5 and later. - Making ext4 and f2fs allow direct I/O to encrypted files (rather the current behavior of falling back to buffered I/O) when the file is using inline encryption and the I/O is fully filesystem-block-aligned. Bug: 137270441 Change-Id: I4c8f7497eb8f829d03611d24281113d68c21d4d1 Signed-off-by: Eric Biggers Signed-off-by: Satya Tangirala --- fs/direct-io.c | 5 +++++ fs/ext4/inode.c | 10 ++++++---- fs/f2fs/f2fs.h | 8 +++++++- fs/iomap.c | 6 ++++++ 4 files changed, 24 insertions(+), 5 deletions(-) diff --git a/fs/direct-io.c b/fs/direct-io.c index 30bf22c989de..729c59213d2e 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -431,6 +432,7 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, sector_t first_sector, int nr_vecs) { struct bio *bio; + struct inode *inode = dio->inode; /* * bio_alloc() is guaranteed to return a bio when called with @@ -438,6 +440,9 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, */ bio = bio_alloc(GFP_KERNEL, nr_vecs); + fscrypt_set_bio_crypt_ctx(bio, inode, + sdio->cur_page_fs_offset >> inode->i_blkbits, + GFP_KERNEL); bio_set_dev(bio, bdev); bio->bi_iter.bi_sector = first_sector; bio_set_op_attrs(bio, dio->op, dio->op_flags); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0aefed560d91..638151a97bfd 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3850,10 +3850,12 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter) ssize_t ret; int rw = iov_iter_rw(iter); -#ifdef CONFIG_FS_ENCRYPTION - if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode)) - return 0; -#endif + if (IS_ENABLED(CONFIG_FS_ENCRYPTION) && IS_ENCRYPTED(inode)) { + if (!fscrypt_inode_uses_inline_crypto(inode) || + !IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(iter), + i_blocksize(inode))) + return 0; + } if (fsverity_active(inode)) return 0; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2c181a4bec9d..7cc17c1640d7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3733,7 +3733,13 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int rw = iov_iter_rw(iter); - if (f2fs_post_read_required(inode)) + if (IS_ENABLED(CONFIG_FS_ENCRYPTION) && f2fs_encrypted_file(inode)) { + if (!fscrypt_inode_uses_inline_crypto(inode) || + !IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(iter), + F2FS_BLKSIZE)) + return true; + } + if (fsverity_active(inode)) return true; if (f2fs_is_multi_device(sbi)) return true; diff --git a/fs/iomap.c b/fs/iomap.c index 3f5b1655cfce..1e573a59ea71 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -825,10 +826,13 @@ static blk_qc_t iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos, unsigned len) { + struct inode *inode = file_inode(dio->iocb->ki_filp); struct page *page = ZERO_PAGE(0); struct bio *bio; bio = bio_alloc(GFP_KERNEL, 1); + fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits, + GFP_KERNEL); bio_set_dev(bio, iomap->bdev); bio->bi_iter.bi_sector = iomap->blkno + ((pos - iomap->offset) >> 9); @@ -908,6 +912,8 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, return 0; bio = bio_alloc(GFP_KERNEL, nr_pages); + fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits, + GFP_KERNEL); bio_set_dev(bio, iomap->bdev); bio->bi_iter.bi_sector = iomap->blkno + ((pos - iomap->offset) >> 9); From aeed6db424b22148964d9788d4f9abac6e6cd7d8 Mon Sep 17 00:00:00 2001 From: Satya Tangirala Date: Tue, 21 Jan 2020 09:27:43 -0800 Subject: [PATCH 2978/3715] ANDROID: block: Introduce passthrough keyslot manager The regular keyslot manager is designed for devices that have a small number of keyslots that need to be programmed with keys ahead of time, and bios that are sent to the device need to be tagged with a keyslot index. Some inline encryption hardware may not have any limitations on the number of keyslot, and may instead allow each bio to be tagged with a raw key, data unit number, etc. rather than a pre-programmed keyslot's index. These devices don't need any sort of keyslot management, and it's better for these devices not to have to allocate a regular keyslot manager with some fixed number of keyslots. These devices can instead set up a passthrough keyslot manager in their request queue, which require less resources than regular keyslot managers, as they simply do no-ops when trying to program keys into slots. Separately, the device mapper may map over devices that have inline encryption hardware, and it wants to pass the key along to the underlying hardware. While the DM layer can expose inline encryption capabilities by setting up a regular keyslot manager with some fixed number of keyslots in the dm device's request queue, this only wastes memory since the keys programmed into the dm device's request queue will never be used. Instead, it's better to set up a passthrough keyslot manager for dm devices. Bug: 137270441 Bug: 147814592 Change-Id: I6d91e83e86a73b0d6066873c8a9117cf2c089234 Signed-off-by: Satya Tangirala Signed-off-by: Eric Biggers Signed-off-by: Satya Tangirala --- block/keyslot-manager.c | 66 +++++++++++++++++++++++++++++++++ include/linux/keyslot-manager.h | 5 +++ 2 files changed, 71 insertions(+) diff --git a/block/keyslot-manager.c b/block/keyslot-manager.c index 6fad96a855b3..5dd5884514cb 100644 --- a/block/keyslot-manager.c +++ b/block/keyslot-manager.c @@ -67,6 +67,11 @@ struct keyslot_manager { struct keyslot slots[]; }; +static inline bool keyslot_manager_is_passthrough(struct keyslot_manager *ksm) +{ + return ksm->num_slots == 0; +} + /** * keyslot_manager_create() - Create a keyslot manager * @num_slots: The number of key slots to manage. @@ -212,6 +217,9 @@ int keyslot_manager_get_slot_for_key(struct keyslot_manager *ksm, int err; struct keyslot *idle_slot; + if (keyslot_manager_is_passthrough(ksm)) + return 0; + down_read(&ksm->lock); slot = find_and_grab_keyslot(ksm, key); up_read(&ksm->lock); @@ -277,6 +285,9 @@ int keyslot_manager_get_slot_for_key(struct keyslot_manager *ksm, */ void keyslot_manager_get_slot(struct keyslot_manager *ksm, unsigned int slot) { + if (keyslot_manager_is_passthrough(ksm)) + return; + if (WARN_ON(slot >= ksm->num_slots)) return; @@ -294,6 +305,9 @@ void keyslot_manager_put_slot(struct keyslot_manager *ksm, unsigned int slot) { unsigned long flags; + if (keyslot_manager_is_passthrough(ksm)) + return; + if (WARN_ON(slot >= ksm->num_slots)) return; @@ -353,6 +367,16 @@ int keyslot_manager_evict_key(struct keyslot_manager *ksm, int err; struct keyslot *slotp; + if (keyslot_manager_is_passthrough(ksm)) { + if (ksm->ksm_ll_ops.keyslot_evict) { + down_write(&ksm->lock); + err = ksm->ksm_ll_ops.keyslot_evict(ksm, key, -1); + up_write(&ksm->lock); + return err; + } + return 0; + } + down_write(&ksm->lock); slot = find_keyslot(ksm, key); if (slot < 0) { @@ -390,6 +414,9 @@ void keyslot_manager_reprogram_all_keys(struct keyslot_manager *ksm) { unsigned int slot; + if (WARN_ON(keyslot_manager_is_passthrough(ksm))) + return; + down_write(&ksm->lock); for (slot = 0; slot < ksm->num_slots; slot++) { const struct keyslot *slotp = &ksm->slots[slot]; @@ -427,6 +454,45 @@ void keyslot_manager_destroy(struct keyslot_manager *ksm) } EXPORT_SYMBOL_GPL(keyslot_manager_destroy); +/** + * keyslot_manager_create_passthrough() - Create a passthrough keyslot manager + * @ksm_ll_ops: The struct keyslot_mgmt_ll_ops + * @crypto_mode_supported: Bitmasks for supported encryption modes + * @ll_priv_data: Private data passed as is to the functions in ksm_ll_ops. + * + * Allocate memory for and initialize a passthrough keyslot manager. + * Called by e.g. storage drivers to set up a keyslot manager in their + * request_queue, when the storage driver wants to manage its keys by itself. + * This is useful for inline encryption hardware that don't have a small fixed + * number of keyslots, and for layered devices. + * + * See keyslot_manager_create() for more details about the parameters. + * + * Context: This function may sleep + * Return: Pointer to constructed keyslot manager or NULL on error. + */ +struct keyslot_manager *keyslot_manager_create_passthrough( + const struct keyslot_mgmt_ll_ops *ksm_ll_ops, + const unsigned int crypto_mode_supported[BLK_ENCRYPTION_MODE_MAX], + void *ll_priv_data) +{ + struct keyslot_manager *ksm; + + ksm = kzalloc(sizeof(*ksm), GFP_KERNEL); + if (!ksm) + return NULL; + + ksm->ksm_ll_ops = *ksm_ll_ops; + memcpy(ksm->crypto_mode_supported, crypto_mode_supported, + sizeof(ksm->crypto_mode_supported)); + ksm->ll_priv_data = ll_priv_data; + + init_rwsem(&ksm->lock); + + return ksm; +} +EXPORT_SYMBOL_GPL(keyslot_manager_create_passthrough); + /** * keyslot_manager_derive_raw_secret() - Derive software secret from wrapped key * @ksm: The keyslot manager diff --git a/include/linux/keyslot-manager.h b/include/linux/keyslot-manager.h index 17dfcaf208fb..85532baa89b2 100644 --- a/include/linux/keyslot-manager.h +++ b/include/linux/keyslot-manager.h @@ -64,6 +64,11 @@ void *keyslot_manager_private(struct keyslot_manager *ksm); void keyslot_manager_destroy(struct keyslot_manager *ksm); +struct keyslot_manager *keyslot_manager_create_passthrough( + const struct keyslot_mgmt_ll_ops *ksm_ops, + const unsigned int crypto_mode_supported[BLK_ENCRYPTION_MODE_MAX], + void *ll_priv_data); + int keyslot_manager_derive_raw_secret(struct keyslot_manager *ksm, const u8 *wrapped_key, unsigned int wrapped_key_size, From 53bc059bc6d98631e8936ab9eeb7ac780c9ab2c3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 21 Jan 2020 09:27:43 -0800 Subject: [PATCH 2979/3715] ANDROID: dm: add support for passing through inline crypto support Update the device-mapper core to support exposing the inline crypto support of the underlying device(s) through the device-mapper device. This works by creating a "passthrough keyslot manager" for the dm device, which declares support for the set of (crypto_mode, data_unit_size) combos which all the underlying devices support. When a supported combo is used, the bio cloning code handles cloning the crypto context to the bios for all the underlying devices. When an unsupported combo is used, the blk-crypto fallback is used as usual. Crypto support on each underlying device is ignored unless the corresponding dm target opts into exposing it. This is needed because for inline crypto to semantically operate on the original bio, the data must not be transformed by the dm target. Thus, targets like dm-linear can expose crypto support of the underlying device, but targets like dm-crypt can't. (dm-crypt could use inline crypto itself, though.) When a key is evicted from the dm device, it is evicted from all underlying devices. Bug: 137270441 Bug: 147814592 Change-Id: If28b574f2e28268db5eb9f325d4cf8f96cb63e3f Signed-off-by: Eric Biggers Signed-off-by: Satya Tangirala --- block/keyslot-manager.c | 28 ++++++++++ drivers/md/dm-table.c | 52 ++++++++++++++++++ drivers/md/dm.c | 97 ++++++++++++++++++++++++++++++++- include/linux/device-mapper.h | 6 ++ include/linux/keyslot-manager.h | 7 +++ 5 files changed, 189 insertions(+), 1 deletion(-) diff --git a/block/keyslot-manager.c b/block/keyslot-manager.c index 5dd5884514cb..7e42813c9de0 100644 --- a/block/keyslot-manager.c +++ b/block/keyslot-manager.c @@ -493,6 +493,34 @@ struct keyslot_manager *keyslot_manager_create_passthrough( } EXPORT_SYMBOL_GPL(keyslot_manager_create_passthrough); +/** + * keyslot_manager_intersect_modes() - restrict supported modes by child device + * @parent: The keyslot manager for parent device + * @child: The keyslot manager for child device, or NULL + * + * Clear any crypto mode support bits in @parent that aren't set in @child. + * If @child is NULL, then all parent bits are cleared. + * + * Only use this when setting up the keyslot manager for a layered device, + * before it's been exposed yet. + */ +void keyslot_manager_intersect_modes(struct keyslot_manager *parent, + const struct keyslot_manager *child) +{ + if (child) { + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(child->crypto_mode_supported); i++) { + parent->crypto_mode_supported[i] &= + child->crypto_mode_supported[i]; + } + } else { + memset(parent->crypto_mode_supported, 0, + sizeof(parent->crypto_mode_supported)); + } +} +EXPORT_SYMBOL_GPL(keyslot_manager_intersect_modes); + /** * keyslot_manager_derive_raw_secret() - Derive software secret from wrapped key * @ksm: The keyslot manager diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 6e184378ac61..3d095bdc2681 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include #define DM_MSG_PREFIX "table" @@ -1596,6 +1598,54 @@ static void dm_table_verify_integrity(struct dm_table *t) } } +#ifdef CONFIG_BLK_INLINE_ENCRYPTION +static int device_intersect_crypto_modes(struct dm_target *ti, + struct dm_dev *dev, sector_t start, + sector_t len, void *data) +{ + struct keyslot_manager *parent = data; + struct keyslot_manager *child = bdev_get_queue(dev->bdev)->ksm; + + keyslot_manager_intersect_modes(parent, child); + return 0; +} + +/* + * Update the inline crypto modes supported by 'q->ksm' to be the intersection + * of the modes supported by all targets in the table. + * + * For any mode to be supported at all, all targets must have explicitly + * declared that they can pass through inline crypto support. For a particular + * mode to be supported, all underlying devices must also support it. + * + * Assume that 'q->ksm' initially declares all modes to be supported. + */ +static void dm_calculate_supported_crypto_modes(struct dm_table *t, + struct request_queue *q) +{ + struct dm_target *ti; + unsigned int i; + + for (i = 0; i < dm_table_get_num_targets(t); i++) { + ti = dm_table_get_target(t, i); + + if (!ti->may_passthrough_inline_crypto) { + keyslot_manager_intersect_modes(q->ksm, NULL); + return; + } + if (!ti->type->iterate_devices) + continue; + ti->type->iterate_devices(ti, device_intersect_crypto_modes, + q->ksm); + } +} +#else /* CONFIG_BLK_INLINE_ENCRYPTION */ +static inline void dm_calculate_supported_crypto_modes(struct dm_table *t, + struct request_queue *q) +{ +} +#endif /* !CONFIG_BLK_INLINE_ENCRYPTION */ + static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { @@ -1870,6 +1920,8 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, dm_table_verify_integrity(t); + dm_calculate_supported_crypto_modes(t, q); + /* * Some devices don't use blk_integrity but still want stable pages * because they do their own checksumming. diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 85e594d3329c..ab1f49f2ee88 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -24,6 +24,8 @@ #include #include #include +#include +#include #define DM_MSG_PREFIX "core" @@ -1662,6 +1664,8 @@ void dm_init_normal_md_queue(struct mapped_device *md) md->queue->backing_dev_info->congested_fn = dm_any_congested; } +static void dm_destroy_inline_encryption(struct request_queue *q); + static void cleanup_mapped_device(struct mapped_device *md) { if (md->wq) @@ -1686,8 +1690,10 @@ static void cleanup_mapped_device(struct mapped_device *md) put_disk(md->disk); } - if (md->queue) + if (md->queue) { + dm_destroy_inline_encryption(md->queue); blk_cleanup_queue(md->queue); + } cleanup_srcu_struct(&md->io_barrier); @@ -2030,6 +2036,89 @@ struct queue_limits *dm_get_queue_limits(struct mapped_device *md) } EXPORT_SYMBOL_GPL(dm_get_queue_limits); +#ifdef CONFIG_BLK_INLINE_ENCRYPTION +struct dm_keyslot_evict_args { + const struct blk_crypto_key *key; + int err; +}; + +static int dm_keyslot_evict_callback(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + struct dm_keyslot_evict_args *args = data; + int err; + + err = blk_crypto_evict_key(dev->bdev->bd_queue, args->key); + if (!args->err) + args->err = err; + /* Always try to evict the key from all devices. */ + return 0; +} + +/* + * When an inline encryption key is evicted from a device-mapper device, evict + * it from all the underlying devices. + */ +static int dm_keyslot_evict(struct keyslot_manager *ksm, + const struct blk_crypto_key *key, unsigned int slot) +{ + struct mapped_device *md = keyslot_manager_private(ksm); + struct dm_keyslot_evict_args args = { key }; + struct dm_table *t; + int srcu_idx; + int i; + struct dm_target *ti; + + t = dm_get_live_table(md, &srcu_idx); + if (!t) + return 0; + for (i = 0; i < dm_table_get_num_targets(t); i++) { + ti = dm_table_get_target(t, i); + if (!ti->type->iterate_devices) + continue; + ti->type->iterate_devices(ti, dm_keyslot_evict_callback, &args); + } + dm_put_live_table(md, srcu_idx); + return args.err; +} + +static struct keyslot_mgmt_ll_ops dm_ksm_ll_ops = { + .keyslot_evict = dm_keyslot_evict, +}; + +static int dm_init_inline_encryption(struct mapped_device *md) +{ + unsigned int mode_masks[BLK_ENCRYPTION_MODE_MAX]; + + /* + * Start out with all crypto mode support bits set. Any unsupported + * bits will be cleared later when calculating the device restrictions. + */ + memset(mode_masks, 0xFF, sizeof(mode_masks)); + + md->queue->ksm = keyslot_manager_create_passthrough(&dm_ksm_ll_ops, + mode_masks, md); + if (!md->queue->ksm) + return -ENOMEM; + return 0; +} + +static void dm_destroy_inline_encryption(struct request_queue *q) +{ + keyslot_manager_destroy(q->ksm); + q->ksm = NULL; +} +#else /* CONFIG_BLK_INLINE_ENCRYPTION */ +static inline int dm_init_inline_encryption(struct mapped_device *md) +{ + return 0; +} + +static inline void dm_destroy_inline_encryption(struct request_queue *q) +{ +} +#endif /* !CONFIG_BLK_INLINE_ENCRYPTION */ + /* * Setup the DM device's queue based on md's type */ @@ -2069,6 +2158,12 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) break; } + r = dm_init_inline_encryption(md); + if (r) { + DMERR("Cannot initialize inline encryption"); + return r; + } + return 0; } diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 91a063a1f3b3..248e6ecea471 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -321,6 +321,12 @@ struct dm_target { * on max_io_len boundary. */ bool split_discard_bios:1; + + /* + * Set if inline crypto capabilities from this target's underlying + * device(s) can be exposed via the device-mapper device. + */ + bool may_passthrough_inline_crypto:1; }; /* Each target can link one of these into the table */ diff --git a/include/linux/keyslot-manager.h b/include/linux/keyslot-manager.h index 85532baa89b2..6d32a031218e 100644 --- a/include/linux/keyslot-manager.h +++ b/include/linux/keyslot-manager.h @@ -8,6 +8,8 @@ #include +#ifdef CONFIG_BLK_INLINE_ENCRYPTION + struct keyslot_manager; /** @@ -69,9 +71,14 @@ struct keyslot_manager *keyslot_manager_create_passthrough( const unsigned int crypto_mode_supported[BLK_ENCRYPTION_MODE_MAX], void *ll_priv_data); +void keyslot_manager_intersect_modes(struct keyslot_manager *parent, + const struct keyslot_manager *child); + int keyslot_manager_derive_raw_secret(struct keyslot_manager *ksm, const u8 *wrapped_key, unsigned int wrapped_key_size, u8 *secret, unsigned int secret_size); +#endif /* CONFIG_BLK_INLINE_ENCRYPTION */ + #endif /* __LINUX_KEYSLOT_MANAGER_H */ From 232fd353e45d13576d507a011b5dac17e3c320ab Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 21 Jan 2020 16:43:41 -0800 Subject: [PATCH 2980/3715] ANDROID: dm: enable may_passthrough_inline_crypto on some targets dm-linear obviously can pass through inline crypto support. In addition, we need dm-bow to pass through inline crypto support, as the userdata partition in Android may be located on top of dm-bow. Bug: 137270441 Change-Id: Ib9b0f67b77391fa450394568ceed441c1508de6e Signed-off-by: Eric Biggers Signed-off-by: Satya Tangirala --- drivers/md/dm-bow.c | 1 + drivers/md/dm-linear.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/md/dm-bow.c b/drivers/md/dm-bow.c index b92da30a3d42..28df18633853 100644 --- a/drivers/md/dm-bow.c +++ b/drivers/md/dm-bow.c @@ -726,6 +726,7 @@ static int dm_bow_ctr(struct dm_target *ti, unsigned int argc, char **argv) rb_insert_color(&br->node, &bc->ranges); ti->discards_supported = true; + ti->may_passthrough_inline_crypto = true; return 0; diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index e6fd31b03c38..aa294720b14e 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -61,6 +61,7 @@ int dm_linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_discard_bios = 1; ti->num_write_same_bios = 1; ti->num_write_zeroes_bios = 1; + ti->may_passthrough_inline_crypto = true; ti->private = lc; return 0; From e1a94e6b17e2610b56c5740b763df7858dad40f0 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 21 Jan 2020 09:27:47 -0800 Subject: [PATCH 2981/3715] ANDROID: dm: add dm-default-key target for metadata encryption Add a device-mapper target "dm-default-key" which assigns an encryption key to bios that aren't for the contents of an encrypted file. This ensures that all blocks on-disk will be encrypted with some key, without the performance hit of file contents being encrypted twice when fscrypt (File-Based Encryption) is used. It is only appropriate to use dm-default-key when key configuration is tightly controlled, like it is in Android, such that all fscrypt keys are at least as hard to compromise as the default key. Compared to the original version of dm-default-key, this has been modified to use the new vendor-independent inline encryption framework (which works even when no inline encryption hardware is present), the table syntax has been changed to match dm-crypt, and support for specifying Adiantum encryption has been added. These changes also mean that dm-default-key now always explicitly specifies the DUN (the IV). Also, to handle f2fs moving blocks of encrypted files around without the key, and to handle ext4 and f2fs filesystems mounted without '-o inlinecrypt', the mapping logic is no longer "set a key on the bio if it doesn't have one already", but rather "set a key on the bio unless the bio has the bi_skip_dm_default_key flag set". Filesystems set this flag on *all* bios for encrypted file contents, regardless of whether they are encrypting/decrypting the file using inline encryption or the traditional filesystem-layer encryption, or moving the raw data. For the bi_skip_dm_default_key flag, a new field in struct bio is used rather than a bit in bi_opf so that fscrypt_set_bio_crypt_ctx() can set the flag, minimizing the changes needed to filesystems. (bi_opf is usually overwritten after fscrypt_set_bio_crypt_ctx() is called.) Bug: 137270441 Bug: 147814592 Change-Id: I69c9cd1e968ccf990e4ad96e5115b662237f5095 Signed-off-by: Eric Biggers Signed-off-by: Satya Tangirala --- block/bio-crypt-ctx.c | 2 + block/blk-crypto-fallback.c | 2 + drivers/md/Kconfig | 18 ++ drivers/md/Makefile | 1 + drivers/md/dm-default-key.c | 403 ++++++++++++++++++++++++++++++++++ fs/crypto/inline_crypt.c | 14 +- fs/f2fs/data.c | 6 +- include/linux/bio-crypt-ctx.h | 32 +++ include/linux/blk_types.h | 3 + include/linux/fscrypt.h | 14 ++ 10 files changed, 493 insertions(+), 2 deletions(-) create mode 100644 drivers/md/dm-default-key.c diff --git a/block/bio-crypt-ctx.c b/block/bio-crypt-ctx.c index b6df3dcf28cc..75008b2afea2 100644 --- a/block/bio-crypt-ctx.c +++ b/block/bio-crypt-ctx.c @@ -61,6 +61,8 @@ void bio_crypt_clone(struct bio *dst, struct bio *src, gfp_t gfp_mask) { const struct bio_crypt_ctx *src_bc = src->bi_crypt_context; + bio_clone_skip_dm_default_key(dst, src); + /* * If a bio is fallback_crypted, then it will be decrypted when * bio_endio is called. As we only want the data to be decrypted once, diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c index ef293547c56b..cce3317cba80 100644 --- a/block/blk-crypto-fallback.c +++ b/block/blk-crypto-fallback.c @@ -192,6 +192,8 @@ static struct bio *blk_crypto_clone_bio(struct bio *bio_src) bio_clone_blkcg_association(bio, bio_src); + bio_clone_skip_dm_default_key(bio, bio_src); + return bio; } diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 0e643556bb44..1f38aa9d3db8 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -286,6 +286,24 @@ config DM_CRYPT If unsure, say N. +config DM_DEFAULT_KEY + tristate "Default-key target support" + depends on BLK_DEV_DM + depends on BLK_INLINE_ENCRYPTION + help + This device-mapper target allows you to create a device that + assigns a default encryption key to bios that aren't for the + contents of an encrypted file. + + This ensures that all blocks on-disk will be encrypted with + some key, without the performance hit of file contents being + encrypted twice when fscrypt (File-Based Encryption) is used. + + It is only appropriate to use dm-default-key when key + configuration is tightly controlled, like it is in Android, + such that all fscrypt keys are at least as hard to compromise + as the default key. + config DM_SNAPSHOT tristate "Snapshot target" depends on BLK_DEV_DM diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 214be71181f4..144aec4fd08d 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -43,6 +43,7 @@ obj-$(CONFIG_BLK_DEV_DM_BUILTIN) += dm-builtin.o obj-$(CONFIG_DM_BUFIO) += dm-bufio.o obj-$(CONFIG_DM_BIO_PRISON) += dm-bio-prison.o obj-$(CONFIG_DM_CRYPT) += dm-crypt.o +obj-$(CONFIG_DM_DEFAULT_KEY) += dm-default-key.o obj-$(CONFIG_DM_DELAY) += dm-delay.o obj-$(CONFIG_DM_FLAKEY) += dm-flakey.o obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o diff --git a/drivers/md/dm-default-key.c b/drivers/md/dm-default-key.c new file mode 100644 index 000000000000..43a30c076aa6 --- /dev/null +++ b/drivers/md/dm-default-key.c @@ -0,0 +1,403 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2017 Google, Inc. + */ + +#include +#include +#include + +#define DM_MSG_PREFIX "default-key" + +#define DM_DEFAULT_KEY_MAX_KEY_SIZE 64 + +#define SECTOR_SIZE (1 << SECTOR_SHIFT) + +static const struct dm_default_key_cipher { + const char *name; + enum blk_crypto_mode_num mode_num; + int key_size; +} dm_default_key_ciphers[] = { + { + .name = "aes-xts-plain64", + .mode_num = BLK_ENCRYPTION_MODE_AES_256_XTS, + .key_size = 64, + }, { + .name = "xchacha12,aes-adiantum-plain64", + .mode_num = BLK_ENCRYPTION_MODE_ADIANTUM, + .key_size = 32, + }, +}; + +/** + * struct dm_default_c - private data of a default-key target + * @dev: the underlying device + * @start: starting sector of the range of @dev which this target actually maps. + * For this purpose a "sector" is 512 bytes. + * @cipher_string: the name of the encryption algorithm being used + * @iv_offset: starting offset for IVs. IVs are generated as if the target were + * preceded by @iv_offset 512-byte sectors. + * @sector_size: crypto sector size in bytes (usually 4096) + * @sector_bits: log2(sector_size) + * @key: the encryption key to use + */ +struct default_key_c { + struct dm_dev *dev; + sector_t start; + const char *cipher_string; + u64 iv_offset; + unsigned int sector_size; + unsigned int sector_bits; + struct blk_crypto_key key; +}; + +static const struct dm_default_key_cipher * +lookup_cipher(const char *cipher_string) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(dm_default_key_ciphers); i++) { + if (strcmp(cipher_string, dm_default_key_ciphers[i].name) == 0) + return &dm_default_key_ciphers[i]; + } + return NULL; +} + +static void default_key_dtr(struct dm_target *ti) +{ + struct default_key_c *dkc = ti->private; + int err; + + if (dkc->dev) { + err = blk_crypto_evict_key(dkc->dev->bdev->bd_queue, &dkc->key); + if (err && err != -ENOKEY) + DMWARN("Failed to evict crypto key: %d", err); + dm_put_device(ti, dkc->dev); + } + kzfree(dkc->cipher_string); + kzfree(dkc); +} + +static int default_key_ctr_optional(struct dm_target *ti, + unsigned int argc, char **argv) +{ + struct default_key_c *dkc = ti->private; + struct dm_arg_set as; + static const struct dm_arg _args[] = { + {0, 3, "Invalid number of feature args"}, + }; + unsigned int opt_params; + const char *opt_string; + bool iv_large_sectors = false; + char dummy; + int err; + + as.argc = argc; + as.argv = argv; + + err = dm_read_arg_group(_args, &as, &opt_params, &ti->error); + if (err) + return err; + + while (opt_params--) { + opt_string = dm_shift_arg(&as); + if (!opt_string) { + ti->error = "Not enough feature arguments"; + return -EINVAL; + } + if (!strcmp(opt_string, "allow_discards")) { + ti->num_discard_bios = 1; + } else if (sscanf(opt_string, "sector_size:%u%c", + &dkc->sector_size, &dummy) == 1) { + if (dkc->sector_size < SECTOR_SIZE || + dkc->sector_size > 4096 || + !is_power_of_2(dkc->sector_size)) { + ti->error = "Invalid sector_size"; + return -EINVAL; + } + } else if (!strcmp(opt_string, "iv_large_sectors")) { + iv_large_sectors = true; + } else { + ti->error = "Invalid feature arguments"; + return -EINVAL; + } + } + + /* dm-default-key doesn't implement iv_large_sectors=false. */ + if (dkc->sector_size != SECTOR_SIZE && !iv_large_sectors) { + ti->error = "iv_large_sectors must be specified"; + return -EINVAL; + } + + return 0; +} + +/* + * Construct a default-key mapping: + * + * + * This syntax matches dm-crypt's, but lots of unneeded functionality has been + * removed. Also, dm-default-key requires that the "iv_large_sectors" option be + * given whenever a non-default sector size is used. + */ +static int default_key_ctr(struct dm_target *ti, unsigned int argc, char **argv) +{ + struct default_key_c *dkc; + const struct dm_default_key_cipher *cipher; + u8 raw_key[DM_DEFAULT_KEY_MAX_KEY_SIZE]; + unsigned long long tmpll; + char dummy; + int err; + + if (argc < 5) { + ti->error = "Not enough arguments"; + return -EINVAL; + } + + dkc = kzalloc(sizeof(*dkc), GFP_KERNEL); + if (!dkc) { + ti->error = "Out of memory"; + return -ENOMEM; + } + ti->private = dkc; + + /* */ + dkc->cipher_string = kstrdup(argv[0], GFP_KERNEL); + if (!dkc->cipher_string) { + ti->error = "Out of memory"; + err = -ENOMEM; + goto bad; + } + cipher = lookup_cipher(dkc->cipher_string); + if (!cipher) { + ti->error = "Unsupported cipher"; + err = -EINVAL; + goto bad; + } + + /* */ + if (strlen(argv[1]) != 2 * cipher->key_size) { + ti->error = "Incorrect key size for cipher"; + err = -EINVAL; + goto bad; + } + if (hex2bin(raw_key, argv[1], cipher->key_size) != 0) { + ti->error = "Malformed key string"; + err = -EINVAL; + goto bad; + } + + /* */ + if (sscanf(argv[2], "%llu%c", &dkc->iv_offset, &dummy) != 1) { + ti->error = "Invalid iv_offset sector"; + err = -EINVAL; + goto bad; + } + + /* */ + err = dm_get_device(ti, argv[3], dm_table_get_mode(ti->table), + &dkc->dev); + if (err) { + ti->error = "Device lookup failed"; + goto bad; + } + + /* */ + if (sscanf(argv[4], "%llu%c", &tmpll, &dummy) != 1 || + tmpll != (sector_t)tmpll) { + ti->error = "Invalid start sector"; + err = -EINVAL; + goto bad; + } + dkc->start = tmpll; + + /* optional arguments */ + dkc->sector_size = SECTOR_SIZE; + if (argc > 5) { + err = default_key_ctr_optional(ti, argc - 5, &argv[5]); + if (err) + goto bad; + } + dkc->sector_bits = ilog2(dkc->sector_size); + if (ti->len & ((dkc->sector_size >> SECTOR_SHIFT) - 1)) { + ti->error = "Device size is not a multiple of sector_size"; + err = -EINVAL; + goto bad; + } + + err = blk_crypto_init_key(&dkc->key, raw_key, cipher->key_size, + cipher->mode_num, dkc->sector_size); + if (err) { + ti->error = "Error initializing blk-crypto key"; + goto bad; + } + + err = blk_crypto_start_using_mode(cipher->mode_num, dkc->sector_size, + dkc->dev->bdev->bd_queue); + if (err) { + ti->error = "Error starting to use blk-crypto"; + goto bad; + } + + ti->num_flush_bios = 1; + + ti->may_passthrough_inline_crypto = true; + + err = 0; + goto out; + +bad: + default_key_dtr(ti); +out: + memzero_explicit(raw_key, sizeof(raw_key)); + return err; +} + +static int default_key_map(struct dm_target *ti, struct bio *bio) +{ + const struct default_key_c *dkc = ti->private; + sector_t sector_in_target; + u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE] = { 0 }; + + bio_set_dev(bio, dkc->dev->bdev); + + /* + * If the bio is a device-level request which doesn't target a specific + * sector, there's nothing more to do. + */ + if (bio_sectors(bio) == 0) + return DM_MAPIO_REMAPPED; + + /* Map the bio's sector to the underlying device. (512-byte sectors) */ + sector_in_target = dm_target_offset(ti, bio->bi_iter.bi_sector); + bio->bi_iter.bi_sector = dkc->start + sector_in_target; + + /* + * If the bio should skip dm-default-key (i.e. if it's for an encrypted + * file's contents), or if it doesn't have any data (e.g. if it's a + * DISCARD request), there's nothing more to do. + */ + if (bio_should_skip_dm_default_key(bio) || !bio_has_data(bio)) + return DM_MAPIO_REMAPPED; + + /* + * Else, dm-default-key needs to set this bio's encryption context. + * It must not already have one. + */ + if (WARN_ON_ONCE(bio_has_crypt_ctx(bio))) + return DM_MAPIO_KILL; + + /* Calculate the DUN and enforce data-unit (crypto sector) alignment. */ + dun[0] = dkc->iv_offset + sector_in_target; /* 512-byte sectors */ + if (dun[0] & ((dkc->sector_size >> SECTOR_SHIFT) - 1)) + return DM_MAPIO_KILL; + dun[0] >>= dkc->sector_bits - SECTOR_SHIFT; /* crypto sectors */ + + bio_crypt_set_ctx(bio, &dkc->key, dun, GFP_NOIO); + + return DM_MAPIO_REMAPPED; +} + +static void default_key_status(struct dm_target *ti, status_type_t type, + unsigned int status_flags, char *result, + unsigned int maxlen) +{ + const struct default_key_c *dkc = ti->private; + unsigned int sz = 0; + int num_feature_args = 0; + + switch (type) { + case STATUSTYPE_INFO: + result[0] = '\0'; + break; + + case STATUSTYPE_TABLE: + /* Omit the key for now. */ + DMEMIT("%s - %llu %s %llu", dkc->cipher_string, dkc->iv_offset, + dkc->dev->name, (unsigned long long)dkc->start); + + num_feature_args += !!ti->num_discard_bios; + if (dkc->sector_size != SECTOR_SIZE) + num_feature_args += 2; + if (num_feature_args != 0) { + DMEMIT(" %d", num_feature_args); + if (ti->num_discard_bios) + DMEMIT(" allow_discards"); + if (dkc->sector_size != SECTOR_SIZE) { + DMEMIT(" sector_size:%u", dkc->sector_size); + DMEMIT(" iv_large_sectors"); + } + } + break; + } +} + +static int default_key_prepare_ioctl(struct dm_target *ti, + struct block_device **bdev, + fmode_t *mode) +{ + const struct default_key_c *dkc = ti->private; + const struct dm_dev *dev = dkc->dev; + + *bdev = dev->bdev; + + /* Only pass ioctls through if the device sizes match exactly. */ + if (dkc->start != 0 || + ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT) + return 1; + return 0; +} + +static int default_key_iterate_devices(struct dm_target *ti, + iterate_devices_callout_fn fn, + void *data) +{ + const struct default_key_c *dkc = ti->private; + + return fn(ti, dkc->dev, dkc->start, ti->len, data); +} + +static void default_key_io_hints(struct dm_target *ti, + struct queue_limits *limits) +{ + const struct default_key_c *dkc = ti->private; + const unsigned int sector_size = dkc->sector_size; + + limits->logical_block_size = + max_t(unsigned short, limits->logical_block_size, sector_size); + limits->physical_block_size = + max_t(unsigned int, limits->physical_block_size, sector_size); + limits->io_min = max_t(unsigned int, limits->io_min, sector_size); +} + +static struct target_type default_key_target = { + .name = "default-key", + .version = {2, 0, 0}, + .module = THIS_MODULE, + .ctr = default_key_ctr, + .dtr = default_key_dtr, + .map = default_key_map, + .status = default_key_status, + .prepare_ioctl = default_key_prepare_ioctl, + .iterate_devices = default_key_iterate_devices, + .io_hints = default_key_io_hints, +}; + +static int __init dm_default_key_init(void) +{ + return dm_register_target(&default_key_target); +} + +static void __exit dm_default_key_exit(void) +{ + dm_unregister_target(&default_key_target); +} + +module_init(dm_default_key_init); +module_exit(dm_default_key_exit); + +MODULE_AUTHOR("Paul Lawrence "); +MODULE_AUTHOR("Paul Crowley "); +MODULE_AUTHOR("Eric Biggers "); +MODULE_DESCRIPTION(DM_NAME " target for encrypting filesystem metadata"); +MODULE_LICENSE("GPL"); diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c index e7e8e9a27e8b..92c471d3db73 100644 --- a/fs/crypto/inline_crypt.c +++ b/fs/crypto/inline_crypt.c @@ -215,6 +215,8 @@ static void fscrypt_generate_dun(const struct fscrypt_info *ci, u64 lblk_num, * otherwise fscrypt_mergeable_bio() won't work as intended. * * The encryption context will be freed automatically when the bio is freed. + * + * This function also handles setting bi_skip_dm_default_key when needed. */ void fscrypt_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode, u64 first_lblk, gfp_t gfp_mask) @@ -222,6 +224,9 @@ void fscrypt_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode, const struct fscrypt_info *ci = inode->i_crypt_info; u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE]; + if (fscrypt_inode_should_skip_dm_default_key(inode)) + bio_set_skip_dm_default_key(bio); + if (!fscrypt_inode_uses_inline_crypto(inode)) return; @@ -291,6 +296,9 @@ EXPORT_SYMBOL_GPL(fscrypt_set_bio_crypt_ctx_bh); * * fscrypt_set_bio_crypt_ctx() must have already been called on the bio. * + * This function also returns false if the next part of the I/O would need to + * have a different value for the bi_skip_dm_default_key flag. + * * Return: true iff the I/O is mergeable */ bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode, @@ -301,6 +309,9 @@ bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode, if (!!bc != fscrypt_inode_uses_inline_crypto(inode)) return false; + if (bio_should_skip_dm_default_key(bio) != + fscrypt_inode_should_skip_dm_default_key(inode)) + return false; if (!bc) return true; @@ -334,7 +345,8 @@ bool fscrypt_mergeable_bio_bh(struct bio *bio, u64 next_lblk; if (!bh_get_inode_and_lblk_num(next_bh, &inode, &next_lblk)) - return !bio->bi_crypt_context; + return !bio->bi_crypt_context && + !bio_should_skip_dm_default_key(bio); return fscrypt_mergeable_bio(bio, inode, next_lblk); } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 59296091afc0..61545b810f98 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -328,6 +328,8 @@ static void f2fs_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode, */ if (!fio || !fio->encrypted_page) fscrypt_set_bio_crypt_ctx(bio, inode, first_idx, gfp_mask); + else if (fscrypt_inode_should_skip_dm_default_key(inode)) + bio_set_skip_dm_default_key(bio); } static bool f2fs_crypt_mergeable_bio(struct bio *bio, const struct inode *inode, @@ -339,7 +341,9 @@ static bool f2fs_crypt_mergeable_bio(struct bio *bio, const struct inode *inode, * read/write raw data without encryption. */ if (fio && fio->encrypted_page) - return !bio_has_crypt_ctx(bio); + return !bio_has_crypt_ctx(bio) && + (bio_should_skip_dm_default_key(bio) == + fscrypt_inode_should_skip_dm_default_key(inode)); return fscrypt_mergeable_bio(bio, inode, next_idx); } diff --git a/include/linux/bio-crypt-ctx.h b/include/linux/bio-crypt-ctx.h index 652f92ff75fd..ab22dbe7b880 100644 --- a/include/linux/bio-crypt-ctx.h +++ b/include/linux/bio-crypt-ctx.h @@ -189,6 +189,38 @@ static inline bool bio_crypt_ctx_mergeable(struct bio *b_1, #endif /* CONFIG_BLK_INLINE_ENCRYPTION */ +#if IS_ENABLED(CONFIG_DM_DEFAULT_KEY) +static inline void bio_set_skip_dm_default_key(struct bio *bio) +{ + bio->bi_skip_dm_default_key = true; +} + +static inline bool bio_should_skip_dm_default_key(const struct bio *bio) +{ + return bio->bi_skip_dm_default_key; +} + +static inline void bio_clone_skip_dm_default_key(struct bio *dst, + const struct bio *src) +{ + dst->bi_skip_dm_default_key = src->bi_skip_dm_default_key; +} +#else /* CONFIG_DM_DEFAULT_KEY */ +static inline void bio_set_skip_dm_default_key(struct bio *bio) +{ +} + +static inline bool bio_should_skip_dm_default_key(const struct bio *bio) +{ + return false; +} + +static inline void bio_clone_skip_dm_default_key(struct bio *dst, + const struct bio *src) +{ +} +#endif /* !CONFIG_DM_DEFAULT_KEY */ + #endif /* CONFIG_BLOCK */ #endif /* __LINUX_BIO_CRYPT_CTX_H */ diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 32dc18c5cef3..736dc8c10f61 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -99,6 +99,9 @@ struct bio { #ifdef CONFIG_BLK_INLINE_ENCRYPTION struct bio_crypt_ctx *bi_crypt_context; +#if IS_ENABLED(CONFIG_DM_DEFAULT_KEY) + bool bi_skip_dm_default_key; +#endif #endif union { diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 841dcbec26b5..6ac092cc4067 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -590,6 +590,20 @@ static inline bool fscrypt_mergeable_bio_bh(struct bio *bio, } #endif /* !CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ +#if IS_ENABLED(CONFIG_FS_ENCRYPTION) && IS_ENABLED(CONFIG_DM_DEFAULT_KEY) +static inline bool +fscrypt_inode_should_skip_dm_default_key(const struct inode *inode) +{ + return IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode); +} +#else +static inline bool +fscrypt_inode_should_skip_dm_default_key(const struct inode *inode) +{ + return false; +} +#endif + /** * fscrypt_require_key - require an inode's encryption key * @inode: the inode we need the key for From cd9b69d7f4c908436e0e33577c37fbf54aa468d9 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 28 Jan 2020 13:19:43 -0800 Subject: [PATCH 2982/3715] ANDROID: cuttlefish_defconfig: enable dm-default-key dm-default-key is needed for metadata encryption (https://source.android.com/security/encryption/metadata). The new version of dm-default-key is vendor-independent and can be used both with and without inline encryption hardware. Bug: 137270441 Bug: 147814592 Change-Id: I92ce45c4b1543ff8bc5cdb5ebe4ddcdb740c2dc7 Signed-off-by: Eric Biggers Signed-off-by: Satya Tangirala --- arch/arm64/configs/cuttlefish_defconfig | 1 + arch/x86/configs/x86_64_cuttlefish_defconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index 89ce97cf812c..35b12642b72c 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -221,6 +221,7 @@ CONFIG_BLK_DEV_SD=y CONFIG_MD=y CONFIG_BLK_DEV_DM=y CONFIG_DM_CRYPT=y +CONFIG_DM_DEFAULT_KEY=y CONFIG_DM_SNAPSHOT=y CONFIG_DM_UEVENT=y CONFIG_DM_VERITY=y diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index e6874f1f68cf..8dd1542ff08f 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -235,6 +235,7 @@ CONFIG_SCSI_SPI_ATTRS=y CONFIG_MD=y CONFIG_BLK_DEV_DM=y CONFIG_DM_CRYPT=y +CONFIG_DM_DEFAULT_KEY=y CONFIG_DM_SNAPSHOT=y CONFIG_DM_MIRROR=y CONFIG_DM_ZERO=y From 2664a43d88d91eb09f212cb26e111656a4acee49 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Wed, 29 Jan 2020 12:47:53 -0800 Subject: [PATCH 2983/3715] ANDROID: Incremental fs: Fix initialization, use of bitfields Test: incfs_test passes Signed-off-by: Paul Lawrence Bug: 133435829 Change-Id: I824286b77f665d2409c5e88930057a97da82ce82 --- fs/incfs/data_mgmt.c | 17 ++++++++------- fs/incfs/data_mgmt.h | 34 ++++++++++++++++++++++++++---- include/uapi/linux/incrementalfs.h | 10 +++++++++ 3 files changed, 49 insertions(+), 12 deletions(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index 109329e0a180..4698f14bbdf7 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -85,11 +85,11 @@ static void data_file_segment_destroy(struct data_file_segment *segment) struct data_file *incfs_open_data_file(struct mount_info *mi, struct file *bf) { - struct data_file *df = NULL; - struct backing_file_context *bfc = NULL; + struct data_file *df; + struct backing_file_context *bfc; int md_records; u64 size; - int error = 0; + int error; int i; if (!bf || !mi) @@ -160,7 +160,7 @@ int make_inode_ready_for_data_ops(struct mount_info *mi, struct file *backing_file) { struct inode_info *node = get_incfs_node(inode); - struct data_file *df = NULL; + struct data_file *df; int err = 0; inode_lock(inode); @@ -181,7 +181,7 @@ int make_inode_ready_for_data_ops(struct mount_info *mi, struct dir_file *incfs_open_dir_file(struct mount_info *mi, struct file *bf) { - struct dir_file *dir = NULL; + struct dir_file *dir; if (!S_ISDIR(bf->f_inode->i_mode)) return ERR_PTR(-EBADF); @@ -222,11 +222,12 @@ static void log_block_read(struct mount_info *mi, incfs_uuid_t *id, s64 now_us = ktime_to_us(ktime_get()); struct read_log_record record = { .file_id = *id, - .block_index = block_index, - .timed_out = timed_out, .timestamp_us = now_us }; + set_block_index(&record, block_index); + set_timed_out(&record, timed_out); + if (log->rl_size == 0) return; @@ -1062,7 +1063,7 @@ static void fill_pending_read_from_log_record( struct read_log_state *state, u64 log_size) { dest->file_id = src->file_id; - dest->block_index = src->block_index; + dest->block_index = get_block_index(src); dest->serial_number = state->current_pass_no * log_size + state->next_index; dest->timestamp_us = src->timestamp_us; diff --git a/fs/incfs/data_mgmt.h b/fs/incfs/data_mgmt.h index 82ccab3be4bb..6722cef1608c 100644 --- a/fs/incfs/data_mgmt.h +++ b/fs/incfs/data_mgmt.h @@ -21,15 +21,41 @@ #define SEGMENTS_PER_FILE 3 struct read_log_record { - u32 block_index : 31; - - u32 timed_out : 1; + u32 bitfield; u64 timestamp_us; incfs_uuid_t file_id; } __packed; +#define RLR_BLOCK_INDEX_MASK 0x7fff +#define RLR_TIMED_OUT_MASK 0x8000 + +static inline u32 get_block_index(const struct read_log_record *rlr) +{ + return rlr->bitfield & RLR_BLOCK_INDEX_MASK; +} + +static inline void set_block_index(struct read_log_record *rlr, + u32 block_index) +{ + rlr->bitfield = (rlr->bitfield & ~RLR_BLOCK_INDEX_MASK) + | (block_index & RLR_BLOCK_INDEX_MASK); +} + +static inline bool get_timed_out(const struct read_log_record *rlr) +{ + return (rlr->bitfield & RLR_TIMED_OUT_MASK) == RLR_TIMED_OUT_MASK; +} + +static inline void set_timed_out(struct read_log_record *rlr, bool timed_out) +{ + if (timed_out) + rlr->bitfield |= RLR_TIMED_OUT_MASK; + else + rlr->bitfield &= ~RLR_TIMED_OUT_MASK; +} + struct read_log_state { /* Next slot in rl_ring_buf to write to. */ u32 next_index; @@ -271,7 +297,7 @@ static inline struct inode_info *get_incfs_node(struct inode *inode) static inline struct data_file *get_incfs_data_file(struct file *f) { - struct inode_info *node = NULL; + struct inode_info *node; if (!f) return NULL; diff --git a/include/uapi/linux/incrementalfs.h b/include/uapi/linux/incrementalfs.h index b257b9f0ec3f..787049031cca 100644 --- a/include/uapi/linux/incrementalfs.h +++ b/include/uapi/linux/incrementalfs.h @@ -107,10 +107,13 @@ struct incfs_new_data_block { /* Values from enum incfs_block_flags */ __u8 flags; + /* Reserved - must be 0 */ __u16 reserved1; + /* Reserved - must be 0 */ __u32 reserved2; + /* Reserved - must be 0 */ __aligned_u64 reserved3; }; @@ -140,6 +143,7 @@ struct incfs_file_signature_info { /* Size of additional data. */ __u32 additional_data_size; + /* Reserved - must be 0 */ __u32 reserved1; /* @@ -153,6 +157,7 @@ struct incfs_file_signature_info { /* Size of pkcs7 signature DER blob */ __u32 signature_size; + /* Reserved - must be 0 */ __u32 reserved2; /* Value from incfs_hash_tree_algorithm */ @@ -176,8 +181,10 @@ struct incfs_new_file_args { */ __u16 mode; + /* Reserved - must be 0 */ __u16 reserved1; + /* Reserved - must be 0 */ __u32 reserved2; /* @@ -210,13 +217,16 @@ struct incfs_new_file_args { */ __u32 file_attr_len; + /* Reserved - must be 0 */ __u32 reserved4; /* struct incfs_file_signature_info *signature_info; */ __aligned_u64 signature_info; + /* Reserved - must be 0 */ __aligned_u64 reserved5; + /* Reserved - must be 0 */ __aligned_u64 reserved6; }; From 66ff25e225c39daba3923eccd8315e7c7f4bb0d5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 7 Feb 2020 13:47:07 -0800 Subject: [PATCH 2984/3715] f2fs: fix build error on PAGE_KERNEL_RO This fixes build error reported by kbuild test robot. tree: https://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs-stable.git linux-4.14.y head: 2945d197414d9732c680ea0b709735d3b0d8ea57 commit: f6574fbf6578e47cfa3cace486ca852979a1e433 [868/885] f2fs: support data compression config: mips-allyesconfig (attached as .config) compiler: mips-linux-gcc (GCC) 7.5.0 reproduce: wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross git checkout f6574fbf6578e47cfa3cace486ca852979a1e433 # save the attached .config to linux build tree GCC_VERSION=7.5.0 make.cross ARCH=mips If you fix the issue, kindly add following tag Reported-by: kbuild test robot All errors (new ones prefixed by >>): fs/f2fs/compress.c: In function 'f2fs_compress_pages': >> fs/f2fs/compress.c:359:56: error: 'PAGE_KERNEL_RO' undeclared (first use in this function); did you mean +'PAGE_KERNEL_NC'? cc->rbuf = vmap(cc->rpages, cc->cluster_size, VM_MAP, PAGE_KERNEL_RO); ^~~~~~~~~~~~~~ PAGE_KERNEL_NC fs/f2fs/compress.c:359:56: note: each undeclared identifier is reported only once for each function it appears +in fs/f2fs/compress.c: In function 'f2fs_decompress_pages': fs/f2fs/compress.c:456:56: error: 'PAGE_KERNEL_RO' undeclared (first use in this function); did you mean +'PAGE_KERNEL_NC'? dic->cbuf = vmap(dic->cpages, dic->nr_cpages, VM_MAP, PAGE_KERNEL_RO); ^~~~~~~~~~~~~~ PAGE_KERNEL_NC vim +359 fs/f2fs/compress.c Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index d8a64be90a50..17e10c4cd880 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -16,6 +16,11 @@ #include "node.h" #include +/* Some architectures don't have PAGE_KERNEL_RO */ +#ifndef PAGE_KERNEL_RO +#define PAGE_KERNEL_RO PAGE_KERNEL +#endif + struct f2fs_compress_ops { int (*init_compress_ctx)(struct compress_ctx *cc); void (*destroy_compress_ctx)(struct compress_ctx *cc); From a444cff8fe37bdd6981358de3e870b52f8e18012 Mon Sep 17 00:00:00 2001 From: Ram Muthiah Date: Fri, 7 Feb 2020 17:38:04 -0800 Subject: [PATCH 2985/3715] ANDROID: cf: disable virtio crypto Bug: 132629930 Test: Treehugger Change-Id: I0bb435127d0ebb05a2793f8141d582e633b296c7 Signed-off-by: Ram Muthiah --- arch/arm64/configs/cuttlefish_defconfig | 2 +- arch/x86/configs/x86_64_cuttlefish_defconfig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index 35b12642b72c..b1cfa0669b11 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -466,5 +466,5 @@ CONFIG_CRYPTO_ADIANTUM=y CONFIG_CRYPTO_LZ4=y CONFIG_CRYPTO_ZSTD=y CONFIG_CRYPTO_ANSI_CPRNG=y -CONFIG_CRYPTO_DEV_VIRTIO=y +# CONFIG_CRYPTO_DEV_VIRTIO is not set CONFIG_XZ_DEC=y diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 8dd1542ff08f..17ecf3a45256 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -496,5 +496,5 @@ CONFIG_CRYPTO_ADIANTUM=y CONFIG_CRYPTO_AES_NI_INTEL=y CONFIG_CRYPTO_LZ4=y CONFIG_CRYPTO_ZSTD=y -CONFIG_CRYPTO_DEV_VIRTIO=y +# CONFIG_CRYPTO_DEV_VIRTIO is not set CONFIG_SYSTEM_TRUSTED_KEYS="verity_dev_keys.x509" From a9a545067a93d9821f965989b8eaea6fba7d27f7 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 7 Feb 2020 10:45:22 -0800 Subject: [PATCH 2986/3715] ANDROID: dm: prevent default-key from being enabled without needed hooks While dm-default-key no longer requires that the filesystem is also using inline encryption (i.e. mounted with '-o inlinecrypt'), it does still rely on the inline crypto hooks like fscrypt_set_bio_crypt_ctx() being built into the kernel rather than stubbed out, as these are used to set the bi_skip_dm_default_key flag on bios for encrypted files. I.e., CONFIG_FS_ENCRYPTION_INLINE_CRYPT=y is still needed. The proper solution is to fix these hooks to handle DM_DEFAULT_KEY && !FS_ENCRYPTION_INLINE_CRYPT, but that would introduce a case that wouldn't get tested. So for now, to avoid problems just make dm-default-key depend on FS_ENCRYPTION_INLINE_CRYPT. Test: make gki_defconfig echo CONFIG_FS_ENCRYPTION_INLINE_CRYPT=n >> .config make olddefconfig grep CONFIG_DM_DEFAULT_KEY .config # was disabled Bug: 147814592 Change-Id: Ie52686a72d5d14457d87dcf81677e33e291680bb Signed-off-by: Eric Biggers --- drivers/md/Kconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 1f38aa9d3db8..1abc2380d9a6 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -290,6 +290,9 @@ config DM_DEFAULT_KEY tristate "Default-key target support" depends on BLK_DEV_DM depends on BLK_INLINE_ENCRYPTION + # dm-default-key doesn't require -o inlinecrypt, but it does currently + # rely on the inline encryption hooks being built into the kernel. + depends on FS_ENCRYPTION_INLINE_CRYPT help This device-mapper target allows you to create a device that assigns a default encryption key to bios that aren't for the From 9ad7f75e3870f630af891988160e813eb02d8082 Mon Sep 17 00:00:00 2001 From: Hridya Valsaraju Date: Mon, 10 Feb 2020 16:41:48 -0800 Subject: [PATCH 2987/3715] ANDROID: Set CONFIG_ANDROID_BINDERFS=y Enable binderfs. Bug: 136497735 Test: build, boot, binder devices created in /dev/binderfs Change-Id: I31e27bbcc3dbd89253e6862819284904ed827554 Signed-off-by: Hridya Valsaraju --- arch/arm64/configs/cuttlefish_defconfig | 1 + arch/x86/configs/x86_64_cuttlefish_defconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index b1cfa0669b11..00d1b1d54a00 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -416,6 +416,7 @@ CONFIG_MAILBOX=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_ANDROID=y CONFIG_ANDROID_BINDER_IPC=y +CONFIG_ANDROID_BINDERFS=y CONFIG_LIBNVDIMM=y # CONFIG_ND_BLK is not set CONFIG_ARM_SCPI_PROTOCOL=y diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 17ecf3a45256..9e18f3ed255a 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -430,6 +430,7 @@ CONFIG_ION_SYSTEM_HEAP=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_ANDROID=y CONFIG_ANDROID_BINDER_IPC=y +CONFIG_ANDROID_BINDERFS=y CONFIG_LIBNVDIMM=y # CONFIG_ND_BLK is not set # CONFIG_FIRMWARE_MEMMAP is not set From c288e0cf13e115a77b42bec0cea7b5303edf1ef6 Mon Sep 17 00:00:00 2001 From: Sandeep Patil Date: Fri, 7 Feb 2020 10:09:15 -0800 Subject: [PATCH 2988/3715] ANDROID: Revert "ANDROID: gki_defconfig: removed CONFIG_PM_WAKELOCKS" CONFIG_PM_WAKELOCKS are needed by existing driver code that uses APIs like pm_wake_lock/unlock(). We can't disable yet just yet even though userspace has stopped using the /sys/power/wake_lock interface. This reverts commit b82fc46d57da841e02ced5f8e8727aff5510d9bd. Bug: 148922129 Test: Boot Change-Id: I8b8f4f46db3647313485abff21a5659bd881b687 Signed-off-by: Sandeep Patil --- arch/arm64/configs/cuttlefish_defconfig | 3 +++ arch/x86/configs/x86_64_cuttlefish_defconfig | 3 +++ 2 files changed, 6 insertions(+) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index 00d1b1d54a00..3e9011fdd5d4 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -69,6 +69,9 @@ CONFIG_RANDOMIZE_BASE=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_BINFMT_MISC=y CONFIG_COMPAT=y +CONFIG_PM_WAKELOCKS=y +CONFIG_PM_WAKELOCKS_LIMIT=0 +# CONFIG_PM_WAKELOCKS_GC is not set CONFIG_PM_DEBUG=y CONFIG_CPU_IDLE=y CONFIG_ARM_CPUIDLE=y diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 9e18f3ed255a..3dd116141836 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -67,6 +67,9 @@ CONFIG_PHYSICAL_START=0x200000 CONFIG_PHYSICAL_ALIGN=0x1000000 CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="console=ttyS0 reboot=p" +CONFIG_PM_WAKELOCKS=y +CONFIG_PM_WAKELOCKS_LIMIT=0 +# CONFIG_PM_WAKELOCKS_GC is not set CONFIG_PM_DEBUG=y CONFIG_ACPI_PROCFS_POWER=y # CONFIG_ACPI_FAN is not set From 2abecdfec86d95eacd8b10c72f0c7b17fba5ab0f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 7 Feb 2020 13:47:07 -0800 Subject: [PATCH 2989/3715] ANDROID: f2fs: fix build error on PAGE_KERNEL_RO This fixes build error reported by kbuild test robot. tree: https://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs-stable.git linux-4.14.y head: 2945d197414d9732c680ea0b709735d3b0d8ea57 commit: f6574fbf6578e47cfa3cace486ca852979a1e433 [868/885] f2fs: support data compression config: mips-allyesconfig (attached as .config) compiler: mips-linux-gcc (GCC) 7.5.0 reproduce: wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross git checkout f6574fbf6578e47cfa3cace486ca852979a1e433 # save the attached .config to linux build tree GCC_VERSION=7.5.0 make.cross ARCH=mips If you fix the issue, kindly add following tag Reported-by: kbuild test robot All errors (new ones prefixed by >>): fs/f2fs/compress.c: In function 'f2fs_compress_pages': >> fs/f2fs/compress.c:359:56: error: 'PAGE_KERNEL_RO' undeclared (first use in this function); did you mean +'PAGE_KERNEL_NC'? cc->rbuf = vmap(cc->rpages, cc->cluster_size, VM_MAP, PAGE_KERNEL_RO); ^~~~~~~~~~~~~~ PAGE_KERNEL_NC fs/f2fs/compress.c:359:56: note: each undeclared identifier is reported only once for each function it appears +in fs/f2fs/compress.c: In function 'f2fs_decompress_pages': fs/f2fs/compress.c:456:56: error: 'PAGE_KERNEL_RO' undeclared (first use in this function); did you mean +'PAGE_KERNEL_NC'? dic->cbuf = vmap(dic->cpages, dic->nr_cpages, VM_MAP, PAGE_KERNEL_RO); ^~~~~~~~~~~~~~ PAGE_KERNEL_NC vim +359 fs/f2fs/compress.c Change-Id: If921073482232ddddef2e65cc7a7c3e9ae6af008 Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index d8a64be90a50..17e10c4cd880 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -16,6 +16,11 @@ #include "node.h" #include +/* Some architectures don't have PAGE_KERNEL_RO */ +#ifndef PAGE_KERNEL_RO +#define PAGE_KERNEL_RO PAGE_KERNEL +#endif + struct f2fs_compress_ops { int (*init_compress_ctx)(struct compress_ctx *cc); void (*destroy_compress_ctx)(struct compress_ctx *cc); From ff7ba973ce16c976277e0f7b945b71070a6d1062 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 12 Jun 2019 17:35:34 +0200 Subject: [PATCH 2990/3715] UPSTREAM: lib: dynamic_debug: no need to check return value of debugfs_create functions When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. Bug: 145162121 Cc: linux-kernel@vger.kernel.org Acked-by: Jason Baron Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 9fd714cd7f4676e8ff3f840911a8d64cacbeab8b) Signed-off-by: Greg Kroah-Hartman Change-Id: I471385f3af3f96c767f59ae76ea0a115c9eb4f7a --- lib/dynamic_debug.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index c7c96bc7654a..91e7d30a71b4 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -956,20 +956,14 @@ static __initdata int ddebug_init_success; static int __init dynamic_debug_init_debugfs(void) { - struct dentry *dir, *file; + struct dentry *dir; if (!ddebug_init_success) return -ENODEV; dir = debugfs_create_dir("dynamic_debug", NULL); - if (!dir) - return -ENOMEM; - file = debugfs_create_file("control", 0644, dir, NULL, - &ddebug_proc_fops); - if (!file) { - debugfs_remove(dir); - return -ENOMEM; - } + debugfs_create_file("control", 0644, dir, NULL, &ddebug_proc_fops); + return 0; } From d9536fce0d661b44e427255e8e433971daf08181 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 10 Feb 2020 13:11:42 -0800 Subject: [PATCH 2991/3715] UPSTREAM: dynamic_debug: allow to work if debugfs is disabled With the realization that having debugfs enabled on "production" systems is generally not a good idea, debugfs is being disabled from more and more platforms over time. However, the functionality of dynamic debugging still is needed at times, and since it relies on debugfs for its user api, having debugfs disabled also forces dynamic debug to be disabled. To get around this, also create the "control" file for dynamic_debug in procfs. This allows people turn on debugging as needed at runtime for individual driverfs and subsystems. Bug: 145162121 Reported-by: many different companies Cc: Jason Baron Acked-by: Will Deacon Link: https://lore.kernel.org/r/20200210211142.GB1373304@kroah.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 239a5791ffd5559f51815df442c4dbbe7fc21ade) Signed-off-by: Greg Kroah-Hartman Change-Id: Icd892ea823af6254726847700fd9c251d13b556b --- .../admin-guide/dynamic-debug-howto.rst | 3 +++ lib/Kconfig.debug | 7 ++++--- lib/dynamic_debug.c | 20 ++++++++++++++----- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/Documentation/admin-guide/dynamic-debug-howto.rst b/Documentation/admin-guide/dynamic-debug-howto.rst index 12278a926370..36b7e740558f 100644 --- a/Documentation/admin-guide/dynamic-debug-howto.rst +++ b/Documentation/admin-guide/dynamic-debug-howto.rst @@ -54,6 +54,9 @@ If you make a mistake with the syntax, the write will fail thus:: /dynamic_debug/control -bash: echo: write error: Invalid argument +Note, for systems without 'debugfs' enabled, the control file can be +found in ``/proc/dynamic_debug/control``. + Viewing Dynamic Debug Behaviour =============================== diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 65ac0511546e..0e89b58b1b75 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -67,7 +67,7 @@ config DYNAMIC_DEBUG bool "Enable dynamic printk() support" default n depends on PRINTK - depends on DEBUG_FS + depends on (DEBUG_FS || PROC_FS) help Compiles debug level messages into the kernel, which would not @@ -85,8 +85,9 @@ config DYNAMIC_DEBUG Usage: Dynamic debugging is controlled via the 'dynamic_debug/control' file, - which is contained in the 'debugfs' filesystem. Thus, the debugfs - filesystem must first be mounted before making use of this feature. + which is contained in the 'debugfs' filesystem or procfs. + Thus, the debugfs or procfs filesystem must first be mounted before + making use of this feature. We refer the control file as: /dynamic_debug/control. This file contains a list of the debug statements that can be enabled. The format for each line of the file is: diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 91e7d30a71b4..3f4a49c3c03b 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -954,15 +954,25 @@ static void ddebug_remove_all_tables(void) static __initdata int ddebug_init_success; -static int __init dynamic_debug_init_debugfs(void) +static int __init dynamic_debug_init_control(void) { - struct dentry *dir; + struct proc_dir_entry *procfs_dir; + struct dentry *debugfs_dir; if (!ddebug_init_success) return -ENODEV; - dir = debugfs_create_dir("dynamic_debug", NULL); - debugfs_create_file("control", 0644, dir, NULL, &ddebug_proc_fops); + /* Create the control file in debugfs if it is enabled */ + if (debugfs_initialized()) { + debugfs_dir = debugfs_create_dir("dynamic_debug", NULL); + debugfs_create_file("control", 0644, debugfs_dir, NULL, + &ddebug_proc_fops); + } + + /* Also create the control file in procfs */ + procfs_dir = proc_mkdir("dynamic_debug", NULL); + if (procfs_dir) + proc_create("control", 0644, procfs_dir, &ddebug_proc_fops); return 0; } @@ -1040,4 +1050,4 @@ out_err: early_initcall(dynamic_debug_init); /* Debugfs setup must be done later */ -fs_initcall(dynamic_debug_init_debugfs); +fs_initcall(dynamic_debug_init_control); From 4c904614f05003cd70c0197878afd6e4018d8644 Mon Sep 17 00:00:00 2001 From: Kusanagi Kouichi Date: Thu, 21 Nov 2019 19:20:21 +0900 Subject: [PATCH 2992/3715] BACKPORT: debugfs: Fix !DEBUG_FS debugfs_create_automount If DEBUG_FS=n, compile fails with the following error: kernel/trace/trace.c: In function 'tracing_init_dentry': kernel/trace/trace.c:8658:9: error: passing argument 3 of 'debugfs_create_automount' from incompatible pointer type [-Werror=incompatible-pointer-types] 8658 | trace_automount, NULL); | ^~~~~~~~~~~~~~~ | | | struct vfsmount * (*)(struct dentry *, void *) In file included from kernel/trace/trace.c:24: ./include/linux/debugfs.h:206:25: note: expected 'struct vfsmount * (*)(void *)' but argument is of type 'struct vfsmount * (*)(struct dentry *, void *)' 206 | struct vfsmount *(*f)(void *), | ~~~~~~~~~~~~~~~~~~~^~~~~~~~~~ Signed-off-by: Kusanagi Kouichi Link: https://lore.kernel.org/r/20191121102021787.MLMY.25002.ppp.dion.ne.jp@dmta0003.auone-net.jp Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 4250b047039d324e0ff65267c8beb5bad5052a86) Signed-off-by: Greg Kroah-Hartman Change-Id: Ifddd54e40956f8cdeca8a1a24d0754cda47b91e9 --- include/linux/debugfs.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index b93efc8feecd..755033acd2b0 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -77,6 +77,8 @@ static const struct file_operations __fops = { \ .llseek = no_llseek, \ } +typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, void *); + #if defined(CONFIG_DEBUG_FS) struct dentry *debugfs_lookup(const char *name, struct dentry *parent); @@ -98,7 +100,6 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent); struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent, const char *dest); -typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, void *); struct dentry *debugfs_create_automount(const char *name, struct dentry *parent, debugfs_automount_t f, @@ -227,7 +228,7 @@ static inline struct dentry *debugfs_create_symlink(const char *name, static inline struct dentry *debugfs_create_automount(const char *name, struct dentry *parent, - struct vfsmount *(*f)(void *), + debugfs_automount_t f, void *data) { return ERR_PTR(-ENODEV); From 6c35b6009849335faa28691ffaef031e1e14a9a3 Mon Sep 17 00:00:00 2001 From: Kusanagi Kouichi Date: Wed, 20 Nov 2019 19:43:50 +0900 Subject: [PATCH 2993/3715] BACKPORT: tracing: Remove unnecessary DEBUG_FS dependency Tracing replaced debugfs with tracefs. Signed-off-by: Kusanagi Kouichi Reviewed-by: Steven Rostedt (VMware) Link: https://lore.kernel.org/r/20191120104350753.EWCT.12796.ppp.dion.ne.jp@dmta0009.auone-net.jp Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 0e4a459f56c32d3e52ae69a4b447db2f48a65f44) Signed-off-by: Greg Kroah-Hartman Change-Id: Id61dddcb804cf7a5d62d2d04a455d8b84097c967 --- kernel/trace/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 3ec4922a2655..6373901b831b 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -94,7 +94,6 @@ config RING_BUFFER_ALLOW_SWAP config TRACING bool - select DEBUG_FS select RING_BUFFER select STACKTRACE if STACKTRACE_SUPPORT select TRACEPOINTS From 224ac0edc90f1533d006fe745a629a85349a1068 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 12 Feb 2020 19:38:08 -0800 Subject: [PATCH 2994/3715] ANDROID: update x86_64_cuttlefish_defconfig The ability to detach debugfs from tracing resulted in a few defconfig changes: -CONFIG_SW_SYNC=y -CONFIG_DEBUG_BOOT_PARAMS=y Signed-off-by: Greg Kroah-Hartman Change-Id: If13241fd7555dca3ffd8d0bf8ab089de5f2bda4a --- arch/x86/configs/x86_64_cuttlefish_defconfig | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 3dd116141836..d26852b031c2 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -418,7 +418,6 @@ CONFIG_USB_CONFIGFS_UEVENT=y CONFIG_USB_CONFIGFS_F_MIDI=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_TEST=y -CONFIG_SW_SYNC=y CONFIG_VIRTIO_PCI=y CONFIG_VIRTIO_PMEM=y CONFIG_VIRTIO_INPUT=y @@ -486,7 +485,6 @@ CONFIG_SCHEDSTATS=y CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_ENABLE_DEFAULT_TRACERS=y CONFIG_IO_DELAY_NONE=y -CONFIG_DEBUG_BOOT_PARAMS=y CONFIG_OPTIMIZE_INLINING=y CONFIG_UNWINDER_FRAME_POINTER=y CONFIG_SECURITY=y From d4fdc94421725fbc9f58ef8d426d667f1bb0ac7e Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 4 Dec 2018 10:31:27 -0800 Subject: [PATCH 2995/3715] keys: Export lookup_user_key to external users Export lookup_user_key() symbol in order to allow nvdimm passphrase update to retrieve user injected keys. Signed-off-by: Dave Jiang Acked-by: David Howells Signed-off-by: Dan Williams --- include/linux/key.h | 3 +++ security/keys/internal.h | 2 -- security/keys/process_keys.c | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/key.h b/include/linux/key.h index 8a15cabe928d..afe4d6b90cad 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -345,6 +345,9 @@ static inline key_serial_t key_serial(const struct key *key) extern void key_set_timeout(struct key *, unsigned); +extern key_ref_t lookup_user_key(key_serial_t id, unsigned long flags, + key_perm_t perm); + /* * The permissions required on a key that we're looking up. */ diff --git a/security/keys/internal.h b/security/keys/internal.h index 503adbae7b0d..2bdc05bed4f0 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -158,8 +158,6 @@ extern struct key *request_key_and_link(struct key_type *type, extern bool lookup_user_key_possessed(const struct key *key, const struct key_match_data *match_data); -extern key_ref_t lookup_user_key(key_serial_t id, unsigned long flags, - key_perm_t perm); #define KEY_LOOKUP_CREATE 0x01 #define KEY_LOOKUP_PARTIAL 0x02 #define KEY_LOOKUP_FOR_UNLINK 0x04 diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 740affd65ee9..0fac129b4fa2 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -755,6 +755,7 @@ reget_creds: put_cred(ctx.cred); goto try_again; } +EXPORT_SYMBOL(lookup_user_key); /* * Join the named keyring as the session keyring if possible else attempt to From 35290265028f070dc8e0a79e855f0384b99ff1cc Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 19 Nov 2019 14:24:47 -0800 Subject: [PATCH 2996/3715] fscrypt: support passing a keyring key to FS_IOC_ADD_ENCRYPTION_KEY Extend the FS_IOC_ADD_ENCRYPTION_KEY ioctl to allow the raw key to be specified by a Linux keyring key, rather than specified directly. This is useful because fscrypt keys belong to a particular filesystem instance, so they are destroyed when that filesystem is unmounted. Usually this is desired. But in some cases, userspace may need to unmount and re-mount the filesystem while keeping the keys, e.g. during a system update. This requires keeping the keys somewhere else too. The keys could be kept in memory in a userspace daemon. But depending on the security architecture and assumptions, it can be preferable to keep them only in kernel memory, where they are unreadable by userspace. We also can't solve this by going back to the original fscrypt API (where for each file, the master key was looked up in the process's keyring hierarchy) because that caused lots of problems of its own. Therefore, add the ability for FS_IOC_ADD_ENCRYPTION_KEY to accept a Linux keyring key. This solves the problem by allowing userspace to (if needed) save the keys securely in a Linux keyring for re-provisioning, while still using the new fscrypt key management ioctls. This is analogous to how dm-crypt accepts a Linux keyring key, but the key is then stored internally in the dm-crypt data structures rather than being looked up again each time the dm-crypt device is accessed. Use a custom key type "fscrypt-provisioning" rather than one of the existing key types such as "logon". This is strongly desired because it enforces that these keys are only usable for a particular purpose: for fscrypt as input to a particular KDF. Otherwise, the keys could also be passed to any kernel API that accepts a "logon" key with any service prefix, e.g. dm-crypt, UBIFS, or (recently proposed) AF_ALG. This would risk leaking information about the raw key despite it ostensibly being unreadable. Of course, this mistake has already been made for multiple kernel APIs; but since this is a new API, let's do it right. This patch has been tested using an xfstest which I wrote to test it. Link: https://lore.kernel.org/r/20191119222447.226853-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- Documentation/filesystems/fscrypt.rst | 35 ++++++- fs/crypto/keyring.c | 132 ++++++++++++++++++++++++-- include/uapi/linux/fscrypt.h | 13 ++- 3 files changed, 168 insertions(+), 12 deletions(-) diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst index 471a511c7508..4ed9d58ea0ab 100644 --- a/Documentation/filesystems/fscrypt.rst +++ b/Documentation/filesystems/fscrypt.rst @@ -638,7 +638,8 @@ follows:: struct fscrypt_add_key_arg { struct fscrypt_key_specifier key_spec; __u32 raw_size; - __u32 __reserved[9]; + __u32 key_id; + __u32 __reserved[8]; __u8 raw[]; }; @@ -655,6 +656,12 @@ follows:: } u; }; + struct fscrypt_provisioning_key_payload { + __u32 type; + __u32 __reserved; + __u8 raw[]; + }; + :c:type:`struct fscrypt_add_key_arg` must be zeroed, then initialized as follows: @@ -677,9 +684,26 @@ as follows: ``Documentation/security/keys/core.rst``). - ``raw_size`` must be the size of the ``raw`` key provided, in bytes. + Alternatively, if ``key_id`` is nonzero, this field must be 0, since + in that case the size is implied by the specified Linux keyring key. + +- ``key_id`` is 0 if the raw key is given directly in the ``raw`` + field. Otherwise ``key_id`` is the ID of a Linux keyring key of + type "fscrypt-provisioning" whose payload is a :c:type:`struct + fscrypt_provisioning_key_payload` whose ``raw`` field contains the + raw key and whose ``type`` field matches ``key_spec.type``. Since + ``raw`` is variable-length, the total size of this key's payload + must be ``sizeof(struct fscrypt_provisioning_key_payload)`` plus the + raw key size. The process must have Search permission on this key. + + Most users should leave this 0 and specify the raw key directly. + The support for specifying a Linux keyring key is intended mainly to + allow re-adding keys after a filesystem is unmounted and re-mounted, + without having to store the raw keys in userspace memory. - ``raw`` is a variable-length field which must contain the actual - key, ``raw_size`` bytes long. + key, ``raw_size`` bytes long. Alternatively, if ``key_id`` is + nonzero, then this field is unused. For v2 policy keys, the kernel keeps track of which user (identified by effective user ID) added the key, and only allows the key to be @@ -701,11 +725,16 @@ FS_IOC_ADD_ENCRYPTION_KEY can fail with the following errors: - ``EACCES``: FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR was specified, but the caller does not have the CAP_SYS_ADMIN capability in the initial - user namespace + user namespace; or the raw key was specified by Linux key ID but the + process lacks Search permission on the key. - ``EDQUOT``: the key quota for this user would be exceeded by adding the key - ``EINVAL``: invalid key size or key specifier type, or reserved bits were set +- ``EKEYREJECTED``: the raw key was specified by Linux key ID, but the + key has the wrong type +- ``ENOKEY``: the raw key was specified by Linux key ID, but no key + exists with that ID - ``ENOTTY``: this type of filesystem does not implement encryption - ``EOPNOTSUPP``: the kernel was not configured with encryption support for this filesystem, or the filesystem superblock has not diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c index ecbebdc1b02a..395aee2e6e4c 100644 --- a/fs/crypto/keyring.c +++ b/fs/crypto/keyring.c @@ -465,6 +465,109 @@ out_unlock: return err; } +static int fscrypt_provisioning_key_preparse(struct key_preparsed_payload *prep) +{ + const struct fscrypt_provisioning_key_payload *payload = prep->data; + + if (prep->datalen < sizeof(*payload) + FSCRYPT_MIN_KEY_SIZE || + prep->datalen > sizeof(*payload) + FSCRYPT_MAX_KEY_SIZE) + return -EINVAL; + + if (payload->type != FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR && + payload->type != FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER) + return -EINVAL; + + if (payload->__reserved) + return -EINVAL; + + prep->payload.data[0] = kmemdup(payload, prep->datalen, GFP_KERNEL); + if (!prep->payload.data[0]) + return -ENOMEM; + + prep->quotalen = prep->datalen; + return 0; +} + +static void fscrypt_provisioning_key_free_preparse( + struct key_preparsed_payload *prep) +{ + kzfree(prep->payload.data[0]); +} + +static void fscrypt_provisioning_key_describe(const struct key *key, + struct seq_file *m) +{ + seq_puts(m, key->description); + if (key_is_positive(key)) { + const struct fscrypt_provisioning_key_payload *payload = + key->payload.data[0]; + + seq_printf(m, ": %u [%u]", key->datalen, payload->type); + } +} + +static void fscrypt_provisioning_key_destroy(struct key *key) +{ + kzfree(key->payload.data[0]); +} + +static struct key_type key_type_fscrypt_provisioning = { + .name = "fscrypt-provisioning", + .preparse = fscrypt_provisioning_key_preparse, + .free_preparse = fscrypt_provisioning_key_free_preparse, + .instantiate = generic_key_instantiate, + .describe = fscrypt_provisioning_key_describe, + .destroy = fscrypt_provisioning_key_destroy, +}; + +/* + * Retrieve the raw key from the Linux keyring key specified by 'key_id', and + * store it into 'secret'. + * + * The key must be of type "fscrypt-provisioning" and must have the field + * fscrypt_provisioning_key_payload::type set to 'type', indicating that it's + * only usable with fscrypt with the particular KDF version identified by + * 'type'. We don't use the "logon" key type because there's no way to + * completely restrict the use of such keys; they can be used by any kernel API + * that accepts "logon" keys and doesn't require a specific service prefix. + * + * The ability to specify the key via Linux keyring key is intended for cases + * where userspace needs to re-add keys after the filesystem is unmounted and + * re-mounted. Most users should just provide the raw key directly instead. + */ +static int get_keyring_key(u32 key_id, u32 type, + struct fscrypt_master_key_secret *secret) +{ + key_ref_t ref; + struct key *key; + const struct fscrypt_provisioning_key_payload *payload; + int err; + + ref = lookup_user_key(key_id, 0, KEY_NEED_SEARCH); + if (IS_ERR(ref)) + return PTR_ERR(ref); + key = key_ref_to_ptr(ref); + + if (key->type != &key_type_fscrypt_provisioning) + goto bad_key; + payload = key->payload.data[0]; + + /* Don't allow fscrypt v1 keys to be used as v2 keys and vice versa. */ + if (payload->type != type) + goto bad_key; + + secret->size = key->datalen - sizeof(*payload); + memcpy(secret->raw, payload->raw, secret->size); + err = 0; + goto out_put; + +bad_key: + err = -EKEYREJECTED; +out_put: + key_ref_put(ref); + return err; +} + /* * Add a master encryption key to the filesystem, causing all files which were * encrypted with it to appear "unlocked" (decrypted) when accessed. @@ -503,18 +606,25 @@ int fscrypt_ioctl_add_key(struct file *filp, void __user *_uarg) if (!valid_key_spec(&arg.key_spec)) return -EINVAL; - if (arg.raw_size < FSCRYPT_MIN_KEY_SIZE || - arg.raw_size > FSCRYPT_MAX_KEY_SIZE) - return -EINVAL; - if (memchr_inv(arg.__reserved, 0, sizeof(arg.__reserved))) return -EINVAL; memset(&secret, 0, sizeof(secret)); - secret.size = arg.raw_size; - err = -EFAULT; - if (copy_from_user(secret.raw, uarg->raw, secret.size)) - goto out_wipe_secret; + if (arg.key_id) { + if (arg.raw_size != 0) + return -EINVAL; + err = get_keyring_key(arg.key_id, arg.key_spec.type, &secret); + if (err) + goto out_wipe_secret; + } else { + if (arg.raw_size < FSCRYPT_MIN_KEY_SIZE || + arg.raw_size > FSCRYPT_MAX_KEY_SIZE) + return -EINVAL; + secret.size = arg.raw_size; + err = -EFAULT; + if (copy_from_user(secret.raw, uarg->raw, secret.size)) + goto out_wipe_secret; + } switch (arg.key_spec.type) { case FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR: @@ -978,8 +1088,14 @@ int __init fscrypt_init_keyring(void) if (err) goto err_unregister_fscrypt; + err = register_key_type(&key_type_fscrypt_provisioning); + if (err) + goto err_unregister_fscrypt_user; + return 0; +err_unregister_fscrypt_user: + unregister_key_type(&key_type_fscrypt_user); err_unregister_fscrypt: unregister_key_type(&key_type_fscrypt); return err; diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h index 1beb174ad950..d5112a24e8b9 100644 --- a/include/uapi/linux/fscrypt.h +++ b/include/uapi/linux/fscrypt.h @@ -109,11 +109,22 @@ struct fscrypt_key_specifier { } u; }; +/* + * Payload of Linux keyring key of type "fscrypt-provisioning", referenced by + * fscrypt_add_key_arg::key_id as an alternative to fscrypt_add_key_arg::raw. + */ +struct fscrypt_provisioning_key_payload { + __u32 type; + __u32 __reserved; + __u8 raw[]; +}; + /* Struct passed to FS_IOC_ADD_ENCRYPTION_KEY */ struct fscrypt_add_key_arg { struct fscrypt_key_specifier key_spec; __u32 raw_size; - __u32 __reserved[9]; + __u32 key_id; + __u32 __reserved[8]; __u8 raw[]; }; From 9c1b3af1a6869c1207e6941b5707ef2f3ff02d99 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Dec 2019 12:38:10 -0800 Subject: [PATCH 2997/3715] fscrypt: use crypto_skcipher_driver_name() Crypto API users shouldn't really be accessing struct skcipher_alg directly. already has a function crypto_skcipher_driver_name(), so use that instead. No change in behavior. Link: https://lore.kernel.org/r/20191209203810.225302-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/keysetup.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index 2f926d3e6b5d..0db5130b70de 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -89,8 +89,7 @@ struct crypto_skcipher *fscrypt_allocate_skcipher(struct fscrypt_mode *mode, * first time a mode is used. */ pr_info("fscrypt: %s using implementation \"%s\"\n", - mode->friendly_name, - crypto_skcipher_alg(tfm)->base.cra_driver_name); + mode->friendly_name, crypto_skcipher_driver_name(tfm)); } crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY); err = crypto_skcipher_setkey(tfm, raw_key, mode->keysize); From a7b6398dee530e0efe68a83ef5c16d15b7b6c646 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Dec 2019 12:39:18 -0800 Subject: [PATCH 2998/3715] fscrypt: verify that the crypto_skcipher has the correct ivsize As a sanity check, verify that the allocated crypto_skcipher actually has the ivsize that fscrypt is assuming it has. This will always be the case unless there's a bug. But if there ever is such a bug (e.g. like there was in earlier versions of the ESSIV conversion patch [1]) it's preferable for it to be immediately obvious, and not rely on the ciphertext verification tests failing due to uninitialized IV bytes. [1] https://lkml.kernel.org/linux-crypto/20190702215517.GA69157@gmail.com/ Link: https://lore.kernel.org/r/20191209203918.225691-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/keysetup.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index 0db5130b70de..9ced26a4a887 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -91,6 +91,10 @@ struct crypto_skcipher *fscrypt_allocate_skcipher(struct fscrypt_mode *mode, pr_info("fscrypt: %s using implementation \"%s\"\n", mode->friendly_name, crypto_skcipher_driver_name(tfm)); } + if (WARN_ON(crypto_skcipher_ivsize(tfm) != mode->ivsize)) { + err = -EINVAL; + goto err_free_tfm; + } crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY); err = crypto_skcipher_setkey(tfm, raw_key, mode->keysize); if (err) From 394222909cb26d8b087904e32406c281d721f9f4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Dec 2019 12:40:54 -0800 Subject: [PATCH 2999/3715] fscrypt: constify struct fscrypt_hkdf parameter to fscrypt_hkdf_expand() Constify the struct fscrypt_hkdf parameter to fscrypt_hkdf_expand(). This makes it clearer that struct fscrypt_hkdf contains the key only, not any per-request state. Link: https://lore.kernel.org/r/20191209204054.227736-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/fscrypt_private.h | 2 +- fs/crypto/hkdf.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 130b50e5a011..23cef4d3793a 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -287,7 +287,7 @@ extern int fscrypt_init_hkdf(struct fscrypt_hkdf *hkdf, const u8 *master_key, #define HKDF_CONTEXT_DIRECT_KEY 3 #define HKDF_CONTEXT_IV_INO_LBLK_64_KEY 4 -extern int fscrypt_hkdf_expand(struct fscrypt_hkdf *hkdf, u8 context, +extern int fscrypt_hkdf_expand(const struct fscrypt_hkdf *hkdf, u8 context, const u8 *info, unsigned int infolen, u8 *okm, unsigned int okmlen); diff --git a/fs/crypto/hkdf.c b/fs/crypto/hkdf.c index 2c026009c6e7..fd7f67628561 100644 --- a/fs/crypto/hkdf.c +++ b/fs/crypto/hkdf.c @@ -113,7 +113,7 @@ out: * adds to its application-specific info strings to guarantee that it doesn't * accidentally repeat an info string when using HKDF for different purposes.) */ -int fscrypt_hkdf_expand(struct fscrypt_hkdf *hkdf, u8 context, +int fscrypt_hkdf_expand(const struct fscrypt_hkdf *hkdf, u8 context, const u8 *info, unsigned int infolen, u8 *okm, unsigned int okmlen) { From 39a0accbdfd3897b7b5f6a38eac2e0f2f6f06131 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 15 Dec 2019 13:39:47 -0800 Subject: [PATCH 3000/3715] fscrypt: constify inode parameter to filename encryption functions Constify the struct inode parameter to fscrypt_fname_disk_to_usr() and the other filename encryption functions so that users don't have to pass in a non-const inode when they are dealing with a const one, as in [1]. [1] https://lkml.kernel.org/linux-ext4/20191203051049.44573-6-drosen@google.com/ Cc: Daniel Rosenberg Link: https://lore.kernel.org/r/20191215213947.9521-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/fname.c | 20 ++++++++++---------- fs/crypto/fscrypt_private.h | 2 +- include/linux/fscrypt.h | 8 +++++--- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 3da3707c10e3..c87b71aa2353 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -34,12 +34,12 @@ static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) * * Return: 0 on success, -errno on failure */ -int fname_encrypt(struct inode *inode, const struct qstr *iname, +int fname_encrypt(const struct inode *inode, const struct qstr *iname, u8 *out, unsigned int olen) { struct skcipher_request *req = NULL; DECLARE_CRYPTO_WAIT(wait); - struct fscrypt_info *ci = inode->i_crypt_info; + const struct fscrypt_info *ci = inode->i_crypt_info; struct crypto_skcipher *tfm = ci->ci_ctfm; union fscrypt_iv iv; struct scatterlist sg; @@ -85,14 +85,14 @@ int fname_encrypt(struct inode *inode, const struct qstr *iname, * * Return: 0 on success, -errno on failure */ -static int fname_decrypt(struct inode *inode, - const struct fscrypt_str *iname, - struct fscrypt_str *oname) +static int fname_decrypt(const struct inode *inode, + const struct fscrypt_str *iname, + struct fscrypt_str *oname) { struct skcipher_request *req = NULL; DECLARE_CRYPTO_WAIT(wait); struct scatterlist src_sg, dst_sg; - struct fscrypt_info *ci = inode->i_crypt_info; + const struct fscrypt_info *ci = inode->i_crypt_info; struct crypto_skcipher *tfm = ci->ci_ctfm; union fscrypt_iv iv; int res; @@ -247,10 +247,10 @@ EXPORT_SYMBOL(fscrypt_fname_free_buffer); * * Return: 0 on success, -errno on failure */ -int fscrypt_fname_disk_to_usr(struct inode *inode, - u32 hash, u32 minor_hash, - const struct fscrypt_str *iname, - struct fscrypt_str *oname) +int fscrypt_fname_disk_to_usr(const struct inode *inode, + u32 hash, u32 minor_hash, + const struct fscrypt_str *iname, + struct fscrypt_str *oname) { const struct qstr qname = FSTR_TO_QSTR(iname); struct fscrypt_digested_name digested_name; diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 23cef4d3793a..5792ecbd4d24 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -260,7 +260,7 @@ void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num, const struct fscrypt_info *ci); /* fname.c */ -extern int fname_encrypt(struct inode *inode, const struct qstr *iname, +extern int fname_encrypt(const struct inode *inode, const struct qstr *iname, u8 *out, unsigned int olen); extern bool fscrypt_fname_encrypted_size(const struct inode *inode, u32 orig_len, u32 max_len, diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 7f302250e52e..759e75364da3 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -153,8 +153,10 @@ static inline void fscrypt_free_filename(struct fscrypt_name *fname) extern int fscrypt_fname_alloc_buffer(const struct inode *, u32, struct fscrypt_str *); extern void fscrypt_fname_free_buffer(struct fscrypt_str *); -extern int fscrypt_fname_disk_to_usr(struct inode *, u32, u32, - const struct fscrypt_str *, struct fscrypt_str *); +extern int fscrypt_fname_disk_to_usr(const struct inode *inode, + u32 hash, u32 minor_hash, + const struct fscrypt_str *iname, + struct fscrypt_str *oname); #define FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE 32 @@ -433,7 +435,7 @@ static inline void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str) return; } -static inline int fscrypt_fname_disk_to_usr(struct inode *inode, +static inline int fscrypt_fname_disk_to_usr(const struct inode *inode, u32 hash, u32 minor_hash, const struct fscrypt_str *iname, struct fscrypt_str *oname) From 387197777f2154d0299599f582c45416c661a0ad Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Dec 2019 12:43:59 -0800 Subject: [PATCH 3001/3715] fscrypt: move fscrypt_d_revalidate() to fname.c fscrypt_d_revalidate() and fscrypt_d_ops really belong in fname.c, since they're specific to filenames encryption. crypto.c is for contents encryption and general fs/crypto/ initialization and utilities. Link: https://lore.kernel.org/r/20191209204359.228544-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/crypto.c | 50 ------------------------------------- fs/crypto/fname.c | 49 ++++++++++++++++++++++++++++++++++++ fs/crypto/fscrypt_private.h | 2 +- 3 files changed, 50 insertions(+), 51 deletions(-) diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 6e6f39ea18a7..8a783d74137b 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -24,8 +24,6 @@ #include #include #include -#include -#include #include #include "fscrypt_private.h" @@ -285,54 +283,6 @@ int fscrypt_decrypt_block_inplace(const struct inode *inode, struct page *page, } EXPORT_SYMBOL(fscrypt_decrypt_block_inplace); -/* - * Validate dentries in encrypted directories to make sure we aren't potentially - * caching stale dentries after a key has been added. - */ -static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) -{ - struct dentry *dir; - int err; - int valid; - - /* - * Plaintext names are always valid, since fscrypt doesn't support - * reverting to ciphertext names without evicting the directory's inode - * -- which implies eviction of the dentries in the directory. - */ - if (!(dentry->d_flags & DCACHE_ENCRYPTED_NAME)) - return 1; - - /* - * Ciphertext name; valid if the directory's key is still unavailable. - * - * Although fscrypt forbids rename() on ciphertext names, we still must - * use dget_parent() here rather than use ->d_parent directly. That's - * because a corrupted fs image may contain directory hard links, which - * the VFS handles by moving the directory's dentry tree in the dcache - * each time ->lookup() finds the directory and it already has a dentry - * elsewhere. Thus ->d_parent can be changing, and we must safely grab - * a reference to some ->d_parent to prevent it from being freed. - */ - - if (flags & LOOKUP_RCU) - return -ECHILD; - - dir = dget_parent(dentry); - err = fscrypt_get_encryption_info(d_inode(dir)); - valid = !fscrypt_has_encryption_key(d_inode(dir)); - dput(dir); - - if (err < 0) - return err; - - return valid; -} - -const struct dentry_operations fscrypt_d_ops = { - .d_revalidate = fscrypt_d_revalidate, -}; - /** * fscrypt_initialize() - allocate major buffers for fs encryption. * @cop_flags: fscrypt operations flags diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index c87b71aa2353..3fd27e14ebdd 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -11,6 +11,7 @@ * This has not yet undergone a rigorous security audit. */ +#include #include #include #include "fscrypt_private.h" @@ -400,3 +401,51 @@ errout: return ret; } EXPORT_SYMBOL(fscrypt_setup_filename); + +/* + * Validate dentries in encrypted directories to make sure we aren't potentially + * caching stale dentries after a key has been added. + */ +static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) +{ + struct dentry *dir; + int err; + int valid; + + /* + * Plaintext names are always valid, since fscrypt doesn't support + * reverting to ciphertext names without evicting the directory's inode + * -- which implies eviction of the dentries in the directory. + */ + if (!(dentry->d_flags & DCACHE_ENCRYPTED_NAME)) + return 1; + + /* + * Ciphertext name; valid if the directory's key is still unavailable. + * + * Although fscrypt forbids rename() on ciphertext names, we still must + * use dget_parent() here rather than use ->d_parent directly. That's + * because a corrupted fs image may contain directory hard links, which + * the VFS handles by moving the directory's dentry tree in the dcache + * each time ->lookup() finds the directory and it already has a dentry + * elsewhere. Thus ->d_parent can be changing, and we must safely grab + * a reference to some ->d_parent to prevent it from being freed. + */ + + if (flags & LOOKUP_RCU) + return -ECHILD; + + dir = dget_parent(dentry); + err = fscrypt_get_encryption_info(d_inode(dir)); + valid = !fscrypt_has_encryption_key(d_inode(dir)); + dput(dir); + + if (err < 0) + return err; + + return valid; +} + +const struct dentry_operations fscrypt_d_ops = { + .d_revalidate = fscrypt_d_revalidate, +}; diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 5792ecbd4d24..37c418d23962 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -233,7 +233,6 @@ extern int fscrypt_crypt_block(const struct inode *inode, unsigned int len, unsigned int offs, gfp_t gfp_flags); extern struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags); -extern const struct dentry_operations fscrypt_d_ops; extern void __printf(3, 4) __cold fscrypt_msg(const struct inode *inode, const char *level, const char *fmt, ...); @@ -265,6 +264,7 @@ extern int fname_encrypt(const struct inode *inode, const struct qstr *iname, extern bool fscrypt_fname_encrypted_size(const struct inode *inode, u32 orig_len, u32 max_len, u32 *encrypted_len_ret); +extern const struct dentry_operations fscrypt_d_ops; /* hkdf.c */ From bfa4ca6ee85a82274f5b0ed4782e95fa93f6a315 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Dec 2019 12:50:21 -0800 Subject: [PATCH 3002/3715] fscrypt: introduce fscrypt_needs_contents_encryption() Add a function fscrypt_needs_contents_encryption() which takes an inode and returns true if it's an encrypted regular file and the kernel was built with fscrypt support. This will allow replacing duplicated checks of IS_ENCRYPTED() && S_ISREG() on the I/O paths in ext4 and f2fs, while also optimizing out unneeded code when !CONFIG_FS_ENCRYPTION. Link: https://lore.kernel.org/r/20191209205021.231767-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 759e75364da3..dd67e7aa148f 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -72,6 +72,21 @@ static inline bool fscrypt_has_encryption_key(const struct inode *inode) return READ_ONCE(inode->i_crypt_info) != NULL; } +/** + * fscrypt_needs_contents_encryption() - check whether an inode needs + * contents encryption + * + * Return: %true iff the inode is an encrypted regular file and the kernel was + * built with fscrypt support. + * + * If you need to know whether the encrypt bit is set even when the kernel was + * built without fscrypt support, you must use IS_ENCRYPTED() directly instead. + */ +static inline bool fscrypt_needs_contents_encryption(const struct inode *inode) +{ + return IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode); +} + static inline bool fscrypt_dummy_context_enabled(struct inode *inode) { return inode->i_sb->s_cop->dummy_context && @@ -264,6 +279,11 @@ static inline bool fscrypt_has_encryption_key(const struct inode *inode) return false; } +static inline bool fscrypt_needs_contents_encryption(const struct inode *inode) +{ + return false; +} + static inline bool fscrypt_dummy_context_enabled(struct inode *inode) { return false; From 2454b5bb0df7a874c1bc849c33eab54ca05bfa48 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Dec 2019 13:18:26 -0800 Subject: [PATCH 3003/3715] fscrypt: split up fscrypt_supported_policy() by policy version Make fscrypt_supported_policy() call new functions fscrypt_supported_v1_policy() and fscrypt_supported_v2_policy(), to reduce the indentation level and make the code easier to read. Also adjust the function comment to mention that whether the encryption policy is supported can also depend on the inode. No change in behavior. Link: https://lore.kernel.org/r/20191209211829.239800-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/policy.c | 116 +++++++++++++++++++++++---------------------- 1 file changed, 59 insertions(+), 57 deletions(-) diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index 96f528071bed..fdb13ce69cd2 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -63,13 +63,65 @@ static bool supported_iv_ino_lblk_64_policy( return true; } +static bool fscrypt_supported_v1_policy(const struct fscrypt_policy_v1 *policy, + const struct inode *inode) +{ + if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode, + policy->filenames_encryption_mode)) { + fscrypt_warn(inode, + "Unsupported encryption modes (contents %d, filenames %d)", + policy->contents_encryption_mode, + policy->filenames_encryption_mode); + return false; + } + + if (policy->flags & ~(FSCRYPT_POLICY_FLAGS_PAD_MASK | + FSCRYPT_POLICY_FLAG_DIRECT_KEY)) { + fscrypt_warn(inode, "Unsupported encryption flags (0x%02x)", + policy->flags); + return false; + } + + return true; +} + +static bool fscrypt_supported_v2_policy(const struct fscrypt_policy_v2 *policy, + const struct inode *inode) +{ + if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode, + policy->filenames_encryption_mode)) { + fscrypt_warn(inode, + "Unsupported encryption modes (contents %d, filenames %d)", + policy->contents_encryption_mode, + policy->filenames_encryption_mode); + return false; + } + + if (policy->flags & ~FSCRYPT_POLICY_FLAGS_VALID) { + fscrypt_warn(inode, "Unsupported encryption flags (0x%02x)", + policy->flags); + return false; + } + + if ((policy->flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64) && + !supported_iv_ino_lblk_64_policy(policy, inode)) + return false; + + if (memchr_inv(policy->__reserved, 0, sizeof(policy->__reserved))) { + fscrypt_warn(inode, "Reserved bits set in encryption policy"); + return false; + } + + return true; +} + /** * fscrypt_supported_policy - check whether an encryption policy is supported * * Given an encryption policy, check whether all its encryption modes and other - * settings are supported by this kernel. (But we don't currently don't check - * for crypto API support here, so attempting to use an algorithm not configured - * into the crypto API will still fail later.) + * settings are supported by this kernel on the given inode. (But we don't + * currently don't check for crypto API support here, so attempting to use an + * algorithm not configured into the crypto API will still fail later.) * * Return: %true if supported, else %false */ @@ -77,60 +129,10 @@ bool fscrypt_supported_policy(const union fscrypt_policy *policy_u, const struct inode *inode) { switch (policy_u->version) { - case FSCRYPT_POLICY_V1: { - const struct fscrypt_policy_v1 *policy = &policy_u->v1; - - if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode, - policy->filenames_encryption_mode)) { - fscrypt_warn(inode, - "Unsupported encryption modes (contents %d, filenames %d)", - policy->contents_encryption_mode, - policy->filenames_encryption_mode); - return false; - } - - if (policy->flags & ~(FSCRYPT_POLICY_FLAGS_PAD_MASK | - FSCRYPT_POLICY_FLAG_DIRECT_KEY)) { - fscrypt_warn(inode, - "Unsupported encryption flags (0x%02x)", - policy->flags); - return false; - } - - return true; - } - case FSCRYPT_POLICY_V2: { - const struct fscrypt_policy_v2 *policy = &policy_u->v2; - - if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode, - policy->filenames_encryption_mode)) { - fscrypt_warn(inode, - "Unsupported encryption modes (contents %d, filenames %d)", - policy->contents_encryption_mode, - policy->filenames_encryption_mode); - return false; - } - - if (policy->flags & ~FSCRYPT_POLICY_FLAGS_VALID) { - fscrypt_warn(inode, - "Unsupported encryption flags (0x%02x)", - policy->flags); - return false; - } - - if ((policy->flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64) && - !supported_iv_ino_lblk_64_policy(policy, inode)) - return false; - - if (memchr_inv(policy->__reserved, 0, - sizeof(policy->__reserved))) { - fscrypt_warn(inode, - "Reserved bits set in encryption policy"); - return false; - } - - return true; - } + case FSCRYPT_POLICY_V1: + return fscrypt_supported_v1_policy(&policy_u->v1, inode); + case FSCRYPT_POLICY_V2: + return fscrypt_supported_v2_policy(&policy_u->v2, inode); } return false; } From add6ac48dd8504a511a80195b96561454e1df784 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Dec 2019 13:18:27 -0800 Subject: [PATCH 3004/3715] fscrypt: check for appropriate use of DIRECT_KEY flag earlier FSCRYPT_POLICY_FLAG_DIRECT_KEY is currently only allowed with Adiantum encryption. But FS_IOC_SET_ENCRYPTION_POLICY allowed it in combination with other encryption modes, and an error wasn't reported until later when the encrypted directory was actually used. Fix it to report the error earlier by validating the correct use of the DIRECT_KEY flag in fscrypt_supported_policy(), similar to how we validate the IV_INO_LBLK_64 flag. Link: https://lore.kernel.org/r/20191209211829.239800-3-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/fscrypt_private.h | 6 +----- fs/crypto/keysetup.c | 14 ++++---------- fs/crypto/keysetup_v1.c | 15 --------------- fs/crypto/policy.c | 30 ++++++++++++++++++++++++++++++ 4 files changed, 35 insertions(+), 30 deletions(-) diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 37c418d23962..41b061cdf06e 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -448,11 +448,7 @@ struct fscrypt_mode { int logged_impl_name; }; -static inline bool -fscrypt_mode_supports_direct_key(const struct fscrypt_mode *mode) -{ - return mode->ivsize >= offsetofend(union fscrypt_iv, nonce); -} +extern struct fscrypt_mode fscrypt_modes[]; extern struct crypto_skcipher * fscrypt_allocate_skcipher(struct fscrypt_mode *mode, const u8 *raw_key, diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index 9ced26a4a887..573fdc0f480b 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -13,7 +13,7 @@ #include "fscrypt_private.h" -static struct fscrypt_mode available_modes[] = { +struct fscrypt_mode fscrypt_modes[] = { [FSCRYPT_MODE_AES_256_XTS] = { .friendly_name = "AES-256-XTS", .cipher_str = "xts(aes)", @@ -51,10 +51,10 @@ select_encryption_mode(const union fscrypt_policy *policy, const struct inode *inode) { if (S_ISREG(inode->i_mode)) - return &available_modes[fscrypt_policy_contents_mode(policy)]; + return &fscrypt_modes[fscrypt_policy_contents_mode(policy)]; if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - return &available_modes[fscrypt_policy_fnames_mode(policy)]; + return &fscrypt_modes[fscrypt_policy_fnames_mode(policy)]; WARN_ONCE(1, "fscrypt: filesystem tried to load encryption info for inode %lu, which is not encryptable (file type %d)\n", inode->i_ino, (inode->i_mode & S_IFMT)); @@ -129,7 +129,7 @@ static int setup_per_mode_key(struct fscrypt_info *ci, const struct inode *inode = ci->ci_inode; const struct super_block *sb = inode->i_sb; struct fscrypt_mode *mode = ci->ci_mode; - u8 mode_num = mode - available_modes; + const u8 mode_num = mode - fscrypt_modes; struct crypto_skcipher *tfm, *prev_tfm; u8 mode_key[FSCRYPT_MAX_KEY_SIZE]; u8 hkdf_info[sizeof(mode_num) + sizeof(sb->s_uuid)]; @@ -189,12 +189,6 @@ static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci, * This ensures that the master key is consistently used only * for HKDF, avoiding key reuse issues. */ - if (!fscrypt_mode_supports_direct_key(ci->ci_mode)) { - fscrypt_warn(ci->ci_inode, - "Direct key flag not allowed with %s", - ci->ci_mode->friendly_name); - return -EINVAL; - } return setup_per_mode_key(ci, mk, mk->mk_direct_tfms, HKDF_CONTEXT_DIRECT_KEY, false); } else if (ci->ci_policy.v2.flags & diff --git a/fs/crypto/keysetup_v1.c b/fs/crypto/keysetup_v1.c index 454fb03fc30e..6b8815d7448d 100644 --- a/fs/crypto/keysetup_v1.c +++ b/fs/crypto/keysetup_v1.c @@ -253,23 +253,8 @@ err_free_dk: static int setup_v1_file_key_direct(struct fscrypt_info *ci, const u8 *raw_master_key) { - const struct fscrypt_mode *mode = ci->ci_mode; struct fscrypt_direct_key *dk; - if (!fscrypt_mode_supports_direct_key(mode)) { - fscrypt_warn(ci->ci_inode, - "Direct key mode not allowed with %s", - mode->friendly_name); - return -EINVAL; - } - - if (ci->ci_policy.v1.contents_encryption_mode != - ci->ci_policy.v1.filenames_encryption_mode) { - fscrypt_warn(ci->ci_inode, - "Direct key mode not allowed with different contents and filenames modes"); - return -EINVAL; - } - dk = fscrypt_get_direct_key(ci, raw_master_key); if (IS_ERR(dk)) return PTR_ERR(dk); diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index fdb13ce69cd2..e785b00f19b3 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -29,6 +29,26 @@ bool fscrypt_policies_equal(const union fscrypt_policy *policy1, return !memcmp(policy1, policy2, fscrypt_policy_size(policy1)); } +static bool supported_direct_key_modes(const struct inode *inode, + u32 contents_mode, u32 filenames_mode) +{ + const struct fscrypt_mode *mode; + + if (contents_mode != filenames_mode) { + fscrypt_warn(inode, + "Direct key flag not allowed with different contents and filenames modes"); + return false; + } + mode = &fscrypt_modes[contents_mode]; + + if (mode->ivsize < offsetofend(union fscrypt_iv, nonce)) { + fscrypt_warn(inode, "Direct key flag not allowed with %s", + mode->friendly_name); + return false; + } + return true; +} + static bool supported_iv_ino_lblk_64_policy( const struct fscrypt_policy_v2 *policy, const struct inode *inode) @@ -82,6 +102,11 @@ static bool fscrypt_supported_v1_policy(const struct fscrypt_policy_v1 *policy, return false; } + if ((policy->flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) && + !supported_direct_key_modes(inode, policy->contents_encryption_mode, + policy->filenames_encryption_mode)) + return false; + return true; } @@ -103,6 +128,11 @@ static bool fscrypt_supported_v2_policy(const struct fscrypt_policy_v2 *policy, return false; } + if ((policy->flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) && + !supported_direct_key_modes(inode, policy->contents_encryption_mode, + policy->filenames_encryption_mode)) + return false; + if ((policy->flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64) && !supported_iv_ino_lblk_64_policy(policy, inode)) return false; From 19b132bac6d38d2cc164730fe57bf6fe8a3d92ff Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Dec 2019 13:18:28 -0800 Subject: [PATCH 3005/3715] fscrypt: move fscrypt_valid_enc_modes() to policy.c fscrypt_valid_enc_modes() is only used by policy.c, so move it to there. Also adjust the order of the checks to be more natural, matching the numerical order of the constants and also keeping AES-256 (the recommended default) first in the list. No change in behavior. Link: https://lore.kernel.org/r/20191209211829.239800-4-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/fscrypt_private.h | 18 ------------------ fs/crypto/policy.c | 17 +++++++++++++++++ 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 41b061cdf06e..71f496fe7173 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -206,24 +206,6 @@ typedef enum { FS_ENCRYPT, } fscrypt_direction_t; -static inline bool fscrypt_valid_enc_modes(u32 contents_mode, - u32 filenames_mode) -{ - if (contents_mode == FSCRYPT_MODE_AES_128_CBC && - filenames_mode == FSCRYPT_MODE_AES_128_CTS) - return true; - - if (contents_mode == FSCRYPT_MODE_AES_256_XTS && - filenames_mode == FSCRYPT_MODE_AES_256_CTS) - return true; - - if (contents_mode == FSCRYPT_MODE_ADIANTUM && - filenames_mode == FSCRYPT_MODE_ADIANTUM) - return true; - - return false; -} - /* crypto.c */ extern struct kmem_cache *fscrypt_info_cachep; extern int fscrypt_initialize(unsigned int cop_flags); diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index e785b00f19b3..f1cff83c151a 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -29,6 +29,23 @@ bool fscrypt_policies_equal(const union fscrypt_policy *policy1, return !memcmp(policy1, policy2, fscrypt_policy_size(policy1)); } +static bool fscrypt_valid_enc_modes(u32 contents_mode, u32 filenames_mode) +{ + if (contents_mode == FSCRYPT_MODE_AES_256_XTS && + filenames_mode == FSCRYPT_MODE_AES_256_CTS) + return true; + + if (contents_mode == FSCRYPT_MODE_AES_128_CBC && + filenames_mode == FSCRYPT_MODE_AES_128_CTS) + return true; + + if (contents_mode == FSCRYPT_MODE_ADIANTUM && + filenames_mode == FSCRYPT_MODE_ADIANTUM) + return true; + + return false; +} + static bool supported_direct_key_modes(const struct inode *inode, u32 contents_mode, u32 filenames_mode) { From b21b79d7fe40880451189826917daecd3402f6b3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Dec 2019 13:18:29 -0800 Subject: [PATCH 3006/3715] fscrypt: remove fscrypt_is_direct_key_policy() fscrypt_is_direct_key_policy() is no longer used, so remove it. Link: https://lore.kernel.org/r/20191209211829.239800-5-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/fscrypt_private.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 71f496fe7173..b22e8decebed 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -136,12 +136,6 @@ fscrypt_policy_flags(const union fscrypt_policy *policy) BUG(); } -static inline bool -fscrypt_is_direct_key_policy(const union fscrypt_policy *policy) -{ - return fscrypt_policy_flags(policy) & FSCRYPT_POLICY_FLAG_DIRECT_KEY; -} - /** * For encrypted symlinks, the ciphertext length is stored at the beginning * of the string in little-endian format. From 8842133ff32ea9fbd138aecebfc45e27102ea066 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Dec 2019 13:23:48 -0800 Subject: [PATCH 3007/3715] fscrypt: don't check for ENOKEY from fscrypt_get_encryption_info() fscrypt_get_encryption_info() returns 0 if the encryption key is unavailable; it never returns ENOKEY. So remove checks for ENOKEY. Link: https://lore.kernel.org/r/20191209212348.243331-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/ext4/dir.c | 2 +- fs/f2fs/dir.c | 2 +- fs/ubifs/dir.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 7219f19710c2..b30052b61c8f 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -115,7 +115,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) if (IS_ENCRYPTED(inode)) { err = fscrypt_get_encryption_info(inode); - if (err && err != -ENOKEY) + if (err) return err; } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 1972638165fd..331c90556a0f 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -1001,7 +1001,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) if (IS_ENCRYPTED(inode)) { err = fscrypt_get_encryption_info(inode); - if (err && err != -ENOKEY) + if (err) goto out; err = fscrypt_fname_alloc_buffer(inode, F2FS_NAME_LEN, &fstr); diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 9cbce7a9c31c..f7057c320a33 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -537,7 +537,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) if (encrypted) { err = fscrypt_get_encryption_info(dir); - if (err && err != -ENOKEY) + if (err) return err; err = fscrypt_fname_alloc_buffer(dir, UBIFS_MAX_NLEN, &fstr); From 737ae902586bbb09549e4604e8c87bee5a797622 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 19 Dec 2019 10:56:24 -0800 Subject: [PATCH 3008/3715] fscrypt: include in UAPI header defines ioctl numbers using the macros like _IOWR() which are defined in , so should be included as a prerequisite, like it is in many other kernel headers. In practice this doesn't really matter since anyone referencing these ioctl numbers will almost certainly include too in order to actually call ioctl(). But we might as well fix this. Link: https://lore.kernel.org/r/20191219185624.21251-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/uapi/linux/fscrypt.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h index d5112a24e8b9..0d8a6f47711c 100644 --- a/include/uapi/linux/fscrypt.h +++ b/include/uapi/linux/fscrypt.h @@ -8,6 +8,7 @@ #ifndef _UAPI_LINUX_FSCRYPT_H #define _UAPI_LINUX_FSCRYPT_H +#include #include /* Encryption policy flags */ From 242a068ac567574cce140319aa61311bd7776da9 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 10 Feb 2020 13:49:04 -0800 Subject: [PATCH 3009/3715] ANDROID: f2fs: fix missing blk-crypto changes I missed to fix blk-crypto changes that should be applied in compression flow. Fixes: dad710c56f3c ("Merge remote-tracking branch 'aosp/upstream-f2fs-stable-linux-4.14.y' into android-4.14") Change-Id: Icdf8e0561c6f377fa23ab38cc8c57c2365b297d3 Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 5 +++-- fs/f2fs/data.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 17e10c4cd880..f335635bb1aa 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -836,7 +836,8 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, err = f2fs_encrypt_one_page(&fio); if (err) goto out_destroy_crypt; - cc->cpages[i] = fio.encrypted_page; + if (fscrypt_inode_uses_fs_layer_crypto(inode)) + cc->cpages[i] = fio.encrypted_page; } } @@ -876,7 +877,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, f2fs_bug_on(fio.sbi, blkaddr == NULL_ADDR); - if (fio.encrypted) + if (fio.encrypted && fscrypt_inode_uses_fs_layer_crypto(inode)) fio.encrypted_page = cc->cpages[i - 1]; else fio.compressed_page = cc->cpages[i - 1]; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 81ae6ccb4241..9fc829dd532c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2187,8 +2187,9 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node + i + 1); - if (bio && !page_is_mergeable(sbi, bio, - *last_block_in_bio, blkaddr)) { + if (bio && (!page_is_mergeable(sbi, bio, + *last_block_in_bio, blkaddr) || + !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) { submit_and_realloc: __submit_bio(sbi, bio, DATA); bio = NULL; From a744d4f1dda6fb106827042d5087f53d5fa8b328 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Thu, 13 Feb 2020 22:09:37 +0100 Subject: [PATCH 3010/3715] ANDROID: cuttlefish_defconfig: enable heap and stack initialization. This patch enables CONFIG_INIT_STACK_ALL=y and CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y, effectively turning on stack and heap initialization in cuttlefish kernels. Doing so will help us mitigate information leaks and make code that depends on uninitialized memory execute deterministically. We'll also get coverage for the initialization features on the existing kernel tests. Bug: 144999193 Change-Id: I1b5914a87a235d4f416cd64d7c9a5d410da09c35 Signed-off-by: Alexander Potapenko --- arch/arm64/configs/cuttlefish_defconfig | 2 ++ arch/x86/configs/x86_64_cuttlefish_defconfig | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index 3e9011fdd5d4..4545e6f37a83 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -466,6 +466,8 @@ CONFIG_SECURITY_NETWORK=y CONFIG_LSM_MMAP_MIN_ADDR=65536 CONFIG_HARDENED_USERCOPY=y CONFIG_SECURITY_SELINUX=y +CONFIG_INIT_STACK_ALL=y +CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y CONFIG_CRYPTO_ADIANTUM=y CONFIG_CRYPTO_LZ4=y CONFIG_CRYPTO_ZSTD=y diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index d26852b031c2..f3191479e562 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -493,6 +493,8 @@ CONFIG_SECURITY_PATH=y CONFIG_HARDENED_USERCOPY=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 +CONFIG_INIT_STACK_ALL=y +CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set CONFIG_CRYPTO_ADIANTUM=y CONFIG_CRYPTO_AES_NI_INTEL=y From 194a2e99be20d0b5c93539d13916d4dc327f7278 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 13 Feb 2020 11:28:00 -0800 Subject: [PATCH 3011/3715] FROMLIST: rename missed uaccess .fixup section When the uaccess .fixup section was renamed to .text.fixup, one case was missed. Under ld.bfd, the orphaned section was moved close to .text (since they share the "ax" bits), so things would work normally on uaccess faults. Under ld.lld, the orphaned section was placed outside the .text section, making it unreachable. Fixes: c4a84ae39b4a5 ("ARM: 8322/1: keep .text and .fixup regions closer together") Link: https://github.com/ClangBuiltLinux/linux/issues/282 Link: https://bugs.chromium.org/p/chromium/issues/detail?id=1020633#c44 Link: https://lore.kernel.org/r/nycvar.YSQ.7.76.1912032147340.17114@knanqh.ubzr Link: https://lore.kernel.org/lkml/202002071754.F5F073F1D@keescook/ Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Reviewed-by: Ard Biesheuvel Reviewed-by: Nick Desaulniers Bug: 149493767 (am from https://www.armlinux.org.uk/developer/patches/viewpatch.php?id=8958/1) Signed-off-by: Nick Desaulniers Change-Id: I39a2d2e7bfee7ff9854782c06547c277895e8ec1 --- arch/arm/lib/copy_from_user.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S index 6709a8d33963..f1e34f16cfab 100644 --- a/arch/arm/lib/copy_from_user.S +++ b/arch/arm/lib/copy_from_user.S @@ -100,7 +100,7 @@ ENTRY(arm_copy_from_user) ENDPROC(arm_copy_from_user) - .pushsection .fixup,"ax" + .pushsection .text.fixup,"ax" .align 0 copy_abort_preamble ldmfd sp!, {r1, r2, r3} From a9880a8acd531e76e2dc1ff2ac7992019ecbd67d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 23 Jan 2019 12:24:32 -0700 Subject: [PATCH 3012/3715] UPSTREAM: lib: Introduce test_stackinit module Upstream commit 50ceaa95ea09 ("lib: Introduce test_stackinit module"). Adds test for stack initialization coverage. We have several build options that control the level of stack variable initialization. This test lets us visualize which options cover which cases, and provide tests for some of the pathological padding conditions the compiler will sometimes fail to initialize. All options pass the explicit initialization cases and the partial initializers (even with padding): test_stackinit: u8_zero ok test_stackinit: u16_zero ok test_stackinit: u32_zero ok test_stackinit: u64_zero ok test_stackinit: char_array_zero ok test_stackinit: small_hole_zero ok test_stackinit: big_hole_zero ok test_stackinit: trailing_hole_zero ok test_stackinit: packed_zero ok test_stackinit: small_hole_dynamic_partial ok test_stackinit: big_hole_dynamic_partial ok test_stackinit: trailing_hole_dynamic_partial ok test_stackinit: packed_dynamic_partial ok test_stackinit: small_hole_static_partial ok test_stackinit: big_hole_static_partial ok test_stackinit: trailing_hole_static_partial ok test_stackinit: packed_static_partial ok test_stackinit: packed_static_all ok test_stackinit: packed_dynamic_all ok test_stackinit: packed_runtime_all ok The results of the other tests (which contain no explicit initialization), change based on the build's configured compiler instrumentation. No options: test_stackinit: small_hole_static_all FAIL (uninit bytes: 3) test_stackinit: big_hole_static_all FAIL (uninit bytes: 61) test_stackinit: trailing_hole_static_all FAIL (uninit bytes: 7) test_stackinit: small_hole_dynamic_all FAIL (uninit bytes: 3) test_stackinit: big_hole_dynamic_all FAIL (uninit bytes: 61) test_stackinit: trailing_hole_dynamic_all FAIL (uninit bytes: 7) test_stackinit: small_hole_runtime_partial FAIL (uninit bytes: 23) test_stackinit: big_hole_runtime_partial FAIL (uninit bytes: 127) test_stackinit: trailing_hole_runtime_partial FAIL (uninit bytes: 24) test_stackinit: packed_runtime_partial FAIL (uninit bytes: 24) test_stackinit: small_hole_runtime_all FAIL (uninit bytes: 3) test_stackinit: big_hole_runtime_all FAIL (uninit bytes: 61) test_stackinit: trailing_hole_runtime_all FAIL (uninit bytes: 7) test_stackinit: u8_none FAIL (uninit bytes: 1) test_stackinit: u16_none FAIL (uninit bytes: 2) test_stackinit: u32_none FAIL (uninit bytes: 4) test_stackinit: u64_none FAIL (uninit bytes: 8) test_stackinit: char_array_none FAIL (uninit bytes: 16) test_stackinit: switch_1_none FAIL (uninit bytes: 8) test_stackinit: switch_2_none FAIL (uninit bytes: 8) test_stackinit: small_hole_none FAIL (uninit bytes: 24) test_stackinit: big_hole_none FAIL (uninit bytes: 128) test_stackinit: trailing_hole_none FAIL (uninit bytes: 32) test_stackinit: packed_none FAIL (uninit bytes: 32) test_stackinit: user FAIL (uninit bytes: 32) test_stackinit: failures: 25 CONFIG_GCC_PLUGIN_STRUCTLEAK_USER=y This only tries to initialize structs with __user markings, so only the difference from above is now the "user" test passes: test_stackinit: small_hole_static_all FAIL (uninit bytes: 3) test_stackinit: big_hole_static_all FAIL (uninit bytes: 61) test_stackinit: trailing_hole_static_all FAIL (uninit bytes: 7) test_stackinit: small_hole_dynamic_all FAIL (uninit bytes: 3) test_stackinit: big_hole_dynamic_all FAIL (uninit bytes: 61) test_stackinit: trailing_hole_dynamic_all FAIL (uninit bytes: 7) test_stackinit: small_hole_runtime_partial FAIL (uninit bytes: 23) test_stackinit: big_hole_runtime_partial FAIL (uninit bytes: 127) test_stackinit: trailing_hole_runtime_partial FAIL (uninit bytes: 24) test_stackinit: packed_runtime_partial FAIL (uninit bytes: 24) test_stackinit: small_hole_runtime_all FAIL (uninit bytes: 3) test_stackinit: big_hole_runtime_all FAIL (uninit bytes: 61) test_stackinit: trailing_hole_runtime_all FAIL (uninit bytes: 7) test_stackinit: u8_none FAIL (uninit bytes: 1) test_stackinit: u16_none FAIL (uninit bytes: 2) test_stackinit: u32_none FAIL (uninit bytes: 4) test_stackinit: u64_none FAIL (uninit bytes: 8) test_stackinit: char_array_none FAIL (uninit bytes: 16) test_stackinit: switch_1_none FAIL (uninit bytes: 8) test_stackinit: switch_2_none FAIL (uninit bytes: 8) test_stackinit: small_hole_none FAIL (uninit bytes: 24) test_stackinit: big_hole_none FAIL (uninit bytes: 128) test_stackinit: trailing_hole_none FAIL (uninit bytes: 32) test_stackinit: packed_none FAIL (uninit bytes: 32) test_stackinit: user ok test_stackinit: failures: 24 CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF=y This initializes all structures passed by reference (scalars and strings remain uninitialized): test_stackinit: small_hole_static_all ok test_stackinit: big_hole_static_all ok test_stackinit: trailing_hole_static_all ok test_stackinit: small_hole_dynamic_all ok test_stackinit: big_hole_dynamic_all ok test_stackinit: trailing_hole_dynamic_all ok test_stackinit: small_hole_runtime_partial ok test_stackinit: big_hole_runtime_partial ok test_stackinit: trailing_hole_runtime_partial ok test_stackinit: packed_runtime_partial ok test_stackinit: small_hole_runtime_all ok test_stackinit: big_hole_runtime_all ok test_stackinit: trailing_hole_runtime_all ok test_stackinit: u8_none FAIL (uninit bytes: 1) test_stackinit: u16_none FAIL (uninit bytes: 2) test_stackinit: u32_none FAIL (uninit bytes: 4) test_stackinit: u64_none FAIL (uninit bytes: 8) test_stackinit: char_array_none FAIL (uninit bytes: 16) test_stackinit: switch_1_none FAIL (uninit bytes: 8) test_stackinit: switch_2_none FAIL (uninit bytes: 8) test_stackinit: small_hole_none ok test_stackinit: big_hole_none ok test_stackinit: trailing_hole_none ok test_stackinit: packed_none ok test_stackinit: user ok test_stackinit: failures: 7 CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL=y This initializes all variables, so it matches above with the scalars and arrays included: test_stackinit: small_hole_static_all ok test_stackinit: big_hole_static_all ok test_stackinit: trailing_hole_static_all ok test_stackinit: small_hole_dynamic_all ok test_stackinit: big_hole_dynamic_all ok test_stackinit: trailing_hole_dynamic_all ok test_stackinit: small_hole_runtime_partial ok test_stackinit: big_hole_runtime_partial ok test_stackinit: trailing_hole_runtime_partial ok test_stackinit: packed_runtime_partial ok test_stackinit: small_hole_runtime_all ok test_stackinit: big_hole_runtime_all ok test_stackinit: trailing_hole_runtime_all ok test_stackinit: u8_none ok test_stackinit: u16_none ok test_stackinit: u32_none ok test_stackinit: u64_none ok test_stackinit: char_array_none ok test_stackinit: switch_1_none ok test_stackinit: switch_2_none ok test_stackinit: small_hole_none ok test_stackinit: big_hole_none ok test_stackinit: trailing_hole_none ok test_stackinit: packed_none ok test_stackinit: user ok test_stackinit: all tests passed! Signed-off-by: Kees Cook Reviewed-by: Ard Biesheuvel Change-Id: I7f97a6dc5887957f8a356bcf2a53e117cbab6bdf Signed-off-by: Alexander Potapenko --- lib/Kconfig.debug | 10 ++ lib/Makefile | 3 + lib/test_stackinit.c | 378 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 391 insertions(+) create mode 100644 lib/test_stackinit.c diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 0e89b58b1b75..0ac19cca4654 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1940,6 +1940,16 @@ config TEST_MEMINIT If unsure, say N. +config TEST_STACKINIT + tristate "Test level of stack variable initialization" + help + Test if the kernel is zero-initializing stack variables and + padding. Coverage is controlled by compiler flags, + CONFIG_GCC_PLUGIN_STRUCTLEAK, CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF, + or CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL. + + If unsure, say N. + endmenu # runtime tests config MEMTEST diff --git a/lib/Makefile b/lib/Makefile index b892d0261501..d200f404946b 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -76,6 +76,9 @@ obj-$(CONFIG_TEST_UUID) += test_uuid.o obj-$(CONFIG_TEST_PARMAN) += test_parman.o obj-$(CONFIG_TEST_KMOD) += test_kmod.o obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o +obj-$(CONFIG_TEST_MEMCAT_P) += test_memcat_p.o +obj-$(CONFIG_TEST_OBJAGG) += test_objagg.o +obj-$(CONFIG_TEST_STACKINIT) += test_stackinit.o obj-$(CONFIG_TEST_MEMINIT) += test_meminit.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) diff --git a/lib/test_stackinit.c b/lib/test_stackinit.c new file mode 100644 index 000000000000..13115b6f2b88 --- /dev/null +++ b/lib/test_stackinit.c @@ -0,0 +1,378 @@ +// SPDX-Licenses: GPLv2 +/* + * Test cases for compiler-based stack variable zeroing via future + * compiler flags or CONFIG_GCC_PLUGIN_STRUCTLEAK*. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include + +/* Exfiltration buffer. */ +#define MAX_VAR_SIZE 128 +static char check_buf[MAX_VAR_SIZE]; + +/* Character array to trigger stack protector in all functions. */ +#define VAR_BUFFER 32 + +/* Volatile mask to convince compiler to copy memory with 0xff. */ +static volatile u8 forced_mask = 0xff; + +/* Location and size tracking to validate fill and test are colocated. */ +static void *fill_start, *target_start; +static size_t fill_size, target_size; + +static bool range_contains(char *haystack_start, size_t haystack_size, + char *needle_start, size_t needle_size) +{ + if (needle_start >= haystack_start && + needle_start + needle_size <= haystack_start + haystack_size) + return true; + return false; +} + +#define DO_NOTHING_TYPE_SCALAR(var_type) var_type +#define DO_NOTHING_TYPE_STRING(var_type) void +#define DO_NOTHING_TYPE_STRUCT(var_type) void + +#define DO_NOTHING_RETURN_SCALAR(ptr) *(ptr) +#define DO_NOTHING_RETURN_STRING(ptr) /**/ +#define DO_NOTHING_RETURN_STRUCT(ptr) /**/ + +#define DO_NOTHING_CALL_SCALAR(var, name) \ + (var) = do_nothing_ ## name(&(var)) +#define DO_NOTHING_CALL_STRING(var, name) \ + do_nothing_ ## name(var) +#define DO_NOTHING_CALL_STRUCT(var, name) \ + do_nothing_ ## name(&(var)) + +#define FETCH_ARG_SCALAR(var) &var +#define FETCH_ARG_STRING(var) var +#define FETCH_ARG_STRUCT(var) &var + +#define FILL_SIZE_STRING 16 + +#define INIT_CLONE_SCALAR /**/ +#define INIT_CLONE_STRING [FILL_SIZE_STRING] +#define INIT_CLONE_STRUCT /**/ + +#define INIT_SCALAR_none /**/ +#define INIT_SCALAR_zero = 0 + +#define INIT_STRING_none [FILL_SIZE_STRING] /**/ +#define INIT_STRING_zero [FILL_SIZE_STRING] = { } + +#define INIT_STRUCT_none /**/ +#define INIT_STRUCT_zero = { } +#define INIT_STRUCT_static_partial = { .two = 0, } +#define INIT_STRUCT_static_all = { .one = arg->one, \ + .two = arg->two, \ + .three = arg->three, \ + .four = arg->four, \ + } +#define INIT_STRUCT_dynamic_partial = { .two = arg->two, } +#define INIT_STRUCT_dynamic_all = { .one = arg->one, \ + .two = arg->two, \ + .three = arg->three, \ + .four = arg->four, \ + } +#define INIT_STRUCT_runtime_partial ; \ + var.two = 0 +#define INIT_STRUCT_runtime_all ; \ + var.one = 0; \ + var.two = 0; \ + var.three = 0; \ + memset(&var.four, 0, \ + sizeof(var.four)) + +/* + * @name: unique string name for the test + * @var_type: type to be tested for zeroing initialization + * @which: is this a SCALAR, STRING, or STRUCT type? + * @init_level: what kind of initialization is performed + */ +#define DEFINE_TEST_DRIVER(name, var_type, which) \ +/* Returns 0 on success, 1 on failure. */ \ +static noinline __init int test_ ## name (void) \ +{ \ + var_type zero INIT_CLONE_ ## which; \ + int ignored; \ + u8 sum = 0, i; \ + \ + /* Notice when a new test is larger than expected. */ \ + BUILD_BUG_ON(sizeof(zero) > MAX_VAR_SIZE); \ + \ + /* Fill clone type with zero for per-field init. */ \ + memset(&zero, 0x00, sizeof(zero)); \ + /* Fill stack with 0xFF. */ \ + ignored = leaf_ ##name((unsigned long)&ignored, 1, \ + FETCH_ARG_ ## which(zero)); \ + /* Clear entire check buffer for later bit tests. */ \ + memset(check_buf, 0x00, sizeof(check_buf)); \ + /* Extract stack-defined variable contents. */ \ + ignored = leaf_ ##name((unsigned long)&ignored, 0, \ + FETCH_ARG_ ## which(zero)); \ + \ + /* Validate that compiler lined up fill and target. */ \ + if (!range_contains(fill_start, fill_size, \ + target_start, target_size)) { \ + pr_err(#name ": stack fill missed target!?\n"); \ + pr_err(#name ": fill %zu wide\n", fill_size); \ + pr_err(#name ": target offset by %d\n", \ + (int)((ssize_t)(uintptr_t)fill_start - \ + (ssize_t)(uintptr_t)target_start)); \ + return 1; \ + } \ + \ + /* Look for any set bits in the check region. */ \ + for (i = 0; i < sizeof(check_buf); i++) \ + sum += (check_buf[i] != 0); \ + \ + if (sum == 0) \ + pr_info(#name " ok\n"); \ + else \ + pr_warn(#name " FAIL (uninit bytes: %d)\n", \ + sum); \ + \ + return (sum != 0); \ +} +#define DEFINE_TEST(name, var_type, which, init_level) \ +/* no-op to force compiler into ignoring "uninitialized" vars */\ +static noinline __init DO_NOTHING_TYPE_ ## which(var_type) \ +do_nothing_ ## name(var_type *ptr) \ +{ \ + /* Will always be true, but compiler doesn't know. */ \ + if ((unsigned long)ptr > 0x2) \ + return DO_NOTHING_RETURN_ ## which(ptr); \ + else \ + return DO_NOTHING_RETURN_ ## which(ptr + 1); \ +} \ +static noinline __init int leaf_ ## name(unsigned long sp, \ + bool fill, \ + var_type *arg) \ +{ \ + char buf[VAR_BUFFER]; \ + var_type var INIT_ ## which ## _ ## init_level; \ + \ + target_start = &var; \ + target_size = sizeof(var); \ + /* \ + * Keep this buffer around to make sure we've got a \ + * stack frame of SOME kind... \ + */ \ + memset(buf, (char)(sp && 0xff), sizeof(buf)); \ + /* Fill variable with 0xFF. */ \ + if (fill) { \ + fill_start = &var; \ + fill_size = sizeof(var); \ + memset(fill_start, \ + (char)((sp && 0xff) | forced_mask), \ + fill_size); \ + } \ + \ + /* Silence "never initialized" warnings. */ \ + DO_NOTHING_CALL_ ## which(var, name); \ + \ + /* Exfiltrate "var". */ \ + memcpy(check_buf, target_start, target_size); \ + \ + return (int)buf[0] | (int)buf[sizeof(buf) - 1]; \ +} \ +DEFINE_TEST_DRIVER(name, var_type, which) + +/* Structure with no padding. */ +struct test_packed { + unsigned long one; + unsigned long two; + unsigned long three; + unsigned long four; +}; + +/* Simple structure with padding likely to be covered by compiler. */ +struct test_small_hole { + size_t one; + char two; + /* 3 byte padding hole here. */ + int three; + unsigned long four; +}; + +/* Try to trigger unhandled padding in a structure. */ +struct test_aligned { + u32 internal1; + u64 internal2; +} __aligned(64); + +struct test_big_hole { + u8 one; + u8 two; + u8 three; + /* 61 byte padding hole here. */ + struct test_aligned four; +} __aligned(64); + +struct test_trailing_hole { + char *one; + char *two; + char *three; + char four; + /* "sizeof(unsigned long) - 1" byte padding hole here. */ +}; + +/* Test if STRUCTLEAK is clearing structs with __user fields. */ +struct test_user { + u8 one; + unsigned long two; + char __user *three; + unsigned long four; +}; + +#define DEFINE_SCALAR_TEST(name, init) \ + DEFINE_TEST(name ## _ ## init, name, SCALAR, init) + +#define DEFINE_SCALAR_TESTS(init) \ + DEFINE_SCALAR_TEST(u8, init); \ + DEFINE_SCALAR_TEST(u16, init); \ + DEFINE_SCALAR_TEST(u32, init); \ + DEFINE_SCALAR_TEST(u64, init); \ + DEFINE_TEST(char_array_ ## init, unsigned char, STRING, init) + +#define DEFINE_STRUCT_TEST(name, init) \ + DEFINE_TEST(name ## _ ## init, \ + struct test_ ## name, STRUCT, init) + +#define DEFINE_STRUCT_TESTS(init) \ + DEFINE_STRUCT_TEST(small_hole, init); \ + DEFINE_STRUCT_TEST(big_hole, init); \ + DEFINE_STRUCT_TEST(trailing_hole, init); \ + DEFINE_STRUCT_TEST(packed, init) + +/* These should be fully initialized all the time! */ +DEFINE_SCALAR_TESTS(zero); +DEFINE_STRUCT_TESTS(zero); +/* Static initialization: padding may be left uninitialized. */ +DEFINE_STRUCT_TESTS(static_partial); +DEFINE_STRUCT_TESTS(static_all); +/* Dynamic initialization: padding may be left uninitialized. */ +DEFINE_STRUCT_TESTS(dynamic_partial); +DEFINE_STRUCT_TESTS(dynamic_all); +/* Runtime initialization: padding may be left uninitialized. */ +DEFINE_STRUCT_TESTS(runtime_partial); +DEFINE_STRUCT_TESTS(runtime_all); +/* No initialization without compiler instrumentation. */ +DEFINE_SCALAR_TESTS(none); +DEFINE_STRUCT_TESTS(none); +DEFINE_TEST(user, struct test_user, STRUCT, none); + +/* + * Check two uses through a variable declaration outside either path, + * which was noticed as a special case in porting earlier stack init + * compiler logic. + */ +static int noinline __leaf_switch_none(int path, bool fill) +{ + switch (path) { + uint64_t var; + + case 1: + target_start = &var; + target_size = sizeof(var); + if (fill) { + fill_start = &var; + fill_size = sizeof(var); + + memset(fill_start, forced_mask | 0x55, fill_size); + } + memcpy(check_buf, target_start, target_size); + break; + case 2: + target_start = &var; + target_size = sizeof(var); + if (fill) { + fill_start = &var; + fill_size = sizeof(var); + + memset(fill_start, forced_mask | 0xaa, fill_size); + } + memcpy(check_buf, target_start, target_size); + break; + default: + var = 5; + return var & forced_mask; + } + return 0; +} + +static noinline __init int leaf_switch_1_none(unsigned long sp, bool fill, + uint64_t *arg) +{ + return __leaf_switch_none(1, fill); +} + +static noinline __init int leaf_switch_2_none(unsigned long sp, bool fill, + uint64_t *arg) +{ + return __leaf_switch_none(2, fill); +} + +DEFINE_TEST_DRIVER(switch_1_none, uint64_t, SCALAR); +DEFINE_TEST_DRIVER(switch_2_none, uint64_t, SCALAR); + +static int __init test_stackinit_init(void) +{ + unsigned int failures = 0; + +#define test_scalars(init) do { \ + failures += test_u8_ ## init (); \ + failures += test_u16_ ## init (); \ + failures += test_u32_ ## init (); \ + failures += test_u64_ ## init (); \ + failures += test_char_array_ ## init (); \ + } while (0) + +#define test_structs(init) do { \ + failures += test_small_hole_ ## init (); \ + failures += test_big_hole_ ## init (); \ + failures += test_trailing_hole_ ## init (); \ + failures += test_packed_ ## init (); \ + } while (0) + + /* These are explicitly initialized and should always pass. */ + test_scalars(zero); + test_structs(zero); + /* Padding here appears to be accidentally always initialized? */ + test_structs(dynamic_partial); + /* Padding initialization depends on compiler behaviors. */ + test_structs(static_partial); + test_structs(static_all); + test_structs(dynamic_all); + test_structs(runtime_partial); + test_structs(runtime_all); + + /* STRUCTLEAK_BYREF_ALL should cover everything from here down. */ + test_scalars(none); + failures += test_switch_1_none(); + failures += test_switch_2_none(); + + /* STRUCTLEAK_BYREF should cover from here down. */ + test_structs(none); + + /* STRUCTLEAK will only cover this. */ + failures += test_user(); + + if (failures == 0) + pr_info("all tests passed!\n"); + else + pr_err("failures: %u\n", failures); + + return failures ? -EINVAL : 0; +} +module_init(test_stackinit_init); + +static void __exit test_stackinit_exit(void) +{ } +module_exit(test_stackinit_exit); + +MODULE_LICENSE("GPL"); From 18ee299fc6fb2c511b2f9d0418b125ce9d794745 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 4 Jun 2019 22:13:59 -0700 Subject: [PATCH 3013/3715] UPSTREAM: lib/test_stackinit: Handle Clang auto-initialization pattern Upstream commit 8c30d32b1a32 ("lib/test_stackinit: Handle Clang auto-initialization pattern"). While the gcc plugin for automatic stack variable initialization (i.e. CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL) performs initialization with 0x00 bytes, the Clang automatic stack variable initialization (i.e. CONFIG_INIT_STACK_ALL) uses various type-specific patterns that are typically 0xAA. Therefore the stackinit selftest has been fixed to check that bytes are no longer the test fill pattern of 0xFF (instead of looking for bytes that have become 0x00). This retains the test coverage for the 0x00 pattern of the gcc plugin while adding coverage for the mostly 0xAA pattern of Clang. Signed-off-by: Kees Cook Acked-by: Ard Biesheuvel Bug: 144999193 Change-Id: Ieb59dd5ba351d9ae96e5e3ad22f686be4524281a Signed-off-by: Alexander Potapenko --- lib/test_stackinit.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/lib/test_stackinit.c b/lib/test_stackinit.c index 13115b6f2b88..1e45a735961e 100644 --- a/lib/test_stackinit.c +++ b/lib/test_stackinit.c @@ -12,7 +12,7 @@ /* Exfiltration buffer. */ #define MAX_VAR_SIZE 128 -static char check_buf[MAX_VAR_SIZE]; +static u8 check_buf[MAX_VAR_SIZE]; /* Character array to trigger stack protector in all functions. */ #define VAR_BUFFER 32 @@ -106,9 +106,18 @@ static noinline __init int test_ ## name (void) \ \ /* Fill clone type with zero for per-field init. */ \ memset(&zero, 0x00, sizeof(zero)); \ + /* Clear entire check buffer for 0xFF overlap test. */ \ + memset(check_buf, 0x00, sizeof(check_buf)); \ /* Fill stack with 0xFF. */ \ ignored = leaf_ ##name((unsigned long)&ignored, 1, \ FETCH_ARG_ ## which(zero)); \ + /* Verify all bytes overwritten with 0xFF. */ \ + for (sum = 0, i = 0; i < target_size; i++) \ + sum += (check_buf[i] != 0xFF); \ + if (sum) { \ + pr_err(#name ": leaf fill was not 0xFF!?\n"); \ + return 1; \ + } \ /* Clear entire check buffer for later bit tests. */ \ memset(check_buf, 0x00, sizeof(check_buf)); \ /* Extract stack-defined variable contents. */ \ @@ -126,9 +135,9 @@ static noinline __init int test_ ## name (void) \ return 1; \ } \ \ - /* Look for any set bits in the check region. */ \ - for (i = 0; i < sizeof(check_buf); i++) \ - sum += (check_buf[i] != 0); \ + /* Look for any bytes still 0xFF in check region. */ \ + for (sum = 0, i = 0; i < target_size; i++) \ + sum += (check_buf[i] == 0xFF); \ \ if (sum == 0) \ pr_info(#name " ok\n"); \ @@ -162,13 +171,13 @@ static noinline __init int leaf_ ## name(unsigned long sp, \ * Keep this buffer around to make sure we've got a \ * stack frame of SOME kind... \ */ \ - memset(buf, (char)(sp && 0xff), sizeof(buf)); \ + memset(buf, (char)(sp & 0xff), sizeof(buf)); \ /* Fill variable with 0xFF. */ \ if (fill) { \ fill_start = &var; \ fill_size = sizeof(var); \ memset(fill_start, \ - (char)((sp && 0xff) | forced_mask), \ + (char)((sp & 0xff) | forced_mask), \ fill_size); \ } \ \ From 04f51847e5ca898e84a433da5576a5746d0592ae Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 27 Dec 2019 10:47:00 +0800 Subject: [PATCH 3014/3715] fscrypt: Allow modular crypto algorithms The commit 643fa9612bf1 ("fscrypt: remove filesystem specific build config option") removed modular support for fs/crypto. This causes the Crypto API to be built-in whenever fscrypt is enabled. This makes it very difficult for me to test modular builds of the Crypto API without disabling fscrypt which is a pain. As fscrypt is still evolving and it's developing new ties with the fs layer, it's hard to build it as a module for now. However, the actual algorithms are not required until a filesystem is mounted. Therefore we can allow them to be built as modules. Signed-off-by: Herbert Xu Link: https://lore.kernel.org/r/20191227024700.7vrzuux32uyfdgum@gondor.apana.org.au Signed-off-by: Eric Biggers --- fs/crypto/Kconfig | 21 ++++++++++++++------- fs/ext4/Kconfig | 1 + fs/f2fs/Kconfig | 1 + fs/ubifs/Kconfig | 1 + 4 files changed, 17 insertions(+), 7 deletions(-) diff --git a/fs/crypto/Kconfig b/fs/crypto/Kconfig index 4bc66f2c571e..d0a0238e37dd 100644 --- a/fs/crypto/Kconfig +++ b/fs/crypto/Kconfig @@ -1,13 +1,8 @@ config FS_ENCRYPTION bool "FS Encryption (Per-file encryption)" select CRYPTO - select CRYPTO_AES - select CRYPTO_CBC - select CRYPTO_ECB - select CRYPTO_XTS - select CRYPTO_CTS - select CRYPTO_SHA512 - select CRYPTO_HMAC + select CRYPTO_HASH + select CRYPTO_BLKCIPHER select KEYS help Enable encryption of files and directories. This @@ -15,3 +10,15 @@ config FS_ENCRYPTION efficient since it avoids caching the encrypted and decrypted pages in the page cache. Currently Ext4, F2FS and UBIFS make use of this feature. + +# Filesystems supporting encryption must select this if FS_ENCRYPTION. This +# allows the algorithms to be built as modules when all the filesystems are. +config FS_ENCRYPTION_ALGS + tristate + select CRYPTO_AES + select CRYPTO_CBC + select CRYPTO_CTS + select CRYPTO_ECB + select CRYPTO_HMAC + select CRYPTO_SHA512 + select CRYPTO_XTS diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index e2cfd33fd759..39cd2c054339 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -37,6 +37,7 @@ config EXT4_FS select CRC16 select CRYPTO select CRYPTO_CRC32C + select FS_ENCRYPTION_ALGS if FS_ENCRYPTION help This is the next generation of the ext3 filesystem. diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index 9bb02d446d44..8f2019d17f34 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -5,6 +5,7 @@ config F2FS_FS select CRYPTO select CRYPTO_CRC32 select F2FS_FS_XATTR if FS_ENCRYPTION + select FS_ENCRYPTION_ALGS if FS_ENCRYPTION help F2FS is based on Log-structured File System (LFS), which supports versatile "flash-friendly" features. The design has been focused on diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig index dfc6fdf019d7..fe221d7d99d6 100644 --- a/fs/ubifs/Kconfig +++ b/fs/ubifs/Kconfig @@ -7,6 +7,7 @@ config UBIFS_FS select CRYPTO if UBIFS_FS_ZLIB select CRYPTO_LZO if UBIFS_FS_LZO select CRYPTO_DEFLATE if UBIFS_FS_ZLIB + select FS_ENCRYPTION_ALGS if FS_ENCRYPTION depends on MTD_UBI help UBIFS is a file system for flash devices which works on top of UBI. From 1c88eea96e762f274921259e3438c003d6cf26bb Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Dec 2019 12:45:09 -0800 Subject: [PATCH 3015/3715] fscrypt: remove redundant bi_status check submit_bio_wait() already returns bi_status translated to an errno. So the additional check of bi_status is redundant and can be removed. Link: https://lore.kernel.org/r/20191209204509.228942-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/bio.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c index 4a7f4d78ef90..3548ab118cf9 100644 --- a/fs/crypto/bio.c +++ b/fs/crypto/bio.c @@ -77,8 +77,6 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, goto errout; } err = submit_bio_wait(bio); - if (err == 0 && bio->bi_status) - err = -EIO; bio_put(bio); if (err) goto errout; From bee5bd5b8f2ec1010f53d66aa26ac9b23dcdf271 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 26 Dec 2019 10:08:13 -0600 Subject: [PATCH 3016/3715] fscrypt: optimize fscrypt_zeroout_range() Currently fscrypt_zeroout_range() issues and waits on a bio for each block it writes, which makes it very slow. Optimize it to write up to 16 pages at a time instead. Also add a function comment, and improve reliability by allowing the allocations of the bio and the first ciphertext page to wait on the corresponding mempools. Link: https://lore.kernel.org/r/20191226160813.53182-1-ebiggers@kernel.org Reviewed-by: Theodore Ts'o Signed-off-by: Eric Biggers --- fs/crypto/bio.c | 112 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 81 insertions(+), 31 deletions(-) diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c index 3548ab118cf9..d7b1ce2aa307 100644 --- a/fs/crypto/bio.c +++ b/fs/crypto/bio.c @@ -41,51 +41,101 @@ void fscrypt_decrypt_bio(struct bio *bio) } EXPORT_SYMBOL(fscrypt_decrypt_bio); +/** + * fscrypt_zeroout_range() - zero out a range of blocks in an encrypted file + * @inode: the file's inode + * @lblk: the first file logical block to zero out + * @pblk: the first filesystem physical block to zero out + * @len: number of blocks to zero out + * + * Zero out filesystem blocks in an encrypted regular file on-disk, i.e. write + * ciphertext blocks which decrypt to the all-zeroes block. The blocks must be + * both logically and physically contiguous. It's also assumed that the + * filesystem only uses a single block device, ->s_bdev. + * + * Note that since each block uses a different IV, this involves writing a + * different ciphertext to each block; we can't simply reuse the same one. + * + * Return: 0 on success; -errno on failure. + */ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, - sector_t pblk, unsigned int len) + sector_t pblk, unsigned int len) { const unsigned int blockbits = inode->i_blkbits; const unsigned int blocksize = 1 << blockbits; - struct page *ciphertext_page; + const unsigned int blocks_per_page_bits = PAGE_SHIFT - blockbits; + const unsigned int blocks_per_page = 1 << blocks_per_page_bits; + struct page *pages[16]; /* write up to 16 pages at a time */ + unsigned int nr_pages; + unsigned int i; + unsigned int offset; struct bio *bio; - int ret, err = 0; + int ret, err; - ciphertext_page = fscrypt_alloc_bounce_page(GFP_NOWAIT); - if (!ciphertext_page) - return -ENOMEM; + if (len == 0) + return 0; - while (len--) { - err = fscrypt_crypt_block(inode, FS_ENCRYPT, lblk, - ZERO_PAGE(0), ciphertext_page, - blocksize, 0, GFP_NOFS); - if (err) - goto errout; + BUILD_BUG_ON(ARRAY_SIZE(pages) > BIO_MAX_PAGES); + nr_pages = min_t(unsigned int, ARRAY_SIZE(pages), + (len + blocks_per_page - 1) >> blocks_per_page_bits); - bio = bio_alloc(GFP_NOWAIT, 1); - if (!bio) { - err = -ENOMEM; - goto errout; - } + /* + * We need at least one page for ciphertext. Allocate the first one + * from a mempool, with __GFP_DIRECT_RECLAIM set so that it can't fail. + * + * Any additional page allocations are allowed to fail, as they only + * help performance, and waiting on the mempool for them could deadlock. + */ + for (i = 0; i < nr_pages; i++) { + pages[i] = fscrypt_alloc_bounce_page(i == 0 ? GFP_NOFS : + GFP_NOWAIT | __GFP_NOWARN); + if (!pages[i]) + break; + } + nr_pages = i; + if (WARN_ON(nr_pages <= 0)) + return -EINVAL; + + /* This always succeeds since __GFP_DIRECT_RECLAIM is set. */ + bio = bio_alloc(GFP_NOFS, nr_pages); + + do { bio_set_dev(bio, inode->i_sb->s_bdev); bio->bi_iter.bi_sector = pblk << (blockbits - 9); bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - ret = bio_add_page(bio, ciphertext_page, blocksize, 0); - if (WARN_ON(ret != blocksize)) { - /* should never happen! */ - bio_put(bio); - err = -EIO; - goto errout; - } + + i = 0; + offset = 0; + do { + err = fscrypt_crypt_block(inode, FS_ENCRYPT, lblk, + ZERO_PAGE(0), pages[i], + blocksize, offset, GFP_NOFS); + if (err) + goto out; + lblk++; + pblk++; + len--; + offset += blocksize; + if (offset == PAGE_SIZE || len == 0) { + ret = bio_add_page(bio, pages[i++], offset, 0); + if (WARN_ON(ret != offset)) { + err = -EIO; + goto out; + } + offset = 0; + } + } while (i != nr_pages && len != 0); + err = submit_bio_wait(bio); - bio_put(bio); if (err) - goto errout; - lblk++; - pblk++; - } + goto out; + bio_reset(bio); + } while (len != 0); err = 0; -errout: - fscrypt_free_bounce_page(ciphertext_page); +out: + bio_put(bio); + for (i = 0; i < nr_pages; i++) + fscrypt_free_bounce_page(pages[i]); return err; } EXPORT_SYMBOL(fscrypt_zeroout_range); From 9c5c8c523222e52b2f2b8fce4dabf8a64da671c3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 31 Dec 2019 12:10:26 -0600 Subject: [PATCH 3017/3715] fscrypt: document gfp_flags for bounce page allocation Document that fscrypt_encrypt_pagecache_blocks() allocates the bounce page from a mempool, and document what this means for the @gfp_flags argument. Link: https://lore.kernel.org/r/20191231181026.47400-1-ebiggers@kernel.org Reviewed-by: Theodore Ts'o Signed-off-by: Eric Biggers --- fs/crypto/crypto.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 8a783d74137b..dcbd507824b7 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -137,7 +137,7 @@ int fscrypt_crypt_block(const struct inode *inode, fscrypt_direction_t rw, * multiple of the filesystem's block size. * @offs: Byte offset within @page of the first block to encrypt. Must be * a multiple of the filesystem's block size. - * @gfp_flags: Memory allocation flags + * @gfp_flags: Memory allocation flags. See details below. * * A new bounce page is allocated, and the specified block(s) are encrypted into * it. In the bounce page, the ciphertext block(s) will be located at the same @@ -147,6 +147,11 @@ int fscrypt_crypt_block(const struct inode *inode, fscrypt_direction_t rw, * * This is for use by the filesystem's ->writepages() method. * + * The bounce page allocation is mempool-backed, so it will always succeed when + * @gfp_flags includes __GFP_DIRECT_RECLAIM, e.g. when it's GFP_NOFS. However, + * only the first page of each bio can be allocated this way. To prevent + * deadlocks, for any additional pages a mask like GFP_NOWAIT must be used. + * * Return: the new encrypted bounce page on success; an ERR_PTR() on failure */ struct page *fscrypt_encrypt_pagecache_blocks(struct page *page, From 85b9c3e49199fe0d307cfba9f59df391e0a8510f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 19 Jan 2020 22:07:32 -0800 Subject: [PATCH 3018/3715] fscrypt: don't print name of busy file when removing key When an encryption key can't be fully removed due to file(s) protected by it still being in-use, we shouldn't really print the path to one of these files to the kernel log, since parts of this path are likely to be encrypted on-disk, and (depending on how the system is set up) the confidentiality of this path might be lost by printing it to the log. This is a trade-off: a single file path often doesn't matter at all, especially if it's a directory; the kernel log might still be protected in some way; and I had originally hoped that any "inode(s) still busy" bugs (which are security weaknesses in their own right) would be quickly fixed and that to do so it would be super helpful to always know the file path and not have to run 'find dir -inum $inum' after the fact. But in practice, these bugs can be hard to fix (e.g. due to asynchronous process killing that is difficult to eliminate, for performance reasons), and also not tied to specific files, so knowing a file path doesn't necessarily help. So to be safe, for now let's just show the inode number, not the path. If someone really wants to know a path they can use 'find -inum'. Fixes: b1c0ec3599f4 ("fscrypt: add FS_IOC_REMOVE_ENCRYPTION_KEY ioctl") Cc: # v5.4+ Link: https://lore.kernel.org/r/20200120060732.390362-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/keyring.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c index 395aee2e6e4c..35ffabfffbf8 100644 --- a/fs/crypto/keyring.c +++ b/fs/crypto/keyring.c @@ -776,9 +776,6 @@ static int check_for_busy_inodes(struct super_block *sb, struct list_head *pos; size_t busy_count = 0; unsigned long ino; - struct dentry *dentry; - char _path[256]; - char *path = NULL; spin_lock(&mk->mk_decrypted_inodes_lock); @@ -797,22 +794,14 @@ static int check_for_busy_inodes(struct super_block *sb, struct fscrypt_info, ci_master_key_link)->ci_inode; ino = inode->i_ino; - dentry = d_find_alias(inode); } spin_unlock(&mk->mk_decrypted_inodes_lock); - if (dentry) { - path = dentry_path(dentry, _path, sizeof(_path)); - dput(dentry); - } - if (IS_ERR_OR_NULL(path)) - path = "(unknown)"; - fscrypt_warn(NULL, - "%s: %zu inode(s) still busy after removing key with %s %*phN, including ino %lu (%s)", + "%s: %zu inode(s) still busy after removing key with %s %*phN, including ino %lu", sb->s_id, busy_count, master_key_spec_type(&mk->mk_spec), master_key_spec_len(&mk->mk_spec), (u8 *)&mk->mk_spec.u, - ino, path); + ino); return -EBUSY; } From 0bc68c180e8e56afe74a5c9ae455165414d61373 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 19 Jan 2020 23:17:36 -0800 Subject: [PATCH 3019/3715] fscrypt: add "fscrypt_" prefix to fname_encrypt() fname_encrypt() is a global function, due to being used in both fname.c and hooks.c. So it should be prefixed with "fscrypt_", like all the other global functions in fs/crypto/. Link: https://lore.kernel.org/r/20200120071736.45915-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/fname.c | 10 +++++----- fs/crypto/fscrypt_private.h | 5 +++-- fs/crypto/hooks.c | 3 ++- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 3fd27e14ebdd..4614e4969736 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -28,15 +28,15 @@ static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) } /** - * fname_encrypt() - encrypt a filename + * fscrypt_fname_encrypt() - encrypt a filename * * The output buffer must be at least as large as the input buffer. * Any extra space is filled with NUL padding before encryption. * * Return: 0 on success, -errno on failure */ -int fname_encrypt(const struct inode *inode, const struct qstr *iname, - u8 *out, unsigned int olen) +int fscrypt_fname_encrypt(const struct inode *inode, const struct qstr *iname, + u8 *out, unsigned int olen) { struct skcipher_request *req = NULL; DECLARE_CRYPTO_WAIT(wait); @@ -343,8 +343,8 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, if (!fname->crypto_buf.name) return -ENOMEM; - ret = fname_encrypt(dir, iname, fname->crypto_buf.name, - fname->crypto_buf.len); + ret = fscrypt_fname_encrypt(dir, iname, fname->crypto_buf.name, + fname->crypto_buf.len); if (ret) goto errout; fname->disk_name.name = fname->crypto_buf.name; diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index b22e8decebed..fea7f5547428 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -235,8 +235,9 @@ void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num, const struct fscrypt_info *ci); /* fname.c */ -extern int fname_encrypt(const struct inode *inode, const struct qstr *iname, - u8 *out, unsigned int olen); +extern int fscrypt_fname_encrypt(const struct inode *inode, + const struct qstr *iname, + u8 *out, unsigned int olen); extern bool fscrypt_fname_encrypted_size(const struct inode *inode, u32 orig_len, u32 max_len, u32 *encrypted_len_ret); diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index 30b1ca661249..a0723fcd77a3 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -187,7 +187,8 @@ int __fscrypt_encrypt_symlink(struct inode *inode, const char *target, ciphertext_len = disk_link->len - sizeof(*sd); sd->len = cpu_to_le16(ciphertext_len); - err = fname_encrypt(inode, &iname, sd->encrypted_path, ciphertext_len); + err = fscrypt_fname_encrypt(inode, &iname, sd->encrypted_path, + ciphertext_len); if (err) goto err_free_sd; From e16d8494ecc69348badaaef8631040d40ef1e91d Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Mon, 20 Jan 2020 14:31:56 -0800 Subject: [PATCH 3020/3715] fscrypt: don't allow v1 policies with casefolding Casefolded encrypted directories will use a new dirhash method that requires a secret key. If the directory uses a v2 encryption policy, it's easy to derive this key from the master key using HKDF. However, v1 encryption policies don't provide a way to derive additional keys. Therefore, don't allow casefolding on directories that use a v1 policy. Specifically, make it so that trying to enable casefolding on a directory that has a v1 policy fails, trying to set a v1 policy on a casefolded directory fails, and trying to open a casefolded directory that has a v1 policy (if one somehow exists on-disk) fails. Signed-off-by: Daniel Rosenberg [EB: improved commit message, updated fscrypt.rst, and other cleanups] Link: https://lore.kernel.org/r/20200120223201.241390-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- Documentation/filesystems/fscrypt.rst | 4 +++- fs/crypto/hooks.c | 28 +++++++++++++++++++++++++++ fs/crypto/policy.c | 7 +++++++ fs/inode.c | 3 ++- include/linux/fscrypt.h | 9 +++++++++ 5 files changed, 49 insertions(+), 2 deletions(-) diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst index 4ed9d58ea0ab..9514bef7e99e 100644 --- a/Documentation/filesystems/fscrypt.rst +++ b/Documentation/filesystems/fscrypt.rst @@ -513,7 +513,9 @@ FS_IOC_SET_ENCRYPTION_POLICY can fail with the following errors: - ``EEXIST``: the file is already encrypted with an encryption policy different from the one specified - ``EINVAL``: an invalid encryption policy was specified (invalid - version, mode(s), or flags; or reserved bits were set) + version, mode(s), or flags; or reserved bits were set); or a v1 + encryption policy was specified but the directory has the casefold + flag enabled (casefolding is incompatible with v1 policies). - ``ENOKEY``: a v2 encryption policy was specified, but the key with the specified ``master_key_identifier`` has not been added, nor does the process have the CAP_FOWNER capability in the initial user diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index a0723fcd77a3..3312d9ac1143 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -121,6 +121,34 @@ int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, } EXPORT_SYMBOL_GPL(__fscrypt_prepare_lookup); +/** + * fscrypt_prepare_setflags() - prepare to change flags with FS_IOC_SETFLAGS + * @inode: the inode on which flags are being changed + * @oldflags: the old flags + * @flags: the new flags + * + * The caller should be holding i_rwsem for write. + * + * Return: 0 on success; -errno if the flags change isn't allowed or if + * another error occurs. + */ +int fscrypt_prepare_setflags(struct inode *inode, + unsigned int oldflags, unsigned int flags) +{ + struct fscrypt_info *ci; + int err; + + if (IS_ENCRYPTED(inode) && (flags & ~oldflags & FS_CASEFOLD_FL)) { + err = fscrypt_require_key(inode); + if (err) + return err; + ci = inode->i_crypt_info; + if (ci->ci_policy.version != FSCRYPT_POLICY_V2) + return -EINVAL; + } + return 0; +} + int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len, unsigned int max_len, struct fscrypt_str *disk_link) diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index f1cff83c151a..cf2a9d26ef7d 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -124,6 +124,13 @@ static bool fscrypt_supported_v1_policy(const struct fscrypt_policy_v1 *policy, policy->filenames_encryption_mode)) return false; + if (IS_CASEFOLDED(inode)) { + /* With v1, there's no way to derive dirhash keys. */ + fscrypt_warn(inode, + "v1 policies can't be used on casefolded directories"); + return false; + } + return true; } diff --git a/fs/inode.c b/fs/inode.c index 95fece639652..01ed6d0a4a5c 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -2146,7 +2147,7 @@ int vfs_ioc_setflags_prepare(struct inode *inode, unsigned int oldflags, !capable(CAP_LINUX_IMMUTABLE)) return -EPERM; - return 0; + return fscrypt_prepare_setflags(inode, oldflags, flags); } EXPORT_SYMBOL(vfs_ioc_setflags_prepare); diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index dd67e7aa148f..f5a8293964bf 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -263,6 +263,8 @@ extern int __fscrypt_prepare_rename(struct inode *old_dir, unsigned int flags); extern int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, struct fscrypt_name *fname); +extern int fscrypt_prepare_setflags(struct inode *inode, + unsigned int oldflags, unsigned int flags); extern int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len, unsigned int max_len, struct fscrypt_str *disk_link); @@ -514,6 +516,13 @@ static inline int __fscrypt_prepare_lookup(struct inode *dir, return -EOPNOTSUPP; } +static inline int fscrypt_prepare_setflags(struct inode *inode, + unsigned int oldflags, + unsigned int flags) +{ + return 0; +} + static inline int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len, unsigned int max_len, From 7e2503236b61e3005c4c453b9dd22f05f517956f Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Mon, 20 Jan 2020 14:31:57 -0800 Subject: [PATCH 3021/3715] fscrypt: derive dirhash key for casefolded directories When we allow indexed directories to use both encryption and casefolding, for the dirhash we can't just hash the ciphertext filenames that are stored on-disk (as is done currently) because the dirhash must be case insensitive, but the stored names are case-preserving. Nor can we hash the plaintext names with an unkeyed hash (or a hash keyed with a value stored on-disk like ext4's s_hash_seed), since that would leak information about the names that encryption is meant to protect. Instead, if we can accept a dirhash that's only computable when the fscrypt key is available, we can hash the plaintext names with a keyed hash using a secret key derived from the directory's fscrypt master key. We'll use SipHash-2-4 for this purpose. Prepare for this by deriving a SipHash key for each casefolded encrypted directory. Make sure to handle deriving the key not only when setting up the directory's fscrypt_info, but also in the case where the casefold flag is enabled after the fscrypt_info was already set up. (We could just always derive the key regardless of casefolding, but that would introduce unnecessary overhead for people not using casefolding.) Signed-off-by: Daniel Rosenberg [EB: improved commit message, updated fscrypt.rst, squashed with change that avoids unnecessarily deriving the key, and many other cleanups] Link: https://lore.kernel.org/r/20200120223201.241390-3-ebiggers@kernel.org Signed-off-by: Eric Biggers --- Documentation/filesystems/fscrypt.rst | 10 +++++ fs/crypto/fname.c | 21 +++++++++++ fs/crypto/fscrypt_private.h | 13 +++++++ fs/crypto/hooks.c | 16 ++++++++ fs/crypto/keysetup.c | 54 ++++++++++++++++++++------- include/linux/fscrypt.h | 10 +++++ 6 files changed, 110 insertions(+), 14 deletions(-) diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst index 9514bef7e99e..a737503f8062 100644 --- a/Documentation/filesystems/fscrypt.rst +++ b/Documentation/filesystems/fscrypt.rst @@ -302,6 +302,16 @@ For master keys used for v2 encryption policies, a unique 16-byte "key identifier" is also derived using the KDF. This value is stored in the clear, since it is needed to reliably identify the key itself. +Dirhash keys +------------ + +For directories that are indexed using a secret-keyed dirhash over the +plaintext filenames, the KDF is also used to derive a 128-bit +SipHash-2-4 key per directory in order to hash filenames. This works +just like deriving a per-file encryption key, except that a different +KDF context is used. Currently, only casefolded ("case-insensitive") +encrypted directories use this style of hashing. + Encryption modes and usage ========================== diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 4614e4969736..851d2082ecfe 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -402,6 +402,27 @@ errout: } EXPORT_SYMBOL(fscrypt_setup_filename); +/** + * fscrypt_fname_siphash() - calculate the SipHash of a filename + * @dir: the parent directory + * @name: the filename to calculate the SipHash of + * + * Given a plaintext filename @name and a directory @dir which uses SipHash as + * its dirhash method and has had its fscrypt key set up, this function + * calculates the SipHash of that name using the directory's secret dirhash key. + * + * Return: the SipHash of @name using the hash key of @dir + */ +u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name) +{ + const struct fscrypt_info *ci = dir->i_crypt_info; + + WARN_ON(!ci->ci_dirhash_key_initialized); + + return siphash(name->name, name->len, &ci->ci_dirhash_key); +} +EXPORT_SYMBOL_GPL(fscrypt_fname_siphash); + /* * Validate dentries in encrypted directories to make sure we aren't potentially * caching stale dentries after a key has been added. diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index fea7f5547428..81dbb2befe81 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -12,6 +12,7 @@ #define _FSCRYPT_PRIVATE_H #include +#include #include #define CONST_STRLEN(str) (sizeof(str) - 1) @@ -188,6 +189,14 @@ struct fscrypt_info { */ struct fscrypt_direct_key *ci_direct_key; + /* + * This inode's hash key for filenames. This is a 128-bit SipHash-2-4 + * key. This is only set for directories that use a keyed dirhash over + * the plaintext filenames -- currently just casefolded directories. + */ + siphash_key_t ci_dirhash_key; + bool ci_dirhash_key_initialized; + /* The encryption policy used by this inode */ union fscrypt_policy ci_policy; @@ -263,6 +272,7 @@ extern int fscrypt_init_hkdf(struct fscrypt_hkdf *hkdf, const u8 *master_key, #define HKDF_CONTEXT_PER_FILE_KEY 2 #define HKDF_CONTEXT_DIRECT_KEY 3 #define HKDF_CONTEXT_IV_INO_LBLK_64_KEY 4 +#define HKDF_CONTEXT_DIRHASH_KEY 5 extern int fscrypt_hkdf_expand(const struct fscrypt_hkdf *hkdf, u8 context, const u8 *info, unsigned int infolen, @@ -434,6 +444,9 @@ fscrypt_allocate_skcipher(struct fscrypt_mode *mode, const u8 *raw_key, extern int fscrypt_set_derived_key(struct fscrypt_info *ci, const u8 *derived_key); +extern int fscrypt_derive_dirhash_key(struct fscrypt_info *ci, + const struct fscrypt_master_key *mk); + /* keysetup_v1.c */ extern void fscrypt_put_direct_key(struct fscrypt_direct_key *dk); diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index 3312d9ac1143..4ca167017d67 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -4,6 +4,8 @@ * Encryption hooks for higher-level filesystem operations. */ +#include + #include "fscrypt_private.h" /** @@ -136,8 +138,14 @@ int fscrypt_prepare_setflags(struct inode *inode, unsigned int oldflags, unsigned int flags) { struct fscrypt_info *ci; + struct fscrypt_master_key *mk; int err; + /* + * When the CASEFOLD flag is set on an encrypted directory, we must + * derive the secret key needed for the dirhash. This is only possible + * if the directory uses a v2 encryption policy. + */ if (IS_ENCRYPTED(inode) && (flags & ~oldflags & FS_CASEFOLD_FL)) { err = fscrypt_require_key(inode); if (err) @@ -145,6 +153,14 @@ int fscrypt_prepare_setflags(struct inode *inode, ci = inode->i_crypt_info; if (ci->ci_policy.version != FSCRYPT_POLICY_V2) return -EINVAL; + mk = ci->ci_master_key->payload.data[0]; + down_read(&mk->mk_secret_sem); + if (is_master_key_secret_present(&mk->mk_secret)) + err = fscrypt_derive_dirhash_key(ci, mk); + else + err = -ENOKEY; + up_read(&mk->mk_secret_sem); + return err; } return 0; } diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index 573fdc0f480b..258c060b37e2 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -174,10 +174,24 @@ done: return 0; } +int fscrypt_derive_dirhash_key(struct fscrypt_info *ci, + const struct fscrypt_master_key *mk) +{ + int err; + + err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf, HKDF_CONTEXT_DIRHASH_KEY, + ci->ci_nonce, FS_KEY_DERIVATION_NONCE_SIZE, + (u8 *)&ci->ci_dirhash_key, + sizeof(ci->ci_dirhash_key)); + if (err) + return err; + ci->ci_dirhash_key_initialized = true; + return 0; +} + static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci, struct fscrypt_master_key *mk) { - u8 derived_key[FSCRYPT_MAX_KEY_SIZE]; int err; if (ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) { @@ -189,8 +203,8 @@ static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci, * This ensures that the master key is consistently used only * for HKDF, avoiding key reuse issues. */ - return setup_per_mode_key(ci, mk, mk->mk_direct_tfms, - HKDF_CONTEXT_DIRECT_KEY, false); + err = setup_per_mode_key(ci, mk, mk->mk_direct_tfms, + HKDF_CONTEXT_DIRECT_KEY, false); } else if (ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64) { /* @@ -199,21 +213,33 @@ static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci, * the IVs. This format is optimized for use with inline * encryption hardware compliant with the UFS or eMMC standards. */ - return setup_per_mode_key(ci, mk, mk->mk_iv_ino_lblk_64_tfms, - HKDF_CONTEXT_IV_INO_LBLK_64_KEY, - true); - } + err = setup_per_mode_key(ci, mk, mk->mk_iv_ino_lblk_64_tfms, + HKDF_CONTEXT_IV_INO_LBLK_64_KEY, true); + } else { + u8 derived_key[FSCRYPT_MAX_KEY_SIZE]; - err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf, - HKDF_CONTEXT_PER_FILE_KEY, - ci->ci_nonce, FS_KEY_DERIVATION_NONCE_SIZE, - derived_key, ci->ci_mode->keysize); + err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf, + HKDF_CONTEXT_PER_FILE_KEY, + ci->ci_nonce, + FS_KEY_DERIVATION_NONCE_SIZE, + derived_key, ci->ci_mode->keysize); + if (err) + return err; + + err = fscrypt_set_derived_key(ci, derived_key); + memzero_explicit(derived_key, ci->ci_mode->keysize); + } if (err) return err; - err = fscrypt_set_derived_key(ci, derived_key); - memzero_explicit(derived_key, ci->ci_mode->keysize); - return err; + /* Derive a secret dirhash key for directories that need it. */ + if (S_ISDIR(ci->ci_inode->i_mode) && IS_CASEFOLDED(ci->ci_inode)) { + err = fscrypt_derive_dirhash_key(ci, mk); + if (err) + return err; + } + + return 0; } /* diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index f5a8293964bf..ccdee616e7e5 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -247,6 +247,9 @@ static inline bool fscrypt_match_name(const struct fscrypt_name *fname, return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len); } +extern u64 fscrypt_fname_siphash(const struct inode *dir, + const struct qstr *name); + /* bio.c */ extern void fscrypt_decrypt_bio(struct bio *); extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t, @@ -474,6 +477,13 @@ static inline bool fscrypt_match_name(const struct fscrypt_name *fname, return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len); } +static inline u64 fscrypt_fname_siphash(const struct inode *dir, + const struct qstr *name) +{ + WARN_ON_ONCE(1); + return 0; +} + /* bio.c */ static inline void fscrypt_decrypt_bio(struct bio *bio) { From 216d8cabb6b8482796ceb2878e803c92e6df8fb8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 20 Jan 2020 14:31:58 -0800 Subject: [PATCH 3022/3715] fscrypt: clarify what is meant by a per-file key Now that there's sometimes a second type of per-file key (the dirhash key), clarify some function names, macros, and documentation that specifically deal with per-file *encryption* keys. Link: https://lore.kernel.org/r/20200120223201.241390-4-ebiggers@kernel.org Reviewed-by: Daniel Rosenberg Signed-off-by: Eric Biggers --- Documentation/filesystems/fscrypt.rst | 24 ++++++++--------- fs/crypto/fscrypt_private.h | 6 ++--- fs/crypto/keysetup.c | 39 ++++++++++++++------------- fs/crypto/keysetup_v1.c | 4 +-- 4 files changed, 37 insertions(+), 36 deletions(-) diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst index a737503f8062..28524dcc51c9 100644 --- a/Documentation/filesystems/fscrypt.rst +++ b/Documentation/filesystems/fscrypt.rst @@ -234,8 +234,8 @@ HKDF is more flexible, is nonreversible, and evenly distributes entropy from the master key. HKDF is also standardized and widely used by other software, whereas the AES-128-ECB based KDF is ad-hoc. -Per-file keys -------------- +Per-file encryption keys +------------------------ Since each master key can protect many files, it is necessary to "tweak" the encryption of each file so that the same plaintext in two @@ -268,9 +268,9 @@ is greater than that of an AES-256-XTS key. Therefore, to improve performance and save memory, for Adiantum a "direct key" configuration is supported. When the user has enabled this by setting FSCRYPT_POLICY_FLAG_DIRECT_KEY in the fscrypt policy, -per-file keys are not used. Instead, whenever any data (contents or -filenames) is encrypted, the file's 16-byte nonce is included in the -IV. Moreover: +per-file encryption keys are not used. Instead, whenever any data +(contents or filenames) is encrypted, the file's 16-byte nonce is +included in the IV. Moreover: - For v1 encryption policies, the encryption is done directly with the master key. Because of this, users **must not** use the same master @@ -335,11 +335,11 @@ used. Adiantum is a (primarily) stream cipher-based mode that is fast even on CPUs without dedicated crypto instructions. It's also a true wide-block mode, unlike XTS. It can also eliminate the need to derive -per-file keys. However, it depends on the security of two primitives, -XChaCha12 and AES-256, rather than just one. See the paper -"Adiantum: length-preserving encryption for entry-level processors" -(https://eprint.iacr.org/2018/720.pdf) for more details. To use -Adiantum, CONFIG_CRYPTO_ADIANTUM must be enabled. Also, fast +per-file encryption keys. However, it depends on the security of two +primitives, XChaCha12 and AES-256, rather than just one. See the +paper "Adiantum: length-preserving encryption for entry-level +processors" (https://eprint.iacr.org/2018/720.pdf) for more details. +To use Adiantum, CONFIG_CRYPTO_ADIANTUM must be enabled. Also, fast implementations of ChaCha and NHPoly1305 should be enabled, e.g. CONFIG_CRYPTO_CHACHA20_NEON and CONFIG_CRYPTO_NHPOLY1305_NEON for ARM. @@ -1149,8 +1149,8 @@ The context structs contain the same information as the corresponding policy structs (see `Setting an encryption policy`_), except that the context structs also contain a nonce. The nonce is randomly generated by the kernel and is used as KDF input or as a tweak to cause -different files to be encrypted differently; see `Per-file keys`_ and -`DIRECT_KEY policies`_. +different files to be encrypted differently; see `Per-file encryption +keys`_ and `DIRECT_KEY policies`_. Data path changes ----------------- diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 81dbb2befe81..9aae851409e5 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -269,7 +269,7 @@ extern int fscrypt_init_hkdf(struct fscrypt_hkdf *hkdf, const u8 *master_key, * output doesn't reveal another. */ #define HKDF_CONTEXT_KEY_IDENTIFIER 1 -#define HKDF_CONTEXT_PER_FILE_KEY 2 +#define HKDF_CONTEXT_PER_FILE_ENC_KEY 2 #define HKDF_CONTEXT_DIRECT_KEY 3 #define HKDF_CONTEXT_IV_INO_LBLK_64_KEY 4 #define HKDF_CONTEXT_DIRHASH_KEY 5 @@ -441,8 +441,8 @@ extern struct crypto_skcipher * fscrypt_allocate_skcipher(struct fscrypt_mode *mode, const u8 *raw_key, const struct inode *inode); -extern int fscrypt_set_derived_key(struct fscrypt_info *ci, - const u8 *derived_key); +extern int fscrypt_set_per_file_enc_key(struct fscrypt_info *ci, + const u8 *raw_key); extern int fscrypt_derive_dirhash_key(struct fscrypt_info *ci, const struct fscrypt_master_key *mk); diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index 258c060b37e2..f9ab21c778e1 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -107,12 +107,12 @@ err_free_tfm: return ERR_PTR(err); } -/* Given the per-file key, set up the file's crypto transform object */ -int fscrypt_set_derived_key(struct fscrypt_info *ci, const u8 *derived_key) +/* Given a per-file encryption key, set up the file's crypto transform object */ +int fscrypt_set_per_file_enc_key(struct fscrypt_info *ci, const u8 *raw_key) { struct crypto_skcipher *tfm; - tfm = fscrypt_allocate_skcipher(ci->ci_mode, derived_key, ci->ci_inode); + tfm = fscrypt_allocate_skcipher(ci->ci_mode, raw_key, ci->ci_inode); if (IS_ERR(tfm)) return PTR_ERR(tfm); @@ -121,10 +121,10 @@ int fscrypt_set_derived_key(struct fscrypt_info *ci, const u8 *derived_key) return 0; } -static int setup_per_mode_key(struct fscrypt_info *ci, - struct fscrypt_master_key *mk, - struct crypto_skcipher **tfms, - u8 hkdf_context, bool include_fs_uuid) +static int setup_per_mode_enc_key(struct fscrypt_info *ci, + struct fscrypt_master_key *mk, + struct crypto_skcipher **tfms, + u8 hkdf_context, bool include_fs_uuid) { const struct inode *inode = ci->ci_inode; const struct super_block *sb = inode->i_sb; @@ -196,15 +196,15 @@ static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci, if (ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) { /* - * DIRECT_KEY: instead of deriving per-file keys, the per-file - * nonce will be included in all the IVs. But unlike v1 - * policies, for v2 policies in this case we don't encrypt with - * the master key directly but rather derive a per-mode key. - * This ensures that the master key is consistently used only - * for HKDF, avoiding key reuse issues. + * DIRECT_KEY: instead of deriving per-file encryption keys, the + * per-file nonce will be included in all the IVs. But unlike + * v1 policies, for v2 policies in this case we don't encrypt + * with the master key directly but rather derive a per-mode + * encryption key. This ensures that the master key is + * consistently used only for HKDF, avoiding key reuse issues. */ - err = setup_per_mode_key(ci, mk, mk->mk_direct_tfms, - HKDF_CONTEXT_DIRECT_KEY, false); + err = setup_per_mode_enc_key(ci, mk, mk->mk_direct_tfms, + HKDF_CONTEXT_DIRECT_KEY, false); } else if (ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64) { /* @@ -213,20 +213,21 @@ static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci, * the IVs. This format is optimized for use with inline * encryption hardware compliant with the UFS or eMMC standards. */ - err = setup_per_mode_key(ci, mk, mk->mk_iv_ino_lblk_64_tfms, - HKDF_CONTEXT_IV_INO_LBLK_64_KEY, true); + err = setup_per_mode_enc_key(ci, mk, mk->mk_iv_ino_lblk_64_tfms, + HKDF_CONTEXT_IV_INO_LBLK_64_KEY, + true); } else { u8 derived_key[FSCRYPT_MAX_KEY_SIZE]; err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf, - HKDF_CONTEXT_PER_FILE_KEY, + HKDF_CONTEXT_PER_FILE_ENC_KEY, ci->ci_nonce, FS_KEY_DERIVATION_NONCE_SIZE, derived_key, ci->ci_mode->keysize); if (err) return err; - err = fscrypt_set_derived_key(ci, derived_key); + err = fscrypt_set_per_file_enc_key(ci, derived_key); memzero_explicit(derived_key, ci->ci_mode->keysize); } if (err) diff --git a/fs/crypto/keysetup_v1.c b/fs/crypto/keysetup_v1.c index 6b8815d7448d..8a97a8dd8ebb 100644 --- a/fs/crypto/keysetup_v1.c +++ b/fs/crypto/keysetup_v1.c @@ -9,7 +9,7 @@ * This file implements compatibility functions for the original encryption * policy version ("v1"), including: * - * - Deriving per-file keys using the AES-128-ECB based KDF + * - Deriving per-file encryption keys using the AES-128-ECB based KDF * (rather than the new method of using HKDF-SHA512) * * - Retrieving fscrypt master keys from process-subscribed keyrings @@ -283,7 +283,7 @@ static int setup_v1_file_key_derived(struct fscrypt_info *ci, if (err) goto out; - err = fscrypt_set_derived_key(ci, derived_key); + err = fscrypt_set_per_file_enc_key(ci, derived_key); out: kzfree(derived_key); return err; From 89aca68d2fbac03d49bf33d1036dc04e769594d5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 20 Jan 2020 14:31:59 -0800 Subject: [PATCH 3023/3715] ubifs: don't trigger assertion on invalid no-key filename If userspace provides an invalid fscrypt no-key filename which encodes a hash value with any of the UBIFS node type bits set (i.e. the high 3 bits), gracefully report ENOENT rather than triggering ubifs_assert(). Test case with kvm-xfstests shell: . fs/ubifs/config . ~/xfstests/common/encrypt dev=$(__blkdev_to_ubi_volume /dev/vdc) ubiupdatevol $dev -t mount $dev /mnt -t ubifs mkdir /mnt/edir xfs_io -c set_encpolicy /mnt/edir rm /mnt/edir/_,,,,,DAAAAAAAAAAAAAAAAAAAAAAAAAA With the bug, the following assertion fails on the 'rm' command: [ 19.066048] UBIFS error (ubi0:0 pid 379): ubifs_assert_failed: UBIFS assert failed: !(hash & ~UBIFS_S_KEY_HASH_MASK), in fs/ubifs/key.h:170 Fixes: f4f61d2cc6d8 ("ubifs: Implement encrypted filenames") Cc: # v4.10+ Link: https://lore.kernel.org/r/20200120223201.241390-5-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/ubifs/dir.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index f7057c320a33..6bba57bbf1ae 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -240,6 +240,8 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, if (nm.hash) { ubifs_assert(fname_len(&nm) == 0); ubifs_assert(fname_name(&nm) == NULL); + if (nm.hash & ~UBIFS_S_KEY_HASH_MASK) + goto done; /* ENOENT */ dent_key_init_hash(c, &key, dir->i_ino, nm.hash); err = ubifs_tnc_lookup_dh(c, &key, dent, nm.minor_hash); } else { From 7fd1c005ecf02122fe5026a1e5d0d6e23e298f72 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 20 Jan 2020 14:32:00 -0800 Subject: [PATCH 3024/3715] ubifs: allow both hash and disk name to be provided in no-key names In order to support a new dirhash method that is a secret-keyed hash over the plaintext filenames (which will be used by encrypted+casefolded directories on ext4 and f2fs), fscrypt will be switching to a new no-key name format that always encodes the dirhash in the name. UBIFS isn't happy with this because it has assertions that verify that either the hash or the disk name is provided, not both. Change it to use the disk name if one is provided, even if a hash is available too; else use the hash. Link: https://lore.kernel.org/r/20200120223201.241390-6-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/ubifs/dir.c | 4 +--- fs/ubifs/journal.c | 4 ++-- fs/ubifs/key.h | 1 - 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 6bba57bbf1ae..0dc907a67889 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -237,9 +237,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, goto out_fname; } - if (nm.hash) { - ubifs_assert(fname_len(&nm) == 0); - ubifs_assert(fname_name(&nm) == NULL); + if (fname_name(&nm) == NULL) { if (nm.hash & ~UBIFS_S_KEY_HASH_MASK) goto done; /* ENOENT */ dent_key_init_hash(c, &key, dir->i_ino, nm.hash); diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 04c4ec6483e5..708bd86cf020 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -583,7 +583,7 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, if (!xent) { dent->ch.node_type = UBIFS_DENT_NODE; - if (nm->hash) + if (fname_name(nm) == NULL) dent_key_init_hash(c, &dent_key, dir->i_ino, nm->hash); else dent_key_init(c, &dent_key, dir->i_ino, nm); @@ -630,7 +630,7 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, kfree(dent); if (deletion) { - if (nm->hash) + if (fname_name(nm) == NULL) err = ubifs_tnc_remove_dh(c, &dent_key, nm->minor_hash); else err = ubifs_tnc_remove_nm(c, &dent_key, nm); diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h index b1f7c0caa3ac..7547be512db2 100644 --- a/fs/ubifs/key.h +++ b/fs/ubifs/key.h @@ -162,7 +162,6 @@ static inline void dent_key_init(const struct ubifs_info *c, uint32_t hash = c->key_hash(fname_name(nm), fname_len(nm)); ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); - ubifs_assert(!nm->hash && !nm->minor_hash); key->u32[0] = inum; key->u32[1] = hash | (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS); } From fe6e85580b05e9ead1b6dd9fe10201555130c838 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Mon, 20 Jan 2020 14:32:01 -0800 Subject: [PATCH 3025/3715] fscrypt: improve format of no-key names When an encrypted directory is listed without the key, the filesystem must show "no-key names" that uniquely identify directory entries, are at most 255 (NAME_MAX) bytes long, and don't contain '/' or '\0'. Currently, for short names the no-key name is the base64 encoding of the ciphertext filename, while for long names it's the base64 encoding of the ciphertext filename's dirhash and second-to-last 16-byte block. This format has the following problems: - Since it doesn't always include the dirhash, it's incompatible with directories that will use a secret-keyed dirhash over the plaintext filenames. In this case, the dirhash won't be computable from the ciphertext name without the key, so it instead must be retrieved from the directory entry and always included in the no-key name. Casefolded encrypted directories will use this type of dirhash. - It's ambiguous: it's possible to craft two filenames that map to the same no-key name, since the method used to abbreviate long filenames doesn't use a proper cryptographic hash function. Solve both these problems by switching to a new no-key name format that is the base64 encoding of a variable-length structure that contains the dirhash, up to 149 bytes of the ciphertext filename, and (if any bytes remain) the SHA-256 of the remaining bytes of the ciphertext filename. This ensures that each no-key name contains everything needed to find the directory entry again, contains only legal characters, doesn't exceed NAME_MAX, is unambiguous unless there's a SHA-256 collision, and that we only take the performance hit of SHA-256 on very long filenames. Note: this change does *not* address the existing issue where users can modify the 'dirhash' part of a no-key name and the filesystem may still accept the name. Signed-off-by: Daniel Rosenberg [EB: improved comments and commit message, fixed checking return value of base64_decode(), check for SHA-256 error, continue to set disk_name for short names to keep matching simpler, and many other cleanups] Link: https://lore.kernel.org/r/20200120223201.241390-7-ebiggers@kernel.org Signed-off-by: Eric Biggers --- Documentation/filesystems/fscrypt.rst | 2 +- fs/crypto/Kconfig | 1 + fs/crypto/fname.c | 219 ++++++++++++++++++++------ include/linux/fscrypt.h | 77 +-------- 4 files changed, 172 insertions(+), 127 deletions(-) diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst index 28524dcc51c9..fbcd185d15da 100644 --- a/Documentation/filesystems/fscrypt.rst +++ b/Documentation/filesystems/fscrypt.rst @@ -1202,7 +1202,7 @@ filesystem-specific hash(es) needed for directory lookups. This allows the filesystem to still, with a high degree of confidence, map the filename given in ->lookup() back to a particular directory entry that was previously listed by readdir(). See :c:type:`struct -fscrypt_digested_name` in the source for more details. +fscrypt_nokey_name` in the source for more details. Note that the precise way that filenames are presented to userspace without the key is subject to change in the future. It is only meant diff --git a/fs/crypto/Kconfig b/fs/crypto/Kconfig index d0a0238e37dd..a7cc1f9808d1 100644 --- a/fs/crypto/Kconfig +++ b/fs/crypto/Kconfig @@ -20,5 +20,6 @@ config FS_ENCRYPTION_ALGS select CRYPTO_CTS select CRYPTO_ECB select CRYPTO_HMAC + select CRYPTO_SHA256 select CRYPTO_SHA512 select CRYPTO_XTS diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 851d2082ecfe..5db3cc8c07e3 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -13,9 +13,86 @@ #include #include +#include +#include #include #include "fscrypt_private.h" +/** + * struct fscrypt_nokey_name - identifier for directory entry when key is absent + * + * When userspace lists an encrypted directory without access to the key, the + * filesystem must present a unique "no-key name" for each filename that allows + * it to find the directory entry again if requested. Naively, that would just + * mean using the ciphertext filenames. However, since the ciphertext filenames + * can contain illegal characters ('\0' and '/'), they must be encoded in some + * way. We use base64. But that can cause names to exceed NAME_MAX (255 + * bytes), so we also need to use a strong hash to abbreviate long names. + * + * The filesystem may also need another kind of hash, the "dirhash", to quickly + * find the directory entry. Since filesystems normally compute the dirhash + * over the on-disk filename (i.e. the ciphertext), it's not computable from + * no-key names that abbreviate the ciphertext using the strong hash to fit in + * NAME_MAX. It's also not computable if it's a keyed hash taken over the + * plaintext (but it may still be available in the on-disk directory entry); + * casefolded directories use this type of dirhash. At least in these cases, + * each no-key name must include the name's dirhash too. + * + * To meet all these requirements, we base64-encode the following + * variable-length structure. It contains the dirhash, or 0's if the filesystem + * didn't provide one; up to 149 bytes of the ciphertext name; and for + * ciphertexts longer than 149 bytes, also the SHA-256 of the remaining bytes. + * + * This ensures that each no-key name contains everything needed to find the + * directory entry again, contains only legal characters, doesn't exceed + * NAME_MAX, is unambiguous unless there's a SHA-256 collision, and that we only + * take the performance hit of SHA-256 on very long filenames (which are rare). + */ +struct fscrypt_nokey_name { + u32 dirhash[2]; + u8 bytes[149]; + u8 sha256[SHA256_DIGEST_SIZE]; +}; /* 189 bytes => 252 bytes base64-encoded, which is <= NAME_MAX (255) */ + +/* + * Decoded size of max-size nokey name, i.e. a name that was abbreviated using + * the strong hash and thus includes the 'sha256' field. This isn't simply + * sizeof(struct fscrypt_nokey_name), as the padding at the end isn't included. + */ +#define FSCRYPT_NOKEY_NAME_MAX offsetofend(struct fscrypt_nokey_name, sha256) + +static struct crypto_shash *sha256_hash_tfm; + +static int fscrypt_do_sha256(const u8 *data, unsigned int data_len, u8 *result) +{ + struct crypto_shash *tfm = READ_ONCE(sha256_hash_tfm); + + if (unlikely(!tfm)) { + struct crypto_shash *prev_tfm; + + tfm = crypto_alloc_shash("sha256", 0, 0); + if (IS_ERR(tfm)) { + fscrypt_err(NULL, + "Error allocating SHA-256 transform: %ld", + PTR_ERR(tfm)); + return PTR_ERR(tfm); + } + prev_tfm = cmpxchg(&sha256_hash_tfm, NULL, tfm); + if (prev_tfm) { + crypto_free_shash(tfm); + tfm = prev_tfm; + } + } + { + SHASH_DESC_ON_STACK(desc, tfm); + + desc->tfm = tfm; + desc->flags = 0; + + return crypto_shash_digest(desc, data, data_len, result); + } +} + static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) { if (str->len == 1 && str->name[0] == '.') @@ -207,9 +284,7 @@ int fscrypt_fname_alloc_buffer(const struct inode *inode, u32 max_encrypted_len, struct fscrypt_str *crypto_str) { - const u32 max_encoded_len = - max_t(u32, BASE64_CHARS(FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE), - 1 + BASE64_CHARS(sizeof(struct fscrypt_digested_name))); + const u32 max_encoded_len = BASE64_CHARS(FSCRYPT_NOKEY_NAME_MAX); u32 max_presented_len; max_presented_len = max(max_encoded_len, max_encrypted_len); @@ -242,9 +317,9 @@ EXPORT_SYMBOL(fscrypt_fname_free_buffer); * * The caller must have allocated sufficient memory for the @oname string. * - * If the key is available, we'll decrypt the disk name; otherwise, we'll encode - * it for presentation. Short names are directly base64-encoded, while long - * names are encoded in fscrypt_digested_name format. + * If the key is available, we'll decrypt the disk name. Otherwise, we'll + * encode it for presentation in fscrypt_nokey_name format. + * See struct fscrypt_nokey_name for details. * * Return: 0 on success, -errno on failure */ @@ -254,7 +329,9 @@ int fscrypt_fname_disk_to_usr(const struct inode *inode, struct fscrypt_str *oname) { const struct qstr qname = FSTR_TO_QSTR(iname); - struct fscrypt_digested_name digested_name; + struct fscrypt_nokey_name nokey_name; + u32 size; /* size of the unencoded no-key name */ + int err; if (fscrypt_is_dot_dotdot(&qname)) { oname->name[0] = '.'; @@ -269,24 +346,37 @@ int fscrypt_fname_disk_to_usr(const struct inode *inode, if (fscrypt_has_encryption_key(inode)) return fname_decrypt(inode, iname, oname); - if (iname->len <= FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE) { - oname->len = base64_encode(iname->name, iname->len, - oname->name); - return 0; - } + /* + * Sanity check that struct fscrypt_nokey_name doesn't have padding + * between fields and that its encoded size never exceeds NAME_MAX. + */ + BUILD_BUG_ON(offsetofend(struct fscrypt_nokey_name, dirhash) != + offsetof(struct fscrypt_nokey_name, bytes)); + BUILD_BUG_ON(offsetofend(struct fscrypt_nokey_name, bytes) != + offsetof(struct fscrypt_nokey_name, sha256)); + BUILD_BUG_ON(BASE64_CHARS(FSCRYPT_NOKEY_NAME_MAX) > NAME_MAX); + if (hash) { - digested_name.hash = hash; - digested_name.minor_hash = minor_hash; + nokey_name.dirhash[0] = hash; + nokey_name.dirhash[1] = minor_hash; } else { - digested_name.hash = 0; - digested_name.minor_hash = 0; + nokey_name.dirhash[0] = 0; + nokey_name.dirhash[1] = 0; } - memcpy(digested_name.digest, - FSCRYPT_FNAME_DIGEST(iname->name, iname->len), - FSCRYPT_FNAME_DIGEST_SIZE); - oname->name[0] = '_'; - oname->len = 1 + base64_encode((const u8 *)&digested_name, - sizeof(digested_name), oname->name + 1); + if (iname->len <= sizeof(nokey_name.bytes)) { + memcpy(nokey_name.bytes, iname->name, iname->len); + size = offsetof(struct fscrypt_nokey_name, bytes[iname->len]); + } else { + memcpy(nokey_name.bytes, iname->name, sizeof(nokey_name.bytes)); + /* Compute strong hash of remaining part of name. */ + err = fscrypt_do_sha256(&iname->name[sizeof(nokey_name.bytes)], + iname->len - sizeof(nokey_name.bytes), + nokey_name.sha256); + if (err) + return err; + size = FSCRYPT_NOKEY_NAME_MAX; + } + oname->len = base64_encode((const u8 *)&nokey_name, size, oname->name); return 0; } EXPORT_SYMBOL(fscrypt_fname_disk_to_usr); @@ -307,8 +397,7 @@ EXPORT_SYMBOL(fscrypt_fname_disk_to_usr); * get the disk_name. * * Else, for keyless @lookup operations, @iname is the presented ciphertext, so - * we decode it to get either the ciphertext disk_name (for short names) or the - * fscrypt_digested_name (for long names). Non-@lookup operations will be + * we decode it to get the fscrypt_nokey_name. Non-@lookup operations will be * impossible in this case, so we fail them with ENOKEY. * * If successful, fscrypt_free_filename() must be called later to clean up. @@ -318,8 +407,8 @@ EXPORT_SYMBOL(fscrypt_fname_disk_to_usr); int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, int lookup, struct fscrypt_name *fname) { + struct fscrypt_nokey_name *nokey_name; int ret; - int digested; memset(fname, 0, sizeof(struct fscrypt_name)); fname->usr_fname = iname; @@ -359,40 +448,31 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, * We don't have the key and we are doing a lookup; decode the * user-supplied name */ - if (iname->name[0] == '_') { - if (iname->len != - 1 + BASE64_CHARS(sizeof(struct fscrypt_digested_name))) - return -ENOENT; - digested = 1; - } else { - if (iname->len > - BASE64_CHARS(FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE)) - return -ENOENT; - digested = 0; - } - fname->crypto_buf.name = - kmalloc(max_t(size_t, FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE, - sizeof(struct fscrypt_digested_name)), - GFP_KERNEL); + if (iname->len > BASE64_CHARS(FSCRYPT_NOKEY_NAME_MAX)) + return -ENOENT; + + fname->crypto_buf.name = kmalloc(FSCRYPT_NOKEY_NAME_MAX, GFP_KERNEL); if (fname->crypto_buf.name == NULL) return -ENOMEM; - ret = base64_decode(iname->name + digested, iname->len - digested, - fname->crypto_buf.name); - if (ret < 0) { + ret = base64_decode(iname->name, iname->len, fname->crypto_buf.name); + if (ret < (int)offsetof(struct fscrypt_nokey_name, bytes[1]) || + (ret > offsetof(struct fscrypt_nokey_name, sha256) && + ret != FSCRYPT_NOKEY_NAME_MAX)) { ret = -ENOENT; goto errout; } fname->crypto_buf.len = ret; - if (digested) { - const struct fscrypt_digested_name *n = - (const void *)fname->crypto_buf.name; - fname->hash = n->hash; - fname->minor_hash = n->minor_hash; - } else { - fname->disk_name.name = fname->crypto_buf.name; - fname->disk_name.len = fname->crypto_buf.len; + + nokey_name = (void *)fname->crypto_buf.name; + fname->hash = nokey_name->dirhash[0]; + fname->minor_hash = nokey_name->dirhash[1]; + if (ret != FSCRYPT_NOKEY_NAME_MAX) { + /* The full ciphertext filename is available. */ + fname->disk_name.name = nokey_name->bytes; + fname->disk_name.len = + ret - offsetof(struct fscrypt_nokey_name, bytes); } return 0; @@ -402,6 +482,43 @@ errout: } EXPORT_SYMBOL(fscrypt_setup_filename); +/** + * fscrypt_match_name() - test whether the given name matches a directory entry + * @fname: the name being searched for + * @de_name: the name from the directory entry + * @de_name_len: the length of @de_name in bytes + * + * Normally @fname->disk_name will be set, and in that case we simply compare + * that to the name stored in the directory entry. The only exception is that + * if we don't have the key for an encrypted directory and the name we're + * looking for is very long, then we won't have the full disk_name and instead + * we'll need to match against a fscrypt_nokey_name that includes a strong hash. + * + * Return: %true if the name matches, otherwise %false. + */ +bool fscrypt_match_name(const struct fscrypt_name *fname, + const u8 *de_name, u32 de_name_len) +{ + const struct fscrypt_nokey_name *nokey_name = + (const void *)fname->crypto_buf.name; + u8 sha256[SHA256_DIGEST_SIZE]; + + if (likely(fname->disk_name.name)) { + if (de_name_len != fname->disk_name.len) + return false; + return !memcmp(de_name, fname->disk_name.name, de_name_len); + } + if (de_name_len <= sizeof(nokey_name->bytes)) + return false; + if (memcmp(de_name, nokey_name->bytes, sizeof(nokey_name->bytes))) + return false; + if (fscrypt_do_sha256(&de_name[sizeof(nokey_name->bytes)], + de_name_len - sizeof(nokey_name->bytes), sha256)) + return false; + return !memcmp(sha256, nokey_name->sha256, sizeof(sha256)); +} +EXPORT_SYMBOL_GPL(fscrypt_match_name); + /** * fscrypt_fname_siphash() - calculate the SipHash of a filename * @dir: the parent directory diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index ccdee616e7e5..fd1bc965a3e3 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -172,81 +172,8 @@ extern int fscrypt_fname_disk_to_usr(const struct inode *inode, u32 hash, u32 minor_hash, const struct fscrypt_str *iname, struct fscrypt_str *oname); - -#define FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE 32 - -/* Extracts the second-to-last ciphertext block; see explanation below */ -#define FSCRYPT_FNAME_DIGEST(name, len) \ - ((name) + round_down((len) - FS_CRYPTO_BLOCK_SIZE - 1, \ - FS_CRYPTO_BLOCK_SIZE)) - -#define FSCRYPT_FNAME_DIGEST_SIZE FS_CRYPTO_BLOCK_SIZE - -/** - * fscrypt_digested_name - alternate identifier for an on-disk filename - * - * When userspace lists an encrypted directory without access to the key, - * filenames whose ciphertext is longer than FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE - * bytes are shown in this abbreviated form (base64-encoded) rather than as the - * full ciphertext (base64-encoded). This is necessary to allow supporting - * filenames up to NAME_MAX bytes, since base64 encoding expands the length. - * - * To make it possible for filesystems to still find the correct directory entry - * despite not knowing the full on-disk name, we encode any filesystem-specific - * 'hash' and/or 'minor_hash' which the filesystem may need for its lookups, - * followed by the second-to-last ciphertext block of the filename. Due to the - * use of the CBC-CTS encryption mode, the second-to-last ciphertext block - * depends on the full plaintext. (Note that ciphertext stealing causes the - * last two blocks to appear "flipped".) This makes accidental collisions very - * unlikely: just a 1 in 2^128 chance for two filenames to collide even if they - * share the same filesystem-specific hashes. - * - * However, this scheme isn't immune to intentional collisions, which can be - * created by anyone able to create arbitrary plaintext filenames and view them - * without the key. Making the "digest" be a real cryptographic hash like - * SHA-256 over the full ciphertext would prevent this, although it would be - * less efficient and harder to implement, especially since the filesystem would - * need to calculate it for each directory entry examined during a search. - */ -struct fscrypt_digested_name { - u32 hash; - u32 minor_hash; - u8 digest[FSCRYPT_FNAME_DIGEST_SIZE]; -}; - -/** - * fscrypt_match_name() - test whether the given name matches a directory entry - * @fname: the name being searched for - * @de_name: the name from the directory entry - * @de_name_len: the length of @de_name in bytes - * - * Normally @fname->disk_name will be set, and in that case we simply compare - * that to the name stored in the directory entry. The only exception is that - * if we don't have the key for an encrypted directory and a filename in it is - * very long, then we won't have the full disk_name and we'll instead need to - * match against the fscrypt_digested_name. - * - * Return: %true if the name matches, otherwise %false. - */ -static inline bool fscrypt_match_name(const struct fscrypt_name *fname, - const u8 *de_name, u32 de_name_len) -{ - if (unlikely(!fname->disk_name.name)) { - const struct fscrypt_digested_name *n = - (const void *)fname->crypto_buf.name; - if (WARN_ON_ONCE(fname->usr_fname->name[0] != '_')) - return false; - if (de_name_len <= FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE) - return false; - return !memcmp(FSCRYPT_FNAME_DIGEST(de_name, de_name_len), - n->digest, FSCRYPT_FNAME_DIGEST_SIZE); - } - - if (de_name_len != fname->disk_name.len) - return false; - return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len); -} - +extern bool fscrypt_match_name(const struct fscrypt_name *fname, + const u8 *de_name, u32 de_name_len); extern u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name); From 693ad1e5b397bbc6c7fb971c8406426a5f21c8c1 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 6 Jan 2020 12:54:10 -0800 Subject: [PATCH 3026/3715] fs-verity: implement readahead for FS_IOC_ENABLE_VERITY When it builds the first level of the Merkle tree, FS_IOC_ENABLE_VERITY sequentially reads each page of the file using read_mapping_page(). This works fine if the file's data is already in pagecache, which should normally be the case, since this ioctl is normally used immediately after writing out the file. But in any other case this implementation performs very poorly, since only one page is read at a time. Fix this by implementing readahead using the functions from mm/readahead.c. This improves performance in the uncached case by about 20x, as seen in the following benchmarks done on a 250MB file (on x86_64 with SHA-NI): FS_IOC_ENABLE_VERITY uncached (before) 3.299s FS_IOC_ENABLE_VERITY uncached (after) 0.160s FS_IOC_ENABLE_VERITY cached 0.147s sha256sum uncached 0.191s sha256sum cached 0.145s Note: we could instead switch to kernel_read(). But that would mean we'd no longer be hashing the data directly from the pagecache, which is a nice optimization of its own. And using kernel_read() would require allocating another temporary buffer, hashing the data and tree pages separately, and explicitly zero-padding the last page -- so it wouldn't really be any simpler than direct pagecache access, at least for now. Link: https://lore.kernel.org/r/20200106205410.136707-1-ebiggers@kernel.org Reviewed-by: Theodore Ts'o Signed-off-by: Eric Biggers --- fs/verity/enable.c | 45 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 39 insertions(+), 6 deletions(-) diff --git a/fs/verity/enable.c b/fs/verity/enable.c index eabc6ac19906..1f05f7319377 100644 --- a/fs/verity/enable.c +++ b/fs/verity/enable.c @@ -13,13 +13,42 @@ #include #include -static int build_merkle_tree_level(struct inode *inode, unsigned int level, +/* + * Read a file data page for Merkle tree construction. Do aggressive readahead, + * since we're sequentially reading the entire file. + */ +static struct page *read_file_data_page(struct file *filp, pgoff_t index, + struct file_ra_state *ra, + unsigned long remaining_pages) +{ + struct page *page; + + page = find_get_page_flags(filp->f_mapping, index, FGP_ACCESSED); + if (!page || !PageUptodate(page)) { + if (page) + put_page(page); + else + page_cache_sync_readahead(filp->f_mapping, ra, filp, + index, remaining_pages); + page = read_mapping_page(filp->f_mapping, index, NULL); + if (IS_ERR(page)) + return page; + } + if (PageReadahead(page)) + page_cache_async_readahead(filp->f_mapping, ra, filp, page, + index, remaining_pages); + return page; +} + +static int build_merkle_tree_level(struct file *filp, unsigned int level, u64 num_blocks_to_hash, const struct merkle_tree_params *params, u8 *pending_hashes, struct ahash_request *req) { + struct inode *inode = file_inode(filp); const struct fsverity_operations *vops = inode->i_sb->s_vop; + struct file_ra_state ra = { 0 }; unsigned int pending_size = 0; u64 dst_block_num; u64 i; @@ -36,6 +65,8 @@ static int build_merkle_tree_level(struct inode *inode, unsigned int level, dst_block_num = 0; /* unused */ } + file_ra_state_init(&ra, filp->f_mapping); + for (i = 0; i < num_blocks_to_hash; i++) { struct page *src_page; @@ -45,7 +76,8 @@ static int build_merkle_tree_level(struct inode *inode, unsigned int level, if (level == 0) { /* Leaf: hashing a data block */ - src_page = read_mapping_page(inode->i_mapping, i, NULL); + src_page = read_file_data_page(filp, i, &ra, + num_blocks_to_hash - i); if (IS_ERR(src_page)) { err = PTR_ERR(src_page); fsverity_err(inode, @@ -103,17 +135,18 @@ static int build_merkle_tree_level(struct inode *inode, unsigned int level, } /* - * Build the Merkle tree for the given inode using the given parameters, and + * Build the Merkle tree for the given file using the given parameters, and * return the root hash in @root_hash. * * The tree is written to a filesystem-specific location as determined by the * ->write_merkle_tree_block() method. However, the blocks that comprise the * tree are the same for all filesystems. */ -static int build_merkle_tree(struct inode *inode, +static int build_merkle_tree(struct file *filp, const struct merkle_tree_params *params, u8 *root_hash) { + struct inode *inode = file_inode(filp); u8 *pending_hashes; struct ahash_request *req; u64 blocks; @@ -139,7 +172,7 @@ static int build_merkle_tree(struct inode *inode, blocks = (inode->i_size + params->block_size - 1) >> params->log_blocksize; for (level = 0; level <= params->num_levels; level++) { - err = build_merkle_tree_level(inode, level, blocks, params, + err = build_merkle_tree_level(filp, level, blocks, params, pending_hashes, req); if (err) goto out; @@ -227,7 +260,7 @@ static int enable_verity(struct file *filp, */ pr_debug("Building Merkle tree...\n"); BUILD_BUG_ON(sizeof(desc->root_hash) < FS_VERITY_MAX_DIGEST_SIZE); - err = build_merkle_tree(inode, ¶ms, desc->root_hash); + err = build_merkle_tree(filp, ¶ms, desc->root_hash); if (err) { fsverity_err(inode, "Error %d building Merkle tree", err); goto rollback; From 2ff972ed7e88ba838552d9c5569c699f18244b37 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 17 Aug 2018 15:45:42 -0700 Subject: [PATCH 3027/3715] ext4: readpages() should submit IO as read-ahead a_ops->readpages() is only ever used for read-ahead. Ensure that we pass this information down to the block layer. Link: http://lkml.kernel.org/r/20180621010725.17813-5-axboe@kernel.dk Signed-off-by: Jens Axboe Reviewed-by: Andrew Morton Cc: Al Viro Cc: Chris Mason Cc: Christoph Hellwig Cc: Theodore Ts'o Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext4/ext4.h | 2 +- fs/ext4/inode.c | 5 +++-- fs/ext4/readpage.c | 5 +++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 8d6accd3f763..f933c35fdcc1 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3201,7 +3201,7 @@ static inline void ext4_set_de_type(struct super_block *sb, /* readpages.c */ extern int ext4_mpage_readpages(struct address_space *mapping, struct list_head *pages, struct page *page, - unsigned nr_pages); + unsigned nr_pages, bool is_readahead); extern int __init ext4_init_post_read_processing(void); extern void ext4_exit_post_read_processing(void); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 4f2c73b52c83..4b68bfe9e5cb 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3328,7 +3328,8 @@ static int ext4_readpage(struct file *file, struct page *page) ret = ext4_readpage_inline(inode, page); if (ret == -EAGAIN) - return ext4_mpage_readpages(page->mapping, NULL, page, 1); + return ext4_mpage_readpages(page->mapping, NULL, page, 1, + false); return ret; } @@ -3343,7 +3344,7 @@ ext4_readpages(struct file *file, struct address_space *mapping, if (ext4_has_inline_data(inode)) return 0; - return ext4_mpage_readpages(mapping, pages, NULL, nr_pages); + return ext4_mpage_readpages(mapping, pages, NULL, nr_pages, true); } static void ext4_invalidatepage(struct page *page, unsigned int offset, diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index 49b8af055fa7..b82456f0f691 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -211,7 +211,7 @@ static inline loff_t ext4_readpage_limit(struct inode *inode) int ext4_mpage_readpages(struct address_space *mapping, struct list_head *pages, struct page *page, - unsigned nr_pages) + unsigned nr_pages, bool is_readahead) { struct bio *bio = NULL; sector_t last_block_in_bio = 0; @@ -373,7 +373,8 @@ int ext4_mpage_readpages(struct address_space *mapping, bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9); bio->bi_end_io = mpage_end_io; bio->bi_private = ctx; - bio_set_op_attrs(bio, REQ_OP_READ, 0); + bio_set_op_attrs(bio, REQ_OP_READ, + is_readahead ? REQ_RAHEAD : 0); } length = first_hole << blkbits; From 18ddf07481a4bcd0520abd8c8ad92f8bd6db2af2 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 6 Jan 2020 12:55:33 -0800 Subject: [PATCH 3028/3715] fs-verity: implement readahead of Merkle tree pages When fs-verity verifies data pages, currently it reads each Merkle tree page synchronously using read_mapping_page(). Therefore, when the Merkle tree pages aren't already cached, fs-verity causes an extra 4 KiB I/O request for every 512 KiB of data (assuming that the Merkle tree uses SHA-256 and 4 KiB blocks). This results in more I/O requests and performance loss than is strictly necessary. Therefore, implement readahead of the Merkle tree pages. For simplicity, we take advantage of the fact that the kernel already does readahead of the file's *data*, just like it does for any other file. Due to this, we don't really need a separate readahead state (struct file_ra_state) just for the Merkle tree, but rather we just need to piggy-back on the existing data readahead requests. We also only really need to bother with the first level of the Merkle tree, since the usual fan-out factor is 128, so normally over 99% of Merkle tree I/O requests are for the first level. Therefore, make fsverity_verify_bio() enable readahead of the first Merkle tree level, for up to 1/4 the number of pages in the bio, when it sees that the REQ_RAHEAD flag is set on the bio. The readahead size is then passed down to ->read_merkle_tree_page() for the filesystem to (optionally) implement if it sees that the requested page is uncached. While we're at it, also make build_merkle_tree_level() set the Merkle tree readahead size, since it's easy to do there. However, for now don't set the readahead size in fsverity_verify_page(), since currently it's only used to verify holes on ext4 and f2fs, and it would need parameters added to know how much to read ahead. This patch significantly improves fs-verity sequential read performance. Some quick benchmarks with 'cat'-ing a 250MB file after dropping caches: On an ARM64 phone (using sha256-ce): Before: 217 MB/s After: 263 MB/s (compare to sha256sum of non-verity file: 357 MB/s) In an x86_64 VM (using sha256-avx2): Before: 173 MB/s After: 215 MB/s (compare to sha256sum of non-verity file: 223 MB/s) Link: https://lore.kernel.org/r/20200106205533.137005-1-ebiggers@kernel.org Reviewed-by: Theodore Ts'o Signed-off-by: Eric Biggers --- fs/ext4/verity.c | 51 +++++++++++++++++++++++++++++++++--- fs/f2fs/verity.c | 51 +++++++++++++++++++++++++++++++++--- fs/verity/enable.c | 8 +++++- fs/verity/fsverity_private.h | 1 + fs/verity/open.c | 1 + fs/verity/verify.c | 34 +++++++++++++++++++----- include/linux/fsverity.h | 7 ++++- 7 files changed, 139 insertions(+), 14 deletions(-) diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c index d0d8a9795dd6..bd717248a4bc 100644 --- a/fs/ext4/verity.c +++ b/fs/ext4/verity.c @@ -342,12 +342,57 @@ static int ext4_get_verity_descriptor(struct inode *inode, void *buf, return desc_size; } -static struct page *ext4_read_merkle_tree_page(struct inode *inode, - pgoff_t index) +/* + * Prefetch some pages from the file's Merkle tree. + * + * This is basically a stripped-down version of __do_page_cache_readahead() + * which works on pages past i_size. + */ +static void ext4_merkle_tree_readahead(struct address_space *mapping, + pgoff_t start_index, unsigned long count) { + LIST_HEAD(pages); + unsigned int nr_pages = 0; + struct page *page; + pgoff_t index; + struct blk_plug plug; + + for (index = start_index; index < start_index + count; index++) { + rcu_read_lock(); + page = radix_tree_lookup(&mapping->page_tree, index); + rcu_read_unlock(); + if (!page || radix_tree_exceptional_entry(page)) { + page = __page_cache_alloc(readahead_gfp_mask(mapping)); + if (!page) + break; + page->index = index; + list_add(&page->lru, &pages); + nr_pages++; + } + } + blk_start_plug(&plug); + ext4_mpage_readpages(mapping, &pages, NULL, nr_pages, true); + blk_finish_plug(&plug); +} + +static struct page *ext4_read_merkle_tree_page(struct inode *inode, + pgoff_t index, + unsigned long num_ra_pages) +{ + struct page *page; + index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT; - return read_mapping_page(inode->i_mapping, index, NULL); + page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED); + if (!page || !PageUptodate(page)) { + if (page) + put_page(page); + else if (num_ra_pages > 1) + ext4_merkle_tree_readahead(inode->i_mapping, index, + num_ra_pages); + page = read_mapping_page(inode->i_mapping, index, NULL); + } + return page; } static int ext4_write_merkle_tree_block(struct inode *inode, const void *buf, diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c index a401ef72bc82..5905050f7fb8 100644 --- a/fs/f2fs/verity.c +++ b/fs/f2fs/verity.c @@ -222,12 +222,57 @@ static int f2fs_get_verity_descriptor(struct inode *inode, void *buf, return size; } -static struct page *f2fs_read_merkle_tree_page(struct inode *inode, - pgoff_t index) +/* + * Prefetch some pages from the file's Merkle tree. + * + * This is basically a stripped-down version of __do_page_cache_readahead() + * which works on pages past i_size. + */ +static void f2fs_merkle_tree_readahead(struct address_space *mapping, + pgoff_t start_index, unsigned long count) { + LIST_HEAD(pages); + unsigned int nr_pages = 0; + struct page *page; + pgoff_t index; + struct blk_plug plug; + + for (index = start_index; index < start_index + count; index++) { + rcu_read_lock(); + page = radix_tree_lookup(&mapping->page_tree, index); + rcu_read_unlock(); + if (!page || radix_tree_exceptional_entry(page)) { + page = __page_cache_alloc(readahead_gfp_mask(mapping)); + if (!page) + break; + page->index = index; + list_add(&page->lru, &pages); + nr_pages++; + } + } + blk_start_plug(&plug); + f2fs_mpage_readpages(mapping, &pages, NULL, nr_pages, true); + blk_finish_plug(&plug); +} + +static struct page *f2fs_read_merkle_tree_page(struct inode *inode, + pgoff_t index, + unsigned long num_ra_pages) +{ + struct page *page; + index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT; - return read_mapping_page(inode->i_mapping, index, NULL); + page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED); + if (!page || !PageUptodate(page)) { + if (page) + put_page(page); + else if (num_ra_pages > 1) + f2fs_merkle_tree_readahead(inode->i_mapping, index, + num_ra_pages); + page = read_mapping_page(inode->i_mapping, index, NULL); + } + return page; } static int f2fs_write_merkle_tree_block(struct inode *inode, const void *buf, diff --git a/fs/verity/enable.c b/fs/verity/enable.c index 1f05f7319377..9d30708d963d 100644 --- a/fs/verity/enable.c +++ b/fs/verity/enable.c @@ -8,6 +8,7 @@ #include "fsverity_private.h" #include +#include #include #include #include @@ -86,9 +87,14 @@ static int build_merkle_tree_level(struct file *filp, unsigned int level, return err; } } else { + unsigned long num_ra_pages = + min_t(unsigned long, num_blocks_to_hash - i, + inode->i_sb->s_bdi->io_pages); + /* Non-leaf: hashing hash block from level below */ src_page = vops->read_merkle_tree_page(inode, - params->level_start[level - 1] + i); + params->level_start[level - 1] + i, + num_ra_pages); if (IS_ERR(src_page)) { err = PTR_ERR(src_page); fsverity_err(inode, diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h index e74c79b64d88..ab9cfdd8f965 100644 --- a/fs/verity/fsverity_private.h +++ b/fs/verity/fsverity_private.h @@ -50,6 +50,7 @@ struct merkle_tree_params { unsigned int log_arity; /* log2(hashes_per_block) */ unsigned int num_levels; /* number of levels in Merkle tree */ u64 tree_size; /* Merkle tree size in bytes */ + unsigned long level0_blocks; /* number of blocks in tree level 0 */ /* * Starting block index for each tree level, ordered from leaf level (0) diff --git a/fs/verity/open.c b/fs/verity/open.c index 4cdd75acbc97..b7b0a5479c6f 100644 --- a/fs/verity/open.c +++ b/fs/verity/open.c @@ -102,6 +102,7 @@ int fsverity_init_merkle_tree_params(struct merkle_tree_params *params, /* temporarily using level_start[] to store blocks in level */ params->level_start[params->num_levels++] = blocks; } + params->level0_blocks = params->level_start[0]; /* Compute the starting block of each level */ offset = 0; diff --git a/fs/verity/verify.c b/fs/verity/verify.c index cf09852e5227..461789903709 100644 --- a/fs/verity/verify.c +++ b/fs/verity/verify.c @@ -84,7 +84,8 @@ static inline int cmp_hashes(const struct fsverity_info *vi, * Return: true if the page is valid, else false. */ static bool verify_page(struct inode *inode, const struct fsverity_info *vi, - struct ahash_request *req, struct page *data_page) + struct ahash_request *req, struct page *data_page, + unsigned long level0_ra_pages) { const struct merkle_tree_params *params = &vi->tree_params; const unsigned int hsize = params->digest_size; @@ -117,8 +118,8 @@ static bool verify_page(struct inode *inode, const struct fsverity_info *vi, pr_debug_ratelimited("Level %d: hindex=%lu, hoffset=%u\n", level, hindex, hoffset); - hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode, - hindex); + hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode, hindex, + level == 0 ? level0_ra_pages : 0); if (IS_ERR(hpage)) { err = PTR_ERR(hpage); fsverity_err(inode, @@ -195,7 +196,7 @@ bool fsverity_verify_page(struct page *page) if (unlikely(!req)) return false; - valid = verify_page(inode, vi, req, page); + valid = verify_page(inode, vi, req, page, 0); ahash_request_free(req); @@ -222,21 +223,42 @@ void fsverity_verify_bio(struct bio *bio) { struct inode *inode = bio->bi_io_vec->bv_page->mapping->host; const struct fsverity_info *vi = inode->i_verity_info; + const struct merkle_tree_params *params = &vi->tree_params; struct ahash_request *req; struct bio_vec *bv; int i; + unsigned long max_ra_pages = 0; - req = ahash_request_alloc(vi->tree_params.hash_alg->tfm, GFP_NOFS); + req = ahash_request_alloc(params->hash_alg->tfm, GFP_NOFS); if (unlikely(!req)) { bio_for_each_segment_all(bv, bio, i) SetPageError(bv->bv_page); return; } + if (bio->bi_opf & REQ_RAHEAD) { + /* + * If this bio is for data readahead, then we also do readahead + * of the first (largest) level of the Merkle tree. Namely, + * when a Merkle tree page is read, we also try to piggy-back on + * some additional pages -- up to 1/4 the number of data pages. + * + * This improves sequential read performance, as it greatly + * reduces the number of I/O requests made to the Merkle tree. + */ + bio_for_each_segment_all(bv, bio, i) + max_ra_pages++; + max_ra_pages /= 4; + } + bio_for_each_segment_all(bv, bio, i) { struct page *page = bv->bv_page; + unsigned long level0_index = page->index >> params->log_arity; + unsigned long level0_ra_pages = + min(max_ra_pages, params->level0_blocks - level0_index); - if (!PageError(page) && !verify_page(inode, vi, req, page)) + if (!PageError(page) && + !verify_page(inode, vi, req, page, level0_ra_pages)) SetPageError(page); } diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index 3b6b8ccebe7d..ecc604e61d61 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -77,6 +77,10 @@ struct fsverity_operations { * * @inode: the inode * @index: 0-based index of the page within the Merkle tree + * @num_ra_pages: The number of Merkle tree pages that should be + * prefetched starting at @index if the page at @index + * isn't already cached. Implementations may ignore this + * argument; it's only a performance optimization. * * This can be called at any time on an open verity file, as well as * between ->begin_enable_verity() and ->end_enable_verity(). It may be @@ -87,7 +91,8 @@ struct fsverity_operations { * Return: the page on success, ERR_PTR() on failure */ struct page *(*read_merkle_tree_page)(struct inode *inode, - pgoff_t index); + pgoff_t index, + unsigned long num_ra_pages); /** * Write a Merkle tree block to the given inode. From 4d9ce1827aa2627bb9e08e396185ebb994dccf80 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 31 Dec 2019 11:55:45 -0600 Subject: [PATCH 3029/3715] fs-verity: use mempool for hash requests When initializing an fs-verity hash algorithm, also initialize a mempool that contains a single preallocated hash request object. Then replace the direct calls to ahash_request_alloc() and ahash_request_free() with allocating and freeing from this mempool. This eliminates the possibility of the allocation failing, which is desirable for the I/O path. This doesn't cause deadlocks because there's no case where multiple hash requests are needed at a time to make forward progress. Link: https://lore.kernel.org/r/20191231175545.20709-1-ebiggers@kernel.org Reviewed-by: Theodore Ts'o Signed-off-by: Eric Biggers --- fs/verity/enable.c | 8 +-- fs/verity/fsverity_private.h | 16 ++++-- fs/verity/hash_algs.c | 100 ++++++++++++++++++++++++++--------- fs/verity/open.c | 4 +- fs/verity/verify.c | 17 +++--- 5 files changed, 99 insertions(+), 46 deletions(-) diff --git a/fs/verity/enable.c b/fs/verity/enable.c index 9d30708d963d..d22f5161ff9a 100644 --- a/fs/verity/enable.c +++ b/fs/verity/enable.c @@ -165,9 +165,11 @@ static int build_merkle_tree(struct file *filp, return 0; } + /* This allocation never fails, since it's mempool-backed. */ + req = fsverity_alloc_hash_request(params->hash_alg, GFP_KERNEL); + pending_hashes = kmalloc(params->block_size, GFP_KERNEL); - req = ahash_request_alloc(params->hash_alg->tfm, GFP_KERNEL); - if (!pending_hashes || !req) + if (!pending_hashes) goto out; /* @@ -189,7 +191,7 @@ static int build_merkle_tree(struct file *filp, err = 0; out: kfree(pending_hashes); - ahash_request_free(req); + fsverity_free_hash_request(params->hash_alg, req); return err; } diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h index ab9cfdd8f965..4b2c8aed0563 100644 --- a/fs/verity/fsverity_private.h +++ b/fs/verity/fsverity_private.h @@ -16,6 +16,7 @@ #include #include +#include struct ahash_request; @@ -37,11 +38,12 @@ struct fsverity_hash_alg { const char *name; /* crypto API name, e.g. sha256 */ unsigned int digest_size; /* digest size in bytes, e.g. 32 for SHA-256 */ unsigned int block_size; /* block size in bytes, e.g. 64 for SHA-256 */ + mempool_t *req_pool; /* mempool with a preallocated hash request */ }; /* Merkle tree parameters: hash algorithm, initial hash state, and topology */ struct merkle_tree_params { - const struct fsverity_hash_alg *hash_alg; /* the hash algorithm */ + struct fsverity_hash_alg *hash_alg; /* the hash algorithm */ const u8 *hashstate; /* initial hash state or NULL */ unsigned int digest_size; /* same as hash_alg->digest_size */ unsigned int block_size; /* size of data and tree blocks */ @@ -115,14 +117,18 @@ struct fsverity_signed_digest { extern struct fsverity_hash_alg fsverity_hash_algs[]; -const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, - unsigned int num); -const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg, +struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, + unsigned int num); +struct ahash_request *fsverity_alloc_hash_request(struct fsverity_hash_alg *alg, + gfp_t gfp_flags); +void fsverity_free_hash_request(struct fsverity_hash_alg *alg, + struct ahash_request *req); +const u8 *fsverity_prepare_hash_state(struct fsverity_hash_alg *alg, const u8 *salt, size_t salt_size); int fsverity_hash_page(const struct merkle_tree_params *params, const struct inode *inode, struct ahash_request *req, struct page *page, u8 *out); -int fsverity_hash_buffer(const struct fsverity_hash_alg *alg, +int fsverity_hash_buffer(struct fsverity_hash_alg *alg, const void *data, size_t size, u8 *out); void __init fsverity_check_hash_algs(void); diff --git a/fs/verity/hash_algs.c b/fs/verity/hash_algs.c index 31e6d7d2389a..6682e4e6b601 100644 --- a/fs/verity/hash_algs.c +++ b/fs/verity/hash_algs.c @@ -24,6 +24,8 @@ struct fsverity_hash_alg fsverity_hash_algs[] = { }, }; +static DEFINE_MUTEX(fsverity_hash_alg_init_mutex); + /** * fsverity_get_hash_alg() - validate and prepare a hash algorithm * @inode: optional inode for logging purposes @@ -36,8 +38,8 @@ struct fsverity_hash_alg fsverity_hash_algs[] = { * * Return: pointer to the hash alg on success, else an ERR_PTR() */ -const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, - unsigned int num) +struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, + unsigned int num) { struct fsverity_hash_alg *alg; struct crypto_ahash *tfm; @@ -50,10 +52,15 @@ const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, } alg = &fsverity_hash_algs[num]; - /* pairs with cmpxchg() below */ - tfm = READ_ONCE(alg->tfm); - if (likely(tfm != NULL)) + /* pairs with smp_store_release() below */ + if (likely(smp_load_acquire(&alg->tfm) != NULL)) return alg; + + mutex_lock(&fsverity_hash_alg_init_mutex); + + if (alg->tfm != NULL) + goto out_unlock; + /* * Using the shash API would make things a bit simpler, but the ahash * API is preferable as it allows the use of crypto accelerators. @@ -64,12 +71,14 @@ const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, fsverity_warn(inode, "Missing crypto API support for hash algorithm \"%s\"", alg->name); - return ERR_PTR(-ENOPKG); + alg = ERR_PTR(-ENOPKG); + goto out_unlock; } fsverity_err(inode, "Error allocating hash algorithm \"%s\": %ld", alg->name, PTR_ERR(tfm)); - return ERR_CAST(tfm); + alg = ERR_CAST(tfm); + goto out_unlock; } err = -EINVAL; @@ -78,18 +87,63 @@ const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, if (WARN_ON(alg->block_size != crypto_ahash_blocksize(tfm))) goto err_free_tfm; + alg->req_pool = mempool_create_kmalloc_pool(1, + sizeof(struct ahash_request) + + crypto_ahash_reqsize(tfm)); + if (!alg->req_pool) { + err = -ENOMEM; + goto err_free_tfm; + } + pr_info("%s using implementation \"%s\"\n", alg->name, crypto_ahash_driver_name(tfm)); - /* pairs with READ_ONCE() above */ - if (cmpxchg(&alg->tfm, NULL, tfm) != NULL) - crypto_free_ahash(tfm); - - return alg; + /* pairs with smp_load_acquire() above */ + smp_store_release(&alg->tfm, tfm); + goto out_unlock; err_free_tfm: crypto_free_ahash(tfm); - return ERR_PTR(err); + alg = ERR_PTR(err); +out_unlock: + mutex_unlock(&fsverity_hash_alg_init_mutex); + return alg; +} + +/** + * fsverity_alloc_hash_request() - allocate a hash request object + * @alg: the hash algorithm for which to allocate the request + * @gfp_flags: memory allocation flags + * + * This is mempool-backed, so this never fails if __GFP_DIRECT_RECLAIM is set in + * @gfp_flags. However, in that case this might need to wait for all + * previously-allocated requests to be freed. So to avoid deadlocks, callers + * must never need multiple requests at a time to make forward progress. + * + * Return: the request object on success; NULL on failure (but see above) + */ +struct ahash_request *fsverity_alloc_hash_request(struct fsverity_hash_alg *alg, + gfp_t gfp_flags) +{ + struct ahash_request *req = mempool_alloc(alg->req_pool, gfp_flags); + + if (req) + ahash_request_set_tfm(req, alg->tfm); + return req; +} + +/** + * fsverity_free_hash_request() - free a hash request object + * @alg: the hash algorithm + * @req: the hash request object to free + */ +void fsverity_free_hash_request(struct fsverity_hash_alg *alg, + struct ahash_request *req) +{ + if (req) { + ahash_request_zero(req); + mempool_free(req, alg->req_pool); + } } /** @@ -101,7 +155,7 @@ err_free_tfm: * Return: NULL if the salt is empty, otherwise the kmalloc()'ed precomputed * initial hash state on success or an ERR_PTR() on failure. */ -const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg, +const u8 *fsverity_prepare_hash_state(struct fsverity_hash_alg *alg, const u8 *salt, size_t salt_size) { u8 *hashstate = NULL; @@ -119,11 +173,8 @@ const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg, if (!hashstate) return ERR_PTR(-ENOMEM); - req = ahash_request_alloc(alg->tfm, GFP_KERNEL); - if (!req) { - err = -ENOMEM; - goto err_free; - } + /* This allocation never fails, since it's mempool-backed. */ + req = fsverity_alloc_hash_request(alg, GFP_KERNEL); /* * Zero-pad the salt to the next multiple of the input size of the hash @@ -158,7 +209,7 @@ const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg, if (err) goto err_free; out: - ahash_request_free(req); + fsverity_free_hash_request(alg, req); kfree(padded_salt); return hashstate; @@ -229,7 +280,7 @@ int fsverity_hash_page(const struct merkle_tree_params *params, * * Return: 0 on success, -errno on failure */ -int fsverity_hash_buffer(const struct fsverity_hash_alg *alg, +int fsverity_hash_buffer(struct fsverity_hash_alg *alg, const void *data, size_t size, u8 *out) { struct ahash_request *req; @@ -237,9 +288,8 @@ int fsverity_hash_buffer(const struct fsverity_hash_alg *alg, DECLARE_CRYPTO_WAIT(wait); int err; - req = ahash_request_alloc(alg->tfm, GFP_KERNEL); - if (!req) - return -ENOMEM; + /* This allocation never fails, since it's mempool-backed. */ + req = fsverity_alloc_hash_request(alg, GFP_KERNEL); sg_init_one(&sg, data, size); ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP | @@ -249,7 +299,7 @@ int fsverity_hash_buffer(const struct fsverity_hash_alg *alg, err = crypto_wait_req(crypto_ahash_digest(req), &wait); - ahash_request_free(req); + fsverity_free_hash_request(alg, req); return err; } diff --git a/fs/verity/open.c b/fs/verity/open.c index b7b0a5479c6f..25b29065d897 100644 --- a/fs/verity/open.c +++ b/fs/verity/open.c @@ -31,7 +31,7 @@ int fsverity_init_merkle_tree_params(struct merkle_tree_params *params, unsigned int log_blocksize, const u8 *salt, size_t salt_size) { - const struct fsverity_hash_alg *hash_alg; + struct fsverity_hash_alg *hash_alg; int err; u64 blocks; u64 offset; @@ -127,7 +127,7 @@ out_err: * Compute the file measurement by hashing the fsverity_descriptor excluding the * signature and with the sig_size field set to 0. */ -static int compute_file_measurement(const struct fsverity_hash_alg *hash_alg, +static int compute_file_measurement(struct fsverity_hash_alg *hash_alg, struct fsverity_descriptor *desc, u8 *measurement) { diff --git a/fs/verity/verify.c b/fs/verity/verify.c index 461789903709..5324270cd7d4 100644 --- a/fs/verity/verify.c +++ b/fs/verity/verify.c @@ -192,13 +192,12 @@ bool fsverity_verify_page(struct page *page) struct ahash_request *req; bool valid; - req = ahash_request_alloc(vi->tree_params.hash_alg->tfm, GFP_NOFS); - if (unlikely(!req)) - return false; + /* This allocation never fails, since it's mempool-backed. */ + req = fsverity_alloc_hash_request(vi->tree_params.hash_alg, GFP_NOFS); valid = verify_page(inode, vi, req, page, 0); - ahash_request_free(req); + fsverity_free_hash_request(vi->tree_params.hash_alg, req); return valid; } @@ -229,12 +228,8 @@ void fsverity_verify_bio(struct bio *bio) int i; unsigned long max_ra_pages = 0; - req = ahash_request_alloc(params->hash_alg->tfm, GFP_NOFS); - if (unlikely(!req)) { - bio_for_each_segment_all(bv, bio, i) - SetPageError(bv->bv_page); - return; - } + /* This allocation never fails, since it's mempool-backed. */ + req = fsverity_alloc_hash_request(params->hash_alg, GFP_NOFS); if (bio->bi_opf & REQ_RAHEAD) { /* @@ -262,7 +257,7 @@ void fsverity_verify_bio(struct bio *bio) SetPageError(page); } - ahash_request_free(req); + fsverity_free_hash_request(params->hash_alg, req); } EXPORT_SYMBOL_GPL(fsverity_verify_bio); #endif /* CONFIG_BLOCK */ From 7c18ae38056922b251fa24a6af4c73992ee291d7 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 31 Dec 2019 11:54:08 -0600 Subject: [PATCH 3030/3715] fs-verity: use u64_to_user_ptr() already provides a macro u64_to_user_ptr(). Use it instead of open-coding the two casts. No change in behavior. Link: https://lore.kernel.org/r/20191231175408.20524-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/verity/enable.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/verity/enable.c b/fs/verity/enable.c index d22f5161ff9a..15e7d14ec2ff 100644 --- a/fs/verity/enable.c +++ b/fs/verity/enable.c @@ -216,8 +216,7 @@ static int enable_verity(struct file *filp, /* Get the salt if the user provided one */ if (arg->salt_size && - copy_from_user(desc->salt, - (const u8 __user *)(uintptr_t)arg->salt_ptr, + copy_from_user(desc->salt, u64_to_user_ptr(arg->salt_ptr), arg->salt_size)) { err = -EFAULT; goto out; @@ -226,8 +225,7 @@ static int enable_verity(struct file *filp, /* Get the signature if the user provided one */ if (arg->sig_size && - copy_from_user(desc->signature, - (const u8 __user *)(uintptr_t)arg->sig_ptr, + copy_from_user(desc->signature, u64_to_user_ptr(arg->sig_ptr), arg->sig_size)) { err = -EFAULT; goto out; From 62772abfd90c1f2173fc3e23489822cbdc6e879e Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 14 Feb 2020 00:19:58 +0100 Subject: [PATCH 3031/3715] ANDROID: added memory initialization tests to cuttlefish config Build the cuttlefish kernel with CONFIG_TEST_STACKINIT=y and CONFIG_TEST_MEMINIT=y to provide the tests for stack and heap initialization. Bug: 144999193 Test: run cuttlefish and observe the following lines in dmesg: test_stackinit: all tests passed! test_meminit: all 130 tests passed! Change-Id: I044347880de82211d9a58449ee3e558c570bda31 Signed-off-by: Alexander Potapenko --- arch/arm64/configs/cuttlefish_defconfig | 2 ++ arch/x86/configs/x86_64_cuttlefish_defconfig | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index 4545e6f37a83..8ed22333c2c3 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -461,6 +461,8 @@ CONFIG_PANIC_TIMEOUT=5 CONFIG_SCHEDSTATS=y CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_ENABLE_DEFAULT_TRACERS=y +CONFIG_TEST_MEMINIT=y +CONFIG_TEST_STACKINIT=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y CONFIG_LSM_MMAP_MIN_ADDR=65536 diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index f3191479e562..64976b1fbc4d 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -484,6 +484,8 @@ CONFIG_PANIC_TIMEOUT=5 CONFIG_SCHEDSTATS=y CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_ENABLE_DEFAULT_TRACERS=y +CONFIG_TEST_MEMINIT=y +CONFIG_TEST_STACKINIT=y CONFIG_IO_DELAY_NONE=y CONFIG_OPTIMIZE_INLINING=y CONFIG_UNWINDER_FRAME_POINTER=y From d1ac78ab13d03ae7135ffd18abb47c4110e463e6 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 14 Feb 2020 01:20:10 +0100 Subject: [PATCH 3032/3715] ANDROID: make memory initialization tests panic on failure This is a patch specific to android-common-4.14 where lib/test_meminit.c and lib/test_stackinit.c are built into the kernel. Call panic() to make the test failures more visible. Bug: 144999193 Change-Id: Iaa9459f4bf63753c5ac1cf51c9691bf62b3924e9 Signed-off-by: Alexander Potapenko --- lib/test_meminit.c | 11 ++++++++--- lib/test_stackinit.c | 10 ++++++++-- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/lib/test_meminit.c b/lib/test_meminit.c index e4f706a404b3..61d55b74d835 100644 --- a/lib/test_meminit.c +++ b/lib/test_meminit.c @@ -397,11 +397,16 @@ static int __init test_meminit_init(void) num_tests += test_kmemcache(&failures); num_tests += test_rcu_persistent(&failures); - if (failures == 0) + if (failures == 0) { pr_info("all %d tests passed!\n", num_tests); - else + } else { pr_info("failures: %d out of %d\n", failures, num_tests); - + /* + * Android 4.14 only: if this test is built as part of the + * kernel, make the failure visible. + */ + panic("Test failed!\n"); + } return failures ? -EINVAL : 0; } module_init(test_meminit_init); diff --git a/lib/test_stackinit.c b/lib/test_stackinit.c index 1e45a735961e..7c5f9fb49e58 100644 --- a/lib/test_stackinit.c +++ b/lib/test_stackinit.c @@ -371,10 +371,16 @@ static int __init test_stackinit_init(void) /* STRUCTLEAK will only cover this. */ failures += test_user(); - if (failures == 0) + if (failures == 0) { pr_info("all tests passed!\n"); - else + } else { pr_err("failures: %u\n", failures); + /* + * Android 4.14 only: if this test is built as part of the + * kernel, make the failure visible. + */ + panic("Test failed!\n"); + } return failures ? -EINVAL : 0; } From 7645d80ca740aeda9c6a80e1491976bd2c2ff331 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 28 Dec 2019 19:54:55 +0800 Subject: [PATCH 3033/3715] kernel/module: Fix memleak in module_add_modinfo_attrs() [ Upstream commit f6d061d617124abbd55396a3bc37b9bf7d33233c ] In module_add_modinfo_attrs() if sysfs_create_file() fails on the first iteration of the loop (so i = 0), we forget to free the modinfo_attrs. Fixes: bc6f2a757d52 ("kernel/module: Fix mem leak in module_add_modinfo_attrs") Reviewed-by: Miroslav Benes Signed-off-by: YueHaibing Signed-off-by: Jessica Yu Signed-off-by: Sasha Levin --- kernel/module.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/module.c b/kernel/module.c index feb1e0fbc3e8..2806c9b6577c 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1730,6 +1730,8 @@ static int module_add_modinfo_attrs(struct module *mod) error_out: if (i > 0) module_remove_modinfo_attrs(mod, --i); + else + kfree(mod->modinfo_attrs); return error; } From 9a4da2a7546525a8a7d869447c9ea9ca874112a5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 3 Jan 2020 17:35:13 +0100 Subject: [PATCH 3034/3715] media: iguanair: fix endpoint sanity check [ Upstream commit 1b257870a78b0a9ce98fdfb052c58542022ffb5b ] Make sure to use the current alternate setting, which need not be the first one by index, when verifying the endpoint descriptors and initialising the URBs. Failing to do so could cause the driver to misbehave or trigger a WARN() in usb_submit_urb() that kernels with panic_on_warn set would choke on. Fixes: 26ff63137c45 ("[media] Add support for the IguanaWorks USB IR Transceiver") Fixes: ab1cbdf159be ("media: iguanair: add sanity checks") Cc: stable # 3.6 Cc: Oliver Neukum Signed-off-by: Johan Hovold Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/rc/iguanair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/rc/iguanair.c b/drivers/media/rc/iguanair.c index 3c2e248ceca8..03dbbfba71fc 100644 --- a/drivers/media/rc/iguanair.c +++ b/drivers/media/rc/iguanair.c @@ -427,7 +427,7 @@ static int iguanair_probe(struct usb_interface *intf, int ret, pipein, pipeout; struct usb_host_interface *idesc; - idesc = intf->altsetting; + idesc = intf->cur_altsetting; if (idesc->desc.bNumEndpoints < 2) return -ENODEV; From 57da5bc425bd787c2806cdf84f3921527a5c57ad Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Fri, 10 Jan 2020 14:50:54 -0800 Subject: [PATCH 3035/3715] x86/cpu: Update cached HLE state on write to TSX_CTRL_CPUID_CLEAR [ Upstream commit 5efc6fa9044c3356d6046c6e1da6d02572dbed6b ] /proc/cpuinfo currently reports Hardware Lock Elision (HLE) feature to be present on boot cpu even if it was disabled during the bootup. This is because cpuinfo_x86->x86_capability HLE bit is not updated after TSX state is changed via the new MSR IA32_TSX_CTRL. Update the cached HLE bit also since it is expected to change after an update to CPUID_CLEAR bit in MSR IA32_TSX_CTRL. Fixes: 95c5824f75f3 ("x86/cpu: Add a "tsx=" cmdline option with TSX disabled by default") Signed-off-by: Pawan Gupta Signed-off-by: Thomas Gleixner Tested-by: Neelima Krishnan Reviewed-by: Dave Hansen Reviewed-by: Josh Poimboeuf Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/2529b99546294c893dfa1c89e2b3e46da3369a59.1578685425.git.pawan.kumar.gupta@linux.intel.com Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/tsx.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c index 3e20d322bc98..032509adf9de 100644 --- a/arch/x86/kernel/cpu/tsx.c +++ b/arch/x86/kernel/cpu/tsx.c @@ -115,11 +115,12 @@ void __init tsx_init(void) tsx_disable(); /* - * tsx_disable() will change the state of the - * RTM CPUID bit. Clear it here since it is now - * expected to be not set. + * tsx_disable() will change the state of the RTM and HLE CPUID + * bits. Clear them here since they are now expected to be not + * set. */ setup_clear_cpu_cap(X86_FEATURE_RTM); + setup_clear_cpu_cap(X86_FEATURE_HLE); } else if (tsx_ctrl_state == TSX_CTRL_ENABLE) { /* @@ -131,10 +132,10 @@ void __init tsx_init(void) tsx_enable(); /* - * tsx_enable() will change the state of the - * RTM CPUID bit. Force it here since it is now - * expected to be set. + * tsx_enable() will change the state of the RTM and HLE CPUID + * bits. Force them here since they are now expected to be set. */ setup_force_cpu_cap(X86_FEATURE_RTM); + setup_force_cpu_cap(X86_FEATURE_HLE); } } From 7a63a59b6644f8b04d05e81096ddf90297c847a8 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Mon, 25 Nov 2019 13:21:58 +0200 Subject: [PATCH 3036/3715] iwlwifi: mvm: fix NVM check for 3168 devices [ Upstream commit b3f20e098293892388d6a0491d6bbb2efb46fbff ] We had a check on !NVM_EXT and then a check for NVM_SDP in the else block of this if. The else block, obviously, could only be reached if using NVM_EXT, so it would never be NVM_SDP. Fix that by checking whether the nvm_type is IWL_NVM instead of checking for !IWL_NVM_EXT to solve this issue. Reported-by: Stefan Sperling Signed-off-by: Luca Coelho Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/nvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c index ca2d66ce8424..8f3032b7174d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c @@ -298,7 +298,7 @@ iwl_parse_nvm_sections(struct iwl_mvm *mvm) int regulatory_type; /* Checking for required sections */ - if (mvm->trans->cfg->nvm_type != IWL_NVM_EXT) { + if (mvm->trans->cfg->nvm_type == IWL_NVM) { if (!mvm->nvm_sections[NVM_SECTION_TYPE_SW].data || !mvm->nvm_sections[mvm->cfg->nvm_hw_section_num].data) { IWL_ERR(mvm, "Can't parse empty OTP/NVM sections\n"); From fc91784c307f9b4bf0bea3a36b5f6e484a2c30fc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Jan 2020 14:26:14 +0100 Subject: [PATCH 3037/3715] sparc32: fix struct ipc64_perm type definition [ Upstream commit 34ca70ef7d3a9fa7e89151597db5e37ae1d429b4 ] As discussed in the strace issue tracker, it appears that the sparc32 sysvipc support has been broken for the past 11 years. It was however working in compat mode, which is how it must have escaped most of the regular testing. The problem is that a cleanup patch inadvertently changed the uid/gid fields in struct ipc64_perm from 32-bit types to 16-bit types in uapi headers. Both glibc and uclibc-ng still use the original types, so they should work fine with compat mode, but not natively. Change the definitions to use __kernel_uid32_t and __kernel_gid32_t again. Fixes: 83c86984bff2 ("sparc: unify ipcbuf.h") Link: https://github.com/strace/strace/issues/116 Cc: # v2.6.29 Cc: Sam Ravnborg Cc: "Dmitry V . Levin" Cc: Rich Felker Cc: libc-alpha@sourceware.org Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- arch/sparc/include/uapi/asm/ipcbuf.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/sparc/include/uapi/asm/ipcbuf.h b/arch/sparc/include/uapi/asm/ipcbuf.h index 9d0d125500e2..084b8949ddff 100644 --- a/arch/sparc/include/uapi/asm/ipcbuf.h +++ b/arch/sparc/include/uapi/asm/ipcbuf.h @@ -15,19 +15,19 @@ struct ipc64_perm { - __kernel_key_t key; - __kernel_uid_t uid; - __kernel_gid_t gid; - __kernel_uid_t cuid; - __kernel_gid_t cgid; + __kernel_key_t key; + __kernel_uid32_t uid; + __kernel_gid32_t gid; + __kernel_uid32_t cuid; + __kernel_gid32_t cgid; #ifndef __arch64__ - unsigned short __pad0; + unsigned short __pad0; #endif - __kernel_mode_t mode; - unsigned short __pad1; - unsigned short seq; - unsigned long long __unused1; - unsigned long long __unused2; + __kernel_mode_t mode; + unsigned short __pad1; + unsigned short seq; + unsigned long long __unused1; + unsigned long long __unused2; }; #endif /* __SPARC_IPCBUF_H */ From 44220931fc222da3f15efe15495c9f022ace499c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 31 Jan 2020 15:27:04 -0800 Subject: [PATCH 3038/3715] cls_rsvp: fix rsvp_policy [ Upstream commit cb3c0e6bdf64d0d124e94ce43cbe4ccbb9b37f51 ] NLA_BINARY can be confusing, since .len value represents the max size of the blob. cls_rsvp really wants user space to provide long enough data for TCA_RSVP_DST and TCA_RSVP_SRC attributes. BUG: KMSAN: uninit-value in rsvp_get net/sched/cls_rsvp.h:258 [inline] BUG: KMSAN: uninit-value in gen_handle net/sched/cls_rsvp.h:402 [inline] BUG: KMSAN: uninit-value in rsvp_change+0x1ae9/0x4220 net/sched/cls_rsvp.h:572 CPU: 1 PID: 13228 Comm: syz-executor.1 Not tainted 5.5.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x220 lib/dump_stack.c:118 kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:118 __msan_warning+0x58/0xa0 mm/kmsan/kmsan_instr.c:215 rsvp_get net/sched/cls_rsvp.h:258 [inline] gen_handle net/sched/cls_rsvp.h:402 [inline] rsvp_change+0x1ae9/0x4220 net/sched/cls_rsvp.h:572 tc_new_tfilter+0x31fe/0x5010 net/sched/cls_api.c:2104 rtnetlink_rcv_msg+0xcb7/0x1570 net/core/rtnetlink.c:5415 netlink_rcv_skb+0x451/0x650 net/netlink/af_netlink.c:2477 rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:5442 netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] netlink_unicast+0xf9e/0x1100 net/netlink/af_netlink.c:1328 netlink_sendmsg+0x1248/0x14d0 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:639 [inline] sock_sendmsg net/socket.c:659 [inline] ____sys_sendmsg+0x12b6/0x1350 net/socket.c:2330 ___sys_sendmsg net/socket.c:2384 [inline] __sys_sendmsg+0x451/0x5f0 net/socket.c:2417 __do_sys_sendmsg net/socket.c:2426 [inline] __se_sys_sendmsg+0x97/0xb0 net/socket.c:2424 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2424 do_syscall_64+0xb8/0x160 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x45b349 Code: ad b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f269d43dc78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f269d43e6d4 RCX: 000000000045b349 RDX: 0000000000000000 RSI: 00000000200001c0 RDI: 0000000000000003 RBP: 000000000075bfc8 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 00000000000009c2 R14: 00000000004cb338 R15: 000000000075bfd4 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:144 [inline] kmsan_internal_poison_shadow+0x66/0xd0 mm/kmsan/kmsan.c:127 kmsan_slab_alloc+0x8a/0xe0 mm/kmsan/kmsan_hooks.c:82 slab_alloc_node mm/slub.c:2774 [inline] __kmalloc_node_track_caller+0xb40/0x1200 mm/slub.c:4382 __kmalloc_reserve net/core/skbuff.c:141 [inline] __alloc_skb+0x2fd/0xac0 net/core/skbuff.c:209 alloc_skb include/linux/skbuff.h:1049 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1174 [inline] netlink_sendmsg+0x7d3/0x14d0 net/netlink/af_netlink.c:1892 sock_sendmsg_nosec net/socket.c:639 [inline] sock_sendmsg net/socket.c:659 [inline] ____sys_sendmsg+0x12b6/0x1350 net/socket.c:2330 ___sys_sendmsg net/socket.c:2384 [inline] __sys_sendmsg+0x451/0x5f0 net/socket.c:2417 __do_sys_sendmsg net/socket.c:2426 [inline] __se_sys_sendmsg+0x97/0xb0 net/socket.c:2424 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2424 do_syscall_64+0xb8/0x160 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 6fa8c0144b77 ("[NET_SCHED]: Use nla_policy for attribute validation in classifiers") Signed-off-by: Eric Dumazet Reported-by: syzbot Acked-by: Cong Wang Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/sched/cls_rsvp.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index cf325625c99d..89259819e9ed 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -475,10 +475,8 @@ static u32 gen_tunnel(struct rsvp_head *data) static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = { [TCA_RSVP_CLASSID] = { .type = NLA_U32 }, - [TCA_RSVP_DST] = { .type = NLA_BINARY, - .len = RSVP_DST_LEN * sizeof(u32) }, - [TCA_RSVP_SRC] = { .type = NLA_BINARY, - .len = RSVP_DST_LEN * sizeof(u32) }, + [TCA_RSVP_DST] = { .len = RSVP_DST_LEN * sizeof(u32) }, + [TCA_RSVP_SRC] = { .len = RSVP_DST_LEN * sizeof(u32) }, [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) }, }; From 52083ed47287d3c508cda33186c898d8a90eba6b Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 4 Feb 2020 03:24:59 +0000 Subject: [PATCH 3039/3715] gtp: use __GFP_NOWARN to avoid memalloc warning [ Upstream commit bd5cd35b782abf5437fbd01dfaee12437d20e832 ] gtp hashtable size is received by user-space. So, this hashtable size could be too large. If so, kmalloc will internally print a warning message. This warning message is actually not necessary for the gtp module. So, this patch adds __GFP_NOWARN to avoid this message. Splat looks like: [ 2171.200049][ T1860] WARNING: CPU: 1 PID: 1860 at mm/page_alloc.c:4713 __alloc_pages_nodemask+0x2f3/0x740 [ 2171.238885][ T1860] Modules linked in: gtp veth openvswitch nsh nf_conncount nf_nat nf_conntrack nf_defrag_ipv] [ 2171.262680][ T1860] CPU: 1 PID: 1860 Comm: gtp-link Not tainted 5.5.0+ #321 [ 2171.263567][ T1860] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 2171.264681][ T1860] RIP: 0010:__alloc_pages_nodemask+0x2f3/0x740 [ 2171.265332][ T1860] Code: 64 fe ff ff 65 48 8b 04 25 c0 0f 02 00 48 05 f0 12 00 00 41 be 01 00 00 00 49 89 47 0 [ 2171.267301][ T1860] RSP: 0018:ffff8880b51af1f0 EFLAGS: 00010246 [ 2171.268320][ T1860] RAX: ffffed1016a35e43 RBX: 0000000000000000 RCX: 0000000000000000 [ 2171.269517][ T1860] RDX: 0000000000000000 RSI: 000000000000000b RDI: 0000000000000000 [ 2171.270305][ T1860] RBP: 0000000000040cc0 R08: ffffed1018893109 R09: dffffc0000000000 [ 2171.275973][ T1860] R10: 0000000000000001 R11: ffffed1018893108 R12: 1ffff11016a35e43 [ 2171.291039][ T1860] R13: 000000000000000b R14: 000000000000000b R15: 00000000000f4240 [ 2171.292328][ T1860] FS: 00007f53cbc83740(0000) GS:ffff8880da000000(0000) knlGS:0000000000000000 [ 2171.293409][ T1860] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 2171.294586][ T1860] CR2: 000055f540014508 CR3: 00000000b49f2004 CR4: 00000000000606e0 [ 2171.295424][ T1860] Call Trace: [ 2171.295756][ T1860] ? mark_held_locks+0xa5/0xe0 [ 2171.296659][ T1860] ? __alloc_pages_slowpath+0x21b0/0x21b0 [ 2171.298283][ T1860] ? gtp_encap_enable_socket+0x13e/0x400 [gtp] [ 2171.298962][ T1860] ? alloc_pages_current+0xc1/0x1a0 [ 2171.299475][ T1860] kmalloc_order+0x22/0x80 [ 2171.299936][ T1860] kmalloc_order_trace+0x1d/0x140 [ 2171.300437][ T1860] __kmalloc+0x302/0x3a0 [ 2171.300896][ T1860] gtp_newlink+0x293/0xba0 [gtp] [ ... ] Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/gtp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 3840f21dd635..92e4e5d53053 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -771,11 +771,13 @@ static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize) { int i; - gtp->addr_hash = kmalloc(sizeof(struct hlist_head) * hsize, GFP_KERNEL); + gtp->addr_hash = kmalloc(sizeof(struct hlist_head) * hsize, + GFP_KERNEL | __GFP_NOWARN); if (gtp->addr_hash == NULL) return -ENOMEM; - gtp->tid_hash = kmalloc(sizeof(struct hlist_head) * hsize, GFP_KERNEL); + gtp->tid_hash = kmalloc(sizeof(struct hlist_head) * hsize, + GFP_KERNEL | __GFP_NOWARN); if (gtp->tid_hash == NULL) goto err1; From ff8e755ddfd46899a07649749a515a083fc333e3 Mon Sep 17 00:00:00 2001 From: Ridge Kennedy Date: Tue, 4 Feb 2020 12:24:00 +1300 Subject: [PATCH 3040/3715] l2tp: Allow duplicate session creation with UDP [ Upstream commit 0d0d9a388a858e271bb70e71e99e7fe2a6fd6f64 ] In the past it was possible to create multiple L2TPv3 sessions with the same session id as long as the sessions belonged to different tunnels. The resulting sessions had issues when used with IP encapsulated tunnels, but worked fine with UDP encapsulated ones. Some applications began to rely on this behaviour to avoid having to negotiate unique session ids. Some time ago a change was made to require session ids to be unique across all tunnels, breaking the applications making use of this "feature". This change relaxes the duplicate session id check to allow duplicates if both of the colliding sessions belong to UDP encapsulated tunnels. Fixes: dbdbc73b4478 ("l2tp: fix duplicate session creation") Signed-off-by: Ridge Kennedy Acked-by: James Chapman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index b9be0360ab94..b8c90f8d1a57 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -358,8 +358,13 @@ static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel, spin_lock_bh(&pn->l2tp_session_hlist_lock); + /* IP encap expects session IDs to be globally unique, while + * UDP encap doesn't. + */ hlist_for_each_entry(session_walk, g_head, global_hlist) - if (session_walk->session_id == session->session_id) { + if (session_walk->session_id == session->session_id && + (session_walk->tunnel->encap == L2TP_ENCAPTYPE_IP || + tunnel->encap == L2TP_ENCAPTYPE_IP)) { err = -EEXIST; goto err_tlock_pnlock; } From 7950ef09699d175bfec32d318d547c845462cc2f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 3 Feb 2020 10:15:07 -0800 Subject: [PATCH 3041/3715] net: hsr: fix possible NULL deref in hsr_handle_frame() [ Upstream commit 2b5b8251bc9fe2f9118411f037862ee17cf81e97 ] hsr_port_get_rcu() can return NULL, so we need to be careful. general protection fault, probably for non-canonical address 0xdffffc0000000006: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000030-0x0000000000000037] CPU: 1 PID: 10249 Comm: syz-executor.5 Not tainted 5.5.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__read_once_size include/linux/compiler.h:199 [inline] RIP: 0010:hsr_addr_is_self+0x86/0x330 net/hsr/hsr_framereg.c:44 Code: 04 00 f3 f3 f3 65 48 8b 04 25 28 00 00 00 48 89 45 d0 31 c0 e8 6b ff 94 f9 4c 89 f2 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 75 02 00 00 48 8b 43 30 49 39 c6 49 89 47 c0 0f RSP: 0018:ffffc90000da8a90 EFLAGS: 00010206 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffffff87e0cc33 RDX: 0000000000000006 RSI: ffffffff87e035d5 RDI: 0000000000000000 RBP: ffffc90000da8b20 R08: ffff88808e7de040 R09: ffffed1015d2707c R10: ffffed1015d2707b R11: ffff8880ae9383db R12: ffff8880a689bc5e R13: 1ffff920001b5153 R14: 0000000000000030 R15: ffffc90000da8af8 FS: 00007fd7a42be700(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b32338000 CR3: 00000000a928c000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: hsr_handle_frame+0x1c5/0x630 net/hsr/hsr_slave.c:31 __netif_receive_skb_core+0xfbc/0x30b0 net/core/dev.c:5099 __netif_receive_skb_one_core+0xa8/0x1a0 net/core/dev.c:5196 __netif_receive_skb+0x2c/0x1d0 net/core/dev.c:5312 process_backlog+0x206/0x750 net/core/dev.c:6144 napi_poll net/core/dev.c:6582 [inline] net_rx_action+0x508/0x1120 net/core/dev.c:6650 __do_softirq+0x262/0x98c kernel/softirq.c:292 do_softirq_own_stack+0x2a/0x40 arch/x86/entry/entry_64.S:1082 Fixes: c5a759117210 ("net/hsr: Use list_head (and rcu) instead of array for slave devices.") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/hsr/hsr_slave.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index 56080da4aa77..5fee6ec7c93d 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -32,6 +32,8 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb) rcu_read_lock(); /* hsr->node_db, hsr->ports */ port = hsr_port_get_rcu(skb->dev); + if (!port) + goto finish_pass; if (hsr_addr_is_self(port->hsr, eth_hdr(skb)->h_source)) { /* Directly kill frames sent by ourselves */ From 6cb448ee493c8a514c9afa0c346f3f5b3227de85 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 2 Feb 2020 21:14:35 -0800 Subject: [PATCH 3042/3715] net_sched: fix an OOB access in cls_tcindex [ Upstream commit 599be01ee567b61f4471ee8078870847d0a11e8e ] As Eric noticed, tcindex_alloc_perfect_hash() uses cp->hash to compute the size of memory allocation, but cp->hash is set again after the allocation, this caused an out-of-bound access. So we have to move all cp->hash initialization and computation before the memory allocation. Move cp->mask and cp->shift together as cp->hash may need them for computation too. Reported-and-tested-by: syzbot+35d4dea36c387813ed31@syzkaller.appspotmail.com Fixes: 331b72922c5f ("net: sched: RCU cls_tcindex") Cc: Eric Dumazet Cc: John Fastabend Cc: Jamal Hadi Salim Cc: Jiri Pirko Cc: Jakub Kicinski Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/cls_tcindex.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 75c7c7cc7499..10df2c12a3fb 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -351,12 +351,31 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, cp->fall_through = p->fall_through; cp->tp = tp; + if (tb[TCA_TCINDEX_HASH]) + cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); + + if (tb[TCA_TCINDEX_MASK]) + cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]); + + if (tb[TCA_TCINDEX_SHIFT]) + cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]); + + if (!cp->hash) { + /* Hash not specified, use perfect hash if the upper limit + * of the hashing index is below the threshold. + */ + if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD) + cp->hash = (cp->mask >> cp->shift) + 1; + else + cp->hash = DEFAULT_HASH_SIZE; + } + if (p->perfect) { int i; if (tcindex_alloc_perfect_hash(cp) < 0) goto errout; - for (i = 0; i < cp->hash; i++) + for (i = 0; i < min(cp->hash, p->hash); i++) cp->perfect[i].res = p->perfect[i].res; balloc = 1; } @@ -368,15 +387,6 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, if (old_r) cr = r->res; - if (tb[TCA_TCINDEX_HASH]) - cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); - - if (tb[TCA_TCINDEX_MASK]) - cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]); - - if (tb[TCA_TCINDEX_SHIFT]) - cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]); - err = -EBUSY; /* Hash already allocated, make sure that we still meet the @@ -394,16 +404,6 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, if (tb[TCA_TCINDEX_FALL_THROUGH]) cp->fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]); - if (!cp->hash) { - /* Hash not specified, use perfect hash if the upper limit - * of the hashing index is below the threshold. - */ - if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD) - cp->hash = (cp->mask >> cp->shift) + 1; - else - cp->hash = DEFAULT_HASH_SIZE; - } - if (!cp->perfect && !cp->h) cp->alloc_hash = cp->hash; From fa32d7ce0cf8c04cefa72c87a95f222164be0131 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 2 Feb 2020 02:41:38 -0500 Subject: [PATCH 3043/3715] bnxt_en: Fix TC queue mapping. [ Upstream commit 18e4960c18f484ac288f41b43d0e6c4c88e6ea78 ] The driver currently only calls netdev_set_tc_queue when the number of TCs is greater than 1. Instead, the comparison should be greater than or equal to 1. Even with 1 TC, we need to set the queue mapping. This bug can cause warnings when the number of TCs is changed back to 1. Fixes: 7809592d3e2e ("bnxt_en: Enable MSIX early in bnxt_init_one().") Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 7461e7b9eaae..41bc7820d2dd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5375,7 +5375,7 @@ static void bnxt_setup_msix(struct bnxt *bp) int tcs, i; tcs = netdev_get_num_tc(dev); - if (tcs > 1) { + if (tcs) { int i, off, count; for (i = 0; i < tcs; i++) { From 24070b40926b42c35ca0649f44711cad5da0cf96 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 31 Jan 2020 09:14:47 -0800 Subject: [PATCH 3044/3715] tcp: clear tp->total_retrans in tcp_disconnect() [ Upstream commit c13c48c00a6bc1febc73902505bdec0967bd7095 ] total_retrans needs to be cleared in tcp_disconnect(). tcp_disconnect() is rarely used, but it is worth fixing it. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Cc: SeongJae Park Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index db1eceda2359..e2ab94a563a9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2366,6 +2366,7 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_set_ca_state(sk, TCP_CA_Open); tp->is_sack_reneg = 0; tcp_clear_retrans(tp); + tp->total_retrans = 0; inet_csk_delack_init(sk); /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0 * issue in __tcp_select_window() From 4d469d93a89ee0d9a77b5880b353f4c9dd6ef5e2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 31 Jan 2020 10:22:47 -0800 Subject: [PATCH 3045/3715] tcp: clear tp->delivered in tcp_disconnect() [ Upstream commit 2fbdd56251b5c62f96589f39eded277260de7267 ] tp->delivered needs to be cleared in tcp_disconnect(). tcp_disconnect() is rarely used, but it is worth fixing it. Fixes: ddf1af6fa00e ("tcp: new delivery accounting") Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Cc: Neal Cardwell Acked-by: Yuchung Cheng Acked-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e2ab94a563a9..704a1c6373b7 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2363,6 +2363,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd_cnt = 0; tp->window_clamp = 0; + tp->delivered = 0; tcp_set_ca_state(sk, TCP_CA_Open); tp->is_sack_reneg = 0; tcp_clear_retrans(tp); From deae87cf1936573cc1eaaee531b9e446ab0bf7ee Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 31 Jan 2020 10:32:41 -0800 Subject: [PATCH 3046/3715] tcp: clear tp->data_segs{in|out} in tcp_disconnect() [ Upstream commit db7ffee6f3eb3683cdcaeddecc0a630a14546fe3 ] tp->data_segs_in and tp->data_segs_out need to be cleared in tcp_disconnect(). tcp_disconnect() is rarely used, but it is worth fixing it. Fixes: a44d6eacdaf5 ("tcp: Add RFC4898 tcpEStatsPerfDataSegsOut/In") Signed-off-by: Eric Dumazet Cc: Martin KaFai Lau Cc: Yuchung Cheng Cc: Neal Cardwell Acked-by: Neal Cardwell Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 704a1c6373b7..64634b9b6c34 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2381,6 +2381,8 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_saved_syn_free(tp); tp->bytes_acked = 0; tp->bytes_received = 0; + tp->data_segs_in = 0; + tp->data_segs_out = 0; /* Clean up fastopen related fields */ tcp_free_fastopen_req(tp); From e1b992f58e39cd6f2712608480fdb96ff23e40ad Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 31 Jan 2020 10:44:50 -0800 Subject: [PATCH 3047/3715] tcp: clear tp->segs_{in|out} in tcp_disconnect() [ Upstream commit 784f8344de750a41344f4bbbebb8507a730fc99c ] tp->segs_in and tp->segs_out need to be cleared in tcp_disconnect(). tcp_disconnect() is rarely used, but it is worth fixing it. Fixes: 2efd055c53c0 ("tcp: add tcpi_segs_in and tcpi_segs_out to tcp_info") Signed-off-by: Eric Dumazet Cc: Marcelo Ricardo Leitner Cc: Yuchung Cheng Cc: Neal Cardwell Acked-by: Neal Cardwell Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 64634b9b6c34..0c69b66d93d7 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2379,6 +2379,8 @@ int tcp_disconnect(struct sock *sk, int flags) dst_release(sk->sk_rx_dst); sk->sk_rx_dst = NULL; tcp_saved_syn_free(tp); + tp->segs_in = 0; + tp->segs_out = 0; tp->bytes_acked = 0; tp->bytes_received = 0; tp->data_segs_in = 0; From 532e2558e5747fc7f868023fa19e327cbc4608f0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 30 Jan 2020 21:50:36 +0000 Subject: [PATCH 3048/3715] rxrpc: Fix insufficient receive notification generation [ Upstream commit f71dbf2fb28489a79bde0dca1c8adfb9cdb20a6b ] In rxrpc_input_data(), rxrpc_notify_socket() is called if the base sequence number of the packet is immediately following the hard-ack point at the end of the function. However, this isn't sufficient, since the recvmsg side may have been advancing the window and then overrun the position in which we're adding - at which point rx_hard_ack >= seq0 and no notification is generated. Fix this by always generating a notification at the end of the input function. Without this, a long call may stall, possibly indefinitely. Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code") Signed-off-by: David Howells Signed-off-by: Greg Kroah-Hartman --- net/rxrpc/input.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index ea506a77f3c8..18ce6f97462b 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -585,8 +585,7 @@ ack: immediate_ack, true, rxrpc_propose_ack_input_data); - if (sp->hdr.seq == READ_ONCE(call->rx_hard_ack) + 1) - rxrpc_notify_socket(call); + rxrpc_notify_socket(call); _leave(" [queued]"); } From 3740a6078d9daefc8ce90f8e50a3cc2115908ba8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 30 Jan 2020 21:50:36 +0000 Subject: [PATCH 3049/3715] rxrpc: Fix NULL pointer deref due to call->conn being cleared on disconnect [ Upstream commit 5273a191dca65a675dc0bcf3909e59c6933e2831 ] When a call is disconnected, the connection pointer from the call is cleared to make sure it isn't used again and to prevent further attempted transmission for the call. Unfortunately, there might be a daemon trying to use it at the same time to transmit a packet. Fix this by keeping call->conn set, but setting a flag on the call to indicate disconnection instead. Remove also the bits in the transmission functions where the conn pointer is checked and a ref taken under spinlock as this is now redundant. Fixes: 8d94aa381dab ("rxrpc: Calls shouldn't hold socket refs") Signed-off-by: David Howells Signed-off-by: Greg Kroah-Hartman --- net/rxrpc/ar-internal.h | 1 + net/rxrpc/call_object.c | 4 ++-- net/rxrpc/conn_client.c | 3 +-- net/rxrpc/conn_object.c | 4 ++-- net/rxrpc/output.c | 26 +++++++++----------------- 5 files changed, 15 insertions(+), 23 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 71c7f1dd4599..b5581b0b9480 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -451,6 +451,7 @@ enum rxrpc_call_flag { RXRPC_CALL_SEND_PING, /* A ping will need to be sent */ RXRPC_CALL_PINGING, /* Ping in process */ RXRPC_CALL_RETRANS_TIMEOUT, /* Retransmission due to timeout occurred */ + RXRPC_CALL_DISCONNECTED, /* The call has been disconnected */ }; /* diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index ddaa471a2607..7021725fa38a 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -505,7 +505,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn); - if (conn) + if (conn && !test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) rxrpc_disconnect_call(call); for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) { @@ -639,6 +639,7 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu) { struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu); + rxrpc_put_connection(call->conn); rxrpc_put_peer(call->peer); kfree(call->rxtx_buffer); kfree(call->rxtx_annotations); @@ -660,7 +661,6 @@ void rxrpc_cleanup_call(struct rxrpc_call *call) ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags)); - ASSERTCMP(call->conn, ==, NULL); /* Clean up the Rx/Tx buffer */ for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 0aa4bf09fb9c..05d17ec63635 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -762,9 +762,9 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) struct rxrpc_net *rxnet = rxrpc_net(sock_net(&call->socket->sk)); trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect); - call->conn = NULL; spin_lock(&conn->channel_lock); + set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); /* Calls that have never actually been assigned a channel can simply be * discarded. If the conn didn't get used either, it will follow @@ -863,7 +863,6 @@ out: spin_unlock(&rxnet->client_conn_cache_lock); out_2: spin_unlock(&conn->channel_lock); - rxrpc_put_connection(conn); _leave(""); return; diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index a48c817b792b..13b29e491de9 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -163,6 +163,8 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn, _enter("%d,%x", conn->debug_id, call->cid); + set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); + if (rcu_access_pointer(chan->call) == call) { /* Save the result of the call so that we can repeat it if necessary * through the channel, whilst disposing of the actual call record. @@ -207,9 +209,7 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) __rxrpc_disconnect_call(conn, call); spin_unlock(&conn->channel_lock); - call->conn = NULL; conn->idle_timestamp = jiffies; - rxrpc_put_connection(conn); } /* diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index edddbacf33bc..9619c56ef4cd 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -96,7 +96,7 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, */ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping) { - struct rxrpc_connection *conn = NULL; + struct rxrpc_connection *conn; struct rxrpc_ack_buffer *pkt; struct msghdr msg; struct kvec iov[2]; @@ -106,18 +106,14 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping) int ret; u8 reason; - spin_lock_bh(&call->lock); - if (call->conn) - conn = rxrpc_get_connection_maybe(call->conn); - spin_unlock_bh(&call->lock); - if (!conn) + if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) return -ECONNRESET; pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); - if (!pkt) { - rxrpc_put_connection(conn); + if (!pkt) return -ENOMEM; - } + + conn = call->conn; msg.msg_name = &call->peer->srx.transport; msg.msg_namelen = call->peer->srx.transport_len; @@ -204,7 +200,6 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping) } out: - rxrpc_put_connection(conn); kfree(pkt); return ret; } @@ -214,20 +209,18 @@ out: */ int rxrpc_send_abort_packet(struct rxrpc_call *call) { - struct rxrpc_connection *conn = NULL; + struct rxrpc_connection *conn; struct rxrpc_abort_buffer pkt; struct msghdr msg; struct kvec iov[1]; rxrpc_serial_t serial; int ret; - spin_lock_bh(&call->lock); - if (call->conn) - conn = rxrpc_get_connection_maybe(call->conn); - spin_unlock_bh(&call->lock); - if (!conn) + if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) return -ECONNRESET; + conn = call->conn; + msg.msg_name = &call->peer->srx.transport; msg.msg_namelen = call->peer->srx.transport_len; msg.msg_control = NULL; @@ -255,7 +248,6 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 1, sizeof(pkt)); - rxrpc_put_connection(conn); return ret; } From 52f001bf9ba6d5fc628852dd6102a98f573e0b3b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 8 Nov 2019 16:48:38 +0100 Subject: [PATCH 3050/3715] media: uvcvideo: Avoid cyclic entity chains due to malformed USB descriptors commit 68035c80e129c4cfec659aac4180354530b26527 upstream. Way back in 2017, fuzzing the 4.14-rc2 USB stack with syzkaller kicked up the following WARNING from the UVC chain scanning code: | list_add double add: new=ffff880069084010, prev=ffff880069084010, | next=ffff880067d22298. | ------------[ cut here ]------------ | WARNING: CPU: 1 PID: 1846 at lib/list_debug.c:31 __list_add_valid+0xbd/0xf0 | Modules linked in: | CPU: 1 PID: 1846 Comm: kworker/1:2 Not tainted | 4.14.0-rc2-42613-g1488251d1a98 #238 | Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 | Workqueue: usb_hub_wq hub_event | task: ffff88006b01ca40 task.stack: ffff880064358000 | RIP: 0010:__list_add_valid+0xbd/0xf0 lib/list_debug.c:29 | RSP: 0018:ffff88006435ddd0 EFLAGS: 00010286 | RAX: 0000000000000058 RBX: ffff880067d22298 RCX: 0000000000000000 | RDX: 0000000000000058 RSI: ffffffff85a58800 RDI: ffffed000c86bbac | RBP: ffff88006435dde8 R08: 1ffff1000c86ba52 R09: 0000000000000000 | R10: 0000000000000002 R11: 0000000000000000 R12: ffff880069084010 | R13: ffff880067d22298 R14: ffff880069084010 R15: ffff880067d222a0 | FS: 0000000000000000(0000) GS:ffff88006c900000(0000) knlGS:0000000000000000 | CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 | CR2: 0000000020004ff2 CR3: 000000006b447000 CR4: 00000000000006e0 | Call Trace: | __list_add ./include/linux/list.h:59 | list_add_tail+0x8c/0x1b0 ./include/linux/list.h:92 | uvc_scan_chain_forward.isra.8+0x373/0x416 | drivers/media/usb/uvc/uvc_driver.c:1471 | uvc_scan_chain drivers/media/usb/uvc/uvc_driver.c:1585 | uvc_scan_device drivers/media/usb/uvc/uvc_driver.c:1769 | uvc_probe+0x77f2/0x8f00 drivers/media/usb/uvc/uvc_driver.c:2104 Looking into the output from usbmon, the interesting part is the following data packet: ffff880069c63e00 30710169 C Ci:1:002:0 0 143 = 09028f00 01030080 00090403 00000e01 00000924 03000103 7c003328 010204db If we drop the lead configuration and interface descriptors, we're left with an output terminal descriptor describing a generic display: /* Output terminal descriptor */ buf[0] 09 buf[1] 24 buf[2] 03 /* UVC_VC_OUTPUT_TERMINAL */ buf[3] 00 /* ID */ buf[4] 01 /* type == 0x0301 (UVC_OTT_DISPLAY) */ buf[5] 03 buf[6] 7c buf[7] 00 /* source ID refers to self! */ buf[8] 33 The problem with this descriptor is that it is self-referential: the source ID of 0 matches itself! This causes the 'struct uvc_entity' representing the display to be added to its chain list twice during 'uvc_scan_chain()': once via 'uvc_scan_chain_entity()' when it is processed directly from the 'dev->entities' list and then again immediately afterwards when trying to follow the source ID in 'uvc_scan_chain_forward()' Add a check before adding an entity to a chain list to ensure that the entity is not already part of a chain. Link: https://lore.kernel.org/linux-media/CAAeHK+z+Si69jUR+N-SjN9q4O+o5KFiNManqEa-PjUta7EOb7A@mail.gmail.com/ Cc: Fixes: c0efd232929c ("V4L/DVB (8145a): USB Video Class driver") Reported-by: Andrey Konovalov Signed-off-by: Will Deacon Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/uvc/uvc_driver.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c index 6445b638f207..5899593dabaf 100644 --- a/drivers/media/usb/uvc/uvc_driver.c +++ b/drivers/media/usb/uvc/uvc_driver.c @@ -1446,6 +1446,11 @@ static int uvc_scan_chain_forward(struct uvc_video_chain *chain, break; if (forward == prev) continue; + if (forward->chain.next || forward->chain.prev) { + uvc_trace(UVC_TRACE_DESCR, "Found reference to " + "entity %d already in chain.\n", forward->id); + return -EINVAL; + } switch (UVC_ENTITY_TYPE(forward)) { case UVC_VC_EXTENSION_UNIT: @@ -1527,6 +1532,13 @@ static int uvc_scan_chain_backward(struct uvc_video_chain *chain, return -1; } + if (term->chain.next || term->chain.prev) { + uvc_trace(UVC_TRACE_DESCR, "Found reference to " + "entity %d already in chain.\n", + term->id); + return -EINVAL; + } + if (uvc_trace_param & UVC_TRACE_PROBE) printk(KERN_CONT " %d", term->id); From bc407553b31f77fc505facfa75965513293e95c7 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 21 Nov 2019 11:28:10 +0100 Subject: [PATCH 3051/3715] mfd: dln2: More sanity checking for endpoints commit 2b8bd606b1e60ca28c765f69c1eedd7d2a2e9dca upstream. It is not enough to check for the number of endpoints. The types must also be correct. Reported-and-tested-by: syzbot+48a2851be24583b864dc@syzkaller.appspotmail.com Signed-off-by: Oliver Neukum Reviewed-by: Greg Kroah-Hartman Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/dln2.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/mfd/dln2.c b/drivers/mfd/dln2.c index 704e189ca162..95d0f2df0ad4 100644 --- a/drivers/mfd/dln2.c +++ b/drivers/mfd/dln2.c @@ -729,6 +729,8 @@ static int dln2_probe(struct usb_interface *interface, const struct usb_device_id *usb_id) { struct usb_host_interface *hostif = interface->cur_altsetting; + struct usb_endpoint_descriptor *epin; + struct usb_endpoint_descriptor *epout; struct device *dev = &interface->dev; struct dln2_dev *dln2; int ret; @@ -738,12 +740,19 @@ static int dln2_probe(struct usb_interface *interface, hostif->desc.bNumEndpoints < 2) return -ENODEV; + epin = &hostif->endpoint[0].desc; + epout = &hostif->endpoint[1].desc; + if (!usb_endpoint_is_bulk_out(epout)) + return -ENODEV; + if (!usb_endpoint_is_bulk_in(epin)) + return -ENODEV; + dln2 = kzalloc(sizeof(*dln2), GFP_KERNEL); if (!dln2) return -ENOMEM; - dln2->ep_out = hostif->endpoint[0].desc.bEndpointAddress; - dln2->ep_in = hostif->endpoint[1].desc.bEndpointAddress; + dln2->ep_out = epout->bEndpointAddress; + dln2->ep_in = epin->bEndpointAddress; dln2->usb_dev = usb_get_dev(interface_to_usbdev(interface)); dln2->interface = interface; usb_set_intfdata(interface, dln2); From 93b8f119275ef52b718dd34f1fff269e46e534f1 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Sat, 17 Aug 2019 10:12:08 -0400 Subject: [PATCH 3052/3715] tracing: Fix sched switch start/stop refcount racy updates commit 64ae572bc7d0060429e40e1c8d803ce5eb31a0d6 upstream. Reading the sched_cmdline_ref and sched_tgid_ref initial state within tracing_start_sched_switch without holding the sched_register_mutex is racy against concurrent updates, which can lead to tracepoint probes being registered more than once (and thus trigger warnings within tracepoint.c). [ May be the fix for this bug ] Link: https://lore.kernel.org/r/000000000000ab6f84056c786b93@google.com Link: http://lkml.kernel.org/r/20190817141208.15226-1-mathieu.desnoyers@efficios.com Cc: stable@vger.kernel.org CC: Steven Rostedt (VMware) CC: Joel Fernandes (Google) CC: Peter Zijlstra CC: Thomas Gleixner CC: Paul E. McKenney Reported-by: syzbot+774fddf07b7ab29a1e55@syzkaller.appspotmail.com Fixes: d914ba37d7145 ("tracing: Add support for recording tgid of tasks") Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_sched_switch.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index e288168661e1..e304196d7c28 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -89,8 +89,10 @@ static void tracing_sched_unregister(void) static void tracing_start_sched_switch(int ops) { - bool sched_register = (!sched_cmdline_ref && !sched_tgid_ref); + bool sched_register; + mutex_lock(&sched_register_mutex); + sched_register = (!sched_cmdline_ref && !sched_tgid_ref); switch (ops) { case RECORD_CMDLINE: From 67cd8ccd494c155f1d0cbb992fe569832fe46d48 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Sat, 14 Dec 2019 19:51:14 -0600 Subject: [PATCH 3053/3715] brcmfmac: Fix memory leak in brcmf_usbdev_qinit commit 4282dc057d750c6a7dd92953564b15c26b54c22c upstream. In the implementation of brcmf_usbdev_qinit() the allocated memory for reqs is leaking if usb_alloc_urb() fails. Release reqs in the error handling path. Fixes: 71bb244ba2fd ("brcm80211: fmac: add USB support for bcm43235/6/8 chipsets") Signed-off-by: Navid Emamdoost Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c index 2eb5fe7367c6..4ad830b7b1c9 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c @@ -441,6 +441,7 @@ fail: usb_free_urb(req->urb); list_del(q->next); } + kfree(reqs); return NULL; } From 6d906054db5a0db80d1801f68b21dc165cc52bbe Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Mon, 23 Dec 2019 08:47:35 +0200 Subject: [PATCH 3054/3715] usb: gadget: legacy: set max_speed to super-speed commit 463f67aec2837f981b0a0ce8617721ff59685c00 upstream. These interfaces do support super-speed so let's not limit maximum speed to high-speed. Cc: Signed-off-by: Roger Quadros Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/legacy/cdc2.c | 2 +- drivers/usb/gadget/legacy/g_ffs.c | 2 +- drivers/usb/gadget/legacy/multi.c | 2 +- drivers/usb/gadget/legacy/ncm.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/legacy/cdc2.c b/drivers/usb/gadget/legacy/cdc2.c index 51c08682de84..5ee25beb52f0 100644 --- a/drivers/usb/gadget/legacy/cdc2.c +++ b/drivers/usb/gadget/legacy/cdc2.c @@ -229,7 +229,7 @@ static struct usb_composite_driver cdc_driver = { .name = "g_cdc", .dev = &device_desc, .strings = dev_strings, - .max_speed = USB_SPEED_HIGH, + .max_speed = USB_SPEED_SUPER, .bind = cdc_bind, .unbind = cdc_unbind, }; diff --git a/drivers/usb/gadget/legacy/g_ffs.c b/drivers/usb/gadget/legacy/g_ffs.c index 6da7316f8e87..54ee4e31645b 100644 --- a/drivers/usb/gadget/legacy/g_ffs.c +++ b/drivers/usb/gadget/legacy/g_ffs.c @@ -153,7 +153,7 @@ static struct usb_composite_driver gfs_driver = { .name = DRIVER_NAME, .dev = &gfs_dev_desc, .strings = gfs_dev_strings, - .max_speed = USB_SPEED_HIGH, + .max_speed = USB_SPEED_SUPER, .bind = gfs_bind, .unbind = gfs_unbind, }; diff --git a/drivers/usb/gadget/legacy/multi.c b/drivers/usb/gadget/legacy/multi.c index a70a406580ea..3b7fc5c7e9c3 100644 --- a/drivers/usb/gadget/legacy/multi.c +++ b/drivers/usb/gadget/legacy/multi.c @@ -486,7 +486,7 @@ static struct usb_composite_driver multi_driver = { .name = "g_multi", .dev = &device_desc, .strings = dev_strings, - .max_speed = USB_SPEED_HIGH, + .max_speed = USB_SPEED_SUPER, .bind = multi_bind, .unbind = multi_unbind, .needs_serial = 1, diff --git a/drivers/usb/gadget/legacy/ncm.c b/drivers/usb/gadget/legacy/ncm.c index 0aba68253e3d..2fb4a847dd52 100644 --- a/drivers/usb/gadget/legacy/ncm.c +++ b/drivers/usb/gadget/legacy/ncm.c @@ -203,7 +203,7 @@ static struct usb_composite_driver ncm_driver = { .name = "g_ncm", .dev = &device_desc, .strings = dev_strings, - .max_speed = USB_SPEED_HIGH, + .max_speed = USB_SPEED_SUPER, .bind = gncm_bind, .unbind = gncm_unbind, }; From 51f94eb089ee1aed6a846925265bc645dc30a99f Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Thu, 9 Jan 2020 13:17:21 +0000 Subject: [PATCH 3055/3715] usb: gadget: f_ncm: Use atomic_t to track in-flight request commit 5b24c28cfe136597dc3913e1c00b119307a20c7e upstream. Currently ncm->notify_req is used to flag when a request is in-flight. ncm->notify_req is set to NULL and when a request completes it is subsequently reset. This is fundamentally buggy in that the unbind logic of the NCM driver will unconditionally free ncm->notify_req leading to a NULL pointer dereference. Fixes: 40d133d7f542 ("usb: gadget: f_ncm: convert to new function interface with backward compatibility") Cc: stable Signed-off-by: Bryan O'Donoghue Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_ncm.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index 45b334ceaf2e..5c2d39232bb0 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -58,6 +58,7 @@ struct f_ncm { struct usb_ep *notify; struct usb_request *notify_req; u8 notify_state; + atomic_t notify_count; bool is_open; const struct ndp_parser_opts *parser_opts; @@ -553,7 +554,7 @@ static void ncm_do_notify(struct f_ncm *ncm) int status; /* notification already in flight? */ - if (!req) + if (atomic_read(&ncm->notify_count)) return; event = req->buf; @@ -593,7 +594,8 @@ static void ncm_do_notify(struct f_ncm *ncm) event->bmRequestType = 0xA1; event->wIndex = cpu_to_le16(ncm->ctrl_id); - ncm->notify_req = NULL; + atomic_inc(&ncm->notify_count); + /* * In double buffering if there is a space in FIFO, * completion callback can be called right after the call, @@ -603,7 +605,7 @@ static void ncm_do_notify(struct f_ncm *ncm) status = usb_ep_queue(ncm->notify, req, GFP_ATOMIC); spin_lock(&ncm->lock); if (status < 0) { - ncm->notify_req = req; + atomic_dec(&ncm->notify_count); DBG(cdev, "notify --> %d\n", status); } } @@ -638,17 +640,19 @@ static void ncm_notify_complete(struct usb_ep *ep, struct usb_request *req) case 0: VDBG(cdev, "Notification %02x sent\n", event->bNotificationType); + atomic_dec(&ncm->notify_count); break; case -ECONNRESET: case -ESHUTDOWN: + atomic_set(&ncm->notify_count, 0); ncm->notify_state = NCM_NOTIFY_NONE; break; default: DBG(cdev, "event %02x --> %d\n", event->bNotificationType, req->status); + atomic_dec(&ncm->notify_count); break; } - ncm->notify_req = req; ncm_do_notify(ncm); spin_unlock(&ncm->lock); } @@ -1632,6 +1636,11 @@ static void ncm_unbind(struct usb_configuration *c, struct usb_function *f) ncm_string_defs[0].id = 0; usb_free_all_descriptors(f); + if (atomic_read(&ncm->notify_count)) { + usb_ep_dequeue(ncm->notify, ncm->notify_req); + atomic_set(&ncm->notify_count, 0); + } + kfree(ncm->notify_req->buf); usb_ep_free_request(ncm->notify, ncm->notify_req); } From 7d0c0dfe866736e221ce1773a6b45b06af945e9b Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Thu, 9 Jan 2020 13:17:22 +0000 Subject: [PATCH 3056/3715] usb: gadget: f_ecm: Use atomic_t to track in-flight request commit d710562e01c48d59be3f60d58b7a85958b39aeda upstream. Currently ecm->notify_req is used to flag when a request is in-flight. ecm->notify_req is set to NULL and when a request completes it is subsequently reset. This is fundamentally buggy in that the unbind logic of the ECM driver will unconditionally free ecm->notify_req leading to a NULL pointer dereference. Fixes: da741b8c56d6 ("usb ethernet gadget: split CDC Ethernet function") Cc: stable Signed-off-by: Bryan O'Donoghue Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_ecm.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/function/f_ecm.c b/drivers/usb/gadget/function/f_ecm.c index dc99ed94f03d..8e3e44382785 100644 --- a/drivers/usb/gadget/function/f_ecm.c +++ b/drivers/usb/gadget/function/f_ecm.c @@ -56,6 +56,7 @@ struct f_ecm { struct usb_ep *notify; struct usb_request *notify_req; u8 notify_state; + atomic_t notify_count; bool is_open; /* FIXME is_open needs some irq-ish locking @@ -384,7 +385,7 @@ static void ecm_do_notify(struct f_ecm *ecm) int status; /* notification already in flight? */ - if (!req) + if (atomic_read(&ecm->notify_count)) return; event = req->buf; @@ -424,10 +425,10 @@ static void ecm_do_notify(struct f_ecm *ecm) event->bmRequestType = 0xA1; event->wIndex = cpu_to_le16(ecm->ctrl_id); - ecm->notify_req = NULL; + atomic_inc(&ecm->notify_count); status = usb_ep_queue(ecm->notify, req, GFP_ATOMIC); if (status < 0) { - ecm->notify_req = req; + atomic_dec(&ecm->notify_count); DBG(cdev, "notify --> %d\n", status); } } @@ -452,17 +453,19 @@ static void ecm_notify_complete(struct usb_ep *ep, struct usb_request *req) switch (req->status) { case 0: /* no fault */ + atomic_dec(&ecm->notify_count); break; case -ECONNRESET: case -ESHUTDOWN: + atomic_set(&ecm->notify_count, 0); ecm->notify_state = ECM_NOTIFY_NONE; break; default: DBG(cdev, "event %02x --> %d\n", event->bNotificationType, req->status); + atomic_dec(&ecm->notify_count); break; } - ecm->notify_req = req; ecm_do_notify(ecm); } @@ -909,6 +912,11 @@ static void ecm_unbind(struct usb_configuration *c, struct usb_function *f) usb_free_all_descriptors(f); + if (atomic_read(&ecm->notify_count)) { + usb_ep_dequeue(ecm->notify, ecm->notify_req); + atomic_set(&ecm->notify_count, 0); + } + kfree(ecm->notify_req->buf); usb_ep_free_request(ecm->notify, ecm->notify_req); } From d219e276b2bc7c5517bcd822e9c6ccb3cfbfa0c3 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 1 Feb 2020 09:05:30 +0100 Subject: [PATCH 3057/3715] ALSA: dummy: Fix PCM format loop in proc output commit 2acf25f13ebe8beb40e97a1bbe76f36277c64f1e upstream. The loop termination for iterating over all formats should contain SNDRV_PCM_FORMAT_LAST, not less than it. Fixes: 9b151fec139d ("ALSA: dummy - Add debug proc file") Cc: Link: https://lore.kernel.org/r/20200201080530.22390-3-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/drivers/dummy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c index c0939a0164a6..aeb65d7d4cb3 100644 --- a/sound/drivers/dummy.c +++ b/sound/drivers/dummy.c @@ -933,7 +933,7 @@ static void print_formats(struct snd_dummy *dummy, { int i; - for (i = 0; i < SNDRV_PCM_FORMAT_LAST; i++) { + for (i = 0; i <= SNDRV_PCM_FORMAT_LAST; i++) { if (dummy->pcm_hw.formats & (1ULL << i)) snd_iprintf(buffer, " %s", snd_pcm_format_name(i)); } From bfc1bb3b70ddac8319b44a9fa69bd9656a6a2eeb Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Thu, 30 Jan 2020 22:12:50 -0800 Subject: [PATCH 3058/3715] media/v4l2-core: set pages dirty upon releasing DMA buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3c7470b6f68434acae459482ab920d1e3fabd1c7 upstream. After DMA is complete, and the device and CPU caches are synchronized, it's still required to mark the CPU pages as dirty, if the data was coming from the device. However, this driver was just issuing a bare put_page() call, without any set_page_dirty*() call. Fix the problem, by calling set_page_dirty_lock() if the CPU pages were potentially receiving data from the device. Link: http://lkml.kernel.org/r/20200107224558.2362728-11-jhubbard@nvidia.com Signed-off-by: John Hubbard Reviewed-by: Christoph Hellwig Acked-by: Hans Verkuil Cc: Mauro Carvalho Chehab Cc: Cc: Alex Williamson Cc: Aneesh Kumar K.V Cc: Björn Töpel Cc: Daniel Vetter Cc: Dan Williams Cc: Ira Weiny Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Gunthorpe Cc: Jens Axboe Cc: Jerome Glisse Cc: Jonathan Corbet Cc: Kirill A. Shutemov Cc: Leon Romanovsky Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/videobuf-dma-sg.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c index f412429cf5ba..c55e607f5631 100644 --- a/drivers/media/v4l2-core/videobuf-dma-sg.c +++ b/drivers/media/v4l2-core/videobuf-dma-sg.c @@ -352,8 +352,11 @@ int videobuf_dma_free(struct videobuf_dmabuf *dma) BUG_ON(dma->sglen); if (dma->pages) { - for (i = 0; i < dma->nr_pages; i++) + for (i = 0; i < dma->nr_pages; i++) { + if (dma->direction == DMA_FROM_DEVICE) + set_page_dirty_lock(dma->pages[i]); put_page(dma->pages[i]); + } kfree(dma->pages); dma->pages = NULL; } From 48d8bba5e94d87304391438f548cedf30aefa609 Mon Sep 17 00:00:00 2001 From: Helen Koike Date: Tue, 17 Dec 2019 21:00:22 +0100 Subject: [PATCH 3059/3715] media: v4l2-rect.h: fix v4l2_rect_map_inside() top/left adjustments commit f51e50db4c20d46930b33be3f208851265694f3e upstream. boundary->width and boundary->height are sizes relative to boundary->left and boundary->top coordinates, but they were not being taken into consideration to adjust r->left and r->top, leading to the following error: Consider the follow as initial values for boundary and r: struct v4l2_rect boundary = { .left = 100, .top = 100, .width = 800, .height = 600, } struct v4l2_rect r = { .left = 0, .top = 0, .width = 1920, .height = 960, } calling v4l2_rect_map_inside(&r, &boundary) was modifying r to: r = { .left = 0, .top = 0, .width = 800, .height = 600, } Which is wrongly outside the boundary rectangle, because: v4l2_rect_set_max_size(r, boundary); // r->width = 800, r->height = 600 ... if (r->left + r->width > boundary->width) // true r->left = boundary->width - r->width; // r->left = 800 - 800 if (r->top + r->height > boundary->height) // true r->top = boundary->height - r->height; // r->height = 600 - 600 Fix this by considering top/left coordinates from boundary. Fixes: ac49de8c49d7 ("[media] v4l2-rect.h: new header with struct v4l2_rect helper functions") Signed-off-by: Helen Koike Cc: # for v4.7 and up Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- include/media/v4l2-rect.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/media/v4l2-rect.h b/include/media/v4l2-rect.h index d2125f0cc7cd..1584c760b993 100644 --- a/include/media/v4l2-rect.h +++ b/include/media/v4l2-rect.h @@ -75,10 +75,10 @@ static inline void v4l2_rect_map_inside(struct v4l2_rect *r, r->left = boundary->left; if (r->top < boundary->top) r->top = boundary->top; - if (r->left + r->width > boundary->width) - r->left = boundary->width - r->width; - if (r->top + r->height > boundary->height) - r->top = boundary->height - r->height; + if (r->left + r->width > boundary->left + boundary->width) + r->left = boundary->left + boundary->width - r->width; + if (r->top + r->height > boundary->top + boundary->height) + r->top = boundary->top + boundary->height - r->height; } /** From 06495469423cd58f1801c80aeb30a17dcfb16e67 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 30 Jan 2020 22:13:51 -0800 Subject: [PATCH 3060/3715] lib/test_kasan.c: fix memory leak in kmalloc_oob_krealloc_more() commit 3e21d9a501bf99aee2e5835d7f34d8c823f115b5 upstream. In case memory resources for _ptr2_ were allocated, release them before return. Notice that in case _ptr1_ happens to be NULL, krealloc() behaves exactly like kmalloc(). Addresses-Coverity-ID: 1490594 ("Resource leak") Link: http://lkml.kernel.org/r/20200123160115.GA4202@embeddedor Fixes: 3f15801cdc23 ("lib: add kasan test module") Signed-off-by: Gustavo A. R. Silva Reviewed-by: Dmitry Vyukov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- lib/test_kasan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/test_kasan.c b/lib/test_kasan.c index d6e46dd1350b..1399d1000130 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -126,6 +126,7 @@ static noinline void __init kmalloc_oob_krealloc_more(void) if (!ptr1 || !ptr2) { pr_err("Allocation failed\n"); kfree(ptr1); + kfree(ptr2); return; } From 89f75bff6862cbf627b2b2dfc01da1a70df8ca9d Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Mon, 20 Jan 2020 12:35:47 +0800 Subject: [PATCH 3061/3715] irqdomain: Fix a memory leak in irq_domain_push_irq() commit 0f394daef89b38d58c91118a2b08b8a1b316703b upstream. Fix a memory leak reported by kmemleak: unreferenced object 0xffff000bc6f50e80 (size 128): comm "kworker/23:2", pid 201, jiffies 4294894947 (age 942.132s) hex dump (first 32 bytes): 00 00 00 00 41 00 00 00 86 c0 03 00 00 00 00 00 ....A........... 00 a0 b2 c6 0b 00 ff ff 40 51 fd 10 00 80 ff ff ........@Q...... backtrace: [<00000000e62d2240>] kmem_cache_alloc_trace+0x1a4/0x320 [<00000000279143c9>] irq_domain_push_irq+0x7c/0x188 [<00000000d9f4c154>] thunderx_gpio_probe+0x3ac/0x438 [<00000000fd09ec22>] pci_device_probe+0xe4/0x198 [<00000000d43eca75>] really_probe+0xdc/0x320 [<00000000d3ebab09>] driver_probe_device+0x5c/0xf0 [<000000005b3ecaa0>] __device_attach_driver+0x88/0xc0 [<000000004e5915f5>] bus_for_each_drv+0x7c/0xc8 [<0000000079d4db41>] __device_attach+0xe4/0x140 [<00000000883bbda9>] device_initial_probe+0x18/0x20 [<000000003be59ef6>] bus_probe_device+0x98/0xa0 [<0000000039b03d3f>] deferred_probe_work_func+0x74/0xa8 [<00000000870934ce>] process_one_work+0x1c8/0x470 [<00000000e3cce570>] worker_thread+0x1f8/0x428 [<000000005d64975e>] kthread+0xfc/0x128 [<00000000f0eaa764>] ret_from_fork+0x10/0x18 Fixes: 495c38d3001f ("irqdomain: Add irq_domain_{push,pop}_irq() functions") Signed-off-by: Kevin Hao Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200120043547.22271-1-haokexin@gmail.com Signed-off-by: Greg Kroah-Hartman --- kernel/irq/irqdomain.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 0f0e7975a309..b269ae16b10c 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -1538,6 +1538,7 @@ int irq_domain_push_irq(struct irq_domain *domain, int virq, void *arg) if (rv) { /* Restore the original irq_data. */ *root_irq_data = *child_irq_data; + kfree(child_irq_data); goto error; } From c314b8240df488cfad979ddc86394d1fcb3ba64c Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 22 Jan 2020 19:28:04 +0300 Subject: [PATCH 3062/3715] platform/x86: intel_scu_ipc: Fix interrupt support commit e48b72a568bbd641c91dad354138d3c17d03ee6f upstream. Currently the driver has disabled interrupt support for Tangier but actually interrupt works just fine if the command is not written twice in a row. Also we need to ack the interrupt in the handler. Signed-off-by: Mika Westerberg Reviewed-by: Andy Shevchenko Cc: stable@vger.kernel.org Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/intel_scu_ipc.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c index 2c85f75e32b0..2434ce8bead6 100644 --- a/drivers/platform/x86/intel_scu_ipc.c +++ b/drivers/platform/x86/intel_scu_ipc.c @@ -69,26 +69,22 @@ struct intel_scu_ipc_pdata_t { u32 i2c_base; u32 i2c_len; - u8 irq_mode; }; static const struct intel_scu_ipc_pdata_t intel_scu_ipc_lincroft_pdata = { .i2c_base = 0xff12b000, .i2c_len = 0x10, - .irq_mode = 0, }; /* Penwell and Cloverview */ static const struct intel_scu_ipc_pdata_t intel_scu_ipc_penwell_pdata = { .i2c_base = 0xff12b000, .i2c_len = 0x10, - .irq_mode = 1, }; static const struct intel_scu_ipc_pdata_t intel_scu_ipc_tangier_pdata = { .i2c_base = 0xff00d000, .i2c_len = 0x10, - .irq_mode = 0, }; struct intel_scu_ipc_dev { @@ -101,6 +97,9 @@ struct intel_scu_ipc_dev { static struct intel_scu_ipc_dev ipcdev; /* Only one for now */ +#define IPC_STATUS 0x04 +#define IPC_STATUS_IRQ BIT(2) + /* * IPC Read Buffer (Read Only): * 16 byte buffer for receiving data from SCU, if IPC command @@ -122,11 +121,8 @@ static DEFINE_MUTEX(ipclock); /* lock used to prevent multiple call to SCU */ */ static inline void ipc_command(struct intel_scu_ipc_dev *scu, u32 cmd) { - if (scu->irq_mode) { - reinit_completion(&scu->cmd_complete); - writel(cmd | IPC_IOC, scu->ipc_base); - } - writel(cmd, scu->ipc_base); + reinit_completion(&scu->cmd_complete); + writel(cmd | IPC_IOC, scu->ipc_base); } /* @@ -612,9 +608,10 @@ EXPORT_SYMBOL(intel_scu_ipc_i2c_cntrl); static irqreturn_t ioc(int irq, void *dev_id) { struct intel_scu_ipc_dev *scu = dev_id; + int status = ipc_read_status(scu); - if (scu->irq_mode) - complete(&scu->cmd_complete); + writel(status | IPC_STATUS_IRQ, scu->ipc_base + IPC_STATUS); + complete(&scu->cmd_complete); return IRQ_HANDLED; } @@ -640,8 +637,6 @@ static int ipc_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (!pdata) return -ENODEV; - scu->irq_mode = pdata->irq_mode; - err = pcim_enable_device(pdev); if (err) return err; From 4ab16247f636fd37ed2b7a64eb7572d793f8bb02 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 12 Dec 2019 20:50:55 +0100 Subject: [PATCH 3063/3715] KVM: arm64: Only sign-extend MMIO up to register width commit b6ae256afd32f96bec0117175b329d0dd617655e upstream. On AArch64 you can do a sign-extended load to either a 32-bit or 64-bit register, and we should only sign extend the register up to the width of the register as specified in the operation (by using the 32-bit Wn or 64-bit Xn register specifier). As it turns out, the architecture provides this decoding information in the SF ("Sixty-Four" -- how cute...) bit. Let's take advantage of this with the usual 32-bit/64-bit header file dance and do the right thing on AArch64 hosts. Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20191212195055.5541-1-christoffer.dall@arm.com Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/kvm_emulate.h | 5 +++++ arch/arm/include/asm/kvm_mmio.h | 2 ++ arch/arm64/include/asm/kvm_emulate.h | 5 +++++ arch/arm64/include/asm/kvm_mmio.h | 6 ++---- virt/kvm/arm/mmio.c | 6 ++++++ 5 files changed, 20 insertions(+), 4 deletions(-) diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 98089ffd91bb..078dbd25cca4 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -144,6 +144,11 @@ static inline bool kvm_vcpu_dabt_issext(struct kvm_vcpu *vcpu) return kvm_vcpu_get_hsr(vcpu) & HSR_SSE; } +static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu) +{ + return false; +} + static inline int kvm_vcpu_dabt_get_rd(struct kvm_vcpu *vcpu) { return (kvm_vcpu_get_hsr(vcpu) & HSR_SRT_MASK) >> HSR_SRT_SHIFT; diff --git a/arch/arm/include/asm/kvm_mmio.h b/arch/arm/include/asm/kvm_mmio.h index f3a7de71f515..848339d76f9a 100644 --- a/arch/arm/include/asm/kvm_mmio.h +++ b/arch/arm/include/asm/kvm_mmio.h @@ -26,6 +26,8 @@ struct kvm_decode { unsigned long rt; bool sign_extend; + /* Not used on 32-bit arm */ + bool sixty_four; }; void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 2b55aee7c051..92f70a34c5e6 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -188,6 +188,11 @@ static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu) return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE); } +static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SF); +} + static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu) { return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT; diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h index 75ea42079757..0240290cf764 100644 --- a/arch/arm64/include/asm/kvm_mmio.h +++ b/arch/arm64/include/asm/kvm_mmio.h @@ -21,13 +21,11 @@ #include #include -/* - * This is annoying. The mmio code requires this, even if we don't - * need any decoding. To be fixed. - */ struct kvm_decode { unsigned long rt; bool sign_extend; + /* Witdth of the register accessed by the faulting instruction is 64-bits */ + bool sixty_four; }; void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); diff --git a/virt/kvm/arm/mmio.c b/virt/kvm/arm/mmio.c index 3caee91bca08..878e0edb2e1b 100644 --- a/virt/kvm/arm/mmio.c +++ b/virt/kvm/arm/mmio.c @@ -117,6 +117,9 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) data = (data ^ mask) - mask; } + if (!vcpu->arch.mmio_decode.sixty_four) + data = data & 0xffffffff; + trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, &data); data = vcpu_data_host_to_guest(vcpu, data, len); @@ -137,6 +140,7 @@ static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len) unsigned long rt; int access_size; bool sign_extend; + bool sixty_four; if (kvm_vcpu_dabt_iss1tw(vcpu)) { /* page table accesses IO mem: tell guest to fix its TTBR */ @@ -150,11 +154,13 @@ static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len) *is_write = kvm_vcpu_dabt_iswrite(vcpu); sign_extend = kvm_vcpu_dabt_issext(vcpu); + sixty_four = kvm_vcpu_dabt_issf(vcpu); rt = kvm_vcpu_dabt_get_rd(vcpu); *len = access_size; vcpu->arch.mmio_decode.sign_extend = sign_extend; vcpu->arch.mmio_decode.rt = rt; + vcpu->arch.mmio_decode.sixty_four = sixty_four; return 0; } From dd2624ae656864cb10673de3cd9f85d964965ec3 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Fri, 17 Jan 2020 17:02:07 +0300 Subject: [PATCH 3064/3715] MIPS: fix indentation of the 'RELOCS' message commit a53998802e178451701d59d38e36f551422977ba upstream. quiet_cmd_relocs lacks a whitespace which results in: LD vmlinux SORTEX vmlinux SYSMAP System.map RELOCS vmlinux Building modules, stage 2. MODPOST 64 modules After this patch: LD vmlinux SORTEX vmlinux SYSMAP System.map RELOCS vmlinux Building modules, stage 2. MODPOST 64 modules Typo is present in kernel tree since the introduction of relocatable kernel support in commit e818fac595ab ("MIPS: Generate relocation table when CONFIG_RELOCATABLE"), but the relocation scripts were moved to Makefile.postlink later with commit 44079d3509ae ("MIPS: Use Makefile.postlink to insert relocations into vmlinux"). Fixes: 44079d3509ae ("MIPS: Use Makefile.postlink to insert relocations into vmlinux") Cc: # v4.11+ Signed-off-by: Alexander Lobakin [paulburton@kernel.org: Fixup commit references in commit message.] Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: Masahiro Yamada Cc: Rob Herring Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/mips/Makefile.postlink | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/Makefile.postlink b/arch/mips/Makefile.postlink index 4eea4188cb20..13e0beb9eee3 100644 --- a/arch/mips/Makefile.postlink +++ b/arch/mips/Makefile.postlink @@ -12,7 +12,7 @@ __archpost: include scripts/Kbuild.include CMD_RELOCS = arch/mips/boot/tools/relocs -quiet_cmd_relocs = RELOCS $@ +quiet_cmd_relocs = RELOCS $@ cmd_relocs = $(CMD_RELOCS) $@ # `@true` prevents complaint when there is nothing to be done From 6d4f4594e80f8f768d1448fd55f4ccbbb87e806a Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Thu, 16 Jan 2020 19:59:04 +0100 Subject: [PATCH 3065/3715] s390/mm: fix dynamic pagetable upgrade for hugetlbfs commit 5f490a520bcb393389a4d44bec90afcb332eb112 upstream. Commit ee71d16d22bb ("s390/mm: make TASK_SIZE independent from the number of page table levels") changed the logic of TASK_SIZE and also removed the arch_mmap_check() implementation for s390. This combination has a subtle effect on how get_unmapped_area() for hugetlbfs pages works. It is now possible that a user process establishes a hugetlbfs mapping at an address above 4 TB, without triggering a dynamic pagetable upgrade from 3 to 4 levels. This is because hugetlbfs mappings will not use mm->get_unmapped_area, but rather file->f_op->get_unmapped_area, which currently is the generic implementation of hugetlb_get_unmapped_area() that does not know about s390 dynamic pagetable upgrades, but with the new definition of TASK_SIZE, it will now allow mappings above 4 TB. Subsequent access to such a mapped address above 4 TB will result in a page fault loop, because the CPU cannot translate such a large address with 3 pagetable levels. The fault handler will try to map in a hugepage at the address, but due to the folded pagetable logic it will end up with creating entries in the 3 level pagetable, possibly overwriting existing mappings, and then it all repeats when the access is retried. Apart from the page fault loop, this can have various nasty effects, e.g. kernel panic from one of the BUG_ON() checks in memory management code, or even data loss if an existing mapping gets overwritten. Fix this by implementing HAVE_ARCH_HUGETLB_UNMAPPED_AREA support for s390, providing an s390 version for hugetlb_get_unmapped_area() with pagetable upgrade support similar to arch_get_unmapped_area(), which will then be used instead of the generic version. Fixes: ee71d16d22bb ("s390/mm: make TASK_SIZE independent from the number of page table levels") Cc: # 4.12+ Signed-off-by: Gerald Schaefer Signed-off-by: Vasily Gorbik Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/page.h | 2 + arch/s390/mm/hugetlbpage.c | 100 ++++++++++++++++++++++++++++++++++- 2 files changed, 101 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 41e3908b397f..779c589b7089 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -33,6 +33,8 @@ #define ARCH_HAS_PREPARE_HUGEPAGE #define ARCH_HAS_HUGEPAGE_CLEAR_FLUSH +#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA + #include #ifndef __ASSEMBLY__ diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index e804090f4470..e19ea9ebe960 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -2,7 +2,7 @@ /* * IBM System z Huge TLB Page Support for Kernel. * - * Copyright IBM Corp. 2007,2016 + * Copyright IBM Corp. 2007,2020 * Author(s): Gerald Schaefer */ @@ -11,6 +11,9 @@ #include #include +#include +#include +#include /* * If the bit selected by single-bit bitmask "a" is set within "x", move @@ -243,3 +246,98 @@ static __init int setup_hugepagesz(char *opt) return 1; } __setup("hugepagesz=", setup_hugepagesz); + +static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, + unsigned long addr, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ + struct hstate *h = hstate_file(file); + struct vm_unmapped_area_info info; + + info.flags = 0; + info.length = len; + info.low_limit = current->mm->mmap_base; + info.high_limit = TASK_SIZE; + info.align_mask = PAGE_MASK & ~huge_page_mask(h); + info.align_offset = 0; + return vm_unmapped_area(&info); +} + +static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, + unsigned long addr0, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ + struct hstate *h = hstate_file(file); + struct vm_unmapped_area_info info; + unsigned long addr; + + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.length = len; + info.low_limit = max(PAGE_SIZE, mmap_min_addr); + info.high_limit = current->mm->mmap_base; + info.align_mask = PAGE_MASK & ~huge_page_mask(h); + info.align_offset = 0; + addr = vm_unmapped_area(&info); + + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. + */ + if (addr & ~PAGE_MASK) { + VM_BUG_ON(addr != -ENOMEM); + info.flags = 0; + info.low_limit = TASK_UNMAPPED_BASE; + info.high_limit = TASK_SIZE; + addr = vm_unmapped_area(&info); + } + + return addr; +} + +unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct hstate *h = hstate_file(file); + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int rc; + + if (len & ~huge_page_mask(h)) + return -EINVAL; + if (len > TASK_SIZE - mmap_min_addr) + return -ENOMEM; + + if (flags & MAP_FIXED) { + if (prepare_hugepage_range(file, addr, len)) + return -EINVAL; + goto check_asce_limit; + } + + if (addr) { + addr = ALIGN(addr, huge_page_size(h)); + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && addr >= mmap_min_addr && + (!vma || addr + len <= vm_start_gap(vma))) + goto check_asce_limit; + } + + if (mm->get_unmapped_area == arch_get_unmapped_area) + addr = hugetlb_get_unmapped_area_bottomup(file, addr, len, + pgoff, flags); + else + addr = hugetlb_get_unmapped_area_topdown(file, addr, len, + pgoff, flags); + if (addr & ~PAGE_MASK) + return addr; + +check_asce_limit: + if (addr + len > current->mm->context.asce_limit && + addr + len <= TASK_SIZE) { + rc = crst_table_upgrade(mm, addr + len); + if (rc) + return (unsigned long) rc; + } + return addr; +} From 5823cb6e4cb50e332e0947291ab79ea1b869ed69 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Mon, 6 Jan 2020 13:50:02 -0600 Subject: [PATCH 3066/3715] powerpc/xmon: don't access ASDR in VMs commit c2a20711fc181e7f22ee5c16c28cb9578af84729 upstream. ASDR is HV-privileged and must only be accessed in HV-mode. Fixes a Program Check (0x700) when xmon in a VM dumps SPRs. Fixes: d1e1b351f50f ("powerpc/xmon: Add ISA v3.0 SPRs to SPR dump") Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: Sukadev Bhattiprolu Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200107021633.GB29843@us.ibm.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/xmon/xmon.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 51a53fd51722..0885993b2fb4 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1830,15 +1830,14 @@ static void dump_300_sprs(void) printf("pidr = %.16lx tidr = %.16lx\n", mfspr(SPRN_PID), mfspr(SPRN_TIDR)); - printf("asdr = %.16lx psscr = %.16lx\n", - mfspr(SPRN_ASDR), hv ? mfspr(SPRN_PSSCR) - : mfspr(SPRN_PSSCR_PR)); + printf("psscr = %.16lx\n", + hv ? mfspr(SPRN_PSSCR) : mfspr(SPRN_PSSCR_PR)); if (!hv) return; - printf("ptcr = %.16lx\n", - mfspr(SPRN_PTCR)); + printf("ptcr = %.16lx asdr = %.16lx\n", + mfspr(SPRN_PTCR), mfspr(SPRN_ASDR)); #endif } From 11f54321c287f761fcf7c7e184d2569b4ca07b94 Mon Sep 17 00:00:00 2001 From: Pingfan Liu Date: Fri, 10 Jan 2020 12:54:02 +0800 Subject: [PATCH 3067/3715] powerpc/pseries: Advance pfn if section is not present in lmb_is_removable() commit fbee6ba2dca30d302efe6bddb3a886f5e964a257 upstream. In lmb_is_removable(), if a section is not present, it should continue to test the rest of the sections in the block. But the current code fails to do so. Fixes: 51925fb3c5c9 ("powerpc/pseries: Implement memory hotplug remove in the kernel") Cc: stable@vger.kernel.org # v4.1+ Signed-off-by: Pingfan Liu Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1578632042-12415-1-git-send-email-kernelfans@gmail.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/pseries/hotplug-memory.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index fdfce7a46d73..a0847be0b035 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -452,8 +452,10 @@ static bool lmb_is_removable(struct of_drconf_cell *lmb) for (i = 0; i < scns_per_block; i++) { pfn = PFN_DOWN(phys_addr); - if (!pfn_present(pfn)) + if (!pfn_present(pfn)) { + phys_addr += MIN_MEMORY_BLOCK_SIZE; continue; + } rc &= is_mem_section_removable(pfn, PAGES_PER_SECTION); phys_addr += MIN_MEMORY_BLOCK_SIZE; From 3760557c3a9bc43bbd782052c81d6aebe5d7317f Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 4 Dec 2019 16:27:49 +0100 Subject: [PATCH 3068/3715] mmc: spi: Toggle SPI polarity, do not hardcode it commit af3ed119329cf9690598c5a562d95dfd128e91d6 upstream. The code in mmc_spi_initsequence() tries to send a burst with high chipselect and for this reason hardcodes the device into SPI_CS_HIGH. This is not good because the SPI_CS_HIGH flag indicates logical "asserted" CS not always the physical level. In some cases the signal is inverted in the GPIO library and in that case SPI_CS_HIGH is already set, and enforcing SPI_CS_HIGH again will actually drive it low. Instead of hard-coding this, toggle the polarity so if the default is LOW it goes high to assert chipselect but if it is already high then toggle it low instead. Cc: Phil Elwell Reported-by: Mark Brown Signed-off-by: Linus Walleij Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20191204152749.12652-1-linus.walleij@linaro.org Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/mmc_spi.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c index ea254d00541f..24795454d106 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -1154,17 +1154,22 @@ static void mmc_spi_initsequence(struct mmc_spi_host *host) * SPI protocol. Another is that when chipselect is released while * the card returns BUSY status, the clock must issue several cycles * with chipselect high before the card will stop driving its output. + * + * SPI_CS_HIGH means "asserted" here. In some cases like when using + * GPIOs for chip select, SPI_CS_HIGH is set but this will be logically + * inverted by gpiolib, so if we want to ascertain to drive it high + * we should toggle the default with an XOR as we do here. */ - host->spi->mode |= SPI_CS_HIGH; + host->spi->mode ^= SPI_CS_HIGH; if (spi_setup(host->spi) != 0) { /* Just warn; most cards work without it. */ dev_warn(&host->spi->dev, "can't change chip-select polarity\n"); - host->spi->mode &= ~SPI_CS_HIGH; + host->spi->mode ^= SPI_CS_HIGH; } else { mmc_spi_readbytes(host, 18); - host->spi->mode &= ~SPI_CS_HIGH; + host->spi->mode ^= SPI_CS_HIGH; if (spi_setup(host->spi) != 0) { /* Wot, we can't get the same setup we had before? */ dev_err(&host->spi->dev, From bfd90b0b3732843cbd9fc92f58cbe61333754da6 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 17 Dec 2019 20:08:11 +0100 Subject: [PATCH 3069/3715] ACPI: video: Do not export a non working backlight interface on MSI MS-7721 boards commit d21a91629f4b8e794fc4c0e0c17c85cedf1d806c upstream. Despite our heuristics to not wrongly export a non working ACPI backlight interface on desktop machines, we still end up exporting one on desktops using a motherboard from the MSI MS-7721 series. I've looked at improving the heuristics, but in this case a quirk seems to be the only way to solve this. While at it also add a comment to separate the video_detect_force_none entries in the video_detect_dmi_table from other type of entries, as we already do for the other entry types. Cc: All applicable BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1783786 Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/video_detect.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 43587ac680e4..214c4e2e8ade 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -328,6 +328,11 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Precision 7510"), }, }, + + /* + * Desktops which falsely report a backlight and which our heuristics + * for this do not catch. + */ { .callback = video_detect_force_none, .ident = "Dell OptiPlex 9020M", @@ -336,6 +341,14 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 9020M"), }, }, + { + .callback = video_detect_force_none, + .ident = "MSI MS-7721", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "MSI"), + DMI_MATCH(DMI_PRODUCT_NAME, "MS-7721"), + }, + }, { }, }; From 7e372f648d9efd8002e70845eedd752e2197c6c5 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 9 Jan 2020 07:59:07 -0800 Subject: [PATCH 3070/3715] alarmtimer: Unregister wakeup source when module get fails commit 6b6d188aae79a630957aefd88ff5c42af6553ee3 upstream. The alarmtimer_rtc_add_device() function creates a wakeup source and then tries to grab a module reference. If that fails the function returns early with an error code, but fails to remove the wakeup source. Cleanup this exit path so there is no dangling wakeup source, which is named 'alarmtime' left allocated which will conflict with another RTC device that may be registered later. Fixes: 51218298a25e ("alarmtimer: Ensure RTC module is not unloaded") Signed-off-by: Stephen Boyd Signed-off-by: Thomas Gleixner Reviewed-by: Douglas Anderson Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200109155910.907-2-swboyd@chromium.org Signed-off-by: Greg Kroah-Hartman --- kernel/time/alarmtimer.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index f4255a65c44b..9eece67f29f3 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -91,6 +91,7 @@ static int alarmtimer_rtc_add_device(struct device *dev, unsigned long flags; struct rtc_device *rtc = to_rtc_device(dev); struct wakeup_source *__ws; + int ret = 0; if (rtcdev) return -EBUSY; @@ -105,8 +106,8 @@ static int alarmtimer_rtc_add_device(struct device *dev, spin_lock_irqsave(&rtcdev_lock, flags); if (!rtcdev) { if (!try_module_get(rtc->owner)) { - spin_unlock_irqrestore(&rtcdev_lock, flags); - return -1; + ret = -1; + goto unlock; } rtcdev = rtc; @@ -115,11 +116,12 @@ static int alarmtimer_rtc_add_device(struct device *dev, ws = __ws; __ws = NULL; } +unlock: spin_unlock_irqrestore(&rtcdev_lock, flags); wakeup_source_unregister(__ws); - return 0; + return ret; } static inline void alarmtimer_rtc_timer_init(void) From c68fb6c809824d4f9b25357b1969d4631c5a8e01 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Sat, 9 Feb 2019 16:54:20 +0800 Subject: [PATCH 3071/3715] ubifs: Reject unsupported ioctl flags explicitly commit 2fe8b2d5578d7d142982e3bf62e4c0caf8b8fe02 upstream. Reject unsupported ioctl flags explicitly, so the following command on a regular ubifs file will fail: chattr +d ubifs_file And xfstests generic/424 will pass. Signed-off-by: Hou Tao Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/ioctl.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index fdc311246807..55c7e3eb60e8 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -28,6 +28,11 @@ #include #include "ubifs.h" +/* Need to be kept consistent with checked flags in ioctl2ubifs() */ +#define UBIFS_SUPPORTED_IOCTL_FLAGS \ + (FS_COMPR_FL | FS_SYNC_FL | FS_APPEND_FL | \ + FS_IMMUTABLE_FL | FS_DIRSYNC_FL) + /** * ubifs_set_inode_flags - set VFS inode flags. * @inode: VFS inode to set flags for @@ -166,6 +171,9 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (get_user(flags, (int __user *) arg)) return -EFAULT; + if (flags & ~UBIFS_SUPPORTED_IOCTL_FLAGS) + return -EOPNOTSUPP; + if (!S_ISDIR(inode->i_mode)) flags &= ~FS_DIRSYNC_FL; From d95ec03f72325b9830b2a204a93d4233df265139 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Dec 2019 14:23:24 -0800 Subject: [PATCH 3072/3715] ubifs: Fix FS_IOC_SETFLAGS unexpectedly clearing encrypt flag commit 2b57067a7778484c10892fa191997bfda29fea13 upstream. UBIFS's implementation of FS_IOC_SETFLAGS fails to preserve existing inode flags that aren't settable by FS_IOC_SETFLAGS, namely the encrypt flag. This causes the encrypt flag to be unexpectedly cleared. Fix it by preserving existing unsettable flags, like ext4 and f2fs do. Test case with kvm-xfstests shell: FSTYP=ubifs KEYCTL_PROG=keyctl . fs/ubifs/config . ~/xfstests/common/encrypt dev=$(__blkdev_to_ubi_volume /dev/vdc) ubiupdatevol -t $dev mount $dev /mnt -t ubifs k=$(_generate_session_encryption_key) mkdir /mnt/edir xfs_io -c "set_encpolicy $k" /mnt/edir echo contents > /mnt/edir/file chattr +i /mnt/edir/file chattr -i /mnt/edir/file With the bug, the following errors occur on the last command: [ 18.081559] fscrypt (ubifs, inode 67): Inconsistent encryption context (parent directory: 65) chattr: Operation not permitted while reading flags on /mnt/edir/file Fixes: d475a507457b ("ubifs: Add skeleton for fscrypto") Cc: # v4.10+ Signed-off-by: Eric Biggers Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/ioctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index 55c7e3eb60e8..1f6d16105990 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -129,7 +129,8 @@ static int setflags(struct inode *inode, int flags) } } - ui->flags = ioctl2ubifs(flags); + ui->flags &= ~ioctl2ubifs(UBIFS_SUPPORTED_IOCTL_FLAGS); + ui->flags |= ioctl2ubifs(flags); ubifs_set_inode_flags(inode); inode->i_ctime = current_time(inode); release = ui->dirty; From 3de94c38e987354f52b95c2be940f8685c00f807 Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Sat, 11 Jan 2020 17:50:36 +0800 Subject: [PATCH 3073/3715] ubifs: Fix deadlock in concurrent bulk-read and writepage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f5de5b83303e61b1f3fb09bd77ce3ac2d7a475f2 upstream. In ubifs, concurrent execution of writepage and bulk read on the same file may cause ABBA deadlock, for example (Reproduce method see Link): Process A(Bulk-read starts from page4) Process B(write page4 back) vfs_read wb_workfn or fsync ... ... generic_file_buffered_read write_cache_pages ubifs_readpage LOCK(page4) ubifs_bulk_read ubifs_writepage LOCK(ui->ui_mutex) ubifs_write_inode ubifs_do_bulk_read LOCK(ui->ui_mutex) find_or_create_page(alloc page4) ↑ LOCK(page4) <-- ABBA deadlock occurs! In order to ensure the serialization execution of bulk read, we can't remove the big lock 'ui->ui_mutex' in ubifs_bulk_read(). Instead, we allow ubifs_do_bulk_read() to lock page failed by replacing find_or_create_page(FGP_LOCK) with pagecache_get_page(FGP_LOCK | FGP_NOWAIT). Signed-off-by: Zhihao Cheng Suggested-by: zhangyi (F) Cc: Fixes: 4793e7c5e1c ("UBIFS: add bulk-read facility") Link: https://bugzilla.kernel.org/show_bug.cgi?id=206153 Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/file.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index a02aa59d1e24..46e5a58c4b05 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -797,7 +797,9 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu, if (page_offset > end_index) break; - page = find_or_create_page(mapping, page_offset, ra_gfp_mask); + page = pagecache_get_page(mapping, page_offset, + FGP_LOCK|FGP_ACCESSED|FGP_CREAT|FGP_NOWAIT, + ra_gfp_mask); if (!page) break; if (!PageUptodate(page)) From 969917d2e893cb954ec10524da0fa37bd9df624b Mon Sep 17 00:00:00 2001 From: Yurii Monakov Date: Tue, 17 Dec 2019 14:38:36 +0300 Subject: [PATCH 3074/3715] PCI: keystone: Fix link training retries initiation [ Upstream commit 6df19872d881641e6394f93ef2938cffcbdae5bb ] ks_pcie_stop_link() function does not clear LTSSM_EN_VAL bit so link training was not triggered more than once after startup. In configurations where link can be unstable during early boot, for example, under low temperature, it will never be established. Fixes: 0c4ffcfe1fbc ("PCI: keystone: Add TI Keystone PCIe driver") Signed-off-by: Yurii Monakov Signed-off-by: Lorenzo Pieralisi Acked-by: Andrew Murray Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/pci/dwc/pci-keystone-dw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/dwc/pci-keystone-dw.c b/drivers/pci/dwc/pci-keystone-dw.c index 2fb20b887d2a..4cf2662930d8 100644 --- a/drivers/pci/dwc/pci-keystone-dw.c +++ b/drivers/pci/dwc/pci-keystone-dw.c @@ -510,7 +510,7 @@ void ks_dw_pcie_initiate_link_train(struct keystone_pcie *ks_pcie) /* Disable Link training */ val = ks_dw_app_readl(ks_pcie, CMD_STATUS); val &= ~LTSSM_EN_VAL; - ks_dw_app_writel(ks_pcie, CMD_STATUS, LTSSM_EN_VAL | val); + ks_dw_app_writel(ks_pcie, CMD_STATUS, val); /* Initiate Link Training */ val = ks_dw_app_readl(ks_pcie, CMD_STATUS); From 87c639a95a97355d933f56c7f4d37965e220ff54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Thu, 2 Jan 2020 11:42:16 +0100 Subject: [PATCH 3075/3715] mmc: sdhci-of-at91: fix memleak on clk_get failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a04184ce777b46e92c2b3c93c6dcb2754cb005e1 ] sdhci_alloc_host() does its work not using managed infrastructure, so needs explicit free on error path. Add it where needed. Cc: Fixes: bb5f8ea4d514 ("mmc: sdhci-of-at91: introduce driver for the Atmel SDMMC") Signed-off-by: Michał Mirosław Acked-by: Ludovic Desroches Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/b2a44d5be2e06ff075f32477e466598bb0f07b36.1577961679.git.mirq-linux@rere.qmqm.pl Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/sdhci-of-at91.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c index 564e7be21e06..1dadd460cc8f 100644 --- a/drivers/mmc/host/sdhci-of-at91.c +++ b/drivers/mmc/host/sdhci-of-at91.c @@ -331,19 +331,22 @@ static int sdhci_at91_probe(struct platform_device *pdev) priv->mainck = devm_clk_get(&pdev->dev, "baseclk"); if (IS_ERR(priv->mainck)) { dev_err(&pdev->dev, "failed to get baseclk\n"); - return PTR_ERR(priv->mainck); + ret = PTR_ERR(priv->mainck); + goto sdhci_pltfm_free; } priv->hclock = devm_clk_get(&pdev->dev, "hclock"); if (IS_ERR(priv->hclock)) { dev_err(&pdev->dev, "failed to get hclock\n"); - return PTR_ERR(priv->hclock); + ret = PTR_ERR(priv->hclock); + goto sdhci_pltfm_free; } priv->gck = devm_clk_get(&pdev->dev, "multclk"); if (IS_ERR(priv->gck)) { dev_err(&pdev->dev, "failed to get multclk\n"); - return PTR_ERR(priv->gck); + ret = PTR_ERR(priv->gck); + goto sdhci_pltfm_free; } ret = sdhci_at91_set_clks_presets(&pdev->dev); From 7c5cf5c43fdb32354b6653dddd9f88a0393d81ab Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 20 Jan 2020 14:31:59 -0800 Subject: [PATCH 3076/3715] ubifs: don't trigger assertion on invalid no-key filename [ Upstream commit f0d07a98a070bb5e443df19c3aa55693cbca9341 ] If userspace provides an invalid fscrypt no-key filename which encodes a hash value with any of the UBIFS node type bits set (i.e. the high 3 bits), gracefully report ENOENT rather than triggering ubifs_assert(). Test case with kvm-xfstests shell: . fs/ubifs/config . ~/xfstests/common/encrypt dev=$(__blkdev_to_ubi_volume /dev/vdc) ubiupdatevol $dev -t mount $dev /mnt -t ubifs mkdir /mnt/edir xfs_io -c set_encpolicy /mnt/edir rm /mnt/edir/_,,,,,DAAAAAAAAAAAAAAAAAAAAAAAAAA With the bug, the following assertion fails on the 'rm' command: [ 19.066048] UBIFS error (ubi0:0 pid 379): ubifs_assert_failed: UBIFS assert failed: !(hash & ~UBIFS_S_KEY_HASH_MASK), in fs/ubifs/key.h:170 Fixes: f4f61d2cc6d8 ("ubifs: Implement encrypted filenames") Cc: # v4.10+ Link: https://lore.kernel.org/r/20200120223201.241390-5-ebiggers@kernel.org Signed-off-by: Eric Biggers Signed-off-by: Sasha Levin --- fs/ubifs/dir.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 4e6e32c0c08a..358abc26dbc0 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -253,6 +253,8 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, if (nm.hash) { ubifs_assert(fname_len(&nm) == 0); ubifs_assert(fname_name(&nm) == NULL); + if (nm.hash & ~UBIFS_S_KEY_HASH_MASK) + goto done; /* ENOENT */ dent_key_init_hash(c, &key, dir->i_ino, nm.hash); err = ubifs_tnc_lookup_dh(c, &key, dent, nm.minor_hash); } else { From 0fb0ea3cb8cc45298ed0be3f3732b7ea6143aecc Mon Sep 17 00:00:00 2001 From: Tianyu Lan Date: Sat, 25 Jan 2020 16:50:47 -0500 Subject: [PATCH 3077/3715] hv_balloon: Balloon up according to request page number commit d33c240d47dab4fd15123d9e73fc8810cbc6ed6a upstream. Current code has assumption that balloon request memory size aligns with 2MB. But actually Hyper-V doesn't guarantee such alignment. When balloon driver receives non-aligned balloon request, it produces warning and balloon up more memory than requested in order to keep 2MB alignment. Remove the warning and balloon up memory according to actual requested memory size. Fixes: f6712238471a ("hv: hv_balloon: avoid memory leak on alloc_error of 2MB memory block") Cc: stable@vger.kernel.org Reviewed-by: Vitaly Kuznetsov Signed-off-by: Tianyu Lan Reviewed-by: Michael Kelley Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_balloon.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index 0824405f93fb..2d93c8f454bc 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -1170,10 +1170,7 @@ static unsigned int alloc_balloon_pages(struct hv_dynmem_device *dm, unsigned int i = 0; struct page *pg; - if (num_pages < alloc_unit) - return 0; - - for (i = 0; (i * alloc_unit) < num_pages; i++) { + for (i = 0; i < num_pages / alloc_unit; i++) { if (bl_resp->hdr.size + sizeof(union dm_mem_page_range) > PAGE_SIZE) return i * alloc_unit; @@ -1207,7 +1204,7 @@ static unsigned int alloc_balloon_pages(struct hv_dynmem_device *dm, } - return num_pages; + return i * alloc_unit; } static void balloon_up(struct work_struct *dummy) @@ -1222,9 +1219,6 @@ static void balloon_up(struct work_struct *dummy) long avail_pages; unsigned long floor; - /* The host balloons pages in 2M granularity. */ - WARN_ON_ONCE(num_pages % PAGES_IN_2M != 0); - /* * We will attempt 2M allocations. However, if we fail to * allocate 2M chunks, we will go back to 4k allocations. @@ -1234,14 +1228,13 @@ static void balloon_up(struct work_struct *dummy) avail_pages = si_mem_available(); floor = compute_balloon_floor(); - /* Refuse to balloon below the floor, keep the 2M granularity. */ + /* Refuse to balloon below the floor. */ if (avail_pages < num_pages || avail_pages - num_pages < floor) { pr_warn("Balloon request will be partially fulfilled. %s\n", avail_pages < num_pages ? "Not enough memory." : "Balloon floor reached."); num_pages = avail_pages > floor ? (avail_pages - floor) : 0; - num_pages -= num_pages % PAGES_IN_2M; } while (!done) { From 4fa9dd9ce20ffd2f327cddcacd19c3e44603df54 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 6 Dec 2019 13:55:17 +0800 Subject: [PATCH 3078/3715] crypto: api - Check spawn->alg under lock in crypto_drop_spawn commit 7db3b61b6bba4310f454588c2ca6faf2958ad79f upstream. We need to check whether spawn->alg is NULL under lock as otherwise the algorithm could be removed from under us after we have checked it and found it to be non-NULL. This could cause us to remove the spawn from a non-existent list. Fixes: 7ede5a5ba55a ("crypto: api - Fix crypto_drop_spawn crash...") Cc: Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/algapi.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/crypto/algapi.c b/crypto/algapi.c index 50eb828db767..0e86cab3bb67 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -652,11 +652,9 @@ EXPORT_SYMBOL_GPL(crypto_grab_spawn); void crypto_drop_spawn(struct crypto_spawn *spawn) { - if (!spawn->alg) - return; - down_write(&crypto_alg_sem); - list_del(&spawn->list); + if (spawn->alg) + list_del(&spawn->list); up_write(&crypto_alg_sem); } EXPORT_SYMBOL_GPL(crypto_drop_spawn); From 724106f30c039f124c6de5ac9d28ddd305c25b11 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Tue, 17 Dec 2019 14:06:16 -0800 Subject: [PATCH 3079/3715] scsi: qla2xxx: Fix mtcp dump collection failure commit 641e0efddcbde52461e017136acd3ce7f2ef0c14 upstream. MTCP dump failed due to MB Reg 10 was picking garbage data from stack memory. Fixes: 81178772b636a ("[SCSI] qla2xxx: Implemetation of mctp.") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20191217220617.28084-14-hmadhani@marvell.com Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_mbx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 459481ce5872..5e8ae510aef8 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -5853,9 +5853,8 @@ qla2x00_dump_mctp_data(scsi_qla_host_t *vha, dma_addr_t req_dma, uint32_t addr, mcp->mb[7] = LSW(MSD(req_dma)); mcp->mb[8] = MSW(addr); /* Setting RAM ID to valid */ - mcp->mb[10] |= BIT_7; /* For MCTP RAM ID is 0x40 */ - mcp->mb[10] |= 0x40; + mcp->mb[10] = BIT_7 | 0x40; mcp->out_mb |= MBX_10|MBX_8|MBX_7|MBX_6|MBX_5|MBX_4|MBX_3|MBX_2|MBX_1| MBX_0; From 3bb37758725b2206a19ea7a28ceea908ba1513e8 Mon Sep 17 00:00:00 2001 From: Sven Van Asbroeck Date: Thu, 19 Sep 2019 11:11:37 -0400 Subject: [PATCH 3080/3715] power: supply: ltc2941-battery-gauge: fix use-after-free commit a60ec78d306c6548d4adbc7918b587a723c555cc upstream. This driver's remove path calls cancel_delayed_work(). However, that function does not wait until the work function finishes. This could mean that the work function is still running after the driver's remove function has finished, which would result in a use-after-free. Fix by calling cancel_delayed_work_sync(), which ensures that that the work is properly cancelled, no longer running, and unable to re-schedule itself. This issue was detected with the help of Coccinelle. Cc: stable Signed-off-by: Sven Van Asbroeck Signed-off-by: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman --- drivers/power/supply/ltc2941-battery-gauge.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/supply/ltc2941-battery-gauge.c b/drivers/power/supply/ltc2941-battery-gauge.c index 9621d6dd88c6..50bdf2d5248b 100644 --- a/drivers/power/supply/ltc2941-battery-gauge.c +++ b/drivers/power/supply/ltc2941-battery-gauge.c @@ -406,7 +406,7 @@ static int ltc294x_i2c_remove(struct i2c_client *client) { struct ltc294x_info *info = i2c_get_clientdata(client); - cancel_delayed_work(&info->work); + cancel_delayed_work_sync(&info->work); power_supply_unregister(info->supply); return 0; } From 182f211e1160ab75ee889a19e72b3acb8a58ab81 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Mon, 25 Nov 2019 11:20:36 +0800 Subject: [PATCH 3081/3715] f2fs: choose hardlimit when softlimit is larger than hardlimit in f2fs_statfs_project() commit 909110c060f22e65756659ec6fa957ae75777e00 upstream. Setting softlimit larger than hardlimit seems meaningless for disk quota but currently it is allowed. In this case, there may be a bit of comfusion for users when they run df comamnd to directory which has project quota. For example, we set 20M softlimit and 10M hardlimit of block usage limit for project quota of test_dir(project id 123). [root@hades f2fs]# repquota -P -a --- fs/f2fs/super.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index e4aabfc21bd4..8635df6cba55 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -912,9 +912,13 @@ static int f2fs_statfs_project(struct super_block *sb, return PTR_ERR(dquot); spin_lock(&dq_data_lock); - limit = (dquot->dq_dqb.dqb_bsoftlimit ? - dquot->dq_dqb.dqb_bsoftlimit : - dquot->dq_dqb.dqb_bhardlimit) >> sb->s_blocksize_bits; + limit = 0; + if (dquot->dq_dqb.dqb_bsoftlimit) + limit = dquot->dq_dqb.dqb_bsoftlimit; + if (dquot->dq_dqb.dqb_bhardlimit && + (!limit || dquot->dq_dqb.dqb_bhardlimit < limit)) + limit = dquot->dq_dqb.dqb_bhardlimit; + if (limit && buf->f_blocks > limit) { curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits; buf->f_blocks = limit; @@ -923,9 +927,13 @@ static int f2fs_statfs_project(struct super_block *sb, (buf->f_blocks - curblock) : 0; } - limit = dquot->dq_dqb.dqb_isoftlimit ? - dquot->dq_dqb.dqb_isoftlimit : - dquot->dq_dqb.dqb_ihardlimit; + limit = 0; + if (dquot->dq_dqb.dqb_isoftlimit) + limit = dquot->dq_dqb.dqb_isoftlimit; + if (dquot->dq_dqb.dqb_ihardlimit && + (!limit || dquot->dq_dqb.dqb_ihardlimit < limit)) + limit = dquot->dq_dqb.dqb_ihardlimit; + if (limit && buf->f_files > limit) { buf->f_files = limit; buf->f_ffree = From 646b25533fed46c0e9cc3fec4cb3c77302d74825 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Sat, 4 Jan 2020 22:20:03 +0800 Subject: [PATCH 3082/3715] f2fs: fix miscounted block limit in f2fs_statfs_project() commit acdf2172172a511f97fa21ed0ee7609a6d3b3a07 upstream. statfs calculates Total/Used/Avail disk space in block unit, so we should translate soft/hard prjquota limit to block unit as well. Below testing result shows the block/inode numbers of Total/Used/Avail from df command are all correct afer applying this patch. [root@localhost quota-tools]\# ./repquota -P /dev/sdb1 --- fs/f2fs/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8635df6cba55..26849250b36b 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -918,6 +918,8 @@ static int f2fs_statfs_project(struct super_block *sb, if (dquot->dq_dqb.dqb_bhardlimit && (!limit || dquot->dq_dqb.dqb_bhardlimit < limit)) limit = dquot->dq_dqb.dqb_bhardlimit; + if (limit) + limit >>= sb->s_blocksize_bits; if (limit && buf->f_blocks > limit) { curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits; From 084f3fc1fb2b4d09be04310ddbb5a428b0a90397 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Sat, 4 Jan 2020 22:20:04 +0800 Subject: [PATCH 3083/3715] f2fs: code cleanup for f2fs_statfs_project() commit bf2cbd3c57159c2b639ee8797b52ab5af180bf83 upstream. Calling min_not_zero() to simplify complicated prjquota limit comparison in f2fs_statfs_project(). Signed-off-by: Chengguang Xu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/super.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 26849250b36b..2d021a33914a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -912,12 +912,8 @@ static int f2fs_statfs_project(struct super_block *sb, return PTR_ERR(dquot); spin_lock(&dq_data_lock); - limit = 0; - if (dquot->dq_dqb.dqb_bsoftlimit) - limit = dquot->dq_dqb.dqb_bsoftlimit; - if (dquot->dq_dqb.dqb_bhardlimit && - (!limit || dquot->dq_dqb.dqb_bhardlimit < limit)) - limit = dquot->dq_dqb.dqb_bhardlimit; + limit = min_not_zero(dquot->dq_dqb.dqb_bsoftlimit, + dquot->dq_dqb.dqb_bhardlimit); if (limit) limit >>= sb->s_blocksize_bits; @@ -929,12 +925,8 @@ static int f2fs_statfs_project(struct super_block *sb, (buf->f_blocks - curblock) : 0; } - limit = 0; - if (dquot->dq_dqb.dqb_isoftlimit) - limit = dquot->dq_dqb.dqb_isoftlimit; - if (dquot->dq_dqb.dqb_ihardlimit && - (!limit || dquot->dq_dqb.dqb_ihardlimit < limit)) - limit = dquot->dq_dqb.dqb_ihardlimit; + limit = min_not_zero(dquot->dq_dqb.dqb_isoftlimit, + dquot->dq_dqb.dqb_ihardlimit); if (limit && buf->f_files > limit) { buf->f_files = limit; From 5d56260c5e9fdbbba59655f63622f6159bf0e595 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 23 Jan 2020 00:11:24 +0100 Subject: [PATCH 3084/3715] PM: core: Fix handling of devices deleted during system-wide resume commit 0552e05fdfea191a2cf3a0abd33574b5ef9ca818 upstream. If a device is deleted by one of its system-wide resume callbacks (for example, because it does not appear to be present or accessible any more) along with its children, the resume of the children may continue leading to use-after-free errors and other issues (potentially). Namely, if the device's children are resumed asynchronously, their resume may have been scheduled already before the device's callback runs and so the device may be deleted while dpm_wait_for_superior() is being executed for them. The memory taken up by the parent device object may be freed then while dpm_wait() is waiting for the parent's resume callback to complete, which leads to a use-after-free. Moreover, the resume of the children is really not expected to continue after they have been unregistered, so it must be terminated right away in that case. To address this problem, modify dpm_wait_for_superior() to check if the target device is still there in the system-wide PM list of devices and if so, to increment its parent's reference counter, both under dpm_list_mtx which prevents device_del() running for the child from dropping the parent's reference counter prematurely. If the device is not present in the system-wide PM list of devices any more, the resume of it cannot continue, so check that again after dpm_wait() returns, which means that the parent's callback has been completed, and pass the result of that check to the caller of dpm_wait_for_superior() to allow it to abort the device's resume if it is not there any more. Link: https://lore.kernel.org/linux-pm/1579568452-27253-1-git-send-email-chanho.min@lge.com Reported-by: Chanho Min Cc: All applicable Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/base/power/main.c | 42 ++++++++++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index a30ff97632a5..0e7fa1f27ad4 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -269,10 +269,38 @@ static void dpm_wait_for_suppliers(struct device *dev, bool async) device_links_read_unlock(idx); } -static void dpm_wait_for_superior(struct device *dev, bool async) +static bool dpm_wait_for_superior(struct device *dev, bool async) { - dpm_wait(dev->parent, async); + struct device *parent; + + /* + * If the device is resumed asynchronously and the parent's callback + * deletes both the device and the parent itself, the parent object may + * be freed while this function is running, so avoid that by reference + * counting the parent once more unless the device has been deleted + * already (in which case return right away). + */ + mutex_lock(&dpm_list_mtx); + + if (!device_pm_initialized(dev)) { + mutex_unlock(&dpm_list_mtx); + return false; + } + + parent = get_device(dev->parent); + + mutex_unlock(&dpm_list_mtx); + + dpm_wait(parent, async); + put_device(parent); + dpm_wait_for_suppliers(dev, async); + + /* + * If the parent's callback has deleted the device, attempting to resume + * it would be invalid, so avoid doing that then. + */ + return device_pm_initialized(dev); } static void dpm_wait_for_consumers(struct device *dev, bool async) @@ -551,7 +579,8 @@ static int device_resume_noirq(struct device *dev, pm_message_t state, bool asyn if (!dev->power.is_noirq_suspended) goto Out; - dpm_wait_for_superior(dev, async); + if (!dpm_wait_for_superior(dev, async)) + goto Out; if (dev->pm_domain) { info = "noirq power domain "; @@ -691,7 +720,8 @@ static int device_resume_early(struct device *dev, pm_message_t state, bool asyn if (!dev->power.is_late_suspended) goto Out; - dpm_wait_for_superior(dev, async); + if (!dpm_wait_for_superior(dev, async)) + goto Out; if (dev->pm_domain) { info = "early power domain "; @@ -823,7 +853,9 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) goto Complete; } - dpm_wait_for_superior(dev, async); + if (!dpm_wait_for_superior(dev, async)) + goto Complete; + dpm_watchdog_set(&wd, dev); device_lock(dev); From a687c3bfdc5e24d75048b95ae15a88c86852affb Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Sun, 26 Jan 2020 22:52:47 +1100 Subject: [PATCH 3085/3715] of: Add OF_DMA_DEFAULT_COHERENT & select it on powerpc commit dabf6b36b83a18d57e3d4b9d50544ed040d86255 upstream. There's an OF helper called of_dma_is_coherent(), which checks if a device has a "dma-coherent" property to see if the device is coherent for DMA. But on some platforms devices are coherent by default, and on some platforms it's not possible to update existing device trees to add the "dma-coherent" property. So add a Kconfig symbol to allow arch code to tell of_dma_is_coherent() that devices are coherent by default, regardless of the presence of the property. Select that symbol on powerpc when NOT_COHERENT_CACHE is not set, ie. when the system has a coherent cache. Fixes: 92ea637edea3 ("of: introduce of_dma_is_coherent() helper") Cc: stable@vger.kernel.org # v3.16+ Reported-by: Christian Zigotzky Tested-by: Christian Zigotzky Signed-off-by: Michael Ellerman Reviewed-by: Ulf Hansson Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/Kconfig | 1 + drivers/of/Kconfig | 4 ++++ drivers/of/address.c | 6 +++++- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index de3b07c7be30..277e4ffb928b 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -225,6 +225,7 @@ config PPC select MODULES_USE_ELF_RELA select NO_BOOTMEM select OF + select OF_DMA_DEFAULT_COHERENT if !NOT_COHERENT_CACHE select OF_EARLY_FLATTREE select OF_RESERVED_MEM select OLD_SIGACTION if PPC32 diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig index ba7b034b2b91..6b8646db110c 100644 --- a/drivers/of/Kconfig +++ b/drivers/of/Kconfig @@ -112,4 +112,8 @@ config OF_OVERLAY config OF_NUMA bool +config OF_DMA_DEFAULT_COHERENT + # arches should select this if DMA is coherent by default for OF devices + bool + endif # OF diff --git a/drivers/of/address.c b/drivers/of/address.c index 792722e7d458..456339c19aed 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -894,12 +894,16 @@ EXPORT_SYMBOL_GPL(of_dma_get_range); * @np: device node * * It returns true if "dma-coherent" property was found - * for this device in DT. + * for this device in the DT, or if DMA is coherent by + * default for OF devices on the current platform. */ bool of_dma_is_coherent(struct device_node *np) { struct device_node *node = of_node_get(np); + if (IS_ENABLED(CONFIG_OF_DMA_DEFAULT_COHERENT)) + return true; + while (node) { if (of_property_read_bool(node, "dma-coherent")) { of_node_put(node); From 9b52f0b54d2647b44fed825aeeea5767cc65c417 Mon Sep 17 00:00:00 2001 From: Dmitry Fomichev Date: Mon, 23 Dec 2019 17:05:46 -0800 Subject: [PATCH 3086/3715] dm zoned: support zone sizes smaller than 128MiB commit b39962950339912978484cdac50069258545d753 upstream. dm-zoned is observed to log failed kernel assertions and not work correctly when operating against a device with a zone size smaller than 128MiB (e.g. 32768 bits per 4K block). The reason is that the bitmap size per zone is calculated as zero with such a small zone size. Fix this problem and also make the code related to zone bitmap management be able to handle per zone bitmaps smaller than a single block. A dm-zoned-tools patch is required to properly format dm-zoned devices with zone sizes smaller than 128MiB. Fixes: 3b1a94c88b79 ("dm zoned: drive-managed zoned block device target") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Fomichev Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-zoned-metadata.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 9b78f4a74a12..e3b67b145027 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -132,6 +132,7 @@ struct dmz_metadata { sector_t zone_bitmap_size; unsigned int zone_nr_bitmap_blocks; + unsigned int zone_bits_per_mblk; unsigned int nr_bitmap_blocks; unsigned int nr_map_blocks; @@ -1165,7 +1166,10 @@ static int dmz_init_zones(struct dmz_metadata *zmd) /* Init */ zmd->zone_bitmap_size = dev->zone_nr_blocks >> 3; - zmd->zone_nr_bitmap_blocks = zmd->zone_bitmap_size >> DMZ_BLOCK_SHIFT; + zmd->zone_nr_bitmap_blocks = + max_t(sector_t, 1, zmd->zone_bitmap_size >> DMZ_BLOCK_SHIFT); + zmd->zone_bits_per_mblk = min_t(sector_t, dev->zone_nr_blocks, + DMZ_BLOCK_SIZE_BITS); /* Allocate zone array */ zmd->zones = kcalloc(dev->nr_zones, sizeof(struct dm_zone), GFP_KERNEL); @@ -1982,7 +1986,7 @@ int dmz_copy_valid_blocks(struct dmz_metadata *zmd, struct dm_zone *from_zone, dmz_release_mblock(zmd, to_mblk); dmz_release_mblock(zmd, from_mblk); - chunk_block += DMZ_BLOCK_SIZE_BITS; + chunk_block += zmd->zone_bits_per_mblk; } to_zone->weight = from_zone->weight; @@ -2043,7 +2047,7 @@ int dmz_validate_blocks(struct dmz_metadata *zmd, struct dm_zone *zone, /* Set bits */ bit = chunk_block & DMZ_BLOCK_MASK_BITS; - nr_bits = min(nr_blocks, DMZ_BLOCK_SIZE_BITS - bit); + nr_bits = min(nr_blocks, zmd->zone_bits_per_mblk - bit); count = dmz_set_bits((unsigned long *)mblk->data, bit, nr_bits); if (count) { @@ -2122,7 +2126,7 @@ int dmz_invalidate_blocks(struct dmz_metadata *zmd, struct dm_zone *zone, /* Clear bits */ bit = chunk_block & DMZ_BLOCK_MASK_BITS; - nr_bits = min(nr_blocks, DMZ_BLOCK_SIZE_BITS - bit); + nr_bits = min(nr_blocks, zmd->zone_bits_per_mblk - bit); count = dmz_clear_bits((unsigned long *)mblk->data, bit, nr_bits); @@ -2182,6 +2186,7 @@ static int dmz_to_next_set_block(struct dmz_metadata *zmd, struct dm_zone *zone, { struct dmz_mblock *mblk; unsigned int bit, set_bit, nr_bits; + unsigned int zone_bits = zmd->zone_bits_per_mblk; unsigned long *bitmap; int n = 0; @@ -2196,15 +2201,15 @@ static int dmz_to_next_set_block(struct dmz_metadata *zmd, struct dm_zone *zone, /* Get offset */ bitmap = (unsigned long *) mblk->data; bit = chunk_block & DMZ_BLOCK_MASK_BITS; - nr_bits = min(nr_blocks, DMZ_BLOCK_SIZE_BITS - bit); + nr_bits = min(nr_blocks, zone_bits - bit); if (set) - set_bit = find_next_bit(bitmap, DMZ_BLOCK_SIZE_BITS, bit); + set_bit = find_next_bit(bitmap, zone_bits, bit); else - set_bit = find_next_zero_bit(bitmap, DMZ_BLOCK_SIZE_BITS, bit); + set_bit = find_next_zero_bit(bitmap, zone_bits, bit); dmz_release_mblock(zmd, mblk); n += set_bit - bit; - if (set_bit < DMZ_BLOCK_SIZE_BITS) + if (set_bit < zone_bits) break; nr_blocks -= nr_bits; @@ -2307,7 +2312,7 @@ static void dmz_get_zone_weight(struct dmz_metadata *zmd, struct dm_zone *zone) /* Count bits in this block */ bitmap = mblk->data; bit = chunk_block & DMZ_BLOCK_MASK_BITS; - nr_bits = min(nr_blocks, DMZ_BLOCK_SIZE_BITS - bit); + nr_bits = min(nr_blocks, zmd->zone_bits_per_mblk - bit); n += dmz_count_bits(bitmap, bit, nr_bits); dmz_release_mblock(zmd, mblk); From 28ae7054c731af053c3915500f2f257fd40ff9b2 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Tue, 7 Jan 2020 11:58:42 +0000 Subject: [PATCH 3087/3715] dm space map common: fix to ensure new block isn't already in use commit 4feaef830de7ffdd8352e1fe14ad3bf13c9688f8 upstream. The space-maps track the reference counts for disk blocks allocated by both the thin-provisioning and cache targets. There are variants for tracking metadata blocks and data blocks. Transactionality is implemented by never touching blocks from the previous transaction, so we can rollback in the event of a crash. When allocating a new block we need to ensure the block is free (has reference count of 0) in both the current and previous transaction. Prior to this fix we were doing this by searching for a free block in the previous transaction, and relying on a 'begin' counter to track where the last allocation in the current transaction was. This 'begin' field was not being updated in all code paths (eg, increment of a data block reference count due to breaking sharing of a neighbour block in the same btree leaf). This fix keeps the 'begin' field, but now it's just a hint to speed up the search. Instead the current transaction is searched for a free block, and then the old transaction is double checked to ensure it's free. Much simpler. This fixes reports of sm_disk_new_block()'s BUG_ON() triggering when DM thin-provisioning's snapshots are heavily used. Reported-by: Eric Wheeler Cc: stable@vger.kernel.org Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- .../md/persistent-data/dm-space-map-common.c | 27 +++++++++++++++++++ .../md/persistent-data/dm-space-map-common.h | 2 ++ .../md/persistent-data/dm-space-map-disk.c | 6 +++-- .../persistent-data/dm-space-map-metadata.c | 5 +++- 4 files changed, 37 insertions(+), 3 deletions(-) diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index 829b4ce057d8..97f16fe14f54 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -382,6 +382,33 @@ int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin, return -ENOSPC; } +int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll, + dm_block_t begin, dm_block_t end, dm_block_t *b) +{ + int r; + uint32_t count; + + do { + r = sm_ll_find_free_block(new_ll, begin, new_ll->nr_blocks, b); + if (r) + break; + + /* double check this block wasn't used in the old transaction */ + if (*b >= old_ll->nr_blocks) + count = 0; + else { + r = sm_ll_lookup(old_ll, *b, &count); + if (r) + break; + + if (count) + begin = *b + 1; + } + } while (count); + + return r; +} + static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b, int (*mutator)(void *context, uint32_t old, uint32_t *new), void *context, enum allocation_event *ev) diff --git a/drivers/md/persistent-data/dm-space-map-common.h b/drivers/md/persistent-data/dm-space-map-common.h index b3078d5eda0c..8de63ce39bdd 100644 --- a/drivers/md/persistent-data/dm-space-map-common.h +++ b/drivers/md/persistent-data/dm-space-map-common.h @@ -109,6 +109,8 @@ int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result); int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result); int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin, dm_block_t end, dm_block_t *result); +int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll, + dm_block_t begin, dm_block_t end, dm_block_t *result); int sm_ll_insert(struct ll_disk *ll, dm_block_t b, uint32_t ref_count, enum allocation_event *ev); int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev); int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev); diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c index 32adf6b4a9c7..bf4c5e2ccb6f 100644 --- a/drivers/md/persistent-data/dm-space-map-disk.c +++ b/drivers/md/persistent-data/dm-space-map-disk.c @@ -167,8 +167,10 @@ static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b) enum allocation_event ev; struct sm_disk *smd = container_of(sm, struct sm_disk, sm); - /* FIXME: we should loop round a couple of times */ - r = sm_ll_find_free_block(&smd->old_ll, smd->begin, smd->old_ll.nr_blocks, b); + /* + * Any block we allocate has to be free in both the old and current ll. + */ + r = sm_ll_find_common_free_block(&smd->old_ll, &smd->ll, smd->begin, smd->ll.nr_blocks, b); if (r) return r; diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index b23cac2c4738..31a999458be9 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -447,7 +447,10 @@ static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b) enum allocation_event ev; struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); - r = sm_ll_find_free_block(&smm->old_ll, smm->begin, smm->old_ll.nr_blocks, b); + /* + * Any block we allocate has to be free in both the old and current ll. + */ + r = sm_ll_find_common_free_block(&smm->old_ll, &smm->ll, smm->begin, smm->ll.nr_blocks, b); if (r) return r; From 41724dfbf8ab1a8a830a5d8fbbc89180f7ac6108 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Mon, 6 Jan 2020 10:11:47 +0100 Subject: [PATCH 3088/3715] dm crypt: fix benbi IV constructor crash if used in authenticated mode commit 4ea9471fbd1addb25a4d269991dc724e200ca5b5 upstream. If benbi IV is used in AEAD construction, for example: cryptsetup luksFormat --cipher twofish-xts-benbi --key-size 512 --integrity=hmac-sha256 the constructor uses wrong skcipher function and crashes: BUG: kernel NULL pointer dereference, address: 00000014 ... EIP: crypt_iv_benbi_ctr+0x15/0x70 [dm_crypt] Call Trace: ? crypt_subkey_size+0x20/0x20 [dm_crypt] crypt_ctr+0x567/0xfc0 [dm_crypt] dm_table_add_target+0x15f/0x340 [dm_mod] Fix this by properly using crypt_aead_blocksize() in this case. Fixes: ef43aa38063a6 ("dm crypt: add cryptographic data integrity protection (authenticated encryption)") Cc: stable@vger.kernel.org # v4.12+ Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=941051 Reported-by: Jerad Simpson Signed-off-by: Milan Broz Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-crypt.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 94b8d81f6020..d9a67759fdb5 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -485,8 +485,14 @@ static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv, static int crypt_iv_benbi_ctr(struct crypt_config *cc, struct dm_target *ti, const char *opts) { - unsigned bs = crypto_skcipher_blocksize(any_tfm(cc)); - int log = ilog2(bs); + unsigned bs; + int log; + + if (test_bit(CRYPT_MODE_INTEGRITY_AEAD, &cc->cipher_flags)) + bs = crypto_aead_blocksize(any_tfm_aead(cc)); + else + bs = crypto_skcipher_blocksize(any_tfm(cc)); + log = ilog2(bs); /* we need to calculate how far we must shift the sector count * to get the cipher block count, we use this shift in _gen */ From d10713270dd35966929ffc939ef4017df122c99c Mon Sep 17 00:00:00 2001 From: Amol Grover Date: Sat, 1 Feb 2020 12:57:04 +0530 Subject: [PATCH 3089/3715] tracing: Annotate ftrace_graph_hash pointer with __rcu [ Upstream commit 24a9729f831462b1d9d61dc85ecc91c59037243f ] Fix following instances of sparse error kernel/trace/ftrace.c:5664:29: error: incompatible types in comparison kernel/trace/ftrace.c:5785:21: error: incompatible types in comparison kernel/trace/ftrace.c:5864:36: error: incompatible types in comparison kernel/trace/ftrace.c:5866:25: error: incompatible types in comparison Use rcu_dereference_protected to access the __rcu annotated pointer. Link: http://lkml.kernel.org/r/20200201072703.17330-1-frextrite@gmail.com Reviewed-by: Joel Fernandes (Google) Signed-off-by: Amol Grover Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sasha Levin --- kernel/trace/ftrace.c | 2 +- kernel/trace/trace.h | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 3864d2341442..6af28692f0f5 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5146,7 +5146,7 @@ static const struct file_operations ftrace_notrace_fops = { static DEFINE_MUTEX(graph_lock); -struct ftrace_hash *ftrace_graph_hash = EMPTY_HASH; +struct ftrace_hash __rcu *ftrace_graph_hash = EMPTY_HASH; struct ftrace_hash *ftrace_graph_notrace_hash = EMPTY_HASH; enum graph_filter_type { diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index dbb212c40a41..17f36488d3c8 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -868,22 +868,25 @@ extern void __trace_graph_return(struct trace_array *tr, unsigned long flags, int pc); #ifdef CONFIG_DYNAMIC_FTRACE -extern struct ftrace_hash *ftrace_graph_hash; +extern struct ftrace_hash __rcu *ftrace_graph_hash; extern struct ftrace_hash *ftrace_graph_notrace_hash; static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) { unsigned long addr = trace->func; int ret = 0; + struct ftrace_hash *hash; preempt_disable_notrace(); - if (ftrace_hash_empty(ftrace_graph_hash)) { + hash = rcu_dereference_protected(ftrace_graph_hash, !preemptible()); + + if (ftrace_hash_empty(hash)) { ret = 1; goto out; } - if (ftrace_lookup_ip(ftrace_graph_hash, addr)) { + if (ftrace_lookup_ip(hash, addr)) { /* * This needs to be cleared on the return functions From bdfe89cbbcdb643cd5ac68a2c35d5a5ff54e9a52 Mon Sep 17 00:00:00 2001 From: Amol Grover Date: Wed, 5 Feb 2020 11:27:02 +0530 Subject: [PATCH 3090/3715] tracing: Annotate ftrace_graph_notrace_hash pointer with __rcu [ Upstream commit fd0e6852c407dd9aefc594f54ddcc21d84803d3b ] Fix following instances of sparse error kernel/trace/ftrace.c:5667:29: error: incompatible types in comparison kernel/trace/ftrace.c:5813:21: error: incompatible types in comparison kernel/trace/ftrace.c:5868:36: error: incompatible types in comparison kernel/trace/ftrace.c:5870:25: error: incompatible types in comparison Use rcu_dereference_protected to dereference the newly annotated pointer. Link: http://lkml.kernel.org/r/20200205055701.30195-1-frextrite@gmail.com Signed-off-by: Amol Grover Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sasha Levin --- kernel/trace/ftrace.c | 2 +- kernel/trace/trace.h | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6af28692f0f5..dd9fdb52e24a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5147,7 +5147,7 @@ static const struct file_operations ftrace_notrace_fops = { static DEFINE_MUTEX(graph_lock); struct ftrace_hash __rcu *ftrace_graph_hash = EMPTY_HASH; -struct ftrace_hash *ftrace_graph_notrace_hash = EMPTY_HASH; +struct ftrace_hash __rcu *ftrace_graph_notrace_hash = EMPTY_HASH; enum graph_filter_type { GRAPH_FILTER_NOTRACE = 0, diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 17f36488d3c8..757bb1bffed9 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -869,7 +869,7 @@ extern void __trace_graph_return(struct trace_array *tr, #ifdef CONFIG_DYNAMIC_FTRACE extern struct ftrace_hash __rcu *ftrace_graph_hash; -extern struct ftrace_hash *ftrace_graph_notrace_hash; +extern struct ftrace_hash __rcu *ftrace_graph_notrace_hash; static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) { @@ -922,10 +922,14 @@ static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace) static inline int ftrace_graph_notrace_addr(unsigned long addr) { int ret = 0; + struct ftrace_hash *notrace_hash; preempt_disable_notrace(); - if (ftrace_lookup_ip(ftrace_graph_notrace_hash, addr)) + notrace_hash = rcu_dereference_protected(ftrace_graph_notrace_hash, + !preemptible()); + + if (ftrace_lookup_ip(notrace_hash, addr)) ret = 1; preempt_enable_notrace(); From 5aa98ea3dc2b34d1d8d9fc8a0dc38c6da225dfa6 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 5 Feb 2020 02:17:57 -0500 Subject: [PATCH 3091/3715] ftrace: Add comment to why rcu_dereference_sched() is open coded [ Upstream commit 16052dd5bdfa16dbe18d8c1d4cde2ddab9d23177 ] Because the function graph tracer can execute in sections where RCU is not "watching", the rcu_dereference_sched() for the has needs to be open coded. This is fine because the RCU "flavor" of the ftrace hash is protected by its own RCU handling (it does its own little synchronization on every CPU and does not rely on RCU sched). Acked-by: Joel Fernandes (Google) Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sasha Levin --- kernel/trace/trace.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 757bb1bffed9..99af95e294d8 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -879,6 +879,11 @@ static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) preempt_disable_notrace(); + /* + * Have to open code "rcu_dereference_sched()" because the + * function graph tracer can be called when RCU is not + * "watching". + */ hash = rcu_dereference_protected(ftrace_graph_hash, !preemptible()); if (ftrace_hash_empty(hash)) { @@ -926,6 +931,11 @@ static inline int ftrace_graph_notrace_addr(unsigned long addr) preempt_disable_notrace(); + /* + * Have to open code "rcu_dereference_sched()" because the + * function graph tracer can be called when RCU is not + * "watching". + */ notrace_hash = rcu_dereference_protected(ftrace_graph_notrace_hash, !preemptible()); From e78ca4d34768bc7c18cfce47c84b6a46bc40b855 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 5 Feb 2020 09:20:32 -0500 Subject: [PATCH 3092/3715] ftrace: Protect ftrace_graph_hash with ftrace_sync [ Upstream commit 54a16ff6f2e50775145b210bcd94d62c3c2af117 ] As function_graph tracer can run when RCU is not "watching", it can not be protected by synchronize_rcu() it requires running a task on each CPU before it can be freed. Calling schedule_on_each_cpu(ftrace_sync) needs to be used. Link: https://lore.kernel.org/r/20200205131110.GT2935@paulmck-ThinkPad-P72 Cc: stable@vger.kernel.org Fixes: b9b0c831bed26 ("ftrace: Convert graph filter to use hash tables") Reported-by: "Paul E. McKenney" Reviewed-by: Joel Fernandes (Google) Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sasha Levin --- kernel/trace/ftrace.c | 11 +++++++++-- kernel/trace/trace.h | 2 ++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index dd9fdb52e24a..8974ecbcca3c 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5419,8 +5419,15 @@ ftrace_graph_release(struct inode *inode, struct file *file) mutex_unlock(&graph_lock); - /* Wait till all users are no longer using the old hash */ - synchronize_sched(); + /* + * We need to do a hard force of sched synchronization. + * This is because we use preempt_disable() to do RCU, but + * the function tracers can be called where RCU is not watching + * (like before user_exit()). We can not rely on the RCU + * infrastructure to do the synchronization, thus we must do it + * ourselves. + */ + schedule_on_each_cpu(ftrace_sync); free_ftrace_hash(old_hash); } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 99af95e294d8..c4c61ebb8d05 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -883,6 +883,7 @@ static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) * Have to open code "rcu_dereference_sched()" because the * function graph tracer can be called when RCU is not * "watching". + * Protected with schedule_on_each_cpu(ftrace_sync) */ hash = rcu_dereference_protected(ftrace_graph_hash, !preemptible()); @@ -935,6 +936,7 @@ static inline int ftrace_graph_notrace_addr(unsigned long addr) * Have to open code "rcu_dereference_sched()" because the * function graph tracer can be called when RCU is not * "watching". + * Protected with schedule_on_each_cpu(ftrace_sync) */ notrace_hash = rcu_dereference_protected(ftrace_graph_notrace_hash, !preemptible()); From f536b55474f1396151faf086e250cdcdb2dd5752 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Mon, 20 Jan 2020 14:06:41 +0100 Subject: [PATCH 3093/3715] samples/bpf: Don't try to remove user's homedir on clean MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b2e5e93ae8af6a34bca536cdc4b453ab1e707b8b upstream. The 'clean' rule in the samples/bpf Makefile tries to remove backup files (ending in ~). However, if no such files exist, it will instead try to remove the user's home directory. While the attempt is mostly harmless, it does lead to a somewhat scary warning like this: rm: cannot remove '~': Is a directory Fix this by using find instead of shell expansion to locate any actual backup files that need to be removed. Fixes: b62a796c109c ("samples/bpf: allow make to be run from samples/bpf/ directory") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/bpf/157952560126.1683545.7273054725976032511.stgit@toke.dk Signed-off-by: Greg Kroah-Hartman --- samples/bpf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index c1dc632d4ea4..3460036621e4 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -184,7 +184,7 @@ all: $(LIBBPF) clean: $(MAKE) -C ../../ M=$(CURDIR) clean - @rm -f *~ + @find $(CURDIR) -type f -name '*~' -delete $(LIBBPF): FORCE $(MAKE) -C $(dir $@) $(notdir $@) From 6590628bd5cd71f870785db0f7c9df6cb31a6f3a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 27 Nov 2019 13:01:36 +0100 Subject: [PATCH 3094/3715] crypto: ccp - set max RSA modulus size for v3 platform devices as well commit 11548f5a5747813ff84bed6f2ea01100053b0d8d upstream. AMD Seattle incorporates a non-PCI version of the v3 CCP crypto accelerator, and this version was left behind when the maximum RSA modulus size was parameterized in order to support v5 hardware which supports larger moduli than v3 hardware does. Due to this oversight, RSA acceleration no longer works at all on these systems. Fix this by setting the .rsamax property to the appropriate value for v3 platform hardware. Fixes: e28c190db66830c0 ("csrypto: ccp - Expand RSA support for a v5 ccp") Cc: Gary R Hook Signed-off-by: Ard Biesheuvel Acked-by: Gary R Hook Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/ccp/ccp-dev-v3.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/ccp/ccp-dev-v3.c b/drivers/crypto/ccp/ccp-dev-v3.c index 240bebbcb8ac..ae0cc0a4dc5c 100644 --- a/drivers/crypto/ccp/ccp-dev-v3.c +++ b/drivers/crypto/ccp/ccp-dev-v3.c @@ -590,6 +590,7 @@ const struct ccp_vdata ccpv3_platform = { .setup = NULL, .perform = &ccp3_actions, .offset = 0, + .rsamax = CCP_RSA_MAX_WIDTH, }; const struct ccp_vdata ccpv3 = { From 3b7b3a5797f161bba8c811812f894c5eb4662e4d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 29 Nov 2019 16:40:24 +0800 Subject: [PATCH 3095/3715] crypto: pcrypt - Do not clear MAY_SLEEP flag in original request commit e8d998264bffade3cfe0536559f712ab9058d654 upstream. We should not be modifying the original request's MAY_SLEEP flag upon completion. It makes no sense to do so anyway. Reported-by: Eric Biggers Fixes: 5068c7a883d1 ("crypto: pcrypt - Add pcrypt crypto...") Signed-off-by: Herbert Xu Tested-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/pcrypt.c | 1 - 1 file changed, 1 deletion(-) diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index 1348541da463..85082574c515 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c @@ -130,7 +130,6 @@ static void pcrypt_aead_done(struct crypto_async_request *areq, int err) struct padata_priv *padata = pcrypt_request_padata(preq); padata->info = err; - req->base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; padata_do_serial(padata); } From 6042a22f705a68970a22b37257caaf2089885c7e Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Thu, 5 Dec 2019 09:54:01 +0000 Subject: [PATCH 3096/3715] crypto: atmel-aes - Fix counter overflow in CTR mode commit 781a08d9740afa73357f1a60d45d7c93d7cca2dd upstream. 32 bit counter is not supported by neither of our AES IPs, all implement a 16 bit block counter. Drop the 32 bit block counter logic. Fixes: fcac83656a3e ("crypto: atmel-aes - fix the counter overflow in CTR mode") Signed-off-by: Tudor Ambarus Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/atmel-aes.c | 33 ++++++++++----------------------- 1 file changed, 10 insertions(+), 23 deletions(-) diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c index 11129b796dda..b8153142bcc6 100644 --- a/drivers/crypto/atmel-aes.c +++ b/drivers/crypto/atmel-aes.c @@ -91,7 +91,6 @@ struct atmel_aes_caps { bool has_dualbuff; bool has_cfb64; - bool has_ctr32; bool has_gcm; bool has_xts; bool has_authenc; @@ -990,8 +989,9 @@ static int atmel_aes_ctr_transfer(struct atmel_aes_dev *dd) struct atmel_aes_ctr_ctx *ctx = atmel_aes_ctr_ctx_cast(dd->ctx); struct ablkcipher_request *req = ablkcipher_request_cast(dd->areq); struct scatterlist *src, *dst; - u32 ctr, blocks; size_t datalen; + u32 ctr; + u16 blocks, start, end; bool use_dma, fragmented = false; /* Check for transfer completion. */ @@ -1003,27 +1003,17 @@ static int atmel_aes_ctr_transfer(struct atmel_aes_dev *dd) datalen = req->nbytes - ctx->offset; blocks = DIV_ROUND_UP(datalen, AES_BLOCK_SIZE); ctr = be32_to_cpu(ctx->iv[3]); - if (dd->caps.has_ctr32) { - /* Check 32bit counter overflow. */ - u32 start = ctr; - u32 end = start + blocks - 1; - if (end < start) { - ctr |= 0xffffffff; - datalen = AES_BLOCK_SIZE * -start; - fragmented = true; - } - } else { - /* Check 16bit counter overflow. */ - u16 start = ctr & 0xffff; - u16 end = start + (u16)blocks - 1; + /* Check 16bit counter overflow. */ + start = ctr & 0xffff; + end = start + blocks - 1; - if (blocks >> 16 || end < start) { - ctr |= 0xffff; - datalen = AES_BLOCK_SIZE * (0x10000-start); - fragmented = true; - } + if (blocks >> 16 || end < start) { + ctr |= 0xffff; + datalen = AES_BLOCK_SIZE * (0x10000 - start); + fragmented = true; } + use_dma = (datalen >= ATMEL_AES_DMA_THRESHOLD); /* Jump to offset. */ @@ -2536,7 +2526,6 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd) { dd->caps.has_dualbuff = 0; dd->caps.has_cfb64 = 0; - dd->caps.has_ctr32 = 0; dd->caps.has_gcm = 0; dd->caps.has_xts = 0; dd->caps.has_authenc = 0; @@ -2547,7 +2536,6 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd) case 0x500: dd->caps.has_dualbuff = 1; dd->caps.has_cfb64 = 1; - dd->caps.has_ctr32 = 1; dd->caps.has_gcm = 1; dd->caps.has_xts = 1; dd->caps.has_authenc = 1; @@ -2556,7 +2544,6 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd) case 0x200: dd->caps.has_dualbuff = 1; dd->caps.has_cfb64 = 1; - dd->caps.has_ctr32 = 1; dd->caps.has_gcm = 1; dd->caps.max_burst_size = 4; break; From 90060df5c2d61e8db99baa72f853e7db136f67f2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 7 Dec 2019 22:15:15 +0800 Subject: [PATCH 3097/3715] crypto: api - Fix race condition in crypto_spawn_alg commit 73669cc556462f4e50376538d77ee312142e8a8a upstream. The function crypto_spawn_alg is racy because it drops the lock before shooting the dying algorithm. The algorithm could disappear altogether before we shoot it. This patch fixes it by moving the shooting into the locked section. Fixes: 6bfd48096ff8 ("[CRYPTO] api: Added spawns") Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/algapi.c | 16 +++++----------- crypto/api.c | 3 +-- crypto/internal.h | 1 - 3 files changed, 6 insertions(+), 14 deletions(-) diff --git a/crypto/algapi.c b/crypto/algapi.c index 0e86cab3bb67..603d2d637209 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -662,22 +662,16 @@ EXPORT_SYMBOL_GPL(crypto_drop_spawn); static struct crypto_alg *crypto_spawn_alg(struct crypto_spawn *spawn) { struct crypto_alg *alg; - struct crypto_alg *alg2; down_read(&crypto_alg_sem); alg = spawn->alg; - alg2 = alg; - if (alg2) - alg2 = crypto_mod_get(alg2); + if (alg && !crypto_mod_get(alg)) { + alg->cra_flags |= CRYPTO_ALG_DYING; + alg = NULL; + } up_read(&crypto_alg_sem); - if (!alg2) { - if (alg) - crypto_shoot_alg(alg); - return ERR_PTR(-EAGAIN); - } - - return alg; + return alg ?: ERR_PTR(-EAGAIN); } struct crypto_tfm *crypto_spawn_tfm(struct crypto_spawn *spawn, u32 type, diff --git a/crypto/api.c b/crypto/api.c index e485aed11ad0..187795a6687d 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -339,13 +339,12 @@ static unsigned int crypto_ctxsize(struct crypto_alg *alg, u32 type, u32 mask) return len; } -void crypto_shoot_alg(struct crypto_alg *alg) +static void crypto_shoot_alg(struct crypto_alg *alg) { down_write(&crypto_alg_sem); alg->cra_flags |= CRYPTO_ALG_DYING; up_write(&crypto_alg_sem); } -EXPORT_SYMBOL_GPL(crypto_shoot_alg); struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 type, u32 mask) diff --git a/crypto/internal.h b/crypto/internal.h index f07320423191..6262ec0435b4 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -84,7 +84,6 @@ void crypto_alg_tested(const char *name, int err); void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list, struct crypto_alg *nalg); void crypto_remove_final(struct list_head *list); -void crypto_shoot_alg(struct crypto_alg *alg); struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 type, u32 mask); void *crypto_create_tfm(struct crypto_alg *alg, From a793a2640be68660ae99e68da0e276239615cb99 Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Tue, 10 Dec 2019 00:21:44 +0800 Subject: [PATCH 3098/3715] crypto: picoxcell - adjust the position of tasklet_init and fix missed tasklet_kill commit 7f8c36fe9be46862c4f3c5302f769378028a34fa upstream. Since tasklet is needed to be initialized before registering IRQ handler, adjust the position of tasklet_init to fix the wrong order. Besides, to fix the missed tasklet_kill, this patch adds a helper function and uses devm_add_action to kill the tasklet automatically. Fixes: ce92136843cb ("crypto: picoxcell - add support for the picoxcell crypto engines") Signed-off-by: Chuhong Yuan Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/picoxcell_crypto.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c index b6f14844702e..7eaeb8507e06 100644 --- a/drivers/crypto/picoxcell_crypto.c +++ b/drivers/crypto/picoxcell_crypto.c @@ -1616,6 +1616,11 @@ static const struct of_device_id spacc_of_id_table[] = { MODULE_DEVICE_TABLE(of, spacc_of_id_table); #endif /* CONFIG_OF */ +static void spacc_tasklet_kill(void *data) +{ + tasklet_kill(data); +} + static int spacc_probe(struct platform_device *pdev) { int i, err, ret = -EINVAL; @@ -1659,6 +1664,14 @@ static int spacc_probe(struct platform_device *pdev) return -ENXIO; } + tasklet_init(&engine->complete, spacc_spacc_complete, + (unsigned long)engine); + + ret = devm_add_action(&pdev->dev, spacc_tasklet_kill, + &engine->complete); + if (ret) + return ret; + if (devm_request_irq(&pdev->dev, irq->start, spacc_spacc_irq, 0, engine->name, engine)) { dev_err(engine->dev, "failed to request IRQ\n"); @@ -1721,8 +1734,6 @@ static int spacc_probe(struct platform_device *pdev) INIT_LIST_HEAD(&engine->completed); INIT_LIST_HEAD(&engine->in_progress); engine->in_flight = 0; - tasklet_init(&engine->complete, spacc_spacc_complete, - (unsigned long)engine); platform_set_drvdata(pdev, engine); From 56846c93728bd7f843127380594602ff5db5a30d Mon Sep 17 00:00:00 2001 From: Arun Easi Date: Thu, 23 Jan 2020 20:50:14 -0800 Subject: [PATCH 3099/3715] scsi: qla2xxx: Fix unbound NVME response length commit 00fe717ee1ea3c2979db4f94b1533c57aed8dea9 upstream. On certain cases when response length is less than 32, NVME response data is supplied inline in IOCB. This is indicated by some combination of state flags. There was an instance when a high, and incorrect, response length was indicated causing driver to overrun buffers. Fix this by checking and limiting the response payload length. Fixes: 7401bc18d1ee3 ("scsi: qla2xxx: Add FC-NVMe command handling") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200124045014.23554-1-hmadhani@marvell.com Signed-off-by: Arun Easi Signed-off-by: Himanshu Madhani Reviewed-by: Ewan D. Milne Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_dbg.c | 6 ------ drivers/scsi/qla2xxx/qla_dbg.h | 6 ++++++ drivers/scsi/qla2xxx/qla_isr.c | 12 ++++++++++++ 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index 3e9dc54b89a3..91e185731b1e 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -2517,12 +2517,6 @@ qla83xx_fw_dump_failed: /* Driver Debug Functions. */ /****************************************************************************/ -static inline int -ql_mask_match(uint32_t level) -{ - return (level & ql2xextended_error_logging) == level; -} - /* * This function is for formatting and logging debug information. * It is to be used when vha is available. It formats the message diff --git a/drivers/scsi/qla2xxx/qla_dbg.h b/drivers/scsi/qla2xxx/qla_dbg.h index 8877aa97d829..ceca6dd34db1 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.h +++ b/drivers/scsi/qla2xxx/qla_dbg.h @@ -374,3 +374,9 @@ extern int qla24xx_dump_ram(struct qla_hw_data *, uint32_t, uint32_t *, extern void qla24xx_pause_risc(struct device_reg_24xx __iomem *, struct qla_hw_data *); extern int qla24xx_soft_reset(struct qla_hw_data *); + +static inline int +ql_mask_match(uint level) +{ + return (level & ql2xextended_error_logging) == level; +} diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 648916a9082c..b39faf2bfa0d 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -1853,6 +1853,18 @@ qla24xx_nvme_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, void *tsk) inbuf = (uint32_t *)&sts->nvme_ersp_data; outbuf = (uint32_t *)fd->rspaddr; iocb->u.nvme.rsp_pyld_len = le16_to_cpu(sts->nvme_rsp_pyld_len); + if (unlikely(iocb->u.nvme.rsp_pyld_len > + sizeof(struct nvme_fc_ersp_iu))) { + if (ql_mask_match(ql_dbg_io)) { + WARN_ONCE(1, "Unexpected response payload length %u.\n", + iocb->u.nvme.rsp_pyld_len); + ql_log(ql_log_warn, fcport->vha, 0x5100, + "Unexpected response payload length %u.\n", + iocb->u.nvme.rsp_pyld_len); + } + iocb->u.nvme.rsp_pyld_len = + sizeof(struct nvme_fc_ersp_iu); + } iter = iocb->u.nvme.rsp_pyld_len >> 2; for (; iter; iter--) *outbuf++ = swab32(*inbuf++); From 120112701bc3c0acb8d0bafdbbdf91af76e1eb3f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 2 Feb 2020 17:53:53 -0500 Subject: [PATCH 3100/3715] NFS: Fix memory leaks and corruption in readdir commit 4b310319c6a8ce708f1033d57145e2aa027a883c upstream. nfs_readdir_xdr_to_array() must not exit without having initialised the array, so that the page cache deletion routines can safely call nfs_readdir_clear_array(). Furthermore, we should ensure that if we exit nfs_readdir_filler() with an error, we free up any page contents to prevent a leak if we try to fill the page again. Fixes: 11de3b11e08c ("NFS: Fix a memory leak in nfs_readdir") Cc: stable@vger.kernel.org # v2.6.37+ Signed-off-by: Trond Myklebust Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/dir.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 50c181fa0025..4aff1c66b586 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -169,6 +169,17 @@ typedef struct { bool eof; } nfs_readdir_descriptor_t; +static +void nfs_readdir_init_array(struct page *page) +{ + struct nfs_cache_array *array; + + array = kmap_atomic(page); + memset(array, 0, sizeof(struct nfs_cache_array)); + array->eof_index = -1; + kunmap_atomic(array); +} + /* * we are freeing strings created by nfs_add_to_readdir_array() */ @@ -181,6 +192,7 @@ void nfs_readdir_clear_array(struct page *page) array = kmap_atomic(page); for (i = 0; i < array->size; i++) kfree(array->array[i].string.name); + array->size = 0; kunmap_atomic(array); } @@ -617,6 +629,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, int status = -ENOMEM; unsigned int array_size = ARRAY_SIZE(pages); + nfs_readdir_init_array(page); + entry.prev_cookie = 0; entry.cookie = desc->last_cookie; entry.eof = 0; @@ -633,8 +647,6 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, } array = kmap(page); - memset(array, 0, sizeof(struct nfs_cache_array)); - array->eof_index = -1; status = nfs_readdir_alloc_pages(pages, array_size); if (status < 0) @@ -688,6 +700,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page) unlock_page(page); return 0; error: + nfs_readdir_clear_array(page); unlock_page(page); return ret; } From d046cfa737337c19fa77dc7487453477c99a8334 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 2 Feb 2020 17:53:54 -0500 Subject: [PATCH 3101/3715] NFS: Directory page cache pages need to be locked when read commit 114de38225d9b300f027e2aec9afbb6e0def154b upstream. When a NFS directory page cache page is removed from the page cache, its contents are freed through a call to nfs_readdir_clear_array(). To prevent the removal of the page cache entry until after we've finished reading it, we must take the page lock. Fixes: 11de3b11e08c ("NFS: Fix a memory leak in nfs_readdir") Cc: stable@vger.kernel.org # v2.6.37+ Signed-off-by: Trond Myklebust Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/dir.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 4aff1c66b586..673d89bb817e 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -708,8 +708,6 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page) static void cache_page_release(nfs_readdir_descriptor_t *desc) { - if (!desc->page->mapping) - nfs_readdir_clear_array(desc->page); put_page(desc->page); desc->page = NULL; } @@ -723,19 +721,28 @@ struct page *get_cache_page(nfs_readdir_descriptor_t *desc) /* * Returns 0 if desc->dir_cookie was found on page desc->page_index + * and locks the page to prevent removal from the page cache. */ static -int find_cache_page(nfs_readdir_descriptor_t *desc) +int find_and_lock_cache_page(nfs_readdir_descriptor_t *desc) { int res; desc->page = get_cache_page(desc); if (IS_ERR(desc->page)) return PTR_ERR(desc->page); - - res = nfs_readdir_search_array(desc); + res = lock_page_killable(desc->page); if (res != 0) - cache_page_release(desc); + goto error; + res = -EAGAIN; + if (desc->page->mapping != NULL) { + res = nfs_readdir_search_array(desc); + if (res == 0) + return 0; + } + unlock_page(desc->page); +error: + cache_page_release(desc); return res; } @@ -750,7 +757,7 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) desc->last_cookie = 0; } do { - res = find_cache_page(desc); + res = find_and_lock_cache_page(desc); } while (res == -EAGAIN); return res; } @@ -789,7 +796,6 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc) desc->eof = 1; kunmap(desc->page); - cache_page_release(desc); dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (unsigned long long)*desc->dir_cookie, res); return res; @@ -835,13 +841,13 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc) status = nfs_do_filldir(desc); + out_release: + nfs_readdir_clear_array(desc->page); + cache_page_release(desc); out: dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status); return status; - out_release: - cache_page_release(desc); - goto out; } /* The file offset position represents the dirent entry number. A @@ -906,6 +912,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) break; res = nfs_do_filldir(desc); + unlock_page(desc->page); + cache_page_release(desc); if (res < 0) break; } while (!desc->eof); From f7a7788766849cfbde527c1fc3765e9033c03dad Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 17 Jan 2020 08:57:51 -0500 Subject: [PATCH 3102/3715] btrfs: set trans->drity in btrfs_commit_transaction commit d62b23c94952e78211a383b7d90ef0afbd9a3717 upstream. If we abort a transaction we have the following sequence if (!trans->dirty && list_empty(&trans->new_bgs)) return; WRITE_ONCE(trans->transaction->aborted, err); The idea being if we didn't modify anything with our trans handle then we don't really need to abort the whole transaction, maybe the other trans handles are fine and we can carry on. However in the case of create_snapshot we add a pending_snapshot object to our transaction and then commit the transaction. We don't actually modify anything. sync() behaves the same way, attach to an existing transaction and commit it. This means that if we have an IO error in the right places we could abort the committing transaction with our trans->dirty being not set and thus not set transaction->aborted. This is a problem because in the create_snapshot() case we depend on pending->error being set to something, or btrfs_commit_transaction returning an error. If we are not the trans handle that gets to commit the transaction, and we're waiting on the commit to happen we get our return value from cur_trans->aborted. If this was not set to anything because sync() hit an error in the transaction commit before it could modify anything then cur_trans->aborted would be 0. Thus we'd return 0 from btrfs_commit_transaction() in create_snapshot. This is a problem because we then try to do things with pending_snapshot->snap, which will be NULL because we didn't create the snapshot, and then we'll get a NULL pointer dereference like the following "BUG: kernel NULL pointer dereference, address: 00000000000001f0" RIP: 0010:btrfs_orphan_cleanup+0x2d/0x330 Call Trace: ? btrfs_mksubvol.isra.31+0x3f2/0x510 btrfs_mksubvol.isra.31+0x4bc/0x510 ? __sb_start_write+0xfa/0x200 ? mnt_want_write_file+0x24/0x50 btrfs_ioctl_snap_create_transid+0x16c/0x1a0 btrfs_ioctl_snap_create_v2+0x11e/0x1a0 btrfs_ioctl+0x1534/0x2c10 ? free_debug_processing+0x262/0x2a3 do_vfs_ioctl+0xa6/0x6b0 ? do_sys_open+0x188/0x220 ? syscall_trace_enter+0x1f8/0x330 ksys_ioctl+0x60/0x90 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x4a/0x1b0 In order to fix this we need to make sure anybody who calls commit_transaction has trans->dirty set so that they properly set the trans->transaction->aborted value properly so any waiters know bad things happened. This was found while I was running generic/475 with my modified fsstress, it reproduced within a few runs. I ran with this patch all night and didn't see the problem again. CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/transaction.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index fa8f56e6f665..a066ad581976 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1948,6 +1948,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) struct btrfs_transaction *prev_trans = NULL; int ret; + /* + * Some places just start a transaction to commit it. We need to make + * sure that if this commit fails that the abort code actually marks the + * transaction as failed, so set trans->dirty to make the abort code do + * the right thing. + */ + trans->dirty = true; + /* Stop the commit early if ->aborted is set */ if (unlikely(READ_ONCE(cur_trans->aborted))) { ret = cur_trans->aborted; From 06bd486ee98b91d58c2f9cbadff4ae35b4cd4dfb Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Thu, 3 Oct 2019 14:50:31 -0600 Subject: [PATCH 3103/3715] ARM: tegra: Enable PLLP bypass during Tegra124 LP1 commit 1a3388d506bf5b45bb283e6a4c4706cfb4897333 upstream. For a little over a year, U-Boot has configured the flow controller to perform automatic RAM re-repair on off->on power transitions of the CPU rail[1]. This is mandatory for correct operation of Tegra124. However, RAM re-repair relies on certain clocks, which the kernel must enable and leave running. PLLP is one of those clocks. This clock is shut down during LP1 in order to save power. Enable bypass (which I believe routes osc_div_clk, essentially the crystal clock, to the PLL output) so that this clock signal toggles even though the PLL is not active. This is required so that LP1 power mode (system suspend) operates correctly. The bypass configuration must then be undone when resuming from LP1, so that all peripheral clocks run at the expected rate. Without this, many peripherals won't work correctly; for example, the UART baud rate would be incorrect. NVIDIA's downstream kernel code only does this if not compiled for Tegra30, so the added code is made conditional upon the chip ID. NVIDIA's downstream code makes this change conditional upon the active CPU cluster. The upstream kernel currently doesn't support cluster switching, so this patch doesn't test the active CPU cluster ID. [1] 3cc7942a4ae5 ARM: tegra: implement RAM repair Reported-by: Jonathan Hunter Cc: stable@vger.kernel.org Signed-off-by: Stephen Warren Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-tegra/sleep-tegra30.S | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/arm/mach-tegra/sleep-tegra30.S b/arch/arm/mach-tegra/sleep-tegra30.S index dd4a67dabd91..b7cd41461e7d 100644 --- a/arch/arm/mach-tegra/sleep-tegra30.S +++ b/arch/arm/mach-tegra/sleep-tegra30.S @@ -382,6 +382,14 @@ _pll_m_c_x_done: pll_locked r1, r0, CLK_RESET_PLLC_BASE pll_locked r1, r0, CLK_RESET_PLLX_BASE + tegra_get_soc_id TEGRA_APB_MISC_BASE, r1 + cmp r1, #TEGRA30 + beq 1f + ldr r1, [r0, #CLK_RESET_PLLP_BASE] + bic r1, r1, #(1<<31) @ disable PllP bypass + str r1, [r0, #CLK_RESET_PLLP_BASE] +1: + mov32 r7, TEGRA_TMRUS_BASE ldr r1, [r7] add r1, r1, #LOCK_DELAY @@ -641,7 +649,10 @@ tegra30_switch_cpu_to_clk32k: str r0, [r4, #PMC_PLLP_WB0_OVERRIDE] /* disable PLLP, PLLA, PLLC and PLLX */ + tegra_get_soc_id TEGRA_APB_MISC_BASE, r1 + cmp r1, #TEGRA30 ldr r0, [r5, #CLK_RESET_PLLP_BASE] + orrne r0, r0, #(1 << 31) @ enable PllP bypass on fast cluster bic r0, r0, #(1 << 30) str r0, [r5, #CLK_RESET_PLLP_BASE] ldr r0, [r5, #CLK_RESET_PLLA_BASE] From d2cf2297979f0f4f2a4cc7e540cf0b944e9936c3 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Fri, 31 Jan 2020 15:45:25 +0200 Subject: [PATCH 3104/3715] iwlwifi: don't throw error when trying to remove IGTK commit 197288d5ba8a5289f22d3aeb4fca3824bfd9b4af upstream. The IGTK keys are only removed by mac80211 after it has already removed the AP station. This causes the driver to throw an error because mac80211 is trying to remove the IGTK when the station doesn't exist anymore. The firmware is aware that the station has been removed and can deal with it the next time we try to add an IGTK for a station, so we shouldn't try to remove the key if the station ID is IWL_MVM_INVALID_STA. Do this by removing the check for mvm_sta before calling iwl_mvm_send_sta_igtk() and check return from that function gracefully if the station ID is invalid. Cc: stable@vger.kernel.org # 4.12+ Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 684c0f65a052..d9ab85c8eb6a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -2981,6 +2981,10 @@ static int iwl_mvm_send_sta_igtk(struct iwl_mvm *mvm, igtk_cmd.sta_id = cpu_to_le32(sta_id); if (remove_key) { + /* This is a valid situation for IGTK */ + if (sta_id == IWL_MVM_INVALID_STA) + return 0; + igtk_cmd.ctrl_flags |= cpu_to_le32(STA_KEY_NOT_VALID); } else { struct ieee80211_key_seq seq; @@ -3285,9 +3289,9 @@ int iwl_mvm_remove_sta_key(struct iwl_mvm *mvm, IWL_DEBUG_WEP(mvm, "mvm remove dynamic key: idx=%d sta=%d\n", keyconf->keyidx, sta_id); - if (mvm_sta && (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC || - keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 || - keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256)) + if (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC || + keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 || + keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256) return iwl_mvm_send_sta_igtk(mvm, keyconf, sta_id, true); if (!__test_and_clear_bit(keyconf->hw_key_idx, mvm->fw_key_table)) { From 6a7ad15be999f8f7c78b081c2fc77f3ec1269e6e Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 6 Jan 2020 14:42:12 -0800 Subject: [PATCH 3105/3715] mwifiex: fix unbalanced locking in mwifiex_process_country_ie() commit 65b1aae0d9d5962faccc06bdb8e91a2a0b09451c upstream. We called rcu_read_lock(), so we need to call rcu_read_unlock() before we return. Fixes: 3d94a4a8373b ("mwifiex: fix possible heap overflow in mwifiex_process_country_ie()") Cc: stable@vger.kernel.org Cc: huangwen Cc: Ganapathi Bhat Signed-off-by: Brian Norris Acked-by: Ganapathi Bhat Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/marvell/mwifiex/sta_ioctl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c index f88a953b3cd5..652acafca136 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c @@ -274,6 +274,7 @@ static int mwifiex_process_country_ie(struct mwifiex_private *priv, if (country_ie_len > (IEEE80211_COUNTRY_STRING_LEN + MWIFIEX_MAX_TRIPLET_802_11D)) { + rcu_read_unlock(); mwifiex_dbg(priv->adapter, ERROR, "11D: country_ie_len overflow!, deauth AP\n"); return -EINVAL; From 4274984b78716c7cd8686c5d9ae0e9c319760ff8 Mon Sep 17 00:00:00 2001 From: Roberto Bergantinos Corpas Date: Tue, 4 Feb 2020 11:32:56 +0100 Subject: [PATCH 3106/3715] sunrpc: expiry_time should be seconds not timeval commit 3d96208c30f84d6edf9ab4fac813306ac0d20c10 upstream. When upcalling gssproxy, cache_head.expiry_time is set as a timeval, not seconds since boot. As such, RPC cache expiry logic will not clean expired objects created under auth.rpcsec.context cache. This has proven to cause kernel memory leaks on field. Using 64 bit variants of getboottime/timespec Expiration times have worked this way since 2010's c5b29f885afe "sunrpc: use seconds since boot in expiry cache". The gssproxy code introduced in 2012 added gss_proxy_save_rsc and introduced the bug. That's a while for this to lurk, but it required a bit of an extreme case to make it obvious. Signed-off-by: Roberto Bergantinos Corpas Cc: stable@vger.kernel.org Fixes: 030d794bf498 "SUNRPC: Use gssproxy upcall for server..." Tested-By: Frank Sorenson Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/auth_gss/svcauth_gss.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index cc08cb1292a9..a457e7afb768 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1188,6 +1188,7 @@ static int gss_proxy_save_rsc(struct cache_detail *cd, dprintk("RPC: No creds found!\n"); goto out; } else { + struct timespec64 boot; /* steal creds */ rsci.cred = ud->creds; @@ -1208,6 +1209,9 @@ static int gss_proxy_save_rsc(struct cache_detail *cd, &expiry, GFP_KERNEL); if (status) goto out; + + getboottime64(&boot); + expiry -= boot.tv_sec; } rsci.h.expiry_time = expiry; From 9c1484c4362dd0dc3b4c5cb654d5ec301f37e4aa Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 10 Dec 2019 15:48:29 +1100 Subject: [PATCH 3107/3715] tools/kvm_stat: Fix kvm_exit filter name commit 5fcf3a55a62afb0760ccb6f391d62f20bce4a42f upstream. The filter name is fixed to "exit_reason" for some kvm_exit events, no matter what architect we have. Actually, the filter name ("exit_reason") is only applicable to x86, meaning it's broken on other architects including aarch64. This fixes the issue by providing various kvm_exit filter names, depending on architect we're on. Afterwards, the variable filter name is picked and applied through ioctl(fd, SET_FILTER). Reported-by: Andrew Jones Signed-off-by: Gavin Shan Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- tools/kvm/kvm_stat/kvm_stat | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index c0d653d36c0f..fb02aa4591eb 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -261,6 +261,7 @@ class ArchX86(Arch): def __init__(self, exit_reasons): self.sc_perf_evt_open = 298 self.ioctl_numbers = IOCTL_NUMBERS + self.exit_reason_field = 'exit_reason' self.exit_reasons = exit_reasons @@ -276,6 +277,7 @@ class ArchPPC(Arch): # numbers depend on the wordsize. char_ptr_size = ctypes.sizeof(ctypes.c_char_p) self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16 + self.exit_reason_field = 'exit_nr' self.exit_reasons = {} @@ -283,6 +285,7 @@ class ArchA64(Arch): def __init__(self): self.sc_perf_evt_open = 241 self.ioctl_numbers = IOCTL_NUMBERS + self.exit_reason_field = 'esr_ec' self.exit_reasons = AARCH64_EXIT_REASONS @@ -290,6 +293,7 @@ class ArchS390(Arch): def __init__(self): self.sc_perf_evt_open = 331 self.ioctl_numbers = IOCTL_NUMBERS + self.exit_reason_field = None self.exit_reasons = None ARCH = Arch.get_arch() @@ -513,8 +517,8 @@ class TracepointProvider(Provider): """ filters = {} filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS) - if ARCH.exit_reasons: - filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons) + if ARCH.exit_reason_field and ARCH.exit_reasons: + filters['kvm_exit'] = (ARCH.exit_reason_field, ARCH.exit_reasons) return filters def get_available_fields(self): From 80b93ccca152af665e5c83246dfeedad9b446a02 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 17 Jan 2020 14:49:31 +0100 Subject: [PATCH 3108/3715] xen/balloon: Support xend-based toolstack take two commit eda4eabf86fd6806eaabc23fb90dd056fdac037b upstream. Commit 3aa6c19d2f38be ("xen/balloon: Support xend-based toolstack") tried to fix a regression with running on rather ancient Xen versions. Unfortunately the fix was based on the assumption that xend would just use another Xenstore node, but in reality only some downstream versions of xend are doing that. The upstream xend does not write that Xenstore node at all, so the problem must be fixed in another way. The easiest way to achieve that is to fall back to the behavior before commit 96edd61dcf4436 ("xen/balloon: don't online new memory initially") in case the static memory maximum can't be read. This is achieved by setting static_max to the current number of memory pages known by the system resulting in target_diff becoming zero. Fixes: 3aa6c19d2f38be ("xen/balloon: Support xend-based toolstack") Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Cc: # 4.13 Signed-off-by: Boris Ostrovsky Signed-off-by: Greg Kroah-Hartman --- drivers/xen/xen-balloon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c index cf8ef8cee5a0..112e8b5e6fee 100644 --- a/drivers/xen/xen-balloon.c +++ b/drivers/xen/xen-balloon.c @@ -82,7 +82,7 @@ static void watch_target(struct xenbus_watch *watch, "%llu", &static_max) == 1)) static_max >>= PAGE_SHIFT - 10; else - static_max = new_target; + static_max = balloon_stats.current_pages; target_diff = (xen_pv_domain() || xen_initial_domain()) ? 0 : static_max - balloon_stats.target_pages; From 0be275c65416047541a03c6716f13a337a5ff269 Mon Sep 17 00:00:00 2001 From: Marios Pomonis Date: Wed, 11 Dec 2019 12:47:43 -0800 Subject: [PATCH 3109/3715] KVM: x86: Refactor picdev_write() to prevent Spectre-v1/L1TF attacks commit 14e32321f3606e4b0970200b6e5e47ee6f1e6410 upstream. This fixes a Spectre-v1/L1TF vulnerability in picdev_write(). It replaces index computations based on the (attacked-controlled) port number with constants through a minor refactoring. Fixes: 85f455f7ddbe ("KVM: Add support for in-kernel PIC emulation") Signed-off-by: Nick Finco Signed-off-by: Marios Pomonis Reviewed-by: Andrew Honig Cc: stable@vger.kernel.org Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/i8259.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index bdcd4139eca9..38a36a1cc87f 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -460,10 +460,14 @@ static int picdev_write(struct kvm_pic *s, switch (addr) { case 0x20: case 0x21: + pic_lock(s); + pic_ioport_write(&s->pics[0], addr, data); + pic_unlock(s); + break; case 0xa0: case 0xa1: pic_lock(s); - pic_ioport_write(&s->pics[addr >> 7], addr, data); + pic_ioport_write(&s->pics[1], addr, data); pic_unlock(s); break; case 0x4d0: From 7341bf4c2aead34731d1a6f3c98fcb69d5bc21c7 Mon Sep 17 00:00:00 2001 From: Marios Pomonis Date: Wed, 11 Dec 2019 12:47:50 -0800 Subject: [PATCH 3110/3715] KVM: x86: Refactor prefix decoding to prevent Spectre-v1/L1TF attacks commit 125ffc5e0a56a3eded608dc51e09d5ebf72cf652 upstream. This fixes Spectre-v1/L1TF vulnerabilities in vmx_read_guest_seg_selector(), vmx_read_guest_seg_base(), vmx_read_guest_seg_limit() and vmx_read_guest_seg_ar(). When invoked from emulation, these functions contain index computations based on the (attacker-influenced) segment value. Using constants prevents the attack. Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index eb8b843325f4..df1770e3c6fb 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -5094,16 +5094,28 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) ctxt->ad_bytes = def_ad_bytes ^ 6; break; case 0x26: /* ES override */ + has_seg_override = true; + ctxt->seg_override = VCPU_SREG_ES; + break; case 0x2e: /* CS override */ + has_seg_override = true; + ctxt->seg_override = VCPU_SREG_CS; + break; case 0x36: /* SS override */ + has_seg_override = true; + ctxt->seg_override = VCPU_SREG_SS; + break; case 0x3e: /* DS override */ has_seg_override = true; - ctxt->seg_override = (ctxt->b >> 3) & 3; + ctxt->seg_override = VCPU_SREG_DS; break; case 0x64: /* FS override */ + has_seg_override = true; + ctxt->seg_override = VCPU_SREG_FS; + break; case 0x65: /* GS override */ has_seg_override = true; - ctxt->seg_override = ctxt->b & 7; + ctxt->seg_override = VCPU_SREG_GS; break; case 0x40 ... 0x4f: /* REX */ if (mode != X86EMUL_MODE_PROT64) From 8fd994e37dd3e833ad74fb9a63de6b8fbc1c8ef4 Mon Sep 17 00:00:00 2001 From: Marios Pomonis Date: Wed, 11 Dec 2019 12:47:52 -0800 Subject: [PATCH 3111/3715] KVM: x86: Protect DR-based index computations from Spectre-v1/L1TF attacks commit ea740059ecb37807ba47b84b33d1447435a8d868 upstream. This fixes a Spectre-v1/L1TF vulnerability in __kvm_set_dr() and kvm_get_dr(). Both kvm_get_dr() and kvm_set_dr() (a wrapper of __kvm_set_dr()) are exported symbols so KVM should tream them conservatively from a security perspective. Fixes: 020df0794f57 ("KVM: move DR register access handling into generic code") Signed-off-by: Nick Finco Signed-off-by: Marios Pomonis Reviewed-by: Andrew Honig Cc: stable@vger.kernel.org Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8a51442247c5..9b3da67fa9d8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -924,9 +924,11 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu) static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) { + size_t size = ARRAY_SIZE(vcpu->arch.db); + switch (dr) { case 0 ... 3: - vcpu->arch.db[dr] = val; + vcpu->arch.db[array_index_nospec(dr, size)] = val; if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) vcpu->arch.eff_db[dr] = val; break; @@ -963,9 +965,11 @@ EXPORT_SYMBOL_GPL(kvm_set_dr); int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) { + size_t size = ARRAY_SIZE(vcpu->arch.db); + switch (dr) { case 0 ... 3: - *val = vcpu->arch.db[dr]; + *val = vcpu->arch.db[array_index_nospec(dr, size)]; break; case 4: /* fall through */ From 112a14ba32837b062ac47957679adb875b9adc1e Mon Sep 17 00:00:00 2001 From: Marios Pomonis Date: Wed, 11 Dec 2019 12:47:46 -0800 Subject: [PATCH 3112/3715] KVM: x86: Protect kvm_lapic_reg_write() from Spectre-v1/L1TF attacks commit 4bf79cb089f6b1c6c632492c0271054ce52ad766 upstream. This fixes a Spectre-v1/L1TF vulnerability in kvm_lapic_reg_write(). This function contains index computations based on the (attacker-controlled) MSR number. Fixes: 0105d1a52640 ("KVM: x2apic interface to lapic") Signed-off-by: Nick Finco Signed-off-by: Marios Pomonis Reviewed-by: Andrew Honig Cc: stable@vger.kernel.org Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/lapic.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 2307f63efd20..8715711f2755 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1754,15 +1754,20 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) case APIC_LVTTHMR: case APIC_LVTPC: case APIC_LVT1: - case APIC_LVTERR: + case APIC_LVTERR: { /* TODO: Check vector */ + size_t size; + u32 index; + if (!kvm_apic_sw_enabled(apic)) val |= APIC_LVT_MASKED; - - val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4]; + size = ARRAY_SIZE(apic_lvt_mask); + index = array_index_nospec( + (reg - APIC_LVTT) >> 4, size); + val &= apic_lvt_mask[index]; kvm_lapic_set_reg(apic, reg, val); - break; + } case APIC_LVTT: if (!kvm_apic_sw_enabled(apic)) From 3645b2277bcddc2ecbb329343ffa7a929c7211e5 Mon Sep 17 00:00:00 2001 From: Marios Pomonis Date: Wed, 11 Dec 2019 12:47:42 -0800 Subject: [PATCH 3113/3715] KVM: x86: Protect kvm_hv_msr_[get|set]_crash_data() from Spectre-v1/L1TF attacks commit 8618793750071d66028584a83ed0b4fa7eb4f607 upstream. This fixes Spectre-v1/L1TF vulnerabilities in kvm_hv_msr_get_crash_data() and kvm_hv_msr_set_crash_data(). These functions contain index computations that use the (attacker-controlled) MSR number. Fixes: e7d9513b60e8 ("kvm/x86: added hyper-v crash msrs into kvm hyperv context") Signed-off-by: Nick Finco Signed-off-by: Marios Pomonis Reviewed-by: Andrew Honig Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/hyperv.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 5d13abecb384..2fba82b06c2d 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -747,11 +747,12 @@ static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu, u32 index, u64 *pdata) { struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; + size_t size = ARRAY_SIZE(hv->hv_crash_param); - if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param))) + if (WARN_ON_ONCE(index >= size)) return -EINVAL; - *pdata = hv->hv_crash_param[index]; + *pdata = hv->hv_crash_param[array_index_nospec(index, size)]; return 0; } @@ -790,11 +791,12 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu, u32 index, u64 data) { struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; + size_t size = ARRAY_SIZE(hv->hv_crash_param); - if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param))) + if (WARN_ON_ONCE(index >= size)) return -EINVAL; - hv->hv_crash_param[index] = data; + hv->hv_crash_param[array_index_nospec(index, size)] = data; return 0; } From aa209fe55a32a5f91a087af5ed71d870971461cf Mon Sep 17 00:00:00 2001 From: Marios Pomonis Date: Wed, 11 Dec 2019 12:47:45 -0800 Subject: [PATCH 3114/3715] KVM: x86: Protect ioapic_write_indirect() from Spectre-v1/L1TF attacks commit 670564559ca35b439c8d8861fc399451ddf95137 upstream. This fixes a Spectre-v1/L1TF vulnerability in ioapic_write_indirect(). This function contains index computations based on the (attacker-controlled) IOREGSEL register. This patch depends on patch "KVM: x86: Protect ioapic_read_indirect() from Spectre-v1/L1TF attacks". Fixes: 70f93dae32ac ("KVM: Use temporary variable to shorten lines.") Signed-off-by: Nick Finco Signed-off-by: Marios Pomonis Reviewed-by: Andrew Honig Cc: stable@vger.kernel.org Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/ioapic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 9d270ba9643c..31ef81d6774c 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -297,6 +297,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) ioapic_debug("change redir index %x val %x\n", index, val); if (index >= IOAPIC_NUM_PINS) return; + index = array_index_nospec(index, IOAPIC_NUM_PINS); e = &ioapic->redirtbl[index]; mask_before = e->fields.mask; /* Preserve read-only fields */ From 6b58586c367a8722f45e63411e5de943ddd0c13b Mon Sep 17 00:00:00 2001 From: Marios Pomonis Date: Wed, 11 Dec 2019 12:47:48 -0800 Subject: [PATCH 3115/3715] KVM: x86: Protect MSR-based index computations in pmu.h from Spectre-v1/L1TF attacks commit 13c5183a4e643cc2b03a22d0e582c8e17bb7457d upstream. This fixes a Spectre-v1/L1TF vulnerability in the get_gp_pmc() and get_fixed_pmc() functions. They both contain index computations based on the (attacker-controlled) MSR number. Fixes: 25462f7f5295 ("KVM: x86/vPMU: Define kvm_pmu_ops to support vPMU function dispatch") Signed-off-by: Nick Finco Signed-off-by: Marios Pomonis Reviewed-by: Andrew Honig Cc: stable@vger.kernel.org Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/pmu.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index a9a62b9a73e2..c67a636b268f 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -2,6 +2,8 @@ #ifndef __KVM_X86_PMU_H #define __KVM_X86_PMU_H +#include + #define vcpu_to_pmu(vcpu) (&(vcpu)->arch.pmu) #define pmu_to_vcpu(pmu) (container_of((pmu), struct kvm_vcpu, arch.pmu)) #define pmc_to_pmu(pmc) (&(pmc)->vcpu->arch.pmu) @@ -81,8 +83,12 @@ static inline bool pmc_is_enabled(struct kvm_pmc *pmc) static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr, u32 base) { - if (msr >= base && msr < base + pmu->nr_arch_gp_counters) - return &pmu->gp_counters[msr - base]; + if (msr >= base && msr < base + pmu->nr_arch_gp_counters) { + u32 index = array_index_nospec(msr - base, + pmu->nr_arch_gp_counters); + + return &pmu->gp_counters[index]; + } return NULL; } @@ -92,8 +98,12 @@ static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr) { int base = MSR_CORE_PERF_FIXED_CTR0; - if (msr >= base && msr < base + pmu->nr_arch_fixed_counters) - return &pmu->fixed_counters[msr - base]; + if (msr >= base && msr < base + pmu->nr_arch_fixed_counters) { + u32 index = array_index_nospec(msr - base, + pmu->nr_arch_fixed_counters); + + return &pmu->fixed_counters[index]; + } return NULL; } From ec01d89eeae22f709d95ecddbbae251bea9d2468 Mon Sep 17 00:00:00 2001 From: Marios Pomonis Date: Wed, 11 Dec 2019 12:47:44 -0800 Subject: [PATCH 3116/3715] KVM: x86: Protect ioapic_read_indirect() from Spectre-v1/L1TF attacks commit 8c86405f606ca8508b8d9280680166ca26723695 upstream. This fixes a Spectre-v1/L1TF vulnerability in ioapic_read_indirect(). This function contains index computations based on the (attacker-controlled) IOREGSEL register. Fixes: a2c118bfab8b ("KVM: Fix bounds checking in ioapic indirect register reads (CVE-2013-1798)") Signed-off-by: Nick Finco Signed-off-by: Marios Pomonis Reviewed-by: Andrew Honig Cc: stable@vger.kernel.org Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/ioapic.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 31ef81d6774c..dab6940ea99c 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -73,13 +74,14 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, default: { u32 redir_index = (ioapic->ioregsel - 0x10) >> 1; - u64 redir_content; + u64 redir_content = ~0ULL; - if (redir_index < IOAPIC_NUM_PINS) - redir_content = - ioapic->redirtbl[redir_index].bits; - else - redir_content = ~0ULL; + if (redir_index < IOAPIC_NUM_PINS) { + u32 index = array_index_nospec( + redir_index, IOAPIC_NUM_PINS); + + redir_content = ioapic->redirtbl[index].bits; + } result = (ioapic->ioregsel & 0x1) ? (redir_content >> 32) & 0xffffffff : From 86f71e86d42e70a1e21116f28e778eac495843c0 Mon Sep 17 00:00:00 2001 From: Marios Pomonis Date: Wed, 11 Dec 2019 12:47:49 -0800 Subject: [PATCH 3117/3715] KVM: x86: Protect MSR-based index computations from Spectre-v1/L1TF attacks in x86.c commit 6ec4c5eee1750d5d17951c4e1960d953376a0dda upstream. This fixes a Spectre-v1/L1TF vulnerability in set_msr_mce() and get_msr_mce(). Both functions contain index computations based on the (attacker-controlled) MSR number. Fixes: 890ca9aefa78 ("KVM: Add MCE support") Signed-off-by: Nick Finco Signed-off-by: Marios Pomonis Reviewed-by: Andrew Honig Cc: stable@vger.kernel.org Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9b3da67fa9d8..808f4174e483 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2165,7 +2165,10 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data) default: if (msr >= MSR_IA32_MC0_CTL && msr < MSR_IA32_MCx_CTL(bank_num)) { - u32 offset = msr - MSR_IA32_MC0_CTL; + u32 offset = array_index_nospec( + msr - MSR_IA32_MC0_CTL, + MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL); + /* only 0 or all 1s can be written to IA32_MCi_CTL * some Linux kernels though clear bit 10 in bank 4 to * workaround a BIOS/GART TBL issue on AMD K8s, ignore @@ -2549,7 +2552,10 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) default: if (msr >= MSR_IA32_MC0_CTL && msr < MSR_IA32_MCx_CTL(bank_num)) { - u32 offset = msr - MSR_IA32_MC0_CTL; + u32 offset = array_index_nospec( + msr - MSR_IA32_MC0_CTL, + MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL); + data = vcpu->arch.mce_banks[offset]; break; } From 703ae42319295fb9089f2ddb61fb135e0f0e8d20 Mon Sep 17 00:00:00 2001 From: Marios Pomonis Date: Wed, 11 Dec 2019 12:47:41 -0800 Subject: [PATCH 3118/3715] KVM: x86: Protect x86_decode_insn from Spectre-v1/L1TF attacks commit 3c9053a2cae7ba2ba73766a34cea41baa70f57f7 upstream. This fixes a Spectre-v1/L1TF vulnerability in x86_decode_insn(). kvm_emulate_instruction() (an ancestor of x86_decode_insn()) is an exported symbol, so KVM should treat it conservatively from a security perspective. Fixes: 045a282ca415 ("KVM: emulator: implement fninit, fnstsw, fnstcw") Signed-off-by: Nick Finco Signed-off-by: Marios Pomonis Reviewed-by: Andrew Honig Cc: stable@vger.kernel.org Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index df1770e3c6fb..041b9b05fae1 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -5199,10 +5199,15 @@ done_prefixes: } break; case Escape: - if (ctxt->modrm > 0xbf) - opcode = opcode.u.esc->high[ctxt->modrm - 0xc0]; - else + if (ctxt->modrm > 0xbf) { + size_t size = ARRAY_SIZE(opcode.u.esc->high); + u32 index = array_index_nospec( + ctxt->modrm - 0xc0, size); + + opcode = opcode.u.esc->high[index]; + } else { opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7]; + } break; case InstrDual: if ((ctxt->modrm >> 6) == 3) From 4609a0bfd5065688d16920a0c401589818ed3e06 Mon Sep 17 00:00:00 2001 From: Marios Pomonis Date: Wed, 11 Dec 2019 12:47:47 -0800 Subject: [PATCH 3119/3715] KVM: x86: Protect MSR-based index computations in fixed_msr_to_seg_unit() from Spectre-v1/L1TF attacks commit 25a5edea71b7c154b6a0b8cec14c711cafa31d26 upstream. This fixes a Spectre-v1/L1TF vulnerability in fixed_msr_to_seg_unit(). This function contains index computations based on the (attacker-controlled) MSR number. Fixes: de9aef5e1ad6 ("KVM: MTRR: introduce fixed_mtrr_segment table") Signed-off-by: Nick Finco Signed-off-by: Marios Pomonis Reviewed-by: Andrew Honig Cc: stable@vger.kernel.org Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/mtrr.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c index e9ea2d45ae66..1209447d6014 100644 --- a/arch/x86/kvm/mtrr.c +++ b/arch/x86/kvm/mtrr.c @@ -202,11 +202,15 @@ static bool fixed_msr_to_seg_unit(u32 msr, int *seg, int *unit) break; case MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000: *seg = 1; - *unit = msr - MSR_MTRRfix16K_80000; + *unit = array_index_nospec( + msr - MSR_MTRRfix16K_80000, + MSR_MTRRfix16K_A0000 - MSR_MTRRfix16K_80000 + 1); break; case MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000: *seg = 2; - *unit = msr - MSR_MTRRfix4K_C0000; + *unit = array_index_nospec( + msr - MSR_MTRRfix4K_C0000, + MSR_MTRRfix4K_F8000 - MSR_MTRRfix4K_C0000 + 1); break; default: return false; From 7fe5a37ddec06e9e6544283bd175084b4a827a0f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 18 Dec 2019 13:54:46 -0800 Subject: [PATCH 3120/3715] KVM: PPC: Book3S HV: Uninit vCPU if vcore creation fails commit 1a978d9d3e72ddfa40ac60d26301b154247ee0bc upstream. Call kvm_vcpu_uninit() if vcore creation fails to avoid leaking any resources allocated by kvm_vcpu_init(), i.e. the vcpu->run page. Fixes: 371fefd6f2dc4 ("KVM: PPC: Allow book3s_hv guests to use SMT processor modes") Cc: stable@vger.kernel.org Reviewed-by: Greg Kurz Signed-off-by: Sean Christopherson Acked-by: Paul Mackerras Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kvm/book3s_hv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 7de26809340a..e4f81f014206 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1997,7 +1997,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, mutex_unlock(&kvm->lock); if (!vcore) - goto free_vcpu; + goto uninit_vcpu; spin_lock(&vcore->lock); ++vcore->num_threads; @@ -2014,6 +2014,8 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, return vcpu; +uninit_vcpu: + kvm_vcpu_uninit(vcpu); free_vcpu: kmem_cache_free(kvm_vcpu_cache, vcpu); out: From f572810877041346667414c2b9cf0e368d575f71 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 18 Dec 2019 13:54:47 -0800 Subject: [PATCH 3121/3715] KVM: PPC: Book3S PR: Free shared page if mmu initialization fails commit cb10bf9194f4d2c5d830eddca861f7ca0fecdbb4 upstream. Explicitly free the shared page if kvmppc_mmu_init() fails during kvmppc_core_vcpu_create(), as the page is freed only in kvmppc_core_vcpu_free(), which is not reached via kvm_vcpu_uninit(). Fixes: 96bc451a15329 ("KVM: PPC: Introduce shared page") Cc: stable@vger.kernel.org Reviewed-by: Greg Kurz Signed-off-by: Sean Christopherson Acked-by: Paul Mackerras Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kvm/book3s_pr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index e2ef16198456..f5bbb188f18d 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1482,10 +1482,12 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm, err = kvmppc_mmu_init(vcpu); if (err < 0) - goto uninit_vcpu; + goto free_shared_page; return vcpu; +free_shared_page: + free_page((unsigned long)vcpu->arch.shared); uninit_vcpu: kvm_vcpu_uninit(vcpu); free_shadow_vcpu: From 724fbee218b56daeee17d20cf4c7de785eef6c67 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 18 Dec 2019 13:54:48 -0800 Subject: [PATCH 3122/3715] KVM: x86: Free wbinvd_dirty_mask if vCPU creation fails commit 16be9ddea268ad841457a59109963fff8c9de38d upstream. Free the vCPU's wbinvd_dirty_mask if vCPU creation fails after kvm_arch_vcpu_init(), e.g. when installing the vCPU's file descriptor. Do the freeing by calling kvm_arch_vcpu_free() instead of open coding the freeing. This adds a likely superfluous, but ultimately harmless, call to kvmclock_reset(), which only clears vcpu->arch.pv_time_enabled. Using kvm_arch_vcpu_free() allows for additional cleanup in the future. Fixes: f5f48ee15c2ee ("KVM: VMX: Execute WBINVD to keep data consistency with assigned devices") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 808f4174e483..b6d80c019056 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8063,7 +8063,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_mmu_unload(vcpu); vcpu_put(vcpu); - kvm_x86_ops->vcpu_free(vcpu); + kvm_arch_vcpu_free(vcpu); } void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) From a6f16a683832cba400f4eab38df4cd92a937e2da Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Thu, 3 Oct 2019 14:50:30 -0600 Subject: [PATCH 3123/3715] clk: tegra: Mark fuse clock as critical commit bf83b96f87ae2abb1e535306ea53608e8de5dfbb upstream. For a little over a year, U-Boot on Tegra124 has configured the flow controller to perform automatic RAM re-repair on off->on power transitions of the CPU rail[1]. This is mandatory for correct operation of Tegra124. However, RAM re-repair relies on certain clocks, which the kernel must enable and leave running. The fuse clock is one of those clocks. Mark this clock as critical so that LP1 power mode (system suspend) operates correctly. [1] 3cc7942a4ae5 ARM: tegra: implement RAM repair Reported-by: Jonathan Hunter Cc: stable@vger.kernel.org Signed-off-by: Stephen Warren Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- drivers/clk/tegra/clk-tegra-periph.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/clk/tegra/clk-tegra-periph.c b/drivers/clk/tegra/clk-tegra-periph.c index 848255cc0209..d300a256fcac 100644 --- a/drivers/clk/tegra/clk-tegra-periph.c +++ b/drivers/clk/tegra/clk-tegra-periph.c @@ -825,7 +825,11 @@ static struct tegra_periph_init_data gate_clks[] = { GATE("vcp", "clk_m", 29, 0, tegra_clk_vcp, 0), GATE("apbdma", "clk_m", 34, 0, tegra_clk_apbdma, 0), GATE("kbc", "clk_32k", 36, TEGRA_PERIPH_ON_APB | TEGRA_PERIPH_NO_RESET, tegra_clk_kbc, 0), - GATE("fuse", "clk_m", 39, TEGRA_PERIPH_ON_APB, tegra_clk_fuse, 0), + /* + * Critical for RAM re-repair operation, which must occur on resume + * from LP1 system suspend and as part of CCPLEX cluster switching. + */ + GATE("fuse", "clk_m", 39, TEGRA_PERIPH_ON_APB, tegra_clk_fuse, CLK_IS_CRITICAL), GATE("fuse_burn", "clk_m", 39, TEGRA_PERIPH_ON_APB, tegra_clk_fuse_burn, 0), GATE("kfuse", "clk_m", 40, TEGRA_PERIPH_ON_APB, tegra_clk_kfuse, 0), GATE("apbif", "clk_m", 107, TEGRA_PERIPH_ON_APB, tegra_clk_apbif, 0), From 2585e20dfb6ae57b6aec2fc5cf85f43d5fbe96a8 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 18 Dec 2019 16:49:05 -0800 Subject: [PATCH 3124/3715] scsi: qla2xxx: Fix the endianness of the qla82xx_get_fw_size() return type commit 3f5f7335e5e234e340b48ecb24c2aba98a61f934 upstream. Since qla82xx_get_fw_size() returns a number in CPU-endian format, change its return type from __le32 into u32. This patch does not change any functionality. Fixes: 9c2b297572bf ("[SCSI] qla2xxx: Support for loading Unified ROM Image (URI) format firmware file.") Cc: Himanshu Madhani Cc: Quinn Tran Cc: Martin Wilck Cc: Daniel Wagner Cc: Roman Bolshakov Link: https://lore.kernel.org/r/20191219004905.39586-1-bvanassche@acm.org Reviewed-by: Daniel Wagner Reviewed-by: Roman Bolshakov Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_nx.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c index a77c33987703..a5b8313cf491 100644 --- a/drivers/scsi/qla2xxx/qla_nx.c +++ b/drivers/scsi/qla2xxx/qla_nx.c @@ -1605,8 +1605,7 @@ qla82xx_get_bootld_offset(struct qla_hw_data *ha) return (u8 *)&ha->hablob->fw->data[offset]; } -static __le32 -qla82xx_get_fw_size(struct qla_hw_data *ha) +static u32 qla82xx_get_fw_size(struct qla_hw_data *ha) { struct qla82xx_uri_data_desc *uri_desc = NULL; @@ -1617,7 +1616,7 @@ qla82xx_get_fw_size(struct qla_hw_data *ha) return cpu_to_le32(uri_desc->size); } - return cpu_to_le32(*(u32 *)&ha->hablob->fw->data[FW_SIZE_OFFSET]); + return get_unaligned_le32(&ha->hablob->fw->data[FW_SIZE_OFFSET]); } static u8 * @@ -1808,7 +1807,7 @@ qla82xx_fw_load_from_blob(struct qla_hw_data *ha) } flashaddr = FLASH_ADDR_START; - size = (__force u32)qla82xx_get_fw_size(ha) / 8; + size = qla82xx_get_fw_size(ha) / 8; ptr64 = (u64 *)qla82xx_get_fw_offs(ha); for (i = 0; i < size; i++) { From 0ee281862e5a0ffd8384472775fb0a68359170f8 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 17 Dec 2019 18:47:26 -0700 Subject: [PATCH 3125/3715] scsi: csiostor: Adjust indentation in csio_device_reset commit a808a04c861782e31fc30e342a619c144aaee14a upstream. Clang warns: ../drivers/scsi/csiostor/csio_scsi.c:1386:3: warning: misleading indentation; statement is not part of the previous 'if' [-Wmisleading-indentation] csio_lnodes_exit(hw, 1); ^ ../drivers/scsi/csiostor/csio_scsi.c:1382:2: note: previous statement is here if (*buf != '1') ^ 1 warning generated. This warning occurs because there is a space after the tab on this line. Remove it so that the indentation is consistent with the Linux kernel coding style and clang no longer warns. Fixes: a3667aaed569 ("[SCSI] csiostor: Chelsio FCoE offload driver") Link: https://github.com/ClangBuiltLinux/linux/issues/818 Link: https://lore.kernel.org/r/20191218014726.8455-1-natechancellor@gmail.com Signed-off-by: Nathan Chancellor Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/csiostor/csio_scsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/csiostor/csio_scsi.c b/drivers/scsi/csiostor/csio_scsi.c index e09c7f360dbd..0cb585759de6 100644 --- a/drivers/scsi/csiostor/csio_scsi.c +++ b/drivers/scsi/csiostor/csio_scsi.c @@ -1383,7 +1383,7 @@ csio_device_reset(struct device *dev, return -EINVAL; /* Delete NPIV lnodes */ - csio_lnodes_exit(hw, 1); + csio_lnodes_exit(hw, 1); /* Block upper IOs */ csio_lnodes_block_request(hw); From f9082f8193cd1cb96b61cbe31588a683cdf69264 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 17 Dec 2019 18:52:52 -0700 Subject: [PATCH 3126/3715] scsi: qla4xxx: Adjust indentation in qla4xxx_mem_free commit aa8679736a82386551eb9f3ea0e6ebe2c0e99104 upstream. Clang warns: ../drivers/scsi/qla4xxx/ql4_os.c:4148:3: warning: misleading indentation; statement is not part of the previous 'if' [-Wmisleading-indentation] if (ha->fw_dump) ^ ../drivers/scsi/qla4xxx/ql4_os.c:4144:2: note: previous statement is here if (ha->queues) ^ 1 warning generated. This warning occurs because there is a space after the tab on this line. Remove it so that the indentation is consistent with the Linux kernel coding style and clang no longer warns. Fixes: 068237c87c64 ("[SCSI] qla4xxx: Capture minidump for ISP82XX on firmware failure") Link: https://github.com/ClangBuiltLinux/linux/issues/819 Link: https://lore.kernel.org/r/20191218015252.20890-1-natechancellor@gmail.com Acked-by: Manish Rangankar Reviewed-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla4xxx/ql4_os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index b0ad60565fe9..fb3abaf817a3 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -4150,7 +4150,7 @@ static void qla4xxx_mem_free(struct scsi_qla_host *ha) dma_free_coherent(&ha->pdev->dev, ha->queues_len, ha->queues, ha->queues_dma); - if (ha->fw_dump) + if (ha->fw_dump) vfree(ha->fw_dump); ha->queues_len = 0; From c909605d73ab4a26ef98c9fb8c291ff2f4c5f9b4 Mon Sep 17 00:00:00 2001 From: Asutosh Das Date: Mon, 25 Nov 2019 22:53:30 -0800 Subject: [PATCH 3127/3715] scsi: ufs: Recheck bkops level if bkops is disabled commit 24366c2afbb0539fb14eff330d4e3a5db5c0a3ef upstream. bkops level should be rechecked upon receiving an exception. Currently the level is being cached and never updated. Update bkops each time the level is checked. Also do not use the cached bkops level value if it is disabled and then enabled. Fixes: afdfff59a0e0 (scsi: ufs: handle non spec compliant bkops behaviour by device) Link: https://lore.kernel.org/r/1574751214-8321-2-git-send-email-cang@qti.qualcomm.com Reviewed-by: Bean Huo Reviewed-by: Alim Akhtar Tested-by: Alim Akhtar Signed-off-by: Asutosh Das Signed-off-by: Can Guo Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ufs/ufshcd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index d25082e573e0..7ada4f272258 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -4812,6 +4812,7 @@ static int ufshcd_disable_auto_bkops(struct ufs_hba *hba) hba->auto_bkops_enabled = false; trace_ufshcd_auto_bkops_state(dev_name(hba->dev), "Disabled"); + hba->is_urgent_bkops_lvl_checked = false; out: return err; } @@ -4836,6 +4837,7 @@ static void ufshcd_force_reset_auto_bkops(struct ufs_hba *hba) hba->ee_ctrl_mask &= ~MASK_EE_URGENT_BKOPS; ufshcd_disable_auto_bkops(hba); } + hba->is_urgent_bkops_lvl_checked = false; } static inline int ufshcd_get_bkops_status(struct ufs_hba *hba, u32 *status) @@ -4882,6 +4884,7 @@ static int ufshcd_bkops_ctrl(struct ufs_hba *hba, err = ufshcd_enable_auto_bkops(hba); else err = ufshcd_disable_auto_bkops(hba); + hba->urgent_bkops_lvl = curr_status; out: return err; } From 0225f69280146a9e75736145c46cb2c31d8f890f Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 17 Dec 2019 18:36:37 -0700 Subject: [PATCH 3128/3715] phy: qualcomm: Adjust indentation in read_poll_timeout commit a89806c998ee123bb9c0f18526e55afd12c0c0ab upstream. Clang warns: ../drivers/phy/qualcomm/phy-qcom-apq8064-sata.c:83:4: warning: misleading indentation; statement is not part of the previous 'if' [-Wmisleading-indentation] usleep_range(DELAY_INTERVAL_US, DELAY_INTERVAL_US + 50); ^ ../drivers/phy/qualcomm/phy-qcom-apq8064-sata.c:80:3: note: previous statement is here if (readl_relaxed(addr) & mask) ^ 1 warning generated. This warning occurs because there is a space after the tab on this line. Remove it so that the indentation is consistent with the Linux kernel coding style and clang no longer warns. Fixes: 1de990d8a169 ("phy: qcom: Add driver for QCOM APQ8064 SATA PHY") Link: https://github.com/ClangBuiltLinux/linux/issues/816 Signed-off-by: Nathan Chancellor Reviewed-by: Bjorn Andersson Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Greg Kroah-Hartman --- drivers/phy/qualcomm/phy-qcom-apq8064-sata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c b/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c index 69ce2afac015..c6925e3e878b 100644 --- a/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c +++ b/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c @@ -88,7 +88,7 @@ static int read_poll_timeout(void __iomem *addr, u32 mask) if (readl_relaxed(addr) & mask) return 0; - usleep_range(DELAY_INTERVAL_US, DELAY_INTERVAL_US + 50); + usleep_range(DELAY_INTERVAL_US, DELAY_INTERVAL_US + 50); } while (!time_after(jiffies, timeout)); return (readl_relaxed(addr) & mask) ? 0 : -ETIMEDOUT; From 00ab265a94df37909bfbadf494a01a969d329f94 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 17 Dec 2019 20:19:31 -0700 Subject: [PATCH 3129/3715] ext2: Adjust indentation in ext2_fill_super commit d9e9866803f7b6c3fdd35d345e97fb0b2908bbbc upstream. Clang warns: ../fs/ext2/super.c:1076:3: warning: misleading indentation; statement is not part of the previous 'if' [-Wmisleading-indentation] sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) - ^ ../fs/ext2/super.c:1074:2: note: previous statement is here if (EXT2_BLOCKS_PER_GROUP(sb) == 0) ^ 1 warning generated. This warning occurs because there is a space before the tab on this line. Remove it so that the indentation is consistent with the Linux kernel coding style and clang no longer warns. Fixes: 41f04d852e35 ("[PATCH] ext2: fix mounts at 16T") Link: https://github.com/ClangBuiltLinux/linux/issues/827 Link: https://lore.kernel.org/r/20191218031930.31393-1-natechancellor@gmail.com Signed-off-by: Nathan Chancellor Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/ext2/super.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 13f470636672..4a338576ebb1 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1077,9 +1077,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) if (EXT2_BLOCKS_PER_GROUP(sb) == 0) goto cantfind_ext2; - sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) - - le32_to_cpu(es->s_first_data_block) - 1) - / EXT2_BLOCKS_PER_GROUP(sb)) + 1; + sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) - + le32_to_cpu(es->s_first_data_block) - 1) + / EXT2_BLOCKS_PER_GROUP(sb)) + 1; db_count = (sbi->s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) / EXT2_DESC_PER_BLOCK(sb); sbi->s_group_desc = kmalloc (db_count * sizeof (struct buffer_head *), GFP_KERNEL); From 4f91c79cd4fe68908e6844560bee3b581aea6c41 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 9 Dec 2019 13:03:38 -0700 Subject: [PATCH 3130/3715] powerpc/44x: Adjust indentation in ibm4xx_denali_fixup_memsize commit c3aae14e5d468d18dbb5d7c0c8c7e2968cc14aad upstream. Clang warns: ../arch/powerpc/boot/4xx.c:231:3: warning: misleading indentation; statement is not part of the previous 'else' [-Wmisleading-indentation] val = SDRAM0_READ(DDR0_42); ^ ../arch/powerpc/boot/4xx.c:227:2: note: previous statement is here else ^ This is because there is a space at the beginning of this line; remove it so that the indentation is consistent according to the Linux kernel coding style and clang no longer warns. Fixes: d23f5099297c ("[POWERPC] 4xx: Adds decoding of 440SPE memory size to boot wrapper library") Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Signed-off-by: Michael Ellerman Link: https://github.com/ClangBuiltLinux/linux/issues/780 Link: https://lore.kernel.org/r/20191209200338.12546-1-natechancellor@gmail.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/boot/4xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/boot/4xx.c b/arch/powerpc/boot/4xx.c index f7da65169124..3c8774163c7e 100644 --- a/arch/powerpc/boot/4xx.c +++ b/arch/powerpc/boot/4xx.c @@ -232,7 +232,7 @@ void ibm4xx_denali_fixup_memsize(void) dpath = 8; /* 64 bits */ /* get address pins (rows) */ - val = SDRAM0_READ(DDR0_42); + val = SDRAM0_READ(DDR0_42); row = DDR_GET_VAL(val, DDR_APIN, DDR_APIN_SHIFT); if (row > max_row) From c4ceea2c97fc0517571e563d8fedd80231b3895e Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 17 Dec 2019 18:21:52 -0700 Subject: [PATCH 3131/3715] NFC: pn544: Adjust indentation in pn544_hci_check_presence commit 5080832627b65e3772a35d1dced68c64e2b24442 upstream. Clang warns ../drivers/nfc/pn544/pn544.c:696:4: warning: misleading indentation; statement is not part of the previous 'if' [-Wmisleading-indentation] return nfc_hci_send_cmd(hdev, NFC_HCI_RF_READER_A_GATE, ^ ../drivers/nfc/pn544/pn544.c:692:3: note: previous statement is here if (target->nfcid1_len != 4 && target->nfcid1_len != 7 && ^ 1 warning generated. This warning occurs because there is a space after the tab on this line. Remove it so that the indentation is consistent with the Linux kernel coding style and clang no longer warns. Fixes: da052850b911 ("NFC: Add pn544 presence check for different targets") Link: https://github.com/ClangBuiltLinux/linux/issues/814 Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/nfc/pn544/pn544.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/pn544/pn544.c b/drivers/nfc/pn544/pn544.c index 70e898e38b16..f30bdf95610f 100644 --- a/drivers/nfc/pn544/pn544.c +++ b/drivers/nfc/pn544/pn544.c @@ -704,7 +704,7 @@ static int pn544_hci_check_presence(struct nfc_hci_dev *hdev, target->nfcid1_len != 10) return -EOPNOTSUPP; - return nfc_hci_send_cmd(hdev, NFC_HCI_RF_READER_A_GATE, + return nfc_hci_send_cmd(hdev, NFC_HCI_RF_READER_A_GATE, PN544_RF_READER_CMD_ACTIVATE_NEXT, target->nfcid1, target->nfcid1_len, NULL); } else if (target->supported_protocols & (NFC_PROTO_JEWEL_MASK | From e94b702430344fb4e0bf2dedcad1f2f17cfbfa39 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 9 Dec 2019 15:38:59 -0700 Subject: [PATCH 3132/3715] ppp: Adjust indentation into ppp_async_input commit 08cbc75f96029d3092664213a844a5e25523aa35 upstream. Clang warns: ../drivers/net/ppp/ppp_async.c:877:6: warning: misleading indentation; statement is not part of the previous 'if' [-Wmisleading-indentation] ap->rpkt = skb; ^ ../drivers/net/ppp/ppp_async.c:875:5: note: previous statement is here if (!skb) ^ 1 warning generated. This warning occurs because there is a space before the tab on this line. Clean up this entire block's indentation so that it is consistent with the Linux kernel coding style and clang no longer warns. Fixes: 6722e78c9005 ("[PPP]: handle misaligned accesses") Link: https://github.com/ClangBuiltLinux/linux/issues/800 Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ppp/ppp_async.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c index 814fd8fae67d..297a986e6653 100644 --- a/drivers/net/ppp/ppp_async.c +++ b/drivers/net/ppp/ppp_async.c @@ -878,15 +878,15 @@ ppp_async_input(struct asyncppp *ap, const unsigned char *buf, skb = dev_alloc_skb(ap->mru + PPP_HDRLEN + 2); if (!skb) goto nomem; - ap->rpkt = skb; - } - if (skb->len == 0) { - /* Try to get the payload 4-byte aligned. - * This should match the - * PPP_ALLSTATIONS/PPP_UI/compressed tests in - * process_input_packet, but we do not have - * enough chars here to test buf[1] and buf[2]. - */ + ap->rpkt = skb; + } + if (skb->len == 0) { + /* Try to get the payload 4-byte aligned. + * This should match the + * PPP_ALLSTATIONS/PPP_UI/compressed tests in + * process_input_packet, but we do not have + * enough chars here to test buf[1] and buf[2]. + */ if (buf[0] != PPP_ALLSTATIONS) skb_reserve(skb, 2 + (buf[0] & 1)); } From 94268998e37bc216e122217716e0c6bd64b0f567 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 9 Dec 2019 14:50:27 -0700 Subject: [PATCH 3133/3715] net: smc911x: Adjust indentation in smc911x_phy_configure commit 5c61e223004b3b5c3f1dd25718e979bc17a3b12d upstream. Clang warns: ../drivers/net/ethernet/smsc/smc911x.c:939:3: warning: misleading indentation; statement is not part of the previous 'if' [-Wmisleading-indentation] if (!lp->ctl_rfduplx) ^ ../drivers/net/ethernet/smsc/smc911x.c:936:2: note: previous statement is here if (lp->ctl_rspeed != 100) ^ 1 warning generated. This warning occurs because there is a space after the tab on this line. Remove it so that the indentation is consistent with the Linux kernel coding style and clang no longer warns. Fixes: 0a0c72c9118c ("[PATCH] RE: [PATCH 1/1] net driver: Add support for SMSC LAN911x line of ethernet chips") Link: https://github.com/ClangBuiltLinux/linux/issues/796 Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/smsc/smc911x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c index 42d35a87bcc9..f4f52a64f450 100644 --- a/drivers/net/ethernet/smsc/smc911x.c +++ b/drivers/net/ethernet/smsc/smc911x.c @@ -948,7 +948,7 @@ static void smc911x_phy_configure(struct work_struct *work) if (lp->ctl_rspeed != 100) my_ad_caps &= ~(ADVERTISE_100BASE4|ADVERTISE_100FULL|ADVERTISE_100HALF); - if (!lp->ctl_rfduplx) + if (!lp->ctl_rfduplx) my_ad_caps &= ~(ADVERTISE_100FULL|ADVERTISE_10FULL); /* Update our Auto-Neg Advertisement Register */ From 0b8e15f035ecc7dfc27e3cefc9c2833c166634ab Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 9 Dec 2019 14:16:23 -0700 Subject: [PATCH 3134/3715] net: tulip: Adjust indentation in {dmfe, uli526x}_init_module commit fe06bf3d83ef0d92f35a24e03297172e92ce9ce3 upstream. Clang warns: ../drivers/net/ethernet/dec/tulip/uli526x.c:1812:3: warning: misleading indentation; statement is not part of the previous 'if' [-Wmisleading-indentation] switch (mode) { ^ ../drivers/net/ethernet/dec/tulip/uli526x.c:1809:2: note: previous statement is here if (cr6set) ^ 1 warning generated. ../drivers/net/ethernet/dec/tulip/dmfe.c:2217:3: warning: misleading indentation; statement is not part of the previous 'if' [-Wmisleading-indentation] switch(mode) { ^ ../drivers/net/ethernet/dec/tulip/dmfe.c:2214:2: note: previous statement is here if (cr6set) ^ 1 warning generated. This warning occurs because there is a space before the tab on these lines. Remove them so that the indentation is consistent with the Linux kernel coding style and clang no longer warns. While we are here, adjust the default block in dmfe_init_module to have a proper break between the label and assignment and add a space between the switch and opening parentheses to avoid a checkpatch warning. Fixes: e1c3e5014040 ("[PATCH] initialisation cleanup for ULI526x-net-driver") Link: https://github.com/ClangBuiltLinux/linux/issues/795 Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/dec/tulip/dmfe.c | 7 ++++--- drivers/net/ethernet/dec/tulip/uli526x.c | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/dec/tulip/dmfe.c b/drivers/net/ethernet/dec/tulip/dmfe.c index 07e10a45beaa..cd5309668186 100644 --- a/drivers/net/ethernet/dec/tulip/dmfe.c +++ b/drivers/net/ethernet/dec/tulip/dmfe.c @@ -2224,15 +2224,16 @@ static int __init dmfe_init_module(void) if (cr6set) dmfe_cr6_user_set = cr6set; - switch(mode) { - case DMFE_10MHF: + switch (mode) { + case DMFE_10MHF: case DMFE_100MHF: case DMFE_10MFD: case DMFE_100MFD: case DMFE_1M_HPNA: dmfe_media_mode = mode; break; - default:dmfe_media_mode = DMFE_AUTO; + default: + dmfe_media_mode = DMFE_AUTO; break; } diff --git a/drivers/net/ethernet/dec/tulip/uli526x.c b/drivers/net/ethernet/dec/tulip/uli526x.c index 7fc248efc4ba..9779555eea25 100644 --- a/drivers/net/ethernet/dec/tulip/uli526x.c +++ b/drivers/net/ethernet/dec/tulip/uli526x.c @@ -1819,8 +1819,8 @@ static int __init uli526x_init_module(void) if (cr6set) uli526x_cr6_user_set = cr6set; - switch (mode) { - case ULI526X_10MHF: + switch (mode) { + case ULI526X_10MHF: case ULI526X_100MHF: case ULI526X_10MFD: case ULI526X_100MFD: From 47e1f185a32a1fc79344ab3532f8c97bc3e2a420 Mon Sep 17 00:00:00 2001 From: Prabhath Sajeepa Date: Thu, 12 Dec 2019 17:11:29 -0700 Subject: [PATCH 3135/3715] IB/mlx5: Fix outstanding_pi index for GSI qps commit b5671afe5e39ed71e94eae788bacdcceec69db09 upstream. Commit b0ffeb537f3a ("IB/mlx5: Fix iteration overrun in GSI qps") changed the way outstanding WRs are tracked for the GSI QP. But the fix did not cover the case when a call to ib_post_send() fails and updates index to track outstanding. Since the prior commmit outstanding_pi should not be bounded otherwise the loop generate_completions() will fail. Fixes: b0ffeb537f3a ("IB/mlx5: Fix iteration overrun in GSI qps") Link: https://lore.kernel.org/r/1576195889-23527-1-git-send-email-psajeepa@purestorage.com Signed-off-by: Prabhath Sajeepa Acked-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx5/gsi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c index 79e6309460dc..262c18b2f525 100644 --- a/drivers/infiniband/hw/mlx5/gsi.c +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -507,8 +507,7 @@ int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr, ret = ib_post_send(tx_qp, &cur_wr.wr, bad_wr); if (ret) { /* Undo the effect of adding the outstanding wr */ - gsi->outstanding_pi = (gsi->outstanding_pi - 1) % - gsi->cap.max_send_wr; + gsi->outstanding_pi--; goto err; } spin_unlock_irqrestore(&gsi->lock, flags); From fc18aad70b27ccd194aae04aa91a7c2eecbe0835 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 22 Dec 2019 14:46:48 +0200 Subject: [PATCH 3136/3715] IB/core: Fix ODP get user pages flow commit d07de8bd1709a80a282963ad7b2535148678a9e4 upstream. The nr_pages argument of get_user_pages_remote() should always be in terms of the system page size, not the MR page size. Use PAGE_SIZE instead of umem_odp->page_shift. Fixes: 403cd12e2cf7 ("IB/umem: Add contiguous ODP support") Link: https://lore.kernel.org/r/20191222124649.52300-3-leon@kernel.org Signed-off-by: Yishai Hadas Reviewed-by: Artemy Kovalyov Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/umem_odp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 55e8f5ed8b3c..57b41125b146 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -637,7 +637,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, while (bcnt > 0) { const size_t gup_num_pages = min_t(size_t, - (bcnt + BIT(page_shift) - 1) >> page_shift, + ALIGN(bcnt, PAGE_SIZE) / PAGE_SIZE, PAGE_SIZE / sizeof(struct page *)); down_read(&owning_mm->mmap_sem); From 7558ea7f5ef69b48474ce3a1b468dde3e0bc7e5b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Nov 2019 22:32:20 +0100 Subject: [PATCH 3137/3715] nfsd: fix delay timer on 32-bit architectures commit 2561c92b12f4f4e386d453556685f75775c0938b upstream. The nfsd4_cb_layout_done() function takes a 'time_t' value, multiplied by NSEC_PER_SEC*2 to get a nanosecond value. This works fine on 64-bit architectures, but on 32-bit, any value over 1 second results in a signed integer overflow with unexpected results. Cast one input to a 64-bit type in order to produce the same result that we have on 64-bit architectures, regarless of the type of nfsd4_lease. Fixes: 6b9b21073d3b ("nfsd: give up on CB_LAYOUTRECALLs after two lease periods") Signed-off-by: Arnd Bergmann Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4layouts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index ea45d954e8d7..99add0cf20ff 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -683,7 +683,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) /* Client gets 2 lease periods to return it */ cutoff = ktime_add_ns(task->tk_start, - nn->nfsd4_lease * NSEC_PER_SEC * 2); + (u64)nn->nfsd4_lease * NSEC_PER_SEC * 2); if (ktime_before(now, cutoff)) { rpc_delay(task, HZ/100); /* 10 mili-seconds */ From a0f808e5594127631d07e4059e59450afe99ea8b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 4 Nov 2019 14:43:17 +0100 Subject: [PATCH 3138/3715] nfsd: fix jiffies/time_t mixup in LRU list commit 9594497f2c78993cb66b696122f7c65528ace985 upstream. The nfsd4_blocked_lock->nbl_time timestamp is recorded in jiffies, but then compared to a CLOCK_REALTIME timestamp later on, which makes no sense. For consistency with the other timestamps, change this to use a time_t. This is a change in behavior, which may cause regressions, but the current code is not sensible. On a system with CONFIG_HZ=1000, the 'time_after((unsigned long)nbl->nbl_time, (unsigned long)cutoff))' check is false for roughly the first 18 days of uptime and then true for the next 49 days. Fixes: 7919d0a27f1e ("nfsd: add a LRU list for blocked locks") Signed-off-by: Arnd Bergmann Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 2 +- fs/nfsd/state.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index fc13236d1be1..fca8b2e7fbeb 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -6040,7 +6040,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } if (fl_flags & FL_SLEEP) { - nbl->nbl_time = jiffies; + nbl->nbl_time = get_seconds(); spin_lock(&nn->blocked_locks_lock); list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked); list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 133d8bf62a5c..7872b1ead885 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -591,7 +591,7 @@ static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b) struct nfsd4_blocked_lock { struct list_head nbl_list; struct list_head nbl_lru; - unsigned long nbl_time; + time_t nbl_time; struct file_lock nbl_lock; struct knfsd_fh nbl_fh; struct nfsd4_callback nbl_cb; From 9ab31c4b1d7be0acea4fc14f7687e5d90d1321de Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 23 Oct 2019 11:58:12 +0200 Subject: [PATCH 3139/3715] ubi: fastmap: Fix inverted logic in seen selfcheck commit ef5aafb6e4e9942a28cd300bdcda21ce6cbaf045 upstream. set_seen() sets the bit corresponding to the PEB number in the bitmap, so when self_check_seen() wants to find PEBs that haven't been seen we have to print the PEBs that have their bit cleared, not the ones which have it set. Fixes: 5d71afb00840 ("ubi: Use bitmaps in Fastmap self-check code") Signed-off-by: Sascha Hauer Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/ubi/fastmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index 63e8527f7b65..8e0398417650 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -73,7 +73,7 @@ static int self_check_seen(struct ubi_device *ubi, unsigned long *seen) return 0; for (pnum = 0; pnum < ubi->peb_count; pnum++) { - if (test_bit(pnum, seen) && ubi->lookuptbl[pnum]) { + if (!test_bit(pnum, seen) && ubi->lookuptbl[pnum]) { ubi_err(ubi, "self-check failed for PEB %d, fastmap didn't see it", pnum); ret = -EINVAL; } From c91730d38fd7f8151eb6f9d4a4f39d06170d3d40 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 13 Jan 2020 16:23:46 +0300 Subject: [PATCH 3140/3715] ubi: Fix an error pointer dereference in error handling code commit 5d3805af279c93ef49a64701f35254676d709622 upstream. If "seen_pebs = init_seen(ubi);" fails then "seen_pebs" is an error pointer and we try to kfree() it which results in an Oops. This patch re-arranges the error handling so now it only frees things which have been allocated successfully. Fixes: daef3dd1f0ae ("UBI: Fastmap: Add self check to detect absent PEBs") Signed-off-by: Dan Carpenter Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/ubi/fastmap.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index 8e0398417650..18aba1cf8acc 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -1147,7 +1147,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi, struct rb_node *tmp_rb; int ret, i, j, free_peb_count, used_peb_count, vol_count; int scrub_peb_count, erase_peb_count; - unsigned long *seen_pebs = NULL; + unsigned long *seen_pebs; fm_raw = ubi->fm_buf; memset(ubi->fm_buf, 0, ubi->fm_size); @@ -1161,7 +1161,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi, dvbuf = new_fm_vbuf(ubi, UBI_FM_DATA_VOLUME_ID); if (!dvbuf) { ret = -ENOMEM; - goto out_kfree; + goto out_free_avbuf; } avhdr = ubi_get_vid_hdr(avbuf); @@ -1170,7 +1170,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi, seen_pebs = init_seen(ubi); if (IS_ERR(seen_pebs)) { ret = PTR_ERR(seen_pebs); - goto out_kfree; + goto out_free_dvbuf; } spin_lock(&ubi->volumes_lock); @@ -1338,7 +1338,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi, ret = ubi_io_write_vid_hdr(ubi, new_fm->e[0]->pnum, avbuf); if (ret) { ubi_err(ubi, "unable to write vid_hdr to fastmap SB!"); - goto out_kfree; + goto out_free_seen; } for (i = 0; i < new_fm->used_blocks; i++) { @@ -1360,7 +1360,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi, if (ret) { ubi_err(ubi, "unable to write vid_hdr to PEB %i!", new_fm->e[i]->pnum); - goto out_kfree; + goto out_free_seen; } } @@ -1370,7 +1370,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi, if (ret) { ubi_err(ubi, "unable to write fastmap to PEB %i!", new_fm->e[i]->pnum); - goto out_kfree; + goto out_free_seen; } } @@ -1380,10 +1380,13 @@ static int ubi_write_fastmap(struct ubi_device *ubi, ret = self_check_seen(ubi, seen_pebs); dbg_bld("fastmap written!"); -out_kfree: - ubi_free_vid_buf(avbuf); - ubi_free_vid_buf(dvbuf); +out_free_seen: free_seen(seen_pebs); +out_free_dvbuf: + ubi_free_vid_buf(dvbuf); +out_free_avbuf: + ubi_free_vid_buf(avbuf); + out: return ret; } From 31aa47d4e26083842729dd480593a743e94c315a Mon Sep 17 00:00:00 2001 From: Marco Felsch Date: Wed, 8 Jan 2020 10:57:02 +0100 Subject: [PATCH 3141/3715] mfd: da9062: Fix watchdog compatible string commit 1112ba02ff1190ca9c15a912f9269e54b46d2d82 upstream. The watchdog driver compatible is "dlg,da9062-watchdog" and not "dlg,da9062-wdt". Therefore the mfd-core can't populate the of_node and fwnode. As result the watchdog driver can't parse the devicetree. Fixes: 9b40b030c4ad ("mfd: da9062: Supply core driver") Signed-off-by: Marco Felsch Acked-by: Guenter Roeck Reviewed-by: Adam Thomson Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/da9062-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/da9062-core.c b/drivers/mfd/da9062-core.c index fe1811523e4a..eff6ae5073c8 100644 --- a/drivers/mfd/da9062-core.c +++ b/drivers/mfd/da9062-core.c @@ -257,7 +257,7 @@ static const struct mfd_cell da9062_devs[] = { .name = "da9062-watchdog", .num_resources = ARRAY_SIZE(da9062_wdt_resources), .resources = da9062_wdt_resources, - .of_compatible = "dlg,da9062-wdt", + .of_compatible = "dlg,da9062-watchdog", }, { .name = "da9062-thermal", From c338e06f4ae39cb2cc1f14de6068b643a24049ea Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Fri, 17 Jan 2020 22:59:22 +0100 Subject: [PATCH 3142/3715] mfd: rn5t618: Mark ADC control register volatile commit 2f3dc25c0118de03a00ddc88b61f7216854f534d upstream. There is a bit which gets cleared after conversion. Fixes: 9bb9e29c78f8 ("mfd: Add Ricoh RN5T618 PMIC core driver") Signed-off-by: Andreas Kemnade Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/rn5t618.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mfd/rn5t618.c b/drivers/mfd/rn5t618.c index f4037d42a60f..dd4251f105e0 100644 --- a/drivers/mfd/rn5t618.c +++ b/drivers/mfd/rn5t618.c @@ -32,6 +32,7 @@ static bool rn5t618_volatile_reg(struct device *dev, unsigned int reg) case RN5T618_WATCHDOGCNT: case RN5T618_DCIRQ: case RN5T618_ILIMDATAH ... RN5T618_AIN0DATAL: + case RN5T618_ADCCNT3: case RN5T618_IR_ADC1 ... RN5T618_IR_ADC3: case RN5T618_IR_GPR: case RN5T618_IR_GPF: From ad998ed96e150faed0d0f148d12d66dd6ce20e9f Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 6 Feb 2020 11:23:52 -0800 Subject: [PATCH 3143/3715] net: dsa: bcm_sf2: Only 7278 supports 2Gb/sec IMP port [ Upstream commit de34d7084edd069dac5aa010cfe32bd8c4619fa6 ] The 7445 switch clocking profiles do not allow us to run the IMP port at 2Gb/sec in a way that it is reliable and consistent. Make sure that the setting is only applied to the 7278 family. Fixes: 8f1880cbe8d0 ("net: dsa: bcm_sf2: Configure IMP port for 2Gb/sec") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/bcm_sf2.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 05440b727261..747062f04bb5 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -137,7 +137,9 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port) /* Force link status for IMP port */ reg = core_readl(priv, offset); - reg |= (MII_SW_OR | LINK_STS | GMII_SPEED_UP_2G); + reg |= (MII_SW_OR | LINK_STS); + if (priv->type == BCM7278_DEVICE_ID) + reg |= GMII_SPEED_UP_2G; core_writel(priv, reg, offset); /* Enable Broadcast, Multicast, Unicast forwarding to IMP port */ From e79fbd72dca656e3c8f89bb4ebbcc369f5e40774 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 4 Feb 2020 11:10:12 -0800 Subject: [PATCH 3144/3715] net_sched: fix a resource leak in tcindex_set_parms() [ Upstream commit 52b5ae501c045010aeeb1d5ac0373ff161a88291 ] Jakub noticed there is a potential resource leak in tcindex_set_parms(): when tcindex_filter_result_init() fails and it jumps to 'errout1' which doesn't release the memory and resources allocated by tcindex_alloc_perfect_hash(). We should just jump to 'errout_alloc' which calls tcindex_free_perfect_hash(). Fixes: b9a24bb76bf6 ("net_sched: properly handle failure case of tcf_exts_init()") Reported-by: Jakub Kicinski Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/cls_tcindex.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 10df2c12a3fb..796b4e1beb12 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -383,7 +383,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, err = tcindex_filter_result_init(&new_filter_result); if (err < 0) - goto errout1; + goto errout_alloc; if (old_r) cr = r->res; @@ -502,7 +502,6 @@ errout_alloc: tcindex_free_perfect_hash(cp); else if (balloc == 2) kfree(cp->h); -errout1: tcf_exts_destroy(&new_filter_result.exts); errout: kfree(cp); From 1d4754d49fb86f8521c9b3c99242daae80e3776d Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 5 Feb 2020 12:32:04 -0800 Subject: [PATCH 3145/3715] net: systemport: Avoid RBUF stuck in Wake-on-LAN mode [ Upstream commit 263a425a482fc495d6d3f9a29b9103a664c38b69 ] After a number of suspend and resume cycles, it is possible for the RBUF to be stuck in Wake-on-LAN mode, despite the MPD enable bit being cleared which instructed the RBUF to exit that mode. Avoid creating that problematic condition by clearing the RX_EN and TX_EN bits in the UniMAC prior to disable the Magic Packet Detector logic which is guaranteed to make the RBUF exit Wake-on-LAN mode. Fixes: 83e82f4c706b ("net: systemport: add Wake-on-LAN support") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bcmsysport.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 69b2f99b0c19..f48f7d104af2 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -2329,6 +2329,9 @@ static int bcm_sysport_resume(struct device *d) umac_reset(priv); + /* Disable the UniMAC RX/TX */ + umac_enable_set(priv, CMD_RX_EN | CMD_TX_EN, 0); + /* We may have been suspended and never received a WOL event that * would turn off MPD detection, take care of that now */ From 03d7740a4c3925f05de82f61b3fe50359280e146 Mon Sep 17 00:00:00 2001 From: Harini Katakam Date: Wed, 5 Feb 2020 18:08:11 +0530 Subject: [PATCH 3146/3715] net: macb: Remove unnecessary alignment check for TSO [ Upstream commit 41c1ef978c8d0259c6636e6d2d854777e92650eb ] The IP TSO implementation does NOT require the length to be a multiple of 8. That is only a requirement for UFO as per IP documentation. Hence, exit macb_features_check function in the beginning if the protocol is not UDP. Only when it is UDP, proceed further to the alignment checks. Update comments to reflect the same. Also remove dead code checking for protocol TCP when calculating header length. Fixes: 1629dd4f763c ("cadence: Add LSO support.") Signed-off-by: Harini Katakam Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cadence/macb_main.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 5aff1b460151..a29102d4644f 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1577,16 +1577,14 @@ static netdev_features_t macb_features_check(struct sk_buff *skb, /* Validate LSO compatibility */ - /* there is only one buffer */ - if (!skb_is_nonlinear(skb)) + /* there is only one buffer or protocol is not UDP */ + if (!skb_is_nonlinear(skb) || (ip_hdr(skb)->protocol != IPPROTO_UDP)) return features; /* length of header */ hdrlen = skb_transport_offset(skb); - if (ip_hdr(skb)->protocol == IPPROTO_TCP) - hdrlen += tcp_hdrlen(skb); - /* For LSO: + /* For UFO only: * When software supplies two or more payload buffers all payload buffers * apart from the last must be a multiple of 8 bytes in size. */ From 196491f360855e6042dc372bb303b69794fefb99 Mon Sep 17 00:00:00 2001 From: Harini Katakam Date: Wed, 5 Feb 2020 18:08:12 +0530 Subject: [PATCH 3147/3715] net: macb: Limit maximum GEM TX length in TSO [ Upstream commit f822e9c4ffa511a5c681cf866287d9383a3b6f1b ] GEM_MAX_TX_LEN currently resolves to 0x3FF8 for any IP version supporting TSO with full 14bits of length field in payload descriptor. But an IP errata causes false amba_error (bit 6 of ISR) when length in payload descriptors is specified above 16387. The error occurs because the DMA falsely concludes that there is not enough space in SRAM for incoming payload. These errors were observed continuously under stress of large packets using iperf on a version where SRAM was 16K for each queue. This errata will be documented shortly and affects all versions since TSO functionality was added. Hence limit the max length to 0x3FC0 (rounded). Signed-off-by: Harini Katakam Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cadence/macb_main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index a29102d4644f..b01b242c2bf0 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -66,7 +66,11 @@ /* Max length of transmit frame must be a multiple of 8 bytes */ #define MACB_TX_LEN_ALIGN 8 #define MACB_MAX_TX_LEN ((unsigned int)((1 << MACB_TX_FRMLEN_SIZE) - 1) & ~((unsigned int)(MACB_TX_LEN_ALIGN - 1))) -#define GEM_MAX_TX_LEN ((unsigned int)((1 << GEM_TX_FRMLEN_SIZE) - 1) & ~((unsigned int)(MACB_TX_LEN_ALIGN - 1))) +/* Limit maximum TX length as per Cadence TSO errata. This is to avoid a + * false amba_error in TX path from the DMA assuming there is not enough + * space in the SRAM (16KB) even when there is. + */ +#define GEM_MAX_TX_LEN (unsigned int)(0x3FC0) #define GEM_MTU_MIN_SIZE ETH_MIN_MTU #define MACB_NETIF_LSO NETIF_F_TSO From 3576bb20416f9f34a75b492c6c969e8446aad24e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 4 Feb 2020 19:26:05 -0800 Subject: [PATCH 3148/3715] bonding/alb: properly access headers in bond_alb_xmit() [ Upstream commit 38f88c45404293bbc027b956def6c10cbd45c616 ] syzbot managed to send an IPX packet through bond_alb_xmit() and af_packet and triggered a use-after-free. First, bond_alb_xmit() was using ipx_hdr() helper to reach the IPX header, but ipx_hdr() was using the transport offset instead of the network offset. In the particular syzbot report transport offset was 0xFFFF This patch removes ipx_hdr() since it was only (mis)used from bonding. Then we need to make sure IPv4/IPv6/IPX headers are pulled in skb->head before dereferencing anything. BUG: KASAN: use-after-free in bond_alb_xmit+0x153a/0x1590 drivers/net/bonding/bond_alb.c:1452 Read of size 2 at addr ffff8801ce56dfff by task syz-executor.2/18108 (if (ipx_hdr(skb)->ipx_checksum != IPX_NO_CHECKSUM) ...) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: [] __dump_stack lib/dump_stack.c:17 [inline] [] dump_stack+0x14d/0x20b lib/dump_stack.c:53 [] print_address_description+0x6f/0x20b mm/kasan/report.c:282 [] kasan_report_error mm/kasan/report.c:380 [inline] [] kasan_report mm/kasan/report.c:438 [inline] [] kasan_report.cold+0x8c/0x2a0 mm/kasan/report.c:422 [] __asan_report_load_n_noabort+0xf/0x20 mm/kasan/report.c:469 [] bond_alb_xmit+0x153a/0x1590 drivers/net/bonding/bond_alb.c:1452 [] __bond_start_xmit drivers/net/bonding/bond_main.c:4199 [inline] [] bond_start_xmit+0x4f4/0x1570 drivers/net/bonding/bond_main.c:4224 [] __netdev_start_xmit include/linux/netdevice.h:4525 [inline] [] netdev_start_xmit include/linux/netdevice.h:4539 [inline] [] xmit_one net/core/dev.c:3611 [inline] [] dev_hard_start_xmit+0x168/0x910 net/core/dev.c:3627 [] __dev_queue_xmit+0x1f55/0x33b0 net/core/dev.c:4238 [] dev_queue_xmit+0x18/0x20 net/core/dev.c:4278 [] packet_snd net/packet/af_packet.c:3226 [inline] [] packet_sendmsg+0x4919/0x70b0 net/packet/af_packet.c:3252 [] sock_sendmsg_nosec net/socket.c:673 [inline] [] sock_sendmsg+0x12c/0x160 net/socket.c:684 [] __sys_sendto+0x262/0x380 net/socket.c:1996 [] SYSC_sendto net/socket.c:2008 [inline] [] SyS_sendto+0x40/0x60 net/socket.c:2004 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Jay Vosburgh Cc: Veaceslav Falico Cc: Andy Gospodarek Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/bonding/bond_alb.c | 44 ++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 60666db31886..755d588bbcb1 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -1403,26 +1403,31 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) bool do_tx_balance = true; u32 hash_index = 0; const u8 *hash_start = NULL; - struct ipv6hdr *ip6hdr; skb_reset_mac_header(skb); eth_data = eth_hdr(skb); switch (ntohs(skb->protocol)) { case ETH_P_IP: { - const struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph; if (ether_addr_equal_64bits(eth_data->h_dest, mac_bcast) || - (iph->daddr == ip_bcast) || - (iph->protocol == IPPROTO_IGMP)) { + (!pskb_network_may_pull(skb, sizeof(*iph)))) { + do_tx_balance = false; + break; + } + iph = ip_hdr(skb); + if (iph->daddr == ip_bcast || iph->protocol == IPPROTO_IGMP) { do_tx_balance = false; break; } hash_start = (char *)&(iph->daddr); hash_size = sizeof(iph->daddr); - } break; - case ETH_P_IPV6: + } + case ETH_P_IPV6: { + const struct ipv6hdr *ip6hdr; + /* IPv6 doesn't really use broadcast mac address, but leave * that here just in case. */ @@ -1439,7 +1444,11 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) break; } - /* Additianally, DAD probes should not be tx-balanced as that + if (!pskb_network_may_pull(skb, sizeof(*ip6hdr))) { + do_tx_balance = false; + break; + } + /* Additionally, DAD probes should not be tx-balanced as that * will lead to false positives for duplicate addresses and * prevent address configuration from working. */ @@ -1449,17 +1458,26 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) break; } - hash_start = (char *)&(ipv6_hdr(skb)->daddr); - hash_size = sizeof(ipv6_hdr(skb)->daddr); + hash_start = (char *)&ip6hdr->daddr; + hash_size = sizeof(ip6hdr->daddr); break; - case ETH_P_IPX: - if (ipx_hdr(skb)->ipx_checksum != IPX_NO_CHECKSUM) { + } + case ETH_P_IPX: { + const struct ipxhdr *ipxhdr; + + if (pskb_network_may_pull(skb, sizeof(*ipxhdr))) { + do_tx_balance = false; + break; + } + ipxhdr = (struct ipxhdr *)skb_network_header(skb); + + if (ipxhdr->ipx_checksum != IPX_NO_CHECKSUM) { /* something is wrong with this packet */ do_tx_balance = false; break; } - if (ipx_hdr(skb)->ipx_type != IPX_TYPE_NCP) { + if (ipxhdr->ipx_type != IPX_TYPE_NCP) { /* The only protocol worth balancing in * this family since it has an "ARP" like * mechanism @@ -1468,9 +1486,11 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) break; } + eth_data = eth_hdr(skb); hash_start = (char *)eth_data->h_dest; hash_size = ETH_ALEN; break; + } case ETH_P_ARP: do_tx_balance = false; if (bond_info->rlb_enabled) From 7be41b349ca90dcbdea4bb33e212f977c76b4040 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 31 Dec 2019 12:11:49 -0600 Subject: [PATCH 3149/3715] ext4: fix deadlock allocating crypto bounce page from mempool [ Upstream commit 547c556f4db7c09447ecf5f833ab6aaae0c5ab58 ] ext4_writepages() on an encrypted file has to encrypt the data, but it can't modify the pagecache pages in-place, so it encrypts the data into bounce pages and writes those instead. All bounce pages are allocated from a mempool using GFP_NOFS. This is not correct use of a mempool, and it can deadlock. This is because GFP_NOFS includes __GFP_DIRECT_RECLAIM, which enables the "never fail" mode for mempool_alloc() where a failed allocation will fall back to waiting for one of the preallocated elements in the pool. But since this mode is used for all a bio's pages and not just the first, it can deadlock waiting for pages already in the bio to be freed. This deadlock can be reproduced by patching mempool_alloc() to pretend that pool->alloc() always fails (so that it always falls back to the preallocations), and then creating an encrypted file of size > 128 KiB. Fix it by only using GFP_NOFS for the first page in the bio. For subsequent pages just use GFP_NOWAIT, and if any of those fail, just submit the bio and start a new one. This will need to be fixed in f2fs too, but that's less straightforward. Fixes: c9af28fdd449 ("ext4 crypto: don't let data integrity writebacks fail with ENOMEM") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20191231181149.47619-1-ebiggers@kernel.org Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/page-io.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index db7590178dfc..9cc79b7b0df1 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -481,17 +481,26 @@ int ext4_bio_write_page(struct ext4_io_submit *io, nr_to_submit) { gfp_t gfp_flags = GFP_NOFS; + /* + * Since bounce page allocation uses a mempool, we can only use + * a waiting mask (i.e. request guaranteed allocation) on the + * first page of the bio. Otherwise it can deadlock. + */ + if (io->io_bio) + gfp_flags = GFP_NOWAIT | __GFP_NOWARN; retry_encrypt: data_page = fscrypt_encrypt_page(inode, page, PAGE_SIZE, 0, page->index, gfp_flags); if (IS_ERR(data_page)) { ret = PTR_ERR(data_page); - if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) { - if (io->io_bio) { + if (ret == -ENOMEM && + (io->io_bio || wbc->sync_mode == WB_SYNC_ALL)) { + gfp_flags = GFP_NOFS; + if (io->io_bio) ext4_io_submit(io); - congestion_wait(BLK_RW_ASYNC, HZ/50); - } - gfp_flags |= __GFP_NOFAIL; + else + gfp_flags |= __GFP_NOFAIL; + congestion_wait(BLK_RW_ASYNC, HZ/50); goto retry_encrypt; } data_page = NULL; From cf1569db065f8c9cb9f0c73ff8951f4071b29976 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 6 Jun 2018 15:41:49 +0800 Subject: [PATCH 3150/3715] btrfs: Get rid of the confusing btrfs_file_extent_inline_len [ Upstream commit e41ca5897489b1c18af75ff0cc8f5c80260b3281 ] We used to call btrfs_file_extent_inline_len() to get the uncompressed data size of an inlined extent. However this function is hiding evil, for compressed extent, it has no choice but to directly read out ram_bytes from btrfs_file_extent_item. While for uncompressed extent, it uses item size to calculate the real data size, and ignoring ram_bytes completely. In fact, for corrupted ram_bytes, due to above behavior kernel btrfs_print_leaf() can't even print correct ram_bytes to expose the bug. Since we have the tree-checker to verify all EXTENT_DATA, such mismatch can be detected pretty easily, thus we can trust ram_bytes without the evil btrfs_file_extent_inline_len(). Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/ctree.h | 26 -------------------------- fs/btrfs/file-item.c | 2 +- fs/btrfs/file.c | 3 +-- fs/btrfs/inode.c | 12 ++++++------ fs/btrfs/print-tree.c | 4 ++-- fs/btrfs/send.c | 17 +++++++---------- fs/btrfs/tree-log.c | 12 ++++-------- include/trace/events/btrfs.h | 2 +- 8 files changed, 22 insertions(+), 56 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 588760c49fe2..664710848e6f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2408,32 +2408,6 @@ static inline u32 btrfs_file_extent_inline_item_len( return btrfs_item_size(eb, e) - BTRFS_FILE_EXTENT_INLINE_DATA_START; } -/* this returns the number of file bytes represented by the inline item. - * If an item is compressed, this is the uncompressed size - */ -static inline u32 btrfs_file_extent_inline_len(const struct extent_buffer *eb, - int slot, - const struct btrfs_file_extent_item *fi) -{ - struct btrfs_map_token token; - - btrfs_init_map_token(&token); - /* - * return the space used on disk if this item isn't - * compressed or encoded - */ - if (btrfs_token_file_extent_compression(eb, fi, &token) == 0 && - btrfs_token_file_extent_encryption(eb, fi, &token) == 0 && - btrfs_token_file_extent_other_encoding(eb, fi, &token) == 0) { - return btrfs_file_extent_inline_item_len(eb, - btrfs_item_nr(slot)); - } - - /* otherwise use the ram bytes field */ - return btrfs_token_file_extent_ram_bytes(eb, fi, &token); -} - - /* btrfs_dev_stats_item */ static inline u64 btrfs_dev_stats_value(const struct extent_buffer *eb, const struct btrfs_dev_stats_item *ptr, diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index fdcb41002623..702b3606ad0e 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -955,7 +955,7 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode, btrfs_file_extent_num_bytes(leaf, fi); } else if (type == BTRFS_FILE_EXTENT_INLINE) { size_t size; - size = btrfs_file_extent_inline_len(leaf, slot, fi); + size = btrfs_file_extent_ram_bytes(leaf, fi); extent_end = ALIGN(extent_start + size, fs_info->sectorsize); } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c68ce3412dc1..725544ec9c84 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -784,8 +784,7 @@ next_slot: btrfs_file_extent_num_bytes(leaf, fi); } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { extent_end = key.offset + - btrfs_file_extent_inline_len(leaf, - path->slots[0], fi); + btrfs_file_extent_ram_bytes(leaf, fi); } else { /* can't happen */ BUG(); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f2dc517768f0..abecc4724a3b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1476,8 +1476,7 @@ next_slot: nocow = 1; } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { extent_end = found_key.offset + - btrfs_file_extent_inline_len(leaf, - path->slots[0], fi); + btrfs_file_extent_ram_bytes(leaf, fi); extent_end = ALIGN(extent_end, fs_info->sectorsize); } else { @@ -4651,8 +4650,8 @@ search_again: BTRFS_I(inode), leaf, fi, found_key.offset); } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { - item_end += btrfs_file_extent_inline_len(leaf, - path->slots[0], fi); + item_end += btrfs_file_extent_ram_bytes(leaf, + fi); trace_btrfs_truncate_show_fi_inline( BTRFS_I(inode), leaf, fi, path->slots[0], @@ -7167,7 +7166,8 @@ again: extent_start); } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { size_t size; - size = btrfs_file_extent_inline_len(leaf, path->slots[0], item); + + size = btrfs_file_extent_ram_bytes(leaf, item); extent_end = ALIGN(extent_start + size, fs_info->sectorsize); @@ -7218,7 +7218,7 @@ next: if (new_inline) goto out; - size = btrfs_file_extent_inline_len(leaf, path->slots[0], item); + size = btrfs_file_extent_ram_bytes(leaf, item); extent_offset = page_offset(page) + pg_offset - extent_start; copy_size = min_t(u64, PAGE_SIZE - pg_offset, size - extent_offset); diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 569205e651c7..47336d4b19d8 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -259,8 +259,8 @@ void btrfs_print_leaf(struct extent_buffer *l) struct btrfs_file_extent_item); if (btrfs_file_extent_type(l, fi) == BTRFS_FILE_EXTENT_INLINE) { - pr_info("\t\tinline extent data size %u\n", - btrfs_file_extent_inline_len(l, i, fi)); + pr_info("\t\tinline extent data size %llu\n", + btrfs_file_extent_ram_bytes(l, fi)); break; } pr_info("\t\textent data disk bytenr %llu nr %llu\n", diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 1211fdcd425d..ca15d65a2070 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -1545,7 +1545,7 @@ static int read_symlink(struct btrfs_root *root, BUG_ON(compression); off = btrfs_file_extent_inline_start(ei); - len = btrfs_file_extent_inline_len(path->nodes[0], path->slots[0], ei); + len = btrfs_file_extent_ram_bytes(path->nodes[0], ei); ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len); @@ -5195,7 +5195,7 @@ static int clone_range(struct send_ctx *sctx, ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); type = btrfs_file_extent_type(leaf, ei); if (type == BTRFS_FILE_EXTENT_INLINE) { - ext_len = btrfs_file_extent_inline_len(leaf, slot, ei); + ext_len = btrfs_file_extent_ram_bytes(leaf, ei); ext_len = PAGE_ALIGN(ext_len); } else { ext_len = btrfs_file_extent_num_bytes(leaf, ei); @@ -5271,8 +5271,7 @@ static int send_write_or_clone(struct send_ctx *sctx, struct btrfs_file_extent_item); type = btrfs_file_extent_type(path->nodes[0], ei); if (type == BTRFS_FILE_EXTENT_INLINE) { - len = btrfs_file_extent_inline_len(path->nodes[0], - path->slots[0], ei); + len = btrfs_file_extent_ram_bytes(path->nodes[0], ei); /* * it is possible the inline item won't cover the whole page, * but there may be items after this page. Make @@ -5405,7 +5404,7 @@ static int is_extent_unchanged(struct send_ctx *sctx, } if (right_type == BTRFS_FILE_EXTENT_INLINE) { - right_len = btrfs_file_extent_inline_len(eb, slot, ei); + right_len = btrfs_file_extent_ram_bytes(eb, ei); right_len = PAGE_ALIGN(right_len); } else { right_len = btrfs_file_extent_num_bytes(eb, ei); @@ -5526,8 +5525,7 @@ static int get_last_extent(struct send_ctx *sctx, u64 offset) struct btrfs_file_extent_item); type = btrfs_file_extent_type(path->nodes[0], fi); if (type == BTRFS_FILE_EXTENT_INLINE) { - u64 size = btrfs_file_extent_inline_len(path->nodes[0], - path->slots[0], fi); + u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi); extent_end = ALIGN(key.offset + size, sctx->send_root->fs_info->sectorsize); } else { @@ -5590,7 +5588,7 @@ static int range_is_hole_in_parent(struct send_ctx *sctx, fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) { - u64 size = btrfs_file_extent_inline_len(leaf, slot, fi); + u64 size = btrfs_file_extent_ram_bytes(leaf, fi); extent_end = ALIGN(key.offset + size, root->fs_info->sectorsize); @@ -5636,8 +5634,7 @@ static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path, struct btrfs_file_extent_item); type = btrfs_file_extent_type(path->nodes[0], fi); if (type == BTRFS_FILE_EXTENT_INLINE) { - u64 size = btrfs_file_extent_inline_len(path->nodes[0], - path->slots[0], fi); + u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi); extent_end = ALIGN(key->offset + size, sctx->send_root->fs_info->sectorsize); } else { diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 98c397eb054c..65a986054f89 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -619,7 +619,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, if (btrfs_file_extent_disk_bytenr(eb, item) == 0) nbytes = 0; } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { - size = btrfs_file_extent_inline_len(eb, slot, item); + size = btrfs_file_extent_ram_bytes(eb, item); nbytes = btrfs_file_extent_ram_bytes(eb, item); extent_end = ALIGN(start + size, fs_info->sectorsize); @@ -3943,9 +3943,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, struct btrfs_file_extent_item); if (btrfs_file_extent_type(src, extent) == BTRFS_FILE_EXTENT_INLINE) { - len = btrfs_file_extent_inline_len(src, - src_path->slots[0], - extent); + len = btrfs_file_extent_ram_bytes(src, extent); *last_extent = ALIGN(key.offset + len, fs_info->sectorsize); } else { @@ -4010,7 +4008,7 @@ fill_holes: extent = btrfs_item_ptr(src, i, struct btrfs_file_extent_item); if (btrfs_file_extent_type(src, extent) == BTRFS_FILE_EXTENT_INLINE) { - len = btrfs_file_extent_inline_len(src, i, extent); + len = btrfs_file_extent_ram_bytes(src, extent); extent_end = ALIGN(key.offset + len, fs_info->sectorsize); } else { @@ -4730,9 +4728,7 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, if (btrfs_file_extent_type(leaf, extent) == BTRFS_FILE_EXTENT_INLINE) { - len = btrfs_file_extent_inline_len(leaf, - path->slots[0], - extent); + len = btrfs_file_extent_ram_bytes(leaf, extent); ASSERT(len == i_size || (len == fs_info->sectorsize && btrfs_file_extent_compression(leaf, extent) != diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 32d0c1fe2bfa..3ebada29a313 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -325,7 +325,7 @@ DECLARE_EVENT_CLASS( __entry->extent_type = btrfs_file_extent_type(l, fi); __entry->compression = btrfs_file_extent_compression(l, fi); __entry->extent_start = start; - __entry->extent_end = (start + btrfs_file_extent_inline_len(l, slot, fi)); + __entry->extent_end = (start + btrfs_file_extent_ram_bytes(l, fi)); ), TP_printk_btrfs( From 8a024c09cb4364fd2c7d676c1df26a975875b480 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 19 Mar 2019 17:18:13 +0000 Subject: [PATCH 3151/3715] Btrfs: fix assertion failure on fsync with NO_HOLES enabled [ Upstream commit 0ccc3876e4b2a1559a4dbe3126dda4459d38a83b ] Back in commit a89ca6f24ffe4 ("Btrfs: fix fsync after truncate when no_holes feature is enabled") I added an assertion that is triggered when an inline extent is found to assert that the length of the (uncompressed) data the extent represents is the same as the i_size of the inode, since that is true most of the time I couldn't find or didn't remembered about any exception at that time. Later on the assertion was expanded twice to deal with a case of a compressed inline extent representing a range that matches the sector size followed by an expanding truncate, and another case where fallocate can update the i_size of the inode without adding or updating existing extents (if the fallocate range falls entirely within the first block of the file). These two expansion/fixes of the assertion were done by commit 7ed586d0a8241 ("Btrfs: fix assertion on fsync of regular file when using no-holes feature") and commit 6399fb5a0b69a ("Btrfs: fix assertion failure during fsync in no-holes mode"). These however missed the case where an falloc expands the i_size of an inode to exactly the sector size and inline extent exists, for example: $ mkfs.btrfs -f -O no-holes /dev/sdc $ mount /dev/sdc /mnt $ xfs_io -f -c "pwrite -S 0xab 0 1096" /mnt/foobar wrote 1096/1096 bytes at offset 0 1 KiB, 1 ops; 0.0002 sec (4.448 MiB/sec and 4255.3191 ops/sec) $ xfs_io -c "falloc 1096 3000" /mnt/foobar $ xfs_io -c "fsync" /mnt/foobar Segmentation fault $ dmesg [701253.602385] assertion failed: len == i_size || (len == fs_info->sectorsize && btrfs_file_extent_compression(leaf, extent) != BTRFS_COMPRESS_NONE) || (len < i_size && i_size < fs_info->sectorsize), file: fs/btrfs/tree-log.c, line: 4727 [701253.602962] ------------[ cut here ]------------ [701253.603224] kernel BUG at fs/btrfs/ctree.h:3533! [701253.603503] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC PTI [701253.603774] CPU: 2 PID: 7192 Comm: xfs_io Tainted: G W 5.0.0-rc8-btrfs-next-45 #1 [701253.604054] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.2-0-gf9626ccb91-prebuilt.qemu-project.org 04/01/2014 [701253.604650] RIP: 0010:assfail.constprop.23+0x18/0x1a [btrfs] (...) [701253.605591] RSP: 0018:ffffbb48c186bc48 EFLAGS: 00010286 [701253.605914] RAX: 00000000000000de RBX: ffff921d0a7afc08 RCX: 0000000000000000 [701253.606244] RDX: 0000000000000000 RSI: ffff921d36b16868 RDI: ffff921d36b16868 [701253.606580] RBP: ffffbb48c186bcf0 R08: 0000000000000000 R09: 0000000000000000 [701253.606913] R10: 0000000000000003 R11: 0000000000000000 R12: ffff921d05d2de18 [701253.607247] R13: ffff921d03b54000 R14: 0000000000000448 R15: ffff921d059ecf80 [701253.607769] FS: 00007f14da906700(0000) GS:ffff921d36b00000(0000) knlGS:0000000000000000 [701253.608163] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [701253.608516] CR2: 000056087ea9f278 CR3: 00000002268e8001 CR4: 00000000003606e0 [701253.608880] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [701253.609250] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [701253.609608] Call Trace: [701253.609994] btrfs_log_inode+0xdfb/0xe40 [btrfs] [701253.610383] btrfs_log_inode_parent+0x2be/0xa60 [btrfs] [701253.610770] ? do_raw_spin_unlock+0x49/0xc0 [701253.611150] btrfs_log_dentry_safe+0x4a/0x70 [btrfs] [701253.611537] btrfs_sync_file+0x3b2/0x440 [btrfs] [701253.612010] ? do_sysinfo+0xb0/0xf0 [701253.612552] do_fsync+0x38/0x60 [701253.612988] __x64_sys_fsync+0x10/0x20 [701253.613360] do_syscall_64+0x60/0x1b0 [701253.613733] entry_SYSCALL_64_after_hwframe+0x49/0xbe [701253.614103] RIP: 0033:0x7f14da4e66d0 (...) [701253.615250] RSP: 002b:00007fffa670fdb8 EFLAGS: 00000246 ORIG_RAX: 000000000000004a [701253.615647] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f14da4e66d0 [701253.616047] RDX: 000056087ea9c260 RSI: 000056087ea9c260 RDI: 0000000000000003 [701253.616450] RBP: 0000000000000001 R08: 0000000000000020 R09: 0000000000000010 [701253.616854] R10: 000000000000009b R11: 0000000000000246 R12: 000056087ea9c260 [701253.617257] R13: 000056087ea9c240 R14: 0000000000000000 R15: 000056087ea9dd10 (...) [701253.619941] ---[ end trace e088d74f132b6da5 ]--- Updating the assertion again to allow for this particular case would result in a meaningless assertion, plus there is currently no risk of logging content that would result in any corruption after a log replay if the size of the data encoded in an inline extent is greater than the inode's i_size (which is not currently possibe either with or without compression), therefore just remove the assertion. CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/tree-log.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 65a986054f89..3558697e4c04 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4727,15 +4727,8 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, struct btrfs_file_extent_item); if (btrfs_file_extent_type(leaf, extent) == - BTRFS_FILE_EXTENT_INLINE) { - len = btrfs_file_extent_ram_bytes(leaf, extent); - ASSERT(len == i_size || - (len == fs_info->sectorsize && - btrfs_file_extent_compression(leaf, extent) != - BTRFS_COMPRESS_NONE) || - (len < i_size && i_size < fs_info->sectorsize)); + BTRFS_FILE_EXTENT_INLINE) return 0; - } len = btrfs_file_extent_num_bytes(leaf, extent); /* Last extent goes beyond i_size, no need to log a hole. */ From 1d5a59051c77b9ab36c1d03b709588f541a1e1ed Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 19 Nov 2019 12:07:33 +0000 Subject: [PATCH 3152/3715] Btrfs: fix missing hole after hole punching and fsync when using NO_HOLES [ Upstream commit 0e56315ca147b3e60c7bf240233a301d3c7fb508 ] When using the NO_HOLES feature, if we punch a hole into a file and then fsync it, there are cases where a subsequent fsync will miss the fact that a hole was punched, resulting in the holes not existing after replaying the log tree. Essentially these cases all imply that, tree-log.c:copy_items(), is not invoked for the leafs that delimit holes, because nothing changed those leafs in the current transaction. And it's precisely copy_items() where we currenly detect and log holes, which works as long as the holes are between file extent items in the input leaf or between the beginning of input leaf and the previous leaf or between the last item in the leaf and the next leaf. First example where we miss a hole: *) The extent items of the inode span multiple leafs; *) The punched hole covers a range that affects only the extent items of the first leaf; *) The fsync operation is done in full mode (BTRFS_INODE_NEEDS_FULL_SYNC is set in the inode's runtime flags). That results in the hole not existing after replaying the log tree. For example, if the fs/subvolume tree has the following layout for a particular inode: Leaf N, generation 10: [ ... INODE_ITEM INODE_REF EXTENT_ITEM (0 64K) EXTENT_ITEM (64K 128K) ] Leaf N + 1, generation 10: [ EXTENT_ITEM (128K 64K) ... ] If at transaction 11 we punch a hole coverting the range [0, 128K[, we end up dropping the two extent items from leaf N, but we don't touch the other leaf, so we end up in the following state: Leaf N, generation 11: [ ... INODE_ITEM INODE_REF ] Leaf N + 1, generation 10: [ EXTENT_ITEM (128K 64K) ... ] A full fsync after punching the hole will only process leaf N because it was modified in the current transaction, but not leaf N + 1, since it was not modified in the current transaction (generation 10 and not 11). As a result the fsync will not log any holes, because it didn't process any leaf with extent items. Second example where we will miss a hole: *) An inode as its items spanning 5 (or more) leafs; *) A hole is punched and it covers only the extents items of the 3rd leaf. This resulsts in deleting the entire leaf and not touching any of the other leafs. So the only leaf that is modified in the current transaction, when punching the hole, is the first leaf, which contains the inode item. During the full fsync, the only leaf that is passed to copy_items() is that first leaf, and that's not enough for the hole detection code in copy_items() to determine there's a hole between the last file extent item in the 2nd leaf and the first file extent item in the 3rd leaf (which was the 4th leaf before punching the hole). Fix this by scanning all leafs and punch holes as necessary when doing a full fsync (less common than a non-full fsync) when the NO_HOLES feature is enabled. The lack of explicit file extent items to mark holes makes it necessary to scan existing extents to determine if holes exist. A test case for fstests follows soon. Fixes: 16e7549f045d33 ("Btrfs: incompatible format change to remove hole extents") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/tree-log.c | 388 ++++++++++++-------------------------------- 1 file changed, 100 insertions(+), 288 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 3558697e4c04..0b62c8080af0 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3758,7 +3758,7 @@ static int log_inode_item(struct btrfs_trans_handle *trans, static noinline int copy_items(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, struct btrfs_path *dst_path, - struct btrfs_path *src_path, u64 *last_extent, + struct btrfs_path *src_path, int start_slot, int nr, int inode_only, u64 logged_isize) { @@ -3769,7 +3769,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, struct btrfs_file_extent_item *extent; struct btrfs_inode_item *inode_item; struct extent_buffer *src = src_path->nodes[0]; - struct btrfs_key first_key, last_key, key; int ret; struct btrfs_key *ins_keys; u32 *ins_sizes; @@ -3777,9 +3776,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, int i; struct list_head ordered_sums; int skip_csum = inode->flags & BTRFS_INODE_NODATASUM; - bool has_extents = false; - bool need_find_last_extent = true; - bool done = false; INIT_LIST_HEAD(&ordered_sums); @@ -3788,8 +3784,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, if (!ins_data) return -ENOMEM; - first_key.objectid = (u64)-1; - ins_sizes = (u32 *)ins_data; ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32)); @@ -3810,9 +3804,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, src_offset = btrfs_item_ptr_offset(src, start_slot + i); - if (i == nr - 1) - last_key = ins_keys[i]; - if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) { inode_item = btrfs_item_ptr(dst_path->nodes[0], dst_path->slots[0], @@ -3826,20 +3817,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, src_offset, ins_sizes[i]); } - /* - * We set need_find_last_extent here in case we know we were - * processing other items and then walk into the first extent in - * the inode. If we don't hit an extent then nothing changes, - * we'll do the last search the next time around. - */ - if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY) { - has_extents = true; - if (first_key.objectid == (u64)-1) - first_key = ins_keys[i]; - } else { - need_find_last_extent = false; - } - /* take a reference on file data extents so that truncates * or deletes of this inode don't have to relog the inode * again @@ -3905,167 +3882,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, kfree(sums); } - if (!has_extents) - return ret; - - if (need_find_last_extent && *last_extent == first_key.offset) { - /* - * We don't have any leafs between our current one and the one - * we processed before that can have file extent items for our - * inode (and have a generation number smaller than our current - * transaction id). - */ - need_find_last_extent = false; - } - - /* - * Because we use btrfs_search_forward we could skip leaves that were - * not modified and then assume *last_extent is valid when it really - * isn't. So back up to the previous leaf and read the end of the last - * extent before we go and fill in holes. - */ - if (need_find_last_extent) { - u64 len; - - ret = btrfs_prev_leaf(inode->root, src_path); - if (ret < 0) - return ret; - if (ret) - goto fill_holes; - if (src_path->slots[0]) - src_path->slots[0]--; - src = src_path->nodes[0]; - btrfs_item_key_to_cpu(src, &key, src_path->slots[0]); - if (key.objectid != btrfs_ino(inode) || - key.type != BTRFS_EXTENT_DATA_KEY) - goto fill_holes; - extent = btrfs_item_ptr(src, src_path->slots[0], - struct btrfs_file_extent_item); - if (btrfs_file_extent_type(src, extent) == - BTRFS_FILE_EXTENT_INLINE) { - len = btrfs_file_extent_ram_bytes(src, extent); - *last_extent = ALIGN(key.offset + len, - fs_info->sectorsize); - } else { - len = btrfs_file_extent_num_bytes(src, extent); - *last_extent = key.offset + len; - } - } -fill_holes: - /* So we did prev_leaf, now we need to move to the next leaf, but a few - * things could have happened - * - * 1) A merge could have happened, so we could currently be on a leaf - * that holds what we were copying in the first place. - * 2) A split could have happened, and now not all of the items we want - * are on the same leaf. - * - * So we need to adjust how we search for holes, we need to drop the - * path and re-search for the first extent key we found, and then walk - * forward until we hit the last one we copied. - */ - if (need_find_last_extent) { - /* btrfs_prev_leaf could return 1 without releasing the path */ - btrfs_release_path(src_path); - ret = btrfs_search_slot(NULL, inode->root, &first_key, - src_path, 0, 0); - if (ret < 0) - return ret; - ASSERT(ret == 0); - src = src_path->nodes[0]; - i = src_path->slots[0]; - } else { - i = start_slot; - } - - /* - * Ok so here we need to go through and fill in any holes we may have - * to make sure that holes are punched for those areas in case they had - * extents previously. - */ - while (!done) { - u64 offset, len; - u64 extent_end; - - if (i >= btrfs_header_nritems(src_path->nodes[0])) { - ret = btrfs_next_leaf(inode->root, src_path); - if (ret < 0) - return ret; - ASSERT(ret == 0); - src = src_path->nodes[0]; - i = 0; - need_find_last_extent = true; - } - - btrfs_item_key_to_cpu(src, &key, i); - if (!btrfs_comp_cpu_keys(&key, &last_key)) - done = true; - if (key.objectid != btrfs_ino(inode) || - key.type != BTRFS_EXTENT_DATA_KEY) { - i++; - continue; - } - extent = btrfs_item_ptr(src, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(src, extent) == - BTRFS_FILE_EXTENT_INLINE) { - len = btrfs_file_extent_ram_bytes(src, extent); - extent_end = ALIGN(key.offset + len, - fs_info->sectorsize); - } else { - len = btrfs_file_extent_num_bytes(src, extent); - extent_end = key.offset + len; - } - i++; - - if (*last_extent == key.offset) { - *last_extent = extent_end; - continue; - } - offset = *last_extent; - len = key.offset - *last_extent; - ret = btrfs_insert_file_extent(trans, log, btrfs_ino(inode), - offset, 0, 0, len, 0, len, 0, 0, 0); - if (ret) - break; - *last_extent = extent_end; - } - - /* - * Check if there is a hole between the last extent found in our leaf - * and the first extent in the next leaf. If there is one, we need to - * log an explicit hole so that at replay time we can punch the hole. - */ - if (ret == 0 && - key.objectid == btrfs_ino(inode) && - key.type == BTRFS_EXTENT_DATA_KEY && - i == btrfs_header_nritems(src_path->nodes[0])) { - ret = btrfs_next_leaf(inode->root, src_path); - need_find_last_extent = true; - if (ret > 0) { - ret = 0; - } else if (ret == 0) { - btrfs_item_key_to_cpu(src_path->nodes[0], &key, - src_path->slots[0]); - if (key.objectid == btrfs_ino(inode) && - key.type == BTRFS_EXTENT_DATA_KEY && - *last_extent < key.offset) { - const u64 len = key.offset - *last_extent; - - ret = btrfs_insert_file_extent(trans, log, - btrfs_ino(inode), - *last_extent, 0, - 0, len, 0, len, - 0, 0, 0); - *last_extent += len; - } - } - } - /* - * Need to let the callers know we dropped the path so they should - * re-search. - */ - if (!ret && need_find_last_extent) - ret = 1; return ret; } @@ -4338,7 +4154,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, const u64 i_size = i_size_read(&inode->vfs_inode); const u64 ino = btrfs_ino(inode); struct btrfs_path *dst_path = NULL; - u64 last_extent = (u64)-1; + bool dropped_extents = false; int ins_nr = 0; int start_slot; int ret; @@ -4360,8 +4176,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, if (slot >= btrfs_header_nritems(leaf)) { if (ins_nr > 0) { ret = copy_items(trans, inode, dst_path, path, - &last_extent, start_slot, - ins_nr, 1, 0); + start_slot, ins_nr, 1, 0); if (ret < 0) goto out; ins_nr = 0; @@ -4385,8 +4200,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, path->slots[0]++; continue; } - if (last_extent == (u64)-1) { - last_extent = key.offset; + if (!dropped_extents) { /* * Avoid logging extent items logged in past fsync calls * and leading to duplicate keys in the log tree. @@ -4400,6 +4214,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, } while (ret == -EAGAIN); if (ret) goto out; + dropped_extents = true; } if (ins_nr == 0) start_slot = slot; @@ -4414,7 +4229,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, } } if (ins_nr > 0) { - ret = copy_items(trans, inode, dst_path, path, &last_extent, + ret = copy_items(trans, inode, dst_path, path, start_slot, ins_nr, 1, 0); if (ret > 0) ret = 0; @@ -4608,13 +4423,8 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, if (slot >= nritems) { if (ins_nr > 0) { - u64 last_extent = 0; - ret = copy_items(trans, inode, dst_path, path, - &last_extent, start_slot, - ins_nr, 1, 0); - /* can't be 1, extent items aren't processed */ - ASSERT(ret <= 0); + start_slot, ins_nr, 1, 0); if (ret < 0) return ret; ins_nr = 0; @@ -4638,13 +4448,8 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, cond_resched(); } if (ins_nr > 0) { - u64 last_extent = 0; - ret = copy_items(trans, inode, dst_path, path, - &last_extent, start_slot, - ins_nr, 1, 0); - /* can't be 1, extent items aren't processed */ - ASSERT(ret <= 0); + start_slot, ins_nr, 1, 0); if (ret < 0) return ret; } @@ -4653,100 +4458,119 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, } /* - * If the no holes feature is enabled we need to make sure any hole between the - * last extent and the i_size of our inode is explicitly marked in the log. This - * is to make sure that doing something like: - * - * 1) create file with 128Kb of data - * 2) truncate file to 64Kb - * 3) truncate file to 256Kb - * 4) fsync file - * 5) - * 6) mount fs and trigger log replay - * - * Will give us a file with a size of 256Kb, the first 64Kb of data match what - * the file had in its first 64Kb of data at step 1 and the last 192Kb of the - * file correspond to a hole. The presence of explicit holes in a log tree is - * what guarantees that log replay will remove/adjust file extent items in the - * fs/subvol tree. - * - * Here we do not need to care about holes between extents, that is already done - * by copy_items(). We also only need to do this in the full sync path, where we - * lookup for extents from the fs/subvol tree only. In the fast path case, we - * lookup the list of modified extent maps and if any represents a hole, we - * insert a corresponding extent representing a hole in the log tree. + * When using the NO_HOLES feature if we punched a hole that causes the + * deletion of entire leafs or all the extent items of the first leaf (the one + * that contains the inode item and references) we may end up not processing + * any extents, because there are no leafs with a generation matching the + * current transaction that have extent items for our inode. So we need to find + * if any holes exist and then log them. We also need to log holes after any + * truncate operation that changes the inode's size. */ -static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_inode *inode, - struct btrfs_path *path) +static int btrfs_log_holes(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_inode *inode, + struct btrfs_path *path) { struct btrfs_fs_info *fs_info = root->fs_info; - int ret; struct btrfs_key key; - u64 hole_start; - u64 hole_size; - struct extent_buffer *leaf; - struct btrfs_root *log = root->log_root; const u64 ino = btrfs_ino(inode); const u64 i_size = i_size_read(&inode->vfs_inode); + u64 prev_extent_end = 0; + int ret; - if (!btrfs_fs_incompat(fs_info, NO_HOLES)) + if (!btrfs_fs_incompat(fs_info, NO_HOLES) || i_size == 0) return 0; key.objectid = ino; key.type = BTRFS_EXTENT_DATA_KEY; - key.offset = (u64)-1; + key.offset = 0; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - ASSERT(ret != 0); if (ret < 0) return ret; - ASSERT(path->slots[0] > 0); - path->slots[0]--; - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - - if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) { - /* inode does not have any extents */ - hole_start = 0; - hole_size = i_size; - } else { + while (true) { struct btrfs_file_extent_item *extent; + struct extent_buffer *leaf = path->nodes[0]; u64 len; - /* - * If there's an extent beyond i_size, an explicit hole was - * already inserted by copy_items(). - */ - if (key.offset >= i_size) - return 0; + if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + return ret; + if (ret > 0) { + ret = 0; + break; + } + leaf = path->nodes[0]; + } + + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) + break; + + /* We have a hole, log it. */ + if (prev_extent_end < key.offset) { + const u64 hole_len = key.offset - prev_extent_end; + + /* + * Release the path to avoid deadlocks with other code + * paths that search the root while holding locks on + * leafs from the log root. + */ + btrfs_release_path(path); + ret = btrfs_insert_file_extent(trans, root->log_root, + ino, prev_extent_end, 0, + 0, hole_len, 0, hole_len, + 0, 0, 0); + if (ret < 0) + return ret; + + /* + * Search for the same key again in the root. Since it's + * an extent item and we are holding the inode lock, the + * key must still exist. If it doesn't just emit warning + * and return an error to fall back to a transaction + * commit. + */ + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + return ret; + if (WARN_ON(ret > 0)) + return -ENOENT; + leaf = path->nodes[0]; + } extent = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - if (btrfs_file_extent_type(leaf, extent) == - BTRFS_FILE_EXTENT_INLINE) - return 0; + BTRFS_FILE_EXTENT_INLINE) { + len = btrfs_file_extent_ram_bytes(leaf, extent); + prev_extent_end = ALIGN(key.offset + len, + fs_info->sectorsize); + } else { + len = btrfs_file_extent_num_bytes(leaf, extent); + prev_extent_end = key.offset + len; + } - len = btrfs_file_extent_num_bytes(leaf, extent); - /* Last extent goes beyond i_size, no need to log a hole. */ - if (key.offset + len > i_size) - return 0; - hole_start = key.offset + len; - hole_size = i_size - hole_start; + path->slots[0]++; + cond_resched(); } - btrfs_release_path(path); - /* Last extent ends at i_size. */ - if (hole_size == 0) - return 0; + if (prev_extent_end < i_size) { + u64 hole_len; - hole_size = ALIGN(hole_size, fs_info->sectorsize); - ret = btrfs_insert_file_extent(trans, log, ino, hole_start, 0, 0, - hole_size, 0, hole_size, 0, 0, 0); - return ret; + btrfs_release_path(path); + hole_len = ALIGN(i_size - prev_extent_end, fs_info->sectorsize); + ret = btrfs_insert_file_extent(trans, root->log_root, + ino, prev_extent_end, 0, 0, + hole_len, 0, hole_len, + 0, 0, 0); + if (ret < 0) + return ret; + } + + return 0; } /* @@ -4914,7 +4738,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, struct btrfs_root *log = root->log_root; struct extent_buffer *src = NULL; LIST_HEAD(logged_list); - u64 last_extent = 0; int err = 0; int ret; int nritems; @@ -5088,7 +4911,7 @@ again: ins_start_slot = path->slots[0]; } ret = copy_items(trans, inode, dst_path, path, - &last_extent, ins_start_slot, + ins_start_slot, ins_nr, inode_only, logged_isize); if (ret < 0) { @@ -5142,17 +4965,13 @@ again: if (ins_nr == 0) goto next_slot; ret = copy_items(trans, inode, dst_path, path, - &last_extent, ins_start_slot, + ins_start_slot, ins_nr, inode_only, logged_isize); if (ret < 0) { err = ret; goto out_unlock; } ins_nr = 0; - if (ret) { - btrfs_release_path(path); - continue; - } goto next_slot; } @@ -5166,18 +4985,13 @@ again: goto next_slot; } - ret = copy_items(trans, inode, dst_path, path, &last_extent, + ret = copy_items(trans, inode, dst_path, path, ins_start_slot, ins_nr, inode_only, logged_isize); if (ret < 0) { err = ret; goto out_unlock; } - if (ret) { - ins_nr = 0; - btrfs_release_path(path); - continue; - } ins_nr = 1; ins_start_slot = path->slots[0]; next_slot: @@ -5191,13 +5005,12 @@ next_slot: } if (ins_nr) { ret = copy_items(trans, inode, dst_path, path, - &last_extent, ins_start_slot, + ins_start_slot, ins_nr, inode_only, logged_isize); if (ret < 0) { err = ret; goto out_unlock; } - ret = 0; ins_nr = 0; } btrfs_release_path(path); @@ -5212,14 +5025,13 @@ next_key: } } if (ins_nr) { - ret = copy_items(trans, inode, dst_path, path, &last_extent, + ret = copy_items(trans, inode, dst_path, path, ins_start_slot, ins_nr, inode_only, logged_isize); if (ret < 0) { err = ret; goto out_unlock; } - ret = 0; ins_nr = 0; } @@ -5232,7 +5044,7 @@ next_key: if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) { btrfs_release_path(path); btrfs_release_path(dst_path); - err = btrfs_log_trailing_hole(trans, root, inode, path); + err = btrfs_log_holes(trans, root, inode, path); if (err) goto out_unlock; } From 71f54d0173e947c09b5dfaf155bf3756afca5326 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Thu, 10 Oct 2019 10:39:25 +0800 Subject: [PATCH 3153/3715] btrfs: use bool argument in free_root_pointers() [ Upstream commit 4273eaff9b8d5e141113a5bdf9628c02acf3afe5 ] We don't need int argument bool shall do in free_root_pointers(). And rename the argument as it confused two people. Reviewed-by: Qu Wenruo Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/disk-io.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a8ea56218d6b..e5b0a027c213 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2051,7 +2051,7 @@ static void free_root_extent_buffers(struct btrfs_root *root) } /* helper to cleanup tree roots */ -static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) +static void free_root_pointers(struct btrfs_fs_info *info, bool free_chunk_root) { free_root_extent_buffers(info->tree_root); @@ -2060,7 +2060,7 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) free_root_extent_buffers(info->csum_root); free_root_extent_buffers(info->quota_root); free_root_extent_buffers(info->uuid_root); - if (chunk_root) + if (free_chunk_root) free_root_extent_buffers(info->chunk_root); free_root_extent_buffers(info->free_space_root); } @@ -3069,7 +3069,7 @@ fail_block_groups: btrfs_put_block_group_cache(fs_info); fail_tree_roots: - free_root_pointers(fs_info, 1); + free_root_pointers(fs_info, true); invalidate_inode_pages2(fs_info->btree_inode->i_mapping); fail_sb_buffer: @@ -3097,7 +3097,7 @@ recovery_tree_root: if (!btrfs_test_opt(fs_info, USEBACKUPROOT)) goto fail_tree_roots; - free_root_pointers(fs_info, 0); + free_root_pointers(fs_info, false); /* don't use the log in recovery mode, it won't be valid */ btrfs_set_super_log_root(disk_super, 0); @@ -3764,7 +3764,7 @@ void close_ctree(struct btrfs_fs_info *fs_info) btrfs_free_block_groups(fs_info); clear_bit(BTRFS_FS_OPEN, &fs_info->flags); - free_root_pointers(fs_info, 1); + free_root_pointers(fs_info, true); iput(fs_info->btree_inode); From 8d9ec3725592cb97012d3a2a4eedbd880538d7b2 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 21 Jan 2020 09:17:06 -0500 Subject: [PATCH 3154/3715] btrfs: free block groups after free'ing fs trees [ Upstream commit 4e19443da1941050b346f8fc4c368aa68413bc88 ] Sometimes when running generic/475 we would trip the WARN_ON(cache->reserved) check when free'ing the block groups on umount. This is because sometimes we don't commit the transaction because of IO errors and thus do not cleanup the tree logs until at umount time. These blocks are still reserved until they are cleaned up, but they aren't cleaned up until _after_ we do the free block groups work. Fix this by moving the free after free'ing the fs roots, that way all of the tree logs are cleaned up and we have a properly cleaned fs. A bunch of loops of generic/475 confirmed this fixes the problem. CC: stable@vger.kernel.org # 4.9+ Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/disk-io.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e5b0a027c213..6c6c15fdeef7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3761,11 +3761,18 @@ void close_ctree(struct btrfs_fs_info *fs_info) invalidate_inode_pages2(fs_info->btree_inode->i_mapping); btrfs_stop_all_workers(fs_info); - btrfs_free_block_groups(fs_info); - clear_bit(BTRFS_FS_OPEN, &fs_info->flags); free_root_pointers(fs_info, true); + /* + * We must free the block groups after dropping the fs_roots as we could + * have had an IO error and have left over tree log blocks that aren't + * cleaned up until the fs roots are freed. This makes the block group + * accounting appear to be wrong because there's pending reserved bytes, + * so make sure we do the block group cleanup afterwards. + */ + btrfs_free_block_groups(fs_info); + iput(fs_info->btree_inode); #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY From 1066f7e4bcb93336df322ece6c1e468dd103c810 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 5 Mar 2018 15:43:41 +0100 Subject: [PATCH 3155/3715] btrfs: remove trivial locking wrappers of tree mod log [ Upstream commit b1a09f1ec540408abf3a50d15dff5d9506932693 ] The wrappers are trivial and do not bring any extra value on top of the plain locking primitives. Reviewed-by: Nikolay Borisov Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/ctree.c | 58 ++++++++++++++++-------------------------------- 1 file changed, 19 insertions(+), 39 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 740ef428acdd..a7b9859449c5 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -334,26 +334,6 @@ struct tree_mod_elem { struct tree_mod_root old_root; }; -static inline void tree_mod_log_read_lock(struct btrfs_fs_info *fs_info) -{ - read_lock(&fs_info->tree_mod_log_lock); -} - -static inline void tree_mod_log_read_unlock(struct btrfs_fs_info *fs_info) -{ - read_unlock(&fs_info->tree_mod_log_lock); -} - -static inline void tree_mod_log_write_lock(struct btrfs_fs_info *fs_info) -{ - write_lock(&fs_info->tree_mod_log_lock); -} - -static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info) -{ - write_unlock(&fs_info->tree_mod_log_lock); -} - /* * Pull a new tree mod seq number for our operation. */ @@ -373,14 +353,14 @@ static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info) u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, struct seq_list *elem) { - tree_mod_log_write_lock(fs_info); + write_lock(&fs_info->tree_mod_log_lock); spin_lock(&fs_info->tree_mod_seq_lock); if (!elem->seq) { elem->seq = btrfs_inc_tree_mod_seq(fs_info); list_add_tail(&elem->list, &fs_info->tree_mod_seq_list); } spin_unlock(&fs_info->tree_mod_seq_lock); - tree_mod_log_write_unlock(fs_info); + write_unlock(&fs_info->tree_mod_log_lock); return elem->seq; } @@ -422,7 +402,7 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, * anything that's lower than the lowest existing (read: blocked) * sequence number can be removed from the tree. */ - tree_mod_log_write_lock(fs_info); + write_lock(&fs_info->tree_mod_log_lock); tm_root = &fs_info->tree_mod_log; for (node = rb_first(tm_root); node; node = next) { next = rb_next(node); @@ -432,7 +412,7 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, rb_erase(node, tm_root); kfree(tm); } - tree_mod_log_write_unlock(fs_info); + write_unlock(&fs_info->tree_mod_log_lock); } /* @@ -443,7 +423,7 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, * for root replace operations, or the logical address of the affected * block for all other operations. * - * Note: must be called with write lock (tree_mod_log_write_lock). + * Note: must be called with write lock for fs_info::tree_mod_log_lock. */ static noinline int __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) @@ -481,7 +461,7 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) * Determines if logging can be omitted. Returns 1 if it can. Otherwise, it * returns zero with the tree_mod_log_lock acquired. The caller must hold * this until all tree mod log insertions are recorded in the rb tree and then - * call tree_mod_log_write_unlock() to release. + * write unlock fs_info::tree_mod_log_lock. */ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info, struct extent_buffer *eb) { @@ -491,9 +471,9 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info, if (eb && btrfs_header_level(eb) == 0) return 1; - tree_mod_log_write_lock(fs_info); + write_lock(&fs_info->tree_mod_log_lock); if (list_empty(&(fs_info)->tree_mod_seq_list)) { - tree_mod_log_write_unlock(fs_info); + write_unlock(&fs_info->tree_mod_log_lock); return 1; } @@ -557,7 +537,7 @@ tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, } ret = __tree_mod_log_insert(fs_info, tm); - tree_mod_log_write_unlock(fs_info); + write_unlock(&eb->fs_info->tree_mod_log_lock); if (ret) kfree(tm); @@ -621,7 +601,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, ret = __tree_mod_log_insert(fs_info, tm); if (ret) goto free_tms; - tree_mod_log_write_unlock(fs_info); + write_unlock(&eb->fs_info->tree_mod_log_lock); kfree(tm_list); return 0; @@ -632,7 +612,7 @@ free_tms: kfree(tm_list[i]); } if (locked) - tree_mod_log_write_unlock(fs_info); + write_unlock(&eb->fs_info->tree_mod_log_lock); kfree(tm_list); kfree(tm); @@ -713,7 +693,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, if (!ret) ret = __tree_mod_log_insert(fs_info, tm); - tree_mod_log_write_unlock(fs_info); + write_unlock(&fs_info->tree_mod_log_lock); if (ret) goto free_tms; kfree(tm_list); @@ -740,7 +720,7 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq, struct tree_mod_elem *cur = NULL; struct tree_mod_elem *found = NULL; - tree_mod_log_read_lock(fs_info); + read_lock(&fs_info->tree_mod_log_lock); tm_root = &fs_info->tree_mod_log; node = tm_root->rb_node; while (node) { @@ -768,7 +748,7 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq, break; } } - tree_mod_log_read_unlock(fs_info); + read_unlock(&fs_info->tree_mod_log_lock); return found; } @@ -849,7 +829,7 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, goto free_tms; } - tree_mod_log_write_unlock(fs_info); + write_unlock(&fs_info->tree_mod_log_lock); kfree(tm_list); return 0; @@ -861,7 +841,7 @@ free_tms: kfree(tm_list[i]); } if (locked) - tree_mod_log_write_unlock(fs_info); + write_unlock(&fs_info->tree_mod_log_lock); kfree(tm_list); return ret; @@ -921,7 +901,7 @@ tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb) goto free_tms; ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems); - tree_mod_log_write_unlock(fs_info); + write_unlock(&eb->fs_info->tree_mod_log_lock); if (ret) goto free_tms; kfree(tm_list); @@ -1279,7 +1259,7 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, unsigned long p_size = sizeof(struct btrfs_key_ptr); n = btrfs_header_nritems(eb); - tree_mod_log_read_lock(fs_info); + read_lock(&fs_info->tree_mod_log_lock); while (tm && tm->seq >= time_seq) { /* * all the operations are recorded with the operator used for @@ -1334,7 +1314,7 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, if (tm->logical != first_tm->logical) break; } - tree_mod_log_read_unlock(fs_info); + read_unlock(&fs_info->tree_mod_log_lock); btrfs_set_header_nritems(eb, n); } From f6eb1d580e7d11db285e781c4ddeec012d72bc1a Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 22 Jan 2020 12:23:20 +0000 Subject: [PATCH 3156/3715] Btrfs: fix race between adding and putting tree mod seq elements and nodes [ Upstream commit 7227ff4de55d931bbdc156c8ef0ce4f100c78a5b ] There is a race between adding and removing elements to the tree mod log list and rbtree that can lead to use-after-free problems. Consider the following example that explains how/why the problems happens: 1) Task A has mod log element with sequence number 200. It currently is the only element in the mod log list; 2) Task A calls btrfs_put_tree_mod_seq() because it no longer needs to access the tree mod log. When it enters the function, it initializes 'min_seq' to (u64)-1. Then it acquires the lock 'tree_mod_seq_lock' before checking if there are other elements in the mod seq list. Since the list it empty, 'min_seq' remains set to (u64)-1. Then it unlocks the lock 'tree_mod_seq_lock'; 3) Before task A acquires the lock 'tree_mod_log_lock', task B adds itself to the mod seq list through btrfs_get_tree_mod_seq() and gets a sequence number of 201; 4) Some other task, name it task C, modifies a btree and because there elements in the mod seq list, it adds a tree mod elem to the tree mod log rbtree. That node added to the mod log rbtree is assigned a sequence number of 202; 5) Task B, which is doing fiemap and resolving indirect back references, calls btrfs get_old_root(), with 'time_seq' == 201, which in turn calls tree_mod_log_search() - the search returns the mod log node from the rbtree with sequence number 202, created by task C; 6) Task A now acquires the lock 'tree_mod_log_lock', starts iterating the mod log rbtree and finds the node with sequence number 202. Since 202 is less than the previously computed 'min_seq', (u64)-1, it removes the node and frees it; 7) Task B still has a pointer to the node with sequence number 202, and it dereferences the pointer itself and through the call to __tree_mod_log_rewind(), resulting in a use-after-free problem. This issue can be triggered sporadically with the test case generic/561 from fstests, and it happens more frequently with a higher number of duperemove processes. When it happens to me, it either freezes the VM or it produces a trace like the following before crashing: [ 1245.321140] general protection fault: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC PTI [ 1245.321200] CPU: 1 PID: 26997 Comm: pool Not tainted 5.5.0-rc6-btrfs-next-52 #1 [ 1245.321235] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014 [ 1245.321287] RIP: 0010:rb_next+0x16/0x50 [ 1245.321307] Code: .... [ 1245.321372] RSP: 0018:ffffa151c4d039b0 EFLAGS: 00010202 [ 1245.321388] RAX: 6b6b6b6b6b6b6b6b RBX: ffff8ae221363c80 RCX: 6b6b6b6b6b6b6b6b [ 1245.321409] RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff8ae221363c80 [ 1245.321439] RBP: ffff8ae20fcc4688 R08: 0000000000000002 R09: 0000000000000000 [ 1245.321475] R10: ffff8ae20b120910 R11: 00000000243f8bb1 R12: 0000000000000038 [ 1245.321506] R13: ffff8ae221363c80 R14: 000000000000075f R15: ffff8ae223f762b8 [ 1245.321539] FS: 00007fdee1ec7700(0000) GS:ffff8ae236c80000(0000) knlGS:0000000000000000 [ 1245.321591] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1245.321614] CR2: 00007fded4030c48 CR3: 000000021da16003 CR4: 00000000003606e0 [ 1245.321642] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1245.321668] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1245.321706] Call Trace: [ 1245.321798] __tree_mod_log_rewind+0xbf/0x280 [btrfs] [ 1245.321841] btrfs_search_old_slot+0x105/0xd00 [btrfs] [ 1245.321877] resolve_indirect_refs+0x1eb/0xc60 [btrfs] [ 1245.321912] find_parent_nodes+0x3dc/0x11b0 [btrfs] [ 1245.321947] btrfs_check_shared+0x115/0x1c0 [btrfs] [ 1245.321980] ? extent_fiemap+0x59d/0x6d0 [btrfs] [ 1245.322029] extent_fiemap+0x59d/0x6d0 [btrfs] [ 1245.322066] do_vfs_ioctl+0x45a/0x750 [ 1245.322081] ksys_ioctl+0x70/0x80 [ 1245.322092] ? trace_hardirqs_off_thunk+0x1a/0x1c [ 1245.322113] __x64_sys_ioctl+0x16/0x20 [ 1245.322126] do_syscall_64+0x5c/0x280 [ 1245.322139] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1245.322155] RIP: 0033:0x7fdee3942dd7 [ 1245.322177] Code: .... [ 1245.322258] RSP: 002b:00007fdee1ec6c88 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 1245.322294] RAX: ffffffffffffffda RBX: 00007fded40210d8 RCX: 00007fdee3942dd7 [ 1245.322314] RDX: 00007fded40210d8 RSI: 00000000c020660b RDI: 0000000000000004 [ 1245.322337] RBP: 0000562aa89e7510 R08: 0000000000000000 R09: 00007fdee1ec6d44 [ 1245.322369] R10: 0000000000000073 R11: 0000000000000246 R12: 00007fdee1ec6d48 [ 1245.322390] R13: 00007fdee1ec6d40 R14: 00007fded40210d0 R15: 00007fdee1ec6d50 [ 1245.322423] Modules linked in: .... [ 1245.323443] ---[ end trace 01de1e9ec5dff3cd ]--- Fix this by ensuring that btrfs_put_tree_mod_seq() computes the minimum sequence number and iterates the rbtree while holding the lock 'tree_mod_log_lock' in write mode. Also get rid of the 'tree_mod_seq_lock' lock, since it is now redundant. Fixes: bd989ba359f2ac ("Btrfs: add tree modification log functions") Fixes: 097b8a7c9e48e2 ("Btrfs: join tree mod log code with the code holding back delayed refs") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Josef Bacik Reviewed-by: Nikolay Borisov Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/ctree.c | 8 ++------ fs/btrfs/ctree.h | 6 ++---- fs/btrfs/delayed-ref.c | 8 ++++---- fs/btrfs/disk-io.c | 1 - fs/btrfs/tests/btrfs-tests.c | 1 - 5 files changed, 8 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index a7b9859449c5..f5a8c0d26cf3 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -354,12 +354,10 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, struct seq_list *elem) { write_lock(&fs_info->tree_mod_log_lock); - spin_lock(&fs_info->tree_mod_seq_lock); if (!elem->seq) { elem->seq = btrfs_inc_tree_mod_seq(fs_info); list_add_tail(&elem->list, &fs_info->tree_mod_seq_list); } - spin_unlock(&fs_info->tree_mod_seq_lock); write_unlock(&fs_info->tree_mod_log_lock); return elem->seq; @@ -379,7 +377,7 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, if (!seq_putting) return; - spin_lock(&fs_info->tree_mod_seq_lock); + write_lock(&fs_info->tree_mod_log_lock); list_del(&elem->list); elem->seq = 0; @@ -390,19 +388,17 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, * blocker with lower sequence number exists, we * cannot remove anything from the log */ - spin_unlock(&fs_info->tree_mod_seq_lock); + write_unlock(&fs_info->tree_mod_log_lock); return; } min_seq = cur_elem->seq; } } - spin_unlock(&fs_info->tree_mod_seq_lock); /* * anything that's lower than the lowest existing (read: blocked) * sequence number can be removed from the tree. */ - write_lock(&fs_info->tree_mod_log_lock); tm_root = &fs_info->tree_mod_log; for (node = rb_first(tm_root); node; node = next) { next = rb_next(node); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 664710848e6f..5412b12491cb 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -869,14 +869,12 @@ struct btrfs_fs_info { struct list_head delayed_iputs; struct mutex cleaner_delayed_iput_mutex; - /* this protects tree_mod_seq_list */ - spinlock_t tree_mod_seq_lock; atomic64_t tree_mod_seq; - struct list_head tree_mod_seq_list; - /* this protects tree_mod_log */ + /* this protects tree_mod_log and tree_mod_seq_list */ rwlock_t tree_mod_log_lock; struct rb_root tree_mod_log; + struct list_head tree_mod_seq_list; atomic_t nr_async_submits; atomic_t async_submit_draining; diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index d56bd3625468..45714f1c43a3 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -281,7 +281,7 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, if (head->is_data) return; - spin_lock(&fs_info->tree_mod_seq_lock); + read_lock(&fs_info->tree_mod_log_lock); if (!list_empty(&fs_info->tree_mod_seq_list)) { struct seq_list *elem; @@ -289,7 +289,7 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, struct seq_list, list); seq = elem->seq; } - spin_unlock(&fs_info->tree_mod_seq_lock); + read_unlock(&fs_info->tree_mod_log_lock); ref = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node, list); @@ -317,7 +317,7 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, struct seq_list *elem; int ret = 0; - spin_lock(&fs_info->tree_mod_seq_lock); + read_lock(&fs_info->tree_mod_log_lock); if (!list_empty(&fs_info->tree_mod_seq_list)) { elem = list_first_entry(&fs_info->tree_mod_seq_list, struct seq_list, list); @@ -331,7 +331,7 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, } } - spin_unlock(&fs_info->tree_mod_seq_lock); + read_unlock(&fs_info->tree_mod_log_lock); return ret; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6c6c15fdeef7..44b15617c7b9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2455,7 +2455,6 @@ int open_ctree(struct super_block *sb, spin_lock_init(&fs_info->fs_roots_radix_lock); spin_lock_init(&fs_info->delayed_iput_lock); spin_lock_init(&fs_info->defrag_inodes_lock); - spin_lock_init(&fs_info->tree_mod_seq_lock); spin_lock_init(&fs_info->super_lock); spin_lock_init(&fs_info->qgroup_op_lock); spin_lock_init(&fs_info->buffer_lock); diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index d3f25376a0f8..6c92101e8092 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -115,7 +115,6 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize) spin_lock_init(&fs_info->qgroup_op_lock); spin_lock_init(&fs_info->super_lock); spin_lock_init(&fs_info->fs_roots_radix_lock); - spin_lock_init(&fs_info->tree_mod_seq_lock); mutex_init(&fs_info->qgroup_ioctl_lock); mutex_init(&fs_info->qgroup_rescan_lock); rwlock_init(&fs_info->tree_mod_log_lock); From a31a66fe1c7d27064ecea28f2710979c1e2e66c2 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 18 Dec 2019 14:28:25 +0200 Subject: [PATCH 3157/3715] drm: atmel-hlcdc: enable clock before configuring timing engine [ Upstream commit 2c1fb9d86f6820abbfaa38a6836157c76ccb4e7b ] Changing pixel clock source without having this clock source enabled will block the timing engine and the next operations after (in this case setting ATMEL_HLCDC_CFG(5) settings in atmel_hlcdc_crtc_mode_set_nofb() will fail). It is recomended (although in datasheet this is not present) to actually enabled pixel clock source before doing any changes on timing enginge (only SAM9X60 datasheet specifies that the peripheral clock and pixel clock must be enabled before using LCD controller). Fixes: 1a396789f65a ("drm: add Atmel HLCDC Display Controller support") Signed-off-by: Claudiu Beznea Signed-off-by: Sam Ravnborg Cc: Boris Brezillon Cc: # v4.0+ Link: https://patchwork.freedesktop.org/patch/msgid/1576672109-22707-3-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c index d73281095fac..976109c20d49 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c @@ -79,7 +79,11 @@ static void atmel_hlcdc_crtc_mode_set_nofb(struct drm_crtc *c) struct videomode vm; unsigned long prate; unsigned int cfg; - int div; + int div, ret; + + ret = clk_prepare_enable(crtc->dc->hlcdc->sys_clk); + if (ret) + return; vm.vfront_porch = adj->crtc_vsync_start - adj->crtc_vdisplay; vm.vback_porch = adj->crtc_vtotal - adj->crtc_vsync_end; @@ -138,6 +142,8 @@ static void atmel_hlcdc_crtc_mode_set_nofb(struct drm_crtc *c) ATMEL_HLCDC_VSPSU | ATMEL_HLCDC_VSPHO | ATMEL_HLCDC_GUARDTIME_MASK | ATMEL_HLCDC_MODE_MASK, cfg); + + clk_disable_unprepare(crtc->dc->hlcdc->sys_clk); } static enum drm_mode_status From ce02876c29909a62d580035e6dab401e54277bb3 Mon Sep 17 00:00:00 2001 From: Marios Pomonis Date: Wed, 11 Dec 2019 12:47:53 -0800 Subject: [PATCH 3158/3715] KVM: x86: Protect pmu_intel.c from Spectre-v1/L1TF attacks [ Upstream commit 66061740f1a487f4ed54fde75e724709f805da53 ] This fixes Spectre-v1/L1TF vulnerabilities in intel_find_fixed_event() and intel_rdpmc_ecx_to_pmc(). kvm_rdpmc() (ancestor of intel_find_fixed_event()) and reprogram_fixed_counter() (ancestor of intel_rdpmc_ecx_to_pmc()) are exported symbols so KVM should treat them conservatively from a security perspective. Fixes: 25462f7f5295 ("KVM: x86/vPMU: Define kvm_pmu_ops to support vPMU function dispatch") Signed-off-by: Nick Finco Signed-off-by: Marios Pomonis Reviewed-by: Andrew Honig Cc: stable@vger.kernel.org Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/pmu_intel.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/pmu_intel.c b/arch/x86/kvm/pmu_intel.c index 2729131fe9bf..84ae4dd261ca 100644 --- a/arch/x86/kvm/pmu_intel.c +++ b/arch/x86/kvm/pmu_intel.c @@ -87,10 +87,14 @@ static unsigned intel_find_arch_event(struct kvm_pmu *pmu, static unsigned intel_find_fixed_event(int idx) { - if (idx >= ARRAY_SIZE(fixed_pmc_events)) + u32 event; + size_t size = ARRAY_SIZE(fixed_pmc_events); + + if (idx >= size) return PERF_COUNT_HW_MAX; - return intel_arch_events[fixed_pmc_events[idx]].event_type; + event = fixed_pmc_events[array_index_nospec(idx, size)]; + return intel_arch_events[event].event_type; } /* check if a PMC is enabled by comparing it with globl_ctrl bits. */ @@ -131,15 +135,19 @@ static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); bool fixed = idx & (1u << 30); struct kvm_pmc *counters; + unsigned int num_counters; idx &= ~(3u << 30); - if (!fixed && idx >= pmu->nr_arch_gp_counters) + if (fixed) { + counters = pmu->fixed_counters; + num_counters = pmu->nr_arch_fixed_counters; + } else { + counters = pmu->gp_counters; + num_counters = pmu->nr_arch_gp_counters; + } + if (idx >= num_counters) return NULL; - if (fixed && idx >= pmu->nr_arch_fixed_counters) - return NULL; - counters = fixed ? pmu->fixed_counters : pmu->gp_counters; - - return &counters[idx]; + return &counters[array_index_nospec(idx, num_counters)]; } static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) From 8d3a5b81348e79168d1fb1a53824d5236b4aaa5f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 23 Jan 2020 15:33:02 -0500 Subject: [PATCH 3159/3715] btrfs: flush write bio if we loop in extent_write_cache_pages [ Upstream commit 42ffb0bf584ae5b6b38f72259af1e0ee417ac77f ] There exists a deadlock with range_cyclic that has existed forever. If we loop around with a bio already built we could deadlock with a writer who has the page locked that we're attempting to write but is waiting on a page in our bio to be written out. The task traces are as follows PID: 1329874 TASK: ffff889ebcdf3800 CPU: 33 COMMAND: "kworker/u113:5" #0 [ffffc900297bb658] __schedule at ffffffff81a4c33f #1 [ffffc900297bb6e0] schedule at ffffffff81a4c6e3 #2 [ffffc900297bb6f8] io_schedule at ffffffff81a4ca42 #3 [ffffc900297bb708] __lock_page at ffffffff811f145b #4 [ffffc900297bb798] __process_pages_contig at ffffffff814bc502 #5 [ffffc900297bb8c8] lock_delalloc_pages at ffffffff814bc684 #6 [ffffc900297bb900] find_lock_delalloc_range at ffffffff814be9ff #7 [ffffc900297bb9a0] writepage_delalloc at ffffffff814bebd0 #8 [ffffc900297bba18] __extent_writepage at ffffffff814bfbf2 #9 [ffffc900297bba98] extent_write_cache_pages at ffffffff814bffbd PID: 2167901 TASK: ffff889dc6a59c00 CPU: 14 COMMAND: "aio-dio-invalid" #0 [ffffc9003b50bb18] __schedule at ffffffff81a4c33f #1 [ffffc9003b50bba0] schedule at ffffffff81a4c6e3 #2 [ffffc9003b50bbb8] io_schedule at ffffffff81a4ca42 #3 [ffffc9003b50bbc8] wait_on_page_bit at ffffffff811f24d6 #4 [ffffc9003b50bc60] prepare_pages at ffffffff814b05a7 #5 [ffffc9003b50bcd8] btrfs_buffered_write at ffffffff814b1359 #6 [ffffc9003b50bdb0] btrfs_file_write_iter at ffffffff814b5933 #7 [ffffc9003b50be38] new_sync_write at ffffffff8128f6a8 #8 [ffffc9003b50bec8] vfs_write at ffffffff81292b9d #9 [ffffc9003b50bf00] ksys_pwrite64 at ffffffff81293032 I used drgn to find the respective pages we were stuck on page_entry.page 0xffffea00fbfc7500 index 8148 bit 15 pid 2167901 page_entry.page 0xffffea00f9bb7400 index 7680 bit 0 pid 1329874 As you can see the kworker is waiting for bit 0 (PG_locked) on index 7680, and aio-dio-invalid is waiting for bit 15 (PG_writeback) on index 8148. aio-dio-invalid has 7680, and the kworker epd looks like the following crash> struct extent_page_data ffffc900297bbbb0 struct extent_page_data { bio = 0xffff889f747ed830, tree = 0xffff889eed6ba448, extent_locked = 0, sync_io = 0 } Probably worth mentioning as well that it waits for writeback of the page to complete while holding a lock on it (at prepare_pages()). Using drgn I walked the bio pages looking for page 0xffffea00fbfc7500 which is the one we're waiting for writeback on bio = Object(prog, 'struct bio', address=0xffff889f747ed830) for i in range(0, bio.bi_vcnt.value_()): bv = bio.bi_io_vec[i] if bv.bv_page.value_() == 0xffffea00fbfc7500: print("FOUND IT") which validated what I suspected. The fix for this is simple, flush the epd before we loop back around to the beginning of the file during writeout. Fixes: b293f02e1423 ("Btrfs: Add writepages support") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/extent_io.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index fced434bbddc..a8be9478ca3e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4048,6 +4048,14 @@ retry: */ scanned = 1; index = 0; + + /* + * If we're looping we could run into a page that is locked by a + * writer and that writer could be waiting on writeback for a + * page in our current bio, and thus deadlock, so flush the + * write bio here. + */ + flush_write_bio(data); goto retry; } From 0eed3d7100e69e9f05c37ab13f37f42181e35d4d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 9 Dec 2019 12:19:31 -0800 Subject: [PATCH 3160/3715] KVM: x86: Fix potential put_fpu() w/o load_fpu() on MPX platform [ Upstream commit f958bd2314d117f8c29f4821401bc1925bc2e5ef ] Unlike most state managed by XSAVE, MPX is initialized to zero on INIT. Because INITs are usually recognized in the context of a VCPU_RUN call, kvm_vcpu_reset() puts the guest's FPU so that the FPU state is resident in memory, zeros the MPX state, and reloads FPU state to hardware. But, in the unlikely event that an INIT is recognized during kvm_arch_vcpu_ioctl_get_mpstate() via kvm_apic_accept_events(), kvm_vcpu_reset() will call kvm_put_guest_fpu() without a preceding kvm_load_guest_fpu() and corrupt the guest's FPU state (and possibly userspace's FPU state as well). Given that MPX is being removed from the kernel[*], fix the bug with the simple-but-ugly approach of loading the guest's FPU during KVM_GET_MP_STATE. [*] See commit f240652b6032b ("x86/mpx: Remove MPX APIs"). Fixes: f775b13eedee2 ("x86,kvm: move qemu/guest FPU switching out to vcpu_run") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/x86.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b6d80c019056..d915ea0e69cf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7677,6 +7677,9 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { + if (kvm_mpx_supported()) + kvm_load_guest_fpu(vcpu); + kvm_apic_accept_events(vcpu); if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED && vcpu->arch.pv.pv_unhalted) @@ -7684,6 +7687,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, else mp_state->mp_state = vcpu->arch.mp_state; + if (kvm_mpx_supported()) + kvm_put_guest_fpu(vcpu); return 0; } From 6b26d90d125a1bceee429cb2b839bae8ac8b8b38 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 7 Jan 2020 16:12:10 -0800 Subject: [PATCH 3161/3715] KVM: x86/mmu: Apply max PA check for MMIO sptes to 32-bit KVM [ Upstream commit e30a7d623dccdb3f880fbcad980b0cb589a1da45 ] Remove the bogus 64-bit only condition from the check that disables MMIO spte optimization when the system supports the max PA, i.e. doesn't have any reserved PA bits. 32-bit KVM always uses PAE paging for the shadow MMU, and per Intel's SDM: PAE paging translates 32-bit linear addresses to 52-bit physical addresses. The kernel's restrictions on max physical addresses are limits on how much memory the kernel can reasonably use, not what physical addresses are supported by hardware. Fixes: ce88decffd17 ("KVM: MMU: mmio page fault support") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d915ea0e69cf..d6851636edab 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6314,7 +6314,7 @@ static void kvm_set_mmio_spte_mask(void) * If reserved bit is not supported, clear the present bit to disable * mmio page fault. */ - if (IS_ENABLED(CONFIG_X86_64) && maxphyaddr == 52) + if (maxphyaddr == 52) mask &= ~1ull; kvm_mmu_set_mmio_spte_mask(mask, mask); From 07157433a3fe520110090d58715591c412e2dbab Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 10 Dec 2019 15:24:32 -0800 Subject: [PATCH 3162/3715] KVM: VMX: Add non-canonical check on writes to RTIT address MSRs [ Upstream commit fe6ed369fca98e99df55c932b85782a5687526b5 ] Reject writes to RTIT address MSRs if the data being written is a non-canonical address as the MSRs are subject to canonical checks, e.g. KVM will trigger an unchecked #GP when loading the values to hardware during pt_guest_enter(). Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/vmx/vmx.c | 8033 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 8033 insertions(+) create mode 100644 arch/x86/kvm/vmx/vmx.c diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c new file mode 100644 index 000000000000..3791ce8d269e --- /dev/null +++ b/arch/x86/kvm/vmx/vmx.c @@ -0,0 +1,8033 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Kernel-based Virtual Machine driver for Linux + * + * This module enables machines with Intel VT-x extensions to run virtual + * machines without emulation or binary translation. + * + * Copyright (C) 2006 Qumranet, Inc. + * Copyright 2010 Red Hat, Inc. and/or its affiliates. + * + * Authors: + * Avi Kivity + * Yaniv Kamay + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "capabilities.h" +#include "cpuid.h" +#include "evmcs.h" +#include "irq.h" +#include "kvm_cache_regs.h" +#include "lapic.h" +#include "mmu.h" +#include "nested.h" +#include "ops.h" +#include "pmu.h" +#include "trace.h" +#include "vmcs.h" +#include "vmcs12.h" +#include "vmx.h" +#include "x86.h" + +MODULE_AUTHOR("Qumranet"); +MODULE_LICENSE("GPL"); + +static const struct x86_cpu_id vmx_cpu_id[] = { + X86_FEATURE_MATCH(X86_FEATURE_VMX), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id); + +bool __read_mostly enable_vpid = 1; +module_param_named(vpid, enable_vpid, bool, 0444); + +static bool __read_mostly enable_vnmi = 1; +module_param_named(vnmi, enable_vnmi, bool, S_IRUGO); + +bool __read_mostly flexpriority_enabled = 1; +module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO); + +bool __read_mostly enable_ept = 1; +module_param_named(ept, enable_ept, bool, S_IRUGO); + +bool __read_mostly enable_unrestricted_guest = 1; +module_param_named(unrestricted_guest, + enable_unrestricted_guest, bool, S_IRUGO); + +bool __read_mostly enable_ept_ad_bits = 1; +module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO); + +static bool __read_mostly emulate_invalid_guest_state = true; +module_param(emulate_invalid_guest_state, bool, S_IRUGO); + +static bool __read_mostly fasteoi = 1; +module_param(fasteoi, bool, S_IRUGO); + +static bool __read_mostly enable_apicv = 1; +module_param(enable_apicv, bool, S_IRUGO); + +/* + * If nested=1, nested virtualization is supported, i.e., guests may use + * VMX and be a hypervisor for its own guests. If nested=0, guests may not + * use VMX instructions. + */ +static bool __read_mostly nested = 1; +module_param(nested, bool, S_IRUGO); + +bool __read_mostly enable_pml = 1; +module_param_named(pml, enable_pml, bool, S_IRUGO); + +static bool __read_mostly dump_invalid_vmcs = 0; +module_param(dump_invalid_vmcs, bool, 0644); + +#define MSR_BITMAP_MODE_X2APIC 1 +#define MSR_BITMAP_MODE_X2APIC_APICV 2 + +#define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL + +/* Guest_tsc -> host_tsc conversion requires 64-bit division. */ +static int __read_mostly cpu_preemption_timer_multi; +static bool __read_mostly enable_preemption_timer = 1; +#ifdef CONFIG_X86_64 +module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO); +#endif + +#define KVM_VM_CR0_ALWAYS_OFF (X86_CR0_NW | X86_CR0_CD) +#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE +#define KVM_VM_CR0_ALWAYS_ON \ + (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | \ + X86_CR0_WP | X86_CR0_PG | X86_CR0_PE) +#define KVM_CR4_GUEST_OWNED_BITS \ + (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ + | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD) + +#define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE +#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) +#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) + +#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM)) + +#define MSR_IA32_RTIT_STATUS_MASK (~(RTIT_STATUS_FILTEREN | \ + RTIT_STATUS_CONTEXTEN | RTIT_STATUS_TRIGGEREN | \ + RTIT_STATUS_ERROR | RTIT_STATUS_STOPPED | \ + RTIT_STATUS_BYTECNT)) + +#define MSR_IA32_RTIT_OUTPUT_BASE_MASK \ + (~((1UL << cpuid_query_maxphyaddr(vcpu)) - 1) | 0x7f) + +/* + * These 2 parameters are used to config the controls for Pause-Loop Exiting: + * ple_gap: upper bound on the amount of time between two successive + * executions of PAUSE in a loop. Also indicate if ple enabled. + * According to test, this time is usually smaller than 128 cycles. + * ple_window: upper bound on the amount of time a guest is allowed to execute + * in a PAUSE loop. Tests indicate that most spinlocks are held for + * less than 2^12 cycles + * Time is measured based on a counter that runs at the same rate as the TSC, + * refer SDM volume 3b section 21.6.13 & 22.1.3. + */ +static unsigned int ple_gap = KVM_DEFAULT_PLE_GAP; +module_param(ple_gap, uint, 0444); + +static unsigned int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; +module_param(ple_window, uint, 0444); + +/* Default doubles per-vcpu window every exit. */ +static unsigned int ple_window_grow = KVM_DEFAULT_PLE_WINDOW_GROW; +module_param(ple_window_grow, uint, 0444); + +/* Default resets per-vcpu window every exit to ple_window. */ +static unsigned int ple_window_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK; +module_param(ple_window_shrink, uint, 0444); + +/* Default is to compute the maximum so we can never overflow. */ +static unsigned int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX; +module_param(ple_window_max, uint, 0444); + +/* Default is SYSTEM mode, 1 for host-guest mode */ +int __read_mostly pt_mode = PT_MODE_SYSTEM; +module_param(pt_mode, int, S_IRUGO); + +static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush); +static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond); +static DEFINE_MUTEX(vmx_l1d_flush_mutex); + +/* Storage for pre module init parameter parsing */ +static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param = VMENTER_L1D_FLUSH_AUTO; + +static const struct { + const char *option; + bool for_parse; +} vmentry_l1d_param[] = { + [VMENTER_L1D_FLUSH_AUTO] = {"auto", true}, + [VMENTER_L1D_FLUSH_NEVER] = {"never", true}, + [VMENTER_L1D_FLUSH_COND] = {"cond", true}, + [VMENTER_L1D_FLUSH_ALWAYS] = {"always", true}, + [VMENTER_L1D_FLUSH_EPT_DISABLED] = {"EPT disabled", false}, + [VMENTER_L1D_FLUSH_NOT_REQUIRED] = {"not required", false}, +}; + +#define L1D_CACHE_ORDER 4 +static void *vmx_l1d_flush_pages; + +static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) +{ + struct page *page; + unsigned int i; + + if (!boot_cpu_has_bug(X86_BUG_L1TF)) { + l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED; + return 0; + } + + if (!enable_ept) { + l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED; + return 0; + } + + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) { + u64 msr; + + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr); + if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) { + l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED; + return 0; + } + } + + /* If set to auto use the default l1tf mitigation method */ + if (l1tf == VMENTER_L1D_FLUSH_AUTO) { + switch (l1tf_mitigation) { + case L1TF_MITIGATION_OFF: + l1tf = VMENTER_L1D_FLUSH_NEVER; + break; + case L1TF_MITIGATION_FLUSH_NOWARN: + case L1TF_MITIGATION_FLUSH: + case L1TF_MITIGATION_FLUSH_NOSMT: + l1tf = VMENTER_L1D_FLUSH_COND; + break; + case L1TF_MITIGATION_FULL: + case L1TF_MITIGATION_FULL_FORCE: + l1tf = VMENTER_L1D_FLUSH_ALWAYS; + break; + } + } else if (l1tf_mitigation == L1TF_MITIGATION_FULL_FORCE) { + l1tf = VMENTER_L1D_FLUSH_ALWAYS; + } + + if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages && + !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) { + /* + * This allocation for vmx_l1d_flush_pages is not tied to a VM + * lifetime and so should not be charged to a memcg. + */ + page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER); + if (!page) + return -ENOMEM; + vmx_l1d_flush_pages = page_address(page); + + /* + * Initialize each page with a different pattern in + * order to protect against KSM in the nested + * virtualization case. + */ + for (i = 0; i < 1u << L1D_CACHE_ORDER; ++i) { + memset(vmx_l1d_flush_pages + i * PAGE_SIZE, i + 1, + PAGE_SIZE); + } + } + + l1tf_vmx_mitigation = l1tf; + + if (l1tf != VMENTER_L1D_FLUSH_NEVER) + static_branch_enable(&vmx_l1d_should_flush); + else + static_branch_disable(&vmx_l1d_should_flush); + + if (l1tf == VMENTER_L1D_FLUSH_COND) + static_branch_enable(&vmx_l1d_flush_cond); + else + static_branch_disable(&vmx_l1d_flush_cond); + return 0; +} + +static int vmentry_l1d_flush_parse(const char *s) +{ + unsigned int i; + + if (s) { + for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) { + if (vmentry_l1d_param[i].for_parse && + sysfs_streq(s, vmentry_l1d_param[i].option)) + return i; + } + } + return -EINVAL; +} + +static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp) +{ + int l1tf, ret; + + l1tf = vmentry_l1d_flush_parse(s); + if (l1tf < 0) + return l1tf; + + if (!boot_cpu_has(X86_BUG_L1TF)) + return 0; + + /* + * Has vmx_init() run already? If not then this is the pre init + * parameter parsing. In that case just store the value and let + * vmx_init() do the proper setup after enable_ept has been + * established. + */ + if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO) { + vmentry_l1d_flush_param = l1tf; + return 0; + } + + mutex_lock(&vmx_l1d_flush_mutex); + ret = vmx_setup_l1d_flush(l1tf); + mutex_unlock(&vmx_l1d_flush_mutex); + return ret; +} + +static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp) +{ + if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param))) + return sprintf(s, "???\n"); + + return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option); +} + +static const struct kernel_param_ops vmentry_l1d_flush_ops = { + .set = vmentry_l1d_flush_set, + .get = vmentry_l1d_flush_get, +}; +module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644); + +static bool guest_state_valid(struct kvm_vcpu *vcpu); +static u32 vmx_segment_access_rights(struct kvm_segment *var); +static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type); + +void vmx_vmexit(void); + +#define vmx_insn_failed(fmt...) \ +do { \ + WARN_ONCE(1, fmt); \ + pr_warn_ratelimited(fmt); \ +} while (0) + +asmlinkage void vmread_error(unsigned long field, bool fault) +{ + if (fault) + kvm_spurious_fault(); + else + vmx_insn_failed("kvm: vmread failed: field=%lx\n", field); +} + +noinline void vmwrite_error(unsigned long field, unsigned long value) +{ + vmx_insn_failed("kvm: vmwrite failed: field=%lx val=%lx err=%d\n", + field, value, vmcs_read32(VM_INSTRUCTION_ERROR)); +} + +noinline void vmclear_error(struct vmcs *vmcs, u64 phys_addr) +{ + vmx_insn_failed("kvm: vmclear failed: %p/%llx\n", vmcs, phys_addr); +} + +noinline void vmptrld_error(struct vmcs *vmcs, u64 phys_addr) +{ + vmx_insn_failed("kvm: vmptrld failed: %p/%llx\n", vmcs, phys_addr); +} + +noinline void invvpid_error(unsigned long ext, u16 vpid, gva_t gva) +{ + vmx_insn_failed("kvm: invvpid failed: ext=0x%lx vpid=%u gva=0x%lx\n", + ext, vpid, gva); +} + +noinline void invept_error(unsigned long ext, u64 eptp, gpa_t gpa) +{ + vmx_insn_failed("kvm: invept failed: ext=0x%lx eptp=%llx gpa=0x%llx\n", + ext, eptp, gpa); +} + +static DEFINE_PER_CPU(struct vmcs *, vmxarea); +DEFINE_PER_CPU(struct vmcs *, current_vmcs); +/* + * We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed + * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it. + */ +static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu); + +/* + * We maintian a per-CPU linked-list of vCPU, so in wakeup_handler() we + * can find which vCPU should be waken up. + */ +static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu); +static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); + +static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS); +static DEFINE_SPINLOCK(vmx_vpid_lock); + +struct vmcs_config vmcs_config; +struct vmx_capability vmx_capability; + +#define VMX_SEGMENT_FIELD(seg) \ + [VCPU_SREG_##seg] = { \ + .selector = GUEST_##seg##_SELECTOR, \ + .base = GUEST_##seg##_BASE, \ + .limit = GUEST_##seg##_LIMIT, \ + .ar_bytes = GUEST_##seg##_AR_BYTES, \ + } + +static const struct kvm_vmx_segment_field { + unsigned selector; + unsigned base; + unsigned limit; + unsigned ar_bytes; +} kvm_vmx_segment_fields[] = { + VMX_SEGMENT_FIELD(CS), + VMX_SEGMENT_FIELD(DS), + VMX_SEGMENT_FIELD(ES), + VMX_SEGMENT_FIELD(FS), + VMX_SEGMENT_FIELD(GS), + VMX_SEGMENT_FIELD(SS), + VMX_SEGMENT_FIELD(TR), + VMX_SEGMENT_FIELD(LDTR), +}; + +u64 host_efer; +static unsigned long host_idt_base; + +/* + * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm + * will emulate SYSCALL in legacy mode if the vendor string in guest + * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To + * support this emulation, IA32_STAR must always be included in + * vmx_msr_index[], even in i386 builds. + */ +const u32 vmx_msr_index[] = { +#ifdef CONFIG_X86_64 + MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, +#endif + MSR_EFER, MSR_TSC_AUX, MSR_STAR, + MSR_IA32_TSX_CTRL, +}; + +#if IS_ENABLED(CONFIG_HYPERV) +static bool __read_mostly enlightened_vmcs = true; +module_param(enlightened_vmcs, bool, 0444); + +/* check_ept_pointer() should be under protection of ept_pointer_lock. */ +static void check_ept_pointer_match(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu; + u64 tmp_eptp = INVALID_PAGE; + int i; + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (!VALID_PAGE(tmp_eptp)) { + tmp_eptp = to_vmx(vcpu)->ept_pointer; + } else if (tmp_eptp != to_vmx(vcpu)->ept_pointer) { + to_kvm_vmx(kvm)->ept_pointers_match + = EPT_POINTERS_MISMATCH; + return; + } + } + + to_kvm_vmx(kvm)->ept_pointers_match = EPT_POINTERS_MATCH; +} + +static int kvm_fill_hv_flush_list_func(struct hv_guest_mapping_flush_list *flush, + void *data) +{ + struct kvm_tlb_range *range = data; + + return hyperv_fill_flush_guest_mapping_list(flush, range->start_gfn, + range->pages); +} + +static inline int __hv_remote_flush_tlb_with_range(struct kvm *kvm, + struct kvm_vcpu *vcpu, struct kvm_tlb_range *range) +{ + u64 ept_pointer = to_vmx(vcpu)->ept_pointer; + + /* + * FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE hypercall needs address + * of the base of EPT PML4 table, strip off EPT configuration + * information. + */ + if (range) + return hyperv_flush_guest_mapping_range(ept_pointer & PAGE_MASK, + kvm_fill_hv_flush_list_func, (void *)range); + else + return hyperv_flush_guest_mapping(ept_pointer & PAGE_MASK); +} + +static int hv_remote_flush_tlb_with_range(struct kvm *kvm, + struct kvm_tlb_range *range) +{ + struct kvm_vcpu *vcpu; + int ret = 0, i; + + spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock); + + if (to_kvm_vmx(kvm)->ept_pointers_match == EPT_POINTERS_CHECK) + check_ept_pointer_match(kvm); + + if (to_kvm_vmx(kvm)->ept_pointers_match != EPT_POINTERS_MATCH) { + kvm_for_each_vcpu(i, vcpu, kvm) { + /* If ept_pointer is invalid pointer, bypass flush request. */ + if (VALID_PAGE(to_vmx(vcpu)->ept_pointer)) + ret |= __hv_remote_flush_tlb_with_range( + kvm, vcpu, range); + } + } else { + ret = __hv_remote_flush_tlb_with_range(kvm, + kvm_get_vcpu(kvm, 0), range); + } + + spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock); + return ret; +} +static int hv_remote_flush_tlb(struct kvm *kvm) +{ + return hv_remote_flush_tlb_with_range(kvm, NULL); +} + +static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu) +{ + struct hv_enlightened_vmcs *evmcs; + struct hv_partition_assist_pg **p_hv_pa_pg = + &vcpu->kvm->arch.hyperv.hv_pa_pg; + /* + * Synthetic VM-Exit is not enabled in current code and so All + * evmcs in singe VM shares same assist page. + */ + if (!*p_hv_pa_pg) + *p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL); + + if (!*p_hv_pa_pg) + return -ENOMEM; + + evmcs = (struct hv_enlightened_vmcs *)to_vmx(vcpu)->loaded_vmcs->vmcs; + + evmcs->partition_assist_page = + __pa(*p_hv_pa_pg); + evmcs->hv_vm_id = (unsigned long)vcpu->kvm; + evmcs->hv_enlightenments_control.nested_flush_hypercall = 1; + + return 0; +} + +#endif /* IS_ENABLED(CONFIG_HYPERV) */ + +/* + * Comment's format: document - errata name - stepping - processor name. + * Refer from + * https://www.virtualbox.org/svn/vbox/trunk/src/VBox/VMM/VMMR0/HMR0.cpp + */ +static u32 vmx_preemption_cpu_tfms[] = { +/* 323344.pdf - BA86 - D0 - Xeon 7500 Series */ +0x000206E6, +/* 323056.pdf - AAX65 - C2 - Xeon L3406 */ +/* 322814.pdf - AAT59 - C2 - i7-600, i5-500, i5-400 and i3-300 Mobile */ +/* 322911.pdf - AAU65 - C2 - i5-600, i3-500 Desktop and Pentium G6950 */ +0x00020652, +/* 322911.pdf - AAU65 - K0 - i5-600, i3-500 Desktop and Pentium G6950 */ +0x00020655, +/* 322373.pdf - AAO95 - B1 - Xeon 3400 Series */ +/* 322166.pdf - AAN92 - B1 - i7-800 and i5-700 Desktop */ +/* + * 320767.pdf - AAP86 - B1 - + * i7-900 Mobile Extreme, i7-800 and i7-700 Mobile + */ +0x000106E5, +/* 321333.pdf - AAM126 - C0 - Xeon 3500 */ +0x000106A0, +/* 321333.pdf - AAM126 - C1 - Xeon 3500 */ +0x000106A1, +/* 320836.pdf - AAJ124 - C0 - i7-900 Desktop Extreme and i7-900 Desktop */ +0x000106A4, + /* 321333.pdf - AAM126 - D0 - Xeon 3500 */ + /* 321324.pdf - AAK139 - D0 - Xeon 5500 */ + /* 320836.pdf - AAJ124 - D0 - i7-900 Extreme and i7-900 Desktop */ +0x000106A5, + /* Xeon E3-1220 V2 */ +0x000306A8, +}; + +static inline bool cpu_has_broken_vmx_preemption_timer(void) +{ + u32 eax = cpuid_eax(0x00000001), i; + + /* Clear the reserved bits */ + eax &= ~(0x3U << 14 | 0xfU << 28); + for (i = 0; i < ARRAY_SIZE(vmx_preemption_cpu_tfms); i++) + if (eax == vmx_preemption_cpu_tfms[i]) + return true; + + return false; +} + +static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu) +{ + return flexpriority_enabled && lapic_in_kernel(vcpu); +} + +static inline bool report_flexpriority(void) +{ + return flexpriority_enabled; +} + +static inline int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) +{ + int i; + + for (i = 0; i < vmx->nmsrs; ++i) + if (vmx_msr_index[vmx->guest_msrs[i].index] == msr) + return i; + return -1; +} + +struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) +{ + int i; + + i = __find_msr_index(vmx, msr); + if (i >= 0) + return &vmx->guest_msrs[i]; + return NULL; +} + +static int vmx_set_guest_msr(struct vcpu_vmx *vmx, struct shared_msr_entry *msr, u64 data) +{ + int ret = 0; + + u64 old_msr_data = msr->data; + msr->data = data; + if (msr - vmx->guest_msrs < vmx->save_nmsrs) { + preempt_disable(); + ret = kvm_set_shared_msr(msr->index, msr->data, + msr->mask); + preempt_enable(); + if (ret) + msr->data = old_msr_data; + } + return ret; +} + +void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs) +{ + vmcs_clear(loaded_vmcs->vmcs); + if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched) + vmcs_clear(loaded_vmcs->shadow_vmcs); + loaded_vmcs->cpu = -1; + loaded_vmcs->launched = 0; +} + +#ifdef CONFIG_KEXEC_CORE +/* + * This bitmap is used to indicate whether the vmclear + * operation is enabled on all cpus. All disabled by + * default. + */ +static cpumask_t crash_vmclear_enabled_bitmap = CPU_MASK_NONE; + +static inline void crash_enable_local_vmclear(int cpu) +{ + cpumask_set_cpu(cpu, &crash_vmclear_enabled_bitmap); +} + +static inline void crash_disable_local_vmclear(int cpu) +{ + cpumask_clear_cpu(cpu, &crash_vmclear_enabled_bitmap); +} + +static inline int crash_local_vmclear_enabled(int cpu) +{ + return cpumask_test_cpu(cpu, &crash_vmclear_enabled_bitmap); +} + +static void crash_vmclear_local_loaded_vmcss(void) +{ + int cpu = raw_smp_processor_id(); + struct loaded_vmcs *v; + + if (!crash_local_vmclear_enabled(cpu)) + return; + + list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu), + loaded_vmcss_on_cpu_link) + vmcs_clear(v->vmcs); +} +#else +static inline void crash_enable_local_vmclear(int cpu) { } +static inline void crash_disable_local_vmclear(int cpu) { } +#endif /* CONFIG_KEXEC_CORE */ + +static void __loaded_vmcs_clear(void *arg) +{ + struct loaded_vmcs *loaded_vmcs = arg; + int cpu = raw_smp_processor_id(); + + if (loaded_vmcs->cpu != cpu) + return; /* vcpu migration can race with cpu offline */ + if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs) + per_cpu(current_vmcs, cpu) = NULL; + crash_disable_local_vmclear(cpu); + list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link); + + /* + * we should ensure updating loaded_vmcs->loaded_vmcss_on_cpu_link + * is before setting loaded_vmcs->vcpu to -1 which is done in + * loaded_vmcs_init. Otherwise, other cpu can see vcpu = -1 fist + * then adds the vmcs into percpu list before it is deleted. + */ + smp_wmb(); + + loaded_vmcs_init(loaded_vmcs); + crash_enable_local_vmclear(cpu); +} + +void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs) +{ + int cpu = loaded_vmcs->cpu; + + if (cpu != -1) + smp_call_function_single(cpu, + __loaded_vmcs_clear, loaded_vmcs, 1); +} + +static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg, + unsigned field) +{ + bool ret; + u32 mask = 1 << (seg * SEG_FIELD_NR + field); + + if (!kvm_register_is_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS)) { + kvm_register_mark_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS); + vmx->segment_cache.bitmask = 0; + } + ret = vmx->segment_cache.bitmask & mask; + vmx->segment_cache.bitmask |= mask; + return ret; +} + +static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg) +{ + u16 *p = &vmx->segment_cache.seg[seg].selector; + + if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL)) + *p = vmcs_read16(kvm_vmx_segment_fields[seg].selector); + return *p; +} + +static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg) +{ + ulong *p = &vmx->segment_cache.seg[seg].base; + + if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE)) + *p = vmcs_readl(kvm_vmx_segment_fields[seg].base); + return *p; +} + +static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg) +{ + u32 *p = &vmx->segment_cache.seg[seg].limit; + + if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT)) + *p = vmcs_read32(kvm_vmx_segment_fields[seg].limit); + return *p; +} + +static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg) +{ + u32 *p = &vmx->segment_cache.seg[seg].ar; + + if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR)) + *p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes); + return *p; +} + +void update_exception_bitmap(struct kvm_vcpu *vcpu) +{ + u32 eb; + + eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | + (1u << DB_VECTOR) | (1u << AC_VECTOR); + /* + * Guest access to VMware backdoor ports could legitimately + * trigger #GP because of TSS I/O permission bitmap. + * We intercept those #GP and allow access to them anyway + * as VMware does. + */ + if (enable_vmware_backdoor) + eb |= (1u << GP_VECTOR); + if ((vcpu->guest_debug & + (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == + (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) + eb |= 1u << BP_VECTOR; + if (to_vmx(vcpu)->rmode.vm86_active) + eb = ~0; + if (enable_ept) + eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ + + /* When we are running a nested L2 guest and L1 specified for it a + * certain exception bitmap, we must trap the same exceptions and pass + * them to L1. When running L2, we will only handle the exceptions + * specified above if L1 did not want them. + */ + if (is_guest_mode(vcpu)) + eb |= get_vmcs12(vcpu)->exception_bitmap; + + vmcs_write32(EXCEPTION_BITMAP, eb); +} + +/* + * Check if MSR is intercepted for currently loaded MSR bitmap. + */ +static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) +{ + unsigned long *msr_bitmap; + int f = sizeof(unsigned long); + + if (!cpu_has_vmx_msr_bitmap()) + return true; + + msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap; + + if (msr <= 0x1fff) { + return !!test_bit(msr, msr_bitmap + 0x800 / f); + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { + msr &= 0x1fff; + return !!test_bit(msr, msr_bitmap + 0xc00 / f); + } + + return true; +} + +static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, + unsigned long entry, unsigned long exit) +{ + vm_entry_controls_clearbit(vmx, entry); + vm_exit_controls_clearbit(vmx, exit); +} + +int vmx_find_msr_index(struct vmx_msrs *m, u32 msr) +{ + unsigned int i; + + for (i = 0; i < m->nr; ++i) { + if (m->val[i].index == msr) + return i; + } + return -ENOENT; +} + +static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) +{ + int i; + struct msr_autoload *m = &vmx->msr_autoload; + + switch (msr) { + case MSR_EFER: + if (cpu_has_load_ia32_efer()) { + clear_atomic_switch_msr_special(vmx, + VM_ENTRY_LOAD_IA32_EFER, + VM_EXIT_LOAD_IA32_EFER); + return; + } + break; + case MSR_CORE_PERF_GLOBAL_CTRL: + if (cpu_has_load_perf_global_ctrl()) { + clear_atomic_switch_msr_special(vmx, + VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, + VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL); + return; + } + break; + } + i = vmx_find_msr_index(&m->guest, msr); + if (i < 0) + goto skip_guest; + --m->guest.nr; + m->guest.val[i] = m->guest.val[m->guest.nr]; + vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr); + +skip_guest: + i = vmx_find_msr_index(&m->host, msr); + if (i < 0) + return; + + --m->host.nr; + m->host.val[i] = m->host.val[m->host.nr]; + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr); +} + +static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx, + unsigned long entry, unsigned long exit, + unsigned long guest_val_vmcs, unsigned long host_val_vmcs, + u64 guest_val, u64 host_val) +{ + vmcs_write64(guest_val_vmcs, guest_val); + if (host_val_vmcs != HOST_IA32_EFER) + vmcs_write64(host_val_vmcs, host_val); + vm_entry_controls_setbit(vmx, entry); + vm_exit_controls_setbit(vmx, exit); +} + +static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, + u64 guest_val, u64 host_val, bool entry_only) +{ + int i, j = 0; + struct msr_autoload *m = &vmx->msr_autoload; + + switch (msr) { + case MSR_EFER: + if (cpu_has_load_ia32_efer()) { + add_atomic_switch_msr_special(vmx, + VM_ENTRY_LOAD_IA32_EFER, + VM_EXIT_LOAD_IA32_EFER, + GUEST_IA32_EFER, + HOST_IA32_EFER, + guest_val, host_val); + return; + } + break; + case MSR_CORE_PERF_GLOBAL_CTRL: + if (cpu_has_load_perf_global_ctrl()) { + add_atomic_switch_msr_special(vmx, + VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, + VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL, + GUEST_IA32_PERF_GLOBAL_CTRL, + HOST_IA32_PERF_GLOBAL_CTRL, + guest_val, host_val); + return; + } + break; + case MSR_IA32_PEBS_ENABLE: + /* PEBS needs a quiescent period after being disabled (to write + * a record). Disabling PEBS through VMX MSR swapping doesn't + * provide that period, so a CPU could write host's record into + * guest's memory. + */ + wrmsrl(MSR_IA32_PEBS_ENABLE, 0); + } + + i = vmx_find_msr_index(&m->guest, msr); + if (!entry_only) + j = vmx_find_msr_index(&m->host, msr); + + if ((i < 0 && m->guest.nr == NR_LOADSTORE_MSRS) || + (j < 0 && m->host.nr == NR_LOADSTORE_MSRS)) { + printk_once(KERN_WARNING "Not enough msr switch entries. " + "Can't add msr %x\n", msr); + return; + } + if (i < 0) { + i = m->guest.nr++; + vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr); + } + m->guest.val[i].index = msr; + m->guest.val[i].value = guest_val; + + if (entry_only) + return; + + if (j < 0) { + j = m->host.nr++; + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr); + } + m->host.val[j].index = msr; + m->host.val[j].value = host_val; +} + +static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) +{ + u64 guest_efer = vmx->vcpu.arch.efer; + u64 ignore_bits = 0; + + /* Shadow paging assumes NX to be available. */ + if (!enable_ept) + guest_efer |= EFER_NX; + + /* + * LMA and LME handled by hardware; SCE meaningless outside long mode. + */ + ignore_bits |= EFER_SCE; +#ifdef CONFIG_X86_64 + ignore_bits |= EFER_LMA | EFER_LME; + /* SCE is meaningful only in long mode on Intel */ + if (guest_efer & EFER_LMA) + ignore_bits &= ~(u64)EFER_SCE; +#endif + + /* + * On EPT, we can't emulate NX, so we must switch EFER atomically. + * On CPUs that support "load IA32_EFER", always switch EFER + * atomically, since it's faster than switching it manually. + */ + if (cpu_has_load_ia32_efer() || + (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) { + if (!(guest_efer & EFER_LMA)) + guest_efer &= ~EFER_LME; + if (guest_efer != host_efer) + add_atomic_switch_msr(vmx, MSR_EFER, + guest_efer, host_efer, false); + else + clear_atomic_switch_msr(vmx, MSR_EFER); + return false; + } else { + clear_atomic_switch_msr(vmx, MSR_EFER); + + guest_efer &= ~ignore_bits; + guest_efer |= host_efer & ignore_bits; + + vmx->guest_msrs[efer_offset].data = guest_efer; + vmx->guest_msrs[efer_offset].mask = ~ignore_bits; + + return true; + } +} + +#ifdef CONFIG_X86_32 +/* + * On 32-bit kernels, VM exits still load the FS and GS bases from the + * VMCS rather than the segment table. KVM uses this helper to figure + * out the current bases to poke them into the VMCS before entry. + */ +static unsigned long segment_base(u16 selector) +{ + struct desc_struct *table; + unsigned long v; + + if (!(selector & ~SEGMENT_RPL_MASK)) + return 0; + + table = get_current_gdt_ro(); + + if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT) { + u16 ldt_selector = kvm_read_ldt(); + + if (!(ldt_selector & ~SEGMENT_RPL_MASK)) + return 0; + + table = (struct desc_struct *)segment_base(ldt_selector); + } + v = get_desc_base(&table[selector >> 3]); + return v; +} +#endif + +static inline void pt_load_msr(struct pt_ctx *ctx, u32 addr_range) +{ + u32 i; + + wrmsrl(MSR_IA32_RTIT_STATUS, ctx->status); + wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, ctx->output_base); + wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, ctx->output_mask); + wrmsrl(MSR_IA32_RTIT_CR3_MATCH, ctx->cr3_match); + for (i = 0; i < addr_range; i++) { + wrmsrl(MSR_IA32_RTIT_ADDR0_A + i * 2, ctx->addr_a[i]); + wrmsrl(MSR_IA32_RTIT_ADDR0_B + i * 2, ctx->addr_b[i]); + } +} + +static inline void pt_save_msr(struct pt_ctx *ctx, u32 addr_range) +{ + u32 i; + + rdmsrl(MSR_IA32_RTIT_STATUS, ctx->status); + rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, ctx->output_base); + rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, ctx->output_mask); + rdmsrl(MSR_IA32_RTIT_CR3_MATCH, ctx->cr3_match); + for (i = 0; i < addr_range; i++) { + rdmsrl(MSR_IA32_RTIT_ADDR0_A + i * 2, ctx->addr_a[i]); + rdmsrl(MSR_IA32_RTIT_ADDR0_B + i * 2, ctx->addr_b[i]); + } +} + +static void pt_guest_enter(struct vcpu_vmx *vmx) +{ + if (pt_mode == PT_MODE_SYSTEM) + return; + + /* + * GUEST_IA32_RTIT_CTL is already set in the VMCS. + * Save host state before VM entry. + */ + rdmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl); + if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) { + wrmsrl(MSR_IA32_RTIT_CTL, 0); + pt_save_msr(&vmx->pt_desc.host, vmx->pt_desc.addr_range); + pt_load_msr(&vmx->pt_desc.guest, vmx->pt_desc.addr_range); + } +} + +static void pt_guest_exit(struct vcpu_vmx *vmx) +{ + if (pt_mode == PT_MODE_SYSTEM) + return; + + if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) { + pt_save_msr(&vmx->pt_desc.guest, vmx->pt_desc.addr_range); + pt_load_msr(&vmx->pt_desc.host, vmx->pt_desc.addr_range); + } + + /* Reload host state (IA32_RTIT_CTL will be cleared on VM exit). */ + wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl); +} + +void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel, + unsigned long fs_base, unsigned long gs_base) +{ + if (unlikely(fs_sel != host->fs_sel)) { + if (!(fs_sel & 7)) + vmcs_write16(HOST_FS_SELECTOR, fs_sel); + else + vmcs_write16(HOST_FS_SELECTOR, 0); + host->fs_sel = fs_sel; + } + if (unlikely(gs_sel != host->gs_sel)) { + if (!(gs_sel & 7)) + vmcs_write16(HOST_GS_SELECTOR, gs_sel); + else + vmcs_write16(HOST_GS_SELECTOR, 0); + host->gs_sel = gs_sel; + } + if (unlikely(fs_base != host->fs_base)) { + vmcs_writel(HOST_FS_BASE, fs_base); + host->fs_base = fs_base; + } + if (unlikely(gs_base != host->gs_base)) { + vmcs_writel(HOST_GS_BASE, gs_base); + host->gs_base = gs_base; + } +} + +void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct vmcs_host_state *host_state; +#ifdef CONFIG_X86_64 + int cpu = raw_smp_processor_id(); +#endif + unsigned long fs_base, gs_base; + u16 fs_sel, gs_sel; + int i; + + vmx->req_immediate_exit = false; + + /* + * Note that guest MSRs to be saved/restored can also be changed + * when guest state is loaded. This happens when guest transitions + * to/from long-mode by setting MSR_EFER.LMA. + */ + if (!vmx->guest_msrs_ready) { + vmx->guest_msrs_ready = true; + for (i = 0; i < vmx->save_nmsrs; ++i) + kvm_set_shared_msr(vmx->guest_msrs[i].index, + vmx->guest_msrs[i].data, + vmx->guest_msrs[i].mask); + + } + if (vmx->guest_state_loaded) + return; + + host_state = &vmx->loaded_vmcs->host_state; + + /* + * Set host fs and gs selectors. Unfortunately, 22.2.3 does not + * allow segment selectors with cpl > 0 or ti == 1. + */ + host_state->ldt_sel = kvm_read_ldt(); + +#ifdef CONFIG_X86_64 + savesegment(ds, host_state->ds_sel); + savesegment(es, host_state->es_sel); + + gs_base = cpu_kernelmode_gs_base(cpu); + if (likely(is_64bit_mm(current->mm))) { + save_fsgs_for_kvm(); + fs_sel = current->thread.fsindex; + gs_sel = current->thread.gsindex; + fs_base = current->thread.fsbase; + vmx->msr_host_kernel_gs_base = current->thread.gsbase; + } else { + savesegment(fs, fs_sel); + savesegment(gs, gs_sel); + fs_base = read_msr(MSR_FS_BASE); + vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE); + } + + wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); +#else + savesegment(fs, fs_sel); + savesegment(gs, gs_sel); + fs_base = segment_base(fs_sel); + gs_base = segment_base(gs_sel); +#endif + + vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base); + vmx->guest_state_loaded = true; +} + +static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx) +{ + struct vmcs_host_state *host_state; + + if (!vmx->guest_state_loaded) + return; + + host_state = &vmx->loaded_vmcs->host_state; + + ++vmx->vcpu.stat.host_state_reload; + +#ifdef CONFIG_X86_64 + rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); +#endif + if (host_state->ldt_sel || (host_state->gs_sel & 7)) { + kvm_load_ldt(host_state->ldt_sel); +#ifdef CONFIG_X86_64 + load_gs_index(host_state->gs_sel); +#else + loadsegment(gs, host_state->gs_sel); +#endif + } + if (host_state->fs_sel & 7) + loadsegment(fs, host_state->fs_sel); +#ifdef CONFIG_X86_64 + if (unlikely(host_state->ds_sel | host_state->es_sel)) { + loadsegment(ds, host_state->ds_sel); + loadsegment(es, host_state->es_sel); + } +#endif + invalidate_tss_limit(); +#ifdef CONFIG_X86_64 + wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); +#endif + load_fixmap_gdt(raw_smp_processor_id()); + vmx->guest_state_loaded = false; + vmx->guest_msrs_ready = false; +} + +#ifdef CONFIG_X86_64 +static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx) +{ + preempt_disable(); + if (vmx->guest_state_loaded) + rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); + preempt_enable(); + return vmx->msr_guest_kernel_gs_base; +} + +static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data) +{ + preempt_disable(); + if (vmx->guest_state_loaded) + wrmsrl(MSR_KERNEL_GS_BASE, data); + preempt_enable(); + vmx->msr_guest_kernel_gs_base = data; +} +#endif + +static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) +{ + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + struct pi_desc old, new; + unsigned int dest; + + /* + * In case of hot-plug or hot-unplug, we may have to undo + * vmx_vcpu_pi_put even if there is no assigned device. And we + * always keep PI.NDST up to date for simplicity: it makes the + * code easier, and CPU migration is not a fast path. + */ + if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu) + return; + + /* + * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change + * PI.NDST: pi_post_block is the one expected to change PID.NDST and the + * wakeup handler expects the vCPU to be on the blocked_vcpu_list that + * matches PI.NDST. Otherwise, a vcpu may not be able to be woken up + * correctly. + */ + if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || vcpu->cpu == cpu) { + pi_clear_sn(pi_desc); + goto after_clear_sn; + } + + /* The full case. */ + do { + old.control = new.control = pi_desc->control; + + dest = cpu_physical_id(cpu); + + if (x2apic_enabled()) + new.ndst = dest; + else + new.ndst = (dest << 8) & 0xFF00; + + new.sn = 0; + } while (cmpxchg64(&pi_desc->control, old.control, + new.control) != old.control); + +after_clear_sn: + + /* + * Clear SN before reading the bitmap. The VT-d firmware + * writes the bitmap and reads SN atomically (5.2.3 in the + * spec), so it doesn't really have a memory barrier that + * pairs with this, but we cannot do that and we need one. + */ + smp_mb__after_atomic(); + + if (!pi_is_pir_empty(pi_desc)) + pi_set_on(pi_desc); +} + +void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + bool already_loaded = vmx->loaded_vmcs->cpu == cpu; + + if (!already_loaded) { + loaded_vmcs_clear(vmx->loaded_vmcs); + local_irq_disable(); + crash_disable_local_vmclear(cpu); + + /* + * Read loaded_vmcs->cpu should be before fetching + * loaded_vmcs->loaded_vmcss_on_cpu_link. + * See the comments in __loaded_vmcs_clear(). + */ + smp_rmb(); + + list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link, + &per_cpu(loaded_vmcss_on_cpu, cpu)); + crash_enable_local_vmclear(cpu); + local_irq_enable(); + } + + if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) { + per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs; + vmcs_load(vmx->loaded_vmcs->vmcs); + indirect_branch_prediction_barrier(); + } + + if (!already_loaded) { + void *gdt = get_current_gdt_ro(); + unsigned long sysenter_esp; + + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); + + /* + * Linux uses per-cpu TSS and GDT, so set these when switching + * processors. See 22.2.4. + */ + vmcs_writel(HOST_TR_BASE, + (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss); + vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */ + + rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); + vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ + + vmx->loaded_vmcs->cpu = cpu; + } + + /* Setup TSC multiplier */ + if (kvm_has_tsc_control && + vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) + decache_tsc_multiplier(vmx); +} + +/* + * Switches to specified vcpu, until a matching vcpu_put(), but assumes + * vcpu mutex is already taken. + */ +void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + vmx_vcpu_load_vmcs(vcpu, cpu); + + vmx_vcpu_pi_load(vcpu, cpu); + + vmx->host_pkru = read_pkru(); + vmx->host_debugctlmsr = get_debugctlmsr(); +} + +static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu) +{ + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + + if (!kvm_arch_has_assigned_device(vcpu->kvm) || + !irq_remapping_cap(IRQ_POSTING_CAP) || + !kvm_vcpu_apicv_active(vcpu)) + return; + + /* Set SN when the vCPU is preempted */ + if (vcpu->preempted) + pi_set_sn(pi_desc); +} + +static void vmx_vcpu_put(struct kvm_vcpu *vcpu) +{ + vmx_vcpu_pi_put(vcpu); + + vmx_prepare_switch_to_host(to_vmx(vcpu)); +} + +static bool emulation_required(struct kvm_vcpu *vcpu) +{ + return emulate_invalid_guest_state && !guest_state_valid(vcpu); +} + +static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu); + +unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned long rflags, save_rflags; + + if (!kvm_register_is_available(vcpu, VCPU_EXREG_RFLAGS)) { + kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS); + rflags = vmcs_readl(GUEST_RFLAGS); + if (vmx->rmode.vm86_active) { + rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; + save_rflags = vmx->rmode.save_rflags; + rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; + } + vmx->rflags = rflags; + } + return vmx->rflags; +} + +void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned long old_rflags; + + if (enable_unrestricted_guest) { + kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS); + vmx->rflags = rflags; + vmcs_writel(GUEST_RFLAGS, rflags); + return; + } + + old_rflags = vmx_get_rflags(vcpu); + vmx->rflags = rflags; + if (vmx->rmode.vm86_active) { + vmx->rmode.save_rflags = rflags; + rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; + } + vmcs_writel(GUEST_RFLAGS, rflags); + + if ((old_rflags ^ vmx->rflags) & X86_EFLAGS_VM) + vmx->emulation_required = emulation_required(vcpu); +} + +u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu) +{ + u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); + int ret = 0; + + if (interruptibility & GUEST_INTR_STATE_STI) + ret |= KVM_X86_SHADOW_INT_STI; + if (interruptibility & GUEST_INTR_STATE_MOV_SS) + ret |= KVM_X86_SHADOW_INT_MOV_SS; + + return ret; +} + +void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) +{ + u32 interruptibility_old = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); + u32 interruptibility = interruptibility_old; + + interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS); + + if (mask & KVM_X86_SHADOW_INT_MOV_SS) + interruptibility |= GUEST_INTR_STATE_MOV_SS; + else if (mask & KVM_X86_SHADOW_INT_STI) + interruptibility |= GUEST_INTR_STATE_STI; + + if ((interruptibility != interruptibility_old)) + vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility); +} + +static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned long value; + + /* + * Any MSR write that attempts to change bits marked reserved will + * case a #GP fault. + */ + if (data & vmx->pt_desc.ctl_bitmask) + return 1; + + /* + * Any attempt to modify IA32_RTIT_CTL while TraceEn is set will + * result in a #GP unless the same write also clears TraceEn. + */ + if ((vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) && + ((vmx->pt_desc.guest.ctl ^ data) & ~RTIT_CTL_TRACEEN)) + return 1; + + /* + * WRMSR to IA32_RTIT_CTL that sets TraceEn but clears this bit + * and FabricEn would cause #GP, if + * CPUID.(EAX=14H, ECX=0):ECX.SNGLRGNOUT[bit 2] = 0 + */ + if ((data & RTIT_CTL_TRACEEN) && !(data & RTIT_CTL_TOPA) && + !(data & RTIT_CTL_FABRIC_EN) && + !intel_pt_validate_cap(vmx->pt_desc.caps, + PT_CAP_single_range_output)) + return 1; + + /* + * MTCFreq, CycThresh and PSBFreq encodings check, any MSR write that + * utilize encodings marked reserved will casue a #GP fault. + */ + value = intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc_periods); + if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc) && + !test_bit((data & RTIT_CTL_MTC_RANGE) >> + RTIT_CTL_MTC_RANGE_OFFSET, &value)) + return 1; + value = intel_pt_validate_cap(vmx->pt_desc.caps, + PT_CAP_cycle_thresholds); + if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc) && + !test_bit((data & RTIT_CTL_CYC_THRESH) >> + RTIT_CTL_CYC_THRESH_OFFSET, &value)) + return 1; + value = intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_periods); + if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc) && + !test_bit((data & RTIT_CTL_PSB_FREQ) >> + RTIT_CTL_PSB_FREQ_OFFSET, &value)) + return 1; + + /* + * If ADDRx_CFG is reserved or the encodings is >2 will + * cause a #GP fault. + */ + value = (data & RTIT_CTL_ADDR0) >> RTIT_CTL_ADDR0_OFFSET; + if ((value && (vmx->pt_desc.addr_range < 1)) || (value > 2)) + return 1; + value = (data & RTIT_CTL_ADDR1) >> RTIT_CTL_ADDR1_OFFSET; + if ((value && (vmx->pt_desc.addr_range < 2)) || (value > 2)) + return 1; + value = (data & RTIT_CTL_ADDR2) >> RTIT_CTL_ADDR2_OFFSET; + if ((value && (vmx->pt_desc.addr_range < 3)) || (value > 2)) + return 1; + value = (data & RTIT_CTL_ADDR3) >> RTIT_CTL_ADDR3_OFFSET; + if ((value && (vmx->pt_desc.addr_range < 4)) || (value > 2)) + return 1; + + return 0; +} + +static int skip_emulated_instruction(struct kvm_vcpu *vcpu) +{ + unsigned long rip; + + /* + * Using VMCS.VM_EXIT_INSTRUCTION_LEN on EPT misconfig depends on + * undefined behavior: Intel's SDM doesn't mandate the VMCS field be + * set when EPT misconfig occurs. In practice, real hardware updates + * VM_EXIT_INSTRUCTION_LEN on EPT misconfig, but other hypervisors + * (namely Hyper-V) don't set it due to it being undefined behavior, + * i.e. we end up advancing IP with some random value. + */ + if (!static_cpu_has(X86_FEATURE_HYPERVISOR) || + to_vmx(vcpu)->exit_reason != EXIT_REASON_EPT_MISCONFIG) { + rip = kvm_rip_read(vcpu); + rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); + kvm_rip_write(vcpu, rip); + } else { + if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP)) + return 0; + } + + /* skipping an emulated instruction also counts */ + vmx_set_interrupt_shadow(vcpu, 0); + + return 1; +} + +static void vmx_clear_hlt(struct kvm_vcpu *vcpu) +{ + /* + * Ensure that we clear the HLT state in the VMCS. We don't need to + * explicitly skip the instruction because if the HLT state is set, + * then the instruction is already executing and RIP has already been + * advanced. + */ + if (kvm_hlt_in_guest(vcpu->kvm) && + vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT) + vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); +} + +static void vmx_queue_exception(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned nr = vcpu->arch.exception.nr; + bool has_error_code = vcpu->arch.exception.has_error_code; + u32 error_code = vcpu->arch.exception.error_code; + u32 intr_info = nr | INTR_INFO_VALID_MASK; + + kvm_deliver_exception_payload(vcpu); + + if (has_error_code) { + vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); + intr_info |= INTR_INFO_DELIVER_CODE_MASK; + } + + if (vmx->rmode.vm86_active) { + int inc_eip = 0; + if (kvm_exception_is_soft(nr)) + inc_eip = vcpu->arch.event_exit_inst_len; + kvm_inject_realmode_interrupt(vcpu, nr, inc_eip); + return; + } + + WARN_ON_ONCE(vmx->emulation_required); + + if (kvm_exception_is_soft(nr)) { + vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, + vmx->vcpu.arch.event_exit_inst_len); + intr_info |= INTR_TYPE_SOFT_EXCEPTION; + } else + intr_info |= INTR_TYPE_HARD_EXCEPTION; + + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); + + vmx_clear_hlt(vcpu); +} + +static bool vmx_rdtscp_supported(void) +{ + return cpu_has_vmx_rdtscp(); +} + +static bool vmx_invpcid_supported(void) +{ + return cpu_has_vmx_invpcid(); +} + +/* + * Swap MSR entry in host/guest MSR entry array. + */ +static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) +{ + struct shared_msr_entry tmp; + + tmp = vmx->guest_msrs[to]; + vmx->guest_msrs[to] = vmx->guest_msrs[from]; + vmx->guest_msrs[from] = tmp; +} + +/* + * Set up the vmcs to automatically save and restore system + * msrs. Don't touch the 64-bit msrs if the guest is in legacy + * mode, as fiddling with msrs is very expensive. + */ +static void setup_msrs(struct vcpu_vmx *vmx) +{ + int save_nmsrs, index; + + save_nmsrs = 0; +#ifdef CONFIG_X86_64 + /* + * The SYSCALL MSRs are only needed on long mode guests, and only + * when EFER.SCE is set. + */ + if (is_long_mode(&vmx->vcpu) && (vmx->vcpu.arch.efer & EFER_SCE)) { + index = __find_msr_index(vmx, MSR_STAR); + if (index >= 0) + move_msr_up(vmx, index, save_nmsrs++); + index = __find_msr_index(vmx, MSR_LSTAR); + if (index >= 0) + move_msr_up(vmx, index, save_nmsrs++); + index = __find_msr_index(vmx, MSR_SYSCALL_MASK); + if (index >= 0) + move_msr_up(vmx, index, save_nmsrs++); + } +#endif + index = __find_msr_index(vmx, MSR_EFER); + if (index >= 0 && update_transition_efer(vmx, index)) + move_msr_up(vmx, index, save_nmsrs++); + index = __find_msr_index(vmx, MSR_TSC_AUX); + if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP)) + move_msr_up(vmx, index, save_nmsrs++); + index = __find_msr_index(vmx, MSR_IA32_TSX_CTRL); + if (index >= 0) + move_msr_up(vmx, index, save_nmsrs++); + + vmx->save_nmsrs = save_nmsrs; + vmx->guest_msrs_ready = false; + + if (cpu_has_vmx_msr_bitmap()) + vmx_update_msr_bitmap(&vmx->vcpu); +} + +static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + + if (is_guest_mode(vcpu) && + (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)) + return vcpu->arch.tsc_offset - vmcs12->tsc_offset; + + return vcpu->arch.tsc_offset; +} + +static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + u64 g_tsc_offset = 0; + + /* + * We're here if L1 chose not to trap WRMSR to TSC. According + * to the spec, this should set L1's TSC; The offset that L1 + * set for L2 remains unchanged, and still needs to be added + * to the newly set TSC to get L2's TSC. + */ + if (is_guest_mode(vcpu) && + (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)) + g_tsc_offset = vmcs12->tsc_offset; + + trace_kvm_write_tsc_offset(vcpu->vcpu_id, + vcpu->arch.tsc_offset - g_tsc_offset, + offset); + vmcs_write64(TSC_OFFSET, offset + g_tsc_offset); + return offset + g_tsc_offset; +} + +/* + * nested_vmx_allowed() checks whether a guest should be allowed to use VMX + * instructions and MSRs (i.e., nested VMX). Nested VMX is disabled for + * all guests if the "nested" module option is off, and can also be disabled + * for a single guest by disabling its VMX cpuid bit. + */ +bool nested_vmx_allowed(struct kvm_vcpu *vcpu) +{ + return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX); +} + +static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu, + uint64_t val) +{ + uint64_t valid_bits = to_vmx(vcpu)->msr_ia32_feature_control_valid_bits; + + return !(val & ~valid_bits); +} + +static int vmx_get_msr_feature(struct kvm_msr_entry *msr) +{ + switch (msr->index) { + case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: + if (!nested) + return 1; + return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data); + default: + return 1; + } +} + +/* + * Reads an msr value (of 'msr_index') into 'pdata'. + * Returns 0 on success, non-0 otherwise. + * Assumes vcpu_load() was already called. + */ +static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct shared_msr_entry *msr; + u32 index; + + switch (msr_info->index) { +#ifdef CONFIG_X86_64 + case MSR_FS_BASE: + msr_info->data = vmcs_readl(GUEST_FS_BASE); + break; + case MSR_GS_BASE: + msr_info->data = vmcs_readl(GUEST_GS_BASE); + break; + case MSR_KERNEL_GS_BASE: + msr_info->data = vmx_read_guest_kernel_gs_base(vmx); + break; +#endif + case MSR_EFER: + return kvm_get_msr_common(vcpu, msr_info); + case MSR_IA32_TSX_CTRL: + if (!msr_info->host_initiated && + !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR)) + return 1; + goto find_shared_msr; + case MSR_IA32_UMWAIT_CONTROL: + if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx)) + return 1; + + msr_info->data = vmx->msr_ia32_umwait_control; + break; + case MSR_IA32_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) + return 1; + + msr_info->data = to_vmx(vcpu)->spec_ctrl; + break; + case MSR_IA32_SYSENTER_CS: + msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); + break; + case MSR_IA32_SYSENTER_EIP: + msr_info->data = vmcs_readl(GUEST_SYSENTER_EIP); + break; + case MSR_IA32_SYSENTER_ESP: + msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP); + break; + case MSR_IA32_BNDCFGS: + if (!kvm_mpx_supported() || + (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_MPX))) + return 1; + msr_info->data = vmcs_read64(GUEST_BNDCFGS); + break; + case MSR_IA32_MCG_EXT_CTL: + if (!msr_info->host_initiated && + !(vmx->msr_ia32_feature_control & + FEATURE_CONTROL_LMCE)) + return 1; + msr_info->data = vcpu->arch.mcg_ext_ctl; + break; + case MSR_IA32_FEATURE_CONTROL: + msr_info->data = vmx->msr_ia32_feature_control; + break; + case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: + if (!nested_vmx_allowed(vcpu)) + return 1; + return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index, + &msr_info->data); + case MSR_IA32_RTIT_CTL: + if (pt_mode != PT_MODE_HOST_GUEST) + return 1; + msr_info->data = vmx->pt_desc.guest.ctl; + break; + case MSR_IA32_RTIT_STATUS: + if (pt_mode != PT_MODE_HOST_GUEST) + return 1; + msr_info->data = vmx->pt_desc.guest.status; + break; + case MSR_IA32_RTIT_CR3_MATCH: + if ((pt_mode != PT_MODE_HOST_GUEST) || + !intel_pt_validate_cap(vmx->pt_desc.caps, + PT_CAP_cr3_filtering)) + return 1; + msr_info->data = vmx->pt_desc.guest.cr3_match; + break; + case MSR_IA32_RTIT_OUTPUT_BASE: + if ((pt_mode != PT_MODE_HOST_GUEST) || + (!intel_pt_validate_cap(vmx->pt_desc.caps, + PT_CAP_topa_output) && + !intel_pt_validate_cap(vmx->pt_desc.caps, + PT_CAP_single_range_output))) + return 1; + msr_info->data = vmx->pt_desc.guest.output_base; + break; + case MSR_IA32_RTIT_OUTPUT_MASK: + if ((pt_mode != PT_MODE_HOST_GUEST) || + (!intel_pt_validate_cap(vmx->pt_desc.caps, + PT_CAP_topa_output) && + !intel_pt_validate_cap(vmx->pt_desc.caps, + PT_CAP_single_range_output))) + return 1; + msr_info->data = vmx->pt_desc.guest.output_mask; + break; + case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: + index = msr_info->index - MSR_IA32_RTIT_ADDR0_A; + if ((pt_mode != PT_MODE_HOST_GUEST) || + (index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps, + PT_CAP_num_address_ranges))) + return 1; + if (is_noncanonical_address(data, vcpu)) + return 1; + if (index % 2) + msr_info->data = vmx->pt_desc.guest.addr_b[index / 2]; + else + msr_info->data = vmx->pt_desc.guest.addr_a[index / 2]; + break; + case MSR_TSC_AUX: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) + return 1; + goto find_shared_msr; + default: + find_shared_msr: + msr = find_msr_entry(vmx, msr_info->index); + if (msr) { + msr_info->data = msr->data; + break; + } + return kvm_get_msr_common(vcpu, msr_info); + } + + return 0; +} + +/* + * Writes msr value into the appropriate "register". + * Returns 0 on success, non-0 otherwise. + * Assumes vcpu_load() was already called. + */ +static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct shared_msr_entry *msr; + int ret = 0; + u32 msr_index = msr_info->index; + u64 data = msr_info->data; + u32 index; + + switch (msr_index) { + case MSR_EFER: + ret = kvm_set_msr_common(vcpu, msr_info); + break; +#ifdef CONFIG_X86_64 + case MSR_FS_BASE: + vmx_segment_cache_clear(vmx); + vmcs_writel(GUEST_FS_BASE, data); + break; + case MSR_GS_BASE: + vmx_segment_cache_clear(vmx); + vmcs_writel(GUEST_GS_BASE, data); + break; + case MSR_KERNEL_GS_BASE: + vmx_write_guest_kernel_gs_base(vmx, data); + break; +#endif + case MSR_IA32_SYSENTER_CS: + if (is_guest_mode(vcpu)) + get_vmcs12(vcpu)->guest_sysenter_cs = data; + vmcs_write32(GUEST_SYSENTER_CS, data); + break; + case MSR_IA32_SYSENTER_EIP: + if (is_guest_mode(vcpu)) + get_vmcs12(vcpu)->guest_sysenter_eip = data; + vmcs_writel(GUEST_SYSENTER_EIP, data); + break; + case MSR_IA32_SYSENTER_ESP: + if (is_guest_mode(vcpu)) + get_vmcs12(vcpu)->guest_sysenter_esp = data; + vmcs_writel(GUEST_SYSENTER_ESP, data); + break; + case MSR_IA32_DEBUGCTLMSR: + if (is_guest_mode(vcpu) && get_vmcs12(vcpu)->vm_exit_controls & + VM_EXIT_SAVE_DEBUG_CONTROLS) + get_vmcs12(vcpu)->guest_ia32_debugctl = data; + + ret = kvm_set_msr_common(vcpu, msr_info); + break; + + case MSR_IA32_BNDCFGS: + if (!kvm_mpx_supported() || + (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_MPX))) + return 1; + if (is_noncanonical_address(data & PAGE_MASK, vcpu) || + (data & MSR_IA32_BNDCFGS_RSVD)) + return 1; + vmcs_write64(GUEST_BNDCFGS, data); + break; + case MSR_IA32_UMWAIT_CONTROL: + if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx)) + return 1; + + /* The reserved bit 1 and non-32 bit [63:32] should be zero */ + if (data & (BIT_ULL(1) | GENMASK_ULL(63, 32))) + return 1; + + vmx->msr_ia32_umwait_control = data; + break; + case MSR_IA32_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) + return 1; + + /* The STIBP bit doesn't fault even if it's not advertised */ + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD)) + return 1; + + vmx->spec_ctrl = data; + + if (!data) + break; + + /* + * For non-nested: + * When it's written (to non-zero) for the first time, pass + * it through. + * + * For nested: + * The handling of the MSR bitmap for L2 guests is done in + * nested_vmx_prepare_msr_bitmap. We should not touch the + * vmcs02.msr_bitmap here since it gets completely overwritten + * in the merging. We update the vmcs01 here for L1 as well + * since it will end up touching the MSR anyway now. + */ + vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, + MSR_IA32_SPEC_CTRL, + MSR_TYPE_RW); + break; + case MSR_IA32_TSX_CTRL: + if (!msr_info->host_initiated && + !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR)) + return 1; + if (data & ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR)) + return 1; + goto find_shared_msr; + case MSR_IA32_PRED_CMD: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) + return 1; + + if (data & ~PRED_CMD_IBPB) + return 1; + + if (!data) + break; + + wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); + + /* + * For non-nested: + * When it's written (to non-zero) for the first time, pass + * it through. + * + * For nested: + * The handling of the MSR bitmap for L2 guests is done in + * nested_vmx_prepare_msr_bitmap. We should not touch the + * vmcs02.msr_bitmap here since it gets completely overwritten + * in the merging. + */ + vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD, + MSR_TYPE_W); + break; + case MSR_IA32_CR_PAT: + if (!kvm_pat_valid(data)) + return 1; + + if (is_guest_mode(vcpu) && + get_vmcs12(vcpu)->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT) + get_vmcs12(vcpu)->guest_ia32_pat = data; + + if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { + vmcs_write64(GUEST_IA32_PAT, data); + vcpu->arch.pat = data; + break; + } + ret = kvm_set_msr_common(vcpu, msr_info); + break; + case MSR_IA32_TSC_ADJUST: + ret = kvm_set_msr_common(vcpu, msr_info); + break; + case MSR_IA32_MCG_EXT_CTL: + if ((!msr_info->host_initiated && + !(to_vmx(vcpu)->msr_ia32_feature_control & + FEATURE_CONTROL_LMCE)) || + (data & ~MCG_EXT_CTL_LMCE_EN)) + return 1; + vcpu->arch.mcg_ext_ctl = data; + break; + case MSR_IA32_FEATURE_CONTROL: + if (!vmx_feature_control_msr_valid(vcpu, data) || + (to_vmx(vcpu)->msr_ia32_feature_control & + FEATURE_CONTROL_LOCKED && !msr_info->host_initiated)) + return 1; + vmx->msr_ia32_feature_control = data; + if (msr_info->host_initiated && data == 0) + vmx_leave_nested(vcpu); + break; + case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: + if (!msr_info->host_initiated) + return 1; /* they are read-only */ + if (!nested_vmx_allowed(vcpu)) + return 1; + return vmx_set_vmx_msr(vcpu, msr_index, data); + case MSR_IA32_RTIT_CTL: + if ((pt_mode != PT_MODE_HOST_GUEST) || + vmx_rtit_ctl_check(vcpu, data) || + vmx->nested.vmxon) + return 1; + vmcs_write64(GUEST_IA32_RTIT_CTL, data); + vmx->pt_desc.guest.ctl = data; + pt_update_intercept_for_msr(vmx); + break; + case MSR_IA32_RTIT_STATUS: + if ((pt_mode != PT_MODE_HOST_GUEST) || + (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) || + (data & MSR_IA32_RTIT_STATUS_MASK)) + return 1; + vmx->pt_desc.guest.status = data; + break; + case MSR_IA32_RTIT_CR3_MATCH: + if ((pt_mode != PT_MODE_HOST_GUEST) || + (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) || + !intel_pt_validate_cap(vmx->pt_desc.caps, + PT_CAP_cr3_filtering)) + return 1; + vmx->pt_desc.guest.cr3_match = data; + break; + case MSR_IA32_RTIT_OUTPUT_BASE: + if ((pt_mode != PT_MODE_HOST_GUEST) || + (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) || + (!intel_pt_validate_cap(vmx->pt_desc.caps, + PT_CAP_topa_output) && + !intel_pt_validate_cap(vmx->pt_desc.caps, + PT_CAP_single_range_output)) || + (data & MSR_IA32_RTIT_OUTPUT_BASE_MASK)) + return 1; + vmx->pt_desc.guest.output_base = data; + break; + case MSR_IA32_RTIT_OUTPUT_MASK: + if ((pt_mode != PT_MODE_HOST_GUEST) || + (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) || + (!intel_pt_validate_cap(vmx->pt_desc.caps, + PT_CAP_topa_output) && + !intel_pt_validate_cap(vmx->pt_desc.caps, + PT_CAP_single_range_output))) + return 1; + vmx->pt_desc.guest.output_mask = data; + break; + case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: + index = msr_info->index - MSR_IA32_RTIT_ADDR0_A; + if ((pt_mode != PT_MODE_HOST_GUEST) || + (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) || + (index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps, + PT_CAP_num_address_ranges))) + return 1; + if (is_noncanonical_address(data, vcpu)) + return 1; + if (index % 2) + vmx->pt_desc.guest.addr_b[index / 2] = data; + else + vmx->pt_desc.guest.addr_a[index / 2] = data; + break; + case MSR_TSC_AUX: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) + return 1; + /* Check reserved bit, higher 32 bits should be zero */ + if ((data >> 32) != 0) + return 1; + goto find_shared_msr; + + default: + find_shared_msr: + msr = find_msr_entry(vmx, msr_index); + if (msr) + ret = vmx_set_guest_msr(vmx, msr, data); + else + ret = kvm_set_msr_common(vcpu, msr_info); + } + + return ret; +} + +static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) +{ + kvm_register_mark_available(vcpu, reg); + + switch (reg) { + case VCPU_REGS_RSP: + vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP); + break; + case VCPU_REGS_RIP: + vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(GUEST_RIP); + break; + case VCPU_EXREG_PDPTR: + if (enable_ept) + ept_save_pdptrs(vcpu); + break; + case VCPU_EXREG_CR3: + if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu))) + vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); + break; + default: + WARN_ON_ONCE(1); + break; + } +} + +static __init int cpu_has_kvm_support(void) +{ + return cpu_has_vmx(); +} + +static __init int vmx_disabled_by_bios(void) +{ + u64 msr; + + rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); + if (msr & FEATURE_CONTROL_LOCKED) { + /* launched w/ TXT and VMX disabled */ + if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX) + && tboot_enabled()) + return 1; + /* launched w/o TXT and VMX only enabled w/ TXT */ + if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) + && (msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX) + && !tboot_enabled()) { + printk(KERN_WARNING "kvm: disable TXT in the BIOS or " + "activate TXT before enabling KVM\n"); + return 1; + } + /* launched w/o TXT and VMX disabled */ + if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) + && !tboot_enabled()) + return 1; + } + + return 0; +} + +static void kvm_cpu_vmxon(u64 addr) +{ + cr4_set_bits(X86_CR4_VMXE); + intel_pt_handle_vmx(1); + + asm volatile ("vmxon %0" : : "m"(addr)); +} + +static int hardware_enable(void) +{ + int cpu = raw_smp_processor_id(); + u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); + u64 old, test_bits; + + if (cr4_read_shadow() & X86_CR4_VMXE) + return -EBUSY; + + /* + * This can happen if we hot-added a CPU but failed to allocate + * VP assist page for it. + */ + if (static_branch_unlikely(&enable_evmcs) && + !hv_get_vp_assist_page(cpu)) + return -EFAULT; + + INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); + INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu)); + spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); + + /* + * Now we can enable the vmclear operation in kdump + * since the loaded_vmcss_on_cpu list on this cpu + * has been initialized. + * + * Though the cpu is not in VMX operation now, there + * is no problem to enable the vmclear operation + * for the loaded_vmcss_on_cpu list is empty! + */ + crash_enable_local_vmclear(cpu); + + rdmsrl(MSR_IA32_FEATURE_CONTROL, old); + + test_bits = FEATURE_CONTROL_LOCKED; + test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; + if (tboot_enabled()) + test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX; + + if ((old & test_bits) != test_bits) { + /* enable and lock */ + wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits); + } + kvm_cpu_vmxon(phys_addr); + if (enable_ept) + ept_sync_global(); + + return 0; +} + +static void vmclear_local_loaded_vmcss(void) +{ + int cpu = raw_smp_processor_id(); + struct loaded_vmcs *v, *n; + + list_for_each_entry_safe(v, n, &per_cpu(loaded_vmcss_on_cpu, cpu), + loaded_vmcss_on_cpu_link) + __loaded_vmcs_clear(v); +} + + +/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot() + * tricks. + */ +static void kvm_cpu_vmxoff(void) +{ + asm volatile (__ex("vmxoff")); + + intel_pt_handle_vmx(0); + cr4_clear_bits(X86_CR4_VMXE); +} + +static void hardware_disable(void) +{ + vmclear_local_loaded_vmcss(); + kvm_cpu_vmxoff(); +} + +static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, + u32 msr, u32 *result) +{ + u32 vmx_msr_low, vmx_msr_high; + u32 ctl = ctl_min | ctl_opt; + + rdmsr(msr, vmx_msr_low, vmx_msr_high); + + ctl &= vmx_msr_high; /* bit == 0 in high word ==> must be zero */ + ctl |= vmx_msr_low; /* bit == 1 in low word ==> must be one */ + + /* Ensure minimum (required) set of control bits are supported. */ + if (ctl_min & ~ctl) + return -EIO; + + *result = ctl; + return 0; +} + +static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, + struct vmx_capability *vmx_cap) +{ + u32 vmx_msr_low, vmx_msr_high; + u32 min, opt, min2, opt2; + u32 _pin_based_exec_control = 0; + u32 _cpu_based_exec_control = 0; + u32 _cpu_based_2nd_exec_control = 0; + u32 _vmexit_control = 0; + u32 _vmentry_control = 0; + + memset(vmcs_conf, 0, sizeof(*vmcs_conf)); + min = CPU_BASED_HLT_EXITING | +#ifdef CONFIG_X86_64 + CPU_BASED_CR8_LOAD_EXITING | + CPU_BASED_CR8_STORE_EXITING | +#endif + CPU_BASED_CR3_LOAD_EXITING | + CPU_BASED_CR3_STORE_EXITING | + CPU_BASED_UNCOND_IO_EXITING | + CPU_BASED_MOV_DR_EXITING | + CPU_BASED_USE_TSC_OFFSETTING | + CPU_BASED_MWAIT_EXITING | + CPU_BASED_MONITOR_EXITING | + CPU_BASED_INVLPG_EXITING | + CPU_BASED_RDPMC_EXITING; + + opt = CPU_BASED_TPR_SHADOW | + CPU_BASED_USE_MSR_BITMAPS | + CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; + if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS, + &_cpu_based_exec_control) < 0) + return -EIO; +#ifdef CONFIG_X86_64 + if ((_cpu_based_exec_control & CPU_BASED_TPR_SHADOW)) + _cpu_based_exec_control &= ~CPU_BASED_CR8_LOAD_EXITING & + ~CPU_BASED_CR8_STORE_EXITING; +#endif + if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { + min2 = 0; + opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | + SECONDARY_EXEC_WBINVD_EXITING | + SECONDARY_EXEC_ENABLE_VPID | + SECONDARY_EXEC_ENABLE_EPT | + SECONDARY_EXEC_UNRESTRICTED_GUEST | + SECONDARY_EXEC_PAUSE_LOOP_EXITING | + SECONDARY_EXEC_DESC | + SECONDARY_EXEC_RDTSCP | + SECONDARY_EXEC_ENABLE_INVPCID | + SECONDARY_EXEC_APIC_REGISTER_VIRT | + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | + SECONDARY_EXEC_SHADOW_VMCS | + SECONDARY_EXEC_XSAVES | + SECONDARY_EXEC_RDSEED_EXITING | + SECONDARY_EXEC_RDRAND_EXITING | + SECONDARY_EXEC_ENABLE_PML | + SECONDARY_EXEC_TSC_SCALING | + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | + SECONDARY_EXEC_PT_USE_GPA | + SECONDARY_EXEC_PT_CONCEAL_VMX | + SECONDARY_EXEC_ENABLE_VMFUNC | + SECONDARY_EXEC_ENCLS_EXITING; + if (adjust_vmx_controls(min2, opt2, + MSR_IA32_VMX_PROCBASED_CTLS2, + &_cpu_based_2nd_exec_control) < 0) + return -EIO; + } +#ifndef CONFIG_X86_64 + if (!(_cpu_based_2nd_exec_control & + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) + _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW; +#endif + + if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW)) + _cpu_based_2nd_exec_control &= ~( + SECONDARY_EXEC_APIC_REGISTER_VIRT | + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); + + rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP, + &vmx_cap->ept, &vmx_cap->vpid); + + if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { + /* CR3 accesses and invlpg don't need to cause VM Exits when EPT + enabled */ + _cpu_based_exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING | + CPU_BASED_CR3_STORE_EXITING | + CPU_BASED_INVLPG_EXITING); + } else if (vmx_cap->ept) { + vmx_cap->ept = 0; + pr_warn_once("EPT CAP should not exist if not support " + "1-setting enable EPT VM-execution control\n"); + } + if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_VPID) && + vmx_cap->vpid) { + vmx_cap->vpid = 0; + pr_warn_once("VPID CAP should not exist if not support " + "1-setting enable VPID VM-execution control\n"); + } + + min = VM_EXIT_SAVE_DEBUG_CONTROLS | VM_EXIT_ACK_INTR_ON_EXIT; +#ifdef CONFIG_X86_64 + min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; +#endif + opt = VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | + VM_EXIT_LOAD_IA32_PAT | + VM_EXIT_LOAD_IA32_EFER | + VM_EXIT_CLEAR_BNDCFGS | + VM_EXIT_PT_CONCEAL_PIP | + VM_EXIT_CLEAR_IA32_RTIT_CTL; + if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, + &_vmexit_control) < 0) + return -EIO; + + min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; + opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR | + PIN_BASED_VMX_PREEMPTION_TIMER; + if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, + &_pin_based_exec_control) < 0) + return -EIO; + + if (cpu_has_broken_vmx_preemption_timer()) + _pin_based_exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER; + if (!(_cpu_based_2nd_exec_control & + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)) + _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR; + + min = VM_ENTRY_LOAD_DEBUG_CONTROLS; + opt = VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | + VM_ENTRY_LOAD_IA32_PAT | + VM_ENTRY_LOAD_IA32_EFER | + VM_ENTRY_LOAD_BNDCFGS | + VM_ENTRY_PT_CONCEAL_PIP | + VM_ENTRY_LOAD_IA32_RTIT_CTL; + if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, + &_vmentry_control) < 0) + return -EIO; + + /* + * Some cpus support VM_{ENTRY,EXIT}_IA32_PERF_GLOBAL_CTRL but they + * can't be used due to an errata where VM Exit may incorrectly clear + * IA32_PERF_GLOBAL_CTRL[34:32]. Workaround the errata by using the + * MSR load mechanism to switch IA32_PERF_GLOBAL_CTRL. + */ + if (boot_cpu_data.x86 == 0x6) { + switch (boot_cpu_data.x86_model) { + case 26: /* AAK155 */ + case 30: /* AAP115 */ + case 37: /* AAT100 */ + case 44: /* BC86,AAY89,BD102 */ + case 46: /* BA97 */ + _vmentry_control &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; + _vmexit_control &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; + pr_warn_once("kvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL " + "does not work properly. Using workaround\n"); + break; + default: + break; + } + } + + + rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high); + + /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */ + if ((vmx_msr_high & 0x1fff) > PAGE_SIZE) + return -EIO; + +#ifdef CONFIG_X86_64 + /* IA-32 SDM Vol 3B: 64-bit CPUs always have VMX_BASIC_MSR[48]==0. */ + if (vmx_msr_high & (1u<<16)) + return -EIO; +#endif + + /* Require Write-Back (WB) memory type for VMCS accesses. */ + if (((vmx_msr_high >> 18) & 15) != 6) + return -EIO; + + vmcs_conf->size = vmx_msr_high & 0x1fff; + vmcs_conf->order = get_order(vmcs_conf->size); + vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff; + + vmcs_conf->revision_id = vmx_msr_low; + + vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control; + vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control; + vmcs_conf->cpu_based_2nd_exec_ctrl = _cpu_based_2nd_exec_control; + vmcs_conf->vmexit_ctrl = _vmexit_control; + vmcs_conf->vmentry_ctrl = _vmentry_control; + + if (static_branch_unlikely(&enable_evmcs)) + evmcs_sanitize_exec_ctrls(vmcs_conf); + + return 0; +} + +struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags) +{ + int node = cpu_to_node(cpu); + struct page *pages; + struct vmcs *vmcs; + + pages = __alloc_pages_node(node, flags, vmcs_config.order); + if (!pages) + return NULL; + vmcs = page_address(pages); + memset(vmcs, 0, vmcs_config.size); + + /* KVM supports Enlightened VMCS v1 only */ + if (static_branch_unlikely(&enable_evmcs)) + vmcs->hdr.revision_id = KVM_EVMCS_VERSION; + else + vmcs->hdr.revision_id = vmcs_config.revision_id; + + if (shadow) + vmcs->hdr.shadow_vmcs = 1; + return vmcs; +} + +void free_vmcs(struct vmcs *vmcs) +{ + free_pages((unsigned long)vmcs, vmcs_config.order); +} + +/* + * Free a VMCS, but before that VMCLEAR it on the CPU where it was last loaded + */ +void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) +{ + if (!loaded_vmcs->vmcs) + return; + loaded_vmcs_clear(loaded_vmcs); + free_vmcs(loaded_vmcs->vmcs); + loaded_vmcs->vmcs = NULL; + if (loaded_vmcs->msr_bitmap) + free_page((unsigned long)loaded_vmcs->msr_bitmap); + WARN_ON(loaded_vmcs->shadow_vmcs != NULL); +} + +int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) +{ + loaded_vmcs->vmcs = alloc_vmcs(false); + if (!loaded_vmcs->vmcs) + return -ENOMEM; + + loaded_vmcs->shadow_vmcs = NULL; + loaded_vmcs->hv_timer_soft_disabled = false; + loaded_vmcs_init(loaded_vmcs); + + if (cpu_has_vmx_msr_bitmap()) { + loaded_vmcs->msr_bitmap = (unsigned long *) + __get_free_page(GFP_KERNEL_ACCOUNT); + if (!loaded_vmcs->msr_bitmap) + goto out_vmcs; + memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); + + if (IS_ENABLED(CONFIG_HYPERV) && + static_branch_unlikely(&enable_evmcs) && + (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) { + struct hv_enlightened_vmcs *evmcs = + (struct hv_enlightened_vmcs *)loaded_vmcs->vmcs; + + evmcs->hv_enlightenments_control.msr_bitmap = 1; + } + } + + memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state)); + memset(&loaded_vmcs->controls_shadow, 0, + sizeof(struct vmcs_controls_shadow)); + + return 0; + +out_vmcs: + free_loaded_vmcs(loaded_vmcs); + return -ENOMEM; +} + +static void free_kvm_area(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + free_vmcs(per_cpu(vmxarea, cpu)); + per_cpu(vmxarea, cpu) = NULL; + } +} + +static __init int alloc_kvm_area(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct vmcs *vmcs; + + vmcs = alloc_vmcs_cpu(false, cpu, GFP_KERNEL); + if (!vmcs) { + free_kvm_area(); + return -ENOMEM; + } + + /* + * When eVMCS is enabled, alloc_vmcs_cpu() sets + * vmcs->revision_id to KVM_EVMCS_VERSION instead of + * revision_id reported by MSR_IA32_VMX_BASIC. + * + * However, even though not explicitly documented by + * TLFS, VMXArea passed as VMXON argument should + * still be marked with revision_id reported by + * physical CPU. + */ + if (static_branch_unlikely(&enable_evmcs)) + vmcs->hdr.revision_id = vmcs_config.revision_id; + + per_cpu(vmxarea, cpu) = vmcs; + } + return 0; +} + +static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg, + struct kvm_segment *save) +{ + if (!emulate_invalid_guest_state) { + /* + * CS and SS RPL should be equal during guest entry according + * to VMX spec, but in reality it is not always so. Since vcpu + * is in the middle of the transition from real mode to + * protected mode it is safe to assume that RPL 0 is a good + * default value. + */ + if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS) + save->selector &= ~SEGMENT_RPL_MASK; + save->dpl = save->selector & SEGMENT_RPL_MASK; + save->s = 1; + } + vmx_set_segment(vcpu, save, seg); +} + +static void enter_pmode(struct kvm_vcpu *vcpu) +{ + unsigned long flags; + struct vcpu_vmx *vmx = to_vmx(vcpu); + + /* + * Update real mode segment cache. It may be not up-to-date if sement + * register was written while vcpu was in a guest mode. + */ + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES); + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS); + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS); + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS); + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS); + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS); + + vmx->rmode.vm86_active = 0; + + vmx_segment_cache_clear(vmx); + + vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); + + flags = vmcs_readl(GUEST_RFLAGS); + flags &= RMODE_GUEST_OWNED_EFLAGS_BITS; + flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; + vmcs_writel(GUEST_RFLAGS, flags); + + vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) | + (vmcs_readl(CR4_READ_SHADOW) & X86_CR4_VME)); + + update_exception_bitmap(vcpu); + + fix_pmode_seg(vcpu, VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]); + fix_pmode_seg(vcpu, VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]); + fix_pmode_seg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]); + fix_pmode_seg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); + fix_pmode_seg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); + fix_pmode_seg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]); +} + +static void fix_rmode_seg(int seg, struct kvm_segment *save) +{ + const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; + struct kvm_segment var = *save; + + var.dpl = 0x3; + if (seg == VCPU_SREG_CS) + var.type = 0x3; + + if (!emulate_invalid_guest_state) { + var.selector = var.base >> 4; + var.base = var.base & 0xffff0; + var.limit = 0xffff; + var.g = 0; + var.db = 0; + var.present = 1; + var.s = 1; + var.l = 0; + var.unusable = 0; + var.type = 0x3; + var.avl = 0; + if (save->base & 0xf) + printk_once(KERN_WARNING "kvm: segment base is not " + "paragraph aligned when entering " + "protected mode (seg=%d)", seg); + } + + vmcs_write16(sf->selector, var.selector); + vmcs_writel(sf->base, var.base); + vmcs_write32(sf->limit, var.limit); + vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var)); +} + +static void enter_rmode(struct kvm_vcpu *vcpu) +{ + unsigned long flags; + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm); + + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES); + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS); + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS); + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS); + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS); + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS); + + vmx->rmode.vm86_active = 1; + + /* + * Very old userspace does not call KVM_SET_TSS_ADDR before entering + * vcpu. Warn the user that an update is overdue. + */ + if (!kvm_vmx->tss_addr) + printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be " + "called before entering vcpu\n"); + + vmx_segment_cache_clear(vmx); + + vmcs_writel(GUEST_TR_BASE, kvm_vmx->tss_addr); + vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1); + vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); + + flags = vmcs_readl(GUEST_RFLAGS); + vmx->rmode.save_rflags = flags; + + flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; + + vmcs_writel(GUEST_RFLAGS, flags); + vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME); + update_exception_bitmap(vcpu); + + fix_rmode_seg(VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]); + fix_rmode_seg(VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]); + fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]); + fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); + fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]); + fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); + + kvm_mmu_reset_context(vcpu); +} + +void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct shared_msr_entry *msr = find_msr_entry(vmx, MSR_EFER); + + if (!msr) + return; + + vcpu->arch.efer = efer; + if (efer & EFER_LMA) { + vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); + msr->data = efer; + } else { + vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); + + msr->data = efer & ~EFER_LME; + } + setup_msrs(vmx); +} + +#ifdef CONFIG_X86_64 + +static void enter_lmode(struct kvm_vcpu *vcpu) +{ + u32 guest_tr_ar; + + vmx_segment_cache_clear(to_vmx(vcpu)); + + guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); + if ((guest_tr_ar & VMX_AR_TYPE_MASK) != VMX_AR_TYPE_BUSY_64_TSS) { + pr_debug_ratelimited("%s: tss fixup for long mode. \n", + __func__); + vmcs_write32(GUEST_TR_AR_BYTES, + (guest_tr_ar & ~VMX_AR_TYPE_MASK) + | VMX_AR_TYPE_BUSY_64_TSS); + } + vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA); +} + +static void exit_lmode(struct kvm_vcpu *vcpu) +{ + vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); + vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA); +} + +#endif + +static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr) +{ + int vpid = to_vmx(vcpu)->vpid; + + if (!vpid_sync_vcpu_addr(vpid, addr)) + vpid_sync_context(vpid); + + /* + * If VPIDs are not supported or enabled, then the above is a no-op. + * But we don't really need a TLB flush in that case anyway, because + * each VM entry/exit includes an implicit flush when VPID is 0. + */ +} + +static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) +{ + ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; + + vcpu->arch.cr0 &= ~cr0_guest_owned_bits; + vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits; +} + +static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) +{ + ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits; + + vcpu->arch.cr4 &= ~cr4_guest_owned_bits; + vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & cr4_guest_owned_bits; +} + +static void ept_load_pdptrs(struct kvm_vcpu *vcpu) +{ + struct kvm_mmu *mmu = vcpu->arch.walk_mmu; + + if (!kvm_register_is_dirty(vcpu, VCPU_EXREG_PDPTR)) + return; + + if (is_pae_paging(vcpu)) { + vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]); + vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]); + vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]); + vmcs_write64(GUEST_PDPTR3, mmu->pdptrs[3]); + } +} + +void ept_save_pdptrs(struct kvm_vcpu *vcpu) +{ + struct kvm_mmu *mmu = vcpu->arch.walk_mmu; + + if (is_pae_paging(vcpu)) { + mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0); + mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1); + mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2); + mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3); + } + + kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR); +} + +static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, + unsigned long cr0, + struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3)) + vmx_cache_reg(vcpu, VCPU_EXREG_CR3); + if (!(cr0 & X86_CR0_PG)) { + /* From paging/starting to nonpaging */ + exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING | + CPU_BASED_CR3_STORE_EXITING); + vcpu->arch.cr0 = cr0; + vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); + } else if (!is_paging(vcpu)) { + /* From nonpaging to paging */ + exec_controls_clearbit(vmx, CPU_BASED_CR3_LOAD_EXITING | + CPU_BASED_CR3_STORE_EXITING); + vcpu->arch.cr0 = cr0; + vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); + } + + if (!(cr0 & X86_CR0_WP)) + *hw_cr0 &= ~X86_CR0_WP; +} + +void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned long hw_cr0; + + hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF); + if (enable_unrestricted_guest) + hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST; + else { + hw_cr0 |= KVM_VM_CR0_ALWAYS_ON; + + if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) + enter_pmode(vcpu); + + if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE)) + enter_rmode(vcpu); + } + +#ifdef CONFIG_X86_64 + if (vcpu->arch.efer & EFER_LME) { + if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) + enter_lmode(vcpu); + if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) + exit_lmode(vcpu); + } +#endif + + if (enable_ept && !enable_unrestricted_guest) + ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); + + vmcs_writel(CR0_READ_SHADOW, cr0); + vmcs_writel(GUEST_CR0, hw_cr0); + vcpu->arch.cr0 = cr0; + + /* depends on vcpu->arch.cr0 to be set to a new value */ + vmx->emulation_required = emulation_required(vcpu); +} + +static int get_ept_level(struct kvm_vcpu *vcpu) +{ + if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48)) + return 5; + return 4; +} + +u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa) +{ + u64 eptp = VMX_EPTP_MT_WB; + + eptp |= (get_ept_level(vcpu) == 5) ? VMX_EPTP_PWL_5 : VMX_EPTP_PWL_4; + + if (enable_ept_ad_bits && + (!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu))) + eptp |= VMX_EPTP_AD_ENABLE_BIT; + eptp |= (root_hpa & PAGE_MASK); + + return eptp; +} + +void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) +{ + struct kvm *kvm = vcpu->kvm; + bool update_guest_cr3 = true; + unsigned long guest_cr3; + u64 eptp; + + guest_cr3 = cr3; + if (enable_ept) { + eptp = construct_eptp(vcpu, cr3); + vmcs_write64(EPT_POINTER, eptp); + + if (kvm_x86_ops->tlb_remote_flush) { + spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock); + to_vmx(vcpu)->ept_pointer = eptp; + to_kvm_vmx(kvm)->ept_pointers_match + = EPT_POINTERS_CHECK; + spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock); + } + + /* Loading vmcs02.GUEST_CR3 is handled by nested VM-Enter. */ + if (is_guest_mode(vcpu)) + update_guest_cr3 = false; + else if (!enable_unrestricted_guest && !is_paging(vcpu)) + guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr; + else if (test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) + guest_cr3 = vcpu->arch.cr3; + else /* vmcs01.GUEST_CR3 is already up-to-date. */ + update_guest_cr3 = false; + ept_load_pdptrs(vcpu); + } + + if (update_guest_cr3) + vmcs_writel(GUEST_CR3, guest_cr3); +} + +int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + /* + * Pass through host's Machine Check Enable value to hw_cr4, which + * is in force while we are in guest mode. Do not let guests control + * this bit, even if host CR4.MCE == 0. + */ + unsigned long hw_cr4; + + hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE); + if (enable_unrestricted_guest) + hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST; + else if (vmx->rmode.vm86_active) + hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON; + else + hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON; + + if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) { + if (cr4 & X86_CR4_UMIP) { + secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_DESC); + hw_cr4 &= ~X86_CR4_UMIP; + } else if (!is_guest_mode(vcpu) || + !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC)) { + secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_DESC); + } + } + + if (cr4 & X86_CR4_VMXE) { + /* + * To use VMXON (and later other VMX instructions), a guest + * must first be able to turn on cr4.VMXE (see handle_vmon()). + * So basically the check on whether to allow nested VMX + * is here. We operate under the default treatment of SMM, + * so VMX cannot be enabled under SMM. + */ + if (!nested_vmx_allowed(vcpu) || is_smm(vcpu)) + return 1; + } + + if (vmx->nested.vmxon && !nested_cr4_valid(vcpu, cr4)) + return 1; + + vcpu->arch.cr4 = cr4; + + if (!enable_unrestricted_guest) { + if (enable_ept) { + if (!is_paging(vcpu)) { + hw_cr4 &= ~X86_CR4_PAE; + hw_cr4 |= X86_CR4_PSE; + } else if (!(cr4 & X86_CR4_PAE)) { + hw_cr4 &= ~X86_CR4_PAE; + } + } + + /* + * SMEP/SMAP/PKU is disabled if CPU is in non-paging mode in + * hardware. To emulate this behavior, SMEP/SMAP/PKU needs + * to be manually disabled when guest switches to non-paging + * mode. + * + * If !enable_unrestricted_guest, the CPU is always running + * with CR0.PG=1 and CR4 needs to be modified. + * If enable_unrestricted_guest, the CPU automatically + * disables SMEP/SMAP/PKU when the guest sets CR0.PG=0. + */ + if (!is_paging(vcpu)) + hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE); + } + + vmcs_writel(CR4_READ_SHADOW, cr4); + vmcs_writel(GUEST_CR4, hw_cr4); + return 0; +} + +void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + u32 ar; + + if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) { + *var = vmx->rmode.segs[seg]; + if (seg == VCPU_SREG_TR + || var->selector == vmx_read_guest_seg_selector(vmx, seg)) + return; + var->base = vmx_read_guest_seg_base(vmx, seg); + var->selector = vmx_read_guest_seg_selector(vmx, seg); + return; + } + var->base = vmx_read_guest_seg_base(vmx, seg); + var->limit = vmx_read_guest_seg_limit(vmx, seg); + var->selector = vmx_read_guest_seg_selector(vmx, seg); + ar = vmx_read_guest_seg_ar(vmx, seg); + var->unusable = (ar >> 16) & 1; + var->type = ar & 15; + var->s = (ar >> 4) & 1; + var->dpl = (ar >> 5) & 3; + /* + * Some userspaces do not preserve unusable property. Since usable + * segment has to be present according to VMX spec we can use present + * property to amend userspace bug by making unusable segment always + * nonpresent. vmx_segment_access_rights() already marks nonpresent + * segment as unusable. + */ + var->present = !var->unusable; + var->avl = (ar >> 12) & 1; + var->l = (ar >> 13) & 1; + var->db = (ar >> 14) & 1; + var->g = (ar >> 15) & 1; +} + +static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) +{ + struct kvm_segment s; + + if (to_vmx(vcpu)->rmode.vm86_active) { + vmx_get_segment(vcpu, &s, seg); + return s.base; + } + return vmx_read_guest_seg_base(to_vmx(vcpu), seg); +} + +int vmx_get_cpl(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (unlikely(vmx->rmode.vm86_active)) + return 0; + else { + int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS); + return VMX_AR_DPL(ar); + } +} + +static u32 vmx_segment_access_rights(struct kvm_segment *var) +{ + u32 ar; + + if (var->unusable || !var->present) + ar = 1 << 16; + else { + ar = var->type & 15; + ar |= (var->s & 1) << 4; + ar |= (var->dpl & 3) << 5; + ar |= (var->present & 1) << 7; + ar |= (var->avl & 1) << 12; + ar |= (var->l & 1) << 13; + ar |= (var->db & 1) << 14; + ar |= (var->g & 1) << 15; + } + + return ar; +} + +void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; + + vmx_segment_cache_clear(vmx); + + if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) { + vmx->rmode.segs[seg] = *var; + if (seg == VCPU_SREG_TR) + vmcs_write16(sf->selector, var->selector); + else if (var->s) + fix_rmode_seg(seg, &vmx->rmode.segs[seg]); + goto out; + } + + vmcs_writel(sf->base, var->base); + vmcs_write32(sf->limit, var->limit); + vmcs_write16(sf->selector, var->selector); + + /* + * Fix the "Accessed" bit in AR field of segment registers for older + * qemu binaries. + * IA32 arch specifies that at the time of processor reset the + * "Accessed" bit in the AR field of segment registers is 1. And qemu + * is setting it to 0 in the userland code. This causes invalid guest + * state vmexit when "unrestricted guest" mode is turned on. + * Fix for this setup issue in cpu_reset is being pushed in the qemu + * tree. Newer qemu binaries with that qemu fix would not need this + * kvm hack. + */ + if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR)) + var->type |= 0x1; /* Accessed */ + + vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var)); + +out: + vmx->emulation_required = emulation_required(vcpu); +} + +static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) +{ + u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS); + + *db = (ar >> 14) & 1; + *l = (ar >> 13) & 1; +} + +static void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) +{ + dt->size = vmcs_read32(GUEST_IDTR_LIMIT); + dt->address = vmcs_readl(GUEST_IDTR_BASE); +} + +static void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) +{ + vmcs_write32(GUEST_IDTR_LIMIT, dt->size); + vmcs_writel(GUEST_IDTR_BASE, dt->address); +} + +static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) +{ + dt->size = vmcs_read32(GUEST_GDTR_LIMIT); + dt->address = vmcs_readl(GUEST_GDTR_BASE); +} + +static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) +{ + vmcs_write32(GUEST_GDTR_LIMIT, dt->size); + vmcs_writel(GUEST_GDTR_BASE, dt->address); +} + +static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) +{ + struct kvm_segment var; + u32 ar; + + vmx_get_segment(vcpu, &var, seg); + var.dpl = 0x3; + if (seg == VCPU_SREG_CS) + var.type = 0x3; + ar = vmx_segment_access_rights(&var); + + if (var.base != (var.selector << 4)) + return false; + if (var.limit != 0xffff) + return false; + if (ar != 0xf3) + return false; + + return true; +} + +static bool code_segment_valid(struct kvm_vcpu *vcpu) +{ + struct kvm_segment cs; + unsigned int cs_rpl; + + vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); + cs_rpl = cs.selector & SEGMENT_RPL_MASK; + + if (cs.unusable) + return false; + if (~cs.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_ACCESSES_MASK)) + return false; + if (!cs.s) + return false; + if (cs.type & VMX_AR_TYPE_WRITEABLE_MASK) { + if (cs.dpl > cs_rpl) + return false; + } else { + if (cs.dpl != cs_rpl) + return false; + } + if (!cs.present) + return false; + + /* TODO: Add Reserved field check, this'll require a new member in the kvm_segment_field structure */ + return true; +} + +static bool stack_segment_valid(struct kvm_vcpu *vcpu) +{ + struct kvm_segment ss; + unsigned int ss_rpl; + + vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); + ss_rpl = ss.selector & SEGMENT_RPL_MASK; + + if (ss.unusable) + return true; + if (ss.type != 3 && ss.type != 7) + return false; + if (!ss.s) + return false; + if (ss.dpl != ss_rpl) /* DPL != RPL */ + return false; + if (!ss.present) + return false; + + return true; +} + +static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg) +{ + struct kvm_segment var; + unsigned int rpl; + + vmx_get_segment(vcpu, &var, seg); + rpl = var.selector & SEGMENT_RPL_MASK; + + if (var.unusable) + return true; + if (!var.s) + return false; + if (!var.present) + return false; + if (~var.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_WRITEABLE_MASK)) { + if (var.dpl < rpl) /* DPL < RPL */ + return false; + } + + /* TODO: Add other members to kvm_segment_field to allow checking for other access + * rights flags + */ + return true; +} + +static bool tr_valid(struct kvm_vcpu *vcpu) +{ + struct kvm_segment tr; + + vmx_get_segment(vcpu, &tr, VCPU_SREG_TR); + + if (tr.unusable) + return false; + if (tr.selector & SEGMENT_TI_MASK) /* TI = 1 */ + return false; + if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */ + return false; + if (!tr.present) + return false; + + return true; +} + +static bool ldtr_valid(struct kvm_vcpu *vcpu) +{ + struct kvm_segment ldtr; + + vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR); + + if (ldtr.unusable) + return true; + if (ldtr.selector & SEGMENT_TI_MASK) /* TI = 1 */ + return false; + if (ldtr.type != 2) + return false; + if (!ldtr.present) + return false; + + return true; +} + +static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu) +{ + struct kvm_segment cs, ss; + + vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); + vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); + + return ((cs.selector & SEGMENT_RPL_MASK) == + (ss.selector & SEGMENT_RPL_MASK)); +} + +/* + * Check if guest state is valid. Returns true if valid, false if + * not. + * We assume that registers are always usable + */ +static bool guest_state_valid(struct kvm_vcpu *vcpu) +{ + if (enable_unrestricted_guest) + return true; + + /* real mode guest state checks */ + if (!is_protmode(vcpu) || (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) { + if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) + return false; + if (!rmode_segment_valid(vcpu, VCPU_SREG_SS)) + return false; + if (!rmode_segment_valid(vcpu, VCPU_SREG_DS)) + return false; + if (!rmode_segment_valid(vcpu, VCPU_SREG_ES)) + return false; + if (!rmode_segment_valid(vcpu, VCPU_SREG_FS)) + return false; + if (!rmode_segment_valid(vcpu, VCPU_SREG_GS)) + return false; + } else { + /* protected mode guest state checks */ + if (!cs_ss_rpl_check(vcpu)) + return false; + if (!code_segment_valid(vcpu)) + return false; + if (!stack_segment_valid(vcpu)) + return false; + if (!data_segment_valid(vcpu, VCPU_SREG_DS)) + return false; + if (!data_segment_valid(vcpu, VCPU_SREG_ES)) + return false; + if (!data_segment_valid(vcpu, VCPU_SREG_FS)) + return false; + if (!data_segment_valid(vcpu, VCPU_SREG_GS)) + return false; + if (!tr_valid(vcpu)) + return false; + if (!ldtr_valid(vcpu)) + return false; + } + /* TODO: + * - Add checks on RIP + * - Add checks on RFLAGS + */ + + return true; +} + +static int init_rmode_tss(struct kvm *kvm) +{ + gfn_t fn; + u16 data = 0; + int idx, r; + + idx = srcu_read_lock(&kvm->srcu); + fn = to_kvm_vmx(kvm)->tss_addr >> PAGE_SHIFT; + r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); + if (r < 0) + goto out; + data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE; + r = kvm_write_guest_page(kvm, fn++, &data, + TSS_IOPB_BASE_OFFSET, sizeof(u16)); + if (r < 0) + goto out; + r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE); + if (r < 0) + goto out; + r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); + if (r < 0) + goto out; + data = ~0; + r = kvm_write_guest_page(kvm, fn, &data, + RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1, + sizeof(u8)); +out: + srcu_read_unlock(&kvm->srcu, idx); + return r; +} + +static int init_rmode_identity_map(struct kvm *kvm) +{ + struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm); + int i, idx, r = 0; + kvm_pfn_t identity_map_pfn; + u32 tmp; + + /* Protect kvm_vmx->ept_identity_pagetable_done. */ + mutex_lock(&kvm->slots_lock); + + if (likely(kvm_vmx->ept_identity_pagetable_done)) + goto out2; + + if (!kvm_vmx->ept_identity_map_addr) + kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR; + identity_map_pfn = kvm_vmx->ept_identity_map_addr >> PAGE_SHIFT; + + r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, + kvm_vmx->ept_identity_map_addr, PAGE_SIZE); + if (r < 0) + goto out2; + + idx = srcu_read_lock(&kvm->srcu); + r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE); + if (r < 0) + goto out; + /* Set up identity-mapping pagetable for EPT in real mode */ + for (i = 0; i < PT32_ENT_PER_PAGE; i++) { + tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | + _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE); + r = kvm_write_guest_page(kvm, identity_map_pfn, + &tmp, i * sizeof(tmp), sizeof(tmp)); + if (r < 0) + goto out; + } + kvm_vmx->ept_identity_pagetable_done = true; + +out: + srcu_read_unlock(&kvm->srcu, idx); + +out2: + mutex_unlock(&kvm->slots_lock); + return r; +} + +static void seg_setup(int seg) +{ + const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; + unsigned int ar; + + vmcs_write16(sf->selector, 0); + vmcs_writel(sf->base, 0); + vmcs_write32(sf->limit, 0xffff); + ar = 0x93; + if (seg == VCPU_SREG_CS) + ar |= 0x08; /* code segment */ + + vmcs_write32(sf->ar_bytes, ar); +} + +static int alloc_apic_access_page(struct kvm *kvm) +{ + struct page *page; + int r = 0; + + mutex_lock(&kvm->slots_lock); + if (kvm->arch.apic_access_page_done) + goto out; + r = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, + APIC_DEFAULT_PHYS_BASE, PAGE_SIZE); + if (r) + goto out; + + page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); + if (is_error_page(page)) { + r = -EFAULT; + goto out; + } + + /* + * Do not pin the page in memory, so that memory hot-unplug + * is able to migrate it. + */ + put_page(page); + kvm->arch.apic_access_page_done = true; +out: + mutex_unlock(&kvm->slots_lock); + return r; +} + +int allocate_vpid(void) +{ + int vpid; + + if (!enable_vpid) + return 0; + spin_lock(&vmx_vpid_lock); + vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS); + if (vpid < VMX_NR_VPIDS) + __set_bit(vpid, vmx_vpid_bitmap); + else + vpid = 0; + spin_unlock(&vmx_vpid_lock); + return vpid; +} + +void free_vpid(int vpid) +{ + if (!enable_vpid || vpid == 0) + return; + spin_lock(&vmx_vpid_lock); + __clear_bit(vpid, vmx_vpid_bitmap); + spin_unlock(&vmx_vpid_lock); +} + +static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type) +{ + int f = sizeof(unsigned long); + + if (!cpu_has_vmx_msr_bitmap()) + return; + + if (static_branch_unlikely(&enable_evmcs)) + evmcs_touch_msr_bitmap(); + + /* + * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals + * have the write-low and read-high bitmap offsets the wrong way round. + * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. + */ + if (msr <= 0x1fff) { + if (type & MSR_TYPE_R) + /* read-low */ + __clear_bit(msr, msr_bitmap + 0x000 / f); + + if (type & MSR_TYPE_W) + /* write-low */ + __clear_bit(msr, msr_bitmap + 0x800 / f); + + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { + msr &= 0x1fff; + if (type & MSR_TYPE_R) + /* read-high */ + __clear_bit(msr, msr_bitmap + 0x400 / f); + + if (type & MSR_TYPE_W) + /* write-high */ + __clear_bit(msr, msr_bitmap + 0xc00 / f); + + } +} + +static __always_inline void vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type) +{ + int f = sizeof(unsigned long); + + if (!cpu_has_vmx_msr_bitmap()) + return; + + if (static_branch_unlikely(&enable_evmcs)) + evmcs_touch_msr_bitmap(); + + /* + * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals + * have the write-low and read-high bitmap offsets the wrong way round. + * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. + */ + if (msr <= 0x1fff) { + if (type & MSR_TYPE_R) + /* read-low */ + __set_bit(msr, msr_bitmap + 0x000 / f); + + if (type & MSR_TYPE_W) + /* write-low */ + __set_bit(msr, msr_bitmap + 0x800 / f); + + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { + msr &= 0x1fff; + if (type & MSR_TYPE_R) + /* read-high */ + __set_bit(msr, msr_bitmap + 0x400 / f); + + if (type & MSR_TYPE_W) + /* write-high */ + __set_bit(msr, msr_bitmap + 0xc00 / f); + + } +} + +static __always_inline void vmx_set_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type, bool value) +{ + if (value) + vmx_enable_intercept_for_msr(msr_bitmap, msr, type); + else + vmx_disable_intercept_for_msr(msr_bitmap, msr, type); +} + +static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) +{ + u8 mode = 0; + + if (cpu_has_secondary_exec_ctrls() && + (secondary_exec_controls_get(to_vmx(vcpu)) & + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { + mode |= MSR_BITMAP_MODE_X2APIC; + if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) + mode |= MSR_BITMAP_MODE_X2APIC_APICV; + } + + return mode; +} + +static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap, + u8 mode) +{ + int msr; + + for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { + unsigned word = msr / BITS_PER_LONG; + msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0; + msr_bitmap[word + (0x800 / sizeof(long))] = ~0; + } + + if (mode & MSR_BITMAP_MODE_X2APIC) { + /* + * TPR reads and writes can be virtualized even if virtual interrupt + * delivery is not in use. + */ + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW); + if (mode & MSR_BITMAP_MODE_X2APIC_APICV) { + vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R); + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W); + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W); + } + } +} + +void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; + u8 mode = vmx_msr_bitmap_mode(vcpu); + u8 changed = mode ^ vmx->msr_bitmap_mode; + + if (!changed) + return; + + if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV)) + vmx_update_msr_bitmap_x2apic(msr_bitmap, mode); + + vmx->msr_bitmap_mode = mode; +} + +void pt_update_intercept_for_msr(struct vcpu_vmx *vmx) +{ + unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; + bool flag = !(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN); + u32 i; + + vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_STATUS, + MSR_TYPE_RW, flag); + vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_OUTPUT_BASE, + MSR_TYPE_RW, flag); + vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_OUTPUT_MASK, + MSR_TYPE_RW, flag); + vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_CR3_MATCH, + MSR_TYPE_RW, flag); + for (i = 0; i < vmx->pt_desc.addr_range; i++) { + vmx_set_intercept_for_msr(msr_bitmap, + MSR_IA32_RTIT_ADDR0_A + i * 2, MSR_TYPE_RW, flag); + vmx_set_intercept_for_msr(msr_bitmap, + MSR_IA32_RTIT_ADDR0_B + i * 2, MSR_TYPE_RW, flag); + } +} + +static bool vmx_get_enable_apicv(struct kvm *kvm) +{ + return enable_apicv; +} + +static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + void *vapic_page; + u32 vppr; + int rvi; + + if (WARN_ON_ONCE(!is_guest_mode(vcpu)) || + !nested_cpu_has_vid(get_vmcs12(vcpu)) || + WARN_ON_ONCE(!vmx->nested.virtual_apic_map.gfn)) + return false; + + rvi = vmx_get_rvi(); + + vapic_page = vmx->nested.virtual_apic_map.hva; + vppr = *((u32 *)(vapic_page + APIC_PROCPRI)); + + return ((rvi & 0xf0) > (vppr & 0xf0)); +} + +static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu, + bool nested) +{ +#ifdef CONFIG_SMP + int pi_vec = nested ? POSTED_INTR_NESTED_VECTOR : POSTED_INTR_VECTOR; + + if (vcpu->mode == IN_GUEST_MODE) { + /* + * The vector of interrupt to be delivered to vcpu had + * been set in PIR before this function. + * + * Following cases will be reached in this block, and + * we always send a notification event in all cases as + * explained below. + * + * Case 1: vcpu keeps in non-root mode. Sending a + * notification event posts the interrupt to vcpu. + * + * Case 2: vcpu exits to root mode and is still + * runnable. PIR will be synced to vIRR before the + * next vcpu entry. Sending a notification event in + * this case has no effect, as vcpu is not in root + * mode. + * + * Case 3: vcpu exits to root mode and is blocked. + * vcpu_block() has already synced PIR to vIRR and + * never blocks vcpu if vIRR is not cleared. Therefore, + * a blocked vcpu here does not wait for any requested + * interrupts in PIR, and sending a notification event + * which has no effect is safe here. + */ + + apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec); + return true; + } +#endif + return false; +} + +static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu, + int vector) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (is_guest_mode(vcpu) && + vector == vmx->nested.posted_intr_nv) { + /* + * If a posted intr is not recognized by hardware, + * we will accomplish it in the next vmentry. + */ + vmx->nested.pi_pending = true; + kvm_make_request(KVM_REQ_EVENT, vcpu); + /* the PIR and ON have been set by L1. */ + if (!kvm_vcpu_trigger_posted_interrupt(vcpu, true)) + kvm_vcpu_kick(vcpu); + return 0; + } + return -1; +} +/* + * Send interrupt to vcpu via posted interrupt way. + * 1. If target vcpu is running(non-root mode), send posted interrupt + * notification to vcpu and hardware will sync PIR to vIRR atomically. + * 2. If target vcpu isn't running(root mode), kick it to pick up the + * interrupt from PIR in next vmentry. + */ +static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + int r; + + r = vmx_deliver_nested_posted_interrupt(vcpu, vector); + if (!r) + return; + + if (pi_test_and_set_pir(vector, &vmx->pi_desc)) + return; + + /* If a previous notification has sent the IPI, nothing to do. */ + if (pi_test_and_set_on(&vmx->pi_desc)) + return; + + if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false)) + kvm_vcpu_kick(vcpu); +} + +/* + * Set up the vmcs's constant host-state fields, i.e., host-state fields that + * will not change in the lifetime of the guest. + * Note that host-state that does change is set elsewhere. E.g., host-state + * that is set differently for each CPU is set in vmx_vcpu_load(), not here. + */ +void vmx_set_constant_host_state(struct vcpu_vmx *vmx) +{ + u32 low32, high32; + unsigned long tmpl; + unsigned long cr0, cr3, cr4; + + cr0 = read_cr0(); + WARN_ON(cr0 & X86_CR0_TS); + vmcs_writel(HOST_CR0, cr0); /* 22.2.3 */ + + /* + * Save the most likely value for this task's CR3 in the VMCS. + * We can't use __get_current_cr3_fast() because we're not atomic. + */ + cr3 = __read_cr3(); + vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */ + vmx->loaded_vmcs->host_state.cr3 = cr3; + + /* Save the most likely value for this task's CR4 in the VMCS. */ + cr4 = cr4_read_shadow(); + vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */ + vmx->loaded_vmcs->host_state.cr4 = cr4; + + vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ +#ifdef CONFIG_X86_64 + /* + * Load null selectors, so we can avoid reloading them in + * vmx_prepare_switch_to_host(), in case userspace uses + * the null selectors too (the expected case). + */ + vmcs_write16(HOST_DS_SELECTOR, 0); + vmcs_write16(HOST_ES_SELECTOR, 0); +#else + vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ + vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */ +#endif + vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ + vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ + + vmcs_writel(HOST_IDTR_BASE, host_idt_base); /* 22.2.4 */ + + vmcs_writel(HOST_RIP, (unsigned long)vmx_vmexit); /* 22.2.5 */ + + rdmsr(MSR_IA32_SYSENTER_CS, low32, high32); + vmcs_write32(HOST_IA32_SYSENTER_CS, low32); + rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl); + vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl); /* 22.2.3 */ + + if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) { + rdmsr(MSR_IA32_CR_PAT, low32, high32); + vmcs_write64(HOST_IA32_PAT, low32 | ((u64) high32 << 32)); + } + + if (cpu_has_load_ia32_efer()) + vmcs_write64(HOST_IA32_EFER, host_efer); +} + +void set_cr4_guest_host_mask(struct vcpu_vmx *vmx) +{ + vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS; + if (enable_ept) + vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE; + if (is_guest_mode(&vmx->vcpu)) + vmx->vcpu.arch.cr4_guest_owned_bits &= + ~get_vmcs12(&vmx->vcpu)->cr4_guest_host_mask; + vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits); +} + +u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) +{ + u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl; + + if (!kvm_vcpu_apicv_active(&vmx->vcpu)) + pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; + + if (!enable_vnmi) + pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS; + + if (!enable_preemption_timer) + pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER; + + return pin_based_exec_ctrl; +} + +static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx)); + if (cpu_has_secondary_exec_ctrls()) { + if (kvm_vcpu_apicv_active(vcpu)) + secondary_exec_controls_setbit(vmx, + SECONDARY_EXEC_APIC_REGISTER_VIRT | + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); + else + secondary_exec_controls_clearbit(vmx, + SECONDARY_EXEC_APIC_REGISTER_VIRT | + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); + } + + if (cpu_has_vmx_msr_bitmap()) + vmx_update_msr_bitmap(vcpu); +} + +u32 vmx_exec_control(struct vcpu_vmx *vmx) +{ + u32 exec_control = vmcs_config.cpu_based_exec_ctrl; + + if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT) + exec_control &= ~CPU_BASED_MOV_DR_EXITING; + + if (!cpu_need_tpr_shadow(&vmx->vcpu)) { + exec_control &= ~CPU_BASED_TPR_SHADOW; +#ifdef CONFIG_X86_64 + exec_control |= CPU_BASED_CR8_STORE_EXITING | + CPU_BASED_CR8_LOAD_EXITING; +#endif + } + if (!enable_ept) + exec_control |= CPU_BASED_CR3_STORE_EXITING | + CPU_BASED_CR3_LOAD_EXITING | + CPU_BASED_INVLPG_EXITING; + if (kvm_mwait_in_guest(vmx->vcpu.kvm)) + exec_control &= ~(CPU_BASED_MWAIT_EXITING | + CPU_BASED_MONITOR_EXITING); + if (kvm_hlt_in_guest(vmx->vcpu.kvm)) + exec_control &= ~CPU_BASED_HLT_EXITING; + return exec_control; +} + + +static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) +{ + struct kvm_vcpu *vcpu = &vmx->vcpu; + + u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; + + if (pt_mode == PT_MODE_SYSTEM) + exec_control &= ~(SECONDARY_EXEC_PT_USE_GPA | SECONDARY_EXEC_PT_CONCEAL_VMX); + if (!cpu_need_virtualize_apic_accesses(vcpu)) + exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + if (vmx->vpid == 0) + exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; + if (!enable_ept) { + exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; + enable_unrestricted_guest = 0; + } + if (!enable_unrestricted_guest) + exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; + if (kvm_pause_in_guest(vmx->vcpu.kvm)) + exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; + if (!kvm_vcpu_apicv_active(vcpu)) + exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); + exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; + + /* SECONDARY_EXEC_DESC is enabled/disabled on writes to CR4.UMIP, + * in vmx_set_cr4. */ + exec_control &= ~SECONDARY_EXEC_DESC; + + /* SECONDARY_EXEC_SHADOW_VMCS is enabled when L1 executes VMPTRLD + (handle_vmptrld). + We can NOT enable shadow_vmcs here because we don't have yet + a current VMCS12 + */ + exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; + + if (!enable_pml) + exec_control &= ~SECONDARY_EXEC_ENABLE_PML; + + if (vmx_xsaves_supported()) { + /* Exposing XSAVES only when XSAVE is exposed */ + bool xsaves_enabled = + guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && + guest_cpuid_has(vcpu, X86_FEATURE_XSAVES); + + vcpu->arch.xsaves_enabled = xsaves_enabled; + + if (!xsaves_enabled) + exec_control &= ~SECONDARY_EXEC_XSAVES; + + if (nested) { + if (xsaves_enabled) + vmx->nested.msrs.secondary_ctls_high |= + SECONDARY_EXEC_XSAVES; + else + vmx->nested.msrs.secondary_ctls_high &= + ~SECONDARY_EXEC_XSAVES; + } + } + + if (vmx_rdtscp_supported()) { + bool rdtscp_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP); + if (!rdtscp_enabled) + exec_control &= ~SECONDARY_EXEC_RDTSCP; + + if (nested) { + if (rdtscp_enabled) + vmx->nested.msrs.secondary_ctls_high |= + SECONDARY_EXEC_RDTSCP; + else + vmx->nested.msrs.secondary_ctls_high &= + ~SECONDARY_EXEC_RDTSCP; + } + } + + if (vmx_invpcid_supported()) { + /* Exposing INVPCID only when PCID is exposed */ + bool invpcid_enabled = + guest_cpuid_has(vcpu, X86_FEATURE_INVPCID) && + guest_cpuid_has(vcpu, X86_FEATURE_PCID); + + if (!invpcid_enabled) { + exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID; + guest_cpuid_clear(vcpu, X86_FEATURE_INVPCID); + } + + if (nested) { + if (invpcid_enabled) + vmx->nested.msrs.secondary_ctls_high |= + SECONDARY_EXEC_ENABLE_INVPCID; + else + vmx->nested.msrs.secondary_ctls_high &= + ~SECONDARY_EXEC_ENABLE_INVPCID; + } + } + + if (vmx_rdrand_supported()) { + bool rdrand_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDRAND); + if (rdrand_enabled) + exec_control &= ~SECONDARY_EXEC_RDRAND_EXITING; + + if (nested) { + if (rdrand_enabled) + vmx->nested.msrs.secondary_ctls_high |= + SECONDARY_EXEC_RDRAND_EXITING; + else + vmx->nested.msrs.secondary_ctls_high &= + ~SECONDARY_EXEC_RDRAND_EXITING; + } + } + + if (vmx_rdseed_supported()) { + bool rdseed_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDSEED); + if (rdseed_enabled) + exec_control &= ~SECONDARY_EXEC_RDSEED_EXITING; + + if (nested) { + if (rdseed_enabled) + vmx->nested.msrs.secondary_ctls_high |= + SECONDARY_EXEC_RDSEED_EXITING; + else + vmx->nested.msrs.secondary_ctls_high &= + ~SECONDARY_EXEC_RDSEED_EXITING; + } + } + + if (vmx_waitpkg_supported()) { + bool waitpkg_enabled = + guest_cpuid_has(vcpu, X86_FEATURE_WAITPKG); + + if (!waitpkg_enabled) + exec_control &= ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; + + if (nested) { + if (waitpkg_enabled) + vmx->nested.msrs.secondary_ctls_high |= + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; + else + vmx->nested.msrs.secondary_ctls_high &= + ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; + } + } + + vmx->secondary_exec_control = exec_control; +} + +static void ept_set_mmio_spte_mask(void) +{ + /* + * EPT Misconfigurations can be generated if the value of bits 2:0 + * of an EPT paging-structure entry is 110b (write/execute). + */ + kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK, + VMX_EPT_MISCONFIG_WX_VALUE, 0); +} + +#define VMX_XSS_EXIT_BITMAP 0 + +/* + * Noting that the initialization of Guest-state Area of VMCS is in + * vmx_vcpu_reset(). + */ +static void init_vmcs(struct vcpu_vmx *vmx) +{ + if (nested) + nested_vmx_set_vmcs_shadowing_bitmap(); + + if (cpu_has_vmx_msr_bitmap()) + vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap)); + + vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ + + /* Control */ + pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx)); + + exec_controls_set(vmx, vmx_exec_control(vmx)); + + if (cpu_has_secondary_exec_ctrls()) { + vmx_compute_secondary_exec_control(vmx); + secondary_exec_controls_set(vmx, vmx->secondary_exec_control); + } + + if (kvm_vcpu_apicv_active(&vmx->vcpu)) { + vmcs_write64(EOI_EXIT_BITMAP0, 0); + vmcs_write64(EOI_EXIT_BITMAP1, 0); + vmcs_write64(EOI_EXIT_BITMAP2, 0); + vmcs_write64(EOI_EXIT_BITMAP3, 0); + + vmcs_write16(GUEST_INTR_STATUS, 0); + + vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR); + vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc))); + } + + if (!kvm_pause_in_guest(vmx->vcpu.kvm)) { + vmcs_write32(PLE_GAP, ple_gap); + vmx->ple_window = ple_window; + vmx->ple_window_dirty = true; + } + + vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0); + vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0); + vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ + + vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */ + vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */ + vmx_set_constant_host_state(vmx); + vmcs_writel(HOST_FS_BASE, 0); /* 22.2.4 */ + vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */ + + if (cpu_has_vmx_vmfunc()) + vmcs_write64(VM_FUNCTION_CONTROL, 0); + + vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); + vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val)); + vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); + vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val)); + + if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) + vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); + + vm_exit_controls_set(vmx, vmx_vmexit_ctrl()); + + /* 22.2.1, 20.8.1 */ + vm_entry_controls_set(vmx, vmx_vmentry_ctrl()); + + vmx->vcpu.arch.cr0_guest_owned_bits = X86_CR0_TS; + vmcs_writel(CR0_GUEST_HOST_MASK, ~X86_CR0_TS); + + set_cr4_guest_host_mask(vmx); + + if (vmx->vpid != 0) + vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); + + if (vmx_xsaves_supported()) + vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP); + + if (enable_pml) { + vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg)); + vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); + } + + if (cpu_has_vmx_encls_vmexit()) + vmcs_write64(ENCLS_EXITING_BITMAP, -1ull); + + if (pt_mode == PT_MODE_HOST_GUEST) { + memset(&vmx->pt_desc, 0, sizeof(vmx->pt_desc)); + /* Bit[6~0] are forced to 1, writes are ignored. */ + vmx->pt_desc.guest.output_mask = 0x7F; + vmcs_write64(GUEST_IA32_RTIT_CTL, 0); + } +} + +static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct msr_data apic_base_msr; + u64 cr0; + + vmx->rmode.vm86_active = 0; + vmx->spec_ctrl = 0; + + vmx->msr_ia32_umwait_control = 0; + + vcpu->arch.microcode_version = 0x100000000ULL; + vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); + vmx->hv_deadline_tsc = -1; + kvm_set_cr8(vcpu, 0); + + if (!init_event) { + apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | + MSR_IA32_APICBASE_ENABLE; + if (kvm_vcpu_is_reset_bsp(vcpu)) + apic_base_msr.data |= MSR_IA32_APICBASE_BSP; + apic_base_msr.host_initiated = true; + kvm_set_apic_base(vcpu, &apic_base_msr); + } + + vmx_segment_cache_clear(vmx); + + seg_setup(VCPU_SREG_CS); + vmcs_write16(GUEST_CS_SELECTOR, 0xf000); + vmcs_writel(GUEST_CS_BASE, 0xffff0000ul); + + seg_setup(VCPU_SREG_DS); + seg_setup(VCPU_SREG_ES); + seg_setup(VCPU_SREG_FS); + seg_setup(VCPU_SREG_GS); + seg_setup(VCPU_SREG_SS); + + vmcs_write16(GUEST_TR_SELECTOR, 0); + vmcs_writel(GUEST_TR_BASE, 0); + vmcs_write32(GUEST_TR_LIMIT, 0xffff); + vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); + + vmcs_write16(GUEST_LDTR_SELECTOR, 0); + vmcs_writel(GUEST_LDTR_BASE, 0); + vmcs_write32(GUEST_LDTR_LIMIT, 0xffff); + vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082); + + if (!init_event) { + vmcs_write32(GUEST_SYSENTER_CS, 0); + vmcs_writel(GUEST_SYSENTER_ESP, 0); + vmcs_writel(GUEST_SYSENTER_EIP, 0); + vmcs_write64(GUEST_IA32_DEBUGCTL, 0); + } + + kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); + kvm_rip_write(vcpu, 0xfff0); + + vmcs_writel(GUEST_GDTR_BASE, 0); + vmcs_write32(GUEST_GDTR_LIMIT, 0xffff); + + vmcs_writel(GUEST_IDTR_BASE, 0); + vmcs_write32(GUEST_IDTR_LIMIT, 0xffff); + + vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); + vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); + vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0); + if (kvm_mpx_supported()) + vmcs_write64(GUEST_BNDCFGS, 0); + + setup_msrs(vmx); + + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */ + + if (cpu_has_vmx_tpr_shadow() && !init_event) { + vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0); + if (cpu_need_tpr_shadow(vcpu)) + vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, + __pa(vcpu->arch.apic->regs)); + vmcs_write32(TPR_THRESHOLD, 0); + } + + kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); + + cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; + vmx->vcpu.arch.cr0 = cr0; + vmx_set_cr0(vcpu, cr0); /* enter rmode */ + vmx_set_cr4(vcpu, 0); + vmx_set_efer(vcpu, 0); + + update_exception_bitmap(vcpu); + + vpid_sync_context(vmx->vpid); + if (init_event) + vmx_clear_hlt(vcpu); +} + +static void enable_irq_window(struct kvm_vcpu *vcpu) +{ + exec_controls_setbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING); +} + +static void enable_nmi_window(struct kvm_vcpu *vcpu) +{ + if (!enable_vnmi || + vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { + enable_irq_window(vcpu); + return; + } + + exec_controls_setbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING); +} + +static void vmx_inject_irq(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + uint32_t intr; + int irq = vcpu->arch.interrupt.nr; + + trace_kvm_inj_virq(irq); + + ++vcpu->stat.irq_injections; + if (vmx->rmode.vm86_active) { + int inc_eip = 0; + if (vcpu->arch.interrupt.soft) + inc_eip = vcpu->arch.event_exit_inst_len; + kvm_inject_realmode_interrupt(vcpu, irq, inc_eip); + return; + } + intr = irq | INTR_INFO_VALID_MASK; + if (vcpu->arch.interrupt.soft) { + intr |= INTR_TYPE_SOFT_INTR; + vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, + vmx->vcpu.arch.event_exit_inst_len); + } else + intr |= INTR_TYPE_EXT_INTR; + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr); + + vmx_clear_hlt(vcpu); +} + +static void vmx_inject_nmi(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (!enable_vnmi) { + /* + * Tracking the NMI-blocked state in software is built upon + * finding the next open IRQ window. This, in turn, depends on + * well-behaving guests: They have to keep IRQs disabled at + * least as long as the NMI handler runs. Otherwise we may + * cause NMI nesting, maybe breaking the guest. But as this is + * highly unlikely, we can live with the residual risk. + */ + vmx->loaded_vmcs->soft_vnmi_blocked = 1; + vmx->loaded_vmcs->vnmi_blocked_time = 0; + } + + ++vcpu->stat.nmi_injections; + vmx->loaded_vmcs->nmi_known_unmasked = false; + + if (vmx->rmode.vm86_active) { + kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0); + return; + } + + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, + INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); + + vmx_clear_hlt(vcpu); +} + +bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + bool masked; + + if (!enable_vnmi) + return vmx->loaded_vmcs->soft_vnmi_blocked; + if (vmx->loaded_vmcs->nmi_known_unmasked) + return false; + masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI; + vmx->loaded_vmcs->nmi_known_unmasked = !masked; + return masked; +} + +void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (!enable_vnmi) { + if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) { + vmx->loaded_vmcs->soft_vnmi_blocked = masked; + vmx->loaded_vmcs->vnmi_blocked_time = 0; + } + } else { + vmx->loaded_vmcs->nmi_known_unmasked = !masked; + if (masked) + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_NMI); + else + vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_NMI); + } +} + +static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) +{ + if (to_vmx(vcpu)->nested.nested_run_pending) + return 0; + + if (!enable_vnmi && + to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked) + return 0; + + return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & + (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI + | GUEST_INTR_STATE_NMI)); +} + +static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) +{ + return (!to_vmx(vcpu)->nested.nested_run_pending && + vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && + !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & + (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)); +} + +static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) +{ + int ret; + + if (enable_unrestricted_guest) + return 0; + + ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr, + PAGE_SIZE * 3); + if (ret) + return ret; + to_kvm_vmx(kvm)->tss_addr = addr; + return init_rmode_tss(kvm); +} + +static int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr) +{ + to_kvm_vmx(kvm)->ept_identity_map_addr = ident_addr; + return 0; +} + +static bool rmode_exception(struct kvm_vcpu *vcpu, int vec) +{ + switch (vec) { + case BP_VECTOR: + /* + * Update instruction length as we may reinject the exception + * from user space while in guest debugging mode. + */ + to_vmx(vcpu)->vcpu.arch.event_exit_inst_len = + vmcs_read32(VM_EXIT_INSTRUCTION_LEN); + if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) + return false; + /* fall through */ + case DB_VECTOR: + if (vcpu->guest_debug & + (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) + return false; + /* fall through */ + case DE_VECTOR: + case OF_VECTOR: + case BR_VECTOR: + case UD_VECTOR: + case DF_VECTOR: + case SS_VECTOR: + case GP_VECTOR: + case MF_VECTOR: + return true; + break; + } + return false; +} + +static int handle_rmode_exception(struct kvm_vcpu *vcpu, + int vec, u32 err_code) +{ + /* + * Instruction with address size override prefix opcode 0x67 + * Cause the #SS fault with 0 error code in VM86 mode. + */ + if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) { + if (kvm_emulate_instruction(vcpu, 0)) { + if (vcpu->arch.halt_request) { + vcpu->arch.halt_request = 0; + return kvm_vcpu_halt(vcpu); + } + return 1; + } + return 0; + } + + /* + * Forward all other exceptions that are valid in real mode. + * FIXME: Breaks guest debugging in real mode, needs to be fixed with + * the required debugging infrastructure rework. + */ + kvm_queue_exception(vcpu, vec); + return 1; +} + +/* + * Trigger machine check on the host. We assume all the MSRs are already set up + * by the CPU and that we still run on the same CPU as the MCE occurred on. + * We pass a fake environment to the machine check handler because we want + * the guest to be always treated like user space, no matter what context + * it used internally. + */ +static void kvm_machine_check(void) +{ +#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64) + struct pt_regs regs = { + .cs = 3, /* Fake ring 3 no matter what the guest ran on */ + .flags = X86_EFLAGS_IF, + }; + + do_machine_check(®s, 0); +#endif +} + +static int handle_machine_check(struct kvm_vcpu *vcpu) +{ + /* handled by vmx_vcpu_run() */ + return 1; +} + +static int handle_exception_nmi(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct kvm_run *kvm_run = vcpu->run; + u32 intr_info, ex_no, error_code; + unsigned long cr2, rip, dr6; + u32 vect_info; + + vect_info = vmx->idt_vectoring_info; + intr_info = vmx->exit_intr_info; + + if (is_machine_check(intr_info) || is_nmi(intr_info)) + return 1; /* handled by handle_exception_nmi_irqoff() */ + + if (is_invalid_opcode(intr_info)) + return handle_ud(vcpu); + + error_code = 0; + if (intr_info & INTR_INFO_DELIVER_CODE_MASK) + error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); + + if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) { + WARN_ON_ONCE(!enable_vmware_backdoor); + + /* + * VMware backdoor emulation on #GP interception only handles + * IN{S}, OUT{S}, and RDPMC, none of which generate a non-zero + * error code on #GP. + */ + if (error_code) { + kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); + return 1; + } + return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP); + } + + /* + * The #PF with PFEC.RSVD = 1 indicates the guest is accessing + * MMIO, it is better to report an internal error. + * See the comments in vmx_handle_exit. + */ + if ((vect_info & VECTORING_INFO_VALID_MASK) && + !(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) { + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX; + vcpu->run->internal.ndata = 3; + vcpu->run->internal.data[0] = vect_info; + vcpu->run->internal.data[1] = intr_info; + vcpu->run->internal.data[2] = error_code; + return 0; + } + + if (is_page_fault(intr_info)) { + cr2 = vmcs_readl(EXIT_QUALIFICATION); + /* EPT won't cause page fault directly */ + WARN_ON_ONCE(!vcpu->arch.apf.host_apf_reason && enable_ept); + return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0); + } + + ex_no = intr_info & INTR_INFO_VECTOR_MASK; + + if (vmx->rmode.vm86_active && rmode_exception(vcpu, ex_no)) + return handle_rmode_exception(vcpu, ex_no, error_code); + + switch (ex_no) { + case AC_VECTOR: + kvm_queue_exception_e(vcpu, AC_VECTOR, error_code); + return 1; + case DB_VECTOR: + dr6 = vmcs_readl(EXIT_QUALIFICATION); + if (!(vcpu->guest_debug & + (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { + vcpu->arch.dr6 &= ~DR_TRAP_BITS; + vcpu->arch.dr6 |= dr6 | DR6_RTM; + if (is_icebp(intr_info)) + WARN_ON(!skip_emulated_instruction(vcpu)); + + kvm_queue_exception(vcpu, DB_VECTOR); + return 1; + } + kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1; + kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7); + /* fall through */ + case BP_VECTOR: + /* + * Update instruction length as we may reinject #BP from + * user space while in guest debugging mode. Reading it for + * #DB as well causes no harm, it is not used in that case. + */ + vmx->vcpu.arch.event_exit_inst_len = + vmcs_read32(VM_EXIT_INSTRUCTION_LEN); + kvm_run->exit_reason = KVM_EXIT_DEBUG; + rip = kvm_rip_read(vcpu); + kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; + kvm_run->debug.arch.exception = ex_no; + break; + default: + kvm_run->exit_reason = KVM_EXIT_EXCEPTION; + kvm_run->ex.exception = ex_no; + kvm_run->ex.error_code = error_code; + break; + } + return 0; +} + +static __always_inline int handle_external_interrupt(struct kvm_vcpu *vcpu) +{ + ++vcpu->stat.irq_exits; + return 1; +} + +static int handle_triple_fault(struct kvm_vcpu *vcpu) +{ + vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; + vcpu->mmio_needed = 0; + return 0; +} + +static int handle_io(struct kvm_vcpu *vcpu) +{ + unsigned long exit_qualification; + int size, in, string; + unsigned port; + + exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + string = (exit_qualification & 16) != 0; + + ++vcpu->stat.io_exits; + + if (string) + return kvm_emulate_instruction(vcpu, 0); + + port = exit_qualification >> 16; + size = (exit_qualification & 7) + 1; + in = (exit_qualification & 8) != 0; + + return kvm_fast_pio(vcpu, size, port, in); +} + +static void +vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) +{ + /* + * Patch in the VMCALL instruction: + */ + hypercall[0] = 0x0f; + hypercall[1] = 0x01; + hypercall[2] = 0xc1; +} + +/* called to set cr0 as appropriate for a mov-to-cr0 exit. */ +static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) +{ + if (is_guest_mode(vcpu)) { + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + unsigned long orig_val = val; + + /* + * We get here when L2 changed cr0 in a way that did not change + * any of L1's shadowed bits (see nested_vmx_exit_handled_cr), + * but did change L0 shadowed bits. So we first calculate the + * effective cr0 value that L1 would like to write into the + * hardware. It consists of the L2-owned bits from the new + * value combined with the L1-owned bits from L1's guest_cr0. + */ + val = (val & ~vmcs12->cr0_guest_host_mask) | + (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask); + + if (!nested_guest_cr0_valid(vcpu, val)) + return 1; + + if (kvm_set_cr0(vcpu, val)) + return 1; + vmcs_writel(CR0_READ_SHADOW, orig_val); + return 0; + } else { + if (to_vmx(vcpu)->nested.vmxon && + !nested_host_cr0_valid(vcpu, val)) + return 1; + + return kvm_set_cr0(vcpu, val); + } +} + +static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val) +{ + if (is_guest_mode(vcpu)) { + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + unsigned long orig_val = val; + + /* analogously to handle_set_cr0 */ + val = (val & ~vmcs12->cr4_guest_host_mask) | + (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask); + if (kvm_set_cr4(vcpu, val)) + return 1; + vmcs_writel(CR4_READ_SHADOW, orig_val); + return 0; + } else + return kvm_set_cr4(vcpu, val); +} + +static int handle_desc(struct kvm_vcpu *vcpu) +{ + WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP)); + return kvm_emulate_instruction(vcpu, 0); +} + +static int handle_cr(struct kvm_vcpu *vcpu) +{ + unsigned long exit_qualification, val; + int cr; + int reg; + int err; + int ret; + + exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + cr = exit_qualification & 15; + reg = (exit_qualification >> 8) & 15; + switch ((exit_qualification >> 4) & 3) { + case 0: /* mov to cr */ + val = kvm_register_readl(vcpu, reg); + trace_kvm_cr_write(cr, val); + switch (cr) { + case 0: + err = handle_set_cr0(vcpu, val); + return kvm_complete_insn_gp(vcpu, err); + case 3: + WARN_ON_ONCE(enable_unrestricted_guest); + err = kvm_set_cr3(vcpu, val); + return kvm_complete_insn_gp(vcpu, err); + case 4: + err = handle_set_cr4(vcpu, val); + return kvm_complete_insn_gp(vcpu, err); + case 8: { + u8 cr8_prev = kvm_get_cr8(vcpu); + u8 cr8 = (u8)val; + err = kvm_set_cr8(vcpu, cr8); + ret = kvm_complete_insn_gp(vcpu, err); + if (lapic_in_kernel(vcpu)) + return ret; + if (cr8_prev <= cr8) + return ret; + /* + * TODO: we might be squashing a + * KVM_GUESTDBG_SINGLESTEP-triggered + * KVM_EXIT_DEBUG here. + */ + vcpu->run->exit_reason = KVM_EXIT_SET_TPR; + return 0; + } + } + break; + case 2: /* clts */ + WARN_ONCE(1, "Guest should always own CR0.TS"); + vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); + trace_kvm_cr_write(0, kvm_read_cr0(vcpu)); + return kvm_skip_emulated_instruction(vcpu); + case 1: /*mov from cr*/ + switch (cr) { + case 3: + WARN_ON_ONCE(enable_unrestricted_guest); + val = kvm_read_cr3(vcpu); + kvm_register_write(vcpu, reg, val); + trace_kvm_cr_read(cr, val); + return kvm_skip_emulated_instruction(vcpu); + case 8: + val = kvm_get_cr8(vcpu); + kvm_register_write(vcpu, reg, val); + trace_kvm_cr_read(cr, val); + return kvm_skip_emulated_instruction(vcpu); + } + break; + case 3: /* lmsw */ + val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f; + trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val); + kvm_lmsw(vcpu, val); + + return kvm_skip_emulated_instruction(vcpu); + default: + break; + } + vcpu->run->exit_reason = 0; + vcpu_unimpl(vcpu, "unhandled control register: op %d cr %d\n", + (int)(exit_qualification >> 4) & 3, cr); + return 0; +} + +static int handle_dr(struct kvm_vcpu *vcpu) +{ + unsigned long exit_qualification; + int dr, dr7, reg; + + exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + dr = exit_qualification & DEBUG_REG_ACCESS_NUM; + + /* First, if DR does not exist, trigger UD */ + if (!kvm_require_dr(vcpu, dr)) + return 1; + + /* Do not handle if the CPL > 0, will trigger GP on re-entry */ + if (!kvm_require_cpl(vcpu, 0)) + return 1; + dr7 = vmcs_readl(GUEST_DR7); + if (dr7 & DR7_GD) { + /* + * As the vm-exit takes precedence over the debug trap, we + * need to emulate the latter, either for the host or the + * guest debugging itself. + */ + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { + vcpu->run->debug.arch.dr6 = vcpu->arch.dr6; + vcpu->run->debug.arch.dr7 = dr7; + vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu); + vcpu->run->debug.arch.exception = DB_VECTOR; + vcpu->run->exit_reason = KVM_EXIT_DEBUG; + return 0; + } else { + vcpu->arch.dr6 &= ~DR_TRAP_BITS; + vcpu->arch.dr6 |= DR6_BD | DR6_RTM; + kvm_queue_exception(vcpu, DB_VECTOR); + return 1; + } + } + + if (vcpu->guest_debug == 0) { + exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING); + + /* + * No more DR vmexits; force a reload of the debug registers + * and reenter on this instruction. The next vmexit will + * retrieve the full state of the debug registers. + */ + vcpu->arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT; + return 1; + } + + reg = DEBUG_REG_ACCESS_REG(exit_qualification); + if (exit_qualification & TYPE_MOV_FROM_DR) { + unsigned long val; + + if (kvm_get_dr(vcpu, dr, &val)) + return 1; + kvm_register_write(vcpu, reg, val); + } else + if (kvm_set_dr(vcpu, dr, kvm_register_readl(vcpu, reg))) + return 1; + + return kvm_skip_emulated_instruction(vcpu); +} + +static u64 vmx_get_dr6(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.dr6; +} + +static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val) +{ +} + +static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) +{ + get_debugreg(vcpu->arch.db[0], 0); + get_debugreg(vcpu->arch.db[1], 1); + get_debugreg(vcpu->arch.db[2], 2); + get_debugreg(vcpu->arch.db[3], 3); + get_debugreg(vcpu->arch.dr6, 6); + vcpu->arch.dr7 = vmcs_readl(GUEST_DR7); + + vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT; + exec_controls_setbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING); +} + +static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) +{ + vmcs_writel(GUEST_DR7, val); +} + +static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu) +{ + kvm_apic_update_ppr(vcpu); + return 1; +} + +static int handle_interrupt_window(struct kvm_vcpu *vcpu) +{ + exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING); + + kvm_make_request(KVM_REQ_EVENT, vcpu); + + ++vcpu->stat.irq_window_exits; + return 1; +} + +static int handle_vmcall(struct kvm_vcpu *vcpu) +{ + return kvm_emulate_hypercall(vcpu); +} + +static int handle_invd(struct kvm_vcpu *vcpu) +{ + return kvm_emulate_instruction(vcpu, 0); +} + +static int handle_invlpg(struct kvm_vcpu *vcpu) +{ + unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + + kvm_mmu_invlpg(vcpu, exit_qualification); + return kvm_skip_emulated_instruction(vcpu); +} + +static int handle_rdpmc(struct kvm_vcpu *vcpu) +{ + int err; + + err = kvm_rdpmc(vcpu); + return kvm_complete_insn_gp(vcpu, err); +} + +static int handle_wbinvd(struct kvm_vcpu *vcpu) +{ + return kvm_emulate_wbinvd(vcpu); +} + +static int handle_xsetbv(struct kvm_vcpu *vcpu) +{ + u64 new_bv = kvm_read_edx_eax(vcpu); + u32 index = kvm_rcx_read(vcpu); + + if (kvm_set_xcr(vcpu, index, new_bv) == 0) + return kvm_skip_emulated_instruction(vcpu); + return 1; +} + +static int handle_apic_access(struct kvm_vcpu *vcpu) +{ + if (likely(fasteoi)) { + unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + int access_type, offset; + + access_type = exit_qualification & APIC_ACCESS_TYPE; + offset = exit_qualification & APIC_ACCESS_OFFSET; + /* + * Sane guest uses MOV to write EOI, with written value + * not cared. So make a short-circuit here by avoiding + * heavy instruction emulation. + */ + if ((access_type == TYPE_LINEAR_APIC_INST_WRITE) && + (offset == APIC_EOI)) { + kvm_lapic_set_eoi(vcpu); + return kvm_skip_emulated_instruction(vcpu); + } + } + return kvm_emulate_instruction(vcpu, 0); +} + +static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu) +{ + unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + int vector = exit_qualification & 0xff; + + /* EOI-induced VM exit is trap-like and thus no need to adjust IP */ + kvm_apic_set_eoi_accelerated(vcpu, vector); + return 1; +} + +static int handle_apic_write(struct kvm_vcpu *vcpu) +{ + unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + u32 offset = exit_qualification & 0xfff; + + /* APIC-write VM exit is trap-like and thus no need to adjust IP */ + kvm_apic_write_nodecode(vcpu, offset); + return 1; +} + +static int handle_task_switch(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned long exit_qualification; + bool has_error_code = false; + u32 error_code = 0; + u16 tss_selector; + int reason, type, idt_v, idt_index; + + idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK); + idt_index = (vmx->idt_vectoring_info & VECTORING_INFO_VECTOR_MASK); + type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK); + + exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + + reason = (u32)exit_qualification >> 30; + if (reason == TASK_SWITCH_GATE && idt_v) { + switch (type) { + case INTR_TYPE_NMI_INTR: + vcpu->arch.nmi_injected = false; + vmx_set_nmi_mask(vcpu, true); + break; + case INTR_TYPE_EXT_INTR: + case INTR_TYPE_SOFT_INTR: + kvm_clear_interrupt_queue(vcpu); + break; + case INTR_TYPE_HARD_EXCEPTION: + if (vmx->idt_vectoring_info & + VECTORING_INFO_DELIVER_CODE_MASK) { + has_error_code = true; + error_code = + vmcs_read32(IDT_VECTORING_ERROR_CODE); + } + /* fall through */ + case INTR_TYPE_SOFT_EXCEPTION: + kvm_clear_exception_queue(vcpu); + break; + default: + break; + } + } + tss_selector = exit_qualification; + + if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION && + type != INTR_TYPE_EXT_INTR && + type != INTR_TYPE_NMI_INTR)) + WARN_ON(!skip_emulated_instruction(vcpu)); + + /* + * TODO: What about debug traps on tss switch? + * Are we supposed to inject them and update dr6? + */ + return kvm_task_switch(vcpu, tss_selector, + type == INTR_TYPE_SOFT_INTR ? idt_index : -1, + reason, has_error_code, error_code); +} + +static int handle_ept_violation(struct kvm_vcpu *vcpu) +{ + unsigned long exit_qualification; + gpa_t gpa; + u64 error_code; + + exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + + /* + * EPT violation happened while executing iret from NMI, + * "blocked by NMI" bit has to be set before next VM entry. + * There are errata that may cause this bit to not be set: + * AAK134, BY25. + */ + if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && + enable_vnmi && + (exit_qualification & INTR_INFO_UNBLOCK_NMI)) + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); + + gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); + trace_kvm_page_fault(gpa, exit_qualification); + + /* Is it a read fault? */ + error_code = (exit_qualification & EPT_VIOLATION_ACC_READ) + ? PFERR_USER_MASK : 0; + /* Is it a write fault? */ + error_code |= (exit_qualification & EPT_VIOLATION_ACC_WRITE) + ? PFERR_WRITE_MASK : 0; + /* Is it a fetch fault? */ + error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR) + ? PFERR_FETCH_MASK : 0; + /* ept page table entry is present? */ + error_code |= (exit_qualification & + (EPT_VIOLATION_READABLE | EPT_VIOLATION_WRITABLE | + EPT_VIOLATION_EXECUTABLE)) + ? PFERR_PRESENT_MASK : 0; + + error_code |= (exit_qualification & 0x100) != 0 ? + PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK; + + vcpu->arch.exit_qualification = exit_qualification; + return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); +} + +static int handle_ept_misconfig(struct kvm_vcpu *vcpu) +{ + gpa_t gpa; + + /* + * A nested guest cannot optimize MMIO vmexits, because we have an + * nGPA here instead of the required GPA. + */ + gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); + if (!is_guest_mode(vcpu) && + !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { + trace_kvm_fast_mmio(gpa); + return kvm_skip_emulated_instruction(vcpu); + } + + return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0); +} + +static int handle_nmi_window(struct kvm_vcpu *vcpu) +{ + WARN_ON_ONCE(!enable_vnmi); + exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING); + ++vcpu->stat.nmi_window_exits; + kvm_make_request(KVM_REQ_EVENT, vcpu); + + return 1; +} + +static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + bool intr_window_requested; + unsigned count = 130; + + /* + * We should never reach the point where we are emulating L2 + * due to invalid guest state as that means we incorrectly + * allowed a nested VMEntry with an invalid vmcs12. + */ + WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending); + + intr_window_requested = exec_controls_get(vmx) & + CPU_BASED_INTR_WINDOW_EXITING; + + while (vmx->emulation_required && count-- != 0) { + if (intr_window_requested && vmx_interrupt_allowed(vcpu)) + return handle_interrupt_window(&vmx->vcpu); + + if (kvm_test_request(KVM_REQ_EVENT, vcpu)) + return 1; + + if (!kvm_emulate_instruction(vcpu, 0)) + return 0; + + if (vmx->emulation_required && !vmx->rmode.vm86_active && + vcpu->arch.exception.pending) { + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = + KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; + return 0; + } + + if (vcpu->arch.halt_request) { + vcpu->arch.halt_request = 0; + return kvm_vcpu_halt(vcpu); + } + + /* + * Note, return 1 and not 0, vcpu_run() is responsible for + * morphing the pending signal into the proper return code. + */ + if (signal_pending(current)) + return 1; + + if (need_resched()) + schedule(); + } + + return 1; +} + +static void grow_ple_window(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned int old = vmx->ple_window; + + vmx->ple_window = __grow_ple_window(old, ple_window, + ple_window_grow, + ple_window_max); + + if (vmx->ple_window != old) { + vmx->ple_window_dirty = true; + trace_kvm_ple_window_update(vcpu->vcpu_id, + vmx->ple_window, old); + } +} + +static void shrink_ple_window(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned int old = vmx->ple_window; + + vmx->ple_window = __shrink_ple_window(old, ple_window, + ple_window_shrink, + ple_window); + + if (vmx->ple_window != old) { + vmx->ple_window_dirty = true; + trace_kvm_ple_window_update(vcpu->vcpu_id, + vmx->ple_window, old); + } +} + +/* + * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR. + */ +static void wakeup_handler(void) +{ + struct kvm_vcpu *vcpu; + int cpu = smp_processor_id(); + + spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); + list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu), + blocked_vcpu_list) { + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + + if (pi_test_on(pi_desc) == 1) + kvm_vcpu_kick(vcpu); + } + spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); +} + +static void vmx_enable_tdp(void) +{ + kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK, + enable_ept_ad_bits ? VMX_EPT_ACCESS_BIT : 0ull, + enable_ept_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull, + 0ull, VMX_EPT_EXECUTABLE_MASK, + cpu_has_vmx_ept_execute_only() ? 0ull : VMX_EPT_READABLE_MASK, + VMX_EPT_RWX_MASK, 0ull); + + ept_set_mmio_spte_mask(); + kvm_enable_tdp(); +} + +/* + * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE + * exiting, so only get here on cpu with PAUSE-Loop-Exiting. + */ +static int handle_pause(struct kvm_vcpu *vcpu) +{ + if (!kvm_pause_in_guest(vcpu->kvm)) + grow_ple_window(vcpu); + + /* + * Intel sdm vol3 ch-25.1.3 says: The "PAUSE-loop exiting" + * VM-execution control is ignored if CPL > 0. OTOH, KVM + * never set PAUSE_EXITING and just set PLE if supported, + * so the vcpu must be CPL=0 if it gets a PAUSE exit. + */ + kvm_vcpu_on_spin(vcpu, true); + return kvm_skip_emulated_instruction(vcpu); +} + +static int handle_nop(struct kvm_vcpu *vcpu) +{ + return kvm_skip_emulated_instruction(vcpu); +} + +static int handle_mwait(struct kvm_vcpu *vcpu) +{ + printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n"); + return handle_nop(vcpu); +} + +static int handle_invalid_op(struct kvm_vcpu *vcpu) +{ + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; +} + +static int handle_monitor_trap(struct kvm_vcpu *vcpu) +{ + return 1; +} + +static int handle_monitor(struct kvm_vcpu *vcpu) +{ + printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n"); + return handle_nop(vcpu); +} + +static int handle_invpcid(struct kvm_vcpu *vcpu) +{ + u32 vmx_instruction_info; + unsigned long type; + bool pcid_enabled; + gva_t gva; + struct x86_exception e; + unsigned i; + unsigned long roots_to_free = 0; + struct { + u64 pcid; + u64 gla; + } operand; + + if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } + + vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); + type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); + + if (type > 3) { + kvm_inject_gp(vcpu, 0); + return 1; + } + + /* According to the Intel instruction reference, the memory operand + * is read even if it isn't needed (e.g., for type==all) + */ + if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), + vmx_instruction_info, false, + sizeof(operand), &gva)) + return 1; + + if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { + kvm_inject_page_fault(vcpu, &e); + return 1; + } + + if (operand.pcid >> 12 != 0) { + kvm_inject_gp(vcpu, 0); + return 1; + } + + pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE); + + switch (type) { + case INVPCID_TYPE_INDIV_ADDR: + if ((!pcid_enabled && (operand.pcid != 0)) || + is_noncanonical_address(operand.gla, vcpu)) { + kvm_inject_gp(vcpu, 0); + return 1; + } + kvm_mmu_invpcid_gva(vcpu, operand.gla, operand.pcid); + return kvm_skip_emulated_instruction(vcpu); + + case INVPCID_TYPE_SINGLE_CTXT: + if (!pcid_enabled && (operand.pcid != 0)) { + kvm_inject_gp(vcpu, 0); + return 1; + } + + if (kvm_get_active_pcid(vcpu) == operand.pcid) { + kvm_mmu_sync_roots(vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); + } + + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) + if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].cr3) + == operand.pcid) + roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i); + + kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, roots_to_free); + /* + * If neither the current cr3 nor any of the prev_roots use the + * given PCID, then nothing needs to be done here because a + * resync will happen anyway before switching to any other CR3. + */ + + return kvm_skip_emulated_instruction(vcpu); + + case INVPCID_TYPE_ALL_NON_GLOBAL: + /* + * Currently, KVM doesn't mark global entries in the shadow + * page tables, so a non-global flush just degenerates to a + * global flush. If needed, we could optimize this later by + * keeping track of global entries in shadow page tables. + */ + + /* fall-through */ + case INVPCID_TYPE_ALL_INCL_GLOBAL: + kvm_mmu_unload(vcpu); + return kvm_skip_emulated_instruction(vcpu); + + default: + BUG(); /* We have already checked above that type <= 3 */ + } +} + +static int handle_pml_full(struct kvm_vcpu *vcpu) +{ + unsigned long exit_qualification; + + trace_kvm_pml_full(vcpu->vcpu_id); + + exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + + /* + * PML buffer FULL happened while executing iret from NMI, + * "blocked by NMI" bit has to be set before next VM entry. + */ + if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && + enable_vnmi && + (exit_qualification & INTR_INFO_UNBLOCK_NMI)) + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_NMI); + + /* + * PML buffer already flushed at beginning of VMEXIT. Nothing to do + * here.., and there's no userspace involvement needed for PML. + */ + return 1; +} + +static int handle_preemption_timer(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (!vmx->req_immediate_exit && + !unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled)) + kvm_lapic_expired_hv_timer(vcpu); + + return 1; +} + +/* + * When nested=0, all VMX instruction VM Exits filter here. The handlers + * are overwritten by nested_vmx_setup() when nested=1. + */ +static int handle_vmx_instruction(struct kvm_vcpu *vcpu) +{ + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; +} + +static int handle_encls(struct kvm_vcpu *vcpu) +{ + /* + * SGX virtualization is not yet supported. There is no software + * enable bit for SGX, so we have to trap ENCLS and inject a #UD + * to prevent the guest from executing ENCLS. + */ + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; +} + +/* + * The exit handlers return 1 if the exit was handled fully and guest execution + * may resume. Otherwise they set the kvm_run parameter to indicate what needs + * to be done to userspace and return 0. + */ +static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { + [EXIT_REASON_EXCEPTION_NMI] = handle_exception_nmi, + [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, + [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, + [EXIT_REASON_NMI_WINDOW] = handle_nmi_window, + [EXIT_REASON_IO_INSTRUCTION] = handle_io, + [EXIT_REASON_CR_ACCESS] = handle_cr, + [EXIT_REASON_DR_ACCESS] = handle_dr, + [EXIT_REASON_CPUID] = kvm_emulate_cpuid, + [EXIT_REASON_MSR_READ] = kvm_emulate_rdmsr, + [EXIT_REASON_MSR_WRITE] = kvm_emulate_wrmsr, + [EXIT_REASON_INTERRUPT_WINDOW] = handle_interrupt_window, + [EXIT_REASON_HLT] = kvm_emulate_halt, + [EXIT_REASON_INVD] = handle_invd, + [EXIT_REASON_INVLPG] = handle_invlpg, + [EXIT_REASON_RDPMC] = handle_rdpmc, + [EXIT_REASON_VMCALL] = handle_vmcall, + [EXIT_REASON_VMCLEAR] = handle_vmx_instruction, + [EXIT_REASON_VMLAUNCH] = handle_vmx_instruction, + [EXIT_REASON_VMPTRLD] = handle_vmx_instruction, + [EXIT_REASON_VMPTRST] = handle_vmx_instruction, + [EXIT_REASON_VMREAD] = handle_vmx_instruction, + [EXIT_REASON_VMRESUME] = handle_vmx_instruction, + [EXIT_REASON_VMWRITE] = handle_vmx_instruction, + [EXIT_REASON_VMOFF] = handle_vmx_instruction, + [EXIT_REASON_VMON] = handle_vmx_instruction, + [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, + [EXIT_REASON_APIC_ACCESS] = handle_apic_access, + [EXIT_REASON_APIC_WRITE] = handle_apic_write, + [EXIT_REASON_EOI_INDUCED] = handle_apic_eoi_induced, + [EXIT_REASON_WBINVD] = handle_wbinvd, + [EXIT_REASON_XSETBV] = handle_xsetbv, + [EXIT_REASON_TASK_SWITCH] = handle_task_switch, + [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, + [EXIT_REASON_GDTR_IDTR] = handle_desc, + [EXIT_REASON_LDTR_TR] = handle_desc, + [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, + [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, + [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, + [EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait, + [EXIT_REASON_MONITOR_TRAP_FLAG] = handle_monitor_trap, + [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor, + [EXIT_REASON_INVEPT] = handle_vmx_instruction, + [EXIT_REASON_INVVPID] = handle_vmx_instruction, + [EXIT_REASON_RDRAND] = handle_invalid_op, + [EXIT_REASON_RDSEED] = handle_invalid_op, + [EXIT_REASON_PML_FULL] = handle_pml_full, + [EXIT_REASON_INVPCID] = handle_invpcid, + [EXIT_REASON_VMFUNC] = handle_vmx_instruction, + [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer, + [EXIT_REASON_ENCLS] = handle_encls, +}; + +static const int kvm_vmx_max_exit_handlers = + ARRAY_SIZE(kvm_vmx_exit_handlers); + +static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) +{ + *info1 = vmcs_readl(EXIT_QUALIFICATION); + *info2 = vmcs_read32(VM_EXIT_INTR_INFO); +} + +static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx) +{ + if (vmx->pml_pg) { + __free_page(vmx->pml_pg); + vmx->pml_pg = NULL; + } +} + +static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + u64 *pml_buf; + u16 pml_idx; + + pml_idx = vmcs_read16(GUEST_PML_INDEX); + + /* Do nothing if PML buffer is empty */ + if (pml_idx == (PML_ENTITY_NUM - 1)) + return; + + /* PML index always points to next available PML buffer entity */ + if (pml_idx >= PML_ENTITY_NUM) + pml_idx = 0; + else + pml_idx++; + + pml_buf = page_address(vmx->pml_pg); + for (; pml_idx < PML_ENTITY_NUM; pml_idx++) { + u64 gpa; + + gpa = pml_buf[pml_idx]; + WARN_ON(gpa & (PAGE_SIZE - 1)); + kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); + } + + /* reset PML index */ + vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); +} + +/* + * Flush all vcpus' PML buffer and update logged GPAs to dirty_bitmap. + * Called before reporting dirty_bitmap to userspace. + */ +static void kvm_flush_pml_buffers(struct kvm *kvm) +{ + int i; + struct kvm_vcpu *vcpu; + /* + * We only need to kick vcpu out of guest mode here, as PML buffer + * is flushed at beginning of all VMEXITs, and it's obvious that only + * vcpus running in guest are possible to have unflushed GPAs in PML + * buffer. + */ + kvm_for_each_vcpu(i, vcpu, kvm) + kvm_vcpu_kick(vcpu); +} + +static void vmx_dump_sel(char *name, uint32_t sel) +{ + pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n", + name, vmcs_read16(sel), + vmcs_read32(sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR), + vmcs_read32(sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR), + vmcs_readl(sel + GUEST_ES_BASE - GUEST_ES_SELECTOR)); +} + +static void vmx_dump_dtsel(char *name, uint32_t limit) +{ + pr_err("%s limit=0x%08x, base=0x%016lx\n", + name, vmcs_read32(limit), + vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT)); +} + +void dump_vmcs(void) +{ + u32 vmentry_ctl, vmexit_ctl; + u32 cpu_based_exec_ctrl, pin_based_exec_ctrl, secondary_exec_control; + unsigned long cr4; + u64 efer; + int i, n; + + if (!dump_invalid_vmcs) { + pr_warn_ratelimited("set kvm_intel.dump_invalid_vmcs=1 to dump internal KVM state.\n"); + return; + } + + vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS); + vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS); + cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL); + cr4 = vmcs_readl(GUEST_CR4); + efer = vmcs_read64(GUEST_IA32_EFER); + secondary_exec_control = 0; + if (cpu_has_secondary_exec_ctrls()) + secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); + + pr_err("*** Guest State ***\n"); + pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n", + vmcs_readl(GUEST_CR0), vmcs_readl(CR0_READ_SHADOW), + vmcs_readl(CR0_GUEST_HOST_MASK)); + pr_err("CR4: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n", + cr4, vmcs_readl(CR4_READ_SHADOW), vmcs_readl(CR4_GUEST_HOST_MASK)); + pr_err("CR3 = 0x%016lx\n", vmcs_readl(GUEST_CR3)); + if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) && + (cr4 & X86_CR4_PAE) && !(efer & EFER_LMA)) + { + pr_err("PDPTR0 = 0x%016llx PDPTR1 = 0x%016llx\n", + vmcs_read64(GUEST_PDPTR0), vmcs_read64(GUEST_PDPTR1)); + pr_err("PDPTR2 = 0x%016llx PDPTR3 = 0x%016llx\n", + vmcs_read64(GUEST_PDPTR2), vmcs_read64(GUEST_PDPTR3)); + } + pr_err("RSP = 0x%016lx RIP = 0x%016lx\n", + vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP)); + pr_err("RFLAGS=0x%08lx DR7 = 0x%016lx\n", + vmcs_readl(GUEST_RFLAGS), vmcs_readl(GUEST_DR7)); + pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n", + vmcs_readl(GUEST_SYSENTER_ESP), + vmcs_read32(GUEST_SYSENTER_CS), vmcs_readl(GUEST_SYSENTER_EIP)); + vmx_dump_sel("CS: ", GUEST_CS_SELECTOR); + vmx_dump_sel("DS: ", GUEST_DS_SELECTOR); + vmx_dump_sel("SS: ", GUEST_SS_SELECTOR); + vmx_dump_sel("ES: ", GUEST_ES_SELECTOR); + vmx_dump_sel("FS: ", GUEST_FS_SELECTOR); + vmx_dump_sel("GS: ", GUEST_GS_SELECTOR); + vmx_dump_dtsel("GDTR:", GUEST_GDTR_LIMIT); + vmx_dump_sel("LDTR:", GUEST_LDTR_SELECTOR); + vmx_dump_dtsel("IDTR:", GUEST_IDTR_LIMIT); + vmx_dump_sel("TR: ", GUEST_TR_SELECTOR); + if ((vmexit_ctl & (VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER)) || + (vmentry_ctl & (VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER))) + pr_err("EFER = 0x%016llx PAT = 0x%016llx\n", + efer, vmcs_read64(GUEST_IA32_PAT)); + pr_err("DebugCtl = 0x%016llx DebugExceptions = 0x%016lx\n", + vmcs_read64(GUEST_IA32_DEBUGCTL), + vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS)); + if (cpu_has_load_perf_global_ctrl() && + vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) + pr_err("PerfGlobCtl = 0x%016llx\n", + vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL)); + if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS) + pr_err("BndCfgS = 0x%016llx\n", vmcs_read64(GUEST_BNDCFGS)); + pr_err("Interruptibility = %08x ActivityState = %08x\n", + vmcs_read32(GUEST_INTERRUPTIBILITY_INFO), + vmcs_read32(GUEST_ACTIVITY_STATE)); + if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) + pr_err("InterruptStatus = %04x\n", + vmcs_read16(GUEST_INTR_STATUS)); + + pr_err("*** Host State ***\n"); + pr_err("RIP = 0x%016lx RSP = 0x%016lx\n", + vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP)); + pr_err("CS=%04x SS=%04x DS=%04x ES=%04x FS=%04x GS=%04x TR=%04x\n", + vmcs_read16(HOST_CS_SELECTOR), vmcs_read16(HOST_SS_SELECTOR), + vmcs_read16(HOST_DS_SELECTOR), vmcs_read16(HOST_ES_SELECTOR), + vmcs_read16(HOST_FS_SELECTOR), vmcs_read16(HOST_GS_SELECTOR), + vmcs_read16(HOST_TR_SELECTOR)); + pr_err("FSBase=%016lx GSBase=%016lx TRBase=%016lx\n", + vmcs_readl(HOST_FS_BASE), vmcs_readl(HOST_GS_BASE), + vmcs_readl(HOST_TR_BASE)); + pr_err("GDTBase=%016lx IDTBase=%016lx\n", + vmcs_readl(HOST_GDTR_BASE), vmcs_readl(HOST_IDTR_BASE)); + pr_err("CR0=%016lx CR3=%016lx CR4=%016lx\n", + vmcs_readl(HOST_CR0), vmcs_readl(HOST_CR3), + vmcs_readl(HOST_CR4)); + pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n", + vmcs_readl(HOST_IA32_SYSENTER_ESP), + vmcs_read32(HOST_IA32_SYSENTER_CS), + vmcs_readl(HOST_IA32_SYSENTER_EIP)); + if (vmexit_ctl & (VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER)) + pr_err("EFER = 0x%016llx PAT = 0x%016llx\n", + vmcs_read64(HOST_IA32_EFER), + vmcs_read64(HOST_IA32_PAT)); + if (cpu_has_load_perf_global_ctrl() && + vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) + pr_err("PerfGlobCtl = 0x%016llx\n", + vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL)); + + pr_err("*** Control State ***\n"); + pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n", + pin_based_exec_ctrl, cpu_based_exec_ctrl, secondary_exec_control); + pr_err("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl); + pr_err("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n", + vmcs_read32(EXCEPTION_BITMAP), + vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK), + vmcs_read32(PAGE_FAULT_ERROR_CODE_MATCH)); + pr_err("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n", + vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), + vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE), + vmcs_read32(VM_ENTRY_INSTRUCTION_LEN)); + pr_err("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n", + vmcs_read32(VM_EXIT_INTR_INFO), + vmcs_read32(VM_EXIT_INTR_ERROR_CODE), + vmcs_read32(VM_EXIT_INSTRUCTION_LEN)); + pr_err(" reason=%08x qualification=%016lx\n", + vmcs_read32(VM_EXIT_REASON), vmcs_readl(EXIT_QUALIFICATION)); + pr_err("IDTVectoring: info=%08x errcode=%08x\n", + vmcs_read32(IDT_VECTORING_INFO_FIELD), + vmcs_read32(IDT_VECTORING_ERROR_CODE)); + pr_err("TSC Offset = 0x%016llx\n", vmcs_read64(TSC_OFFSET)); + if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING) + pr_err("TSC Multiplier = 0x%016llx\n", + vmcs_read64(TSC_MULTIPLIER)); + if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) { + if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) { + u16 status = vmcs_read16(GUEST_INTR_STATUS); + pr_err("SVI|RVI = %02x|%02x ", status >> 8, status & 0xff); + } + pr_cont("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD)); + if (secondary_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) + pr_err("APIC-access addr = 0x%016llx ", vmcs_read64(APIC_ACCESS_ADDR)); + pr_cont("virt-APIC addr = 0x%016llx\n", vmcs_read64(VIRTUAL_APIC_PAGE_ADDR)); + } + if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR) + pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV)); + if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT)) + pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(EPT_POINTER)); + n = vmcs_read32(CR3_TARGET_COUNT); + for (i = 0; i + 1 < n; i += 4) + pr_err("CR3 target%u=%016lx target%u=%016lx\n", + i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2), + i + 1, vmcs_readl(CR3_TARGET_VALUE0 + i * 2 + 2)); + if (i < n) + pr_err("CR3 target%u=%016lx\n", + i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2)); + if (secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING) + pr_err("PLE Gap=%08x Window=%08x\n", + vmcs_read32(PLE_GAP), vmcs_read32(PLE_WINDOW)); + if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID) + pr_err("Virtual processor ID = 0x%04x\n", + vmcs_read16(VIRTUAL_PROCESSOR_ID)); +} + +/* + * The guest has exited. See if we can fix it or if we need userspace + * assistance. + */ +static int vmx_handle_exit(struct kvm_vcpu *vcpu, + enum exit_fastpath_completion exit_fastpath) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + u32 exit_reason = vmx->exit_reason; + u32 vectoring_info = vmx->idt_vectoring_info; + + trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX); + + /* + * Flush logged GPAs PML buffer, this will make dirty_bitmap more + * updated. Another good is, in kvm_vm_ioctl_get_dirty_log, before + * querying dirty_bitmap, we only need to kick all vcpus out of guest + * mode as if vcpus is in root mode, the PML buffer must has been + * flushed already. + */ + if (enable_pml) + vmx_flush_pml_buffer(vcpu); + + /* If guest state is invalid, start emulating */ + if (vmx->emulation_required) + return handle_invalid_guest_state(vcpu); + + if (is_guest_mode(vcpu) && nested_vmx_exit_reflected(vcpu, exit_reason)) + return nested_vmx_reflect_vmexit(vcpu, exit_reason); + + if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { + dump_vmcs(); + vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; + vcpu->run->fail_entry.hardware_entry_failure_reason + = exit_reason; + return 0; + } + + if (unlikely(vmx->fail)) { + dump_vmcs(); + vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; + vcpu->run->fail_entry.hardware_entry_failure_reason + = vmcs_read32(VM_INSTRUCTION_ERROR); + return 0; + } + + /* + * Note: + * Do not try to fix EXIT_REASON_EPT_MISCONFIG if it caused by + * delivery event since it indicates guest is accessing MMIO. + * The vm-exit can be triggered again after return to guest that + * will cause infinite loop. + */ + if ((vectoring_info & VECTORING_INFO_VALID_MASK) && + (exit_reason != EXIT_REASON_EXCEPTION_NMI && + exit_reason != EXIT_REASON_EPT_VIOLATION && + exit_reason != EXIT_REASON_PML_FULL && + exit_reason != EXIT_REASON_TASK_SWITCH)) { + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV; + vcpu->run->internal.ndata = 3; + vcpu->run->internal.data[0] = vectoring_info; + vcpu->run->internal.data[1] = exit_reason; + vcpu->run->internal.data[2] = vcpu->arch.exit_qualification; + if (exit_reason == EXIT_REASON_EPT_MISCONFIG) { + vcpu->run->internal.ndata++; + vcpu->run->internal.data[3] = + vmcs_read64(GUEST_PHYSICAL_ADDRESS); + } + return 0; + } + + if (unlikely(!enable_vnmi && + vmx->loaded_vmcs->soft_vnmi_blocked)) { + if (vmx_interrupt_allowed(vcpu)) { + vmx->loaded_vmcs->soft_vnmi_blocked = 0; + } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL && + vcpu->arch.nmi_pending) { + /* + * This CPU don't support us in finding the end of an + * NMI-blocked window if the guest runs with IRQs + * disabled. So we pull the trigger after 1 s of + * futile waiting, but inform the user about this. + */ + printk(KERN_WARNING "%s: Breaking out of NMI-blocked " + "state on VCPU %d after 1 s timeout\n", + __func__, vcpu->vcpu_id); + vmx->loaded_vmcs->soft_vnmi_blocked = 0; + } + } + + if (exit_fastpath == EXIT_FASTPATH_SKIP_EMUL_INS) { + kvm_skip_emulated_instruction(vcpu); + return 1; + } else if (exit_reason < kvm_vmx_max_exit_handlers + && kvm_vmx_exit_handlers[exit_reason]) { +#ifdef CONFIG_RETPOLINE + if (exit_reason == EXIT_REASON_MSR_WRITE) + return kvm_emulate_wrmsr(vcpu); + else if (exit_reason == EXIT_REASON_PREEMPTION_TIMER) + return handle_preemption_timer(vcpu); + else if (exit_reason == EXIT_REASON_INTERRUPT_WINDOW) + return handle_interrupt_window(vcpu); + else if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) + return handle_external_interrupt(vcpu); + else if (exit_reason == EXIT_REASON_HLT) + return kvm_emulate_halt(vcpu); + else if (exit_reason == EXIT_REASON_EPT_MISCONFIG) + return handle_ept_misconfig(vcpu); +#endif + return kvm_vmx_exit_handlers[exit_reason](vcpu); + } else { + vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n", + exit_reason); + dump_vmcs(); + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = + KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; + vcpu->run->internal.ndata = 1; + vcpu->run->internal.data[0] = exit_reason; + return 0; + } +} + +/* + * Software based L1D cache flush which is used when microcode providing + * the cache control MSR is not loaded. + * + * The L1D cache is 32 KiB on Nehalem and later microarchitectures, but to + * flush it is required to read in 64 KiB because the replacement algorithm + * is not exactly LRU. This could be sized at runtime via topology + * information but as all relevant affected CPUs have 32KiB L1D cache size + * there is no point in doing so. + */ +static void vmx_l1d_flush(struct kvm_vcpu *vcpu) +{ + int size = PAGE_SIZE << L1D_CACHE_ORDER; + + /* + * This code is only executed when the the flush mode is 'cond' or + * 'always' + */ + if (static_branch_likely(&vmx_l1d_flush_cond)) { + bool flush_l1d; + + /* + * Clear the per-vcpu flush bit, it gets set again + * either from vcpu_run() or from one of the unsafe + * VMEXIT handlers. + */ + flush_l1d = vcpu->arch.l1tf_flush_l1d; + vcpu->arch.l1tf_flush_l1d = false; + + /* + * Clear the per-cpu flush bit, it gets set again from + * the interrupt handlers. + */ + flush_l1d |= kvm_get_cpu_l1tf_flush_l1d(); + kvm_clear_cpu_l1tf_flush_l1d(); + + if (!flush_l1d) + return; + } + + vcpu->stat.l1d_flush++; + + if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) { + wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH); + return; + } + + asm volatile( + /* First ensure the pages are in the TLB */ + "xorl %%eax, %%eax\n" + ".Lpopulate_tlb:\n\t" + "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t" + "addl $4096, %%eax\n\t" + "cmpl %%eax, %[size]\n\t" + "jne .Lpopulate_tlb\n\t" + "xorl %%eax, %%eax\n\t" + "cpuid\n\t" + /* Now fill the cache */ + "xorl %%eax, %%eax\n" + ".Lfill_cache:\n" + "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t" + "addl $64, %%eax\n\t" + "cmpl %%eax, %[size]\n\t" + "jne .Lfill_cache\n\t" + "lfence\n" + :: [flush_pages] "r" (vmx_l1d_flush_pages), + [size] "r" (size) + : "eax", "ebx", "ecx", "edx"); +} + +static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + int tpr_threshold; + + if (is_guest_mode(vcpu) && + nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) + return; + + tpr_threshold = (irr == -1 || tpr < irr) ? 0 : irr; + if (is_guest_mode(vcpu)) + to_vmx(vcpu)->nested.l1_tpr_threshold = tpr_threshold; + else + vmcs_write32(TPR_THRESHOLD, tpr_threshold); +} + +void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + u32 sec_exec_control; + + if (!lapic_in_kernel(vcpu)) + return; + + if (!flexpriority_enabled && + !cpu_has_vmx_virtualize_x2apic_mode()) + return; + + /* Postpone execution until vmcs01 is the current VMCS. */ + if (is_guest_mode(vcpu)) { + vmx->nested.change_vmcs01_virtual_apic_mode = true; + return; + } + + sec_exec_control = secondary_exec_controls_get(vmx); + sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE); + + switch (kvm_get_apic_mode(vcpu)) { + case LAPIC_MODE_INVALID: + WARN_ONCE(true, "Invalid local APIC state"); + case LAPIC_MODE_DISABLED: + break; + case LAPIC_MODE_XAPIC: + if (flexpriority_enabled) { + sec_exec_control |= + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + vmx_flush_tlb(vcpu, true); + } + break; + case LAPIC_MODE_X2APIC: + if (cpu_has_vmx_virtualize_x2apic_mode()) + sec_exec_control |= + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; + break; + } + secondary_exec_controls_set(vmx, sec_exec_control); + + vmx_update_msr_bitmap(vcpu); +} + +static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) +{ + if (!is_guest_mode(vcpu)) { + vmcs_write64(APIC_ACCESS_ADDR, hpa); + vmx_flush_tlb(vcpu, true); + } +} + +static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr) +{ + u16 status; + u8 old; + + if (max_isr == -1) + max_isr = 0; + + status = vmcs_read16(GUEST_INTR_STATUS); + old = status >> 8; + if (max_isr != old) { + status &= 0xff; + status |= max_isr << 8; + vmcs_write16(GUEST_INTR_STATUS, status); + } +} + +static void vmx_set_rvi(int vector) +{ + u16 status; + u8 old; + + if (vector == -1) + vector = 0; + + status = vmcs_read16(GUEST_INTR_STATUS); + old = (u8)status & 0xff; + if ((u8)vector != old) { + status &= ~0xff; + status |= (u8)vector; + vmcs_write16(GUEST_INTR_STATUS, status); + } +} + +static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) +{ + /* + * When running L2, updating RVI is only relevant when + * vmcs12 virtual-interrupt-delivery enabled. + * However, it can be enabled only when L1 also + * intercepts external-interrupts and in that case + * we should not update vmcs02 RVI but instead intercept + * interrupt. Therefore, do nothing when running L2. + */ + if (!is_guest_mode(vcpu)) + vmx_set_rvi(max_irr); +} + +static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + int max_irr; + bool max_irr_updated; + + WARN_ON(!vcpu->arch.apicv_active); + if (pi_test_on(&vmx->pi_desc)) { + pi_clear_on(&vmx->pi_desc); + /* + * IOMMU can write to PID.ON, so the barrier matters even on UP. + * But on x86 this is just a compiler barrier anyway. + */ + smp_mb__after_atomic(); + max_irr_updated = + kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr); + + /* + * If we are running L2 and L1 has a new pending interrupt + * which can be injected, we should re-evaluate + * what should be done with this new L1 interrupt. + * If L1 intercepts external-interrupts, we should + * exit from L2 to L1. Otherwise, interrupt should be + * delivered directly to L2. + */ + if (is_guest_mode(vcpu) && max_irr_updated) { + if (nested_exit_on_intr(vcpu)) + kvm_vcpu_exiting_guest_mode(vcpu); + else + kvm_make_request(KVM_REQ_EVENT, vcpu); + } + } else { + max_irr = kvm_lapic_find_highest_irr(vcpu); + } + vmx_hwapic_irr_update(vcpu, max_irr); + return max_irr; +} + +static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu) +{ + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + + return pi_test_on(pi_desc) || + (pi_test_sn(pi_desc) && !pi_is_pir_empty(pi_desc)); +} + +static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) +{ + if (!kvm_vcpu_apicv_active(vcpu)) + return; + + vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]); + vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]); + vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]); + vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]); +} + +static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + pi_clear_on(&vmx->pi_desc); + memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir)); +} + +static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx) +{ + vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + + /* if exit due to PF check for async PF */ + if (is_page_fault(vmx->exit_intr_info)) + vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason(); + + /* Handle machine checks before interrupts are enabled */ + if (is_machine_check(vmx->exit_intr_info)) + kvm_machine_check(); + + /* We need to handle NMIs before interrupts are enabled */ + if (is_nmi(vmx->exit_intr_info)) { + kvm_before_interrupt(&vmx->vcpu); + asm("int $2"); + kvm_after_interrupt(&vmx->vcpu); + } +} + +static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu) +{ + unsigned int vector; + unsigned long entry; +#ifdef CONFIG_X86_64 + unsigned long tmp; +#endif + gate_desc *desc; + u32 intr_info; + + intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + if (WARN_ONCE(!is_external_intr(intr_info), + "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info)) + return; + + vector = intr_info & INTR_INFO_VECTOR_MASK; + desc = (gate_desc *)host_idt_base + vector; + entry = gate_offset(desc); + + kvm_before_interrupt(vcpu); + + asm volatile( +#ifdef CONFIG_X86_64 + "mov %%" _ASM_SP ", %[sp]\n\t" + "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t" + "push $%c[ss]\n\t" + "push %[sp]\n\t" +#endif + "pushf\n\t" + __ASM_SIZE(push) " $%c[cs]\n\t" + CALL_NOSPEC + : +#ifdef CONFIG_X86_64 + [sp]"=&r"(tmp), +#endif + ASM_CALL_CONSTRAINT + : + THUNK_TARGET(entry), + [ss]"i"(__KERNEL_DS), + [cs]"i"(__KERNEL_CS) + ); + + kvm_after_interrupt(vcpu); +} +STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff); + +static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu, + enum exit_fastpath_completion *exit_fastpath) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (vmx->exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) + handle_external_interrupt_irqoff(vcpu); + else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI) + handle_exception_nmi_irqoff(vmx); + else if (!is_guest_mode(vcpu) && + vmx->exit_reason == EXIT_REASON_MSR_WRITE) + *exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu); +} + +static bool vmx_has_emulated_msr(int index) +{ + switch (index) { + case MSR_IA32_SMBASE: + /* + * We cannot do SMM unless we can run the guest in big + * real mode. + */ + return enable_unrestricted_guest || emulate_invalid_guest_state; + case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: + return nested; + case MSR_AMD64_VIRT_SPEC_CTRL: + /* This is AMD only. */ + return false; + default: + return true; + } +} + +static bool vmx_pt_supported(void) +{ + return pt_mode == PT_MODE_HOST_GUEST; +} + +static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) +{ + u32 exit_intr_info; + bool unblock_nmi; + u8 vector; + bool idtv_info_valid; + + idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK; + + if (enable_vnmi) { + if (vmx->loaded_vmcs->nmi_known_unmasked) + return; + /* + * Can't use vmx->exit_intr_info since we're not sure what + * the exit reason is. + */ + exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; + vector = exit_intr_info & INTR_INFO_VECTOR_MASK; + /* + * SDM 3: 27.7.1.2 (September 2008) + * Re-set bit "block by NMI" before VM entry if vmexit caused by + * a guest IRET fault. + * SDM 3: 23.2.2 (September 2008) + * Bit 12 is undefined in any of the following cases: + * If the VM exit sets the valid bit in the IDT-vectoring + * information field. + * If the VM exit is due to a double fault. + */ + if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi && + vector != DF_VECTOR && !idtv_info_valid) + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_NMI); + else + vmx->loaded_vmcs->nmi_known_unmasked = + !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) + & GUEST_INTR_STATE_NMI); + } else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked)) + vmx->loaded_vmcs->vnmi_blocked_time += + ktime_to_ns(ktime_sub(ktime_get(), + vmx->loaded_vmcs->entry_time)); +} + +static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu, + u32 idt_vectoring_info, + int instr_len_field, + int error_code_field) +{ + u8 vector; + int type; + bool idtv_info_valid; + + idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; + + vcpu->arch.nmi_injected = false; + kvm_clear_exception_queue(vcpu); + kvm_clear_interrupt_queue(vcpu); + + if (!idtv_info_valid) + return; + + kvm_make_request(KVM_REQ_EVENT, vcpu); + + vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK; + type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK; + + switch (type) { + case INTR_TYPE_NMI_INTR: + vcpu->arch.nmi_injected = true; + /* + * SDM 3: 27.7.1.2 (September 2008) + * Clear bit "block by NMI" before VM entry if a NMI + * delivery faulted. + */ + vmx_set_nmi_mask(vcpu, false); + break; + case INTR_TYPE_SOFT_EXCEPTION: + vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); + /* fall through */ + case INTR_TYPE_HARD_EXCEPTION: + if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { + u32 err = vmcs_read32(error_code_field); + kvm_requeue_exception_e(vcpu, vector, err); + } else + kvm_requeue_exception(vcpu, vector); + break; + case INTR_TYPE_SOFT_INTR: + vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); + /* fall through */ + case INTR_TYPE_EXT_INTR: + kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR); + break; + default: + break; + } +} + +static void vmx_complete_interrupts(struct vcpu_vmx *vmx) +{ + __vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info, + VM_EXIT_INSTRUCTION_LEN, + IDT_VECTORING_ERROR_CODE); +} + +static void vmx_cancel_injection(struct kvm_vcpu *vcpu) +{ + __vmx_complete_interrupts(vcpu, + vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), + VM_ENTRY_INSTRUCTION_LEN, + VM_ENTRY_EXCEPTION_ERROR_CODE); + + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); +} + +static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) +{ + int i, nr_msrs; + struct perf_guest_switch_msr *msrs; + + msrs = perf_guest_get_msrs(&nr_msrs); + + if (!msrs) + return; + + for (i = 0; i < nr_msrs; i++) + if (msrs[i].host == msrs[i].guest) + clear_atomic_switch_msr(vmx, msrs[i].msr); + else + add_atomic_switch_msr(vmx, msrs[i].msr, msrs[i].guest, + msrs[i].host, false); +} + +static void atomic_switch_umwait_control_msr(struct vcpu_vmx *vmx) +{ + u32 host_umwait_control; + + if (!vmx_has_waitpkg(vmx)) + return; + + host_umwait_control = get_umwait_control_msr(); + + if (vmx->msr_ia32_umwait_control != host_umwait_control) + add_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL, + vmx->msr_ia32_umwait_control, + host_umwait_control, false); + else + clear_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL); +} + +static void vmx_update_hv_timer(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + u64 tscl; + u32 delta_tsc; + + if (vmx->req_immediate_exit) { + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, 0); + vmx->loaded_vmcs->hv_timer_soft_disabled = false; + } else if (vmx->hv_deadline_tsc != -1) { + tscl = rdtsc(); + if (vmx->hv_deadline_tsc > tscl) + /* set_hv_timer ensures the delta fits in 32-bits */ + delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >> + cpu_preemption_timer_multi); + else + delta_tsc = 0; + + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc); + vmx->loaded_vmcs->hv_timer_soft_disabled = false; + } else if (!vmx->loaded_vmcs->hv_timer_soft_disabled) { + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, -1); + vmx->loaded_vmcs->hv_timer_soft_disabled = true; + } +} + +void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp) +{ + if (unlikely(host_rsp != vmx->loaded_vmcs->host_state.rsp)) { + vmx->loaded_vmcs->host_state.rsp = host_rsp; + vmcs_writel(HOST_RSP, host_rsp); + } +} + +bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched); + +static void vmx_vcpu_run(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned long cr3, cr4; + + /* Record the guest's net vcpu time for enforced NMI injections. */ + if (unlikely(!enable_vnmi && + vmx->loaded_vmcs->soft_vnmi_blocked)) + vmx->loaded_vmcs->entry_time = ktime_get(); + + /* Don't enter VMX if guest state is invalid, let the exit handler + start emulation until we arrive back to a valid state */ + if (vmx->emulation_required) + return; + + if (vmx->ple_window_dirty) { + vmx->ple_window_dirty = false; + vmcs_write32(PLE_WINDOW, vmx->ple_window); + } + + if (vmx->nested.need_vmcs12_to_shadow_sync) + nested_sync_vmcs12_to_shadow(vcpu); + + if (kvm_register_is_dirty(vcpu, VCPU_REGS_RSP)) + vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); + if (kvm_register_is_dirty(vcpu, VCPU_REGS_RIP)) + vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); + + cr3 = __get_current_cr3_fast(); + if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) { + vmcs_writel(HOST_CR3, cr3); + vmx->loaded_vmcs->host_state.cr3 = cr3; + } + + cr4 = cr4_read_shadow(); + if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) { + vmcs_writel(HOST_CR4, cr4); + vmx->loaded_vmcs->host_state.cr4 = cr4; + } + + /* When single-stepping over STI and MOV SS, we must clear the + * corresponding interruptibility bits in the guest state. Otherwise + * vmentry fails as it then expects bit 14 (BS) in pending debug + * exceptions being set, but that's not correct for the guest debugging + * case. */ + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) + vmx_set_interrupt_shadow(vcpu, 0); + + kvm_load_guest_xsave_state(vcpu); + + if (static_cpu_has(X86_FEATURE_PKU) && + kvm_read_cr4_bits(vcpu, X86_CR4_PKE) && + vcpu->arch.pkru != vmx->host_pkru) + __write_pkru(vcpu->arch.pkru); + + pt_guest_enter(vmx); + + atomic_switch_perf_msrs(vmx); + atomic_switch_umwait_control_msr(vmx); + + if (enable_preemption_timer) + vmx_update_hv_timer(vcpu); + + if (lapic_in_kernel(vcpu) && + vcpu->arch.apic->lapic_timer.timer_advance_ns) + kvm_wait_lapic_expire(vcpu); + + /* + * If this vCPU has touched SPEC_CTRL, restore the guest's value if + * it's non-zero. Since vmentry is serialising on affected CPUs, there + * is no need to worry about the conditional branch over the wrmsr + * being speculatively taken. + */ + x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); + + /* L1D Flush includes CPU buffer clear to mitigate MDS */ + if (static_branch_unlikely(&vmx_l1d_should_flush)) + vmx_l1d_flush(vcpu); + else if (static_branch_unlikely(&mds_user_clear)) + mds_clear_cpu_buffers(); + + if (vcpu->arch.cr2 != read_cr2()) + write_cr2(vcpu->arch.cr2); + + vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, + vmx->loaded_vmcs->launched); + + vcpu->arch.cr2 = read_cr2(); + + /* + * We do not use IBRS in the kernel. If this vCPU has used the + * SPEC_CTRL MSR it may have left it on; save the value and + * turn it off. This is much more efficient than blindly adding + * it to the atomic save/restore list. Especially as the former + * (Saving guest MSRs on vmexit) doesn't even exist in KVM. + * + * For non-nested case: + * If the L01 MSR bitmap does not intercept the MSR, then we need to + * save it. + * + * For nested case: + * If the L02 MSR bitmap does not intercept the MSR, then we need to + * save it. + */ + if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) + vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); + + x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); + + /* All fields are clean at this point */ + if (static_branch_unlikely(&enable_evmcs)) + current_evmcs->hv_clean_fields |= + HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; + + if (static_branch_unlikely(&enable_evmcs)) + current_evmcs->hv_vp_id = vcpu->arch.hyperv.vp_index; + + /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ + if (vmx->host_debugctlmsr) + update_debugctlmsr(vmx->host_debugctlmsr); + +#ifndef CONFIG_X86_64 + /* + * The sysexit path does not restore ds/es, so we must set them to + * a reasonable value ourselves. + * + * We can't defer this to vmx_prepare_switch_to_host() since that + * function may be executed in interrupt context, which saves and + * restore segments around it, nullifying its effect. + */ + loadsegment(ds, __USER_DS); + loadsegment(es, __USER_DS); +#endif + + vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) + | (1 << VCPU_EXREG_RFLAGS) + | (1 << VCPU_EXREG_PDPTR) + | (1 << VCPU_EXREG_SEGMENTS) + | (1 << VCPU_EXREG_CR3)); + vcpu->arch.regs_dirty = 0; + + pt_guest_exit(vmx); + + /* + * eager fpu is enabled if PKEY is supported and CR4 is switched + * back on host, so it is safe to read guest PKRU from current + * XSAVE. + */ + if (static_cpu_has(X86_FEATURE_PKU) && + kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) { + vcpu->arch.pkru = rdpkru(); + if (vcpu->arch.pkru != vmx->host_pkru) + __write_pkru(vmx->host_pkru); + } + + kvm_load_host_xsave_state(vcpu); + + vmx->nested.nested_run_pending = 0; + vmx->idt_vectoring_info = 0; + + vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON); + if ((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY) + kvm_machine_check(); + + if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) + return; + + vmx->loaded_vmcs->launched = 1; + vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); + + vmx_recover_nmi_blocking(vmx); + vmx_complete_interrupts(vmx); +} + +static struct kvm *vmx_vm_alloc(void) +{ + struct kvm_vmx *kvm_vmx = __vmalloc(sizeof(struct kvm_vmx), + GFP_KERNEL_ACCOUNT | __GFP_ZERO, + PAGE_KERNEL); + return &kvm_vmx->kvm; +} + +static void vmx_vm_free(struct kvm *kvm) +{ + kfree(kvm->arch.hyperv.hv_pa_pg); + vfree(to_kvm_vmx(kvm)); +} + +static void vmx_free_vcpu(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (enable_pml) + vmx_destroy_pml_buffer(vmx); + free_vpid(vmx->vpid); + nested_vmx_free_vcpu(vcpu); + free_loaded_vmcs(vmx->loaded_vmcs); + kvm_vcpu_uninit(vcpu); + kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu); + kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu); + kmem_cache_free(kvm_vcpu_cache, vmx); +} + +static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) +{ + int err; + struct vcpu_vmx *vmx; + unsigned long *msr_bitmap; + int i, cpu; + + BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0, + "struct kvm_vcpu must be at offset 0 for arch usercopy region"); + + vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT); + if (!vmx) + return ERR_PTR(-ENOMEM); + + vmx->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache, + GFP_KERNEL_ACCOUNT); + if (!vmx->vcpu.arch.user_fpu) { + printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n"); + err = -ENOMEM; + goto free_partial_vcpu; + } + + vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, + GFP_KERNEL_ACCOUNT); + if (!vmx->vcpu.arch.guest_fpu) { + printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n"); + err = -ENOMEM; + goto free_user_fpu; + } + + vmx->vpid = allocate_vpid(); + + err = kvm_vcpu_init(&vmx->vcpu, kvm, id); + if (err) + goto free_vcpu; + + err = -ENOMEM; + + /* + * If PML is turned on, failure on enabling PML just results in failure + * of creating the vcpu, therefore we can simplify PML logic (by + * avoiding dealing with cases, such as enabling PML partially on vcpus + * for the guest), etc. + */ + if (enable_pml) { + vmx->pml_pg = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); + if (!vmx->pml_pg) + goto uninit_vcpu; + } + + BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) != NR_SHARED_MSRS); + + for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) { + u32 index = vmx_msr_index[i]; + u32 data_low, data_high; + int j = vmx->nmsrs; + + if (rdmsr_safe(index, &data_low, &data_high) < 0) + continue; + if (wrmsr_safe(index, data_low, data_high) < 0) + continue; + + vmx->guest_msrs[j].index = i; + vmx->guest_msrs[j].data = 0; + switch (index) { + case MSR_IA32_TSX_CTRL: + /* + * No need to pass TSX_CTRL_CPUID_CLEAR through, so + * let's avoid changing CPUID bits under the host + * kernel's feet. + */ + vmx->guest_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR; + break; + default: + vmx->guest_msrs[j].mask = -1ull; + break; + } + ++vmx->nmsrs; + } + + err = alloc_loaded_vmcs(&vmx->vmcs01); + if (err < 0) + goto free_pml; + + msr_bitmap = vmx->vmcs01.msr_bitmap; + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_TSC, MSR_TYPE_R); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW); + if (kvm_cstate_in_guest(kvm)) { + vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C1_RES, MSR_TYPE_R); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R); + } + vmx->msr_bitmap_mode = 0; + + vmx->loaded_vmcs = &vmx->vmcs01; + cpu = get_cpu(); + vmx_vcpu_load(&vmx->vcpu, cpu); + vmx->vcpu.cpu = cpu; + init_vmcs(vmx); + vmx_vcpu_put(&vmx->vcpu); + put_cpu(); + if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) { + err = alloc_apic_access_page(kvm); + if (err) + goto free_vmcs; + } + + if (enable_ept && !enable_unrestricted_guest) { + err = init_rmode_identity_map(kvm); + if (err) + goto free_vmcs; + } + + if (nested) + nested_vmx_setup_ctls_msrs(&vmx->nested.msrs, + vmx_capability.ept, + kvm_vcpu_apicv_active(&vmx->vcpu)); + else + memset(&vmx->nested.msrs, 0, sizeof(vmx->nested.msrs)); + + vmx->nested.posted_intr_nv = -1; + vmx->nested.current_vmptr = -1ull; + + vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED; + + /* + * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR + * or POSTED_INTR_WAKEUP_VECTOR. + */ + vmx->pi_desc.nv = POSTED_INTR_VECTOR; + vmx->pi_desc.sn = 1; + + vmx->ept_pointer = INVALID_PAGE; + + return &vmx->vcpu; + +free_vmcs: + free_loaded_vmcs(vmx->loaded_vmcs); +free_pml: + vmx_destroy_pml_buffer(vmx); +uninit_vcpu: + kvm_vcpu_uninit(&vmx->vcpu); +free_vcpu: + free_vpid(vmx->vpid); + kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu); +free_user_fpu: + kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu); +free_partial_vcpu: + kmem_cache_free(kvm_vcpu_cache, vmx); + return ERR_PTR(err); +} + +#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" +#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" + +static int vmx_vm_init(struct kvm *kvm) +{ + spin_lock_init(&to_kvm_vmx(kvm)->ept_pointer_lock); + + if (!ple_gap) + kvm->arch.pause_in_guest = true; + + if (boot_cpu_has(X86_BUG_L1TF) && enable_ept) { + switch (l1tf_mitigation) { + case L1TF_MITIGATION_OFF: + case L1TF_MITIGATION_FLUSH_NOWARN: + /* 'I explicitly don't care' is set */ + break; + case L1TF_MITIGATION_FLUSH: + case L1TF_MITIGATION_FLUSH_NOSMT: + case L1TF_MITIGATION_FULL: + /* + * Warn upon starting the first VM in a potentially + * insecure environment. + */ + if (sched_smt_active()) + pr_warn_once(L1TF_MSG_SMT); + if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER) + pr_warn_once(L1TF_MSG_L1D); + break; + case L1TF_MITIGATION_FULL_FORCE: + /* Flush is enforced */ + break; + } + } + return 0; +} + +static int __init vmx_check_processor_compat(void) +{ + struct vmcs_config vmcs_conf; + struct vmx_capability vmx_cap; + + if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0) + return -EIO; + if (nested) + nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept, + enable_apicv); + if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) { + printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n", + smp_processor_id()); + return -EIO; + } + return 0; +} + +static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) +{ + u8 cache; + u64 ipat = 0; + + /* For VT-d and EPT combination + * 1. MMIO: always map as UC + * 2. EPT with VT-d: + * a. VT-d without snooping control feature: can't guarantee the + * result, try to trust guest. + * b. VT-d with snooping control feature: snooping control feature of + * VT-d engine can guarantee the cache correctness. Just set it + * to WB to keep consistent with host. So the same as item 3. + * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep + * consistent with host MTRR + */ + if (is_mmio) { + cache = MTRR_TYPE_UNCACHABLE; + goto exit; + } + + if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) { + ipat = VMX_EPT_IPAT_BIT; + cache = MTRR_TYPE_WRBACK; + goto exit; + } + + if (kvm_read_cr0(vcpu) & X86_CR0_CD) { + ipat = VMX_EPT_IPAT_BIT; + if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED)) + cache = MTRR_TYPE_WRBACK; + else + cache = MTRR_TYPE_UNCACHABLE; + goto exit; + } + + cache = kvm_mtrr_get_guest_memory_type(vcpu, gfn); + +exit: + return (cache << VMX_EPT_MT_EPTE_SHIFT) | ipat; +} + +static int vmx_get_lpage_level(void) +{ + if (enable_ept && !cpu_has_vmx_ept_1g_page()) + return PT_DIRECTORY_LEVEL; + else + /* For shadow and EPT supported 1GB page */ + return PT_PDPE_LEVEL; +} + +static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx) +{ + /* + * These bits in the secondary execution controls field + * are dynamic, the others are mostly based on the hypervisor + * architecture and the guest's CPUID. Do not touch the + * dynamic bits. + */ + u32 mask = + SECONDARY_EXEC_SHADOW_VMCS | + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + SECONDARY_EXEC_DESC; + + u32 new_ctl = vmx->secondary_exec_control; + u32 cur_ctl = secondary_exec_controls_get(vmx); + + secondary_exec_controls_set(vmx, (new_ctl & ~mask) | (cur_ctl & mask)); +} + +/* + * Generate MSR_IA32_VMX_CR{0,4}_FIXED1 according to CPUID. Only set bits + * (indicating "allowed-1") if they are supported in the guest's CPUID. + */ +static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct kvm_cpuid_entry2 *entry; + + vmx->nested.msrs.cr0_fixed1 = 0xffffffff; + vmx->nested.msrs.cr4_fixed1 = X86_CR4_PCE; + +#define cr4_fixed1_update(_cr4_mask, _reg, _cpuid_mask) do { \ + if (entry && (entry->_reg & (_cpuid_mask))) \ + vmx->nested.msrs.cr4_fixed1 |= (_cr4_mask); \ +} while (0) + + entry = kvm_find_cpuid_entry(vcpu, 0x1, 0); + cr4_fixed1_update(X86_CR4_VME, edx, bit(X86_FEATURE_VME)); + cr4_fixed1_update(X86_CR4_PVI, edx, bit(X86_FEATURE_VME)); + cr4_fixed1_update(X86_CR4_TSD, edx, bit(X86_FEATURE_TSC)); + cr4_fixed1_update(X86_CR4_DE, edx, bit(X86_FEATURE_DE)); + cr4_fixed1_update(X86_CR4_PSE, edx, bit(X86_FEATURE_PSE)); + cr4_fixed1_update(X86_CR4_PAE, edx, bit(X86_FEATURE_PAE)); + cr4_fixed1_update(X86_CR4_MCE, edx, bit(X86_FEATURE_MCE)); + cr4_fixed1_update(X86_CR4_PGE, edx, bit(X86_FEATURE_PGE)); + cr4_fixed1_update(X86_CR4_OSFXSR, edx, bit(X86_FEATURE_FXSR)); + cr4_fixed1_update(X86_CR4_OSXMMEXCPT, edx, bit(X86_FEATURE_XMM)); + cr4_fixed1_update(X86_CR4_VMXE, ecx, bit(X86_FEATURE_VMX)); + cr4_fixed1_update(X86_CR4_SMXE, ecx, bit(X86_FEATURE_SMX)); + cr4_fixed1_update(X86_CR4_PCIDE, ecx, bit(X86_FEATURE_PCID)); + cr4_fixed1_update(X86_CR4_OSXSAVE, ecx, bit(X86_FEATURE_XSAVE)); + + entry = kvm_find_cpuid_entry(vcpu, 0x7, 0); + cr4_fixed1_update(X86_CR4_FSGSBASE, ebx, bit(X86_FEATURE_FSGSBASE)); + cr4_fixed1_update(X86_CR4_SMEP, ebx, bit(X86_FEATURE_SMEP)); + cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP)); + cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU)); + cr4_fixed1_update(X86_CR4_UMIP, ecx, bit(X86_FEATURE_UMIP)); + cr4_fixed1_update(X86_CR4_LA57, ecx, bit(X86_FEATURE_LA57)); + +#undef cr4_fixed1_update +} + +static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (kvm_mpx_supported()) { + bool mpx_enabled = guest_cpuid_has(vcpu, X86_FEATURE_MPX); + + if (mpx_enabled) { + vmx->nested.msrs.entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; + vmx->nested.msrs.exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; + } else { + vmx->nested.msrs.entry_ctls_high &= ~VM_ENTRY_LOAD_BNDCFGS; + vmx->nested.msrs.exit_ctls_high &= ~VM_EXIT_CLEAR_BNDCFGS; + } + } +} + +static void update_intel_pt_cfg(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct kvm_cpuid_entry2 *best = NULL; + int i; + + for (i = 0; i < PT_CPUID_LEAVES; i++) { + best = kvm_find_cpuid_entry(vcpu, 0x14, i); + if (!best) + return; + vmx->pt_desc.caps[CPUID_EAX + i*PT_CPUID_REGS_NUM] = best->eax; + vmx->pt_desc.caps[CPUID_EBX + i*PT_CPUID_REGS_NUM] = best->ebx; + vmx->pt_desc.caps[CPUID_ECX + i*PT_CPUID_REGS_NUM] = best->ecx; + vmx->pt_desc.caps[CPUID_EDX + i*PT_CPUID_REGS_NUM] = best->edx; + } + + /* Get the number of configurable Address Ranges for filtering */ + vmx->pt_desc.addr_range = intel_pt_validate_cap(vmx->pt_desc.caps, + PT_CAP_num_address_ranges); + + /* Initialize and clear the no dependency bits */ + vmx->pt_desc.ctl_bitmask = ~(RTIT_CTL_TRACEEN | RTIT_CTL_OS | + RTIT_CTL_USR | RTIT_CTL_TSC_EN | RTIT_CTL_DISRETC); + + /* + * If CPUID.(EAX=14H,ECX=0):EBX[0]=1 CR3Filter can be set otherwise + * will inject an #GP + */ + if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_cr3_filtering)) + vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_CR3EN; + + /* + * If CPUID.(EAX=14H,ECX=0):EBX[1]=1 CYCEn, CycThresh and + * PSBFreq can be set + */ + if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc)) + vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_CYCLEACC | + RTIT_CTL_CYC_THRESH | RTIT_CTL_PSB_FREQ); + + /* + * If CPUID.(EAX=14H,ECX=0):EBX[3]=1 MTCEn BranchEn and + * MTCFreq can be set + */ + if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc)) + vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_MTC_EN | + RTIT_CTL_BRANCH_EN | RTIT_CTL_MTC_RANGE); + + /* If CPUID.(EAX=14H,ECX=0):EBX[4]=1 FUPonPTW and PTWEn can be set */ + if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_ptwrite)) + vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_FUP_ON_PTW | + RTIT_CTL_PTW_EN); + + /* If CPUID.(EAX=14H,ECX=0):EBX[5]=1 PwrEvEn can be set */ + if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_power_event_trace)) + vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_PWR_EVT_EN; + + /* If CPUID.(EAX=14H,ECX=0):ECX[0]=1 ToPA can be set */ + if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_topa_output)) + vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_TOPA; + + /* If CPUID.(EAX=14H,ECX=0):ECX[3]=1 FabircEn can be set */ + if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_output_subsys)) + vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_FABRIC_EN; + + /* unmask address range configure area */ + for (i = 0; i < vmx->pt_desc.addr_range; i++) + vmx->pt_desc.ctl_bitmask &= ~(0xfULL << (32 + i * 4)); +} + +static void vmx_cpuid_update(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + /* xsaves_enabled is recomputed in vmx_compute_secondary_exec_control(). */ + vcpu->arch.xsaves_enabled = false; + + if (cpu_has_secondary_exec_ctrls()) { + vmx_compute_secondary_exec_control(vmx); + vmcs_set_secondary_exec_control(vmx); + } + + if (nested_vmx_allowed(vcpu)) + to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= + FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX | + FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; + else + to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &= + ~(FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX | + FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX); + + if (nested_vmx_allowed(vcpu)) { + nested_vmx_cr_fixed1_bits_update(vcpu); + nested_vmx_entry_exit_ctls_update(vcpu); + } + + if (boot_cpu_has(X86_FEATURE_INTEL_PT) && + guest_cpuid_has(vcpu, X86_FEATURE_INTEL_PT)) + update_intel_pt_cfg(vcpu); + + if (boot_cpu_has(X86_FEATURE_RTM)) { + struct shared_msr_entry *msr; + msr = find_msr_entry(vmx, MSR_IA32_TSX_CTRL); + if (msr) { + bool enabled = guest_cpuid_has(vcpu, X86_FEATURE_RTM); + vmx_set_guest_msr(vmx, msr, enabled ? 0 : TSX_CTRL_RTM_DISABLE); + } + } +} + +static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) +{ + if (func == 1 && nested) + entry->ecx |= bit(X86_FEATURE_VMX); +} + +static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu) +{ + to_vmx(vcpu)->req_immediate_exit = true; +} + +static int vmx_check_intercept(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info, + enum x86_intercept_stage stage) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; + + /* + * RDPID causes #UD if disabled through secondary execution controls. + * Because it is marked as EmulateOnUD, we need to intercept it here. + */ + if (info->intercept == x86_intercept_rdtscp && + !nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) { + ctxt->exception.vector = UD_VECTOR; + ctxt->exception.error_code_valid = false; + return X86EMUL_PROPAGATE_FAULT; + } + + /* TODO: check more intercepts... */ + return X86EMUL_CONTINUE; +} + +#ifdef CONFIG_X86_64 +/* (a << shift) / divisor, return 1 if overflow otherwise 0 */ +static inline int u64_shl_div_u64(u64 a, unsigned int shift, + u64 divisor, u64 *result) +{ + u64 low = a << shift, high = a >> (64 - shift); + + /* To avoid the overflow on divq */ + if (high >= divisor) + return 1; + + /* Low hold the result, high hold rem which is discarded */ + asm("divq %2\n\t" : "=a" (low), "=d" (high) : + "rm" (divisor), "0" (low), "1" (high)); + *result = low; + + return 0; +} + +static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, + bool *expired) +{ + struct vcpu_vmx *vmx; + u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles; + struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer; + + if (kvm_mwait_in_guest(vcpu->kvm) || + kvm_can_post_timer_interrupt(vcpu)) + return -EOPNOTSUPP; + + vmx = to_vmx(vcpu); + tscl = rdtsc(); + guest_tscl = kvm_read_l1_tsc(vcpu, tscl); + delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl; + lapic_timer_advance_cycles = nsec_to_cycles(vcpu, + ktimer->timer_advance_ns); + + if (delta_tsc > lapic_timer_advance_cycles) + delta_tsc -= lapic_timer_advance_cycles; + else + delta_tsc = 0; + + /* Convert to host delta tsc if tsc scaling is enabled */ + if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio && + delta_tsc && u64_shl_div_u64(delta_tsc, + kvm_tsc_scaling_ratio_frac_bits, + vcpu->arch.tsc_scaling_ratio, &delta_tsc)) + return -ERANGE; + + /* + * If the delta tsc can't fit in the 32 bit after the multi shift, + * we can't use the preemption timer. + * It's possible that it fits on later vmentries, but checking + * on every vmentry is costly so we just use an hrtimer. + */ + if (delta_tsc >> (cpu_preemption_timer_multi + 32)) + return -ERANGE; + + vmx->hv_deadline_tsc = tscl + delta_tsc; + *expired = !delta_tsc; + return 0; +} + +static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu) +{ + to_vmx(vcpu)->hv_deadline_tsc = -1; +} +#endif + +static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ + if (!kvm_pause_in_guest(vcpu->kvm)) + shrink_ple_window(vcpu); +} + +static void vmx_slot_enable_log_dirty(struct kvm *kvm, + struct kvm_memory_slot *slot) +{ + kvm_mmu_slot_leaf_clear_dirty(kvm, slot); + kvm_mmu_slot_largepage_remove_write_access(kvm, slot); +} + +static void vmx_slot_disable_log_dirty(struct kvm *kvm, + struct kvm_memory_slot *slot) +{ + kvm_mmu_slot_set_dirty(kvm, slot); +} + +static void vmx_flush_log_dirty(struct kvm *kvm) +{ + kvm_flush_pml_buffers(kvm); +} + +static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu) +{ + struct vmcs12 *vmcs12; + struct vcpu_vmx *vmx = to_vmx(vcpu); + gpa_t gpa, dst; + + if (is_guest_mode(vcpu)) { + WARN_ON_ONCE(vmx->nested.pml_full); + + /* + * Check if PML is enabled for the nested guest. + * Whether eptp bit 6 is set is already checked + * as part of A/D emulation. + */ + vmcs12 = get_vmcs12(vcpu); + if (!nested_cpu_has_pml(vmcs12)) + return 0; + + if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) { + vmx->nested.pml_full = true; + return 1; + } + + gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull; + dst = vmcs12->pml_address + sizeof(u64) * vmcs12->guest_pml_index; + + if (kvm_write_guest_page(vcpu->kvm, gpa_to_gfn(dst), &gpa, + offset_in_page(dst), sizeof(gpa))) + return 0; + + vmcs12->guest_pml_index--; + } + + return 0; +} + +static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm, + struct kvm_memory_slot *memslot, + gfn_t offset, unsigned long mask) +{ + kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask); +} + +static void __pi_post_block(struct kvm_vcpu *vcpu) +{ + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + struct pi_desc old, new; + unsigned int dest; + + do { + old.control = new.control = pi_desc->control; + WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR, + "Wakeup handler not enabled while the VCPU is blocked\n"); + + dest = cpu_physical_id(vcpu->cpu); + + if (x2apic_enabled()) + new.ndst = dest; + else + new.ndst = (dest << 8) & 0xFF00; + + /* set 'NV' to 'notification vector' */ + new.nv = POSTED_INTR_VECTOR; + } while (cmpxchg64(&pi_desc->control, old.control, + new.control) != old.control); + + if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) { + spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); + list_del(&vcpu->blocked_vcpu_list); + spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); + vcpu->pre_pcpu = -1; + } +} + +/* + * This routine does the following things for vCPU which is going + * to be blocked if VT-d PI is enabled. + * - Store the vCPU to the wakeup list, so when interrupts happen + * we can find the right vCPU to wake up. + * - Change the Posted-interrupt descriptor as below: + * 'NDST' <-- vcpu->pre_pcpu + * 'NV' <-- POSTED_INTR_WAKEUP_VECTOR + * - If 'ON' is set during this process, which means at least one + * interrupt is posted for this vCPU, we cannot block it, in + * this case, return 1, otherwise, return 0. + * + */ +static int pi_pre_block(struct kvm_vcpu *vcpu) +{ + unsigned int dest; + struct pi_desc old, new; + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + + if (!kvm_arch_has_assigned_device(vcpu->kvm) || + !irq_remapping_cap(IRQ_POSTING_CAP) || + !kvm_vcpu_apicv_active(vcpu)) + return 0; + + WARN_ON(irqs_disabled()); + local_irq_disable(); + if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) { + vcpu->pre_pcpu = vcpu->cpu; + spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); + list_add_tail(&vcpu->blocked_vcpu_list, + &per_cpu(blocked_vcpu_on_cpu, + vcpu->pre_pcpu)); + spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); + } + + do { + old.control = new.control = pi_desc->control; + + WARN((pi_desc->sn == 1), + "Warning: SN field of posted-interrupts " + "is set before blocking\n"); + + /* + * Since vCPU can be preempted during this process, + * vcpu->cpu could be different with pre_pcpu, we + * need to set pre_pcpu as the destination of wakeup + * notification event, then we can find the right vCPU + * to wakeup in wakeup handler if interrupts happen + * when the vCPU is in blocked state. + */ + dest = cpu_physical_id(vcpu->pre_pcpu); + + if (x2apic_enabled()) + new.ndst = dest; + else + new.ndst = (dest << 8) & 0xFF00; + + /* set 'NV' to 'wakeup vector' */ + new.nv = POSTED_INTR_WAKEUP_VECTOR; + } while (cmpxchg64(&pi_desc->control, old.control, + new.control) != old.control); + + /* We should not block the vCPU if an interrupt is posted for it. */ + if (pi_test_on(pi_desc) == 1) + __pi_post_block(vcpu); + + local_irq_enable(); + return (vcpu->pre_pcpu == -1); +} + +static int vmx_pre_block(struct kvm_vcpu *vcpu) +{ + if (pi_pre_block(vcpu)) + return 1; + + if (kvm_lapic_hv_timer_in_use(vcpu)) + kvm_lapic_switch_to_sw_timer(vcpu); + + return 0; +} + +static void pi_post_block(struct kvm_vcpu *vcpu) +{ + if (vcpu->pre_pcpu == -1) + return; + + WARN_ON(irqs_disabled()); + local_irq_disable(); + __pi_post_block(vcpu); + local_irq_enable(); +} + +static void vmx_post_block(struct kvm_vcpu *vcpu) +{ + if (kvm_x86_ops->set_hv_timer) + kvm_lapic_switch_to_hv_timer(vcpu); + + pi_post_block(vcpu); +} + +/* + * vmx_update_pi_irte - set IRTE for Posted-Interrupts + * + * @kvm: kvm + * @host_irq: host irq of the interrupt + * @guest_irq: gsi of the interrupt + * @set: set or unset PI + * returns 0 on success, < 0 on failure + */ +static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq, + uint32_t guest_irq, bool set) +{ + struct kvm_kernel_irq_routing_entry *e; + struct kvm_irq_routing_table *irq_rt; + struct kvm_lapic_irq irq; + struct kvm_vcpu *vcpu; + struct vcpu_data vcpu_info; + int idx, ret = 0; + + if (!kvm_arch_has_assigned_device(kvm) || + !irq_remapping_cap(IRQ_POSTING_CAP) || + !kvm_vcpu_apicv_active(kvm->vcpus[0])) + return 0; + + idx = srcu_read_lock(&kvm->irq_srcu); + irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); + if (guest_irq >= irq_rt->nr_rt_entries || + hlist_empty(&irq_rt->map[guest_irq])) { + pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n", + guest_irq, irq_rt->nr_rt_entries); + goto out; + } + + hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) { + if (e->type != KVM_IRQ_ROUTING_MSI) + continue; + /* + * VT-d PI cannot support posting multicast/broadcast + * interrupts to a vCPU, we still use interrupt remapping + * for these kind of interrupts. + * + * For lowest-priority interrupts, we only support + * those with single CPU as the destination, e.g. user + * configures the interrupts via /proc/irq or uses + * irqbalance to make the interrupts single-CPU. + * + * We will support full lowest-priority interrupt later. + * + * In addition, we can only inject generic interrupts using + * the PI mechanism, refuse to route others through it. + */ + + kvm_set_msi_irq(kvm, e, &irq); + if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) || + !kvm_irq_is_postable(&irq)) { + /* + * Make sure the IRTE is in remapped mode if + * we don't handle it in posted mode. + */ + ret = irq_set_vcpu_affinity(host_irq, NULL); + if (ret < 0) { + printk(KERN_INFO + "failed to back to remapped mode, irq: %u\n", + host_irq); + goto out; + } + + continue; + } + + vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu)); + vcpu_info.vector = irq.vector; + + trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi, + vcpu_info.vector, vcpu_info.pi_desc_addr, set); + + if (set) + ret = irq_set_vcpu_affinity(host_irq, &vcpu_info); + else + ret = irq_set_vcpu_affinity(host_irq, NULL); + + if (ret < 0) { + printk(KERN_INFO "%s: failed to update PI IRTE\n", + __func__); + goto out; + } + } + + ret = 0; +out: + srcu_read_unlock(&kvm->irq_srcu, idx); + return ret; +} + +static void vmx_setup_mce(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.mcg_cap & MCG_LMCE_P) + to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= + FEATURE_CONTROL_LMCE; + else + to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &= + ~FEATURE_CONTROL_LMCE; +} + +static int vmx_smi_allowed(struct kvm_vcpu *vcpu) +{ + /* we need a nested vmexit to enter SMM, postpone if run is pending */ + if (to_vmx(vcpu)->nested.nested_run_pending) + return 0; + return 1; +} + +static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + vmx->nested.smm.guest_mode = is_guest_mode(vcpu); + if (vmx->nested.smm.guest_mode) + nested_vmx_vmexit(vcpu, -1, 0, 0); + + vmx->nested.smm.vmxon = vmx->nested.vmxon; + vmx->nested.vmxon = false; + vmx_clear_hlt(vcpu); + return 0; +} + +static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + int ret; + + if (vmx->nested.smm.vmxon) { + vmx->nested.vmxon = true; + vmx->nested.smm.vmxon = false; + } + + if (vmx->nested.smm.guest_mode) { + ret = nested_vmx_enter_non_root_mode(vcpu, false); + if (ret) + return ret; + + vmx->nested.smm.guest_mode = false; + } + return 0; +} + +static int enable_smi_window(struct kvm_vcpu *vcpu) +{ + return 0; +} + +static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) +{ + return false; +} + +static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu) +{ + return to_vmx(vcpu)->nested.vmxon; +} + +static __init int hardware_setup(void) +{ + unsigned long host_bndcfgs; + struct desc_ptr dt; + int r, i; + + rdmsrl_safe(MSR_EFER, &host_efer); + + store_idt(&dt); + host_idt_base = dt.address; + + for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) + kvm_define_shared_msr(i, vmx_msr_index[i]); + + if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0) + return -EIO; + + if (boot_cpu_has(X86_FEATURE_NX)) + kvm_enable_efer_bits(EFER_NX); + + if (boot_cpu_has(X86_FEATURE_MPX)) { + rdmsrl(MSR_IA32_BNDCFGS, host_bndcfgs); + WARN_ONCE(host_bndcfgs, "KVM: BNDCFGS in host will be lost"); + } + + if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() || + !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global())) + enable_vpid = 0; + + if (!cpu_has_vmx_ept() || + !cpu_has_vmx_ept_4levels() || + !cpu_has_vmx_ept_mt_wb() || + !cpu_has_vmx_invept_global()) + enable_ept = 0; + + if (!cpu_has_vmx_ept_ad_bits() || !enable_ept) + enable_ept_ad_bits = 0; + + if (!cpu_has_vmx_unrestricted_guest() || !enable_ept) + enable_unrestricted_guest = 0; + + if (!cpu_has_vmx_flexpriority()) + flexpriority_enabled = 0; + + if (!cpu_has_virtual_nmis()) + enable_vnmi = 0; + + /* + * set_apic_access_page_addr() is used to reload apic access + * page upon invalidation. No need to do anything if not + * using the APIC_ACCESS_ADDR VMCS field. + */ + if (!flexpriority_enabled) + kvm_x86_ops->set_apic_access_page_addr = NULL; + + if (!cpu_has_vmx_tpr_shadow()) + kvm_x86_ops->update_cr8_intercept = NULL; + + if (enable_ept && !cpu_has_vmx_ept_2m_page()) + kvm_disable_largepages(); + +#if IS_ENABLED(CONFIG_HYPERV) + if (ms_hyperv.nested_features & HV_X64_NESTED_GUEST_MAPPING_FLUSH + && enable_ept) { + kvm_x86_ops->tlb_remote_flush = hv_remote_flush_tlb; + kvm_x86_ops->tlb_remote_flush_with_range = + hv_remote_flush_tlb_with_range; + } +#endif + + if (!cpu_has_vmx_ple()) { + ple_gap = 0; + ple_window = 0; + ple_window_grow = 0; + ple_window_max = 0; + ple_window_shrink = 0; + } + + if (!cpu_has_vmx_apicv()) { + enable_apicv = 0; + kvm_x86_ops->sync_pir_to_irr = NULL; + } + + if (cpu_has_vmx_tsc_scaling()) { + kvm_has_tsc_control = true; + kvm_max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX; + kvm_tsc_scaling_ratio_frac_bits = 48; + } + + set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ + + if (enable_ept) + vmx_enable_tdp(); + else + kvm_disable_tdp(); + + /* + * Only enable PML when hardware supports PML feature, and both EPT + * and EPT A/D bit features are enabled -- PML depends on them to work. + */ + if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml()) + enable_pml = 0; + + if (!enable_pml) { + kvm_x86_ops->slot_enable_log_dirty = NULL; + kvm_x86_ops->slot_disable_log_dirty = NULL; + kvm_x86_ops->flush_log_dirty = NULL; + kvm_x86_ops->enable_log_dirty_pt_masked = NULL; + } + + if (!cpu_has_vmx_preemption_timer()) + enable_preemption_timer = false; + + if (enable_preemption_timer) { + u64 use_timer_freq = 5000ULL * 1000 * 1000; + u64 vmx_msr; + + rdmsrl(MSR_IA32_VMX_MISC, vmx_msr); + cpu_preemption_timer_multi = + vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK; + + if (tsc_khz) + use_timer_freq = (u64)tsc_khz * 1000; + use_timer_freq >>= cpu_preemption_timer_multi; + + /* + * KVM "disables" the preemption timer by setting it to its max + * value. Don't use the timer if it might cause spurious exits + * at a rate faster than 0.1 Hz (of uninterrupted guest time). + */ + if (use_timer_freq > 0xffffffffu / 10) + enable_preemption_timer = false; + } + + if (!enable_preemption_timer) { + kvm_x86_ops->set_hv_timer = NULL; + kvm_x86_ops->cancel_hv_timer = NULL; + kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit; + } + + kvm_set_posted_intr_wakeup_handler(wakeup_handler); + + kvm_mce_cap_supported |= MCG_LMCE_P; + + if (pt_mode != PT_MODE_SYSTEM && pt_mode != PT_MODE_HOST_GUEST) + return -EINVAL; + if (!enable_ept || !cpu_has_vmx_intel_pt()) + pt_mode = PT_MODE_SYSTEM; + + if (nested) { + nested_vmx_setup_ctls_msrs(&vmcs_config.nested, + vmx_capability.ept, enable_apicv); + + r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers); + if (r) + return r; + } + + r = alloc_kvm_area(); + if (r) + nested_vmx_hardware_unsetup(); + return r; +} + +static __exit void hardware_unsetup(void) +{ + if (nested) + nested_vmx_hardware_unsetup(); + + free_kvm_area(); +} + +static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { + .cpu_has_kvm_support = cpu_has_kvm_support, + .disabled_by_bios = vmx_disabled_by_bios, + .hardware_setup = hardware_setup, + .hardware_unsetup = hardware_unsetup, + .check_processor_compatibility = vmx_check_processor_compat, + .hardware_enable = hardware_enable, + .hardware_disable = hardware_disable, + .cpu_has_accelerated_tpr = report_flexpriority, + .has_emulated_msr = vmx_has_emulated_msr, + + .vm_init = vmx_vm_init, + .vm_alloc = vmx_vm_alloc, + .vm_free = vmx_vm_free, + + .vcpu_create = vmx_create_vcpu, + .vcpu_free = vmx_free_vcpu, + .vcpu_reset = vmx_vcpu_reset, + + .prepare_guest_switch = vmx_prepare_switch_to_guest, + .vcpu_load = vmx_vcpu_load, + .vcpu_put = vmx_vcpu_put, + + .update_bp_intercept = update_exception_bitmap, + .get_msr_feature = vmx_get_msr_feature, + .get_msr = vmx_get_msr, + .set_msr = vmx_set_msr, + .get_segment_base = vmx_get_segment_base, + .get_segment = vmx_get_segment, + .set_segment = vmx_set_segment, + .get_cpl = vmx_get_cpl, + .get_cs_db_l_bits = vmx_get_cs_db_l_bits, + .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits, + .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, + .set_cr0 = vmx_set_cr0, + .set_cr3 = vmx_set_cr3, + .set_cr4 = vmx_set_cr4, + .set_efer = vmx_set_efer, + .get_idt = vmx_get_idt, + .set_idt = vmx_set_idt, + .get_gdt = vmx_get_gdt, + .set_gdt = vmx_set_gdt, + .get_dr6 = vmx_get_dr6, + .set_dr6 = vmx_set_dr6, + .set_dr7 = vmx_set_dr7, + .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs, + .cache_reg = vmx_cache_reg, + .get_rflags = vmx_get_rflags, + .set_rflags = vmx_set_rflags, + + .tlb_flush = vmx_flush_tlb, + .tlb_flush_gva = vmx_flush_tlb_gva, + + .run = vmx_vcpu_run, + .handle_exit = vmx_handle_exit, + .skip_emulated_instruction = skip_emulated_instruction, + .set_interrupt_shadow = vmx_set_interrupt_shadow, + .get_interrupt_shadow = vmx_get_interrupt_shadow, + .patch_hypercall = vmx_patch_hypercall, + .set_irq = vmx_inject_irq, + .set_nmi = vmx_inject_nmi, + .queue_exception = vmx_queue_exception, + .cancel_injection = vmx_cancel_injection, + .interrupt_allowed = vmx_interrupt_allowed, + .nmi_allowed = vmx_nmi_allowed, + .get_nmi_mask = vmx_get_nmi_mask, + .set_nmi_mask = vmx_set_nmi_mask, + .enable_nmi_window = enable_nmi_window, + .enable_irq_window = enable_irq_window, + .update_cr8_intercept = update_cr8_intercept, + .set_virtual_apic_mode = vmx_set_virtual_apic_mode, + .set_apic_access_page_addr = vmx_set_apic_access_page_addr, + .get_enable_apicv = vmx_get_enable_apicv, + .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, + .load_eoi_exitmap = vmx_load_eoi_exitmap, + .apicv_post_state_restore = vmx_apicv_post_state_restore, + .hwapic_irr_update = vmx_hwapic_irr_update, + .hwapic_isr_update = vmx_hwapic_isr_update, + .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt, + .sync_pir_to_irr = vmx_sync_pir_to_irr, + .deliver_posted_interrupt = vmx_deliver_posted_interrupt, + .dy_apicv_has_pending_interrupt = vmx_dy_apicv_has_pending_interrupt, + + .set_tss_addr = vmx_set_tss_addr, + .set_identity_map_addr = vmx_set_identity_map_addr, + .get_tdp_level = get_ept_level, + .get_mt_mask = vmx_get_mt_mask, + + .get_exit_info = vmx_get_exit_info, + + .get_lpage_level = vmx_get_lpage_level, + + .cpuid_update = vmx_cpuid_update, + + .rdtscp_supported = vmx_rdtscp_supported, + .invpcid_supported = vmx_invpcid_supported, + + .set_supported_cpuid = vmx_set_supported_cpuid, + + .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, + + .read_l1_tsc_offset = vmx_read_l1_tsc_offset, + .write_l1_tsc_offset = vmx_write_l1_tsc_offset, + + .set_tdp_cr3 = vmx_set_cr3, + + .check_intercept = vmx_check_intercept, + .handle_exit_irqoff = vmx_handle_exit_irqoff, + .mpx_supported = vmx_mpx_supported, + .xsaves_supported = vmx_xsaves_supported, + .umip_emulated = vmx_umip_emulated, + .pt_supported = vmx_pt_supported, + + .request_immediate_exit = vmx_request_immediate_exit, + + .sched_in = vmx_sched_in, + + .slot_enable_log_dirty = vmx_slot_enable_log_dirty, + .slot_disable_log_dirty = vmx_slot_disable_log_dirty, + .flush_log_dirty = vmx_flush_log_dirty, + .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked, + .write_log_dirty = vmx_write_pml_buffer, + + .pre_block = vmx_pre_block, + .post_block = vmx_post_block, + + .pmu_ops = &intel_pmu_ops, + + .update_pi_irte = vmx_update_pi_irte, + +#ifdef CONFIG_X86_64 + .set_hv_timer = vmx_set_hv_timer, + .cancel_hv_timer = vmx_cancel_hv_timer, +#endif + + .setup_mce = vmx_setup_mce, + + .smi_allowed = vmx_smi_allowed, + .pre_enter_smm = vmx_pre_enter_smm, + .pre_leave_smm = vmx_pre_leave_smm, + .enable_smi_window = enable_smi_window, + + .check_nested_events = NULL, + .get_nested_state = NULL, + .set_nested_state = NULL, + .get_vmcs12_pages = NULL, + .nested_enable_evmcs = NULL, + .nested_get_evmcs_version = NULL, + .need_emulation_on_page_fault = vmx_need_emulation_on_page_fault, + .apic_init_signal_blocked = vmx_apic_init_signal_blocked, +}; + +static void vmx_cleanup_l1d_flush(void) +{ + if (vmx_l1d_flush_pages) { + free_pages((unsigned long)vmx_l1d_flush_pages, L1D_CACHE_ORDER); + vmx_l1d_flush_pages = NULL; + } + /* Restore state so sysfs ignores VMX */ + l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO; +} + +static void vmx_exit(void) +{ +#ifdef CONFIG_KEXEC_CORE + RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL); + synchronize_rcu(); +#endif + + kvm_exit(); + +#if IS_ENABLED(CONFIG_HYPERV) + if (static_branch_unlikely(&enable_evmcs)) { + int cpu; + struct hv_vp_assist_page *vp_ap; + /* + * Reset everything to support using non-enlightened VMCS + * access later (e.g. when we reload the module with + * enlightened_vmcs=0) + */ + for_each_online_cpu(cpu) { + vp_ap = hv_get_vp_assist_page(cpu); + + if (!vp_ap) + continue; + + vp_ap->nested_control.features.directhypercall = 0; + vp_ap->current_nested_vmcs = 0; + vp_ap->enlighten_vmentry = 0; + } + + static_branch_disable(&enable_evmcs); + } +#endif + vmx_cleanup_l1d_flush(); +} +module_exit(vmx_exit); + +static int __init vmx_init(void) +{ + int r; + +#if IS_ENABLED(CONFIG_HYPERV) + /* + * Enlightened VMCS usage should be recommended and the host needs + * to support eVMCS v1 or above. We can also disable eVMCS support + * with module parameter. + */ + if (enlightened_vmcs && + ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED && + (ms_hyperv.nested_features & HV_X64_ENLIGHTENED_VMCS_VERSION) >= + KVM_EVMCS_VERSION) { + int cpu; + + /* Check that we have assist pages on all online CPUs */ + for_each_online_cpu(cpu) { + if (!hv_get_vp_assist_page(cpu)) { + enlightened_vmcs = false; + break; + } + } + + if (enlightened_vmcs) { + pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n"); + static_branch_enable(&enable_evmcs); + } + + if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH) + vmx_x86_ops.enable_direct_tlbflush + = hv_enable_direct_tlbflush; + + } else { + enlightened_vmcs = false; + } +#endif + + r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), + __alignof__(struct vcpu_vmx), THIS_MODULE); + if (r) + return r; + + /* + * Must be called after kvm_init() so enable_ept is properly set + * up. Hand the parameter mitigation value in which was stored in + * the pre module init parser. If no parameter was given, it will + * contain 'auto' which will be turned into the default 'cond' + * mitigation mode. + */ + r = vmx_setup_l1d_flush(vmentry_l1d_flush_param); + if (r) { + vmx_exit(); + return r; + } + +#ifdef CONFIG_KEXEC_CORE + rcu_assign_pointer(crash_vmclear_loaded_vmcss, + crash_vmclear_local_loaded_vmcss); +#endif + vmx_check_vmcs12_offsets(); + + return 0; +} +module_init(vmx_init); From a4870bd1ee22d2a5c8cf720e5f3045999de3bdb0 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Sat, 28 Dec 2019 14:25:24 +0800 Subject: [PATCH 3163/3715] KVM: nVMX: vmread should not set rflags to specify success in case of #PF [ Upstream commit a4d956b9390418623ae5d07933e2679c68b6f83c ] In case writing to vmread destination operand result in a #PF, vmread should not call nested_vmx_succeed() to set rflags to specify success. Similar to as done in VMPTRST (See handle_vmptrst()). Reviewed-by: Liran Alon Signed-off-by: Miaohe Lin Cc: stable@vger.kernel.org Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/vmx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c579cda1721e..809d1b031fd9 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8014,8 +8014,10 @@ static int handle_vmread(struct kvm_vcpu *vcpu) /* _system ok, nested_vmx_check_permission has verified cpl=0 */ if (kvm_write_guest_virt_system(vcpu, gva, &field_value, (is_long_mode(vcpu) ? 8 : 4), - &e)) + &e)) { kvm_inject_page_fault(vcpu, &e); + return 1; + } } nested_vmx_succeed(vcpu); From f8d4d63d8c806bab7d6cb4254f462aa10e5f78ed Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 8 Jan 2020 12:24:37 -0800 Subject: [PATCH 3164/3715] KVM: Use vcpu-specific gva->hva translation when querying host page size [ Upstream commit f9b84e19221efc5f493156ee0329df3142085f28 ] Use kvm_vcpu_gfn_to_hva() when retrieving the host page size so that the correct set of memslots is used when handling x86 page faults in SMM. Fixes: 54bf36aac520 ("KVM: x86: use vcpu-specific functions to read/write/translate GFNs") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/mmu.c | 6 +++--- include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index c0b0135ef07f..e5af08b58132 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1165,12 +1165,12 @@ static bool mmu_gfn_lpage_is_disallowed(struct kvm_vcpu *vcpu, gfn_t gfn, return __mmu_gfn_lpage_is_disallowed(gfn, level, slot); } -static int host_mapping_level(struct kvm *kvm, gfn_t gfn) +static int host_mapping_level(struct kvm_vcpu *vcpu, gfn_t gfn) { unsigned long page_size; int i, ret = 0; - page_size = kvm_host_page_size(kvm, gfn); + page_size = kvm_host_page_size(vcpu, gfn); for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { if (page_size >= KVM_HPAGE_SIZE(i)) @@ -1220,7 +1220,7 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn, if (unlikely(*force_pt_level)) return PT_PAGE_TABLE_LEVEL; - host_level = host_mapping_level(vcpu->kvm, large_gfn); + host_level = host_mapping_level(vcpu, large_gfn); if (host_level == PT_PAGE_TABLE_LEVEL) return host_level; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7668c68ddb5b..30376715a607 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -695,7 +695,7 @@ int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); -unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn); +unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn); void mark_page_dirty(struct kvm *kvm, gfn_t gfn); struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index deff4b3eb972..609903481e39 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1277,14 +1277,14 @@ bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) } EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); -unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn) +unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn) { struct vm_area_struct *vma; unsigned long addr, size; size = PAGE_SIZE; - addr = gfn_to_hva(kvm, gfn); + addr = kvm_vcpu_gfn_to_hva(vcpu, gfn); if (kvm_is_error_hva(addr)) return PAGE_SIZE; From 0941ce401d489cb4d53b852fbff7cfce3982beb0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 8 Jan 2020 12:24:38 -0800 Subject: [PATCH 3165/3715] KVM: Play nice with read-only memslots when querying host page size [ Upstream commit 42cde48b2d39772dba47e680781a32a6c4b7dc33 ] Avoid the "writable" check in __gfn_to_hva_many(), which will always fail on read-only memslots due to gfn_to_hva() assuming writes. Functionally, this allows x86 to create large mappings for read-only memslots that are backed by HugeTLB mappings. Note, the changelog for commit 05da45583de9 ("KVM: MMU: large page support") states "If the largepage contains write-protected pages, a large pte is not used.", but "write-protected" refers to pages that are temporarily read-only, e.g. read-only memslots didn't even exist at the time. Fixes: 4d8b81abc47b ("KVM: introduce readonly memslot") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson [Redone using kvm_vcpu_gfn_to_memslot_prot. - Paolo] Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 609903481e39..745ee09083dd 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1284,7 +1284,7 @@ unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn) size = PAGE_SIZE; - addr = kvm_vcpu_gfn_to_hva(vcpu, gfn); + addr = kvm_vcpu_gfn_to_hva_prot(vcpu, gfn, NULL); if (kvm_is_error_hva(addr)) return PAGE_SIZE; From 9a473fc4c82c35d2c54095c09dde1dd7e1dc298f Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 31 Jan 2020 05:02:00 -0500 Subject: [PATCH 3166/3715] KVM: s390: do not clobber registers during guest reset/store status [ Upstream commit 55680890ea78be0df5e1384989f1be835043c084 ] The initial CPU reset clobbers the userspace fpc and the store status ioctl clobbers the guest acrs + fpr. As these calls are only done via ioctl (and not via vcpu_run), no CPU context is loaded, so we can (and must) act directly on the sync regs, not on the thread context. Cc: stable@kernel.org Fixes: e1788bb995be ("KVM: s390: handle floating point registers in the run ioctl not in vcpu_put/load") Fixes: 31d8b8d41a7e ("KVM: s390: handle access registers in the run ioctl not in vcpu_put/load") Signed-off-by: Christian Borntraeger Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck Signed-off-by: Janosch Frank Link: https://lore.kernel.org/r/20200131100205.74720-2-frankja@linux.ibm.com Signed-off-by: Christian Borntraeger Signed-off-by: Sasha Levin --- arch/s390/kvm/kvm-s390.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 91c24e87fe10..46fee3f4dedd 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2384,9 +2384,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64)); vcpu->arch.sie_block->gcr[0] = 0xE0UL; vcpu->arch.sie_block->gcr[14] = 0xC2000000UL; - /* make sure the new fpc will be lazily loaded */ - save_fpu_regs(); - current->thread.fpu.fpc = 0; + vcpu->run->s.regs.fpc = 0; vcpu->arch.sie_block->gbea = 1; vcpu->arch.sie_block->pp = 0; vcpu->arch.sie_block->fpf &= ~FPF_BPBC; @@ -3753,7 +3751,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, } case KVM_S390_STORE_STATUS: idx = srcu_read_lock(&vcpu->kvm->srcu); - r = kvm_s390_vcpu_store_status(vcpu, arg); + r = kvm_s390_store_status_unloaded(vcpu, arg); srcu_read_unlock(&vcpu->kvm->srcu, idx); break; case KVM_S390_SET_INITIAL_PSW: { From 640d7fb2d29c08aa8c0569853d1df4cfdd3eb333 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 5 Feb 2020 11:08:01 +1000 Subject: [PATCH 3167/3715] cifs: fail i/o on soft mounts if sessionsetup errors out commit b0dd940e582b6a60296b9847a54012a4b080dc72 upstream. RHBZ: 1579050 If we have a soft mount we should fail commands for session-setup failures (such as the password having changed/ account being deleted/ ...) and return an error back to the application. Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French CC: Stable Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 4eb0a9e7194b..1c87a429ce72 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -257,9 +257,14 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) } rc = cifs_negotiate_protocol(0, tcon->ses); - if (!rc && tcon->ses->need_reconnect) + if (!rc && tcon->ses->need_reconnect) { rc = cifs_setup_session(0, tcon->ses, nls_codepage); - + if ((rc == -EACCES) && !tcon->retry) { + rc = -EHOSTDOWN; + mutex_unlock(&tcon->ses->session_mutex); + goto failed; + } + } if (rc || !tcon->need_reconnect) { mutex_unlock(&tcon->ses->session_mutex); goto out; @@ -301,6 +306,7 @@ out: case SMB2_SET_INFO: rc = -EAGAIN; } +failed: unload_nls(nls_codepage); return rc; } From 2106d26897f9341ddb7ad74bfc5867808cec927a Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Fri, 31 Jan 2020 19:08:59 +0300 Subject: [PATCH 3168/3715] clocksource: Prevent double add_timer_on() for watchdog_timer commit febac332a819f0e764aa4da62757ba21d18c182b upstream. Kernel crashes inside QEMU/KVM are observed: kernel BUG at kernel/time/timer.c:1154! BUG_ON(timer_pending(timer) || !timer->function) in add_timer_on(). At the same time another cpu got: general protection fault: 0000 [#1] SMP PTI of poinson pointer 0xdead000000000200 in: __hlist_del at include/linux/list.h:681 (inlined by) detach_timer at kernel/time/timer.c:818 (inlined by) expire_timers at kernel/time/timer.c:1355 (inlined by) __run_timers at kernel/time/timer.c:1686 (inlined by) run_timer_softirq at kernel/time/timer.c:1699 Unfortunately kernel logs are badly scrambled, stacktraces are lost. Printing the timer->function before the BUG_ON() pointed to clocksource_watchdog(). The execution of clocksource_watchdog() can race with a sequence of clocksource_stop_watchdog() .. clocksource_start_watchdog(): expire_timers() detach_timer(timer, true); timer->entry.pprev = NULL; raw_spin_unlock_irq(&base->lock); call_timer_fn clocksource_watchdog() clocksource_watchdog_kthread() or clocksource_unbind() spin_lock_irqsave(&watchdog_lock, flags); clocksource_stop_watchdog(); del_timer(&watchdog_timer); watchdog_running = 0; spin_unlock_irqrestore(&watchdog_lock, flags); spin_lock_irqsave(&watchdog_lock, flags); clocksource_start_watchdog(); add_timer_on(&watchdog_timer, ...); watchdog_running = 1; spin_unlock_irqrestore(&watchdog_lock, flags); spin_lock(&watchdog_lock); add_timer_on(&watchdog_timer, ...); BUG_ON(timer_pending(timer) || !timer->function); timer_pending() -> true BUG() I.e. inside clocksource_watchdog() watchdog_timer could be already armed. Check timer_pending() before calling add_timer_on(). This is sufficient as all operations are synchronized by watchdog_lock. Fixes: 75c5158f70c0 ("timekeeping: Update clocksource with stop_machine") Signed-off-by: Konstantin Khlebnikov Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/158048693917.4378.13823603769948933793.stgit@buzz Signed-off-by: Greg Kroah-Hartman --- kernel/time/clocksource.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 3b71d859ee38..825d24df921a 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -280,8 +280,15 @@ static void clocksource_watchdog(unsigned long data) next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask); if (next_cpu >= nr_cpu_ids) next_cpu = cpumask_first(cpu_online_mask); - watchdog_timer.expires += WATCHDOG_INTERVAL; - add_timer_on(&watchdog_timer, next_cpu); + + /* + * Arm timer if not already pending: could race with concurrent + * pair clocksource_stop_watchdog() clocksource_start_watchdog(). + */ + if (!timer_pending(&watchdog_timer)) { + watchdog_timer.expires += WATCHDOG_INTERVAL; + add_timer_on(&watchdog_timer, next_cpu); + } out: spin_unlock(&watchdog_lock); } From f993a79d85fe81379ac4b2f9b4660823f8574a6d Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 23 Jan 2020 10:11:46 -0800 Subject: [PATCH 3169/3715] perf/core: Fix mlock accounting in perf_mmap() commit 003461559ef7a9bd0239bae35a22ad8924d6e9ad upstream. Decreasing sysctl_perf_event_mlock between two consecutive perf_mmap()s of a perf ring buffer may lead to an integer underflow in locked memory accounting. This may lead to the undesired behaviors, such as failures in BPF map creation. Address this by adjusting the accounting logic to take into account the possibility that the amount of already locked memory may exceed the current limit. Fixes: c4b75479741c ("perf/core: Make the mlock accounting simple again") Suggested-by: Alexander Shishkin Signed-off-by: Song Liu Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Cc: Acked-by: Alexander Shishkin Link: https://lkml.kernel.org/r/20200123181146.2238074-1-songliubraving@fb.com Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 2ac73b4cb8a9..845c8a1a9d30 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5441,7 +5441,15 @@ accounting: */ user_lock_limit *= num_online_cpus(); - user_locked = atomic_long_read(&user->locked_vm) + user_extra; + user_locked = atomic_long_read(&user->locked_vm); + + /* + * sysctl_perf_event_mlock may have changed, so that + * user->locked_vm > user_lock_limit + */ + if (user_locked > user_lock_limit) + user_locked = user_lock_limit; + user_locked += user_extra; if (user_locked > user_lock_limit) extra = user_locked - user_lock_limit; From c12b8cb9f1546df81368d076f7ec3e13222d1b6c Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Feb 2020 13:55:01 +0000 Subject: [PATCH 3170/3715] rxrpc: Fix service call disconnection [ Upstream commit b39a934ec72fa2b5a74123891f25273a38378b90 ] The recent patch that substituted a flag on an rxrpc_call for the connection pointer being NULL as an indication that a call was disconnected puts the set_bit in the wrong place for service calls. This is only a problem if a call is implicitly terminated by a new call coming in on the same connection channel instead of a terminating ACK packet. In such a case, rxrpc_input_implicit_end_call() calls __rxrpc_disconnect_call(), which is now (incorrectly) setting the disconnection bit, meaning that when rxrpc_release_call() is later called, it doesn't call rxrpc_disconnect_call() and so the call isn't removed from the peer's error distribution list and the list gets corrupted. KASAN finds the issue as an access after release on a call, but the position at which it occurs is confusing as it appears to be related to a different call (the call site is where the latter call is being removed from the error distribution list and either the next or pprev pointer points to a previously released call). Fix this by moving the setting of the flag from __rxrpc_disconnect_call() to rxrpc_disconnect_call() in the same place that the connection pointer was being cleared. Fixes: 5273a191dca6 ("rxrpc: Fix NULL pointer deref due to call->conn being cleared on disconnect") Signed-off-by: David Howells Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/rxrpc/conn_object.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 13b29e491de9..af0232820597 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -163,8 +163,6 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn, _enter("%d,%x", conn->debug_id, call->cid); - set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); - if (rcu_access_pointer(chan->call) == call) { /* Save the result of the call so that we can repeat it if necessary * through the channel, whilst disposing of the actual call record. @@ -209,6 +207,7 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) __rxrpc_disconnect_call(conn, call); spin_unlock(&conn->channel_lock); + set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); conn->idle_timestamp = jiffies; } From c7d57b1fd0f77ccd0def6893c02f3babe96e37ef Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Mon, 4 Nov 2019 14:48:11 -0800 Subject: [PATCH 3171/3715] ASoC: pcm: update FE/BE trigger order based on the command [ Upstream commit acbf27746ecfa96b290b54cc7f05273482ea128a ] Currently, the trigger orders SND_SOC_DPCM_TRIGGER_PRE/POST determine the order in which FE DAI and BE DAI are triggered. In the case of SND_SOC_DPCM_TRIGGER_PRE, the FE DAI is triggered before the BE DAI and in the case of SND_SOC_DPCM_TRIGGER_POST, the BE DAI is triggered before the FE DAI. And this order remains the same irrespective of the trigger command. In the case of the SOF driver, during playback, the FW expects the BE DAI to be triggered before the FE DAI during the START trigger. The BE DAI trigger handles the starting of Link DMA and so it must be started before the FE DAI is started to prevent xruns during pause/release. This can be addressed by setting the trigger order for the FE dai link to SND_SOC_DPCM_TRIGGER_POST. But during the STOP trigger, the FW expects the FE DAI to be triggered before the BE DAI. Retaining the same order during the START and STOP commands, results in FW error as the DAI component in the FW is still active. The issue can be fixed by mirroring the trigger order of FE and BE DAI's during the START and STOP trigger. So, with the trigger order set to SND_SOC_DPCM_TRIGGER_PRE, the FE DAI will be trigger first during SNDRV_PCM_TRIGGER_START/STOP/RESUME and the BE DAI will be triggered first during the STOP/SUSPEND/PAUSE commands. Conversely, with the trigger order set to SND_SOC_DPCM_TRIGGER_POST, the BE DAI will be triggered first during the SNDRV_PCM_TRIGGER_START/STOP/RESUME commands and the FE DAI will be triggered first during the SNDRV_PCM_TRIGGER_STOP/SUSPEND/PAUSE commands. Signed-off-by: Ranjani Sridharan Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20191104224812.3393-2-ranjani.sridharan@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-pcm.c | 95 ++++++++++++++++++++++++++++++++------------- 1 file changed, 68 insertions(+), 27 deletions(-) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 70e1a60a2e98..89f772ed4705 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -2123,42 +2123,81 @@ int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream, } EXPORT_SYMBOL_GPL(dpcm_be_dai_trigger); +static int dpcm_dai_trigger_fe_be(struct snd_pcm_substream *substream, + int cmd, bool fe_first) +{ + struct snd_soc_pcm_runtime *fe = substream->private_data; + int ret; + + /* call trigger on the frontend before the backend. */ + if (fe_first) { + dev_dbg(fe->dev, "ASoC: pre trigger FE %s cmd %d\n", + fe->dai_link->name, cmd); + + ret = soc_pcm_trigger(substream, cmd); + if (ret < 0) + return ret; + + ret = dpcm_be_dai_trigger(fe, substream->stream, cmd); + return ret; + } + + /* call trigger on the frontend after the backend. */ + ret = dpcm_be_dai_trigger(fe, substream->stream, cmd); + if (ret < 0) + return ret; + + dev_dbg(fe->dev, "ASoC: post trigger FE %s cmd %d\n", + fe->dai_link->name, cmd); + + ret = soc_pcm_trigger(substream, cmd); + + return ret; +} + static int dpcm_fe_dai_do_trigger(struct snd_pcm_substream *substream, int cmd) { struct snd_soc_pcm_runtime *fe = substream->private_data; - int stream = substream->stream, ret; + int stream = substream->stream; + int ret = 0; enum snd_soc_dpcm_trigger trigger = fe->dai_link->trigger[stream]; fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE; switch (trigger) { case SND_SOC_DPCM_TRIGGER_PRE: - /* call trigger on the frontend before the backend. */ - - dev_dbg(fe->dev, "ASoC: pre trigger FE %s cmd %d\n", - fe->dai_link->name, cmd); - - ret = soc_pcm_trigger(substream, cmd); - if (ret < 0) { - dev_err(fe->dev,"ASoC: trigger FE failed %d\n", ret); - goto out; + switch (cmd) { + case SNDRV_PCM_TRIGGER_START: + case SNDRV_PCM_TRIGGER_RESUME: + case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: + ret = dpcm_dai_trigger_fe_be(substream, cmd, true); + break; + case SNDRV_PCM_TRIGGER_STOP: + case SNDRV_PCM_TRIGGER_SUSPEND: + case SNDRV_PCM_TRIGGER_PAUSE_PUSH: + ret = dpcm_dai_trigger_fe_be(substream, cmd, false); + break; + default: + ret = -EINVAL; + break; } - - ret = dpcm_be_dai_trigger(fe, substream->stream, cmd); break; case SND_SOC_DPCM_TRIGGER_POST: - /* call trigger on the frontend after the backend. */ - - ret = dpcm_be_dai_trigger(fe, substream->stream, cmd); - if (ret < 0) { - dev_err(fe->dev,"ASoC: trigger FE failed %d\n", ret); - goto out; + switch (cmd) { + case SNDRV_PCM_TRIGGER_START: + case SNDRV_PCM_TRIGGER_RESUME: + case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: + ret = dpcm_dai_trigger_fe_be(substream, cmd, false); + break; + case SNDRV_PCM_TRIGGER_STOP: + case SNDRV_PCM_TRIGGER_SUSPEND: + case SNDRV_PCM_TRIGGER_PAUSE_PUSH: + ret = dpcm_dai_trigger_fe_be(substream, cmd, true); + break; + default: + ret = -EINVAL; + break; } - - dev_dbg(fe->dev, "ASoC: post trigger FE %s cmd %d\n", - fe->dai_link->name, cmd); - - ret = soc_pcm_trigger(substream, cmd); break; case SND_SOC_DPCM_TRIGGER_BESPOKE: /* bespoke trigger() - handles both FE and BEs */ @@ -2167,10 +2206,6 @@ static int dpcm_fe_dai_do_trigger(struct snd_pcm_substream *substream, int cmd) fe->dai_link->name, cmd); ret = soc_pcm_bespoke_trigger(substream, cmd); - if (ret < 0) { - dev_err(fe->dev,"ASoC: trigger FE failed %d\n", ret); - goto out; - } break; default: dev_err(fe->dev, "ASoC: invalid trigger cmd %d for %s\n", cmd, @@ -2179,6 +2214,12 @@ static int dpcm_fe_dai_do_trigger(struct snd_pcm_substream *substream, int cmd) goto out; } + if (ret < 0) { + dev_err(fe->dev, "ASoC: trigger FE cmd: %d failed: %d\n", + cmd, ret); + goto out; + } + switch (cmd) { case SNDRV_PCM_TRIGGER_START: case SNDRV_PCM_TRIGGER_RESUME: From 9b164187829c7db401ca2da9e19f2c30a026b3e5 Mon Sep 17 00:00:00 2001 From: Sunil Muthuswamy Date: Fri, 24 Jan 2020 03:08:18 +0000 Subject: [PATCH 3172/3715] hv_sock: Remove the accept port restriction [ Upstream commit c742c59e1fbd022b64d91aa9a0092b3a699d653c ] Currently, hv_sock restricts the port the guest socket can accept connections on. hv_sock divides the socket port namespace into two parts for server side (listening socket), 0-0x7FFFFFFF & 0x80000000-0xFFFFFFFF (there are no restrictions on client port namespace). The first part (0-0x7FFFFFFF) is reserved for sockets where connections can be accepted. The second part (0x80000000-0xFFFFFFFF) is reserved for allocating ports for the peer (host) socket, once a connection is accepted. This reservation of the port namespace is specific to hv_sock and not known by the generic vsock library (ex: af_vsock). This is problematic because auto-binds/ephemeral ports are handled by the generic vsock library and it has no knowledge of this port reservation and could allocate a port that is not compatible with hv_sock (and legitimately so). The issue hasn't surfaced so far because the auto-bind code of vsock (__vsock_bind_stream) prior to the change 'VSOCK: bind to random port for VMADDR_PORT_ANY' would start walking up from LAST_RESERVED_PORT (1023) and start assigning ports. That will take a large number of iterations to hit 0x7FFFFFFF. But, after the above change to randomize port selection, the issue has started coming up more frequently. There has really been no good reason to have this port reservation logic in hv_sock from the get go. Reserving a local port for peer ports is not how things are handled generally. Peer ports should reflect the peer port. This fixes the issue by lifting the port reservation, and also returns the right peer port. Since the code converts the GUID to the peer port (by using the first 4 bytes), there is a possibility of conflicts, but that seems like a reasonable risk to take, given this is limited to vsock and that only applies to all local sockets. Signed-off-by: Sunil Muthuswamy Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/vmw_vsock/hyperv_transport.c | 68 +++++--------------------------- 1 file changed, 9 insertions(+), 59 deletions(-) diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 6614512f8180..736b76ec8cf0 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -144,28 +144,15 @@ struct hvsock { **************************************************************************** * The only valid Service GUIDs, from the perspectives of both the host and * * Linux VM, that can be connected by the other end, must conform to this * - * format: -facb-11e6-bd58-64006a7986d3, and the "port" must be in * - * this range [0, 0x7FFFFFFF]. * + * format: -facb-11e6-bd58-64006a7986d3. * **************************************************************************** * * When we write apps on the host to connect(), the GUID ServiceID is used. * When we write apps in Linux VM to connect(), we only need to specify the * port and the driver will form the GUID and use that to request the host. * - * From the perspective of Linux VM: - * 1. the local ephemeral port (i.e. the local auto-bound port when we call - * connect() without explicit bind()) is generated by __vsock_bind_stream(), - * and the range is [1024, 0xFFFFFFFF). - * 2. the remote ephemeral port (i.e. the auto-generated remote port for - * a connect request initiated by the host's connect()) is generated by - * hvs_remote_addr_init() and the range is [0x80000000, 0xFFFFFFFF). */ -#define MAX_LISTEN_PORT ((u32)0x7FFFFFFF) -#define MAX_VM_LISTEN_PORT MAX_LISTEN_PORT -#define MAX_HOST_LISTEN_PORT MAX_LISTEN_PORT -#define MIN_HOST_EPHEMERAL_PORT (MAX_HOST_LISTEN_PORT + 1) - /* 00000000-facb-11e6-bd58-64006a7986d3 */ static const uuid_le srv_id_template = UUID_LE(0x00000000, 0xfacb, 0x11e6, 0xbd, 0x58, @@ -188,33 +175,6 @@ static void hvs_addr_init(struct sockaddr_vm *addr, const uuid_le *svr_id) vsock_addr_init(addr, VMADDR_CID_ANY, port); } -static void hvs_remote_addr_init(struct sockaddr_vm *remote, - struct sockaddr_vm *local) -{ - static u32 host_ephemeral_port = MIN_HOST_EPHEMERAL_PORT; - struct sock *sk; - - vsock_addr_init(remote, VMADDR_CID_ANY, VMADDR_PORT_ANY); - - while (1) { - /* Wrap around ? */ - if (host_ephemeral_port < MIN_HOST_EPHEMERAL_PORT || - host_ephemeral_port == VMADDR_PORT_ANY) - host_ephemeral_port = MIN_HOST_EPHEMERAL_PORT; - - remote->svm_port = host_ephemeral_port++; - - sk = vsock_find_connected_socket(remote, local); - if (!sk) { - /* Found an available ephemeral port */ - return; - } - - /* Release refcnt got in vsock_find_connected_socket */ - sock_put(sk); - } -} - static void hvs_set_channel_pending_send_size(struct vmbus_channel *chan) { set_channel_pending_send_size(chan, @@ -342,12 +302,7 @@ static void hvs_open_connection(struct vmbus_channel *chan) if_type = &chan->offermsg.offer.if_type; if_instance = &chan->offermsg.offer.if_instance; conn_from_host = chan->offermsg.offer.u.pipe.user_def[0]; - - /* The host or the VM should only listen on a port in - * [0, MAX_LISTEN_PORT] - */ - if (!is_valid_srv_id(if_type) || - get_port_by_srv_id(if_type) > MAX_LISTEN_PORT) + if (!is_valid_srv_id(if_type)) return; hvs_addr_init(&addr, conn_from_host ? if_type : if_instance); @@ -372,6 +327,13 @@ static void hvs_open_connection(struct vmbus_channel *chan) new->sk_state = TCP_SYN_SENT; vnew = vsock_sk(new); + + hvs_addr_init(&vnew->local_addr, if_type); + + /* Remote peer is always the host */ + vsock_addr_init(&vnew->remote_addr, + VMADDR_CID_HOST, VMADDR_PORT_ANY); + vnew->remote_addr.svm_port = get_port_by_srv_id(if_instance); hvs_new = vnew->trans; hvs_new->chan = chan; } else { @@ -411,8 +373,6 @@ static void hvs_open_connection(struct vmbus_channel *chan) sk->sk_ack_backlog++; hvs_addr_init(&vnew->local_addr, if_type); - hvs_remote_addr_init(&vnew->remote_addr, &vnew->local_addr); - hvs_new->vm_srv_id = *if_type; hvs_new->host_srv_id = *if_instance; @@ -717,16 +677,6 @@ static bool hvs_stream_is_active(struct vsock_sock *vsk) static bool hvs_stream_allow(u32 cid, u32 port) { - /* The host's port range [MIN_HOST_EPHEMERAL_PORT, 0xFFFFFFFF) is - * reserved as ephemeral ports, which are used as the host's ports - * when the host initiates connections. - * - * Perform this check in the guest so an immediate error is produced - * instead of a timeout. - */ - if (port > MAX_HOST_LISTEN_PORT) - return false; - if (cid == VMADDR_CID_HOST) return true; From 38df0eb4a4092a7e8c3ed6945c432fdb5e74ba30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20Bugge?= Date: Mon, 16 Dec 2019 13:04:36 +0100 Subject: [PATCH 3173/3715] RDMA/netlink: Do not always generate an ACK for some netlink operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a242c36951ecd24bc16086940dbe6b522205c461 upstream. In rdma_nl_rcv_skb(), the local variable err is assigned the return value of the supplied callback function, which could be one of ib_nl_handle_resolve_resp(), ib_nl_handle_set_timeout(), or ib_nl_handle_ip_res_resp(). These three functions all return skb->len on success. rdma_nl_rcv_skb() is merely a copy of netlink_rcv_skb(). The callback functions used by the latter have the convention: "Returns 0 on success or a negative error code". In particular, the statement (equal for both functions): if (nlh->nlmsg_flags & NLM_F_ACK || err) implies that rdma_nl_rcv_skb() always will ack a message, independent of the NLM_F_ACK being set in nlmsg_flags or not. The fix could be to change the above statement, but it is better to keep the two *_rcv_skb() functions equal in this respect and instead change the three callback functions in the rdma subsystem to the correct convention. Fixes: 2ca546b92a02 ("IB/sa: Route SA pathrecord query through netlink") Fixes: ae43f8286730 ("IB/core: Add IP to GID netlink offload") Link: https://lore.kernel.org/r/20191216120436.3204814-1-haakon.bugge@oracle.com Suggested-by: Mark Haywood Signed-off-by: Håkon Bugge Tested-by: Mark Haywood Reviewed-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/addr.c | 2 +- drivers/infiniband/core/sa_query.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index aadaa9e84eee..c2bbe0df0931 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -140,7 +140,7 @@ int ib_nl_handle_ip_res_resp(struct sk_buff *skb, if (ib_nl_is_good_ip_resp(nlh)) ib_nl_process_good_ip_rsep(nlh); - return skb->len; + return 0; } static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr, diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 50068b0a91fa..83dad5401c93 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1078,7 +1078,7 @@ int ib_nl_handle_set_timeout(struct sk_buff *skb, } settimeout_out: - return skb->len; + return 0; } static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh) @@ -1149,7 +1149,7 @@ int ib_nl_handle_resolve_resp(struct sk_buff *skb, } resp_out: - return skb->len; + return 0; } static void free_sm_ah(struct kref *kref) From 251601c93fd4615168607d2be37b7dfa7c9cfab4 Mon Sep 17 00:00:00 2001 From: Bean Huo Date: Mon, 20 Jan 2020 14:08:13 +0100 Subject: [PATCH 3174/3715] scsi: ufs: Fix ufshcd_probe_hba() reture value in case ufshcd_scsi_add_wlus() fails commit b9fc5320212efdfb4e08b825aaa007815fd11d16 upstream. A non-zero error value likely being returned by ufshcd_scsi_add_wlus() in case of failure of adding the WLs, but ufshcd_probe_hba() doesn't use this value, and doesn't report this failure to upper caller. This patch is to fix this issue. Fixes: 2a8fa600445c ("ufs: manually add well known logical units") Link: https://lore.kernel.org/r/20200120130820.1737-2-huobean@gmail.com Reviewed-by: Asutosh Das Reviewed-by: Alim Akhtar Reviewed-by: Stanley Chu Signed-off-by: Bean Huo Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ufs/ufshcd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 7ada4f272258..ce40de334f11 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -6415,7 +6415,8 @@ static int ufshcd_probe_hba(struct ufs_hba *hba) ufshcd_init_icc_levels(hba); /* Add required well known logical units to scsi mid layer */ - if (ufshcd_scsi_add_wlus(hba)) + ret = ufshcd_scsi_add_wlus(hba); + if (ret) goto out; /* Initialize devfreq after UFS device is detected */ From b642fb092de14eb6ed760f5fb7ceb160ec6bee98 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Mon, 6 Jan 2020 12:03:27 -0700 Subject: [PATCH 3175/3715] PCI/switchtec: Fix vep_vector_number ioread width commit 9375646b4cf03aee81bc6c305aa18cc80b682796 upstream. vep_vector_number is actually a 16 bit register which should be read with ioread16() instead of ioread32(). Fixes: 080b47def5e5 ("MicroSemi Switchtec management interface driver") Link: https://lore.kernel.org/r/20200106190337.2428-3-logang@deltatee.com Reported-by: Doug Meyer Signed-off-by: Logan Gunthorpe Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- drivers/pci/switch/switchtec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c index 73dba2739849..bf229b442e72 100644 --- a/drivers/pci/switch/switchtec.c +++ b/drivers/pci/switch/switchtec.c @@ -1399,7 +1399,7 @@ static int switchtec_init_isr(struct switchtec_dev *stdev) if (nvecs < 0) return nvecs; - event_irq = ioread32(&stdev->mmio_part_cfg->vep_vector_number); + event_irq = ioread16(&stdev->mmio_part_cfg->vep_vector_number); if (event_irq < 0 || event_irq >= nvecs) return -EFAULT; From 2edc1dc9008b02fc43f22698c88c390c493e9399 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Wed, 8 Jan 2020 14:32:08 -0700 Subject: [PATCH 3176/3715] PCI: Don't disable bridge BARs when assigning bus resources commit 9db8dc6d0785225c42a37be7b44d1b07b31b8957 upstream. Some PCI bridges implement BARs in addition to bridge windows. For example, here's a PLX switch: 04:00.0 PCI bridge: PLX Technology, Inc. PEX 8724 24-Lane, 6-Port PCI Express Gen 3 (8 GT/s) Switch, 19 x 19mm FCBGA (rev ca) (prog-if 00 [Normal decode]) Flags: bus master, fast devsel, latency 0, IRQ 30, NUMA node 0 Memory at 90a00000 (32-bit, non-prefetchable) [size=256K] Bus: primary=04, secondary=05, subordinate=0a, sec-latency=0 I/O behind bridge: 00002000-00003fff Memory behind bridge: 90000000-909fffff Prefetchable memory behind bridge: 0000380000800000-0000380000bfffff Previously, when the kernel assigned resource addresses (with the pci=realloc command line parameter, for example) it could clear the struct resource corresponding to the BAR. When this happened, lspci would report this BAR as "ignored": Region 0: Memory at (32-bit, non-prefetchable) [size=256K] This is because the kernel reports a zero start address and zero flags in the corresponding sysfs resource file and in /proc/bus/pci/devices. Investigation with 'lspci -x', however, shows the BIOS-assigned address will still be programmed in the device's BAR registers. It's clearly a bug that the kernel lost track of the BAR value, but in most cases, this still won't result in a visible issue because nothing uses the memory, so nothing is affected. However, when an IOMMU is in use, it will not reserve this space in the IOVA because the kernel no longer thinks the range is valid. (See dmar_init_reserved_ranges() for the Intel implementation of this.) Without the proper reserved range, a DMA mapping may allocate an IOVA that matches a bridge BAR, which results in DMA accesses going to the BAR instead of the intended RAM. The problem was in pci_assign_unassigned_root_bus_resources(). When any resource from a bridge device fails to get assigned, the code set the resource's flags to zero. This makes sense for bridge windows, as they will be re-enabled later, but for regular BARs, it makes the kernel permanently lose track of the fact that they decode address space. Change pci_assign_unassigned_root_bus_resources() and pci_assign_unassigned_bridge_resources() so they only clear "res->flags" for bridge *windows*, not bridge BARs. Fixes: da7822e5ad71 ("PCI: update bridge resources to get more big ranges when allocating space (again)") Link: https://lore.kernel.org/r/20200108213208.4612-1-logang@deltatee.com [bhelgaas: commit log, check for pci_is_bridge()] Reported-by: Kit Chow Signed-off-by: Logan Gunthorpe Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- drivers/pci/setup-bus.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 958da7db9033..fb73e975d22b 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -1824,12 +1824,18 @@ again: /* restore size and flags */ list_for_each_entry(fail_res, &fail_head, list) { struct resource *res = fail_res->res; + int idx; res->start = fail_res->start; res->end = fail_res->end; res->flags = fail_res->flags; - if (fail_res->dev->subordinate) - res->flags = 0; + + if (pci_is_bridge(fail_res->dev)) { + idx = res - &fail_res->dev->resource[0]; + if (idx >= PCI_BRIDGE_RESOURCES && + idx <= PCI_BRIDGE_RESOURCE_END) + res->flags = 0; + } } free_list(&fail_head); @@ -1895,12 +1901,18 @@ again: /* restore size and flags */ list_for_each_entry(fail_res, &fail_head, list) { struct resource *res = fail_res->res; + int idx; res->start = fail_res->start; res->end = fail_res->end; res->flags = fail_res->flags; - if (fail_res->dev->subordinate) - res->flags = 0; + + if (pci_is_bridge(fail_res->dev)) { + idx = res - &fail_res->dev->resource[0]; + if (idx >= PCI_BRIDGE_RESOURCES && + idx <= PCI_BRIDGE_RESOURCE_END) + res->flags = 0; + } } free_list(&fail_head); From 15466c4d2910c8e385d74cf0fb37b747750124bb Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 30 Dec 2019 16:32:38 +0100 Subject: [PATCH 3177/3715] nfs: NFS_SWAP should depend on SWAP commit 474c4f306eefbb21b67ebd1de802d005c7d7ecdc upstream. If CONFIG_SWAP=n, it does not make much sense to offer the user the option to enable support for swapping over NFS, as that will still fail at run time: # swapon /swap swapon: /swap: swapon failed: Function not implemented Fix this by adding a dependency on CONFIG_SWAP. Fixes: a564b8f0398636ba ("nfs: enable swap on NFS") Signed-off-by: Geert Uytterhoeven Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 5f93cfacb3d1..ac3e06367cb6 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -89,7 +89,7 @@ config NFS_V4 config NFS_SWAP bool "Provide swap over NFS support" default n - depends on NFS_FS + depends on NFS_FS && SWAP select SUNRPC_SWAP help This option enables swapon to work on files located on NFS mounts. From e69fb85b026bd7e71d7ac5c51dfa8cd82a1107b9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Jan 2020 15:25:04 -0500 Subject: [PATCH 3178/3715] NFS/pnfs: Fix pnfs_generic_prepare_to_resend_writes() commit 221203ce6406273cf00e5c6397257d986c003ee6 upstream. Instead of making assumptions about the commit verifier contents, change the commit code to ensure we always check that the verifier was set by the XDR code. Fixes: f54bcf2ecee9 ("pnfs: Prepare for flexfiles by pulling out common code") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/direct.c | 4 ++-- fs/nfs/nfs3xdr.c | 5 ++++- fs/nfs/nfs4xdr.c | 5 ++++- fs/nfs/pnfs_nfs.c | 7 +++---- fs/nfs/write.c | 4 +++- 5 files changed, 16 insertions(+), 9 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 9cdac9945483..9d07b53e1647 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -261,10 +261,10 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, data->ds_commit_index); /* verifier not set so always fail */ - if (verfp->committed < 0) + if (verfp->committed < 0 || data->res.verf->committed <= NFS_UNSTABLE) return 1; - return nfs_direct_cmp_verf(verfp, &data->verf); + return nfs_direct_cmp_verf(verfp, data->res.verf); } /** diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 6cd33bd5da87..f1cb0b7eb05f 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -2373,6 +2373,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req, void *data) { struct nfs_commitres *result = data; + struct nfs_writeverf *verf = result->verf; enum nfs_stat status; int error; @@ -2385,7 +2386,9 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req, result->op_status = status; if (status != NFS3_OK) goto out_status; - error = decode_writeverf3(xdr, &result->verf->verifier); + error = decode_writeverf3(xdr, &verf->verifier); + if (!error) + verf->committed = NFS_FILE_SYNC; out: return error; out_status: diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 525684b0056f..0b2d051990e9 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4409,11 +4409,14 @@ static int decode_write_verifier(struct xdr_stream *xdr, struct nfs_write_verifi static int decode_commit(struct xdr_stream *xdr, struct nfs_commitres *res) { + struct nfs_writeverf *verf = res->verf; int status; status = decode_op_hdr(xdr, OP_COMMIT); if (!status) - status = decode_write_verifier(xdr, &res->verf->verifier); + status = decode_write_verifier(xdr, &verf->verifier); + if (!status) + verf->committed = NFS_FILE_SYNC; return status; } diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 4a3dd66175fe..b0ef37f3e2dd 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -30,12 +30,11 @@ EXPORT_SYMBOL_GPL(pnfs_generic_rw_release); /* Fake up some data that will cause nfs_commit_release to retry the writes. */ void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data *data) { - struct nfs_page *first = nfs_list_entry(data->pages.next); + struct nfs_writeverf *verf = data->res.verf; data->task.tk_status = 0; - memcpy(&data->verf.verifier, &first->wb_verf, - sizeof(data->verf.verifier)); - data->verf.verifier.data[0]++; /* ensure verifier mismatch */ + memset(&verf->verifier, 0, sizeof(verf->verifier)); + verf->committed = NFS_UNSTABLE; } EXPORT_SYMBOL_GPL(pnfs_generic_prepare_to_resend_writes); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ed3f5afc4ff7..89f36040adf6 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1807,6 +1807,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) static void nfs_commit_release_pages(struct nfs_commit_data *data) { + const struct nfs_writeverf *verf = data->res.verf; struct nfs_page *req; int status = data->task.tk_status; struct nfs_commit_info cinfo; @@ -1833,7 +1834,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) /* Okay, COMMIT succeeded, apparently. Check the verifier * returned by the server against all stored verfs. */ - if (!nfs_write_verifier_cmp(&req->wb_verf, &data->verf.verifier)) { + if (verf->committed > NFS_UNSTABLE && + !nfs_write_verifier_cmp(&req->wb_verf, &verf->verifier)) { /* We have a match */ if (req->wb_page) nfs_inode_remove_request(req); From 0cffbe2a1bb5a1e63b67bebf38eb653aabfd6f43 Mon Sep 17 00:00:00 2001 From: Robert Milkowski Date: Tue, 28 Jan 2020 08:37:47 +0000 Subject: [PATCH 3179/3715] NFSv4: try lease recovery on NFS4ERR_EXPIRED commit 924491f2e476f7234d722b24171a4daff61bbe13 upstream. Currently, if an nfs server returns NFS4ERR_EXPIRED to open(), we return EIO to applications without even trying to recover. Fixes: 272289a3df72 ("NFSv4: nfs4_do_handle_exception() handle revoke/expiry of a single stateid") Signed-off-by: Robert Milkowski Reviewed-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3dd403943b07..4d45786738ab 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2923,6 +2923,11 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, exception.retry = 1; continue; } + if (status == -NFS4ERR_EXPIRED) { + nfs4_schedule_lease_recovery(server->nfs_client); + exception.retry = 1; + continue; + } if (status == -EAGAIN) { /* We must have found a delegation */ exception.retry = 1; From 69332673468b8f488dfc073684d09d7b916e1e52 Mon Sep 17 00:00:00 2001 From: Shubhrajyoti Datta Date: Mon, 10 Feb 2020 10:18:25 -0700 Subject: [PATCH 3180/3715] serial: uartps: Add a timeout to the tx empty wait [ Upstream commit 277375b864e8147975b064b513f491e2a910e66a ] In case the cable is not connected then the target gets into an infinite wait for tx empty. Add a timeout to the tx empty wait. Reported-by: Jean-Francois Dagenais Signed-off-by: Shubhrajyoti Datta Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/xilinx_uartps.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c index 0dbfd02e3b19..9826a15bfd87 100644 --- a/drivers/tty/serial/xilinx_uartps.c +++ b/drivers/tty/serial/xilinx_uartps.c @@ -31,6 +31,7 @@ #include #include #include +#include #define CDNS_UART_TTY_NAME "ttyPS" #define CDNS_UART_NAME "xuartps" @@ -39,6 +40,7 @@ #define CDNS_UART_NR_PORTS 2 #define CDNS_UART_FIFO_SIZE 64 /* FIFO size */ #define CDNS_UART_REGISTER_SPACE 0x1000 +#define TX_TIMEOUT 500000 /* Rx Trigger level */ static int rx_trigger_level = 56; @@ -685,16 +687,20 @@ static void cdns_uart_set_termios(struct uart_port *port, unsigned int cval = 0; unsigned int baud, minbaud, maxbaud; unsigned long flags; - unsigned int ctrl_reg, mode_reg; + unsigned int ctrl_reg, mode_reg, val; + int err; spin_lock_irqsave(&port->lock, flags); /* Wait for the transmit FIFO to empty before making changes */ if (!(readl(port->membase + CDNS_UART_CR) & CDNS_UART_CR_TX_DIS)) { - while (!(readl(port->membase + CDNS_UART_SR) & - CDNS_UART_SR_TXEMPTY)) { - cpu_relax(); + err = readl_poll_timeout(port->membase + CDNS_UART_SR, + val, (val & CDNS_UART_SR_TXEMPTY), + 1000, TX_TIMEOUT); + if (err) { + dev_err(port->dev, "timed out waiting for tx empty"); + return; } } From 64ab2bb7f98e2fda9d59efe40b592285981d85d9 Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Thu, 12 Dec 2019 16:31:10 +0100 Subject: [PATCH 3181/3715] rtc: hym8563: Return -EINVAL if the time is known to be invalid commit f236a2a2ebabad0848ad0995af7ad1dc7029e895 upstream. The current code returns -EPERM when the voltage loss bit is set. Since the bit indicates that the time value is not valid, return -EINVAL instead, which is the appropriate error code for this situation. Fixes: dcaf03849352 ("rtc: add hym8563 rtc-driver") Signed-off-by: Paul Kocialkowski Link: https://lore.kernel.org/r/20191212153111.966923-1-paul.kocialkowski@bootlin.com Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-hym8563.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-hym8563.c b/drivers/rtc/rtc-hym8563.c index e5ad527cb75e..a8c2d38b2411 100644 --- a/drivers/rtc/rtc-hym8563.c +++ b/drivers/rtc/rtc-hym8563.c @@ -105,7 +105,7 @@ static int hym8563_rtc_read_time(struct device *dev, struct rtc_time *tm) if (!hym8563->valid) { dev_warn(&client->dev, "no valid clock/calendar values available\n"); - return -EPERM; + return -EINVAL; } ret = i2c_smbus_read_i2c_block_data(client, HYM8563_SEC, 7, buf); From 1eaca30af8afce3aa6664a5e0a84012f22c471e8 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 23 Jan 2020 15:14:35 +0200 Subject: [PATCH 3182/3715] rtc: cmos: Stop using shared IRQ commit b6da197a2e9670df6f07e6698629e9ce95ab614e upstream. As reported by Guilherme G. Piccoli: ---8<---8<---8<--- The rtc-cmos interrupt setting was changed in the commit 079062b28fb4 ("rtc: cmos: prevent kernel warning on IRQ flags mismatch") in order to allow shared interrupts; according to that commit's description, some machine got kernel warnings due to the interrupt line being shared between rtc-cmos and other hardware, and rtc-cmos didn't allow IRQ sharing that time. After the aforementioned commit though it was observed a huge increase in lost HPET interrupts in some systems, observed through the following kernel message: [...] hpet1: lost 35 rtc interrupts After investigation, it was narrowed down to the shared interrupts usage when having the kernel option "irqpoll" enabled. In this case, all IRQ handlers are called for non-timer interrupts, if such handlers are setup in shared IRQ lines. The rtc-cmos IRQ handler could be set to hpet_rtc_interrupt(), which will produce the kernel "lost interrupts" message after doing work - lots of readl/writel to HPET registers, which are known to be slow. Although "irqpoll" is not a default kernel option, it's used in some contexts, one being the kdump kernel (which is an already "impaired" kernel usually running with 1 CPU available), so the performance burden could be considerable. Also, the same issue would happen (in a shorter extent though) when using "irqfixup" kernel option. In a quick experiment, a virtual machine with uptime of 2 minutes produced >300 calls to hpet_rtc_interrupt() when "irqpoll" was set, whereas without sharing interrupts this number reduced to 1 interrupt. Machines with more hardware than a VM should generate even more unnecessary HPET interrupts in this scenario. ---8<---8<---8<--- After looking into the rtc-cmos driver history and DSDT table from the Microsoft Surface 3, we may notice that Hans de Goede submitted a correct fix (see dependency below). Thus, we simply revert the culprit commit. Fixes: 079062b28fb4 ("rtc: cmos: prevent kernel warning on IRQ flags mismatch") Depends-on: a1e23a42f1bd ("rtc: cmos: Do not assume irq 8 for rtc when there are no legacy irqs") Reported-by: Guilherme G. Piccoli Cc: Hans de Goede Signed-off-by: Andy Shevchenko Tested-by: Guilherme G. Piccoli Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20200123131437.28157-1-andriy.shevchenko@linux.intel.com Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-cmos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 9dca53df3584..5b7c16b85dc0 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -806,7 +806,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) rtc_cmos_int_handler = cmos_interrupt; retval = request_irq(rtc_irq, rtc_cmos_int_handler, - IRQF_SHARED, dev_name(&cmos_rtc.rtc->dev), + 0, dev_name(&cmos_rtc.rtc->dev), cmos_rtc.rtc); if (retval < 0) { dev_dbg(dev, "IRQ %d is already in use\n", rtc_irq); From 18a698a8659dd115c15db8c0ede9319d528ad815 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Tue, 14 Jan 2020 17:09:24 +0100 Subject: [PATCH 3183/3715] ARC: [plat-axs10x]: Add missing multicast filter number to GMAC node commit 7980dff398f86a618f502378fa27cf7e77449afa upstream. Add a missing property to GMAC node so that multicast filtering works correctly. Fixes: 556cc1c5f528 ("ARC: [axs101] Add support for AXS101 SDP (software development platform)") Acked-by: Alexey Brodkin Signed-off-by: Jose Abreu Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/boot/dts/axs10x_mb.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi index e114000a84f5..d825b9dbae5d 100644 --- a/arch/arc/boot/dts/axs10x_mb.dtsi +++ b/arch/arc/boot/dts/axs10x_mb.dtsi @@ -70,6 +70,7 @@ interrupt-names = "macirq"; phy-mode = "rgmii"; snps,pbl = < 32 >; + snps,multicast-filter-bins = <256>; clocks = <&apbclk>; clock-names = "stmmaceth"; max-speed = <100>; From cf2647f146c41995f03b9b1e3800157aee2cf87d Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 22 Jan 2020 19:05:20 +0300 Subject: [PATCH 3184/3715] platform/x86: intel_mid_powerbtn: Take a copy of ddata commit 5e0c94d3aeeecc68c573033f08d9678fecf253bd upstream. The driver gets driver_data from memory that is marked as const (which is probably put to read-only memory) and it then modifies it. This likely causes some sort of fault to happen. Fix this by taking a copy of the structure. Fixes: c94a8ff14de3 ("platform/x86: intel_mid_powerbtn: make mid_pb_ddata const") Signed-off-by: Mika Westerberg Reviewed-by: Andy Shevchenko Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/intel_mid_powerbtn.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/intel_mid_powerbtn.c b/drivers/platform/x86/intel_mid_powerbtn.c index 5ad44204a9c3..10dbd6cac48a 100644 --- a/drivers/platform/x86/intel_mid_powerbtn.c +++ b/drivers/platform/x86/intel_mid_powerbtn.c @@ -158,9 +158,10 @@ static int mid_pb_probe(struct platform_device *pdev) input_set_capability(input, EV_KEY, KEY_POWER); - ddata = (struct mid_pb_ddata *)id->driver_data; + ddata = devm_kmemdup(&pdev->dev, (void *)id->driver_data, + sizeof(*ddata), GFP_KERNEL); if (!ddata) - return -ENODATA; + return -ENOMEM; ddata->dev = &pdev->dev; ddata->irq = irq; From bc2807c061a6b73baec206cb716b1c6461a288df Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Fri, 10 Jan 2020 18:20:06 +0100 Subject: [PATCH 3185/3715] ARM: dts: at91: sama5d3: fix maximum peripheral clock rates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ee0aa926ddb0bd8ba59e33e3803b3b5804e3f5da upstream. Currently the maximum rate for peripheral clock is calculated based on a typical 133MHz MCK. The maximum frequency is defined in the datasheet as a ratio to MCK. Some sama5d3 platforms are using a 166MHz MCK. Update the device trees to match the maximum rate based on 166MHz. Reported-by: Karl Rudbæk Olsen Fixes: d2e8190b7916 ("ARM: at91/dt: define sama5d3 clocks") Link: https://lore.kernel.org/r/20200110172007.1253659-1-alexandre.belloni@bootlin.com Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/sama5d3.dtsi | 28 ++++++++++++++-------------- arch/arm/boot/dts/sama5d3_can.dtsi | 4 ++-- arch/arm/boot/dts/sama5d3_uart.dtsi | 4 ++-- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/arch/arm/boot/dts/sama5d3.dtsi b/arch/arm/boot/dts/sama5d3.dtsi index 554d0bdedc7a..f96b41ed5b96 100644 --- a/arch/arm/boot/dts/sama5d3.dtsi +++ b/arch/arm/boot/dts/sama5d3.dtsi @@ -1185,49 +1185,49 @@ usart0_clk: usart0_clk { #clock-cells = <0>; reg = <12>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; usart1_clk: usart1_clk { #clock-cells = <0>; reg = <13>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; usart2_clk: usart2_clk { #clock-cells = <0>; reg = <14>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; usart3_clk: usart3_clk { #clock-cells = <0>; reg = <15>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; uart0_clk: uart0_clk { #clock-cells = <0>; reg = <16>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; twi0_clk: twi0_clk { reg = <18>; #clock-cells = <0>; - atmel,clk-output-range = <0 16625000>; + atmel,clk-output-range = <0 41500000>; }; twi1_clk: twi1_clk { #clock-cells = <0>; reg = <19>; - atmel,clk-output-range = <0 16625000>; + atmel,clk-output-range = <0 41500000>; }; twi2_clk: twi2_clk { #clock-cells = <0>; reg = <20>; - atmel,clk-output-range = <0 16625000>; + atmel,clk-output-range = <0 41500000>; }; mci0_clk: mci0_clk { @@ -1243,19 +1243,19 @@ spi0_clk: spi0_clk { #clock-cells = <0>; reg = <24>; - atmel,clk-output-range = <0 133000000>; + atmel,clk-output-range = <0 166000000>; }; spi1_clk: spi1_clk { #clock-cells = <0>; reg = <25>; - atmel,clk-output-range = <0 133000000>; + atmel,clk-output-range = <0 166000000>; }; tcb0_clk: tcb0_clk { #clock-cells = <0>; reg = <26>; - atmel,clk-output-range = <0 133000000>; + atmel,clk-output-range = <0 166000000>; }; pwm_clk: pwm_clk { @@ -1266,7 +1266,7 @@ adc_clk: adc_clk { #clock-cells = <0>; reg = <29>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; dma0_clk: dma0_clk { @@ -1297,13 +1297,13 @@ ssc0_clk: ssc0_clk { #clock-cells = <0>; reg = <38>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; ssc1_clk: ssc1_clk { #clock-cells = <0>; reg = <39>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; sha_clk: sha_clk { diff --git a/arch/arm/boot/dts/sama5d3_can.dtsi b/arch/arm/boot/dts/sama5d3_can.dtsi index c5a3772741bf..0fac79f75c06 100644 --- a/arch/arm/boot/dts/sama5d3_can.dtsi +++ b/arch/arm/boot/dts/sama5d3_can.dtsi @@ -37,13 +37,13 @@ can0_clk: can0_clk { #clock-cells = <0>; reg = <40>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; can1_clk: can1_clk { #clock-cells = <0>; reg = <41>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; }; }; diff --git a/arch/arm/boot/dts/sama5d3_uart.dtsi b/arch/arm/boot/dts/sama5d3_uart.dtsi index 186377d41c91..48e23d18e5e3 100644 --- a/arch/arm/boot/dts/sama5d3_uart.dtsi +++ b/arch/arm/boot/dts/sama5d3_uart.dtsi @@ -42,13 +42,13 @@ uart0_clk: uart0_clk { #clock-cells = <0>; reg = <16>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; uart1_clk: uart1_clk { #clock-cells = <0>; reg = <17>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; }; }; From 59d6a094087e3bd135cb92da76c345be2f5def1b Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Fri, 10 Jan 2020 18:20:07 +0100 Subject: [PATCH 3186/3715] ARM: dts: at91: sama5d3: define clock rate range for tcb1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a7e0f3fc01df4b1b7077df777c37feae8c9e8b6d upstream. The clock rate range for the TCB1 clock is missing. define it in the device tree. Reported-by: Karl Rudbæk Olsen Fixes: d2e8190b7916 ("ARM: at91/dt: define sama5d3 clocks") Link: https://lore.kernel.org/r/20200110172007.1253659-2-alexandre.belloni@bootlin.com Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/sama5d3_tcb1.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/sama5d3_tcb1.dtsi b/arch/arm/boot/dts/sama5d3_tcb1.dtsi index 801f9745e82f..b80dbc45a3c2 100644 --- a/arch/arm/boot/dts/sama5d3_tcb1.dtsi +++ b/arch/arm/boot/dts/sama5d3_tcb1.dtsi @@ -23,6 +23,7 @@ tcb1_clk: tcb1_clk { #clock-cells = <0>; reg = <27>; + atmel,clk-output-range = <0 166000000>; }; }; }; From 4a7a57c810a891ac924462de512f5e9cc81f6aee Mon Sep 17 00:00:00 2001 From: Zhengyuan Liu Date: Sat, 14 Dec 2019 00:27:12 +0800 Subject: [PATCH 3187/3715] tools/power/acpi: fix compilation error commit 1985f8c7f9a42a651a9750d6fcadc74336d182df upstream. If we compile tools/acpi target in the top source directory, we'd get a compilation error showing as bellow: # make tools/acpi DESCEND power/acpi DESCEND tools/acpidbg CC tools/acpidbg/acpidbg.o Assembler messages: Fatal error: can't create /home/lzy/kernel-upstream/power/acpi/\ tools/acpidbg/acpidbg.o: No such file or directory ../../Makefile.rules:26: recipe for target '/home/lzy/kernel-upstream/\ power/acpi/tools/acpidbg/acpidbg.o' failed make[3]: *** [/home/lzy/kernel-upstream//power/acpi/tools/acpidbg/\ acpidbg.o] Error 1 Makefile:19: recipe for target 'acpidbg' failed make[2]: *** [acpidbg] Error 2 Makefile:54: recipe for target 'acpi' failed make[1]: *** [acpi] Error 2 Makefile:1607: recipe for target 'tools/acpi' failed make: *** [tools/acpi] Error 2 Fixes: d5a4b1a540b8 ("tools/power/acpi: Remove direct kernel source include reference") Signed-off-by: Zhengyuan Liu Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- tools/power/acpi/Makefile.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/acpi/Makefile.config b/tools/power/acpi/Makefile.config index f304be71c278..fc116c060b98 100644 --- a/tools/power/acpi/Makefile.config +++ b/tools/power/acpi/Makefile.config @@ -18,7 +18,7 @@ include $(srctree)/../../scripts/Makefile.include OUTPUT=$(srctree)/ ifeq ("$(origin O)", "command line") - OUTPUT := $(O)/power/acpi/ + OUTPUT := $(O)/tools/power/acpi/ endif #$(info Determined 'OUTPUT' to be $(OUTPUT)) From 197cd72ec3aa5afcf1ab1da61135d7ee2bfb4b49 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Mon, 20 Jan 2020 14:10:02 -0800 Subject: [PATCH 3188/3715] powerpc/pseries/vio: Fix iommu_table use-after-free refcount warning commit aff8c8242bc638ba57247ae1ec5f272ac3ed3b92 upstream. Commit e5afdf9dd515 ("powerpc/vfio_spapr_tce: Add reference counting to iommu_table") missed an iommu_table allocation in the pseries vio code. The iommu_table is allocated with kzalloc and as a result the associated kref gets a value of zero. This has the side effect that during a DLPAR remove of the associated virtual IOA the iommu_tce_table_put() triggers a use-after-free underflow warning. Call Trace: [c0000002879e39f0] [c00000000071ecb4] refcount_warn_saturate+0x184/0x190 (unreliable) [c0000002879e3a50] [c0000000000500ac] iommu_tce_table_put+0x9c/0xb0 [c0000002879e3a70] [c0000000000f54e4] vio_dev_release+0x34/0x70 [c0000002879e3aa0] [c00000000087cfa4] device_release+0x54/0xf0 [c0000002879e3b10] [c000000000d64c84] kobject_cleanup+0xa4/0x240 [c0000002879e3b90] [c00000000087d358] put_device+0x28/0x40 [c0000002879e3bb0] [c0000000007a328c] dlpar_remove_slot+0x15c/0x250 [c0000002879e3c50] [c0000000007a348c] remove_slot_store+0xac/0xf0 [c0000002879e3cd0] [c000000000d64220] kobj_attr_store+0x30/0x60 [c0000002879e3cf0] [c0000000004ff13c] sysfs_kf_write+0x6c/0xa0 [c0000002879e3d10] [c0000000004fde4c] kernfs_fop_write+0x18c/0x260 [c0000002879e3d60] [c000000000410f3c] __vfs_write+0x3c/0x70 [c0000002879e3d80] [c000000000415408] vfs_write+0xc8/0x250 [c0000002879e3dd0] [c0000000004157dc] ksys_write+0x7c/0x120 [c0000002879e3e20] [c00000000000b278] system_call+0x5c/0x68 Further, since the refcount was always zero the iommu_tce_table_put() fails to call the iommu_table release function resulting in a leak. Fix this issue be initilizing the iommu_table kref immediately after allocation. Fixes: e5afdf9dd515 ("powerpc/vfio_spapr_tce: Add reference counting to iommu_table") Signed-off-by: Tyrel Datwyler Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1579558202-26052-1-git-send-email-tyreld@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/pseries/vio.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index d86938260a86..fc778865a412 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -1195,6 +1195,8 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) if (tbl == NULL) return NULL; + kref_init(&tbl->it_kref); + of_parse_dma_window(dev->dev.of_node, dma_window, &tbl->it_index, &offset, &size); From cef509c55685401ab07f08276df15d287f280815 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Mon, 16 Dec 2019 15:19:22 +1100 Subject: [PATCH 3189/3715] powerpc/pseries: Allow not having ibm, hypertas-functions::hcall-multi-tce for DDW commit 7559d3d295f3365ea7ac0c0274c05e633fe4f594 upstream. By default a pseries guest supports a H_PUT_TCE hypercall which maps a single IOMMU page in a DMA window. Additionally the hypervisor may support H_PUT_TCE_INDIRECT/H_STUFF_TCE which update multiple TCEs at once; this is advertised via the device tree /rtas/ibm,hypertas-functions property which Linux converts to FW_FEATURE_MULTITCE. FW_FEATURE_MULTITCE is checked when dma_iommu_ops is used; however the code managing the huge DMA window (DDW) ignores it and calls H_PUT_TCE_INDIRECT even if it is explicitly disabled via the "multitce=off" kernel command line parameter. This adds FW_FEATURE_MULTITCE checking to the DDW code path. This changes tce_build_pSeriesLP to take liobn and page size as the huge window does not have iommu_table descriptor which usually the place to store these numbers. Fixes: 4e8b0cf46b25 ("powerpc/pseries: Add support for dynamic dma windows") Signed-off-by: Alexey Kardashevskiy Reviewed-by: Thiago Jung Bauermann Tested-by: Thiago Jung Bauermann Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191216041924.42318-3-aik@ozlabs.ru Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/pseries/iommu.c | 43 +++++++++++++++++--------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 7c181467d0ad..0e4e22dfa6b5 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -168,10 +168,10 @@ static unsigned long tce_get_pseries(struct iommu_table *tbl, long index) return be64_to_cpu(*tcep); } -static void tce_free_pSeriesLP(struct iommu_table*, long, long); +static void tce_free_pSeriesLP(unsigned long liobn, long, long); static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long); -static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, +static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, long npages, unsigned long uaddr, enum dma_data_direction direction, unsigned long attrs) @@ -182,25 +182,25 @@ static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, int ret = 0; long tcenum_start = tcenum, npages_start = npages; - rpn = __pa(uaddr) >> TCE_SHIFT; + rpn = __pa(uaddr) >> tceshift; proto_tce = TCE_PCI_READ; if (direction != DMA_TO_DEVICE) proto_tce |= TCE_PCI_WRITE; while (npages--) { - tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; - rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce); + tce = proto_tce | (rpn & TCE_RPN_MASK) << tceshift; + rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, tce); if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { ret = (int)rc; - tce_free_pSeriesLP(tbl, tcenum_start, + tce_free_pSeriesLP(liobn, tcenum_start, (npages_start - (npages + 1))); break; } if (rc && printk_ratelimit()) { printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); - printk("\tindex = 0x%llx\n", (u64)tbl->it_index); + printk("\tindex = 0x%llx\n", (u64)liobn); printk("\ttcenum = 0x%llx\n", (u64)tcenum); printk("\ttce val = 0x%llx\n", tce ); dump_stack(); @@ -229,7 +229,8 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, unsigned long flags; if ((npages == 1) || !firmware_has_feature(FW_FEATURE_MULTITCE)) { - return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, + return tce_build_pSeriesLP(tbl->it_index, tcenum, + tbl->it_page_shift, npages, uaddr, direction, attrs); } @@ -245,8 +246,9 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, /* If allocation fails, fall back to the loop implementation */ if (!tcep) { local_irq_restore(flags); - return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, - direction, attrs); + return tce_build_pSeriesLP(tbl->it_index, tcenum, + tbl->it_page_shift, + npages, uaddr, direction, attrs); } __this_cpu_write(tce_page, tcep); } @@ -297,16 +299,16 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, return ret; } -static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) +static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long npages) { u64 rc; while (npages--) { - rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, 0); + rc = plpar_tce_put((u64)liobn, (u64)tcenum << 12, 0); if (rc && printk_ratelimit()) { printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); - printk("\tindex = 0x%llx\n", (u64)tbl->it_index); + printk("\tindex = 0x%llx\n", (u64)liobn); printk("\ttcenum = 0x%llx\n", (u64)tcenum); dump_stack(); } @@ -321,7 +323,7 @@ static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long n u64 rc; if (!firmware_has_feature(FW_FEATURE_MULTITCE)) - return tce_free_pSeriesLP(tbl, tcenum, npages); + return tce_free_pSeriesLP(tbl->it_index, tcenum, npages); rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages); @@ -436,6 +438,19 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, u64 rc = 0; long l, limit; + if (!firmware_has_feature(FW_FEATURE_MULTITCE)) { + unsigned long tceshift = be32_to_cpu(maprange->tce_shift); + unsigned long dmastart = (start_pfn << PAGE_SHIFT) + + be64_to_cpu(maprange->dma_base); + unsigned long tcenum = dmastart >> tceshift; + unsigned long npages = num_pfn << PAGE_SHIFT >> tceshift; + void *uaddr = __va(start_pfn << PAGE_SHIFT); + + return tce_build_pSeriesLP(be32_to_cpu(maprange->liobn), + tcenum, tceshift, npages, (unsigned long) uaddr, + DMA_BIDIRECTIONAL, 0); + } + local_irq_disable(); /* to protect tcep and the page behind it */ tcep = __this_cpu_read(tce_page); From d73066c047a0cf2e4286023320512d059ac2b405 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 13 Dec 2019 10:42:37 +0100 Subject: [PATCH 3190/3715] KVM: arm/arm64: vgic-its: Fix restoration of unmapped collections commit 8c58be34494b7f1b2adb446e2d8beeb90e5de65b upstream. Saving/restoring an unmapped collection is a valid scenario. For example this happens if a MAPTI command was sent, featuring an unmapped collection. At the moment the CTE fails to be restored. Only compare against the number of online vcpus if the rdist base is set. Fixes: ea1ad53e1e31a ("KVM: arm64: vgic-its: Collection table save/restore") Signed-off-by: Eric Auger Signed-off-by: Marc Zyngier Reviewed-by: Zenghui Yu Link: https://lore.kernel.org/r/20191213094237.19627-1-eric.auger@redhat.com Signed-off-by: Greg Kroah-Hartman --- virt/kvm/arm/vgic/vgic-its.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 526d808ecbbd..8354ec4ef912 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -2210,7 +2210,8 @@ static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz) target_addr = (u32)(val >> KVM_ITS_CTE_RDBASE_SHIFT); coll_id = val & KVM_ITS_CTE_ICID_MASK; - if (target_addr >= atomic_read(&kvm->online_vcpus)) + if (target_addr != COLLECTION_NOT_MAPPED && + target_addr >= atomic_read(&kvm->online_vcpus)) return -EINVAL; collection = find_collection(its, coll_id); From c3f4c5ecf759b55bbabc17a2ff784fcf06ebfb23 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 18 Dec 2019 01:18:49 +0100 Subject: [PATCH 3191/3715] ARM: 8949/1: mm: mark free_memmap as __init commit 31f3010e60522ede237fb145a63b4af5a41718c2 upstream. As of commit ac7c3e4ff401 ("compiler: enable CONFIG_OPTIMIZE_INLINING forcibly"), free_memmap() might not always be inlined, and thus is triggering a section warning: WARNING: vmlinux.o(.text.unlikely+0x904): Section mismatch in reference from the function free_memmap() to the function .meminit.text:memblock_free() Mark it as __init, since the faller (free_unused_memmap) already is. Fixes: ac7c3e4ff401 ("compiler: enable CONFIG_OPTIMIZE_INLINING forcibly") Signed-off-by: Olof Johansson Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 27a40101dd3a..fd26b5c92b44 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -356,7 +356,7 @@ static inline void poison_init_mem(void *s, size_t count) *p++ = 0xe7fddef0; } -static inline void +static inline void __init free_memmap(unsigned long start_pfn, unsigned long end_pfn) { struct page *start_pg, *end_pg; From 1e26078c29edb7bdf7d75ddc52d34912b805791e Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 13 Jan 2020 23:30:19 +0000 Subject: [PATCH 3192/3715] arm64: cpufeature: Fix the type of no FP/SIMD capability commit 449443c03d8cfdacf7313e17779a2594ebf87e6d upstream. The NO_FPSIMD capability is defined with scope SYSTEM, which implies that the "absence" of FP/SIMD on at least one CPU is detected only after all the SMP CPUs are brought up. However, we use the status of this capability for every context switch. So, let us change the scope to LOCAL_CPU to allow the detection of this capability as and when the first CPU without FP is brought up. Also, the current type allows hotplugged CPU to be brought up without FP/SIMD when all the current CPUs have FP/SIMD and we have the userspace up. Fix both of these issues by changing the capability to BOOT_RESTRICTED_LOCAL_CPU_FEATURE. Fixes: 82e0191a1aa11abf ("arm64: Support systems without FP/ASIMD") Cc: Will Deacon Cc: Mark Rutland Reviewed-by: Ard Biesheuvel Reviewed-by: Catalin Marinas Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpufeature.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 09c6499bc500..c477fd34a912 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1103,7 +1103,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { /* FP/SIMD is not implemented */ .capability = ARM64_HAS_NO_FPSIMD, - .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .type = ARM64_CPUCAP_BOOT_RESTRICTED_CPU_LOCAL_FEATURE, .min_field_value = 0, .matches = has_no_fpsimd, }, From 4b99ae6d648416af377f5ac389619f12e555b362 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 21 Jan 2020 16:56:59 +1100 Subject: [PATCH 3193/3715] KVM: arm/arm64: Fix young bit from mmu notifier commit cf2d23e0bac9f6b5cd1cba8898f5f05ead40e530 upstream. kvm_test_age_hva() is called upon mmu_notifier_test_young(), but wrong address range has been passed to handle_hva_to_gpa(). With the wrong address range, no young bits will be checked in handle_hva_to_gpa(). It means zero is always returned from mmu_notifier_test_young(). This fixes the issue by passing correct address range to the underly function handle_hva_to_gpa(), so that the hardware young (access) bit will be visited. Fixes: 35307b9a5f7e ("arm/arm64: KVM: Implement Stage-2 page aging") Signed-off-by: Gavin Shan Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200121055659.19560-1-gshan@redhat.com Signed-off-by: Greg Kroah-Hartman --- virt/kvm/arm/mmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 9f69202d8e49..3814cdad643a 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1736,7 +1736,8 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) if (!kvm->arch.pgd) return 0; trace_kvm_test_age_hva(hva); - return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL); + return handle_hva_to_gpa(kvm, hva, hva + PAGE_SIZE, + kvm_test_age_hva_handler, NULL); } void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu) From bcf27e218594685647c5c46a11d5f2b80fdfded1 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 30 Dec 2019 21:19:32 -0600 Subject: [PATCH 3194/3715] crypto: artpec6 - return correct error code for failed setkey() commit b828f905904cd76424230c69741a4cabb0174168 upstream. ->setkey() is supposed to retun -EINVAL for invalid key lengths, not -1. Fixes: a21eb94fc4d3 ("crypto: axis - add ARTPEC-6/7 crypto accelerator driver") Cc: Jesper Nilsson Cc: Lars Persson Signed-off-by: Eric Biggers Acked-by: Lars Persson Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/axis/artpec6_crypto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/axis/artpec6_crypto.c b/drivers/crypto/axis/artpec6_crypto.c index 9f82e14983f6..a886245b931e 100644 --- a/drivers/crypto/axis/artpec6_crypto.c +++ b/drivers/crypto/axis/artpec6_crypto.c @@ -1256,7 +1256,7 @@ static int artpec6_crypto_aead_set_key(struct crypto_aead *tfm, const u8 *key, if (len != 16 && len != 24 && len != 32) { crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -1; + return -EINVAL; } ctx->key_length = len; From 0b751f35f93eed9515f27053268c28782a671059 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 30 Dec 2019 21:19:33 -0600 Subject: [PATCH 3195/3715] crypto: atmel-sha - fix error handling when setting hmac key commit b529f1983b2dcc46354f311feda92e07b6e9e2da upstream. HMAC keys can be of any length, and atmel_sha_hmac_key_set() can only fail due to -ENOMEM. But atmel_sha_hmac_setkey() incorrectly treated any error as a "bad key length" error. Fix it to correctly propagate the -ENOMEM error code and not set any tfm result flags. Fixes: 81d8750b2b59 ("crypto: atmel-sha - add support to hmac(shaX)") Cc: Nicolas Ferre Cc: Alexandre Belloni Cc: Ludovic Desroches Signed-off-by: Eric Biggers Reviewed-by: Tudor Ambarus Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/atmel-sha.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c index 3e2f41b3eaf3..15e68774034a 100644 --- a/drivers/crypto/atmel-sha.c +++ b/drivers/crypto/atmel-sha.c @@ -1921,12 +1921,7 @@ static int atmel_sha_hmac_setkey(struct crypto_ahash *tfm, const u8 *key, { struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); - if (atmel_sha_hmac_key_set(&hmac->hkey, key, keylen)) { - crypto_ahash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - - return 0; + return atmel_sha_hmac_key_set(&hmac->hkey, key, keylen); } static int atmel_sha_hmac_init(struct ahash_request *req) From 079c03b754b9264192b82ff6f88a49991eb50581 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 22 Oct 2019 15:25:22 +0200 Subject: [PATCH 3196/3715] media: i2c: adv748x: Fix unsafe macros commit 0d962e061abcf1b9105f88fb850158b5887fbca3 upstream. Enclose multiple macro parameters in parentheses in order to make such macros safer and fix the Clang warning below: drivers/media/i2c/adv748x/adv748x-afe.c:452:12: warning: operator '?:' has lower precedence than '|'; '|' will be evaluated first [-Wbitwise-conditional-parentheses] ret = sdp_clrset(state, ADV748X_SDP_FRP, ADV748X_SDP_FRP_MASK, enable ? ctrl->val - 1 : 0); Fixes: 3e89586a64df ("media: i2c: adv748x: add adv748x driver") Reported-by: Dmitry Vyukov Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kieran Bingham Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/adv748x/adv748x.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/media/i2c/adv748x/adv748x.h b/drivers/media/i2c/adv748x/adv748x.h index 296c5f8a8c63..1991c22be51a 100644 --- a/drivers/media/i2c/adv748x/adv748x.h +++ b/drivers/media/i2c/adv748x/adv748x.h @@ -372,10 +372,10 @@ int adv748x_write_block(struct adv748x_state *state, int client_page, #define io_read(s, r) adv748x_read(s, ADV748X_PAGE_IO, r) #define io_write(s, r, v) adv748x_write(s, ADV748X_PAGE_IO, r, v) -#define io_clrset(s, r, m, v) io_write(s, r, (io_read(s, r) & ~m) | v) +#define io_clrset(s, r, m, v) io_write(s, r, (io_read(s, r) & ~(m)) | (v)) #define hdmi_read(s, r) adv748x_read(s, ADV748X_PAGE_HDMI, r) -#define hdmi_read16(s, r, m) (((hdmi_read(s, r) << 8) | hdmi_read(s, r+1)) & m) +#define hdmi_read16(s, r, m) (((hdmi_read(s, r) << 8) | hdmi_read(s, (r)+1)) & (m)) #define hdmi_write(s, r, v) adv748x_write(s, ADV748X_PAGE_HDMI, r, v) #define repeater_read(s, r) adv748x_read(s, ADV748X_PAGE_REPEATER, r) @@ -383,11 +383,11 @@ int adv748x_write_block(struct adv748x_state *state, int client_page, #define sdp_read(s, r) adv748x_read(s, ADV748X_PAGE_SDP, r) #define sdp_write(s, r, v) adv748x_write(s, ADV748X_PAGE_SDP, r, v) -#define sdp_clrset(s, r, m, v) sdp_write(s, r, (sdp_read(s, r) & ~m) | v) +#define sdp_clrset(s, r, m, v) sdp_write(s, r, (sdp_read(s, r) & ~(m)) | (v)) #define cp_read(s, r) adv748x_read(s, ADV748X_PAGE_CP, r) #define cp_write(s, r, v) adv748x_write(s, ADV748X_PAGE_CP, r, v) -#define cp_clrset(s, r, m, v) cp_write(s, r, (cp_read(s, r) & ~m) | v) +#define cp_clrset(s, r, m, v) cp_write(s, r, (cp_read(s, r) & ~(m)) | (v)) #define txa_read(s, r) adv748x_read(s, ADV748X_PAGE_TXA, r) #define txb_read(s, r) adv748x_read(s, ADV748X_PAGE_TXB, r) From 8b360321449e805bb8f9833b7e249c8cefacfb29 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 18 Dec 2019 20:48:07 +0100 Subject: [PATCH 3197/3715] pinctrl: sh-pfc: r8a7778: Fix duplicate SDSELF_B and SD1_CLK_B commit 805f635703b2562b5ddd822c62fc9124087e5dd5 upstream. The FN_SDSELF_B and FN_SD1_CLK_B enum IDs are used twice, which means one set of users must be wrong. Replace them by the correct enum IDs. Fixes: 87f8c988636db0d4 ("sh-pfc: Add r8a7778 pinmux support") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20191218194812.12741-2-geert+renesas@glider.be Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/sh-pfc/pfc-r8a7778.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7778.c b/drivers/pinctrl/sh-pfc/pfc-r8a7778.c index c3af9ebee4af..28c0405ba396 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a7778.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a7778.c @@ -2325,7 +2325,7 @@ static const struct pinmux_cfg_reg pinmux_config_regs[] = { FN_ATAG0_A, 0, FN_REMOCON_B, 0, /* IP0_11_8 [4] */ FN_SD1_DAT2_A, FN_MMC_D2, 0, FN_BS, - FN_ATADIR0_A, 0, FN_SDSELF_B, 0, + FN_ATADIR0_A, 0, FN_SDSELF_A, 0, FN_PWM4_B, 0, 0, 0, 0, 0, 0, 0, /* IP0_7_5 [3] */ @@ -2367,7 +2367,7 @@ static const struct pinmux_cfg_reg pinmux_config_regs[] = { FN_TS_SDAT0_A, 0, 0, 0, 0, 0, 0, 0, /* IP1_10_8 [3] */ - FN_SD1_CLK_B, FN_MMC_D6, 0, FN_A24, + FN_SD1_CD_A, FN_MMC_D6, 0, FN_A24, FN_DREQ1_A, 0, FN_HRX0_B, FN_TS_SPSYNC0_A, /* IP1_7_5 [3] */ FN_A23, FN_HTX0_B, FN_TX2_B, FN_DACK2_A, From f2cfc606d03c00558393d8688b64b33bcb1de710 Mon Sep 17 00:00:00 2001 From: Anand Lodnoor Date: Tue, 14 Jan 2020 16:51:19 +0530 Subject: [PATCH 3198/3715] scsi: megaraid_sas: Do not initiate OCR if controller is not in ready state commit 6d7537270e3283b92f9b327da9d58a4de40fe8d0 upstream. Driver initiates OCR if a DCMD command times out. But there is a deadlock if the driver attempts to invoke another OCR before the mutex lock (reset_mutex) is released from the previous session of OCR. This patch takes care of the above scenario using new flag MEGASAS_FUSION_OCR_NOT_POSSIBLE to indicate if OCR is possible. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1579000882-20246-9-git-send-email-anand.lodnoor@broadcom.com Signed-off-by: Shivasharan S Signed-off-by: Anand Lodnoor Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/megaraid/megaraid_sas_base.c | 3 ++- drivers/scsi/megaraid/megaraid_sas_fusion.c | 3 ++- drivers/scsi/megaraid/megaraid_sas_fusion.h | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 6abad63b127a..42d876034741 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -4109,7 +4109,8 @@ dcmd_timeout_ocr_possible(struct megasas_instance *instance) { if (instance->adapter_type == MFI_SERIES) return KILL_ADAPTER; else if (instance->unload || - test_bit(MEGASAS_FUSION_IN_RESET, &instance->reset_flags)) + test_bit(MEGASAS_FUSION_OCR_NOT_POSSIBLE, + &instance->reset_flags)) return IGNORE_TIMEOUT; else return INITIATE_OCR; diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index 7be2b9e11332..b13721290f4b 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -4212,6 +4212,7 @@ int megasas_reset_fusion(struct Scsi_Host *shost, int reason) if (instance->requestorId && !instance->skip_heartbeat_timer_del) del_timer_sync(&instance->sriov_heartbeat_timer); set_bit(MEGASAS_FUSION_IN_RESET, &instance->reset_flags); + set_bit(MEGASAS_FUSION_OCR_NOT_POSSIBLE, &instance->reset_flags); atomic_set(&instance->adprecovery, MEGASAS_ADPRESET_SM_POLLING); instance->instancet->disable_intr(instance); megasas_sync_irqs((unsigned long)instance); @@ -4399,7 +4400,7 @@ fail_kill_adapter: atomic_set(&instance->adprecovery, MEGASAS_HBA_OPERATIONAL); } out: - clear_bit(MEGASAS_FUSION_IN_RESET, &instance->reset_flags); + clear_bit(MEGASAS_FUSION_OCR_NOT_POSSIBLE, &instance->reset_flags); mutex_unlock(&instance->reset_mutex); return retval; } diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.h b/drivers/scsi/megaraid/megaraid_sas_fusion.h index 7c1f7ccf031d..40724df20780 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.h +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.h @@ -100,6 +100,7 @@ enum MR_RAID_FLAGS_IO_SUB_TYPE { #define MEGASAS_FP_CMD_LEN 16 #define MEGASAS_FUSION_IN_RESET 0 +#define MEGASAS_FUSION_OCR_NOT_POSSIBLE 1 #define THRESHOLD_REPLY_COUNT 50 #define RAID_1_PEER_CMDS 2 #define JBOD_MAPS_COUNT 2 From ad6026d244ef0a1fa4823efed0abcfad11a06849 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 27 Jan 2020 14:07:23 -0500 Subject: [PATCH 3199/3715] dm: fix potential for q->make_request_fn NULL pointer commit 47ace7e012b9f7ad71d43ac9063d335ea3d6820b upstream. Move blk_queue_make_request() to dm.c:alloc_dev() so that q->make_request_fn is never NULL during the lifetime of a DM device (even one that is created without a DM table). Otherwise generic_make_request() will crash simply by doing: dmsetup create -n test mount /dev/dm-N /mnt While at it, move ->congested_data initialization out of dm.c:alloc_dev() and into the bio-based specific init method. Reported-by: Stefan Bader BugLink: https://bugs.launchpad.net/bugs/1860231 Fixes: ff36ab34583a ("dm: remove request-based logic from make_request_fn wrapper") Depends-on: c12c9a3c3860c ("dm: various cleanups to md->queue initialization code") Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer [smb: adjusted for context and dm_init_md_queue() exitsting in older kernels] Signed-off-by: Stefan Bader Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index a56008b2e7c2..02ba6849f89d 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1647,7 +1647,6 @@ void dm_init_md_queue(struct mapped_device *md) * - must do so here (in alloc_dev callchain) before queue is used */ md->queue->queuedata = md; - md->queue->backing_dev_info->congested_data = md; } void dm_init_normal_md_queue(struct mapped_device *md) @@ -1658,6 +1657,7 @@ void dm_init_normal_md_queue(struct mapped_device *md) /* * Initialize aspects of queue that aren't relevant for blk-mq */ + md->queue->backing_dev_info->congested_data = md; md->queue->backing_dev_info->congested_fn = dm_any_congested; } @@ -1750,6 +1750,12 @@ static struct mapped_device *alloc_dev(int minor) goto bad; dm_init_md_queue(md); + /* + * default to bio-based required ->make_request_fn until DM + * table is loaded and md->type established. If request-based + * table is loaded: blk-mq will override accordingly. + */ + blk_queue_make_request(md->queue, dm_make_request); md->disk = alloc_disk_node(1, numa_node_id); if (!md->disk) @@ -2055,7 +2061,6 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) case DM_TYPE_BIO_BASED: case DM_TYPE_DAX_BIO_BASED: dm_init_normal_md_queue(md); - blk_queue_make_request(md->queue, dm_make_request); /* * DM handles splitting bios as needed. Free the bio_split bioset * since it won't be used (saves 1 process per bio-based DM device). From 9b95c9d93dace8566865758e3c64d21a84a9d06c Mon Sep 17 00:00:00 2001 From: Shubhrajyoti Datta Date: Fri, 24 May 2019 14:41:28 +0530 Subject: [PATCH 3200/3715] serial: uartps: Move the spinlock after the read of the tx empty commit 107475685abfdee504bb0ef4824f15797f6d2d4d upstream. Currently we are doing a read of the status register. Move the spinlock after that as the reads need not be spinlock protected. This patch prevents relaxing the cpu with spinlock held. Signed-off-by: Shubhrajyoti Datta Cc: Pavel Machek Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/xilinx_uartps.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c index 9826a15bfd87..81657f09761c 100644 --- a/drivers/tty/serial/xilinx_uartps.c +++ b/drivers/tty/serial/xilinx_uartps.c @@ -690,8 +690,6 @@ static void cdns_uart_set_termios(struct uart_port *port, unsigned int ctrl_reg, mode_reg, val; int err; - spin_lock_irqsave(&port->lock, flags); - /* Wait for the transmit FIFO to empty before making changes */ if (!(readl(port->membase + CDNS_UART_CR) & CDNS_UART_CR_TX_DIS)) { @@ -703,6 +701,7 @@ static void cdns_uart_set_termios(struct uart_port *port, return; } } + spin_lock_irqsave(&port->lock, flags); /* Disable the TX and RX to set baud rate */ ctrl_reg = readl(port->membase + CDNS_UART_CR); From 49e9f1057ef968af57a62860697898cb35246936 Mon Sep 17 00:00:00 2001 From: Qing Xu Date: Thu, 2 Jan 2020 10:39:26 +0800 Subject: [PATCH 3201/3715] mwifiex: Fix possible buffer overflows in mwifiex_ret_wmm_get_status() [ Upstream commit 3a9b153c5591548612c3955c9600a98150c81875 ] mwifiex_ret_wmm_get_status() calls memcpy() without checking the destination size.Since the source is given from remote AP which contains illegal wmm elements , this may trigger a heap buffer overflow. Fix it by putting the length check before calling memcpy(). Signed-off-by: Qing Xu Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/marvell/mwifiex/wmm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/marvell/mwifiex/wmm.c b/drivers/net/wireless/marvell/mwifiex/wmm.c index 7fba4d940131..a13b05ec8fc0 100644 --- a/drivers/net/wireless/marvell/mwifiex/wmm.c +++ b/drivers/net/wireless/marvell/mwifiex/wmm.c @@ -976,6 +976,10 @@ int mwifiex_ret_wmm_get_status(struct mwifiex_private *priv, "WMM Parameter Set Count: %d\n", wmm_param_ie->qos_info_bitmap & mask); + if (wmm_param_ie->vend_hdr.len + 2 > + sizeof(struct ieee_types_wmm_parameter)) + break; + memcpy((u8 *) &priv->curr_bss_params.bss_descriptor. wmm_ie, wmm_param_ie, wmm_param_ie->vend_hdr.len + 2); From 43e189049f32740c0d015f43a407658ac53d1743 Mon Sep 17 00:00:00 2001 From: Qing Xu Date: Thu, 2 Jan 2020 10:39:27 +0800 Subject: [PATCH 3202/3715] mwifiex: Fix possible buffer overflows in mwifiex_cmd_append_vsie_tlv() [ Upstream commit b70261a288ea4d2f4ac7cd04be08a9f0f2de4f4d ] mwifiex_cmd_append_vsie_tlv() calls memcpy() without checking the destination size may trigger a buffer overflower, which a local user could use to cause denial of service or the execution of arbitrary code. Fix it by putting the length check before calling memcpy(). Signed-off-by: Qing Xu Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/marvell/mwifiex/scan.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/wireless/marvell/mwifiex/scan.c b/drivers/net/wireless/marvell/mwifiex/scan.c index c013c94fbf15..0071c40afe81 100644 --- a/drivers/net/wireless/marvell/mwifiex/scan.c +++ b/drivers/net/wireless/marvell/mwifiex/scan.c @@ -2890,6 +2890,13 @@ mwifiex_cmd_append_vsie_tlv(struct mwifiex_private *priv, vs_param_set->header.len = cpu_to_le16((((u16) priv->vs_ie[id].ie[1]) & 0x00FF) + 2); + if (le16_to_cpu(vs_param_set->header.len) > + MWIFIEX_MAX_VSIE_LEN) { + mwifiex_dbg(priv->adapter, ERROR, + "Invalid param length!\n"); + break; + } + memcpy(vs_param_set->ie, priv->vs_ie[id].ie, le16_to_cpu(vs_param_set->header.len)); *buffer += le16_to_cpu(vs_param_set->header.len) + From 783c96281bce4b38bb51e8c24b27b5dc4fdb3a51 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Tue, 14 Jan 2020 11:39:02 +0100 Subject: [PATCH 3203/3715] libertas: don't exit from lbs_ibss_join_existing() with RCU read lock held [ Upstream commit c7bf1fb7ddca331780b9a733ae308737b39f1ad4 ] Commit e5e884b42639 ("libertas: Fix two buffer overflows at parsing bss descriptor") introduced a bounds check on the number of supplied rates to lbs_ibss_join_existing(). Unfortunately, it introduced a return path from within a RCU read side critical section without a corresponding rcu_read_unlock(). Fix this. Fixes: e5e884b42639 ("libertas: Fix two buffer overflows at parsing bss descriptor") Signed-off-by: Nicolai Stange Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/marvell/libertas/cfg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/marvell/libertas/cfg.c b/drivers/net/wireless/marvell/libertas/cfg.c index 4ffc188d2ffd..a2874f111d12 100644 --- a/drivers/net/wireless/marvell/libertas/cfg.c +++ b/drivers/net/wireless/marvell/libertas/cfg.c @@ -1788,6 +1788,7 @@ static int lbs_ibss_join_existing(struct lbs_private *priv, rates_max = rates_eid[1]; if (rates_max > MAX_RATES) { lbs_deb_join("invalid rates"); + rcu_read_unlock(); goto out; } rates = cmd.bss.rates; From a5efc7dd242eb1086de3b02077e29657e4577e76 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Tue, 14 Jan 2020 11:39:03 +0100 Subject: [PATCH 3204/3715] libertas: make lbs_ibss_join_existing() return error code on rates overflow [ Upstream commit 1754c4f60aaf1e17d886afefee97e94d7f27b4cb ] Commit e5e884b42639 ("libertas: Fix two buffer overflows at parsing bss descriptor") introduced a bounds check on the number of supplied rates to lbs_ibss_join_existing() and made it to return on overflow. However, the aforementioned commit doesn't set the return value accordingly and thus, lbs_ibss_join_existing() would return with zero even though it failed. Make lbs_ibss_join_existing return -EINVAL in case the bounds check on the number of supplied rates fails. Fixes: e5e884b42639 ("libertas: Fix two buffer overflows at parsing bss descriptor") Signed-off-by: Nicolai Stange Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/marvell/libertas/cfg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/marvell/libertas/cfg.c b/drivers/net/wireless/marvell/libertas/cfg.c index a2874f111d12..fbeb12018c3d 100644 --- a/drivers/net/wireless/marvell/libertas/cfg.c +++ b/drivers/net/wireless/marvell/libertas/cfg.c @@ -1789,6 +1789,7 @@ static int lbs_ibss_join_existing(struct lbs_private *priv, if (rates_max > MAX_RATES) { lbs_deb_join("invalid rates"); rcu_read_unlock(); + ret = -EINVAL; goto out; } rates = cmd.bss.rates; From 98db2bf27b9ed2d5ed0b6c9c8a4bfcb127a19796 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 14 Feb 2020 16:32:24 -0500 Subject: [PATCH 3205/3715] Linux 4.14.171 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b614291199f8..f2657f4838db 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 170 +SUBLEVEL = 171 EXTRAVERSION = NAME = Petit Gorille From b0b638bda1abcbfc3f4878d600c56d88baa466eb Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 14 Feb 2020 22:39:01 +0100 Subject: [PATCH 3206/3715] ANDROID: Add INIT_STACK_ALL to the list of Clang-specific options android-4.14 is unable to check for compiler features at config time, so we must explicitly list all Clang configs that have to be disabled in GCC builds. Bug: 144999193 Test: build cuttlefish_defconfig with GCC for ARM64 Change-Id: Ideaedb77e0384fa5c72b71d30de584a748221648 Signed-off-by: Alexander Potapenko --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 10d4ec15087b..d380fee4b6b8 100644 --- a/Makefile +++ b/Makefile @@ -1228,7 +1228,7 @@ endif endif # Disable clang-specific config options when using a different compiler -clang-specific-configs := LTO_CLANG CFI_CLANG SHADOW_CALL_STACK +clang-specific-configs := LTO_CLANG CFI_CLANG SHADOW_CALL_STACK INIT_STACK_ALL PHONY += check-clang-specific-options check-clang-specific-options: $(KCONFIG_CONFIG) FORCE From fe82848d9c1c887d2a84d3738c13e644d01b6d6f Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Tue, 18 Feb 2020 15:33:50 +0900 Subject: [PATCH 3207/3715] ANDROID: net: bpf: Allow TC programs to call BPF_FUNC_skb_change_head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This enables ebpf tethering offload from a cellular interface with no L2 mac header to a wifi/ethernet/usb interface with one. Will pursue upstreaming this along with further mtu related fixups. Test: builds, real testing with identical patch on a 4.14 flame device Bug: 149724482 Signed-off-by: Lorenzo Colitti Signed-off-by: Maciej Żenczykowski Change-Id: Ic182320bf1abb248d0f86fa2973c60208710f664 --- net/core/filter.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/filter.c b/net/core/filter.c index a3646230fbee..a00955173328 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3260,6 +3260,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_skb_adjust_room_proto; case BPF_FUNC_skb_change_tail: return &bpf_skb_change_tail_proto; + case BPF_FUNC_skb_change_head: + return &bpf_skb_change_head_proto; case BPF_FUNC_skb_get_tunnel_key: return &bpf_skb_get_tunnel_key_proto; case BPF_FUNC_skb_set_tunnel_key: From 31469fab250f7128720efac4c559ca63ecc034ee Mon Sep 17 00:00:00 2001 From: Roman Kiryanov Date: Fri, 7 Feb 2020 17:06:13 -0800 Subject: [PATCH 3208/3715] ANDROID: virtio: virtio_input: pass _DIRECT only if the device advertises _DIRECT INPUT_MT_DIRECT should be used only if the device advertises INPUT_PROP_DIRECT. Bug: 147493341 Bug: 149250379 Test: boot emulator with 5.4 kernel, check if touchscreen works Signed-off-by: Roman Kiryanov Change-Id: Ic06ae2b9d0ab7c77cb8829e5392fd048139500a2 --- drivers/virtio/virtio_input.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c index 89e14b01959b..0073d078e50e 100644 --- a/drivers/virtio/virtio_input.c +++ b/drivers/virtio/virtio_input.c @@ -164,12 +164,15 @@ static void virtinput_cfg_abs(struct virtio_input *vi, int abs) virtio_cread(vi->vdev, struct virtio_input_config, u.abs.flat, &fl); input_set_abs_params(vi->idev, abs, mi, ma, fu, fl); input_abs_set_res(vi->idev, abs, re); - if (abs == ABS_MT_TRACKING_ID) + if (abs == ABS_MT_TRACKING_ID) { + unsigned int slot_flags = + test_bit(INPUT_PROP_DIRECT, vi->idev->propbit) ? + INPUT_MT_DIRECT : 0; + input_mt_init_slots(vi->idev, ma, /* input max finger */ - INPUT_MT_DIRECT - | INPUT_MT_DROP_UNUSED - | INPUT_MT_TRACK); + slot_flags); + } } static int virtinput_init_vqs(struct virtio_input *vi) From 027b02d66529bebbeccec4d5e23e3b680a54a8d7 Mon Sep 17 00:00:00 2001 From: Alistair Delva Date: Tue, 18 Feb 2020 22:55:21 -0800 Subject: [PATCH 3209/3715] ANDROID: cuttlefish_defconfig: Enable CONFIG_NET_NS Bug: 144061615 Bug: 147493341 Change-Id: I6392a7ee9c8ab26ca3e033889d8311661e6ec4aa Signed-off-by: Alistair Delva --- arch/arm64/configs/cuttlefish_defconfig | 3 +++ arch/x86/configs/x86_64_cuttlefish_defconfig | 2 ++ 2 files changed, 5 insertions(+) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index 8ed22333c2c3..c1b2fc17bdbb 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -18,6 +18,9 @@ CONFIG_CPUSETS=y # CONFIG_PROC_PID_CPUSET is not set CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_BPF=y +CONFIG_NAMESPACES=y +# CONFIG_UTS_NS is not set +# CONFIG_PID_NS is not set CONFIG_SCHED_AUTOGROUP=y CONFIG_SCHED_TUNE=y CONFIG_DEFAULT_USE_ENERGY_AWARE=y diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 64976b1fbc4d..b18d574c8143 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -23,6 +23,8 @@ CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_BPF=y CONFIG_NAMESPACES=y +# CONFIG_UTS_NS is not set +# CONFIG_PID_NS is not set CONFIG_SCHED_TUNE=y CONFIG_BLK_DEV_INITRD=y # CONFIG_RD_LZ4 is not set From 4c08606a4abe409c5a217f60478c85764460f12c Mon Sep 17 00:00:00 2001 From: Alistair Delva Date: Tue, 18 Feb 2020 21:51:08 -0800 Subject: [PATCH 3210/3715] ANDROID: clang: update to 10.0.4 Bug: 149785865 Change-Id: I299263190154da05d8b8db8da1beea3ddb764d8c Signed-off-by: Alistair Delva --- build.config.common | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.config.common b/build.config.common index d4754486cd82..be3afbc1c6b5 100644 --- a/build.config.common +++ b/build.config.common @@ -3,7 +3,7 @@ KERNEL_DIR=common CC=clang LD=ld.lld -CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r370808/bin +CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r377782b/bin BUILDTOOLS_PREBUILT_BIN=build/build-tools/path/linux-x86 EXTRA_CMDS='' From 23b0338e20577fac9aeef7d4f05f44081b37ade5 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 14 Feb 2020 23:08:43 +0100 Subject: [PATCH 3211/3715] ANDROID: test_stackinit: work around LLVM PR44916 Temporarily move the local outside of switch statement to work around https://bugs.llvm.org/show_bug.cgi?id=44916 and unbreak the ClangBuiltLinux project Travis build. This patch has been tested according to the instructions at https://github.com/ClangBuiltLinux/continuous-integration Bug: 149581678 Test: ./driver.sh REPO=android-4.14 Change-Id: Iea85ee09d8f0e6d698b5f23a02fe23a6002ad5e7 Signed-off-by: Alexander Potapenko --- lib/test_stackinit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/test_stackinit.c b/lib/test_stackinit.c index 7c5f9fb49e58..b7e586d559ee 100644 --- a/lib/test_stackinit.c +++ b/lib/test_stackinit.c @@ -282,8 +282,9 @@ DEFINE_TEST(user, struct test_user, STRUCT, none); */ static int noinline __leaf_switch_none(int path, bool fill) { + uint64_t var; + switch (path) { - uint64_t var; case 1: target_start = &var; From ee1d24d6af5e2cea35022a8f95371974c175415b Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Thu, 13 Feb 2020 08:09:57 -0800 Subject: [PATCH 3212/3715] ANDROID: Incremental fs: Support xattrs To make selinux work, add xattr support. This is a bit clunky - it seems like it would be better for the log and pending read functionality to be ioctls rather than this mixture of real and virtual files. Bug: 133435829 Change-Id: I56579fabe2ae7efb88f0344553948dc9573299aa Signed-off-by: Paul Lawrence --- fs/incfs/data_mgmt.c | 2 ++ fs/incfs/data_mgmt.h | 6 ++++ fs/incfs/vfs.c | 78 ++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 84 insertions(+), 2 deletions(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index 4698f14bbdf7..90bf9e37d236 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -68,6 +68,8 @@ void incfs_free_mount_info(struct mount_info *mi) mutex_destroy(&mi->mi_pending_reads_mutex); put_cred(mi->mi_owner); kfree(mi->mi_log.rl_ring_buf); + kfree(mi->log_xattr); + kfree(mi->pending_read_xattr); kfree(mi); } diff --git a/fs/incfs/data_mgmt.h b/fs/incfs/data_mgmt.h index 6722cef1608c..8b62b0348d51 100644 --- a/fs/incfs/data_mgmt.h +++ b/fs/incfs/data_mgmt.h @@ -131,6 +131,12 @@ struct mount_info { /* Temporary buffer for read logger. */ struct read_log mi_log; + + void *log_xattr; + size_t log_xattr_size; + + void *pending_read_xattr; + size_t pending_read_xattr_size; }; struct data_file_block { diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index aebd2b02bd83..22edaeeaa613 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -75,6 +75,8 @@ static void evict_inode(struct inode *inode); static ssize_t incfs_getxattr(struct dentry *d, const char *name, void *value, size_t size); +static ssize_t incfs_setxattr(struct dentry *d, const char *name, + const void *value, size_t size, int flags); static ssize_t incfs_listxattr(struct dentry *d, char *list, size_t size); static int show_options(struct seq_file *, struct dentry *); @@ -169,9 +171,18 @@ static int incfs_handler_getxattr(const struct xattr_handler *xh, return incfs_getxattr(d, name, buffer, size); } +static int incfs_handler_setxattr(const struct xattr_handler *xh, + struct dentry *d, struct inode *inode, + const char *name, const void *buffer, + size_t size, int flags) +{ + return incfs_setxattr(d, name, buffer, size, flags); +} + static const struct xattr_handler incfs_xattr_handler = { .prefix = "", /* AKA all attributes */ .get = incfs_handler_getxattr, + .set = incfs_handler_setxattr, }; static const struct xattr_handler *incfs_xattr_ops[] = { @@ -2048,11 +2059,74 @@ static ssize_t incfs_getxattr(struct dentry *d, const char *name, void *value, size_t size) { struct dentry_info *di = get_incfs_dentry(d); + struct mount_info *mi = get_mount_info(d->d_sb); + char *stored_value; + size_t stored_size; - if (!di || !di->backing_path.dentry) + if (di && di->backing_path.dentry) + return vfs_getxattr(di->backing_path.dentry, name, value, size); + + if (strcmp(name, "security.selinux")) return -ENODATA; - return vfs_getxattr(di->backing_path.dentry, name, value, size); + if (!strcmp(d->d_iname, INCFS_PENDING_READS_FILENAME)) { + stored_value = mi->pending_read_xattr; + stored_size = mi->pending_read_xattr_size; + } else if (!strcmp(d->d_iname, INCFS_LOG_FILENAME)) { + stored_value = mi->log_xattr; + stored_size = mi->log_xattr_size; + } else { + return -ENODATA; + } + + if (!stored_value) + return -ENODATA; + + if (stored_size > size) + return -E2BIG; + + memcpy(value, stored_value, stored_size); + return stored_size; + +} + + +static ssize_t incfs_setxattr(struct dentry *d, const char *name, + const void *value, size_t size, int flags) +{ + struct dentry_info *di = get_incfs_dentry(d); + struct mount_info *mi = get_mount_info(d->d_sb); + void **stored_value; + size_t *stored_size; + + if (di && di->backing_path.dentry) + return vfs_setxattr(di->backing_path.dentry, name, value, size, + flags); + + if (strcmp(name, "security.selinux")) + return -ENODATA; + + if (size > INCFS_MAX_FILE_ATTR_SIZE) + return -E2BIG; + + if (!strcmp(d->d_iname, INCFS_PENDING_READS_FILENAME)) { + stored_value = &mi->pending_read_xattr; + stored_size = &mi->pending_read_xattr_size; + } else if (!strcmp(d->d_iname, INCFS_LOG_FILENAME)) { + stored_value = &mi->log_xattr; + stored_size = &mi->log_xattr_size; + } else { + return -ENODATA; + } + + kfree (*stored_value); + *stored_value = kzalloc(size, GFP_NOFS); + if (!*stored_value) + return -ENOMEM; + + memcpy(*stored_value, value, size); + *stored_size = size; + return 0; } static ssize_t incfs_listxattr(struct dentry *d, char *list, size_t size) From 8d97219e60d4b11b9592b178627a557a0683a841 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 13 Feb 2020 15:08:24 -0800 Subject: [PATCH 3213/3715] ANDROID: ufs, block: fix crypto power management and move into block layer The call to pm_runtime_get_sync() in ufshcd_program_key() can deadlock because it waits for the UFS controller to be resumed, but it can itself be reached while resuming the UFS controller via: - ufshcd_runtime_resume() - ufshcd_resume() - ufshcd_reset_and_restore() - ufshcd_host_reset_and_restore() - ufshcd_hba_enable() - ufshcd_hba_execute_hce() - ufshcd_hba_start() - ufshcd_crypto_enable() - keyslot_manager_reprogram_all_keys() - ufshcd_crypto_keyslot_program() - ufshcd_program_key() But pm_runtime_get_sync() *is* needed when evicting a key. Also, on pre-4.20 kernels it's needed when programming a keyslot for a bio since the block layer used to resume the device in a different place. Thus, it's hard for drivers to know what to do in .keyslot_program() and .keyslot_evict(). In old kernels it may even be impossible unless we were to pass more information down from the keyslot_manager. There's also another possible deadlock: keyslot programming and eviction take ksm->lock for write and then resume the device, which may result in ksm->lock being taken again via the above call stack. To fix this, we should resume the device before taking ksm->lock. Fix these problems by moving to a better design where the block layer (namely, the keyslot manager) handles runtime power management instead of drivers. This is analogous to the block layer's existing runtime power management support (blk-pm), which handles resuming devices when bios are submitted to them so that drivers don't need to handle it. Test: Tested on coral with: echo 5 > /sys/bus/platform/devices/1d84000.ufshc/rpm_lvl sleep 30 touch /data && sync # hangs before this fix Also verified via kvm-xfstests that blk-crypto-fallback continues to work both with and without CONFIG_PM=y. Bug: 137270441 Bug: 149368295 Change-Id: I6bc9fb81854afe7edf490d71796ee68a61f7cbc8 Signed-off-by: Eric Biggers --- block/blk-crypto-fallback.c | 2 +- block/keyslot-manager.c | 90 +++++++++++++++++++++++++++----- drivers/md/dm.c | 3 +- drivers/scsi/ufs/ufshcd-crypto.c | 6 +-- include/linux/keyslot-manager.h | 5 +- 5 files changed, 87 insertions(+), 19 deletions(-) diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c index cce3317cba80..b8e9ae1c1d5b 100644 --- a/block/blk-crypto-fallback.c +++ b/block/blk-crypto-fallback.c @@ -608,7 +608,7 @@ int __init blk_crypto_fallback_init(void) crypto_mode_supported[i] = 0xFFFFFFFF; crypto_mode_supported[BLK_ENCRYPTION_MODE_INVALID] = 0; - blk_crypto_ksm = keyslot_manager_create(blk_crypto_num_keyslots, + blk_crypto_ksm = keyslot_manager_create(NULL, blk_crypto_num_keyslots, &blk_crypto_ksm_ll_ops, crypto_mode_supported, NULL); if (!blk_crypto_ksm) diff --git a/block/keyslot-manager.c b/block/keyslot-manager.c index 7e42813c9de0..0b6dd460645e 100644 --- a/block/keyslot-manager.c +++ b/block/keyslot-manager.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -46,6 +47,11 @@ struct keyslot_manager { unsigned int crypto_mode_supported[BLK_ENCRYPTION_MODE_MAX]; void *ll_priv_data; +#ifdef CONFIG_PM + /* Device for runtime power management (NULL if none) */ + struct device *dev; +#endif + /* Protects programming and evicting keys from the device */ struct rw_semaphore lock; @@ -72,8 +78,60 @@ static inline bool keyslot_manager_is_passthrough(struct keyslot_manager *ksm) return ksm->num_slots == 0; } +#ifdef CONFIG_PM +static inline void keyslot_manager_set_dev(struct keyslot_manager *ksm, + struct device *dev) +{ + ksm->dev = dev; +} + +/* If there's an underlying device and it's suspended, resume it. */ +static inline void keyslot_manager_pm_get(struct keyslot_manager *ksm) +{ + if (ksm->dev) + pm_runtime_get_sync(ksm->dev); +} + +static inline void keyslot_manager_pm_put(struct keyslot_manager *ksm) +{ + if (ksm->dev) + pm_runtime_put_sync(ksm->dev); +} +#else /* CONFIG_PM */ +static inline void keyslot_manager_set_dev(struct keyslot_manager *ksm, + struct device *dev) +{ +} + +static inline void keyslot_manager_pm_get(struct keyslot_manager *ksm) +{ +} + +static inline void keyslot_manager_pm_put(struct keyslot_manager *ksm) +{ +} +#endif /* !CONFIG_PM */ + +static inline void keyslot_manager_hw_enter(struct keyslot_manager *ksm) +{ + /* + * Calling into the driver requires ksm->lock held and the device + * resumed. But we must resume the device first, since that can acquire + * and release ksm->lock via keyslot_manager_reprogram_all_keys(). + */ + keyslot_manager_pm_get(ksm); + down_write(&ksm->lock); +} + +static inline void keyslot_manager_hw_exit(struct keyslot_manager *ksm) +{ + up_write(&ksm->lock); + keyslot_manager_pm_put(ksm); +} + /** * keyslot_manager_create() - Create a keyslot manager + * @dev: Device for runtime power management (NULL if none) * @num_slots: The number of key slots to manage. * @ksm_ll_ops: The struct keyslot_mgmt_ll_ops for the device that this keyslot * manager will use to perform operations like programming and @@ -93,7 +151,9 @@ static inline bool keyslot_manager_is_passthrough(struct keyslot_manager *ksm) * Context: May sleep * Return: Pointer to constructed keyslot manager or NULL on error. */ -struct keyslot_manager *keyslot_manager_create(unsigned int num_slots, +struct keyslot_manager *keyslot_manager_create( + struct device *dev, + unsigned int num_slots, const struct keyslot_mgmt_ll_ops *ksm_ll_ops, const unsigned int crypto_mode_supported[BLK_ENCRYPTION_MODE_MAX], void *ll_priv_data) @@ -119,6 +179,7 @@ struct keyslot_manager *keyslot_manager_create(unsigned int num_slots, memcpy(ksm->crypto_mode_supported, crypto_mode_supported, sizeof(ksm->crypto_mode_supported)); ksm->ll_priv_data = ll_priv_data; + keyslot_manager_set_dev(ksm, dev); init_rwsem(&ksm->lock); @@ -227,10 +288,10 @@ int keyslot_manager_get_slot_for_key(struct keyslot_manager *ksm, return slot; for (;;) { - down_write(&ksm->lock); + keyslot_manager_hw_enter(ksm); slot = find_and_grab_keyslot(ksm, key); if (slot != -ENOKEY) { - up_write(&ksm->lock); + keyslot_manager_hw_exit(ksm); return slot; } @@ -241,7 +302,7 @@ int keyslot_manager_get_slot_for_key(struct keyslot_manager *ksm, if (!list_empty(&ksm->idle_slots)) break; - up_write(&ksm->lock); + keyslot_manager_hw_exit(ksm); wait_event(ksm->idle_slots_wait_queue, !list_empty(&ksm->idle_slots)); } @@ -253,7 +314,7 @@ int keyslot_manager_get_slot_for_key(struct keyslot_manager *ksm, err = ksm->ksm_ll_ops.keyslot_program(ksm, key, slot); if (err) { wake_up(&ksm->idle_slots_wait_queue); - up_write(&ksm->lock); + keyslot_manager_hw_exit(ksm); return err; } @@ -267,7 +328,7 @@ int keyslot_manager_get_slot_for_key(struct keyslot_manager *ksm, remove_slot_from_lru_list(ksm, slot); - up_write(&ksm->lock); + keyslot_manager_hw_exit(ksm); return slot; } @@ -369,15 +430,16 @@ int keyslot_manager_evict_key(struct keyslot_manager *ksm, if (keyslot_manager_is_passthrough(ksm)) { if (ksm->ksm_ll_ops.keyslot_evict) { - down_write(&ksm->lock); + keyslot_manager_hw_enter(ksm); err = ksm->ksm_ll_ops.keyslot_evict(ksm, key, -1); - up_write(&ksm->lock); + keyslot_manager_hw_exit(ksm); return err; } return 0; } - down_write(&ksm->lock); + keyslot_manager_hw_enter(ksm); + slot = find_keyslot(ksm, key); if (slot < 0) { err = slot; @@ -397,7 +459,7 @@ int keyslot_manager_evict_key(struct keyslot_manager *ksm, memzero_explicit(&slotp->key, sizeof(slotp->key)); err = 0; out_unlock: - up_write(&ksm->lock); + keyslot_manager_hw_exit(ksm); return err; } @@ -417,6 +479,7 @@ void keyslot_manager_reprogram_all_keys(struct keyslot_manager *ksm) if (WARN_ON(keyslot_manager_is_passthrough(ksm))) return; + /* This is for device initialization, so don't resume the device */ down_write(&ksm->lock); for (slot = 0; slot < ksm->num_slots; slot++) { const struct keyslot *slotp = &ksm->slots[slot]; @@ -456,6 +519,7 @@ EXPORT_SYMBOL_GPL(keyslot_manager_destroy); /** * keyslot_manager_create_passthrough() - Create a passthrough keyslot manager + * @dev: Device for runtime power management (NULL if none) * @ksm_ll_ops: The struct keyslot_mgmt_ll_ops * @crypto_mode_supported: Bitmasks for supported encryption modes * @ll_priv_data: Private data passed as is to the functions in ksm_ll_ops. @@ -472,6 +536,7 @@ EXPORT_SYMBOL_GPL(keyslot_manager_destroy); * Return: Pointer to constructed keyslot manager or NULL on error. */ struct keyslot_manager *keyslot_manager_create_passthrough( + struct device *dev, const struct keyslot_mgmt_ll_ops *ksm_ll_ops, const unsigned int crypto_mode_supported[BLK_ENCRYPTION_MODE_MAX], void *ll_priv_data) @@ -486,6 +551,7 @@ struct keyslot_manager *keyslot_manager_create_passthrough( memcpy(ksm->crypto_mode_supported, crypto_mode_supported, sizeof(ksm->crypto_mode_supported)); ksm->ll_priv_data = ll_priv_data; + keyslot_manager_set_dev(ksm, dev); init_rwsem(&ksm->lock); @@ -545,15 +611,15 @@ int keyslot_manager_derive_raw_secret(struct keyslot_manager *ksm, { int err; - down_write(&ksm->lock); if (ksm->ksm_ll_ops.derive_raw_secret) { + keyslot_manager_hw_enter(ksm); err = ksm->ksm_ll_ops.derive_raw_secret(ksm, wrapped_key, wrapped_key_size, secret, secret_size); + keyslot_manager_hw_exit(ksm); } else { err = -EOPNOTSUPP; } - up_write(&ksm->lock); return err; } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 0189f70e87a0..0271ca072453 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2102,7 +2102,8 @@ static int dm_init_inline_encryption(struct mapped_device *md) */ memset(mode_masks, 0xFF, sizeof(mode_masks)); - md->queue->ksm = keyslot_manager_create_passthrough(&dm_ksm_ll_ops, + md->queue->ksm = keyslot_manager_create_passthrough(NULL, + &dm_ksm_ll_ops, mode_masks, md); if (!md->queue->ksm) return -ENOMEM; diff --git a/drivers/scsi/ufs/ufshcd-crypto.c b/drivers/scsi/ufs/ufshcd-crypto.c index 276b49ad13be..e3de448c9bbe 100644 --- a/drivers/scsi/ufs/ufshcd-crypto.c +++ b/drivers/scsi/ufs/ufshcd-crypto.c @@ -125,7 +125,6 @@ static int ufshcd_program_key(struct ufs_hba *hba, u32 slot_offset = hba->crypto_cfg_register + slot * sizeof(*cfg); int err; - pm_runtime_get_sync(hba->dev); ufshcd_hold(hba, false); if (hba->vops->program_key) { @@ -155,7 +154,6 @@ static int ufshcd_program_key(struct ufs_hba *hba, err = 0; out: ufshcd_release(hba); - pm_runtime_put_sync(hba->dev); return err; } @@ -337,8 +335,8 @@ int ufshcd_hba_init_crypto_spec(struct ufs_hba *hba, ufshcd_clear_all_keyslots(hba); - hba->ksm = keyslot_manager_create(ufshcd_num_keyslots(hba), ksm_ops, - crypto_modes_supported, hba); + hba->ksm = keyslot_manager_create(hba->dev, ufshcd_num_keyslots(hba), + ksm_ops, crypto_modes_supported, hba); if (!hba->ksm) { err = -ENOMEM; diff --git a/include/linux/keyslot-manager.h b/include/linux/keyslot-manager.h index 6d32a031218e..2f4aac2851bf 100644 --- a/include/linux/keyslot-manager.h +++ b/include/linux/keyslot-manager.h @@ -41,7 +41,9 @@ struct keyslot_mgmt_ll_ops { u8 *secret, unsigned int secret_size); }; -struct keyslot_manager *keyslot_manager_create(unsigned int num_slots, +struct keyslot_manager *keyslot_manager_create( + struct device *dev, + unsigned int num_slots, const struct keyslot_mgmt_ll_ops *ksm_ops, const unsigned int crypto_mode_supported[BLK_ENCRYPTION_MODE_MAX], void *ll_priv_data); @@ -67,6 +69,7 @@ void *keyslot_manager_private(struct keyslot_manager *ksm); void keyslot_manager_destroy(struct keyslot_manager *ksm); struct keyslot_manager *keyslot_manager_create_passthrough( + struct device *dev, const struct keyslot_mgmt_ll_ops *ksm_ops, const unsigned int crypto_mode_supported[BLK_ENCRYPTION_MODE_MAX], void *ll_priv_data); From e723e513c2fc9cdca3974dd811431ea8d6d13d64 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Wed, 26 Feb 2020 13:25:24 -0800 Subject: [PATCH 3214/3715] ANDROID: increase limit on sched-tune boost groups Some devices need an additional sched-tune boost group to optimize performance for key tasks Bug: 150302001 Change-Id: I392c8cc05a8851f1d416c381b4a27242924c2c27 Signed-off-by: Todd Kjos --- kernel/sched/tune.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c index 74a45606dc8c..b420258f9732 100644 --- a/kernel/sched/tune.c +++ b/kernel/sched/tune.c @@ -78,7 +78,7 @@ root_schedtune = { * implementation especially for the computation of the per-CPU boost * value */ -#define BOOSTGROUPS_COUNT 5 +#define BOOSTGROUPS_COUNT 6 /* Array of configured boostgroups */ static struct schedtune *allocated_group[BOOSTGROUPS_COUNT] = { From 304407a616abe4ce26cf5c234de245a651b35f4a Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 26 Feb 2020 15:28:50 -0800 Subject: [PATCH 3215/3715] ANDROID: Disable wq fp check in CFI builds With non-canonical CFI, LLVM generates jump table entries for external symbols in modules and as a result, a function pointer passed from a module to the core kernel will have a different address. Disable the warning for now. Bug: 145210207 Change-Id: Ifdcee3479280f7b97abdee6b4c746f447e0944e6 Signed-off-by: Sami Tolvanen Signed-off-by: Alistair Delva --- kernel/workqueue.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index fa4a72c6d5f5..fce73f9a2742 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1530,8 +1530,10 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, struct work_struct *work = &dwork->work; WARN_ON_ONCE(!wq); - WARN_ON_ONCE(timer->function != delayed_work_timer_fn || - timer->data != (unsigned long)dwork); +#ifndef CONFIG_CFI + WARN_ON_ONCE(timer->function != delayed_work_timer_fn); +#endif + WARN_ON_ONCE(timer->data != (unsigned long)dwork); WARN_ON_ONCE(timer_pending(timer)); WARN_ON_ONCE(!list_empty(&work->entry)); From 166fda7a7ed0e60e05fb4cfdc1f25fd59ff198ba Mon Sep 17 00:00:00 2001 From: Barani Muthukumaran Date: Thu, 6 Feb 2020 18:01:20 -0800 Subject: [PATCH 3216/3715] ANDROID: block: Prevent crypto fallback for wrapped keys blk-crypto-fallback does not support wrapped keys, hence prevent falling back when program_key fails. Add 'is_hw_wrapped' flag to blk-crypto-key to mention if the key is wrapped when the key is initialized. Bug: 147209885 Test: Validate FBE, simulate a failure in the underlying blk device and ensure the call fails without falling back to blk-crypto-fallback. Change-Id: I8bc301ca1ac9e55ba6ab622e8325486916b45c56 Signed-off-by: Barani Muthukumaran --- block/blk-crypto-fallback.c | 6 ++++++ block/blk-crypto.c | 17 +++++++++++++---- drivers/md/dm-default-key.c | 2 +- fs/crypto/fscrypt_private.h | 3 +++ fs/crypto/inline_crypt.c | 3 ++- fs/crypto/keysetup.c | 14 +++++++------- fs/crypto/keysetup_v1.c | 2 +- include/linux/bio-crypt-ctx.h | 3 +++ include/linux/blk-crypto.h | 1 + 9 files changed, 37 insertions(+), 14 deletions(-) diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c index b8e9ae1c1d5b..195b04b5df0d 100644 --- a/block/blk-crypto-fallback.c +++ b/block/blk-crypto-fallback.c @@ -571,6 +571,12 @@ int blk_crypto_fallback_submit_bio(struct bio **bio_ptr) struct bio_crypt_ctx *bc = bio->bi_crypt_context; struct bio_fallback_crypt_ctx *f_ctx; + if (bc->bc_key->is_hw_wrapped) { + pr_warn_once("HW wrapped key cannot be used with fallback.\n"); + bio->bi_status = BLK_STS_NOTSUPP; + return -EOPNOTSUPP; + } + if (!tfms_inited[bc->bc_key->crypto_mode]) { bio->bi_status = BLK_STS_IOERR; return -EIO; diff --git a/block/blk-crypto.c b/block/blk-crypto.c index a8de0d9680e0..88df1c0e7e5f 100644 --- a/block/blk-crypto.c +++ b/block/blk-crypto.c @@ -175,7 +175,9 @@ bool blk_crypto_endio(struct bio *bio) * @raw_key_size: Size of raw key. Must be at least the required size for the * chosen @crypto_mode; see blk_crypto_modes[]. (It's allowed * to be longer than the mode's actual key size, in order to - * support inline encryption hardware that accepts wrapped keys.) + * support inline encryption hardware that accepts wrapped keys. + * @is_hw_wrapped has to be set for such keys) + * @is_hw_wrapped: Denotes @raw_key is wrapped. * @crypto_mode: identifier for the encryption algorithm to use * @data_unit_size: the data unit size to use for en/decryption * @@ -184,6 +186,7 @@ bool blk_crypto_endio(struct bio *bio) */ int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key, unsigned int raw_key_size, + bool is_hw_wrapped, enum blk_crypto_mode_num crypto_mode, unsigned int data_unit_size) { @@ -198,9 +201,14 @@ int blk_crypto_init_key(struct blk_crypto_key *blk_key, BUILD_BUG_ON(BLK_CRYPTO_MAX_WRAPPED_KEY_SIZE < BLK_CRYPTO_MAX_KEY_SIZE); mode = &blk_crypto_modes[crypto_mode]; - if (raw_key_size < mode->keysize || - raw_key_size > BLK_CRYPTO_MAX_WRAPPED_KEY_SIZE) - return -EINVAL; + if (is_hw_wrapped) { + if (raw_key_size < mode->keysize || + raw_key_size > BLK_CRYPTO_MAX_WRAPPED_KEY_SIZE) + return -EINVAL; + } else { + if (raw_key_size != mode->keysize) + return -EINVAL; + } if (!is_power_of_2(data_unit_size)) return -EINVAL; @@ -209,6 +217,7 @@ int blk_crypto_init_key(struct blk_crypto_key *blk_key, blk_key->data_unit_size = data_unit_size; blk_key->data_unit_size_bits = ilog2(data_unit_size); blk_key->size = raw_key_size; + blk_key->is_hw_wrapped = is_hw_wrapped; memcpy(blk_key->raw, raw_key, raw_key_size); /* diff --git a/drivers/md/dm-default-key.c b/drivers/md/dm-default-key.c index 43a30c076aa6..e23ceb1390c8 100644 --- a/drivers/md/dm-default-key.c +++ b/drivers/md/dm-default-key.c @@ -226,7 +226,7 @@ static int default_key_ctr(struct dm_target *ti, unsigned int argc, char **argv) } err = blk_crypto_init_key(&dkc->key, raw_key, cipher->key_size, - cipher->mode_num, dkc->sector_size); + false, cipher->mode_num, dkc->sector_size); if (err) { ti->error = "Error initializing blk-crypto key"; goto bad; diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 2e7a7a8171a1..bc53caf70497 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -319,6 +319,7 @@ extern int fscrypt_prepare_inline_crypt_key( struct fscrypt_prepared_key *prep_key, const u8 *raw_key, unsigned int raw_key_size, + bool is_hw_wrapped, const struct fscrypt_info *ci); extern void fscrypt_destroy_inline_crypt_key( @@ -363,6 +364,7 @@ static inline bool fscrypt_using_inline_encryption( static inline int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, const u8 *raw_key, unsigned int raw_key_size, + bool is_hw_wrapped, const struct fscrypt_info *ci) { WARN_ON(1); @@ -557,6 +559,7 @@ extern struct fscrypt_mode fscrypt_modes[]; extern int fscrypt_prepare_key(struct fscrypt_prepared_key *prep_key, const u8 *raw_key, unsigned int raw_key_size, + bool is_hw_wrapped, const struct fscrypt_info *ci); extern void fscrypt_destroy_prepared_key(struct fscrypt_prepared_key *prep_key); diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c index 92c471d3db73..cd901697d62a 100644 --- a/fs/crypto/inline_crypt.c +++ b/fs/crypto/inline_crypt.c @@ -51,6 +51,7 @@ void fscrypt_select_encryption_impl(struct fscrypt_info *ci) int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, const u8 *raw_key, unsigned int raw_key_size, + bool is_hw_wrapped, const struct fscrypt_info *ci) { const struct inode *inode = ci->ci_inode; @@ -81,7 +82,7 @@ int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, BLK_CRYPTO_MAX_WRAPPED_KEY_SIZE); err = blk_crypto_init_key(&blk_key->base, raw_key, raw_key_size, - crypto_mode, sb->s_blocksize); + is_hw_wrapped, crypto_mode, sb->s_blocksize); if (err) { fscrypt_err(inode, "error %d initializing blk-crypto key", err); goto fail; diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index e474d6c09cbd..c289f4e32186 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -117,15 +117,15 @@ err_free_tfm: */ int fscrypt_prepare_key(struct fscrypt_prepared_key *prep_key, const u8 *raw_key, unsigned int raw_key_size, - const struct fscrypt_info *ci) + bool is_hw_wrapped, const struct fscrypt_info *ci) { struct crypto_skcipher *tfm; if (fscrypt_using_inline_encryption(ci)) return fscrypt_prepare_inline_crypt_key(prep_key, - raw_key, raw_key_size, ci); + raw_key, raw_key_size, is_hw_wrapped, ci); - if (WARN_ON(raw_key_size != ci->ci_mode->keysize)) + if (WARN_ON(is_hw_wrapped || raw_key_size != ci->ci_mode->keysize)) return -EINVAL; tfm = fscrypt_allocate_skcipher(ci->ci_mode, raw_key, ci->ci_inode); @@ -150,8 +150,8 @@ void fscrypt_destroy_prepared_key(struct fscrypt_prepared_key *prep_key) int fscrypt_set_per_file_enc_key(struct fscrypt_info *ci, const u8 *raw_key) { ci->ci_owns_key = true; - return fscrypt_prepare_key(&ci->ci_key, raw_key, - ci->ci_mode->keysize, ci); + return fscrypt_prepare_key(&ci->ci_key, raw_key, ci->ci_mode->keysize, + false /*is_hw_wrapped*/, ci); } static int setup_per_mode_enc_key(struct fscrypt_info *ci, @@ -202,7 +202,7 @@ static int setup_per_mode_enc_key(struct fscrypt_info *ci, } } err = fscrypt_prepare_key(prep_key, mk->mk_secret.raw, - mk->mk_secret.size, ci); + mk->mk_secret.size, true, ci); if (err) goto out_unlock; } else { @@ -221,7 +221,7 @@ static int setup_per_mode_enc_key(struct fscrypt_info *ci, if (err) goto out_unlock; err = fscrypt_prepare_key(prep_key, mode_key, mode->keysize, - ci); + false /*is_hw_wrapped*/, ci); memzero_explicit(mode_key, mode->keysize); if (err) goto out_unlock; diff --git a/fs/crypto/keysetup_v1.c b/fs/crypto/keysetup_v1.c index 29fe4df6ec75..3f7bb48f7317 100644 --- a/fs/crypto/keysetup_v1.c +++ b/fs/crypto/keysetup_v1.c @@ -234,7 +234,7 @@ fscrypt_get_direct_key(const struct fscrypt_info *ci, const u8 *raw_key) refcount_set(&dk->dk_refcount, 1); dk->dk_mode = ci->ci_mode; err = fscrypt_prepare_key(&dk->dk_key, raw_key, ci->ci_mode->keysize, - ci); + false /*is_hw_wrapped*/, ci); if (err) goto err_free_dk; memcpy(dk->dk_descriptor, ci->ci_policy.v1.master_key_descriptor, diff --git a/include/linux/bio-crypt-ctx.h b/include/linux/bio-crypt-ctx.h index ab22dbe7b880..8456a409fc21 100644 --- a/include/linux/bio-crypt-ctx.h +++ b/include/linux/bio-crypt-ctx.h @@ -31,6 +31,8 @@ enum blk_crypto_mode_num { * @data_unit_size_bits: log2 of data_unit_size * @size: size of this key in bytes (determined by @crypto_mode) * @hash: hash of this key, for keyslot manager use only + * @is_hw_wrapped: @raw points to a wrapped key to be used by an inline + * encryption hardware that accepts wrapped keys. * @raw: the raw bytes of this key. Only the first @size bytes are used. * * A blk_crypto_key is immutable once created, and many bios can reference it at @@ -42,6 +44,7 @@ struct blk_crypto_key { unsigned int data_unit_size_bits; unsigned int size; unsigned int hash; + bool is_hw_wrapped; u8 raw[BLK_CRYPTO_MAX_WRAPPED_KEY_SIZE]; }; diff --git a/include/linux/blk-crypto.h b/include/linux/blk-crypto.h index 485cee0b92dd..913b367d42bd 100644 --- a/include/linux/blk-crypto.h +++ b/include/linux/blk-crypto.h @@ -18,6 +18,7 @@ bool blk_crypto_endio(struct bio *bio); int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key, unsigned int raw_key_size, + bool is_hw_wrapped, enum blk_crypto_mode_num crypto_mode, unsigned int data_unit_size); From 23e670a1b851251c0144722352e649e61e29ed8e Mon Sep 17 00:00:00 2001 From: Barani Muthukumaran Date: Thu, 6 Feb 2020 18:01:29 -0800 Subject: [PATCH 3217/3715] ANDROID: dm: add support for passing through derive_raw_secret Update the device-mapper core to support exposing the inline crypto support of wrapped keys through the device-mapper device. derive_raw_secret in keyslot manager is used to derive the software raw secret from the given wrapped keyblob using the underlying blk device. Given that the raw_secret is the same for a given wrapped keyblob the call exits when the first underlying blk-device suceeds. Bug: 147209885 Test: Validated FBE with wrappedkey_v0 when /data is mounted on a dm device. Change-Id: Ia49ed61613607f8b82f2be0615e5b6d2f7533859 Signed-off-by: Barani Muthukumaran --- drivers/md/dm.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 0271ca072453..c41c2eddba31 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2088,8 +2088,79 @@ static int dm_keyslot_evict(struct keyslot_manager *ksm, return args.err; } +struct dm_derive_raw_secret_args { + const u8 *wrapped_key; + unsigned int wrapped_key_size; + u8 *secret; + unsigned int secret_size; + int err; +}; + +static int dm_derive_raw_secret_callback(struct dm_target *ti, + struct dm_dev *dev, sector_t start, + sector_t len, void *data) +{ + struct dm_derive_raw_secret_args *args = data; + struct request_queue *q = dev->bdev->bd_queue; + + if (!args->err) + return 0; + + if (!q->ksm) { + args->err = -EOPNOTSUPP; + return 0; + } + + args->err = keyslot_manager_derive_raw_secret(q->ksm, args->wrapped_key, + args->wrapped_key_size, + args->secret, + args->secret_size); + /* Try another device in case this fails. */ + return 0; +} + +/* + * Retrieve the raw_secret from the underlying device. Given that + * only only one raw_secret can exist for a particular wrappedkey, + * retrieve it only from the first device that supports derive_raw_secret() + */ +static int dm_derive_raw_secret(struct keyslot_manager *ksm, + const u8 *wrapped_key, + unsigned int wrapped_key_size, + u8 *secret, unsigned int secret_size) +{ + struct mapped_device *md = keyslot_manager_private(ksm); + struct dm_derive_raw_secret_args args = { + .wrapped_key = wrapped_key, + .wrapped_key_size = wrapped_key_size, + .secret = secret, + .secret_size = secret_size, + .err = -EOPNOTSUPP, + }; + struct dm_table *t; + int srcu_idx; + int i; + struct dm_target *ti; + + t = dm_get_live_table(md, &srcu_idx); + if (!t) + return -EOPNOTSUPP; + for (i = 0; i < dm_table_get_num_targets(t); i++) { + ti = dm_table_get_target(t, i); + if (!ti->type->iterate_devices) + continue; + ti->type->iterate_devices(ti, dm_derive_raw_secret_callback, + &args); + if (!args.err) + break; + } + dm_put_live_table(md, srcu_idx); + return args.err; +} + static struct keyslot_mgmt_ll_ops dm_ksm_ll_ops = { .keyslot_evict = dm_keyslot_evict, + .derive_raw_secret = dm_derive_raw_secret, }; static int dm_init_inline_encryption(struct mapped_device *md) From 06ab740983752ed19ad995b77409b17b3b56db11 Mon Sep 17 00:00:00 2001 From: Barani Muthukumaran Date: Thu, 6 Feb 2020 18:01:33 -0800 Subject: [PATCH 3218/3715] ANDROID: dm: Add wrapped key support in dm-default-key To prevent keys from being compromised if an attacker acquires read access to kernel memory, some inline encryption hardware supports protecting the keys in hardware without software having access to or the ability to set the plaintext keys. Instead, software only sees "wrapped keys", which may differ on every boot. The keys can be initially generated either by software (in which case they need to be imported to hardware to be wrapped), or directly by the hardware. Add support for this type of hardware by allowing keys to be flagged as hardware-wrapped. When used, dm-default-key will pass the wrapped key to the inline encryption hardware to encryption metadata. The hardware will internally unwrap the key and derive the metadata encryption key. Bug: 147209885 Test: Validate metadata encryption & FBE with wrapped keys. Change-Id: I8078b116dab9e04d7f3f15f29f11823185ea5d50 Signed-off-by: Barani Muthukumaran --- drivers/md/dm-default-key.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-default-key.c b/drivers/md/dm-default-key.c index e23ceb1390c8..b3da6afa6927 100644 --- a/drivers/md/dm-default-key.c +++ b/drivers/md/dm-default-key.c @@ -9,7 +9,7 @@ #define DM_MSG_PREFIX "default-key" -#define DM_DEFAULT_KEY_MAX_KEY_SIZE 64 +#define DM_DEFAULT_KEY_MAX_WRAPPED_KEY_SIZE 128 #define SECTOR_SIZE (1 << SECTOR_SHIFT) @@ -49,6 +49,7 @@ struct default_key_c { unsigned int sector_size; unsigned int sector_bits; struct blk_crypto_key key; + bool is_hw_wrapped; }; static const struct dm_default_key_cipher * @@ -84,7 +85,7 @@ static int default_key_ctr_optional(struct dm_target *ti, struct default_key_c *dkc = ti->private; struct dm_arg_set as; static const struct dm_arg _args[] = { - {0, 3, "Invalid number of feature args"}, + {0, 4, "Invalid number of feature args"}, }; unsigned int opt_params; const char *opt_string; @@ -117,6 +118,8 @@ static int default_key_ctr_optional(struct dm_target *ti, } } else if (!strcmp(opt_string, "iv_large_sectors")) { iv_large_sectors = true; + } else if (!strcmp(opt_string, "wrappedkey_v0")) { + dkc->is_hw_wrapped = true; } else { ti->error = "Invalid feature arguments"; return -EINVAL; @@ -144,7 +147,8 @@ static int default_key_ctr(struct dm_target *ti, unsigned int argc, char **argv) { struct default_key_c *dkc; const struct dm_default_key_cipher *cipher; - u8 raw_key[DM_DEFAULT_KEY_MAX_KEY_SIZE]; + u8 raw_key[DM_DEFAULT_KEY_MAX_WRAPPED_KEY_SIZE]; + unsigned int raw_key_size; unsigned long long tmpll; char dummy; int err; @@ -176,12 +180,15 @@ static int default_key_ctr(struct dm_target *ti, unsigned int argc, char **argv) } /* */ - if (strlen(argv[1]) != 2 * cipher->key_size) { - ti->error = "Incorrect key size for cipher"; + raw_key_size = strlen(argv[1]); + if (raw_key_size > 2 * DM_DEFAULT_KEY_MAX_WRAPPED_KEY_SIZE || + raw_key_size % 2) { + ti->error = "Invalid keysize"; err = -EINVAL; goto bad; } - if (hex2bin(raw_key, argv[1], cipher->key_size) != 0) { + raw_key_size /= 2; + if (hex2bin(raw_key, argv[1], raw_key_size) != 0) { ti->error = "Malformed key string"; err = -EINVAL; goto bad; @@ -226,7 +233,8 @@ static int default_key_ctr(struct dm_target *ti, unsigned int argc, char **argv) } err = blk_crypto_init_key(&dkc->key, raw_key, cipher->key_size, - false, cipher->mode_num, dkc->sector_size); + dkc->is_hw_wrapped, cipher->mode_num, + dkc->sector_size); if (err) { ti->error = "Error initializing blk-crypto key"; goto bad; @@ -319,6 +327,8 @@ static void default_key_status(struct dm_target *ti, status_type_t type, num_feature_args += !!ti->num_discard_bios; if (dkc->sector_size != SECTOR_SIZE) num_feature_args += 2; + if (dkc->is_hw_wrapped) + num_feature_args += 1; if (num_feature_args != 0) { DMEMIT(" %d", num_feature_args); if (ti->num_discard_bios) @@ -327,6 +337,8 @@ static void default_key_status(struct dm_target *ti, status_type_t type, DMEMIT(" sector_size:%u", dkc->sector_size); DMEMIT(" iv_large_sectors"); } + if (dkc->is_hw_wrapped) + DMEMIT(" wrappedkey_v0"); } break; } @@ -372,7 +384,7 @@ static void default_key_io_hints(struct dm_target *ti, static struct target_type default_key_target = { .name = "default-key", - .version = {2, 0, 0}, + .version = {2, 1, 0}, .module = THIS_MODULE, .ctr = default_key_ctr, .dtr = default_key_dtr, From 9ec56ec3132b38ba351b3f9db7b70b269022834e Mon Sep 17 00:00:00 2001 From: Alistair Delva Date: Thu, 27 Feb 2020 12:52:46 -0800 Subject: [PATCH 3219/3715] ANDROID: gki_defconfig: Enable CONFIG_RD_LZ4 Support future decompression of LZ4-compressed ramdisk images. This support is in addition to the default support for gzip. Bug: 150391496 Test: TreeHugger Change-Id: I8c07e9363333c53cd6ff9c7e16fc42cbe318fe83 Signed-off-by: Alistair Delva --- arch/arm64/configs/cuttlefish_defconfig | 1 - arch/x86/configs/x86_64_cuttlefish_defconfig | 5 ++++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index c1b2fc17bdbb..0b9fbd2d3d14 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -29,7 +29,6 @@ CONFIG_BLK_DEV_INITRD=y # CONFIG_RD_LZMA is not set # CONFIG_RD_XZ is not set # CONFIG_RD_LZO is not set -# CONFIG_RD_LZ4 is not set CONFIG_SGETMASK_SYSCALL=y # CONFIG_SYSFS_SYSCALL is not set CONFIG_KALLSYMS_ALL=y diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index b18d574c8143..fbe017c1f5cf 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -27,7 +27,10 @@ CONFIG_NAMESPACES=y # CONFIG_PID_NS is not set CONFIG_SCHED_TUNE=y CONFIG_BLK_DEV_INITRD=y -# CONFIG_RD_LZ4 is not set +# CONFIG_RD_BZIP2 is not set +# CONFIG_RD_LZMA is not set +# CONFIG_RD_XZ is not set +# CONFIG_RD_LZO is not set CONFIG_KALLSYMS_ALL=y # CONFIG_PCSPKR_PLATFORM is not set CONFIG_BPF_SYSCALL=y From 9a29b43c06d3b114d351e115ab00f76f628fefe9 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Mon, 13 Aug 2018 09:56:43 +0800 Subject: [PATCH 3220/3715] ANDROID: sdcardfs: fix -ENOENT lookup race issue The negative lower dentry created by vfs_path_lookup could be reclaimed between vfs_path_lookup and d_hash_and_lookup. Therefore, it is unsafe to just lookup dcache again for the negative dentry cases. Without this patch, users could occasionally get trapped into `failed to create' under memory pressure. So here is a workaround to hack it and in my opinion sdcardfs should be refactored to close all races in the long term as pointed out in the code comment of this commit. Test: (Thread 1) while true; do echo 3 > /proc/sys/vm/drop_caches done (Thread 2) i=0 while true; do echo 123 > /sdcard/$i i=$((i+1)) done Bug: 63872684 Cc: Daniel Rosenberg Cc: Miao Xie Cc: Chao Yu Change-Id: Ic033e1f84a8b271c1f48010f4e1f189982bbbea2 Signed-off-by: Gao Xiang Signed-off-by: Daniel Rosenberg (cherry picked from commit bd77267426ed5ffe6a25aa77c149cde28f479f95) --- fs/sdcardfs/inode.c | 3 +++ fs/sdcardfs/lookup.c | 34 ++++++++++++++++------------------ 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c index 4dd681e0d59d..edeca118cce5 100644 --- a/fs/sdcardfs/inode.c +++ b/fs/sdcardfs/inode.c @@ -87,6 +87,9 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry, lower_dentry_mnt = lower_path.mnt; lower_parent_dentry = lock_parent(lower_dentry); + if (d_is_positive(lower_dentry)) + return -EEXIST; + /* set last 16bytes of mode field to 0664 */ mode = (mode & S_IFMT) | 00664; diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c index 73179ce2591f..886aee279920 100644 --- a/fs/sdcardfs/lookup.c +++ b/fs/sdcardfs/lookup.c @@ -257,7 +257,6 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, struct dentry *lower_dentry; const struct qstr *name; struct path lower_path; - struct qstr dname; struct dentry *ret_dentry = NULL; struct sdcardfs_sb_info *sbi; @@ -316,6 +315,7 @@ put_name: /* no error: handle positive dentries */ if (!err) { +found: /* check if the dentry is an obb dentry * if true, the lower_inode must be replaced with * the inode of the graft path @@ -362,28 +362,26 @@ put_name: if (err && err != -ENOENT) goto out; - /* instatiate a new negative dentry */ - dname.name = name->name; - dname.len = name->len; - - /* See if the low-level filesystem might want - * to use its own hash - */ - lower_dentry = d_hash_and_lookup(lower_dir_dentry, &dname); - if (IS_ERR(lower_dentry)) - return lower_dentry; - - if (!lower_dentry) { - /* We called vfs_path_lookup earlier, and did not get a negative - * dentry then. Don't confuse the lower filesystem by forcing - * one on it now... - */ - err = -ENOENT; + /* get a (very likely) new negative dentry */ + lower_dentry = lookup_one_len_unlocked(name->name, + lower_dir_dentry, name->len); + if (IS_ERR(lower_dentry)) { + err = PTR_ERR(lower_dentry); goto out; } lower_path.dentry = lower_dentry; lower_path.mnt = mntget(lower_dir_mnt); + + /* + * Check if someone sneakily filled in the dentry when + * we weren't looking. We'll check again in create. + */ + if (unlikely(d_inode_rcu(lower_dentry))) { + err = 0; + goto found; + } + sdcardfs_set_lower_path(dentry, &lower_path); /* From 699d26b883139a817426581a76b4edd2fb610616 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Thu, 27 Feb 2020 16:29:08 -0800 Subject: [PATCH 3221/3715] ANDROID: cuttlefish_defconfig: Add CONFIG_UNICODE This adds support for unicode to cuttlefish_defconfig Signed-off-by: Daniel Rosenberg Test: Boots, /data/media is case insensitive Bug: 138322712 Change-Id: I177d0b301087733234de3d6d283af946213eebca --- arch/arm64/configs/cuttlefish_defconfig | 1 + arch/x86/configs/x86_64_cuttlefish_defconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index 0b9fbd2d3d14..8b5ebe05fe9f 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -449,6 +449,7 @@ CONFIG_SDCARD_FS=y CONFIG_PSTORE=y CONFIG_PSTORE_CONSOLE=y CONFIG_PSTORE_RAM=y +CONFIG_UNICODE=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y # CONFIG_ENABLE_MUST_CHECK is not set diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index fbe017c1f5cf..43c457eb9446 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -474,6 +474,7 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y +CONFIG_UNICODE=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y # CONFIG_ENABLE_WARN_DEPRECATED is not set From 045b7dadf1d6680f9b9b2190b91a513896b9ef8e Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 5 Feb 2020 16:40:52 -0800 Subject: [PATCH 3222/3715] FROMLIST: unicode: Add utf8_casefold_hash This adds a case insensitive hash function to allow taking the hash without needing to allocate a casefolded copy of the string. Signed-off-by: Daniel Rosenberg Test: Boots, /data/media is case insensitive Bug: 138322712 Link: https://lore.kernel.org/linux-f2fs-devel/20200208013552.241832-1-drosen@google.com/T/#t Change-Id: I43c7d38a8e22f4479397f35e6343bd326901cdba --- fs/unicode/utf8-core.c | 23 ++++++++++++++++++++++- include/linux/unicode.h | 3 +++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c index 71ca4d047d65..d18789f27650 100644 --- a/fs/unicode/utf8-core.c +++ b/fs/unicode/utf8-core.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "utf8n.h" @@ -122,9 +123,29 @@ int utf8_casefold(const struct unicode_map *um, const struct qstr *str, } return -EINVAL; } - EXPORT_SYMBOL(utf8_casefold); +int utf8_casefold_hash(const struct unicode_map *um, const void *salt, + struct qstr *str) +{ + const struct utf8data *data = utf8nfdicf(um->version); + struct utf8cursor cur; + int c; + unsigned long hash = init_name_hash(salt); + + if (utf8ncursor(&cur, data, str->name, str->len) < 0) + return -EINVAL; + + while ((c = utf8byte(&cur))) { + if (c < 0) + return c; + hash = partial_name_hash((unsigned char)c, hash); + } + str->hash = end_name_hash(hash); + return 0; +} +EXPORT_SYMBOL(utf8_casefold_hash); + int utf8_normalize(const struct unicode_map *um, const struct qstr *str, unsigned char *dest, size_t dlen) { diff --git a/include/linux/unicode.h b/include/linux/unicode.h index 990aa97d8049..74484d44c755 100644 --- a/include/linux/unicode.h +++ b/include/linux/unicode.h @@ -27,6 +27,9 @@ int utf8_normalize(const struct unicode_map *um, const struct qstr *str, int utf8_casefold(const struct unicode_map *um, const struct qstr *str, unsigned char *dest, size_t dlen); +int utf8_casefold_hash(const struct unicode_map *um, const void *salt, + struct qstr *str); + struct unicode_map *utf8_load(const char *version); void utf8_unload(struct unicode_map *um); From 4200d6e3d17f4f3d59b55f78ea3b95d5fe362473 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 22 Jan 2020 20:05:18 -0800 Subject: [PATCH 3223/3715] FROMLIST: Add standard casefolding support This adds general supporting functions for filesystems that use utf8 casefolding. It provides standard dentry_operations and adds the necessary structures in struct super_block to allow this standardization. Ext4 and F2fs are switch to these implementations. Signed-off-by: Daniel Rosenberg Note: Fixed issue with non-strictly enforced fallback hash Test: Boots, /data/media is case insensitive Bug: 138322712 Link: https://lore.kernel.org/linux-f2fs-devel/20200208013552.241832-1-drosen@google.com/T/#t Change-Id: I81b5fb5d3ce0259a60712ae2505c1e4b03dbafde --- fs/libfs.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 22 +++++++++++++++++ 2 files changed, 83 insertions(+) diff --git a/fs/libfs.c b/fs/libfs.c index 49623301e5f0..f59b163c38ac 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -16,6 +16,8 @@ #include #include #include /* sync_mapping_buffers */ +#include +#include #include @@ -1219,3 +1221,62 @@ bool is_empty_dir_inode(struct inode *inode) return (inode->i_fop == &empty_dir_operations) && (inode->i_op == &empty_dir_inode_operations); } + +#ifdef CONFIG_UNICODE +bool needs_casefold(const struct inode *dir) +{ + return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding && + (!IS_ENCRYPTED(dir) || fscrypt_has_encryption_key(dir)); +} +EXPORT_SYMBOL(needs_casefold); + +int generic_ci_d_compare(const struct dentry *dentry, unsigned int len, + const char *str, const struct qstr *name) +{ + const struct dentry *parent = READ_ONCE(dentry->d_parent); + const struct inode *inode = READ_ONCE(parent->d_inode); + const struct super_block *sb = dentry->d_sb; + const struct unicode_map *um = sb->s_encoding; + struct qstr entry = QSTR_INIT(str, len); + int ret; + + if (!inode || !needs_casefold(inode)) + goto fallback; + + ret = utf8_strncasecmp(um, name, &entry); + if (ret >= 0) + return ret; + + if (sb_has_enc_strict_mode(sb)) + return -EINVAL; +fallback: + if (len != name->len) + return 1; + return !!memcmp(str, name->name, len); +} +EXPORT_SYMBOL(generic_ci_d_compare); + +int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str) +{ + const struct inode *inode = READ_ONCE(dentry->d_inode); + struct super_block *sb = dentry->d_sb; + const struct unicode_map *um = sb->s_encoding; + int ret = 0; + + if (!inode || !needs_casefold(inode)) + return 0; + + ret = utf8_casefold_hash(um, dentry, str); + if (ret < 0) + goto err; + + return 0; +err: + if (sb_has_enc_strict_mode(sb)) + ret = -EINVAL; + else + ret = 0; + return ret; +} +EXPORT_SYMBOL(generic_ci_d_hash); +#endif diff --git a/include/linux/fs.h b/include/linux/fs.h index 6495cb54f42c..828d4a55bf37 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1309,6 +1309,12 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_ACTIVE (1<<30) #define SB_NOUSER (1<<31) +/* These flags relate to encoding and casefolding */ +#define SB_ENC_STRICT_MODE_FL (1 << 0) + +#define sb_has_enc_strict_mode(sb) \ + (sb->s_encoding_flags & SB_ENC_STRICT_MODE_FL) + /* * Umount options */ @@ -1377,6 +1383,10 @@ struct super_block { #endif struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */ +#ifdef CONFIG_UNICODE + struct unicode_map *s_encoding; + __u16 s_encoding_flags; +#endif struct list_head s_mounts; /* list of mounts; _not_ for fs use */ struct block_device *s_bdev; struct backing_dev_info *s_bdi; @@ -3190,6 +3200,18 @@ extern int generic_file_fsync(struct file *, loff_t, loff_t, int); extern int generic_check_addressable(unsigned, u64); +#ifdef CONFIG_UNICODE +extern int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str); +extern int generic_ci_d_compare(const struct dentry *dentry, unsigned int len, + const char *str, const struct qstr *name); +extern bool needs_casefold(const struct inode *dir); +#else +static inline bool needs_casefold(const struct inode *dir) +{ + return 0; +} +#endif + #ifdef CONFIG_MIGRATION extern int buffer_migrate_page(struct address_space *, struct page *, struct page *, From ed5f8d20bf3c6ae668e6e88e15b383adae0735d0 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Thu, 6 Feb 2020 14:41:31 -0800 Subject: [PATCH 3224/3715] FROMLIST: f2fs: Use generic casefolding support This switches f2fs over to the generic support provided in commit 65832afbeaaf ("fs: Add standard casefolding support") Signed-off-by: Daniel Rosenberg Test: Boots, /data/media is case insensitive Bug: 138322712 Link: https://lore.kernel.org/linux-f2fs-devel/20200208013552.241832-1-drosen@google.com/T/#t Change-Id: Ib2aecfdf029a53d116929bf6c0fc4802585f4383 --- fs/f2fs/dir.c | 63 +++++++---------------------------------- fs/f2fs/f2fs.h | 4 --- fs/f2fs/hash.c | 2 +- fs/f2fs/super.c | 10 +++---- fs/f2fs/sysfs.c | 10 ++++--- include/linux/f2fs_fs.h | 3 -- 6 files changed, 22 insertions(+), 70 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 331c90556a0f..704d68c34130 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -114,8 +114,8 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir, int f2fs_ci_compare(const struct inode *parent, const struct qstr *name, const struct qstr *entry, bool quick) { - const struct f2fs_sb_info *sbi = F2FS_SB(parent->i_sb); - const struct unicode_map *um = sbi->s_encoding; + const struct super_block *sb = parent->i_sb; + const struct unicode_map *um = sb->s_encoding; int ret; if (quick) @@ -127,7 +127,7 @@ int f2fs_ci_compare(const struct inode *parent, const struct qstr *name, /* Handle invalid character sequence as either an error * or as an opaque byte sequence. */ - if (f2fs_has_strict_mode(sbi)) + if (sb_has_enc_strict_mode(sb)) return -EINVAL; if (name->len != entry->len) @@ -154,7 +154,7 @@ static void f2fs_fname_setup_ci_filename(struct inode *dir, if (!cf_name->name) return; - cf_name->len = utf8_casefold(sbi->s_encoding, + cf_name->len = utf8_casefold(dir->i_sb->s_encoding, iname, cf_name->name, F2FS_NAME_LEN); if ((int)cf_name->len <= 0) { @@ -173,7 +173,7 @@ static inline bool f2fs_match_name(struct f2fs_dentry_ptr *d, { #ifdef CONFIG_UNICODE struct inode *parent = d->inode; - struct f2fs_sb_info *sbi = F2FS_I_SB(parent); + struct super_block *sb = parent->i_sb; struct qstr entry; #endif @@ -184,7 +184,7 @@ static inline bool f2fs_match_name(struct f2fs_dentry_ptr *d, entry.name = d->filename[bit_pos]; entry.len = de->name_len; - if (sbi->s_encoding && IS_CASEFOLDED(parent)) { + if (sb->s_encoding && IS_CASEFOLDED(parent)) { if (cf_str->name) { struct qstr cf = {.name = cf_str->name, .len = cf_str->len}; @@ -357,8 +357,8 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, int err; #ifdef CONFIG_UNICODE - if (f2fs_has_strict_mode(F2FS_I_SB(dir)) && IS_CASEFOLDED(dir) && - utf8_validate(F2FS_I_SB(dir)->s_encoding, child)) { + if (sb_has_enc_strict_mode(dir->i_sb) && IS_CASEFOLDED(dir) && + utf8_validate(dir->i_sb->s_encoding, child)) { *res_page = ERR_PTR(-EINVAL); return NULL; } @@ -1079,51 +1079,8 @@ const struct file_operations f2fs_dir_operations = { }; #ifdef CONFIG_UNICODE -static int f2fs_d_compare(const struct dentry *dentry, unsigned int len, - const char *str, const struct qstr *name) -{ - struct qstr qstr = {.name = str, .len = len }; - const struct dentry *parent = READ_ONCE(dentry->d_parent); - const struct inode *inode = READ_ONCE(parent->d_inode); - - if (!inode || !IS_CASEFOLDED(inode)) { - if (len != name->len) - return -1; - return memcmp(str, name->name, len); - } - - return f2fs_ci_compare(inode, name, &qstr, false); -} - -static int f2fs_d_hash(const struct dentry *dentry, struct qstr *str) -{ - struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); - const struct unicode_map *um = sbi->s_encoding; - const struct inode *inode = READ_ONCE(dentry->d_inode); - unsigned char *norm; - int len, ret = 0; - - if (!inode || !IS_CASEFOLDED(inode)) - return 0; - - norm = f2fs_kmalloc(sbi, PATH_MAX, GFP_ATOMIC); - if (!norm) - return -ENOMEM; - - len = utf8_casefold(um, str, norm, PATH_MAX); - if (len < 0) { - if (f2fs_has_strict_mode(sbi)) - ret = -EINVAL; - goto out; - } - str->hash = full_name_hash(dentry, norm, len); -out: - kvfree(norm); - return ret; -} - const struct dentry_operations f2fs_dentry_ops = { - .d_hash = f2fs_d_hash, - .d_compare = f2fs_d_compare, + .d_hash = generic_ci_d_hash, + .d_compare = generic_ci_d_compare, }; #endif diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 57475e687e2f..3beb72a44ff9 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1285,10 +1285,6 @@ struct f2fs_sb_info { int valid_super_block; /* valid super block no */ unsigned long s_flag; /* flags for sbi */ struct mutex writepages; /* mutex for writepages() */ -#ifdef CONFIG_UNICODE - struct unicode_map *s_encoding; - __u16 s_encoding_flags; -#endif #ifdef CONFIG_BLK_DEV_ZONED unsigned int blocks_per_blkz; /* F2FS blocks per zone */ diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index 5bc4dcd8fc03..28acb24e7a7a 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -110,7 +110,7 @@ f2fs_hash_t f2fs_dentry_hash(const struct inode *dir, { #ifdef CONFIG_UNICODE struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); - const struct unicode_map *um = sbi->s_encoding; + const struct unicode_map *um = dir->i_sb->s_encoding; int r, dlen; unsigned char *buff; struct qstr folded; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 705bd7a5abe1..7e1b9a6d1f90 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1230,7 +1230,7 @@ static void f2fs_put_super(struct super_block *sb) for (i = 0; i < NR_PAGE_TYPE; i++) kvfree(sbi->write_io[i]); #ifdef CONFIG_UNICODE - utf8_unload(sbi->s_encoding); + utf8_unload(sb->s_encoding); #endif kvfree(sbi); } @@ -3293,7 +3293,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) static int f2fs_setup_casefold(struct f2fs_sb_info *sbi) { #ifdef CONFIG_UNICODE - if (f2fs_sb_has_casefold(sbi) && !sbi->s_encoding) { + if (f2fs_sb_has_casefold(sbi) && !sbi->sb->s_encoding) { const struct f2fs_sb_encodings *encoding_info; struct unicode_map *encoding; __u16 encoding_flags; @@ -3324,8 +3324,8 @@ static int f2fs_setup_casefold(struct f2fs_sb_info *sbi) "%s-%s with flags 0x%hx", encoding_info->name, encoding_info->version?:"\b", encoding_flags); - sbi->s_encoding = encoding; - sbi->s_encoding_flags = encoding_flags; + sbi->sb->s_encoding = encoding; + sbi->sb->s_encoding_flags = encoding_flags; sbi->sb->s_d_op = &f2fs_dentry_ops; } #else @@ -3800,7 +3800,7 @@ free_bio_info: kvfree(sbi->write_io[i]); #ifdef CONFIG_UNICODE - utf8_unload(sbi->s_encoding); + utf8_unload(sb->s_encoding); #endif free_options: #ifdef CONFIG_QUOTA diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 227d3db5c989..a32b3a392fbd 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -175,12 +175,14 @@ static ssize_t encoding_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { #ifdef CONFIG_UNICODE + struct super_block *sb = sbi->sb; + if (f2fs_sb_has_casefold(sbi)) return snprintf(buf, PAGE_SIZE, "%s (%d.%d.%d)\n", - sbi->s_encoding->charset, - (sbi->s_encoding->version >> 16) & 0xff, - (sbi->s_encoding->version >> 8) & 0xff, - sbi->s_encoding->version & 0xff); + sb->s_encoding->charset, + (sb->s_encoding->version >> 16) & 0xff, + (sb->s_encoding->version >> 8) & 0xff, + sb->s_encoding->version & 0xff); #endif return sprintf(buf, "(none)"); } diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index ac3f4888b3df..e8763a955f90 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -38,9 +38,6 @@ #define F2FS_MAX_QUOTAS 3 #define F2FS_ENC_UTF8_12_1 1 -#define F2FS_ENC_STRICT_MODE_FL (1 << 0) -#define f2fs_has_strict_mode(sbi) \ - (sbi->s_encoding_flags & F2FS_ENC_STRICT_MODE_FL) #define F2FS_IO_SIZE(sbi) (1 << F2FS_OPTION(sbi).write_io_size_bits) /* Blocks */ #define F2FS_IO_SIZE_KB(sbi) (1 << (F2FS_OPTION(sbi).write_io_size_bits + 2)) /* KB */ From d2cb77395ecdb80dbc77b1dd58634c2ac26868a9 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Thu, 6 Feb 2020 14:41:16 -0800 Subject: [PATCH 3225/3715] FROMLIST: ext4: Use generic casefolding support This switches ext4 over to the generic support provided in commit 65832afbeaaf ("fs: Add standard casefolding support") Signed-off-by: Daniel Rosenberg Test: Boots, /data/media is case insensitive Bug: 138322712 Link: https://lore.kernel.org/linux-f2fs-devel/20200208013552.241832-1-drosen@google.com/T/#t Change-Id: I3a0705278100590df4c7cdd0dcdf945e9f11feb7 --- fs/ext4/dir.c | 45 ++------------------------------------------- fs/ext4/ext4.h | 12 ------------ fs/ext4/hash.c | 2 +- fs/ext4/namei.c | 18 ++++++++---------- fs/ext4/super.c | 12 ++++++------ 5 files changed, 17 insertions(+), 72 deletions(-) diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 97a3b29131bb..bb6ff465ae3d 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -664,49 +664,8 @@ const struct file_operations ext4_dir_operations = { }; #ifdef CONFIG_UNICODE -static int ext4_d_compare(const struct dentry *dentry, unsigned int len, - const char *str, const struct qstr *name) -{ - struct qstr qstr = {.name = str, .len = len }; - struct inode *inode = dentry->d_parent->d_inode; - - if (!IS_CASEFOLDED(inode) || !EXT4_SB(inode->i_sb)->s_encoding) { - if (len != name->len) - return -1; - return memcmp(str, name->name, len); - } - - return ext4_ci_compare(inode, name, &qstr, false); -} - -static int ext4_d_hash(const struct dentry *dentry, struct qstr *str) -{ - const struct ext4_sb_info *sbi = EXT4_SB(dentry->d_sb); - const struct unicode_map *um = sbi->s_encoding; - unsigned char *norm; - int len, ret = 0; - - if (!IS_CASEFOLDED(dentry->d_inode) || !um) - return 0; - - norm = kmalloc(PATH_MAX, GFP_ATOMIC); - if (!norm) - return -ENOMEM; - - len = utf8_casefold(um, str, norm, PATH_MAX); - if (len < 0) { - if (ext4_has_strict_mode(sbi)) - ret = -EINVAL; - goto out; - } - str->hash = full_name_hash(dentry, norm, len); -out: - kfree(norm); - return ret; -} - const struct dentry_operations ext4_dentry_ops = { - .d_hash = ext4_d_hash, - .d_compare = ext4_d_compare, + .d_hash = generic_ci_d_hash, + .d_compare = generic_ci_d_compare, }; #endif diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 0b98ac498971..35dca3a91323 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1376,14 +1376,6 @@ struct ext4_super_block { #define EXT4_ENC_UTF8_12_1 1 -/* - * Flags for ext4_sb_info.s_encoding_flags. - */ -#define EXT4_ENC_STRICT_MODE_FL (1 << 0) - -#define ext4_has_strict_mode(sbi) \ - (sbi->s_encoding_flags & EXT4_ENC_STRICT_MODE_FL) - /* * fourth extended-fs super-block data in memory */ @@ -1435,10 +1427,6 @@ struct ext4_sb_info { struct kobject s_kobj; struct completion s_kobj_unregister; struct super_block *s_sb; -#ifdef CONFIG_UNICODE - struct unicode_map *s_encoding; - __u16 s_encoding_flags; -#endif /* Journaling */ struct journal_s *s_journal; diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c index ed76a6d7a2d8..f3bc69b8d4e5 100644 --- a/fs/ext4/hash.c +++ b/fs/ext4/hash.c @@ -277,7 +277,7 @@ int ext4fs_dirhash(const struct inode *dir, const char *name, int len, struct dx_hash_info *hinfo) { #ifdef CONFIG_UNICODE - const struct unicode_map *um = EXT4_SB(dir->i_sb)->s_encoding; + const struct unicode_map *um = dir->i_sb->s_encoding; int r, dlen; unsigned char *buff; struct qstr qstr = {.name = name, .len = len }; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 42177a426ca9..57aff1ae37a4 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1280,8 +1280,8 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block) int ext4_ci_compare(const struct inode *parent, const struct qstr *name, const struct qstr *entry, bool quick) { - const struct ext4_sb_info *sbi = EXT4_SB(parent->i_sb); - const struct unicode_map *um = sbi->s_encoding; + const struct super_block *sb = parent->i_sb; + const struct unicode_map *um = sb->s_encoding; int ret; if (quick) @@ -1293,7 +1293,7 @@ int ext4_ci_compare(const struct inode *parent, const struct qstr *name, /* Handle invalid character sequence as either an error * or as an opaque byte sequence. */ - if (ext4_has_strict_mode(sbi)) + if (sb_has_enc_strict_mode(sb)) return -EINVAL; if (name->len != entry->len) @@ -1310,7 +1310,7 @@ void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname, { int len; - if (!IS_CASEFOLDED(dir) || !EXT4_SB(dir->i_sb)->s_encoding) { + if (!needs_casefold(dir)) { cf_name->name = NULL; return; } @@ -1319,7 +1319,7 @@ void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname, if (!cf_name->name) return; - len = utf8_casefold(EXT4_SB(dir->i_sb)->s_encoding, + len = utf8_casefold(dir->i_sb->s_encoding, iname, cf_name->name, EXT4_NAME_LEN); if (len <= 0) { @@ -1356,7 +1356,7 @@ static inline bool ext4_match(const struct inode *parent, #endif #ifdef CONFIG_UNICODE - if (EXT4_SB(parent->i_sb)->s_encoding && IS_CASEFOLDED(parent)) { + if (needs_casefold(parent)) { if (fname->cf_name.name) { struct qstr cf = {.name = fname->cf_name.name, .len = fname->cf_name.len}; @@ -2172,7 +2172,6 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, struct ext4_dir_entry_2 *de; struct ext4_dir_entry_tail *t; struct super_block *sb; - struct ext4_sb_info *sbi; struct ext4_filename fname; int retval; int dx_fallback=0; @@ -2184,14 +2183,13 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, csum_size = sizeof(struct ext4_dir_entry_tail); sb = dir->i_sb; - sbi = EXT4_SB(sb); blocksize = sb->s_blocksize; if (!dentry->d_name.len) return -EINVAL; #ifdef CONFIG_UNICODE - if (ext4_has_strict_mode(sbi) && IS_CASEFOLDED(dir) && - sbi->s_encoding && utf8_validate(sbi->s_encoding, &dentry->d_name)) + if (sb_has_enc_strict_mode(sb) && IS_CASEFOLDED(dir) && + sb->s_encoding && utf8_validate(sb->s_encoding, &dentry->d_name)) return -EINVAL; #endif diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 25fe536638d0..bbbf4775d2f6 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -989,7 +989,7 @@ static void ext4_put_super(struct super_block *sb) kfree(sbi->s_blockgroup_lock); fs_put_dax(sbi->s_daxdev); #ifdef CONFIG_UNICODE - utf8_unload(sbi->s_encoding); + utf8_unload(sb->s_encoding); #endif kfree(sbi); } @@ -3815,7 +3815,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; #ifdef CONFIG_UNICODE - if (ext4_has_feature_casefold(sb) && !sbi->s_encoding) { + if (ext4_has_feature_casefold(sb) && !sb->s_encoding) { const struct ext4_sb_encodings *encoding_info; struct unicode_map *encoding; __u16 encoding_flags; @@ -3846,8 +3846,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "%s-%s with flags 0x%hx", encoding_info->name, encoding_info->version?:"\b", encoding_flags); - sbi->s_encoding = encoding; - sbi->s_encoding_flags = encoding_flags; + sb->s_encoding = encoding; + sb->s_encoding_flags = encoding_flags; } #endif @@ -4470,7 +4470,7 @@ no_journal: } #ifdef CONFIG_UNICODE - if (sbi->s_encoding) + if (sb->s_encoding) sb->s_d_op = &ext4_dentry_ops; #endif @@ -4654,7 +4654,7 @@ failed_mount: crypto_free_shash(sbi->s_chksum_driver); #ifdef CONFIG_UNICODE - utf8_unload(sbi->s_encoding); + utf8_unload(sb->s_encoding); #endif #ifdef CONFIG_QUOTA From 207be96aba2472e1dd875bd192fe39b5b0d6a257 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Mon, 27 Jan 2020 20:04:40 -0800 Subject: [PATCH 3226/3715] FROMLIST: fscrypt: Have filesystems handle their d_ops This shifts the responsibility of setting up dentry operations from fscrypt to the individual filesystems, allowing them to have their own operations while still setting fscrypt's d_revalidate as appropriate. Also added helper function to libfs to unify ext4 and f2fs implementations. Signed-off-by: Daniel Rosenberg Test: Boots, /data/media is case insensitive Bug: 138322712 Link: https://lore.kernel.org/linux-f2fs-devel/20200208013552.241832-1-drosen@google.com/T/#t Change-Id: Iaf77f8c5961ecf22e22478701ab0b7fe2025225d --- fs/crypto/fname.c | 7 ++---- fs/crypto/fscrypt_private.h | 1 - fs/crypto/hooks.c | 1 - fs/ext4/dir.c | 7 ------ fs/ext4/namei.c | 1 + fs/ext4/super.c | 5 ---- fs/f2fs/dir.c | 7 ------ fs/f2fs/f2fs.h | 3 --- fs/f2fs/namei.c | 1 + fs/f2fs/super.c | 1 - fs/libfs.c | 50 +++++++++++++++++++++++++++++++++++++ fs/ubifs/dir.c | 18 +++++++++++++ include/linux/fs.h | 2 ++ include/linux/fscrypt.h | 6 +++-- 14 files changed, 78 insertions(+), 32 deletions(-) diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index f64d66e10129..63bfe5e8accd 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -544,7 +544,7 @@ EXPORT_SYMBOL_GPL(fscrypt_fname_siphash); * Validate dentries in encrypted directories to make sure we aren't potentially * caching stale dentries after a key has been added. */ -static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) +int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) { struct dentry *dir; int err; @@ -583,7 +583,4 @@ static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) return valid; } - -const struct dentry_operations fscrypt_d_ops = { - .d_revalidate = fscrypt_d_revalidate, -}; +EXPORT_SYMBOL(fscrypt_d_revalidate); diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index bc53caf70497..f78cc4dfb452 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -275,7 +275,6 @@ extern int fscrypt_fname_encrypt(const struct inode *inode, extern bool fscrypt_fname_encrypted_size(const struct inode *inode, u32 orig_len, u32 max_len, u32 *encrypted_len_ret); -extern const struct dentry_operations fscrypt_d_ops; /* hkdf.c */ diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index 4ca167017d67..a6396bf721ac 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -117,7 +117,6 @@ int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, spin_lock(&dentry->d_lock); dentry->d_flags |= DCACHE_ENCRYPTED_NAME; spin_unlock(&dentry->d_lock); - d_set_d_op(dentry, &fscrypt_d_ops); } return err; } diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index bb6ff465ae3d..9c68ffb4c2cf 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -662,10 +662,3 @@ const struct file_operations ext4_dir_operations = { .open = ext4_dir_open, .release = ext4_release_dir, }; - -#ifdef CONFIG_UNICODE -const struct dentry_operations ext4_dentry_ops = { - .d_hash = generic_ci_d_hash, - .d_compare = generic_ci_d_compare, -}; -#endif diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 57aff1ae37a4..87c0fe645337 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1607,6 +1607,7 @@ static struct buffer_head *ext4_lookup_entry(struct inode *dir, struct buffer_head *bh; err = ext4_fname_prepare_lookup(dir, dentry, &fname); + generic_set_encrypted_ci_d_ops(dir, dentry); if (err == -ENOENT) return NULL; if (err) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index bbbf4775d2f6..a7ac63e921c8 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4469,11 +4469,6 @@ no_journal: goto failed_mount4; } -#ifdef CONFIG_UNICODE - if (sb->s_encoding) - sb->s_d_op = &ext4_dentry_ops; -#endif - sb->s_root = d_make_root(root); if (!sb->s_root) { ext4_msg(sb, KERN_ERR, "get root dentry failed"); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 704d68c34130..2620132f3bad 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -1077,10 +1077,3 @@ const struct file_operations f2fs_dir_operations = { .compat_ioctl = f2fs_compat_ioctl, #endif }; - -#ifdef CONFIG_UNICODE -const struct dentry_operations f2fs_dentry_ops = { - .d_hash = generic_ci_d_hash, - .d_compare = generic_ci_d_compare, -}; -#endif diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3beb72a44ff9..23aa07c86499 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3626,9 +3626,6 @@ static inline void update_sit_info(struct f2fs_sb_info *sbi) {} #endif extern const struct file_operations f2fs_dir_operations; -#ifdef CONFIG_UNICODE -extern const struct dentry_operations f2fs_dentry_ops; -#endif extern const struct file_operations f2fs_file_operations; extern const struct inode_operations f2fs_file_inode_operations; extern const struct address_space_operations f2fs_dblock_aops; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 76ba1e2fee48..198e77c15f59 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -494,6 +494,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, } err = fscrypt_prepare_lookup(dir, dentry, &fname); + generic_set_encrypted_ci_d_ops(dir, dentry); if (err == -ENOENT) goto out_splice; if (err) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 7e1b9a6d1f90..6aaefdc269a7 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3326,7 +3326,6 @@ static int f2fs_setup_casefold(struct f2fs_sb_info *sbi) sbi->sb->s_encoding = encoding; sbi->sb->s_encoding_flags = encoding_flags; - sbi->sb->s_d_op = &f2fs_dentry_ops; } #else if (f2fs_sb_has_casefold(sbi)) { diff --git a/fs/libfs.c b/fs/libfs.c index f59b163c38ac..27def8ba162d 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1279,4 +1279,54 @@ err: return ret; } EXPORT_SYMBOL(generic_ci_d_hash); + +static const struct dentry_operations generic_ci_dentry_ops = { + .d_hash = generic_ci_d_hash, + .d_compare = generic_ci_d_compare, +}; #endif + +#ifdef CONFIG_FS_ENCRYPTION +static const struct dentry_operations generic_encrypted_dentry_ops = { + .d_revalidate = fscrypt_d_revalidate, +}; +#endif + +#if IS_ENABLED(CONFIG_UNICODE) && IS_ENABLED(CONFIG_FS_ENCRYPTION) +static const struct dentry_operations generic_encrypted_ci_dentry_ops = { + .d_hash = generic_ci_d_hash, + .d_compare = generic_ci_d_compare, + .d_revalidate = fscrypt_d_revalidate, +}; +#endif + +/** + * generic_set_encrypted_ci_d_ops - helper for setting d_ops for given dentry + * @dir: parent of dentry whose ops to set + * @dentry: detnry to set ops on + * + * This function sets the dentry ops for the given dentry to handle both + * casefolding and encryption of the dentry name. + */ +void generic_set_encrypted_ci_d_ops(struct inode *dir, struct dentry *dentry) +{ +#ifdef CONFIG_FS_ENCRYPTION + if (dentry->d_flags & DCACHE_ENCRYPTED_NAME) { +#ifdef CONFIG_UNICODE + if (dir->i_sb->s_encoding) { + d_set_d_op(dentry, &generic_encrypted_ci_dentry_ops); + return; + } +#endif + d_set_d_op(dentry, &generic_encrypted_dentry_ops); + return; + } +#endif +#ifdef CONFIG_UNICODE + if (dir->i_sb->s_encoding) { + d_set_d_op(dentry, &generic_ci_dentry_ops); + return; + } +#endif +} +EXPORT_SYMBOL(generic_set_encrypted_ci_d_ops); diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 3c806194942d..7d5c2cf95353 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -208,6 +208,7 @@ static int dbg_check_name(const struct ubifs_info *c, return 0; } +static void ubifs_set_d_ops(struct inode *dir, struct dentry *dentry); static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { @@ -221,6 +222,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, dbg_gen("'%pd' in dir ino %lu", dentry, dir->i_ino); err = fscrypt_prepare_lookup(dir, dentry, &nm); + ubifs_set_d_ops(dir, dentry); if (err == -ENOENT) return d_splice_alias(NULL, dentry); if (err) @@ -1684,3 +1686,19 @@ const struct file_operations ubifs_dir_operations = { .compat_ioctl = ubifs_compat_ioctl, #endif }; + +#ifdef CONFIG_FS_ENCRYPTION +static const struct dentry_operations ubifs_encrypted_dentry_ops = { + .d_revalidate = fscrypt_d_revalidate, +}; +#endif + +static void ubifs_set_d_ops(struct inode *dir, struct dentry *dentry) +{ +#ifdef CONFIG_FS_ENCRYPTION + if (dentry->d_flags & DCACHE_ENCRYPTED_NAME) { + d_set_d_op(dentry, &ubifs_encrypted_dentry_ops); + return; + } +#endif +} diff --git a/include/linux/fs.h b/include/linux/fs.h index 828d4a55bf37..90d15f08f4dc 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3211,6 +3211,8 @@ static inline bool needs_casefold(const struct inode *dir) return 0; } #endif +extern void generic_set_encrypted_ci_d_ops(struct inode *dir, + struct dentry *dentry); #ifdef CONFIG_MIGRATION extern int buffer_migrate_page(struct address_space *, diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 12b53d0f6961..db2dbc0b972a 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -138,6 +138,7 @@ static inline struct page *fscrypt_pagecache_page(struct page *bounce_page) } extern void fscrypt_free_bounce_page(struct page *bounce_page); +extern int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags); /* policy.c */ extern int fscrypt_ioctl_set_policy(struct file *, const void __user *); @@ -670,8 +671,9 @@ static inline int fscrypt_prepare_rename(struct inode *old_dir, * filenames are presented in encrypted form. Therefore, we'll try to set up * the directory's encryption key, but even without it the lookup can continue. * - * This also installs a custom ->d_revalidate() method which will invalidate the - * dentry if it was created without the key and the key is later added. + * After calling this function, a filesystem should ensure that it's dentry + * operations contain fscrypt_d_revalidate if DCACHE_ENCRYPTED_NAME was set, + * so that the dentry can be invalidated if the key is later added. * * Return: 0 on success; -ENOENT if key is unavailable but the filename isn't a * correctly formed encoded ciphertext name, so a negative dentry should be From 10d4512505762621bbd20bef4195681fee5a7c42 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Mon, 30 Sep 2019 12:53:51 -0700 Subject: [PATCH 3227/3715] FROMLIST: f2fs: Handle casefolding with Encryption This expands f2fs's casefolding support to include encrypted directories. For encrypted directories, we use the siphash of the casefolded name. This ensures there is no direct way to go from an unencrypted name to the stored hash on disk without knowledge of the encryption policy keys. Additionally, we switch to using the vfs layer's casefolding support instead of storing this information inside of f2fs's private data. Signed-off-by: Daniel Rosenberg Note: Fixed some missing type conversions, crypto length issue and hash check for ciphertext name Test: Boots, /data/media is case insensitive Bug: 138322712 Link: https://lore.kernel.org/linux-f2fs-devel/20200208013552.241832-1-drosen@google.com/T/#t Change-Id: I8f1e324472668e27d3e059cc80e4c981ce89dd9b --- fs/f2fs/dir.c | 67 +++++++++++++++++++++++++++++++----------------- fs/f2fs/f2fs.h | 11 +++----- fs/f2fs/hash.c | 25 ++++++++++++------ fs/f2fs/inline.c | 9 ++++--- fs/f2fs/super.c | 6 ----- 5 files changed, 68 insertions(+), 50 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 2620132f3bad..8f83bfa8ffc6 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -108,34 +108,52 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir, * Test whether a case-insensitive directory entry matches the filename * being searched for. * + * Only called for encrypted names if the key is available. + * * Returns: 0 if the directory entry matches, more than 0 if it * doesn't match or less than zero on error. */ -int f2fs_ci_compare(const struct inode *parent, const struct qstr *name, - const struct qstr *entry, bool quick) +static int f2fs_ci_compare(const struct inode *parent, const struct qstr *name, + u8 *de_name, size_t de_name_len, bool quick) { const struct super_block *sb = parent->i_sb; const struct unicode_map *um = sb->s_encoding; + struct fscrypt_str decrypted_name = FSTR_INIT(NULL, de_name_len); + struct qstr entry = QSTR_INIT(de_name, de_name_len); int ret; - if (quick) - ret = utf8_strncasecmp_folded(um, name, entry); - else - ret = utf8_strncasecmp(um, name, entry); + if (IS_ENCRYPTED(parent)) { + const struct fscrypt_str encrypted_name = + FSTR_INIT(de_name, de_name_len); + decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL); + if (!decrypted_name.name) + return -ENOMEM; + ret = fscrypt_fname_disk_to_usr(parent, 0, 0, &encrypted_name, + &decrypted_name); + if (ret < 0) + goto out; + entry.name = decrypted_name.name; + entry.len = decrypted_name.len; + } + + if (quick) + ret = utf8_strncasecmp_folded(um, name, &entry); + else + ret = utf8_strncasecmp(um, name, &entry); if (ret < 0) { /* Handle invalid character sequence as either an error * or as an opaque byte sequence. */ if (sb_has_enc_strict_mode(sb)) - return -EINVAL; - - if (name->len != entry->len) - return 1; - - return !!memcmp(name->name, entry->name, name->len); + ret = -EINVAL; + else if (name->len != entry.len) + ret = 1; + else + ret = !!memcmp(name->name, entry.name, entry.len); } - +out: + kfree(decrypted_name.name); return ret; } @@ -173,24 +191,24 @@ static inline bool f2fs_match_name(struct f2fs_dentry_ptr *d, { #ifdef CONFIG_UNICODE struct inode *parent = d->inode; - struct super_block *sb = parent->i_sb; - struct qstr entry; + u8 *name; + int len; #endif if (de->hash_code != namehash) return false; #ifdef CONFIG_UNICODE - entry.name = d->filename[bit_pos]; - entry.len = de->name_len; + name = d->filename[bit_pos]; + len = le16_to_cpu(de->name_len); - if (sb->s_encoding && IS_CASEFOLDED(parent)) { + if (needs_casefold(parent)) { if (cf_str->name) { struct qstr cf = {.name = cf_str->name, .len = cf_str->len}; - return !f2fs_ci_compare(parent, &cf, &entry, true); + return !f2fs_ci_compare(parent, &cf, name, len, true); } - return !f2fs_ci_compare(parent, fname->usr_fname, &entry, + return !f2fs_ci_compare(parent, fname->usr_fname, name, len, false); } #endif @@ -616,13 +634,13 @@ void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, const struct qstr *orig_name, + f2fs_hash_t dentry_hash, struct inode *inode, nid_t ino, umode_t mode) { unsigned int bit_pos; unsigned int level; unsigned int current_depth; unsigned long bidx, block; - f2fs_hash_t dentry_hash; unsigned int nbucket, nblock; struct page *dentry_page = NULL; struct f2fs_dentry_block *dentry_blk = NULL; @@ -632,7 +650,6 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, level = 0; slots = GET_DENTRY_SLOTS(new_name->len); - dentry_hash = f2fs_dentry_hash(dir, new_name, NULL); current_depth = F2FS_I(dir)->i_current_depth; if (F2FS_I(dir)->chash == dentry_hash) { @@ -718,17 +735,19 @@ int f2fs_add_dentry(struct inode *dir, struct fscrypt_name *fname, struct inode *inode, nid_t ino, umode_t mode) { struct qstr new_name; + f2fs_hash_t dentry_hash; int err = -EAGAIN; new_name.name = fname_name(fname); new_name.len = fname_len(fname); if (f2fs_has_inline_dentry(dir)) - err = f2fs_add_inline_entry(dir, &new_name, fname->usr_fname, + err = f2fs_add_inline_entry(dir, &new_name, fname, inode, ino, mode); + dentry_hash = f2fs_dentry_hash(dir, &new_name, fname); if (err == -EAGAIN) err = f2fs_add_regular_entry(dir, &new_name, fname->usr_fname, - inode, ino, mode); + dentry_hash, inode, ino, mode); f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); return err; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 23aa07c86499..ad0ec1b7c141 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3075,11 +3075,6 @@ int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name, bool hot, bool set); struct dentry *f2fs_get_parent(struct dentry *child); -extern int f2fs_ci_compare(const struct inode *parent, - const struct qstr *name, - const struct qstr *entry, - bool quick); - /* * dir.c */ @@ -3113,7 +3108,7 @@ void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, const struct qstr *name, f2fs_hash_t name_hash, unsigned int bit_pos); int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, - const struct qstr *orig_name, + const struct qstr *orig_name, f2fs_hash_t dentry_hash, struct inode *inode, nid_t ino, umode_t mode); int f2fs_add_dentry(struct inode *dir, struct fscrypt_name *fname, struct inode *inode, nid_t ino, umode_t mode); @@ -3146,7 +3141,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi); * hash.c */ f2fs_hash_t f2fs_dentry_hash(const struct inode *dir, - const struct qstr *name_info, struct fscrypt_name *fname); + const struct qstr *name_info, const struct fscrypt_name *fname); /* * node.c @@ -3656,7 +3651,7 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent, struct page *ipage); int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, - const struct qstr *orig_name, + const struct fscrypt_name *fname, struct inode *inode, nid_t ino, umode_t mode); void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, struct inode *dir, diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index 28acb24e7a7a..8f7ee4362312 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -68,8 +68,9 @@ static void str2hashbuf(const unsigned char *msg, size_t len, *buf++ = pad; } -static f2fs_hash_t __f2fs_dentry_hash(const struct qstr *name_info, - struct fscrypt_name *fname) +static f2fs_hash_t __f2fs_dentry_hash(const struct inode *dir, + const struct qstr *name_info, + const struct fscrypt_name *fname) { __u32 hash; f2fs_hash_t f2fs_hash; @@ -79,12 +80,17 @@ static f2fs_hash_t __f2fs_dentry_hash(const struct qstr *name_info, size_t len = name_info->len; /* encrypted bigname case */ - if (fname && !fname->disk_name.name) + if (fname && fname->is_ciphertext_name) return cpu_to_le32(fname->hash); if (is_dot_dotdot(name_info)) return 0; + if (IS_CASEFOLDED(dir) && IS_ENCRYPTED(dir)) { + f2fs_hash = cpu_to_le32(fscrypt_fname_siphash(dir, name_info)); + return f2fs_hash; + } + /* Initialize the default seed for the hash checksum functions */ buf[0] = 0x67452301; buf[1] = 0xefcdab89; @@ -106,7 +112,7 @@ static f2fs_hash_t __f2fs_dentry_hash(const struct qstr *name_info, } f2fs_hash_t f2fs_dentry_hash(const struct inode *dir, - const struct qstr *name_info, struct fscrypt_name *fname) + const struct qstr *name_info, const struct fscrypt_name *fname) { #ifdef CONFIG_UNICODE struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); @@ -114,27 +120,30 @@ f2fs_hash_t f2fs_dentry_hash(const struct inode *dir, int r, dlen; unsigned char *buff; struct qstr folded; + const struct qstr *name = fname ? fname->usr_fname : name_info; if (!name_info->len || !IS_CASEFOLDED(dir)) goto opaque_seq; + if (IS_ENCRYPTED(dir) && !fscrypt_has_encryption_key(dir)) + goto opaque_seq; + buff = f2fs_kzalloc(sbi, sizeof(char) * PATH_MAX, GFP_KERNEL); if (!buff) return -ENOMEM; - - dlen = utf8_casefold(um, name_info, buff, PATH_MAX); + dlen = utf8_casefold(um, name, buff, PATH_MAX); if (dlen < 0) { kvfree(buff); goto opaque_seq; } folded.name = buff; folded.len = dlen; - r = __f2fs_dentry_hash(&folded, fname); + r = __f2fs_dentry_hash(dir, &folded, fname); kvfree(buff); return r; opaque_seq: #endif - return __f2fs_dentry_hash(name_info, fname); + return __f2fs_dentry_hash(dir, name_info, fname); } diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index cabc7a69cee4..c68a32369f44 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -483,8 +483,8 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry) ino = le32_to_cpu(de->ino); fake_mode = f2fs_get_de_type(de) << S_SHIFT; - err = f2fs_add_regular_entry(dir, &new_name, NULL, NULL, - ino, fake_mode); + err = f2fs_add_regular_entry(dir, &new_name, NULL, + de->hash_code, NULL, ino, fake_mode); if (err) goto punch_dentry_pages; @@ -596,7 +596,7 @@ out: } int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, - const struct qstr *orig_name, + const struct fscrypt_name *fname, struct inode *inode, nid_t ino, umode_t mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); @@ -607,6 +607,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, struct f2fs_dentry_ptr d; int slots = GET_DENTRY_SLOTS(new_name->len); struct page *page = NULL; + const struct qstr *orig_name = fname->usr_fname; int err = 0; ipage = f2fs_get_node_page(sbi, dir->i_ino); @@ -637,7 +638,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, f2fs_wait_on_page_writeback(ipage, NODE, true, true); - name_hash = f2fs_dentry_hash(dir, new_name, NULL); + name_hash = f2fs_dentry_hash(dir, new_name, fname); f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos); set_page_dirty(ipage); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 6aaefdc269a7..e67f2a86105f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3298,12 +3298,6 @@ static int f2fs_setup_casefold(struct f2fs_sb_info *sbi) struct unicode_map *encoding; __u16 encoding_flags; - if (f2fs_sb_has_encrypt(sbi)) { - f2fs_err(sbi, - "Can't mount with encoding and encryption"); - return -EINVAL; - } - if (f2fs_sb_read_encoding(sbi->raw_super, &encoding_info, &encoding_flags)) { f2fs_err(sbi, From 4195d64396b7e3ddd90942a19e20c08041b21614 Mon Sep 17 00:00:00 2001 From: Miles Chen Date: Thu, 12 Sep 2019 18:34:52 +0800 Subject: [PATCH 3228/3715] UPSTREAM: sched/psi: Correct overly pessimistic size calculation When passing a equal or more then 32 bytes long string to psi_write(), psi_write() copies 31 bytes to its buf and overwrites buf[30] with '\0'. Which makes the input string 1 byte shorter than it should be. Fix it by copying sizeof(buf) bytes when nbytes >= sizeof(buf). This does not cause problems in normal use case like: "some 500000 10000000" or "full 500000 10000000" because they are less than 32 bytes in length. /* assuming nbytes == 35 */ char buf[32]; buf_size = min(nbytes, (sizeof(buf) - 1)); /* buf_size = 31 */ if (copy_from_user(buf, user_buf, buf_size)) return -EFAULT; buf[buf_size - 1] = '\0'; /* buf[30] = '\0' */ Before: %cd /proc/pressure/ %echo "123456789|123456789|123456789|1234" > memory [ 22.473497] nbytes=35,buf_size=31 [ 22.473775] 123456789|123456789|123456789| (print 30 chars) %sh: write error: Invalid argument %echo "123456789|123456789|123456789|1" > memory [ 64.916162] nbytes=32,buf_size=31 [ 64.916331] 123456789|123456789|123456789| (print 30 chars) %sh: write error: Invalid argument After: %cd /proc/pressure/ %echo "123456789|123456789|123456789|1234" > memory [ 254.837863] nbytes=35,buf_size=32 [ 254.838541] 123456789|123456789|123456789|1 (print 31 chars) %sh: write error: Invalid argument %echo "123456789|123456789|123456789|1" > memory [ 9965.714935] nbytes=32,buf_size=32 [ 9965.715096] 123456789|123456789|123456789|1 (print 31 chars) %sh: write error: Invalid argument Also remove the superfluous parentheses. Signed-off-by: Miles Chen Cc: Cc: Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190912103452.13281-1-miles.chen@mediatek.com Signed-off-by: Ingo Molnar (cherry picked from commit 4adcdcea717cb2d8436bef00dd689aa5bc76f11b) Signed-off-by: Suren Baghdasaryan Change-Id: I9371b4d5e465bb8b84ff7adf5f40f30696c6ff70 --- kernel/sched/psi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 3f85da02be4e..457eae0b39c5 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -1199,7 +1199,7 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf, if (static_branch_likely(&psi_disabled)) return -EOPNOTSUPP; - buf_size = min(nbytes, (sizeof(buf) - 1)); + buf_size = min(nbytes, sizeof(buf)); if (copy_from_user(buf, user_buf, buf_size)) return -EFAULT; From 8f19a0ef6a3e5f5af96e0435266a4ba5054cde61 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 3 Dec 2019 13:35:23 -0500 Subject: [PATCH 3229/3715] UPSTREAM: sched/psi: Fix sampling error and rare div0 crashes with cgroups and high uptime Jingfeng reports rare div0 crashes in psi on systems with some uptime: [58914.066423] divide error: 0000 [#1] SMP [58914.070416] Modules linked in: ipmi_poweroff ipmi_watchdog toa overlay fuse tcp_diag inet_diag binfmt_misc aisqos(O) aisqos_hotfixes(O) [58914.083158] CPU: 94 PID: 140364 Comm: kworker/94:2 Tainted: G W OE K 4.9.151-015.ali3000.alios7.x86_64 #1 [58914.093722] Hardware name: Alibaba Alibaba Cloud ECS/Alibaba Cloud ECS, BIOS 3.23.34 02/14/2019 [58914.102728] Workqueue: events psi_update_work [58914.107258] task: ffff8879da83c280 task.stack: ffffc90059dcc000 [58914.113336] RIP: 0010:[] [] psi_update_stats+0x1c1/0x330 [58914.122183] RSP: 0018:ffffc90059dcfd60 EFLAGS: 00010246 [58914.127650] RAX: 0000000000000000 RBX: ffff8858fe98be50 RCX: 000000007744d640 [58914.134947] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00003594f700648e [58914.142243] RBP: ffffc90059dcfdf8 R08: 0000359500000000 R09: 0000000000000000 [58914.149538] R10: 0000000000000000 R11: 0000000000000000 R12: 0000359500000000 [58914.156837] R13: 0000000000000000 R14: 0000000000000000 R15: ffff8858fe98bd78 [58914.164136] FS: 0000000000000000(0000) GS:ffff887f7f380000(0000) knlGS:0000000000000000 [58914.172529] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [58914.178467] CR2: 00007f2240452090 CR3: 0000005d5d258000 CR4: 00000000007606f0 [58914.185765] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [58914.193061] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [58914.200360] PKRU: 55555554 [58914.203221] Stack: [58914.205383] ffff8858fe98bd48 00000000000002f0 0000002e81036d09 ffffc90059dcfde8 [58914.213168] ffff8858fe98bec8 0000000000000000 0000000000000000 0000000000000000 [58914.220951] 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [58914.228734] Call Trace: [58914.231337] [] psi_update_work+0x22/0x60 [58914.237067] [] process_one_work+0x189/0x420 [58914.243063] [] worker_thread+0x4e/0x4b0 [58914.248701] [] ? process_one_work+0x420/0x420 [58914.254869] [] kthread+0xe6/0x100 [58914.259994] [] ? kthread_park+0x60/0x60 [58914.265640] [] ret_from_fork+0x39/0x50 [58914.271193] Code: 41 29 c3 4d 39 dc 4d 0f 42 dc <49> f7 f1 48 8b 13 48 89 c7 48 c1 [58914.279691] RIP [] psi_update_stats+0x1c1/0x330 The crashing instruction is trying to divide the observed stall time by the sampling period. The period, stored in R8, is not 0, but we are dividing by the lower 32 bits only, which are all 0 in this instance. We could switch to a 64-bit division, but the period shouldn't be that big in the first place. It's the time between the last update and the next scheduled one, and so should always be around 2s and comfortably fit into 32 bits. The bug is in the initialization of new cgroups: we schedule the first sampling event in a cgroup as an offset of sched_clock(), but fail to initialize the last_update timestamp, and it defaults to 0. That results in a bogusly large sampling period the first time we run the sampling code, and consequently we underreport pressure for the first 2s of a cgroup's life. But worse, if sched_clock() is sufficiently advanced on the system, and the user gets unlucky, the period's lower 32 bits can all be 0 and the sampling division will crash. Fix this by initializing the last update timestamp to the creation time of the cgroup, thus correctly marking the start of the first pressure sampling period in a new cgroup. Reported-by: Jingfeng Xie Signed-off-by: Johannes Weiner Signed-off-by: Peter Zijlstra (Intel) Cc: Suren Baghdasaryan Link: https://lkml.kernel.org/r/20191203183524.41378-2-hannes@cmpxchg.org Signed-off-by: Sasha Levin (cherry picked from commit 3dfbe25c27eab7c90c8a7e97b4c354a9d24dd985) Signed-off-by: Suren Baghdasaryan Change-Id: Iaada5c2f1a03cf38cbb053adde478f762ce40843 --- kernel/sched/psi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 457eae0b39c5..bb66cadba361 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -186,7 +186,8 @@ static void group_init(struct psi_group *group) for_each_possible_cpu(cpu) seqcount_init(&per_cpu_ptr(group->pcpu, cpu)->seq); - group->avg_next_update = sched_clock() + psi_period; + group->avg_last_update = sched_clock(); + group->avg_next_update = group->avg_last_update + psi_period; INIT_DELAYED_WORK(&group->avgs_work, psi_avgs_work); mutex_init(&group->avgs_lock); /* Init trigger-related members */ From f2920d064c055152e8775c786f3010bd5ec3322e Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 3 Dec 2019 13:35:24 -0500 Subject: [PATCH 3230/3715] UPSTREAM: psi: Fix a division error in psi poll() The psi window size is a u64 an can be up to 10 seconds right now, which exceeds the lower 32 bits of the variable. We currently use div_u64 for it, which is meant only for 32-bit divisors. The result is garbage pressure sampling values and even potential div0 crashes. Use div64_u64. Signed-off-by: Johannes Weiner Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Suren Baghdasaryan Cc: Jingfeng Xie Link: https://lkml.kernel.org/r/20191203183524.41378-3-hannes@cmpxchg.org Signed-off-by: Sasha Levin (cherry picked from commit c3466952ca1514158d7c16c9cfc48c27d5c5dc0f) Signed-off-by: Suren Baghdasaryan Change-Id: I49fdfd55751d1a2cde19666624c9c5d76dc78dad --- kernel/sched/psi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index bb66cadba361..1043170f04ed 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -483,7 +483,7 @@ static u64 window_update(struct psi_window *win, u64 now, u64 value) u32 remaining; remaining = win->size - elapsed; - growth += div_u64(win->prev_growth * remaining, win->size); + growth += div64_u64(win->prev_growth * remaining, win->size); } return growth; From ec932f3932e53f4cf5eceef1e88071a325655a2d Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Mon, 3 Feb 2020 13:22:16 -0800 Subject: [PATCH 3231/3715] UPSTREAM: sched/psi: Fix OOB write when writing 0 bytes to PSI files Issuing write() with count parameter set to 0 on any file under /proc/pressure/ will cause an OOB write because of the access to buf[buf_size-1] when NUL-termination is performed. Fix this by checking for buf_size to be non-zero. Signed-off-by: Suren Baghdasaryan Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Johannes Weiner Link: https://lkml.kernel.org/r/20200203212216.7076-1-surenb@google.com (cherry picked from commit 6fcca0fa48118e6d63733eb4644c6cd880c15b8f) Bug: 148159562 Signed-off-by: Suren Baghdasaryan Change-Id: I9ec7acfc6e1083c677a95b0ea1c559ab50152873 --- kernel/sched/psi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 1043170f04ed..bc4b72527b02 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -1200,6 +1200,9 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf, if (static_branch_likely(&psi_disabled)) return -EOPNOTSUPP; + if (!nbytes) + return -EINVAL; + buf_size = min(nbytes, sizeof(buf)); if (copy_from_user(buf, user_buf, buf_size)) return -EFAULT; From 104b89080f2fad90fc67ab108d9a826015e28d73 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 12 Jul 2016 11:04:26 +0200 Subject: [PATCH 3232/3715] KVM: x86: emulate RDPID commit fb6d4d340e0532032c808a9933eaaa7b8de435ab upstream. This is encoded as F3 0F C7 /7 with a register argument. The register argument is the second array in the group9 GroupDual, while F3 is the fourth element of a Prefix. Reviewed-by: Wanpeng Li Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/cpuid.c | 7 ++++++- arch/x86/kvm/emulate.c | 22 +++++++++++++++++++++- arch/x86/kvm/vmx.c | 15 +++++++++++++++ 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 38959b173a42..1152afad524f 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -291,13 +291,18 @@ static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry, { switch (func) { case 0: - entry->eax = 1; /* only one leaf currently */ + entry->eax = 7; ++*nent; break; case 1: entry->ecx = F(MOVBE); ++*nent; break; + case 7: + entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + if (index == 0) + entry->ecx = F(RDPID); + ++*nent; default: break; } diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 041b9b05fae1..70f3636aff11 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3539,6 +3539,16 @@ static int em_cwd(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int em_rdpid(struct x86_emulate_ctxt *ctxt) +{ + u64 tsc_aux = 0; + + if (ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux)) + return emulate_gp(ctxt, 0); + ctxt->dst.val = tsc_aux; + return X86EMUL_CONTINUE; +} + static int em_rdtsc(struct x86_emulate_ctxt *ctxt) { u64 tsc = 0; @@ -4431,10 +4441,20 @@ static const struct opcode group8[] = { F(DstMem | SrcImmByte | Lock | PageTable, em_btc), }; +/* + * The "memory" destination is actually always a register, since we come + * from the register case of group9. + */ +static const struct gprefix pfx_0f_c7_7 = { + N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdtscp), +}; + + static const struct group_dual group9 = { { N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N, }, { - N, N, N, N, N, N, N, N, + N, N, N, N, N, N, N, + GP(0, &pfx_0f_c7_7), } }; static const struct opcode group11[] = { diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 809d1b031fd9..4790994854bb 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -12322,6 +12322,21 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, enum x86_intercept_stage stage) { + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; + + /* + * RDPID causes #UD if disabled through secondary execution controls. + * Because it is marked as EmulateOnUD, we need to intercept it here. + */ + if (info->intercept == x86_intercept_rdtscp && + !nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) { + ctxt->exception.vector = UD_VECTOR; + ctxt->exception.error_code_valid = false; + return X86EMUL_PROPAGATE_FAULT; + } + + /* TODO: check more intercepts... */ return X86EMUL_CONTINUE; } From f16be63f67450e6964cd90878d875f1b9eb2be6e Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 18 Feb 2020 18:12:41 +0000 Subject: [PATCH 3233/3715] iommu/qcom: Fix bogus detach logic commit faf305c51aeabd1ea2d7131e798ef5f55f4a7750 upstream. Currently, the implementation of qcom_iommu_domain_free() is guaranteed to do one of two things: WARN() and leak everything, or dereference NULL and crash. That alone is terrible, but in fact the whole idea of trying to track the liveness of a domain via the qcom_domain->iommu pointer as a sanity check is full of fundamentally flawed assumptions. Make things robust and actually functional by not trying to be quite so clever. Reported-by: Brian Masney Tested-by: Brian Masney Reported-by: Naresh Kamboju Fixes: 0ae349a0f33f ("iommu/qcom: Add qcom_iommu") Signed-off-by: Robin Murphy Tested-by: Stephan Gerhold Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/qcom_iommu.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index c8a587d034b0..b08002851e06 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -327,21 +327,19 @@ static void qcom_iommu_domain_free(struct iommu_domain *domain) { struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); - if (WARN_ON(qcom_domain->iommu)) /* forgot to detach? */ - return; - iommu_put_dma_cookie(domain); - /* NOTE: unmap can be called after client device is powered off, - * for example, with GPUs or anything involving dma-buf. So we - * cannot rely on the device_link. Make sure the IOMMU is on to - * avoid unclocked accesses in the TLB inv path: - */ - pm_runtime_get_sync(qcom_domain->iommu->dev); - - free_io_pgtable_ops(qcom_domain->pgtbl_ops); - - pm_runtime_put_sync(qcom_domain->iommu->dev); + if (qcom_domain->iommu) { + /* + * NOTE: unmap can be called after client device is powered + * off, for example, with GPUs or anything involving dma-buf. + * So we cannot rely on the device_link. Make sure the IOMMU + * is on to avoid unclocked accesses in the TLB inv path: + */ + pm_runtime_get_sync(qcom_domain->iommu->dev); + free_io_pgtable_ops(qcom_domain->pgtbl_ops); + pm_runtime_put_sync(qcom_domain->iommu->dev); + } kfree(qcom_domain); } @@ -386,7 +384,7 @@ static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *de struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); unsigned i; - if (!qcom_domain->iommu) + if (WARN_ON(!qcom_domain->iommu)) return; pm_runtime_get_sync(qcom_iommu->dev); @@ -397,8 +395,6 @@ static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *de iommu_writel(ctx, ARM_SMMU_CB_SCTLR, 0); } pm_runtime_put_sync(qcom_iommu->dev); - - qcom_domain->iommu = NULL; } static int qcom_iommu_map(struct iommu_domain *domain, unsigned long iova, From feb0b94a36b3aa610841d13017bc8c64148a01e0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 18 Feb 2020 10:14:09 +0100 Subject: [PATCH 3234/3715] ALSA: hda: Use scnprintf() for printing texts for sysfs/procfs commit 44eeb081b8630bb3ad3cd381d1ae1831463e48bb upstream. Some code in HD-audio driver calls snprintf() in a loop and still expects that the return value were actually written size, while snprintf() returns the expected would-be length instead. When the given buffer limit were small, this leads to a buffer overflow. Use scnprintf() for addressing those issues. It returns the actually written size unlike snprintf(). Cc: Link: https://lore.kernel.org/r/20200218091409.27162-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/hda/hdmi_chmap.c | 2 +- sound/pci/hda/hda_codec.c | 2 +- sound/pci/hda/hda_eld.c | 2 +- sound/pci/hda/hda_sysfs.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sound/hda/hdmi_chmap.c b/sound/hda/hdmi_chmap.c index f21633cd9b38..acbe61b8db7b 100644 --- a/sound/hda/hdmi_chmap.c +++ b/sound/hda/hdmi_chmap.c @@ -249,7 +249,7 @@ void snd_hdac_print_channel_allocation(int spk_alloc, char *buf, int buflen) for (i = 0, j = 0; i < ARRAY_SIZE(cea_speaker_allocation_names); i++) { if (spk_alloc & (1 << i)) - j += snprintf(buf + j, buflen - j, " %s", + j += scnprintf(buf + j, buflen - j, " %s", cea_speaker_allocation_names[i]); } buf[j] = '\0'; /* necessary when j == 0 */ diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index e3f3351da480..a6f7561e7bb9 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -4002,7 +4002,7 @@ void snd_print_pcm_bits(int pcm, char *buf, int buflen) for (i = 0, j = 0; i < ARRAY_SIZE(bits); i++) if (pcm & (AC_SUPPCM_BITS_8 << i)) - j += snprintf(buf + j, buflen - j, " %d", bits[i]); + j += scnprintf(buf + j, buflen - j, " %d", bits[i]); buf[j] = '\0'; /* necessary when j == 0 */ } diff --git a/sound/pci/hda/hda_eld.c b/sound/pci/hda/hda_eld.c index ba7fe9b6655c..864cc8c9ada0 100644 --- a/sound/pci/hda/hda_eld.c +++ b/sound/pci/hda/hda_eld.c @@ -373,7 +373,7 @@ static void hdmi_print_pcm_rates(int pcm, char *buf, int buflen) for (i = 0, j = 0; i < ARRAY_SIZE(alsa_rates); i++) if (pcm & (1 << i)) - j += snprintf(buf + j, buflen - j, " %d", + j += scnprintf(buf + j, buflen - j, " %d", alsa_rates[i]); buf[j] = '\0'; /* necessary when j == 0 */ diff --git a/sound/pci/hda/hda_sysfs.c b/sound/pci/hda/hda_sysfs.c index 9b7efece4484..2a173de7ca02 100644 --- a/sound/pci/hda/hda_sysfs.c +++ b/sound/pci/hda/hda_sysfs.c @@ -221,7 +221,7 @@ static ssize_t init_verbs_show(struct device *dev, mutex_lock(&codec->user_mutex); for (i = 0; i < codec->init_verbs.used; i++) { struct hda_verb *v = snd_array_elem(&codec->init_verbs, i); - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "0x%02x 0x%03x 0x%04x\n", v->nid, v->verb, v->param); } @@ -271,7 +271,7 @@ static ssize_t hints_show(struct device *dev, mutex_lock(&codec->user_mutex); for (i = 0; i < codec->hints.used; i++) { struct hda_hint *hint = snd_array_elem(&codec->hints, i); - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "%s = %s\n", hint->key, hint->val); } mutex_unlock(&codec->user_mutex); From 0b3dc89884f3df90b5c85edc6812d0033780e1f6 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Mon, 17 Feb 2020 00:42:22 -0600 Subject: [PATCH 3235/3715] ASoC: sun8i-codec: Fix setting DAI data format commit 96781fd941b39e1f78098009344ebcd7af861c67 upstream. Use the correct mask for this two-bit field. This fixes setting the DAI data format to RIGHT_J or DSP_A. Fixes: 36c684936fae ("ASoC: Add sun8i digital audio codec") Signed-off-by: Samuel Holland Acked-by: Chen-Yu Tsai Cc: stable@kernel.org Link: https://lore.kernel.org/r/20200217064250.15516-7-samuel@sholland.org Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/sunxi/sun8i-codec.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/sunxi/sun8i-codec.c b/sound/soc/sunxi/sun8i-codec.c index 7a312168f864..a031f25031b4 100644 --- a/sound/soc/sunxi/sun8i-codec.c +++ b/sound/soc/sunxi/sun8i-codec.c @@ -71,6 +71,7 @@ #define SUN8I_SYS_SR_CTRL_AIF1_FS_MASK GENMASK(15, 12) #define SUN8I_SYS_SR_CTRL_AIF2_FS_MASK GENMASK(11, 8) +#define SUN8I_AIF1CLK_CTRL_AIF1_DATA_FMT_MASK GENMASK(3, 2) #define SUN8I_AIF1CLK_CTRL_AIF1_WORD_SIZ_MASK GENMASK(5, 4) #define SUN8I_AIF1CLK_CTRL_AIF1_LRCK_DIV_MASK GENMASK(8, 6) #define SUN8I_AIF1CLK_CTRL_AIF1_BCLK_DIV_MASK GENMASK(12, 9) @@ -221,7 +222,7 @@ static int sun8i_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) return -EINVAL; } regmap_update_bits(scodec->regmap, SUN8I_AIF1CLK_CTRL, - BIT(SUN8I_AIF1CLK_CTRL_AIF1_DATA_FMT), + SUN8I_AIF1CLK_CTRL_AIF1_DATA_FMT_MASK, value << SUN8I_AIF1CLK_CTRL_AIF1_DATA_FMT); return 0; From 344ea2525869377c7906ff53079ba1d4c388bc60 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Tue, 20 Aug 2019 00:16:40 -0500 Subject: [PATCH 3236/3715] ecryptfs: fix a memory leak bug in parse_tag_1_packet() commit fe2e082f5da5b4a0a92ae32978f81507ef37ec66 upstream. In parse_tag_1_packet(), if tag 1 packet contains a key larger than ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES, no cleanup is executed, leading to a memory leak on the allocated 'auth_tok_list_item'. To fix this issue, go to the label 'out_free' to perform the cleanup work. Cc: stable@vger.kernel.org Fixes: dddfa461fc89 ("[PATCH] eCryptfs: Public key; packet management") Signed-off-by: Wenwen Wang Signed-off-by: Tyler Hicks Signed-off-by: Greg Kroah-Hartman --- fs/ecryptfs/keystore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index fa218cd64f74..3f3ec50bf773 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -1285,7 +1285,7 @@ parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat, printk(KERN_ERR "Enter w/ first byte != 0x%.2x\n", ECRYPTFS_TAG_1_PACKET_TYPE); rc = -EINVAL; - goto out; + goto out_free; } /* Released: wipe_auth_tok_list called in ecryptfs_parse_packet_set or * at end of function upon failure */ From 0db1fe06b692f5d18b6815d8f0e87b05d167ff21 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Tue, 20 Aug 2019 00:33:54 -0500 Subject: [PATCH 3237/3715] ecryptfs: fix a memory leak bug in ecryptfs_init_messaging() commit b4a81b87a4cfe2bb26a4a943b748d96a43ef20e8 upstream. In ecryptfs_init_messaging(), if the allocation for 'ecryptfs_msg_ctx_arr' fails, the previously allocated 'ecryptfs_daemon_hash' is not deallocated, leading to a memory leak bug. To fix this issue, free 'ecryptfs_daemon_hash' before returning the error. Cc: stable@vger.kernel.org Fixes: 88b4a07e6610 ("[PATCH] eCryptfs: Public key transport mechanism") Signed-off-by: Wenwen Wang Signed-off-by: Tyler Hicks Signed-off-by: Greg Kroah-Hartman --- fs/ecryptfs/messaging.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index 4f457d5c4933..26464f9d9b76 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -397,6 +397,7 @@ int __init ecryptfs_init_messaging(void) * ecryptfs_message_buf_len), GFP_KERNEL); if (!ecryptfs_msg_ctx_arr) { + kfree(ecryptfs_daemon_hash); rc = -ENOMEM; printk(KERN_ERR "%s: Failed to allocate memory\n", __func__); goto out; From a16888a6ad3f3f72634f02f07b97b993f5e6338d Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Thu, 13 Feb 2020 16:59:15 -0800 Subject: [PATCH 3238/3715] Input: synaptics - switch T470s to RMI4 by default commit bf502391353b928e63096127e5fd8482080203f5 upstream. This supports RMI4 and everything seems to work, including the touchpad buttons. So, let's enable this by default. Signed-off-by: Lyude Paul Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200204194322.112638-1-lyude@redhat.com Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 111a71190547..82456816c472 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -172,6 +172,7 @@ static const char * const smbus_pnp_ids[] = { "LEN004a", /* W541 */ "LEN005b", /* P50 */ "LEN005e", /* T560 */ + "LEN006c", /* T470s */ "LEN0071", /* T480 */ "LEN0072", /* X1 Carbon Gen 5 (2017) - Elan/ALPS trackpoint */ "LEN0073", /* X1 Carbon G5 (Elantech) */ From 659cda0d2beacb821dc84ebbd224667e02565544 Mon Sep 17 00:00:00 2001 From: Gaurav Agrawal Date: Thu, 13 Feb 2020 17:06:10 -0800 Subject: [PATCH 3239/3715] Input: synaptics - enable SMBus on ThinkPad L470 commit b8a3d819f872e0a3a0a6db0dbbcd48071042fb98 upstream. Add touchpad LEN2044 to the list, as it is capable of working with psmouse.synaptics_intertouch=1 Signed-off-by: Gaurav Agrawal Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/CADdtggVzVJq5gGNmFhKSz2MBwjTpdN5YVOdr4D3Hkkv=KZRc9g@mail.gmail.com Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 82456816c472..512709da9bbe 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -183,6 +183,7 @@ static const char * const smbus_pnp_ids[] = { "LEN0097", /* X280 -> ALPS trackpoint */ "LEN009b", /* T580 */ "LEN200f", /* T450s */ + "LEN2044", /* L470 */ "LEN2054", /* E480 */ "LEN2055", /* E580 */ "SYN3052", /* HP EliteBook 840 G4 */ From c73c7fd542088a1b70f7f23795c4eaf2d5ef9462 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 13 Feb 2020 17:07:47 -0800 Subject: [PATCH 3240/3715] Input: synaptics - remove the LEN0049 dmi id from topbuttonpad list commit 5179a9dfa9440c1781816e2c9a183d1d2512dc61 upstream. The Yoga 11e is using LEN0049, but it doesn't have a trackstick. Thus, there is no need to create a software top buttons row. However, it seems that the device works under SMBus, so keep it as part of the smbus_pnp_ids. Signed-off-by: Benjamin Tissoires Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200115013023.9710-1-benjamin.tissoires@redhat.com Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/synaptics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 512709da9bbe..5f764e0993a4 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -149,7 +149,6 @@ static const char * const topbuttonpad_pnp_ids[] = { "LEN0042", /* Yoga */ "LEN0045", "LEN0047", - "LEN0049", "LEN2000", /* S540 */ "LEN2001", /* Edge E431 */ "LEN2002", /* Edge E531 */ @@ -169,6 +168,7 @@ static const char * const smbus_pnp_ids[] = { /* all of the topbuttonpad_pnp_ids are valid, we just add some extras */ "LEN0048", /* X1 Carbon 3 */ "LEN0046", /* X250 */ + "LEN0049", /* Yoga 11e */ "LEN004a", /* W541 */ "LEN005b", /* P50 */ "LEN005e", /* T560 */ From 333f517d48fa54aed52776bb0b881124a8fb0e58 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Tue, 11 Feb 2020 11:22:35 -0500 Subject: [PATCH 3241/3715] ALSA: usb-audio: Apply sample rate quirk for Audioengine D1 commit 93f9d1a4ac5930654c17412e3911b46ece73755a upstream. The Audioengine D1 (0x2912:0x30c8) does support reading the sample rate, but it returns the rate in byte-reversed order. When setting sampling rate, the driver produces these warning messages: [168840.944226] usb 3-2.2: current rate 4500480 is different from the runtime rate 44100 [168854.930414] usb 3-2.2: current rate 8436480 is different from the runtime rate 48000 [168905.185825] usb 3-2.1.2: current rate 30465 is different from the runtime rate 96000 As can be seen from the hexadecimal conversion, the current rate read back is byte-reversed from the rate that was set. 44100 == 0x00ac44, 4500480 == 0x44ac00 48000 == 0x00bb80, 8436480 == 0x80bb00 96000 == 0x017700, 30465 == 0x007701 Rather than implementing a new quirk to reverse the order, just skip checking the rate to avoid spamming the log. Signed-off-by: Arvind Sankar Cc: Link: https://lore.kernel.org/r/20200211162235.1639889-1-nivedita@alum.mit.edu Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 51ee7910e98c..4872c27f6054 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1151,6 +1151,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) case USB_ID(0x1de7, 0x0014): /* Phoenix Audio TMX320 */ case USB_ID(0x1de7, 0x0114): /* Phoenix Audio MT202pcs */ case USB_ID(0x21B4, 0x0081): /* AudioQuest DragonFly */ + case USB_ID(0x2912, 0x30c8): /* Audioengine D1 */ return true; } return false; From 1f32a6a26818115a5d544e249b72c2471e575357 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 14 Feb 2020 15:19:35 +0000 Subject: [PATCH 3242/3715] arm64: cpufeature: Set the FP/SIMD compat HWCAP bits properly commit 7559950aef1ab8792c50797c6c5c7c5150a02460 upstream We set the compat_elf_hwcap bits unconditionally on arm64 to include the VFP and NEON support. However, the FP/SIMD unit is optional on Arm v8 and thus could be missing. We already handle this properly in the kernel, but still advertise to the COMPAT applications that the VFP is available. Fix this to make sure we only advertise when we really have them. Cc: stable@vger.kernel.org # v4.14 Cc: Will Deacon Cc: Mark Rutland Reviewed-by: Ard Biesheuvel Reviewed-by: Catalin Marinas Signed-off-by: Suzuki K Poulose Signed-off-by: Sasha Levin --- arch/arm64/kernel/cpufeature.c | 52 +++++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index c477fd34a912..6b3bb67596ae 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -41,9 +41,7 @@ EXPORT_SYMBOL_GPL(elf_hwcap); #define COMPAT_ELF_HWCAP_DEFAULT \ (COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\ COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\ - COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\ - COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ - COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\ + COMPAT_HWCAP_TLS|COMPAT_HWCAP_IDIV|\ COMPAT_HWCAP_LPAE) unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; unsigned int compat_elf_hwcap2 __read_mostly; @@ -1134,17 +1132,30 @@ static const struct arm64_cpu_capabilities arm64_features[] = { {}, }; -#define HWCAP_CAP(reg, field, s, min_value, cap_type, cap) \ - { \ - .desc = #cap, \ - .type = ARM64_CPUCAP_SYSTEM_FEATURE, \ + +#define HWCAP_CPUID_MATCH(reg, field, s, min_value) \ .matches = has_cpuid_feature, \ .sys_reg = reg, \ .field_pos = field, \ .sign = s, \ .min_field_value = min_value, \ + +#define __HWCAP_CAP(name, cap_type, cap) \ + .desc = name, \ + .type = ARM64_CPUCAP_SYSTEM_FEATURE, \ .hwcap_type = cap_type, \ .hwcap = cap, \ + +#define HWCAP_CAP(reg, field, s, min_value, cap_type, cap) \ + { \ + __HWCAP_CAP(#cap, cap_type, cap) \ + HWCAP_CPUID_MATCH(reg, field, s, min_value) \ + } + +#define HWCAP_CAP_MATCH(match, cap_type, cap) \ + { \ + __HWCAP_CAP(#cap, cap_type, cap) \ + .matches = match, \ } static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { @@ -1177,8 +1188,35 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { {}, }; +#ifdef CONFIG_COMPAT +static bool compat_has_neon(const struct arm64_cpu_capabilities *cap, int scope) +{ + /* + * Check that all of MVFR1_EL1.{SIMDSP, SIMDInt, SIMDLS} are available, + * in line with that of arm32 as in vfp_init(). We make sure that the + * check is future proof, by making sure value is non-zero. + */ + u32 mvfr1; + + WARN_ON(scope == SCOPE_LOCAL_CPU && preemptible()); + if (scope == SCOPE_SYSTEM) + mvfr1 = read_sanitised_ftr_reg(SYS_MVFR1_EL1); + else + mvfr1 = read_sysreg_s(SYS_MVFR1_EL1); + + return cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDSP_SHIFT) && + cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDINT_SHIFT) && + cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDLS_SHIFT); +} +#endif + static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = { #ifdef CONFIG_COMPAT + HWCAP_CAP_MATCH(compat_has_neon, CAP_COMPAT_HWCAP, COMPAT_HWCAP_NEON), + HWCAP_CAP(SYS_MVFR1_EL1, MVFR1_SIMDFMAC_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4), + /* Arm v8 mandates MVFR0.FPDP == {0, 2}. So, piggy back on this for the presence of VFP support */ + HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP), + HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1), From 06fb1c6d23bba70a69cf62e3b972b3061a693401 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 14 Feb 2020 15:19:36 +0000 Subject: [PATCH 3243/3715] arm64: ptrace: nofpsimd: Fail FP/SIMD regset operations commit c9d66999f064947e6b577ceacc1eb2fbca6a8d3c upstream When fp/simd is not supported on the system, fail the operations of FP/SIMD regsets. Cc: stable@vger.kernel.org # v4.14 Cc: Will Deacon Cc: Mark Rutland Reviewed-by: Ard Biesheuvel Reviewed-by: Catalin Marinas Signed-off-by: Suzuki K Poulose Signed-off-by: Sasha Levin --- arch/arm64/kernel/ptrace.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 242527f29c41..e230b4dff960 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -624,6 +624,13 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, return 0; } +static int fpr_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!system_supports_fpsimd()) + return -ENODEV; + return regset->n; +} + /* * TODO: update fp accessors for lazy context switching (sync/flush hwstate) */ @@ -634,6 +641,9 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset, struct user_fpsimd_state *uregs; uregs = &target->thread.fpsimd_state.user_fpsimd; + if (!system_supports_fpsimd()) + return -EINVAL; + if (target == current) fpsimd_preserve_current_state(); @@ -648,6 +658,9 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, struct user_fpsimd_state newstate = target->thread.fpsimd_state.user_fpsimd; + if (!system_supports_fpsimd()) + return -EINVAL; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate, 0, -1); if (ret) return ret; @@ -740,6 +753,7 @@ static const struct user_regset aarch64_regsets[] = { */ .size = sizeof(u32), .align = sizeof(u32), + .active = fpr_active, .get = fpr_get, .set = fpr_set }, @@ -914,6 +928,9 @@ static int compat_vfp_get(struct task_struct *target, compat_ulong_t fpscr; int ret, vregs_end_pos; + if (!system_supports_fpsimd()) + return -EINVAL; + uregs = &target->thread.fpsimd_state.user_fpsimd; if (target == current) @@ -947,6 +964,9 @@ static int compat_vfp_set(struct task_struct *target, compat_ulong_t fpscr; int ret, vregs_end_pos; + if (!system_supports_fpsimd()) + return -EINVAL; + uregs = &target->thread.fpsimd_state.user_fpsimd; vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t); @@ -1004,6 +1024,7 @@ static const struct user_regset aarch32_regsets[] = { .n = VFP_STATE_SIZE / sizeof(compat_ulong_t), .size = sizeof(compat_ulong_t), .align = sizeof(compat_ulong_t), + .active = fpr_active, .get = compat_vfp_get, .set = compat_vfp_set }, From afe95b7f56a8908c2c1cbf1e9e20058786f14b09 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 14 Feb 2020 15:19:37 +0000 Subject: [PATCH 3244/3715] arm64: nofpsimd: Handle TIF_FOREIGN_FPSTATE flag cleanly commit 52f73c383b2418f2d31b798e765ae7d596c35021 upstream We detect the absence of FP/SIMD after an incapable CPU is brought up, and by then we have kernel threads running already with TIF_FOREIGN_FPSTATE set which could be set for early userspace applications (e.g, modprobe triggered from initramfs) and init. This could cause the applications to loop forever in do_nofity_resume() as we never clear the TIF flag, once we now know that we don't support FP. Fix this by making sure that we clear the TIF_FOREIGN_FPSTATE flag for tasks which may have them set, as we would have done in the normal case, but avoiding touching the hardware state (since we don't support any). Cc: stable@vger.kernel.org # v4.14 Cc: Will Deacon Cc: Mark Rutland Reviewed-by: Ard Biesheuvel Reviewed-by: Catalin Marinas Acked-by: Marc Zyngier Signed-off-by: Suzuki K Poulose Signed-off-by: Sasha Levin --- arch/arm64/kernel/fpsimd.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index f4fdf6420ac5..4cd962f6c430 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -206,8 +206,19 @@ void fpsimd_preserve_current_state(void) */ void fpsimd_restore_current_state(void) { - if (!system_supports_fpsimd()) + /* + * For the tasks that were created before we detected the absence of + * FP/SIMD, the TIF_FOREIGN_FPSTATE could be set via fpsimd_thread_switch(), + * e.g, init. This could be then inherited by the children processes. + * If we later detect that the system doesn't support FP/SIMD, + * we must clear the flag for all the tasks to indicate that the + * FPSTATE is clean (as we can't have one) to avoid looping for ever in + * do_notify_resume(). + */ + if (!system_supports_fpsimd()) { + clear_thread_flag(TIF_FOREIGN_FPSTATE); return; + } local_bh_disable(); @@ -229,7 +240,7 @@ void fpsimd_restore_current_state(void) */ void fpsimd_update_current_state(struct fpsimd_state *state) { - if (!system_supports_fpsimd()) + if (WARN_ON(!system_supports_fpsimd())) return; local_bh_disable(); From 93b79ac8be3f12b306b41bfae5977b9be6737fe9 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 29 Nov 2017 07:52:52 +0100 Subject: [PATCH 3245/3715] ARM: 8723/2: always assume the "unified" syntax for assembly code [ Upstream commit 75fea300d73ae5b18957949a53ec770daaeb6fc2 ] The GNU assembler has implemented the "unified syntax" parsing since 2005. This "unified" syntax is required when the kernel is built in Thumb2 mode. However the "unified" syntax is a mixed bag of features, including not requiring a `#' prefix with immediate operands. This leads to situations where some code builds just fine in Thumb2 mode and fails to build in ARM mode if that prefix is missing. This behavior discrepancy makes build tests less valuable, forcing both ARM and Thumb2 builds for proper coverage. Let's "fix" this issue by always using the "unified" syntax for both ARM and Thumb2 mode. Given that the documented minimum binutils version that properly builds the kernel is version 2.20 released in 2010, we can assume that any toolchain capable of building the latest kernel is also "unified syntax" capable. Whith this, a bunch of macros used to mask some differences between both syntaxes can be removed, with the side effect of making LTO easier. Suggested-by: Robin Murphy Signed-off-by: Nicolas Pitre Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/Kconfig | 7 +--- arch/arm/Makefile | 6 ++- arch/arm/include/asm/unified.h | 77 ++-------------------------------- 3 files changed, 8 insertions(+), 82 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index cf69aab648fb..ba9325fc75b8 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1533,12 +1533,10 @@ config THUMB2_KERNEL bool "Compile the kernel in Thumb-2 mode" if !CPU_THUMBONLY depends on (CPU_V7 || CPU_V7M) && !CPU_V6 && !CPU_V6K default y if CPU_THUMBONLY - select ARM_ASM_UNIFIED select ARM_UNWIND help By enabling this option, the kernel will be compiled in - Thumb-2 mode. A compiler/assembler that understand the unified - ARM-Thumb syntax is needed. + Thumb-2 mode. If unsure, say N. @@ -1573,9 +1571,6 @@ config THUMB2_AVOID_R_ARM_THM_JUMP11 Unless you are sure your tools don't have this problem, say Y. -config ARM_ASM_UNIFIED - bool - config ARM_PATCH_IDIV bool "Runtime patch udiv/sdiv instructions into __aeabi_{u}idiv()" depends on CPU_32v7 && !XIP_KERNEL diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 17e80f483281..234ee43b4438 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -115,9 +115,11 @@ ifeq ($(CONFIG_ARM_UNWIND),y) CFLAGS_ABI +=-funwind-tables endif +# Accept old syntax despite ".syntax unified" +AFLAGS_NOWARN :=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W) + ifeq ($(CONFIG_THUMB2_KERNEL),y) AFLAGS_AUTOIT :=$(call as-option,-Wa$(comma)-mimplicit-it=always,-Wa$(comma)-mauto-it) -AFLAGS_NOWARN :=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W) CFLAGS_ISA :=-mthumb $(AFLAGS_AUTOIT) $(AFLAGS_NOWARN) AFLAGS_ISA :=$(CFLAGS_ISA) -Wa$(comma)-mthumb # Work around buggy relocation from gas if requested: @@ -125,7 +127,7 @@ ifeq ($(CONFIG_THUMB2_AVOID_R_ARM_THM_JUMP11),y) CFLAGS_MODULE +=-fno-optimize-sibling-calls endif else -CFLAGS_ISA :=$(call cc-option,-marm,) +CFLAGS_ISA :=$(call cc-option,-marm,) $(AFLAGS_NOWARN) AFLAGS_ISA :=$(CFLAGS_ISA) endif diff --git a/arch/arm/include/asm/unified.h b/arch/arm/include/asm/unified.h index a91ae499614c..2c3b952be63e 100644 --- a/arch/arm/include/asm/unified.h +++ b/arch/arm/include/asm/unified.h @@ -20,8 +20,10 @@ #ifndef __ASM_UNIFIED_H #define __ASM_UNIFIED_H -#if defined(__ASSEMBLY__) && defined(CONFIG_ARM_ASM_UNIFIED) +#if defined(__ASSEMBLY__) .syntax unified +#else +__asm__(".syntax unified"); #endif #ifdef CONFIG_CPU_V7M @@ -64,77 +66,4 @@ #endif /* CONFIG_THUMB2_KERNEL */ -#ifndef CONFIG_ARM_ASM_UNIFIED - -/* - * If the unified assembly syntax isn't used (in ARM mode), these - * macros expand to an empty string - */ -#ifdef __ASSEMBLY__ - .macro it, cond - .endm - .macro itt, cond - .endm - .macro ite, cond - .endm - .macro ittt, cond - .endm - .macro itte, cond - .endm - .macro itet, cond - .endm - .macro itee, cond - .endm - .macro itttt, cond - .endm - .macro ittte, cond - .endm - .macro ittet, cond - .endm - .macro ittee, cond - .endm - .macro itett, cond - .endm - .macro itete, cond - .endm - .macro iteet, cond - .endm - .macro iteee, cond - .endm -#else /* !__ASSEMBLY__ */ -__asm__( -" .macro it, cond\n" -" .endm\n" -" .macro itt, cond\n" -" .endm\n" -" .macro ite, cond\n" -" .endm\n" -" .macro ittt, cond\n" -" .endm\n" -" .macro itte, cond\n" -" .endm\n" -" .macro itet, cond\n" -" .endm\n" -" .macro itee, cond\n" -" .endm\n" -" .macro itttt, cond\n" -" .endm\n" -" .macro ittte, cond\n" -" .endm\n" -" .macro ittet, cond\n" -" .endm\n" -" .macro ittee, cond\n" -" .endm\n" -" .macro itett, cond\n" -" .endm\n" -" .macro itete, cond\n" -" .endm\n" -" .macro iteet, cond\n" -" .endm\n" -" .macro iteee, cond\n" -" .endm\n"); -#endif /* __ASSEMBLY__ */ - -#endif /* CONFIG_ARM_ASM_UNIFIED */ - #endif /* !__ASM_UNIFIED_H */ From 6f67ad9855dd7d3640e757d9d81f3bd88388d86e Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Sun, 26 Jan 2020 15:03:34 -0700 Subject: [PATCH 3246/3715] ext4: don't assume that mmp_nodename/bdevname have NUL commit 14c9ca0583eee8df285d68a0e6ec71053efd2228 upstream. Don't assume that the mmp_nodename and mmp_bdevname strings are NUL terminated, since they are filled in by snprintf(), which is not guaranteed to do so. Link: https://lore.kernel.org/r/1580076215-1048-1-git-send-email-adilger@dilger.ca Signed-off-by: Andreas Dilger Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/mmp.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 38e6a846aac1..0c042bd43246 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -120,10 +120,10 @@ void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp, { __ext4_warning(sb, function, line, "%s", msg); __ext4_warning(sb, function, line, - "MMP failure info: last update time: %llu, last update " - "node: %s, last update device: %s", - (long long unsigned int) le64_to_cpu(mmp->mmp_time), - mmp->mmp_nodename, mmp->mmp_bdevname); + "MMP failure info: last update time: %llu, last update node: %.*s, last update device: %.*s", + (unsigned long long)le64_to_cpu(mmp->mmp_time), + (int)sizeof(mmp->mmp_nodename), mmp->mmp_nodename, + (int)sizeof(mmp->mmp_bdevname), mmp->mmp_bdevname); } /* @@ -154,6 +154,7 @@ static int kmmpd(void *data) mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval, EXT4_MMP_MIN_CHECK_INTERVAL); mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); + BUILD_BUG_ON(sizeof(mmp->mmp_bdevname) < BDEVNAME_SIZE); bdevname(bh->b_bdev, mmp->mmp_bdevname); memcpy(mmp->mmp_nodename, init_utsname()->nodename, @@ -375,7 +376,8 @@ skip: /* * Start a kernel thread to update the MMP block periodically. */ - EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s", + EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%.*s", + (int)sizeof(mmp->mmp_bdevname), bdevname(bh->b_bdev, mmp->mmp_bdevname)); if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) { From e073def68e1c683b009809a045f76facec1eb600 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 6 Feb 2020 17:35:01 -0500 Subject: [PATCH 3247/3715] ext4: fix support for inode sizes > 1024 bytes commit 4f97a68192bd33b9963b400759cef0ca5963af00 upstream. A recent commit, 9803387c55f7 ("ext4: validate the debug_want_extra_isize mount option at parse time"), moved mount-time checks around. One of those changes moved the inode size check before the blocksize variable was set to the blocksize of the file system. After 9803387c55f7 was set to the minimum allowable blocksize, which in practice on most systems would be 1024 bytes. This cuased file systems with inode sizes larger than 1024 bytes to be rejected with a message: EXT4-fs (sdXX): unsupported inode size: 4096 Fixes: 9803387c55f7 ("ext4: validate the debug_want_extra_isize mount option at parse time") Link: https://lore.kernel.org/r/20200206225252.GA3673@mit.edu Reported-by: Herbert Poetzl Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 93d8aa6ef661..e142e1f51676 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3668,6 +3668,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) */ sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; + blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); + if (blocksize < EXT4_MIN_BLOCK_SIZE || + blocksize > EXT4_MAX_BLOCK_SIZE) { + ext4_msg(sb, KERN_ERR, + "Unsupported filesystem blocksize %d (%d log_block_size)", + blocksize, le32_to_cpu(es->s_log_block_size)); + goto failed_mount; + } + if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO; @@ -3685,6 +3694,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ext4_msg(sb, KERN_ERR, "unsupported inode size: %d", sbi->s_inode_size); + ext4_msg(sb, KERN_ERR, "blocksize: %d", blocksize); goto failed_mount; } /* @@ -3848,14 +3858,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (!ext4_feature_set_ok(sb, (sb_rdonly(sb)))) goto failed_mount; - blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); - if (blocksize < EXT4_MIN_BLOCK_SIZE || - blocksize > EXT4_MAX_BLOCK_SIZE) { - ext4_msg(sb, KERN_ERR, - "Unsupported filesystem blocksize %d (%d log_block_size)", - blocksize, le32_to_cpu(es->s_log_block_size)); - goto failed_mount; - } if (le32_to_cpu(es->s_log_block_size) > (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { ext4_msg(sb, KERN_ERR, From 418899d96606265650c80fbb3e7bc3afd95589b7 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 10 Feb 2020 15:43:16 +0100 Subject: [PATCH 3248/3715] ext4: fix checksum errors with indexed dirs commit 48a34311953d921235f4d7bbd2111690d2e469cf upstream. DIR_INDEX has been introduced as a compat ext4 feature. That means that even kernels / tools that don't understand the feature may modify the filesystem. This works because for kernels not understanding indexed dir format, internal htree nodes appear just as empty directory entries. Index dir aware kernels then check the htree structure is still consistent before using the data. This all worked reasonably well until metadata checksums were introduced. The problem is that these effectively made DIR_INDEX only ro-compatible because internal htree nodes store checksums in a different place than normal directory blocks. Thus any modification ignorant to DIR_INDEX (or just clearing EXT4_INDEX_FL from the inode) will effectively cause checksum mismatch and trigger kernel errors. So we have to be more careful when dealing with indexed directories on filesystems with checksumming enabled. 1) We just disallow loading any directory inodes with EXT4_INDEX_FL when DIR_INDEX is not enabled. This is harsh but it should be very rare (it means someone disabled DIR_INDEX on existing filesystem and didn't run e2fsck), e2fsck can fix the problem, and we don't want to answer the difficult question: "Should we rather corrupt the directory more or should we ignore that DIR_INDEX feature is not set?" 2) When we find out htree structure is corrupted (but the filesystem and the directory should in support htrees), we continue just ignoring htree information for reading but we refuse to add new entries to the directory to avoid corrupting it more. Link: https://lore.kernel.org/r/20200210144316.22081-1-jack@suse.cz Fixes: dbe89444042a ("ext4: Calculate and verify checksums for htree nodes") Reviewed-by: Andreas Dilger Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/dir.c | 14 ++++++++------ fs/ext4/ext4.h | 5 ++++- fs/ext4/inode.c | 12 ++++++++++++ fs/ext4/namei.c | 7 +++++++ 4 files changed, 31 insertions(+), 7 deletions(-) diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index c17855fead7b..90beca85c416 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -125,12 +125,14 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) if (err != ERR_BAD_DX_DIR) { return err; } - /* - * We don't set the inode dirty flag since it's not - * critical that it get flushed back to the disk. - */ - ext4_clear_inode_flag(file_inode(file), - EXT4_INODE_INDEX); + /* Can we just clear INDEX flag to ignore htree information? */ + if (!ext4_has_metadata_csum(sb)) { + /* + * We don't set the inode dirty flag since it's not + * critical that it gets flushed back to the disk. + */ + ext4_clear_inode_flag(inode, EXT4_INODE_INDEX); + } } if (ext4_has_inline_data(inode)) { diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 95ef26b39e69..fcee1f9c7fe3 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2386,8 +2386,11 @@ void ext4_insert_dentry(struct inode *inode, struct ext4_filename *fname); static inline void ext4_update_dx_flag(struct inode *inode) { - if (!ext4_has_feature_dir_index(inode->i_sb)) + if (!ext4_has_feature_dir_index(inode->i_sb)) { + /* ext4_iget() should have caught this... */ + WARN_ON_ONCE(ext4_has_feature_metadata_csum(inode->i_sb)); ext4_clear_inode_flag(inode, EXT4_INODE_INDEX); + } } static const unsigned char ext4_filetype_table[] = { DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a91b8404d3dc..57118ba82929 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4817,6 +4817,18 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ret = -EFSCORRUPTED; goto bad_inode; } + /* + * If dir_index is not enabled but there's dir with INDEX flag set, + * we'd normally treat htree data as empty space. But with metadata + * checksumming that corrupts checksums so forbid that. + */ + if (!ext4_has_feature_dir_index(sb) && ext4_has_metadata_csum(sb) && + ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) { + EXT4_ERROR_INODE(inode, + "iget: Dir with htree data on filesystem without dir_index feature."); + ret = -EFSCORRUPTED; + goto bad_inode; + } ei->i_disksize = inode->i_size; #ifdef CONFIG_QUOTA ei->i_reserved_quota = 0; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 0b5c36bd5418..a7b7e0783eed 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2094,6 +2094,13 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, retval = ext4_dx_add_entry(handle, &fname, dir, inode); if (!retval || (retval != ERR_BAD_DX_DIR)) goto out; + /* Can we just ignore htree data? */ + if (ext4_has_metadata_csum(sb)) { + EXT4_ERROR_INODE(dir, + "Directory has corrupted htree index."); + retval = -EFSCORRUPTED; + goto out; + } ext4_clear_inode_flag(dir, EXT4_INODE_INDEX); dx_fallback++; ext4_mark_inode_dirty(handle, dir); From ddf391e8ae8ff0ed1cc80fcb6f2221636d40992f Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 14 Feb 2020 18:11:19 -0500 Subject: [PATCH 3249/3715] ext4: improve explanation of a mount failure caused by a misconfigured kernel commit d65d87a07476aa17df2dcb3ad18c22c154315bec upstream. If CONFIG_QFMT_V2 is not enabled, but CONFIG_QUOTA is enabled, when a user tries to mount a file system with the quota or project quota enabled, the kernel will emit a very confusing messsage: EXT4-fs warning (device vdc): ext4_enable_quotas:5914: Failed to enable quota tracking (type=0, err=-3). Please run e2fsck to fix. EXT4-fs (vdc): mount failed We will now report an explanatory message indicating which kernel configuration options have to be enabled, to avoid customer/sysadmin confusion. Link: https://lore.kernel.org/r/20200215012738.565735-1-tytso@mit.edu Google-Bug-Id: 149093531 Fixes: 7c319d328505b778 ("ext4: make quota as first class supported feature") Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e142e1f51676..5b9e7377f26e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2863,17 +2863,11 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly) return 0; } -#ifndef CONFIG_QUOTA - if (ext4_has_feature_quota(sb) && !readonly) { +#if !defined(CONFIG_QUOTA) || !defined(CONFIG_QFMT_V2) + if (!readonly && (ext4_has_feature_quota(sb) || + ext4_has_feature_project(sb))) { ext4_msg(sb, KERN_ERR, - "Filesystem with quota feature cannot be mounted RDWR " - "without CONFIG_QUOTA"); - return 0; - } - if (ext4_has_feature_project(sb) && !readonly) { - ext4_msg(sb, KERN_ERR, - "Filesystem with project quota feature cannot be mounted RDWR " - "without CONFIG_QUOTA"); + "The kernel was not built with CONFIG_QUOTA and CONFIG_QFMT_V2"); return 0; } #endif /* CONFIG_QUOTA */ From 841793cd072c701c373f1f1f27c91e435b8c864d Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 31 Jan 2020 14:06:07 +0000 Subject: [PATCH 3250/3715] Btrfs: fix race between using extent maps and merging them commit ac05ca913e9f3871126d61da275bfe8516ff01ca upstream. We have a few cases where we allow an extent map that is in an extent map tree to be merged with other extents in the tree. Such cases include the unpinning of an extent after the respective ordered extent completed or after logging an extent during a fast fsync. This can lead to subtle and dangerous problems because when doing the merge some other task might be using the same extent map and as consequence see an inconsistent state of the extent map - for example sees the new length but has seen the old start offset. With luck this triggers a BUG_ON(), and not some silent bug, such as the following one in __do_readpage(): $ cat -n fs/btrfs/extent_io.c 3061 static int __do_readpage(struct extent_io_tree *tree, 3062 struct page *page, (...) 3127 em = __get_extent_map(inode, page, pg_offset, cur, 3128 end - cur + 1, get_extent, em_cached); 3129 if (IS_ERR_OR_NULL(em)) { 3130 SetPageError(page); 3131 unlock_extent(tree, cur, end); 3132 break; 3133 } 3134 extent_offset = cur - em->start; 3135 BUG_ON(extent_map_end(em) <= cur); (...) Consider the following example scenario, where we end up hitting the BUG_ON() in __do_readpage(). We have an inode with a size of 8KiB and 2 extent maps: extent A: file offset 0, length 4KiB, disk_bytenr = X, persisted on disk by a previous transaction extent B: file offset 4KiB, length 4KiB, disk_bytenr = X + 4KiB, not yet persisted but writeback started for it already. The extent map is pinned since there's writeback and an ordered extent in progress, so it can not be merged with extent map A yet The following sequence of steps leads to the BUG_ON(): 1) The ordered extent for extent B completes, the respective page gets its writeback bit cleared and the extent map is unpinned, at that point it is not yet merged with extent map A because it's in the list of modified extents; 2) Due to memory pressure, or some other reason, the MM subsystem releases the page corresponding to extent B - btrfs_releasepage() is called and returns 1, meaning the page can be released as it's not dirty, not under writeback anymore and the extent range is not locked in the inode's iotree. However the extent map is not released, either because we are not in a context that allows memory allocations to block or because the inode's size is smaller than 16MiB - in this case our inode has a size of 8KiB; 3) Task B needs to read extent B and ends up __do_readpage() through the btrfs_readpage() callback. At __do_readpage() it gets a reference to extent map B; 4) Task A, doing a fast fsync, calls clear_em_loggin() against extent map B while holding the write lock on the inode's extent map tree - this results in try_merge_map() being called and since it's possible to merge extent map B with extent map A now (the extent map B was removed from the list of modified extents), the merging begins - it sets extent map B's start offset to 0 (was 4KiB), but before it increments the map's length to 8KiB (4kb + 4KiB), task A is at: BUG_ON(extent_map_end(em) <= cur); The call to extent_map_end() sees the extent map has a start of 0 and a length still at 4KiB, so it returns 4KiB and 'cur' is 4KiB, so the BUG_ON() is triggered. So it's dangerous to modify an extent map that is in the tree, because some other task might have got a reference to it before and still using it, and needs to see a consistent map while using it. Generally this is very rare since most paths that lookup and use extent maps also have the file range locked in the inode's iotree. The fsync path is pretty much the only exception where we don't do it to avoid serialization with concurrent reads. Fix this by not allowing an extent map do be merged if if it's being used by tasks other then the one attempting to merge the extent map (when the reference count of the extent map is greater than 2). Reported-by: ryusuke1925 Reported-by: Koki Mitani Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=206211 CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent_map.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 2e348fb0b280..c87d673ce334 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -228,6 +228,17 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) struct extent_map *merge = NULL; struct rb_node *rb; + /* + * We can't modify an extent map that is in the tree and that is being + * used by another task, as it can cause that other task to see it in + * inconsistent state during the merging. We always have 1 reference for + * the tree and 1 for this task (which is unpinning the extent map or + * clearing the logging flag), so anything > 2 means it's being used by + * other tasks too. + */ + if (refcount_read(&em->refs) > 2) + return; + if (em->start != 0) { rb = rb_prev(&em->rb_node); if (rb) From 893bb1890fb956ab960aec89ef51f1422b0a4405 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 5 Feb 2020 17:12:16 +0100 Subject: [PATCH 3251/3715] btrfs: print message when tree-log replay starts commit e8294f2f6aa6208ed0923aa6d70cea3be178309a upstream. There's no logged information about tree-log replay although this is something that points to previous unclean unmount. Other filesystems report that as well. Suggested-by: Chris Murphy CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Anand Jain Reviewed-by: Johannes Thumshirn Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/disk-io.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 44b15617c7b9..d8ab9c5a8b7d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2913,6 +2913,7 @@ retry_root_backup: /* do not make disk changes in broken FS or nologreplay is given */ if (btrfs_super_log_root(disk_super) != 0 && !btrfs_test_opt(fs_info, NOLOGREPLAY)) { + btrfs_info(fs_info, "start tree-log replay"); ret = btrfs_replay_log(fs_info, fs_devices); if (ret) { err = ret; From 13b91b8b704340c2ffd1a926a16c63091660ed3d Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 5 Feb 2020 17:12:28 +0100 Subject: [PATCH 3252/3715] btrfs: log message when rw remount is attempted with unclean tree-log commit 10a3a3edc5b89a8cd095bc63495fb1e0f42047d9 upstream. A remount to a read-write filesystem is not safe when there's tree-log to be replayed. Files that could be opened until now might be affected by the changes in the tree-log. A regular mount is needed to replay the log so the filesystem presents the consistent view with the pending changes included. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Anand Jain Reviewed-by: Johannes Thumshirn Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 3ab79fa00dc7..17a8463ef35c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1801,6 +1801,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) } if (btrfs_super_log_root(fs_info->super_copy) != 0) { + btrfs_warn(fs_info, + "mount required to replay tree-log, cannot remount read-write"); ret = -EINVAL; goto restore; } From cd24510b31c1fb04afcd84847664a76b9033d3c3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 6 Feb 2020 10:42:58 +0000 Subject: [PATCH 3253/3715] arm64: ssbs: Fix context-switch when SSBS is present on all CPUs commit fca3d33d8ad61eb53eca3ee4cac476d1e31b9008 upstream. When all CPUs in the system implement the SSBS extension, the SSBS field in PSTATE is the definitive indication of the mitigation state. Further, when the CPUs implement the SSBS manipulation instructions (advertised to userspace via an HWCAP), EL0 can toggle the SSBS field directly and so we cannot rely on any shadow state such as TIF_SSBD at all. Avoid forcing the SSBS field in context-switch on such a system, and simply rely on the PSTATE register instead. Cc: Cc: Catalin Marinas Cc: Srinivas Ramana Fixes: cbdf8a189a66 ("arm64: Force SSBS on context switch") Reviewed-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/process.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 243fd247d04e..ee5ce03c9315 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -354,6 +354,13 @@ static void ssbs_thread_switch(struct task_struct *next) if (unlikely(next->flags & PF_KTHREAD)) return; + /* + * If all CPUs implement the SSBS extension, then we just need to + * context-switch the PSTATE field. + */ + if (cpu_have_feature(cpu_feature(SSBS))) + return; + /* If the mitigation is enabled, then we leave SSBS clear. */ if ((arm64_get_ssbd_state() == ARM64_SSBD_FORCE_ENABLE) || test_tsk_thread_flag(next, TIF_SSBD)) From 8d8d60598a831f5f500fbced9b89475d39202666 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 7 Feb 2020 09:37:41 -0800 Subject: [PATCH 3254/3715] KVM: nVMX: Use correct root level for nested EPT shadow page tables commit 148d735eb55d32848c3379e460ce365f2c1cbe4b upstream. Hardcode the EPT page-walk level for L2 to be 4 levels, as KVM's MMU currently also hardcodes the page walk level for nested EPT to be 4 levels. The L2 guest is all but guaranteed to soft hang on its first instruction when L1 is using EPT, as KVM will construct 4-level page tables and then tell hardware to use 5-level page tables. Fixes: 855feb673640 ("KVM: MMU: Add 5 level EPT & Shadow page table support.") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx/vmx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 3791ce8d269e..997926a9121c 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2968,6 +2968,9 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) static int get_ept_level(struct kvm_vcpu *vcpu) { + /* Nested EPT currently only supports 4-level walks. */ + if (is_guest_mode(vcpu) && nested_cpu_has_ept(get_vmcs12(vcpu))) + return 4; if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48)) return 5; return 4; From 7680efd75c6d8b5c67d68a2aafd5218efe72307e Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Tue, 21 Jan 2020 11:12:31 -0600 Subject: [PATCH 3255/3715] perf/x86/amd: Add missing L2 misses event spec to AMD Family 17h's event map commit 25d387287cf0330abf2aad761ce6eee67326a355 upstream. Commit 3fe3331bb285 ("perf/x86/amd: Add event map for AMD Family 17h"), claimed L2 misses were unsupported, due to them not being found in its referenced documentation, whose link has now moved [1]. That old documentation listed PMCx064 unit mask bit 3 as: "LsRdBlkC: LS Read Block C S L X Change to X Miss." and bit 0 as: "IcFillMiss: IC Fill Miss" We now have new public documentation [2] with improved descriptions, that clearly indicate what events those unit mask bits represent: Bit 3 now clearly states: "LsRdBlkC: Data Cache Req Miss in L2 (all types)" and bit 0 is: "IcFillMiss: Instruction Cache Req Miss in L2." So we can now add support for L2 misses in perf's genericised events as PMCx064 with both the above unit masks. [1] The commit's original documentation reference, "Processor Programming Reference (PPR) for AMD Family 17h Model 01h, Revision B1 Processors", originally available here: https://www.amd.com/system/files/TechDocs/54945_PPR_Family_17h_Models_00h-0Fh.pdf is now available here: https://developer.amd.com/wordpress/media/2017/11/54945_PPR_Family_17h_Models_00h-0Fh.pdf [2] "Processor Programming Reference (PPR) for Family 17h Model 31h, Revision B0 Processors", available here: https://developer.amd.com/wp-content/resources/55803_0.54-PUB.pdf Fixes: 3fe3331bb285 ("perf/x86/amd: Add event map for AMD Family 17h") Reported-by: Babu Moger Signed-off-by: Kim Phillips Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Tested-by: Babu Moger Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20200121171232.28839-1-kim.phillips@amd.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/amd/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index defb536aebce..c3ec535fd36b 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -245,6 +245,7 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] = [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60, + [PERF_COUNT_HW_CACHE_MISSES] = 0x0964, [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x0287, From c9da8ee1491719001a444f4af688b75e72b58418 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 19 Nov 2019 13:17:31 +0800 Subject: [PATCH 3256/3715] padata: Remove broken queue flushing commit 07928d9bfc81640bab36f5190e8725894d93b659 upstream. The function padata_flush_queues is fundamentally broken because it cannot force padata users to complete the request that is underway. IOW padata has to passively wait for the completion of any outstanding work. As it stands flushing is used in two places. Its use in padata_stop is simply unnecessary because nothing depends on the queues to be flushed afterwards. The other use in padata_replace is more substantial as we depend on it to free the old pd structure. This patch instead uses the pd->refcnt to dynamically free the pd structure once all requests are complete. Fixes: 2b73b07ab8a4 ("padata: Flush the padata queues actively") Cc: Signed-off-by: Herbert Xu Reviewed-by: Daniel Jordan Signed-off-by: Herbert Xu [dj: leave "pd->pinst = pinst" assignment in padata_alloc_pd()] Signed-off-by: Daniel Jordan Signed-off-by: Greg Kroah-Hartman --- kernel/padata.c | 45 ++++++++++++--------------------------------- 1 file changed, 12 insertions(+), 33 deletions(-) diff --git a/kernel/padata.c b/kernel/padata.c index 87540ce72aea..528a251217df 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -34,6 +34,8 @@ #define MAX_OBJ_NUM 1000 +static void padata_free_pd(struct parallel_data *pd); + static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index) { int cpu, target_cpu; @@ -292,6 +294,7 @@ static void padata_serial_worker(struct work_struct *serial_work) struct padata_serial_queue *squeue; struct parallel_data *pd; LIST_HEAD(local_list); + int cnt; local_bh_disable(); squeue = container_of(serial_work, struct padata_serial_queue, work); @@ -301,6 +304,8 @@ static void padata_serial_worker(struct work_struct *serial_work) list_replace_init(&squeue->serial.list, &local_list); spin_unlock(&squeue->serial.lock); + cnt = 0; + while (!list_empty(&local_list)) { struct padata_priv *padata; @@ -310,9 +315,12 @@ static void padata_serial_worker(struct work_struct *serial_work) list_del_init(&padata->list); padata->serial(padata); - atomic_dec(&pd->refcnt); + cnt++; } local_bh_enable(); + + if (atomic_sub_and_test(cnt, &pd->refcnt)) + padata_free_pd(pd); } /** @@ -435,7 +443,7 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd); atomic_set(&pd->seq_nr, -1); atomic_set(&pd->reorder_objects, 0); - atomic_set(&pd->refcnt, 0); + atomic_set(&pd->refcnt, 1); pd->pinst = pinst; spin_lock_init(&pd->lock); @@ -460,31 +468,6 @@ static void padata_free_pd(struct parallel_data *pd) kfree(pd); } -/* Flush all objects out of the padata queues. */ -static void padata_flush_queues(struct parallel_data *pd) -{ - int cpu; - struct padata_parallel_queue *pqueue; - struct padata_serial_queue *squeue; - - for_each_cpu(cpu, pd->cpumask.pcpu) { - pqueue = per_cpu_ptr(pd->pqueue, cpu); - flush_work(&pqueue->work); - } - - del_timer_sync(&pd->timer); - - if (atomic_read(&pd->reorder_objects)) - padata_reorder(pd); - - for_each_cpu(cpu, pd->cpumask.cbcpu) { - squeue = per_cpu_ptr(pd->squeue, cpu); - flush_work(&squeue->work); - } - - BUG_ON(atomic_read(&pd->refcnt) != 0); -} - static void __padata_start(struct padata_instance *pinst) { pinst->flags |= PADATA_INIT; @@ -498,10 +481,6 @@ static void __padata_stop(struct padata_instance *pinst) pinst->flags &= ~PADATA_INIT; synchronize_rcu(); - - get_online_cpus(); - padata_flush_queues(pinst->pd); - put_online_cpus(); } /* Replace the internal control structure with a new one. */ @@ -522,8 +501,8 @@ static void padata_replace(struct padata_instance *pinst, if (!cpumask_equal(pd_old->cpumask.cbcpu, pd_new->cpumask.cbcpu)) notification_mask |= PADATA_CPU_SERIAL; - padata_flush_queues(pd_old); - padata_free_pd(pd_old); + if (atomic_dec_and_test(&pd_old->refcnt)) + padata_free_pd(pd_old); if (notification_mask) blocking_notifier_call_chain(&pinst->cpumask_change_notifier, From d0a06285fee84381d2c054549f74ff840c957070 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 2 Mar 2018 11:07:26 +0100 Subject: [PATCH 3257/3715] serial: imx: ensure that RX irqs are off if RX is off MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 76821e222c189b81d553b855ee7054340607eb46 upstream. Make sure that UCR1.RXDMAEN and UCR1.ATDMAEN (for the DMA case) and UCR1.RRDYEN (for the PIO case) are off iff UCR1.RXEN is disabled. This ensures that the fifo isn't read with RX disabled which results in an exception. Signed-off-by: Uwe Kleine-König [Backport to v4.14] Signed-off-by: Frieder Schrempf Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/imx.c | 118 ++++++++++++++++++++++++++------------- 1 file changed, 79 insertions(+), 39 deletions(-) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index a81a5be0cf7a..31e1e32c62c9 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -80,7 +80,7 @@ #define UCR1_IDEN (1<<12) /* Idle condition interrupt */ #define UCR1_ICD_REG(x) (((x) & 3) << 10) /* idle condition detect */ #define UCR1_RRDYEN (1<<9) /* Recv ready interrupt enable */ -#define UCR1_RDMAEN (1<<8) /* Recv ready DMA enable */ +#define UCR1_RXDMAEN (1<<8) /* Recv ready DMA enable */ #define UCR1_IREN (1<<7) /* Infrared interface enable */ #define UCR1_TXMPTYEN (1<<6) /* Transimitter empty interrupt enable */ #define UCR1_RTSDEN (1<<5) /* RTS delta interrupt enable */ @@ -352,6 +352,30 @@ static void imx_port_rts_auto(struct imx_port *sport, unsigned long *ucr2) *ucr2 |= UCR2_CTSC; } +/* + * interrupts disabled on entry + */ +static void imx_start_rx(struct uart_port *port) +{ + struct imx_port *sport = (struct imx_port *)port; + unsigned int ucr1, ucr2; + + ucr1 = readl(port->membase + UCR1); + ucr2 = readl(port->membase + UCR2); + + ucr2 |= UCR2_RXEN; + + if (sport->dma_is_enabled) { + ucr1 |= UCR1_RXDMAEN | UCR1_ATDMAEN; + } else { + ucr1 |= UCR1_RRDYEN; + } + + /* Write UCR2 first as it includes RXEN */ + writel(ucr2, port->membase + UCR2); + writel(ucr1, port->membase + UCR1); +} + /* * interrupts disabled on entry */ @@ -378,9 +402,10 @@ static void imx_stop_tx(struct uart_port *port) imx_port_rts_active(sport, &temp); else imx_port_rts_inactive(sport, &temp); - temp |= UCR2_RXEN; writel(temp, port->membase + UCR2); + imx_start_rx(port); + temp = readl(port->membase + UCR4); temp &= ~UCR4_TCEN; writel(temp, port->membase + UCR4); @@ -393,7 +418,7 @@ static void imx_stop_tx(struct uart_port *port) static void imx_stop_rx(struct uart_port *port) { struct imx_port *sport = (struct imx_port *)port; - unsigned long temp; + unsigned long ucr1, ucr2; if (sport->dma_is_enabled && sport->dma_is_rxing) { if (sport->port.suspended) { @@ -404,12 +429,18 @@ static void imx_stop_rx(struct uart_port *port) } } - temp = readl(sport->port.membase + UCR2); - writel(temp & ~UCR2_RXEN, sport->port.membase + UCR2); + ucr1 = readl(sport->port.membase + UCR1); + ucr2 = readl(sport->port.membase + UCR2); - /* disable the `Receiver Ready Interrrupt` */ - temp = readl(sport->port.membase + UCR1); - writel(temp & ~UCR1_RRDYEN, sport->port.membase + UCR1); + if (sport->dma_is_enabled) { + ucr1 &= ~(UCR1_RXDMAEN | UCR1_ATDMAEN); + } else { + ucr1 &= ~UCR1_RRDYEN; + } + writel(ucr1, port->membase + UCR1); + + ucr2 &= ~UCR2_RXEN; + writel(ucr2, port->membase + UCR2); } /* @@ -581,10 +612,11 @@ static void imx_start_tx(struct uart_port *port) imx_port_rts_active(sport, &temp); else imx_port_rts_inactive(sport, &temp); - if (!(port->rs485.flags & SER_RS485_RX_DURING_TX)) - temp &= ~UCR2_RXEN; writel(temp, port->membase + UCR2); + if (!(port->rs485.flags & SER_RS485_RX_DURING_TX)) + imx_stop_rx(port); + /* enable transmitter and shifter empty irq */ temp = readl(port->membase + UCR4); temp |= UCR4_TCEN; @@ -1206,7 +1238,7 @@ static void imx_enable_dma(struct imx_port *sport) /* set UCR1 */ temp = readl(sport->port.membase + UCR1); - temp |= UCR1_RDMAEN | UCR1_TDMAEN | UCR1_ATDMAEN; + temp |= UCR1_RXDMAEN | UCR1_TDMAEN | UCR1_ATDMAEN; writel(temp, sport->port.membase + UCR1); temp = readl(sport->port.membase + UCR2); @@ -1224,7 +1256,7 @@ static void imx_disable_dma(struct imx_port *sport) /* clear UCR1 */ temp = readl(sport->port.membase + UCR1); - temp &= ~(UCR1_RDMAEN | UCR1_TDMAEN | UCR1_ATDMAEN); + temp &= ~(UCR1_RXDMAEN | UCR1_TDMAEN | UCR1_ATDMAEN); writel(temp, sport->port.membase + UCR1); /* clear UCR2 */ @@ -1289,11 +1321,9 @@ static int imx_startup(struct uart_port *port) writel(USR1_RTSD | USR1_DTRD, sport->port.membase + USR1); writel(USR2_ORE, sport->port.membase + USR2); - if (sport->dma_is_inited && !sport->dma_is_enabled) - imx_enable_dma(sport); - temp = readl(sport->port.membase + UCR1); - temp |= UCR1_RRDYEN | UCR1_UARTEN; + temp &= ~UCR1_RRDYEN; + temp |= UCR1_UARTEN; if (sport->have_rtscts) temp |= UCR1_RTSDEN; @@ -1332,14 +1362,13 @@ static int imx_startup(struct uart_port *port) */ imx_enable_ms(&sport->port); - /* - * Start RX DMA immediately instead of waiting for RX FIFO interrupts. - * In our iMX53 the average delay for the first reception dropped from - * approximately 35000 microseconds to 1000 microseconds. - */ - if (sport->dma_is_enabled) { - imx_disable_rx_int(sport); + if (sport->dma_is_inited) { + imx_enable_dma(sport); start_rx_dma(sport); + } else { + temp = readl(sport->port.membase + UCR1); + temp |= UCR1_RRDYEN; + writel(temp, sport->port.membase + UCR1); } spin_unlock_irqrestore(&sport->port.lock, flags); @@ -1386,7 +1415,8 @@ static void imx_shutdown(struct uart_port *port) spin_lock_irqsave(&sport->port.lock, flags); temp = readl(sport->port.membase + UCR1); - temp &= ~(UCR1_TXMPTYEN | UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN); + temp &= ~(UCR1_TXMPTYEN | UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN | + UCR1_RXDMAEN | UCR1_ATDMAEN); writel(temp, sport->port.membase + UCR1); spin_unlock_irqrestore(&sport->port.lock, flags); @@ -1659,7 +1689,7 @@ static int imx_poll_init(struct uart_port *port) { struct imx_port *sport = (struct imx_port *)port; unsigned long flags; - unsigned long temp; + unsigned long ucr1, ucr2; int retval; retval = clk_prepare_enable(sport->clk_ipg); @@ -1673,16 +1703,29 @@ static int imx_poll_init(struct uart_port *port) spin_lock_irqsave(&sport->port.lock, flags); - temp = readl(sport->port.membase + UCR1); - if (is_imx1_uart(sport)) - temp |= IMX1_UCR1_UARTCLKEN; - temp |= UCR1_UARTEN | UCR1_RRDYEN; - temp &= ~(UCR1_TXMPTYEN | UCR1_RTSDEN); - writel(temp, sport->port.membase + UCR1); + /* + * Be careful about the order of enabling bits here. First enable the + * receiver (UARTEN + RXEN) and only then the corresponding irqs. + * This prevents that a character that already sits in the RX fifo is + * triggering an irq but the try to fetch it from there results in an + * exception because UARTEN or RXEN is still off. + */ + ucr1 = readl(port->membase + UCR1); + ucr2 = readl(port->membase + UCR2); - temp = readl(sport->port.membase + UCR2); - temp |= UCR2_RXEN; - writel(temp, sport->port.membase + UCR2); + if (is_imx1_uart(sport)) + ucr1 |= IMX1_UCR1_UARTCLKEN; + + ucr1 |= UCR1_UARTEN; + ucr1 &= ~(UCR1_TXMPTYEN | UCR1_RTSDEN | UCR1_RRDYEN); + + ucr2 |= UCR2_RXEN; + + writel(ucr1, sport->port.membase + UCR1); + writel(ucr2, sport->port.membase + UCR2); + + /* now enable irqs */ + writel(ucr1 | UCR1_RRDYEN, sport->port.membase + UCR1); spin_unlock_irqrestore(&sport->port.lock, flags); @@ -1742,11 +1785,8 @@ static int imx_rs485_config(struct uart_port *port, /* Make sure Rx is enabled in case Tx is active with Rx disabled */ if (!(rs485conf->flags & SER_RS485_ENABLED) || - rs485conf->flags & SER_RS485_RX_DURING_TX) { - temp = readl(sport->port.membase + UCR2); - temp |= UCR2_RXEN; - writel(temp, sport->port.membase + UCR2); - } + rs485conf->flags & SER_RS485_RX_DURING_TX) + imx_start_rx(port); port->rs485 = *rs485conf; From 55f5f2c1f39320358df3dcf026480a29a44e98be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 18 Feb 2018 22:02:44 +0100 Subject: [PATCH 3258/3715] serial: imx: Only handle irqs that are actually enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 437768962f754d9501e5ba4d98b1f2a89dc62028 upstream. Handling an irq that isn't enabled can have some undesired side effects. Some of these are mentioned in the newly introduced code comment. Some of the irq sources already had their handling right, some don't. Handle them all in the same consistent way. The change for USR1_RRDY and USR1_AGTIM drops the check for dma_is_enabled. This is correct as UCR1_RRDYEN and UCR2_ATEN are always off if dma is enabled. Signed-off-by: Uwe Kleine-König Reviewed-by: Shawn Guo [Backport to v4.14] Signed-off-by: Frieder Schrempf Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/imx.c | 53 +++++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 31e1e32c62c9..969497599e88 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -843,14 +843,42 @@ static void imx_mctrl_check(struct imx_port *sport) static irqreturn_t imx_int(int irq, void *dev_id) { struct imx_port *sport = dev_id; - unsigned int sts; - unsigned int sts2; + unsigned int usr1, usr2, ucr1, ucr2, ucr3, ucr4; irqreturn_t ret = IRQ_NONE; - sts = readl(sport->port.membase + USR1); - sts2 = readl(sport->port.membase + USR2); + usr1 = readl(sport->port.membase + USR1); + usr2 = readl(sport->port.membase + USR2); + ucr1 = readl(sport->port.membase + UCR1); + ucr2 = readl(sport->port.membase + UCR2); + ucr3 = readl(sport->port.membase + UCR3); + ucr4 = readl(sport->port.membase + UCR4); - if (sts & (USR1_RRDY | USR1_AGTIM)) { + /* + * Even if a condition is true that can trigger an irq only handle it if + * the respective irq source is enabled. This prevents some undesired + * actions, for example if a character that sits in the RX FIFO and that + * should be fetched via DMA is tried to be fetched using PIO. Or the + * receiver is currently off and so reading from URXD0 results in an + * exception. So just mask the (raw) status bits for disabled irqs. + */ + if ((ucr1 & UCR1_RRDYEN) == 0) + usr1 &= ~USR1_RRDY; + if ((ucr2 & UCR2_ATEN) == 0) + usr1 &= ~USR1_AGTIM; + if ((ucr1 & UCR1_TXMPTYEN) == 0) + usr1 &= ~USR1_TRDY; + if ((ucr4 & UCR4_TCEN) == 0) + usr2 &= ~USR2_TXDC; + if ((ucr3 & UCR3_DTRDEN) == 0) + usr1 &= ~USR1_DTRD; + if ((ucr1 & UCR1_RTSDEN) == 0) + usr1 &= ~USR1_RTSD; + if ((ucr3 & UCR3_AWAKEN) == 0) + usr1 &= ~USR1_AWAKE; + if ((ucr4 & UCR4_OREN) == 0) + usr2 &= ~USR2_ORE; + + if (usr1 & (USR1_RRDY | USR1_AGTIM)) { if (sport->dma_is_enabled) imx_dma_rxint(sport); else @@ -858,18 +886,15 @@ static irqreturn_t imx_int(int irq, void *dev_id) ret = IRQ_HANDLED; } - if ((sts & USR1_TRDY && - readl(sport->port.membase + UCR1) & UCR1_TXMPTYEN) || - (sts2 & USR2_TXDC && - readl(sport->port.membase + UCR4) & UCR4_TCEN)) { + if ((usr1 & USR1_TRDY) || (usr2 & USR2_TXDC)) { imx_txint(irq, dev_id); ret = IRQ_HANDLED; } - if (sts & USR1_DTRD) { + if (usr1 & USR1_DTRD) { unsigned long flags; - if (sts & USR1_DTRD) + if (usr1 & USR1_DTRD) writel(USR1_DTRD, sport->port.membase + USR1); spin_lock_irqsave(&sport->port.lock, flags); @@ -879,17 +904,17 @@ static irqreturn_t imx_int(int irq, void *dev_id) ret = IRQ_HANDLED; } - if (sts & USR1_RTSD) { + if (usr1 & USR1_RTSD) { imx_rtsint(irq, dev_id); ret = IRQ_HANDLED; } - if (sts & USR1_AWAKE) { + if (usr1 & USR1_AWAKE) { writel(USR1_AWAKE, sport->port.membase + USR1); ret = IRQ_HANDLED; } - if (sts2 & USR2_ORE) { + if (usr2 & USR2_ORE) { sport->port.icount.overrun++; writel(USR2_ORE, sport->port.membase + USR2); ret = IRQ_HANDLED; From dda7557605b53d36a59cac5680f14e83bc06c3fa Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 10 Feb 2020 08:10:33 -0500 Subject: [PATCH 3259/3715] IB/hfi1: Close window for pq and request coliding commit be8638344c70bf492963ace206a9896606b6922d upstream. Cleaning up a pq can result in the following warning and panic: WARNING: CPU: 52 PID: 77418 at lib/list_debug.c:53 __list_del_entry+0x63/0xd0 list_del corruption, ffff88cb2c6ac068->next is LIST_POISON1 (dead000000000100) Modules linked in: mmfs26(OE) mmfslinux(OE) tracedev(OE) 8021q garp mrp ib_isert iscsi_target_mod target_core_mod crc_t10dif crct10dif_generic opa_vnic rpcrdma ib_iser libiscsi scsi_transport_iscsi ib_ipoib(OE) bridge stp llc iTCO_wdt iTCO_vendor_support intel_powerclamp coretemp intel_rapl iosf_mbi kvm_intel kvm irqbypass crct10dif_pclmul crct10dif_common crc32_pclmul ghash_clmulni_intel ast aesni_intel ttm lrw gf128mul glue_helper ablk_helper drm_kms_helper cryptd syscopyarea sysfillrect sysimgblt fb_sys_fops drm pcspkr joydev lpc_ich mei_me drm_panel_orientation_quirks i2c_i801 mei wmi ipmi_si ipmi_devintf ipmi_msghandler nfit libnvdimm acpi_power_meter acpi_pad hfi1(OE) rdmavt(OE) rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_core binfmt_misc numatools(OE) xpmem(OE) ip_tables nfsv3 nfs_acl nfs lockd grace sunrpc fscache igb ahci i2c_algo_bit libahci dca ptp libata pps_core crc32c_intel [last unloaded: i2c_algo_bit] CPU: 52 PID: 77418 Comm: pvbatch Kdump: loaded Tainted: G OE ------------ 3.10.0-957.38.3.el7.x86_64 #1 Hardware name: HPE.COM HPE SGI 8600-XA730i Gen10/X11DPT-SB-SG007, BIOS SBED1229 01/22/2019 Call Trace: [] dump_stack+0x19/0x1b [] __warn+0xd8/0x100 [] warn_slowpath_fmt+0x5f/0x80 [] __list_del_entry+0x63/0xd0 [] list_del+0xd/0x30 [] kmem_cache_destroy+0x50/0x110 [] hfi1_user_sdma_free_queues+0xf0/0x200 [hfi1] [] hfi1_file_close+0x70/0x1e0 [hfi1] [] __fput+0xec/0x260 [] ____fput+0xe/0x10 [] task_work_run+0xbb/0xe0 [] do_notify_resume+0xa5/0xc0 [] int_signal+0x12/0x17 BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 IP: [] kmem_cache_close+0x7e/0x300 PGD 2cdab19067 PUD 2f7bfdb067 PMD 0 Oops: 0000 [#1] SMP Modules linked in: mmfs26(OE) mmfslinux(OE) tracedev(OE) 8021q garp mrp ib_isert iscsi_target_mod target_core_mod crc_t10dif crct10dif_generic opa_vnic rpcrdma ib_iser libiscsi scsi_transport_iscsi ib_ipoib(OE) bridge stp llc iTCO_wdt iTCO_vendor_support intel_powerclamp coretemp intel_rapl iosf_mbi kvm_intel kvm irqbypass crct10dif_pclmul crct10dif_common crc32_pclmul ghash_clmulni_intel ast aesni_intel ttm lrw gf128mul glue_helper ablk_helper drm_kms_helper cryptd syscopyarea sysfillrect sysimgblt fb_sys_fops drm pcspkr joydev lpc_ich mei_me drm_panel_orientation_quirks i2c_i801 mei wmi ipmi_si ipmi_devintf ipmi_msghandler nfit libnvdimm acpi_power_meter acpi_pad hfi1(OE) rdmavt(OE) rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_core binfmt_misc numatools(OE) xpmem(OE) ip_tables nfsv3 nfs_acl nfs lockd grace sunrpc fscache igb ahci i2c_algo_bit libahci dca ptp libata pps_core crc32c_intel [last unloaded: i2c_algo_bit] CPU: 52 PID: 77418 Comm: pvbatch Kdump: loaded Tainted: G W OE ------------ 3.10.0-957.38.3.el7.x86_64 #1 Hardware name: HPE.COM HPE SGI 8600-XA730i Gen10/X11DPT-SB-SG007, BIOS SBED1229 01/22/2019 task: ffff88cc26db9040 ti: ffff88b5393a8000 task.ti: ffff88b5393a8000 RIP: 0010:[] [] kmem_cache_close+0x7e/0x300 RSP: 0018:ffff88b5393abd60 EFLAGS: 00010287 RAX: 0000000000000000 RBX: ffff88cb2c6ac000 RCX: 0000000000000003 RDX: 0000000000000400 RSI: 0000000000000400 RDI: ffffffff9095b800 RBP: ffff88b5393abdb0 R08: ffffffff9095b808 R09: ffffffff8ff77c19 R10: ffff88b73ce1f160 R11: ffffddecddde9800 R12: ffff88cb2c6ac000 R13: 000000000000000c R14: ffff88cf3fdca780 R15: 0000000000000000 FS: 00002aaaaab52500(0000) GS:ffff88b73ce00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000010 CR3: 0000002d27664000 CR4: 00000000007607e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: [] __kmem_cache_shutdown+0x14/0x80 [] kmem_cache_destroy+0x58/0x110 [] hfi1_user_sdma_free_queues+0xf0/0x200 [hfi1] [] hfi1_file_close+0x70/0x1e0 [hfi1] [] __fput+0xec/0x260 [] ____fput+0xe/0x10 [] task_work_run+0xbb/0xe0 [] do_notify_resume+0xa5/0xc0 [] int_signal+0x12/0x17 Code: 00 00 ba 00 04 00 00 0f 4f c2 3d 00 04 00 00 89 45 bc 0f 84 e7 01 00 00 48 63 45 bc 49 8d 04 c4 48 89 45 b0 48 8b 80 c8 00 00 00 <48> 8b 78 10 48 89 45 c0 48 83 c0 10 48 89 45 d0 48 8b 17 48 39 RIP [] kmem_cache_close+0x7e/0x300 RSP CR2: 0000000000000010 The panic is the result of slab entries being freed during the destruction of the pq slab. The code attempts to quiesce the pq, but looking for n_req == 0 doesn't account for new requests. Fix the issue by using SRCU to get a pq pointer and adjust the pq free logic to NULL the fd pq pointer prior to the quiesce. Fixes: e87473bc1b6c ("IB/hfi1: Only set fd pointer when base context is completely initialized") Link: https://lore.kernel.org/r/20200210131033.87408.81174.stgit@awfm-01.aw.intel.com Reviewed-by: Kaike Wan Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/file_ops.c | 52 ++++++++++++++--------- drivers/infiniband/hw/hfi1/hfi.h | 5 ++- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 3 -- drivers/infiniband/hw/hfi1/user_sdma.c | 17 +++++--- 4 files changed, 48 insertions(+), 29 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 76861a8b5c1e..b3ab803bf8b1 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -195,23 +195,24 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) fd = kzalloc(sizeof(*fd), GFP_KERNEL); - if (fd) { - fd->rec_cpu_num = -1; /* no cpu affinity by default */ - fd->mm = current->mm; - mmgrab(fd->mm); - fd->dd = dd; - kobject_get(&fd->dd->kobj); - fp->private_data = fd; - } else { - fp->private_data = NULL; - - if (atomic_dec_and_test(&dd->user_refcount)) - complete(&dd->user_comp); - - return -ENOMEM; - } - + if (!fd || init_srcu_struct(&fd->pq_srcu)) + goto nomem; + spin_lock_init(&fd->pq_rcu_lock); + spin_lock_init(&fd->tid_lock); + spin_lock_init(&fd->invalid_lock); + fd->rec_cpu_num = -1; /* no cpu affinity by default */ + fd->mm = current->mm; + mmgrab(fd->mm); + fd->dd = dd; + kobject_get(&fd->dd->kobj); + fp->private_data = fd; return 0; +nomem: + kfree(fd); + fp->private_data = NULL; + if (atomic_dec_and_test(&dd->user_refcount)) + complete(&dd->user_comp); + return -ENOMEM; } static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, @@ -417,21 +418,30 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from) { struct hfi1_filedata *fd = kiocb->ki_filp->private_data; - struct hfi1_user_sdma_pkt_q *pq = fd->pq; + struct hfi1_user_sdma_pkt_q *pq; struct hfi1_user_sdma_comp_q *cq = fd->cq; int done = 0, reqs = 0; unsigned long dim = from->nr_segs; + int idx; - if (!cq || !pq) + idx = srcu_read_lock(&fd->pq_srcu); + pq = srcu_dereference(fd->pq, &fd->pq_srcu); + if (!cq || !pq) { + srcu_read_unlock(&fd->pq_srcu, idx); return -EIO; + } - if (!iter_is_iovec(from) || !dim) + if (!iter_is_iovec(from) || !dim) { + srcu_read_unlock(&fd->pq_srcu, idx); return -EINVAL; + } trace_hfi1_sdma_request(fd->dd, fd->uctxt->ctxt, fd->subctxt, dim); - if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) + if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) { + srcu_read_unlock(&fd->pq_srcu, idx); return -ENOSPC; + } while (dim) { int ret; @@ -449,6 +459,7 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from) reqs++; } + srcu_read_unlock(&fd->pq_srcu, idx); return reqs; } @@ -824,6 +835,7 @@ done: if (atomic_dec_and_test(&dd->user_refcount)) complete(&dd->user_comp); + cleanup_srcu_struct(&fdata->pq_srcu); kfree(fdata); return 0; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index af550c1767e3..810ef5114772 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1353,10 +1353,13 @@ struct mmu_rb_handler; /* Private data for file operations */ struct hfi1_filedata { + struct srcu_struct pq_srcu; struct hfi1_devdata *dd; struct hfi1_ctxtdata *uctxt; struct hfi1_user_sdma_comp_q *cq; - struct hfi1_user_sdma_pkt_q *pq; + /* update side lock for SRCU */ + spinlock_t pq_rcu_lock; + struct hfi1_user_sdma_pkt_q __rcu *pq; u16 subctxt; /* for cpu affinity; -1 if none */ int rec_cpu_num; diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index b38e3808836c..c6d085e1c10d 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -90,9 +90,6 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd, struct hfi1_devdata *dd = uctxt->dd; int ret = 0; - spin_lock_init(&fd->tid_lock); - spin_lock_init(&fd->invalid_lock); - fd->entry_to_rb = kcalloc(uctxt->expected_count, sizeof(struct rb_node *), GFP_KERNEL); diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 4854a4a453b5..f23d47194c12 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -179,7 +179,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, pq = kzalloc(sizeof(*pq), GFP_KERNEL); if (!pq) return -ENOMEM; - pq->dd = dd; pq->ctxt = uctxt->ctxt; pq->subctxt = fd->subctxt; @@ -236,7 +235,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, goto pq_mmu_fail; } - fd->pq = pq; + rcu_assign_pointer(fd->pq, pq); fd->cq = cq; return 0; @@ -264,8 +263,14 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, trace_hfi1_sdma_user_free_queues(uctxt->dd, uctxt->ctxt, fd->subctxt); - pq = fd->pq; + spin_lock(&fd->pq_rcu_lock); + pq = srcu_dereference_check(fd->pq, &fd->pq_srcu, + lockdep_is_held(&fd->pq_rcu_lock)); if (pq) { + rcu_assign_pointer(fd->pq, NULL); + spin_unlock(&fd->pq_rcu_lock); + synchronize_srcu(&fd->pq_srcu); + /* at this point there can be no more new requests */ if (pq->handler) hfi1_mmu_rb_unregister(pq->handler); iowait_sdma_drain(&pq->busy); @@ -277,7 +282,8 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, kfree(pq->req_in_use); kmem_cache_destroy(pq->txreq_cache); kfree(pq); - fd->pq = NULL; + } else { + spin_unlock(&fd->pq_rcu_lock); } if (fd->cq) { vfree(fd->cq->comps); @@ -321,7 +327,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, { int ret = 0, i; struct hfi1_ctxtdata *uctxt = fd->uctxt; - struct hfi1_user_sdma_pkt_q *pq = fd->pq; + struct hfi1_user_sdma_pkt_q *pq = + srcu_dereference(fd->pq, &fd->pq_srcu); struct hfi1_user_sdma_comp_q *cq = fd->cq; struct hfi1_devdata *dd = pq->dd; unsigned long idx = 0; From 57456970b8c3d0f3591e92ce197be4593f5c493b Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 12 Feb 2020 10:06:51 +0200 Subject: [PATCH 3260/3715] RDMA/core: Fix protection fault in get_pkey_idx_qp_list commit 1dd017882e01d2fcd9c5dbbf1eb376211111c393 upstream. We don't need to set pkey as valid in case that user set only one of pkey index or port number, otherwise it will be resulted in NULL pointer dereference while accessing to uninitialized pkey list. The following crash from Syzkaller revealed it. kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN PTI CPU: 1 PID: 14753 Comm: syz-executor.2 Not tainted 5.5.0-rc5 #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 RIP: 0010:get_pkey_idx_qp_list+0x161/0x2d0 Code: 01 00 00 49 8b 5e 20 4c 39 e3 0f 84 b9 00 00 00 e8 e4 42 6e fe 48 8d 7b 10 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f> b6 04 02 84 c0 74 08 3c 01 0f 8e d0 00 00 00 48 8d 7d 04 48 b8 RSP: 0018:ffffc9000bc6f950 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffffff82c8bdec RDX: 0000000000000002 RSI: ffffc900030a8000 RDI: 0000000000000010 RBP: ffff888112c8ce80 R08: 0000000000000004 R09: fffff5200178df1f R10: 0000000000000001 R11: fffff5200178df1f R12: ffff888115dc4430 R13: ffff888115da8498 R14: ffff888115dc4410 R15: ffff888115da8000 FS: 00007f20777de700(0000) GS:ffff88811b100000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b2f721000 CR3: 00000001173ca002 CR4: 0000000000360ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: port_pkey_list_insert+0xd7/0x7c0 ib_security_modify_qp+0x6fa/0xfc0 _ib_modify_qp+0x8c4/0xbf0 modify_qp+0x10da/0x16d0 ib_uverbs_modify_qp+0x9a/0x100 ib_uverbs_write+0xaa5/0xdf0 __vfs_write+0x7c/0x100 vfs_write+0x168/0x4a0 ksys_write+0xc8/0x200 do_syscall_64+0x9c/0x390 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: d291f1a65232 ("IB/core: Enforce PKey security on QPs") Link: https://lore.kernel.org/r/20200212080651.GB679970@unreal Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Message-Id: <20200212080651.GB679970@unreal> Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/security.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index a3dd88c57be7..9b8276691329 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -338,22 +338,16 @@ static struct ib_ports_pkeys *get_new_pps(const struct ib_qp *qp, if (!new_pps) return NULL; - if (qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) { - if (!qp_pps) { - new_pps->main.port_num = qp_attr->port_num; - new_pps->main.pkey_index = qp_attr->pkey_index; - } else { - new_pps->main.port_num = (qp_attr_mask & IB_QP_PORT) ? - qp_attr->port_num : - qp_pps->main.port_num; - - new_pps->main.pkey_index = - (qp_attr_mask & IB_QP_PKEY_INDEX) ? - qp_attr->pkey_index : - qp_pps->main.pkey_index; - } + if (qp_attr_mask & IB_QP_PORT) + new_pps->main.port_num = + (qp_pps) ? qp_pps->main.port_num : qp_attr->port_num; + if (qp_attr_mask & IB_QP_PKEY_INDEX) + new_pps->main.pkey_index = (qp_pps) ? qp_pps->main.pkey_index : + qp_attr->pkey_index; + if ((qp_attr_mask & IB_QP_PKEY_INDEX) && (qp_attr_mask & IB_QP_PORT)) new_pps->main.state = IB_PORT_PKEY_VALID; - } else if (qp_pps) { + + if (!(qp_attr_mask & (IB_QP_PKEY_INDEX || IB_QP_PORT)) && qp_pps) { new_pps->main.port_num = qp_pps->main.port_num; new_pps->main.pkey_index = qp_pps->main.pkey_index; if (qp_pps->main.state != IB_PORT_PKEY_NOT_VALID) From 793a70864ba18df05c4e3be52b0d542e79ea562e Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sat, 8 Feb 2020 07:08:59 -0700 Subject: [PATCH 3261/3715] s390/time: Fix clk type in get_tod_clock commit 0f8a206df7c920150d2aa45574fba0ab7ff6be4f upstream. Clang warns: In file included from ../arch/s390/boot/startup.c:3: In file included from ../include/linux/elf.h:5: In file included from ../arch/s390/include/asm/elf.h:132: In file included from ../include/linux/compat.h:10: In file included from ../include/linux/time.h:74: In file included from ../include/linux/time32.h:13: In file included from ../include/linux/timex.h:65: ../arch/s390/include/asm/timex.h:160:20: warning: passing 'unsigned char [16]' to parameter of type 'char *' converts between pointers to integer types with different sign [-Wpointer-sign] get_tod_clock_ext(clk); ^~~ ../arch/s390/include/asm/timex.h:149:44: note: passing argument to parameter 'clk' here static inline void get_tod_clock_ext(char *clk) ^ Change clk's type to just be char so that it matches what happens in get_tod_clock_ext. Fixes: 57b28f66316d ("[S390] s390_hypfs: Add new attributes") Link: https://github.com/ClangBuiltLinux/linux/issues/861 Link: http://lkml.kernel.org/r/20200208140858.47970-1-natechancellor@gmail.com Reviewed-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Vasily Gorbik Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/timex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 2dc9eb4e1acc..b6a4ce9dafaf 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -155,7 +155,7 @@ static inline void get_tod_clock_ext(char *clk) static inline unsigned long long get_tod_clock(void) { - unsigned char clk[STORE_CLOCK_EXT_SIZE]; + char clk[STORE_CLOCK_EXT_SIZE]; get_tod_clock_ext(clk); return *((unsigned long long *)&clk[1]); From ab9444f69c53374b04906f9a49976048531456a5 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 21 Jan 2020 11:01:25 -0800 Subject: [PATCH 3262/3715] perf/x86/intel: Fix inaccurate period in context switch for auto-reload commit f861854e1b435b27197417f6f90d87188003cb24 upstream. Perf doesn't take the left period into account when auto-reload is enabled with fixed period sampling mode in context switch. Here is the MSR trace of the perf command as below. (The MSR trace is simplified from a ftrace log.) #perf record -e cycles:p -c 2000000 -- ./triad_loop //The MSR trace of task schedule out //perf disable all counters, disable PEBS, disable GP counter 0, //read GP counter 0, and re-enable all counters. //The counter 0 stops at 0xfffffff82840 write_msr: MSR_CORE_PERF_GLOBAL_CTRL(38f), value 0 write_msr: MSR_IA32_PEBS_ENABLE(3f1), value 0 write_msr: MSR_P6_EVNTSEL0(186), value 40003003c rdpmc: 0, value fffffff82840 write_msr: MSR_CORE_PERF_GLOBAL_CTRL(38f), value f000000ff //The MSR trace of the same task schedule in again //perf disable all counters, enable and set GP counter 0, //enable PEBS, and re-enable all counters. //0xffffffe17b80 (-2000000) is written to GP counter 0. write_msr: MSR_CORE_PERF_GLOBAL_CTRL(38f), value 0 write_msr: MSR_IA32_PMC0(4c1), value ffffffe17b80 write_msr: MSR_P6_EVNTSEL0(186), value 40043003c write_msr: MSR_IA32_PEBS_ENABLE(3f1), value 1 write_msr: MSR_CORE_PERF_GLOBAL_CTRL(38f), value f000000ff When the same task schedule in again, the counter should starts from previous left. However, it starts from the fixed period -2000000 again. A special variant of intel_pmu_save_and_restart() is used for auto-reload, which doesn't update the hwc->period_left. When the monitored task schedules in again, perf doesn't know the left period. The fixed period is used, which is inaccurate. With auto-reload, the counter always has a negative counter value. So the left period is -value. Update the period_left in intel_pmu_save_and_restart_reload(). With the patch: //The MSR trace of task schedule out write_msr: MSR_CORE_PERF_GLOBAL_CTRL(38f), value 0 write_msr: MSR_IA32_PEBS_ENABLE(3f1), value 0 write_msr: MSR_P6_EVNTSEL0(186), value 40003003c rdpmc: 0, value ffffffe25cbc write_msr: MSR_CORE_PERF_GLOBAL_CTRL(38f), value f000000ff //The MSR trace of the same task schedule in again write_msr: MSR_CORE_PERF_GLOBAL_CTRL(38f), value 0 write_msr: MSR_IA32_PMC0(4c1), value ffffffe25cbc write_msr: MSR_P6_EVNTSEL0(186), value 40043003c write_msr: MSR_IA32_PEBS_ENABLE(3f1), value 1 write_msr: MSR_CORE_PERF_GLOBAL_CTRL(38f), value f000000ff Fixes: d31fc13fdcb2 ("perf/x86/intel: Fix event update for auto-reload") Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20200121190125.3389-1-kan.liang@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/ds.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 3310f9f6c3e1..550b7814ef92 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1368,6 +1368,8 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count) old = ((s64)(prev_raw_count << shift) >> shift); local64_add(new - old + count * period, &event->count); + local64_set(&hwc->period_left, -new); + perf_event_update_userpage(event); return 0; From 1614d08d34152295c1bb7690d3b16b66bdbb86b4 Mon Sep 17 00:00:00 2001 From: Mike Jones Date: Tue, 28 Jan 2020 10:59:59 -0700 Subject: [PATCH 3263/3715] hwmon: (pmbus/ltc2978) Fix PMBus polling of MFR_COMMON definitions. commit cf2b012c90e74e85d8aea7d67e48868069cfee0c upstream. Change 21537dc driver PMBus polling of MFR_COMMON from bits 5/4 to bits 6/5. This fixs a LTC297X family bug where polling always returns not busy even when the part is busy. This fixes a LTC388X and LTM467X bug where polling used PEND and NOT_IN_TRANS, and BUSY was not polled, which can lead to NACKing of commands. LTC388X and LTM467X modules now poll BUSY and PEND, increasing reliability by eliminating NACKing of commands. Signed-off-by: Mike Jones Link: https://lore.kernel.org/r/1580234400-2829-2-git-send-email-michael-a1.jones@analog.com Fixes: e04d1ce9bbb49 ("hwmon: (ltc2978) Add polling for chips requiring it") Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/pmbus/ltc2978.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/pmbus/ltc2978.c b/drivers/hwmon/pmbus/ltc2978.c index 58b789c28b48..94eea2ac6251 100644 --- a/drivers/hwmon/pmbus/ltc2978.c +++ b/drivers/hwmon/pmbus/ltc2978.c @@ -89,8 +89,8 @@ enum chips { ltc2974, ltc2975, ltc2977, ltc2978, ltc2980, ltc3880, ltc3882, #define LTC_POLL_TIMEOUT 100 /* in milli-seconds */ -#define LTC_NOT_BUSY BIT(5) -#define LTC_NOT_PENDING BIT(4) +#define LTC_NOT_BUSY BIT(6) +#define LTC_NOT_PENDING BIT(5) /* * LTC2978 clears peak data whenever the CLEAR_FAULTS command is executed, which From bb43eea5d29ee1880e920fd171b7998efd849ebc Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Tue, 18 Feb 2020 18:58:55 +0800 Subject: [PATCH 3264/3715] jbd2: move the clearing of b_modified flag to the journal_unmap_buffer() [ Upstream commit 6a66a7ded12baa6ebbb2e3e82f8cb91382814839 ] There is no need to delay the clearing of b_modified flag to the transaction committing time when unmapping the journalled buffer, so just move it to the journal_unmap_buffer(). Link: https://lore.kernel.org/r/20200213063821.30455-2-yi.zhang@huawei.com Reviewed-by: Jan Kara Signed-off-by: zhangyi (F) Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Sasha Levin --- fs/jbd2/commit.c | 43 +++++++++++++++---------------------------- fs/jbd2/transaction.c | 10 ++++++---- 2 files changed, 21 insertions(+), 32 deletions(-) diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 7dd613392592..89cbf45a1dcd 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -973,34 +973,21 @@ restart_loop: * it. */ /* - * A buffer which has been freed while still being journaled by - * a previous transaction. - */ - if (buffer_freed(bh)) { - /* - * If the running transaction is the one containing - * "add to orphan" operation (b_next_transaction != - * NULL), we have to wait for that transaction to - * commit before we can really get rid of the buffer. - * So just clear b_modified to not confuse transaction - * credit accounting and refile the buffer to - * BJ_Forget of the running transaction. If the just - * committed transaction contains "add to orphan" - * operation, we can completely invalidate the buffer - * now. We are rather through in that since the - * buffer may be still accessible when blocksize < - * pagesize and it is attached to the last partial - * page. - */ - jh->b_modified = 0; - if (!jh->b_next_transaction) { - clear_buffer_freed(bh); - clear_buffer_jbddirty(bh); - clear_buffer_mapped(bh); - clear_buffer_new(bh); - clear_buffer_req(bh); - bh->b_bdev = NULL; - } + * A buffer which has been freed while still being journaled + * by a previous transaction, refile the buffer to BJ_Forget of + * the running transaction. If the just committed transaction + * contains "add to orphan" operation, we can completely + * invalidate the buffer now. We are rather through in that + * since the buffer may be still accessible when blocksize < + * pagesize and it is attached to the last partial page. + */ + if (buffer_freed(bh) && !jh->b_next_transaction) { + clear_buffer_freed(bh); + clear_buffer_jbddirty(bh); + clear_buffer_mapped(bh); + clear_buffer_new(bh); + clear_buffer_req(bh); + bh->b_bdev = NULL; } if (buffer_jbddirty(bh)) { diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 7fe422eced89..f2ff141a4479 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -2231,14 +2231,16 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, return -EBUSY; } /* - * OK, buffer won't be reachable after truncate. We just set - * j_next_transaction to the running transaction (if there is - * one) and mark buffer as freed so that commit code knows it - * should clear dirty bits when it is done with the buffer. + * OK, buffer won't be reachable after truncate. We just clear + * b_modified to not confuse transaction credit accounting, and + * set j_next_transaction to the running transaction (if there + * is one) and mark buffer as freed so that commit code knows + * it should clear dirty bits when it is done with the buffer. */ set_buffer_freed(bh); if (journal->j_running_transaction && buffer_jbddirty(bh)) jh->b_next_transaction = journal->j_running_transaction; + jh->b_modified = 0; jbd2_journal_put_journal_head(jh); spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); From 800f71280e07d344af90ad9ce9d84e28841e0e9b Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Tue, 18 Feb 2020 18:58:56 +0800 Subject: [PATCH 3265/3715] jbd2: do not clear the BH_Mapped flag when forgetting a metadata buffer [ Upstream commit c96dceeabf765d0b1b1f29c3bf50a5c01315b820 ] Commit 904cdbd41d74 ("jbd2: clear dirty flag when revoking a buffer from an older transaction") set the BH_Freed flag when forgetting a metadata buffer which belongs to the committing transaction, it indicate the committing process clear dirty bits when it is done with the buffer. But it also clear the BH_Mapped flag at the same time, which may trigger below NULL pointer oops when block_size < PAGE_SIZE. rmdir 1 kjournald2 mkdir 2 jbd2_journal_commit_transaction commit transaction N jbd2_journal_forget set_buffer_freed(bh1) jbd2_journal_commit_transaction commit transaction N+1 ... clear_buffer_mapped(bh1) ext4_getblk(bh2 ummapped) ... grow_dev_page init_page_buffers bh1->b_private=NULL bh2->b_private=NULL jbd2_journal_put_journal_head(jh1) __journal_remove_journal_head(hb1) jh1 is NULL and trigger oops *) Dir entry block bh1 and bh2 belongs to one page, and the bh2 has already been unmapped. For the metadata buffer we forgetting, we should always keep the mapped flag and clear the dirty flags is enough, so this patch pick out the these buffers and keep their BH_Mapped flag. Link: https://lore.kernel.org/r/20200213063821.30455-3-yi.zhang@huawei.com Fixes: 904cdbd41d74 ("jbd2: clear dirty flag when revoking a buffer from an older transaction") Reviewed-by: Jan Kara Signed-off-by: zhangyi (F) Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Sasha Levin --- fs/jbd2/commit.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 89cbf45a1dcd..cb0da3d4adc0 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -982,12 +982,29 @@ restart_loop: * pagesize and it is attached to the last partial page. */ if (buffer_freed(bh) && !jh->b_next_transaction) { + struct address_space *mapping; + clear_buffer_freed(bh); clear_buffer_jbddirty(bh); - clear_buffer_mapped(bh); - clear_buffer_new(bh); - clear_buffer_req(bh); - bh->b_bdev = NULL; + + /* + * Block device buffers need to stay mapped all the + * time, so it is enough to clear buffer_jbddirty and + * buffer_freed bits. For the file mapping buffers (i.e. + * journalled data) we need to unmap buffer and clear + * more bits. We also need to be careful about the check + * because the data page mapping can get cleared under + * out hands, which alse need not to clear more bits + * because the page and buffers will be freed and can + * never be reused once we are done with them. + */ + mapping = READ_ONCE(bh->b_page->mapping); + if (mapping && !sb_is_blkdev_sb(mapping->host->i_sb)) { + clear_buffer_mapped(bh); + clear_buffer_new(bh); + clear_buffer_req(bh); + bh->b_bdev = NULL; + } } if (buffer_jbddirty(bh)) { From 2011a54b6161cdeb42ec8d7843170977701f97b6 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Wed, 18 Sep 2019 22:06:58 +0530 Subject: [PATCH 3266/3715] scsi: qla2xxx: fix a potential NULL pointer dereference commit 35a79a63517981a8aea395497c548776347deda8 upstream. alloc_workqueue is not checked for errors and as a result a potential NULL dereference could occur. Link: https://lore.kernel.org/r/1568824618-4366-1-git-send-email-allen.pais@oracle.com Signed-off-by: Allen Pais Reviewed-by: Martin Wilck Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen [Ajay: Modified to apply on v4.14.y] Signed-off-by: Ajay Kaher Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_os.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 5f9d4dbc4a98..d4024015f859 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -3178,6 +3178,10 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) base_vha->mgmt_svr_loop_id, host->sg_tablesize); ha->wq = alloc_workqueue("qla2xxx_wq", WQ_MEM_RECLAIM, 0); + if (unlikely(!ha->wq)) { + ret = -ENOMEM; + goto probe_failed; + } if (ha->mqenable) { bool mq = false; From aedede2e024cdbb1be3055eaf8683270b5eaccc2 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 20 Feb 2020 06:54:06 -0500 Subject: [PATCH 3267/3715] Revert "KVM: nVMX: Use correct root level for nested EPT shadow page tables" This reverts commit 740d876bd9565857a695ce7c05efda4eba5bc585. Signed-off-by: Sasha Levin --- arch/x86/kvm/vmx/vmx.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 997926a9121c..3791ce8d269e 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2968,9 +2968,6 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) static int get_ept_level(struct kvm_vcpu *vcpu) { - /* Nested EPT currently only supports 4-level walks. */ - if (is_guest_mode(vcpu) && nested_cpu_has_ept(get_vmcs12(vcpu))) - return 4; if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48)) return 5; return 4; From 70eb01d7e5c58d755c70dad29cba7465988965e0 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 20 Feb 2020 06:54:12 -0500 Subject: [PATCH 3268/3715] Revert "KVM: VMX: Add non-canonical check on writes to RTIT address MSRs" This reverts commit 57211b7366cc2abf784c35e537b256e7fcddc91e. This patch isn't needed on 4.19 and older. Signed-off-by: Sasha Levin --- arch/x86/kvm/vmx/vmx.c | 8033 ---------------------------------------- 1 file changed, 8033 deletions(-) delete mode 100644 arch/x86/kvm/vmx/vmx.c diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c deleted file mode 100644 index 3791ce8d269e..000000000000 --- a/arch/x86/kvm/vmx/vmx.c +++ /dev/null @@ -1,8033 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Kernel-based Virtual Machine driver for Linux - * - * This module enables machines with Intel VT-x extensions to run virtual - * machines without emulation or binary translation. - * - * Copyright (C) 2006 Qumranet, Inc. - * Copyright 2010 Red Hat, Inc. and/or its affiliates. - * - * Authors: - * Avi Kivity - * Yaniv Kamay - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "capabilities.h" -#include "cpuid.h" -#include "evmcs.h" -#include "irq.h" -#include "kvm_cache_regs.h" -#include "lapic.h" -#include "mmu.h" -#include "nested.h" -#include "ops.h" -#include "pmu.h" -#include "trace.h" -#include "vmcs.h" -#include "vmcs12.h" -#include "vmx.h" -#include "x86.h" - -MODULE_AUTHOR("Qumranet"); -MODULE_LICENSE("GPL"); - -static const struct x86_cpu_id vmx_cpu_id[] = { - X86_FEATURE_MATCH(X86_FEATURE_VMX), - {} -}; -MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id); - -bool __read_mostly enable_vpid = 1; -module_param_named(vpid, enable_vpid, bool, 0444); - -static bool __read_mostly enable_vnmi = 1; -module_param_named(vnmi, enable_vnmi, bool, S_IRUGO); - -bool __read_mostly flexpriority_enabled = 1; -module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO); - -bool __read_mostly enable_ept = 1; -module_param_named(ept, enable_ept, bool, S_IRUGO); - -bool __read_mostly enable_unrestricted_guest = 1; -module_param_named(unrestricted_guest, - enable_unrestricted_guest, bool, S_IRUGO); - -bool __read_mostly enable_ept_ad_bits = 1; -module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO); - -static bool __read_mostly emulate_invalid_guest_state = true; -module_param(emulate_invalid_guest_state, bool, S_IRUGO); - -static bool __read_mostly fasteoi = 1; -module_param(fasteoi, bool, S_IRUGO); - -static bool __read_mostly enable_apicv = 1; -module_param(enable_apicv, bool, S_IRUGO); - -/* - * If nested=1, nested virtualization is supported, i.e., guests may use - * VMX and be a hypervisor for its own guests. If nested=0, guests may not - * use VMX instructions. - */ -static bool __read_mostly nested = 1; -module_param(nested, bool, S_IRUGO); - -bool __read_mostly enable_pml = 1; -module_param_named(pml, enable_pml, bool, S_IRUGO); - -static bool __read_mostly dump_invalid_vmcs = 0; -module_param(dump_invalid_vmcs, bool, 0644); - -#define MSR_BITMAP_MODE_X2APIC 1 -#define MSR_BITMAP_MODE_X2APIC_APICV 2 - -#define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL - -/* Guest_tsc -> host_tsc conversion requires 64-bit division. */ -static int __read_mostly cpu_preemption_timer_multi; -static bool __read_mostly enable_preemption_timer = 1; -#ifdef CONFIG_X86_64 -module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO); -#endif - -#define KVM_VM_CR0_ALWAYS_OFF (X86_CR0_NW | X86_CR0_CD) -#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE -#define KVM_VM_CR0_ALWAYS_ON \ - (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | \ - X86_CR0_WP | X86_CR0_PG | X86_CR0_PE) -#define KVM_CR4_GUEST_OWNED_BITS \ - (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ - | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD) - -#define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE -#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) -#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) - -#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM)) - -#define MSR_IA32_RTIT_STATUS_MASK (~(RTIT_STATUS_FILTEREN | \ - RTIT_STATUS_CONTEXTEN | RTIT_STATUS_TRIGGEREN | \ - RTIT_STATUS_ERROR | RTIT_STATUS_STOPPED | \ - RTIT_STATUS_BYTECNT)) - -#define MSR_IA32_RTIT_OUTPUT_BASE_MASK \ - (~((1UL << cpuid_query_maxphyaddr(vcpu)) - 1) | 0x7f) - -/* - * These 2 parameters are used to config the controls for Pause-Loop Exiting: - * ple_gap: upper bound on the amount of time between two successive - * executions of PAUSE in a loop. Also indicate if ple enabled. - * According to test, this time is usually smaller than 128 cycles. - * ple_window: upper bound on the amount of time a guest is allowed to execute - * in a PAUSE loop. Tests indicate that most spinlocks are held for - * less than 2^12 cycles - * Time is measured based on a counter that runs at the same rate as the TSC, - * refer SDM volume 3b section 21.6.13 & 22.1.3. - */ -static unsigned int ple_gap = KVM_DEFAULT_PLE_GAP; -module_param(ple_gap, uint, 0444); - -static unsigned int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; -module_param(ple_window, uint, 0444); - -/* Default doubles per-vcpu window every exit. */ -static unsigned int ple_window_grow = KVM_DEFAULT_PLE_WINDOW_GROW; -module_param(ple_window_grow, uint, 0444); - -/* Default resets per-vcpu window every exit to ple_window. */ -static unsigned int ple_window_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK; -module_param(ple_window_shrink, uint, 0444); - -/* Default is to compute the maximum so we can never overflow. */ -static unsigned int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX; -module_param(ple_window_max, uint, 0444); - -/* Default is SYSTEM mode, 1 for host-guest mode */ -int __read_mostly pt_mode = PT_MODE_SYSTEM; -module_param(pt_mode, int, S_IRUGO); - -static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush); -static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond); -static DEFINE_MUTEX(vmx_l1d_flush_mutex); - -/* Storage for pre module init parameter parsing */ -static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param = VMENTER_L1D_FLUSH_AUTO; - -static const struct { - const char *option; - bool for_parse; -} vmentry_l1d_param[] = { - [VMENTER_L1D_FLUSH_AUTO] = {"auto", true}, - [VMENTER_L1D_FLUSH_NEVER] = {"never", true}, - [VMENTER_L1D_FLUSH_COND] = {"cond", true}, - [VMENTER_L1D_FLUSH_ALWAYS] = {"always", true}, - [VMENTER_L1D_FLUSH_EPT_DISABLED] = {"EPT disabled", false}, - [VMENTER_L1D_FLUSH_NOT_REQUIRED] = {"not required", false}, -}; - -#define L1D_CACHE_ORDER 4 -static void *vmx_l1d_flush_pages; - -static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) -{ - struct page *page; - unsigned int i; - - if (!boot_cpu_has_bug(X86_BUG_L1TF)) { - l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED; - return 0; - } - - if (!enable_ept) { - l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED; - return 0; - } - - if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) { - u64 msr; - - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr); - if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) { - l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED; - return 0; - } - } - - /* If set to auto use the default l1tf mitigation method */ - if (l1tf == VMENTER_L1D_FLUSH_AUTO) { - switch (l1tf_mitigation) { - case L1TF_MITIGATION_OFF: - l1tf = VMENTER_L1D_FLUSH_NEVER; - break; - case L1TF_MITIGATION_FLUSH_NOWARN: - case L1TF_MITIGATION_FLUSH: - case L1TF_MITIGATION_FLUSH_NOSMT: - l1tf = VMENTER_L1D_FLUSH_COND; - break; - case L1TF_MITIGATION_FULL: - case L1TF_MITIGATION_FULL_FORCE: - l1tf = VMENTER_L1D_FLUSH_ALWAYS; - break; - } - } else if (l1tf_mitigation == L1TF_MITIGATION_FULL_FORCE) { - l1tf = VMENTER_L1D_FLUSH_ALWAYS; - } - - if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages && - !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) { - /* - * This allocation for vmx_l1d_flush_pages is not tied to a VM - * lifetime and so should not be charged to a memcg. - */ - page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER); - if (!page) - return -ENOMEM; - vmx_l1d_flush_pages = page_address(page); - - /* - * Initialize each page with a different pattern in - * order to protect against KSM in the nested - * virtualization case. - */ - for (i = 0; i < 1u << L1D_CACHE_ORDER; ++i) { - memset(vmx_l1d_flush_pages + i * PAGE_SIZE, i + 1, - PAGE_SIZE); - } - } - - l1tf_vmx_mitigation = l1tf; - - if (l1tf != VMENTER_L1D_FLUSH_NEVER) - static_branch_enable(&vmx_l1d_should_flush); - else - static_branch_disable(&vmx_l1d_should_flush); - - if (l1tf == VMENTER_L1D_FLUSH_COND) - static_branch_enable(&vmx_l1d_flush_cond); - else - static_branch_disable(&vmx_l1d_flush_cond); - return 0; -} - -static int vmentry_l1d_flush_parse(const char *s) -{ - unsigned int i; - - if (s) { - for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) { - if (vmentry_l1d_param[i].for_parse && - sysfs_streq(s, vmentry_l1d_param[i].option)) - return i; - } - } - return -EINVAL; -} - -static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp) -{ - int l1tf, ret; - - l1tf = vmentry_l1d_flush_parse(s); - if (l1tf < 0) - return l1tf; - - if (!boot_cpu_has(X86_BUG_L1TF)) - return 0; - - /* - * Has vmx_init() run already? If not then this is the pre init - * parameter parsing. In that case just store the value and let - * vmx_init() do the proper setup after enable_ept has been - * established. - */ - if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO) { - vmentry_l1d_flush_param = l1tf; - return 0; - } - - mutex_lock(&vmx_l1d_flush_mutex); - ret = vmx_setup_l1d_flush(l1tf); - mutex_unlock(&vmx_l1d_flush_mutex); - return ret; -} - -static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp) -{ - if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param))) - return sprintf(s, "???\n"); - - return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option); -} - -static const struct kernel_param_ops vmentry_l1d_flush_ops = { - .set = vmentry_l1d_flush_set, - .get = vmentry_l1d_flush_get, -}; -module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644); - -static bool guest_state_valid(struct kvm_vcpu *vcpu); -static u32 vmx_segment_access_rights(struct kvm_segment *var); -static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, - u32 msr, int type); - -void vmx_vmexit(void); - -#define vmx_insn_failed(fmt...) \ -do { \ - WARN_ONCE(1, fmt); \ - pr_warn_ratelimited(fmt); \ -} while (0) - -asmlinkage void vmread_error(unsigned long field, bool fault) -{ - if (fault) - kvm_spurious_fault(); - else - vmx_insn_failed("kvm: vmread failed: field=%lx\n", field); -} - -noinline void vmwrite_error(unsigned long field, unsigned long value) -{ - vmx_insn_failed("kvm: vmwrite failed: field=%lx val=%lx err=%d\n", - field, value, vmcs_read32(VM_INSTRUCTION_ERROR)); -} - -noinline void vmclear_error(struct vmcs *vmcs, u64 phys_addr) -{ - vmx_insn_failed("kvm: vmclear failed: %p/%llx\n", vmcs, phys_addr); -} - -noinline void vmptrld_error(struct vmcs *vmcs, u64 phys_addr) -{ - vmx_insn_failed("kvm: vmptrld failed: %p/%llx\n", vmcs, phys_addr); -} - -noinline void invvpid_error(unsigned long ext, u16 vpid, gva_t gva) -{ - vmx_insn_failed("kvm: invvpid failed: ext=0x%lx vpid=%u gva=0x%lx\n", - ext, vpid, gva); -} - -noinline void invept_error(unsigned long ext, u64 eptp, gpa_t gpa) -{ - vmx_insn_failed("kvm: invept failed: ext=0x%lx eptp=%llx gpa=0x%llx\n", - ext, eptp, gpa); -} - -static DEFINE_PER_CPU(struct vmcs *, vmxarea); -DEFINE_PER_CPU(struct vmcs *, current_vmcs); -/* - * We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed - * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it. - */ -static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu); - -/* - * We maintian a per-CPU linked-list of vCPU, so in wakeup_handler() we - * can find which vCPU should be waken up. - */ -static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu); -static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); - -static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS); -static DEFINE_SPINLOCK(vmx_vpid_lock); - -struct vmcs_config vmcs_config; -struct vmx_capability vmx_capability; - -#define VMX_SEGMENT_FIELD(seg) \ - [VCPU_SREG_##seg] = { \ - .selector = GUEST_##seg##_SELECTOR, \ - .base = GUEST_##seg##_BASE, \ - .limit = GUEST_##seg##_LIMIT, \ - .ar_bytes = GUEST_##seg##_AR_BYTES, \ - } - -static const struct kvm_vmx_segment_field { - unsigned selector; - unsigned base; - unsigned limit; - unsigned ar_bytes; -} kvm_vmx_segment_fields[] = { - VMX_SEGMENT_FIELD(CS), - VMX_SEGMENT_FIELD(DS), - VMX_SEGMENT_FIELD(ES), - VMX_SEGMENT_FIELD(FS), - VMX_SEGMENT_FIELD(GS), - VMX_SEGMENT_FIELD(SS), - VMX_SEGMENT_FIELD(TR), - VMX_SEGMENT_FIELD(LDTR), -}; - -u64 host_efer; -static unsigned long host_idt_base; - -/* - * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm - * will emulate SYSCALL in legacy mode if the vendor string in guest - * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To - * support this emulation, IA32_STAR must always be included in - * vmx_msr_index[], even in i386 builds. - */ -const u32 vmx_msr_index[] = { -#ifdef CONFIG_X86_64 - MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, -#endif - MSR_EFER, MSR_TSC_AUX, MSR_STAR, - MSR_IA32_TSX_CTRL, -}; - -#if IS_ENABLED(CONFIG_HYPERV) -static bool __read_mostly enlightened_vmcs = true; -module_param(enlightened_vmcs, bool, 0444); - -/* check_ept_pointer() should be under protection of ept_pointer_lock. */ -static void check_ept_pointer_match(struct kvm *kvm) -{ - struct kvm_vcpu *vcpu; - u64 tmp_eptp = INVALID_PAGE; - int i; - - kvm_for_each_vcpu(i, vcpu, kvm) { - if (!VALID_PAGE(tmp_eptp)) { - tmp_eptp = to_vmx(vcpu)->ept_pointer; - } else if (tmp_eptp != to_vmx(vcpu)->ept_pointer) { - to_kvm_vmx(kvm)->ept_pointers_match - = EPT_POINTERS_MISMATCH; - return; - } - } - - to_kvm_vmx(kvm)->ept_pointers_match = EPT_POINTERS_MATCH; -} - -static int kvm_fill_hv_flush_list_func(struct hv_guest_mapping_flush_list *flush, - void *data) -{ - struct kvm_tlb_range *range = data; - - return hyperv_fill_flush_guest_mapping_list(flush, range->start_gfn, - range->pages); -} - -static inline int __hv_remote_flush_tlb_with_range(struct kvm *kvm, - struct kvm_vcpu *vcpu, struct kvm_tlb_range *range) -{ - u64 ept_pointer = to_vmx(vcpu)->ept_pointer; - - /* - * FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE hypercall needs address - * of the base of EPT PML4 table, strip off EPT configuration - * information. - */ - if (range) - return hyperv_flush_guest_mapping_range(ept_pointer & PAGE_MASK, - kvm_fill_hv_flush_list_func, (void *)range); - else - return hyperv_flush_guest_mapping(ept_pointer & PAGE_MASK); -} - -static int hv_remote_flush_tlb_with_range(struct kvm *kvm, - struct kvm_tlb_range *range) -{ - struct kvm_vcpu *vcpu; - int ret = 0, i; - - spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock); - - if (to_kvm_vmx(kvm)->ept_pointers_match == EPT_POINTERS_CHECK) - check_ept_pointer_match(kvm); - - if (to_kvm_vmx(kvm)->ept_pointers_match != EPT_POINTERS_MATCH) { - kvm_for_each_vcpu(i, vcpu, kvm) { - /* If ept_pointer is invalid pointer, bypass flush request. */ - if (VALID_PAGE(to_vmx(vcpu)->ept_pointer)) - ret |= __hv_remote_flush_tlb_with_range( - kvm, vcpu, range); - } - } else { - ret = __hv_remote_flush_tlb_with_range(kvm, - kvm_get_vcpu(kvm, 0), range); - } - - spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock); - return ret; -} -static int hv_remote_flush_tlb(struct kvm *kvm) -{ - return hv_remote_flush_tlb_with_range(kvm, NULL); -} - -static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu) -{ - struct hv_enlightened_vmcs *evmcs; - struct hv_partition_assist_pg **p_hv_pa_pg = - &vcpu->kvm->arch.hyperv.hv_pa_pg; - /* - * Synthetic VM-Exit is not enabled in current code and so All - * evmcs in singe VM shares same assist page. - */ - if (!*p_hv_pa_pg) - *p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL); - - if (!*p_hv_pa_pg) - return -ENOMEM; - - evmcs = (struct hv_enlightened_vmcs *)to_vmx(vcpu)->loaded_vmcs->vmcs; - - evmcs->partition_assist_page = - __pa(*p_hv_pa_pg); - evmcs->hv_vm_id = (unsigned long)vcpu->kvm; - evmcs->hv_enlightenments_control.nested_flush_hypercall = 1; - - return 0; -} - -#endif /* IS_ENABLED(CONFIG_HYPERV) */ - -/* - * Comment's format: document - errata name - stepping - processor name. - * Refer from - * https://www.virtualbox.org/svn/vbox/trunk/src/VBox/VMM/VMMR0/HMR0.cpp - */ -static u32 vmx_preemption_cpu_tfms[] = { -/* 323344.pdf - BA86 - D0 - Xeon 7500 Series */ -0x000206E6, -/* 323056.pdf - AAX65 - C2 - Xeon L3406 */ -/* 322814.pdf - AAT59 - C2 - i7-600, i5-500, i5-400 and i3-300 Mobile */ -/* 322911.pdf - AAU65 - C2 - i5-600, i3-500 Desktop and Pentium G6950 */ -0x00020652, -/* 322911.pdf - AAU65 - K0 - i5-600, i3-500 Desktop and Pentium G6950 */ -0x00020655, -/* 322373.pdf - AAO95 - B1 - Xeon 3400 Series */ -/* 322166.pdf - AAN92 - B1 - i7-800 and i5-700 Desktop */ -/* - * 320767.pdf - AAP86 - B1 - - * i7-900 Mobile Extreme, i7-800 and i7-700 Mobile - */ -0x000106E5, -/* 321333.pdf - AAM126 - C0 - Xeon 3500 */ -0x000106A0, -/* 321333.pdf - AAM126 - C1 - Xeon 3500 */ -0x000106A1, -/* 320836.pdf - AAJ124 - C0 - i7-900 Desktop Extreme and i7-900 Desktop */ -0x000106A4, - /* 321333.pdf - AAM126 - D0 - Xeon 3500 */ - /* 321324.pdf - AAK139 - D0 - Xeon 5500 */ - /* 320836.pdf - AAJ124 - D0 - i7-900 Extreme and i7-900 Desktop */ -0x000106A5, - /* Xeon E3-1220 V2 */ -0x000306A8, -}; - -static inline bool cpu_has_broken_vmx_preemption_timer(void) -{ - u32 eax = cpuid_eax(0x00000001), i; - - /* Clear the reserved bits */ - eax &= ~(0x3U << 14 | 0xfU << 28); - for (i = 0; i < ARRAY_SIZE(vmx_preemption_cpu_tfms); i++) - if (eax == vmx_preemption_cpu_tfms[i]) - return true; - - return false; -} - -static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu) -{ - return flexpriority_enabled && lapic_in_kernel(vcpu); -} - -static inline bool report_flexpriority(void) -{ - return flexpriority_enabled; -} - -static inline int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) -{ - int i; - - for (i = 0; i < vmx->nmsrs; ++i) - if (vmx_msr_index[vmx->guest_msrs[i].index] == msr) - return i; - return -1; -} - -struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) -{ - int i; - - i = __find_msr_index(vmx, msr); - if (i >= 0) - return &vmx->guest_msrs[i]; - return NULL; -} - -static int vmx_set_guest_msr(struct vcpu_vmx *vmx, struct shared_msr_entry *msr, u64 data) -{ - int ret = 0; - - u64 old_msr_data = msr->data; - msr->data = data; - if (msr - vmx->guest_msrs < vmx->save_nmsrs) { - preempt_disable(); - ret = kvm_set_shared_msr(msr->index, msr->data, - msr->mask); - preempt_enable(); - if (ret) - msr->data = old_msr_data; - } - return ret; -} - -void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs) -{ - vmcs_clear(loaded_vmcs->vmcs); - if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched) - vmcs_clear(loaded_vmcs->shadow_vmcs); - loaded_vmcs->cpu = -1; - loaded_vmcs->launched = 0; -} - -#ifdef CONFIG_KEXEC_CORE -/* - * This bitmap is used to indicate whether the vmclear - * operation is enabled on all cpus. All disabled by - * default. - */ -static cpumask_t crash_vmclear_enabled_bitmap = CPU_MASK_NONE; - -static inline void crash_enable_local_vmclear(int cpu) -{ - cpumask_set_cpu(cpu, &crash_vmclear_enabled_bitmap); -} - -static inline void crash_disable_local_vmclear(int cpu) -{ - cpumask_clear_cpu(cpu, &crash_vmclear_enabled_bitmap); -} - -static inline int crash_local_vmclear_enabled(int cpu) -{ - return cpumask_test_cpu(cpu, &crash_vmclear_enabled_bitmap); -} - -static void crash_vmclear_local_loaded_vmcss(void) -{ - int cpu = raw_smp_processor_id(); - struct loaded_vmcs *v; - - if (!crash_local_vmclear_enabled(cpu)) - return; - - list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu), - loaded_vmcss_on_cpu_link) - vmcs_clear(v->vmcs); -} -#else -static inline void crash_enable_local_vmclear(int cpu) { } -static inline void crash_disable_local_vmclear(int cpu) { } -#endif /* CONFIG_KEXEC_CORE */ - -static void __loaded_vmcs_clear(void *arg) -{ - struct loaded_vmcs *loaded_vmcs = arg; - int cpu = raw_smp_processor_id(); - - if (loaded_vmcs->cpu != cpu) - return; /* vcpu migration can race with cpu offline */ - if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs) - per_cpu(current_vmcs, cpu) = NULL; - crash_disable_local_vmclear(cpu); - list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link); - - /* - * we should ensure updating loaded_vmcs->loaded_vmcss_on_cpu_link - * is before setting loaded_vmcs->vcpu to -1 which is done in - * loaded_vmcs_init. Otherwise, other cpu can see vcpu = -1 fist - * then adds the vmcs into percpu list before it is deleted. - */ - smp_wmb(); - - loaded_vmcs_init(loaded_vmcs); - crash_enable_local_vmclear(cpu); -} - -void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs) -{ - int cpu = loaded_vmcs->cpu; - - if (cpu != -1) - smp_call_function_single(cpu, - __loaded_vmcs_clear, loaded_vmcs, 1); -} - -static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg, - unsigned field) -{ - bool ret; - u32 mask = 1 << (seg * SEG_FIELD_NR + field); - - if (!kvm_register_is_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS)) { - kvm_register_mark_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS); - vmx->segment_cache.bitmask = 0; - } - ret = vmx->segment_cache.bitmask & mask; - vmx->segment_cache.bitmask |= mask; - return ret; -} - -static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg) -{ - u16 *p = &vmx->segment_cache.seg[seg].selector; - - if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL)) - *p = vmcs_read16(kvm_vmx_segment_fields[seg].selector); - return *p; -} - -static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg) -{ - ulong *p = &vmx->segment_cache.seg[seg].base; - - if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE)) - *p = vmcs_readl(kvm_vmx_segment_fields[seg].base); - return *p; -} - -static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg) -{ - u32 *p = &vmx->segment_cache.seg[seg].limit; - - if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT)) - *p = vmcs_read32(kvm_vmx_segment_fields[seg].limit); - return *p; -} - -static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg) -{ - u32 *p = &vmx->segment_cache.seg[seg].ar; - - if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR)) - *p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes); - return *p; -} - -void update_exception_bitmap(struct kvm_vcpu *vcpu) -{ - u32 eb; - - eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | - (1u << DB_VECTOR) | (1u << AC_VECTOR); - /* - * Guest access to VMware backdoor ports could legitimately - * trigger #GP because of TSS I/O permission bitmap. - * We intercept those #GP and allow access to them anyway - * as VMware does. - */ - if (enable_vmware_backdoor) - eb |= (1u << GP_VECTOR); - if ((vcpu->guest_debug & - (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == - (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) - eb |= 1u << BP_VECTOR; - if (to_vmx(vcpu)->rmode.vm86_active) - eb = ~0; - if (enable_ept) - eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ - - /* When we are running a nested L2 guest and L1 specified for it a - * certain exception bitmap, we must trap the same exceptions and pass - * them to L1. When running L2, we will only handle the exceptions - * specified above if L1 did not want them. - */ - if (is_guest_mode(vcpu)) - eb |= get_vmcs12(vcpu)->exception_bitmap; - - vmcs_write32(EXCEPTION_BITMAP, eb); -} - -/* - * Check if MSR is intercepted for currently loaded MSR bitmap. - */ -static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) -{ - unsigned long *msr_bitmap; - int f = sizeof(unsigned long); - - if (!cpu_has_vmx_msr_bitmap()) - return true; - - msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap; - - if (msr <= 0x1fff) { - return !!test_bit(msr, msr_bitmap + 0x800 / f); - } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { - msr &= 0x1fff; - return !!test_bit(msr, msr_bitmap + 0xc00 / f); - } - - return true; -} - -static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, - unsigned long entry, unsigned long exit) -{ - vm_entry_controls_clearbit(vmx, entry); - vm_exit_controls_clearbit(vmx, exit); -} - -int vmx_find_msr_index(struct vmx_msrs *m, u32 msr) -{ - unsigned int i; - - for (i = 0; i < m->nr; ++i) { - if (m->val[i].index == msr) - return i; - } - return -ENOENT; -} - -static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) -{ - int i; - struct msr_autoload *m = &vmx->msr_autoload; - - switch (msr) { - case MSR_EFER: - if (cpu_has_load_ia32_efer()) { - clear_atomic_switch_msr_special(vmx, - VM_ENTRY_LOAD_IA32_EFER, - VM_EXIT_LOAD_IA32_EFER); - return; - } - break; - case MSR_CORE_PERF_GLOBAL_CTRL: - if (cpu_has_load_perf_global_ctrl()) { - clear_atomic_switch_msr_special(vmx, - VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, - VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL); - return; - } - break; - } - i = vmx_find_msr_index(&m->guest, msr); - if (i < 0) - goto skip_guest; - --m->guest.nr; - m->guest.val[i] = m->guest.val[m->guest.nr]; - vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr); - -skip_guest: - i = vmx_find_msr_index(&m->host, msr); - if (i < 0) - return; - - --m->host.nr; - m->host.val[i] = m->host.val[m->host.nr]; - vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr); -} - -static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx, - unsigned long entry, unsigned long exit, - unsigned long guest_val_vmcs, unsigned long host_val_vmcs, - u64 guest_val, u64 host_val) -{ - vmcs_write64(guest_val_vmcs, guest_val); - if (host_val_vmcs != HOST_IA32_EFER) - vmcs_write64(host_val_vmcs, host_val); - vm_entry_controls_setbit(vmx, entry); - vm_exit_controls_setbit(vmx, exit); -} - -static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, - u64 guest_val, u64 host_val, bool entry_only) -{ - int i, j = 0; - struct msr_autoload *m = &vmx->msr_autoload; - - switch (msr) { - case MSR_EFER: - if (cpu_has_load_ia32_efer()) { - add_atomic_switch_msr_special(vmx, - VM_ENTRY_LOAD_IA32_EFER, - VM_EXIT_LOAD_IA32_EFER, - GUEST_IA32_EFER, - HOST_IA32_EFER, - guest_val, host_val); - return; - } - break; - case MSR_CORE_PERF_GLOBAL_CTRL: - if (cpu_has_load_perf_global_ctrl()) { - add_atomic_switch_msr_special(vmx, - VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, - VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL, - GUEST_IA32_PERF_GLOBAL_CTRL, - HOST_IA32_PERF_GLOBAL_CTRL, - guest_val, host_val); - return; - } - break; - case MSR_IA32_PEBS_ENABLE: - /* PEBS needs a quiescent period after being disabled (to write - * a record). Disabling PEBS through VMX MSR swapping doesn't - * provide that period, so a CPU could write host's record into - * guest's memory. - */ - wrmsrl(MSR_IA32_PEBS_ENABLE, 0); - } - - i = vmx_find_msr_index(&m->guest, msr); - if (!entry_only) - j = vmx_find_msr_index(&m->host, msr); - - if ((i < 0 && m->guest.nr == NR_LOADSTORE_MSRS) || - (j < 0 && m->host.nr == NR_LOADSTORE_MSRS)) { - printk_once(KERN_WARNING "Not enough msr switch entries. " - "Can't add msr %x\n", msr); - return; - } - if (i < 0) { - i = m->guest.nr++; - vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr); - } - m->guest.val[i].index = msr; - m->guest.val[i].value = guest_val; - - if (entry_only) - return; - - if (j < 0) { - j = m->host.nr++; - vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr); - } - m->host.val[j].index = msr; - m->host.val[j].value = host_val; -} - -static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) -{ - u64 guest_efer = vmx->vcpu.arch.efer; - u64 ignore_bits = 0; - - /* Shadow paging assumes NX to be available. */ - if (!enable_ept) - guest_efer |= EFER_NX; - - /* - * LMA and LME handled by hardware; SCE meaningless outside long mode. - */ - ignore_bits |= EFER_SCE; -#ifdef CONFIG_X86_64 - ignore_bits |= EFER_LMA | EFER_LME; - /* SCE is meaningful only in long mode on Intel */ - if (guest_efer & EFER_LMA) - ignore_bits &= ~(u64)EFER_SCE; -#endif - - /* - * On EPT, we can't emulate NX, so we must switch EFER atomically. - * On CPUs that support "load IA32_EFER", always switch EFER - * atomically, since it's faster than switching it manually. - */ - if (cpu_has_load_ia32_efer() || - (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) { - if (!(guest_efer & EFER_LMA)) - guest_efer &= ~EFER_LME; - if (guest_efer != host_efer) - add_atomic_switch_msr(vmx, MSR_EFER, - guest_efer, host_efer, false); - else - clear_atomic_switch_msr(vmx, MSR_EFER); - return false; - } else { - clear_atomic_switch_msr(vmx, MSR_EFER); - - guest_efer &= ~ignore_bits; - guest_efer |= host_efer & ignore_bits; - - vmx->guest_msrs[efer_offset].data = guest_efer; - vmx->guest_msrs[efer_offset].mask = ~ignore_bits; - - return true; - } -} - -#ifdef CONFIG_X86_32 -/* - * On 32-bit kernels, VM exits still load the FS and GS bases from the - * VMCS rather than the segment table. KVM uses this helper to figure - * out the current bases to poke them into the VMCS before entry. - */ -static unsigned long segment_base(u16 selector) -{ - struct desc_struct *table; - unsigned long v; - - if (!(selector & ~SEGMENT_RPL_MASK)) - return 0; - - table = get_current_gdt_ro(); - - if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT) { - u16 ldt_selector = kvm_read_ldt(); - - if (!(ldt_selector & ~SEGMENT_RPL_MASK)) - return 0; - - table = (struct desc_struct *)segment_base(ldt_selector); - } - v = get_desc_base(&table[selector >> 3]); - return v; -} -#endif - -static inline void pt_load_msr(struct pt_ctx *ctx, u32 addr_range) -{ - u32 i; - - wrmsrl(MSR_IA32_RTIT_STATUS, ctx->status); - wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, ctx->output_base); - wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, ctx->output_mask); - wrmsrl(MSR_IA32_RTIT_CR3_MATCH, ctx->cr3_match); - for (i = 0; i < addr_range; i++) { - wrmsrl(MSR_IA32_RTIT_ADDR0_A + i * 2, ctx->addr_a[i]); - wrmsrl(MSR_IA32_RTIT_ADDR0_B + i * 2, ctx->addr_b[i]); - } -} - -static inline void pt_save_msr(struct pt_ctx *ctx, u32 addr_range) -{ - u32 i; - - rdmsrl(MSR_IA32_RTIT_STATUS, ctx->status); - rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, ctx->output_base); - rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, ctx->output_mask); - rdmsrl(MSR_IA32_RTIT_CR3_MATCH, ctx->cr3_match); - for (i = 0; i < addr_range; i++) { - rdmsrl(MSR_IA32_RTIT_ADDR0_A + i * 2, ctx->addr_a[i]); - rdmsrl(MSR_IA32_RTIT_ADDR0_B + i * 2, ctx->addr_b[i]); - } -} - -static void pt_guest_enter(struct vcpu_vmx *vmx) -{ - if (pt_mode == PT_MODE_SYSTEM) - return; - - /* - * GUEST_IA32_RTIT_CTL is already set in the VMCS. - * Save host state before VM entry. - */ - rdmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl); - if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) { - wrmsrl(MSR_IA32_RTIT_CTL, 0); - pt_save_msr(&vmx->pt_desc.host, vmx->pt_desc.addr_range); - pt_load_msr(&vmx->pt_desc.guest, vmx->pt_desc.addr_range); - } -} - -static void pt_guest_exit(struct vcpu_vmx *vmx) -{ - if (pt_mode == PT_MODE_SYSTEM) - return; - - if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) { - pt_save_msr(&vmx->pt_desc.guest, vmx->pt_desc.addr_range); - pt_load_msr(&vmx->pt_desc.host, vmx->pt_desc.addr_range); - } - - /* Reload host state (IA32_RTIT_CTL will be cleared on VM exit). */ - wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl); -} - -void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel, - unsigned long fs_base, unsigned long gs_base) -{ - if (unlikely(fs_sel != host->fs_sel)) { - if (!(fs_sel & 7)) - vmcs_write16(HOST_FS_SELECTOR, fs_sel); - else - vmcs_write16(HOST_FS_SELECTOR, 0); - host->fs_sel = fs_sel; - } - if (unlikely(gs_sel != host->gs_sel)) { - if (!(gs_sel & 7)) - vmcs_write16(HOST_GS_SELECTOR, gs_sel); - else - vmcs_write16(HOST_GS_SELECTOR, 0); - host->gs_sel = gs_sel; - } - if (unlikely(fs_base != host->fs_base)) { - vmcs_writel(HOST_FS_BASE, fs_base); - host->fs_base = fs_base; - } - if (unlikely(gs_base != host->gs_base)) { - vmcs_writel(HOST_GS_BASE, gs_base); - host->gs_base = gs_base; - } -} - -void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - struct vmcs_host_state *host_state; -#ifdef CONFIG_X86_64 - int cpu = raw_smp_processor_id(); -#endif - unsigned long fs_base, gs_base; - u16 fs_sel, gs_sel; - int i; - - vmx->req_immediate_exit = false; - - /* - * Note that guest MSRs to be saved/restored can also be changed - * when guest state is loaded. This happens when guest transitions - * to/from long-mode by setting MSR_EFER.LMA. - */ - if (!vmx->guest_msrs_ready) { - vmx->guest_msrs_ready = true; - for (i = 0; i < vmx->save_nmsrs; ++i) - kvm_set_shared_msr(vmx->guest_msrs[i].index, - vmx->guest_msrs[i].data, - vmx->guest_msrs[i].mask); - - } - if (vmx->guest_state_loaded) - return; - - host_state = &vmx->loaded_vmcs->host_state; - - /* - * Set host fs and gs selectors. Unfortunately, 22.2.3 does not - * allow segment selectors with cpl > 0 or ti == 1. - */ - host_state->ldt_sel = kvm_read_ldt(); - -#ifdef CONFIG_X86_64 - savesegment(ds, host_state->ds_sel); - savesegment(es, host_state->es_sel); - - gs_base = cpu_kernelmode_gs_base(cpu); - if (likely(is_64bit_mm(current->mm))) { - save_fsgs_for_kvm(); - fs_sel = current->thread.fsindex; - gs_sel = current->thread.gsindex; - fs_base = current->thread.fsbase; - vmx->msr_host_kernel_gs_base = current->thread.gsbase; - } else { - savesegment(fs, fs_sel); - savesegment(gs, gs_sel); - fs_base = read_msr(MSR_FS_BASE); - vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE); - } - - wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); -#else - savesegment(fs, fs_sel); - savesegment(gs, gs_sel); - fs_base = segment_base(fs_sel); - gs_base = segment_base(gs_sel); -#endif - - vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base); - vmx->guest_state_loaded = true; -} - -static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx) -{ - struct vmcs_host_state *host_state; - - if (!vmx->guest_state_loaded) - return; - - host_state = &vmx->loaded_vmcs->host_state; - - ++vmx->vcpu.stat.host_state_reload; - -#ifdef CONFIG_X86_64 - rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); -#endif - if (host_state->ldt_sel || (host_state->gs_sel & 7)) { - kvm_load_ldt(host_state->ldt_sel); -#ifdef CONFIG_X86_64 - load_gs_index(host_state->gs_sel); -#else - loadsegment(gs, host_state->gs_sel); -#endif - } - if (host_state->fs_sel & 7) - loadsegment(fs, host_state->fs_sel); -#ifdef CONFIG_X86_64 - if (unlikely(host_state->ds_sel | host_state->es_sel)) { - loadsegment(ds, host_state->ds_sel); - loadsegment(es, host_state->es_sel); - } -#endif - invalidate_tss_limit(); -#ifdef CONFIG_X86_64 - wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); -#endif - load_fixmap_gdt(raw_smp_processor_id()); - vmx->guest_state_loaded = false; - vmx->guest_msrs_ready = false; -} - -#ifdef CONFIG_X86_64 -static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx) -{ - preempt_disable(); - if (vmx->guest_state_loaded) - rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); - preempt_enable(); - return vmx->msr_guest_kernel_gs_base; -} - -static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data) -{ - preempt_disable(); - if (vmx->guest_state_loaded) - wrmsrl(MSR_KERNEL_GS_BASE, data); - preempt_enable(); - vmx->msr_guest_kernel_gs_base = data; -} -#endif - -static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) -{ - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - struct pi_desc old, new; - unsigned int dest; - - /* - * In case of hot-plug or hot-unplug, we may have to undo - * vmx_vcpu_pi_put even if there is no assigned device. And we - * always keep PI.NDST up to date for simplicity: it makes the - * code easier, and CPU migration is not a fast path. - */ - if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu) - return; - - /* - * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change - * PI.NDST: pi_post_block is the one expected to change PID.NDST and the - * wakeup handler expects the vCPU to be on the blocked_vcpu_list that - * matches PI.NDST. Otherwise, a vcpu may not be able to be woken up - * correctly. - */ - if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || vcpu->cpu == cpu) { - pi_clear_sn(pi_desc); - goto after_clear_sn; - } - - /* The full case. */ - do { - old.control = new.control = pi_desc->control; - - dest = cpu_physical_id(cpu); - - if (x2apic_enabled()) - new.ndst = dest; - else - new.ndst = (dest << 8) & 0xFF00; - - new.sn = 0; - } while (cmpxchg64(&pi_desc->control, old.control, - new.control) != old.control); - -after_clear_sn: - - /* - * Clear SN before reading the bitmap. The VT-d firmware - * writes the bitmap and reads SN atomically (5.2.3 in the - * spec), so it doesn't really have a memory barrier that - * pairs with this, but we cannot do that and we need one. - */ - smp_mb__after_atomic(); - - if (!pi_is_pir_empty(pi_desc)) - pi_set_on(pi_desc); -} - -void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - bool already_loaded = vmx->loaded_vmcs->cpu == cpu; - - if (!already_loaded) { - loaded_vmcs_clear(vmx->loaded_vmcs); - local_irq_disable(); - crash_disable_local_vmclear(cpu); - - /* - * Read loaded_vmcs->cpu should be before fetching - * loaded_vmcs->loaded_vmcss_on_cpu_link. - * See the comments in __loaded_vmcs_clear(). - */ - smp_rmb(); - - list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link, - &per_cpu(loaded_vmcss_on_cpu, cpu)); - crash_enable_local_vmclear(cpu); - local_irq_enable(); - } - - if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) { - per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs; - vmcs_load(vmx->loaded_vmcs->vmcs); - indirect_branch_prediction_barrier(); - } - - if (!already_loaded) { - void *gdt = get_current_gdt_ro(); - unsigned long sysenter_esp; - - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); - - /* - * Linux uses per-cpu TSS and GDT, so set these when switching - * processors. See 22.2.4. - */ - vmcs_writel(HOST_TR_BASE, - (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss); - vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */ - - rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); - vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ - - vmx->loaded_vmcs->cpu = cpu; - } - - /* Setup TSC multiplier */ - if (kvm_has_tsc_control && - vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) - decache_tsc_multiplier(vmx); -} - -/* - * Switches to specified vcpu, until a matching vcpu_put(), but assumes - * vcpu mutex is already taken. - */ -void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - vmx_vcpu_load_vmcs(vcpu, cpu); - - vmx_vcpu_pi_load(vcpu, cpu); - - vmx->host_pkru = read_pkru(); - vmx->host_debugctlmsr = get_debugctlmsr(); -} - -static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu) -{ - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - - if (!kvm_arch_has_assigned_device(vcpu->kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP) || - !kvm_vcpu_apicv_active(vcpu)) - return; - - /* Set SN when the vCPU is preempted */ - if (vcpu->preempted) - pi_set_sn(pi_desc); -} - -static void vmx_vcpu_put(struct kvm_vcpu *vcpu) -{ - vmx_vcpu_pi_put(vcpu); - - vmx_prepare_switch_to_host(to_vmx(vcpu)); -} - -static bool emulation_required(struct kvm_vcpu *vcpu) -{ - return emulate_invalid_guest_state && !guest_state_valid(vcpu); -} - -static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu); - -unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long rflags, save_rflags; - - if (!kvm_register_is_available(vcpu, VCPU_EXREG_RFLAGS)) { - kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS); - rflags = vmcs_readl(GUEST_RFLAGS); - if (vmx->rmode.vm86_active) { - rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; - save_rflags = vmx->rmode.save_rflags; - rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; - } - vmx->rflags = rflags; - } - return vmx->rflags; -} - -void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long old_rflags; - - if (enable_unrestricted_guest) { - kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS); - vmx->rflags = rflags; - vmcs_writel(GUEST_RFLAGS, rflags); - return; - } - - old_rflags = vmx_get_rflags(vcpu); - vmx->rflags = rflags; - if (vmx->rmode.vm86_active) { - vmx->rmode.save_rflags = rflags; - rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; - } - vmcs_writel(GUEST_RFLAGS, rflags); - - if ((old_rflags ^ vmx->rflags) & X86_EFLAGS_VM) - vmx->emulation_required = emulation_required(vcpu); -} - -u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu) -{ - u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); - int ret = 0; - - if (interruptibility & GUEST_INTR_STATE_STI) - ret |= KVM_X86_SHADOW_INT_STI; - if (interruptibility & GUEST_INTR_STATE_MOV_SS) - ret |= KVM_X86_SHADOW_INT_MOV_SS; - - return ret; -} - -void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) -{ - u32 interruptibility_old = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); - u32 interruptibility = interruptibility_old; - - interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS); - - if (mask & KVM_X86_SHADOW_INT_MOV_SS) - interruptibility |= GUEST_INTR_STATE_MOV_SS; - else if (mask & KVM_X86_SHADOW_INT_STI) - interruptibility |= GUEST_INTR_STATE_STI; - - if ((interruptibility != interruptibility_old)) - vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility); -} - -static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long value; - - /* - * Any MSR write that attempts to change bits marked reserved will - * case a #GP fault. - */ - if (data & vmx->pt_desc.ctl_bitmask) - return 1; - - /* - * Any attempt to modify IA32_RTIT_CTL while TraceEn is set will - * result in a #GP unless the same write also clears TraceEn. - */ - if ((vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) && - ((vmx->pt_desc.guest.ctl ^ data) & ~RTIT_CTL_TRACEEN)) - return 1; - - /* - * WRMSR to IA32_RTIT_CTL that sets TraceEn but clears this bit - * and FabricEn would cause #GP, if - * CPUID.(EAX=14H, ECX=0):ECX.SNGLRGNOUT[bit 2] = 0 - */ - if ((data & RTIT_CTL_TRACEEN) && !(data & RTIT_CTL_TOPA) && - !(data & RTIT_CTL_FABRIC_EN) && - !intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_single_range_output)) - return 1; - - /* - * MTCFreq, CycThresh and PSBFreq encodings check, any MSR write that - * utilize encodings marked reserved will casue a #GP fault. - */ - value = intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc_periods); - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc) && - !test_bit((data & RTIT_CTL_MTC_RANGE) >> - RTIT_CTL_MTC_RANGE_OFFSET, &value)) - return 1; - value = intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_cycle_thresholds); - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc) && - !test_bit((data & RTIT_CTL_CYC_THRESH) >> - RTIT_CTL_CYC_THRESH_OFFSET, &value)) - return 1; - value = intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_periods); - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc) && - !test_bit((data & RTIT_CTL_PSB_FREQ) >> - RTIT_CTL_PSB_FREQ_OFFSET, &value)) - return 1; - - /* - * If ADDRx_CFG is reserved or the encodings is >2 will - * cause a #GP fault. - */ - value = (data & RTIT_CTL_ADDR0) >> RTIT_CTL_ADDR0_OFFSET; - if ((value && (vmx->pt_desc.addr_range < 1)) || (value > 2)) - return 1; - value = (data & RTIT_CTL_ADDR1) >> RTIT_CTL_ADDR1_OFFSET; - if ((value && (vmx->pt_desc.addr_range < 2)) || (value > 2)) - return 1; - value = (data & RTIT_CTL_ADDR2) >> RTIT_CTL_ADDR2_OFFSET; - if ((value && (vmx->pt_desc.addr_range < 3)) || (value > 2)) - return 1; - value = (data & RTIT_CTL_ADDR3) >> RTIT_CTL_ADDR3_OFFSET; - if ((value && (vmx->pt_desc.addr_range < 4)) || (value > 2)) - return 1; - - return 0; -} - -static int skip_emulated_instruction(struct kvm_vcpu *vcpu) -{ - unsigned long rip; - - /* - * Using VMCS.VM_EXIT_INSTRUCTION_LEN on EPT misconfig depends on - * undefined behavior: Intel's SDM doesn't mandate the VMCS field be - * set when EPT misconfig occurs. In practice, real hardware updates - * VM_EXIT_INSTRUCTION_LEN on EPT misconfig, but other hypervisors - * (namely Hyper-V) don't set it due to it being undefined behavior, - * i.e. we end up advancing IP with some random value. - */ - if (!static_cpu_has(X86_FEATURE_HYPERVISOR) || - to_vmx(vcpu)->exit_reason != EXIT_REASON_EPT_MISCONFIG) { - rip = kvm_rip_read(vcpu); - rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); - kvm_rip_write(vcpu, rip); - } else { - if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP)) - return 0; - } - - /* skipping an emulated instruction also counts */ - vmx_set_interrupt_shadow(vcpu, 0); - - return 1; -} - -static void vmx_clear_hlt(struct kvm_vcpu *vcpu) -{ - /* - * Ensure that we clear the HLT state in the VMCS. We don't need to - * explicitly skip the instruction because if the HLT state is set, - * then the instruction is already executing and RIP has already been - * advanced. - */ - if (kvm_hlt_in_guest(vcpu->kvm) && - vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT) - vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); -} - -static void vmx_queue_exception(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned nr = vcpu->arch.exception.nr; - bool has_error_code = vcpu->arch.exception.has_error_code; - u32 error_code = vcpu->arch.exception.error_code; - u32 intr_info = nr | INTR_INFO_VALID_MASK; - - kvm_deliver_exception_payload(vcpu); - - if (has_error_code) { - vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); - intr_info |= INTR_INFO_DELIVER_CODE_MASK; - } - - if (vmx->rmode.vm86_active) { - int inc_eip = 0; - if (kvm_exception_is_soft(nr)) - inc_eip = vcpu->arch.event_exit_inst_len; - kvm_inject_realmode_interrupt(vcpu, nr, inc_eip); - return; - } - - WARN_ON_ONCE(vmx->emulation_required); - - if (kvm_exception_is_soft(nr)) { - vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, - vmx->vcpu.arch.event_exit_inst_len); - intr_info |= INTR_TYPE_SOFT_EXCEPTION; - } else - intr_info |= INTR_TYPE_HARD_EXCEPTION; - - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); - - vmx_clear_hlt(vcpu); -} - -static bool vmx_rdtscp_supported(void) -{ - return cpu_has_vmx_rdtscp(); -} - -static bool vmx_invpcid_supported(void) -{ - return cpu_has_vmx_invpcid(); -} - -/* - * Swap MSR entry in host/guest MSR entry array. - */ -static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) -{ - struct shared_msr_entry tmp; - - tmp = vmx->guest_msrs[to]; - vmx->guest_msrs[to] = vmx->guest_msrs[from]; - vmx->guest_msrs[from] = tmp; -} - -/* - * Set up the vmcs to automatically save and restore system - * msrs. Don't touch the 64-bit msrs if the guest is in legacy - * mode, as fiddling with msrs is very expensive. - */ -static void setup_msrs(struct vcpu_vmx *vmx) -{ - int save_nmsrs, index; - - save_nmsrs = 0; -#ifdef CONFIG_X86_64 - /* - * The SYSCALL MSRs are only needed on long mode guests, and only - * when EFER.SCE is set. - */ - if (is_long_mode(&vmx->vcpu) && (vmx->vcpu.arch.efer & EFER_SCE)) { - index = __find_msr_index(vmx, MSR_STAR); - if (index >= 0) - move_msr_up(vmx, index, save_nmsrs++); - index = __find_msr_index(vmx, MSR_LSTAR); - if (index >= 0) - move_msr_up(vmx, index, save_nmsrs++); - index = __find_msr_index(vmx, MSR_SYSCALL_MASK); - if (index >= 0) - move_msr_up(vmx, index, save_nmsrs++); - } -#endif - index = __find_msr_index(vmx, MSR_EFER); - if (index >= 0 && update_transition_efer(vmx, index)) - move_msr_up(vmx, index, save_nmsrs++); - index = __find_msr_index(vmx, MSR_TSC_AUX); - if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP)) - move_msr_up(vmx, index, save_nmsrs++); - index = __find_msr_index(vmx, MSR_IA32_TSX_CTRL); - if (index >= 0) - move_msr_up(vmx, index, save_nmsrs++); - - vmx->save_nmsrs = save_nmsrs; - vmx->guest_msrs_ready = false; - - if (cpu_has_vmx_msr_bitmap()) - vmx_update_msr_bitmap(&vmx->vcpu); -} - -static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu) -{ - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - - if (is_guest_mode(vcpu) && - (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)) - return vcpu->arch.tsc_offset - vmcs12->tsc_offset; - - return vcpu->arch.tsc_offset; -} - -static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) -{ - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - u64 g_tsc_offset = 0; - - /* - * We're here if L1 chose not to trap WRMSR to TSC. According - * to the spec, this should set L1's TSC; The offset that L1 - * set for L2 remains unchanged, and still needs to be added - * to the newly set TSC to get L2's TSC. - */ - if (is_guest_mode(vcpu) && - (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)) - g_tsc_offset = vmcs12->tsc_offset; - - trace_kvm_write_tsc_offset(vcpu->vcpu_id, - vcpu->arch.tsc_offset - g_tsc_offset, - offset); - vmcs_write64(TSC_OFFSET, offset + g_tsc_offset); - return offset + g_tsc_offset; -} - -/* - * nested_vmx_allowed() checks whether a guest should be allowed to use VMX - * instructions and MSRs (i.e., nested VMX). Nested VMX is disabled for - * all guests if the "nested" module option is off, and can also be disabled - * for a single guest by disabling its VMX cpuid bit. - */ -bool nested_vmx_allowed(struct kvm_vcpu *vcpu) -{ - return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX); -} - -static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu, - uint64_t val) -{ - uint64_t valid_bits = to_vmx(vcpu)->msr_ia32_feature_control_valid_bits; - - return !(val & ~valid_bits); -} - -static int vmx_get_msr_feature(struct kvm_msr_entry *msr) -{ - switch (msr->index) { - case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: - if (!nested) - return 1; - return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data); - default: - return 1; - } -} - -/* - * Reads an msr value (of 'msr_index') into 'pdata'. - * Returns 0 on success, non-0 otherwise. - * Assumes vcpu_load() was already called. - */ -static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - struct shared_msr_entry *msr; - u32 index; - - switch (msr_info->index) { -#ifdef CONFIG_X86_64 - case MSR_FS_BASE: - msr_info->data = vmcs_readl(GUEST_FS_BASE); - break; - case MSR_GS_BASE: - msr_info->data = vmcs_readl(GUEST_GS_BASE); - break; - case MSR_KERNEL_GS_BASE: - msr_info->data = vmx_read_guest_kernel_gs_base(vmx); - break; -#endif - case MSR_EFER: - return kvm_get_msr_common(vcpu, msr_info); - case MSR_IA32_TSX_CTRL: - if (!msr_info->host_initiated && - !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR)) - return 1; - goto find_shared_msr; - case MSR_IA32_UMWAIT_CONTROL: - if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx)) - return 1; - - msr_info->data = vmx->msr_ia32_umwait_control; - break; - case MSR_IA32_SPEC_CTRL: - if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) - return 1; - - msr_info->data = to_vmx(vcpu)->spec_ctrl; - break; - case MSR_IA32_SYSENTER_CS: - msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); - break; - case MSR_IA32_SYSENTER_EIP: - msr_info->data = vmcs_readl(GUEST_SYSENTER_EIP); - break; - case MSR_IA32_SYSENTER_ESP: - msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP); - break; - case MSR_IA32_BNDCFGS: - if (!kvm_mpx_supported() || - (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_MPX))) - return 1; - msr_info->data = vmcs_read64(GUEST_BNDCFGS); - break; - case MSR_IA32_MCG_EXT_CTL: - if (!msr_info->host_initiated && - !(vmx->msr_ia32_feature_control & - FEATURE_CONTROL_LMCE)) - return 1; - msr_info->data = vcpu->arch.mcg_ext_ctl; - break; - case MSR_IA32_FEATURE_CONTROL: - msr_info->data = vmx->msr_ia32_feature_control; - break; - case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: - if (!nested_vmx_allowed(vcpu)) - return 1; - return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index, - &msr_info->data); - case MSR_IA32_RTIT_CTL: - if (pt_mode != PT_MODE_HOST_GUEST) - return 1; - msr_info->data = vmx->pt_desc.guest.ctl; - break; - case MSR_IA32_RTIT_STATUS: - if (pt_mode != PT_MODE_HOST_GUEST) - return 1; - msr_info->data = vmx->pt_desc.guest.status; - break; - case MSR_IA32_RTIT_CR3_MATCH: - if ((pt_mode != PT_MODE_HOST_GUEST) || - !intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_cr3_filtering)) - return 1; - msr_info->data = vmx->pt_desc.guest.cr3_match; - break; - case MSR_IA32_RTIT_OUTPUT_BASE: - if ((pt_mode != PT_MODE_HOST_GUEST) || - (!intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_topa_output) && - !intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_single_range_output))) - return 1; - msr_info->data = vmx->pt_desc.guest.output_base; - break; - case MSR_IA32_RTIT_OUTPUT_MASK: - if ((pt_mode != PT_MODE_HOST_GUEST) || - (!intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_topa_output) && - !intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_single_range_output))) - return 1; - msr_info->data = vmx->pt_desc.guest.output_mask; - break; - case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: - index = msr_info->index - MSR_IA32_RTIT_ADDR0_A; - if ((pt_mode != PT_MODE_HOST_GUEST) || - (index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_num_address_ranges))) - return 1; - if (is_noncanonical_address(data, vcpu)) - return 1; - if (index % 2) - msr_info->data = vmx->pt_desc.guest.addr_b[index / 2]; - else - msr_info->data = vmx->pt_desc.guest.addr_a[index / 2]; - break; - case MSR_TSC_AUX: - if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) - return 1; - goto find_shared_msr; - default: - find_shared_msr: - msr = find_msr_entry(vmx, msr_info->index); - if (msr) { - msr_info->data = msr->data; - break; - } - return kvm_get_msr_common(vcpu, msr_info); - } - - return 0; -} - -/* - * Writes msr value into the appropriate "register". - * Returns 0 on success, non-0 otherwise. - * Assumes vcpu_load() was already called. - */ -static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - struct shared_msr_entry *msr; - int ret = 0; - u32 msr_index = msr_info->index; - u64 data = msr_info->data; - u32 index; - - switch (msr_index) { - case MSR_EFER: - ret = kvm_set_msr_common(vcpu, msr_info); - break; -#ifdef CONFIG_X86_64 - case MSR_FS_BASE: - vmx_segment_cache_clear(vmx); - vmcs_writel(GUEST_FS_BASE, data); - break; - case MSR_GS_BASE: - vmx_segment_cache_clear(vmx); - vmcs_writel(GUEST_GS_BASE, data); - break; - case MSR_KERNEL_GS_BASE: - vmx_write_guest_kernel_gs_base(vmx, data); - break; -#endif - case MSR_IA32_SYSENTER_CS: - if (is_guest_mode(vcpu)) - get_vmcs12(vcpu)->guest_sysenter_cs = data; - vmcs_write32(GUEST_SYSENTER_CS, data); - break; - case MSR_IA32_SYSENTER_EIP: - if (is_guest_mode(vcpu)) - get_vmcs12(vcpu)->guest_sysenter_eip = data; - vmcs_writel(GUEST_SYSENTER_EIP, data); - break; - case MSR_IA32_SYSENTER_ESP: - if (is_guest_mode(vcpu)) - get_vmcs12(vcpu)->guest_sysenter_esp = data; - vmcs_writel(GUEST_SYSENTER_ESP, data); - break; - case MSR_IA32_DEBUGCTLMSR: - if (is_guest_mode(vcpu) && get_vmcs12(vcpu)->vm_exit_controls & - VM_EXIT_SAVE_DEBUG_CONTROLS) - get_vmcs12(vcpu)->guest_ia32_debugctl = data; - - ret = kvm_set_msr_common(vcpu, msr_info); - break; - - case MSR_IA32_BNDCFGS: - if (!kvm_mpx_supported() || - (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_MPX))) - return 1; - if (is_noncanonical_address(data & PAGE_MASK, vcpu) || - (data & MSR_IA32_BNDCFGS_RSVD)) - return 1; - vmcs_write64(GUEST_BNDCFGS, data); - break; - case MSR_IA32_UMWAIT_CONTROL: - if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx)) - return 1; - - /* The reserved bit 1 and non-32 bit [63:32] should be zero */ - if (data & (BIT_ULL(1) | GENMASK_ULL(63, 32))) - return 1; - - vmx->msr_ia32_umwait_control = data; - break; - case MSR_IA32_SPEC_CTRL: - if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) - return 1; - - /* The STIBP bit doesn't fault even if it's not advertised */ - if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD)) - return 1; - - vmx->spec_ctrl = data; - - if (!data) - break; - - /* - * For non-nested: - * When it's written (to non-zero) for the first time, pass - * it through. - * - * For nested: - * The handling of the MSR bitmap for L2 guests is done in - * nested_vmx_prepare_msr_bitmap. We should not touch the - * vmcs02.msr_bitmap here since it gets completely overwritten - * in the merging. We update the vmcs01 here for L1 as well - * since it will end up touching the MSR anyway now. - */ - vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, - MSR_IA32_SPEC_CTRL, - MSR_TYPE_RW); - break; - case MSR_IA32_TSX_CTRL: - if (!msr_info->host_initiated && - !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR)) - return 1; - if (data & ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR)) - return 1; - goto find_shared_msr; - case MSR_IA32_PRED_CMD: - if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) - return 1; - - if (data & ~PRED_CMD_IBPB) - return 1; - - if (!data) - break; - - wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); - - /* - * For non-nested: - * When it's written (to non-zero) for the first time, pass - * it through. - * - * For nested: - * The handling of the MSR bitmap for L2 guests is done in - * nested_vmx_prepare_msr_bitmap. We should not touch the - * vmcs02.msr_bitmap here since it gets completely overwritten - * in the merging. - */ - vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD, - MSR_TYPE_W); - break; - case MSR_IA32_CR_PAT: - if (!kvm_pat_valid(data)) - return 1; - - if (is_guest_mode(vcpu) && - get_vmcs12(vcpu)->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT) - get_vmcs12(vcpu)->guest_ia32_pat = data; - - if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { - vmcs_write64(GUEST_IA32_PAT, data); - vcpu->arch.pat = data; - break; - } - ret = kvm_set_msr_common(vcpu, msr_info); - break; - case MSR_IA32_TSC_ADJUST: - ret = kvm_set_msr_common(vcpu, msr_info); - break; - case MSR_IA32_MCG_EXT_CTL: - if ((!msr_info->host_initiated && - !(to_vmx(vcpu)->msr_ia32_feature_control & - FEATURE_CONTROL_LMCE)) || - (data & ~MCG_EXT_CTL_LMCE_EN)) - return 1; - vcpu->arch.mcg_ext_ctl = data; - break; - case MSR_IA32_FEATURE_CONTROL: - if (!vmx_feature_control_msr_valid(vcpu, data) || - (to_vmx(vcpu)->msr_ia32_feature_control & - FEATURE_CONTROL_LOCKED && !msr_info->host_initiated)) - return 1; - vmx->msr_ia32_feature_control = data; - if (msr_info->host_initiated && data == 0) - vmx_leave_nested(vcpu); - break; - case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: - if (!msr_info->host_initiated) - return 1; /* they are read-only */ - if (!nested_vmx_allowed(vcpu)) - return 1; - return vmx_set_vmx_msr(vcpu, msr_index, data); - case MSR_IA32_RTIT_CTL: - if ((pt_mode != PT_MODE_HOST_GUEST) || - vmx_rtit_ctl_check(vcpu, data) || - vmx->nested.vmxon) - return 1; - vmcs_write64(GUEST_IA32_RTIT_CTL, data); - vmx->pt_desc.guest.ctl = data; - pt_update_intercept_for_msr(vmx); - break; - case MSR_IA32_RTIT_STATUS: - if ((pt_mode != PT_MODE_HOST_GUEST) || - (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) || - (data & MSR_IA32_RTIT_STATUS_MASK)) - return 1; - vmx->pt_desc.guest.status = data; - break; - case MSR_IA32_RTIT_CR3_MATCH: - if ((pt_mode != PT_MODE_HOST_GUEST) || - (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) || - !intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_cr3_filtering)) - return 1; - vmx->pt_desc.guest.cr3_match = data; - break; - case MSR_IA32_RTIT_OUTPUT_BASE: - if ((pt_mode != PT_MODE_HOST_GUEST) || - (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) || - (!intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_topa_output) && - !intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_single_range_output)) || - (data & MSR_IA32_RTIT_OUTPUT_BASE_MASK)) - return 1; - vmx->pt_desc.guest.output_base = data; - break; - case MSR_IA32_RTIT_OUTPUT_MASK: - if ((pt_mode != PT_MODE_HOST_GUEST) || - (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) || - (!intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_topa_output) && - !intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_single_range_output))) - return 1; - vmx->pt_desc.guest.output_mask = data; - break; - case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: - index = msr_info->index - MSR_IA32_RTIT_ADDR0_A; - if ((pt_mode != PT_MODE_HOST_GUEST) || - (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) || - (index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_num_address_ranges))) - return 1; - if (is_noncanonical_address(data, vcpu)) - return 1; - if (index % 2) - vmx->pt_desc.guest.addr_b[index / 2] = data; - else - vmx->pt_desc.guest.addr_a[index / 2] = data; - break; - case MSR_TSC_AUX: - if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) - return 1; - /* Check reserved bit, higher 32 bits should be zero */ - if ((data >> 32) != 0) - return 1; - goto find_shared_msr; - - default: - find_shared_msr: - msr = find_msr_entry(vmx, msr_index); - if (msr) - ret = vmx_set_guest_msr(vmx, msr, data); - else - ret = kvm_set_msr_common(vcpu, msr_info); - } - - return ret; -} - -static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) -{ - kvm_register_mark_available(vcpu, reg); - - switch (reg) { - case VCPU_REGS_RSP: - vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP); - break; - case VCPU_REGS_RIP: - vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(GUEST_RIP); - break; - case VCPU_EXREG_PDPTR: - if (enable_ept) - ept_save_pdptrs(vcpu); - break; - case VCPU_EXREG_CR3: - if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu))) - vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); - break; - default: - WARN_ON_ONCE(1); - break; - } -} - -static __init int cpu_has_kvm_support(void) -{ - return cpu_has_vmx(); -} - -static __init int vmx_disabled_by_bios(void) -{ - u64 msr; - - rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); - if (msr & FEATURE_CONTROL_LOCKED) { - /* launched w/ TXT and VMX disabled */ - if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX) - && tboot_enabled()) - return 1; - /* launched w/o TXT and VMX only enabled w/ TXT */ - if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) - && (msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX) - && !tboot_enabled()) { - printk(KERN_WARNING "kvm: disable TXT in the BIOS or " - "activate TXT before enabling KVM\n"); - return 1; - } - /* launched w/o TXT and VMX disabled */ - if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) - && !tboot_enabled()) - return 1; - } - - return 0; -} - -static void kvm_cpu_vmxon(u64 addr) -{ - cr4_set_bits(X86_CR4_VMXE); - intel_pt_handle_vmx(1); - - asm volatile ("vmxon %0" : : "m"(addr)); -} - -static int hardware_enable(void) -{ - int cpu = raw_smp_processor_id(); - u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); - u64 old, test_bits; - - if (cr4_read_shadow() & X86_CR4_VMXE) - return -EBUSY; - - /* - * This can happen if we hot-added a CPU but failed to allocate - * VP assist page for it. - */ - if (static_branch_unlikely(&enable_evmcs) && - !hv_get_vp_assist_page(cpu)) - return -EFAULT; - - INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); - INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu)); - spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); - - /* - * Now we can enable the vmclear operation in kdump - * since the loaded_vmcss_on_cpu list on this cpu - * has been initialized. - * - * Though the cpu is not in VMX operation now, there - * is no problem to enable the vmclear operation - * for the loaded_vmcss_on_cpu list is empty! - */ - crash_enable_local_vmclear(cpu); - - rdmsrl(MSR_IA32_FEATURE_CONTROL, old); - - test_bits = FEATURE_CONTROL_LOCKED; - test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; - if (tboot_enabled()) - test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX; - - if ((old & test_bits) != test_bits) { - /* enable and lock */ - wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits); - } - kvm_cpu_vmxon(phys_addr); - if (enable_ept) - ept_sync_global(); - - return 0; -} - -static void vmclear_local_loaded_vmcss(void) -{ - int cpu = raw_smp_processor_id(); - struct loaded_vmcs *v, *n; - - list_for_each_entry_safe(v, n, &per_cpu(loaded_vmcss_on_cpu, cpu), - loaded_vmcss_on_cpu_link) - __loaded_vmcs_clear(v); -} - - -/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot() - * tricks. - */ -static void kvm_cpu_vmxoff(void) -{ - asm volatile (__ex("vmxoff")); - - intel_pt_handle_vmx(0); - cr4_clear_bits(X86_CR4_VMXE); -} - -static void hardware_disable(void) -{ - vmclear_local_loaded_vmcss(); - kvm_cpu_vmxoff(); -} - -static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, - u32 msr, u32 *result) -{ - u32 vmx_msr_low, vmx_msr_high; - u32 ctl = ctl_min | ctl_opt; - - rdmsr(msr, vmx_msr_low, vmx_msr_high); - - ctl &= vmx_msr_high; /* bit == 0 in high word ==> must be zero */ - ctl |= vmx_msr_low; /* bit == 1 in low word ==> must be one */ - - /* Ensure minimum (required) set of control bits are supported. */ - if (ctl_min & ~ctl) - return -EIO; - - *result = ctl; - return 0; -} - -static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, - struct vmx_capability *vmx_cap) -{ - u32 vmx_msr_low, vmx_msr_high; - u32 min, opt, min2, opt2; - u32 _pin_based_exec_control = 0; - u32 _cpu_based_exec_control = 0; - u32 _cpu_based_2nd_exec_control = 0; - u32 _vmexit_control = 0; - u32 _vmentry_control = 0; - - memset(vmcs_conf, 0, sizeof(*vmcs_conf)); - min = CPU_BASED_HLT_EXITING | -#ifdef CONFIG_X86_64 - CPU_BASED_CR8_LOAD_EXITING | - CPU_BASED_CR8_STORE_EXITING | -#endif - CPU_BASED_CR3_LOAD_EXITING | - CPU_BASED_CR3_STORE_EXITING | - CPU_BASED_UNCOND_IO_EXITING | - CPU_BASED_MOV_DR_EXITING | - CPU_BASED_USE_TSC_OFFSETTING | - CPU_BASED_MWAIT_EXITING | - CPU_BASED_MONITOR_EXITING | - CPU_BASED_INVLPG_EXITING | - CPU_BASED_RDPMC_EXITING; - - opt = CPU_BASED_TPR_SHADOW | - CPU_BASED_USE_MSR_BITMAPS | - CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; - if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS, - &_cpu_based_exec_control) < 0) - return -EIO; -#ifdef CONFIG_X86_64 - if ((_cpu_based_exec_control & CPU_BASED_TPR_SHADOW)) - _cpu_based_exec_control &= ~CPU_BASED_CR8_LOAD_EXITING & - ~CPU_BASED_CR8_STORE_EXITING; -#endif - if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { - min2 = 0; - opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | - SECONDARY_EXEC_WBINVD_EXITING | - SECONDARY_EXEC_ENABLE_VPID | - SECONDARY_EXEC_ENABLE_EPT | - SECONDARY_EXEC_UNRESTRICTED_GUEST | - SECONDARY_EXEC_PAUSE_LOOP_EXITING | - SECONDARY_EXEC_DESC | - SECONDARY_EXEC_RDTSCP | - SECONDARY_EXEC_ENABLE_INVPCID | - SECONDARY_EXEC_APIC_REGISTER_VIRT | - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | - SECONDARY_EXEC_SHADOW_VMCS | - SECONDARY_EXEC_XSAVES | - SECONDARY_EXEC_RDSEED_EXITING | - SECONDARY_EXEC_RDRAND_EXITING | - SECONDARY_EXEC_ENABLE_PML | - SECONDARY_EXEC_TSC_SCALING | - SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | - SECONDARY_EXEC_PT_USE_GPA | - SECONDARY_EXEC_PT_CONCEAL_VMX | - SECONDARY_EXEC_ENABLE_VMFUNC | - SECONDARY_EXEC_ENCLS_EXITING; - if (adjust_vmx_controls(min2, opt2, - MSR_IA32_VMX_PROCBASED_CTLS2, - &_cpu_based_2nd_exec_control) < 0) - return -EIO; - } -#ifndef CONFIG_X86_64 - if (!(_cpu_based_2nd_exec_control & - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) - _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW; -#endif - - if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW)) - _cpu_based_2nd_exec_control &= ~( - SECONDARY_EXEC_APIC_REGISTER_VIRT | - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); - - rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP, - &vmx_cap->ept, &vmx_cap->vpid); - - if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { - /* CR3 accesses and invlpg don't need to cause VM Exits when EPT - enabled */ - _cpu_based_exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING | - CPU_BASED_CR3_STORE_EXITING | - CPU_BASED_INVLPG_EXITING); - } else if (vmx_cap->ept) { - vmx_cap->ept = 0; - pr_warn_once("EPT CAP should not exist if not support " - "1-setting enable EPT VM-execution control\n"); - } - if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_VPID) && - vmx_cap->vpid) { - vmx_cap->vpid = 0; - pr_warn_once("VPID CAP should not exist if not support " - "1-setting enable VPID VM-execution control\n"); - } - - min = VM_EXIT_SAVE_DEBUG_CONTROLS | VM_EXIT_ACK_INTR_ON_EXIT; -#ifdef CONFIG_X86_64 - min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; -#endif - opt = VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | - VM_EXIT_LOAD_IA32_PAT | - VM_EXIT_LOAD_IA32_EFER | - VM_EXIT_CLEAR_BNDCFGS | - VM_EXIT_PT_CONCEAL_PIP | - VM_EXIT_CLEAR_IA32_RTIT_CTL; - if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, - &_vmexit_control) < 0) - return -EIO; - - min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; - opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR | - PIN_BASED_VMX_PREEMPTION_TIMER; - if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, - &_pin_based_exec_control) < 0) - return -EIO; - - if (cpu_has_broken_vmx_preemption_timer()) - _pin_based_exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER; - if (!(_cpu_based_2nd_exec_control & - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)) - _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR; - - min = VM_ENTRY_LOAD_DEBUG_CONTROLS; - opt = VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | - VM_ENTRY_LOAD_IA32_PAT | - VM_ENTRY_LOAD_IA32_EFER | - VM_ENTRY_LOAD_BNDCFGS | - VM_ENTRY_PT_CONCEAL_PIP | - VM_ENTRY_LOAD_IA32_RTIT_CTL; - if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, - &_vmentry_control) < 0) - return -EIO; - - /* - * Some cpus support VM_{ENTRY,EXIT}_IA32_PERF_GLOBAL_CTRL but they - * can't be used due to an errata where VM Exit may incorrectly clear - * IA32_PERF_GLOBAL_CTRL[34:32]. Workaround the errata by using the - * MSR load mechanism to switch IA32_PERF_GLOBAL_CTRL. - */ - if (boot_cpu_data.x86 == 0x6) { - switch (boot_cpu_data.x86_model) { - case 26: /* AAK155 */ - case 30: /* AAP115 */ - case 37: /* AAT100 */ - case 44: /* BC86,AAY89,BD102 */ - case 46: /* BA97 */ - _vmentry_control &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; - _vmexit_control &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; - pr_warn_once("kvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL " - "does not work properly. Using workaround\n"); - break; - default: - break; - } - } - - - rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high); - - /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */ - if ((vmx_msr_high & 0x1fff) > PAGE_SIZE) - return -EIO; - -#ifdef CONFIG_X86_64 - /* IA-32 SDM Vol 3B: 64-bit CPUs always have VMX_BASIC_MSR[48]==0. */ - if (vmx_msr_high & (1u<<16)) - return -EIO; -#endif - - /* Require Write-Back (WB) memory type for VMCS accesses. */ - if (((vmx_msr_high >> 18) & 15) != 6) - return -EIO; - - vmcs_conf->size = vmx_msr_high & 0x1fff; - vmcs_conf->order = get_order(vmcs_conf->size); - vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff; - - vmcs_conf->revision_id = vmx_msr_low; - - vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control; - vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control; - vmcs_conf->cpu_based_2nd_exec_ctrl = _cpu_based_2nd_exec_control; - vmcs_conf->vmexit_ctrl = _vmexit_control; - vmcs_conf->vmentry_ctrl = _vmentry_control; - - if (static_branch_unlikely(&enable_evmcs)) - evmcs_sanitize_exec_ctrls(vmcs_conf); - - return 0; -} - -struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags) -{ - int node = cpu_to_node(cpu); - struct page *pages; - struct vmcs *vmcs; - - pages = __alloc_pages_node(node, flags, vmcs_config.order); - if (!pages) - return NULL; - vmcs = page_address(pages); - memset(vmcs, 0, vmcs_config.size); - - /* KVM supports Enlightened VMCS v1 only */ - if (static_branch_unlikely(&enable_evmcs)) - vmcs->hdr.revision_id = KVM_EVMCS_VERSION; - else - vmcs->hdr.revision_id = vmcs_config.revision_id; - - if (shadow) - vmcs->hdr.shadow_vmcs = 1; - return vmcs; -} - -void free_vmcs(struct vmcs *vmcs) -{ - free_pages((unsigned long)vmcs, vmcs_config.order); -} - -/* - * Free a VMCS, but before that VMCLEAR it on the CPU where it was last loaded - */ -void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) -{ - if (!loaded_vmcs->vmcs) - return; - loaded_vmcs_clear(loaded_vmcs); - free_vmcs(loaded_vmcs->vmcs); - loaded_vmcs->vmcs = NULL; - if (loaded_vmcs->msr_bitmap) - free_page((unsigned long)loaded_vmcs->msr_bitmap); - WARN_ON(loaded_vmcs->shadow_vmcs != NULL); -} - -int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) -{ - loaded_vmcs->vmcs = alloc_vmcs(false); - if (!loaded_vmcs->vmcs) - return -ENOMEM; - - loaded_vmcs->shadow_vmcs = NULL; - loaded_vmcs->hv_timer_soft_disabled = false; - loaded_vmcs_init(loaded_vmcs); - - if (cpu_has_vmx_msr_bitmap()) { - loaded_vmcs->msr_bitmap = (unsigned long *) - __get_free_page(GFP_KERNEL_ACCOUNT); - if (!loaded_vmcs->msr_bitmap) - goto out_vmcs; - memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); - - if (IS_ENABLED(CONFIG_HYPERV) && - static_branch_unlikely(&enable_evmcs) && - (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) { - struct hv_enlightened_vmcs *evmcs = - (struct hv_enlightened_vmcs *)loaded_vmcs->vmcs; - - evmcs->hv_enlightenments_control.msr_bitmap = 1; - } - } - - memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state)); - memset(&loaded_vmcs->controls_shadow, 0, - sizeof(struct vmcs_controls_shadow)); - - return 0; - -out_vmcs: - free_loaded_vmcs(loaded_vmcs); - return -ENOMEM; -} - -static void free_kvm_area(void) -{ - int cpu; - - for_each_possible_cpu(cpu) { - free_vmcs(per_cpu(vmxarea, cpu)); - per_cpu(vmxarea, cpu) = NULL; - } -} - -static __init int alloc_kvm_area(void) -{ - int cpu; - - for_each_possible_cpu(cpu) { - struct vmcs *vmcs; - - vmcs = alloc_vmcs_cpu(false, cpu, GFP_KERNEL); - if (!vmcs) { - free_kvm_area(); - return -ENOMEM; - } - - /* - * When eVMCS is enabled, alloc_vmcs_cpu() sets - * vmcs->revision_id to KVM_EVMCS_VERSION instead of - * revision_id reported by MSR_IA32_VMX_BASIC. - * - * However, even though not explicitly documented by - * TLFS, VMXArea passed as VMXON argument should - * still be marked with revision_id reported by - * physical CPU. - */ - if (static_branch_unlikely(&enable_evmcs)) - vmcs->hdr.revision_id = vmcs_config.revision_id; - - per_cpu(vmxarea, cpu) = vmcs; - } - return 0; -} - -static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg, - struct kvm_segment *save) -{ - if (!emulate_invalid_guest_state) { - /* - * CS and SS RPL should be equal during guest entry according - * to VMX spec, but in reality it is not always so. Since vcpu - * is in the middle of the transition from real mode to - * protected mode it is safe to assume that RPL 0 is a good - * default value. - */ - if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS) - save->selector &= ~SEGMENT_RPL_MASK; - save->dpl = save->selector & SEGMENT_RPL_MASK; - save->s = 1; - } - vmx_set_segment(vcpu, save, seg); -} - -static void enter_pmode(struct kvm_vcpu *vcpu) -{ - unsigned long flags; - struct vcpu_vmx *vmx = to_vmx(vcpu); - - /* - * Update real mode segment cache. It may be not up-to-date if sement - * register was written while vcpu was in a guest mode. - */ - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES); - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS); - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS); - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS); - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS); - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS); - - vmx->rmode.vm86_active = 0; - - vmx_segment_cache_clear(vmx); - - vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); - - flags = vmcs_readl(GUEST_RFLAGS); - flags &= RMODE_GUEST_OWNED_EFLAGS_BITS; - flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; - vmcs_writel(GUEST_RFLAGS, flags); - - vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) | - (vmcs_readl(CR4_READ_SHADOW) & X86_CR4_VME)); - - update_exception_bitmap(vcpu); - - fix_pmode_seg(vcpu, VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]); - fix_pmode_seg(vcpu, VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]); - fix_pmode_seg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]); - fix_pmode_seg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); - fix_pmode_seg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); - fix_pmode_seg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]); -} - -static void fix_rmode_seg(int seg, struct kvm_segment *save) -{ - const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; - struct kvm_segment var = *save; - - var.dpl = 0x3; - if (seg == VCPU_SREG_CS) - var.type = 0x3; - - if (!emulate_invalid_guest_state) { - var.selector = var.base >> 4; - var.base = var.base & 0xffff0; - var.limit = 0xffff; - var.g = 0; - var.db = 0; - var.present = 1; - var.s = 1; - var.l = 0; - var.unusable = 0; - var.type = 0x3; - var.avl = 0; - if (save->base & 0xf) - printk_once(KERN_WARNING "kvm: segment base is not " - "paragraph aligned when entering " - "protected mode (seg=%d)", seg); - } - - vmcs_write16(sf->selector, var.selector); - vmcs_writel(sf->base, var.base); - vmcs_write32(sf->limit, var.limit); - vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var)); -} - -static void enter_rmode(struct kvm_vcpu *vcpu) -{ - unsigned long flags; - struct vcpu_vmx *vmx = to_vmx(vcpu); - struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm); - - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES); - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS); - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS); - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS); - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS); - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS); - - vmx->rmode.vm86_active = 1; - - /* - * Very old userspace does not call KVM_SET_TSS_ADDR before entering - * vcpu. Warn the user that an update is overdue. - */ - if (!kvm_vmx->tss_addr) - printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be " - "called before entering vcpu\n"); - - vmx_segment_cache_clear(vmx); - - vmcs_writel(GUEST_TR_BASE, kvm_vmx->tss_addr); - vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1); - vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); - - flags = vmcs_readl(GUEST_RFLAGS); - vmx->rmode.save_rflags = flags; - - flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; - - vmcs_writel(GUEST_RFLAGS, flags); - vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME); - update_exception_bitmap(vcpu); - - fix_rmode_seg(VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]); - fix_rmode_seg(VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]); - fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]); - fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); - fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]); - fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); - - kvm_mmu_reset_context(vcpu); -} - -void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - struct shared_msr_entry *msr = find_msr_entry(vmx, MSR_EFER); - - if (!msr) - return; - - vcpu->arch.efer = efer; - if (efer & EFER_LMA) { - vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); - msr->data = efer; - } else { - vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); - - msr->data = efer & ~EFER_LME; - } - setup_msrs(vmx); -} - -#ifdef CONFIG_X86_64 - -static void enter_lmode(struct kvm_vcpu *vcpu) -{ - u32 guest_tr_ar; - - vmx_segment_cache_clear(to_vmx(vcpu)); - - guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); - if ((guest_tr_ar & VMX_AR_TYPE_MASK) != VMX_AR_TYPE_BUSY_64_TSS) { - pr_debug_ratelimited("%s: tss fixup for long mode. \n", - __func__); - vmcs_write32(GUEST_TR_AR_BYTES, - (guest_tr_ar & ~VMX_AR_TYPE_MASK) - | VMX_AR_TYPE_BUSY_64_TSS); - } - vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA); -} - -static void exit_lmode(struct kvm_vcpu *vcpu) -{ - vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); - vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA); -} - -#endif - -static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr) -{ - int vpid = to_vmx(vcpu)->vpid; - - if (!vpid_sync_vcpu_addr(vpid, addr)) - vpid_sync_context(vpid); - - /* - * If VPIDs are not supported or enabled, then the above is a no-op. - * But we don't really need a TLB flush in that case anyway, because - * each VM entry/exit includes an implicit flush when VPID is 0. - */ -} - -static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) -{ - ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; - - vcpu->arch.cr0 &= ~cr0_guest_owned_bits; - vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits; -} - -static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) -{ - ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits; - - vcpu->arch.cr4 &= ~cr4_guest_owned_bits; - vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & cr4_guest_owned_bits; -} - -static void ept_load_pdptrs(struct kvm_vcpu *vcpu) -{ - struct kvm_mmu *mmu = vcpu->arch.walk_mmu; - - if (!kvm_register_is_dirty(vcpu, VCPU_EXREG_PDPTR)) - return; - - if (is_pae_paging(vcpu)) { - vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]); - vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]); - vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]); - vmcs_write64(GUEST_PDPTR3, mmu->pdptrs[3]); - } -} - -void ept_save_pdptrs(struct kvm_vcpu *vcpu) -{ - struct kvm_mmu *mmu = vcpu->arch.walk_mmu; - - if (is_pae_paging(vcpu)) { - mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0); - mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1); - mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2); - mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3); - } - - kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR); -} - -static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, - unsigned long cr0, - struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3)) - vmx_cache_reg(vcpu, VCPU_EXREG_CR3); - if (!(cr0 & X86_CR0_PG)) { - /* From paging/starting to nonpaging */ - exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING | - CPU_BASED_CR3_STORE_EXITING); - vcpu->arch.cr0 = cr0; - vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); - } else if (!is_paging(vcpu)) { - /* From nonpaging to paging */ - exec_controls_clearbit(vmx, CPU_BASED_CR3_LOAD_EXITING | - CPU_BASED_CR3_STORE_EXITING); - vcpu->arch.cr0 = cr0; - vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); - } - - if (!(cr0 & X86_CR0_WP)) - *hw_cr0 &= ~X86_CR0_WP; -} - -void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long hw_cr0; - - hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF); - if (enable_unrestricted_guest) - hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST; - else { - hw_cr0 |= KVM_VM_CR0_ALWAYS_ON; - - if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) - enter_pmode(vcpu); - - if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE)) - enter_rmode(vcpu); - } - -#ifdef CONFIG_X86_64 - if (vcpu->arch.efer & EFER_LME) { - if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) - enter_lmode(vcpu); - if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) - exit_lmode(vcpu); - } -#endif - - if (enable_ept && !enable_unrestricted_guest) - ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); - - vmcs_writel(CR0_READ_SHADOW, cr0); - vmcs_writel(GUEST_CR0, hw_cr0); - vcpu->arch.cr0 = cr0; - - /* depends on vcpu->arch.cr0 to be set to a new value */ - vmx->emulation_required = emulation_required(vcpu); -} - -static int get_ept_level(struct kvm_vcpu *vcpu) -{ - if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48)) - return 5; - return 4; -} - -u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa) -{ - u64 eptp = VMX_EPTP_MT_WB; - - eptp |= (get_ept_level(vcpu) == 5) ? VMX_EPTP_PWL_5 : VMX_EPTP_PWL_4; - - if (enable_ept_ad_bits && - (!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu))) - eptp |= VMX_EPTP_AD_ENABLE_BIT; - eptp |= (root_hpa & PAGE_MASK); - - return eptp; -} - -void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) -{ - struct kvm *kvm = vcpu->kvm; - bool update_guest_cr3 = true; - unsigned long guest_cr3; - u64 eptp; - - guest_cr3 = cr3; - if (enable_ept) { - eptp = construct_eptp(vcpu, cr3); - vmcs_write64(EPT_POINTER, eptp); - - if (kvm_x86_ops->tlb_remote_flush) { - spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock); - to_vmx(vcpu)->ept_pointer = eptp; - to_kvm_vmx(kvm)->ept_pointers_match - = EPT_POINTERS_CHECK; - spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock); - } - - /* Loading vmcs02.GUEST_CR3 is handled by nested VM-Enter. */ - if (is_guest_mode(vcpu)) - update_guest_cr3 = false; - else if (!enable_unrestricted_guest && !is_paging(vcpu)) - guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr; - else if (test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) - guest_cr3 = vcpu->arch.cr3; - else /* vmcs01.GUEST_CR3 is already up-to-date. */ - update_guest_cr3 = false; - ept_load_pdptrs(vcpu); - } - - if (update_guest_cr3) - vmcs_writel(GUEST_CR3, guest_cr3); -} - -int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - /* - * Pass through host's Machine Check Enable value to hw_cr4, which - * is in force while we are in guest mode. Do not let guests control - * this bit, even if host CR4.MCE == 0. - */ - unsigned long hw_cr4; - - hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE); - if (enable_unrestricted_guest) - hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST; - else if (vmx->rmode.vm86_active) - hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON; - else - hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON; - - if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) { - if (cr4 & X86_CR4_UMIP) { - secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_DESC); - hw_cr4 &= ~X86_CR4_UMIP; - } else if (!is_guest_mode(vcpu) || - !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC)) { - secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_DESC); - } - } - - if (cr4 & X86_CR4_VMXE) { - /* - * To use VMXON (and later other VMX instructions), a guest - * must first be able to turn on cr4.VMXE (see handle_vmon()). - * So basically the check on whether to allow nested VMX - * is here. We operate under the default treatment of SMM, - * so VMX cannot be enabled under SMM. - */ - if (!nested_vmx_allowed(vcpu) || is_smm(vcpu)) - return 1; - } - - if (vmx->nested.vmxon && !nested_cr4_valid(vcpu, cr4)) - return 1; - - vcpu->arch.cr4 = cr4; - - if (!enable_unrestricted_guest) { - if (enable_ept) { - if (!is_paging(vcpu)) { - hw_cr4 &= ~X86_CR4_PAE; - hw_cr4 |= X86_CR4_PSE; - } else if (!(cr4 & X86_CR4_PAE)) { - hw_cr4 &= ~X86_CR4_PAE; - } - } - - /* - * SMEP/SMAP/PKU is disabled if CPU is in non-paging mode in - * hardware. To emulate this behavior, SMEP/SMAP/PKU needs - * to be manually disabled when guest switches to non-paging - * mode. - * - * If !enable_unrestricted_guest, the CPU is always running - * with CR0.PG=1 and CR4 needs to be modified. - * If enable_unrestricted_guest, the CPU automatically - * disables SMEP/SMAP/PKU when the guest sets CR0.PG=0. - */ - if (!is_paging(vcpu)) - hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE); - } - - vmcs_writel(CR4_READ_SHADOW, cr4); - vmcs_writel(GUEST_CR4, hw_cr4); - return 0; -} - -void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - u32 ar; - - if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) { - *var = vmx->rmode.segs[seg]; - if (seg == VCPU_SREG_TR - || var->selector == vmx_read_guest_seg_selector(vmx, seg)) - return; - var->base = vmx_read_guest_seg_base(vmx, seg); - var->selector = vmx_read_guest_seg_selector(vmx, seg); - return; - } - var->base = vmx_read_guest_seg_base(vmx, seg); - var->limit = vmx_read_guest_seg_limit(vmx, seg); - var->selector = vmx_read_guest_seg_selector(vmx, seg); - ar = vmx_read_guest_seg_ar(vmx, seg); - var->unusable = (ar >> 16) & 1; - var->type = ar & 15; - var->s = (ar >> 4) & 1; - var->dpl = (ar >> 5) & 3; - /* - * Some userspaces do not preserve unusable property. Since usable - * segment has to be present according to VMX spec we can use present - * property to amend userspace bug by making unusable segment always - * nonpresent. vmx_segment_access_rights() already marks nonpresent - * segment as unusable. - */ - var->present = !var->unusable; - var->avl = (ar >> 12) & 1; - var->l = (ar >> 13) & 1; - var->db = (ar >> 14) & 1; - var->g = (ar >> 15) & 1; -} - -static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) -{ - struct kvm_segment s; - - if (to_vmx(vcpu)->rmode.vm86_active) { - vmx_get_segment(vcpu, &s, seg); - return s.base; - } - return vmx_read_guest_seg_base(to_vmx(vcpu), seg); -} - -int vmx_get_cpl(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - if (unlikely(vmx->rmode.vm86_active)) - return 0; - else { - int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS); - return VMX_AR_DPL(ar); - } -} - -static u32 vmx_segment_access_rights(struct kvm_segment *var) -{ - u32 ar; - - if (var->unusable || !var->present) - ar = 1 << 16; - else { - ar = var->type & 15; - ar |= (var->s & 1) << 4; - ar |= (var->dpl & 3) << 5; - ar |= (var->present & 1) << 7; - ar |= (var->avl & 1) << 12; - ar |= (var->l & 1) << 13; - ar |= (var->db & 1) << 14; - ar |= (var->g & 1) << 15; - } - - return ar; -} - -void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; - - vmx_segment_cache_clear(vmx); - - if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) { - vmx->rmode.segs[seg] = *var; - if (seg == VCPU_SREG_TR) - vmcs_write16(sf->selector, var->selector); - else if (var->s) - fix_rmode_seg(seg, &vmx->rmode.segs[seg]); - goto out; - } - - vmcs_writel(sf->base, var->base); - vmcs_write32(sf->limit, var->limit); - vmcs_write16(sf->selector, var->selector); - - /* - * Fix the "Accessed" bit in AR field of segment registers for older - * qemu binaries. - * IA32 arch specifies that at the time of processor reset the - * "Accessed" bit in the AR field of segment registers is 1. And qemu - * is setting it to 0 in the userland code. This causes invalid guest - * state vmexit when "unrestricted guest" mode is turned on. - * Fix for this setup issue in cpu_reset is being pushed in the qemu - * tree. Newer qemu binaries with that qemu fix would not need this - * kvm hack. - */ - if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR)) - var->type |= 0x1; /* Accessed */ - - vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var)); - -out: - vmx->emulation_required = emulation_required(vcpu); -} - -static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) -{ - u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS); - - *db = (ar >> 14) & 1; - *l = (ar >> 13) & 1; -} - -static void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) -{ - dt->size = vmcs_read32(GUEST_IDTR_LIMIT); - dt->address = vmcs_readl(GUEST_IDTR_BASE); -} - -static void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) -{ - vmcs_write32(GUEST_IDTR_LIMIT, dt->size); - vmcs_writel(GUEST_IDTR_BASE, dt->address); -} - -static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) -{ - dt->size = vmcs_read32(GUEST_GDTR_LIMIT); - dt->address = vmcs_readl(GUEST_GDTR_BASE); -} - -static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) -{ - vmcs_write32(GUEST_GDTR_LIMIT, dt->size); - vmcs_writel(GUEST_GDTR_BASE, dt->address); -} - -static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) -{ - struct kvm_segment var; - u32 ar; - - vmx_get_segment(vcpu, &var, seg); - var.dpl = 0x3; - if (seg == VCPU_SREG_CS) - var.type = 0x3; - ar = vmx_segment_access_rights(&var); - - if (var.base != (var.selector << 4)) - return false; - if (var.limit != 0xffff) - return false; - if (ar != 0xf3) - return false; - - return true; -} - -static bool code_segment_valid(struct kvm_vcpu *vcpu) -{ - struct kvm_segment cs; - unsigned int cs_rpl; - - vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); - cs_rpl = cs.selector & SEGMENT_RPL_MASK; - - if (cs.unusable) - return false; - if (~cs.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_ACCESSES_MASK)) - return false; - if (!cs.s) - return false; - if (cs.type & VMX_AR_TYPE_WRITEABLE_MASK) { - if (cs.dpl > cs_rpl) - return false; - } else { - if (cs.dpl != cs_rpl) - return false; - } - if (!cs.present) - return false; - - /* TODO: Add Reserved field check, this'll require a new member in the kvm_segment_field structure */ - return true; -} - -static bool stack_segment_valid(struct kvm_vcpu *vcpu) -{ - struct kvm_segment ss; - unsigned int ss_rpl; - - vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); - ss_rpl = ss.selector & SEGMENT_RPL_MASK; - - if (ss.unusable) - return true; - if (ss.type != 3 && ss.type != 7) - return false; - if (!ss.s) - return false; - if (ss.dpl != ss_rpl) /* DPL != RPL */ - return false; - if (!ss.present) - return false; - - return true; -} - -static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg) -{ - struct kvm_segment var; - unsigned int rpl; - - vmx_get_segment(vcpu, &var, seg); - rpl = var.selector & SEGMENT_RPL_MASK; - - if (var.unusable) - return true; - if (!var.s) - return false; - if (!var.present) - return false; - if (~var.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_WRITEABLE_MASK)) { - if (var.dpl < rpl) /* DPL < RPL */ - return false; - } - - /* TODO: Add other members to kvm_segment_field to allow checking for other access - * rights flags - */ - return true; -} - -static bool tr_valid(struct kvm_vcpu *vcpu) -{ - struct kvm_segment tr; - - vmx_get_segment(vcpu, &tr, VCPU_SREG_TR); - - if (tr.unusable) - return false; - if (tr.selector & SEGMENT_TI_MASK) /* TI = 1 */ - return false; - if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */ - return false; - if (!tr.present) - return false; - - return true; -} - -static bool ldtr_valid(struct kvm_vcpu *vcpu) -{ - struct kvm_segment ldtr; - - vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR); - - if (ldtr.unusable) - return true; - if (ldtr.selector & SEGMENT_TI_MASK) /* TI = 1 */ - return false; - if (ldtr.type != 2) - return false; - if (!ldtr.present) - return false; - - return true; -} - -static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu) -{ - struct kvm_segment cs, ss; - - vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); - vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); - - return ((cs.selector & SEGMENT_RPL_MASK) == - (ss.selector & SEGMENT_RPL_MASK)); -} - -/* - * Check if guest state is valid. Returns true if valid, false if - * not. - * We assume that registers are always usable - */ -static bool guest_state_valid(struct kvm_vcpu *vcpu) -{ - if (enable_unrestricted_guest) - return true; - - /* real mode guest state checks */ - if (!is_protmode(vcpu) || (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) { - if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) - return false; - if (!rmode_segment_valid(vcpu, VCPU_SREG_SS)) - return false; - if (!rmode_segment_valid(vcpu, VCPU_SREG_DS)) - return false; - if (!rmode_segment_valid(vcpu, VCPU_SREG_ES)) - return false; - if (!rmode_segment_valid(vcpu, VCPU_SREG_FS)) - return false; - if (!rmode_segment_valid(vcpu, VCPU_SREG_GS)) - return false; - } else { - /* protected mode guest state checks */ - if (!cs_ss_rpl_check(vcpu)) - return false; - if (!code_segment_valid(vcpu)) - return false; - if (!stack_segment_valid(vcpu)) - return false; - if (!data_segment_valid(vcpu, VCPU_SREG_DS)) - return false; - if (!data_segment_valid(vcpu, VCPU_SREG_ES)) - return false; - if (!data_segment_valid(vcpu, VCPU_SREG_FS)) - return false; - if (!data_segment_valid(vcpu, VCPU_SREG_GS)) - return false; - if (!tr_valid(vcpu)) - return false; - if (!ldtr_valid(vcpu)) - return false; - } - /* TODO: - * - Add checks on RIP - * - Add checks on RFLAGS - */ - - return true; -} - -static int init_rmode_tss(struct kvm *kvm) -{ - gfn_t fn; - u16 data = 0; - int idx, r; - - idx = srcu_read_lock(&kvm->srcu); - fn = to_kvm_vmx(kvm)->tss_addr >> PAGE_SHIFT; - r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); - if (r < 0) - goto out; - data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE; - r = kvm_write_guest_page(kvm, fn++, &data, - TSS_IOPB_BASE_OFFSET, sizeof(u16)); - if (r < 0) - goto out; - r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE); - if (r < 0) - goto out; - r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); - if (r < 0) - goto out; - data = ~0; - r = kvm_write_guest_page(kvm, fn, &data, - RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1, - sizeof(u8)); -out: - srcu_read_unlock(&kvm->srcu, idx); - return r; -} - -static int init_rmode_identity_map(struct kvm *kvm) -{ - struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm); - int i, idx, r = 0; - kvm_pfn_t identity_map_pfn; - u32 tmp; - - /* Protect kvm_vmx->ept_identity_pagetable_done. */ - mutex_lock(&kvm->slots_lock); - - if (likely(kvm_vmx->ept_identity_pagetable_done)) - goto out2; - - if (!kvm_vmx->ept_identity_map_addr) - kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR; - identity_map_pfn = kvm_vmx->ept_identity_map_addr >> PAGE_SHIFT; - - r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, - kvm_vmx->ept_identity_map_addr, PAGE_SIZE); - if (r < 0) - goto out2; - - idx = srcu_read_lock(&kvm->srcu); - r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE); - if (r < 0) - goto out; - /* Set up identity-mapping pagetable for EPT in real mode */ - for (i = 0; i < PT32_ENT_PER_PAGE; i++) { - tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | - _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE); - r = kvm_write_guest_page(kvm, identity_map_pfn, - &tmp, i * sizeof(tmp), sizeof(tmp)); - if (r < 0) - goto out; - } - kvm_vmx->ept_identity_pagetable_done = true; - -out: - srcu_read_unlock(&kvm->srcu, idx); - -out2: - mutex_unlock(&kvm->slots_lock); - return r; -} - -static void seg_setup(int seg) -{ - const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; - unsigned int ar; - - vmcs_write16(sf->selector, 0); - vmcs_writel(sf->base, 0); - vmcs_write32(sf->limit, 0xffff); - ar = 0x93; - if (seg == VCPU_SREG_CS) - ar |= 0x08; /* code segment */ - - vmcs_write32(sf->ar_bytes, ar); -} - -static int alloc_apic_access_page(struct kvm *kvm) -{ - struct page *page; - int r = 0; - - mutex_lock(&kvm->slots_lock); - if (kvm->arch.apic_access_page_done) - goto out; - r = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, - APIC_DEFAULT_PHYS_BASE, PAGE_SIZE); - if (r) - goto out; - - page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); - if (is_error_page(page)) { - r = -EFAULT; - goto out; - } - - /* - * Do not pin the page in memory, so that memory hot-unplug - * is able to migrate it. - */ - put_page(page); - kvm->arch.apic_access_page_done = true; -out: - mutex_unlock(&kvm->slots_lock); - return r; -} - -int allocate_vpid(void) -{ - int vpid; - - if (!enable_vpid) - return 0; - spin_lock(&vmx_vpid_lock); - vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS); - if (vpid < VMX_NR_VPIDS) - __set_bit(vpid, vmx_vpid_bitmap); - else - vpid = 0; - spin_unlock(&vmx_vpid_lock); - return vpid; -} - -void free_vpid(int vpid) -{ - if (!enable_vpid || vpid == 0) - return; - spin_lock(&vmx_vpid_lock); - __clear_bit(vpid, vmx_vpid_bitmap); - spin_unlock(&vmx_vpid_lock); -} - -static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, - u32 msr, int type) -{ - int f = sizeof(unsigned long); - - if (!cpu_has_vmx_msr_bitmap()) - return; - - if (static_branch_unlikely(&enable_evmcs)) - evmcs_touch_msr_bitmap(); - - /* - * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals - * have the write-low and read-high bitmap offsets the wrong way round. - * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. - */ - if (msr <= 0x1fff) { - if (type & MSR_TYPE_R) - /* read-low */ - __clear_bit(msr, msr_bitmap + 0x000 / f); - - if (type & MSR_TYPE_W) - /* write-low */ - __clear_bit(msr, msr_bitmap + 0x800 / f); - - } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { - msr &= 0x1fff; - if (type & MSR_TYPE_R) - /* read-high */ - __clear_bit(msr, msr_bitmap + 0x400 / f); - - if (type & MSR_TYPE_W) - /* write-high */ - __clear_bit(msr, msr_bitmap + 0xc00 / f); - - } -} - -static __always_inline void vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, - u32 msr, int type) -{ - int f = sizeof(unsigned long); - - if (!cpu_has_vmx_msr_bitmap()) - return; - - if (static_branch_unlikely(&enable_evmcs)) - evmcs_touch_msr_bitmap(); - - /* - * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals - * have the write-low and read-high bitmap offsets the wrong way round. - * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. - */ - if (msr <= 0x1fff) { - if (type & MSR_TYPE_R) - /* read-low */ - __set_bit(msr, msr_bitmap + 0x000 / f); - - if (type & MSR_TYPE_W) - /* write-low */ - __set_bit(msr, msr_bitmap + 0x800 / f); - - } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { - msr &= 0x1fff; - if (type & MSR_TYPE_R) - /* read-high */ - __set_bit(msr, msr_bitmap + 0x400 / f); - - if (type & MSR_TYPE_W) - /* write-high */ - __set_bit(msr, msr_bitmap + 0xc00 / f); - - } -} - -static __always_inline void vmx_set_intercept_for_msr(unsigned long *msr_bitmap, - u32 msr, int type, bool value) -{ - if (value) - vmx_enable_intercept_for_msr(msr_bitmap, msr, type); - else - vmx_disable_intercept_for_msr(msr_bitmap, msr, type); -} - -static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) -{ - u8 mode = 0; - - if (cpu_has_secondary_exec_ctrls() && - (secondary_exec_controls_get(to_vmx(vcpu)) & - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { - mode |= MSR_BITMAP_MODE_X2APIC; - if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) - mode |= MSR_BITMAP_MODE_X2APIC_APICV; - } - - return mode; -} - -static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap, - u8 mode) -{ - int msr; - - for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { - unsigned word = msr / BITS_PER_LONG; - msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0; - msr_bitmap[word + (0x800 / sizeof(long))] = ~0; - } - - if (mode & MSR_BITMAP_MODE_X2APIC) { - /* - * TPR reads and writes can be virtualized even if virtual interrupt - * delivery is not in use. - */ - vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW); - if (mode & MSR_BITMAP_MODE_X2APIC_APICV) { - vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R); - vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W); - vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W); - } - } -} - -void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; - u8 mode = vmx_msr_bitmap_mode(vcpu); - u8 changed = mode ^ vmx->msr_bitmap_mode; - - if (!changed) - return; - - if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV)) - vmx_update_msr_bitmap_x2apic(msr_bitmap, mode); - - vmx->msr_bitmap_mode = mode; -} - -void pt_update_intercept_for_msr(struct vcpu_vmx *vmx) -{ - unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; - bool flag = !(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN); - u32 i; - - vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_STATUS, - MSR_TYPE_RW, flag); - vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_OUTPUT_BASE, - MSR_TYPE_RW, flag); - vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_OUTPUT_MASK, - MSR_TYPE_RW, flag); - vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_CR3_MATCH, - MSR_TYPE_RW, flag); - for (i = 0; i < vmx->pt_desc.addr_range; i++) { - vmx_set_intercept_for_msr(msr_bitmap, - MSR_IA32_RTIT_ADDR0_A + i * 2, MSR_TYPE_RW, flag); - vmx_set_intercept_for_msr(msr_bitmap, - MSR_IA32_RTIT_ADDR0_B + i * 2, MSR_TYPE_RW, flag); - } -} - -static bool vmx_get_enable_apicv(struct kvm *kvm) -{ - return enable_apicv; -} - -static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - void *vapic_page; - u32 vppr; - int rvi; - - if (WARN_ON_ONCE(!is_guest_mode(vcpu)) || - !nested_cpu_has_vid(get_vmcs12(vcpu)) || - WARN_ON_ONCE(!vmx->nested.virtual_apic_map.gfn)) - return false; - - rvi = vmx_get_rvi(); - - vapic_page = vmx->nested.virtual_apic_map.hva; - vppr = *((u32 *)(vapic_page + APIC_PROCPRI)); - - return ((rvi & 0xf0) > (vppr & 0xf0)); -} - -static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu, - bool nested) -{ -#ifdef CONFIG_SMP - int pi_vec = nested ? POSTED_INTR_NESTED_VECTOR : POSTED_INTR_VECTOR; - - if (vcpu->mode == IN_GUEST_MODE) { - /* - * The vector of interrupt to be delivered to vcpu had - * been set in PIR before this function. - * - * Following cases will be reached in this block, and - * we always send a notification event in all cases as - * explained below. - * - * Case 1: vcpu keeps in non-root mode. Sending a - * notification event posts the interrupt to vcpu. - * - * Case 2: vcpu exits to root mode and is still - * runnable. PIR will be synced to vIRR before the - * next vcpu entry. Sending a notification event in - * this case has no effect, as vcpu is not in root - * mode. - * - * Case 3: vcpu exits to root mode and is blocked. - * vcpu_block() has already synced PIR to vIRR and - * never blocks vcpu if vIRR is not cleared. Therefore, - * a blocked vcpu here does not wait for any requested - * interrupts in PIR, and sending a notification event - * which has no effect is safe here. - */ - - apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec); - return true; - } -#endif - return false; -} - -static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu, - int vector) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - if (is_guest_mode(vcpu) && - vector == vmx->nested.posted_intr_nv) { - /* - * If a posted intr is not recognized by hardware, - * we will accomplish it in the next vmentry. - */ - vmx->nested.pi_pending = true; - kvm_make_request(KVM_REQ_EVENT, vcpu); - /* the PIR and ON have been set by L1. */ - if (!kvm_vcpu_trigger_posted_interrupt(vcpu, true)) - kvm_vcpu_kick(vcpu); - return 0; - } - return -1; -} -/* - * Send interrupt to vcpu via posted interrupt way. - * 1. If target vcpu is running(non-root mode), send posted interrupt - * notification to vcpu and hardware will sync PIR to vIRR atomically. - * 2. If target vcpu isn't running(root mode), kick it to pick up the - * interrupt from PIR in next vmentry. - */ -static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - int r; - - r = vmx_deliver_nested_posted_interrupt(vcpu, vector); - if (!r) - return; - - if (pi_test_and_set_pir(vector, &vmx->pi_desc)) - return; - - /* If a previous notification has sent the IPI, nothing to do. */ - if (pi_test_and_set_on(&vmx->pi_desc)) - return; - - if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false)) - kvm_vcpu_kick(vcpu); -} - -/* - * Set up the vmcs's constant host-state fields, i.e., host-state fields that - * will not change in the lifetime of the guest. - * Note that host-state that does change is set elsewhere. E.g., host-state - * that is set differently for each CPU is set in vmx_vcpu_load(), not here. - */ -void vmx_set_constant_host_state(struct vcpu_vmx *vmx) -{ - u32 low32, high32; - unsigned long tmpl; - unsigned long cr0, cr3, cr4; - - cr0 = read_cr0(); - WARN_ON(cr0 & X86_CR0_TS); - vmcs_writel(HOST_CR0, cr0); /* 22.2.3 */ - - /* - * Save the most likely value for this task's CR3 in the VMCS. - * We can't use __get_current_cr3_fast() because we're not atomic. - */ - cr3 = __read_cr3(); - vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */ - vmx->loaded_vmcs->host_state.cr3 = cr3; - - /* Save the most likely value for this task's CR4 in the VMCS. */ - cr4 = cr4_read_shadow(); - vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */ - vmx->loaded_vmcs->host_state.cr4 = cr4; - - vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ -#ifdef CONFIG_X86_64 - /* - * Load null selectors, so we can avoid reloading them in - * vmx_prepare_switch_to_host(), in case userspace uses - * the null selectors too (the expected case). - */ - vmcs_write16(HOST_DS_SELECTOR, 0); - vmcs_write16(HOST_ES_SELECTOR, 0); -#else - vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ - vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */ -#endif - vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ - vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ - - vmcs_writel(HOST_IDTR_BASE, host_idt_base); /* 22.2.4 */ - - vmcs_writel(HOST_RIP, (unsigned long)vmx_vmexit); /* 22.2.5 */ - - rdmsr(MSR_IA32_SYSENTER_CS, low32, high32); - vmcs_write32(HOST_IA32_SYSENTER_CS, low32); - rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl); - vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl); /* 22.2.3 */ - - if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) { - rdmsr(MSR_IA32_CR_PAT, low32, high32); - vmcs_write64(HOST_IA32_PAT, low32 | ((u64) high32 << 32)); - } - - if (cpu_has_load_ia32_efer()) - vmcs_write64(HOST_IA32_EFER, host_efer); -} - -void set_cr4_guest_host_mask(struct vcpu_vmx *vmx) -{ - vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS; - if (enable_ept) - vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE; - if (is_guest_mode(&vmx->vcpu)) - vmx->vcpu.arch.cr4_guest_owned_bits &= - ~get_vmcs12(&vmx->vcpu)->cr4_guest_host_mask; - vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits); -} - -u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) -{ - u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl; - - if (!kvm_vcpu_apicv_active(&vmx->vcpu)) - pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; - - if (!enable_vnmi) - pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS; - - if (!enable_preemption_timer) - pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER; - - return pin_based_exec_ctrl; -} - -static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx)); - if (cpu_has_secondary_exec_ctrls()) { - if (kvm_vcpu_apicv_active(vcpu)) - secondary_exec_controls_setbit(vmx, - SECONDARY_EXEC_APIC_REGISTER_VIRT | - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); - else - secondary_exec_controls_clearbit(vmx, - SECONDARY_EXEC_APIC_REGISTER_VIRT | - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); - } - - if (cpu_has_vmx_msr_bitmap()) - vmx_update_msr_bitmap(vcpu); -} - -u32 vmx_exec_control(struct vcpu_vmx *vmx) -{ - u32 exec_control = vmcs_config.cpu_based_exec_ctrl; - - if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT) - exec_control &= ~CPU_BASED_MOV_DR_EXITING; - - if (!cpu_need_tpr_shadow(&vmx->vcpu)) { - exec_control &= ~CPU_BASED_TPR_SHADOW; -#ifdef CONFIG_X86_64 - exec_control |= CPU_BASED_CR8_STORE_EXITING | - CPU_BASED_CR8_LOAD_EXITING; -#endif - } - if (!enable_ept) - exec_control |= CPU_BASED_CR3_STORE_EXITING | - CPU_BASED_CR3_LOAD_EXITING | - CPU_BASED_INVLPG_EXITING; - if (kvm_mwait_in_guest(vmx->vcpu.kvm)) - exec_control &= ~(CPU_BASED_MWAIT_EXITING | - CPU_BASED_MONITOR_EXITING); - if (kvm_hlt_in_guest(vmx->vcpu.kvm)) - exec_control &= ~CPU_BASED_HLT_EXITING; - return exec_control; -} - - -static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) -{ - struct kvm_vcpu *vcpu = &vmx->vcpu; - - u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; - - if (pt_mode == PT_MODE_SYSTEM) - exec_control &= ~(SECONDARY_EXEC_PT_USE_GPA | SECONDARY_EXEC_PT_CONCEAL_VMX); - if (!cpu_need_virtualize_apic_accesses(vcpu)) - exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; - if (vmx->vpid == 0) - exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; - if (!enable_ept) { - exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; - enable_unrestricted_guest = 0; - } - if (!enable_unrestricted_guest) - exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; - if (kvm_pause_in_guest(vmx->vcpu.kvm)) - exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; - if (!kvm_vcpu_apicv_active(vcpu)) - exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); - exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; - - /* SECONDARY_EXEC_DESC is enabled/disabled on writes to CR4.UMIP, - * in vmx_set_cr4. */ - exec_control &= ~SECONDARY_EXEC_DESC; - - /* SECONDARY_EXEC_SHADOW_VMCS is enabled when L1 executes VMPTRLD - (handle_vmptrld). - We can NOT enable shadow_vmcs here because we don't have yet - a current VMCS12 - */ - exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; - - if (!enable_pml) - exec_control &= ~SECONDARY_EXEC_ENABLE_PML; - - if (vmx_xsaves_supported()) { - /* Exposing XSAVES only when XSAVE is exposed */ - bool xsaves_enabled = - guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && - guest_cpuid_has(vcpu, X86_FEATURE_XSAVES); - - vcpu->arch.xsaves_enabled = xsaves_enabled; - - if (!xsaves_enabled) - exec_control &= ~SECONDARY_EXEC_XSAVES; - - if (nested) { - if (xsaves_enabled) - vmx->nested.msrs.secondary_ctls_high |= - SECONDARY_EXEC_XSAVES; - else - vmx->nested.msrs.secondary_ctls_high &= - ~SECONDARY_EXEC_XSAVES; - } - } - - if (vmx_rdtscp_supported()) { - bool rdtscp_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP); - if (!rdtscp_enabled) - exec_control &= ~SECONDARY_EXEC_RDTSCP; - - if (nested) { - if (rdtscp_enabled) - vmx->nested.msrs.secondary_ctls_high |= - SECONDARY_EXEC_RDTSCP; - else - vmx->nested.msrs.secondary_ctls_high &= - ~SECONDARY_EXEC_RDTSCP; - } - } - - if (vmx_invpcid_supported()) { - /* Exposing INVPCID only when PCID is exposed */ - bool invpcid_enabled = - guest_cpuid_has(vcpu, X86_FEATURE_INVPCID) && - guest_cpuid_has(vcpu, X86_FEATURE_PCID); - - if (!invpcid_enabled) { - exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID; - guest_cpuid_clear(vcpu, X86_FEATURE_INVPCID); - } - - if (nested) { - if (invpcid_enabled) - vmx->nested.msrs.secondary_ctls_high |= - SECONDARY_EXEC_ENABLE_INVPCID; - else - vmx->nested.msrs.secondary_ctls_high &= - ~SECONDARY_EXEC_ENABLE_INVPCID; - } - } - - if (vmx_rdrand_supported()) { - bool rdrand_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDRAND); - if (rdrand_enabled) - exec_control &= ~SECONDARY_EXEC_RDRAND_EXITING; - - if (nested) { - if (rdrand_enabled) - vmx->nested.msrs.secondary_ctls_high |= - SECONDARY_EXEC_RDRAND_EXITING; - else - vmx->nested.msrs.secondary_ctls_high &= - ~SECONDARY_EXEC_RDRAND_EXITING; - } - } - - if (vmx_rdseed_supported()) { - bool rdseed_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDSEED); - if (rdseed_enabled) - exec_control &= ~SECONDARY_EXEC_RDSEED_EXITING; - - if (nested) { - if (rdseed_enabled) - vmx->nested.msrs.secondary_ctls_high |= - SECONDARY_EXEC_RDSEED_EXITING; - else - vmx->nested.msrs.secondary_ctls_high &= - ~SECONDARY_EXEC_RDSEED_EXITING; - } - } - - if (vmx_waitpkg_supported()) { - bool waitpkg_enabled = - guest_cpuid_has(vcpu, X86_FEATURE_WAITPKG); - - if (!waitpkg_enabled) - exec_control &= ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; - - if (nested) { - if (waitpkg_enabled) - vmx->nested.msrs.secondary_ctls_high |= - SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; - else - vmx->nested.msrs.secondary_ctls_high &= - ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; - } - } - - vmx->secondary_exec_control = exec_control; -} - -static void ept_set_mmio_spte_mask(void) -{ - /* - * EPT Misconfigurations can be generated if the value of bits 2:0 - * of an EPT paging-structure entry is 110b (write/execute). - */ - kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK, - VMX_EPT_MISCONFIG_WX_VALUE, 0); -} - -#define VMX_XSS_EXIT_BITMAP 0 - -/* - * Noting that the initialization of Guest-state Area of VMCS is in - * vmx_vcpu_reset(). - */ -static void init_vmcs(struct vcpu_vmx *vmx) -{ - if (nested) - nested_vmx_set_vmcs_shadowing_bitmap(); - - if (cpu_has_vmx_msr_bitmap()) - vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap)); - - vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ - - /* Control */ - pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx)); - - exec_controls_set(vmx, vmx_exec_control(vmx)); - - if (cpu_has_secondary_exec_ctrls()) { - vmx_compute_secondary_exec_control(vmx); - secondary_exec_controls_set(vmx, vmx->secondary_exec_control); - } - - if (kvm_vcpu_apicv_active(&vmx->vcpu)) { - vmcs_write64(EOI_EXIT_BITMAP0, 0); - vmcs_write64(EOI_EXIT_BITMAP1, 0); - vmcs_write64(EOI_EXIT_BITMAP2, 0); - vmcs_write64(EOI_EXIT_BITMAP3, 0); - - vmcs_write16(GUEST_INTR_STATUS, 0); - - vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR); - vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc))); - } - - if (!kvm_pause_in_guest(vmx->vcpu.kvm)) { - vmcs_write32(PLE_GAP, ple_gap); - vmx->ple_window = ple_window; - vmx->ple_window_dirty = true; - } - - vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0); - vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0); - vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ - - vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */ - vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */ - vmx_set_constant_host_state(vmx); - vmcs_writel(HOST_FS_BASE, 0); /* 22.2.4 */ - vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */ - - if (cpu_has_vmx_vmfunc()) - vmcs_write64(VM_FUNCTION_CONTROL, 0); - - vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); - vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); - vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val)); - vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); - vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val)); - - if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) - vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); - - vm_exit_controls_set(vmx, vmx_vmexit_ctrl()); - - /* 22.2.1, 20.8.1 */ - vm_entry_controls_set(vmx, vmx_vmentry_ctrl()); - - vmx->vcpu.arch.cr0_guest_owned_bits = X86_CR0_TS; - vmcs_writel(CR0_GUEST_HOST_MASK, ~X86_CR0_TS); - - set_cr4_guest_host_mask(vmx); - - if (vmx->vpid != 0) - vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); - - if (vmx_xsaves_supported()) - vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP); - - if (enable_pml) { - vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg)); - vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); - } - - if (cpu_has_vmx_encls_vmexit()) - vmcs_write64(ENCLS_EXITING_BITMAP, -1ull); - - if (pt_mode == PT_MODE_HOST_GUEST) { - memset(&vmx->pt_desc, 0, sizeof(vmx->pt_desc)); - /* Bit[6~0] are forced to 1, writes are ignored. */ - vmx->pt_desc.guest.output_mask = 0x7F; - vmcs_write64(GUEST_IA32_RTIT_CTL, 0); - } -} - -static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - struct msr_data apic_base_msr; - u64 cr0; - - vmx->rmode.vm86_active = 0; - vmx->spec_ctrl = 0; - - vmx->msr_ia32_umwait_control = 0; - - vcpu->arch.microcode_version = 0x100000000ULL; - vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); - vmx->hv_deadline_tsc = -1; - kvm_set_cr8(vcpu, 0); - - if (!init_event) { - apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | - MSR_IA32_APICBASE_ENABLE; - if (kvm_vcpu_is_reset_bsp(vcpu)) - apic_base_msr.data |= MSR_IA32_APICBASE_BSP; - apic_base_msr.host_initiated = true; - kvm_set_apic_base(vcpu, &apic_base_msr); - } - - vmx_segment_cache_clear(vmx); - - seg_setup(VCPU_SREG_CS); - vmcs_write16(GUEST_CS_SELECTOR, 0xf000); - vmcs_writel(GUEST_CS_BASE, 0xffff0000ul); - - seg_setup(VCPU_SREG_DS); - seg_setup(VCPU_SREG_ES); - seg_setup(VCPU_SREG_FS); - seg_setup(VCPU_SREG_GS); - seg_setup(VCPU_SREG_SS); - - vmcs_write16(GUEST_TR_SELECTOR, 0); - vmcs_writel(GUEST_TR_BASE, 0); - vmcs_write32(GUEST_TR_LIMIT, 0xffff); - vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); - - vmcs_write16(GUEST_LDTR_SELECTOR, 0); - vmcs_writel(GUEST_LDTR_BASE, 0); - vmcs_write32(GUEST_LDTR_LIMIT, 0xffff); - vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082); - - if (!init_event) { - vmcs_write32(GUEST_SYSENTER_CS, 0); - vmcs_writel(GUEST_SYSENTER_ESP, 0); - vmcs_writel(GUEST_SYSENTER_EIP, 0); - vmcs_write64(GUEST_IA32_DEBUGCTL, 0); - } - - kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); - kvm_rip_write(vcpu, 0xfff0); - - vmcs_writel(GUEST_GDTR_BASE, 0); - vmcs_write32(GUEST_GDTR_LIMIT, 0xffff); - - vmcs_writel(GUEST_IDTR_BASE, 0); - vmcs_write32(GUEST_IDTR_LIMIT, 0xffff); - - vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); - vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); - vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0); - if (kvm_mpx_supported()) - vmcs_write64(GUEST_BNDCFGS, 0); - - setup_msrs(vmx); - - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */ - - if (cpu_has_vmx_tpr_shadow() && !init_event) { - vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0); - if (cpu_need_tpr_shadow(vcpu)) - vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, - __pa(vcpu->arch.apic->regs)); - vmcs_write32(TPR_THRESHOLD, 0); - } - - kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); - - cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; - vmx->vcpu.arch.cr0 = cr0; - vmx_set_cr0(vcpu, cr0); /* enter rmode */ - vmx_set_cr4(vcpu, 0); - vmx_set_efer(vcpu, 0); - - update_exception_bitmap(vcpu); - - vpid_sync_context(vmx->vpid); - if (init_event) - vmx_clear_hlt(vcpu); -} - -static void enable_irq_window(struct kvm_vcpu *vcpu) -{ - exec_controls_setbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING); -} - -static void enable_nmi_window(struct kvm_vcpu *vcpu) -{ - if (!enable_vnmi || - vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { - enable_irq_window(vcpu); - return; - } - - exec_controls_setbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING); -} - -static void vmx_inject_irq(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - uint32_t intr; - int irq = vcpu->arch.interrupt.nr; - - trace_kvm_inj_virq(irq); - - ++vcpu->stat.irq_injections; - if (vmx->rmode.vm86_active) { - int inc_eip = 0; - if (vcpu->arch.interrupt.soft) - inc_eip = vcpu->arch.event_exit_inst_len; - kvm_inject_realmode_interrupt(vcpu, irq, inc_eip); - return; - } - intr = irq | INTR_INFO_VALID_MASK; - if (vcpu->arch.interrupt.soft) { - intr |= INTR_TYPE_SOFT_INTR; - vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, - vmx->vcpu.arch.event_exit_inst_len); - } else - intr |= INTR_TYPE_EXT_INTR; - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr); - - vmx_clear_hlt(vcpu); -} - -static void vmx_inject_nmi(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - if (!enable_vnmi) { - /* - * Tracking the NMI-blocked state in software is built upon - * finding the next open IRQ window. This, in turn, depends on - * well-behaving guests: They have to keep IRQs disabled at - * least as long as the NMI handler runs. Otherwise we may - * cause NMI nesting, maybe breaking the guest. But as this is - * highly unlikely, we can live with the residual risk. - */ - vmx->loaded_vmcs->soft_vnmi_blocked = 1; - vmx->loaded_vmcs->vnmi_blocked_time = 0; - } - - ++vcpu->stat.nmi_injections; - vmx->loaded_vmcs->nmi_known_unmasked = false; - - if (vmx->rmode.vm86_active) { - kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0); - return; - } - - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, - INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); - - vmx_clear_hlt(vcpu); -} - -bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - bool masked; - - if (!enable_vnmi) - return vmx->loaded_vmcs->soft_vnmi_blocked; - if (vmx->loaded_vmcs->nmi_known_unmasked) - return false; - masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI; - vmx->loaded_vmcs->nmi_known_unmasked = !masked; - return masked; -} - -void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - if (!enable_vnmi) { - if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) { - vmx->loaded_vmcs->soft_vnmi_blocked = masked; - vmx->loaded_vmcs->vnmi_blocked_time = 0; - } - } else { - vmx->loaded_vmcs->nmi_known_unmasked = !masked; - if (masked) - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); - else - vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); - } -} - -static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) -{ - if (to_vmx(vcpu)->nested.nested_run_pending) - return 0; - - if (!enable_vnmi && - to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked) - return 0; - - return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & - (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI - | GUEST_INTR_STATE_NMI)); -} - -static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) -{ - return (!to_vmx(vcpu)->nested.nested_run_pending && - vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && - !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & - (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)); -} - -static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) -{ - int ret; - - if (enable_unrestricted_guest) - return 0; - - ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr, - PAGE_SIZE * 3); - if (ret) - return ret; - to_kvm_vmx(kvm)->tss_addr = addr; - return init_rmode_tss(kvm); -} - -static int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr) -{ - to_kvm_vmx(kvm)->ept_identity_map_addr = ident_addr; - return 0; -} - -static bool rmode_exception(struct kvm_vcpu *vcpu, int vec) -{ - switch (vec) { - case BP_VECTOR: - /* - * Update instruction length as we may reinject the exception - * from user space while in guest debugging mode. - */ - to_vmx(vcpu)->vcpu.arch.event_exit_inst_len = - vmcs_read32(VM_EXIT_INSTRUCTION_LEN); - if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) - return false; - /* fall through */ - case DB_VECTOR: - if (vcpu->guest_debug & - (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) - return false; - /* fall through */ - case DE_VECTOR: - case OF_VECTOR: - case BR_VECTOR: - case UD_VECTOR: - case DF_VECTOR: - case SS_VECTOR: - case GP_VECTOR: - case MF_VECTOR: - return true; - break; - } - return false; -} - -static int handle_rmode_exception(struct kvm_vcpu *vcpu, - int vec, u32 err_code) -{ - /* - * Instruction with address size override prefix opcode 0x67 - * Cause the #SS fault with 0 error code in VM86 mode. - */ - if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) { - if (kvm_emulate_instruction(vcpu, 0)) { - if (vcpu->arch.halt_request) { - vcpu->arch.halt_request = 0; - return kvm_vcpu_halt(vcpu); - } - return 1; - } - return 0; - } - - /* - * Forward all other exceptions that are valid in real mode. - * FIXME: Breaks guest debugging in real mode, needs to be fixed with - * the required debugging infrastructure rework. - */ - kvm_queue_exception(vcpu, vec); - return 1; -} - -/* - * Trigger machine check on the host. We assume all the MSRs are already set up - * by the CPU and that we still run on the same CPU as the MCE occurred on. - * We pass a fake environment to the machine check handler because we want - * the guest to be always treated like user space, no matter what context - * it used internally. - */ -static void kvm_machine_check(void) -{ -#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64) - struct pt_regs regs = { - .cs = 3, /* Fake ring 3 no matter what the guest ran on */ - .flags = X86_EFLAGS_IF, - }; - - do_machine_check(®s, 0); -#endif -} - -static int handle_machine_check(struct kvm_vcpu *vcpu) -{ - /* handled by vmx_vcpu_run() */ - return 1; -} - -static int handle_exception_nmi(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - struct kvm_run *kvm_run = vcpu->run; - u32 intr_info, ex_no, error_code; - unsigned long cr2, rip, dr6; - u32 vect_info; - - vect_info = vmx->idt_vectoring_info; - intr_info = vmx->exit_intr_info; - - if (is_machine_check(intr_info) || is_nmi(intr_info)) - return 1; /* handled by handle_exception_nmi_irqoff() */ - - if (is_invalid_opcode(intr_info)) - return handle_ud(vcpu); - - error_code = 0; - if (intr_info & INTR_INFO_DELIVER_CODE_MASK) - error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); - - if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) { - WARN_ON_ONCE(!enable_vmware_backdoor); - - /* - * VMware backdoor emulation on #GP interception only handles - * IN{S}, OUT{S}, and RDPMC, none of which generate a non-zero - * error code on #GP. - */ - if (error_code) { - kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); - return 1; - } - return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP); - } - - /* - * The #PF with PFEC.RSVD = 1 indicates the guest is accessing - * MMIO, it is better to report an internal error. - * See the comments in vmx_handle_exit. - */ - if ((vect_info & VECTORING_INFO_VALID_MASK) && - !(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) { - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX; - vcpu->run->internal.ndata = 3; - vcpu->run->internal.data[0] = vect_info; - vcpu->run->internal.data[1] = intr_info; - vcpu->run->internal.data[2] = error_code; - return 0; - } - - if (is_page_fault(intr_info)) { - cr2 = vmcs_readl(EXIT_QUALIFICATION); - /* EPT won't cause page fault directly */ - WARN_ON_ONCE(!vcpu->arch.apf.host_apf_reason && enable_ept); - return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0); - } - - ex_no = intr_info & INTR_INFO_VECTOR_MASK; - - if (vmx->rmode.vm86_active && rmode_exception(vcpu, ex_no)) - return handle_rmode_exception(vcpu, ex_no, error_code); - - switch (ex_no) { - case AC_VECTOR: - kvm_queue_exception_e(vcpu, AC_VECTOR, error_code); - return 1; - case DB_VECTOR: - dr6 = vmcs_readl(EXIT_QUALIFICATION); - if (!(vcpu->guest_debug & - (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { - vcpu->arch.dr6 &= ~DR_TRAP_BITS; - vcpu->arch.dr6 |= dr6 | DR6_RTM; - if (is_icebp(intr_info)) - WARN_ON(!skip_emulated_instruction(vcpu)); - - kvm_queue_exception(vcpu, DB_VECTOR); - return 1; - } - kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1; - kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7); - /* fall through */ - case BP_VECTOR: - /* - * Update instruction length as we may reinject #BP from - * user space while in guest debugging mode. Reading it for - * #DB as well causes no harm, it is not used in that case. - */ - vmx->vcpu.arch.event_exit_inst_len = - vmcs_read32(VM_EXIT_INSTRUCTION_LEN); - kvm_run->exit_reason = KVM_EXIT_DEBUG; - rip = kvm_rip_read(vcpu); - kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; - kvm_run->debug.arch.exception = ex_no; - break; - default: - kvm_run->exit_reason = KVM_EXIT_EXCEPTION; - kvm_run->ex.exception = ex_no; - kvm_run->ex.error_code = error_code; - break; - } - return 0; -} - -static __always_inline int handle_external_interrupt(struct kvm_vcpu *vcpu) -{ - ++vcpu->stat.irq_exits; - return 1; -} - -static int handle_triple_fault(struct kvm_vcpu *vcpu) -{ - vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; - vcpu->mmio_needed = 0; - return 0; -} - -static int handle_io(struct kvm_vcpu *vcpu) -{ - unsigned long exit_qualification; - int size, in, string; - unsigned port; - - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - string = (exit_qualification & 16) != 0; - - ++vcpu->stat.io_exits; - - if (string) - return kvm_emulate_instruction(vcpu, 0); - - port = exit_qualification >> 16; - size = (exit_qualification & 7) + 1; - in = (exit_qualification & 8) != 0; - - return kvm_fast_pio(vcpu, size, port, in); -} - -static void -vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) -{ - /* - * Patch in the VMCALL instruction: - */ - hypercall[0] = 0x0f; - hypercall[1] = 0x01; - hypercall[2] = 0xc1; -} - -/* called to set cr0 as appropriate for a mov-to-cr0 exit. */ -static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) -{ - if (is_guest_mode(vcpu)) { - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - unsigned long orig_val = val; - - /* - * We get here when L2 changed cr0 in a way that did not change - * any of L1's shadowed bits (see nested_vmx_exit_handled_cr), - * but did change L0 shadowed bits. So we first calculate the - * effective cr0 value that L1 would like to write into the - * hardware. It consists of the L2-owned bits from the new - * value combined with the L1-owned bits from L1's guest_cr0. - */ - val = (val & ~vmcs12->cr0_guest_host_mask) | - (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask); - - if (!nested_guest_cr0_valid(vcpu, val)) - return 1; - - if (kvm_set_cr0(vcpu, val)) - return 1; - vmcs_writel(CR0_READ_SHADOW, orig_val); - return 0; - } else { - if (to_vmx(vcpu)->nested.vmxon && - !nested_host_cr0_valid(vcpu, val)) - return 1; - - return kvm_set_cr0(vcpu, val); - } -} - -static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val) -{ - if (is_guest_mode(vcpu)) { - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - unsigned long orig_val = val; - - /* analogously to handle_set_cr0 */ - val = (val & ~vmcs12->cr4_guest_host_mask) | - (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask); - if (kvm_set_cr4(vcpu, val)) - return 1; - vmcs_writel(CR4_READ_SHADOW, orig_val); - return 0; - } else - return kvm_set_cr4(vcpu, val); -} - -static int handle_desc(struct kvm_vcpu *vcpu) -{ - WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP)); - return kvm_emulate_instruction(vcpu, 0); -} - -static int handle_cr(struct kvm_vcpu *vcpu) -{ - unsigned long exit_qualification, val; - int cr; - int reg; - int err; - int ret; - - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - cr = exit_qualification & 15; - reg = (exit_qualification >> 8) & 15; - switch ((exit_qualification >> 4) & 3) { - case 0: /* mov to cr */ - val = kvm_register_readl(vcpu, reg); - trace_kvm_cr_write(cr, val); - switch (cr) { - case 0: - err = handle_set_cr0(vcpu, val); - return kvm_complete_insn_gp(vcpu, err); - case 3: - WARN_ON_ONCE(enable_unrestricted_guest); - err = kvm_set_cr3(vcpu, val); - return kvm_complete_insn_gp(vcpu, err); - case 4: - err = handle_set_cr4(vcpu, val); - return kvm_complete_insn_gp(vcpu, err); - case 8: { - u8 cr8_prev = kvm_get_cr8(vcpu); - u8 cr8 = (u8)val; - err = kvm_set_cr8(vcpu, cr8); - ret = kvm_complete_insn_gp(vcpu, err); - if (lapic_in_kernel(vcpu)) - return ret; - if (cr8_prev <= cr8) - return ret; - /* - * TODO: we might be squashing a - * KVM_GUESTDBG_SINGLESTEP-triggered - * KVM_EXIT_DEBUG here. - */ - vcpu->run->exit_reason = KVM_EXIT_SET_TPR; - return 0; - } - } - break; - case 2: /* clts */ - WARN_ONCE(1, "Guest should always own CR0.TS"); - vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); - trace_kvm_cr_write(0, kvm_read_cr0(vcpu)); - return kvm_skip_emulated_instruction(vcpu); - case 1: /*mov from cr*/ - switch (cr) { - case 3: - WARN_ON_ONCE(enable_unrestricted_guest); - val = kvm_read_cr3(vcpu); - kvm_register_write(vcpu, reg, val); - trace_kvm_cr_read(cr, val); - return kvm_skip_emulated_instruction(vcpu); - case 8: - val = kvm_get_cr8(vcpu); - kvm_register_write(vcpu, reg, val); - trace_kvm_cr_read(cr, val); - return kvm_skip_emulated_instruction(vcpu); - } - break; - case 3: /* lmsw */ - val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f; - trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val); - kvm_lmsw(vcpu, val); - - return kvm_skip_emulated_instruction(vcpu); - default: - break; - } - vcpu->run->exit_reason = 0; - vcpu_unimpl(vcpu, "unhandled control register: op %d cr %d\n", - (int)(exit_qualification >> 4) & 3, cr); - return 0; -} - -static int handle_dr(struct kvm_vcpu *vcpu) -{ - unsigned long exit_qualification; - int dr, dr7, reg; - - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - dr = exit_qualification & DEBUG_REG_ACCESS_NUM; - - /* First, if DR does not exist, trigger UD */ - if (!kvm_require_dr(vcpu, dr)) - return 1; - - /* Do not handle if the CPL > 0, will trigger GP on re-entry */ - if (!kvm_require_cpl(vcpu, 0)) - return 1; - dr7 = vmcs_readl(GUEST_DR7); - if (dr7 & DR7_GD) { - /* - * As the vm-exit takes precedence over the debug trap, we - * need to emulate the latter, either for the host or the - * guest debugging itself. - */ - if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { - vcpu->run->debug.arch.dr6 = vcpu->arch.dr6; - vcpu->run->debug.arch.dr7 = dr7; - vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu); - vcpu->run->debug.arch.exception = DB_VECTOR; - vcpu->run->exit_reason = KVM_EXIT_DEBUG; - return 0; - } else { - vcpu->arch.dr6 &= ~DR_TRAP_BITS; - vcpu->arch.dr6 |= DR6_BD | DR6_RTM; - kvm_queue_exception(vcpu, DB_VECTOR); - return 1; - } - } - - if (vcpu->guest_debug == 0) { - exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING); - - /* - * No more DR vmexits; force a reload of the debug registers - * and reenter on this instruction. The next vmexit will - * retrieve the full state of the debug registers. - */ - vcpu->arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT; - return 1; - } - - reg = DEBUG_REG_ACCESS_REG(exit_qualification); - if (exit_qualification & TYPE_MOV_FROM_DR) { - unsigned long val; - - if (kvm_get_dr(vcpu, dr, &val)) - return 1; - kvm_register_write(vcpu, reg, val); - } else - if (kvm_set_dr(vcpu, dr, kvm_register_readl(vcpu, reg))) - return 1; - - return kvm_skip_emulated_instruction(vcpu); -} - -static u64 vmx_get_dr6(struct kvm_vcpu *vcpu) -{ - return vcpu->arch.dr6; -} - -static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val) -{ -} - -static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) -{ - get_debugreg(vcpu->arch.db[0], 0); - get_debugreg(vcpu->arch.db[1], 1); - get_debugreg(vcpu->arch.db[2], 2); - get_debugreg(vcpu->arch.db[3], 3); - get_debugreg(vcpu->arch.dr6, 6); - vcpu->arch.dr7 = vmcs_readl(GUEST_DR7); - - vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT; - exec_controls_setbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING); -} - -static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) -{ - vmcs_writel(GUEST_DR7, val); -} - -static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu) -{ - kvm_apic_update_ppr(vcpu); - return 1; -} - -static int handle_interrupt_window(struct kvm_vcpu *vcpu) -{ - exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING); - - kvm_make_request(KVM_REQ_EVENT, vcpu); - - ++vcpu->stat.irq_window_exits; - return 1; -} - -static int handle_vmcall(struct kvm_vcpu *vcpu) -{ - return kvm_emulate_hypercall(vcpu); -} - -static int handle_invd(struct kvm_vcpu *vcpu) -{ - return kvm_emulate_instruction(vcpu, 0); -} - -static int handle_invlpg(struct kvm_vcpu *vcpu) -{ - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - - kvm_mmu_invlpg(vcpu, exit_qualification); - return kvm_skip_emulated_instruction(vcpu); -} - -static int handle_rdpmc(struct kvm_vcpu *vcpu) -{ - int err; - - err = kvm_rdpmc(vcpu); - return kvm_complete_insn_gp(vcpu, err); -} - -static int handle_wbinvd(struct kvm_vcpu *vcpu) -{ - return kvm_emulate_wbinvd(vcpu); -} - -static int handle_xsetbv(struct kvm_vcpu *vcpu) -{ - u64 new_bv = kvm_read_edx_eax(vcpu); - u32 index = kvm_rcx_read(vcpu); - - if (kvm_set_xcr(vcpu, index, new_bv) == 0) - return kvm_skip_emulated_instruction(vcpu); - return 1; -} - -static int handle_apic_access(struct kvm_vcpu *vcpu) -{ - if (likely(fasteoi)) { - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - int access_type, offset; - - access_type = exit_qualification & APIC_ACCESS_TYPE; - offset = exit_qualification & APIC_ACCESS_OFFSET; - /* - * Sane guest uses MOV to write EOI, with written value - * not cared. So make a short-circuit here by avoiding - * heavy instruction emulation. - */ - if ((access_type == TYPE_LINEAR_APIC_INST_WRITE) && - (offset == APIC_EOI)) { - kvm_lapic_set_eoi(vcpu); - return kvm_skip_emulated_instruction(vcpu); - } - } - return kvm_emulate_instruction(vcpu, 0); -} - -static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu) -{ - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - int vector = exit_qualification & 0xff; - - /* EOI-induced VM exit is trap-like and thus no need to adjust IP */ - kvm_apic_set_eoi_accelerated(vcpu, vector); - return 1; -} - -static int handle_apic_write(struct kvm_vcpu *vcpu) -{ - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - u32 offset = exit_qualification & 0xfff; - - /* APIC-write VM exit is trap-like and thus no need to adjust IP */ - kvm_apic_write_nodecode(vcpu, offset); - return 1; -} - -static int handle_task_switch(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long exit_qualification; - bool has_error_code = false; - u32 error_code = 0; - u16 tss_selector; - int reason, type, idt_v, idt_index; - - idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK); - idt_index = (vmx->idt_vectoring_info & VECTORING_INFO_VECTOR_MASK); - type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK); - - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - - reason = (u32)exit_qualification >> 30; - if (reason == TASK_SWITCH_GATE && idt_v) { - switch (type) { - case INTR_TYPE_NMI_INTR: - vcpu->arch.nmi_injected = false; - vmx_set_nmi_mask(vcpu, true); - break; - case INTR_TYPE_EXT_INTR: - case INTR_TYPE_SOFT_INTR: - kvm_clear_interrupt_queue(vcpu); - break; - case INTR_TYPE_HARD_EXCEPTION: - if (vmx->idt_vectoring_info & - VECTORING_INFO_DELIVER_CODE_MASK) { - has_error_code = true; - error_code = - vmcs_read32(IDT_VECTORING_ERROR_CODE); - } - /* fall through */ - case INTR_TYPE_SOFT_EXCEPTION: - kvm_clear_exception_queue(vcpu); - break; - default: - break; - } - } - tss_selector = exit_qualification; - - if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION && - type != INTR_TYPE_EXT_INTR && - type != INTR_TYPE_NMI_INTR)) - WARN_ON(!skip_emulated_instruction(vcpu)); - - /* - * TODO: What about debug traps on tss switch? - * Are we supposed to inject them and update dr6? - */ - return kvm_task_switch(vcpu, tss_selector, - type == INTR_TYPE_SOFT_INTR ? idt_index : -1, - reason, has_error_code, error_code); -} - -static int handle_ept_violation(struct kvm_vcpu *vcpu) -{ - unsigned long exit_qualification; - gpa_t gpa; - u64 error_code; - - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - - /* - * EPT violation happened while executing iret from NMI, - * "blocked by NMI" bit has to be set before next VM entry. - * There are errata that may cause this bit to not be set: - * AAK134, BY25. - */ - if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && - enable_vnmi && - (exit_qualification & INTR_INFO_UNBLOCK_NMI)) - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); - - gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); - trace_kvm_page_fault(gpa, exit_qualification); - - /* Is it a read fault? */ - error_code = (exit_qualification & EPT_VIOLATION_ACC_READ) - ? PFERR_USER_MASK : 0; - /* Is it a write fault? */ - error_code |= (exit_qualification & EPT_VIOLATION_ACC_WRITE) - ? PFERR_WRITE_MASK : 0; - /* Is it a fetch fault? */ - error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR) - ? PFERR_FETCH_MASK : 0; - /* ept page table entry is present? */ - error_code |= (exit_qualification & - (EPT_VIOLATION_READABLE | EPT_VIOLATION_WRITABLE | - EPT_VIOLATION_EXECUTABLE)) - ? PFERR_PRESENT_MASK : 0; - - error_code |= (exit_qualification & 0x100) != 0 ? - PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK; - - vcpu->arch.exit_qualification = exit_qualification; - return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); -} - -static int handle_ept_misconfig(struct kvm_vcpu *vcpu) -{ - gpa_t gpa; - - /* - * A nested guest cannot optimize MMIO vmexits, because we have an - * nGPA here instead of the required GPA. - */ - gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); - if (!is_guest_mode(vcpu) && - !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { - trace_kvm_fast_mmio(gpa); - return kvm_skip_emulated_instruction(vcpu); - } - - return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0); -} - -static int handle_nmi_window(struct kvm_vcpu *vcpu) -{ - WARN_ON_ONCE(!enable_vnmi); - exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING); - ++vcpu->stat.nmi_window_exits; - kvm_make_request(KVM_REQ_EVENT, vcpu); - - return 1; -} - -static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - bool intr_window_requested; - unsigned count = 130; - - /* - * We should never reach the point where we are emulating L2 - * due to invalid guest state as that means we incorrectly - * allowed a nested VMEntry with an invalid vmcs12. - */ - WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending); - - intr_window_requested = exec_controls_get(vmx) & - CPU_BASED_INTR_WINDOW_EXITING; - - while (vmx->emulation_required && count-- != 0) { - if (intr_window_requested && vmx_interrupt_allowed(vcpu)) - return handle_interrupt_window(&vmx->vcpu); - - if (kvm_test_request(KVM_REQ_EVENT, vcpu)) - return 1; - - if (!kvm_emulate_instruction(vcpu, 0)) - return 0; - - if (vmx->emulation_required && !vmx->rmode.vm86_active && - vcpu->arch.exception.pending) { - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = - KVM_INTERNAL_ERROR_EMULATION; - vcpu->run->internal.ndata = 0; - return 0; - } - - if (vcpu->arch.halt_request) { - vcpu->arch.halt_request = 0; - return kvm_vcpu_halt(vcpu); - } - - /* - * Note, return 1 and not 0, vcpu_run() is responsible for - * morphing the pending signal into the proper return code. - */ - if (signal_pending(current)) - return 1; - - if (need_resched()) - schedule(); - } - - return 1; -} - -static void grow_ple_window(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned int old = vmx->ple_window; - - vmx->ple_window = __grow_ple_window(old, ple_window, - ple_window_grow, - ple_window_max); - - if (vmx->ple_window != old) { - vmx->ple_window_dirty = true; - trace_kvm_ple_window_update(vcpu->vcpu_id, - vmx->ple_window, old); - } -} - -static void shrink_ple_window(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned int old = vmx->ple_window; - - vmx->ple_window = __shrink_ple_window(old, ple_window, - ple_window_shrink, - ple_window); - - if (vmx->ple_window != old) { - vmx->ple_window_dirty = true; - trace_kvm_ple_window_update(vcpu->vcpu_id, - vmx->ple_window, old); - } -} - -/* - * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR. - */ -static void wakeup_handler(void) -{ - struct kvm_vcpu *vcpu; - int cpu = smp_processor_id(); - - spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); - list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu), - blocked_vcpu_list) { - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - - if (pi_test_on(pi_desc) == 1) - kvm_vcpu_kick(vcpu); - } - spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); -} - -static void vmx_enable_tdp(void) -{ - kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK, - enable_ept_ad_bits ? VMX_EPT_ACCESS_BIT : 0ull, - enable_ept_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull, - 0ull, VMX_EPT_EXECUTABLE_MASK, - cpu_has_vmx_ept_execute_only() ? 0ull : VMX_EPT_READABLE_MASK, - VMX_EPT_RWX_MASK, 0ull); - - ept_set_mmio_spte_mask(); - kvm_enable_tdp(); -} - -/* - * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE - * exiting, so only get here on cpu with PAUSE-Loop-Exiting. - */ -static int handle_pause(struct kvm_vcpu *vcpu) -{ - if (!kvm_pause_in_guest(vcpu->kvm)) - grow_ple_window(vcpu); - - /* - * Intel sdm vol3 ch-25.1.3 says: The "PAUSE-loop exiting" - * VM-execution control is ignored if CPL > 0. OTOH, KVM - * never set PAUSE_EXITING and just set PLE if supported, - * so the vcpu must be CPL=0 if it gets a PAUSE exit. - */ - kvm_vcpu_on_spin(vcpu, true); - return kvm_skip_emulated_instruction(vcpu); -} - -static int handle_nop(struct kvm_vcpu *vcpu) -{ - return kvm_skip_emulated_instruction(vcpu); -} - -static int handle_mwait(struct kvm_vcpu *vcpu) -{ - printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n"); - return handle_nop(vcpu); -} - -static int handle_invalid_op(struct kvm_vcpu *vcpu) -{ - kvm_queue_exception(vcpu, UD_VECTOR); - return 1; -} - -static int handle_monitor_trap(struct kvm_vcpu *vcpu) -{ - return 1; -} - -static int handle_monitor(struct kvm_vcpu *vcpu) -{ - printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n"); - return handle_nop(vcpu); -} - -static int handle_invpcid(struct kvm_vcpu *vcpu) -{ - u32 vmx_instruction_info; - unsigned long type; - bool pcid_enabled; - gva_t gva; - struct x86_exception e; - unsigned i; - unsigned long roots_to_free = 0; - struct { - u64 pcid; - u64 gla; - } operand; - - if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) { - kvm_queue_exception(vcpu, UD_VECTOR); - return 1; - } - - vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); - type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); - - if (type > 3) { - kvm_inject_gp(vcpu, 0); - return 1; - } - - /* According to the Intel instruction reference, the memory operand - * is read even if it isn't needed (e.g., for type==all) - */ - if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), - vmx_instruction_info, false, - sizeof(operand), &gva)) - return 1; - - if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { - kvm_inject_page_fault(vcpu, &e); - return 1; - } - - if (operand.pcid >> 12 != 0) { - kvm_inject_gp(vcpu, 0); - return 1; - } - - pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE); - - switch (type) { - case INVPCID_TYPE_INDIV_ADDR: - if ((!pcid_enabled && (operand.pcid != 0)) || - is_noncanonical_address(operand.gla, vcpu)) { - kvm_inject_gp(vcpu, 0); - return 1; - } - kvm_mmu_invpcid_gva(vcpu, operand.gla, operand.pcid); - return kvm_skip_emulated_instruction(vcpu); - - case INVPCID_TYPE_SINGLE_CTXT: - if (!pcid_enabled && (operand.pcid != 0)) { - kvm_inject_gp(vcpu, 0); - return 1; - } - - if (kvm_get_active_pcid(vcpu) == operand.pcid) { - kvm_mmu_sync_roots(vcpu); - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); - } - - for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) - if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].cr3) - == operand.pcid) - roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i); - - kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, roots_to_free); - /* - * If neither the current cr3 nor any of the prev_roots use the - * given PCID, then nothing needs to be done here because a - * resync will happen anyway before switching to any other CR3. - */ - - return kvm_skip_emulated_instruction(vcpu); - - case INVPCID_TYPE_ALL_NON_GLOBAL: - /* - * Currently, KVM doesn't mark global entries in the shadow - * page tables, so a non-global flush just degenerates to a - * global flush. If needed, we could optimize this later by - * keeping track of global entries in shadow page tables. - */ - - /* fall-through */ - case INVPCID_TYPE_ALL_INCL_GLOBAL: - kvm_mmu_unload(vcpu); - return kvm_skip_emulated_instruction(vcpu); - - default: - BUG(); /* We have already checked above that type <= 3 */ - } -} - -static int handle_pml_full(struct kvm_vcpu *vcpu) -{ - unsigned long exit_qualification; - - trace_kvm_pml_full(vcpu->vcpu_id); - - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - - /* - * PML buffer FULL happened while executing iret from NMI, - * "blocked by NMI" bit has to be set before next VM entry. - */ - if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && - enable_vnmi && - (exit_qualification & INTR_INFO_UNBLOCK_NMI)) - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); - - /* - * PML buffer already flushed at beginning of VMEXIT. Nothing to do - * here.., and there's no userspace involvement needed for PML. - */ - return 1; -} - -static int handle_preemption_timer(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - if (!vmx->req_immediate_exit && - !unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled)) - kvm_lapic_expired_hv_timer(vcpu); - - return 1; -} - -/* - * When nested=0, all VMX instruction VM Exits filter here. The handlers - * are overwritten by nested_vmx_setup() when nested=1. - */ -static int handle_vmx_instruction(struct kvm_vcpu *vcpu) -{ - kvm_queue_exception(vcpu, UD_VECTOR); - return 1; -} - -static int handle_encls(struct kvm_vcpu *vcpu) -{ - /* - * SGX virtualization is not yet supported. There is no software - * enable bit for SGX, so we have to trap ENCLS and inject a #UD - * to prevent the guest from executing ENCLS. - */ - kvm_queue_exception(vcpu, UD_VECTOR); - return 1; -} - -/* - * The exit handlers return 1 if the exit was handled fully and guest execution - * may resume. Otherwise they set the kvm_run parameter to indicate what needs - * to be done to userspace and return 0. - */ -static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { - [EXIT_REASON_EXCEPTION_NMI] = handle_exception_nmi, - [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, - [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, - [EXIT_REASON_NMI_WINDOW] = handle_nmi_window, - [EXIT_REASON_IO_INSTRUCTION] = handle_io, - [EXIT_REASON_CR_ACCESS] = handle_cr, - [EXIT_REASON_DR_ACCESS] = handle_dr, - [EXIT_REASON_CPUID] = kvm_emulate_cpuid, - [EXIT_REASON_MSR_READ] = kvm_emulate_rdmsr, - [EXIT_REASON_MSR_WRITE] = kvm_emulate_wrmsr, - [EXIT_REASON_INTERRUPT_WINDOW] = handle_interrupt_window, - [EXIT_REASON_HLT] = kvm_emulate_halt, - [EXIT_REASON_INVD] = handle_invd, - [EXIT_REASON_INVLPG] = handle_invlpg, - [EXIT_REASON_RDPMC] = handle_rdpmc, - [EXIT_REASON_VMCALL] = handle_vmcall, - [EXIT_REASON_VMCLEAR] = handle_vmx_instruction, - [EXIT_REASON_VMLAUNCH] = handle_vmx_instruction, - [EXIT_REASON_VMPTRLD] = handle_vmx_instruction, - [EXIT_REASON_VMPTRST] = handle_vmx_instruction, - [EXIT_REASON_VMREAD] = handle_vmx_instruction, - [EXIT_REASON_VMRESUME] = handle_vmx_instruction, - [EXIT_REASON_VMWRITE] = handle_vmx_instruction, - [EXIT_REASON_VMOFF] = handle_vmx_instruction, - [EXIT_REASON_VMON] = handle_vmx_instruction, - [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, - [EXIT_REASON_APIC_ACCESS] = handle_apic_access, - [EXIT_REASON_APIC_WRITE] = handle_apic_write, - [EXIT_REASON_EOI_INDUCED] = handle_apic_eoi_induced, - [EXIT_REASON_WBINVD] = handle_wbinvd, - [EXIT_REASON_XSETBV] = handle_xsetbv, - [EXIT_REASON_TASK_SWITCH] = handle_task_switch, - [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, - [EXIT_REASON_GDTR_IDTR] = handle_desc, - [EXIT_REASON_LDTR_TR] = handle_desc, - [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, - [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, - [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, - [EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait, - [EXIT_REASON_MONITOR_TRAP_FLAG] = handle_monitor_trap, - [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor, - [EXIT_REASON_INVEPT] = handle_vmx_instruction, - [EXIT_REASON_INVVPID] = handle_vmx_instruction, - [EXIT_REASON_RDRAND] = handle_invalid_op, - [EXIT_REASON_RDSEED] = handle_invalid_op, - [EXIT_REASON_PML_FULL] = handle_pml_full, - [EXIT_REASON_INVPCID] = handle_invpcid, - [EXIT_REASON_VMFUNC] = handle_vmx_instruction, - [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer, - [EXIT_REASON_ENCLS] = handle_encls, -}; - -static const int kvm_vmx_max_exit_handlers = - ARRAY_SIZE(kvm_vmx_exit_handlers); - -static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) -{ - *info1 = vmcs_readl(EXIT_QUALIFICATION); - *info2 = vmcs_read32(VM_EXIT_INTR_INFO); -} - -static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx) -{ - if (vmx->pml_pg) { - __free_page(vmx->pml_pg); - vmx->pml_pg = NULL; - } -} - -static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - u64 *pml_buf; - u16 pml_idx; - - pml_idx = vmcs_read16(GUEST_PML_INDEX); - - /* Do nothing if PML buffer is empty */ - if (pml_idx == (PML_ENTITY_NUM - 1)) - return; - - /* PML index always points to next available PML buffer entity */ - if (pml_idx >= PML_ENTITY_NUM) - pml_idx = 0; - else - pml_idx++; - - pml_buf = page_address(vmx->pml_pg); - for (; pml_idx < PML_ENTITY_NUM; pml_idx++) { - u64 gpa; - - gpa = pml_buf[pml_idx]; - WARN_ON(gpa & (PAGE_SIZE - 1)); - kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); - } - - /* reset PML index */ - vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); -} - -/* - * Flush all vcpus' PML buffer and update logged GPAs to dirty_bitmap. - * Called before reporting dirty_bitmap to userspace. - */ -static void kvm_flush_pml_buffers(struct kvm *kvm) -{ - int i; - struct kvm_vcpu *vcpu; - /* - * We only need to kick vcpu out of guest mode here, as PML buffer - * is flushed at beginning of all VMEXITs, and it's obvious that only - * vcpus running in guest are possible to have unflushed GPAs in PML - * buffer. - */ - kvm_for_each_vcpu(i, vcpu, kvm) - kvm_vcpu_kick(vcpu); -} - -static void vmx_dump_sel(char *name, uint32_t sel) -{ - pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n", - name, vmcs_read16(sel), - vmcs_read32(sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR), - vmcs_read32(sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR), - vmcs_readl(sel + GUEST_ES_BASE - GUEST_ES_SELECTOR)); -} - -static void vmx_dump_dtsel(char *name, uint32_t limit) -{ - pr_err("%s limit=0x%08x, base=0x%016lx\n", - name, vmcs_read32(limit), - vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT)); -} - -void dump_vmcs(void) -{ - u32 vmentry_ctl, vmexit_ctl; - u32 cpu_based_exec_ctrl, pin_based_exec_ctrl, secondary_exec_control; - unsigned long cr4; - u64 efer; - int i, n; - - if (!dump_invalid_vmcs) { - pr_warn_ratelimited("set kvm_intel.dump_invalid_vmcs=1 to dump internal KVM state.\n"); - return; - } - - vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS); - vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS); - cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); - pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL); - cr4 = vmcs_readl(GUEST_CR4); - efer = vmcs_read64(GUEST_IA32_EFER); - secondary_exec_control = 0; - if (cpu_has_secondary_exec_ctrls()) - secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); - - pr_err("*** Guest State ***\n"); - pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n", - vmcs_readl(GUEST_CR0), vmcs_readl(CR0_READ_SHADOW), - vmcs_readl(CR0_GUEST_HOST_MASK)); - pr_err("CR4: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n", - cr4, vmcs_readl(CR4_READ_SHADOW), vmcs_readl(CR4_GUEST_HOST_MASK)); - pr_err("CR3 = 0x%016lx\n", vmcs_readl(GUEST_CR3)); - if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) && - (cr4 & X86_CR4_PAE) && !(efer & EFER_LMA)) - { - pr_err("PDPTR0 = 0x%016llx PDPTR1 = 0x%016llx\n", - vmcs_read64(GUEST_PDPTR0), vmcs_read64(GUEST_PDPTR1)); - pr_err("PDPTR2 = 0x%016llx PDPTR3 = 0x%016llx\n", - vmcs_read64(GUEST_PDPTR2), vmcs_read64(GUEST_PDPTR3)); - } - pr_err("RSP = 0x%016lx RIP = 0x%016lx\n", - vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP)); - pr_err("RFLAGS=0x%08lx DR7 = 0x%016lx\n", - vmcs_readl(GUEST_RFLAGS), vmcs_readl(GUEST_DR7)); - pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n", - vmcs_readl(GUEST_SYSENTER_ESP), - vmcs_read32(GUEST_SYSENTER_CS), vmcs_readl(GUEST_SYSENTER_EIP)); - vmx_dump_sel("CS: ", GUEST_CS_SELECTOR); - vmx_dump_sel("DS: ", GUEST_DS_SELECTOR); - vmx_dump_sel("SS: ", GUEST_SS_SELECTOR); - vmx_dump_sel("ES: ", GUEST_ES_SELECTOR); - vmx_dump_sel("FS: ", GUEST_FS_SELECTOR); - vmx_dump_sel("GS: ", GUEST_GS_SELECTOR); - vmx_dump_dtsel("GDTR:", GUEST_GDTR_LIMIT); - vmx_dump_sel("LDTR:", GUEST_LDTR_SELECTOR); - vmx_dump_dtsel("IDTR:", GUEST_IDTR_LIMIT); - vmx_dump_sel("TR: ", GUEST_TR_SELECTOR); - if ((vmexit_ctl & (VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER)) || - (vmentry_ctl & (VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER))) - pr_err("EFER = 0x%016llx PAT = 0x%016llx\n", - efer, vmcs_read64(GUEST_IA32_PAT)); - pr_err("DebugCtl = 0x%016llx DebugExceptions = 0x%016lx\n", - vmcs_read64(GUEST_IA32_DEBUGCTL), - vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS)); - if (cpu_has_load_perf_global_ctrl() && - vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) - pr_err("PerfGlobCtl = 0x%016llx\n", - vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL)); - if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS) - pr_err("BndCfgS = 0x%016llx\n", vmcs_read64(GUEST_BNDCFGS)); - pr_err("Interruptibility = %08x ActivityState = %08x\n", - vmcs_read32(GUEST_INTERRUPTIBILITY_INFO), - vmcs_read32(GUEST_ACTIVITY_STATE)); - if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) - pr_err("InterruptStatus = %04x\n", - vmcs_read16(GUEST_INTR_STATUS)); - - pr_err("*** Host State ***\n"); - pr_err("RIP = 0x%016lx RSP = 0x%016lx\n", - vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP)); - pr_err("CS=%04x SS=%04x DS=%04x ES=%04x FS=%04x GS=%04x TR=%04x\n", - vmcs_read16(HOST_CS_SELECTOR), vmcs_read16(HOST_SS_SELECTOR), - vmcs_read16(HOST_DS_SELECTOR), vmcs_read16(HOST_ES_SELECTOR), - vmcs_read16(HOST_FS_SELECTOR), vmcs_read16(HOST_GS_SELECTOR), - vmcs_read16(HOST_TR_SELECTOR)); - pr_err("FSBase=%016lx GSBase=%016lx TRBase=%016lx\n", - vmcs_readl(HOST_FS_BASE), vmcs_readl(HOST_GS_BASE), - vmcs_readl(HOST_TR_BASE)); - pr_err("GDTBase=%016lx IDTBase=%016lx\n", - vmcs_readl(HOST_GDTR_BASE), vmcs_readl(HOST_IDTR_BASE)); - pr_err("CR0=%016lx CR3=%016lx CR4=%016lx\n", - vmcs_readl(HOST_CR0), vmcs_readl(HOST_CR3), - vmcs_readl(HOST_CR4)); - pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n", - vmcs_readl(HOST_IA32_SYSENTER_ESP), - vmcs_read32(HOST_IA32_SYSENTER_CS), - vmcs_readl(HOST_IA32_SYSENTER_EIP)); - if (vmexit_ctl & (VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER)) - pr_err("EFER = 0x%016llx PAT = 0x%016llx\n", - vmcs_read64(HOST_IA32_EFER), - vmcs_read64(HOST_IA32_PAT)); - if (cpu_has_load_perf_global_ctrl() && - vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) - pr_err("PerfGlobCtl = 0x%016llx\n", - vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL)); - - pr_err("*** Control State ***\n"); - pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n", - pin_based_exec_ctrl, cpu_based_exec_ctrl, secondary_exec_control); - pr_err("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl); - pr_err("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n", - vmcs_read32(EXCEPTION_BITMAP), - vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK), - vmcs_read32(PAGE_FAULT_ERROR_CODE_MATCH)); - pr_err("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n", - vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), - vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE), - vmcs_read32(VM_ENTRY_INSTRUCTION_LEN)); - pr_err("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n", - vmcs_read32(VM_EXIT_INTR_INFO), - vmcs_read32(VM_EXIT_INTR_ERROR_CODE), - vmcs_read32(VM_EXIT_INSTRUCTION_LEN)); - pr_err(" reason=%08x qualification=%016lx\n", - vmcs_read32(VM_EXIT_REASON), vmcs_readl(EXIT_QUALIFICATION)); - pr_err("IDTVectoring: info=%08x errcode=%08x\n", - vmcs_read32(IDT_VECTORING_INFO_FIELD), - vmcs_read32(IDT_VECTORING_ERROR_CODE)); - pr_err("TSC Offset = 0x%016llx\n", vmcs_read64(TSC_OFFSET)); - if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING) - pr_err("TSC Multiplier = 0x%016llx\n", - vmcs_read64(TSC_MULTIPLIER)); - if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) { - if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) { - u16 status = vmcs_read16(GUEST_INTR_STATUS); - pr_err("SVI|RVI = %02x|%02x ", status >> 8, status & 0xff); - } - pr_cont("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD)); - if (secondary_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) - pr_err("APIC-access addr = 0x%016llx ", vmcs_read64(APIC_ACCESS_ADDR)); - pr_cont("virt-APIC addr = 0x%016llx\n", vmcs_read64(VIRTUAL_APIC_PAGE_ADDR)); - } - if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR) - pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV)); - if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT)) - pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(EPT_POINTER)); - n = vmcs_read32(CR3_TARGET_COUNT); - for (i = 0; i + 1 < n; i += 4) - pr_err("CR3 target%u=%016lx target%u=%016lx\n", - i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2), - i + 1, vmcs_readl(CR3_TARGET_VALUE0 + i * 2 + 2)); - if (i < n) - pr_err("CR3 target%u=%016lx\n", - i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2)); - if (secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING) - pr_err("PLE Gap=%08x Window=%08x\n", - vmcs_read32(PLE_GAP), vmcs_read32(PLE_WINDOW)); - if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID) - pr_err("Virtual processor ID = 0x%04x\n", - vmcs_read16(VIRTUAL_PROCESSOR_ID)); -} - -/* - * The guest has exited. See if we can fix it or if we need userspace - * assistance. - */ -static int vmx_handle_exit(struct kvm_vcpu *vcpu, - enum exit_fastpath_completion exit_fastpath) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - u32 exit_reason = vmx->exit_reason; - u32 vectoring_info = vmx->idt_vectoring_info; - - trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX); - - /* - * Flush logged GPAs PML buffer, this will make dirty_bitmap more - * updated. Another good is, in kvm_vm_ioctl_get_dirty_log, before - * querying dirty_bitmap, we only need to kick all vcpus out of guest - * mode as if vcpus is in root mode, the PML buffer must has been - * flushed already. - */ - if (enable_pml) - vmx_flush_pml_buffer(vcpu); - - /* If guest state is invalid, start emulating */ - if (vmx->emulation_required) - return handle_invalid_guest_state(vcpu); - - if (is_guest_mode(vcpu) && nested_vmx_exit_reflected(vcpu, exit_reason)) - return nested_vmx_reflect_vmexit(vcpu, exit_reason); - - if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { - dump_vmcs(); - vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; - vcpu->run->fail_entry.hardware_entry_failure_reason - = exit_reason; - return 0; - } - - if (unlikely(vmx->fail)) { - dump_vmcs(); - vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; - vcpu->run->fail_entry.hardware_entry_failure_reason - = vmcs_read32(VM_INSTRUCTION_ERROR); - return 0; - } - - /* - * Note: - * Do not try to fix EXIT_REASON_EPT_MISCONFIG if it caused by - * delivery event since it indicates guest is accessing MMIO. - * The vm-exit can be triggered again after return to guest that - * will cause infinite loop. - */ - if ((vectoring_info & VECTORING_INFO_VALID_MASK) && - (exit_reason != EXIT_REASON_EXCEPTION_NMI && - exit_reason != EXIT_REASON_EPT_VIOLATION && - exit_reason != EXIT_REASON_PML_FULL && - exit_reason != EXIT_REASON_TASK_SWITCH)) { - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV; - vcpu->run->internal.ndata = 3; - vcpu->run->internal.data[0] = vectoring_info; - vcpu->run->internal.data[1] = exit_reason; - vcpu->run->internal.data[2] = vcpu->arch.exit_qualification; - if (exit_reason == EXIT_REASON_EPT_MISCONFIG) { - vcpu->run->internal.ndata++; - vcpu->run->internal.data[3] = - vmcs_read64(GUEST_PHYSICAL_ADDRESS); - } - return 0; - } - - if (unlikely(!enable_vnmi && - vmx->loaded_vmcs->soft_vnmi_blocked)) { - if (vmx_interrupt_allowed(vcpu)) { - vmx->loaded_vmcs->soft_vnmi_blocked = 0; - } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL && - vcpu->arch.nmi_pending) { - /* - * This CPU don't support us in finding the end of an - * NMI-blocked window if the guest runs with IRQs - * disabled. So we pull the trigger after 1 s of - * futile waiting, but inform the user about this. - */ - printk(KERN_WARNING "%s: Breaking out of NMI-blocked " - "state on VCPU %d after 1 s timeout\n", - __func__, vcpu->vcpu_id); - vmx->loaded_vmcs->soft_vnmi_blocked = 0; - } - } - - if (exit_fastpath == EXIT_FASTPATH_SKIP_EMUL_INS) { - kvm_skip_emulated_instruction(vcpu); - return 1; - } else if (exit_reason < kvm_vmx_max_exit_handlers - && kvm_vmx_exit_handlers[exit_reason]) { -#ifdef CONFIG_RETPOLINE - if (exit_reason == EXIT_REASON_MSR_WRITE) - return kvm_emulate_wrmsr(vcpu); - else if (exit_reason == EXIT_REASON_PREEMPTION_TIMER) - return handle_preemption_timer(vcpu); - else if (exit_reason == EXIT_REASON_INTERRUPT_WINDOW) - return handle_interrupt_window(vcpu); - else if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) - return handle_external_interrupt(vcpu); - else if (exit_reason == EXIT_REASON_HLT) - return kvm_emulate_halt(vcpu); - else if (exit_reason == EXIT_REASON_EPT_MISCONFIG) - return handle_ept_misconfig(vcpu); -#endif - return kvm_vmx_exit_handlers[exit_reason](vcpu); - } else { - vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n", - exit_reason); - dump_vmcs(); - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = - KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; - vcpu->run->internal.ndata = 1; - vcpu->run->internal.data[0] = exit_reason; - return 0; - } -} - -/* - * Software based L1D cache flush which is used when microcode providing - * the cache control MSR is not loaded. - * - * The L1D cache is 32 KiB on Nehalem and later microarchitectures, but to - * flush it is required to read in 64 KiB because the replacement algorithm - * is not exactly LRU. This could be sized at runtime via topology - * information but as all relevant affected CPUs have 32KiB L1D cache size - * there is no point in doing so. - */ -static void vmx_l1d_flush(struct kvm_vcpu *vcpu) -{ - int size = PAGE_SIZE << L1D_CACHE_ORDER; - - /* - * This code is only executed when the the flush mode is 'cond' or - * 'always' - */ - if (static_branch_likely(&vmx_l1d_flush_cond)) { - bool flush_l1d; - - /* - * Clear the per-vcpu flush bit, it gets set again - * either from vcpu_run() or from one of the unsafe - * VMEXIT handlers. - */ - flush_l1d = vcpu->arch.l1tf_flush_l1d; - vcpu->arch.l1tf_flush_l1d = false; - - /* - * Clear the per-cpu flush bit, it gets set again from - * the interrupt handlers. - */ - flush_l1d |= kvm_get_cpu_l1tf_flush_l1d(); - kvm_clear_cpu_l1tf_flush_l1d(); - - if (!flush_l1d) - return; - } - - vcpu->stat.l1d_flush++; - - if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) { - wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH); - return; - } - - asm volatile( - /* First ensure the pages are in the TLB */ - "xorl %%eax, %%eax\n" - ".Lpopulate_tlb:\n\t" - "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t" - "addl $4096, %%eax\n\t" - "cmpl %%eax, %[size]\n\t" - "jne .Lpopulate_tlb\n\t" - "xorl %%eax, %%eax\n\t" - "cpuid\n\t" - /* Now fill the cache */ - "xorl %%eax, %%eax\n" - ".Lfill_cache:\n" - "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t" - "addl $64, %%eax\n\t" - "cmpl %%eax, %[size]\n\t" - "jne .Lfill_cache\n\t" - "lfence\n" - :: [flush_pages] "r" (vmx_l1d_flush_pages), - [size] "r" (size) - : "eax", "ebx", "ecx", "edx"); -} - -static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) -{ - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - int tpr_threshold; - - if (is_guest_mode(vcpu) && - nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) - return; - - tpr_threshold = (irr == -1 || tpr < irr) ? 0 : irr; - if (is_guest_mode(vcpu)) - to_vmx(vcpu)->nested.l1_tpr_threshold = tpr_threshold; - else - vmcs_write32(TPR_THRESHOLD, tpr_threshold); -} - -void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - u32 sec_exec_control; - - if (!lapic_in_kernel(vcpu)) - return; - - if (!flexpriority_enabled && - !cpu_has_vmx_virtualize_x2apic_mode()) - return; - - /* Postpone execution until vmcs01 is the current VMCS. */ - if (is_guest_mode(vcpu)) { - vmx->nested.change_vmcs01_virtual_apic_mode = true; - return; - } - - sec_exec_control = secondary_exec_controls_get(vmx); - sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE); - - switch (kvm_get_apic_mode(vcpu)) { - case LAPIC_MODE_INVALID: - WARN_ONCE(true, "Invalid local APIC state"); - case LAPIC_MODE_DISABLED: - break; - case LAPIC_MODE_XAPIC: - if (flexpriority_enabled) { - sec_exec_control |= - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; - vmx_flush_tlb(vcpu, true); - } - break; - case LAPIC_MODE_X2APIC: - if (cpu_has_vmx_virtualize_x2apic_mode()) - sec_exec_control |= - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; - break; - } - secondary_exec_controls_set(vmx, sec_exec_control); - - vmx_update_msr_bitmap(vcpu); -} - -static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) -{ - if (!is_guest_mode(vcpu)) { - vmcs_write64(APIC_ACCESS_ADDR, hpa); - vmx_flush_tlb(vcpu, true); - } -} - -static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr) -{ - u16 status; - u8 old; - - if (max_isr == -1) - max_isr = 0; - - status = vmcs_read16(GUEST_INTR_STATUS); - old = status >> 8; - if (max_isr != old) { - status &= 0xff; - status |= max_isr << 8; - vmcs_write16(GUEST_INTR_STATUS, status); - } -} - -static void vmx_set_rvi(int vector) -{ - u16 status; - u8 old; - - if (vector == -1) - vector = 0; - - status = vmcs_read16(GUEST_INTR_STATUS); - old = (u8)status & 0xff; - if ((u8)vector != old) { - status &= ~0xff; - status |= (u8)vector; - vmcs_write16(GUEST_INTR_STATUS, status); - } -} - -static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) -{ - /* - * When running L2, updating RVI is only relevant when - * vmcs12 virtual-interrupt-delivery enabled. - * However, it can be enabled only when L1 also - * intercepts external-interrupts and in that case - * we should not update vmcs02 RVI but instead intercept - * interrupt. Therefore, do nothing when running L2. - */ - if (!is_guest_mode(vcpu)) - vmx_set_rvi(max_irr); -} - -static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - int max_irr; - bool max_irr_updated; - - WARN_ON(!vcpu->arch.apicv_active); - if (pi_test_on(&vmx->pi_desc)) { - pi_clear_on(&vmx->pi_desc); - /* - * IOMMU can write to PID.ON, so the barrier matters even on UP. - * But on x86 this is just a compiler barrier anyway. - */ - smp_mb__after_atomic(); - max_irr_updated = - kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr); - - /* - * If we are running L2 and L1 has a new pending interrupt - * which can be injected, we should re-evaluate - * what should be done with this new L1 interrupt. - * If L1 intercepts external-interrupts, we should - * exit from L2 to L1. Otherwise, interrupt should be - * delivered directly to L2. - */ - if (is_guest_mode(vcpu) && max_irr_updated) { - if (nested_exit_on_intr(vcpu)) - kvm_vcpu_exiting_guest_mode(vcpu); - else - kvm_make_request(KVM_REQ_EVENT, vcpu); - } - } else { - max_irr = kvm_lapic_find_highest_irr(vcpu); - } - vmx_hwapic_irr_update(vcpu, max_irr); - return max_irr; -} - -static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu) -{ - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - - return pi_test_on(pi_desc) || - (pi_test_sn(pi_desc) && !pi_is_pir_empty(pi_desc)); -} - -static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) -{ - if (!kvm_vcpu_apicv_active(vcpu)) - return; - - vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]); - vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]); - vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]); - vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]); -} - -static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - pi_clear_on(&vmx->pi_desc); - memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir)); -} - -static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx) -{ - vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); - - /* if exit due to PF check for async PF */ - if (is_page_fault(vmx->exit_intr_info)) - vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason(); - - /* Handle machine checks before interrupts are enabled */ - if (is_machine_check(vmx->exit_intr_info)) - kvm_machine_check(); - - /* We need to handle NMIs before interrupts are enabled */ - if (is_nmi(vmx->exit_intr_info)) { - kvm_before_interrupt(&vmx->vcpu); - asm("int $2"); - kvm_after_interrupt(&vmx->vcpu); - } -} - -static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu) -{ - unsigned int vector; - unsigned long entry; -#ifdef CONFIG_X86_64 - unsigned long tmp; -#endif - gate_desc *desc; - u32 intr_info; - - intr_info = vmcs_read32(VM_EXIT_INTR_INFO); - if (WARN_ONCE(!is_external_intr(intr_info), - "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info)) - return; - - vector = intr_info & INTR_INFO_VECTOR_MASK; - desc = (gate_desc *)host_idt_base + vector; - entry = gate_offset(desc); - - kvm_before_interrupt(vcpu); - - asm volatile( -#ifdef CONFIG_X86_64 - "mov %%" _ASM_SP ", %[sp]\n\t" - "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t" - "push $%c[ss]\n\t" - "push %[sp]\n\t" -#endif - "pushf\n\t" - __ASM_SIZE(push) " $%c[cs]\n\t" - CALL_NOSPEC - : -#ifdef CONFIG_X86_64 - [sp]"=&r"(tmp), -#endif - ASM_CALL_CONSTRAINT - : - THUNK_TARGET(entry), - [ss]"i"(__KERNEL_DS), - [cs]"i"(__KERNEL_CS) - ); - - kvm_after_interrupt(vcpu); -} -STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff); - -static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu, - enum exit_fastpath_completion *exit_fastpath) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - if (vmx->exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) - handle_external_interrupt_irqoff(vcpu); - else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI) - handle_exception_nmi_irqoff(vmx); - else if (!is_guest_mode(vcpu) && - vmx->exit_reason == EXIT_REASON_MSR_WRITE) - *exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu); -} - -static bool vmx_has_emulated_msr(int index) -{ - switch (index) { - case MSR_IA32_SMBASE: - /* - * We cannot do SMM unless we can run the guest in big - * real mode. - */ - return enable_unrestricted_guest || emulate_invalid_guest_state; - case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: - return nested; - case MSR_AMD64_VIRT_SPEC_CTRL: - /* This is AMD only. */ - return false; - default: - return true; - } -} - -static bool vmx_pt_supported(void) -{ - return pt_mode == PT_MODE_HOST_GUEST; -} - -static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) -{ - u32 exit_intr_info; - bool unblock_nmi; - u8 vector; - bool idtv_info_valid; - - idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK; - - if (enable_vnmi) { - if (vmx->loaded_vmcs->nmi_known_unmasked) - return; - /* - * Can't use vmx->exit_intr_info since we're not sure what - * the exit reason is. - */ - exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); - unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; - vector = exit_intr_info & INTR_INFO_VECTOR_MASK; - /* - * SDM 3: 27.7.1.2 (September 2008) - * Re-set bit "block by NMI" before VM entry if vmexit caused by - * a guest IRET fault. - * SDM 3: 23.2.2 (September 2008) - * Bit 12 is undefined in any of the following cases: - * If the VM exit sets the valid bit in the IDT-vectoring - * information field. - * If the VM exit is due to a double fault. - */ - if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi && - vector != DF_VECTOR && !idtv_info_valid) - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); - else - vmx->loaded_vmcs->nmi_known_unmasked = - !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) - & GUEST_INTR_STATE_NMI); - } else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked)) - vmx->loaded_vmcs->vnmi_blocked_time += - ktime_to_ns(ktime_sub(ktime_get(), - vmx->loaded_vmcs->entry_time)); -} - -static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu, - u32 idt_vectoring_info, - int instr_len_field, - int error_code_field) -{ - u8 vector; - int type; - bool idtv_info_valid; - - idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; - - vcpu->arch.nmi_injected = false; - kvm_clear_exception_queue(vcpu); - kvm_clear_interrupt_queue(vcpu); - - if (!idtv_info_valid) - return; - - kvm_make_request(KVM_REQ_EVENT, vcpu); - - vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK; - type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK; - - switch (type) { - case INTR_TYPE_NMI_INTR: - vcpu->arch.nmi_injected = true; - /* - * SDM 3: 27.7.1.2 (September 2008) - * Clear bit "block by NMI" before VM entry if a NMI - * delivery faulted. - */ - vmx_set_nmi_mask(vcpu, false); - break; - case INTR_TYPE_SOFT_EXCEPTION: - vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); - /* fall through */ - case INTR_TYPE_HARD_EXCEPTION: - if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { - u32 err = vmcs_read32(error_code_field); - kvm_requeue_exception_e(vcpu, vector, err); - } else - kvm_requeue_exception(vcpu, vector); - break; - case INTR_TYPE_SOFT_INTR: - vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); - /* fall through */ - case INTR_TYPE_EXT_INTR: - kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR); - break; - default: - break; - } -} - -static void vmx_complete_interrupts(struct vcpu_vmx *vmx) -{ - __vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info, - VM_EXIT_INSTRUCTION_LEN, - IDT_VECTORING_ERROR_CODE); -} - -static void vmx_cancel_injection(struct kvm_vcpu *vcpu) -{ - __vmx_complete_interrupts(vcpu, - vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), - VM_ENTRY_INSTRUCTION_LEN, - VM_ENTRY_EXCEPTION_ERROR_CODE); - - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); -} - -static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) -{ - int i, nr_msrs; - struct perf_guest_switch_msr *msrs; - - msrs = perf_guest_get_msrs(&nr_msrs); - - if (!msrs) - return; - - for (i = 0; i < nr_msrs; i++) - if (msrs[i].host == msrs[i].guest) - clear_atomic_switch_msr(vmx, msrs[i].msr); - else - add_atomic_switch_msr(vmx, msrs[i].msr, msrs[i].guest, - msrs[i].host, false); -} - -static void atomic_switch_umwait_control_msr(struct vcpu_vmx *vmx) -{ - u32 host_umwait_control; - - if (!vmx_has_waitpkg(vmx)) - return; - - host_umwait_control = get_umwait_control_msr(); - - if (vmx->msr_ia32_umwait_control != host_umwait_control) - add_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL, - vmx->msr_ia32_umwait_control, - host_umwait_control, false); - else - clear_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL); -} - -static void vmx_update_hv_timer(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - u64 tscl; - u32 delta_tsc; - - if (vmx->req_immediate_exit) { - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, 0); - vmx->loaded_vmcs->hv_timer_soft_disabled = false; - } else if (vmx->hv_deadline_tsc != -1) { - tscl = rdtsc(); - if (vmx->hv_deadline_tsc > tscl) - /* set_hv_timer ensures the delta fits in 32-bits */ - delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >> - cpu_preemption_timer_multi); - else - delta_tsc = 0; - - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc); - vmx->loaded_vmcs->hv_timer_soft_disabled = false; - } else if (!vmx->loaded_vmcs->hv_timer_soft_disabled) { - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, -1); - vmx->loaded_vmcs->hv_timer_soft_disabled = true; - } -} - -void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp) -{ - if (unlikely(host_rsp != vmx->loaded_vmcs->host_state.rsp)) { - vmx->loaded_vmcs->host_state.rsp = host_rsp; - vmcs_writel(HOST_RSP, host_rsp); - } -} - -bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched); - -static void vmx_vcpu_run(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long cr3, cr4; - - /* Record the guest's net vcpu time for enforced NMI injections. */ - if (unlikely(!enable_vnmi && - vmx->loaded_vmcs->soft_vnmi_blocked)) - vmx->loaded_vmcs->entry_time = ktime_get(); - - /* Don't enter VMX if guest state is invalid, let the exit handler - start emulation until we arrive back to a valid state */ - if (vmx->emulation_required) - return; - - if (vmx->ple_window_dirty) { - vmx->ple_window_dirty = false; - vmcs_write32(PLE_WINDOW, vmx->ple_window); - } - - if (vmx->nested.need_vmcs12_to_shadow_sync) - nested_sync_vmcs12_to_shadow(vcpu); - - if (kvm_register_is_dirty(vcpu, VCPU_REGS_RSP)) - vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); - if (kvm_register_is_dirty(vcpu, VCPU_REGS_RIP)) - vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); - - cr3 = __get_current_cr3_fast(); - if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) { - vmcs_writel(HOST_CR3, cr3); - vmx->loaded_vmcs->host_state.cr3 = cr3; - } - - cr4 = cr4_read_shadow(); - if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) { - vmcs_writel(HOST_CR4, cr4); - vmx->loaded_vmcs->host_state.cr4 = cr4; - } - - /* When single-stepping over STI and MOV SS, we must clear the - * corresponding interruptibility bits in the guest state. Otherwise - * vmentry fails as it then expects bit 14 (BS) in pending debug - * exceptions being set, but that's not correct for the guest debugging - * case. */ - if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) - vmx_set_interrupt_shadow(vcpu, 0); - - kvm_load_guest_xsave_state(vcpu); - - if (static_cpu_has(X86_FEATURE_PKU) && - kvm_read_cr4_bits(vcpu, X86_CR4_PKE) && - vcpu->arch.pkru != vmx->host_pkru) - __write_pkru(vcpu->arch.pkru); - - pt_guest_enter(vmx); - - atomic_switch_perf_msrs(vmx); - atomic_switch_umwait_control_msr(vmx); - - if (enable_preemption_timer) - vmx_update_hv_timer(vcpu); - - if (lapic_in_kernel(vcpu) && - vcpu->arch.apic->lapic_timer.timer_advance_ns) - kvm_wait_lapic_expire(vcpu); - - /* - * If this vCPU has touched SPEC_CTRL, restore the guest's value if - * it's non-zero. Since vmentry is serialising on affected CPUs, there - * is no need to worry about the conditional branch over the wrmsr - * being speculatively taken. - */ - x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); - - /* L1D Flush includes CPU buffer clear to mitigate MDS */ - if (static_branch_unlikely(&vmx_l1d_should_flush)) - vmx_l1d_flush(vcpu); - else if (static_branch_unlikely(&mds_user_clear)) - mds_clear_cpu_buffers(); - - if (vcpu->arch.cr2 != read_cr2()) - write_cr2(vcpu->arch.cr2); - - vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, - vmx->loaded_vmcs->launched); - - vcpu->arch.cr2 = read_cr2(); - - /* - * We do not use IBRS in the kernel. If this vCPU has used the - * SPEC_CTRL MSR it may have left it on; save the value and - * turn it off. This is much more efficient than blindly adding - * it to the atomic save/restore list. Especially as the former - * (Saving guest MSRs on vmexit) doesn't even exist in KVM. - * - * For non-nested case: - * If the L01 MSR bitmap does not intercept the MSR, then we need to - * save it. - * - * For nested case: - * If the L02 MSR bitmap does not intercept the MSR, then we need to - * save it. - */ - if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) - vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - - x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); - - /* All fields are clean at this point */ - if (static_branch_unlikely(&enable_evmcs)) - current_evmcs->hv_clean_fields |= - HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; - - if (static_branch_unlikely(&enable_evmcs)) - current_evmcs->hv_vp_id = vcpu->arch.hyperv.vp_index; - - /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ - if (vmx->host_debugctlmsr) - update_debugctlmsr(vmx->host_debugctlmsr); - -#ifndef CONFIG_X86_64 - /* - * The sysexit path does not restore ds/es, so we must set them to - * a reasonable value ourselves. - * - * We can't defer this to vmx_prepare_switch_to_host() since that - * function may be executed in interrupt context, which saves and - * restore segments around it, nullifying its effect. - */ - loadsegment(ds, __USER_DS); - loadsegment(es, __USER_DS); -#endif - - vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) - | (1 << VCPU_EXREG_RFLAGS) - | (1 << VCPU_EXREG_PDPTR) - | (1 << VCPU_EXREG_SEGMENTS) - | (1 << VCPU_EXREG_CR3)); - vcpu->arch.regs_dirty = 0; - - pt_guest_exit(vmx); - - /* - * eager fpu is enabled if PKEY is supported and CR4 is switched - * back on host, so it is safe to read guest PKRU from current - * XSAVE. - */ - if (static_cpu_has(X86_FEATURE_PKU) && - kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) { - vcpu->arch.pkru = rdpkru(); - if (vcpu->arch.pkru != vmx->host_pkru) - __write_pkru(vmx->host_pkru); - } - - kvm_load_host_xsave_state(vcpu); - - vmx->nested.nested_run_pending = 0; - vmx->idt_vectoring_info = 0; - - vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON); - if ((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY) - kvm_machine_check(); - - if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) - return; - - vmx->loaded_vmcs->launched = 1; - vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); - - vmx_recover_nmi_blocking(vmx); - vmx_complete_interrupts(vmx); -} - -static struct kvm *vmx_vm_alloc(void) -{ - struct kvm_vmx *kvm_vmx = __vmalloc(sizeof(struct kvm_vmx), - GFP_KERNEL_ACCOUNT | __GFP_ZERO, - PAGE_KERNEL); - return &kvm_vmx->kvm; -} - -static void vmx_vm_free(struct kvm *kvm) -{ - kfree(kvm->arch.hyperv.hv_pa_pg); - vfree(to_kvm_vmx(kvm)); -} - -static void vmx_free_vcpu(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - if (enable_pml) - vmx_destroy_pml_buffer(vmx); - free_vpid(vmx->vpid); - nested_vmx_free_vcpu(vcpu); - free_loaded_vmcs(vmx->loaded_vmcs); - kvm_vcpu_uninit(vcpu); - kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu); - kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu); - kmem_cache_free(kvm_vcpu_cache, vmx); -} - -static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) -{ - int err; - struct vcpu_vmx *vmx; - unsigned long *msr_bitmap; - int i, cpu; - - BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0, - "struct kvm_vcpu must be at offset 0 for arch usercopy region"); - - vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT); - if (!vmx) - return ERR_PTR(-ENOMEM); - - vmx->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache, - GFP_KERNEL_ACCOUNT); - if (!vmx->vcpu.arch.user_fpu) { - printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n"); - err = -ENOMEM; - goto free_partial_vcpu; - } - - vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, - GFP_KERNEL_ACCOUNT); - if (!vmx->vcpu.arch.guest_fpu) { - printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n"); - err = -ENOMEM; - goto free_user_fpu; - } - - vmx->vpid = allocate_vpid(); - - err = kvm_vcpu_init(&vmx->vcpu, kvm, id); - if (err) - goto free_vcpu; - - err = -ENOMEM; - - /* - * If PML is turned on, failure on enabling PML just results in failure - * of creating the vcpu, therefore we can simplify PML logic (by - * avoiding dealing with cases, such as enabling PML partially on vcpus - * for the guest), etc. - */ - if (enable_pml) { - vmx->pml_pg = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); - if (!vmx->pml_pg) - goto uninit_vcpu; - } - - BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) != NR_SHARED_MSRS); - - for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) { - u32 index = vmx_msr_index[i]; - u32 data_low, data_high; - int j = vmx->nmsrs; - - if (rdmsr_safe(index, &data_low, &data_high) < 0) - continue; - if (wrmsr_safe(index, data_low, data_high) < 0) - continue; - - vmx->guest_msrs[j].index = i; - vmx->guest_msrs[j].data = 0; - switch (index) { - case MSR_IA32_TSX_CTRL: - /* - * No need to pass TSX_CTRL_CPUID_CLEAR through, so - * let's avoid changing CPUID bits under the host - * kernel's feet. - */ - vmx->guest_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR; - break; - default: - vmx->guest_msrs[j].mask = -1ull; - break; - } - ++vmx->nmsrs; - } - - err = alloc_loaded_vmcs(&vmx->vmcs01); - if (err < 0) - goto free_pml; - - msr_bitmap = vmx->vmcs01.msr_bitmap; - vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_TSC, MSR_TYPE_R); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW); - if (kvm_cstate_in_guest(kvm)) { - vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C1_RES, MSR_TYPE_R); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R); - } - vmx->msr_bitmap_mode = 0; - - vmx->loaded_vmcs = &vmx->vmcs01; - cpu = get_cpu(); - vmx_vcpu_load(&vmx->vcpu, cpu); - vmx->vcpu.cpu = cpu; - init_vmcs(vmx); - vmx_vcpu_put(&vmx->vcpu); - put_cpu(); - if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) { - err = alloc_apic_access_page(kvm); - if (err) - goto free_vmcs; - } - - if (enable_ept && !enable_unrestricted_guest) { - err = init_rmode_identity_map(kvm); - if (err) - goto free_vmcs; - } - - if (nested) - nested_vmx_setup_ctls_msrs(&vmx->nested.msrs, - vmx_capability.ept, - kvm_vcpu_apicv_active(&vmx->vcpu)); - else - memset(&vmx->nested.msrs, 0, sizeof(vmx->nested.msrs)); - - vmx->nested.posted_intr_nv = -1; - vmx->nested.current_vmptr = -1ull; - - vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED; - - /* - * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR - * or POSTED_INTR_WAKEUP_VECTOR. - */ - vmx->pi_desc.nv = POSTED_INTR_VECTOR; - vmx->pi_desc.sn = 1; - - vmx->ept_pointer = INVALID_PAGE; - - return &vmx->vcpu; - -free_vmcs: - free_loaded_vmcs(vmx->loaded_vmcs); -free_pml: - vmx_destroy_pml_buffer(vmx); -uninit_vcpu: - kvm_vcpu_uninit(&vmx->vcpu); -free_vcpu: - free_vpid(vmx->vpid); - kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu); -free_user_fpu: - kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu); -free_partial_vcpu: - kmem_cache_free(kvm_vcpu_cache, vmx); - return ERR_PTR(err); -} - -#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" -#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" - -static int vmx_vm_init(struct kvm *kvm) -{ - spin_lock_init(&to_kvm_vmx(kvm)->ept_pointer_lock); - - if (!ple_gap) - kvm->arch.pause_in_guest = true; - - if (boot_cpu_has(X86_BUG_L1TF) && enable_ept) { - switch (l1tf_mitigation) { - case L1TF_MITIGATION_OFF: - case L1TF_MITIGATION_FLUSH_NOWARN: - /* 'I explicitly don't care' is set */ - break; - case L1TF_MITIGATION_FLUSH: - case L1TF_MITIGATION_FLUSH_NOSMT: - case L1TF_MITIGATION_FULL: - /* - * Warn upon starting the first VM in a potentially - * insecure environment. - */ - if (sched_smt_active()) - pr_warn_once(L1TF_MSG_SMT); - if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER) - pr_warn_once(L1TF_MSG_L1D); - break; - case L1TF_MITIGATION_FULL_FORCE: - /* Flush is enforced */ - break; - } - } - return 0; -} - -static int __init vmx_check_processor_compat(void) -{ - struct vmcs_config vmcs_conf; - struct vmx_capability vmx_cap; - - if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0) - return -EIO; - if (nested) - nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept, - enable_apicv); - if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) { - printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n", - smp_processor_id()); - return -EIO; - } - return 0; -} - -static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) -{ - u8 cache; - u64 ipat = 0; - - /* For VT-d and EPT combination - * 1. MMIO: always map as UC - * 2. EPT with VT-d: - * a. VT-d without snooping control feature: can't guarantee the - * result, try to trust guest. - * b. VT-d with snooping control feature: snooping control feature of - * VT-d engine can guarantee the cache correctness. Just set it - * to WB to keep consistent with host. So the same as item 3. - * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep - * consistent with host MTRR - */ - if (is_mmio) { - cache = MTRR_TYPE_UNCACHABLE; - goto exit; - } - - if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) { - ipat = VMX_EPT_IPAT_BIT; - cache = MTRR_TYPE_WRBACK; - goto exit; - } - - if (kvm_read_cr0(vcpu) & X86_CR0_CD) { - ipat = VMX_EPT_IPAT_BIT; - if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED)) - cache = MTRR_TYPE_WRBACK; - else - cache = MTRR_TYPE_UNCACHABLE; - goto exit; - } - - cache = kvm_mtrr_get_guest_memory_type(vcpu, gfn); - -exit: - return (cache << VMX_EPT_MT_EPTE_SHIFT) | ipat; -} - -static int vmx_get_lpage_level(void) -{ - if (enable_ept && !cpu_has_vmx_ept_1g_page()) - return PT_DIRECTORY_LEVEL; - else - /* For shadow and EPT supported 1GB page */ - return PT_PDPE_LEVEL; -} - -static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx) -{ - /* - * These bits in the secondary execution controls field - * are dynamic, the others are mostly based on the hypervisor - * architecture and the guest's CPUID. Do not touch the - * dynamic bits. - */ - u32 mask = - SECONDARY_EXEC_SHADOW_VMCS | - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | - SECONDARY_EXEC_DESC; - - u32 new_ctl = vmx->secondary_exec_control; - u32 cur_ctl = secondary_exec_controls_get(vmx); - - secondary_exec_controls_set(vmx, (new_ctl & ~mask) | (cur_ctl & mask)); -} - -/* - * Generate MSR_IA32_VMX_CR{0,4}_FIXED1 according to CPUID. Only set bits - * (indicating "allowed-1") if they are supported in the guest's CPUID. - */ -static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - struct kvm_cpuid_entry2 *entry; - - vmx->nested.msrs.cr0_fixed1 = 0xffffffff; - vmx->nested.msrs.cr4_fixed1 = X86_CR4_PCE; - -#define cr4_fixed1_update(_cr4_mask, _reg, _cpuid_mask) do { \ - if (entry && (entry->_reg & (_cpuid_mask))) \ - vmx->nested.msrs.cr4_fixed1 |= (_cr4_mask); \ -} while (0) - - entry = kvm_find_cpuid_entry(vcpu, 0x1, 0); - cr4_fixed1_update(X86_CR4_VME, edx, bit(X86_FEATURE_VME)); - cr4_fixed1_update(X86_CR4_PVI, edx, bit(X86_FEATURE_VME)); - cr4_fixed1_update(X86_CR4_TSD, edx, bit(X86_FEATURE_TSC)); - cr4_fixed1_update(X86_CR4_DE, edx, bit(X86_FEATURE_DE)); - cr4_fixed1_update(X86_CR4_PSE, edx, bit(X86_FEATURE_PSE)); - cr4_fixed1_update(X86_CR4_PAE, edx, bit(X86_FEATURE_PAE)); - cr4_fixed1_update(X86_CR4_MCE, edx, bit(X86_FEATURE_MCE)); - cr4_fixed1_update(X86_CR4_PGE, edx, bit(X86_FEATURE_PGE)); - cr4_fixed1_update(X86_CR4_OSFXSR, edx, bit(X86_FEATURE_FXSR)); - cr4_fixed1_update(X86_CR4_OSXMMEXCPT, edx, bit(X86_FEATURE_XMM)); - cr4_fixed1_update(X86_CR4_VMXE, ecx, bit(X86_FEATURE_VMX)); - cr4_fixed1_update(X86_CR4_SMXE, ecx, bit(X86_FEATURE_SMX)); - cr4_fixed1_update(X86_CR4_PCIDE, ecx, bit(X86_FEATURE_PCID)); - cr4_fixed1_update(X86_CR4_OSXSAVE, ecx, bit(X86_FEATURE_XSAVE)); - - entry = kvm_find_cpuid_entry(vcpu, 0x7, 0); - cr4_fixed1_update(X86_CR4_FSGSBASE, ebx, bit(X86_FEATURE_FSGSBASE)); - cr4_fixed1_update(X86_CR4_SMEP, ebx, bit(X86_FEATURE_SMEP)); - cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP)); - cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU)); - cr4_fixed1_update(X86_CR4_UMIP, ecx, bit(X86_FEATURE_UMIP)); - cr4_fixed1_update(X86_CR4_LA57, ecx, bit(X86_FEATURE_LA57)); - -#undef cr4_fixed1_update -} - -static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - if (kvm_mpx_supported()) { - bool mpx_enabled = guest_cpuid_has(vcpu, X86_FEATURE_MPX); - - if (mpx_enabled) { - vmx->nested.msrs.entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; - vmx->nested.msrs.exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; - } else { - vmx->nested.msrs.entry_ctls_high &= ~VM_ENTRY_LOAD_BNDCFGS; - vmx->nested.msrs.exit_ctls_high &= ~VM_EXIT_CLEAR_BNDCFGS; - } - } -} - -static void update_intel_pt_cfg(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - struct kvm_cpuid_entry2 *best = NULL; - int i; - - for (i = 0; i < PT_CPUID_LEAVES; i++) { - best = kvm_find_cpuid_entry(vcpu, 0x14, i); - if (!best) - return; - vmx->pt_desc.caps[CPUID_EAX + i*PT_CPUID_REGS_NUM] = best->eax; - vmx->pt_desc.caps[CPUID_EBX + i*PT_CPUID_REGS_NUM] = best->ebx; - vmx->pt_desc.caps[CPUID_ECX + i*PT_CPUID_REGS_NUM] = best->ecx; - vmx->pt_desc.caps[CPUID_EDX + i*PT_CPUID_REGS_NUM] = best->edx; - } - - /* Get the number of configurable Address Ranges for filtering */ - vmx->pt_desc.addr_range = intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_num_address_ranges); - - /* Initialize and clear the no dependency bits */ - vmx->pt_desc.ctl_bitmask = ~(RTIT_CTL_TRACEEN | RTIT_CTL_OS | - RTIT_CTL_USR | RTIT_CTL_TSC_EN | RTIT_CTL_DISRETC); - - /* - * If CPUID.(EAX=14H,ECX=0):EBX[0]=1 CR3Filter can be set otherwise - * will inject an #GP - */ - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_cr3_filtering)) - vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_CR3EN; - - /* - * If CPUID.(EAX=14H,ECX=0):EBX[1]=1 CYCEn, CycThresh and - * PSBFreq can be set - */ - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc)) - vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_CYCLEACC | - RTIT_CTL_CYC_THRESH | RTIT_CTL_PSB_FREQ); - - /* - * If CPUID.(EAX=14H,ECX=0):EBX[3]=1 MTCEn BranchEn and - * MTCFreq can be set - */ - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc)) - vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_MTC_EN | - RTIT_CTL_BRANCH_EN | RTIT_CTL_MTC_RANGE); - - /* If CPUID.(EAX=14H,ECX=0):EBX[4]=1 FUPonPTW and PTWEn can be set */ - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_ptwrite)) - vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_FUP_ON_PTW | - RTIT_CTL_PTW_EN); - - /* If CPUID.(EAX=14H,ECX=0):EBX[5]=1 PwrEvEn can be set */ - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_power_event_trace)) - vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_PWR_EVT_EN; - - /* If CPUID.(EAX=14H,ECX=0):ECX[0]=1 ToPA can be set */ - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_topa_output)) - vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_TOPA; - - /* If CPUID.(EAX=14H,ECX=0):ECX[3]=1 FabircEn can be set */ - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_output_subsys)) - vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_FABRIC_EN; - - /* unmask address range configure area */ - for (i = 0; i < vmx->pt_desc.addr_range; i++) - vmx->pt_desc.ctl_bitmask &= ~(0xfULL << (32 + i * 4)); -} - -static void vmx_cpuid_update(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - /* xsaves_enabled is recomputed in vmx_compute_secondary_exec_control(). */ - vcpu->arch.xsaves_enabled = false; - - if (cpu_has_secondary_exec_ctrls()) { - vmx_compute_secondary_exec_control(vmx); - vmcs_set_secondary_exec_control(vmx); - } - - if (nested_vmx_allowed(vcpu)) - to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= - FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX | - FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; - else - to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &= - ~(FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX | - FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX); - - if (nested_vmx_allowed(vcpu)) { - nested_vmx_cr_fixed1_bits_update(vcpu); - nested_vmx_entry_exit_ctls_update(vcpu); - } - - if (boot_cpu_has(X86_FEATURE_INTEL_PT) && - guest_cpuid_has(vcpu, X86_FEATURE_INTEL_PT)) - update_intel_pt_cfg(vcpu); - - if (boot_cpu_has(X86_FEATURE_RTM)) { - struct shared_msr_entry *msr; - msr = find_msr_entry(vmx, MSR_IA32_TSX_CTRL); - if (msr) { - bool enabled = guest_cpuid_has(vcpu, X86_FEATURE_RTM); - vmx_set_guest_msr(vmx, msr, enabled ? 0 : TSX_CTRL_RTM_DISABLE); - } - } -} - -static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) -{ - if (func == 1 && nested) - entry->ecx |= bit(X86_FEATURE_VMX); -} - -static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu) -{ - to_vmx(vcpu)->req_immediate_exit = true; -} - -static int vmx_check_intercept(struct kvm_vcpu *vcpu, - struct x86_instruction_info *info, - enum x86_intercept_stage stage) -{ - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; - - /* - * RDPID causes #UD if disabled through secondary execution controls. - * Because it is marked as EmulateOnUD, we need to intercept it here. - */ - if (info->intercept == x86_intercept_rdtscp && - !nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) { - ctxt->exception.vector = UD_VECTOR; - ctxt->exception.error_code_valid = false; - return X86EMUL_PROPAGATE_FAULT; - } - - /* TODO: check more intercepts... */ - return X86EMUL_CONTINUE; -} - -#ifdef CONFIG_X86_64 -/* (a << shift) / divisor, return 1 if overflow otherwise 0 */ -static inline int u64_shl_div_u64(u64 a, unsigned int shift, - u64 divisor, u64 *result) -{ - u64 low = a << shift, high = a >> (64 - shift); - - /* To avoid the overflow on divq */ - if (high >= divisor) - return 1; - - /* Low hold the result, high hold rem which is discarded */ - asm("divq %2\n\t" : "=a" (low), "=d" (high) : - "rm" (divisor), "0" (low), "1" (high)); - *result = low; - - return 0; -} - -static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, - bool *expired) -{ - struct vcpu_vmx *vmx; - u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles; - struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer; - - if (kvm_mwait_in_guest(vcpu->kvm) || - kvm_can_post_timer_interrupt(vcpu)) - return -EOPNOTSUPP; - - vmx = to_vmx(vcpu); - tscl = rdtsc(); - guest_tscl = kvm_read_l1_tsc(vcpu, tscl); - delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl; - lapic_timer_advance_cycles = nsec_to_cycles(vcpu, - ktimer->timer_advance_ns); - - if (delta_tsc > lapic_timer_advance_cycles) - delta_tsc -= lapic_timer_advance_cycles; - else - delta_tsc = 0; - - /* Convert to host delta tsc if tsc scaling is enabled */ - if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio && - delta_tsc && u64_shl_div_u64(delta_tsc, - kvm_tsc_scaling_ratio_frac_bits, - vcpu->arch.tsc_scaling_ratio, &delta_tsc)) - return -ERANGE; - - /* - * If the delta tsc can't fit in the 32 bit after the multi shift, - * we can't use the preemption timer. - * It's possible that it fits on later vmentries, but checking - * on every vmentry is costly so we just use an hrtimer. - */ - if (delta_tsc >> (cpu_preemption_timer_multi + 32)) - return -ERANGE; - - vmx->hv_deadline_tsc = tscl + delta_tsc; - *expired = !delta_tsc; - return 0; -} - -static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu) -{ - to_vmx(vcpu)->hv_deadline_tsc = -1; -} -#endif - -static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu) -{ - if (!kvm_pause_in_guest(vcpu->kvm)) - shrink_ple_window(vcpu); -} - -static void vmx_slot_enable_log_dirty(struct kvm *kvm, - struct kvm_memory_slot *slot) -{ - kvm_mmu_slot_leaf_clear_dirty(kvm, slot); - kvm_mmu_slot_largepage_remove_write_access(kvm, slot); -} - -static void vmx_slot_disable_log_dirty(struct kvm *kvm, - struct kvm_memory_slot *slot) -{ - kvm_mmu_slot_set_dirty(kvm, slot); -} - -static void vmx_flush_log_dirty(struct kvm *kvm) -{ - kvm_flush_pml_buffers(kvm); -} - -static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu) -{ - struct vmcs12 *vmcs12; - struct vcpu_vmx *vmx = to_vmx(vcpu); - gpa_t gpa, dst; - - if (is_guest_mode(vcpu)) { - WARN_ON_ONCE(vmx->nested.pml_full); - - /* - * Check if PML is enabled for the nested guest. - * Whether eptp bit 6 is set is already checked - * as part of A/D emulation. - */ - vmcs12 = get_vmcs12(vcpu); - if (!nested_cpu_has_pml(vmcs12)) - return 0; - - if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) { - vmx->nested.pml_full = true; - return 1; - } - - gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull; - dst = vmcs12->pml_address + sizeof(u64) * vmcs12->guest_pml_index; - - if (kvm_write_guest_page(vcpu->kvm, gpa_to_gfn(dst), &gpa, - offset_in_page(dst), sizeof(gpa))) - return 0; - - vmcs12->guest_pml_index--; - } - - return 0; -} - -static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm, - struct kvm_memory_slot *memslot, - gfn_t offset, unsigned long mask) -{ - kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask); -} - -static void __pi_post_block(struct kvm_vcpu *vcpu) -{ - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - struct pi_desc old, new; - unsigned int dest; - - do { - old.control = new.control = pi_desc->control; - WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR, - "Wakeup handler not enabled while the VCPU is blocked\n"); - - dest = cpu_physical_id(vcpu->cpu); - - if (x2apic_enabled()) - new.ndst = dest; - else - new.ndst = (dest << 8) & 0xFF00; - - /* set 'NV' to 'notification vector' */ - new.nv = POSTED_INTR_VECTOR; - } while (cmpxchg64(&pi_desc->control, old.control, - new.control) != old.control); - - if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) { - spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); - list_del(&vcpu->blocked_vcpu_list); - spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); - vcpu->pre_pcpu = -1; - } -} - -/* - * This routine does the following things for vCPU which is going - * to be blocked if VT-d PI is enabled. - * - Store the vCPU to the wakeup list, so when interrupts happen - * we can find the right vCPU to wake up. - * - Change the Posted-interrupt descriptor as below: - * 'NDST' <-- vcpu->pre_pcpu - * 'NV' <-- POSTED_INTR_WAKEUP_VECTOR - * - If 'ON' is set during this process, which means at least one - * interrupt is posted for this vCPU, we cannot block it, in - * this case, return 1, otherwise, return 0. - * - */ -static int pi_pre_block(struct kvm_vcpu *vcpu) -{ - unsigned int dest; - struct pi_desc old, new; - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - - if (!kvm_arch_has_assigned_device(vcpu->kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP) || - !kvm_vcpu_apicv_active(vcpu)) - return 0; - - WARN_ON(irqs_disabled()); - local_irq_disable(); - if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) { - vcpu->pre_pcpu = vcpu->cpu; - spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); - list_add_tail(&vcpu->blocked_vcpu_list, - &per_cpu(blocked_vcpu_on_cpu, - vcpu->pre_pcpu)); - spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); - } - - do { - old.control = new.control = pi_desc->control; - - WARN((pi_desc->sn == 1), - "Warning: SN field of posted-interrupts " - "is set before blocking\n"); - - /* - * Since vCPU can be preempted during this process, - * vcpu->cpu could be different with pre_pcpu, we - * need to set pre_pcpu as the destination of wakeup - * notification event, then we can find the right vCPU - * to wakeup in wakeup handler if interrupts happen - * when the vCPU is in blocked state. - */ - dest = cpu_physical_id(vcpu->pre_pcpu); - - if (x2apic_enabled()) - new.ndst = dest; - else - new.ndst = (dest << 8) & 0xFF00; - - /* set 'NV' to 'wakeup vector' */ - new.nv = POSTED_INTR_WAKEUP_VECTOR; - } while (cmpxchg64(&pi_desc->control, old.control, - new.control) != old.control); - - /* We should not block the vCPU if an interrupt is posted for it. */ - if (pi_test_on(pi_desc) == 1) - __pi_post_block(vcpu); - - local_irq_enable(); - return (vcpu->pre_pcpu == -1); -} - -static int vmx_pre_block(struct kvm_vcpu *vcpu) -{ - if (pi_pre_block(vcpu)) - return 1; - - if (kvm_lapic_hv_timer_in_use(vcpu)) - kvm_lapic_switch_to_sw_timer(vcpu); - - return 0; -} - -static void pi_post_block(struct kvm_vcpu *vcpu) -{ - if (vcpu->pre_pcpu == -1) - return; - - WARN_ON(irqs_disabled()); - local_irq_disable(); - __pi_post_block(vcpu); - local_irq_enable(); -} - -static void vmx_post_block(struct kvm_vcpu *vcpu) -{ - if (kvm_x86_ops->set_hv_timer) - kvm_lapic_switch_to_hv_timer(vcpu); - - pi_post_block(vcpu); -} - -/* - * vmx_update_pi_irte - set IRTE for Posted-Interrupts - * - * @kvm: kvm - * @host_irq: host irq of the interrupt - * @guest_irq: gsi of the interrupt - * @set: set or unset PI - * returns 0 on success, < 0 on failure - */ -static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq, - uint32_t guest_irq, bool set) -{ - struct kvm_kernel_irq_routing_entry *e; - struct kvm_irq_routing_table *irq_rt; - struct kvm_lapic_irq irq; - struct kvm_vcpu *vcpu; - struct vcpu_data vcpu_info; - int idx, ret = 0; - - if (!kvm_arch_has_assigned_device(kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP) || - !kvm_vcpu_apicv_active(kvm->vcpus[0])) - return 0; - - idx = srcu_read_lock(&kvm->irq_srcu); - irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - if (guest_irq >= irq_rt->nr_rt_entries || - hlist_empty(&irq_rt->map[guest_irq])) { - pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n", - guest_irq, irq_rt->nr_rt_entries); - goto out; - } - - hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) { - if (e->type != KVM_IRQ_ROUTING_MSI) - continue; - /* - * VT-d PI cannot support posting multicast/broadcast - * interrupts to a vCPU, we still use interrupt remapping - * for these kind of interrupts. - * - * For lowest-priority interrupts, we only support - * those with single CPU as the destination, e.g. user - * configures the interrupts via /proc/irq or uses - * irqbalance to make the interrupts single-CPU. - * - * We will support full lowest-priority interrupt later. - * - * In addition, we can only inject generic interrupts using - * the PI mechanism, refuse to route others through it. - */ - - kvm_set_msi_irq(kvm, e, &irq); - if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) || - !kvm_irq_is_postable(&irq)) { - /* - * Make sure the IRTE is in remapped mode if - * we don't handle it in posted mode. - */ - ret = irq_set_vcpu_affinity(host_irq, NULL); - if (ret < 0) { - printk(KERN_INFO - "failed to back to remapped mode, irq: %u\n", - host_irq); - goto out; - } - - continue; - } - - vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu)); - vcpu_info.vector = irq.vector; - - trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi, - vcpu_info.vector, vcpu_info.pi_desc_addr, set); - - if (set) - ret = irq_set_vcpu_affinity(host_irq, &vcpu_info); - else - ret = irq_set_vcpu_affinity(host_irq, NULL); - - if (ret < 0) { - printk(KERN_INFO "%s: failed to update PI IRTE\n", - __func__); - goto out; - } - } - - ret = 0; -out: - srcu_read_unlock(&kvm->irq_srcu, idx); - return ret; -} - -static void vmx_setup_mce(struct kvm_vcpu *vcpu) -{ - if (vcpu->arch.mcg_cap & MCG_LMCE_P) - to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= - FEATURE_CONTROL_LMCE; - else - to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &= - ~FEATURE_CONTROL_LMCE; -} - -static int vmx_smi_allowed(struct kvm_vcpu *vcpu) -{ - /* we need a nested vmexit to enter SMM, postpone if run is pending */ - if (to_vmx(vcpu)->nested.nested_run_pending) - return 0; - return 1; -} - -static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - vmx->nested.smm.guest_mode = is_guest_mode(vcpu); - if (vmx->nested.smm.guest_mode) - nested_vmx_vmexit(vcpu, -1, 0, 0); - - vmx->nested.smm.vmxon = vmx->nested.vmxon; - vmx->nested.vmxon = false; - vmx_clear_hlt(vcpu); - return 0; -} - -static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - int ret; - - if (vmx->nested.smm.vmxon) { - vmx->nested.vmxon = true; - vmx->nested.smm.vmxon = false; - } - - if (vmx->nested.smm.guest_mode) { - ret = nested_vmx_enter_non_root_mode(vcpu, false); - if (ret) - return ret; - - vmx->nested.smm.guest_mode = false; - } - return 0; -} - -static int enable_smi_window(struct kvm_vcpu *vcpu) -{ - return 0; -} - -static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) -{ - return false; -} - -static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu) -{ - return to_vmx(vcpu)->nested.vmxon; -} - -static __init int hardware_setup(void) -{ - unsigned long host_bndcfgs; - struct desc_ptr dt; - int r, i; - - rdmsrl_safe(MSR_EFER, &host_efer); - - store_idt(&dt); - host_idt_base = dt.address; - - for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) - kvm_define_shared_msr(i, vmx_msr_index[i]); - - if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0) - return -EIO; - - if (boot_cpu_has(X86_FEATURE_NX)) - kvm_enable_efer_bits(EFER_NX); - - if (boot_cpu_has(X86_FEATURE_MPX)) { - rdmsrl(MSR_IA32_BNDCFGS, host_bndcfgs); - WARN_ONCE(host_bndcfgs, "KVM: BNDCFGS in host will be lost"); - } - - if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() || - !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global())) - enable_vpid = 0; - - if (!cpu_has_vmx_ept() || - !cpu_has_vmx_ept_4levels() || - !cpu_has_vmx_ept_mt_wb() || - !cpu_has_vmx_invept_global()) - enable_ept = 0; - - if (!cpu_has_vmx_ept_ad_bits() || !enable_ept) - enable_ept_ad_bits = 0; - - if (!cpu_has_vmx_unrestricted_guest() || !enable_ept) - enable_unrestricted_guest = 0; - - if (!cpu_has_vmx_flexpriority()) - flexpriority_enabled = 0; - - if (!cpu_has_virtual_nmis()) - enable_vnmi = 0; - - /* - * set_apic_access_page_addr() is used to reload apic access - * page upon invalidation. No need to do anything if not - * using the APIC_ACCESS_ADDR VMCS field. - */ - if (!flexpriority_enabled) - kvm_x86_ops->set_apic_access_page_addr = NULL; - - if (!cpu_has_vmx_tpr_shadow()) - kvm_x86_ops->update_cr8_intercept = NULL; - - if (enable_ept && !cpu_has_vmx_ept_2m_page()) - kvm_disable_largepages(); - -#if IS_ENABLED(CONFIG_HYPERV) - if (ms_hyperv.nested_features & HV_X64_NESTED_GUEST_MAPPING_FLUSH - && enable_ept) { - kvm_x86_ops->tlb_remote_flush = hv_remote_flush_tlb; - kvm_x86_ops->tlb_remote_flush_with_range = - hv_remote_flush_tlb_with_range; - } -#endif - - if (!cpu_has_vmx_ple()) { - ple_gap = 0; - ple_window = 0; - ple_window_grow = 0; - ple_window_max = 0; - ple_window_shrink = 0; - } - - if (!cpu_has_vmx_apicv()) { - enable_apicv = 0; - kvm_x86_ops->sync_pir_to_irr = NULL; - } - - if (cpu_has_vmx_tsc_scaling()) { - kvm_has_tsc_control = true; - kvm_max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX; - kvm_tsc_scaling_ratio_frac_bits = 48; - } - - set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ - - if (enable_ept) - vmx_enable_tdp(); - else - kvm_disable_tdp(); - - /* - * Only enable PML when hardware supports PML feature, and both EPT - * and EPT A/D bit features are enabled -- PML depends on them to work. - */ - if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml()) - enable_pml = 0; - - if (!enable_pml) { - kvm_x86_ops->slot_enable_log_dirty = NULL; - kvm_x86_ops->slot_disable_log_dirty = NULL; - kvm_x86_ops->flush_log_dirty = NULL; - kvm_x86_ops->enable_log_dirty_pt_masked = NULL; - } - - if (!cpu_has_vmx_preemption_timer()) - enable_preemption_timer = false; - - if (enable_preemption_timer) { - u64 use_timer_freq = 5000ULL * 1000 * 1000; - u64 vmx_msr; - - rdmsrl(MSR_IA32_VMX_MISC, vmx_msr); - cpu_preemption_timer_multi = - vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK; - - if (tsc_khz) - use_timer_freq = (u64)tsc_khz * 1000; - use_timer_freq >>= cpu_preemption_timer_multi; - - /* - * KVM "disables" the preemption timer by setting it to its max - * value. Don't use the timer if it might cause spurious exits - * at a rate faster than 0.1 Hz (of uninterrupted guest time). - */ - if (use_timer_freq > 0xffffffffu / 10) - enable_preemption_timer = false; - } - - if (!enable_preemption_timer) { - kvm_x86_ops->set_hv_timer = NULL; - kvm_x86_ops->cancel_hv_timer = NULL; - kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit; - } - - kvm_set_posted_intr_wakeup_handler(wakeup_handler); - - kvm_mce_cap_supported |= MCG_LMCE_P; - - if (pt_mode != PT_MODE_SYSTEM && pt_mode != PT_MODE_HOST_GUEST) - return -EINVAL; - if (!enable_ept || !cpu_has_vmx_intel_pt()) - pt_mode = PT_MODE_SYSTEM; - - if (nested) { - nested_vmx_setup_ctls_msrs(&vmcs_config.nested, - vmx_capability.ept, enable_apicv); - - r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers); - if (r) - return r; - } - - r = alloc_kvm_area(); - if (r) - nested_vmx_hardware_unsetup(); - return r; -} - -static __exit void hardware_unsetup(void) -{ - if (nested) - nested_vmx_hardware_unsetup(); - - free_kvm_area(); -} - -static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { - .cpu_has_kvm_support = cpu_has_kvm_support, - .disabled_by_bios = vmx_disabled_by_bios, - .hardware_setup = hardware_setup, - .hardware_unsetup = hardware_unsetup, - .check_processor_compatibility = vmx_check_processor_compat, - .hardware_enable = hardware_enable, - .hardware_disable = hardware_disable, - .cpu_has_accelerated_tpr = report_flexpriority, - .has_emulated_msr = vmx_has_emulated_msr, - - .vm_init = vmx_vm_init, - .vm_alloc = vmx_vm_alloc, - .vm_free = vmx_vm_free, - - .vcpu_create = vmx_create_vcpu, - .vcpu_free = vmx_free_vcpu, - .vcpu_reset = vmx_vcpu_reset, - - .prepare_guest_switch = vmx_prepare_switch_to_guest, - .vcpu_load = vmx_vcpu_load, - .vcpu_put = vmx_vcpu_put, - - .update_bp_intercept = update_exception_bitmap, - .get_msr_feature = vmx_get_msr_feature, - .get_msr = vmx_get_msr, - .set_msr = vmx_set_msr, - .get_segment_base = vmx_get_segment_base, - .get_segment = vmx_get_segment, - .set_segment = vmx_set_segment, - .get_cpl = vmx_get_cpl, - .get_cs_db_l_bits = vmx_get_cs_db_l_bits, - .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits, - .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, - .set_cr0 = vmx_set_cr0, - .set_cr3 = vmx_set_cr3, - .set_cr4 = vmx_set_cr4, - .set_efer = vmx_set_efer, - .get_idt = vmx_get_idt, - .set_idt = vmx_set_idt, - .get_gdt = vmx_get_gdt, - .set_gdt = vmx_set_gdt, - .get_dr6 = vmx_get_dr6, - .set_dr6 = vmx_set_dr6, - .set_dr7 = vmx_set_dr7, - .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs, - .cache_reg = vmx_cache_reg, - .get_rflags = vmx_get_rflags, - .set_rflags = vmx_set_rflags, - - .tlb_flush = vmx_flush_tlb, - .tlb_flush_gva = vmx_flush_tlb_gva, - - .run = vmx_vcpu_run, - .handle_exit = vmx_handle_exit, - .skip_emulated_instruction = skip_emulated_instruction, - .set_interrupt_shadow = vmx_set_interrupt_shadow, - .get_interrupt_shadow = vmx_get_interrupt_shadow, - .patch_hypercall = vmx_patch_hypercall, - .set_irq = vmx_inject_irq, - .set_nmi = vmx_inject_nmi, - .queue_exception = vmx_queue_exception, - .cancel_injection = vmx_cancel_injection, - .interrupt_allowed = vmx_interrupt_allowed, - .nmi_allowed = vmx_nmi_allowed, - .get_nmi_mask = vmx_get_nmi_mask, - .set_nmi_mask = vmx_set_nmi_mask, - .enable_nmi_window = enable_nmi_window, - .enable_irq_window = enable_irq_window, - .update_cr8_intercept = update_cr8_intercept, - .set_virtual_apic_mode = vmx_set_virtual_apic_mode, - .set_apic_access_page_addr = vmx_set_apic_access_page_addr, - .get_enable_apicv = vmx_get_enable_apicv, - .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, - .load_eoi_exitmap = vmx_load_eoi_exitmap, - .apicv_post_state_restore = vmx_apicv_post_state_restore, - .hwapic_irr_update = vmx_hwapic_irr_update, - .hwapic_isr_update = vmx_hwapic_isr_update, - .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt, - .sync_pir_to_irr = vmx_sync_pir_to_irr, - .deliver_posted_interrupt = vmx_deliver_posted_interrupt, - .dy_apicv_has_pending_interrupt = vmx_dy_apicv_has_pending_interrupt, - - .set_tss_addr = vmx_set_tss_addr, - .set_identity_map_addr = vmx_set_identity_map_addr, - .get_tdp_level = get_ept_level, - .get_mt_mask = vmx_get_mt_mask, - - .get_exit_info = vmx_get_exit_info, - - .get_lpage_level = vmx_get_lpage_level, - - .cpuid_update = vmx_cpuid_update, - - .rdtscp_supported = vmx_rdtscp_supported, - .invpcid_supported = vmx_invpcid_supported, - - .set_supported_cpuid = vmx_set_supported_cpuid, - - .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, - - .read_l1_tsc_offset = vmx_read_l1_tsc_offset, - .write_l1_tsc_offset = vmx_write_l1_tsc_offset, - - .set_tdp_cr3 = vmx_set_cr3, - - .check_intercept = vmx_check_intercept, - .handle_exit_irqoff = vmx_handle_exit_irqoff, - .mpx_supported = vmx_mpx_supported, - .xsaves_supported = vmx_xsaves_supported, - .umip_emulated = vmx_umip_emulated, - .pt_supported = vmx_pt_supported, - - .request_immediate_exit = vmx_request_immediate_exit, - - .sched_in = vmx_sched_in, - - .slot_enable_log_dirty = vmx_slot_enable_log_dirty, - .slot_disable_log_dirty = vmx_slot_disable_log_dirty, - .flush_log_dirty = vmx_flush_log_dirty, - .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked, - .write_log_dirty = vmx_write_pml_buffer, - - .pre_block = vmx_pre_block, - .post_block = vmx_post_block, - - .pmu_ops = &intel_pmu_ops, - - .update_pi_irte = vmx_update_pi_irte, - -#ifdef CONFIG_X86_64 - .set_hv_timer = vmx_set_hv_timer, - .cancel_hv_timer = vmx_cancel_hv_timer, -#endif - - .setup_mce = vmx_setup_mce, - - .smi_allowed = vmx_smi_allowed, - .pre_enter_smm = vmx_pre_enter_smm, - .pre_leave_smm = vmx_pre_leave_smm, - .enable_smi_window = enable_smi_window, - - .check_nested_events = NULL, - .get_nested_state = NULL, - .set_nested_state = NULL, - .get_vmcs12_pages = NULL, - .nested_enable_evmcs = NULL, - .nested_get_evmcs_version = NULL, - .need_emulation_on_page_fault = vmx_need_emulation_on_page_fault, - .apic_init_signal_blocked = vmx_apic_init_signal_blocked, -}; - -static void vmx_cleanup_l1d_flush(void) -{ - if (vmx_l1d_flush_pages) { - free_pages((unsigned long)vmx_l1d_flush_pages, L1D_CACHE_ORDER); - vmx_l1d_flush_pages = NULL; - } - /* Restore state so sysfs ignores VMX */ - l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO; -} - -static void vmx_exit(void) -{ -#ifdef CONFIG_KEXEC_CORE - RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL); - synchronize_rcu(); -#endif - - kvm_exit(); - -#if IS_ENABLED(CONFIG_HYPERV) - if (static_branch_unlikely(&enable_evmcs)) { - int cpu; - struct hv_vp_assist_page *vp_ap; - /* - * Reset everything to support using non-enlightened VMCS - * access later (e.g. when we reload the module with - * enlightened_vmcs=0) - */ - for_each_online_cpu(cpu) { - vp_ap = hv_get_vp_assist_page(cpu); - - if (!vp_ap) - continue; - - vp_ap->nested_control.features.directhypercall = 0; - vp_ap->current_nested_vmcs = 0; - vp_ap->enlighten_vmentry = 0; - } - - static_branch_disable(&enable_evmcs); - } -#endif - vmx_cleanup_l1d_flush(); -} -module_exit(vmx_exit); - -static int __init vmx_init(void) -{ - int r; - -#if IS_ENABLED(CONFIG_HYPERV) - /* - * Enlightened VMCS usage should be recommended and the host needs - * to support eVMCS v1 or above. We can also disable eVMCS support - * with module parameter. - */ - if (enlightened_vmcs && - ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED && - (ms_hyperv.nested_features & HV_X64_ENLIGHTENED_VMCS_VERSION) >= - KVM_EVMCS_VERSION) { - int cpu; - - /* Check that we have assist pages on all online CPUs */ - for_each_online_cpu(cpu) { - if (!hv_get_vp_assist_page(cpu)) { - enlightened_vmcs = false; - break; - } - } - - if (enlightened_vmcs) { - pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n"); - static_branch_enable(&enable_evmcs); - } - - if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH) - vmx_x86_ops.enable_direct_tlbflush - = hv_enable_direct_tlbflush; - - } else { - enlightened_vmcs = false; - } -#endif - - r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), - __alignof__(struct vcpu_vmx), THIS_MODULE); - if (r) - return r; - - /* - * Must be called after kvm_init() so enable_ept is properly set - * up. Hand the parameter mitigation value in which was stored in - * the pre module init parser. If no parameter was given, it will - * contain 'auto' which will be turned into the default 'cond' - * mitigation mode. - */ - r = vmx_setup_l1d_flush(vmentry_l1d_flush_param); - if (r) { - vmx_exit(); - return r; - } - -#ifdef CONFIG_KEXEC_CORE - rcu_assign_pointer(crash_vmclear_loaded_vmcss, - crash_vmclear_local_loaded_vmcss); -#endif - vmx_check_vmcs12_offsets(); - - return 0; -} -module_init(vmx_init); From fce56d970d65a20abc774ac808cce991f104a972 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 7 Feb 2020 09:37:41 -0800 Subject: [PATCH 3269/3715] KVM: nVMX: Use correct root level for nested EPT shadow page tables [ Upstream commit 148d735eb55d32848c3379e460ce365f2c1cbe4b ] Hardcode the EPT page-walk level for L2 to be 4 levels, as KVM's MMU currently also hardcodes the page walk level for nested EPT to be 4 levels. The L2 guest is all but guaranteed to soft hang on its first instruction when L1 is using EPT, as KVM will construct 4-level page tables and then tell hardware to use 5-level page tables. Fixes: 855feb673640 ("KVM: MMU: Add 5 level EPT & Shadow page table support.") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/vmx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4790994854bb..86037cc11419 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4597,6 +4597,9 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) static int get_ept_level(struct kvm_vcpu *vcpu) { + /* Nested EPT currently only supports 4-level walks. */ + if (is_guest_mode(vcpu) && nested_cpu_has_ept(get_vmcs12(vcpu))) + return 4; if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48)) return 5; return 4; From 13851e4667e0d750e6ea3d1a110ca32e68b5b862 Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Thu, 7 Nov 2019 16:30:48 +0100 Subject: [PATCH 3270/3715] drm/gma500: Fixup fbdev stolen size usage evaluation [ Upstream commit fd1a5e521c3c083bb43ea731aae0f8b95f12b9bd ] psbfb_probe performs an evaluation of the required size from the stolen GTT memory, but gets it wrong in two distinct ways: - The resulting size must be page-size-aligned; - The size to allocate is derived from the surface dimensions, not the fb dimensions. When two connectors are connected with different modes, the smallest will be stored in the fb dimensions, but the size that needs to be allocated must match the largest (surface) dimensions. This is what is used in the actual allocation code. Fix this by correcting the evaluation to conform to the two points above. It allows correctly switching to 16bpp when one connector is e.g. 1920x1080 and the other is 1024x768. Signed-off-by: Paul Kocialkowski Signed-off-by: Patrik Jakobsson Link: https://patchwork.freedesktop.org/patch/msgid/20191107153048.843881-1-paul.kocialkowski@bootlin.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/gma500/framebuffer.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c index 2570c7f647a6..883fc45870dd 100644 --- a/drivers/gpu/drm/gma500/framebuffer.c +++ b/drivers/gpu/drm/gma500/framebuffer.c @@ -486,6 +486,7 @@ static int psbfb_probe(struct drm_fb_helper *helper, container_of(helper, struct psb_fbdev, psb_fb_helper); struct drm_device *dev = psb_fbdev->psb_fb_helper.dev; struct drm_psb_private *dev_priv = dev->dev_private; + unsigned int fb_size; int bytespp; bytespp = sizes->surface_bpp / 8; @@ -495,8 +496,11 @@ static int psbfb_probe(struct drm_fb_helper *helper, /* If the mode will not fit in 32bit then switch to 16bit to get a console on full resolution. The X mode setting server will allocate its own 32bit GEM framebuffer */ - if (ALIGN(sizes->fb_width * bytespp, 64) * sizes->fb_height > - dev_priv->vram_stolen_size) { + fb_size = ALIGN(sizes->surface_width * bytespp, 64) * + sizes->surface_height; + fb_size = ALIGN(fb_size, PAGE_SIZE); + + if (fb_size > dev_priv->vram_stolen_size) { sizes->surface_bpp = 16; sizes->surface_depth = 16; } From 877a96a390db71b9e067ac62794401f58414a89e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 10 Dec 2019 09:34:54 +0100 Subject: [PATCH 3271/3715] cpu/hotplug, stop_machine: Fix stop_machine vs hotplug order [ Upstream commit 45178ac0cea853fe0e405bf11e101bdebea57b15 ] Paul reported a very sporadic, rcutorture induced, workqueue failure. When the planets align, the workqueue rescuer's self-migrate fails and then triggers a WARN for running a work on the wrong CPU. Tejun then figured that set_cpus_allowed_ptr()'s stop_one_cpu() call could be ignored! When stopper->enabled is false, stop_machine will insta complete the work, without actually doing the work. Worse, it will not WARN about this (we really should fix this). It turns out there is a small window where a freshly online'ed CPU is marked 'online' but doesn't yet have the stopper task running: BP AP bringup_cpu() __cpu_up(cpu, idle) --> start_secondary() ... cpu_startup_entry() bringup_wait_for_ap() wait_for_ap_thread() <-- cpuhp_online_idle() while (1) do_idle() ... available to run kthreads ... stop_machine_unpark() stopper->enable = true; Close this by moving the stop_machine_unpark() into cpuhp_online_idle(), such that the stopper thread is ready before we start the idle loop and schedule. Reported-by: "Paul E. McKenney" Debugged-by: Tejun Heo Signed-off-by: Peter Zijlstra (Intel) Tested-by: "Paul E. McKenney" Signed-off-by: Sasha Levin --- kernel/cpu.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index 49273130e4f1..96c0a868232e 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -494,8 +494,7 @@ static int bringup_wait_for_ap(unsigned int cpu) if (WARN_ON_ONCE((!cpu_online(cpu)))) return -ECANCELED; - /* Unpark the stopper thread and the hotplug thread of the target cpu */ - stop_machine_unpark(cpu); + /* Unpark the hotplug thread of the target cpu */ kthread_unpark(st->thread); /* @@ -1064,8 +1063,8 @@ void notify_cpu_starting(unsigned int cpu) /* * Called from the idle task. Wake up the controlling task which brings the - * stopper and the hotplug thread of the upcoming CPU up and then delegates - * the rest of the online bringup to the hotplug thread. + * hotplug thread of the upcoming CPU up and then delegates the rest of the + * online bringup to the hotplug thread. */ void cpuhp_online_idle(enum cpuhp_state state) { @@ -1075,6 +1074,12 @@ void cpuhp_online_idle(enum cpuhp_state state) if (state != CPUHP_AP_ONLINE_IDLE) return; + /* + * Unpart the stopper thread before we start the idle loop (and start + * scheduling); this ensures the stopper task is always available. + */ + stop_machine_unpark(smp_processor_id()); + st->state = CPUHP_AP_ONLINE_IDLE; complete_ap_thread(st, true); } From 927c64c4e7395a24590ebb1278f39ad4a3a42331 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 3 Dec 2019 12:58:55 +0300 Subject: [PATCH 3272/3715] brcmfmac: Fix use after free in brcmf_sdio_readframes() [ Upstream commit 216b44000ada87a63891a8214c347e05a4aea8fe ] The brcmu_pkt_buf_free_skb() function frees "pkt" so it leads to a static checker warning: drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c:1974 brcmf_sdio_readframes() error: dereferencing freed memory 'pkt' It looks like there was supposed to be a continue after we free "pkt". Fixes: 4754fceeb9a6 ("brcmfmac: streamline SDIO read frame routine") Signed-off-by: Dan Carpenter Acked-by: Franky Lin Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 4c28b04ea605..d198a8780b96 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -1932,6 +1932,7 @@ static uint brcmf_sdio_readframes(struct brcmf_sdio *bus, uint maxframes) BRCMF_SDIO_FT_NORMAL)) { rd->len = 0; brcmu_pkt_buf_free_skb(pkt); + continue; } bus->sdcnt.rx_readahead_cnt++; if (rd->len != roundup(rd_new.len, 16)) { From bb36a883e32b7d3804441822e6e0d4cb3a47a19b Mon Sep 17 00:00:00 2001 From: Zahari Petkov Date: Mon, 18 Nov 2019 23:02:55 +0200 Subject: [PATCH 3273/3715] leds: pca963x: Fix open-drain initialization [ Upstream commit 697529091ac7a0a90ca349b914bb30641c13c753 ] Before commit bb29b9cccd95 ("leds: pca963x: Add bindings to invert polarity") Mode register 2 was initialized directly with either 0x01 or 0x05 for open-drain or totem pole (push-pull) configuration. Afterwards, MODE2 initialization started using bitwise operations on top of the default MODE2 register value (0x05). Using bitwise OR for setting OUTDRV with 0x01 and 0x05 does not produce correct results. When open-drain is used, instead of setting OUTDRV to 0, the driver keeps it as 1: Open-drain: 0x05 | 0x01 -> 0x05 (0b101 - incorrect) Totem pole: 0x05 | 0x05 -> 0x05 (0b101 - correct but still wrong) Now OUTDRV setting uses correct bitwise operations for initialization: Open-drain: 0x05 & ~0x04 -> 0x01 (0b001 - correct) Totem pole: 0x05 | 0x04 -> 0x05 (0b101 - correct) Additional MODE2 register definitions are introduced now as well. Fixes: bb29b9cccd95 ("leds: pca963x: Add bindings to invert polarity") Signed-off-by: Zahari Petkov Signed-off-by: Pavel Machek Signed-off-by: Sasha Levin --- drivers/leds/leds-pca963x.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/leds/leds-pca963x.c b/drivers/leds/leds-pca963x.c index 3bf9a1271819..88c7313cf869 100644 --- a/drivers/leds/leds-pca963x.c +++ b/drivers/leds/leds-pca963x.c @@ -43,6 +43,8 @@ #define PCA963X_LED_PWM 0x2 /* Controlled through PWM */ #define PCA963X_LED_GRP_PWM 0x3 /* Controlled through PWM/GRPPWM */ +#define PCA963X_MODE2_OUTDRV 0x04 /* Open-drain or totem pole */ +#define PCA963X_MODE2_INVRT 0x10 /* Normal or inverted direction */ #define PCA963X_MODE2_DMBLNK 0x20 /* Enable blinking */ #define PCA963X_MODE1 0x00 @@ -462,12 +464,12 @@ static int pca963x_probe(struct i2c_client *client, PCA963X_MODE2); /* Configure output: open-drain or totem pole (push-pull) */ if (pdata->outdrv == PCA963X_OPEN_DRAIN) - mode2 |= 0x01; + mode2 &= ~PCA963X_MODE2_OUTDRV; else - mode2 |= 0x05; + mode2 |= PCA963X_MODE2_OUTDRV; /* Configure direction: normal or inverted */ if (pdata->dir == PCA963X_INVERTED) - mode2 |= 0x10; + mode2 |= PCA963X_MODE2_INVRT; i2c_smbus_write_byte_data(pca963x->chip->client, PCA963X_MODE2, mode2); } From 52f192660a4d1529458d72178f50cf431743b961 Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Thu, 12 Dec 2019 11:25:55 +0530 Subject: [PATCH 3274/3715] ext4: fix ext4_dax_read/write inode locking sequence for IOCB_NOWAIT [ Upstream commit f629afe3369e9885fd6e9cc7a4f514b6a65cf9e9 ] Apparently our current rwsem code doesn't like doing the trylock, then lock for real scheme. So change our dax read/write methods to just do the trylock for the RWF_NOWAIT case. This seems to fix AIM7 regression in some scalable filesystems upto ~25% in some cases. Claimed in commit 942491c9e6d6 ("xfs: fix AIM7 regression") Reviewed-by: Jan Kara Reviewed-by: Matthew Bobrowski Tested-by: Joseph Qi Signed-off-by: Ritesh Harjani Link: https://lore.kernel.org/r/20191212055557.11151-2-riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/file.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 4ede0af9d6fe..acec134da57d 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -38,9 +38,10 @@ static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to) struct inode *inode = file_inode(iocb->ki_filp); ssize_t ret; - if (!inode_trylock_shared(inode)) { - if (iocb->ki_flags & IOCB_NOWAIT) + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!inode_trylock_shared(inode)) return -EAGAIN; + } else { inode_lock_shared(inode); } /* @@ -188,9 +189,10 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from) struct inode *inode = file_inode(iocb->ki_filp); ssize_t ret; - if (!inode_trylock(inode)) { - if (iocb->ki_flags & IOCB_NOWAIT) + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!inode_trylock(inode)) return -EAGAIN; + } else { inode_lock(inode); } ret = ext4_write_checks(iocb, from); From e80cac7b887e1191210978e6e89c40fe4cfdca0b Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Mon, 23 Dec 2019 18:33:47 +0900 Subject: [PATCH 3275/3715] ALSA: ctl: allow TLV read operation for callback type of element in locked case [ Upstream commit d61fe22c2ae42d9fd76c34ef4224064cca4b04b0 ] A design of ALSA control core allows applications to execute three operations for TLV feature; read, write and command. Furthermore, it allows driver developers to process the operations by two ways; allocated array or callback function. In the former, read operation is just allowed, thus developers uses the latter when device driver supports variety of models or the target model is expected to dynamically change information stored in TLV container. The core also allows applications to lock any element so that the other applications can't perform write operation to the element for element value and TLV information. When the element is locked, write and command operation for TLV information are prohibited as well as element value. Any read operation should be allowed in the case. At present, when an element has callback function for TLV information, TLV read operation returns EPERM if the element is locked. On the other hand, the read operation is success when an element has allocated array for TLV information. In both cases, read operation is success for element value expectedly. This commit fixes the bug. This change can be backported to v4.14 kernel or later. Signed-off-by: Takashi Sakamoto Reviewed-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20191223093347.15279-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/control.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/core/control.c b/sound/core/control.c index 36571cd49be3..a0ce22164957 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -1467,8 +1467,9 @@ static int call_tlv_handler(struct snd_ctl_file *file, int op_flag, if (kctl->tlv.c == NULL) return -ENXIO; - /* When locked, this is unavailable. */ - if (vd->owner != NULL && vd->owner != file) + /* Write and command operations are not allowed for locked element. */ + if (op_flag != SNDRV_CTL_TLV_OP_READ && + vd->owner != NULL && vd->owner != file) return -EPERM; return kctl->tlv.c(kctl, op_flag, size, buf); From 9fb666349ef9a39034d14e798047a553b5d47a3a Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 28 Dec 2019 15:30:45 +0200 Subject: [PATCH 3276/3715] gianfar: Fix TX timestamping with a stacked DSA driver [ Upstream commit c26a2c2ddc0115eb088873f5c309cf46b982f522 ] The driver wrongly assumes that it is the only entity that can set the SKBTX_IN_PROGRESS bit of the current skb. Therefore, in the gfar_clean_tx_ring function, where the TX timestamp is collected if necessary, the aforementioned bit is used to discriminate whether or not the TX timestamp should be delivered to the socket's error queue. But a stacked driver such as a DSA switch can also set the SKBTX_IN_PROGRESS bit, which is actually exactly what it should do in order to denote that the hardware timestamping process is undergoing. Therefore, gianfar would misinterpret the "in progress" bit as being its own, and deliver a second skb clone in the socket's error queue, completely throwing off a PTP process which is not expecting to receive it, _even though_ TX timestamping is not enabled for gianfar. There have been discussions [0] as to whether non-MAC drivers need or not to set SKBTX_IN_PROGRESS at all (whose purpose is to avoid sending 2 timestamps, a sw and a hw one, to applications which only expect one). But as of this patch, there are at least 2 PTP drivers that would break in conjunction with gianfar: the sja1105 DSA switch and the felix switch, by way of its ocelot core driver. So regardless of that conclusion, fix the gianfar driver to not do stuff based on flags set by others and not intended for it. [0]: https://www.spinics.net/lists/netdev/msg619699.html Fixes: f0ee7acfcdd4 ("gianfar: Add hardware TX timestamping support") Signed-off-by: Vladimir Oltean Acked-by: Richard Cochran Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/gianfar.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 27d0e3b9833c..e4a2c74a9b47 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -2685,13 +2685,17 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) skb_dirtytx = tx_queue->skb_dirtytx; while ((skb = tx_queue->tx_skbuff[skb_dirtytx])) { + bool do_tstamp; + + do_tstamp = (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + priv->hwts_tx_en; frags = skb_shinfo(skb)->nr_frags; /* When time stamping, one additional TxBD must be freed. * Also, we need to dma_unmap_single() the TxPAL. */ - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) + if (unlikely(do_tstamp)) nr_txbds = frags + 2; else nr_txbds = frags + 1; @@ -2705,7 +2709,7 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) (lstatus & BD_LENGTH_MASK)) break; - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) { + if (unlikely(do_tstamp)) { next = next_txbd(bdp, base, tx_ring_size); buflen = be16_to_cpu(next->length) + GMAC_FCB_LEN + GMAC_TXPAL_LEN; @@ -2715,7 +2719,7 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) dma_unmap_single(priv->dev, be32_to_cpu(bdp->bufPtr), buflen, DMA_TO_DEVICE); - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) { + if (unlikely(do_tstamp)) { struct skb_shared_hwtstamps shhwtstamps; u64 *ns = (u64 *)(((uintptr_t)skb->data + 0x10) & ~0x7UL); From d3151da31bda9c19c4b0050443aaa4eed3875cb9 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 18 Dec 2019 20:48:09 +0100 Subject: [PATCH 3277/3715] pinctrl: sh-pfc: sh7264: Fix CAN function GPIOs [ Upstream commit 55b1cb1f03ad5eea39897d0c74035e02deddcff2 ] pinmux_func_gpios[] contains a hole due to the missing function GPIO definition for the "CTX0&CTX1" signal, which is the logical "AND" of the two CAN outputs. Fix this by: - Renaming CRX0_CRX1_MARK to CTX0_CTX1_MARK, as PJ2MD[2:0]=010 configures the combined "CTX0&CTX1" output signal, - Renaming CRX0X1_MARK to CRX0_CRX1_MARK, as PJ3MD[1:0]=10 configures the shared "CRX0/CRX1" input signal, which is fed to both CAN inputs, - Adding the missing function GPIO definition for "CTX0&CTX1" to pinmux_func_gpios[], - Moving all CAN enums next to each other. See SH7262 Group, SH7264 Group User's Manual: Hardware, Rev. 4.00: [1] Figure 1.2 (3) (Pin Assignment for the SH7264 Group (1-Mbyte Version), [2] Figure 1.2 (4) Pin Assignment for the SH7264 Group (640-Kbyte Version, [3] Table 1.4 List of Pins, [4] Figure 20.29 Connection Example when Using This Module as 1-Channel Module (64 Mailboxes x 1 Channel), [5] Table 32.10 Multiplexed Pins (Port J), [6] Section 32.2.30 (3) Port J Control Register 0 (PJCR0). Note that the last 2 disagree about PJ2MD[2:0], which is probably the root cause of this bug. But considering [4], "CTx0&CTx1" in [5] must be correct, and "CRx0&CRx1" in [6] must be wrong. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20191218194812.12741-4-geert+renesas@glider.be Signed-off-by: Sasha Levin --- drivers/pinctrl/sh-pfc/pfc-sh7264.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7264.c b/drivers/pinctrl/sh-pfc/pfc-sh7264.c index e1c34e19222e..3ddb9565ed80 100644 --- a/drivers/pinctrl/sh-pfc/pfc-sh7264.c +++ b/drivers/pinctrl/sh-pfc/pfc-sh7264.c @@ -500,17 +500,15 @@ enum { SD_WP_MARK, SD_CLK_MARK, SD_CMD_MARK, CRX0_MARK, CRX1_MARK, CTX0_MARK, CTX1_MARK, + CRX0_CRX1_MARK, CTX0_CTX1_MARK, PWM1A_MARK, PWM1B_MARK, PWM1C_MARK, PWM1D_MARK, PWM1E_MARK, PWM1F_MARK, PWM1G_MARK, PWM1H_MARK, PWM2A_MARK, PWM2B_MARK, PWM2C_MARK, PWM2D_MARK, PWM2E_MARK, PWM2F_MARK, PWM2G_MARK, PWM2H_MARK, IERXD_MARK, IETXD_MARK, - CRX0_CRX1_MARK, WDTOVF_MARK, - CRX0X1_MARK, - /* DMAC */ TEND0_MARK, DACK0_MARK, DREQ0_MARK, TEND1_MARK, DACK1_MARK, DREQ1_MARK, @@ -998,12 +996,12 @@ static const u16 pinmux_data[] = { PINMUX_DATA(PJ3_DATA, PJ3MD_00), PINMUX_DATA(CRX1_MARK, PJ3MD_01), - PINMUX_DATA(CRX0X1_MARK, PJ3MD_10), + PINMUX_DATA(CRX0_CRX1_MARK, PJ3MD_10), PINMUX_DATA(IRQ1_PJ_MARK, PJ3MD_11), PINMUX_DATA(PJ2_DATA, PJ2MD_000), PINMUX_DATA(CTX1_MARK, PJ2MD_001), - PINMUX_DATA(CRX0_CRX1_MARK, PJ2MD_010), + PINMUX_DATA(CTX0_CTX1_MARK, PJ2MD_010), PINMUX_DATA(CS2_MARK, PJ2MD_011), PINMUX_DATA(SCK0_MARK, PJ2MD_100), PINMUX_DATA(LCD_M_DISP_MARK, PJ2MD_101), @@ -1248,6 +1246,7 @@ static const struct pinmux_func pinmux_func_gpios[] = { GPIO_FN(CTX1), GPIO_FN(CRX1), GPIO_FN(CTX0), + GPIO_FN(CTX0_CTX1), GPIO_FN(CRX0), GPIO_FN(CRX0_CRX1), From e3b1ef9fb6079e9a8a9f236ad6d35eb7e79a7519 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 31 Aug 2019 12:00:24 +0200 Subject: [PATCH 3278/3715] pxa168fb: Fix the function used to release some memory in an error handling path [ Upstream commit 3c911fe799d1c338d94b78e7182ad452c37af897 ] In the probe function, some resources are allocated using 'dma_alloc_wc()', they should be released with 'dma_free_wc()', not 'dma_free_coherent()'. We already use 'dma_free_wc()' in the remove function, but not in the error handling path of the probe function. Also, remove a useless 'PAGE_ALIGN()'. 'info->fix.smem_len' is already PAGE_ALIGNed. Fixes: 638772c7553f ("fb: add support of LCD display controller on pxa168/910 (base layer)") Signed-off-by: Christophe JAILLET Reviewed-by: Lubomir Rintel CC: YueHaibing Signed-off-by: Bartlomiej Zolnierkiewicz Link: https://patchwork.freedesktop.org/patch/msgid/20190831100024.3248-1-christophe.jaillet@wanadoo.fr Signed-off-by: Sasha Levin --- drivers/video/fbdev/pxa168fb.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/video/fbdev/pxa168fb.c b/drivers/video/fbdev/pxa168fb.c index d059d04c63ac..20195d3dbf08 100644 --- a/drivers/video/fbdev/pxa168fb.c +++ b/drivers/video/fbdev/pxa168fb.c @@ -769,8 +769,8 @@ failed_free_cmap: failed_free_clk: clk_disable_unprepare(fbi->clk); failed_free_fbmem: - dma_free_coherent(fbi->dev, info->fix.smem_len, - info->screen_base, fbi->fb_start_dma); + dma_free_wc(fbi->dev, info->fix.smem_len, + info->screen_base, fbi->fb_start_dma); failed_free_info: kfree(info); @@ -804,7 +804,7 @@ static int pxa168fb_remove(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); - dma_free_wc(fbi->dev, PAGE_ALIGN(info->fix.smem_len), + dma_free_wc(fbi->dev, info->fix.smem_len, info->screen_base, info->fix.smem_start); clk_disable_unprepare(fbi->clk); From 0ea58ac76c2f7456b13d064f33cf198da64165fc Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Thu, 21 Nov 2019 08:55:24 +0100 Subject: [PATCH 3279/3715] media: i2c: mt9v032: fix enum mbus codes and frame sizes [ Upstream commit 1451d5ae351d938a0ab1677498c893f17b9ee21d ] This driver supports both the mt9v032 (color) and the mt9v022 (mono) sensors. Depending on which sensor is used, the format from the sensor is different. The format.code inside the dev struct holds this information. The enum mbus and enum frame sizes need to take into account both type of sensors, not just the color one. To solve this, use the format.code in these functions instead of the hardcoded bayer color format (which is only used for mt9v032). [Sakari Ailus: rewrapped commit message] Suggested-by: Wenyou Yang Signed-off-by: Eugen Hristev Reviewed-by: Laurent Pinchart Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/i2c/mt9v032.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/media/i2c/mt9v032.c b/drivers/media/i2c/mt9v032.c index 8a430640c85d..1a20d0d558d3 100644 --- a/drivers/media/i2c/mt9v032.c +++ b/drivers/media/i2c/mt9v032.c @@ -423,10 +423,12 @@ static int mt9v032_enum_mbus_code(struct v4l2_subdev *subdev, struct v4l2_subdev_pad_config *cfg, struct v4l2_subdev_mbus_code_enum *code) { + struct mt9v032 *mt9v032 = to_mt9v032(subdev); + if (code->index > 0) return -EINVAL; - code->code = MEDIA_BUS_FMT_SGRBG10_1X10; + code->code = mt9v032->format.code; return 0; } @@ -434,7 +436,11 @@ static int mt9v032_enum_frame_size(struct v4l2_subdev *subdev, struct v4l2_subdev_pad_config *cfg, struct v4l2_subdev_frame_size_enum *fse) { - if (fse->index >= 3 || fse->code != MEDIA_BUS_FMT_SGRBG10_1X10) + struct mt9v032 *mt9v032 = to_mt9v032(subdev); + + if (fse->index >= 3) + return -EINVAL; + if (mt9v032->format.code != fse->code) return -EINVAL; fse->min_width = MT9V032_WINDOW_WIDTH_DEF / (1 << fse->index); From ad28e1b4ce466b17131149db9e372ba9ea5baccc Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Mon, 28 Oct 2019 19:54:22 +1100 Subject: [PATCH 3280/3715] powerpc/powernv/iov: Ensure the pdn for VFs always contains a valid PE number [ Upstream commit 3b5b9997b331e77ce967eba2c4bc80dc3134a7fe ] On pseries there is a bug with adding hotplugged devices to an IOMMU group. For a number of dumb reasons fixing that bug first requires re-working how VFs are configured on PowerNV. For background, on PowerNV we use the pcibios_sriov_enable() hook to do two things: 1. Create a pci_dn structure for each of the VFs, and 2. Configure the PHB's internal BARs so the MMIO range for each VF maps to a unique PE. Roughly speaking a PE is the hardware counterpart to a Linux IOMMU group since all the devices in a PE share the same IOMMU table. A PE also defines the set of devices that should be isolated in response to a PCI error (i.e. bad DMA, UR/CA, AER events, etc). When isolated all MMIO and DMA traffic to and from devicein the PE is blocked by the root complex until the PE is recovered by the OS. The requirement to block MMIO causes a giant headache because the P8 PHB generally uses a fixed mapping between MMIO addresses and PEs. As a result we need to delay configuring the IOMMU groups for device until after MMIO resources are assigned. For physical devices (i.e. non-VFs) the PE assignment is done in pcibios_setup_bridge() which is called immediately after the MMIO resources for downstream devices (and the bridge's windows) are assigned. For VFs the setup is more complicated because: a) pcibios_setup_bridge() is not called again when VFs are activated, and b) The pci_dev for VFs are created by generic code which runs after pcibios_sriov_enable() is called. The work around for this is a two step process: 1. A fixup in pcibios_add_device() is used to initialised the cached pe_number in pci_dn, then 2. A bus notifier then adds the device to the IOMMU group for the PE specified in pci_dn->pe_number. A side effect fixing the pseries bug mentioned in the first paragraph is moving the fixup out of pcibios_add_device() and into pcibios_bus_add_device(), which is called much later. This results in step 2. failing because pci_dn->pe_number won't be initialised when the bus notifier is run. We can fix this by removing the need for the fixup. The PE for a VF is known before the VF is even scanned so we can initialise pci_dn->pe_number pcibios_sriov_enable() instead. Unfortunately, moving the initialisation causes two problems: 1. We trip the WARN_ON() in the current fixup code, and 2. The EEH core clears pdn->pe_number when recovering a VF and relies on the fixup to correctly re-set it. The only justification for either of these is a comment in eeh_rmv_device() suggesting that pdn->pe_number *must* be set to IODA_INVALID_PE in order for the VF to be scanned. However, this comment appears to have no basis in reality. Both bugs can be fixed by just deleting the code. Tested-by: Alexey Kardashevskiy Reviewed-by: Alexey Kardashevskiy Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191028085424.12006-1-oohall@gmail.com Signed-off-by: Sasha Levin --- arch/powerpc/kernel/eeh_driver.c | 6 ------ arch/powerpc/platforms/powernv/pci-ioda.c | 19 +++++++++++++++---- arch/powerpc/platforms/powernv/pci.c | 4 ---- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 470284f9e4f6..5a48c93aaa1b 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -520,12 +520,6 @@ static void *eeh_rmv_device(void *data, void *userdata) pci_iov_remove_virtfn(edev->physfn, pdn->vf_index, 0); edev->pdev = NULL; - - /* - * We have to set the VF PE number to invalid one, which is - * required to plug the VF successfully. - */ - pdn->pe_number = IODA_INVALID_PE; #endif if (rmv_data) list_add(&edev->rmv_list, &rmv_data->edev_list); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index d3d5796f7df6..36ef504eeab3 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1523,6 +1523,10 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) /* Reserve PE for each VF */ for (vf_index = 0; vf_index < num_vfs; vf_index++) { + int vf_devfn = pci_iov_virtfn_devfn(pdev, vf_index); + int vf_bus = pci_iov_virtfn_bus(pdev, vf_index); + struct pci_dn *vf_pdn; + if (pdn->m64_single_mode) pe_num = pdn->pe_num_map[vf_index]; else @@ -1535,13 +1539,11 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) pe->pbus = NULL; pe->parent_dev = pdev; pe->mve_number = -1; - pe->rid = (pci_iov_virtfn_bus(pdev, vf_index) << 8) | - pci_iov_virtfn_devfn(pdev, vf_index); + pe->rid = (vf_bus << 8) | vf_devfn; pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n", hose->global_number, pdev->bus->number, - PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)), - PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)), pe_num); + PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num); if (pnv_ioda_configure_pe(phb, pe)) { /* XXX What do we do here ? */ @@ -1555,6 +1557,15 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) list_add_tail(&pe->list, &phb->ioda.pe_list); mutex_unlock(&phb->ioda.pe_list_mutex); + /* associate this pe to it's pdn */ + list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) { + if (vf_pdn->busno == vf_bus && + vf_pdn->devfn == vf_devfn) { + vf_pdn->pe_number = pe_num; + break; + } + } + pnv_pci_ioda2_setup_dma_pe(phb, pe); } } diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 961c131a5b7e..844ca1886063 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -978,16 +978,12 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev) struct pnv_phb *phb = hose->private_data; #ifdef CONFIG_PCI_IOV struct pnv_ioda_pe *pe; - struct pci_dn *pdn; /* Fix the VF pdn PE number */ if (pdev->is_virtfn) { - pdn = pci_get_pdn(pdev); - WARN_ON(pdn->pe_number != IODA_INVALID_PE); list_for_each_entry(pe, &phb->ioda.pe_list, list) { if (pe->rid == ((pdev->bus->number << 8) | (pdev->devfn & 0xff))) { - pdn->pe_number = pe->pe_number; pe->pdev = pdev; break; } From 13c15ab8b31731c4006e8c53918b8bba9548c51a Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Wed, 18 Dec 2019 21:26:05 +0800 Subject: [PATCH 3281/3715] gpio: gpio-grgpio: fix possible sleep-in-atomic-context bugs in grgpio_irq_map/unmap() [ Upstream commit e36eaf94be8f7bc4e686246eed3cf92d845e2ef8 ] The driver may sleep while holding a spinlock. The function call path (from bottom to top) in Linux 4.19 is: drivers/gpio/gpio-grgpio.c, 261: request_irq in grgpio_irq_map drivers/gpio/gpio-grgpio.c, 255: _raw_spin_lock_irqsave in grgpio_irq_map drivers/gpio/gpio-grgpio.c, 318: free_irq in grgpio_irq_unmap drivers/gpio/gpio-grgpio.c, 299: _raw_spin_lock_irqsave in grgpio_irq_unmap request_irq() and free_irq() can sleep at runtime. To fix these bugs, request_irq() and free_irq() are called without holding the spinlock. These bugs are found by a static analysis tool STCheck written by myself. Signed-off-by: Jia-Ju Bai Link: https://lore.kernel.org/r/20191218132605.10594-1-baijiaju1990@gmail.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/gpio/gpio-grgpio.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpio-grgpio.c b/drivers/gpio/gpio-grgpio.c index 6544a16ab02e..7541bd327e6c 100644 --- a/drivers/gpio/gpio-grgpio.c +++ b/drivers/gpio/gpio-grgpio.c @@ -259,17 +259,16 @@ static int grgpio_irq_map(struct irq_domain *d, unsigned int irq, lirq->irq = irq; uirq = &priv->uirqs[lirq->index]; if (uirq->refcnt == 0) { + spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); ret = request_irq(uirq->uirq, grgpio_irq_handler, 0, dev_name(priv->dev), priv); if (ret) { dev_err(priv->dev, "Could not request underlying irq %d\n", uirq->uirq); - - spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); - return ret; } + spin_lock_irqsave(&priv->gc.bgpio_lock, flags); } uirq->refcnt++; @@ -315,8 +314,11 @@ static void grgpio_irq_unmap(struct irq_domain *d, unsigned int irq) if (index >= 0) { uirq = &priv->uirqs[lirq->index]; uirq->refcnt--; - if (uirq->refcnt == 0) + if (uirq->refcnt == 0) { + spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); free_irq(uirq->uirq, priv); + return; + } } spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); From 28820c5802f9f83c655ab09ccae8289103ce1490 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 13 Nov 2019 16:16:25 -0500 Subject: [PATCH 3282/3715] char/random: silence a lockdep splat with printk() [ Upstream commit 1b710b1b10eff9d46666064ea25f079f70bc67a8 ] Sergey didn't like the locking order, uart_port->lock -> tty_port->lock uart_write (uart_port->lock) __uart_start pl011_start_tx pl011_tx_chars uart_write_wakeup tty_port_tty_wakeup tty_port_default tty_port_tty_get (tty_port->lock) but those code is so old, and I have no clue how to de-couple it after checking other locks in the splat. There is an onging effort to make all printk() as deferred, so until that happens, workaround it for now as a short-term fix. LTP: starting iogen01 (export LTPROOT; rwtest -N iogen01 -i 120s -s read,write -Da -Dv -n 2 500b:$TMPDIR/doio.f1.$$ 1000b:$TMPDIR/doio.f2.$$) WARNING: possible circular locking dependency detected ------------------------------------------------------ doio/49441 is trying to acquire lock: ffff008b7cff7290 (&(&zone->lock)->rlock){..-.}, at: rmqueue+0x138/0x2050 but task is already holding lock: 60ff000822352818 (&pool->lock/1){-.-.}, at: start_flush_work+0xd8/0x3f0 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #4 (&pool->lock/1){-.-.}: lock_acquire+0x320/0x360 _raw_spin_lock+0x64/0x80 __queue_work+0x4b4/0xa10 queue_work_on+0xac/0x11c tty_schedule_flip+0x84/0xbc tty_flip_buffer_push+0x1c/0x28 pty_write+0x98/0xd0 n_tty_write+0x450/0x60c tty_write+0x338/0x474 __vfs_write+0x88/0x214 vfs_write+0x12c/0x1a4 redirected_tty_write+0x90/0xdc do_loop_readv_writev+0x140/0x180 do_iter_write+0xe0/0x10c vfs_writev+0x134/0x1cc do_writev+0xbc/0x130 __arm64_sys_writev+0x58/0x8c el0_svc_handler+0x170/0x240 el0_sync_handler+0x150/0x250 el0_sync+0x164/0x180 -> #3 (&(&port->lock)->rlock){-.-.}: lock_acquire+0x320/0x360 _raw_spin_lock_irqsave+0x7c/0x9c tty_port_tty_get+0x24/0x60 tty_port_default_wakeup+0x1c/0x3c tty_port_tty_wakeup+0x34/0x40 uart_write_wakeup+0x28/0x44 pl011_tx_chars+0x1b8/0x270 pl011_start_tx+0x24/0x70 __uart_start+0x5c/0x68 uart_write+0x164/0x1c8 do_output_char+0x33c/0x348 n_tty_write+0x4bc/0x60c tty_write+0x338/0x474 redirected_tty_write+0xc0/0xdc do_loop_readv_writev+0x140/0x180 do_iter_write+0xe0/0x10c vfs_writev+0x134/0x1cc do_writev+0xbc/0x130 __arm64_sys_writev+0x58/0x8c el0_svc_handler+0x170/0x240 el0_sync_handler+0x150/0x250 el0_sync+0x164/0x180 -> #2 (&port_lock_key){-.-.}: lock_acquire+0x320/0x360 _raw_spin_lock+0x64/0x80 pl011_console_write+0xec/0x2cc console_unlock+0x794/0x96c vprintk_emit+0x260/0x31c vprintk_default+0x54/0x7c vprintk_func+0x218/0x254 printk+0x7c/0xa4 register_console+0x734/0x7b0 uart_add_one_port+0x734/0x834 pl011_register_port+0x6c/0xac sbsa_uart_probe+0x234/0x2ec platform_drv_probe+0xd4/0x124 really_probe+0x250/0x71c driver_probe_device+0xb4/0x200 __device_attach_driver+0xd8/0x188 bus_for_each_drv+0xbc/0x110 __device_attach+0x120/0x220 device_initial_probe+0x20/0x2c bus_probe_device+0x54/0x100 device_add+0xae8/0xc2c platform_device_add+0x278/0x3b8 platform_device_register_full+0x238/0x2ac acpi_create_platform_device+0x2dc/0x3a8 acpi_bus_attach+0x390/0x3cc acpi_bus_attach+0x108/0x3cc acpi_bus_attach+0x108/0x3cc acpi_bus_attach+0x108/0x3cc acpi_bus_scan+0x7c/0xb0 acpi_scan_init+0xe4/0x304 acpi_init+0x100/0x114 do_one_initcall+0x348/0x6a0 do_initcall_level+0x190/0x1fc do_basic_setup+0x34/0x4c kernel_init_freeable+0x19c/0x260 kernel_init+0x18/0x338 ret_from_fork+0x10/0x18 -> #1 (console_owner){-...}: lock_acquire+0x320/0x360 console_lock_spinning_enable+0x6c/0x7c console_unlock+0x4f8/0x96c vprintk_emit+0x260/0x31c vprintk_default+0x54/0x7c vprintk_func+0x218/0x254 printk+0x7c/0xa4 get_random_u64+0x1c4/0x1dc shuffle_pick_tail+0x40/0xac __free_one_page+0x424/0x710 free_one_page+0x70/0x120 __free_pages_ok+0x61c/0xa94 __free_pages_core+0x1bc/0x294 memblock_free_pages+0x38/0x48 __free_pages_memory+0xcc/0xfc __free_memory_core+0x70/0x78 free_low_memory_core_early+0x148/0x18c memblock_free_all+0x18/0x54 mem_init+0xb4/0x17c mm_init+0x14/0x38 start_kernel+0x19c/0x530 -> #0 (&(&zone->lock)->rlock){..-.}: validate_chain+0xf6c/0x2e2c __lock_acquire+0x868/0xc2c lock_acquire+0x320/0x360 _raw_spin_lock+0x64/0x80 rmqueue+0x138/0x2050 get_page_from_freelist+0x474/0x688 __alloc_pages_nodemask+0x3b4/0x18dc alloc_pages_current+0xd0/0xe0 alloc_slab_page+0x2b4/0x5e0 new_slab+0xc8/0x6bc ___slab_alloc+0x3b8/0x640 kmem_cache_alloc+0x4b4/0x588 __debug_object_init+0x778/0x8b4 debug_object_init_on_stack+0x40/0x50 start_flush_work+0x16c/0x3f0 __flush_work+0xb8/0x124 flush_work+0x20/0x30 xlog_cil_force_lsn+0x88/0x204 [xfs] xfs_log_force_lsn+0x128/0x1b8 [xfs] xfs_file_fsync+0x3c4/0x488 [xfs] vfs_fsync_range+0xb0/0xd0 generic_write_sync+0x80/0xa0 [xfs] xfs_file_buffered_aio_write+0x66c/0x6e4 [xfs] xfs_file_write_iter+0x1a0/0x218 [xfs] __vfs_write+0x1cc/0x214 vfs_write+0x12c/0x1a4 ksys_write+0xb0/0x120 __arm64_sys_write+0x54/0x88 el0_svc_handler+0x170/0x240 el0_sync_handler+0x150/0x250 el0_sync+0x164/0x180 other info that might help us debug this: Chain exists of: &(&zone->lock)->rlock --> &(&port->lock)->rlock --> &pool->lock/1 Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&pool->lock/1); lock(&(&port->lock)->rlock); lock(&pool->lock/1); lock(&(&zone->lock)->rlock); *** DEADLOCK *** 4 locks held by doio/49441: #0: a0ff00886fc27408 (sb_writers#8){.+.+}, at: vfs_write+0x118/0x1a4 #1: 8fff00080810dfe0 (&xfs_nondir_ilock_class){++++}, at: xfs_ilock+0x2a8/0x300 [xfs] #2: ffff9000129f2390 (rcu_read_lock){....}, at: rcu_lock_acquire+0x8/0x38 #3: 60ff000822352818 (&pool->lock/1){-.-.}, at: start_flush_work+0xd8/0x3f0 stack backtrace: CPU: 48 PID: 49441 Comm: doio Tainted: G W Hardware name: HPE Apollo 70 /C01_APACHE_MB , BIOS L50_5.13_1.11 06/18/2019 Call trace: dump_backtrace+0x0/0x248 show_stack+0x20/0x2c dump_stack+0xe8/0x150 print_circular_bug+0x368/0x380 check_noncircular+0x28c/0x294 validate_chain+0xf6c/0x2e2c __lock_acquire+0x868/0xc2c lock_acquire+0x320/0x360 _raw_spin_lock+0x64/0x80 rmqueue+0x138/0x2050 get_page_from_freelist+0x474/0x688 __alloc_pages_nodemask+0x3b4/0x18dc alloc_pages_current+0xd0/0xe0 alloc_slab_page+0x2b4/0x5e0 new_slab+0xc8/0x6bc ___slab_alloc+0x3b8/0x640 kmem_cache_alloc+0x4b4/0x588 __debug_object_init+0x778/0x8b4 debug_object_init_on_stack+0x40/0x50 start_flush_work+0x16c/0x3f0 __flush_work+0xb8/0x124 flush_work+0x20/0x30 xlog_cil_force_lsn+0x88/0x204 [xfs] xfs_log_force_lsn+0x128/0x1b8 [xfs] xfs_file_fsync+0x3c4/0x488 [xfs] vfs_fsync_range+0xb0/0xd0 generic_write_sync+0x80/0xa0 [xfs] xfs_file_buffered_aio_write+0x66c/0x6e4 [xfs] xfs_file_write_iter+0x1a0/0x218 [xfs] __vfs_write+0x1cc/0x214 vfs_write+0x12c/0x1a4 ksys_write+0xb0/0x120 __arm64_sys_write+0x54/0x88 el0_svc_handler+0x170/0x240 el0_sync_handler+0x150/0x250 el0_sync+0x164/0x180 Reviewed-by: Sergey Senozhatsky Signed-off-by: Qian Cai Link: https://lore.kernel.org/r/1573679785-21068-1-git-send-email-cai@lca.pw Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- drivers/char/random.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index e6efa07e9f9e..50d5846acf48 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1598,8 +1598,9 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, print_once = true; #endif if (__ratelimit(&unseeded_warning)) - pr_notice("random: %s called from %pS with crng_init=%d\n", - func_name, caller, crng_init); + printk_deferred(KERN_NOTICE "random: %s called from %pS " + "with crng_init=%d\n", func_name, caller, + crng_init); } /* From 3cd0d6e3b297a07c26a38600cb47d9c7932656ea Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Thu, 19 Dec 2019 11:34:01 +0100 Subject: [PATCH 3283/3715] media: sti: bdisp: fix a possible sleep-in-atomic-context bug in bdisp_device_run() [ Upstream commit bb6d42061a05d71dd73f620582d9e09c8fbf7f5b ] The driver may sleep while holding a spinlock. The function call path (from bottom to top) in Linux 4.19 is: drivers/media/platform/sti/bdisp/bdisp-hw.c, 385: msleep in bdisp_hw_reset drivers/media/platform/sti/bdisp/bdisp-v4l2.c, 341: bdisp_hw_reset in bdisp_device_run drivers/media/platform/sti/bdisp/bdisp-v4l2.c, 317: _raw_spin_lock_irqsave in bdisp_device_run To fix this bug, msleep() is replaced with udelay(). This bug is found by a static analysis tool STCheck written by myself. Signed-off-by: Jia-Ju Bai Reviewed-by: Fabien Dessenne Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/sti/bdisp/bdisp-hw.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/media/platform/sti/bdisp/bdisp-hw.c b/drivers/media/platform/sti/bdisp/bdisp-hw.c index b7892f3efd98..5c4c3f0c57be 100644 --- a/drivers/media/platform/sti/bdisp/bdisp-hw.c +++ b/drivers/media/platform/sti/bdisp/bdisp-hw.c @@ -14,8 +14,8 @@ #define MAX_SRC_WIDTH 2048 /* Reset & boot poll config */ -#define POLL_RST_MAX 50 -#define POLL_RST_DELAY_MS 20 +#define POLL_RST_MAX 500 +#define POLL_RST_DELAY_MS 2 enum bdisp_target_plan { BDISP_RGB, @@ -382,7 +382,7 @@ int bdisp_hw_reset(struct bdisp_dev *bdisp) for (i = 0; i < POLL_RST_MAX; i++) { if (readl(bdisp->regs + BLT_STA1) & BLT_STA1_IDLE) break; - msleep(POLL_RST_DELAY_MS); + udelay(POLL_RST_DELAY_MS * 1000); } if (i == POLL_RST_MAX) dev_err(bdisp->dev, "Reset timeout\n"); From 95c15f8e630ba55eec2d7fd9de88767d1ee398fd Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 28 Dec 2019 00:04:47 +0100 Subject: [PATCH 3284/3715] pinctrl: baytrail: Do not clear IRQ flags on direct-irq enabled pins [ Upstream commit a23680594da7a9e2696dbcf4f023e9273e2fa40b ] Suspending Goodix touchscreens requires changing the interrupt pin to output before sending them a power-down command. Followed by wiggling the interrupt pin to wake the device up, after which it is put back in input mode. On Bay Trail devices with a Goodix touchscreen direct-irq mode is used in combination with listing the pin as a normal GpioIo resource. This works fine, until the goodix driver gets rmmod-ed and then insmod-ed again. In this case byt_gpio_disable_free() calls byt_gpio_clear_triggering() which clears the IRQ flags and after that the (direct) IRQ no longer triggers. This commit fixes this by adding a check for the BYT_DIRECT_IRQ_EN flag to byt_gpio_clear_triggering(). Note that byt_gpio_clear_triggering() only gets called from byt_gpio_disable_free() for direct-irq enabled pins, as these are excluded from the irq_valid mask by byt_init_irq_valid_mask(). Signed-off-by: Hans de Goede Acked-by: Mika Westerberg Reviewed-by: Linus Walleij Signed-off-by: Andy Shevchenko Signed-off-by: Sasha Levin --- drivers/pinctrl/intel/pinctrl-baytrail.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c index 9df5d29d708d..4fb3e44f9133 100644 --- a/drivers/pinctrl/intel/pinctrl-baytrail.c +++ b/drivers/pinctrl/intel/pinctrl-baytrail.c @@ -958,7 +958,13 @@ static void byt_gpio_clear_triggering(struct byt_gpio *vg, unsigned int offset) raw_spin_lock_irqsave(&byt_lock, flags); value = readl(reg); - value &= ~(BYT_TRIG_POS | BYT_TRIG_NEG | BYT_TRIG_LVL); + + /* Do not clear direct-irq enabled IRQs (from gpio_disable_free) */ + if (value & BYT_DIRECT_IRQ_EN) + /* nothing to do */ ; + else + value &= ~(BYT_TRIG_POS | BYT_TRIG_NEG | BYT_TRIG_LVL); + writel(value, reg); raw_spin_unlock_irqrestore(&byt_lock, flags); } From bfd75d7bf197c9336a46042e9819f01075faa88e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Jan 2020 12:39:37 +0100 Subject: [PATCH 3285/3715] efi/x86: Map the entire EFI vendor string before copying it [ Upstream commit ffc2760bcf2dba0dbef74013ed73eea8310cc52c ] Fix a couple of issues with the way we map and copy the vendor string: - we map only 2 bytes, which usually works since you get at least a page, but if the vendor string happens to cross a page boundary, a crash will result - only call early_memunmap() if early_memremap() succeeded, or we will call it with a NULL address which it doesn't like, - while at it, switch to early_memremap_ro(), and array indexing rather than pointer dereferencing to read the CHAR16 characters. Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arvind Sankar Cc: Matthew Garrett Cc: linux-efi@vger.kernel.org Fixes: 5b83683f32b1 ("x86: EFI runtime service support") Link: https://lkml.kernel.org/r/20200103113953.9571-5-ardb@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- arch/x86/platform/efi/efi.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 335a62e74a2e..5b0275310070 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -480,7 +480,6 @@ void __init efi_init(void) efi_char16_t *c16; char vendor[100] = "unknown"; int i = 0; - void *tmp; #ifdef CONFIG_X86_32 if (boot_params.efi_info.efi_systab_hi || @@ -505,14 +504,16 @@ void __init efi_init(void) /* * Show what we know for posterity */ - c16 = tmp = early_memremap(efi.systab->fw_vendor, 2); + c16 = early_memremap_ro(efi.systab->fw_vendor, + sizeof(vendor) * sizeof(efi_char16_t)); if (c16) { - for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i) - vendor[i] = *c16++; + for (i = 0; i < sizeof(vendor) - 1 && c16[i]; ++i) + vendor[i] = c16[i]; vendor[i] = '\0'; - } else + early_memunmap(c16, sizeof(vendor) * sizeof(efi_char16_t)); + } else { pr_err("Could not map the firmware vendor!\n"); - early_memunmap(tmp, 2); + } pr_info("EFI v%u.%.02u by %s\n", efi.systab->hdr.revision >> 16, From f524a25696a3524ccdba397944c568df7d762026 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Fri, 10 Jan 2020 09:30:42 +0800 Subject: [PATCH 3286/3715] MIPS: Loongson: Fix potential NULL dereference in loongson3_platform_init() [ Upstream commit 72d052e28d1d2363f9107be63ef3a3afdea6143c ] If kzalloc fails, it should return -ENOMEM, otherwise may trigger a NULL pointer dereference. Fixes: 3adeb2566b9b ("MIPS: Loongson: Improve LEFI firmware interface") Signed-off-by: Tiezhu Yang Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Sasha Levin --- arch/mips/loongson64/loongson-3/platform.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/mips/loongson64/loongson-3/platform.c b/arch/mips/loongson64/loongson-3/platform.c index 25a97cc0ee33..0db4cc3196eb 100644 --- a/arch/mips/loongson64/loongson-3/platform.c +++ b/arch/mips/loongson64/loongson-3/platform.c @@ -31,6 +31,9 @@ static int __init loongson3_platform_init(void) continue; pdev = kzalloc(sizeof(struct platform_device), GFP_KERNEL); + if (!pdev) + return -ENOMEM; + pdev->name = loongson_sysconf.sensors[i].name; pdev->id = loongson_sysconf.sensors[i].id; pdev->dev.platform_data = &loongson_sysconf.sensors[i]; From 5645615adf3f4bf6ef010943691cb0ebd1fbb065 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 12 Jan 2020 00:07:42 -0800 Subject: [PATCH 3287/3715] sparc: Add .exit.data section. [ Upstream commit 548f0b9a5f4cffa0cecf62eb12aa8db682e4eee6 ] This fixes build errors of all sorts. Also, emit .exit.text unconditionally. Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- arch/sparc/kernel/vmlinux.lds.S | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index 5a2344574f39..4323dc4ae4c7 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -167,12 +167,14 @@ SECTIONS } PERCPU_SECTION(SMP_CACHE_BYTES) -#ifdef CONFIG_JUMP_LABEL . = ALIGN(PAGE_SIZE); .exit.text : { EXIT_TEXT } -#endif + + .exit.data : { + EXIT_DATA + } . = ALIGN(PAGE_SIZE); __init_end = .; From 0151b03f43f2d295a6949454434074b34a262e06 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Wed, 18 Dec 2019 17:44:05 +0800 Subject: [PATCH 3288/3715] uio: fix a sleep-in-atomic-context bug in uio_dmem_genirq_irqcontrol() [ Upstream commit b74351287d4bd90636c3f48bc188c2f53824c2d4 ] The driver may sleep while holding a spinlock. The function call path (from bottom to top) in Linux 4.19 is: kernel/irq/manage.c, 523: synchronize_irq in disable_irq drivers/uio/uio_dmem_genirq.c, 140: disable_irq in uio_dmem_genirq_irqcontrol drivers/uio/uio_dmem_genirq.c, 134: _raw_spin_lock_irqsave in uio_dmem_genirq_irqcontrol synchronize_irq() can sleep at runtime. To fix this bug, disable_irq() is called without holding the spinlock. This bug is found by a static analysis tool STCheck written by myself. Signed-off-by: Jia-Ju Bai Link: https://lore.kernel.org/r/20191218094405.6009-1-baijiaju1990@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/uio/uio_dmem_genirq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/uio/uio_dmem_genirq.c b/drivers/uio/uio_dmem_genirq.c index e1134a4d97f3..a00b4aee6c79 100644 --- a/drivers/uio/uio_dmem_genirq.c +++ b/drivers/uio/uio_dmem_genirq.c @@ -135,11 +135,13 @@ static int uio_dmem_genirq_irqcontrol(struct uio_info *dev_info, s32 irq_on) if (irq_on) { if (test_and_clear_bit(0, &priv->flags)) enable_irq(dev_info->irq); + spin_unlock_irqrestore(&priv->lock, flags); } else { - if (!test_and_set_bit(0, &priv->flags)) + if (!test_and_set_bit(0, &priv->flags)) { + spin_unlock_irqrestore(&priv->lock, flags); disable_irq(dev_info->irq); + } } - spin_unlock_irqrestore(&priv->lock, flags); return 0; } From d2de2d9f9807dde09d3e1dc19531dfdd078cd747 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Wed, 18 Dec 2019 11:43:49 +0800 Subject: [PATCH 3289/3715] usb: gadget: udc: fix possible sleep-in-atomic-context bugs in gr_probe() [ Upstream commit 9c1ed62ae0690dfe5d5e31d8f70e70a95cb48e52 ] The driver may sleep while holding a spinlock. The function call path (from bottom to top) in Linux 4.19 is: drivers/usb/gadget/udc/core.c, 1175: kzalloc(GFP_KERNEL) in usb_add_gadget_udc_release drivers/usb/gadget/udc/core.c, 1272: usb_add_gadget_udc_release in usb_add_gadget_udc drivers/usb/gadget/udc/gr_udc.c, 2186: usb_add_gadget_udc in gr_probe drivers/usb/gadget/udc/gr_udc.c, 2183: spin_lock in gr_probe drivers/usb/gadget/udc/core.c, 1195: mutex_lock in usb_add_gadget_udc_release drivers/usb/gadget/udc/core.c, 1272: usb_add_gadget_udc_release in usb_add_gadget_udc drivers/usb/gadget/udc/gr_udc.c, 2186: usb_add_gadget_udc in gr_probe drivers/usb/gadget/udc/gr_udc.c, 2183: spin_lock in gr_probe drivers/usb/gadget/udc/gr_udc.c, 212: debugfs_create_file in gr_probe drivers/usb/gadget/udc/gr_udc.c, 2197: gr_dfs_create in gr_probe drivers/usb/gadget/udc/gr_udc.c, 2183: spin_lock in gr_probe drivers/usb/gadget/udc/gr_udc.c, 2114: devm_request_threaded_irq in gr_request_irq drivers/usb/gadget/udc/gr_udc.c, 2202: gr_request_irq in gr_probe drivers/usb/gadget/udc/gr_udc.c, 2183: spin_lock in gr_probe kzalloc(GFP_KERNEL), mutex_lock(), debugfs_create_file() and devm_request_threaded_irq() can sleep at runtime. To fix these possible bugs, usb_add_gadget_udc(), gr_dfs_create() and gr_request_irq() are called without handling the spinlock. These bugs are found by a static analysis tool STCheck written by myself. Signed-off-by: Jia-Ju Bai Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/gadget/udc/gr_udc.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/usb/gadget/udc/gr_udc.c b/drivers/usb/gadget/udc/gr_udc.c index 1f9941145746..feb73a1c42ef 100644 --- a/drivers/usb/gadget/udc/gr_udc.c +++ b/drivers/usb/gadget/udc/gr_udc.c @@ -2200,8 +2200,6 @@ static int gr_probe(struct platform_device *pdev) return -ENOMEM; } - spin_lock(&dev->lock); - /* Inside lock so that no gadget can use this udc until probe is done */ retval = usb_add_gadget_udc(dev->dev, &dev->gadget); if (retval) { @@ -2210,15 +2208,21 @@ static int gr_probe(struct platform_device *pdev) } dev->added = 1; - retval = gr_udc_init(dev); - if (retval) - goto out; + spin_lock(&dev->lock); - gr_dfs_create(dev); + retval = gr_udc_init(dev); + if (retval) { + spin_unlock(&dev->lock); + goto out; + } /* Clear all interrupt enables that might be left on since last boot */ gr_disable_interrupts_and_pullup(dev); + spin_unlock(&dev->lock); + + gr_dfs_create(dev); + retval = gr_request_irq(dev, dev->irq); if (retval) { dev_err(dev->dev, "Failed to request irq %d\n", dev->irq); @@ -2247,8 +2251,6 @@ static int gr_probe(struct platform_device *pdev) dev_info(dev->dev, "regs: %p, irq %d\n", dev->regs, dev->irq); out: - spin_unlock(&dev->lock); - if (retval) gr_remove(pdev); From 197bdc2a01078753fd9f57a17e08eb97ce7d30ba Mon Sep 17 00:00:00 2001 From: John Keeping Date: Thu, 19 Dec 2019 11:34:31 +0000 Subject: [PATCH 3290/3715] usb: dwc2: Fix IN FIFO allocation [ Upstream commit 644139f8b64d818f6345351455f14471510879a5 ] On chips with fewer FIFOs than endpoints (for example RK3288 which has 9 endpoints, but only 6 which are cabable of input), the DPTXFSIZN registers above the FIFO count may return invalid values. With logging added on startup, I see: dwc2 ff580000.usb: dwc2_hsotg_init_fifo: ep=1 sz=256 dwc2 ff580000.usb: dwc2_hsotg_init_fifo: ep=2 sz=128 dwc2 ff580000.usb: dwc2_hsotg_init_fifo: ep=3 sz=128 dwc2 ff580000.usb: dwc2_hsotg_init_fifo: ep=4 sz=64 dwc2 ff580000.usb: dwc2_hsotg_init_fifo: ep=5 sz=64 dwc2 ff580000.usb: dwc2_hsotg_init_fifo: ep=6 sz=32 dwc2 ff580000.usb: dwc2_hsotg_init_fifo: ep=7 sz=0 dwc2 ff580000.usb: dwc2_hsotg_init_fifo: ep=8 sz=0 dwc2 ff580000.usb: dwc2_hsotg_init_fifo: ep=9 sz=0 dwc2 ff580000.usb: dwc2_hsotg_init_fifo: ep=10 sz=0 dwc2 ff580000.usb: dwc2_hsotg_init_fifo: ep=11 sz=0 dwc2 ff580000.usb: dwc2_hsotg_init_fifo: ep=12 sz=0 dwc2 ff580000.usb: dwc2_hsotg_init_fifo: ep=13 sz=0 dwc2 ff580000.usb: dwc2_hsotg_init_fifo: ep=14 sz=0 dwc2 ff580000.usb: dwc2_hsotg_init_fifo: ep=15 sz=0 but: # cat /sys/kernel/debug/ff580000.usb/fifo Non-periodic FIFOs: RXFIFO: Size 275 NPTXFIFO: Size 16, Start 0x00000113 Periodic TXFIFOs: DPTXFIFO 1: Size 256, Start 0x00000123 DPTXFIFO 2: Size 128, Start 0x00000223 DPTXFIFO 3: Size 128, Start 0x000002a3 DPTXFIFO 4: Size 64, Start 0x00000323 DPTXFIFO 5: Size 64, Start 0x00000363 DPTXFIFO 6: Size 32, Start 0x000003a3 DPTXFIFO 7: Size 0, Start 0x000003e3 DPTXFIFO 8: Size 0, Start 0x000003a3 DPTXFIFO 9: Size 256, Start 0x00000123 so it seems that FIFO 9 is mirroring FIFO 1. Fix the allocation by using the FIFO count instead of the endpoint count when selecting a FIFO for an endpoint. Acked-by: Minas Harutyunyan Signed-off-by: John Keeping Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/dwc2/gadget.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 4af9a1c652ed..aeb6f7c84ea0 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -3933,11 +3933,12 @@ static int dwc2_hsotg_ep_enable(struct usb_ep *ep, * a unique tx-fifo even if it is non-periodic. */ if (dir_in && hsotg->dedicated_fifos) { + unsigned fifo_count = dwc2_hsotg_tx_fifo_count(hsotg); u32 fifo_index = 0; u32 fifo_size = UINT_MAX; size = hs_ep->ep.maxpacket * hs_ep->mc; - for (i = 1; i < hsotg->num_of_eps; ++i) { + for (i = 1; i <= fifo_count; ++i) { if (hsotg->fifo_map & (1 << i)) continue; val = dwc2_readl(hsotg->regs + DPTXFSIZN(i)); From 2a41bcb3bdc9236c3fe243b014e0d070f3bacd49 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 19 Dec 2019 21:32:46 +0000 Subject: [PATCH 3291/3715] clocksource/drivers/bcm2835_timer: Fix memory leak of timer [ Upstream commit 2052d032c06761330bca4944bb7858b00960e868 ] Currently when setup_irq fails the error exit path will leak the recently allocated timer structure. Originally the code would throw a panic but a later commit changed the behaviour to return via the err_iounmap path and hence we now have a memory leak. Fix this by adding a err_timer_free error path that kfree's timer. Addresses-Coverity: ("Resource Leak") Fixes: 524a7f08983d ("clocksource/drivers/bcm2835_timer: Convert init function to return error") Signed-off-by: Colin Ian King Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20191219213246.34437-1-colin.king@canonical.com Signed-off-by: Sasha Levin --- drivers/clocksource/bcm2835_timer.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/clocksource/bcm2835_timer.c b/drivers/clocksource/bcm2835_timer.c index 39e489a96ad7..8894cfc32be0 100644 --- a/drivers/clocksource/bcm2835_timer.c +++ b/drivers/clocksource/bcm2835_timer.c @@ -134,7 +134,7 @@ static int __init bcm2835_timer_init(struct device_node *node) ret = setup_irq(irq, &timer->act); if (ret) { pr_err("Can't set up timer IRQ\n"); - goto err_iounmap; + goto err_timer_free; } clockevents_config_and_register(&timer->evt, freq, 0xf, 0xffffffff); @@ -143,6 +143,9 @@ static int __init bcm2835_timer_init(struct device_node *node) return 0; +err_timer_free: + kfree(timer); + err_iounmap: iounmap(base); return ret; From 0ffec5708942c021acf93662079be1cb1cebe738 Mon Sep 17 00:00:00 2001 From: Siddhesh Poyarekar Date: Mon, 13 Jan 2020 22:11:58 +0530 Subject: [PATCH 3292/3715] kselftest: Minimise dependency of get_size on C library interfaces [ Upstream commit 6b64a650f0b2ae3940698f401732988699eecf7a ] It was observed[1] on arm64 that __builtin_strlen led to an infinite loop in the get_size selftest. This is because __builtin_strlen (and other builtins) may sometimes result in a call to the C library function. The C library implementation of strlen uses an IFUNC resolver to load the most efficient strlen implementation for the underlying machine and hence has a PLT indirection even for static binaries. Because this binary avoids the C library startup routines, the PLT initialization never happens and hence the program gets stuck in an infinite loop. On x86_64 the __builtin_strlen just happens to expand inline and avoid the call but that is not always guaranteed. Further, while testing on x86_64 (Fedora 31), it was observed that the test also failed with a segfault inside write() because the generated code for the write function in glibc seems to access TLS before the syscall (probably due to the cancellation point check) and fails because TLS is not initialised. To mitigate these problems, this patch reduces the interface with the C library to just the syscall function. The syscall function still sets errno on failure, which is undesirable but for now it only affects cases where syscalls fail. [1] https://bugs.linaro.org/show_bug.cgi?id=5479 Signed-off-by: Siddhesh Poyarekar Reported-by: Masami Hiramatsu Tested-by: Masami Hiramatsu Reviewed-by: Tim Bird Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- tools/testing/selftests/size/get_size.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/size/get_size.c b/tools/testing/selftests/size/get_size.c index d4b59ab979a0..f55943b6d1e2 100644 --- a/tools/testing/selftests/size/get_size.c +++ b/tools/testing/selftests/size/get_size.c @@ -12,23 +12,35 @@ * own execution. It also attempts to have as few dependencies * on kernel features as possible. * - * It should be statically linked, with startup libs avoided. - * It uses no library calls, and only the following 3 syscalls: + * It should be statically linked, with startup libs avoided. It uses + * no library calls except the syscall() function for the following 3 + * syscalls: * sysinfo(), write(), and _exit() * * For output, it avoids printf (which in some C libraries * has large external dependencies) by implementing it's own * number output and print routines, and using __builtin_strlen() + * + * The test may crash if any of the above syscalls fails because in some + * libc implementations (e.g. the GNU C Library) errno is saved in + * thread-local storage, which does not get initialized due to avoiding + * startup libs. */ #include #include +#include #define STDOUT_FILENO 1 static int print(const char *s) { - return write(STDOUT_FILENO, s, __builtin_strlen(s)); + size_t len = 0; + + while (s[len] != '\0') + len++; + + return syscall(SYS_write, STDOUT_FILENO, s, len); } static inline char *num_to_str(unsigned long num, char *buf, int len) @@ -80,12 +92,12 @@ void _start(void) print("TAP version 13\n"); print("# Testing system size.\n"); - ccode = sysinfo(&info); + ccode = syscall(SYS_sysinfo, &info); if (ccode < 0) { print("not ok 1"); print(test_name); print(" ---\n reason: \"could not get sysinfo\"\n ...\n"); - _exit(ccode); + syscall(SYS_exit, ccode); } print("ok 1"); print(test_name); @@ -101,5 +113,5 @@ void _start(void) print(" ...\n"); print("1..1\n"); - _exit(0); + syscall(SYS_exit, 0); } From 5e9b22da0235e6dd6515c8701c76187dbc6852cb Mon Sep 17 00:00:00 2001 From: Kai Li Date: Sat, 11 Jan 2020 10:25:42 +0800 Subject: [PATCH 3293/3715] jbd2: clear JBD2_ABORT flag before journal_reset to update log tail info when load journal [ Upstream commit a09decff5c32060639a685581c380f51b14e1fc2 ] If the journal is dirty when the filesystem is mounted, jbd2 will replay the journal but the journal superblock will not be updated by journal_reset() because JBD2_ABORT flag is still set (it was set in journal_init_common()). This is problematic because when a new transaction is then committed, it will be recorded in block 1 (journal->j_tail was set to 1 in journal_reset()). If unclean shutdown happens again before the journal superblock is updated, the new recorded transaction will not be replayed during the next mount (because of stale sb->s_start and sb->s_sequence values) which can lead to filesystem corruption. Fixes: 85e0c4e89c1b ("jbd2: if the journal is aborted then don't allow update of the log tail") Signed-off-by: Kai Li Link: https://lore.kernel.org/r/20200111022542.5008-1-li.kai4@h3c.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/jbd2/journal.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index d3cce5c86fd9..b72be822f04f 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1687,6 +1687,11 @@ int jbd2_journal_load(journal_t *journal) journal->j_devname); return -EFSCORRUPTED; } + /* + * clear JBD2_ABORT flag initialized in journal_init_common + * here to update log tail information with the newest seq. + */ + journal->j_flags &= ~JBD2_ABORT; /* OK, we've finished with the dynamic journal bits: * reinitialise the dynamic contents of the superblock in memory @@ -1694,7 +1699,6 @@ int jbd2_journal_load(journal_t *journal) if (journal_reset(journal)) goto recovery_error; - journal->j_flags &= ~JBD2_ABORT; journal->j_flags |= JBD2_LOADED; return 0; From c118cd45f46a0d6b985e9959d020b5ec2f5d58dc Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Tue, 7 Jan 2020 18:04:10 -0500 Subject: [PATCH 3294/3715] x86/sysfb: Fix check for bad VRAM size [ Upstream commit dacc9092336be20b01642afe1a51720b31f60369 ] When checking whether the reported lfb_size makes sense, the height * stride result is page-aligned before seeing whether it exceeds the reported size. This doesn't work if height * stride is not an exact number of pages. For example, as reported in the kernel bugzilla below, an 800x600x32 EFI framebuffer gets skipped because of this. Move the PAGE_ALIGN to after the check vs size. Reported-by: Christopher Head Tested-by: Christopher Head Signed-off-by: Arvind Sankar Signed-off-by: Borislav Petkov Link: https://bugzilla.kernel.org/show_bug.cgi?id=206051 Link: https://lkml.kernel.org/r/20200107230410.2291947-1-nivedita@alum.mit.edu Signed-off-by: Sasha Levin --- arch/x86/kernel/sysfb_simplefb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/sysfb_simplefb.c b/arch/x86/kernel/sysfb_simplefb.c index 85195d447a92..f3215346e47f 100644 --- a/arch/x86/kernel/sysfb_simplefb.c +++ b/arch/x86/kernel/sysfb_simplefb.c @@ -94,11 +94,11 @@ __init int create_simplefb(const struct screen_info *si, if (si->orig_video_isVGA == VIDEO_TYPE_VLFB) size <<= 16; length = mode->height * mode->stride; - length = PAGE_ALIGN(length); if (length > size) { printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n"); return -EINVAL; } + length = PAGE_ALIGN(length); /* setup IORESOURCE_MEM as framebuffer memory */ memset(&res, 0, sizeof(res)); From 8f16da1dcdac5f90e77f075160c110101004d48d Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Tue, 9 Sep 2014 22:49:41 +0100 Subject: [PATCH 3295/3715] tracing: Fix tracing_stat return values in error handling paths [ Upstream commit afccc00f75bbbee4e4ae833a96c2d29a7259c693 ] tracing_stat_init() was always returning '0', even on the error paths. It now returns -ENODEV if tracing_init_dentry() fails or -ENOMEM if it fails to created the 'trace_stat' debugfs directory. Link: http://lkml.kernel.org/r/1410299381-20108-1-git-send-email-luis.henriques@canonical.com Fixes: ed6f1c996bfe4 ("tracing: Check return value of tracing_init_dentry()") Signed-off-by: Luis Henriques [ Pulled from the archeological digging of my INBOX ] Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sasha Levin --- kernel/trace/trace_stat.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index 75bf1bcb4a8a..bf68af63538b 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -278,18 +278,22 @@ static int tracing_stat_init(void) d_tracing = tracing_init_dentry(); if (IS_ERR(d_tracing)) - return 0; + return -ENODEV; stat_dir = tracefs_create_dir("trace_stat", d_tracing); - if (!stat_dir) + if (!stat_dir) { pr_warn("Could not create tracefs 'trace_stat' entry\n"); + return -ENOMEM; + } return 0; } static int init_stat_file(struct stat_session *session) { - if (!stat_dir && tracing_stat_init()) - return -ENODEV; + int ret; + + if (!stat_dir && (ret = tracing_stat_init())) + return ret; session->file = tracefs_create_file(session->ts->name, 0644, stat_dir, From cf6eb046e53cc36534562c04cc6ce130c40dfcfc Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 24 Jan 2020 17:47:49 -0500 Subject: [PATCH 3296/3715] tracing: Fix very unlikely race of registering two stat tracers [ Upstream commit dfb6cd1e654315168e36d947471bd2a0ccd834ae ] Looking through old emails in my INBOX, I came across a patch from Luis Henriques that attempted to fix a race of two stat tracers registering the same stat trace (extremely unlikely, as this is done in the kernel, and probably doesn't even exist). The submitted patch wasn't quite right as it needed to deal with clean up a bit better (if two stat tracers were the same, it would have the same files). But to make the code cleaner, all we needed to do is to keep the all_stat_sessions_mutex held for most of the registering function. Link: http://lkml.kernel.org/r/1410299375-20068-1-git-send-email-luis.henriques@canonical.com Fixes: 002bb86d8d42f ("tracing/ftrace: separate events tracing and stats tracing engine") Reported-by: Luis Henriques Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sasha Levin --- kernel/trace/trace_stat.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index bf68af63538b..92b76f9e25ed 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -306,7 +306,7 @@ static int init_stat_file(struct stat_session *session) int register_stat_tracer(struct tracer_stat *trace) { struct stat_session *session, *node; - int ret; + int ret = -EINVAL; if (!trace) return -EINVAL; @@ -317,17 +317,15 @@ int register_stat_tracer(struct tracer_stat *trace) /* Already registered? */ mutex_lock(&all_stat_sessions_mutex); list_for_each_entry(node, &all_stat_sessions, session_list) { - if (node->ts == trace) { - mutex_unlock(&all_stat_sessions_mutex); - return -EINVAL; - } + if (node->ts == trace) + goto out; } - mutex_unlock(&all_stat_sessions_mutex); + ret = -ENOMEM; /* Init the session */ session = kzalloc(sizeof(*session), GFP_KERNEL); if (!session) - return -ENOMEM; + goto out; session->ts = trace; INIT_LIST_HEAD(&session->session_list); @@ -336,15 +334,16 @@ int register_stat_tracer(struct tracer_stat *trace) ret = init_stat_file(session); if (ret) { destroy_session(session); - return ret; + goto out; } + ret = 0; /* Register */ - mutex_lock(&all_stat_sessions_mutex); list_add_tail(&session->session_list, &all_stat_sessions); + out: mutex_unlock(&all_stat_sessions_mutex); - return 0; + return ret; } void unregister_stat_tracer(struct tracer_stat *trace) From 7139b70b1ae5ea076f03b888e3bec19e268faa28 Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Wed, 4 Dec 2019 20:46:12 +0800 Subject: [PATCH 3297/3715] ext4, jbd2: ensure panic when aborting with zero errno [ Upstream commit 51f57b01e4a3c7d7bdceffd84de35144e8c538e7 ] JBD2_REC_ERR flag used to indicate the errno has been updated when jbd2 aborted, and then __ext4_abort() and ext4_handle_error() can invoke panic if ERRORS_PANIC is specified. But if the journal has been aborted with zero errno, jbd2_journal_abort() didn't set this flag so we can no longer panic. Fix this by always record the proper errno in the journal superblock. Fixes: 4327ba52afd03 ("ext4, jbd2: ensure entering into panic after recording an error in superblock") Signed-off-by: zhangyi (F) Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20191204124614.45424-3-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/jbd2/checkpoint.c | 2 +- fs/jbd2/journal.c | 15 ++++----------- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index fe4fe155b7fb..15d129b7494b 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -168,7 +168,7 @@ void __jbd2_log_wait_for_space(journal_t *journal) "journal space in %s\n", __func__, journal->j_devname); WARN_ON(1); - jbd2_journal_abort(journal, 0); + jbd2_journal_abort(journal, -EIO); } write_lock(&journal->j_state_lock); } else { diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index b72be822f04f..eae9ced846d5 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -2128,12 +2128,10 @@ static void __journal_abort_soft (journal_t *journal, int errno) __jbd2_journal_abort_hard(journal); - if (errno) { - jbd2_journal_update_sb_errno(journal); - write_lock(&journal->j_state_lock); - journal->j_flags |= JBD2_REC_ERR; - write_unlock(&journal->j_state_lock); - } + jbd2_journal_update_sb_errno(journal); + write_lock(&journal->j_state_lock); + journal->j_flags |= JBD2_REC_ERR; + write_unlock(&journal->j_state_lock); } /** @@ -2175,11 +2173,6 @@ static void __journal_abort_soft (journal_t *journal, int errno) * failure to disk. ext3_error, for example, now uses this * functionality. * - * Errors which originate from within the journaling layer will NOT - * supply an errno; a null errno implies that absolutely no further - * writes are done to the journal (unless there are any already in - * progress). - * */ void jbd2_journal_abort(journal_t *journal, int errno) From 24fd0b0adc5354b31fd1f4b8315e6c7e8a4eb41b Mon Sep 17 00:00:00 2001 From: Sun Ke Date: Wed, 22 Jan 2020 11:18:57 +0800 Subject: [PATCH 3298/3715] nbd: add a flush_workqueue in nbd_start_device [ Upstream commit 5c0dd228b5fc30a3b732c7ae2657e0161ec7ed80 ] When kzalloc fail, may cause trying to destroy the workqueue from inside the workqueue. If num_connections is m (2 < m), and NO.1 ~ NO.n (1 < n < m) kzalloc are successful. The NO.(n + 1) failed. Then, nbd_start_device will return ENOMEM to nbd_start_device_ioctl, and nbd_start_device_ioctl will return immediately without running flush_workqueue. However, we still have n recv threads. If nbd_release run first, recv threads may have to drop the last config_refs and try to destroy the workqueue from inside the workqueue. To fix it, add a flush_workqueue in nbd_start_device. Fixes: e9e006f5fcf2 ("nbd: fix max number of supported devs") Signed-off-by: Sun Ke Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/nbd.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 4c661ad91e7d..8f56e6b2f114 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1203,6 +1203,16 @@ static int nbd_start_device(struct nbd_device *nbd) args = kzalloc(sizeof(*args), GFP_KERNEL); if (!args) { sock_shutdown(nbd); + /* + * If num_connections is m (2 < m), + * and NO.1 ~ NO.n(1 < n < m) kzallocs are successful. + * But NO.(n + 1) failed. We still have n recv threads. + * So, add flush_workqueue here to prevent recv threads + * dropping the last config_refs and trying to destroy + * the workqueue from inside the workqueue. + */ + if (i) + flush_workqueue(nbd->recv_workq); return -ENOMEM; } sk_set_memalloc(config->socks[i]->sock->sk); From 394c452af6013255668b8f157a1bdfd4571f839d Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 5 Dec 2019 07:40:43 -0500 Subject: [PATCH 3299/3715] KVM: s390: ENOTSUPP -> EOPNOTSUPP fixups [ Upstream commit c611990844c28c61ca4b35ff69d3a2ae95ccd486 ] There is no ENOTSUPP for userspace. Reported-by: Julian Wiedmann Fixes: 519783935451 ("KVM: s390: introduce ais mode modify function") Fixes: 2c1a48f2e5ed ("KVM: S390: add new group for flic") Reviewed-by: Cornelia Huck Reviewed-by: Thomas Huth Signed-off-by: Christian Borntraeger Signed-off-by: Sasha Levin --- arch/s390/kvm/interrupt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 28f3796d23c8..61d25e2c82ef 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1913,7 +1913,7 @@ static int flic_ais_mode_get_all(struct kvm *kvm, struct kvm_device_attr *attr) return -EINVAL; if (!test_kvm_facility(kvm, 72)) - return -ENOTSUPP; + return -EOPNOTSUPP; mutex_lock(&fi->ais_lock); ais.simm = fi->simm; @@ -2214,7 +2214,7 @@ static int modify_ais_mode(struct kvm *kvm, struct kvm_device_attr *attr) int ret = 0; if (!test_kvm_facility(kvm, 72)) - return -ENOTSUPP; + return -EOPNOTSUPP; if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req))) return -EFAULT; @@ -2294,7 +2294,7 @@ static int flic_ais_mode_set_all(struct kvm *kvm, struct kvm_device_attr *attr) struct kvm_s390_ais_all ais; if (!test_kvm_facility(kvm, 72)) - return -ENOTSUPP; + return -EOPNOTSUPP; if (copy_from_user(&ais, (void __user *)attr->addr, sizeof(ais))) return -EFAULT; From 16536ce0dfa08059fdd315f99b8ccfb30af2290c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 1 Feb 2020 14:03:11 +0900 Subject: [PATCH 3300/3715] kconfig: fix broken dependency in randconfig-generated .config [ Upstream commit c8fb7d7e48d11520ad24808cfce7afb7b9c9f798 ] Running randconfig on arm64 using KCONFIG_SEED=0x40C5E904 (e.g. on v5.5) produces the .config with CONFIG_EFI=y and CONFIG_CPU_BIG_ENDIAN=y, which does not meet the !CONFIG_CPU_BIG_ENDIAN dependency. This is because the user choice for CONFIG_CPU_LITTLE_ENDIAN vs CONFIG_CPU_BIG_ENDIAN is set by randomize_choice_values() after the value of CONFIG_EFI is calculated. When this happens, the has_changed flag should be set. Currently, it takes the result from the last iteration. It should accumulate all the results of the loop. Fixes: 3b9a19e08960 ("kconfig: loop as long as we changed some symbols in randconfig") Reported-by: Vincenzo Frascino Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/kconfig/confdata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index 27aac273205b..fa423fcd1a92 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -1238,7 +1238,7 @@ bool conf_set_all_new_symbols(enum conf_def_mode mode) sym_calc_value(csym); if (mode == def_random) - has_changed = randomize_choice_values(csym); + has_changed |= randomize_choice_values(csym); else { set_all_choice_values(csym); has_changed = true; From ef7339a38eccc07dbbc6a4dde0c0d4d3be157a87 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 3 Feb 2020 10:31:34 -0800 Subject: [PATCH 3301/3715] clk: qcom: rcg2: Don't crash if our parent can't be found; return an error [ Upstream commit 908b050114d8fefdddc57ec9fbc213c3690e7f5f ] When I got my clock parenting slightly wrong I ended up with a crash that looked like this: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 ... pc : clk_hw_get_rate+0x14/0x44 ... Call trace: clk_hw_get_rate+0x14/0x44 _freq_tbl_determine_rate+0x94/0xfc clk_rcg2_determine_rate+0x2c/0x38 clk_core_determine_round_nolock+0x4c/0x88 clk_core_round_rate_nolock+0x6c/0xa8 clk_core_round_rate_nolock+0x9c/0xa8 clk_core_set_rate_nolock+0x70/0x180 clk_set_rate+0x3c/0x6c of_clk_set_defaults+0x254/0x360 platform_drv_probe+0x28/0xb0 really_probe+0x120/0x2dc driver_probe_device+0x64/0xfc device_driver_attach+0x4c/0x6c __driver_attach+0xac/0xc0 bus_for_each_dev+0x84/0xcc driver_attach+0x2c/0x38 bus_add_driver+0xfc/0x1d0 driver_register+0x64/0xf8 __platform_driver_register+0x4c/0x58 msm_drm_register+0x5c/0x60 ... It turned out that clk_hw_get_parent_by_index() was returning NULL and we weren't checking. Let's check it so that we don't crash. Fixes: ac269395cdd8 ("clk: qcom: Convert to clk_hw based provider APIs") Signed-off-by: Douglas Anderson Reviewed-by: Matthias Kaehlcke Link: https://lkml.kernel.org/r/20200203103049.v4.1.I7487325fe8e701a68a07d3be8a6a4b571eca9cfa@changeid Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/qcom/clk-rcg2.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c index a93439242565..d3953ea69fda 100644 --- a/drivers/clk/qcom/clk-rcg2.c +++ b/drivers/clk/qcom/clk-rcg2.c @@ -210,6 +210,9 @@ static int _freq_tbl_determine_rate(struct clk_hw *hw, const struct freq_tbl *f, clk_flags = clk_hw_get_flags(hw); p = clk_hw_get_parent_by_index(hw, index); + if (!p) + return -EINVAL; + if (clk_flags & CLK_SET_RATE_PARENT) { if (f->pre_div) { if (!rate) From df49e093ed54581241e70ce3b2ebd2faa92abfbf Mon Sep 17 00:00:00 2001 From: yu kuai Date: Mon, 4 Nov 2019 21:27:20 +0800 Subject: [PATCH 3302/3715] drm/amdgpu: remove 4 set but not used variable in amdgpu_atombios_get_connector_info_from_object_table [ Upstream commit bae028e3e521e8cb8caf2cc16a455ce4c55f2332 ] Fixes gcc '-Wunused-but-set-variable' warning: drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c: In function 'amdgpu_atombios_get_connector_info_from_object_table': drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c:376:26: warning: variable 'grph_obj_num' set but not used [-Wunused-but-set-variable] drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c:376:13: warning: variable 'grph_obj_id' set but not used [-Wunused-but-set-variable] drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c:341:37: warning: variable 'con_obj_type' set but not used [-Wunused-but-set-variable] drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c:341:24: warning: variable 'con_obj_num' set but not used [-Wunused-but-set-variable] They are never used, so can be removed. Fixes: d38ceaf99ed0 ("drm/amdgpu: add core driver (v4)") Signed-off-by: yu kuai Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index cc4e18dcd8b6..4779740421a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -336,17 +336,9 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device * path_size += le16_to_cpu(path->usSize); if (device_support & le16_to_cpu(path->usDeviceTag)) { - uint8_t con_obj_id, con_obj_num, con_obj_type; - - con_obj_id = + uint8_t con_obj_id = (le16_to_cpu(path->usConnObjectId) & OBJECT_ID_MASK) >> OBJECT_ID_SHIFT; - con_obj_num = - (le16_to_cpu(path->usConnObjectId) & ENUM_ID_MASK) - >> ENUM_ID_SHIFT; - con_obj_type = - (le16_to_cpu(path->usConnObjectId) & - OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT; /* Skip TV/CV support */ if ((le16_to_cpu(path->usDeviceTag) == @@ -371,14 +363,7 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device * router.ddc_valid = false; router.cd_valid = false; for (j = 0; j < ((le16_to_cpu(path->usSize) - 8) / 2); j++) { - uint8_t grph_obj_id, grph_obj_num, grph_obj_type; - - grph_obj_id = - (le16_to_cpu(path->usGraphicObjIds[j]) & - OBJECT_ID_MASK) >> OBJECT_ID_SHIFT; - grph_obj_num = - (le16_to_cpu(path->usGraphicObjIds[j]) & - ENUM_ID_MASK) >> ENUM_ID_SHIFT; + uint8_t grph_obj_type= grph_obj_type = (le16_to_cpu(path->usGraphicObjIds[j]) & OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT; From b99bcff0147d069adbc68349a2a8bbe40855c2f2 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 3 Dec 2019 17:47:09 +0100 Subject: [PATCH 3303/3715] regulator: rk808: Lower log level on optional GPIOs being not available [ Upstream commit b8a039d37792067c1a380dc710361905724b9b2f ] RK808 can leverage a couple of GPIOs to tweak the ramp rate during DVS (Dynamic Voltage Scaling). These GPIOs are entirely optional but a dev_warn() appeared when cleaning this driver to use a more up-to-date gpiod API. At least reduce the log level to 'info' as it is totally fine to not populate these GPIO on a hardware design. This change is trivial but it is worth not polluting the logs during bringup phase by having real warnings and errors sorted out correctly. Fixes: a13eaf02e2d6 ("regulator: rk808: make better use of the gpiod API") Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/r/20191203164709.11127-1-miquel.raynal@bootlin.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/rk808-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/rk808-regulator.c b/drivers/regulator/rk808-regulator.c index 213b68743cc8..92498ac50303 100644 --- a/drivers/regulator/rk808-regulator.c +++ b/drivers/regulator/rk808-regulator.c @@ -714,7 +714,7 @@ static int rk808_regulator_dt_parse_pdata(struct device *dev, } if (!pdata->dvs_gpio[i]) { - dev_warn(dev, "there is no dvs%d gpio\n", i); + dev_info(dev, "there is no dvs%d gpio\n", i); continue; } From 7ee8e95a8c58c1040aeb3e96a4e68107d9f25c96 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 28 Nov 2019 15:55:51 +0100 Subject: [PATCH 3304/3715] net/wan/fsl_ucc_hdlc: reject muram offsets above 64K [ Upstream commit 148587a59f6b85831695e0497d9dd1af5f0495af ] Qiang Zhao points out that these offsets get written to 16-bit registers, and there are some QE platforms with more than 64K muram. So it is possible that qe_muram_alloc() gives us an allocation that can't actually be used by the hardware, so detect and reject that. Reported-by: Qiang Zhao Reviewed-by: Timur Tabi Signed-off-by: Rasmus Villemoes Acked-by: David S. Miller Signed-off-by: Li Yang Signed-off-by: Sasha Levin --- drivers/net/wan/fsl_ucc_hdlc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index 571a1ff8f81f..6a26cef62193 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -240,6 +240,11 @@ static int uhdlc_init(struct ucc_hdlc_private *priv) ret = -ENOMEM; goto free_riptr; } + if (riptr != (u16)riptr || tiptr != (u16)tiptr) { + dev_err(priv->dev, "MURAM allocation out of addressable range\n"); + ret = -ENOMEM; + goto free_tiptr; + } /* Set RIPTR, TIPTR */ iowrite16be(riptr, &priv->ucc_pram->riptr); From 2caa8fad60a0186d0afad87e4281965dcba7c3b9 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Mon, 25 Nov 2019 13:52:52 -0600 Subject: [PATCH 3305/3715] PCI/IOV: Fix memory leak in pci_iov_add_virtfn() [ Upstream commit 8c386cc817878588195dde38e919aa6ba9409d58 ] In the implementation of pci_iov_add_virtfn() the allocated virtfn is leaked if pci_setup_device() fails. The error handling is not calling pci_stop_and_remove_bus_device(). Change the goto label to failed2. Fixes: 156c55325d30 ("PCI: Check for pci_setup_device() failure in pci_iov_add_virtfn()") Link: https://lore.kernel.org/r/20191125195255.23740-1-navid.emamdoost@gmail.com Signed-off-by: Navid Emamdoost Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin --- drivers/pci/iov.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 0fd8e164339c..0dc646c1bc3d 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -179,6 +179,7 @@ int pci_iov_add_virtfn(struct pci_dev *dev, int id, int reset) failed2: sysfs_remove_link(&dev->dev.kobj, buf); failed1: + pci_stop_and_remove_bus_device(virtfn); pci_dev_put(dev); pci_stop_and_remove_bus_device(virtfn); failed0: From 4aea4f02dc85136a45f943ea2ba2b111d553017a Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Mon, 9 Dec 2019 21:08:45 +0800 Subject: [PATCH 3306/3715] NFC: port100: Convert cpu_to_le16(le16_to_cpu(E1) + E2) to use le16_add_cpu(). [ Upstream commit 718eae277e62a26e5862eb72a830b5e0fe37b04a ] Convert cpu_to_le16(le16_to_cpu(frame->datalen) + len) to use le16_add_cpu(), which is more concise and does the same thing. Reported-by: Hulk Robot Signed-off-by: Mao Wenan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/nfc/port100.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c index 60ae382f50da..06bb226c62ef 100644 --- a/drivers/nfc/port100.c +++ b/drivers/nfc/port100.c @@ -574,7 +574,7 @@ static void port100_tx_update_payload_len(void *_frame, int len) { struct port100_frame *frame = _frame; - frame->datalen = cpu_to_le16(le16_to_cpu(frame->datalen) + len); + le16_add_cpu(&frame->datalen, len); } static bool port100_rx_frame_is_valid(void *_frame) From b4571f0e7235eaebd052b712fe0f586f474904d3 Mon Sep 17 00:00:00 2001 From: Manu Gautam Date: Mon, 9 Dec 2019 16:15:01 +0100 Subject: [PATCH 3307/3715] arm64: dts: qcom: msm8996: Disable USB2 PHY suspend by core [ Upstream commit d026c96b25b7ce5df89526aad2df988d553edb4d ] QUSB2 PHY on msm8996 doesn't work well when autosuspend by dwc3 core using USB2PHYCFG register is enabled. One of the issue seen is that PHY driver reports PLL lock failure and fails phy_init() if dwc3 core has USB2 PHY suspend enabled. Fix this by using quirks to disable USB2 PHY LPM/suspend and dwc3 core already takes care of explicitly suspending PHY during suspend if quirks are specified. Signed-off-by: Manu Gautam Signed-off-by: Paolo Pisati Link: https://lore.kernel.org/r/20191209151501.26993-1-p.pisati@gmail.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/msm8996.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index 6f372ec055dd..da2949586c7a 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -788,6 +788,8 @@ interrupts = <0 138 0>; phys = <&hsusb_phy2>; phy-names = "usb2-phy"; + snps,dis_u2_susphy_quirk; + snps,dis_enblslpm_quirk; }; }; @@ -817,6 +819,8 @@ interrupts = <0 131 0>; phys = <&hsusb_phy1>, <&ssusb_phy_0>; phy-names = "usb2-phy", "usb3-phy"; + snps,dis_u2_susphy_quirk; + snps,dis_enblslpm_quirk; }; }; }; From eacee8c37a6eba0965fa82998f60777c9c1e683e Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Mon, 9 Dec 2019 08:50:17 -0800 Subject: [PATCH 3308/3715] ARM: dts: imx6: rdu2: Disable WP for USDHC2 and USDHC3 [ Upstream commit cd58a174e58649426fb43d7456e5f7d7eab58af1 ] RDU2 production units come with resistor connecting WP pin to correpsonding GPIO DNPed for both SD card slots. Drop any WP related configuration and mark both slots with "disable-wp". Reported-by: Chris Healy Reviewed-by: Chris Healy Reviewed-by: Lucas Stach Signed-off-by: Andrey Smirnov Cc: Shawn Guo Cc: Fabio Estevam Cc: Lucas Stach Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi b/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi index 849eb3443cde..719e63092c2e 100644 --- a/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi +++ b/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi @@ -587,7 +587,7 @@ pinctrl-0 = <&pinctrl_usdhc2>; bus-width = <4>; cd-gpios = <&gpio2 2 GPIO_ACTIVE_LOW>; - wp-gpios = <&gpio2 3 GPIO_ACTIVE_HIGH>; + disable-wp; vmmc-supply = <®_3p3v_sd>; vqmmc-supply = <®_3p3v>; status = "okay"; @@ -598,7 +598,7 @@ pinctrl-0 = <&pinctrl_usdhc3>; bus-width = <4>; cd-gpios = <&gpio2 0 GPIO_ACTIVE_LOW>; - wp-gpios = <&gpio2 1 GPIO_ACTIVE_HIGH>; + disable-wp; vmmc-supply = <®_3p3v_sd>; vqmmc-supply = <®_3p3v>; status = "okay"; @@ -1001,7 +1001,6 @@ MX6QDL_PAD_SD2_DAT1__SD2_DATA1 0x17059 MX6QDL_PAD_SD2_DAT2__SD2_DATA2 0x17059 MX6QDL_PAD_SD2_DAT3__SD2_DATA3 0x17059 - MX6QDL_PAD_NANDF_D3__GPIO2_IO03 0x40010040 MX6QDL_PAD_NANDF_D2__GPIO2_IO02 0x40010040 >; }; @@ -1014,7 +1013,6 @@ MX6QDL_PAD_SD3_DAT1__SD3_DATA1 0x17059 MX6QDL_PAD_SD3_DAT2__SD3_DATA2 0x17059 MX6QDL_PAD_SD3_DAT3__SD3_DATA3 0x17059 - MX6QDL_PAD_NANDF_D1__GPIO2_IO01 0x40010040 MX6QDL_PAD_NANDF_D0__GPIO2_IO00 0x40010040 >; From cf471438760565041c052ee5fc3de9465744a735 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sun, 8 Dec 2019 22:11:40 +0100 Subject: [PATCH 3309/3715] media: v4l2-device.h: Explicitly compare grp{id,mask} to zero in v4l2_device macros [ Upstream commit afb34781620274236bd9fc9246e22f6963ef5262 ] When building with Clang + -Wtautological-constant-compare, several of the ivtv and cx18 drivers warn along the lines of: drivers/media/pci/cx18/cx18-driver.c:1005:21: warning: converting the result of '<<' to a boolean always evaluates to true [-Wtautological-constant-compare] cx18_call_hw(cx, CX18_HW_GPIO_RESET_CTRL, ^ drivers/media/pci/cx18/cx18-cards.h:18:37: note: expanded from macro 'CX18_HW_GPIO_RESET_CTRL' #define CX18_HW_GPIO_RESET_CTRL (1 << 6) ^ 1 warning generated. This warning happens because the shift operation is implicitly converted to a boolean in v4l2_device_mask_call_all before being negated. This can be solved by just comparing the mask result to 0 explicitly so that there is no boolean conversion. The ultimate goal is to enable -Wtautological-compare globally because there are several subwarnings that would be helpful to have. For visual consistency and avoidance of these warnings in the future, all of the implicitly boolean conversions in the v4l2_device macros are converted to explicit ones as well. Link: https://github.com/ClangBuiltLinux/linux/issues/752 Reviewed-by: Ezequiel Garcia Reviewed-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- include/media/v4l2-device.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/media/v4l2-device.h b/include/media/v4l2-device.h index 8ffa94009d1a..76002416cead 100644 --- a/include/media/v4l2-device.h +++ b/include/media/v4l2-device.h @@ -268,7 +268,7 @@ static inline void v4l2_subdev_notify(struct v4l2_subdev *sd, struct v4l2_subdev *__sd; \ \ __v4l2_device_call_subdevs_p(v4l2_dev, __sd, \ - !(grpid) || __sd->grp_id == (grpid), o, f , \ + (grpid) == 0 || __sd->grp_id == (grpid), o, f , \ ##args); \ } while (0) @@ -280,7 +280,7 @@ static inline void v4l2_subdev_notify(struct v4l2_subdev *sd, ({ \ struct v4l2_subdev *__sd; \ __v4l2_device_call_subdevs_until_err_p(v4l2_dev, __sd, \ - !(grpid) || __sd->grp_id == (grpid), o, f , \ + (grpid) == 0 || __sd->grp_id == (grpid), o, f , \ ##args); \ }) @@ -294,8 +294,8 @@ static inline void v4l2_subdev_notify(struct v4l2_subdev *sd, struct v4l2_subdev *__sd; \ \ __v4l2_device_call_subdevs_p(v4l2_dev, __sd, \ - !(grpmsk) || (__sd->grp_id & (grpmsk)), o, f , \ - ##args); \ + (grpmsk) == 0 || (__sd->grp_id & (grpmsk)), o, \ + f , ##args); \ } while (0) /* @@ -308,8 +308,8 @@ static inline void v4l2_subdev_notify(struct v4l2_subdev *sd, ({ \ struct v4l2_subdev *__sd; \ __v4l2_device_call_subdevs_until_err_p(v4l2_dev, __sd, \ - !(grpmsk) || (__sd->grp_id & (grpmsk)), o, f , \ - ##args); \ + (grpmsk) == 0 || (__sd->grp_id & (grpmsk)), o, \ + f , ##args); \ }) /* From bf1357325ad163bf716512838728e657a6903d66 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 12 Dec 2019 11:35:58 +0100 Subject: [PATCH 3310/3715] reiserfs: Fix spurious unlock in reiserfs_fill_super() error handling [ Upstream commit 4d5c1adaf893b8aa52525d2b81995e949bcb3239 ] When we fail to allocate string for journal device name we jump to 'error' label which tries to unlock reiserfs write lock which is not held. Jump to 'error_unlocked' instead. Fixes: f32485be8397 ("reiserfs: delay reiserfs lock until journal initialization") Signed-off-by: Jan Kara Signed-off-by: Sasha Levin --- fs/reiserfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 5208d85dd30c..9caf3948417c 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1954,7 +1954,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) if (!sbi->s_jdev) { SWARN(silent, s, "", "Cannot allocate memory for " "journal device name"); - goto error; + goto error_unlocked; } } #ifdef CONFIG_QUOTA From 2730ea6d5c5242b0de0ec7599ce2aadb8d9e0a7a Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Sun, 15 Dec 2019 10:14:51 -0600 Subject: [PATCH 3311/3715] fore200e: Fix incorrect checks of NULL pointer dereference [ Upstream commit bbd20c939c8aa3f27fa30e86691af250bf92973a ] In fore200e_send and fore200e_close, the pointers from the arguments are dereferenced in the variable declaration block and then checked for NULL. The patch fixes these issues by avoiding NULL pointer dereferences. Signed-off-by: Aditya Pakki Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/atm/fore200e.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index f8b7e86907cc..0a1ad1a1d34f 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -1496,12 +1496,14 @@ fore200e_open(struct atm_vcc *vcc) static void fore200e_close(struct atm_vcc* vcc) { - struct fore200e* fore200e = FORE200E_DEV(vcc->dev); struct fore200e_vcc* fore200e_vcc; + struct fore200e* fore200e; struct fore200e_vc_map* vc_map; unsigned long flags; ASSERT(vcc); + fore200e = FORE200E_DEV(vcc->dev); + ASSERT((vcc->vpi >= 0) && (vcc->vpi < 1<vci >= 0) && (vcc->vci < 1<dev); - struct fore200e_vcc* fore200e_vcc = FORE200E_VCC(vcc); + struct fore200e* fore200e; + struct fore200e_vcc* fore200e_vcc; struct fore200e_vc_map* vc_map; - struct host_txq* txq = &fore200e->host_txq; + struct host_txq* txq; struct host_txq_entry* entry; struct tpd* tpd; struct tpd_haddr tpd_haddr; @@ -1562,9 +1564,18 @@ fore200e_send(struct atm_vcc *vcc, struct sk_buff *skb) unsigned char* data; unsigned long flags; - ASSERT(vcc); - ASSERT(fore200e); - ASSERT(fore200e_vcc); + if (!vcc) + return -EINVAL; + + fore200e = FORE200E_DEV(vcc->dev); + fore200e_vcc = FORE200E_VCC(vcc); + + if (!fore200e) + return -EINVAL; + + txq = &fore200e->host_txq; + if (!fore200e_vcc) + return -EINVAL; if (!test_bit(ATM_VF_READY, &vcc->flags)) { DPRINTK(1, "VC %d.%d.%d not ready for tx\n", vcc->itf, vcc->vpi, vcc->vpi); From 6e46de4f308a3665c110d8237b968364901d4d58 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 17 Dec 2019 20:42:57 -0700 Subject: [PATCH 3312/3715] ALSA: usx2y: Adjust indentation in snd_usX2Y_hwdep_dsp_status [ Upstream commit df4654bd6e42125d9b85ce3a26eaca2935290b98 ] Clang warns: ../sound/usb/usx2y/usX2Yhwdep.c:122:3: warning: misleading indentation; statement is not part of the previous 'if' [-Wmisleading-indentation] info->version = USX2Y_DRIVER_VERSION; ^ ../sound/usb/usx2y/usX2Yhwdep.c:120:2: note: previous statement is here if (us428->chip_status & USX2Y_STAT_CHIP_INIT) ^ 1 warning generated. This warning occurs because there is a space before the tab on this line. Remove it so that the indentation is consistent with the Linux kernel coding style and clang no longer warns. This was introduced before the beginning of git history so no fixes tag. Link: https://github.com/ClangBuiltLinux/linux/issues/831 Signed-off-by: Nathan Chancellor Link: https://lore.kernel.org/r/20191218034257.54535-1-natechancellor@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/usx2y/usX2Yhwdep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/usx2y/usX2Yhwdep.c b/sound/usb/usx2y/usX2Yhwdep.c index f4b3cda412fc..e75271e731b2 100644 --- a/sound/usb/usx2y/usX2Yhwdep.c +++ b/sound/usb/usx2y/usX2Yhwdep.c @@ -131,7 +131,7 @@ static int snd_usX2Y_hwdep_dsp_status(struct snd_hwdep *hw, info->num_dsps = 2; // 0: Prepad Data, 1: FPGA Code if (us428->chip_status & USX2Y_STAT_CHIP_INIT) info->chip_ready = 1; - info->version = USX2Y_DRIVER_VERSION; + info->version = USX2Y_DRIVER_VERSION; return 0; } From 284fa04ce0f7887f5d1a183e75e10734df2a7a5b Mon Sep 17 00:00:00 2001 From: Phong Tran Date: Wed, 27 Nov 2019 00:55:26 +0700 Subject: [PATCH 3313/3715] b43legacy: Fix -Wcast-function-type [ Upstream commit 475eec112e4267232d10f4afe2f939a241692b6c ] correct usage prototype of callback in tasklet_init(). Report by https://github.com/KSPP/linux/issues/20 Tested-by: Larry Finger Signed-off-by: Phong Tran Reviewed-by: Kees Cook Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/broadcom/b43legacy/main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/b43legacy/main.c b/drivers/net/wireless/broadcom/b43legacy/main.c index f1e3dad57629..f435bd0f8b5b 100644 --- a/drivers/net/wireless/broadcom/b43legacy/main.c +++ b/drivers/net/wireless/broadcom/b43legacy/main.c @@ -1304,8 +1304,9 @@ static void handle_irq_ucode_debug(struct b43legacy_wldev *dev) } /* Interrupt handler bottom-half */ -static void b43legacy_interrupt_tasklet(struct b43legacy_wldev *dev) +static void b43legacy_interrupt_tasklet(unsigned long data) { + struct b43legacy_wldev *dev = (struct b43legacy_wldev *)data; u32 reason; u32 dma_reason[ARRAY_SIZE(dev->dma_reason)]; u32 merged_dma_reason = 0; @@ -3775,7 +3776,7 @@ static int b43legacy_one_core_attach(struct ssb_device *dev, b43legacy_set_status(wldev, B43legacy_STAT_UNINIT); wldev->bad_frames_preempt = modparam_bad_frames_preempt; tasklet_init(&wldev->isr_tasklet, - (void (*)(unsigned long))b43legacy_interrupt_tasklet, + b43legacy_interrupt_tasklet, (unsigned long)wldev); if (modparam_pio) wldev->__using_pio = true; From 79a0c15ca4b9ae01fcac8ddf4535f80c7a4f4176 Mon Sep 17 00:00:00 2001 From: Phong Tran Date: Wed, 27 Nov 2019 00:55:27 +0700 Subject: [PATCH 3314/3715] ipw2x00: Fix -Wcast-function-type [ Upstream commit ebd77feb27e91bb5fe35a7818b7c13ea7435fb98 ] correct usage prototype of callback in tasklet_init(). Report by https://github.com/KSPP/linux/issues/20 Signed-off-by: Phong Tran Reviewed-by: Kees Cook Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/ipw2x00/ipw2100.c | 7 ++++--- drivers/net/wireless/intel/ipw2x00/ipw2200.c | 5 +++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2100.c b/drivers/net/wireless/intel/ipw2x00/ipw2100.c index 19c442cb93e4..8fbdd7d4fd0c 100644 --- a/drivers/net/wireless/intel/ipw2x00/ipw2100.c +++ b/drivers/net/wireless/intel/ipw2x00/ipw2100.c @@ -3220,8 +3220,9 @@ static void ipw2100_tx_send_data(struct ipw2100_priv *priv) } } -static void ipw2100_irq_tasklet(struct ipw2100_priv *priv) +static void ipw2100_irq_tasklet(unsigned long data) { + struct ipw2100_priv *priv = (struct ipw2100_priv *)data; struct net_device *dev = priv->net_dev; unsigned long flags; u32 inta, tmp; @@ -6027,7 +6028,7 @@ static void ipw2100_rf_kill(struct work_struct *work) spin_unlock_irqrestore(&priv->low_lock, flags); } -static void ipw2100_irq_tasklet(struct ipw2100_priv *priv); +static void ipw2100_irq_tasklet(unsigned long data); static const struct net_device_ops ipw2100_netdev_ops = { .ndo_open = ipw2100_open, @@ -6157,7 +6158,7 @@ static struct net_device *ipw2100_alloc_device(struct pci_dev *pci_dev, INIT_DELAYED_WORK(&priv->rf_kill, ipw2100_rf_kill); INIT_DELAYED_WORK(&priv->scan_event, ipw2100_scan_event); - tasklet_init(&priv->irq_tasklet, (void (*)(unsigned long)) + tasklet_init(&priv->irq_tasklet, ipw2100_irq_tasklet, (unsigned long)priv); /* NOTE: We do not start the deferred work for status checks yet */ diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2200.c b/drivers/net/wireless/intel/ipw2x00/ipw2200.c index 8da87496cb58..2d0734ab3f74 100644 --- a/drivers/net/wireless/intel/ipw2x00/ipw2200.c +++ b/drivers/net/wireless/intel/ipw2x00/ipw2200.c @@ -1966,8 +1966,9 @@ static void notify_wx_assoc_event(struct ipw_priv *priv) wireless_send_event(priv->net_dev, SIOCGIWAP, &wrqu, NULL); } -static void ipw_irq_tasklet(struct ipw_priv *priv) +static void ipw_irq_tasklet(unsigned long data) { + struct ipw_priv *priv = (struct ipw_priv *)data; u32 inta, inta_mask, handled = 0; unsigned long flags; int rc = 0; @@ -10702,7 +10703,7 @@ static int ipw_setup_deferred_work(struct ipw_priv *priv) INIT_WORK(&priv->qos_activate, ipw_bg_qos_activate); #endif /* CONFIG_IPW2200_QOS */ - tasklet_init(&priv->irq_tasklet, (void (*)(unsigned long)) + tasklet_init(&priv->irq_tasklet, ipw_irq_tasklet, (unsigned long)priv); return ret; From f603cec80d9e851dd27dcc2733f4866f8eb94b4f Mon Sep 17 00:00:00 2001 From: Phong Tran Date: Wed, 27 Nov 2019 00:55:28 +0700 Subject: [PATCH 3315/3715] iwlegacy: Fix -Wcast-function-type [ Upstream commit da5e57e8a6a3e69dac2937ba63fa86355628fbb2 ] correct usage prototype of callback in tasklet_init(). Report by https://github.com/KSPP/linux/issues/20 Signed-off-by: Phong Tran Reviewed-by: Kees Cook Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlegacy/3945-mac.c | 5 +++-- drivers/net/wireless/intel/iwlegacy/4965-mac.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlegacy/3945-mac.c b/drivers/net/wireless/intel/iwlegacy/3945-mac.c index 329f3a63dadd..0fb81151a132 100644 --- a/drivers/net/wireless/intel/iwlegacy/3945-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/3945-mac.c @@ -1399,8 +1399,9 @@ il3945_dump_nic_error_log(struct il_priv *il) } static void -il3945_irq_tasklet(struct il_priv *il) +il3945_irq_tasklet(unsigned long data) { + struct il_priv *il = (struct il_priv *)data; u32 inta, handled = 0; u32 inta_fh; unsigned long flags; @@ -3432,7 +3433,7 @@ il3945_setup_deferred_work(struct il_priv *il) setup_timer(&il->watchdog, il_bg_watchdog, (unsigned long)il); tasklet_init(&il->irq_tasklet, - (void (*)(unsigned long))il3945_irq_tasklet, + il3945_irq_tasklet, (unsigned long)il); } diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c index de9b6522c43f..665e82effb03 100644 --- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c @@ -4363,8 +4363,9 @@ il4965_synchronize_irq(struct il_priv *il) } static void -il4965_irq_tasklet(struct il_priv *il) +il4965_irq_tasklet(unsigned long data) { + struct il_priv *il = (struct il_priv *)data; u32 inta, handled = 0; u32 inta_fh; unsigned long flags; @@ -6264,7 +6265,7 @@ il4965_setup_deferred_work(struct il_priv *il) setup_timer(&il->watchdog, il_bg_watchdog, (unsigned long)il); tasklet_init(&il->irq_tasklet, - (void (*)(unsigned long))il4965_irq_tasklet, + il4965_irq_tasklet, (unsigned long)il); } From f5fb8b53423e989da9df0e87981faf90f373ebcc Mon Sep 17 00:00:00 2001 From: Phong Tran Date: Wed, 27 Nov 2019 00:55:29 +0700 Subject: [PATCH 3316/3715] rtlwifi: rtl_pci: Fix -Wcast-function-type [ Upstream commit cb775c88da5d48a85d99d95219f637b6fad2e0e9 ] correct usage prototype of callback in tasklet_init(). Report by https://github.com/KSPP/linux/issues/20 Signed-off-by: Phong Tran Reviewed-by: Kees Cook Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/realtek/rtlwifi/pci.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c index 457a0f725c8a..ab74f3155854 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c @@ -1091,13 +1091,15 @@ done: return ret; } -static void _rtl_pci_irq_tasklet(struct ieee80211_hw *hw) +static void _rtl_pci_irq_tasklet(unsigned long data) { + struct ieee80211_hw *hw = (struct ieee80211_hw *)data; _rtl_pci_tx_chk_waitq(hw); } -static void _rtl_pci_prepare_bcn_tasklet(struct ieee80211_hw *hw) +static void _rtl_pci_prepare_bcn_tasklet(unsigned long data) { + struct ieee80211_hw *hw = (struct ieee80211_hw *)data; struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); struct rtl_mac *mac = rtl_mac(rtl_priv(hw)); @@ -1223,10 +1225,10 @@ static void _rtl_pci_init_struct(struct ieee80211_hw *hw, /*task */ tasklet_init(&rtlpriv->works.irq_tasklet, - (void (*)(unsigned long))_rtl_pci_irq_tasklet, + _rtl_pci_irq_tasklet, (unsigned long)hw); tasklet_init(&rtlpriv->works.irq_prepare_bcn_tasklet, - (void (*)(unsigned long))_rtl_pci_prepare_bcn_tasklet, + _rtl_pci_prepare_bcn_tasklet, (unsigned long)hw); INIT_WORK(&rtlpriv->works.lps_change_work, rtl_lps_change_work_callback); From 4a30decabbb4a4d15dd5da43f9e1780062bdc487 Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Sun, 15 Dec 2019 13:58:58 -0600 Subject: [PATCH 3317/3715] orinoco: avoid assertion in case of NULL pointer [ Upstream commit c705f9fc6a1736dcf6ec01f8206707c108dca824 ] In ezusb_init, if upriv is NULL, the code crashes. However, the caller in ezusb_probe can handle the error and print the failure message. The patch replaces the BUG_ON call to error return. Signed-off-by: Aditya Pakki Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/intersil/orinoco/orinoco_usb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c index 95015d74b1c0..5a64674a5c8d 100644 --- a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c +++ b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c @@ -1364,7 +1364,8 @@ static int ezusb_init(struct hermes *hw) int retval; BUG_ON(in_interrupt()); - BUG_ON(!upriv); + if (!upriv) + return -EINVAL; upriv->reply_count = 0; /* Write the MAGIC number on the simulated registers to keep From 4e2fd61c42be3bc47bdd71816f4b82be27501eeb Mon Sep 17 00:00:00 2001 From: Erik Kaneda Date: Tue, 17 Dec 2019 11:35:20 -0800 Subject: [PATCH 3318/3715] ACPICA: Disassembler: create buffer fields in ACPI_PARSE_LOAD_PASS1 [ Upstream commit 5ddbd77181dfca61b16d2e2222382ea65637f1b9 ] ACPICA commit 29cc8dbc5463a93625bed87d7550a8bed8913bf4 create_buffer_field is a deferred op that is typically processed in load pass 2. However, disassembly of control method contents walk the parse tree with ACPI_PARSE_LOAD_PASS1 and AML_CREATE operators are processed in a later walk. This is a problem when there is a control method that has the same name as the AML_CREATE object. In this case, any use of the name segment will be detected as a method call rather than a reference to a buffer field. If this is detected as a method call, it can result in a mal-formed parse tree if the control methods have parameters. This change in processing AML_CREATE ops earlier solves this issue by inserting the named object in the ACPI namespace so that references to this name would be detected as a name string rather than a method call. Link: https://github.com/acpica/acpica/commit/29cc8dbc Reported-by: Elia Geretto Tested-by: Elia Geretto Signed-off-by: Bob Moore Signed-off-by: Erik Kaneda Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/acpica/dsfield.c | 2 +- drivers/acpi/acpica/dswload.c | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/acpica/dsfield.c b/drivers/acpi/acpica/dsfield.c index 7bcf5f5ea029..8df4a49a99a6 100644 --- a/drivers/acpi/acpica/dsfield.c +++ b/drivers/acpi/acpica/dsfield.c @@ -273,7 +273,7 @@ cleanup: * FUNCTION: acpi_ds_get_field_names * * PARAMETERS: info - create_field info structure - * ` walk_state - Current method state + * walk_state - Current method state * arg - First parser arg for the field name list * * RETURN: Status diff --git a/drivers/acpi/acpica/dswload.c b/drivers/acpi/acpica/dswload.c index eaa859a89702..1d82e1419397 100644 --- a/drivers/acpi/acpica/dswload.c +++ b/drivers/acpi/acpica/dswload.c @@ -444,6 +444,27 @@ acpi_status acpi_ds_load1_end_op(struct acpi_walk_state *walk_state) ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH, "Op=%p State=%p\n", op, walk_state)); + /* + * Disassembler: handle create field operators here. + * + * create_buffer_field is a deferred op that is typically processed in load + * pass 2. However, disassembly of control method contents walk the parse + * tree with ACPI_PARSE_LOAD_PASS1 and AML_CREATE operators are processed + * in a later walk. This is a problem when there is a control method that + * has the same name as the AML_CREATE object. In this case, any use of the + * name segment will be detected as a method call rather than a reference + * to a buffer field. + * + * This earlier creation during disassembly solves this issue by inserting + * the named object in the ACPI namespace so that references to this name + * would be a name string rather than a method call. + */ + if ((walk_state->parse_flags & ACPI_PARSE_DISASSEMBLE) && + (walk_state->op_info->flags & AML_CREATE)) { + status = acpi_ds_create_buffer_field(op, walk_state); + return_ACPI_STATUS(status); + } + /* We are only interested in opcodes that have an associated name */ if (!(walk_state->op_info->flags & (AML_NAMED | AML_FIELD))) { From 752c854192b5fded885bdc883fdb400a30cfda04 Mon Sep 17 00:00:00 2001 From: Can Guo Date: Mon, 25 Nov 2019 22:53:33 -0800 Subject: [PATCH 3319/3715] scsi: ufs: Complete pending requests in host reset and restore path [ Upstream commit 2df74b6985b51e77756e2e8faa16c45ca3ba53c5 ] In UFS host reset and restore path, before probe, we stop and start the host controller once. After host controller is stopped, the pending requests, if any, are cleared from the doorbell, but no completion IRQ would be raised due to the hba is stopped. These pending requests shall be completed along with the first NOP_OUT command (as it is the first command which can raise a transfer completion IRQ) sent during probe. Since the OCSs of these pending requests are not SUCCESS (because they are not yet literally finished), their UPIUs shall be dumped. When there are multiple pending requests, the UPIU dump can be overwhelming and may lead to stability issues because it is in atomic context. Therefore, before probe, complete these pending requests right after host controller is stopped and silence the UPIU dump from them. Link: https://lore.kernel.org/r/1574751214-8321-5-git-send-email-cang@qti.qualcomm.com Reviewed-by: Alim Akhtar Reviewed-by: Bean Huo Tested-by: Bean Huo Signed-off-by: Can Guo Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/ufs/ufshcd.c | 24 ++++++++++-------------- drivers/scsi/ufs/ufshcd.h | 2 ++ 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index ce40de334f11..c35045324695 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -4580,7 +4580,7 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) break; } /* end of switch */ - if (host_byte(result) != DID_OK) + if ((host_byte(result) != DID_OK) && !hba->silence_err_logs) ufshcd_print_trs(hba, 1 << lrbp->task_tag, true); return result; } @@ -5109,8 +5109,8 @@ static void ufshcd_err_handler(struct work_struct *work) /* * if host reset is required then skip clearing the pending - * transfers forcefully because they will automatically get - * cleared after link startup. + * transfers forcefully because they will get cleared during + * host reset and restore */ if (needs_reset) goto skip_pending_xfer_clear; @@ -5749,9 +5749,15 @@ static int ufshcd_host_reset_and_restore(struct ufs_hba *hba) int err; unsigned long flags; - /* Reset the host controller */ + /* + * Stop the host controller and complete the requests + * cleared by h/w + */ spin_lock_irqsave(hba->host->host_lock, flags); ufshcd_hba_stop(hba, false); + hba->silence_err_logs = true; + ufshcd_complete_requests(hba); + hba->silence_err_logs = false; spin_unlock_irqrestore(hba->host->host_lock, flags); /* scale up clocks to max frequency before full reinitialization */ @@ -5785,22 +5791,12 @@ out: static int ufshcd_reset_and_restore(struct ufs_hba *hba) { int err = 0; - unsigned long flags; int retries = MAX_HOST_RESET_RETRIES; do { err = ufshcd_host_reset_and_restore(hba); } while (err && --retries); - /* - * After reset the door-bell might be cleared, complete - * outstanding requests in s/w here. - */ - spin_lock_irqsave(hba->host->host_lock, flags); - ufshcd_transfer_req_compl(hba); - ufshcd_tmc_handler(hba); - spin_unlock_irqrestore(hba->host->host_lock, flags); - return err; } diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index cdc8bd05f7df..4aac4d86f57b 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -485,6 +485,7 @@ struct ufs_stats { * @uic_error: UFS interconnect layer error status * @saved_err: sticky error mask * @saved_uic_err: sticky UIC error mask + * @silence_err_logs: flag to silence error logs * @dev_cmd: ufs device management command information * @last_dme_cmd_tstamp: time stamp of the last completed DME command * @auto_bkops_enabled: to track whether bkops is enabled in device @@ -621,6 +622,7 @@ struct ufs_hba { u32 saved_err; u32 saved_uic_err; struct ufs_stats ufs_stats; + bool silence_err_logs; /* Device management request data */ struct ufs_dev_cmd dev_cmd; From 3f292dcec12777b0afcba5532d323b186fbca2ef Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 17 Dec 2019 18:42:20 -0700 Subject: [PATCH 3320/3715] scsi: aic7xxx: Adjust indentation in ahc_find_syncrate [ Upstream commit 4dbc96ad65c45cdd4e895ed7ae4c151b780790c5 ] Clang warns: ../drivers/scsi/aic7xxx/aic7xxx_core.c:2317:5: warning: misleading indentation; statement is not part of the previous 'if' [-Wmisleading-indentation] if ((syncrate->sxfr_u2 & ST_SXFR) != 0) ^ ../drivers/scsi/aic7xxx/aic7xxx_core.c:2310:4: note: previous statement is here if (syncrate == &ahc_syncrates[maxsync]) ^ 1 warning generated. This warning occurs because there is a space amongst the tabs on this line. Remove it so that the indentation is consistent with the Linux kernel coding style and clang no longer warns. This has been a problem since the beginning of git history hence no fixes tag. Link: https://github.com/ClangBuiltLinux/linux/issues/817 Link: https://lore.kernel.org/r/20191218014220.52746-1-natechancellor@gmail.com Signed-off-by: Nathan Chancellor Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/aic7xxx/aic7xxx_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/aic7xxx/aic7xxx_core.c b/drivers/scsi/aic7xxx/aic7xxx_core.c index 381846164003..fdbb0a3dc9b4 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_core.c +++ b/drivers/scsi/aic7xxx/aic7xxx_core.c @@ -2321,7 +2321,7 @@ ahc_find_syncrate(struct ahc_softc *ahc, u_int *period, * At some speeds, we only support * ST transfers. */ - if ((syncrate->sxfr_u2 & ST_SXFR) != 0) + if ((syncrate->sxfr_u2 & ST_SXFR) != 0) *ppr_options &= ~MSG_EXT_PPR_DT_REQ; break; } From ff213227e66062249a665acbbeade6c0cab585ef Mon Sep 17 00:00:00 2001 From: Bibby Hsieh Date: Tue, 10 Dec 2019 13:05:21 +0800 Subject: [PATCH 3321/3715] drm/mediatek: handle events when enabling/disabling crtc [ Upstream commit 411f5c1eacfebb1f6e40b653d29447cdfe7282aa ] The driver currently handles vblank events only when updating planes on an already enabled CRTC. The atomic update API however allows requesting an event when enabling or disabling a CRTC. This currently leads to event objects being leaked in the kernel and to events not being sent out. Fix it. Signed-off-by: Bibby Hsieh Signed-off-by: CK Hu Signed-off-by: Sasha Levin --- drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index 658b8dd45b83..3ea311d32fa9 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -307,6 +307,7 @@ err_pm_runtime_put: static void mtk_crtc_ddp_hw_fini(struct mtk_drm_crtc *mtk_crtc) { struct drm_device *drm = mtk_crtc->base.dev; + struct drm_crtc *crtc = &mtk_crtc->base; int i; DRM_DEBUG_DRIVER("%s\n", __func__); @@ -328,6 +329,13 @@ static void mtk_crtc_ddp_hw_fini(struct mtk_drm_crtc *mtk_crtc) mtk_disp_mutex_unprepare(mtk_crtc->mutex); pm_runtime_put(drm->dev); + + if (crtc->state->event && !crtc->state->active) { + spin_lock_irq(&crtc->dev->event_lock); + drm_crtc_send_vblank_event(crtc, crtc->state->event); + crtc->state->event = NULL; + spin_unlock_irq(&crtc->dev->event_lock); + } } static void mtk_crtc_ddp_config(struct drm_crtc *crtc) From 285f156fc859f0a922173a81af7cc4ccb6ac4b5b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 11 Dec 2019 14:52:21 +0100 Subject: [PATCH 3322/3715] ARM: dts: r8a7779: Add device node for ARM global timer [ Upstream commit 8443ffd1bbd5be74e9b12db234746d12e8ea93e2 ] Add a device node for the global timer, which is part of the Cortex-A9 MPCore. The global timer can serve as an accurate (4 ns) clock source for scheduling and delay loops. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20191211135222.26770-4-geert+renesas@glider.be Signed-off-by: Sasha Levin --- arch/arm/boot/dts/r8a7779.dtsi | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm/boot/dts/r8a7779.dtsi b/arch/arm/boot/dts/r8a7779.dtsi index 8ee0b2ca5d39..2face089d65b 100644 --- a/arch/arm/boot/dts/r8a7779.dtsi +++ b/arch/arm/boot/dts/r8a7779.dtsi @@ -67,6 +67,14 @@ <0xf0000100 0x100>; }; + timer@f0000200 { + compatible = "arm,cortex-a9-global-timer"; + reg = <0xf0000200 0x100>; + interrupts = ; + clocks = <&cpg_clocks R8A7779_CLK_ZS>; + }; + timer@f0000600 { compatible = "arm,cortex-a9-twd-timer"; reg = <0xf0000600 0x20>; From 35557d209c94eae2be6613141d2f6f9902dc110c Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Mon, 16 Dec 2019 12:01:16 -0700 Subject: [PATCH 3323/3715] dmaengine: Store module owner in dma_device struct [ Upstream commit dae7a589c18a4d979d5f14b09374e871b995ceb1 ] dma_chan_to_owner() dereferences the driver from the struct device to obtain the owner and call module_[get|put](). However, if the backing device is unbound before the dma_device is unregistered, the driver will be cleared and this will cause a NULL pointer dereference. Instead, store a pointer to the owner module in the dma_device struct so the module reference can be properly put when the channel is put, even if the backing device was destroyed first. This change helps to support a safer unbind of DMA engines. If the dma_device is unregistered in the driver's remove function, there's no guarantee that there are no existing clients and a users action may trigger the WARN_ONCE in dma_async_device_unregister() which is unlikely to leave the system in a consistent state. Instead, a better approach is to allow the backing driver to go away and fail any subsequent requests to it. Signed-off-by: Logan Gunthorpe Link: https://lore.kernel.org/r/20191216190120.21374-2-logang@deltatee.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/dmaengine.c | 4 +++- include/linux/dmaengine.h | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index b451354735d3..faaaf10311ec 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -192,7 +192,7 @@ __dma_device_satisfies_mask(struct dma_device *device, static struct module *dma_chan_to_owner(struct dma_chan *chan) { - return chan->device->dev->driver->owner; + return chan->device->owner; } /** @@ -928,6 +928,8 @@ int dma_async_device_register(struct dma_device *device) return -EIO; } + device->owner = device->dev->driver->owner; + if (dma_has_cap(DMA_MEMCPY, device->cap_mask) && !device->device_prep_dma_memcpy) { dev_err(device->dev, "Device claims capability %s, but op is not defined\n", diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 087cbe776868..8089e28539f1 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -677,6 +677,7 @@ struct dma_filter { * @fill_align: alignment shift for memset operations * @dev_id: unique device ID * @dev: struct device reference for dma mapping api + * @owner: owner module (automatically set based on the provided dev) * @src_addr_widths: bit mask of src addr widths the device supports * @dst_addr_widths: bit mask of dst addr widths the device supports * @directions: bit mask of slave direction the device supports since @@ -738,6 +739,7 @@ struct dma_device { int dev_id; struct device *dev; + struct module *owner; u32 src_addr_widths; u32 dst_addr_widths; From 37a6fc4398a18616c835df4a63cafc81c3b2e065 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valdis=20Kl=C4=93tnieks?= Date: Thu, 5 Dec 2019 21:36:07 -0500 Subject: [PATCH 3324/3715] x86/vdso: Provide missing include file [ Upstream commit bff47c2302cc249bcd550b17067f8dddbd4b6f77 ] When building with C=1, sparse issues a warning: CHECK arch/x86/entry/vdso/vdso32-setup.c arch/x86/entry/vdso/vdso32-setup.c:28:28: warning: symbol 'vdso32_enabled' was not declared. Should it be static? Provide the missing header file. Signed-off-by: Valdis Kletnieks Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Andy Lutomirski Cc: Ingo Molnar Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/36224.1575599767@turing-police Signed-off-by: Sasha Levin --- arch/x86/entry/vdso/vdso32-setup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c index 42d4c89f990e..ddff0ca6f509 100644 --- a/arch/x86/entry/vdso/vdso32-setup.c +++ b/arch/x86/entry/vdso/vdso32-setup.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include From b8bedd5bfaa6a1cc9df7c96d9723d9e7aa882f8d Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Thu, 12 Dec 2019 11:20:30 +0900 Subject: [PATCH 3325/3715] PM / devfreq: rk3399_dmc: Add COMPILE_TEST and HAVE_ARM_SMCCC dependency [ Upstream commit eff5d31f7407fa9d31fb840106f1593399457298 ] To build test, add COMPILE_TEST depedency to both ARM_RK3399_DMC_DEVFREQ and DEVFREQ_EVENT_ROCKCHIP_DFI configuration. And ARM_RK3399_DMC_DEVFREQ used the SMCCC interface so that add HAVE_ARM_SMCCC dependency to prevent the build break. Reported-by: kbuild test robot Signed-off-by: Chanwoo Choi Signed-off-by: Sasha Levin --- drivers/devfreq/Kconfig | 3 ++- drivers/devfreq/event/Kconfig | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig index 6a172d338f6d..4c4ec68b0566 100644 --- a/drivers/devfreq/Kconfig +++ b/drivers/devfreq/Kconfig @@ -103,7 +103,8 @@ config ARM_TEGRA_DEVFREQ config ARM_RK3399_DMC_DEVFREQ tristate "ARM RK3399 DMC DEVFREQ Driver" - depends on ARCH_ROCKCHIP + depends on (ARCH_ROCKCHIP && HAVE_ARM_SMCCC) || \ + (COMPILE_TEST && HAVE_ARM_SMCCC) select DEVFREQ_EVENT_ROCKCHIP_DFI select DEVFREQ_GOV_SIMPLE_ONDEMAND select PM_DEVFREQ_EVENT diff --git a/drivers/devfreq/event/Kconfig b/drivers/devfreq/event/Kconfig index cd949800eed9..8851bc4e8e3e 100644 --- a/drivers/devfreq/event/Kconfig +++ b/drivers/devfreq/event/Kconfig @@ -33,7 +33,7 @@ config DEVFREQ_EVENT_EXYNOS_PPMU config DEVFREQ_EVENT_ROCKCHIP_DFI tristate "ROCKCHIP DFI DEVFREQ event Driver" - depends on ARCH_ROCKCHIP + depends on ARCH_ROCKCHIP || COMPILE_TEST help This add the devfreq-event driver for Rockchip SoC. It provides DFI (DDR Monitor Module) driver to count ddr load. From be95acb77293527a1d0b649b6a0ac4403edc47ed Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 18 Dec 2019 20:48:10 +0100 Subject: [PATCH 3326/3715] pinctrl: sh-pfc: sh7269: Fix CAN function GPIOs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 02aeb2f21530c98fc3ca51028eda742a3fafbd9f ] pinmux_func_gpios[] contains a hole due to the missing function GPIO definition for the "CTX0&CTX1" signal, which is the logical "AND" of the first two CAN outputs. A closer look reveals other issues: - Some functionality is available on alternative pins, but the PINMUX_DATA() entries is using the wrong marks, - Several configurations are missing. Fix this by: - Renaming CTX0CTX1CTX2_MARK, CRX0CRX1_PJ22_MARK, and CRX0CRX1CRX2_PJ20_MARK to CTX0_CTX1_CTX2_MARK, CRX0_CRX1_PJ22_MARK, resp. CRX0_CRX1_CRX2_PJ20_MARK for consistency with the corresponding enum IDs, - Adding all missing enum IDs and marks, - Use the right (*_PJ2x) variants for alternative pins, - Adding all missing configurations to pinmux_data[], - Adding all missing function GPIO definitions to pinmux_func_gpios[]. See SH7268 Group, SH7269 Group User’s Manual: Hardware, Rev. 2.00: [1] Table 1.4 List of Pins [2] Figure 23.29 Connection Example when Using Channels 0 and 1 as One Channel (64 Mailboxes × 1 Channel) and Channel 2 as One Channel (32 Mailboxes × 1 Channel), [3] Figure 23.30 Connection Example when Using Channels 0, 1, and 2 as One Channel (96 Mailboxes × 1 Channel), [4] Table 48.3 Multiplexed Pins (Port B), [5] Table 48.4 Multiplexed Pins (Port C), [6] Table 48.10 Multiplexed Pins (Port J), [7] Section 48.2.4 Port B Control Registers 0 to 5 (PBCR0 to PBCR5). Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20191218194812.12741-5-geert+renesas@glider.be Signed-off-by: Sasha Levin --- arch/sh/include/cpu-sh2a/cpu/sh7269.h | 11 ++++++-- drivers/pinctrl/sh-pfc/pfc-sh7269.c | 39 ++++++++++++++++++--------- 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/arch/sh/include/cpu-sh2a/cpu/sh7269.h b/arch/sh/include/cpu-sh2a/cpu/sh7269.h index d516e5d48818..b887cc402b71 100644 --- a/arch/sh/include/cpu-sh2a/cpu/sh7269.h +++ b/arch/sh/include/cpu-sh2a/cpu/sh7269.h @@ -78,8 +78,15 @@ enum { GPIO_FN_WDTOVF, /* CAN */ - GPIO_FN_CTX1, GPIO_FN_CRX1, GPIO_FN_CTX0, GPIO_FN_CTX0_CTX1, - GPIO_FN_CRX0, GPIO_FN_CRX0_CRX1, GPIO_FN_CRX0_CRX1_CRX2, + GPIO_FN_CTX2, GPIO_FN_CRX2, + GPIO_FN_CTX1, GPIO_FN_CRX1, + GPIO_FN_CTX0, GPIO_FN_CRX0, + GPIO_FN_CTX0_CTX1, GPIO_FN_CRX0_CRX1, + GPIO_FN_CTX0_CTX1_CTX2, GPIO_FN_CRX0_CRX1_CRX2, + GPIO_FN_CTX2_PJ21, GPIO_FN_CRX2_PJ20, + GPIO_FN_CTX1_PJ23, GPIO_FN_CRX1_PJ22, + GPIO_FN_CTX0_CTX1_PJ23, GPIO_FN_CRX0_CRX1_PJ22, + GPIO_FN_CTX0_CTX1_CTX2_PJ21, GPIO_FN_CRX0_CRX1_CRX2_PJ20, /* DMAC */ GPIO_FN_TEND0, GPIO_FN_DACK0, GPIO_FN_DREQ0, diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7269.c b/drivers/pinctrl/sh-pfc/pfc-sh7269.c index cfdb4fc177c3..3df0c0d139d0 100644 --- a/drivers/pinctrl/sh-pfc/pfc-sh7269.c +++ b/drivers/pinctrl/sh-pfc/pfc-sh7269.c @@ -740,13 +740,12 @@ enum { CRX0_MARK, CTX0_MARK, CRX1_MARK, CTX1_MARK, CRX2_MARK, CTX2_MARK, - CRX0_CRX1_MARK, - CRX0_CRX1_CRX2_MARK, - CTX0CTX1CTX2_MARK, + CRX0_CRX1_MARK, CTX0_CTX1_MARK, + CRX0_CRX1_CRX2_MARK, CTX0_CTX1_CTX2_MARK, CRX1_PJ22_MARK, CTX1_PJ23_MARK, CRX2_PJ20_MARK, CTX2_PJ21_MARK, - CRX0CRX1_PJ22_MARK, - CRX0CRX1CRX2_PJ20_MARK, + CRX0_CRX1_PJ22_MARK, CTX0_CTX1_PJ23_MARK, + CRX0_CRX1_CRX2_PJ20_MARK, CTX0_CTX1_CTX2_PJ21_MARK, /* VDC */ DV_CLK_MARK, @@ -824,6 +823,7 @@ static const u16 pinmux_data[] = { PINMUX_DATA(CS3_MARK, PC8MD_001), PINMUX_DATA(TXD7_MARK, PC8MD_010), PINMUX_DATA(CTX1_MARK, PC8MD_011), + PINMUX_DATA(CTX0_CTX1_MARK, PC8MD_100), PINMUX_DATA(PC7_DATA, PC7MD_000), PINMUX_DATA(CKE_MARK, PC7MD_001), @@ -836,11 +836,12 @@ static const u16 pinmux_data[] = { PINMUX_DATA(CAS_MARK, PC6MD_001), PINMUX_DATA(SCK7_MARK, PC6MD_010), PINMUX_DATA(CTX0_MARK, PC6MD_011), + PINMUX_DATA(CTX0_CTX1_CTX2_MARK, PC6MD_100), PINMUX_DATA(PC5_DATA, PC5MD_000), PINMUX_DATA(RAS_MARK, PC5MD_001), PINMUX_DATA(CRX0_MARK, PC5MD_011), - PINMUX_DATA(CTX0CTX1CTX2_MARK, PC5MD_100), + PINMUX_DATA(CTX0_CTX1_CTX2_MARK, PC5MD_100), PINMUX_DATA(IRQ0_PC_MARK, PC5MD_101), PINMUX_DATA(PC4_DATA, PC4MD_00), @@ -1292,30 +1293,32 @@ static const u16 pinmux_data[] = { PINMUX_DATA(LCD_DATA23_PJ23_MARK, PJ23MD_010), PINMUX_DATA(LCD_TCON6_MARK, PJ23MD_011), PINMUX_DATA(IRQ3_PJ_MARK, PJ23MD_100), - PINMUX_DATA(CTX1_MARK, PJ23MD_101), + PINMUX_DATA(CTX1_PJ23_MARK, PJ23MD_101), + PINMUX_DATA(CTX0_CTX1_PJ23_MARK, PJ23MD_110), PINMUX_DATA(PJ22_DATA, PJ22MD_000), PINMUX_DATA(DV_DATA22_MARK, PJ22MD_001), PINMUX_DATA(LCD_DATA22_PJ22_MARK, PJ22MD_010), PINMUX_DATA(LCD_TCON5_MARK, PJ22MD_011), PINMUX_DATA(IRQ2_PJ_MARK, PJ22MD_100), - PINMUX_DATA(CRX1_MARK, PJ22MD_101), - PINMUX_DATA(CRX0_CRX1_MARK, PJ22MD_110), + PINMUX_DATA(CRX1_PJ22_MARK, PJ22MD_101), + PINMUX_DATA(CRX0_CRX1_PJ22_MARK, PJ22MD_110), PINMUX_DATA(PJ21_DATA, PJ21MD_000), PINMUX_DATA(DV_DATA21_MARK, PJ21MD_001), PINMUX_DATA(LCD_DATA21_PJ21_MARK, PJ21MD_010), PINMUX_DATA(LCD_TCON4_MARK, PJ21MD_011), PINMUX_DATA(IRQ1_PJ_MARK, PJ21MD_100), - PINMUX_DATA(CTX2_MARK, PJ21MD_101), + PINMUX_DATA(CTX2_PJ21_MARK, PJ21MD_101), + PINMUX_DATA(CTX0_CTX1_CTX2_PJ21_MARK, PJ21MD_110), PINMUX_DATA(PJ20_DATA, PJ20MD_000), PINMUX_DATA(DV_DATA20_MARK, PJ20MD_001), PINMUX_DATA(LCD_DATA20_PJ20_MARK, PJ20MD_010), PINMUX_DATA(LCD_TCON3_MARK, PJ20MD_011), PINMUX_DATA(IRQ0_PJ_MARK, PJ20MD_100), - PINMUX_DATA(CRX2_MARK, PJ20MD_101), - PINMUX_DATA(CRX0CRX1CRX2_PJ20_MARK, PJ20MD_110), + PINMUX_DATA(CRX2_PJ20_MARK, PJ20MD_101), + PINMUX_DATA(CRX0_CRX1_CRX2_PJ20_MARK, PJ20MD_110), PINMUX_DATA(PJ19_DATA, PJ19MD_000), PINMUX_DATA(DV_DATA19_MARK, PJ19MD_001), @@ -1666,12 +1669,24 @@ static const struct pinmux_func pinmux_func_gpios[] = { GPIO_FN(WDTOVF), /* CAN */ + GPIO_FN(CTX2), + GPIO_FN(CRX2), GPIO_FN(CTX1), GPIO_FN(CRX1), GPIO_FN(CTX0), GPIO_FN(CRX0), + GPIO_FN(CTX0_CTX1), GPIO_FN(CRX0_CRX1), + GPIO_FN(CTX0_CTX1_CTX2), GPIO_FN(CRX0_CRX1_CRX2), + GPIO_FN(CTX2_PJ21), + GPIO_FN(CRX2_PJ20), + GPIO_FN(CTX1_PJ23), + GPIO_FN(CRX1_PJ22), + GPIO_FN(CTX0_CTX1_PJ23), + GPIO_FN(CRX0_CRX1_PJ22), + GPIO_FN(CTX0_CTX1_CTX2_PJ21), + GPIO_FN(CRX0_CRX1_CRX2_PJ20), /* DMAC */ GPIO_FN(TEND0), From 541078d09772ca69b5276be60657713dcc5ad72f Mon Sep 17 00:00:00 2001 From: Jiewei Ke Date: Fri, 27 Dec 2019 19:36:13 +0800 Subject: [PATCH 3327/3715] RDMA/rxe: Fix error type of mmap_offset [ Upstream commit 6ca18d8927d468c763571f78c9a7387a69ffa020 ] The type of mmap_offset should be u64 instead of int to match the type of mminfo.offset. If otherwise, after we create several thousands of CQs, it will run into overflow issues. Link: https://lore.kernel.org/r/20191227113613.5020-1-kejiewei.cn@gmail.com Signed-off-by: Jiewei Ke Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/sw/rxe/rxe_verbs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index d1cc89f6f2e3..46c8a66731e6 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -408,7 +408,7 @@ struct rxe_dev { struct list_head pending_mmaps; spinlock_t mmap_offset_lock; /* guard mmap_offset */ - int mmap_offset; + u64 mmap_offset; atomic64_t stats_counters[RXE_NUM_OF_COUNTERS]; From a801998986776ba437493f53001dd3eeb98db899 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Fri, 3 Jan 2020 22:35:03 -0800 Subject: [PATCH 3328/3715] clk: sunxi-ng: add mux and pll notifiers for A64 CPU clock [ Upstream commit ec97faff743b398e21f74a54c81333f3390093aa ] The A64 PLL_CPU clock has the same instability if some factor changed without the PLL gated like other SoCs with sun6i-style CCU, e.g. A33, H3. Add the mux and pll notifiers for A64 CPU clock to workaround the problem. Fixes: c6a0637460c2 ("clk: sunxi-ng: Add A64 clocks") Signed-off-by: Icenowy Zheng Signed-off-by: Vasily Khoruzhick Signed-off-by: Maxime Ripard Signed-off-by: Sasha Levin --- drivers/clk/sunxi-ng/ccu-sun50i-a64.c | 28 ++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-a64.c b/drivers/clk/sunxi-ng/ccu-sun50i-a64.c index eaafc038368f..183985c8c9ba 100644 --- a/drivers/clk/sunxi-ng/ccu-sun50i-a64.c +++ b/drivers/clk/sunxi-ng/ccu-sun50i-a64.c @@ -884,11 +884,26 @@ static const struct sunxi_ccu_desc sun50i_a64_ccu_desc = { .num_resets = ARRAY_SIZE(sun50i_a64_ccu_resets), }; +static struct ccu_pll_nb sun50i_a64_pll_cpu_nb = { + .common = &pll_cpux_clk.common, + /* copy from pll_cpux_clk */ + .enable = BIT(31), + .lock = BIT(28), +}; + +static struct ccu_mux_nb sun50i_a64_cpu_nb = { + .common = &cpux_clk.common, + .cm = &cpux_clk.mux, + .delay_us = 1, /* > 8 clock cycles at 24 MHz */ + .bypass_index = 1, /* index of 24 MHz oscillator */ +}; + static int sun50i_a64_ccu_probe(struct platform_device *pdev) { struct resource *res; void __iomem *reg; u32 val; + int ret; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); reg = devm_ioremap_resource(&pdev->dev, res); @@ -902,7 +917,18 @@ static int sun50i_a64_ccu_probe(struct platform_device *pdev) writel(0x515, reg + SUN50I_A64_PLL_MIPI_REG); - return sunxi_ccu_probe(pdev->dev.of_node, reg, &sun50i_a64_ccu_desc); + ret = sunxi_ccu_probe(pdev->dev.of_node, reg, &sun50i_a64_ccu_desc); + if (ret) + return ret; + + /* Gate then ungate PLL CPU after any rate changes */ + ccu_pll_notifier_register(&sun50i_a64_pll_cpu_nb); + + /* Reparent CPU during PLL CPU rate changes */ + ccu_mux_notifier_register(pll_cpux_clk.common.hw.clk, + &sun50i_a64_cpu_nb); + + return 0; } static const struct of_device_id sun50i_a64_ccu_ids[] = { From 7b50b1aaade963f9de1a7c7ba15f3ae0c87c4c04 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 4 Jan 2020 12:00:57 +0100 Subject: [PATCH 3329/3715] ALSA: sh: Fix unused variable warnings [ Upstream commit 5da116f164ce265e397b8f59af5c39e4a61d61a5 ] Remove unused variables that are left over after the conversion of new PCM ops: sound/sh/sh_dac_audio.c:166:26: warning: unused variable 'runtime' sound/sh/sh_dac_audio.c:186:26: warning: unused variable 'runtime' sound/sh/sh_dac_audio.c:205:26: warning: unused variable 'runtime' Fixes: 1cc2f8ba0b3e ("ALSA: sh: Convert to the new PCM ops") Link: https://lore.kernel.org/r/20200104110057.13875-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/sh/sh_dac_audio.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/sound/sh/sh_dac_audio.c b/sound/sh/sh_dac_audio.c index 834b2574786f..6251b5e1b64a 100644 --- a/sound/sh/sh_dac_audio.c +++ b/sound/sh/sh_dac_audio.c @@ -190,7 +190,6 @@ static int snd_sh_dac_pcm_copy(struct snd_pcm_substream *substream, { /* channel is not used (interleaved data) */ struct snd_sh_dac *chip = snd_pcm_substream_chip(substream); - struct snd_pcm_runtime *runtime = substream->runtime; if (copy_from_user_toio(chip->data_buffer + pos, src, count)) return -EFAULT; @@ -210,7 +209,6 @@ static int snd_sh_dac_pcm_copy_kernel(struct snd_pcm_substream *substream, { /* channel is not used (interleaved data) */ struct snd_sh_dac *chip = snd_pcm_substream_chip(substream); - struct snd_pcm_runtime *runtime = substream->runtime; memcpy_toio(chip->data_buffer + pos, src, count); chip->buffer_end = chip->data_buffer + pos + count; @@ -229,7 +227,6 @@ static int snd_sh_dac_pcm_silence(struct snd_pcm_substream *substream, { /* channel is not used (interleaved data) */ struct snd_sh_dac *chip = snd_pcm_substream_chip(substream); - struct snd_pcm_runtime *runtime = substream->runtime; memset_io(chip->data_buffer + pos, 0, count); chip->buffer_end = chip->data_buffer + pos + count; From 5d5207cfadc08358d6bf764e54a9de9a77e015c6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 5 Jan 2020 15:48:23 +0100 Subject: [PATCH 3330/3715] ALSA: sh: Fix compile warning wrt const [ Upstream commit f1dd4795b1523fbca7ab4344dd5a8bb439cc770d ] A long-standing compile warning was seen during build test: sound/sh/aica.c: In function 'load_aica_firmware': sound/sh/aica.c:521:25: warning: passing argument 2 of 'spu_memload' discards 'const' qualifier from pointer target type [-Wdiscarded-qualifiers] Fixes: 198de43d758c ("[ALSA] Add ALSA support for the SEGA Dreamcast PCM device") Link: https://lore.kernel.org/r/20200105144823.29547-69-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/sh/aica.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/sh/aica.c b/sound/sh/aica.c index fdc680ae8aa0..d9acf551a898 100644 --- a/sound/sh/aica.c +++ b/sound/sh/aica.c @@ -117,10 +117,10 @@ static void spu_memset(u32 toi, u32 what, int length) } /* spu_memload - write to SPU address space */ -static void spu_memload(u32 toi, void *from, int length) +static void spu_memload(u32 toi, const void *from, int length) { unsigned long flags; - u32 *froml = from; + const u32 *froml = from; u32 __iomem *to = (u32 __iomem *) (SPU_MEMORY_BASE + toi); int i; u32 val; From 29fc3c7b5bbc59468f760708ed7b62937c13094b Mon Sep 17 00:00:00 2001 From: Andrey Zhizhikin Date: Wed, 11 Dec 2019 08:01:09 +0000 Subject: [PATCH 3331/3715] tools lib api fs: Fix gcc9 stringop-truncation compilation error [ Upstream commit 6794200fa3c9c3e6759dae099145f23e4310f4f7 ] GCC9 introduced string hardening mechanisms, which exhibits the error during fs api compilation: error: '__builtin_strncpy' specified bound 4096 equals destination size [-Werror=stringop-truncation] This comes when the length of copy passed to strncpy is is equal to destination size, which could potentially lead to buffer overflow. There is a need to mitigate this potential issue by limiting the size of destination by 1 and explicitly terminate the destination with NULL. Signed-off-by: Andrey Zhizhikin Reviewed-by: Petr Mladek Acked-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: Kefeng Wang Cc: Martin KaFai Lau Cc: Petr Mladek Cc: Sergey Senozhatsky Cc: Song Liu Cc: Yonghong Song Cc: bpf@vger.kernel.org Cc: netdev@vger.kernel.org Link: http://lore.kernel.org/lkml/20191211080109.18765-1-andrey.zhizhikin@leica-geosystems.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/lib/api/fs/fs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index b24afc0e6e81..45b50b89009a 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -210,6 +210,7 @@ static bool fs__env_override(struct fs *fs) size_t name_len = strlen(fs->name); /* name + "_PATH" + '\0' */ char upper_name[name_len + 5 + 1]; + memcpy(upper_name, fs->name, name_len); mem_toupper(upper_name, name_len); strcpy(&upper_name[name_len], "_PATH"); @@ -219,7 +220,8 @@ static bool fs__env_override(struct fs *fs) return false; fs->found = true; - strncpy(fs->path, override_path, sizeof(fs->path)); + strncpy(fs->path, override_path, sizeof(fs->path) - 1); + fs->path[sizeof(fs->path) - 1] = '\0'; return true; } From 51d1d3f948ade28573a2dfddc3d39ac54c53e6e3 Mon Sep 17 00:00:00 2001 From: Dingchen Zhang Date: Mon, 10 Jun 2019 09:47:51 -0400 Subject: [PATCH 3332/3715] drm: remove the newline for CRC source name. [ Upstream commit 72a848f5c46bab4c921edc9cbffd1ab273b2be17 ] userspace may transfer a newline, and this terminating newline is replaced by a '\0' to avoid followup issues. 'len-1' is the index to replace the newline of CRC source name. v3: typo fix (Sam) v2: update patch subject, body and format. (Sam) Cc: Leo Li Cc: Harry Wentland Cc: Sam Ravnborg Signed-off-by: Dingchen Zhang Reviewed-by: Sam Ravnborg Signed-off-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/20190610134751.14356-1-dingchen.zhang@amd.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_debugfs_crc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_debugfs_crc.c b/drivers/gpu/drm/drm_debugfs_crc.c index 2901b7944068..6858c80d2eb5 100644 --- a/drivers/gpu/drm/drm_debugfs_crc.c +++ b/drivers/gpu/drm/drm_debugfs_crc.c @@ -101,8 +101,8 @@ static ssize_t crc_control_write(struct file *file, const char __user *ubuf, if (IS_ERR(source)) return PTR_ERR(source); - if (source[len] == '\n') - source[len] = '\0'; + if (source[len - 1] == '\n') + source[len - 1] = '\0'; spin_lock_irq(&crc->lock); From cfce607a4f91c1ef1d800b536551e32750e03fd6 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 8 Jan 2020 18:24:16 -0700 Subject: [PATCH 3333/3715] usbip: Fix unsafe unaligned pointer usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 585c91f40d201bc564d4e76b83c05b3b5363fe7e ] Fix unsafe unaligned pointer usage in usbip network interfaces. usbip tool build fails with new gcc -Werror=address-of-packed-member checks. usbip_network.c: In function ‘usbip_net_pack_usb_device’: usbip_network.c:79:32: error: taking address of packed member of ‘struct usbip_usb_device’ may result in an unaligned pointer value [-Werror=address-of-packed-member] 79 | usbip_net_pack_uint32_t(pack, &udev->busnum); Fix with minor changes to pass by value instead of by address. Signed-off-by: Shuah Khan Link: https://lore.kernel.org/r/20200109012416.2875-1-skhan@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- tools/usb/usbip/src/usbip_network.c | 40 +++++++++++++++++------------ tools/usb/usbip/src/usbip_network.h | 12 +++------ 2 files changed, 27 insertions(+), 25 deletions(-) diff --git a/tools/usb/usbip/src/usbip_network.c b/tools/usb/usbip/src/usbip_network.c index b4c37e76a6e0..187dfaa67d0a 100644 --- a/tools/usb/usbip/src/usbip_network.c +++ b/tools/usb/usbip/src/usbip_network.c @@ -62,39 +62,39 @@ void usbip_setup_port_number(char *arg) info("using port %d (\"%s\")", usbip_port, usbip_port_string); } -void usbip_net_pack_uint32_t(int pack, uint32_t *num) +uint32_t usbip_net_pack_uint32_t(int pack, uint32_t num) { uint32_t i; if (pack) - i = htonl(*num); + i = htonl(num); else - i = ntohl(*num); + i = ntohl(num); - *num = i; + return i; } -void usbip_net_pack_uint16_t(int pack, uint16_t *num) +uint16_t usbip_net_pack_uint16_t(int pack, uint16_t num) { uint16_t i; if (pack) - i = htons(*num); + i = htons(num); else - i = ntohs(*num); + i = ntohs(num); - *num = i; + return i; } void usbip_net_pack_usb_device(int pack, struct usbip_usb_device *udev) { - usbip_net_pack_uint32_t(pack, &udev->busnum); - usbip_net_pack_uint32_t(pack, &udev->devnum); - usbip_net_pack_uint32_t(pack, &udev->speed); + udev->busnum = usbip_net_pack_uint32_t(pack, udev->busnum); + udev->devnum = usbip_net_pack_uint32_t(pack, udev->devnum); + udev->speed = usbip_net_pack_uint32_t(pack, udev->speed); - usbip_net_pack_uint16_t(pack, &udev->idVendor); - usbip_net_pack_uint16_t(pack, &udev->idProduct); - usbip_net_pack_uint16_t(pack, &udev->bcdDevice); + udev->idVendor = usbip_net_pack_uint16_t(pack, udev->idVendor); + udev->idProduct = usbip_net_pack_uint16_t(pack, udev->idProduct); + udev->bcdDevice = usbip_net_pack_uint16_t(pack, udev->bcdDevice); } void usbip_net_pack_usb_interface(int pack __attribute__((unused)), @@ -141,6 +141,14 @@ ssize_t usbip_net_send(int sockfd, void *buff, size_t bufflen) return usbip_net_xmit(sockfd, buff, bufflen, 1); } +static inline void usbip_net_pack_op_common(int pack, + struct op_common *op_common) +{ + op_common->version = usbip_net_pack_uint16_t(pack, op_common->version); + op_common->code = usbip_net_pack_uint16_t(pack, op_common->code); + op_common->status = usbip_net_pack_uint32_t(pack, op_common->status); +} + int usbip_net_send_op_common(int sockfd, uint32_t code, uint32_t status) { struct op_common op_common; @@ -152,7 +160,7 @@ int usbip_net_send_op_common(int sockfd, uint32_t code, uint32_t status) op_common.code = code; op_common.status = status; - PACK_OP_COMMON(1, &op_common); + usbip_net_pack_op_common(1, &op_common); rc = usbip_net_send(sockfd, &op_common, sizeof(op_common)); if (rc < 0) { @@ -176,7 +184,7 @@ int usbip_net_recv_op_common(int sockfd, uint16_t *code) goto err; } - PACK_OP_COMMON(0, &op_common); + usbip_net_pack_op_common(0, &op_common); if (op_common.version != USBIP_VERSION) { dbg("version mismatch: %d %d", op_common.version, diff --git a/tools/usb/usbip/src/usbip_network.h b/tools/usb/usbip/src/usbip_network.h index 7032687621d3..8e8330c0f1c9 100644 --- a/tools/usb/usbip/src/usbip_network.h +++ b/tools/usb/usbip/src/usbip_network.h @@ -34,12 +34,6 @@ struct op_common { } __attribute__((packed)); -#define PACK_OP_COMMON(pack, op_common) do {\ - usbip_net_pack_uint16_t(pack, &(op_common)->version);\ - usbip_net_pack_uint16_t(pack, &(op_common)->code);\ - usbip_net_pack_uint32_t(pack, &(op_common)->status);\ -} while (0) - /* ---------------------------------------------------------------------- */ /* Dummy Code */ #define OP_UNSPEC 0x00 @@ -165,11 +159,11 @@ struct op_devlist_reply_extra { } while (0) #define PACK_OP_DEVLIST_REPLY(pack, reply) do {\ - usbip_net_pack_uint32_t(pack, &(reply)->ndev);\ + (reply)->ndev = usbip_net_pack_uint32_t(pack, (reply)->ndev);\ } while (0) -void usbip_net_pack_uint32_t(int pack, uint32_t *num); -void usbip_net_pack_uint16_t(int pack, uint16_t *num); +uint32_t usbip_net_pack_uint32_t(int pack, uint32_t num); +uint16_t usbip_net_pack_uint16_t(int pack, uint16_t num); void usbip_net_pack_usb_device(int pack, struct usbip_usb_device *udev); void usbip_net_pack_usb_interface(int pack, struct usbip_usb_interface *uinf); From b48c6936909d230b7818fffa553d91a060f2dfac Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 7 Jan 2020 16:36:49 +0100 Subject: [PATCH 3334/3715] udf: Fix free space reporting for metadata and virtual partitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a4a8b99ec819ca60b49dc582a4287ef03411f117 ] Free space on filesystems with metadata or virtual partition maps currently gets misreported. This is because these partitions are just remapped onto underlying real partitions from which keep track of free blocks. Take this remapping into account when counting free blocks as well. Reviewed-by: Pali Rohár Reported-by: Pali Rohár Signed-off-by: Jan Kara Signed-off-by: Sasha Levin --- fs/udf/super.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/fs/udf/super.c b/fs/udf/super.c index 242d960df9a1..51de27685e18 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -2467,17 +2467,29 @@ static unsigned int udf_count_free_table(struct super_block *sb, static unsigned int udf_count_free(struct super_block *sb) { unsigned int accum = 0; - struct udf_sb_info *sbi; + struct udf_sb_info *sbi = UDF_SB(sb); struct udf_part_map *map; + unsigned int part = sbi->s_partition; + int ptype = sbi->s_partmaps[part].s_partition_type; + + if (ptype == UDF_METADATA_MAP25) { + part = sbi->s_partmaps[part].s_type_specific.s_metadata. + s_phys_partition_ref; + } else if (ptype == UDF_VIRTUAL_MAP15 || ptype == UDF_VIRTUAL_MAP20) { + /* + * Filesystems with VAT are append-only and we cannot write to + * them. Let's just report 0 here. + */ + return 0; + } - sbi = UDF_SB(sb); if (sbi->s_lvid_bh) { struct logicalVolIntegrityDesc *lvid = (struct logicalVolIntegrityDesc *) sbi->s_lvid_bh->b_data; - if (le32_to_cpu(lvid->numOfPartitions) > sbi->s_partition) { + if (le32_to_cpu(lvid->numOfPartitions) > part) { accum = le32_to_cpu( - lvid->freeSpaceTable[sbi->s_partition]); + lvid->freeSpaceTable[part]); if (accum == 0xFFFFFFFF) accum = 0; } @@ -2486,7 +2498,7 @@ static unsigned int udf_count_free(struct super_block *sb) if (accum) return accum; - map = &sbi->s_partmaps[sbi->s_partition]; + map = &sbi->s_partmaps[part]; if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) { accum += udf_count_free_bitmap(sb, map->s_uspace.s_bitmap); From b8005f7f3e07bc83e8052e6ae48fec69c8b6e4cc Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 6 Jan 2020 08:42:28 -0500 Subject: [PATCH 3335/3715] IB/hfi1: Add software counter for ctxt0 seq drop [ Upstream commit 5ffd048698ea5139743acd45e8ab388a683642b8 ] All other code paths increment some form of drop counter. This was missed in the original implementation. Fixes: 82c2611daaf0 ("staging/rdma/hfi1: Handle packets with invalid RHF on context 0") Link: https://lore.kernel.org/r/20200106134228.119356.96828.stgit@awfm-01.aw.intel.com Reviewed-by: Kaike Wan Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/chip.c | 10 ++++++++++ drivers/infiniband/hw/hfi1/chip.h | 1 + drivers/infiniband/hw/hfi1/driver.c | 1 + drivers/infiniband/hw/hfi1/hfi.h | 2 ++ 4 files changed, 14 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 4a0b7c003477..cb5785dda524 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1686,6 +1686,14 @@ static u64 access_sw_pio_drain(const struct cntr_entry *entry, return dd->verbs_dev.n_piodrain; } +static u64 access_sw_ctx0_seq_drop(const struct cntr_entry *entry, + void *context, int vl, int mode, u64 data) +{ + struct hfi1_devdata *dd = context; + + return dd->ctx0_seq_drop; +} + static u64 access_sw_vtx_wait(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { @@ -4246,6 +4254,8 @@ static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = { access_sw_cpu_intr), [C_SW_CPU_RCV_LIM] = CNTR_ELEM("RcvLimit", 0, 0, CNTR_NORMAL, access_sw_cpu_rcv_limit), +[C_SW_CTX0_SEQ_DROP] = CNTR_ELEM("SeqDrop0", 0, 0, CNTR_NORMAL, + access_sw_ctx0_seq_drop), [C_SW_VTX_WAIT] = CNTR_ELEM("vTxWait", 0, 0, CNTR_NORMAL, access_sw_vtx_wait), [C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL, diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 50b8645d0b87..a88ef2433cea 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -864,6 +864,7 @@ enum { C_DC_PG_STS_TX_MBE_CNT, C_SW_CPU_INTR, C_SW_CPU_RCV_LIM, + C_SW_CTX0_SEQ_DROP, C_SW_VTX_WAIT, C_SW_PIO_WAIT, C_SW_PIO_DRAIN, diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 72c836b826ca..7aa1aabb7a43 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -710,6 +710,7 @@ static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread) { int ret; + packet->rcd->dd->ctx0_seq_drop++; /* Set up for the next packet */ packet->rhqoff += packet->rsize; if (packet->rhqoff >= packet->maxcnt) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 810ef5114772..cf9bc95d8039 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1043,6 +1043,8 @@ struct hfi1_devdata { char *boardname; /* human readable board info */ + u64 ctx0_seq_drop; + /* reset value */ u64 z_int_counter; u64 z_rcv_limit; From ac6fd7bc483484c408042672f213469ccc335f0d Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 18 Dec 2019 21:23:03 +0300 Subject: [PATCH 3336/3715] soc/tegra: fuse: Correct straps' address for older Tegra124 device trees [ Upstream commit 2d9ea1934f8ef0dfb862d103389562cc28b4fc03 ] Trying to read out Chip ID before APBMISC registers are mapped won't succeed, in a result Tegra124 gets a wrong address for the HW straps register if machine uses an old outdated device tree. Fixes: 297c4f3dcbff ("soc/tegra: fuse: Restrict legacy code to 32-bit ARM") Signed-off-by: Dmitry Osipenko Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/soc/tegra/fuse/tegra-apbmisc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/tegra/fuse/tegra-apbmisc.c b/drivers/soc/tegra/fuse/tegra-apbmisc.c index 5b18f6ffa45c..cd61c883c19f 100644 --- a/drivers/soc/tegra/fuse/tegra-apbmisc.c +++ b/drivers/soc/tegra/fuse/tegra-apbmisc.c @@ -134,7 +134,7 @@ void __init tegra_init_apbmisc(void) apbmisc.flags = IORESOURCE_MEM; /* strapping options */ - if (tegra_get_chip_id() == TEGRA124) { + if (of_machine_is_compatible("nvidia,tegra124")) { straps.start = 0x7000e864; straps.end = 0x7000e867; } else { From 012e9492ef1b3046b57ef2aff689aacbff1b0315 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Jan 2020 12:39:46 +0100 Subject: [PATCH 3337/3715] efi/x86: Don't panic or BUG() on non-critical error conditions [ Upstream commit e2d68a955e49d61fd0384f23e92058dc9b79be5e ] The logic in __efi_enter_virtual_mode() does a number of steps in sequence, all of which may fail in one way or the other. In most cases, we simply print an error and disable EFI runtime services support, but in some cases, we BUG() or panic() and bring down the system when encountering conditions that we could easily handle in the same way. While at it, replace a pointless page-to-virt-phys conversion with one that goes straight from struct page to physical. Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arvind Sankar Cc: Matthew Garrett Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20200103113953.9571-14-ardb@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- arch/x86/platform/efi/efi.c | 28 ++++++++++++++-------------- arch/x86/platform/efi/efi_64.c | 9 +++++---- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 5b0275310070..e7f19dec16b9 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -930,16 +930,14 @@ static void __init __efi_enter_virtual_mode(void) if (efi_alloc_page_tables()) { pr_err("Failed to allocate EFI page tables\n"); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; + goto err; } efi_merge_regions(); new_memmap = efi_map_regions(&count, &pg_shift); if (!new_memmap) { pr_err("Error reallocating memory, EFI runtime non-functional!\n"); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; + goto err; } pa = __pa(new_memmap); @@ -953,8 +951,7 @@ static void __init __efi_enter_virtual_mode(void) if (efi_memmap_init_late(pa, efi.memmap.desc_size * count)) { pr_err("Failed to remap late EFI memory map\n"); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; + goto err; } if (efi_enabled(EFI_DBG)) { @@ -962,12 +959,11 @@ static void __init __efi_enter_virtual_mode(void) efi_print_memmap(); } - BUG_ON(!efi.systab); + if (WARN_ON(!efi.systab)) + goto err; - if (efi_setup_page_tables(pa, 1 << pg_shift)) { - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; - } + if (efi_setup_page_tables(pa, 1 << pg_shift)) + goto err; efi_sync_low_kernel_mappings(); @@ -987,9 +983,9 @@ static void __init __efi_enter_virtual_mode(void) } if (status != EFI_SUCCESS) { - pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n", - status); - panic("EFI call to SetVirtualAddressMap() failed!"); + pr_err("Unable to switch EFI into virtual mode (status=%lx)!\n", + status); + goto err; } /* @@ -1016,6 +1012,10 @@ static void __init __efi_enter_virtual_mode(void) /* clean DUMMY object */ efi_delete_dummy_variable(); + return; + +err: + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); } void __init efi_enter_virtual_mode(void) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index ae369c2bbc3e..0ebb7f94fd51 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -390,11 +390,12 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) return 0; page = alloc_page(GFP_KERNEL|__GFP_DMA32); - if (!page) - panic("Unable to allocate EFI runtime stack < 4GB\n"); + if (!page) { + pr_err("Unable to allocate EFI runtime stack < 4GB\n"); + return 1; + } - efi_scratch.phys_stack = virt_to_phys(page_address(page)); - efi_scratch.phys_stack += PAGE_SIZE; /* stack grows down */ + efi_scratch.phys_stack = page_to_phys(page + 1); /* stack grows down */ npages = (_etext - _text) >> PAGE_SHIFT; text = __pa(_text); From eb89193c7aacbfbd77e43c952d06bfda30ba019a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 9 Nov 2019 09:42:13 -0800 Subject: [PATCH 3338/3715] rcu: Use WRITE_ONCE() for assignments to ->pprev for hlist_nulls [ Upstream commit 860c8802ace14c646864795e057349c9fb2d60ad ] Eric Dumazet supplied a KCSAN report of a bug that forces use of hlist_unhashed_lockless() from sk_unhashed(): ------------------------------------------------------------------------ BUG: KCSAN: data-race in inet_unhash / inet_unhash write to 0xffff8880a69a0170 of 8 bytes by interrupt on cpu 1: __hlist_nulls_del include/linux/list_nulls.h:88 [inline] hlist_nulls_del_init_rcu include/linux/rculist_nulls.h:36 [inline] __sk_nulls_del_node_init_rcu include/net/sock.h:676 [inline] inet_unhash+0x38f/0x4a0 net/ipv4/inet_hashtables.c:612 tcp_set_state+0xfa/0x3e0 net/ipv4/tcp.c:2249 tcp_done+0x93/0x1e0 net/ipv4/tcp.c:3854 tcp_write_err+0x7e/0xc0 net/ipv4/tcp_timer.c:56 tcp_retransmit_timer+0x9b8/0x16d0 net/ipv4/tcp_timer.c:479 tcp_write_timer_handler+0x42d/0x510 net/ipv4/tcp_timer.c:599 tcp_write_timer+0xd1/0xf0 net/ipv4/tcp_timer.c:619 call_timer_fn+0x5f/0x2f0 kernel/time/timer.c:1404 expire_timers kernel/time/timer.c:1449 [inline] __run_timers kernel/time/timer.c:1773 [inline] __run_timers kernel/time/timer.c:1740 [inline] run_timer_softirq+0xc0c/0xcd0 kernel/time/timer.c:1786 __do_softirq+0x115/0x33f kernel/softirq.c:292 invoke_softirq kernel/softirq.c:373 [inline] irq_exit+0xbb/0xe0 kernel/softirq.c:413 exiting_irq arch/x86/include/asm/apic.h:536 [inline] smp_apic_timer_interrupt+0xe6/0x280 arch/x86/kernel/apic/apic.c:1137 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:830 native_safe_halt+0xe/0x10 arch/x86/kernel/paravirt.c:71 arch_cpu_idle+0x1f/0x30 arch/x86/kernel/process.c:571 default_idle_call+0x1e/0x40 kernel/sched/idle.c:94 cpuidle_idle_call kernel/sched/idle.c:154 [inline] do_idle+0x1af/0x280 kernel/sched/idle.c:263 cpu_startup_entry+0x1b/0x20 kernel/sched/idle.c:355 start_secondary+0x208/0x260 arch/x86/kernel/smpboot.c:264 secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241 read to 0xffff8880a69a0170 of 8 bytes by interrupt on cpu 0: sk_unhashed include/net/sock.h:607 [inline] inet_unhash+0x3d/0x4a0 net/ipv4/inet_hashtables.c:592 tcp_set_state+0xfa/0x3e0 net/ipv4/tcp.c:2249 tcp_done+0x93/0x1e0 net/ipv4/tcp.c:3854 tcp_write_err+0x7e/0xc0 net/ipv4/tcp_timer.c:56 tcp_retransmit_timer+0x9b8/0x16d0 net/ipv4/tcp_timer.c:479 tcp_write_timer_handler+0x42d/0x510 net/ipv4/tcp_timer.c:599 tcp_write_timer+0xd1/0xf0 net/ipv4/tcp_timer.c:619 call_timer_fn+0x5f/0x2f0 kernel/time/timer.c:1404 expire_timers kernel/time/timer.c:1449 [inline] __run_timers kernel/time/timer.c:1773 [inline] __run_timers kernel/time/timer.c:1740 [inline] run_timer_softirq+0xc0c/0xcd0 kernel/time/timer.c:1786 __do_softirq+0x115/0x33f kernel/softirq.c:292 invoke_softirq kernel/softirq.c:373 [inline] irq_exit+0xbb/0xe0 kernel/softirq.c:413 exiting_irq arch/x86/include/asm/apic.h:536 [inline] smp_apic_timer_interrupt+0xe6/0x280 arch/x86/kernel/apic/apic.c:1137 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:830 native_safe_halt+0xe/0x10 arch/x86/kernel/paravirt.c:71 arch_cpu_idle+0x1f/0x30 arch/x86/kernel/process.c:571 default_idle_call+0x1e/0x40 kernel/sched/idle.c:94 cpuidle_idle_call kernel/sched/idle.c:154 [inline] do_idle+0x1af/0x280 kernel/sched/idle.c:263 cpu_startup_entry+0x1b/0x20 kernel/sched/idle.c:355 rest_init+0xec/0xf6 init/main.c:452 arch_call_rest_init+0x17/0x37 start_kernel+0x838/0x85e init/main.c:786 x86_64_start_reservations+0x29/0x2b arch/x86/kernel/head64.c:490 x86_64_start_kernel+0x72/0x76 arch/x86/kernel/head64.c:471 secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.4.0-rc6+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 ------------------------------------------------------------------------ This commit therefore replaces C-language assignments with WRITE_ONCE() in include/linux/list_nulls.h and include/linux/rculist_nulls.h. Reported-by: Eric Dumazet # For KCSAN Signed-off-by: Paul E. McKenney Signed-off-by: Sasha Levin --- include/linux/list_nulls.h | 8 ++++---- include/linux/rculist_nulls.h | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h index 3ef96743db8d..1ecd35664e0d 100644 --- a/include/linux/list_nulls.h +++ b/include/linux/list_nulls.h @@ -72,10 +72,10 @@ static inline void hlist_nulls_add_head(struct hlist_nulls_node *n, struct hlist_nulls_node *first = h->first; n->next = first; - n->pprev = &h->first; + WRITE_ONCE(n->pprev, &h->first); h->first = n; if (!is_a_nulls(first)) - first->pprev = &n->next; + WRITE_ONCE(first->pprev, &n->next); } static inline void __hlist_nulls_del(struct hlist_nulls_node *n) @@ -85,13 +85,13 @@ static inline void __hlist_nulls_del(struct hlist_nulls_node *n) WRITE_ONCE(*pprev, next); if (!is_a_nulls(next)) - next->pprev = pprev; + WRITE_ONCE(next->pprev, pprev); } static inline void hlist_nulls_del(struct hlist_nulls_node *n) { __hlist_nulls_del(n); - n->pprev = LIST_POISON2; + WRITE_ONCE(n->pprev, LIST_POISON2); } /** diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index a10da545b3f6..cf64a9492256 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -34,7 +34,7 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n) { if (!hlist_nulls_unhashed(n)) { __hlist_nulls_del(n); - n->pprev = NULL; + WRITE_ONCE(n->pprev, NULL); } } @@ -66,7 +66,7 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n) static inline void hlist_nulls_del_rcu(struct hlist_nulls_node *n) { __hlist_nulls_del(n); - n->pprev = LIST_POISON2; + WRITE_ONCE(n->pprev, LIST_POISON2); } /** @@ -94,10 +94,10 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, struct hlist_nulls_node *first = h->first; n->next = first; - n->pprev = &h->first; + WRITE_ONCE(n->pprev, &h->first); rcu_assign_pointer(hlist_nulls_first_rcu(h), n); if (!is_a_nulls(first)) - first->pprev = &n->next; + WRITE_ONCE(first->pprev, &n->next); } /** From e623ca6cc396b115cc785a8feb9f4c9e222aea75 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Thu, 9 Jan 2020 17:03:21 -0800 Subject: [PATCH 3339/3715] Input: edt-ft5x06 - work around first register access error [ Upstream commit e112324cc0422c046f1cf54c56f333d34fa20885 ] The EP0700MLP1 returns bogus data on the first register read access (reading the threshold parameter from register 0x00): edt_ft5x06 2-0038: crc error: 0xfc expected, got 0x40 It ignores writes until then. This patch adds a dummy read after which the number of sensors and parameter read/writes work correctly. Signed-off-by: Philipp Zabel Signed-off-by: Marco Felsch Tested-by: Andy Shevchenko Reviewed-by: Andy Shevchenko Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/touchscreen/edt-ft5x06.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c index 5bf63f76ddda..4eff5b44640c 100644 --- a/drivers/input/touchscreen/edt-ft5x06.c +++ b/drivers/input/touchscreen/edt-ft5x06.c @@ -888,6 +888,7 @@ static int edt_ft5x06_ts_probe(struct i2c_client *client, { const struct edt_i2c_chip_data *chip_data; struct edt_ft5x06_ts_data *tsdata; + u8 buf[2] = { 0xfc, 0x00 }; struct input_dev *input; unsigned long irq_flags; int error; @@ -957,6 +958,12 @@ static int edt_ft5x06_ts_probe(struct i2c_client *client, return error; } + /* + * Dummy read access. EP0700MLP1 returns bogus data on the first + * register read access and ignores writes. + */ + edt_ft5x06_ts_readwrite(tsdata->client, 2, buf, 2, buf); + edt_ft5x06_ts_set_regs(tsdata); edt_ft5x06_ts_get_defaults(&client->dev, tsdata); edt_ft5x06_ts_get_parameters(tsdata); From 4b08dd2bd613ba002d70f29ca4bece7dad01c60e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 12 Jan 2020 13:04:42 +0100 Subject: [PATCH 3340/3715] wan: ixp4xx_hss: fix compile-testing on 64-bit [ Upstream commit 504c28c853ec5c626900b914b5833daf0581a344 ] Change the driver to use portable integer types to avoid warnings during compile testing: drivers/net/wan/ixp4xx_hss.c:863:21: error: cast to 'u32 *' (aka 'unsigned int *') from smaller integer type 'int' [-Werror,-Wint-to-pointer-cast] memcpy_swab32(mem, (u32 *)((int)skb->data & ~3), bytes / 4); ^ drivers/net/wan/ixp4xx_hss.c:979:12: error: incompatible pointer types passing 'u32 *' (aka 'unsigned int *') to parameter of type 'dma_addr_t *' (aka 'unsigned long long *') [-Werror,-Wincompatible-pointer-types] &port->desc_tab_phys))) ^~~~~~~~~~~~~~~~~~~~ include/linux/dmapool.h:27:20: note: passing argument to parameter 'handle' here dma_addr_t *handle); ^ Signed-off-by: Arnd Bergmann Signed-off-by: Linus Walleij Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/wan/ixp4xx_hss.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wan/ixp4xx_hss.c b/drivers/net/wan/ixp4xx_hss.c index 6a505c26a3e7..a269ed63d90f 100644 --- a/drivers/net/wan/ixp4xx_hss.c +++ b/drivers/net/wan/ixp4xx_hss.c @@ -261,7 +261,7 @@ struct port { struct hss_plat_info *plat; buffer_t *rx_buff_tab[RX_DESCS], *tx_buff_tab[TX_DESCS]; struct desc *desc_tab; /* coherent */ - u32 desc_tab_phys; + dma_addr_t desc_tab_phys; unsigned int id; unsigned int clock_type, clock_rate, loopback; unsigned int initialized, carrier; @@ -861,7 +861,7 @@ static int hss_hdlc_xmit(struct sk_buff *skb, struct net_device *dev) dev->stats.tx_dropped++; return NETDEV_TX_OK; } - memcpy_swab32(mem, (u32 *)((int)skb->data & ~3), bytes / 4); + memcpy_swab32(mem, (u32 *)((uintptr_t)skb->data & ~3), bytes / 4); dev_kfree_skb(skb); #endif From ae34e82fec4df033bec710a285e8af5bdbeead74 Mon Sep 17 00:00:00 2001 From: Chen Zhou Date: Mon, 13 Jan 2020 21:32:42 +0800 Subject: [PATCH 3341/3715] ASoC: atmel: fix build error with CONFIG_SND_ATMEL_SOC_DMA=m [ Upstream commit 8fea78029f5e6ed734ae1957bef23cfda1af4354 ] If CONFIG_SND_ATMEL_SOC_DMA=m, build error: sound/soc/atmel/atmel_ssc_dai.o: In function `atmel_ssc_set_audio': (.text+0x7cd): undefined reference to `atmel_pcm_dma_platform_register' Function atmel_pcm_dma_platform_register is defined under CONFIG SND_ATMEL_SOC_DMA, so select SND_ATMEL_SOC_DMA in CONFIG SND_ATMEL_SOC_SSC, same to CONFIG_SND_ATMEL_SOC_PDC. Reported-by: Hulk Robot Signed-off-by: Chen Zhou Link: https://lore.kernel.org/r/20200113133242.144550-1-chenzhou10@huawei.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/atmel/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/atmel/Kconfig b/sound/soc/atmel/Kconfig index 4a56f3dfba51..23887613b5c3 100644 --- a/sound/soc/atmel/Kconfig +++ b/sound/soc/atmel/Kconfig @@ -25,6 +25,8 @@ config SND_ATMEL_SOC_DMA config SND_ATMEL_SOC_SSC_DMA tristate + select SND_ATMEL_SOC_DMA + select SND_ATMEL_SOC_PDC config SND_ATMEL_SOC_SSC tristate From b53fe3e57c93ee8a044c1296bbb57aed863b2bff Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 17 Dec 2019 19:47:20 -0700 Subject: [PATCH 3342/3715] tty: synclinkmp: Adjust indentation in several functions [ Upstream commit 1feedf61e7265128244f6993f23421f33dd93dbc ] Clang warns: ../drivers/tty/synclinkmp.c:1456:3: warning: misleading indentation; statement is not part of the previous 'if' [-Wmisleading-indentation] if (C_CRTSCTS(tty)) { ^ ../drivers/tty/synclinkmp.c:1453:2: note: previous statement is here if (I_IXOFF(tty)) ^ ../drivers/tty/synclinkmp.c:2473:8: warning: misleading indentation; statement is not part of the previous 'if' [-Wmisleading-indentation] info->port.tty->hw_stopped = 0; ^ ../drivers/tty/synclinkmp.c:2471:7: note: previous statement is here if ( debug_level >= DEBUG_LEVEL_ISR ) ^ ../drivers/tty/synclinkmp.c:2482:8: warning: misleading indentation; statement is not part of the previous 'if' [-Wmisleading-indentation] info->port.tty->hw_stopped = 1; ^ ../drivers/tty/synclinkmp.c:2480:7: note: previous statement is here if ( debug_level >= DEBUG_LEVEL_ISR ) ^ ../drivers/tty/synclinkmp.c:2809:3: warning: misleading indentation; statement is not part of the previous 'if' [-Wmisleading-indentation] if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty)) ^ ../drivers/tty/synclinkmp.c:2807:2: note: previous statement is here if (I_INPCK(info->port.tty)) ^ ../drivers/tty/synclinkmp.c:3246:3: warning: misleading indentation; statement is not part of the previous 'else' [-Wmisleading-indentation] set_signals(info); ^ ../drivers/tty/synclinkmp.c:3244:2: note: previous statement is here else ^ 5 warnings generated. The indentation on these lines is not at all consistent, tabs and spaces are mixed together. Convert to just using tabs to be consistent with the Linux kernel coding style and eliminate these warnings from clang. Link: https://github.com/ClangBuiltLinux/linux/issues/823 Signed-off-by: Nathan Chancellor Link: https://lore.kernel.org/r/20191218024720.3528-1-natechancellor@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/synclinkmp.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/tty/synclinkmp.c b/drivers/tty/synclinkmp.c index 4fed9e7b281f..3c9e314406b4 100644 --- a/drivers/tty/synclinkmp.c +++ b/drivers/tty/synclinkmp.c @@ -1467,10 +1467,10 @@ static void throttle(struct tty_struct * tty) if (I_IXOFF(tty)) send_xchar(tty, STOP_CHAR(tty)); - if (C_CRTSCTS(tty)) { + if (C_CRTSCTS(tty)) { spin_lock_irqsave(&info->lock,flags); info->serial_signals &= ~SerialSignal_RTS; - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } } @@ -1496,10 +1496,10 @@ static void unthrottle(struct tty_struct * tty) send_xchar(tty, START_CHAR(tty)); } - if (C_CRTSCTS(tty)) { + if (C_CRTSCTS(tty)) { spin_lock_irqsave(&info->lock,flags); info->serial_signals |= SerialSignal_RTS; - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } } @@ -2484,7 +2484,7 @@ static void isr_io_pin( SLMP_INFO *info, u16 status ) if (status & SerialSignal_CTS) { if ( debug_level >= DEBUG_LEVEL_ISR ) printk("CTS tx start..."); - info->port.tty->hw_stopped = 0; + info->port.tty->hw_stopped = 0; tx_start(info); info->pending_bh |= BH_TRANSMIT; return; @@ -2493,7 +2493,7 @@ static void isr_io_pin( SLMP_INFO *info, u16 status ) if (!(status & SerialSignal_CTS)) { if ( debug_level >= DEBUG_LEVEL_ISR ) printk("CTS tx stop..."); - info->port.tty->hw_stopped = 1; + info->port.tty->hw_stopped = 1; tx_stop(info); } } @@ -2820,8 +2820,8 @@ static void change_params(SLMP_INFO *info) info->read_status_mask2 = OVRN; if (I_INPCK(info->port.tty)) info->read_status_mask2 |= PE | FRME; - if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty)) - info->read_status_mask1 |= BRKD; + if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty)) + info->read_status_mask1 |= BRKD; if (I_IGNPAR(info->port.tty)) info->ignore_status_mask2 |= PE | FRME; if (I_IGNBRK(info->port.tty)) { @@ -3191,7 +3191,7 @@ static int tiocmget(struct tty_struct *tty) unsigned long flags; spin_lock_irqsave(&info->lock,flags); - get_signals(info); + get_signals(info); spin_unlock_irqrestore(&info->lock,flags); result = ((info->serial_signals & SerialSignal_RTS) ? TIOCM_RTS : 0) | @@ -3229,7 +3229,7 @@ static int tiocmset(struct tty_struct *tty, info->serial_signals &= ~SerialSignal_DTR; spin_lock_irqsave(&info->lock,flags); - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); return 0; @@ -3241,7 +3241,7 @@ static int carrier_raised(struct tty_port *port) unsigned long flags; spin_lock_irqsave(&info->lock,flags); - get_signals(info); + get_signals(info); spin_unlock_irqrestore(&info->lock,flags); return (info->serial_signals & SerialSignal_DCD) ? 1 : 0; @@ -3257,7 +3257,7 @@ static void dtr_rts(struct tty_port *port, int on) info->serial_signals |= SerialSignal_RTS | SerialSignal_DTR; else info->serial_signals &= ~(SerialSignal_RTS | SerialSignal_DTR); - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } From 86663c7bce98475a43a1c64df48c67f599e89a88 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 17 Dec 2019 19:39:13 -0700 Subject: [PATCH 3343/3715] tty: synclink_gt: Adjust indentation in several functions [ Upstream commit 446e76873b5e4e70bdee5db2f2a894d5b4a7d081 ] Clang warns: ../drivers/tty/synclink_gt.c:1337:3: warning: misleading indentation; statement is not part of the previous 'if' [-Wmisleading-indentation] if (C_CRTSCTS(tty)) { ^ ../drivers/tty/synclink_gt.c:1335:2: note: previous statement is here if (I_IXOFF(tty)) ^ ../drivers/tty/synclink_gt.c:2563:3: warning: misleading indentation; statement is not part of the previous 'if' [-Wmisleading-indentation] if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty)) ^ ../drivers/tty/synclink_gt.c:2561:2: note: previous statement is here if (I_INPCK(info->port.tty)) ^ ../drivers/tty/synclink_gt.c:3221:3: warning: misleading indentation; statement is not part of the previous 'else' [-Wmisleading-indentation] set_signals(info); ^ ../drivers/tty/synclink_gt.c:3219:2: note: previous statement is here else ^ 3 warnings generated. The indentation on these lines is not at all consistent, tabs and spaces are mixed together. Convert to just using tabs to be consistent with the Linux kernel coding style and eliminate these warnings from clang. Link: https://github.com/ClangBuiltLinux/linux/issues/822 Signed-off-by: Nathan Chancellor Link: https://lore.kernel.org/r/20191218023912.13827-1-natechancellor@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/synclink_gt.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c index 344e8c427c7e..9d68f89a2bf8 100644 --- a/drivers/tty/synclink_gt.c +++ b/drivers/tty/synclink_gt.c @@ -1349,10 +1349,10 @@ static void throttle(struct tty_struct * tty) DBGINFO(("%s throttle\n", info->device_name)); if (I_IXOFF(tty)) send_xchar(tty, STOP_CHAR(tty)); - if (C_CRTSCTS(tty)) { + if (C_CRTSCTS(tty)) { spin_lock_irqsave(&info->lock,flags); info->signals &= ~SerialSignal_RTS; - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } } @@ -1374,10 +1374,10 @@ static void unthrottle(struct tty_struct * tty) else send_xchar(tty, START_CHAR(tty)); } - if (C_CRTSCTS(tty)) { + if (C_CRTSCTS(tty)) { spin_lock_irqsave(&info->lock,flags); info->signals |= SerialSignal_RTS; - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } } @@ -2575,8 +2575,8 @@ static void change_params(struct slgt_info *info) info->read_status_mask = IRQ_RXOVER; if (I_INPCK(info->port.tty)) info->read_status_mask |= MASK_PARITY | MASK_FRAMING; - if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty)) - info->read_status_mask |= MASK_BREAK; + if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty)) + info->read_status_mask |= MASK_BREAK; if (I_IGNPAR(info->port.tty)) info->ignore_status_mask |= MASK_PARITY | MASK_FRAMING; if (I_IGNBRK(info->port.tty)) { @@ -3207,7 +3207,7 @@ static int tiocmset(struct tty_struct *tty, info->signals &= ~SerialSignal_DTR; spin_lock_irqsave(&info->lock,flags); - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); return 0; } @@ -3218,7 +3218,7 @@ static int carrier_raised(struct tty_port *port) struct slgt_info *info = container_of(port, struct slgt_info, port); spin_lock_irqsave(&info->lock,flags); - get_signals(info); + get_signals(info); spin_unlock_irqrestore(&info->lock,flags); return (info->signals & SerialSignal_DCD) ? 1 : 0; } @@ -3233,7 +3233,7 @@ static void dtr_rts(struct tty_port *port, int on) info->signals |= SerialSignal_RTS | SerialSignal_DTR; else info->signals &= ~(SerialSignal_RTS | SerialSignal_DTR); - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } From 062cfcd86078bc85220aa77385f0317110ad3062 Mon Sep 17 00:00:00 2001 From: Simon Schwartz Date: Tue, 10 Dec 2019 17:41:37 -0500 Subject: [PATCH 3344/3715] driver core: platform: Prevent resouce overflow from causing infinite loops [ Upstream commit 39cc539f90d035a293240c9443af50be55ee81b8 ] num_resources in the platform_device struct is declared as a u32. The for loops that iterate over num_resources use an int as the counter, which can cause infinite loops on architectures with smaller ints. Change the loop counters to u32. Signed-off-by: Simon Schwartz Link: https://lore.kernel.org/r/2201ce63a2a171ffd2ed14e867875316efcf71db.camel@theschwartz.xyz Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/base/platform.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index f1105de0d9fe..e3d40c41c33b 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "base.h" #include "power/power.h" @@ -68,7 +69,7 @@ void __weak arch_setup_pdev_archdata(struct platform_device *pdev) struct resource *platform_get_resource(struct platform_device *dev, unsigned int type, unsigned int num) { - int i; + u32 i; for (i = 0; i < dev->num_resources; i++) { struct resource *r = &dev->resource[i]; @@ -163,7 +164,7 @@ struct resource *platform_get_resource_byname(struct platform_device *dev, unsigned int type, const char *name) { - int i; + u32 i; for (i = 0; i < dev->num_resources; i++) { struct resource *r = &dev->resource[i]; @@ -360,7 +361,8 @@ EXPORT_SYMBOL_GPL(platform_device_add_properties); */ int platform_device_add(struct platform_device *pdev) { - int i, ret; + u32 i; + int ret; if (!pdev) return -EINVAL; @@ -447,7 +449,7 @@ EXPORT_SYMBOL_GPL(platform_device_add); */ void platform_device_del(struct platform_device *pdev) { - int i; + u32 i; if (pdev) { device_remove_properties(&pdev->dev); From 6439e61a69d12624e82ded29ac2693d768261412 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 6 Dec 2019 14:22:19 +0100 Subject: [PATCH 3345/3715] driver core: Print device when resources present in really_probe() [ Upstream commit 7c35e699c88bd60734277b26962783c60e04b494 ] If a device already has devres items attached before probing, a warning backtrace is printed. However, this backtrace does not reveal the offending device, leaving the user uninformed. Furthermore, using WARN_ON() causes systems with panic-on-warn to reboot. Fix this by replacing the WARN_ON() by a dev_crit() message. Abort probing the device, to prevent doing more damage to the device's resources. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20191206132219.28908-1-geert+renesas@glider.be Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/base/dd.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 536c9ac3b848..aa1a2d32360f 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -375,7 +375,10 @@ static int really_probe(struct device *dev, struct device_driver *drv) atomic_inc(&probe_count); pr_debug("bus: '%s': %s: probing driver %s with device %s\n", drv->bus->name, __func__, drv->name, dev_name(dev)); - WARN_ON(!list_empty(&dev->devres_head)); + if (!list_empty(&dev->devres_head)) { + dev_crit(dev, "Resources present before probing\n"); + return -EBUSY; + } re_probe: dev->driver = drv; From 8f8d6aebe2d84c54e143c490b56a60f7e6832fe3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 7 Jan 2020 21:05:43 +0100 Subject: [PATCH 3346/3715] vme: bridges: reduce stack usage [ Upstream commit 7483e7a939c074d887450ef1c4d9ccc5909405f8 ] With CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3, the stack usage in vme_fake grows above the warning limit: drivers/vme/bridges/vme_fake.c: In function 'fake_master_read': drivers/vme/bridges/vme_fake.c:610:1: error: the frame size of 1160 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] drivers/vme/bridges/vme_fake.c: In function 'fake_master_write': drivers/vme/bridges/vme_fake.c:797:1: error: the frame size of 1160 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] The problem is that in some configurations, each call to fake_vmereadX() puts another variable on the stack. Reduce the amount of inlining to get back to the previous state, with no function using more than 200 bytes each. Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20200107200610.3482901-1-arnd@arndb.de Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/vme/bridges/vme_fake.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/vme/bridges/vme_fake.c b/drivers/vme/bridges/vme_fake.c index 30b3acc93833..e81ec763b555 100644 --- a/drivers/vme/bridges/vme_fake.c +++ b/drivers/vme/bridges/vme_fake.c @@ -418,8 +418,9 @@ static void fake_lm_check(struct fake_driver *bridge, unsigned long long addr, } } -static u8 fake_vmeread8(struct fake_driver *bridge, unsigned long long addr, - u32 aspace, u32 cycle) +static noinline_for_stack u8 fake_vmeread8(struct fake_driver *bridge, + unsigned long long addr, + u32 aspace, u32 cycle) { u8 retval = 0xff; int i; @@ -450,8 +451,9 @@ static u8 fake_vmeread8(struct fake_driver *bridge, unsigned long long addr, return retval; } -static u16 fake_vmeread16(struct fake_driver *bridge, unsigned long long addr, - u32 aspace, u32 cycle) +static noinline_for_stack u16 fake_vmeread16(struct fake_driver *bridge, + unsigned long long addr, + u32 aspace, u32 cycle) { u16 retval = 0xffff; int i; @@ -482,8 +484,9 @@ static u16 fake_vmeread16(struct fake_driver *bridge, unsigned long long addr, return retval; } -static u32 fake_vmeread32(struct fake_driver *bridge, unsigned long long addr, - u32 aspace, u32 cycle) +static noinline_for_stack u32 fake_vmeread32(struct fake_driver *bridge, + unsigned long long addr, + u32 aspace, u32 cycle) { u32 retval = 0xffffffff; int i; @@ -613,8 +616,9 @@ out: return retval; } -static void fake_vmewrite8(struct fake_driver *bridge, u8 *buf, - unsigned long long addr, u32 aspace, u32 cycle) +static noinline_for_stack void fake_vmewrite8(struct fake_driver *bridge, + u8 *buf, unsigned long long addr, + u32 aspace, u32 cycle) { int i; unsigned long long start, end, offset; @@ -643,8 +647,9 @@ static void fake_vmewrite8(struct fake_driver *bridge, u8 *buf, } -static void fake_vmewrite16(struct fake_driver *bridge, u16 *buf, - unsigned long long addr, u32 aspace, u32 cycle) +static noinline_for_stack void fake_vmewrite16(struct fake_driver *bridge, + u16 *buf, unsigned long long addr, + u32 aspace, u32 cycle) { int i; unsigned long long start, end, offset; @@ -673,8 +678,9 @@ static void fake_vmewrite16(struct fake_driver *bridge, u16 *buf, } -static void fake_vmewrite32(struct fake_driver *bridge, u32 *buf, - unsigned long long addr, u32 aspace, u32 cycle) +static noinline_for_stack void fake_vmewrite32(struct fake_driver *bridge, + u32 *buf, unsigned long long addr, + u32 aspace, u32 cycle) { int i; unsigned long long start, end, offset; From f964992fdb8710beb3de984ec8c6a8e7670e3a9e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 8 Jan 2020 08:46:01 +0300 Subject: [PATCH 3347/3715] drm/nouveau/secboot/gm20b: initialize pointer in gm20b_secboot_new() [ Upstream commit 3613a9bea95a1470dd42e4ed1cc7d86ebe0a2dc0 ] We accidentally set "psb" which is a no-op instead of "*psb" so it generates a static checker warning. We should probably set it before the first error return so that it's always initialized. Fixes: 923f1bd27bf1 ("drm/nouveau/secboot/gm20b: add secure boot support") Signed-off-by: Dan Carpenter Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c index 30491d132d59..fbd10a67c6c6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c @@ -108,6 +108,7 @@ gm20b_secboot_new(struct nvkm_device *device, int index, struct gm200_secboot *gsb; struct nvkm_acr *acr; + *psb = NULL; acr = acr_r352_new(BIT(NVKM_SECBOOT_FALCON_FECS) | BIT(NVKM_SECBOOT_FALCON_PMU)); if (IS_ERR(acr)) @@ -116,10 +117,8 @@ gm20b_secboot_new(struct nvkm_device *device, int index, acr->optional_falcons = BIT(NVKM_SECBOOT_FALCON_PMU); gsb = kzalloc(sizeof(*gsb), GFP_KERNEL); - if (!gsb) { - psb = NULL; + if (!gsb) return -ENOMEM; - } *psb = &gsb->base; ret = nvkm_secboot_ctor(&gm20b_secboot, acr, device, index, &gsb->base); From fefca6abc571a5dcb7704a70e87896584d06830d Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 9 Jan 2020 11:46:15 +1000 Subject: [PATCH 3348/3715] drm/nouveau/gr/gk20a,gm200-: add terminators to method lists read from fw [ Upstream commit 7adc77aa0e11f25b0e762859219c70852cd8d56f ] Method init is typically ordered by class in the FW image as ThreeD, TwoD, Compute. Due to a bug in parsing the FW into our internal format, we've been accidentally sending Twod + Compute methods to the ThreeD class, as well as Compute methods to the TwoD class - oops. Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin --- .../gpu/drm/nouveau/nvkm/engine/gr/gk20a.c | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c index de8b806b88fd..7618b2eb4fdf 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c @@ -143,23 +143,24 @@ gk20a_gr_av_to_method(struct gf100_gr *gr, const char *fw_name, nent = (fuc.size / sizeof(struct gk20a_fw_av)); - pack = vzalloc((sizeof(*pack) * max_classes) + - (sizeof(*init) * (nent + 1))); + pack = vzalloc((sizeof(*pack) * (max_classes + 1)) + + (sizeof(*init) * (nent + max_classes + 1))); if (!pack) { ret = -ENOMEM; goto end; } - init = (void *)(pack + max_classes); + init = (void *)(pack + max_classes + 1); - for (i = 0; i < nent; i++) { - struct gf100_gr_init *ent = &init[i]; + for (i = 0; i < nent; i++, init++) { struct gk20a_fw_av *av = &((struct gk20a_fw_av *)fuc.data)[i]; u32 class = av->addr & 0xffff; u32 addr = (av->addr & 0xffff0000) >> 14; if (prevclass != class) { - pack[classidx].init = ent; + if (prevclass) /* Add terminator to the method list. */ + init++; + pack[classidx].init = init; pack[classidx].type = class; prevclass = class; if (++classidx >= max_classes) { @@ -169,10 +170,10 @@ gk20a_gr_av_to_method(struct gf100_gr *gr, const char *fw_name, } } - ent->addr = addr; - ent->data = av->data; - ent->count = 1; - ent->pitch = 1; + init->addr = addr; + init->data = av->data; + init->count = 1; + init->pitch = 1; } *ppack = pack; From bf24b7d69aead3fbe5131204a78a06787e7a2f0b Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 10 Jan 2020 14:32:01 +0800 Subject: [PATCH 3349/3715] drm/nouveau: Fix copy-paste error in nouveau_fence_wait_uevent_handler [ Upstream commit 1eb013473bff5f95b6fe1ca4dd7deda47257b9c2 ] Like other cases, it should use rcu protected 'chan' rather than 'fence->channel' in nouveau_fence_wait_uevent_handler. Fixes: 0ec5f02f0e2c ("drm/nouveau: prevent stale fence->channel pointers, and protect with rcu") Signed-off-by: YueHaibing Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nouveau_fence.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 99e14e3e0fe4..72532539369f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -158,7 +158,7 @@ nouveau_fence_wait_uevent_handler(struct nvif_notify *notify) fence = list_entry(fctx->pending.next, typeof(*fence), head); chan = rcu_dereference_protected(fence->channel, lockdep_is_held(&fctx->lock)); - if (nouveau_fence_update(fence->channel, fctx)) + if (nouveau_fence_update(chan, fctx)) ret = NVIF_NOTIFY_DROP; } spin_unlock_irqrestore(&fctx->lock, flags); From e7f9d07dcc424b132f47dbc835c090ff7c4449ff Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Tue, 24 Sep 2019 23:37:58 -0500 Subject: [PATCH 3350/3715] drm/vmwgfx: prevent memory leak in vmw_cmdbuf_res_add [ Upstream commit 40efb09a7f53125719e49864da008495e39aaa1e ] In vmw_cmdbuf_res_add if drm_ht_insert_item fails the allocated memory for cres should be released. Fixes: 18e4a4669c50 ("drm/vmwgfx: Fix compat shader namespace") Signed-off-by: Navid Emamdoost Reviewed-by: Thomas Hellstrom Signed-off-by: Thomas Hellstrom Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c index 36c7b6c839c0..738ad2fc79a2 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c @@ -210,8 +210,10 @@ int vmw_cmdbuf_res_add(struct vmw_cmdbuf_res_manager *man, cres->hash.key = user_key | (res_type << 24); ret = drm_ht_insert_item(&man->resources, &cres->hash); - if (unlikely(ret != 0)) + if (unlikely(ret != 0)) { + kfree(cres); goto out_invalid_key; + } cres->state = VMW_CMDBUF_RES_ADD; cres->res = vmw_resource_reference(res); From 66c863f64b7eb4d0e8d958e4013ab68e420925c0 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 15 Jan 2020 07:25:26 -0600 Subject: [PATCH 3351/3715] usb: musb: omap2430: Get rid of musb .set_vbus for omap2430 glue [ Upstream commit 91b6dec32e5c25fbdbb564d1e5af23764ec17ef1 ] We currently have musb_set_vbus() called from two different paths. Mostly it gets called from the USB PHY via omap_musb_set_mailbox(), but in some cases it can get also called from musb_stage0_irq() rather via .set_vbus: (musb_set_host [musb_hdrc]) (omap2430_musb_set_vbus [omap2430]) (musb_stage0_irq [musb_hdrc]) (musb_interrupt [musb_hdrc]) (omap2430_musb_interrupt [omap2430]) This is racy and will not work with introducing generic helper functions for musb_set_host() and musb_set_peripheral(). We want to get rid of the busy loops in favor of usleep_range(). Let's just get rid of .set_vbus for omap2430 glue layer and let the PHY code handle VBUS with musb_set_vbus(). Note that in the follow-up patch we can completely remove omap2430_musb_set_vbus(), but let's do it in a separate patch as this change may actually turn out to be needed as a fix. Reported-by: Pavel Machek Acked-by: Pavel Machek Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Link: https://lore.kernel.org/r/20200115132547.364-5-b-liu@ti.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/musb/omap2430.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index 456f3e6ecf03..26e69c2766f5 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -388,8 +388,6 @@ static const struct musb_platform_ops omap2430_ops = { .init = omap2430_musb_init, .exit = omap2430_musb_exit, - .set_vbus = omap2430_musb_set_vbus, - .enable = omap2430_musb_enable, .disable = omap2430_musb_disable, From af9f7ff24e89689390a79b38df195b3909f42939 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 15 Jan 2020 15:21:47 +0000 Subject: [PATCH 3352/3715] iommu/arm-smmu-v3: Use WRITE_ONCE() when changing validity of an STE [ Upstream commit d71e01716b3606a6648df7e5646ae12c75babde4 ] If, for some bizarre reason, the compiler decided to split up the write of STE DWORD 0, we could end up making a partial structure valid. Although this probably won't happen, follow the example of the context-descriptor code and use WRITE_ONCE() to ensure atomicity of the write. Reported-by: Jean-Philippe Brucker Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- drivers/iommu/arm-smmu-v3.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 09eb258a9a7d..29feafa8007f 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1145,7 +1145,8 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, } arm_smmu_sync_ste_for_sid(smmu, sid); - dst[0] = cpu_to_le64(val); + /* See comment in arm_smmu_write_ctx_desc() */ + WRITE_ONCE(dst[0], cpu_to_le64(val)); arm_smmu_sync_ste_for_sid(smmu, sid); /* It's likely that we'll want to use the new STE soon */ From 7d36bf199518818956c4369e16337ea0aa3c423a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 13 Dec 2019 18:32:16 -0800 Subject: [PATCH 3353/3715] f2fs: free sysfs kobject [ Upstream commit 820d366736c949ffe698d3b3fe1266a91da1766d ] Detected kmemleak. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/sysfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 93af9d7dfcdc..79e45e760c20 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -557,4 +557,5 @@ void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi) remove_proc_entry(sbi->sb->s_id, f2fs_proc_root); } kobject_del(&sbi->s_kobj); + kobject_put(&sbi->s_kobj); } From 192f211d6f8460d6a8c15316ace784d61f0339a8 Mon Sep 17 00:00:00 2001 From: Nick Black Date: Thu, 26 Dec 2019 15:31:48 -0500 Subject: [PATCH 3354/3715] scsi: iscsi: Don't destroy session if there are outstanding connections [ Upstream commit 54155ed4199c7aa3fd20866648024ab63c96d579 ] A faulty userspace that calls destroy_session() before destroying the connections can trigger the failure. This patch prevents the issue by refusing to destroy the session if there are outstanding connections. ------------[ cut here ]------------ kernel BUG at mm/slub.c:306! invalid opcode: 0000 [#1] SMP PTI CPU: 1 PID: 1224 Comm: iscsid Not tainted 5.4.0-rc2.iscsi+ #7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 RIP: 0010:__slab_free+0x181/0x350 [...] [ 1209.686056] RSP: 0018:ffffa93d4074fae0 EFLAGS: 00010246 [ 1209.686694] RAX: ffff934efa5ad800 RBX: 000000008010000a RCX: ffff934efa5ad800 [ 1209.687651] RDX: ffff934efa5ad800 RSI: ffffeb4041e96b00 RDI: ffff934efd402c40 [ 1209.688582] RBP: ffffa93d4074fb80 R08: 0000000000000001 R09: ffffffffbb5dfa26 [ 1209.689425] R10: ffff934efa5ad800 R11: 0000000000000001 R12: ffffeb4041e96b00 [ 1209.690285] R13: ffff934efa5ad800 R14: ffff934efd402c40 R15: 0000000000000000 [ 1209.691213] FS: 00007f7945dfb540(0000) GS:ffff934efda80000(0000) knlGS:0000000000000000 [ 1209.692316] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1209.693013] CR2: 000055877fd3da80 CR3: 0000000077384000 CR4: 00000000000006e0 [ 1209.693897] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1209.694773] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1209.695631] Call Trace: [ 1209.695957] ? __wake_up_common_lock+0x8a/0xc0 [ 1209.696712] iscsi_pool_free+0x26/0x40 [ 1209.697263] iscsi_session_teardown+0x2f/0xf0 [ 1209.698117] iscsi_sw_tcp_session_destroy+0x45/0x60 [ 1209.698831] iscsi_if_rx+0xd88/0x14e0 [ 1209.699370] netlink_unicast+0x16f/0x200 [ 1209.699932] netlink_sendmsg+0x21a/0x3e0 [ 1209.700446] sock_sendmsg+0x4f/0x60 [ 1209.700902] ___sys_sendmsg+0x2ae/0x320 [ 1209.701451] ? cp_new_stat+0x150/0x180 [ 1209.701922] __sys_sendmsg+0x59/0xa0 [ 1209.702357] do_syscall_64+0x52/0x160 [ 1209.702812] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 1209.703419] RIP: 0033:0x7f7946433914 [...] [ 1209.706084] RSP: 002b:00007fffb99f2378 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 1209.706994] RAX: ffffffffffffffda RBX: 000055bc869eac20 RCX: 00007f7946433914 [ 1209.708082] RDX: 0000000000000000 RSI: 00007fffb99f2390 RDI: 0000000000000005 [ 1209.709120] RBP: 00007fffb99f2390 R08: 000055bc84fe9320 R09: 00007fffb99f1f07 [ 1209.710110] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000038 [ 1209.711085] R13: 000055bc8502306e R14: 0000000000000000 R15: 0000000000000000 Modules linked in: ---[ end trace a2d933ede7f730d8 ]--- Link: https://lore.kernel.org/r/20191226203148.2172200-1-krisman@collabora.com Signed-off-by: Nick Black Co-developed-by: Salman Qazi Signed-off-by: Salman Qazi Co-developed-by: Junho Ryu Signed-off-by: Junho Ryu Co-developed-by: Khazhismel Kumykov Signed-off-by: Khazhismel Kumykov Co-developed-by: Gabriel Krisman Bertazi Signed-off-by: Gabriel Krisman Bertazi Reviewed-by: Lee Duncan Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/iscsi_tcp.c | 4 ++++ drivers/scsi/scsi_transport_iscsi.c | 26 +++++++++++++++++++++++--- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index 7e3a77d3c6f0..e3ca16043f9a 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -890,6 +890,10 @@ free_host: static void iscsi_sw_tcp_session_destroy(struct iscsi_cls_session *cls_session) { struct Scsi_Host *shost = iscsi_session_to_shost(cls_session); + struct iscsi_session *session = cls_session->dd_data; + + if (WARN_ON_ONCE(session->leadconn)) + return; iscsi_tcp_r2tpool_free(cls_session->dd_data); iscsi_session_teardown(cls_session); diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 95d71e301a53..aecb563a2b4e 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2945,6 +2945,24 @@ iscsi_set_path(struct iscsi_transport *transport, struct iscsi_uevent *ev) return err; } +static int iscsi_session_has_conns(int sid) +{ + struct iscsi_cls_conn *conn; + unsigned long flags; + int found = 0; + + spin_lock_irqsave(&connlock, flags); + list_for_each_entry(conn, &connlist, conn_list) { + if (iscsi_conn_get_sid(conn) == sid) { + found = 1; + break; + } + } + spin_unlock_irqrestore(&connlock, flags); + + return found; +} + static int iscsi_set_iface_params(struct iscsi_transport *transport, struct iscsi_uevent *ev, uint32_t len) @@ -3522,10 +3540,12 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) break; case ISCSI_UEVENT_DESTROY_SESSION: session = iscsi_session_lookup(ev->u.d_session.sid); - if (session) - transport->destroy_session(session); - else + if (!session) err = -EINVAL; + else if (iscsi_session_has_conns(ev->u.d_session.sid)) + err = -EBUSY; + else + transport->destroy_session(session); break; case ISCSI_UEVENT_UNBIND_SESSION: session = iscsi_session_lookup(ev->u.d_session.sid); From 556b80a2b57acc5289367436251ae72193746021 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 31 Oct 2019 12:46:52 -0700 Subject: [PATCH 3355/3715] arm64: fix alternatives with LLVM's integrated assembler [ Upstream commit c54f90c2627cc316d365e3073614731e17dbc631 ] LLVM's integrated assembler fails with the following error when building KVM: :12:6: error: expected absolute expression .if kvm_update_va_mask == 0 ^ :21:6: error: expected absolute expression .if kvm_update_va_mask == 0 ^ :24:2: error: unrecognized instruction mnemonic NOT_AN_INSTRUCTION ^ LLVM ERROR: Error parsing inline asm These errors come from ALTERNATIVE_CB and __ALTERNATIVE_CFG, which test for the existence of the callback parameter in inline assembly using the following expression: " .if " __stringify(cb) " == 0\n" This works with GNU as, but isn't supported by LLVM. This change splits __ALTERNATIVE_CFG and ALTINSTR_ENTRY into separate macros to fix the LLVM build. Link: https://github.com/ClangBuiltLinux/linux/issues/472 Signed-off-by: Sami Tolvanen Tested-by: Nick Desaulniers Reviewed-by: Kees Cook Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/include/asm/alternative.h | 32 ++++++++++++++++++---------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index a91933b1e2e6..4cd4a793dc32 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -30,13 +30,16 @@ typedef void (*alternative_cb_t)(struct alt_instr *alt, void __init apply_alternatives_all(void); void apply_alternatives(void *start, size_t length); -#define ALTINSTR_ENTRY(feature,cb) \ +#define ALTINSTR_ENTRY(feature) \ " .word 661b - .\n" /* label */ \ - " .if " __stringify(cb) " == 0\n" \ " .word 663f - .\n" /* new instruction */ \ - " .else\n" \ + " .hword " __stringify(feature) "\n" /* feature bit */ \ + " .byte 662b-661b\n" /* source len */ \ + " .byte 664f-663f\n" /* replacement len */ + +#define ALTINSTR_ENTRY_CB(feature, cb) \ + " .word 661b - .\n" /* label */ \ " .word " __stringify(cb) "- .\n" /* callback */ \ - " .endif\n" \ " .hword " __stringify(feature) "\n" /* feature bit */ \ " .byte 662b-661b\n" /* source len */ \ " .byte 664f-663f\n" /* replacement len */ @@ -57,15 +60,14 @@ void apply_alternatives(void *start, size_t length); * * Alternatives with callbacks do not generate replacement instructions. */ -#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled, cb) \ +#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \ ".if "__stringify(cfg_enabled)" == 1\n" \ "661:\n\t" \ oldinstr "\n" \ "662:\n" \ ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(feature,cb) \ + ALTINSTR_ENTRY(feature) \ ".popsection\n" \ - " .if " __stringify(cb) " == 0\n" \ ".pushsection .altinstr_replacement, \"a\"\n" \ "663:\n\t" \ newinstr "\n" \ @@ -73,17 +75,25 @@ void apply_alternatives(void *start, size_t length); ".popsection\n\t" \ ".org . - (664b-663b) + (662b-661b)\n\t" \ ".org . - (662b-661b) + (664b-663b)\n" \ - ".else\n\t" \ + ".endif\n" + +#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb) \ + ".if "__stringify(cfg_enabled)" == 1\n" \ + "661:\n\t" \ + oldinstr "\n" \ + "662:\n" \ + ".pushsection .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY_CB(feature, cb) \ + ".popsection\n" \ "663:\n\t" \ "664:\n\t" \ - ".endif\n" \ ".endif\n" #define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \ - __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg), 0) + __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg)) #define ALTERNATIVE_CB(oldinstr, cb) \ - __ALTERNATIVE_CFG(oldinstr, "NOT_AN_INSTRUCTION", ARM64_CB_PATCH, 1, cb) + __ALTERNATIVE_CFG_CB(oldinstr, ARM64_CB_PATCH, 1, cb) #else #include From 434f42546a6eda220f53b489a377a34f35f542d5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 16 Jan 2020 19:17:02 +0100 Subject: [PATCH 3356/3715] watchdog/softlockup: Enforce that timestamp is valid on boot [ Upstream commit 11e31f608b499f044f24b20be73f1dcab3e43f8a ] Robert reported that during boot the watchdog timestamp is set to 0 for one second which is the indicator for a watchdog reset. The reason for this is that the timestamp is in seconds and the time is taken from sched clock and divided by ~1e9. sched clock starts at 0 which means that for the first second during boot the watchdog timestamp is 0, i.e. reset. Use ULONG_MAX as the reset indicator value so the watchdog works correctly right from the start. ULONG_MAX would only conflict with a real timestamp if the system reaches an uptime of 136 years on 32bit and almost eternity on 64bit. Reported-by: Robert Richter Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/87o8v3uuzl.fsf@nanos.tec.linutronix.de Signed-off-by: Sasha Levin --- kernel/watchdog.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 087994b23f8b..e4db5d54c07c 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -164,6 +164,8 @@ static void lockup_detector_update_enable(void) #ifdef CONFIG_SOFTLOCKUP_DETECTOR +#define SOFTLOCKUP_RESET ULONG_MAX + /* Global variables, exported for sysctl */ unsigned int __read_mostly softlockup_panic = CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; @@ -271,7 +273,7 @@ notrace void touch_softlockup_watchdog_sched(void) * Preemption can be enabled. It doesn't matter which CPU's timestamp * gets zeroed here, so use the raw_ operation. */ - raw_cpu_write(watchdog_touch_ts, 0); + raw_cpu_write(watchdog_touch_ts, SOFTLOCKUP_RESET); } notrace void touch_softlockup_watchdog(void) @@ -295,14 +297,14 @@ void touch_all_softlockup_watchdogs(void) * the softlockup check. */ for_each_cpu(cpu, &watchdog_allowed_mask) - per_cpu(watchdog_touch_ts, cpu) = 0; + per_cpu(watchdog_touch_ts, cpu) = SOFTLOCKUP_RESET; wq_watchdog_touch(-1); } void touch_softlockup_watchdog_sync(void) { __this_cpu_write(softlockup_touch_sync, true); - __this_cpu_write(watchdog_touch_ts, 0); + __this_cpu_write(watchdog_touch_ts, SOFTLOCKUP_RESET); } static int is_softlockup(unsigned long touch_ts) @@ -354,7 +356,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) /* .. and repeat */ hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period)); - if (touch_ts == 0) { + if (touch_ts == SOFTLOCKUP_RESET) { if (unlikely(__this_cpu_read(softlockup_touch_sync))) { /* * If the time stamp was touched atomically From 981bc763712ca0a42ec7323d974d07cea0ee6eff Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 30 Dec 2019 17:41:41 +0800 Subject: [PATCH 3357/3715] f2fs: fix memleak of kobject [ Upstream commit fe396ad8e7526f059f7b8c7290d33a1b84adacab ] If kobject_init_and_add() failed, caller needs to invoke kobject_put() to release kobject explicitly. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/sysfs.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 79e45e760c20..a55919eec035 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -507,10 +507,12 @@ int __init f2fs_init_sysfs(void) ret = kobject_init_and_add(&f2fs_feat, &f2fs_feat_ktype, NULL, "features"); - if (ret) + if (ret) { + kobject_put(&f2fs_feat); kset_unregister(&f2fs_kset); - else + } else { f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); + } return ret; } @@ -531,8 +533,11 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi) init_completion(&sbi->s_kobj_unregister); err = kobject_init_and_add(&sbi->s_kobj, &f2fs_sb_ktype, NULL, "%s", sb->s_id); - if (err) + if (err) { + kobject_put(&sbi->s_kobj); + wait_for_completion(&sbi->s_kobj_unregister); return err; + } if (f2fs_proc_root) sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); From 9e09e071f7e0e75546071c5e1c1392641b8ab900 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 13 Jan 2020 18:22:36 +0100 Subject: [PATCH 3358/3715] x86/mm: Fix NX bit clearing issue in kernel_map_pages_in_pgd [ Upstream commit 75fbef0a8b6b4bb19b9a91b5214f846c2dc5139e ] The following commit: 15f003d20782 ("x86/mm/pat: Don't implicitly allow _PAGE_RW in kernel_map_pages_in_pgd()") modified kernel_map_pages_in_pgd() to manage writable permissions of memory mappings in the EFI page table in a different way, but in the process, it removed the ability to clear NX attributes from read-only mappings, by clobbering the clear mask if _PAGE_RW is not being requested. Failure to remove the NX attribute from read-only mappings is unlikely to be a security issue, but it does prevent us from tightening the permissions in the EFI page tables going forward, so let's fix it now. Fixes: 15f003d20782 ("x86/mm/pat: Don't implicitly allow _PAGE_RW in kernel_map_pages_in_pgd() Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200113172245.27925-5-ardb@kernel.org Signed-off-by: Sasha Levin --- arch/x86/mm/pageattr.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 835620ab435f..eaee1a7ed0b5 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -2077,19 +2077,13 @@ int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, .pgd = pgd, .numpages = numpages, .mask_set = __pgprot(0), - .mask_clr = __pgprot(0), + .mask_clr = __pgprot(~page_flags & (_PAGE_NX|_PAGE_RW)), .flags = 0, }; if (!(__supported_pte_mask & _PAGE_NX)) goto out; - if (!(page_flags & _PAGE_NX)) - cpa.mask_clr = __pgprot(_PAGE_NX); - - if (!(page_flags & _PAGE_RW)) - cpa.mask_clr = __pgprot(_PAGE_RW); - if (!(page_flags & _PAGE_ENC)) cpa.mask_clr = pgprot_encrypted(cpa.mask_clr); From 01289476f55d716dc705a50e5fd6296c73342912 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 11 Nov 2019 10:03:54 +0100 Subject: [PATCH 3359/3715] pwm: omap-dmtimer: Remove PWM chip in .remove before making it unfunctional MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 43efdc8f0e6d7088ec61bd55a73bf853f002d043 ] In the old code (e.g.) mutex_destroy() was called before pwmchip_remove(). Between these two calls it is possible that a PWM callback is used which tries to grab the mutex. Fixes: 6604c6556db9 ("pwm: Add PWM driver for OMAP using dual-mode timers") Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/pwm/pwm-omap-dmtimer.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/pwm/pwm-omap-dmtimer.c b/drivers/pwm/pwm-omap-dmtimer.c index 5ad42f33e70c..2e15acf13893 100644 --- a/drivers/pwm/pwm-omap-dmtimer.c +++ b/drivers/pwm/pwm-omap-dmtimer.c @@ -337,6 +337,11 @@ static int pwm_omap_dmtimer_probe(struct platform_device *pdev) static int pwm_omap_dmtimer_remove(struct platform_device *pdev) { struct pwm_omap_dmtimer_chip *omap = platform_get_drvdata(pdev); + int ret; + + ret = pwmchip_remove(&omap->chip); + if (ret) + return ret; if (pm_runtime_active(&omap->dm_timer_pdev->dev)) omap->pdata->stop(omap->dm_timer); @@ -345,7 +350,7 @@ static int pwm_omap_dmtimer_remove(struct platform_device *pdev) mutex_destroy(&omap->mutex); - return pwmchip_remove(&omap->chip); + return 0; } static const struct of_device_id pwm_omap_dmtimer_of_match[] = { From 8f80d62a385ad6e44c3702f489bb3f7ace779f30 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 7 Jan 2020 16:04:41 +0300 Subject: [PATCH 3360/3715] cmd64x: potential buffer overflow in cmd64x_program_timings() [ Upstream commit 117fcc3053606d8db5cef8821dca15022ae578bb ] The "drive->dn" value is a u8 and it is controlled by root only, but it could be out of bounds here so let's check. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/ide/cmd64x.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/ide/cmd64x.c b/drivers/ide/cmd64x.c index b127ed60c733..9dde8390da09 100644 --- a/drivers/ide/cmd64x.c +++ b/drivers/ide/cmd64x.c @@ -65,6 +65,9 @@ static void cmd64x_program_timings(ide_drive_t *drive, u8 mode) struct ide_timing t; u8 arttim = 0; + if (drive->dn >= ARRAY_SIZE(drwtim_regs)) + return; + ide_timing_compute(drive, mode, &t, T, 0); /* From 3c353d76495fced50a0ab75ed3720353013a211e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 7 Jan 2020 16:06:07 +0300 Subject: [PATCH 3361/3715] ide: serverworks: potential overflow in svwks_set_pio_mode() [ Upstream commit ce1f31b4c0b9551dd51874dd5364654ed4ca13ae ] The "drive->dn" variable is a u8 controlled by root. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/ide/serverworks.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/ide/serverworks.c b/drivers/ide/serverworks.c index a97affca18ab..0f57d45484d1 100644 --- a/drivers/ide/serverworks.c +++ b/drivers/ide/serverworks.c @@ -114,6 +114,9 @@ static void svwks_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) struct pci_dev *dev = to_pci_dev(hwif->dev); const u8 pio = drive->pio_mode - XFER_PIO_0; + if (drive->dn >= ARRAY_SIZE(drive_pci)) + return; + pci_write_config_byte(dev, drive_pci[drive->dn], pio_modes[pio]); if (svwks_csb_check(dev)) { @@ -140,6 +143,9 @@ static void svwks_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) u8 ultra_enable = 0, ultra_timing = 0, dma_timing = 0; + if (drive->dn >= ARRAY_SIZE(drive_pci2)) + return; + pci_read_config_byte(dev, (0x56|hwif->channel), &ultra_timing); pci_read_config_byte(dev, 0x54, &ultra_enable); From 8e1b70446107558ebfbae42761da003fed6f6793 Mon Sep 17 00:00:00 2001 From: yu kuai Date: Mon, 20 Jan 2020 19:51:43 +0800 Subject: [PATCH 3362/3715] pwm: Remove set but not set variable 'pwm' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9871abffc81048e20f02e15d6aa4558a44ad53ea ] Fixes gcc '-Wunused-but-set-variable' warning: drivers/pwm/pwm-pca9685.c: In function ‘pca9685_pwm_gpio_free’: drivers/pwm/pwm-pca9685.c:162:21: warning: variable ‘pwm’ set but not used [-Wunused-but-set-variable] It is never used, and so can be removed. In that case, hold and release the lock 'pca->lock' can be removed since nothing will be done between them. Fixes: e926b12c611c ("pwm: Clear chip_data in pwm_put()") Signed-off-by: yu kuai Acked-by: Uwe Kleine-König Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/pwm/pwm-pca9685.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/pwm/pwm-pca9685.c b/drivers/pwm/pwm-pca9685.c index 567f5e2771c4..e1e5dfcb16f3 100644 --- a/drivers/pwm/pwm-pca9685.c +++ b/drivers/pwm/pwm-pca9685.c @@ -170,13 +170,9 @@ static void pca9685_pwm_gpio_set(struct gpio_chip *gpio, unsigned int offset, static void pca9685_pwm_gpio_free(struct gpio_chip *gpio, unsigned int offset) { struct pca9685 *pca = gpiochip_get_data(gpio); - struct pwm_device *pwm; pca9685_pwm_gpio_set(gpio, offset, 0); pm_runtime_put(pca->chip.dev); - mutex_lock(&pca->lock); - pwm = &pca->chip.pwms[offset]; - mutex_unlock(&pca->lock); } static int pca9685_pwm_gpio_get_direction(struct gpio_chip *chip, From ad915b3d80144a437dd3c08491a72e0bb4154a2a Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 5 Dec 2019 14:19:57 +0100 Subject: [PATCH 3363/3715] btrfs: fix possible NULL-pointer dereference in integrity checks [ Upstream commit 3dbd351df42109902fbcebf27104149226a4fcd9 ] A user reports a possible NULL-pointer dereference in btrfsic_process_superblock(). We are assigning state->fs_info to a local fs_info variable and afterwards checking for the presence of state. While we would BUG_ON() a NULL state anyways, we can also just remove the local fs_info copy, as fs_info is only used once as the first argument for btrfs_num_copies(). There we can just pass in state->fs_info as well. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=205003 Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/check-integrity.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 7d5a9b51f0d7..4be07cf31d74 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -642,7 +642,6 @@ static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(dev_t dev, static int btrfsic_process_superblock(struct btrfsic_state *state, struct btrfs_fs_devices *fs_devices) { - struct btrfs_fs_info *fs_info = state->fs_info; struct btrfs_super_block *selected_super; struct list_head *dev_head = &fs_devices->devices; struct btrfs_device *device; @@ -713,7 +712,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, break; } - num_copies = btrfs_num_copies(fs_info, next_bytenr, + num_copies = btrfs_num_copies(state->fs_info, next_bytenr, state->metablock_size); if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) pr_info("num_copies(log_bytenr=%llu) = %d\n", From 76d4e6aeac0b7cb830f72780e89b96ba3052118f Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 8 Jan 2020 15:29:53 +0100 Subject: [PATCH 3364/3715] btrfs: safely advance counter when looking up bio csums [ Upstream commit 4babad10198fa73fe73239d02c2e99e3333f5f5c ] Dan's smatch tool reports fs/btrfs/file-item.c:295 btrfs_lookup_bio_sums() warn: should this be 'count == -1' which points to the while (count--) loop. With count == 0 the check itself could decrement it to -1. There's a WARN_ON a few lines below that has never been seen in practice though. It turns out that the value of page_bytes_left matches the count (by sectorsize multiples). The loop never reaches the state where count would go to -1, because page_bytes_left == 0 is found first and this breaks out. For clarity, use only plain check on count (and only for positive value), decrement safely inside the loop. Any other discrepancy after the whole bio list processing should be reported by the exising WARN_ON_ONCE as well. Reported-by: Dan Carpenter Reviewed-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/file-item.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 702b3606ad0e..717d82d51bb1 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -288,7 +288,8 @@ found: csum += count * csum_size; nblocks -= count; next: - while (count--) { + while (count > 0) { + count--; disk_bytenr += fs_info->sectorsize; offset += fs_info->sectorsize; page_bytes_left -= fs_info->sectorsize; From 36bd3298141ab084e6d68f6f54d8eefd80381fb0 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Fri, 10 Jan 2020 12:26:34 +0800 Subject: [PATCH 3365/3715] btrfs: device stats, log when stats are zeroed [ Upstream commit a69976bc69308aa475d0ba3b8b3efd1d013c0460 ] We had a report indicating that some read errors aren't reported by the device stats in the userland. It is important to have the errors reported in the device stat as user land scripts might depend on it to take the reasonable corrective actions. But to debug these issue we need to be really sure that request to reset the device stat did not come from the userland itself. So log an info message when device error reset happens. For example: BTRFS info (device sdc): device stats zeroed by btrfs(9223) Reported-by: philip@philip-seeger.de Link: https://www.spinics.net/lists/linux-btrfs/msg96528.html Reviewed-by: Josef Bacik Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/volumes.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 358e930df4ac..6d34842912e8 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -7227,6 +7227,8 @@ int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info, else btrfs_dev_stat_reset(dev, i); } + btrfs_info(fs_info, "device stats zeroed by %s (%d)", + current->comm, task_pid_nr(current)); } else { for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) if (stats->nr_items > i) From e8e2344aa493382890e2f9fd3bff343b7116aaa2 Mon Sep 17 00:00:00 2001 From: Brandon Maier Date: Thu, 30 May 2019 17:52:23 -0500 Subject: [PATCH 3366/3715] remoteproc: Initialize rproc_class before use [ Upstream commit a8f40111d184098cd2b3dc0c7170c42250a5fa09 ] The remoteproc_core and remoteproc drivers all initialize with module_init(). However remoteproc drivers need the rproc_class during their probe. If one of the remoteproc drivers runs init and gets through probe before remoteproc_init() runs, a NULL pointer access of rproc_class's `glue_dirs` spinlock occurs. > Unable to handle kernel NULL pointer dereference at virtual address 000000dc > pgd = c0004000 > [000000dc] *pgd=00000000 > Internal error: Oops: 5 [#1] PREEMPT ARM > Modules linked in: > CPU: 0 PID: 1 Comm: swapper Tainted: G W 4.14.106-rt56 #1 > Hardware name: Generic OMAP36xx (Flattened Device Tree) > task: c6050000 task.stack: c604a000 > PC is at rt_spin_lock+0x40/0x6c > LR is at rt_spin_lock+0x28/0x6c > pc : [] lr : [] psr: 60000013 > sp : c604bdc0 ip : 00000000 fp : 00000000 > r10: 00000000 r9 : c61c7c10 r8 : c6269c20 > r7 : c0905888 r6 : c6269c20 r5 : 00000000 r4 : 000000d4 > r3 : 000000dc r2 : c6050000 r1 : 00000002 r0 : 000000d4 > Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none ... > [] (rt_spin_lock) from [] (get_device_parent+0x54/0x17c) > [] (get_device_parent) from [] (device_add+0xe0/0x5b4) > [] (device_add) from [] (rproc_add+0x18/0xd8) > [] (rproc_add) from [] (my_rproc_probe+0x158/0x204) > [] (my_rproc_probe) from [] (platform_drv_probe+0x34/0x70) > [] (platform_drv_probe) from [] (driver_probe_device+0x2c8/0x420) > [] (driver_probe_device) from [] (__driver_attach+0x100/0x11c) > [] (__driver_attach) from [] (bus_for_each_dev+0x7c/0xc0) > [] (bus_for_each_dev) from [] (bus_add_driver+0x1cc/0x264) > [] (bus_add_driver) from [] (driver_register+0x78/0xf8) > [] (driver_register) from [] (do_one_initcall+0x100/0x190) > [] (do_one_initcall) from [] (kernel_init_freeable+0x130/0x1d0) > [] (kernel_init_freeable) from [] (kernel_init+0x8/0x114) > [] (kernel_init) from [] (ret_from_fork+0x14/0x24) > Code: e2843008 e3c2203f f5d3f000 e5922010 (e193cf9f) > ---[ end trace 0000000000000002 ]--- Signed-off-by: Brandon Maier Link: https://lore.kernel.org/r/20190530225223.136420-1-brandon.maier@rockwellcollins.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/remoteproc/remoteproc_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index eab14b414bf0..cc733b89560a 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -1620,7 +1620,7 @@ static int __init remoteproc_init(void) return 0; } -module_init(remoteproc_init); +subsys_initcall(remoteproc_init); static void __exit remoteproc_exit(void) { From fb065f7bc0d378dda1b5e98d8afb22ccfc07e0aa Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 17 Jan 2020 01:38:43 +0800 Subject: [PATCH 3367/3715] irqchip/mbigen: Set driver .suppress_bind_attrs to avoid remove problems [ Upstream commit d6152e6ec9e2171280436f7b31a571509b9287e1 ] The following crash can be seen for setting CONFIG_DEBUG_TEST_DRIVER_REMOVE=y for DT FW (which some people still use): Hisilicon MBIGEN-V2 60080000.interrupt-controller: Failed to create mbi-gen irqdomain Hisilicon MBIGEN-V2: probe of 60080000.interrupt-controller failed with error -12 [...] Unable to handle kernel paging request at virtual address 0000000000005008 Mem abort info: ESR = 0x96000004 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000004 CM = 0, WnR = 0 user pgtable: 4k pages, 48-bit VAs, pgdp=0000041fb9990000 [0000000000005008] pgd=0000000000000000 Internal error: Oops: 96000004 [#1] PREEMPT SMP Modules linked in: CPU: 7 PID: 1 Comm: swapper/0 Not tainted 5.5.0-rc6-00002-g3fc42638a506-dirty #1622 Hardware name: Huawei Taishan 2280 /D05, BIOS Hisilicon D05 IT21 Nemo 2.0 RC0 04/18/2018 pstate: 40000085 (nZcv daIf -PAN -UAO) pc : mbigen_set_type+0x38/0x60 lr : __irq_set_trigger+0x6c/0x188 sp : ffff800014b4b400 x29: ffff800014b4b400 x28: 0000000000000007 x27: 0000000000000000 x26: 0000000000000000 x25: ffff041fd83bd0d4 x24: ffff041fd83bd188 x23: 0000000000000000 x22: ffff80001193ce00 x21: 0000000000000004 x20: 0000000000000000 x19: ffff041fd83bd000 x18: ffffffffffffffff x17: 0000000000000000 x16: 0000000000000000 x15: ffff8000119098c8 x14: ffff041fb94ec91c x13: ffff041fb94ec1a1 x12: 0000000000000030 x11: 0101010101010101 x10: 0000000000000040 x9 : 0000000000000000 x8 : ffff041fb98c6680 x7 : ffff800014b4b380 x6 : ffff041fd81636c8 x5 : 0000000000000000 x4 : 000000000000025f x3 : 0000000000005000 x2 : 0000000000005008 x1 : 0000000000000004 x0 : 0000000080000000 Call trace: mbigen_set_type+0x38/0x60 __setup_irq+0x744/0x900 request_threaded_irq+0xe0/0x198 pcie_pme_probe+0x98/0x118 pcie_port_probe_service+0x38/0x78 really_probe+0xa0/0x3e0 driver_probe_device+0x58/0x100 __device_attach_driver+0x90/0xb0 bus_for_each_drv+0x64/0xc8 __device_attach+0xd8/0x138 device_initial_probe+0x10/0x18 bus_probe_device+0x90/0x98 device_add+0x4c4/0x770 device_register+0x1c/0x28 pcie_port_device_register+0x1e4/0x4f0 pcie_portdrv_probe+0x34/0xd8 local_pci_probe+0x3c/0xa0 pci_device_probe+0x128/0x1c0 really_probe+0xa0/0x3e0 driver_probe_device+0x58/0x100 __device_attach_driver+0x90/0xb0 bus_for_each_drv+0x64/0xc8 __device_attach+0xd8/0x138 device_attach+0x10/0x18 pci_bus_add_device+0x4c/0xb8 pci_bus_add_devices+0x38/0x88 pci_host_probe+0x3c/0xc0 pci_host_common_probe+0xf0/0x208 hisi_pcie_almost_ecam_probe+0x24/0x30 platform_drv_probe+0x50/0xa0 really_probe+0xa0/0x3e0 driver_probe_device+0x58/0x100 device_driver_attach+0x6c/0x90 __driver_attach+0x84/0xc8 bus_for_each_dev+0x74/0xc8 driver_attach+0x20/0x28 bus_add_driver+0x148/0x1f0 driver_register+0x60/0x110 __platform_driver_register+0x40/0x48 hisi_pcie_almost_ecam_driver_init+0x1c/0x24 The specific problem here is that the mbigen driver real probe has failed as the mbigen_of_create_domain()->of_platform_device_create() call fails, the reason for that being that we never destroyed the platform device created during the remove test dry run and there is some conflict. Since we generally would never want to unbind this driver, and to save adding a driver tear down path for that, just set the driver .suppress_bind_attrs member to avoid this possibility. Signed-off-by: John Garry Signed-off-by: Marc Zyngier Reviewed-by: Hanjun Guo Link: https://lore.kernel.org/r/1579196323-180137-1-git-send-email-john.garry@huawei.com Signed-off-by: Sasha Levin --- drivers/irqchip/irq-mbigen.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c index 98b6e1d4b1a6..f7fdbf5d183b 100644 --- a/drivers/irqchip/irq-mbigen.c +++ b/drivers/irqchip/irq-mbigen.c @@ -381,6 +381,7 @@ static struct platform_driver mbigen_platform_driver = { .name = "Hisilicon MBIGEN-V2", .of_match_table = mbigen_of_match, .acpi_match_table = ACPI_PTR(mbigen_acpi_match), + .suppress_bind_attrs = true, }, .probe = mbigen_device_probe, }; From 02b675041fcd930fd7d95b4e7bb68ddea4b21d9e Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Mon, 20 Jan 2020 18:01:17 +0200 Subject: [PATCH 3368/3715] ALSA: hda/hdmi - add retry logic to parse_intel_hdmi() [ Upstream commit 2928fa0a97ebb9549cb877fdc99aed9b95438c3a ] The initial snd_hda_get_sub_node() can fail on certain devices (e.g. some Chromebook models using Intel GLK). The failure rate is very low, but as this is is part of the probe process, end-user impact is high. In observed cases, related hardware status registers have expected values, but the node query still fails. Retrying the node query does seem to help, so fix the problem by adding retry logic to the query. This does not impact non-Intel platforms. BugLink: https://github.com/thesofproject/linux/issues/1642 Signed-off-by: Kai Vehmanen Reviewed-by: Takashi Iwai Link: https://lore.kernel.org/r/20200120160117.29130-4-kai.vehmanen@linux.intel.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_hdmi.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index f21405597215..12913368c231 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -2574,9 +2574,12 @@ static int alloc_intel_hdmi(struct hda_codec *codec) /* parse and post-process for Intel codecs */ static int parse_intel_hdmi(struct hda_codec *codec) { - int err; + int err, retries = 3; + + do { + err = hdmi_parse_codec(codec); + } while (err < 0 && retries--); - err = hdmi_parse_codec(codec); if (err < 0) { generic_spec_free(codec); return err; From aca257592d0b571f86b08f0336229e0d8cc41bbc Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 22 Jan 2020 13:11:54 +0900 Subject: [PATCH 3369/3715] x86/decoder: Add TEST opcode to Group3-2 [ Upstream commit 8b7e20a7ba54836076ff35a28349dabea4cec48f ] Add TEST opcode to Group3-2 reg=001b as same as Group3-1 does. Commit 12a78d43de76 ("x86/decoder: Add new TEST instruction pattern") added a TEST opcode assignment to f6 XX/001/XXX (Group 3-1), but did not add f7 XX/001/XXX (Group 3-2). Actually, this TEST opcode variant (ModRM.reg /1) is not described in the Intel SDM Vol2 but in AMD64 Architecture Programmer's Manual Vol.3, Appendix A.2 Table A-6. ModRM.reg Extensions for the Primary Opcode Map. Without this fix, Randy found a warning by insn_decoder_test related to this issue as below. HOSTCC arch/x86/tools/insn_decoder_test HOSTCC arch/x86/tools/insn_sanity TEST posttest arch/x86/tools/insn_decoder_test: warning: Found an x86 instruction decoder bug, please report this. arch/x86/tools/insn_decoder_test: warning: ffffffff81000bf1: f7 0b 00 01 08 00 testl $0x80100,(%rbx) arch/x86/tools/insn_decoder_test: warning: objdump says 6 bytes, but insn_get_length() says 2 arch/x86/tools/insn_decoder_test: warning: Decoded and checked 11913894 instructions with 1 failures TEST posttest arch/x86/tools/insn_sanity: Success: decoded and checked 1000000 random instructions with 0 errors (seed:0x871ce29c) To fix this error, add the TEST opcode according to AMD64 APM Vol.3. [ bp: Massage commit message. ] Reported-by: Randy Dunlap Signed-off-by: Masami Hiramatsu Signed-off-by: Borislav Petkov Acked-by: Randy Dunlap Tested-by: Randy Dunlap Link: https://lkml.kernel.org/r/157966631413.9580.10311036595431878351.stgit@devnote2 Signed-off-by: Sasha Levin --- arch/x86/lib/x86-opcode-map.txt | 2 +- tools/objtool/arch/x86/lib/x86-opcode-map.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 0a0e9112f284..5cb9f009f2be 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -909,7 +909,7 @@ EndTable GrpTable: Grp3_2 0: TEST Ev,Iz -1: +1: TEST Ev,Iz 2: NOT Ev 3: NEG Ev 4: MUL rAX,Ev diff --git a/tools/objtool/arch/x86/lib/x86-opcode-map.txt b/tools/objtool/arch/x86/lib/x86-opcode-map.txt index 0a0e9112f284..5cb9f009f2be 100644 --- a/tools/objtool/arch/x86/lib/x86-opcode-map.txt +++ b/tools/objtool/arch/x86/lib/x86-opcode-map.txt @@ -909,7 +909,7 @@ EndTable GrpTable: Grp3_2 0: TEST Ev,Iz -1: +1: TEST Ev,Iz 2: NOT Ev 3: NEG Ev 4: MUL rAX,Ev From cf3a133389666d42e79a41095ba4018616f6d186 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 10 Dec 2019 14:33:39 +0100 Subject: [PATCH 3370/3715] s390/ftrace: generate traced function stack frame [ Upstream commit 45f7a0da600d3c409b5ad8d5ddddacd98ddc8840 ] Currently backtrace from ftraced function does not contain ftraced function itself. e.g. for "path_openat": arch_stack_walk+0x15c/0x2d8 stack_trace_save+0x50/0x68 stack_trace_call+0x15e/0x3d8 ftrace_graph_caller+0x0/0x1c <-- ftrace code do_filp_open+0x7c/0xe8 <-- ftraced function caller do_open_execat+0x76/0x1b8 open_exec+0x52/0x78 load_elf_binary+0x180/0x1160 search_binary_handler+0x8e/0x288 load_script+0x2a8/0x2b8 search_binary_handler+0x8e/0x288 __do_execve_file.isra.39+0x6fa/0xb40 __s390x_sys_execve+0x56/0x68 system_call+0xdc/0x2d8 Ftraced function is expected in the backtrace by ftrace kselftests, which are now failing. It would also be nice to have it for clarity reasons. "ftrace_caller" itself is called without stack frame allocated for it and does not store its caller (ftraced function). Instead it simply allocates a stack frame for "ftrace_trace_function" and sets backchain to point to ftraced function stack frame (which contains ftraced function caller in saved r14). To fix this issue make "ftrace_caller" allocate a stack frame for itself just to store ftraced function for the stack unwinder. As a result backtrace looks like the following: arch_stack_walk+0x15c/0x2d8 stack_trace_save+0x50/0x68 stack_trace_call+0x15e/0x3d8 ftrace_graph_caller+0x0/0x1c <-- ftrace code path_openat+0x6/0xd60 <-- ftraced function do_filp_open+0x7c/0xe8 <-- ftraced function caller do_open_execat+0x76/0x1b8 open_exec+0x52/0x78 load_elf_binary+0x180/0x1160 search_binary_handler+0x8e/0x288 load_script+0x2a8/0x2b8 search_binary_handler+0x8e/0x288 __do_execve_file.isra.39+0x6fa/0xb40 __s390x_sys_execve+0x56/0x68 system_call+0xdc/0x2d8 Reported-by: Sven Schnelle Tested-by: Sven Schnelle Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- arch/s390/kernel/mcount.S | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 27110f3294ed..0cfd5a83a1da 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -25,6 +25,12 @@ ENTRY(ftrace_stub) #define STACK_PTREGS (STACK_FRAME_OVERHEAD) #define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS) #define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) +#ifdef __PACK_STACK +/* allocate just enough for r14, r15 and backchain */ +#define TRACED_FUNC_FRAME_SIZE 24 +#else +#define TRACED_FUNC_FRAME_SIZE STACK_FRAME_OVERHEAD +#endif ENTRY(_mcount) BR_EX %r14 @@ -38,9 +44,16 @@ ENTRY(ftrace_caller) #ifndef CC_USING_HOTPATCH aghi %r0,MCOUNT_RETURN_FIXUP #endif - aghi %r15,-STACK_FRAME_SIZE + # allocate stack frame for ftrace_caller to contain traced function + aghi %r15,-TRACED_FUNC_FRAME_SIZE stg %r1,__SF_BACKCHAIN(%r15) + stg %r0,(__SF_GPRS+8*8)(%r15) + stg %r15,(__SF_GPRS+9*8)(%r15) + # allocate pt_regs and stack frame for ftrace_trace_function + aghi %r15,-STACK_FRAME_SIZE stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15) + aghi %r1,-TRACED_FUNC_FRAME_SIZE + stg %r1,__SF_BACKCHAIN(%r15) stg %r0,(STACK_PTREGS_PSW+8)(%r15) stmg %r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15) #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES From cb259e08cb91c893028ea19188e5fae8ea3d9959 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 16 Jan 2020 17:57:58 +0000 Subject: [PATCH 3371/3715] driver core: platform: fix u32 greater or equal to zero comparison [ Upstream commit 0707cfa5c3ef58effb143db9db6d6e20503f9dec ] Currently the check that a u32 variable i is >= 0 is always true because the unsigned variable will never be negative, causing the loop to run forever. Fix this by changing the pre-decrement check to a zero check on i followed by a decrement of i. Addresses-Coverity: ("Unsigned compared against 0") Fixes: 39cc539f90d0 ("driver core: platform: Prevent resouce overflow from causing infinite loops") Signed-off-by: Colin Ian King Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20200116175758.88396-1-colin.king@canonical.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/base/platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index e3d40c41c33b..bcb6519fe211 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -428,7 +428,7 @@ int platform_device_add(struct platform_device *pdev) pdev->id = PLATFORM_DEVID_AUTO; } - while (--i >= 0) { + while (i--) { struct resource *r = &pdev->resource[i]; if (r->parent) release_resource(r); From e0e88677fabe4e514e2ed27815ec3141120a4afa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20Gro=C3=9Fe?= Date: Wed, 22 Jan 2020 19:01:06 +0100 Subject: [PATCH 3372/3715] ALSA: hda - Add docking station support for Lenovo Thinkpad T420s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ef7d84caa5928b40b1c93a26dbe5a3f12737c6ab ] Lenovo Thinkpad T420s uses the same codec as T420, so apply the same quirk to enable audio output on a docking station. Signed-off-by: Peter Große Link: https://lore.kernel.org/r/20200122180106.9351-1-pegro@friiks.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_conexant.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 382b6d2ed803..9cc9304ff21a 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -969,6 +969,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x17aa, 0x215f, "Lenovo T510", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x21ce, "Lenovo T420", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x21cf, "Lenovo T520", CXT_PINCFG_LENOVO_TP410), + SND_PCI_QUIRK(0x17aa, 0x21d2, "Lenovo T420s", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x21da, "Lenovo X220", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x21db, "Lenovo X220-tablet", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x38af, "Lenovo IdeaPad Z560", CXT_FIXUP_MUTE_LED_EAPD), From 77f804437a9cdfe4e782e31539f6cb411cf2188c Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Wed, 21 Aug 2019 16:26:53 +1000 Subject: [PATCH 3373/3715] powerpc/sriov: Remove VF eeh_dev state when disabling SR-IOV [ Upstream commit 1fb4124ca9d456656a324f1ee29b7bf942f59ac8 ] When disabling virtual functions on an SR-IOV adapter we currently do not correctly remove the EEH state for the now-dead virtual functions. When removing the pci_dn that was created for the VF when SR-IOV was enabled we free the corresponding eeh_dev without removing it from the child device list of the eeh_pe that contained it. This can result in crashes due to the use-after-free. Signed-off-by: Oliver O'Halloran Reviewed-by: Sam Bobroff Tested-by: Sam Bobroff Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190821062655.19735-1-oohall@gmail.com Signed-off-by: Sasha Levin --- arch/powerpc/kernel/pci_dn.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 0e395afbf0f4..0e45a446a8c7 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -261,9 +261,22 @@ void remove_dev_pci_data(struct pci_dev *pdev) continue; #ifdef CONFIG_EEH - /* Release EEH device for the VF */ + /* + * Release EEH state for this VF. The PCI core + * has already torn down the pci_dev for this VF, but + * we're responsible to removing the eeh_dev since it + * has the same lifetime as the pci_dn that spawned it. + */ edev = pdn_to_eeh_dev(pdn); if (edev) { + /* + * We allocate pci_dn's for the totalvfs count, + * but only only the vfs that were activated + * have a configured PE. + */ + if (edev->pe) + eeh_rmv_from_parent_pe(edev); + pdn->edev = NULL; kfree(edev); } From 6ad76bf5b160a17143dc9a995e289faa26a2fca8 Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Wed, 4 Dec 2019 20:46:11 +0800 Subject: [PATCH 3374/3715] jbd2: switch to use jbd2_journal_abort() when failed to submit the commit record [ Upstream commit d0a186e0d3e7ac05cc77da7c157dae5aa59f95d9 ] We invoke jbd2_journal_abort() to abort the journal and record errno in the jbd2 superblock when committing journal transaction besides the failure on submitting the commit record. But there is no need for the case and we can also invoke jbd2_journal_abort() instead of __jbd2_journal_abort_hard(). Fixes: 818d276ceb83a ("ext4: Add the journal checksum feature") Signed-off-by: zhangyi (F) Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20191204124614.45424-2-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/jbd2/commit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index cb0da3d4adc0..1a4bd8d9636e 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -783,7 +783,7 @@ start_journal_io: err = journal_submit_commit_record(journal, commit_transaction, &cbh, crc32_sum); if (err) - __jbd2_journal_abort_hard(journal); + jbd2_journal_abort(journal, err); } blk_finish_plug(&plug); @@ -876,7 +876,7 @@ start_journal_io: err = journal_submit_commit_record(journal, commit_transaction, &cbh, crc32_sum); if (err) - __jbd2_journal_abort_hard(journal); + jbd2_journal_abort(journal, err); } if (cbh) err = journal_wait_on_commit_record(journal, cbh); From 111159e8e3232b3897f27409244764c5aaba3aa2 Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Wed, 4 Dec 2019 20:46:13 +0800 Subject: [PATCH 3375/3715] jbd2: make sure ESHUTDOWN to be recorded in the journal superblock [ Upstream commit 0e98c084a21177ef136149c6a293b3d1eb33ff92 ] Commit fb7c02445c49 ("ext4: pass -ESHUTDOWN code to jbd2 layer") want to allow jbd2 layer to distinguish shutdown journal abort from other error cases. So the ESHUTDOWN should be taken precedence over any other errno which has already been recoded after EXT4_FLAGS_SHUTDOWN is set, but it only update errno in the journal suoerblock now if the old errno is 0. Fixes: fb7c02445c49 ("ext4: pass -ESHUTDOWN code to jbd2 layer") Signed-off-by: zhangyi (F) Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20191204124614.45424-4-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/jbd2/journal.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index eae9ced846d5..6e054b368b5f 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -2119,8 +2119,7 @@ static void __journal_abort_soft (journal_t *journal, int errno) if (journal->j_flags & JBD2_ABORT) { write_unlock(&journal->j_state_lock); - if (!old_errno && old_errno != -ESHUTDOWN && - errno == -ESHUTDOWN) + if (old_errno != -ESHUTDOWN && errno == -ESHUTDOWN) jbd2_journal_update_sb_errno(journal); return; } From 26b1c4da29a4d33d31d63a5908dce5ee80d7a7f5 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 10 Jan 2020 13:37:59 +0100 Subject: [PATCH 3376/3715] ARM: 8951/1: Fix Kexec compilation issue. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 76950f7162cad51d2200ebd22c620c14af38f718 ] To perform the reserve_crashkernel() operation kexec uses SECTION_SIZE to find a memblock in a range. SECTION_SIZE is not defined for nommu systems. Trying to compile kexec in these conditions results in a build error: linux/arch/arm/kernel/setup.c: In function ‘reserve_crashkernel’: linux/arch/arm/kernel/setup.c:1016:25: error: ‘SECTION_SIZE’ undeclared (first use in this function); did you mean ‘SECTIONS_WIDTH’? crash_size, SECTION_SIZE); ^~~~~~~~~~~~ SECTIONS_WIDTH linux/arch/arm/kernel/setup.c:1016:25: note: each undeclared identifier is reported only once for each function it appears in linux/scripts/Makefile.build:265: recipe for target 'arch/arm/kernel/setup.o' failed Make KEXEC depend on MMU to fix the compilation issue. Signed-off-by: Vincenzo Frascino Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index ba9325fc75b8..7a8fbe9a077b 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -2005,7 +2005,7 @@ config XIP_PHYS_ADDR config KEXEC bool "Kexec system call (EXPERIMENTAL)" depends on (!SMP || PM_SLEEP_SMP) - depends on !CPU_V7M + depends on MMU select KEXEC_CORE help kexec is a system call that implements the ability to shutdown your From 16563da40b950b6d6f2f3fe0205cc1cd60fb4966 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 17 Dec 2019 18:15:46 -0700 Subject: [PATCH 3377/3715] hostap: Adjust indentation in prism2_hostapd_add_sta [ Upstream commit b61156fba74f659d0bc2de8f2dbf5bad9f4b8faf ] Clang warns: ../drivers/net/wireless/intersil/hostap/hostap_ap.c:2511:3: warning: misleading indentation; statement is not part of the previous 'if' [-Wmisleading-indentation] if (sta->tx_supp_rates & WLAN_RATE_5M5) ^ ../drivers/net/wireless/intersil/hostap/hostap_ap.c:2509:2: note: previous statement is here if (sta->tx_supp_rates & WLAN_RATE_2M) ^ 1 warning generated. This warning occurs because there is a space before the tab on this line. Remove it so that the indentation is consistent with the Linux kernel coding style and clang no longer warns. Fixes: ff1d2767d5a4 ("Add HostAP wireless driver.") Link: https://github.com/ClangBuiltLinux/linux/issues/813 Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/intersil/hostap/hostap_ap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intersil/hostap/hostap_ap.c b/drivers/net/wireless/intersil/hostap/hostap_ap.c index 1a8d8db80b05..486ca1ee306e 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_ap.c +++ b/drivers/net/wireless/intersil/hostap/hostap_ap.c @@ -2568,7 +2568,7 @@ static int prism2_hostapd_add_sta(struct ap_data *ap, sta->supported_rates[0] = 2; if (sta->tx_supp_rates & WLAN_RATE_2M) sta->supported_rates[1] = 4; - if (sta->tx_supp_rates & WLAN_RATE_5M5) + if (sta->tx_supp_rates & WLAN_RATE_5M5) sta->supported_rates[2] = 11; if (sta->tx_supp_rates & WLAN_RATE_11M) sta->supported_rates[3] = 22; From 8fcb294f77535fc14b050879c7817f36df3a4a6f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 26 Jan 2020 00:09:54 +0000 Subject: [PATCH 3378/3715] iwlegacy: ensure loop counter addr does not wrap and cause an infinite loop [ Upstream commit c2f9a4e4a5abfc84c01b738496b3fd2d471e0b18 ] The loop counter addr is a u16 where as the upper limit of the loop is an int. In the unlikely event that the il->cfg->eeprom_size is greater than 64K then we end up with an infinite loop since addr will wrap around an never reach upper loop limit. Fix this by making addr an int. Addresses-Coverity: ("Infinite loop") Fixes: be663ab67077 ("iwlwifi: split the drivers for agn and legacy devices 3945/4965") Signed-off-by: Colin Ian King Acked-by: Stanislaw Gruszka Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlegacy/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlegacy/common.c b/drivers/net/wireless/intel/iwlegacy/common.c index 8d5acda92a9b..6e6b124f0d5e 100644 --- a/drivers/net/wireless/intel/iwlegacy/common.c +++ b/drivers/net/wireless/intel/iwlegacy/common.c @@ -717,7 +717,7 @@ il_eeprom_init(struct il_priv *il) u32 gp = _il_rd(il, CSR_EEPROM_GP); int sz; int ret; - u16 addr; + int addr; /* allocate eeprom */ sz = il->cfg->eeprom_size; From e534bec3712fc768f133defd6303353087bf90c8 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 22 Jan 2020 11:07:56 +1000 Subject: [PATCH 3379/3715] cifs: fix NULL dereference in match_prepath [ Upstream commit fe1292686333d1dadaf84091f585ee903b9ddb84 ] RHBZ: 1760879 Fix an oops in match_prepath() by making sure that the prepath string is not NULL before we pass it into strcmp(). This is similar to other checks we make for example in cifs_root_iget() Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/connect.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index f0b1279a7de6..6e5ecf70996a 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3047,8 +3047,10 @@ match_prepath(struct super_block *sb, struct cifs_mnt_data *mnt_data) { struct cifs_sb_info *old = CIFS_SB(sb); struct cifs_sb_info *new = mnt_data->cifs_sb; - bool old_set = old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH; - bool new_set = new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH; + bool old_set = (old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) && + old->prepath; + bool new_set = (new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) && + new->prepath; if (old_set && new_set && !strcmp(new->prepath, old->prepath)) return 1; From b817cc9f7d50bacbb6bd9b0f764432bdac68327c Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 10 Dec 2019 20:29:40 -0500 Subject: [PATCH 3380/3715] ceph: check availability of mds cluster on mount after wait timeout [ Upstream commit 97820058fb2831a4b203981fa2566ceaaa396103 ] If all the MDS daemons are down for some reason, then the first mount attempt will fail with EIO after the mount request times out. A mount attempt will also fail with EIO if all of the MDS's are laggy. This patch changes the code to return -EHOSTUNREACH in these situations and adds a pr_info error message to help the admin determine the cause. URL: https://tracker.ceph.com/issues/4386 Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov Signed-off-by: Sasha Levin --- fs/ceph/mds_client.c | 3 +-- fs/ceph/super.c | 5 +++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index b968334f841e..f36ddfea4997 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2261,8 +2261,7 @@ static int __do_request(struct ceph_mds_client *mdsc, if (!(mdsc->fsc->mount_options->flags & CEPH_MOUNT_OPT_MOUNTWAIT) && !ceph_mdsmap_is_cluster_available(mdsc->mdsmap)) { - err = -ENOENT; - pr_info("probably no mds server is up\n"); + err = -EHOSTUNREACH; goto finish; } } diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 088c4488b449..6b10b20bfe32 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -1055,6 +1055,11 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type, return res; out_splat: + if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) { + pr_info("No mds server is up or the cluster is laggy\n"); + err = -EHOSTUNREACH; + } + ceph_mdsc_close_sessions(fsc->mdsc); deactivate_locked_super(sb); goto out_final; From 730dd07c6bb12218ccdc783a14185697e967cdb0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 16 Dec 2019 11:24:57 +0000 Subject: [PATCH 3381/3715] irqchip/gic-v3: Only provision redistributors that are enabled in ACPI [ Upstream commit 926b5dfa6b8dc666ff398044af6906b156e1d949 ] We currently allocate redistributor region structures for individual redistributors when ACPI doesn't present us with compact MMIO regions covering multiple redistributors. It turns out that we allocate these structures even when the redistributor is flagged as disabled by ACPI. It works fine until someone actually tries to tarse one of these structures, and access the corresponding MMIO region. Instead, track the number of enabled redistributors, and only allocate what is required. This makes sure that there is no invalid data to misuse. Signed-off-by: Marc Zyngier Reported-by: Heyi Guo Tested-by: Heyi Guo Link: https://lore.kernel.org/r/20191216062745.63397-1-guoheyi@huawei.com Signed-off-by: Sasha Levin --- drivers/irqchip/irq-gic-v3.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 3d7374655587..730b3c1cf7f6 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1253,6 +1253,7 @@ static struct struct redist_region *redist_regs; u32 nr_redist_regions; bool single_redist; + int enabled_rdists; u32 maint_irq; int maint_irq_mode; phys_addr_t vcpu_base; @@ -1347,8 +1348,10 @@ static int __init gic_acpi_match_gicc(struct acpi_subtable_header *header, * If GICC is enabled and has valid gicr base address, then it means * GICR base is presented via GICC */ - if ((gicc->flags & ACPI_MADT_ENABLED) && gicc->gicr_base_address) + if ((gicc->flags & ACPI_MADT_ENABLED) && gicc->gicr_base_address) { + acpi_data.enabled_rdists++; return 0; + } /* * It's perfectly valid firmware can pass disabled GICC entry, driver @@ -1378,8 +1381,10 @@ static int __init gic_acpi_count_gicr_regions(void) count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT, gic_acpi_match_gicc, 0); - if (count > 0) + if (count > 0) { acpi_data.single_redist = true; + count = acpi_data.enabled_rdists; + } return count; } From b06d001e8831d05e0d6d0d1549f489c19fffa52c Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 28 Jan 2020 14:39:26 +1000 Subject: [PATCH 3382/3715] drm/nouveau/disp/nv50-: prevent oops when no channel method map provided [ Upstream commit 0e6176c6d286316e9431b4f695940cfac4ffe6c2 ] The implementations for most channel types contains a map of methods to priv registers in order to provide debugging info when a disp exception has been raised. This info is missing from the implementation of PIO channels as they're rather simplistic already, however, if an exception is raised by one of them, we'd end up triggering a NULL-pointer deref. Not ideal... Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=206299 Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c index 0c0310498afd..cd9666583d4b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c @@ -73,6 +73,8 @@ nv50_disp_chan_mthd(struct nv50_disp_chan *chan, int debug) if (debug > subdev->debug) return; + if (!mthd) + return; for (i = 0; (list = mthd->data[i].mthd) != NULL; i++) { u32 base = chan->head * mthd->addr; From 9df00bc555e465cb38490ea986cf06191b25a979 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 24 Jan 2020 10:02:56 +0300 Subject: [PATCH 3383/3715] ftrace: fpid_next() should increase position index [ Upstream commit e4075e8bdffd93a9b6d6e1d52fabedceeca5a91b ] if seq_file .next fuction does not change position index, read after some lseek can generate unexpected output. Without patch: # dd bs=4 skip=1 if=/sys/kernel/tracing/set_ftrace_pid dd: /sys/kernel/tracing/set_ftrace_pid: cannot skip to specified offset id no pid 2+1 records in 2+1 records out 10 bytes copied, 0.000213285 s, 46.9 kB/s Notice the "id" followed by "no pid". With the patch: # dd bs=4 skip=1 if=/sys/kernel/tracing/set_ftrace_pid dd: /sys/kernel/tracing/set_ftrace_pid: cannot skip to specified offset id 0+1 records in 0+1 records out 3 bytes copied, 0.000202112 s, 14.8 kB/s Notice that it only prints "id" and not the "no pid" afterward. Link: http://lkml.kernel.org/r/4f87c6ad-f114-30bb-8506-c32274ce2992@virtuozzo.com https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sasha Levin --- kernel/trace/ftrace.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8974ecbcca3c..8a8d92a8045b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -6317,9 +6317,10 @@ static void *fpid_next(struct seq_file *m, void *v, loff_t *pos) struct trace_array *tr = m->private; struct trace_pid_list *pid_list = rcu_dereference_sched(tr->function_pids); - if (v == FTRACE_NO_PIDS) + if (v == FTRACE_NO_PIDS) { + (*pos)++; return NULL; - + } return trace_pid_next(pid_list, v, pos); } From c156943230070d2c1240da3b3b051e64aad265b0 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 24 Jan 2020 10:03:06 +0300 Subject: [PATCH 3384/3715] trigger_next should increase position index [ Upstream commit 6722b23e7a2ace078344064a9735fb73e554e9ef ] if seq_file .next fuction does not change position index, read after some lseek can generate unexpected output. Without patch: # dd bs=30 skip=1 if=/sys/kernel/tracing/events/sched/sched_switch/trigger dd: /sys/kernel/tracing/events/sched/sched_switch/trigger: cannot skip to specified offset n traceoff snapshot stacktrace enable_event disable_event enable_hist disable_hist hist # Available triggers: # traceon traceoff snapshot stacktrace enable_event disable_event enable_hist disable_hist hist 6+1 records in 6+1 records out 206 bytes copied, 0.00027916 s, 738 kB/s Notice the printing of "# Available triggers:..." after the line. With the patch: # dd bs=30 skip=1 if=/sys/kernel/tracing/events/sched/sched_switch/trigger dd: /sys/kernel/tracing/events/sched/sched_switch/trigger: cannot skip to specified offset n traceoff snapshot stacktrace enable_event disable_event enable_hist disable_hist hist 2+1 records in 2+1 records out 88 bytes copied, 0.000526867 s, 167 kB/s It only prints the end of the file, and does not restart. Link: http://lkml.kernel.org/r/3c35ee24-dd3a-8119-9c19-552ed253388a@virtuozzo.com https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sasha Levin --- kernel/trace/trace_events_trigger.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index e2da180ca172..31e91efe243e 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -127,9 +127,10 @@ static void *trigger_next(struct seq_file *m, void *t, loff_t *pos) { struct trace_event_file *event_file = event_file_data(m->private); - if (t == SHOW_AVAILABLE_TRIGGERS) + if (t == SHOW_AVAILABLE_TRIGGERS) { + (*pos)++; return NULL; - + } return seq_list_next(t, &event_file->triggers, pos); } From 3e85259525a152b9a2c09855722b106878eaeadf Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 28 Jan 2020 17:09:52 +0100 Subject: [PATCH 3385/3715] radeon: insert 10ms sleep in dce5_crtc_load_lut MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ec3d65082d7dabad6fa8f66a8ef166f2d522d6b2 ] Per at least one tester this is enough magic to recover the regression introduced for some people (but not all) in commit b8e2b0199cc377617dc238f5106352c06dcd3fa2 Author: Peter Rosin Date: Tue Jul 4 12:36:57 2017 +0200 drm/fb-helper: factor out pseudo-palette which for radeon had the side-effect of refactoring out a seemingly redudant writing of the color palette. 10ms in a fairly slow modeset path feels like an acceptable form of duct-tape, so maybe worth a shot and see what sticks. Cc: Alex Deucher Cc: Michel Dänzer References: https://bugzilla.kernel.org/show_bug.cgi?id=198123 Signed-off-by: Daniel Vetter Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/radeon_display.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 4f94b78cb464..d86110cdf085 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -119,6 +119,8 @@ static void dce5_crtc_load_lut(struct drm_crtc *crtc) DRM_DEBUG_KMS("%d\n", radeon_crtc->crtc_id); + msleep(10); + WREG32(NI_INPUT_CSC_CONTROL + radeon_crtc->crtc_offset, (NI_INPUT_CSC_GRPH_MODE(NI_INPUT_CSC_BYPASS) | NI_INPUT_CSC_OVL_MODE(NI_INPUT_CSC_BYPASS))); From f354a9e6cf2cb4725bebf3b2e254925f9136f24e Mon Sep 17 00:00:00 2001 From: wangyan Date: Thu, 30 Jan 2020 22:11:50 -0800 Subject: [PATCH 3386/3715] ocfs2: fix a NULL pointer dereference when call ocfs2_update_inode_fsync_trans() [ Upstream commit 9f16ca48fc818a17de8be1f75d08e7f4addc4497 ] I found a NULL pointer dereference in ocfs2_update_inode_fsync_trans(), handle->h_transaction may be NULL in this situation: ocfs2_file_write_iter ->__generic_file_write_iter ->generic_perform_write ->ocfs2_write_begin ->ocfs2_write_begin_nolock ->ocfs2_write_cluster_by_desc ->ocfs2_write_cluster ->ocfs2_mark_extent_written ->ocfs2_change_extent_flag ->ocfs2_split_extent ->ocfs2_try_to_merge_extent ->ocfs2_extend_rotate_transaction ->ocfs2_extend_trans ->jbd2_journal_restart ->jbd2__journal_restart // handle->h_transaction is NULL here ->handle->h_transaction = NULL; ->start_this_handle /* journal aborted due to storage network disconnection, return error */ ->return -EROFS; /* line 3806 in ocfs2_try_to_merge_extent (), it will ignore ret error. */ ->ret = 0; ->... ->ocfs2_write_end ->ocfs2_write_end_nolock ->ocfs2_update_inode_fsync_trans // NULL pointer dereference ->oi->i_sync_tid = handle->h_transaction->t_tid; The information of NULL pointer dereference as follows: JBD2: Detected IO errors while flushing file data on dm-11-45 Aborting journal on device dm-11-45. JBD2: Error -5 detected when updating journal superblock for dm-11-45. (dd,22081,3):ocfs2_extend_trans:474 ERROR: status = -30 (dd,22081,3):ocfs2_try_to_merge_extent:3877 ERROR: status = -30 Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008 Mem abort info: ESR = 0x96000004 Exception class = DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000004 CM = 0, WnR = 0 user pgtable: 4k pages, 48-bit VAs, pgdp = 00000000e74e1338 [0000000000000008] pgd=0000000000000000 Internal error: Oops: 96000004 [#1] SMP Process dd (pid: 22081, stack limit = 0x00000000584f35a9) CPU: 3 PID: 22081 Comm: dd Kdump: loaded Hardware name: Huawei TaiShan 2280 V2/BC82AMDD, BIOS 0.98 08/25/2019 pstate: 60400009 (nZCv daif +PAN -UAO) pc : ocfs2_write_end_nolock+0x2b8/0x550 [ocfs2] lr : ocfs2_write_end_nolock+0x2a0/0x550 [ocfs2] sp : ffff0000459fba70 x29: ffff0000459fba70 x28: 0000000000000000 x27: ffff807ccf7f1000 x26: 0000000000000001 x25: ffff807bdff57970 x24: ffff807caf1d4000 x23: ffff807cc79e9000 x22: 0000000000001000 x21: 000000006c6cd000 x20: ffff0000091d9000 x19: ffff807ccb239db0 x18: ffffffffffffffff x17: 000000000000000e x16: 0000000000000007 x15: ffff807c5e15bd78 x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 x11: 0000000000000000 x10: 0000000000000001 x9 : 0000000000000228 x8 : 000000000000000c x7 : 0000000000000fff x6 : ffff807a308ed6b0 x5 : ffff7e01f10967c0 x4 : 0000000000000018 x3 : d0bc661572445600 x2 : 0000000000000000 x1 : 000000001b2e0200 x0 : 0000000000000000 Call trace: ocfs2_write_end_nolock+0x2b8/0x550 [ocfs2] ocfs2_write_end+0x4c/0x80 [ocfs2] generic_perform_write+0x108/0x1a8 __generic_file_write_iter+0x158/0x1c8 ocfs2_file_write_iter+0x668/0x950 [ocfs2] __vfs_write+0x11c/0x190 vfs_write+0xac/0x1c0 ksys_write+0x6c/0xd8 __arm64_sys_write+0x24/0x30 el0_svc_common+0x78/0x130 el0_svc_handler+0x38/0x78 el0_svc+0x8/0xc To prevent NULL pointer dereference in this situation, we use is_handle_aborted() before using handle->h_transaction->t_tid. Link: http://lkml.kernel.org/r/03e750ab-9ade-83aa-b000-b9e81e34e539@huawei.com Signed-off-by: Yan Wang Reviewed-by: Jun Piao Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Changwei Ge Cc: Gang He Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/ocfs2/journal.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 497a4171ef61..bfb50fc51528 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -637,9 +637,11 @@ static inline void ocfs2_update_inode_fsync_trans(handle_t *handle, { struct ocfs2_inode_info *oi = OCFS2_I(inode); - oi->i_sync_tid = handle->h_transaction->t_tid; - if (datasync) - oi->i_datasync_tid = handle->h_transaction->t_tid; + if (!is_handle_aborted(handle)) { + oi->i_sync_tid = handle->h_transaction->t_tid; + if (datasync) + oi->i_datasync_tid = handle->h_transaction->t_tid; + } } #endif /* OCFS2_JOURNAL_H */ From 55f09a4b9d19a9e4549928cddeea4a23285edcbc Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 30 Jan 2020 22:16:37 -0800 Subject: [PATCH 3387/3715] lib/scatterlist.c: adjust indentation in __sg_alloc_table [ Upstream commit 4e456fee215677584cafa7f67298a76917e89c64 ] Clang warns: ../lib/scatterlist.c:314:5: warning: misleading indentation; statement is not part of the previous 'if' [-Wmisleading-indentation] return -ENOMEM; ^ ../lib/scatterlist.c:311:4: note: previous statement is here if (prv) ^ 1 warning generated. This warning occurs because there is a space before the tab on this line. Remove it so that the indentation is consistent with the Linux kernel coding style and clang no longer warns. Link: http://lkml.kernel.org/r/20191218033606.11942-1-natechancellor@gmail.com Link: https://github.com/ClangBuiltLinux/linux/issues/830 Fixes: edce6820a9fd ("scatterlist: prevent invalid free when alloc fails") Signed-off-by: Nathan Chancellor Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- lib/scatterlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 11fce289d116..834c846c5af8 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -317,7 +317,7 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents, if (prv) table->nents = ++table->orig_nents; - return -ENOMEM; + return -ENOMEM; } sg_init_table(sg, alloc_size); From 8f75e384833b26ec44cb44e82d73ce706f591bdd Mon Sep 17 00:00:00 2001 From: Yunfeng Ye Date: Thu, 30 Jan 2020 22:17:26 -0800 Subject: [PATCH 3388/3715] reiserfs: prevent NULL pointer dereference in reiserfs_insert_item() [ Upstream commit aacee5446a2a1aa35d0a49dab289552578657fb4 ] The variable inode may be NULL in reiserfs_insert_item(), but there is no check before accessing the member of inode. Fix this by adding NULL pointer check before calling reiserfs_debug(). Link: http://lkml.kernel.org/r/79c5135d-ff25-1cc9-4e99-9f572b88cc00@huawei.com Signed-off-by: Yunfeng Ye Cc: zhengbin Cc: Hu Shiyuan Cc: Feilong Lin Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/reiserfs/stree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index 0037aea97d39..2946713cb00d 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -2250,7 +2250,8 @@ error_out: /* also releases the path */ unfix_nodes(&s_ins_balance); #ifdef REISERQUOTA_DEBUG - reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE, + if (inode) + reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE, "reiserquota insert_item(): freeing %u id=%u type=%c", quota_bytes, inode->i_uid, head2type(ih)); #endif From e8518a5ce4035ff5094dca6150919e004ece54e5 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sat, 1 Feb 2020 22:42:32 +0800 Subject: [PATCH 3389/3715] bcache: explicity type cast in bset_bkey_last() [ Upstream commit 7c02b0055f774ed9afb6e1c7724f33bf148ffdc0 ] In bset.h, macro bset_bkey_last() is defined as, bkey_idx((struct bkey *) (i)->d, (i)->keys) Parameter i can be variable type of data structure, the macro always works once the type of struct i has member 'd' and 'keys'. bset_bkey_last() is also used in macro csum_set() to calculate the checksum of a on-disk data structure. When csum_set() is used to calculate checksum of on-disk bcache super block, the parameter 'i' data type is struct cache_sb_disk. Inside struct cache_sb_disk (also in struct cache_sb) the member keys is __u16 type. But bkey_idx() expects unsigned int (a 32bit width), so there is problem when sending parameters via stack to call bkey_idx(). Sparse tool from Intel 0day kbuild system reports this incompatible problem. bkey_idx() is part of user space API, so the simplest fix is to cast the (i)->keys to unsigned int type in macro bset_bkey_last(). Reported-by: kbuild test robot Signed-off-by: Coly Li Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/md/bcache/bset.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index 8d1964b472e7..0bfde500af19 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -381,7 +381,8 @@ void bch_btree_keys_stats(struct btree_keys *, struct bset_stats *); /* Bkey utility code */ -#define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, (i)->keys) +#define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, \ + (unsigned int)(i)->keys) static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned idx) { From 74b16aa6ad15ab29286a09dd1d2595d6422119c4 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Mon, 2 Dec 2019 15:10:21 +0800 Subject: [PATCH 3390/3715] irqchip/gic-v3-its: Reference to its_invall_cmd descriptor when building INVALL [ Upstream commit 107945227ac5d4c37911c7841b27c64b489ce9a9 ] It looks like an obvious mistake to use its_mapc_cmd descriptor when building the INVALL command block. It so far worked by luck because both its_mapc_cmd.col and its_invall_cmd.col sit at the same offset of the ITS command descriptor, but we should not rely on it. Fixes: cc2d3216f53c ("irqchip: GICv3: ITS command queue") Signed-off-by: Zenghui Yu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20191202071021.1251-1-yuzenghui@huawei.com Signed-off-by: Sasha Levin --- drivers/irqchip/irq-gic-v3-its.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 52238e6bed39..799df1e598db 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -527,7 +527,7 @@ static struct its_collection *its_build_invall_cmd(struct its_cmd_block *cmd, struct its_cmd_desc *desc) { its_encode_cmd(cmd, GITS_CMD_INVALL); - its_encode_collection(cmd, desc->its_mapc_cmd.col->col_id); + its_encode_collection(cmd, desc->its_invall_cmd.col->col_id); its_fixup_cmd(cmd); From 16ccc575ff186a051dfe595db582e4f929e2356a Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Fri, 31 Jan 2020 15:45:24 +0200 Subject: [PATCH 3391/3715] iwlwifi: mvm: Fix thermal zone registration [ Upstream commit baa6cf8450b72dcab11f37c47efce7c5b9b8ad0f ] Use a unique name when registering a thermal zone. Otherwise, with multiple NICS, we hit the following warning during the unregistration. WARNING: CPU: 2 PID: 3525 at fs/sysfs/group.c:255 RIP: 0010:sysfs_remove_group+0x80/0x90 Call Trace: dpm_sysfs_remove+0x57/0x60 device_del+0x5a/0x350 ? sscanf+0x4e/0x70 device_unregister+0x1a/0x60 hwmon_device_unregister+0x4a/0xa0 thermal_remove_hwmon_sysfs+0x175/0x1d0 thermal_zone_device_unregister+0x188/0x1e0 iwl_mvm_thermal_exit+0xe7/0x100 [iwlmvm] iwl_op_mode_mvm_stop+0x27/0x180 [iwlmvm] _iwl_op_mode_stop.isra.3+0x2b/0x50 [iwlwifi] iwl_opmode_deregister+0x90/0xa0 [iwlwifi] __exit_compat+0x10/0x2c7 [iwlmvm] __x64_sys_delete_module+0x13f/0x270 do_syscall_64+0x5a/0x110 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Signed-off-by: Andrei Otcheretianski Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/tt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c index 1232f63278eb..319103f4b432 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c @@ -739,7 +739,8 @@ static struct thermal_zone_device_ops tzone_ops = { static void iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm) { int i; - char name[] = "iwlwifi"; + char name[16]; + static atomic_t counter = ATOMIC_INIT(0); if (!iwl_mvm_is_tt_in_fw(mvm)) { mvm->tz_device.tzone = NULL; @@ -749,6 +750,7 @@ static void iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm) BUILD_BUG_ON(ARRAY_SIZE(name) >= THERMAL_NAME_LENGTH); + sprintf(name, "iwlwifi_%u", atomic_inc_return(&counter) & 0xFF); mvm->tz_device.tzone = thermal_zone_device_register(name, IWL_MAX_DTS_TRIPS, IWL_WRITABLE_TRIPS_MSK, From aa9c1f410398a2a70e03bd23f8c80c0699948259 Mon Sep 17 00:00:00 2001 From: Shubhrajyoti Datta Date: Sat, 11 Jan 2020 18:44:34 +0530 Subject: [PATCH 3392/3715] microblaze: Prevent the overflow of the start [ Upstream commit 061d2c1d593076424c910cb1b64ecdb5c9a6923f ] In case the start + cache size is more than the max int the start overflows. Prevent the same. Signed-off-by: Shubhrajyoti Datta Signed-off-by: Michal Simek Signed-off-by: Sasha Levin --- arch/microblaze/kernel/cpu/cache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/microblaze/kernel/cpu/cache.c b/arch/microblaze/kernel/cpu/cache.c index 0bde47e4fa69..dcba53803fa5 100644 --- a/arch/microblaze/kernel/cpu/cache.c +++ b/arch/microblaze/kernel/cpu/cache.c @@ -92,7 +92,8 @@ static inline void __disable_dcache_nomsr(void) #define CACHE_LOOP_LIMITS(start, end, cache_line_length, cache_size) \ do { \ int align = ~(cache_line_length - 1); \ - end = min(start + cache_size, end); \ + if (start < UINT_MAX - cache_size) \ + end = min(start + cache_size, end); \ start &= align; \ } while (0) From c911b13e8b2d3427061c2da6372da987353eddc1 Mon Sep 17 00:00:00 2001 From: Zhiqiang Liu Date: Tue, 4 Feb 2020 19:30:20 +0800 Subject: [PATCH 3393/3715] brd: check and limit max_part par [ Upstream commit c8ab422553c81a0eb070329c63725df1cd1425bc ] In brd_init func, rd_nr num of brd_device are firstly allocated and add in brd_devices, then brd_devices are traversed to add each brd_device by calling add_disk func. When allocating brd_device, the disk->first_minor is set to i * max_part, if rd_nr * max_part is larger than MINORMASK, two different brd_device may have the same devt, then only one of them can be successfully added. when rmmod brd.ko, it will cause oops when calling brd_exit. Follow those steps: # modprobe brd rd_nr=3 rd_size=102400 max_part=1048576 # rmmod brd then, the oops will appear. Oops log: [ 726.613722] Call trace: [ 726.614175] kernfs_find_ns+0x24/0x130 [ 726.614852] kernfs_find_and_get_ns+0x44/0x68 [ 726.615749] sysfs_remove_group+0x38/0xb0 [ 726.616520] blk_trace_remove_sysfs+0x1c/0x28 [ 726.617320] blk_unregister_queue+0x98/0x100 [ 726.618105] del_gendisk+0x144/0x2b8 [ 726.618759] brd_exit+0x68/0x560 [brd] [ 726.619501] __arm64_sys_delete_module+0x19c/0x2a0 [ 726.620384] el0_svc_common+0x78/0x130 [ 726.621057] el0_svc_handler+0x38/0x78 [ 726.621738] el0_svc+0x8/0xc [ 726.622259] Code: aa0203f6 aa0103f7 aa1e03e0 d503201f (7940e260) Here, we add brd_check_and_reset_par func to check and limit max_part par. -- V5->V6: - remove useless code V4->V5:(suggested by Ming Lei) - make sure max_part is not larger than DISK_MAX_PARTS V3->V4:(suggested by Ming Lei) - remove useless change - add one limit of max_part V2->V3: (suggested by Ming Lei) - clear .minors when running out of consecutive minor space in brd_alloc - remove limit of rd_nr V1->V2: - add more checks in brd_check_par_valid as suggested by Ming Lei. Signed-off-by: Zhiqiang Liu Reviewed-by: Bob Liu Reviewed-by: Ming Lei Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/brd.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 2d7178f7754e..0129b1921cb3 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -529,6 +529,25 @@ static struct kobject *brd_probe(dev_t dev, int *part, void *data) return kobj; } +static inline void brd_check_and_reset_par(void) +{ + if (unlikely(!max_part)) + max_part = 1; + + /* + * make sure 'max_part' can be divided exactly by (1U << MINORBITS), + * otherwise, it is possiable to get same dev_t when adding partitions. + */ + if ((1U << MINORBITS) % max_part != 0) + max_part = 1UL << fls(max_part); + + if (max_part > DISK_MAX_PARTS) { + pr_info("brd: max_part can't be larger than %d, reset max_part = %d.\n", + DISK_MAX_PARTS, DISK_MAX_PARTS); + max_part = DISK_MAX_PARTS; + } +} + static int __init brd_init(void) { struct brd_device *brd, *next; @@ -552,8 +571,7 @@ static int __init brd_init(void) if (register_blkdev(RAMDISK_MAJOR, "ramdisk")) return -EIO; - if (unlikely(!max_part)) - max_part = 1; + brd_check_and_reset_par(); for (i = 0; i < rd_nr; i++) { brd = brd_alloc(i); From 2329f0eded07c62dbe40d8b523001525e91b99b4 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 24 Jan 2020 09:10:47 +0300 Subject: [PATCH 3394/3715] help_next should increase position index [ Upstream commit 9f198a2ac543eaaf47be275531ad5cbd50db3edf ] if seq_file .next fuction does not change position index, read after some lseek can generate unexpected output. https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin Signed-off-by: Mike Marshall Signed-off-by: Sasha Levin --- fs/orangefs/orangefs-debugfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c index 1c59dff530de..34d1cc98260d 100644 --- a/fs/orangefs/orangefs-debugfs.c +++ b/fs/orangefs/orangefs-debugfs.c @@ -305,6 +305,7 @@ static void *help_start(struct seq_file *m, loff_t *pos) static void *help_next(struct seq_file *m, void *v, loff_t *pos) { + (*pos)++; gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_next: start\n"); return NULL; From a4ba26c8801b10e98a6026b71a49ff80146974a0 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 6 Feb 2020 02:40:58 -0500 Subject: [PATCH 3395/3715] virtio_balloon: prevent pfn array overflow [ Upstream commit 6e9826e77249355c09db6ba41cd3f84e89f4b614 ] Make sure, at build time, that pfn array is big enough to hold a single page. It happens to be true since the PAGE_SHIFT value at the moment is 20, which is 1M - exactly 256 4K balloon pages. Signed-off-by: Michael S. Tsirkin Reviewed-by: David Hildenbrand Signed-off-by: Sasha Levin --- drivers/virtio/virtio_balloon.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 499531608fa2..71970773aad1 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -132,6 +132,8 @@ static void set_page_pfns(struct virtio_balloon *vb, { unsigned int i; + BUILD_BUG_ON(VIRTIO_BALLOON_PAGES_PER_PAGE > VIRTIO_BALLOON_ARRAY_PFNS_MAX); + /* * Set balloon pfns pointing at this page. * Note that the first pfn points at start of the page. From bc99bd6e85e1a08a1c75cd25e6175223083fe54a Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 7 Feb 2020 19:26:28 +0200 Subject: [PATCH 3396/3715] mlxsw: spectrum_dpipe: Add missing error path [ Upstream commit 3a99cbb6fa7bca1995586ec2dc21b0368aad4937 ] In case devlink_dpipe_entry_ctx_prepare() failed, release RTNL that was previously taken and free the memory allocated by mlxsw_sp_erif_entry_prepare(). Fixes: 2ba5999f009d ("mlxsw: spectrum: Add Support for erif table entries access") Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index 51e6846da72b..3c04f3d5de2d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -225,7 +225,7 @@ mlxsw_sp_dpipe_table_erif_entries_dump(void *priv, bool counters_enabled, start_again: err = devlink_dpipe_entry_ctx_prepare(dump_ctx); if (err) - return err; + goto err_ctx_prepare; j = 0; for (; i < rif_count; i++) { struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); @@ -257,6 +257,7 @@ start_again: return 0; err_entry_append: err_entry_get: +err_ctx_prepare: rtnl_unlock(); devlink_dpipe_entry_clear(&entry); return err; From ee15cd14fa26e84607b735c6cf15648209081b02 Mon Sep 17 00:00:00 2001 From: Jaihind Yadav Date: Tue, 17 Dec 2019 17:25:47 +0530 Subject: [PATCH 3397/3715] selinux: ensure we cleanup the internal AVC counters on error in avc_update() [ Upstream commit 030b995ad9ece9fa2d218af4429c1c78c2342096 ] In AVC update we don't call avc_node_kill() when avc_xperms_populate() fails, resulting in the avc->avc_cache.active_nodes counter having a false value. In last patch this changes was missed , so correcting it. Fixes: fa1aa143ac4a ("selinux: extended permissions for ioctls") Signed-off-by: Jaihind Yadav Signed-off-by: Ravi Kumar Siddojigari [PM: merge fuzz, minor description cleanup] Signed-off-by: Paul Moore Signed-off-by: Sasha Levin --- security/selinux/avc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 2380b8d72cec..23f387b30ece 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -863,7 +863,7 @@ static int avc_update_node(u32 event, u32 perms, u8 driver, u8 xperm, u32 ssid, if (orig->ae.xp_node) { rc = avc_xperms_populate(node, orig->ae.xp_node); if (rc) { - kmem_cache_free(avc_node_cachep, node); + avc_node_kill(node); goto out_unlock; } } From a90fd80a5c0d4b57f790518d5ed9c1b55e549801 Mon Sep 17 00:00:00 2001 From: Firo Yang Date: Wed, 12 Feb 2020 06:09:17 +0100 Subject: [PATCH 3398/3715] enic: prevent waking up stopped tx queues over watchdog reset [ Upstream commit 0f90522591fd09dd201065c53ebefdfe3c6b55cb ] Recent months, our customer reported several kernel crashes all preceding with following message: NETDEV WATCHDOG: eth2 (enic): transmit queue 0 timed out Error message of one of those crashes: BUG: unable to handle kernel paging request at ffffffffa007e090 After analyzing severl vmcores, I found that most of crashes are caused by memory corruption. And all the corrupted memory areas are overwritten by data of network packets. Moreover, I also found that the tx queues were enabled over watchdog reset. After going through the source code, I found that in enic_stop(), the tx queues stopped by netif_tx_disable() could be woken up over a small time window between netif_tx_disable() and the napi_disable() by the following code path: napi_poll-> enic_poll_msix_wq-> vnic_cq_service-> enic_wq_service-> netif_wake_subqueue(enic->netdev, q_number)-> test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state) In turn, upper netowrk stack could queue skb to ENIC NIC though enic_hard_start_xmit(). And this might introduce some race condition. Our customer comfirmed that this kind of kernel crash doesn't occur over 90 days since they applied this patch. Signed-off-by: Firo Yang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cisco/enic/enic_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 19f374b180fc..52a3b32390a9 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1972,10 +1972,10 @@ static int enic_stop(struct net_device *netdev) napi_disable(&enic->napi[i]); netif_carrier_off(netdev); - netif_tx_disable(netdev); if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX) for (i = 0; i < enic->wq_count; i++) napi_disable(&enic->napi[enic_cq_wq(enic, i)]); + netif_tx_disable(netdev); if (!enic_is_dynamic(enic) && !enic_is_sriov_vf(enic)) enic_dev_del_station_addr(enic); From b0f803378344e1a2dfec9818f2d42abcc5c989ed Mon Sep 17 00:00:00 2001 From: Per Forlin Date: Thu, 13 Feb 2020 15:37:09 +0100 Subject: [PATCH 3399/3715] net: dsa: tag_qca: Make sure there is headroom for tag [ Upstream commit 04fb91243a853dbde216d829c79d9632e52aa8d9 ] Passing tag size to skb_cow_head will make sure there is enough headroom for the tag data. This change does not introduce any overhead in case there is already available headroom for tag. Signed-off-by: Per Forlin Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dsa/tag_qca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index af3a12a36d88..f268c5c3eedb 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -41,7 +41,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) struct dsa_slave_priv *p = netdev_priv(dev); u16 *phdr, hdr; - if (skb_cow_head(skb, 0) < 0) + if (skb_cow_head(skb, QCA_HDR_LEN) < 0) return NULL; skb_push(skb, QCA_HDR_LEN); From 221a199d7c171c63e1ba6d0a9975b535d143bdb1 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Tue, 11 Feb 2020 19:33:39 +0100 Subject: [PATCH 3400/3715] net/sched: matchall: add missing validation of TCA_MATCHALL_FLAGS [ Upstream commit 1afa3cc90f8fb745c777884d79eaa1001d6927a6 ] unlike other classifiers that can be offloaded (i.e. users can set flags like 'skip_hw' and 'skip_sw'), 'cls_matchall' doesn't validate the size of netlink attribute 'TCA_MATCHALL_FLAGS' provided by user: add a proper entry to mall_policy. Fixes: b87f7936a932 ("net/sched: Add match-all classifier hw offloading.") Signed-off-by: Davide Caratti Acked-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/cls_matchall.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index d8fd152779c8..a985f91e8b47 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -136,6 +136,7 @@ static void *mall_get(struct tcf_proto *tp, u32 handle) static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = { [TCA_MATCHALL_UNSPEC] = { .type = NLA_UNSPEC }, [TCA_MATCHALL_CLASSID] = { .type = NLA_U32 }, + [TCA_MATCHALL_FLAGS] = { .type = NLA_U32 }, }; static int mall_set_parms(struct net *net, struct tcf_proto *tp, From 3fdba7cb6f45818086d597546761f2602f44502e Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Tue, 11 Feb 2020 19:33:40 +0100 Subject: [PATCH 3401/3715] net/sched: flower: add missing validation of TCA_FLOWER_FLAGS [ Upstream commit e2debf0852c4d66ba1a8bde12869b196094c70a7 ] unlike other classifiers that can be offloaded (i.e. users can set flags like 'skip_hw' and 'skip_sw'), 'cls_flower' doesn't validate the size of netlink attribute 'TCA_FLOWER_FLAGS' provided by user: add a proper entry to fl_policy. Fixes: 5b33f48842fa ("net/flower: Introduce hardware offload support") Signed-off-by: Davide Caratti Acked-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/cls_flower.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 1879665e5a2b..80a5a6d503c8 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -445,6 +445,7 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_IP_TOS_MASK] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_IP_TTL] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_IP_TTL_MASK] = { .type = NLA_U8 }, + [TCA_FLOWER_FLAGS] = { .type = NLA_U32 }, }; static void fl_set_key_val(struct nlattr **tb, From 6e60e4463c2694207932bd0dda9f82536303466f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 10 Feb 2020 11:36:13 -0800 Subject: [PATCH 3402/3715] net/smc: fix leak of kernel memory to user space [ Upstream commit 457fed775c97ac2c0cd1672aaf2ff2c8a6235e87 ] As nlmsg_put() does not clear the memory that is reserved, it this the caller responsability to make sure all of this memory will be written, in order to not reveal prior content. While we are at it, we can provide the socket cookie even if clsock is not set. syzbot reported : BUG: KMSAN: uninit-value in __arch_swab32 arch/x86/include/uapi/asm/swab.h:10 [inline] BUG: KMSAN: uninit-value in __fswab32 include/uapi/linux/swab.h:59 [inline] BUG: KMSAN: uninit-value in __swab32p include/uapi/linux/swab.h:179 [inline] BUG: KMSAN: uninit-value in __be32_to_cpup include/uapi/linux/byteorder/little_endian.h:82 [inline] BUG: KMSAN: uninit-value in get_unaligned_be32 include/linux/unaligned/access_ok.h:30 [inline] BUG: KMSAN: uninit-value in ____bpf_skb_load_helper_32 net/core/filter.c:240 [inline] BUG: KMSAN: uninit-value in ____bpf_skb_load_helper_32_no_cache net/core/filter.c:255 [inline] BUG: KMSAN: uninit-value in bpf_skb_load_helper_32_no_cache+0x14a/0x390 net/core/filter.c:252 CPU: 1 PID: 5262 Comm: syz-executor.5 Not tainted 5.5.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x220 lib/dump_stack.c:118 kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:118 __msan_warning+0x58/0xa0 mm/kmsan/kmsan_instr.c:215 __arch_swab32 arch/x86/include/uapi/asm/swab.h:10 [inline] __fswab32 include/uapi/linux/swab.h:59 [inline] __swab32p include/uapi/linux/swab.h:179 [inline] __be32_to_cpup include/uapi/linux/byteorder/little_endian.h:82 [inline] get_unaligned_be32 include/linux/unaligned/access_ok.h:30 [inline] ____bpf_skb_load_helper_32 net/core/filter.c:240 [inline] ____bpf_skb_load_helper_32_no_cache net/core/filter.c:255 [inline] bpf_skb_load_helper_32_no_cache+0x14a/0x390 net/core/filter.c:252 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:144 [inline] kmsan_internal_poison_shadow+0x66/0xd0 mm/kmsan/kmsan.c:127 kmsan_kmalloc_large+0x73/0xc0 mm/kmsan/kmsan_hooks.c:128 kmalloc_large_node_hook mm/slub.c:1406 [inline] kmalloc_large_node+0x282/0x2c0 mm/slub.c:3841 __kmalloc_node_track_caller+0x44b/0x1200 mm/slub.c:4368 __kmalloc_reserve net/core/skbuff.c:141 [inline] __alloc_skb+0x2fd/0xac0 net/core/skbuff.c:209 alloc_skb include/linux/skbuff.h:1049 [inline] netlink_dump+0x44b/0x1ab0 net/netlink/af_netlink.c:2224 __netlink_dump_start+0xbb2/0xcf0 net/netlink/af_netlink.c:2352 netlink_dump_start include/linux/netlink.h:233 [inline] smc_diag_handler_dump+0x2ba/0x300 net/smc/smc_diag.c:242 sock_diag_rcv_msg+0x211/0x610 net/core/sock_diag.c:256 netlink_rcv_skb+0x451/0x650 net/netlink/af_netlink.c:2477 sock_diag_rcv+0x63/0x80 net/core/sock_diag.c:275 netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] netlink_unicast+0xf9e/0x1100 net/netlink/af_netlink.c:1328 netlink_sendmsg+0x1248/0x14d0 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:639 [inline] sock_sendmsg net/socket.c:659 [inline] kernel_sendmsg+0x433/0x440 net/socket.c:679 sock_no_sendpage+0x235/0x300 net/core/sock.c:2740 kernel_sendpage net/socket.c:3776 [inline] sock_sendpage+0x1e1/0x2c0 net/socket.c:937 pipe_to_sendpage+0x38c/0x4c0 fs/splice.c:458 splice_from_pipe_feed fs/splice.c:512 [inline] __splice_from_pipe+0x539/0xed0 fs/splice.c:636 splice_from_pipe fs/splice.c:671 [inline] generic_splice_sendpage+0x1d5/0x2d0 fs/splice.c:844 do_splice_from fs/splice.c:863 [inline] do_splice fs/splice.c:1170 [inline] __do_sys_splice fs/splice.c:1447 [inline] __se_sys_splice+0x2380/0x3350 fs/splice.c:1427 __x64_sys_splice+0x6e/0x90 fs/splice.c:1427 do_syscall_64+0xb8/0x160 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: f16a7dd5cf27 ("smc: netlink interface for SMC sockets") Signed-off-by: Eric Dumazet Cc: Ursula Braun Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/smc/smc_diag.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index d2d01cf70224..576c37d86051 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -38,15 +38,14 @@ static void smc_diag_msg_common_fill(struct smc_diag_msg *r, struct sock *sk) { struct smc_sock *smc = smc_sk(sk); + memset(r, 0, sizeof(*r)); r->diag_family = sk->sk_family; + sock_diag_save_cookie(sk, r->id.idiag_cookie); if (!smc->clcsock) return; r->id.idiag_sport = htons(smc->clcsock->sk->sk_num); r->id.idiag_dport = smc->clcsock->sk->sk_dport; r->id.idiag_if = smc->clcsock->sk->sk_bound_dev_if; - sock_diag_save_cookie(sk, r->id.idiag_cookie); - memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); - memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); r->id.idiag_src[0] = smc->clcsock->sk->sk_rcv_saddr; r->id.idiag_dst[0] = smc->clcsock->sk->sk_daddr; } From 010e880595cb51dd8ba6da202761ecec6785753c Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 13 Feb 2020 12:56:04 +0300 Subject: [PATCH 3403/3715] thunderbolt: Prevent crash if non-active NVMem file is read commit 03cd45d2e219301880cabc357e3cf478a500080f upstream. The driver does not populate .reg_read callback for the non-active NVMem because the file is supposed to be write-only. However, it turns out NVMem subsystem does not yet support this and expects that the .reg_read callback is provided. If user reads the binary attribute it triggers NULL pointer dereference like this one: BUG: kernel NULL pointer dereference, address: 0000000000000000 ... Call Trace: bin_attr_nvmem_read+0x64/0x80 kernfs_fop_read+0xa7/0x180 vfs_read+0xbd/0x170 ksys_read+0x5a/0xd0 do_syscall_64+0x43/0x150 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fix this in the driver by providing .reg_read callback that always returns an error. Reported-by: Nicholas Johnson Fixes: e6b245ccd524 ("thunderbolt: Add support for host and device NVM firmware upgrade") Signed-off-by: Mika Westerberg Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200213095604.1074-1-mika.westerberg@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/switch.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index fe2384b019ec..9cfc65ca173d 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -240,6 +240,12 @@ static int tb_switch_nvm_read(void *priv, unsigned int offset, void *val, return dma_port_flash_read(sw->dma_port, offset, val, bytes); } +static int tb_switch_nvm_no_read(void *priv, unsigned int offset, void *val, + size_t bytes) +{ + return -EPERM; +} + static int tb_switch_nvm_write(void *priv, unsigned int offset, void *val, size_t bytes) { @@ -285,6 +291,7 @@ static struct nvmem_device *register_nvmem(struct tb_switch *sw, int id, config.read_only = true; } else { config.name = "nvm_non_active"; + config.reg_read = tb_switch_nvm_no_read; config.reg_write = tb_switch_nvm_write; config.root_only = true; } From 556098be62dd2fe6e90063f0fc7895a40c107807 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 11 Feb 2020 20:04:21 -0800 Subject: [PATCH 3404/3715] USB: misc: iowarrior: add support for 2 OEMed devices commit 461d8deb26a7d70254bc0391feb4fd8a95e674e8 upstream. Add support for two OEM devices that are identical to existing IO-Warrior devices, except for the USB device id. Cc: Christoph Jung Cc: stable Link: https://lore.kernel.org/r/20200212040422.2991-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/iowarrior.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index 7f226cc3ef8a..9c94ce4a413e 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -33,6 +33,10 @@ /* full speed iowarrior */ #define USB_DEVICE_ID_CODEMERCS_IOW56 0x1503 +/* OEMed devices */ +#define USB_DEVICE_ID_CODEMERCS_IOW24SAG 0x158a +#define USB_DEVICE_ID_CODEMERCS_IOW56AM 0x158b + /* Get a minor range for your devices from the usb maintainer */ #ifdef CONFIG_USB_DYNAMIC_MINORS #define IOWARRIOR_MINOR_BASE 0 @@ -137,6 +141,8 @@ static const struct usb_device_id iowarrior_ids[] = { {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOWPV1)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOWPV2)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW56)}, + {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW24SAG)}, + {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW56AM)}, {} /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, iowarrior_ids); @@ -364,6 +370,7 @@ static ssize_t iowarrior_write(struct file *file, } switch (dev->product_id) { case USB_DEVICE_ID_CODEMERCS_IOW24: + case USB_DEVICE_ID_CODEMERCS_IOW24SAG: case USB_DEVICE_ID_CODEMERCS_IOWPV1: case USB_DEVICE_ID_CODEMERCS_IOWPV2: case USB_DEVICE_ID_CODEMERCS_IOW40: @@ -378,6 +385,7 @@ static ssize_t iowarrior_write(struct file *file, goto exit; break; case USB_DEVICE_ID_CODEMERCS_IOW56: + case USB_DEVICE_ID_CODEMERCS_IOW56AM: /* The IOW56 uses asynchronous IO and more urbs */ if (atomic_read(&dev->write_busy) == MAX_WRITES_IN_FLIGHT) { /* Wait until we are below the limit for submitted urbs */ @@ -502,6 +510,7 @@ static long iowarrior_ioctl(struct file *file, unsigned int cmd, switch (cmd) { case IOW_WRITE: if (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW24 || + dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW24SAG || dev->product_id == USB_DEVICE_ID_CODEMERCS_IOWPV1 || dev->product_id == USB_DEVICE_ID_CODEMERCS_IOWPV2 || dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW40) { @@ -786,7 +795,8 @@ static int iowarrior_probe(struct usb_interface *interface, goto error; } - if (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) { + if ((dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM)) { res = usb_find_last_int_out_endpoint(iface_desc, &dev->int_out_endpoint); if (res) { @@ -799,7 +809,8 @@ static int iowarrior_probe(struct usb_interface *interface, /* we have to check the report_size often, so remember it in the endianness suitable for our machine */ dev->report_size = usb_endpoint_maxp(dev->int_in_endpoint); if ((dev->interface->cur_altsetting->desc.bInterfaceNumber == 0) && - (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56)) + ((dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM))) /* IOWarrior56 has wMaxPacketSize different from report size */ dev->report_size = 7; From b949c86834e7aae6040723e074b9d15534ba8ff7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 11 Feb 2020 20:04:22 -0800 Subject: [PATCH 3405/3715] USB: misc: iowarrior: add support for the 28 and 28L devices commit 5f6f8da2d7b5a431d3f391d0d73ace8edfb42af7 upstream. Add new device ids for the 28 and 28L devices. These have 4 interfaces instead of 2, but the driver binds the same, so the driver changes are minimal. Cc: Christoph Jung Cc: stable Link: https://lore.kernel.org/r/20200212040422.2991-2-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/iowarrior.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index 9c94ce4a413e..c7e1d6b64705 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -32,6 +32,9 @@ #define USB_DEVICE_ID_CODEMERCS_IOWPV2 0x1512 /* full speed iowarrior */ #define USB_DEVICE_ID_CODEMERCS_IOW56 0x1503 +/* fuller speed iowarrior */ +#define USB_DEVICE_ID_CODEMERCS_IOW28 0x1504 +#define USB_DEVICE_ID_CODEMERCS_IOW28L 0x1505 /* OEMed devices */ #define USB_DEVICE_ID_CODEMERCS_IOW24SAG 0x158a @@ -143,6 +146,8 @@ static const struct usb_device_id iowarrior_ids[] = { {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW56)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW24SAG)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW56AM)}, + {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW28)}, + {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW28L)}, {} /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, iowarrior_ids); @@ -386,6 +391,8 @@ static ssize_t iowarrior_write(struct file *file, break; case USB_DEVICE_ID_CODEMERCS_IOW56: case USB_DEVICE_ID_CODEMERCS_IOW56AM: + case USB_DEVICE_ID_CODEMERCS_IOW28: + case USB_DEVICE_ID_CODEMERCS_IOW28L: /* The IOW56 uses asynchronous IO and more urbs */ if (atomic_read(&dev->write_busy) == MAX_WRITES_IN_FLIGHT) { /* Wait until we are below the limit for submitted urbs */ @@ -796,7 +803,9 @@ static int iowarrior_probe(struct usb_interface *interface, } if ((dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) || - (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM)) { + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28L)) { res = usb_find_last_int_out_endpoint(iface_desc, &dev->int_out_endpoint); if (res) { @@ -810,7 +819,9 @@ static int iowarrior_probe(struct usb_interface *interface, dev->report_size = usb_endpoint_maxp(dev->int_in_endpoint); if ((dev->interface->cur_altsetting->desc.bInterfaceNumber == 0) && ((dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) || - (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM))) + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28L))) /* IOWarrior56 has wMaxPacketSize different from report size */ dev->report_size = 7; From b80f5a9b41aea2d6f7ad860721ec617275731405 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 14 Feb 2020 08:11:48 -0800 Subject: [PATCH 3406/3715] USB: misc: iowarrior: add support for the 100 device commit bab5417f5f0118ce914bc5b2f8381e959e891155 upstream. Add a new device id for the 100 devie. It has 4 interfaces like the 28 and 28L devices but a larger endpoint so more I/O pins. Cc: Christoph Jung Cc: stable Link: https://lore.kernel.org/r/20200214161148.GA3963518@kroah.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/iowarrior.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index c7e1d6b64705..1ec32e5aa004 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -35,6 +35,7 @@ /* fuller speed iowarrior */ #define USB_DEVICE_ID_CODEMERCS_IOW28 0x1504 #define USB_DEVICE_ID_CODEMERCS_IOW28L 0x1505 +#define USB_DEVICE_ID_CODEMERCS_IOW100 0x1506 /* OEMed devices */ #define USB_DEVICE_ID_CODEMERCS_IOW24SAG 0x158a @@ -148,6 +149,7 @@ static const struct usb_device_id iowarrior_ids[] = { {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW56AM)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW28)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW28L)}, + {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW100)}, {} /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, iowarrior_ids); @@ -393,6 +395,7 @@ static ssize_t iowarrior_write(struct file *file, case USB_DEVICE_ID_CODEMERCS_IOW56AM: case USB_DEVICE_ID_CODEMERCS_IOW28: case USB_DEVICE_ID_CODEMERCS_IOW28L: + case USB_DEVICE_ID_CODEMERCS_IOW100: /* The IOW56 uses asynchronous IO and more urbs */ if (atomic_read(&dev->write_busy) == MAX_WRITES_IN_FLIGHT) { /* Wait until we are below the limit for submitted urbs */ @@ -805,7 +808,8 @@ static int iowarrior_probe(struct usb_interface *interface, if ((dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) || (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM) || (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28) || - (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28L)) { + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28L) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW100)) { res = usb_find_last_int_out_endpoint(iface_desc, &dev->int_out_endpoint); if (res) { @@ -821,7 +825,8 @@ static int iowarrior_probe(struct usb_interface *interface, ((dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) || (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM) || (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28) || - (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28L))) + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28L) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW100))) /* IOWarrior56 has wMaxPacketSize different from report size */ dev->report_size = 7; From e4c587650f64608fcd3506fa2cb47f81c0f88348 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 21 Feb 2020 12:43:35 -0800 Subject: [PATCH 3407/3715] floppy: check FDC index for errors before assigning it commit 2e90ca68b0d2f5548804f22f0dd61145516171e3 upstream. Jordy Zomer reported a KASAN out-of-bounds read in the floppy driver in wait_til_ready(). Which on the face of it can't happen, since as Willy Tarreau points out, the function does no particular memory access. Except through the FDCS macro, which just indexes a static allocation through teh current fdc, which is always checked against N_FDC. Except the checking happens after we've already assigned the value. The floppy driver is a disgrace (a lot of it going back to my original horrd "design"), and has no real maintainer. Nobody has the hardware, and nobody really cares. But it still gets used in virtual environment because it's one of those things that everybody supports. The whole thing should be re-written, or at least parts of it should be seriously cleaned up. The 'current fdc' index, which is used by the FDCS macro, and which is often shadowed by a local 'fdc' variable, is a prime example of how not to write code. But because nobody has the hardware or the motivation, let's just fix up the immediate problem with a nasty band-aid: test the fdc index before actually assigning it to the static 'fdc' variable. Reported-by: Jordy Zomer Cc: Willy Tarreau Cc: Dan Carpenter Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/block/floppy.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 5f1aa3197244..cbf74731cfce 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -848,14 +848,17 @@ static void reset_fdc_info(int mode) /* selects the fdc and drive, and enables the fdc's input/dma. */ static void set_fdc(int drive) { + unsigned int new_fdc = fdc; + if (drive >= 0 && drive < N_DRIVE) { - fdc = FDC(drive); + new_fdc = FDC(drive); current_drive = drive; } - if (fdc != 1 && fdc != 0) { + if (new_fdc >= N_FDC) { pr_info("bad fdc value\n"); return; } + fdc = new_fdc; set_dor(fdc, ~0, 8); #if N_FDC > 1 set_dor(1 - fdc, ~8, 0); From 00d04bd42fbc1e1f2a2b6e1c6db8b784f9a890c7 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 10 Feb 2020 09:11:30 +0100 Subject: [PATCH 3408/3715] vt: selection, handle pending signals in paste_selection commit 687bff0cd08f790d540cfb7b2349f0d876cdddec upstream. When pasting a selection to a vt, the task is set as INTERRUPTIBLE while waiting for a tty to unthrottle. But signals are not handled at all. Normally, this is not a problem as tty_ldisc_receive_buf receives all the goods and a user has no reason to interrupt the task. There are two scenarios where this matters: 1) when the tty is throttled and a signal is sent to the process, it spins on a CPU until the tty is unthrottled. schedule() does not really echedule, but returns immediately, of course. 2) when the sel_buffer becomes invalid, KASAN prevents any reads from it and the loop simply does not proceed and spins forever (causing the tty to throttle, but the code never sleeps, the same as above). This sometimes happens as there is a race in the sel_buffer handling code. So add signal handling to this ioctl (TIOCL_PASTESEL) and return -EINTR in case a signal is pending. Signed-off-by: Jiri Slaby Cc: stable Link: https://lore.kernel.org/r/20200210081131.23572-1-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/selection.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c index 7a4c8022c023..b157f17d2be2 100644 --- a/drivers/tty/vt/selection.c +++ b/drivers/tty/vt/selection.c @@ -27,6 +27,8 @@ #include #include +#include + /* Don't take this from : 011-015 on the screen aren't spaces */ #define isspace(c) ((c) == ' ') @@ -338,6 +340,7 @@ int paste_selection(struct tty_struct *tty) unsigned int count; struct tty_ldisc *ld; DECLARE_WAITQUEUE(wait, current); + int ret = 0; console_lock(); poke_blanked_console(); @@ -351,6 +354,10 @@ int paste_selection(struct tty_struct *tty) add_wait_queue(&vc->paste_wait, &wait); while (sel_buffer && sel_buffer_lth > pasted) { set_current_state(TASK_INTERRUPTIBLE); + if (signal_pending(current)) { + ret = -EINTR; + break; + } if (tty_throttled(tty)) { schedule(); continue; @@ -366,5 +373,5 @@ int paste_selection(struct tty_struct *tty) tty_buffer_unlock_exclusive(&vc->port); tty_ldisc_deref(ld); - return 0; + return ret; } From 60ba005bbf5751c2c58ca23ccfc9289ae06782b7 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Mon, 27 Jan 2020 15:56:16 -0800 Subject: [PATCH 3409/3715] staging: android: ashmem: Disallow ashmem memory from being remapped commit 6d67b0290b4b84c477e6a2fc6e005e174d3c7786 upstream. When ashmem file is mmapped, the resulting vma->vm_file points to the backing shmem file with the generic fops that do not check ashmem permissions like fops of ashmem do. If an mremap is done on the ashmem region, then the permission checks will be skipped. Fix that by disallowing mapping operation on the backing shmem file. Reported-by: Jann Horn Signed-off-by: Suren Baghdasaryan Cc: stable # 4.4,4.9,4.14,4.18,5.4 Signed-off-by: Todd Kjos Reviewed-by: Joel Fernandes (Google) Link: https://lore.kernel.org/r/20200127235616.48920-1-tkjos@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ashmem.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index 4151bb44a410..9481c0b23386 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -361,8 +361,23 @@ static inline vm_flags_t calc_vm_may_flags(unsigned long prot) _calc_vm_trans(prot, PROT_EXEC, VM_MAYEXEC); } +static int ashmem_vmfile_mmap(struct file *file, struct vm_area_struct *vma) +{ + /* do not allow to mmap ashmem backing shmem file directly */ + return -EPERM; +} + +static unsigned long +ashmem_vmfile_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + return current->mm->get_unmapped_area(file, addr, len, pgoff, flags); +} + static int ashmem_mmap(struct file *file, struct vm_area_struct *vma) { + static struct file_operations vmfile_fops; struct ashmem_area *asma = file->private_data; int ret = 0; @@ -403,6 +418,19 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma) } vmfile->f_mode |= FMODE_LSEEK; asma->file = vmfile; + /* + * override mmap operation of the vmfile so that it can't be + * remapped which would lead to creation of a new vma with no + * asma permission checks. Have to override get_unmapped_area + * as well to prevent VM_BUG_ON check for f_ops modification. + */ + if (!vmfile_fops.mmap) { + vmfile_fops = *vmfile->f_op; + vmfile_fops.mmap = ashmem_vmfile_mmap; + vmfile_fops.get_unmapped_area = + ashmem_vmfile_get_unmapped_area; + } + vmfile->f_op = &vmfile_fops; } get_file(asma->file); From 72fb144918083f37110a8b9d762059068b381a5f Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Tue, 4 Feb 2020 19:34:02 +0000 Subject: [PATCH 3410/3715] staging: vt6656: fix sign of rx_dbm to bb_pre_ed_rssi. commit 93134df520f23f4e9998c425b8987edca7016817 upstream. bb_pre_ed_rssi is an u8 rx_dm always returns negative signed values add minus operator to always yield positive. fixes issue where rx sensitivity is always set to maximum because the unsigned numbers were always greater then 100. Fixes: 63b9907f58f1 ("staging: vt6656: mac80211 conversion: create rx function.") Cc: stable Signed-off-by: Malcolm Priestley Link: https://lore.kernel.org/r/aceac98c-6e69-3ce1-dfec-2bf27b980221@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vt6656/dpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/vt6656/dpc.c b/drivers/staging/vt6656/dpc.c index 655f0002f880..7b73fa2f8834 100644 --- a/drivers/staging/vt6656/dpc.c +++ b/drivers/staging/vt6656/dpc.c @@ -140,7 +140,7 @@ int vnt_rx_data(struct vnt_private *priv, struct vnt_rcb *ptr_rcb, vnt_rf_rssi_to_dbm(priv, *rssi, &rx_dbm); - priv->bb_pre_ed_rssi = (u8)rx_dbm + 1; + priv->bb_pre_ed_rssi = (u8)-rx_dbm + 1; priv->current_rssi = priv->bb_pre_ed_rssi; frame = skb_data + 8; From 04e2dcbed2136c3b332e10647beb8dc9a7a79e1a Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 10 Feb 2020 15:45:50 +0200 Subject: [PATCH 3411/3715] xhci: Force Maximum Packet size for Full-speed bulk devices to valid range. commit f148b9f402ef002b57bcff3964d45abc8ffb6c3f upstream. A Full-speed bulk USB audio device (DJ-Tech CTRL) with a invalid Maximum Packet Size of 4 causes a xHC "Parameter Error" at enumeration. This is because valid Maximum packet sizes for Full-speed bulk endpoints are 8, 16, 32 and 64 bytes. Hosts are not required to support other values than these. See usb 2 specs section 5.8.3 for details. The device starts working after forcing the maximum packet size to 8. This is most likely the case with other devices as well, so force the maximum packet size to a valid range. Cc: stable@vger.kernel.org Reported-by: Rene D Obermueller Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20200210134553.9144-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index a80a57decda1..70452c881e56 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1479,9 +1479,15 @@ int xhci_endpoint_init(struct xhci_hcd *xhci, /* Allow 3 retries for everything but isoc, set CErr = 3 */ if (!usb_endpoint_xfer_isoc(&ep->desc)) err_count = 3; - /* Some devices get this wrong */ - if (usb_endpoint_xfer_bulk(&ep->desc) && udev->speed == USB_SPEED_HIGH) - max_packet = 512; + /* HS bulk max packet should be 512, FS bulk supports 8, 16, 32 or 64 */ + if (usb_endpoint_xfer_bulk(&ep->desc)) { + if (udev->speed == USB_SPEED_HIGH) + max_packet = 512; + if (udev->speed == USB_SPEED_FULL) { + max_packet = rounddown_pow_of_two(max_packet); + max_packet = clamp_val(max_packet, 8, 64); + } + } /* xHCI 1.0 and 1.1 indicates that ctrl ep avg TRB Length should be 8 */ if (usb_endpoint_xfer_control(&ep->desc) && xhci->hci_version >= 0x100) avg_trb_len = 8; From 3026b4972d406b05c9349a8cd1cf931614043d23 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 10 Feb 2020 15:45:52 +0200 Subject: [PATCH 3412/3715] xhci: fix runtime pm enabling for quirky Intel hosts commit 024d411e9c5d49eb96c825af52a3ce2682895676 upstream. Intel hosts that need the XHCI_PME_STUCK_QUIRK flag should enable runtime pm by calling xhci_pme_acpi_rtd3_enable() before usb_hcd_pci_probe() calls pci_dev_run_wake(). Otherwise usage count for the device won't be decreased, and runtime suspend is prevented. usb_hcd_pci_probe() only decreases the usage count if device can generate run-time wake-up events, i.e. when pci_dev_run_wake() returns true. This issue was exposed by pci_dev_run_wake() change in commit 8feaec33b986 ("PCI / PM: Always check PME wakeup capability for runtime wakeup support") and should be backported to kernels with that change Cc: # 4.13+ Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20200210134553.9144-4-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 09f228279c01..42d368cb76ce 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -284,6 +284,9 @@ static int xhci_pci_setup(struct usb_hcd *hcd) if (!usb_hcd_is_primary_hcd(hcd)) return 0; + if (xhci->quirks & XHCI_PME_STUCK_QUIRK) + xhci_pme_acpi_rtd3_enable(pdev); + xhci_dbg(xhci, "Got SBRN %u\n", (unsigned int) xhci->sbrn); /* Find any debug ports */ @@ -344,9 +347,6 @@ static int xhci_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) HCC_MAX_PSA(xhci->hcc_params) >= 4) xhci->shared_hcd->can_do_streams = 1; - if (xhci->quirks & XHCI_PME_STUCK_QUIRK) - xhci_pme_acpi_rtd3_enable(dev); - /* USB-2 and USB-3 roothubs initialized, allow runtime pm suspend */ pm_runtime_put_noidle(&dev->dev); From e64eae745849c6db1d3497071cc75044126ad501 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Fri, 15 Nov 2019 18:50:00 +0200 Subject: [PATCH 3413/3715] usb: host: xhci: update event ring dequeue pointer on purpose commit dc0ffbea5729a3abafa577ebfce87f18b79e294b upstream. On some situations, the software handles TRB events slower than adding TRBs, then xhci_handle_event can't return zero long time, the xHC will consider the event ring is full, and trigger "Event Ring Full" error, but in fact, the software has already finished lots of events, just no chance to update ERDP (event ring dequeue pointer). In this commit, we force update ERDP if half of TRBS_PER_SEGMENT events have handled to avoid "Event Ring Full" error. Signed-off-by: Peter Chen Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/1573836603-10871-2-git-send-email-mathias.nyman@linux.intel.com Signed-off-by: Fabio Estevam Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 60 ++++++++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 17 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 61fa3007a74a..868878f5b72b 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2758,6 +2758,42 @@ static int xhci_handle_event(struct xhci_hcd *xhci) return 1; } +/* + * Update Event Ring Dequeue Pointer: + * - When all events have finished + * - To avoid "Event Ring Full Error" condition + */ +static void xhci_update_erst_dequeue(struct xhci_hcd *xhci, + union xhci_trb *event_ring_deq) +{ + u64 temp_64; + dma_addr_t deq; + + temp_64 = xhci_read_64(xhci, &xhci->ir_set->erst_dequeue); + /* If necessary, update the HW's version of the event ring deq ptr. */ + if (event_ring_deq != xhci->event_ring->dequeue) { + deq = xhci_trb_virt_to_dma(xhci->event_ring->deq_seg, + xhci->event_ring->dequeue); + if (deq == 0) + xhci_warn(xhci, "WARN something wrong with SW event ring dequeue ptr\n"); + /* + * Per 4.9.4, Software writes to the ERDP register shall + * always advance the Event Ring Dequeue Pointer value. + */ + if ((temp_64 & (u64) ~ERST_PTR_MASK) == + ((u64) deq & (u64) ~ERST_PTR_MASK)) + return; + + /* Update HC event ring dequeue pointer */ + temp_64 &= ERST_PTR_MASK; + temp_64 |= ((u64) deq & (u64) ~ERST_PTR_MASK); + } + + /* Clear the event handler busy flag (RW1C) */ + temp_64 |= ERST_EHB; + xhci_write_64(xhci, temp_64, &xhci->ir_set->erst_dequeue); +} + /* * xHCI spec says we can get an interrupt, and if the HC has an error condition, * we might get bad data out of the event ring. Section 4.10.2.7 has a list of @@ -2769,9 +2805,9 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) union xhci_trb *event_ring_deq; irqreturn_t ret = IRQ_NONE; unsigned long flags; - dma_addr_t deq; u64 temp_64; u32 status; + int event_loop = 0; spin_lock_irqsave(&xhci->lock, flags); /* Check if the xHC generated the interrupt, or the irq is shared */ @@ -2825,24 +2861,14 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) /* FIXME this should be a delayed service routine * that clears the EHB. */ - while (xhci_handle_event(xhci) > 0) {} - - temp_64 = xhci_read_64(xhci, &xhci->ir_set->erst_dequeue); - /* If necessary, update the HW's version of the event ring deq ptr. */ - if (event_ring_deq != xhci->event_ring->dequeue) { - deq = xhci_trb_virt_to_dma(xhci->event_ring->deq_seg, - xhci->event_ring->dequeue); - if (deq == 0) - xhci_warn(xhci, "WARN something wrong with SW event " - "ring dequeue ptr.\n"); - /* Update HC event ring dequeue pointer */ - temp_64 &= ERST_PTR_MASK; - temp_64 |= ((u64) deq & (u64) ~ERST_PTR_MASK); + while (xhci_handle_event(xhci) > 0) { + if (event_loop++ < TRBS_PER_SEGMENT / 2) + continue; + xhci_update_erst_dequeue(xhci, event_ring_deq); + event_loop = 0; } - /* Clear the event handler busy flag (RW1C); event ring is empty. */ - temp_64 |= ERST_EHB; - xhci_write_64(xhci, temp_64, &xhci->ir_set->erst_dequeue); + xhci_update_erst_dequeue(xhci, event_ring_deq); ret = IRQ_HANDLED; out: From f6f227e878ff5c3f969cadea3dc4a833bc27e247 Mon Sep 17 00:00:00 2001 From: EJ Hsu Date: Thu, 30 Jan 2020 01:25:06 -0800 Subject: [PATCH 3414/3715] usb: uas: fix a plug & unplug racing commit 3e99862c05a9caa5a27969f41566b428696f5a9a upstream. When a uas disk is plugged into an external hub, uas_probe() will be called by the hub thread to do the probe. It will first create a SCSI host and then do the scan for this host. During the scan, it will probe the LUN using SCSI INQUERY command which will be packed in the URB and submitted to uas disk. There might be a chance that this external hub with uas disk attached is unplugged during the scan. In this case, uas driver will fail to submit the URB (due to the NOTATTACHED state of uas device) and try to put this SCSI command back to request queue waiting for next chance to run. In normal case, this cycle will terminate when hub thread gets disconnection event and calls into uas_disconnect() accordingly. But in this case, uas_disconnect() will not be called because hub thread of external hub gets stuck waiting for the completion of this SCSI command. A deadlock happened. In this fix, uas will call scsi_scan_host() asynchronously to avoid the blocking of hub thread. Signed-off-by: EJ Hsu Acked-by: Oliver Neukum Cc: stable Link: https://lore.kernel.org/r/20200130092506.102760-1-ejh@nvidia.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/uas.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 8391a88cf90f..9d97543449e6 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -46,6 +46,7 @@ struct uas_dev_info { struct scsi_cmnd *cmnd[MAX_CMNDS]; spinlock_t lock; struct work_struct work; + struct work_struct scan_work; /* for async scanning */ }; enum { @@ -115,6 +116,17 @@ out: spin_unlock_irqrestore(&devinfo->lock, flags); } +static void uas_scan_work(struct work_struct *work) +{ + struct uas_dev_info *devinfo = + container_of(work, struct uas_dev_info, scan_work); + struct Scsi_Host *shost = usb_get_intfdata(devinfo->intf); + + dev_dbg(&devinfo->intf->dev, "starting scan\n"); + scsi_scan_host(shost); + dev_dbg(&devinfo->intf->dev, "scan complete\n"); +} + static void uas_add_work(struct uas_cmd_info *cmdinfo) { struct scsi_pointer *scp = (void *)cmdinfo; @@ -989,6 +1001,7 @@ static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id) init_usb_anchor(&devinfo->data_urbs); spin_lock_init(&devinfo->lock); INIT_WORK(&devinfo->work, uas_do_work); + INIT_WORK(&devinfo->scan_work, uas_scan_work); result = uas_configure_endpoints(devinfo); if (result) @@ -1005,7 +1018,9 @@ static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id) if (result) goto free_streams; - scsi_scan_host(shost); + /* Submit the delayed_work for SCSI-device scanning */ + schedule_work(&devinfo->scan_work); + return result; free_streams: @@ -1173,6 +1188,12 @@ static void uas_disconnect(struct usb_interface *intf) usb_kill_anchored_urbs(&devinfo->data_urbs); uas_zap_pending(devinfo, DID_NO_CONNECT); + /* + * Prevent SCSI scanning (if it hasn't started yet) + * or wait for the SCSI-scanning routine to stop. + */ + cancel_work_sync(&devinfo->scan_work); + scsi_remove_host(shost); uas_free_streams(devinfo); scsi_host_put(shost); From 2bc3e83764e0be8537d44e6e63260676f05cbee7 Mon Sep 17 00:00:00 2001 From: Richard Dodd Date: Wed, 12 Feb 2020 14:22:18 +0000 Subject: [PATCH 3415/3715] USB: Fix novation SourceControl XL after suspend commit b692056db8ecc7f452b934f016c17348282b7699 upstream. Currently, the SourceControl will stay in power-down mode after resuming from suspend. This patch resets the device after suspend to power it up. Signed-off-by: Richard Dodd Cc: stable Link: https://lore.kernel.org/r/20200212142220.36892-1-richard.o.dodd@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 19e819aa2419..ad8307140df8 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -291,6 +291,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* INTEL VALUE SSD */ { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME }, + /* novation SoundControl XL */ + { USB_DEVICE(0x1235, 0x0061), .driver_info = USB_QUIRK_RESET_RESUME }, + { } /* terminating entry must be last */ }; From 37218ed6c9b268560000d38fd5da4013bfe8cf42 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 31 Jan 2020 10:39:26 -0500 Subject: [PATCH 3416/3715] USB: hub: Don't record a connect-change event during reset-resume commit 8099f58f1ecddf4f374f4828a3dff8397c7cbd74 upstream. Paul Zimmerman reports that his USB Bluetooth adapter sometimes crashes following system resume, when it receives a Get-Device-Descriptor request while it is busy doing something else. Such a request was added by commit a4f55d8b8c14 ("usb: hub: Check device descriptor before resusciation"). It gets sent when the hub driver's work thread checks whether a connect-change event on an enabled port really indicates a new device has been connected, as opposed to an old device momentarily disconnecting and then reconnecting (which can happen with xHCI host controllers, since they automatically enable connected ports). The same kind of thing occurs when a port's power session is lost during system suspend. When the system wakes up it sees a connect-change event on the port, and if the child device's persist_enabled flag was set then hub_activate() sets the device's reset_resume flag as well as the port's bit in hub->change_bits. The reset-resume code then takes responsibility for checking that the same device is still attached to the port, and it does this as part of the device's resume pathway. By the time the hub driver's work thread starts up again, the device has already been fully reinitialized and is busy doing its own thing. There's no need for the work thread to do the same check a second time, and in fact this unnecessary check is what caused the problem that Paul observed. Note that performing the unnecessary check is not actually a bug. Devices are supposed to be able to send descriptors back to the host even when they are busy doing something else. The underlying cause of Paul's problem lies in his Bluetooth adapter. Nevertheless, we shouldn't perform the same check twice in a row -- and as a nice side benefit, removing the extra check allows the Bluetooth adapter to work more reliably. The work thread performs its check when it sees that the port's bit is set in hub->change_bits. In this situation that bit is interpreted as though a connect-change event had occurred on the port _after_ the reset-resume, which is not what actually happened. One possible fix would be to make the reset-resume code clear the port's bit in hub->change_bits. But it seems simpler to just avoid setting the bit during hub_activate() in the first place. That's what this patch does. (Proving that the patch is correct when CONFIG_PM is disabled requires a little thought. In that setting hub_activate() will be called only for initialization and resets, since there won't be any resumes or reset-resumes. During initialization and hub resets the hub doesn't have any child devices, and so this code path never gets executed.) Reported-and-tested-by: Paul Zimmerman Signed-off-by: Alan Stern Link: https://marc.info/?t=157949360700001&r=1&w=2 CC: David Heinzelmann CC: Link: https://lore.kernel.org/r/Pine.LNX.4.44L0.2001311037460.1577-100000@iolanthe.rowland.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 7d5ecf36a33c..1bc826285343 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1189,11 +1189,6 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) #ifdef CONFIG_PM udev->reset_resume = 1; #endif - /* Don't set the change_bits when the device - * was powered off. - */ - if (test_bit(port1, hub->power_bits)) - set_bit(port1, hub->change_bits); } else { /* The power session is gone; tell hub_wq */ From 937008f2e1c167ec0e6eaa946bf355b5bae0772b Mon Sep 17 00:00:00 2001 From: Hardik Gajjar Date: Thu, 6 Feb 2020 12:49:23 +0100 Subject: [PATCH 3417/3715] USB: hub: Fix the broken detection of USB3 device in SMSC hub commit 1208f9e1d758c991b0a46a1bd60c616b906bbe27 upstream. Renesas R-Car H3ULCB + Kingfisher Infotainment Board is either not able to detect the USB3.0 mass storage devices or is detecting those as USB2.0 high speed devices. The explanation given by Renesas is that, due to a HW issue, the XHCI driver does not wake up after going to sleep on connecting a USB3.0 device. In order to mitigate that, disable the auto-suspend feature specifically for SMSC hubs from hub_probe() function, as a quirk. Renesas Kingfisher Infotainment Board has two USB3.0 ports (CN2) which are connected via USB5534B 4-port SuperSpeed/Hi-Speed, low-power, configurable hub controller. [1] SanDisk USB 3.0 device detected as USB-2.0 before the patch [ 74.036390] usb 5-1.1: new high-speed USB device number 4 using xhci-hcd [ 74.061598] usb 5-1.1: New USB device found, idVendor=0781, idProduct=5581, bcdDevice= 1.00 [ 74.069976] usb 5-1.1: New USB device strings: Mfr=1, Product=2, SerialNumber=3 [ 74.077303] usb 5-1.1: Product: Ultra [ 74.080980] usb 5-1.1: Manufacturer: SanDisk [ 74.085263] usb 5-1.1: SerialNumber: 4C530001110208116550 [2] SanDisk USB 3.0 device detected as USB-3.0 after the patch [ 34.565078] usb 6-1.1: new SuperSpeed Gen 1 USB device number 3 using xhci-hcd [ 34.588719] usb 6-1.1: New USB device found, idVendor=0781, idProduct=5581, bcdDevice= 1.00 [ 34.597098] usb 6-1.1: New USB device strings: Mfr=1, Product=2, SerialNumber=3 [ 34.604430] usb 6-1.1: Product: Ultra [ 34.608110] usb 6-1.1: Manufacturer: SanDisk [ 34.612397] usb 6-1.1: SerialNumber: 4C530001110208116550 Suggested-by: Alan Stern Signed-off-by: Hardik Gajjar Acked-by: Alan Stern Tested-by: Eugeniu Rosca Cc: stable Link: https://lore.kernel.org/r/1580989763-32291-1-git-send-email-hgajjar@de.adit-jv.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 15 +++++++++++++++ drivers/usb/core/hub.h | 1 + 2 files changed, 16 insertions(+) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 1bc826285343..ff1be6a6841b 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -36,7 +36,9 @@ #include "otg_whitelist.h" #define USB_VENDOR_GENESYS_LOGIC 0x05e3 +#define USB_VENDOR_SMSC 0x0424 #define HUB_QUIRK_CHECK_PORT_AUTOSUSPEND 0x01 +#define HUB_QUIRK_DISABLE_AUTOSUSPEND 0x02 /* Protect struct usb_device->state and ->children members * Note: Both are also protected by ->dev.sem, except that ->state can @@ -1680,6 +1682,10 @@ static void hub_disconnect(struct usb_interface *intf) kfree(hub->buffer); pm_suspend_ignore_children(&intf->dev, false); + + if (hub->quirk_disable_autosuspend) + usb_autopm_put_interface(intf); + kref_put(&hub->kref, hub_release); } @@ -1810,6 +1816,11 @@ static int hub_probe(struct usb_interface *intf, const struct usb_device_id *id) if (id->driver_info & HUB_QUIRK_CHECK_PORT_AUTOSUSPEND) hub->quirk_check_port_auto_suspend = 1; + if (id->driver_info & HUB_QUIRK_DISABLE_AUTOSUSPEND) { + hub->quirk_disable_autosuspend = 1; + usb_autopm_get_interface(intf); + } + if (hub_configure(hub, &desc->endpoint[0].desc) >= 0) return 0; @@ -5288,6 +5299,10 @@ out_hdev_lock: } static const struct usb_device_id hub_id_table[] = { + { .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_INT_CLASS, + .idVendor = USB_VENDOR_SMSC, + .bInterfaceClass = USB_CLASS_HUB, + .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND}, { .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_INT_CLASS, .idVendor = USB_VENDOR_GENESYS_LOGIC, diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h index 34c1a7e22aae..657bacfbe3a7 100644 --- a/drivers/usb/core/hub.h +++ b/drivers/usb/core/hub.h @@ -69,6 +69,7 @@ struct usb_hub { unsigned quiescing:1; unsigned disconnected:1; unsigned in_reset:1; + unsigned quirk_disable_autosuspend:1; unsigned quirk_check_port_auto_suspend:1; From 2851358d8f2b2aa1c3302abf47b6f179c074e056 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 10 Feb 2020 12:02:30 -0600 Subject: [PATCH 3418/3715] staging: rtl8188eu: Fix potential security hole commit 499c405b2b80bb3a04425ba3541d20305e014d3e upstream. In routine rtw_hostapd_ioctl(), the user-controlled p->length is assumed to be at least the size of struct ieee_param size, but this assumption is never checked. This could result in out-of-bounds read/write on kernel heap in case a p->length less than the size of struct ieee_param is specified by the user. If p->length is allowed to be greater than the size of the struct, then a malicious user could be wasting kernel memory. Fixes commit a2c60d42d97c ("Add files for new driver - part 16"). Reported by: Pietro Oliva Cc: Pietro Oliva Cc: Stable Fixes: a2c60d42d97c ("staging: r8188eu: Add files for new driver - part 16") Signed-off-by: Larry Finger Link: https://lore.kernel.org/r/20200210180235.21691-2-Larry.Finger@lwfinger.net Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/os_dep/ioctl_linux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c index 446310775e90..8c9c5cd03e86 100644 --- a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c @@ -2856,7 +2856,7 @@ static int rtw_hostapd_ioctl(struct net_device *dev, struct iw_point *p) goto out; } - if (!p->pointer) { + if (!p->pointer || p->length != sizeof(struct ieee_param)) { ret = -EINVAL; goto out; } From b34152501205a893f8091a509cc876e9f4e3f4a0 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 10 Feb 2020 12:02:32 -0600 Subject: [PATCH 3419/3715] staging: rtl8188eu: Fix potential overuse of kernel memory commit 4ddf8ab8d15ddbc52eefb44eb64e38466ce1f70f upstream. In routine wpa_supplicant_ioctl(), the user-controlled p->length is checked to be at least the size of struct ieee_param size, but the code does not detect the case where p->length is greater than the size of the struct, thus a malicious user could be wasting kernel memory. Fixes commit a2c60d42d97c ("Add files for new driver - part 16"). Reported by: Pietro Oliva Cc: Pietro Oliva Cc: Stable Fixes commit a2c60d42d97c ("Add files for new driver - part 16"). Signed-off-by: Larry Finger Link: https://lore.kernel.org/r/20200210180235.21691-4-Larry.Finger@lwfinger.net Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/os_dep/ioctl_linux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c index 8c9c5cd03e86..184fc05a0f8b 100644 --- a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c @@ -2051,7 +2051,7 @@ static int wpa_supplicant_ioctl(struct net_device *dev, struct iw_point *p) struct ieee_param *param; uint ret = 0; - if (p->length < sizeof(struct ieee_param) || !p->pointer) { + if (!p->pointer || p->length != sizeof(struct ieee_param)) { ret = -EINVAL; goto out; } From ad39c28f11943283429a290f216729af21e03c63 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 10 Feb 2020 12:02:31 -0600 Subject: [PATCH 3420/3715] staging: rtl8723bs: Fix potential security hole commit ac33597c0c0d1d819dccfe001bcd0acef7107e7c upstream. In routine rtw_hostapd_ioctl(), the user-controlled p->length is assumed to be at least the size of struct ieee_param size, but this assumption is never checked. This could result in out-of-bounds read/write on kernel heap in case a p->length less than the size of struct ieee_param is specified by the user. If p->length is allowed to be greater than the size of the struct, then a malicious user could be wasting kernel memory. Fixes commit 554c0a3abf216 ("0taging: Add rtl8723bs sdio wifi driver"). Reported by: Pietro Oliva Cc: Pietro Oliva Cc: Stable Fixes 554c0a3abf216 ("0taging: Add rtl8723bs sdio wifi driver"). Signed-off-by: Larry Finger Link: https://lore.kernel.org/r/20200210180235.21691-3-Larry.Finger@lwfinger.net Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/os_dep/ioctl_linux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c index 1b61da61690b..9dc4786fde75 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c @@ -4340,7 +4340,7 @@ static int rtw_hostapd_ioctl(struct net_device *dev, struct iw_point *p) /* if (p->length < sizeof(struct ieee_param) || !p->pointer) { */ - if (!p->pointer) { + if (!p->pointer || p->length != sizeof(*param)) { ret = -EINVAL; goto out; } From df1c95e4c1217f09f7b7ac2eae718f331ed57089 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 10 Feb 2020 12:02:33 -0600 Subject: [PATCH 3421/3715] staging: rtl8723bs: Fix potential overuse of kernel memory commit 23954cb078febfc63a755301fe77e06bccdb4d2a upstream. In routine wpa_supplicant_ioctl(), the user-controlled p->length is checked to be at least the size of struct ieee_param size, but the code does not detect the case where p->length is greater than the size of the struct, thus a malicious user could be wasting kernel memory. Fixes commit 554c0a3abf216 ("staging: Add rtl8723bs sdio wifi driver"). Reported by: Pietro Oliva Cc: Pietro Oliva Cc: Stable Fixes: 554c0a3abf216 ("staging: Add rtl8723bs sdio wifi driver"). Signed-off-by: Larry Finger Link: https://lore.kernel.org/r/20200210180235.21691-5-Larry.Finger@lwfinger.net Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/os_dep/ioctl_linux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c index 9dc4786fde75..d51f6c452972 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c @@ -3495,7 +3495,7 @@ static int wpa_supplicant_ioctl(struct net_device *dev, struct iw_point *p) /* down(&ieee->wx_sem); */ - if (p->length < sizeof(struct ieee_param) || !p->pointer) { + if (!p->pointer || p->length != sizeof(struct ieee_param)) { ret = -EINVAL; goto out; } From fcbfe89dd4003645887e7e7a47905031fc457052 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 4 Feb 2020 13:28:41 +0100 Subject: [PATCH 3422/3715] x86/mce/amd: Publish the bank pointer only after setup has succeeded commit 6e5cf31fbe651bed7ba1df768f2e123531132417 upstream. threshold_create_bank() creates a bank descriptor per MCA error thresholding counter which can be controlled over sysfs. It publishes the pointer to that bank in a per-CPU variable and then goes on to create additional thresholding blocks if the bank has such. However, that creation of additional blocks in allocate_threshold_blocks() can fail, leading to a use-after-free through the per-CPU pointer. Therefore, publish that pointer only after all blocks have been setup successfully. Fixes: 019f34fccfd5 ("x86, MCE, AMD: Move shared bank to node descriptor") Reported-by: Saar Amar Reported-by: Dan Carpenter Signed-off-by: Borislav Petkov Cc: Link: http://lkml.kernel.org/r/20200128140846.phctkvx5btiexvbx@kili.mountain Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 33 ++++++++++++++-------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index a8f47697276b..4fb28239e49c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -1151,8 +1151,9 @@ static const char *get_name(unsigned int bank, struct threshold_block *b) return buf_mcatype; } -static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, - unsigned int block, u32 address) +static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb, + unsigned int bank, unsigned int block, + u32 address) { struct threshold_block *b = NULL; u32 low, high; @@ -1196,16 +1197,12 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, INIT_LIST_HEAD(&b->miscj); - if (per_cpu(threshold_banks, cpu)[bank]->blocks) { - list_add(&b->miscj, - &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj); - } else { - per_cpu(threshold_banks, cpu)[bank]->blocks = b; - } + if (tb->blocks) + list_add(&b->miscj, &tb->blocks->miscj); + else + tb->blocks = b; - err = kobject_init_and_add(&b->kobj, &threshold_ktype, - per_cpu(threshold_banks, cpu)[bank]->kobj, - get_name(bank, b)); + err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(bank, b)); if (err) goto out_free; recurse: @@ -1213,7 +1210,7 @@ recurse: if (!address) return 0; - err = allocate_threshold_blocks(cpu, bank, block, address); + err = allocate_threshold_blocks(cpu, tb, bank, block, address); if (err) goto out_free; @@ -1298,8 +1295,6 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) goto out_free; } - per_cpu(threshold_banks, cpu)[bank] = b; - if (is_shared_bank(bank)) { refcount_set(&b->cpus, 1); @@ -1310,9 +1305,13 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) } } - err = allocate_threshold_blocks(cpu, bank, 0, msr_ops.misc(bank)); - if (!err) - goto out; + err = allocate_threshold_blocks(cpu, b, bank, 0, msr_ops.misc(bank)); + if (err) + goto out_free; + + per_cpu(threshold_banks, cpu)[bank] = b; + + return 0; out_free: kfree(b); From 1a7fb626c92beee0dd0283e2036f3289e7109ae6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 13 Feb 2020 19:01:34 +0100 Subject: [PATCH 3423/3715] x86/mce/amd: Fix kobject lifetime commit 51dede9c05df2b78acd6dcf6a17d21f0877d2d7b upstream. Accessing the MCA thresholding controls in sysfs concurrently with CPU hotplug can lead to a couple of KASAN-reported issues: BUG: KASAN: use-after-free in sysfs_file_ops+0x155/0x180 Read of size 8 at addr ffff888367578940 by task grep/4019 and BUG: KASAN: use-after-free in show_error_count+0x15c/0x180 Read of size 2 at addr ffff888368a05514 by task grep/4454 for example. Both result from the fact that the threshold block creation/teardown code frees the descriptor memory itself instead of defining proper ->release function and leaving it to the driver core to take care of that, after all sysfs accesses have completed. Do that and get rid of the custom freeing code, fixing the above UAFs in the process. [ bp: write commit message. ] Fixes: 95268664390b ("[PATCH] x86_64: mce_amd support for family 0x10 processors") Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: Link: https://lkml.kernel.org/r/20200214082801.13836-1-bp@alien8.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 4fb28239e49c..bbe94b682119 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -1116,9 +1116,12 @@ static const struct sysfs_ops threshold_ops = { .store = store, }; +static void threshold_block_release(struct kobject *kobj); + static struct kobj_type threshold_ktype = { .sysfs_ops = &threshold_ops, .default_attrs = default_attrs, + .release = threshold_block_release, }; static const char *get_name(unsigned int bank, struct threshold_block *b) @@ -1320,8 +1323,12 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) return err; } -static void deallocate_threshold_block(unsigned int cpu, - unsigned int bank) +static void threshold_block_release(struct kobject *kobj) +{ + kfree(to_block(kobj)); +} + +static void deallocate_threshold_block(unsigned int cpu, unsigned int bank) { struct threshold_block *pos = NULL; struct threshold_block *tmp = NULL; @@ -1331,13 +1338,11 @@ static void deallocate_threshold_block(unsigned int cpu, return; list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) { - kobject_put(&pos->kobj); list_del(&pos->miscj); - kfree(pos); + kobject_put(&pos->kobj); } - kfree(per_cpu(threshold_banks, cpu)[bank]->blocks); - per_cpu(threshold_banks, cpu)[bank]->blocks = NULL; + kobject_put(&head->blocks->kobj); } static void __threshold_remove_blocks(struct threshold_bank *b) From d5f2c5dd15051766b2ba8d141201d8c33a6257b8 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Mon, 10 Feb 2020 16:20:53 +0100 Subject: [PATCH 3424/3715] tty/serial: atmel: manage shutdown in case of RS485 or ISO7816 mode commit 04b5bfe3dc94e64d0590c54045815cb5183fb095 upstream. In atmel_shutdown() we call atmel_stop_rx() and atmel_stop_tx() functions. Prevent the rx restart that is implemented in RS485 or ISO7816 modes when calling atmel_stop_tx() by using the atomic information tasklet_shutdown that is already in place for this purpose. Fixes: 98f2082c3ac4 ("tty/serial: atmel: enforce tasklet init and termination sequences") Signed-off-by: Nicolas Ferre Cc: stable Link: https://lore.kernel.org/r/20200210152053.8289-1-nicolas.ferre@microchip.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 367ce812743e..a00227d312d3 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -498,7 +498,8 @@ static void atmel_stop_tx(struct uart_port *port) atmel_uart_writel(port, ATMEL_US_IDR, atmel_port->tx_done_mask); if (atmel_uart_is_half_duplex(port)) - atmel_start_rx(port); + if (!atomic_read(&atmel_port->tasklet_shutdown)) + atmel_start_rx(port); } From 75ca0a8f124bf40d55947fdef395c800d03c6831 Mon Sep 17 00:00:00 2001 From: Fugang Duan Date: Tue, 11 Feb 2020 14:16:01 +0800 Subject: [PATCH 3425/3715] tty: serial: imx: setup the correct sg entry for tx dma MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f76707831829530ffdd3888bebc108aecefccaa0 upstream. There has oops as below happen on i.MX8MP EVK platform that has 6G bytes DDR memory. when (xmit->tail < xmit->head) && (xmit->head == 0), it setups one sg entry with sg->length is zero: sg_set_buf(sgl + 1, xmit->buf, xmit->head); if xmit->buf is allocated from >4G address space, and SDMA only support <4G address space, then dma_map_sg() will call swiotlb_map() to do bounce buffer copying and mapping. But swiotlb_map() don't allow sg entry's length is zero, otherwise report BUG_ON(). So the patch is to correct the tx DMA scatter list. Oops: [ 287.675715] kernel BUG at kernel/dma/swiotlb.c:497! [ 287.680592] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP [ 287.686075] Modules linked in: [ 287.689133] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.4.3-00016-g3fdc4e0-dirty #10 [ 287.696872] Hardware name: FSL i.MX8MP EVK (DT) [ 287.701402] pstate: 80000085 (Nzcv daIf -PAN -UAO) [ 287.706199] pc : swiotlb_tbl_map_single+0x1fc/0x310 [ 287.711076] lr : swiotlb_map+0x60/0x148 [ 287.714909] sp : ffff800010003c00 [ 287.718221] x29: ffff800010003c00 x28: 0000000000000000 [ 287.723533] x27: 0000000000000040 x26: ffff800011ae0000 [ 287.728844] x25: ffff800011ae09f8 x24: 0000000000000000 [ 287.734155] x23: 00000001b7af9000 x22: 0000000000000000 [ 287.739465] x21: ffff000176409c10 x20: 00000000001f7ffe [ 287.744776] x19: ffff000176409c10 x18: 000000000000002e [ 287.750087] x17: 0000000000000000 x16: 0000000000000000 [ 287.755397] x15: 0000000000000000 x14: 0000000000000000 [ 287.760707] x13: ffff00017f334000 x12: 0000000000000001 [ 287.766018] x11: 00000000001fffff x10: 0000000000000000 [ 287.771328] x9 : 0000000000000003 x8 : 0000000000000000 [ 287.776638] x7 : 0000000000000000 x6 : 0000000000000000 [ 287.781949] x5 : 0000000000200000 x4 : 0000000000000000 [ 287.787259] x3 : 0000000000000001 x2 : 00000001b7af9000 [ 287.792570] x1 : 00000000fbfff000 x0 : 0000000000000000 [ 287.797881] Call trace: [ 287.800328] swiotlb_tbl_map_single+0x1fc/0x310 [ 287.804859] swiotlb_map+0x60/0x148 [ 287.808347] dma_direct_map_page+0xf0/0x130 [ 287.812530] dma_direct_map_sg+0x78/0xe0 [ 287.816453] imx_uart_dma_tx+0x134/0x2f8 [ 287.820374] imx_uart_dma_tx_callback+0xd8/0x168 [ 287.824992] vchan_complete+0x194/0x200 [ 287.828828] tasklet_action_common.isra.0+0x154/0x1a0 [ 287.833879] tasklet_action+0x24/0x30 [ 287.837540] __do_softirq+0x120/0x23c [ 287.841202] irq_exit+0xb8/0xd8 [ 287.844343] __handle_domain_irq+0x64/0xb8 [ 287.848438] gic_handle_irq+0x5c/0x148 [ 287.852185] el1_irq+0xb8/0x180 [ 287.855327] cpuidle_enter_state+0x84/0x360 [ 287.859508] cpuidle_enter+0x34/0x48 [ 287.863083] call_cpuidle+0x18/0x38 [ 287.866571] do_idle+0x1e0/0x280 [ 287.869798] cpu_startup_entry+0x20/0x40 [ 287.873721] rest_init+0xd4/0xe0 [ 287.876949] arch_call_rest_init+0xc/0x14 [ 287.880958] start_kernel+0x420/0x44c [ 287.884622] Code: 9124c021 9417aff8 a94363f7 17ffffd5 (d4210000) [ 287.890718] ---[ end trace 5bc44c4ab6b009ce ]--- [ 287.895334] Kernel panic - not syncing: Fatal exception in interrupt [ 287.901686] SMP: stopping secondary CPUs [ 288.905607] SMP: failed to stop secondary CPUs 0-1 [ 288.910395] Kernel Offset: disabled [ 288.913882] CPU features: 0x0002,2000200c [ 288.917888] Memory Limit: none [ 288.920944] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- Reported-by: Eagle Zhou Tested-by: Eagle Zhou Signed-off-by: Fugang Duan Cc: stable Fixes: 7942f8577f2a ("serial: imx: TX DMA: clean up sg initialization") Reviewed-by: Uwe Kleine-König Link: https://lore.kernel.org/r/1581401761-6378-1-git-send-email-fugang.duan@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/imx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 969497599e88..630065b551f5 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -557,7 +557,7 @@ static void imx_dma_tx(struct imx_port *sport) sport->tx_bytes = uart_circ_chars_pending(xmit); - if (xmit->tail < xmit->head) { + if (xmit->tail < xmit->head || xmit->head == 0) { sport->dma_tx_nents = 1; sg_init_one(sgl, xmit->buf + xmit->tail, sport->tx_bytes); } else { From 0b86f288e79191a87733796fe161ca76bb391621 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 10 Feb 2020 15:57:30 +0100 Subject: [PATCH 3426/3715] serdev: ttyport: restore client ops on deregistration commit 0c5aae59270fb1f827acce182786094c9ccf598e upstream. The serdev tty-port controller driver should reset the tty-port client operations also on deregistration to avoid a NULL-pointer dereference in case the port is later re-registered as a normal tty device. Note that this can only happen with tty drivers such as 8250 which have statically allocated port structures that can end up being reused and where a later registration would not register a serdev controller (e.g. due to registration errors or if the devicetree has been changed in between). Specifically, this can be an issue for any statically defined ports that would be registered by 8250 core when an 8250 driver is being unbound. Fixes: bed35c6dfa6a ("serdev: add a tty port controller driver") Cc: stable # 4.11 Reported-by: Loic Poulain Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20200210145730.22762-1-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serdev/serdev-ttyport.c | 6 ++---- drivers/tty/tty_port.c | 5 +++-- include/linux/tty.h | 2 ++ 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/tty/serdev/serdev-ttyport.c b/drivers/tty/serdev/serdev-ttyport.c index 69fc6d9ab490..88cf520da739 100644 --- a/drivers/tty/serdev/serdev-ttyport.c +++ b/drivers/tty/serdev/serdev-ttyport.c @@ -238,7 +238,6 @@ struct device *serdev_tty_port_register(struct tty_port *port, struct device *parent, struct tty_driver *drv, int idx) { - const struct tty_port_client_operations *old_ops; struct serdev_controller *ctrl; struct serport *serport; int ret; @@ -257,7 +256,6 @@ struct device *serdev_tty_port_register(struct tty_port *port, ctrl->ops = &ctrl_ops; - old_ops = port->client_ops; port->client_ops = &client_ops; port->client_data = ctrl; @@ -270,7 +268,7 @@ struct device *serdev_tty_port_register(struct tty_port *port, err_reset_data: port->client_data = NULL; - port->client_ops = old_ops; + port->client_ops = &tty_port_default_client_ops; serdev_controller_put(ctrl); return ERR_PTR(ret); @@ -285,8 +283,8 @@ int serdev_tty_port_unregister(struct tty_port *port) return -ENODEV; serdev_controller_remove(ctrl); - port->client_ops = NULL; port->client_data = NULL; + port->client_ops = &tty_port_default_client_ops; serdev_controller_put(ctrl); return 0; diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c index c93a33701d32..dd12c3b86eb4 100644 --- a/drivers/tty/tty_port.c +++ b/drivers/tty/tty_port.c @@ -51,10 +51,11 @@ static void tty_port_default_wakeup(struct tty_port *port) } } -static const struct tty_port_client_operations default_client_ops = { +const struct tty_port_client_operations tty_port_default_client_ops = { .receive_buf = tty_port_default_receive_buf, .write_wakeup = tty_port_default_wakeup, }; +EXPORT_SYMBOL_GPL(tty_port_default_client_ops); void tty_port_init(struct tty_port *port) { @@ -67,7 +68,7 @@ void tty_port_init(struct tty_port *port) spin_lock_init(&port->lock); port->close_delay = (50 * HZ) / 100; port->closing_wait = (3000 * HZ) / 100; - port->client_ops = &default_client_ops; + port->client_ops = &tty_port_default_client_ops; kref_init(&port->kref); } EXPORT_SYMBOL(tty_port_init); diff --git a/include/linux/tty.h b/include/linux/tty.h index 0cd621d8c7f0..ead308e996c0 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -224,6 +224,8 @@ struct tty_port_client_operations { void (*write_wakeup)(struct tty_port *port); }; +extern const struct tty_port_client_operations tty_port_default_client_ops; + struct tty_port { struct tty_bufhead buf; /* Locked internally */ struct tty_struct *tty; /* Back pointer */ From 664817c8226582d2a16aa2a260fa4e4ab3b40a09 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 12 Feb 2020 18:04:33 +0200 Subject: [PATCH 3427/3715] MAINTAINERS: Update drm/i915 bug filing URL commit 96228b7df33f8eb9006f8ae96949400aed9bd303 upstream. We've moved from bugzilla to gitlab. Cc: stable@vger.kernel.org Reviewed-by: Chris Wilson Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200212160434.6437-1-jani.nikula@intel.com (cherry picked from commit 3a6a4f0810c8ade6f1ff63c34aa9834176b9d88b) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 029f96c43250..e2dd302345c2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6877,7 +6877,7 @@ M: Joonas Lahtinen M: Rodrigo Vivi L: intel-gfx@lists.freedesktop.org W: https://01.org/linuxgraphics/ -B: https://01.org/linuxgraphics/documentation/how-report-bugs +B: https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs C: irc://chat.freenode.net/intel-gfx Q: http://patchwork.freedesktop.org/project/intel-gfx/ T: git git://anongit.freedesktop.org/drm-intel From 122f23f4bc5c7ea9b0f35cb1b04c9f7886eadb81 Mon Sep 17 00:00:00 2001 From: Ioanna Alifieraki Date: Thu, 20 Feb 2020 20:04:00 -0800 Subject: [PATCH 3428/3715] Revert "ipc,sem: remove uneeded sem_undo_list lock usage in exit_sem()" commit edf28f4061afe4c2d9eb1c3323d90e882c1d6800 upstream. This reverts commit a97955844807e327df11aa33869009d14d6b7de0. Commit a97955844807 ("ipc,sem: remove uneeded sem_undo_list lock usage in exit_sem()") removes a lock that is needed. This leads to a process looping infinitely in exit_sem() and can also lead to a crash. There is a reproducer available in [1] and with the commit reverted the issue does not reproduce anymore. Using the reproducer found in [1] is fairly easy to reach a point where one of the child processes is looping infinitely in exit_sem between for(;;) and if (semid == -1) block, while it's trying to free its last sem_undo structure which has already been freed by freeary(). Each sem_undo struct is on two lists: one per semaphore set (list_id) and one per process (list_proc). The list_id list tracks undos by semaphore set, and the list_proc by process. Undo structures are removed either by freeary() or by exit_sem(). The freeary function is invoked when the user invokes a syscall to remove a semaphore set. During this operation freeary() traverses the list_id associated with the semaphore set and removes the undo structures from both the list_id and list_proc lists. For this case, exit_sem() is called at process exit. Each process contains a struct sem_undo_list (referred to as "ulp") which contains the head for the list_proc list. When the process exits, exit_sem() traverses this list to remove each sem_undo struct. As in freeary(), whenever a sem_undo struct is removed from list_proc, it is also removed from the list_id list. Removing elements from list_id is safe for both exit_sem() and freeary() due to sem_lock(). Removing elements from list_proc is not safe; freeary() locks &un->ulp->lock when it performs list_del_rcu(&un->list_proc) but exit_sem() does not (locking was removed by commit a97955844807 ("ipc,sem: remove uneeded sem_undo_list lock usage in exit_sem()"). This can result in the following situation while executing the reproducer [1] : Consider a child process in exit_sem() and the parent in freeary() (because of semctl(sid[i], NSEM, IPC_RMID)). - The list_proc for the child contains the last two undo structs A and B (the rest have been removed either by exit_sem() or freeary()). - The semid for A is 1 and semid for B is 2. - exit_sem() removes A and at the same time freeary() removes B. - Since A and B have different semid sem_lock() will acquire different locks for each process and both can proceed. The bug is that they remove A and B from the same list_proc at the same time because only freeary() acquires the ulp lock. When exit_sem() removes A it makes ulp->list_proc.next to point at B and at the same time freeary() removes B setting B->semid=-1. At the next iteration of for(;;) loop exit_sem() will try to remove B. The only way to break from for(;;) is for (&un->list_proc == &ulp->list_proc) to be true which is not. Then exit_sem() will check if B->semid=-1 which is and will continue looping in for(;;) until the memory for B is reallocated and the value at B->semid is changed. At that point, exit_sem() will crash attempting to unlink B from the lists (this can be easily triggered by running the reproducer [1] a second time). To prove this scenario instrumentation was added to keep information about each sem_undo (un) struct that is removed per process and per semaphore set (sma). CPU0 CPU1 [caller holds sem_lock(sma for A)] ... freeary() exit_sem() ... ... ... sem_lock(sma for B) spin_lock(A->ulp->lock) ... list_del_rcu(un_A->list_proc) list_del_rcu(un_B->list_proc) Undo structures A and B have different semid and sem_lock() operations proceed. However they belong to the same list_proc list and they are removed at the same time. This results into ulp->list_proc.next pointing to the address of B which is already removed. After reverting commit a97955844807 ("ipc,sem: remove uneeded sem_undo_list lock usage in exit_sem()") the issue was no longer reproducible. [1] https://bugzilla.redhat.com/show_bug.cgi?id=1694779 Link: http://lkml.kernel.org/r/20191211191318.11860-1-ioanna-maria.alifieraki@canonical.com Fixes: a97955844807 ("ipc,sem: remove uneeded sem_undo_list lock usage in exit_sem()") Signed-off-by: Ioanna Alifieraki Acked-by: Manfred Spraul Acked-by: Herton R. Krzesinski Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Cc: Joel Fernandes (Google) Cc: Davidlohr Bueso Cc: Jay Vosburgh Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- ipc/sem.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/ipc/sem.c b/ipc/sem.c index d6dd2dc9ddad..6adc245f3e02 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -2248,11 +2248,9 @@ void exit_sem(struct task_struct *tsk) ipc_assert_locked_object(&sma->sem_perm); list_del(&un->list_id); - /* we are the last process using this ulp, acquiring ulp->lock - * isn't required. Besides that, we are also protected against - * IPC_RMID as we hold sma->sem_perm lock now - */ + spin_lock(&ulp->lock); list_del_rcu(&un->list_proc); + spin_unlock(&ulp->lock); /* perform adjustments registered in un */ for (i = 0; i < sma->sem_nsems; i++) { From e3880aaacc5fb7b75af72286158d12c1c54d630e Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 20 Feb 2020 20:04:24 -0800 Subject: [PATCH 3429/3715] mm/vmscan.c: don't round up scan size for online memory cgroup commit 76073c646f5f4999d763f471df9e38a5a912d70d upstream. Commit 68600f623d69 ("mm: don't miss the last page because of round-off error") makes the scan size round up to @denominator regardless of the memory cgroup's state, online or offline. This affects the overall reclaiming behavior: the corresponding LRU list is eligible for reclaiming only when its size logically right shifted by @sc->priority is bigger than zero in the former formula. For example, the inactive anonymous LRU list should have at least 0x4000 pages to be eligible for reclaiming when we have 60/12 for swappiness/priority and without taking scan/rotation ratio into account. After the roundup is applied, the inactive anonymous LRU list becomes eligible for reclaiming when its size is bigger than or equal to 0x1000 in the same condition. (0x4000 >> 12) * 60 / (60 + 140 + 1) = 1 ((0x1000 >> 12) * 60) + 200) / (60 + 140 + 1) = 1 aarch64 has 512MB huge page size when the base page size is 64KB. The memory cgroup that has a huge page is always eligible for reclaiming in that case. The reclaiming is likely to stop after the huge page is reclaimed, meaing the further iteration on @sc->priority and the silbing and child memory cgroups will be skipped. The overall behaviour has been changed. This fixes the issue by applying the roundup to offlined memory cgroups only, to give more preference to reclaim memory from offlined memory cgroup. It sounds reasonable as those memory is unlikedly to be used by anyone. The issue was found by starting up 8 VMs on a Ampere Mustang machine, which has 8 CPUs and 16 GB memory. Each VM is given with 2 vCPUs and 2GB memory. It took 264 seconds for all VMs to be completely up and 784MB swap is consumed after that. With this patch applied, it took 236 seconds and 60MB swap to do same thing. So there is 10% performance improvement for my case. Note that KSM is disable while THP is enabled in the testing. total used free shared buff/cache available Mem: 16196 10065 2049 16 4081 3749 Swap: 8175 784 7391 total used free shared buff/cache available Mem: 16196 11324 3656 24 1215 2936 Swap: 8175 60 8115 Link: http://lkml.kernel.org/r/20200211024514.8730-1-gshan@redhat.com Fixes: 68600f623d69 ("mm: don't miss the last page because of round-off error") Signed-off-by: Gavin Shan Acked-by: Roman Gushchin Cc: [4.20+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/vmscan.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 0cc3c1eb15f5..c6962aa5ddb4 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2369,10 +2369,13 @@ out: /* * Scan types proportional to swappiness and * their relative recent reclaim efficiency. - * Make sure we don't miss the last page - * because of a round-off error. + * Make sure we don't miss the last page on + * the offlined memory cgroups because of a + * round-off error. */ - scan = DIV64_U64_ROUND_UP(scan * fraction[file], + scan = mem_cgroup_online(memcg) ? + div64_u64(scan * fraction[file], denominator) : + DIV64_U64_ROUND_UP(scan * fraction[file], denominator); break; case SCAN_FILE: From edae04b8f4632b19acf1f11345e7d22264f73a86 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 12 Feb 2020 01:46:16 -0500 Subject: [PATCH 3430/3715] drm/amdgpu/soc15: fix xclk for raven commit c657b936ea98630ef5ba4f130ab1ad5c534d0165 upstream. It's 25 Mhz (refclk / 4). This fixes the interpretation of the rlc clock counter. Acked-by: Evan Quan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/soc15.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index ff7d4827385e..7a2366bd1fba 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -279,7 +279,12 @@ static void soc15_init_golden_registers(struct amdgpu_device *adev) } static u32 soc15_get_xclk(struct amdgpu_device *adev) { - return adev->clock.spll.reference_freq; + u32 reference_clock = adev->clock.spll.reference_freq; + + if (adev->asic_type == CHIP_RAVEN) + return reference_clock / 4; + + return reference_clock; } From f15a843b030022c4e54c6fb04a3102c5af8b0fba Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Fri, 14 Feb 2020 10:32:38 +0800 Subject: [PATCH 3431/3715] KVM: x86: don't notify userspace IOAPIC on edge-triggered interrupt EOI commit 7455a8327674e1a7c9a1f5dd1b0743ab6713f6d1 upstream. Commit 13db77347db1 ("KVM: x86: don't notify userspace IOAPIC on edge EOI") said, edge-triggered interrupts don't set a bit in TMR, which means that IOAPIC isn't notified on EOI. And var level indicates level-triggered interrupt. But commit 3159d36ad799 ("KVM: x86: use generic function for MSI parsing") replace var level with irq.level by mistake. Fix it by changing irq.level to irq.trig_mode. Cc: stable@vger.kernel.org Fixes: 3159d36ad799 ("KVM: x86: use generic function for MSI parsing") Signed-off-by: Miaohe Lin Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/irq_comm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 3cc3b2d130a0..4d000aea05e0 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -427,7 +427,7 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, kvm_set_msi_irq(vcpu->kvm, entry, &irq); - if (irq.level && kvm_apic_match_dest(vcpu, NULL, 0, + if (irq.trig_mode && kvm_apic_match_dest(vcpu, NULL, 0, irq.dest_id, irq.dest_mode)) __set_bit(irq.vector, ioapic_handled_vectors); } From 69b2384bf875b59e85ac38abe6a535440706987a Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 10 Feb 2020 15:45:53 +0200 Subject: [PATCH 3432/3715] xhci: apply XHCI_PME_STUCK_QUIRK to Intel Comet Lake platforms commit a3ae87dce3a5abe0b57c811bab02b2564b574106 upstream. Intel Comet Lake based platform require the XHCI_PME_STUCK_QUIRK quirk as well. Without this xHC can not enter D3 in runtime suspend. Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20200210134553.9144-5-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 42d368cb76ce..908496ed3254 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -53,6 +53,7 @@ #define PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI 0x1aa8 #define PCI_DEVICE_ID_INTEL_APL_XHCI 0x5aa8 #define PCI_DEVICE_ID_INTEL_DNV_XHCI 0x19d0 +#define PCI_DEVICE_ID_INTEL_CML_XHCI 0xa3af #define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9 #define PCI_DEVICE_ID_AMD_PROMONTORYA_3 0x43ba @@ -191,7 +192,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI)) { + pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_CML_XHCI)) { xhci->quirks |= XHCI_PME_STUCK_QUIRK; } if (pdev->vendor == PCI_VENDOR_ID_INTEL && From 153ad3d8fa86c9c6db330d90b7fe87922891ef1f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 29 Sep 2017 12:34:13 -0400 Subject: [PATCH 3433/3715] VT_RESIZEX: get rid of field-by-field copyin [ Upstream commit 1b3bce4d6bf839304a90951b4b25a5863533bf2a ] Signed-off-by: Al Viro Signed-off-by: Sasha Levin --- drivers/tty/vt/vt_ioctl.c | 68 ++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 41 deletions(-) diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index 7b34b0ddbf0e..be7990548afe 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -847,58 +847,44 @@ int vt_ioctl(struct tty_struct *tty, case VT_RESIZEX: { - struct vt_consize __user *vtconsize = up; - ushort ll,cc,vlin,clin,vcol,ccol; + struct vt_consize v; if (!perm) return -EPERM; - if (!access_ok(VERIFY_READ, vtconsize, - sizeof(struct vt_consize))) { - ret = -EFAULT; - break; - } + if (copy_from_user(&v, up, sizeof(struct vt_consize))) + return -EFAULT; /* FIXME: Should check the copies properly */ - __get_user(ll, &vtconsize->v_rows); - __get_user(cc, &vtconsize->v_cols); - __get_user(vlin, &vtconsize->v_vlin); - __get_user(clin, &vtconsize->v_clin); - __get_user(vcol, &vtconsize->v_vcol); - __get_user(ccol, &vtconsize->v_ccol); - vlin = vlin ? vlin : vc->vc_scan_lines; - if (clin) { - if (ll) { - if (ll != vlin/clin) { - /* Parameters don't add up */ - ret = -EINVAL; - break; - } - } else - ll = vlin/clin; + if (!v.v_vlin) + v.v_vlin = vc->vc_scan_lines; + if (v.v_clin) { + int rows = v.v_vlin/v.v_clin; + if (v.v_rows != rows) { + if (v.v_rows) /* Parameters don't add up */ + return -EINVAL; + v.v_rows = rows; + } } - if (vcol && ccol) { - if (cc) { - if (cc != vcol/ccol) { - ret = -EINVAL; - break; - } - } else - cc = vcol/ccol; + if (v.v_vcol && v.v_ccol) { + int cols = v.v_vcol/v.v_ccol; + if (v.v_cols != cols) { + if (v.v_cols) + return -EINVAL; + v.v_cols = cols; + } } - if (clin > 32) { - ret = -EINVAL; - break; - } - + if (v.v_clin > 32) + return -EINVAL; + for (i = 0; i < MAX_NR_CONSOLES; i++) { if (!vc_cons[i].d) continue; console_lock(); - if (vlin) - vc_cons[i].d->vc_scan_lines = vlin; - if (clin) - vc_cons[i].d->vc_font.height = clin; + if (v.v_vlin) + vc_cons[i].d->vc_scan_lines = v.v_vlin; + if (v.v_clin) + vc_cons[i].d->vc_font.height = v.v_clin; vc_cons[i].d->vc_resize_user = 1; - vc_resize(vc_cons[i].d, cc, ll); + vc_resize(vc_cons[i].d, v.v_cols, v.v_rows); console_unlock(); } break; From 69931c044c9de837602cfd4bcfc28123ce4987e2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 10 Feb 2020 11:07:21 -0800 Subject: [PATCH 3434/3715] vt: vt_ioctl: fix race in VT_RESIZEX [ Upstream commit 6cd1ed50efd88261298577cd92a14f2768eddeeb ] We need to make sure vc_cons[i].d is not NULL after grabbing console_lock(), or risk a crash. general protection fault, probably for non-canonical address 0xdffffc0000000068: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000340-0x0000000000000347] CPU: 1 PID: 19462 Comm: syz-executor.5 Not tainted 5.5.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:vt_ioctl+0x1f96/0x26d0 drivers/tty/vt/vt_ioctl.c:883 Code: 74 41 e8 bd a6 84 fd 48 89 d8 48 c1 e8 03 42 80 3c 28 00 0f 85 e4 04 00 00 48 8b 03 48 8d b8 40 03 00 00 48 89 fa 48 c1 ea 03 <42> 0f b6 14 2a 84 d2 74 09 80 fa 03 0f 8e b1 05 00 00 44 89 b8 40 RSP: 0018:ffffc900086d7bb0 EFLAGS: 00010202 RAX: 0000000000000000 RBX: ffffffff8c34ee88 RCX: ffffc9001415c000 RDX: 0000000000000068 RSI: ffffffff83f0e6e3 RDI: 0000000000000340 RBP: ffffc900086d7cd0 R08: ffff888054ce0100 R09: fffffbfff16a2f6d R10: ffff888054ce0998 R11: ffff888054ce0100 R12: 000000000000001d R13: dffffc0000000000 R14: 1ffff920010daf79 R15: 000000000000ff7f FS: 00007f7d13c12700(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffd477e3c38 CR3: 0000000095d0a000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: tty_ioctl+0xa37/0x14f0 drivers/tty/tty_io.c:2660 vfs_ioctl fs/ioctl.c:47 [inline] ksys_ioctl+0x123/0x180 fs/ioctl.c:763 __do_sys_ioctl fs/ioctl.c:772 [inline] __se_sys_ioctl fs/ioctl.c:770 [inline] __x64_sys_ioctl+0x73/0xb0 fs/ioctl.c:770 do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x45b399 Code: ad b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f7d13c11c78 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007f7d13c126d4 RCX: 000000000045b399 RDX: 0000000020000080 RSI: 000000000000560a RDI: 0000000000000003 RBP: 000000000075bf20 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 0000000000000666 R14: 00000000004c7f04 R15: 000000000075bf2c Modules linked in: ---[ end trace 80970faf7a67eb77 ]--- RIP: 0010:vt_ioctl+0x1f96/0x26d0 drivers/tty/vt/vt_ioctl.c:883 Code: 74 41 e8 bd a6 84 fd 48 89 d8 48 c1 e8 03 42 80 3c 28 00 0f 85 e4 04 00 00 48 8b 03 48 8d b8 40 03 00 00 48 89 fa 48 c1 ea 03 <42> 0f b6 14 2a 84 d2 74 09 80 fa 03 0f 8e b1 05 00 00 44 89 b8 40 RSP: 0018:ffffc900086d7bb0 EFLAGS: 00010202 RAX: 0000000000000000 RBX: ffffffff8c34ee88 RCX: ffffc9001415c000 RDX: 0000000000000068 RSI: ffffffff83f0e6e3 RDI: 0000000000000340 RBP: ffffc900086d7cd0 R08: ffff888054ce0100 R09: fffffbfff16a2f6d R10: ffff888054ce0998 R11: ffff888054ce0100 R12: 000000000000001d R13: dffffc0000000000 R14: 1ffff920010daf79 R15: 000000000000ff7f FS: 00007f7d13c12700(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffd477e3c38 CR3: 0000000095d0a000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Cc: stable Reported-by: syzbot Link: https://lore.kernel.org/r/20200210190721.200418-1-edumazet@google.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/vt/vt_ioctl.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index be7990548afe..c320fefab360 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -876,15 +876,20 @@ int vt_ioctl(struct tty_struct *tty, return -EINVAL; for (i = 0; i < MAX_NR_CONSOLES; i++) { + struct vc_data *vcp; + if (!vc_cons[i].d) continue; console_lock(); - if (v.v_vlin) - vc_cons[i].d->vc_scan_lines = v.v_vlin; - if (v.v_clin) - vc_cons[i].d->vc_font.height = v.v_clin; - vc_cons[i].d->vc_resize_user = 1; - vc_resize(vc_cons[i].d, v.v_cols, v.v_rows); + vcp = vc_cons[i].d; + if (vcp) { + if (v.v_vlin) + vcp->vc_scan_lines = v.v_vlin; + if (v.v_clin) + vcp->vc_font.height = v.v_clin; + vcp->vc_resize_user = 1; + vc_resize(vcp, v.v_cols, v.v_rows); + } console_unlock(); } break; From b34e5f9f94d6c05a2c170c235382bf602abf3d1a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 11 Feb 2020 15:55:59 +0200 Subject: [PATCH 3435/3715] serial: 8250: Check UPF_IRQ_SHARED in advance [ Upstream commit 7febbcbc48fc92e3f33863b32ed715ba4aff18c4 ] The commit 54e53b2e8081 ("tty: serial: 8250: pass IRQ shared flag to UART ports") nicely explained the problem: ---8<---8<--- On some systems IRQ lines between multiple UARTs might be shared. If so, the irqflags have to be configured accordingly. The reason is: The 8250 port startup code performs IRQ tests *before* the IRQ handler for that particular port is registered. This is performed in serial8250_do_startup(). This function checks whether IRQF_SHARED is configured and only then disables the IRQ line while testing. This test is performed upon each open() of the UART device. Imagine two UARTs share the same IRQ line: On is already opened and the IRQ is active. When the second UART is opened, the IRQ line has to be disabled while performing IRQ tests. Otherwise an IRQ might handler might be invoked, but the IRQ itself cannot be handled, because the corresponding handler isn't registered, yet. That's because the 8250 code uses a chain-handler and invokes the corresponding port's IRQ handling routines himself. Unfortunately this IRQF_SHARED flag isn't configured for UARTs probed via device tree even if the IRQs are shared. This way, the actual and shared IRQ line isn't disabled while performing tests and the kernel correctly detects a spurious IRQ. So, adding this flag to the DT probe solves the issue. Note: The UPF_SHARE_IRQ flag is configured unconditionally. Therefore, the IRQF_SHARED flag can be set unconditionally as well. Example stack trace by performing `echo 1 > /dev/ttyS2` on a non-patched system: |irq 85: nobody cared (try booting with the "irqpoll" option) | [...] |handlers: |[] irq_default_primary_handler threaded [] serial8250_interrupt |Disabling IRQ #85 ---8<---8<--- But unfortunately didn't fix the root cause. Let's try again here by moving IRQ flag assignment from serial_link_irq_chain() to serial8250_do_startup(). This should fix the similar issue reported for 8250_pnp case. Since this change we don't need to have custom solutions in 8250_aspeed_vuart and 8250_of drivers, thus, drop them. Fixes: 1c2f04937b3e ("serial: 8250: add IRQ trigger support") Reported-by: Li RongQing Cc: Kurt Kanzenbach Cc: Vikram Pandita Signed-off-by: Andy Shevchenko Cc: stable Acked-by: Kurt Kanzenbach Link: https://lore.kernel.org/r/20200211135559.85960-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/8250/8250_aspeed_vuart.c | 1 - drivers/tty/serial/8250/8250_core.c | 5 ++--- drivers/tty/serial/8250/8250_port.c | 4 ++++ 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/8250/8250_aspeed_vuart.c b/drivers/tty/serial/8250/8250_aspeed_vuart.c index 33a801353114..0a89df390f24 100644 --- a/drivers/tty/serial/8250/8250_aspeed_vuart.c +++ b/drivers/tty/serial/8250/8250_aspeed_vuart.c @@ -256,7 +256,6 @@ static int aspeed_vuart_probe(struct platform_device *pdev) port.port.line = rc; port.port.irq = irq_of_parse_and_map(np, 0); - port.port.irqflags = IRQF_SHARED; port.port.iotype = UPIO_MEM; port.port.type = PORT_16550A; port.port.uartclk = clk; diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index c698ebab6d3b..5017a0f46b82 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -181,7 +181,7 @@ static int serial_link_irq_chain(struct uart_8250_port *up) struct hlist_head *h; struct hlist_node *n; struct irq_info *i; - int ret, irq_flags = up->port.flags & UPF_SHARE_IRQ ? IRQF_SHARED : 0; + int ret; mutex_lock(&hash_mutex); @@ -216,9 +216,8 @@ static int serial_link_irq_chain(struct uart_8250_port *up) INIT_LIST_HEAD(&up->list); i->head = &up->list; spin_unlock_irq(&i->lock); - irq_flags |= up->port.irqflags; ret = request_irq(up->port.irq, serial8250_interrupt, - irq_flags, up->port.name, i); + up->port.irqflags, up->port.name, i); if (ret < 0) serial_do_unlink(i, up); } diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index a73d2bc4b685..90a93c001e16 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -2258,6 +2258,10 @@ int serial8250_do_startup(struct uart_port *port) } } + /* Check if we need to have shared IRQs */ + if (port->irq && (up->port.flags & UPF_SHARE_IRQ)) + up->port.irqflags |= IRQF_SHARED; + if (port->irq && !(up->port.flags & UPF_NO_THRE_TEST)) { unsigned char iir1; /* From 719742a77e4733e99ea066363f4487fa7513bc5d Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Thu, 20 Feb 2020 20:04:30 -0800 Subject: [PATCH 3436/3715] lib/stackdepot.c: fix global out-of-bounds in stack_slabs [ Upstream commit 305e519ce48e935702c32241f07d393c3c8fed3e ] Walter Wu has reported a potential case in which init_stack_slab() is called after stack_slabs[STACK_ALLOC_MAX_SLABS - 1] has already been initialized. In that case init_stack_slab() will overwrite stack_slabs[STACK_ALLOC_MAX_SLABS], which may result in a memory corruption. Link: http://lkml.kernel.org/r/20200218102950.260263-1-glider@google.com Fixes: cd11016e5f521 ("mm, kasan: stackdepot implementation. Enable stackdepot for SLAB") Signed-off-by: Alexander Potapenko Reported-by: Walter Wu Cc: Dmitry Vyukov Cc: Matthias Brugger Cc: Thomas Gleixner Cc: Josh Poimboeuf Cc: Kate Stewart Cc: Greg Kroah-Hartman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- lib/stackdepot.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index f87d138e9672..759ff419fe61 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -92,15 +92,19 @@ static bool init_stack_slab(void **prealloc) return true; if (stack_slabs[depot_index] == NULL) { stack_slabs[depot_index] = *prealloc; + *prealloc = NULL; } else { - stack_slabs[depot_index + 1] = *prealloc; + /* If this is the last depot slab, do not touch the next one. */ + if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) { + stack_slabs[depot_index + 1] = *prealloc; + *prealloc = NULL; + } /* * This smp_store_release pairs with smp_load_acquire() from * |next_slab_inited| above and in depot_save_stack(). */ smp_store_release(&next_slab_inited, 1); } - *prealloc = NULL; return true; } From ee1238c0285f40d8297e70f3aaa0d8a981ea0b53 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 4 Feb 2020 15:26:29 -0800 Subject: [PATCH 3437/3715] KVM: nVMX: Don't emulate instructions in guest mode [ Upstream commit 07721feee46b4b248402133228235318199b05ec ] vmx_check_intercept is not yet fully implemented. To avoid emulating instructions disallowed by the L1 hypervisor, refuse to emulate instructions by default. Cc: stable@vger.kernel.org [Made commit, added commit msg - Oliver] Signed-off-by: Oliver Upton Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 86037cc11419..64837f07d955 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -12340,7 +12340,7 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu, } /* TODO: check more intercepts... */ - return X86EMUL_CONTINUE; + return X86EMUL_UNHANDLEABLE; } #ifdef CONFIG_X86_64 From d63d922c9717df41e853df8ebb1756eea65eff1e Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 7 Feb 2020 09:29:11 -0500 Subject: [PATCH 3438/3715] ext4: fix a data race in EXT4_I(inode)->i_disksize commit 35df4299a6487f323b0aca120ea3f485dfee2ae3 upstream. EXT4_I(inode)->i_disksize could be accessed concurrently as noticed by KCSAN, BUG: KCSAN: data-race in ext4_write_end [ext4] / ext4_writepages [ext4] write to 0xffff91c6713b00f8 of 8 bytes by task 49268 on cpu 127: ext4_write_end+0x4e3/0x750 [ext4] ext4_update_i_disksize at fs/ext4/ext4.h:3032 (inlined by) ext4_update_inode_size at fs/ext4/ext4.h:3046 (inlined by) ext4_write_end at fs/ext4/inode.c:1287 generic_perform_write+0x208/0x2a0 ext4_buffered_write_iter+0x11f/0x210 [ext4] ext4_file_write_iter+0xce/0x9e0 [ext4] new_sync_write+0x29c/0x3b0 __vfs_write+0x92/0xa0 vfs_write+0x103/0x260 ksys_write+0x9d/0x130 __x64_sys_write+0x4c/0x60 do_syscall_64+0x91/0xb47 entry_SYSCALL_64_after_hwframe+0x49/0xbe read to 0xffff91c6713b00f8 of 8 bytes by task 24872 on cpu 37: ext4_writepages+0x10ac/0x1d00 [ext4] mpage_map_and_submit_extent at fs/ext4/inode.c:2468 (inlined by) ext4_writepages at fs/ext4/inode.c:2772 do_writepages+0x5e/0x130 __writeback_single_inode+0xeb/0xb20 writeback_sb_inodes+0x429/0x900 __writeback_inodes_wb+0xc4/0x150 wb_writeback+0x4bd/0x870 wb_workfn+0x6b4/0x960 process_one_work+0x54c/0xbe0 worker_thread+0x80/0x650 kthread+0x1e0/0x200 ret_from_fork+0x27/0x50 Reported by Kernel Concurrency Sanitizer on: CPU: 37 PID: 24872 Comm: kworker/u261:2 Tainted: G W O L 5.5.0-next-20200204+ #5 Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019 Workqueue: writeback wb_workfn (flush-7:0) Since only the read is operating as lockless (outside of the "i_data_sem"), load tearing could introduce a logic bug. Fix it by adding READ_ONCE() for the read and WRITE_ONCE() for the write. Signed-off-by: Qian Cai Link: https://lore.kernel.org/r/1581085751-31793-1-git-send-email-cai@lca.pw Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ext4.h | 2 +- fs/ext4/inode.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index fcee1f9c7fe3..f88be401befb 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2867,7 +2867,7 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) !inode_is_locked(inode)); down_write(&EXT4_I(inode)->i_data_sem); if (newsize > EXT4_I(inode)->i_disksize) - EXT4_I(inode)->i_disksize = newsize; + WRITE_ONCE(EXT4_I(inode)->i_disksize, newsize); up_write(&EXT4_I(inode)->i_data_sem); } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 57118ba82929..8bfd1ea28234 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2564,7 +2564,7 @@ update_disksize: * truncate are avoided by checking i_size under i_data_sem. */ disksize = ((loff_t)mpd->first_page) << PAGE_SHIFT; - if (disksize > EXT4_I(inode)->i_disksize) { + if (disksize > READ_ONCE(EXT4_I(inode)->i_disksize)) { int err2; loff_t i_size; From 6538b08e720d93a92c2dcb1215e761db248931c9 Mon Sep 17 00:00:00 2001 From: Shijie Luo Date: Sat, 15 Feb 2020 03:02:06 -0500 Subject: [PATCH 3439/3715] ext4: add cond_resched() to __ext4_find_entry() commit 9424ef56e13a1f14c57ea161eed3ecfdc7b2770e upstream. We tested a soft lockup problem in linux 4.19 which could also be found in linux 5.x. When dir inode takes up a large number of blocks, and if the directory is growing when we are searching, it's possible the restart branch could be called many times, and the do while loop could hold cpu a long time. Here is the call trace in linux 4.19. [ 473.756186] Call trace: [ 473.756196] dump_backtrace+0x0/0x198 [ 473.756199] show_stack+0x24/0x30 [ 473.756205] dump_stack+0xa4/0xcc [ 473.756210] watchdog_timer_fn+0x300/0x3e8 [ 473.756215] __hrtimer_run_queues+0x114/0x358 [ 473.756217] hrtimer_interrupt+0x104/0x2d8 [ 473.756222] arch_timer_handler_virt+0x38/0x58 [ 473.756226] handle_percpu_devid_irq+0x90/0x248 [ 473.756231] generic_handle_irq+0x34/0x50 [ 473.756234] __handle_domain_irq+0x68/0xc0 [ 473.756236] gic_handle_irq+0x6c/0x150 [ 473.756238] el1_irq+0xb8/0x140 [ 473.756286] ext4_es_lookup_extent+0xdc/0x258 [ext4] [ 473.756310] ext4_map_blocks+0x64/0x5c0 [ext4] [ 473.756333] ext4_getblk+0x6c/0x1d0 [ext4] [ 473.756356] ext4_bread_batch+0x7c/0x1f8 [ext4] [ 473.756379] ext4_find_entry+0x124/0x3f8 [ext4] [ 473.756402] ext4_lookup+0x8c/0x258 [ext4] [ 473.756407] __lookup_hash+0x8c/0xe8 [ 473.756411] filename_create+0xa0/0x170 [ 473.756413] do_mkdirat+0x6c/0x140 [ 473.756415] __arm64_sys_mkdirat+0x28/0x38 [ 473.756419] el0_svc_common+0x78/0x130 [ 473.756421] el0_svc_handler+0x38/0x78 [ 473.756423] el0_svc+0x8/0xc [ 485.755156] watchdog: BUG: soft lockup - CPU#2 stuck for 22s! [tmp:5149] Add cond_resched() to avoid soft lockup and to provide a better system responding. Link: https://lore.kernel.org/r/20200215080206.13293-1-luoshijie1@huawei.com Signed-off-by: Shijie Luo Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/namei.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index a7b7e0783eed..3f7b3836166c 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1430,6 +1430,7 @@ restart: /* * We deal with the read-ahead logic here. */ + cond_resched(); if (ra_ptr >= ra_max) { /* Refill the readahead buffer */ ra_ptr = 0; From 77ed838c16339be8ecb98e0da9b50b892820336c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 21 Feb 2020 11:08:35 +0100 Subject: [PATCH 3440/3715] ext4: fix mount failure with quota configured as module commit 9db176bceb5c5df4990486709da386edadc6bd1d upstream. When CONFIG_QFMT_V2 is configured as a module, the test in ext4_feature_set_ok() fails and so mount of filesystems with quota or project features fails. Fix the test to use IS_ENABLED macro which works properly even for modules. Link: https://lore.kernel.org/r/20200221100835.9332-1-jack@suse.cz Fixes: d65d87a07476 ("ext4: improve explanation of a mount failure caused by a misconfigured kernel") Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 5b9e7377f26e..8ce6b741cc5a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2863,7 +2863,7 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly) return 0; } -#if !defined(CONFIG_QUOTA) || !defined(CONFIG_QFMT_V2) +#if !IS_ENABLED(CONFIG_QUOTA) || !IS_ENABLED(CONFIG_QFMT_V2) if (!readonly && (ext4_has_feature_quota(sb) || ext4_has_feature_project(sb))) { ext4_msg(sb, KERN_ERR, From a3294abaf9006db9ead394064952763d2e552bec Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 19 Feb 2020 10:30:46 -0800 Subject: [PATCH 3441/3715] ext4: rename s_journal_flag_rwsem to s_writepages_rwsem commit bbd55937de8f2754adc5792b0f8e5ff7d9c0420e upstream. In preparation for making s_journal_flag_rwsem synchronize ext4_writepages() with changes to both the EXTENTS and JOURNAL_DATA flags (rather than just JOURNAL_DATA as it does currently), rename it to s_writepages_rwsem. Link: https://lore.kernel.org/r/20200219183047.47417-2-ebiggers@kernel.org Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ext4.h | 2 +- fs/ext4/inode.c | 10 +++++----- fs/ext4/super.c | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index f88be401befb..1bffa484d8d1 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1533,7 +1533,7 @@ struct ext4_sb_info { struct ratelimit_state s_msg_ratelimit_state; /* Barrier between changing inodes' journal flags and writepages ops. */ - struct percpu_rw_semaphore s_journal_flag_rwsem; + struct percpu_rw_semaphore s_writepages_rwsem; struct dax_device *s_daxdev; }; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 8bfd1ea28234..1e2edebd0929 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2744,7 +2744,7 @@ static int ext4_writepages(struct address_space *mapping, if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) return -EIO; - percpu_down_read(&sbi->s_journal_flag_rwsem); + percpu_down_read(&sbi->s_writepages_rwsem); trace_ext4_writepages(inode, wbc); if (dax_mapping(mapping)) { @@ -2974,7 +2974,7 @@ unplug: out_writepages: trace_ext4_writepages_result(inode, wbc, ret, nr_to_write - wbc->nr_to_write); - percpu_up_read(&sbi->s_journal_flag_rwsem); + percpu_up_read(&sbi->s_writepages_rwsem); return ret; } @@ -6050,7 +6050,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) } } - percpu_down_write(&sbi->s_journal_flag_rwsem); + percpu_down_write(&sbi->s_writepages_rwsem); jbd2_journal_lock_updates(journal); /* @@ -6067,7 +6067,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) err = jbd2_journal_flush(journal); if (err < 0) { jbd2_journal_unlock_updates(journal); - percpu_up_write(&sbi->s_journal_flag_rwsem); + percpu_up_write(&sbi->s_writepages_rwsem); ext4_inode_resume_unlocked_dio(inode); return err; } @@ -6076,7 +6076,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) ext4_set_aops(inode); jbd2_journal_unlock_updates(journal); - percpu_up_write(&sbi->s_journal_flag_rwsem); + percpu_up_write(&sbi->s_writepages_rwsem); if (val) up_write(&EXT4_I(inode)->i_mmap_sem); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8ce6b741cc5a..09b443709bca 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -939,7 +939,7 @@ static void ext4_put_super(struct super_block *sb) percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); percpu_counter_destroy(&sbi->s_dirtyclusters_counter); - percpu_free_rwsem(&sbi->s_journal_flag_rwsem); + percpu_free_rwsem(&sbi->s_writepages_rwsem); #ifdef CONFIG_QUOTA for (i = 0; i < EXT4_MAXQUOTAS; i++) kfree(get_qf_name(sb, sbi, i)); @@ -4396,7 +4396,7 @@ no_journal: err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0, GFP_KERNEL); if (!err) - err = percpu_init_rwsem(&sbi->s_journal_flag_rwsem); + err = percpu_init_rwsem(&sbi->s_writepages_rwsem); if (err) { ext4_msg(sb, KERN_ERR, "insufficient memory"); @@ -4490,7 +4490,7 @@ failed_mount6: percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); percpu_counter_destroy(&sbi->s_dirtyclusters_counter); - percpu_free_rwsem(&sbi->s_journal_flag_rwsem); + percpu_free_rwsem(&sbi->s_writepages_rwsem); failed_mount5: ext4_ext_release(sb); ext4_release_system_zone(sb); From bcc1eab71a67c46b9e24544ac7923f44444174ce Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 19 Feb 2020 10:30:47 -0800 Subject: [PATCH 3442/3715] ext4: fix race between writepages and enabling EXT4_EXTENTS_FL commit cb85f4d23f794e24127f3e562cb3b54b0803f456 upstream. If EXT4_EXTENTS_FL is set on an inode while ext4_writepages() is running on it, the following warning in ext4_add_complete_io() can be hit: WARNING: CPU: 1 PID: 0 at fs/ext4/page-io.c:234 ext4_put_io_end_defer+0xf0/0x120 Here's a minimal reproducer (not 100% reliable) (root isn't required): while true; do sync done & while true; do rm -f file touch file chattr -e file echo X >> file chattr +e file done The problem is that in ext4_writepages(), ext4_should_dioread_nolock() (which only returns true on extent-based files) is checked once to set the number of reserved journal credits, and also again later to select the flags for ext4_map_blocks() and copy the reserved journal handle to ext4_io_end::handle. But if EXT4_EXTENTS_FL is being concurrently set, the first check can see dioread_nolock disabled while the later one can see it enabled, causing the reserved handle to unexpectedly be NULL. Since changing EXT4_EXTENTS_FL is uncommon, and there may be other races related to doing so as well, fix this by synchronizing changing EXT4_EXTENTS_FL with ext4_writepages() via the existing s_writepages_rwsem (previously called s_journal_flag_rwsem). This was originally reported by syzbot without a reproducer at https://syzkaller.appspot.com/bug?extid=2202a584a00fffd19fbf, but now that dioread_nolock is the default I also started seeing this when running syzkaller locally. Link: https://lore.kernel.org/r/20200219183047.47417-3-ebiggers@kernel.org Reported-by: syzbot+2202a584a00fffd19fbf@syzkaller.appspotmail.com Fixes: 6b523df4fb5a ("ext4: use transaction reservation for extent conversion in ext4_end_io") Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ext4.h | 5 ++++- fs/ext4/migrate.c | 27 +++++++++++++++++++-------- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 1bffa484d8d1..b162f602c430 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1532,7 +1532,10 @@ struct ext4_sb_info { struct ratelimit_state s_warning_ratelimit_state; struct ratelimit_state s_msg_ratelimit_state; - /* Barrier between changing inodes' journal flags and writepages ops. */ + /* + * Barrier between writepages ops and changing any inode's JOURNAL_DATA + * or EXTENTS flag. + */ struct percpu_rw_semaphore s_writepages_rwsem; struct dax_device *s_daxdev; }; diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 78d45c7d3fa7..0d785868cc50 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -434,6 +434,7 @@ static int free_ext_block(handle_t *handle, struct inode *inode) int ext4_ext_migrate(struct inode *inode) { + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); handle_t *handle; int retval = 0, i; __le32 *i_data; @@ -458,6 +459,8 @@ int ext4_ext_migrate(struct inode *inode) */ return retval; + percpu_down_write(&sbi->s_writepages_rwsem); + /* * Worst case we can touch the allocation bitmaps, a bgd * block, and a block to link in the orphan list. We do need @@ -468,7 +471,7 @@ int ext4_ext_migrate(struct inode *inode) if (IS_ERR(handle)) { retval = PTR_ERR(handle); - return retval; + goto out_unlock; } goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) * EXT4_INODES_PER_GROUP(inode->i_sb)) + 1; @@ -479,7 +482,7 @@ int ext4_ext_migrate(struct inode *inode) if (IS_ERR(tmp_inode)) { retval = PTR_ERR(tmp_inode); ext4_journal_stop(handle); - return retval; + goto out_unlock; } i_size_write(tmp_inode, i_size_read(inode)); /* @@ -521,7 +524,7 @@ int ext4_ext_migrate(struct inode *inode) */ ext4_orphan_del(NULL, tmp_inode); retval = PTR_ERR(handle); - goto out; + goto out_tmp_inode; } ei = EXT4_I(inode); @@ -602,10 +605,11 @@ err_out: /* Reset the extent details */ ext4_ext_tree_init(handle, tmp_inode); ext4_journal_stop(handle); -out: +out_tmp_inode: unlock_new_inode(tmp_inode); iput(tmp_inode); - +out_unlock: + percpu_up_write(&sbi->s_writepages_rwsem); return retval; } @@ -615,7 +619,8 @@ out: int ext4_ind_migrate(struct inode *inode) { struct ext4_extent_header *eh; - struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + struct ext4_super_block *es = sbi->s_es; struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_extent *ex; unsigned int i, len; @@ -639,9 +644,13 @@ int ext4_ind_migrate(struct inode *inode) if (test_opt(inode->i_sb, DELALLOC)) ext4_alloc_da_blocks(inode); + percpu_down_write(&sbi->s_writepages_rwsem); + handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1); - if (IS_ERR(handle)) - return PTR_ERR(handle); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out_unlock; + } down_write(&EXT4_I(inode)->i_data_sem); ret = ext4_ext_check_inode(inode); @@ -676,5 +685,7 @@ int ext4_ind_migrate(struct inode *inode) errout: ext4_journal_stop(handle); up_write(&EXT4_I(inode)->i_data_sem); +out_unlock: + percpu_up_write(&sbi->s_writepages_rwsem); return ret; } From 02a67798862f1c848e91859a7f4b291bcfdcf563 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 4 Feb 2020 15:26:30 -0800 Subject: [PATCH 3443/3715] KVM: nVMX: Refactor IO bitmap checks into helper function commit e71237d3ff1abf9f3388337cfebf53b96df2020d upstream. Checks against the IO bitmap are useful for both instruction emulation and VM-exit reflection. Refactor the IO bitmap checks into a helper function. Signed-off-by: Oliver Upton Reviewed-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 40 +++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 64837f07d955..1c0c0e87f7e6 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4991,6 +4991,26 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu) (ss.selector & SEGMENT_RPL_MASK)); } +static bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, + unsigned int port, int size); +static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + unsigned long exit_qualification; + unsigned int port; + int size; + + if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) + return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING); + + exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + + port = exit_qualification >> 16; + size = (exit_qualification & 7) + 1; + + return nested_vmx_check_io_bitmaps(vcpu, port, size); +} + /* * Check if guest state is valid. Returns true if valid, false if * not. @@ -8521,23 +8541,17 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { static const int kvm_vmx_max_exit_handlers = ARRAY_SIZE(kvm_vmx_exit_handlers); -static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, - struct vmcs12 *vmcs12) +/* + * Return true if an IO instruction with the specified port and size should cause + * a VM-exit into L1. + */ +bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port, + int size) { - unsigned long exit_qualification; + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); gpa_t bitmap, last_bitmap; - unsigned int port; - int size; u8 b; - if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) - return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING); - - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - - port = exit_qualification >> 16; - size = (exit_qualification & 7) + 1; - last_bitmap = (gpa_t)-1; b = -1; From 9effa0ed199c7a02ad4cfe50fcec772645664b18 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 4 Feb 2020 15:26:31 -0800 Subject: [PATCH 3444/3715] KVM: nVMX: Check IO instruction VM-exit conditions commit 35a571346a94fb93b5b3b6a599675ef3384bc75c upstream. Consult the 'unconditional IO exiting' and 'use IO bitmaps' VM-execution controls when checking instruction interception. If the 'use IO bitmaps' VM-execution control is 1, check the instruction access against the IO bitmaps to determine if the instruction causes a VM-exit. Signed-off-by: Oliver Upton Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 59 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 52 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1c0c0e87f7e6..b5fa8fc0014d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4997,7 +4997,7 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { unsigned long exit_qualification; - unsigned int port; + unsigned short port; int size; if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) @@ -12335,6 +12335,39 @@ static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu, to_vmx(vcpu)->nested.sync_shadow_vmcs = true; } +static int vmx_check_intercept_io(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + unsigned short port; + bool intercept; + int size; + + if (info->intercept == x86_intercept_in || + info->intercept == x86_intercept_ins) { + port = info->src_val; + size = info->dst_bytes; + } else { + port = info->dst_val; + size = info->src_bytes; + } + + /* + * If the 'use IO bitmaps' VM-execution control is 0, IO instruction + * VM-exits depend on the 'unconditional IO exiting' VM-execution + * control. + * + * Otherwise, IO instruction VM-exits are controlled by the IO bitmaps. + */ + if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) + intercept = nested_cpu_has(vmcs12, + CPU_BASED_UNCOND_IO_EXITING); + else + intercept = nested_vmx_check_io_bitmaps(vcpu, port, size); + + return intercept ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; +} + static int vmx_check_intercept(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, enum x86_intercept_stage stage) @@ -12342,18 +12375,30 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12 = get_vmcs12(vcpu); struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; + switch (info->intercept) { /* * RDPID causes #UD if disabled through secondary execution controls. * Because it is marked as EmulateOnUD, we need to intercept it here. */ - if (info->intercept == x86_intercept_rdtscp && - !nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) { - ctxt->exception.vector = UD_VECTOR; - ctxt->exception.error_code_valid = false; - return X86EMUL_PROPAGATE_FAULT; - } + case x86_intercept_rdtscp: + if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) { + ctxt->exception.vector = UD_VECTOR; + ctxt->exception.error_code_valid = false; + return X86EMUL_PROPAGATE_FAULT; + } + break; + + case x86_intercept_in: + case x86_intercept_ins: + case x86_intercept_out: + case x86_intercept_outs: + return vmx_check_intercept_io(vcpu, info); /* TODO: check more intercepts... */ + default: + break; + } + return X86EMUL_UNHANDLEABLE; } From e6bff0cdf51c26b6216f45f88e39aa3eb16b2115 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 20 Feb 2020 18:22:05 +0100 Subject: [PATCH 3445/3715] KVM: nVMX: handle nested posted interrupts when apicv is disabled for L1 commit 91a5f413af596ad01097e59bf487eb07cb3f1331 upstream. Even when APICv is disabled for L1 it can (and, actually, is) still available for L2, this means we need to always call vmx_deliver_nested_posted_interrupt() when attempting an interrupt delivery. Suggested-by: Paolo Bonzini Signed-off-by: Vitaly Kuznetsov Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/lapic.c | 5 +---- arch/x86/kvm/svm.c | 7 ++++++- arch/x86/kvm/vmx.c | 13 +++++++++---- 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d0e17813a9b0..2cdf654ed132 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1006,7 +1006,7 @@ struct kvm_x86_ops { void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu); void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); - void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); + int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*get_tdp_level)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 8715711f2755..d8c3fa015432 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -993,11 +993,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, apic_clear_vector(vector, apic->regs + APIC_TMR); } - if (vcpu->arch.apicv_active) - kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); - else { + if (kvm_x86_ops->deliver_posted_interrupt(vcpu, vector)) { kvm_lapic_set_irr(vector, apic); - kvm_make_request(KVM_REQ_EVENT, vcpu); kvm_vcpu_kick(vcpu); } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 52edb8cf1c40..8e65a9b40c18 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4631,8 +4631,11 @@ static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) return; } -static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) +static int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) { + if (!vcpu->arch.apicv_active) + return -1; + kvm_lapic_set_irr(vec, vcpu->arch.apic); smp_mb__after_atomic(); @@ -4641,6 +4644,8 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) kvm_cpu_get_apicid(vcpu->cpu)); else kvm_vcpu_wake_up(vcpu); + + return 0; } static bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b5fa8fc0014d..acf72da288f9 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5541,24 +5541,29 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu, * 2. If target vcpu isn't running(root mode), kick it to pick up the * interrupt from PIR in next vmentry. */ -static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) +static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) { struct vcpu_vmx *vmx = to_vmx(vcpu); int r; r = vmx_deliver_nested_posted_interrupt(vcpu, vector); if (!r) - return; + return 0; + + if (!vcpu->arch.apicv_active) + return -1; if (pi_test_and_set_pir(vector, &vmx->pi_desc)) - return; + return 0; /* If a previous notification has sent the IPI, nothing to do. */ if (pi_test_and_set_on(&vmx->pi_desc)) - return; + return 0; if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false)) kvm_vcpu_kick(vcpu); + + return 0; } /* From 647bdd69c205143bbbd77c1053f6baee1455f434 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Fri, 21 Feb 2020 22:04:46 +0800 Subject: [PATCH 3446/3715] KVM: apic: avoid calculating pending eoi from an uninitialized val commit 23520b2def95205f132e167cf5b25c609975e959 upstream. When pv_eoi_get_user() fails, 'val' may remain uninitialized and the return value of pv_eoi_get_pending() becomes random. Fix the issue by initializing the variable. Reviewed-by: Vitaly Kuznetsov Signed-off-by: Miaohe Lin Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/lapic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index d8c3fa015432..537c36b55b5d 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -566,9 +566,11 @@ static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu) static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu) { u8 val; - if (pv_eoi_get_user(vcpu, &val) < 0) + if (pv_eoi_get_user(vcpu, &val) < 0) { apic_debug("Can't read EOI MSR value: 0x%llx\n", (unsigned long long)vcpu->arch.pv_eoi.msr_val); + return false; + } return val & 0x1; } From 9b32172a1bc80715309cd28e624dc2babd0ed48d Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 13 Feb 2020 10:47:31 -0500 Subject: [PATCH 3447/3715] btrfs: fix bytes_may_use underflow in prealloc error condtition commit b778cf962d71a0e737923d55d0432f3bd287258e upstream. I hit the following warning while running my error injection stress testing: WARNING: CPU: 3 PID: 1453 at fs/btrfs/space-info.h:108 btrfs_free_reserved_data_space_noquota+0xfd/0x160 [btrfs] RIP: 0010:btrfs_free_reserved_data_space_noquota+0xfd/0x160 [btrfs] Call Trace: btrfs_free_reserved_data_space+0x4f/0x70 [btrfs] __btrfs_prealloc_file_range+0x378/0x470 [btrfs] elfcorehdr_read+0x40/0x40 ? elfcorehdr_read+0x40/0x40 ? btrfs_commit_transaction+0xca/0xa50 [btrfs] ? dput+0xb4/0x2a0 ? btrfs_log_dentry_safe+0x55/0x70 [btrfs] ? btrfs_sync_file+0x30e/0x420 [btrfs] ? do_fsync+0x38/0x70 ? __x64_sys_fdatasync+0x13/0x20 ? do_syscall_64+0x5b/0x1b0 ? entry_SYSCALL_64_after_hwframe+0x44/0xa9 This happens if we fail to insert our reserved file extent. At this point we've already converted our reservation from ->bytes_may_use to ->bytes_reserved. However once we break we will attempt to free everything from [cur_offset, end] from ->bytes_may_use, but our extent reservation will overlap part of this. Fix this problem by adding ins.offset (our extent allocation size) to cur_offset so we remove the actual remaining part from ->bytes_may_use. I validated this fix using my inject-error.py script python inject-error.py -o should_fail_bio -t cache_save_setup -t \ __btrfs_prealloc_file_range \ -t insert_reserved_file_extent.constprop.0 \ -r "-5" ./run-fsstress.sh where run-fsstress.sh simply mounts and runs fsstress on a disk. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Qu Wenruo Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index abecc4724a3b..2a196bb134d9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -10639,6 +10639,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key ins; u64 cur_offset = start; + u64 clear_offset = start; u64 i_size; u64 cur_bytes; u64 last_alloc = (u64)-1; @@ -10673,6 +10674,15 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, btrfs_end_transaction(trans); break; } + + /* + * We've reserved this space, and thus converted it from + * ->bytes_may_use to ->bytes_reserved. Any error that happens + * from here on out we will only need to clear our reservation + * for the remaining unreserved area, so advance our + * clear_offset by our extent size. + */ + clear_offset += ins.offset; btrfs_dec_block_group_reservations(fs_info, ins.objectid); last_alloc = ins.offset; @@ -10753,9 +10763,9 @@ next: if (own_trans) btrfs_end_transaction(trans); } - if (cur_offset < end) - btrfs_free_reserved_data_space(inode, NULL, cur_offset, - end - cur_offset + 1); + if (clear_offset < end) + btrfs_free_reserved_data_space(inode, NULL, clear_offset, + end - clear_offset + 1); return ret; } From b3df7d2f336b69725ffc98096092ab6b2f281c1e Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 13 Feb 2020 10:47:29 -0500 Subject: [PATCH 3448/3715] btrfs: do not check delayed items are empty for single transaction cleanup commit 1e90315149f3fe148e114a5de86f0196d1c21fa5 upstream. btrfs_assert_delayed_root_empty() will check if the delayed root is completely empty, but this is a filesystem-wide check. On cleanup we may have allowed other transactions to begin, for whatever reason, and thus the delayed root is not empty. So remove this check from cleanup_one_transation(). This however can stay in btrfs_cleanup_transaction(), because it checks only after all of the transactions have been properly cleaned up, and thus is valid. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Johannes Thumshirn Reviewed-by: Nikolay Borisov Reviewed-by: Qu Wenruo Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/disk-io.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d8ab9c5a8b7d..6b4fee5c79f9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4394,7 +4394,6 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, wake_up(&fs_info->transaction_wait); btrfs_destroy_delayed_inodes(fs_info); - btrfs_assert_delayed_root_empty(fs_info); btrfs_destroy_marked_extents(fs_info, &cur_trans->dirty_pages, EXTENT_DIRTY); From 7d08e0e026a9e3e2be99e18aaccf215aa37d34b1 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 13 Feb 2020 12:29:50 +0000 Subject: [PATCH 3449/3715] Btrfs: fix btrfs_wait_ordered_range() so that it waits for all ordered extents commit e75fd33b3f744f644061a4f9662bd63f5434f806 upstream. In btrfs_wait_ordered_range() once we find an ordered extent that has finished with an error we exit the loop and don't wait for any other ordered extents that might be still in progress. All the users of btrfs_wait_ordered_range() expect that there are no more ordered extents in progress after that function returns. So past fixes such like the ones from the two following commits: ff612ba7849964 ("btrfs: fix panic during relocation after ENOSPC before writeback happens") 28aeeac1dd3080 ("Btrfs: fix panic when starting bg cache writeout after IO error") don't work when there are multiple ordered extents in the range. Fix that by making btrfs_wait_ordered_range() wait for all ordered extents even after it finds one that had an error. Link: https://github.com/kdave/btrfs-progs/issues/228#issuecomment-569777554 CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Qu Wenruo Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ordered-data.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index a3aca495e33e..d2287ea9fc50 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -838,10 +838,15 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) } btrfs_start_ordered_extent(inode, ordered, 1); end = ordered->file_offset; + /* + * If the ordered extent had an error save the error but don't + * exit without waiting first for all other ordered extents in + * the range to complete. + */ if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags)) ret = -EIO; btrfs_put_ordered_extent(ordered); - if (ret || end == 0 || end == start) + if (end == 0 || end == start) break; end--; } From b982e7bc19217e2b7ab97533cbfb11331769ab63 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 12 Feb 2020 21:08:59 -0800 Subject: [PATCH 3450/3715] scsi: Revert "RDMA/isert: Fix a recently introduced regression related to logout" commit 76261ada16dcc3be610396a46d35acc3efbda682 upstream. Since commit 04060db41178 introduces soft lockups when toggling network interfaces, revert it. Link: https://marc.info/?l=target-devel&m=158157054906196 Cc: Rahul Kundu Cc: Mike Marciniszyn Cc: Sagi Grimberg Reported-by: Dakshaja Uppalapati Fixes: 04060db41178 ("scsi: RDMA/isert: Fix a recently introduced regression related to logout") Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/isert/ib_isert.c | 12 ++++++++++++ drivers/target/iscsi/iscsi_target.c | 6 +++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 9b5691f306a2..ee3f630c9217 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -2582,6 +2582,17 @@ isert_wait4logout(struct isert_conn *isert_conn) } } +static void +isert_wait4cmds(struct iscsi_conn *conn) +{ + isert_info("iscsi_conn %p\n", conn); + + if (conn->sess) { + target_sess_cmd_list_set_waiting(conn->sess->se_sess); + target_wait_for_sess_cmds(conn->sess->se_sess); + } +} + /** * isert_put_unsol_pending_cmds() - Drop commands waiting for * unsolicitate dataout @@ -2629,6 +2640,7 @@ static void isert_wait_conn(struct iscsi_conn *conn) ib_drain_qp(isert_conn->qp); isert_put_unsol_pending_cmds(conn); + isert_wait4cmds(conn); isert_wait4logout(isert_conn); queue_work(isert_release_wq, &isert_conn->release_work); diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 37d64acea5e1..21ce92ee1652 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -4155,6 +4155,9 @@ int iscsit_close_connection( iscsit_stop_nopin_response_timer(conn); iscsit_stop_nopin_timer(conn); + if (conn->conn_transport->iscsit_wait_conn) + conn->conn_transport->iscsit_wait_conn(conn); + /* * During Connection recovery drop unacknowledged out of order * commands for this connection, and prepare the other commands @@ -4240,9 +4243,6 @@ int iscsit_close_connection( target_sess_cmd_list_set_waiting(sess->se_sess); target_wait_for_sess_cmds(sess->se_sess); - if (conn->conn_transport->iscsit_wait_conn) - conn->conn_transport->iscsit_wait_conn(conn); - ahash_request_free(conn->conn_tx_hash); if (conn->conn_rx_hash) { struct crypto_ahash *tfm; From 6b0cc7a954ab2774da34b6954f73ea9888458ebb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 12 Feb 2020 21:09:00 -0800 Subject: [PATCH 3451/3715] scsi: Revert "target: iscsi: Wait for all commands to finish before freeing a session" commit 807b9515b7d044cf77df31f1af9d842a76ecd5cb upstream. Since commit e9d3009cb936 introduced a regression and since the fix for that regression was not perfect, revert this commit. Link: https://marc.info/?l=target-devel&m=158157054906195 Cc: Rahul Kundu Cc: Mike Marciniszyn Cc: Sagi Grimberg Reported-by: Dakshaja Uppalapati Fixes: e9d3009cb936 ("scsi: target: iscsi: Wait for all commands to finish before freeing a session") Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target.c | 10 ++-------- include/scsi/iscsi_proto.h | 1 - 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 21ce92ee1652..fb7bd422e2e1 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1158,9 +1158,7 @@ int iscsit_setup_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, hdr->cmdsn, be32_to_cpu(hdr->data_length), payload_length, conn->cid); - if (target_get_sess_cmd(&cmd->se_cmd, true) < 0) - return iscsit_add_reject_cmd(cmd, - ISCSI_REASON_WAITING_FOR_LOGOUT, buf); + target_get_sess_cmd(&cmd->se_cmd, true); cmd->sense_reason = transport_lookup_cmd_lun(&cmd->se_cmd, scsilun_to_int(&hdr->lun)); @@ -2006,9 +2004,7 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, conn->sess->se_sess, 0, DMA_NONE, TCM_SIMPLE_TAG, cmd->sense_buffer + 2); - if (target_get_sess_cmd(&cmd->se_cmd, true) < 0) - return iscsit_add_reject_cmd(cmd, - ISCSI_REASON_WAITING_FOR_LOGOUT, buf); + target_get_sess_cmd(&cmd->se_cmd, true); /* * TASK_REASSIGN for ERL=2 / connection stays inside of @@ -4240,8 +4236,6 @@ int iscsit_close_connection( * must wait until they have completed. */ iscsit_check_conn_usage_count(conn); - target_sess_cmd_list_set_waiting(sess->se_sess); - target_wait_for_sess_cmds(sess->se_sess); ahash_request_free(conn->conn_tx_hash); if (conn->conn_rx_hash) { diff --git a/include/scsi/iscsi_proto.h b/include/scsi/iscsi_proto.h index f0a01a54bd15..df156f1d50b2 100644 --- a/include/scsi/iscsi_proto.h +++ b/include/scsi/iscsi_proto.h @@ -638,7 +638,6 @@ struct iscsi_reject { #define ISCSI_REASON_BOOKMARK_INVALID 9 #define ISCSI_REASON_BOOKMARK_NO_RESOURCES 10 #define ISCSI_REASON_NEGOTIATION_RESET 11 -#define ISCSI_REASON_WAITING_FOR_LOGOUT 12 /* Max. number of Key=Value pairs in a text message */ #define MAX_KEY_VALUE_PAIRS 8192 From 220d4966d22dc505688abb8664cbd2834b1a79eb Mon Sep 17 00:00:00 2001 From: Jack Pham Date: Thu, 30 Jan 2020 19:10:35 -0800 Subject: [PATCH 3452/3715] usb: gadget: composite: Fix bMaxPower for SuperSpeedPlus commit c724417baf162bd3e035659e22cdf990cfb0d917 upstream. SuperSpeedPlus peripherals must report their bMaxPower of the configuration descriptor in units of 8mA as per the USB 3.2 specification. The current switch statement in encode_bMaxPower() only checks for USB_SPEED_SUPER but not USB_SPEED_SUPER_PLUS so the latter falls back to USB 2.0 encoding which uses 2mA units. Replace the switch with a simple if/else. Fixes: eae5820b852f ("usb: gadget: composite: Write SuperSpeedPlus config descriptors") Signed-off-by: Jack Pham Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index b29cd3979391..6e30b177aa22 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -440,12 +440,10 @@ static u8 encode_bMaxPower(enum usb_device_speed speed, val = CONFIG_USB_GADGET_VBUS_DRAW; if (!val) return 0; - switch (speed) { - case USB_SPEED_SUPER: - return DIV_ROUND_UP(val, 8); - default: + if (speed < USB_SPEED_SUPER) return DIV_ROUND_UP(val, 2); - } + else + return DIV_ROUND_UP(val, 8); } static int config_buf(struct usb_configuration *config, From edd606c03aeaa06cff00ca9b80f6efdff71f2f6c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 26 Jan 2020 22:05:49 +0000 Subject: [PATCH 3453/3715] staging: rtl8723bs: fix copy of overlapping memory commit 8ae9a588ca35eb9c32dc03299c5e1f4a1e9a9617 upstream. Currently the rtw_sprintf prints the contents of thread_name onto thread_name and this can lead to a potential copy of a string over itself. Avoid this by printing the literal string RTWHALXT instread of the contents of thread_name. Addresses-Coverity: ("copy of overlapping memory") Fixes: 554c0a3abf21 ("staging: Add rtl8723bs sdio wifi driver") Signed-off-by: Colin Ian King Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20200126220549.9849-1-colin.king@canonical.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c b/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c index d0b317077511..f92f9073c507 100644 --- a/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c +++ b/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c @@ -486,14 +486,13 @@ int rtl8723bs_xmit_thread(void *context) s32 ret; struct adapter *padapter; struct xmit_priv *pxmitpriv; - u8 thread_name[20] = "RTWHALXT"; - + u8 thread_name[20]; ret = _SUCCESS; padapter = context; pxmitpriv = &padapter->xmitpriv; - rtw_sprintf(thread_name, 20, "%s-"ADPT_FMT, thread_name, ADPT_ARG(padapter)); + rtw_sprintf(thread_name, 20, "RTWHALXT-" ADPT_FMT, ADPT_ARG(padapter)); thread_enter(thread_name); DBG_871X("start "FUNC_ADPT_FMT"\n", FUNC_ADPT_ARG(padapter)); From f39b1f511ef3febde59f51a49e9f3fb58c06202a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 5 Feb 2020 15:32:17 +0300 Subject: [PATCH 3454/3715] staging: greybus: use after free in gb_audio_manager_remove_all() commit b7db58105b80fa9232719c8329b995b3addfab55 upstream. When we call kobject_put() and it's the last reference to the kobject then it calls gb_audio_module_release() and frees module. We dereference "module" on the next line which is a use after free. Fixes: c77f85bbc91a ("greybus: audio: Fix incorrect counting of 'ida'") Signed-off-by: Dan Carpenter Acked-by: Viresh Kumar Reviewed-by: Vaibhav Agarwal Link: https://lore.kernel.org/r/20200205123217.jreendkyxulqsool@kili.mountain Signed-off-by: Greg Kroah-Hartman --- drivers/staging/greybus/audio_manager.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/greybus/audio_manager.c b/drivers/staging/greybus/audio_manager.c index aa6508b44fab..ed7c32542cb3 100644 --- a/drivers/staging/greybus/audio_manager.c +++ b/drivers/staging/greybus/audio_manager.c @@ -90,8 +90,8 @@ void gb_audio_manager_remove_all(void) list_for_each_entry_safe(module, next, &modules_list, list) { list_del(&module->list); - kobject_put(&module->kobj); ida_simple_remove(&module_id, module->id); + kobject_put(&module->kobj); } is_empty = list_empty(&modules_list); From 591f3bc646edf4622f86f9266e4e215bde32538b Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Fri, 14 Feb 2020 12:21:01 -0600 Subject: [PATCH 3455/3715] ecryptfs: replace BUG_ON with error handling code commit 2c2a7552dd6465e8fde6bc9cccf8d66ed1c1eb72 upstream. In crypt_scatterlist, if the crypt_stat argument is not set up correctly, the kernel crashes. Instead, by returning an error code upstream, the error is handled safely. The issue is detected via a static analysis tool written by us. Fixes: 237fead619984 (ecryptfs: fs/Makefile and fs/Kconfig) Signed-off-by: Aditya Pakki Signed-off-by: Tyler Hicks Signed-off-by: Greg Kroah-Hartman --- fs/ecryptfs/crypto.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index bd25ab837011..eed38ae86c6c 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -339,8 +339,10 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, struct extent_crypt_result ecr; int rc = 0; - BUG_ON(!crypt_stat || !crypt_stat->tfm - || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED)); + if (!crypt_stat || !crypt_stat->tfm + || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED)) + return -EINVAL; + if (unlikely(ecryptfs_verbosity > 0)) { ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n", crypt_stat->key_size); From 837f007098b2521b8b10ced2fdf07fd1d0af66f9 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 10 Feb 2020 10:36:56 +0100 Subject: [PATCH 3456/3715] iommu/vt-d: Fix compile warning from intel-svm.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e7598fac323aad0e502415edeffd567315994dd6 upstream. The intel_svm_is_pasid_valid() needs to be marked inline, otherwise it causes the compile warning below: CC [M] drivers/dma/idxd/cdev.o In file included from drivers/dma/idxd/cdev.c:9:0: ./include/linux/intel-svm.h:125:12: warning: ‘intel_svm_is_pasid_valid’ defined but not used [-Wunused-function] static int intel_svm_is_pasid_valid(struct device *dev, int pasid) ^~~~~~~~~~~~~~~~~~~~~~~~ Reported-by: Borislav Petkov Fixes: 15060aba71711 ('iommu/vt-d: Helper function to query if a pasid has any active users') Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- include/linux/intel-svm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 99bc5b3ae26e..733eaf95e207 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -130,7 +130,7 @@ static inline int intel_svm_unbind_mm(struct device *dev, int pasid) BUG(); } -static int intel_svm_is_pasid_valid(struct device *dev, int pasid) +static inline int intel_svm_is_pasid_valid(struct device *dev, int pasid) { return -EINVAL; } From d3daa3edcf879828fe6767f71b00fc44e24bdd6e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 12 Feb 2020 12:19:41 +0100 Subject: [PATCH 3457/3715] genirq/proc: Reject invalid affinity masks (again) commit cba6437a1854fde5934098ec3bd0ee83af3129f5 upstream. Qian Cai reported that the WARN_ON() in the x86/msi affinity setting code, which catches cases where the affinity setting is not done on the CPU which is the current target of the interrupt, triggers during CPU hotplug stress testing. It turns out that the warning which was added with the commit addressing the MSI affinity race unearthed yet another long standing bug. If user space writes a bogus affinity mask, i.e. it contains no online CPUs, then it calls irq_select_affinity_usr(). This was introduced for ALPHA in eee45269b0f5 ("[PATCH] Alpha: convert to generic irq framework (generic part)") and subsequently made available for all architectures in 18404756765c ("genirq: Expose default irq affinity mask (take 3)") which introduced the circumvention of the affinity setting restrictions for interrupt which cannot be moved in process context. The whole exercise is bogus in various aspects: 1) If the interrupt is already started up then there is absolutely no point to honour a bogus interrupt affinity setting from user space. The interrupt is already assigned to an online CPU and it does not make any sense to reassign it to some other randomly chosen online CPU. 2) If the interupt is not yet started up then there is no point either. A subsequent startup of the interrupt will invoke irq_setup_affinity() anyway which will chose a valid target CPU. So the only correct solution is to just return -EINVAL in case user space wrote an affinity mask which does not contain any online CPUs, except for ALPHA which has it's own magic sauce for this. Fixes: 18404756765c ("genirq: Expose default irq affinity mask (take 3)") Reported-by: Qian Cai Signed-off-by: Thomas Gleixner Tested-by: Qian Cai Link: https://lkml.kernel.org/r/878sl8xdbm.fsf@nanos.tec.linutronix.de Signed-off-by: Greg Kroah-Hartman --- kernel/irq/internals.h | 2 -- kernel/irq/manage.c | 18 ++---------------- kernel/irq/proc.c | 22 ++++++++++++++++++++++ 3 files changed, 24 insertions(+), 18 deletions(-) diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 4ef7f3b820ce..5230c47fc43e 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -119,8 +119,6 @@ static inline void unregister_handler_proc(unsigned int irq, extern bool irq_can_set_affinity_usr(unsigned int irq); -extern int irq_select_affinity_usr(unsigned int irq); - extern void irq_set_thread_affinity(struct irq_desc *desc); extern int irq_do_set_affinity(struct irq_data *data, diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 9c86a3e45110..037e8fc1b008 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -382,23 +382,9 @@ int irq_setup_affinity(struct irq_desc *desc) { return irq_select_affinity(irq_desc_get_irq(desc)); } -#endif +#endif /* CONFIG_AUTO_IRQ_AFFINITY */ +#endif /* CONFIG_SMP */ -/* - * Called when a bogus affinity is set via /proc/irq - */ -int irq_select_affinity_usr(unsigned int irq) -{ - struct irq_desc *desc = irq_to_desc(irq); - unsigned long flags; - int ret; - - raw_spin_lock_irqsave(&desc->lock, flags); - ret = irq_setup_affinity(desc); - raw_spin_unlock_irqrestore(&desc->lock, flags); - return ret; -} -#endif /** * irq_set_vcpu_affinity - Set vcpu affinity for the interrupt diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index c010cc0daf79..b031db9d56c6 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -117,6 +117,28 @@ static int irq_affinity_list_proc_show(struct seq_file *m, void *v) return show_irq_affinity(AFFINITY_LIST, m); } +#ifndef CONFIG_AUTO_IRQ_AFFINITY +static inline int irq_select_affinity_usr(unsigned int irq) +{ + /* + * If the interrupt is started up already then this fails. The + * interrupt is assigned to an online CPU already. There is no + * point to move it around randomly. Tell user space that the + * selected mask is bogus. + * + * If not then any change to the affinity is pointless because the + * startup code invokes irq_setup_affinity() which will select + * a online CPU anyway. + */ + return -EINVAL; +} +#else +/* ALPHA magic affinity auto selector. Keep it for historical reasons. */ +static inline int irq_select_affinity_usr(unsigned int irq) +{ + return irq_select_affinity(irq); +} +#endif static ssize_t write_irq_affinity(int type, struct file *file, const char __user *buffer, size_t count, loff_t *pos) From 720c4bc2245c6e48644d21e3c2a4773054758e96 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 14 Feb 2020 12:13:16 +0100 Subject: [PATCH 3458/3715] ALSA: rawmidi: Avoid bit fields for state flags commit dfa9a5efe8b932a84b3b319250aa3ac60c20f876 upstream. The rawmidi state flags (opened, append, active_sensing) are stored in bit fields that can be potentially racy when concurrently accessed without any locks. Although the current code should be fine, there is also no any real benefit by keeping the bitfields for this kind of short number of members. This patch changes those bit fields flags to the simple bool fields. There should be no size increase of the snd_rawmidi_substream by this change. Reported-by: syzbot+576cc007eb9f2c968200@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20200214111316.26939-4-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- include/sound/rawmidi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h index 6665cb29e1a2..c2a71fd8dfaf 100644 --- a/include/sound/rawmidi.h +++ b/include/sound/rawmidi.h @@ -92,9 +92,9 @@ struct snd_rawmidi_substream { struct list_head list; /* list of all substream for given stream */ int stream; /* direction */ int number; /* substream number */ - unsigned int opened: 1, /* open flag */ - append: 1, /* append flag (merge more streams) */ - active_sensing: 1; /* send active sensing when close */ + bool opened; /* open flag */ + bool append; /* append flag (merge more streams) */ + bool active_sensing; /* send active sensing when close */ int use_count; /* use counter (for output) */ size_t bytes; struct snd_rawmidi *rmidi; From 29238bccf63b8339a2b65bcbecb07c142f1d7073 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 14 Feb 2020 12:13:14 +0100 Subject: [PATCH 3459/3715] ALSA: seq: Avoid concurrent access to queue flags commit bb51e669fa49feb5904f452b2991b240ef31bc97 upstream. The queue flags are represented in bit fields and the concurrent access may result in unexpected results. Although the current code should be mostly OK as it's only reading a field while writing other fields as KCSAN reported, it's safer to cover both with a proper spinlock protection. This patch fixes the possible concurrent read by protecting with q->owner_lock. Also the queue owner field is protected as well since it's the field to be protected by the lock itself. Reported-by: syzbot+65c6c92d04304d0a8efc@syzkaller.appspotmail.com Reported-by: syzbot+e60ddfa48717579799dd@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20200214111316.26939-2-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/seq/seq_queue.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/sound/core/seq/seq_queue.c b/sound/core/seq/seq_queue.c index 1a6dc4ff44a6..a9a0b2f2708e 100644 --- a/sound/core/seq/seq_queue.c +++ b/sound/core/seq/seq_queue.c @@ -415,6 +415,7 @@ int snd_seq_queue_check_access(int queueid, int client) int snd_seq_queue_set_owner(int queueid, int client, int locked) { struct snd_seq_queue *q = queueptr(queueid); + unsigned long flags; if (q == NULL) return -EINVAL; @@ -424,8 +425,10 @@ int snd_seq_queue_set_owner(int queueid, int client, int locked) return -EPERM; } + spin_lock_irqsave(&q->owner_lock, flags); q->locked = locked ? 1 : 0; q->owner = client; + spin_unlock_irqrestore(&q->owner_lock, flags); queue_access_unlock(q); queuefree(q); @@ -564,15 +567,17 @@ void snd_seq_queue_client_termination(int client) unsigned long flags; int i; struct snd_seq_queue *q; + bool matched; for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) { if ((q = queueptr(i)) == NULL) continue; spin_lock_irqsave(&q->owner_lock, flags); - if (q->owner == client) + matched = (q->owner == client); + if (matched) q->klocked = 1; spin_unlock_irqrestore(&q->owner_lock, flags); - if (q->owner == client) { + if (matched) { if (q->timer->running) snd_seq_timer_stop(q->timer); snd_seq_timer_reset(q->timer); @@ -764,6 +769,8 @@ void snd_seq_info_queues_read(struct snd_info_entry *entry, int i, bpm; struct snd_seq_queue *q; struct snd_seq_timer *tmr; + bool locked; + int owner; for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) { if ((q = queueptr(i)) == NULL) @@ -775,9 +782,14 @@ void snd_seq_info_queues_read(struct snd_info_entry *entry, else bpm = 0; + spin_lock_irq(&q->owner_lock); + locked = q->locked; + owner = q->owner; + spin_unlock_irq(&q->owner_lock); + snd_iprintf(buffer, "queue %d: [%s]\n", q->queue, q->name); - snd_iprintf(buffer, "owned by client : %d\n", q->owner); - snd_iprintf(buffer, "lock status : %s\n", q->locked ? "Locked" : "Free"); + snd_iprintf(buffer, "owned by client : %d\n", owner); + snd_iprintf(buffer, "lock status : %s\n", locked ? "Locked" : "Free"); snd_iprintf(buffer, "queued time events : %d\n", snd_seq_prioq_avail(q->timeq)); snd_iprintf(buffer, "queued tick events : %d\n", snd_seq_prioq_avail(q->tickq)); snd_iprintf(buffer, "timer state : %s\n", tmr->running ? "Running" : "Stopped"); From c33c14e30f3437d419761048f70dd88b7ec797c8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 14 Feb 2020 12:13:15 +0100 Subject: [PATCH 3460/3715] ALSA: seq: Fix concurrent access to queue current tick/time commit dc7497795e014d84699c3b8809ed6df35352dd74 upstream. snd_seq_check_queue() passes the current tick and time of the given queue as a pointer to snd_seq_prioq_cell_out(), but those might be updated concurrently by the seq timer update. Fix it by retrieving the current tick and time via the proper helper functions at first, and pass those values to snd_seq_prioq_cell_out() later in the loops. snd_seq_timer_get_cur_time() takes a new argument and adjusts with the current system time only when it's requested so; this update isn't needed for snd_seq_check_queue(), as it's called either from the interrupt handler or right after queuing. Also, snd_seq_timer_get_cur_tick() is changed to read the value in the spinlock for the concurrency, too. Reported-by: syzbot+fd5e0eaa1a32999173b2@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20200214111316.26939-3-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/seq/seq_clientmgr.c | 4 ++-- sound/core/seq/seq_queue.c | 9 ++++++--- sound/core/seq/seq_timer.c | 13 ++++++++++--- sound/core/seq/seq_timer.h | 3 ++- 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 92b0d4523a07..6fe93d5f6f71 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -564,7 +564,7 @@ static int update_timestamp_of_queue(struct snd_seq_event *event, event->queue = queue; event->flags &= ~SNDRV_SEQ_TIME_STAMP_MASK; if (real_time) { - event->time.time = snd_seq_timer_get_cur_time(q->timer); + event->time.time = snd_seq_timer_get_cur_time(q->timer, true); event->flags |= SNDRV_SEQ_TIME_STAMP_REAL; } else { event->time.tick = snd_seq_timer_get_cur_tick(q->timer); @@ -1639,7 +1639,7 @@ static int snd_seq_ioctl_get_queue_status(struct snd_seq_client *client, tmr = queue->timer; status->events = queue->tickq->cells + queue->timeq->cells; - status->time = snd_seq_timer_get_cur_time(tmr); + status->time = snd_seq_timer_get_cur_time(tmr, true); status->tick = snd_seq_timer_get_cur_tick(tmr); status->running = tmr->running; diff --git a/sound/core/seq/seq_queue.c b/sound/core/seq/seq_queue.c index a9a0b2f2708e..ea1aa0796276 100644 --- a/sound/core/seq/seq_queue.c +++ b/sound/core/seq/seq_queue.c @@ -261,6 +261,8 @@ void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop) { unsigned long flags; struct snd_seq_event_cell *cell; + snd_seq_tick_time_t cur_tick; + snd_seq_real_time_t cur_time; if (q == NULL) return; @@ -277,17 +279,18 @@ void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop) __again: /* Process tick queue... */ + cur_tick = snd_seq_timer_get_cur_tick(q->timer); for (;;) { - cell = snd_seq_prioq_cell_out(q->tickq, - &q->timer->tick.cur_tick); + cell = snd_seq_prioq_cell_out(q->tickq, &cur_tick); if (!cell) break; snd_seq_dispatch_event(cell, atomic, hop); } /* Process time queue... */ + cur_time = snd_seq_timer_get_cur_time(q->timer, false); for (;;) { - cell = snd_seq_prioq_cell_out(q->timeq, &q->timer->cur_time); + cell = snd_seq_prioq_cell_out(q->timeq, &cur_time); if (!cell) break; snd_seq_dispatch_event(cell, atomic, hop); diff --git a/sound/core/seq/seq_timer.c b/sound/core/seq/seq_timer.c index 0e1feb597586..bd5e5a5d52a8 100644 --- a/sound/core/seq/seq_timer.c +++ b/sound/core/seq/seq_timer.c @@ -436,14 +436,15 @@ int snd_seq_timer_continue(struct snd_seq_timer *tmr) } /* return current 'real' time. use timeofday() to get better granularity. */ -snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr) +snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr, + bool adjust_ktime) { snd_seq_real_time_t cur_time; unsigned long flags; spin_lock_irqsave(&tmr->lock, flags); cur_time = tmr->cur_time; - if (tmr->running) { + if (adjust_ktime && tmr->running) { struct timespec64 tm; ktime_get_ts64(&tm); @@ -460,7 +461,13 @@ snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr) high PPQ values) */ snd_seq_tick_time_t snd_seq_timer_get_cur_tick(struct snd_seq_timer *tmr) { - return tmr->tick.cur_tick; + snd_seq_tick_time_t cur_tick; + unsigned long flags; + + spin_lock_irqsave(&tmr->lock, flags); + cur_tick = tmr->tick.cur_tick; + spin_unlock_irqrestore(&tmr->lock, flags); + return cur_tick; } diff --git a/sound/core/seq/seq_timer.h b/sound/core/seq/seq_timer.h index 9506b661fe5b..5d47d559465e 100644 --- a/sound/core/seq/seq_timer.h +++ b/sound/core/seq/seq_timer.h @@ -135,7 +135,8 @@ int snd_seq_timer_set_ppq(struct snd_seq_timer *tmr, int ppq); int snd_seq_timer_set_position_tick(struct snd_seq_timer *tmr, snd_seq_tick_time_t position); int snd_seq_timer_set_position_time(struct snd_seq_timer *tmr, snd_seq_real_time_t position); int snd_seq_timer_set_skew(struct snd_seq_timer *tmr, unsigned int skew, unsigned int base); -snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr); +snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr, + bool adjust_ktime); snd_seq_tick_time_t snd_seq_timer_get_cur_tick(struct snd_seq_timer *tmr); extern int seq_default_timer_class; From a86265edeb3314f9c3270a5bf18b4e72ebc65beb Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 2 Feb 2020 20:30:53 -0800 Subject: [PATCH 3461/3715] netfilter: xt_hashlimit: limit the max size of hashtable commit 8d0015a7ab76b8b1e89a3e5f5710a6e5103f2dd5 upstream. The user-specified hashtable size is unbound, this could easily lead to an OOM or a hung task as we hold the global mutex while allocating and initializing the new hashtable. Add a max value to cap both cfg->size and cfg->max, as suggested by Florian. Reported-and-tested-by: syzbot+adf6c6c2be1c3a718121@syzkaller.appspotmail.com Signed-off-by: Cong Wang Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/xt_hashlimit.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index fe8e8a1622b5..186f97f1c6c0 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -845,6 +845,8 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 3); } +#define HASHLIMIT_MAX_SIZE 1048576 + static int hashlimit_mt_check_common(const struct xt_mtchk_param *par, struct xt_hashlimit_htable **hinfo, struct hashlimit_cfg3 *cfg, @@ -855,6 +857,14 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par, if (cfg->gc_interval == 0 || cfg->expire == 0) return -EINVAL; + if (cfg->size > HASHLIMIT_MAX_SIZE) { + cfg->size = HASHLIMIT_MAX_SIZE; + pr_info_ratelimited("size too large, truncated to %u\n", cfg->size); + } + if (cfg->max > HASHLIMIT_MAX_SIZE) { + cfg->max = HASHLIMIT_MAX_SIZE; + pr_info_ratelimited("max too large, truncated to %u\n", cfg->max); + } if (par->family == NFPROTO_IPV4) { if (cfg->srcmask > 32 || cfg->dstmask > 32) return -EINVAL; From 8b6934200c2469c0726a709f93ad108573550c80 Mon Sep 17 00:00:00 2001 From: Prabhakar Kushwaha Date: Sat, 25 Jan 2020 03:37:29 +0000 Subject: [PATCH 3462/3715] ata: ahci: Add shutdown to freeze hardware resources of ahci commit 10a663a1b15134a5a714aa515e11425a44d4fdf7 upstream. device_shutdown() called from reboot or power_shutdown expect all devices to be shutdown. Same is true for even ahci pci driver. As no ahci shutdown function is implemented, the ata subsystem always remains alive with DMA & interrupt support. File system related calls should not be honored after device_shutdown(). So defining ahci pci driver shutdown to freeze hardware (mask interrupt, stop DMA engine and free DMA resources). Signed-off-by: Prabhakar Kushwaha Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci.c | 7 +++++++ drivers/ata/libata-core.c | 21 +++++++++++++++++++++ include/linux/libata.h | 1 + 3 files changed, 29 insertions(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index f003e301723a..0905c07b8c7e 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -88,6 +88,7 @@ enum board_ids { static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent); static void ahci_remove_one(struct pci_dev *dev); +static void ahci_shutdown_one(struct pci_dev *dev); static int ahci_vt8251_hardreset(struct ata_link *link, unsigned int *class, unsigned long deadline); static int ahci_avn_hardreset(struct ata_link *link, unsigned int *class, @@ -586,6 +587,7 @@ static struct pci_driver ahci_pci_driver = { .id_table = ahci_pci_tbl, .probe = ahci_init_one, .remove = ahci_remove_one, + .shutdown = ahci_shutdown_one, .driver = { .pm = &ahci_pci_pm_ops, }, @@ -1823,6 +1825,11 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; } +static void ahci_shutdown_one(struct pci_dev *pdev) +{ + ata_pci_shutdown_one(pdev); +} + static void ahci_remove_one(struct pci_dev *pdev) { pm_runtime_get_noresume(&pdev->dev); diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 08f67c109429..33eb5e342a7a 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -6706,6 +6706,26 @@ void ata_pci_remove_one(struct pci_dev *pdev) ata_host_detach(host); } +void ata_pci_shutdown_one(struct pci_dev *pdev) +{ + struct ata_host *host = pci_get_drvdata(pdev); + int i; + + for (i = 0; i < host->n_ports; i++) { + struct ata_port *ap = host->ports[i]; + + ap->pflags |= ATA_PFLAG_FROZEN; + + /* Disable port interrupts */ + if (ap->ops->freeze) + ap->ops->freeze(ap); + + /* Stop the port DMA engines */ + if (ap->ops->port_stop) + ap->ops->port_stop(ap); + } +} + /* move to PCI subsystem */ int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits) { @@ -7326,6 +7346,7 @@ EXPORT_SYMBOL_GPL(ata_timing_cycle2mode); #ifdef CONFIG_PCI EXPORT_SYMBOL_GPL(pci_test_config_bits); +EXPORT_SYMBOL_GPL(ata_pci_shutdown_one); EXPORT_SYMBOL_GPL(ata_pci_remove_one); #ifdef CONFIG_PM EXPORT_SYMBOL_GPL(ata_pci_device_do_suspend); diff --git a/include/linux/libata.h b/include/linux/libata.h index c5188dc389c8..93838d98e3f3 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1229,6 +1229,7 @@ struct pci_bits { }; extern int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits); +extern void ata_pci_shutdown_one(struct pci_dev *pdev); extern void ata_pci_remove_one(struct pci_dev *pdev); #ifdef CONFIG_PM From f02017e021758c6dc3da591cac78ed512813dbb1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 19 Feb 2020 18:30:26 +0100 Subject: [PATCH 3463/3715] xen: Enable interrupts when calling _cond_resched() commit 8645e56a4ad6dcbf504872db7f14a2f67db88ef2 upstream. xen_maybe_preempt_hcall() is called from the exception entry point xen_do_hypervisor_callback with interrupts disabled. _cond_resched() evades the might_sleep() check in cond_resched() which would have caught that and schedule_debug() unfortunately lacks a check for irqs_disabled(). Enable interrupts around the call and use cond_resched() to catch future issues. Fixes: fdfd811ddde3 ("x86/xen: allow privcmd hypercalls to be preempted") Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/878skypjrh.fsf@nanos.tec.linutronix.de Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky Signed-off-by: Greg Kroah-Hartman --- drivers/xen/preempt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/xen/preempt.c b/drivers/xen/preempt.c index 08cb419eb4e6..5f6b77ea34fb 100644 --- a/drivers/xen/preempt.c +++ b/drivers/xen/preempt.c @@ -37,7 +37,9 @@ asmlinkage __visible void xen_maybe_preempt_hcall(void) * cpu. */ __this_cpu_write(xen_in_preemptible_hcall, false); - _cond_resched(); + local_irq_enable(); + cond_resched(); + local_irq_disable(); __this_cpu_write(xen_in_preemptible_hcall, true); } } From 2fc1b71474b926b1671c96265c6b9637f2b607f0 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 13 Feb 2020 23:42:07 -0700 Subject: [PATCH 3464/3715] s390/mm: Explicitly compare PAGE_DEFAULT_KEY against zero in storage_key_init_range commit 380324734956c64cd060e1db4304f3117ac15809 upstream. Clang warns: In file included from ../arch/s390/purgatory/purgatory.c:10: In file included from ../include/linux/kexec.h:18: In file included from ../include/linux/crash_core.h:6: In file included from ../include/linux/elfcore.h:5: In file included from ../include/linux/user.h:1: In file included from ../arch/s390/include/asm/user.h:11: ../arch/s390/include/asm/page.h:45:6: warning: converting the result of '<<' to a boolean always evaluates to false [-Wtautological-constant-compare] if (PAGE_DEFAULT_KEY) ^ ../arch/s390/include/asm/page.h:23:44: note: expanded from macro 'PAGE_DEFAULT_KEY' #define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4) ^ 1 warning generated. Explicitly compare this against zero to silence the warning as it is intended to be used in a boolean context. Fixes: de3fa841e429 ("s390/mm: fix compile for PAGE_DEFAULT_KEY != 0") Link: https://github.com/ClangBuiltLinux/linux/issues/860 Link: https://lkml.kernel.org/r/20200214064207.10381-1-natechancellor@gmail.com Acked-by: Christian Borntraeger Signed-off-by: Nathan Chancellor Signed-off-by: Vasily Gorbik Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/page.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 779c589b7089..5f2e272895ff 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -42,7 +42,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end); static inline void storage_key_init_range(unsigned long start, unsigned long end) { - if (PAGE_DEFAULT_KEY) + if (PAGE_DEFAULT_KEY != 0) __storage_key_init_range(start, end); } From 78d697fc93f98054e36a3ab76dca1a88802ba7be Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 28 Feb 2020 16:36:17 +0100 Subject: [PATCH 3465/3715] Linux 4.14.172 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f2657f4838db..6d3cecad7f1e 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 171 +SUBLEVEL = 172 EXTRAVERSION = NAME = Petit Gorille From b05c992acd41b01629d2035d54ef6d4ed6334e0f Mon Sep 17 00:00:00 2001 From: "Daniel J. Ogorchock" Date: Sun, 29 Dec 2019 19:27:09 -0600 Subject: [PATCH 3466/3715] FROMLIST: HID: nintendo: add nintendo switch controller driver The hid-nintendo driver supports the Nintendo Switch Pro Controllers and the Joy-Cons. The Pro Controllers can be used over USB or Bluetooth. The Joy-Cons each create their own, independent input devices, so it is up to userspace to combine them if desired. Signed-off-by: Daniel J. Ogorchock Test: tested via custom test app Test: atest NintendoSwitchProTest Bug: 135136477 Link: https://patchwork.kernel.org/patch/11312547/ Link: https://lore.kernel.org/linux-input/20191230012720.2368987-2-djogorchock@gmail.com/ Change-Id: I179da1092faedc2ad25336224cf5ec8ff00e0d3f Signed-off-by: Siarhei Vishniakou --- MAINTAINERS | 6 + drivers/hid/Kconfig | 11 + drivers/hid/Makefile | 1 + drivers/hid/hid-core.c | 10 + drivers/hid/hid-ids.h | 3 + drivers/hid/hid-nintendo.c | 820 +++++++++++++++++++++++++++++++++++++ 6 files changed, 851 insertions(+) create mode 100644 drivers/hid/hid-nintendo.c diff --git a/MAINTAINERS b/MAINTAINERS index 77c68f63e29f..621734fe3603 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9591,6 +9591,12 @@ S: Maintained F: Documentation/scsi/NinjaSCSI.txt F: drivers/scsi/nsp32* +NINTENDO HID DRIVER +M: Daniel J. Ogorchock +L: linux-input@vger.kernel.org +S: Maintained +F: drivers/hid/hid-nintendo* + NIOS2 ARCHITECTURE M: Ley Foon Tan L: nios2-dev@lists.rocketboards.org (moderated for non-subscribers) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index e51c529035cb..d9185240239a 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -609,6 +609,17 @@ config HID_MULTITOUCH To compile this driver as a module, choose M here: the module will be called hid-multitouch. +config HID_NINTENDO + tristate "Nintendo Joy-Con and Pro Controller support" + depends on HID + help + Adds support for the Nintendo Switch Joy-Cons and Pro Controller. + All controllers support bluetooth, and the Pro Controller also supports + its USB mode. + + To compile this driver as a module, choose M here: the + module will be called hid-nintendo. + config HID_NTI tristate "NTI keyboard adapters" ---help--- diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile index e146c257285a..aa7497748bb1 100644 --- a/drivers/hid/Makefile +++ b/drivers/hid/Makefile @@ -65,6 +65,7 @@ obj-$(CONFIG_HID_MAYFLASH) += hid-mf.o obj-$(CONFIG_HID_MICROSOFT) += hid-microsoft.o obj-$(CONFIG_HID_MONTEREY) += hid-monterey.o obj-$(CONFIG_HID_MULTITOUCH) += hid-multitouch.o +obj-$(CONFIG_HID_NINTENDO) += hid-nintendo.o obj-$(CONFIG_HID_NTI) += hid-nti.o obj-$(CONFIG_HID_NTRIG) += hid-ntrig.o obj-$(CONFIG_HID_ORTEK) += hid-ortek.o diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 58b73eb9de3a..69229225a190 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -2259,6 +2259,16 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, USB_DEVICE_ID_NINTENDO_WIIMOTE) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, USB_DEVICE_ID_NINTENDO_WIIMOTE2) }, #endif +#if IS_ENABLED(CONFIG_HID_NINTENDO) + { HID_USB_DEVICE(USB_VENDOR_ID_NINTENDO, + USB_DEVICE_ID_NINTENDO_PROCON) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, + USB_DEVICE_ID_NINTENDO_PROCON) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, + USB_DEVICE_ID_NINTENDO_JOYCONL) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, + USB_DEVICE_ID_NINTENDO_JOYCONR) }, +#endif #if IS_ENABLED(CONFIG_HID_NTI) { HID_USB_DEVICE(USB_VENDOR_ID_NTI, USB_DEVICE_ID_USB_SUN) }, #endif diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 245a0cff0b87..71342b5cb703 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -816,6 +816,9 @@ #define USB_VENDOR_ID_NINTENDO 0x057e #define USB_DEVICE_ID_NINTENDO_WIIMOTE 0x0306 #define USB_DEVICE_ID_NINTENDO_WIIMOTE2 0x0330 +#define USB_DEVICE_ID_NINTENDO_JOYCONL 0x2006 +#define USB_DEVICE_ID_NINTENDO_JOYCONR 0x2007 +#define USB_DEVICE_ID_NINTENDO_PROCON 0x2009 #define USB_VENDOR_ID_NOVATEK 0x0603 #define USB_DEVICE_ID_NOVATEK_PCT 0x0600 diff --git a/drivers/hid/hid-nintendo.c b/drivers/hid/hid-nintendo.c new file mode 100644 index 000000000000..3695b96694bd --- /dev/null +++ b/drivers/hid/hid-nintendo.c @@ -0,0 +1,820 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * HID driver for Nintendo Switch Joy-Cons and Pro Controllers + * + * Copyright (c) 2019 Daniel J. Ogorchock + * + * The following resources/projects were referenced for this driver: + * https://github.com/dekuNukem/Nintendo_Switch_Reverse_Engineering + * https://gitlab.com/pjranki/joycon-linux-kernel (Peter Rankin) + * https://github.com/FrotBot/SwitchProConLinuxUSB + * https://github.com/MTCKC/ProconXInput + * hid-wiimote kernel hid driver + * hid-logitech-hidpp driver + * + * This driver supports the Nintendo Switch Joy-Cons and Pro Controllers. The + * Pro Controllers can either be used over USB or Bluetooth. + * + * The driver will retrieve the factory calibration info from the controllers, + * so little to no user calibration should be required. + * + */ + +#include "hid-ids.h" +#include +#include +#include +#include +#include +#include + +/* + * Reference the url below for the following HID report defines: + * https://github.com/dekuNukem/Nintendo_Switch_Reverse_Engineering + */ + +/* Output Reports */ +static const u8 JC_OUTPUT_RUMBLE_AND_SUBCMD = 0x01; +static const u8 JC_OUTPUT_FW_UPDATE_PKT = 0x03; +static const u8 JC_OUTPUT_RUMBLE_ONLY = 0x10; +static const u8 JC_OUTPUT_MCU_DATA = 0x11; +static const u8 JC_OUTPUT_USB_CMD = 0x80; + +/* Subcommand IDs */ +static const u8 JC_SUBCMD_STATE /*= 0x00*/; +static const u8 JC_SUBCMD_MANUAL_BT_PAIRING = 0x01; +static const u8 JC_SUBCMD_REQ_DEV_INFO = 0x02; +static const u8 JC_SUBCMD_SET_REPORT_MODE = 0x03; +static const u8 JC_SUBCMD_TRIGGERS_ELAPSED = 0x04; +static const u8 JC_SUBCMD_GET_PAGE_LIST_STATE = 0x05; +static const u8 JC_SUBCMD_SET_HCI_STATE = 0x06; +static const u8 JC_SUBCMD_RESET_PAIRING_INFO = 0x07; +static const u8 JC_SUBCMD_LOW_POWER_MODE = 0x08; +static const u8 JC_SUBCMD_SPI_FLASH_READ = 0x10; +static const u8 JC_SUBCMD_SPI_FLASH_WRITE = 0x11; +static const u8 JC_SUBCMD_RESET_MCU = 0x20; +static const u8 JC_SUBCMD_SET_MCU_CONFIG = 0x21; +static const u8 JC_SUBCMD_SET_MCU_STATE = 0x22; +static const u8 JC_SUBCMD_SET_PLAYER_LIGHTS = 0x30; +static const u8 JC_SUBCMD_GET_PLAYER_LIGHTS = 0x31; +static const u8 JC_SUBCMD_SET_HOME_LIGHT = 0x38; +static const u8 JC_SUBCMD_ENABLE_IMU = 0x40; +static const u8 JC_SUBCMD_SET_IMU_SENSITIVITY = 0x41; +static const u8 JC_SUBCMD_WRITE_IMU_REG = 0x42; +static const u8 JC_SUBCMD_READ_IMU_REG = 0x43; +static const u8 JC_SUBCMD_ENABLE_VIBRATION = 0x48; +static const u8 JC_SUBCMD_GET_REGULATED_VOLTAGE = 0x50; + +/* Input Reports */ +static const u8 JC_INPUT_BUTTON_EVENT = 0x3F; +static const u8 JC_INPUT_SUBCMD_REPLY = 0x21; +static const u8 JC_INPUT_IMU_DATA = 0x30; +static const u8 JC_INPUT_MCU_DATA = 0x31; +static const u8 JC_INPUT_USB_RESPONSE = 0x81; + +/* Feature Reports */ +static const u8 JC_FEATURE_LAST_SUBCMD = 0x02; +static const u8 JC_FEATURE_OTA_FW_UPGRADE = 0x70; +static const u8 JC_FEATURE_SETUP_MEM_READ = 0x71; +static const u8 JC_FEATURE_MEM_READ = 0x72; +static const u8 JC_FEATURE_ERASE_MEM_SECTOR = 0x73; +static const u8 JC_FEATURE_MEM_WRITE = 0x74; +static const u8 JC_FEATURE_LAUNCH = 0x75; + +/* USB Commands */ +static const u8 JC_USB_CMD_CONN_STATUS = 0x01; +static const u8 JC_USB_CMD_HANDSHAKE = 0x02; +static const u8 JC_USB_CMD_BAUDRATE_3M = 0x03; +static const u8 JC_USB_CMD_NO_TIMEOUT = 0x04; +static const u8 JC_USB_CMD_EN_TIMEOUT = 0x05; +static const u8 JC_USB_RESET = 0x06; +static const u8 JC_USB_PRE_HANDSHAKE = 0x91; +static const u8 JC_USB_SEND_UART = 0x92; + +/* SPI storage addresses of factory calibration data */ +static const u16 JC_CAL_DATA_START = 0x603d; +static const u16 JC_CAL_DATA_END = 0x604e; +#define JC_CAL_DATA_SIZE (JC_CAL_DATA_END - JC_CAL_DATA_START + 1) + + +/* The raw analog joystick values will be mapped in terms of this magnitude */ +static const u16 JC_MAX_STICK_MAG = 32767; +static const u16 JC_STICK_FUZZ = 250; +static const u16 JC_STICK_FLAT = 500; + +/* States for controller state machine */ +enum joycon_ctlr_state { + JOYCON_CTLR_STATE_INIT, + JOYCON_CTLR_STATE_READ, +}; + +struct joycon_stick_cal { + s32 max; + s32 min; + s32 center; +}; + +/* + * All the controller's button values are stored in a u32. + * They can be accessed with bitwise ANDs. + */ +static const u32 JC_BTN_Y = BIT(0); +static const u32 JC_BTN_X = BIT(1); +static const u32 JC_BTN_B = BIT(2); +static const u32 JC_BTN_A = BIT(3); +static const u32 JC_BTN_SR_R = BIT(4); +static const u32 JC_BTN_SL_R = BIT(5); +static const u32 JC_BTN_R = BIT(6); +static const u32 JC_BTN_ZR = BIT(7); +static const u32 JC_BTN_MINUS = BIT(8); +static const u32 JC_BTN_PLUS = BIT(9); +static const u32 JC_BTN_RSTICK = BIT(10); +static const u32 JC_BTN_LSTICK = BIT(11); +static const u32 JC_BTN_HOME = BIT(12); +static const u32 JC_BTN_CAP = BIT(13); /* capture button */ +static const u32 JC_BTN_DOWN = BIT(16); +static const u32 JC_BTN_UP = BIT(17); +static const u32 JC_BTN_RIGHT = BIT(18); +static const u32 JC_BTN_LEFT = BIT(19); +static const u32 JC_BTN_SR_L = BIT(20); +static const u32 JC_BTN_SL_L = BIT(21); +static const u32 JC_BTN_L = BIT(22); +static const u32 JC_BTN_ZL = BIT(23); + +enum joycon_msg_type { + JOYCON_MSG_TYPE_NONE, + JOYCON_MSG_TYPE_USB, + JOYCON_MSG_TYPE_SUBCMD, +}; + +struct joycon_subcmd_request { + u8 output_id; /* must be 0x01 for subcommand, 0x10 for rumble only */ + u8 packet_num; /* incremented every send */ + u8 rumble_data[8]; + u8 subcmd_id; + u8 data[0]; /* length depends on the subcommand */ +} __packed; + +struct joycon_subcmd_reply { + u8 ack; /* MSB 1 for ACK, 0 for NACK */ + u8 id; /* id of requested subcmd */ + u8 data[0]; /* will be at most 35 bytes */ +} __packed; + +struct joycon_input_report { + u8 id; + u8 timer; + u8 bat_con; /* battery and connection info */ + u8 button_status[3]; + u8 left_stick[3]; + u8 right_stick[3]; + u8 vibrator_report; + + /* + * If support for firmware updates, gyroscope data, and/or NFC/IR + * are added in the future, this can be swapped for a union. + */ + struct joycon_subcmd_reply reply; +} __packed; + +#define JC_MAX_RESP_SIZE (sizeof(struct joycon_input_report) + 35) + +/* Each physical controller is associated with a joycon_ctlr struct */ +struct joycon_ctlr { + struct hid_device *hdev; + struct input_dev *input; + enum joycon_ctlr_state ctlr_state; + + /* The following members are used for synchronous sends/receives */ + enum joycon_msg_type msg_type; + u8 subcmd_num; + struct mutex output_mutex; + u8 input_buf[JC_MAX_RESP_SIZE]; + wait_queue_head_t wait; + bool received_resp; + u8 usb_ack_match; + u8 subcmd_ack_match; + + /* factory calibration data */ + struct joycon_stick_cal left_stick_cal_x; + struct joycon_stick_cal left_stick_cal_y; + struct joycon_stick_cal right_stick_cal_x; + struct joycon_stick_cal right_stick_cal_y; + +}; + +static int __joycon_hid_send(struct hid_device *hdev, u8 *data, size_t len) +{ + u8 *buf; + int ret; + + buf = kmemdup(data, len, GFP_KERNEL); + if (!buf) + return -ENOMEM; + ret = hid_hw_output_report(hdev, buf, len); + kfree(buf); + if (ret < 0) + hid_dbg(hdev, "Failed to send output report ret=%d\n", ret); + return ret; +} + +static int joycon_hid_send_sync(struct joycon_ctlr *ctlr, u8 *data, size_t len) +{ + int ret; + + ret = __joycon_hid_send(ctlr->hdev, data, len); + if (ret < 0) { + memset(ctlr->input_buf, 0, JC_MAX_RESP_SIZE); + return ret; + } + + if (!wait_event_timeout(ctlr->wait, ctlr->received_resp, HZ)) { + hid_dbg(ctlr->hdev, "synchronous send/receive timed out\n"); + memset(ctlr->input_buf, 0, JC_MAX_RESP_SIZE); + return -ETIMEDOUT; + } + + ctlr->received_resp = false; + return 0; +} + +static int joycon_send_usb(struct joycon_ctlr *ctlr, u8 cmd) +{ + int ret; + u8 buf[2] = {JC_OUTPUT_USB_CMD}; + + buf[1] = cmd; + ctlr->usb_ack_match = cmd; + ctlr->msg_type = JOYCON_MSG_TYPE_USB; + ret = joycon_hid_send_sync(ctlr, buf, sizeof(buf)); + if (ret) + hid_dbg(ctlr->hdev, "send usb command failed; ret=%d\n", ret); + return ret; +} + +static int joycon_send_subcmd(struct joycon_ctlr *ctlr, + struct joycon_subcmd_request *subcmd, + size_t data_len) +{ + int ret; + + subcmd->output_id = JC_OUTPUT_RUMBLE_AND_SUBCMD; + subcmd->packet_num = ctlr->subcmd_num; + if (++ctlr->subcmd_num > 0xF) + ctlr->subcmd_num = 0; + ctlr->subcmd_ack_match = subcmd->subcmd_id; + ctlr->msg_type = JOYCON_MSG_TYPE_SUBCMD; + + ret = joycon_hid_send_sync(ctlr, (u8 *)subcmd, + sizeof(*subcmd) + data_len); + if (ret < 0) + hid_dbg(ctlr->hdev, "send subcommand failed; ret=%d\n", ret); + else + ret = 0; + return ret; +} + +/* Supply nibbles for flash and on. Ones correspond to active */ +static int joycon_set_player_leds(struct joycon_ctlr *ctlr, u8 flash, u8 on) +{ + struct joycon_subcmd_request *req; + u8 buffer[sizeof(*req) + 1] = { 0 }; + + req = (struct joycon_subcmd_request *)buffer; + req->subcmd_id = JC_SUBCMD_SET_PLAYER_LIGHTS; + req->data[0] = (flash << 4) | on; + + hid_dbg(ctlr->hdev, "setting player leds\n"); + return joycon_send_subcmd(ctlr, req, 1); +} + +static const u16 DFLT_STICK_CAL_CEN = 2000; +static const u16 DFLT_STICK_CAL_MAX = 3500; +static const u16 DFLT_STICK_CAL_MIN = 500; +static int joycon_request_calibration(struct joycon_ctlr *ctlr) +{ + struct joycon_subcmd_request *req; + u8 buffer[sizeof(*req) + 5] = { 0 }; + struct joycon_input_report *report; + struct joycon_stick_cal *cal_x; + struct joycon_stick_cal *cal_y; + s32 x_max_above; + s32 x_min_below; + s32 y_max_above; + s32 y_min_below; + u8 *data; + u8 *raw_cal; + int ret; + + req = (struct joycon_subcmd_request *)buffer; + req->subcmd_id = JC_SUBCMD_SPI_FLASH_READ; + data = req->data; + data[0] = 0xFF & JC_CAL_DATA_START; + data[1] = 0xFF & (JC_CAL_DATA_START >> 8); + data[2] = 0xFF & (JC_CAL_DATA_START >> 16); + data[3] = 0xFF & (JC_CAL_DATA_START >> 24); + data[4] = JC_CAL_DATA_SIZE; + + hid_dbg(ctlr->hdev, "requesting cal data\n"); + ret = joycon_send_subcmd(ctlr, req, 5); + if (ret) { + hid_warn(ctlr->hdev, + "Failed to read stick cal, using defaults; ret=%d\n", + ret); + + ctlr->left_stick_cal_x.center = DFLT_STICK_CAL_CEN; + ctlr->left_stick_cal_x.max = DFLT_STICK_CAL_MAX; + ctlr->left_stick_cal_x.min = DFLT_STICK_CAL_MIN; + + ctlr->left_stick_cal_y.center = DFLT_STICK_CAL_CEN; + ctlr->left_stick_cal_y.max = DFLT_STICK_CAL_MAX; + ctlr->left_stick_cal_y.min = DFLT_STICK_CAL_MIN; + + ctlr->right_stick_cal_x.center = DFLT_STICK_CAL_CEN; + ctlr->right_stick_cal_x.max = DFLT_STICK_CAL_MAX; + ctlr->right_stick_cal_x.min = DFLT_STICK_CAL_MIN; + + ctlr->right_stick_cal_y.center = DFLT_STICK_CAL_CEN; + ctlr->right_stick_cal_y.max = DFLT_STICK_CAL_MAX; + ctlr->right_stick_cal_y.min = DFLT_STICK_CAL_MIN; + + return ret; + } + + report = (struct joycon_input_report *)ctlr->input_buf; + raw_cal = &report->reply.data[5]; + + /* left stick calibration parsing */ + cal_x = &ctlr->left_stick_cal_x; + cal_y = &ctlr->left_stick_cal_y; + + x_max_above = hid_field_extract(ctlr->hdev, (raw_cal + 0), 0, 12); + y_max_above = hid_field_extract(ctlr->hdev, (raw_cal + 1), 4, 12); + cal_x->center = hid_field_extract(ctlr->hdev, (raw_cal + 3), 0, 12); + cal_y->center = hid_field_extract(ctlr->hdev, (raw_cal + 4), 4, 12); + x_min_below = hid_field_extract(ctlr->hdev, (raw_cal + 6), 0, 12); + y_min_below = hid_field_extract(ctlr->hdev, (raw_cal + 7), 4, 12); + cal_x->max = cal_x->center + x_max_above; + cal_x->min = cal_x->center - x_min_below; + cal_y->max = cal_y->center + y_max_above; + cal_y->min = cal_y->center - y_min_below; + + /* right stick calibration parsing */ + raw_cal += 9; + cal_x = &ctlr->right_stick_cal_x; + cal_y = &ctlr->right_stick_cal_y; + + cal_x->center = hid_field_extract(ctlr->hdev, (raw_cal + 0), 0, 12); + cal_y->center = hid_field_extract(ctlr->hdev, (raw_cal + 1), 4, 12); + x_min_below = hid_field_extract(ctlr->hdev, (raw_cal + 3), 0, 12); + y_min_below = hid_field_extract(ctlr->hdev, (raw_cal + 4), 4, 12); + x_max_above = hid_field_extract(ctlr->hdev, (raw_cal + 6), 0, 12); + y_max_above = hid_field_extract(ctlr->hdev, (raw_cal + 7), 4, 12); + cal_x->max = cal_x->center + x_max_above; + cal_x->min = cal_x->center - x_min_below; + cal_y->max = cal_y->center + y_max_above; + cal_y->min = cal_y->center - y_min_below; + + hid_dbg(ctlr->hdev, "calibration:\n" + "l_x_c=%d l_x_max=%d l_x_min=%d\n" + "l_y_c=%d l_y_max=%d l_y_min=%d\n" + "r_x_c=%d r_x_max=%d r_x_min=%d\n" + "r_y_c=%d r_y_max=%d r_y_min=%d\n", + ctlr->left_stick_cal_x.center, + ctlr->left_stick_cal_x.max, + ctlr->left_stick_cal_x.min, + ctlr->left_stick_cal_y.center, + ctlr->left_stick_cal_y.max, + ctlr->left_stick_cal_y.min, + ctlr->right_stick_cal_x.center, + ctlr->right_stick_cal_x.max, + ctlr->right_stick_cal_x.min, + ctlr->right_stick_cal_y.center, + ctlr->right_stick_cal_y.max, + ctlr->right_stick_cal_y.min); + + return 0; +} + +static int joycon_set_report_mode(struct joycon_ctlr *ctlr) +{ + struct joycon_subcmd_request *req; + u8 buffer[sizeof(*req) + 1] = { 0 }; + + req = (struct joycon_subcmd_request *)buffer; + req->subcmd_id = JC_SUBCMD_SET_REPORT_MODE; + req->data[0] = 0x30; /* standard, full report mode */ + + hid_dbg(ctlr->hdev, "setting controller report mode\n"); + return joycon_send_subcmd(ctlr, req, 1); +} + +static s32 joycon_map_stick_val(struct joycon_stick_cal *cal, s32 val) +{ + s32 center = cal->center; + s32 min = cal->min; + s32 max = cal->max; + s32 new_val; + + if (val > center) { + new_val = (val - center) * JC_MAX_STICK_MAG; + new_val /= (max - center); + } else { + new_val = (center - val) * -JC_MAX_STICK_MAG; + new_val /= (center - min); + } + new_val = clamp(new_val, (s32)-JC_MAX_STICK_MAG, (s32)JC_MAX_STICK_MAG); + return new_val; +} + +static void joycon_parse_report(struct joycon_ctlr *ctlr, + struct joycon_input_report *rep) +{ + struct input_dev *dev = ctlr->input; + u32 btns; + u32 id = ctlr->hdev->product; + + btns = hid_field_extract(ctlr->hdev, rep->button_status, 0, 24); + + if (id != USB_DEVICE_ID_NINTENDO_JOYCONR) { + u16 raw_x; + u16 raw_y; + s32 x; + s32 y; + + /* get raw stick values */ + raw_x = hid_field_extract(ctlr->hdev, rep->left_stick, 0, 12); + raw_y = hid_field_extract(ctlr->hdev, + rep->left_stick + 1, 4, 12); + /* map the stick values */ + x = joycon_map_stick_val(&ctlr->left_stick_cal_x, raw_x); + y = -joycon_map_stick_val(&ctlr->left_stick_cal_y, raw_y); + /* report sticks */ + input_report_abs(dev, ABS_X, x); + input_report_abs(dev, ABS_Y, y); + + /* report buttons */ + input_report_key(dev, BTN_TL, btns & JC_BTN_L); + input_report_key(dev, BTN_TL2, btns & JC_BTN_ZL); + if (id != USB_DEVICE_ID_NINTENDO_PROCON) { + /* Report the S buttons as the non-existent triggers */ + input_report_key(dev, BTN_TR, btns & JC_BTN_SL_L); + input_report_key(dev, BTN_TR2, btns & JC_BTN_SR_L); + } + input_report_key(dev, BTN_SELECT, btns & JC_BTN_MINUS); + input_report_key(dev, BTN_THUMBL, btns & JC_BTN_LSTICK); + input_report_key(dev, BTN_Z, btns & JC_BTN_CAP); + input_report_key(dev, BTN_DPAD_DOWN, btns & JC_BTN_DOWN); + input_report_key(dev, BTN_DPAD_UP, btns & JC_BTN_UP); + input_report_key(dev, BTN_DPAD_RIGHT, btns & JC_BTN_RIGHT); + input_report_key(dev, BTN_DPAD_LEFT, btns & JC_BTN_LEFT); + } + if (id != USB_DEVICE_ID_NINTENDO_JOYCONL) { + u16 raw_x; + u16 raw_y; + s32 x; + s32 y; + + /* get raw stick values */ + raw_x = hid_field_extract(ctlr->hdev, rep->right_stick, 0, 12); + raw_y = hid_field_extract(ctlr->hdev, + rep->right_stick + 1, 4, 12); + /* map stick values */ + x = joycon_map_stick_val(&ctlr->right_stick_cal_x, raw_x); + y = -joycon_map_stick_val(&ctlr->right_stick_cal_y, raw_y); + /* report sticks */ + input_report_abs(dev, ABS_RX, x); + input_report_abs(dev, ABS_RY, y); + + /* report buttons */ + input_report_key(dev, BTN_TR, btns & JC_BTN_R); + input_report_key(dev, BTN_TR2, btns & JC_BTN_ZR); + if (id != USB_DEVICE_ID_NINTENDO_PROCON) { + /* Report the S buttons as the non-existent triggers */ + input_report_key(dev, BTN_TL, btns & JC_BTN_SL_R); + input_report_key(dev, BTN_TL2, btns & JC_BTN_SR_R); + } + input_report_key(dev, BTN_START, btns & JC_BTN_PLUS); + input_report_key(dev, BTN_THUMBR, btns & JC_BTN_RSTICK); + input_report_key(dev, BTN_MODE, btns & JC_BTN_HOME); + input_report_key(dev, BTN_WEST, btns & JC_BTN_Y); + input_report_key(dev, BTN_NORTH, btns & JC_BTN_X); + input_report_key(dev, BTN_EAST, btns & JC_BTN_A); + input_report_key(dev, BTN_SOUTH, btns & JC_BTN_B); + } + + input_sync(dev); +} + + +static const unsigned int joycon_button_inputs_l[] = { + BTN_SELECT, BTN_Z, BTN_THUMBL, + BTN_DPAD_UP, BTN_DPAD_DOWN, BTN_DPAD_LEFT, BTN_DPAD_RIGHT, + BTN_TL, BTN_TL2, + 0 /* 0 signals end of array */ +}; + +static const unsigned int joycon_button_inputs_r[] = { + BTN_START, BTN_MODE, BTN_THUMBR, + BTN_SOUTH, BTN_EAST, BTN_NORTH, BTN_WEST, + BTN_TR, BTN_TR2, + 0 /* 0 signals end of array */ +}; + +static DEFINE_MUTEX(joycon_input_num_mutex); +static int joycon_input_create(struct joycon_ctlr *ctlr) +{ + struct hid_device *hdev; + static int input_num = 1; + const char *name; + int ret; + int i; + + hdev = ctlr->hdev; + + switch (hdev->product) { + case USB_DEVICE_ID_NINTENDO_PROCON: + name = "Nintendo Switch Pro Controller"; + break; + case USB_DEVICE_ID_NINTENDO_JOYCONL: + name = "Nintendo Switch Left Joy-Con"; + break; + case USB_DEVICE_ID_NINTENDO_JOYCONR: + name = "Nintendo Switch Right Joy-Con"; + break; + default: /* Should be impossible */ + hid_err(hdev, "Invalid hid product\n"); + return -EINVAL; + } + + ctlr->input = devm_input_allocate_device(&hdev->dev); + if (!ctlr->input) + return -ENOMEM; + ctlr->input->id.bustype = hdev->bus; + ctlr->input->id.vendor = hdev->vendor; + ctlr->input->id.product = hdev->product; + ctlr->input->id.version = hdev->version; + ctlr->input->name = name; + input_set_drvdata(ctlr->input, ctlr); + + + /* set up sticks */ + if (hdev->product != USB_DEVICE_ID_NINTENDO_JOYCONR) { + input_set_abs_params(ctlr->input, ABS_X, + -JC_MAX_STICK_MAG, JC_MAX_STICK_MAG, + JC_STICK_FUZZ, JC_STICK_FLAT); + input_set_abs_params(ctlr->input, ABS_Y, + -JC_MAX_STICK_MAG, JC_MAX_STICK_MAG, + JC_STICK_FUZZ, JC_STICK_FLAT); + } + if (hdev->product != USB_DEVICE_ID_NINTENDO_JOYCONL) { + input_set_abs_params(ctlr->input, ABS_RX, + -JC_MAX_STICK_MAG, JC_MAX_STICK_MAG, + JC_STICK_FUZZ, JC_STICK_FLAT); + input_set_abs_params(ctlr->input, ABS_RY, + -JC_MAX_STICK_MAG, JC_MAX_STICK_MAG, + JC_STICK_FUZZ, JC_STICK_FLAT); + } + + /* set up buttons */ + if (hdev->product != USB_DEVICE_ID_NINTENDO_JOYCONR) { + for (i = 0; joycon_button_inputs_l[i] > 0; i++) + input_set_capability(ctlr->input, EV_KEY, + joycon_button_inputs_l[i]); + } + if (hdev->product != USB_DEVICE_ID_NINTENDO_JOYCONL) { + for (i = 0; joycon_button_inputs_r[i] > 0; i++) + input_set_capability(ctlr->input, EV_KEY, + joycon_button_inputs_r[i]); + } + + ret = input_register_device(ctlr->input); + if (ret) + return ret; + + /* Set the default controller player leds based on controller number */ + mutex_lock(&joycon_input_num_mutex); + mutex_lock(&ctlr->output_mutex); + ret = joycon_set_player_leds(ctlr, 0, 0xF >> (4 - input_num)); + if (ret) + hid_warn(ctlr->hdev, "Failed to set leds; ret=%d\n", ret); + mutex_unlock(&ctlr->output_mutex); + if (++input_num > 4) + input_num = 1; + mutex_unlock(&joycon_input_num_mutex); + + return 0; +} + +/* Common handler for parsing inputs */ +static int joycon_ctlr_read_handler(struct joycon_ctlr *ctlr, u8 *data, + int size) +{ + int ret = 0; + + if (data[0] == JC_INPUT_SUBCMD_REPLY || data[0] == JC_INPUT_IMU_DATA || + data[0] == JC_INPUT_MCU_DATA) { + if (size >= 12) /* make sure it contains the input report */ + joycon_parse_report(ctlr, + (struct joycon_input_report *)data); + } + + return ret; +} + +static int joycon_ctlr_handle_event(struct joycon_ctlr *ctlr, u8 *data, + int size) +{ + int ret = 0; + bool match = false; + struct joycon_input_report *report; + + if (unlikely(mutex_is_locked(&ctlr->output_mutex)) && + ctlr->msg_type != JOYCON_MSG_TYPE_NONE) { + switch (ctlr->msg_type) { + case JOYCON_MSG_TYPE_USB: + if (size < 2) + break; + if (data[0] == JC_INPUT_USB_RESPONSE && + data[1] == ctlr->usb_ack_match) + match = true; + break; + case JOYCON_MSG_TYPE_SUBCMD: + if (size < sizeof(struct joycon_input_report) || + data[0] != JC_INPUT_SUBCMD_REPLY) + break; + report = (struct joycon_input_report *)data; + if (report->reply.id == ctlr->subcmd_ack_match) + match = true; + break; + default: + break; + } + + if (match) { + memcpy(ctlr->input_buf, data, + min(size, (int)JC_MAX_RESP_SIZE)); + ctlr->msg_type = JOYCON_MSG_TYPE_NONE; + ctlr->received_resp = true; + wake_up(&ctlr->wait); + + /* This message has been handled */ + return 1; + } + } + + if (ctlr->ctlr_state == JOYCON_CTLR_STATE_READ) + ret = joycon_ctlr_read_handler(ctlr, data, size); + + return ret; +} + +static int nintendo_hid_event(struct hid_device *hdev, + struct hid_report *report, u8 *raw_data, int size) +{ + struct joycon_ctlr *ctlr = hid_get_drvdata(hdev); + + if (size < 1) + return -EINVAL; + + return joycon_ctlr_handle_event(ctlr, raw_data, size); +} + +static int nintendo_hid_probe(struct hid_device *hdev, + const struct hid_device_id *id) +{ + int ret; + struct joycon_ctlr *ctlr; + + hid_dbg(hdev, "probe - start\n"); + + ctlr = devm_kzalloc(&hdev->dev, sizeof(*ctlr), GFP_KERNEL); + if (!ctlr) { + ret = -ENOMEM; + goto err; + } + + ctlr->hdev = hdev; + ctlr->ctlr_state = JOYCON_CTLR_STATE_INIT; + hid_set_drvdata(hdev, ctlr); + mutex_init(&ctlr->output_mutex); + init_waitqueue_head(&ctlr->wait); + + ret = hid_parse(hdev); + if (ret) { + hid_err(hdev, "HID parse failed\n"); + goto err; + } + + ret = hid_hw_start(hdev, HID_CONNECT_HIDRAW); + if (ret) { + hid_err(hdev, "HW start failed\n"); + goto err; + } + + ret = hid_hw_open(hdev); + if (ret) { + hid_err(hdev, "cannot start hardware I/O\n"); + goto err_stop; + } + + hid_device_io_start(hdev); + + /* Initialize the controller */ + mutex_lock(&ctlr->output_mutex); + /* if handshake command fails, assume ble pro controller */ + if (hdev->product == USB_DEVICE_ID_NINTENDO_PROCON && + !joycon_send_usb(ctlr, JC_USB_CMD_HANDSHAKE)) { + hid_dbg(hdev, "detected USB controller\n"); + /* set baudrate for improved latency */ + ret = joycon_send_usb(ctlr, JC_USB_CMD_BAUDRATE_3M); + if (ret) { + hid_err(hdev, "Failed to set baudrate; ret=%d\n", ret); + goto err_mutex; + } + /* handshake */ + ret = joycon_send_usb(ctlr, JC_USB_CMD_HANDSHAKE); + if (ret) { + hid_err(hdev, "Failed handshake; ret=%d\n", ret); + goto err_mutex; + } + /* + * Set no timeout (to keep controller in USB mode). + * This doesn't send a response, so ignore the timeout. + */ + joycon_send_usb(ctlr, JC_USB_CMD_NO_TIMEOUT); + } + + /* get controller calibration data, and parse it */ + ret = joycon_request_calibration(ctlr); + if (ret) { + /* + * We can function with default calibration, but it may be + * inaccurate. Provide a warning, and continue on. + */ + hid_warn(hdev, "Analog stick positions may be inaccurate\n"); + } + + /* Set the reporting mode to 0x30, which is the full report mode */ + ret = joycon_set_report_mode(ctlr); + if (ret) { + hid_err(hdev, "Failed to set report mode; ret=%d\n", ret); + goto err_mutex; + } + + mutex_unlock(&ctlr->output_mutex); + + ret = joycon_input_create(ctlr); + if (ret) { + hid_err(hdev, "Failed to create input device; ret=%d\n", ret); + goto err_close; + } + + ctlr->ctlr_state = JOYCON_CTLR_STATE_READ; + + hid_dbg(hdev, "probe - success\n"); + return 0; + +err_mutex: + mutex_unlock(&ctlr->output_mutex); +err_close: + hid_hw_close(hdev); +err_stop: + hid_hw_stop(hdev); +err: + hid_err(hdev, "probe - fail = %d\n", ret); + return ret; +} + +static void nintendo_hid_remove(struct hid_device *hdev) +{ + hid_dbg(hdev, "remove\n"); + hid_hw_close(hdev); + hid_hw_stop(hdev); +} + +static const struct hid_device_id nintendo_hid_devices[] = { + { HID_USB_DEVICE(USB_VENDOR_ID_NINTENDO, + USB_DEVICE_ID_NINTENDO_PROCON) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, + USB_DEVICE_ID_NINTENDO_PROCON) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, + USB_DEVICE_ID_NINTENDO_JOYCONL) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, + USB_DEVICE_ID_NINTENDO_JOYCONR) }, + { } +}; +MODULE_DEVICE_TABLE(hid, nintendo_hid_devices); + +static struct hid_driver nintendo_hid_driver = { + .name = "nintendo", + .id_table = nintendo_hid_devices, + .probe = nintendo_hid_probe, + .remove = nintendo_hid_remove, + .raw_event = nintendo_hid_event, +}; +module_hid_driver(nintendo_hid_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Daniel J. Ogorchock "); +MODULE_DESCRIPTION("Driver for Nintendo Switch Controllers"); From 8c0adf270ac072b49b147babfa937ef6dfd40ad4 Mon Sep 17 00:00:00 2001 From: Siarhei Vishniakou Date: Tue, 17 Dec 2019 10:07:39 -0800 Subject: [PATCH 3467/3715] ANDROID: Enable HID_NINTENDO as y This config will enable the Nintendo Switch Pro controller driver. Change-Id: I50645a611566928e20a1afd4024f71803ed5fefa Signed-off-by: Siarhei Vishniakou Bug: 135136477 Test: tested via custom test app Test: atest NintendoSwitchProTest --- arch/arm64/configs/cuttlefish_defconfig | 1 + arch/x86/configs/x86_64_cuttlefish_defconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index 8b5ebe05fe9f..4dd213de909e 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -361,6 +361,7 @@ CONFIG_HID_MAGICMOUSE=y CONFIG_HID_MICROSOFT=y CONFIG_HID_MONTEREY=y CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NINTENDO=y CONFIG_HID_NTRIG=y CONFIG_HID_ORTEK=y CONFIG_HID_PANTHERLORD=y diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 43c457eb9446..644a3391c246 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -385,6 +385,7 @@ CONFIG_HID_MAGICMOUSE=y CONFIG_HID_MICROSOFT=y CONFIG_HID_MONTEREY=y CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NINTENDO=y CONFIG_HID_NTRIG=y CONFIG_HID_ORTEK=y CONFIG_HID_PANTHERLORD=y From 25a911618ee5643f63c81fe69f9b67abd50c33f1 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 20 Feb 2020 17:50:12 -0800 Subject: [PATCH 3468/3715] ANDROID: cuttlefish_defconfig: Disable CONFIG_RT_GROUP_SCHED Disable CONFIG_RT_GROUP_SCHED to control RT cpu allowance globally. Bug: 149954332 Change-Id: I9487bd113502e52f19637e43109433cb13e97a23 Signed-off-by: Suren Baghdasaryan --- arch/arm64/configs/cuttlefish_defconfig | 1 - arch/x86/configs/x86_64_cuttlefish_defconfig | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index 4dd213de909e..70ee6a310ed6 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -12,7 +12,6 @@ CONFIG_IKCONFIG_PROC=y CONFIG_IKHEADERS=y CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y -CONFIG_RT_GROUP_SCHED=y CONFIG_CGROUP_FREEZER=y CONFIG_CPUSETS=y # CONFIG_PROC_PID_CPUSET is not set diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 644a3391c246..a9979a58d769 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -17,7 +17,6 @@ CONFIG_CGROUPS=y CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y CONFIG_CGROUP_SCHED=y -CONFIG_RT_GROUP_SCHED=y CONFIG_CGROUP_FREEZER=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y From 1cb89b322d016b6a227f38947369475fe2359b9d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 2 Mar 2020 09:30:14 +0100 Subject: [PATCH 3469/3715] ANDROID: fix build issue in security/selinux/avc.c This came up due to interaction with the backport of commit 6b6bc6205d98 ("selinux: wrap AVC state") and 4.14.172 causing a build error. Bug: 140252993 Signed-off-by: Greg Kroah-Hartman Change-Id: I894f4e8144c1ad28fbbea114ff830a730497b05d --- security/selinux/avc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/selinux/avc.c b/security/selinux/avc.c index ce5465bb79e3..634b75e60669 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -895,7 +895,7 @@ static int avc_update_node(struct selinux_avc *avc, if (orig->ae.xp_node) { rc = avc_xperms_populate(node, orig->ae.xp_node); if (rc) { - avc_node_kill(node); + avc_node_kill(avc, node); goto out_unlock; } } From a13dee535e7c54f8e0ffcd934432eeb9e19f180d Mon Sep 17 00:00:00 2001 From: Evan Green Date: Mon, 2 Jul 2018 16:03:46 -0700 Subject: [PATCH 3470/3715] BACKPORT: loop: Add LOOP_SET_BLOCK_SIZE in compat ioctl This change adds LOOP_SET_BLOCK_SIZE as one of the supported ioctls in lo_compat_ioctl. It only takes an unsigned long argument, and in practice a 32-bit value works fine. Reviewed-by: Omar Sandoval Signed-off-by: Evan Green Signed-off-by: Jens Axboe (cherry picked from commit 9fea4b395260175de4016b42982f45a3e6e03d0b) [adelva: trivially backported around another backport conflict] Bug: 150622092 Change-Id: I98e14d3ab60ca638c3aebcea942e2207c738af95 Signed-off-by: Alistair Delva --- drivers/block/loop.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 453e3728e657..ca912eecc74e 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1615,6 +1615,7 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, arg = (unsigned long) compat_ptr(arg); case LOOP_SET_FD: case LOOP_CHANGE_FD: + case LOOP_SET_BLOCK_SIZE: case LOOP_SET_DIRECT_IO: err = lo_ioctl(bdev, mode, cmd, arg); break; From e5f689a1e13ab2f99e1488180d2d8099a9246582 Mon Sep 17 00:00:00 2001 From: Kiwoong Kim Date: Wed, 19 Feb 2020 10:16:33 +0900 Subject: [PATCH 3471/3715] FROMLIST: ufs: fix a bug on printing PRDT In some architectures, an unit of PRDTO and PRDTL in UFSHCI spec assume bytes, not double word specified in the spec. W/o this patch, when the driver executes this, kernel panic occurres because of abnormal accesses. Bug: 149797634 Link: https://lore.kernel.org/linux-scsi/20200218224307.8017-1-kwmad.kim@samsung.com/ Signed-off-by: Kiwoong Kim Signed-off-by: Greg Kroah-Hartman (cherry picked from android-mainline commit 8ec7bddd873f393ea94a3bc9dde9781e5e0fbfe1) Change-Id: I58ffa07535df8011b8d357135b80030833e725f9 Signed-off-by: Eric Biggers --- drivers/scsi/ufs/ufshcd.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index d360d0669527..11ae1e689f41 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -403,8 +403,11 @@ void ufshcd_print_trs(struct ufs_hba *hba, unsigned long bitmap, bool pr_prdt) ufshcd_hex_dump("UPIU RSP: ", lrbp->ucd_rsp_ptr, sizeof(struct utp_upiu_rsp)); - prdt_length = le16_to_cpu( - lrbp->utr_descriptor_ptr->prd_table_length); + prdt_length = + le16_to_cpu(lrbp->utr_descriptor_ptr->prd_table_length); + if (hba->quirks & UFSHCD_QUIRK_PRDT_BYTE_GRAN) + prdt_length /= sizeof(struct ufshcd_sg_entry); + dev_err(hba->dev, "UPIU[%d] - PRDT - %d entries phys@0x%llx\n", tag, prdt_length, From 167db78a71a2518f53eb17871876393d704f3572 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 24 Feb 2020 14:37:12 -0800 Subject: [PATCH 3472/3715] ANDROID: scsi: ufs: allow ufs variants to override sg entry size Modify the UFSHCD core to allow 'struct ufshcd_sg_entry' to be variable-length. The default is the standard length, but variants can override ufs_hba::sg_entry_size with a larger value if there are vendor-specific fields following the standard ones. This is needed to support inline encryption with ufs-exynos (FMP). Bug: 129991660 Signed-off-by: Eric Biggers (cherry picked from android-mainline commit 8de80df7d7e407369d1f8c2971daf29348d1a643) (resolved trivial merge conflict in ufshcd_alloc_host()) Change-Id: I6ab9458d5c23331013e6b736d6fea378a6b5b86c Signed-off-by: Eric Biggers --- drivers/scsi/ufs/ufshcd.c | 32 +++++++++++++++++--------------- drivers/scsi/ufs/ufshcd.h | 2 ++ drivers/scsi/ufs/ufshci.h | 12 ++++++++++-- 3 files changed, 29 insertions(+), 17 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 11ae1e689f41..74be1791e135 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -406,7 +406,7 @@ void ufshcd_print_trs(struct ufs_hba *hba, unsigned long bitmap, bool pr_prdt) prdt_length = le16_to_cpu(lrbp->utr_descriptor_ptr->prd_table_length); if (hba->quirks & UFSHCD_QUIRK_PRDT_BYTE_GRAN) - prdt_length /= sizeof(struct ufshcd_sg_entry); + prdt_length /= hba->sg_entry_size; dev_err(hba->dev, "UPIU[%d] - PRDT - %d entries phys@0x%llx\n", @@ -415,7 +415,7 @@ void ufshcd_print_trs(struct ufs_hba *hba, unsigned long bitmap, bool pr_prdt) if (pr_prdt) ufshcd_hex_dump("UPIU PRDT: ", lrbp->ucd_prdt_ptr, - sizeof(struct ufshcd_sg_entry) * prdt_length); + hba->sg_entry_size * prdt_length); } } @@ -1982,7 +1982,7 @@ ufshcd_send_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd) */ static int ufshcd_map_sg(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) { - struct ufshcd_sg_entry *prd_table; + struct ufshcd_sg_entry *prd; struct scatterlist *sg; struct scsi_cmnd *cmd; int sg_segments; @@ -1997,21 +1997,22 @@ static int ufshcd_map_sg(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) if (hba->quirks & UFSHCD_QUIRK_PRDT_BYTE_GRAN) lrbp->utr_descriptor_ptr->prd_table_length = cpu_to_le16((u16)(sg_segments * - sizeof(struct ufshcd_sg_entry))); + hba->sg_entry_size)); else lrbp->utr_descriptor_ptr->prd_table_length = cpu_to_le16((u16) (sg_segments)); - prd_table = (struct ufshcd_sg_entry *)lrbp->ucd_prdt_ptr; + prd = (struct ufshcd_sg_entry *)lrbp->ucd_prdt_ptr; scsi_for_each_sg(cmd, sg, sg_segments, i) { - prd_table[i].size = + prd->size = cpu_to_le32(((u32) sg_dma_len(sg))-1); - prd_table[i].base_addr = + prd->base_addr = cpu_to_le32(lower_32_bits(sg->dma_address)); - prd_table[i].upper_addr = + prd->upper_addr = cpu_to_le32(upper_32_bits(sg->dma_address)); - prd_table[i].reserved = 0; + prd->reserved = 0; + prd = (void *)prd + hba->sg_entry_size; } } else { lrbp->utr_descriptor_ptr->prd_table_length = 0; @@ -3247,7 +3248,7 @@ static int ufshcd_memory_alloc(struct ufs_hba *hba) size_t utmrdl_size, utrdl_size, ucdl_size; /* Allocate memory for UTP command descriptors */ - ucdl_size = (sizeof(struct utp_transfer_cmd_desc) * hba->nutrs); + ucdl_size = (sizeof_utp_transfer_cmd_desc(hba) * hba->nutrs); hba->ucdl_base_addr = dmam_alloc_coherent(hba->dev, ucdl_size, &hba->ucdl_dma_addr, @@ -3343,7 +3344,7 @@ static void ufshcd_host_memory_configure(struct ufs_hba *hba) prdt_offset = offsetof(struct utp_transfer_cmd_desc, prd_table); - cmd_desc_size = sizeof(struct utp_transfer_cmd_desc); + cmd_desc_size = sizeof_utp_transfer_cmd_desc(hba); cmd_desc_dma_addr = hba->ucdl_dma_addr; for (i = 0; i < hba->nutrs; i++) { @@ -3375,17 +3376,17 @@ static void ufshcd_host_memory_configure(struct ufs_hba *hba) hba->lrb[i].utr_descriptor_ptr = (utrdlp + i); hba->lrb[i].utrd_dma_addr = hba->utrdl_dma_addr + (i * sizeof(struct utp_transfer_req_desc)); - hba->lrb[i].ucd_req_ptr = - (struct utp_upiu_req *)(cmd_descp + i); + hba->lrb[i].ucd_req_ptr = (struct utp_upiu_req *)cmd_descp; hba->lrb[i].ucd_req_dma_addr = cmd_desc_element_addr; hba->lrb[i].ucd_rsp_ptr = - (struct utp_upiu_rsp *)cmd_descp[i].response_upiu; + (struct utp_upiu_rsp *)cmd_descp->response_upiu; hba->lrb[i].ucd_rsp_dma_addr = cmd_desc_element_addr + response_offset; hba->lrb[i].ucd_prdt_ptr = - (struct ufshcd_sg_entry *)cmd_descp[i].prd_table; + (struct ufshcd_sg_entry *)cmd_descp->prd_table; hba->lrb[i].ucd_prdt_dma_addr = cmd_desc_element_addr + prdt_offset; + cmd_descp = (void *)cmd_descp + cmd_desc_size; } } @@ -7921,6 +7922,7 @@ int ufshcd_alloc_host(struct device *dev, struct ufs_hba **hba_handle) hba->host = host; hba->dev = dev; *hba_handle = hba; + hba->sg_entry_size = sizeof(struct ufshcd_sg_entry); INIT_LIST_HEAD(&hba->clk_list_head); diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index a755ad430a4f..73124e85dada 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -499,6 +499,7 @@ struct ufs_stats { * @ufs_version: UFS Version to which controller complies * @vops: pointer to variant specific operations * @priv: pointer to variant specific private data + * @sg_entry_size: size of struct ufshcd_sg_entry (may include variant fields) * @irq: Irq number of the controller * @active_uic_cmd: handle of active UIC command * @uic_cmd_mutex: mutex for uic command @@ -581,6 +582,7 @@ struct ufs_hba { struct ufs_hba_variant_ops *vops; void *priv; const struct ufs_hba_crypto_variant_ops *crypto_vops; + size_t sg_entry_size; unsigned int irq; bool is_irq_enabled; diff --git a/drivers/scsi/ufs/ufshci.h b/drivers/scsi/ufs/ufshci.h index 97006b324ee7..8a032c557df9 100644 --- a/drivers/scsi/ufs/ufshci.h +++ b/drivers/scsi/ufs/ufshci.h @@ -417,20 +417,28 @@ struct ufshcd_sg_entry { __le32 upper_addr; __le32 reserved; __le32 size; + /* + * followed by variant-specific fields if + * hba->sg_entry_size != sizeof(struct ufshcd_sg_entry) + */ }; /** * struct utp_transfer_cmd_desc - UFS Command Descriptor structure * @command_upiu: Command UPIU Frame address * @response_upiu: Response UPIU Frame address - * @prd_table: Physical Region Descriptor + * @prd_table: Physical Region Descriptor: an array of SG_ALL struct + * ufshcd_sg_entry's. Variant-specific fields may be present after each. */ struct utp_transfer_cmd_desc { u8 command_upiu[ALIGNED_UPIU_SIZE]; u8 response_upiu[ALIGNED_UPIU_SIZE]; - struct ufshcd_sg_entry prd_table[SG_ALL]; + u8 prd_table[]; }; +#define sizeof_utp_transfer_cmd_desc(hba) \ + (sizeof(struct utp_transfer_cmd_desc) + SG_ALL * (hba)->sg_entry_size) + /** * struct request_desc_header - Descriptor Header common to both UTRD and UTMRD * @dword0: Descriptor Header DW0 From a58373148e201645e9b23631845c695b9a66ce58 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Wed, 4 Mar 2020 13:58:07 -0800 Subject: [PATCH 3473/3715] ANDROID: cuttlefish: disable KPROBES Security teams from Google and Qualcomm request disabling due to "large attack vector". Bug: 149659981 Signed-off-by: Todd Kjos Change-Id: Iee4ace633f28cf4359cb0495be59546a8d3951cd --- arch/arm64/configs/cuttlefish_defconfig | 1 - arch/x86/configs/x86_64_cuttlefish_defconfig | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index 70ee6a310ed6..bd0df26e4416 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -38,7 +38,6 @@ CONFIG_EMBEDDED=y # CONFIG_COMPAT_BRK is not set # CONFIG_SLAB_MERGE_DEFAULT is not set CONFIG_PROFILING=y -CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y CONFIG_CC_STACKPROTECTOR_STRONG=y CONFIG_LTO_CLANG=y diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index a9979a58d769..ad10c87900ca 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -38,7 +38,6 @@ CONFIG_EMBEDDED=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_OPROFILE=y -CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y CONFIG_CC_STACKPROTECTOR_STRONG=y CONFIG_REFCOUNT_FULL=y From a69ee7eebc234606238164f4f952b612c331507d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 19 Feb 2020 14:02:32 -0800 Subject: [PATCH 3474/3715] FROMLIST: lib: test_stackinit.c: XFAIL switch variable init tests The tests for initializing a variable defined between a switch statement's test and its first "case" statement are currently not initialized in Clang[1] nor the proposed auto-initialization feature in GCC. We should retain the test (so that we can evaluate compiler fixes), but mark it as an "expected fail". The rest of the kernel source will be adjusted to avoid this corner case. Also disable -Wswitch-unreachable for the test so that the intentionally broken code won't trigger warnings for GCC (nor future Clang) when initialization happens this unhandled place. [1] https://bugs.llvm.org/show_bug.cgi?id=44916 Suggested-by: Alexander Potapenko Signed-off-by: Kees Cook [adelva: cherry picking to avoid boot test flakes] Bug: 144999193 Link: https://lore.kernel.org/lkml/202002191358.2897A07C6@keescook/ Change-Id: I0e691f2299ab42526ea306a92551a1188c469136 Signed-off-by: Alistair Delva --- lib/Makefile | 1 + lib/test_stackinit.c | 28 ++++++++++++++++++---------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/lib/Makefile b/lib/Makefile index d200f404946b..d3ab213bc1f0 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -78,6 +78,7 @@ obj-$(CONFIG_TEST_KMOD) += test_kmod.o obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o obj-$(CONFIG_TEST_MEMCAT_P) += test_memcat_p.o obj-$(CONFIG_TEST_OBJAGG) += test_objagg.o +CFLAGS_test_stackinit.o += $(call cc-disable-warning, switch-unreachable) obj-$(CONFIG_TEST_STACKINIT) += test_stackinit.o obj-$(CONFIG_TEST_MEMINIT) += test_meminit.o diff --git a/lib/test_stackinit.c b/lib/test_stackinit.c index b7e586d559ee..c589bfa120f3 100644 --- a/lib/test_stackinit.c +++ b/lib/test_stackinit.c @@ -92,8 +92,9 @@ static bool range_contains(char *haystack_start, size_t haystack_size, * @var_type: type to be tested for zeroing initialization * @which: is this a SCALAR, STRING, or STRUCT type? * @init_level: what kind of initialization is performed + * @xfail: is this test expected to fail? */ -#define DEFINE_TEST_DRIVER(name, var_type, which) \ +#define DEFINE_TEST_DRIVER(name, var_type, which, xfail) \ /* Returns 0 on success, 1 on failure. */ \ static noinline __init int test_ ## name (void) \ { \ @@ -139,13 +140,14 @@ static noinline __init int test_ ## name (void) \ for (sum = 0, i = 0; i < target_size; i++) \ sum += (check_buf[i] == 0xFF); \ \ - if (sum == 0) \ + if (sum == 0) { \ pr_info(#name " ok\n"); \ - else \ - pr_warn(#name " FAIL (uninit bytes: %d)\n", \ - sum); \ - \ - return (sum != 0); \ + return 0; \ + } else { \ + pr_warn(#name " %sFAIL (uninit bytes: %d)\n", \ + (xfail) ? "X" : "", sum); \ + return (xfail) ? 0 : 1; \ + } \ } #define DEFINE_TEST(name, var_type, which, init_level) \ /* no-op to force compiler into ignoring "uninitialized" vars */\ @@ -189,7 +191,7 @@ static noinline __init int leaf_ ## name(unsigned long sp, \ \ return (int)buf[0] | (int)buf[sizeof(buf) - 1]; \ } \ -DEFINE_TEST_DRIVER(name, var_type, which) +DEFINE_TEST_DRIVER(name, var_type, which, 0) /* Structure with no padding. */ struct test_packed { @@ -327,8 +329,14 @@ static noinline __init int leaf_switch_2_none(unsigned long sp, bool fill, return __leaf_switch_none(2, fill); } -DEFINE_TEST_DRIVER(switch_1_none, uint64_t, SCALAR); -DEFINE_TEST_DRIVER(switch_2_none, uint64_t, SCALAR); +/* + * These are expected to fail for most configurations because neither + * GCC nor Clang have a way to perform initialization of variables in + * non-code areas (i.e. in a switch statement before the first "case"). + * https://bugs.llvm.org/show_bug.cgi?id=44916 + */ +DEFINE_TEST_DRIVER(switch_1_none, uint64_t, SCALAR, 1); +DEFINE_TEST_DRIVER(switch_2_none, uint64_t, SCALAR, 1); static int __init test_stackinit_init(void) { From 0e97eac233791a20b588ba35b53073b9a915cc7d Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 21 Feb 2020 19:01:24 +0100 Subject: [PATCH 3475/3715] UPSTREAM: binder: prevent UAF for binderfs devices On binder_release(), binder_defer_work(proc, BINDER_DEFERRED_RELEASE) is called which punts the actual cleanup operation to a workqueue. At some point, binder_deferred_func() will be called which will end up calling binder_deferred_release() which will retrieve and cleanup the binder_context attach to this struct binder_proc. If we trace back where this binder_context is attached to binder_proc we see that it is set in binder_open() and is taken from the struct binder_device it is associated with. This obviously assumes that the struct binder_device that context is attached to is _never_ freed. While that might be true for devtmpfs binder devices it is most certainly wrong for binderfs binder devices. So, assume binder_open() is called on a binderfs binder devices. We now stash away the struct binder_context associated with that struct binder_devices: proc->context = &binder_dev->context; /* binderfs stashes devices in i_private */ if (is_binderfs_device(nodp)) { binder_dev = nodp->i_private; info = nodp->i_sb->s_fs_info; binder_binderfs_dir_entry_proc = info->proc_log_dir; } else { . . . proc->context = &binder_dev->context; Now let's assume that the binderfs instance for that binder devices is shutdown via umount() and/or the mount namespace associated with it goes away. As long as there is still an fd open for that binderfs binder device things are fine. But let's assume we now close the last fd for that binderfs binder device. Now binder_release() is called and punts to the workqueue. Assume that the workqueue has quite a bit of stuff to do and doesn't get to cleaning up the struct binder_proc and the associated struct binder_context with it for that binderfs binder device right away. In the meantime, the VFS is killing the super block and is ultimately calling sb->evict_inode() which means it will call binderfs_evict_inode() which does: static void binderfs_evict_inode(struct inode *inode) { struct binder_device *device = inode->i_private; struct binderfs_info *info = BINDERFS_I(inode); clear_inode(inode); if (!S_ISCHR(inode->i_mode) || !device) return; mutex_lock(&binderfs_minors_mutex); --info->device_count; ida_free(&binderfs_minors, device->miscdev.minor); mutex_unlock(&binderfs_minors_mutex); kfree(device->context.name); kfree(device); } thereby freeing the struct binder_device including struct binder_context. Now the workqueue finally has time to get around to cleaning up struct binder_proc and is now trying to access the associate struct binder_context. Since it's already freed it will OOPs. Fix this by holding an additional reference to the inode that is only released once the workqueue is done cleaning up struct binder_proc. This is an easy alternative to introducing separate refcounting on struct binder_device which we can always do later if it becomes necessary. This is an alternative fix to 51d8a7eca677 ("binder: prevent UAF read in print_binder_transaction_log_entry()"). Fixes: 3ad20fe393b3 ("binder: implement binderfs") Fixes: 03e2e07e3814 ("binder: Make transaction_log available in binderfs") Related: 51d8a7eca677 ("binder: prevent UAF read in print_binder_transaction_log_entry()") Cc: stable@vger.kernel.org Signed-off-by: Christian Brauner Acked-by: Todd Kjos Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 2669b8b0c798fbe1a31d49e07aa33233d469ad9b) Signed-off-by: Greg Kroah-Hartman Change-Id: I047a1e360b4146872bbc1d206dce7a864bb4588b --- drivers/android/binder.c | 5 ++++- drivers/android/binder_internal.h | 13 +++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 920b1ca35bf0..f4b32cd8b274 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -5185,7 +5185,7 @@ static int binder_open(struct inode *nodp, struct file *filp) /* binderfs stashes devices in i_private */ if (is_binderfs_device(nodp)) { - binder_dev = nodp->i_private; + binder_dev = binderfs_device_get(nodp->i_private); info = nodp->i_sb->s_fs_info; binder_binderfs_dir_entry_proc = info->proc_log_dir; } else { @@ -5369,6 +5369,7 @@ static int binder_node_release(struct binder_node *node, int refs) static void binder_deferred_release(struct binder_proc *proc) { struct binder_context *context = proc->context; + struct binder_device *device; struct rb_node *n; int threads, nodes, incoming_refs, outgoing_refs, active_transactions; @@ -5450,6 +5451,8 @@ static void binder_deferred_release(struct binder_proc *proc) outgoing_refs, active_transactions); binder_proc_dec_tmpref(proc); + device = container_of(proc->context, struct binder_device, context); + binderfs_device_put(device); } static void binder_deferred_func(struct work_struct *work) diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h index bd47f7f72075..8d0f73c39116 100644 --- a/drivers/android/binder_internal.h +++ b/drivers/android/binder_internal.h @@ -35,6 +35,19 @@ struct binder_device { struct inode *binderfs_inode; }; +static inline struct binder_device *binderfs_device_get(struct binder_device *dev) +{ + if (dev->binderfs_inode) + ihold(dev->binderfs_inode); + return dev; +} + +static inline void binderfs_device_put(struct binder_device *dev) +{ + if (dev->binderfs_inode) + iput(dev->binderfs_inode); +} + /** * binderfs_mount_opts - mount options for binderfs * @max: maximum number of allocatable binderfs binder devices From bbfd216527f774dbb1a1084d163af97050962fca Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 3 Mar 2020 17:43:40 +0100 Subject: [PATCH 3476/3715] UPSTREAM: binder: prevent UAF for binderfs devices II This is a necessary follow up to the first fix I proposed and we merged in 2669b8b0c79 ("binder: prevent UAF for binderfs devices"). I have been overly optimistic that the simple fix I proposed would work. But alas, ihold() + iput() won't work since the inodes won't survive the destruction of the superblock. So all we get with my prior fix is a different race with a tinier race-window but it doesn't solve the issue. Fwiw, the problem lies with generic_shutdown_super(). It even has this cozy Al-style comment: if (!list_empty(&sb->s_inodes)) { printk("VFS: Busy inodes after unmount of %s. " "Self-destruct in 5 seconds. Have a nice day...\n", sb->s_id); } On binder_release(), binder_defer_work(proc, BINDER_DEFERRED_RELEASE) is called which punts the actual cleanup operation to a workqueue. At some point, binder_deferred_func() will be called which will end up calling binder_deferred_release() which will retrieve and cleanup the binder_context attach to this struct binder_proc. If we trace back where this binder_context is attached to binder_proc we see that it is set in binder_open() and is taken from the struct binder_device it is associated with. This obviously assumes that the struct binder_device that context is attached to is _never_ freed. While that might be true for devtmpfs binder devices it is most certainly wrong for binderfs binder devices. So, assume binder_open() is called on a binderfs binder devices. We now stash away the struct binder_context associated with that struct binder_devices: proc->context = &binder_dev->context; /* binderfs stashes devices in i_private */ if (is_binderfs_device(nodp)) { binder_dev = nodp->i_private; info = nodp->i_sb->s_fs_info; binder_binderfs_dir_entry_proc = info->proc_log_dir; } else { . . . proc->context = &binder_dev->context; Now let's assume that the binderfs instance for that binder devices is shutdown via umount() and/or the mount namespace associated with it goes away. As long as there is still an fd open for that binderfs binder device things are fine. But let's assume we now close the last fd for that binderfs binder device. Now binder_release() is called and punts to the workqueue. Assume that the workqueue has quite a bit of stuff to do and doesn't get to cleaning up the struct binder_proc and the associated struct binder_context with it for that binderfs binder device right away. In the meantime, the VFS is killing the super block and is ultimately calling sb->evict_inode() which means it will call binderfs_evict_inode() which does: static void binderfs_evict_inode(struct inode *inode) { struct binder_device *device = inode->i_private; struct binderfs_info *info = BINDERFS_I(inode); clear_inode(inode); if (!S_ISCHR(inode->i_mode) || !device) return; mutex_lock(&binderfs_minors_mutex); --info->device_count; ida_free(&binderfs_minors, device->miscdev.minor); mutex_unlock(&binderfs_minors_mutex); kfree(device->context.name); kfree(device); } thereby freeing the struct binder_device including struct binder_context. Now the workqueue finally has time to get around to cleaning up struct binder_proc and is now trying to access the associate struct binder_context. Since it's already freed it will OOPs. Fix this by introducing a refounct on binder devices. This is an alternative fix to 51d8a7eca677 ("binder: prevent UAF read in print_binder_transaction_log_entry()"). Fixes: 3ad20fe393b3 ("binder: implement binderfs") Fixes: 2669b8b0c798 ("binder: prevent UAF for binderfs devices") Fixes: 03e2e07e3814 ("binder: Make transaction_log available in binderfs") Related: 51d8a7eca677 ("binder: prevent UAF read in print_binder_transaction_log_entry()") Cc: stable@vger.kernel.org Signed-off-by: Christian Brauner Acked-by: Todd Kjos Link: https://lore.kernel.org/r/20200303164340.670054-1-christian.brauner@ubuntu.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit f0fe2c0f050d31babcad7d65f1d550d462a40064) Signed-off-by: Greg Kroah-Hartman Change-Id: I54a6c910002bf1077ba0c34c48fb96f4ffbf012e Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 12 +++++++++--- drivers/android/binder_internal.h | 15 ++------------- drivers/android/binderfs.c | 7 +++++-- 3 files changed, 16 insertions(+), 18 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index f4b32cd8b274..7bd038edc1f7 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -5185,13 +5185,14 @@ static int binder_open(struct inode *nodp, struct file *filp) /* binderfs stashes devices in i_private */ if (is_binderfs_device(nodp)) { - binder_dev = binderfs_device_get(nodp->i_private); + binder_dev = nodp->i_private; info = nodp->i_sb->s_fs_info; binder_binderfs_dir_entry_proc = info->proc_log_dir; } else { binder_dev = container_of(filp->private_data, struct binder_device, miscdev); } + refcount_inc(&binder_dev->ref); proc->context = &binder_dev->context; binder_alloc_init(&proc->alloc); @@ -5388,6 +5389,12 @@ static void binder_deferred_release(struct binder_proc *proc) context->binder_context_mgr_node = NULL; } mutex_unlock(&context->context_mgr_node_lock); + device = container_of(proc->context, struct binder_device, context); + if (refcount_dec_and_test(&device->ref)) { + kfree(context->name); + kfree(device); + } + proc->context = NULL; binder_inner_proc_lock(proc); /* * Make sure proc stays alive after we @@ -5451,8 +5458,6 @@ static void binder_deferred_release(struct binder_proc *proc) outgoing_refs, active_transactions); binder_proc_dec_tmpref(proc); - device = container_of(proc->context, struct binder_device, context); - binderfs_device_put(device); } static void binder_deferred_func(struct work_struct *work) @@ -6061,6 +6066,7 @@ static int __init init_binder_device(const char *name) binder_device->miscdev.minor = MISC_DYNAMIC_MINOR; binder_device->miscdev.name = name; + refcount_set(&binder_device->ref, 1); binder_device->context.binder_context_mgr_uid = INVALID_UID; binder_device->context.name = name; mutex_init(&binder_device->context.context_mgr_node_lock); diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h index 8d0f73c39116..8d0bffcc9e27 100644 --- a/drivers/android/binder_internal.h +++ b/drivers/android/binder_internal.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -33,21 +34,9 @@ struct binder_device { struct miscdevice miscdev; struct binder_context context; struct inode *binderfs_inode; + refcount_t ref; }; -static inline struct binder_device *binderfs_device_get(struct binder_device *dev) -{ - if (dev->binderfs_inode) - ihold(dev->binderfs_inode); - return dev; -} - -static inline void binderfs_device_put(struct binder_device *dev) -{ - if (dev->binderfs_inode) - iput(dev->binderfs_inode); -} - /** * binderfs_mount_opts - mount options for binderfs * @max: maximum number of allocatable binderfs binder devices diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index a4f73af4fa4e..b0bad544052f 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -154,6 +154,7 @@ static int binderfs_binder_device_create(struct inode *ref_inode, if (!name) goto err; + refcount_set(&device->ref, 1); device->binderfs_inode = inode; device->context.binder_context_mgr_uid = INVALID_UID; device->context.name = name; @@ -257,8 +258,10 @@ static void binderfs_evict_inode(struct inode *inode) ida_remove(&binderfs_minors, device->miscdev.minor); mutex_unlock(&binderfs_minors_mutex); - kfree(device->context.name); - kfree(device); + if (refcount_dec_and_test(&device->ref)) { + kfree(device->context.name); + kfree(device); + } } /** From 4583acebefaf9923617bfa7c9741db69d8ea6a24 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Mon, 9 Mar 2020 14:22:07 -0700 Subject: [PATCH 3477/3715] FROMLIST: f2fs: fix wrong check on F2FS_IOC_FSSETXATTR This fixes the incorrect failure when enabling project quota on casefold-enabled file. Cc: Daniel Rosenberg Cc: kernel-team@android.com Signed-off-by: Jaegeuk Kim Link: https://lore.kernel.org/linux-f2fs-devel/20200307002440.GA7944@google.com/ Signed-off-by: Daniel Rosenberg Change-Id: Ib2f8db5e20aabf5a4e6e9ebbc071166e8bd3220f --- fs/f2fs/file.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f5724c62334f..f77fa8430d6f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1801,12 +1801,15 @@ static int f2fs_file_flush(struct file *file, fl_owner_t id) static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) { struct f2fs_inode_info *fi = F2FS_I(inode); + u32 masked_flags = fi->i_flags & mask; + + f2fs_bug_on(F2FS_I_SB(inode), (iflags & ~mask)); /* Is it quota file? Do not allow user to mess with it */ if (IS_NOQUOTA(inode)) return -EPERM; - if ((iflags ^ fi->i_flags) & F2FS_CASEFOLD_FL) { + if ((iflags ^ masked_flags) & F2FS_CASEFOLD_FL) { if (!f2fs_sb_has_casefold(F2FS_I_SB(inode))) return -EOPNOTSUPP; if (!f2fs_empty_dir(inode)) @@ -1820,9 +1823,9 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) return -EINVAL; } - if ((iflags ^ fi->i_flags) & F2FS_COMPR_FL) { + if ((iflags ^ masked_flags) & F2FS_COMPR_FL) { if (S_ISREG(inode->i_mode) && - (fi->i_flags & F2FS_COMPR_FL || i_size_read(inode) || + (masked_flags & F2FS_COMPR_FL || i_size_read(inode) || F2FS_HAS_BLOCKS(inode))) return -EINVAL; if (iflags & F2FS_NOCOMP_FL) @@ -1839,8 +1842,8 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) set_compress_context(inode); } } - if ((iflags ^ fi->i_flags) & F2FS_NOCOMP_FL) { - if (fi->i_flags & F2FS_COMPR_FL) + if ((iflags ^ masked_flags) & F2FS_NOCOMP_FL) { + if (masked_flags & F2FS_COMPR_FL) return -EINVAL; } From 864ef06aaef5b164669a5292eb822c454c46dd69 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 29 Feb 2020 04:54:53 +0530 Subject: [PATCH 3478/3715] iwlwifi: pcie: fix rb_allocator workqueue allocation commit 8188a18ee2e48c9a7461139838048363bfce3fef upstream We don't handle failures in the rb_allocator workqueue allocation correctly. To fix that, move the code earlier so the cleanup is easier and we don't have to undo all the interrupt allocations in this case. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho [Ajay: Modified to apply on v4.19.y and v4.14.y] Signed-off-by: Ajay Kaher Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index dffa697d71e0..8a074a516fb2 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -3023,6 +3023,15 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, spin_lock_init(&trans_pcie->reg_lock); mutex_init(&trans_pcie->mutex); init_waitqueue_head(&trans_pcie->ucode_write_waitq); + + trans_pcie->rba.alloc_wq = alloc_workqueue("rb_allocator", + WQ_HIGHPRI | WQ_UNBOUND, 1); + if (!trans_pcie->rba.alloc_wq) { + ret = -ENOMEM; + goto out_free_trans; + } + INIT_WORK(&trans_pcie->rba.rx_alloc, iwl_pcie_rx_allocator_work); + trans_pcie->tso_hdr_page = alloc_percpu(struct iwl_tso_hdr_page); if (!trans_pcie->tso_hdr_page) { ret = -ENOMEM; @@ -3195,10 +3204,6 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, trans_pcie->inta_mask = CSR_INI_SET_MASK; } - trans_pcie->rba.alloc_wq = alloc_workqueue("rb_allocator", - WQ_HIGHPRI | WQ_UNBOUND, 1); - INIT_WORK(&trans_pcie->rba.rx_alloc, iwl_pcie_rx_allocator_work); - #ifdef CONFIG_IWLWIFI_PCIE_RTPM trans->runtime_pm_mode = IWL_PLAT_PM_MODE_D0I3; #else @@ -3211,6 +3216,8 @@ out_free_ict: iwl_pcie_free_ict(trans); out_no_pci: free_percpu(trans_pcie->tso_hdr_page); + destroy_workqueue(trans_pcie->rba.alloc_wq); +out_free_trans: iwl_trans_free(trans); return ERR_PTR(ret); } From da79135a05ca0b601cf4ce485cf7c2b37636955a Mon Sep 17 00:00:00 2001 From: Martynas Pumputis Date: Fri, 28 Feb 2020 00:57:38 +0000 Subject: [PATCH 3479/3715] netfilter: nf_conntrack: resolve clash for matching conntracks [ Upstream commit ed07d9a021df6da53456663a76999189badc432a ] This patch enables the clash resolution for NAT (disabled in "590b52e10d41") if clashing conntracks match (i.e. both tuples are equal) and a protocol allows it. The clash might happen for a connections-less protocol (e.g. UDP) when two threads in parallel writes to the same socket and consequent calls to "get_unique_tuple" return the same tuples (incl. reply tuples). In this case it is safe to perform the resolution, as the losing CT describes the same mangling as the winning CT, so no modifications to the packet are needed, and the result of rules traversal for the loser's packet stays valid. Signed-off-by: Martynas Pumputis Signed-off-by: Pablo Neira Ayuso Signed-off-by: Andy Strohman Signed-off-by: Sasha Levin --- net/netfilter/nf_conntrack_core.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 2e65271bed01..a79f5a89cab1 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -543,6 +543,18 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, net_eq(net, nf_ct_net(ct)); } +static inline bool +nf_ct_match(const struct nf_conn *ct1, const struct nf_conn *ct2) +{ + return nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + &ct2->tuplehash[IP_CT_DIR_ORIGINAL].tuple) && + nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_REPLY].tuple, + &ct2->tuplehash[IP_CT_DIR_REPLY].tuple) && + nf_ct_zone_equal(ct1, nf_ct_zone(ct2), IP_CT_DIR_ORIGINAL) && + nf_ct_zone_equal(ct1, nf_ct_zone(ct2), IP_CT_DIR_REPLY) && + net_eq(nf_ct_net(ct1), nf_ct_net(ct2)); +} + /* caller must hold rcu readlock and none of the nf_conntrack_locks */ static void nf_ct_gc_expired(struct nf_conn *ct) { @@ -736,19 +748,21 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb, /* This is the conntrack entry already in hashes that won race. */ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); const struct nf_conntrack_l4proto *l4proto; + enum ip_conntrack_info oldinfo; + struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo); l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); if (l4proto->allow_clash && - ((ct->status & IPS_NAT_DONE_MASK) == 0) && !nf_ct_is_dying(ct) && atomic_inc_not_zero(&ct->ct_general.use)) { - enum ip_conntrack_info oldinfo; - struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo); - - nf_ct_acct_merge(ct, ctinfo, loser_ct); - nf_conntrack_put(&loser_ct->ct_general); - nf_ct_set(skb, ct, oldinfo); - return NF_ACCEPT; + if (((ct->status & IPS_NAT_DONE_MASK) == 0) || + nf_ct_match(ct, loser_ct)) { + nf_ct_acct_merge(ct, ctinfo, loser_ct); + nf_conntrack_put(&loser_ct->ct_general); + nf_ct_set(skb, ct, oldinfo); + return NF_ACCEPT; + } + nf_ct_put(ct); } NF_CT_STAT_INC(net, drop); return NF_DROP; From 2afeb56881da66c09e8d2b5f6965eabeb75ce834 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 28 Feb 2020 16:48:15 -0800 Subject: [PATCH 3480/3715] ext4: fix potential race between online resizing and write operations commit 1d0c3924a92e69bfa91163bda83c12a994b4d106 upstream. During an online resize an array of pointers to buffer heads gets replaced so it can get enlarged. If there is a racing block allocation or deallocation which uses the old array, and the old array has gotten reused this can lead to a GPF or some other random kernel memory getting modified. Link: https://bugzilla.kernel.org/show_bug.cgi?id=206443 Link: https://lore.kernel.org/r/20200221053458.730016-2-tytso@mit.edu Reported-by: Suraj Jitindar Singh Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 4.14.x Signed-off-by: Sasha Levin --- fs/ext4/balloc.c | 14 +++++++++--- fs/ext4/ext4.h | 20 +++++++++++++++++- fs/ext4/resize.c | 55 ++++++++++++++++++++++++++++++++++++++---------- fs/ext4/super.c | 31 +++++++++++++++++++-------- 4 files changed, 96 insertions(+), 24 deletions(-) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 70266a3355dc..fb38f20f869e 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -280,6 +280,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, ext4_group_t ngroups = ext4_get_groups_count(sb); struct ext4_group_desc *desc; struct ext4_sb_info *sbi = EXT4_SB(sb); + struct buffer_head *bh_p; if (block_group >= ngroups) { ext4_error(sb, "block_group >= groups_count - block_group = %u," @@ -290,7 +291,14 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); - if (!sbi->s_group_desc[group_desc]) { + bh_p = sbi_array_rcu_deref(sbi, s_group_desc, group_desc); + /* + * sbi_array_rcu_deref returns with rcu unlocked, this is ok since + * the pointer being dereferenced won't be dereferenced again. By + * looking at the usage in add_new_gdb() the value isn't modified, + * just the pointer, and so it remains valid. + */ + if (!bh_p) { ext4_error(sb, "Group descriptor not loaded - " "block_group = %u, group_desc = %u, desc = %u", block_group, group_desc, offset); @@ -298,10 +306,10 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, } desc = (struct ext4_group_desc *)( - (__u8 *)sbi->s_group_desc[group_desc]->b_data + + (__u8 *)bh_p->b_data + offset * EXT4_DESC_SIZE(sb)); if (bh) - *bh = sbi->s_group_desc[group_desc]; + *bh = bh_p; return desc; } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index b162f602c430..94f4f6d55c1a 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1382,7 +1382,7 @@ struct ext4_sb_info { loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ struct buffer_head * s_sbh; /* Buffer containing the super block */ struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ - struct buffer_head **s_group_desc; + struct buffer_head * __rcu *s_group_desc; unsigned int s_mount_opt; unsigned int s_mount_opt2; unsigned int s_mount_flags; @@ -1556,6 +1556,23 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); } +/* + * Returns: sbi->field[index] + * Used to access an array element from the following sbi fields which require + * rcu protection to avoid dereferencing an invalid pointer due to reassignment + * - s_group_desc + * - s_group_info + * - s_flex_group + */ +#define sbi_array_rcu_deref(sbi, field, index) \ +({ \ + typeof(*((sbi)->field)) _v; \ + rcu_read_lock(); \ + _v = ((typeof(_v)*)rcu_dereference((sbi)->field))[index]; \ + rcu_read_unlock(); \ + _v; \ +}) + /* * Inode dynamic state flags */ @@ -2569,6 +2586,7 @@ extern int ext4_generic_delete_entry(handle_t *handle, extern bool ext4_empty_dir(struct inode *inode); /* resize.c */ +extern void ext4_kvfree_array_rcu(void *to_free); extern int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input); extern int ext4_group_extend(struct super_block *sb, diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 4f7cd78d0364..16e3830da548 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -17,6 +17,33 @@ #include "ext4_jbd2.h" +struct ext4_rcu_ptr { + struct rcu_head rcu; + void *ptr; +}; + +static void ext4_rcu_ptr_callback(struct rcu_head *head) +{ + struct ext4_rcu_ptr *ptr; + + ptr = container_of(head, struct ext4_rcu_ptr, rcu); + kvfree(ptr->ptr); + kfree(ptr); +} + +void ext4_kvfree_array_rcu(void *to_free) +{ + struct ext4_rcu_ptr *ptr = kzalloc(sizeof(*ptr), GFP_KERNEL); + + if (ptr) { + ptr->ptr = to_free; + call_rcu(&ptr->rcu, ext4_rcu_ptr_callback); + return; + } + synchronize_rcu(); + kvfree(to_free); +} + int ext4_resize_begin(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -545,8 +572,8 @@ static int setup_new_flex_group_blocks(struct super_block *sb, brelse(gdb); goto out; } - memcpy(gdb->b_data, sbi->s_group_desc[j]->b_data, - gdb->b_size); + memcpy(gdb->b_data, sbi_array_rcu_deref(sbi, + s_group_desc, j)->b_data, gdb->b_size); set_buffer_uptodate(gdb); err = ext4_handle_dirty_metadata(handle, NULL, gdb); @@ -854,13 +881,15 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, } brelse(dind); - o_group_desc = EXT4_SB(sb)->s_group_desc; + rcu_read_lock(); + o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc); memcpy(n_group_desc, o_group_desc, EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); + rcu_read_unlock(); n_group_desc[gdb_num] = gdb_bh; - EXT4_SB(sb)->s_group_desc = n_group_desc; + rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc); EXT4_SB(sb)->s_gdb_count++; - kvfree(o_group_desc); + ext4_kvfree_array_rcu(o_group_desc); le16_add_cpu(&es->s_reserved_gdt_blocks, -1); err = ext4_handle_dirty_super(handle, sb); @@ -904,9 +933,11 @@ static int add_new_gdb_meta_bg(struct super_block *sb, return err; } - o_group_desc = EXT4_SB(sb)->s_group_desc; + rcu_read_lock(); + o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc); memcpy(n_group_desc, o_group_desc, EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); + rcu_read_unlock(); n_group_desc[gdb_num] = gdb_bh; BUFFER_TRACE(gdb_bh, "get_write_access"); @@ -917,9 +948,9 @@ static int add_new_gdb_meta_bg(struct super_block *sb, return err; } - EXT4_SB(sb)->s_group_desc = n_group_desc; + rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc); EXT4_SB(sb)->s_gdb_count++; - kvfree(o_group_desc); + ext4_kvfree_array_rcu(o_group_desc); return err; } @@ -1183,7 +1214,8 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, * use non-sparse filesystems anymore. This is already checked above. */ if (gdb_off) { - gdb_bh = sbi->s_group_desc[gdb_num]; + gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, + gdb_num); BUFFER_TRACE(gdb_bh, "get_write_access"); err = ext4_journal_get_write_access(handle, gdb_bh); @@ -1265,7 +1297,7 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, /* * get_write_access() has been called on gdb_bh by ext4_add_new_desc(). */ - gdb_bh = sbi->s_group_desc[gdb_num]; + gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, gdb_num); /* Update group descriptor block for new group */ gdp = (struct ext4_group_desc *)(gdb_bh->b_data + gdb_off * EXT4_DESC_SIZE(sb)); @@ -1492,7 +1524,8 @@ exit_journal: for (; gdb_num <= gdb_num_end; gdb_num++) { struct buffer_head *gdb_bh; - gdb_bh = sbi->s_group_desc[gdb_num]; + gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, + gdb_num); if (old_gdb == gdb_bh->b_blocknr) continue; update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data, diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 09b443709bca..b14a0c5638e7 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -900,6 +900,7 @@ static void ext4_put_super(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; + struct buffer_head **group_desc; int aborted = 0; int i, err; @@ -931,9 +932,12 @@ static void ext4_put_super(struct super_block *sb) if (!sb_rdonly(sb)) ext4_commit_super(sb, 1); + rcu_read_lock(); + group_desc = rcu_dereference(sbi->s_group_desc); for (i = 0; i < sbi->s_gdb_count; i++) - brelse(sbi->s_group_desc[i]); - kvfree(sbi->s_group_desc); + brelse(group_desc[i]); + kvfree(group_desc); + rcu_read_unlock(); kvfree(sbi->s_flex_groups); percpu_counter_destroy(&sbi->s_freeclusters_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); @@ -3489,7 +3493,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) { struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev); char *orig_data = kstrdup(data, GFP_KERNEL); - struct buffer_head *bh; + struct buffer_head *bh, **group_desc; struct ext4_super_block *es = NULL; struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); ext4_fsblk_t block; @@ -4104,9 +4108,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } } - sbi->s_group_desc = kvmalloc(db_count * + rcu_assign_pointer(sbi->s_group_desc, + kvmalloc_array(db_count, sizeof(struct buffer_head *), - GFP_KERNEL); + GFP_KERNEL)); if (sbi->s_group_desc == NULL) { ext4_msg(sb, KERN_ERR, "not enough memory"); ret = -ENOMEM; @@ -4122,14 +4127,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } for (i = 0; i < db_count; i++) { + struct buffer_head *bh; + block = descriptor_loc(sb, logical_sb_block, i); - sbi->s_group_desc[i] = sb_bread_unmovable(sb, block); - if (!sbi->s_group_desc[i]) { + bh = sb_bread_unmovable(sb, block); + if (!bh) { ext4_msg(sb, KERN_ERR, "can't read group descriptor %d", i); db_count = i; goto failed_mount2; } + rcu_read_lock(); + rcu_dereference(sbi->s_group_desc)[i] = bh; + rcu_read_unlock(); } sbi->s_gdb_count = db_count; if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) { @@ -4521,9 +4531,12 @@ failed_mount3: if (sbi->s_mmp_tsk) kthread_stop(sbi->s_mmp_tsk); failed_mount2: + rcu_read_lock(); + group_desc = rcu_dereference(sbi->s_group_desc); for (i = 0; i < db_count; i++) - brelse(sbi->s_group_desc[i]); - kvfree(sbi->s_group_desc); + brelse(group_desc[i]); + kvfree(group_desc); + rcu_read_unlock(); failed_mount: if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); From 4d830e847378b37294652da7dd4746019991f37a Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Fri, 28 Feb 2020 16:48:17 -0800 Subject: [PATCH 3481/3715] ext4: fix potential race between s_flex_groups online resizing and access commit 7c990728b99ed6fbe9c75fc202fce1172d9916da upstream. During an online resize an array of s_flex_groups structures gets replaced so it can get enlarged. If there is a concurrent access to the array and this memory has been reused then this can lead to an invalid memory access. The s_flex_group array has been converted into an array of pointers rather than an array of structures. This is to ensure that the information contained in the structures cannot get out of sync during a resize due to an accessor updating the value in the old structure after it has been copied but before the array pointer is updated. Since the structures them- selves are no longer copied but only the pointers to them this case is mitigated. Link: https://bugzilla.kernel.org/show_bug.cgi?id=206443 Link: https://lore.kernel.org/r/20200221053458.730016-4-tytso@mit.edu Signed-off-by: Suraj Jitindar Singh Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 4.14.x Signed-off-by: Sasha Levin --- fs/ext4/ext4.h | 2 +- fs/ext4/ialloc.c | 23 +++++++++------ fs/ext4/mballoc.c | 9 ++++-- fs/ext4/resize.c | 7 +++-- fs/ext4/super.c | 72 ++++++++++++++++++++++++++++++++--------------- 5 files changed, 76 insertions(+), 37 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 94f4f6d55c1a..8b55abdd7249 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1492,7 +1492,7 @@ struct ext4_sb_info { unsigned int s_extent_max_zeroout_kb; unsigned int s_log_groups_per_flex; - struct flex_groups *s_flex_groups; + struct flex_groups * __rcu *s_flex_groups; ext4_group_t s_flex_groups_allocated; /* workqueue for reserved extent conversions (buffered io) */ diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 2f46564d3fca..2a480c0ef1bc 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -333,11 +333,13 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) percpu_counter_inc(&sbi->s_freeinodes_counter); if (sbi->s_log_groups_per_flex) { - ext4_group_t f = ext4_flex_group(sbi, block_group); + struct flex_groups *fg; - atomic_inc(&sbi->s_flex_groups[f].free_inodes); + fg = sbi_array_rcu_deref(sbi, s_flex_groups, + ext4_flex_group(sbi, block_group)); + atomic_inc(&fg->free_inodes); if (is_directory) - atomic_dec(&sbi->s_flex_groups[f].used_dirs); + atomic_dec(&fg->used_dirs); } BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata"); fatal = ext4_handle_dirty_metadata(handle, NULL, bh2); @@ -378,12 +380,13 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g, int flex_size, struct orlov_stats *stats) { struct ext4_group_desc *desc; - struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups; if (flex_size > 1) { - stats->free_inodes = atomic_read(&flex_group[g].free_inodes); - stats->free_clusters = atomic64_read(&flex_group[g].free_clusters); - stats->used_dirs = atomic_read(&flex_group[g].used_dirs); + struct flex_groups *fg = sbi_array_rcu_deref(EXT4_SB(sb), + s_flex_groups, g); + stats->free_inodes = atomic_read(&fg->free_inodes); + stats->free_clusters = atomic64_read(&fg->free_clusters); + stats->used_dirs = atomic_read(&fg->used_dirs); return; } @@ -1062,7 +1065,8 @@ got: if (sbi->s_log_groups_per_flex) { ext4_group_t f = ext4_flex_group(sbi, group); - atomic_inc(&sbi->s_flex_groups[f].used_dirs); + atomic_inc(&sbi_array_rcu_deref(sbi, s_flex_groups, + f)->used_dirs); } } if (ext4_has_group_desc_csum(sb)) { @@ -1085,7 +1089,8 @@ got: if (sbi->s_log_groups_per_flex) { flex_group = ext4_flex_group(sbi, group); - atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes); + atomic_dec(&sbi_array_rcu_deref(sbi, s_flex_groups, + flex_group)->free_inodes); } inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 3ba9a4ae4eac..fb865216edb9 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3052,7 +3052,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, ext4_group_t flex_group = ext4_flex_group(sbi, ac->ac_b_ex.fe_group); atomic64_sub(ac->ac_b_ex.fe_len, - &sbi->s_flex_groups[flex_group].free_clusters); + &sbi_array_rcu_deref(sbi, s_flex_groups, + flex_group)->free_clusters); } err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); @@ -4947,7 +4948,8 @@ do_more: if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, block_group); atomic64_add(count_clusters, - &sbi->s_flex_groups[flex_group].free_clusters); + &sbi_array_rcu_deref(sbi, s_flex_groups, + flex_group)->free_clusters); } if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE)) @@ -5092,7 +5094,8 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, block_group); atomic64_add(EXT4_NUM_B2C(sbi, blocks_freed), - &sbi->s_flex_groups[flex_group].free_clusters); + &sbi_array_rcu_deref(sbi, s_flex_groups, + flex_group)->free_clusters); } ext4_mb_unload_buddy(&e4b); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 16e3830da548..d42f7471fd5b 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1425,11 +1425,14 @@ static void ext4_update_super(struct super_block *sb, percpu_counter_read(&sbi->s_freeclusters_counter)); if (ext4_has_feature_flex_bg(sb) && sbi->s_log_groups_per_flex) { ext4_group_t flex_group; + struct flex_groups *fg; + flex_group = ext4_flex_group(sbi, group_data[0].group); + fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group); atomic64_add(EXT4_NUM_B2C(sbi, free_blocks), - &sbi->s_flex_groups[flex_group].free_clusters); + &fg->free_clusters); atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count, - &sbi->s_flex_groups[flex_group].free_inodes); + &fg->free_inodes); } /* diff --git a/fs/ext4/super.c b/fs/ext4/super.c index b14a0c5638e7..f1c1c180d267 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -901,6 +901,7 @@ static void ext4_put_super(struct super_block *sb) struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; struct buffer_head **group_desc; + struct flex_groups **flex_groups; int aborted = 0; int i, err; @@ -937,8 +938,13 @@ static void ext4_put_super(struct super_block *sb) for (i = 0; i < sbi->s_gdb_count; i++) brelse(group_desc[i]); kvfree(group_desc); + flex_groups = rcu_dereference(sbi->s_flex_groups); + if (flex_groups) { + for (i = 0; i < sbi->s_flex_groups_allocated; i++) + kvfree(flex_groups[i]); + kvfree(flex_groups); + } rcu_read_unlock(); - kvfree(sbi->s_flex_groups); percpu_counter_destroy(&sbi->s_freeclusters_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); @@ -2231,8 +2237,8 @@ done: int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) { struct ext4_sb_info *sbi = EXT4_SB(sb); - struct flex_groups *new_groups; - int size; + struct flex_groups **old_groups, **new_groups; + int size, i; if (!sbi->s_log_groups_per_flex) return 0; @@ -2241,22 +2247,37 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) if (size <= sbi->s_flex_groups_allocated) return 0; - size = roundup_pow_of_two(size * sizeof(struct flex_groups)); - new_groups = kvzalloc(size, GFP_KERNEL); + new_groups = kvzalloc(roundup_pow_of_two(size * + sizeof(*sbi->s_flex_groups)), GFP_KERNEL); if (!new_groups) { - ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups", - size / (int) sizeof(struct flex_groups)); + ext4_msg(sb, KERN_ERR, + "not enough memory for %d flex group pointers", size); return -ENOMEM; } - - if (sbi->s_flex_groups) { - memcpy(new_groups, sbi->s_flex_groups, - (sbi->s_flex_groups_allocated * - sizeof(struct flex_groups))); - kvfree(sbi->s_flex_groups); + for (i = sbi->s_flex_groups_allocated; i < size; i++) { + new_groups[i] = kvzalloc(roundup_pow_of_two( + sizeof(struct flex_groups)), + GFP_KERNEL); + if (!new_groups[i]) { + for (i--; i >= sbi->s_flex_groups_allocated; i--) + kvfree(new_groups[i]); + kvfree(new_groups); + ext4_msg(sb, KERN_ERR, + "not enough memory for %d flex groups", size); + return -ENOMEM; + } } - sbi->s_flex_groups = new_groups; - sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups); + rcu_read_lock(); + old_groups = rcu_dereference(sbi->s_flex_groups); + if (old_groups) + memcpy(new_groups, old_groups, + (sbi->s_flex_groups_allocated * + sizeof(struct flex_groups *))); + rcu_read_unlock(); + rcu_assign_pointer(sbi->s_flex_groups, new_groups); + sbi->s_flex_groups_allocated = size; + if (old_groups) + ext4_kvfree_array_rcu(old_groups); return 0; } @@ -2264,6 +2285,7 @@ static int ext4_fill_flex_info(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_group_desc *gdp = NULL; + struct flex_groups *fg; ext4_group_t flex_group; int i, err; @@ -2281,12 +2303,11 @@ static int ext4_fill_flex_info(struct super_block *sb) gdp = ext4_get_group_desc(sb, i, NULL); flex_group = ext4_flex_group(sbi, i); - atomic_add(ext4_free_inodes_count(sb, gdp), - &sbi->s_flex_groups[flex_group].free_inodes); + fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group); + atomic_add(ext4_free_inodes_count(sb, gdp), &fg->free_inodes); atomic64_add(ext4_free_group_clusters(sb, gdp), - &sbi->s_flex_groups[flex_group].free_clusters); - atomic_add(ext4_used_dirs_count(sb, gdp), - &sbi->s_flex_groups[flex_group].used_dirs); + &fg->free_clusters); + atomic_add(ext4_used_dirs_count(sb, gdp), &fg->used_dirs); } return 1; @@ -3496,6 +3517,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) struct buffer_head *bh, **group_desc; struct ext4_super_block *es = NULL; struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); + struct flex_groups **flex_groups; ext4_fsblk_t block; ext4_fsblk_t sb_block = get_sb_block(&data); ext4_fsblk_t logical_sb_block; @@ -4494,8 +4516,14 @@ failed_mount7: ext4_unregister_li_request(sb); failed_mount6: ext4_mb_release(sb); - if (sbi->s_flex_groups) - kvfree(sbi->s_flex_groups); + rcu_read_lock(); + flex_groups = rcu_dereference(sbi->s_flex_groups); + if (flex_groups) { + for (i = 0; i < sbi->s_flex_groups_allocated; i++) + kvfree(flex_groups[i]); + kvfree(flex_groups); + } + rcu_read_unlock(); percpu_counter_destroy(&sbi->s_freeclusters_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); From 453cc452833f4225443689e89e0500c419697b9c Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Tue, 18 Feb 2020 19:08:50 -0800 Subject: [PATCH 3482/3715] ext4: fix potential race between s_group_info online resizing and access [ Upstream commit df3da4ea5a0fc5d115c90d5aa6caa4dd433750a7 ] During an online resize an array of pointers to s_group_info gets replaced so it can get enlarged. If there is a concurrent access to the array in ext4_get_group_info() and this memory has been reused then this can lead to an invalid memory access. Link: https://bugzilla.kernel.org/show_bug.cgi?id=206443 Link: https://lore.kernel.org/r/20200221053458.730016-3-tytso@mit.edu Signed-off-by: Suraj Jitindar Singh Signed-off-by: Theodore Ts'o Reviewed-by: Balbir Singh Cc: stable@kernel.org Signed-off-by: Sasha Levin --- fs/ext4/ext4.h | 8 ++++---- fs/ext4/mballoc.c | 52 +++++++++++++++++++++++++++++++---------------- 2 files changed, 39 insertions(+), 21 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 8b55abdd7249..4aa0f8f7d9a0 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1442,7 +1442,7 @@ struct ext4_sb_info { #endif /* for buddy allocator */ - struct ext4_group_info ***s_group_info; + struct ext4_group_info ** __rcu *s_group_info; struct inode *s_buddy_cache; spinlock_t s_md_lock; unsigned short *s_mb_offsets; @@ -2832,13 +2832,13 @@ static inline struct ext4_group_info *ext4_get_group_info(struct super_block *sb, ext4_group_t group) { - struct ext4_group_info ***grp_info; + struct ext4_group_info **grp_info; long indexv, indexh; BUG_ON(group >= EXT4_SB(sb)->s_groups_count); - grp_info = EXT4_SB(sb)->s_group_info; indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb)); indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1); - return grp_info[indexv][indexh]; + grp_info = sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv); + return grp_info[indexh]; } /* diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index fb865216edb9..745a89d30a57 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2389,7 +2389,7 @@ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups) { struct ext4_sb_info *sbi = EXT4_SB(sb); unsigned size; - struct ext4_group_info ***new_groupinfo; + struct ext4_group_info ***old_groupinfo, ***new_groupinfo; size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); @@ -2402,13 +2402,16 @@ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups) ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group"); return -ENOMEM; } - if (sbi->s_group_info) { - memcpy(new_groupinfo, sbi->s_group_info, + rcu_read_lock(); + old_groupinfo = rcu_dereference(sbi->s_group_info); + if (old_groupinfo) + memcpy(new_groupinfo, old_groupinfo, sbi->s_group_info_size * sizeof(*sbi->s_group_info)); - kvfree(sbi->s_group_info); - } - sbi->s_group_info = new_groupinfo; + rcu_read_unlock(); + rcu_assign_pointer(sbi->s_group_info, new_groupinfo); sbi->s_group_info_size = size / sizeof(*sbi->s_group_info); + if (old_groupinfo) + ext4_kvfree_array_rcu(old_groupinfo); ext4_debug("allocated s_groupinfo array for %d meta_bg's\n", sbi->s_group_info_size); return 0; @@ -2420,6 +2423,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, { int i; int metalen = 0; + int idx = group >> EXT4_DESC_PER_BLOCK_BITS(sb); struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_group_info **meta_group_info; struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); @@ -2438,12 +2442,12 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, "for a buddy group"); goto exit_meta_group_info; } - sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = - meta_group_info; + rcu_read_lock(); + rcu_dereference(sbi->s_group_info)[idx] = meta_group_info; + rcu_read_unlock(); } - meta_group_info = - sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; + meta_group_info = sbi_array_rcu_deref(sbi, s_group_info, idx); i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS); @@ -2491,8 +2495,13 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, exit_group_info: /* If a meta_group_info table has been allocated, release it now */ if (group % EXT4_DESC_PER_BLOCK(sb) == 0) { - kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]); - sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = NULL; + struct ext4_group_info ***group_info; + + rcu_read_lock(); + group_info = rcu_dereference(sbi->s_group_info); + kfree(group_info[idx]); + group_info[idx] = NULL; + rcu_read_unlock(); } exit_meta_group_info: return -ENOMEM; @@ -2505,6 +2514,7 @@ static int ext4_mb_init_backend(struct super_block *sb) struct ext4_sb_info *sbi = EXT4_SB(sb); int err; struct ext4_group_desc *desc; + struct ext4_group_info ***group_info; struct kmem_cache *cachep; err = ext4_mb_alloc_groupinfo(sb, ngroups); @@ -2539,11 +2549,16 @@ err_freebuddy: while (i-- > 0) kmem_cache_free(cachep, ext4_get_group_info(sb, i)); i = sbi->s_group_info_size; + rcu_read_lock(); + group_info = rcu_dereference(sbi->s_group_info); while (i-- > 0) - kfree(sbi->s_group_info[i]); + kfree(group_info[i]); + rcu_read_unlock(); iput(sbi->s_buddy_cache); err_freesgi: - kvfree(sbi->s_group_info); + rcu_read_lock(); + kvfree(rcu_dereference(sbi->s_group_info)); + rcu_read_unlock(); return -ENOMEM; } @@ -2733,7 +2748,7 @@ int ext4_mb_release(struct super_block *sb) ext4_group_t ngroups = ext4_get_groups_count(sb); ext4_group_t i; int num_meta_group_infos; - struct ext4_group_info *grinfo; + struct ext4_group_info *grinfo, ***group_info; struct ext4_sb_info *sbi = EXT4_SB(sb); struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); @@ -2751,9 +2766,12 @@ int ext4_mb_release(struct super_block *sb) num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); + rcu_read_lock(); + group_info = rcu_dereference(sbi->s_group_info); for (i = 0; i < num_meta_group_infos; i++) - kfree(sbi->s_group_info[i]); - kvfree(sbi->s_group_info); + kfree(group_info[i]); + kvfree(group_info); + rcu_read_unlock(); } kfree(sbi->s_mb_offsets); kfree(sbi->s_mb_maxs); From 45004f2ef0837d7bc1098ee12cc88f4e94c598e6 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Mon, 23 Dec 2019 10:42:19 -0600 Subject: [PATCH 3483/3715] ipmi:ssif: Handle a possible NULL pointer reference [ Upstream commit 6b8526d3abc02c08a2f888e8c20b7ac9e5776dfe ] In error cases a NULL can be passed to memcpy. The length will always be zero, so it doesn't really matter, but go ahead and check for NULL, anyway, to be more precise and avoid static analysis errors. Reported-by: kbuild test robot Signed-off-by: Corey Minyard Signed-off-by: Sasha Levin --- drivers/char/ipmi/ipmi_ssif.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 941bffd9b49c..0146bc3252c5 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -750,10 +750,14 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, flags = ipmi_ssif_lock_cond(ssif_info, &oflags); msg = ssif_info->curr_msg; if (msg) { + if (data) { + if (len > IPMI_MAX_MSG_LENGTH) + len = IPMI_MAX_MSG_LENGTH; + memcpy(msg->rsp, data, len); + } else { + len = 0; + } msg->rsp_size = len; - if (msg->rsp_size > IPMI_MAX_MSG_LENGTH) - msg->rsp_size = IPMI_MAX_MSG_LENGTH; - memcpy(msg->rsp, data, msg->rsp_size); ssif_info->curr_msg = NULL; } From 313810964cacf555c88c77b47f837d5cf232992b Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Tue, 21 Jan 2020 11:18:48 -0800 Subject: [PATCH 3484/3715] drm/msm: Set dma maximum segment size for mdss [ Upstream commit db735fc4036bbe1fbe606819b5f0ff26cc76cdff ] Turning on CONFIG_DMA_API_DEBUG_SG results in the following error: [ 12.078665] msm ae00000.mdss: DMA-API: mapping sg segment longer than device claims to support [len=3526656] [max=65536] [ 12.089870] WARNING: CPU: 6 PID: 334 at /mnt/host/source/src/third_party/kernel/v4.19/kernel/dma/debug.c:1301 debug_dma_map_sg+0x1dc/0x318 [ 12.102655] Modules linked in: joydev [ 12.106442] CPU: 6 PID: 334 Comm: frecon Not tainted 4.19.0 #2 [ 12.112450] Hardware name: Google Cheza (rev3+) (DT) [ 12.117566] pstate: 60400009 (nZCv daif +PAN -UAO) [ 12.122506] pc : debug_dma_map_sg+0x1dc/0x318 [ 12.126995] lr : debug_dma_map_sg+0x1dc/0x318 [ 12.131487] sp : ffffff800cc3ba80 [ 12.134913] x29: ffffff800cc3ba80 x28: 0000000000000000 [ 12.140395] x27: 0000000000000004 x26: 0000000000000004 [ 12.145868] x25: ffffff8008e55b18 x24: 0000000000000000 [ 12.151337] x23: 00000000ffffffff x22: ffffff800921c000 [ 12.156809] x21: ffffffc0fa75b080 x20: ffffffc0f7195090 [ 12.162280] x19: ffffffc0f1c53280 x18: 0000000000000000 [ 12.167749] x17: 0000000000000000 x16: 0000000000000000 [ 12.173218] x15: 0000000000000000 x14: 0720072007200720 [ 12.178689] x13: 0720072007200720 x12: 0720072007200720 [ 12.184161] x11: 0720072007200720 x10: 0720072007200720 [ 12.189641] x9 : ffffffc0f1fc6b60 x8 : 0000000000000000 [ 12.195110] x7 : ffffff8008132ce0 x6 : 0000000000000000 [ 12.200585] x5 : 0000000000000000 x4 : ffffff8008134734 [ 12.206058] x3 : ffffff800cc3b830 x2 : ffffffc0f1fc6240 [ 12.211532] x1 : 25045a74f48a7400 x0 : 25045a74f48a7400 [ 12.217006] Call trace: [ 12.219535] debug_dma_map_sg+0x1dc/0x318 [ 12.223671] get_pages+0x19c/0x20c [ 12.227177] msm_gem_fault+0x64/0xfc [ 12.230874] __do_fault+0x3c/0x140 [ 12.234383] __handle_mm_fault+0x70c/0xdb8 [ 12.238603] handle_mm_fault+0xac/0xc4 [ 12.242473] do_page_fault+0x1bc/0x3d4 [ 12.246342] do_translation_fault+0x54/0x88 [ 12.250652] do_mem_abort+0x60/0xf0 [ 12.254250] el0_da+0x20/0x24 [ 12.257317] irq event stamp: 67260 [ 12.260828] hardirqs last enabled at (67259): [] console_unlock+0x214/0x608 [ 12.269693] hardirqs last disabled at (67260): [] do_debug_exception+0x5c/0x178 [ 12.278820] softirqs last enabled at (67256): [] __do_softirq+0x4d4/0x520 [ 12.287510] softirqs last disabled at (67249): [] irq_exit+0xa8/0x100 [ 12.295742] ---[ end trace e63cfc40c313ffab ]--- The root of the problem is that the default segment size for sgt is (UINT_MAX & PAGE_MASK), and the default segment size for device dma is 64K. As such, if you compare the 2, you would deduce that the sg segment will overflow the device's capacity. In reality, the hardware can accommodate the larger sg segments, it's just not initializing its max segment properly. This patch initializes the max segment size for the mdss device, which gets rid of that pesky warning. Reported-by: Stephen Boyd Tested-by: Stephen Boyd Tested-by: Sai Prakash Ranjan Reviewed-by: Rob Clark Signed-off-by: Sean Paul Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20200121111813.REPOST.1.I92c66a35fb13f368095b05287bdabdbe88ca6922@changeid Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/msm_drv.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 77c45a2ebd83..d9c0687435a0 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -420,6 +420,14 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv) if (ret) goto fail; + if (!dev->dma_parms) { + dev->dma_parms = devm_kzalloc(dev, sizeof(*dev->dma_parms), + GFP_KERNEL); + if (!dev->dma_parms) + return -ENOMEM; + } + dma_set_max_seg_size(dev, DMA_BIT_MASK(32)); + msm_gem_shrinker_init(ddev); switch (get_mdp_ver(pdev)) { From de020833f8f4bd95a4e73ef59a7b01ce3c78447b Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Wed, 5 Feb 2020 14:15:58 -0500 Subject: [PATCH 3485/3715] dax: pass NOWAIT flag to iomap_apply [ Upstream commit 96222d53842dfe54869ec4e1b9d4856daf9105a2 ] fstests generic/471 reports a failure when run with MOUNT_OPTIONS="-o dax". The reason is that the initial pwrite to an empty file with the RWF_NOWAIT flag set does not return -EAGAIN. It turns out that dax_iomap_rw doesn't pass that flag through to iomap_apply. With this patch applied, generic/471 passes for me. Signed-off-by: Jeff Moyer Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/x49r1z86e1d.fsf@segfault.boston.devel.redhat.com Signed-off-by: Dan Williams Signed-off-by: Sasha Levin --- fs/dax.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/dax.c b/fs/dax.c index ddb4981ae32e..34a55754164f 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1057,6 +1057,9 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, lockdep_assert_held(&inode->i_rwsem); } + if (iocb->ki_flags & IOCB_NOWAIT) + flags |= IOMAP_NOWAIT; + while (iov_iter_count(iter)) { ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops, iter, dax_iomap_actor); From 518b947b807c6a1c34c07a90840ae91915628992 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 31 Jan 2020 13:12:58 +0200 Subject: [PATCH 3486/3715] mac80211: consider more elements in parsing CRC [ Upstream commit a04564c99bb4a92f805a58e56b2d22cc4978f152 ] We only use the parsing CRC for checking if a beacon changed, and elements with an ID > 63 cannot be represented in the filter. Thus, like we did before with WMM and Cisco vendor elements, just statically add these forgotten items to the CRC: - WLAN_EID_VHT_OPERATION - WLAN_EID_OPMODE_NOTIF I guess that in most cases when VHT/HE operation change, the HT operation also changed, and so the change was picked up, but we did notice that pure operating mode notification changes were ignored. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/20200131111300.891737-22-luca@coelho.fi [restrict to VHT for the mac80211 branch] Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/util.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 81f120466c38..cd3cdd1a0b57 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -944,16 +944,22 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, elem_parse_failed = true; break; case WLAN_EID_VHT_OPERATION: - if (elen >= sizeof(struct ieee80211_vht_operation)) + if (elen >= sizeof(struct ieee80211_vht_operation)) { elems->vht_operation = (void *)pos; - else - elem_parse_failed = true; + if (calc_crc) + crc = crc32_be(crc, pos - 2, elen + 2); + break; + } + elem_parse_failed = true; break; case WLAN_EID_OPMODE_NOTIF: - if (elen > 0) + if (elen > 0) { elems->opmode_notif = pos; - else - elem_parse_failed = true; + if (calc_crc) + crc = crc32_be(crc, pos - 2, elen + 2); + break; + } + elem_parse_failed = true; break; case WLAN_EID_MESH_ID: elems->mesh_id = pos; From 6306a605b36139c883900548f189fc04a4a1615c Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Mon, 3 Feb 2020 10:56:50 +0000 Subject: [PATCH 3487/3715] cfg80211: check wiphy driver existence for drvinfo report [ Upstream commit bfb7bac3a8f47100ebe7961bd14e924c96e21ca7 ] When preparing ethtool drvinfo, check if wiphy driver is defined before dereferencing it. Driver may not exist, e.g. if wiphy is attached to a virtual platform device. Signed-off-by: Sergey Matyukevich Link: https://lore.kernel.org/r/20200203105644.28875-1-sergey.matyukevich.os@quantenna.com Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/ethtool.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/wireless/ethtool.c b/net/wireless/ethtool.c index a9c0f368db5d..24e18405cdb4 100644 --- a/net/wireless/ethtool.c +++ b/net/wireless/ethtool.c @@ -7,9 +7,13 @@ void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { struct wireless_dev *wdev = dev->ieee80211_ptr; + struct device *pdev = wiphy_dev(wdev->wiphy); - strlcpy(info->driver, wiphy_dev(wdev->wiphy)->driver->name, - sizeof(info->driver)); + if (pdev->driver) + strlcpy(info->driver, pdev->driver->name, + sizeof(info->driver)); + else + strlcpy(info->driver, "N/A", sizeof(info->driver)); strlcpy(info->version, init_utsname()->release, sizeof(info->version)); From 83b3ac0eebb521124a097a613df7fe50efcb8f8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Sat, 8 Feb 2020 15:50:36 +0100 Subject: [PATCH 3488/3715] qmi_wwan: re-add DW5821e pre-production variant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 88bf54603f6f2c137dfee1abf6436ceac3528d2d ] Commit f25e1392fdb5 removed the support for the pre-production variant of the Dell DW5821e to avoid probing another USB interface unnecessarily. However, the pre-production samples are found in the wild, and this lack of support is causing problems for users of such samples. It is therefore necessary to support both variants. Matching on both interfaces 0 and 1 is not expected to cause any problem with either variant, as only the QMI function will be probed successfully on either. Interface 1 will be rejected based on the HID class for the production variant: T: Bus=01 Lev=03 Prnt=04 Port=00 Cnt=01 Dev#= 16 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 2 P: Vendor=413c ProdID=81d7 Rev=03.18 S: Manufacturer=DELL S: Product=DW5821e Snapdragon X20 LTE S: SerialNumber=0123456789ABCDEF C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I: If#= 1 Alt= 0 #EPs= 1 Cls=03(HID ) Sub=00 Prot=00 Driver=usbhid I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option And interface 0 will be rejected based on too few endpoints for the pre-production variant: T: Bus=01 Lev=02 Prnt=02 Port=03 Cnt=03 Dev#= 7 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 2 P: Vendor=413c ProdID=81d7 Rev= 3.18 S: Manufacturer=DELL S: Product=DW5821e Snapdragon X20 LTE S: SerialNumber=0123456789ABCDEF C: #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver= I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option Fixes: f25e1392fdb5 ("qmi_wwan: fix interface number for DW5821e production firmware") Link: https://whrl.pl/Rf0vNk Reported-by: Lars Melin Cc: Aleksander Morgado Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index db70d4c5778a..08215a9f6145 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1287,6 +1287,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x413c, 0x81b6, 8)}, /* Dell Wireless 5811e */ {QMI_FIXED_INTF(0x413c, 0x81b6, 10)}, /* Dell Wireless 5811e */ {QMI_FIXED_INTF(0x413c, 0x81d7, 0)}, /* Dell Wireless 5821e */ + {QMI_FIXED_INTF(0x413c, 0x81d7, 1)}, /* Dell Wireless 5821e preproduction config */ {QMI_FIXED_INTF(0x413c, 0x81e0, 0)}, /* Dell Wireless 5821e with eSIM support*/ {QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */ {QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)}, /* HP lt4120 Snapdragon X5 LTE */ From c4806d0e9b56b3836478f60a2acca971028c4684 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Sat, 8 Feb 2020 16:55:04 +0100 Subject: [PATCH 3489/3715] qmi_wwan: unconditionally reject 2 ep interfaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 00516d13d4cfa56ce39da144db2dbf08b09b9357 ] We have been using the fact that the QMI and DIAG functions usually are the only ones with class/subclass/protocol being ff/ff/ff on Quectel modems. This has allowed us to match the QMI function without knowing the exact interface number, which can vary depending on firmware configuration. The ability to silently reject the DIAG function, which is usually handled by the option driver, is important for this method to work. This is done based on the knowledge that it has exactly 2 bulk endpoints. QMI function control interfaces will have either 3 or 1 endpoint. This rule is universal so the quirk condition can be removed. The fixed layouts known from the Gobi1k and Gobi2k modems have been gradually replaced by more dynamic layouts, and many vendors now use configurable layouts without changing device IDs. Renaming the class/subclass/protocol matching macro makes it more obvious that this is now not Quectel specific anymore. Cc: Kristian Evensen Cc: Aleksander Morgado Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/qmi_wwan.c | 42 ++++++++++++++------------------------ 1 file changed, 15 insertions(+), 27 deletions(-) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 08215a9f6145..189715438328 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -63,7 +63,6 @@ enum qmi_wwan_flags { enum qmi_wwan_quirks { QMI_WWAN_QUIRK_DTR = 1 << 0, /* needs "set DTR" request */ - QMI_WWAN_QUIRK_QUECTEL_DYNCFG = 1 << 1, /* check num. endpoints */ }; struct qmimux_hdr { @@ -853,16 +852,6 @@ static const struct driver_info qmi_wwan_info_quirk_dtr = { .data = QMI_WWAN_QUIRK_DTR, }; -static const struct driver_info qmi_wwan_info_quirk_quectel_dyncfg = { - .description = "WWAN/QMI device", - .flags = FLAG_WWAN | FLAG_SEND_ZLP, - .bind = qmi_wwan_bind, - .unbind = qmi_wwan_unbind, - .manage_power = qmi_wwan_manage_power, - .rx_fixup = qmi_wwan_rx_fixup, - .data = QMI_WWAN_QUIRK_DTR | QMI_WWAN_QUIRK_QUECTEL_DYNCFG, -}; - #define HUAWEI_VENDOR_ID 0x12D1 /* map QMI/wwan function by a fixed interface number */ @@ -883,14 +872,18 @@ static const struct driver_info qmi_wwan_info_quirk_quectel_dyncfg = { #define QMI_GOBI_DEVICE(vend, prod) \ QMI_FIXED_INTF(vend, prod, 0) -/* Quectel does not use fixed interface numbers on at least some of their - * devices. We need to check the number of endpoints to ensure that we bind to - * the correct interface. +/* Many devices have QMI and DIAG functions which are distinguishable + * from other vendor specific functions by class, subclass and + * protocol all being 0xff. The DIAG function has exactly 2 endpoints + * and is silently rejected when probed. + * + * This makes it possible to match dynamically numbered QMI functions + * as seen on e.g. many Quectel modems. */ -#define QMI_QUIRK_QUECTEL_DYNCFG(vend, prod) \ +#define QMI_MATCH_FF_FF_FF(vend, prod) \ USB_DEVICE_AND_INTERFACE_INFO(vend, prod, USB_CLASS_VENDOR_SPEC, \ USB_SUBCLASS_VENDOR_SPEC, 0xff), \ - .driver_info = (unsigned long)&qmi_wwan_info_quirk_quectel_dyncfg + .driver_info = (unsigned long)&qmi_wwan_info_quirk_dtr static const struct usb_device_id products[] = { /* 1. CDC ECM like devices match on the control interface */ @@ -996,10 +989,10 @@ static const struct usb_device_id products[] = { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x581d, USB_CLASS_VENDOR_SPEC, 1, 7), .driver_info = (unsigned long)&qmi_wwan_info, }, - {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0125)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */ - {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0306)}, /* Quectel EP06/EG06/EM06 */ - {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0512)}, /* Quectel EG12/EM12 */ - {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0800)}, /* Quectel RM500Q-GL */ + {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0125)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */ + {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0306)}, /* Quectel EP06/EG06/EM06 */ + {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0512)}, /* Quectel EG12/EM12 */ + {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0800)}, /* Quectel RM500Q-GL */ /* 3. Combined interface devices matching on interface number */ {QMI_FIXED_INTF(0x0408, 0xea42, 4)}, /* Yota / Megafon M100-1 */ @@ -1379,7 +1372,6 @@ static int qmi_wwan_probe(struct usb_interface *intf, { struct usb_device_id *id = (struct usb_device_id *)prod; struct usb_interface_descriptor *desc = &intf->cur_altsetting->desc; - const struct driver_info *info; /* Workaround to enable dynamic IDs. This disables usbnet * blacklisting functionality. Which, if required, can be @@ -1415,12 +1407,8 @@ static int qmi_wwan_probe(struct usb_interface *intf, * different. Ignore the current interface if the number of endpoints * equals the number for the diag interface (two). */ - info = (void *)id->driver_info; - - if (info->data & QMI_WWAN_QUIRK_QUECTEL_DYNCFG) { - if (desc->bNumEndpoints == 2) - return -ENODEV; - } + if (desc->bNumEndpoints == 2) + return -ENODEV; return usbnet_probe(intf, id); } From 670f5c64a37751cd54fa298d38f8adc9d00a7d9d Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Tue, 11 Feb 2020 15:17:40 +0000 Subject: [PATCH 3490/3715] net: ena: fix potential crash when rxfh key is NULL [ Upstream commit 91a65b7d3ed8450f31ab717a65dcb5f9ceb5ab02 ] When ethtool -X is called without an hkey, ena_com_fill_hash_function() is called with key=NULL, which is passed to memcpy causing a crash. This commit fixes this issue by checking key is not NULL. Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Sameeh Jubran Signed-off-by: Arthur Kiyanovski Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_com.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 10e6053f6671..f2dde1ab424a 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -2069,15 +2069,16 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, switch (func) { case ENA_ADMIN_TOEPLITZ: - if (key_len > sizeof(hash_key->key)) { - pr_err("key len (%hu) is bigger than the max supported (%zu)\n", - key_len, sizeof(hash_key->key)); - return -EINVAL; + if (key) { + if (key_len != sizeof(hash_key->key)) { + pr_err("key len (%hu) doesn't equal the supported size (%zu)\n", + key_len, sizeof(hash_key->key)); + return -EINVAL; + } + memcpy(hash_key->key, key, key_len); + rss->hash_init_val = init_val; + hash_key->keys_num = key_len >> 2; } - - memcpy(hash_key->key, key, key_len); - rss->hash_init_val = init_val; - hash_key->keys_num = key_len >> 2; break; case ENA_ADMIN_CRC32: rss->hash_init_val = init_val; From 482c613e4fb841a149e117555d59255dd1de03d1 Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Tue, 11 Feb 2020 15:17:41 +0000 Subject: [PATCH 3491/3715] net: ena: fix uses of round_jiffies() [ Upstream commit 2a6e5fa2f4c25b66c763428a3e65363214946931 ] >From the documentation of round_jiffies(): "Rounds a time delta in the future (in jiffies) up or down to (approximately) full seconds. This is useful for timers for which the exact time they fire does not matter too much, as long as they fire approximately every X seconds. By rounding these timers to whole seconds, all such timers will fire at the same time, rather than at various times spread out. The goal of this is to have the CPU wake up less, which saves power." There are 2 parts to this patch: ================================ Part 1: ------- In our case we need timer_service to be called approximately every X=1 seconds, and the exact time does not matter, so using round_jiffies() is the right way to go. Therefore we add round_jiffies() to the mod_timer() in ena_timer_service(). Part 2: ------- round_jiffies() is used in check_for_missing_keep_alive() when getting the jiffies of the expiration of the keep_alive timeout. Here it is actually a mistake to use round_jiffies() because we want the exact time when keep_alive should expire and not an approximate rounded time, which can cause early, false positive, timeouts. Therefore we remove round_jiffies() in the calculation of keep_alive_expired() in check_for_missing_keep_alive(). Fixes: 82ef30f13be0 ("net: ena: add hardware hints capability to the driver") Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Arthur Kiyanovski Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 518ff393a026..d9ece9ac6f53 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -2803,8 +2803,8 @@ static void check_for_missing_keep_alive(struct ena_adapter *adapter) if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT) return; - keep_alive_expired = round_jiffies(adapter->last_keep_alive_jiffies + - adapter->keep_alive_timeout); + keep_alive_expired = adapter->last_keep_alive_jiffies + + adapter->keep_alive_timeout; if (unlikely(time_is_before_jiffies(keep_alive_expired))) { netif_err(adapter, drv, adapter->netdev, "Keep alive watchdog timeout.\n"); @@ -2906,7 +2906,7 @@ static void ena_timer_service(unsigned long data) } /* Reset the timer */ - mod_timer(&adapter->timer_service, jiffies + HZ); + mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ)); } static int ena_calc_io_queue_num(struct pci_dev *pdev, From d13a5be10364c7985ea4a987a64b5f00768f1ec8 Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Tue, 11 Feb 2020 15:17:42 +0000 Subject: [PATCH 3492/3715] net: ena: add missing ethtool TX timestamping indication [ Upstream commit cf6d17fde93bdda23c9b02dd5906a12bf8c55209 ] Current implementation of the driver calls skb_tx_timestamp()to add a software tx timestamp to the skb, however the software-transmit capability is not reported in ethtool -T. This commit updates the ethtool structure to report the software-transmit capability in ethtool -T using the standard ethtool_op_get_ts_info(). This function reports all software timestamping capabilities (tx and rx), as well as setting phc_index = -1. phc_index is the index of the PTP hardware clock device that will be used for hardware timestamps. Since we don't have such a device in ENA, using the default -1 value is the correct setting. Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Ezequiel Lara Gomez Signed-off-by: Arthur Kiyanovski Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_ethtool.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index a2f02c23fe14..c58ad6190951 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -816,6 +816,7 @@ static const struct ethtool_ops ena_ethtool_ops = { .get_channels = ena_get_channels, .get_tunable = ena_get_tunable, .set_tunable = ena_set_tunable, + .get_ts_info = ethtool_op_get_ts_info, }; void ena_set_ethtool_ops(struct net_device *netdev) From 304b62b5f4c816b67080c1e21ce3a2f56952a3cb Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Tue, 11 Feb 2020 15:17:43 +0000 Subject: [PATCH 3493/3715] net: ena: fix incorrect default RSS key [ Upstream commit 0d1c3de7b8c78a5e44b74b62ede4a63629f5d811 ] Bug description: When running "ethtool -x " the key shows up as all zeros. When we use "ethtool -X hfunc toeplitz hkey " to set the key and then try to retrieve it using "ethtool -x " then we return the correct key because we return the one we saved. Bug cause: We don't fetch the key from the device but instead return the key that we have saved internally which is by default set to zero upon allocation. Fix: This commit fixes the issue by initializing the key to a random value using netdev_rss_key_fill(). Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Sameeh Jubran Signed-off-by: Arthur Kiyanovski Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_com.c | 15 +++++++++++++++ drivers/net/ethernet/amazon/ena/ena_com.h | 1 + 2 files changed, 16 insertions(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index f2dde1ab424a..c5df80f31005 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -843,6 +843,19 @@ static int ena_com_get_feature(struct ena_com_dev *ena_dev, 0); } +static void ena_com_hash_key_fill_default_key(struct ena_com_dev *ena_dev) +{ + struct ena_admin_feature_rss_flow_hash_control *hash_key = + (ena_dev->rss).hash_key; + + netdev_rss_key_fill(&hash_key->key, sizeof(hash_key->key)); + /* The key is stored in the device in u32 array + * as well as the API requires the key to be passed in this + * format. Thus the size of our array should be divided by 4 + */ + hash_key->keys_num = sizeof(hash_key->key) / sizeof(u32); +} + static int ena_com_hash_key_allocate(struct ena_com_dev *ena_dev) { struct ena_rss *rss = &ena_dev->rss; @@ -2403,6 +2416,8 @@ int ena_com_rss_init(struct ena_com_dev *ena_dev, u16 indr_tbl_log_size) if (unlikely(rc)) goto err_hash_key; + ena_com_hash_key_fill_default_key(ena_dev); + rc = ena_com_hash_ctrl_init(ena_dev); if (unlikely(rc)) goto err_hash_ctrl; diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h index 7b784f8a06a6..90fce5c0ca48 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.h +++ b/drivers/net/ethernet/amazon/ena/ena_com.h @@ -42,6 +42,7 @@ #include #include #include +#include #include "ena_common_defs.h" #include "ena_admin_defs.h" From 092a63301b92e0452433df1f6d6e314d6944cf1f Mon Sep 17 00:00:00 2001 From: Sameeh Jubran Date: Tue, 11 Feb 2020 15:17:45 +0000 Subject: [PATCH 3494/3715] net: ena: rss: fix failure to get indirection table [ Upstream commit 0c8923c0a64fb5d14bebb9a9065d2dc25ac5e600 ] On old hardware, getting / setting the hash function is not supported while gettting / setting the indirection table is. This commit enables us to still show the indirection table on older hardwares by setting the hash function and key to NULL. Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Sameeh Jubran Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_ethtool.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index c58ad6190951..7ca7bade1c09 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -660,7 +660,21 @@ static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, if (rc) return rc; + /* We call this function in order to check if the device + * supports getting/setting the hash function. + */ rc = ena_com_get_hash_function(adapter->ena_dev, &ena_func, key); + + if (rc) { + if (rc == -EOPNOTSUPP) { + key = NULL; + hfunc = NULL; + rc = 0; + } + + return rc; + } + if (rc) return rc; From ef28d60a09846f564f2bae01fa8820c028016c26 Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Tue, 11 Feb 2020 15:17:46 +0000 Subject: [PATCH 3495/3715] net: ena: rss: store hash function as values and not bits [ Upstream commit 4844470d472d660c26149ad764da2406adb13423 ] The device receives, stores and retrieves the hash function value as bits and not as their enum value. The bug: * In ena_com_set_hash_function() we set cmd.u.flow_hash_func.selected_func to the bit value of rss->hash_func. (1 << rss->hash_func) * In ena_com_get_hash_function() we retrieve the hash function and store it's bit value in rss->hash_func. (Now the bit value of rss->hash_func is stored in rss->hash_func instead of it's enum value) The fix: This commit fixes the issue by converting the retrieved hash function values from the device to the matching enum value of the set bit using ffs(). ffs() finds the first set bit's index in a word. Since the function returns 1 for the LSB's index, we need to subtract 1 from the returned value (note that BIT(0) is 1). Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Sameeh Jubran Signed-off-by: Arthur Kiyanovski Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_com.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index c5df80f31005..552db5399503 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -2128,7 +2128,11 @@ int ena_com_get_hash_function(struct ena_com_dev *ena_dev, if (unlikely(rc)) return rc; - rss->hash_func = get_resp.u.flow_hash_func.selected_func; + /* ffs() returns 1 in case the lsb is set */ + rss->hash_func = ffs(get_resp.u.flow_hash_func.selected_func); + if (rss->hash_func) + rss->hash_func--; + if (func) *func = rss->hash_func; From 135c5af29b5a4a7da0949251afb722079afb5a77 Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Tue, 11 Feb 2020 15:17:47 +0000 Subject: [PATCH 3496/3715] net: ena: fix incorrectly saving queue numbers when setting RSS indirection table [ Upstream commit 92569fd27f5cb0ccbdf7c7d70044b690e89a0277 ] The indirection table has the indices of the Rx queues. When we store it during set indirection operation, we convert the indices to our internal representation of the indices. Our internal representation of the indices is: even indices for Tx and uneven indices for Rx, where every Tx/Rx pair are in a consecutive order starting from 0. For example if the driver has 3 queues (3 for Tx and 3 for Rx) then the indices are as follows: 0 1 2 3 4 5 Tx Rx Tx Rx Tx Rx The BUG: The issue is that when we satisfy a get request for the indirection table, we don't convert the indices back to the original representation. The FIX: Simply apply the inverse function for the indices of the indirection table after we set it. Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Sameeh Jubran Signed-off-by: Arthur Kiyanovski Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_ethtool.c | 24 ++++++++++++++++++- drivers/net/ethernet/amazon/ena/ena_netdev.h | 2 ++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index 7ca7bade1c09..9601ddc27427 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -648,6 +648,28 @@ static u32 ena_get_rxfh_key_size(struct net_device *netdev) return ENA_HASH_KEY_SIZE; } +static int ena_indirection_table_get(struct ena_adapter *adapter, u32 *indir) +{ + struct ena_com_dev *ena_dev = adapter->ena_dev; + int i, rc; + + if (!indir) + return 0; + + rc = ena_com_indirect_table_get(ena_dev, indir); + if (rc) + return rc; + + /* Our internal representation of the indices is: even indices + * for Tx and uneven indices for Rx. We need to convert the Rx + * indices to be consecutive + */ + for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) + indir[i] = ENA_IO_RXQ_IDX_TO_COMBINED_IDX(indir[i]); + + return rc; +} + static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) { @@ -656,7 +678,7 @@ static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 func; int rc; - rc = ena_com_indirect_table_get(adapter->ena_dev, indir); + rc = ena_indirection_table_get(adapter, indir); if (rc) return rc; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index 3404376c28ca..5a72267b858b 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -113,6 +113,8 @@ #define ENA_IO_TXQ_IDX(q) (2 * (q)) #define ENA_IO_RXQ_IDX(q) (2 * (q) + 1) +#define ENA_IO_TXQ_IDX_TO_COMBINED_IDX(q) ((q) / 2) +#define ENA_IO_RXQ_IDX_TO_COMBINED_IDX(q) (((q) - 1) / 2) #define ENA_MGMNT_IRQ_IDX 0 #define ENA_IO_IRQ_FIRST_IDX 1 From ce54eb55ee12733fa2cd63c5479d548b69e45ed8 Mon Sep 17 00:00:00 2001 From: Sameeh Jubran Date: Tue, 11 Feb 2020 15:17:50 +0000 Subject: [PATCH 3497/3715] net: ena: ethtool: use correct value for crc32 hash [ Upstream commit 886d2089276e40d460731765083a741c5c762461 ] Up till kernel 4.11 there was no enum defined for crc32 hash in ethtool, thus the xor enum was used for supporting crc32. Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Sameeh Jubran Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index 9601ddc27427..22238f25e071 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -705,7 +705,7 @@ static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, func = ETH_RSS_HASH_TOP; break; case ENA_ADMIN_CRC32: - func = ETH_RSS_HASH_XOR; + func = ETH_RSS_HASH_CRC32; break; default: netif_err(adapter, drv, netdev, @@ -751,7 +751,7 @@ static int ena_set_rxfh(struct net_device *netdev, const u32 *indir, case ETH_RSS_HASH_TOP: func = ENA_ADMIN_TOEPLITZ; break; - case ETH_RSS_HASH_XOR: + case ETH_RSS_HASH_CRC32: func = ENA_ADMIN_CRC32; break; default: From 3f4d2bba01b6ac0fd36e61be06a7b844f18bd89c Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Tue, 11 Feb 2020 15:17:51 +0000 Subject: [PATCH 3498/3715] net: ena: ena-com.c: prevent NULL pointer dereference [ Upstream commit c207979f5ae10ed70aff1bb13f39f0736973de99 ] comp_ctx can be NULL in a very rare case when an admin command is executed during the execution of ena_remove(). The bug scenario is as follows: * ena_destroy_device() sets the comp_ctx to be NULL * An admin command is executed before executing unregister_netdev(), this can still happen because our device can still receive callbacks from the netdev infrastructure such as ethtool commands. * When attempting to access the comp_ctx, the bug occurs since it's set to NULL Fix: Added a check that comp_ctx is not NULL Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Sameeh Jubran Signed-off-by: Arthur Kiyanovski Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_com.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 552db5399503..31e0cf144201 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -199,6 +199,11 @@ static inline void comp_ctxt_release(struct ena_com_admin_queue *queue, static struct ena_comp_ctx *get_comp_ctxt(struct ena_com_admin_queue *queue, u16 command_id, bool capture) { + if (unlikely(!queue->comp_ctx)) { + pr_err("Completion context is NULL\n"); + return NULL; + } + if (unlikely(command_id >= queue->q_depth)) { pr_err("command id is larger than the queue size. cmd_id: %u queue size %d\n", command_id, queue->q_depth); From 125ccba46eca5150045418cd7ee88a9aa4daa8e4 Mon Sep 17 00:00:00 2001 From: Frank Sorenson Date: Wed, 12 Feb 2020 15:31:48 -0600 Subject: [PATCH 3499/3715] cifs: Fix mode output in debugging statements [ Upstream commit f52aa79df43c4509146140de0241bc21a4a3b4c7 ] A number of the debug statements output file or directory mode in hex. Change these to print using octal. Signed-off-by: Frank Sorenson Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/cifsacl.c | 4 ++-- fs/cifs/connect.c | 2 +- fs/cifs/inode.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index b98436f5c7c7..73d428af97a9 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -603,7 +603,7 @@ static void access_flags_to_mode(__le32 ace_flags, int type, umode_t *pmode, ((flags & FILE_EXEC_RIGHTS) == FILE_EXEC_RIGHTS)) *pmode |= (S_IXUGO & (*pbits_to_set)); - cifs_dbg(NOISY, "access flags 0x%x mode now 0x%x\n", flags, *pmode); + cifs_dbg(NOISY, "access flags 0x%x mode now %04o\n", flags, *pmode); return; } @@ -632,7 +632,7 @@ static void mode_to_access_flags(umode_t mode, umode_t bits_to_use, if (mode & S_IXUGO) *pace_flags |= SET_FILE_EXEC_RIGHTS; - cifs_dbg(NOISY, "mode: 0x%x, access flags now 0x%x\n", + cifs_dbg(NOISY, "mode: %04o, access flags now 0x%x\n", mode, *pace_flags); return; } diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 6e5ecf70996a..697edc92dff2 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3521,7 +3521,7 @@ int cifs_setup_cifs_sb(struct smb_vol *pvolume_info, cifs_sb->mnt_gid = pvolume_info->linux_gid; cifs_sb->mnt_file_mode = pvolume_info->file_mode; cifs_sb->mnt_dir_mode = pvolume_info->dir_mode; - cifs_dbg(FYI, "file mode: 0x%hx dir mode: 0x%hx\n", + cifs_dbg(FYI, "file mode: %04ho dir mode: %04ho\n", cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode); cifs_sb->actimeo = pvolume_info->actimeo; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index a35c14105906..3a10d405362e 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1581,7 +1581,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode) struct TCP_Server_Info *server; char *full_path; - cifs_dbg(FYI, "In cifs_mkdir, mode = 0x%hx inode = 0x%p\n", + cifs_dbg(FYI, "In cifs_mkdir, mode = %04ho inode = 0x%p\n", mode, inode); cifs_sb = CIFS_SB(inode->i_sb); From ffe09a48bd96094786c303f1cf29b32a29aa6dc8 Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Thu, 13 Feb 2020 13:16:16 +0000 Subject: [PATCH 3500/3715] cfg80211: add missing policy for NL80211_ATTR_STATUS_CODE [ Upstream commit ea75080110a4c1fa011b0a73cb8f42227143ee3e ] The nl80211_policy is missing for NL80211_ATTR_STATUS_CODE attribute. As a result, for strictly validated commands, it's assumed to not be supported. Signed-off-by: Sergey Matyukevich Link: https://lore.kernel.org/r/20200213131608.10541-2-sergey.matyukevich.os@quantenna.com Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/nl80211.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index df8c5312f26a..b248578aeb7b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -321,6 +321,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_CONTROL_PORT_ETHERTYPE] = { .type = NLA_U16 }, [NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT] = { .type = NLA_FLAG }, [NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG }, + [NL80211_ATTR_STATUS_CODE] = { .type = NLA_U16 }, [NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 }, [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 }, [NL80211_ATTR_PID] = { .type = NLA_U32 }, From 28178a88942819c19d9bc15e3a0703f60de1f2bb Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 11 Jan 2019 13:45:15 +0100 Subject: [PATCH 3501/3715] sysrq: Restore original console_loglevel when sysrq disabled commit 075e1a0c50f59ea210561d0d0fedbd945615df78 upstream. The sysrq header line is printed with an increased loglevel to provide users some positive feedback. The original loglevel is not restored when the sysrq operation is disabled. This bug was introduced in 2.6.12 (pre-git-history) by the commit ("Allow admin to enable only some of the Magic-Sysrq functions"). Signed-off-by: Petr Mladek Reviewed-by: Sergey Senozhatsky Reviewed-by: Steven Rostedt (VMware) Cc: Tommi Rantala Signed-off-by: Greg Kroah-Hartman --- drivers/tty/sysrq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 377b3592384e..a5516523f816 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -560,6 +560,7 @@ void __handle_sysrq(int key, bool check_mask) op_p->handler(key); } else { pr_cont("This sysrq operation is disabled.\n"); + console_loglevel = orig_log_level; } } else { pr_cont("HELP : "); From ac5ea065eefd755173d5d51ca45f8bf837d0485b Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 11 Jan 2019 17:20:37 +0100 Subject: [PATCH 3502/3715] sysrq: Remove duplicated sysrq message commit c3fee60908db4a8594f2e4a2131998384b8fa006 upstream. The commit 97f5f0cd8cd0a0544 ("Input: implement SysRq as a separate input handler") added pr_fmt() definition. It caused a duplicated message prefix in the sysrq header messages, for example: [ 177.053931] sysrq: SysRq : Show backtrace of all active CPUs [ 742.864776] sysrq: SysRq : HELP : loglevel(0-9) reboot(b) crash(c) Fixes: 97f5f0cd8cd0a05 ("Input: implement SysRq as a separate input handler") Signed-off-by: Petr Mladek Reviewed-by: Sergey Senozhatsky Cc: Tommi Rantala Signed-off-by: Greg Kroah-Hartman --- drivers/tty/sysrq.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index a5516523f816..4c716ddd6599 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -546,7 +546,6 @@ void __handle_sysrq(int key, bool check_mask) */ orig_log_level = console_loglevel; console_loglevel = CONSOLE_LOGLEVEL_DEFAULT; - pr_info("SysRq : "); op_p = __sysrq_get_key_op(key); if (op_p) { @@ -555,15 +554,15 @@ void __handle_sysrq(int key, bool check_mask) * should not) and is the invoked operation enabled? */ if (!check_mask || sysrq_on_mask(op_p->enable_mask)) { - pr_cont("%s\n", op_p->action_msg); + pr_info("%s\n", op_p->action_msg); console_loglevel = orig_log_level; op_p->handler(key); } else { - pr_cont("This sysrq operation is disabled.\n"); + pr_info("This sysrq operation is disabled.\n"); console_loglevel = orig_log_level; } } else { - pr_cont("HELP : "); + pr_info("HELP : "); /* Only print the help msg once per handler */ for (i = 0; i < ARRAY_SIZE(sysrq_key_table); i++) { if (sysrq_key_table[i]) { From 4c5b7276d7d1b13f04915900ca02ac7e1b670f67 Mon Sep 17 00:00:00 2001 From: Jethro Beekman Date: Wed, 12 Feb 2020 16:43:41 +0100 Subject: [PATCH 3503/3715] net: fib_rules: Correctly set table field when table number exceeds 8 bits [ Upstream commit 540e585a79e9d643ede077b73bcc7aa2d7b4d919 ] In 709772e6e06564ed94ba740de70185ac3d792773, RT_TABLE_COMPAT was added to allow legacy software to deal with routing table numbers >= 256, but the same change to FIB rule queries was overlooked. Signed-off-by: Jethro Beekman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/fib_rules.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 9a6d97c1d810..9bb321df0869 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -799,7 +799,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, frh = nlmsg_data(nlh); frh->family = ops->family; - frh->table = rule->table; + frh->table = rule->table < 256 ? rule->table : RT_TABLE_COMPAT; if (nla_put_u32(skb, FRA_TABLE, rule->table)) goto nla_put_failure; if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen)) From 385b8a8b0618d94a6d968f616bff93f6a2419fa1 Mon Sep 17 00:00:00 2001 From: Arun Parameswaran Date: Fri, 14 Feb 2020 13:47:46 -0800 Subject: [PATCH 3504/3715] net: phy: restore mdio regs in the iproc mdio driver commit 6f08e98d62799e53c89dbf2c9a49d77e20ca648c upstream. The mii management register in iproc mdio block does not have a retention register so it is lost on suspend. Save and restore value of register while resuming from suspend. Fixes: bb1a619735b4 ("net: phy: Initialize mdio clock at probe function") Signed-off-by: Arun Parameswaran Signed-off-by: Scott Branden Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/mdio-bcm-iproc.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/net/phy/mdio-bcm-iproc.c b/drivers/net/phy/mdio-bcm-iproc.c index 46fe1ae919a3..51ce3ea17fb3 100644 --- a/drivers/net/phy/mdio-bcm-iproc.c +++ b/drivers/net/phy/mdio-bcm-iproc.c @@ -188,6 +188,23 @@ static int iproc_mdio_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM_SLEEP +int iproc_mdio_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct iproc_mdio_priv *priv = platform_get_drvdata(pdev); + + /* restore the mii clock configuration */ + iproc_mdio_config_clk(priv->base); + + return 0; +} + +static const struct dev_pm_ops iproc_mdio_pm_ops = { + .resume = iproc_mdio_resume +}; +#endif /* CONFIG_PM_SLEEP */ + static const struct of_device_id iproc_mdio_of_match[] = { { .compatible = "brcm,iproc-mdio", }, { /* sentinel */ }, @@ -198,6 +215,9 @@ static struct platform_driver iproc_mdio_driver = { .driver = { .name = "iproc-mdio", .of_match_table = iproc_mdio_of_match, +#ifdef CONFIG_PM_SLEEP + .pm = &iproc_mdio_pm_ops, +#endif }, .probe = iproc_mdio_probe, .remove = iproc_mdio_remove, From a1229fc80189667f6f5e6bf4a8c271ed3b973958 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 19 Feb 2020 18:01:22 +0300 Subject: [PATCH 3505/3715] nfc: pn544: Fix occasional HW initialization failure [ Upstream commit c3331d2fe3fd4d5e321f2467d01f72de7edfb5d0 ] The PN544 driver checks the "enable" polarity during of driver's probe and it's doing that by turning ON and OFF NFC with different polarities until enabling succeeds. It takes some time for the hardware to power-down, and thus, to deassert the IRQ that is raised by turning ON the hardware. Since the delay after last power-down of the polarity-checking process is missed in the code, the interrupt may trigger immediately after installing the IRQ handler (right after the checking is done), which results in IRQ handler trying to touch the disabled HW and ends with marking NFC as 'DEAD' during of the driver's probe: pn544_hci_i2c 1-002a: NFC: nfc_en polarity : active high pn544_hci_i2c 1-002a: NFC: invalid len byte shdlc: llc_shdlc_recv_frame: NULL Frame -> link is dead This patch fixes the occasional NFC initialization failure on Nexus 7 device. Signed-off-by: Dmitry Osipenko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/nfc/pn544/i2c.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nfc/pn544/i2c.c b/drivers/nfc/pn544/i2c.c index 4b14740edb67..8ba5a6d6329e 100644 --- a/drivers/nfc/pn544/i2c.c +++ b/drivers/nfc/pn544/i2c.c @@ -236,6 +236,7 @@ static void pn544_hci_i2c_platform_init(struct pn544_i2c_phy *phy) out: gpiod_set_value_cansleep(phy->gpiod_en, !phy->en_polarity); + usleep_range(10000, 15000); } static void pn544_hci_i2c_enable_mode(struct pn544_i2c_phy *phy, int run_mode) From 4dfb4833a549dfdf3b6771d722c08140fbe9c137 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 18 Feb 2020 12:07:53 +0800 Subject: [PATCH 3506/3715] sctp: move the format error check out of __sctp_sf_do_9_1_abort [ Upstream commit 245709ec8be89af46ea7ef0444c9c80913999d99 ] When T2 timer is to be stopped, the asoc should also be deleted, otherwise, there will be no chance to call sctp_association_free and the asoc could last in memory forever. However, in sctp_sf_shutdown_sent_abort(), after adding the cmd SCTP_CMD_TIMER_STOP for T2 timer, it may return error due to the format error from __sctp_sf_do_9_1_abort() and miss adding SCTP_CMD_ASSOC_FAILED where the asoc will be deleted. This patch is to fix it by moving the format error check out of __sctp_sf_do_9_1_abort(), and do it before adding the cmd SCTP_CMD_TIMER_STOP for T2 timer. Thanks Hangbin for reporting this issue by the fuzz testing. v1->v2: - improve the comment in the code as Marcelo's suggestion. Fixes: 96ca468b86b0 ("sctp: check invalid value of length parameter in error cause") Reported-by: Hangbin Liu Acked-by: Marcelo Ricardo Leitner Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/sm_statefuns.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index a2e058127ef7..ba29d782af30 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -182,6 +182,16 @@ static inline bool sctp_chunk_length_valid(struct sctp_chunk *chunk, return true; } +/* Check for format error in an ABORT chunk */ +static inline bool sctp_err_chunk_valid(struct sctp_chunk *chunk) +{ + struct sctp_errhdr *err; + + sctp_walk_errors(err, chunk->chunk_hdr); + + return (void *)err == (void *)chunk->chunk_end; +} + /********************************************************** * These are the state functions for handling chunk events. **********************************************************/ @@ -2202,6 +2212,9 @@ enum sctp_disposition sctp_sf_shutdown_pending_abort( sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + if (!sctp_err_chunk_valid(chunk)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands); } @@ -2245,6 +2258,9 @@ enum sctp_disposition sctp_sf_shutdown_sent_abort( sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + if (!sctp_err_chunk_valid(chunk)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Stop the T2-shutdown timer. */ sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN)); @@ -2512,6 +2528,9 @@ enum sctp_disposition sctp_sf_do_9_1_abort( sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + if (!sctp_err_chunk_valid(chunk)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands); } @@ -2529,16 +2548,8 @@ static enum sctp_disposition __sctp_sf_do_9_1_abort( /* See if we have an error cause code in the chunk. */ len = ntohs(chunk->chunk_hdr->length); - if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) { - struct sctp_errhdr *err; - - sctp_walk_errors(err, chunk->chunk_hdr); - if ((void *)err != (void *)chunk->chunk_end) - return sctp_sf_pdiscard(net, ep, asoc, type, arg, - commands); - + if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) error = ((struct sctp_errhdr *)chunk->skb->data)->cause; - } sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNRESET)); /* ASSOC_FAILED will DELETE_TCB. */ From 14d5cc5ca2cef873b7caa991c3d7d38799a38ffa Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Wed, 12 Feb 2020 10:41:07 +0900 Subject: [PATCH 3507/3715] ipv6: Fix nlmsg_flags when splitting a multipath route [ Upstream commit afecdb376bd81d7e16578f0cfe82a1aec7ae18f3 ] When splitting an RTA_MULTIPATH request into multiple routes and adding the second and later components, we must not simply remove NLM_F_REPLACE but instead replace it by NLM_F_CREATE. Otherwise, it may look like the netlink message was malformed. For example, ip route add 2001:db8::1/128 dev dummy0 ip route change 2001:db8::1/128 nexthop via fe80::30:1 dev dummy0 \ nexthop via fe80::30:2 dev dummy0 results in the following warnings: [ 1035.057019] IPv6: RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE [ 1035.057517] IPv6: NLM_F_CREATE should be set when creating new route This patch makes the nlmsg sequence look equivalent for __ip6_ins_rt() to what it would get if the multipath route had been added in multiple netlink operations: ip route add 2001:db8::1/128 dev dummy0 ip route change 2001:db8::1/128 nexthop via fe80::30:1 dev dummy0 ip route append 2001:db8::1/128 nexthop via fe80::30:2 dev dummy0 Fixes: 27596472473a ("ipv6: fix ECMP route replacement") Signed-off-by: Benjamin Poirier Reviewed-by: Michal Kubecek Reviewed-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/route.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b81522bcf223..a4079ed56803 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3283,6 +3283,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, */ cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL | NLM_F_REPLACE); + cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE; nhn++; } From 84c4d6642d7bb79e19458bb337c982209c3a46ac Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Wed, 12 Feb 2020 10:41:06 +0900 Subject: [PATCH 3508/3715] ipv6: Fix route replacement with dev-only route [ Upstream commit e404b8c7cfb31654c9024d497cec58a501501692 ] After commit 27596472473a ("ipv6: fix ECMP route replacement") it is no longer possible to replace an ECMP-able route by a non ECMP-able route. For example, ip route add 2001:db8::1/128 via fe80::1 dev dummy0 ip route replace 2001:db8::1/128 dev dummy0 does not work as expected. Tweak the replacement logic so that point 3 in the log of the above commit becomes: 3. If the new route is not ECMP-able, and no matching non-ECMP-able route exists, replace matching ECMP-able route (if any) or add the new route. We can now summarize the entire replace semantics to: When doing a replace, prefer replacing a matching route of the same "ECMP-able-ness" as the replace argument. If there is no such candidate, fallback to the first route found. Fixes: 27596472473a ("ipv6: fix ECMP route replacement") Signed-off-by: Benjamin Poirier Reviewed-by: Michal Kubecek Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_fib.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index e5308d7cbd75..d43abeb1e415 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -893,8 +893,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, found++; break; } - if (rt_can_ecmp) - fallback_ins = fallback_ins ?: ins; + fallback_ins = fallback_ins ?: ins; goto next_iter; } @@ -934,7 +933,9 @@ next_iter: } if (fallback_ins && !found) { - /* No ECMP-able route found, replace first non-ECMP one */ + /* No matching route with same ecmp-able-ness found, replace + * first matching route + */ ins = fallback_ins; iter = *ins; found++; From eca56a6d55e82256426df9328215d4c67fe4d271 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 17 Feb 2020 13:37:18 +0200 Subject: [PATCH 3509/3715] qede: Fix race between rdma destroy workqueue and link change event [ Upstream commit af6565adb02d3129d3fae4d9d5da945abaf4417a ] If an event is added while the rdma workqueue is being destroyed it could lead to several races, list corruption, null pointer dereference during queue_work or init_queue. This fixes the race between the two flows which can occur during shutdown. A kref object and a completion object are added to the rdma_dev structure, these are initialized before the workqueue is created. The refcnt is used to indicate work is being added to the workqueue and ensures the cleanup flow won't start while we're in the middle of adding the event. Once the work is added, the refcnt is decreased and the cleanup flow is safe to run. Fixes: cee9fbd8e2e ("qede: Add qedr framework") Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qede/qede.h | 2 ++ drivers/net/ethernet/qlogic/qede/qede_rdma.c | 29 +++++++++++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index adb700512baa..a80531b5aecc 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -156,6 +156,8 @@ struct qede_rdma_dev { struct list_head entry; struct list_head rdma_event_list; struct workqueue_struct *rdma_wq; + struct kref refcnt; + struct completion event_comp; }; struct qede_ptp; diff --git a/drivers/net/ethernet/qlogic/qede/qede_rdma.c b/drivers/net/ethernet/qlogic/qede/qede_rdma.c index 1900bf7e67d1..cd12fb919ad5 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_rdma.c +++ b/drivers/net/ethernet/qlogic/qede/qede_rdma.c @@ -57,6 +57,9 @@ static void _qede_rdma_dev_add(struct qede_dev *edev) static int qede_rdma_create_wq(struct qede_dev *edev) { INIT_LIST_HEAD(&edev->rdma_info.rdma_event_list); + kref_init(&edev->rdma_info.refcnt); + init_completion(&edev->rdma_info.event_comp); + edev->rdma_info.rdma_wq = create_singlethread_workqueue("rdma_wq"); if (!edev->rdma_info.rdma_wq) { DP_NOTICE(edev, "qedr: Could not create workqueue\n"); @@ -81,8 +84,23 @@ static void qede_rdma_cleanup_event(struct qede_dev *edev) } } +static void qede_rdma_complete_event(struct kref *ref) +{ + struct qede_rdma_dev *rdma_dev = + container_of(ref, struct qede_rdma_dev, refcnt); + + /* no more events will be added after this */ + complete(&rdma_dev->event_comp); +} + static void qede_rdma_destroy_wq(struct qede_dev *edev) { + /* Avoid race with add_event flow, make sure it finishes before + * we start accessing the list and cleaning up the work + */ + kref_put(&edev->rdma_info.refcnt, qede_rdma_complete_event); + wait_for_completion(&edev->rdma_info.event_comp); + qede_rdma_cleanup_event(edev); destroy_workqueue(edev->rdma_info.rdma_wq); } @@ -287,15 +305,24 @@ static void qede_rdma_add_event(struct qede_dev *edev, if (!edev->rdma_info.qedr_dev) return; + /* We don't want the cleanup flow to start while we're allocating and + * scheduling the work + */ + if (!kref_get_unless_zero(&edev->rdma_info.refcnt)) + return; /* already being destroyed */ + event_node = qede_rdma_get_free_event_node(edev); if (!event_node) - return; + goto out; event_node->event = event; event_node->ptr = edev; INIT_WORK(&event_node->work, qede_rdma_handle_event); queue_work(edev->rdma_info.rdma_wq, &event_node->work); + +out: + kref_put(&edev->rdma_info.refcnt, qede_rdma_complete_event); } void qede_rdma_dev_event_open(struct qede_dev *edev) From 7c9fbd9447bcfc59d58ebcd9de92567569b78190 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 17 Feb 2020 15:38:09 -0500 Subject: [PATCH 3510/3715] net: sched: correct flower port blocking [ Upstream commit 8a9093c79863b58cc2f9874d7ae788f0d622a596 ] tc flower rules that are based on src or dst port blocking are sometimes ineffective due to uninitialized stack data. __skb_flow_dissect() extracts ports from the skb for tc flower to match against. However, the port dissection is not done when when the FLOW_DIS_IS_FRAGMENT bit is set in key_control->flags. All callers of __skb_flow_dissect(), zero-out the key_control field except for fl_classify() as used by the flower classifier. Thus, the FLOW_DIS_IS_FRAGMENT may be set on entry to __skb_flow_dissect(), since key_control is allocated on the stack and may not be initialized. Since key_basic and key_control are present for all flow keys, let's make sure they are initialized. Fixes: 62230715fd24 ("flow_dissector: do not dissect l4 ports for fragments") Co-developed-by: Eric Dumazet Signed-off-by: Eric Dumazet Acked-by: Cong Wang Signed-off-by: Jason Baron Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/flow_dissector.h | 9 +++++++++ net/sched/cls_flower.c | 1 + 2 files changed, 10 insertions(+) diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 227dc0a84172..ddf916e5e57d 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -5,6 +5,7 @@ #include #include #include +#include #include /** @@ -282,4 +283,12 @@ static inline void *skb_flow_dissector_target(struct flow_dissector *flow_dissec return ((char *)target_container) + flow_dissector->offset[key_id]; } +static inline void +flow_dissector_init_keys(struct flow_dissector_key_control *key_control, + struct flow_dissector_key_basic *key_basic) +{ + memset(key_control, 0, sizeof(*key_control)); + memset(key_basic, 0, sizeof(*key_basic)); +} + #endif diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 80a5a6d503c8..8974bd25c71e 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -160,6 +160,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, if (!atomic_read(&head->ht.nelems)) return -1; + flow_dissector_init_keys(&skb_key.control, &skb_key.basic); fl_clear_masked_range(&skb_key, &head->mask); info = skb_tunnel_info(skb); From 267e0a91b898619f9e747f944bb80e198910f7b4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 28 Feb 2020 12:22:56 +0300 Subject: [PATCH 3511/3715] ext4: potential crash on allocation error in ext4_alloc_flex_bg_array() commit 37b0b6b8b99c0e1c1f11abbe7cf49b6d03795b3f upstream. If sbi->s_flex_groups_allocated is zero and the first allocation fails then this code will crash. The problem is that "i--" will set "i" to -1 but when we compare "i >= sbi->s_flex_groups_allocated" then the -1 is type promoted to unsigned and becomes UINT_MAX. Since UINT_MAX is more than zero, the condition is true so we call kvfree(new_groups[-1]). The loop will carry on freeing invalid memory until it crashes. Fixes: 7c990728b99e ("ext4: fix potential race between s_flex_groups online resizing and access") Reviewed-by: Suraj Jitindar Singh Signed-off-by: Dan Carpenter Cc: stable@kernel.org Link: https://lore.kernel.org/r/20200228092142.7irbc44yaz3by7nb@kili.mountain Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index f1c1c180d267..f5646bcad770 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2238,7 +2238,7 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct flex_groups **old_groups, **new_groups; - int size, i; + int size, i, j; if (!sbi->s_log_groups_per_flex) return 0; @@ -2259,8 +2259,8 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) sizeof(struct flex_groups)), GFP_KERNEL); if (!new_groups[i]) { - for (i--; i >= sbi->s_flex_groups_allocated; i--) - kvfree(new_groups[i]); + for (j = sbi->s_flex_groups_allocated; j < i; j++) + kvfree(new_groups[j]); kvfree(new_groups); ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups", size); From edde9fcd5f41b2e22e455250214de0c4b126b255 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Sat, 22 Feb 2020 20:36:47 -0500 Subject: [PATCH 3512/3715] audit: fix error handling in audit_data_to_entry() commit 2ad3e17ebf94b7b7f3f64c050ff168f9915345eb upstream. Commit 219ca39427bf ("audit: use union for audit_field values since they are mutually exclusive") combined a number of separate fields in the audit_field struct into a single union. Generally this worked just fine because they are generally mutually exclusive. Unfortunately in audit_data_to_entry() the overlap can be a problem when a specific error case is triggered that causes the error path code to attempt to cleanup an audit_field struct and the cleanup involves attempting to free a stored LSM string (the lsm_str field). Currently the code always has a non-NULL value in the audit_field.lsm_str field as the top of the for-loop transfers a value into audit_field.val (both .lsm_str and .val are part of the same union); if audit_data_to_entry() fails and the audit_field struct is specified to contain a LSM string, but the audit_field.lsm_str has not yet been properly set, the error handling code will attempt to free the bogus audit_field.lsm_str value that was set with audit_field.val at the top of the for-loop. This patch corrects this by ensuring that the audit_field.val is only set when needed (it is cleared when the audit_field struct is allocated with kcalloc()). It also corrects a few other issues to ensure that in case of error the proper error code is returned. Cc: stable@vger.kernel.org Fixes: 219ca39427bf ("audit: use union for audit_field values since they are mutually exclusive") Reported-by: syzbot+1f4d90ead370d72e450b@syzkaller.appspotmail.com Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- kernel/auditfilter.c | 81 ++++++++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 37 deletions(-) diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 215c6e1ee026..16cf396ea738 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -435,6 +435,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, bufp = data->buf; for (i = 0; i < data->field_count; i++) { struct audit_field *f = &entry->rule.fields[i]; + u32 f_val; err = -EINVAL; @@ -443,12 +444,12 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, goto exit_free; f->type = data->fields[i]; - f->val = data->values[i]; + f_val = data->values[i]; /* Support legacy tests for a valid loginuid */ - if ((f->type == AUDIT_LOGINUID) && (f->val == AUDIT_UID_UNSET)) { + if ((f->type == AUDIT_LOGINUID) && (f_val == AUDIT_UID_UNSET)) { f->type = AUDIT_LOGINUID_SET; - f->val = 0; + f_val = 0; entry->rule.pflags |= AUDIT_LOGINUID_LEGACY; } @@ -464,7 +465,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, case AUDIT_SUID: case AUDIT_FSUID: case AUDIT_OBJ_UID: - f->uid = make_kuid(current_user_ns(), f->val); + f->uid = make_kuid(current_user_ns(), f_val); if (!uid_valid(f->uid)) goto exit_free; break; @@ -473,12 +474,13 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, case AUDIT_SGID: case AUDIT_FSGID: case AUDIT_OBJ_GID: - f->gid = make_kgid(current_user_ns(), f->val); + f->gid = make_kgid(current_user_ns(), f_val); if (!gid_valid(f->gid)) goto exit_free; break; case AUDIT_SESSIONID: case AUDIT_ARCH: + f->val = f_val; entry->rule.arch_f = f; break; case AUDIT_SUBJ_USER: @@ -491,11 +493,13 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, case AUDIT_OBJ_TYPE: case AUDIT_OBJ_LEV_LOW: case AUDIT_OBJ_LEV_HIGH: - str = audit_unpack_string(&bufp, &remain, f->val); - if (IS_ERR(str)) + str = audit_unpack_string(&bufp, &remain, f_val); + if (IS_ERR(str)) { + err = PTR_ERR(str); goto exit_free; - entry->rule.buflen += f->val; - + } + entry->rule.buflen += f_val; + f->lsm_str = str; err = security_audit_rule_init(f->type, f->op, str, (void **)&f->lsm_rule); /* Keep currently invalid fields around in case they @@ -504,68 +508,71 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, pr_warn("audit rule for LSM \'%s\' is invalid\n", str); err = 0; - } - if (err) { - kfree(str); + } else if (err) goto exit_free; - } else - f->lsm_str = str; break; case AUDIT_WATCH: - str = audit_unpack_string(&bufp, &remain, f->val); - if (IS_ERR(str)) + str = audit_unpack_string(&bufp, &remain, f_val); + if (IS_ERR(str)) { + err = PTR_ERR(str); goto exit_free; - entry->rule.buflen += f->val; - - err = audit_to_watch(&entry->rule, str, f->val, f->op); + } + err = audit_to_watch(&entry->rule, str, f_val, f->op); if (err) { kfree(str); goto exit_free; } + entry->rule.buflen += f_val; break; case AUDIT_DIR: - str = audit_unpack_string(&bufp, &remain, f->val); - if (IS_ERR(str)) + str = audit_unpack_string(&bufp, &remain, f_val); + if (IS_ERR(str)) { + err = PTR_ERR(str); goto exit_free; - entry->rule.buflen += f->val; - + } err = audit_make_tree(&entry->rule, str, f->op); kfree(str); if (err) goto exit_free; + entry->rule.buflen += f_val; break; case AUDIT_INODE: + f->val = f_val; err = audit_to_inode(&entry->rule, f); if (err) goto exit_free; break; case AUDIT_FILTERKEY: - if (entry->rule.filterkey || f->val > AUDIT_MAX_KEY_LEN) + if (entry->rule.filterkey || f_val > AUDIT_MAX_KEY_LEN) goto exit_free; - str = audit_unpack_string(&bufp, &remain, f->val); - if (IS_ERR(str)) - goto exit_free; - entry->rule.buflen += f->val; - entry->rule.filterkey = str; - break; - case AUDIT_EXE: - if (entry->rule.exe || f->val > PATH_MAX) - goto exit_free; - str = audit_unpack_string(&bufp, &remain, f->val); + str = audit_unpack_string(&bufp, &remain, f_val); if (IS_ERR(str)) { err = PTR_ERR(str); goto exit_free; } - entry->rule.buflen += f->val; - - audit_mark = audit_alloc_mark(&entry->rule, str, f->val); + entry->rule.buflen += f_val; + entry->rule.filterkey = str; + break; + case AUDIT_EXE: + if (entry->rule.exe || f_val > PATH_MAX) + goto exit_free; + str = audit_unpack_string(&bufp, &remain, f_val); + if (IS_ERR(str)) { + err = PTR_ERR(str); + goto exit_free; + } + audit_mark = audit_alloc_mark(&entry->rule, str, f_val); if (IS_ERR(audit_mark)) { kfree(str); err = PTR_ERR(audit_mark); goto exit_free; } + entry->rule.buflen += f_val; entry->rule.exe = audit_mark; break; + default: + f->val = f_val; + break; } } From 0a94e100b4fe9bd250e2c1f7624a70de2cdf4bc8 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 12 Feb 2020 17:59:39 +0300 Subject: [PATCH 3513/3715] ACPICA: Introduce ACPI_ACCESS_BYTE_WIDTH() macro commit 1dade3a7048ccfc675650cd2cf13d578b095e5fb upstream. Sometimes it is useful to find the access_width field value in bytes and not in bits so add a helper that can be used for this purpose. Suggested-by: Jean Delvare Signed-off-by: Mika Westerberg Reviewed-by: Jean Delvare Cc: 4.16+ # 4.16+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- include/acpi/actypes.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 4f077edb9b81..71fadbe77e21 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -556,11 +556,12 @@ typedef u64 acpi_integer; #define ACPI_MAKE_RSDP_SIG(dest) (memcpy (ACPI_CAST_PTR (char, (dest)), ACPI_SIG_RSDP, 8)) /* - * Algorithm to obtain access bit width. + * Algorithm to obtain access bit or byte width. * Can be used with access_width of struct acpi_generic_address and access_size of * struct acpi_resource_generic_register. */ #define ACPI_ACCESS_BIT_WIDTH(size) (1 << ((size) + 2)) +#define ACPI_ACCESS_BYTE_WIDTH(size) (1 << ((size) - 1)) /******************************************************************************* * From dbaf976ebef18921b88e2f360f91c5fd3ef89f8f Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 12 Feb 2020 17:59:40 +0300 Subject: [PATCH 3514/3715] ACPI: watchdog: Fix gas->access_width usage commit 2ba33a4e9e22ac4dda928d3e9b5978a3a2ded4e0 upstream. ACPI Generic Address Structure (GAS) access_width field is not in bytes as the driver seems to expect in few places so fix this by using the newly introduced macro ACPI_ACCESS_BYTE_WIDTH(). Fixes: b1abf6fc4982 ("ACPI / watchdog: Fix off-by-one error at resource assignment") Fixes: 058dfc767008 ("ACPI / watchdog: Add support for WDAT hardware watchdog") Reported-by: Jean Delvare Signed-off-by: Mika Westerberg Reviewed-by: Jean Delvare Cc: 4.16+ # 4.16+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpi_watchdog.c | 3 +-- drivers/watchdog/wdat_wdt.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/acpi_watchdog.c b/drivers/acpi/acpi_watchdog.c index 95600309ce42..23cde3d8e8fb 100644 --- a/drivers/acpi/acpi_watchdog.c +++ b/drivers/acpi/acpi_watchdog.c @@ -129,12 +129,11 @@ void __init acpi_watchdog_init(void) gas = &entries[i].register_region; res.start = gas->address; + res.end = res.start + ACPI_ACCESS_BYTE_WIDTH(gas->access_width) - 1; if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { res.flags = IORESOURCE_MEM; - res.end = res.start + ALIGN(gas->access_width, 4) - 1; } else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { res.flags = IORESOURCE_IO; - res.end = res.start + gas->access_width - 1; } else { pr_warn("Unsupported address space: %u\n", gas->space_id); diff --git a/drivers/watchdog/wdat_wdt.c b/drivers/watchdog/wdat_wdt.c index 0da9943d405f..c310e841561c 100644 --- a/drivers/watchdog/wdat_wdt.c +++ b/drivers/watchdog/wdat_wdt.c @@ -392,7 +392,7 @@ static int wdat_wdt_probe(struct platform_device *pdev) memset(&r, 0, sizeof(r)); r.start = gas->address; - r.end = r.start + gas->access_width - 1; + r.end = r.start + ACPI_ACCESS_BYTE_WIDTH(gas->access_width) - 1; if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { r.flags = IORESOURCE_MEM; } else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { From 634572f8c0aa43f63bb203fd4627e9b487393562 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Sat, 29 Feb 2020 11:30:14 -0800 Subject: [PATCH 3515/3715] KVM: VMX: check descriptor table exits on instruction emulation commit 86f7e90ce840aa1db407d3ea6e9b3a52b2ce923c upstream. KVM emulates UMIP on hardware that doesn't support it by setting the 'descriptor table exiting' VM-execution control and performing instruction emulation. When running nested, this emulation is broken as KVM refuses to emulate L2 instructions by default. Correct this regression by allowing the emulation of descriptor table instructions if L1 hasn't requested 'descriptor table exiting'. Fixes: 07721feee46b ("KVM: nVMX: Don't emulate instructions in guest mode") Reported-by: Jan Kiszka Cc: stable@vger.kernel.org Cc: Paolo Bonzini Cc: Jim Mattson Signed-off-by: Oliver Upton Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index acf72da288f9..f85680b86524 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -12370,6 +12370,7 @@ static int vmx_check_intercept_io(struct kvm_vcpu *vcpu, else intercept = nested_vmx_check_io_bitmaps(vcpu, port, size); + /* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */ return intercept ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; } @@ -12399,6 +12400,20 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu, case x86_intercept_outs: return vmx_check_intercept_io(vcpu, info); + case x86_intercept_lgdt: + case x86_intercept_lidt: + case x86_intercept_lldt: + case x86_intercept_ltr: + case x86_intercept_sgdt: + case x86_intercept_sidt: + case x86_intercept_sldt: + case x86_intercept_str: + if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC)) + return X86EMUL_CONTINUE; + + /* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */ + break; + /* TODO: check more intercepts... */ default: break; From b011c72eda318297e84af2eec1dd32c1d06bd1b8 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 1 Feb 2020 12:56:48 +0100 Subject: [PATCH 3516/3715] HID: ite: Only bind to keyboard USB interface on Acer SW5-012 keyboard dock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit beae56192a2570578ae45050e73c5ff9254f63e6 upstream. Commit 8f18eca9ebc5 ("HID: ite: Add USB id match for Acer SW5-012 keyboard dock") added the USB id for the Acer SW5-012's keyboard dock to the hid-ite driver to fix the rfkill driver not working. Most keyboard docks with an ITE 8595 keyboard/touchpad controller have the "Wireless Radio Control" bits which need the special hid-ite driver on the second USB interface (the mouse interface) and their touchpad only supports mouse emulation, so using generic hid-input handling for anything but the "Wireless Radio Control" bits is fine. On these devices we simply bind to all USB interfaces. But unlike other ITE8595 using keyboard docks, the Acer Aspire Switch 10 (SW5-012)'s touchpad not only does mouse emulation it also supports HID-multitouch and all the keys including the "Wireless Radio Control" bits have been moved to the first USB interface (the keyboard intf). So we need hid-ite to handle the first (keyboard) USB interface and have it NOT bind to the second (mouse) USB interface so that that can be handled by hid-multitouch.c and we get proper multi-touch support. This commit changes the hid_device_id for the SW5-012 keyboard dock to only match on hid devices from the HID_GROUP_GENERIC group, this way hid-ite will not bind the the mouse/multi-touch interface which has HID_GROUP_MULTITOUCH_WIN_8 as group. This fixes the regression to mouse-emulation mode introduced by adding the keyboard dock USB id. Cc: stable@vger.kernel.org Fixes: 8f18eca9ebc5 ("HID: ite: Add USB id match for Acer SW5-012 keyboard dock") Reported-by: Zdeněk Rampas Signed-off-by: Hans de Goede Signed-off-by: Benjamin Tissoires Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-ite.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c index 2ce1eb0c9212..f2e23f81601e 100644 --- a/drivers/hid/hid-ite.c +++ b/drivers/hid/hid-ite.c @@ -44,8 +44,9 @@ static const struct hid_device_id ite_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ITE, USB_DEVICE_ID_ITE8595) }, { HID_USB_DEVICE(USB_VENDOR_ID_258A, USB_DEVICE_ID_258A_6A88) }, /* ITE8595 USB kbd ctlr, with Synaptics touchpad connected to it. */ - { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, - USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) }, + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, + USB_VENDOR_ID_SYNAPTICS, + USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) }, { } }; MODULE_DEVICE_TABLE(hid, ite_devices); From 67e43711b143dd933c98934747d4167657966af9 Mon Sep 17 00:00:00 2001 From: Johan Korsnes Date: Fri, 17 Jan 2020 13:08:35 +0100 Subject: [PATCH 3517/3715] HID: core: fix off-by-one memset in hid_report_raw_event() commit 5ebdffd25098898aff1249ae2f7dbfddd76d8f8f upstream. In case a report is greater than HID_MAX_BUFFER_SIZE, it is truncated, but the report-number byte is not correctly handled. This results in a off-by-one in the following memset, causing a kernel Oops and ensuing system crash. Note: With commit 8ec321e96e05 ("HID: Fix slab-out-of-bounds read in hid_field_extract") I no longer hit the kernel Oops as we instead fail "controlled" at probe if there is a report too long in the HID report-descriptor. hid_report_raw_event() is an exported symbol, so presumabely we cannot always rely on this being the case. Fixes: 966922f26c7f ("HID: fix a crash in hid_report_raw_event() function.") Signed-off-by: Johan Korsnes Cc: Armando Visconti Cc: Jiri Kosina Cc: Alan Stern Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 2d089d3954e3..75b0a337114d 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1567,7 +1567,9 @@ int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, u32 size, rsize = ((report->size - 1) >> 3) + 1; - if (rsize > HID_MAX_BUFFER_SIZE) + if (report_enum->numbered && rsize >= HID_MAX_BUFFER_SIZE) + rsize = HID_MAX_BUFFER_SIZE - 1; + else if (rsize > HID_MAX_BUFFER_SIZE) rsize = HID_MAX_BUFFER_SIZE; if (csize < rsize) { From 33533185aa222d30946017f8f689fa7abc9384ff Mon Sep 17 00:00:00 2001 From: Johan Korsnes Date: Fri, 17 Jan 2020 13:08:36 +0100 Subject: [PATCH 3518/3715] HID: core: increase HID report buffer size to 8KiB commit 84a4062632462c4320704fcdf8e99e89e94c0aba upstream. We have a HID touch device that reports its opens and shorts test results in HID buffers of size 8184 bytes. The maximum size of the HID buffer is currently set to 4096 bytes, causing probe of this device to fail. With this patch we increase the maximum size of the HID buffer to 8192 bytes, making device probe and acquisition of said buffers succeed. Signed-off-by: Johan Korsnes Cc: Alan Stern Cc: Armando Visconti Cc: Jiri Kosina Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- include/linux/hid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/hid.h b/include/linux/hid.h index 3656a04d764b..ba1f67559831 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -477,7 +477,7 @@ struct hid_report_enum { }; #define HID_MIN_BUFFER_SIZE 64 /* make sure there is at least a packet size of space */ -#define HID_MAX_BUFFER_SIZE 4096 /* 4kb */ +#define HID_MAX_BUFFER_SIZE 8192 /* 8kb */ #define HID_CONTROL_FIFO_SIZE 256 /* to init devices with >100 reports */ #define HID_OUTPUT_FIFO_SIZE 64 From 581695e615571f68d9c63d82d88a8172ef160d62 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 20 Feb 2020 15:38:01 -0500 Subject: [PATCH 3519/3715] tracing: Disable trace_printk() on post poned tests commit 78041c0c9e935d9ce4086feeff6c569ed88ddfd4 upstream. The tracing seftests checks various aspects of the tracing infrastructure, and one is filtering. If trace_printk() is active during a self test, it can cause the filtering to fail, which will disable that part of the trace. To keep the selftests from failing because of trace_printk() calls, trace_printk() checks the variable tracing_selftest_running, and if set, it does not write to the tracing buffer. As some tracers were registered earlier in boot, the selftest they triggered would fail because not all the infrastructure was set up for the full selftest. Thus, some of the tests were post poned to when their infrastructure was ready (namely file system code). The postpone code did not set the tracing_seftest_running variable, and could fail if a trace_printk() was added and executed during their run. Cc: stable@vger.kernel.org Fixes: 9afecfbb95198 ("tracing: Postpone tracer start-up tests till the system is more robust") Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c456c2b06277..207d7c35214f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1543,6 +1543,7 @@ static __init int init_trace_selftests(void) pr_info("Running postponed tracer tests:\n"); + tracing_selftest_running = true; list_for_each_entry_safe(p, n, &postponed_selftests, list) { ret = run_tracer_selftest(p->type); /* If the test fails, then warn and remove from available_tracers */ @@ -1561,6 +1562,7 @@ static __init int init_trace_selftests(void) list_del(&p->list); kfree(p); } + tracing_selftest_running = false; out: mutex_unlock(&trace_types_lock); From 596e029dfcc6bfa8236f4e6e64b00dab8e19051a Mon Sep 17 00:00:00 2001 From: Orson Zhai Date: Fri, 21 Feb 2020 01:37:04 +0800 Subject: [PATCH 3520/3715] Revert "PM / devfreq: Modify the device name as devfreq(X) for sysfs" commit 66d0e797bf095d407479c89952d42b1d96ef0a7f upstream. This reverts commit 4585fbcb5331fc910b7e553ad3efd0dd7b320d14. The name changing as devfreq(X) breaks some user space applications, such as Android HAL from Unisoc and Hikey [1]. The device name will be changed unexpectly after every boot depending on module init sequence. It will make trouble to setup some system configuration like selinux for Android. So we'd like to revert it back to old naming rule before any better way being found. [1] https://lkml.org/lkml/2018/5/8/1042 Cc: John Stultz Cc: Greg Kroah-Hartman Cc: stable@vger.kernel.org Signed-off-by: Orson Zhai Acked-by: Greg Kroah-Hartman Signed-off-by: Chanwoo Choi Signed-off-by: Greg Kroah-Hartman --- drivers/devfreq/devfreq.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 58ec3abfd321..b05e6a15221c 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -513,7 +513,6 @@ struct devfreq *devfreq_add_device(struct device *dev, { struct devfreq *devfreq; struct devfreq_governor *governor; - static atomic_t devfreq_no = ATOMIC_INIT(-1); int err = 0; if (!dev || !profile || !governor_name) { @@ -556,8 +555,7 @@ struct devfreq *devfreq_add_device(struct device *dev, mutex_lock(&devfreq->lock); } - dev_set_name(&devfreq->dev, "devfreq%d", - atomic_inc_return(&devfreq_no)); + dev_set_name(&devfreq->dev, "%s", dev_name(dev)); err = device_register(&devfreq->dev); if (err) { mutex_unlock(&devfreq->lock); From 9539c5cd71b0a506d556f056284403cff5bf8e72 Mon Sep 17 00:00:00 2001 From: "dan.carpenter@oracle.com" Date: Wed, 15 Jan 2020 20:46:28 +0300 Subject: [PATCH 3521/3715] HID: hiddev: Fix race in in hiddev_disconnect() commit 5c02c447eaeda29d3da121a2e17b97ccaf579b51 upstream. Syzbot reports that "hiddev" is used after it's free in hiddev_disconnect(). The hiddev_disconnect() function sets "hiddev->exist = 0;" so hiddev_release() can free it as soon as we drop the "existancelock" lock. This patch moves the mutex_unlock(&hiddev->existancelock) until after we have finished using it. Reported-by: syzbot+784ccb935f9900cc7c9e@syzkaller.appspotmail.com Fixes: 7f77897ef2b6 ("HID: hiddev: fix potential use-after-free") Suggested-by: Alan Stern Signed-off-by: Dan Carpenter Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/usbhid/hiddev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c index bccd97cdc53f..d9602f3a359e 100644 --- a/drivers/hid/usbhid/hiddev.c +++ b/drivers/hid/usbhid/hiddev.c @@ -954,9 +954,9 @@ void hiddev_disconnect(struct hid_device *hid) hiddev->exist = 0; if (hiddev->open) { - mutex_unlock(&hiddev->existancelock); hid_hw_close(hiddev->hid); wake_up_interruptible(&hiddev->wait); + mutex_unlock(&hiddev->existancelock); } else { mutex_unlock(&hiddev->existancelock); kfree(hiddev); From dea270c28f815590cf4376da29e4e8341dd5b98d Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 2 Feb 2020 21:19:22 +0100 Subject: [PATCH 3522/3715] MIPS: VPE: Fix a double free and a memory leak in 'release_vpe()' commit bef8e2dfceed6daeb6ca3e8d33f9c9d43b926580 upstream. Pointer on the memory allocated by 'alloc_progmem()' is stored in 'v->load_addr'. So this is this memory that should be freed by 'release_progmem()'. 'release_progmem()' is only a call to 'kfree()'. With the current code, there is both a double free and a memory leak. Fix it by passing the correct pointer to 'release_progmem()'. Fixes: e01402b115ccc ("More AP / SP bits for the 34K, the Malta bits and things. Still wants") Signed-off-by: Christophe JAILLET Signed-off-by: Paul Burton Cc: ralf@linux-mips.org Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: kernel-janitors@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/vpe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c index 544ea21bfef9..b2683aca401f 100644 --- a/arch/mips/kernel/vpe.c +++ b/arch/mips/kernel/vpe.c @@ -134,7 +134,7 @@ void release_vpe(struct vpe *v) { list_del(&v->list); if (v->load_addr) - release_progmem(v); + release_progmem(v->load_addr); kfree(v); } From ee850f01b3a36d172624f178a20ca434d33240d8 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 11 Feb 2020 08:47:04 -0600 Subject: [PATCH 3523/3715] i2c: altera: Fix potential integer overflow commit 54498e8070e19e74498a72c7331348143e7e1f8c upstream. Factor out 100 from the equation and do 32-bit arithmetic (3 * clk_mhz / 10) instead of 64-bit. Notice that clk_mhz is MHz, so the multiplication will never wrap 32 bits and there is no need for div_u64(). Addresses-Coverity: 1458369 ("Unintentional integer overflow") Fixes: 0560ad576268 ("i2c: altera: Add Altera I2C Controller driver") Suggested-by: David Laight Signed-off-by: Gustavo A. R. Silva Reviewed-by: Thor Thayer Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-altera.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-altera.c b/drivers/i2c/busses/i2c-altera.c index f5e1941e65b5..a1cdcfc74acf 100644 --- a/drivers/i2c/busses/i2c-altera.c +++ b/drivers/i2c/busses/i2c-altera.c @@ -182,7 +182,7 @@ static void altr_i2c_init(struct altr_i2c_dev *idev) /* SCL Low Time */ writel(t_low, idev->base + ALTR_I2C_SCL_LOW); /* SDA Hold Time, 300ns */ - writel(div_u64(300 * clk_mhz, 1000), idev->base + ALTR_I2C_SDA_HOLD); + writel(3 * clk_mhz / 10, idev->base + ALTR_I2C_SDA_HOLD); /* Mask all master interrupt bits */ altr_i2c_int_enable(idev, ALTR_I2C_ALL_IRQ, false); From 60493d08647814112f2941c37643109b8aa8f6c3 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 12 Feb 2020 10:35:30 +0100 Subject: [PATCH 3524/3715] i2c: jz4780: silence log flood on txabrt commit 9e661cedcc0a072d91a32cb88e0515ea26e35711 upstream. The printout for txabrt is way too talkative and is highly annoying with scanning programs like 'i2cdetect'. Reduce it to the minimum, the rest can be gained by I2C core debugging and datasheet information. Also, make it a debug printout, it won't help the regular user. Fixes: ba92222ed63a ("i2c: jz4780: Add i2c bus controller driver for Ingenic JZ4780") Reported-by: H. Nikolaus Schaller Tested-by: H. Nikolaus Schaller Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-jz4780.c | 36 ++------------------------------- 1 file changed, 2 insertions(+), 34 deletions(-) diff --git a/drivers/i2c/busses/i2c-jz4780.c b/drivers/i2c/busses/i2c-jz4780.c index 30132c3957cd..41ca9ff7b5da 100644 --- a/drivers/i2c/busses/i2c-jz4780.c +++ b/drivers/i2c/busses/i2c-jz4780.c @@ -82,25 +82,6 @@ #define JZ4780_I2C_STA_TFNF BIT(1) #define JZ4780_I2C_STA_ACT BIT(0) -static const char * const jz4780_i2c_abrt_src[] = { - "ABRT_7B_ADDR_NOACK", - "ABRT_10ADDR1_NOACK", - "ABRT_10ADDR2_NOACK", - "ABRT_XDATA_NOACK", - "ABRT_GCALL_NOACK", - "ABRT_GCALL_READ", - "ABRT_HS_ACKD", - "SBYTE_ACKDET", - "ABRT_HS_NORSTRT", - "SBYTE_NORSTRT", - "ABRT_10B_RD_NORSTRT", - "ABRT_MASTER_DIS", - "ARB_LOST", - "SLVFLUSH_TXFIFO", - "SLV_ARBLOST", - "SLVRD_INTX", -}; - #define JZ4780_I2C_INTST_IGC BIT(11) #define JZ4780_I2C_INTST_ISTT BIT(10) #define JZ4780_I2C_INTST_ISTP BIT(9) @@ -538,21 +519,8 @@ done: static void jz4780_i2c_txabrt(struct jz4780_i2c *i2c, int src) { - int i; - - dev_err(&i2c->adap.dev, "txabrt: 0x%08x\n", src); - dev_err(&i2c->adap.dev, "device addr=%x\n", - jz4780_i2c_readw(i2c, JZ4780_I2C_TAR)); - dev_err(&i2c->adap.dev, "send cmd count:%d %d\n", - i2c->cmd, i2c->cmd_buf[i2c->cmd]); - dev_err(&i2c->adap.dev, "receive data count:%d %d\n", - i2c->cmd, i2c->data_buf[i2c->cmd]); - - for (i = 0; i < 16; i++) { - if (src & BIT(i)) - dev_dbg(&i2c->adap.dev, "I2C TXABRT[%d]=%s\n", - i, jz4780_i2c_abrt_src[i]); - } + dev_dbg(&i2c->adap.dev, "txabrt: 0x%08x, cmd: %d, send: %d, recv: %d\n", + src, i2c->cmd, i2c->cmd_buf[i2c->cmd], i2c->data_buf[i2c->cmd]); } static inline int jz4780_i2c_xfer_read(struct jz4780_i2c *i2c, From b67d1c342e1329d7a44d3306242e4dfe3a601552 Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Fri, 21 Feb 2020 10:32:34 +0800 Subject: [PATCH 3525/3715] drm/i915/gvt: Separate display reset from ALL_ENGINES reset commit 3eb55e6f753a379e293395de8d5f3be28351a7f8 upstream. ALL_ENGINES reset doesn't clobber display with the current gvt-g supported platforms. Thus ALL_ENGINES reset shouldn't reset the display engine registers emulated by gvt-g. This fixes guest warning like [ 14.622026] [drm] Initialized i915 1.6.0 20200114 for 0000:00:03.0 on minor 0 [ 14.967917] fbcon: i915drmfb (fb0) is primary device [ 25.100188] [drm:drm_atomic_helper_wait_for_dependencies [drm_kms_helper]] E RROR [CRTC:51:pipe A] flip_done timed out [ 25.100860] -----------[ cut here ]----------- [ 25.100861] pll on state mismatch (expected 0, found 1) [ 25.101024] WARNING: CPU: 1 PID: 30 at drivers/gpu/drm/i915/display/intel_dis play.c:14382 verify_single_dpll_state.isra.115+0x28f/0x320 [i915] [ 25.101025] Modules linked in: intel_rapl_msr intel_rapl_common kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel i915 aesni_intel cr ypto_simd cryptd glue_helper cec rc_core video drm_kms_helper joydev drm input_l eds i2c_algo_bit serio_raw fb_sys_fops syscopyarea sysfillrect sysimgblt mac_hid qemu_fw_cfg sch_fq_codel parport_pc ppdev lp parport ip_tables x_tables autofs4 e1000 psmouse i2c_piix4 pata_acpi floppy [ 25.101052] CPU: 1 PID: 30 Comm: kworker/u4:1 Not tainted 5.5.0+ #1 [ 25.101053] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1 .12.1-0-ga5cab58 04/01/2014 [ 25.101055] Workqueue: events_unbound async_run_entry_fn [ 25.101092] RIP: 0010:verify_single_dpll_state.isra.115+0x28f/0x320 [i915] [ 25.101093] Code: e0 d9 ff e9 a3 fe ff ff 80 3d e9 c2 11 00 00 44 89 f6 48 c7 c7 c0 9d 88 c0 75 3b e8 eb df d9 ff e9 c7 fe ff ff e8 d1 e0 ae c4 <0f> 0b e9 7a fe ff ff 80 3d c0 c2 11 00 00 8d 71 41 89 c2 48 c7 c7 [ 25.101093] RSP: 0018:ffffb1de80107878 EFLAGS: 00010286 [ 25.101094] RAX: 0000000000000000 RBX: ffffb1de80107884 RCX: 0000000000000007 [ 25.101095] RDX: 0000000000000000 RSI: 0000000000000002 RDI: ffff94fdfdd19740 [ 25.101095] RBP: ffffb1de80107938 R08: 0000000d6bfdc7b4 R09: 000000000000002b [ 25.101096] R10: ffff94fdf82dc000 R11: 0000000000000225 R12: 00000000000001f8 [ 25.101096] R13: ffff94fdb3ca6a90 R14: ffff94fdb3ca0000 R15: 0000000000000000 [ 25.101097] FS: 0000000000000000(0000) GS:ffff94fdfdd00000(0000) knlGS:00000 00000000000 [ 25.101098] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 25.101098] CR2: 00007fbc3e2be9c8 CR3: 000000003339a003 CR4: 0000000000360ee0 [ 25.101101] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 25.101101] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 25.101102] Call Trace: [ 25.101139] intel_atomic_commit_tail+0xde4/0x1520 [i915] [ 25.101141] ? flush_workqueue_prep_pwqs+0xfa/0x130 [ 25.101142] ? flush_workqueue+0x198/0x3c0 [ 25.101174] intel_atomic_commit+0x2ad/0x320 [i915] [ 25.101209] drm_atomic_commit+0x4a/0x50 [drm] [ 25.101220] drm_client_modeset_commit_atomic+0x1c4/0x200 [drm] [ 25.101231] drm_client_modeset_commit_force+0x47/0x170 [drm] [ 25.101250] drm_fb_helper_restore_fbdev_mode_unlocked+0x4e/0xa0 [drm_kms_hel per] [ 25.101255] drm_fb_helper_set_par+0x2d/0x60 [drm_kms_helper] [ 25.101287] intel_fbdev_set_par+0x1a/0x40 [i915] [ 25.101289] ? con_is_visible+0x2e/0x60 [ 25.101290] fbcon_init+0x378/0x600 [ 25.101292] visual_init+0xd5/0x130 [ 25.101296] do_bind_con_driver+0x217/0x430 [ 25.101297] do_take_over_console+0x7d/0x1b0 [ 25.101298] do_fbcon_takeover+0x5c/0xb0 [ 25.101299] fbcon_fb_registered+0x199/0x1a0 [ 25.101301] register_framebuffer+0x22c/0x330 [ 25.101306] __drm_fb_helper_initial_config_and_unlock+0x31a/0x520 [drm_kms_h elper] [ 25.101311] drm_fb_helper_initial_config+0x35/0x40 [drm_kms_helper] [ 25.101341] intel_fbdev_initial_config+0x18/0x30 [i915] [ 25.101342] async_run_entry_fn+0x3c/0x150 [ 25.101343] process_one_work+0x1fd/0x3f0 [ 25.101344] worker_thread+0x34/0x410 [ 25.101346] kthread+0x121/0x140 [ 25.101346] ? process_one_work+0x3f0/0x3f0 [ 25.101347] ? kthread_park+0x90/0x90 [ 25.101350] ret_from_fork+0x35/0x40 [ 25.101351] --[ end trace b5b47d44cd998ba1 ]-- Fixes: 6294b61ba769 ("drm/i915/gvt: add missing display part reset for vGPU reset") Signed-off-by: Tina Zhang Reviewed-by: Zhenyu Wang Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20200221023234.28635-1-tina.zhang@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/gvt/vgpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 02c61a1ad56a..e9f9063dbf63 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -513,9 +513,9 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, intel_vgpu_reset_mmio(vgpu, dmlr); populate_pvinfo_page(vgpu); - intel_vgpu_reset_display(vgpu); if (dmlr) { + intel_vgpu_reset_display(vgpu); intel_vgpu_reset_cfg_space(vgpu); /* only reset the failsafe mode when dmlr reset */ vgpu->failsafe = false; From ebc406a766d51552ab4a5486d46c5780f0a60a09 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Sat, 1 Feb 2020 14:13:44 +0800 Subject: [PATCH 3526/3715] usb: charger: assign specific number for enum value commit ca4b43c14cd88d28cfc6467d2fa075aad6818f1d upstream. To work properly on every architectures and compilers, the enum value needs to be specific numbers. Suggested-by: Greg KH Signed-off-by: Peter Chen Link: https://lore.kernel.org/r/1580537624-10179-1-git-send-email-peter.chen@nxp.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/usb/charger.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/include/uapi/linux/usb/charger.h b/include/uapi/linux/usb/charger.h index 5f72af35b3ed..ad22079125bf 100644 --- a/include/uapi/linux/usb/charger.h +++ b/include/uapi/linux/usb/charger.h @@ -14,18 +14,18 @@ * ACA (Accessory Charger Adapters) */ enum usb_charger_type { - UNKNOWN_TYPE, - SDP_TYPE, - DCP_TYPE, - CDP_TYPE, - ACA_TYPE, + UNKNOWN_TYPE = 0, + SDP_TYPE = 1, + DCP_TYPE = 2, + CDP_TYPE = 3, + ACA_TYPE = 4, }; /* USB charger state */ enum usb_charger_state { - USB_CHARGER_DEFAULT, - USB_CHARGER_PRESENT, - USB_CHARGER_ABSENT, + USB_CHARGER_DEFAULT = 0, + USB_CHARGER_PRESENT = 1, + USB_CHARGER_ABSENT = 2, }; #endif /* _UAPI__LINUX_USB_CHARGER_H */ From 35dc93efe51eae9abc7a031d33f139768d685e02 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 2 Mar 2020 13:39:13 -0700 Subject: [PATCH 3527/3715] ecryptfs: Fix up bad backport of fe2e082f5da5b4a0a92ae32978f81507ef37ec66 When doing the 4.9 merge into certain Android trees, I noticed a warning from Android's deprecated GCC 4.9.4, which causes a build failure in those trees due to basically -Werror: fs/ecryptfs/keystore.c: In function 'ecryptfs_parse_packet_set': fs/ecryptfs/keystore.c:1357:2: warning: 'auth_tok_list_item' may be used uninitialized in this function [-Wmaybe-uninitialized] memset(auth_tok_list_item, 0, ^ fs/ecryptfs/keystore.c:1260:38: note: 'auth_tok_list_item' was declared here struct ecryptfs_auth_tok_list_item *auth_tok_list_item; ^ GCC 9.2.0 was not able to pick up this warning when I tested it. Turns out that Clang warns as well when -Wuninitialized is used, which is not the case in older stable trees at the moment (but shows value in potentially backporting the various warning fixes currently in upstream to get more coverage). fs/ecryptfs/keystore.c:1284:6: warning: variable 'auth_tok_list_item' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized] if (data[(*packet_size)++] != ECRYPTFS_TAG_1_PACKET_TYPE) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fs/ecryptfs/keystore.c:1360:4: note: uninitialized use occurs here auth_tok_list_item); ^~~~~~~~~~~~~~~~~~ fs/ecryptfs/keystore.c:1284:2: note: remove the 'if' if its condition is always false if (data[(*packet_size)++] != ECRYPTFS_TAG_1_PACKET_TYPE) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fs/ecryptfs/keystore.c:1260:56: note: initialize the variable 'auth_tok_list_item' to silence this warning struct ecryptfs_auth_tok_list_item *auth_tok_list_item; ^ = NULL 1 warning generated. Somehow, commit fe2e082f5da5 ("ecryptfs: fix a memory leak bug in parse_tag_1_packet()") upstream was not applied in the correct if block in 4.4.215, 4.9.215, and 4.14.172, which will indeed lead to use of uninitialized memory. Fix it up by undoing the bad backport in those trees then reapplying the patch in the proper location. Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- fs/ecryptfs/keystore.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 3f3ec50bf773..b134315fb69d 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -1285,7 +1285,7 @@ parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat, printk(KERN_ERR "Enter w/ first byte != 0x%.2x\n", ECRYPTFS_TAG_1_PACKET_TYPE); rc = -EINVAL; - goto out_free; + goto out; } /* Released: wipe_auth_tok_list called in ecryptfs_parse_packet_set or * at end of function upon failure */ @@ -1335,7 +1335,7 @@ parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat, printk(KERN_WARNING "Tag 1 packet contains key larger " "than ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES"); rc = -EINVAL; - goto out; + goto out_free; } memcpy((*new_auth_tok)->session_key.encrypted_key, &data[(*packet_size)], (body_size - (ECRYPTFS_SIG_SIZE + 2))); From 04fb39b7539f2df050efd4972e9f7fe675489f48 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 21 Aug 2018 21:57:03 -0700 Subject: [PATCH 3528/3715] include/linux/bitops.h: introduce BITS_PER_TYPE commit 9144d75e22cad3c89e6b2ccab551db9ee28d250a upstream. net_dim.h has a rather useful extension to BITS_PER_BYTE to compute the number of bits in a type (BITS_PER_BYTE * sizeof(T)), so promote the macro to bitops.h, alongside BITS_PER_BYTE, for wider usage. Link: http://lkml.kernel.org/r/20180706094458.14116-1-chris@chris-wilson.co.uk Signed-off-by: Chris Wilson Reviewed-by: Jani Nikula Cc: Randy Dunlap Cc: Andy Gospodarek Cc: David S. Miller Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds [only take the bitops.h portion for stable kernels - gregkh] Signed-off-by: Greg Kroah-Hartman --- include/linux/bitops.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/bitops.h b/include/linux/bitops.h index b767c7ad65c6..c51574fab0b0 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -4,7 +4,8 @@ #include #include -#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) +#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) +#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(long)) extern unsigned int __sw_hweight8(unsigned int w); extern unsigned int __sw_hweight16(unsigned int w); From c05073e1c0f85e7f8de51789b1f61f0373973c40 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 20 Feb 2020 16:42:13 +0200 Subject: [PATCH 3529/3715] net: netlink: cap max groups which will be considered in netlink_bind() commit 3a20773beeeeadec41477a5ba872175b778ff752 upstream. Since nl_groups is a u32 we can't bind more groups via ->bind (netlink_bind) call, but netlink has supported more groups via setsockopt() for a long time and thus nlk->ngroups could be over 32. Recently I added support for per-vlan notifications and increased the groups to 33 for NETLINK_ROUTE which exposed an old bug in the netlink_bind() code causing out-of-bounds access on archs where unsigned long is 32 bits via test_bit() on a local variable. Fix this by capping the maximum groups in netlink_bind() to BITS_PER_TYPE(u32), effectively capping them at 32 which is the minimum of allocated groups and the maximum groups which can be bound via netlink_bind(). CC: Christophe Leroy CC: Richard Guy Briggs Fixes: 4f520900522f ("netlink: have netlink per-protocol bind function return an error code.") Reported-by: Erhard F. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/af_netlink.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index c67abda5d639..07924559cb10 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -997,7 +997,8 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, if (nlk->netlink_bind && groups) { int group; - for (group = 0; group < nlk->ngroups; group++) { + /* nl_groups is a u32, so cap the maximum groups we can bind */ + for (group = 0; group < BITS_PER_TYPE(u32); group++) { if (!test_bit(group, &groups)) continue; err = nlk->netlink_bind(net, group + 1); @@ -1016,7 +1017,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, netlink_insert(sk, nladdr->nl_pid) : netlink_autobind(sock); if (err) { - netlink_undo_bind(nlk->ngroups, groups, sk); + netlink_undo_bind(BITS_PER_TYPE(u32), groups, sk); goto unlock; } } From 31909a6e9d6866dbe6590c4492b99d8875bba2dc Mon Sep 17 00:00:00 2001 From: Pavel Belous Date: Fri, 14 Feb 2020 18:44:56 +0300 Subject: [PATCH 3530/3715] net: atlantic: fix potential error handling commit 380ec5b9af7f0d57dbf6ac067fd9f33cff2fef71 upstream. Code inspection found that in case of mapping error we do return current 'ret' value. But beside error, it is used to count number of descriptors allocated for the packet. In that case map_skb function could return '1'. Changing it to return zero (number of mapped descriptors for skb) Fixes: 018423e90bee ("net: ethernet: aquantia: Add ring support code") Signed-off-by: Pavel Belous Signed-off-by: Igor Russkikh Signed-off-by: Dmitry Bogdanov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index a69f5f1ad32a..7a900f76c9ac 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -519,8 +519,10 @@ static unsigned int aq_nic_map_skb(struct aq_nic_s *self, dx_buff->len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(aq_nic_get_dev(self), dx_buff->pa))) + if (unlikely(dma_mapping_error(aq_nic_get_dev(self), dx_buff->pa))) { + ret = 0; goto exit; + } first = dx_buff; dx_buff->len_pkt = skb->len; From 713f6323190ca78f656eb8abf87c9d36988995c3 Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Tue, 11 Feb 2020 15:17:49 +0000 Subject: [PATCH 3531/3715] net: ena: make ena rxfh support ETH_RSS_HASH_NO_CHANGE commit 470793a78ce344bd53d31e0c2d537f71ba957547 upstream. As the name suggests ETH_RSS_HASH_NO_CHANGE is received upon changing the key or indirection table using ethtool while keeping the same hash function. Also add a function for retrieving the current hash function from the ena-com layer. Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Sameeh Jubran Signed-off-by: Saeed Bshara Signed-off-by: Arthur Kiyanovski Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/amazon/ena/ena_com.c | 5 +++++ drivers/net/ethernet/amazon/ena/ena_com.h | 8 ++++++++ drivers/net/ethernet/amazon/ena/ena_ethtool.c | 3 +++ 3 files changed, 16 insertions(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 31e0cf144201..dc9149a32f41 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -861,6 +861,11 @@ static void ena_com_hash_key_fill_default_key(struct ena_com_dev *ena_dev) hash_key->keys_num = sizeof(hash_key->key) / sizeof(u32); } +int ena_com_get_current_hash_function(struct ena_com_dev *ena_dev) +{ + return ena_dev->rss.hash_func; +} + static int ena_com_hash_key_allocate(struct ena_com_dev *ena_dev) { struct ena_rss *rss = &ena_dev->rss; diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h index 90fce5c0ca48..7272fb0d858d 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.h +++ b/drivers/net/ethernet/amazon/ena/ena_com.h @@ -632,6 +632,14 @@ int ena_com_rss_init(struct ena_com_dev *ena_dev, u16 log_size); */ void ena_com_rss_destroy(struct ena_com_dev *ena_dev); +/* ena_com_get_current_hash_function - Get RSS hash function + * @ena_dev: ENA communication layer struct + * + * Return the current hash function. + * @return: 0 or one of the ena_admin_hash_functions values. + */ +int ena_com_get_current_hash_function(struct ena_com_dev *ena_dev); + /* ena_com_fill_hash_function - Fill RSS hash function * @ena_dev: ENA communication layer struct * @func: The hash function (Toeplitz or crc) diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index 22238f25e071..d29e256bf610 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -748,6 +748,9 @@ static int ena_set_rxfh(struct net_device *netdev, const u32 *indir, } switch (hfunc) { + case ETH_RSS_HASH_NO_CHANGE: + func = ena_com_get_current_hash_function(ena_dev); + break; case ETH_RSS_HASH_TOP: func = ENA_ADMIN_TOEPLITZ; break; From 7345a19c1ac2baac28c765eddd83a71dc296cf30 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Sat, 7 Dec 2019 01:13:26 +1100 Subject: [PATCH 3532/3715] namei: only return -ECHILD from follow_dotdot_rcu() commit 2b98149c2377bff12be5dd3ce02ae0506e2dd613 upstream. It's over-zealous to return hard errors under RCU-walk here, given that a REF-walk will be triggered for all other cases handling ".." under RCU. The original purpose of this check was to ensure that if a rename occurs such that a directory is moved outside of the bind-mount which the resolution started in, it would be detected and blocked to avoid being able to mess with paths outside of the bind-mount. However, triggering a new REF-walk is just as effective a solution. Cc: "Eric W. Biederman" Fixes: 397d425dc26d ("vfs: Test for and handle paths that are unreachable from their mnt_root") Suggested-by: Al Viro Signed-off-by: Aleksa Sarai Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/namei.c b/fs/namei.c index f421f8d80f4d..c5ebe33984f4 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1382,7 +1382,7 @@ static int follow_dotdot_rcu(struct nameidata *nd) nd->path.dentry = parent; nd->seq = seq; if (unlikely(!path_connected(&nd->path))) - return -ENOENT; + return -ECHILD; break; } else { struct mount *mnt = real_mount(nd->path.mnt); From 3b30e1920821ac5cd12cd7b993bec601278d9b75 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 6 Dec 2019 11:45:35 -0800 Subject: [PATCH 3533/3715] mwifiex: drop most magic numbers from mwifiex_process_tdls_action_frame() commit 70e5b8f445fd27fde0c5583460e82539a7242424 upstream. Before commit 1e58252e334d ("mwifiex: Fix heap overflow in mmwifiex_process_tdls_action_frame()"), mwifiex_process_tdls_action_frame() already had too many magic numbers. But this commit just added a ton more, in the name of checking for buffer overflows. That seems like a really bad idea. Let's make these magic numbers a little less magic, by (a) factoring out 'pos[1]' as 'ie_len' (b) using 'sizeof' on the appropriate source or destination fields where possible, instead of bare numbers (c) dropping redundant checks, per below. Regarding redundant checks: the beginning of the loop has this: if (pos + 2 + pos[1] > end) break; but then individual 'case's include stuff like this: if (pos > end - 3) return; if (pos[1] != 1) return; Note that the second 'return' (validating the length, pos[1]) combined with the above condition (ensuring 'pos + 2 + length' doesn't exceed 'end'), makes the first 'return' (whose 'if' can be reworded as 'pos > end - pos[1] - 2') redundant. Rather than unwind the magic numbers there, just drop those conditions. Fixes: 1e58252e334d ("mwifiex: Fix heap overflow in mmwifiex_process_tdls_action_frame()") Signed-off-by: Brian Norris Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/marvell/mwifiex/tdls.c | 75 ++++++++------------- 1 file changed, 28 insertions(+), 47 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/tdls.c b/drivers/net/wireless/marvell/mwifiex/tdls.c index b5340af9fa5e..80d20fb6f348 100644 --- a/drivers/net/wireless/marvell/mwifiex/tdls.c +++ b/drivers/net/wireless/marvell/mwifiex/tdls.c @@ -897,7 +897,7 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, u8 *peer, *pos, *end; u8 i, action, basic; u16 cap = 0; - int ie_len = 0; + int ies_len = 0; if (len < (sizeof(struct ethhdr) + 3)) return; @@ -919,7 +919,7 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, pos = buf + sizeof(struct ethhdr) + 4; /* payload 1+ category 1 + action 1 + dialog 1 */ cap = get_unaligned_le16(pos); - ie_len = len - sizeof(struct ethhdr) - TDLS_REQ_FIX_LEN; + ies_len = len - sizeof(struct ethhdr) - TDLS_REQ_FIX_LEN; pos += 2; break; @@ -929,7 +929,7 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, /* payload 1+ category 1 + action 1 + dialog 1 + status code 2*/ pos = buf + sizeof(struct ethhdr) + 6; cap = get_unaligned_le16(pos); - ie_len = len - sizeof(struct ethhdr) - TDLS_RESP_FIX_LEN; + ies_len = len - sizeof(struct ethhdr) - TDLS_RESP_FIX_LEN; pos += 2; break; @@ -937,7 +937,7 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, if (len < (sizeof(struct ethhdr) + TDLS_CONFIRM_FIX_LEN)) return; pos = buf + sizeof(struct ethhdr) + TDLS_CONFIRM_FIX_LEN; - ie_len = len - sizeof(struct ethhdr) - TDLS_CONFIRM_FIX_LEN; + ies_len = len - sizeof(struct ethhdr) - TDLS_CONFIRM_FIX_LEN; break; default: mwifiex_dbg(priv->adapter, ERROR, "Unknown TDLS frame type.\n"); @@ -950,33 +950,33 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, sta_ptr->tdls_cap.capab = cpu_to_le16(cap); - for (end = pos + ie_len; pos + 1 < end; pos += 2 + pos[1]) { - if (pos + 2 + pos[1] > end) + for (end = pos + ies_len; pos + 1 < end; pos += 2 + pos[1]) { + u8 ie_len = pos[1]; + + if (pos + 2 + ie_len > end) break; switch (*pos) { case WLAN_EID_SUPP_RATES: - if (pos[1] > 32) + if (ie_len > sizeof(sta_ptr->tdls_cap.rates)) return; - sta_ptr->tdls_cap.rates_len = pos[1]; - for (i = 0; i < pos[1]; i++) + sta_ptr->tdls_cap.rates_len = ie_len; + for (i = 0; i < ie_len; i++) sta_ptr->tdls_cap.rates[i] = pos[i + 2]; break; case WLAN_EID_EXT_SUPP_RATES: - if (pos[1] > 32) + if (ie_len > sizeof(sta_ptr->tdls_cap.rates)) return; basic = sta_ptr->tdls_cap.rates_len; - if (pos[1] > 32 - basic) + if (ie_len > sizeof(sta_ptr->tdls_cap.rates) - basic) return; - for (i = 0; i < pos[1]; i++) + for (i = 0; i < ie_len; i++) sta_ptr->tdls_cap.rates[basic + i] = pos[i + 2]; - sta_ptr->tdls_cap.rates_len += pos[1]; + sta_ptr->tdls_cap.rates_len += ie_len; break; case WLAN_EID_HT_CAPABILITY: - if (pos > end - sizeof(struct ieee80211_ht_cap) - 2) - return; - if (pos[1] != sizeof(struct ieee80211_ht_cap)) + if (ie_len != sizeof(struct ieee80211_ht_cap)) return; /* copy the ie's value into ht_capb*/ memcpy((u8 *)&sta_ptr->tdls_cap.ht_capb, pos + 2, @@ -984,59 +984,45 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, sta_ptr->is_11n_enabled = 1; break; case WLAN_EID_HT_OPERATION: - if (pos > end - - sizeof(struct ieee80211_ht_operation) - 2) - return; - if (pos[1] != sizeof(struct ieee80211_ht_operation)) + if (ie_len != sizeof(struct ieee80211_ht_operation)) return; /* copy the ie's value into ht_oper*/ memcpy(&sta_ptr->tdls_cap.ht_oper, pos + 2, sizeof(struct ieee80211_ht_operation)); break; case WLAN_EID_BSS_COEX_2040: - if (pos > end - 3) - return; - if (pos[1] != 1) + if (ie_len != sizeof(pos[2])) return; sta_ptr->tdls_cap.coex_2040 = pos[2]; break; case WLAN_EID_EXT_CAPABILITY: - if (pos > end - sizeof(struct ieee_types_header)) + if (ie_len < sizeof(struct ieee_types_header)) return; - if (pos[1] < sizeof(struct ieee_types_header)) - return; - if (pos[1] > 8) + if (ie_len > 8) return; memcpy((u8 *)&sta_ptr->tdls_cap.extcap, pos, sizeof(struct ieee_types_header) + - min_t(u8, pos[1], 8)); + min_t(u8, ie_len, 8)); break; case WLAN_EID_RSN: - if (pos > end - sizeof(struct ieee_types_header)) + if (ie_len < sizeof(struct ieee_types_header)) return; - if (pos[1] < sizeof(struct ieee_types_header)) - return; - if (pos[1] > IEEE_MAX_IE_SIZE - + if (ie_len > IEEE_MAX_IE_SIZE - sizeof(struct ieee_types_header)) return; memcpy((u8 *)&sta_ptr->tdls_cap.rsn_ie, pos, sizeof(struct ieee_types_header) + - min_t(u8, pos[1], IEEE_MAX_IE_SIZE - + min_t(u8, ie_len, IEEE_MAX_IE_SIZE - sizeof(struct ieee_types_header))); break; case WLAN_EID_QOS_CAPA: - if (pos > end - 3) - return; - if (pos[1] != 1) + if (ie_len != sizeof(pos[2])) return; sta_ptr->tdls_cap.qos_info = pos[2]; break; case WLAN_EID_VHT_OPERATION: if (priv->adapter->is_hw_11ac_capable) { - if (pos > end - - sizeof(struct ieee80211_vht_operation) - 2) - return; - if (pos[1] != + if (ie_len != sizeof(struct ieee80211_vht_operation)) return; /* copy the ie's value into vhtoper*/ @@ -1046,10 +1032,7 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, break; case WLAN_EID_VHT_CAPABILITY: if (priv->adapter->is_hw_11ac_capable) { - if (pos > end - - sizeof(struct ieee80211_vht_cap) - 2) - return; - if (pos[1] != sizeof(struct ieee80211_vht_cap)) + if (ie_len != sizeof(struct ieee80211_vht_cap)) return; /* copy the ie's value into vhtcap*/ memcpy((u8 *)&sta_ptr->tdls_cap.vhtcap, pos + 2, @@ -1059,9 +1042,7 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, break; case WLAN_EID_AID: if (priv->adapter->is_hw_11ac_capable) { - if (pos > end - 4) - return; - if (pos[1] != 2) + if (ie_len != sizeof(u16)) return; sta_ptr->tdls_cap.aid = get_unaligned_le16((pos + 2)); From e12afc7d76d63eabadc444f620bcf6be4306f0d6 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 9 Jan 2020 17:42:16 -0600 Subject: [PATCH 3534/3715] KVM: SVM: Override default MMIO mask if memory encryption is enabled commit 52918ed5fcf05d97d257f4131e19479da18f5d16 upstream. The KVM MMIO support uses bit 51 as the reserved bit to cause nested page faults when a guest performs MMIO. The AMD memory encryption support uses a CPUID function to define the encryption bit position. Given this, it is possible that these bits can conflict. Use svm_hardware_setup() to override the MMIO mask if memory encryption support is enabled. Various checks are performed to ensure that the mask is properly defined and rsvd_bits() is used to generate the new mask (as was done prior to the change that necessitated this patch). Fixes: 28a1f3ac1d0c ("kvm: x86: Set highest physical address bits in non-present/reserved SPTEs") Suggested-by: Sean Christopherson Reviewed-by: Sean Christopherson Signed-off-by: Tom Lendacky Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8e65a9b40c18..d63621386418 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1088,6 +1088,47 @@ static int avic_ga_log_notifier(u32 ga_tag) return 0; } +/* + * The default MMIO mask is a single bit (excluding the present bit), + * which could conflict with the memory encryption bit. Check for + * memory encryption support and override the default MMIO mask if + * memory encryption is enabled. + */ +static __init void svm_adjust_mmio_mask(void) +{ + unsigned int enc_bit, mask_bit; + u64 msr, mask; + + /* If there is no memory encryption support, use existing mask */ + if (cpuid_eax(0x80000000) < 0x8000001f) + return; + + /* If memory encryption is not enabled, use existing mask */ + rdmsrl(MSR_K8_SYSCFG, msr); + if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) + return; + + enc_bit = cpuid_ebx(0x8000001f) & 0x3f; + mask_bit = boot_cpu_data.x86_phys_bits; + + /* Increment the mask bit if it is the same as the encryption bit */ + if (enc_bit == mask_bit) + mask_bit++; + + /* + * If the mask bit location is below 52, then some bits above the + * physical addressing limit will always be reserved, so use the + * rsvd_bits() function to generate the mask. This mask, along with + * the present bit, will be used to generate a page fault with + * PFER.RSV = 1. + * + * If the mask bit location is 52 (or above), then clear the mask. + */ + mask = (mask_bit < 52) ? rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0; + + kvm_mmu_set_mmio_spte_mask(mask, mask); +} + static __init int svm_hardware_setup(void) { int cpu; @@ -1123,6 +1164,8 @@ static __init int svm_hardware_setup(void) kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); } + svm_adjust_mmio_mask(); + for_each_possible_cpu(cpu) { r = svm_cpu_init(cpu); if (r) From 0bdc63911545438223d5e44f869e3b1d9981a08b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 9 Jan 2020 15:56:18 -0800 Subject: [PATCH 3535/3715] KVM: Check for a bad hva before dropping into the ghc slow path commit fcfbc617547fc6d9552cb6c1c563b6a90ee98085 upstream. When reading/writing using the guest/host cache, check for a bad hva before checking for a NULL memslot, which triggers the slow path for handing cross-page accesses. Because the memslot is nullified on error by __kvm_gfn_to_hva_cache_init(), if the bad hva is encountered after crossing into a new page, then the kvm_{read,write}_guest() slow path could potentially write/access the first chunk prior to detecting the bad hva. Arguably, performing a partial access is semantically correct from an architectural perspective, but that behavior is certainly not intended. In the original implementation, memslot was not explicitly nullified and therefore the partial access behavior varied based on whether the memslot itself was null, or if the hva was simply bad. The current behavior was introduced as a seemingly unintentional side effect in commit f1b9dd5eb86c ("kvm: Disallow wraparound in kvm_gfn_to_hva_cache_init"), which justified the change with "since some callers don't check the return code from this function, it sit seems prudent to clear ghc->memslot in the event of an error". Regardless of intent, the partial access is dependent on _not_ checking the result of the cache initialization, which is arguably a bug in its own right, at best simply weird. Fixes: 8f964525a121 ("KVM: Allow cross page reads and writes from cached translations.") Cc: Jim Mattson Cc: Andrew Honig Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- virt/kvm/kvm_main.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 745ee09083dd..71f77ae6c2a6 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2027,12 +2027,12 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, if (slots->generation != ghc->generation) __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len); - if (unlikely(!ghc->memslot)) - return kvm_write_guest(kvm, gpa, data, len); - if (kvm_is_error_hva(ghc->hva)) return -EFAULT; + if (unlikely(!ghc->memslot)) + return kvm_write_guest(kvm, gpa, data, len); + r = __copy_to_user((void __user *)ghc->hva + offset, data, len); if (r) return -EFAULT; @@ -2060,12 +2060,12 @@ int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, if (slots->generation != ghc->generation) __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len); - if (unlikely(!ghc->memslot)) - return kvm_read_guest(kvm, ghc->gpa, data, len); - if (kvm_is_error_hva(ghc->hva)) return -EFAULT; + if (unlikely(!ghc->memslot)) + return kvm_read_guest(kvm, ghc->gpa, data, len); + r = __copy_from_user(data, (void __user *)ghc->hva, len); if (r) return -EFAULT; From 380a732d1792ab887669f6b5ab438ccad6cc1adb Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 22 May 2018 14:21:04 +0800 Subject: [PATCH 3536/3715] tuntap: correctly set SOCKWQ_ASYNC_NOSPACE commit 2f3ab6221e4c87960347d65c7cab9bd917d1f637 upstream. When link is down, writes to the device might fail with -EIO. Userspace needs an indication when the status is resolved. As a fix, tun_net_open() attempts to wake up writers - but that is only effective if SOCKWQ_ASYNC_NOSPACE has been set in the past. This is not the case of vhost_net which only poll for EPOLLOUT after it meets errors during sendmsg(). This patch fixes this by making sure SOCKWQ_ASYNC_NOSPACE is set when socket is not writable or device is down to guarantee EPOLLOUT will be raised in either tun_chr_poll() or tun_sock_write_space() after device is up. Cc: Hannes Frederic Sowa Cc: Eric Dumazet Fixes: 1bd4978a88ac2 ("tun: honor IFF_UP in tun_get_user()") Signed-off-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: Tommi Rantala --- drivers/net/tun.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 3086211829a7..ba34f61d70de 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1134,6 +1134,13 @@ static void tun_net_init(struct net_device *dev) dev->max_mtu = MAX_MTU - dev->hard_header_len; } +static bool tun_sock_writeable(struct tun_struct *tun, struct tun_file *tfile) +{ + struct sock *sk = tfile->socket.sk; + + return (tun->dev->flags & IFF_UP) && sock_writeable(sk); +} + /* Character device part */ /* Poll */ @@ -1156,10 +1163,14 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait) if (!skb_array_empty(&tfile->tx_array)) mask |= POLLIN | POLLRDNORM; - if (tun->dev->flags & IFF_UP && - (sock_writeable(sk) || - (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) && - sock_writeable(sk)))) + /* Make sure SOCKWQ_ASYNC_NOSPACE is set if not writable to + * guarantee EPOLLOUT to be raised by either here or + * tun_sock_write_space(). Then process could get notification + * after it writes to a down device and meets -EIO. + */ + if (tun_sock_writeable(tun, tfile) || + (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) && + tun_sock_writeable(tun, tfile))) mask |= POLLOUT | POLLWRNORM; if (tun->dev->reg_state != NETREG_REGISTERED) From cf66af9d9305c3caead5446ced6ad7af762e3e86 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 26 Jan 2020 11:44:29 +0100 Subject: [PATCH 3537/3715] drivers: net: xgene: Fix the order of the arguments of 'alloc_etherdev_mqs()' commit 5a44c71ccda60a50073c5d7fe3f694cdfa3ab0c2 upstream. 'alloc_etherdev_mqs()' expects first 'tx', then 'rx'. The semantic here looks reversed. Reorder the arguments passed to 'alloc_etherdev_mqs()' in order to keep the correct semantic. In fact, this is a no-op because both XGENE_NUM_[RT]X_RING are 8. Fixes: 107dec2749fe ("drivers: net: xgene: Add support for multiple queues") Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 50dd6bf176d0..3a489b2b99c9 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -2034,7 +2034,7 @@ static int xgene_enet_probe(struct platform_device *pdev) int ret; ndev = alloc_etherdev_mqs(sizeof(struct xgene_enet_pdata), - XGENE_NUM_RX_RING, XGENE_NUM_TX_RING); + XGENE_NUM_TX_RING, XGENE_NUM_RX_RING); if (!ndev) return -ENOMEM; From b996b668dafbff61a630be55479a2f9d5c5a5b19 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 27 Nov 2019 14:57:04 +0900 Subject: [PATCH 3538/3715] kprobes: Set unoptimized flag after unoptimizing code commit f66c0447cca1281116224d474cdb37d6a18e4b5b upstream. Set the unoptimized flag after confirming the code is completely unoptimized. Without this fix, when a kprobe hits the intermediate modified instruction (the first byte is replaced by an INT3, but later bytes can still be a jump address operand) while unoptimizing, it can return to the middle byte of the modified code, which causes an invalid instruction exception in the kernel. Usually, this is a rare case, but if we put a probe on the function call while text patching, it always causes a kernel panic as below: # echo p text_poke+5 > kprobe_events # echo 1 > events/kprobes/enable # echo 0 > events/kprobes/enable invalid opcode: 0000 [#1] PREEMPT SMP PTI RIP: 0010:text_poke+0x9/0x50 Call Trace: arch_unoptimize_kprobe+0x22/0x28 arch_unoptimize_kprobes+0x39/0x87 kprobe_optimizer+0x6e/0x290 process_one_work+0x2a0/0x610 worker_thread+0x28/0x3d0 ? process_one_work+0x610/0x610 kthread+0x10d/0x130 ? kthread_park+0x80/0x80 ret_from_fork+0x3a/0x50 text_poke() is used for patching the code in optprobes. This can happen even if we blacklist text_poke() and other functions, because there is a small time window during which we show the intermediate code to other CPUs. [ mingo: Edited the changelog. ] Tested-by: Alexei Starovoitov Signed-off-by: Masami Hiramatsu Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: bristot@redhat.com Fixes: 6274de4984a6 ("kprobes: Support delayed unoptimizing") Link: https://lkml.kernel.org/r/157483422375.25881.13508326028469515760.stgit@devnote2 Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/kprobes.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index d0fe20a5475f..48bf93bbb22e 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -523,6 +523,8 @@ static void do_unoptimize_kprobes(void) arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list); /* Loop free_list for disarming */ list_for_each_entry_safe(op, tmp, &freeing_list, list) { + /* Switching from detour code to origin */ + op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; /* Disarm probes if marked disabled */ if (kprobe_disabled(&op->kp)) arch_disarm_kprobe(&op->kp); @@ -662,6 +664,7 @@ static void force_unoptimize_kprobe(struct optimized_kprobe *op) { lockdep_assert_cpus_held(); arch_unoptimize_kprobe(op); + op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; if (kprobe_disabled(&op->kp)) arch_disarm_kprobe(&op->kp); } @@ -689,7 +692,6 @@ static void unoptimize_kprobe(struct kprobe *p, bool force) return; } - op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; if (!list_empty(&op->list)) { /* Dequeue from the optimization queue */ list_del_init(&op->list); From 28a5ca261d1140b4a22da701e9ab59c8aa81adbf Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 16 Dec 2019 13:22:33 -0300 Subject: [PATCH 3539/3715] perf hists browser: Restore ESC as "Zoom out" of DSO/thread/etc commit 3f7774033e6820d25beee5cf7aefa11d4968b951 upstream. We need to set actions->ms.map since 599a2f38a989 ("perf hists browser: Check sort keys before hot key actions"), as in that patch we bail out if map is NULL. Reviewed-by: Jiri Olsa Cc: Adrian Hunter Cc: Namhyung Kim Fixes: 599a2f38a989 ("perf hists browser: Check sort keys before hot key actions") Link: https://lkml.kernel.org/n/tip-wp1ssoewy6zihwwexqpohv0j@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/ui/browsers/hists.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 628ad5f7eddb..49a87fb64156 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -3142,6 +3142,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, continue; } + actions->ms.map = map; top = pstack__peek(browser->pstack); if (top == &browser->hists->dso_filter) { /* From b200a5dded6fc266cbcf79ade856ea69e3633817 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 30 Jan 2020 22:14:29 -0800 Subject: [PATCH 3540/3715] mm/huge_memory.c: use head to check huge zero page commit cb829624867b5ab10bc6a7036d183b1b82bfe9f8 upstream. The page could be a tail page, if this is the case, this BUG_ON will never be triggered. Link: http://lkml.kernel.org/r/20200110032610.26499-1-richardw.yang@linux.intel.com Fixes: e9b61f19858a ("thp: reintroduce split_huge_page()") Signed-off-by: Wei Yang Acked-by: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/huge_memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 92915cc87549..98fb0b4be7b1 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2561,7 +2561,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) unsigned long flags; pgoff_t end; - VM_BUG_ON_PAGE(is_huge_zero_page(page), page); + VM_BUG_ON_PAGE(is_huge_zero_page(head), head); VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(!PageCompound(page), page); From 2630ea39bca2aa6c05f795035cf17e338178bbaf Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Thu, 30 Jan 2020 22:14:48 -0800 Subject: [PATCH 3541/3715] mm, thp: fix defrag setting if newline is not used commit f42f25526502d851d0e3ca1e46297da8aafce8a7 upstream. If thp defrag setting "defer" is used and a newline is *not* used when writing to the sysfs file, this is interpreted as the "defer+madvise" option. This is because we do prefix matching and if five characters are written without a newline, the current code ends up comparing to the first five bytes of the "defer+madvise" option and using that instead. Use the more appropriate sysfs_streq() that handles the trailing newline for us. Since this doubles as a nice cleanup, do it in enabled_store() as well. The current implementation relies on prefix matching: the number of bytes compared is either the number of bytes written or the length of the option being compared. With a newline, "defer\n" does not match "defer+"madvise"; without a newline, however, "defer" is considered to match "defer+madvise" (prefix matching is only comparing the first five bytes). End result is that writing "defer" is broken unless it has an additional trailing character. This means that writing "madv" in the past would match and set "madvise". With strict checking, that no longer is the case but it is unlikely anybody is currently doing this. Link: http://lkml.kernel.org/r/alpine.DEB.2.21.2001171411020.56385@chino.kir.corp.google.com Fixes: 21440d7eb904 ("mm, thp: add new defer+madvise defrag option") Signed-off-by: David Rientjes Suggested-by: Andrew Morton Acked-by: Vlastimil Babka Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/huge_memory.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 98fb0b4be7b1..283963032eff 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -163,16 +163,13 @@ static ssize_t enabled_store(struct kobject *kobj, { ssize_t ret = count; - if (!memcmp("always", buf, - min(sizeof("always")-1, count))) { + if (sysfs_streq(buf, "always")) { clear_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_FLAG, &transparent_hugepage_flags); - } else if (!memcmp("madvise", buf, - min(sizeof("madvise")-1, count))) { + } else if (sysfs_streq(buf, "madvise")) { clear_bit(TRANSPARENT_HUGEPAGE_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags); - } else if (!memcmp("never", buf, - min(sizeof("never")-1, count))) { + } else if (sysfs_streq(buf, "never")) { clear_bit(TRANSPARENT_HUGEPAGE_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags); } else @@ -236,32 +233,27 @@ static ssize_t defrag_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { - if (!memcmp("always", buf, - min(sizeof("always")-1, count))) { + if (sysfs_streq(buf, "always")) { clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); - } else if (!memcmp("defer+madvise", buf, - min(sizeof("defer+madvise")-1, count))) { + } else if (sysfs_streq(buf, "defer+madvise")) { clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); - } else if (!memcmp("defer", buf, - min(sizeof("defer")-1, count))) { + } else if (sysfs_streq(buf, "defer")) { clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); - } else if (!memcmp("madvise", buf, - min(sizeof("madvise")-1, count))) { + } else if (sysfs_streq(buf, "madvise")) { clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); - } else if (!memcmp("never", buf, - min(sizeof("never")-1, count))) { + } else if (sysfs_streq(buf, "never")) { clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); From 0abb54ae42e7b3b11ff044c7c59e4d7ca1eef7e4 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 5 Mar 2020 15:48:03 +0100 Subject: [PATCH 3542/3715] Revert "char/random: silence a lockdep splat with printk()" This reverts commit 28820c5802f9f83c655ab09ccae8289103ce1490 which is commit 1b710b1b10eff9d46666064ea25f079f70bc67a8 upstream. It causes problems here just like it did in 4.19.y and odds are it will be reverted upstream as well. Reported-by: Guenter Roeck Cc: Sergey Senozhatsky Cc: Qian Cai Cc: Theodore Ts'o Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 50d5846acf48..e6efa07e9f9e 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1598,9 +1598,8 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, print_once = true; #endif if (__ratelimit(&unseeded_warning)) - printk_deferred(KERN_NOTICE "random: %s called from %pS " - "with crng_init=%d\n", func_name, caller, - crng_init); + pr_notice("random: %s called from %pS with crng_init=%d\n", + func_name, caller, crng_init); } /* From c7cba03b2bdced33715a7167cb9c5c8733cd31c3 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Mon, 24 Feb 2020 16:38:57 -0500 Subject: [PATCH 3543/3715] audit: always check the netlink payload length in audit_receive_msg() [ Upstream commit 756125289285f6e55a03861bf4b6257aa3d19a93 ] This patch ensures that we always check the netlink payload length in audit_receive_msg() before we take any action on the payload itself. Cc: stable@vger.kernel.org Reported-by: syzbot+399c44bf1f43b8747403@syzkaller.appspotmail.com Reported-by: syzbot+e4b12d8d202701f08b6d@syzkaller.appspotmail.com Signed-off-by: Paul Moore Signed-off-by: Sasha Levin --- kernel/audit.c | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/kernel/audit.c b/kernel/audit.c index d301276bca58..b21a8910f765 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1067,13 +1067,11 @@ static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature audit_log_end(ab); } -static int audit_set_feature(struct sk_buff *skb) +static int audit_set_feature(struct audit_features *uaf) { - struct audit_features *uaf; int i; BUILD_BUG_ON(AUDIT_LAST_FEATURE + 1 > ARRAY_SIZE(audit_feature_names)); - uaf = nlmsg_data(nlmsg_hdr(skb)); /* if there is ever a version 2 we should handle that here */ @@ -1141,6 +1139,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { u32 seq; void *data; + int data_len; int err; struct audit_buffer *ab; u16 msg_type = nlh->nlmsg_type; @@ -1154,6 +1153,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) seq = nlh->nlmsg_seq; data = nlmsg_data(nlh); + data_len = nlmsg_len(nlh); switch (msg_type) { case AUDIT_GET: { @@ -1177,7 +1177,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) struct audit_status s; memset(&s, 0, sizeof(s)); /* guard against past and future API changes */ - memcpy(&s, data, min_t(size_t, sizeof(s), nlmsg_len(nlh))); + memcpy(&s, data, min_t(size_t, sizeof(s), data_len)); if (s.mask & AUDIT_STATUS_ENABLED) { err = audit_set_enabled(s.enabled); if (err < 0) @@ -1281,7 +1281,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return err; break; case AUDIT_SET_FEATURE: - err = audit_set_feature(skb); + if (data_len < sizeof(struct audit_features)) + return -EINVAL; + err = audit_set_feature(data); if (err) return err; break; @@ -1293,6 +1295,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) err = audit_filter(msg_type, AUDIT_FILTER_USER); if (err == 1) { /* match or error */ + char *str = data; + err = 0; if (msg_type == AUDIT_USER_TTY) { err = tty_audit_push(); @@ -1300,26 +1304,24 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) break; } audit_log_common_recv_msg(&ab, msg_type); - if (msg_type != AUDIT_USER_TTY) + if (msg_type != AUDIT_USER_TTY) { + /* ensure NULL termination */ + str[data_len - 1] = '\0'; audit_log_format(ab, " msg='%.*s'", AUDIT_MESSAGE_TEXT_MAX, - (char *)data); - else { - int size; - + str); + } else { audit_log_format(ab, " data="); - size = nlmsg_len(nlh); - if (size > 0 && - ((unsigned char *)data)[size - 1] == '\0') - size--; - audit_log_n_untrustedstring(ab, data, size); + if (data_len > 0 && str[data_len - 1] == '\0') + data_len--; + audit_log_n_untrustedstring(ab, str, data_len); } audit_log_end(ab); } break; case AUDIT_ADD_RULE: case AUDIT_DEL_RULE: - if (nlmsg_len(nlh) < sizeof(struct audit_rule_data)) + if (data_len < sizeof(struct audit_rule_data)) return -EINVAL; if (audit_enabled == AUDIT_LOCKED) { audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE); @@ -1327,7 +1329,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) audit_log_end(ab); return -EPERM; } - err = audit_rule_change(msg_type, seq, data, nlmsg_len(nlh)); + err = audit_rule_change(msg_type, seq, data, data_len); break; case AUDIT_LIST_RULES: err = audit_list_rules_send(skb, seq); @@ -1341,7 +1343,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) case AUDIT_MAKE_EQUIV: { void *bufp = data; u32 sizes[2]; - size_t msglen = nlmsg_len(nlh); + size_t msglen = data_len; char *old, *new; err = -EINVAL; @@ -1417,7 +1419,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) memset(&s, 0, sizeof(s)); /* guard against past and future API changes */ - memcpy(&s, data, min_t(size_t, sizeof(s), nlmsg_len(nlh))); + memcpy(&s, data, min_t(size_t, sizeof(s), data_len)); /* check if new data is valid */ if ((s.enabled != 0 && s.enabled != 1) || (s.log_passwd != 0 && s.log_passwd != 1)) From ff8e12b0cfe277a54edbab525f068b39c7ed0de3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Thu, 5 Mar 2020 17:30:05 +0100 Subject: [PATCH 3544/3715] vhost: Check docket sk_family instead of call getname MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 42d84c8490f9f0931786f1623191fcab397c3d64 upstream. Doing so, we save one call to get data we already have in the struct. Also, since there is no guarantee that getname use sockaddr_ll parameter beyond its size, we add a little bit of security here. It should do not do beyond MAX_ADDR_LEN, but syzbot found that ax25_getname writes more (72 bytes, the size of full_sockaddr_ax25, versus 20 + 32 bytes of sockaddr_ll + MAX_ADDR_LEN in syzbot repro). Fixes: 3a4d5c94e9593 ("vhost_net: a kernel-level virtio server") Reported-by: syzbot+f2a62d07a5198c819c7b@syzkaller.appspotmail.com Signed-off-by: Eugenio Pérez Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman [jwang: backport to 4.14] Signed-off-by: Jack Wang Signed-off-by: Sasha Levin --- drivers/vhost/net.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 4d11152e60c1..8fe07622ae59 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1025,11 +1025,7 @@ static int vhost_net_release(struct inode *inode, struct file *f) static struct socket *get_raw_socket(int fd) { - struct { - struct sockaddr_ll sa; - char buf[MAX_ADDR_LEN]; - } uaddr; - int uaddr_len = sizeof uaddr, r; + int r; struct socket *sock = sockfd_lookup(fd, &r); if (!sock) @@ -1041,12 +1037,7 @@ static struct socket *get_raw_socket(int fd) goto err; } - r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa, - &uaddr_len, 0); - if (r) - goto err; - - if (uaddr.sa.sll_family != AF_PACKET) { + if (sock->sk->sk_family != AF_PACKET) { r = -EPFNOSUPPORT; goto err; } From 7a4139ccd2ffe87c5125eb476b57c3db1b7b70d1 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 5 Mar 2020 17:30:06 +0100 Subject: [PATCH 3545/3715] x86/mce: Handle varying MCA bank counts [ Upstream commit 006c077041dc73b9490fffc4c6af5befe0687110 ] Linux reads MCG_CAP[Count] to find the number of MCA banks visible to a CPU. Currently, this number is the same for all CPUs and a warning is shown if there is a difference. The number of banks is overwritten with the MCG_CAP[Count] value of each following CPU that boots. According to the Intel SDM and AMD APM, the MCG_CAP[Count] value gives the number of banks that are available to a "processor implementation". The AMD BKDGs/PPRs further clarify that this value is per core. This value has historically been the same for every core in the system, but that is not an architectural requirement. Future AMD systems may have different MCG_CAP[Count] values per core, so the assumption that all CPUs will have the same MCG_CAP[Count] value will no longer be valid. Also, the first CPU to boot will allocate the struct mce_banks[] array using the number of banks based on its MCG_CAP[Count] value. The machine check handler and other functions use the global number of banks to iterate and index into the mce_banks[] array. So it's possible to use an out-of-bounds index on an asymmetric system where a following CPU sees a MCG_CAP[Count] value greater than its predecessors. Thus, allocate the mce_banks[] array to the maximum number of banks. This will avoid the potential out-of-bounds index since the value of mca_cfg.banks is capped to MAX_NR_BANKS. Set the value of mca_cfg.banks equal to the max of the previous value and the value for the current CPU. This way mca_cfg.banks will always represent the max number of banks detected on any CPU in the system. This will ensure that all CPUs will access all the banks that are visible to them. A CPU that can access fewer than the max number of banks will find the registers of the extra banks to be read-as-zero. Furthermore, print the resulting number of MCA banks in use. Do this in mcheck_late_init() so that the final value is printed after all CPUs have been initialized. Finally, get bank count from target CPU when doing injection with mce-inject module. [ bp: Remove out-of-bounds example, passify and cleanup commit message. ] Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: linux-edac Cc: Pu Wen Cc: Thomas Gleixner Cc: Tony Luck Cc: Vishal Verma Cc: x86-ml Link: https://lkml.kernel.org/r/20180727214009.78289-1-Yazen.Ghannam@amd.com Signed-off-by: Sasha Levin [jwang: cherry-pick to fix boot warning in arch/x86/kernel/cpu/mcheck/mce.c:1549 in epyc rome server] Signed-off-by: Jack Wang Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/mcheck/mce-inject.c | 14 +++++++------- arch/x86/kernel/cpu/mcheck/mce.c | 22 +++++++--------------- 2 files changed, 14 insertions(+), 22 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index f12141ba9a76..e57b59762f9f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -46,8 +46,6 @@ static struct mce i_mce; static struct dentry *dfs_inj; -static u8 n_banks; - #define MAX_FLAG_OPT_SIZE 4 #define NBCFG 0x44 @@ -570,9 +568,15 @@ err: static int inj_bank_set(void *data, u64 val) { struct mce *m = (struct mce *)data; + u8 n_banks; + u64 cap; + + /* Get bank count on target CPU so we can handle non-uniform values. */ + rdmsrl_on_cpu(m->extcpu, MSR_IA32_MCG_CAP, &cap); + n_banks = cap & MCG_BANKCNT_MASK; if (val >= n_banks) { - pr_err("Non-existent MCE bank: %llu\n", val); + pr_err("MCA bank %llu non-existent on CPU%d\n", val, m->extcpu); return -EINVAL; } @@ -665,10 +669,6 @@ static struct dfs_node { static int __init debugfs_init(void) { unsigned int i; - u64 cap; - - rdmsrl(MSR_IA32_MCG_CAP, cap); - n_banks = cap & MCG_BANKCNT_MASK; dfs_inj = debugfs_create_dir("mce-inject", NULL); if (!dfs_inj) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 0b0e44f85393..95c09db1bba2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1499,13 +1499,12 @@ EXPORT_SYMBOL_GPL(mce_notify_irq); static int __mcheck_cpu_mce_banks_init(void) { int i; - u8 num_banks = mca_cfg.banks; - mce_banks = kzalloc(num_banks * sizeof(struct mce_bank), GFP_KERNEL); + mce_banks = kcalloc(MAX_NR_BANKS, sizeof(struct mce_bank), GFP_KERNEL); if (!mce_banks) return -ENOMEM; - for (i = 0; i < num_banks; i++) { + for (i = 0; i < MAX_NR_BANKS; i++) { struct mce_bank *b = &mce_banks[i]; b->ctl = -1ULL; @@ -1519,28 +1518,19 @@ static int __mcheck_cpu_mce_banks_init(void) */ static int __mcheck_cpu_cap_init(void) { - unsigned b; u64 cap; + u8 b; rdmsrl(MSR_IA32_MCG_CAP, cap); b = cap & MCG_BANKCNT_MASK; - if (!mca_cfg.banks) - pr_info("CPU supports %d MCE banks\n", b); - - if (b > MAX_NR_BANKS) { - pr_warn("Using only %u machine check banks out of %u\n", - MAX_NR_BANKS, b); + if (WARN_ON_ONCE(b > MAX_NR_BANKS)) b = MAX_NR_BANKS; - } - /* Don't support asymmetric configurations today */ - WARN_ON(mca_cfg.banks != 0 && b != mca_cfg.banks); - mca_cfg.banks = b; + mca_cfg.banks = max(mca_cfg.banks, b); if (!mce_banks) { int err = __mcheck_cpu_mce_banks_init(); - if (err) return err; } @@ -2470,6 +2460,8 @@ EXPORT_SYMBOL_GPL(mcsafe_key); static int __init mcheck_late_init(void) { + pr_info("Using %d MCE banks\n", mca_cfg.banks); + if (mca_cfg.recovery) static_branch_inc(&mcsafe_key); From 5b5295b5c60d6048db2112f4bb691c9cf97631f0 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 5 Mar 2020 17:30:07 +0100 Subject: [PATCH 3546/3715] EDAC/amd64: Set grain per DIMM [ Upstream commit 466503d6b1b33be46ab87c6090f0ade6c6011cbc ] The following commit introduced a warning on error reports without a non-zero grain value. 3724ace582d9 ("EDAC/mc: Fix grain_bits calculation") The amd64_edac_mod module does not provide a value, so the warning will be given on the first reported memory error. Set the grain per DIMM to cacheline size (64 bytes). This is the current recommendation. Fixes: 3724ace582d9 ("EDAC/mc: Fix grain_bits calculation") Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Cc: "linux-edac@vger.kernel.org" Cc: James Morse Cc: Mauro Carvalho Chehab Cc: Robert Richter Cc: Tony Luck Link: https://lkml.kernel.org/r/20191022203448.13962-7-Yazen.Ghannam@amd.com [jwang: backport to 4.14 for fix warning during memory error. ] Signed-off-by: Jack Wang Signed-off-by: Sasha Levin --- drivers/edac/amd64_edac.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 40fb0e7ff8fd..b36abd253786 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2863,6 +2863,7 @@ static int init_csrows(struct mem_ctl_info *mci) dimm = csrow->channels[j]->dimm; dimm->mtype = pvt->dram_type; dimm->edac_mode = edac_mode; + dimm->grain = 64; } } From e1e85c041e00897bfa2e68d46971177f4c0df4f2 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 24 Feb 2020 15:56:32 -0800 Subject: [PATCH 3547/3715] net: dsa: bcm_sf2: Forcibly configure IMP port for 1Gb/sec [ Upstream commit 98c5f7d44fef309e692c24c6d71131ee0f0871fb ] We are still experiencing some packet loss with the existing advanced congestion buffering (ACB) settings with the IMP port configured for 2Gb/sec, so revert to conservative link speeds that do not produce packet loss until this is resolved. Fixes: 8f1880cbe8d0 ("net: dsa: bcm_sf2: Configure IMP port for 2Gb/sec") Fixes: de34d7084edd ("net: dsa: bcm_sf2: Only 7278 supports 2Gb/sec IMP port") Signed-off-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/dsa/bcm_sf2.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 747062f04bb5..6bca42e34a53 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -138,8 +138,7 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port) /* Force link status for IMP port */ reg = core_readl(priv, offset); reg |= (MII_SW_OR | LINK_STS); - if (priv->type == BCM7278_DEVICE_ID) - reg |= GMII_SPEED_UP_2G; + reg &= ~GMII_SPEED_UP_2G; core_writel(priv, reg, offset); /* Enable Broadcast, Multicast, Unicast forwarding to IMP port */ From c18a10efb04a7166831383b168c64d158425c54d Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 27 Feb 2020 14:57:28 +0200 Subject: [PATCH 3548/3715] RDMA/core: Fix pkey and port assignment in get_new_pps [ Upstream commit 801b67f3eaafd3f2ec8b65d93142d4ffedba85df ] When port is part of the modify mask, then we should take it from the qp_attr and not from the old pps. Same for PKEY. Otherwise there are panics in some configurations: RIP: 0010:get_pkey_idx_qp_list+0x50/0x80 [ib_core] Code: c7 18 e8 13 04 30 ef 0f b6 43 06 48 69 c0 b8 00 00 00 48 03 85 a0 04 00 00 48 8b 50 20 48 8d 48 20 48 39 ca 74 1a 0f b7 73 04 <66> 39 72 10 75 08 eb 10 66 39 72 10 74 0a 48 8b 12 48 39 ca 75 f2 RSP: 0018:ffffafb3480932f0 EFLAGS: 00010203 RAX: ffff98059ababa10 RBX: ffff980d926e8cc0 RCX: ffff98059ababa30 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff98059ababa28 RBP: ffff98059b940000 R08: 00000000000310c0 R09: ffff97fe47c07480 R10: 0000000000000036 R11: 0000000000000200 R12: 0000000000000071 R13: ffff98059b940000 R14: ffff980d87f948a0 R15: 0000000000000000 FS: 00007f88deb31740(0000) GS:ffff98059f600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000010 CR3: 0000000853e26001 CR4: 00000000001606e0 Call Trace: port_pkey_list_insert+0x3d/0x1b0 [ib_core] ? kmem_cache_alloc_trace+0x215/0x220 ib_security_modify_qp+0x226/0x3a0 [ib_core] _ib_modify_qp+0xcf/0x390 [ib_core] ipoib_init_qp+0x7f/0x200 [ib_ipoib] ? rvt_modify_port+0xd0/0xd0 [rdmavt] ? ib_find_pkey+0x99/0xf0 [ib_core] ipoib_ib_dev_open_default+0x1a/0x200 [ib_ipoib] ipoib_ib_dev_open+0x96/0x130 [ib_ipoib] ipoib_open+0x44/0x130 [ib_ipoib] __dev_open+0xd1/0x160 __dev_change_flags+0x1ab/0x1f0 dev_change_flags+0x23/0x60 do_setlink+0x328/0xe30 ? __nla_validate_parse+0x54/0x900 __rtnl_newlink+0x54e/0x810 ? __alloc_pages_nodemask+0x17d/0x320 ? page_fault+0x30/0x50 ? _cond_resched+0x15/0x30 ? kmem_cache_alloc_trace+0x1c8/0x220 rtnl_newlink+0x43/0x60 rtnetlink_rcv_msg+0x28f/0x350 ? kmem_cache_alloc+0x1fb/0x200 ? _cond_resched+0x15/0x30 ? __kmalloc_node_track_caller+0x24d/0x2d0 ? rtnl_calcit.isra.31+0x120/0x120 netlink_rcv_skb+0xcb/0x100 netlink_unicast+0x1e0/0x340 netlink_sendmsg+0x317/0x480 ? __check_object_size+0x48/0x1d0 sock_sendmsg+0x65/0x80 ____sys_sendmsg+0x223/0x260 ? copy_msghdr_from_user+0xdc/0x140 ___sys_sendmsg+0x7c/0xc0 ? skb_dequeue+0x57/0x70 ? __inode_wait_for_writeback+0x75/0xe0 ? fsnotify_grab_connector+0x45/0x80 ? __dentry_kill+0x12c/0x180 __sys_sendmsg+0x58/0xa0 do_syscall_64+0x5b/0x200 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f88de467f10 Link: https://lore.kernel.org/r/20200227125728.100551-1-leon@kernel.org Cc: Fixes: 1dd017882e01 ("RDMA/core: Fix protection fault in get_pkey_idx_qp_list") Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Tested-by: Mike Marciniszyn Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/security.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index 9b8276691329..61aff69e9f67 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -339,11 +339,15 @@ static struct ib_ports_pkeys *get_new_pps(const struct ib_qp *qp, return NULL; if (qp_attr_mask & IB_QP_PORT) - new_pps->main.port_num = - (qp_pps) ? qp_pps->main.port_num : qp_attr->port_num; + new_pps->main.port_num = qp_attr->port_num; + else if (qp_pps) + new_pps->main.port_num = qp_pps->main.port_num; + if (qp_attr_mask & IB_QP_PKEY_INDEX) - new_pps->main.pkey_index = (qp_pps) ? qp_pps->main.pkey_index : - qp_attr->pkey_index; + new_pps->main.pkey_index = qp_attr->pkey_index; + else if (qp_pps) + new_pps->main.pkey_index = qp_pps->main.pkey_index; + if ((qp_attr_mask & IB_QP_PKEY_INDEX) && (qp_attr_mask & IB_QP_PORT)) new_pps->main.state = IB_PORT_PKEY_VALID; From b2c775307de06372dd2f6d3b66386e434a66f8d2 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 17 Feb 2020 13:43:18 -0700 Subject: [PATCH 3549/3715] RDMA/core: Fix use of logical OR in get_new_pps [ Upstream commit 4ca501d6aaf21de31541deac35128bbea8427aa6 ] Clang warns: ../drivers/infiniband/core/security.c:351:41: warning: converting the enum constant to a boolean [-Wint-in-bool-context] if (!(qp_attr_mask & (IB_QP_PKEY_INDEX || IB_QP_PORT)) && qp_pps) { ^ 1 warning generated. A bitwise OR should have been used instead. Fixes: 1dd017882e01 ("RDMA/core: Fix protection fault in get_pkey_idx_qp_list") Link: https://lore.kernel.org/r/20200217204318.13609-1-natechancellor@gmail.com Link: https://github.com/ClangBuiltLinux/linux/issues/889 Reported-by: Dan Carpenter Signed-off-by: Nathan Chancellor Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/security.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index 61aff69e9f67..ce8e3009344a 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -351,7 +351,7 @@ static struct ib_ports_pkeys *get_new_pps(const struct ib_qp *qp, if ((qp_attr_mask & IB_QP_PKEY_INDEX) && (qp_attr_mask & IB_QP_PORT)) new_pps->main.state = IB_PORT_PKEY_VALID; - if (!(qp_attr_mask & (IB_QP_PKEY_INDEX || IB_QP_PORT)) && qp_pps) { + if (!(qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) && qp_pps) { new_pps->main.port_num = qp_pps->main.port_num; new_pps->main.pkey_index = qp_pps->main.pkey_index; if (qp_pps->main.state != IB_PORT_PKEY_NOT_VALID) From 53647b8201accbde651a54e65a2c8c66892a35da Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 7 Jan 2020 23:42:24 +0900 Subject: [PATCH 3550/3715] kprobes: Fix optimize_kprobe()/unoptimize_kprobe() cancellation logic [ Upstream commit e4add247789e4ba5e08ad8256183ce2e211877d4 ] optimize_kprobe() and unoptimize_kprobe() cancels if a given kprobe is on the optimizing_list or unoptimizing_list already. However, since the following commit: f66c0447cca1 ("kprobes: Set unoptimized flag after unoptimizing code") modified the update timing of the KPROBE_FLAG_OPTIMIZED, it doesn't work as expected anymore. The optimized_kprobe could be in the following states: - [optimizing]: Before inserting jump instruction op.kp->flags has KPROBE_FLAG_OPTIMIZED and op->list is not empty. - [optimized]: jump inserted op.kp->flags has KPROBE_FLAG_OPTIMIZED and op->list is empty. - [unoptimizing]: Before removing jump instruction (including unused optprobe) op.kp->flags has KPROBE_FLAG_OPTIMIZED and op->list is not empty. - [unoptimized]: jump removed op.kp->flags doesn't have KPROBE_FLAG_OPTIMIZED and op->list is empty. Current code mis-expects [unoptimizing] state doesn't have KPROBE_FLAG_OPTIMIZED, and that can cause incorrect results. To fix this, introduce optprobe_queued_unopt() to distinguish [optimizing] and [unoptimizing] states and fixes the logic in optimize_kprobe() and unoptimize_kprobe(). [ mingo: Cleaned up the changelog and the code a bit. ] Signed-off-by: Masami Hiramatsu Reviewed-by: Steven Rostedt (VMware) Cc: Alexei Starovoitov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bristot@redhat.com Fixes: f66c0447cca1 ("kprobes: Set unoptimized flag after unoptimizing code") Link: https://lkml.kernel.org/r/157840814418.7181.13478003006386303481.stgit@devnote2 Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/kprobes.c | 67 +++++++++++++++++++++++++++++++----------------- 1 file changed, 43 insertions(+), 24 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 48bf93bbb22e..66f1818d4762 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -625,6 +625,18 @@ void wait_for_kprobe_optimizer(void) mutex_unlock(&kprobe_mutex); } +static bool optprobe_queued_unopt(struct optimized_kprobe *op) +{ + struct optimized_kprobe *_op; + + list_for_each_entry(_op, &unoptimizing_list, list) { + if (op == _op) + return true; + } + + return false; +} + /* Optimize kprobe if p is ready to be optimized */ static void optimize_kprobe(struct kprobe *p) { @@ -646,17 +658,21 @@ static void optimize_kprobe(struct kprobe *p) return; /* Check if it is already optimized. */ - if (op->kp.flags & KPROBE_FLAG_OPTIMIZED) + if (op->kp.flags & KPROBE_FLAG_OPTIMIZED) { + if (optprobe_queued_unopt(op)) { + /* This is under unoptimizing. Just dequeue the probe */ + list_del_init(&op->list); + } return; + } op->kp.flags |= KPROBE_FLAG_OPTIMIZED; - if (!list_empty(&op->list)) - /* This is under unoptimizing. Just dequeue the probe */ - list_del_init(&op->list); - else { - list_add(&op->list, &optimizing_list); - kick_kprobe_optimizer(); - } + /* On unoptimizing/optimizing_list, op must have OPTIMIZED flag */ + if (WARN_ON_ONCE(!list_empty(&op->list))) + return; + + list_add(&op->list, &optimizing_list); + kick_kprobe_optimizer(); } /* Short cut to direct unoptimizing */ @@ -678,30 +694,33 @@ static void unoptimize_kprobe(struct kprobe *p, bool force) return; /* This is not an optprobe nor optimized */ op = container_of(p, struct optimized_kprobe, kp); - if (!kprobe_optimized(p)) { - /* Unoptimized or unoptimizing case */ - if (force && !list_empty(&op->list)) { - /* - * Only if this is unoptimizing kprobe and forced, - * forcibly unoptimize it. (No need to unoptimize - * unoptimized kprobe again :) - */ + if (!kprobe_optimized(p)) + return; + + if (!list_empty(&op->list)) { + if (optprobe_queued_unopt(op)) { + /* Queued in unoptimizing queue */ + if (force) { + /* + * Forcibly unoptimize the kprobe here, and queue it + * in the freeing list for release afterwards. + */ + force_unoptimize_kprobe(op); + list_move(&op->list, &freeing_list); + } + } else { + /* Dequeue from the optimizing queue */ list_del_init(&op->list); - force_unoptimize_kprobe(op); + op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; } return; } - if (!list_empty(&op->list)) { - /* Dequeue from the optimization queue */ - list_del_init(&op->list); - return; - } /* Optimized kprobe case */ - if (force) + if (force) { /* Forcibly update the code: this is a special case */ force_unoptimize_kprobe(op); - else { + } else { list_add(&op->list, &unoptimizing_list); kick_kprobe_optimizer(); } From 6d53f29dd99bde2cf093f4246a6f602f4507f552 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Fri, 7 Feb 2020 11:53:35 +0200 Subject: [PATCH 3551/3715] serial: ar933x_uart: set UART_CS_{RX,TX}_READY_ORIDE [ Upstream commit 87c5cbf71ecbb9e289d60a2df22eb686c70bf196 ] On AR934x this UART is usually not initialized by the bootloader as it is only used as a secondary serial port while the primary UART is a newly introduced NS16550-compatible. In order to make use of the ar933x-uart on AR934x without RTS/CTS hardware flow control, one needs to set the UART_CS_{RX,TX}_READY_ORIDE bits as other than on AR933x where this UART is used as primary/console, the bootloader on AR934x typically doesn't set those bits. Setting them explicitely on AR933x should not do any harm, so just set them unconditionally. Tested-by: Chuanhong Guo Signed-off-by: Daniel Golle Link: https://lore.kernel.org/r/20200207095335.GA179836@makrotopia.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/ar933x_uart.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/tty/serial/ar933x_uart.c b/drivers/tty/serial/ar933x_uart.c index ed545a61413c..ac56a5131a9c 100644 --- a/drivers/tty/serial/ar933x_uart.c +++ b/drivers/tty/serial/ar933x_uart.c @@ -289,6 +289,10 @@ static void ar933x_uart_set_termios(struct uart_port *port, ar933x_uart_rmw_set(up, AR933X_UART_CS_REG, AR933X_UART_CS_HOST_INT_EN); + /* enable RX and TX ready overide */ + ar933x_uart_rmw_set(up, AR933X_UART_CS_REG, + AR933X_UART_CS_TX_READY_ORIDE | AR933X_UART_CS_RX_READY_ORIDE); + /* reenable the UART */ ar933x_uart_rmw(up, AR933X_UART_CS_REG, AR933X_UART_CS_IF_MODE_M << AR933X_UART_CS_IF_MODE_S, @@ -421,6 +425,10 @@ static int ar933x_uart_startup(struct uart_port *port) ar933x_uart_rmw_set(up, AR933X_UART_CS_REG, AR933X_UART_CS_HOST_INT_EN); + /* enable RX and TX ready overide */ + ar933x_uart_rmw_set(up, AR933X_UART_CS_REG, + AR933X_UART_CS_TX_READY_ORIDE | AR933X_UART_CS_RX_READY_ORIDE); + /* Enable RX interrupts */ up->ier = AR933X_UART_INT_RX_VALID; ar933x_uart_write(up, AR933X_UART_INT_EN_REG, up->ier); From dd3a97bd15ea278c8347a9a68c5bb6b3355b1240 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 6 Feb 2020 09:40:52 +0100 Subject: [PATCH 3552/3715] selftests: fix too long argument [ Upstream commit c363eb48ada5cf732b3f489fab799fc881097842 ] With some shells, the command construed for install of bpf selftests becomes too large due to long list of files: make[1]: execvp: /bin/sh: Argument list too long make[1]: *** [../lib.mk:73: install] Error 127 Currently, each of the file lists is replicated three times in the command: in the shell 'if' condition, in the 'echo' and in the 'rsync'. Reduce that by one instance by using make conditionals and separate the echo and rsync into two shell commands. (One would be inclined to just remove the '@' at the beginning of the rsync command and let 'make' echo it by itself; unfortunately, it appears that the '@' in the front of mkdir silences output also for the following commands.) Also, separate handling of each of the lists to its own shell command. The semantics of the makefile is unchanged before and after the patch. The ability of individual test directories to override INSTALL_RULE is retained. Reported-by: Yauheni Kaliuta Tested-by: Yauheni Kaliuta Signed-off-by: Jiri Benc Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- tools/testing/selftests/lib.mk | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 5bef05d6ba39..c9be64dc681d 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -54,17 +54,20 @@ else $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS)) endif +define INSTALL_SINGLE_RULE + $(if $(INSTALL_LIST),@mkdir -p $(INSTALL_PATH)) + $(if $(INSTALL_LIST),@echo rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/) + $(if $(INSTALL_LIST),@rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/) +endef + define INSTALL_RULE - @if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then \ - mkdir -p ${INSTALL_PATH}; \ - echo "rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/"; \ - rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/; \ - fi - @if [ "X$(TEST_GEN_PROGS)$(TEST_CUSTOM_PROGS)$(TEST_GEN_PROGS_EXTENDED)$(TEST_GEN_FILES)" != "X" ]; then \ - mkdir -p ${INSTALL_PATH}; \ - echo "rsync -a $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/"; \ - rsync -a $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/; \ - fi + $(eval INSTALL_LIST = $(TEST_PROGS)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_PROGS_EXTENDED)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_FILES)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_GEN_PROGS)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_CUSTOM_PROGS)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_GEN_PROGS_EXTENDED)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_GEN_FILES)) $(INSTALL_SINGLE_RULE) endef install: all From e2dc64fa1df7aa5912e07157ad6f15734d25666d Mon Sep 17 00:00:00 2001 From: Jack Pham Date: Thu, 30 Jan 2020 19:10:36 -0800 Subject: [PATCH 3553/3715] usb: gadget: composite: Support more than 500mA MaxPower [ Upstream commit a2035411fa1d1206cea7d5dfe833e78481844a76 ] USB 3.x SuperSpeed peripherals can draw up to 900mA of VBUS power when in configured state. However, if a configuration wanting to take advantage of this is added with MaxPower greater than 500 (currently possible if using a ConfigFS gadget) the composite driver fails to accommodate this for a couple reasons: - usb_gadget_vbus_draw() when called from set_config() and composite_resume() will be passed the MaxPower value without regard for the current connection speed, resulting in a violation for USB 2.0 since the max is 500mA. - the bMaxPower of the configuration descriptor would be incorrectly encoded, again if the connection speed is only at USB 2.0 or below, likely wrapping around U8_MAX since the 2mA multiplier corresponds to a maximum of 510mA. Fix these by adding checks against the current gadget->speed when the c->MaxPower value is used (set_config() and composite_resume()) and appropriately limit based on whether it is currently at a low-/full-/high- or super-speed connection. Because 900 is not divisible by 8, with the round-up division currently used in encode_bMaxPower() a MaxPower of 900mA will result in an encoded value of 0x71. When a host stack (including Linux and Windows) enumerates this on a single port root hub, it reads this value back and decodes (multiplies by 8) to get 904mA which is strictly greater than 900mA that is typically budgeted for that port, causing it to reject the configuration. Instead, we should be using the round-down behavior of normal integral division so that 900 / 8 -> 0x70 or 896mA to stay within range. And we might as well change it for the high/full/low case as well for consistency. N.B. USB 3.2 Gen N x 2 allows for up to 1500mA but there doesn't seem to be any any peripheral controller supported by Linux that does two lane operation, so for now keeping the clamp at 900 should be fine. Signed-off-by: Jack Pham Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/gadget/composite.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 6e30b177aa22..5a4cf779b269 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -441,9 +441,13 @@ static u8 encode_bMaxPower(enum usb_device_speed speed, if (!val) return 0; if (speed < USB_SPEED_SUPER) - return DIV_ROUND_UP(val, 2); + return min(val, 500U) / 2; else - return DIV_ROUND_UP(val, 8); + /* + * USB 3.x supports up to 900mA, but since 900 isn't divisible + * by 8 the integral division will effectively cap to 896mA. + */ + return min(val, 900U) / 8; } static int config_buf(struct usb_configuration *config, @@ -841,6 +845,10 @@ static int set_config(struct usb_composite_dev *cdev, /* when we return, be sure our power usage is valid */ power = c->MaxPower ? c->MaxPower : CONFIG_USB_GADGET_VBUS_DRAW; + if (gadget->speed < USB_SPEED_SUPER) + power = min(power, 500U); + else + power = min(power, 900U); done: usb_gadget_vbus_draw(gadget, power); if (result >= 0 && cdev->delayed_status) @@ -2280,7 +2288,7 @@ void composite_resume(struct usb_gadget *gadget) { struct usb_composite_dev *cdev = get_gadget_data(gadget); struct usb_function *f; - u16 maxpower; + unsigned maxpower; /* REVISIT: should we have config level * suspend/resume callbacks? @@ -2294,10 +2302,14 @@ void composite_resume(struct usb_gadget *gadget) f->resume(f); } - maxpower = cdev->config->MaxPower; + maxpower = cdev->config->MaxPower ? + cdev->config->MaxPower : CONFIG_USB_GADGET_VBUS_DRAW; + if (gadget->speed < USB_SPEED_SUPER) + maxpower = min(maxpower, 500U); + else + maxpower = min(maxpower, 900U); - usb_gadget_vbus_draw(gadget, maxpower ? - maxpower : CONFIG_USB_GADGET_VBUS_DRAW); + usb_gadget_vbus_draw(gadget, maxpower); } cdev->suspended = 0; From e476b55da6d22cb29ac50f5b585a16d37854c312 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Thu, 16 Jan 2020 15:29:01 +0200 Subject: [PATCH 3554/3715] usb: gadget: ffs: ffs_aio_cancel(): Save/restore IRQ flags [ Upstream commit 43d565727a3a6fd24e37c7c2116475106af71806 ] ffs_aio_cancel() can be called from both interrupt and thread context. Make sure that the current IRQ state is saved and restored by using spin_{un,}lock_irq{save,restore}(). Otherwise undefined behavior might occur. Acked-by: Michal Nazarewicz Signed-off-by: Lars-Peter Clausen Signed-off-by: Alexandru Ardelean Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/gadget/function/f_fs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index cdffbe999500..282396e8eec6 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1078,18 +1078,19 @@ static int ffs_aio_cancel(struct kiocb *kiocb) { struct ffs_io_data *io_data = kiocb->private; struct ffs_epfile *epfile = kiocb->ki_filp->private_data; + unsigned long flags; int value; ENTER(); - spin_lock_irq(&epfile->ffs->eps_lock); + spin_lock_irqsave(&epfile->ffs->eps_lock, flags); if (likely(io_data && io_data->ep && io_data->req)) value = usb_ep_dequeue(io_data->ep, io_data->req); else value = -EINVAL; - spin_unlock_irq(&epfile->ffs->eps_lock); + spin_unlock_irqrestore(&epfile->ffs->eps_lock, flags); return value; } From 60fe004c8cb3b48b575c807988aacef81aef8867 Mon Sep 17 00:00:00 2001 From: Sergey Organov Date: Wed, 29 Jan 2020 14:21:46 +0300 Subject: [PATCH 3555/3715] usb: gadget: serial: fix Tx stall after buffer overflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e4bfded56cf39b8d02733c1e6ef546b97961e18a ] Symptom: application opens /dev/ttyGS0 and starts sending (writing) to it while either USB cable is not connected, or nobody listens on the other side of the cable. If driver circular buffer overflows before connection is established, no data will be written to the USB layer until/unless /dev/ttyGS0 is closed and re-opened again by the application (the latter besides having no means of being notified about the event of establishing of the connection.) Fix: on open and/or connect, kick Tx to flush circular buffer data to USB layer. Signed-off-by: Sergey Organov Reviewed-by: Michał Mirosław Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/gadget/function/u_serial.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c index 520ace49f91d..942d2977797d 100644 --- a/drivers/usb/gadget/function/u_serial.c +++ b/drivers/usb/gadget/function/u_serial.c @@ -715,8 +715,10 @@ static int gs_start_io(struct gs_port *port) port->n_read = 0; started = gs_start_rx(port); - /* unblock any pending writes into our circular buffer */ if (started) { + gs_start_tx(port); + /* Unblock any pending writes into our circular buffer, in case + * we didn't in gs_start_tx() */ tty_wakeup(port->port.tty); } else { gs_free_requests(ep, head, &port->read_allocated); From 3824b96e06cc2b28d708e0b0d51051ee112dd5dc Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Sat, 18 Jan 2020 15:41:20 -0500 Subject: [PATCH 3556/3715] drm/msm/mdp5: rate limit pp done timeout warnings [ Upstream commit ef8c9809acb0805c991bba8bdd4749fc46d44a98 ] Add rate limiting of the 'pp done time out' warnings since these warnings can quickly fill the dmesg buffer. Signed-off-by: Brian Masney Signed-off-by: Rob Clark Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c index 440977677001..99d356b6e915 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c @@ -1004,8 +1004,8 @@ static void mdp5_crtc_wait_for_pp_done(struct drm_crtc *crtc) ret = wait_for_completion_timeout(&mdp5_crtc->pp_completion, msecs_to_jiffies(50)); if (ret == 0) - dev_warn(dev->dev, "pp done time out, lm=%d\n", - mdp5_cstate->pipeline.mixer->lm); + dev_warn_ratelimited(dev->dev, "pp done time out, lm=%d\n", + mdp5_cstate->pipeline.mixer->lm); } static void mdp5_crtc_wait_for_flush_done(struct drm_crtc *crtc) From 892afde0f4a1950a3f561253c2a3caf5a5fd8959 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 29 Jan 2020 20:12:44 +0000 Subject: [PATCH 3557/3715] drm: msm: Fix return type of dsi_mgr_connector_mode_valid for kCFI [ Upstream commit 7fd2dfc3694922eb7ace4801b7208cf9f62ebc7d ] I was hitting kCFI crashes when building with clang, and after some digging finally narrowed it down to the dsi_mgr_connector_mode_valid() function being implemented as returning an int, instead of an enum drm_mode_status. This patch fixes it, and appeases the opaque word of the kCFI gods (seriously, clang inlining everything makes the kCFI backtraces only really rough estimates of where things went wrong). Thanks as always to Sami for his help narrowing this down. Cc: Rob Clark Cc: Sean Paul Cc: Sami Tolvanen Cc: Todd Kjos Cc: Alistair Delva Cc: Amit Pundir Cc: Sumit Semwal Cc: freedreno@lists.freedesktop.org Cc: clang-built-linux@googlegroups.com Signed-off-by: John Stultz Reviewed-by: Nick Desaulniers Tested-by: Amit Pundir Signed-off-by: Rob Clark Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/dsi/dsi_manager.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c index 855248132b2b..7d46399a39b4 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_manager.c +++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c @@ -400,7 +400,7 @@ static int dsi_mgr_connector_get_modes(struct drm_connector *connector) return num; } -static int dsi_mgr_connector_mode_valid(struct drm_connector *connector, +static enum drm_mode_status dsi_mgr_connector_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { int id = dsi_mgr_connector_get_id(connector); From 89e30bb46074c1a11b0b6e6797b0bcbcd6d83d54 Mon Sep 17 00:00:00 2001 From: Harigovindan P Date: Thu, 6 Feb 2020 14:26:15 +0530 Subject: [PATCH 3558/3715] drm/msm/dsi: save pll state before dsi host is powered off [ Upstream commit a1028dcfd0dd97884072288d0c8ed7f30399b528 ] Save pll state before dsi host is powered off. Without this change some register values gets resetted. Signed-off-by: Harigovindan P Signed-off-by: Rob Clark Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/dsi/dsi_manager.c | 5 +++++ drivers/gpu/drm/msm/dsi/phy/dsi_phy.c | 4 ---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c index 7d46399a39b4..9fbfa9f94e6c 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_manager.c +++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c @@ -543,6 +543,7 @@ static void dsi_mgr_bridge_post_disable(struct drm_bridge *bridge) struct msm_dsi *msm_dsi1 = dsi_mgr_get_dsi(DSI_1); struct mipi_dsi_host *host = msm_dsi->host; struct drm_panel *panel = msm_dsi->panel; + struct msm_dsi_pll *src_pll; bool is_dual_dsi = IS_DUAL_DSI(); int ret; @@ -583,6 +584,10 @@ static void dsi_mgr_bridge_post_disable(struct drm_bridge *bridge) id, ret); } + /* Save PLL status if it is a clock source */ + src_pll = msm_dsi_phy_get_pll(msm_dsi->phy); + msm_dsi_pll_save_state(src_pll); + ret = msm_dsi_host_power_off(host); if (ret) pr_err("%s: host %d power off failed,%d\n", __func__, id, ret); diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index 7c9bf91bc22b..c0a7fa56d9a7 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -613,10 +613,6 @@ void msm_dsi_phy_disable(struct msm_dsi_phy *phy) if (!phy || !phy->cfg->ops.disable) return; - /* Save PLL status if it is a clock source */ - if (phy->usecase != MSM_DSI_PHY_SLAVE) - msm_dsi_pll_save_state(phy->pll); - phy->cfg->ops.disable(phy); dsi_phy_regulator_disable(phy); From ea29d94b09cb7629a7ddd5e1484c00a56ed20a86 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 15 Feb 2020 17:54:17 +0100 Subject: [PATCH 3559/3715] net: ks8851-ml: Remove 8-bit bus accessors [ Upstream commit 69233bba6543a37755158ca3382765387b8078df ] This driver is mixing 8-bit and 16-bit bus accessors for reasons unknown, however the speculation is that this was some sort of attempt to support the 8-bit bus mode. As per the KS8851-16MLL documentation, all two registers accessed via the 8-bit accessors are internally 16-bit registers, so reading them using 16-bit accessors is fine. The KS_CCR read can be converted to 16-bit read outright, as it is already a concatenation of two 8-bit reads of that register. The KS_RXQCR accesses are 8-bit only, however writing the top 8 bits of the register is OK as well, since the driver caches the entire 16-bit register value anyway. Finally, the driver is not used by any hardware in the kernel right now. The only hardware available to me is one with 16-bit bus, so I have no way to test the 8-bit bus mode, however it is unlikely this ever really worked anyway. If the 8-bit bus mode is ever required, it can be easily added by adjusting the 16-bit accessors to do 2 consecutive accesses, which is how this should have been done from the beginning. Signed-off-by: Marek Vasut Cc: David S. Miller Cc: Lukas Wunner Cc: Petr Stetiar Cc: YueHaibing Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/micrel/ks8851_mll.c | 45 +++--------------------- 1 file changed, 5 insertions(+), 40 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c index adbe0a6fe0db..77d059d7f8c5 100644 --- a/drivers/net/ethernet/micrel/ks8851_mll.c +++ b/drivers/net/ethernet/micrel/ks8851_mll.c @@ -474,24 +474,6 @@ static int msg_enable; * chip is busy transferring packet data (RX/TX FIFO accesses). */ -/** - * ks_rdreg8 - read 8 bit register from device - * @ks : The chip information - * @offset: The register address - * - * Read a 8bit register from the chip, returning the result - */ -static u8 ks_rdreg8(struct ks_net *ks, int offset) -{ - u16 data; - u8 shift_bit = offset & 0x03; - u8 shift_data = (offset & 1) << 3; - ks->cmd_reg_cache = (u16) offset | (u16)(BE0 << shift_bit); - iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); - data = ioread16(ks->hw_addr); - return (u8)(data >> shift_data); -} - /** * ks_rdreg16 - read 16 bit register from device * @ks : The chip information @@ -507,22 +489,6 @@ static u16 ks_rdreg16(struct ks_net *ks, int offset) return ioread16(ks->hw_addr); } -/** - * ks_wrreg8 - write 8bit register value to chip - * @ks: The chip information - * @offset: The register address - * @value: The value to write - * - */ -static void ks_wrreg8(struct ks_net *ks, int offset, u8 value) -{ - u8 shift_bit = (offset & 0x03); - u16 value_write = (u16)(value << ((offset & 1) << 3)); - ks->cmd_reg_cache = (u16)offset | (BE0 << shift_bit); - iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); - iowrite16(value_write, ks->hw_addr); -} - /** * ks_wrreg16 - write 16bit register value to chip * @ks: The chip information @@ -642,8 +608,7 @@ static void ks_read_config(struct ks_net *ks) u16 reg_data = 0; /* Regardless of bus width, 8 bit read should always work.*/ - reg_data = ks_rdreg8(ks, KS_CCR) & 0x00FF; - reg_data |= ks_rdreg8(ks, KS_CCR+1) << 8; + reg_data = ks_rdreg16(ks, KS_CCR); /* addr/data bus are multiplexed */ ks->sharedbus = (reg_data & CCR_SHARED) == CCR_SHARED; @@ -747,7 +712,7 @@ static inline void ks_read_qmu(struct ks_net *ks, u16 *buf, u32 len) /* 1. set sudo DMA mode */ ks_wrreg16(ks, KS_RXFDPR, RXFDPR_RXFPAI); - ks_wrreg8(ks, KS_RXQCR, (ks->rc_rxqcr | RXQCR_SDA) & 0xff); + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA); /* 2. read prepend data */ /** @@ -764,7 +729,7 @@ static inline void ks_read_qmu(struct ks_net *ks, u16 *buf, u32 len) ks_inblk(ks, buf, ALIGN(len, 4)); /* 4. reset sudo DMA Mode */ - ks_wrreg8(ks, KS_RXQCR, ks->rc_rxqcr); + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr); } /** @@ -997,13 +962,13 @@ static void ks_write_qmu(struct ks_net *ks, u8 *pdata, u16 len) ks->txh.txw[1] = cpu_to_le16(len); /* 1. set sudo-DMA mode */ - ks_wrreg8(ks, KS_RXQCR, (ks->rc_rxqcr | RXQCR_SDA) & 0xff); + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA); /* 2. write status/lenth info */ ks_outblk(ks, ks->txh.txw, 4); /* 3. write pkt data */ ks_outblk(ks, (u16 *)pdata, ALIGN(len, 4)); /* 4. reset sudo-DMA mode */ - ks_wrreg8(ks, KS_RXQCR, ks->rc_rxqcr); + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr); /* 5. Enqueue Tx(move the pkt from TX buffer into TXQ) */ ks_wrreg16(ks, KS_TXQCR, TXQCR_METFE); /* 6. wait until TXQCR_METFE is auto-cleared */ From af490013154af4ebe2c02d9d85be6ed84e480f84 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 15 Feb 2020 17:54:18 +0100 Subject: [PATCH 3560/3715] net: ks8851-ml: Fix 16-bit data access [ Upstream commit edacb098ea9c31589276152f09b4439052c0f2b1 ] The packet data written to and read from Micrel KSZ8851-16MLLI must be byte-swapped in 16-bit mode, add this byte-swapping. Signed-off-by: Marek Vasut Cc: David S. Miller Cc: Lukas Wunner Cc: Petr Stetiar Cc: YueHaibing Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/micrel/ks8851_mll.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c index 77d059d7f8c5..fb5f4055e159 100644 --- a/drivers/net/ethernet/micrel/ks8851_mll.c +++ b/drivers/net/ethernet/micrel/ks8851_mll.c @@ -515,7 +515,7 @@ static inline void ks_inblk(struct ks_net *ks, u16 *wptr, u32 len) { len >>= 1; while (len--) - *wptr++ = (u16)ioread16(ks->hw_addr); + *wptr++ = be16_to_cpu(ioread16(ks->hw_addr)); } /** @@ -529,7 +529,7 @@ static inline void ks_outblk(struct ks_net *ks, u16 *wptr, u32 len) { len >>= 1; while (len--) - iowrite16(*wptr++, ks->hw_addr); + iowrite16(cpu_to_be16(*wptr++), ks->hw_addr); } static void ks_disable_int(struct ks_net *ks) From 29c27650511ab1da770a3f35556c6f7d4d2dfc03 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 15 Feb 2020 17:54:19 +0100 Subject: [PATCH 3561/3715] net: ks8851-ml: Fix 16-bit IO operation [ Upstream commit 58292104832fef6cb4a89f736012c0e0724c3442 ] The Micrel KSZ8851-16MLLI datasheet DS00002357B page 12 states that BE[3:0] signals are active high. This contradicts the measurements of the behavior of the actual chip, where these signals behave as active low. For example, to read the CIDER register, the bus must expose 0xc0c0 during the address phase, which means BE[3:0]=4'b1100. Signed-off-by: Marek Vasut Cc: David S. Miller Cc: Lukas Wunner Cc: Petr Stetiar Cc: YueHaibing Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/micrel/ks8851_mll.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c index fb5f4055e159..799154d7c047 100644 --- a/drivers/net/ethernet/micrel/ks8851_mll.c +++ b/drivers/net/ethernet/micrel/ks8851_mll.c @@ -484,7 +484,7 @@ static int msg_enable; static u16 ks_rdreg16(struct ks_net *ks, int offset) { - ks->cmd_reg_cache = (u16)offset | ((BE1 | BE0) << (offset & 0x02)); + ks->cmd_reg_cache = (u16)offset | ((BE3 | BE2) >> (offset & 0x02)); iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); return ioread16(ks->hw_addr); } @@ -499,7 +499,7 @@ static u16 ks_rdreg16(struct ks_net *ks, int offset) static void ks_wrreg16(struct ks_net *ks, int offset, u16 value) { - ks->cmd_reg_cache = (u16)offset | ((BE1 | BE0) << (offset & 0x02)); + ks->cmd_reg_cache = (u16)offset | ((BE3 | BE2) >> (offset & 0x02)); iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); iowrite16(value, ks->hw_addr); } From 5f07ae5c8a2506af5ee979d588faa34514e314a7 Mon Sep 17 00:00:00 2001 From: Marco Felsch Date: Mon, 20 Jan 2020 10:17:29 +0100 Subject: [PATCH 3562/3715] watchdog: da9062: do not ping the hw during stop() [ Upstream commit e9a0e65eda3f78d0b04ec6136c591c000cbc3b76 ] The da9062 hw has a minimum ping cool down phase of at least 200ms. The driver takes that into account by setting the min_hw_heartbeat_ms to 300ms and the core guarantees that the hw limit is observed for the ping() calls. But the core can't guarantee the required minimum ping cool down phase if a stop() command is send immediately after the ping() command. So it is not allowed to ping the watchdog within the stop() command as the driver does. Remove the ping can be done without doubts because the watchdog gets disabled anyway and a (re)start resets the watchdog counter too. Signed-off-by: Marco Felsch Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200120091729.16256-1-m.felsch@pengutronix.de [groeck: Updated description] Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Sasha Levin --- drivers/watchdog/da9062_wdt.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/watchdog/da9062_wdt.c b/drivers/watchdog/da9062_wdt.c index 9083d3d922b0..79383ff62019 100644 --- a/drivers/watchdog/da9062_wdt.c +++ b/drivers/watchdog/da9062_wdt.c @@ -126,13 +126,6 @@ static int da9062_wdt_stop(struct watchdog_device *wdd) struct da9062_watchdog *wdt = watchdog_get_drvdata(wdd); int ret; - ret = da9062_reset_watchdog_timer(wdt); - if (ret) { - dev_err(wdt->hw->dev, "Failed to ping the watchdog (err = %d)\n", - ret); - return ret; - } - ret = regmap_update_bits(wdt->hw->regmap, DA9062AA_CONTROL_D, DA9062AA_TWDSCALE_MASK, From 8ea1411740a0b71b06a6eb8cf4aeeadbf540346b Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 24 Jan 2020 08:48:55 +0300 Subject: [PATCH 3563/3715] s390/cio: cio_ignore_proc_seq_next should increase position index [ Upstream commit 8b101a5e14f2161869636ff9cb4907b7749dc0c2 ] if seq_file .next fuction does not change position index, read after some lseek can generate unexpected output. Link: https://bugzilla.kernel.org/show_bug.cgi?id=206283 Link: https://lore.kernel.org/r/d44c53a7-9bc1-15c7-6d4a-0c10cb9dffce@virtuozzo.com Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger Signed-off-by: Vasily Averin Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- drivers/s390/cio/blacklist.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/s390/cio/blacklist.c b/drivers/s390/cio/blacklist.c index 2a3f874a21d5..9cebff8e8d74 100644 --- a/drivers/s390/cio/blacklist.c +++ b/drivers/s390/cio/blacklist.c @@ -303,8 +303,10 @@ static void * cio_ignore_proc_seq_next(struct seq_file *s, void *it, loff_t *offset) { struct ccwdev_iter *iter; + loff_t p = *offset; - if (*offset >= (__MAX_SUBCHANNEL + 1) * (__MAX_SSID + 1)) + (*offset)++; + if (p >= (__MAX_SUBCHANNEL + 1) * (__MAX_SSID + 1)) return NULL; iter = it; if (iter->devno == __MAX_SUBCHANNEL) { @@ -314,7 +316,6 @@ cio_ignore_proc_seq_next(struct seq_file *s, void *it, loff_t *offset) return NULL; } else iter->devno++; - (*offset)++; return iter; } From a8dc79753d727b4dbb11bb1190f90f4863046f32 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Thu, 16 Jan 2020 12:46:51 -0800 Subject: [PATCH 3564/3715] x86/boot/compressed: Don't declare __force_order in kaslr_64.c [ Upstream commit df6d4f9db79c1a5d6f48b59db35ccd1e9ff9adfc ] GCC 10 changed the default to -fno-common, which leads to LD arch/x86/boot/compressed/vmlinux ld: arch/x86/boot/compressed/pgtable_64.o:(.bss+0x0): multiple definition of `__force_order'; \ arch/x86/boot/compressed/kaslr_64.o:(.bss+0x0): first defined here make[2]: *** [arch/x86/boot/compressed/Makefile:119: arch/x86/boot/compressed/vmlinux] Error 1 Since __force_order is already provided in pgtable_64.c, there is no need to declare __force_order in kaslr_64.c. Signed-off-by: H.J. Lu Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20200124181811.4780-1-hjl.tools@gmail.com Signed-off-by: Sasha Levin --- arch/x86/boot/compressed/pagetable.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c index e691ff734cb5..46573842d8c3 100644 --- a/arch/x86/boot/compressed/pagetable.c +++ b/arch/x86/boot/compressed/pagetable.c @@ -36,9 +36,6 @@ #define __PAGE_OFFSET __PAGE_OFFSET_BASE #include "../../mm/ident_map.c" -/* Used by pgtable.h asm code to force instruction serialization. */ -unsigned long __force_order; - /* Used to track our page table allocation area. */ struct alloc_pgt_data { unsigned char *pgt_buf; From bdb5136a32ee4156ed215fb4dc1ab2b41510aa71 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 20 Feb 2020 00:59:36 +0900 Subject: [PATCH 3565/3715] nvme: Fix uninitialized-variable warning [ Upstream commit 15755854d53b4bbb0bb37a0fce66f0156cfc8a17 ] gcc may detect a false positive on nvme using an unintialized variable if setting features fails. Since this is not a fast path, explicitly initialize this variable to suppress the warning. Reported-by: Arnd Bergmann Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f543b9932c83..a760c449f4a9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -889,8 +889,8 @@ static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl, static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11, void *buffer, size_t buflen, u32 *result) { + union nvme_result res = { 0 }; struct nvme_command c; - union nvme_result res; int ret; memset(&c, 0, sizeof(c)); From e2de3b93e89ffe5ac4efce187790fa9535143e90 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 19 Feb 2020 22:23:18 -0800 Subject: [PATCH 3566/3715] x86/xen: Distribute switch variables for initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9038ec99ceb94fb8d93ade5e236b2928f0792c7c ] Variables declared in a switch statement before any case statements cannot be automatically initialized with compiler instrumentation (as they are not part of any execution flow). With GCC's proposed automatic stack variable initialization feature, this triggers a warning (and they don't get initialized). Clang's automatic stack variable initialization (via CONFIG_INIT_STACK_ALL=y) doesn't throw a warning, but it also doesn't initialize such variables[1]. Note that these warnings (or silent skipping) happen before the dead-store elimination optimization phase, so even when the automatic initializations are later elided in favor of direct initializations, the warnings remain. To avoid these problems, move such variables into the "case" where they're used or lift them up into the main function body. arch/x86/xen/enlighten_pv.c: In function ‘xen_write_msr_safe’: arch/x86/xen/enlighten_pv.c:904:12: warning: statement will never be executed [-Wswitch-unreachable] 904 | unsigned which; | ^~~~~ [1] https://bugs.llvm.org/show_bug.cgi?id=44916 Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20200220062318.69299-1-keescook@chromium.org Reviewed-by: Juergen Gross [boris: made @which an 'unsigned int'] Signed-off-by: Boris Ostrovsky Signed-off-by: Sasha Levin --- arch/x86/xen/enlighten_pv.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index f79a0cdc6b4e..1f8175bf2a5e 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -909,14 +909,15 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err) static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) { int ret; +#ifdef CONFIG_X86_64 + unsigned int which; + u64 base; +#endif ret = 0; switch (msr) { #ifdef CONFIG_X86_64 - unsigned which; - u64 base; - case MSR_FS_BASE: which = SEGBASE_FS; goto set; case MSR_KERNEL_GS_BASE: which = SEGBASE_GS_USER; goto set; case MSR_GS_BASE: which = SEGBASE_GS_KERNEL; goto set; From 34bf8ca7879b3c47c74f6f39662812ad9e0a7555 Mon Sep 17 00:00:00 2001 From: Tim Harvey Date: Wed, 19 Feb 2020 15:19:36 -0800 Subject: [PATCH 3567/3715] net: thunderx: workaround BGX TX Underflow issue [ Upstream commit 971617c3b761c876d686a2188220a33898c90e99 ] While it is not yet understood why a TX underflow can easily occur for SGMII interfaces resulting in a TX wedge. It has been found that disabling/re-enabling the LMAC resolves the issue. Signed-off-by: Tim Harvey Reviewed-by: Robert Jones Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../net/ethernet/cavium/thunder/thunder_bgx.c | 62 ++++++++++++++++++- .../net/ethernet/cavium/thunder/thunder_bgx.h | 9 +++ 2 files changed, 68 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 586e35593310..d678f088925c 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -234,10 +234,19 @@ void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable) lmac = &bgx->lmac[lmacid]; cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG); - if (enable) + if (enable) { cfg |= CMR_PKT_RX_EN | CMR_PKT_TX_EN; - else + + /* enable TX FIFO Underflow interrupt */ + bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_INT_ENA_W1S, + GMI_TXX_INT_UNDFLW); + } else { cfg &= ~(CMR_PKT_RX_EN | CMR_PKT_TX_EN); + + /* Disable TX FIFO Underflow interrupt */ + bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_INT_ENA_W1C, + GMI_TXX_INT_UNDFLW); + } bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg); if (bgx->is_rgx) @@ -1340,6 +1349,48 @@ static int bgx_init_phy(struct bgx *bgx) return bgx_init_of_phy(bgx); } +static irqreturn_t bgx_intr_handler(int irq, void *data) +{ + struct bgx *bgx = (struct bgx *)data; + u64 status, val; + int lmac; + + for (lmac = 0; lmac < bgx->lmac_count; lmac++) { + status = bgx_reg_read(bgx, lmac, BGX_GMP_GMI_TXX_INT); + if (status & GMI_TXX_INT_UNDFLW) { + pci_err(bgx->pdev, "BGX%d lmac%d UNDFLW\n", + bgx->bgx_id, lmac); + val = bgx_reg_read(bgx, lmac, BGX_CMRX_CFG); + val &= ~CMR_EN; + bgx_reg_write(bgx, lmac, BGX_CMRX_CFG, val); + val |= CMR_EN; + bgx_reg_write(bgx, lmac, BGX_CMRX_CFG, val); + } + /* clear interrupts */ + bgx_reg_write(bgx, lmac, BGX_GMP_GMI_TXX_INT, status); + } + + return IRQ_HANDLED; +} + +static void bgx_register_intr(struct pci_dev *pdev) +{ + struct bgx *bgx = pci_get_drvdata(pdev); + int ret; + + ret = pci_alloc_irq_vectors(pdev, BGX_LMAC_VEC_OFFSET, + BGX_LMAC_VEC_OFFSET, PCI_IRQ_ALL_TYPES); + if (ret < 0) { + pci_err(pdev, "Req for #%d msix vectors failed\n", + BGX_LMAC_VEC_OFFSET); + return; + } + ret = pci_request_irq(pdev, GMPX_GMI_TX_INT, bgx_intr_handler, NULL, + bgx, "BGX%d", bgx->bgx_id); + if (ret) + pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); +} + static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { int err; @@ -1355,7 +1406,7 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, bgx); - err = pci_enable_device(pdev); + err = pcim_enable_device(pdev); if (err) { dev_err(dev, "Failed to enable PCI device\n"); pci_set_drvdata(pdev, NULL); @@ -1409,6 +1460,8 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) bgx_init_hw(bgx); + bgx_register_intr(pdev); + /* Enable all LMACs */ for (lmac = 0; lmac < bgx->lmac_count; lmac++) { err = bgx_lmac_enable(bgx, lmac); @@ -1425,6 +1478,7 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err_enable: bgx_vnic[bgx->bgx_id] = NULL; + pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); err_release_regions: pci_release_regions(pdev); err_disable_device: @@ -1442,6 +1496,8 @@ static void bgx_remove(struct pci_dev *pdev) for (lmac = 0; lmac < bgx->lmac_count; lmac++) bgx_lmac_disable(bgx, lmac); + pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); + bgx_vnic[bgx->bgx_id] = NULL; pci_release_regions(pdev); pci_disable_device(pdev); diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h index 23acdc5ab896..adaa3bfa5f6c 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h @@ -179,6 +179,15 @@ #define BGX_GMP_GMI_TXX_BURST 0x38228 #define BGX_GMP_GMI_TXX_MIN_PKT 0x38240 #define BGX_GMP_GMI_TXX_SGMII_CTL 0x38300 +#define BGX_GMP_GMI_TXX_INT 0x38500 +#define BGX_GMP_GMI_TXX_INT_W1S 0x38508 +#define BGX_GMP_GMI_TXX_INT_ENA_W1C 0x38510 +#define BGX_GMP_GMI_TXX_INT_ENA_W1S 0x38518 +#define GMI_TXX_INT_PTP_LOST BIT_ULL(4) +#define GMI_TXX_INT_LATE_COL BIT_ULL(3) +#define GMI_TXX_INT_XSDEF BIT_ULL(2) +#define GMI_TXX_INT_XSCOL BIT_ULL(1) +#define GMI_TXX_INT_UNDFLW BIT_ULL(0) #define BGX_MSIX_VEC_0_29_ADDR 0x400000 /* +(0..29) << 4 */ #define BGX_MSIX_VEC_0_29_CTL 0x400008 From 18a7af5cb6de5c44c3d0f6880b9d6a1d79783ad4 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 19 Feb 2020 06:01:03 +1000 Subject: [PATCH 3568/3715] cifs: don't leak -EAGAIN for stat() during reconnect commit fc513fac56e1b626ae48a74d7551d9c35c50129e upstream. If from cifs_revalidate_dentry_attr() the SMB2/QUERY_INFO call fails with an error, such as STATUS_SESSION_EXPIRED, causing the session to be reconnected it is possible we will leak -EAGAIN back to the application even for system calls such as stat() where this is not a valid error. Fix this by re-trying the operation from within cifs_revalidate_dentry_attr() if cifs_get_inode_info*() returns -EAGAIN. This fixes stat() and possibly also other system calls that uses cifs_revalidate_dentry*(). Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky Reviewed-by: Aurelien Aptel CC: Stable Signed-off-by: Greg Kroah-Hartman --- fs/cifs/inode.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 3a10d405362e..bdce714e9448 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1998,6 +1998,7 @@ int cifs_revalidate_dentry_attr(struct dentry *dentry) struct inode *inode = d_inode(dentry); struct super_block *sb = dentry->d_sb; char *full_path = NULL; + int count = 0; if (inode == NULL) return -ENOENT; @@ -2019,15 +2020,18 @@ int cifs_revalidate_dentry_attr(struct dentry *dentry) full_path, inode, inode->i_count.counter, dentry, cifs_get_time(dentry), jiffies); +again: if (cifs_sb_master_tcon(CIFS_SB(sb))->unix_ext) rc = cifs_get_inode_info_unix(&inode, full_path, sb, xid); else rc = cifs_get_inode_info(&inode, full_path, NULL, sb, xid, NULL); - + if (rc == -EAGAIN && count++ < 10) + goto again; out: kfree(full_path); free_xid(xid); + return rc; } From 5c4d9b1e0c68fc969929b6a21b370d4c11542508 Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Mon, 2 Mar 2020 22:21:35 +0800 Subject: [PATCH 3569/3715] usb: storage: Add quirk for Samsung Fit flash commit 86d92f5465958752481269348d474414dccb1552 upstream. Current driver has 240 (USB2.0) and 2048 (USB3.0) as max_sectors, e.g., /sys/bus/scsi/devices/0:0:0:0/max_sectors If data access times out, driver error handling will issue a port reset. Sometimes Samsung Fit (090C:1000) flash disk will not respond to later Set Address or Get Descriptor command. Adding this quirk to limit max_sectors to 64 sectors to avoid issue occurring. Signed-off-by: Jim Lin Acked-by: Alan Stern Cc: stable Link: https://lore.kernel.org/r/1583158895-31342-1-git-send-email-jilin@nvidia.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_devs.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index fb69cb64f7d4..df8ee83c3f1a 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -1277,6 +1277,12 @@ UNUSUAL_DEV( 0x090a, 0x1200, 0x0000, 0x9999, USB_SC_RBC, USB_PR_BULK, NULL, 0 ), +UNUSUAL_DEV(0x090c, 0x1000, 0x1100, 0x1100, + "Samsung", + "Flash Drive FIT", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_MAX_SECTORS_64), + /* aeb */ UNUSUAL_DEV( 0x090c, 0x1132, 0x0000, 0xffff, "Feiya", From 504a02d494b7cda36bb8175704e908c72d247c9e Mon Sep 17 00:00:00 2001 From: Dan Lazewatsky Date: Wed, 26 Feb 2020 14:34:38 +0000 Subject: [PATCH 3570/3715] usb: quirks: add NO_LPM quirk for Logitech Screen Share commit b96ed52d781a2026d0c0daa5787c6f3d45415862 upstream. LPM on the device appears to cause xHCI host controllers to claim that there isn't enough bandwidth to support additional devices. Signed-off-by: Dan Lazewatsky Cc: stable Signed-off-by: Gustavo Padovan Link: https://lore.kernel.org/r/20200226143438.1445-1-gustavo.padovan@collabora.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index ad8307140df8..64c03e871f2d 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -86,6 +86,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* Logitech PTZ Pro Camera */ { USB_DEVICE(0x046d, 0x0853), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Logitech Screen Share */ + { USB_DEVICE(0x046d, 0x086c), .driver_info = USB_QUIRK_NO_LPM }, + /* Logitech Quickcam Fusion */ { USB_DEVICE(0x046d, 0x08c1), .driver_info = USB_QUIRK_RESET_RESUME }, From bc6da5b19a0f892655783d932f45bc3fd73fd76f Mon Sep 17 00:00:00 2001 From: Eugeniu Rosca Date: Wed, 26 Feb 2020 18:50:34 +0100 Subject: [PATCH 3571/3715] usb: core: hub: fix unhandled return by employing a void function commit 63d6d7ed475c53dc1cabdfedf63de1fd8dcd72ee upstream. Address below Coverity complaint (Feb 25, 2020, 8:06 AM CET): --- drivers/usb/core/hub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index ff1be6a6841b..b0624940ccb0 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1818,7 +1818,7 @@ static int hub_probe(struct usb_interface *intf, const struct usb_device_id *id) if (id->driver_info & HUB_QUIRK_DISABLE_AUTOSUSPEND) { hub->quirk_disable_autosuspend = 1; - usb_autopm_get_interface(intf); + usb_autopm_get_interface_no_resume(intf); } if (hub_configure(hub, &desc->endpoint[0].desc) >= 0) From af20d8c06ec400a3ad8384efea369821ce3db1bd Mon Sep 17 00:00:00 2001 From: Eugeniu Rosca Date: Wed, 26 Feb 2020 18:50:35 +0100 Subject: [PATCH 3572/3715] usb: core: hub: do error out if usb_autopm_get_interface() fails commit 60e3f6e4ac5b0fda43dad01c32e09409ec710045 upstream. Reviewing a fresh portion of coverity defects in USB core (specifically CID 1458999), Alan Stern noted below in [1]: On Tue, Feb 25, 2020 at 02:39:23PM -0500, Alan Stern wrote: > A revised search finds line 997 in drivers/usb/core/hub.c and lines > 216, 269 in drivers/usb/core/port.c. (I didn't try looking in any > other directories.) AFAICT all three of these should check the > return value, although a error message in the kernel log probably > isn't needed. Factor out the usb_remove_device() change into a standalone patch to allow conflict-free integration on top of the earliest stable branches. [1] https://lore.kernel.org/lkml/Pine.LNX.4.44L0.2002251419120.1485-100000@iolanthe.rowland.org Fixes: 253e05724f9230 ("USB: add a "remove hardware" sysfs attribute") Cc: stable@vger.kernel.org # v2.6.33+ Suggested-by: Alan Stern Signed-off-by: Eugeniu Rosca Acked-by: Alan Stern Link: https://lore.kernel.org/r/20200226175036.14946-2-erosca@de.adit-jv.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index b0624940ccb0..4391192bdd19 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -958,13 +958,17 @@ int usb_remove_device(struct usb_device *udev) { struct usb_hub *hub; struct usb_interface *intf; + int ret; if (!udev->parent) /* Can't remove a root hub */ return -EINVAL; hub = usb_hub_to_struct_hub(udev->parent); intf = to_usb_interface(hub->intfdev); - usb_autopm_get_interface(intf); + ret = usb_autopm_get_interface(intf); + if (ret < 0) + return ret; + set_bit(udev->portnum, hub->removed_bits); hub_port_logical_disconnect(hub, udev->portnum); usb_autopm_put_interface(intf); From 8389c9d75e0867064eb5699251da3836191d0420 Mon Sep 17 00:00:00 2001 From: Eugeniu Rosca Date: Wed, 26 Feb 2020 18:50:36 +0100 Subject: [PATCH 3573/3715] usb: core: port: do error out if usb_autopm_get_interface() fails commit 1f8b39bc99a31759e97a0428a5c3f64802c1e61d upstream. Reviewing a fresh portion of coverity defects in USB core (specifically CID 1458999), Alan Stern noted below in [1]: On Tue, Feb 25, 2020 at 02:39:23PM -0500, Alan Stern wrote: > A revised search finds line 997 in drivers/usb/core/hub.c and lines > 216, 269 in drivers/usb/core/port.c. (I didn't try looking in any > other directories.) AFAICT all three of these should check the > return value, although a error message in the kernel log probably > isn't needed. Factor out the usb_port_runtime_{resume,suspend}() changes into a standalone patch to allow conflict-free porting on top of stable v3.9+. [1] https://lore.kernel.org/lkml/Pine.LNX.4.44L0.2002251419120.1485-100000@iolanthe.rowland.org Fixes: 971fcd492cebf5 ("usb: add runtime pm support for usb port device") Cc: stable@vger.kernel.org # v3.9+ Suggested-by: Alan Stern Signed-off-by: Eugeniu Rosca Acked-by: Alan Stern Link: https://lore.kernel.org/r/20200226175036.14946-3-erosca@de.adit-jv.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/port.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c index 460c855be0d0..53c1f6e604b1 100644 --- a/drivers/usb/core/port.c +++ b/drivers/usb/core/port.c @@ -179,7 +179,10 @@ static int usb_port_runtime_resume(struct device *dev) if (!port_dev->is_superspeed && peer) pm_runtime_get_sync(&peer->dev); - usb_autopm_get_interface(intf); + retval = usb_autopm_get_interface(intf); + if (retval < 0) + return retval; + retval = usb_hub_set_port_power(hdev, hub, port1, true); msleep(hub_power_on_good_delay(hub)); if (udev && !retval) { @@ -232,7 +235,10 @@ static int usb_port_runtime_suspend(struct device *dev) if (usb_port_block_power_off) return -EBUSY; - usb_autopm_get_interface(intf); + retval = usb_autopm_get_interface(intf); + if (retval < 0) + return retval; + retval = usb_hub_set_port_power(hdev, hub, port1, false); usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_CONNECTION); if (!port_dev->is_superspeed) From 1855aaccd74cb9528c24ceb6bc15358a411f65ff Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Wed, 4 Mar 2020 10:24:29 +0800 Subject: [PATCH 3574/3715] vgacon: Fix a UAF in vgacon_invert_region commit 513dc792d6060d5ef572e43852683097a8420f56 upstream. When syzkaller tests, there is a UAF: BUG: KASan: use after free in vgacon_invert_region+0x9d/0x110 at addr ffff880000100000 Read of size 2 by task syz-executor.1/16489 page:ffffea0000004000 count:0 mapcount:-127 mapping: (null) index:0x0 page flags: 0xfffff00000000() page dumped because: kasan: bad access detected CPU: 1 PID: 16489 Comm: syz-executor.1 Not tainted Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org 04/01/2014 Call Trace: [] dump_stack+0x1e/0x20 [] kasan_report+0x577/0x950 [] __asan_load2+0x62/0x80 [] vgacon_invert_region+0x9d/0x110 [] invert_screen+0xe5/0x470 [] set_selection+0x44b/0x12f0 [] tioclinux+0xee/0x490 [] vt_ioctl+0xff4/0x2670 [] tty_ioctl+0x46a/0x1a10 [] do_vfs_ioctl+0x5bd/0xc40 [] SyS_ioctl+0x132/0x170 [] system_call_fastpath+0x22/0x27 Memory state around the buggy address: ffff8800000fff00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff8800000fff80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >ffff880000100000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff It can be reproduce in the linux mainline by the program: #include #include #include #include #include #include #include #include struct tiocl_selection { unsigned short xs; /* X start */ unsigned short ys; /* Y start */ unsigned short xe; /* X end */ unsigned short ye; /* Y end */ unsigned short sel_mode; /* selection mode */ }; #define TIOCL_SETSEL 2 struct tiocl { unsigned char type; unsigned char pad; struct tiocl_selection sel; }; int main() { int fd = 0; const char *dev = "/dev/char/4:1"; struct vt_consize v = {0}; struct tiocl tioc = {0}; fd = open(dev, O_RDWR, 0); v.v_rows = 3346; ioctl(fd, VT_RESIZEX, &v); tioc.type = TIOCL_SETSEL; ioctl(fd, TIOCLINUX, &tioc); return 0; } When resize the screen, update the 'vc->vc_size_row' to the new_row_size, but when 'set_origin' in 'vgacon_set_origin', vgacon use 'vga_vram_base' for 'vc_origin' and 'vc_visible_origin', not 'vc_screenbuf'. It maybe smaller than 'vc_screenbuf'. When TIOCLINUX, use the new_row_size to calc the offset, it maybe larger than the vga_vram_size in vgacon driver, then bad access. Also, if set an larger screenbuf firstly, then set an more larger screenbuf, when copy old_origin to new_origin, a bad access may happen. So, If the screen size larger than vga_vram, resize screen should be failed. This alse fix CVE-2020-8649 and CVE-2020-8647. Linus pointed out that overflow checking seems absent. We're saved by the existing bounds checks in vc_do_resize() with rather strict limits: if (cols > VC_RESIZE_MAXCOL || lines > VC_RESIZE_MAXROW) return -EINVAL; Fixes: 0aec4867dca14 ("[PATCH] SVGATextMode fix") Reference: CVE-2020-8647 and CVE-2020-8649 Reported-by: Hulk Robot Signed-off-by: Zhang Xiaoxu [danvet: augment commit message to point out overflow safety] Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20200304022429.37738-1-zhangxiaoxu5@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/video/console/vgacon.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c index a17ba1465815..ff6612a3ddc8 100644 --- a/drivers/video/console/vgacon.c +++ b/drivers/video/console/vgacon.c @@ -1309,6 +1309,9 @@ static int vgacon_font_get(struct vc_data *c, struct console_font *font) static int vgacon_resize(struct vc_data *c, unsigned int width, unsigned int height, unsigned int user) { + if ((width << 1) * height > vga_vram_size) + return -EINVAL; + if (width % 2 || width > screen_info.orig_video_cols || height > (screen_info.orig_video_lines * vga_default_font_height)/ c->vc_font.height) From ebf46a94280d3b30dc20a743b4cc0eb328d61723 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 5 Mar 2020 22:28:26 -0800 Subject: [PATCH 3575/3715] mm, numa: fix bad pmd by atomically check for pmd_trans_huge when marking page tables prot_numa commit 8b272b3cbbb50a6a8e62d8a15affd473a788e184 upstream. : A user reported a bug against a distribution kernel while running a : proprietary workload described as "memory intensive that is not swapping" : that is expected to apply to mainline kernels. The workload is : read/write/modifying ranges of memory and checking the contents. They : reported that within a few hours that a bad PMD would be reported followed : by a memory corruption where expected data was all zeros. A partial : report of the bad PMD looked like : : [ 5195.338482] ../mm/pgtable-generic.c:33: bad pmd ffff8888157ba008(000002e0396009e2) : [ 5195.341184] ------------[ cut here ]------------ : [ 5195.356880] kernel BUG at ../mm/pgtable-generic.c:35! : .... : [ 5195.410033] Call Trace: : [ 5195.410471] [] change_protection_range+0x7dd/0x930 : [ 5195.410716] [] change_prot_numa+0x18/0x30 : [ 5195.410918] [] task_numa_work+0x1fe/0x310 : [ 5195.411200] [] task_work_run+0x72/0x90 : [ 5195.411246] [] exit_to_usermode_loop+0x91/0xc2 : [ 5195.411494] [] prepare_exit_to_usermode+0x31/0x40 : [ 5195.411739] [] retint_user+0x8/0x10 : : Decoding revealed that the PMD was a valid prot_numa PMD and the bad PMD : was a false detection. The bug does not trigger if automatic NUMA : balancing or transparent huge pages is disabled. : : The bug is due a race in change_pmd_range between a pmd_trans_huge and : pmd_nond_or_clear_bad check without any locks held. During the : pmd_trans_huge check, a parallel protection update under lock can have : cleared the PMD and filled it with a prot_numa entry between the transhuge : check and the pmd_none_or_clear_bad check. : : While this could be fixed with heavy locking, it's only necessary to make : a copy of the PMD on the stack during change_pmd_range and avoid races. A : new helper is created for this as the check if quite subtle and the : existing similar helpful is not suitable. This passed 154 hours of : testing (usually triggers between 20 minutes and 24 hours) without : detecting bad PMDs or corruption. A basic test of an autonuma-intensive : workload showed no significant change in behaviour. Although Mel withdrew the patch on the face of LKML comment https://lkml.org/lkml/2017/4/10/922 the race window aforementioned is still open, and we have reports of Linpack test reporting bad residuals after the bad PMD warning is observed. In addition to that, bad rss-counter and non-zero pgtables assertions are triggered on mm teardown for the task hitting the bad PMD. host kernel: mm/pgtable-generic.c:40: bad pmd 00000000b3152f68(8000000d2d2008e7) .... host kernel: BUG: Bad rss-counter state mm:00000000b583043d idx:1 val:512 host kernel: BUG: non-zero pgtables_bytes on freeing mm: 4096 The issue is observed on a v4.18-based distribution kernel, but the race window is expected to be applicable to mainline kernels, as well. [akpm@linux-foundation.org: fix comment typo, per Rafael] Signed-off-by: Andrew Morton Signed-off-by: Rafael Aquini Signed-off-by: Mel Gorman Cc: Cc: Zi Yan Cc: "Kirill A. Shutemov" Cc: Vlastimil Babka Cc: Michal Hocko Link: http://lkml.kernel.org/r/20200216191800.22423-1-aquini@redhat.com Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/mprotect.c | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/mm/mprotect.c b/mm/mprotect.c index 60864e19421e..18ecbd744978 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -148,6 +148,31 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, return pages; } +/* + * Used when setting automatic NUMA hinting protection where it is + * critical that a numa hinting PMD is not confused with a bad PMD. + */ +static inline int pmd_none_or_clear_bad_unless_trans_huge(pmd_t *pmd) +{ + pmd_t pmdval = pmd_read_atomic(pmd); + + /* See pmd_none_or_trans_huge_or_clear_bad for info on barrier */ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + barrier(); +#endif + + if (pmd_none(pmdval)) + return 1; + if (pmd_trans_huge(pmdval)) + return 0; + if (unlikely(pmd_bad(pmdval))) { + pmd_clear_bad(pmd); + return 1; + } + + return 0; +} + static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *pud, unsigned long addr, unsigned long end, pgprot_t newprot, int dirty_accountable, int prot_numa) @@ -164,8 +189,17 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, unsigned long this_pages; next = pmd_addr_end(addr, end); - if (!is_swap_pmd(*pmd) && !pmd_trans_huge(*pmd) && !pmd_devmap(*pmd) - && pmd_none_or_clear_bad(pmd)) + + /* + * Automatic NUMA balancing walks the tables with mmap_sem + * held for read. It's possible a parallel update to occur + * between pmd_trans_huge() and a pmd_none_or_clear_bad() + * check leading to a false positive and clearing. + * Hence, it's necessary to atomically read the PMD value + * for all the checks. + */ + if (!is_swap_pmd(*pmd) && !pmd_devmap(*pmd) && + pmd_none_or_clear_bad_unless_trans_huge(pmd)) goto next; /* invoke the mmu notifier if the pmd is populated */ From 265b81a52542e1a76c53f5aa0f3fd3c576b67be7 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 5 Mar 2020 22:28:36 -0800 Subject: [PATCH 3576/3715] fat: fix uninit-memory access for partial initialized inode commit bc87302a093f0eab45cd4e250c2021299f712ec6 upstream. When get an error in the middle of reading an inode, some fields in the inode might be still not initialized. And then the evict_inode path may access those fields via iput(). To fix, this makes sure that inode fields are initialized. Reported-by: syzbot+9d82b8de2992579da5d0@syzkaller.appspotmail.com Signed-off-by: Andrew Morton Signed-off-by: OGAWA Hirofumi Cc: Link: http://lkml.kernel.org/r/871rqnreqx.fsf@mail.parknet.co.jp Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/fat/inode.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 3b40937b942a..1df023c4c2cc 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -736,6 +736,13 @@ static struct inode *fat_alloc_inode(struct super_block *sb) return NULL; init_rwsem(&ei->truncate_lock); + /* Zeroing to allow iput() even if partial initialized inode. */ + ei->mmu_private = 0; + ei->i_start = 0; + ei->i_logstart = 0; + ei->i_attrs = 0; + ei->i_pos = 0; + return &ei->vfs_inode; } @@ -1366,16 +1373,6 @@ out: return 0; } -static void fat_dummy_inode_init(struct inode *inode) -{ - /* Initialize this dummy inode to work as no-op. */ - MSDOS_I(inode)->mmu_private = 0; - MSDOS_I(inode)->i_start = 0; - MSDOS_I(inode)->i_logstart = 0; - MSDOS_I(inode)->i_attrs = 0; - MSDOS_I(inode)->i_pos = 0; -} - static int fat_read_root(struct inode *inode) { struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); @@ -1820,13 +1817,11 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, fat_inode = new_inode(sb); if (!fat_inode) goto out_fail; - fat_dummy_inode_init(fat_inode); sbi->fat_inode = fat_inode; fsinfo_inode = new_inode(sb); if (!fsinfo_inode) goto out_fail; - fat_dummy_inode_init(fsinfo_inode); fsinfo_inode->i_ino = MSDOS_FSINFO_INO; sbi->fsinfo_inode = fsinfo_inode; insert_inode_hash(fsinfo_inode); From 414de95cde5efed15bbe1690ed934dbf44d6f70f Mon Sep 17 00:00:00 2001 From: Faiz Abbas Date: Tue, 28 Jan 2020 19:17:59 +0530 Subject: [PATCH 3577/3715] arm: dts: dra76x: Fix mmc3 max-frequency commit fa63c0039787b8fbacf4d6a51e3ff44288f5b90b upstream. dra76x is not affected by i887 which requires mmc3 node to be limited to a max frequency of 64 MHz. Fix this by overwriting the correct value in the the dra76 specific dtsi. Fixes: 895bd4b3e5ec ("ARM: dts: Add support for dra76-evm") Cc: stable@vger.kernel.org Signed-off-by: Faiz Abbas Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/dra76x.dtsi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/boot/dts/dra76x.dtsi b/arch/arm/boot/dts/dra76x.dtsi index 1c88c581ff18..78d58b8af67e 100644 --- a/arch/arm/boot/dts/dra76x.dtsi +++ b/arch/arm/boot/dts/dra76x.dtsi @@ -17,3 +17,8 @@ &crossbar_mpu { ti,irqs-skip = <10 67 68 133 139 140>; }; + +&mmc3 { + /* dra76x is not affected by i887 */ + max-frequency = <96000000>; +}; From b0960a0ea4b941adb70c443be59e276897553f56 Mon Sep 17 00:00:00 2001 From: tangbin Date: Thu, 5 Mar 2020 09:38:23 +0800 Subject: [PATCH 3578/3715] tty:serial:mvebu-uart:fix a wrong return commit 4a3e208474204e879d22a310b244cb2f39e5b1f8 upstream. in this place, the function should return a negative value and the PTR_ERR already returns a negative,so return -PTR_ERR() is wrong. Signed-off-by: tangbin Cc: stable Acked-by: Jiri Slaby Link: https://lore.kernel.org/r/20200305013823.20976-1-tangbin@cmss.chinamobile.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/mvebu-uart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c index 401c983ec5f3..a10e4aa9e18e 100644 --- a/drivers/tty/serial/mvebu-uart.c +++ b/drivers/tty/serial/mvebu-uart.c @@ -581,7 +581,7 @@ static int mvebu_uart_probe(struct platform_device *pdev) port->membase = devm_ioremap_resource(&pdev->dev, reg); if (IS_ERR(port->membase)) - return -PTR_ERR(port->membase); + return PTR_ERR(port->membase); data = devm_kzalloc(&pdev->dev, sizeof(struct mvebu_uart_data), GFP_KERNEL); From 7336a80aebc633d75c09d8dad09cd467807c787a Mon Sep 17 00:00:00 2001 From: Jay Dolan Date: Thu, 5 Mar 2020 06:05:04 -0800 Subject: [PATCH 3579/3715] serial: 8250_exar: add support for ACCES cards commit 10c5ccc3c6d32f3d7d6c07de1d3f0f4b52f3e3ab upstream. Add ACCES VIDs and PIDs that use the Exar chips Signed-off-by: Jay Dolan Cc: stable Link: https://lore.kernel.org/r/20200305140504.22237-1-jay.dolan@accesio.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_exar.c | 33 +++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/tty/serial/8250/8250_exar.c b/drivers/tty/serial/8250/8250_exar.c index 411b4b03457b..899f36b59af7 100644 --- a/drivers/tty/serial/8250/8250_exar.c +++ b/drivers/tty/serial/8250/8250_exar.c @@ -27,6 +27,14 @@ #include "8250.h" +#define PCI_DEVICE_ID_ACCES_COM_2S 0x1052 +#define PCI_DEVICE_ID_ACCES_COM_4S 0x105d +#define PCI_DEVICE_ID_ACCES_COM_8S 0x106c +#define PCI_DEVICE_ID_ACCES_COM232_8 0x10a8 +#define PCI_DEVICE_ID_ACCES_COM_2SM 0x10d2 +#define PCI_DEVICE_ID_ACCES_COM_4SM 0x10db +#define PCI_DEVICE_ID_ACCES_COM_8SM 0x10ea + #define PCI_DEVICE_ID_COMMTECH_4224PCI335 0x0002 #define PCI_DEVICE_ID_COMMTECH_4222PCI335 0x0004 #define PCI_DEVICE_ID_COMMTECH_2324PCI335 0x000a @@ -562,6 +570,22 @@ static int __maybe_unused exar_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(exar_pci_pm, exar_suspend, exar_resume); +static const struct exar8250_board acces_com_2x = { + .num_ports = 2, + .setup = pci_xr17c154_setup, +}; + +static const struct exar8250_board acces_com_4x = { + .num_ports = 4, + .setup = pci_xr17c154_setup, +}; + +static const struct exar8250_board acces_com_8x = { + .num_ports = 8, + .setup = pci_xr17c154_setup, +}; + + static const struct exar8250_board pbn_fastcom335_2 = { .num_ports = 2, .setup = pci_fastcom335_setup, @@ -632,6 +656,15 @@ static const struct exar8250_board pbn_exar_XR17V8358 = { } static const struct pci_device_id exar_pci_tbl[] = { + EXAR_DEVICE(ACCESSIO, ACCES_COM_2S, acces_com_2x), + EXAR_DEVICE(ACCESSIO, ACCES_COM_4S, acces_com_4x), + EXAR_DEVICE(ACCESSIO, ACCES_COM_8S, acces_com_8x), + EXAR_DEVICE(ACCESSIO, ACCES_COM232_8, acces_com_8x), + EXAR_DEVICE(ACCESSIO, ACCES_COM_2SM, acces_com_2x), + EXAR_DEVICE(ACCESSIO, ACCES_COM_4SM, acces_com_4x), + EXAR_DEVICE(ACCESSIO, ACCES_COM_8SM, acces_com_8x), + + CONNECT_DEVICE(XR17C152, UART_2_232, pbn_connect), CONNECT_DEVICE(XR17C154, UART_4_232, pbn_connect), CONNECT_DEVICE(XR17C158, UART_8_232, pbn_connect), From 432ef54c0444e7cab85a291347bfc1f69ee6257a Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 10 Feb 2020 09:11:31 +0100 Subject: [PATCH 3580/3715] vt: selection, close sel_buffer race commit 07e6124a1a46b4b5a9b3cacc0c306b50da87abf5 upstream. syzkaller reported this UAF: BUG: KASAN: use-after-free in n_tty_receive_buf_common+0x2481/0x2940 drivers/tty/n_tty.c:1741 Read of size 1 at addr ffff8880089e40e9 by task syz-executor.1/13184 CPU: 0 PID: 13184 Comm: syz-executor.1 Not tainted 5.4.7 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 Call Trace: ... kasan_report+0xe/0x20 mm/kasan/common.c:634 n_tty_receive_buf_common+0x2481/0x2940 drivers/tty/n_tty.c:1741 tty_ldisc_receive_buf+0xac/0x190 drivers/tty/tty_buffer.c:461 paste_selection+0x297/0x400 drivers/tty/vt/selection.c:372 tioclinux+0x20d/0x4e0 drivers/tty/vt/vt.c:3044 vt_ioctl+0x1bcf/0x28d0 drivers/tty/vt/vt_ioctl.c:364 tty_ioctl+0x525/0x15a0 drivers/tty/tty_io.c:2657 vfs_ioctl fs/ioctl.c:47 [inline] It is due to a race between parallel paste_selection (TIOCL_PASTESEL) and set_selection_user (TIOCL_SETSEL) invocations. One uses sel_buffer, while the other frees it and reallocates a new one for another selection. Add a mutex to close this race. The mutex takes care properly of sel_buffer and sel_buffer_lth only. The other selection global variables (like sel_start, sel_end, and sel_cons) are protected only in set_selection_user. The other functions need quite some more work to close the races of the variables there. This is going to happen later. This likely fixes (I am unsure as there is no reproducer provided) bug 206361 too. It was marked as CVE-2020-8648. Signed-off-by: Jiri Slaby Reported-by: syzbot+59997e8d5cbdc486e6f6@syzkaller.appspotmail.com References: https://bugzilla.kernel.org/show_bug.cgi?id=206361 Cc: stable Link: https://lore.kernel.org/r/20200210081131.23572-2-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/selection.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c index b157f17d2be2..4b62fb052c82 100644 --- a/drivers/tty/vt/selection.c +++ b/drivers/tty/vt/selection.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -43,6 +44,7 @@ static volatile int sel_start = -1; /* cleared by clear_selection */ static int sel_end; static int sel_buffer_lth; static char *sel_buffer; +static DEFINE_MUTEX(sel_lock); /* clear_selection, highlight and highlight_pointer can be called from interrupt (via scrollback/front) */ @@ -162,7 +164,7 @@ int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *t char *bp, *obp; int i, ps, pe, multiplier; u16 c; - int mode; + int mode, ret = 0; poke_blanked_console(); @@ -202,6 +204,7 @@ int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *t pe = tmp; } + mutex_lock(&sel_lock); if (sel_cons != vc_cons[fg_console].d) { clear_selection(); sel_cons = vc_cons[fg_console].d; @@ -247,9 +250,10 @@ int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *t break; case TIOCL_SELPOINTER: highlight_pointer(pe); - return 0; + goto unlock; default: - return -EINVAL; + ret = -EINVAL; + goto unlock; } /* remove the pointer */ @@ -271,7 +275,7 @@ int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *t else if (new_sel_start == sel_start) { if (new_sel_end == sel_end) /* no action required */ - return 0; + goto unlock; else if (new_sel_end > sel_end) /* extend to right */ highlight(sel_end + 2, new_sel_end); else /* contract from right */ @@ -298,7 +302,8 @@ int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *t if (!bp) { printk(KERN_WARNING "selection: kmalloc() failed\n"); clear_selection(); - return -ENOMEM; + ret = -ENOMEM; + goto unlock; } kfree(sel_buffer); sel_buffer = bp; @@ -323,7 +328,9 @@ int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *t } } sel_buffer_lth = bp - sel_buffer; - return 0; +unlock: + mutex_unlock(&sel_lock); + return ret; } /* Insert the contents of the selection buffer into the @@ -352,6 +359,7 @@ int paste_selection(struct tty_struct *tty) tty_buffer_lock_exclusive(&vc->port); add_wait_queue(&vc->paste_wait, &wait); + mutex_lock(&sel_lock); while (sel_buffer && sel_buffer_lth > pasted) { set_current_state(TASK_INTERRUPTIBLE); if (signal_pending(current)) { @@ -359,7 +367,9 @@ int paste_selection(struct tty_struct *tty) break; } if (tty_throttled(tty)) { + mutex_unlock(&sel_lock); schedule(); + mutex_lock(&sel_lock); continue; } __set_current_state(TASK_RUNNING); @@ -368,6 +378,7 @@ int paste_selection(struct tty_struct *tty) count); pasted += count; } + mutex_unlock(&sel_lock); remove_wait_queue(&vc->paste_wait, &wait); __set_current_state(TASK_RUNNING); From 64489a229bbf902244d8407b02015f30e2cd4651 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 28 Feb 2020 12:54:05 +0100 Subject: [PATCH 3581/3715] vt: selection, push console lock down commit 4b70dd57a15d2f4685ac6e38056bad93e81e982f upstream. We need to nest the console lock in sel_lock, so we have to push it down a bit. Fortunately, the callers of set_selection_* just lock the console lock around the function call. So moving it down is easy. In the next patch, we switch the order. Signed-off-by: Jiri Slaby Fixes: 07e6124a1a46 ("vt: selection, close sel_buffer race") Cc: stable Link: https://lore.kernel.org/r/20200228115406.5735-1-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/selection.c | 13 ++++++++++++- drivers/tty/vt/vt.c | 2 -- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c index 4b62fb052c82..8fe5f46ff7bb 100644 --- a/drivers/tty/vt/selection.c +++ b/drivers/tty/vt/selection.c @@ -157,7 +157,7 @@ static int store_utf8(u16 c, char *p) * The entire selection process is managed under the console_lock. It's * a lot under the lock but its hardly a performance path */ -int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *tty) +static int __set_selection(const struct tiocl_selection __user *sel, struct tty_struct *tty) { struct vc_data *vc = vc_cons[fg_console].d; int sel_mode, new_sel_start, new_sel_end, spc; @@ -333,6 +333,17 @@ unlock: return ret; } +int set_selection(const struct tiocl_selection __user *v, struct tty_struct *tty) +{ + int ret; + + console_lock(); + ret = __set_selection(v, tty); + console_unlock(); + + return ret; +} + /* Insert the contents of the selection buffer into the * queue of the tty associated with the current console. * Invoked by ioctl(). diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 06761fcedeff..826433af4bdd 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -2688,9 +2688,7 @@ int tioclinux(struct tty_struct *tty, unsigned long arg) switch (type) { case TIOCL_SETSEL: - console_lock(); ret = set_selection((struct tiocl_selection __user *)(p+1), tty); - console_unlock(); break; case TIOCL_PASTESEL: ret = paste_selection(tty); From a4719f6d07b2c63223f7452c435c5f578f105cfe Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 28 Feb 2020 12:54:06 +0100 Subject: [PATCH 3582/3715] vt: selection, push sel_lock up commit e8c75a30a23c6ba63f4ef6895cbf41fd42f21aa2 upstream. sel_lock cannot nest in the console lock. Thanks to syzkaller, the kernel states firmly: > WARNING: possible circular locking dependency detected > 5.6.0-rc3-syzkaller #0 Not tainted > ------------------------------------------------------ > syz-executor.4/20336 is trying to acquire lock: > ffff8880a2e952a0 (&tty->termios_rwsem){++++}, at: tty_unthrottle+0x22/0x100 drivers/tty/tty_ioctl.c:136 > > but task is already holding lock: > ffffffff89462e70 (sel_lock){+.+.}, at: paste_selection+0x118/0x470 drivers/tty/vt/selection.c:374 > > which lock already depends on the new lock. > > the existing dependency chain (in reverse order) is: > > -> #2 (sel_lock){+.+.}: > mutex_lock_nested+0x1b/0x30 kernel/locking/mutex.c:1118 > set_selection_kernel+0x3b8/0x18a0 drivers/tty/vt/selection.c:217 > set_selection_user+0x63/0x80 drivers/tty/vt/selection.c:181 > tioclinux+0x103/0x530 drivers/tty/vt/vt.c:3050 > vt_ioctl+0x3f1/0x3a30 drivers/tty/vt/vt_ioctl.c:364 This is ioctl(TIOCL_SETSEL). Locks held on the path: console_lock -> sel_lock > -> #1 (console_lock){+.+.}: > console_lock+0x46/0x70 kernel/printk/printk.c:2289 > con_flush_chars+0x50/0x650 drivers/tty/vt/vt.c:3223 > n_tty_write+0xeae/0x1200 drivers/tty/n_tty.c:2350 > do_tty_write drivers/tty/tty_io.c:962 [inline] > tty_write+0x5a1/0x950 drivers/tty/tty_io.c:1046 This is write(). Locks held on the path: termios_rwsem -> console_lock > -> #0 (&tty->termios_rwsem){++++}: > down_write+0x57/0x140 kernel/locking/rwsem.c:1534 > tty_unthrottle+0x22/0x100 drivers/tty/tty_ioctl.c:136 > mkiss_receive_buf+0x12aa/0x1340 drivers/net/hamradio/mkiss.c:902 > tty_ldisc_receive_buf+0x12f/0x170 drivers/tty/tty_buffer.c:465 > paste_selection+0x346/0x470 drivers/tty/vt/selection.c:389 > tioclinux+0x121/0x530 drivers/tty/vt/vt.c:3055 > vt_ioctl+0x3f1/0x3a30 drivers/tty/vt/vt_ioctl.c:364 This is ioctl(TIOCL_PASTESEL). Locks held on the path: sel_lock -> termios_rwsem > other info that might help us debug this: > > Chain exists of: > &tty->termios_rwsem --> console_lock --> sel_lock Clearly. From the above, we have: console_lock -> sel_lock sel_lock -> termios_rwsem termios_rwsem -> console_lock Fix this by reversing the console_lock -> sel_lock dependency in ioctl(TIOCL_SETSEL). First, lock sel_lock, then console_lock. Signed-off-by: Jiri Slaby Reported-by: syzbot+26183d9746e62da329b8@syzkaller.appspotmail.com Fixes: 07e6124a1a46 ("vt: selection, close sel_buffer race") Cc: stable Link: https://lore.kernel.org/r/20200228115406.5735-2-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/selection.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c index 8fe5f46ff7bb..91ffe3f2b8a0 100644 --- a/drivers/tty/vt/selection.c +++ b/drivers/tty/vt/selection.c @@ -204,7 +204,6 @@ static int __set_selection(const struct tiocl_selection __user *sel, struct tty_ pe = tmp; } - mutex_lock(&sel_lock); if (sel_cons != vc_cons[fg_console].d) { clear_selection(); sel_cons = vc_cons[fg_console].d; @@ -250,10 +249,9 @@ static int __set_selection(const struct tiocl_selection __user *sel, struct tty_ break; case TIOCL_SELPOINTER: highlight_pointer(pe); - goto unlock; + return 0; default: - ret = -EINVAL; - goto unlock; + return -EINVAL; } /* remove the pointer */ @@ -275,7 +273,7 @@ static int __set_selection(const struct tiocl_selection __user *sel, struct tty_ else if (new_sel_start == sel_start) { if (new_sel_end == sel_end) /* no action required */ - goto unlock; + return 0; else if (new_sel_end > sel_end) /* extend to right */ highlight(sel_end + 2, new_sel_end); else /* contract from right */ @@ -302,8 +300,7 @@ static int __set_selection(const struct tiocl_selection __user *sel, struct tty_ if (!bp) { printk(KERN_WARNING "selection: kmalloc() failed\n"); clear_selection(); - ret = -ENOMEM; - goto unlock; + return -ENOMEM; } kfree(sel_buffer); sel_buffer = bp; @@ -328,8 +325,7 @@ static int __set_selection(const struct tiocl_selection __user *sel, struct tty_ } } sel_buffer_lth = bp - sel_buffer; -unlock: - mutex_unlock(&sel_lock); + return ret; } @@ -337,9 +333,11 @@ int set_selection(const struct tiocl_selection __user *v, struct tty_struct *tty { int ret; + mutex_lock(&sel_lock); console_lock(); ret = __set_selection(v, tty); console_unlock(); + mutex_unlock(&sel_lock); return ret; } From a350eee14eca753114567d66ee1895187f9f40e6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 26 Feb 2020 15:16:15 -0800 Subject: [PATCH 3583/3715] x86/pkeys: Manually set X86_FEATURE_OSPKE to preserve existing changes commit 735a6dd02222d8d070c7bb748f25895239ca8c92 upstream. Explicitly set X86_FEATURE_OSPKE via set_cpu_cap() instead of calling get_cpu_cap() to pull the feature bit from CPUID after enabling CR4.PKE. Invoking get_cpu_cap() effectively wipes out any {set,clear}_cpu_cap() changes that were made between this_cpu->c_init() and setup_pku(), as all non-synthetic feature words are reinitialized from the CPU's CPUID values. Blasting away capability updates manifests most visibility when running on a VMX capable CPU, but with VMX disabled by BIOS. To indicate that VMX is disabled, init_ia32_feat_ctl() clears X86_FEATURE_VMX, using clear_cpu_cap() instead of setup_clear_cpu_cap() so that KVM can report which CPU is misconfigured (KVM needs to probe every CPU anyways). Restoring X86_FEATURE_VMX from CPUID causes KVM to think VMX is enabled, ultimately leading to an unexpected #GP when KVM attempts to do VMXON. Arguably, init_ia32_feat_ctl() should use setup_clear_cpu_cap() and let KVM figure out a different way to report the misconfigured CPU, but VMX is not the only feature bit that is affected, i.e. there is precedent that tweaking feature bits via {set,clear}_cpu_cap() after ->c_init() is expected to work. Most notably, x86_init_rdrand()'s clearing of X86_FEATURE_RDRAND when RDRAND malfunctions is also overwritten. Fixes: 0697694564c8 ("x86/mm/pkeys: Actually enable Memory Protection Keys in the CPU") Reported-by: Jacob Keller Signed-off-by: Sean Christopherson Signed-off-by: Borislav Petkov Acked-by: Dave Hansen Tested-by: Jacob Keller Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20200226231615.13664-1-sean.j.christopherson@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 3d805e8b3739..7b4141889919 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -360,7 +360,7 @@ static __always_inline void setup_pku(struct cpuinfo_x86 *c) * cpuid bit to be set. We need to ensure that we * update that bit in this CPU's "cpu_info". */ - get_cpu_cap(c); + set_cpu_cap(c, X86_FEATURE_OSPKE); } #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS From 04b31630d500a14e64090470b7d5adf58b2be4fd Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 9 Feb 2020 19:33:38 +0300 Subject: [PATCH 3584/3715] dmaengine: tegra-apb: Fix use-after-free commit 94788af4ed039476ff3527b0e6a12c1dc42cb022 upstream. I was doing some experiments with I2C and noticed that Tegra APB DMA driver crashes sometime after I2C DMA transfer termination. The crash happens because tegra_dma_terminate_all() bails out immediately if pending list is empty, and thus, it doesn't release the half-completed descriptors which are getting re-used before ISR tasklet kicks-in. tegra-i2c 7000c400.i2c: DMA transfer timeout elants_i2c 0-0010: elants_i2c_irq: failed to read data: -110 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 142 at lib/list_debug.c:45 __list_del_entry_valid+0x45/0xac list_del corruption, ddbaac44->next is LIST_POISON1 (00000100) Modules linked in: CPU: 0 PID: 142 Comm: kworker/0:2 Not tainted 5.5.0-rc2-next-20191220-00175-gc3605715758d-dirty #538 Hardware name: NVIDIA Tegra SoC (Flattened Device Tree) Workqueue: events_freezable_power_ thermal_zone_device_check [] (unwind_backtrace) from [] (show_stack+0x11/0x14) [] (show_stack) from [] (dump_stack+0x85/0x94) [] (dump_stack) from [] (__warn+0xc1/0xc4) [] (__warn) from [] (warn_slowpath_fmt+0x61/0x78) [] (warn_slowpath_fmt) from [] (__list_del_entry_valid+0x45/0xac) [] (__list_del_entry_valid) from [] (tegra_dma_tasklet+0x5b/0x154) [] (tegra_dma_tasklet) from [] (tasklet_action_common.constprop.0+0x41/0x7c) [] (tasklet_action_common.constprop.0) from [] (__do_softirq+0xd3/0x2a8) [] (__do_softirq) from [] (irq_exit+0x7b/0x98) [] (irq_exit) from [] (__handle_domain_irq+0x45/0x80) [] (__handle_domain_irq) from [] (gic_handle_irq+0x45/0x7c) [] (gic_handle_irq) from [] (__irq_svc+0x65/0x94) Exception stack(0xde2ebb90 to 0xde2ebbd8) Signed-off-by: Dmitry Osipenko Acked-by: Jon Hunter Cc: Link: https://lore.kernel.org/r/20200209163356.6439-2-digetx@gmail.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/tegra20-apb-dma.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c index 7db2766b5fe9..ffe8126a9553 100644 --- a/drivers/dma/tegra20-apb-dma.c +++ b/drivers/dma/tegra20-apb-dma.c @@ -755,10 +755,6 @@ static int tegra_dma_terminate_all(struct dma_chan *dc) bool was_busy; spin_lock_irqsave(&tdc->lock, flags); - if (list_empty(&tdc->pending_sg_req)) { - spin_unlock_irqrestore(&tdc->lock, flags); - return 0; - } if (!tdc->busy) goto skip_dma_stop; From 3e4c735e6ba9a5add132c8bcad8700029fbdb609 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 9 Feb 2020 19:33:39 +0300 Subject: [PATCH 3585/3715] dmaengine: tegra-apb: Prevent race conditions of tasklet vs free list commit c33ee1301c393a241d6424e36eff1071811b1064 upstream. The interrupt handler puts a half-completed DMA descriptor on a free list and then schedules tasklet to process bottom half of the descriptor that executes client's callback, this creates possibility to pick up the busy descriptor from the free list. Thus, let's disallow descriptor's re-use until it is fully processed. Signed-off-by: Dmitry Osipenko Acked-by: Jon Hunter Cc: Link: https://lore.kernel.org/r/20200209163356.6439-3-digetx@gmail.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/tegra20-apb-dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c index ffe8126a9553..3402494cadf9 100644 --- a/drivers/dma/tegra20-apb-dma.c +++ b/drivers/dma/tegra20-apb-dma.c @@ -288,7 +288,7 @@ static struct tegra_dma_desc *tegra_dma_desc_get( /* Do not allocate if desc are waiting for ack */ list_for_each_entry(dma_desc, &tdc->free_dma_desc, node) { - if (async_tx_test_ack(&dma_desc->txd)) { + if (async_tx_test_ack(&dma_desc->txd) && !dma_desc->cb_count) { list_del(&dma_desc->node); spin_unlock_irqrestore(&tdc->lock, flags); dma_desc->txd.flags = 0; From 165dc070ce2f57e01541409741c721db65eecd39 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 19 Feb 2020 10:25:45 -0500 Subject: [PATCH 3586/3715] dm cache: fix a crash due to incorrect work item cancelling commit 7cdf6a0aae1cccf5167f3f04ecddcf648b78e289 upstream. The crash can be reproduced by running the lvm2 testsuite test lvconvert-thin-external-cache.sh for several minutes, e.g.: while :; do make check T=shell/lvconvert-thin-external-cache.sh; done The crash happens in this call chain: do_waker -> policy_tick -> smq_tick -> end_hotspot_period -> clear_bitset -> memset -> __memset -- which accesses an invalid pointer in the vmalloc area. The work entry on the workqueue is executed even after the bitmap was freed. The problem is that cancel_delayed_work doesn't wait for the running work item to finish, so the work item can continue running and re-submitting itself even after cache_postsuspend. In order to make sure that the work item won't be running, we must use cancel_delayed_work_sync. Also, change flush_workqueue to drain_workqueue, so that if some work item submits itself or another work item, we are properly waiting for both of them. Fixes: c6b4fcbad044 ("dm: add cache target") Cc: stable@vger.kernel.org # v3.9 Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-target.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index b5f541112fca..69cdb29ef6be 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2971,8 +2971,8 @@ static void cache_postsuspend(struct dm_target *ti) prevent_background_work(cache); BUG_ON(atomic_read(&cache->nr_io_migrations)); - cancel_delayed_work(&cache->waker); - flush_workqueue(cache->wq); + cancel_delayed_work_sync(&cache->waker); + drain_workqueue(cache->wq); WARN_ON(cache->tracker.in_flight); /* From 77ed33b91c868973b08c0b3c8b206a88416327b3 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 26 Jan 2020 21:49:50 +0200 Subject: [PATCH 3587/3715] ARM: dts: ls1021a: Restore MDIO compatible to gianfar commit 7155c44624d061692b4c13aa8343f119c67d4fc0 upstream. The difference between "fsl,etsec2-mdio" and "gianfar" has to do with the .get_tbipa function, which calculates the address of the TBIPA register automatically, if not explicitly specified. [ see drivers/net/ethernet/freescale/fsl_pq_mdio.c ]. On LS1021A, the TBIPA register is at offset 0x30 within the port register block, which is what the "gianfar" method of calculating addresses actually does. Luckily, the bad "compatible" is inconsequential for ls1021a.dtsi, because the TBIPA register is explicitly specified via the second "reg" (<0x0 0x2d10030 0x0 0x4>), so the "get_tbipa" function is dead code. Nonetheless it's good to restore it to its correct value. Background discussion: https://www.spinics.net/lists/stable/msg361156.html Fixes: c7861adbe37f ("ARM: dts: ls1021: Fix SGMII PCS link remaining down after PHY disconnect") Reported-by: Pavel Machek Signed-off-by: Vladimir Oltean Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/ls1021a.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/ls1021a.dtsi b/arch/arm/boot/dts/ls1021a.dtsi index 1343c86988c5..68f4482c35e2 100644 --- a/arch/arm/boot/dts/ls1021a.dtsi +++ b/arch/arm/boot/dts/ls1021a.dtsi @@ -562,7 +562,7 @@ }; mdio0: mdio@2d24000 { - compatible = "fsl,etsec2-mdio"; + compatible = "gianfar"; device_type = "mdio"; #address-cells = <1>; #size-cells = <0>; @@ -570,7 +570,7 @@ }; mdio1: mdio@2d64000 { - compatible = "fsl,etsec2-mdio"; + compatible = "gianfar"; device_type = "mdio"; #address-cells = <1>; #size-cells = <0>; From c40c33a8936174dcd78268e619960c2ed421d43b Mon Sep 17 00:00:00 2001 From: Dragos Tarcatu Date: Fri, 7 Feb 2020 20:53:24 +0200 Subject: [PATCH 3588/3715] ASoC: topology: Fix memleak in soc_tplg_link_elems_load() commit 2b2d5c4db732c027a14987cfccf767dac1b45170 upstream. If soc_tplg_link_config() fails, _link needs to be freed in case of topology ABI version mismatch. However the current code is returning directly and ends up leaking memory in this case. This patch fixes that. Fixes: 593d9e52f9bb ("ASoC: topology: Add support to configure existing physical DAI links") Signed-off-by: Dragos Tarcatu Link: https://lore.kernel.org/r/20200207185325.22320-2-dragos_tarcatu@mentor.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-topology.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 72301bcad3bd..0f91b4ed6814 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -2177,8 +2177,11 @@ static int soc_tplg_link_elems_load(struct soc_tplg *tplg, } ret = soc_tplg_link_config(tplg, _link); - if (ret < 0) + if (ret < 0) { + if (!abi_match) + kfree(_link); return ret; + } /* offset by version-specific struct size and * real priv data size From 543eafede7b67fec47286f61f7fb370691d415f2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 18 Feb 2020 12:17:35 +0100 Subject: [PATCH 3589/3715] ASoC: intel: skl: Fix pin debug prints commit 64bbacc5f08c01954890981c63de744df1f29a30 upstream. skl_print_pins() loops over all given pins but it overwrites the text at the very same position while increasing the returned length. Fix this to show the all pin contents properly. Fixes: d14700a01f91 ("ASoC: Intel: Skylake: Debugfs facility to dump module config") Signed-off-by: Takashi Iwai Acked-by: Cezary Rojewski Link: https://lore.kernel.org/r/20200218111737.14193-2-tiwai@suse.de Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/intel/skylake/skl-debug.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/intel/skylake/skl-debug.c b/sound/soc/intel/skylake/skl-debug.c index 1987f78ea91e..9e38f2afa084 100644 --- a/sound/soc/intel/skylake/skl-debug.c +++ b/sound/soc/intel/skylake/skl-debug.c @@ -42,7 +42,7 @@ static ssize_t skl_print_pins(struct skl_module_pin *m_pin, char *buf, int i; ssize_t ret = 0; - for (i = 0; i < max_pin; i++) + for (i = 0; i < max_pin; i++) { ret += snprintf(buf + size, MOD_BUF - size, "%s %d\n\tModule %d\n\tInstance %d\n\t" "In-used %s\n\tType %s\n" @@ -53,6 +53,8 @@ static ssize_t skl_print_pins(struct skl_module_pin *m_pin, char *buf, m_pin[i].in_use ? "Used" : "Unused", m_pin[i].is_dynamic ? "Dynamic" : "Static", m_pin[i].pin_state, i); + size += ret; + } return ret; } From 7c4e080113704692c075a1cd98fe98d9dcf51e9c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 18 Feb 2020 12:17:36 +0100 Subject: [PATCH 3590/3715] ASoC: intel: skl: Fix possible buffer overflow in debug outputs commit 549cd0ba04dcfe340c349cd983bd440480fae8ee upstream. The debugfs output of intel skl driver writes strings with multiple snprintf() calls with the fixed size. This was supposed to avoid the buffer overflow but actually it still would, because snprintf() returns the expected size to be output, not the actual output size. Fix it by replacing snprintf() calls with scnprintf(). Fixes: d14700a01f91 ("ASoC: Intel: Skylake: Debugfs facility to dump module config") Signed-off-by: Takashi Iwai Acked-by: Cezary Rojewski Link: https://lore.kernel.org/r/20200218111737.14193-3-tiwai@suse.de Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/intel/skylake/skl-debug.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/sound/soc/intel/skylake/skl-debug.c b/sound/soc/intel/skylake/skl-debug.c index 9e38f2afa084..71c6bbf37b6c 100644 --- a/sound/soc/intel/skylake/skl-debug.c +++ b/sound/soc/intel/skylake/skl-debug.c @@ -43,7 +43,7 @@ static ssize_t skl_print_pins(struct skl_module_pin *m_pin, char *buf, ssize_t ret = 0; for (i = 0; i < max_pin; i++) { - ret += snprintf(buf + size, MOD_BUF - size, + ret += scnprintf(buf + size, MOD_BUF - size, "%s %d\n\tModule %d\n\tInstance %d\n\t" "In-used %s\n\tType %s\n" "\tState %d\n\tIndex %d\n", @@ -61,7 +61,7 @@ static ssize_t skl_print_pins(struct skl_module_pin *m_pin, char *buf, static ssize_t skl_print_fmt(struct skl_module_fmt *fmt, char *buf, ssize_t size, bool direction) { - return snprintf(buf + size, MOD_BUF - size, + return scnprintf(buf + size, MOD_BUF - size, "%s\n\tCh %d\n\tFreq %d\n\tBit depth %d\n\t" "Valid bit depth %d\n\tCh config %#x\n\tInterleaving %d\n\t" "Sample Type %d\n\tCh Map %#x\n", @@ -83,16 +83,16 @@ static ssize_t module_read(struct file *file, char __user *user_buf, if (!buf) return -ENOMEM; - ret = snprintf(buf, MOD_BUF, "Module:\n\tUUID %pUL\n\tModule id %d\n" + ret = scnprintf(buf, MOD_BUF, "Module:\n\tUUID %pUL\n\tModule id %d\n" "\tInstance id %d\n\tPvt_id %d\n", mconfig->guid, mconfig->id.module_id, mconfig->id.instance_id, mconfig->id.pvt_id); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Resources:\n\tMCPS %#x\n\tIBS %#x\n\tOBS %#x\t\n", mconfig->mcps, mconfig->ibs, mconfig->obs); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Module data:\n\tCore %d\n\tIn queue %d\n\t" "Out queue %d\n\tType %s\n", mconfig->core_id, mconfig->max_in_queue, @@ -102,38 +102,38 @@ static ssize_t module_read(struct file *file, char __user *user_buf, ret += skl_print_fmt(mconfig->in_fmt, buf, ret, true); ret += skl_print_fmt(mconfig->out_fmt, buf, ret, false); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Fixup:\n\tParams %#x\n\tConverter %#x\n", mconfig->params_fixup, mconfig->converter); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Module Gateway:\n\tType %#x\n\tVbus %#x\n\tHW conn %#x\n\tSlot %#x\n", mconfig->dev_type, mconfig->vbus_id, mconfig->hw_conn_type, mconfig->time_slot); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Pipeline:\n\tID %d\n\tPriority %d\n\tConn Type %d\n\t" "Pages %#x\n", mconfig->pipe->ppl_id, mconfig->pipe->pipe_priority, mconfig->pipe->conn_type, mconfig->pipe->memory_pages); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "\tParams:\n\t\tHost DMA %d\n\t\tLink DMA %d\n", mconfig->pipe->p_params->host_dma_id, mconfig->pipe->p_params->link_dma_id); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "\tPCM params:\n\t\tCh %d\n\t\tFreq %d\n\t\tFormat %d\n", mconfig->pipe->p_params->ch, mconfig->pipe->p_params->s_freq, mconfig->pipe->p_params->s_fmt); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "\tLink %#x\n\tStream %#x\n", mconfig->pipe->p_params->linktype, mconfig->pipe->p_params->stream); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "\tState %d\n\tPassthru %s\n", mconfig->pipe->state, mconfig->pipe->passthru ? "true" : "false"); @@ -143,7 +143,7 @@ static ssize_t module_read(struct file *file, char __user *user_buf, ret += skl_print_pins(mconfig->m_out_pin, buf, mconfig->max_out_queue, ret, false); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Other:\n\tDomain %d\n\tHomogenous Input %s\n\t" "Homogenous Output %s\n\tIn Queue Mask %d\n\t" "Out Queue Mask %d\n\tDMA ID %d\n\tMem Pages %d\n\t" @@ -201,7 +201,7 @@ static ssize_t fw_softreg_read(struct file *file, char __user *user_buf, __ioread32_copy(d->fw_read_buff, fw_reg_addr, w0_stat_sz >> 2); for (offset = 0; offset < FW_REG_SIZE; offset += 16) { - ret += snprintf(tmp + ret, FW_REG_BUF - ret, "%#.4x: ", offset); + ret += scnprintf(tmp + ret, FW_REG_BUF - ret, "%#.4x: ", offset); hex_dump_to_buffer(d->fw_read_buff + offset, 16, 16, 4, tmp + ret, FW_REG_BUF - ret, 0); ret += strlen(tmp + ret); From 6dfcfe0c07fa24f6d601feb3499746c8a4f6102b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 18 Feb 2020 12:17:37 +0100 Subject: [PATCH 3591/3715] ASoC: pcm: Fix possible buffer overflow in dpcm state sysfs output commit 6c89ffea60aa3b2a33ae7987de1e84bfb89e4c9e upstream. dpcm_show_state() invokes multiple snprintf() calls to concatenate formatted strings on the fixed size buffer. The usage of snprintf() is supposed for avoiding the buffer overflow, but it doesn't work as expected because snprintf() doesn't return the actual output size but the size to be written. Fix this bug by replacing all snprintf() calls with scnprintf() calls. Fixes: f86dcef87b77 ("ASoC: dpcm: Add debugFS support for DPCM") Signed-off-by: Takashi Iwai Acked-by: Cezary Rojewski Link: https://lore.kernel.org/r/20200218111737.14193-4-tiwai@suse.de Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-pcm.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 89f772ed4705..e75822dd9930 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -2957,16 +2957,16 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe, ssize_t offset = 0; /* FE state */ - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, "[%s - %s]\n", fe->dai_link->name, stream ? "Capture" : "Playback"); - offset += snprintf(buf + offset, size - offset, "State: %s\n", + offset += scnprintf(buf + offset, size - offset, "State: %s\n", dpcm_state_string(fe->dpcm[stream].state)); if ((fe->dpcm[stream].state >= SND_SOC_DPCM_STATE_HW_PARAMS) && (fe->dpcm[stream].state <= SND_SOC_DPCM_STATE_STOP)) - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, "Hardware Params: " "Format = %s, Channels = %d, Rate = %d\n", snd_pcm_format_name(params_format(params)), @@ -2974,10 +2974,10 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe, params_rate(params)); /* BEs state */ - offset += snprintf(buf + offset, size - offset, "Backends:\n"); + offset += scnprintf(buf + offset, size - offset, "Backends:\n"); if (list_empty(&fe->dpcm[stream].be_clients)) { - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, " No active DSP links\n"); goto out; } @@ -2986,16 +2986,16 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe, struct snd_soc_pcm_runtime *be = dpcm->be; params = &dpcm->hw_params; - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, "- %s\n", be->dai_link->name); - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, " State: %s\n", dpcm_state_string(be->dpcm[stream].state)); if ((be->dpcm[stream].state >= SND_SOC_DPCM_STATE_HW_PARAMS) && (be->dpcm[stream].state <= SND_SOC_DPCM_STATE_STOP)) - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, " Hardware Params: " "Format = %s, Channels = %d, Rate = %d\n", snd_pcm_format_name(params_format(params)), From 5c17ed40a9a0fb37b07b461eeb11b4d48ab4206b Mon Sep 17 00:00:00 2001 From: Matthias Reichl Date: Thu, 20 Feb 2020 21:29:56 +0100 Subject: [PATCH 3592/3715] ASoC: pcm512x: Fix unbalanced regulator enable call in probe error path commit ac0a68997935c4acb92eaae5ad8982e0bb432d56 upstream. When we get a clock error during probe we have to call regulator_bulk_disable before bailing out, otherwise we trigger a warning in regulator_put. Fix this by using "goto err" like in the error cases above. Fixes: 5a3af1293194d ("ASoC: pcm512x: Add PCM512x driver") Signed-off-by: Matthias Reichl Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200220202956.29233-1-hias@horus.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/pcm512x.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/pcm512x.c b/sound/soc/codecs/pcm512x.c index 68feae262476..940bdc30753d 100644 --- a/sound/soc/codecs/pcm512x.c +++ b/sound/soc/codecs/pcm512x.c @@ -1438,13 +1438,15 @@ int pcm512x_probe(struct device *dev, struct regmap *regmap) } pcm512x->sclk = devm_clk_get(dev, NULL); - if (PTR_ERR(pcm512x->sclk) == -EPROBE_DEFER) - return -EPROBE_DEFER; + if (PTR_ERR(pcm512x->sclk) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + goto err; + } if (!IS_ERR(pcm512x->sclk)) { ret = clk_prepare_enable(pcm512x->sclk); if (ret != 0) { dev_err(dev, "Failed to enable SCLK: %d\n", ret); - return ret; + goto err; } } From 9b2c4c1b2c114c3bbe69351a91213f2aa204a6fc Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 28 Feb 2020 15:31:45 +0000 Subject: [PATCH 3593/3715] ASoC: dapm: Correct DAPM handling of active widgets during shutdown commit 9b3193089e77d3b59b045146ff1c770dd899acb1 upstream. commit c2caa4da46a4 ("ASoC: Fix widget powerdown on shutdown") added a set of the power state during snd_soc_dapm_shutdown to ensure the widgets powered off. However, when commit 39eb5fd13dff ("ASoC: dapm: Delay w->power update until the changes are written") added the new_power member of the widget structure, to differentiate between the current power state and the target power state, it did not update the shutdown to use the new_power member. As new_power has not updated it will be left in the state set by the last DAPM sequence, ie. 1 for active widgets. So as the DAPM sequence for the shutdown proceeds it will turn the widgets on (despite them already being on) rather than turning them off. Fixes: 39eb5fd13dff ("ASoC: dapm: Delay w->power update until the changes are written") Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20200228153145.21013-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-dapm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 104d5f487c7d..fb2fef166672 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -4481,7 +4481,7 @@ static void soc_dapm_shutdown_dapm(struct snd_soc_dapm_context *dapm) continue; if (w->power) { dapm_seq_insert(w, &down_list, false); - w->power = 0; + w->new_power = 0; powerdown = 1; } } From 8457a77611f784abb4b02d01e0e97a1ad3139c8c Mon Sep 17 00:00:00 2001 From: Bernard Metzler Date: Mon, 2 Mar 2020 19:16:14 +0100 Subject: [PATCH 3594/3715] RDMA/iwcm: Fix iwcm work deallocation commit 810dbc69087b08fd53e1cdd6c709f385bc2921ad upstream. The dealloc_work_entries() function must update the work_free_list pointer while freeing its entries, since potentially called again on same list. A second iteration of the work list caused system crash. This happens, if work allocation fails during cma_iw_listen() and free_cm_id() tries to free the list again during cleanup. Fixes: 922a8e9fb2e0 ("RDMA: iWARP Connection Manager.") Link: https://lore.kernel.org/r/20200302181614.17042-1-bmt@zurich.ibm.com Reported-by: syzbot+cb0c054eabfba4342146@syzkaller.appspotmail.com Signed-off-by: Bernard Metzler Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/iwcm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 30d7277249b8..16b0c10348e8 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -158,8 +158,10 @@ static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv) { struct list_head *e, *tmp; - list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) + list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) { + list_del(e); kfree(list_entry(e, struct iwcm_work, free_list)); + } } static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count) From 3c1099c80cd1cefd205c604e22b0e69c6728a7ad Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 21 Feb 2020 15:20:26 +0000 Subject: [PATCH 3595/3715] RMDA/cm: Fix missing ib_cm_destroy_id() in ib_cm_insert_listen() commit c14dfddbd869bf0c2bafb7ef260c41d9cebbcfec upstream. The algorithm pre-allocates a cm_id since allocation cannot be done while holding the cm.lock spinlock, however it doesn't free it on one error path, leading to a memory leak. Fixes: 067b171b8679 ("IB/cm: Share listening CM IDs") Link: https://lore.kernel.org/r/20200221152023.GA8680@ziepe.ca Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/cm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 2af79e4f3235..80a8eb7e5d6e 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1143,6 +1143,7 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, /* Sharing an ib_cm_id with different handlers is not * supported */ spin_unlock_irqrestore(&cm.lock, flags); + ib_destroy_cm_id(cm_id); return ERR_PTR(-EINVAL); } atomic_inc(&cm_id_priv->refcount); From de374e15e8d82104765cbebfe88bf2bd8b3b8698 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 25 Feb 2020 14:54:45 -0500 Subject: [PATCH 3596/3715] IB/hfi1, qib: Ensure RCU is locked when accessing list commit 817a68a6584aa08e323c64283fec5ded7be84759 upstream. The packet handling function, specifically the iteration of the qp list for mad packet processing misses locking RCU before running through the list. Not only is this incorrect, but the list_for_each_entry_rcu() call can not be called with a conditional check for lock dependency. Remedy this by invoking the rcu lock and unlock around the critical section. This brings MAD packet processing in line with what is done for non-MAD packets. Fixes: 7724105686e7 ("IB/hfi1: add driver files") Link: https://lore.kernel.org/r/20200225195445.140896.41873.stgit@awfm-01.aw.intel.com Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/verbs.c | 4 +++- drivers/infiniband/hw/qib/qib_verbs.c | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index ad78b471c112..b962dbcfe9a7 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -593,10 +593,11 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet, opa_get_lid(packet->dlid, 9B)); if (!mcast) goto drop; + rcu_read_lock(); list_for_each_entry_rcu(p, &mcast->qp_list, list) { packet->qp = p->qp; if (hfi1_do_pkey_check(packet)) - goto drop; + goto unlock_drop; spin_lock_irqsave(&packet->qp->r_lock, flags); packet_handler = qp_ok(packet); if (likely(packet_handler)) @@ -605,6 +606,7 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet, ibp->rvp.n_pkt_drops++; spin_unlock_irqrestore(&packet->qp->r_lock, flags); } + rcu_read_unlock(); /* * Notify rvt_multicast_detach() if it is waiting for us * to finish. diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 350bc29a066f..b473df8eea1a 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -360,8 +360,10 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) if (mcast == NULL) goto drop; this_cpu_inc(ibp->pmastats->n_multicast_rcv); + rcu_read_lock(); list_for_each_entry_rcu(p, &mcast->qp_list, list) qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp); + rcu_read_unlock(); /* * Notify rvt_multicast_detach() if it is waiting for us * to finish. From 09192ee93dc01a38f479bfa5a0d9a5ddbc8730c7 Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Thu, 16 Jan 2020 15:18:49 +0100 Subject: [PATCH 3597/3715] ARM: imx: build v7_cpu_resume() unconditionally commit 512a928affd51c2dc631401e56ad5ee5d5dd68b6 upstream. This function is not only needed by the platform suspend code, but is also reused as the CPU resume function when the ARM cores can be powered down completely in deep idle, which is the case on i.MX6SX and i.MX6UL(L). Providing the static inline stub whenever CONFIG_SUSPEND is disabled means that those platforms will hang on resume from cpuidle if suspend is disabled. So there are two problems: - The static inline stub masks the linker error - The function is not available where needed Fix both by just building the function unconditionally, when CONFIG_SOC_IMX6 is enabled. The actual code is three instructions long, so it's arguably ok to just leave it in for all i.MX6 kernel configurations. Fixes: 05136f0897b5 ("ARM: imx: support arm power off in cpuidle for i.mx6sx") Signed-off-by: Lucas Stach Signed-off-by: Ahmad Fatoum Signed-off-by: Rouven Czerwinski Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-imx/Makefile | 2 ++ arch/arm/mach-imx/common.h | 4 ++-- arch/arm/mach-imx/resume-imx6.S | 24 ++++++++++++++++++++++++ arch/arm/mach-imx/suspend-imx6.S | 14 -------------- 4 files changed, 28 insertions(+), 16 deletions(-) create mode 100644 arch/arm/mach-imx/resume-imx6.S diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile index 8ff71058207d..8cf1a98785a5 100644 --- a/arch/arm/mach-imx/Makefile +++ b/arch/arm/mach-imx/Makefile @@ -87,6 +87,8 @@ AFLAGS_suspend-imx6.o :=-Wa,-march=armv7-a obj-$(CONFIG_SOC_IMX6) += suspend-imx6.o obj-$(CONFIG_SOC_IMX53) += suspend-imx53.o endif +AFLAGS_resume-imx6.o :=-Wa,-march=armv7-a +obj-$(CONFIG_SOC_IMX6) += resume-imx6.o obj-$(CONFIG_SOC_IMX6) += pm-imx6.o obj-$(CONFIG_SOC_IMX1) += mach-imx1.o diff --git a/arch/arm/mach-imx/common.h b/arch/arm/mach-imx/common.h index b09a2ec19267..4b318c864446 100644 --- a/arch/arm/mach-imx/common.h +++ b/arch/arm/mach-imx/common.h @@ -111,17 +111,17 @@ void imx_cpu_die(unsigned int cpu); int imx_cpu_kill(unsigned int cpu); #ifdef CONFIG_SUSPEND -void v7_cpu_resume(void); void imx53_suspend(void __iomem *ocram_vbase); extern const u32 imx53_suspend_sz; void imx6_suspend(void __iomem *ocram_vbase); #else -static inline void v7_cpu_resume(void) {} static inline void imx53_suspend(void __iomem *ocram_vbase) {} static const u32 imx53_suspend_sz; static inline void imx6_suspend(void __iomem *ocram_vbase) {} #endif +void v7_cpu_resume(void); + void imx6_pm_ccm_init(const char *ccm_compat); void imx6q_pm_init(void); void imx6dl_pm_init(void); diff --git a/arch/arm/mach-imx/resume-imx6.S b/arch/arm/mach-imx/resume-imx6.S new file mode 100644 index 000000000000..5bd1ba7ef15b --- /dev/null +++ b/arch/arm/mach-imx/resume-imx6.S @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright 2014 Freescale Semiconductor, Inc. + */ + +#include +#include +#include +#include +#include "hardware.h" + +/* + * The following code must assume it is running from physical address + * where absolute virtual addresses to the data section have to be + * turned into relative ones. + */ + +ENTRY(v7_cpu_resume) + bl v7_invalidate_l1 +#ifdef CONFIG_CACHE_L2X0 + bl l2c310_early_resume +#endif + b cpu_resume +ENDPROC(v7_cpu_resume) diff --git a/arch/arm/mach-imx/suspend-imx6.S b/arch/arm/mach-imx/suspend-imx6.S index 76ee2ceec8d5..7d84b617af48 100644 --- a/arch/arm/mach-imx/suspend-imx6.S +++ b/arch/arm/mach-imx/suspend-imx6.S @@ -333,17 +333,3 @@ resume: ret lr ENDPROC(imx6_suspend) - -/* - * The following code must assume it is running from physical address - * where absolute virtual addresses to the data section have to be - * turned into relative ones. - */ - -ENTRY(v7_cpu_resume) - bl v7_invalidate_l1 -#ifdef CONFIG_CACHE_L2X0 - bl l2c310_early_resume -#endif - b cpu_resume -ENDPROC(v7_cpu_resume) From 5c5cdae8fb84b06e6145cd4f649385db074b56d3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 3 Mar 2020 13:16:08 +0300 Subject: [PATCH 3598/3715] hwmon: (adt7462) Fix an error return in ADT7462_REG_VOLT() commit 44f2f882909fedfc3a56e4b90026910456019743 upstream. This is only called from adt7462_update_device(). The caller expects it to return zero on error. I fixed a similar issue earlier in commit a4bf06d58f21 ("hwmon: (adt7462) ADT7462_REG_VOLT_MAX() should return 0") but I missed this one. Fixes: c0b4e3ab0c76 ("adt7462: new hwmon driver") Signed-off-by: Dan Carpenter Reviewed-by: Darrick J. Wong Link: https://lore.kernel.org/r/20200303101608.kqjwfcazu2ylhi2a@kili.mountain Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/adt7462.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/adt7462.c b/drivers/hwmon/adt7462.c index 19f2a6d48bac..bdd7679fd298 100644 --- a/drivers/hwmon/adt7462.c +++ b/drivers/hwmon/adt7462.c @@ -426,7 +426,7 @@ static int ADT7462_REG_VOLT(struct adt7462_data *data, int which) return 0x95; break; } - return -ENODEV; + return 0; } /* Provide labels for sysfs */ From 956b5b1c038557d2d2168865846462b03087593e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 17 Feb 2020 17:40:50 +0300 Subject: [PATCH 3599/3715] dmaengine: coh901318: Fix a double lock bug in dma_tc_handle() commit 36d5d22090d13fd3a7a8c9663a711cbe6970aac8 upstream. The caller is already holding the lock so this will deadlock. Fixes: 0b58828c923e ("DMAENGINE: COH 901 318 remove irq counting") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20200217144050.3i4ymbytogod4ijn@kili.mountain Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/coh901318.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c index 6d7d2d54eacf..f0932f25a9b1 100644 --- a/drivers/dma/coh901318.c +++ b/drivers/dma/coh901318.c @@ -1944,8 +1944,6 @@ static void dma_tc_handle(struct coh901318_chan *cohc) return; } - spin_lock(&cohc->lock); - /* * When we reach this point, at least one queue item * should have been moved over from cohc->queue to @@ -1966,8 +1964,6 @@ static void dma_tc_handle(struct coh901318_chan *cohc) if (coh901318_queue_start(cohc) == NULL) cohc->busy = 0; - spin_unlock(&cohc->lock); - /* * This tasklet will remove items from cohc->active * and thus terminates them. From d78934442b62f345338ab4370d3d875ad1891686 Mon Sep 17 00:00:00 2001 From: "Desnes A. Nunes do Rosario" Date: Thu, 27 Feb 2020 10:47:15 -0300 Subject: [PATCH 3600/3715] powerpc: fix hardware PMU exception bug on PowerVM compatibility mode systems commit fc37a1632d40c80c067eb1bc235139f5867a2667 upstream. PowerVM systems running compatibility mode on a few Power8 revisions are still vulnerable to the hardware defect that loses PMU exceptions arriving prior to a context switch. The software fix for this issue is enabled through the CPU_FTR_PMAO_BUG cpu_feature bit, nevertheless this bit also needs to be set for PowerVM compatibility mode systems. Fixes: 68f2f0d431d9ea4 ("powerpc: Add a cpu feature CPU_FTR_PMAO_BUG") Signed-off-by: Desnes A. Nunes do Rosario Reviewed-by: Leonardo Bras Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200227134715.9715-1-desnesn@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/cputable.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index da4b0e379238..6ef41e823013 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -2232,11 +2232,13 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset, * oprofile_cpu_type already has a value, then we are * possibly overriding a real PVR with a logical one, * and, in that case, keep the current value for - * oprofile_cpu_type. + * oprofile_cpu_type. Futhermore, let's ensure that the + * fix for the PMAO bug is enabled on compatibility mode. */ if (old.oprofile_cpu_type != NULL) { t->oprofile_cpu_type = old.oprofile_cpu_type; t->oprofile_type = old.oprofile_type; + t->cpu_features |= old.cpu_features & CPU_FTR_PMAO_BUG; } } From ec6ddb1aa82fb7fae89ce485ee874eae26de56a8 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 17 Feb 2020 07:43:03 -0500 Subject: [PATCH 3601/3715] dm integrity: fix a deadlock due to offloading to an incorrect workqueue commit 53770f0ec5fd417429775ba006bc4abe14002335 upstream. If we need to perform synchronous I/O in dm_integrity_map_continue(), we must make sure that we are not in the map function - in order to avoid the deadlock due to bio queuing in generic_make_request. To avoid the deadlock, we offload the request to metadata_wq. However, metadata_wq also processes metadata updates for write requests. If there are too many requests that get offloaded to metadata_wq at the beginning of dm_integrity_map_continue, the workqueue metadata_wq becomes clogged and the system is incapable of processing any metadata updates. This causes a deadlock because all the requests that need to do metadata updates wait for metadata_wq to proceed and metadata_wq waits inside wait_and_add_new_range until some existing request releases its range lock (which doesn't happen because the range lock is released after metadata update). In order to fix the deadlock, we create a new workqueue offload_wq and offload requests to it - so that processing of offload_wq is independent from processing of metadata_wq. Fixes: 7eada909bfd7 ("dm: add integrity target") Cc: stable@vger.kernel.org # v4.12+ Reported-by: Heinz Mauelshagen Tested-by: Heinz Mauelshagen Signed-off-by: Heinz Mauelshagen Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-integrity.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 23f0f4eaaa2e..b6ca5b1100db 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -187,6 +187,7 @@ struct dm_integrity_c { struct rb_root in_progress; wait_queue_head_t endio_wait; struct workqueue_struct *wait_wq; + struct workqueue_struct *offload_wq; unsigned char commit_seq; commit_id_t commit_ids[N_COMMIT_IDS]; @@ -1157,7 +1158,7 @@ static void dec_in_flight(struct dm_integrity_io *dio) dio->range.logical_sector += dio->range.n_sectors; bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT); INIT_WORK(&dio->work, integrity_bio_wait); - queue_work(ic->wait_wq, &dio->work); + queue_work(ic->offload_wq, &dio->work); return; } do_endio_flush(ic, dio); @@ -1577,7 +1578,7 @@ static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map if (need_sync_io && from_map) { INIT_WORK(&dio->work, integrity_bio_wait); - queue_work(ic->metadata_wq, &dio->work); + queue_work(ic->offload_wq, &dio->work); return; } @@ -3005,6 +3006,14 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } + ic->offload_wq = alloc_workqueue("dm-integrity-offload", WQ_MEM_RECLAIM, + METADATA_WORKQUEUE_MAX_ACTIVE); + if (!ic->offload_wq) { + ti->error = "Cannot allocate workqueue"; + r = -ENOMEM; + goto bad; + } + ic->commit_wq = alloc_workqueue("dm-integrity-commit", WQ_MEM_RECLAIM, 1); if (!ic->commit_wq) { ti->error = "Cannot allocate workqueue"; @@ -3189,6 +3198,8 @@ static void dm_integrity_dtr(struct dm_target *ti) destroy_workqueue(ic->metadata_wq); if (ic->wait_wq) destroy_workqueue(ic->wait_wq); + if (ic->offload_wq) + destroy_workqueue(ic->offload_wq); if (ic->commit_wq) destroy_workqueue(ic->commit_wq); if (ic->writer_wq) From df4bf4dceb3a9fd91dc85b3d7f6ca1a0afdf8a75 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 9 Nov 2018 17:21:18 +0200 Subject: [PATCH 3602/3715] xhci: handle port status events for removed USB3 hcd commit 1245374e9b8340fc255fd51b2015173a83050d03 upstream. At xhci removal the USB3 hcd (shared_hcd) is removed before the primary USB2 hcd. Interrupts for port status changes may still occur for USB3 ports after the shared_hcd is freed, causing NULL pointer dereference. Check if xhci->shared_hcd is still valid before handing USB3 port events Cc: Reported-by: Peter Chen Tested-by: Jack Pham Signed-off-by: Mathias Nyman Cc: Macpaul Lin [redone for 4.14.y based on Mathias's comments] Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 868878f5b72b..97cf8e1fc07c 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1640,6 +1640,12 @@ static void handle_port_status(struct xhci_hcd *xhci, if ((major_revision == 0x03) != (hcd->speed >= HCD_USB3)) hcd = xhci->shared_hcd; + if (!hcd) { + xhci_dbg(xhci, "No hcd found for port %u event\n", port_id); + bogus_port_status = true; + goto cleanup; + } + if (major_revision == 0) { xhci_warn(xhci, "Event for port %u not in " "Extended Capabilities, ignoring.\n", From 30238068123e304f02f594726451e3e93a06ce42 Mon Sep 17 00:00:00 2001 From: Dragos Tarcatu Date: Fri, 7 Feb 2020 20:53:25 +0200 Subject: [PATCH 3603/3715] ASoC: topology: Fix memleak in soc_tplg_manifest_load() commit 242c46c023610dbc0213fc8fb6b71eb836bc5d95 upstream. In case of ABI version mismatch, _manifest needs to be freed as it is just a copy of the original topology manifest. However, if a driver manifest handler is defined, that would get executed and the cleanup is never reached. Fix that by getting the return status of manifest() instead of returning directly. Fixes: 583958fa2e52 ("ASoC: topology: Make manifest backward compatible from ABI v4") Signed-off-by: Dragos Tarcatu Link: https://lore.kernel.org/r/20200207185325.22320-3-dragos_tarcatu@mentor.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-topology.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 0f91b4ed6814..1a912f72bddd 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -2333,7 +2333,7 @@ static int soc_tplg_manifest_load(struct soc_tplg *tplg, { struct snd_soc_tplg_manifest *manifest, *_manifest; bool abi_match; - int err; + int ret = 0; if (tplg->pass != SOC_TPLG_PASS_MANIFEST) return 0; @@ -2346,19 +2346,19 @@ static int soc_tplg_manifest_load(struct soc_tplg *tplg, _manifest = manifest; } else { abi_match = false; - err = manifest_new_ver(tplg, manifest, &_manifest); - if (err < 0) - return err; + ret = manifest_new_ver(tplg, manifest, &_manifest); + if (ret < 0) + return ret; } /* pass control to component driver for optional further init */ if (tplg->comp && tplg->ops && tplg->ops->manifest) - return tplg->ops->manifest(tplg->comp, _manifest); + ret = tplg->ops->manifest(tplg->comp, _manifest); if (!abi_match) /* free the duplicated one */ kfree(_manifest); - return 0; + return ret; } /* validate header magic, size and type */ From 12cd844a39ed16aa183a820a54fe6f9a0bb4cd14 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 11 Mar 2020 18:03:09 +0100 Subject: [PATCH 3604/3715] Linux 4.14.173 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6d3cecad7f1e..9a524b5c1d55 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 172 +SUBLEVEL = 173 EXTRAVERSION = NAME = Petit Gorille From 9e469e717b409592a9ca42a1203a267e56491446 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 5 Mar 2020 00:41:38 -0800 Subject: [PATCH 3605/3715] UPSTREAM: fscrypt: don't evict dirty inodes after removing key After FS_IOC_REMOVE_ENCRYPTION_KEY removes a key, it syncs the filesystem and tries to get and put all inodes that were unlocked by the key so that unused inodes get evicted via fscrypt_drop_inode(). Normally, the inodes are all clean due to the sync. However, after the filesystem is sync'ed, userspace can modify and close one of the files. (Userspace is *supposed* to close the files before removing the key. But it doesn't always happen, and the kernel can't assume it.) This causes the inode to be dirtied and have i_count == 0. Then, fscrypt_drop_inode() failed to consider this case and indicated that the inode can be dropped, causing the write to be lost. On f2fs, other problems such as a filesystem freeze could occur due to the inode being freed while still on f2fs's dirty inode list. Fix this bug by making fscrypt_drop_inode() only drop clean inodes. I've written an xfstest which detects this bug on ext4, f2fs, and ubifs. Fixes: b1c0ec3599f4 ("fscrypt: add FS_IOC_REMOVE_ENCRYPTION_KEY ioctl") Cc: # v5.4+ Link: https://lore.kernel.org/r/20200305084138.653498-1-ebiggers@kernel.org Signed-off-by: Eric Biggers (cherry picked from commit 2b4eae95c7361e0a147b838715c8baa1380a428f) Bug: 150589360 Test: kvm-xfstests -c ext4,f2fs -g encrypt Change-Id: Ia32db980c2fffb68caeaf9f38e5cfbe781b45011 Signed-off-by: Eric Biggers --- fs/crypto/keysetup.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index c289f4e32186..f4c6d8cb4587 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -607,6 +607,15 @@ int fscrypt_drop_inode(struct inode *inode) return 0; mk = ci->ci_master_key->payload.data[0]; + /* + * With proper, non-racy use of FS_IOC_REMOVE_ENCRYPTION_KEY, all inodes + * protected by the key were cleaned by sync_filesystem(). But if + * userspace is still using the files, inodes can be dirtied between + * then and now. We mustn't lose any writes, so skip dirty inodes here. + */ + if (inode->i_state & I_DIRTY_ALL) + return 0; + /* * Note: since we aren't holding ->mk_secret_sem, the result here can * immediately become outdated. But there's no correctness problem with From 1540e7955b923e0dddf33343031b951c922acd9d Mon Sep 17 00:00:00 2001 From: Alistair Delva Date: Wed, 4 Mar 2020 12:03:23 -0800 Subject: [PATCH 3606/3715] ANDROID: serdev: restrict claim of platform devices Make the fallback path for claiming platform devices trigger only if a new module parameter is specified: serdev_ttyport.pdev_tty_port=ttyS2 Bug: 146517987 Change-Id: Ibf331ad6e6d8712a405921530f217f7122428b13 Signed-off-by: Alistair Delva --- drivers/tty/serdev/core.c | 15 +++++++++------ drivers/tty/serdev/serdev-ttyport.c | 26 +++++++++++++++++++++++++- include/linux/serdev.h | 14 +++++++++++++- 3 files changed, 47 insertions(+), 8 deletions(-) diff --git a/drivers/tty/serdev/core.c b/drivers/tty/serdev/core.c index f513107b9ea0..e6c9ff65402a 100644 --- a/drivers/tty/serdev/core.c +++ b/drivers/tty/serdev/core.c @@ -452,16 +452,18 @@ static int platform_serdev_register_devices(struct serdev_controller *ctrl) return err; } + /** - * serdev_controller_add() - Add an serdev controller + * serdev_controller_add_platform() - Add an serdev controller * @ctrl: controller to be registered. + * @platform: whether to permit fallthrough to platform device probe * * Register a controller previously allocated via serdev_controller_alloc() with - * the serdev core. + * the serdev core. Optionally permit probing via a platform device fallback. */ -int serdev_controller_add(struct serdev_controller *ctrl) +int serdev_controller_add_platform(struct serdev_controller *ctrl, bool platform) { - int ret_of, ret_platform, ret; + int ret, ret_of, ret_platform = -ENODEV; /* Can't register until after driver model init */ if (WARN_ON(!is_registered)) @@ -471,8 +473,9 @@ int serdev_controller_add(struct serdev_controller *ctrl) if (ret) return ret; - ret_platform = platform_serdev_register_devices(ctrl); ret_of = of_serdev_register_devices(ctrl); + if (platform) + ret_platform = platform_serdev_register_devices(ctrl); if (ret_of && ret_platform) { dev_dbg(&ctrl->dev, "no devices registered: of:%d " "platform:%d\n", @@ -489,7 +492,7 @@ out_dev_del: device_del(&ctrl->dev); return ret; }; -EXPORT_SYMBOL_GPL(serdev_controller_add); +EXPORT_SYMBOL_GPL(serdev_controller_add_platform); /* Remove a device associated with a controller */ static int serdev_remove_device(struct device *dev, void *data) diff --git a/drivers/tty/serdev/serdev-ttyport.c b/drivers/tty/serdev/serdev-ttyport.c index 88cf520da739..0669e18ff879 100644 --- a/drivers/tty/serdev/serdev-ttyport.c +++ b/drivers/tty/serdev/serdev-ttyport.c @@ -15,9 +15,15 @@ #include #include #include +#include +#include #define SERPORT_ACTIVE 1 +static char *pdev_tty_port; +module_param(pdev_tty_port, charp, 0644); +MODULE_PARM_DESC(pdev_tty_port, "platform device tty port to claim"); + struct serport { struct tty_port *port; struct tty_struct *tty; @@ -240,6 +246,7 @@ struct device *serdev_tty_port_register(struct tty_port *port, { struct serdev_controller *ctrl; struct serport *serport; + bool platform = false; int ret; if (!port || !drv || !parent) @@ -259,7 +266,24 @@ struct device *serdev_tty_port_register(struct tty_port *port, port->client_ops = &client_ops; port->client_data = ctrl; - ret = serdev_controller_add(ctrl); + /* There is not always a way to bind specific platform devices because + * they may be defined on platforms without DT or ACPI. When dealing + * with a platform devices, do not allow direct binding unless it is + * whitelisted by module parameter. If a platform device is otherwise + * described by DT or ACPI it will still be bound and this check will + * be ignored. + */ + if (parent->bus == &platform_bus_type) { + char tty_port_name[7]; + + sprintf(tty_port_name, "%s%d", drv->name, idx); + if (pdev_tty_port && + !strcmp(pdev_tty_port, tty_port_name)) { + platform = true; + } + } + + ret = serdev_controller_add_platform(ctrl, platform); if (ret) goto err_reset_data; diff --git a/include/linux/serdev.h b/include/linux/serdev.h index d609e6dc5bad..49f6e382c94e 100644 --- a/include/linux/serdev.h +++ b/include/linux/serdev.h @@ -164,9 +164,21 @@ int serdev_device_add(struct serdev_device *); void serdev_device_remove(struct serdev_device *); struct serdev_controller *serdev_controller_alloc(struct device *, size_t); -int serdev_controller_add(struct serdev_controller *); +int serdev_controller_add_platform(struct serdev_controller *, bool); void serdev_controller_remove(struct serdev_controller *); +/** + * serdev_controller_add() - Add an serdev controller + * @ctrl: controller to be registered. + * + * Register a controller previously allocated via serdev_controller_alloc() with + * the serdev core. + */ +static inline int serdev_controller_add(struct serdev_controller *ctrl) +{ + return serdev_controller_add_platform(ctrl, false); +} + static inline void serdev_controller_write_wakeup(struct serdev_controller *ctrl) { struct serdev_device *serdev = ctrl->serdev; From 5198da7465585d01d68785d8a43bd8b57edb91fd Mon Sep 17 00:00:00 2001 From: "A. Cody Schuffelen" Date: Tue, 11 Feb 2020 18:01:10 -0800 Subject: [PATCH 3607/3715] ANDROID: Add TPM support and the vTPM proxy to Cuttlefish. This module allows presenting the kernel TPM interface while proxying the TPM commands into a file descriptor. The module was originally implemented to support running a TPM simulator on the same host system and exposing a kernel TPM interface to a Linux container, but it is also a convenient incremental step while we figure out our long-term strategy with crosvm, which does not have TPM support following the same standards as qemu. Implicitly enables SECURITYFS via 'selects' from these new drivers. Bug: 148102533 Test: Build and run locally with cuttlefish, check for /dev/vtpmx Change-Id: I568a50c2ecb7899aae70e7a20efaedc84443511d Signed-off-by: A. Cody Schuffelen --- arch/arm64/configs/cuttlefish_defconfig | 2 ++ arch/x86/configs/x86_64_cuttlefish_defconfig | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index bd0df26e4416..83f6f54d009a 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -306,6 +306,8 @@ CONFIG_SERIAL_DEV_BUS=y CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y # CONFIG_HW_RANDOM_CAVIUM is not set +CONFIG_TCG_TPM=y +CONFIG_TCG_VTPM_PROXY=y # CONFIG_DEVPORT is not set # CONFIG_I2C_COMPAT is not set # CONFIG_I2C_HELPER_AUTO is not set diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index ad10c87900ca..9d2d0f051085 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -327,6 +327,8 @@ CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y CONFIG_HPET=y # CONFIG_HPET_MMAP_DEFAULT is not set +CONFIG_TCG_TPM=y +CONFIG_TCG_VTPM_PROXY=y # CONFIG_DEVPORT is not set # CONFIG_ACPI_I2C_OPREGION is not set # CONFIG_I2C_COMPAT is not set From 3419228ff5f9a17b7c7342221e92fd8d671d99d2 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 19 Feb 2020 12:31:56 +0000 Subject: [PATCH 3608/3715] UPSTREAM: mm: Avoid creating virtual address aliases in brk()/mmap()/mremap() (Upstream commit dcde237319e626d1ec3c9d8b7613032f0fd4663a.) Currently the arm64 kernel ignores the top address byte passed to brk(), mmap() and mremap(). When the user is not aware of the 56-bit address limit or relies on the kernel to return an error, untagging such pointers has the potential to create address aliases in user-space. Passing a tagged address to munmap(), madvise() is permitted since the tagged pointer is expected to be inside an existing mapping. The current behaviour breaks the existing glibc malloc() implementation which relies on brk() with an address beyond 56-bit to be rejected by the kernel. Remove untagging in the above functions by partially reverting commit ce18d171cb73 ("mm: untag user pointers in mmap/munmap/mremap/brk"). In addition, update the arm64 tagged-address-abi.rst document accordingly. Link: https://bugzilla.redhat.com/1797052 Fixes: ce18d171cb73 ("mm: untag user pointers in mmap/munmap/mremap/brk") Cc: # 5.4.x- Cc: Florian Weimer Reviewed-by: Andrew Morton Reported-by: Victor Stinner Acked-by: Will Deacon Acked-by: Andrey Konovalov Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon Bug: 135692346 Change-Id: Iadeceb2d5d5fb576ab1bb5ae1a67f4971bbbf88e Signed-off-by: Andrey Konovalov --- Documentation/arm64/tagged-address-abi.rst | 11 +++++++++-- mm/mmap.c | 4 ---- mm/mremap.c | 1 - 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/Documentation/arm64/tagged-address-abi.rst b/Documentation/arm64/tagged-address-abi.rst index d4a85d535bf9..4a9d9c794ee5 100644 --- a/Documentation/arm64/tagged-address-abi.rst +++ b/Documentation/arm64/tagged-address-abi.rst @@ -44,8 +44,15 @@ The AArch64 Tagged Address ABI has two stages of relaxation depending how the user addresses are used by the kernel: 1. User addresses not accessed by the kernel but used for address space - management (e.g. ``mmap()``, ``mprotect()``, ``madvise()``). The use - of valid tagged pointers in this context is always allowed. + management (e.g. ``mprotect()``, ``madvise()``). The use of valid + tagged pointers in this context is allowed with the exception of + ``brk()``, ``mmap()`` and the ``new_address`` argument to + ``mremap()`` as these have the potential to alias with existing + user addresses. + + NOTE: This behaviour changed in v5.6 and so some earlier kernels may + incorrectly accept valid tagged pointers for the ``brk()``, + ``mmap()`` and ``mremap()`` system calls. 2. User addresses accessed by the kernel (e.g. ``write()``). This ABI relaxation is disabled by default and the application thread needs to diff --git a/mm/mmap.c b/mm/mmap.c index e19a8fa081f6..6afba0872180 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -184,8 +184,6 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) bool populate; LIST_HEAD(uf); - brk = untagged_addr(brk); - if (down_write_killable(&mm->mmap_sem)) return -EINTR; @@ -1522,8 +1520,6 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, struct file *file = NULL; unsigned long retval; - addr = untagged_addr(addr); - if (!(flags & MAP_ANONYMOUS)) { audit_mmap_fd(fd, flags); file = fget(fd); diff --git a/mm/mremap.c b/mm/mremap.c index 6cd85324de7f..e9990c1afd60 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -530,7 +530,6 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, LIST_HEAD(uf_unmap); addr = untagged_addr(addr); - new_addr = untagged_addr(new_addr); if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE)) return ret; From df4f18795afa02f8f55720a191e9e156f9655d32 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 19 Feb 2020 10:19:13 +0000 Subject: [PATCH 3609/3715] UPSTREAM: arm64: memory: Add missing brackets to untagged_addr() macro (Upstream commit d0022c0ef29b78bcbe8a5c5894bd2307143afce1.) Add brackets around the evaluation of the 'addr' parameter to the untagged_addr() macro so that the cast to 'u64' applies to the result of the expression. Cc: Fixes: 597399d0cb91 ("arm64: tags: Preserve tags for addresses translated via TTBR1") Reported-by: Linus Torvalds Signed-off-by: Will Deacon Bug: 135692346 Change-Id: I1bce8f6a185258a365aaa292483fabc02519301f Signed-off-by: Andrey Konovalov --- arch/arm64/include/asm/memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 8f7f30fec6be..d4f49599c5db 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -219,7 +219,7 @@ static inline unsigned long kaslr_offset(void) ((__force __typeof__(addr))sign_extend64((__force u64)(addr), 55)) #define untagged_addr(addr) ({ \ - u64 __addr = (__force u64)addr; \ + u64 __addr = (__force u64)(addr); \ __addr &= __untagged_addr(__addr); \ (__force __typeof__(addr))__addr; \ }) From 67ea9300c746ed5a29c19ff30dc51aa9b849e517 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Koutn=C3=BD?= Date: Fri, 24 Jan 2020 15:07:27 -0800 Subject: [PATCH 3610/3715] UPSTREAM: cgroup: Iterate tasks that did not finish do_exit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PF_EXITING is set earlier than actual removal from css_set when a task is exitting. This can confuse cgroup.procs readers who see no PF_EXITING tasks, however, rmdir is checking against css_set membership so it can transitionally fail with EBUSY. Fix this by listing tasks that weren't unlinked from css_set active lists. It may happen that other users of the task iterator (without CSS_TASK_ITER_PROCS) spot a PF_EXITING task before cgroup_exit(). This is equal to the state before commit c03cd7738a83 ("cgroup: Include dying leaders with live threads in PROCS iterations") but it may be reviewed later. Reported-by: Suren Baghdasaryan Fixes: c03cd7738a83 ("cgroup: Include dying leaders with live threads in PROCS iterations") Signed-off-by: Michal Koutný (cherry picked from commit 9c974c77246460fa6a92c18554c3311c8c83c160) Bug: 141213848 Bug: 146758430 Test: test_cgcore_destroy from linux-kselftest Signed-off-by: Suren Baghdasaryan Change-Id: Iac57661b931129ed1e44b89675f8115bb89084ff (cherry picked from commit 21ee296526c70d6dc3c64639406f156f39b80fd0) --- include/linux/cgroup.h | 1 + kernel/cgroup/cgroup.c | 23 ++++++++++++++++------- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index de0f5fe28490..a22949de5b40 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -61,6 +61,7 @@ struct css_task_iter { struct list_head *mg_tasks_head; struct list_head *dying_tasks_head; + struct list_head *cur_tasks_head; struct css_set *cur_cset; struct css_set *cur_dcset; struct task_struct *cur_task; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index ea287f53c3a3..3ef1680d9c1c 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -4141,12 +4141,16 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it) } } while (!css_set_populated(cset) && list_empty(&cset->dying_tasks)); - if (!list_empty(&cset->tasks)) + if (!list_empty(&cset->tasks)) { it->task_pos = cset->tasks.next; - else if (!list_empty(&cset->mg_tasks)) + it->cur_tasks_head = &cset->tasks; + } else if (!list_empty(&cset->mg_tasks)) { it->task_pos = cset->mg_tasks.next; - else + it->cur_tasks_head = &cset->mg_tasks; + } else { it->task_pos = cset->dying_tasks.next; + it->cur_tasks_head = &cset->dying_tasks; + } it->tasks_head = &cset->tasks; it->mg_tasks_head = &cset->mg_tasks; @@ -4204,10 +4208,14 @@ repeat: else it->task_pos = it->task_pos->next; - if (it->task_pos == it->tasks_head) + if (it->task_pos == it->tasks_head) { it->task_pos = it->mg_tasks_head->next; - if (it->task_pos == it->mg_tasks_head) + it->cur_tasks_head = it->mg_tasks_head; + } + if (it->task_pos == it->mg_tasks_head) { it->task_pos = it->dying_tasks_head->next; + it->cur_tasks_head = it->dying_tasks_head; + } if (it->task_pos == it->dying_tasks_head) css_task_iter_advance_css_set(it); } else { @@ -4226,11 +4234,12 @@ repeat: goto repeat; /* and dying leaders w/o live member threads */ - if (!atomic_read(&task->signal->live)) + if (it->cur_tasks_head == it->dying_tasks_head && + !atomic_read(&task->signal->live)) goto repeat; } else { /* skip all dying ones */ - if (task->flags & PF_EXITING) + if (it->cur_tasks_head == it->dying_tasks_head) goto repeat; } } From 89e090543660aef3da90faa341be0932a66ac4a4 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Mon, 16 Mar 2020 13:47:17 -0700 Subject: [PATCH 3611/3715] ANDROID: Incremental fs: Remove all access_ok checks They provide no value and simply duplicate a check in copy_from/to_user Test: incfs_test passes Bug: 138149732 Signed-off-by: Paul Lawrence Change-Id: Icc6054a2d6a495c9a03cd1507dda1ab8ca0b0dc4 --- fs/incfs/vfs.c | 32 ++------------------------------ 1 file changed, 2 insertions(+), 30 deletions(-) diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index 22edaeeaa613..46e2617e4c25 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -465,9 +465,6 @@ static ssize_t pending_reads_read(struct file *f, char __user *buf, size_t len, ssize_t result = 0; int i = 0; - if (!access_ok(VERIFY_WRITE, buf, len)) - return -EFAULT; - if (!incfs_fresh_pending_reads_exist(mi, last_known_read_sn)) return 0; @@ -859,9 +856,6 @@ static struct signature_info *incfs_copy_signature_info_from_user( if (!original) return NULL; - if (!access_ok(VERIFY_READ, original, sizeof(usr_si))) - return ERR_PTR(-EFAULT); - if (copy_from_user(&usr_si, original, sizeof(usr_si)) > 0) return ERR_PTR(-EFAULT); @@ -1191,10 +1185,7 @@ static long ioctl_create_file(struct mount_info *mi, error = -EFAULT; goto out; } - if (!access_ok(VERIFY_READ, usr_args, sizeof(args))) { - error = -EFAULT; - goto out; - } + if (copy_from_user(&args, usr_args, sizeof(args)) > 0) { error = -EFAULT; goto out; @@ -1320,12 +1311,6 @@ static long ioctl_create_file(struct mount_info *mi, goto delete_index_file; } - if (!access_ok(VERIFY_READ, u64_to_user_ptr(args.file_attr), - args.file_attr_len)) { - error = -EFAULT; - goto delete_index_file; - } - if (copy_from_user(attr_value, u64_to_user_ptr(args.file_attr), args.file_attr_len) > 0) { @@ -1387,15 +1372,9 @@ static long ioctl_read_file_signature(struct file *f, void __user *arg) if (!df) return -EINVAL; - if (!access_ok(VERIFY_READ, args_usr_ptr, sizeof(args))) - return -EFAULT; if (copy_from_user(&args, args_usr_ptr, sizeof(args)) > 0) return -EINVAL; - if (!access_ok(VERIFY_WRITE, u64_to_user_ptr(args.file_signature), - args.file_signature_buf_size)) - return -EFAULT; - sig_buf_size = args.file_signature_buf_size; if (sig_buf_size > INCFS_MAX_SIGNATURE_SIZE) return -E2BIG; @@ -1917,9 +1896,6 @@ static ssize_t file_write(struct file *f, const char __user *buf, if (!df) return -EBADF; - if (!access_ok(VERIFY_READ, usr_blocks, size)) - return -EFAULT; - data_buf = (u8 *)__get_free_pages(GFP_NOFS, get_order(data_buf_size)); if (!data_buf) return -ENOMEM; @@ -1936,11 +1912,7 @@ static ssize_t file_write(struct file *f, const char __user *buf, error = -E2BIG; break; } - if (!access_ok(VERIFY_READ, u64_to_user_ptr(block.data), - block.data_len)) { - error = -EFAULT; - break; - } + if (copy_from_user(data_buf, u64_to_user_ptr(block.data), block.data_len) > 0) { error = -EFAULT; From dd3909c4a7fc7288f24af4bcb251836d135608cc Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Tue, 10 Mar 2020 13:03:38 -0700 Subject: [PATCH 3612/3715] ANDROID: Incremental fs: Make fill block an ioctl Filling blocks is not equivalent to writing a file, since they are constrained by the root hash. selinux policy may wish to treat them differently, for instance. Test: incfs_test passes Bug: 138149732 Signed-off-by: Paul Lawrence Change-Id: Ic369b84b92547b1cfefe422bd881c4e466090aed --- fs/incfs/data_mgmt.c | 4 +- fs/incfs/data_mgmt.h | 5 +- fs/incfs/vfs.c | 138 +++++++++--------- include/uapi/linux/incrementalfs.h | 28 +++- .../selftests/filesystems/incfs/incfs_test.c | 56 +++---- 5 files changed, 118 insertions(+), 113 deletions(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index 90bf9e37d236..eb4e32040f4e 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -699,7 +699,7 @@ out: } int incfs_process_new_data_block(struct data_file *df, - struct incfs_new_data_block *block, u8 *data) + struct incfs_fill_block *block, u8 *data) { struct mount_info *mi = NULL; struct backing_file_context *bfc = NULL; @@ -781,7 +781,7 @@ int incfs_read_file_signature(struct data_file *df, struct mem_range dst) } int incfs_process_new_hash_block(struct data_file *df, - struct incfs_new_data_block *block, u8 *data) + struct incfs_fill_block *block, u8 *data) { struct backing_file_context *bfc = NULL; struct mount_info *mi = NULL; diff --git a/fs/incfs/data_mgmt.h b/fs/incfs/data_mgmt.h index 8b62b0348d51..5ce1966b4fc5 100644 --- a/fs/incfs/data_mgmt.h +++ b/fs/incfs/data_mgmt.h @@ -262,11 +262,10 @@ ssize_t incfs_read_data_file_block(struct mem_range dst, struct data_file *df, int incfs_read_file_signature(struct data_file *df, struct mem_range dst); int incfs_process_new_data_block(struct data_file *df, - struct incfs_new_data_block *block, u8 *data); + struct incfs_fill_block *block, u8 *data); int incfs_process_new_hash_block(struct data_file *df, - struct incfs_new_data_block *block, u8 *data); - + struct incfs_fill_block *block, u8 *data); bool incfs_fresh_pending_reads_exist(struct mount_info *mi, int last_number); diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index 46e2617e4c25..04f292e3377b 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -52,8 +52,6 @@ static int dir_rename(struct inode *old_dir, struct dentry *old_dentry, static int file_open(struct inode *inode, struct file *file); static int file_release(struct inode *inode, struct file *file); -static ssize_t file_write(struct file *f, const char __user *buf, - size_t size, loff_t *offset); static int read_single_page(struct file *f, struct page *page); static long dispatch_ioctl(struct file *f, unsigned int req, unsigned long arg); @@ -129,7 +127,6 @@ static const struct address_space_operations incfs_address_space_ops = { static const struct file_operations incfs_file_ops = { .open = file_open, .release = file_release, - .write = file_write, .read_iter = generic_file_read_iter, .mmap = generic_file_mmap, .splice_read = generic_file_splice_read, @@ -797,9 +794,6 @@ static int read_single_page(struct file *f, struct page *page) size = df->df_size; timeout_ms = df->df_mount_info->mi_options.read_timeout_ms; - pr_debug("incfs: %s %s %lld\n", __func__, - f->f_path.dentry->d_name.name, offset); - if (offset < size) { struct mem_range tmp = { .len = 2 * INCFS_DATA_FILE_BLOCK_SIZE @@ -1359,6 +1353,72 @@ out: return error; } +static long ioctl_fill_blocks(struct file *f, void __user *arg) +{ + struct incfs_fill_blocks __user *usr_fill_blocks = arg; + struct incfs_fill_blocks fill_blocks; + struct incfs_fill_block *usr_fill_block_array; + struct data_file *df = get_incfs_data_file(f); + const ssize_t data_buf_size = 2 * INCFS_DATA_FILE_BLOCK_SIZE; + u8 *data_buf = NULL; + ssize_t error = 0; + int i = 0; + + if (!df) + return -EBADF; + + if (copy_from_user(&fill_blocks, usr_fill_blocks, sizeof(fill_blocks))) + return -EFAULT; + + usr_fill_block_array = u64_to_user_ptr(fill_blocks.fill_blocks); + data_buf = (u8 *)__get_free_pages(GFP_NOFS, get_order(data_buf_size)); + if (!data_buf) + return -ENOMEM; + + for (i = 0; i < fill_blocks.count; i++) { + struct incfs_fill_block fill_block = {}; + + if (copy_from_user(&fill_block, &usr_fill_block_array[i], + sizeof(fill_block)) > 0) { + error = -EFAULT; + break; + } + + if (fill_block.data_len > data_buf_size) { + error = -E2BIG; + break; + } + + if (copy_from_user(data_buf, u64_to_user_ptr(fill_block.data), + fill_block.data_len) > 0) { + error = -EFAULT; + break; + } + fill_block.data = 0; /* To make sure nobody uses it. */ + if (fill_block.flags & INCFS_BLOCK_FLAGS_HASH) { + error = incfs_process_new_hash_block(df, &fill_block, + data_buf); + } else { + error = incfs_process_new_data_block(df, &fill_block, + data_buf); + } + if (error) + break; + } + + if (data_buf) + free_pages((unsigned long)data_buf, get_order(data_buf_size)); + + /* + * Only report the error if no records were processed, otherwise + * just return how many were processed successfully. + */ + if (i == 0) + return error; + + return i; +} + static long ioctl_read_file_signature(struct file *f, void __user *arg) { struct incfs_get_file_sig_args __user *args_usr_ptr = arg; @@ -1414,6 +1474,8 @@ static long dispatch_ioctl(struct file *f, unsigned int req, unsigned long arg) switch (req) { case INCFS_IOC_CREATE_FILE: return ioctl_create_file(mi, (void __user *)arg); + case INCFS_IOC_FILL_BLOCKS: + return ioctl_fill_blocks(f, (void __user *)arg); case INCFS_IOC_READ_FILE_SIGNATURE: return ioctl_read_file_signature(f, (void __user *)arg); default: @@ -1881,70 +1943,6 @@ static int file_release(struct inode *inode, struct file *file) return 0; } -static ssize_t file_write(struct file *f, const char __user *buf, - size_t size, loff_t *offset) -{ - struct data_file *df = get_incfs_data_file(f); - const ssize_t data_buf_size = 2 * INCFS_DATA_FILE_BLOCK_SIZE; - size_t block_count = size / sizeof(struct incfs_new_data_block); - struct incfs_new_data_block __user *usr_blocks = - (struct incfs_new_data_block __user *)buf; - u8 *data_buf = NULL; - ssize_t error = 0; - int i = 0; - - if (!df) - return -EBADF; - - data_buf = (u8 *)__get_free_pages(GFP_NOFS, get_order(data_buf_size)); - if (!data_buf) - return -ENOMEM; - - for (i = 0; i < block_count; i++) { - struct incfs_new_data_block block = {}; - - if (copy_from_user(&block, &usr_blocks[i], sizeof(block)) > 0) { - error = -EFAULT; - break; - } - - if (block.data_len > data_buf_size) { - error = -E2BIG; - break; - } - - if (copy_from_user(data_buf, u64_to_user_ptr(block.data), - block.data_len) > 0) { - error = -EFAULT; - break; - } - block.data = 0; /* To make sure nobody uses it. */ - if (block.flags & INCFS_BLOCK_FLAGS_HASH) { - error = incfs_process_new_hash_block(df, &block, - data_buf); - } else { - error = incfs_process_new_data_block(df, &block, - data_buf); - } - if (error) - break; - } - - if (data_buf) - free_pages((unsigned long)data_buf, get_order(data_buf_size)); - *offset = 0; - - /* - * Only report the error if no records were processed, otherwise - * just return how many were processed successfully. - */ - if (i == 0) - return error; - - return i * sizeof(struct incfs_new_data_block); -} - - static int dentry_revalidate(struct dentry *d, unsigned int flags) { struct path backing_path = {}; diff --git a/include/uapi/linux/incrementalfs.h b/include/uapi/linux/incrementalfs.h index 787049031cca..81947ded482e 100644 --- a/include/uapi/linux/incrementalfs.h +++ b/include/uapi/linux/incrementalfs.h @@ -46,7 +46,15 @@ /* Read file signature */ #define INCFS_IOC_READ_FILE_SIGNATURE \ - _IOWR(INCFS_IOCTL_BASE_CODE, 31, struct incfs_get_file_sig_args) + _IOR(INCFS_IOCTL_BASE_CODE, 31, struct incfs_get_file_sig_args) + +/* + * Fill in one or more data block + * + * Returns number of blocks filled in, or error if none were + */ +#define INCFS_IOC_FILL_BLOCKS \ + _IOR(INCFS_IOCTL_BASE_CODE, 32, struct incfs_fill_blocks) enum incfs_compression_alg { COMPRESSION_NONE = 0, @@ -81,10 +89,9 @@ struct incfs_pending_read_info { }; /* - * A struct to be written into a control file to load a data or hash - * block to a data file. + * Description of a data or hash block to add to a data file. */ -struct incfs_new_data_block { +struct incfs_fill_block { /* Index of a data block. */ __u32 block_index; @@ -117,6 +124,19 @@ struct incfs_new_data_block { __aligned_u64 reserved3; }; +/* + * Description of a number of blocks to add to a data file + * + * Argument for INCFS_IOC_FILL_BLOCKS + */ +struct incfs_fill_blocks { + /* Number of blocks */ + __u64 count; + + /* A pointer to an array of incfs_fill_block structs */ + __aligned_u64 fill_blocks; +}; + enum incfs_hash_tree_algorithm { INCFS_HASH_TREE_NONE = 0, INCFS_HASH_TREE_SHA256 = 1 diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index dd70e019dc4c..5d0012b3972b 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -343,8 +343,12 @@ static int emit_test_blocks(char *mnt_dir, struct test_file *file, uint8_t *data_buf = malloc(data_buf_size); uint8_t *current_data = data_buf; uint8_t *data_end = data_buf + data_buf_size; - struct incfs_new_data_block *block_buf = - calloc(block_count, sizeof(*block_buf)); + struct incfs_fill_block *block_buf = + calloc(block_count, sizeof(struct incfs_fill_block)); + struct incfs_fill_blocks fill_blocks = { + .count = block_count, + .fill_blocks = ptr_to_u64(block_buf), + }; ssize_t write_res = 0; int fd; int error = 0; @@ -404,17 +408,15 @@ static int emit_test_blocks(char *mnt_dir, struct test_file *file, block_buf[i].block_index = block_index; block_buf[i].data_len = block_size; block_buf[i].data = ptr_to_u64(current_data); - block_buf[i].compression = - compress ? COMPRESSION_LZ4 : COMPRESSION_NONE; current_data += block_size; } if (!error) { - write_res = write(fd, block_buf, sizeof(*block_buf) * i); + write_res = ioctl(fd, INCFS_IOC_FILL_BLOCKS, &fill_blocks); if (write_res < 0) error = -errno; else - blocks_written = write_res / sizeof(*block_buf); + blocks_written = write_res; } if (error) { ksft_print_msg( @@ -813,21 +815,22 @@ static int load_hash_tree(const char *mount_dir, struct test_file *file) int err; int i; int fd; - - size_t blocks_size = - file->mtree_block_count * sizeof(struct incfs_new_data_block); - struct incfs_new_data_block *blocks = NULL; char *file_path; + struct incfs_fill_blocks fill_blocks = { + .count = file->mtree_block_count, + }; + struct incfs_fill_block *fill_block_array = + calloc(fill_blocks.count, sizeof(struct incfs_fill_block)); - if (blocks_size == 0) + if (fill_blocks.count == 0) return 0; - blocks = malloc(blocks_size); - if (!blocks) + if (!fill_block_array) return -ENOMEM; + fill_blocks.fill_blocks = ptr_to_u64(fill_block_array); - for (i = 0; i < file->mtree_block_count; i++) { - blocks[i] = (struct incfs_new_data_block){ + for (i = 0; i < fill_blocks.count; i++) { + fill_block_array[i] = (struct incfs_fill_block){ .block_index = i, .data_len = INCFS_DATA_FILE_BLOCK_SIZE, .data = ptr_to_u64(file->mtree[i].data), @@ -843,10 +846,10 @@ static int load_hash_tree(const char *mount_dir, struct test_file *file) goto failure; } - err = write(fd, blocks, blocks_size); + err = ioctl(fd, INCFS_IOC_FILL_BLOCKS, &fill_blocks); close(fd); - if (err < blocks_size) + if (err < fill_blocks.count) err = errno; else { err = 0; @@ -854,7 +857,7 @@ static int load_hash_tree(const char *mount_dir, struct test_file *file) } failure: - free(blocks); + free(fill_block_array); return err; } @@ -1274,13 +1277,6 @@ static int dynamic_files_and_data_test(char *mount_dir) if (i == missing_file_idx) continue; - res = load_hash_tree(mount_dir, file); - if (res) { - ksft_print_msg("Can't load hashes for %s. error: %s\n", - file->name, strerror(-res)); - goto failure; - } - res = emit_test_file_data(mount_dir, file); if (res) { ksft_print_msg("Error %s emiting data for %s.\n", @@ -1479,7 +1475,6 @@ static int work_after_remount_test(char *mount_dir) /* Write first half of the data into the command file. (stage 1) */ for (i = 0; i < file_num_stage1; i++) { struct test_file *file = &test.files[i]; - int res; build_mtree(file); if (emit_file(cmd_fd, NULL, file->name, &file->id, @@ -1488,14 +1483,7 @@ static int work_after_remount_test(char *mount_dir) if (emit_test_file_data(mount_dir, file)) goto failure; - - res = load_hash_tree(mount_dir, file); - if (res) { - ksft_print_msg("Can't load hashes for %s. error: %s\n", - file->name, strerror(-res)); - goto failure; - } -} + } /* Unmount and mount again, to see that data is persistent. */ close(cmd_fd); From 8118f34d9664148a08f77f40a47b39f819e941ee Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Wed, 19 Feb 2020 10:07:25 -0800 Subject: [PATCH 3613/3715] ANDROID: Incremental fs: Pad hash blocks Test: incfs_test passes Bug: 133435829 Signed-off-by: Paul Lawrence Change-Id: I4e6fbd0938f00e7e6883ce1a26cbfd38fdcaa9a5 --- fs/incfs/integrity.c | 14 ++++++++++++++ .../selftests/filesystems/incfs/incfs_test.c | 9 ++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/fs/incfs/integrity.c b/fs/incfs/integrity.c index feb212c38945..1d00dda109e3 100644 --- a/fs/incfs/integrity.c +++ b/fs/incfs/integrity.c @@ -198,6 +198,20 @@ int incfs_calc_digest(struct incfs_hash_alg *alg, struct mem_range data, return -EINVAL; desc->tfm = alg->shash; + + if (data.len < INCFS_DATA_FILE_BLOCK_SIZE) { + int err; + void *buf = kzalloc(INCFS_DATA_FILE_BLOCK_SIZE, GFP_NOFS); + + if (!buf) + return -ENOMEM; + + memcpy(buf, data.data, data.len); + err = crypto_shash_digest(desc, buf, INCFS_DATA_FILE_BLOCK_SIZE, + digest.data); + kfree(buf); + return err; + } return crypto_shash_digest(desc, data.data, data.len, digest.data); } diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index 5d0012b3972b..22f6c7fca7de 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -750,8 +750,9 @@ static int build_mtree(struct test_file *file) if (block_count == 1) { int seed = get_file_block_seed(file->index, 0); + memset(data, 0, INCFS_DATA_FILE_BLOCK_SIZE); rnd_buf((uint8_t *)data, file->size, seed); - sha256(data, file->size, file->root_hash); + sha256(data, INCFS_DATA_FILE_BLOCK_SIZE, file->root_hash); return 0; } @@ -766,11 +767,13 @@ static int build_mtree(struct test_file *file) int seed = get_file_block_seed(file->index, i); char *hash_ptr = file->mtree[block_index].data + block_off; - if (file->size - offset < block_size) + if (file->size - offset < block_size) { block_size = file->size - offset; + memset(data, 0, INCFS_DATA_FILE_BLOCK_SIZE); + } rnd_buf((uint8_t *)data, block_size, seed); - sha256(data, block_size, hash_ptr); + sha256(data, INCFS_DATA_FILE_BLOCK_SIZE, hash_ptr); } /* Build higher levels of hash tree. */ From 758073bec37716c23f4711fbed1bd4b9a21cbb96 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Fri, 13 Mar 2020 12:38:35 -0700 Subject: [PATCH 3614/3715] ANDROID: Incremental fs: Remove signature checks from kernel Test: selftests pass Bug: 133435829 Signed-off-by: Paul Lawrence Change-Id: Ia7e69b1b0176202da4b418ea815b370cbdacd5c2 --- fs/incfs/data_mgmt.c | 154 +++------- fs/incfs/data_mgmt.h | 5 +- fs/incfs/format.c | 22 +- fs/incfs/format.h | 45 +-- fs/incfs/integrity.c | 169 ++++++----- fs/incfs/integrity.h | 20 +- fs/incfs/vfs.c | 155 +++------- include/uapi/linux/incrementalfs.h | 69 ++--- .../selftests/filesystems/incfs/incfs_test.c | 275 +----------------- .../selftests/filesystems/incfs/utils.c | 231 +++++---------- .../selftests/filesystems/incfs/utils.h | 34 +-- 11 files changed, 306 insertions(+), 873 deletions(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index eb4e32040f4e..afdb3dfd3355 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -251,7 +251,7 @@ static int validate_hash_tree(struct file *bf, struct data_file *df, { u8 digest[INCFS_MAX_HASH_SIZE] = {}; struct mtree *tree = NULL; - struct ondisk_signature *sig = NULL; + struct incfs_df_signature *sig = NULL; struct mem_range calc_digest_rng; struct mem_range saved_digest_rng; struct mem_range root_hash_rng; @@ -274,8 +274,8 @@ static int validate_hash_tree(struct file *bf, struct data_file *df, return res; for (lvl = 0; lvl < tree->depth; lvl++) { - loff_t lvl_off = tree->hash_level_suboffset[lvl] + - sig->mtree_offset; + loff_t lvl_off = + tree->hash_level_suboffset[lvl] + sig->hash_offset; loff_t hash_block_off = lvl_off + round_down(hash_block_index * digest_size, INCFS_DATA_FILE_BLOCK_SIZE); @@ -323,72 +323,6 @@ static int validate_hash_tree(struct file *bf, struct data_file *df, return 0; } -static int revalidate_signature(struct file *bf, struct data_file *df) -{ - struct ondisk_signature *sig = df->df_signature; - struct mem_range root_hash = {}; - int result = 0; - u8 *sig_buf = NULL; - u8 *add_data_buf = NULL; - ssize_t read_res; - - /* File has no signature. */ - if (!sig || !df->df_hash_tree || sig->sig_size == 0) - return 0; - - /* Signature has already been validated. */ - if (df->df_signature_validated) - return 0; - - add_data_buf = kzalloc(sig->add_data_size, GFP_NOFS); - if (!add_data_buf) { - result = -ENOMEM; - goto out; - } - - read_res = incfs_kread(bf, add_data_buf, sig->add_data_size, - sig->add_data_offset); - if (read_res < 0) { - result = read_res; - goto out; - } - if (read_res != sig->add_data_size) { - result = -EIO; - goto out; - } - - sig_buf = kzalloc(sig->sig_size, GFP_NOFS); - if (!sig_buf) { - result = -ENOMEM; - goto out; - } - - read_res = incfs_kread(bf, sig_buf, sig->sig_size, sig->sig_offset); - if (read_res < 0) { - result = read_res; - goto out; - } - if (read_res != sig->sig_size) { - result = -EIO; - goto out; - } - - root_hash = range(df->df_hash_tree->root_hash, - df->df_hash_tree->alg->digest_size); - - result = incfs_validate_pkcs7_signature( - range(sig_buf, sig->sig_size), - root_hash, - range(add_data_buf, sig->add_data_size)); - - if (result == 0) - df->df_signature_validated = true; -out: - kfree(sig_buf); - kfree(add_data_buf); - return result; -} - static struct data_file_segment *get_file_segment(struct data_file *df, int block_index) { @@ -684,13 +618,6 @@ ssize_t incfs_read_data_file_block(struct mem_range dst, struct data_file *df, result = err; } - if (result > 0) { - int err = revalidate_signature(bf, df); - - if (err < 0) - result = err; - } - if (result >= 0) log_block_read(mi, &df->df_id, index, false /*timed out*/); @@ -756,7 +683,7 @@ unlock: int incfs_read_file_signature(struct data_file *df, struct mem_range dst) { struct file *bf = df->df_backing_file_context->bc_file; - struct ondisk_signature *sig; + struct incfs_df_signature *sig; int read_res = 0; if (!dst.data) @@ -786,7 +713,7 @@ int incfs_process_new_hash_block(struct data_file *df, struct backing_file_context *bfc = NULL; struct mount_info *mi = NULL; struct mtree *hash_tree = NULL; - struct ondisk_signature *sig = NULL; + struct incfs_df_signature *sig = NULL; loff_t hash_area_base = 0; loff_t hash_area_size = 0; int error = 0; @@ -805,11 +732,11 @@ int incfs_process_new_hash_block(struct data_file *df, hash_tree = df->df_hash_tree; sig = df->df_signature; - if (!hash_tree || !sig || sig->mtree_offset == 0) + if (!hash_tree || !sig || sig->hash_offset == 0) return -ENOTSUPP; - hash_area_base = sig->mtree_offset; - hash_area_size = sig->mtree_size; + hash_area_base = sig->hash_offset; + hash_area_size = sig->hash_size; if (hash_area_size < block->block_index * INCFS_DATA_FILE_BLOCK_SIZE + block->data_len) { /* Hash block goes beyond dedicated hash area of this file. */ @@ -867,58 +794,69 @@ static int process_file_signature_md(struct incfs_file_signature *sg, { struct data_file *df = handler->context; struct mtree *hash_tree = NULL; - struct ondisk_signature *signature = NULL; int error = 0; - loff_t base_tree_off = le64_to_cpu(sg->sg_hash_tree_offset); - u32 tree_size = le32_to_cpu(sg->sg_hash_tree_size); - loff_t sig_off = le64_to_cpu(sg->sg_sig_offset); - u32 sig_size = le32_to_cpu(sg->sg_sig_size); - loff_t add_data_off = le64_to_cpu(sg->sg_add_data_offset); - u32 add_data_size = le32_to_cpu(sg->sg_add_data_size); + struct incfs_df_signature *signature = + kzalloc(sizeof(*signature), GFP_NOFS); + void *buf = 0; + ssize_t read; - if (!df) - return -ENOENT; + if (!df || !df->df_backing_file_context || + !df->df_backing_file_context->bc_file) { + error = -ENOENT; + goto out; + } - signature = kzalloc(sizeof(*signature), GFP_NOFS); - if (!signature) { + signature->hash_offset = le64_to_cpu(sg->sg_hash_tree_offset); + signature->hash_size = le32_to_cpu(sg->sg_hash_tree_size); + signature->sig_offset = le64_to_cpu(sg->sg_sig_offset); + signature->sig_size = le32_to_cpu(sg->sg_sig_size); + + buf = kzalloc(signature->sig_size, GFP_NOFS); + if (!buf) { error = -ENOMEM; goto out; } - signature->add_data_offset = add_data_off; - signature->add_data_size = add_data_size; - signature->sig_offset = sig_off; - signature->sig_size = sig_size; - signature->mtree_offset = base_tree_off; - signature->mtree_size = tree_size; + read = incfs_kread(df->df_backing_file_context->bc_file, buf, + signature->sig_size, signature->sig_offset); + if (read < 0) { + error = read; + goto out; + } - hash_tree = incfs_alloc_mtree(sg->sg_hash_alg, df->df_block_count, - range(sg->sg_root_hash, sizeof(sg->sg_root_hash))); + if (read != signature->sig_size) { + error = -EINVAL; + goto out; + } + + hash_tree = incfs_alloc_mtree(range(buf, signature->sig_size), + df->df_block_count); if (IS_ERR(hash_tree)) { error = PTR_ERR(hash_tree); hash_tree = NULL; goto out; } - if (hash_tree->hash_tree_area_size != tree_size) { + if (hash_tree->hash_tree_area_size != signature->hash_size) { error = -EINVAL; goto out; } - if (tree_size > 0 && handler->md_record_offset <= base_tree_off) { + if (signature->hash_size > 0 && + handler->md_record_offset <= signature->hash_offset) { error = -EINVAL; goto out; } - if (handler->md_record_offset <= signature->add_data_offset || - handler->md_record_offset <= signature->sig_offset) { + if (handler->md_record_offset <= signature->sig_offset) { error = -EINVAL; goto out; } df->df_hash_tree = hash_tree; + hash_tree = NULL; df->df_signature = signature; + signature = NULL; out: - if (error) { - incfs_free_mtree(hash_tree); - kfree(signature); - } + incfs_free_mtree(hash_tree); + kfree(signature); + kfree(buf); return error; } diff --git a/fs/incfs/data_mgmt.h b/fs/incfs/data_mgmt.h index 5ce1966b4fc5..01045403026a 100644 --- a/fs/incfs/data_mgmt.h +++ b/fs/incfs/data_mgmt.h @@ -215,10 +215,7 @@ struct data_file { struct mtree *df_hash_tree; - struct ondisk_signature *df_signature; - - /* True, if file signature has already been validated. */ - bool df_signature_validated; + struct incfs_df_signature *df_signature; }; struct dir_file { diff --git a/fs/incfs/format.c b/fs/incfs/format.c index db71f527cf36..8c8213ee325d 100644 --- a/fs/incfs/format.c +++ b/fs/incfs/format.c @@ -299,9 +299,7 @@ int incfs_write_file_attr_to_backing_file(struct backing_file_context *bfc, } int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, - u8 hash_alg, u32 tree_size, - struct mem_range root_hash, struct mem_range add_data, - struct mem_range sig) + struct mem_range sig, u32 tree_size) { struct incfs_file_signature sg = {}; int result = 0; @@ -311,8 +309,6 @@ int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, if (!bfc) return -EFAULT; - if (root_hash.len > sizeof(sg.sg_root_hash)) - return -E2BIG; LOCK_REQUIRED(bfc->bc_mutex); @@ -321,7 +317,6 @@ int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, sg.sg_header.h_md_entry_type = INCFS_MD_SIGNATURE; sg.sg_header.h_record_size = cpu_to_le16(sizeof(sg)); sg.sg_header.h_next_md_offset = cpu_to_le64(0); - sg.sg_hash_alg = hash_alg; if (sig.data != NULL && sig.len > 0) { loff_t pos = incfs_get_end_offset(bfc->bc_file); @@ -333,20 +328,8 @@ int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, goto err; } - if (add_data.len > 0) { - loff_t pos = incfs_get_end_offset(bfc->bc_file); - - sg.sg_add_data_size = cpu_to_le32(add_data.len); - sg.sg_add_data_offset = cpu_to_le64(pos); - - result = write_to_bf(bfc, add_data.data, - add_data.len, pos, false); - if (result) - goto err; - } - tree_area_pos = incfs_get_end_offset(bfc->bc_file); - if (hash_alg && tree_size > 0) { + if (tree_size > 0) { if (tree_size > 5 * INCFS_DATA_FILE_BLOCK_SIZE) { /* * If hash tree is big enough, it makes sense to @@ -369,7 +352,6 @@ int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, sg.sg_hash_tree_size = cpu_to_le32(tree_size); sg.sg_hash_tree_offset = cpu_to_le64(tree_area_pos); } - memcpy(sg.sg_root_hash, root_hash.data, root_hash.len); /* Write a hash tree metadata record pointing to the hash tree above. */ result = append_md_to_backing_file(bfc, &sg.sg_header); diff --git a/fs/incfs/format.h b/fs/incfs/format.h index a86881482e19..55e6938b30d6 100644 --- a/fs/incfs/format.h +++ b/fs/incfs/format.h @@ -217,27 +217,27 @@ struct incfs_file_attr { __le32 fa_crc; } __packed; -/* Metadata record for file attribute. Type = INCFS_MD_SIGNATURE */ +/* Metadata record for file signature. Type = INCFS_MD_SIGNATURE */ struct incfs_file_signature { struct incfs_md_header sg_header; - __u8 sg_hash_alg; /* Value from incfs_hash_tree_algorithm */ + __le32 sg_sig_size; /* The size of the signature. */ + + __le64 sg_sig_offset; /* Signature's offset in the backing file */ __le32 sg_hash_tree_size; /* The size of the hash tree. */ __le64 sg_hash_tree_offset; /* Hash tree offset in the backing file */ - - __u8 sg_root_hash[INCFS_MAX_HASH_SIZE]; - - __le32 sg_sig_size; /* The size of the pkcs7 signature. */ - - __le64 sg_sig_offset; /* pkcs7 signature's offset in the backing file */ - - __le32 sg_add_data_size; /* The size of the additional data. */ - - __le64 sg_add_data_offset; /* Additional data's offset */ } __packed; +/* In memory version of above */ +struct incfs_df_signature { + u32 sig_size; + u64 sig_offset; + u32 hash_size; + u64 hash_offset; +}; + /* State of the backing file. */ struct backing_file_context { /* Protects writes to bc_file */ @@ -253,23 +253,6 @@ struct backing_file_context { loff_t bc_last_md_record_offset; }; - -/* Backing file locations of things required for signature validation. */ -struct ondisk_signature { - - loff_t add_data_offset; /* Additional data's offset */ - - loff_t sig_offset; /* pkcs7 signature's offset in the backing file */ - - loff_t mtree_offset; /* Backing file offset of the hash tree. */ - - u32 add_data_size; /* The size of the additional data. */ - - u32 sig_size; /* The size of the pkcs7 signature. */ - - u32 mtree_size; /* The size of the hash tree. */ -}; - struct metadata_handler { loff_t md_record_offset; loff_t md_prev_record_offset; @@ -319,9 +302,7 @@ int incfs_write_file_attr_to_backing_file(struct backing_file_context *bfc, struct mem_range value, struct incfs_file_attr *attr); int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, - u8 hash_alg, u32 tree_size, - struct mem_range root_hash, struct mem_range add_data, - struct mem_range sig); + struct mem_range sig, u32 tree_size); int incfs_make_empty_backing_file(struct backing_file_context *bfc, incfs_uuid_t *uuid, u64 file_size); diff --git a/fs/incfs/integrity.c b/fs/incfs/integrity.c index 1d00dda109e3..f8af9a83ea8a 100644 --- a/fs/incfs/integrity.c +++ b/fs/incfs/integrity.c @@ -10,70 +10,6 @@ #include "integrity.h" -int incfs_validate_pkcs7_signature(struct mem_range pkcs7_blob, - struct mem_range root_hash, struct mem_range add_data) -{ - struct pkcs7_message *pkcs7 = NULL; - const void *data = NULL; - size_t data_len = 0; - const char *p; - int err; - - pkcs7 = pkcs7_parse_message(pkcs7_blob.data, pkcs7_blob.len); - if (IS_ERR(pkcs7)) { - pr_debug("PKCS#7 parsing error. ptr=%p size=%ld err=%ld\n", - pkcs7_blob.data, pkcs7_blob.len, -PTR_ERR(pkcs7)); - return PTR_ERR(pkcs7); - } - - err = pkcs7_get_content_data(pkcs7, &data, &data_len, NULL); - if (err || data_len == 0 || data == NULL) { - pr_debug("PKCS#7 message does not contain data\n"); - err = -EBADMSG; - goto out; - } - - if (root_hash.len == 0) { - pr_debug("Root hash is empty.\n"); - err = -EBADMSG; - goto out; - } - - if (data_len != root_hash.len + add_data.len) { - pr_debug("PKCS#7 data size doesn't match arguments.\n"); - err = -EKEYREJECTED; - goto out; - } - - p = data; - if (memcmp(p, root_hash.data, root_hash.len) != 0) { - pr_debug("Root hash mismatch.\n"); - err = -EKEYREJECTED; - goto out; - } - p += root_hash.len; - if (memcmp(p, add_data.data, add_data.len) != 0) { - pr_debug("Additional data mismatch.\n"); - err = -EKEYREJECTED; - goto out; - } - - err = pkcs7_verify(pkcs7, VERIFYING_UNSPECIFIED_SIGNATURE); - if (err) - pr_debug("PKCS#7 signature verification error: %d\n", -err); - - /* - * RSA signature verification sometimes returns unexpected error codes - * when signature doesn't match. - */ - if (err == -ERANGE || err == -EINVAL) - err = -EBADMSG; - -out: - pkcs7_free_message(pkcs7); - return err; -} - struct incfs_hash_alg *incfs_get_hash_alg(enum incfs_hash_tree_algorithm id) { static struct incfs_hash_alg sha256 = { @@ -113,11 +49,90 @@ struct incfs_hash_alg *incfs_get_hash_alg(enum incfs_hash_tree_algorithm id) return result; } +struct signature_info { + u32 version; + enum incfs_hash_tree_algorithm hash_algorithm; + u8 log2_blocksize; + struct mem_range salt; + struct mem_range root_hash; +}; -struct mtree *incfs_alloc_mtree(enum incfs_hash_tree_algorithm id, - int data_block_count, - struct mem_range root_hash) +static u32 read_u32(u8 **p, u8 *top, u32 *result) { + if (*p + sizeof(u32) > top) + return false; + + *result = le32_to_cpu(*(u32 *)*p); + *p += sizeof(u32); + return true; +} + +static bool read_u8(u8 **p, u8 *top, u8 *result) +{ + if (*p + sizeof(u8) > top) + return false; + + *result = *(u8 *)*p; + *p += sizeof(u8); + return true; +} + +static bool read_mem_range(u8 **p, u8 *top, struct mem_range *range) +{ + u32 len; + + if (!read_u32(p, top, &len) || *p + len > top) + return false; + + range->len = len; + range->data = *p; + *p += len; + return true; +} + +static int incfs_parse_signature(struct mem_range signature, + struct signature_info *si) +{ + u8 *p = signature.data; + u8 *top = signature.data + signature.len; + u32 hash_section_size; + + if (signature.len > INCFS_MAX_SIGNATURE_SIZE) + return -EINVAL; + + if (!read_u32(&p, top, &si->version) || + si->version != INCFS_SIGNATURE_VERSION) + return -EINVAL; + + if (!read_u32(&p, top, &hash_section_size) || + p + hash_section_size > top) + return -EINVAL; + top = p + hash_section_size; + + if (!read_u32(&p, top, &si->hash_algorithm) || + si->hash_algorithm != INCFS_HASH_TREE_SHA256) + return -EINVAL; + + if (!read_u8(&p, top, &si->log2_blocksize) || si->log2_blocksize != 12) + return -EINVAL; + + if (!read_mem_range(&p, top, &si->salt)) + return -EINVAL; + + if (!read_mem_range(&p, top, &si->root_hash)) + return -EINVAL; + + if (p != top) + return -EINVAL; + + return 0; +} + +struct mtree *incfs_alloc_mtree(struct mem_range signature, + int data_block_count) +{ + int error; + struct signature_info si; struct mtree *result = NULL; struct incfs_hash_alg *hash_alg = NULL; int hash_per_block; @@ -129,11 +144,15 @@ struct mtree *incfs_alloc_mtree(enum incfs_hash_tree_algorithm id, if (data_block_count <= 0) return ERR_PTR(-EINVAL); - hash_alg = incfs_get_hash_alg(id); + error = incfs_parse_signature(signature, &si); + if (error) + return ERR_PTR(error); + + hash_alg = incfs_get_hash_alg(si.hash_algorithm); if (IS_ERR(hash_alg)) return ERR_PTR(PTR_ERR(hash_alg)); - if (root_hash.len < hash_alg->digest_size) + if (si.root_hash.len < hash_alg->digest_size) return ERR_PTR(-EINVAL); result = kzalloc(sizeof(*result), GFP_NOFS); @@ -173,7 +192,7 @@ struct mtree *incfs_alloc_mtree(enum incfs_hash_tree_algorithm id, } /* Root hash is stored separately from the rest of the tree. */ - memcpy(result->root_hash, root_hash.data, hash_alg->digest_size); + memcpy(result->root_hash, si.root_hash.data, hash_alg->digest_size); return result; err: @@ -215,13 +234,3 @@ int incfs_calc_digest(struct incfs_hash_alg *alg, struct mem_range data, return crypto_shash_digest(desc, data.data, data.len, digest.data); } -void incfs_free_signature_info(struct signature_info *si) -{ - if (!si) - return; - kfree(si->root_hash.data); - kfree(si->additional_data.data); - kfree(si->signature.data); - kfree(si); -} - diff --git a/fs/incfs/integrity.h b/fs/incfs/integrity.h index da1c38486b2f..cf79b64da736 100644 --- a/fs/incfs/integrity.h +++ b/fs/incfs/integrity.h @@ -38,21 +38,10 @@ struct mtree { int depth; }; -struct signature_info { - struct mem_range root_hash; - - struct mem_range additional_data; - - struct mem_range signature; - - enum incfs_hash_tree_algorithm hash_alg; -}; - struct incfs_hash_alg *incfs_get_hash_alg(enum incfs_hash_tree_algorithm id); -struct mtree *incfs_alloc_mtree(enum incfs_hash_tree_algorithm id, - int data_block_count, - struct mem_range root_hash); +struct mtree *incfs_alloc_mtree(struct mem_range signature, + int data_block_count); void incfs_free_mtree(struct mtree *tree); @@ -64,9 +53,4 @@ size_t incfs_get_mtree_hash_count(enum incfs_hash_tree_algorithm alg, int incfs_calc_digest(struct incfs_hash_alg *alg, struct mem_range data, struct mem_range digest); -int incfs_validate_pkcs7_signature(struct mem_range pkcs7_blob, - struct mem_range root_hash, struct mem_range add_data); - -void incfs_free_signature_info(struct signature_info *si); - #endif /* _INCFS_INTEGRITY_H */ diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index 04f292e3377b..aae918963a76 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -840,104 +840,39 @@ static char *file_id_to_str(incfs_uuid_t id) return result; } -static struct signature_info *incfs_copy_signature_info_from_user( - struct incfs_file_signature_info __user *original) +static struct mem_range incfs_copy_signature_info_from_user(u8 __user *original, + u64 size) { - struct incfs_file_signature_info usr_si; - struct signature_info *result; - int error; + u8 *result; if (!original) - return NULL; + return range(NULL, 0); - if (copy_from_user(&usr_si, original, sizeof(usr_si)) > 0) - return ERR_PTR(-EFAULT); + if (size > INCFS_MAX_SIGNATURE_SIZE) + return range(ERR_PTR(-EFAULT), 0); - result = kzalloc(sizeof(*result), GFP_NOFS); + result = kzalloc(size, GFP_NOFS); if (!result) - return ERR_PTR(-ENOMEM); + return range(ERR_PTR(-ENOMEM), 0); - result->hash_alg = usr_si.hash_tree_alg; - - if (result->hash_alg) { - void *p = kzalloc(INCFS_MAX_HASH_SIZE, GFP_NOFS); - - if (!p) { - error = -ENOMEM; - goto err; - } - - /* TODO this sets the root_hash length to MAX_HASH_SIZE not - * the actual size. Fix, then set INCFS_MAX_HASH_SIZE back - * to 64 - */ - result->root_hash = range(p, INCFS_MAX_HASH_SIZE); - if (copy_from_user(p, u64_to_user_ptr(usr_si.root_hash), - result->root_hash.len) > 0) { - error = -EFAULT; - goto err; - } + if (copy_from_user(result, original, size)) { + kfree(result); + return range(ERR_PTR(-EFAULT), 0); } - if (usr_si.additional_data_size > INCFS_MAX_FILE_ATTR_SIZE) { - error = -E2BIG; - goto err; - } - - if (usr_si.additional_data && usr_si.additional_data_size) { - void *p = kzalloc(usr_si.additional_data_size, GFP_NOFS); - - if (!p) { - error = -ENOMEM; - goto err; - } - result->additional_data = range(p, - usr_si.additional_data_size); - if (copy_from_user(p, u64_to_user_ptr(usr_si.additional_data), - result->additional_data.len) > 0) { - error = -EFAULT; - goto err; - } - } - - if (usr_si.signature_size > INCFS_MAX_SIGNATURE_SIZE) { - error = -E2BIG; - goto err; - } - - if (usr_si.signature && usr_si.signature_size) { - void *p = kzalloc(usr_si.signature_size, GFP_NOFS); - - if (!p) { - error = -ENOMEM; - goto err; - } - result->signature = range(p, usr_si.signature_size); - if (copy_from_user(p, u64_to_user_ptr(usr_si.signature), - result->signature.len) > 0) { - error = -EFAULT; - goto err; - } - } - - return result; - -err: - incfs_free_signature_info(result); - return ERR_PTR(-error); + return range(result, size); } static int init_new_file(struct mount_info *mi, struct dentry *dentry, - incfs_uuid_t *uuid, u64 size, struct mem_range attr, - struct incfs_file_signature_info __user *fsi) + incfs_uuid_t *uuid, u64 size, struct mem_range attr, + u8 __user *user_signature_info, u64 signature_size) { struct path path = {}; struct file *new_file; int error = 0; struct backing_file_context *bfc = NULL; u32 block_count; - struct mem_range mem_range = {NULL}; - struct signature_info *si = NULL; + struct mem_range raw_signature = { NULL }; struct mtree *hash_tree = NULL; if (!mi || !dentry || !uuid) @@ -987,44 +922,27 @@ static int init_new_file(struct mount_info *mi, struct dentry *dentry, goto out; } - if (fsi) { - si = incfs_copy_signature_info_from_user(fsi); + if (user_signature_info) { + raw_signature = incfs_copy_signature_info_from_user( + user_signature_info, signature_size); - if (IS_ERR(si)) { - error = PTR_ERR(si); - si = NULL; + if (IS_ERR(raw_signature.data)) { + error = PTR_ERR(raw_signature.data); + raw_signature.data = NULL; goto out; } - if (si->hash_alg) { - hash_tree = incfs_alloc_mtree(si->hash_alg, block_count, - si->root_hash); - if (IS_ERR(hash_tree)) { - error = PTR_ERR(hash_tree); - hash_tree = NULL; - goto out; - } - - /* TODO This code seems wrong when len is zero - we - * should error out?? - */ - if (si->signature.len > 0) - error = incfs_validate_pkcs7_signature( - si->signature, - si->root_hash, - si->additional_data); - if (error) - goto out; - - error = incfs_write_signature_to_backing_file(bfc, - si->hash_alg, - hash_tree->hash_tree_area_size, - si->root_hash, si->additional_data, - si->signature); - - if (error) - goto out; + hash_tree = incfs_alloc_mtree(raw_signature, block_count); + if (IS_ERR(hash_tree)) { + error = PTR_ERR(hash_tree); + hash_tree = NULL; + goto out; } + + error = incfs_write_signature_to_backing_file( + bfc, raw_signature, hash_tree->hash_tree_area_size); + if (error) + goto out; } out: @@ -1033,8 +951,7 @@ out: incfs_free_bfc(bfc); } incfs_free_mtree(hash_tree); - incfs_free_signature_info(si); - kfree(mem_range.data); + kfree(raw_signature.data); if (error) pr_debug("incfs: %s error: %d\n", __func__, error); @@ -1292,7 +1209,7 @@ static long ioctl_create_file(struct mount_info *mi, goto delete_index_file; } - /* Save the file's attrubute as an xattr */ + /* Save the file's attribute as an xattr */ if (args.file_attr_len && args.file_attr) { if (args.file_attr_len > INCFS_MAX_FILE_ATTR_SIZE) { error = -E2BIG; @@ -1323,9 +1240,9 @@ static long ioctl_create_file(struct mount_info *mi, /* Initializing a newly created file. */ error = init_new_file(mi, index_file_dentry, &args.file_id, args.size, - range(attr_value, args.file_attr_len), - (struct incfs_file_signature_info __user *) - args.signature_info); + range(attr_value, args.file_attr_len), + (u8 __user *)args.signature_info, + args.signature_size); if (error) goto delete_index_file; diff --git a/include/uapi/linux/incrementalfs.h b/include/uapi/linux/incrementalfs.h index 81947ded482e..2efc53f591ef 100644 --- a/include/uapi/linux/incrementalfs.h +++ b/include/uapi/linux/incrementalfs.h @@ -35,6 +35,8 @@ #define INCFS_XATTR_METADATA_NAME (XATTR_USER_PREFIX "incfs.metadata") #define INCFS_MAX_SIGNATURE_SIZE 8096 +#define INCFS_SIGNATURE_VERSION 2 +#define INCFS_SIGNATURE_SECTIONS 2 #define INCFS_IOCTL_BASE_CODE 'g' @@ -142,48 +144,6 @@ enum incfs_hash_tree_algorithm { INCFS_HASH_TREE_SHA256 = 1 }; -struct incfs_file_signature_info { - /* - * A pointer to file's root hash (if determined != 0) - * Actual hash size determined by hash_tree_alg. - * Size of the buffer should be at least INCFS_MAX_HASH_SIZE - * - * Equivalent to: u8 *root_hash; - */ - __aligned_u64 root_hash; - - /* - * A pointer to additional data that was attached to the root hash - * before signing. - * - * Equivalent to: u8 *additional_data; - */ - __aligned_u64 additional_data; - - /* Size of additional data. */ - __u32 additional_data_size; - - /* Reserved - must be 0 */ - __u32 reserved1; - - /* - * A pointer to pkcs7 signature DER blob. - * - * Equivalent to: u8 *signature; - */ - __aligned_u64 signature; - - - /* Size of pkcs7 signature DER blob */ - __u32 signature_size; - - /* Reserved - must be 0 */ - __u32 reserved2; - - /* Value from incfs_hash_tree_algorithm */ - __u8 hash_tree_alg; -}; - /* * Create a new file or directory. */ @@ -240,11 +200,30 @@ struct incfs_new_file_args { /* Reserved - must be 0 */ __u32 reserved4; - /* struct incfs_file_signature_info *signature_info; */ + /* + * Points to an APK V4 Signature data blob + * Signature must have two sections + * Format is: + * u32 version + * u32 size_of_hash_info_section + * u8 hash_info_section[] + * u32 size_of_signing_info_section + * u8 signing_info_section[] + * + * Note that incfs does not care about what is in signing_info_section + * + * hash_info_section has following format: + * u32 hash_algorithm; // Must be SHA256 == 1 + * u8 log2_blocksize; // Must be 12 for 4096 byte blocks + * u32 salt_size; + * u8 salt[]; + * u32 hash_size; + * u8 root_hash[]; + */ __aligned_u64 signature_info; - /* Reserved - must be 0 */ - __aligned_u64 reserved5; + /* Size of signature_info */ + __aligned_u64 signature_size; /* Reserved - must be 0 */ __aligned_u64 reserved6; diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index 22f6c7fca7de..1cd1226f4e44 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -25,8 +25,6 @@ #include "lz4.h" #include "utils.h" -#define __packed __attribute__((__packed__)) - #define TEST_FAILURE 1 #define TEST_SUCCESS 0 #define INCFS_MAX_MTREE_LEVELS 8 @@ -69,101 +67,6 @@ struct linux_dirent64 { char d_name[0]; } __packed; -/* - * The certificate below and the private key were created by calling: - * openssl req -x509 -newkey rsa:4096 -keyout private.key -out cert.crt - * -days 1000 -sha256 -nodes -outform PEM -subj - * "/C=US/ST=WA/L=Kirkland/O=Example/OU=Org/CN=www.example.com" - */ -char x509_cert[] = -"-----BEGIN CERTIFICATE-----\n" -"MIIFvzCCA6egAwIBAgIUXpwqelEljm6BBllRQGHLrls2MYgwDQYJKoZIhvcNAQEL\n" -"BQAwbzELMAkGA1UEBhMCVVMxEzARBgNVBAgMCldhc2hpbmd0b24xETAPBgNVBAcM\n" -"CEtpcmtsYW5kMRAwDgYDVQQKDAdFeGFtcGxlMQwwCgYDVQQLDANPcmcxGDAWBgNV\n" -"BAMMD3d3dy5leGFtcGxlLmNvbTAeFw0xOTA4MDgyMzA3MDZaFw0yMjA1MDQyMzA3\n" -"MDZaMG8xCzAJBgNVBAYTAlVTMRMwEQYDVQQIDApXYXNoaW5ndG9uMREwDwYDVQQH\n" -"DAhLaXJrbGFuZDEQMA4GA1UECgwHRXhhbXBsZTEMMAoGA1UECwwDT3JnMRgwFgYD\n" -"VQQDDA93d3cuZXhhbXBsZS5jb20wggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIK\n" -"AoICAQC1LuFW/lDV/GflqFMz7RDvFFgWld982ZuDJRaK55JNj+MI4RZNL61PDw43\n" -"NeeJtqUoVxSLS9wHURjSjD/CV5GudUOnzGfbwFlLko+jhYRT4HNFS+5ys1FEJLtA\n" -"uYcY4P9GHQEXYUX+ue82A2kJ91oY6G3vCQYJFiGteb6TRDICmug31x4pBfB8rOdt\n" -"4/NXS/Dn+S0/mJlxw34IKfqrlFjzUziRZtAWWqDcfxFDUizSggkdXIUq4GY38RAD\n" -"qGewNNCab3ClJDP7/M32BhSNgsIKhgtSTM2+ocfvBhwup+BjV6UbL21DPAshlolV\n" -"gSL1HM2jin5bi4bpFMreY0LXwFih87/6AVSfQHY9TZrombVZnMxvB7NG1NCSwDBT\n" -"qjjFb3oiSMugJzY+MhISM754m46fwUyHZ1ylWCLJEU8kQ5A1q9vvqMcaDa4uTGP3\n" -"UgC6SyVmZxG2o+AO6m8TRTCtqHN41mPTM9HK4T1UyuzVpykSc2LlYkKE517SyEiV\n" -"XDmotNb2myXNYHHTjRYNxkq75Lbii2I4Q4z8XtDngaIrhZqACKSqIt2CocGjx61S\n" -"oxKWi+LGa7B4NaCMjz1LnaOIsXn1rJDRnUWL49T42g4kOi/5QaC2JDygfefw1hAb\n" -"uxkq9EYUDg+w9broltiBf4rKAnw8JMySARnyPZbj0lhZK3va5wIDAQABo1MwUTAd\n" -"BgNVHQ4EFgQUo6JN3gY2yGbzOTNj8Al7hNB3rw0wHwYDVR0jBBgwFoAUo6JN3gY2\n" -"yGbzOTNj8Al7hNB3rw0wDwYDVR0TAQH/BAUwAwEB/zANBgkqhkiG9w0BAQsFAAOC\n" -"AgEAQb3pJqOzM4whfNVdpEOswd1EApcWNM1ps9iTlEEjDoRv9F7F1PW0uXCIpk3B\n" -"j5JgCmIxAcPnzj42rduRSx421hHMZhbAIWI/JL4ZSF64qlG0YrmJDXlJgSMoyst5\n" -"biUqeWgO7Js5udPt3zhkeA62z3hGM6dE5B3k7gHTaKKtK17+UeR9imZKsOK8GBnM\n" -"rxMPI6XghxxAK2OQ/r09DHDiyf/GxgOE46oknfXfMPx3HaSvDKrZUTZ+UvVbM5c2\n" -"5eXOgH5UO/e4llLknJK7CoP/R6G7pV44iT4t4t9FMnvCYvavAHwfR+6z5vTF3o8a\n" -"wd80fC8z1vfLsIPLROdzBl9rGCvv536fPiEA677CM1AZkjfT0a9DVzrE1NDvuCUF\n" -"0KgEdiNwux+hO6dbTyiS38yPT6TbpoWJptJmFhFkC4hGvUgoX/TI0covSyf74VRH\n" -"k3BHojOBMYiX1K66xoN7fhlGK8cith3L0XXPB8CgSEUPWURvm8RCaGuX2T3FZomF\n" -"BCnNpN+WNnN3Yf4OkjtuvtxxktUU7pfVLsUxrdpo/ph4rWm6U83VT/Zlq92aF4vW\n" -"QJ+7uraQFip7e+Gy9g3UJINm3B7b1C4ch/Z/upCZESOI/23sVGzkfTgOrS+23i6/\n" -"Vi9YW75zySC2FCa1AWMS1NmS5qfDSycJUgD6YvOUg0C54ZI=\n" -"-----END CERTIFICATE-----"; - -char private_key[] = -"-----BEGIN PRIVATE KEY-----\n" -"MIIJQwIBADANBgkqhkiG9w0BAQEFAASCCS0wggkpAgEAAoICAQC1LuFW/lDV/Gfl\n" -"qFMz7RDvFFgWld982ZuDJRaK55JNj+MI4RZNL61PDw43NeeJtqUoVxSLS9wHURjS\n" -"jD/CV5GudUOnzGfbwFlLko+jhYRT4HNFS+5ys1FEJLtAuYcY4P9GHQEXYUX+ue82\n" -"A2kJ91oY6G3vCQYJFiGteb6TRDICmug31x4pBfB8rOdt4/NXS/Dn+S0/mJlxw34I\n" -"KfqrlFjzUziRZtAWWqDcfxFDUizSggkdXIUq4GY38RADqGewNNCab3ClJDP7/M32\n" -"BhSNgsIKhgtSTM2+ocfvBhwup+BjV6UbL21DPAshlolVgSL1HM2jin5bi4bpFMre\n" -"Y0LXwFih87/6AVSfQHY9TZrombVZnMxvB7NG1NCSwDBTqjjFb3oiSMugJzY+MhIS\n" -"M754m46fwUyHZ1ylWCLJEU8kQ5A1q9vvqMcaDa4uTGP3UgC6SyVmZxG2o+AO6m8T\n" -"RTCtqHN41mPTM9HK4T1UyuzVpykSc2LlYkKE517SyEiVXDmotNb2myXNYHHTjRYN\n" -"xkq75Lbii2I4Q4z8XtDngaIrhZqACKSqIt2CocGjx61SoxKWi+LGa7B4NaCMjz1L\n" -"naOIsXn1rJDRnUWL49T42g4kOi/5QaC2JDygfefw1hAbuxkq9EYUDg+w9broltiB\n" -"f4rKAnw8JMySARnyPZbj0lhZK3va5wIDAQABAoICAQCMKul/0J2e/ncub6t2t4dr\n" -"PnTrfCT6xKqPqciny4Ee6hr9So1jR2gvink380bd/mQFMmEdZqGhM3cdpAzLf82f\n" -"hu7BSNxsYIF0er0PB4MZFMJ4sMaXC+zp5/TJnP5MG/zBND0c5k8tQpEyWy8O28Jj\n" -"FKW/0F5P90Q0ncP20EJUS50tXgniOMsU2Prtw/UE6yZDgD0mPxsurMu66ycXSFwM\n" -"WqyfqEeBk7lw/AjR6Sft71W31lTbl+DclG0MN2OIKUPcxiwCRmDFKI36MDgERk1x\n" -"sMPfdrWRLj2ryDFTUuLAWBTOVEGWS0RdRsWWVaJCuHbKd6FLl0TW2xQbOfWDTjYC\n" -"Ps31ejh163qdbk7OGOZIbd83fP3jsyL+4eNzhUpeXMKhfG58mFIv4yhdZIUOpuL6\n" -"aqnoU9z9wEsJKj/SrKr3nw6tuTnmbXgNjun9LfTFmqqDRBYd0Okiprw6jHNM1jgA\n" -"GG0kC/K7r89jKymVDABwGMFCS33ynR1Tb6zG+cqgNMPw19Fy3uQuW21CjqSzCOyP\n" -"aEVCEUZeP+ofql5+7ZKi6Dj+EdTfeKt2ihgheHZZoaYSINb8tsnKbdJhwBfW9PFT\n" -"aT/hu3bnO2FPC8H2NGOqxOEeel9ALU4SFu1pOknEhiL3/mNfOQ+KgrSRDtNRlcL0\n" -"cto05J90u0cmqwWKlshfaQKCAQEA5dcklxs4ezyzt28NcsiyS02oZ+9TkQp6pCXV\n" -"kx7AwhivAmVTlJ+c6BegA5EPd7A1gknM3+EKzGpoBOqmlF45G57phVIAphAp4oCH\n" -"UOVtIQgM8p4EU2gtX+uNOopdYlpBQnWimXaHA2sOD9/yTbZ03j/McRH6D15+iCld\n" -"3880GHdZaYYbQmHoSDg39LRRO1bdS3WC0oKBD2gPi3K0b9RaZSwKzuVrmlvrLURj\n" -"WMZfmkGl4BsITfuoTxbWFVncG3Kb9eYkYUFZy4M2G/s849PS/HjrN7BvgpanjtVp\n" -"1/39APQfAYfUuBPbKYnb6F8dE0pb5cVd4uMZklAeTb3bXjOO9QKCAQEAyc4CxWXr\n" -"bG6Do5dGpWudQ7ucq00MR0T3MHQIu5XTn6BsPHAJ9ZgrQw9C24PXm2VEjjsrMs5T\n" -"rHNF9oeO39s25Za1iyJ+893icqA3h3ivCUOOoVE54BkuJK6REhkXPD5G1ubmxeBz\n" -"MKNehlpd/eSbJJArkzKFZ8sBtLt8i9VFhRnXSpDAbiMpCbjW+bem9MWdLmkenSnu\n" -"OUbnqYcJhFBCvOT7ZCHFCDNUNPfHcaReSY2EYjw0ZqtqAZD0Q+DL+RkLz7l1+/bF\n" -"eEwNjmjFTcwRyawqf38D4miU0H6ca16FkeSlbmM5p3HdwZK2HVYYz3FSwhox6Ebd\n" -"n6in42qfL4Ug6wKCAQAh9IDRWhIkErmyNdPUy1WbzmM8x5ye5t9rdLNywq5TfnYM\n" -"co/AezwhBax8GmgglIWzM9fykzqXLHklkMz/SlRBgl6ZdZ3m6qhlb/uNtfdDU/8l\n" -"sLaO4+sgKpp4tYxKRW8ytFJLPbmAhcZUDg+r73KgiuhXJAK/VoR29TWLJP9bRfaN\n" -"omRQkEpSsQuDOUhu7cxPo5KqKuGKNyNkxJNnmgWowLLwEfCtozrBO0M6EER7c4tf\n" -"6l51tuIMnSEPknD0FSB5WYCyZYcwi7fotlsuhVK8PdjyJzyyHDOw5FJ4uGsyQt55\n" -"yWlhsH1GS7mTQMn42Zlt/pR6OnbCqNdxQMUxy4gpAoIBAFvMbs5E0pb8nr0n72cI\n" -"UP2itl3mKpOw95D+94n9WcrfOt0zShSCKAvVQWCB1O5HXqwklj4CRWXI+iZu+7sx\n" -"CQPfTq3//ygH4x6paxkg+N6J8LPJMz6Rtb/R+QP2je9FlQvk9U1GEKArcLBFI0R/\n" -"XWOAgZHwBWd1nU0NjFY/qeQmIR02Q5LWQ7C8eG4X8MafriSShO6RSGCdtHwVhWq+\n" -"59ztfL3L7skQMFn37K3xS0LCMVpOcLfTeeFEgxjthVvG3OydPOJlGubiEbiaSEZf\n" -"cif/PUXKDYZMdIVzUsw0ryXykJ5qXKuizHFlv5oQtDCJKFBLgjBbLC2YluaIdekz\n" -"8gkCggEBAJWxS7EuB/qL7fOz0o3HRy0plR3qbwZ0pLoCz0Ii7WxraBS1yQwmxif1\n" -"Rgv89GyFqg1yQl3CSrMiw7oC9WxxxuiEZDO18c4KO3NTv9K4itN9OPQVBTHmEhod\n" -"KWcyP4/W/Sfuae77PyclSqUsAARRrKYn2fpLTS5ibaU0QZgHmdPgYDUrPr+6PHKK\n" -"ZfQKU2uBfuo6zoMbMmFi3UYG49j9rv4d6v+44vS1MPHV9JK/LD8YfBhgx8Pg/u6D\n" -"nUgipS48pkGjJr2u2Vu7Mx70vqz0Yf2neyyDbdLtkYauC4w7YKPTD0yzDJyGuAeB\n" -"GyPbW1yZa5vE302a1Cr0Cd7RC4AFAAw=\n" -"-----END PRIVATE KEY-----"; - struct test_files_set get_test_files_set(void) { static struct test_file files[] = { @@ -290,7 +193,7 @@ char *bin2hex(char *dst, const void *src, size_t count) return dst; } -static char *get_index_filename(char *mnt_dir, incfs_uuid_t id) +static char *get_index_filename(const char *mnt_dir, incfs_uuid_t id) { char path[FILENAME_MAX]; char str_id[1 + 2 * sizeof(id)]; @@ -722,8 +625,6 @@ static int build_mtree(struct test_file *file) int tree_lvl_index[INCFS_MAX_MTREE_LEVELS] = {}; int tree_lvl_count[INCFS_MAX_MTREE_LEVELS] = {}; int levels_count = 0; - char data_to_sign[256] = {}; - int sig_data_size; int i, level; if (file->size == 0) @@ -797,19 +698,6 @@ static int build_mtree(struct test_file *file) sha256(file->mtree[0].data, INCFS_DATA_FILE_BLOCK_SIZE, file->root_hash); - /* Calculating digital signature */ - snprintf(file->sig.add_data, sizeof(file->sig.add_data), "%ld", - file->size); - memcpy(data_to_sign, file->root_hash, SHA256_DIGEST_SIZE); - memcpy(data_to_sign + SHA256_DIGEST_SIZE, file->sig.add_data, - strlen(file->sig.add_data)); - sig_data_size = SHA256_DIGEST_SIZE + strlen(file->sig.add_data); - if (!sign_pkcs7(data_to_sign, sig_data_size, private_key, x509_cert, - &file->sig.data, &file->sig.size)) { - ksft_print_msg("Signing failed.\n"); - return -EINVAL; - } - return 0; } @@ -1873,162 +1761,6 @@ failure: return TEST_FAILURE; } -static int signature_test(char *mount_dir) -{ - struct test_files_set test = get_test_files_set(); - const int file_num = test.files_count; - int i = 0; - unsigned char sig_buf[INCFS_MAX_SIGNATURE_SIZE]; - char *backing_dir; - int cmd_fd = -1; - - backing_dir = create_backing_dir(mount_dir); - if (!backing_dir) - goto failure; - - /* Mount FS and release the backing file. (10s wait time) */ - if (mount_fs(mount_dir, backing_dir, 10000) != 0) - goto failure; - - cmd_fd = open_commands_file(mount_dir); - if (cmd_fd < 0) - goto failure; - - /* Write hashes and data. */ - for (i = 0; i < file_num; i++) { - struct test_file *file = &test.files[i]; - int res; - - build_mtree(file); - - res = crypto_emit_file(cmd_fd, NULL, file->name, &file->id, - file->size, file->root_hash, - file->sig.data, file->sig.size, file->sig.add_data); - - if (res) { - ksft_print_msg("Emit failed for %s. error: %s\n", - file->name, strerror(-res)); - goto failure; - } - - if (emit_test_file_data(mount_dir, file)) - goto failure; - - res = load_hash_tree(mount_dir, file); - if (res) { - ksft_print_msg("Can't load hashes for %s. error: %s\n", - file->name, strerror(-res)); - goto failure; - } - } - - /* Validate data */ - for (i = 0; i < file_num; i++) { - struct test_file *file = &test.files[i]; - int sig_len; - char *path; - int fd; - - if (validate_test_file_content(mount_dir, file) < 0) - goto failure; - - path = concat_file_name(mount_dir, file->name); - fd = open(path, O_RDWR); - free(path); - if (fd < 0) { - print_error("Can't open file"); - goto failure; - } - - sig_len = get_file_signature(fd, sig_buf, ARRAY_SIZE(sig_buf)); - - if (close(fd)) { - print_error("Can't close file"); - goto failure; - } - - if (sig_len < 0) { - ksft_print_msg("Can't load signature %s. error: %s\n", - file->name, strerror(-sig_len)); - goto failure; - } - - if (sig_len != file->sig.size || - memcmp(sig_buf, file->sig.data, sig_len)) { - ksft_print_msg("Signature mismatch %s.\n", - file->name); - goto failure; - } - } - - /* Unmount and mount again, to make sure the signature is persistent. */ - close(cmd_fd); - cmd_fd = -1; - if (umount(mount_dir) != 0) { - print_error("Can't unmout FS"); - goto failure; - } - if (mount_fs(mount_dir, backing_dir, 50) != 0) - goto failure; - - cmd_fd = open_commands_file(mount_dir); - if (cmd_fd < 0) - goto failure; - - /* Validate data again */ - for (i = 0; i < file_num; i++) { - struct test_file *file = &test.files[i]; - int sig_len; - char *path; - int fd; - - if (validate_test_file_content(mount_dir, file) < 0) - goto failure; - - path = concat_file_name(mount_dir, file->name); - fd = open(path, O_RDWR); - free(path); - if (fd < 0) { - print_error("Can't open file"); - goto failure; - } - - sig_len = get_file_signature(fd, sig_buf, ARRAY_SIZE(sig_buf)); - - if (close(fd)) { - print_error("Can't close file"); - goto failure; - } - - if (sig_len < 0) { - ksft_print_msg("Can't load signature %s. error: %s\n", - file->name, strerror(-sig_len)); - goto failure; - } - if (sig_len != file->sig.size || - memcmp(sig_buf, file->sig.data, sig_len)) { - ksft_print_msg("Signature mismatch %s.\n", - file->name); - goto failure; - } - } - - /* Final unmount */ - close(cmd_fd); - cmd_fd = -1; - if (umount(mount_dir) != 0) { - print_error("Can't unmout FS"); - goto failure; - } - return TEST_SUCCESS; - -failure: - close(cmd_fd); - free(backing_dir); - umount(mount_dir); - return TEST_FAILURE; -} - static int hash_tree_test(char *mount_dir) { char *backing_dir; @@ -2057,8 +1789,8 @@ static int hash_tree_test(char *mount_dir) build_mtree(file); res = crypto_emit_file(cmd_fd, NULL, file->name, &file->id, - file->size, file->root_hash, - file->sig.data, file->sig.size, file->sig.add_data); + file->size, file->root_hash, + file->sig.add_data); if (i == corrupted_file_idx) { /* Corrupt third blocks hash */ @@ -2383,7 +2115,6 @@ int main(int argc, char *argv[]) MAKE_TEST(work_after_remount_test), MAKE_TEST(child_procs_waiting_for_data_test), MAKE_TEST(multiple_providers_test), - MAKE_TEST(signature_test), MAKE_TEST(hash_tree_test), MAKE_TEST(read_log_test), }; diff --git a/tools/testing/selftests/filesystems/incfs/utils.c b/tools/testing/selftests/filesystems/incfs/utils.c index 08b8452ad0bc..3a72fa5d5e9a 100644 --- a/tools/testing/selftests/filesystems/incfs/utils.c +++ b/tools/testing/selftests/filesystems/incfs/utils.c @@ -23,7 +23,8 @@ #include "utils.h" -int mount_fs(char *mount_dir, char *backing_dir, int read_timeout_ms) +int mount_fs(const char *mount_dir, const char *backing_dir, + int read_timeout_ms) { static const char fs_name[] = INCFS_NAME; char mount_options[512]; @@ -39,7 +40,8 @@ int mount_fs(char *mount_dir, char *backing_dir, int read_timeout_ms) return result; } -int mount_fs_opt(char *mount_dir, char *backing_dir, char *opt) +int mount_fs_opt(const char *mount_dir, const char *backing_dir, + const char *opt) { static const char fs_name[] = INCFS_NAME; int result; @@ -50,179 +52,94 @@ int mount_fs_opt(char *mount_dir, char *backing_dir, char *opt) return result; } -int unlink_node(int fd, int parent_ino, char *filename) +struct hash_section { + uint32_t algorithm; + uint8_t log2_blocksize; + uint32_t salt_size; + /* no salt */ + uint32_t hash_size; + uint8_t hash[SHA256_DIGEST_SIZE]; +} __packed; + +struct signature_blob { + uint32_t version; + uint32_t hash_section_size; + struct hash_section hash_section; + uint32_t signing_section_size; + uint8_t signing_section[]; +} __packed; + +size_t format_signature(void **buf, const char *root_hash, const char *add_data) { - return 0; + size_t size = sizeof(struct signature_blob) + strlen(add_data) + 1; + struct signature_blob *sb = malloc(size); + + *sb = (struct signature_blob){ + .version = INCFS_SIGNATURE_VERSION, + .hash_section_size = sizeof(struct hash_section), + .hash_section = + (struct hash_section){ + .algorithm = INCFS_HASH_TREE_SHA256, + .log2_blocksize = 12, + .salt_size = 0, + .hash_size = SHA256_DIGEST_SIZE, + }, + .signing_section_size = sizeof(uint32_t) + strlen(add_data) + 1, + }; + + memcpy(sb->hash_section.hash, root_hash, SHA256_DIGEST_SIZE); + memcpy((char *)sb->signing_section, add_data, strlen(add_data) + 1); + *buf = sb; + return size; } - -static EVP_PKEY *deserialize_private_key(const char *pem_key) -{ - BIO *bio = NULL; - EVP_PKEY *pkey = NULL; - int len = strlen(pem_key); - - bio = BIO_new_mem_buf(pem_key, len); - if (!bio) - return NULL; - - pkey = PEM_read_bio_PrivateKey(bio, NULL, NULL, NULL); - BIO_free(bio); - return pkey; -} - -static X509 *deserialize_cert(const char *pem_cert) -{ - BIO *bio = NULL; - X509 *cert = NULL; - int len = strlen(pem_cert); - - bio = BIO_new_mem_buf(pem_cert, len); - if (!bio) - return NULL; - - cert = PEM_read_bio_X509(bio, NULL, NULL, NULL); - BIO_free(bio); - return cert; -} - -bool sign_pkcs7(const void *data_to_sign, size_t data_size, - char *pkey_pem, char *cert_pem, - void **sig_ret, size_t *sig_size_ret) -{ - /* - * PKCS#7 signing flags: - * - * - PKCS7_BINARY signing binary data, so skip MIME translation - * - * - PKCS7_NOATTR omit extra authenticated attributes, such as - * SMIMECapabilities - * - * - PKCS7_PARTIAL PKCS7_sign() creates a handle only, then - * PKCS7_sign_add_signer() can add a signer later. - * This is necessary to change the message digest - * algorithm from the default of SHA-1. Requires - * OpenSSL 1.0.0 or later. - */ - int pkcs7_flags = PKCS7_BINARY | PKCS7_NOATTR | PKCS7_PARTIAL; - void *sig; - size_t sig_size; - BIO *bio = NULL; - PKCS7 *p7 = NULL; - EVP_PKEY *pkey = NULL; - X509 *cert = NULL; - bool ok = false; - - const EVP_MD *md = EVP_sha256(); - - pkey = deserialize_private_key(pkey_pem); - if (!pkey) { - printf("deserialize_private_key failed\n"); - goto out; - } - - cert = deserialize_cert(cert_pem); - if (!cert) { - printf("deserialize_cert failed\n"); - goto out; - } - - bio = BIO_new_mem_buf(data_to_sign, data_size); - if (!bio) - goto out; - - p7 = PKCS7_sign(NULL, NULL, NULL, bio, pkcs7_flags); - if (!p7) { - printf("failed to initialize PKCS#7 signature object\n"); - goto out; - } - - if (!PKCS7_sign_add_signer(p7, cert, pkey, md, pkcs7_flags)) { - printf("failed to add signer to PKCS#7 signature object\n"); - goto out; - } - - if (PKCS7_final(p7, bio, pkcs7_flags) != 1) { - printf("failed to finalize PKCS#7 signature\n"); - goto out; - } - - BIO_free(bio); - bio = BIO_new(BIO_s_mem()); - if (!bio) { - printf("out of memory\n"); - goto out; - } - - if (i2d_PKCS7_bio(bio, p7) != 1) { - printf("failed to DER-encode PKCS#7 signature object\n"); - goto out; - } - - sig_size = BIO_get_mem_data(bio, &sig); - *sig_ret = malloc(sig_size); - memcpy(*sig_ret, sig, sig_size); - *sig_size_ret = sig_size; - ok = true; -out: - PKCS7_free(p7); - BIO_free(bio); - return ok; -} - -int crypto_emit_file(int fd, char *dir, char *filename, incfs_uuid_t *id_out, - size_t size, const char *root_hash, char *sig, size_t sig_size, - char *add_data) +int crypto_emit_file(int fd, const char *dir, const char *filename, + incfs_uuid_t *id_out, size_t size, const char *root_hash, + const char *add_data) { int mode = __S_IFREG | 0555; - struct incfs_file_signature_info sig_info = { - .hash_tree_alg = root_hash - ? INCFS_HASH_TREE_SHA256 - : 0, - .root_hash = ptr_to_u64(root_hash), - .additional_data = ptr_to_u64(add_data), - .additional_data_size = strlen(add_data), - .signature = ptr_to_u64(sig), - .signature_size = sig_size, - }; + void *signature; + int error = 0; struct incfs_new_file_args args = { .size = size, .mode = mode, .file_name = ptr_to_u64(filename), .directory_path = ptr_to_u64(dir), - .signature_info = ptr_to_u64(&sig_info), .file_attr = 0, .file_attr_len = 0 }; + args.signature_size = format_signature(&signature, root_hash, add_data); + args.signature_info = ptr_to_u64(signature); + md5(filename, strlen(filename), (char *)args.file_id.bytes); - if (ioctl(fd, INCFS_IOC_CREATE_FILE, &args) != 0) - return -errno; + if (ioctl(fd, INCFS_IOC_CREATE_FILE, &args) != 0) { + error = -errno; + goto out; + } *id_out = args.file_id; - return 0; + +out: + free(signature); + return error; } - -int emit_file(int fd, char *dir, char *filename, incfs_uuid_t *id_out, - size_t size, char *attr) +int emit_file(int fd, const char *dir, const char *filename, + incfs_uuid_t *id_out, size_t size, const char *attr) { int mode = __S_IFREG | 0555; - struct incfs_file_signature_info sig_info = { - .hash_tree_alg = 0, - .root_hash = ptr_to_u64(NULL) - }; - struct incfs_new_file_args args = { - .size = size, - .mode = mode, - .file_name = ptr_to_u64(filename), - .directory_path = ptr_to_u64(dir), - .signature_info = ptr_to_u64(&sig_info), - .file_attr = ptr_to_u64(attr), - .file_attr_len = attr ? strlen(attr) : 0 - }; + struct incfs_new_file_args args = { .size = size, + .mode = mode, + .file_name = ptr_to_u64(filename), + .directory_path = ptr_to_u64(dir), + .signature_info = ptr_to_u64(NULL), + .signature_size = 0, + .file_attr = ptr_to_u64(attr), + .file_attr_len = + attr ? strlen(attr) : 0 }; md5(filename, strlen(filename), (char *)args.file_id.bytes); @@ -250,7 +167,7 @@ int get_file_signature(int fd, unsigned char *buf, int buf_size) return -errno; } -loff_t get_file_size(char *name) +loff_t get_file_size(const char *name) { struct stat st; @@ -259,7 +176,7 @@ loff_t get_file_size(char *name) return -ENOENT; } -int open_commands_file(char *mount_dir) +int open_commands_file(const char *mount_dir) { char cmd_file[255]; int cmd_fd; @@ -273,7 +190,7 @@ int open_commands_file(char *mount_dir) return cmd_fd; } -int open_log_file(char *mount_dir) +int open_log_file(const char *mount_dir) { char cmd_file[255]; int cmd_fd; @@ -358,7 +275,7 @@ out: return result; } -void sha256(char *data, size_t dsize, char *hash) +void sha256(const char *data, size_t dsize, char *hash) { SHA256_CTX ctx; @@ -367,7 +284,7 @@ void sha256(char *data, size_t dsize, char *hash) SHA256_Final((unsigned char *)hash, &ctx); } -void md5(char *data, size_t dsize, char *hash) +void md5(const char *data, size_t dsize, char *hash) { MD5_CTX ctx; diff --git a/tools/testing/selftests/filesystems/incfs/utils.h b/tools/testing/selftests/filesystems/incfs/utils.h index 9c9ba3c5f70a..23c8a099662a 100644 --- a/tools/testing/selftests/filesystems/incfs/utils.h +++ b/tools/testing/selftests/filesystems/incfs/utils.h @@ -9,6 +9,8 @@ #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) +#define __packed __attribute__((__packed__)) + #ifdef __LP64__ #define ptr_to_u64(p) ((__u64)p) #else @@ -17,9 +19,11 @@ #define SHA256_DIGEST_SIZE 32 -int mount_fs(char *mount_dir, char *backing_dir, int read_timeout_ms); +int mount_fs(const char *mount_dir, const char *backing_dir, + int read_timeout_ms); -int mount_fs_opt(char *mount_dir, char *backing_dir, char *opt); +int mount_fs_opt(const char *mount_dir, const char *backing_dir, + const char *opt); int get_file_bmap(int cmd_fd, int ino, unsigned char *buf, int buf_size); @@ -28,32 +32,26 @@ int get_file_signature(int fd, unsigned char *buf, int buf_size); int emit_node(int fd, char *filename, int *ino_out, int parent_ino, size_t size, mode_t mode, char *attr); -int emit_file(int fd, char *dir, char *filename, incfs_uuid_t *id_out, - size_t size, char *attr); +int emit_file(int fd, const char *dir, const char *filename, + incfs_uuid_t *id_out, size_t size, const char *attr); -int crypto_emit_file(int fd, char *dir, char *filename, incfs_uuid_t *id_out, - size_t size, const char *root_hash, char *sig, size_t sig_size, - char *add_data); +int crypto_emit_file(int fd, const char *dir, const char *filename, + incfs_uuid_t *id_out, size_t size, const char *root_hash, + const char *add_data); -int unlink_node(int fd, int parent_ino, char *filename); +loff_t get_file_size(const char *name); -loff_t get_file_size(char *name); +int open_commands_file(const char *mount_dir); -int open_commands_file(char *mount_dir); - -int open_log_file(char *mount_dir); +int open_log_file(const char *mount_dir); int wait_for_pending_reads(int fd, int timeout_ms, struct incfs_pending_read_info *prs, int prs_count); char *concat_file_name(const char *dir, char *file); -void sha256(char *data, size_t dsize, char *hash); +void sha256(const char *data, size_t dsize, char *hash); -void md5(char *data, size_t dsize, char *hash); - -bool sign_pkcs7(const void *data_to_sign, size_t data_size, - char *pkey_pem, char *cert_pem, - void **sig_ret, size_t *sig_size_ret); +void md5(const char *data, size_t dsize, char *hash); int delete_dir_tree(const char *path); From cb94ec7a4ddaddd0df315a33f621f9101e7a2e6f Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Wed, 11 Mar 2020 15:21:20 -0700 Subject: [PATCH 3615/3715] ANDROID: Incremental fs: Add INCFS_IOC_PERMIT_FILL Provide a securable way to open a file for filling Test: incfs_test passes Bug: 138149732 Signed-off-by: Paul Lawrence Change-Id: Ib4b6fd839ad30ce08e31121d19e2c0d7066d302f --- fs/incfs/vfs.c | 62 +++++++++++++++- include/uapi/linux/incrementalfs.h | 23 +++++- .../selftests/filesystems/incfs/incfs_test.c | 73 ++++++++++++++++--- 3 files changed, 143 insertions(+), 15 deletions(-) diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index aae918963a76..f911c5eb9290 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -135,6 +135,11 @@ static const struct file_operations incfs_file_ops = { .compat_ioctl = dispatch_ioctl }; +enum FILL_PERMISSION { + CANT_FILL = 0, + CAN_FILL = 1, +}; + static const struct file_operations incfs_pending_read_file_ops = { .read = pending_reads_read, .poll = pending_reads_poll, @@ -1284,6 +1289,9 @@ static long ioctl_fill_blocks(struct file *f, void __user *arg) if (!df) return -EBADF; + if ((uintptr_t)f->private_data != CAN_FILL) + return -EPERM; + if (copy_from_user(&fill_blocks, usr_fill_blocks, sizeof(fill_blocks))) return -EFAULT; @@ -1336,6 +1344,53 @@ static long ioctl_fill_blocks(struct file *f, void __user *arg) return i; } +static long ioctl_permit_fill(struct file *f, void __user *arg) +{ + struct incfs_permit_fill __user *usr_permit_fill = arg; + struct incfs_permit_fill permit_fill; + long error = 0; + struct file *file = 0; + + if (f->f_op != &incfs_pending_read_file_ops) + return -EPERM; + + if (copy_from_user(&permit_fill, usr_permit_fill, sizeof(permit_fill))) + return -EFAULT; + + file = fget(permit_fill.file_descriptor); + if (IS_ERR(file)) + return PTR_ERR(file); + + if (file->f_op != &incfs_file_ops) { + error = -EPERM; + goto out; + } + + if (file->f_inode->i_sb != f->f_inode->i_sb) { + error = -EPERM; + goto out; + } + + switch ((uintptr_t)file->private_data) { + case CANT_FILL: + file->private_data = (void *)CAN_FILL; + break; + + case CAN_FILL: + pr_debug("CAN_FILL already set"); + break; + + default: + pr_warn("Invalid file private data"); + error = -EFAULT; + goto out; + } + +out: + fput(file); + return error; +} + static long ioctl_read_file_signature(struct file *f, void __user *arg) { struct incfs_get_file_sig_args __user *args_usr_ptr = arg; @@ -1393,6 +1448,8 @@ static long dispatch_ioctl(struct file *f, unsigned int req, unsigned long arg) return ioctl_create_file(mi, (void __user *)arg); case INCFS_IOC_FILL_BLOCKS: return ioctl_fill_blocks(f, (void __user *)arg); + case INCFS_IOC_PERMIT_FILL: + return ioctl_permit_fill(f, (void __user *)arg); case INCFS_IOC_READ_FILE_SIGNATURE: return ioctl_read_file_signature(f, (void __user *)arg); default: @@ -1823,9 +1880,10 @@ static int file_open(struct inode *inode, struct file *file) goto out; } - if (S_ISREG(inode->i_mode)) + if (S_ISREG(inode->i_mode)) { err = make_inode_ready_for_data_ops(mi, inode, backing_file); - else if (S_ISDIR(inode->i_mode)) { + file->private_data = (void *)CANT_FILL; + } else if (S_ISDIR(inode->i_mode)) { struct dir_file *dir = NULL; dir = incfs_open_dir_file(mi, backing_file); diff --git a/include/uapi/linux/incrementalfs.h b/include/uapi/linux/incrementalfs.h index 2efc53f591ef..fd65f575cdf0 100644 --- a/include/uapi/linux/incrementalfs.h +++ b/include/uapi/linux/incrementalfs.h @@ -51,13 +51,23 @@ _IOR(INCFS_IOCTL_BASE_CODE, 31, struct incfs_get_file_sig_args) /* - * Fill in one or more data block + * Fill in one or more data block. This may only be called on a handle + * passed as a parameter to INCFS_IOC_PERMIT_FILLING * * Returns number of blocks filled in, or error if none were */ #define INCFS_IOC_FILL_BLOCKS \ _IOR(INCFS_IOCTL_BASE_CODE, 32, struct incfs_fill_blocks) +/* + * Permit INCFS_IOC_FILL_BLOCKS on the given file descriptor + * May only be called on .pending_reads file + * + * Returns 0 on success or error + */ +#define INCFS_IOC_PERMIT_FILL \ + _IOW(INCFS_IOCTL_BASE_CODE, 33, struct incfs_permit_fill) + enum incfs_compression_alg { COMPRESSION_NONE = 0, COMPRESSION_LZ4 = 1 @@ -139,6 +149,17 @@ struct incfs_fill_blocks { __aligned_u64 fill_blocks; }; +/* + * Permit INCFS_IOC_FILL_BLOCKS on the given file descriptor + * May only be called on .pending_reads file + * + * Argument for INCFS_IOC_PERMIT_FILL + */ +struct incfs_permit_fill { + /* File to permit fills on */ + __u32 file_descriptor; +}; + enum incfs_hash_tree_algorithm { INCFS_HASH_TREE_NONE = 0, INCFS_HASH_TREE_SHA256 = 1 diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index 1cd1226f4e44..7031561c0173 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -204,15 +204,43 @@ static char *get_index_filename(const char *mnt_dir, incfs_uuid_t id) return strdup(path); } -int open_file_by_id(char *mnt_dir, incfs_uuid_t id) +int open_file_by_id(const char *mnt_dir, incfs_uuid_t id, bool use_ioctl) { char *path = get_index_filename(mnt_dir, id); + int cmd_fd = open_commands_file(mnt_dir); int fd = open(path, O_RDWR); + struct incfs_permit_fill permit_fill = { + .file_descriptor = fd, + }; + int error = 0; - free(path); if (fd < 0) { print_error("Can't open file by id."); + error = -errno; + goto out; + } + + if (use_ioctl && ioctl(cmd_fd, INCFS_IOC_PERMIT_FILL, &permit_fill)) { + print_error("Failed to call PERMIT_FILL"); + error = -errno; + goto out; + } + + if (ioctl(fd, INCFS_IOC_PERMIT_FILL, &permit_fill) != -1 || + errno != EPERM) { + print_error( + "Successfully called PERMIT_FILL on non pending_read file"); return -errno; + goto out; + } + +out: + free(path); + close(cmd_fd); + + if (error) { + close(fd); + return error; } return fd; @@ -258,12 +286,6 @@ static int emit_test_blocks(char *mnt_dir, struct test_file *file, int i = 0; int blocks_written = 0; - fd = open_file_by_id(mnt_dir, file->id); - if (fd <= 0) { - error = -errno; - goto out; - } - for (i = 0; i < block_count; i++) { int block_index = blocks[i]; bool compress = (file->index + block_index) % 2 == 0; @@ -315,6 +337,24 @@ static int emit_test_blocks(char *mnt_dir, struct test_file *file, } if (!error) { + fd = open_file_by_id(mnt_dir, file->id, false); + if (fd < 0) { + error = -errno; + goto out; + } + write_res = ioctl(fd, INCFS_IOC_FILL_BLOCKS, &fill_blocks); + if (write_res >= 0) { + ksft_print_msg("Wrote to file via normal fd error\n"); + error = -EPERM; + goto out; + } + + close(fd); + fd = open_file_by_id(mnt_dir, file->id, true); + if (fd < 0) { + error = -errno; + goto out; + } write_res = ioctl(fd, INCFS_IOC_FILL_BLOCKS, &fill_blocks); if (write_res < 0) error = -errno; @@ -706,7 +746,6 @@ static int load_hash_tree(const char *mount_dir, struct test_file *file) int err; int i; int fd; - char *file_path; struct incfs_fill_blocks fill_blocks = { .count = file->mtree_block_count, }; @@ -729,9 +768,7 @@ static int load_hash_tree(const char *mount_dir, struct test_file *file) }; } - file_path = concat_file_name(mount_dir, file->name); - fd = open(file_path, O_RDWR); - free(file_path); + fd = open_file_by_id(mount_dir, file->id, false); if (fd < 0) { err = errno; goto failure; @@ -739,7 +776,19 @@ static int load_hash_tree(const char *mount_dir, struct test_file *file) err = ioctl(fd, INCFS_IOC_FILL_BLOCKS, &fill_blocks); close(fd); + if (err >= 0) { + err = -EPERM; + goto failure; + } + fd = open_file_by_id(mount_dir, file->id, true); + if (fd < 0) { + err = errno; + goto failure; + } + + err = ioctl(fd, INCFS_IOC_FILL_BLOCKS, &fill_blocks); + close(fd); if (err < fill_blocks.count) err = errno; else { From ffee24cf3053e942fc694ed52d79dbda6e6c01c8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 4 Dec 2018 08:47:44 -0800 Subject: [PATCH 3616/3715] phy: Revert toggling reset changes. commit 7b566f70e1bf65b189b66eb3de6f431c30f7dff2 upstream. This reverts: ef1b5bf506b1 ("net: phy: Fix not to call phy_resume() if PHY is not attached") 8c85f4b81296 ("net: phy: micrel: add toggling phy reset if PHY is not attached") Andrew Lunn informs me that there are alternative efforts underway to fix this more properly. Signed-off-by: David S. Miller [just take the ef1b5bf506b1 revert - gregkh] Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/phy_device.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 99dae55cd334..a98c227a4c2e 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -76,7 +76,7 @@ static LIST_HEAD(phy_fixup_list); static DEFINE_MUTEX(phy_fixup_lock); #ifdef CONFIG_PM -static bool mdio_bus_phy_may_suspend(struct phy_device *phydev, bool suspend) +static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) { struct device_driver *drv = phydev->mdio.dev.driver; struct phy_driver *phydrv = to_phy_driver(drv); @@ -88,11 +88,10 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev, bool suspend) /* PHY not attached? May suspend if the PHY has not already been * suspended as part of a prior call to phy_disconnect() -> * phy_detach() -> phy_suspend() because the parent netdev might be the - * MDIO bus driver and clock gated at this point. Also may resume if - * PHY is not attached. + * MDIO bus driver and clock gated at this point. */ if (!netdev) - return suspend ? !phydev->suspended : phydev->suspended; + return !phydev->suspended; /* Don't suspend PHY if the attached netdev parent may wakeup. * The parent may point to a PCI device, as in tg3 driver. @@ -122,7 +121,7 @@ static int mdio_bus_phy_suspend(struct device *dev) if (phydev->attached_dev && phydev->adjust_link) phy_stop_machine(phydev); - if (!mdio_bus_phy_may_suspend(phydev, true)) + if (!mdio_bus_phy_may_suspend(phydev)) return 0; return phy_suspend(phydev); @@ -133,7 +132,7 @@ static int mdio_bus_phy_resume(struct device *dev) struct phy_device *phydev = to_phy_device(dev); int ret; - if (!mdio_bus_phy_may_suspend(phydev, false)) + if (!mdio_bus_phy_may_suspend(phydev)) goto no_resume; ret = phy_resume(phydev); From 139fe35030c33abc54e0cee38a0c50388a38cdd9 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 20 Feb 2020 15:34:53 -0800 Subject: [PATCH 3617/3715] net: phy: Avoid multiple suspends commit 503ba7c6961034ff0047707685644cad9287c226 upstream. It is currently possible for a PHY device to be suspended as part of a network device driver's suspend call while it is still being attached to that net_device, either via phy_suspend() or implicitly via phy_stop(). Later on, when the MDIO bus controller get suspended, we would attempt to suspend again the PHY because it is still attached to a network device. This is both a waste of time and creates an opportunity for improper clock/power management bugs to creep in. Fixes: 803dd9c77ac3 ("net: phy: avoid suspending twice a PHY") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/phy_device.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index a98c227a4c2e..31ef3e47edf6 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -91,7 +91,7 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) * MDIO bus driver and clock gated at this point. */ if (!netdev) - return !phydev->suspended; + goto out; /* Don't suspend PHY if the attached netdev parent may wakeup. * The parent may point to a PCI device, as in tg3 driver. @@ -106,7 +106,8 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) if (device_may_wakeup(&netdev->dev)) return false; - return true; +out: + return !phydev->suspended; } static int mdio_bus_phy_suspend(struct device *dev) From 81ab17d70bfbcebc328556ab9896b3015af76c60 Mon Sep 17 00:00:00 2001 From: Dmitry Yakunin Date: Thu, 5 Mar 2020 17:45:57 +0300 Subject: [PATCH 3618/3715] cgroup, netclassid: periodically release file_lock on classid updating MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 018d26fcd12a75fb9b5fe233762aa3f2f0854b88 ] In our production environment we have faced with problem that updating classid in cgroup with heavy tasks cause long freeze of the file tables in this tasks. By heavy tasks we understand tasks with many threads and opened sockets (e.g. balancers). This freeze leads to an increase number of client timeouts. This patch implements following logic to fix this issue: аfter iterating 1000 file descriptors file table lock will be released thus providing a time gap for socket creation/deletion. Now update is non atomic and socket may be skipped using calls: dup2(oldfd, newfd); close(oldfd); But this case is not typical. Moreover before this patch skip is possible too by hiding socket fd in unix socket buffer. New sockets will be allocated with updated classid because cgroup state is updated before start of the file descriptors iteration. So in common cases this patch has no side effects. Signed-off-by: Dmitry Yakunin Reviewed-by: Konstantin Khlebnikov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/netclassid_cgroup.c | 47 ++++++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 7bf833598615..67feeb207dad 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -57,30 +57,60 @@ static void cgrp_css_free(struct cgroup_subsys_state *css) kfree(css_cls_state(css)); } +/* + * To avoid freezing of sockets creation for tasks with big number of threads + * and opened sockets lets release file_lock every 1000 iterated descriptors. + * New sockets will already have been created with new classid. + */ + +struct update_classid_context { + u32 classid; + unsigned int batch; +}; + +#define UPDATE_CLASSID_BATCH 1000 + static int update_classid_sock(const void *v, struct file *file, unsigned n) { int err; + struct update_classid_context *ctx = (void *)v; struct socket *sock = sock_from_file(file, &err); if (sock) { spin_lock(&cgroup_sk_update_lock); - sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, - (unsigned long)v); + sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, ctx->classid); spin_unlock(&cgroup_sk_update_lock); } + if (--ctx->batch == 0) { + ctx->batch = UPDATE_CLASSID_BATCH; + return n + 1; + } return 0; } +static void update_classid_task(struct task_struct *p, u32 classid) +{ + struct update_classid_context ctx = { + .classid = classid, + .batch = UPDATE_CLASSID_BATCH + }; + unsigned int fd = 0; + + do { + task_lock(p); + fd = iterate_fd(p->files, fd, update_classid_sock, &ctx); + task_unlock(p); + cond_resched(); + } while (fd); +} + static void cgrp_attach(struct cgroup_taskset *tset) { struct cgroup_subsys_state *css; struct task_struct *p; cgroup_taskset_for_each(p, css, tset) { - task_lock(p); - iterate_fd(p->files, 0, update_classid_sock, - (void *)(unsigned long)css_cls_state(css)->classid); - task_unlock(p); + update_classid_task(p, css_cls_state(css)->classid); } } @@ -102,10 +132,7 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, css_task_iter_start(css, 0, &it); while ((p = css_task_iter_next(&it))) { - task_lock(p); - iterate_fd(p->files, 0, update_classid_sock, - (void *)(unsigned long)cs->classid); - task_unlock(p); + update_classid_task(p, cs->classid); cond_resched(); } css_task_iter_end(&it); From 31b4b975e3ab80af34d9dba33664e7cd01274e94 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 7 Mar 2020 22:05:14 -0800 Subject: [PATCH 3619/3715] gre: fix uninit-value in __iptunnel_pull_header [ Upstream commit 17c25cafd4d3e74c83dce56b158843b19c40b414 ] syzbot found an interesting case of the kernel reading an uninit-value [1] Problem is in the handling of ETH_P_WCCP in gre_parse_header() We look at the byte following GRE options to eventually decide if the options are four bytes longer. Use skb_header_pointer() to not pull bytes if we found that no more bytes were needed. All callers of gre_parse_header() are properly using pskb_may_pull() anyway before proceeding to next header. [1] BUG: KMSAN: uninit-value in pskb_may_pull include/linux/skbuff.h:2303 [inline] BUG: KMSAN: uninit-value in __iptunnel_pull_header+0x30c/0xbd0 net/ipv4/ip_tunnel_core.c:94 CPU: 1 PID: 11784 Comm: syz-executor940 Not tainted 5.6.0-rc2-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x220 lib/dump_stack.c:118 kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:118 __msan_warning+0x58/0xa0 mm/kmsan/kmsan_instr.c:215 pskb_may_pull include/linux/skbuff.h:2303 [inline] __iptunnel_pull_header+0x30c/0xbd0 net/ipv4/ip_tunnel_core.c:94 iptunnel_pull_header include/net/ip_tunnels.h:411 [inline] gre_rcv+0x15e/0x19c0 net/ipv6/ip6_gre.c:606 ip6_protocol_deliver_rcu+0x181b/0x22c0 net/ipv6/ip6_input.c:432 ip6_input_finish net/ipv6/ip6_input.c:473 [inline] NF_HOOK include/linux/netfilter.h:307 [inline] ip6_input net/ipv6/ip6_input.c:482 [inline] ip6_mc_input+0xdf2/0x1460 net/ipv6/ip6_input.c:576 dst_input include/net/dst.h:442 [inline] ip6_rcv_finish net/ipv6/ip6_input.c:76 [inline] NF_HOOK include/linux/netfilter.h:307 [inline] ipv6_rcv+0x683/0x710 net/ipv6/ip6_input.c:306 __netif_receive_skb_one_core net/core/dev.c:5198 [inline] __netif_receive_skb net/core/dev.c:5312 [inline] netif_receive_skb_internal net/core/dev.c:5402 [inline] netif_receive_skb+0x66b/0xf20 net/core/dev.c:5461 tun_rx_batched include/linux/skbuff.h:4321 [inline] tun_get_user+0x6aef/0x6f60 drivers/net/tun.c:1997 tun_chr_write_iter+0x1f2/0x360 drivers/net/tun.c:2026 call_write_iter include/linux/fs.h:1901 [inline] new_sync_write fs/read_write.c:483 [inline] __vfs_write+0xa5a/0xca0 fs/read_write.c:496 vfs_write+0x44a/0x8f0 fs/read_write.c:558 ksys_write+0x267/0x450 fs/read_write.c:611 __do_sys_write fs/read_write.c:623 [inline] __se_sys_write fs/read_write.c:620 [inline] __ia32_sys_write+0xdb/0x120 fs/read_write.c:620 do_syscall_32_irqs_on arch/x86/entry/common.c:339 [inline] do_fast_syscall_32+0x3c7/0x6e0 arch/x86/entry/common.c:410 entry_SYSENTER_compat+0x68/0x77 arch/x86/entry/entry_64_compat.S:139 RIP: 0023:0xf7f62d99 Code: 90 e8 0b 00 00 00 f3 90 0f ae e8 eb f9 8d 74 26 00 89 3c 24 c3 90 90 90 90 90 90 90 90 90 90 90 90 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 eb 0d 90 90 90 90 90 90 90 90 90 90 90 90 RSP: 002b:00000000fffedb2c EFLAGS: 00000217 ORIG_RAX: 0000000000000004 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000020002580 RDX: 0000000000000fca RSI: 0000000000000036 RDI: 0000000000000004 RBP: 0000000000008914 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:144 [inline] kmsan_internal_poison_shadow+0x66/0xd0 mm/kmsan/kmsan.c:127 kmsan_slab_alloc+0x8a/0xe0 mm/kmsan/kmsan_hooks.c:82 slab_alloc_node mm/slub.c:2793 [inline] __kmalloc_node_track_caller+0xb40/0x1200 mm/slub.c:4401 __kmalloc_reserve net/core/skbuff.c:142 [inline] __alloc_skb+0x2fd/0xac0 net/core/skbuff.c:210 alloc_skb include/linux/skbuff.h:1051 [inline] alloc_skb_with_frags+0x18c/0xa70 net/core/skbuff.c:5766 sock_alloc_send_pskb+0xada/0xc60 net/core/sock.c:2242 tun_alloc_skb drivers/net/tun.c:1529 [inline] tun_get_user+0x10ae/0x6f60 drivers/net/tun.c:1843 tun_chr_write_iter+0x1f2/0x360 drivers/net/tun.c:2026 call_write_iter include/linux/fs.h:1901 [inline] new_sync_write fs/read_write.c:483 [inline] __vfs_write+0xa5a/0xca0 fs/read_write.c:496 vfs_write+0x44a/0x8f0 fs/read_write.c:558 ksys_write+0x267/0x450 fs/read_write.c:611 __do_sys_write fs/read_write.c:623 [inline] __se_sys_write fs/read_write.c:620 [inline] __ia32_sys_write+0xdb/0x120 fs/read_write.c:620 do_syscall_32_irqs_on arch/x86/entry/common.c:339 [inline] do_fast_syscall_32+0x3c7/0x6e0 arch/x86/entry/common.c:410 entry_SYSENTER_compat+0x68/0x77 arch/x86/entry/entry_64_compat.S:139 Fixes: 95f5c64c3c13 ("gre: Move utility functions to common headers") Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/gre_demux.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 7efe740c06eb..4a5e55e94a9e 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -60,7 +60,9 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version) } EXPORT_SYMBOL_GPL(gre_del_protocol); -/* Fills in tpi and returns header length to be pulled. */ +/* Fills in tpi and returns header length to be pulled. + * Note that caller must use pskb_may_pull() before pulling GRE header. + */ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, bool *csum_err, __be16 proto, int nhs) { @@ -114,8 +116,14 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header */ if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { + u8 _val, *val; + + val = skb_header_pointer(skb, nhs + hdr_len, + sizeof(_val), &_val); + if (!val) + return -EINVAL; tpi->proto = proto; - if ((*(u8 *)options & 0xF0) != 0x40) + if ((*val & 0xF0) != 0x40) hdr_len += 4; } tpi->hdr_len = hdr_len; From 4dcb3398fbb14f613eda33b770f641b273ef8f69 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 10 Mar 2020 15:27:37 +0800 Subject: [PATCH 3620/3715] ipv6/addrconf: call ipv6_mc_up() for non-Ethernet interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 60380488e4e0b95e9e82aa68aa9705baa86de84c ] Rafał found an issue that for non-Ethernet interface, if we down and up frequently, the memory will be consumed slowly. The reason is we add allnodes/allrouters addressed in multicast list in ipv6_add_dev(). When link down, we call ipv6_mc_down(), store all multicast addresses via mld_add_delrec(). But when link up, we don't call ipv6_mc_up() for non-Ethernet interface to remove the addresses. This makes idev->mc_tomb getting bigger and bigger. The call stack looks like: addrconf_notify(NETDEV_REGISTER) ipv6_add_dev ipv6_dev_mc_inc(ff01::1) ipv6_dev_mc_inc(ff02::1) ipv6_dev_mc_inc(ff02::2) addrconf_notify(NETDEV_UP) addrconf_dev_config /* Alas, we support only Ethernet autoconfiguration. */ return; addrconf_notify(NETDEV_DOWN) addrconf_ifdown ipv6_mc_down igmp6_group_dropped(ff02::2) mld_add_delrec(ff02::2) igmp6_group_dropped(ff02::1) igmp6_group_dropped(ff01::1) After investigating, I can't found a rule to disable multicast on non-Ethernet interface. In RFC2460, the link could be Ethernet, PPP, ATM, tunnels, etc. In IPv4, it doesn't check the dev type when calls ip_mc_up() in inetdev_event(). Even for IPv6, we don't check the dev type and call ipv6_add_dev(), ipv6_dev_mc_inc() after register device. So I think it's OK to fix this memory consumer by calling ipv6_mc_up() for non-Ethernet interface. v2: Also check IFF_MULTICAST flag to make sure the interface supports multicast Reported-by: Rafał Miłecki Tested-by: Rafał Miłecki Fixes: 74235a25c673 ("[IPV6] addrconf: Fix IPv6 on tuntap tunnels") Fixes: 1666d49e1d41 ("mld: do not remove mld souce list info when set link down") Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrconf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a81201dd3a1a..092e72d6a61d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3223,6 +3223,10 @@ static void addrconf_dev_config(struct net_device *dev) (dev->type != ARPHRD_TUNNEL) && (dev->type != ARPHRD_NONE)) { /* Alas, we support only Ethernet autoconfiguration. */ + idev = __in6_dev_get(dev); + if (!IS_ERR_OR_NULL(idev) && dev->flags & IFF_UP && + dev->flags & IFF_MULTICAST) + ipv6_mc_up(idev); return; } From 6c5251993d38c59521b645a29eb2479957ba512f Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Mon, 9 Mar 2020 15:57:02 -0700 Subject: [PATCH 3621/3715] ipvlan: add cond_resched_rcu() while processing muticast backlog MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e18b353f102e371580f3f01dd47567a25acc3c1d ] If there are substantial number of slaves created as simulated by Syzbot, the backlog processing could take much longer and result into the issue found in the Syzbot report. INFO: rcu_sched detected stalls on CPUs/tasks: (detected by 1, t=10502 jiffies, g=5049, c=5048, q=752) All QSes seen, last rcu_sched kthread activity 10502 (4294965563-4294955061), jiffies_till_next_fqs=1, root ->qsmask 0x0 syz-executor.1 R running task on cpu 1 10984 11210 3866 0x30020008 179034491270 Call Trace: [] _sched_show_task kernel/sched/core.c:8063 [inline] [] _sched_show_task.cold+0x2fd/0x392 kernel/sched/core.c:8030 [] sched_show_task+0xb/0x10 kernel/sched/core.c:8073 [] print_other_cpu_stall kernel/rcu/tree.c:1577 [inline] [] check_cpu_stall kernel/rcu/tree.c:1695 [inline] [] __rcu_pending kernel/rcu/tree.c:3478 [inline] [] rcu_pending kernel/rcu/tree.c:3540 [inline] [] rcu_check_callbacks.cold+0xbb4/0xc29 kernel/rcu/tree.c:2876 [] update_process_times+0x32/0x80 kernel/time/timer.c:1635 [] tick_sched_handle+0xa0/0x180 kernel/time/tick-sched.c:161 [] tick_sched_timer+0x44/0x130 kernel/time/tick-sched.c:1193 [] __run_hrtimer kernel/time/hrtimer.c:1393 [inline] [] __hrtimer_run_queues+0x307/0xd90 kernel/time/hrtimer.c:1455 [] hrtimer_interrupt+0x2ea/0x730 kernel/time/hrtimer.c:1513 [] local_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1031 [inline] [] smp_apic_timer_interrupt+0x144/0x5e0 arch/x86/kernel/apic/apic.c:1056 [] apic_timer_interrupt+0x8e/0xa0 arch/x86/entry/entry_64.S:778 RIP: 0010:do_raw_read_lock+0x22/0x80 kernel/locking/spinlock_debug.c:153 RSP: 0018:ffff8801dad07ab8 EFLAGS: 00000a02 ORIG_RAX: ffffffffffffff12 RAX: 0000000000000000 RBX: ffff8801c4135680 RCX: 0000000000000000 RDX: 1ffff10038826afe RSI: ffff88019d816bb8 RDI: ffff8801c41357f0 RBP: ffff8801dad07ac0 R08: 0000000000004b15 R09: 0000000000310273 R10: ffff88019d816bb8 R11: 0000000000000001 R12: ffff8801c41357e8 R13: 0000000000000000 R14: ffff8801cfb19850 R15: ffff8801cfb198b0 [] __raw_read_lock_bh include/linux/rwlock_api_smp.h:177 [inline] [] _raw_read_lock_bh+0x3e/0x50 kernel/locking/spinlock.c:240 [] ipv6_chk_mcast_addr+0x11a/0x6f0 net/ipv6/mcast.c:1006 [] ip6_mc_input+0x319/0x8e0 net/ipv6/ip6_input.c:482 [] dst_input include/net/dst.h:449 [inline] [] ip6_rcv_finish+0x408/0x610 net/ipv6/ip6_input.c:78 [] NF_HOOK include/linux/netfilter.h:292 [inline] [] NF_HOOK include/linux/netfilter.h:286 [inline] [] ipv6_rcv+0x10e/0x420 net/ipv6/ip6_input.c:278 [] __netif_receive_skb_one_core+0x12a/0x1f0 net/core/dev.c:5303 [] __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:5417 [] process_backlog+0x216/0x6c0 net/core/dev.c:6243 [] napi_poll net/core/dev.c:6680 [inline] [] net_rx_action+0x47b/0xfb0 net/core/dev.c:6748 [] __do_softirq+0x2c8/0x99a kernel/softirq.c:317 [] invoke_softirq kernel/softirq.c:399 [inline] [] irq_exit+0x16a/0x1a0 kernel/softirq.c:439 [] exiting_irq arch/x86/include/asm/apic.h:561 [inline] [] smp_apic_timer_interrupt+0x165/0x5e0 arch/x86/kernel/apic/apic.c:1058 [] apic_timer_interrupt+0x8e/0xa0 arch/x86/entry/entry_64.S:778 RIP: 0010:__sanitizer_cov_trace_pc+0x26/0x50 kernel/kcov.c:102 RSP: 0018:ffff880196033bd8 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff12 RAX: ffff88019d8161c0 RBX: 00000000ffffffff RCX: ffffc90003501000 RDX: 0000000000000002 RSI: ffffffff816236d1 RDI: 0000000000000005 RBP: ffff880196033bd8 R08: ffff88019d8161c0 R09: 0000000000000000 R10: 1ffff10032c067f0 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000080 R14: 0000000000000000 R15: 0000000000000000 [] do_futex+0x151/0x1d50 kernel/futex.c:3548 [] C_SYSC_futex kernel/futex_compat.c:201 [inline] [] compat_SyS_futex+0x270/0x3b0 kernel/futex_compat.c:175 [] do_syscall_32_irqs_on arch/x86/entry/common.c:353 [inline] [] do_fast_syscall_32+0x357/0xe1c arch/x86/entry/common.c:415 [] entry_SYSENTER_compat+0x8b/0x9d arch/x86/entry/entry_64_compat.S:139 RIP: 0023:0xf7f23c69 RSP: 002b:00000000f5d1f12c EFLAGS: 00000282 ORIG_RAX: 00000000000000f0 RAX: ffffffffffffffda RBX: 000000000816af88 RCX: 0000000000000080 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 000000000816af8c RBP: 00000000f5d1f228 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 rcu_sched kthread starved for 10502 jiffies! g5049 c5048 f0x2 RCU_GP_WAIT_FQS(3) ->state=0x0 ->cpu=1 rcu_sched R running task on cpu 1 13048 8 2 0x90000000 179099587640 Call Trace: [] context_switch+0x60f/0xa60 kernel/sched/core.c:3209 [] __schedule+0x5aa/0x1da0 kernel/sched/core.c:3934 [] schedule+0x8f/0x1b0 kernel/sched/core.c:4011 [] schedule_timeout+0x50d/0xee0 kernel/time/timer.c:1803 [] rcu_gp_kthread+0xda1/0x3b50 kernel/rcu/tree.c:2327 [] kthread+0x348/0x420 kernel/kthread.c:246 [] ret_from_fork+0x56/0x70 arch/x86/entry/entry_64.S:393 Fixes: ba35f8588f47 (“ipvlan: Defer multicast / broadcast processing to a work-queue”) Signed-off-by: Mahesh Bandewar Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ipvlan/ipvlan_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 71ff6bd4be9f..91886b5323df 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -240,6 +240,7 @@ void ipvlan_process_multicast(struct work_struct *work) } ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true); local_bh_enable(); + cond_resched_rcu(); } rcu_read_unlock(); From 7c315855c6f490d0bb70bc38a85b536011b9bd82 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 9 Mar 2020 18:22:58 -0700 Subject: [PATCH 3622/3715] ipvlan: do not use cond_resched_rcu() in ipvlan_process_multicast() [ Upstream commit afe207d80a61e4d6e7cfa0611a4af46d0ba95628 ] Commit e18b353f102e ("ipvlan: add cond_resched_rcu() while processing muticast backlog") added a cond_resched_rcu() in a loop using rcu protection to iterate over slaves. This is breaking rcu rules, so lets instead use cond_resched() at a point we can reschedule Fixes: e18b353f102e ("ipvlan: add cond_resched_rcu() while processing muticast backlog") Signed-off-by: Eric Dumazet Cc: Mahesh Bandewar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ipvlan/ipvlan_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 91886b5323df..1d97d6958e4b 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -240,7 +240,6 @@ void ipvlan_process_multicast(struct work_struct *work) } ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true); local_bh_enable(); - cond_resched_rcu(); } rcu_read_unlock(); @@ -257,6 +256,7 @@ void ipvlan_process_multicast(struct work_struct *work) } if (dev) dev_put(dev); + cond_resched(); } } From d5f90b1703867035e4b80014d3341a65cf6d8f31 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 26 Feb 2020 19:47:34 +0100 Subject: [PATCH 3623/3715] netlink: Use netlink header as base to calculate bad attribute offset [ Upstream commit 84b3268027641401bb8ad4427a90a3cce2eb86f5 ] Userspace might send a batch that is composed of several netlink messages. The netlink_ack() function must use the pointer to the netlink header as base to calculate the bad attribute offset. Fixes: 2d4bc93368f5 ("netlink: extended ACK reporting") Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/af_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 07924559cb10..3e4e07559272 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2389,7 +2389,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, in_skb->len)) WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS, (u8 *)extack->bad_attr - - in_skb->data)); + (u8 *)nlh)); } else { if (extack->cookie_len) WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE, From 3f9e0b25fc13589071f93642b6de033e4792ddf0 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Tue, 10 Mar 2020 18:22:24 +0300 Subject: [PATCH 3624/3715] net: macsec: update SCI upon MAC address change. [ Upstream commit 6fc498bc82929ee23aa2f35a828c6178dfd3f823 ] SCI should be updated, because it contains MAC in its first 6 octets. Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver") Signed-off-by: Dmitry Bogdanov Signed-off-by: Mark Starovoytov Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/macsec.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 9bb65e0af7dd..ed2cb3ac578a 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -2871,6 +2871,11 @@ static void macsec_dev_set_rx_mode(struct net_device *dev) dev_uc_sync(real_dev, dev); } +static sci_t dev_to_sci(struct net_device *dev, __be16 port) +{ + return make_sci(dev->dev_addr, port); +} + static int macsec_set_mac_address(struct net_device *dev, void *p) { struct macsec_dev *macsec = macsec_priv(dev); @@ -2892,6 +2897,7 @@ static int macsec_set_mac_address(struct net_device *dev, void *p) out: ether_addr_copy(dev->dev_addr, addr->sa_data); + macsec->secy.sci = dev_to_sci(dev, MACSEC_PORT_ES); return 0; } @@ -3159,11 +3165,6 @@ static bool sci_exists(struct net_device *dev, sci_t sci) return false; } -static sci_t dev_to_sci(struct net_device *dev, __be16 port) -{ - return make_sci(dev->dev_addr, port); -} - static int macsec_add_dev(struct net_device *dev, sci_t sci, u8 icv_len) { struct macsec_dev *macsec = macsec_priv(dev); From ac07a9a4de593a9a3d94aa92f98e09f979c9eb03 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 4 Mar 2020 17:24:31 +0300 Subject: [PATCH 3625/3715] net: nfc: fix bounds checking bugs on "pipe" [ Upstream commit a3aefbfe45751bf7b338c181b97608e276b5bb73 ] This is similar to commit 674d9de02aa7 ("NFC: Fix possible memory corruption when handling SHDLC I-Frame commands") and commit d7ee81ad09f0 ("NFC: nci: Add some bounds checking in nci_hci_cmd_received()") which added range checks on "pipe". The "pipe" variable comes skb->data[0] in nfc_hci_msg_rx_work(). It's in the 0-255 range. We're using it as the array index into the hdev->pipes[] array which has NFC_HCI_MAX_PIPES (128) members. Fixes: 118278f20aa8 ("NFC: hci: Add pipes table to reference them with a tuple {gate, host}") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/nfc/hci/core.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 6bf14f4f4b42..ae315dbd3732 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -193,13 +193,20 @@ exit: void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, struct sk_buff *skb) { - u8 gate = hdev->pipes[pipe].gate; u8 status = NFC_HCI_ANY_OK; struct hci_create_pipe_resp *create_info; struct hci_delete_pipe_noti *delete_info; struct hci_all_pipe_cleared_noti *cleared_info; + u8 gate; - pr_debug("from gate %x pipe %x cmd %x\n", gate, pipe, cmd); + pr_debug("from pipe %x cmd %x\n", pipe, cmd); + + if (pipe >= NFC_HCI_MAX_PIPES) { + status = NFC_HCI_ANY_E_NOK; + goto exit; + } + + gate = hdev->pipes[pipe].gate; switch (cmd) { case NFC_HCI_ADM_NOTIFY_PIPE_CREATED: @@ -387,8 +394,14 @@ void nfc_hci_event_received(struct nfc_hci_dev *hdev, u8 pipe, u8 event, struct sk_buff *skb) { int r = 0; - u8 gate = hdev->pipes[pipe].gate; + u8 gate; + if (pipe >= NFC_HCI_MAX_PIPES) { + pr_err("Discarded event %x to invalid pipe %x\n", event, pipe); + goto exit; + } + + gate = hdev->pipes[pipe].gate; if (gate == NFC_HCI_INVALID_GATE) { pr_err("Discarded event %x to unopened pipe %x\n", event, pipe); goto exit; From db159fd8e2c71257e988612ab18e367a672f0243 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 9 Mar 2020 11:34:35 -0400 Subject: [PATCH 3626/3715] net/packet: tpacket_rcv: do not increment ring index on drop [ Upstream commit 46e4c421a053c36bf7a33dda2272481bcaf3eed3 ] In one error case, tpacket_rcv drops packets after incrementing the ring producer index. If this happens, it does not update tp_status to TP_STATUS_USER and thus the reader is stalled for an iteration of the ring, causing out of order arrival. The only such error path is when virtio_net_hdr_from_skb fails due to encountering an unknown GSO type. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 46b7fac82775..387589a4a340 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2299,6 +2299,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, TP_STATUS_KERNEL, (macoff+snaplen)); if (!h.raw) goto drop_n_account; + + if (do_vnet && + virtio_net_hdr_from_skb(skb, h.raw + macoff - + sizeof(struct virtio_net_hdr), + vio_le(), true, 0)) + goto drop_n_account; + if (po->tp_version <= TPACKET_V2) { packet_increment_rx_head(po, &po->rx_ring); /* @@ -2311,12 +2318,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, status |= TP_STATUS_LOSING; } - if (do_vnet && - virtio_net_hdr_from_skb(skb, h.raw + macoff - - sizeof(struct virtio_net_hdr), - vio_le(), true, 0)) - goto drop_n_account; - po->stats.stats1.tp_packets++; if (copy_skb) { status |= TP_STATUS_COPY; From 9c7cef12efca42f66c494d8e2c63dc7a76cb46ae Mon Sep 17 00:00:00 2001 From: You-Sheng Yang Date: Wed, 26 Feb 2020 23:37:10 +0800 Subject: [PATCH 3627/3715] r8152: check disconnect status after long sleep [ Upstream commit d64c7a08034b32c285e576208ae44fc3ba3fa7df ] Dell USB Type C docking WD19/WD19DC attaches additional peripherals as: /: Bus 02.Port 1: Dev 1, Class=root_hub, Driver=xhci_hcd/6p, 5000M |__ Port 1: Dev 11, If 0, Class=Hub, Driver=hub/4p, 5000M |__ Port 3: Dev 12, If 0, Class=Hub, Driver=hub/4p, 5000M |__ Port 4: Dev 13, If 0, Class=Vendor Specific Class, Driver=r8152, 5000M where usb 2-1-3 is a hub connecting all USB Type-A/C ports on the dock. When hotplugging such dock with additional usb devices already attached on it, the probing process may reset usb 2.1 port, therefore r8152 ethernet device is also reset. However, during r8152 device init there are several for-loops that, when it's unable to retrieve hardware registers due to being disconnected from USB, may take up to 14 seconds each in practice, and that has to be completed before USB may re-enumerate devices on the bus. As a result, devices attached to the dock will only be available after nearly 1 minute after the dock was plugged in: [ 216.388290] [250] r8152 2-1.4:1.0: usb_probe_interface [ 216.388292] [250] r8152 2-1.4:1.0: usb_probe_interface - got id [ 258.830410] r8152 2-1.4:1.0 (unnamed net_device) (uninitialized): PHY not ready [ 258.830460] r8152 2-1.4:1.0 (unnamed net_device) (uninitialized): Invalid header when reading pass-thru MAC addr [ 258.830464] r8152 2-1.4:1.0 (unnamed net_device) (uninitialized): Get ether addr fail This happens in, for example, r8153_init: static int generic_ocp_read(struct r8152 *tp, u16 index, u16 size, void *data, u16 type) { if (test_bit(RTL8152_UNPLUG, &tp->flags)) return -ENODEV; ... } static u16 ocp_read_word(struct r8152 *tp, u16 type, u16 index) { u32 data; ... generic_ocp_read(tp, index, sizeof(tmp), &tmp, type | byen); data = __le32_to_cpu(tmp); ... return (u16)data; } static void r8153_init(struct r8152 *tp) { ... if (test_bit(RTL8152_UNPLUG, &tp->flags)) return; for (i = 0; i < 500; i++) { if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) & AUTOLOAD_DONE) break; msleep(20); } ... } Since ocp_read_word() doesn't check the return status of generic_ocp_read(), and the only exit condition for the loop is to have a match in the returned value, such loops will only ends after exceeding its maximum runs when the device has been marked as disconnected, which takes 500 * 20ms = 10 seconds in theory, 14 in practice. To solve this long latency another test to RTL8152_UNPLUG flag should be added after those 20ms sleep to skip unnecessary loops, so that the device probe can complete early and proceed to parent port reset/reprobe process. This can be reproduced on all kernel versions up to latest v5.6-rc2, but after v5.5-rc7 the reproduce rate is dramatically lowered to 1/30 or less while it was around 1/2. Signed-off-by: You-Sheng Yang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/r8152.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index a7f9c1886bd4..cadf5ded45a9 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -2696,6 +2696,8 @@ static u16 r8153_phy_status(struct r8152 *tp, u16 desired) } msleep(20); + if (test_bit(RTL8152_UNPLUG, &tp->flags)) + break; } return data; @@ -4055,7 +4057,10 @@ static void r8153_init(struct r8152 *tp) if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) & AUTOLOAD_DONE) break; + msleep(20); + if (test_bit(RTL8152_UNPLUG, &tp->flags)) + break; } data = r8153_phy_status(tp, 0); @@ -4170,7 +4175,10 @@ static void r8153b_init(struct r8152 *tp) if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) & AUTOLOAD_DONE) break; + msleep(20); + if (test_bit(RTL8152_UNPLUG, &tp->flags)) + break; } data = r8153_phy_status(tp, 0); From e27f53b37d55635cd8e4eb9bd31d52998ed016d0 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Mon, 9 Mar 2020 18:16:24 +0000 Subject: [PATCH 3628/3715] sfc: detach from cb_page in efx_copy_channel() [ Upstream commit 4b1bd9db078f7d5332c8601a2f5bd43cf0458fd4 ] It's a resource, not a parameter, so we can't copy it into the new channel's TX queues, otherwise aliasing will lead to resource- management bugs if the channel is subsequently torn down without being initialised. Before the Fixes:-tagged commit there was a similar bug with tsoh_page, but I'm not sure it's worth doing another fix for such old kernels. Fixes: e9117e5099ea ("sfc: Firmware-Assisted TSO version 2") Suggested-by: Derek Shute Signed-off-by: Edward Cree Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/sfc/efx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index b9cb697b2818..e0d4c1e850cf 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -505,6 +505,7 @@ efx_copy_channel(const struct efx_channel *old_channel) if (tx_queue->channel) tx_queue->channel = channel; tx_queue->buffer = NULL; + tx_queue->cb_page = NULL; memset(&tx_queue->txd, 0, sizeof(tx_queue->txd)); } From e69c7ad48309cecc7f9d708853e5e48b90be1683 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Sun, 1 Mar 2020 22:07:17 -0500 Subject: [PATCH 3629/3715] bnxt_en: reinitialize IRQs when MTU is modified [ Upstream commit a9b952d267e59a3b405e644930f46d252cea7122 ] MTU changes may affect the number of IRQs so we must call bnxt_close_nic()/bnxt_open_nic() with the irq_re_init parameter set to true. The reason is that a larger MTU may require aggregation rings not needed with smaller MTU. We may not be able to allocate the required number of aggregation rings and so we reduce the number of channels which will change the number of IRQs. Without this patch, it may crash eventually in pci_disable_msix() when the IRQs are not properly unwound. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 41bc7820d2dd..5163da01e54f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7310,13 +7310,13 @@ static int bnxt_change_mtu(struct net_device *dev, int new_mtu) struct bnxt *bp = netdev_priv(dev); if (netif_running(dev)) - bnxt_close_nic(bp, false, false); + bnxt_close_nic(bp, true, false); dev->mtu = new_mtu; bnxt_set_ring_params(bp); if (netif_running(dev)) - return bnxt_open_nic(bp, false, false); + return bnxt_open_nic(bp, true, false); return 0; } From 944f7205341501a8135daee53b4b959af132de0a Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Mon, 9 Mar 2020 22:16:05 -0700 Subject: [PATCH 3630/3715] cgroup: memcg: net: do not associate sock with unrelated cgroup [ Upstream commit e876ecc67db80dfdb8e237f71e5b43bb88ae549c ] We are testing network memory accounting in our setup and noticed inconsistent network memory usage and often unrelated cgroups network usage correlates with testing workload. On further inspection, it seems like mem_cgroup_sk_alloc() and cgroup_sk_alloc() are broken in irq context specially for cgroup v1. mem_cgroup_sk_alloc() and cgroup_sk_alloc() can be called in irq context and kind of assumes that this can only happen from sk_clone_lock() and the source sock object has already associated cgroup. However in cgroup v1, where network memory accounting is opt-in, the source sock can be unassociated with any cgroup and the new cloned sock can get associated with unrelated interrupted cgroup. Cgroup v2 can also suffer if the source sock object was created by process in the root cgroup or if sk_alloc() is called in irq context. The fix is to just do nothing in interrupt. WARNING: Please note that about half of the TCP sockets are allocated from the IRQ context, so, memory used by such sockets will not be accouted by the memcg. The stack trace of mem_cgroup_sk_alloc() from IRQ-context: CPU: 70 PID: 12720 Comm: ssh Tainted: 5.6.0-smp-DEV #1 Hardware name: ... Call Trace: dump_stack+0x57/0x75 mem_cgroup_sk_alloc+0xe9/0xf0 sk_clone_lock+0x2a7/0x420 inet_csk_clone_lock+0x1b/0x110 tcp_create_openreq_child+0x23/0x3b0 tcp_v6_syn_recv_sock+0x88/0x730 tcp_check_req+0x429/0x560 tcp_v6_rcv+0x72d/0xa40 ip6_protocol_deliver_rcu+0xc9/0x400 ip6_input+0x44/0xd0 ? ip6_protocol_deliver_rcu+0x400/0x400 ip6_rcv_finish+0x71/0x80 ipv6_rcv+0x5b/0xe0 ? ip6_sublist_rcv+0x2e0/0x2e0 process_backlog+0x108/0x1e0 net_rx_action+0x26b/0x460 __do_softirq+0x104/0x2a6 do_softirq_own_stack+0x2a/0x40 do_softirq.part.19+0x40/0x50 __local_bh_enable_ip+0x51/0x60 ip6_finish_output2+0x23d/0x520 ? ip6table_mangle_hook+0x55/0x160 __ip6_finish_output+0xa1/0x100 ip6_finish_output+0x30/0xd0 ip6_output+0x73/0x120 ? __ip6_finish_output+0x100/0x100 ip6_xmit+0x2e3/0x600 ? ipv6_anycast_cleanup+0x50/0x50 ? inet6_csk_route_socket+0x136/0x1e0 ? skb_free_head+0x1e/0x30 inet6_csk_xmit+0x95/0xf0 __tcp_transmit_skb+0x5b4/0xb20 __tcp_send_ack.part.60+0xa3/0x110 tcp_send_ack+0x1d/0x20 tcp_rcv_state_process+0xe64/0xe80 ? tcp_v6_connect+0x5d1/0x5f0 tcp_v6_do_rcv+0x1b1/0x3f0 ? tcp_v6_do_rcv+0x1b1/0x3f0 __release_sock+0x7f/0xd0 release_sock+0x30/0xa0 __inet_stream_connect+0x1c3/0x3b0 ? prepare_to_wait+0xb0/0xb0 inet_stream_connect+0x3b/0x60 __sys_connect+0x101/0x120 ? __sys_getsockopt+0x11b/0x140 __x64_sys_connect+0x1a/0x20 do_syscall_64+0x51/0x200 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The stack trace of mem_cgroup_sk_alloc() from IRQ-context: Fixes: 2d7580738345 ("mm: memcontrol: consolidate cgroup socket tracking") Fixes: d979a39d7242 ("cgroup: duplicate cgroup reference when cloning sockets") Signed-off-by: Shakeel Butt Reviewed-by: Roman Gushchin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- kernel/cgroup/cgroup.c | 4 ++++ mm/memcontrol.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 829943aad7be..1e727fbaa0e4 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5799,6 +5799,10 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd) return; } + /* Don't associate the sock with unrelated interrupted task's cgroup. */ + if (in_interrupt()) + return; + rcu_read_lock(); while (true) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 326525a97c47..81400be03dcb 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5881,6 +5881,10 @@ void mem_cgroup_sk_alloc(struct sock *sk) return; } + /* Do not associate the sock with unrelated interrupted task's memcg. */ + if (in_interrupt()) + return; + rcu_read_lock(); memcg = mem_cgroup_from_task(current); if (memcg == root_mem_cgroup) From 357ac1da6e4d1dd02c1437b56ca4c8e21f652b5d Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Mon, 9 Mar 2020 22:16:06 -0700 Subject: [PATCH 3631/3715] net: memcg: late association of sock to memcg [ Upstream commit d752a4986532cb6305dfd5290a614cde8072769d ] If a TCP socket is allocated in IRQ context or cloned from unassociated (i.e. not associated to a memcg) in IRQ context then it will remain unassociated for its whole life. Almost half of the TCPs created on the system are created in IRQ context, so, memory used by such sockets will not be accounted by the memcg. This issue is more widespread in cgroup v1 where network memory accounting is opt-in but it can happen in cgroup v2 if the source socket for the cloning was created in root memcg. To fix the issue, just do the association of the sockets at the accept() time in the process context and then force charge the memory buffer already used and reserved by the socket. Signed-off-by: Shakeel Butt Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- mm/memcontrol.c | 14 -------------- net/core/sock.c | 5 ++++- net/ipv4/inet_connection_sock.c | 20 ++++++++++++++++++++ 3 files changed, 24 insertions(+), 15 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 81400be03dcb..5d6ebd1449f0 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5867,20 +5867,6 @@ void mem_cgroup_sk_alloc(struct sock *sk) if (!mem_cgroup_sockets_enabled) return; - /* - * Socket cloning can throw us here with sk_memcg already - * filled. It won't however, necessarily happen from - * process context. So the test for root memcg given - * the current task's memcg won't help us in this case. - * - * Respecting the original socket's memcg is a better - * decision in this case. - */ - if (sk->sk_memcg) { - css_get(&sk->sk_memcg->css); - return; - } - /* Do not associate the sock with unrelated interrupted task's memcg. */ if (in_interrupt()) return; diff --git a/net/core/sock.c b/net/core/sock.c index 03ca2f638eb4..d2cb2051d045 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1684,7 +1684,10 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) atomic_set(&newsk->sk_zckey, 0); sock_reset_flag(newsk, SOCK_DONE); - mem_cgroup_sk_alloc(newsk); + + /* sk->sk_memcg will be populated at accept() time */ + newsk->sk_memcg = NULL; + cgroup_sk_alloc(&newsk->sk_cgrp_data); rcu_read_lock(); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index da55ce62fe50..c786f81952f1 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -475,6 +475,26 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) } spin_unlock_bh(&queue->fastopenq.lock); } + + if (mem_cgroup_sockets_enabled) { + int amt; + + /* atomically get the memory usage, set and charge the + * sk->sk_memcg. + */ + lock_sock(newsk); + + /* The sk has not been accepted yet, no need to look at + * sk->sk_wmem_queued. + */ + amt = sk_mem_pages(newsk->sk_forward_alloc + + atomic_read(&sk->sk_rmem_alloc)); + mem_cgroup_sk_alloc(newsk); + if (newsk->sk_memcg && amt) + mem_cgroup_charge_skmem(newsk->sk_memcg, amt); + + release_sock(newsk); + } out: release_sock(sk); if (req) From 69b1fc5ce9dd58e890a6549197d6cbadcfa257cc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Mar 2020 11:44:26 -0700 Subject: [PATCH 3632/3715] net: memcg: fix lockdep splat in inet_csk_accept() commit 06669ea346e476a5339033d77ef175566a40efbb upstream. Locking newsk while still holding the listener lock triggered a lockdep splat [1] We can simply move the memcg code after we release the listener lock, as this can also help if multiple threads are sharing a common listener. Also fix a typo while reading socket sk_rmem_alloc. [1] WARNING: possible recursive locking detected 5.6.0-rc3-syzkaller #0 Not tainted -------------------------------------------- syz-executor598/9524 is trying to acquire lock: ffff88808b5b8b90 (sk_lock-AF_INET6){+.+.}, at: lock_sock include/net/sock.h:1541 [inline] ffff88808b5b8b90 (sk_lock-AF_INET6){+.+.}, at: inet_csk_accept+0x69f/0xd30 net/ipv4/inet_connection_sock.c:492 but task is already holding lock: ffff88808b5b9590 (sk_lock-AF_INET6){+.+.}, at: lock_sock include/net/sock.h:1541 [inline] ffff88808b5b9590 (sk_lock-AF_INET6){+.+.}, at: inet_csk_accept+0x8d/0xd30 net/ipv4/inet_connection_sock.c:445 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(sk_lock-AF_INET6); lock(sk_lock-AF_INET6); *** DEADLOCK *** May be due to missing lock nesting notation 1 lock held by syz-executor598/9524: #0: ffff88808b5b9590 (sk_lock-AF_INET6){+.+.}, at: lock_sock include/net/sock.h:1541 [inline] #0: ffff88808b5b9590 (sk_lock-AF_INET6){+.+.}, at: inet_csk_accept+0x8d/0xd30 net/ipv4/inet_connection_sock.c:445 stack backtrace: CPU: 0 PID: 9524 Comm: syz-executor598 Not tainted 5.6.0-rc3-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x188/0x20d lib/dump_stack.c:118 print_deadlock_bug kernel/locking/lockdep.c:2370 [inline] check_deadlock kernel/locking/lockdep.c:2411 [inline] validate_chain kernel/locking/lockdep.c:2954 [inline] __lock_acquire.cold+0x114/0x288 kernel/locking/lockdep.c:3954 lock_acquire+0x197/0x420 kernel/locking/lockdep.c:4484 lock_sock_nested+0xc5/0x110 net/core/sock.c:2947 lock_sock include/net/sock.h:1541 [inline] inet_csk_accept+0x69f/0xd30 net/ipv4/inet_connection_sock.c:492 inet_accept+0xe9/0x7c0 net/ipv4/af_inet.c:734 __sys_accept4_file+0x3ac/0x5b0 net/socket.c:1758 __sys_accept4+0x53/0x90 net/socket.c:1809 __do_sys_accept4 net/socket.c:1821 [inline] __se_sys_accept4 net/socket.c:1818 [inline] __x64_sys_accept4+0x93/0xf0 net/socket.c:1818 do_syscall_64+0xf6/0x790 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x4445c9 Code: e8 0c 0d 03 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 eb 08 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007ffc35b37608 EFLAGS: 00000246 ORIG_RAX: 0000000000000120 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00000000004445c9 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000003 RBP: 0000000000000000 R08: 0000000000306777 R09: 0000000000306777 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00000000004053d0 R14: 0000000000000000 R15: 0000000000000000 Fixes: d752a4986532 ("net: memcg: late association of sock to memcg") Signed-off-by: Eric Dumazet Cc: Shakeel Butt Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/inet_connection_sock.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index c786f81952f1..7826fba34b14 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -476,27 +476,27 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) spin_unlock_bh(&queue->fastopenq.lock); } - if (mem_cgroup_sockets_enabled) { +out: + release_sock(sk); + if (newsk && mem_cgroup_sockets_enabled) { int amt; /* atomically get the memory usage, set and charge the - * sk->sk_memcg. + * newsk->sk_memcg. */ lock_sock(newsk); - /* The sk has not been accepted yet, no need to look at - * sk->sk_wmem_queued. + /* The socket has not been accepted yet, no need to look at + * newsk->sk_wmem_queued. */ amt = sk_mem_pages(newsk->sk_forward_alloc + - atomic_read(&sk->sk_rmem_alloc)); + atomic_read(&newsk->sk_rmem_alloc)); mem_cgroup_sk_alloc(newsk); if (newsk->sk_memcg && amt) mem_cgroup_charge_skmem(newsk->sk_memcg, amt); release_sock(newsk); } -out: - release_sock(sk); if (req) reqsk_put(req); return newsk; From 47186107f7f2aa381aa9d890d5d8a81f6aee430a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Mar 2020 21:05:13 -0800 Subject: [PATCH 3633/3715] fib: add missing attribute validation for tun_id [ Upstream commit 4c16d64ea04056f1b1b324ab6916019f6a064114 ] Add missing netlink policy entry for FRA_TUN_ID. Fixes: e7030878fc84 ("fib: Add fib rule match on tunnel id") Signed-off-by: Jakub Kicinski Reviewed-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/fib_rules.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 648caf90ec07..b8fd023ba625 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -102,6 +102,7 @@ struct fib_rule_notifier_info { [FRA_OIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ [FRA_PRIORITY] = { .type = NLA_U32 }, \ [FRA_FWMARK] = { .type = NLA_U32 }, \ + [FRA_TUN_ID] = { .type = NLA_U64 }, \ [FRA_FWMASK] = { .type = NLA_U32 }, \ [FRA_TABLE] = { .type = NLA_U32 }, \ [FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \ From 40ab0c53bc77f49f3da9050760066e14e37a6e03 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Mar 2020 21:05:14 -0800 Subject: [PATCH 3634/3715] nl802154: add missing attribute validation [ Upstream commit 9322cd7c4af2ccc7fe7c5f01adb53f4f77949e92 ] Add missing attribute validation for several u8 types. Fixes: 2c21d11518b6 ("net: add NL802154 interface for configuration of 802.15.4 devices") Signed-off-by: Jakub Kicinski Acked-by: Stefan Schmidt Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ieee802154/nl_policy.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c index 35c432668454..7e7428afc357 100644 --- a/net/ieee802154/nl_policy.c +++ b/net/ieee802154/nl_policy.c @@ -30,6 +30,11 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = { [IEEE802154_ATTR_HW_ADDR] = { .type = NLA_HW_ADDR, }, [IEEE802154_ATTR_PAN_ID] = { .type = NLA_U16, }, [IEEE802154_ATTR_CHANNEL] = { .type = NLA_U8, }, + [IEEE802154_ATTR_BCN_ORD] = { .type = NLA_U8, }, + [IEEE802154_ATTR_SF_ORD] = { .type = NLA_U8, }, + [IEEE802154_ATTR_PAN_COORD] = { .type = NLA_U8, }, + [IEEE802154_ATTR_BAT_EXT] = { .type = NLA_U8, }, + [IEEE802154_ATTR_COORD_REALIGN] = { .type = NLA_U8, }, [IEEE802154_ATTR_PAGE] = { .type = NLA_U8, }, [IEEE802154_ATTR_COORD_SHORT_ADDR] = { .type = NLA_U16, }, [IEEE802154_ATTR_COORD_HW_ADDR] = { .type = NLA_HW_ADDR, }, From 6fbf53205cd58683d8bd89e4df8a8bf2edb89f63 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Mar 2020 21:05:15 -0800 Subject: [PATCH 3635/3715] nl802154: add missing attribute validation for dev_type [ Upstream commit b60673c4c418bef7550d02faf53c34fbfeb366bf ] Add missing attribute type validation for IEEE802154_ATTR_DEV_TYPE to the netlink policy. Fixes: 90c049b2c6ae ("ieee802154: interface type to be added") Signed-off-by: Jakub Kicinski Acked-by: Stefan Schmidt Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ieee802154/nl_policy.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c index 7e7428afc357..040983fc15da 100644 --- a/net/ieee802154/nl_policy.c +++ b/net/ieee802154/nl_policy.c @@ -36,6 +36,7 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = { [IEEE802154_ATTR_BAT_EXT] = { .type = NLA_U8, }, [IEEE802154_ATTR_COORD_REALIGN] = { .type = NLA_U8, }, [IEEE802154_ATTR_PAGE] = { .type = NLA_U8, }, + [IEEE802154_ATTR_DEV_TYPE] = { .type = NLA_U8, }, [IEEE802154_ATTR_COORD_SHORT_ADDR] = { .type = NLA_U16, }, [IEEE802154_ATTR_COORD_HW_ADDR] = { .type = NLA_HW_ADDR, }, [IEEE802154_ATTR_COORD_PAN_ID] = { .type = NLA_U16, }, From 13e91bc63dcac99b9d96a8459e309c27009c1eb3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Mar 2020 21:05:16 -0800 Subject: [PATCH 3636/3715] can: add missing attribute validation for termination [ Upstream commit ab02ad660586b94f5d08912a3952b939cf4c4430 ] Add missing attribute validation for IFLA_CAN_TERMINATION to the netlink policy. Fixes: 12a6075cabc0 ("can: dev: add CAN interface termination API") Signed-off-by: Jakub Kicinski Acked-by: Oliver Hartkopp Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index d92113db4fb9..05ad5ed145a3 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -867,6 +867,7 @@ static const struct nla_policy can_policy[IFLA_CAN_MAX + 1] = { = { .len = sizeof(struct can_bittiming) }, [IFLA_CAN_DATA_BITTIMING_CONST] = { .len = sizeof(struct can_bittiming_const) }, + [IFLA_CAN_TERMINATION] = { .type = NLA_U16 }, }; static int can_validate(struct nlattr *tb[], struct nlattr *data[], From f0247be8076aa43a1b795530ceded43e7773a4a9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Mar 2020 21:05:17 -0800 Subject: [PATCH 3637/3715] macsec: add missing attribute validation for port [ Upstream commit 31d9a1c524964bac77b7f9d0a1ac140dc6b57461 ] Add missing attribute validation for IFLA_MACSEC_PORT to the netlink policy. Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/macsec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index ed2cb3ac578a..c2c3ce5653db 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -2980,6 +2980,7 @@ static const struct device_type macsec_type = { static const struct nla_policy macsec_rtnl_policy[IFLA_MACSEC_MAX + 1] = { [IFLA_MACSEC_SCI] = { .type = NLA_U64 }, + [IFLA_MACSEC_PORT] = { .type = NLA_U16 }, [IFLA_MACSEC_ICV_LEN] = { .type = NLA_U8 }, [IFLA_MACSEC_CIPHER_SUITE] = { .type = NLA_U64 }, [IFLA_MACSEC_WINDOW] = { .type = NLA_U32 }, From 2165d304e82cd5a5c36d4c7a7c8579b4d907b8a9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Mar 2020 21:05:19 -0800 Subject: [PATCH 3638/3715] net: fq: add missing attribute validation for orphan mask [ Upstream commit 7e6dc03eeb023e18427a373522f1d247b916a641 ] Add missing attribute validation for TCA_FQ_ORPHAN_MASK to the netlink policy. Fixes: 06eb395fa985 ("pkt_sched: fq: better control of DDOS traffic") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_fq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 7a944f508cae..66f1d40b910a 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -695,6 +695,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = { [TCA_FQ_FLOW_MAX_RATE] = { .type = NLA_U32 }, [TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 }, [TCA_FQ_FLOW_REFILL_DELAY] = { .type = NLA_U32 }, + [TCA_FQ_ORPHAN_MASK] = { .type = NLA_U32 }, [TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 }, }; From 994674f0ef734197359ac8e5876bb91103f526ad Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Mar 2020 21:05:21 -0800 Subject: [PATCH 3639/3715] team: add missing attribute validation for port ifindex [ Upstream commit dd25cb272ccce4db67dc8509278229099e4f5e99 ] Add missing attribute validation for TEAM_ATTR_OPTION_PORT_IFINDEX to the netlink policy. Fixes: 80f7c6683fe0 ("team: add support for per-port options") Signed-off-by: Jakub Kicinski Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/team/team.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index f1aabf8a16c2..01c51a1526ef 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2207,6 +2207,7 @@ team_nl_option_policy[TEAM_ATTR_OPTION_MAX + 1] = { [TEAM_ATTR_OPTION_CHANGED] = { .type = NLA_FLAG }, [TEAM_ATTR_OPTION_TYPE] = { .type = NLA_U8 }, [TEAM_ATTR_OPTION_DATA] = { .type = NLA_BINARY }, + [TEAM_ATTR_OPTION_PORT_IFINDEX] = { .type = NLA_U32 }, }; static int team_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) From 41d830acc66107415ea37d92b325e21be7ea801c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Mar 2020 21:05:22 -0800 Subject: [PATCH 3640/3715] team: add missing attribute validation for array index [ Upstream commit 669fcd7795900cd1880237cbbb57a7db66cb9ac8 ] Add missing attribute validation for TEAM_ATTR_OPTION_ARRAY_INDEX to the netlink policy. Fixes: b13033262d24 ("team: introduce array options") Signed-off-by: Jakub Kicinski Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/team/team.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 01c51a1526ef..3dba58fa3433 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2208,6 +2208,7 @@ team_nl_option_policy[TEAM_ATTR_OPTION_MAX + 1] = { [TEAM_ATTR_OPTION_TYPE] = { .type = NLA_U8 }, [TEAM_ATTR_OPTION_DATA] = { .type = NLA_BINARY }, [TEAM_ATTR_OPTION_PORT_IFINDEX] = { .type = NLA_U32 }, + [TEAM_ATTR_OPTION_ARRAY_INDEX] = { .type = NLA_U32 }, }; static int team_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) From 69f30950da6d4e954bc865a8c2632392d3dc3ed5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Mar 2020 21:05:24 -0800 Subject: [PATCH 3641/3715] nfc: add missing attribute validation for SE API [ Upstream commit 361d23e41ca6e504033f7e66a03b95788377caae ] Add missing attribute validation for NFC_ATTR_SE_INDEX to the netlink policy. Fixes: 5ce3f32b5264 ("NFC: netlink: SE API implementation") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/nfc/netlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 7b8d4d235a3a..587aea4ae8c0 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -55,6 +55,7 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = { [NFC_ATTR_LLC_SDP] = { .type = NLA_NESTED }, [NFC_ATTR_FIRMWARE_NAME] = { .type = NLA_STRING, .len = NFC_FIRMWARE_NAME_MAXSIZE }, + [NFC_ATTR_SE_INDEX] = { .type = NLA_U32 }, [NFC_ATTR_SE_APDU] = { .type = NLA_BINARY }, [NFC_ATTR_VENDOR_DATA] = { .type = NLA_BINARY }, From 402f86abf2c3dae913b166c217147f2e4c05eca6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Mar 2020 21:05:26 -0800 Subject: [PATCH 3642/3715] nfc: add missing attribute validation for vendor subcommand [ Upstream commit 6ba3da446551f2150fadbf8c7788edcb977683d3 ] Add missing attribute validation for vendor subcommand attributes to the netlink policy. Fixes: 9e58095f9660 ("NFC: netlink: Implement vendor command support") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/nfc/netlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 587aea4ae8c0..6199f4334fbd 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -57,6 +57,8 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = { .len = NFC_FIRMWARE_NAME_MAXSIZE }, [NFC_ATTR_SE_INDEX] = { .type = NLA_U32 }, [NFC_ATTR_SE_APDU] = { .type = NLA_BINARY }, + [NFC_ATTR_VENDOR_ID] = { .type = NLA_U32 }, + [NFC_ATTR_VENDOR_SUBCMD] = { .type = NLA_U32 }, [NFC_ATTR_VENDOR_DATA] = { .type = NLA_BINARY }, }; From ee8cb6a025b7e4cb0b8c2dec545e0b14579a8b99 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 12 Mar 2020 22:25:20 +0100 Subject: [PATCH 3643/3715] net: phy: fix MDIO bus PM PHY resuming [ Upstream commit 611d779af7cad2b87487ff58e4931a90c20b113c ] So far we have the unfortunate situation that mdio_bus_phy_may_suspend() is called in suspend AND resume path, assuming that function result is the same. After the original change this is no longer the case, resulting in broken resume as reported by Geert. To fix this call mdio_bus_phy_may_suspend() in the suspend path only, and let the phy_device store the info whether it was suspended by MDIO bus PM. Fixes: 503ba7c69610 ("net: phy: Avoid multiple suspends") Reported-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Signed-off-by: Heiner Kallweit Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/phy_device.c | 6 +++++- include/linux/phy.h | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 31ef3e47edf6..27f1f0b5b8f6 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -125,6 +125,8 @@ static int mdio_bus_phy_suspend(struct device *dev) if (!mdio_bus_phy_may_suspend(phydev)) return 0; + phydev->suspended_by_mdio_bus = true; + return phy_suspend(phydev); } @@ -133,9 +135,11 @@ static int mdio_bus_phy_resume(struct device *dev) struct phy_device *phydev = to_phy_device(dev); int ret; - if (!mdio_bus_phy_may_suspend(phydev)) + if (!phydev->suspended_by_mdio_bus) goto no_resume; + phydev->suspended_by_mdio_bus = false; + ret = phy_resume(phydev); if (ret < 0) return ret; diff --git a/include/linux/phy.h b/include/linux/phy.h index efc04c2d92c9..8b6850707e62 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -372,6 +372,7 @@ struct phy_c45_device_ids { * is_pseudo_fixed_link: Set to true if this phy is an Ethernet switch, etc. * has_fixups: Set to true if this phy has fixups/quirks. * suspended: Set to true if this phy has been suspended successfully. + * suspended_by_mdio_bus: Set to true if this phy was suspended by MDIO bus. * sysfs_links: Internal boolean tracking sysfs symbolic links setup/removal. * loopback_enabled: Set true if this phy has been loopbacked successfully. * state: state of the PHY for management purposes @@ -410,6 +411,7 @@ struct phy_device { bool is_pseudo_fixed_link; bool has_fixups; bool suspended; + bool suspended_by_mdio_bus; bool sysfs_links; bool loopback_enabled; From c21c708ca257772691b99544937d3726c17d0ef9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Mar 2020 09:32:16 -0800 Subject: [PATCH 3644/3715] bonding/alb: make sure arp header is pulled before accessing it commit b7469e83d2add567e4e0b063963db185f3167cea upstream. Similar to commit 38f88c454042 ("bonding/alb: properly access headers in bond_alb_xmit()"), we need to make sure arp header was pulled in skb->head before blindly accessing it in rlb_arp_xmit(). Remove arp_pkt() private helper, since it is more readable/obvious to have the following construct back to back : if (!pskb_network_may_pull(skb, sizeof(*arp))) return NULL; arp = (struct arp_pkt *)skb_network_header(skb); syzbot reported : BUG: KMSAN: uninit-value in bond_slave_has_mac_rx include/net/bonding.h:704 [inline] BUG: KMSAN: uninit-value in rlb_arp_xmit drivers/net/bonding/bond_alb.c:662 [inline] BUG: KMSAN: uninit-value in bond_alb_xmit+0x575/0x25e0 drivers/net/bonding/bond_alb.c:1477 CPU: 0 PID: 12743 Comm: syz-executor.4 Not tainted 5.6.0-rc2-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x220 lib/dump_stack.c:118 kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:118 __msan_warning+0x58/0xa0 mm/kmsan/kmsan_instr.c:215 bond_slave_has_mac_rx include/net/bonding.h:704 [inline] rlb_arp_xmit drivers/net/bonding/bond_alb.c:662 [inline] bond_alb_xmit+0x575/0x25e0 drivers/net/bonding/bond_alb.c:1477 __bond_start_xmit drivers/net/bonding/bond_main.c:4257 [inline] bond_start_xmit+0x85d/0x2f70 drivers/net/bonding/bond_main.c:4282 __netdev_start_xmit include/linux/netdevice.h:4524 [inline] netdev_start_xmit include/linux/netdevice.h:4538 [inline] xmit_one net/core/dev.c:3470 [inline] dev_hard_start_xmit+0x531/0xab0 net/core/dev.c:3486 __dev_queue_xmit+0x37de/0x4220 net/core/dev.c:4063 dev_queue_xmit+0x4b/0x60 net/core/dev.c:4096 packet_snd net/packet/af_packet.c:2967 [inline] packet_sendmsg+0x8347/0x93b0 net/packet/af_packet.c:2992 sock_sendmsg_nosec net/socket.c:652 [inline] sock_sendmsg net/socket.c:672 [inline] __sys_sendto+0xc1b/0xc50 net/socket.c:1998 __do_sys_sendto net/socket.c:2010 [inline] __se_sys_sendto+0x107/0x130 net/socket.c:2006 __x64_sys_sendto+0x6e/0x90 net/socket.c:2006 do_syscall_64+0xb8/0x160 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x45c479 Code: ad b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fc77ffbbc78 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00007fc77ffbc6d4 RCX: 000000000045c479 RDX: 000000000000000e RSI: 00000000200004c0 RDI: 0000000000000003 RBP: 000000000076bf20 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 0000000000000a04 R14: 00000000004cc7b0 R15: 000000000076bf2c Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:144 [inline] kmsan_internal_poison_shadow+0x66/0xd0 mm/kmsan/kmsan.c:127 kmsan_slab_alloc+0x8a/0xe0 mm/kmsan/kmsan_hooks.c:82 slab_alloc_node mm/slub.c:2793 [inline] __kmalloc_node_track_caller+0xb40/0x1200 mm/slub.c:4401 __kmalloc_reserve net/core/skbuff.c:142 [inline] __alloc_skb+0x2fd/0xac0 net/core/skbuff.c:210 alloc_skb include/linux/skbuff.h:1051 [inline] alloc_skb_with_frags+0x18c/0xa70 net/core/skbuff.c:5766 sock_alloc_send_pskb+0xada/0xc60 net/core/sock.c:2242 packet_alloc_skb net/packet/af_packet.c:2815 [inline] packet_snd net/packet/af_packet.c:2910 [inline] packet_sendmsg+0x66a0/0x93b0 net/packet/af_packet.c:2992 sock_sendmsg_nosec net/socket.c:652 [inline] sock_sendmsg net/socket.c:672 [inline] __sys_sendto+0xc1b/0xc50 net/socket.c:1998 __do_sys_sendto net/socket.c:2010 [inline] __se_sys_sendto+0x107/0x130 net/socket.c:2006 __x64_sys_sendto+0x6e/0x90 net/socket.c:2006 do_syscall_64+0xb8/0x160 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Jay Vosburgh Cc: Veaceslav Falico Cc: Andy Gospodarek Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/bonding/bond_alb.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 755d588bbcb1..0b79ddec15b7 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -71,11 +71,6 @@ struct arp_pkt { }; #pragma pack() -static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb) -{ - return (struct arp_pkt *)skb_network_header(skb); -} - /* Forward declaration */ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[], bool strict_match); @@ -574,10 +569,11 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip) spin_unlock(&bond->mode_lock); } -static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bond) +static struct slave *rlb_choose_channel(struct sk_buff *skb, + struct bonding *bond, + const struct arp_pkt *arp) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); - struct arp_pkt *arp = arp_pkt(skb); struct slave *assigned_slave, *curr_active_slave; struct rlb_client_info *client_info; u32 hash_index = 0; @@ -674,8 +670,12 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon */ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) { - struct arp_pkt *arp = arp_pkt(skb); struct slave *tx_slave = NULL; + struct arp_pkt *arp; + + if (!pskb_network_may_pull(skb, sizeof(*arp))) + return NULL; + arp = (struct arp_pkt *)skb_network_header(skb); /* Don't modify or load balance ARPs that do not originate locally * (e.g.,arrive via a bridge). @@ -685,7 +685,7 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) if (arp->op_code == htons(ARPOP_REPLY)) { /* the arp must be sent on the selected rx channel */ - tx_slave = rlb_choose_channel(skb, bond); + tx_slave = rlb_choose_channel(skb, bond, arp); if (tx_slave) bond_hw_addr_copy(arp->mac_src, tx_slave->dev->dev_addr, tx_slave->dev->addr_len); @@ -696,7 +696,7 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) * When the arp reply is received the entry will be updated * with the correct unicast address of the client. */ - rlb_choose_channel(skb, bond); + rlb_choose_channel(skb, bond, arp); /* The ARP reply packets must be delayed so that * they can cancel out the influence of the ARP request. From e3849fc5a293d721b1ac385c89aceba62760794c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Mar 2020 15:51:43 -0800 Subject: [PATCH 3645/3715] slip: make slhc_compress() more robust against malicious packets [ Upstream commit 110a40dfb708fe940a3f3704d470e431c368d256 ] Before accessing various fields in IPV4 network header and TCP header, make sure the packet : - Has IP version 4 (ip->version == 4) - Has not a silly network length (ip->ihl >= 5) - Is big enough to hold network and transport headers - Has not a silly TCP header size (th->doff >= sizeof(struct tcphdr) / 4) syzbot reported : BUG: KMSAN: uninit-value in slhc_compress+0x5b9/0x2e60 drivers/net/slip/slhc.c:270 CPU: 0 PID: 11728 Comm: syz-executor231 Not tainted 5.6.0-rc2-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x220 lib/dump_stack.c:118 kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:118 __msan_warning+0x58/0xa0 mm/kmsan/kmsan_instr.c:215 slhc_compress+0x5b9/0x2e60 drivers/net/slip/slhc.c:270 ppp_send_frame drivers/net/ppp/ppp_generic.c:1637 [inline] __ppp_xmit_process+0x1902/0x2970 drivers/net/ppp/ppp_generic.c:1495 ppp_xmit_process+0x147/0x2f0 drivers/net/ppp/ppp_generic.c:1516 ppp_write+0x6bb/0x790 drivers/net/ppp/ppp_generic.c:512 do_loop_readv_writev fs/read_write.c:717 [inline] do_iter_write+0x812/0xdc0 fs/read_write.c:1000 compat_writev+0x2df/0x5a0 fs/read_write.c:1351 do_compat_pwritev64 fs/read_write.c:1400 [inline] __do_compat_sys_pwritev fs/read_write.c:1420 [inline] __se_compat_sys_pwritev fs/read_write.c:1414 [inline] __ia32_compat_sys_pwritev+0x349/0x3f0 fs/read_write.c:1414 do_syscall_32_irqs_on arch/x86/entry/common.c:339 [inline] do_fast_syscall_32+0x3c7/0x6e0 arch/x86/entry/common.c:410 entry_SYSENTER_compat+0x68/0x77 arch/x86/entry/entry_64_compat.S:139 RIP: 0023:0xf7f7cd99 Code: 90 e8 0b 00 00 00 f3 90 0f ae e8 eb f9 8d 74 26 00 89 3c 24 c3 90 90 90 90 90 90 90 90 90 90 90 90 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 eb 0d 90 90 90 90 90 90 90 90 90 90 90 90 RSP: 002b:00000000ffdb84ac EFLAGS: 00000217 ORIG_RAX: 000000000000014e RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00000000200001c0 RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000000003 RBP: 0000000040047459 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:144 [inline] kmsan_internal_poison_shadow+0x66/0xd0 mm/kmsan/kmsan.c:127 kmsan_slab_alloc+0x8a/0xe0 mm/kmsan/kmsan_hooks.c:82 slab_alloc_node mm/slub.c:2793 [inline] __kmalloc_node_track_caller+0xb40/0x1200 mm/slub.c:4401 __kmalloc_reserve net/core/skbuff.c:142 [inline] __alloc_skb+0x2fd/0xac0 net/core/skbuff.c:210 alloc_skb include/linux/skbuff.h:1051 [inline] ppp_write+0x115/0x790 drivers/net/ppp/ppp_generic.c:500 do_loop_readv_writev fs/read_write.c:717 [inline] do_iter_write+0x812/0xdc0 fs/read_write.c:1000 compat_writev+0x2df/0x5a0 fs/read_write.c:1351 do_compat_pwritev64 fs/read_write.c:1400 [inline] __do_compat_sys_pwritev fs/read_write.c:1420 [inline] __se_compat_sys_pwritev fs/read_write.c:1414 [inline] __ia32_compat_sys_pwritev+0x349/0x3f0 fs/read_write.c:1414 do_syscall_32_irqs_on arch/x86/entry/common.c:339 [inline] do_fast_syscall_32+0x3c7/0x6e0 arch/x86/entry/common.c:410 entry_SYSENTER_compat+0x68/0x77 arch/x86/entry/entry_64_compat.S:139 Fixes: b5451d783ade ("slip: Move the SLIP drivers") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/slip/slhc.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/slip/slhc.c b/drivers/net/slip/slhc.c index ea90db3c7705..01334aeac577 100644 --- a/drivers/net/slip/slhc.c +++ b/drivers/net/slip/slhc.c @@ -232,7 +232,7 @@ slhc_compress(struct slcompress *comp, unsigned char *icp, int isize, register struct cstate *cs = lcs->next; register unsigned long deltaS, deltaA; register short changes = 0; - int hlen; + int nlen, hlen; unsigned char new_seq[16]; register unsigned char *cp = new_seq; struct iphdr *ip; @@ -248,6 +248,8 @@ slhc_compress(struct slcompress *comp, unsigned char *icp, int isize, return isize; ip = (struct iphdr *) icp; + if (ip->version != 4 || ip->ihl < 5) + return isize; /* Bail if this packet isn't TCP, or is an IP fragment */ if (ip->protocol != IPPROTO_TCP || (ntohs(ip->frag_off) & 0x3fff)) { @@ -258,10 +260,14 @@ slhc_compress(struct slcompress *comp, unsigned char *icp, int isize, comp->sls_o_tcp++; return isize; } - /* Extract TCP header */ + nlen = ip->ihl * 4; + if (isize < nlen + sizeof(*th)) + return isize; - th = (struct tcphdr *)(((unsigned char *)ip) + ip->ihl*4); - hlen = ip->ihl*4 + th->doff*4; + th = (struct tcphdr *)(icp + nlen); + if (th->doff < sizeof(struct tcphdr) / 4) + return isize; + hlen = nlen + th->doff * 4; /* Bail if the TCP packet isn't `compressible' (i.e., ACK isn't set or * some other control bit is set). Also uncompressible if From daf9c84758b8d2a8e3423fc98d6815f052e34a69 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 10 Mar 2020 20:36:16 -0700 Subject: [PATCH 3646/3715] net: fec: validate the new settings in fec_enet_set_coalesce() [ Upstream commit ab14961d10d02d20767612c78ce148f6eb85bd58 ] fec_enet_set_coalesce() validates the previously set params and if they are within range proceeds to apply the new ones. The new ones, however, are not validated. This seems backwards, probably a copy-paste error? Compile tested only. Fixes: d851b47b22fc ("net: fec: add interrupt coalescence feature support") Signed-off-by: Jakub Kicinski Acked-by: Fugang Duan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/freescale/fec_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 62bc19bedb06..8ba915cc4c2e 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2478,15 +2478,15 @@ fec_enet_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *ec) return -EINVAL; } - cycle = fec_enet_us_to_itr_clock(ndev, fep->rx_time_itr); + cycle = fec_enet_us_to_itr_clock(ndev, ec->rx_coalesce_usecs); if (cycle > 0xFFFF) { pr_err("Rx coalesced usec exceed hardware limitation\n"); return -EINVAL; } - cycle = fec_enet_us_to_itr_clock(ndev, fep->tx_time_itr); + cycle = fec_enet_us_to_itr_clock(ndev, ec->tx_coalesce_usecs); if (cycle > 0xFFFF) { - pr_err("Rx coalesced usec exceed hardware limitation\n"); + pr_err("Tx coalesced usec exceed hardware limitation\n"); return -EINVAL; } From d0a5359e5e2462ff529d1ae3dbbf177a6873824c Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Mon, 9 Mar 2020 15:57:07 -0700 Subject: [PATCH 3647/3715] macvlan: add cond_resched() during multicast processing [ Upstream commit ce9a4186f9ac475c415ffd20348176a4ea366670 ] The Rx bound multicast packets are deferred to a workqueue and macvlan can also suffer from the same attack that was discovered by Syzbot for IPvlan. This solution is not as effective as in IPvlan. IPvlan defers all (Tx and Rx) multicast packet processing to a workqueue while macvlan does this way only for the Rx. This fix should address the Rx codition to certain extent. Tx is still suseptible. Tx multicast processing happens when .ndo_start_xmit is called, hence we cannot add cond_resched(). However, it's not that severe since the user which is generating / flooding will be affected the most. Fixes: 412ca1550cbe ("macvlan: Move broadcasts into a work queue") Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/macvlan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index ab539136d5bf..6989e84670e5 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -338,6 +338,8 @@ static void macvlan_process_broadcast(struct work_struct *w) if (src) dev_put(src->dev); kfree_skb(skb); + + cond_resched(); } } From e6b382e861bcf841154e391f65c0e37e0441771c Mon Sep 17 00:00:00 2001 From: Dmitry Yakunin Date: Thu, 5 Mar 2020 15:33:12 +0300 Subject: [PATCH 3648/3715] inet_diag: return classid for all socket types [ Upstream commit 83f73c5bb7b9a9135173f0ba2b1aa00c06664ff9 ] In commit 1ec17dbd90f8 ("inet_diag: fix reporting cgroup classid and fallback to priority") croup classid reporting was fixed. But this works only for TCP sockets because for other socket types icsk parameter can be NULL and classid code path is skipped. This change moves classid handling to inet_diag_msg_attrs_fill() function. Also inet_diag_msg_attrs_size() helper was added and addends in nlmsg_new() were reordered to save order from inet_sk_diag_fill(). Fixes: 1ec17dbd90f8 ("inet_diag: fix reporting cgroup classid and fallback to priority") Signed-off-by: Dmitry Yakunin Reviewed-by: Konstantin Khlebnikov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/inet_diag.h | 18 ++++++++++------ net/ipv4/inet_diag.c | 44 ++++++++++++++++++--------------------- net/ipv4/raw_diag.c | 5 +++-- net/ipv4/udp_diag.c | 5 +++-- net/sctp/sctp_diag.c | 8 ++----- 5 files changed, 40 insertions(+), 40 deletions(-) diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 39faaaf843e1..c91cf2dee12a 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -2,15 +2,10 @@ #ifndef _INET_DIAG_H_ #define _INET_DIAG_H_ 1 +#include #include -struct net; -struct sock; struct inet_hashinfo; -struct nlattr; -struct nlmsghdr; -struct sk_buff; -struct netlink_callback; struct inet_diag_handler { void (*dump)(struct sk_buff *skb, @@ -62,6 +57,17 @@ int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk); +static inline size_t inet_diag_msg_attrs_size(void) +{ + return nla_total_size(1) /* INET_DIAG_SHUTDOWN */ + + nla_total_size(1) /* INET_DIAG_TOS */ +#if IS_ENABLED(CONFIG_IPV6) + + nla_total_size(1) /* INET_DIAG_TCLASS */ + + nla_total_size(1) /* INET_DIAG_SKV6ONLY */ +#endif + + nla_total_size(4) /* INET_DIAG_MARK */ + + nla_total_size(4); /* INET_DIAG_CLASS_ID */ +} int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, struct inet_diag_msg *r, int ext, struct user_namespace *user_ns, bool net_admin); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index eb158badebc4..7ba013d6c00a 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -105,13 +105,9 @@ static size_t inet_sk_attr_size(struct sock *sk, aux = handler->idiag_get_aux_size(sk, net_admin); return nla_total_size(sizeof(struct tcp_info)) - + nla_total_size(1) /* INET_DIAG_SHUTDOWN */ - + nla_total_size(1) /* INET_DIAG_TOS */ - + nla_total_size(1) /* INET_DIAG_TCLASS */ - + nla_total_size(4) /* INET_DIAG_MARK */ - + nla_total_size(4) /* INET_DIAG_CLASS_ID */ - + nla_total_size(sizeof(struct inet_diag_meminfo)) + nla_total_size(sizeof(struct inet_diag_msg)) + + inet_diag_msg_attrs_size() + + nla_total_size(sizeof(struct inet_diag_meminfo)) + nla_total_size(SK_MEMINFO_VARS * sizeof(u32)) + nla_total_size(TCP_CA_NAME_MAX) + nla_total_size(sizeof(struct tcpvegas_info)) @@ -152,6 +148,24 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, sk->sk_mark)) goto errout; + if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) || + ext & (1 << (INET_DIAG_TCLASS - 1))) { + u32 classid = 0; + +#ifdef CONFIG_SOCK_CGROUP_DATA + classid = sock_cgroup_classid(&sk->sk_cgrp_data); +#endif + /* Fallback to socket priority if class id isn't set. + * Classful qdiscs use it as direct reference to class. + * For cgroup2 classid is always zero. + */ + if (!classid) + classid = sk->sk_priority; + + if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid)) + goto errout; + } + r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); r->idiag_inode = sock_i_ino(sk); @@ -289,24 +303,6 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, goto errout; } - if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) || - ext & (1 << (INET_DIAG_TCLASS - 1))) { - u32 classid = 0; - -#ifdef CONFIG_SOCK_CGROUP_DATA - classid = sock_cgroup_classid(&sk->sk_cgrp_data); -#endif - /* Fallback to socket priority if class id isn't set. - * Classful qdiscs use it as direct reference to class. - * For cgroup2 classid is always zero. - */ - if (!classid) - classid = sk->sk_priority; - - if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid)) - goto errout; - } - out: nlmsg_end(skb, nlh); return 0; diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c index 6367ecdf76c4..1d84b02ec765 100644 --- a/net/ipv4/raw_diag.c +++ b/net/ipv4/raw_diag.c @@ -99,8 +99,9 @@ static int raw_diag_dump_one(struct sk_buff *in_skb, if (IS_ERR(sk)) return PTR_ERR(sk); - rep = nlmsg_new(sizeof(struct inet_diag_msg) + - sizeof(struct inet_diag_meminfo) + 64, + rep = nlmsg_new(nla_total_size(sizeof(struct inet_diag_msg)) + + inet_diag_msg_attrs_size() + + nla_total_size(sizeof(struct inet_diag_meminfo)) + 64, GFP_KERNEL); if (!rep) { sock_put(sk); diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index d9ad986c7b2c..cc3f6da306c6 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -67,8 +67,9 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, goto out; err = -ENOMEM; - rep = nlmsg_new(sizeof(struct inet_diag_msg) + - sizeof(struct inet_diag_meminfo) + 64, + rep = nlmsg_new(nla_total_size(sizeof(struct inet_diag_msg)) + + inet_diag_msg_attrs_size() + + nla_total_size(sizeof(struct inet_diag_meminfo)) + 64, GFP_KERNEL); if (!rep) goto out; diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c index 75274a60b77a..6a5a3dfa6c8d 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/sctp_diag.c @@ -221,15 +221,11 @@ static size_t inet_assoc_attr_size(struct sctp_association *asoc) addrcnt++; return nla_total_size(sizeof(struct sctp_info)) - + nla_total_size(1) /* INET_DIAG_SHUTDOWN */ - + nla_total_size(1) /* INET_DIAG_TOS */ - + nla_total_size(1) /* INET_DIAG_TCLASS */ - + nla_total_size(4) /* INET_DIAG_MARK */ - + nla_total_size(4) /* INET_DIAG_CLASS_ID */ + nla_total_size(addrlen * asoc->peer.transport_count) + nla_total_size(addrlen * addrcnt) - + nla_total_size(sizeof(struct inet_diag_meminfo)) + nla_total_size(sizeof(struct inet_diag_msg)) + + inet_diag_msg_attrs_size() + + nla_total_size(sizeof(struct inet_diag_meminfo)) + 64; } From dd18c7005ec437c6d7de538a6571abde9b66c92d Mon Sep 17 00:00:00 2001 From: Jiri Wiesner Date: Sat, 7 Mar 2020 13:31:57 +0100 Subject: [PATCH 3649/3715] ipvlan: do not add hardware address of master to its unicast filter list [ Upstream commit 63aae7b17344d4b08a7d05cb07044de4c0f9dcc6 ] There is a problem when ipvlan slaves are created on a master device that is a vmxnet3 device (ipvlan in VMware guests). The vmxnet3 driver does not support unicast address filtering. When an ipvlan device is brought up in ipvlan_open(), the ipvlan driver calls dev_uc_add() to add the hardware address of the vmxnet3 master device to the unicast address list of the master device, phy_dev->uc. This inevitably leads to the vmxnet3 master device being forced into promiscuous mode by __dev_set_rx_mode(). Promiscuous mode is switched on the master despite the fact that there is still only one hardware address that the master device should use for filtering in order for the ipvlan device to be able to receive packets. The comment above struct net_device describes the uc_promisc member as a "counter, that indicates, that promiscuous mode has been enabled due to the need to listen to additional unicast addresses in a device that does not implement ndo_set_rx_mode()". Moreover, the design of ipvlan guarantees that only the hardware address of a master device, phy_dev->dev_addr, will be used to transmit and receive all packets from its ipvlan slaves. Thus, the unicast address list of the master device should not be modified by ipvlan_open() and ipvlan_stop() in order to make ipvlan a workable option on masters that do not support unicast address filtering. Fixes: 2ad7bf3638411 ("ipvlan: Initial check-in of the IPVLAN driver") Reported-by: Per Sundstrom Signed-off-by: Jiri Wiesner Reviewed-by: Eric Dumazet Acked-by: Mahesh Bandewar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ipvlan/ipvlan_main.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 09f6795cce53..cd32d6623f6a 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -236,7 +236,6 @@ static void ipvlan_uninit(struct net_device *dev) static int ipvlan_open(struct net_device *dev) { struct ipvl_dev *ipvlan = netdev_priv(dev); - struct net_device *phy_dev = ipvlan->phy_dev; struct ipvl_addr *addr; if (ipvlan->port->mode == IPVLAN_MODE_L3 || @@ -248,7 +247,7 @@ static int ipvlan_open(struct net_device *dev) list_for_each_entry(addr, &ipvlan->addrs, anode) ipvlan_ht_addr_add(ipvlan, addr); - return dev_uc_add(phy_dev, phy_dev->dev_addr); + return 0; } static int ipvlan_stop(struct net_device *dev) @@ -260,8 +259,6 @@ static int ipvlan_stop(struct net_device *dev) dev_uc_unsync(phy_dev, dev); dev_mc_unsync(phy_dev, dev); - dev_uc_del(phy_dev, phy_dev->dev_addr); - list_for_each_entry(addr, &ipvlan->addrs, anode) ipvlan_ht_addr_del(addr); From 72c457e5a3fef36c2979d40fcdf32c5c5ab0d957 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 28 Feb 2018 11:43:27 +0100 Subject: [PATCH 3650/3715] ipvlan: egress mcast packets are not exceptional commit cccc200fcaf04cff4342036a72e51d6adf6c98c1 upstream. Currently, if IPv6 is enabled on top of an ipvlan device in l3 mode, the following warning message: Dropped {multi|broad}cast of type= [86dd] is emitted every time that a RS is generated and dmseg is soon filled with irrelevant messages. Replace pr_warn with pr_debug, to preserve debuggability, without scaring the sysadmin. Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ipvlan/ipvlan_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 1d97d6958e4b..cc0bd2ce4cc3 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -451,8 +451,8 @@ static int ipvlan_process_outbound(struct sk_buff *skb) /* In this mode we dont care about multicast and broadcast traffic */ if (is_multicast_ether_addr(ethh->h_dest)) { - pr_warn_ratelimited("Dropped {multi|broad}cast of type= [%x]\n", - ntohs(skb->protocol)); + pr_debug_ratelimited("Dropped {multi|broad}cast of type=[%x]\n", + ntohs(skb->protocol)); kfree_skb(skb); goto out; } From c07b71b6f377c65942c35daf7005e8be548b756c Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Mon, 9 Mar 2020 15:56:56 -0700 Subject: [PATCH 3651/3715] ipvlan: don't deref eth hdr before checking it's set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ad8192767c9f9cf97da57b9ffcea70fb100febef ] IPvlan in L3 mode discards outbound multicast packets but performs the check before ensuring the ether-header is set or not. This is an error that Eric found through code browsing. Fixes: 2ad7bf363841 (“ipvlan: Initial check-in of the IPVLAN driver.”) Signed-off-by: Mahesh Bandewar Reported-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ipvlan/ipvlan_core.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index cc0bd2ce4cc3..baf8aab59f82 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -449,19 +449,21 @@ static int ipvlan_process_outbound(struct sk_buff *skb) struct ethhdr *ethh = eth_hdr(skb); int ret = NET_XMIT_DROP; - /* In this mode we dont care about multicast and broadcast traffic */ - if (is_multicast_ether_addr(ethh->h_dest)) { - pr_debug_ratelimited("Dropped {multi|broad}cast of type=[%x]\n", - ntohs(skb->protocol)); - kfree_skb(skb); - goto out; - } - /* The ipvlan is a pseudo-L2 device, so the packets that we receive * will have L2; which need to discarded and processed further * in the net-ns of the main-device. */ if (skb_mac_header_was_set(skb)) { + /* In this mode we dont care about + * multicast and broadcast traffic */ + if (is_multicast_ether_addr(ethh->h_dest)) { + pr_debug_ratelimited( + "Dropped {multi|broad}cast of type=[%x]\n", + ntohs(skb->protocol)); + kfree_skb(skb); + goto out; + } + skb_pull(skb, sizeof(*ethh)); skb->mac_header = (typeof(skb->mac_header))~0U; skb_reset_network_header(skb); From b58120a61b256e3c24b957fe36617bdc738efc9c Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 30 Jan 2020 13:34:59 +0300 Subject: [PATCH 3652/3715] cgroup: cgroup_procs_next should increase position index commit 2d4ecb030dcc90fb725ecbfc82ce5d6c37906e0e upstream. If seq_file .next fuction does not change position index, read after some lseek can generate unexpected output: 1) dd bs=1 skip output of each 2nd elements $ dd if=/sys/fs/cgroup/cgroup.procs bs=8 count=1 2 3 4 5 1+0 records in 1+0 records out 8 bytes copied, 0,000267297 s, 29,9 kB/s [test@localhost ~]$ dd if=/sys/fs/cgroup/cgroup.procs bs=1 count=8 2 4 <<< NB! 3 was skipped 6 <<< ... and 5 too 8 <<< ... and 7 8+0 records in 8+0 records out 8 bytes copied, 5,2123e-05 s, 153 kB/s This happen because __cgroup_procs_start() makes an extra extra cgroup_procs_next() call 2) read after lseek beyond end of file generates whole last line. 3) read after lseek into middle of last line generates expected rest of last line and unexpected whole line once again. Additionally patch removes an extra position index changes in __cgroup_procs_start() Cc: stable@vger.kernel.org https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- kernel/cgroup/cgroup.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 1e727fbaa0e4..4dba8069f036 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -4249,6 +4249,9 @@ static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos) struct kernfs_open_file *of = s->private; struct css_task_iter *it = of->priv; + if (pos) + (*pos)++; + return css_task_iter_next(it); } @@ -4264,7 +4267,7 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, * from position 0, so we can simply keep iterating on !0 *pos. */ if (!it) { - if (WARN_ON_ONCE((*pos)++)) + if (WARN_ON_ONCE((*pos))) return ERR_PTR(-EINVAL); it = kzalloc(sizeof(*it), GFP_KERNEL); @@ -4272,10 +4275,11 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, return ERR_PTR(-ENOMEM); of->priv = it; css_task_iter_start(&cgrp->self, iter_flags, it); - } else if (!(*pos)++) { + } else if (!(*pos)) { css_task_iter_end(it); css_task_iter_start(&cgrp->self, iter_flags, it); - } + } else + return it->cur_task; return cgroup_procs_next(s, NULL, NULL); } From 713f26696c8c8f7121cabe1a5a44353ffccda06e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Koutn=C3=BD?= Date: Fri, 24 Jan 2020 12:40:15 +0100 Subject: [PATCH 3653/3715] cgroup: Iterate tasks that did not finish do_exit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9c974c77246460fa6a92c18554c3311c8c83c160 upstream. PF_EXITING is set earlier than actual removal from css_set when a task is exitting. This can confuse cgroup.procs readers who see no PF_EXITING tasks, however, rmdir is checking against css_set membership so it can transitionally fail with EBUSY. Fix this by listing tasks that weren't unlinked from css_set active lists. It may happen that other users of the task iterator (without CSS_TASK_ITER_PROCS) spot a PF_EXITING task before cgroup_exit(). This is equal to the state before commit c03cd7738a83 ("cgroup: Include dying leaders with live threads in PROCS iterations") but it may be reviewed later. Reported-by: Suren Baghdasaryan Fixes: c03cd7738a83 ("cgroup: Include dying leaders with live threads in PROCS iterations") Signed-off-by: Michal Koutný Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- include/linux/cgroup.h | 1 + kernel/cgroup/cgroup.c | 23 ++++++++++++++++------- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 0e21619f1c03..61ab21c34866 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -61,6 +61,7 @@ struct css_task_iter { struct list_head *mg_tasks_head; struct list_head *dying_tasks_head; + struct list_head *cur_tasks_head; struct css_set *cur_cset; struct css_set *cur_dcset; struct task_struct *cur_task; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 4dba8069f036..2b3f2ea6a8a3 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -4051,12 +4051,16 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it) } } while (!css_set_populated(cset) && list_empty(&cset->dying_tasks)); - if (!list_empty(&cset->tasks)) + if (!list_empty(&cset->tasks)) { it->task_pos = cset->tasks.next; - else if (!list_empty(&cset->mg_tasks)) + it->cur_tasks_head = &cset->tasks; + } else if (!list_empty(&cset->mg_tasks)) { it->task_pos = cset->mg_tasks.next; - else + it->cur_tasks_head = &cset->mg_tasks; + } else { it->task_pos = cset->dying_tasks.next; + it->cur_tasks_head = &cset->dying_tasks; + } it->tasks_head = &cset->tasks; it->mg_tasks_head = &cset->mg_tasks; @@ -4114,10 +4118,14 @@ repeat: else it->task_pos = it->task_pos->next; - if (it->task_pos == it->tasks_head) + if (it->task_pos == it->tasks_head) { it->task_pos = it->mg_tasks_head->next; - if (it->task_pos == it->mg_tasks_head) + it->cur_tasks_head = it->mg_tasks_head; + } + if (it->task_pos == it->mg_tasks_head) { it->task_pos = it->dying_tasks_head->next; + it->cur_tasks_head = it->dying_tasks_head; + } if (it->task_pos == it->dying_tasks_head) css_task_iter_advance_css_set(it); } else { @@ -4136,11 +4144,12 @@ repeat: goto repeat; /* and dying leaders w/o live member threads */ - if (!atomic_read(&task->signal->live)) + if (it->cur_tasks_head == it->dying_tasks_head && + !atomic_read(&task->signal->live)) goto repeat; } else { /* skip all dying ones */ - if (task->flags & PF_EXITING) + if (it->cur_tasks_head == it->dying_tasks_head) goto repeat; } } From 6e9c7d95ee119911feef7ef8426d177cf6949f53 Mon Sep 17 00:00:00 2001 From: Dan Moulding Date: Tue, 28 Jan 2020 02:31:07 -0700 Subject: [PATCH 3654/3715] iwlwifi: mvm: Do not require PHY_SKU NVM section for 3168 devices commit a9149d243f259ad8f02b1e23dfe8ba06128f15e1 upstream. The logic for checking required NVM sections was recently fixed in commit b3f20e098293 ("iwlwifi: mvm: fix NVM check for 3168 devices"). However, with that fixed the else is now taken for 3168 devices and within the else clause there is a mandatory check for the PHY_SKU section. This causes the parsing to fail for 3168 devices. The PHY_SKU section is really only mandatory for the IWL_NVM_EXT layout (the phy_sku parameter of iwl_parse_nvm_data is only used when the NVM type is IWL_NVM_EXT). So this changes the PHY_SKU section check so that it's only mandatory for IWL_NVM_EXT. Fixes: b3f20e098293 ("iwlwifi: mvm: fix NVM check for 3168 devices") Signed-off-by: Dan Moulding Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/mvm/nvm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c index 8f3032b7174d..b2e393c4fab5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c @@ -326,7 +326,8 @@ iwl_parse_nvm_sections(struct iwl_mvm *mvm) } /* PHY_SKU section is mandatory in B0 */ - if (!mvm->nvm_sections[NVM_SECTION_TYPE_PHY_SKU].data) { + if (mvm->trans->cfg->nvm_type == IWL_NVM_EXT && + !mvm->nvm_sections[NVM_SECTION_TYPE_PHY_SKU].data) { IWL_ERR(mvm, "Can't parse phy_sku in B0, empty sections\n"); return NULL; From 81b00ac7528c3b146b737eb6d4d8cbe107f1d8e5 Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Thu, 13 Feb 2020 13:37:27 +0100 Subject: [PATCH 3655/3715] virtio-blk: fix hw_queue stopped on arbitrary error commit f5f6b95c72f7f8bb46eace8c5306c752d0133daa upstream. Since nobody else is going to restart our hw_queue for us, the blk_mq_start_stopped_hw_queues() is in virtblk_done() is not sufficient necessarily sufficient to ensure that the queue will get started again. In case of global resource outage (-ENOMEM because mapping failure, because of swiotlb full) our virtqueue may be empty and we can get stuck with a stopped hw_queue. Let us not stop the queue on arbitrary errors, but only on -EONSPC which indicates a full virtqueue, where the hw_queue is guaranteed to get started by virtblk_done() before when it makes sense to carry on submitting requests. Let us also remove a stale comment. Signed-off-by: Halil Pasic Cc: Jens Axboe Fixes: f7728002c1c7 ("virtio_ring: fix return code on DMA mapping fails") Link: https://lore.kernel.org/r/20200213123728.61216-2-pasic@linux.ibm.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi Signed-off-by: Greg Kroah-Hartman --- drivers/block/virtio_blk.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 8767401f75e0..19d226ff15ef 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -271,10 +271,12 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num); if (err) { virtqueue_kick(vblk->vqs[qid].vq); - blk_mq_stop_hw_queue(hctx); + /* Don't stop the queue if -ENOMEM: we may have failed to + * bounce the buffer due to global resource outage. + */ + if (err == -ENOSPC) + blk_mq_stop_hw_queue(hctx); spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); - /* Out of mem doesn't actually happen, since we fall back - * to direct descriptors */ if (err == -ENOMEM || err == -ENOSPC) return BLK_STS_RESOURCE; return BLK_STS_IOERR; From 209ac82ca0d70738764870af33bbbcb35dc4d2d0 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 9 Mar 2020 19:25:10 +0100 Subject: [PATCH 3656/3715] iommu/vt-d: quirk_ioat_snb_local_iommu: replace WARN_TAINT with pr_warn + add_taint commit 81ee85d0462410de8eeeec1b9761941fd6ed8c7b upstream. Quoting from the comment describing the WARN functions in include/asm-generic/bug.h: * WARN(), WARN_ON(), WARN_ON_ONCE, and so on can be used to report * significant kernel issues that need prompt attention if they should ever * appear at runtime. * * Do not use these macros when checking for invalid external inputs The (buggy) firmware tables which the dmar code was calling WARN_TAINT for really are invalid external inputs. They are not under the kernel's control and the issues in them cannot be fixed by a kernel update. So logging a backtrace, which invites bug reports to be filed about this, is not helpful. Fixes: 556ab45f9a77 ("ioat2: catch and recover from broken vtd configurations v6") Signed-off-by: Hans de Goede Acked-by: Lu Baolu Link: https://lore.kernel.org/r/20200309182510.373875-1-hdegoede@redhat.com BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=701847 Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/intel-iommu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index b48666849dbe..b8aa5e60e4c3 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3984,10 +3984,11 @@ static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev) /* we know that the this iommu should be at offset 0xa000 from vtbar */ drhd = dmar_find_matched_drhd_unit(pdev); - if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000, - TAINT_FIRMWARE_WORKAROUND, - "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n")) + if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) { + pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; + } } DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu); From 48c336253b74bba9bd9d8d1c0d27dc7ead2de9af Mon Sep 17 00:00:00 2001 From: Hillf Danton Date: Fri, 24 Jan 2020 20:14:45 -0500 Subject: [PATCH 3657/3715] workqueue: don't use wq_select_unbound_cpu() for bound works commit aa202f1f56960c60e7befaa0f49c72b8fa11b0a8 upstream. wq_select_unbound_cpu() is designed for unbound workqueues only, but it's wrongly called when using a bound workqueue too. Fixing this ensures work queued to a bound workqueue with cpu=WORK_CPU_UNBOUND always runs on the local CPU. Before, that would happen only if wq_unbound_cpumask happened to include it (likely almost always the case), or was empty, or we got lucky with forced round-robin placement. So restricting /sys/devices/virtual/workqueue/cpumask to a small subset of a machine's CPUs would cause some bound work items to run unexpectedly there. Fixes: ef557180447f ("workqueue: schedule WORK_CPU_UNBOUND work on wq_unbound_cpumask CPUs") Cc: stable@vger.kernel.org # v4.5+ Signed-off-by: Hillf Danton [dj: massage changelog] Signed-off-by: Daniel Jordan Cc: Tejun Heo Cc: Lai Jiangshan Cc: linux-kernel@vger.kernel.org Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- kernel/workqueue.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index a37f5dc7cb39..18fae55713b0 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1386,14 +1386,16 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, WARN_ON_ONCE(!is_chained_work(wq))) return; retry: - if (req_cpu == WORK_CPU_UNBOUND) - cpu = wq_select_unbound_cpu(raw_smp_processor_id()); - /* pwq which will be used unless @work is executing elsewhere */ - if (!(wq->flags & WQ_UNBOUND)) - pwq = per_cpu_ptr(wq->cpu_pwqs, cpu); - else + if (wq->flags & WQ_UNBOUND) { + if (req_cpu == WORK_CPU_UNBOUND) + cpu = wq_select_unbound_cpu(raw_smp_processor_id()); pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu)); + } else { + if (req_cpu == WORK_CPU_UNBOUND) + cpu = raw_smp_processor_id(); + pwq = per_cpu_ptr(wq->cpu_pwqs, cpu); + } /* * If @work was previously on a different pool, it might still be From 9797798d7d384bc9ca58b91c577be1aa42eab806 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 8 Nov 2019 14:45:27 +0000 Subject: [PATCH 3658/3715] drm/amd/display: remove duplicated assignment to grph_obj_type commit d785476c608c621b345dd9396e8b21e90375cb0e upstream. Variable grph_obj_type is being assigned twice, one of these is redundant so remove it. Addresses-Coverity: ("Evaluation order violation") Signed-off-by: Colin Ian King Signed-off-by: Alex Deucher Cc: Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 4779740421a8..2153f19e59cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -363,8 +363,7 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device * router.ddc_valid = false; router.cd_valid = false; for (j = 0; j < ((le16_to_cpu(path->usSize) - 8) / 2); j++) { - uint8_t grph_obj_type= - grph_obj_type = + uint8_t grph_obj_type = (le16_to_cpu(path->usGraphicObjIds[j]) & OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT; From 9cc22f086221a6eb6f2f6dd76cf16eab9d599580 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 9 Mar 2020 16:00:11 -0400 Subject: [PATCH 3659/3715] ktest: Add timeout for ssh sync testing commit 4d00fc477a2ce8b6d2b09fb34ef9fe9918e7d434 upstream. Before rebooting the box, a "ssh sync" is called to the test machine to see if it is alive or not. But if the test machine is in a partial state, that ssh may never actually finish, and the ktest test hangs. Add a 10 second timeout to the sync test, which will fail after 10 seconds and then cause the test to reboot the test machine. Cc: stable@vger.kernel.org Fixes: 6474ace999edd ("ktest.pl: Powercycle the box on reboot if no connection can be made") Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- tools/testing/ktest/ktest.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 0c8b61f8398e..3bdd6a463819 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -1345,7 +1345,7 @@ sub reboot { } else { # Make sure everything has been written to disk - run_ssh("sync"); + run_ssh("sync", 10); if (defined($time)) { start_monitor; From b4ef7d85fe8677effad26e1c869ab47414314a4f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 12 Mar 2020 18:25:20 -0400 Subject: [PATCH 3660/3715] cifs_atomic_open(): fix double-put on late allocation failure commit d9a9f4849fe0c9d560851ab22a85a666cddfdd24 upstream. several iterations of ->atomic_open() calling conventions ago, we used to need fput() if ->atomic_open() failed at some point after successful finish_open(). Now (since 2016) it's not needed - struct file carries enough state to make fput() work regardless of the point in struct file lifecycle and discarding it on failure exits in open() got unified. Unfortunately, I'd missed the fact that we had an instance of ->atomic_open() (cifs one) that used to need that fput(), as well as the stale comment in finish_open() demanding such late failure handling. Trivially fixed... Fixes: fe9ec8291fca "do_last(): take fput() on error after opening to out:" Cc: stable@kernel.org # v4.7+ Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- Documentation/filesystems/porting | 7 +++++++ fs/cifs/dir.c | 1 - fs/open.c | 3 --- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 93e0a2404532..c757c1c3cb81 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -606,3 +606,10 @@ in your dentry operations instead. dentry separately, and it now has request_mask and query_flags arguments to specify the fields and sync type requested by statx. Filesystems not supporting any statx-specific features may ignore the new arguments. +-- +[mandatory] + + [should've been added in 2016] stale comment in finish_open() + nonwithstanding, failure exits in ->atomic_open() instances should + *NOT* fput() the file, no matter what. Everything is handled by the + caller. diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index f00a7ce3eb6e..03293e543c07 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -562,7 +562,6 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, if (server->ops->close) server->ops->close(xid, tcon, &fid); cifs_del_pending_open(&open); - fput(file); rc = -ENOMEM; } diff --git a/fs/open.c b/fs/open.c index 29a2cdcbcb17..49fd070be0ec 100644 --- a/fs/open.c +++ b/fs/open.c @@ -824,9 +824,6 @@ cleanup_file: * the return value of d_splice_alias(), then the caller needs to perform dput() * on it after finish_open(). * - * On successful return @file is a fully instantiated open file. After this, if - * an error occurs in ->atomic_open(), it needs to clean up with fput(). - * * Returns zero on success or -errno if the open failed. */ int finish_open(struct file *file, struct dentry *dentry, From 365851b725f87527023a2448213e2f3c76002cd0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 Mar 2020 09:31:41 -0400 Subject: [PATCH 3661/3715] gfs2_atomic_open(): fix O_EXCL|O_CREAT handling on cold dcache commit 21039132650281de06a169cbe8a0f7e5c578fd8b upstream. with the way fs/namei.c:do_last() had been done, ->atomic_open() instances needed to recognize the case when existing file got found with O_EXCL|O_CREAT, either by falling back to finish_no_open() or failing themselves. gfs2 one didn't. Fixes: 6d4ade986f9c (GFS2: Add atomic_open support) Cc: stable@kernel.org # v3.11 Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/gfs2/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index c850579ae5a4..6c6401084d3d 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1255,7 +1255,7 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry, if (!(*opened & FILE_OPENED)) return finish_no_open(file, d); dput(d); - return 0; + return excl && (flags & O_CREAT) ? -EEXIST : 0; } BUG_ON(d != NULL); From 02cba24f984cc796d6d62839b877ae553fff0d5c Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 3 Mar 2020 15:33:15 +0100 Subject: [PATCH 3662/3715] KVM: x86: clear stale x86_emulate_ctxt->intercept value commit 342993f96ab24d5864ab1216f46c0b199c2baf8e upstream. After commit 07721feee46b ("KVM: nVMX: Don't emulate instructions in guest mode") Hyper-V guests on KVM stopped booting with: kvm_nested_vmexit: rip fffff802987d6169 reason EPT_VIOLATION info1 181 info2 0 int_info 0 int_info_err 0 kvm_page_fault: address febd0000 error_code 181 kvm_emulate_insn: 0:fffff802987d6169: f3 a5 kvm_emulate_insn: 0:fffff802987d6169: f3 a5 FAIL kvm_inj_exception: #UD (0x0) "f3 a5" is a "rep movsw" instruction, which should not be intercepted at all. Commit c44b4c6ab80e ("KVM: emulate: clean up initializations in init_decode_cache") reduced the number of fields cleared by init_decode_cache() claiming that they are being cleared elsewhere, 'intercept', however, is left uncleared if the instruction does not have any of the "slow path" flags (NotImpl, Stack, Op3264, Sse, Mmx, CheckPerm, NearBranch, No16 and of course Intercept itself). Fixes: c44b4c6ab80e ("KVM: emulate: clean up initializations in init_decode_cache") Fixes: 07721feee46b ("KVM: nVMX: Don't emulate instructions in guest mode") Cc: stable@vger.kernel.org Suggested-by: Paolo Bonzini Signed-off-by: Vitaly Kuznetsov Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 70f3636aff11..4cc8a4a6f1d0 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -5062,6 +5062,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) ctxt->fetch.ptr = ctxt->fetch.data; ctxt->fetch.end = ctxt->fetch.data + insn_len; ctxt->opcode_len = 1; + ctxt->intercept = x86_intercept_none; if (insn_len > 0) memcpy(ctxt->fetch.data, insn, insn_len); else { From b08e88aca8ded59b810f8cb96d51541d60c3157c Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 11 Mar 2020 19:26:43 +0300 Subject: [PATCH 3663/3715] ARC: define __ALIGN_STR and __ALIGN symbols for ARC commit 8d92e992a785f35d23f845206cf8c6cafbc264e0 upstream. The default defintions use fill pattern 0x90 for padding which for ARC generates unintended "ldh_s r12,[r0,0x20]" corresponding to opcode 0x9090 So use ".align 4" which insert a "nop_s" instruction instead. Cc: stable@vger.kernel.org Acked-by: Vineet Gupta Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/linkage.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arc/include/asm/linkage.h b/arch/arc/include/asm/linkage.h index b29f1a9fd6f7..07c8e1a6c56e 100644 --- a/arch/arc/include/asm/linkage.h +++ b/arch/arc/include/asm/linkage.h @@ -14,6 +14,8 @@ #ifdef __ASSEMBLY__ #define ASM_NL ` /* use '`' to mark new line in macro */ +#define __ALIGN .align 4 +#define __ALIGN_STR __stringify(__ALIGN) /* annotation for data we want in DCCM - if enabled in .config */ .macro ARCFP_DATA nm From 5c3d354b51be3546dacbe55bde67eccdd108d007 Mon Sep 17 00:00:00 2001 From: Vladis Dronov Date: Sun, 8 Mar 2020 09:08:54 +0100 Subject: [PATCH 3664/3715] efi: Fix a race and a buffer overflow while reading efivars via sysfs commit 286d3250c9d6437340203fb64938bea344729a0e upstream. There is a race and a buffer overflow corrupting a kernel memory while reading an EFI variable with a size more than 1024 bytes via the older sysfs method. This happens because accessing struct efi_variable in efivar_{attr,size,data}_read() and friends is not protected from a concurrent access leading to a kernel memory corruption and, at best, to a crash. The race scenario is the following: CPU0: CPU1: efivar_attr_read() var->DataSize = 1024; efivar_entry_get(... &var->DataSize) down_interruptible(&efivars_lock) efivar_attr_read() // same EFI var var->DataSize = 1024; efivar_entry_get(... &var->DataSize) down_interruptible(&efivars_lock) virt_efi_get_variable() // returns EFI_BUFFER_TOO_SMALL but // var->DataSize is set to a real // var size more than 1024 bytes up(&efivars_lock) virt_efi_get_variable() // called with var->DataSize set // to a real var size, returns // successfully and overwrites // a 1024-bytes kernel buffer up(&efivars_lock) This can be reproduced by concurrent reading of an EFI variable which size is more than 1024 bytes: ts# for cpu in $(seq 0 $(nproc --ignore=1)); do ( taskset -c $cpu \ cat /sys/firmware/efi/vars/KEKDefault*/size & ) ; done Fix this by using a local variable for a var's data buffer size so it does not get overwritten. Fixes: e14ab23dde12b80d ("efivars: efivar_entry API") Reported-by: Bob Sanders and the LTP testsuite Signed-off-by: Vladis Dronov Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Cc: Link: https://lore.kernel.org/r/20200305084041.24053-2-vdronov@redhat.com Link: https://lore.kernel.org/r/20200308080859.21568-24-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/efivars.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c index 3e626fd9bd4e..c8688490f148 100644 --- a/drivers/firmware/efi/efivars.c +++ b/drivers/firmware/efi/efivars.c @@ -139,13 +139,16 @@ static ssize_t efivar_attr_read(struct efivar_entry *entry, char *buf) { struct efi_variable *var = &entry->var; + unsigned long size = sizeof(var->Data); char *str = buf; + int ret; if (!entry || !buf) return -EINVAL; - var->DataSize = 1024; - if (efivar_entry_get(entry, &var->Attributes, &var->DataSize, var->Data)) + ret = efivar_entry_get(entry, &var->Attributes, &size, var->Data); + var->DataSize = size; + if (ret) return -EIO; if (var->Attributes & EFI_VARIABLE_NON_VOLATILE) @@ -172,13 +175,16 @@ static ssize_t efivar_size_read(struct efivar_entry *entry, char *buf) { struct efi_variable *var = &entry->var; + unsigned long size = sizeof(var->Data); char *str = buf; + int ret; if (!entry || !buf) return -EINVAL; - var->DataSize = 1024; - if (efivar_entry_get(entry, &var->Attributes, &var->DataSize, var->Data)) + ret = efivar_entry_get(entry, &var->Attributes, &size, var->Data); + var->DataSize = size; + if (ret) return -EIO; str += sprintf(str, "0x%lx\n", var->DataSize); @@ -189,12 +195,15 @@ static ssize_t efivar_data_read(struct efivar_entry *entry, char *buf) { struct efi_variable *var = &entry->var; + unsigned long size = sizeof(var->Data); + int ret; if (!entry || !buf) return -EINVAL; - var->DataSize = 1024; - if (efivar_entry_get(entry, &var->Attributes, &var->DataSize, var->Data)) + ret = efivar_entry_get(entry, &var->Attributes, &size, var->Data); + var->DataSize = size; + if (ret) return -EIO; memcpy(buf, var->Data, var->DataSize); @@ -314,14 +323,16 @@ efivar_show_raw(struct efivar_entry *entry, char *buf) { struct efi_variable *var = &entry->var; struct compat_efi_variable *compat; + unsigned long datasize = sizeof(var->Data); size_t size; + int ret; if (!entry || !buf) return 0; - var->DataSize = 1024; - if (efivar_entry_get(entry, &entry->var.Attributes, - &entry->var.DataSize, entry->var.Data)) + ret = efivar_entry_get(entry, &var->Attributes, &datasize, var->Data); + var->DataSize = datasize; + if (ret) return -EIO; if (is_compat()) { From ef0d4fec18b8cb0901058b46da75e3fcf7265f55 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 25 Feb 2020 17:17:37 -0800 Subject: [PATCH 3665/3715] x86/mce: Fix logic and comments around MSR_PPIN_CTL commit 59b5809655bdafb0767d3fd00a3e41711aab07e6 upstream. There are two implemented bits in the PPIN_CTL MSR: Bit 0: LockOut (R/WO) Set 1 to prevent further writes to MSR_PPIN_CTL. Bit 1: Enable_PPIN (R/W) If 1, enables MSR_PPIN to be accessible using RDMSR. If 0, an attempt to read MSR_PPIN will cause #GP. So there are four defined values: 0: PPIN is disabled, PPIN_CTL may be updated 1: PPIN is disabled. PPIN_CTL is locked against updates 2: PPIN is enabled. PPIN_CTL may be updated 3: PPIN is enabled. PPIN_CTL is locked against updates Code would only enable the X86_FEATURE_INTEL_PPIN feature for case "2". When it should have done so for both case "2" and case "3". Fix the final test to just check for the enable bit. Also fix some of the other comments in this function. Fixes: 3f5a7896a509 ("x86/mce: Include the PPIN in MCE records when available") Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Cc: Link: https://lkml.kernel.org/r/20200226011737.9958-1-tony.luck@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mcheck/mce_intel.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index d05be307d081..1d87b85150db 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -489,17 +489,18 @@ static void intel_ppin_init(struct cpuinfo_x86 *c) return; if ((val & 3UL) == 1UL) { - /* PPIN available but disabled: */ + /* PPIN locked in disabled mode */ return; } - /* If PPIN is disabled, but not locked, try to enable: */ - if (!(val & 3UL)) { + /* If PPIN is disabled, try to enable */ + if (!(val & 2UL)) { wrmsrl_safe(MSR_PPIN_CTL, val | 2UL); rdmsrl_safe(MSR_PPIN_CTL, &val); } - if ((val & 3UL) == 2UL) + /* Is the enable bit set? */ + if (val & 2UL) set_cpu_cap(c, X86_FEATURE_INTEL_PPIN); } } From f6efa6116f332a78791ae5b0b6076525184c09ad Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 4 Mar 2020 11:11:17 +0000 Subject: [PATCH 3666/3715] iommu/dma: Fix MSI reservation allocation commit 65ac74f1de3334852fb7d9b1b430fa5a06524276 upstream. The way cookie_init_hw_msi_region() allocates the iommu_dma_msi_page structures doesn't match the way iommu_put_dma_cookie() frees them. The former performs a single allocation of all the required structures, while the latter tries to free them one at a time. It doesn't quite work for the main use case (the GICv3 ITS where the range is 64kB) when the base granule size is 4kB. This leads to a nice slab corruption on teardown, which is easily observable by simply creating a VF on a SRIOV-capable device, and tearing it down immediately (no need to even make use of it). Fortunately, this only affects systems where the ITS isn't translated by the SMMU, which are both rare and non-standard. Fix it by allocating iommu_dma_msi_page structures one at a time. Fixes: 7c1b058c8b5a3 ("iommu/dma: Handle IOMMU API reserved regions") Signed-off-by: Marc Zyngier Reviewed-by: Eric Auger Cc: Robin Murphy Cc: Joerg Roedel Cc: Will Deacon Cc: stable@vger.kernel.org Reviewed-by: Robin Murphy Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/dma-iommu.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index c87764a4e212..8000b798e6e6 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -208,15 +208,15 @@ static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie, start -= iova_offset(iovad, start); num_pages = iova_align(iovad, end - start) >> iova_shift(iovad); - msi_page = kcalloc(num_pages, sizeof(*msi_page), GFP_KERNEL); - if (!msi_page) - return -ENOMEM; - for (i = 0; i < num_pages; i++) { - msi_page[i].phys = start; - msi_page[i].iova = start; - INIT_LIST_HEAD(&msi_page[i].list); - list_add(&msi_page[i].list, &cookie->msi_page_list); + msi_page = kmalloc(sizeof(*msi_page), GFP_KERNEL); + if (!msi_page) + return -ENOMEM; + + msi_page->phys = start; + msi_page->iova = start; + INIT_LIST_HEAD(&msi_page->list); + list_add(&msi_page->list, &cookie->msi_page_list); start += iovad->granule; } From f680da6339f54ba3c522fc4ffa835513531eb5aa Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 9 Mar 2020 15:01:37 +0100 Subject: [PATCH 3667/3715] iommu/vt-d: dmar: replace WARN_TAINT with pr_warn + add_taint commit 59833696442c674acbbd297772ba89e7ad8c753d upstream. Quoting from the comment describing the WARN functions in include/asm-generic/bug.h: * WARN(), WARN_ON(), WARN_ON_ONCE, and so on can be used to report * significant kernel issues that need prompt attention if they should ever * appear at runtime. * * Do not use these macros when checking for invalid external inputs The (buggy) firmware tables which the dmar code was calling WARN_TAINT for really are invalid external inputs. They are not under the kernel's control and the issues in them cannot be fixed by a kernel update. So logging a backtrace, which invites bug reports to be filed about this, is not helpful. Some distros, e.g. Fedora, have tools watching for the kernel backtraces logged by the WARN macros and offer the user an option to file a bug for this when these are encountered. The WARN_TAINT in warn_invalid_dmar() + another iommu WARN_TAINT, addressed in another patch, have lead to over a 100 bugs being filed this way. This commit replaces the WARN_TAINT("...") calls, with pr_warn(FW_BUG "...") + add_taint(TAINT_FIRMWARE_WORKAROUND, ...) calls avoiding the backtrace and thus also avoiding bug-reports being filed about this against the kernel. Fixes: fd0c8894893c ("intel-iommu: Set a more specific taint flag for invalid BIOS DMAR tables") Fixes: e625b4a95d50 ("iommu/vt-d: Parse ANDD records") Signed-off-by: Hans de Goede Signed-off-by: Joerg Roedel Acked-by: Lu Baolu Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200309140138.3753-2-hdegoede@redhat.com BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1564895 Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/dmar.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 38d0128b8135..f04a4edc5cfc 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -451,12 +451,13 @@ static int __init dmar_parse_one_andd(struct acpi_dmar_header *header, /* Check for NUL termination within the designated length */ if (strnlen(andd->device_name, header->length - 8) == header->length - 8) { - WARN_TAINT(1, TAINT_FIRMWARE_WORKAROUND, + pr_warn(FW_BUG "Your BIOS is broken; ANDD object name is not NUL-terminated\n" "BIOS vendor: %s; Ver: %s; Product Version: %s\n", dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); return -EINVAL; } pr_info("ANDD device: %x name: %s\n", andd->device_number, @@ -482,14 +483,14 @@ static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg) return 0; } } - WARN_TAINT( - 1, TAINT_FIRMWARE_WORKAROUND, + pr_warn(FW_BUG "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n" "BIOS vendor: %s; Ver: %s; Product Version: %s\n", drhd->reg_base_addr, dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); return 0; } @@ -835,14 +836,14 @@ int __init dmar_table_init(void) static void warn_invalid_dmar(u64 addr, const char *message) { - WARN_TAINT_ONCE( - 1, TAINT_FIRMWARE_WORKAROUND, + pr_warn_once(FW_BUG "Your BIOS is broken; DMAR reported at address %llx%s!\n" "BIOS vendor: %s; Ver: %s; Product Version: %s\n", addr, message, dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); } static int __ref From a691dd3d0563b495aa7d816119a51c508976d791 Mon Sep 17 00:00:00 2001 From: Yonghyun Hwang Date: Wed, 26 Feb 2020 12:30:06 -0800 Subject: [PATCH 3668/3715] iommu/vt-d: Fix a bug in intel_iommu_iova_to_phys() for huge page commit 77a1bce84bba01f3f143d77127b72e872b573795 upstream. intel_iommu_iova_to_phys() has a bug when it translates an IOVA for a huge page onto its corresponding physical address. This commit fixes the bug by accomodating the level of page entry for the IOVA and adds IOVA's lower address to the physical address. Cc: Acked-by: Lu Baolu Reviewed-by: Moritz Fischer Signed-off-by: Yonghyun Hwang Fixes: 3871794642579 ("VT-d: Changes to support KVM") Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/intel-iommu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index b8aa5e60e4c3..db1b546134f5 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5124,8 +5124,10 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, u64 phys = 0; pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level); - if (pte) - phys = dma_pte_addr(pte); + if (pte && dma_pte_present(pte)) + phys = dma_pte_addr(pte) + + (iova & (BIT_MASK(level_to_offset_bits(level) + + VTD_PAGE_SHIFT) - 1)); return phys; } From 217a7c2200944aec137d83838cbdb29d24d105da Mon Sep 17 00:00:00 2001 From: Nicolas Belin Date: Thu, 20 Feb 2020 14:15:12 +0100 Subject: [PATCH 3669/3715] pinctrl: meson-gxl: fix GPIOX sdio pins commit dc7a06b0dbbafac8623c2b7657e61362f2f479a7 upstream. In the gxl driver, the sdio cmd and clk pins are inverted. It has not caused any issue so far because devices using these pins always take both pins so the resulting configuration is OK. Fixes: 0f15f500ff2c ("pinctrl: meson: Add GXL pinctrl definitions") Reviewed-by: Jerome Brunet Signed-off-by: Nicolas Belin Link: https://lore.kernel.org/r/1582204512-7582-1-git-send-email-nbelin@baylibre.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/meson/pinctrl-meson-gxl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson-gxl.c b/drivers/pinctrl/meson/pinctrl-meson-gxl.c index 36c14b85fc7c..8db182067ecb 100644 --- a/drivers/pinctrl/meson/pinctrl-meson-gxl.c +++ b/drivers/pinctrl/meson/pinctrl-meson-gxl.c @@ -158,8 +158,8 @@ static const unsigned int sdio_d0_pins[] = { PIN(GPIOX_0, EE_OFF) }; static const unsigned int sdio_d1_pins[] = { PIN(GPIOX_1, EE_OFF) }; static const unsigned int sdio_d2_pins[] = { PIN(GPIOX_2, EE_OFF) }; static const unsigned int sdio_d3_pins[] = { PIN(GPIOX_3, EE_OFF) }; -static const unsigned int sdio_cmd_pins[] = { PIN(GPIOX_4, EE_OFF) }; -static const unsigned int sdio_clk_pins[] = { PIN(GPIOX_5, EE_OFF) }; +static const unsigned int sdio_clk_pins[] = { PIN(GPIOX_4, EE_OFF) }; +static const unsigned int sdio_cmd_pins[] = { PIN(GPIOX_5, EE_OFF) }; static const unsigned int sdio_irq_pins[] = { PIN(GPIOX_7, EE_OFF) }; static const unsigned int nand_ce0_pins[] = { PIN(BOOT_8, EE_OFF) }; From 6f3817634e1d5b9006270fd7381ac7225551eabf Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 28 Feb 2020 15:41:42 +0000 Subject: [PATCH 3670/3715] pinctrl: core: Remove extra kref_get which blocks hogs being freed commit aafd56fc79041bf36f97712d4b35208cbe07db90 upstream. kref_init starts with the reference count at 1, which will be balanced by the pinctrl_put in pinctrl_unregister. The additional kref_get in pinctrl_claim_hogs will increase this count to 2 and cause the hogs to not get freed when pinctrl_unregister is called. Fixes: 6118714275f0 ("pinctrl: core: Fix pinctrl_register_and_init() with pinctrl_enable()") Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20200228154142.13860-1-ckeepax@opensource.cirrus.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index c55517312485..08ea74177de2 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -2031,7 +2031,6 @@ static int pinctrl_claim_hogs(struct pinctrl_dev *pctldev) return PTR_ERR(pctldev->p); } - kref_get(&pctldev->p->users); pctldev->hog_default = pinctrl_lookup_state(pctldev->p, PINCTRL_STATE_DEFAULT); if (IS_ERR(pctldev->hog_default)) { From 1cb937c39c8c6b4fc5933ca3c50f5426f8d3357c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Mar 2020 21:10:56 -0800 Subject: [PATCH 3671/3715] nl80211: add missing attribute validation for critical protocol indication commit 0e1a1d853ecedc99da9d27f9f5c376935547a0e2 upstream. Add missing attribute validation for critical protocol fields to the netlink policy. Fixes: 5de17984898c ("cfg80211: introduce critical protocol indication from user-space") Signed-off-by: Jakub Kicinski Link: https://lore.kernel.org/r/20200303051058.4089398-2-kuba@kernel.org Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/nl80211.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b248578aeb7b..8165e0c147a2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -395,6 +395,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_MDID] = { .type = NLA_U16 }, [NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, + [NL80211_ATTR_CRIT_PROT_ID] = { .type = NLA_U16 }, + [NL80211_ATTR_MAX_CRIT_PROT_DURATION] = { .type = NLA_U16 }, [NL80211_ATTR_PEER_AID] = { .type = NLA_U16 }, [NL80211_ATTR_CH_SWITCH_COUNT] = { .type = NLA_U32 }, [NL80211_ATTR_CH_SWITCH_BLOCK_TX] = { .type = NLA_FLAG }, From 214022646ecf8750cfbc97366832d9aafae1b555 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Mar 2020 21:10:57 -0800 Subject: [PATCH 3672/3715] nl80211: add missing attribute validation for beacon report scanning commit 056e9375e1f3c4bf2fd49b70258c7daf788ecd9d upstream. Add missing attribute validation for beacon report scanning to the netlink policy. Fixes: 1d76250bd34a ("nl80211: support beacon report scanning") Signed-off-by: Jakub Kicinski Link: https://lore.kernel.org/r/20200303051058.4089398-3-kuba@kernel.org Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/nl80211.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 8165e0c147a2..96e527fc131e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -347,6 +347,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_KEY_DEFAULT_TYPES] = { .type = NLA_NESTED }, [NL80211_ATTR_WOWLAN_TRIGGERS] = { .type = NLA_NESTED }, [NL80211_ATTR_STA_PLINK_STATE] = { .type = NLA_U8 }, + [NL80211_ATTR_MEASUREMENT_DURATION] = { .type = NLA_U16 }, + [NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY] = { .type = NLA_FLAG }, [NL80211_ATTR_SCHED_SCAN_INTERVAL] = { .type = NLA_U32 }, [NL80211_ATTR_REKEY_DATA] = { .type = NLA_NESTED }, [NL80211_ATTR_SCAN_SUPP_RATES] = { .type = NLA_NESTED }, From 66be2d1ab3c6ad4c98014fbf508882b30a50a78f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Mar 2020 21:10:58 -0800 Subject: [PATCH 3673/3715] nl80211: add missing attribute validation for channel switch commit 5cde05c61cbe13cbb3fa66d52b9ae84f7975e5e6 upstream. Add missing attribute validation for NL80211_ATTR_OPER_CLASS to the netlink policy. Fixes: 1057d35ede5d ("cfg80211: introduce TDLS channel switch commands") Signed-off-by: Jakub Kicinski Link: https://lore.kernel.org/r/20200303051058.4089398-4-kuba@kernel.org Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/nl80211.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 96e527fc131e..d0b75781e6f7 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -424,6 +424,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_USER_PRIO] = { .type = NLA_U8 }, [NL80211_ATTR_ADMITTED_TIME] = { .type = NLA_U16 }, [NL80211_ATTR_SMPS_MODE] = { .type = NLA_U8 }, + [NL80211_ATTR_OPER_CLASS] = { .type = NLA_U8 }, [NL80211_ATTR_MAC_MASK] = { .len = ETH_ALEN }, [NL80211_ATTR_WIPHY_SELF_MANAGED_REG] = { .type = NLA_FLAG }, [NL80211_ATTR_NETNS_FD] = { .type = NLA_U32 }, From 94d289a9e813b6b6cdb9c0255cb686d1b09a2284 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Mar 2020 21:08:31 -0800 Subject: [PATCH 3674/3715] netfilter: cthelper: add missing attribute validation for cthelper commit c049b3450072b8e3998053490e025839fecfef31 upstream. Add missing attribute validation for cthelper to the netlink policy. Fixes: 12f7a505331e ("netfilter: add user-space connection tracking helper infrastructure") Signed-off-by: Jakub Kicinski Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nfnetlink_cthelper.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index d33ce6d5ebce..dd1030f5dd5e 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -733,6 +733,8 @@ static const struct nla_policy nfnl_cthelper_policy[NFCTH_MAX+1] = { [NFCTH_NAME] = { .type = NLA_NUL_STRING, .len = NF_CT_HELPER_NAME_LEN-1 }, [NFCTH_QUEUE_NUM] = { .type = NLA_U32, }, + [NFCTH_PRIV_DATA_LEN] = { .type = NLA_U32, }, + [NFCTH_STATUS] = { .type = NLA_U32, }, }; static const struct nfnl_callback nfnl_cthelper_cb[NFNL_MSG_CTHELPER_MAX] = { From a9755e81b69b83ddba3206cf51207fabdca4f538 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Mar 2020 21:08:32 -0800 Subject: [PATCH 3675/3715] netfilter: nft_payload: add missing attribute validation for payload csum flags commit 9d6effb2f1523eb84516e44213c00f2fd9e6afff upstream. Add missing attribute validation for NFTA_PAYLOAD_CSUM_FLAGS to the netlink policy. Fixes: 1814096980bb ("netfilter: nft_payload: layer 4 checksum adjustment for pseudoheader fields") Signed-off-by: Jakub Kicinski Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nft_payload.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index e110b0ebbf58..19446a89a2a8 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -121,6 +121,7 @@ static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = { [NFTA_PAYLOAD_LEN] = { .type = NLA_U32 }, [NFTA_PAYLOAD_CSUM_TYPE] = { .type = NLA_U32 }, [NFTA_PAYLOAD_CSUM_OFFSET] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_CSUM_FLAGS] = { .type = NLA_U32 }, }; static int nft_payload_init(const struct nft_ctx *ctx, From 3628a53e6cde8a644cd12c1bb00eee7eb392d60f Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 12 Mar 2020 14:09:54 +0800 Subject: [PATCH 3676/3715] iommu/vt-d: Fix the wrong printing in RHSA parsing commit b0bb0c22c4db623f2e7b1a471596fbf1c22c6dc5 upstream. When base address in RHSA structure doesn't match base address in each DRHD structure, the base address in last DRHD is printed out. This doesn't make sense when there are multiple DRHD units, fix it by printing the buggy RHSA's base address. Signed-off-by: Lu Baolu Signed-off-by: Zhenzhong Duan Fixes: fd0c8894893cb ("intel-iommu: Set a more specific taint flag for invalid BIOS DMAR tables") Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/dmar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index f04a4edc5cfc..a7cf733bcd33 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -486,7 +486,7 @@ static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg) pr_warn(FW_BUG "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n" "BIOS vendor: %s; Ver: %s; Product Version: %s\n", - drhd->reg_base_addr, + rhsa->base_address, dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); From 24de9d8fdd877d132dc552d63121f0d1a1cf3f4d Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Thu, 12 Mar 2020 14:09:55 +0800 Subject: [PATCH 3677/3715] iommu/vt-d: Ignore devices with out-of-spec domain number MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit da72a379b2ec0bad3eb265787f7008bead0b040c upstream. VMD subdevices are created with a PCI domain ID of 0x10000 or higher. These subdevices are also handled like all other PCI devices by dmar_pci_bus_notifier(). However, when dmar_alloc_pci_notify_info() take records of such devices, it will truncate the domain ID to a u16 value (in info->seg). The device at (e.g.) 10000:00:02.0 is then treated by the DMAR code as if it is 0000:00:02.0. In the unlucky event that a real device also exists at 0000:00:02.0 and also has a device-specific entry in the DMAR table, dmar_insert_dev_scope() will crash on:   BUG_ON(i >= devices_cnt); That's basically a sanity check that only one PCI device matches a single DMAR entry; in this case we seem to have two matching devices. Fix this by ignoring devices that have a domain number higher than what can be looked up in the DMAR table. This problem was carefully diagnosed by Jian-Hong Pan. Signed-off-by: Lu Baolu Signed-off-by: Daniel Drake Fixes: 59ce0515cdaf3 ("iommu/vt-d: Update DRHD/RMRR/ATSR device scope caches when PCI hotplug happens") Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/dmar.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index a7cf733bcd33..1f527ca60955 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -139,6 +140,13 @@ dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event) BUG_ON(dev->is_virtfn); + /* + * Ignore devices that have a domain number higher than what can + * be looked up in DMAR, e.g. VMD subdevices with domain 0x10000 + */ + if (pci_domain_nr(dev->bus) > U16_MAX) + return NULL; + /* Only generate path[] for device addition event */ if (event == BUS_NOTIFY_ADD_DEVICE) for (tmp = dev; tmp; tmp = tmp->bus->self) From 5916adba73830d6b8f7f4305e4151ab4952b7ede Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 12 Mar 2020 14:32:44 +0100 Subject: [PATCH 3678/3715] i2c: acpi: put device when verifying client fails commit 8daee952b4389729358665fb91949460641659d4 upstream. i2c_verify_client() can fail, so we need to put the device when that happens. Fixes: 525e6fabeae2 ("i2c / ACPI: add support for ACPI reconfigure notifications") Reported-by: Geert Uytterhoeven Signed-off-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Reviewed-by: Andy Shevchenko Acked-by: Mika Westerberg Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/i2c-core-acpi.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c index df9800aaeac7..0d4d5dcf94f3 100644 --- a/drivers/i2c/i2c-core-acpi.c +++ b/drivers/i2c/i2c-core-acpi.c @@ -352,10 +352,18 @@ static struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle) static struct i2c_client *i2c_acpi_find_client_by_adev(struct acpi_device *adev) { struct device *dev; + struct i2c_client *client; dev = bus_find_device(&i2c_bus_type, NULL, adev, i2c_acpi_find_match_device); - return dev ? i2c_verify_client(dev) : NULL; + if (!dev) + return NULL; + + client = i2c_verify_client(dev); + if (!client) + put_device(dev); + + return client; } static int i2c_acpi_notify(struct notifier_block *nb, unsigned long value, From e929f447bea484a36515bdaf1a73dfaa8afac605 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Feb 2020 11:52:29 -0800 Subject: [PATCH 3679/3715] ipv6: restrict IPV6_ADDRFORM operation commit b6f6118901d1e867ac9177bbff3b00b185bd4fdc upstream. IPV6_ADDRFORM is able to transform IPv6 socket to IPv4 one. While this operation sounds illogical, we have to support it. One of the things it does for TCP socket is to switch sk->sk_prot to tcp_prot. We now have other layers playing with sk->sk_prot, so we should make sure to not interfere with them. This patch makes sure sk_prot is the default pointer for TCP IPv6 socket. syzbot reported : BUG: kernel NULL pointer dereference, address: 0000000000000000 PGD a0113067 P4D a0113067 PUD a8771067 PMD 0 Oops: 0010 [#1] PREEMPT SMP KASAN CPU: 0 PID: 10686 Comm: syz-executor.0 Not tainted 5.6.0-rc2-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:0x0 Code: Bad RIP value. RSP: 0018:ffffc9000281fce0 EFLAGS: 00010246 RAX: 1ffffffff15f48ac RBX: ffffffff8afa4560 RCX: dffffc0000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff8880a69a8f40 RBP: ffffc9000281fd10 R08: ffffffff86ed9b0c R09: ffffed1014d351f5 R10: ffffed1014d351f5 R11: 0000000000000000 R12: ffff8880920d3098 R13: 1ffff1101241a613 R14: ffff8880a69a8f40 R15: 0000000000000000 FS: 00007f2ae75db700(0000) GS:ffff8880aea00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffffffffd6 CR3: 00000000a3b85000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: inet_release+0x165/0x1c0 net/ipv4/af_inet.c:427 __sock_release net/socket.c:605 [inline] sock_close+0xe1/0x260 net/socket.c:1283 __fput+0x2e4/0x740 fs/file_table.c:280 ____fput+0x15/0x20 fs/file_table.c:313 task_work_run+0x176/0x1b0 kernel/task_work.c:113 tracehook_notify_resume include/linux/tracehook.h:188 [inline] exit_to_usermode_loop arch/x86/entry/common.c:164 [inline] prepare_exit_to_usermode+0x480/0x5b0 arch/x86/entry/common.c:195 syscall_return_slowpath+0x113/0x4a0 arch/x86/entry/common.c:278 do_syscall_64+0x11f/0x1c0 arch/x86/entry/common.c:304 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x45c429 Code: ad b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f2ae75dac78 EFLAGS: 00000246 ORIG_RAX: 0000000000000036 RAX: 0000000000000000 RBX: 00007f2ae75db6d4 RCX: 000000000045c429 RDX: 0000000000000001 RSI: 000000000000011a RDI: 0000000000000004 RBP: 000000000076bf20 R08: 0000000000000038 R09: 0000000000000000 R10: 0000000020000180 R11: 0000000000000246 R12: 00000000ffffffff R13: 0000000000000a9d R14: 00000000004ccfb4 R15: 000000000076bf2c Modules linked in: CR2: 0000000000000000 ---[ end trace 82567b5207e87bae ]--- RIP: 0010:0x0 Code: Bad RIP value. RSP: 0018:ffffc9000281fce0 EFLAGS: 00010246 RAX: 1ffffffff15f48ac RBX: ffffffff8afa4560 RCX: dffffc0000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff8880a69a8f40 RBP: ffffc9000281fd10 R08: ffffffff86ed9b0c R09: ffffed1014d351f5 R10: ffffed1014d351f5 R11: 0000000000000000 R12: ffff8880920d3098 R13: 1ffff1101241a613 R14: ffff8880a69a8f40 R15: 0000000000000000 FS: 00007f2ae75db700(0000) GS:ffff8880aea00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffffffffd6 CR3: 00000000a3b85000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") Signed-off-by: Eric Dumazet Reported-by: syzbot+1938db17e275e85dc328@syzkaller.appspotmail.com Cc: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ipv6_sockglue.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 5c91b05c8d8f..8c492471b0da 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -185,9 +185,15 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, retv = -EBUSY; break; } - } else if (sk->sk_protocol != IPPROTO_TCP) + } else if (sk->sk_protocol == IPPROTO_TCP) { + if (sk->sk_prot != &tcpv6_prot) { + retv = -EBUSY; + break; + } break; - + } else { + break; + } if (sk->sk_state != TCP_ESTABLISHED) { retv = -ENOTCONN; break; From 5a09fc5060754baf59b28cf92719a776d1c3bfbd Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 26 Feb 2020 17:52:46 +0100 Subject: [PATCH 3680/3715] net/smc: check for valid ib_client_data commit a2f2ef4a54c0d97aa6a8386f4ff23f36ebb488cf upstream. In smc_ib_remove_dev() check if the provided ib device was actually initialized for SMC before. Reported-by: syzbot+84484ccebdd4e5451d91@syzkaller.appspotmail.com Fixes: a4cf0443c414 ("smc: introduce SMC as an IB-client") Signed-off-by: Karsten Graul Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/smc/smc_ib.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 4410d0071515..7d89b0584944 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -513,6 +513,8 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data) struct smc_ib_device *smcibdev; smcibdev = ib_get_client_data(ibdev, &smc_ib_client); + if (!smcibdev || smcibdev->ibdev != ibdev) + return; ib_set_client_data(ibdev, &smc_ib_client, NULL); spin_lock(&smc_ib_devices.lock); list_del_init(&smcibdev->list); /* remove from smc_ib_devices */ From 02f13e4e682390d10d7ece6260d9ee4059c8f450 Mon Sep 17 00:00:00 2001 From: Vladis Dronov Date: Sun, 8 Mar 2020 09:08:55 +0100 Subject: [PATCH 3681/3715] efi: Add a sanity check to efivar_store_raw() commit d6c066fda90d578aacdf19771a027ed484a79825 upstream. Add a sanity check to efivar_store_raw() the same way efivar_{attr,size,data}_read() and efivar_show_raw() have it. Signed-off-by: Vladis Dronov Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Cc: Link: https://lore.kernel.org/r/20200305084041.24053-3-vdronov@redhat.com Link: https://lore.kernel.org/r/20200308080859.21568-25-ardb@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/efivars.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c index c8688490f148..1c65f5ac4368 100644 --- a/drivers/firmware/efi/efivars.c +++ b/drivers/firmware/efi/efivars.c @@ -272,6 +272,9 @@ efivar_store_raw(struct efivar_entry *entry, const char *buf, size_t count) u8 *data; int err; + if (!entry || !buf) + return -EINVAL; + if (is_compat()) { struct compat_efi_variable *compat; From c408b35da8ef4378b858d3f9f33f23a16989676b Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 16 Mar 2020 23:30:18 +0100 Subject: [PATCH 3682/3715] batman-adv: Avoid spurious warnings from bat_v neigh_cmp implementation commit 6a4bc44b012cbc29c9d824be2c7ab9eac8ee6b6f upstream. The neighbor compare API implementation for B.A.T.M.A.N. V checks whether the neigh_ifinfo for this neighbor on a specific interface exists. A warning is printed when it isn't found. But it is not called inside a lock which would prevent that this information is lost right before batadv_neigh_ifinfo_get. It must therefore be expected that batadv_v_neigh_(cmp|is_sob) might not be able to get the requested neigh_ifinfo. A WARN_ON for such a situation seems not to be appropriate because this will only flood the kernel logs. The warnings must therefore be removed. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/bat_v.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 371a1f1651b4..f81e67fbb352 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -19,7 +19,6 @@ #include "main.h" #include -#include #include #include #include @@ -623,11 +622,11 @@ static int batadv_v_neigh_cmp(struct batadv_neigh_node *neigh1, int ret = 0; ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); - if (WARN_ON(!ifinfo1)) + if (!ifinfo1) goto err_ifinfo1; ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); - if (WARN_ON(!ifinfo2)) + if (!ifinfo2) goto err_ifinfo2; ret = ifinfo1->bat_v.throughput - ifinfo2->bat_v.throughput; @@ -649,11 +648,11 @@ static bool batadv_v_neigh_is_sob(struct batadv_neigh_node *neigh1, bool ret = false; ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); - if (WARN_ON(!ifinfo1)) + if (!ifinfo1) goto err_ifinfo1; ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); - if (WARN_ON(!ifinfo2)) + if (!ifinfo2) goto err_ifinfo2; threshold = ifinfo1->bat_v.throughput / 4; From 1c2139faa923dcdd75f58e68959422c8475fd58f Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 16 Mar 2020 23:30:19 +0100 Subject: [PATCH 3683/3715] batman-adv: Always initialize fragment header priority commit fe77d8257c4d838c5976557ddb87bd789f312412 upstream. The batman-adv unuicast fragment header contains 3 bits for the priority of the packet. These bits will be initialized when the skb->priority contains a value between 256 and 263. But otherwise, the uninitialized bits from the stack will be used. Fixes: c0f25c802b33 ("batman-adv: Include frame priority in fragment header") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/fragmentation.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index c6d37d22bd12..788d62073964 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -500,6 +500,8 @@ int batadv_frag_send_packet(struct sk_buff *skb, */ if (skb->priority >= 256 && skb->priority <= 263) frag_header.priority = skb->priority - 256; + else + frag_header.priority = 0; ether_addr_copy(frag_header.orig, primary_if->net_dev->dev_addr); ether_addr_copy(frag_header.dest, orig_node->orig); From f0455763b0f4140f18d7f83d88b048552bbc57c8 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 16 Mar 2020 23:30:20 +0100 Subject: [PATCH 3684/3715] batman-adv: Fix check of retrieved orig_gw in batadv_v_gw_is_eligible commit 198a62ddffa4a4ffaeb741f642b7b52f2d91ae9b upstream. The batadv_v_gw_is_eligible function already assumes that orig_node is not NULL. But batadv_gw_node_get may have failed to find the originator. It must therefore be checked whether the batadv_gw_node_get failed and not whether orig_node is NULL to detect this error. Fixes: 50164d8f500f ("batman-adv: B.A.T.M.A.N. V - implement GW selection logic") Signed-off-by: Sven Eckelmann Acked-by: Antonio Quartulli Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/bat_v.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index f81e67fbb352..eb8cec14b854 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -814,7 +814,7 @@ static bool batadv_v_gw_is_eligible(struct batadv_priv *bat_priv, } orig_gw = batadv_gw_node_get(bat_priv, orig_node); - if (!orig_node) + if (!orig_gw) goto out; if (batadv_v_gw_throughput_get(orig_gw, &orig_throughput) < 0) From 16e33df6dfa83a3f7078cdc01797f502fc9a6bf4 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 16 Mar 2020 23:30:21 +0100 Subject: [PATCH 3685/3715] batman-adv: Fix lock for ogm cnt access in batadv_iv_ogm_calc_tq commit 5ba7dcfe77037b67016263ea597a8b431692ecab upstream. The originator node object orig_neigh_node is used to when accessing the bcast_own(_sum) and real_packet_count information. The access to them has to be protected with the spinlock in orig_neigh_node. But the function uses the lock in orig_node instead. This is incorrect because they could be two different originator node objects. Fixes: 0ede9f41b217 ("batman-adv: protect bit operations to count OGMs with spinlock") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/bat_iv_ogm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 8b3f9441b3a0..1dda8949734e 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -1220,7 +1220,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, orig_node->last_seen = jiffies; /* find packet count of corresponding one hop neighbor */ - spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock); + spin_lock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock); if_num = if_incoming->if_num; orig_eq_count = orig_neigh_node->bat_iv.bcast_own_sum[if_num]; neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing); @@ -1230,7 +1230,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, } else { neigh_rq_count = 0; } - spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock); + spin_unlock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock); /* pay attention to not get a value bigger than 100 % */ if (orig_eq_count > neigh_rq_count) From 78855971878cc5f6542bef42d4ef2eb403498fcd Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 16 Mar 2020 23:30:22 +0100 Subject: [PATCH 3686/3715] batman-adv: Fix internal interface indices types commit f22e08932c2960f29b5e828e745c9f3fb7c1bb86 upstream. batman-adv uses internal indices for each enabled and active interface. It is currently used by the B.A.T.M.A.N. IV algorithm to identifify the correct position in the ogm_cnt bitmaps. The type for the number of enabled interfaces (which defines the next interface index) was set to char. This type can be (depending on the architecture) either signed (limiting batman-adv to 127 active slave interfaces) or unsigned (limiting batman-adv to 255 active slave interfaces). This limit was not correctly checked when an interface was enabled and thus an overflow happened. This was only catched on systems with the signed char type when the B.A.T.M.A.N. IV code tried to resize its counter arrays with a negative size. The if_num interface index was only a s16 and therefore significantly smaller than the ifindex (int) used by the code net code. Both &batadv_hard_iface->if_num and &batadv_priv->num_ifaces must be (unsigned) int to support the same number of slave interfaces as the net core code. And the interface activation code must check the number of active slave interfaces to avoid integer overflows. Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/bat_iv_ogm.c | 24 ++++++++++++++---------- net/batman-adv/hard-interface.c | 9 +++++++-- net/batman-adv/originator.c | 4 ++-- net/batman-adv/originator.h | 4 ++-- net/batman-adv/types.h | 11 ++++++----- 5 files changed, 31 insertions(+), 21 deletions(-) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 1dda8949734e..0b2f69924444 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -149,7 +149,7 @@ static void batadv_iv_ogm_orig_free(struct batadv_orig_node *orig_node) * Return: 0 on success, a negative error code otherwise. */ static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node, - int max_if_num) + unsigned int max_if_num) { void *data_ptr; size_t old_size; @@ -193,7 +193,8 @@ unlock: */ static void batadv_iv_ogm_drop_bcast_own_entry(struct batadv_orig_node *orig_node, - int max_if_num, int del_if_num) + unsigned int max_if_num, + unsigned int del_if_num) { size_t chunk_size; size_t if_offset; @@ -231,7 +232,8 @@ batadv_iv_ogm_drop_bcast_own_entry(struct batadv_orig_node *orig_node, */ static void batadv_iv_ogm_drop_bcast_own_sum_entry(struct batadv_orig_node *orig_node, - int max_if_num, int del_if_num) + unsigned int max_if_num, + unsigned int del_if_num) { size_t if_offset; void *data_ptr; @@ -268,7 +270,8 @@ batadv_iv_ogm_drop_bcast_own_sum_entry(struct batadv_orig_node *orig_node, * Return: 0 on success, a negative error code otherwise. */ static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node, - int max_if_num, int del_if_num) + unsigned int max_if_num, + unsigned int del_if_num) { spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock); @@ -302,7 +305,8 @@ static struct batadv_orig_node * batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const u8 *addr) { struct batadv_orig_node *orig_node; - int size, hash_added; + int hash_added; + size_t size; orig_node = batadv_orig_hash_find(bat_priv, addr); if (orig_node) @@ -890,7 +894,7 @@ batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) u32 i; size_t word_index; u8 *w; - int if_num; + unsigned int if_num; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -1020,7 +1024,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, struct batadv_neigh_node *tmp_neigh_node = NULL; struct batadv_neigh_node *router = NULL; struct batadv_orig_node *orig_node_tmp; - int if_num; + unsigned int if_num; u8 sum_orig, sum_neigh; u8 *neigh_addr; u8 tq_avg; @@ -1179,7 +1183,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, u8 total_count; u8 orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own; unsigned int neigh_rq_inv_cube, neigh_rq_max_cube; - int if_num; + unsigned int if_num; unsigned int tq_asym_penalty, inv_asym_penalty; unsigned int combined_tq; unsigned int tq_iface_penalty; @@ -1698,9 +1702,9 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset, if (is_my_orig) { unsigned long *word; - int offset; + size_t offset; s32 bit_pos; - s16 if_num; + unsigned int if_num; u8 *weight; orig_neigh_node = batadv_iv_ogm_orig_get(bat_priv, diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 2e1a084b0bd2..4b67731677af 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -738,6 +738,11 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, hard_iface->soft_iface = soft_iface; bat_priv = netdev_priv(hard_iface->soft_iface); + if (bat_priv->num_ifaces >= UINT_MAX) { + ret = -ENOSPC; + goto err_dev; + } + ret = netdev_master_upper_dev_link(hard_iface->net_dev, soft_iface, NULL, NULL); if (ret) @@ -845,7 +850,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, batadv_hardif_recalc_extra_skbroom(hard_iface->soft_iface); /* nobody uses this interface anymore */ - if (!bat_priv->num_ifaces) { + if (bat_priv->num_ifaces == 0) { batadv_gw_check_client_stop(bat_priv); if (autodel == BATADV_IF_CLEANUP_AUTO) @@ -881,7 +886,7 @@ batadv_hardif_add_interface(struct net_device *net_dev) if (ret) goto free_if; - hard_iface->if_num = -1; + hard_iface->if_num = 0; hard_iface->net_dev = net_dev; hard_iface->soft_iface = NULL; hard_iface->if_status = BATADV_IF_NOT_IN_USE; diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 8e2a4b205257..653eaadcfefb 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -1500,7 +1500,7 @@ int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb) } int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, - int max_if_num) + unsigned int max_if_num) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_algo_ops *bao = bat_priv->algo_ops; @@ -1535,7 +1535,7 @@ err: } int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface, - int max_if_num) + unsigned int max_if_num) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_hashtable *hash = bat_priv->orig_hash; diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index d94220a6d21a..d6ca52220ec0 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -78,9 +78,9 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset); int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb); int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset); int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, - int max_if_num); + unsigned int max_if_num); int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface, - int max_if_num); + unsigned int max_if_num); struct batadv_orig_node_vlan * batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node, unsigned short vid); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index d5e3968619b8..dbeaa015edc9 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -155,7 +155,7 @@ enum batadv_hard_iface_wifi_flags { */ struct batadv_hard_iface { struct list_head list; - s16 if_num; + unsigned int if_num; char if_status; u8 num_bcasts; u32 wifi_flags; @@ -1081,7 +1081,7 @@ struct batadv_priv { atomic_t bcast_seqno; atomic_t bcast_queue_left; atomic_t batman_queue_left; - char num_ifaces; + unsigned int num_ifaces; struct kobject *mesh_obj; struct dentry *debug_dir; struct hlist_head forw_bat_list; @@ -1479,9 +1479,10 @@ struct batadv_algo_neigh_ops { */ struct batadv_algo_orig_ops { void (*free)(struct batadv_orig_node *orig_node); - int (*add_if)(struct batadv_orig_node *orig_node, int max_if_num); - int (*del_if)(struct batadv_orig_node *orig_node, int max_if_num, - int del_if_num); + int (*add_if)(struct batadv_orig_node *orig_node, + unsigned int max_if_num); + int (*del_if)(struct batadv_orig_node *orig_node, + unsigned int max_if_num, unsigned int del_if_num); #ifdef CONFIG_BATMAN_ADV_DEBUGFS void (*print)(struct batadv_priv *priv, struct seq_file *seq, struct batadv_hard_iface *hard_iface); From 416cada5adabd95ddd63776e367eb6dd134dff89 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Mon, 16 Mar 2020 23:30:23 +0100 Subject: [PATCH 3687/3715] batman-adv: update data pointers after skb_cow() commit bc44b78157f621ff2a2618fe287a827bcb094ac4 upstream. batadv_check_unicast_ttvn() calls skb_cow(), so pointers into the SKB data must be (re)set after calling it. The ethhdr variable is dropped altogether. Fixes: 7cdcf6dddc42 ("batman-adv: add UNICAST_4ADDR packet type") Signed-off-by: Matthias Schiffer Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/routing.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index cd82cff716c7..f59aac06733e 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -950,14 +950,10 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, struct batadv_orig_node *orig_node = NULL, *orig_node_gw = NULL; int check, hdr_size = sizeof(*unicast_packet); enum batadv_subtype subtype; - struct ethhdr *ethhdr; int ret = NET_RX_DROP; bool is4addr, is_gw; unicast_packet = (struct batadv_unicast_packet *)skb->data; - unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; - ethhdr = eth_hdr(skb); - is4addr = unicast_packet->packet_type == BATADV_UNICAST_4ADDR; /* the caller function should have already pulled 2 bytes */ if (is4addr) @@ -977,12 +973,14 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, if (!batadv_check_unicast_ttvn(bat_priv, skb, hdr_size)) goto free_skb; + unicast_packet = (struct batadv_unicast_packet *)skb->data; + /* packet for me */ if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) { /* If this is a unicast packet from another backgone gw, * drop it. */ - orig_addr_gw = ethhdr->h_source; + orig_addr_gw = eth_hdr(skb)->h_source; orig_node_gw = batadv_orig_hash_find(bat_priv, orig_addr_gw); if (orig_node_gw) { is_gw = batadv_bla_is_backbone_gw(skb, orig_node_gw, @@ -997,6 +995,8 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, } if (is4addr) { + unicast_4addr_packet = + (struct batadv_unicast_4addr_packet *)skb->data; subtype = unicast_4addr_packet->subtype; batadv_dat_inc_counter(bat_priv, subtype); From 6620d5e5d1764d89046a24be01de9977a7236ad2 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 16 Mar 2020 23:30:24 +0100 Subject: [PATCH 3688/3715] batman-adv: Avoid race in TT TVLV allocator helper commit 8ba0f9bd3bdea1058c2b2676bec7905724418e40 upstream. The functions batadv_tt_prepare_tvlv_local_data and batadv_tt_prepare_tvlv_global_data are responsible for preparing a buffer which can be used to store the TVLV container for TT and add the VLAN information to it. This will be done in three phases: 1. count the number of VLANs and their entries 2. allocate the buffer using the counters from the previous step and limits from the caller (parameter tt_len) 3. insert the VLAN information to the buffer The step 1 and 3 operate on a list which contains the VLANs. The access to these lists must be protected with an appropriate lock or otherwise they might operate on on different entries. This could for example happen when another context is adding VLAN entries to this list. This could lead to a buffer overflow in these functions when enough entries were added between step 1 and 3 to the VLAN lists that the buffer room for the entries (*tt_change) is smaller then the now required extra buffer for new VLAN entries. Fixes: 7ea7b4a14275 ("batman-adv: make the TT CRC logic VLAN specific") Signed-off-by: Sven Eckelmann Acked-by: Antonio Quartulli Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/translation-table.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 2c2670b85fa9..adc686087a26 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -872,7 +872,7 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, struct batadv_orig_node_vlan *vlan; u8 *tt_change_ptr; - rcu_read_lock(); + spin_lock_bh(&orig_node->vlan_list_lock); hlist_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) { num_vlan++; num_entries += atomic_read(&vlan->tt.num_entries); @@ -910,7 +910,7 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, *tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr; out: - rcu_read_unlock(); + spin_unlock_bh(&orig_node->vlan_list_lock); return tvlv_len; } @@ -946,7 +946,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, u8 *tt_change_ptr; int change_offset; - rcu_read_lock(); + spin_lock_bh(&bat_priv->softif_vlan_list_lock); hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) { num_vlan++; num_entries += atomic_read(&vlan->tt.num_entries); @@ -984,7 +984,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, *tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr; out: - rcu_read_unlock(); + spin_unlock_bh(&bat_priv->softif_vlan_list_lock); return tvlv_len; } From 98a21317d0336cb203e352e2161bcef0c6c76beb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Mon, 16 Mar 2020 23:30:25 +0100 Subject: [PATCH 3689/3715] batman-adv: Fix TT sync flags for intermediate TT responses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7072337e52b3e9d5460500d8dc9cbc1ba2db084c upstream. The previous TT sync fix so far only fixed TT responses issued by the target node directly. So far, TT responses issued by intermediate nodes still lead to the wrong flags being added, leading to CRC mismatches. This behaviour was observed at Freifunk Hannover in a 800 nodes setup where a considerable amount of nodes were still infected with 'WI' TT flags even with (most) nodes having the previous TT sync fix applied. I was able to reproduce the issue with intermediate TT responses in a four node test setup and this patch fixes this issue by ensuring to use the per originator instead of the summarized, OR'd ones. Fixes: e9c00136a475 ("batman-adv: fix tt_global_entries flags update") Reported-by: Leonardo Mörlein Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/translation-table.c | 61 +++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 10 deletions(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index adc686087a26..c37611bea429 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1544,6 +1544,8 @@ batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry, * by a given originator * @entry: the TT global entry to check * @orig_node: the originator to search in the list + * @flags: a pointer to store TT flags for the given @entry received + * from @orig_node * * find out if an orig_node is already in the list of a tt_global_entry. * @@ -1551,7 +1553,8 @@ batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry, */ static bool batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry, - const struct batadv_orig_node *orig_node) + const struct batadv_orig_node *orig_node, + u8 *flags) { struct batadv_tt_orig_list_entry *orig_entry; bool found = false; @@ -1559,6 +1562,10 @@ batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry, orig_entry = batadv_tt_global_orig_entry_find(entry, orig_node); if (orig_entry) { found = true; + + if (flags) + *flags = orig_entry->flags; + batadv_tt_orig_list_entry_put(orig_entry); } @@ -1741,7 +1748,7 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, if (!(common->flags & BATADV_TT_CLIENT_TEMP)) goto out; if (batadv_tt_global_entry_has_orig(tt_global_entry, - orig_node)) + orig_node, NULL)) goto out_remove; batadv_tt_global_del_orig_list(tt_global_entry); goto add_orig_entry; @@ -2884,23 +2891,46 @@ unlock: } /** - * batadv_tt_local_valid - verify that given tt entry is a valid one + * batadv_tt_local_valid() - verify local tt entry and get flags * @entry_ptr: to be checked local tt entry * @data_ptr: not used but definition required to satisfy the callback prototype + * @flags: a pointer to store TT flags for this client to + * + * Checks the validity of the given local TT entry. If it is, then the provided + * flags pointer is updated. * * Return: true if the entry is a valid, false otherwise. */ -static bool batadv_tt_local_valid(const void *entry_ptr, const void *data_ptr) +static bool batadv_tt_local_valid(const void *entry_ptr, + const void *data_ptr, + u8 *flags) { const struct batadv_tt_common_entry *tt_common_entry = entry_ptr; if (tt_common_entry->flags & BATADV_TT_CLIENT_NEW) return false; + + if (flags) + *flags = tt_common_entry->flags; + return true; } +/** + * batadv_tt_global_valid() - verify global tt entry and get flags + * @entry_ptr: to be checked global tt entry + * @data_ptr: an orig_node object (may be NULL) + * @flags: a pointer to store TT flags for this client to + * + * Checks the validity of the given global TT entry. If it is, then the provided + * flags pointer is updated either with the common (summed) TT flags if data_ptr + * is NULL or the specific, per originator TT flags otherwise. + * + * Return: true if the entry is a valid, false otherwise. + */ static bool batadv_tt_global_valid(const void *entry_ptr, - const void *data_ptr) + const void *data_ptr, + u8 *flags) { const struct batadv_tt_common_entry *tt_common_entry = entry_ptr; const struct batadv_tt_global_entry *tt_global_entry; @@ -2914,7 +2944,8 @@ static bool batadv_tt_global_valid(const void *entry_ptr, struct batadv_tt_global_entry, common); - return batadv_tt_global_entry_has_orig(tt_global_entry, orig_node); + return batadv_tt_global_entry_has_orig(tt_global_entry, orig_node, + flags); } /** @@ -2924,25 +2955,34 @@ static bool batadv_tt_global_valid(const void *entry_ptr, * @hash: hash table containing the tt entries * @tt_len: expected tvlv tt data buffer length in number of bytes * @tvlv_buff: pointer to the buffer to fill with the TT data - * @valid_cb: function to filter tt change entries + * @valid_cb: function to filter tt change entries and to return TT flags * @cb_data: data passed to the filter function as argument + * + * Fills the tvlv buff with the tt entries from the specified hash. If valid_cb + * is not provided then this becomes a no-op. */ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, struct batadv_hashtable *hash, void *tvlv_buff, u16 tt_len, bool (*valid_cb)(const void *, - const void *), + const void *, + u8 *flags), void *cb_data) { struct batadv_tt_common_entry *tt_common_entry; struct batadv_tvlv_tt_change *tt_change; struct hlist_head *head; u16 tt_tot, tt_num_entries = 0; + u8 flags; + bool ret; u32 i; tt_tot = batadv_tt_entries(tt_len); tt_change = (struct batadv_tvlv_tt_change *)tvlv_buff; + if (!valid_cb) + return; + rcu_read_lock(); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -2952,11 +2992,12 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, if (tt_tot == tt_num_entries) break; - if ((valid_cb) && (!valid_cb(tt_common_entry, cb_data))) + ret = valid_cb(tt_common_entry, cb_data, &flags); + if (!ret) continue; ether_addr_copy(tt_change->addr, tt_common_entry->addr); - tt_change->flags = tt_common_entry->flags; + tt_change->flags = flags; tt_change->vid = htons(tt_common_entry->vid); memset(tt_change->reserved, 0, sizeof(tt_change->reserved)); From 90ae6475b1753f0c1a4c66034b5666de3189fac8 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Mon, 16 Mar 2020 23:30:26 +0100 Subject: [PATCH 3690/3715] batman-adv: prevent TT request storms by not sending inconsistent TT TLVLs commit 16116dac23396e73c01eeee97b102e4833a4b205 upstream. A translation table TVLV changset sent with an OGM consists of a number of headers (one per VLAN) plus the changeset itself (addition and/or deletion of entries). The per-VLAN headers are used by OGM recipients for consistency checks. Said consistency check might determine that a full translation table request is needed to restore consistency. If the TT sender adds per-VLAN headers of empty VLANs into the OGM, recipients are led to believe to have reached an inconsistent state and thus request a full table update. The full table does not contain empty VLANs (due to missing entries) the cycle restarts when the next OGM is issued. Consequently, when the translation table TVLV headers are composed, empty VLANs are to be excluded. Fixes: 21a57f6e7a3b ("batman-adv: make the TT CRC logic VLAN specific") Signed-off-by: Marek Lindner Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/translation-table.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index c37611bea429..dbc516824175 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -941,15 +941,20 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, struct batadv_tvlv_tt_vlan_data *tt_vlan; struct batadv_softif_vlan *vlan; u16 num_vlan = 0; - u16 num_entries = 0; + u16 vlan_entries = 0; + u16 total_entries = 0; u16 tvlv_len; u8 *tt_change_ptr; int change_offset; spin_lock_bh(&bat_priv->softif_vlan_list_lock); hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) { + vlan_entries = atomic_read(&vlan->tt.num_entries); + if (vlan_entries < 1) + continue; + num_vlan++; - num_entries += atomic_read(&vlan->tt.num_entries); + total_entries += vlan_entries; } change_offset = sizeof(**tt_data); @@ -957,7 +962,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, /* if tt_len is negative, allocate the space needed by the full table */ if (*tt_len < 0) - *tt_len = batadv_tt_len(num_entries); + *tt_len = batadv_tt_len(total_entries); tvlv_len = *tt_len; tvlv_len += change_offset; @@ -974,6 +979,10 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(*tt_data + 1); hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) { + vlan_entries = atomic_read(&vlan->tt.num_entries); + if (vlan_entries < 1) + continue; + tt_vlan->vid = htons(vlan->vid); tt_vlan->crc = htonl(vlan->tt.crc); From 33dfa3bf0ec31a1c64425b61bae2206b7bcebf13 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 16 Mar 2020 23:30:27 +0100 Subject: [PATCH 3691/3715] batman-adv: Fix debugfs path for renamed hardif commit 36dc621ceca1be3ec885aeade5fdafbbcc452a6d upstream. batman-adv is creating special debugfs directories in the init net_namespace for each valid hard-interface (net_device). But it is possible to rename a net_device to a completely different name then the original one. It can therefore happen that a user registers a new net_device which gets the name "wlan0" assigned by default. batman-adv is also adding a new directory under $debugfs/batman-adv/ with the name "wlan0". The user then decides to rename this device to "wl_pri" and registers a different device. The kernel may now decide to use the name "wlan0" again for this new device. batman-adv will detect it as a valid net_device and tries to create a directory with the name "wlan0" under $debugfs/batman-adv/. But there already exists one with this name under this path and thus this fails. batman-adv will detect a problem and rollback the registering of this device. batman-adv must therefore take care of renaming the debugfs directories for hard-interfaces whenever it detects such a net_device rename. Fixes: 5bc7c1eb44f2 ("batman-adv: add debugfs structure for information per interface") Reported-by: John Soros Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/debugfs.c | 22 +++++++++++++++++++++- net/batman-adv/debugfs.h | 6 ++++++ net/batman-adv/hard-interface.c | 3 +++ 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index e32ad47c6efd..7ee828cd9778 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -18,6 +18,7 @@ #include "debugfs.h" #include "main.h" +#include #include #include #include @@ -338,7 +339,26 @@ out: } /** - * batadv_debugfs_del_hardif - delete the base directory for a hard interface + * batadv_debugfs_rename_hardif() - Fix debugfs path for renamed hardif + * @hard_iface: hard interface which was renamed + */ +void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface) +{ + const char *name = hard_iface->net_dev->name; + struct dentry *dir; + struct dentry *d; + + dir = hard_iface->debug_dir; + if (!dir) + return; + + d = debugfs_rename(dir->d_parent, dir, dir->d_parent, name); + if (!d) + pr_err("Can't rename debugfs dir to %s\n", name); +} + +/** + * batadv_debugfs_del_hardif() - delete the base directory for a hard interface * in debugfs. * @hard_iface: hard interface which is deleted. */ diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h index 9c5d4a65b98c..295e11146818 100644 --- a/net/batman-adv/debugfs.h +++ b/net/batman-adv/debugfs.h @@ -31,6 +31,7 @@ void batadv_debugfs_destroy(void); int batadv_debugfs_add_meshif(struct net_device *dev); void batadv_debugfs_del_meshif(struct net_device *dev); int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface); +void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface); void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface); #else @@ -58,6 +59,11 @@ int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface) return 0; } +static inline +void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface) +{ +} + static inline void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface) { diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 4b67731677af..e72e95208339 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -1017,6 +1017,9 @@ static int batadv_hard_if_event(struct notifier_block *this, if (batadv_is_wifi_hardif(hard_iface)) hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS; break; + case NETDEV_CHANGENAME: + batadv_debugfs_rename_hardif(hard_iface); + break; default: break; } From da2c2e3c63c6de983accd410de8c125481ee2eb3 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 16 Mar 2020 23:30:28 +0100 Subject: [PATCH 3692/3715] batman-adv: Fix debugfs path for renamed softif commit 6da7be7d24b2921f8215473ba7552796dff05fe1 upstream. batman-adv is creating special debugfs directories in the init net_namespace for each created soft-interface (batadv net_device). But it is possible to rename a net_device to a completely different name then the original one. It can therefore happen that a user registers a new batadv net_device with the name "bat0". batman-adv is then also adding a new directory under $debugfs/batman-adv/ with the name "wlan0". The user then decides to rename this device to "bat1" and registers a different batadv device with the name "bat0". batman-adv will then try to create a directory with the name "bat0" under $debugfs/batman-adv/ again. But there already exists one with this name under this path and thus this fails. batman-adv will detect a problem and rollback the registering of this device. batman-adv must therefore take care of renaming the debugfs directories for soft-interfaces whenever it detects such a net_device rename. Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/debugfs.c | 24 +++++++++++++++++++++++ net/batman-adv/debugfs.h | 5 +++++ net/batman-adv/hard-interface.c | 34 +++++++++++++++++++++++++++------ 3 files changed, 57 insertions(+), 6 deletions(-) diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index 7ee828cd9778..4957d4824437 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -421,6 +421,30 @@ out: return -ENOMEM; } +/** + * batadv_debugfs_rename_meshif() - Fix debugfs path for renamed softif + * @dev: net_device which was renamed + */ +void batadv_debugfs_rename_meshif(struct net_device *dev) +{ + struct batadv_priv *bat_priv = netdev_priv(dev); + const char *name = dev->name; + struct dentry *dir; + struct dentry *d; + + dir = bat_priv->debug_dir; + if (!dir) + return; + + d = debugfs_rename(dir->d_parent, dir, dir->d_parent, name); + if (!d) + pr_err("Can't rename debugfs dir to %s\n", name); +} + +/** + * batadv_debugfs_del_meshif() - Remove interface dependent debugfs entries + * @dev: netdev struct of the soft interface + */ void batadv_debugfs_del_meshif(struct net_device *dev) { struct batadv_priv *bat_priv = netdev_priv(dev); diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h index 295e11146818..901bbc357bf4 100644 --- a/net/batman-adv/debugfs.h +++ b/net/batman-adv/debugfs.h @@ -29,6 +29,7 @@ struct net_device; void batadv_debugfs_init(void); void batadv_debugfs_destroy(void); int batadv_debugfs_add_meshif(struct net_device *dev); +void batadv_debugfs_rename_meshif(struct net_device *dev); void batadv_debugfs_del_meshif(struct net_device *dev); int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface); void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface); @@ -49,6 +50,10 @@ static inline int batadv_debugfs_add_meshif(struct net_device *dev) return 0; } +static inline void batadv_debugfs_rename_meshif(struct net_device *dev) +{ +} + static inline void batadv_debugfs_del_meshif(struct net_device *dev) { } diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index e72e95208339..c43887fa29a9 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -955,6 +955,32 @@ void batadv_hardif_remove_interfaces(void) rtnl_unlock(); } +/** + * batadv_hard_if_event_softif() - Handle events for soft interfaces + * @event: NETDEV_* event to handle + * @net_dev: net_device which generated an event + * + * Return: NOTIFY_* result + */ +static int batadv_hard_if_event_softif(unsigned long event, + struct net_device *net_dev) +{ + struct batadv_priv *bat_priv; + + switch (event) { + case NETDEV_REGISTER: + batadv_sysfs_add_meshif(net_dev); + bat_priv = netdev_priv(net_dev); + batadv_softif_create_vlan(bat_priv, BATADV_NO_FLAGS); + break; + case NETDEV_CHANGENAME: + batadv_debugfs_rename_meshif(net_dev); + break; + } + + return NOTIFY_DONE; +} + static int batadv_hard_if_event(struct notifier_block *this, unsigned long event, void *ptr) { @@ -963,12 +989,8 @@ static int batadv_hard_if_event(struct notifier_block *this, struct batadv_hard_iface *primary_if = NULL; struct batadv_priv *bat_priv; - if (batadv_softif_is_valid(net_dev) && event == NETDEV_REGISTER) { - batadv_sysfs_add_meshif(net_dev); - bat_priv = netdev_priv(net_dev); - batadv_softif_create_vlan(bat_priv, BATADV_NO_FLAGS); - return NOTIFY_DONE; - } + if (batadv_softif_is_valid(net_dev)) + return batadv_hard_if_event_softif(event, net_dev); hard_iface = batadv_hardif_get_by_netdev(net_dev); if (!hard_iface && (event == NETDEV_REGISTER || From 43340e23e9f621a49dfafc9b862d612f24af3b4a Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 16 Mar 2020 23:30:29 +0100 Subject: [PATCH 3693/3715] batman-adv: Fix duplicated OGMs on NETDEV_UP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9e6b5648bbc4cd48fab62cecbb81e9cc3c6e7e88 upstream. The state of slave interfaces are handled differently depending on whether the interface is up or not. All active interfaces (IFF_UP) will transmit OGMs. But for B.A.T.M.A.N. IV, also non-active interfaces are scheduling (low TTL) OGMs on active interfaces. The code which setups and schedules the OGMs must therefore already be called when the interfaces gets added as slave interface and the transmit function must then check whether it has to send out the OGM or not on the specific slave interface. But the commit f0d97253fb5f ("batman-adv: remove ogm_emit and ogm_schedule API calls") moved the setup code from the enable function to the activate function. The latter is called either when the added slave was already up when batadv_hardif_enable_interface processed the new interface or when a NETDEV_UP event was received for this slave interfac. As result, each NETDEV_UP would schedule a new OGM worker for the interface and thus OGMs would be send a lot more than expected. Fixes: f0d97253fb5f ("batman-adv: remove ogm_emit and ogm_schedule API calls") Reported-by: Linus Lüssing Tested-by: Linus Lüssing Acked-by: Marek Lindner Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/bat_iv_ogm.c | 4 ++-- net/batman-adv/hard-interface.c | 3 +++ net/batman-adv/types.h | 2 ++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 0b2f69924444..0ed33a9a41b7 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -2481,7 +2481,7 @@ batadv_iv_ogm_neigh_is_sob(struct batadv_neigh_node *neigh1, return ret; } -static void batadv_iv_iface_activate(struct batadv_hard_iface *hard_iface) +static void batadv_iv_iface_enabled(struct batadv_hard_iface *hard_iface) { /* begin scheduling originator messages on that interface */ batadv_iv_ogm_schedule(hard_iface); @@ -2821,8 +2821,8 @@ unlock: static struct batadv_algo_ops batadv_batman_iv __read_mostly = { .name = "BATMAN_IV", .iface = { - .activate = batadv_iv_iface_activate, .enable = batadv_iv_ogm_iface_enable, + .enabled = batadv_iv_iface_enabled, .disable = batadv_iv_ogm_iface_disable, .update_mac = batadv_iv_ogm_iface_update_mac, .primary_set = batadv_iv_ogm_primary_iface_set, diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index c43887fa29a9..63760967712e 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -795,6 +795,9 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, batadv_hardif_recalc_extra_skbroom(soft_iface); + if (bat_priv->algo_ops->iface.enabled) + bat_priv->algo_ops->iface.enabled(hard_iface); + out: return 0; diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index dbeaa015edc9..7ecf268e6626 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1424,6 +1424,7 @@ struct batadv_forw_packet { * @activate: start routing mechanisms when hard-interface is brought up * (optional) * @enable: init routing info when hard-interface is enabled + * @enabled: notification when hard-interface was enabled (optional) * @disable: de-init routing info when hard-interface is disabled * @update_mac: (re-)init mac addresses of the protocol information * belonging to this hard-interface @@ -1432,6 +1433,7 @@ struct batadv_forw_packet { struct batadv_algo_iface_ops { void (*activate)(struct batadv_hard_iface *hard_iface); int (*enable)(struct batadv_hard_iface *hard_iface); + void (*enabled)(struct batadv_hard_iface *hard_iface); void (*disable)(struct batadv_hard_iface *hard_iface); void (*update_mac)(struct batadv_hard_iface *hard_iface); void (*primary_set)(struct batadv_hard_iface *hard_iface); From 5427da3add52932440df60972cedab088769bb23 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 16 Mar 2020 23:30:30 +0100 Subject: [PATCH 3694/3715] batman-adv: Avoid free/alloc race when handling OGM2 buffer commit a8d23cbbf6c9f515ed678204ad2962be7c336344 upstream. A B.A.T.M.A.N. V virtual interface has an OGM2 packet buffer which is initialized using data from the netdevice notifier and other rtnetlink related hooks. It is sent regularly via various slave interfaces of the batadv virtual interface and in this process also modified (realloced) to integrate additional state information via TVLV containers. It must be avoided that the worker item is executed without a common lock with the netdevice notifier/rtnetlink helpers. Otherwise it can either happen that half modified data is sent out or the functions modifying the OGM2 buffer try to access already freed memory regions. Fixes: 0da0035942d4 ("batman-adv: OGMv2 - add basic infrastructure") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/bat_v_ogm.c | 42 ++++++++++++++++++++++++++++++-------- net/batman-adv/types.h | 3 +++ 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index e07f636160b6..cec31769bb3f 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -28,6 +28,8 @@ #include #include #include +#include +#include #include #include #include @@ -127,14 +129,12 @@ static void batadv_v_ogm_send_to_if(struct sk_buff *skb, } /** - * batadv_v_ogm_send - periodic worker broadcasting the own OGM - * @work: work queue item + * batadv_v_ogm_send_softif() - periodic worker broadcasting the own OGM + * @bat_priv: the bat priv with all the soft interface information */ -static void batadv_v_ogm_send(struct work_struct *work) +static void batadv_v_ogm_send_softif(struct batadv_priv *bat_priv) { struct batadv_hard_iface *hard_iface; - struct batadv_priv_bat_v *bat_v; - struct batadv_priv *bat_priv; struct batadv_ogm2_packet *ogm_packet; struct sk_buff *skb, *skb_tmp; unsigned char *ogm_buff; @@ -142,8 +142,7 @@ static void batadv_v_ogm_send(struct work_struct *work) u16 tvlv_len = 0; int ret; - bat_v = container_of(work, struct batadv_priv_bat_v, ogm_wq.work); - bat_priv = container_of(bat_v, struct batadv_priv, bat_v); + lockdep_assert_held(&bat_priv->bat_v.ogm_buff_mutex); if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) goto out; @@ -234,6 +233,23 @@ out: return; } +/** + * batadv_v_ogm_send() - periodic worker broadcasting the own OGM + * @work: work queue item + */ +static void batadv_v_ogm_send(struct work_struct *work) +{ + struct batadv_priv_bat_v *bat_v; + struct batadv_priv *bat_priv; + + bat_v = container_of(work, struct batadv_priv_bat_v, ogm_wq.work); + bat_priv = container_of(bat_v, struct batadv_priv, bat_v); + + mutex_lock(&bat_priv->bat_v.ogm_buff_mutex); + batadv_v_ogm_send_softif(bat_priv); + mutex_unlock(&bat_priv->bat_v.ogm_buff_mutex); +} + /** * batadv_v_ogm_iface_enable - prepare an interface for B.A.T.M.A.N. V * @hard_iface: the interface to prepare @@ -260,11 +276,15 @@ void batadv_v_ogm_primary_iface_set(struct batadv_hard_iface *primary_iface) struct batadv_priv *bat_priv = netdev_priv(primary_iface->soft_iface); struct batadv_ogm2_packet *ogm_packet; + mutex_lock(&bat_priv->bat_v.ogm_buff_mutex); if (!bat_priv->bat_v.ogm_buff) - return; + goto unlock; ogm_packet = (struct batadv_ogm2_packet *)bat_priv->bat_v.ogm_buff; ether_addr_copy(ogm_packet->orig, primary_iface->net_dev->dev_addr); + +unlock: + mutex_unlock(&bat_priv->bat_v.ogm_buff_mutex); } /** @@ -886,6 +906,8 @@ int batadv_v_ogm_init(struct batadv_priv *bat_priv) atomic_set(&bat_priv->bat_v.ogm_seqno, random_seqno); INIT_DELAYED_WORK(&bat_priv->bat_v.ogm_wq, batadv_v_ogm_send); + mutex_init(&bat_priv->bat_v.ogm_buff_mutex); + return 0; } @@ -897,7 +919,11 @@ void batadv_v_ogm_free(struct batadv_priv *bat_priv) { cancel_delayed_work_sync(&bat_priv->bat_v.ogm_wq); + mutex_lock(&bat_priv->bat_v.ogm_buff_mutex); + kfree(bat_priv->bat_v.ogm_buff); bat_priv->bat_v.ogm_buff = NULL; bat_priv->bat_v.ogm_buff_len = 0; + + mutex_unlock(&bat_priv->bat_v.ogm_buff_mutex); } diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 7ecf268e6626..21642fbe95c3 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include /* for linux/wait.h */ @@ -989,12 +990,14 @@ struct batadv_softif_vlan { * @ogm_buff: buffer holding the OGM packet * @ogm_buff_len: length of the OGM packet buffer * @ogm_seqno: OGM sequence number - used to identify each OGM + * @ogm_buff_mutex: lock protecting ogm_buff and ogm_buff_len * @ogm_wq: workqueue used to schedule OGM transmissions */ struct batadv_priv_bat_v { unsigned char *ogm_buff; int ogm_buff_len; atomic_t ogm_seqno; + struct mutex ogm_buff_mutex; struct delayed_work ogm_wq; }; From ca6579b18e7983b26438c62b3291bd43fb34017a Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 16 Mar 2020 23:30:31 +0100 Subject: [PATCH 3695/3715] batman-adv: Avoid free/alloc race when handling OGM buffer commit 40e220b4218bb3d278e5e8cc04ccdfd1c7ff8307 upstream. Each slave interface of an B.A.T.M.A.N. IV virtual interface has an OGM packet buffer which is initialized using data from netdevice notifier and other rtnetlink related hooks. It is sent regularly via various slave interfaces of the batadv virtual interface and in this process also modified (realloced) to integrate additional state information via TVLV containers. It must be avoided that the worker item is executed without a common lock with the netdevice notifier/rtnetlink helpers. Otherwise it can either happen that half modified/freed data is sent out or functions modifying the OGM buffer try to access already freed memory regions. Reported-by: syzbot+0cc629f19ccb8534935b@syzkaller.appspotmail.com Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/bat_iv_ogm.c | 60 ++++++++++++++++++++++++++++----- net/batman-adv/hard-interface.c | 2 ++ net/batman-adv/types.h | 2 ++ 3 files changed, 55 insertions(+), 9 deletions(-) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 0ed33a9a41b7..30e774354d4e 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -370,14 +371,18 @@ static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface) unsigned char *ogm_buff; u32 random_seqno; + mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); + /* randomize initial seqno to avoid collision */ get_random_bytes(&random_seqno, sizeof(random_seqno)); atomic_set(&hard_iface->bat_iv.ogm_seqno, random_seqno); hard_iface->bat_iv.ogm_buff_len = BATADV_OGM_HLEN; ogm_buff = kmalloc(hard_iface->bat_iv.ogm_buff_len, GFP_ATOMIC); - if (!ogm_buff) + if (!ogm_buff) { + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); return -ENOMEM; + } hard_iface->bat_iv.ogm_buff = ogm_buff; @@ -389,35 +394,59 @@ static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface) batadv_ogm_packet->reserved = 0; batadv_ogm_packet->tq = BATADV_TQ_MAX_VALUE; + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); + return 0; } static void batadv_iv_ogm_iface_disable(struct batadv_hard_iface *hard_iface) { + mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); + kfree(hard_iface->bat_iv.ogm_buff); hard_iface->bat_iv.ogm_buff = NULL; + + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); } static void batadv_iv_ogm_iface_update_mac(struct batadv_hard_iface *hard_iface) { struct batadv_ogm_packet *batadv_ogm_packet; - unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff; + void *ogm_buff; - batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff; + mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); + + ogm_buff = hard_iface->bat_iv.ogm_buff; + if (!ogm_buff) + goto unlock; + + batadv_ogm_packet = ogm_buff; ether_addr_copy(batadv_ogm_packet->orig, hard_iface->net_dev->dev_addr); ether_addr_copy(batadv_ogm_packet->prev_sender, hard_iface->net_dev->dev_addr); + +unlock: + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); } static void batadv_iv_ogm_primary_iface_set(struct batadv_hard_iface *hard_iface) { struct batadv_ogm_packet *batadv_ogm_packet; - unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff; + void *ogm_buff; - batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff; + mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); + + ogm_buff = hard_iface->bat_iv.ogm_buff; + if (!ogm_buff) + goto unlock; + + batadv_ogm_packet = ogm_buff; batadv_ogm_packet->ttl = BATADV_TTL; + +unlock: + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); } /* when do we schedule our own ogm to be sent */ @@ -915,7 +944,11 @@ batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) } } -static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) +/** + * batadv_iv_ogm_schedule_buff() - schedule submission of hardif ogm buffer + * @hard_iface: interface whose ogm buffer should be transmitted + */ +static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); unsigned char **ogm_buff = &hard_iface->bat_iv.ogm_buff; @@ -926,9 +959,7 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) u16 tvlv_len = 0; unsigned long send_time; - if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) || - (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)) - return; + lockdep_assert_held(&hard_iface->bat_iv.ogm_buff_mutex); /* the interface gets activated here to avoid race conditions between * the moment of activating the interface in @@ -996,6 +1027,17 @@ out: batadv_hardif_put(primary_if); } +static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) +{ + if (hard_iface->if_status == BATADV_IF_NOT_IN_USE || + hard_iface->if_status == BATADV_IF_TO_BE_REMOVED) + return; + + mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); + batadv_iv_ogm_schedule_buff(hard_iface); + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); +} + /** * batadv_iv_ogm_orig_update - use OGM to update corresponding data in an * originator diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 63760967712e..9fdfa9984f02 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -901,6 +902,7 @@ batadv_hardif_add_interface(struct net_device *net_dev) INIT_LIST_HEAD(&hard_iface->list); INIT_HLIST_HEAD(&hard_iface->neigh_list); + mutex_init(&hard_iface->bat_iv.ogm_buff_mutex); spin_lock_init(&hard_iface->neigh_list_lock); kref_init(&hard_iface->refcount); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 21642fbe95c3..540a9c5c2270 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -82,11 +82,13 @@ enum batadv_dhcp_recipient { * @ogm_buff: buffer holding the OGM packet * @ogm_buff_len: length of the OGM packet buffer * @ogm_seqno: OGM sequence number - used to identify each OGM + * @ogm_buff_mutex: lock protecting ogm_buff and ogm_buff_len */ struct batadv_hard_iface_bat_iv { unsigned char *ogm_buff; int ogm_buff_len; atomic_t ogm_seqno; + struct mutex ogm_buff_mutex; }; /** From e181bb93c904708962b1e92e0db830ccb82be51a Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 16 Mar 2020 23:30:32 +0100 Subject: [PATCH 3696/3715] batman-adv: Don't schedule OGM for disabled interface commit 8e8ce08198de193e3d21d42e96945216e3d9ac7f upstream. A transmission scheduling for an interface which is currently dropped by batadv_iv_ogm_iface_disable could still be in progress. The B.A.T.M.A.N. V is simply cancelling the workqueue item in an synchronous way but this is not possible with B.A.T.M.A.N. IV because the OGM submissions are intertwined. Instead it has to stop submitting the OGM when it detect that the buffer pointer is set to NULL. Reported-by: syzbot+a98f2016f40b9cd3818a@syzkaller.appspotmail.com Reported-by: syzbot+ac36b6a33c28a491e929@syzkaller.appspotmail.com Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") Signed-off-by: Sven Eckelmann Cc: Hillf Danton Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/bat_iv_ogm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 30e774354d4e..7a723e124dbb 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -961,6 +961,10 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface) lockdep_assert_held(&hard_iface->bat_iv.ogm_buff_mutex); + /* interface already disabled by batadv_iv_ogm_iface_disable */ + if (!*ogm_buff) + return; + /* the interface gets activated here to avoid race conditions between * the moment of activating the interface in * hardif_activate_interface() where the originator mac is set and From ba362da15ee2c915041b9d7582ff8ca017c615f6 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Wed, 11 Mar 2020 14:13:21 -0500 Subject: [PATCH 3697/3715] perf/amd/uncore: Replace manual sampling check with CAP_NO_INTERRUPT flag [ Upstream commit f967140dfb7442e2db0868b03b961f9c59418a1b ] Enable the sampling check in kernel/events/core.c::perf_event_open(), which returns the more appropriate -EOPNOTSUPP. BEFORE: $ sudo perf record -a -e instructions,l3_request_g1.caching_l3_cache_accesses true Error: The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (l3_request_g1.caching_l3_cache_accesses). /bin/dmesg | grep -i perf may provide additional information. With nothing relevant in dmesg. AFTER: $ sudo perf record -a -e instructions,l3_request_g1.caching_l3_cache_accesses true Error: l3_request_g1.caching_l3_cache_accesses: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat' Fixes: c43ca5091a37 ("perf/x86/amd: Add support for AMD NB and L2I "uncore" counters") Signed-off-by: Kim Phillips Signed-off-by: Borislav Petkov Acked-by: Peter Zijlstra Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20200311191323.13124-1-kim.phillips@amd.com Signed-off-by: Sasha Levin --- arch/x86/events/amd/uncore.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index baa7e36073f9..604a8558752d 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -193,20 +193,18 @@ static int amd_uncore_event_init(struct perf_event *event) /* * NB and Last level cache counters (MSRs) are shared across all cores - * that share the same NB / Last level cache. Interrupts can be directed - * to a single target core, however, event counts generated by processes - * running on other cores cannot be masked out. So we do not support - * sampling and per-thread events. + * that share the same NB / Last level cache. On family 16h and below, + * Interrupts can be directed to a single target core, however, event + * counts generated by processes running on other cores cannot be masked + * out. So we do not support sampling and per-thread events via + * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts: */ - if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) - return -EINVAL; /* NB and Last level cache counters do not have usr/os/guest/host bits */ if (event->attr.exclude_user || event->attr.exclude_kernel || event->attr.exclude_host || event->attr.exclude_guest) return -EINVAL; - /* and we do not enable counter overflow interrupts */ hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB; hwc->idx = -1; @@ -314,6 +312,7 @@ static struct pmu amd_nb_pmu = { .start = amd_uncore_start, .stop = amd_uncore_stop, .read = amd_uncore_read, + .capabilities = PERF_PMU_CAP_NO_INTERRUPT, }; static struct pmu amd_llc_pmu = { @@ -324,6 +323,7 @@ static struct pmu amd_llc_pmu = { .start = amd_uncore_start, .stop = amd_uncore_stop, .read = amd_uncore_read, + .capabilities = PERF_PMU_CAP_NO_INTERRUPT, }; static struct amd_uncore *amd_uncore_alloc(unsigned int cpu) From 140421867f98b8d944562a01cff0a04298dc3ad2 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 6 Feb 2020 16:58:45 +0100 Subject: [PATCH 3698/3715] ACPI: watchdog: Allow disabling WDAT at boot [ Upstream commit 3f9e12e0df012c4a9a7fd7eb0d3ae69b459d6b2c ] In case the WDAT interface is broken, give the user an option to ignore it to let a native driver bind to the watchdog device instead. Signed-off-by: Jean Delvare Acked-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- Documentation/admin-guide/kernel-parameters.txt | 4 ++++ drivers/acpi/acpi_watchdog.c | 12 +++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 7e0a4be3503d..ae51b1b7b67f 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -137,6 +137,10 @@ dynamic table installation which will install SSDT tables to /sys/firmware/acpi/tables/dynamic. + acpi_no_watchdog [HW,ACPI,WDT] + Ignore the ACPI-based watchdog interface (WDAT) and let + a native driver control the watchdog device instead. + acpi_rsdp= [ACPI,EFI,KEXEC] Pass the RSDP address to the kernel, mostly used on machines running EFI runtime service to boot the diff --git a/drivers/acpi/acpi_watchdog.c b/drivers/acpi/acpi_watchdog.c index 23cde3d8e8fb..0bd1899a287f 100644 --- a/drivers/acpi/acpi_watchdog.c +++ b/drivers/acpi/acpi_watchdog.c @@ -58,12 +58,14 @@ static bool acpi_watchdog_uses_rtc(const struct acpi_table_wdat *wdat) } #endif +static bool acpi_no_watchdog; + static const struct acpi_table_wdat *acpi_watchdog_get_wdat(void) { const struct acpi_table_wdat *wdat = NULL; acpi_status status; - if (acpi_disabled) + if (acpi_disabled || acpi_no_watchdog) return NULL; status = acpi_get_table(ACPI_SIG_WDAT, 0, @@ -91,6 +93,14 @@ bool acpi_has_watchdog(void) } EXPORT_SYMBOL_GPL(acpi_has_watchdog); +/* ACPI watchdog can be disabled on boot command line */ +static int __init disable_acpi_watchdog(char *str) +{ + acpi_no_watchdog = true; + return 1; +} +__setup("acpi_no_watchdog", disable_acpi_watchdog); + void __init acpi_watchdog_init(void) { const struct acpi_wdat_entry *entries; From f098e1a042b7f997d4348178df99a0f4821b86cf Mon Sep 17 00:00:00 2001 From: Mansour Behabadi Date: Wed, 29 Jan 2020 17:26:31 +1100 Subject: [PATCH 3699/3715] HID: apple: Add support for recent firmware on Magic Keyboards [ Upstream commit e433be929e63265b7412478eb7ff271467aee2d7 ] Magic Keyboards with more recent firmware (0x0100) report Fn key differently. Without this patch, Fn key may not behave as expected and may not be configurable via hid_apple fnmode module parameter. Signed-off-by: Mansour Behabadi Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-apple.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index d0a81a03ddbd..8ab8f2350bbc 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -343,7 +343,8 @@ static int apple_input_mapping(struct hid_device *hdev, struct hid_input *hi, unsigned long **bit, int *max) { if (usage->hid == (HID_UP_CUSTOM | 0x0003) || - usage->hid == (HID_UP_MSVENDOR | 0x0003)) { + usage->hid == (HID_UP_MSVENDOR | 0x0003) || + usage->hid == (HID_UP_HPVENDOR2 | 0x0003)) { /* The fn key on Apple USB keyboards */ set_bit(EV_REP, hi->input->evbit); hid_map_usage_clear(hi, usage, bit, max, EV_KEY, KEY_FN); From 1d3e7e1be06f08c380a28cf1ed8578b7b87cbfea Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 14 Feb 2020 14:53:07 +0800 Subject: [PATCH 3700/3715] HID: i2c-hid: add Trekstor Surfbook E11B to descriptor override [ Upstream commit be0aba826c4a6ba5929def1962a90d6127871969 ] The Surfbook E11B uses the SIPODEV SP1064 touchpad, which does not supply descriptors, so it has to be added to the override list. BugLink: https://bugs.launchpad.net/bugs/1858299 Signed-off-by: Kai-Heng Feng Reviewed-by: Hans de Goede Signed-off-by: Benjamin Tissoires Signed-off-by: Sasha Levin --- drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c index 10af8585c820..95052373a828 100644 --- a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c +++ b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c @@ -341,6 +341,14 @@ static const struct dmi_system_id i2c_hid_dmi_desc_override_table[] = { }, .driver_data = (void *)&sipodev_desc }, + { + .ident = "Trekstor SURFBOOK E11B", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "TREKSTOR"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "SURFBOOK E11B"), + }, + .driver_data = (void *)&sipodev_desc + }, { .ident = "Direkt-Tek DTLAPY116-2", .matches = { From 8da8673cbe9525fd4439dbbd189766ba993e554d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 21 Feb 2020 10:44:50 +0100 Subject: [PATCH 3701/3715] cfg80211: check reg_rule for NULL in handle_channel_custom() [ Upstream commit a7ee7d44b57c9ae174088e53a668852b7f4f452d ] We may end up with a NULL reg_rule after the loop in handle_channel_custom() if the bandwidth didn't fit, check if this is the case and bail out if so. Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20200221104449.3b558a50201c.I4ad3725c4dacaefd2d18d3cc65ba6d18acd5dbfe@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/reg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index a520f433d476..b95d1c2bdef7 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1733,7 +1733,7 @@ static void handle_channel_custom(struct wiphy *wiphy, break; } - if (IS_ERR(reg_rule)) { + if (IS_ERR_OR_NULL(reg_rule)) { pr_debug("Disabling freq %d MHz as custom regd has no rule that fits it\n", chan->center_freq); if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) { From 358e3a57a2558069863404249d8af3bdca7e1563 Mon Sep 17 00:00:00 2001 From: Igor Druzhinin Date: Tue, 14 Jan 2020 14:43:19 +0000 Subject: [PATCH 3702/3715] scsi: libfc: free response frame from GPN_ID [ Upstream commit ff6993bb79b9f99bdac0b5378169052931b65432 ] fc_disc_gpn_id_resp() should be the last function using it so free it here to avoid memory leak. Link: https://lore.kernel.org/r/1579013000-14570-2-git-send-email-igor.druzhinin@citrix.com Reviewed-by: Hannes Reinecke Signed-off-by: Igor Druzhinin Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/libfc/fc_disc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/libfc/fc_disc.c b/drivers/scsi/libfc/fc_disc.c index bb9c1c016643..28b50ab2fbb0 100644 --- a/drivers/scsi/libfc/fc_disc.c +++ b/drivers/scsi/libfc/fc_disc.c @@ -652,6 +652,8 @@ redisc: } out: kref_put(&rdata->kref, fc_rport_destroy); + if (!IS_ERR(fp)) + fc_frame_free(fp); } /** From 74f38a3033be28d4587f959e610419ac52295638 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Fri, 21 Feb 2020 14:17:05 +0100 Subject: [PATCH 3703/3715] net: usb: qmi_wwan: restore mtu min/max values after raw_ip switch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit eae7172f8141eb98e64e6e81acc9e9d5b2add127 ] usbnet creates network interfaces with min_mtu = 0 and max_mtu = ETH_MAX_MTU. These values are not modified by qmi_wwan when the network interface is created initially, allowing, for example, to set mtu greater than 1500. When a raw_ip switch is done (raw_ip set to 'Y', then set to 'N') the mtu values for the network interface are set through ether_setup, with min_mtu = ETH_MIN_MTU and max_mtu = ETH_DATA_LEN, not allowing anymore to set mtu greater than 1500 (error: mtu greater than device maximum). The patch restores the original min/max mtu values set by usbnet after a raw_ip switch. Signed-off-by: Daniele Palmas Acked-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/qmi_wwan.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 189715438328..a8d5561afc7d 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -274,6 +274,9 @@ static void qmi_wwan_netdev_setup(struct net_device *net) netdev_dbg(net, "mode: raw IP\n"); } else if (!net->header_ops) { /* don't bother if already set */ ether_setup(net); + /* Restoring min/max mtu values set originally by usbnet */ + net->min_mtu = 0; + net->max_mtu = ETH_MAX_MTU; clear_bit(EVENT_NO_IP_ALIGN, &dev->flags); netdev_dbg(net, "mode: Ethernet\n"); } From 51425a0d1e47af13c45016c5437abb0a0b2f2ebc Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 23 Feb 2020 14:38:40 +0100 Subject: [PATCH 3704/3715] net: ks8851-ml: Fix IRQ handling and locking [ Upstream commit 44343418d0f2f623cb9da6f5000df793131cbe3b ] The KS8851 requires that packet RX and TX are mutually exclusive. Currently, the driver hopes to achieve this by disabling interrupt from the card by writing the card registers and by disabling the interrupt on the interrupt controller. This however is racy on SMP. Replace this approach by expanding the spinlock used around the ks_start_xmit() TX path to ks_irq() RX path to assure true mutual exclusion and remove the interrupt enabling/disabling, which is now not needed anymore. Furthermore, disable interrupts also in ks_net_stop(), which was missing before. Note that a massive improvement here would be to re-use the KS8851 driver approach, which is to move the TX path into a worker thread, interrupt handling to threaded interrupt, and synchronize everything with mutexes, but that would be a much bigger rework, for a separate patch. Signed-off-by: Marek Vasut Cc: David S. Miller Cc: Lukas Wunner Cc: Petr Stetiar Cc: YueHaibing Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/micrel/ks8851_mll.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c index 799154d7c047..c699a779757e 100644 --- a/drivers/net/ethernet/micrel/ks8851_mll.c +++ b/drivers/net/ethernet/micrel/ks8851_mll.c @@ -831,14 +831,17 @@ static irqreturn_t ks_irq(int irq, void *pw) { struct net_device *netdev = pw; struct ks_net *ks = netdev_priv(netdev); + unsigned long flags; u16 status; + spin_lock_irqsave(&ks->statelock, flags); /*this should be the first in IRQ handler */ ks_save_cmd_reg(ks); status = ks_rdreg16(ks, KS_ISR); if (unlikely(!status)) { ks_restore_cmd_reg(ks); + spin_unlock_irqrestore(&ks->statelock, flags); return IRQ_NONE; } @@ -864,6 +867,7 @@ static irqreturn_t ks_irq(int irq, void *pw) ks->netdev->stats.rx_over_errors++; /* this should be the last in IRQ handler*/ ks_restore_cmd_reg(ks); + spin_unlock_irqrestore(&ks->statelock, flags); return IRQ_HANDLED; } @@ -933,6 +937,7 @@ static int ks_net_stop(struct net_device *netdev) /* shutdown RX/TX QMU */ ks_disable_qmu(ks); + ks_disable_int(ks); /* set powermode to soft power down to save power */ ks_set_powermode(ks, PMECR_PM_SOFTDOWN); @@ -989,10 +994,9 @@ static netdev_tx_t ks_start_xmit(struct sk_buff *skb, struct net_device *netdev) { netdev_tx_t retv = NETDEV_TX_OK; struct ks_net *ks = netdev_priv(netdev); + unsigned long flags; - disable_irq(netdev->irq); - ks_disable_int(ks); - spin_lock(&ks->statelock); + spin_lock_irqsave(&ks->statelock, flags); /* Extra space are required: * 4 byte for alignment, 4 for status/length, 4 for CRC @@ -1006,9 +1010,7 @@ static netdev_tx_t ks_start_xmit(struct sk_buff *skb, struct net_device *netdev) dev_kfree_skb(skb); } else retv = NETDEV_TX_BUSY; - spin_unlock(&ks->statelock); - ks_enable_int(ks); - enable_irq(netdev->irq); + spin_unlock_irqrestore(&ks->statelock, flags); return retv; } From f4bb37a0f6c55e12865b39696ed2c6423a1eeddc Mon Sep 17 00:00:00 2001 From: Madhuparna Bhowmik Date: Sun, 23 Feb 2020 20:03:02 +0530 Subject: [PATCH 3705/3715] mac80211: rx: avoid RCU list traversal under mutex [ Upstream commit 253216ffb2a002a682c6f68bd3adff5b98b71de8 ] local->sta_mtx is held in __ieee80211_check_fast_rx_iface(). No need to use list_for_each_entry_rcu() as it also requires a cond argument to avoid false lockdep warnings when not used in RCU read-side section (with CONFIG_PROVE_RCU_LIST). Therefore use list_for_each_entry(); Signed-off-by: Madhuparna Bhowmik Link: https://lore.kernel.org/r/20200223143302.15390-1-madhuparnabhowmik10@gmail.com Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 31000622376d..7c92b1471c34 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3862,7 +3862,7 @@ void __ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata) lockdep_assert_held(&local->sta_mtx); - list_for_each_entry_rcu(sta, &local->sta_list, list) { + list_for_each_entry(sta, &local->sta_list, list) { if (sdata != sta->sdata && (!sta->sdata->bss || sta->sdata->bss != sdata->bss)) continue; From d8a4a55bdcf50206de12766400c989de2f840d1a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 24 Feb 2020 12:47:14 -0800 Subject: [PATCH 3706/3715] signal: avoid double atomic counter increments for user accounting [ Upstream commit fda31c50292a5062332fa0343c084bd9f46604d9 ] When queueing a signal, we increment both the users count of pending signals (for RLIMIT_SIGPENDING tracking) and we increment the refcount of the user struct itself (because we keep a reference to the user in the signal structure in order to correctly account for it when freeing). That turns out to be fairly expensive, because both of them are atomic updates, and particularly under extreme signal handling pressure on big machines, you can get a lot of cache contention on the user struct. That can then cause horrid cacheline ping-pong when you do these multiple accesses. So change the reference counting to only pin the user for the _first_ pending signal, and to unpin it when the last pending signal is dequeued. That means that when a user sees a lot of concurrent signal queuing - which is the only situation when this matters - the only atomic access needed is generally the 'sigpending' count update. This was noticed because of a particularly odd timing artifact on a dual-socket 96C/192T Cascade Lake platform: when you get into bad contention, on that machine for some reason seems to be much worse when the contention happens in the upper 32-byte half of the cacheline. As a result, the kernel test robot will-it-scale 'signal1' benchmark had an odd performance regression simply due to random alignment of the 'struct user_struct' (and pointed to a completely unrelated and apparently nonsensical commit for the regression). Avoiding the double increments (and decrements on the dequeueing side, of course) makes for much less contention and hugely improved performance on that will-it-scale microbenchmark. Quoting Feng Tang: "It makes a big difference, that the performance score is tripled! bump from original 17000 to 54000. Also the gap between 5.0-rc6 and 5.0-rc6+Jiri's patch is reduced to around 2%" [ The "2% gap" is the odd cacheline placement difference on that platform: under the extreme contention case, the effect of which half of the cacheline was hot was 5%, so with the reduced contention the odd timing artifact is reduced too ] It does help in the non-contended case too, but is not nearly as noticeable. Reported-and-tested-by: Feng Tang Cc: Eric W. Biederman Cc: Huang, Ying Cc: Philip Li Cc: Andi Kleen Cc: Jiri Olsa Cc: Peter Zijlstra Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- kernel/signal.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/kernel/signal.c b/kernel/signal.c index 8fee1f2eba2f..c066168f8854 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -379,27 +379,32 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi { struct sigqueue *q = NULL; struct user_struct *user; + int sigpending; /* * Protect access to @t credentials. This can go away when all * callers hold rcu read lock. + * + * NOTE! A pending signal will hold on to the user refcount, + * and we get/put the refcount only when the sigpending count + * changes from/to zero. */ rcu_read_lock(); - user = get_uid(__task_cred(t)->user); - atomic_inc(&user->sigpending); + user = __task_cred(t)->user; + sigpending = atomic_inc_return(&user->sigpending); + if (sigpending == 1) + get_uid(user); rcu_read_unlock(); - if (override_rlimit || - atomic_read(&user->sigpending) <= - task_rlimit(t, RLIMIT_SIGPENDING)) { + if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) { q = kmem_cache_alloc(sigqueue_cachep, flags); } else { print_dropped_signal(sig); } if (unlikely(q == NULL)) { - atomic_dec(&user->sigpending); - free_uid(user); + if (atomic_dec_and_test(&user->sigpending)) + free_uid(user); } else { INIT_LIST_HEAD(&q->list); q->flags = 0; @@ -413,8 +418,8 @@ static void __sigqueue_free(struct sigqueue *q) { if (q->flags & SIGQUEUE_PREALLOC) return; - atomic_dec(&q->user->sigpending); - free_uid(q->user); + if (atomic_dec_and_test(&q->user->sigpending)) + free_uid(q->user); kmem_cache_free(sigqueue_cachep, q); } From 085e871e8037849e9c574d86a79aa7b51ee5c952 Mon Sep 17 00:00:00 2001 From: yangerkun Date: Wed, 26 Feb 2020 11:54:35 +0800 Subject: [PATCH 3707/3715] slip: not call free_netdev before rtnl_unlock in slip_open [ Upstream commit f596c87005f7b1baeb7d62d9a9e25d68c3dfae10 ] As the description before netdev_run_todo, we cannot call free_netdev before rtnl_unlock, fix it by reorder the code. Signed-off-by: yangerkun Reviewed-by: Oliver Hartkopp Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/slip/slip.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index b07f367abd91..d7882b548b79 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -867,7 +867,10 @@ err_free_chan: tty->disc_data = NULL; clear_bit(SLF_INUSE, &sl->flags); sl_free_netdev(sl->dev); + /* do not call free_netdev before rtnl_unlock */ + rtnl_unlock(); free_netdev(sl->dev); + return err; err_exit: rtnl_unlock(); From ebef8d300abea8bc838072d8b32d1106fbf133fa Mon Sep 17 00:00:00 2001 From: Luo bin Date: Thu, 27 Feb 2020 06:34:43 +0000 Subject: [PATCH 3708/3715] hinic: fix a bug of setting hw_ioctxt [ Upstream commit d2ed69ce9ed3477e2a9527e6b89fe4689d99510e ] a reserved field is used to signify prime physical function index in the latest firmware version, so we must assign a value to it correctly Signed-off-by: Luo bin Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c | 1 + drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h | 2 +- drivers/net/ethernet/huawei/hinic/hinic_hw_if.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c index 79b567447084..46aba02b8672 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c @@ -312,6 +312,7 @@ static int set_hw_ioctxt(struct hinic_hwdev *hwdev, unsigned int rq_depth, } hw_ioctxt.func_idx = HINIC_HWIF_FUNC_IDX(hwif); + hw_ioctxt.ppf_idx = HINIC_HWIF_PPF_IDX(hwif); hw_ioctxt.set_cmdq_depth = HW_IOCTXT_SET_CMDQ_DEPTH_DEFAULT; hw_ioctxt.cmdq_depth = 0; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h index 0f5563f3b779..a011fd2d2627 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h @@ -104,8 +104,8 @@ struct hinic_cmd_hw_ioctxt { u8 rsvd2; u8 rsvd3; + u8 ppf_idx; u8 rsvd4; - u8 rsvd5; u16 rq_depth; u16 rx_buf_sz_idx; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h index 5b4760c0e9f5..f683ccbdfca0 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h @@ -146,6 +146,7 @@ #define HINIC_HWIF_FUNC_IDX(hwif) ((hwif)->attr.func_idx) #define HINIC_HWIF_PCI_INTF(hwif) ((hwif)->attr.pci_intf_idx) #define HINIC_HWIF_PF_IDX(hwif) ((hwif)->attr.pf_idx) +#define HINIC_HWIF_PPF_IDX(hwif) ((hwif)->attr.ppf_idx) #define HINIC_FUNC_TYPE(hwif) ((hwif)->attr.func_type) #define HINIC_IS_PF(hwif) (HINIC_FUNC_TYPE(hwif) == HINIC_PF) From 5930339b092e27e9bf09267ea87f37d6fecd6298 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 27 Feb 2020 12:23:52 +0000 Subject: [PATCH 3709/3715] net: rmnet: fix NULL pointer dereference in rmnet_newlink() [ Upstream commit 93b5cbfa9636d385126f211dca9efa7e3f683202 ] rmnet registers IFLA_LINK interface as a lower interface. But, IFLA_LINK could be NULL. In the current code, rmnet doesn't check IFLA_LINK. So, panic would occur. Test commands: modprobe rmnet ip link add rmnet0 type rmnet mux_id 1 Splat looks like: [ 36.826109][ T1115] general protection fault, probably for non-canonical address 0xdffffc0000000000I [ 36.838817][ T1115] KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] [ 36.839908][ T1115] CPU: 1 PID: 1115 Comm: ip Not tainted 5.6.0-rc1+ #447 [ 36.840569][ T1115] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 36.841408][ T1115] RIP: 0010:rmnet_newlink+0x54/0x510 [rmnet] [ 36.841986][ T1115] Code: 83 ec 18 48 c1 e9 03 80 3c 01 00 0f 85 d4 03 00 00 48 8b 6a 28 48 b8 00 00 00 00 00 c [ 36.843923][ T1115] RSP: 0018:ffff8880b7e0f1c0 EFLAGS: 00010247 [ 36.844756][ T1115] RAX: dffffc0000000000 RBX: ffff8880d14cca00 RCX: 1ffff11016fc1e99 [ 36.845859][ T1115] RDX: 0000000000000000 RSI: ffff8880c3d04000 RDI: 0000000000000004 [ 36.846961][ T1115] RBP: 0000000000000000 R08: ffff8880b7e0f8b0 R09: ffff8880b6ac2d90 [ 36.848020][ T1115] R10: ffffffffc0589a40 R11: ffffed1016d585b7 R12: ffffffff88ceaf80 [ 36.848788][ T1115] R13: ffff8880c3d04000 R14: ffff8880b7e0f8b0 R15: ffff8880c3d04000 [ 36.849546][ T1115] FS: 00007f50ab3360c0(0000) GS:ffff8880da000000(0000) knlGS:0000000000000000 [ 36.851784][ T1115] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 36.852422][ T1115] CR2: 000055871afe5ab0 CR3: 00000000ae246001 CR4: 00000000000606e0 [ 36.853181][ T1115] Call Trace: [ 36.853514][ T1115] __rtnl_newlink+0xbdb/0x1270 [ 36.853967][ T1115] ? lock_downgrade+0x6e0/0x6e0 [ 36.854420][ T1115] ? rtnl_link_unregister+0x220/0x220 [ 36.854936][ T1115] ? lock_acquire+0x164/0x3b0 [ 36.855376][ T1115] ? is_bpf_image_address+0xff/0x1d0 [ 36.855884][ T1115] ? rtnl_newlink+0x4c/0x90 [ 36.856304][ T1115] ? kernel_text_address+0x111/0x140 [ 36.856857][ T1115] ? __kernel_text_address+0xe/0x30 [ 36.857440][ T1115] ? unwind_get_return_address+0x5f/0xa0 [ 36.858063][ T1115] ? create_prof_cpu_mask+0x20/0x20 [ 36.858644][ T1115] ? arch_stack_walk+0x83/0xb0 [ 36.859171][ T1115] ? stack_trace_save+0x82/0xb0 [ 36.859710][ T1115] ? stack_trace_consume_entry+0x160/0x160 [ 36.860357][ T1115] ? deactivate_slab.isra.78+0x2c5/0x800 [ 36.860928][ T1115] ? kasan_unpoison_shadow+0x30/0x40 [ 36.861520][ T1115] ? kmem_cache_alloc_trace+0x135/0x350 [ 36.862125][ T1115] ? rtnl_newlink+0x4c/0x90 [ 36.864073][ T1115] rtnl_newlink+0x65/0x90 [ ... ] Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial implementation") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 7d8303e45f09..b7df8c1121e3 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -157,6 +157,11 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, int err = 0; u16 mux_id; + if (!tb[IFLA_LINK]) { + NL_SET_ERR_MSG_MOD(extack, "link not specified"); + return -EINVAL; + } + real_dev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); if (!real_dev || !dev) return -ENODEV; From 8419d8e01a6b52bfdfcbce5f5c887643200eeacf Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 21 Feb 2020 23:31:11 -0500 Subject: [PATCH 3710/3715] jbd2: fix data races at struct journal_head [ Upstream commit 6c5d911249290f41f7b50b43344a7520605b1acb ] journal_head::b_transaction and journal_head::b_next_transaction could be accessed concurrently as noticed by KCSAN, LTP: starting fsync04 /dev/zero: Can't open blockdev EXT4-fs (loop0): mounting ext3 file system using the ext4 subsystem EXT4-fs (loop0): mounted filesystem with ordered data mode. Opts: (null) ================================================================== BUG: KCSAN: data-race in __jbd2_journal_refile_buffer [jbd2] / jbd2_write_access_granted [jbd2] write to 0xffff99f9b1bd0e30 of 8 bytes by task 25721 on cpu 70: __jbd2_journal_refile_buffer+0xdd/0x210 [jbd2] __jbd2_journal_refile_buffer at fs/jbd2/transaction.c:2569 jbd2_journal_commit_transaction+0x2d15/0x3f20 [jbd2] (inlined by) jbd2_journal_commit_transaction at fs/jbd2/commit.c:1034 kjournald2+0x13b/0x450 [jbd2] kthread+0x1cd/0x1f0 ret_from_fork+0x27/0x50 read to 0xffff99f9b1bd0e30 of 8 bytes by task 25724 on cpu 68: jbd2_write_access_granted+0x1b2/0x250 [jbd2] jbd2_write_access_granted at fs/jbd2/transaction.c:1155 jbd2_journal_get_write_access+0x2c/0x60 [jbd2] __ext4_journal_get_write_access+0x50/0x90 [ext4] ext4_mb_mark_diskspace_used+0x158/0x620 [ext4] ext4_mb_new_blocks+0x54f/0xca0 [ext4] ext4_ind_map_blocks+0xc79/0x1b40 [ext4] ext4_map_blocks+0x3b4/0x950 [ext4] _ext4_get_block+0xfc/0x270 [ext4] ext4_get_block+0x3b/0x50 [ext4] __block_write_begin_int+0x22e/0xae0 __block_write_begin+0x39/0x50 ext4_write_begin+0x388/0xb50 [ext4] generic_perform_write+0x15d/0x290 ext4_buffered_write_iter+0x11f/0x210 [ext4] ext4_file_write_iter+0xce/0x9e0 [ext4] new_sync_write+0x29c/0x3b0 __vfs_write+0x92/0xa0 vfs_write+0x103/0x260 ksys_write+0x9d/0x130 __x64_sys_write+0x4c/0x60 do_syscall_64+0x91/0xb05 entry_SYSCALL_64_after_hwframe+0x49/0xbe 5 locks held by fsync04/25724: #0: ffff99f9911093f8 (sb_writers#13){.+.+}, at: vfs_write+0x21c/0x260 #1: ffff99f9db4c0348 (&sb->s_type->i_mutex_key#15){+.+.}, at: ext4_buffered_write_iter+0x65/0x210 [ext4] #2: ffff99f5e7dfcf58 (jbd2_handle){++++}, at: start_this_handle+0x1c1/0x9d0 [jbd2] #3: ffff99f9db4c0168 (&ei->i_data_sem){++++}, at: ext4_map_blocks+0x176/0x950 [ext4] #4: ffffffff99086b40 (rcu_read_lock){....}, at: jbd2_write_access_granted+0x4e/0x250 [jbd2] irq event stamp: 1407125 hardirqs last enabled at (1407125): [] __find_get_block+0x107/0x790 hardirqs last disabled at (1407124): [] __find_get_block+0x49/0x790 softirqs last enabled at (1405528): [] __do_softirq+0x34c/0x57c softirqs last disabled at (1405521): [] irq_exit+0xa2/0xc0 Reported by Kernel Concurrency Sanitizer on: CPU: 68 PID: 25724 Comm: fsync04 Tainted: G L 5.6.0-rc2-next-20200221+ #7 Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019 The plain reads are outside of jh->b_state_lock critical section which result in data races. Fix them by adding pairs of READ|WRITE_ONCE(). Reviewed-by: Jan Kara Signed-off-by: Qian Cai Link: https://lore.kernel.org/r/20200222043111.2227-1-cai@lca.pw Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/jbd2/transaction.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index f2ff141a4479..a355ca418e78 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1050,8 +1050,8 @@ static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh, /* For undo access buffer must have data copied */ if (undo && !jh->b_committed_data) goto out; - if (jh->b_transaction != handle->h_transaction && - jh->b_next_transaction != handle->h_transaction) + if (READ_ONCE(jh->b_transaction) != handle->h_transaction && + READ_ONCE(jh->b_next_transaction) != handle->h_transaction) goto out; /* * There are two reasons for the barrier here: @@ -2466,8 +2466,8 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh) * our jh reference and thus __jbd2_journal_file_buffer() must not * take a new one. */ - jh->b_transaction = jh->b_next_transaction; - jh->b_next_transaction = NULL; + WRITE_ONCE(jh->b_transaction, jh->b_next_transaction); + WRITE_ONCE(jh->b_next_transaction, NULL); if (buffer_freed(bh)) jlist = BJ_Forget; else if (jh->b_modified) From 1940bd214aeacfc27d27c082f0285fe3e673cff7 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 28 Jan 2020 20:22:13 +0100 Subject: [PATCH 3711/3715] ARM: 8957/1: VDSO: Match ARMv8 timer in cntvct_functional() commit 45939ce292b4b11159719faaf60aba7d58d5fe33 upstream. It is possible for a system with an ARMv8 timer to run a 32-bit kernel. When this happens we will unconditionally have the vDSO code remove the __vdso_gettimeofday and __vdso_clock_gettime symbols because cntvct_functional() returns false since it does not match that compatibility string. Fixes: ecf99a439105 ("ARM: 8331/1: VDSO initialization, mapping, and synchronization") Signed-off-by: Florian Fainelli Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/kernel/vdso.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c index f4dd7f9663c1..0001742c131d 100644 --- a/arch/arm/kernel/vdso.c +++ b/arch/arm/kernel/vdso.c @@ -103,6 +103,8 @@ static bool __init cntvct_functional(void) * this. */ np = of_find_compatible_node(NULL, NULL, "arm,armv7-timer"); + if (!np) + np = of_find_compatible_node(NULL, NULL, "arm,armv8-timer"); if (!np) goto out_put; From 3b61a9a6519744f9de3538586f75f1325091a861 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 10 Feb 2020 02:04:17 +0100 Subject: [PATCH 3712/3715] ARM: 8958/1: rename missed uaccess .fixup section commit f87b1c49bc675da30d8e1e8f4b60b800312c7b90 upstream. When the uaccess .fixup section was renamed to .text.fixup, one case was missed. Under ld.bfd, the orphaned section was moved close to .text (since they share the "ax" bits), so things would work normally on uaccess faults. Under ld.lld, the orphaned section was placed outside the .text section, making it unreachable. Link: https://github.com/ClangBuiltLinux/linux/issues/282 Link: https://bugs.chromium.org/p/chromium/issues/detail?id=1020633#c44 Link: https://lore.kernel.org/r/nycvar.YSQ.7.76.1912032147340.17114@knanqh.ubzr Link: https://lore.kernel.org/lkml/202002071754.F5F073F1D@keescook/ Fixes: c4a84ae39b4a5 ("ARM: 8322/1: keep .text and .fixup regions closer together") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Reviewed-by: Ard Biesheuvel Reviewed-by: Nick Desaulniers Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/lib/copy_from_user.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S index 6709a8d33963..f1e34f16cfab 100644 --- a/arch/arm/lib/copy_from_user.S +++ b/arch/arm/lib/copy_from_user.S @@ -100,7 +100,7 @@ ENTRY(arm_copy_from_user) ENDPROC(arm_copy_from_user) - .pushsection .fixup,"ax" + .pushsection .text.fixup,"ax" .align 0 copy_abort_preamble ldmfd sp!, {r1, r2, r3} From 992a5c90af04da6e1e047f10c5ee5d6d8752a699 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 17 Mar 2020 01:28:45 +0100 Subject: [PATCH 3713/3715] mm: slub: add missing TID bump in kmem_cache_alloc_bulk() commit fd4d9c7d0c71866ec0c2825189ebd2ce35bd95b8 upstream. When kmem_cache_alloc_bulk() attempts to allocate N objects from a percpu freelist of length M, and N > M > 0, it will first remove the M elements from the percpu freelist, then call ___slab_alloc() to allocate the next element and repopulate the percpu freelist. ___slab_alloc() can re-enable IRQs via allocate_slab(), so the TID must be bumped before ___slab_alloc() to properly commit the freelist head change. Fix it by unconditionally bumping c->tid when entering the slowpath. Cc: stable@vger.kernel.org Fixes: ebe909e0fdb3 ("slub: improve bulk alloc strategy") Signed-off-by: Jann Horn Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/slub.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mm/slub.c b/mm/slub.c index 07aeb129f3f8..099c7a85ede0 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3122,6 +3122,15 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void *object = c->freelist; if (unlikely(!object)) { + /* + * We may have removed an object from c->freelist using + * the fastpath in the previous iteration; in that case, + * c->tid has not been bumped yet. + * Since ___slab_alloc() may reenable interrupts while + * allocating memory, we should bump c->tid now. + */ + c->tid = next_tid(c->tid); + /* * Invoking slow path likely have side-effect * of re-populating per CPU c->freelist From 62a47c2e063e8ca601e1cd514d6bd27e6befe3e6 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Fri, 21 Feb 2020 12:28:38 +0100 Subject: [PATCH 3714/3715] ipv4: ensure rcu_read_lock() in cipso_v4_error() commit 3e72dfdf8227b052393f71d820ec7599909dddc2 upstream. Similarly to commit c543cb4a5f07 ("ipv4: ensure rcu_read_lock() in ipv4_link_failure()"), __ip_options_compile() must be called under rcu protection. Fixes: 3da1ed7ac398 ("net: avoid use IPCB in cipso_v4_error") Suggested-by: Guillaume Nault Signed-off-by: Matteo Croce Acked-by: Paul Moore Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/cipso_ipv4.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index f0165c5f376b..1c21dc5d6dd4 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1738,6 +1738,7 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) { unsigned char optbuf[sizeof(struct ip_options) + 40]; struct ip_options *opt = (struct ip_options *)optbuf; + int res; if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES) return; @@ -1749,7 +1750,11 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) memset(opt, 0, sizeof(struct ip_options)); opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr); - if (__ip_options_compile(dev_net(skb->dev), opt, skb, NULL)) + rcu_read_lock(); + res = __ip_options_compile(dev_net(skb->dev), opt, skb, NULL); + rcu_read_unlock(); + + if (res) return; if (gateway) From 01364dad1d4577e27a57729d41053f661bb8a5b9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 20 Mar 2020 10:54:27 +0100 Subject: [PATCH 3715/3715] Linux 4.14.174 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9a524b5c1d55..edc6b62bd892 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 173 +SUBLEVEL = 174 EXTRAVERSION = NAME = Petit Gorille